
Merge tag 'xfs-5.8-merge-8' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs updates from Darrick Wong:
"Most of the changes this cycle are refactoring of existing code in
preparation for things landing in the future.

We also fixed various problems and deficiencies in the quota
implementation, and (I hope) the last of the stale read vectors by
forcing write allocations to go through the unwritten state until the
write completes.

Summary:

- Various cleanups to remove dead code, unnecessary conditionals,
asserts, etc.

- Fix a linker warning caused by xfs stuffing '-g' into CFLAGS
redundantly.

- Tighten up our dmesg logging to ensure that everything is prefixed
with 'XFS' for easier grepping.

- Kill a bunch of typedefs.

- Refactor the deferred ops code to reduce indirect function calls.

- Increase type-safety with the deferred ops code.

- Make the DAX mount options a tri-state.

- Fix some error handling problems in the inode flush code and clean
up other inode flush warts.

- Refactor log recovery so that each log item's recovery functions now
live with the other log item processing code.

- Fix some SPDX forms.

- Fix quota counter corruption if the fs crashes after running
quotacheck but before any dquots get logged.

- Don't fail metadata verification on zero-entry attr leaf blocks,
since they're just part of the disk format now due to a historic
lack of log atomicity.

- Don't allow SWAPEXT between files with different [ugp]id when
quotas are enabled.

- Refactor inode fork reading and verification to run directly from
the inode-from-disk function. This means that we now actually
guarantee that _iget'ted inodes are totally verified and ready to
go.

- Move the incore inode fork format and extent counts to the ifork
structure.

- Scalability improvements by reducing cacheline pingponging in
struct xfs_mount.

- More scalability improvements by removing m_active_trans from the
hot path.

- Fix inode counter update sanity checking to run /only/ on debug
kernels.

- Fix longstanding inconsistency in what error code we return when a
program hits project quota limits (ENOSPC).

- Fix group quota returning the wrong error code when a program hits
group quota limits.

- Fix per-type quota limits and grace periods for group and project
quotas so that they actually work.

- Allow extension of individual grace periods.

- Refactor the non-reclaim inode radix tree walking code to remove a
bunch of stupid little functions and straighten out the
inconsistent naming schemes.

- Fix a bug in speculative preallocation where we measured a new
allocation based on the last extent mapping in the file instead of
looking farther for the last contiguous space allocation.

- Force delalloc writes to unwritten extents. This closes a stale
disk contents exposure vector if the system goes down before the
write completes.

- More lockdep whackamole"

* tag 'xfs-5.8-merge-8' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (129 commits)
xfs: more lockdep whackamole with kmem_alloc*
xfs: force writes to delalloc regions to unwritten
xfs: refactor xfs_iomap_prealloc_size
xfs: measure all contiguous previous extents for prealloc size
xfs: don't fail unwritten extent conversion on writeback due to edquot
xfs: rearrange xfs_inode_walk_ag parameters
xfs: straighten out all the naming around incore inode tree walks
xfs: move xfs_inode_ag_iterator to be closer to the perag walking code
xfs: use bool for done in xfs_inode_ag_walk
xfs: fix inode ag walk predicate function return values
xfs: refactor eofb matching into a single helper
xfs: remove __xfs_icache_free_eofblocks
xfs: remove flags argument from xfs_inode_ag_walk
xfs: remove xfs_inode_ag_iterator_flags
xfs: remove unused xfs_inode_ag_iterator function
xfs: replace open-coded XFS_ICI_NO_TAG
xfs: move eofblocks conversion function to xfs_ioctl.c
xfs: allow individual quota grace period extension
xfs: per-type quota timers and warn limits
xfs: switch xfs_get_defquota to take explicit type
...

+4247 -4820
+5 -5
Documentation/filesystems/xfs-self-describing-metadata.rst
···
340 340
341 341  The structure of the verifiers and the identifiers checks is very similar to the
342 342  buffer code described above. The only difference is where they are called. For
343 -   example, inode read verification is done in xfs_iread() when the inode is first
344 -   read out of the buffer and the struct xfs_inode is instantiated. The inode is
345 -   already extensively verified during writeback in xfs_iflush_int, so the only
346 -   addition here is to add the LSN and CRC to the inode as it is copied back into
347 -   the buffer.
343 +   example, inode read verification is done in xfs_inode_from_disk() when the inode
344 +   is first read out of the buffer and the struct xfs_inode is instantiated. The
345 +   inode is already extensively verified during writeback in xfs_iflush_int, so the
346 +   only addition here is to add the LSN and CRC to the inode as it is copied back
347 +   into the buffer.
348 348
349 349  XXX: inode unlinked list modification doesn't recalculate the inode CRC! None of
350 350  the unlinked list modifications check or update CRCs, neither during unlink nor
+3 -2
fs/xfs/Makefile
···
7 7    ccflags-y += -I $(srctree)/$(src)	# needed for trace events
8 8    ccflags-y += -I $(srctree)/$(src)/libxfs
9 9
10 -   ccflags-$(CONFIG_XFS_DEBUG) += -g
11 -
12 10  obj-$(CONFIG_XFS_FS)	+= xfs.o
13 11
14 12  # this one should be compiled first, as the tracing macros can easily blow up
···
99 101  	xfs_log_cil.o \
100 102  	xfs_bmap_item.o \
101 103  	xfs_buf_item.o \
104 +   	xfs_buf_item_recover.o \
105 +   	xfs_dquot_item_recover.o \
102 106  	xfs_extfree_item.o \
103 107  	xfs_icreate_item.o \
104 108  	xfs_inode_item.o \
109 +   	xfs_inode_item_recover.o \
105 110  	xfs_refcount_item.o \
106 111  	xfs_rmap_item.o \
107 112  	xfs_log_recover.o \
+6 -2
fs/xfs/kmem.h
···
1 -   // SPDX-License-Identifier: GPL-2.0
1 +   /* SPDX-License-Identifier: GPL-2.0 */
2 2   /*
3 3    * Copyright (c) 2000-2005 Silicon Graphics, Inc.
4 4    * All Rights Reserved.
···
19 19  #define KM_NOFS		((__force xfs_km_flags_t)0x0004u)
20 20  #define KM_MAYFAIL	((__force xfs_km_flags_t)0x0008u)
21 21  #define KM_ZERO		((__force xfs_km_flags_t)0x0010u)
22 +   #define KM_NOLOCKDEP	((__force xfs_km_flags_t)0x0020u)
22 23
23 24  /*
24 25   * We use a special process flag to avoid recursive callbacks into
···
31 30  {
32 31  	gfp_t lflags;
33 32
34 -   	BUG_ON(flags & ~(KM_NOFS|KM_MAYFAIL|KM_ZERO));
33 +   	BUG_ON(flags & ~(KM_NOFS | KM_MAYFAIL | KM_ZERO | KM_NOLOCKDEP));
35 34
36 35  	lflags = GFP_KERNEL | __GFP_NOWARN;
37 36  	if (flags & KM_NOFS)
···
49 48
50 49  	if (flags & KM_ZERO)
51 50  		lflags |= __GFP_ZERO;
51 +
52 +   	if (flags & KM_NOLOCKDEP)
53 +   		lflags |= __GFP_NOLOCKDEP;
52 54
53 55  	return lflags;
54 56  }
+1 -1
fs/xfs/libxfs/xfs_ag_resv.h
···
1 -   // SPDX-License-Identifier: GPL-2.0+
1 +   /* SPDX-License-Identifier: GPL-2.0+ */
2 2   /*
3 3    * Copyright (C) 2016 Oracle.  All Rights Reserved.
4 4    * Author: Darrick J. Wong <darrick.wong@oracle.com>
+1 -1
fs/xfs/libxfs/xfs_alloc.h
···
1 -   // SPDX-License-Identifier: GPL-2.0
1 +   /* SPDX-License-Identifier: GPL-2.0 */
2 2   /*
3 3    * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
4 4    * All Rights Reserved.
+1 -1
fs/xfs/libxfs/xfs_alloc_btree.h
···
1 -   // SPDX-License-Identifier: GPL-2.0
1 +   /* SPDX-License-Identifier: GPL-2.0 */
2 2   /*
3 3    * Copyright (c) 2000,2005 Silicon Graphics, Inc.
4 4    * All Rights Reserved.
+8 -8
fs/xfs/libxfs/xfs_attr.c
···
61 61  	struct xfs_inode	*ip)
62 62  {
63 63  	if (!XFS_IFORK_Q(ip) ||
64 -   	    (ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
65 -   	     ip->i_d.di_anextents == 0))
64 +   	    (ip->i_afp->if_format == XFS_DINODE_FMT_EXTENTS &&
65 +   	     ip->i_afp->if_nextents == 0))
66 66  		return 0;
67 67  	return 1;
68 68  }
···
84 84  	if (!xfs_inode_hasattr(args->dp))
85 85  		return -ENOATTR;
86 86
87 -   	if (args->dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL)
87 +   	if (args->dp->i_afp->if_format == XFS_DINODE_FMT_LOCAL)
88 88  		return xfs_attr_shortform_getvalue(args);
89 89  	if (xfs_bmap_one_block(args->dp, XFS_ATTR_FORK))
90 90  		return xfs_attr_leaf_get(args);
···
212 212  	 * If the attribute list is non-existent or a shortform list,
213 213  	 * upgrade it to a single-leaf-block attribute list.
214 214  	 */
215 -   	if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL ||
216 -   	    (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
217 -   	     dp->i_d.di_anextents == 0)) {
215 +   	if (dp->i_afp->if_format == XFS_DINODE_FMT_LOCAL ||
216 +   	    (dp->i_afp->if_format == XFS_DINODE_FMT_EXTENTS &&
217 +   	     dp->i_afp->if_nextents == 0)) {
218 218
219 219  		/*
220 220  		 * Build initial attribute list (if required).
221 221  		 */
222 -   		if (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS)
222 +   		if (dp->i_afp->if_format == XFS_DINODE_FMT_EXTENTS)
223 223  			xfs_attr_shortform_create(args);
224 224
225 225  		/*
···
272 272
273 273  	if (!xfs_inode_hasattr(dp)) {
274 274  		error = -ENOATTR;
275 -   	} else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
275 +   	} else if (dp->i_afp->if_format == XFS_DINODE_FMT_LOCAL) {
276 276  		ASSERT(dp->i_afp->if_flags & XFS_IFINLINE);
277 277  		error = xfs_attr_shortform_remove(args);
278 278  	} else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
+1 -1
fs/xfs/libxfs/xfs_attr.h
···
1 -   // SPDX-License-Identifier: GPL-2.0
1 +   /* SPDX-License-Identifier: GPL-2.0 */
2 2   /*
3 3    * Copyright (c) 2000,2002-2003,2005 Silicon Graphics, Inc.
4 4    * All Rights Reserved.
+27 -32
fs/xfs/libxfs/xfs_attr_leaf.c
···
309 309  		return fa;
310 310
311 311  	/*
312 -   	 * In recovery there is a transient state where count == 0 is valid
313 -   	 * because we may have transitioned an empty shortform attr to a leaf
314 -   	 * if the attr didn't fit in shortform.
315 -   	 */
316 -   	if (!xfs_log_in_recovery(mp) && ichdr.count == 0)
317 -   		return __this_address;
318 -
319 -   	/*
320 312  	 * firstused is the block offset of the first name info structure.
321 313  	 * Make sure it doesn't go off the block or crash into the header.
322 314  	 */
···
323 331  	    (char *)bp->b_addr + ichdr.firstused)
324 332  		return __this_address;
325 333
334 +   	/*
335 +   	 * NOTE: This verifier historically failed empty leaf buffers because
336 +   	 * we expect the fork to be in another format. Empty attr fork format
337 +   	 * conversions are possible during xattr set, however, and format
338 +   	 * conversion is not atomic with the xattr set that triggers it. We
339 +   	 * cannot assume leaf blocks are non-empty until that is addressed.
340 +   	 */
326 341  	buf_end = (char *)bp->b_addr + mp->m_attr_geo->blksize;
327 342  	for (i = 0, ent = entries; i < ichdr.count; ent++, i++) {
328 343  		fa = xfs_attr3_leaf_verify_entry(mp, buf_end, leaf, &ichdr,
···
488 489  	}
489 490
490 491  	if (!args->value) {
491 -   		args->value = kmem_alloc_large(valuelen, 0);
492 +   		args->value = kmem_alloc_large(valuelen, KM_NOLOCKDEP);
492 493  		if (!args->value)
493 494  			return -ENOMEM;
494 495  	}
···
538 539  	/* rounded down */
539 540  	offset = (XFS_LITINO(mp) - bytes) >> 3;
540 541
541 -   	if (dp->i_d.di_format == XFS_DINODE_FMT_DEV) {
542 +   	if (dp->i_df.if_format == XFS_DINODE_FMT_DEV) {
542 543  		minforkoff = roundup(sizeof(xfs_dev_t), 8) >> 3;
543 544  		return (offset >= minforkoff) ? minforkoff : 0;
544 545  	}
···
566 567
567 568  	dsize = dp->i_df.if_bytes;
568 569
569 -   	switch (dp->i_d.di_format) {
570 +   	switch (dp->i_df.if_format) {
570 571  	case XFS_DINODE_FMT_EXTENTS:
571 572  		/*
572 573  		 * If there is no attr fork and the data fork is extents,
···
635 636   * Create the initial contents of a shortform attribute list.
636 637   */
637 638  void
638 -   xfs_attr_shortform_create(xfs_da_args_t *args)
639 +   xfs_attr_shortform_create(
640 +   	struct xfs_da_args	*args)
639 641  {
640 -   	xfs_attr_sf_hdr_t *hdr;
641 -   	xfs_inode_t *dp;
642 -   	struct xfs_ifork *ifp;
642 +   	struct xfs_inode	*dp = args->dp;
643 +   	struct xfs_ifork	*ifp = dp->i_afp;
644 +   	struct xfs_attr_sf_hdr	*hdr;
643 645
644 646  	trace_xfs_attr_sf_create(args);
645 647
646 -   	dp = args->dp;
647 -   	ASSERT(dp != NULL);
648 -   	ifp = dp->i_afp;
649 -   	ASSERT(ifp != NULL);
650 648  	ASSERT(ifp->if_bytes == 0);
651 -   	if (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS) {
649 +   	if (ifp->if_format == XFS_DINODE_FMT_EXTENTS) {
652 650  		ifp->if_flags &= ~XFS_IFEXTENTS;	/* just in case */
653 -   		dp->i_d.di_aformat = XFS_DINODE_FMT_LOCAL;
651 +   		ifp->if_format = XFS_DINODE_FMT_LOCAL;
654 652  		ifp->if_flags |= XFS_IFINLINE;
655 653  	} else {
656 654  		ASSERT(ifp->if_flags & XFS_IFINLINE);
···
715 719  	struct xfs_inode	*ip,
716 720  	struct xfs_trans	*tp)
717 721  {
718 -   	xfs_idestroy_fork(ip, XFS_ATTR_FORK);
722 +   	ASSERT(ip->i_afp->if_nextents == 0);
723 +
724 +   	xfs_idestroy_fork(ip->i_afp);
725 +   	kmem_cache_free(xfs_ifork_zone, ip->i_afp);
726 +   	ip->i_afp = NULL;
719 727  	ip->i_d.di_forkoff = 0;
720 -   	ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
721 -
722 -   	ASSERT(ip->i_d.di_anextents == 0);
723 -   	ASSERT(ip->i_afp == NULL);
724 -
725 728  	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
726 729  }
···
770 775  	totsize -= size;
771 776  	if (totsize == sizeof(xfs_attr_sf_hdr_t) &&
772 777  	    (mp->m_flags & XFS_MOUNT_ATTR2) &&
773 -   	    (dp->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
778 +   	    (dp->i_df.if_format != XFS_DINODE_FMT_BTREE) &&
774 779  	    !(args->op_flags & XFS_DA_OP_ADDNAME)) {
775 780  		xfs_attr_fork_remove(dp, args->trans);
776 781  	} else {
···
780 785  		ASSERT(totsize > sizeof(xfs_attr_sf_hdr_t) ||
781 786  		       (args->op_flags & XFS_DA_OP_ADDNAME) ||
782 787  		       !(mp->m_flags & XFS_MOUNT_ATTR2) ||
783 -   		       dp->i_d.di_format == XFS_DINODE_FMT_BTREE);
788 +   		       dp->i_df.if_format == XFS_DINODE_FMT_BTREE);
784 789  		xfs_trans_log_inode(args->trans, dp,
785 790  					XFS_ILOG_CORE | XFS_ILOG_ADATA);
786 791  	}
···
957 962  				+ be16_to_cpu(name_loc->valuelen);
958 963  	}
959 964  	if ((dp->i_mount->m_flags & XFS_MOUNT_ATTR2) &&
960 -   	    (dp->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
965 +   	    (dp->i_df.if_format != XFS_DINODE_FMT_BTREE) &&
961 966  	    (bytes == sizeof(struct xfs_attr_sf_hdr)))
962 967  		return -1;
963 968  	return xfs_attr_shortform_bytesfit(dp, bytes);
···
976 981  	int			i;
977 982  	int64_t			size;
978 983
979 -   	ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL);
984 +   	ASSERT(ip->i_afp->if_format == XFS_DINODE_FMT_LOCAL);
980 985  	ifp = XFS_IFORK_PTR(ip, XFS_ATTR_FORK);
981 986  	sfp = (struct xfs_attr_shortform *)ifp->if_u1.if_data;
982 987  	size = ifp->if_bytes;
···
1080 1085
1081 1086  	if (forkoff == -1) {
1082 1087  		ASSERT(dp->i_mount->m_flags & XFS_MOUNT_ATTR2);
1083 -    		ASSERT(dp->i_d.di_format != XFS_DINODE_FMT_BTREE);
1088 +    		ASSERT(dp->i_df.if_format != XFS_DINODE_FMT_BTREE);
1084 1089  		xfs_attr_fork_remove(dp, args->trans);
1085 1090  		goto out;
1086 1091  	}
+1 -1
fs/xfs/libxfs/xfs_attr_leaf.h
···
1 -   // SPDX-License-Identifier: GPL-2.0
1 +   /* SPDX-License-Identifier: GPL-2.0 */
2 2   /*
3 3    * Copyright (c) 2000,2002-2003,2005 Silicon Graphics, Inc.
4 4    * Copyright (c) 2013 Red Hat, Inc.
+1 -1
fs/xfs/libxfs/xfs_attr_remote.h
···
1 -   // SPDX-License-Identifier: GPL-2.0
1 +   /* SPDX-License-Identifier: GPL-2.0 */
2 2   /*
3 3    * Copyright (c) 2013 Red Hat, Inc.
4 4    * All Rights Reserved.
+1 -1
fs/xfs/libxfs/xfs_attr_sf.h
···
1 -   // SPDX-License-Identifier: GPL-2.0
1 +   /* SPDX-License-Identifier: GPL-2.0 */
2 2   /*
3 3    * Copyright (c) 2000,2002,2005 Silicon Graphics, Inc.
4 4    * All Rights Reserved.
+1 -1
fs/xfs/libxfs/xfs_bit.h
···
1 -   // SPDX-License-Identifier: GPL-2.0
1 +   /* SPDX-License-Identifier: GPL-2.0 */
2 2   /*
3 3    * Copyright (c) 2000,2002,2005 Silicon Graphics, Inc.
4 4    * All Rights Reserved.
+132 -178
fs/xfs/libxfs/xfs_bmap.c
···
61 61  	int		sz;		/* root block size */
62 62
63 63  	/*
64 -   	 * The maximum number of extents in a file, hence the maximum
65 -   	 * number of leaf entries, is controlled by the type of di_nextents
66 -   	 * (a signed 32-bit number, xfs_extnum_t), or by di_anextents
67 -   	 * (a signed 16-bit number, xfs_aextnum_t).
64 +   	 * The maximum number of extents in a file, hence the maximum number of
65 +   	 * leaf entries, is controlled by the size of the on-disk extent count,
66 +   	 * either a signed 32-bit number for the data fork, or a signed 16-bit
67 +   	 * number for the attr fork.
68 68  	 *
69 69  	 * Note that we can no longer assume that if we are in ATTR1 that
70 70  	 * the fork offset of all the inodes will be
···
120 120   */
121 121  static inline bool xfs_bmap_needs_btree(struct xfs_inode *ip, int whichfork)
122 122  {
123 +   	struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
124 +
123 125  	return whichfork != XFS_COW_FORK &&
124 -   		XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
125 -   		XFS_IFORK_NEXTENTS(ip, whichfork) >
126 -   			XFS_IFORK_MAXEXT(ip, whichfork);
126 +   		ifp->if_format == XFS_DINODE_FMT_EXTENTS &&
127 +   		ifp->if_nextents > XFS_IFORK_MAXEXT(ip, whichfork);
127 128  }
128 129
129 130  /*
···
132 131   */
133 132  static inline bool xfs_bmap_wants_extents(struct xfs_inode *ip, int whichfork)
134 133  {
134 +   	struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
135 +
135 136  	return whichfork != XFS_COW_FORK &&
136 -   		XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE &&
137 -   		XFS_IFORK_NEXTENTS(ip, whichfork) <=
138 -   			XFS_IFORK_MAXEXT(ip, whichfork);
137 +   		ifp->if_format == XFS_DINODE_FMT_BTREE &&
138 +   		ifp->if_nextents <= XFS_IFORK_MAXEXT(ip, whichfork);
139 139  }
···
215 213  	int		whichfork)
216 214  {
217 215  	if (whichfork == XFS_ATTR_FORK &&
218 -   	    ip->i_d.di_format != XFS_DINODE_FMT_DEV &&
219 -   	    ip->i_d.di_format != XFS_DINODE_FMT_BTREE) {
216 +   	    ip->i_df.if_format != XFS_DINODE_FMT_DEV &&
217 +   	    ip->i_df.if_format != XFS_DINODE_FMT_BTREE) {
220 218  		uint	dfl_forkoff = xfs_default_attroffset(ip) >> 3;
221 219
222 220  		if (dfl_forkoff > ip->i_d.di_forkoff)
···
317 315  	xfs_inode_t		*ip,	/* incore inode pointer */
318 316  	int			whichfork)	/* data or attr fork */
319 317  {
318 +   	struct xfs_mount	*mp = ip->i_mount;
319 +   	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
320 320  	struct xfs_btree_block	*block;	/* current btree block */
321 321  	xfs_fsblock_t		bno;	/* block # of "block" */
322 322  	xfs_buf_t		*bp;	/* buffer for "block" */
323 323  	int			error;	/* error return value */
324 324  	xfs_extnum_t		i=0, j;	/* index into the extents list */
325 -   	struct xfs_ifork	*ifp;	/* fork structure */
326 325  	int			level;	/* btree level, for checking */
327 -   	xfs_mount_t		*mp;	/* file system mount structure */
328 326  	__be64			*pp;	/* pointer to block address */
329 327  	xfs_bmbt_rec_t		*ep;	/* pointer to current extent */
330 328  	xfs_bmbt_rec_t		last = {0, 0}; /* last extent in prev block */
331 329  	xfs_bmbt_rec_t		*nextp;	/* pointer to next extent */
332 330  	int			bp_release = 0;
333 331
334 -   	if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE) {
332 +   	if (ifp->if_format != XFS_DINODE_FMT_BTREE)
335 333  		return;
336 -   	}
337 334
338 335  	/* skip large extent count inodes */
339 -   	if (ip->i_d.di_nextents > 10000)
336 +   	if (ip->i_df.if_nextents > 10000)
340 337  		return;
341 338
342 339  	bno = NULLFSBLOCK;
343 -   	mp = ip->i_mount;
344 -   	ifp = XFS_IFORK_PTR(ip, whichfork);
345 340  	block = ifp->if_broot;
346 341  	/*
347 342  	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
···
603 604  	ASSERT(cur);
604 605  	ASSERT(whichfork != XFS_COW_FORK);
605 606  	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
606 -   	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
607 +   	ASSERT(ifp->if_format == XFS_DINODE_FMT_BTREE);
607 608  	ASSERT(be16_to_cpu(rblock->bb_level) == 1);
608 609  	ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1);
609 610  	ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1);
···
631 632  	xfs_iroot_realloc(ip, -1, whichfork);
632 633  	ASSERT(ifp->if_broot == NULL);
633 634  	ASSERT((ifp->if_flags & XFS_IFBROOT) == 0);
634 -   	XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
635 +   	ifp->if_format = XFS_DINODE_FMT_EXTENTS;
635 636  	*logflagsp |= XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
636 637  	return 0;
637 638  }
···
667 668  	mp = ip->i_mount;
668 669  	ASSERT(whichfork != XFS_COW_FORK);
669 670  	ifp = XFS_IFORK_PTR(ip, whichfork);
670 -   	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS);
671 +   	ASSERT(ifp->if_format == XFS_DINODE_FMT_EXTENTS);
671 672
672 673  	/*
673 674  	 * Make space in the inode incore. This needs to be undone if we fail
···
691 692  	/*
692 693  	 * Convert to a btree with two levels, one record in root.
693 694  	 */
694 -   	XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE);
695 +   	ifp->if_format = XFS_DINODE_FMT_BTREE;
695 696  	memset(&args, 0, sizeof(args));
696 697  	args.tp = tp;
697 698  	args.mp = mp;
···
749 750  		xfs_bmbt_disk_set_all(arp, &rec);
750 751  		cnt++;
751 752  	}
752 -   	ASSERT(cnt == XFS_IFORK_NEXTENTS(ip, whichfork));
753 +   	ASSERT(cnt == ifp->if_nextents);
753 754  	xfs_btree_set_numrecs(ablock, cnt);
754 755
755 756  	/*
···
777 778  	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
778 779  out_root_realloc:
779 780  	xfs_iroot_realloc(ip, -1, whichfork);
780 -   	XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
781 +   	ifp->if_format = XFS_DINODE_FMT_EXTENTS;
781 782  	ASSERT(ifp->if_broot == NULL);
782 783  	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
···
799 800  	struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
800 801
801 802  	ASSERT(whichfork != XFS_COW_FORK);
802 -   	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
803 +   	ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);
803 804  	ASSERT(ifp->if_bytes == 0);
804 -   	ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) == 0);
805 +   	ASSERT(ifp->if_nextents == 0);
805 806
806 807  	xfs_bmap_forkoff_reset(ip, whichfork);
807 808  	ifp->if_flags &= ~XFS_IFINLINE;
808 809  	ifp->if_flags |= XFS_IFEXTENTS;
809 810  	ifp->if_u1.if_root = NULL;
810 811  	ifp->if_height = 0;
811 -   	XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
812 +   	ifp->if_format = XFS_DINODE_FMT_EXTENTS;
812 813  	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
813 814  }
814 815
···
839 840  	 */
840 841  	ASSERT(!(S_ISREG(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK));
841 842  	ifp = XFS_IFORK_PTR(ip, whichfork);
842 -   	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
843 +   	ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);
843 844
844 845  	if (!ifp->if_bytes) {
845 846  		xfs_bmap_local_to_extents_empty(tp, ip, whichfork);
···
906 907  	xfs_iext_first(ifp, &icur);
907 908  	xfs_iext_insert(ip, &icur, &rec, 0);
908 909
909 -   	XFS_IFORK_NEXT_SET(ip, whichfork, 1);
910 +   	ifp->if_nextents = 1;
910 911  	ip->i_d.di_nblocks = 1;
911 912  	xfs_trans_mod_dquot_byino(tp, ip,
912 913  		XFS_TRANS_DQ_BCOUNT, 1L);
···
971 972  	xfs_btree_cur_t		*cur;	/* bmap btree cursor */
972 973  	int			error;	/* error return value */
973 974
974 -   	if (ip->i_d.di_nextents * sizeof(xfs_bmbt_rec_t) <= XFS_IFORK_DSIZE(ip))
975 +   	if (ip->i_df.if_nextents * sizeof(struct xfs_bmbt_rec) <=
976 +   			XFS_IFORK_DSIZE(ip))
975 977  		return 0;
976 978  	cur = NULL;
977 979  	error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0, flags,
···
1033 1033  	int			size,
1034 1034  	int			*version)
1035 1035  {
1036 -    	switch (ip->i_d.di_format) {
1036 +    	switch (ip->i_df.if_format) {
1037 1037  	case XFS_DINODE_FMT_DEV:
1038 1038  		ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3;
1039 1039  		break;
···
1091 1091  		goto trans_cancel;
1092 1092  	if (XFS_IFORK_Q(ip))
1093 1093  		goto trans_cancel;
1094 -    	if (XFS_IS_CORRUPT(mp, ip->i_d.di_anextents != 0)) {
1095 -    		error = -EFSCORRUPTED;
1096 -    		goto trans_cancel;
1097 -    	}
1098 -    	if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) {
1099 -    		/*
1100 -    		 * For inodes coming from pre-6.2 filesystems.
1101 -    		 */
1102 -    		ASSERT(ip->i_d.di_aformat == 0);
1103 -    		ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
1104 -    	}
1105 1094
1106 1095  	xfs_trans_ijoin(tp, ip, 0);
1107 1096  	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
···
1099 1110  		goto trans_cancel;
1100 1111  	ASSERT(ip->i_afp == NULL);
1101 1112  	ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, 0);
1113 +    	ip->i_afp->if_format = XFS_DINODE_FMT_EXTENTS;
1102 1114  	ip->i_afp->if_flags = XFS_IFEXTENTS;
1103 1115  	logflags = 0;
1104 -    	switch (ip->i_d.di_format) {
1116 +    	switch (ip->i_df.if_format) {
1105 1117  	case XFS_DINODE_FMT_LOCAL:
1106 1118  		error = xfs_bmap_add_attrfork_local(tp, ip, &logflags);
1107 1119  		break;
···
1173 1183  	xfs_extnum_t		num_recs;
1174 1184  	xfs_extnum_t		j;
1175 1185  	int			whichfork = cur->bc_ino.whichfork;
1186 +    	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
1176 1187
1177 1188  	block = xfs_btree_get_block(cur, level, &bp);
1178 1189
1179 1190  	/* Abort if we find more records than nextents. */
1180 1191  	num_recs = xfs_btree_get_numrecs(block);
1181 -    	if (unlikely(ir->loaded + num_recs >
1182 -    		     XFS_IFORK_NEXTENTS(ip, whichfork))) {
1192 +    	if (unlikely(ir->loaded + num_recs > ifp->if_nextents)) {
1183 1193  		xfs_warn(ip->i_mount, "corrupt dinode %llu, (btree extents).",
1184 1194  			(unsigned long long)ip->i_ino);
1185 1195  		xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, block,
···
1205 1215  				xfs_bmap_fork_to_state(whichfork));
1206 1216  		trace_xfs_read_extent(ip, &ir->icur,
1207 1217  				xfs_bmap_fork_to_state(whichfork), _THIS_IP_);
1208 -    		xfs_iext_next(XFS_IFORK_PTR(ip, whichfork), &ir->icur);
1218 +    		xfs_iext_next(ifp, &ir->icur);
1209 1219  	}
1210 1220
1211 1221  	return 0;
···
1228 1238
1229 1239  	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1230 1240
1231 -    	if (XFS_IS_CORRUPT(mp,
1232 -    			   XFS_IFORK_FORMAT(ip, whichfork) !=
1233 -    			   XFS_DINODE_FMT_BTREE)) {
1241 +    	if (XFS_IS_CORRUPT(mp, ifp->if_format != XFS_DINODE_FMT_BTREE)) {
1234 1242  		error = -EFSCORRUPTED;
1235 1243  		goto out;
1236 1244  	}
···
1242 1254  	if (error)
1243 1255  		goto out;
1244 1256
1245 -    	if (XFS_IS_CORRUPT(mp,
1246 -    			   ir.loaded != XFS_IFORK_NEXTENTS(ip, whichfork))) {
1257 +    	if (XFS_IS_CORRUPT(mp, ir.loaded != ifp->if_nextents)) {
1247 1258  		error = -EFSCORRUPTED;
1248 1259  		goto out;
1249 1260  	}
···
1276 1289  	xfs_fileoff_t		lowest, max;
1277 1290  	int			error;
1278 1291
1279 -    	ASSERT(xfs_ifork_has_extents(ip, whichfork) ||
1280 -    	       XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
1281 -
1282 -    	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
1292 +    	if (ifp->if_format == XFS_DINODE_FMT_LOCAL) {
1283 1293  		*first_unused = 0;
1284 1294  		return 0;
1285 1295  	}
1296 +
1297 +    	ASSERT(xfs_ifork_has_extents(ifp));
1286 1298
1287 1299  	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
1288 1300  		error = xfs_iread_extents(tp, ip, whichfork);
···
1323 1337  	struct xfs_iext_cursor	icur;
1324 1338  	int			error;
1325 1339
1326 -    	switch (XFS_IFORK_FORMAT(ip, whichfork)) {
1340 +    	switch (ifp->if_format) {
1327 1341  	case XFS_DINODE_FMT_LOCAL:
1328 1342  		*last_block = 0;
1329 1343  		return 0;
···
1422 1436  	xfs_fileoff_t		*last_block,
1423 1437  	int			whichfork)
1424 1438  {
1439 +    	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
1425 1440  	struct xfs_bmbt_irec	rec;
1426 1441  	int			is_empty;
1427 1442  	int			error;
1428 1443
1429 1444  	*last_block = 0;
1430 1445
1431 -    	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL)
1446 +    	if (ifp->if_format == XFS_DINODE_FMT_LOCAL)
1432 1447  		return 0;
1433 1448
1434 -    	if (XFS_IS_CORRUPT(ip->i_mount, !xfs_ifork_has_extents(ip, whichfork)))
1449 +    	if (XFS_IS_CORRUPT(ip->i_mount, !xfs_ifork_has_extents(ifp)))
1435 1450  		return -EFSCORRUPTED;
1436 1451
1437 1452  	error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty);
···
1450 1463   */
1451 1464  int					/* 1=>1 block, 0=>otherwise */
1452 1465  xfs_bmap_one_block(
1453 -    	xfs_inode_t	*ip,		/* incore inode */
1454 -    	int		whichfork)	/* data or attr fork */
1466 +    	struct xfs_inode *ip,		/* incore inode */
1467 +    	int		whichfork)	/* data or attr fork */
1455 1468  {
1456 -    	struct xfs_ifork *ifp;		/* inode fork pointer */
1457 -    	int		rval;		/* return value */
1458 -    	xfs_bmbt_irec_t	s;		/* internal version of extent */
1469 +    	struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
1470 +    	int		rval;		/* return value */
1471 +    	struct xfs_bmbt_irec s;		/* internal version of extent */
1459 1472  	struct xfs_iext_cursor icur;
1460 1473
1461 1474  #ifndef DEBUG
1462 1475  	if (whichfork == XFS_DATA_FORK)
1463 1476  		return XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize;
1464 1477  #endif	/* !DEBUG */
1465 -    	if (XFS_IFORK_NEXTENTS(ip, whichfork) != 1)
1478 +    	if (ifp->if_nextents != 1)
1466 1479  		return 0;
1467 -    	if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
1480 +    	if (ifp->if_format != XFS_DINODE_FMT_EXTENTS)
1468 1481  		return 0;
1469 -    	ifp = XFS_IFORK_PTR(ip, whichfork);
1470 1482  	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
1471 1483  	xfs_iext_first(ifp, &icur);
1472 1484  	xfs_iext_get_extent(ifp, &icur, &s);
···
1487 1501  	struct xfs_bmalloca	*bma,
1488 1502  	int			whichfork)
1489 1503  {
1504 +    	struct xfs_mount	*mp = bma->ip->i_mount;
1505 +    	struct xfs_ifork	*ifp = XFS_IFORK_PTR(bma->ip, whichfork);
1490 1506  	struct xfs_bmbt_irec	*new = &bma->got;
1491 1507  	int			error;	/* error return value */
1492 1508  	int			i;	/* temp state */
1493 -    	struct xfs_ifork	*ifp;	/* inode fork pointer */
1494 1509  	xfs_fileoff_t		new_endoff;	/* end offset of new entry */
1495 1510  	xfs_bmbt_irec_t		r[3];	/* neighbor extent entries */
1496 1511  					/* left is 0, right is 1, prev is 2 */
···
1501 1514  	xfs_filblks_t		da_old;	/* old count del alloc blocks used */
1502 1515  	xfs_filblks_t		temp=0;	/* value for da_new calculations */
1503 1516  	int			tmp_rval;	/* partial logging flags */
1504 -    	struct xfs_mount	*mp;
1505 -    	xfs_extnum_t		*nextents;
1506 1517  	struct xfs_bmbt_irec	old;
1507 1518
1508 -    	mp = bma->ip->i_mount;
1509 -    	ifp = XFS_IFORK_PTR(bma->ip, whichfork);
1510 1519  	ASSERT(whichfork != XFS_ATTR_FORK);
1511 -    	nextents = (whichfork == XFS_COW_FORK ? &bma->ip->i_cnextents :
1512 -    						&bma->ip->i_d.di_nextents);
1513 -
1514 1520  	ASSERT(!isnullstartblock(new->br_startblock));
1515 1521  	ASSERT(!bma->cur ||
1516 1522  	       (bma->cur->bc_ino.flags & XFS_BTCUR_BMBT_WASDEL));
···
1594 1614  		xfs_iext_remove(bma->ip, &bma->icur, state);
1595 1615  		xfs_iext_prev(ifp, &bma->icur);
1596 1616  		xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
1597 -    		(*nextents)--;
1617 +    		ifp->if_nextents--;
1598 1618
1599 1619  		if (bma->cur == NULL)
1600 1620  			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
···
1698 1718  		PREV.br_startblock = new->br_startblock;
1699 1719  		PREV.br_state = new->br_state;
1700 1720  		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1721 +    		ifp->if_nextents++;
1701 1722
1702 -    		(*nextents)++;
1703 1723  		if (bma->cur == NULL)
1704 1724  			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1705 1725  		else {
···
1764 1784  		 * The left neighbor is not contiguous.
1765 1785  		 */
1766 1786  		xfs_iext_update_extent(bma->ip, state, &bma->icur, new);
1767 -    		(*nextents)++;
1787 +    		ifp->if_nextents++;
1788 +
1768 1789  		if (bma->cur == NULL)
1769 1790  			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1770 1791  		else {
···
1851 1870  		 * The right neighbor is not contiguous.
1852 1871  		 */
1853 1872  		xfs_iext_update_extent(bma->ip, state, &bma->icur, new);
1854 -    		(*nextents)++;
1873 +    		ifp->if_nextents++;
1874 +
1855 1875  		if (bma->cur == NULL)
1856 1876  			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1857 1877  		else {
···
1937 1955  		xfs_iext_next(ifp, &bma->icur);
1938 1956  		xfs_iext_insert(bma->ip, &bma->icur, &RIGHT, state);
1939 1957  		xfs_iext_insert(bma->ip, &bma->icur, &LEFT, state);
1940 -    		(*nextents)++;
1958 +    		ifp->if_nextents++;
1941 1959
1942 1960  		if (bma->cur == NULL)
1943 1961  			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
···
2141 2159  		xfs_iext_remove(ip, icur, state);
2142 2160  		xfs_iext_prev(ifp, icur);
2143 2161  		xfs_iext_update_extent(ip, state, icur, &LEFT);
2144 -    		XFS_IFORK_NEXT_SET(ip, whichfork,
2145 -    				XFS_IFORK_NEXTENTS(ip, whichfork) - 2);
2162 +    		ifp->if_nextents -= 2;
2146 2163  		if (cur == NULL)
2147 2164  			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2148 2165  		else {
···
2193 2212  		xfs_iext_remove(ip, icur, state);
2194 2213  		xfs_iext_prev(ifp, icur);
2195 2214  		xfs_iext_update_extent(ip, state, icur, &LEFT);
2196 -    		XFS_IFORK_NEXT_SET(ip, whichfork,
2197 -    				XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
2215 +    		ifp->if_nextents--;
2198 2216  		if (cur == NULL)
2199 2217  			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2200 2218  		else {
···
2235 2255  		xfs_iext_remove(ip, icur, state);
2236 2256  		xfs_iext_prev(ifp, icur);
2237 2257  		xfs_iext_update_extent(ip, state, icur, &PREV);
2258 +    		ifp->if_nextents--;
2238 2259
2239 -    		XFS_IFORK_NEXT_SET(ip, whichfork,
2240 -    				XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
2241 2260  		if (cur == NULL)
2242 2261  			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2243 2262  		else {
···
2343 2364
2344 2365  		xfs_iext_update_extent(ip, state, icur, &PREV);
2345 2366  		xfs_iext_insert(ip, icur, new, state);
2346 -    		XFS_IFORK_NEXT_SET(ip, whichfork,
2347 -    				XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
2367 +    		ifp->if_nextents++;
2368 +
2348 2369  		if (cur == NULL)
2349 2370  			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2350 2371  		else {
···
2419 2440  		xfs_iext_update_extent(ip, state, icur, &PREV);
2420 2441  		xfs_iext_next(ifp, icur);
2421 2442  		xfs_iext_insert(ip, icur, new, state);
2443 +    		ifp->if_nextents++;
2422 2444
2423 -    		XFS_IFORK_NEXT_SET(ip, whichfork,
2424 -    				XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
2425 2445  		if (cur == NULL)
2426 2446  			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2427 2447  		else {
···
2471 2493  		xfs_iext_next(ifp, icur);
2472 2494  		xfs_iext_insert(ip, icur, &r[1], state);
2473 2495  		xfs_iext_insert(ip, icur, &r[0], state);
2496 +    		ifp->if_nextents += 2;
2474 2497
2475 -    		XFS_IFORK_NEXT_SET(ip, whichfork,
2476 -    				XFS_IFORK_NEXTENTS(ip, whichfork) + 2);
2477 2498  		if (cur == NULL)
2478 2499  			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2479 2500  		else {
···
2787 2810  		xfs_iext_remove(ip, icur, state);
2788 2811  		xfs_iext_prev(ifp, icur);
2789 2812  		xfs_iext_update_extent(ip, state, icur, &left);
2813 +    		ifp->if_nextents--;
2790 2814
2791 -    		XFS_IFORK_NEXT_SET(ip, whichfork,
2792 -    				XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
2793 2815  		if (cur == NULL) {
2794 2816  			rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
2795 2817  		} else {
···
2886 2910  		 * Insert a new entry.
2887 2911  		 */
2888 2912  		xfs_iext_insert(ip, icur, new, state);
2889 -    		XFS_IFORK_NEXT_SET(ip, whichfork,
2890 -    				XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
2913 +    		ifp->if_nextents++;
2914 +
2891 2915  		if (cur == NULL) {
2892 2916  			rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
2893 2917  		} else {
···
3867 3891  	int			flags)
3868 3892  {
3869 3893  	struct xfs_mount	*mp = ip->i_mount;
3870 -    	struct xfs_ifork	*ifp;
3894 +    	int			whichfork = xfs_bmapi_whichfork(flags);
3895 +    	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
3871 3896  	struct xfs_bmbt_irec	got;
3872 3897  	xfs_fileoff_t		obno;
3873 3898  	xfs_fileoff_t		end;
···
3876 3899  	int			error;
3877 3900  	bool			eof = false;
3878 3901  	int			n = 0;
3879 -    	int			whichfork = xfs_bmapi_whichfork(flags);
3880 3902
3881 3903  	ASSERT(*nmap >= 1);
3882 -    	ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK|XFS_BMAPI_ENTIRE|
3883 -    			   XFS_BMAPI_COWFORK)));
3904 +    	ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK | XFS_BMAPI_ENTIRE)));
3884 3905  	ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED|XFS_ILOCK_EXCL));
3885 3906
3886 -    	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ip, whichfork)) ||
3887 -    	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
3907 +    	if (WARN_ON_ONCE(!ifp))
3888 3908  		return -EFSCORRUPTED;
3889 -    	}
3909 +
3910 +    	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
3911 +    	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT))
3912 +    		return -EFSCORRUPTED;
3890 3913
3891 3914  	if (XFS_FORCED_SHUTDOWN(mp))
3892 3915  		return -EIO;
3893 3916
3894 3917  	XFS_STATS_INC(mp, xs_blk_mapr);
3895 -
3896 -    	ifp = XFS_IFORK_PTR(ip, whichfork);
3897 -    	if (!ifp) {
3898 -    		/* No CoW fork? Return a hole.
*/ 3899 - if (whichfork == XFS_COW_FORK) { 3900 - mval->br_startoff = bno; 3901 - mval->br_startblock = HOLESTARTBLOCK; 3902 - mval->br_blockcount = len; 3903 - mval->br_state = XFS_EXT_NORM; 3904 - *nmap = 1; 3905 - return 0; 3906 - } 3907 - 3908 - /* 3909 - * A missing attr ifork implies that the inode says we're in 3910 - * extents or btree format but failed to pass the inode fork 3911 - * verifier while trying to load it. Treat that as a file 3912 - * corruption too. 3913 - */ 3914 - #ifdef DEBUG 3915 - xfs_alert(mp, "%s: inode %llu missing fork %d", 3916 - __func__, ip->i_ino, whichfork); 3917 - #endif /* DEBUG */ 3918 - return -EFSCORRUPTED; 3919 - } 3920 3918 3921 3919 if (!(ifp->if_flags & XFS_IFEXTENTS)) { 3922 3920 error = xfs_iread_extents(NULL, ip, whichfork); ··· 4145 4193 bma->got.br_blockcount = bma->length; 4146 4194 bma->got.br_state = XFS_EXT_NORM; 4147 4195 4148 - /* 4149 - * In the data fork, a wasdelay extent has been initialized, so 4150 - * shouldn't be flagged as unwritten. 4151 - * 4152 - * For the cow fork, however, we convert delalloc reservations 4153 - * (extents allocated for speculative preallocation) to 4154 - * allocated unwritten extents, and only convert the unwritten 4155 - * extents to real extents when we're about to write the data. 
4156 - */ 4157 - if ((!bma->wasdel || (bma->flags & XFS_BMAPI_COWFORK)) && 4158 - (bma->flags & XFS_BMAPI_PREALLOC)) 4196 + if (bma->flags & XFS_BMAPI_PREALLOC) 4159 4197 bma->got.br_state = XFS_EXT_UNWRITTEN; 4160 4198 4161 4199 if (bma->wasdel) ··· 4259 4317 struct xfs_inode *ip, 4260 4318 int fork) 4261 4319 { 4320 + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, fork); 4321 + 4262 4322 if (tp && tp->t_firstblock != NULLFSBLOCK) 4263 4323 return 0; 4264 - if (XFS_IFORK_FORMAT(ip, fork) != XFS_DINODE_FMT_BTREE) 4324 + if (ifp->if_format != XFS_DINODE_FMT_BTREE) 4265 4325 return 1; 4266 - return be16_to_cpu(XFS_IFORK_PTR(ip, fork)->if_broot->bb_level) + 1; 4326 + return be16_to_cpu(ifp->if_broot->bb_level) + 1; 4267 4327 } 4268 4328 4269 4329 /* ··· 4280 4336 int whichfork, 4281 4337 int error) 4282 4338 { 4339 + struct xfs_ifork *ifp = XFS_IFORK_PTR(bma->ip, whichfork); 4340 + 4283 4341 if ((bma->logflags & xfs_ilog_fext(whichfork)) && 4284 - XFS_IFORK_FORMAT(bma->ip, whichfork) != XFS_DINODE_FMT_EXTENTS) 4342 + ifp->if_format != XFS_DINODE_FMT_EXTENTS) 4285 4343 bma->logflags &= ~xfs_ilog_fext(whichfork); 4286 4344 else if ((bma->logflags & xfs_ilog_fbroot(whichfork)) && 4287 - XFS_IFORK_FORMAT(bma->ip, whichfork) != XFS_DINODE_FMT_BTREE) 4345 + ifp->if_format != XFS_DINODE_FMT_BTREE) 4288 4346 bma->logflags &= ~xfs_ilog_fbroot(whichfork); 4289 4347 4290 4348 if (bma->logflags) ··· 4318 4372 .total = total, 4319 4373 }; 4320 4374 struct xfs_mount *mp = ip->i_mount; 4321 - struct xfs_ifork *ifp; 4375 + int whichfork = xfs_bmapi_whichfork(flags); 4376 + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); 4322 4377 xfs_fileoff_t end; /* end of mapped file region */ 4323 4378 bool eof = false; /* after the end of extents */ 4324 4379 int error; /* error return */ 4325 4380 int n; /* current extent index */ 4326 4381 xfs_fileoff_t obno; /* old block number (offset) */ 4327 - int whichfork; /* data or attr fork */ 4328 4382 4329 4383 #ifdef DEBUG 4330 4384 xfs_fileoff_t 
orig_bno; /* original block number value */ ··· 4339 4393 orig_mval = mval; 4340 4394 orig_nmap = *nmap; 4341 4395 #endif 4342 - whichfork = xfs_bmapi_whichfork(flags); 4343 4396 4344 4397 ASSERT(*nmap >= 1); 4345 4398 ASSERT(*nmap <= XFS_BMAP_MAX_NMAP); 4346 4399 ASSERT(tp != NULL); 4347 4400 ASSERT(len > 0); 4348 - ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL); 4401 + ASSERT(ifp->if_format != XFS_DINODE_FMT_LOCAL); 4349 4402 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 4350 4403 ASSERT(!(flags & XFS_BMAPI_REMAP)); 4351 4404 ··· 4360 4415 ASSERT((flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO)) != 4361 4416 (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO)); 4362 4417 4363 - if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ip, whichfork)) || 4418 + if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) || 4364 4419 XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) { 4365 4420 return -EFSCORRUPTED; 4366 4421 } 4367 4422 4368 4423 if (XFS_FORCED_SHUTDOWN(mp)) 4369 4424 return -EIO; 4370 - 4371 - ifp = XFS_IFORK_PTR(ip, whichfork); 4372 4425 4373 4426 XFS_STATS_INC(mp, xs_blk_mapw); 4374 4427 ··· 4477 4534 if (error) 4478 4535 goto error0; 4479 4536 4480 - ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE || 4481 - XFS_IFORK_NEXTENTS(ip, whichfork) > 4482 - XFS_IFORK_MAXEXT(ip, whichfork)); 4537 + ASSERT(ifp->if_format != XFS_DINODE_FMT_BTREE || 4538 + ifp->if_nextents > XFS_IFORK_MAXEXT(ip, whichfork)); 4483 4539 xfs_bmapi_finish(&bma, whichfork, 0); 4484 4540 xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval, 4485 4541 orig_nmap, *nmap); ··· 4553 4611 bma.offset = bma.got.br_startoff; 4554 4612 bma.length = max_t(xfs_filblks_t, bma.got.br_blockcount, MAXEXTLEN); 4555 4613 bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork); 4614 + 4615 + /* 4616 + * When we're converting the delalloc reservations backing dirty pages 4617 + * in the page cache, we must be careful about how we create the new 4618 + * extents: 4619 + * 4620 + * New CoW fork 
extents are created unwritten, turned into real extents 4621 + * when we're about to write the data to disk, and mapped into the data 4622 + * fork after the write finishes. End of story. 4623 + * 4624 + * New data fork extents must be mapped in as unwritten and converted 4625 + * to real extents after the write succeeds to avoid exposing stale 4626 + * disk contents if we crash. 4627 + */ 4628 + bma.flags = XFS_BMAPI_PREALLOC; 4556 4629 if (whichfork == XFS_COW_FORK) 4557 - bma.flags = XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC; 4630 + bma.flags |= XFS_BMAPI_COWFORK; 4558 4631 4559 4632 if (!xfs_iext_peek_prev_extent(ifp, &bma.icur, &bma.prev)) 4560 4633 bma.prev.br_startoff = NULLFILEOFF; ··· 4639 4682 ASSERT((flags & (XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC)) != 4640 4683 (XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC)); 4641 4684 4642 - if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ip, whichfork)) || 4685 + if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) || 4643 4686 XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) { 4644 4687 return -EFSCORRUPTED; 4645 4688 } ··· 4683 4726 error = xfs_bmap_btree_to_extents(tp, ip, cur, &logflags, whichfork); 4684 4727 4685 4728 error0: 4686 - if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) 4729 + if (ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS) 4687 4730 logflags &= ~XFS_ILOG_DEXT; 4688 - else if (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) 4731 + else if (ip->i_df.if_format != XFS_DINODE_FMT_BTREE) 4689 4732 logflags &= ~XFS_ILOG_DBROOT; 4690 4733 4691 4734 if (logflags) ··· 5035 5078 * conversion to btree format, since the transaction will be dirty then. 
5036 5079 */ 5037 5080 if (tp->t_blk_res == 0 && 5038 - XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS && 5039 - XFS_IFORK_NEXTENTS(ip, whichfork) >= 5040 - XFS_IFORK_MAXEXT(ip, whichfork) && 5081 + ifp->if_format == XFS_DINODE_FMT_EXTENTS && 5082 + ifp->if_nextents >= XFS_IFORK_MAXEXT(ip, whichfork) && 5041 5083 del->br_startoff > got.br_startoff && del_endoff < got_endoff) 5042 5084 return -ENOSPC; 5043 5085 ··· 5088 5132 */ 5089 5133 xfs_iext_remove(ip, icur, state); 5090 5134 xfs_iext_prev(ifp, icur); 5091 - XFS_IFORK_NEXT_SET(ip, whichfork, 5092 - XFS_IFORK_NEXTENTS(ip, whichfork) - 1); 5135 + ifp->if_nextents--; 5136 + 5093 5137 flags |= XFS_ILOG_CORE; 5094 5138 if (!cur) { 5095 5139 flags |= xfs_ilog_fext(whichfork); ··· 5197 5241 } 5198 5242 } else 5199 5243 flags |= xfs_ilog_fext(whichfork); 5200 - XFS_IFORK_NEXT_SET(ip, whichfork, 5201 - XFS_IFORK_NEXTENTS(ip, whichfork) + 1); 5244 + 5245 + ifp->if_nextents++; 5202 5246 xfs_iext_next(ifp, icur); 5203 5247 xfs_iext_insert(ip, icur, &new, state); 5204 5248 break; ··· 5278 5322 whichfork = xfs_bmapi_whichfork(flags); 5279 5323 ASSERT(whichfork != XFS_COW_FORK); 5280 5324 ifp = XFS_IFORK_PTR(ip, whichfork); 5281 - if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ip, whichfork))) 5325 + if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp))) 5282 5326 return -EFSCORRUPTED; 5283 5327 if (XFS_FORCED_SHUTDOWN(mp)) 5284 5328 return -EIO; ··· 5316 5360 5317 5361 logflags = 0; 5318 5362 if (ifp->if_flags & XFS_IFBROOT) { 5319 - ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE); 5363 + ASSERT(ifp->if_format == XFS_DINODE_FMT_BTREE); 5320 5364 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); 5321 5365 cur->bc_ino.flags = 0; 5322 5366 } else ··· 5561 5605 * logging the extent records if we've converted to btree format. 
5562 5606 */ 5563 5607 if ((logflags & xfs_ilog_fext(whichfork)) && 5564 - XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS) 5608 + ifp->if_format != XFS_DINODE_FMT_EXTENTS) 5565 5609 logflags &= ~xfs_ilog_fext(whichfork); 5566 5610 else if ((logflags & xfs_ilog_fbroot(whichfork)) && 5567 - XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE) 5611 + ifp->if_format != XFS_DINODE_FMT_BTREE) 5568 5612 logflags &= ~xfs_ilog_fbroot(whichfork); 5569 5613 /* 5570 5614 * Log inode even in the error case, if the transaction ··· 5646 5690 struct xfs_btree_cur *cur, 5647 5691 int *logflags) /* output */ 5648 5692 { 5693 + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); 5649 5694 struct xfs_bmbt_irec new; 5650 5695 xfs_filblks_t blockcount; 5651 5696 int error, i; ··· 5665 5708 * Update the on-disk extent count, the btree if necessary and log the 5666 5709 * inode. 5667 5710 */ 5668 - XFS_IFORK_NEXT_SET(ip, whichfork, 5669 - XFS_IFORK_NEXTENTS(ip, whichfork) - 1); 5711 + ifp->if_nextents--; 5670 5712 *logflags |= XFS_ILOG_CORE; 5671 5713 if (!cur) { 5672 5714 *logflags |= XFS_ILOG_DEXT; ··· 5703 5747 5704 5748 done: 5705 5749 xfs_iext_remove(ip, icur, 0); 5706 - xfs_iext_prev(XFS_IFORK_PTR(ip, whichfork), icur); 5750 + xfs_iext_prev(ifp, icur); 5707 5751 xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), icur, 5708 5752 &new); 5709 5753 ··· 5775 5819 int error = 0; 5776 5820 int logflags = 0; 5777 5821 5778 - if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ip, whichfork)) || 5822 + if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) || 5779 5823 XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) { 5780 5824 return -EFSCORRUPTED; 5781 5825 } ··· 5892 5936 int error = 0; 5893 5937 int logflags = 0; 5894 5938 5895 - if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ip, whichfork)) || 5939 + if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) || 5896 5940 XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) { 5897 5941 return -EFSCORRUPTED; 5898 5942 } ··· 
5986 6030 xfs_fileoff_t split_fsb) 5987 6031 { 5988 6032 int whichfork = XFS_DATA_FORK; 6033 + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); 5989 6034 struct xfs_btree_cur *cur = NULL; 5990 6035 struct xfs_bmbt_irec got; 5991 6036 struct xfs_bmbt_irec new; /* split extent */ 5992 6037 struct xfs_mount *mp = ip->i_mount; 5993 - struct xfs_ifork *ifp; 5994 6038 xfs_fsblock_t gotblkcnt; /* new block count for got */ 5995 6039 struct xfs_iext_cursor icur; 5996 6040 int error = 0; 5997 6041 int logflags = 0; 5998 6042 int i = 0; 5999 6043 6000 - if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ip, whichfork)) || 6044 + if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) || 6001 6045 XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) { 6002 6046 return -EFSCORRUPTED; 6003 6047 } ··· 6005 6049 if (XFS_FORCED_SHUTDOWN(mp)) 6006 6050 return -EIO; 6007 6051 6008 - ifp = XFS_IFORK_PTR(ip, whichfork); 6009 6052 if (!(ifp->if_flags & XFS_IFEXTENTS)) { 6010 6053 /* Read in all the extents */ 6011 6054 error = xfs_iread_extents(tp, ip, whichfork); ··· 6052 6097 /* Add new extent */ 6053 6098 xfs_iext_next(ifp, &icur); 6054 6099 xfs_iext_insert(ip, &icur, &new, 0); 6055 - XFS_IFORK_NEXT_SET(ip, whichfork, 6056 - XFS_IFORK_NEXTENTS(ip, whichfork) + 1); 6100 + ifp->if_nextents++; 6057 6101 6058 6102 if (cur) { 6059 6103 error = xfs_bmbt_lookup_eq(cur, &new, &i);
fs/xfs/libxfs/xfs_bmap.h (+1 -1)

-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (c) 2000-2006 Silicon Graphics, Inc.
  * All Rights Reserved.
fs/xfs/libxfs/xfs_bmap_btree.c (+1 -4)

 	ASSERT(tp || buffer_list);
 	ASSERT(!(tp && buffer_list));
-	if (whichfork == XFS_DATA_FORK)
-		ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_BTREE);
-	else
-		ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_BTREE);
+	ASSERT(XFS_IFORK_PTR(ip, whichfork)->if_format == XFS_DINODE_FMT_BTREE);

 	cur = xfs_bmbt_init_cursor(ip->i_mount, tp, ip, whichfork);
 	if (!cur)
fs/xfs/libxfs/xfs_bmap_btree.h (+1 -1)

-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (c) 2000,2002-2005 Silicon Graphics, Inc.
  * All Rights Reserved.
fs/xfs/libxfs/xfs_btree.h (+1 -1)

-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
  * All Rights Reserved.
fs/xfs/libxfs/xfs_da_btree.h (+1 -1)

-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (c) 2000,2002,2005 Silicon Graphics, Inc.
  * Copyright (c) 2013 Red Hat, Inc.
fs/xfs/libxfs/xfs_da_format.h (+1 -1)

-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
  * Copyright (c) 2013 Red Hat, Inc.
fs/xfs/libxfs/xfs_defer.c (+81 -81)

 	[XFS_DEFER_OPS_TYPE_AGFL_FREE]	= &xfs_agfl_free_defer_type,
 };

+static void
+xfs_defer_create_intent(
+	struct xfs_trans		*tp,
+	struct xfs_defer_pending	*dfp,
+	bool				sort)
+{
+	const struct xfs_defer_op_type	*ops = defer_op_types[dfp->dfp_type];
+
+	dfp->dfp_intent = ops->create_intent(tp, &dfp->dfp_work,
+			dfp->dfp_count, sort);
+}
+
 /*
  * For each pending item in the intake list, log its intent item and the
  * associated extents, then add the entire intake list to the end of
···
 xfs_defer_create_intents(
 	struct xfs_trans		*tp)
 {
-	struct list_head		*li;
 	struct xfs_defer_pending	*dfp;
-	const struct xfs_defer_op_type	*ops;

 	list_for_each_entry(dfp, &tp->t_dfops, dfp_list) {
-		ops = defer_op_types[dfp->dfp_type];
-		dfp->dfp_intent = ops->create_intent(tp, dfp->dfp_count);
 		trace_xfs_defer_create_intent(tp->t_mountp, dfp);
-		list_sort(tp->t_mountp, &dfp->dfp_work, ops->diff_items);
-		list_for_each(li, &dfp->dfp_work)
-			ops->log_item(tp, dfp->dfp_intent, li);
+		xfs_defer_create_intent(tp, dfp, true);
 	}
 }
···
 	struct xfs_log_item		*lip;
 	struct xfs_buf			*bplist[XFS_DEFER_OPS_NR_BUFS];
 	struct xfs_inode		*iplist[XFS_DEFER_OPS_NR_INODES];
+	unsigned int			ordered = 0; /* bitmap */
 	int				bpcount = 0, ipcount = 0;
 	int				i;
 	int				error;
+
+	BUILD_BUG_ON(NBBY * sizeof(ordered) < XFS_DEFER_OPS_NR_BUFS);

 	list_for_each_entry(lip, &tp->t_items, li_trans) {
 		switch (lip->li_type) {
···
 				ASSERT(0);
 				return -EFSCORRUPTED;
 			}
-			xfs_trans_dirty_buf(tp, bli->bli_buf);
+			if (bli->bli_flags & XFS_BLI_ORDERED)
+				ordered |= (1U << bpcount);
+			else
+				xfs_trans_dirty_buf(tp, bli->bli_buf);
 			bplist[bpcount++] = bli->bli_buf;
 		}
 		break;
···
 	/* Rejoin the buffers and dirty them so the log moves forward. */
 	for (i = 0; i < bpcount; i++) {
 		xfs_trans_bjoin(tp, bplist[i]);
+		if (ordered & (1U << i))
+			xfs_trans_ordered_buf(tp, bplist[i]);
 		xfs_trans_bhold(tp, bplist[i]);
 	}
···
 }

 /*
+ * Log an intent-done item for the first pending intent, and finish the work
+ * items.
+ */
+static int
+xfs_defer_finish_one(
+	struct xfs_trans		*tp,
+	struct xfs_defer_pending	*dfp)
+{
+	const struct xfs_defer_op_type	*ops = defer_op_types[dfp->dfp_type];
+	struct xfs_btree_cur		*state = NULL;
+	struct list_head		*li, *n;
+	int				error;
+
+	trace_xfs_defer_pending_finish(tp->t_mountp, dfp);
+
+	dfp->dfp_done = ops->create_done(tp, dfp->dfp_intent, dfp->dfp_count);
+	list_for_each_safe(li, n, &dfp->dfp_work) {
+		list_del(li);
+		dfp->dfp_count--;
+		error = ops->finish_item(tp, dfp->dfp_done, li, &state);
+		if (error == -EAGAIN) {
+			/*
+			 * Caller wants a fresh transaction; put the work item
+			 * back on the list and log a new log intent item to
+			 * replace the old one.  See "Requesting a Fresh
+			 * Transaction while Finishing Deferred Work" above.
+			 */
+			list_add(li, &dfp->dfp_work);
+			dfp->dfp_count++;
+			dfp->dfp_done = NULL;
+			xfs_defer_create_intent(tp, dfp, false);
+		}
+
+		if (error)
+			goto out;
+	}
+
+	/* Done with the dfp, free it. */
+	list_del(&dfp->dfp_list);
+	kmem_free(dfp);
+out:
+	if (ops->finish_cleanup)
+		ops->finish_cleanup(tp, state, error);
+	return error;
+}
+
+/*
  * Finish all the pending work.  This involves logging intent items for
  * any work items that wandered in since the last transaction roll (if
  * one has even happened), rolling the transaction, and finishing the
···
 	struct xfs_trans		**tp)
 {
 	struct xfs_defer_pending	*dfp;
-	struct list_head		*li;
-	struct list_head		*n;
-	void				*state;
 	int				error = 0;
-	const struct xfs_defer_op_type	*ops;
 	LIST_HEAD(dop_pending);

 	ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
···
 	/* Until we run out of pending work to finish... */
 	while (!list_empty(&dop_pending) || !list_empty(&(*tp)->t_dfops)) {
-		/* log intents and pull in intake items */
 		xfs_defer_create_intents(*tp);
 		list_splice_tail_init(&(*tp)->t_dfops, &dop_pending);

-		/*
-		 * Roll the transaction.
-		 */
 		error = xfs_defer_trans_roll(tp);
 		if (error)
-			goto out;
+			goto out_shutdown;

-		/* Log an intent-done item for the first pending item. */
 		dfp = list_first_entry(&dop_pending, struct xfs_defer_pending,
 				       dfp_list);
-		ops = defer_op_types[dfp->dfp_type];
-		trace_xfs_defer_pending_finish((*tp)->t_mountp, dfp);
-		dfp->dfp_done = ops->create_done(*tp, dfp->dfp_intent,
-				dfp->dfp_count);
-
-		/* Finish the work items. */
-		state = NULL;
-		list_for_each_safe(li, n, &dfp->dfp_work) {
-			list_del(li);
-			dfp->dfp_count--;
-			error = ops->finish_item(*tp, li, dfp->dfp_done,
-					&state);
-			if (error == -EAGAIN) {
-				/*
-				 * Caller wants a fresh transaction;
-				 * put the work item back on the list
-				 * and jump out.
-				 */
-				list_add(li, &dfp->dfp_work);
-				dfp->dfp_count++;
-				break;
-			} else if (error) {
-				/*
-				 * Clean up after ourselves and jump out.
-				 * xfs_defer_cancel will take care of freeing
-				 * all these lists and stuff.
-				 */
-				if (ops->finish_cleanup)
-					ops->finish_cleanup(*tp, state, error);
-				goto out;
-			}
-		}
-		if (error == -EAGAIN) {
-			/*
-			 * Caller wants a fresh transaction, so log a
-			 * new log intent item to replace the old one
-			 * and roll the transaction.  See "Requesting
-			 * a Fresh Transaction while Finishing
-			 * Deferred Work" above.
-			 */
-			dfp->dfp_intent = ops->create_intent(*tp,
-					dfp->dfp_count);
-			dfp->dfp_done = NULL;
-			list_for_each(li, &dfp->dfp_work)
-				ops->log_item(*tp, dfp->dfp_intent, li);
-		} else {
-			/* Done with the dfp, free it. */
-			list_del(&dfp->dfp_list);
-			kmem_free(dfp);
-		}
-
-		if (ops->finish_cleanup)
-			ops->finish_cleanup(*tp, state, error);
-	}
-
-out:
-	if (error) {
-		xfs_defer_trans_abort(*tp, &dop_pending);
-		xfs_force_shutdown((*tp)->t_mountp, SHUTDOWN_CORRUPT_INCORE);
-		trace_xfs_defer_finish_error(*tp, error);
-		xfs_defer_cancel_list((*tp)->t_mountp, &dop_pending);
-		xfs_defer_cancel(*tp);
-		return error;
+		error = xfs_defer_finish_one(*tp, dfp);
+		if (error && error != -EAGAIN)
+			goto out_shutdown;
 	}

 	trace_xfs_defer_finish_done(*tp, _RET_IP_);
 	return 0;
+
+out_shutdown:
+	xfs_defer_trans_abort(*tp, &dop_pending);
+	xfs_force_shutdown((*tp)->t_mountp, SHUTDOWN_CORRUPT_INCORE);
+	trace_xfs_defer_finish_error(*tp, error);
+	xfs_defer_cancel_list((*tp)->t_mountp, &dop_pending);
+	xfs_defer_cancel(*tp);
+	return error;
 }

 int
fs/xfs/libxfs/xfs_defer.h (+14 -12)

-// SPDX-License-Identifier: GPL-2.0+
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * Copyright (C) 2016 Oracle.  All Rights Reserved.
  * Author: Darrick J. Wong <darrick.wong@oracle.com>
···
 #ifndef __XFS_DEFER_H__
 #define __XFS_DEFER_H__

+struct xfs_btree_cur;
 struct xfs_defer_op_type;

 /*
···
 struct xfs_defer_pending {
 	struct list_head		dfp_list;	/* pending items */
 	struct list_head		dfp_work;	/* work items */
-	void				*dfp_intent;	/* log intent item */
-	void				*dfp_done;	/* log done item */
+	struct xfs_log_item		*dfp_intent;	/* log intent item */
+	struct xfs_log_item		*dfp_done;	/* log done item */
 	unsigned int			dfp_count;	/* # extent items */
 	enum xfs_defer_ops_type		dfp_type;
 };
···
 /* Description of a deferred type. */
 struct xfs_defer_op_type {
-	void (*abort_intent)(void *);
-	void *(*create_done)(struct xfs_trans *, void *, unsigned int);
-	int (*finish_item)(struct xfs_trans *, struct list_head *, void *,
-			void **);
-	void (*finish_cleanup)(struct xfs_trans *, void *, int);
-	void (*cancel_item)(struct list_head *);
-	int (*diff_items)(void *, struct list_head *, struct list_head *);
-	void *(*create_intent)(struct xfs_trans *, uint);
-	void (*log_item)(struct xfs_trans *, void *, struct list_head *);
+	struct xfs_log_item *(*create_intent)(struct xfs_trans *tp,
+			struct list_head *items, unsigned int count, bool sort);
+	void (*abort_intent)(struct xfs_log_item *intent);
+	struct xfs_log_item *(*create_done)(struct xfs_trans *tp,
+			struct xfs_log_item *intent, unsigned int count);
+	int (*finish_item)(struct xfs_trans *tp, struct xfs_log_item *done,
+			struct list_head *item, struct xfs_btree_cur **state);
+	void (*finish_cleanup)(struct xfs_trans *tp,
+			struct xfs_btree_cur *state, int error);
+	void (*cancel_item)(struct list_head *item);
 	unsigned int		max_items;
 };
fs/xfs/libxfs/xfs_dir2.c (+4 -4)

 	if (!inum)
 		args->op_flags |= XFS_DA_OP_JUSTCHECK;

-	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+	if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL) {
 		rval = xfs_dir2_sf_addname(args);
 		goto out_free;
 	}
···
 	args->op_flags |= XFS_DA_OP_CILOOKUP;

 	lock_mode = xfs_ilock_data_map_shared(dp);
-	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+	if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL) {
 		rval = xfs_dir2_sf_lookup(args);
 		goto out_check_rval;
 	}
···
 	args->whichfork = XFS_DATA_FORK;
 	args->trans = tp;

-	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+	if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL) {
 		rval = xfs_dir2_sf_removename(args);
 		goto out_free;
 	}
···
 	args->whichfork = XFS_DATA_FORK;
 	args->trans = tp;

-	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+	if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL) {
 		rval = xfs_dir2_sf_replace(args);
 		goto out_free;
 	}
fs/xfs/libxfs/xfs_dir2.h (+1 -1)

-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
  * All Rights Reserved.
fs/xfs/libxfs/xfs_dir2_block.c (+1 -1)

 	ASSERT(ifp->if_bytes == dp->i_d.di_size);
 	ASSERT(ifp->if_u1.if_data != NULL);
 	ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(oldsfp->i8count));
-	ASSERT(dp->i_d.di_nextents == 0);
+	ASSERT(dp->i_df.if_nextents == 0);

 	/*
 	 * Copy the directory into a temporary buffer.
fs/xfs/libxfs/xfs_dir2_priv.h (+1 -1)

-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
  * All Rights Reserved.
fs/xfs/libxfs/xfs_dir2_sf.c (+6 -7)

 	 */
 	ASSERT(dp->i_df.if_bytes == 0);
 	xfs_init_local_fork(dp, XFS_DATA_FORK, sfp, size);
-	dp->i_d.di_format = XFS_DINODE_FMT_LOCAL;
+	dp->i_df.if_format = XFS_DINODE_FMT_LOCAL;
 	dp->i_d.di_size = size;

 	logflags |= XFS_ILOG_DDATA;
···
 	struct xfs_inode	*ip)
 {
 	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
 	struct xfs_dir2_sf_hdr	*sfp;
 	struct xfs_dir2_sf_entry *sfep;
 	struct xfs_dir2_sf_entry *next_sfep;
 	char			*endp;
-	struct xfs_ifork	*ifp;
 	xfs_ino_t		ino;
 	int			i;
 	int			i8count;
···
 	int			error;
 	uint8_t			filetype;

-	ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_LOCAL);
+	ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);

-	ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
 	sfp = (struct xfs_dir2_sf_hdr *)ifp->if_u1.if_data;
 	size = ifp->if_bytes;
···
 	 * If it's currently a zero-length extent file,
 	 * convert it to local format.
 	 */
-	if (dp->i_d.di_format == XFS_DINODE_FMT_EXTENTS) {
+	if (dp->i_df.if_format == XFS_DINODE_FMT_EXTENTS) {
 		dp->i_df.if_flags &= ~XFS_IFEXTENTS;	/* just in case */
-		dp->i_d.di_format = XFS_DINODE_FMT_LOCAL;
+		dp->i_df.if_format = XFS_DINODE_FMT_LOCAL;
 		xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE);
 		dp->i_df.if_flags |= XFS_IFINLINE;
 	}
···
 	int			newsize;
 	struct xfs_dir2_sf_hdr	*sfp;

-	if (dp->i_d.di_format != XFS_DINODE_FMT_LOCAL)
+	if (dp->i_df.if_format != XFS_DINODE_FMT_LOCAL)
 		return false;

 	sfp = (struct xfs_dir2_sf_hdr *)dp->i_df.if_u1.if_data;
fs/xfs/libxfs/xfs_errortag.h (+4 -2)

-// SPDX-License-Identifier: GPL-2.0+
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
  * Copyright (C) 2017 Oracle.
···
 #define XFS_ERRTAG_FORCE_SCRUB_REPAIR			32
 #define XFS_ERRTAG_FORCE_SUMMARY_RECALC			33
 #define XFS_ERRTAG_IUNLINK_FALLBACK			34
-#define XFS_ERRTAG_MAX					35
+#define XFS_ERRTAG_BUF_IOERROR				35
+#define XFS_ERRTAG_MAX					36

 /*
  * Random factors for above tags, 1 means always, 2 means 1/2 time, etc.
···
 #define XFS_RANDOM_FORCE_SCRUB_REPAIR			1
 #define XFS_RANDOM_FORCE_SUMMARY_RECALC			1
 #define XFS_RANDOM_IUNLINK_FALLBACK			(XFS_RANDOM_DEFAULT/10)
+#define XFS_RANDOM_BUF_IOERROR				XFS_RANDOM_DEFAULT

 #endif /* __XFS_ERRORTAG_H_ */
fs/xfs/libxfs/xfs_format.h (+4 -5)

-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
  * All Rights Reserved.
···
 /*
  * Inode data & attribute fork sizes, per inode.
  */
-#define XFS_DFORK_Q(dip)		((dip)->di_forkoff != 0)
 #define XFS_DFORK_BOFF(dip)		((int)((dip)->di_forkoff << 3))

 #define XFS_DFORK_DSIZE(dip,mp) \
-	(XFS_DFORK_Q(dip) ? XFS_DFORK_BOFF(dip) : XFS_LITINO(mp))
+	((dip)->di_forkoff ? XFS_DFORK_BOFF(dip) : XFS_LITINO(mp))
 #define XFS_DFORK_ASIZE(dip,mp) \
-	(XFS_DFORK_Q(dip) ? XFS_LITINO(mp) - XFS_DFORK_BOFF(dip) : 0)
+	((dip)->di_forkoff ? XFS_LITINO(mp) - XFS_DFORK_BOFF(dip) : 0)
 #define XFS_DFORK_SIZE(dip,mp,w) \
 	((w) == XFS_DATA_FORK ? \
 		XFS_DFORK_DSIZE(dip, mp) : \
···
 struct xfs_acl {
 	__be32			acl_cnt;
-	struct xfs_acl_entry	acl_entry[0];
+	struct xfs_acl_entry	acl_entry[];
 };

 /*
fs/xfs/libxfs/xfs_fs.h (+1 -1)

-// SPDX-License-Identifier: LGPL-2.1
+/* SPDX-License-Identifier: LGPL-2.1 */
 /*
  * Copyright (c) 1995-2005 Silicon Graphics, Inc.
  * All Rights Reserved.
fs/xfs/libxfs/xfs_health.h (+1 -1)

-// SPDX-License-Identifier: GPL-2.0+
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * Copyright (C) 2019 Oracle.  All Rights Reserved.
  * Author: Darrick J. Wong <darrick.wong@oracle.com>
fs/xfs/libxfs/xfs_inode_buf.c (+48 -138)

 	struct xfs_imap		*imap,
 	struct xfs_dinode	**dipp,
 	struct xfs_buf		**bpp,
-	uint			buf_flags,
-	uint			iget_flags)
+	uint			buf_flags)
 {
 	struct xfs_buf		*bp;
 	int			error;
···
 			(int)imap->im_len, buf_flags, &bp,
 			&xfs_inode_buf_ops);
 	if (error) {
-		if (error == -EAGAIN) {
-			ASSERT(buf_flags & XBF_TRYLOCK);
-			return error;
-		}
-		xfs_warn(mp, "%s: xfs_trans_read_buf() returned error %d.",
-			__func__, error);
+		ASSERT(error != -EAGAIN || (buf_flags & XBF_TRYLOCK));
 		return error;
 	}
···
 	return 0;
 }

-void
+int
 xfs_inode_from_disk(
 	struct xfs_inode	*ip,
 	struct xfs_dinode	*from)
 {
 	struct xfs_icdinode	*to = &ip->i_d;
 	struct inode		*inode = VFS_I(ip);
+	int			error;
+	xfs_failaddr_t		fa;
+
+	ASSERT(ip->i_cowfp == NULL);
+	ASSERT(ip->i_afp == NULL);
+
+	fa = xfs_dinode_verify(ip->i_mount, ip->i_ino, from);
+	if (fa) {
+		xfs_inode_verifier_error(ip, -EFSCORRUPTED, "dinode", from,
+				sizeof(*from), fa);
+		return -EFSCORRUPTED;
+	}
+
+	/*
+	 * First get the permanent information that is needed to allocate an
+	 * inode. If the inode is unused, mode is zero and we shouldn't mess
+	 * with the unitialized part of it.
+	 */
+	to->di_flushiter = be16_to_cpu(from->di_flushiter);
+	inode->i_generation = be32_to_cpu(from->di_gen);
+	inode->i_mode = be16_to_cpu(from->di_mode);
+	if (!inode->i_mode)
+		return 0;

 	/*
 	 * Convert v1 inodes immediately to v2 inode format as this is the
···
 			be16_to_cpu(from->di_projid_lo);
 	}

-	to->di_format = from->di_format;
 	i_uid_write(inode, be32_to_cpu(from->di_uid));
 	i_gid_write(inode, be32_to_cpu(from->di_gid));
-	to->di_flushiter = be16_to_cpu(from->di_flushiter);

 	/*
 	 * Time is signed, so need to convert to signed 32 bit before
···
 	inode->i_mtime.tv_nsec = (int)be32_to_cpu(from->di_mtime.t_nsec);
 	inode->i_ctime.tv_sec = (int)be32_to_cpu(from->di_ctime.t_sec);
 	inode->i_ctime.tv_nsec = (int)be32_to_cpu(from->di_ctime.t_nsec);
-	inode->i_generation = be32_to_cpu(from->di_gen);
-	inode->i_mode = be16_to_cpu(from->di_mode);

 	to->di_size = be64_to_cpu(from->di_size);
 	to->di_nblocks = be64_to_cpu(from->di_nblocks);
 	to->di_extsize = be32_to_cpu(from->di_extsize);
-	to->di_nextents = be32_to_cpu(from->di_nextents);
-	to->di_anextents = be16_to_cpu(from->di_anextents);
 	to->di_forkoff = from->di_forkoff;
-	to->di_aformat = from->di_aformat;
 	to->di_dmevmask = be32_to_cpu(from->di_dmevmask);
 	to->di_dmstate = be16_to_cpu(from->di_dmstate);
 	to->di_flags = be16_to_cpu(from->di_flags);
···
 		to->di_flags2 = be64_to_cpu(from->di_flags2);
 		to->di_cowextsize = be32_to_cpu(from->di_cowextsize);
 	}
+
+	error = xfs_iformat_data_fork(ip, from);
+	if (error)
+		return error;
+	if (from->di_forkoff) {
+		error = xfs_iformat_attr_fork(ip, from);
+		if (error)
+			goto out_destroy_data_fork;
+	}
+	if (xfs_is_reflink_inode(ip))
+		xfs_ifork_init_cow(ip);
+	return 0;
+
+out_destroy_data_fork:
+	xfs_idestroy_fork(&ip->i_df);
+	return error;
 }

 void
···
 	to->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
 	to->di_onlink = 0;

-	to->di_format = from->di_format;
+	to->di_format = xfs_ifork_format(&ip->i_df);
 	to->di_uid = cpu_to_be32(i_uid_read(inode));
 	to->di_gid = cpu_to_be32(i_gid_read(inode));
 	to->di_projid_lo = cpu_to_be16(from->di_projid & 0xffff);
···
 	to->di_size = cpu_to_be64(from->di_size);
 	to->di_nblocks = cpu_to_be64(from->di_nblocks);
 	to->di_extsize = cpu_to_be32(from->di_extsize);
-	to->di_nextents = cpu_to_be32(from->di_nextents);
-	to->di_anextents = cpu_to_be16(from->di_anextents);
+	to->di_nextents = cpu_to_be32(xfs_ifork_nextents(&ip->i_df));
+	to->di_anextents = cpu_to_be16(xfs_ifork_nextents(ip->i_afp));
 	to->di_forkoff = from->di_forkoff;
-	to->di_aformat = from->di_aformat;
+	to->di_aformat = xfs_ifork_format(ip->i_afp);
 	to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
 	to->di_dmstate = cpu_to_be16(from->di_dmstate);
 	to->di_flags = cpu_to_be16(from->di_flags);
···
 	struct xfs_dinode	*dip,
 	struct xfs_mount	*mp)
 {
-	if (!XFS_DFORK_Q(dip))
+	if (!dip->di_forkoff)
 		return NULL;

 	switch (dip->di_format) {
···
 		return __this_address;
 	}

-	if (XFS_DFORK_Q(dip)) {
+	if (dip->di_forkoff) {
 		fa = xfs_dinode_verify_fork(dip, mp, XFS_ATTR_FORK);
 		if (fa)
 			return fa;
···
 	crc = xfs_start_cksum_update((char *)dip, mp->m_sb.sb_inodesize,
 			XFS_DINODE_CRC_OFF);
 	dip->di_crc = xfs_end_cksum(crc);
-}
-
-/*
- * Read the disk inode attributes into the in-core inode structure.
615 - * 616 - * For version 5 superblocks, if we are initialising a new inode and we are not 617 - * utilising the XFS_MOUNT_IKEEP inode cluster mode, we can simple build the new 618 - * inode core with a random generation number. If we are keeping inodes around, 619 - * we need to read the inode cluster to get the existing generation number off 620 - * disk. Further, if we are using version 4 superblocks (i.e. v1/v2 inode 621 - * format) then log recovery is dependent on the di_flushiter field being 622 - * initialised from the current on-disk value and hence we must also read the 623 - * inode off disk. 624 - */ 625 - int 626 - xfs_iread( 627 - xfs_mount_t *mp, 628 - xfs_trans_t *tp, 629 - xfs_inode_t *ip, 630 - uint iget_flags) 631 - { 632 - xfs_buf_t *bp; 633 - xfs_dinode_t *dip; 634 - xfs_failaddr_t fa; 635 - int error; 636 - 637 - /* 638 - * Fill in the location information in the in-core inode. 639 - */ 640 - error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags); 641 - if (error) 642 - return error; 643 - 644 - /* shortcut IO on inode allocation if possible */ 645 - if ((iget_flags & XFS_IGET_CREATE) && 646 - xfs_sb_version_has_v3inode(&mp->m_sb) && 647 - !(mp->m_flags & XFS_MOUNT_IKEEP)) { 648 - VFS_I(ip)->i_generation = prandom_u32(); 649 - return 0; 650 - } 651 - 652 - /* 653 - * Get pointers to the on-disk inode and the buffer containing it. 654 - */ 655 - error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &bp, 0, iget_flags); 656 - if (error) 657 - return error; 658 - 659 - /* even unallocated inodes are verified */ 660 - fa = xfs_dinode_verify(mp, ip->i_ino, dip); 661 - if (fa) { 662 - xfs_inode_verifier_error(ip, -EFSCORRUPTED, "dinode", dip, 663 - sizeof(*dip), fa); 664 - error = -EFSCORRUPTED; 665 - goto out_brelse; 666 - } 667 - 668 - /* 669 - * If the on-disk inode is already linked to a directory 670 - * entry, copy all of the inode into the in-core inode. 
671 - * xfs_iformat_fork() handles copying in the inode format 672 - * specific information. 673 - * Otherwise, just get the truly permanent information. 674 - */ 675 - if (dip->di_mode) { 676 - xfs_inode_from_disk(ip, dip); 677 - error = xfs_iformat_fork(ip, dip); 678 - if (error) { 679 - #ifdef DEBUG 680 - xfs_alert(mp, "%s: xfs_iformat() returned error %d", 681 - __func__, error); 682 - #endif /* DEBUG */ 683 - goto out_brelse; 684 - } 685 - } else { 686 - /* 687 - * Partial initialisation of the in-core inode. Just the bits 688 - * that xfs_ialloc won't overwrite or relies on being correct. 689 - */ 690 - VFS_I(ip)->i_generation = be32_to_cpu(dip->di_gen); 691 - ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter); 692 - 693 - /* 694 - * Make sure to pull in the mode here as well in 695 - * case the inode is released without being used. 696 - * This ensures that xfs_inactive() will see that 697 - * the inode is already free and not try to mess 698 - * with the uninitialized part of it. 699 - */ 700 - VFS_I(ip)->i_mode = 0; 701 - } 702 - 703 - ip->i_delayed_blks = 0; 704 - 705 - /* 706 - * Mark the buffer containing the inode as something to keep 707 - * around for a while. This helps to keep recently accessed 708 - * meta-data in-core longer. 709 - */ 710 - xfs_buf_set_ref(bp, XFS_INO_REF); 711 - 712 - /* 713 - * Use xfs_trans_brelse() to release the buffer containing the on-disk 714 - * inode, because it was acquired with xfs_trans_read_buf() in 715 - * xfs_imap_to_bp() above. If tp is NULL, this is just a normal 716 - * brelse(). If we're within a transaction, then xfs_trans_brelse() 717 - * will only release the buffer if it is not dirty within the 718 - * transaction. It will be OK to release the buffer in this case, 719 - * because inodes on disk are never destroyed and we will be locking the 720 - * new in-core inode before putting it in the cache where other 721 - * processes can find it. 
Thus we don't have to worry about the inode 722 - * being changed just because we released the buffer. 723 - */ 724 - out_brelse: 725 - xfs_trans_brelse(tp, bp); 726 - return error; 727 585 } 728 586 729 587 /*
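The reworked xfs_inode_from_disk() above folds on-disk verification, the free-inode short-circuit, and fork initialization into one function with a single unwind path, replacing the old xfs_iread() flow. Below is a minimal userspace sketch of that control flow; every helper name here is an illustrative stand-in, not the kernel API, and only the EUCLEAN/ENOMEM errno values are real.

```c
#include <assert.h>
#include <errno.h>
#include <stdbool.h>

static bool data_fork_alive;

/* Stand-ins for xfs_dinode_verify(), xfs_iformat_data_fork(), etc. */
static int verify_dinode(bool corrupt)   { return corrupt ? -EUCLEAN : 0; }
static int init_data_fork(void)          { data_fork_alive = true; return 0; }
static void destroy_data_fork(void)      { data_fork_alive = false; }
static int init_attr_fork(bool fail)     { return fail ? -ENOMEM : 0; }

static int inode_from_disk(bool corrupt, unsigned int mode,
                           bool has_attr, bool attr_fails)
{
    int error;

    /* 1. Verify the raw on-disk inode before trusting any field. */
    error = verify_dinode(corrupt);
    if (error)
        return error;

    /* 2. A free inode (mode == 0) yields only the permanent fields;
     *    don't touch the uninitialized remainder. */
    if (mode == 0)
        return 0;

    /* 3. Data fork first, then the optional attr fork. */
    error = init_data_fork();
    if (error)
        return error;
    if (has_attr) {
        error = init_attr_fork(attr_fails);
        if (error)
            goto out_destroy_data_fork;
    }
    return 0;

out_destroy_data_fork:
    /* Unwind so the caller never sees a half-built inode. */
    destroy_data_fork();
    return error;
}
```

This ordering is what backs the guarantee in the merge summary that _iget'ted inodes are fully verified: no fork setup runs on an inode that failed the verifier.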
+2 -8
fs/xfs/libxfs/xfs_inode_buf.h
··· 16 16 * format specific structures at the appropriate time. 17 17 */ 18 18 struct xfs_icdinode { 19 - int8_t di_format; /* format of di_c data */ 20 19 uint16_t di_flushiter; /* incremented on flush */ 21 20 uint32_t di_projid; /* owner's project id */ 22 21 xfs_fsize_t di_size; /* number of bytes in file */ 23 22 xfs_rfsblock_t di_nblocks; /* # of direct & btree blocks used */ 24 23 xfs_extlen_t di_extsize; /* basic/minimum extent size for file */ 25 - xfs_extnum_t di_nextents; /* number of extents in data fork */ 26 - xfs_aextnum_t di_anextents; /* number of extents in attribute fork*/ 27 24 uint8_t di_forkoff; /* attr fork offs, <<3 for 64b align */ 28 - int8_t di_aformat; /* format of attr fork's data */ 29 25 uint32_t di_dmevmask; /* DMIG event mask */ 30 26 uint16_t di_dmstate; /* DMIG state info */ 31 27 uint16_t di_flags; /* random flags, XFS_DIFLAG_... */ ··· 44 48 45 49 int xfs_imap_to_bp(struct xfs_mount *, struct xfs_trans *, 46 50 struct xfs_imap *, struct xfs_dinode **, 47 - struct xfs_buf **, uint, uint); 48 - int xfs_iread(struct xfs_mount *, struct xfs_trans *, 49 - struct xfs_inode *, uint); 51 + struct xfs_buf **, uint); 50 52 void xfs_dinode_calc_crc(struct xfs_mount *, struct xfs_dinode *); 51 53 void xfs_inode_to_disk(struct xfs_inode *ip, struct xfs_dinode *to, 52 54 xfs_lsn_t lsn); 53 - void xfs_inode_from_disk(struct xfs_inode *ip, struct xfs_dinode *from); 55 + int xfs_inode_from_disk(struct xfs_inode *ip, struct xfs_dinode *from); 54 56 void xfs_log_dinode_to_disk(struct xfs_log_dinode *from, 55 57 struct xfs_dinode *to); 56 58
+156 -164
fs/xfs/libxfs/xfs_inode_fork.c
··· 26 26 27 27 kmem_zone_t *xfs_ifork_zone; 28 28 29 - STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int); 30 - STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int); 31 - STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int); 32 - 33 - /* 34 - * Copy inode type and data and attr format specific information from the 35 - * on-disk inode to the in-core inode and fork structures. For fifos, devices, 36 - * and sockets this means set i_rdev to the proper value. For files, 37 - * directories, and symlinks this means to bring in the in-line data or extent 38 - * pointers as well as the attribute fork. For a fork in B-tree format, only 39 - * the root is immediately brought in-core. The rest will be read in later when 40 - * first referenced (see xfs_iread_extents()). 41 - */ 42 - int 43 - xfs_iformat_fork( 44 - struct xfs_inode *ip, 45 - struct xfs_dinode *dip) 46 - { 47 - struct inode *inode = VFS_I(ip); 48 - struct xfs_attr_shortform *atp; 49 - int size; 50 - int error = 0; 51 - xfs_fsize_t di_size; 52 - 53 - switch (inode->i_mode & S_IFMT) { 54 - case S_IFIFO: 55 - case S_IFCHR: 56 - case S_IFBLK: 57 - case S_IFSOCK: 58 - ip->i_d.di_size = 0; 59 - inode->i_rdev = xfs_to_linux_dev_t(xfs_dinode_get_rdev(dip)); 60 - break; 61 - 62 - case S_IFREG: 63 - case S_IFLNK: 64 - case S_IFDIR: 65 - switch (dip->di_format) { 66 - case XFS_DINODE_FMT_LOCAL: 67 - di_size = be64_to_cpu(dip->di_size); 68 - size = (int)di_size; 69 - error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size); 70 - break; 71 - case XFS_DINODE_FMT_EXTENTS: 72 - error = xfs_iformat_extents(ip, dip, XFS_DATA_FORK); 73 - break; 74 - case XFS_DINODE_FMT_BTREE: 75 - error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK); 76 - break; 77 - default: 78 - xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, 79 - dip, sizeof(*dip), __this_address); 80 - return -EFSCORRUPTED; 81 - } 82 - break; 83 - 84 - default: 85 - xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, dip, 86 
- sizeof(*dip), __this_address); 87 - return -EFSCORRUPTED; 88 - } 89 - if (error) 90 - return error; 91 - 92 - if (xfs_is_reflink_inode(ip)) { 93 - ASSERT(ip->i_cowfp == NULL); 94 - xfs_ifork_init_cow(ip); 95 - } 96 - 97 - if (!XFS_DFORK_Q(dip)) 98 - return 0; 99 - 100 - ASSERT(ip->i_afp == NULL); 101 - ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_NOFS); 102 - 103 - switch (dip->di_aformat) { 104 - case XFS_DINODE_FMT_LOCAL: 105 - atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip); 106 - size = be16_to_cpu(atp->hdr.totsize); 107 - 108 - error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size); 109 - break; 110 - case XFS_DINODE_FMT_EXTENTS: 111 - error = xfs_iformat_extents(ip, dip, XFS_ATTR_FORK); 112 - break; 113 - case XFS_DINODE_FMT_BTREE: 114 - error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK); 115 - break; 116 - default: 117 - xfs_inode_verifier_error(ip, error, __func__, dip, 118 - sizeof(*dip), __this_address); 119 - error = -EFSCORRUPTED; 120 - break; 121 - } 122 - if (error) { 123 - kmem_cache_free(xfs_ifork_zone, ip->i_afp); 124 - ip->i_afp = NULL; 125 - if (ip->i_cowfp) 126 - kmem_cache_free(xfs_ifork_zone, ip->i_cowfp); 127 - ip->i_cowfp = NULL; 128 - xfs_idestroy_fork(ip, XFS_DATA_FORK); 129 - } 130 - return error; 131 - } 132 - 133 29 void 134 30 xfs_init_local_fork( 135 31 struct xfs_inode *ip, ··· 188 292 * or the number of extents is greater than the number of 189 293 * blocks. 
190 294 */ 191 - if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <= 192 - XFS_IFORK_MAXEXT(ip, whichfork) || 295 + if (unlikely(ifp->if_nextents <= XFS_IFORK_MAXEXT(ip, whichfork) || 193 296 nrecs == 0 || 194 297 XFS_BMDR_SPACE_CALC(nrecs) > 195 298 XFS_DFORK_SIZE(dip, mp, whichfork) || 196 - XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks) || 299 + ifp->if_nextents > ip->i_d.di_nblocks) || 197 300 level == 0 || level > XFS_BTREE_MAXLEVELS) { 198 301 xfs_warn(mp, "corrupt inode %Lu (btree).", 199 302 (unsigned long long) ip->i_ino); ··· 218 323 ifp->if_u1.if_root = NULL; 219 324 ifp->if_height = 0; 220 325 return 0; 326 + } 327 + 328 + int 329 + xfs_iformat_data_fork( 330 + struct xfs_inode *ip, 331 + struct xfs_dinode *dip) 332 + { 333 + struct inode *inode = VFS_I(ip); 334 + int error; 335 + 336 + /* 337 + * Initialize the extent count early, as the per-format routines may 338 + * depend on it. 339 + */ 340 + ip->i_df.if_format = dip->di_format; 341 + ip->i_df.if_nextents = be32_to_cpu(dip->di_nextents); 342 + 343 + switch (inode->i_mode & S_IFMT) { 344 + case S_IFIFO: 345 + case S_IFCHR: 346 + case S_IFBLK: 347 + case S_IFSOCK: 348 + ip->i_d.di_size = 0; 349 + inode->i_rdev = xfs_to_linux_dev_t(xfs_dinode_get_rdev(dip)); 350 + return 0; 351 + case S_IFREG: 352 + case S_IFLNK: 353 + case S_IFDIR: 354 + switch (ip->i_df.if_format) { 355 + case XFS_DINODE_FMT_LOCAL: 356 + error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, 357 + be64_to_cpu(dip->di_size)); 358 + if (!error) 359 + error = xfs_ifork_verify_local_data(ip); 360 + return error; 361 + case XFS_DINODE_FMT_EXTENTS: 362 + return xfs_iformat_extents(ip, dip, XFS_DATA_FORK); 363 + case XFS_DINODE_FMT_BTREE: 364 + return xfs_iformat_btree(ip, dip, XFS_DATA_FORK); 365 + default: 366 + xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, 367 + dip, sizeof(*dip), __this_address); 368 + return -EFSCORRUPTED; 369 + } 370 + break; 371 + default: 372 + xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, dip, 
373 + sizeof(*dip), __this_address); 374 + return -EFSCORRUPTED; 375 + } 376 + } 377 + 378 + static uint16_t 379 + xfs_dfork_attr_shortform_size( 380 + struct xfs_dinode *dip) 381 + { 382 + struct xfs_attr_shortform *atp = 383 + (struct xfs_attr_shortform *)XFS_DFORK_APTR(dip); 384 + 385 + return be16_to_cpu(atp->hdr.totsize); 386 + } 387 + 388 + int 389 + xfs_iformat_attr_fork( 390 + struct xfs_inode *ip, 391 + struct xfs_dinode *dip) 392 + { 393 + int error = 0; 394 + 395 + /* 396 + * Initialize the extent count early, as the per-format routines may 397 + * depend on it. 398 + */ 399 + ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_NOFS); 400 + ip->i_afp->if_format = dip->di_aformat; 401 + if (unlikely(ip->i_afp->if_format == 0)) /* pre IRIX 6.2 file system */ 402 + ip->i_afp->if_format = XFS_DINODE_FMT_EXTENTS; 403 + ip->i_afp->if_nextents = be16_to_cpu(dip->di_anextents); 404 + 405 + switch (ip->i_afp->if_format) { 406 + case XFS_DINODE_FMT_LOCAL: 407 + error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, 408 + xfs_dfork_attr_shortform_size(dip)); 409 + if (!error) 410 + error = xfs_ifork_verify_local_attr(ip); 411 + break; 412 + case XFS_DINODE_FMT_EXTENTS: 413 + error = xfs_iformat_extents(ip, dip, XFS_ATTR_FORK); 414 + break; 415 + case XFS_DINODE_FMT_BTREE: 416 + error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK); 417 + break; 418 + default: 419 + xfs_inode_verifier_error(ip, error, __func__, dip, 420 + sizeof(*dip), __this_address); 421 + error = -EFSCORRUPTED; 422 + break; 423 + } 424 + 425 + if (error) { 426 + kmem_cache_free(xfs_ifork_zone, ip->i_afp); 427 + ip->i_afp = NULL; 428 + } 429 + return error; 221 430 } 222 431 223 432 /* ··· 503 504 504 505 void 505 506 xfs_idestroy_fork( 506 - xfs_inode_t *ip, 507 - int whichfork) 507 + struct xfs_ifork *ifp) 508 508 { 509 - struct xfs_ifork *ifp; 510 - 511 - ifp = XFS_IFORK_PTR(ip, whichfork); 512 509 if (ifp->if_broot != NULL) { 513 510 kmem_free(ifp->if_broot); 514 511 ifp->if_broot = NULL; 515 512 } 516 513 
517 514 /* 518 - * If the format is local, then we can't have an extents 519 - * array so just look for an inline data array. If we're 520 - * not local then we may or may not have an extents list, 521 - * so check and free it up if we do. 515 + * If the format is local, then we can't have an extents array so just 516 + * look for an inline data array. If we're not local then we may or may 517 + * not have an extents list, so check and free it up if we do. 522 518 */ 523 - if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { 524 - if (ifp->if_u1.if_data != NULL) { 525 - kmem_free(ifp->if_u1.if_data); 526 - ifp->if_u1.if_data = NULL; 527 - } 528 - } else if ((ifp->if_flags & XFS_IFEXTENTS) && ifp->if_height) { 529 - xfs_iext_destroy(ifp); 530 - } 531 - 532 - if (whichfork == XFS_ATTR_FORK) { 533 - kmem_cache_free(xfs_ifork_zone, ip->i_afp); 534 - ip->i_afp = NULL; 535 - } else if (whichfork == XFS_COW_FORK) { 536 - kmem_cache_free(xfs_ifork_zone, ip->i_cowfp); 537 - ip->i_cowfp = NULL; 519 + if (ifp->if_format == XFS_DINODE_FMT_LOCAL) { 520 + kmem_free(ifp->if_u1.if_data); 521 + ifp->if_u1.if_data = NULL; 522 + } else if (ifp->if_flags & XFS_IFEXTENTS) { 523 + if (ifp->if_height) 524 + xfs_iext_destroy(ifp); 538 525 } 539 526 } 540 527 ··· 577 592 xfs_iflush_fork( 578 593 xfs_inode_t *ip, 579 594 xfs_dinode_t *dip, 580 - xfs_inode_log_item_t *iip, 595 + struct xfs_inode_log_item *iip, 581 596 int whichfork) 582 597 { 583 598 char *cp; ··· 603 618 } 604 619 cp = XFS_DFORK_PTR(dip, whichfork); 605 620 mp = ip->i_mount; 606 - switch (XFS_IFORK_FORMAT(ip, whichfork)) { 621 + switch (ifp->if_format) { 607 622 case XFS_DINODE_FMT_LOCAL: 608 623 if ((iip->ili_fields & dataflag[whichfork]) && 609 624 (ifp->if_bytes > 0)) { ··· 618 633 !(iip->ili_fields & extflag[whichfork])); 619 634 if ((iip->ili_fields & extflag[whichfork]) && 620 635 (ifp->if_bytes > 0)) { 621 - ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0); 636 + ASSERT(ifp->if_nextents > 0); 622 637 
(void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp, 623 638 whichfork); 624 639 } ··· 676 691 ip->i_cowfp = kmem_zone_zalloc(xfs_ifork_zone, 677 692 KM_NOFS); 678 693 ip->i_cowfp->if_flags = XFS_IFEXTENTS; 679 - ip->i_cformat = XFS_DINODE_FMT_EXTENTS; 680 - ip->i_cnextents = 0; 694 + ip->i_cowfp->if_format = XFS_DINODE_FMT_EXTENTS; 681 695 } 682 696 683 - /* Default fork content verifiers. */ 684 - struct xfs_ifork_ops xfs_default_ifork_ops = { 685 - .verify_attr = xfs_attr_shortform_verify, 686 - .verify_dir = xfs_dir2_sf_verify, 687 - .verify_symlink = xfs_symlink_shortform_verify, 688 - }; 689 - 690 697 /* Verify the inline contents of the data fork of an inode. */ 691 - xfs_failaddr_t 692 - xfs_ifork_verify_data( 693 - struct xfs_inode *ip, 694 - struct xfs_ifork_ops *ops) 698 + int 699 + xfs_ifork_verify_local_data( 700 + struct xfs_inode *ip) 695 701 { 696 - /* Non-local data fork, we're done. */ 697 - if (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL) 698 - return NULL; 702 + xfs_failaddr_t fa = NULL; 699 703 700 - /* Check the inline data fork if there is one. */ 701 704 switch (VFS_I(ip)->i_mode & S_IFMT) { 702 705 case S_IFDIR: 703 - return ops->verify_dir(ip); 706 + fa = xfs_dir2_sf_verify(ip); 707 + break; 704 708 case S_IFLNK: 705 - return ops->verify_symlink(ip); 709 + fa = xfs_symlink_shortform_verify(ip); 710 + break; 706 711 default: 707 - return NULL; 712 + break; 708 713 } 714 + 715 + if (fa) { 716 + xfs_inode_verifier_error(ip, -EFSCORRUPTED, "data fork", 717 + ip->i_df.if_u1.if_data, ip->i_df.if_bytes, fa); 718 + return -EFSCORRUPTED; 719 + } 720 + 721 + return 0; 709 722 } 710 723 711 724 /* Verify the inline contents of the attr fork of an inode. */ 712 - xfs_failaddr_t 713 - xfs_ifork_verify_attr( 714 - struct xfs_inode *ip, 715 - struct xfs_ifork_ops *ops) 725 + int 726 + xfs_ifork_verify_local_attr( 727 + struct xfs_inode *ip) 716 728 { 717 - /* There has to be an attr fork allocated if aformat is local. 
*/ 718 - if (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) 719 - return NULL; 720 - if (!XFS_IFORK_PTR(ip, XFS_ATTR_FORK)) 721 - return __this_address; 722 - return ops->verify_attr(ip); 729 + struct xfs_ifork *ifp = ip->i_afp; 730 + xfs_failaddr_t fa; 731 + 732 + if (!ifp) 733 + fa = __this_address; 734 + else 735 + fa = xfs_attr_shortform_verify(ip); 736 + 737 + if (fa) { 738 + xfs_inode_verifier_error(ip, -EFSCORRUPTED, "attr fork", 739 + ifp ? ifp->if_u1.if_data : NULL, 740 + ifp ? ifp->if_bytes : 0, fa); 741 + return -EFSCORRUPTED; 742 + } 743 + 744 + return 0; 723 745 }
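The new xfs_ifork_verify_local_data()/xfs_ifork_verify_local_attr() above wrap the old failure-address verifiers: they report the corruption themselves and hand back a plain errno. A hedged userspace sketch of that wrapping pattern follows; the verifier and its check are toy stand-ins, though EFSCORRUPTED being EUCLEAN does match the kernel's definition.

```c
#include <assert.h>
#include <errno.h>
#include <stddef.h>

#define EFSCORRUPTED EUCLEAN  /* same mapping the kernel uses */

/* A failure-address verifier returns NULL on success, or a marker
 * describing where the check failed (stand-in for __this_address). */
typedef const char *failaddr_t;

static failaddr_t verify_local(const char *data, size_t len)
{
    (void)data;
    if (len == 0)
        return "empty local fork";  /* toy check for illustration */
    return NULL;
}

static int verify_local_to_errno(const char *data, size_t len)
{
    failaddr_t fa = verify_local(data, len);

    if (fa) {
        /* The kernel calls xfs_inode_verifier_error() at this point
         * to log the failure address before converting to an errno. */
        return -EFSCORRUPTED;
    }
    return 0;
}
```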
+26 -42
fs/xfs/libxfs/xfs_inode_fork.h
··· 23 23 } if_u1; 24 24 short if_broot_bytes; /* bytes allocated for root */ 25 25 unsigned char if_flags; /* per-fork flags */ 26 + int8_t if_format; /* format of this fork */ 27 + xfs_extnum_t if_nextents; /* # of extents in this fork */ 26 28 }; 27 29 28 30 /* ··· 57 55 ((w) == XFS_ATTR_FORK ? \ 58 56 XFS_IFORK_ASIZE(ip) : \ 59 57 0)) 60 - #define XFS_IFORK_FORMAT(ip,w) \ 61 - ((w) == XFS_DATA_FORK ? \ 62 - (ip)->i_d.di_format : \ 63 - ((w) == XFS_ATTR_FORK ? \ 64 - (ip)->i_d.di_aformat : \ 65 - (ip)->i_cformat)) 66 - #define XFS_IFORK_FMT_SET(ip,w,n) \ 67 - ((w) == XFS_DATA_FORK ? \ 68 - ((ip)->i_d.di_format = (n)) : \ 69 - ((w) == XFS_ATTR_FORK ? \ 70 - ((ip)->i_d.di_aformat = (n)) : \ 71 - ((ip)->i_cformat = (n)))) 72 - #define XFS_IFORK_NEXTENTS(ip,w) \ 73 - ((w) == XFS_DATA_FORK ? \ 74 - (ip)->i_d.di_nextents : \ 75 - ((w) == XFS_ATTR_FORK ? \ 76 - (ip)->i_d.di_anextents : \ 77 - (ip)->i_cnextents)) 78 - #define XFS_IFORK_NEXT_SET(ip,w,n) \ 79 - ((w) == XFS_DATA_FORK ? \ 80 - ((ip)->i_d.di_nextents = (n)) : \ 81 - ((w) == XFS_ATTR_FORK ? 
\ 82 - ((ip)->i_d.di_anextents = (n)) : \ 83 - ((ip)->i_cnextents = (n)))) 84 58 #define XFS_IFORK_MAXEXT(ip, w) \ 85 59 (XFS_IFORK_SIZE(ip, w) / sizeof(xfs_bmbt_rec_t)) 86 60 87 - #define xfs_ifork_has_extents(ip, w) \ 88 - (XFS_IFORK_FORMAT((ip), (w)) == XFS_DINODE_FMT_EXTENTS || \ 89 - XFS_IFORK_FORMAT((ip), (w)) == XFS_DINODE_FMT_BTREE) 61 + static inline bool xfs_ifork_has_extents(struct xfs_ifork *ifp) 62 + { 63 + return ifp->if_format == XFS_DINODE_FMT_EXTENTS || 64 + ifp->if_format == XFS_DINODE_FMT_BTREE; 65 + } 66 + 67 + static inline xfs_extnum_t xfs_ifork_nextents(struct xfs_ifork *ifp) 68 + { 69 + if (!ifp) 70 + return 0; 71 + return ifp->if_nextents; 72 + } 73 + 74 + static inline int8_t xfs_ifork_format(struct xfs_ifork *ifp) 75 + { 76 + if (!ifp) 77 + return XFS_DINODE_FMT_EXTENTS; 78 + return ifp->if_format; 79 + } 90 80 91 81 struct xfs_ifork *xfs_iext_state_to_fork(struct xfs_inode *ip, int state); 92 82 93 - int xfs_iformat_fork(struct xfs_inode *, struct xfs_dinode *); 83 + int xfs_iformat_data_fork(struct xfs_inode *, struct xfs_dinode *); 84 + int xfs_iformat_attr_fork(struct xfs_inode *, struct xfs_dinode *); 94 85 void xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *, 95 86 struct xfs_inode_log_item *, int); 96 - void xfs_idestroy_fork(struct xfs_inode *, int); 87 + void xfs_idestroy_fork(struct xfs_ifork *ifp); 97 88 void xfs_idata_realloc(struct xfs_inode *ip, int64_t byte_diff, 98 89 int whichfork); 99 90 void xfs_iroot_realloc(struct xfs_inode *, int, int); ··· 170 175 171 176 extern void xfs_ifork_init_cow(struct xfs_inode *ip); 172 177 173 - typedef xfs_failaddr_t (*xfs_ifork_verifier_t)(struct xfs_inode *); 174 - 175 - struct xfs_ifork_ops { 176 - xfs_ifork_verifier_t verify_symlink; 177 - xfs_ifork_verifier_t verify_dir; 178 - xfs_ifork_verifier_t verify_attr; 179 - }; 180 - extern struct xfs_ifork_ops xfs_default_ifork_ops; 181 - 182 - xfs_failaddr_t xfs_ifork_verify_data(struct xfs_inode *ip, 183 - struct xfs_ifork_ops 
*ops); 184 - xfs_failaddr_t xfs_ifork_verify_attr(struct xfs_inode *ip, 185 - struct xfs_ifork_ops *ops); 178 + int xfs_ifork_verify_local_data(struct xfs_inode *ip); 179 + int xfs_ifork_verify_local_attr(struct xfs_inode *ip); 186 180 187 181 #endif /* __XFS_INODE_FORK_H__ */
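With if_format and if_nextents moved into struct xfs_ifork, an absent fork is simply a NULL pointer, and the new xfs_ifork_format()/xfs_ifork_nextents() helpers above supply its defaults (extents format, zero extents). Here is a self-contained sketch of that NULL-tolerant accessor pattern, using simplified stand-in types:

```c
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#define FMT_EXTENTS 2  /* stand-in for XFS_DINODE_FMT_EXTENTS */

struct ifork {
    int8_t if_format;      /* format of this fork */
    uint32_t if_nextents;  /* # of extents in this fork */
};

/* Illustrative sample fork (format 1 is an arbitrary non-default). */
static const struct ifork sample_fork = { .if_format = 1, .if_nextents = 7 };

/* A missing fork has no extents... */
static uint32_t ifork_nextents(const struct ifork *ifp)
{
    if (!ifp)
        return 0;
    return ifp->if_nextents;
}

/* ...and is written to disk as an empty extents fork. */
static int8_t ifork_format(const struct ifork *ifp)
{
    if (!ifp)
        return FMT_EXTENTS;
    return ifp->if_format;
}
```

This is what lets xfs_inode_to_disk() fill di_aformat and di_anextents without first testing whether ip->i_afp exists.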
+79 -4
fs/xfs/libxfs/xfs_log_recover.h
··· 7 7 #define __XFS_LOG_RECOVER_H__ 8 8 9 9 /* 10 + * Each log item type (XFS_LI_*) gets its own xlog_recover_item_ops to 11 + * define how recovery should work for that type of log item. 12 + */ 13 + struct xlog_recover_item; 14 + 15 + /* Sorting hat for log items as they're read in. */ 16 + enum xlog_recover_reorder { 17 + XLOG_REORDER_BUFFER_LIST, 18 + XLOG_REORDER_ITEM_LIST, 19 + XLOG_REORDER_INODE_BUFFER_LIST, 20 + XLOG_REORDER_CANCEL_LIST, 21 + }; 22 + 23 + struct xlog_recover_item_ops { 24 + uint16_t item_type; /* XFS_LI_* type code. */ 25 + 26 + /* 27 + * Help sort recovered log items into the order required to replay them 28 + * correctly. Log item types that always use XLOG_REORDER_ITEM_LIST do 29 + * not have to supply a function here. See the comment preceding 30 + * xlog_recover_reorder_trans for more details about what the return 31 + * values mean. 32 + */ 33 + enum xlog_recover_reorder (*reorder)(struct xlog_recover_item *item); 34 + 35 + /* Start readahead for pass2, if provided. */ 36 + void (*ra_pass2)(struct xlog *log, struct xlog_recover_item *item); 37 + 38 + /* Do whatever work we need to do for pass1, if provided. */ 39 + int (*commit_pass1)(struct xlog *log, struct xlog_recover_item *item); 40 + 41 + /* 42 + * This function should do whatever work is needed for pass2 of log 43 + * recovery, if provided. 44 + * 45 + * If the recovered item is an intent item, this function should parse 46 + * the recovered item to construct an in-core log intent item and 47 + * insert it into the AIL. The in-core log intent item should have 1 48 + * refcount so that the item is freed either (a) when we commit the 49 + * recovered log item for the intent-done item; (b) replay the work and 50 + * log a new intent-done item; or (c) recovery fails and we have to 51 + * abort. 52 + * 53 + * If the recovered item is an intent-done item, this function should 54 + * parse the recovered item to find the id of the corresponding intent 55 + * log item. 
Next, it should find the in-core log intent item in the 56 + * AIL and release it. 57 + */ 58 + int (*commit_pass2)(struct xlog *log, struct list_head *buffer_list, 59 + struct xlog_recover_item *item, xfs_lsn_t lsn); 60 + }; 61 + 62 + extern const struct xlog_recover_item_ops xlog_icreate_item_ops; 63 + extern const struct xlog_recover_item_ops xlog_buf_item_ops; 64 + extern const struct xlog_recover_item_ops xlog_inode_item_ops; 65 + extern const struct xlog_recover_item_ops xlog_dquot_item_ops; 66 + extern const struct xlog_recover_item_ops xlog_quotaoff_item_ops; 67 + extern const struct xlog_recover_item_ops xlog_bui_item_ops; 68 + extern const struct xlog_recover_item_ops xlog_bud_item_ops; 69 + extern const struct xlog_recover_item_ops xlog_efi_item_ops; 70 + extern const struct xlog_recover_item_ops xlog_efd_item_ops; 71 + extern const struct xlog_recover_item_ops xlog_rui_item_ops; 72 + extern const struct xlog_recover_item_ops xlog_rud_item_ops; 73 + extern const struct xlog_recover_item_ops xlog_cui_item_ops; 74 + extern const struct xlog_recover_item_ops xlog_cud_item_ops; 75 + 76 + /* 10 77 * Macros, structures, prototypes for internal log manager use. 11 78 */ 12 79 ··· 89 22 /* 90 23 * item headers are in ri_buf[0]. Additional buffers follow. 
91 24 */ 92 - typedef struct xlog_recover_item { 25 + struct xlog_recover_item { 93 26 struct list_head ri_list; 94 - int ri_type; 95 27 int ri_cnt; /* count of regions found */ 96 28 int ri_total; /* total regions */ 97 - xfs_log_iovec_t *ri_buf; /* ptr to regions buffer */ 98 - } xlog_recover_item_t; 29 + struct xfs_log_iovec *ri_buf; /* ptr to regions buffer */ 30 + const struct xlog_recover_item_ops *ri_ops; 31 + }; 99 32 100 33 struct xlog_recover { 101 34 struct hlist_node r_list; ··· 117 50 #define XLOG_RECOVER_CRCPASS 0 118 51 #define XLOG_RECOVER_PASS1 1 119 52 #define XLOG_RECOVER_PASS2 2 53 + 54 + void xlog_buf_readahead(struct xlog *log, xfs_daddr_t blkno, uint len, 55 + const struct xfs_buf_ops *ops); 56 + bool xlog_is_buffer_cancelled(struct xlog *log, xfs_daddr_t blkno, uint len); 57 + void xlog_recover_iodone(struct xfs_buf *bp); 58 + 59 + void xlog_recover_release_intent(struct xlog *log, unsigned short intent_type, 60 + uint64_t intent_id); 120 61 121 62 #endif /* __XFS_LOG_RECOVER_H__ */
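The struct xlog_recover_item_ops comment above notes that item types which always sort into the plain item list may omit a ->reorder hook, so the dispatcher must supply that default. A userspace sketch of that optional-callback dispatch, with simplified stand-in types (the item_type values are illustrative placeholders, not the real XFS_LI_* codes):

```c
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

enum reorder {
    REORDER_BUFFER_LIST,
    REORDER_ITEM_LIST,
    REORDER_INODE_BUFFER_LIST,
    REORDER_CANCEL_LIST,
};

struct item;

struct item_ops {
    uint16_t item_type;                           /* XFS_LI_*-style code */
    enum reorder (*reorder)(struct item *item);   /* optional hook */
};

struct item {
    const struct item_ops *ri_ops;
};

/* Buffer log items sort into the buffer list. */
static enum reorder buf_reorder(struct item *item)
{
    (void)item;
    return REORDER_BUFFER_LIST;
}

static const struct item_ops buf_item_ops = {
    .item_type = 0x0001,  /* placeholder type code */
    .reorder = buf_reorder,
};

static const struct item_ops inode_item_ops = {
    .item_type = 0x0002,  /* placeholder; no ->reorder hook supplied */
};

static struct item a_buf_item = { .ri_ops = &buf_item_ops };
static struct item an_inode_item = { .ri_ops = &inode_item_ops };

/* Items without a ->reorder hook default to the plain item list. */
static enum reorder item_reorder(struct item *item)
{
    if (item->ri_ops->reorder)
        return item->ri_ops->reorder(item);
    return REORDER_ITEM_LIST;
}
```

Attaching a const ops vector to each recovered item is what removes the big per-type switch statements from the generic recovery code and lets each log item's recovery live next to its other processing code, as the merge summary describes.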
-1
fs/xfs/libxfs/xfs_quota_defs.h
··· 100 100 #define XFS_QMOPT_FORCE_RES 0x0000010 /* ignore quota limits */ 101 101 #define XFS_QMOPT_SBVERSION 0x0000040 /* change superblock version num */ 102 102 #define XFS_QMOPT_GQUOTA 0x0002000 /* group dquot requested */ 103 - #define XFS_QMOPT_ENOSPC 0x0004000 /* enospc instead of edquot (prj) */ 104 103 105 104 /* 106 105 * flags to xfs_trans_mod_dquot to indicate which field needs to be
+1 -1
fs/xfs/libxfs/xfs_rtbitmap.c
··· 66 66 67 67 ip = issum ? mp->m_rsumip : mp->m_rbmip; 68 68 69 - error = xfs_bmapi_read(ip, block, 1, &map, &nmap, XFS_DATA_FORK); 69 + error = xfs_bmapi_read(ip, block, 1, &map, &nmap, 0); 70 70 if (error) 71 71 return error; 72 72
+1 -1
fs/xfs/libxfs/xfs_sb.c
··· 243 243 } else if (sbp->sb_qflags & (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD | 244 244 XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD)) { 245 245 xfs_notice(mp, 246 - "Superblock earlier than Version 5 has XFS_[PQ]UOTA_{ENFD|CHKD} bits."); 246 + "Superblock earlier than Version 5 has XFS_{P|G}QUOTA_{ENFD|CHKD} bits."); 247 247 return -EFSCORRUPTED; 248 248 } 249 249
+1 -1
fs/xfs/libxfs/xfs_trans_inode.c
··· 27 27 struct xfs_inode *ip, 28 28 uint lock_flags) 29 29 { 30 - xfs_inode_log_item_t *iip; 30 + struct xfs_inode_log_item *iip; 31 31 32 32 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 33 33 if (ip->i_itemp == NULL)
+17 -23
fs/xfs/scrub/bmap.c
··· 566 566 struct xfs_scrub *sc, 567 567 int whichfork) 568 568 { 569 - loff_t size; 569 + struct xfs_ifork *ifp = XFS_IFORK_PTR(sc->ip, whichfork); 570 570 xfs_agnumber_t agno; 571 + bool zero_size; 571 572 int error; 572 573 573 574 if (!xfs_sb_version_hasrmapbt(&sc->mp->m_sb) || ··· 580 579 if (XFS_IS_REALTIME_INODE(sc->ip) && whichfork == XFS_DATA_FORK) 581 580 return 0; 582 581 582 + ASSERT(XFS_IFORK_PTR(sc->ip, whichfork) != NULL); 583 + 583 584 /* 584 585 * Only do this for complex maps that are in btree format, or for 585 586 * situations where we would seem to have a size but zero extents. ··· 589 586 * to flag this bmap as corrupt if there are rmaps that need to be 590 587 * reattached. 591 588 */ 592 - switch (whichfork) { 593 - case XFS_DATA_FORK: 594 - size = i_size_read(VFS_I(sc->ip)); 595 - break; 596 - case XFS_ATTR_FORK: 597 - size = XFS_IFORK_Q(sc->ip); 598 - break; 599 - default: 600 - size = 0; 601 - break; 602 - } 603 - if (XFS_IFORK_FORMAT(sc->ip, whichfork) != XFS_DINODE_FMT_BTREE && 604 - (size == 0 || XFS_IFORK_NEXTENTS(sc->ip, whichfork) > 0)) 589 + 590 + if (whichfork == XFS_DATA_FORK) 591 + zero_size = i_size_read(VFS_I(sc->ip)) == 0; 592 + else 593 + zero_size = false; 594 + 595 + if (ifp->if_format != XFS_DINODE_FMT_BTREE && 596 + (zero_size || ifp->if_nextents > 0)) 605 597 return 0; 606 598 607 599 for (agno = 0; agno < sc->mp->m_sb.sb_agcount; agno++) { ··· 625 627 struct xchk_bmap_info info = { NULL }; 626 628 struct xfs_mount *mp = sc->mp; 627 629 struct xfs_inode *ip = sc->ip; 628 - struct xfs_ifork *ifp; 630 + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); 629 631 xfs_fileoff_t endoff; 630 632 struct xfs_iext_cursor icur; 631 633 int error = 0; 632 634 633 - ifp = XFS_IFORK_PTR(ip, whichfork); 635 + /* Non-existent forks can be ignored. 
*/ 636 + if (!ifp) 637 + goto out; 634 638 635 639 info.is_rt = whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip); 636 640 info.whichfork = whichfork; ··· 641 641 642 642 switch (whichfork) { 643 643 case XFS_COW_FORK: 644 - /* Non-existent CoW forks are ignorable. */ 645 - if (!ifp) 646 - goto out; 647 644 /* No CoW forks on non-reflink inodes/filesystems. */ 648 645 if (!xfs_is_reflink_inode(ip)) { 649 646 xchk_ino_set_corrupt(sc, sc->ip->i_ino); ··· 648 651 } 649 652 break; 650 653 case XFS_ATTR_FORK: 651 - if (!ifp) 652 - goto out_check_rmap; 653 654 if (!xfs_sb_version_hasattr(&mp->m_sb) && 654 655 !xfs_sb_version_hasattr2(&mp->m_sb)) 655 656 xchk_ino_set_corrupt(sc, sc->ip->i_ino); ··· 658 663 } 659 664 660 665 /* Check the fork values */ 661 - switch (XFS_IFORK_FORMAT(ip, whichfork)) { 666 + switch (ifp->if_format) { 662 667 case XFS_DINODE_FMT_UUID: 663 668 case XFS_DINODE_FMT_DEV: 664 669 case XFS_DINODE_FMT_LOCAL: ··· 712 717 goto out; 713 718 } 714 719 715 - out_check_rmap: 716 720 error = xchk_bmap_check_rmaps(sc, whichfork); 717 721 if (!xchk_fblock_xref_process_error(sc, whichfork, 0, &error)) 718 722 goto out;
fs/xfs/scrub/dabtree.c +1 -1
··· 468 468 int error; 469 469 470 470 /* Skip short format data structures; no btree to scan. */ 471 - if (!xfs_ifork_has_extents(sc->ip, whichfork)) 471 + if (!xfs_ifork_has_extents(XFS_IFORK_PTR(sc->ip, whichfork))) 472 472 return 0; 473 473 474 474 /* Set up initial da state. */
fs/xfs/scrub/dir.c +3 -4
··· 635 635 { 636 636 struct xfs_bmbt_irec got; 637 637 struct xfs_da_args args; 638 - struct xfs_ifork *ifp; 638 + struct xfs_ifork *ifp = XFS_IFORK_PTR(sc->ip, XFS_DATA_FORK); 639 639 struct xfs_mount *mp = sc->mp; 640 640 xfs_fileoff_t leaf_lblk; 641 641 xfs_fileoff_t free_lblk; ··· 647 647 int error; 648 648 649 649 /* Ignore local format directories. */ 650 - if (sc->ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS && 651 - sc->ip->i_d.di_format != XFS_DINODE_FMT_BTREE) 650 + if (ifp->if_format != XFS_DINODE_FMT_EXTENTS && 651 + ifp->if_format != XFS_DINODE_FMT_BTREE) 652 652 return 0; 653 653 654 - ifp = XFS_IFORK_PTR(sc->ip, XFS_DATA_FORK); 655 654 lblk = XFS_B_TO_FSB(mp, XFS_DIR2_DATA_OFFSET); 656 655 leaf_lblk = XFS_B_TO_FSB(mp, XFS_DIR2_LEAF_OFFSET); 657 656 free_lblk = XFS_B_TO_FSB(mp, XFS_DIR2_FREE_OFFSET);
fs/xfs/scrub/ialloc.c +1 -2
··· 278 278 &XFS_RMAP_OINFO_INODES); 279 279 280 280 /* Grab the inode cluster buffer. */ 281 - error = xfs_imap_to_bp(mp, bs->cur->bc_tp, &imap, &dip, &cluster_bp, 282 - 0, 0); 281 + error = xfs_imap_to_bp(mp, bs->cur->bc_tp, &imap, &dip, &cluster_bp, 0); 283 282 if (!xchk_btree_xref_process_error(bs->sc, bs->cur, 0, &error)) 284 283 return error; 285 284
fs/xfs/scrub/parent.c +1 -1
··· 90 90 * if there is one. 91 91 */ 92 92 lock_mode = xfs_ilock_data_map_shared(parent); 93 - if (parent->i_d.di_nextents > 0) 93 + if (parent->i_df.if_nextents > 0) 94 94 error = xfs_dir3_data_readahead(parent, 0, 0); 95 95 xfs_iunlock(parent, lock_mode); 96 96 if (error)
fs/xfs/xfs_aops.c +1 -1
··· 382 382 */ 383 383 retry: 384 384 xfs_ilock(ip, XFS_ILOCK_SHARED); 385 - ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || 385 + ASSERT(ip->i_df.if_format != XFS_DINODE_FMT_BTREE || 386 386 (ip->i_df.if_flags & XFS_IFEXTENTS)); 387 387 388 388 /*
fs/xfs/xfs_attr_inactive.c +6 -3
··· 367 367 * removal below. 368 368 */ 369 369 if (xfs_inode_hasattr(dp) && 370 - dp->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) { 370 + dp->i_afp->if_format != XFS_DINODE_FMT_LOCAL) { 371 371 error = xfs_attr3_root_inactive(&trans, dp); 372 372 if (error) 373 373 goto out_cancel; ··· 388 388 xfs_trans_cancel(trans); 389 389 out_destroy_fork: 390 390 /* kill the in-core attr fork before we drop the inode lock */ 391 - if (dp->i_afp) 392 - xfs_idestroy_fork(dp, XFS_ATTR_FORK); 391 + if (dp->i_afp) { 392 + xfs_idestroy_fork(dp->i_afp); 393 + kmem_cache_free(xfs_ifork_zone, dp->i_afp); 394 + dp->i_afp = NULL; 395 + } 393 396 if (lock_mode) 394 397 xfs_iunlock(dp, lock_mode); 395 398 return error;
fs/xfs/xfs_attr_list.c +2 -2
··· 512 512 */ 513 513 if (!xfs_inode_hasattr(dp)) 514 514 return 0; 515 - else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) 515 + if (dp->i_afp->if_format == XFS_DINODE_FMT_LOCAL) 516 516 return xfs_attr_shortform_list(context); 517 - else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) 517 + if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) 518 518 return xfs_attr_leaf_list(context); 519 519 return xfs_attr_node_list(context); 520 520 }
fs/xfs/xfs_bmap_item.c +172 -67
··· 22 22 #include "xfs_bmap_btree.h" 23 23 #include "xfs_trans_space.h" 24 24 #include "xfs_error.h" 25 + #include "xfs_log_priv.h" 26 + #include "xfs_log_recover.h" 25 27 26 28 kmem_zone_t *xfs_bui_zone; 27 29 kmem_zone_t *xfs_bud_zone; 30 + 31 + static const struct xfs_item_ops xfs_bui_item_ops; 28 32 29 33 static inline struct xfs_bui_log_item *BUI_ITEM(struct xfs_log_item *lip) 30 34 { 31 35 return container_of(lip, struct xfs_bui_log_item, bui_item); 32 36 } 33 37 34 - void 38 + STATIC void 35 39 xfs_bui_item_free( 36 40 struct xfs_bui_log_item *buip) 37 41 { ··· 49 45 * committed vs unpin operations in bulk insert operations. Hence the reference 50 46 * count to ensure only the last caller frees the BUI. 51 47 */ 52 - void 48 + STATIC void 53 49 xfs_bui_release( 54 50 struct xfs_bui_log_item *buip) 55 51 { 56 52 ASSERT(atomic_read(&buip->bui_refcount) > 0); 57 53 if (atomic_dec_and_test(&buip->bui_refcount)) { 58 - xfs_trans_ail_remove(&buip->bui_item, SHUTDOWN_LOG_IO_ERROR); 54 + xfs_trans_ail_delete(&buip->bui_item, SHUTDOWN_LOG_IO_ERROR); 59 55 xfs_bui_item_free(buip); 60 56 } 61 57 } ··· 128 124 xfs_bui_release(BUI_ITEM(lip)); 129 125 } 130 126 131 - static const struct xfs_item_ops xfs_bui_item_ops = { 132 - .iop_size = xfs_bui_item_size, 133 - .iop_format = xfs_bui_item_format, 134 - .iop_unpin = xfs_bui_item_unpin, 135 - .iop_release = xfs_bui_item_release, 136 - }; 137 - 138 127 /* 139 128 * Allocate and initialize an bui item with the given number of extents. 140 129 */ 141 - struct xfs_bui_log_item * 130 + STATIC struct xfs_bui_log_item * 142 131 xfs_bui_init( 143 132 struct xfs_mount *mp) 144 133 ··· 275 278 return ba->bi_owner->i_ino - bb->bi_owner->i_ino; 276 279 } 277 280 278 - /* Get an BUI. 
*/ 279 - STATIC void * 280 - xfs_bmap_update_create_intent( 281 - struct xfs_trans *tp, 282 - unsigned int count) 283 - { 284 - struct xfs_bui_log_item *buip; 285 - 286 - ASSERT(count == XFS_BUI_MAX_FAST_EXTENTS); 287 - ASSERT(tp != NULL); 288 - 289 - buip = xfs_bui_init(tp->t_mountp); 290 - ASSERT(buip != NULL); 291 - 292 - /* 293 - * Get a log_item_desc to point at the new item. 294 - */ 295 - xfs_trans_add_item(tp, &buip->bui_item); 296 - return buip; 297 - } 298 - 299 281 /* Set the map extent flags for this mapping. */ 300 282 static void 301 283 xfs_trans_set_bmap_flags( ··· 302 326 STATIC void 303 327 xfs_bmap_update_log_item( 304 328 struct xfs_trans *tp, 305 - void *intent, 306 - struct list_head *item) 329 + struct xfs_bui_log_item *buip, 330 + struct xfs_bmap_intent *bmap) 307 331 { 308 - struct xfs_bui_log_item *buip = intent; 309 - struct xfs_bmap_intent *bmap; 310 332 uint next_extent; 311 333 struct xfs_map_extent *map; 312 - 313 - bmap = container_of(item, struct xfs_bmap_intent, bi_list); 314 334 315 335 tp->t_flags |= XFS_TRANS_DIRTY; 316 336 set_bit(XFS_LI_DIRTY, &buip->bui_item.li_flags); ··· 327 355 bmap->bi_bmap.br_state); 328 356 } 329 357 358 + static struct xfs_log_item * 359 + xfs_bmap_update_create_intent( 360 + struct xfs_trans *tp, 361 + struct list_head *items, 362 + unsigned int count, 363 + bool sort) 364 + { 365 + struct xfs_mount *mp = tp->t_mountp; 366 + struct xfs_bui_log_item *buip = xfs_bui_init(mp); 367 + struct xfs_bmap_intent *bmap; 368 + 369 + ASSERT(count == XFS_BUI_MAX_FAST_EXTENTS); 370 + 371 + xfs_trans_add_item(tp, &buip->bui_item); 372 + if (sort) 373 + list_sort(mp, items, xfs_bmap_update_diff_items); 374 + list_for_each_entry(bmap, items, bi_list) 375 + xfs_bmap_update_log_item(tp, buip, bmap); 376 + return &buip->bui_item; 377 + } 378 + 330 379 /* Get an BUD so we can process all the deferred rmap updates. 
*/ 331 - STATIC void * 380 + static struct xfs_log_item * 332 381 xfs_bmap_update_create_done( 333 382 struct xfs_trans *tp, 334 - void *intent, 383 + struct xfs_log_item *intent, 335 384 unsigned int count) 336 385 { 337 - return xfs_trans_get_bud(tp, intent); 386 + return &xfs_trans_get_bud(tp, BUI_ITEM(intent))->bud_item; 338 387 } 339 388 340 389 /* Process a deferred rmap update. */ 341 390 STATIC int 342 391 xfs_bmap_update_finish_item( 343 392 struct xfs_trans *tp, 393 + struct xfs_log_item *done, 344 394 struct list_head *item, 345 - void *done_item, 346 - void **state) 395 + struct xfs_btree_cur **state) 347 396 { 348 397 struct xfs_bmap_intent *bmap; 349 398 xfs_filblks_t count; ··· 372 379 373 380 bmap = container_of(item, struct xfs_bmap_intent, bi_list); 374 381 count = bmap->bi_bmap.br_blockcount; 375 - error = xfs_trans_log_finish_bmap_update(tp, done_item, 382 + error = xfs_trans_log_finish_bmap_update(tp, BUD_ITEM(done), 376 383 bmap->bi_type, 377 384 bmap->bi_owner, bmap->bi_whichfork, 378 385 bmap->bi_bmap.br_startoff, ··· 391 398 /* Abort all pending BUIs. */ 392 399 STATIC void 393 400 xfs_bmap_update_abort_intent( 394 - void *intent) 401 + struct xfs_log_item *intent) 395 402 { 396 - xfs_bui_release(intent); 403 + xfs_bui_release(BUI_ITEM(intent)); 397 404 } 398 405 399 406 /* Cancel a deferred rmap update. */ ··· 409 416 410 417 const struct xfs_defer_op_type xfs_bmap_update_defer_type = { 411 418 .max_items = XFS_BUI_MAX_FAST_EXTENTS, 412 - .diff_items = xfs_bmap_update_diff_items, 413 419 .create_intent = xfs_bmap_update_create_intent, 414 420 .abort_intent = xfs_bmap_update_abort_intent, 415 - .log_item = xfs_bmap_update_log_item, 416 421 .create_done = xfs_bmap_update_create_done, 417 422 .finish_item = xfs_bmap_update_finish_item, 418 423 .cancel_item = xfs_bmap_update_cancel_item, ··· 420 429 * Process a bmap update intent item that was recovered from the log. 421 430 * We need to update some inode's bmbt. 
422 431 */ 423 - int 424 - xfs_bui_recover( 425 - struct xfs_trans *parent_tp, 426 - struct xfs_bui_log_item *buip) 432 + STATIC int 433 + xfs_bui_item_recover( 434 + struct xfs_log_item *lip, 435 + struct xfs_trans *parent_tp) 427 436 { 428 - int error = 0; 429 - unsigned int bui_type; 437 + struct xfs_bmbt_irec irec; 438 + struct xfs_bui_log_item *buip = BUI_ITEM(lip); 439 + struct xfs_trans *tp; 440 + struct xfs_inode *ip = NULL; 441 + struct xfs_mount *mp = parent_tp->t_mountp; 430 442 struct xfs_map_extent *bmap; 443 + struct xfs_bud_log_item *budp; 431 444 xfs_fsblock_t startblock_fsb; 432 445 xfs_fsblock_t inode_fsb; 433 446 xfs_filblks_t count; 434 - bool op_ok; 435 - struct xfs_bud_log_item *budp; 436 - enum xfs_bmap_intent_type type; 437 - int whichfork; 438 447 xfs_exntst_t state; 439 - struct xfs_trans *tp; 440 - struct xfs_inode *ip = NULL; 441 - struct xfs_bmbt_irec irec; 442 - struct xfs_mount *mp = parent_tp->t_mountp; 443 - 444 - ASSERT(!test_bit(XFS_BUI_RECOVERED, &buip->bui_flags)); 448 + enum xfs_bmap_intent_type type; 449 + bool op_ok; 450 + unsigned int bui_type; 451 + int whichfork; 452 + int error = 0; 445 453 446 454 /* Only one mapping operation per BUI... */ 447 455 if (buip->bui_format.bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) { 448 - set_bit(XFS_BUI_RECOVERED, &buip->bui_flags); 449 456 xfs_bui_release(buip); 450 457 return -EFSCORRUPTED; 451 458 } ··· 477 488 * This will pull the BUI from the AIL and 478 489 * free the memory associated with it. 
479 490 */ 480 - set_bit(XFS_BUI_RECOVERED, &buip->bui_flags); 481 491 xfs_bui_release(buip); 482 492 return -EFSCORRUPTED; 483 493 } ··· 534 546 xfs_bmap_unmap_extent(tp, ip, &irec); 535 547 } 536 548 537 - set_bit(XFS_BUI_RECOVERED, &buip->bui_flags); 538 549 xfs_defer_move(parent_tp, tp); 539 550 error = xfs_trans_commit(tp); 540 551 xfs_iunlock(ip, XFS_ILOCK_EXCL); ··· 550 563 } 551 564 return error; 552 565 } 566 + 567 + STATIC bool 568 + xfs_bui_item_match( 569 + struct xfs_log_item *lip, 570 + uint64_t intent_id) 571 + { 572 + return BUI_ITEM(lip)->bui_format.bui_id == intent_id; 573 + } 574 + 575 + static const struct xfs_item_ops xfs_bui_item_ops = { 576 + .iop_size = xfs_bui_item_size, 577 + .iop_format = xfs_bui_item_format, 578 + .iop_unpin = xfs_bui_item_unpin, 579 + .iop_release = xfs_bui_item_release, 580 + .iop_recover = xfs_bui_item_recover, 581 + .iop_match = xfs_bui_item_match, 582 + }; 583 + 584 + /* 585 + * Copy an BUI format buffer from the given buf, and into the destination 586 + * BUI format structure. The BUI/BUD items were designed not to need any 587 + * special alignment handling. 588 + */ 589 + static int 590 + xfs_bui_copy_format( 591 + struct xfs_log_iovec *buf, 592 + struct xfs_bui_log_format *dst_bui_fmt) 593 + { 594 + struct xfs_bui_log_format *src_bui_fmt; 595 + uint len; 596 + 597 + src_bui_fmt = buf->i_addr; 598 + len = xfs_bui_log_format_sizeof(src_bui_fmt->bui_nextents); 599 + 600 + if (buf->i_len == len) { 601 + memcpy(dst_bui_fmt, src_bui_fmt, len); 602 + return 0; 603 + } 604 + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL); 605 + return -EFSCORRUPTED; 606 + } 607 + 608 + /* 609 + * This routine is called to create an in-core extent bmap update 610 + * item from the bui format structure which was logged on disk. 611 + * It allocates an in-core bui, copies the extents from the format 612 + * structure into it, and adds the bui to the AIL with the given 613 + * LSN. 
614 + */ 615 + STATIC int 616 + xlog_recover_bui_commit_pass2( 617 + struct xlog *log, 618 + struct list_head *buffer_list, 619 + struct xlog_recover_item *item, 620 + xfs_lsn_t lsn) 621 + { 622 + int error; 623 + struct xfs_mount *mp = log->l_mp; 624 + struct xfs_bui_log_item *buip; 625 + struct xfs_bui_log_format *bui_formatp; 626 + 627 + bui_formatp = item->ri_buf[0].i_addr; 628 + 629 + if (bui_formatp->bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) { 630 + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); 631 + return -EFSCORRUPTED; 632 + } 633 + buip = xfs_bui_init(mp); 634 + error = xfs_bui_copy_format(&item->ri_buf[0], &buip->bui_format); 635 + if (error) { 636 + xfs_bui_item_free(buip); 637 + return error; 638 + } 639 + atomic_set(&buip->bui_next_extent, bui_formatp->bui_nextents); 640 + /* 641 + * Insert the intent into the AIL directly and drop one reference so 642 + * that finishing or canceling the work will drop the other. 643 + */ 644 + xfs_trans_ail_insert(log->l_ailp, &buip->bui_item, lsn); 645 + xfs_bui_release(buip); 646 + return 0; 647 + } 648 + 649 + const struct xlog_recover_item_ops xlog_bui_item_ops = { 650 + .item_type = XFS_LI_BUI, 651 + .commit_pass2 = xlog_recover_bui_commit_pass2, 652 + }; 653 + 654 + /* 655 + * This routine is called when an BUD format structure is found in a committed 656 + * transaction in the log. Its purpose is to cancel the corresponding BUI if it 657 + * was still in the log. To do this it searches the AIL for the BUI with an id 658 + * equal to that in the BUD format structure. If we find it we drop the BUD 659 + * reference, which removes the BUI from the AIL and frees it. 
660 + */ 661 + STATIC int 662 + xlog_recover_bud_commit_pass2( 663 + struct xlog *log, 664 + struct list_head *buffer_list, 665 + struct xlog_recover_item *item, 666 + xfs_lsn_t lsn) 667 + { 668 + struct xfs_bud_log_format *bud_formatp; 669 + 670 + bud_formatp = item->ri_buf[0].i_addr; 671 + if (item->ri_buf[0].i_len != sizeof(struct xfs_bud_log_format)) { 672 + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); 673 + return -EFSCORRUPTED; 674 + } 675 + 676 + xlog_recover_release_intent(log, XFS_LI_BUI, bud_formatp->bud_bui_id); 677 + return 0; 678 + } 679 + 680 + const struct xlog_recover_item_ops xlog_bud_item_ops = { 681 + .item_type = XFS_LI_BUD, 682 + .commit_pass2 = xlog_recover_bud_commit_pass2, 683 + };
fs/xfs/xfs_bmap_item.h -11
··· 33 33 #define XFS_BUI_MAX_FAST_EXTENTS 1 34 34 35 35 /* 36 - * Define BUI flag bits. Manipulated by set/clear/test_bit operators. 37 - */ 38 - #define XFS_BUI_RECOVERED 1 39 - 40 - /* 41 36 * This is the "bmap update intent" log item. It is used to log the fact that 42 37 * some reverse mappings need to change. It is used in conjunction with the 43 38 * "bmap update done" log item described below. ··· 44 49 struct xfs_log_item bui_item; 45 50 atomic_t bui_refcount; 46 51 atomic_t bui_next_extent; 47 - unsigned long bui_flags; /* misc flags */ 48 52 struct xfs_bui_log_format bui_format; 49 53 }; 50 54 ··· 67 73 68 74 extern struct kmem_zone *xfs_bui_zone; 69 75 extern struct kmem_zone *xfs_bud_zone; 70 - 71 - struct xfs_bui_log_item *xfs_bui_init(struct xfs_mount *); 72 - void xfs_bui_item_free(struct xfs_bui_log_item *); 73 - void xfs_bui_release(struct xfs_bui_log_item *); 74 - int xfs_bui_recover(struct xfs_trans *parent_tp, struct xfs_bui_log_item *buip); 75 76 76 77 #endif /* __XFS_BMAP_ITEM_H__ */
fs/xfs/xfs_bmap_util.c +41 -38
··· 223 223 if (!ifp) 224 224 return 0; 225 225 226 - switch (XFS_IFORK_FORMAT(ip, whichfork)) { 226 + switch (ifp->if_format) { 227 227 case XFS_DINODE_FMT_BTREE: 228 228 if (!(ifp->if_flags & XFS_IFEXTENTS)) { 229 229 error = xfs_iread_extents(tp, ip, whichfork); ··· 449 449 break; 450 450 } 451 451 452 - switch (XFS_IFORK_FORMAT(ip, whichfork)) { 452 + switch (ifp->if_format) { 453 453 case XFS_DINODE_FMT_EXTENTS: 454 454 case XFS_DINODE_FMT_BTREE: 455 455 break; ··· 1210 1210 struct xfs_inode *ip, /* target inode */ 1211 1211 struct xfs_inode *tip) /* tmp inode */ 1212 1212 { 1213 + struct xfs_ifork *ifp = &ip->i_df; 1214 + struct xfs_ifork *tifp = &tip->i_df; 1215 + 1216 + /* User/group/project quota ids must match if quotas are enforced. */ 1217 + if (XFS_IS_QUOTA_ON(ip->i_mount) && 1218 + (!uid_eq(VFS_I(ip)->i_uid, VFS_I(tip)->i_uid) || 1219 + !gid_eq(VFS_I(ip)->i_gid, VFS_I(tip)->i_gid) || 1220 + ip->i_d.di_projid != tip->i_d.di_projid)) 1221 + return -EINVAL; 1213 1222 1214 1223 /* Should never get a local format */ 1215 - if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL || 1216 - tip->i_d.di_format == XFS_DINODE_FMT_LOCAL) 1224 + if (ifp->if_format == XFS_DINODE_FMT_LOCAL || 1225 + tifp->if_format == XFS_DINODE_FMT_LOCAL) 1217 1226 return -EINVAL; 1218 1227 1219 1228 /* 1220 1229 * if the target inode has less extents that then temporary inode then 1221 1230 * why did userspace call us? 1222 1231 */ 1223 - if (ip->i_d.di_nextents < tip->i_d.di_nextents) 1232 + if (ifp->if_nextents < tifp->if_nextents) 1224 1233 return -EINVAL; 1225 1234 1226 1235 /* ··· 1244 1235 * form then we will end up with the target inode in the wrong format 1245 1236 * as we already know there are less extents in the temp inode. 
1246 1237 */ 1247 - if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && 1248 - tip->i_d.di_format == XFS_DINODE_FMT_BTREE) 1238 + if (ifp->if_format == XFS_DINODE_FMT_EXTENTS && 1239 + tifp->if_format == XFS_DINODE_FMT_BTREE) 1249 1240 return -EINVAL; 1250 1241 1251 1242 /* Check temp in extent form to max in target */ 1252 - if (tip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && 1253 - XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) > 1254 - XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)) 1243 + if (tifp->if_format == XFS_DINODE_FMT_EXTENTS && 1244 + tifp->if_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)) 1255 1245 return -EINVAL; 1256 1246 1257 1247 /* Check target in extent form to max in temp */ 1258 - if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && 1259 - XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) > 1260 - XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK)) 1248 + if (ifp->if_format == XFS_DINODE_FMT_EXTENTS && 1249 + ifp->if_nextents > XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK)) 1261 1250 return -EINVAL; 1262 1251 1263 1252 /* ··· 1267 1260 * (a common defrag case) which will occur when the temp inode is in 1268 1261 * extent format... 
1269 1262 */ 1270 - if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE) { 1263 + if (tifp->if_format == XFS_DINODE_FMT_BTREE) { 1271 1264 if (XFS_IFORK_Q(ip) && 1272 - XFS_BMAP_BMDR_SPACE(tip->i_df.if_broot) > XFS_IFORK_BOFF(ip)) 1265 + XFS_BMAP_BMDR_SPACE(tifp->if_broot) > XFS_IFORK_BOFF(ip)) 1273 1266 return -EINVAL; 1274 - if (XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <= 1275 - XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)) 1267 + if (tifp->if_nextents <= XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)) 1276 1268 return -EINVAL; 1277 1269 } 1278 1270 1279 1271 /* Reciprocal target->temp btree format checks */ 1280 - if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) { 1272 + if (ifp->if_format == XFS_DINODE_FMT_BTREE) { 1281 1273 if (XFS_IFORK_Q(tip) && 1282 1274 XFS_BMAP_BMDR_SPACE(ip->i_df.if_broot) > XFS_IFORK_BOFF(tip)) 1283 1275 return -EINVAL; 1284 - if (XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <= 1285 - XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK)) 1276 + if (ifp->if_nextents <= XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK)) 1286 1277 return -EINVAL; 1287 1278 } 1288 1279 ··· 1432 1427 /* 1433 1428 * Count the number of extended attribute blocks 1434 1429 */ 1435 - if ( ((XFS_IFORK_Q(ip) != 0) && (ip->i_d.di_anextents > 0)) && 1436 - (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) { 1430 + if (XFS_IFORK_Q(ip) && ip->i_afp->if_nextents > 0 && 1431 + ip->i_afp->if_format != XFS_DINODE_FMT_LOCAL) { 1437 1432 error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK, &junk, 1438 1433 &aforkblks); 1439 1434 if (error) 1440 1435 return error; 1441 1436 } 1442 - if ( ((XFS_IFORK_Q(tip) != 0) && (tip->i_d.di_anextents > 0)) && 1443 - (tip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) { 1437 + if (XFS_IFORK_Q(tip) && tip->i_afp->if_nextents > 0 && 1438 + tip->i_afp->if_format != XFS_DINODE_FMT_LOCAL) { 1444 1439 error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK, &junk, 1445 1440 &taforkblks); 1446 1441 if (error) ··· 1455 1450 * bmbt scan as the last step. 
1456 1451 */ 1457 1452 if (xfs_sb_version_has_v3inode(&ip->i_mount->m_sb)) { 1458 - if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) 1453 + if (ip->i_df.if_format == XFS_DINODE_FMT_BTREE) 1459 1454 (*target_log_flags) |= XFS_ILOG_DOWNER; 1460 - if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE) 1455 + if (tip->i_df.if_format == XFS_DINODE_FMT_BTREE) 1461 1456 (*src_log_flags) |= XFS_ILOG_DOWNER; 1462 1457 } 1463 1458 ··· 1473 1468 ip->i_d.di_nblocks = tip->i_d.di_nblocks - taforkblks + aforkblks; 1474 1469 tip->i_d.di_nblocks = tmp + taforkblks - aforkblks; 1475 1470 1476 - swap(ip->i_d.di_nextents, tip->i_d.di_nextents); 1477 - swap(ip->i_d.di_format, tip->i_d.di_format); 1478 - 1479 1471 /* 1480 1472 * The extents in the source inode could still contain speculative 1481 1473 * preallocation beyond EOF (e.g. the file is open but not modified ··· 1486 1484 tip->i_delayed_blks = ip->i_delayed_blks; 1487 1485 ip->i_delayed_blks = 0; 1488 1486 1489 - switch (ip->i_d.di_format) { 1487 + switch (ip->i_df.if_format) { 1490 1488 case XFS_DINODE_FMT_EXTENTS: 1491 1489 (*src_log_flags) |= XFS_ILOG_DEXT; 1492 1490 break; ··· 1497 1495 break; 1498 1496 } 1499 1497 1500 - switch (tip->i_d.di_format) { 1498 + switch (tip->i_df.if_format) { 1501 1499 case XFS_DINODE_FMT_EXTENTS: 1502 1500 (*target_log_flags) |= XFS_ILOG_DEXT; 1503 1501 break; ··· 1608 1606 if (xfs_inode_has_cow_data(tip)) { 1609 1607 error = xfs_reflink_cancel_cow_range(tip, 0, NULLFILEOFF, true); 1610 1608 if (error) 1611 - return error; 1609 + goto out_unlock; 1612 1610 } 1613 1611 1614 1612 /* ··· 1617 1615 * performed with log redo items! 
1618 1616 */ 1619 1617 if (xfs_sb_version_hasrmapbt(&mp->m_sb)) { 1620 - int w = XFS_DATA_FORK; 1621 - uint32_t ipnext = XFS_IFORK_NEXTENTS(ip, w); 1622 - uint32_t tipnext = XFS_IFORK_NEXTENTS(tip, w); 1618 + int w = XFS_DATA_FORK; 1619 + uint32_t ipnext = ip->i_df.if_nextents; 1620 + uint32_t tipnext = tip->i_df.if_nextents; 1623 1621 1624 1622 /* 1625 1623 * Conceptually this shouldn't affect the shape of either bmbt, ··· 1719 1717 1720 1718 /* Swap the cow forks. */ 1721 1719 if (xfs_sb_version_hasreflink(&mp->m_sb)) { 1722 - ASSERT(ip->i_cformat == XFS_DINODE_FMT_EXTENTS); 1723 - ASSERT(tip->i_cformat == XFS_DINODE_FMT_EXTENTS); 1720 + ASSERT(!ip->i_cowfp || 1721 + ip->i_cowfp->if_format == XFS_DINODE_FMT_EXTENTS); 1722 + ASSERT(!tip->i_cowfp || 1723 + tip->i_cowfp->if_format == XFS_DINODE_FMT_EXTENTS); 1724 1724 1725 - swap(ip->i_cnextents, tip->i_cnextents); 1726 1725 swap(ip->i_cowfp, tip->i_cowfp); 1727 1726 1728 1727 if (ip->i_cowfp && ip->i_cowfp->if_bytes)
fs/xfs/xfs_buf.c +55 -15
··· 1197 1197 bp->b_ops->verify_read(bp); 1198 1198 } 1199 1199 1200 - if (!bp->b_error) 1200 + if (!bp->b_error) { 1201 + bp->b_flags &= ~XBF_WRITE_FAIL; 1201 1202 bp->b_flags |= XBF_DONE; 1203 + } 1202 1204 1203 1205 if (bp->b_iodone) 1204 1206 (*(bp->b_iodone))(bp); ··· 1244 1242 struct xfs_buf *bp, 1245 1243 xfs_failaddr_t func) 1246 1244 { 1247 - xfs_alert_ratelimited(bp->b_mount, 1248 - "metadata I/O error in \"%pS\" at daddr 0x%llx len %d error %d", 1249 - func, (uint64_t)XFS_BUF_ADDR(bp), bp->b_length, 1250 - -bp->b_error); 1245 + xfs_buf_alert_ratelimited(bp, "XFS: metadata IO error", 1246 + "metadata I/O error in \"%pS\" at daddr 0x%llx len %d error %d", 1247 + func, (uint64_t)XFS_BUF_ADDR(bp), 1248 + bp->b_length, -bp->b_error); 1249 + } 1250 + 1251 + /* 1252 + * To simulate an I/O failure, the buffer must be locked and held with at least 1253 + * three references. The LRU reference is dropped by the stale call. The buf 1254 + * item reference is dropped via ioend processing. The third reference is owned 1255 + * by the caller and is dropped on I/O completion if the buffer is XBF_ASYNC. 
1256 + */ 1257 + void 1258 + xfs_buf_ioend_fail( 1259 + struct xfs_buf *bp) 1260 + { 1261 + bp->b_flags &= ~XBF_DONE; 1262 + xfs_buf_stale(bp); 1263 + xfs_buf_ioerror(bp, -EIO); 1264 + xfs_buf_ioend(bp); 1251 1265 } 1252 1266 1253 1267 int ··· 1276 1258 1277 1259 bp->b_flags |= XBF_WRITE; 1278 1260 bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q | 1279 - XBF_WRITE_FAIL | XBF_DONE); 1261 + XBF_DONE); 1280 1262 1281 1263 error = xfs_buf_submit(bp); 1282 1264 if (error) ··· 1289 1271 struct bio *bio) 1290 1272 { 1291 1273 struct xfs_buf *bp = (struct xfs_buf *)bio->bi_private; 1274 + 1275 + if (!bio->bi_status && 1276 + (bp->b_flags & XBF_WRITE) && (bp->b_flags & XBF_ASYNC) && 1277 + XFS_TEST_ERROR(false, bp->b_mount, XFS_ERRTAG_BUF_IOERROR)) 1278 + bio->bi_status = BLK_STS_IOERR; 1292 1279 1293 1280 /* 1294 1281 * don't overwrite existing errors - otherwise we can lose errors on ··· 1503 1480 1504 1481 /* on shutdown we stale and complete the buffer immediately */ 1505 1482 if (XFS_FORCED_SHUTDOWN(bp->b_mount)) { 1506 - xfs_buf_ioerror(bp, -EIO); 1507 - bp->b_flags &= ~XBF_DONE; 1508 - xfs_buf_stale(bp); 1509 - xfs_buf_ioend(bp); 1483 + xfs_buf_ioend_fail(bp); 1510 1484 return -EIO; 1511 1485 } 1512 1486 ··· 1662 1642 struct xfs_buftarg *btp) 1663 1643 { 1664 1644 LIST_HEAD(dispose); 1665 - int loop = 0; 1645 + int loop = 0; 1646 + bool write_fail = false; 1666 1647 1667 1648 /* 1668 1649 * First wait on the buftarg I/O count for all in-flight buffers to be ··· 1691 1670 bp = list_first_entry(&dispose, struct xfs_buf, b_lru); 1692 1671 list_del_init(&bp->b_lru); 1693 1672 if (bp->b_flags & XBF_WRITE_FAIL) { 1694 - xfs_alert(btp->bt_mount, 1673 + write_fail = true; 1674 + xfs_buf_alert_ratelimited(bp, 1675 + "XFS: Corruption Alert", 1695 1676 "Corruption Alert: Buffer at daddr 0x%llx had permanent write failures!", 1696 1677 (long long)bp->b_bn); 1697 - xfs_alert(btp->bt_mount, 1698 - "Please run xfs_repair to determine the extent of the problem."); 1699 1678 } 
1700 1679 xfs_buf_rele(bp); 1701 1680 } 1702 1681 if (loop++ != 0) 1703 1682 delay(100); 1683 + } 1684 + 1685 + /* 1686 + * If one or more failed buffers were freed, that means dirty metadata 1687 + * was thrown away. This should only ever happen after I/O completion 1688 + * handling has elevated I/O error(s) to permanent failures and shuts 1689 + * down the fs. 1690 + */ 1691 + if (write_fail) { 1692 + ASSERT(XFS_FORCED_SHUTDOWN(btp->bt_mount)); 1693 + xfs_alert(btp->bt_mount, 1694 + "Please run xfs_repair to determine the extent of the problem."); 1704 1695 } 1705 1696 } 1706 1697 ··· 1845 1812 btp->bt_dev = bdev->bd_dev; 1846 1813 btp->bt_bdev = bdev; 1847 1814 btp->bt_daxdev = dax_dev; 1815 + 1816 + /* 1817 + * Buffer IO error rate limiting. Limit it to no more than 10 messages 1818 + * per 30 seconds so as to not spam logs too much on repeated errors. 1819 + */ 1820 + ratelimit_state_init(&btp->bt_ioerror_rl, 30 * HZ, 1821 + DEFAULT_RATELIMIT_BURST); 1848 1822 1849 1823 if (xfs_setsize_buftarg_early(btp, bdev)) 1850 1824 goto error_free; ··· 2023 1983 * synchronously. Otherwise, drop the buffer from the delwri 2024 1984 * queue and submit async. 2025 1985 */ 2026 - bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_WRITE_FAIL); 1986 + bp->b_flags &= ~_XBF_DELWRI_Q; 2027 1987 bp->b_flags |= XBF_WRITE; 2028 1988 if (wait_list) { 2029 1989 bp->b_flags &= ~XBF_ASYNC;
fs/xfs/xfs_buf.h +2
··· 91 91 struct list_lru bt_lru; 92 92 93 93 struct percpu_counter bt_io_count; 94 + struct ratelimit_state bt_ioerror_rl; 94 95 } xfs_buftarg_t; 95 96 96 97 struct xfs_buf; ··· 264 263 xfs_failaddr_t failaddr); 265 264 #define xfs_buf_ioerror(bp, err) __xfs_buf_ioerror((bp), (err), __this_address) 266 265 extern void xfs_buf_ioerror_alert(struct xfs_buf *bp, xfs_failaddr_t fa); 266 + void xfs_buf_ioend_fail(struct xfs_buf *); 267 267 268 268 extern int __xfs_buf_submit(struct xfs_buf *bp, bool); 269 269 static inline int xfs_buf_submit(struct xfs_buf *bp)
fs/xfs/xfs_buf_item.c +19 -87
··· 410 410 { 411 411 struct xfs_buf_log_item *bip = BUF_ITEM(lip); 412 412 xfs_buf_t *bp = bip->bli_buf; 413 - struct xfs_ail *ailp = lip->li_ailp; 414 413 int stale = bip->bli_flags & XFS_BLI_STALE; 415 414 int freed; 416 415 ··· 451 452 } 452 453 453 454 /* 454 - * If we get called here because of an IO error, we may 455 - * or may not have the item on the AIL. xfs_trans_ail_delete() 456 - * will take care of that situation. 457 - * xfs_trans_ail_delete() drops the AIL lock. 455 + * If we get called here because of an IO error, we may or may 456 + * not have the item on the AIL. xfs_trans_ail_delete() will 457 + * take care of that situation. xfs_trans_ail_delete() drops 458 + * the AIL lock. 458 459 */ 459 460 if (bip->bli_flags & XFS_BLI_STALE_INODE) { 460 461 xfs_buf_do_callbacks(bp); ··· 462 463 list_del_init(&bp->b_li_list); 463 464 bp->b_iodone = NULL; 464 465 } else { 465 - spin_lock(&ailp->ail_lock); 466 - xfs_trans_ail_delete(ailp, lip, SHUTDOWN_LOG_IO_ERROR); 466 + xfs_trans_ail_delete(lip, SHUTDOWN_LOG_IO_ERROR); 467 467 xfs_buf_item_relse(bp); 468 468 ASSERT(bp->b_log_item == NULL); 469 469 } 470 470 xfs_buf_relse(bp); 471 471 } else if (freed && remove) { 472 472 /* 473 - * There are currently two references to the buffer - the active 474 - * LRU reference and the buf log item. What we are about to do 475 - * here - simulate a failed IO completion - requires 3 476 - * references. 477 - * 478 - * The LRU reference is removed by the xfs_buf_stale() call. The 479 - * buf item reference is removed by the xfs_buf_iodone() 480 - * callback that is run by xfs_buf_do_callbacks() during ioend 481 - * processing (via the bp->b_iodone callback), and then finally 482 - * the ioend processing will drop the IO reference if the buffer 483 - * is marked XBF_ASYNC. 484 - * 485 - * Hence we need to take an additional reference here so that IO 486 - * completion processing doesn't free the buffer prematurely. 
473 + * The buffer must be locked and held by the caller to simulate 474 + * an async I/O failure. 487 475 */ 488 476 xfs_buf_lock(bp); 489 477 xfs_buf_hold(bp); 490 478 bp->b_flags |= XBF_ASYNC; 491 - xfs_buf_ioerror(bp, -EIO); 492 - bp->b_flags &= ~XBF_DONE; 493 - xfs_buf_stale(bp); 494 - xfs_buf_ioend(bp); 479 + xfs_buf_ioend_fail(bp); 495 480 } 496 481 } 497 - 498 - /* 499 - * Buffer IO error rate limiting. Limit it to no more than 10 messages per 30 500 - * seconds so as to not spam logs too much on repeated detection of the same 501 - * buffer being bad.. 502 - */ 503 - 504 - static DEFINE_RATELIMIT_STATE(xfs_buf_write_fail_rl_state, 30 * HZ, 10); 505 482 506 483 STATIC uint 507 484 xfs_buf_item_push( ··· 508 533 trace_xfs_buf_item_push(bip); 509 534 510 535 /* has a previous flush failed due to IO errors? */ 511 - if ((bp->b_flags & XBF_WRITE_FAIL) && 512 - ___ratelimit(&xfs_buf_write_fail_rl_state, "XFS: Failing async write")) { 513 - xfs_warn(bp->b_mount, 514 - "Failing async write on buffer block 0x%llx. Retrying async write.", 515 - (long long)bp->b_bn); 536 + if (bp->b_flags & XBF_WRITE_FAIL) { 537 + xfs_buf_alert_ratelimited(bp, "XFS: Failing async write", 538 + "Failing async write on buffer block 0x%llx. Retrying async write.", 539 + (long long)bp->b_bn); 516 540 } 517 541 518 542 if (!xfs_buf_delwri_queue(bp, buffer_list)) ··· 558 584 * state. 559 585 */ 560 586 if (aborted) 561 - xfs_trans_ail_remove(lip, SHUTDOWN_LOG_IO_ERROR); 587 + xfs_trans_ail_delete(lip, 0); 562 588 xfs_buf_item_relse(bip->bli_buf); 563 589 return true; 564 590 } ··· 1203 1229 struct xfs_buf *bp, 1204 1230 struct xfs_log_item *lip) 1205 1231 { 1206 - struct xfs_ail *ailp = lip->li_ailp; 1207 - 1208 1232 ASSERT(BUF_ITEM(lip)->bli_buf == bp); 1209 1233 1210 1234 xfs_buf_rele(bp); 1211 1235 1212 1236 /* 1213 - * If we are forcibly shutting down, this may well be 1214 - * off the AIL already. That's because we simulate the 1215 - * log-committed callbacks to unpin these buffers. 
Or we may never 1216 - have put this item on AIL because of the transaction was 1217 - aborted forcibly. xfs_trans_ail_delete() takes care of these. 1237 + * If we are forcibly shutting down, this may well be off the AIL 1238 + * already. That's because we simulate the log-committed callbacks to 1239 + * unpin these buffers. Or we may never have put this item on the AIL 1240 + * because the transaction was aborted forcibly. 1241 + * xfs_trans_ail_delete() takes care of these. 1218 1242 * 1219 1243 * Either way, AIL is useless if we're forcing a shutdown. 1220 1244 */ 1221 - spin_lock(&ailp->ail_lock); 1222 - xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE); 1245 + xfs_trans_ail_delete(lip, SHUTDOWN_CORRUPT_INCORE); 1223 1246 xfs_buf_item_free(BUF_ITEM(lip)); 1224 - } 1225 - 1226 - /* 1227 - * Requeue a failed buffer for writeback. 1228 - * 1229 - * We clear the log item failed state here as well, but we have to be careful 1230 - * about reference counts because the only active reference counts on the buffer 1231 - * may be the failed log items. Hence if we clear the log item failed state 1232 - * before queuing the buffer for IO we can release all active references to 1233 - * the buffer and free it, leading to use after free problems in 1234 - * xfs_buf_delwri_queue. It makes no difference to the buffer or log items which 1235 - * order we process them in - the buffer is locked, and we own the buffer list 1236 - * so nothing on them is going to change while we are performing this action. 1237 - * 1238 - * Hence we can safely queue the buffer for IO before we clear the failed log 1239 - * item state, therefore always having an active reference to the buffer and 1240 - * avoiding the transient zero-reference state that leads to use-after-free. 1241 - * 1242 - * Return true if the buffer was added to the buffer list, false if it was 1243 - * already on the buffer list.
1244 - */ 1245 - bool 1246 - xfs_buf_resubmit_failed_buffers( 1247 - struct xfs_buf *bp, 1248 - struct list_head *buffer_list) 1249 - { 1250 - struct xfs_log_item *lip; 1251 - bool ret; 1252 - 1253 - ret = xfs_buf_delwri_queue(bp, buffer_list); 1254 - 1255 - /* 1256 - * XFS_LI_FAILED set/clear is protected by ail_lock, caller of this 1257 - * function already have it acquired 1258 - */ 1259 - list_for_each_entry(lip, &bp->b_li_list, li_bio_list) 1260 - xfs_clear_li_failed(lip); 1261 - 1262 - return ret; 1263 1247 }
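The hunk above replaces the hand-rolled `DEFINE_RATELIMIT_STATE(..., 30 * HZ, 10)` (at most 10 messages per 30 seconds) with `xfs_buf_alert_ratelimited()`. A minimal userspace sketch of that window-plus-burst policy, assuming an integer clock in seconds and illustrative names (this is not the kernel's `__ratelimit()` API):

```c
#include <assert.h>
#include <stdbool.h>

/* Simplified model of the kernel's ratelimit state: allow at most
 * `burst` events per `interval` seconds. */
struct ratelimit {
	int interval;	/* window length, seconds */
	int burst;	/* events allowed per window */
	int begin;	/* start time of the current window */
	int printed;	/* events emitted in the current window */
};

static bool ratelimit_ok(struct ratelimit *rs, int now)
{
	if (now - rs->begin >= rs->interval) {
		/* window expired: start a fresh one */
		rs->begin = now;
		rs->printed = 0;
	}
	if (rs->printed >= rs->burst)
		return false;		/* suppress this message */
	rs->printed++;
	return true;
}
```

With `{ 30, 10, ... }` this reproduces the old "10 messages per 30 seconds" behaviour: the 11th warning in a window is dropped, and a new window re-arms the limit.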
-2
fs/xfs/xfs_buf_item.h
··· 59 59 struct xfs_log_item *); 60 60 void xfs_buf_iodone_callbacks(struct xfs_buf *); 61 61 void xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *); 62 - bool xfs_buf_resubmit_failed_buffers(struct xfs_buf *, 63 - struct list_head *); 64 62 bool xfs_buf_log_check_iovec(struct xfs_log_iovec *iovec); 65 63 66 64 extern kmem_zone_t *xfs_buf_item_zone;
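The new xfs_buf_item_recover.c below keeps a table of cancelled buffers keyed by (blkno, len), with a reference count so a record survives until its last occurrence in pass 2. A minimal userspace model of that refcounting scheme, using a flat singly linked list in place of the kernel's per-log hash buckets (names are illustrative):

```c
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>

/* Simplified cancel record, one per cancelled (blkno, len) pair. */
struct buf_cancel {
	uint64_t blkno;
	unsigned int len;
	int refcount;
	struct buf_cancel *next;
};

static struct buf_cancel *cancel_list;

static struct buf_cancel *find_cancelled(uint64_t blkno, unsigned int len)
{
	struct buf_cancel *bcp;

	for (bcp = cancel_list; bcp; bcp = bcp->next)
		if (bcp->blkno == blkno && bcp->len == len)
			return bcp;
	return NULL;
}

/* Pass 1: record a cancellation; repeats just bump the refcount. */
static bool add_cancelled(uint64_t blkno, unsigned int len)
{
	struct buf_cancel *bcp = find_cancelled(blkno, len);

	if (bcp) {
		bcp->refcount++;
		return false;
	}
	bcp = malloc(sizeof(*bcp));
	bcp->blkno = blkno;
	bcp->len = len;
	bcp->refcount = 1;
	bcp->next = cancel_list;
	cancel_list = bcp;
	return true;
}

/* Pass 2: drop one reference; free the record when the count hits zero
 * so later reuses of the same blocks are replayed normally. */
static bool put_cancelled(uint64_t blkno, unsigned int len)
{
	struct buf_cancel **pp, *bcp;

	for (pp = &cancel_list; (bcp = *pp) != NULL; pp = &bcp->next) {
		if (bcp->blkno == blkno && bcp->len == len) {
			if (--bcp->refcount == 0) {
				*pp = bcp->next;
				free(bcp);
			}
			return true;
		}
	}
	return false;
}
```

The refcount is what lets a buffer cancelled N times in the log suppress exactly N earlier copies before replay resumes.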
+984
fs/xfs/xfs_buf_item_recover.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Copyright (c) 2000-2006 Silicon Graphics, Inc. 4 + * All Rights Reserved. 5 + */ 6 + #include "xfs.h" 7 + #include "xfs_fs.h" 8 + #include "xfs_shared.h" 9 + #include "xfs_format.h" 10 + #include "xfs_log_format.h" 11 + #include "xfs_trans_resv.h" 12 + #include "xfs_bit.h" 13 + #include "xfs_mount.h" 14 + #include "xfs_trans.h" 15 + #include "xfs_buf_item.h" 16 + #include "xfs_trans_priv.h" 17 + #include "xfs_trace.h" 18 + #include "xfs_log.h" 19 + #include "xfs_log_priv.h" 20 + #include "xfs_log_recover.h" 21 + #include "xfs_error.h" 22 + #include "xfs_inode.h" 23 + #include "xfs_dir2.h" 24 + #include "xfs_quota.h" 25 + 26 + /* 27 + * This structure is used during recovery to record the buf log items which 28 + * have been canceled and should not be replayed. 29 + */ 30 + struct xfs_buf_cancel { 31 + xfs_daddr_t bc_blkno; 32 + uint bc_len; 33 + int bc_refcount; 34 + struct list_head bc_list; 35 + }; 36 + 37 + static struct xfs_buf_cancel * 38 + xlog_find_buffer_cancelled( 39 + struct xlog *log, 40 + xfs_daddr_t blkno, 41 + uint len) 42 + { 43 + struct list_head *bucket; 44 + struct xfs_buf_cancel *bcp; 45 + 46 + if (!log->l_buf_cancel_table) 47 + return NULL; 48 + 49 + bucket = XLOG_BUF_CANCEL_BUCKET(log, blkno); 50 + list_for_each_entry(bcp, bucket, bc_list) { 51 + if (bcp->bc_blkno == blkno && bcp->bc_len == len) 52 + return bcp; 53 + } 54 + 55 + return NULL; 56 + } 57 + 58 + static bool 59 + xlog_add_buffer_cancelled( 60 + struct xlog *log, 61 + xfs_daddr_t blkno, 62 + uint len) 63 + { 64 + struct xfs_buf_cancel *bcp; 65 + 66 + /* 67 + * If we find an existing cancel record, this indicates that the buffer 68 + * was cancelled multiple times. To ensure that during pass 2 we keep 69 + * the record in the table until we reach its last occurrence in the 70 + * log, a reference count is kept to tell how many times we expect to 71 + * see this record during the second pass. 
72 + */ 73 + bcp = xlog_find_buffer_cancelled(log, blkno, len); 74 + if (bcp) { 75 + bcp->bc_refcount++; 76 + return false; 77 + } 78 + 79 + bcp = kmem_alloc(sizeof(struct xfs_buf_cancel), 0); 80 + bcp->bc_blkno = blkno; 81 + bcp->bc_len = len; 82 + bcp->bc_refcount = 1; 83 + list_add_tail(&bcp->bc_list, XLOG_BUF_CANCEL_BUCKET(log, blkno)); 84 + return true; 85 + } 86 + 87 + /* 88 + * Check if there is an entry for blkno, len in the buffer cancel record table. 89 + */ 90 + bool 91 + xlog_is_buffer_cancelled( 92 + struct xlog *log, 93 + xfs_daddr_t blkno, 94 + uint len) 95 + { 96 + return xlog_find_buffer_cancelled(log, blkno, len) != NULL; 97 + } 98 + 99 + /* 100 + * Check if there is an entry for blkno, len in the buffer cancel record table, 101 + * and decrement the reference count on it if there is one. 102 + * 103 + * Remove the cancel record once the refcount hits zero, so that if the same 104 + * buffer is re-used after its last cancellation we actually replay the 105 + * changes made at that point. 106 + */ 107 + static bool 108 + xlog_put_buffer_cancelled( 109 + struct xlog *log, 110 + xfs_daddr_t blkno, 111 + uint len) 112 + { 113 + struct xfs_buf_cancel *bcp; 114 + 115 + bcp = xlog_find_buffer_cancelled(log, blkno, len); 116 + if (!bcp) { 117 + ASSERT(0); 118 + return false; 119 + } 120 + 121 + if (--bcp->bc_refcount == 0) { 122 + list_del(&bcp->bc_list); 123 + kmem_free(bcp); 124 + } 125 + return true; 126 + } 127 + 128 + /* log buffer item recovery */ 129 + 130 + /* 131 + * Sort buffer items for log recovery. Most buffer items should end up on the 132 + * buffer list and are recovered first, with the following exceptions: 133 + * 134 + * 1. XFS_BLF_CANCEL buffers must be processed last because some log items 135 + * might depend on the incore cancellation record, and replaying a cancelled 136 + * buffer item can remove the incore record. 137 + * 138 + * 2.
XFS_BLF_INODE_BUF buffers are handled after most regular items so that 139 + * we replay di_next_unlinked only after flushing the inode 'free' state 140 + * to the inode buffer. 141 + * 142 + * See xlog_recover_reorder_trans for more details. 143 + */ 144 + STATIC enum xlog_recover_reorder 145 + xlog_recover_buf_reorder( 146 + struct xlog_recover_item *item) 147 + { 148 + struct xfs_buf_log_format *buf_f = item->ri_buf[0].i_addr; 149 + 150 + if (buf_f->blf_flags & XFS_BLF_CANCEL) 151 + return XLOG_REORDER_CANCEL_LIST; 152 + if (buf_f->blf_flags & XFS_BLF_INODE_BUF) 153 + return XLOG_REORDER_INODE_BUFFER_LIST; 154 + return XLOG_REORDER_BUFFER_LIST; 155 + } 156 + 157 + STATIC void 158 + xlog_recover_buf_ra_pass2( 159 + struct xlog *log, 160 + struct xlog_recover_item *item) 161 + { 162 + struct xfs_buf_log_format *buf_f = item->ri_buf[0].i_addr; 163 + 164 + xlog_buf_readahead(log, buf_f->blf_blkno, buf_f->blf_len, NULL); 165 + } 166 + 167 + /* 168 + * Build up the table of buf cancel records so that we don't replay cancelled 169 + * data in the second pass. 170 + */ 171 + static int 172 + xlog_recover_buf_commit_pass1( 173 + struct xlog *log, 174 + struct xlog_recover_item *item) 175 + { 176 + struct xfs_buf_log_format *bf = item->ri_buf[0].i_addr; 177 + 178 + if (!xfs_buf_log_check_iovec(&item->ri_buf[0])) { 179 + xfs_err(log->l_mp, "bad buffer log item size (%d)", 180 + item->ri_buf[0].i_len); 181 + return -EFSCORRUPTED; 182 + } 183 + 184 + if (!(bf->blf_flags & XFS_BLF_CANCEL)) 185 + trace_xfs_log_recover_buf_not_cancel(log, bf); 186 + else if (xlog_add_buffer_cancelled(log, bf->blf_blkno, bf->blf_len)) 187 + trace_xfs_log_recover_buf_cancel_add(log, bf); 188 + else 189 + trace_xfs_log_recover_buf_cancel_ref_inc(log, bf); 190 + return 0; 191 + } 192 + 193 + /* 194 + * Validate the recovered buffer is of the correct type and attach the 195 + * appropriate buffer operations to them for writeback. 
Magic numbers are in a 196 + * few places: 197 + * the first 16 bits of the buffer (inode buffer, dquot buffer), 198 + * the first 32 bits of the buffer (most blocks), 199 + * inside a struct xfs_da_blkinfo at the start of the buffer. 200 + */ 201 + static void 202 + xlog_recover_validate_buf_type( 203 + struct xfs_mount *mp, 204 + struct xfs_buf *bp, 205 + struct xfs_buf_log_format *buf_f, 206 + xfs_lsn_t current_lsn) 207 + { 208 + struct xfs_da_blkinfo *info = bp->b_addr; 209 + uint32_t magic32; 210 + uint16_t magic16; 211 + uint16_t magicda; 212 + char *warnmsg = NULL; 213 + 214 + /* 215 + * We can only do post recovery validation on items on CRC enabled 216 + * filesystems as we need to know when the buffer was written to be able 217 + * to determine if we should have replayed the item. If we replay old 218 + * metadata over a newer buffer, then it will enter a temporarily 219 + * inconsistent state resulting in verification failures. Hence for now 220 + * just avoid the verification stage for non-crc filesystems. 221 + */ 222 + if (!xfs_sb_version_hascrc(&mp->m_sb)) 223 + return; 224 + 225 + magic32 = be32_to_cpu(*(__be32 *)bp->b_addr); 226 + magic16 = be16_to_cpu(*(__be16*)bp->b_addr); 227 + magicda = be16_to_cpu(info->magic); 228 + switch (xfs_blft_from_flags(buf_f)) { 229 + case XFS_BLFT_BTREE_BUF: 230 + switch (magic32) { 231 + case XFS_ABTB_CRC_MAGIC: 232 + case XFS_ABTB_MAGIC: 233 + bp->b_ops = &xfs_bnobt_buf_ops; 234 + break; 235 + case XFS_ABTC_CRC_MAGIC: 236 + case XFS_ABTC_MAGIC: 237 + bp->b_ops = &xfs_cntbt_buf_ops; 238 + break; 239 + case XFS_IBT_CRC_MAGIC: 240 + case XFS_IBT_MAGIC: 241 + bp->b_ops = &xfs_inobt_buf_ops; 242 + break; 243 + case XFS_FIBT_CRC_MAGIC: 244 + case XFS_FIBT_MAGIC: 245 + bp->b_ops = &xfs_finobt_buf_ops; 246 + break; 247 + case XFS_BMAP_CRC_MAGIC: 248 + case XFS_BMAP_MAGIC: 249 + bp->b_ops = &xfs_bmbt_buf_ops; 250 + break; 251 + case XFS_RMAP_CRC_MAGIC: 252 + bp->b_ops = &xfs_rmapbt_buf_ops; 253 + break; 254 + case
XFS_REFC_CRC_MAGIC: 255 + bp->b_ops = &xfs_refcountbt_buf_ops; 256 + break; 257 + default: 258 + warnmsg = "Bad btree block magic!"; 259 + break; 260 + } 261 + break; 262 + case XFS_BLFT_AGF_BUF: 263 + if (magic32 != XFS_AGF_MAGIC) { 264 + warnmsg = "Bad AGF block magic!"; 265 + break; 266 + } 267 + bp->b_ops = &xfs_agf_buf_ops; 268 + break; 269 + case XFS_BLFT_AGFL_BUF: 270 + if (magic32 != XFS_AGFL_MAGIC) { 271 + warnmsg = "Bad AGFL block magic!"; 272 + break; 273 + } 274 + bp->b_ops = &xfs_agfl_buf_ops; 275 + break; 276 + case XFS_BLFT_AGI_BUF: 277 + if (magic32 != XFS_AGI_MAGIC) { 278 + warnmsg = "Bad AGI block magic!"; 279 + break; 280 + } 281 + bp->b_ops = &xfs_agi_buf_ops; 282 + break; 283 + case XFS_BLFT_UDQUOT_BUF: 284 + case XFS_BLFT_PDQUOT_BUF: 285 + case XFS_BLFT_GDQUOT_BUF: 286 + #ifdef CONFIG_XFS_QUOTA 287 + if (magic16 != XFS_DQUOT_MAGIC) { 288 + warnmsg = "Bad DQUOT block magic!"; 289 + break; 290 + } 291 + bp->b_ops = &xfs_dquot_buf_ops; 292 + #else 293 + xfs_alert(mp, 294 + "Trying to recover dquots without QUOTA support built in!"); 295 + ASSERT(0); 296 + #endif 297 + break; 298 + case XFS_BLFT_DINO_BUF: 299 + if (magic16 != XFS_DINODE_MAGIC) { 300 + warnmsg = "Bad INODE block magic!"; 301 + break; 302 + } 303 + bp->b_ops = &xfs_inode_buf_ops; 304 + break; 305 + case XFS_BLFT_SYMLINK_BUF: 306 + if (magic32 != XFS_SYMLINK_MAGIC) { 307 + warnmsg = "Bad symlink block magic!"; 308 + break; 309 + } 310 + bp->b_ops = &xfs_symlink_buf_ops; 311 + break; 312 + case XFS_BLFT_DIR_BLOCK_BUF: 313 + if (magic32 != XFS_DIR2_BLOCK_MAGIC && 314 + magic32 != XFS_DIR3_BLOCK_MAGIC) { 315 + warnmsg = "Bad dir block magic!"; 316 + break; 317 + } 318 + bp->b_ops = &xfs_dir3_block_buf_ops; 319 + break; 320 + case XFS_BLFT_DIR_DATA_BUF: 321 + if (magic32 != XFS_DIR2_DATA_MAGIC && 322 + magic32 != XFS_DIR3_DATA_MAGIC) { 323 + warnmsg = "Bad dir data magic!"; 324 + break; 325 + } 326 + bp->b_ops = &xfs_dir3_data_buf_ops; 327 + break; 328 + case XFS_BLFT_DIR_FREE_BUF: 329 + 
if (magic32 != XFS_DIR2_FREE_MAGIC && 330 + magic32 != XFS_DIR3_FREE_MAGIC) { 331 + warnmsg = "Bad dir3 free magic!"; 332 + break; 333 + } 334 + bp->b_ops = &xfs_dir3_free_buf_ops; 335 + break; 336 + case XFS_BLFT_DIR_LEAF1_BUF: 337 + if (magicda != XFS_DIR2_LEAF1_MAGIC && 338 + magicda != XFS_DIR3_LEAF1_MAGIC) { 339 + warnmsg = "Bad dir leaf1 magic!"; 340 + break; 341 + } 342 + bp->b_ops = &xfs_dir3_leaf1_buf_ops; 343 + break; 344 + case XFS_BLFT_DIR_LEAFN_BUF: 345 + if (magicda != XFS_DIR2_LEAFN_MAGIC && 346 + magicda != XFS_DIR3_LEAFN_MAGIC) { 347 + warnmsg = "Bad dir leafn magic!"; 348 + break; 349 + } 350 + bp->b_ops = &xfs_dir3_leafn_buf_ops; 351 + break; 352 + case XFS_BLFT_DA_NODE_BUF: 353 + if (magicda != XFS_DA_NODE_MAGIC && 354 + magicda != XFS_DA3_NODE_MAGIC) { 355 + warnmsg = "Bad da node magic!"; 356 + break; 357 + } 358 + bp->b_ops = &xfs_da3_node_buf_ops; 359 + break; 360 + case XFS_BLFT_ATTR_LEAF_BUF: 361 + if (magicda != XFS_ATTR_LEAF_MAGIC && 362 + magicda != XFS_ATTR3_LEAF_MAGIC) { 363 + warnmsg = "Bad attr leaf magic!"; 364 + break; 365 + } 366 + bp->b_ops = &xfs_attr3_leaf_buf_ops; 367 + break; 368 + case XFS_BLFT_ATTR_RMT_BUF: 369 + if (magic32 != XFS_ATTR3_RMT_MAGIC) { 370 + warnmsg = "Bad attr remote magic!"; 371 + break; 372 + } 373 + bp->b_ops = &xfs_attr3_rmt_buf_ops; 374 + break; 375 + case XFS_BLFT_SB_BUF: 376 + if (magic32 != XFS_SB_MAGIC) { 377 + warnmsg = "Bad SB block magic!"; 378 + break; 379 + } 380 + bp->b_ops = &xfs_sb_buf_ops; 381 + break; 382 + #ifdef CONFIG_XFS_RT 383 + case XFS_BLFT_RTBITMAP_BUF: 384 + case XFS_BLFT_RTSUMMARY_BUF: 385 + /* no magic numbers for verification of RT buffers */ 386 + bp->b_ops = &xfs_rtbuf_ops; 387 + break; 388 + #endif /* CONFIG_XFS_RT */ 389 + default: 390 + xfs_warn(mp, "Unknown buffer type %d!", 391 + xfs_blft_from_flags(buf_f)); 392 + break; 393 + } 394 + 395 + /* 396 + * Nothing else to do in the case of a NULL current LSN as this means 397 + * the buffer is more recent than the change in 
the log and will be 398 + * skipped. 399 + */ 400 + if (current_lsn == NULLCOMMITLSN) 401 + return; 402 + 403 + if (warnmsg) { 404 + xfs_warn(mp, warnmsg); 405 + ASSERT(0); 406 + } 407 + 408 + /* 409 + * We must update the metadata LSN of the buffer as it is written out to 410 + * ensure that older transactions never replay over this one and corrupt 411 + * the buffer. This can occur if log recovery is interrupted at some 412 + * point after the current transaction completes, at which point a 413 + * subsequent mount starts recovery from the beginning. 414 + * 415 + * Write verifiers update the metadata LSN from log items attached to 416 + * the buffer. Therefore, initialize a bli purely to carry the LSN to 417 + * the verifier. We'll clean it up in our ->iodone() callback. 418 + */ 419 + if (bp->b_ops) { 420 + struct xfs_buf_log_item *bip; 421 + 422 + ASSERT(!bp->b_iodone || bp->b_iodone == xlog_recover_iodone); 423 + bp->b_iodone = xlog_recover_iodone; 424 + xfs_buf_item_init(bp, mp); 425 + bip = bp->b_log_item; 426 + bip->bli_item.li_lsn = current_lsn; 427 + } 428 + } 429 + 430 + /* 431 + * Perform a 'normal' buffer recovery. Each logged region of the 432 + * buffer should be copied over the corresponding region in the 433 + * given buffer. The bitmap in the buf log format structure indicates 434 + * where to place the logged data. 
435 + */ 436 + STATIC void 437 + xlog_recover_do_reg_buffer( 438 + struct xfs_mount *mp, 439 + struct xlog_recover_item *item, 440 + struct xfs_buf *bp, 441 + struct xfs_buf_log_format *buf_f, 442 + xfs_lsn_t current_lsn) 443 + { 444 + int i; 445 + int bit; 446 + int nbits; 447 + xfs_failaddr_t fa; 448 + const size_t size_disk_dquot = sizeof(struct xfs_disk_dquot); 449 + 450 + trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f); 451 + 452 + bit = 0; 453 + i = 1; /* 0 is the buf format structure */ 454 + while (1) { 455 + bit = xfs_next_bit(buf_f->blf_data_map, 456 + buf_f->blf_map_size, bit); 457 + if (bit == -1) 458 + break; 459 + nbits = xfs_contig_bits(buf_f->blf_data_map, 460 + buf_f->blf_map_size, bit); 461 + ASSERT(nbits > 0); 462 + ASSERT(item->ri_buf[i].i_addr != NULL); 463 + ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0); 464 + ASSERT(BBTOB(bp->b_length) >= 465 + ((uint)bit << XFS_BLF_SHIFT) + (nbits << XFS_BLF_SHIFT)); 466 + 467 + /* 468 + * The dirty regions logged in the buffer, even though 469 + * contiguous, may span multiple chunks. This is because the 470 + * dirty region may span a physical page boundary in a buffer 471 + * and hence be split into two separate vectors for writing into 472 + * the log. Hence we need to trim nbits back to the length of 473 + * the current region being copied out of the log. 474 + */ 475 + if (item->ri_buf[i].i_len < (nbits << XFS_BLF_SHIFT)) 476 + nbits = item->ri_buf[i].i_len >> XFS_BLF_SHIFT; 477 + 478 + /* 479 + * Do a sanity check if this is a dquot buffer. Just checking 480 + * the first dquot in the buffer should do. XXXThis is 481 + * probably a good thing to do for other buf types also. 
482 + */ 483 + fa = NULL; 484 + if (buf_f->blf_flags & 485 + (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) { 486 + if (item->ri_buf[i].i_addr == NULL) { 487 + xfs_alert(mp, 488 + "XFS: NULL dquot in %s.", __func__); 489 + goto next; 490 + } 491 + if (item->ri_buf[i].i_len < size_disk_dquot) { 492 + xfs_alert(mp, 493 + "XFS: dquot too small (%d) in %s.", 494 + item->ri_buf[i].i_len, __func__); 495 + goto next; 496 + } 497 + fa = xfs_dquot_verify(mp, item->ri_buf[i].i_addr, 498 + -1, 0); 499 + if (fa) { 500 + xfs_alert(mp, 501 + "dquot corrupt at %pS trying to replay into block 0x%llx", 502 + fa, bp->b_bn); 503 + goto next; 504 + } 505 + } 506 + 507 + memcpy(xfs_buf_offset(bp, 508 + (uint)bit << XFS_BLF_SHIFT), /* dest */ 509 + item->ri_buf[i].i_addr, /* source */ 510 + nbits<<XFS_BLF_SHIFT); /* length */ 511 + next: 512 + i++; 513 + bit += nbits; 514 + } 515 + 516 + /* Shouldn't be any more regions */ 517 + ASSERT(i == item->ri_total); 518 + 519 + xlog_recover_validate_buf_type(mp, bp, buf_f, current_lsn); 520 + } 521 + 522 + /* 523 + * Perform a dquot buffer recovery. 524 + * Simple algorithm: if we have found a QUOTAOFF log item of the same type 525 + * (ie. USR or GRP), then just toss this buffer away; don't recover it. 526 + * Else, treat it as a regular buffer and do recovery. 527 + * 528 + * Return false if the buffer was tossed and true if we recovered the buffer to 529 + * indicate to the caller if the buffer needs writing. 530 + */ 531 + STATIC bool 532 + xlog_recover_do_dquot_buffer( 533 + struct xfs_mount *mp, 534 + struct xlog *log, 535 + struct xlog_recover_item *item, 536 + struct xfs_buf *bp, 537 + struct xfs_buf_log_format *buf_f) 538 + { 539 + uint type; 540 + 541 + trace_xfs_log_recover_buf_dquot_buf(log, buf_f); 542 + 543 + /* 544 + * Filesystems are required to send in quota flags at mount time. 
545 + */ 546 + if (!mp->m_qflags) 547 + return false; 548 + 549 + type = 0; 550 + if (buf_f->blf_flags & XFS_BLF_UDQUOT_BUF) 551 + type |= XFS_DQ_USER; 552 + if (buf_f->blf_flags & XFS_BLF_PDQUOT_BUF) 553 + type |= XFS_DQ_PROJ; 554 + if (buf_f->blf_flags & XFS_BLF_GDQUOT_BUF) 555 + type |= XFS_DQ_GROUP; 556 + /* 557 + * This type of quotas was turned off, so ignore this buffer 558 + */ 559 + if (log->l_quotaoffs_flag & type) 560 + return false; 561 + 562 + xlog_recover_do_reg_buffer(mp, item, bp, buf_f, NULLCOMMITLSN); 563 + return true; 564 + } 565 + 566 + /* 567 + * Perform recovery for a buffer full of inodes. In these buffers, the only 568 + * data which should be recovered is that which corresponds to the 569 + * di_next_unlinked pointers in the on disk inode structures. The rest of the 570 + * data for the inodes is always logged through the inodes themselves rather 571 + * than the inode buffer and is recovered in xlog_recover_inode_pass2(). 572 + * 573 + * The only time when buffers full of inodes are fully recovered is when the 574 + * buffer is full of newly allocated inodes. In this case the buffer will 575 + * not be marked as an inode buffer and so will be sent to 576 + * xlog_recover_do_reg_buffer() below during recovery. 577 + */ 578 + STATIC int 579 + xlog_recover_do_inode_buffer( 580 + struct xfs_mount *mp, 581 + struct xlog_recover_item *item, 582 + struct xfs_buf *bp, 583 + struct xfs_buf_log_format *buf_f) 584 + { 585 + int i; 586 + int item_index = 0; 587 + int bit = 0; 588 + int nbits = 0; 589 + int reg_buf_offset = 0; 590 + int reg_buf_bytes = 0; 591 + int next_unlinked_offset; 592 + int inodes_per_buf; 593 + xfs_agino_t *logged_nextp; 594 + xfs_agino_t *buffer_nextp; 595 + 596 + trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f); 597 + 598 + /* 599 + * Post recovery validation only works properly on CRC enabled 600 + * filesystems. 
601 + */ 602 + if (xfs_sb_version_hascrc(&mp->m_sb)) 603 + bp->b_ops = &xfs_inode_buf_ops; 604 + 605 + inodes_per_buf = BBTOB(bp->b_length) >> mp->m_sb.sb_inodelog; 606 + for (i = 0; i < inodes_per_buf; i++) { 607 + next_unlinked_offset = (i * mp->m_sb.sb_inodesize) + 608 + offsetof(xfs_dinode_t, di_next_unlinked); 609 + 610 + while (next_unlinked_offset >= 611 + (reg_buf_offset + reg_buf_bytes)) { 612 + /* 613 + * The next di_next_unlinked field is beyond 614 + * the current logged region. Find the next 615 + * logged region that contains or is beyond 616 + * the current di_next_unlinked field. 617 + */ 618 + bit += nbits; 619 + bit = xfs_next_bit(buf_f->blf_data_map, 620 + buf_f->blf_map_size, bit); 621 + 622 + /* 623 + * If there are no more logged regions in the 624 + * buffer, then we're done. 625 + */ 626 + if (bit == -1) 627 + return 0; 628 + 629 + nbits = xfs_contig_bits(buf_f->blf_data_map, 630 + buf_f->blf_map_size, bit); 631 + ASSERT(nbits > 0); 632 + reg_buf_offset = bit << XFS_BLF_SHIFT; 633 + reg_buf_bytes = nbits << XFS_BLF_SHIFT; 634 + item_index++; 635 + } 636 + 637 + /* 638 + * If the current logged region starts after the current 639 + * di_next_unlinked field, then move on to the next 640 + * di_next_unlinked field. 641 + */ 642 + if (next_unlinked_offset < reg_buf_offset) 643 + continue; 644 + 645 + ASSERT(item->ri_buf[item_index].i_addr != NULL); 646 + ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0); 647 + ASSERT((reg_buf_offset + reg_buf_bytes) <= BBTOB(bp->b_length)); 648 + 649 + /* 650 + * The current logged region contains a copy of the 651 + * current di_next_unlinked field. Extract its value 652 + * and copy it to the buffer copy. 653 + */ 654 + logged_nextp = item->ri_buf[item_index].i_addr + 655 + next_unlinked_offset - reg_buf_offset; 656 + if (XFS_IS_CORRUPT(mp, *logged_nextp == 0)) { 657 + xfs_alert(mp, 658 + "Bad inode buffer log record (ptr = "PTR_FMT", bp = "PTR_FMT"). 
" 659 + "Trying to replay bad (0) inode di_next_unlinked field.", 660 + item, bp); 661 + return -EFSCORRUPTED; 662 + } 663 + 664 + buffer_nextp = xfs_buf_offset(bp, next_unlinked_offset); 665 + *buffer_nextp = *logged_nextp; 666 + 667 + /* 668 + * If necessary, recalculate the CRC in the on-disk inode. We 669 + * have to leave the inode in a consistent state for whoever 670 + * reads it next.... 671 + */ 672 + xfs_dinode_calc_crc(mp, 673 + xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize)); 674 + 675 + } 676 + 677 + return 0; 678 + } 679 + 680 + /* 681 + * V5 filesystems know the age of the buffer on disk being recovered. We can 682 + * have newer objects on disk than we are replaying, and so for these cases we 683 + * don't want to replay the current change as that will make the buffer contents 684 + * temporarily invalid on disk. 685 + * 686 + * The magic number might not match the buffer type we are going to recover 687 + * (e.g. reallocated blocks), so we ignore the xfs_buf_log_format flags. Hence 688 + * extract the LSN of the existing object in the buffer based on it's current 689 + * magic number. If we don't recognise the magic number in the buffer, then 690 + * return a LSN of -1 so that the caller knows it was an unrecognised block and 691 + * so can recover the buffer. 692 + * 693 + * Note: we cannot rely solely on magic number matches to determine that the 694 + * buffer has a valid LSN - we also need to verify that it belongs to this 695 + * filesystem, so we need to extract the object's LSN and compare it to that 696 + * which we read from the superblock. If the UUIDs don't match, then we've got a 697 + * stale metadata block from an old filesystem instance that we need to recover 698 + * over the top of. 
699 + */ 700 + static xfs_lsn_t 701 + xlog_recover_get_buf_lsn( 702 + struct xfs_mount *mp, 703 + struct xfs_buf *bp) 704 + { 705 + uint32_t magic32; 706 + uint16_t magic16; 707 + uint16_t magicda; 708 + void *blk = bp->b_addr; 709 + uuid_t *uuid; 710 + xfs_lsn_t lsn = -1; 711 + 712 + /* v4 filesystems always recover immediately */ 713 + if (!xfs_sb_version_hascrc(&mp->m_sb)) 714 + goto recover_immediately; 715 + 716 + magic32 = be32_to_cpu(*(__be32 *)blk); 717 + switch (magic32) { 718 + case XFS_ABTB_CRC_MAGIC: 719 + case XFS_ABTC_CRC_MAGIC: 720 + case XFS_ABTB_MAGIC: 721 + case XFS_ABTC_MAGIC: 722 + case XFS_RMAP_CRC_MAGIC: 723 + case XFS_REFC_CRC_MAGIC: 724 + case XFS_IBT_CRC_MAGIC: 725 + case XFS_IBT_MAGIC: { 726 + struct xfs_btree_block *btb = blk; 727 + 728 + lsn = be64_to_cpu(btb->bb_u.s.bb_lsn); 729 + uuid = &btb->bb_u.s.bb_uuid; 730 + break; 731 + } 732 + case XFS_BMAP_CRC_MAGIC: 733 + case XFS_BMAP_MAGIC: { 734 + struct xfs_btree_block *btb = blk; 735 + 736 + lsn = be64_to_cpu(btb->bb_u.l.bb_lsn); 737 + uuid = &btb->bb_u.l.bb_uuid; 738 + break; 739 + } 740 + case XFS_AGF_MAGIC: 741 + lsn = be64_to_cpu(((struct xfs_agf *)blk)->agf_lsn); 742 + uuid = &((struct xfs_agf *)blk)->agf_uuid; 743 + break; 744 + case XFS_AGFL_MAGIC: 745 + lsn = be64_to_cpu(((struct xfs_agfl *)blk)->agfl_lsn); 746 + uuid = &((struct xfs_agfl *)blk)->agfl_uuid; 747 + break; 748 + case XFS_AGI_MAGIC: 749 + lsn = be64_to_cpu(((struct xfs_agi *)blk)->agi_lsn); 750 + uuid = &((struct xfs_agi *)blk)->agi_uuid; 751 + break; 752 + case XFS_SYMLINK_MAGIC: 753 + lsn = be64_to_cpu(((struct xfs_dsymlink_hdr *)blk)->sl_lsn); 754 + uuid = &((struct xfs_dsymlink_hdr *)blk)->sl_uuid; 755 + break; 756 + case XFS_DIR3_BLOCK_MAGIC: 757 + case XFS_DIR3_DATA_MAGIC: 758 + case XFS_DIR3_FREE_MAGIC: 759 + lsn = be64_to_cpu(((struct xfs_dir3_blk_hdr *)blk)->lsn); 760 + uuid = &((struct xfs_dir3_blk_hdr *)blk)->uuid; 761 + break; 762 + case XFS_ATTR3_RMT_MAGIC: 763 + /* 764 + * Remote attr blocks are written 
synchronously, rather than 765 + * being logged. That means they do not contain a valid LSN 766 + * (i.e. transactionally ordered) in them, and hence any time we 767 + * see a buffer to replay over the top of a remote attribute 768 + * block we should simply do so. 769 + */ 770 + goto recover_immediately; 771 + case XFS_SB_MAGIC: 772 + /* 773 + * superblock uuids are magic. We may or may not have a 774 + * sb_meta_uuid on disk, but it will be set in the in-core 775 + * superblock. We set the uuid pointer for verification 776 + * according to the superblock feature mask to ensure we check 777 + * the relevant UUID in the superblock. 778 + */ 779 + lsn = be64_to_cpu(((struct xfs_dsb *)blk)->sb_lsn); 780 + if (xfs_sb_version_hasmetauuid(&mp->m_sb)) 781 + uuid = &((struct xfs_dsb *)blk)->sb_meta_uuid; 782 + else 783 + uuid = &((struct xfs_dsb *)blk)->sb_uuid; 784 + break; 785 + default: 786 + break; 787 + } 788 + 789 + if (lsn != (xfs_lsn_t)-1) { 790 + if (!uuid_equal(&mp->m_sb.sb_meta_uuid, uuid)) 791 + goto recover_immediately; 792 + return lsn; 793 + } 794 + 795 + magicda = be16_to_cpu(((struct xfs_da_blkinfo *)blk)->magic); 796 + switch (magicda) { 797 + case XFS_DIR3_LEAF1_MAGIC: 798 + case XFS_DIR3_LEAFN_MAGIC: 799 + case XFS_DA3_NODE_MAGIC: 800 + lsn = be64_to_cpu(((struct xfs_da3_blkinfo *)blk)->lsn); 801 + uuid = &((struct xfs_da3_blkinfo *)blk)->uuid; 802 + break; 803 + default: 804 + break; 805 + } 806 + 807 + if (lsn != (xfs_lsn_t)-1) { 808 + if (!uuid_equal(&mp->m_sb.sb_uuid, uuid)) 809 + goto recover_immediately; 810 + return lsn; 811 + } 812 + 813 + /* 814 + * We do individual object checks on dquot and inode buffers as they 815 + * have their own individual LSN records. Also, we could have a stale 816 + * buffer here, so we have to at least recognise these buffer types. 
817 + * 818 + * A noted complexity here is inode unlinked list processing - it logs 819 + * the inode directly in the buffer, but we don't know which inodes have 820 + * been modified, and there is no global buffer LSN. Hence we need to 821 + * recover all inode buffer types immediately. This problem will be 822 + * fixed by logical logging of the unlinked list modifications. 823 + */ 824 + magic16 = be16_to_cpu(*(__be16 *)blk); 825 + switch (magic16) { 826 + case XFS_DQUOT_MAGIC: 827 + case XFS_DINODE_MAGIC: 828 + goto recover_immediately; 829 + default: 830 + break; 831 + } 832 + 833 + /* unknown buffer contents, recover immediately */ 834 + 835 + recover_immediately: 836 + return (xfs_lsn_t)-1; 837 + 838 + } 839 + 840 + /* 841 + * This routine replays a modification made to a buffer at runtime. 842 + * There are actually two types of buffer, regular and inode, which 843 + * are handled differently. Inode buffers differ 844 + * in that we only recover a specific set of data from them, namely 845 + * the inode di_next_unlinked fields. This is because all other inode 846 + * data is actually logged via inode records and any data we replay 847 + * here which overlaps that may be stale. 848 + * 849 + * When meta-data buffers are freed at run time we log a buffer item 850 + * with the XFS_BLF_CANCEL bit set to indicate that previous copies 851 + * of the buffer in the log should not be replayed at recovery time. 852 + * This is so that if the blocks covered by the buffer are reused for 853 + * file data before we crash we don't end up replaying old, freed 854 + * meta-data into a user's file. 855 + * 856 + * To handle the cancellation of buffer log items, we make two passes 857 + * over the log during recovery. During the first we build a table of 858 + * those buffers which have been cancelled, and during the second we 859 + * only replay those buffers which do not have corresponding cancel 860 + * records in the table.
See xlog_recover_buf_pass[1,2] above 861 + * for more details on the implementation of the table of cancel records. 862 + */ 863 + STATIC int 864 + xlog_recover_buf_commit_pass2( 865 + struct xlog *log, 866 + struct list_head *buffer_list, 867 + struct xlog_recover_item *item, 868 + xfs_lsn_t current_lsn) 869 + { 870 + struct xfs_buf_log_format *buf_f = item->ri_buf[0].i_addr; 871 + struct xfs_mount *mp = log->l_mp; 872 + struct xfs_buf *bp; 873 + int error; 874 + uint buf_flags; 875 + xfs_lsn_t lsn; 876 + 877 + /* 878 + * In this pass we only want to recover all the buffers which have 879 + * not been cancelled and are not cancellation buffers themselves. 880 + */ 881 + if (buf_f->blf_flags & XFS_BLF_CANCEL) { 882 + if (xlog_put_buffer_cancelled(log, buf_f->blf_blkno, 883 + buf_f->blf_len)) 884 + goto cancelled; 885 + } else { 886 + 887 + if (xlog_is_buffer_cancelled(log, buf_f->blf_blkno, 888 + buf_f->blf_len)) 889 + goto cancelled; 890 + } 891 + 892 + trace_xfs_log_recover_buf_recover(log, buf_f); 893 + 894 + buf_flags = 0; 895 + if (buf_f->blf_flags & XFS_BLF_INODE_BUF) 896 + buf_flags |= XBF_UNMAPPED; 897 + 898 + error = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len, 899 + buf_flags, &bp, NULL); 900 + if (error) 901 + return error; 902 + 903 + /* 904 + * Recover the buffer only if we get an LSN from it and it's less than 905 + * the lsn of the transaction we are replaying. 906 + * 907 + * Note that we have to be extremely careful of readahead here. 908 + * Readahead does not attach verifiers to the buffers, so if we don't 909 + * actually do any replay after readahead because the LSN we found 910 + * in the buffer is more recent than the current transaction, then we 911 + * need to attach the verifier directly. Failure to do so means 912 + * future recovery actions (e.g. EFI and unlinked list recovery) can 913 + * operate on the buffers without the verifier attached.
This 914 + * can lead to blocks on disk having the correct content but a stale 915 + * CRC. 916 + * 917 + * It is safe to assume these clean buffers are currently up to date. 918 + * If the buffer is dirtied by a later transaction being replayed, then 919 + * the verifier will be reset to match whatever recovery turns that 920 + * buffer into. 921 + */ 922 + lsn = xlog_recover_get_buf_lsn(mp, bp); 923 + if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) { 924 + trace_xfs_log_recover_buf_skip(log, buf_f); 925 + xlog_recover_validate_buf_type(mp, bp, buf_f, NULLCOMMITLSN); 926 + goto out_release; 927 + } 928 + 929 + if (buf_f->blf_flags & XFS_BLF_INODE_BUF) { 930 + error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f); 931 + if (error) 932 + goto out_release; 933 + } else if (buf_f->blf_flags & 934 + (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) { 935 + bool dirty; 936 + 937 + dirty = xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f); 938 + if (!dirty) 939 + goto out_release; 940 + } else { 941 + xlog_recover_do_reg_buffer(mp, item, bp, buf_f, current_lsn); 942 + } 943 + 944 + /* 945 + * Perform delayed write on the buffer. Asynchronous writes will be 946 + * slower when taking into account all the buffers to be flushed. 947 + * 948 + * Also make sure that only inode buffers with good sizes stay in 949 + * the buffer cache. The kernel moves inodes in buffers of 1 block 950 + * or inode_cluster_size bytes, whichever is bigger. The inode 951 + * buffers in the log can be a different size if the log was generated 952 + * by an older kernel using unclustered inode buffers or a newer kernel 953 + * running with a different inode cluster size. Regardless, if 954 + * the inode buffer size isn't max(blocksize, inode_cluster_size) 955 + * for *our* value of inode_cluster_size, then we need to keep 956 + * the buffer out of the buffer cache so that the buffer won't 957 + * overlap with future reads of those inodes. 
958 + */ 959 + if (XFS_DINODE_MAGIC == 960 + be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) && 961 + (BBTOB(bp->b_length) != M_IGEO(log->l_mp)->inode_cluster_size)) { 962 + xfs_buf_stale(bp); 963 + error = xfs_bwrite(bp); 964 + } else { 965 + ASSERT(bp->b_mount == mp); 966 + bp->b_iodone = xlog_recover_iodone; 967 + xfs_buf_delwri_queue(bp, buffer_list); 968 + } 969 + 970 + out_release: 971 + xfs_buf_relse(bp); 972 + return error; 973 + cancelled: 974 + trace_xfs_log_recover_buf_cancel(log, buf_f); 975 + return 0; 976 + } 977 + 978 + const struct xlog_recover_item_ops xlog_buf_item_ops = { 979 + .item_type = XFS_LI_BUF, 980 + .reorder = xlog_recover_buf_reorder, 981 + .ra_pass2 = xlog_recover_buf_ra_pass2, 982 + .commit_pass1 = xlog_recover_buf_commit_pass1, 983 + .commit_pass2 = xlog_recover_buf_commit_pass2, 984 + };
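The LSN gate in xlog_recover_buf_commit_pass2() above — only replay a buffer whose stamped LSN is older than the transaction being replayed — can be modeled as a small standalone function. This is an illustrative sketch, not kernel code: the plain integer `xfs_lsn_t`, the sentinel macros and `lsn_cmp()` here are simplified stand-ins for the real type, the missing/unknown LSN cases and `XFS_LSN_CMP()`.

```c
#include <assert.h>
#include <stdint.h>

typedef int64_t xfs_lsn_t;	/* stand-in for the kernel's log sequence number */

/* 0 means "no LSN stamped in the buffer", -1 means "recover immediately" */
#define LSN_UNKNOWN	((xfs_lsn_t)0)
#define LSN_RECOVER	((xfs_lsn_t)-1)

/* XFS_LSN_CMP compares cycle:block pairs; a plain integer comparison
 * stands in for it in this sketch. */
static int
lsn_cmp(xfs_lsn_t a, xfs_lsn_t b)
{
	return (a > b) - (a < b);
}

/*
 * Mirror of the pass-2 gate: skip replay (the caller just reattaches the
 * verifier) when the buffer already carries an LSN at or beyond the
 * transaction being replayed; replay when the LSN is unknown or older.
 */
static int
should_replay(xfs_lsn_t buf_lsn, xfs_lsn_t current_lsn)
{
	if (buf_lsn != LSN_UNKNOWN && buf_lsn != LSN_RECOVER &&
	    lsn_cmp(buf_lsn, current_lsn) >= 0)
		return 0;	/* buffer is newer: keep on-disk contents */
	return 1;		/* unknown/older: replay the logged changes */
}
```

The unknown-magic path above (returning -1 from xlog_recover_get_buf_lsn) maps onto the LSN_RECOVER case: with no trustworthy LSN, replay is the safe default.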
+1 -1
fs/xfs/xfs_dir2_readdir.c
··· 524 524 args.geo = dp->i_mount->m_dir_geo; 525 525 args.trans = tp; 526 526 527 - if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) 527 + if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL) 528 528 rval = xfs_dir2_sf_getdents(&args, ctx); 529 529 else if ((rval = xfs_dir2_isblock(&args, &v))) 530 530 ;
+71 -47
fs/xfs/xfs_dquot.c
··· 75 75 int prealloc = 0; 76 76 77 77 ASSERT(d->d_id); 78 - defq = xfs_get_defquota(dq, q); 78 + defq = xfs_get_defquota(q, xfs_dquot_type(dq)); 79 79 80 80 if (defq->bsoftlimit && !d->d_blk_softlimit) { 81 81 d->d_blk_softlimit = cpu_to_be64(defq->bsoftlimit); ··· 114 114 void 115 115 xfs_qm_adjust_dqtimers( 116 116 struct xfs_mount *mp, 117 - struct xfs_disk_dquot *d) 117 + struct xfs_dquot *dq) 118 118 { 119 + struct xfs_quotainfo *qi = mp->m_quotainfo; 120 + struct xfs_disk_dquot *d = &dq->q_core; 121 + struct xfs_def_quota *defq; 122 + 119 123 ASSERT(d->d_id); 124 + defq = xfs_get_defquota(qi, xfs_dquot_type(dq)); 120 125 121 126 #ifdef DEBUG 122 127 if (d->d_blk_hardlimit) ··· 143 138 (be64_to_cpu(d->d_bcount) > 144 139 be64_to_cpu(d->d_blk_hardlimit)))) { 145 140 d->d_btimer = cpu_to_be32(ktime_get_real_seconds() + 146 - mp->m_quotainfo->qi_btimelimit); 141 + defq->btimelimit); 147 142 } else { 148 143 d->d_bwarns = 0; 149 144 } ··· 166 161 (be64_to_cpu(d->d_icount) > 167 162 be64_to_cpu(d->d_ino_hardlimit)))) { 168 163 d->d_itimer = cpu_to_be32(ktime_get_real_seconds() + 169 - mp->m_quotainfo->qi_itimelimit); 164 + defq->itimelimit); 170 165 } else { 171 166 d->d_iwarns = 0; 172 167 } ··· 189 184 (be64_to_cpu(d->d_rtbcount) > 190 185 be64_to_cpu(d->d_rtb_hardlimit)))) { 191 186 d->d_rtbtimer = cpu_to_be32(ktime_get_real_seconds() + 192 - mp->m_quotainfo->qi_rtbtimelimit); 187 + defq->rtbtimelimit); 193 188 } else { 194 189 d->d_rtbwarns = 0; 195 190 } ··· 210 205 */ 211 206 STATIC void 212 207 xfs_qm_init_dquot_blk( 213 - xfs_trans_t *tp, 214 - xfs_mount_t *mp, 215 - xfs_dqid_t id, 216 - uint type, 217 - xfs_buf_t *bp) 208 + struct xfs_trans *tp, 209 + struct xfs_mount *mp, 210 + xfs_dqid_t id, 211 + uint type, 212 + struct xfs_buf *bp) 218 213 { 219 214 struct xfs_quotainfo *q = mp->m_quotainfo; 220 - xfs_dqblk_t *d; 221 - xfs_dqid_t curid; 222 - int i; 215 + struct xfs_dqblk *d; 216 + xfs_dqid_t curid; 217 + unsigned int qflag; 218 + unsigned int 
blftype; 219 + int i; 223 220 224 221 ASSERT(tp); 225 222 ASSERT(xfs_buf_islocked(bp)); ··· 245 238 } 246 239 } 247 240 248 - xfs_trans_dquot_buf(tp, bp, 249 - (type & XFS_DQ_USER ? XFS_BLF_UDQUOT_BUF : 250 - ((type & XFS_DQ_PROJ) ? XFS_BLF_PDQUOT_BUF : 251 - XFS_BLF_GDQUOT_BUF))); 252 - xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1); 241 + if (type & XFS_DQ_USER) { 242 + qflag = XFS_UQUOTA_CHKD; 243 + blftype = XFS_BLF_UDQUOT_BUF; 244 + } else if (type & XFS_DQ_PROJ) { 245 + qflag = XFS_PQUOTA_CHKD; 246 + blftype = XFS_BLF_PDQUOT_BUF; 247 + } else { 248 + qflag = XFS_GQUOTA_CHKD; 249 + blftype = XFS_BLF_GDQUOT_BUF; 250 + } 251 + 252 + xfs_trans_dquot_buf(tp, bp, blftype); 253 + 254 + /* 255 + * quotacheck uses delayed writes to update all the dquots on disk in an 256 + * efficient manner instead of logging the individual dquot changes as 257 + * they are made. However if we log the buffer allocated here and crash 258 + * after quotacheck while the logged initialisation is still in the 259 + * active region of the log, log recovery can replay the dquot buffer 260 + * initialisation over the top of the checked dquots and corrupt quota 261 + * accounting. 262 + * 263 + * To avoid this problem, quotacheck cannot log the initialised buffer. 264 + * We must still dirty the buffer and write it back before the 265 + * allocation transaction clears the log. Therefore, mark the buffer as 266 + * ordered instead of logging it directly. This is safe for quotacheck 267 + * because it detects and repairs allocated but initialized dquot blocks 268 + * in the quota inodes. 
269 + */ 270 + if (!(mp->m_qflags & qflag)) 271 + xfs_trans_ordered_buf(tp, bp); 272 + else 273 + xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1); 253 274 } 254 275 255 276 /* ··· 1056 1021 struct xfs_dq_logitem *qip = (struct xfs_dq_logitem *)lip; 1057 1022 struct xfs_dquot *dqp = qip->qli_dquot; 1058 1023 struct xfs_ail *ailp = lip->li_ailp; 1024 + xfs_lsn_t tail_lsn; 1059 1025 1060 1026 /* 1061 1027 * We only want to pull the item from the AIL if its ··· 1070 1034 ((lip->li_lsn == qip->qli_flush_lsn) || 1071 1035 test_bit(XFS_LI_FAILED, &lip->li_flags))) { 1072 1036 1073 - /* xfs_trans_ail_delete() drops the AIL lock. */ 1074 1037 spin_lock(&ailp->ail_lock); 1075 1038 if (lip->li_lsn == qip->qli_flush_lsn) { 1076 - xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE); 1039 + /* xfs_ail_update_finish() drops the AIL lock */ 1040 + tail_lsn = xfs_ail_delete_one(ailp, lip); 1041 + xfs_ail_update_finish(ailp, tail_lsn); 1077 1042 } else { 1078 1043 /* 1079 1044 * Clear the failed state since we are about to drop the ··· 1105 1068 struct xfs_buf **bpp) 1106 1069 { 1107 1070 struct xfs_mount *mp = dqp->q_mount; 1071 + struct xfs_log_item *lip = &dqp->q_logitem.qli_item; 1108 1072 struct xfs_buf *bp; 1109 1073 struct xfs_dqblk *dqb; 1110 1074 struct xfs_disk_dquot *ddqp; ··· 1122 1084 xfs_qm_dqunpin_wait(dqp); 1123 1085 1124 1086 /* 1125 - * This may have been unpinned because the filesystem is shutting 1126 - * down forcibly. If that's the case we must not write this dquot 1127 - * to disk, because the log record didn't make it to disk. 1128 - * 1129 - * We also have to remove the log item from the AIL in this case, 1130 - * as we wait for an emptry AIL as part of the unmount process. 
1131 - */ 1132 - if (XFS_FORCED_SHUTDOWN(mp)) { 1133 - struct xfs_log_item *lip = &dqp->q_logitem.qli_item; 1134 - dqp->dq_flags &= ~XFS_DQ_DIRTY; 1135 - 1136 - xfs_trans_ail_remove(lip, SHUTDOWN_CORRUPT_INCORE); 1137 - 1138 - error = -EIO; 1139 - goto out_unlock; 1140 - } 1141 - 1142 - /* 1143 1087 * Get the buffer containing the on-disk dquot 1144 1088 */ 1145 1089 error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno, 1146 1090 mp->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK, 1147 1091 &bp, &xfs_dquot_buf_ops); 1148 - if (error) 1092 + if (error == -EAGAIN) 1149 1093 goto out_unlock; 1094 + if (error) 1095 + goto out_abort; 1150 1096 1151 1097 /* 1152 1098 * Calculate the location of the dquot inside the buffer. ··· 1138 1116 dqb = bp->b_addr + dqp->q_bufoffset; 1139 1117 ddqp = &dqb->dd_diskdq; 1140 1118 1141 - /* 1142 - * A simple sanity check in case we got a corrupted dquot. 1143 - */ 1144 - fa = xfs_dqblk_verify(mp, dqb, be32_to_cpu(ddqp->d_id), 0); 1119 + /* sanity check the in-core structure before we flush */ 1120 + fa = xfs_dquot_verify(mp, &dqp->q_core, be32_to_cpu(dqp->q_core.d_id), 1121 + 0); 1145 1122 if (fa) { 1146 1123 xfs_alert(mp, "corrupt dquot ID 0x%x in memory at %pS", 1147 - be32_to_cpu(ddqp->d_id), fa); 1124 + be32_to_cpu(dqp->q_core.d_id), fa); 1148 1125 xfs_buf_relse(bp); 1149 - xfs_dqfunlock(dqp); 1150 - xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 1151 - return -EFSCORRUPTED; 1126 + error = -EFSCORRUPTED; 1127 + goto out_abort; 1152 1128 } 1153 1129 1154 1130 /* This is the only portion of data that needs to persist */ ··· 1195 1175 *bpp = bp; 1196 1176 return 0; 1197 1177 1178 + out_abort: 1179 + dqp->dq_flags &= ~XFS_DQ_DIRTY; 1180 + xfs_trans_ail_delete(lip, 0); 1181 + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 1198 1182 out_unlock: 1199 1183 xfs_dqfunlock(dqp); 1200 1184 return error;
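The reworked xfs_qm_init_dquot_blk() above pairs each dquot type with a buffer log format type and the CHKD flag that gates logging versus ordering. A minimal sketch of that pairing and the ordered-buffer decision, with illustrative flag values (the real XFS_DQ_*, XFS_*QUOTA_CHKD and XFS_BLF_*DQUOT_BUF constants live in the kernel headers):

```c
#include <assert.h>

/* Illustrative flag values; the real ones are defined in the XFS headers. */
#define XFS_DQ_USER		0x1
#define XFS_DQ_PROJ		0x2
#define XFS_DQ_GROUP		0x4

#define XFS_UQUOTA_CHKD		0x1
#define XFS_PQUOTA_CHKD		0x2
#define XFS_GQUOTA_CHKD		0x4

#define XFS_BLF_UDQUOT_BUF	0x10
#define XFS_BLF_PDQUOT_BUF	0x20
#define XFS_BLF_GDQUOT_BUF	0x40

struct dq_bufinfo {
	unsigned int	qflag;		/* CHKD flag that gates logging */
	unsigned int	blftype;	/* buffer log format type */
};

/* Same user -> project -> group cascade as the patched function. */
static struct dq_bufinfo
dquot_buf_info(unsigned int type)
{
	struct dq_bufinfo bi;

	if (type & XFS_DQ_USER) {
		bi.qflag = XFS_UQUOTA_CHKD;
		bi.blftype = XFS_BLF_UDQUOT_BUF;
	} else if (type & XFS_DQ_PROJ) {
		bi.qflag = XFS_PQUOTA_CHKD;
		bi.blftype = XFS_BLF_PDQUOT_BUF;
	} else {
		bi.qflag = XFS_GQUOTA_CHKD;
		bi.blftype = XFS_BLF_GDQUOT_BUF;
	}
	return bi;
}

/*
 * The CHKD flag is only set once quotacheck has completed, so "not yet
 * checked" means we are inside quotacheck and must mark the buffer
 * ordered instead of logging it, avoiding the replay-over-quotacheck
 * corruption described in the comment above.
 */
static int
should_log_init_buffer(unsigned int m_qflags, unsigned int qflag)
{
	return (m_qflags & qflag) != 0;
}
```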
+1 -1
fs/xfs/xfs_dquot.h
··· 154 154 int xfs_qm_dqflush(struct xfs_dquot *dqp, struct xfs_buf **bpp); 155 155 void xfs_qm_dqunpin_wait(struct xfs_dquot *dqp); 156 156 void xfs_qm_adjust_dqtimers(struct xfs_mount *mp, 157 - struct xfs_disk_dquot *d); 157 + struct xfs_dquot *d); 158 158 void xfs_qm_adjust_dqlimits(struct xfs_mount *mp, 159 159 struct xfs_dquot *d); 160 160 xfs_dqid_t xfs_qm_id_for_quotatype(struct xfs_inode *ip, uint type);
+1 -16
fs/xfs/xfs_dquot_item.c
··· 145 145 if (atomic_read(&dqp->q_pincount) > 0) 146 146 return XFS_ITEM_PINNED; 147 147 148 - /* 149 - * The buffer containing this item failed to be written back 150 - * previously. Resubmit the buffer for IO 151 - */ 152 - if (test_bit(XFS_LI_FAILED, &lip->li_flags)) { 153 - if (!xfs_buf_trylock(bp)) 154 - return XFS_ITEM_LOCKED; 155 - 156 - if (!xfs_buf_resubmit_failed_buffers(bp, buffer_list)) 157 - rval = XFS_ITEM_FLUSHING; 158 - 159 - xfs_buf_unlock(bp); 160 - return rval; 161 - } 162 - 163 148 if (!xfs_dqlock_nowait(dqp)) 164 149 return XFS_ITEM_LOCKED; 165 150 ··· 343 358 ASSERT(test_bit(XFS_LI_IN_AIL, &lip->li_flags) || 344 359 test_bit(XFS_LI_ABORTED, &lip->li_flags) || 345 360 XFS_FORCED_SHUTDOWN(lip->li_mountp)); 346 - xfs_trans_ail_remove(lip, SHUTDOWN_LOG_IO_ERROR); 361 + xfs_trans_ail_delete(lip, 0); 347 362 kmem_free(lip->li_lv_shadow); 348 363 kmem_free(qoff); 349 364 }
+201
fs/xfs/xfs_dquot_item_recover.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Copyright (c) 2000-2006 Silicon Graphics, Inc. 4 + * All Rights Reserved. 5 + */ 6 + #include "xfs.h" 7 + #include "xfs_fs.h" 8 + #include "xfs_shared.h" 9 + #include "xfs_format.h" 10 + #include "xfs_log_format.h" 11 + #include "xfs_trans_resv.h" 12 + #include "xfs_mount.h" 13 + #include "xfs_inode.h" 14 + #include "xfs_quota.h" 15 + #include "xfs_trans.h" 16 + #include "xfs_buf_item.h" 17 + #include "xfs_trans_priv.h" 18 + #include "xfs_qm.h" 19 + #include "xfs_log.h" 20 + #include "xfs_log_priv.h" 21 + #include "xfs_log_recover.h" 22 + 23 + STATIC void 24 + xlog_recover_dquot_ra_pass2( 25 + struct xlog *log, 26 + struct xlog_recover_item *item) 27 + { 28 + struct xfs_mount *mp = log->l_mp; 29 + struct xfs_disk_dquot *recddq; 30 + struct xfs_dq_logformat *dq_f; 31 + uint type; 32 + 33 + if (mp->m_qflags == 0) 34 + return; 35 + 36 + recddq = item->ri_buf[1].i_addr; 37 + if (recddq == NULL) 38 + return; 39 + if (item->ri_buf[1].i_len < sizeof(struct xfs_disk_dquot)) 40 + return; 41 + 42 + type = recddq->d_flags & (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP); 43 + ASSERT(type); 44 + if (log->l_quotaoffs_flag & type) 45 + return; 46 + 47 + dq_f = item->ri_buf[0].i_addr; 48 + ASSERT(dq_f); 49 + ASSERT(dq_f->qlf_len == 1); 50 + 51 + xlog_buf_readahead(log, dq_f->qlf_blkno, 52 + XFS_FSB_TO_BB(mp, dq_f->qlf_len), 53 + &xfs_dquot_buf_ra_ops); 54 + } 55 + 56 + /* 57 + * Recover a dquot record 58 + */ 59 + STATIC int 60 + xlog_recover_dquot_commit_pass2( 61 + struct xlog *log, 62 + struct list_head *buffer_list, 63 + struct xlog_recover_item *item, 64 + xfs_lsn_t current_lsn) 65 + { 66 + struct xfs_mount *mp = log->l_mp; 67 + struct xfs_buf *bp; 68 + struct xfs_disk_dquot *ddq, *recddq; 69 + struct xfs_dq_logformat *dq_f; 70 + xfs_failaddr_t fa; 71 + int error; 72 + uint type; 73 + 74 + /* 75 + * Filesystems are required to send in quota flags at mount time. 
76 + */ 77 + if (mp->m_qflags == 0) 78 + return 0; 79 + 80 + recddq = item->ri_buf[1].i_addr; 81 + if (recddq == NULL) { 82 + xfs_alert(log->l_mp, "NULL dquot in %s.", __func__); 83 + return -EFSCORRUPTED; 84 + } 85 + if (item->ri_buf[1].i_len < sizeof(struct xfs_disk_dquot)) { 86 + xfs_alert(log->l_mp, "dquot too small (%d) in %s.", 87 + item->ri_buf[1].i_len, __func__); 88 + return -EFSCORRUPTED; 89 + } 90 + 91 + /* 92 + * This type of quota was turned off, so ignore this record. 93 + */ 94 + type = recddq->d_flags & (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP); 95 + ASSERT(type); 96 + if (log->l_quotaoffs_flag & type) 97 + return 0; 98 + 99 + /* 100 + * At this point we know that quota was _not_ turned off. 101 + * Since the mount flags are not indicating to us otherwise, this 102 + * must mean that quota is on, and the dquot needs to be replayed. 103 + * Remember that we may not have fully recovered the superblock yet, 104 + * so we can't do the usual trick of looking at the SB quota bits. 105 + * 106 + * The other possibility, of course, is that the quota subsystem was 107 + * removed since the last mount - ENOSYS. 108 + */ 109 + dq_f = item->ri_buf[0].i_addr; 110 + ASSERT(dq_f); 111 + fa = xfs_dquot_verify(mp, recddq, dq_f->qlf_id, 0); 112 + if (fa) { 113 + xfs_alert(mp, "corrupt dquot ID 0x%x in log at %pS", 114 + dq_f->qlf_id, fa); 115 + return -EFSCORRUPTED; 116 + } 117 + ASSERT(dq_f->qlf_len == 1); 118 + 119 + /* 120 + * At this point we are assuming that the dquots have been allocated 121 + * and hence the buffer has valid dquots stamped in it. It should, 122 + * therefore, pass verifier validation. If the dquot is bad, then 123 + * we'll return an error here, so we don't need to specifically check 124 + * the dquot in the buffer after the verifier has run. 
125 + */ 126 + error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dq_f->qlf_blkno, 127 + XFS_FSB_TO_BB(mp, dq_f->qlf_len), 0, &bp, 128 + &xfs_dquot_buf_ops); 129 + if (error) 130 + return error; 131 + 132 + ASSERT(bp); 133 + ddq = xfs_buf_offset(bp, dq_f->qlf_boffset); 134 + 135 + /* 136 + * If the dquot has an LSN in it, recover the dquot only if it's less 137 + * than the lsn of the transaction we are replaying. 138 + */ 139 + if (xfs_sb_version_hascrc(&mp->m_sb)) { 140 + struct xfs_dqblk *dqb = (struct xfs_dqblk *)ddq; 141 + xfs_lsn_t lsn = be64_to_cpu(dqb->dd_lsn); 142 + 143 + if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) { 144 + goto out_release; 145 + } 146 + } 147 + 148 + memcpy(ddq, recddq, item->ri_buf[1].i_len); 149 + if (xfs_sb_version_hascrc(&mp->m_sb)) { 150 + xfs_update_cksum((char *)ddq, sizeof(struct xfs_dqblk), 151 + XFS_DQUOT_CRC_OFF); 152 + } 153 + 154 + ASSERT(dq_f->qlf_size == 2); 155 + ASSERT(bp->b_mount == mp); 156 + bp->b_iodone = xlog_recover_iodone; 157 + xfs_buf_delwri_queue(bp, buffer_list); 158 + 159 + out_release: 160 + xfs_buf_relse(bp); 161 + return 0; 162 + } 163 + 164 + const struct xlog_recover_item_ops xlog_dquot_item_ops = { 165 + .item_type = XFS_LI_DQUOT, 166 + .ra_pass2 = xlog_recover_dquot_ra_pass2, 167 + .commit_pass2 = xlog_recover_dquot_commit_pass2, 168 + }; 169 + 170 + /* 171 + * Recover QUOTAOFF records. We simply make a note of it in the xlog 172 + * structure, so that we know not to do any dquot item or dquot buffer recovery, 173 + * of that type. 174 + */ 175 + STATIC int 176 + xlog_recover_quotaoff_commit_pass1( 177 + struct xlog *log, 178 + struct xlog_recover_item *item) 179 + { 180 + struct xfs_qoff_logformat *qoff_f = item->ri_buf[0].i_addr; 181 + ASSERT(qoff_f); 182 + 183 + /* 184 + * The logitem format's flag tells us if this was user quotaoff, 185 + * group/project quotaoff or both. 
186 + */ 187 + if (qoff_f->qf_flags & XFS_UQUOTA_ACCT) 188 + log->l_quotaoffs_flag |= XFS_DQ_USER; 189 + if (qoff_f->qf_flags & XFS_PQUOTA_ACCT) 190 + log->l_quotaoffs_flag |= XFS_DQ_PROJ; 191 + if (qoff_f->qf_flags & XFS_GQUOTA_ACCT) 192 + log->l_quotaoffs_flag |= XFS_DQ_GROUP; 193 + 194 + return 0; 195 + } 196 + 197 + const struct xlog_recover_item_ops xlog_quotaoff_item_ops = { 198 + .item_type = XFS_LI_QUOTAOFF, 199 + .commit_pass1 = xlog_recover_quotaoff_commit_pass1, 200 + /* nothing to commit in pass2 */ 201 + };
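The two-pass quotaoff handling above — pass 1 notes which quota types were switched off in l_quotaoffs_flag, pass 2 skips dquot records of those types — can be sketched as two pure functions. Flag values here are illustrative stand-ins for the kernel's XFS_DQ_* and XFS_*QUOTA_ACCT constants:

```c
#include <assert.h>

/* Illustrative flag values; the real constants are in the XFS headers. */
#define XFS_DQ_USER		0x1
#define XFS_DQ_PROJ		0x2
#define XFS_DQ_GROUP		0x4

#define XFS_UQUOTA_ACCT		0x1
#define XFS_GQUOTA_ACCT		0x2
#define XFS_PQUOTA_ACCT		0x4

/*
 * Pass 1 (cf. xlog_recover_quotaoff_commit_pass1): fold a QUOTAOFF
 * record's flags into the running mask of quota types switched off.
 */
static unsigned int
quotaoff_note(unsigned int quotaoffs_flag, unsigned int qf_flags)
{
	if (qf_flags & XFS_UQUOTA_ACCT)
		quotaoffs_flag |= XFS_DQ_USER;
	if (qf_flags & XFS_PQUOTA_ACCT)
		quotaoffs_flag |= XFS_DQ_PROJ;
	if (qf_flags & XFS_GQUOTA_ACCT)
		quotaoffs_flag |= XFS_DQ_GROUP;
	return quotaoffs_flag;
}

/*
 * Pass 2: a dquot record whose type is in the mask is skipped rather
 * than replayed.
 */
static int
dquot_should_replay(unsigned int quotaoffs_flag, unsigned int d_flags)
{
	unsigned int type = d_flags & (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP);

	return (quotaoffs_flag & type) == 0;
}
```

This is why the quotaoff item needs only a commit_pass1 hook in xlog_quotaoff_item_ops: its entire effect is the mask consulted by later dquot replay.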
+3
fs/xfs/xfs_error.c
··· 53 53 XFS_RANDOM_FORCE_SCRUB_REPAIR, 54 54 XFS_RANDOM_FORCE_SUMMARY_RECALC, 55 55 XFS_RANDOM_IUNLINK_FALLBACK, 56 + XFS_RANDOM_BUF_IOERROR, 56 57 }; 57 58 58 59 struct xfs_errortag_attr { ··· 163 162 XFS_ERRORTAG_ATTR_RW(force_repair, XFS_ERRTAG_FORCE_SCRUB_REPAIR); 164 163 XFS_ERRORTAG_ATTR_RW(bad_summary, XFS_ERRTAG_FORCE_SUMMARY_RECALC); 165 164 XFS_ERRORTAG_ATTR_RW(iunlink_fallback, XFS_ERRTAG_IUNLINK_FALLBACK); 165 + XFS_ERRORTAG_ATTR_RW(buf_ioerror, XFS_ERRTAG_BUF_IOERROR); 166 166 167 167 static struct attribute *xfs_errortag_attrs[] = { 168 168 XFS_ERRORTAG_ATTR_LIST(noerror), ··· 201 199 XFS_ERRORTAG_ATTR_LIST(force_repair), 202 200 XFS_ERRORTAG_ATTR_LIST(bad_summary), 203 201 XFS_ERRORTAG_ATTR_LIST(iunlink_fallback), 202 + XFS_ERRORTAG_ATTR_LIST(buf_ioerror), 204 203 NULL, 205 204 }; 206 205
+146 -70
fs/xfs/xfs_extfree_item.c
··· 22 22 #include "xfs_bmap.h" 23 23 #include "xfs_trace.h" 24 24 #include "xfs_error.h" 25 + #include "xfs_log_priv.h" 26 + #include "xfs_log_recover.h" 25 27 26 28 kmem_zone_t *xfs_efi_zone; 27 29 kmem_zone_t *xfs_efd_zone; 30 + 31 + static const struct xfs_item_ops xfs_efi_item_ops; 28 32 29 33 static inline struct xfs_efi_log_item *EFI_ITEM(struct xfs_log_item *lip) 30 34 { 31 35 return container_of(lip, struct xfs_efi_log_item, efi_item); 32 36 } 33 37 34 - void 38 + STATIC void 35 39 xfs_efi_item_free( 36 40 struct xfs_efi_log_item *efip) 37 41 { ··· 53 49 * committed vs unpin operations in bulk insert operations. Hence the reference 54 50 * count to ensure only the last caller frees the EFI. 55 51 */ 56 - void 52 + STATIC void 57 53 xfs_efi_release( 58 54 struct xfs_efi_log_item *efip) 59 55 { 60 56 ASSERT(atomic_read(&efip->efi_refcount) > 0); 61 57 if (atomic_dec_and_test(&efip->efi_refcount)) { 62 - xfs_trans_ail_remove(&efip->efi_item, SHUTDOWN_LOG_IO_ERROR); 58 + xfs_trans_ail_delete(&efip->efi_item, SHUTDOWN_LOG_IO_ERROR); 63 59 xfs_efi_item_free(efip); 64 60 } 65 61 } ··· 143 139 xfs_efi_release(EFI_ITEM(lip)); 144 140 } 145 141 146 - static const struct xfs_item_ops xfs_efi_item_ops = { 147 - .iop_size = xfs_efi_item_size, 148 - .iop_format = xfs_efi_item_format, 149 - .iop_unpin = xfs_efi_item_unpin, 150 - .iop_release = xfs_efi_item_release, 151 - }; 152 - 153 - 154 142 /* 155 143 * Allocate and initialize an efi item with the given number of extents. 
156 144 */ 157 - struct xfs_efi_log_item * 145 + STATIC struct xfs_efi_log_item * 158 146 xfs_efi_init( 159 147 struct xfs_mount *mp, 160 148 uint nextents) ··· 157 161 158 162 ASSERT(nextents > 0); 159 163 if (nextents > XFS_EFI_MAX_FAST_EXTENTS) { 160 - size = (uint)(sizeof(xfs_efi_log_item_t) + 164 + size = (uint)(sizeof(struct xfs_efi_log_item) + 161 165 ((nextents - 1) * sizeof(xfs_extent_t))); 162 166 efip = kmem_zalloc(size, 0); 163 167 } else { ··· 180 184 * one of which will be the native format for this kernel. 181 185 * It will handle the conversion of formats if necessary. 182 186 */ 183 - int 187 + STATIC int 184 188 xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt) 185 189 { 186 190 xfs_efi_log_format_t *src_efi_fmt = buf->i_addr; ··· 408 412 XFS_FSB_TO_AGNO(mp, rb->xefi_startblock); 409 413 } 410 414 411 - /* Get an EFI. */ 412 - STATIC void * 413 - xfs_extent_free_create_intent( 414 - struct xfs_trans *tp, 415 - unsigned int count) 416 - { 417 - struct xfs_efi_log_item *efip; 418 - 419 - ASSERT(tp != NULL); 420 - ASSERT(count > 0); 421 - 422 - efip = xfs_efi_init(tp->t_mountp, count); 423 - ASSERT(efip != NULL); 424 - 425 - /* 426 - * Get a log_item_desc to point at the new item. 427 - */ 428 - xfs_trans_add_item(tp, &efip->efi_item); 429 - return efip; 430 - } 431 - 432 415 /* Log a free extent to the intent item. 
*/ 433 416 STATIC void 434 417 xfs_extent_free_log_item( 435 418 struct xfs_trans *tp, 436 - void *intent, 437 - struct list_head *item) 419 + struct xfs_efi_log_item *efip, 420 + struct xfs_extent_free_item *free) 438 421 { 439 - struct xfs_efi_log_item *efip = intent; 440 - struct xfs_extent_free_item *free; 441 422 uint next_extent; 442 423 struct xfs_extent *extp; 443 - 444 - free = container_of(item, struct xfs_extent_free_item, xefi_list); 445 424 446 425 tp->t_flags |= XFS_TRANS_DIRTY; 447 426 set_bit(XFS_LI_DIRTY, &efip->efi_item.li_flags); ··· 433 462 extp->ext_len = free->xefi_blockcount; 434 463 } 435 464 465 + static struct xfs_log_item * 466 + xfs_extent_free_create_intent( 467 + struct xfs_trans *tp, 468 + struct list_head *items, 469 + unsigned int count, 470 + bool sort) 471 + { 472 + struct xfs_mount *mp = tp->t_mountp; 473 + struct xfs_efi_log_item *efip = xfs_efi_init(mp, count); 474 + struct xfs_extent_free_item *free; 475 + 476 + ASSERT(count > 0); 477 + 478 + xfs_trans_add_item(tp, &efip->efi_item); 479 + if (sort) 480 + list_sort(mp, items, xfs_extent_free_diff_items); 481 + list_for_each_entry(free, items, xefi_list) 482 + xfs_extent_free_log_item(tp, efip, free); 483 + return &efip->efi_item; 484 + } 485 + 436 486 /* Get an EFD so we can process all the free extents. */ 437 - STATIC void * 487 + static struct xfs_log_item * 438 488 xfs_extent_free_create_done( 439 489 struct xfs_trans *tp, 440 - void *intent, 490 + struct xfs_log_item *intent, 441 491 unsigned int count) 442 492 { 443 - return xfs_trans_get_efd(tp, intent, count); 493 + return &xfs_trans_get_efd(tp, EFI_ITEM(intent), count)->efd_item; 444 494 } 445 495 446 496 /* Process a free extent. 
*/ 447 497 STATIC int 448 498 xfs_extent_free_finish_item( 449 499 struct xfs_trans *tp, 500 + struct xfs_log_item *done, 450 501 struct list_head *item, 451 - void *done_item, 452 - void **state) 502 + struct xfs_btree_cur **state) 453 503 { 454 504 struct xfs_extent_free_item *free; 455 505 int error; 456 506 457 507 free = container_of(item, struct xfs_extent_free_item, xefi_list); 458 - error = xfs_trans_free_extent(tp, done_item, 508 + error = xfs_trans_free_extent(tp, EFD_ITEM(done), 459 509 free->xefi_startblock, 460 510 free->xefi_blockcount, 461 511 &free->xefi_oinfo, free->xefi_skip_discard); ··· 487 495 /* Abort all pending EFIs. */ 488 496 STATIC void 489 497 xfs_extent_free_abort_intent( 490 - void *intent) 498 + struct xfs_log_item *intent) 491 499 { 492 - xfs_efi_release(intent); 500 + xfs_efi_release(EFI_ITEM(intent)); 493 501 } 494 502 495 503 /* Cancel a free extent. */ ··· 505 513 506 514 const struct xfs_defer_op_type xfs_extent_free_defer_type = { 507 515 .max_items = XFS_EFI_MAX_FAST_EXTENTS, 508 - .diff_items = xfs_extent_free_diff_items, 509 516 .create_intent = xfs_extent_free_create_intent, 510 517 .abort_intent = xfs_extent_free_abort_intent, 511 - .log_item = xfs_extent_free_log_item, 512 518 .create_done = xfs_extent_free_create_done, 513 519 .finish_item = xfs_extent_free_finish_item, 514 520 .cancel_item = xfs_extent_free_cancel_item, ··· 519 529 STATIC int 520 530 xfs_agfl_free_finish_item( 521 531 struct xfs_trans *tp, 532 + struct xfs_log_item *done, 522 533 struct list_head *item, 523 - void *done_item, 524 - void **state) 534 + struct xfs_btree_cur **state) 525 535 { 526 536 struct xfs_mount *mp = tp->t_mountp; 527 - struct xfs_efd_log_item *efdp = done_item; 537 + struct xfs_efd_log_item *efdp = EFD_ITEM(done); 528 538 struct xfs_extent_free_item *free; 529 539 struct xfs_extent *extp; 530 540 struct xfs_buf *agbp; ··· 569 579 /* sub-type with special handling for AGFL deferred frees */ 570 580 const struct xfs_defer_op_type 
xfs_agfl_free_defer_type = { 571 581 .max_items = XFS_EFI_MAX_FAST_EXTENTS, 572 - .diff_items = xfs_extent_free_diff_items, 573 582 .create_intent = xfs_extent_free_create_intent, 574 583 .abort_intent = xfs_extent_free_abort_intent, 575 - .log_item = xfs_extent_free_log_item, 576 584 .create_done = xfs_extent_free_create_done, 577 585 .finish_item = xfs_agfl_free_finish_item, 578 586 .cancel_item = xfs_extent_free_cancel_item, ··· 580 592 * Process an extent free intent item that was recovered from 581 593 * the log. We need to free the extents that it describes. 582 594 */ 583 - int 584 - xfs_efi_recover( 585 - struct xfs_mount *mp, 586 - struct xfs_efi_log_item *efip) 595 + STATIC int 596 + xfs_efi_item_recover( 597 + struct xfs_log_item *lip, 598 + struct xfs_trans *parent_tp) 587 599 { 588 - struct xfs_efd_log_item *efdp; 589 - struct xfs_trans *tp; 590 - int i; 591 - int error = 0; 592 - xfs_extent_t *extp; 593 - xfs_fsblock_t startblock_fsb; 594 - 595 - ASSERT(!test_bit(XFS_EFI_RECOVERED, &efip->efi_flags)); 600 + struct xfs_efi_log_item *efip = EFI_ITEM(lip); 601 + struct xfs_mount *mp = parent_tp->t_mountp; 602 + struct xfs_efd_log_item *efdp; 603 + struct xfs_trans *tp; 604 + struct xfs_extent *extp; 605 + xfs_fsblock_t startblock_fsb; 606 + int i; 607 + int error = 0; 596 608 597 609 /* 598 610 * First check the validity of the extents described by the ··· 611 623 * This will pull the EFI from the AIL and 612 624 * free the memory associated with it. 
613 625 */ 614 - set_bit(XFS_EFI_RECOVERED, &efip->efi_flags); 615 626 xfs_efi_release(efip); 616 627 return -EFSCORRUPTED; 617 628 } ··· 631 644 632 645 } 633 646 634 - set_bit(XFS_EFI_RECOVERED, &efip->efi_flags); 635 647 error = xfs_trans_commit(tp); 636 648 return error; 637 649 ··· 638 652 xfs_trans_cancel(tp); 639 653 return error; 640 654 } 655 + 656 + STATIC bool 657 + xfs_efi_item_match( 658 + struct xfs_log_item *lip, 659 + uint64_t intent_id) 660 + { 661 + return EFI_ITEM(lip)->efi_format.efi_id == intent_id; 662 + } 663 + 664 + static const struct xfs_item_ops xfs_efi_item_ops = { 665 + .iop_size = xfs_efi_item_size, 666 + .iop_format = xfs_efi_item_format, 667 + .iop_unpin = xfs_efi_item_unpin, 668 + .iop_release = xfs_efi_item_release, 669 + .iop_recover = xfs_efi_item_recover, 670 + .iop_match = xfs_efi_item_match, 671 + }; 672 + 673 + /* 674 + * This routine is called to create an in-core extent free intent 675 + * item from the efi format structure which was logged on disk. 676 + * It allocates an in-core efi, copies the extents from the format 677 + * structure into it, and adds the efi to the AIL with the given 678 + * LSN. 679 + */ 680 + STATIC int 681 + xlog_recover_efi_commit_pass2( 682 + struct xlog *log, 683 + struct list_head *buffer_list, 684 + struct xlog_recover_item *item, 685 + xfs_lsn_t lsn) 686 + { 687 + struct xfs_mount *mp = log->l_mp; 688 + struct xfs_efi_log_item *efip; 689 + struct xfs_efi_log_format *efi_formatp; 690 + int error; 691 + 692 + efi_formatp = item->ri_buf[0].i_addr; 693 + 694 + efip = xfs_efi_init(mp, efi_formatp->efi_nextents); 695 + error = xfs_efi_copy_format(&item->ri_buf[0], &efip->efi_format); 696 + if (error) { 697 + xfs_efi_item_free(efip); 698 + return error; 699 + } 700 + atomic_set(&efip->efi_next_extent, efi_formatp->efi_nextents); 701 + /* 702 + * Insert the intent into the AIL directly and drop one reference so 703 + * that finishing or canceling the work will drop the other. 
704 + */ 705 + xfs_trans_ail_insert(log->l_ailp, &efip->efi_item, lsn); 706 + xfs_efi_release(efip); 707 + return 0; 708 + } 709 + 710 + const struct xlog_recover_item_ops xlog_efi_item_ops = { 711 + .item_type = XFS_LI_EFI, 712 + .commit_pass2 = xlog_recover_efi_commit_pass2, 713 + }; 714 + 715 + /* 716 + * This routine is called when an EFD format structure is found in a committed 717 + * transaction in the log. Its purpose is to cancel the corresponding EFI if it 718 + * was still in the log. To do this it searches the AIL for the EFI with an id 719 + * equal to that in the EFD format structure. If we find it we drop the EFD 720 + * reference, which removes the EFI from the AIL and frees it. 721 + */ 722 + STATIC int 723 + xlog_recover_efd_commit_pass2( 724 + struct xlog *log, 725 + struct list_head *buffer_list, 726 + struct xlog_recover_item *item, 727 + xfs_lsn_t lsn) 728 + { 729 + struct xfs_efd_log_format *efd_formatp; 730 + 731 + efd_formatp = item->ri_buf[0].i_addr; 732 + ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) + 733 + ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) || 734 + (item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_64_t) + 735 + ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_64_t))))); 736 + 737 + xlog_recover_release_intent(log, XFS_LI_EFI, efd_formatp->efd_efi_id); 738 + return 0; 739 + } 740 + 741 + const struct xlog_recover_item_ops xlog_efd_item_ops = { 742 + .item_type = XFS_LI_EFD, 743 + .commit_pass2 = xlog_recover_efd_commit_pass2, 744 + };
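The EFD pass-2 handler above cancels a pending EFI by searching the AIL for an intent item whose id matches efd_efi_id (the .iop_match hook). A toy model of that search, using a plain singly linked list in place of the AIL and an illustrative type code (the real lookup also drops the EFI's reference, which this sketch omits):

```c
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* Minimal stand-in for a log item carrying an intent id. */
struct intent_item {
	int			item_type;	/* e.g. XFS_LI_EFI */
	uint64_t		intent_id;	/* cf. efi_format.efi_id */
	struct intent_item	*next;		/* next AIL entry */
};

#define LI_EFI	0x1236	/* illustrative type code, not the on-disk value */

/*
 * Sketch of the lookup inside xlog_recover_release_intent(): walk the
 * pending intents and return the one whose type and id match, i.e. the
 * EFI named by the EFD's efd_efi_id.
 */
static struct intent_item *
find_intent(struct intent_item *ail, int type, uint64_t id)
{
	for (; ail; ail = ail->next)
		if (ail->item_type == type && ail->intent_id == id)
			return ail;
	return NULL;	/* intent already finished before the crash */
}
```

A NULL result is the normal case for an intent whose done item made it to the log: nothing remains to recover.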
+5 -20
fs/xfs/xfs_extfree_item.h
··· 17 17 #define XFS_EFI_MAX_FAST_EXTENTS 16 18 18 19 19 /* 20 - * Define EFI flag bits. Manipulated by set/clear/test_bit operators. 21 - */ 22 - #define XFS_EFI_RECOVERED 1 23 - 24 - /* 25 20 * This is the "extent free intention" log item. It is used to log the fact 26 21 * that some extents need to be free. It is used in conjunction with the 27 22 * "extent free done" log item described below. ··· 45 50 * of commit failure or log I/O errors. Note that the EFD is not inserted in the 46 51 * AIL, so at this point both the EFI and EFD are freed. 47 52 */ 48 - typedef struct xfs_efi_log_item { 53 + struct xfs_efi_log_item { 49 54 struct xfs_log_item efi_item; 50 55 atomic_t efi_refcount; 51 56 atomic_t efi_next_extent; 52 - unsigned long efi_flags; /* misc flags */ 53 57 xfs_efi_log_format_t efi_format; 54 - } xfs_efi_log_item_t; 58 + }; 55 59 56 60 /* 57 61 * This is the "extent free done" log item. It is used to log 58 62 * the fact that some extents earlier mentioned in an efi item 59 63 * have been freed. 60 64 */ 61 - typedef struct xfs_efd_log_item { 65 + struct xfs_efd_log_item { 62 66 struct xfs_log_item efd_item; 63 - xfs_efi_log_item_t *efd_efip; 67 + struct xfs_efi_log_item *efd_efip; 64 68 uint efd_next_extent; 65 69 xfs_efd_log_format_t efd_format; 66 - } xfs_efd_log_item_t; 70 + }; 67 71 68 72 /* 69 73 * Max number of extents in fast allocation path. ··· 71 77 72 78 extern struct kmem_zone *xfs_efi_zone; 73 79 extern struct kmem_zone *xfs_efd_zone; 74 - 75 - xfs_efi_log_item_t *xfs_efi_init(struct xfs_mount *, uint); 76 - int xfs_efi_copy_format(xfs_log_iovec_t *buf, 77 - xfs_efi_log_format_t *dst_efi_fmt); 78 - void xfs_efi_item_free(xfs_efi_log_item_t *); 79 - void xfs_efi_release(struct xfs_efi_log_item *); 80 - 81 - int xfs_efi_recover(struct xfs_mount *mp, 82 - struct xfs_efi_log_item *efip); 83 80 84 81 #endif /* __XFS_EXTFREE_ITEM_H__ */
+1 -1
fs/xfs/xfs_file.c
··· 1102 1102 * certain to have the next operation be a read there. 1103 1103 */ 1104 1104 mode = xfs_ilock_data_map_shared(ip); 1105 - if (ip->i_d.di_nextents > 0) 1105 + if (ip->i_df.if_nextents > 0) 1106 1106 error = xfs_dir3_data_readahead(ip, 0, 0); 1107 1107 xfs_iunlock(ip, mode); 1108 1108 return error;
+1 -4
fs/xfs/xfs_fsops.c
··· 504 504 } else if (logerror) { 505 505 xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_LOGERROR, 506 506 "Log I/O Error Detected. Shutting down filesystem"); 507 - } else if (flags & SHUTDOWN_DEVICE_REQ) { 508 - xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_IOERROR, 509 - "All device paths lost. Shutting down filesystem"); 510 - } else if (!(flags & SHUTDOWN_REMOTE_REQ)) { 507 + } else { 511 508 xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_IOERROR, 512 509 "I/O Error Detected. Shutting down filesystem"); 513 510 }
+168 -173
fs/xfs/xfs_icache.c
··· 22 22 #include "xfs_dquot_item.h" 23 23 #include "xfs_dquot.h" 24 24 #include "xfs_reflink.h" 25 + #include "xfs_ialloc.h" 25 26 26 27 #include <linux/iversion.h> 27 28 ··· 63 62 memset(&ip->i_imap, 0, sizeof(struct xfs_imap)); 64 63 ip->i_afp = NULL; 65 64 ip->i_cowfp = NULL; 66 - ip->i_cnextents = 0; 67 - ip->i_cformat = XFS_DINODE_FMT_EXTENTS; 68 65 memset(&ip->i_df, 0, sizeof(ip->i_df)); 69 66 ip->i_flags = 0; 70 67 ip->i_delayed_blks = 0; ··· 87 88 case S_IFREG: 88 89 case S_IFDIR: 89 90 case S_IFLNK: 90 - xfs_idestroy_fork(ip, XFS_DATA_FORK); 91 + xfs_idestroy_fork(&ip->i_df); 91 92 break; 92 93 } 93 94 94 - if (ip->i_afp) 95 - xfs_idestroy_fork(ip, XFS_ATTR_FORK); 96 - if (ip->i_cowfp) 97 - xfs_idestroy_fork(ip, XFS_COW_FORK); 98 - 95 + if (ip->i_afp) { 96 + xfs_idestroy_fork(ip->i_afp); 97 + kmem_cache_free(xfs_ifork_zone, ip->i_afp); 98 + } 99 + if (ip->i_cowfp) { 100 + xfs_idestroy_fork(ip->i_cowfp); 101 + kmem_cache_free(xfs_ifork_zone, ip->i_cowfp); 102 + } 99 103 if (ip->i_itemp) { 100 104 ASSERT(!test_bit(XFS_LI_IN_AIL, 101 105 &ip->i_itemp->ili_item.li_flags)); ··· 425 423 spin_unlock(&ip->i_flags_lock); 426 424 rcu_read_unlock(); 427 425 426 + ASSERT(!rwsem_is_locked(&inode->i_rwsem)); 428 427 error = xfs_reinit_inode(mp, inode); 429 428 if (error) { 430 429 bool wake; ··· 458 455 inode->i_state = I_NEW; 459 456 ip->i_sick = 0; 460 457 ip->i_checked = 0; 461 - 462 - ASSERT(!rwsem_is_locked(&inode->i_rwsem)); 463 - init_rwsem(&inode->i_rwsem); 464 458 465 459 spin_unlock(&ip->i_flags_lock); 466 460 spin_unlock(&pag->pag_ici_lock); ··· 510 510 if (!ip) 511 511 return -ENOMEM; 512 512 513 - error = xfs_iread(mp, tp, ip, flags); 513 + error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, flags); 514 514 if (error) 515 515 goto out_destroy; 516 516 517 - if (!xfs_inode_verify_forks(ip)) { 518 - error = -EFSCORRUPTED; 519 - goto out_destroy; 517 + /* 518 + * For version 5 superblocks, if we are initialising a new inode and we 519 + * are not utilising the 
XFS_MOUNT_IKEEP inode cluster mode, we can 520 + * simply build the new inode core with a random generation number. 521 + * 522 + * For version 4 (and older) superblocks, log recovery is dependent on 523 + * the di_flushiter field being initialised from the current on-disk 524 + * value and hence we must also read the inode off disk even when 525 + * initializing new inodes. 526 + */ 527 + if (xfs_sb_version_has_v3inode(&mp->m_sb) && 528 + (flags & XFS_IGET_CREATE) && !(mp->m_flags & XFS_MOUNT_IKEEP)) { 529 + VFS_I(ip)->i_generation = prandom_u32(); 530 + } else { 531 + struct xfs_dinode *dip; 532 + struct xfs_buf *bp; 533 + 534 + error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &bp, 0); 535 + if (error) 536 + goto out_destroy; 537 + 538 + error = xfs_inode_from_disk(ip, dip); 539 + if (!error) 540 + xfs_buf_set_ref(bp, XFS_INO_REF); 541 + xfs_trans_brelse(tp, bp); 542 + 543 + if (error) 544 + goto out_destroy; 520 545 } 521 546 522 547 trace_xfs_iget_miss(ip); 523 - 524 548 525 549 /* 526 550 * Check the inode free state is valid. This also detects lookup ··· 761 737 */ 762 738 #define XFS_LOOKUP_BATCH 32 763 739 764 - STATIC int 765 - xfs_inode_ag_walk_grab( 740 + /* 741 + * Decide if the given @ip is eligible to be a part of the inode walk, and 742 + * grab it if so. Returns true if it's ready to go or false if we should just 743 + * ignore it. 744 + */ 745 + STATIC bool 746 + xfs_inode_walk_ag_grab( 766 747 struct xfs_inode *ip, 767 748 int flags) 768 749 { 769 750 struct inode *inode = VFS_I(ip); 770 - bool newinos = !!(flags & XFS_AGITER_INEW_WAIT); 751 + bool newinos = !!(flags & XFS_INODE_WALK_INEW_WAIT); 771 752 772 753 ASSERT(rcu_read_lock_held()); 773 754 ··· 797 768 798 769 /* nothing to sync during shutdown */ 799 770 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 800 - return -EFSCORRUPTED; 771 + return false; 801 772 802 773 /* If we can't grab the inode, it must on it's way to reclaim. 
*/ 803 774 if (!igrab(inode)) 804 - return -ENOENT; 775 + return false; 805 776 806 777 /* inode is valid */ 807 - return 0; 778 + return true; 808 779 809 780 out_unlock_noent: 810 781 spin_unlock(&ip->i_flags_lock); 811 - return -ENOENT; 782 + return false; 812 783 } 813 784 785 + /* 786 + * For a given per-AG structure @pag, grab, @execute, and rele all incore 787 + * inodes with the given radix tree @tag. 788 + */ 814 789 STATIC int 815 - xfs_inode_ag_walk( 816 - struct xfs_mount *mp, 790 + xfs_inode_walk_ag( 817 791 struct xfs_perag *pag, 818 - int (*execute)(struct xfs_inode *ip, int flags, 819 - void *args), 820 - int flags, 792 + int iter_flags, 793 + int (*execute)(struct xfs_inode *ip, void *args), 821 794 void *args, 822 - int tag, 823 - int iter_flags) 795 + int tag) 824 796 { 797 + struct xfs_mount *mp = pag->pag_mount; 825 798 uint32_t first_index; 826 799 int last_error = 0; 827 800 int skipped; 828 - int done; 801 + bool done; 829 802 int nr_found; 830 803 831 804 restart: 832 - done = 0; 805 + done = false; 833 806 skipped = 0; 834 807 first_index = 0; 835 808 nr_found = 0; ··· 842 811 843 812 rcu_read_lock(); 844 813 845 - if (tag == -1) 814 + if (tag == XFS_ICI_NO_TAG) 846 815 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, 847 816 (void **)batch, first_index, 848 817 XFS_LOOKUP_BATCH); ··· 864 833 for (i = 0; i < nr_found; i++) { 865 834 struct xfs_inode *ip = batch[i]; 866 835 867 - if (done || xfs_inode_ag_walk_grab(ip, iter_flags)) 836 + if (done || !xfs_inode_walk_ag_grab(ip, iter_flags)) 868 837 batch[i] = NULL; 869 838 870 839 /* ··· 883 852 continue; 884 853 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); 885 854 if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) 886 - done = 1; 855 + done = true; 887 856 } 888 857 889 858 /* unlock now we've grabbed the inodes. 
*/ ··· 892 861 for (i = 0; i < nr_found; i++) { 893 862 if (!batch[i]) 894 863 continue; 895 - if ((iter_flags & XFS_AGITER_INEW_WAIT) && 864 + if ((iter_flags & XFS_INODE_WALK_INEW_WAIT) && 896 865 xfs_iflags_test(batch[i], XFS_INEW)) 897 866 xfs_inew_wait(batch[i]); 898 - error = execute(batch[i], flags, args); 867 + error = execute(batch[i], args); 899 868 xfs_irele(batch[i]); 900 869 if (error == -EAGAIN) { 901 870 skipped++; ··· 916 885 if (skipped) { 917 886 delay(1); 918 887 goto restart; 888 + } 889 + return last_error; 890 + } 891 + 892 + /* Fetch the next (possibly tagged) per-AG structure. */ 893 + static inline struct xfs_perag * 894 + xfs_inode_walk_get_perag( 895 + struct xfs_mount *mp, 896 + xfs_agnumber_t agno, 897 + int tag) 898 + { 899 + if (tag == XFS_ICI_NO_TAG) 900 + return xfs_perag_get(mp, agno); 901 + return xfs_perag_get_tag(mp, agno, tag); 902 + } 903 + 904 + /* 905 + * Call the @execute function on all incore inodes matching the radix tree 906 + * @tag. 907 + */ 908 + int 909 + xfs_inode_walk( 910 + struct xfs_mount *mp, 911 + int iter_flags, 912 + int (*execute)(struct xfs_inode *ip, void *args), 913 + void *args, 914 + int tag) 915 + { 916 + struct xfs_perag *pag; 917 + int error = 0; 918 + int last_error = 0; 919 + xfs_agnumber_t ag; 920 + 921 + ag = 0; 922 + while ((pag = xfs_inode_walk_get_perag(mp, ag, tag))) { 923 + ag = pag->pag_agno + 1; 924 + error = xfs_inode_walk_ag(pag, iter_flags, execute, args, tag); 925 + xfs_perag_put(pag); 926 + if (error) { 927 + last_error = error; 928 + if (error == -EFSCORRUPTED) 929 + break; 930 + } 919 931 } 920 932 return last_error; 921 933 } ··· 1024 950 sb_end_write(mp->m_super); 1025 951 1026 952 xfs_queue_cowblocks(mp); 1027 - } 1028 - 1029 - int 1030 - xfs_inode_ag_iterator_flags( 1031 - struct xfs_mount *mp, 1032 - int (*execute)(struct xfs_inode *ip, int flags, 1033 - void *args), 1034 - int flags, 1035 - void *args, 1036 - int iter_flags) 1037 - { 1038 - struct xfs_perag *pag; 1039 - int 
error = 0; 1040 - int last_error = 0; 1041 - xfs_agnumber_t ag; 1042 - 1043 - ag = 0; 1044 - while ((pag = xfs_perag_get(mp, ag))) { 1045 - ag = pag->pag_agno + 1; 1046 - error = xfs_inode_ag_walk(mp, pag, execute, flags, args, -1, 1047 - iter_flags); 1048 - xfs_perag_put(pag); 1049 - if (error) { 1050 - last_error = error; 1051 - if (error == -EFSCORRUPTED) 1052 - break; 1053 - } 1054 - } 1055 - return last_error; 1056 - } 1057 - 1058 - int 1059 - xfs_inode_ag_iterator( 1060 - struct xfs_mount *mp, 1061 - int (*execute)(struct xfs_inode *ip, int flags, 1062 - void *args), 1063 - int flags, 1064 - void *args) 1065 - { 1066 - return xfs_inode_ag_iterator_flags(mp, execute, flags, args, 0); 1067 - } 1068 - 1069 - int 1070 - xfs_inode_ag_iterator_tag( 1071 - struct xfs_mount *mp, 1072 - int (*execute)(struct xfs_inode *ip, int flags, 1073 - void *args), 1074 - int flags, 1075 - void *args, 1076 - int tag) 1077 - { 1078 - struct xfs_perag *pag; 1079 - int error = 0; 1080 - int last_error = 0; 1081 - xfs_agnumber_t ag; 1082 - 1083 - ag = 0; 1084 - while ((pag = xfs_perag_get_tag(mp, ag, tag))) { 1085 - ag = pag->pag_agno + 1; 1086 - error = xfs_inode_ag_walk(mp, pag, execute, flags, args, tag, 1087 - 0); 1088 - xfs_perag_put(pag); 1089 - if (error) { 1090 - last_error = error; 1091 - if (error == -EFSCORRUPTED) 1092 - break; 1093 - } 1094 - } 1095 - return last_error; 1096 953 } 1097 954 1098 955 /* ··· 1133 1128 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { 1134 1129 xfs_iunpin_wait(ip); 1135 1130 /* xfs_iflush_abort() drops the flush lock */ 1136 - xfs_iflush_abort(ip, false); 1131 + xfs_iflush_abort(ip); 1137 1132 goto reclaim; 1138 1133 } 1139 1134 if (xfs_ipincount(ip)) { ··· 1424 1419 return reclaimable; 1425 1420 } 1426 1421 1427 - STATIC int 1422 + STATIC bool 1428 1423 xfs_inode_match_id( 1429 1424 struct xfs_inode *ip, 1430 1425 struct xfs_eofblocks *eofb) 1431 1426 { 1432 1427 if ((eofb->eof_flags & XFS_EOF_FLAGS_UID) && 1433 1428 !uid_eq(VFS_I(ip)->i_uid, 
eofb->eof_uid)) 1434 - return 0; 1429 + return false; 1435 1430 1436 1431 if ((eofb->eof_flags & XFS_EOF_FLAGS_GID) && 1437 1432 !gid_eq(VFS_I(ip)->i_gid, eofb->eof_gid)) 1438 - return 0; 1433 + return false; 1439 1434 1440 1435 if ((eofb->eof_flags & XFS_EOF_FLAGS_PRID) && 1441 1436 ip->i_d.di_projid != eofb->eof_prid) 1442 - return 0; 1437 + return false; 1443 1438 1444 - return 1; 1439 + return true; 1445 1440 } 1446 1441 1447 1442 /* 1448 1443 * A union-based inode filtering algorithm. Process the inode if any of the 1449 1444 * criteria match. This is for global/internal scans only. 1450 1445 */ 1451 - STATIC int 1446 + STATIC bool 1452 1447 xfs_inode_match_id_union( 1453 1448 struct xfs_inode *ip, 1454 1449 struct xfs_eofblocks *eofb) 1455 1450 { 1456 1451 if ((eofb->eof_flags & XFS_EOF_FLAGS_UID) && 1457 1452 uid_eq(VFS_I(ip)->i_uid, eofb->eof_uid)) 1458 - return 1; 1453 + return true; 1459 1454 1460 1455 if ((eofb->eof_flags & XFS_EOF_FLAGS_GID) && 1461 1456 gid_eq(VFS_I(ip)->i_gid, eofb->eof_gid)) 1462 - return 1; 1457 + return true; 1463 1458 1464 1459 if ((eofb->eof_flags & XFS_EOF_FLAGS_PRID) && 1465 1460 ip->i_d.di_projid == eofb->eof_prid) 1466 - return 1; 1461 + return true; 1467 1462 1468 - return 0; 1463 + return false; 1464 + } 1465 + 1466 + /* 1467 + * Is this inode @ip eligible for eof/cow block reclamation, given some 1468 + * filtering parameters @eofb? The inode is eligible if @eofb is null or 1469 + * if the predicate functions match. 
1470 + */ 1471 + static bool 1472 + xfs_inode_matches_eofb( 1473 + struct xfs_inode *ip, 1474 + struct xfs_eofblocks *eofb) 1475 + { 1476 + bool match; 1477 + 1478 + if (!eofb) 1479 + return true; 1480 + 1481 + if (eofb->eof_flags & XFS_EOF_FLAGS_UNION) 1482 + match = xfs_inode_match_id_union(ip, eofb); 1483 + else 1484 + match = xfs_inode_match_id(ip, eofb); 1485 + if (!match) 1486 + return false; 1487 + 1488 + /* skip the inode if the file size is too small */ 1489 + if ((eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE) && 1490 + XFS_ISIZE(ip) < eofb->eof_min_file_size) 1491 + return false; 1492 + 1493 + return true; 1469 1494 } 1470 1495 1471 1496 STATIC int 1472 1497 xfs_inode_free_eofblocks( 1473 1498 struct xfs_inode *ip, 1474 - int flags, 1475 1499 void *args) 1476 1500 { 1477 - int ret = 0; 1478 - struct xfs_eofblocks *eofb = args; 1479 - int match; 1501 + struct xfs_eofblocks *eofb = args; 1502 + bool wait; 1503 + int ret; 1504 + 1505 + wait = eofb && (eofb->eof_flags & XFS_EOF_FLAGS_SYNC); 1480 1506 1481 1507 if (!xfs_can_free_eofblocks(ip, false)) { 1482 1508 /* inode could be preallocated or append-only */ ··· 1520 1484 * If the mapping is dirty the operation can block and wait for some 1521 1485 * time. Unless we are waiting, skip it. 
1522 1486 */ 1523 - if (!(flags & SYNC_WAIT) && 1524 - mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY)) 1487 + if (!wait && mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY)) 1525 1488 return 0; 1526 1489 1527 - if (eofb) { 1528 - if (eofb->eof_flags & XFS_EOF_FLAGS_UNION) 1529 - match = xfs_inode_match_id_union(ip, eofb); 1530 - else 1531 - match = xfs_inode_match_id(ip, eofb); 1532 - if (!match) 1533 - return 0; 1534 - 1535 - /* skip the inode if the file size is too small */ 1536 - if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE && 1537 - XFS_ISIZE(ip) < eofb->eof_min_file_size) 1538 - return 0; 1539 - } 1490 + if (!xfs_inode_matches_eofb(ip, eofb)) 1491 + return 0; 1540 1492 1541 1493 /* 1542 1494 * If the caller is waiting, return -EAGAIN to keep the background 1543 1495 * scanner moving and revisit the inode in a subsequent pass. 1544 1496 */ 1545 1497 if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) { 1546 - if (flags & SYNC_WAIT) 1547 - ret = -EAGAIN; 1548 - return ret; 1498 + if (wait) 1499 + return -EAGAIN; 1500 + return 0; 1549 1501 } 1502 + 1550 1503 ret = xfs_free_eofblocks(ip); 1551 1504 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 1552 1505 1553 1506 return ret; 1554 - } 1555 - 1556 - static int 1557 - __xfs_icache_free_eofblocks( 1558 - struct xfs_mount *mp, 1559 - struct xfs_eofblocks *eofb, 1560 - int (*execute)(struct xfs_inode *ip, int flags, 1561 - void *args), 1562 - int tag) 1563 - { 1564 - int flags = SYNC_TRYLOCK; 1565 - 1566 - if (eofb && (eofb->eof_flags & XFS_EOF_FLAGS_SYNC)) 1567 - flags = SYNC_WAIT; 1568 - 1569 - return xfs_inode_ag_iterator_tag(mp, execute, flags, 1570 - eofb, tag); 1571 1507 } 1572 1508 1573 1509 int ··· 1547 1539 struct xfs_mount *mp, 1548 1540 struct xfs_eofblocks *eofb) 1549 1541 { 1550 - return __xfs_icache_free_eofblocks(mp, eofb, xfs_inode_free_eofblocks, 1542 + return xfs_inode_walk(mp, 0, xfs_inode_free_eofblocks, eofb, 1551 1543 XFS_ICI_EOFBLOCKS_TAG); 1552 1544 } 1553 1545 ··· 1764 1756 STATIC int 1765 
1757 xfs_inode_free_cowblocks( 1766 1758 struct xfs_inode *ip, 1767 - int flags, 1768 1759 void *args) 1769 1760 { 1770 1761 struct xfs_eofblocks *eofb = args; 1771 - int match; 1772 1762 int ret = 0; 1773 1763 1774 1764 if (!xfs_prep_free_cowblocks(ip)) 1775 1765 return 0; 1776 1766 1777 - if (eofb) { 1778 - if (eofb->eof_flags & XFS_EOF_FLAGS_UNION) 1779 - match = xfs_inode_match_id_union(ip, eofb); 1780 - else 1781 - match = xfs_inode_match_id(ip, eofb); 1782 - if (!match) 1783 - return 0; 1784 - 1785 - /* skip the inode if the file size is too small */ 1786 - if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE && 1787 - XFS_ISIZE(ip) < eofb->eof_min_file_size) 1788 - return 0; 1789 - } 1767 + if (!xfs_inode_matches_eofb(ip, eofb)) 1768 + return 0; 1790 1769 1791 1770 /* Free the CoW blocks */ 1792 1771 xfs_ilock(ip, XFS_IOLOCK_EXCL); ··· 1797 1802 struct xfs_mount *mp, 1798 1803 struct xfs_eofblocks *eofb) 1799 1804 { 1800 - return __xfs_icache_free_eofblocks(mp, eofb, xfs_inode_free_cowblocks, 1805 + return xfs_inode_walk(mp, 0, xfs_inode_free_cowblocks, eofb, 1801 1806 XFS_ICI_COWBLOCKS_TAG); 1802 1807 } 1803 1808
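The icache changes above collapse three near-identical iterators into one `xfs_inode_walk` that takes an `execute` callback and a radix-tree tag. The error policy is worth noting: transient callback errors are latched as `last_error` but the walk continues, while `-EFSCORRUPTED` aborts immediately. A simplified sketch of that loop, with plain integers standing in for per-AG structures (names are illustrative):

```c
#include <assert.h>
#include <stddef.h>

#define EFSCORRUPTED 117   /* stand-in for the kernel's -EFSCORRUPTED */

/* Walk "AGs" 0..nr-1, calling @execute on each. Remember the last error
 * seen but keep walking, except for corruption, which aborts at once. */
static int walk(int nr, int (*execute)(int agno, void *args), void *args)
{
    int last_error = 0;

    for (int agno = 0; agno < nr; agno++) {
        int error = execute(agno, args);

        if (error) {
            last_error = error;
            if (error == -EFSCORRUPTED)
                break;
        }
    }
    return last_error;
}

/* Callback with a transient error on AG 1; counts how often it ran. */
static int transient_on_ag1(int agno, void *args)
{
    int *n = args;

    (*n)++;
    return (agno == 1) ? -5 : 0;
}

/* Callback reporting corruption on AG 1. */
static int corrupt_on_ag1(int agno, void *args)
{
    int *n = args;

    (*n)++;
    return (agno == 1) ? -EFSCORRUPTED : 0;
}

/* Transient errors do not stop the walk: all 4 AGs are visited. */
static int run_transient(void)
{
    int n = 0;

    return walk(4, transient_on_ag1, &n) == -5 && n == 4;
}

/* Corruption stops the walk: only AGs 0 and 1 are visited. */
static int run_corrupt(void)
{
    int n = 0;

    return walk(4, corrupt_on_ag1, &n) == -EFSCORRUPTED && n == 2;
}
```

Unifying the iterators this way also let the callback signature drop the unused `flags` argument, as seen in `xfs_inode_free_eofblocks` and `xfs_inode_free_cowblocks` above.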
+5 -46
fs/xfs/xfs_icache.h
··· 24 24 * tags for inode radix tree 25 25 */ 26 26 #define XFS_ICI_NO_TAG (-1) /* special flag for an untagged lookup 27 - in xfs_inode_ag_iterator */ 27 + in xfs_inode_walk */ 28 28 #define XFS_ICI_RECLAIM_TAG 0 /* inode is to be reclaimed */ 29 29 #define XFS_ICI_EOFBLOCKS_TAG 1 /* inode has blocks beyond EOF */ 30 30 #define XFS_ICI_COWBLOCKS_TAG 2 /* inode can have cow blocks to gc */ ··· 40 40 /* 41 41 * flags for AG inode iterator 42 42 */ 43 - #define XFS_AGITER_INEW_WAIT 0x1 /* wait on new inodes */ 43 + #define XFS_INODE_WALK_INEW_WAIT 0x1 /* wait on new inodes */ 44 44 45 45 int xfs_iget(struct xfs_mount *mp, struct xfs_trans *tp, xfs_ino_t ino, 46 46 uint flags, uint lock_flags, xfs_inode_t **ipp); ··· 71 71 void xfs_cowblocks_worker(struct work_struct *); 72 72 void xfs_queue_cowblocks(struct xfs_mount *); 73 73 74 - int xfs_inode_ag_iterator(struct xfs_mount *mp, 75 - int (*execute)(struct xfs_inode *ip, int flags, void *args), 76 - int flags, void *args); 77 - int xfs_inode_ag_iterator_flags(struct xfs_mount *mp, 78 - int (*execute)(struct xfs_inode *ip, int flags, void *args), 79 - int flags, void *args, int iter_flags); 80 - int xfs_inode_ag_iterator_tag(struct xfs_mount *mp, 81 - int (*execute)(struct xfs_inode *ip, int flags, void *args), 82 - int flags, void *args, int tag); 83 - 84 - static inline int 85 - xfs_fs_eofblocks_from_user( 86 - struct xfs_fs_eofblocks *src, 87 - struct xfs_eofblocks *dst) 88 - { 89 - if (src->eof_version != XFS_EOFBLOCKS_VERSION) 90 - return -EINVAL; 91 - 92 - if (src->eof_flags & ~XFS_EOF_FLAGS_VALID) 93 - return -EINVAL; 94 - 95 - if (memchr_inv(&src->pad32, 0, sizeof(src->pad32)) || 96 - memchr_inv(src->pad64, 0, sizeof(src->pad64))) 97 - return -EINVAL; 98 - 99 - dst->eof_flags = src->eof_flags; 100 - dst->eof_prid = src->eof_prid; 101 - dst->eof_min_file_size = src->eof_min_file_size; 102 - 103 - dst->eof_uid = INVALID_UID; 104 - if (src->eof_flags & XFS_EOF_FLAGS_UID) { 105 - dst->eof_uid = 
make_kuid(current_user_ns(), src->eof_uid); 106 - if (!uid_valid(dst->eof_uid)) 107 - return -EINVAL; 108 - } 109 - 110 - dst->eof_gid = INVALID_GID; 111 - if (src->eof_flags & XFS_EOF_FLAGS_GID) { 112 - dst->eof_gid = make_kgid(current_user_ns(), src->eof_gid); 113 - if (!gid_valid(dst->eof_gid)) 114 - return -EINVAL; 115 - } 116 - return 0; 117 - } 74 + int xfs_inode_walk(struct xfs_mount *mp, int iter_flags, 75 + int (*execute)(struct xfs_inode *ip, void *args), 76 + void *args, int tag); 118 77 119 78 int xfs_icache_inode_is_allocated(struct xfs_mount *mp, struct xfs_trans *tp, 120 79 xfs_ino_t ino, bool *inuse);
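The `xfs_fs_eofblocks_from_user` helper removed from this header (and moved out of line) validates a userspace-supplied structure before trusting it: version must match, no unknown flag bits, and all padding bytes must be zero (the kernel uses `memchr_inv` for the padding check). A hedged sketch of that validation shape with a hypothetical request struct:

```c
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#define EINVAL      22
#define REQ_VERSION 1
#define FLAGS_VALID 0x7u   /* hypothetical mask of supported flag bits */

/* Hypothetical userspace request mirroring the checked shape:
 * a version, a flags word, and padding that must be zeroed. */
struct user_req {
    uint32_t version;
    uint32_t flags;
    uint32_t pad32;
    uint64_t pad64[2];
};

/* Return 1 if @len bytes at @p are all zero (a userspace memchr_inv). */
static int all_zero(const void *p, size_t len)
{
    const unsigned char *c = p;

    for (size_t i = 0; i < len; i++)
        if (c[i])
            return 0;
    return 1;
}

/* Reject unknown versions, unknown flags, and nonzero padding. */
static int validate_req(const struct user_req *src)
{
    if (src->version != REQ_VERSION)
        return -EINVAL;
    if (src->flags & ~FLAGS_VALID)
        return -EINVAL;
    if (!all_zero(&src->pad32, sizeof(src->pad32)) ||
        !all_zero(src->pad64, sizeof(src->pad64)))
        return -EINVAL;
    return 0;
}

/* Convenience wrapper for exercising validate_req(). */
static int check(uint32_t ver, uint32_t flags, uint32_t pad)
{
    struct user_req r = { .version = ver, .flags = flags, .pad32 = pad };

    return validate_req(&r);
}
```

Rejecting nonzero padding keeps those bytes available for future extensions: old kernels are guaranteed to have refused requests that set them.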
+152
fs/xfs/xfs_icreate_item.c
··· 6 6 #include "xfs.h" 7 7 #include "xfs_fs.h" 8 8 #include "xfs_shared.h" 9 + #include "xfs_format.h" 9 10 #include "xfs_log_format.h" 11 + #include "xfs_trans_resv.h" 12 + #include "xfs_mount.h" 13 + #include "xfs_inode.h" 10 14 #include "xfs_trans.h" 11 15 #include "xfs_trans_priv.h" 12 16 #include "xfs_icreate_item.h" 13 17 #include "xfs_log.h" 18 + #include "xfs_log_priv.h" 19 + #include "xfs_log_recover.h" 20 + #include "xfs_ialloc.h" 21 + #include "xfs_trace.h" 14 22 15 23 kmem_zone_t *xfs_icreate_zone; /* inode create item zone */ 16 24 ··· 115 107 tp->t_flags |= XFS_TRANS_DIRTY; 116 108 set_bit(XFS_LI_DIRTY, &icp->ic_item.li_flags); 117 109 } 110 + 111 + static enum xlog_recover_reorder 112 + xlog_recover_icreate_reorder( 113 + struct xlog_recover_item *item) 114 + { 115 + /* 116 + * Inode allocation buffers must be replayed before subsequent inode 117 + * items try to modify those buffers. ICREATE items are the logical 118 + * equivalent of logging a newly initialized inode buffer, so recover 119 + * these at the same time that we recover logged buffers. 120 + */ 121 + return XLOG_REORDER_BUFFER_LIST; 122 + } 123 + 124 + /* 125 + * This routine is called when an inode create format structure is found in a 126 + * committed transaction in the log. It's purpose is to initialise the inodes 127 + * being allocated on disk. This requires us to get inode cluster buffers that 128 + * match the range to be initialised, stamped with inode templates and written 129 + * by delayed write so that subsequent modifications will hit the cached buffer 130 + * and only need writing out at the end of recovery. 
131 + */ 132 + STATIC int 133 + xlog_recover_icreate_commit_pass2( 134 + struct xlog *log, 135 + struct list_head *buffer_list, 136 + struct xlog_recover_item *item, 137 + xfs_lsn_t lsn) 138 + { 139 + struct xfs_mount *mp = log->l_mp; 140 + struct xfs_icreate_log *icl; 141 + struct xfs_ino_geometry *igeo = M_IGEO(mp); 142 + xfs_agnumber_t agno; 143 + xfs_agblock_t agbno; 144 + unsigned int count; 145 + unsigned int isize; 146 + xfs_agblock_t length; 147 + int bb_per_cluster; 148 + int cancel_count; 149 + int nbufs; 150 + int i; 151 + 152 + icl = (struct xfs_icreate_log *)item->ri_buf[0].i_addr; 153 + if (icl->icl_type != XFS_LI_ICREATE) { 154 + xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad type"); 155 + return -EINVAL; 156 + } 157 + 158 + if (icl->icl_size != 1) { 159 + xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad icl size"); 160 + return -EINVAL; 161 + } 162 + 163 + agno = be32_to_cpu(icl->icl_ag); 164 + if (agno >= mp->m_sb.sb_agcount) { 165 + xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agno"); 166 + return -EINVAL; 167 + } 168 + agbno = be32_to_cpu(icl->icl_agbno); 169 + if (!agbno || agbno == NULLAGBLOCK || agbno >= mp->m_sb.sb_agblocks) { 170 + xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agbno"); 171 + return -EINVAL; 172 + } 173 + isize = be32_to_cpu(icl->icl_isize); 174 + if (isize != mp->m_sb.sb_inodesize) { 175 + xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad isize"); 176 + return -EINVAL; 177 + } 178 + count = be32_to_cpu(icl->icl_count); 179 + if (!count) { 180 + xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count"); 181 + return -EINVAL; 182 + } 183 + length = be32_to_cpu(icl->icl_length); 184 + if (!length || length >= mp->m_sb.sb_agblocks) { 185 + xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad length"); 186 + return -EINVAL; 187 + } 188 + 189 + /* 190 + * The inode chunk is either full or sparse and we only support 191 + * m_ino_geo.ialloc_min_blks sized sparse allocations at 
this time. 192 + */ 193 + if (length != igeo->ialloc_blks && 194 + length != igeo->ialloc_min_blks) { 195 + xfs_warn(log->l_mp, 196 + "%s: unsupported chunk length", __FUNCTION__); 197 + return -EINVAL; 198 + } 199 + 200 + /* verify inode count is consistent with extent length */ 201 + if ((count >> mp->m_sb.sb_inopblog) != length) { 202 + xfs_warn(log->l_mp, 203 + "%s: inconsistent inode count and chunk length", 204 + __FUNCTION__); 205 + return -EINVAL; 206 + } 207 + 208 + /* 209 + * The icreate transaction can cover multiple cluster buffers and these 210 + * buffers could have been freed and reused. Check the individual 211 + * buffers for cancellation so we don't overwrite anything written after 212 + * a cancellation. 213 + */ 214 + bb_per_cluster = XFS_FSB_TO_BB(mp, igeo->blocks_per_cluster); 215 + nbufs = length / igeo->blocks_per_cluster; 216 + for (i = 0, cancel_count = 0; i < nbufs; i++) { 217 + xfs_daddr_t daddr; 218 + 219 + daddr = XFS_AGB_TO_DADDR(mp, agno, 220 + agbno + i * igeo->blocks_per_cluster); 221 + if (xlog_is_buffer_cancelled(log, daddr, bb_per_cluster)) 222 + cancel_count++; 223 + } 224 + 225 + /* 226 + * We currently only use icreate for a single allocation at a time. This 227 + * means we should expect either all or none of the buffers to be 228 + * cancelled. Be conservative and skip replay if at least one buffer is 229 + * cancelled, but warn the user that something is awry if the buffers 230 + * are not consistent. 231 + * 232 + * XXX: This must be refined to only skip cancelled clusters once we use 233 + * icreate for multiple chunk allocations. 
234 + */ 235 + ASSERT(!cancel_count || cancel_count == nbufs); 236 + if (cancel_count) { 237 + if (cancel_count != nbufs) 238 + xfs_warn(mp, 239 + "WARNING: partial inode chunk cancellation, skipped icreate."); 240 + trace_xfs_log_recover_icreate_cancel(log, icl); 241 + return 0; 242 + } 243 + 244 + trace_xfs_log_recover_icreate_recover(log, icl); 245 + return xfs_ialloc_inode_init(mp, NULL, buffer_list, count, agno, agbno, 246 + length, be32_to_cpu(icl->icl_gen)); 247 + } 248 + 249 + const struct xlog_recover_item_ops xlog_icreate_item_ops = { 250 + .item_type = XFS_LI_ICREATE, 251 + .reorder = xlog_recover_icreate_reorder, 252 + .commit_pass2 = xlog_recover_icreate_commit_pass2, 253 + };
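The relocated `xlog_recover_icreate_commit_pass2` above bounds-checks every field of the log record before acting on it, since recovery must never trust a corrupt (or hostile) log. A condensed sketch of that validation cascade, with fixed constants standing in for the superblock geometry the real handler consults (`sb_agcount`, `sb_agblocks`, `sb_inodesize`, `sb_inopblog`):

```c
#include <assert.h>
#include <stdint.h>

#define EINVAL     22

/* Hypothetical geometry limits standing in for superblock fields. */
#define AG_COUNT   4u
#define AG_BLOCKS  1024u
#define INODE_SIZE 512u
#define INOPBLOG   3u   /* log2(inodes per block) */

struct icreate_rec {
    uint32_t agno;
    uint32_t agbno;
    uint32_t isize;
    uint32_t count;
    uint32_t length;
};

/* Reject the record unless every field is internally consistent. */
static int validate_icreate(const struct icreate_rec *r)
{
    if (r->agno >= AG_COUNT)
        return -EINVAL;
    if (!r->agbno || r->agbno >= AG_BLOCKS)
        return -EINVAL;
    if (r->isize != INODE_SIZE)
        return -EINVAL;
    if (!r->count || !r->length || r->length >= AG_BLOCKS)
        return -EINVAL;
    /* inode count must be consistent with the extent length */
    if ((r->count >> INOPBLOG) != r->length)
        return -EINVAL;
    return 0;
}

/* Convenience wrapper for exercising validate_icreate(). */
static int check_rec(uint32_t agno, uint32_t agbno, uint32_t isize,
                     uint32_t count, uint32_t length)
{
    struct icreate_rec r = { agno, agbno, isize, count, length };

    return validate_icreate(&r);
}
```

Only after all fields check out does the real handler go on to probe each cluster buffer for cancellation and finally replay the chunk via `xfs_ialloc_inode_init`.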
+89 -174
fs/xfs/xfs_inode.c
··· 112 112 { 113 113 uint lock_mode = XFS_ILOCK_SHARED; 114 114 115 - if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE && 115 + if (ip->i_df.if_format == XFS_DINODE_FMT_BTREE && 116 116 (ip->i_df.if_flags & XFS_IFEXTENTS) == 0) 117 117 lock_mode = XFS_ILOCK_EXCL; 118 118 xfs_ilock(ip, lock_mode); ··· 125 125 { 126 126 uint lock_mode = XFS_ILOCK_SHARED; 127 127 128 - if (ip->i_d.di_aformat == XFS_DINODE_FMT_BTREE && 128 + if (ip->i_afp && 129 + ip->i_afp->if_format == XFS_DINODE_FMT_BTREE && 129 130 (ip->i_afp->if_flags & XFS_IFEXTENTS) == 0) 130 131 lock_mode = XFS_ILOCK_EXCL; 131 132 xfs_ilock(ip, lock_mode); ··· 826 825 inode->i_mode &= ~S_ISGID; 827 826 828 827 ip->i_d.di_size = 0; 829 - ip->i_d.di_nextents = 0; 828 + ip->i_df.if_nextents = 0; 830 829 ASSERT(ip->i_d.di_nblocks == 0); 831 830 832 831 tv = current_time(inode); ··· 852 851 case S_IFCHR: 853 852 case S_IFBLK: 854 853 case S_IFSOCK: 855 - ip->i_d.di_format = XFS_DINODE_FMT_DEV; 854 + ip->i_df.if_format = XFS_DINODE_FMT_DEV; 856 855 ip->i_df.if_flags = 0; 857 856 flags |= XFS_ILOG_DEV; 858 857 break; ··· 908 907 } 909 908 /* FALLTHROUGH */ 910 909 case S_IFLNK: 911 - ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS; 910 + ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS; 912 911 ip->i_df.if_flags = XFS_IFEXTENTS; 913 912 ip->i_df.if_bytes = 0; 914 913 ip->i_df.if_u1.if_root = NULL; ··· 916 915 default: 917 916 ASSERT(0); 918 917 } 919 - /* 920 - * Attribute fork settings for new inode. 921 - */ 922 - ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; 923 - ip->i_d.di_anextents = 0; 924 918 925 919 /* 926 920 * Log the new values stuffed into the inode. 
··· 1682 1686 if (error) 1683 1687 goto error_trans_cancel; 1684 1688 1685 - ASSERT(ip->i_d.di_nextents == 0); 1689 + ASSERT(ip->i_df.if_nextents == 0); 1686 1690 1687 1691 error = xfs_trans_commit(tp); 1688 1692 if (error) ··· 1832 1836 1833 1837 if (S_ISREG(VFS_I(ip)->i_mode) && 1834 1838 (ip->i_d.di_size != 0 || XFS_ISIZE(ip) != 0 || 1835 - ip->i_d.di_nextents > 0 || ip->i_delayed_blks > 0)) 1839 + ip->i_df.if_nextents > 0 || ip->i_delayed_blks > 0)) 1836 1840 truncate = 1; 1837 1841 1838 1842 error = xfs_qm_dqattach(ip); ··· 1858 1862 } 1859 1863 1860 1864 ASSERT(!ip->i_afp); 1861 - ASSERT(ip->i_d.di_anextents == 0); 1862 1865 ASSERT(ip->i_d.di_forkoff == 0); 1863 1866 1864 1867 /* ··· 2167 2172 2168 2173 ASSERT(xfs_verify_agino_or_null(mp, agno, next_agino)); 2169 2174 2170 - error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &ibp, 0, 0); 2175 + error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &ibp, 0); 2171 2176 if (error) 2172 2177 return error; 2173 2178 ··· 2297 2302 return error; 2298 2303 } 2299 2304 2300 - error = xfs_imap_to_bp(mp, tp, imap, dipp, bpp, 0, 0); 2305 + error = xfs_imap_to_bp(mp, tp, imap, dipp, bpp, 0); 2301 2306 if (error) { 2302 2307 xfs_warn(mp, "%s: xfs_imap_to_bp returned error %d.", 2303 2308 __func__, error); ··· 2597 2602 xfs_daddr_t blkno; 2598 2603 xfs_buf_t *bp; 2599 2604 xfs_inode_t *ip; 2600 - xfs_inode_log_item_t *iip; 2605 + struct xfs_inode_log_item *iip; 2601 2606 struct xfs_log_item *lip; 2602 2607 struct xfs_perag *pag; 2603 2608 struct xfs_ino_geometry *igeo = M_IGEO(mp); ··· 2657 2662 */ 2658 2663 list_for_each_entry(lip, &bp->b_li_list, li_bio_list) { 2659 2664 if (lip->li_type == XFS_LI_INODE) { 2660 - iip = (xfs_inode_log_item_t *)lip; 2665 + iip = (struct xfs_inode_log_item *)lip; 2661 2666 ASSERT(iip->ili_logged == 1); 2662 2667 lip->li_cb = xfs_istale_done; 2663 2668 xfs_trans_ail_copy_lsn(mp->m_ail, ··· 2707 2712 } 2708 2713 2709 2714 /* 2710 - * Free any local-format buffers sitting around before we reset to 
2711 - * extents format. 2712 - */ 2713 - static inline void 2714 - xfs_ifree_local_data( 2715 - struct xfs_inode *ip, 2716 - int whichfork) 2717 - { 2718 - struct xfs_ifork *ifp; 2719 - 2720 - if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL) 2721 - return; 2722 - 2723 - ifp = XFS_IFORK_PTR(ip, whichfork); 2724 - xfs_idata_realloc(ip, -ifp->if_bytes, whichfork); 2725 - } 2726 - 2727 - /* 2728 2715 * This is called to return an inode to the inode free list. 2729 2716 * The inode should already be truncated to 0 length and have 2730 2717 * no pages associated with it. This routine also assumes that ··· 2726 2749 2727 2750 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 2728 2751 ASSERT(VFS_I(ip)->i_nlink == 0); 2729 - ASSERT(ip->i_d.di_nextents == 0); 2730 - ASSERT(ip->i_d.di_anextents == 0); 2752 + ASSERT(ip->i_df.if_nextents == 0); 2731 2753 ASSERT(ip->i_d.di_size == 0 || !S_ISREG(VFS_I(ip)->i_mode)); 2732 2754 ASSERT(ip->i_d.di_nblocks == 0); 2733 2755 ··· 2741 2765 if (error) 2742 2766 return error; 2743 2767 2744 - xfs_ifree_local_data(ip, XFS_DATA_FORK); 2745 - xfs_ifree_local_data(ip, XFS_ATTR_FORK); 2768 + /* 2769 + * Free any local-format data sitting around before we reset the 2770 + * data fork to extents format. Note that the attr fork data has 2771 + * already been freed by xfs_attr_inactive. 
2772 + */ 2773 + if (ip->i_df.if_format == XFS_DINODE_FMT_LOCAL) { 2774 + kmem_free(ip->i_df.if_u1.if_data); 2775 + ip->i_df.if_u1.if_data = NULL; 2776 + ip->i_df.if_bytes = 0; 2777 + } 2746 2778 2747 2779 VFS_I(ip)->i_mode = 0; /* mark incore inode as free */ 2748 2780 ip->i_d.di_flags = 0; 2749 2781 ip->i_d.di_flags2 = 0; 2750 2782 ip->i_d.di_dmevmask = 0; 2751 2783 ip->i_d.di_forkoff = 0; /* mark the attr fork not in use */ 2752 - ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS; 2753 - ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; 2784 + ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS; 2754 2785 2755 2786 /* Don't attempt to replay owner changes for a deleted inode */ 2756 2787 ip->i_itemp->ili_fields &= ~(XFS_ILOG_AOWNER|XFS_ILOG_DOWNER); ··· 3479 3496 struct xfs_inode **cilist; 3480 3497 struct xfs_inode *cip; 3481 3498 struct xfs_ino_geometry *igeo = M_IGEO(mp); 3499 + int error = 0; 3482 3500 int nr_found; 3483 3501 int clcount = 0; 3484 3502 int i; ··· 3572 3588 * re-check that it's dirty before flushing. 3573 3589 */ 3574 3590 if (!xfs_inode_clean(cip)) { 3575 - int error; 3576 3591 error = xfs_iflush_int(cip, bp); 3577 3592 if (error) { 3578 3593 xfs_iunlock(cip, XFS_ILOCK_SHARED); 3579 - goto cluster_corrupt_out; 3594 + goto out_free; 3580 3595 } 3581 3596 clcount++; 3582 3597 } else { ··· 3594 3611 kmem_free(cilist); 3595 3612 out_put: 3596 3613 xfs_perag_put(pag); 3597 - return 0; 3598 - 3599 - 3600 - cluster_corrupt_out: 3601 - /* 3602 - * Corruption detected in the clustering loop. Invalidate the 3603 - * inode buffer and shut down the filesystem. 3604 - */ 3605 - rcu_read_unlock(); 3606 - 3607 - /* 3608 - * We'll always have an inode attached to the buffer for completion 3609 - * process by the time we are called from xfs_iflush(). Hence we have 3610 - * always need to do IO completion processing to abort the inodes 3611 - * attached to the buffer. handle them just like the shutdown case in 3612 - * xfs_buf_submit(). 
3613 - */ 3614 - ASSERT(bp->b_iodone); 3615 - bp->b_flags |= XBF_ASYNC; 3616 - bp->b_flags &= ~XBF_DONE; 3617 - xfs_buf_stale(bp); 3618 - xfs_buf_ioerror(bp, -EIO); 3619 - xfs_buf_ioend(bp); 3620 - 3621 - xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 3622 - 3623 - /* abort the corrupt inode, as it was not attached to the buffer */ 3624 - xfs_iflush_abort(cip, false); 3625 - kmem_free(cilist); 3626 - xfs_perag_put(pag); 3627 - return -EFSCORRUPTED; 3614 + return error; 3628 3615 } 3629 3616 3630 3617 /* ··· 3620 3667 3621 3668 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 3622 3669 ASSERT(xfs_isiflocked(ip)); 3623 - ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || 3624 - ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)); 3670 + ASSERT(ip->i_df.if_format != XFS_DINODE_FMT_BTREE || 3671 + ip->i_df.if_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)); 3625 3672 3626 3673 *bpp = NULL; 3627 3674 ··· 3641 3688 } 3642 3689 3643 3690 /* 3644 - * This may have been unpinned because the filesystem is shutting 3645 - * down forcibly. If that's the case we must not write this inode 3646 - * to disk, because the log record didn't make it to disk. 3647 - * 3648 - * We also have to remove the log item from the AIL in this case, 3649 - * as we wait for an empty AIL as part of the unmount process. 3650 - */ 3651 - if (XFS_FORCED_SHUTDOWN(mp)) { 3652 - error = -EIO; 3653 - goto abort_out; 3654 - } 3655 - 3656 - /* 3657 3691 * Get the buffer containing the on-disk inode. We are doing a try-lock 3658 - * operation here, so we may get an EAGAIN error. In that case, we 3659 - * simply want to return with the inode still dirty. 3692 + * operation here, so we may get an EAGAIN error. In that case, return 3693 + * leaving the inode dirty. 3660 3694 * 3661 3695 * If we get any other error, we effectively have a corruption situation 3662 - * and we cannot flush the inode, so we treat it the same as failing 3663 - * xfs_iflush_int(). 
3696 + * and we cannot flush the inode. Abort the flush and shut down. 3664 3697 */ 3665 - error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &bp, XBF_TRYLOCK, 3666 - 0); 3698 + error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &bp, XBF_TRYLOCK); 3667 3699 if (error == -EAGAIN) { 3668 3700 xfs_ifunlock(ip); 3669 3701 return error; 3670 3702 } 3671 3703 if (error) 3672 - goto corrupt_out; 3673 - 3674 - /* 3675 - * First flush out the inode that xfs_iflush was called with. 3676 - */ 3677 - error = xfs_iflush_int(ip, bp); 3678 - if (error) 3679 - goto corrupt_out; 3704 + goto abort; 3680 3705 3681 3706 /* 3682 3707 * If the buffer is pinned then push on the log now so we won't ··· 3664 3733 xfs_log_force(mp, 0); 3665 3734 3666 3735 /* 3667 - * inode clustering: try to gather other inodes into this write 3736 + * Flush the provided inode then attempt to gather others from the 3737 + * cluster into the write. 3668 3738 * 3669 - * Note: Any error during clustering will result in the filesystem 3670 - * being shut down and completion callbacks run on the cluster buffer. 3671 - * As we have already flushed and attached this inode to the buffer, 3672 - * it has already been aborted and released by xfs_iflush_cluster() and 3673 - * so we have no further error handling to do here. 3739 + * Note: Once we attempt to flush an inode, we must run buffer 3740 + * completion callbacks on any failure. If this fails, simulate an I/O 3741 + * failure on the buffer and shut down. 
3674 3742 */ 3675 - error = xfs_iflush_cluster(ip, bp); 3676 - if (error) 3677 - return error; 3743 + error = xfs_iflush_int(ip, bp); 3744 + if (!error) 3745 + error = xfs_iflush_cluster(ip, bp); 3746 + if (error) { 3747 + bp->b_flags |= XBF_ASYNC; 3748 + xfs_buf_ioend_fail(bp); 3749 + goto shutdown; 3750 + } 3678 3751 3679 3752 *bpp = bp; 3680 3753 return 0; 3681 3754 3682 - corrupt_out: 3683 - if (bp) 3684 - xfs_buf_relse(bp); 3755 + abort: 3756 + xfs_iflush_abort(ip); 3757 + shutdown: 3685 3758 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 3686 - abort_out: 3687 - /* abort the corrupt inode, as it was not attached to the buffer */ 3688 - xfs_iflush_abort(ip, false); 3689 3759 return error; 3690 - } 3691 - 3692 - /* 3693 - * If there are inline format data / attr forks attached to this inode, 3694 - * make sure they're not corrupt. 3695 - */ 3696 - bool 3697 - xfs_inode_verify_forks( 3698 - struct xfs_inode *ip) 3699 - { 3700 - struct xfs_ifork *ifp; 3701 - xfs_failaddr_t fa; 3702 - 3703 - fa = xfs_ifork_verify_data(ip, &xfs_default_ifork_ops); 3704 - if (fa) { 3705 - ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); 3706 - xfs_inode_verifier_error(ip, -EFSCORRUPTED, "data fork", 3707 - ifp->if_u1.if_data, ifp->if_bytes, fa); 3708 - return false; 3709 - } 3710 - 3711 - fa = xfs_ifork_verify_attr(ip, &xfs_default_ifork_ops); 3712 - if (fa) { 3713 - ifp = XFS_IFORK_PTR(ip, XFS_ATTR_FORK); 3714 - xfs_inode_verifier_error(ip, -EFSCORRUPTED, "attr fork", 3715 - ifp ? ifp->if_u1.if_data : NULL, 3716 - ifp ? 
ifp->if_bytes : 0, fa); 3717 - return false; 3718 - } 3719 - return true; 3720 3760 } 3721 3761 3722 3762 STATIC int ··· 3698 3796 struct xfs_inode_log_item *iip = ip->i_itemp; 3699 3797 struct xfs_dinode *dip; 3700 3798 struct xfs_mount *mp = ip->i_mount; 3799 + int error; 3701 3800 3702 3801 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 3703 3802 ASSERT(xfs_isiflocked(ip)); 3704 - ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || 3705 - ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)); 3803 + ASSERT(ip->i_df.if_format != XFS_DINODE_FMT_BTREE || 3804 + ip->i_df.if_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)); 3706 3805 ASSERT(iip != NULL && iip->ili_fields != 0); 3707 3806 3708 - /* set *dip = inode's place in the buffer */ 3709 3807 dip = xfs_buf_offset(bp, ip->i_imap.im_boffset); 3710 3808 3809 + /* 3810 + * We don't flush the inode if any of the following checks fail, but we 3811 + * do still update the log item and attach to the backing buffer as if 3812 + * the flush happened. This is a formality to facilitate predictable 3813 + * error handling as the caller will shutdown and fail the buffer. 
3814 + */ 3815 + error = -EFSCORRUPTED; 3711 3816 if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC), 3712 3817 mp, XFS_ERRTAG_IFLUSH_1)) { 3713 3818 xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 3714 3819 "%s: Bad inode %Lu magic number 0x%x, ptr "PTR_FMT, 3715 3820 __func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip); 3716 - goto corrupt_out; 3821 + goto flush_out; 3717 3822 } 3718 3823 if (S_ISREG(VFS_I(ip)->i_mode)) { 3719 3824 if (XFS_TEST_ERROR( 3720 - (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) && 3721 - (ip->i_d.di_format != XFS_DINODE_FMT_BTREE), 3825 + ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS && 3826 + ip->i_df.if_format != XFS_DINODE_FMT_BTREE, 3722 3827 mp, XFS_ERRTAG_IFLUSH_3)) { 3723 3828 xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 3724 3829 "%s: Bad regular inode %Lu, ptr "PTR_FMT, 3725 3830 __func__, ip->i_ino, ip); 3726 - goto corrupt_out; 3831 + goto flush_out; 3727 3832 } 3728 3833 } else if (S_ISDIR(VFS_I(ip)->i_mode)) { 3729 3834 if (XFS_TEST_ERROR( 3730 - (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) && 3731 - (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) && 3732 - (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL), 3835 + ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS && 3836 + ip->i_df.if_format != XFS_DINODE_FMT_BTREE && 3837 + ip->i_df.if_format != XFS_DINODE_FMT_LOCAL, 3733 3838 mp, XFS_ERRTAG_IFLUSH_4)) { 3734 3839 xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 3735 3840 "%s: Bad directory inode %Lu, ptr "PTR_FMT, 3736 3841 __func__, ip->i_ino, ip); 3737 - goto corrupt_out; 3842 + goto flush_out; 3738 3843 } 3739 3844 } 3740 - if (XFS_TEST_ERROR(ip->i_d.di_nextents + ip->i_d.di_anextents > 3845 + if (XFS_TEST_ERROR(ip->i_df.if_nextents + xfs_ifork_nextents(ip->i_afp) > 3741 3846 ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5)) { 3742 3847 xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 3743 3848 "%s: detected corrupt incore inode %Lu, " 3744 3849 "total extents = %d, nblocks = %Ld, ptr "PTR_FMT, 3745 3850 __func__, ip->i_ino, 3746 - ip->i_d.di_nextents + 
ip->i_d.di_anextents, 3851 + ip->i_df.if_nextents + xfs_ifork_nextents(ip->i_afp), 3747 3852 ip->i_d.di_nblocks, ip); 3748 - goto corrupt_out; 3853 + goto flush_out; 3749 3854 } 3750 3855 if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize, 3751 3856 mp, XFS_ERRTAG_IFLUSH_6)) { 3752 3857 xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 3753 3858 "%s: bad inode %Lu, forkoff 0x%x, ptr "PTR_FMT, 3754 3859 __func__, ip->i_ino, ip->i_d.di_forkoff, ip); 3755 - goto corrupt_out; 3860 + goto flush_out; 3756 3861 } 3757 3862 3758 3863 /* ··· 3774 3865 if (!xfs_sb_version_has_v3inode(&mp->m_sb)) 3775 3866 ip->i_d.di_flushiter++; 3776 3867 3777 - /* Check the inline fork data before we write out. */ 3778 - if (!xfs_inode_verify_forks(ip)) 3779 - goto corrupt_out; 3868 + /* 3869 + * If there are inline format data / attr forks attached to this inode, 3870 + * make sure they are not corrupt. 3871 + */ 3872 + if (ip->i_df.if_format == XFS_DINODE_FMT_LOCAL && 3873 + xfs_ifork_verify_local_data(ip)) 3874 + goto flush_out; 3875 + if (ip->i_afp && ip->i_afp->if_format == XFS_DINODE_FMT_LOCAL && 3876 + xfs_ifork_verify_local_attr(ip)) 3877 + goto flush_out; 3780 3878 3781 3879 /* 3782 3880 * Copy the dirty parts of the inode into the on-disk inode. We always ··· 3826 3910 * need the AIL lock, because it is a 64 bit value that cannot be read 3827 3911 * atomically. 3828 3912 */ 3913 + error = 0; 3914 + flush_out: 3829 3915 iip->ili_last_fields = iip->ili_fields; 3830 3916 iip->ili_fields = 0; 3831 3917 iip->ili_fsync_fields = 0; ··· 3837 3919 &iip->ili_item.li_lsn); 3838 3920 3839 3921 /* 3840 - * Attach the function xfs_iflush_done to the inode's 3841 - * buffer. This will remove the inode from the AIL 3842 - * and unlock the inode's flush lock when the inode is 3843 - * completely written to disk. 3922 + * Attach the inode item callback to the buffer whether the flush 3923 + * succeeded or not. 
If not, the caller will shut down and fail I/O 3924 + * completion on the buffer to remove the inode from the AIL and release 3925 + * the flush lock. 3844 3926 */ 3845 3927 xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item); 3846 3928 ··· 3849 3931 3850 3932 ASSERT(!list_empty(&bp->b_li_list)); 3851 3933 ASSERT(bp->b_iodone != NULL); 3852 - return 0; 3853 - 3854 - corrupt_out: 3855 - return -EFSCORRUPTED; 3934 + return error; 3856 3935 } 3857 3936 3858 3937 /* Release an inode. */
+1 -5
fs/xfs/xfs_inode.h
··· 57 57 58 58 struct xfs_icdinode i_d; /* most of ondisk inode */ 59 59 60 - xfs_extnum_t i_cnextents; /* # of extents in cow fork */ 61 - unsigned int i_cformat; /* format of cow fork */ 62 - 63 60 /* VFS inode */ 64 61 struct inode i_vnode; /* embedded VFS inode */ 65 62 ··· 464 467 /* from xfs_iops.c */ 465 468 extern void xfs_setup_inode(struct xfs_inode *ip); 466 469 extern void xfs_setup_iops(struct xfs_inode *ip); 470 + extern void xfs_diflags_to_iflags(struct xfs_inode *ip, bool init); 467 471 468 472 /* 469 473 * When setting up a newly allocated inode, we need to call ··· 494 496 495 497 /* The default CoW extent size hint. */ 496 498 #define XFS_DEFAULT_COWEXTSZ_HINT 32 497 - 498 - bool xfs_inode_verify_forks(struct xfs_inode *ip); 499 499 500 500 int xfs_iunlink_init(struct xfs_perag *pag); 501 501 void xfs_iunlink_destroy(struct xfs_perag *pag);
+17 -37
fs/xfs/xfs_inode_item.c
··· 36 36 { 37 37 struct xfs_inode *ip = iip->ili_inode; 38 38 39 - switch (ip->i_d.di_format) { 39 + switch (ip->i_df.if_format) { 40 40 case XFS_DINODE_FMT_EXTENTS: 41 41 if ((iip->ili_fields & XFS_ILOG_DEXT) && 42 - ip->i_d.di_nextents > 0 && 42 + ip->i_df.if_nextents > 0 && 43 43 ip->i_df.if_bytes > 0) { 44 44 /* worst case, doesn't subtract delalloc extents */ 45 45 *nbytes += XFS_IFORK_DSIZE(ip); ··· 77 77 { 78 78 struct xfs_inode *ip = iip->ili_inode; 79 79 80 - switch (ip->i_d.di_aformat) { 80 + switch (ip->i_afp->if_format) { 81 81 case XFS_DINODE_FMT_EXTENTS: 82 82 if ((iip->ili_fields & XFS_ILOG_AEXT) && 83 - ip->i_d.di_anextents > 0 && 83 + ip->i_afp->if_nextents > 0 && 84 84 ip->i_afp->if_bytes > 0) { 85 85 /* worst case, doesn't subtract unused space */ 86 86 *nbytes += XFS_IFORK_ASIZE(ip); ··· 142 142 struct xfs_inode *ip = iip->ili_inode; 143 143 size_t data_bytes; 144 144 145 - switch (ip->i_d.di_format) { 145 + switch (ip->i_df.if_format) { 146 146 case XFS_DINODE_FMT_EXTENTS: 147 147 iip->ili_fields &= 148 148 ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | XFS_ILOG_DEV); 149 149 150 150 if ((iip->ili_fields & XFS_ILOG_DEXT) && 151 - ip->i_d.di_nextents > 0 && 151 + ip->i_df.if_nextents > 0 && 152 152 ip->i_df.if_bytes > 0) { 153 153 struct xfs_bmbt_rec *p; 154 154 ··· 227 227 struct xfs_inode *ip = iip->ili_inode; 228 228 size_t data_bytes; 229 229 230 - switch (ip->i_d.di_aformat) { 230 + switch (ip->i_afp->if_format) { 231 231 case XFS_DINODE_FMT_EXTENTS: 232 232 iip->ili_fields &= 233 233 ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT); 234 234 235 235 if ((iip->ili_fields & XFS_ILOG_AEXT) && 236 - ip->i_d.di_anextents > 0 && 236 + ip->i_afp->if_nextents > 0 && 237 237 ip->i_afp->if_bytes > 0) { 238 238 struct xfs_bmbt_rec *p; 239 239 240 240 ASSERT(xfs_iext_count(ip->i_afp) == 241 - ip->i_d.di_anextents); 241 + ip->i_afp->if_nextents); 242 242 243 243 p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_EXT); 244 244 data_bytes = xfs_iextents_copy(ip, p, 
XFS_ATTR_FORK); ··· 305 305 struct inode *inode = VFS_I(ip); 306 306 307 307 to->di_magic = XFS_DINODE_MAGIC; 308 - to->di_format = from->di_format; 308 + to->di_format = xfs_ifork_format(&ip->i_df); 309 309 to->di_uid = i_uid_read(inode); 310 310 to->di_gid = i_gid_read(inode); 311 311 to->di_projid_lo = from->di_projid & 0xffff; ··· 326 326 to->di_size = from->di_size; 327 327 to->di_nblocks = from->di_nblocks; 328 328 to->di_extsize = from->di_extsize; 329 - to->di_nextents = from->di_nextents; 330 - to->di_anextents = from->di_anextents; 329 + to->di_nextents = xfs_ifork_nextents(&ip->i_df); 330 + to->di_anextents = xfs_ifork_nextents(ip->i_afp); 331 331 to->di_forkoff = from->di_forkoff; 332 - to->di_aformat = from->di_aformat; 332 + to->di_aformat = xfs_ifork_format(ip->i_afp); 333 333 to->di_dmevmask = from->di_dmevmask; 334 334 to->di_dmstate = from->di_dmstate; 335 335 to->di_flags = from->di_flags; ··· 496 496 497 497 if (xfs_ipincount(ip) > 0) 498 498 return XFS_ITEM_PINNED; 499 - 500 - /* 501 - * The buffer containing this item failed to be written back 502 - * previously. Resubmit the buffer for IO. 503 - */ 504 - if (test_bit(XFS_LI_FAILED, &lip->li_flags)) { 505 - if (!xfs_buf_trylock(bp)) 506 - return XFS_ITEM_LOCKED; 507 - 508 - if (!xfs_buf_resubmit_failed_buffers(bp, buffer_list)) 509 - rval = XFS_ITEM_FLUSHING; 510 - 511 - xfs_buf_unlock(bp); 512 - return rval; 513 - } 514 499 515 500 if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) 516 501 return XFS_ITEM_LOCKED; ··· 762 777 */ 763 778 void 764 779 xfs_iflush_abort( 765 - xfs_inode_t *ip, 766 - bool stale) 780 + struct xfs_inode *ip) 767 781 { 768 - xfs_inode_log_item_t *iip = ip->i_itemp; 782 + struct xfs_inode_log_item *iip = ip->i_itemp; 769 783 770 784 if (iip) { 771 - if (test_bit(XFS_LI_IN_AIL, &iip->ili_item.li_flags)) { 772 - xfs_trans_ail_remove(&iip->ili_item, 773 - stale ? 
SHUTDOWN_LOG_IO_ERROR : 774 - SHUTDOWN_CORRUPT_INCORE); 775 - } 785 + xfs_trans_ail_delete(&iip->ili_item, 0); 776 786 iip->ili_logged = 0; 777 787 /* 778 788 * Clear the ili_last_fields bits now that we know that the ··· 792 812 struct xfs_buf *bp, 793 813 struct xfs_log_item *lip) 794 814 { 795 - xfs_iflush_abort(INODE_ITEM(lip)->ili_inode, true); 815 + xfs_iflush_abort(INODE_ITEM(lip)->ili_inode); 796 816 } 797 817 798 818 /*
+3 -3
fs/xfs/xfs_inode_item.h
··· 13 13 struct xfs_inode; 14 14 struct xfs_mount; 15 15 16 - typedef struct xfs_inode_log_item { 16 + struct xfs_inode_log_item { 17 17 struct xfs_log_item ili_item; /* common portion */ 18 18 struct xfs_inode *ili_inode; /* inode ptr */ 19 19 xfs_lsn_t ili_flush_lsn; /* lsn at last flush */ ··· 23 23 unsigned int ili_last_fields; /* fields when flushed */ 24 24 unsigned int ili_fields; /* fields to be logged */ 25 25 unsigned int ili_fsync_fields; /* logged since last fsync */ 26 - } xfs_inode_log_item_t; 26 + }; 27 27 28 28 static inline int xfs_inode_clean(xfs_inode_t *ip) 29 29 { ··· 34 34 extern void xfs_inode_item_destroy(struct xfs_inode *); 35 35 extern void xfs_iflush_done(struct xfs_buf *, struct xfs_log_item *); 36 36 extern void xfs_istale_done(struct xfs_buf *, struct xfs_log_item *); 37 - extern void xfs_iflush_abort(struct xfs_inode *, bool); 37 + extern void xfs_iflush_abort(struct xfs_inode *); 38 38 extern int xfs_inode_item_format_convert(xfs_log_iovec_t *, 39 39 struct xfs_inode_log_format *); 40 40
+394 -0
fs/xfs/xfs_inode_item_recover.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Copyright (c) 2000-2006 Silicon Graphics, Inc. 4 + * All Rights Reserved. 5 + */ 6 + #include "xfs.h" 7 + #include "xfs_fs.h" 8 + #include "xfs_shared.h" 9 + #include "xfs_format.h" 10 + #include "xfs_log_format.h" 11 + #include "xfs_trans_resv.h" 12 + #include "xfs_mount.h" 13 + #include "xfs_inode.h" 14 + #include "xfs_trans.h" 15 + #include "xfs_inode_item.h" 16 + #include "xfs_trace.h" 17 + #include "xfs_trans_priv.h" 18 + #include "xfs_buf_item.h" 19 + #include "xfs_log.h" 20 + #include "xfs_error.h" 21 + #include "xfs_log_priv.h" 22 + #include "xfs_log_recover.h" 23 + #include "xfs_icache.h" 24 + #include "xfs_bmap_btree.h" 25 + 26 + STATIC void 27 + xlog_recover_inode_ra_pass2( 28 + struct xlog *log, 29 + struct xlog_recover_item *item) 30 + { 31 + if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) { 32 + struct xfs_inode_log_format *ilfp = item->ri_buf[0].i_addr; 33 + 34 + xlog_buf_readahead(log, ilfp->ilf_blkno, ilfp->ilf_len, 35 + &xfs_inode_buf_ra_ops); 36 + } else { 37 + struct xfs_inode_log_format_32 *ilfp = item->ri_buf[0].i_addr; 38 + 39 + xlog_buf_readahead(log, ilfp->ilf_blkno, ilfp->ilf_len, 40 + &xfs_inode_buf_ra_ops); 41 + } 42 + } 43 + 44 + /* 45 + * Inode fork owner changes 46 + * 47 + * If we have been told that we have to reparent the inode fork, it's because an 48 + * extent swap operation on a CRC enabled filesystem has been done and we are 49 + * replaying it. We need to walk the BMBT of the appropriate fork and change the 50 + * owners of it. 51 + * 52 + * The complexity here is that we don't have an inode context to work with, so 53 + * after we've replayed the inode we need to instantiate one. This is where the 54 + * fun begins. 55 + * 56 + * We are in the middle of log recovery, so we can't run transactions. 
That 57 + * means we cannot use cache coherent inode instantiation via xfs_iget(), as 58 + * that will result in the corresponding iput() running the inode through 59 + * xfs_inactive(). If we've just replayed an inode core that changes the link 60 + * count to zero (i.e. it's been unlinked), then xfs_inactive() will run 61 + * transactions (bad!). 62 + * 63 + * So, to avoid this, we instantiate an inode directly from the inode core we've 64 + * just recovered. We have the buffer still locked, and all we really need to 65 + * instantiate is the inode core and the forks being modified. We can do this 66 + * manually, then run the inode btree owner change, and then tear down the 67 + * xfs_inode without having to run any transactions at all. 68 + * 69 + * Also, because we don't have a transaction context available here but need to 70 + * gather all the buffers we modify for writeback so we pass the buffer_list 71 + * instead for the operation to use. 72 + */ 73 + 74 + STATIC int 75 + xfs_recover_inode_owner_change( 76 + struct xfs_mount *mp, 77 + struct xfs_dinode *dip, 78 + struct xfs_inode_log_format *in_f, 79 + struct list_head *buffer_list) 80 + { 81 + struct xfs_inode *ip; 82 + int error; 83 + 84 + ASSERT(in_f->ilf_fields & (XFS_ILOG_DOWNER|XFS_ILOG_AOWNER)); 85 + 86 + ip = xfs_inode_alloc(mp, in_f->ilf_ino); 87 + if (!ip) 88 + return -ENOMEM; 89 + 90 + /* instantiate the inode */ 91 + ASSERT(dip->di_version >= 3); 92 + 93 + error = xfs_inode_from_disk(ip, dip); 94 + if (error) 95 + goto out_free_ip; 96 + 97 + if (in_f->ilf_fields & XFS_ILOG_DOWNER) { 98 + ASSERT(in_f->ilf_fields & XFS_ILOG_DBROOT); 99 + error = xfs_bmbt_change_owner(NULL, ip, XFS_DATA_FORK, 100 + ip->i_ino, buffer_list); 101 + if (error) 102 + goto out_free_ip; 103 + } 104 + 105 + if (in_f->ilf_fields & XFS_ILOG_AOWNER) { 106 + ASSERT(in_f->ilf_fields & XFS_ILOG_ABROOT); 107 + error = xfs_bmbt_change_owner(NULL, ip, XFS_ATTR_FORK, 108 + ip->i_ino, buffer_list); 109 + if (error) 110 + goto 
out_free_ip; 111 + } 112 + 113 + out_free_ip: 114 + xfs_inode_free(ip); 115 + return error; 116 + } 117 + 118 + STATIC int 119 + xlog_recover_inode_commit_pass2( 120 + struct xlog *log, 121 + struct list_head *buffer_list, 122 + struct xlog_recover_item *item, 123 + xfs_lsn_t current_lsn) 124 + { 125 + struct xfs_inode_log_format *in_f; 126 + struct xfs_mount *mp = log->l_mp; 127 + struct xfs_buf *bp; 128 + struct xfs_dinode *dip; 129 + int len; 130 + char *src; 131 + char *dest; 132 + int error; 133 + int attr_index; 134 + uint fields; 135 + struct xfs_log_dinode *ldip; 136 + uint isize; 137 + int need_free = 0; 138 + 139 + if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) { 140 + in_f = item->ri_buf[0].i_addr; 141 + } else { 142 + in_f = kmem_alloc(sizeof(struct xfs_inode_log_format), 0); 143 + need_free = 1; 144 + error = xfs_inode_item_format_convert(&item->ri_buf[0], in_f); 145 + if (error) 146 + goto error; 147 + } 148 + 149 + /* 150 + * Inode buffers can be freed, look out for it, 151 + * and do not replay the inode. 152 + */ 153 + if (xlog_is_buffer_cancelled(log, in_f->ilf_blkno, in_f->ilf_len)) { 154 + error = 0; 155 + trace_xfs_log_recover_inode_cancel(log, in_f); 156 + goto error; 157 + } 158 + trace_xfs_log_recover_inode_recover(log, in_f); 159 + 160 + error = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, 161 + 0, &bp, &xfs_inode_buf_ops); 162 + if (error) 163 + goto error; 164 + ASSERT(in_f->ilf_fields & XFS_ILOG_CORE); 165 + dip = xfs_buf_offset(bp, in_f->ilf_boffset); 166 + 167 + /* 168 + * Make sure the place we're flushing out to really looks 169 + * like an inode! 
170 + */ 171 + if (XFS_IS_CORRUPT(mp, !xfs_verify_magic16(bp, dip->di_magic))) { 172 + xfs_alert(mp, 173 + "%s: Bad inode magic number, dip = "PTR_FMT", dino bp = "PTR_FMT", ino = %Ld", 174 + __func__, dip, bp, in_f->ilf_ino); 175 + error = -EFSCORRUPTED; 176 + goto out_release; 177 + } 178 + ldip = item->ri_buf[1].i_addr; 179 + if (XFS_IS_CORRUPT(mp, ldip->di_magic != XFS_DINODE_MAGIC)) { 180 + xfs_alert(mp, 181 + "%s: Bad inode log record, rec ptr "PTR_FMT", ino %Ld", 182 + __func__, item, in_f->ilf_ino); 183 + error = -EFSCORRUPTED; 184 + goto out_release; 185 + } 186 + 187 + /* 188 + * If the inode has an LSN in it, recover the inode only if it's less 189 + * than the lsn of the transaction we are replaying. Note: we still 190 + * need to replay an owner change even though the inode is more recent 191 + * than the transaction as there is no guarantee that all the btree 192 + * blocks are more recent than this transaction, too. 193 + */ 194 + if (dip->di_version >= 3) { 195 + xfs_lsn_t lsn = be64_to_cpu(dip->di_lsn); 196 + 197 + if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) { 198 + trace_xfs_log_recover_inode_skip(log, in_f); 199 + error = 0; 200 + goto out_owner_change; 201 + } 202 + } 203 + 204 + /* 205 + * di_flushiter is only valid for v1/2 inodes. All changes for v3 inodes 206 + * are transactional and if ordering is necessary we can determine that 207 + * more accurately by the LSN field in the V3 inode core. 
Don't trust 208 + * the inode versions we might be changing them here - use the 209 + * superblock flag to determine whether we need to look at di_flushiter 210 + * to skip replay when the on disk inode is newer than the log one 211 + */ 212 + if (!xfs_sb_version_has_v3inode(&mp->m_sb) && 213 + ldip->di_flushiter < be16_to_cpu(dip->di_flushiter)) { 214 + /* 215 + * Deal with the wrap case, DI_MAX_FLUSH is less 216 + * than smaller numbers 217 + */ 218 + if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH && 219 + ldip->di_flushiter < (DI_MAX_FLUSH >> 1)) { 220 + /* do nothing */ 221 + } else { 222 + trace_xfs_log_recover_inode_skip(log, in_f); 223 + error = 0; 224 + goto out_release; 225 + } 226 + } 227 + 228 + /* Take the opportunity to reset the flush iteration count */ 229 + ldip->di_flushiter = 0; 230 + 231 + if (unlikely(S_ISREG(ldip->di_mode))) { 232 + if ((ldip->di_format != XFS_DINODE_FMT_EXTENTS) && 233 + (ldip->di_format != XFS_DINODE_FMT_BTREE)) { 234 + XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)", 235 + XFS_ERRLEVEL_LOW, mp, ldip, 236 + sizeof(*ldip)); 237 + xfs_alert(mp, 238 + "%s: Bad regular inode log record, rec ptr "PTR_FMT", " 239 + "ino ptr = "PTR_FMT", ino bp = "PTR_FMT", ino %Ld", 240 + __func__, item, dip, bp, in_f->ilf_ino); 241 + error = -EFSCORRUPTED; 242 + goto out_release; 243 + } 244 + } else if (unlikely(S_ISDIR(ldip->di_mode))) { 245 + if ((ldip->di_format != XFS_DINODE_FMT_EXTENTS) && 246 + (ldip->di_format != XFS_DINODE_FMT_BTREE) && 247 + (ldip->di_format != XFS_DINODE_FMT_LOCAL)) { 248 + XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)", 249 + XFS_ERRLEVEL_LOW, mp, ldip, 250 + sizeof(*ldip)); 251 + xfs_alert(mp, 252 + "%s: Bad dir inode log record, rec ptr "PTR_FMT", " 253 + "ino ptr = "PTR_FMT", ino bp = "PTR_FMT", ino %Ld", 254 + __func__, item, dip, bp, in_f->ilf_ino); 255 + error = -EFSCORRUPTED; 256 + goto out_release; 257 + } 258 + } 259 + if (unlikely(ldip->di_nextents + ldip->di_anextents > ldip->di_nblocks)){ 260 + 
XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)", 261 + XFS_ERRLEVEL_LOW, mp, ldip, 262 + sizeof(*ldip)); 263 + xfs_alert(mp, 264 + "%s: Bad inode log record, rec ptr "PTR_FMT", dino ptr "PTR_FMT", " 265 + "dino bp "PTR_FMT", ino %Ld, total extents = %d, nblocks = %Ld", 266 + __func__, item, dip, bp, in_f->ilf_ino, 267 + ldip->di_nextents + ldip->di_anextents, 268 + ldip->di_nblocks); 269 + error = -EFSCORRUPTED; 270 + goto out_release; 271 + } 272 + if (unlikely(ldip->di_forkoff > mp->m_sb.sb_inodesize)) { 273 + XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)", 274 + XFS_ERRLEVEL_LOW, mp, ldip, 275 + sizeof(*ldip)); 276 + xfs_alert(mp, 277 + "%s: Bad inode log record, rec ptr "PTR_FMT", dino ptr "PTR_FMT", " 278 + "dino bp "PTR_FMT", ino %Ld, forkoff 0x%x", __func__, 279 + item, dip, bp, in_f->ilf_ino, ldip->di_forkoff); 280 + error = -EFSCORRUPTED; 281 + goto out_release; 282 + } 283 + isize = xfs_log_dinode_size(mp); 284 + if (unlikely(item->ri_buf[1].i_len > isize)) { 285 + XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)", 286 + XFS_ERRLEVEL_LOW, mp, ldip, 287 + sizeof(*ldip)); 288 + xfs_alert(mp, 289 + "%s: Bad inode log record length %d, rec ptr "PTR_FMT, 290 + __func__, item->ri_buf[1].i_len, item); 291 + error = -EFSCORRUPTED; 292 + goto out_release; 293 + } 294 + 295 + /* recover the log dinode inode into the on disk inode */ 296 + xfs_log_dinode_to_disk(ldip, dip); 297 + 298 + fields = in_f->ilf_fields; 299 + if (fields & XFS_ILOG_DEV) 300 + xfs_dinode_put_rdev(dip, in_f->ilf_u.ilfu_rdev); 301 + 302 + if (in_f->ilf_size == 2) 303 + goto out_owner_change; 304 + len = item->ri_buf[2].i_len; 305 + src = item->ri_buf[2].i_addr; 306 + ASSERT(in_f->ilf_size <= 4); 307 + ASSERT((in_f->ilf_size == 3) || (fields & XFS_ILOG_AFORK)); 308 + ASSERT(!(fields & XFS_ILOG_DFORK) || 309 + (len == in_f->ilf_dsize)); 310 + 311 + switch (fields & XFS_ILOG_DFORK) { 312 + case XFS_ILOG_DDATA: 313 + case XFS_ILOG_DEXT: 314 + memcpy(XFS_DFORK_DPTR(dip), src, len); 315 + 
break; 316 + 317 + case XFS_ILOG_DBROOT: 318 + xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src, len, 319 + (struct xfs_bmdr_block *)XFS_DFORK_DPTR(dip), 320 + XFS_DFORK_DSIZE(dip, mp)); 321 + break; 322 + 323 + default: 324 + /* 325 + * There are no data fork flags set. 326 + */ 327 + ASSERT((fields & XFS_ILOG_DFORK) == 0); 328 + break; 329 + } 330 + 331 + /* 332 + * If we logged any attribute data, recover it. There may or 333 + * may not have been any other non-core data logged in this 334 + * transaction. 335 + */ 336 + if (in_f->ilf_fields & XFS_ILOG_AFORK) { 337 + if (in_f->ilf_fields & XFS_ILOG_DFORK) { 338 + attr_index = 3; 339 + } else { 340 + attr_index = 2; 341 + } 342 + len = item->ri_buf[attr_index].i_len; 343 + src = item->ri_buf[attr_index].i_addr; 344 + ASSERT(len == in_f->ilf_asize); 345 + 346 + switch (in_f->ilf_fields & XFS_ILOG_AFORK) { 347 + case XFS_ILOG_ADATA: 348 + case XFS_ILOG_AEXT: 349 + dest = XFS_DFORK_APTR(dip); 350 + ASSERT(len <= XFS_DFORK_ASIZE(dip, mp)); 351 + memcpy(dest, src, len); 352 + break; 353 + 354 + case XFS_ILOG_ABROOT: 355 + dest = XFS_DFORK_APTR(dip); 356 + xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src, 357 + len, (struct xfs_bmdr_block *)dest, 358 + XFS_DFORK_ASIZE(dip, mp)); 359 + break; 360 + 361 + default: 362 + xfs_warn(log->l_mp, "%s: Invalid flag", __func__); 363 + ASSERT(0); 364 + error = -EFSCORRUPTED; 365 + goto out_release; 366 + } 367 + } 368 + 369 + out_owner_change: 370 + /* Recover the swapext owner change unless inode has been deleted */ 371 + if ((in_f->ilf_fields & (XFS_ILOG_DOWNER|XFS_ILOG_AOWNER)) && 372 + (dip->di_mode != 0)) 373 + error = xfs_recover_inode_owner_change(mp, dip, in_f, 374 + buffer_list); 375 + /* re-generate the checksum. 
*/ 376 + xfs_dinode_calc_crc(log->l_mp, dip); 377 + 378 + ASSERT(bp->b_mount == mp); 379 + bp->b_iodone = xlog_recover_iodone; 380 + xfs_buf_delwri_queue(bp, buffer_list); 381 + 382 + out_release: 383 + xfs_buf_relse(bp); 384 + error: 385 + if (need_free) 386 + kmem_free(in_f); 387 + return error; 388 + } 389 + 390 + const struct xlog_recover_item_ops xlog_inode_item_ops = { 391 + .item_type = XFS_LI_INODE, 392 + .ra_pass2 = xlog_recover_inode_ra_pass2, 393 + .commit_pass2 = xlog_recover_inode_commit_pass2, 394 + };
+46 -54
fs/xfs/xfs_ioctl.c
··· 1104 1104 bool attr, 1105 1105 struct fsxattr *fa) 1106 1106 { 1107 + struct xfs_ifork *ifp = attr ? ip->i_afp : &ip->i_df; 1108 + 1107 1109 simple_fill_fsxattr(fa, xfs_ip2xflags(ip)); 1108 1110 fa->fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog; 1109 1111 fa->fsx_cowextsize = ip->i_d.di_cowextsize << 1110 1112 ip->i_mount->m_sb.sb_blocklog; 1111 1113 fa->fsx_projid = ip->i_d.di_projid; 1112 - 1113 - if (attr) { 1114 - if (ip->i_afp) { 1115 - if (ip->i_afp->if_flags & XFS_IFEXTENTS) 1116 - fa->fsx_nextents = xfs_iext_count(ip->i_afp); 1117 - else 1118 - fa->fsx_nextents = ip->i_d.di_anextents; 1119 - } else 1120 - fa->fsx_nextents = 0; 1121 - } else { 1122 - if (ip->i_df.if_flags & XFS_IFEXTENTS) 1123 - fa->fsx_nextents = xfs_iext_count(&ip->i_df); 1124 - else 1125 - fa->fsx_nextents = ip->i_d.di_nextents; 1126 - } 1114 + if (ifp && (ifp->if_flags & XFS_IFEXTENTS)) 1115 + fa->fsx_nextents = xfs_iext_count(ifp); 1116 + else 1117 + fa->fsx_nextents = xfs_ifork_nextents(ifp); 1127 1118 } 1128 1119 1129 1120 STATIC int ··· 1192 1201 return di_flags2; 1193 1202 } 1194 1203 1195 - STATIC void 1196 - xfs_diflags_to_linux( 1197 - struct xfs_inode *ip) 1198 - { 1199 - struct inode *inode = VFS_I(ip); 1200 - unsigned int xflags = xfs_ip2xflags(ip); 1201 - 1202 - if (xflags & FS_XFLAG_IMMUTABLE) 1203 - inode->i_flags |= S_IMMUTABLE; 1204 - else 1205 - inode->i_flags &= ~S_IMMUTABLE; 1206 - if (xflags & FS_XFLAG_APPEND) 1207 - inode->i_flags |= S_APPEND; 1208 - else 1209 - inode->i_flags &= ~S_APPEND; 1210 - if (xflags & FS_XFLAG_SYNC) 1211 - inode->i_flags |= S_SYNC; 1212 - else 1213 - inode->i_flags &= ~S_SYNC; 1214 - if (xflags & FS_XFLAG_NOATIME) 1215 - inode->i_flags |= S_NOATIME; 1216 - else 1217 - inode->i_flags &= ~S_NOATIME; 1218 - #if 0 /* disabled until the flag switching races are sorted out */ 1219 - if (xflags & FS_XFLAG_DAX) 1220 - inode->i_flags |= S_DAX; 1221 - else 1222 - inode->i_flags &= ~S_DAX; 1223 - #endif 1224 - } 1225 - 1226 1204 
static int 1227 1205 xfs_ioctl_setattr_xflags( 1228 1206 struct xfs_trans *tp, ··· 1202 1242 uint64_t di_flags2; 1203 1243 1204 1244 /* Can't change realtime flag if any extents are allocated. */ 1205 - if ((ip->i_d.di_nextents || ip->i_delayed_blks) && 1245 + if ((ip->i_df.if_nextents || ip->i_delayed_blks) && 1206 1246 XFS_IS_REALTIME_INODE(ip) != (fa->fsx_xflags & FS_XFLAG_REALTIME)) 1207 1247 return -EINVAL; 1208 1248 ··· 1229 1269 ip->i_d.di_flags = xfs_flags2diflags(ip, fa->fsx_xflags); 1230 1270 ip->i_d.di_flags2 = di_flags2; 1231 1271 1232 - xfs_diflags_to_linux(ip); 1272 + xfs_diflags_to_iflags(ip, false); 1233 1273 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); 1234 1274 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 1235 1275 XFS_STATS_INC(mp, xs_ig_attrchg); ··· 1380 1420 xfs_extlen_t size; 1381 1421 xfs_fsblock_t extsize_fsb; 1382 1422 1383 - if (S_ISREG(VFS_I(ip)->i_mode) && ip->i_d.di_nextents && 1423 + if (S_ISREG(VFS_I(ip)->i_mode) && ip->i_df.if_nextents && 1384 1424 ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) != fa->fsx_extsize)) 1385 1425 return -EINVAL; 1386 1426 ··· 1473 1513 struct fsxattr old_fa; 1474 1514 struct xfs_mount *mp = ip->i_mount; 1475 1515 struct xfs_trans *tp; 1476 - struct xfs_dquot *udqp = NULL; 1477 1516 struct xfs_dquot *pdqp = NULL; 1478 1517 struct xfs_dquot *olddquot = NULL; 1479 1518 int code; ··· 1495 1536 if (XFS_IS_QUOTA_ON(mp)) { 1496 1537 code = xfs_qm_vop_dqalloc(ip, VFS_I(ip)->i_uid, 1497 1538 VFS_I(ip)->i_gid, fa->fsx_projid, 1498 - XFS_QMOPT_PQUOTA, &udqp, NULL, &pdqp); 1539 + XFS_QMOPT_PQUOTA, NULL, NULL, &pdqp); 1499 1540 if (code) 1500 1541 return code; 1501 1542 } ··· 1519 1560 1520 1561 if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp) && 1521 1562 ip->i_d.di_projid != fa->fsx_projid) { 1522 - code = xfs_qm_vop_chown_reserve(tp, ip, udqp, NULL, pdqp, 1563 + code = xfs_qm_vop_chown_reserve(tp, ip, NULL, NULL, pdqp, 1523 1564 capable(CAP_FOWNER) ? 
XFS_QMOPT_FORCE_RES : 0); 1524 1565 if (code) /* out of quota */ 1525 1566 goto error_trans_cancel; ··· 1585 1626 * Release any dquot(s) the inode had kept before chown. 1586 1627 */ 1587 1628 xfs_qm_dqrele(olddquot); 1588 - xfs_qm_dqrele(udqp); 1589 1629 xfs_qm_dqrele(pdqp); 1590 1630 1591 1631 return code; ··· 1592 1634 error_trans_cancel: 1593 1635 xfs_trans_cancel(tp); 1594 1636 error_free_dquots: 1595 - xfs_qm_dqrele(udqp); 1596 1637 xfs_qm_dqrele(pdqp); 1597 1638 return code; 1598 1639 } ··· 2037 2080 out: 2038 2081 mnt_drop_write_file(filp); 2039 2082 return error; 2083 + } 2084 + 2085 + static inline int 2086 + xfs_fs_eofblocks_from_user( 2087 + struct xfs_fs_eofblocks *src, 2088 + struct xfs_eofblocks *dst) 2089 + { 2090 + if (src->eof_version != XFS_EOFBLOCKS_VERSION) 2091 + return -EINVAL; 2092 + 2093 + if (src->eof_flags & ~XFS_EOF_FLAGS_VALID) 2094 + return -EINVAL; 2095 + 2096 + if (memchr_inv(&src->pad32, 0, sizeof(src->pad32)) || 2097 + memchr_inv(src->pad64, 0, sizeof(src->pad64))) 2098 + return -EINVAL; 2099 + 2100 + dst->eof_flags = src->eof_flags; 2101 + dst->eof_prid = src->eof_prid; 2102 + dst->eof_min_file_size = src->eof_min_file_size; 2103 + 2104 + dst->eof_uid = INVALID_UID; 2105 + if (src->eof_flags & XFS_EOF_FLAGS_UID) { 2106 + dst->eof_uid = make_kuid(current_user_ns(), src->eof_uid); 2107 + if (!uid_valid(dst->eof_uid)) 2108 + return -EINVAL; 2109 + } 2110 + 2111 + dst->eof_gid = INVALID_GID; 2112 + if (src->eof_flags & XFS_EOF_FLAGS_GID) { 2113 + dst->eof_gid = make_kgid(current_user_ns(), src->eof_gid); 2114 + if (!gid_valid(dst->eof_gid)) 2115 + return -EINVAL; 2116 + } 2117 + return 0; 2040 2118 } 2041 2119 2042 2120 /*
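The `xfs_fs_eofblocks_from_user()` helper added above follows a common kernel pattern for accepting a versioned structure from userspace: reject unknown versions, reject undefined flag bits, and insist that all padding is zero so those bytes can be given meaning later without breaking old callers. A minimal userspace sketch of the same checks (the struct layout and all names here are illustrative stand-ins, not the kernel definitions):

```c
#include <assert.h>
#include <stdint.h>
#include <string.h>

/* Hypothetical stand-ins for the kernel-side constants. */
#define EOFB_VERSION     1u
#define EOFB_FLAG_UID    (1u << 0)
#define EOFB_FLAG_GID    (1u << 1)
#define EOFB_FLAGS_VALID (EOFB_FLAG_UID | EOFB_FLAG_GID)

struct user_eofblocks {
	uint32_t version;
	uint32_t flags;
	uint32_t pad32;
	uint64_t pad64[2];
};

/* Return 0 if the structure is acceptable, -1 (an EINVAL stand-in) otherwise. */
static int eofblocks_validate(const struct user_eofblocks *src)
{
	static const uint64_t zero[2];

	if (src->version != EOFB_VERSION)
		return -1;
	if (src->flags & ~EOFB_FLAGS_VALID)
		return -1;
	/* All padding must be zero so the fields can be reused later. */
	if (src->pad32 != 0 ||
	    memcmp(src->pad64, zero, sizeof(zero)) != 0)
		return -1;
	return 0;
}
```

Requiring zeroed padding up front is what lets a future kernel assign meaning to those bytes while still accepting structures from old binaries.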
fs/xfs/xfs_iomap.c (+57, -56)
··· 352 352 } 353 353 354 354 /* 355 - * If we are doing a write at the end of the file and there are no allocations 356 - * past this one, then extend the allocation out to the file system's write 357 - * iosize. 358 - * 359 355 * If we don't have a user specified preallocation size, dynamically increase 360 356 * the preallocation size as the size of the file grows. Cap the maximum size 361 357 * at a single extent or less if the filesystem is near full. The closer the 362 - * filesystem is to full, the smaller the maximum prealocation. 363 - * 364 - * As an exception we don't do any preallocation at all if the file is smaller 365 - * than the minimum preallocation and we are using the default dynamic 366 - * preallocation scheme, as it is likely this is the only write to the file that 367 - * is going to be done. 368 - * 369 - * We clean up any extra space left over when the file is closed in 370 - * xfs_inactive(). 358 + * filesystem is to being full, the smaller the maximum preallocation. 
371 359 */ 372 360 STATIC xfs_fsblock_t 373 361 xfs_iomap_prealloc_size( ··· 365 377 loff_t count, 366 378 struct xfs_iext_cursor *icur) 367 379 { 380 + struct xfs_iext_cursor ncur = *icur; 381 + struct xfs_bmbt_irec prev, got; 368 382 struct xfs_mount *mp = ip->i_mount; 369 383 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); 370 384 xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); 371 - struct xfs_bmbt_irec prev; 372 - int shift = 0; 373 385 int64_t freesp; 374 386 xfs_fsblock_t qblocks; 375 - int qshift = 0; 376 387 xfs_fsblock_t alloc_blocks = 0; 388 + xfs_extlen_t plen; 389 + int shift = 0; 390 + int qshift = 0; 377 391 378 - if (offset + count <= XFS_ISIZE(ip)) 379 - return 0; 380 - 381 - if (!(mp->m_flags & XFS_MOUNT_ALLOCSIZE) && 382 - (XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_allocsize_blocks))) 392 + /* 393 + * As an exception we don't do any preallocation at all if the file is 394 + * smaller than the minimum preallocation and we are using the default 395 + * dynamic preallocation scheme, as it is likely this is the only write 396 + * to the file that is going to be done. 397 + */ 398 + if (XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_allocsize_blocks)) 383 399 return 0; 384 400 385 401 /* 386 - * If an explicit allocsize is set, the file is small, or we 387 - * are writing behind a hole, then use the minimum prealloc: 402 + * Use the minimum preallocation size for small files or if we are 403 + * writing right after a hole. 388 404 */ 389 - if ((mp->m_flags & XFS_MOUNT_ALLOCSIZE) || 390 - XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_dalign) || 391 - !xfs_iext_peek_prev_extent(ifp, icur, &prev) || 405 + if (XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_dalign) || 406 + !xfs_iext_prev_extent(ifp, &ncur, &prev) || 392 407 prev.br_startoff + prev.br_blockcount < offset_fsb) 393 408 return mp->m_allocsize_blocks; 394 409 395 410 /* 396 - * Determine the initial size of the preallocation. 
We are beyond the 397 - * current EOF here, but we need to take into account whether this is 398 - * a sparse write or an extending write when determining the 399 - * preallocation size. Hence we need to look up the extent that ends 400 - * at the current write offset and use the result to determine the 401 - * preallocation size. 402 - * 403 - * If the extent is a hole, then preallocation is essentially disabled. 404 - * Otherwise we take the size of the preceding data extent as the basis 405 - * for the preallocation size. If the size of the extent is greater than 406 - * half the maximum extent length, then use the current offset as the 407 - * basis. This ensures that for large files the preallocation size 408 - * always extends to MAXEXTLEN rather than falling short due to things 409 - * like stripe unit/width alignment of real extents. 411 + * Take the size of the preceding data extents as the basis for the 412 + * preallocation size. Note that we don't care if the previous extents 413 + * are written or not. 410 414 */ 411 - if (prev.br_blockcount <= (MAXEXTLEN >> 1)) 412 - alloc_blocks = prev.br_blockcount << 1; 413 - else 415 + plen = prev.br_blockcount; 416 + while (xfs_iext_prev_extent(ifp, &ncur, &got)) { 417 + if (plen > MAXEXTLEN / 2 || 418 + isnullstartblock(got.br_startblock) || 419 + got.br_startoff + got.br_blockcount != prev.br_startoff || 420 + got.br_startblock + got.br_blockcount != prev.br_startblock) 421 + break; 422 + plen += got.br_blockcount; 423 + prev = got; 424 + } 425 + 426 + /* 427 + * If the size of the extents is greater than half the maximum extent 428 + * length, then use the current offset as the basis. This ensures that 429 + * for large files the preallocation size always extends to MAXEXTLEN 430 + * rather than falling short due to things like stripe unit/width 431 + * alignment of real extents. 
432 + */ 433 + alloc_blocks = plen * 2; 434 + if (alloc_blocks > MAXEXTLEN) 414 435 alloc_blocks = XFS_B_TO_FSB(mp, offset); 415 - if (!alloc_blocks) 416 - goto check_writeio; 417 436 qblocks = alloc_blocks; 418 437 419 438 /* 420 439 * MAXEXTLEN is not a power of two value but we round the prealloc down 421 440 * to the nearest power of two value after throttling. To prevent the 422 - * round down from unconditionally reducing the maximum supported prealloc 423 - * size, we round up first, apply appropriate throttling, round down and 424 - * cap the value to MAXEXTLEN. 441 + * round down from unconditionally reducing the maximum supported 442 + * prealloc size, we round up first, apply appropriate throttling, 443 + * round down and cap the value to MAXEXTLEN. 425 444 */ 426 445 alloc_blocks = XFS_FILEOFF_MIN(roundup_pow_of_two(MAXEXTLEN), 427 446 alloc_blocks); ··· 489 494 */ 490 495 while (alloc_blocks && alloc_blocks >= freesp) 491 496 alloc_blocks >>= 4; 492 - check_writeio: 493 497 if (alloc_blocks < mp->m_allocsize_blocks) 494 498 alloc_blocks = mp->m_allocsize_blocks; 495 499 trace_xfs_iomap_prealloc_size(ip, alloc_blocks, shift, ··· 557 563 xfs_trans_ijoin(tp, ip, 0); 558 564 559 565 error = xfs_trans_reserve_quota_nblks(tp, ip, resblks, 0, 560 - XFS_QMOPT_RES_REGBLKS); 566 + XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES); 561 567 if (error) 562 568 goto error_on_bmapi_transaction; 563 569 ··· 850 856 851 857 xfs_ilock(ip, XFS_ILOCK_EXCL); 852 858 853 - if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ip, XFS_DATA_FORK)) || 859 + if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(&ip->i_df)) || 854 860 XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) { 855 861 error = -EFSCORRUPTED; 856 862 goto out_unlock; ··· 955 961 if (error) 956 962 goto out_unlock; 957 963 958 - if (eof) { 959 - prealloc_blocks = xfs_iomap_prealloc_size(ip, allocfork, offset, 960 - count, &icur); 964 + if (eof && offset + count > XFS_ISIZE(ip)) { 965 + /* 966 + * Determine the initial size of 
the preallocation. 967 + * We clean up any extra preallocation when the file is closed. 968 + */ 969 + if (mp->m_flags & XFS_MOUNT_ALLOCSIZE) 970 + prealloc_blocks = mp->m_allocsize_blocks; 971 + else 972 + prealloc_blocks = xfs_iomap_prealloc_size(ip, allocfork, 973 + offset, count, &icur); 961 974 if (prealloc_blocks) { 962 975 xfs_extlen_t align; 963 976 xfs_off_t end_offset; ··· 1259 1258 lockmode = xfs_ilock_attr_map_shared(ip); 1260 1259 1261 1260 /* if there are no attribute fork or extents, return ENOENT */ 1262 - if (!XFS_IFORK_Q(ip) || !ip->i_d.di_anextents) { 1261 + if (!XFS_IFORK_Q(ip) || !ip->i_afp->if_nextents) { 1263 1262 error = -ENOENT; 1264 1263 goto out_unlock; 1265 1264 } 1266 1265 1267 - ASSERT(ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL); 1266 + ASSERT(ip->i_afp->if_format != XFS_DINODE_FMT_LOCAL); 1268 1267 error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, 1269 1268 &nimaps, XFS_BMAPI_ATTRFORK); 1270 1269 out_unlock:
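The rewritten `xfs_iomap_prealloc_size()` above doubles the length of the preceding contiguous extent run, rounds through a power of two so that free-space throttling shifts stay well behaved, and clamps the result between the minimum allocation size and MAXEXTLEN. A simplified userspace sketch of just that clamping arithmetic (the helper names are stand-ins, and the real function additionally derives the throttle shift from quota limits and free-space thresholds):

```c
#include <assert.h>
#include <stdint.h>

#define MAXEXTLEN ((1u << 21) - 1)	/* 2^21 - 1 blocks, as in XFS */

/* Round v up to the next power of two (v >= 1). */
static uint64_t roundup_pow2(uint64_t v)
{
	uint64_t p = 1;
	while (p < v)
		p <<= 1;
	return p;
}

/* Round v down to the previous power of two (v >= 1). */
static uint64_t rounddown_pow2(uint64_t v)
{
	uint64_t p = 1;
	while ((p << 1) <= v)
		p <<= 1;
	return p;
}

/*
 * Sketch of the throttled sizing: double the preceding contiguous run,
 * round up to a power of two, shift right by the free-space throttle,
 * round back down, and clamp to [min_blocks, MAXEXTLEN].
 */
static uint64_t prealloc_size(uint64_t prev_run_blocks, int throttle_shift,
			      uint64_t min_blocks)
{
	uint64_t alloc = prev_run_blocks * 2;

	if (alloc > MAXEXTLEN)
		alloc = MAXEXTLEN;
	alloc = roundup_pow2(alloc);
	alloc >>= throttle_shift;
	if (alloc)
		alloc = rounddown_pow2(alloc);
	if (alloc > MAXEXTLEN)
		alloc = MAXEXTLEN;
	if (alloc < min_blocks)
		alloc = min_blocks;
	return alloc;
}
```

Rounding up before the shift and down afterwards is what the comment in the hunk above describes: it prevents the final round-down from unconditionally cutting the maximum supported preallocation in half.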
fs/xfs/xfs_iops.c (+48, -29)

··· 738 738 if (error) /* out of quota */ 739 739 goto out_cancel; 740 740 } 741 - } 742 741 743 - /* 744 - * Change file ownership. Must be the owner or privileged. 745 - */ 746 - if (mask & (ATTR_UID|ATTR_GID)) { 747 742 /* 748 743 * CAP_FSETID overrides the following restrictions: 749 744 * ··· 872 877 /* 873 878 * Short circuit the truncate case for zero length files. 874 879 */ 875 - if (newsize == 0 && oldsize == 0 && ip->i_d.di_nextents == 0) { 880 + if (newsize == 0 && oldsize == 0 && ip->i_df.if_nextents == 0) { 876 881 if (!(iattr->ia_valid & (ATTR_CTIME|ATTR_MTIME))) 877 882 return 0; 878 883 ··· 1238 1243 { 1239 1244 struct xfs_mount *mp = ip->i_mount; 1240 1245 1241 - /* Only supported on non-reflinked files. */ 1242 - if (!S_ISREG(VFS_I(ip)->i_mode) || xfs_is_reflink_inode(ip)) 1246 + /* Only supported on regular files. */ 1247 + if (!S_ISREG(VFS_I(ip)->i_mode)) 1243 1248 return false; 1244 1249 1245 - /* DAX mount option or DAX iflag must be set. */ 1246 - if (!(mp->m_flags & XFS_MOUNT_DAX) && 1247 - !(ip->i_d.di_flags2 & XFS_DIFLAG2_DAX)) 1250 + /* Only supported on non-reflinked files. 
*/ 1251 + if (xfs_is_reflink_inode(ip)) 1248 1252 return false; 1249 1253 1250 1254 /* Block size must match page size */ ··· 1254 1260 return xfs_inode_buftarg(ip)->bt_daxdev != NULL; 1255 1261 } 1256 1262 1257 - STATIC void 1258 - xfs_diflags_to_iflags( 1259 - struct inode *inode, 1260 - struct xfs_inode *ip) 1263 + static bool 1264 + xfs_inode_should_enable_dax( 1265 + struct xfs_inode *ip) 1261 1266 { 1262 - uint16_t flags = ip->i_d.di_flags; 1267 + if (!IS_ENABLED(CONFIG_FS_DAX)) 1268 + return false; 1269 + if (ip->i_mount->m_flags & XFS_MOUNT_DAX_NEVER) 1270 + return false; 1271 + if (!xfs_inode_supports_dax(ip)) 1272 + return false; 1273 + if (ip->i_mount->m_flags & XFS_MOUNT_DAX_ALWAYS) 1274 + return true; 1275 + if (ip->i_d.di_flags2 & XFS_DIFLAG2_DAX) 1276 + return true; 1277 + return false; 1278 + } 1263 1279 1264 - inode->i_flags &= ~(S_IMMUTABLE | S_APPEND | S_SYNC | 1265 - S_NOATIME | S_DAX); 1280 + void 1281 + xfs_diflags_to_iflags( 1282 + struct xfs_inode *ip, 1283 + bool init) 1284 + { 1285 + struct inode *inode = VFS_I(ip); 1286 + unsigned int xflags = xfs_ip2xflags(ip); 1287 + unsigned int flags = 0; 1266 1288 1267 - if (flags & XFS_DIFLAG_IMMUTABLE) 1268 - inode->i_flags |= S_IMMUTABLE; 1269 - if (flags & XFS_DIFLAG_APPEND) 1270 - inode->i_flags |= S_APPEND; 1271 - if (flags & XFS_DIFLAG_SYNC) 1272 - inode->i_flags |= S_SYNC; 1273 - if (flags & XFS_DIFLAG_NOATIME) 1274 - inode->i_flags |= S_NOATIME; 1275 - if (xfs_inode_supports_dax(ip)) 1276 - inode->i_flags |= S_DAX; 1289 + ASSERT(!(IS_DAX(inode) && init)); 1290 + 1291 + if (xflags & FS_XFLAG_IMMUTABLE) 1292 + flags |= S_IMMUTABLE; 1293 + if (xflags & FS_XFLAG_APPEND) 1294 + flags |= S_APPEND; 1295 + if (xflags & FS_XFLAG_SYNC) 1296 + flags |= S_SYNC; 1297 + if (xflags & FS_XFLAG_NOATIME) 1298 + flags |= S_NOATIME; 1299 + if (init && xfs_inode_should_enable_dax(ip)) 1300 + flags |= S_DAX; 1301 + 1302 + /* 1303 + * S_DAX can only be set during inode initialization and is never set by 1304 + * 
the VFS, so we cannot mask off S_DAX in i_flags. 1305 + */ 1306 + inode->i_flags &= ~(S_IMMUTABLE | S_APPEND | S_SYNC | S_NOATIME); 1307 + inode->i_flags |= flags; 1277 1308 } 1278 1309 1279 1310 /* ··· 1324 1305 inode_fake_hash(inode); 1325 1306 1326 1307 i_size_write(inode, ip->i_d.di_size); 1327 - xfs_diflags_to_iflags(inode, ip); 1308 + xfs_diflags_to_iflags(ip, true); 1328 1309 1329 1310 if (S_ISDIR(inode->i_mode)) { 1330 1311 /*
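The new `xfs_inode_should_enable_dax()` above layers the tri-state mount option over the per-inode flag. Its precedence can be sketched in isolation (the enum and struct here are simplified stand-ins for the mount-flag and inode state the kernel code consults):

```c
#include <assert.h>
#include <stdbool.h>

/* Tri-state dax= mount option: per-inode decision, always on, or never. */
enum dax_mount { DAX_INODE, DAX_ALWAYS, DAX_NEVER };

struct inode_state {
	bool fs_dax_built;	/* CONFIG_FS_DAX compiled in */
	enum dax_mount mode;	/* mount-wide dax= setting */
	bool supports_dax;	/* regular, not reflinked, dax-capable device */
	bool diflag_dax;	/* per-inode FS_XFLAG_DAX */
};

/* Mirrors the precedence in xfs_inode_should_enable_dax(). */
static bool should_enable_dax(const struct inode_state *st)
{
	if (!st->fs_dax_built)
		return false;
	if (st->mode == DAX_NEVER)
		return false;
	if (!st->supports_dax)
		return false;
	if (st->mode == DAX_ALWAYS)
		return true;
	return st->diflag_dax;
}
```

`dax=never` wins over everything, `dax=always` still requires the inode to support DAX at all, and only in the default middle state does the per-inode DIFLAG2_DAX flag decide.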
fs/xfs/xfs_itable.c (+3, -3)
··· 104 104 105 105 buf->bs_xflags = xfs_ip2xflags(ip); 106 106 buf->bs_extsize_blks = dic->di_extsize; 107 - buf->bs_extents = dic->di_nextents; 107 + buf->bs_extents = xfs_ifork_nextents(&ip->i_df); 108 108 xfs_bulkstat_health(ip, buf); 109 - buf->bs_aextents = dic->di_anextents; 109 + buf->bs_aextents = xfs_ifork_nextents(ip->i_afp); 110 110 buf->bs_forkoff = XFS_IFORK_BOFF(ip); 111 111 buf->bs_version = XFS_BULKSTAT_VERSION_V5; 112 112 ··· 115 115 buf->bs_cowextsize_blks = dic->di_cowextsize; 116 116 } 117 117 118 - switch (dic->di_format) { 118 + switch (ip->i_df.if_format) { 119 119 case XFS_DINODE_FMT_DEV: 120 120 buf->bs_rdev = sysv_encode_dev(inode->i_rdev); 121 121 buf->bs_blksize = BLKDEV_IOSIZE;
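The bulkstat hunk above passes `ip->i_afp`, which may be NULL when the inode has no attribute fork, straight into `xfs_ifork_nextents()`; the accessor therefore has to treat an absent fork as zero extents (the same simplification the ioctl FSGETXATTR hunk earlier relies on). A tiny illustrative sketch of such a NULL-safe accessor (simplified types, not the kernel's):

```c
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* Simplified in-core fork: the attr fork pointer may be NULL. */
struct ifork {
	uint32_t if_nextents;
};

/*
 * NULL-safe accessor in the spirit of xfs_ifork_nextents(): an absent
 * fork simply reports zero extents, so callers like bulkstat don't need
 * to special-case inodes without an attribute fork.
 */
static uint32_t ifork_nextents(const struct ifork *ifp)
{
	return ifp ? ifp->if_nextents : 0;
}
```

Folding the NULL check into the accessor is what lets the patch collapse the old four-way `if (attr) { if (ip->i_afp) ... }` ladder into two lines.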
fs/xfs/xfs_log_recover.c (+139, -2422)
··· 18 18 #include "xfs_log.h" 19 19 #include "xfs_log_priv.h" 20 20 #include "xfs_log_recover.h" 21 - #include "xfs_inode_item.h" 22 - #include "xfs_extfree_item.h" 23 21 #include "xfs_trans_priv.h" 24 22 #include "xfs_alloc.h" 25 23 #include "xfs_ialloc.h" 26 - #include "xfs_quota.h" 27 24 #include "xfs_trace.h" 28 25 #include "xfs_icache.h" 29 - #include "xfs_bmap_btree.h" 30 26 #include "xfs_error.h" 31 - #include "xfs_dir2.h" 32 - #include "xfs_rmap_item.h" 33 27 #include "xfs_buf_item.h" 34 - #include "xfs_refcount_item.h" 35 - #include "xfs_bmap_item.h" 36 28 37 29 #define BLK_AVG(blk1, blk2) ((blk1+blk2) >> 1) 38 30 ··· 46 54 STATIC int 47 55 xlog_do_recovery_pass( 48 56 struct xlog *, xfs_daddr_t, xfs_daddr_t, int, xfs_daddr_t *); 49 - 50 - /* 51 - * This structure is used during recovery to record the buf log items which 52 - * have been canceled and should not be replayed. 53 - */ 54 - struct xfs_buf_cancel { 55 - xfs_daddr_t bc_blkno; 56 - uint bc_len; 57 - int bc_refcount; 58 - struct list_head bc_list; 59 - }; 60 57 61 58 /* 62 59 * Sector aligned buffer routines for buffer create/read/write/access ··· 265 284 return 0; 266 285 } 267 286 268 - STATIC void 287 + void 269 288 xlog_recover_iodone( 270 289 struct xfs_buf *bp) 271 290 { ··· 1760 1779 return 0; 1761 1780 } 1762 1781 1782 + /* 1783 + * Release the recovered intent item in the AIL that matches the given intent 1784 + * type and intent id. 
1785 + */ 1786 + void 1787 + xlog_recover_release_intent( 1788 + struct xlog *log, 1789 + unsigned short intent_type, 1790 + uint64_t intent_id) 1791 + { 1792 + struct xfs_ail_cursor cur; 1793 + struct xfs_log_item *lip; 1794 + struct xfs_ail *ailp = log->l_ailp; 1795 + 1796 + spin_lock(&ailp->ail_lock); 1797 + for (lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); lip != NULL; 1798 + lip = xfs_trans_ail_cursor_next(ailp, &cur)) { 1799 + if (lip->li_type != intent_type) 1800 + continue; 1801 + if (!lip->li_ops->iop_match(lip, intent_id)) 1802 + continue; 1803 + 1804 + spin_unlock(&ailp->ail_lock); 1805 + lip->li_ops->iop_release(lip); 1806 + spin_lock(&ailp->ail_lock); 1807 + break; 1808 + } 1809 + 1810 + xfs_trans_ail_cursor_done(&cur); 1811 + spin_unlock(&ailp->ail_lock); 1812 + } 1813 + 1763 1814 /****************************************************************************** 1764 1815 * 1765 1816 * Log recover routines 1766 1817 * 1767 1818 ****************************************************************************** 1768 1819 */ 1820 + static const struct xlog_recover_item_ops *xlog_recover_item_ops[] = { 1821 + &xlog_buf_item_ops, 1822 + &xlog_inode_item_ops, 1823 + &xlog_dquot_item_ops, 1824 + &xlog_quotaoff_item_ops, 1825 + &xlog_icreate_item_ops, 1826 + &xlog_efi_item_ops, 1827 + &xlog_efd_item_ops, 1828 + &xlog_rui_item_ops, 1829 + &xlog_rud_item_ops, 1830 + &xlog_cui_item_ops, 1831 + &xlog_cud_item_ops, 1832 + &xlog_bui_item_ops, 1833 + &xlog_bud_item_ops, 1834 + }; 1835 + 1836 + static const struct xlog_recover_item_ops * 1837 + xlog_find_item_ops( 1838 + struct xlog_recover_item *item) 1839 + { 1840 + unsigned int i; 1841 + 1842 + for (i = 0; i < ARRAY_SIZE(xlog_recover_item_ops); i++) 1843 + if (ITEM_TYPE(item) == xlog_recover_item_ops[i]->item_type) 1844 + return xlog_recover_item_ops[i]; 1845 + 1846 + return NULL; 1847 + } 1769 1848 1770 1849 /* 1771 1850 * Sort the log items in the transaction. 
··· 1882 1841 struct xlog_recover *trans, 1883 1842 int pass) 1884 1843 { 1885 - xlog_recover_item_t *item, *n; 1844 + struct xlog_recover_item *item, *n; 1886 1845 int error = 0; 1887 1846 LIST_HEAD(sort_list); 1888 1847 LIST_HEAD(cancel_list); 1889 1848 LIST_HEAD(buffer_list); 1890 1849 LIST_HEAD(inode_buffer_list); 1891 - LIST_HEAD(inode_list); 1850 + LIST_HEAD(item_list); 1892 1851 1893 1852 list_splice_init(&trans->r_itemq, &sort_list); 1894 1853 list_for_each_entry_safe(item, n, &sort_list, ri_list) { 1895 - xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; 1854 + enum xlog_recover_reorder fate = XLOG_REORDER_ITEM_LIST; 1896 1855 1897 - switch (ITEM_TYPE(item)) { 1898 - case XFS_LI_ICREATE: 1899 - list_move_tail(&item->ri_list, &buffer_list); 1900 - break; 1901 - case XFS_LI_BUF: 1902 - if (buf_f->blf_flags & XFS_BLF_CANCEL) { 1903 - trace_xfs_log_recover_item_reorder_head(log, 1904 - trans, item, pass); 1905 - list_move(&item->ri_list, &cancel_list); 1906 - break; 1907 - } 1908 - if (buf_f->blf_flags & XFS_BLF_INODE_BUF) { 1909 - list_move(&item->ri_list, &inode_buffer_list); 1910 - break; 1911 - } 1912 - list_move_tail(&item->ri_list, &buffer_list); 1913 - break; 1914 - case XFS_LI_INODE: 1915 - case XFS_LI_DQUOT: 1916 - case XFS_LI_QUOTAOFF: 1917 - case XFS_LI_EFD: 1918 - case XFS_LI_EFI: 1919 - case XFS_LI_RUI: 1920 - case XFS_LI_RUD: 1921 - case XFS_LI_CUI: 1922 - case XFS_LI_CUD: 1923 - case XFS_LI_BUI: 1924 - case XFS_LI_BUD: 1925 - trace_xfs_log_recover_item_reorder_tail(log, 1926 - trans, item, pass); 1927 - list_move_tail(&item->ri_list, &inode_list); 1928 - break; 1929 - default: 1856 + item->ri_ops = xlog_find_item_ops(item); 1857 + if (!item->ri_ops) { 1930 1858 xfs_warn(log->l_mp, 1931 - "%s: unrecognized type of log operation", 1932 - __func__); 1859 + "%s: unrecognized type of log operation (%d)", 1860 + __func__, ITEM_TYPE(item)); 1933 1861 ASSERT(0); 1934 1862 /* 1935 1863 * return the remaining items back to the transaction ··· 1906 
1896 */ 1907 1897 if (!list_empty(&sort_list)) 1908 1898 list_splice_init(&sort_list, &trans->r_itemq); 1909 - error = -EIO; 1910 - goto out; 1899 + error = -EFSCORRUPTED; 1900 + break; 1901 + } 1902 + 1903 + if (item->ri_ops->reorder) 1904 + fate = item->ri_ops->reorder(item); 1905 + 1906 + switch (fate) { 1907 + case XLOG_REORDER_BUFFER_LIST: 1908 + list_move_tail(&item->ri_list, &buffer_list); 1909 + break; 1910 + case XLOG_REORDER_CANCEL_LIST: 1911 + trace_xfs_log_recover_item_reorder_head(log, 1912 + trans, item, pass); 1913 + list_move(&item->ri_list, &cancel_list); 1914 + break; 1915 + case XLOG_REORDER_INODE_BUFFER_LIST: 1916 + list_move(&item->ri_list, &inode_buffer_list); 1917 + break; 1918 + case XLOG_REORDER_ITEM_LIST: 1919 + trace_xfs_log_recover_item_reorder_tail(log, 1920 + trans, item, pass); 1921 + list_move_tail(&item->ri_list, &item_list); 1922 + break; 1911 1923 } 1912 1924 } 1913 - out: 1925 + 1914 1926 ASSERT(list_empty(&sort_list)); 1915 1927 if (!list_empty(&buffer_list)) 1916 1928 list_splice(&buffer_list, &trans->r_itemq); 1917 - if (!list_empty(&inode_list)) 1918 - list_splice_tail(&inode_list, &trans->r_itemq); 1929 + if (!list_empty(&item_list)) 1930 + list_splice_tail(&item_list, &trans->r_itemq); 1919 1931 if (!list_empty(&inode_buffer_list)) 1920 1932 list_splice_tail(&inode_buffer_list, &trans->r_itemq); 1921 1933 if (!list_empty(&cancel_list)) ··· 1945 1913 return error; 1946 1914 } 1947 1915 1948 - /* 1949 - * Build up the table of buf cancel records so that we don't replay 1950 - * cancelled data in the second pass. For buffer records that are 1951 - * not cancel records, there is nothing to do here so we just return. 1952 - * 1953 - * If we get a cancel record which is already in the table, this indicates 1954 - * that the buffer was cancelled multiple times. 
In order to ensure 1955 - * that during pass 2 we keep the record in the table until we reach its 1956 - * last occurrence in the log, we keep a reference count in the cancel 1957 - * record in the table to tell us how many times we expect to see this 1958 - * record during the second pass. 1959 - */ 1960 - STATIC int 1961 - xlog_recover_buffer_pass1( 1962 - struct xlog *log, 1963 - struct xlog_recover_item *item) 1964 - { 1965 - xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; 1966 - struct list_head *bucket; 1967 - struct xfs_buf_cancel *bcp; 1968 - 1969 - if (!xfs_buf_log_check_iovec(&item->ri_buf[0])) { 1970 - xfs_err(log->l_mp, "bad buffer log item size (%d)", 1971 - item->ri_buf[0].i_len); 1972 - return -EFSCORRUPTED; 1973 - } 1974 - 1975 - /* 1976 - * If this isn't a cancel buffer item, then just return. 1977 - */ 1978 - if (!(buf_f->blf_flags & XFS_BLF_CANCEL)) { 1979 - trace_xfs_log_recover_buf_not_cancel(log, buf_f); 1980 - return 0; 1981 - } 1982 - 1983 - /* 1984 - * Insert an xfs_buf_cancel record into the hash table of them. 1985 - * If there is already an identical record, bump its reference count. 1986 - */ 1987 - bucket = XLOG_BUF_CANCEL_BUCKET(log, buf_f->blf_blkno); 1988 - list_for_each_entry(bcp, bucket, bc_list) { 1989 - if (bcp->bc_blkno == buf_f->blf_blkno && 1990 - bcp->bc_len == buf_f->blf_len) { 1991 - bcp->bc_refcount++; 1992 - trace_xfs_log_recover_buf_cancel_ref_inc(log, buf_f); 1993 - return 0; 1994 - } 1995 - } 1996 - 1997 - bcp = kmem_alloc(sizeof(struct xfs_buf_cancel), 0); 1998 - bcp->bc_blkno = buf_f->blf_blkno; 1999 - bcp->bc_len = buf_f->blf_len; 2000 - bcp->bc_refcount = 1; 2001 - list_add_tail(&bcp->bc_list, bucket); 2002 - 2003 - trace_xfs_log_recover_buf_cancel_add(log, buf_f); 2004 - return 0; 2005 - } 2006 - 2007 - /* 2008 - * Check to see whether the buffer being recovered has a corresponding 2009 - * entry in the buffer cancel record table. If it is, return the cancel 2010 - * buffer structure to the caller. 
2011 - */ 2012 - STATIC struct xfs_buf_cancel * 2013 - xlog_peek_buffer_cancelled( 1916 + void 1917 + xlog_buf_readahead( 2014 1918 struct xlog *log, 2015 1919 xfs_daddr_t blkno, 2016 1920 uint len, 2017 - unsigned short flags) 1921 + const struct xfs_buf_ops *ops) 2018 1922 { 2019 - struct list_head *bucket; 2020 - struct xfs_buf_cancel *bcp; 2021 - 2022 - if (!log->l_buf_cancel_table) { 2023 - /* empty table means no cancelled buffers in the log */ 2024 - ASSERT(!(flags & XFS_BLF_CANCEL)); 2025 - return NULL; 2026 - } 2027 - 2028 - bucket = XLOG_BUF_CANCEL_BUCKET(log, blkno); 2029 - list_for_each_entry(bcp, bucket, bc_list) { 2030 - if (bcp->bc_blkno == blkno && bcp->bc_len == len) 2031 - return bcp; 2032 - } 2033 - 2034 - /* 2035 - * We didn't find a corresponding entry in the table, so return 0 so 2036 - * that the buffer is NOT cancelled. 2037 - */ 2038 - ASSERT(!(flags & XFS_BLF_CANCEL)); 2039 - return NULL; 2040 - } 2041 - 2042 - /* 2043 - * If the buffer is being cancelled then return 1 so that it will be cancelled, 2044 - * otherwise return 0. If the buffer is actually a buffer cancel item 2045 - * (XFS_BLF_CANCEL is set), then decrement the refcount on the entry in the 2046 - * table and remove it from the table if this is the last reference. 2047 - * 2048 - * We remove the cancel record from the table when we encounter its last 2049 - * occurrence in the log so that if the same buffer is re-used again after its 2050 - * last cancellation we actually replay the changes made at that point. 2051 - */ 2052 - STATIC int 2053 - xlog_check_buffer_cancelled( 2054 - struct xlog *log, 2055 - xfs_daddr_t blkno, 2056 - uint len, 2057 - unsigned short flags) 2058 - { 2059 - struct xfs_buf_cancel *bcp; 2060 - 2061 - bcp = xlog_peek_buffer_cancelled(log, blkno, len, flags); 2062 - if (!bcp) 2063 - return 0; 2064 - 2065 - /* 2066 - * We've go a match, so return 1 so that the recovery of this buffer 2067 - * is cancelled. 
If this buffer is actually a buffer cancel log 2068 - * item, then decrement the refcount on the one in the table and 2069 - * remove it if this is the last reference. 2070 - */ 2071 - if (flags & XFS_BLF_CANCEL) { 2072 - if (--bcp->bc_refcount == 0) { 2073 - list_del(&bcp->bc_list); 2074 - kmem_free(bcp); 2075 - } 2076 - } 2077 - return 1; 2078 - } 2079 - 2080 - /* 2081 - * Perform recovery for a buffer full of inodes. In these buffers, the only 2082 - * data which should be recovered is that which corresponds to the 2083 - * di_next_unlinked pointers in the on disk inode structures. The rest of the 2084 - * data for the inodes is always logged through the inodes themselves rather 2085 - * than the inode buffer and is recovered in xlog_recover_inode_pass2(). 2086 - * 2087 - * The only time when buffers full of inodes are fully recovered is when the 2088 - * buffer is full of newly allocated inodes. In this case the buffer will 2089 - * not be marked as an inode buffer and so will be sent to 2090 - * xlog_recover_do_reg_buffer() below during recovery. 2091 - */ 2092 - STATIC int 2093 - xlog_recover_do_inode_buffer( 2094 - struct xfs_mount *mp, 2095 - xlog_recover_item_t *item, 2096 - struct xfs_buf *bp, 2097 - xfs_buf_log_format_t *buf_f) 2098 - { 2099 - int i; 2100 - int item_index = 0; 2101 - int bit = 0; 2102 - int nbits = 0; 2103 - int reg_buf_offset = 0; 2104 - int reg_buf_bytes = 0; 2105 - int next_unlinked_offset; 2106 - int inodes_per_buf; 2107 - xfs_agino_t *logged_nextp; 2108 - xfs_agino_t *buffer_nextp; 2109 - 2110 - trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f); 2111 - 2112 - /* 2113 - * Post recovery validation only works properly on CRC enabled 2114 - * filesystems. 
2115 - */ 2116 - if (xfs_sb_version_hascrc(&mp->m_sb)) 2117 - bp->b_ops = &xfs_inode_buf_ops; 2118 - 2119 - inodes_per_buf = BBTOB(bp->b_length) >> mp->m_sb.sb_inodelog; 2120 - for (i = 0; i < inodes_per_buf; i++) { 2121 - next_unlinked_offset = (i * mp->m_sb.sb_inodesize) + 2122 - offsetof(xfs_dinode_t, di_next_unlinked); 2123 - 2124 - while (next_unlinked_offset >= 2125 - (reg_buf_offset + reg_buf_bytes)) { 2126 - /* 2127 - * The next di_next_unlinked field is beyond 2128 - * the current logged region. Find the next 2129 - * logged region that contains or is beyond 2130 - * the current di_next_unlinked field. 2131 - */ 2132 - bit += nbits; 2133 - bit = xfs_next_bit(buf_f->blf_data_map, 2134 - buf_f->blf_map_size, bit); 2135 - 2136 - /* 2137 - * If there are no more logged regions in the 2138 - * buffer, then we're done. 2139 - */ 2140 - if (bit == -1) 2141 - return 0; 2142 - 2143 - nbits = xfs_contig_bits(buf_f->blf_data_map, 2144 - buf_f->blf_map_size, bit); 2145 - ASSERT(nbits > 0); 2146 - reg_buf_offset = bit << XFS_BLF_SHIFT; 2147 - reg_buf_bytes = nbits << XFS_BLF_SHIFT; 2148 - item_index++; 2149 - } 2150 - 2151 - /* 2152 - * If the current logged region starts after the current 2153 - * di_next_unlinked field, then move on to the next 2154 - * di_next_unlinked field. 2155 - */ 2156 - if (next_unlinked_offset < reg_buf_offset) 2157 - continue; 2158 - 2159 - ASSERT(item->ri_buf[item_index].i_addr != NULL); 2160 - ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0); 2161 - ASSERT((reg_buf_offset + reg_buf_bytes) <= BBTOB(bp->b_length)); 2162 - 2163 - /* 2164 - * The current logged region contains a copy of the 2165 - * current di_next_unlinked field. Extract its value 2166 - * and copy it to the buffer copy. 
2167 - */ 2168 - logged_nextp = item->ri_buf[item_index].i_addr + 2169 - next_unlinked_offset - reg_buf_offset; 2170 - if (XFS_IS_CORRUPT(mp, *logged_nextp == 0)) { 2171 - xfs_alert(mp, 2172 - "Bad inode buffer log record (ptr = "PTR_FMT", bp = "PTR_FMT"). " 2173 - "Trying to replay bad (0) inode di_next_unlinked field.", 2174 - item, bp); 2175 - return -EFSCORRUPTED; 2176 - } 2177 - 2178 - buffer_nextp = xfs_buf_offset(bp, next_unlinked_offset); 2179 - *buffer_nextp = *logged_nextp; 2180 - 2181 - /* 2182 - * If necessary, recalculate the CRC in the on-disk inode. We 2183 - * have to leave the inode in a consistent state for whoever 2184 - * reads it next.... 2185 - */ 2186 - xfs_dinode_calc_crc(mp, 2187 - xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize)); 2188 - 2189 - } 2190 - 2191 - return 0; 2192 - } 2193 - 2194 - /* 2195 - * V5 filesystems know the age of the buffer on disk being recovered. We can 2196 - * have newer objects on disk than we are replaying, and so for these cases we 2197 - * don't want to replay the current change as that will make the buffer contents 2198 - * temporarily invalid on disk. 2199 - * 2200 - * The magic number might not match the buffer type we are going to recover 2201 - * (e.g. reallocated blocks), so we ignore the xfs_buf_log_format flags. Hence 2202 - * extract the LSN of the existing object in the buffer based on it's current 2203 - * magic number. If we don't recognise the magic number in the buffer, then 2204 - * return a LSN of -1 so that the caller knows it was an unrecognised block and 2205 - * so can recover the buffer. 2206 - * 2207 - * Note: we cannot rely solely on magic number matches to determine that the 2208 - * buffer has a valid LSN - we also need to verify that it belongs to this 2209 - * filesystem, so we need to extract the object's LSN and compare it to that 2210 - * which we read from the superblock. 
- * If the UUIDs don't match, then we've got a stale metadata block from
- * an old filesystem instance that we need to recover over the top of.
- */
-static xfs_lsn_t
-xlog_recover_get_buf_lsn(
-        struct xfs_mount        *mp,
-        struct xfs_buf          *bp)
-{
-        uint32_t                magic32;
-        uint16_t                magic16;
-        uint16_t                magicda;
-        void                    *blk = bp->b_addr;
-        uuid_t                  *uuid;
-        xfs_lsn_t               lsn = -1;
-
-        /* v4 filesystems always recover immediately */
-        if (!xfs_sb_version_hascrc(&mp->m_sb))
-                goto recover_immediately;
-
-        magic32 = be32_to_cpu(*(__be32 *)blk);
-        switch (magic32) {
-        case XFS_ABTB_CRC_MAGIC:
-        case XFS_ABTC_CRC_MAGIC:
-        case XFS_ABTB_MAGIC:
-        case XFS_ABTC_MAGIC:
-        case XFS_RMAP_CRC_MAGIC:
-        case XFS_REFC_CRC_MAGIC:
-        case XFS_IBT_CRC_MAGIC:
-        case XFS_IBT_MAGIC: {
-                struct xfs_btree_block *btb = blk;
-
-                lsn = be64_to_cpu(btb->bb_u.s.bb_lsn);
-                uuid = &btb->bb_u.s.bb_uuid;
-                break;
-        }
-        case XFS_BMAP_CRC_MAGIC:
-        case XFS_BMAP_MAGIC: {
-                struct xfs_btree_block *btb = blk;
-
-                lsn = be64_to_cpu(btb->bb_u.l.bb_lsn);
-                uuid = &btb->bb_u.l.bb_uuid;
-                break;
-        }
-        case XFS_AGF_MAGIC:
-                lsn = be64_to_cpu(((struct xfs_agf *)blk)->agf_lsn);
-                uuid = &((struct xfs_agf *)blk)->agf_uuid;
-                break;
-        case XFS_AGFL_MAGIC:
-                lsn = be64_to_cpu(((struct xfs_agfl *)blk)->agfl_lsn);
-                uuid = &((struct xfs_agfl *)blk)->agfl_uuid;
-                break;
-        case XFS_AGI_MAGIC:
-                lsn = be64_to_cpu(((struct xfs_agi *)blk)->agi_lsn);
-                uuid = &((struct xfs_agi *)blk)->agi_uuid;
-                break;
-        case XFS_SYMLINK_MAGIC:
-                lsn = be64_to_cpu(((struct xfs_dsymlink_hdr *)blk)->sl_lsn);
-                uuid = &((struct xfs_dsymlink_hdr *)blk)->sl_uuid;
-                break;
-        case XFS_DIR3_BLOCK_MAGIC:
-        case XFS_DIR3_DATA_MAGIC:
-        case XFS_DIR3_FREE_MAGIC:
-                lsn = be64_to_cpu(((struct xfs_dir3_blk_hdr *)blk)->lsn);
-                uuid = &((struct xfs_dir3_blk_hdr *)blk)->uuid;
-                break;
-        case XFS_ATTR3_RMT_MAGIC:
-                /*
-                 * Remote attr blocks are written synchronously, rather than
-                 * being logged. That means they do not contain a valid LSN
-                 * (i.e. transactionally ordered) in them, and hence any time we
-                 * see a buffer to replay over the top of a remote attribute
-                 * block we should simply do so.
-                 */
-                goto recover_immediately;
-        case XFS_SB_MAGIC:
-                /*
-                 * superblock uuids are magic. We may or may not have a
-                 * sb_meta_uuid on disk, but it will be set in the in-core
-                 * superblock. We set the uuid pointer for verification
-                 * according to the superblock feature mask to ensure we check
-                 * the relevant UUID in the superblock.
-                 */
-                lsn = be64_to_cpu(((struct xfs_dsb *)blk)->sb_lsn);
-                if (xfs_sb_version_hasmetauuid(&mp->m_sb))
-                        uuid = &((struct xfs_dsb *)blk)->sb_meta_uuid;
-                else
-                        uuid = &((struct xfs_dsb *)blk)->sb_uuid;
-                break;
-        default:
-                break;
-        }
-
-        if (lsn != (xfs_lsn_t)-1) {
-                if (!uuid_equal(&mp->m_sb.sb_meta_uuid, uuid))
-                        goto recover_immediately;
-                return lsn;
-        }
-
-        magicda = be16_to_cpu(((struct xfs_da_blkinfo *)blk)->magic);
-        switch (magicda) {
-        case XFS_DIR3_LEAF1_MAGIC:
-        case XFS_DIR3_LEAFN_MAGIC:
-        case XFS_DA3_NODE_MAGIC:
-                lsn = be64_to_cpu(((struct xfs_da3_blkinfo *)blk)->lsn);
-                uuid = &((struct xfs_da3_blkinfo *)blk)->uuid;
-                break;
-        default:
-                break;
-        }
-
-        if (lsn != (xfs_lsn_t)-1) {
-                if (!uuid_equal(&mp->m_sb.sb_uuid, uuid))
-                        goto recover_immediately;
-                return lsn;
-        }
-
-        /*
-         * We do individual object checks on dquot and inode buffers as they
-         * have their own individual LSN records.
-         * Also, we could have a stale buffer here, so we have to at least
-         * recognise these buffer types.
-         *
-         * A noted complexity here is inode unlinked list processing - it logs
-         * the inode directly in the buffer, but we don't know which inodes have
-         * been modified, and there is no global buffer LSN. Hence we need to
-         * recover all inode buffer types immediately. This problem will be
-         * fixed by logical logging of the unlinked list modifications.
-         */
-        magic16 = be16_to_cpu(*(__be16 *)blk);
-        switch (magic16) {
-        case XFS_DQUOT_MAGIC:
-        case XFS_DINODE_MAGIC:
-                goto recover_immediately;
-        default:
-                break;
-        }
-
-        /* unknown buffer contents, recover immediately */
-
-recover_immediately:
-        return (xfs_lsn_t)-1;
-
-}
-
-/*
- * Validate the recovered buffer is of the correct type and attach the
- * appropriate buffer operations to it for writeback. Magic numbers are in a
- * few places:
- *        the first 16 bits of the buffer (inode buffer, dquot buffer),
- *        the first 32 bits of the buffer (most blocks),
- *        inside a struct xfs_da_blkinfo at the start of the buffer.
- */
-static void
-xlog_recover_validate_buf_type(
-        struct xfs_mount        *mp,
-        struct xfs_buf          *bp,
-        xfs_buf_log_format_t    *buf_f,
-        xfs_lsn_t               current_lsn)
-{
-        struct xfs_da_blkinfo   *info = bp->b_addr;
-        uint32_t                magic32;
-        uint16_t                magic16;
-        uint16_t                magicda;
-        char                    *warnmsg = NULL;
-
-        /*
-         * We can only do post recovery validation on items on CRC enabled
-         * filesystems as we need to know when the buffer was written to be able
-         * to determine if we should have replayed the item. If we replay old
-         * metadata over a newer buffer, then it will enter a temporarily
-         * inconsistent state resulting in verification failures.
-         * Hence for now just avoid the verification stage for non-crc
-         * filesystems.
-         */
-        if (!xfs_sb_version_hascrc(&mp->m_sb))
-                return;
-
-        magic32 = be32_to_cpu(*(__be32 *)bp->b_addr);
-        magic16 = be16_to_cpu(*(__be16 *)bp->b_addr);
-        magicda = be16_to_cpu(info->magic);
-        switch (xfs_blft_from_flags(buf_f)) {
-        case XFS_BLFT_BTREE_BUF:
-                switch (magic32) {
-                case XFS_ABTB_CRC_MAGIC:
-                case XFS_ABTB_MAGIC:
-                        bp->b_ops = &xfs_bnobt_buf_ops;
-                        break;
-                case XFS_ABTC_CRC_MAGIC:
-                case XFS_ABTC_MAGIC:
-                        bp->b_ops = &xfs_cntbt_buf_ops;
-                        break;
-                case XFS_IBT_CRC_MAGIC:
-                case XFS_IBT_MAGIC:
-                        bp->b_ops = &xfs_inobt_buf_ops;
-                        break;
-                case XFS_FIBT_CRC_MAGIC:
-                case XFS_FIBT_MAGIC:
-                        bp->b_ops = &xfs_finobt_buf_ops;
-                        break;
-                case XFS_BMAP_CRC_MAGIC:
-                case XFS_BMAP_MAGIC:
-                        bp->b_ops = &xfs_bmbt_buf_ops;
-                        break;
-                case XFS_RMAP_CRC_MAGIC:
-                        bp->b_ops = &xfs_rmapbt_buf_ops;
-                        break;
-                case XFS_REFC_CRC_MAGIC:
-                        bp->b_ops = &xfs_refcountbt_buf_ops;
-                        break;
-                default:
-                        warnmsg = "Bad btree block magic!";
-                        break;
-                }
-                break;
-        case XFS_BLFT_AGF_BUF:
-                if (magic32 != XFS_AGF_MAGIC) {
-                        warnmsg = "Bad AGF block magic!";
-                        break;
-                }
-                bp->b_ops = &xfs_agf_buf_ops;
-                break;
-        case XFS_BLFT_AGFL_BUF:
-                if (magic32 != XFS_AGFL_MAGIC) {
-                        warnmsg = "Bad AGFL block magic!";
-                        break;
-                }
-                bp->b_ops = &xfs_agfl_buf_ops;
-                break;
-        case XFS_BLFT_AGI_BUF:
-                if (magic32 != XFS_AGI_MAGIC) {
-                        warnmsg = "Bad AGI block magic!";
-                        break;
-                }
-                bp->b_ops = &xfs_agi_buf_ops;
-                break;
-        case XFS_BLFT_UDQUOT_BUF:
-        case XFS_BLFT_PDQUOT_BUF:
-        case XFS_BLFT_GDQUOT_BUF:
-#ifdef CONFIG_XFS_QUOTA
-                if (magic16 != XFS_DQUOT_MAGIC) {
-                        warnmsg = "Bad DQUOT block magic!";
-                        break;
-                }
-                bp->b_ops = &xfs_dquot_buf_ops;
-#else
-                xfs_alert(mp,
-        "Trying to recover dquots without QUOTA support built in!");
-                ASSERT(0);
-#endif
-                break;
-        case XFS_BLFT_DINO_BUF:
-                if (magic16 != XFS_DINODE_MAGIC) {
-                        warnmsg = "Bad INODE block magic!";
-                        break;
-                }
-                bp->b_ops = &xfs_inode_buf_ops;
-                break;
-        case XFS_BLFT_SYMLINK_BUF:
-                if (magic32 != XFS_SYMLINK_MAGIC) {
-                        warnmsg = "Bad symlink block magic!";
-                        break;
-                }
-                bp->b_ops = &xfs_symlink_buf_ops;
-                break;
-        case XFS_BLFT_DIR_BLOCK_BUF:
-                if (magic32 != XFS_DIR2_BLOCK_MAGIC &&
-                    magic32 != XFS_DIR3_BLOCK_MAGIC) {
-                        warnmsg = "Bad dir block magic!";
-                        break;
-                }
-                bp->b_ops = &xfs_dir3_block_buf_ops;
-                break;
-        case XFS_BLFT_DIR_DATA_BUF:
-                if (magic32 != XFS_DIR2_DATA_MAGIC &&
-                    magic32 != XFS_DIR3_DATA_MAGIC) {
-                        warnmsg = "Bad dir data magic!";
-                        break;
-                }
-                bp->b_ops = &xfs_dir3_data_buf_ops;
-                break;
-        case XFS_BLFT_DIR_FREE_BUF:
-                if (magic32 != XFS_DIR2_FREE_MAGIC &&
-                    magic32 != XFS_DIR3_FREE_MAGIC) {
-                        warnmsg = "Bad dir3 free magic!";
-                        break;
-                }
-                bp->b_ops = &xfs_dir3_free_buf_ops;
-                break;
-        case XFS_BLFT_DIR_LEAF1_BUF:
-                if (magicda != XFS_DIR2_LEAF1_MAGIC &&
-                    magicda != XFS_DIR3_LEAF1_MAGIC) {
-                        warnmsg = "Bad dir leaf1 magic!";
-                        break;
-                }
-                bp->b_ops = &xfs_dir3_leaf1_buf_ops;
-                break;
-        case XFS_BLFT_DIR_LEAFN_BUF:
-                if (magicda != XFS_DIR2_LEAFN_MAGIC &&
-                    magicda != XFS_DIR3_LEAFN_MAGIC) {
-                        warnmsg = "Bad dir leafn magic!";
-                        break;
-                }
-                bp->b_ops = &xfs_dir3_leafn_buf_ops;
-                break;
-        case XFS_BLFT_DA_NODE_BUF:
-                if (magicda != XFS_DA_NODE_MAGIC &&
-                    magicda != XFS_DA3_NODE_MAGIC) {
-                        warnmsg = "Bad da node magic!";
-                        break;
-                }
-                bp->b_ops = &xfs_da3_node_buf_ops;
-                break;
-        case XFS_BLFT_ATTR_LEAF_BUF:
-                if (magicda != XFS_ATTR_LEAF_MAGIC &&
-                    magicda != XFS_ATTR3_LEAF_MAGIC) {
-                        warnmsg = "Bad attr leaf magic!";
-                        break;
-                }
-                bp->b_ops = &xfs_attr3_leaf_buf_ops;
-                break;
-        case XFS_BLFT_ATTR_RMT_BUF:
-                if (magic32 != XFS_ATTR3_RMT_MAGIC) {
-                        warnmsg = "Bad attr remote magic!";
-                        break;
-                }
-                bp->b_ops = &xfs_attr3_rmt_buf_ops;
-                break;
-        case XFS_BLFT_SB_BUF:
-                if (magic32 != XFS_SB_MAGIC) {
-                        warnmsg = "Bad SB block magic!";
-                        break;
-                }
-                bp->b_ops = &xfs_sb_buf_ops;
-                break;
-#ifdef CONFIG_XFS_RT
-        case XFS_BLFT_RTBITMAP_BUF:
-        case XFS_BLFT_RTSUMMARY_BUF:
-                /* no magic numbers for verification of RT buffers */
-                bp->b_ops = &xfs_rtbuf_ops;
-                break;
-#endif /* CONFIG_XFS_RT */
-        default:
-                xfs_warn(mp, "Unknown buffer type %d!",
-                         xfs_blft_from_flags(buf_f));
-                break;
-        }
-
-        /*
-         * Nothing else to do in the case of a NULL current LSN as this means
-         * the buffer is more recent than the change in the log and will be
-         * skipped.
-         */
-        if (current_lsn == NULLCOMMITLSN)
-                return;
-
-        if (warnmsg) {
-                xfs_warn(mp, warnmsg);
-                ASSERT(0);
-        }
-
-        /*
-         * We must update the metadata LSN of the buffer as it is written out to
-         * ensure that older transactions never replay over this one and corrupt
-         * the buffer. This can occur if log recovery is interrupted at some
-         * point after the current transaction completes, at which point a
-         * subsequent mount starts recovery from the beginning.
-         *
-         * Write verifiers update the metadata LSN from log items attached to
-         * the buffer. Therefore, initialize a bli purely to carry the LSN to
-         * the verifier.
-         * We'll clean it up in our ->iodone() callback.
-         */
-        if (bp->b_ops) {
-                struct xfs_buf_log_item *bip;
-
-                ASSERT(!bp->b_iodone || bp->b_iodone == xlog_recover_iodone);
-                bp->b_iodone = xlog_recover_iodone;
-                xfs_buf_item_init(bp, mp);
-                bip = bp->b_log_item;
-                bip->bli_item.li_lsn = current_lsn;
-        }
-}
-
-/*
- * Perform a 'normal' buffer recovery.  Each logged region of the
- * buffer should be copied over the corresponding region in the
- * given buffer.  The bitmap in the buf log format structure indicates
- * where to place the logged data.
- */
-STATIC void
-xlog_recover_do_reg_buffer(
-        struct xfs_mount        *mp,
-        xlog_recover_item_t     *item,
-        struct xfs_buf          *bp,
-        xfs_buf_log_format_t    *buf_f,
-        xfs_lsn_t               current_lsn)
-{
-        int                     i;
-        int                     bit;
-        int                     nbits;
-        xfs_failaddr_t          fa;
-        const size_t            size_disk_dquot = sizeof(struct xfs_disk_dquot);
-
-        trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f);
-
-        bit = 0;
-        i = 1;  /* 0 is the buf format structure */
-        while (1) {
-                bit = xfs_next_bit(buf_f->blf_data_map,
-                                   buf_f->blf_map_size, bit);
-                if (bit == -1)
-                        break;
-                nbits = xfs_contig_bits(buf_f->blf_data_map,
-                                        buf_f->blf_map_size, bit);
-                ASSERT(nbits > 0);
-                ASSERT(item->ri_buf[i].i_addr != NULL);
-                ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0);
-                ASSERT(BBTOB(bp->b_length) >=
-                       ((uint)bit << XFS_BLF_SHIFT) + (nbits << XFS_BLF_SHIFT));
-
-                /*
-                 * The dirty regions logged in the buffer, even though
-                 * contiguous, may span multiple chunks. This is because the
-                 * dirty region may span a physical page boundary in a buffer
-                 * and hence be split into two separate vectors for writing into
-                 * the log.
-                 * Hence we need to trim nbits back to the length of
-                 * the current region being copied out of the log.
-                 */
-                if (item->ri_buf[i].i_len < (nbits << XFS_BLF_SHIFT))
-                        nbits = item->ri_buf[i].i_len >> XFS_BLF_SHIFT;
-
-                /*
-                 * Do a sanity check if this is a dquot buffer. Just checking
-                 * the first dquot in the buffer should do. XXX This is
-                 * probably a good thing to do for other buf types also.
-                 */
-                fa = NULL;
-                if (buf_f->blf_flags &
-                   (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) {
-                        if (item->ri_buf[i].i_addr == NULL) {
-                                xfs_alert(mp,
-                                        "XFS: NULL dquot in %s.", __func__);
-                                goto next;
-                        }
-                        if (item->ri_buf[i].i_len < size_disk_dquot) {
-                                xfs_alert(mp,
-                                        "XFS: dquot too small (%d) in %s.",
-                                        item->ri_buf[i].i_len, __func__);
-                                goto next;
-                        }
-                        fa = xfs_dquot_verify(mp, item->ri_buf[i].i_addr,
-                                              -1, 0);
-                        if (fa) {
-                                xfs_alert(mp,
-        "dquot corrupt at %pS trying to replay into block 0x%llx",
-                                        fa, bp->b_bn);
-                                goto next;
-                        }
-                }
-
-                memcpy(xfs_buf_offset(bp,
-                        (uint)bit << XFS_BLF_SHIFT),    /* dest */
-                        item->ri_buf[i].i_addr,         /* source */
-                        nbits << XFS_BLF_SHIFT);        /* length */
- next:
-                i++;
-                bit += nbits;
-        }
-
-        /* Shouldn't be any more regions */
-        ASSERT(i == item->ri_total);
-
-        xlog_recover_validate_buf_type(mp, bp, buf_f, current_lsn);
-}
-
-/*
- * Perform a dquot buffer recovery.
- * Simple algorithm: if we have found a QUOTAOFF log item of the same type
- * (ie. USR or GRP), then just toss this buffer away; don't recover it.
- * Else, treat it as a regular buffer and do recovery.
- *
- * Return false if the buffer was tossed and true if we recovered the buffer to
- * indicate to the caller if the buffer needs writing.
- */
-STATIC bool
-xlog_recover_do_dquot_buffer(
-        struct xfs_mount                *mp,
-        struct xlog                     *log,
-        struct xlog_recover_item        *item,
-        struct xfs_buf                  *bp,
-        struct xfs_buf_log_format       *buf_f)
-{
-        uint                    type;
-
-        trace_xfs_log_recover_buf_dquot_buf(log, buf_f);
-
-        /*
-         * Filesystems are required to send in quota flags at mount time.
-         */
-        if (!mp->m_qflags)
-                return false;
-
-        type = 0;
-        if (buf_f->blf_flags & XFS_BLF_UDQUOT_BUF)
-                type |= XFS_DQ_USER;
-        if (buf_f->blf_flags & XFS_BLF_PDQUOT_BUF)
-                type |= XFS_DQ_PROJ;
-        if (buf_f->blf_flags & XFS_BLF_GDQUOT_BUF)
-                type |= XFS_DQ_GROUP;
-        /*
-         * This type of quota was turned off, so ignore this buffer.
-         */
-        if (log->l_quotaoffs_flag & type)
-                return false;
-
-        xlog_recover_do_reg_buffer(mp, item, bp, buf_f, NULLCOMMITLSN);
-        return true;
-}
-
-/*
- * This routine replays a modification made to a buffer at runtime.
- * There are actually two types of buffer, regular and inode, which
- * are handled differently.  Inode buffers are handled differently
- * in that we only recover a specific set of data from them, namely
- * the inode di_next_unlinked fields.  This is because all other inode
- * data is actually logged via inode records and any data we replay
- * here which overlaps that may be stale.
- *
- * When meta-data buffers are freed at run time we log a buffer item
- * with the XFS_BLF_CANCEL bit set to indicate that previous copies
- * of the buffer in the log should not be replayed at recovery time.
- * This is so that if the blocks covered by the buffer are reused for
- * file data before we crash we don't end up replaying old, freed
- * meta-data into a user's file.
- *
- * To handle the cancellation of buffer log items, we make two passes
- * over the log during recovery.  During the first we build a table of
- * those buffers which have been cancelled, and during the second we
- * only replay those buffers which do not have corresponding cancel
- * records in the table.  See xlog_recover_buffer_pass[1,2] above
- * for more details on the implementation of the table of cancel records.
- */
-STATIC int
-xlog_recover_buffer_pass2(
-        struct xlog                     *log,
-        struct list_head                *buffer_list,
-        struct xlog_recover_item        *item,
-        xfs_lsn_t                       current_lsn)
-{
-        xfs_buf_log_format_t    *buf_f = item->ri_buf[0].i_addr;
-        xfs_mount_t             *mp = log->l_mp;
-        xfs_buf_t               *bp;
-        int                     error;
-        uint                    buf_flags;
-        xfs_lsn_t               lsn;
-
-        /*
-         * In this pass we only want to recover all the buffers which have
-         * not been cancelled and are not cancellation buffers themselves.
-         */
-        if (xlog_check_buffer_cancelled(log, buf_f->blf_blkno,
-                        buf_f->blf_len, buf_f->blf_flags)) {
-                trace_xfs_log_recover_buf_cancel(log, buf_f);
-                return 0;
-        }
-
-        trace_xfs_log_recover_buf_recover(log, buf_f);
-
-        buf_flags = 0;
-        if (buf_f->blf_flags & XFS_BLF_INODE_BUF)
-                buf_flags |= XBF_UNMAPPED;
-
-        error = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len,
-                          buf_flags, &bp, NULL);
-        if (error)
-                return error;
-
-        /*
-         * Recover the buffer only if we get an LSN from it and it's less than
-         * the lsn of the transaction we are replaying.
-         *
-         * Note that we have to be extremely careful of readahead here.
-         * Readahead does not attach verifiers to the buffers, so if we don't
-         * actually do any replay after readahead because the LSN we found in
-         * the buffer is more recent than the current transaction, then we
-         * need to attach the verifier directly. Failure to do so can lead to
-         * future recovery actions (e.g. EFI and unlinked list recovery)
-         * operating on the buffers without the verifier attached. This can
-         * lead to blocks on disk having the correct content but a stale CRC.
-         *
-         * It is safe to assume these clean buffers are currently up to date.
-         * If the buffer is dirtied by a later transaction being replayed, then
-         * the verifier will be reset to match whatever recover turns that
-         * buffer into.
-         */
-        lsn = xlog_recover_get_buf_lsn(mp, bp);
-        if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
-                trace_xfs_log_recover_buf_skip(log, buf_f);
-                xlog_recover_validate_buf_type(mp, bp, buf_f, NULLCOMMITLSN);
-                goto out_release;
-        }
-
-        if (buf_f->blf_flags & XFS_BLF_INODE_BUF) {
-                error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f);
-                if (error)
-                        goto out_release;
-        } else if (buf_f->blf_flags &
-                  (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) {
-                bool    dirty;
-
-                dirty = xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f);
-                if (!dirty)
-                        goto out_release;
-        } else {
-                xlog_recover_do_reg_buffer(mp, item, bp, buf_f, current_lsn);
-        }
-
-        /*
-         * Perform delayed write on the buffer.  Asynchronous writes will be
-         * slower when taking into account all the buffers to be flushed.
-         *
-         * Also make sure that only inode buffers with good sizes stay in
-         * the buffer cache.  The kernel moves inodes in buffers of 1 block
-         * or inode_cluster_size bytes, whichever is bigger.
-         * The inode buffers in the log can be a different size if the log
-         * was generated by an older kernel using unclustered inode buffers
-         * or a newer kernel running with a different inode cluster size.
-         * Regardless, if the inode buffer size isn't max(blocksize,
-         * inode_cluster_size) for *our* value of inode_cluster_size, then
-         * we need to keep the buffer out of the buffer cache so that the
-         * buffer won't overlap with future reads of those inodes.
-         */
-        if (XFS_DINODE_MAGIC ==
-            be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) &&
-            (BBTOB(bp->b_length) != M_IGEO(log->l_mp)->inode_cluster_size)) {
-                xfs_buf_stale(bp);
-                error = xfs_bwrite(bp);
-        } else {
-                ASSERT(bp->b_mount == mp);
-                bp->b_iodone = xlog_recover_iodone;
-                xfs_buf_delwri_queue(bp, buffer_list);
-        }
-
-out_release:
-        xfs_buf_relse(bp);
-        return error;
-}
-
-/*
- * Inode fork owner changes
- *
- * If we have been told that we have to reparent the inode fork, it's because an
- * extent swap operation on a CRC enabled filesystem has been done and we are
- * replaying it. We need to walk the BMBT of the appropriate fork and change the
- * owners of it.
- *
- * The complexity here is that we don't have an inode context to work with, so
- * after we've replayed the inode we need to instantiate one. This is where the
- * fun begins.
- *
- * We are in the middle of log recovery, so we can't run transactions. That
- * means we cannot use cache coherent inode instantiation via xfs_iget(), as
- * that will result in the corresponding iput() running the inode through
- * xfs_inactive(). If we've just replayed an inode core that changes the link
- * count to zero (i.e. it's been unlinked), then xfs_inactive() will run
- * transactions (bad!).
- *
- * So, to avoid this, we instantiate an inode directly from the inode core we've
- * just recovered. We have the buffer still locked, and all we really need to
- * instantiate is the inode core and the forks being modified. We can do this
- * manually, then run the inode btree owner change, and then tear down the
- * xfs_inode without having to run any transactions at all.
- *
- * Also, because we don't have a transaction context available here but need to
- * gather all the buffers we modify for writeback, we pass the buffer_list
- * instead for the operation to use.
- */
-
-STATIC int
-xfs_recover_inode_owner_change(
-        struct xfs_mount        *mp,
-        struct xfs_dinode       *dip,
-        struct xfs_inode_log_format *in_f,
-        struct list_head        *buffer_list)
-{
-        struct xfs_inode        *ip;
-        int                     error;
-
-        ASSERT(in_f->ilf_fields & (XFS_ILOG_DOWNER|XFS_ILOG_AOWNER));
-
-        ip = xfs_inode_alloc(mp, in_f->ilf_ino);
-        if (!ip)
-                return -ENOMEM;
-
-        /* instantiate the inode */
-        ASSERT(dip->di_version >= 3);
-        xfs_inode_from_disk(ip, dip);
-
-        error = xfs_iformat_fork(ip, dip);
-        if (error)
-                goto out_free_ip;
-
-        if (!xfs_inode_verify_forks(ip)) {
-                error = -EFSCORRUPTED;
-                goto out_free_ip;
-        }
-
-        if (in_f->ilf_fields & XFS_ILOG_DOWNER) {
-                ASSERT(in_f->ilf_fields & XFS_ILOG_DBROOT);
-                error = xfs_bmbt_change_owner(NULL, ip, XFS_DATA_FORK,
-                                              ip->i_ino, buffer_list);
-                if (error)
-                        goto out_free_ip;
-        }
-
-        if (in_f->ilf_fields & XFS_ILOG_AOWNER) {
-                ASSERT(in_f->ilf_fields & XFS_ILOG_ABROOT);
-                error = xfs_bmbt_change_owner(NULL, ip, XFS_ATTR_FORK,
-                                              ip->i_ino, buffer_list);
-                if (error)
-                        goto out_free_ip;
-        }
-
-out_free_ip:
-        xfs_inode_free(ip);
-        return error;
-}
-
-STATIC int
-xlog_recover_inode_pass2(
-        struct xlog                     *log,
-        struct list_head                *buffer_list,
-        struct xlog_recover_item        *item,
-        xfs_lsn_t                       current_lsn)
-{
-        struct xfs_inode_log_format     *in_f;
-        xfs_mount_t             *mp = log->l_mp;
-        xfs_buf_t               *bp;
-        xfs_dinode_t            *dip;
-        int                     len;
-        char                    *src;
-        char                    *dest;
-        int                     error;
-        int                     attr_index;
-        uint                    fields;
-        struct xfs_log_dinode   *ldip;
-        uint                    isize;
-        int                     need_free = 0;
-
-        if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) {
-                in_f = item->ri_buf[0].i_addr;
-        } else {
-                in_f = kmem_alloc(sizeof(struct xfs_inode_log_format), 0);
-                need_free = 1;
-                error = xfs_inode_item_format_convert(&item->ri_buf[0], in_f);
-                if (error)
-                        goto error;
-        }
-
-        /*
-         * Inode buffers can be freed; look out for it, and do not
-         * replay the inode.
-         */
-        if (xlog_check_buffer_cancelled(log, in_f->ilf_blkno,
-                        in_f->ilf_len, 0)) {
-                error = 0;
-                trace_xfs_log_recover_inode_cancel(log, in_f);
-                goto error;
-        }
-        trace_xfs_log_recover_inode_recover(log, in_f);
-
-        error = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len,
-                        0, &bp, &xfs_inode_buf_ops);
-        if (error)
-                goto error;
-        ASSERT(in_f->ilf_fields & XFS_ILOG_CORE);
-        dip = xfs_buf_offset(bp, in_f->ilf_boffset);
-
-        /*
-         * Make sure the place we're flushing out to really looks
-         * like an inode!
-         */
-        if (XFS_IS_CORRUPT(mp, !xfs_verify_magic16(bp, dip->di_magic))) {
-                xfs_alert(mp,
-        "%s: Bad inode magic number, dip = "PTR_FMT", dino bp = "PTR_FMT", ino = %Ld",
-                        __func__, dip, bp, in_f->ilf_ino);
-                error = -EFSCORRUPTED;
-                goto out_release;
-        }
-        ldip = item->ri_buf[1].i_addr;
-        if (XFS_IS_CORRUPT(mp, ldip->di_magic != XFS_DINODE_MAGIC)) {
-                xfs_alert(mp,
-                        "%s: Bad inode log record, rec ptr "PTR_FMT", ino %Ld",
-                        __func__, item, in_f->ilf_ino);
-                error = -EFSCORRUPTED;
-                goto out_release;
-        }
-
-        /*
-         * If the inode has an LSN in it, recover the inode only if it's less
-         * than the lsn of the transaction we are replaying. Note: we still
-         * need to replay an owner change even though the inode is more recent
-         * than the transaction as there is no guarantee that all the btree
-         * blocks are more recent than this transaction, too.
-         */
-        if (dip->di_version >= 3) {
-                xfs_lsn_t       lsn = be64_to_cpu(dip->di_lsn);
-
-                if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
-                        trace_xfs_log_recover_inode_skip(log, in_f);
-                        error = 0;
-                        goto out_owner_change;
-                }
-        }
-
-        /*
-         * di_flushiter is only valid for v1/2 inodes. All changes for v3 inodes
-         * are transactional and if ordering is necessary we can determine that
-         * more accurately by the LSN field in the V3 inode core.
-         * Don't trust the inode versions as we might be changing them here -
-         * use the superblock flag to determine whether we need to look at
-         * di_flushiter to skip replay when the on-disk inode is newer than
-         * the log one.
-         */
-        if (!xfs_sb_version_has_v3inode(&mp->m_sb) &&
-            ldip->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
-                /*
-                 * Deal with the wrap case, DI_MAX_FLUSH is less
-                 * than smaller numbers
-                 */
-                if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH &&
-                    ldip->di_flushiter < (DI_MAX_FLUSH >> 1)) {
-                        /* do nothing */
-                } else {
-                        trace_xfs_log_recover_inode_skip(log, in_f);
-                        error = 0;
-                        goto out_release;
-                }
-        }
-
-        /* Take the opportunity to reset the flush iteration count */
-        ldip->di_flushiter = 0;
-
-        if (unlikely(S_ISREG(ldip->di_mode))) {
-                if ((ldip->di_format != XFS_DINODE_FMT_EXTENTS) &&
-                    (ldip->di_format != XFS_DINODE_FMT_BTREE)) {
-                        XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)",
-                                             XFS_ERRLEVEL_LOW, mp, ldip,
-                                             sizeof(*ldip));
-                        xfs_alert(mp,
-                                "%s: Bad regular inode log record, rec ptr "PTR_FMT", "
-                                "ino ptr = "PTR_FMT", ino bp = "PTR_FMT", ino %Ld",
-                                __func__, item, dip, bp, in_f->ilf_ino);
-                        error = -EFSCORRUPTED;
-                        goto out_release;
-                }
-        } else if (unlikely(S_ISDIR(ldip->di_mode))) {
-                if ((ldip->di_format != XFS_DINODE_FMT_EXTENTS) &&
-                    (ldip->di_format != XFS_DINODE_FMT_BTREE) &&
-                    (ldip->di_format != XFS_DINODE_FMT_LOCAL)) {
-                        XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)",
-                                             XFS_ERRLEVEL_LOW, mp, ldip,
-                                             sizeof(*ldip));
-                        xfs_alert(mp,
-                                "%s: Bad dir inode log record, rec ptr "PTR_FMT", "
-                                "ino ptr = "PTR_FMT", ino bp = "PTR_FMT", ino %Ld",
-                                __func__, item, dip, bp, in_f->ilf_ino);
-                        error = -EFSCORRUPTED;
-                        goto out_release;
-                }
-        }
-        if (unlikely(ldip->di_nextents + ldip->di_anextents > ldip->di_nblocks)) {
-                XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)",
-                                     XFS_ERRLEVEL_LOW, mp, ldip,
-                                     sizeof(*ldip));
-                xfs_alert(mp,
-                        "%s: Bad inode log record, rec ptr "PTR_FMT", dino ptr "PTR_FMT", "
-                        "dino bp "PTR_FMT", ino %Ld, total extents = %d, nblocks = %Ld",
-                        __func__, item, dip, bp, in_f->ilf_ino,
-                        ldip->di_nextents + ldip->di_anextents,
-                        ldip->di_nblocks);
-                error = -EFSCORRUPTED;
-                goto out_release;
-        }
-        if (unlikely(ldip->di_forkoff > mp->m_sb.sb_inodesize)) {
-                XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)",
-                                     XFS_ERRLEVEL_LOW, mp, ldip,
-                                     sizeof(*ldip));
-                xfs_alert(mp,
-                        "%s: Bad inode log record, rec ptr "PTR_FMT", dino ptr "PTR_FMT", "
-                        "dino bp "PTR_FMT", ino %Ld, forkoff 0x%x", __func__,
-                        item, dip, bp, in_f->ilf_ino, ldip->di_forkoff);
-                error = -EFSCORRUPTED;
-                goto out_release;
-        }
-        isize = xfs_log_dinode_size(mp);
-        if (unlikely(item->ri_buf[1].i_len > isize)) {
-                XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)",
-                                     XFS_ERRLEVEL_LOW, mp, ldip,
-                                     sizeof(*ldip));
-                xfs_alert(mp,
-                        "%s: Bad inode log record length %d, rec ptr "PTR_FMT,
-                        __func__, item->ri_buf[1].i_len, item);
-                error = -EFSCORRUPTED;
-                goto out_release;
-        }
-
-        /* recover the log dinode into the on-disk inode */
-        xfs_log_dinode_to_disk(ldip, dip);
-
-        fields = in_f->ilf_fields;
-        if (fields & XFS_ILOG_DEV)
-                xfs_dinode_put_rdev(dip, in_f->ilf_u.ilfu_rdev);
-
-        if (in_f->ilf_size == 2)
-                goto out_owner_change;
-        len = item->ri_buf[2].i_len;
-        src = item->ri_buf[2].i_addr;
-        ASSERT(in_f->ilf_size <= 4);
-        ASSERT((in_f->ilf_size == 3) || (fields & XFS_ILOG_AFORK));
-        ASSERT(!(fields & XFS_ILOG_DFORK) ||
-               (len == in_f->ilf_dsize));
-
(fields & XFS_ILOG_DFORK) { 3132 - case XFS_ILOG_DDATA: 3133 - case XFS_ILOG_DEXT: 3134 - memcpy(XFS_DFORK_DPTR(dip), src, len); 3135 - break; 3136 - 3137 - case XFS_ILOG_DBROOT: 3138 - xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src, len, 3139 - (xfs_bmdr_block_t *)XFS_DFORK_DPTR(dip), 3140 - XFS_DFORK_DSIZE(dip, mp)); 3141 - break; 3142 - 3143 - default: 3144 - /* 3145 - * There are no data fork flags set. 3146 - */ 3147 - ASSERT((fields & XFS_ILOG_DFORK) == 0); 3148 - break; 3149 - } 3150 - 3151 - /* 3152 - * If we logged any attribute data, recover it. There may or 3153 - * may not have been any other non-core data logged in this 3154 - * transaction. 3155 - */ 3156 - if (in_f->ilf_fields & XFS_ILOG_AFORK) { 3157 - if (in_f->ilf_fields & XFS_ILOG_DFORK) { 3158 - attr_index = 3; 3159 - } else { 3160 - attr_index = 2; 3161 - } 3162 - len = item->ri_buf[attr_index].i_len; 3163 - src = item->ri_buf[attr_index].i_addr; 3164 - ASSERT(len == in_f->ilf_asize); 3165 - 3166 - switch (in_f->ilf_fields & XFS_ILOG_AFORK) { 3167 - case XFS_ILOG_ADATA: 3168 - case XFS_ILOG_AEXT: 3169 - dest = XFS_DFORK_APTR(dip); 3170 - ASSERT(len <= XFS_DFORK_ASIZE(dip, mp)); 3171 - memcpy(dest, src, len); 3172 - break; 3173 - 3174 - case XFS_ILOG_ABROOT: 3175 - dest = XFS_DFORK_APTR(dip); 3176 - xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src, 3177 - len, (xfs_bmdr_block_t*)dest, 3178 - XFS_DFORK_ASIZE(dip, mp)); 3179 - break; 3180 - 3181 - default: 3182 - xfs_warn(log->l_mp, "%s: Invalid flag", __func__); 3183 - ASSERT(0); 3184 - error = -EFSCORRUPTED; 3185 - goto out_release; 3186 - } 3187 - } 3188 - 3189 - out_owner_change: 3190 - /* Recover the swapext owner change unless inode has been deleted */ 3191 - if ((in_f->ilf_fields & (XFS_ILOG_DOWNER|XFS_ILOG_AOWNER)) && 3192 - (dip->di_mode != 0)) 3193 - error = xfs_recover_inode_owner_change(mp, dip, in_f, 3194 - buffer_list); 3195 - /* re-generate the checksum. 
-	 */
-	xfs_dinode_calc_crc(log->l_mp, dip);
-
-	ASSERT(bp->b_mount == mp);
-	bp->b_iodone = xlog_recover_iodone;
-	xfs_buf_delwri_queue(bp, buffer_list);
-
-out_release:
-	xfs_buf_relse(bp);
-error:
-	if (need_free)
-		kmem_free(in_f);
-	return error;
-}
-
-/*
- * Recover QUOTAOFF records. We simply make a note of it in the xlog
- * structure, so that we know not to do any dquot item or dquot buffer recovery
- * of that type.
- */
-STATIC int
-xlog_recover_quotaoff_pass1(
-	struct xlog			*log,
-	struct xlog_recover_item	*item)
-{
-	xfs_qoff_logformat_t	*qoff_f = item->ri_buf[0].i_addr;
-	ASSERT(qoff_f);
-
-	/*
-	 * The logitem format's flag tells us if this was user quotaoff,
-	 * group/project quotaoff or both.
-	 */
-	if (qoff_f->qf_flags & XFS_UQUOTA_ACCT)
-		log->l_quotaoffs_flag |= XFS_DQ_USER;
-	if (qoff_f->qf_flags & XFS_PQUOTA_ACCT)
-		log->l_quotaoffs_flag |= XFS_DQ_PROJ;
-	if (qoff_f->qf_flags & XFS_GQUOTA_ACCT)
-		log->l_quotaoffs_flag |= XFS_DQ_GROUP;
-
-	return 0;
-}
-
-/*
- * Recover a dquot record
- */
-STATIC int
-xlog_recover_dquot_pass2(
-	struct xlog			*log,
-	struct list_head		*buffer_list,
-	struct xlog_recover_item	*item,
-	xfs_lsn_t			current_lsn)
-{
-	xfs_mount_t		*mp = log->l_mp;
-	xfs_buf_t		*bp;
-	struct xfs_disk_dquot	*ddq, *recddq;
-	xfs_failaddr_t		fa;
-	int			error;
-	xfs_dq_logformat_t	*dq_f;
-	uint			type;
-
-	/*
-	 * Filesystems are required to send in quota flags at mount time.
-	 */
-	if (mp->m_qflags == 0)
-		return 0;
-
-	recddq = item->ri_buf[1].i_addr;
-	if (recddq == NULL) {
-		xfs_alert(log->l_mp, "NULL dquot in %s.", __func__);
-		return -EFSCORRUPTED;
-	}
-	if (item->ri_buf[1].i_len < sizeof(struct xfs_disk_dquot)) {
-		xfs_alert(log->l_mp, "dquot too small (%d) in %s.",
-			item->ri_buf[1].i_len, __func__);
-		return -EFSCORRUPTED;
-	}
-
-	/*
-	 * This type of quota was turned off, so ignore this record.
-	 */
-	type = recddq->d_flags & (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP);
-	ASSERT(type);
-	if (log->l_quotaoffs_flag & type)
-		return 0;
-
-	/*
-	 * At this point we know that quota was _not_ turned off.
-	 * Since the mount flags are not indicating to us otherwise, this
-	 * must mean that quota is on, and the dquot needs to be replayed.
-	 * Remember that we may not have fully recovered the superblock yet,
-	 * so we can't do the usual trick of looking at the SB quota bits.
-	 *
-	 * The other possibility, of course, is that the quota subsystem was
-	 * removed since the last mount - ENOSYS.
-	 */
-	dq_f = item->ri_buf[0].i_addr;
-	ASSERT(dq_f);
-	fa = xfs_dquot_verify(mp, recddq, dq_f->qlf_id, 0);
-	if (fa) {
-		xfs_alert(mp, "corrupt dquot ID 0x%x in log at %pS",
-				dq_f->qlf_id, fa);
-		return -EFSCORRUPTED;
-	}
-	ASSERT(dq_f->qlf_len == 1);
-
-	/*
-	 * At this point we are assuming that the dquots have been allocated
-	 * and hence the buffer has valid dquots stamped in it. It should,
-	 * therefore, pass verifier validation. If the dquot is bad, then
-	 * we'll return an error here, so we don't need to specifically check
-	 * the dquot in the buffer after the verifier has run.
-	 */
-	error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dq_f->qlf_blkno,
-				   XFS_FSB_TO_BB(mp, dq_f->qlf_len), 0, &bp,
-				   &xfs_dquot_buf_ops);
-	if (error)
-		return error;
-
-	ASSERT(bp);
-	ddq = xfs_buf_offset(bp, dq_f->qlf_boffset);
-
-	/*
-	 * If the dquot has an LSN in it, recover the dquot only if it's less
-	 * than the lsn of the transaction we are replaying.
-	 */
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
-		struct xfs_dqblk *dqb = (struct xfs_dqblk *)ddq;
-		xfs_lsn_t	lsn = be64_to_cpu(dqb->dd_lsn);
-
-		if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
-			goto out_release;
-		}
-	}
-
-	memcpy(ddq, recddq, item->ri_buf[1].i_len);
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
-		xfs_update_cksum((char *)ddq, sizeof(struct xfs_dqblk),
-				 XFS_DQUOT_CRC_OFF);
-	}
-
-	ASSERT(dq_f->qlf_size == 2);
-	ASSERT(bp->b_mount == mp);
-	bp->b_iodone = xlog_recover_iodone;
-	xfs_buf_delwri_queue(bp, buffer_list);
-
-out_release:
-	xfs_buf_relse(bp);
-	return 0;
-}
-
-/*
- * This routine is called to create an in-core extent free intent
- * item from the efi format structure which was logged on disk.
- * It allocates an in-core efi, copies the extents from the format
- * structure into it, and adds the efi to the AIL with the given
- * LSN.
- */
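The `lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0` gate above is the general rule for replaying LSN-stamped metadata: skip the copy when the on-disk object is already at or beyond the transaction being replayed. A minimal standalone sketch of that rule, with simplified types; `lsn_cmp()` and `should_replay()` are hypothetical stand-ins, not kernel functions:

```c
#include <assert.h>
#include <stdint.h>

typedef int64_t lsn_t;

/* Hypothetical stand-in for XFS_LSN_CMP(): three-way compare of two LSNs. */
static int lsn_cmp(lsn_t a, lsn_t b)
{
	return (a > b) - (a < b);
}

/*
 * Decide whether a logged copy should be written over the on-disk copy.
 * An unset LSN (0 or -1) means the object predates LSN stamping, so it is
 * always replayed; otherwise replay only if the disk copy is strictly older
 * than the transaction being recovered.
 */
static int should_replay(lsn_t disk_lsn, lsn_t current_lsn)
{
	if (disk_lsn && disk_lsn != -1 && lsn_cmp(disk_lsn, current_lsn) >= 0)
		return 0;	/* on-disk copy is newer or equal: skip */
	return 1;		/* on-disk copy is older or unstamped: replay */
}
```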
-STATIC int
-xlog_recover_efi_pass2(
-	struct xlog			*log,
-	struct xlog_recover_item	*item,
-	xfs_lsn_t			lsn)
-{
-	int				error;
-	struct xfs_mount		*mp = log->l_mp;
-	struct xfs_efi_log_item		*efip;
-	struct xfs_efi_log_format	*efi_formatp;
-
-	efi_formatp = item->ri_buf[0].i_addr;
-
-	efip = xfs_efi_init(mp, efi_formatp->efi_nextents);
-	error = xfs_efi_copy_format(&item->ri_buf[0], &efip->efi_format);
-	if (error) {
-		xfs_efi_item_free(efip);
-		return error;
-	}
-	atomic_set(&efip->efi_next_extent, efi_formatp->efi_nextents);
-
-	spin_lock(&log->l_ailp->ail_lock);
-	/*
-	 * The EFI has two references. One for the EFD and one for EFI to ensure
-	 * it makes it into the AIL. Insert the EFI into the AIL directly and
-	 * drop the EFI reference. Note that xfs_trans_ail_update() drops the
-	 * AIL lock.
-	 */
-	xfs_trans_ail_update(log->l_ailp, &efip->efi_item, lsn);
-	xfs_efi_release(efip);
-	return 0;
-}
-
-/*
- * This routine is called when an EFD format structure is found in a committed
- * transaction in the log. Its purpose is to cancel the corresponding EFI if it
- * was still in the log. To do this it searches the AIL for the EFI with an id
- * equal to that in the EFD format structure. If we find it we drop the EFD
- * reference, which removes the EFI from the AIL and frees it.
- */
-STATIC int
-xlog_recover_efd_pass2(
-	struct xlog			*log,
-	struct xlog_recover_item	*item)
-{
-	xfs_efd_log_format_t	*efd_formatp;
-	xfs_efi_log_item_t	*efip = NULL;
-	struct xfs_log_item	*lip;
-	uint64_t		efi_id;
-	struct xfs_ail_cursor	cur;
-	struct xfs_ail		*ailp = log->l_ailp;
-
-	efd_formatp = item->ri_buf[0].i_addr;
-	ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) +
-		((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) ||
-	       (item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_64_t) +
-		((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_64_t)))));
-	efi_id = efd_formatp->efd_efi_id;
-
-	/*
-	 * Search for the EFI with the id in the EFD format structure in the
-	 * AIL.
-	 */
-	spin_lock(&ailp->ail_lock);
-	lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
-	while (lip != NULL) {
-		if (lip->li_type == XFS_LI_EFI) {
-			efip = (xfs_efi_log_item_t *)lip;
-			if (efip->efi_format.efi_id == efi_id) {
-				/*
-				 * Drop the EFD reference to the EFI. This
-				 * removes the EFI from the AIL and frees it.
-				 */
-				spin_unlock(&ailp->ail_lock);
-				xfs_efi_release(efip);
-				spin_lock(&ailp->ail_lock);
-				break;
-			}
-		}
-		lip = xfs_trans_ail_cursor_next(ailp, &cur);
-	}
-
-	xfs_trans_ail_cursor_done(&cur);
-	spin_unlock(&ailp->ail_lock);
-
-	return 0;
-}
-
-/*
- * This routine is called to create an in-core extent rmap update
- * item from the rui format structure which was logged on disk.
- * It allocates an in-core rui, copies the extents from the format
- * structure into it, and adds the rui to the AIL with the given
- * LSN.
- */
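The EFI/EFD pairing above follows the general intent/done pattern used by all the deferred-op items in this file: a "done" record carries the id of the "intent" it cancels, and recovery walks the list of pending intents to find and release the match. A toy, self-contained model of that search-and-release step (all names here are invented for illustration; the real code walks the AIL under `ail_lock` with a cursor):

```c
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* A pending "intent" record; stands in for struct xfs_efi_log_item. */
struct intent {
	uint64_t	id;
	struct intent	*next;
	int		released;
};

/*
 * Find the intent matching a "done" record's id and release it.  Returns
 * the cancelled intent, or NULL when the intent already completed and was
 * retired before the crash (in which case there is nothing to do).
 */
static struct intent *cancel_intent(struct intent *head, uint64_t done_id)
{
	for (struct intent *ip = head; ip; ip = ip->next) {
		if (ip->id == done_id) {
			ip->released = 1;	/* stands in for xfs_efi_release() */
			return ip;
		}
	}
	return NULL;
}
```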
-STATIC int
-xlog_recover_rui_pass2(
-	struct xlog			*log,
-	struct xlog_recover_item	*item,
-	xfs_lsn_t			lsn)
-{
-	int				error;
-	struct xfs_mount		*mp = log->l_mp;
-	struct xfs_rui_log_item		*ruip;
-	struct xfs_rui_log_format	*rui_formatp;
-
-	rui_formatp = item->ri_buf[0].i_addr;
-
-	ruip = xfs_rui_init(mp, rui_formatp->rui_nextents);
-	error = xfs_rui_copy_format(&item->ri_buf[0], &ruip->rui_format);
-	if (error) {
-		xfs_rui_item_free(ruip);
-		return error;
-	}
-	atomic_set(&ruip->rui_next_extent, rui_formatp->rui_nextents);
-
-	spin_lock(&log->l_ailp->ail_lock);
-	/*
-	 * The RUI has two references. One for the RUD and one for RUI to ensure
-	 * it makes it into the AIL. Insert the RUI into the AIL directly and
-	 * drop the RUI reference. Note that xfs_trans_ail_update() drops the
-	 * AIL lock.
-	 */
-	xfs_trans_ail_update(log->l_ailp, &ruip->rui_item, lsn);
-	xfs_rui_release(ruip);
-	return 0;
-}
-
-/*
- * This routine is called when an RUD format structure is found in a committed
- * transaction in the log. Its purpose is to cancel the corresponding RUI if it
- * was still in the log. To do this it searches the AIL for the RUI with an id
- * equal to that in the RUD format structure. If we find it we drop the RUD
- * reference, which removes the RUI from the AIL and frees it.
- */
-STATIC int
-xlog_recover_rud_pass2(
-	struct xlog			*log,
-	struct xlog_recover_item	*item)
-{
-	struct xfs_rud_log_format	*rud_formatp;
-	struct xfs_rui_log_item		*ruip = NULL;
-	struct xfs_log_item		*lip;
-	uint64_t			rui_id;
-	struct xfs_ail_cursor		cur;
-	struct xfs_ail			*ailp = log->l_ailp;
-
-	rud_formatp = item->ri_buf[0].i_addr;
-	ASSERT(item->ri_buf[0].i_len == sizeof(struct xfs_rud_log_format));
-	rui_id = rud_formatp->rud_rui_id;
-
-	/*
-	 * Search for the RUI with the id in the RUD format structure in the
-	 * AIL.
-	 */
-	spin_lock(&ailp->ail_lock);
-	lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
-	while (lip != NULL) {
-		if (lip->li_type == XFS_LI_RUI) {
-			ruip = (struct xfs_rui_log_item *)lip;
-			if (ruip->rui_format.rui_id == rui_id) {
-				/*
-				 * Drop the RUD reference to the RUI. This
-				 * removes the RUI from the AIL and frees it.
-				 */
-				spin_unlock(&ailp->ail_lock);
-				xfs_rui_release(ruip);
-				spin_lock(&ailp->ail_lock);
-				break;
-			}
-		}
-		lip = xfs_trans_ail_cursor_next(ailp, &cur);
-	}
-
-	xfs_trans_ail_cursor_done(&cur);
-	spin_unlock(&ailp->ail_lock);
-
-	return 0;
-}
-
-/*
- * Copy a CUI format buffer from the given buf, and into the destination
- * CUI format structure. The CUI/CUD items were designed not to need any
- * special alignment handling.
- */
-static int
-xfs_cui_copy_format(
-	struct xfs_log_iovec		*buf,
-	struct xfs_cui_log_format	*dst_cui_fmt)
-{
-	struct xfs_cui_log_format	*src_cui_fmt;
-	uint				len;
-
-	src_cui_fmt = buf->i_addr;
-	len = xfs_cui_log_format_sizeof(src_cui_fmt->cui_nextents);
-
-	if (buf->i_len == len) {
-		memcpy(dst_cui_fmt, src_cui_fmt, len);
-		return 0;
-	}
-	XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
-	return -EFSCORRUPTED;
-}
-
-/*
- * This routine is called to create an in-core extent refcount update
- * item from the cui format structure which was logged on disk.
- * It allocates an in-core cui, copies the extents from the format
- * structure into it, and adds the cui to the AIL with the given
- * LSN.
- */
-STATIC int
-xlog_recover_cui_pass2(
-	struct xlog			*log,
-	struct xlog_recover_item	*item,
-	xfs_lsn_t			lsn)
-{
-	int				error;
-	struct xfs_mount		*mp = log->l_mp;
-	struct xfs_cui_log_item		*cuip;
-	struct xfs_cui_log_format	*cui_formatp;
-
-	cui_formatp = item->ri_buf[0].i_addr;
-
-	cuip = xfs_cui_init(mp, cui_formatp->cui_nextents);
-	error = xfs_cui_copy_format(&item->ri_buf[0], &cuip->cui_format);
-	if (error) {
-		xfs_cui_item_free(cuip);
-		return error;
-	}
-	atomic_set(&cuip->cui_next_extent, cui_formatp->cui_nextents);
-
-	spin_lock(&log->l_ailp->ail_lock);
-	/*
-	 * The CUI has two references. One for the CUD and one for CUI to ensure
-	 * it makes it into the AIL. Insert the CUI into the AIL directly and
-	 * drop the CUI reference. Note that xfs_trans_ail_update() drops the
-	 * AIL lock.
-	 */
-	xfs_trans_ail_update(log->l_ailp, &cuip->cui_item, lsn);
-	xfs_cui_release(cuip);
-	return 0;
-}
-
-/*
- * This routine is called when a CUD format structure is found in a committed
- * transaction in the log. Its purpose is to cancel the corresponding CUI if it
- * was still in the log. To do this it searches the AIL for the CUI with an id
- * equal to that in the CUD format structure. If we find it we drop the CUD
- * reference, which removes the CUI from the AIL and frees it.
- */
-STATIC int
-xlog_recover_cud_pass2(
-	struct xlog			*log,
-	struct xlog_recover_item	*item)
-{
-	struct xfs_cud_log_format	*cud_formatp;
-	struct xfs_cui_log_item		*cuip = NULL;
-	struct xfs_log_item		*lip;
-	uint64_t			cui_id;
-	struct xfs_ail_cursor		cur;
-	struct xfs_ail			*ailp = log->l_ailp;
-
-	cud_formatp = item->ri_buf[0].i_addr;
-	if (item->ri_buf[0].i_len != sizeof(struct xfs_cud_log_format)) {
-		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
-		return -EFSCORRUPTED;
-	}
-	cui_id = cud_formatp->cud_cui_id;
-
-	/*
-	 * Search for the CUI with the id in the CUD format structure in the
-	 * AIL.
-	 */
-	spin_lock(&ailp->ail_lock);
-	lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
-	while (lip != NULL) {
-		if (lip->li_type == XFS_LI_CUI) {
-			cuip = (struct xfs_cui_log_item *)lip;
-			if (cuip->cui_format.cui_id == cui_id) {
-				/*
-				 * Drop the CUD reference to the CUI. This
-				 * removes the CUI from the AIL and frees it.
-				 */
-				spin_unlock(&ailp->ail_lock);
-				xfs_cui_release(cuip);
-				spin_lock(&ailp->ail_lock);
-				break;
-			}
-		}
-		lip = xfs_trans_ail_cursor_next(ailp, &cur);
-	}
-
-	xfs_trans_ail_cursor_done(&cur);
-	spin_unlock(&ailp->ail_lock);
-
-	return 0;
-}
-
-/*
- * Copy a BUI format buffer from the given buf, and into the destination
- * BUI format structure. The BUI/BUD items were designed not to need any
- * special alignment handling.
- */
-static int
-xfs_bui_copy_format(
-	struct xfs_log_iovec		*buf,
-	struct xfs_bui_log_format	*dst_bui_fmt)
-{
-	struct xfs_bui_log_format	*src_bui_fmt;
-	uint				len;
-
-	src_bui_fmt = buf->i_addr;
-	len = xfs_bui_log_format_sizeof(src_bui_fmt->bui_nextents);
-
-	if (buf->i_len == len) {
-		memcpy(dst_bui_fmt, src_bui_fmt, len);
-		return 0;
-	}
-	XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
-	return -EFSCORRUPTED;
-}
-
-/*
- * This routine is called to create an in-core extent bmap update
- * item from the bui format structure which was logged on disk.
- * It allocates an in-core bui, copies the extents from the format
- * structure into it, and adds the bui to the AIL with the given
- * LSN.
- */
-STATIC int
-xlog_recover_bui_pass2(
-	struct xlog			*log,
-	struct xlog_recover_item	*item,
-	xfs_lsn_t			lsn)
-{
-	int				error;
-	struct xfs_mount		*mp = log->l_mp;
-	struct xfs_bui_log_item		*buip;
-	struct xfs_bui_log_format	*bui_formatp;
-
-	bui_formatp = item->ri_buf[0].i_addr;
-
-	if (bui_formatp->bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) {
-		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
-		return -EFSCORRUPTED;
-	}
-	buip = xfs_bui_init(mp);
-	error = xfs_bui_copy_format(&item->ri_buf[0], &buip->bui_format);
-	if (error) {
-		xfs_bui_item_free(buip);
-		return error;
-	}
-	atomic_set(&buip->bui_next_extent, bui_formatp->bui_nextents);
-
-	spin_lock(&log->l_ailp->ail_lock);
-	/*
-	 * The BUI has two references. One for the BUD and one for the BUI to
-	 * ensure it makes it into the AIL. Insert the BUI into the AIL
-	 * directly and drop the BUI reference. Note that
-	 * xfs_trans_ail_update() drops the AIL lock.
-	 */
-	xfs_trans_ail_update(log->l_ailp, &buip->bui_item, lsn);
-	xfs_bui_release(buip);
-	return 0;
-}
-
-/*
- * This routine is called when a BUD format structure is found in a committed
- * transaction in the log. Its purpose is to cancel the corresponding BUI if it
- * was still in the log. To do this it searches the AIL for the BUI with an id
- * equal to that in the BUD format structure. If we find it we drop the BUD
- * reference, which removes the BUI from the AIL and frees it.
- */
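The `xfs_bui_copy_format()` helper above, like its CUI twin, performs the copy only when the logged region length matches exactly the size computed from the recorded extent count; any mismatch is treated as corruption. A standalone sketch of that validate-then-copy rule (the header layout and 16-byte extent size here are made up for the sketch; `EFSCORRUPTED` is mapped to `EUCLEAN`, as the kernel does):

```c
#include <assert.h>
#include <errno.h>
#include <string.h>

#ifndef EFSCORRUPTED
#define EFSCORRUPTED	EUCLEAN		/* kernel maps EFSCORRUPTED to EUCLEAN */
#endif

/* Invented on-disk header: real formats carry more fields. */
struct fmt_hdr {
	unsigned int	nextents;
};

#define EXTENT_SIZE	16u		/* made-up per-extent payload size */

/* Size a format buffer must have for a given extent count. */
static size_t fmt_sizeof(unsigned int nextents)
{
	return sizeof(struct fmt_hdr) + (size_t)nextents * EXTENT_SIZE;
}

/* Copy src into dst only if src_len is exactly what its nextents implies. */
static int copy_format(const void *src, size_t src_len, void *dst)
{
	const struct fmt_hdr *hdr = src;

	if (src_len != fmt_sizeof(hdr->nextents))
		return -EFSCORRUPTED;
	memcpy(dst, src, src_len);
	return 0;
}
```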
-STATIC int
-xlog_recover_bud_pass2(
-	struct xlog			*log,
-	struct xlog_recover_item	*item)
-{
-	struct xfs_bud_log_format	*bud_formatp;
-	struct xfs_bui_log_item		*buip = NULL;
-	struct xfs_log_item		*lip;
-	uint64_t			bui_id;
-	struct xfs_ail_cursor		cur;
-	struct xfs_ail			*ailp = log->l_ailp;
-
-	bud_formatp = item->ri_buf[0].i_addr;
-	if (item->ri_buf[0].i_len != sizeof(struct xfs_bud_log_format)) {
-		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
-		return -EFSCORRUPTED;
-	}
-	bui_id = bud_formatp->bud_bui_id;
-
-	/*
-	 * Search for the BUI with the id in the BUD format structure in the
-	 * AIL.
-	 */
-	spin_lock(&ailp->ail_lock);
-	lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
-	while (lip != NULL) {
-		if (lip->li_type == XFS_LI_BUI) {
-			buip = (struct xfs_bui_log_item *)lip;
-			if (buip->bui_format.bui_id == bui_id) {
-				/*
-				 * Drop the BUD reference to the BUI. This
-				 * removes the BUI from the AIL and frees it.
-				 */
-				spin_unlock(&ailp->ail_lock);
-				xfs_bui_release(buip);
-				spin_lock(&ailp->ail_lock);
-				break;
-			}
-		}
-		lip = xfs_trans_ail_cursor_next(ailp, &cur);
-	}
-
-	xfs_trans_ail_cursor_done(&cur);
-	spin_unlock(&ailp->ail_lock);
-
-	return 0;
-}
-
-/*
- * This routine is called when an inode create format structure is found in a
- * committed transaction in the log. Its purpose is to initialise the inodes
- * being allocated on disk. This requires us to get inode cluster buffers that
- * match the range to be initialised, stamped with inode templates and written
- * by delayed write so that subsequent modifications will hit the cached buffer
- * and only need writing out at the end of recovery.
- */
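The icreate recovery function that follows range-checks every field it pulls out of the log record against the filesystem geometry before using it, rejecting the whole record with -EINVAL on any failure. Distilled into a standalone sketch (the `geom` structure and `icreate_validate()` are invented simplifications of the superblock checks, not kernel code):

```c
#include <assert.h>
#include <errno.h>
#include <stdint.h>

#define NULLAGBLOCK	((uint32_t)-1)	/* sentinel for "no block" */

/* Invented miniature of the superblock geometry consulted by the checks. */
struct geom {
	uint32_t	agcount;	/* number of allocation groups */
	uint32_t	agblocks;	/* blocks per AG */
	uint32_t	inodesize;	/* on-disk inode size */
};

/* Validate the fields of a logged inode-create record against the geometry. */
static int icreate_validate(const struct geom *g, uint32_t agno,
			    uint32_t agbno, uint32_t isize,
			    uint32_t count, uint32_t length)
{
	if (agno >= g->agcount)
		return -EINVAL;		/* AG number out of range */
	if (!agbno || agbno == NULLAGBLOCK || agbno >= g->agblocks)
		return -EINVAL;		/* block offset out of range */
	if (isize != g->inodesize)
		return -EINVAL;		/* inode size mismatch */
	if (!count || !length || length >= g->agblocks)
		return -EINVAL;		/* empty or oversized chunk */
	return 0;
}
```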
-STATIC int
-xlog_recover_do_icreate_pass2(
-	struct xlog		*log,
-	struct list_head	*buffer_list,
-	xlog_recover_item_t	*item)
-{
-	struct xfs_mount	*mp = log->l_mp;
-	struct xfs_icreate_log	*icl;
-	struct xfs_ino_geometry	*igeo = M_IGEO(mp);
-	xfs_agnumber_t		agno;
-	xfs_agblock_t		agbno;
-	unsigned int		count;
-	unsigned int		isize;
-	xfs_agblock_t		length;
-	int			bb_per_cluster;
-	int			cancel_count;
-	int			nbufs;
-	int			i;
-
-	icl = (struct xfs_icreate_log *)item->ri_buf[0].i_addr;
-	if (icl->icl_type != XFS_LI_ICREATE) {
-		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad type");
-		return -EINVAL;
-	}
-
-	if (icl->icl_size != 1) {
-		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad icl size");
-		return -EINVAL;
-	}
-
-	agno = be32_to_cpu(icl->icl_ag);
-	if (agno >= mp->m_sb.sb_agcount) {
-		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agno");
-		return -EINVAL;
-	}
-	agbno = be32_to_cpu(icl->icl_agbno);
-	if (!agbno || agbno == NULLAGBLOCK || agbno >= mp->m_sb.sb_agblocks) {
-		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agbno");
-		return -EINVAL;
-	}
-	isize = be32_to_cpu(icl->icl_isize);
-	if (isize != mp->m_sb.sb_inodesize) {
-		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad isize");
-		return -EINVAL;
-	}
-	count = be32_to_cpu(icl->icl_count);
-	if (!count) {
-		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count");
-		return -EINVAL;
-	}
-	length = be32_to_cpu(icl->icl_length);
-	if (!length || length >= mp->m_sb.sb_agblocks) {
-		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad length");
-		return -EINVAL;
-	}
-
-	/*
-	 * The inode chunk is either full or sparse and we only support
-	 * m_ino_geo.ialloc_min_blks sized sparse allocations at this time.
-	 */
-	if (length != igeo->ialloc_blks &&
-	    length != igeo->ialloc_min_blks) {
-		xfs_warn(log->l_mp,
-			 "%s: unsupported chunk length", __FUNCTION__);
-		return -EINVAL;
-	}
-
-	/* verify inode count is consistent with extent length */
-	if ((count >> mp->m_sb.sb_inopblog) != length) {
-		xfs_warn(log->l_mp,
-			 "%s: inconsistent inode count and chunk length",
-			 __FUNCTION__);
-		return -EINVAL;
-	}
-
-	/*
-	 * The icreate transaction can cover multiple cluster buffers and these
-	 * buffers could have been freed and reused. Check the individual
-	 * buffers for cancellation so we don't overwrite anything written after
-	 * a cancellation.
-	 */
-	bb_per_cluster = XFS_FSB_TO_BB(mp, igeo->blocks_per_cluster);
-	nbufs = length / igeo->blocks_per_cluster;
-	for (i = 0, cancel_count = 0; i < nbufs; i++) {
-		xfs_daddr_t	daddr;
-
-		daddr = XFS_AGB_TO_DADDR(mp, agno,
-				agbno + i * igeo->blocks_per_cluster);
-		if (xlog_check_buffer_cancelled(log, daddr, bb_per_cluster, 0))
-			cancel_count++;
-	}
-
-	/*
-	 * We currently only use icreate for a single allocation at a time. This
-	 * means we should expect either all or none of the buffers to be
-	 * cancelled. Be conservative and skip replay if at least one buffer is
-	 * cancelled, but warn the user that something is awry if the buffers
-	 * are not consistent.
-	 *
-	 * XXX: This must be refined to only skip cancelled clusters once we use
-	 * icreate for multiple chunk allocations.
-	 */
-	ASSERT(!cancel_count || cancel_count == nbufs);
-	if (cancel_count) {
-		if (cancel_count != nbufs)
-			xfs_warn(mp,
-	"WARNING: partial inode chunk cancellation, skipped icreate.");
-		trace_xfs_log_recover_icreate_cancel(log, icl);
-		return 0;
-	}
-
-	trace_xfs_log_recover_icreate_recover(log, icl);
-	return xfs_ialloc_inode_init(mp, NULL, buffer_list, count, agno, agbno,
-				     length, be32_to_cpu(icl->icl_gen));
-}
-
-STATIC void
-xlog_recover_buffer_ra_pass2(
-	struct xlog			*log,
-	struct xlog_recover_item	*item)
-{
-	struct xfs_buf_log_format	*buf_f = item->ri_buf[0].i_addr;
-	struct xfs_mount		*mp = log->l_mp;
-
-	if (xlog_peek_buffer_cancelled(log, buf_f->blf_blkno,
-			buf_f->blf_len, buf_f->blf_flags)) {
-		return;
-	}
-
-	xfs_buf_readahead(mp->m_ddev_targp, buf_f->blf_blkno,
-				buf_f->blf_len, NULL);
-}
-
-STATIC void
-xlog_recover_inode_ra_pass2(
-	struct xlog			*log,
-	struct xlog_recover_item	*item)
-{
-	struct xfs_inode_log_format	ilf_buf;
-	struct xfs_inode_log_format	*ilfp;
-	struct xfs_mount		*mp = log->l_mp;
-	int				error;
-
-	if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) {
-		ilfp = item->ri_buf[0].i_addr;
-	} else {
-		ilfp = &ilf_buf;
-		memset(ilfp, 0, sizeof(*ilfp));
-		error = xfs_inode_item_format_convert(&item->ri_buf[0], ilfp);
-		if (error)
-			return;
-	}
-
-	if (xlog_peek_buffer_cancelled(log, ilfp->ilf_blkno, ilfp->ilf_len, 0))
-		return;
-
-	xfs_buf_readahead(mp->m_ddev_targp, ilfp->ilf_blkno,
-				ilfp->ilf_len, &xfs_inode_buf_ra_ops);
-}
-
-STATIC void
-xlog_recover_dquot_ra_pass2(
-	struct xlog			*log,
-	struct xlog_recover_item	*item)
-{
-	struct xfs_mount	*mp = log->l_mp;
-	struct xfs_disk_dquot	*recddq;
-	struct xfs_dq_logformat	*dq_f;
-	uint			type;
-	int			len;
-
-	if (mp->m_qflags == 0)
-		return;
-
-	recddq = item->ri_buf[1].i_addr;
-	if (recddq == NULL)
-		return;
-	if (item->ri_buf[1].i_len < sizeof(struct xfs_disk_dquot))
-		return;
-
-	type = recddq->d_flags & (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP);
-	ASSERT(type);
-	if (log->l_quotaoffs_flag & type)
-		return;
-
-	dq_f = item->ri_buf[0].i_addr;
-	ASSERT(dq_f);
-	ASSERT(dq_f->qlf_len == 1);
-
-	len = XFS_FSB_TO_BB(mp, dq_f->qlf_len);
-	if (xlog_peek_buffer_cancelled(log, dq_f->qlf_blkno, len, 0))
-		return;
-
-	xfs_buf_readahead(mp->m_ddev_targp, dq_f->qlf_blkno, len,
-			  &xfs_dquot_buf_ra_ops);
-}
-
-STATIC void
-xlog_recover_ra_pass2(
-	struct xlog			*log,
-	struct xlog_recover_item	*item)
-{
-	switch (ITEM_TYPE(item)) {
-	case XFS_LI_BUF:
-		xlog_recover_buffer_ra_pass2(log, item);
-		break;
-	case XFS_LI_INODE:
-		xlog_recover_inode_ra_pass2(log, item);
-		break;
-	case XFS_LI_DQUOT:
-		xlog_recover_dquot_ra_pass2(log, item);
-		break;
-	case XFS_LI_EFI:
-	case XFS_LI_EFD:
-	case XFS_LI_QUOTAOFF:
-	case XFS_LI_RUI:
-	case XFS_LI_RUD:
-	case XFS_LI_CUI:
-	case XFS_LI_CUD:
-	case XFS_LI_BUI:
-	case XFS_LI_BUD:
-	default:
-		break;
-	}
-}
-
-STATIC int
-xlog_recover_commit_pass1(
-	struct xlog			*log,
-	struct xlog_recover		*trans,
-	struct xlog_recover_item	*item)
-{
-	trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS1);
-
-	switch (ITEM_TYPE(item)) {
-	case XFS_LI_BUF:
-		return xlog_recover_buffer_pass1(log, item);
-	case XFS_LI_QUOTAOFF:
-		return xlog_recover_quotaoff_pass1(log, item);
-	case XFS_LI_INODE:
-	case XFS_LI_EFI:
-	case XFS_LI_EFD:
-	case XFS_LI_DQUOT:
-	case XFS_LI_ICREATE:
-	case XFS_LI_RUI:
-	case XFS_LI_RUD:
-	case XFS_LI_CUI:
-	case XFS_LI_CUD:
-	case XFS_LI_BUI:
-	case XFS_LI_BUD:
-		/* nothing to do in pass 1 */
-		return 0;
-	default:
-		xfs_warn(log->l_mp, "%s: invalid item type (%d)",
-			 __func__, ITEM_TYPE(item));
-		ASSERT(0);
-		return -EFSCORRUPTED;
-	}
-}
-
-STATIC int
-xlog_recover_commit_pass2(
-	struct xlog			*log,
-	struct xlog_recover		*trans,
-	struct list_head		*buffer_list,
-	struct xlog_recover_item	*item)
-{
-	trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS2);
-
-	switch (ITEM_TYPE(item)) {
-	case XFS_LI_BUF:
-		return xlog_recover_buffer_pass2(log, buffer_list, item,
-						 trans->r_lsn);
-	case XFS_LI_INODE:
-		return xlog_recover_inode_pass2(log, buffer_list, item,
-						 trans->r_lsn);
-	case XFS_LI_EFI:
-		return xlog_recover_efi_pass2(log, item, trans->r_lsn);
-	case XFS_LI_EFD:
-		return xlog_recover_efd_pass2(log, item);
-	case XFS_LI_RUI:
-		return xlog_recover_rui_pass2(log, item, trans->r_lsn);
-	case XFS_LI_RUD:
-		return xlog_recover_rud_pass2(log, item);
-	case XFS_LI_CUI:
-		return xlog_recover_cui_pass2(log, item, trans->r_lsn);
-	case XFS_LI_CUD:
-		return xlog_recover_cud_pass2(log, item);
-	case XFS_LI_BUI:
-		return xlog_recover_bui_pass2(log, item, trans->r_lsn);
-	case XFS_LI_BUD:
-		return xlog_recover_bud_pass2(log, item);
-	case XFS_LI_DQUOT:
-		return xlog_recover_dquot_pass2(log, buffer_list, item,
-						trans->r_lsn);
-	case XFS_LI_ICREATE:
-		return xlog_recover_do_icreate_pass2(log, buffer_list, item);
-	case XFS_LI_QUOTAOFF:
-		/* nothing to do in pass2 */
-		return 0;
-	default:
-		xfs_warn(log->l_mp, "%s: invalid item type (%d)",
-			 __func__, ITEM_TYPE(item));
-		ASSERT(0);
-		return -EFSCORRUPTED;
-	}
+	if (!xlog_is_buffer_cancelled(log, blkno, len))
+		xfs_buf_readahead(log->l_mp->m_ddev_targp, blkno, len, ops);
 }
 
 STATIC int
···
 	int			error = 0;
 
 	list_for_each_entry(item, item_list, ri_list) {
-		error = xlog_recover_commit_pass2(log, trans,
-						  buffer_list, item);
+		trace_xfs_log_recover_item_recover(log, trans, item,
+				XLOG_RECOVER_PASS2);
+
+		if (item->ri_ops->commit_pass2)
+			error = item->ri_ops->commit_pass2(log, buffer_list,
+					item, trans->r_lsn);
 		if (error)
 			return error;
 	}
···
 		return error;
 
 	list_for_each_entry_safe(item, next, &trans->r_itemq, ri_list) {
+		trace_xfs_log_recover_item_recover(log, trans, item, pass);
+
 		switch (pass) {
 		case XLOG_RECOVER_PASS1:
-			error = xlog_recover_commit_pass1(log, trans, item);
+			if (item->ri_ops->commit_pass1)
+				error = item->ri_ops->commit_pass1(log, item);
 			break;
 		case XLOG_RECOVER_PASS2:
-			xlog_recover_ra_pass2(log, item);
+			if (item->ri_ops->ra_pass2)
+				item->ri_ops->ra_pass2(log, item);
 			list_move_tail(&item->ri_list, &ra_list);
 			items_queued++;
 			if (items_queued >= XLOG_RECOVER_COMMIT_QUEUE_MAX) {
···
 xlog_recover_add_item(
 	struct list_head	*head)
 {
-	xlog_recover_item_t	*item;
+	struct xlog_recover_item *item;
 
-	item = kmem_zalloc(sizeof(xlog_recover_item_t), 0);
+	item = kmem_zalloc(sizeof(struct xlog_recover_item), 0);
 	INIT_LIST_HEAD(&item->ri_list);
 	list_add_tail(&item->ri_list, head);
 }
···
 	char			*dp,
 	int			len)
 {
-	xlog_recover_item_t	*item;
+	struct xlog_recover_item *item;
 	char			*ptr, *old_ptr;
 	int			old_len;
···
 	}
 
 	/* take the tail entry */
-	item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list);
+	item = list_entry(trans->r_itemq.prev, struct xlog_recover_item,
+			  ri_list);
 
 	old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
 	old_len = item->ri_buf[item->ri_cnt-1].i_len;
···
 	int			len)
 {
 	struct xfs_inode_log_format	*in_f;	/* any will do */
-	xlog_recover_item_t	*item;
+	struct xlog_recover_item *item;
 	char			*ptr;
 
 	if (!len)
···
 	in_f = (struct xfs_inode_log_format *)ptr;
 
 	/* take the tail entry */
-	item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list);
+	item = list_entry(trans->r_itemq.prev, struct xlog_recover_item,
+			  ri_list);
 	if (item->ri_total != 0 &&
 	     item->ri_total == item->ri_cnt) {
 		/* tail item is in use, get a new one */
 		xlog_recover_add_item(&trans->r_itemq);
 		item = list_entry(trans->r_itemq.prev,
-					xlog_recover_item_t, ri_list);
+				  struct xlog_recover_item, ri_list);
 	}
 
 	if (item->ri_total == 0) {		/* first region to be added */
···
 xlog_recover_free_trans(
 	struct xlog_recover	*trans)
 {
-	xlog_recover_item_t	*item, *n;
+	struct xlog_recover_item *item, *n;
 	int			i;
 
 	hlist_del_init(&trans->r_list);
···
 	return 0;
 }
 
-/* Recover the EFI if necessary. */
-STATIC int
-xlog_recover_process_efi(
-	struct xfs_mount		*mp,
-	struct xfs_ail			*ailp,
-	struct xfs_log_item		*lip)
-{
-	struct xfs_efi_log_item		*efip;
-	int				error;
-
-	/*
-	 * Skip EFIs that we've already processed.
2483 - */ 2484 - efip = container_of(lip, struct xfs_efi_log_item, efi_item); 2485 - if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags)) 2486 - return 0; 2487 - 2488 - spin_unlock(&ailp->ail_lock); 2489 - error = xfs_efi_recover(mp, efip); 2490 - spin_lock(&ailp->ail_lock); 2491 - 2492 - return error; 2493 - } 2494 - 2495 - /* Release the EFI since we're cancelling everything. */ 2496 - STATIC void 2497 - xlog_recover_cancel_efi( 2498 - struct xfs_mount *mp, 2499 - struct xfs_ail *ailp, 2500 - struct xfs_log_item *lip) 2501 - { 2502 - struct xfs_efi_log_item *efip; 2503 - 2504 - efip = container_of(lip, struct xfs_efi_log_item, efi_item); 2505 - 2506 - spin_unlock(&ailp->ail_lock); 2507 - xfs_efi_release(efip); 2508 - spin_lock(&ailp->ail_lock); 2509 - } 2510 - 2511 - /* Recover the RUI if necessary. */ 2512 - STATIC int 2513 - xlog_recover_process_rui( 2514 - struct xfs_mount *mp, 2515 - struct xfs_ail *ailp, 2516 - struct xfs_log_item *lip) 2517 - { 2518 - struct xfs_rui_log_item *ruip; 2519 - int error; 2520 - 2521 - /* 2522 - * Skip RUIs that we've already processed. 2523 - */ 2524 - ruip = container_of(lip, struct xfs_rui_log_item, rui_item); 2525 - if (test_bit(XFS_RUI_RECOVERED, &ruip->rui_flags)) 2526 - return 0; 2527 - 2528 - spin_unlock(&ailp->ail_lock); 2529 - error = xfs_rui_recover(mp, ruip); 2530 - spin_lock(&ailp->ail_lock); 2531 - 2532 - return error; 2533 - } 2534 - 2535 - /* Release the RUI since we're cancelling everything. */ 2536 - STATIC void 2537 - xlog_recover_cancel_rui( 2538 - struct xfs_mount *mp, 2539 - struct xfs_ail *ailp, 2540 - struct xfs_log_item *lip) 2541 - { 2542 - struct xfs_rui_log_item *ruip; 2543 - 2544 - ruip = container_of(lip, struct xfs_rui_log_item, rui_item); 2545 - 2546 - spin_unlock(&ailp->ail_lock); 2547 - xfs_rui_release(ruip); 2548 - spin_lock(&ailp->ail_lock); 2549 - } 2550 - 2551 - /* Recover the CUI if necessary. 
*/ 2552 - STATIC int 2553 - xlog_recover_process_cui( 2554 - struct xfs_trans *parent_tp, 2555 - struct xfs_ail *ailp, 2556 - struct xfs_log_item *lip) 2557 - { 2558 - struct xfs_cui_log_item *cuip; 2559 - int error; 2560 - 2561 - /* 2562 - * Skip CUIs that we've already processed. 2563 - */ 2564 - cuip = container_of(lip, struct xfs_cui_log_item, cui_item); 2565 - if (test_bit(XFS_CUI_RECOVERED, &cuip->cui_flags)) 2566 - return 0; 2567 - 2568 - spin_unlock(&ailp->ail_lock); 2569 - error = xfs_cui_recover(parent_tp, cuip); 2570 - spin_lock(&ailp->ail_lock); 2571 - 2572 - return error; 2573 - } 2574 - 2575 - /* Release the CUI since we're cancelling everything. */ 2576 - STATIC void 2577 - xlog_recover_cancel_cui( 2578 - struct xfs_mount *mp, 2579 - struct xfs_ail *ailp, 2580 - struct xfs_log_item *lip) 2581 - { 2582 - struct xfs_cui_log_item *cuip; 2583 - 2584 - cuip = container_of(lip, struct xfs_cui_log_item, cui_item); 2585 - 2586 - spin_unlock(&ailp->ail_lock); 2587 - xfs_cui_release(cuip); 2588 - spin_lock(&ailp->ail_lock); 2589 - } 2590 - 2591 - /* Recover the BUI if necessary. */ 2592 - STATIC int 2593 - xlog_recover_process_bui( 2594 - struct xfs_trans *parent_tp, 2595 - struct xfs_ail *ailp, 2596 - struct xfs_log_item *lip) 2597 - { 2598 - struct xfs_bui_log_item *buip; 2599 - int error; 2600 - 2601 - /* 2602 - * Skip BUIs that we've already processed. 2603 - */ 2604 - buip = container_of(lip, struct xfs_bui_log_item, bui_item); 2605 - if (test_bit(XFS_BUI_RECOVERED, &buip->bui_flags)) 2606 - return 0; 2607 - 2608 - spin_unlock(&ailp->ail_lock); 2609 - error = xfs_bui_recover(parent_tp, buip); 2610 - spin_lock(&ailp->ail_lock); 2611 - 2612 - return error; 2613 - } 2614 - 2615 - /* Release the BUI since we're cancelling everything. 
*/ 2616 - STATIC void 2617 - xlog_recover_cancel_bui( 2618 - struct xfs_mount *mp, 2619 - struct xfs_ail *ailp, 2620 - struct xfs_log_item *lip) 2621 - { 2622 - struct xfs_bui_log_item *buip; 2623 - 2624 - buip = container_of(lip, struct xfs_bui_log_item, bui_item); 2625 - 2626 - spin_unlock(&ailp->ail_lock); 2627 - xfs_bui_release(buip); 2628 - spin_lock(&ailp->ail_lock); 2629 - } 2630 - 2631 - /* Is this log item a deferred action intent? */ 2632 - static inline bool xlog_item_is_intent(struct xfs_log_item *lip) 2633 - { 2634 - switch (lip->li_type) { 2635 - case XFS_LI_EFI: 2636 - case XFS_LI_RUI: 2637 - case XFS_LI_CUI: 2638 - case XFS_LI_BUI: 2639 - return true; 2640 - default: 2641 - return false; 2642 - } 2643 - } 2644 - 2645 4566 /* Take all the collected deferred ops and finish them in order. */ 2646 4567 static int 2647 4568 xlog_finish_defer_ops( ··· 2500 4769 xfs_defer_move(tp, parent_tp); 2501 4770 2502 4771 return xfs_trans_commit(tp); 4772 + } 4773 + 4774 + /* Is this log item a deferred action intent? */ 4775 + static inline bool xlog_item_is_intent(struct xfs_log_item *lip) 4776 + { 4777 + return lip->li_ops->iop_recover != NULL && 4778 + lip->li_ops->iop_match != NULL; 2503 4779 } 2504 4780 2505 4781 /* ··· 2579 4841 2580 4842 /* 2581 4843 * NOTE: If your intent processing routine can create more 2582 - * deferred ops, you /must/ attach them to the dfops in this 2583 - * routine or else those subsequent intents will get 4844 + * deferred ops, you /must/ attach them to the transaction in 4845 + * this routine or else those subsequent intents will get 2584 4846 * replayed in the wrong order! 
2585 4847 */ 2586 - switch (lip->li_type) { 2587 - case XFS_LI_EFI: 2588 - error = xlog_recover_process_efi(log->l_mp, ailp, lip); 2589 - break; 2590 - case XFS_LI_RUI: 2591 - error = xlog_recover_process_rui(log->l_mp, ailp, lip); 2592 - break; 2593 - case XFS_LI_CUI: 2594 - error = xlog_recover_process_cui(parent_tp, ailp, lip); 2595 - break; 2596 - case XFS_LI_BUI: 2597 - error = xlog_recover_process_bui(parent_tp, ailp, lip); 2598 - break; 4848 + if (!test_and_set_bit(XFS_LI_RECOVERED, &lip->li_flags)) { 4849 + spin_unlock(&ailp->ail_lock); 4850 + error = lip->li_ops->iop_recover(lip, parent_tp); 4851 + spin_lock(&ailp->ail_lock); 2599 4852 } 2600 4853 if (error) 2601 4854 goto out; ··· 2630 4901 break; 2631 4902 } 2632 4903 2633 - switch (lip->li_type) { 2634 - case XFS_LI_EFI: 2635 - xlog_recover_cancel_efi(log->l_mp, ailp, lip); 2636 - break; 2637 - case XFS_LI_RUI: 2638 - xlog_recover_cancel_rui(log->l_mp, ailp, lip); 2639 - break; 2640 - case XFS_LI_CUI: 2641 - xlog_recover_cancel_cui(log->l_mp, ailp, lip); 2642 - break; 2643 - case XFS_LI_BUI: 2644 - xlog_recover_cancel_bui(log->l_mp, ailp, lip); 2645 - break; 2646 - } 2647 - 4904 + spin_unlock(&ailp->ail_lock); 4905 + lip->li_ops->iop_release(lip); 4906 + spin_lock(&ailp->ail_lock); 2648 4907 lip = xfs_trans_ail_cursor_next(ailp, &cur); 2649 4908 } 2650 4909 ··· 2704 4987 /* 2705 4988 * Get the on disk inode to find the next inode in the bucket. 2706 4989 */ 2707 - error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &ibp, 0, 0); 4990 + error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &ibp, 0); 2708 4991 if (error) 2709 4992 goto fail_iput; 2710 4993
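The hunks above replace per-type `switch (lip->li_type)` dispatch with per-item ops vectors (`item->ri_ops->commit_pass2`, `lip->li_ops->iop_recover`), and `xlog_item_is_intent()` becomes a test on which callbacks are populated rather than an enumerated type list. A minimal userspace sketch of that pattern, with hypothetical names (not the kernel structures), assuming a NULL callback means "nothing to do in this pass":

```c
#include <assert.h>
#include <stddef.h>

/* Hypothetical miniature of the ops-vector dispatch pattern: each item
 * carries a table of callbacks, and the core invokes whichever entries
 * are non-NULL instead of switching on a type code. */
struct item;

struct item_ops {
	int (*commit_pass1)(struct item *);
	int (*commit_pass2)(struct item *);
	int (*recover)(struct item *);	/* set only for intent items */
};

struct item {
	const struct item_ops *ops;
	int recovered;
};

/* An item is a deferred-action intent iff it has a recover callback,
 * mirroring how the new xlog_item_is_intent() probes iop_recover. */
static int item_is_intent(const struct item *ip)
{
	return ip->ops->recover != NULL;
}

static int run_pass2(struct item *ip)
{
	/* A NULL callback simply means "no pass2 work for this type". */
	if (ip->ops->commit_pass2)
		return ip->ops->commit_pass2(ip);
	return 0;
}

static int intent_recover(struct item *ip)
{
	ip->recovered = 1;
	return 0;
}

static const struct item_ops intent_ops = {
	.recover = intent_recover,	/* no pass1/pass2 callbacks */
};

static const struct item_ops plain_ops = { 0 };
```

Adding a new intent type then means defining one more ops table, with no central switch statement to keep in sync.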
+22
fs/xfs/xfs_message.c
···
 {
 	print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_OFFSET, 16, 1, p, length, 1);
 }
+
+void
+xfs_buf_alert_ratelimited(
+	struct xfs_buf		*bp,
+	const char		*rlmsg,
+	const char		*fmt,
+	...)
+{
+	struct xfs_mount	*mp = bp->b_mount;
+	struct va_format	vaf;
+	va_list			args;
+
+	/* use the more aggressive per-target rate limit for buffers */
+	if (!___ratelimit(&bp->b_target->bt_ioerror_rl, rlmsg))
+		return;
+
+	va_start(args, fmt);
+	vaf.fmt = fmt;
+	vaf.va = &args;
+	__xfs_printk(KERN_ALERT, mp, &vaf);
+	va_end(args);
+}
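The new `xfs_buf_alert_ratelimited()` gates buffer I/O alerts through a per-target ratelimit state so a failing device cannot flood the log. As a rough userspace sketch of the interval/burst idea behind the kernel's ratelimit state (hypothetical names and a simplified clock, not the kernel's `___ratelimit()` implementation):

```c
#include <assert.h>

/* Hypothetical interval/burst rate limiter: allow up to 'burst' messages
 * per 'interval' seconds, suppress (and count) the rest. */
struct ratelimit_state {
	long	interval;	/* seconds per window */
	int	burst;		/* messages allowed per window */
	long	begin;		/* start of current window */
	int	printed;	/* messages emitted this window */
	int	missed;		/* messages suppressed this window */
};

static int my_ratelimit(struct ratelimit_state *rs, long now)
{
	if (now - rs->begin >= rs->interval) {
		/* new window: reset the counters */
		rs->begin = now;
		rs->printed = 0;
		rs->missed = 0;
	}
	if (rs->printed < rs->burst) {
		rs->printed++;
		return 1;	/* caller may print */
	}
	rs->missed++;
	return 0;		/* suppressed */
}
```

The caller checks the return value and only formats/emits the message when it is nonzero, which is exactly the shape of the `if (!___ratelimit(...)) return;` guard above.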
+22 -2
fs/xfs/xfs_message.h
···
 }
 #endif

-#define xfs_printk_ratelimited(func, dev, fmt, ...)		\
+#define xfs_printk_ratelimited(func, dev, fmt, ...)			\
 do {									\
 	static DEFINE_RATELIMIT_STATE(_rs,				\
 				      DEFAULT_RATELIMIT_INTERVAL,	\
 				      DEFAULT_RATELIMIT_BURST);		\
 	if (__ratelimit(&_rs))						\
-		func(dev, fmt, ##__VA_ARGS__);			\
+		func(dev, fmt, ##__VA_ARGS__);				\
 } while (0)
+
+#define xfs_printk_once(func, dev, fmt, ...)			\
+({								\
+	static bool __section(.data.once) __print_once;		\
+	bool __ret_print_once = !__print_once;			\
+								\
+	if (!__print_once) {					\
+		__print_once = true;				\
+		func(dev, fmt, ##__VA_ARGS__);			\
+	}							\
+	unlikely(__ret_print_once);				\
+})

 #define xfs_emerg_ratelimited(dev, fmt, ...)				\
 	xfs_printk_ratelimited(xfs_emerg, dev, fmt, ##__VA_ARGS__)
···
 #define xfs_debug_ratelimited(dev, fmt, ...)				\
 	xfs_printk_ratelimited(xfs_debug, dev, fmt, ##__VA_ARGS__)

+#define xfs_warn_once(dev, fmt, ...)				\
+	xfs_printk_once(xfs_warn, dev, fmt, ##__VA_ARGS__)
+#define xfs_notice_once(dev, fmt, ...)				\
+	xfs_printk_once(xfs_notice, dev, fmt, ##__VA_ARGS__)
+
 void assfail(struct xfs_mount *mp, char *expr, char *f, int l);
 void asswarn(struct xfs_mount *mp, char *expr, char *f, int l);

 extern void xfs_hex_dump(const void *p, int length);
+
+void xfs_buf_alert_ratelimited(struct xfs_buf *bp, const char *rlmsg,
+			       const char *fmt, ...);

 #endif	/* __XFS_MESSAGE_H */
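`xfs_printk_once()` wraps the print in a static once-guard and evaluates to whether this call actually printed, so these macros can replace the bare `printk_once()` calls while keeping the `XFS (...)` prefixing. A userspace sketch of the same pattern (hypothetical macro name; like the kernel macro it relies on GNU C statement expressions, supported by gcc and clang):

```c
#include <stdio.h>

/* Hypothetical userspace analogue of xfs_printk_once(): a static guard
 * flips on the first call, and the expression evaluates to whether this
 * particular call printed. */
#define warn_once(fmt, ...)				\
({							\
	static int __print_once;			\
	int __ret = !__print_once;			\
							\
	if (!__print_once) {				\
		__print_once = 1;			\
		fprintf(stderr, fmt, ##__VA_ARGS__);	\
	}						\
	__ret;						\
})

static int poke(void)
{
	/* Only the first invocation prints; later ones return 0. */
	return warn_once("reserve pool depleted (%d)\n", 42);
}
```

Because the guard is `static`, each textual use of the macro gets its own once-state, which is why every call site can be "once" independently.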
+3 -37
fs/xfs/xfs_mount.c
···
 }

 /*
- * Deltas for the inode count are +/-64, hence we use a large batch size
- * of 128 so we don't need to take the counter lock on every update.
- */
-#define XFS_ICOUNT_BATCH	128
-int
-xfs_mod_icount(
-	struct xfs_mount	*mp,
-	int64_t			delta)
-{
-	percpu_counter_add_batch(&mp->m_icount, delta, XFS_ICOUNT_BATCH);
-	if (__percpu_counter_compare(&mp->m_icount, 0, XFS_ICOUNT_BATCH) < 0) {
-		ASSERT(0);
-		percpu_counter_add(&mp->m_icount, -delta);
-		return -EINVAL;
-	}
-	return 0;
-}
-
-int
-xfs_mod_ifree(
-	struct xfs_mount	*mp,
-	int64_t			delta)
-{
-	percpu_counter_add(&mp->m_ifree, delta);
-	if (percpu_counter_compare(&mp->m_ifree, 0) < 0) {
-		ASSERT(0);
-		percpu_counter_add(&mp->m_ifree, -delta);
-		return -EINVAL;
-	}
-	return 0;
-}
-
-/*
  * Deltas for the block count can vary from 1 to very large, but lock contention
  * only occurs on frequent small block count updates such as in the delayed
  * allocation path for buffered writes (page a time updates). Hence we set
···
 		spin_unlock(&mp->m_sb_lock);
 		return 0;
 	}
-	printk_once(KERN_WARNING
-		"Filesystem \"%s\": reserve blocks depleted! "
-		"Consider increasing reserve pool size.",
-		mp->m_super->s_id);
+	xfs_warn_once(mp,
+"Reserve blocks depleted! Consider increasing reserve pool size.");
+
 fdblocks_enospc:
 	spin_unlock(&mp->m_sb_lock);
 	return -ENOSPC;
+86 -75
fs/xfs/xfs_mount.h
···
 	long		retry_timeout;	/* in jiffies, -1 = infinite */
 };

+/*
+ * The struct xfsmount layout is optimised to separate read-mostly variables
+ * from variables that are frequently modified. We put the read-mostly variables
+ * first, then place all the other variables at the end.
+ *
+ * Typically, read-mostly variables are those that are set at mount time and
+ * never changed again, or only change rarely as a result of things like sysfs
+ * knobs being tweaked.
+ */
 typedef struct xfs_mount {
-	struct super_block	*m_super;
-
-	/*
-	 * Bitsets of per-fs metadata that have been checked and/or are sick.
-	 * Callers must hold m_sb_lock to access these two fields.
-	 */
-	uint8_t			m_fs_checked;
-	uint8_t			m_fs_sick;
-	/*
-	 * Bitsets of rt metadata that have been checked and/or are sick.
-	 * Callers must hold m_sb_lock to access this field.
-	 */
-	uint8_t			m_rt_checked;
-	uint8_t			m_rt_sick;
-
-	struct xfs_ail		*m_ail;		/* fs active log item list */
-
 	struct xfs_sb		m_sb;		/* copy of fs superblock */
-	spinlock_t		m_sb_lock;	/* sb counter lock */
-	struct percpu_counter	m_icount;	/* allocated inodes counter */
-	struct percpu_counter	m_ifree;	/* free inodes counter */
-	struct percpu_counter	m_fdblocks;	/* free block counter */
-	/*
-	 * Count of data device blocks reserved for delayed allocations,
-	 * including indlen blocks.  Does not include allocated CoW staging
-	 * extents or anything related to the rt device.
-	 */
-	struct percpu_counter	m_delalloc_blks;
-
+	struct super_block	*m_super;
+	struct xfs_ail		*m_ail;		/* fs active log item list */
 	struct xfs_buf		*m_sb_bp;	/* buffer for superblock */
 	char			*m_rtname;	/* realtime device name */
 	char			*m_logname;	/* external log device name */
-	int			m_bsize;	/* fs logical block size */
-	xfs_agnumber_t		m_agfrotor;	/* last ag where space found */
-	xfs_agnumber_t		m_agirotor;	/* last ag dir inode alloced */
-	spinlock_t		m_agirotor_lock;/* .. and lock protecting it */
-	xfs_agnumber_t		m_maxagi;	/* highest inode alloc group */
-	uint			m_allocsize_log;/* min write size log bytes */
-	uint			m_allocsize_blocks; /* min write size blocks */
 	struct xfs_da_geometry	*m_dir_geo;	/* directory block geometry */
 	struct xfs_da_geometry	*m_attr_geo;	/* attribute block geometry */
 	struct xlog		*m_log;		/* log specific stuff */
-	struct xfs_ino_geometry	m_ino_geo;	/* inode geometry */
-	int			m_logbufs;	/* number of log buffers */
-	int			m_logbsize;	/* size of each log buffer */
-	uint			m_rsumlevels;	/* rt summary levels */
-	uint			m_rsumsize;	/* size of rt summary, bytes */
-	/*
-	 * Optional cache of rt summary level per bitmap block with the
-	 * invariant that m_rsum_cache[bbno] <= the minimum i for which
-	 * rsum[i][bbno] != 0. Reads and writes are serialized by the rsumip
-	 * inode lock.
-	 */
-	uint8_t			*m_rsum_cache;
 	struct xfs_inode	*m_rbmip;	/* pointer to bitmap inode */
 	struct xfs_inode	*m_rsumip;	/* pointer to summary inode */
 	struct xfs_inode	*m_rootip;	/* pointer to root directory */
···
 	xfs_buftarg_t		*m_ddev_targp;	/* saves taking the address */
 	xfs_buftarg_t		*m_logdev_targp;/* ptr to log device */
 	xfs_buftarg_t		*m_rtdev_targp;	/* ptr to rt device */
+	/*
+	 * Optional cache of rt summary level per bitmap block with the
+	 * invariant that m_rsum_cache[bbno] <= the minimum i for which
+	 * rsum[i][bbno] != 0. Reads and writes are serialized by the rsumip
+	 * inode lock.
+	 */
+	uint8_t			*m_rsum_cache;
+	struct xfs_mru_cache	*m_filestream;  /* per-mount filestream data */
+	struct workqueue_struct *m_buf_workqueue;
+	struct workqueue_struct	*m_unwritten_workqueue;
+	struct workqueue_struct	*m_cil_workqueue;
+	struct workqueue_struct	*m_reclaim_workqueue;
+	struct workqueue_struct	*m_eofblocks_workqueue;
+	struct workqueue_struct	*m_sync_workqueue;
+
+	int			m_bsize;	/* fs logical block size */
 	uint8_t			m_blkbit_log;	/* blocklog + NBBY */
 	uint8_t			m_blkbb_log;	/* blocklog - BBSHIFT */
 	uint8_t			m_agno_log;	/* log #ag's */
+	uint8_t			m_sectbb_log;	/* sectlog - BBSHIFT */
 	uint			m_blockmask;	/* sb_blocksize-1 */
 	uint			m_blockwsize;	/* sb_blocksize in words */
 	uint			m_blockwmask;	/* blockwsize-1 */
···
 	xfs_extlen_t		m_ag_prealloc_blocks; /* reserved ag blocks */
 	uint			m_alloc_set_aside; /* space we can't use */
 	uint			m_ag_max_usable; /* max space per AG */
+	int			m_dalign;	/* stripe unit */
+	int			m_swidth;	/* stripe width */
+	xfs_agnumber_t		m_maxagi;	/* highest inode alloc group */
+	uint			m_allocsize_log;/* min write size log bytes */
+	uint			m_allocsize_blocks; /* min write size blocks */
+	int			m_logbufs;	/* number of log buffers */
+	int			m_logbsize;	/* size of each log buffer */
+	uint			m_rsumlevels;	/* rt summary levels */
+	uint			m_rsumsize;	/* size of rt summary, bytes */
+	int			m_fixedfsid[2];	/* unchanged for life of FS */
+	uint			m_qflags;	/* quota status flags */
+	uint64_t		m_flags;	/* global mount flags */
+	int64_t			m_low_space[XFS_LOWSP_MAX];
+	struct xfs_ino_geometry	m_ino_geo;	/* inode geometry */
+	struct xfs_trans_resv	m_resv;		/* precomputed res values */
+						/* low free space thresholds */
+	bool			m_always_cow;
+	bool			m_fail_unmount;
+	bool			m_finobt_nores; /* no per-AG finobt resv. */
+	bool			m_update_sb;	/* sb needs update in mount */
+
+	/*
+	 * Bitsets of per-fs metadata that have been checked and/or are sick.
+	 * Callers must hold m_sb_lock to access these two fields.
+	 */
+	uint8_t			m_fs_checked;
+	uint8_t			m_fs_sick;
+	/*
+	 * Bitsets of rt metadata that have been checked and/or are sick.
+	 * Callers must hold m_sb_lock to access this field.
+	 */
+	uint8_t			m_rt_checked;
+	uint8_t			m_rt_sick;
+
+	/*
+	 * End of read-mostly variables. Frequently written variables and locks
+	 * should be placed below this comment from now on. The first variable
+	 * here is marked as cacheline aligned so they it is separated from
+	 * the read-mostly variables.
+	 */
+
+	spinlock_t ____cacheline_aligned m_sb_lock; /* sb counter lock */
+	struct percpu_counter	m_icount;	/* allocated inodes counter */
+	struct percpu_counter	m_ifree;	/* free inodes counter */
+	struct percpu_counter	m_fdblocks;	/* free block counter */
+	/*
+	 * Count of data device blocks reserved for delayed allocations,
+	 * including indlen blocks.  Does not include allocated CoW staging
+	 * extents or anything related to the rt device.
+	 */
+	struct percpu_counter	m_delalloc_blks;
+
 	struct radix_tree_root	m_perag_tree;	/* per-ag accounting info */
 	spinlock_t		m_perag_lock;	/* lock for m_perag_tree */
-	struct mutex		m_growlock;	/* growfs mutex */
-	int			m_fixedfsid[2];	/* unchanged for life of FS */
-	uint64_t		m_flags;	/* global mount flags */
-	bool			m_finobt_nores; /* no per-AG finobt resv. */
-	uint			m_qflags;	/* quota status flags */
-	struct xfs_trans_resv	m_resv;		/* precomputed res values */
 	uint64_t		m_resblks;	/* total reserved blocks */
 	uint64_t		m_resblks_avail;/* available reserved blocks */
 	uint64_t		m_resblks_save;	/* reserved blks @ remount,ro */
-	int			m_dalign;	/* stripe unit */
-	int			m_swidth;	/* stripe width */
-	uint8_t			m_sectbb_log;	/* sectlog - BBSHIFT */
-	atomic_t		m_active_trans;	/* number trans frozen */
-	struct xfs_mru_cache	*m_filestream;  /* per-mount filestream data */
 	struct delayed_work	m_reclaim_work;	/* background inode reclaim */
 	struct delayed_work	m_eofblocks_work; /* background eof blocks
 						     trimming */
 	struct delayed_work	m_cowblocks_work; /* background cow blocks
 						     trimming */
-	bool			m_update_sb;	/* sb needs update in mount */
-	int64_t			m_low_space[XFS_LOWSP_MAX];
-						/* low free space thresholds */
 	struct xfs_kobj		m_kobj;
 	struct xfs_kobj		m_error_kobj;
 	struct xfs_kobj		m_error_meta_kobj;
 	struct xfs_error_cfg	m_error_cfg[XFS_ERR_CLASS_MAX][XFS_ERR_ERRNO_MAX];
 	struct xstats		m_stats;	/* per-fs stats */
+	xfs_agnumber_t		m_agfrotor;	/* last ag where space found */
+	xfs_agnumber_t		m_agirotor;	/* last ag dir inode alloced */
+	spinlock_t		m_agirotor_lock;/* .. and lock protecting it */

 	/*
 	 * Workqueue item so that we can coalesce multiple inode flush attempts
 	 * into a single flush.
 	 */
 	struct work_struct	m_flush_inodes_work;
-	struct workqueue_struct *m_buf_workqueue;
-	struct workqueue_struct	*m_unwritten_workqueue;
-	struct workqueue_struct	*m_cil_workqueue;
-	struct workqueue_struct	*m_reclaim_workqueue;
-	struct workqueue_struct	*m_eofblocks_workqueue;
-	struct workqueue_struct	*m_sync_workqueue;

 	/*
 	 * Generation of the filesysyem layout.  This is incremented by each
···
 	 * to various other kinds of pain inflicted on the pNFS server.
 	 */
 	uint32_t		m_generation;
+	struct mutex		m_growlock;	/* growfs mutex */

-	bool			m_always_cow;
-	bool			m_fail_unmount;
 #ifdef DEBUG
 	/*
 	 * Frequency with which errors are injected.  Replaces xfs_etest; the
···
 #define XFS_MOUNT_FILESTREAMS	(1ULL << 24)	/* enable the filestreams
 						   allocator */
 #define XFS_MOUNT_NOATTR2	(1ULL << 25)	/* disable use of attr2 format */
-
-#define XFS_MOUNT_DAX		(1ULL << 62)	/* TEST ONLY! */
+#define XFS_MOUNT_DAX_ALWAYS	(1ULL << 26)
+#define XFS_MOUNT_DAX_NEVER	(1ULL << 27)

 /*
  * Max and min values for mount-option defined I/O
···
 #define SHUTDOWN_LOG_IO_ERROR	0x0002	/* write attempt to the log failed */
 #define SHUTDOWN_FORCE_UMOUNT	0x0004	/* shutdown from a forced unmount */
 #define SHUTDOWN_CORRUPT_INCORE	0x0008	/* corrupt in-memory data structures */
-#define SHUTDOWN_REMOTE_REQ	0x0010	/* shutdown came from remote cell */
-#define SHUTDOWN_DEVICE_REQ	0x0020	/* failed all paths to the device */

 /*
  * Flags for xfs_mountfs
···
 			xfs_agnumber_t *maxagi);
 extern void	xfs_unmountfs(xfs_mount_t *);

-extern int	xfs_mod_icount(struct xfs_mount *mp, int64_t delta);
-extern int	xfs_mod_ifree(struct xfs_mount *mp, int64_t delta);
 extern int	xfs_mod_fdblocks(struct xfs_mount *mp, int64_t delta,
				 bool reserved);
 extern int	xfs_mod_frextents(struct xfs_mount *mp, int64_t delta);
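The `struct xfs_mount` reshuffle above is the cacheline-pingpong fix mentioned in the merge summary: read-mostly fields come first, and the first frequently-written field (`m_sb_lock`) is `____cacheline_aligned` so writers do not keep invalidating the cache lines that readers hit on every operation. A standalone C11 sketch of the same layout idea (hypothetical struct, assuming a 64-byte cacheline):

```c
#include <stdalign.h>
#include <stddef.h>

/* Hypothetical sketch of the read-mostly vs frequently-written split:
 * fields set at mount time and rarely changed come first; the hot,
 * frequently-written fields start on their own (assumed 64-byte)
 * cache line so stores to them don't evict the read-mostly line. */
struct mount_like {
	/* read-mostly: set once, read on every operation */
	int		block_size;
	unsigned long	flags;

	/* frequently written: forced onto a fresh cache line,
	 * playing the role of ____cacheline_aligned in the kernel */
	alignas(64) long free_blocks;
	long		free_inodes;
};
```

`offsetof` can verify the split at compile or test time; in the kernel the equivalent guarantee comes from the `____cacheline_aligned` attribute on `m_sb_lock`.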
+2 -3
fs/xfs/xfs_pnfs.c
···
 {
 	struct xfs_mount	*mp = XFS_M(sb);

-	printk_once(KERN_NOTICE
-		"XFS (%s): using experimental pNFS feature, use at your own risk!\n",
-		mp->m_super->s_id);
+	xfs_notice_once(mp,
+"Using experimental pNFS feature, use at your own risk!");

 	if (*len < sizeof(uuid_t))
 		return -EINVAL;
+29 -37
fs/xfs/xfs_qm.c
···
 		return;

 	ddqp = &dqp->q_core;
-	defq = xfs_get_defquota(dqp, qinf);
+	defq = xfs_get_defquota(qinf, xfs_dquot_type(dqp));

 	/*
 	 * Timers and warnings have been already set, let's just set the
···
 static void
 xfs_qm_init_timelimits(
 	struct xfs_mount	*mp,
-	struct xfs_quotainfo	*qinf)
+	uint			type)
 {
+	struct xfs_quotainfo	*qinf = mp->m_quotainfo;
+	struct xfs_def_quota	*defq;
 	struct xfs_disk_dquot	*ddqp;
 	struct xfs_dquot	*dqp;
-	uint			type;
 	int			error;

-	qinf->qi_btimelimit = XFS_QM_BTIMELIMIT;
-	qinf->qi_itimelimit = XFS_QM_ITIMELIMIT;
-	qinf->qi_rtbtimelimit = XFS_QM_RTBTIMELIMIT;
-	qinf->qi_bwarnlimit = XFS_QM_BWARNLIMIT;
-	qinf->qi_iwarnlimit = XFS_QM_IWARNLIMIT;
-	qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT;
+	defq = xfs_get_defquota(qinf, type);
+
+	defq->btimelimit = XFS_QM_BTIMELIMIT;
+	defq->itimelimit = XFS_QM_ITIMELIMIT;
+	defq->rtbtimelimit = XFS_QM_RTBTIMELIMIT;
+	defq->bwarnlimit = XFS_QM_BWARNLIMIT;
+	defq->iwarnlimit = XFS_QM_IWARNLIMIT;
+	defq->rtbwarnlimit = XFS_QM_RTBWARNLIMIT;

 	/*
 	 * We try to get the limits from the superuser's limits fields.
···
 	 *
 	 * Since we may not have done a quotacheck by this point, just read
 	 * the dquot without attaching it to any hashtables or lists.
-	 *
-	 * Timers and warnings are globally set by the first timer found in
-	 * user/group/proj quota types, otherwise a default value is used.
-	 * This should be split into different fields per quota type.
 	 */
-	if (XFS_IS_UQUOTA_RUNNING(mp))
-		type = XFS_DQ_USER;
-	else if (XFS_IS_GQUOTA_RUNNING(mp))
-		type = XFS_DQ_GROUP;
-	else
-		type = XFS_DQ_PROJ;
 	error = xfs_qm_dqget_uncached(mp, 0, type, &dqp);
 	if (error)
 		return;

 	ddqp = &dqp->q_core;
+
 	/*
 	 * The warnings and timers set the grace period given to
 	 * a user or group before he or she can not perform any
 	 * more writing. If it is zero, a default is used.
 	 */
 	if (ddqp->d_btimer)
-		qinf->qi_btimelimit = be32_to_cpu(ddqp->d_btimer);
+		defq->btimelimit = be32_to_cpu(ddqp->d_btimer);
 	if (ddqp->d_itimer)
-		qinf->qi_itimelimit = be32_to_cpu(ddqp->d_itimer);
+		defq->itimelimit = be32_to_cpu(ddqp->d_itimer);
 	if (ddqp->d_rtbtimer)
-		qinf->qi_rtbtimelimit = be32_to_cpu(ddqp->d_rtbtimer);
+		defq->rtbtimelimit = be32_to_cpu(ddqp->d_rtbtimer);
 	if (ddqp->d_bwarns)
-		qinf->qi_bwarnlimit = be16_to_cpu(ddqp->d_bwarns);
+		defq->bwarnlimit = be16_to_cpu(ddqp->d_bwarns);
 	if (ddqp->d_iwarns)
-		qinf->qi_iwarnlimit = be16_to_cpu(ddqp->d_iwarns);
+		defq->iwarnlimit = be16_to_cpu(ddqp->d_iwarns);
 	if (ddqp->d_rtbwarns)
-		qinf->qi_rtbwarnlimit = be16_to_cpu(ddqp->d_rtbwarns);
+		defq->rtbwarnlimit = be16_to_cpu(ddqp->d_rtbwarns);

 	xfs_qm_dqdestroy(dqp);
 }
···

 	mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD);

-	xfs_qm_init_timelimits(mp, qinf);
+	xfs_qm_init_timelimits(mp, XFS_DQ_USER);
+	xfs_qm_init_timelimits(mp, XFS_DQ_GROUP);
+	xfs_qm_init_timelimits(mp, XFS_DQ_PROJ);

 	if (XFS_IS_UQUOTA_RUNNING(mp))
 		xfs_qm_set_defquota(mp, XFS_DQ_USER, qinf);
···
 	}

 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_create,
-			XFS_QM_QINOCREATE_SPACE_RES(mp), 0, 0, &tp);
+			need_alloc ? XFS_QM_QINOCREATE_SPACE_RES(mp) : 0,
+			0, 0, &tp);
 	if (error)
 		return error;
···
 	 */
 	if (dqp->q_core.d_id) {
 		xfs_qm_adjust_dqlimits(mp, dqp);
-		xfs_qm_adjust_dqtimers(mp, &dqp->q_core);
+		xfs_qm_adjust_dqtimers(mp, dqp);
 	}

 	dqp->dq_flags |= XFS_DQ_DIRTY;
···
 			pq = xfs_qm_dqhold(ip->i_pdquot);
 		}
 	}
-	if (uq)
-		trace_xfs_dquot_dqalloc(ip);
+	trace_xfs_dquot_dqalloc(ip);

 	xfs_iunlock(ip, lockflags);
 	if (O_udqpp)
···
 {
 	struct xfs_mount	*mp = ip->i_mount;
 	uint64_t		delblks;
-	unsigned int		blkflags, prjflags = 0;
+	unsigned int		blkflags;
 	struct xfs_dquot	*udq_unres = NULL;
 	struct xfs_dquot	*gdq_unres = NULL;
 	struct xfs_dquot	*pdq_unres = NULL;
···

 	if (XFS_IS_PQUOTA_ON(ip->i_mount) && pdqp &&
 	    ip->i_d.di_projid != be32_to_cpu(pdqp->q_core.d_id)) {
-		prjflags = XFS_QMOPT_ENOSPC;
 		pdq_delblks = pdqp;
 		if (delblks) {
 			ASSERT(ip->i_pdquot);
···
 	error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount,
 				udq_delblks, gdq_delblks, pdq_delblks,
-				ip->i_d.di_nblocks, 1,
-				flags | blkflags | prjflags);
+				ip->i_d.di_nblocks, 1, flags | blkflags);
 	if (error)
 		return error;
···
 		ASSERT(udq_unres || gdq_unres || pdq_unres);
 		error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
 			    udq_delblks, gdq_delblks, pdq_delblks,
-			    (xfs_qcnt_t)delblks, 0,
-			    flags | blkflags | prjflags);
+			    (xfs_qcnt_t)delblks, 0, flags | blkflags);
 		if (error)
 			return error;
 		xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
···
 		return;

 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-	ASSERT(XFS_IS_QUOTA_RUNNING(mp));

 	if (udqp && XFS_IS_UQUOTA_ON(mp)) {
 		ASSERT(ip->i_udquot == NULL);
+45 -33
fs/xfs/xfs_qm.h
fs/xfs/xfs_qm.h
···
  */
 #define XFS_DQUOT_CLUSTER_SIZE_FSB	(xfs_filblks_t)1
 
+/* Defaults for each quota type: time limits, warn limits, usage limits */
 struct xfs_def_quota {
-	xfs_qcnt_t	bhardlimit;	/* default data blk hard limit */
-	xfs_qcnt_t	bsoftlimit;	/* default data blk soft limit */
-	xfs_qcnt_t	ihardlimit;	/* default inode count hard limit */
-	xfs_qcnt_t	isoftlimit;	/* default inode count soft limit */
-	xfs_qcnt_t	rtbhardlimit;	/* default realtime blk hard limit */
-	xfs_qcnt_t	rtbsoftlimit;	/* default realtime blk soft limit */
+	time64_t	btimelimit;	/* limit for blks timer */
+	time64_t	itimelimit;	/* limit for inodes timer */
+	time64_t	rtbtimelimit;	/* limit for rt blks timer */
+	xfs_qwarncnt_t	bwarnlimit;	/* limit for blks warnings */
+	xfs_qwarncnt_t	iwarnlimit;	/* limit for inodes warnings */
+	xfs_qwarncnt_t	rtbwarnlimit;	/* limit for rt blks warnings */
+	xfs_qcnt_t	bhardlimit;	/* default data blk hard limit */
+	xfs_qcnt_t	bsoftlimit;	/* default data blk soft limit */
+	xfs_qcnt_t	ihardlimit;	/* default inode count hard limit */
+	xfs_qcnt_t	isoftlimit;	/* default inode count soft limit */
+	xfs_qcnt_t	rtbhardlimit;	/* default realtime blk hard limit */
+	xfs_qcnt_t	rtbsoftlimit;	/* default realtime blk soft limit */
 };
 
 /*
···
  * The mount structure keeps a pointer to this.
  */
 struct xfs_quotainfo {
-	struct radix_tree_root qi_uquota_tree;
-	struct radix_tree_root qi_gquota_tree;
-	struct radix_tree_root qi_pquota_tree;
-	struct mutex qi_tree_lock;
+	struct radix_tree_root	qi_uquota_tree;
+	struct radix_tree_root	qi_gquota_tree;
+	struct radix_tree_root	qi_pquota_tree;
+	struct mutex		qi_tree_lock;
 	struct xfs_inode	*qi_uquotaip;	/* user quota inode */
 	struct xfs_inode	*qi_gquotaip;	/* group quota inode */
 	struct xfs_inode	*qi_pquotaip;	/* project quota inode */
-	struct list_lru	qi_lru;
-	int		qi_dquots;
-	time64_t	qi_btimelimit;	/* limit for blks timer */
-	time64_t	qi_itimelimit;	/* limit for inodes timer */
-	time64_t	qi_rtbtimelimit;/* limit for rt blks timer */
-	xfs_qwarncnt_t	qi_bwarnlimit;	/* limit for blks warnings */
-	xfs_qwarncnt_t	qi_iwarnlimit;	/* limit for inodes warnings */
-	xfs_qwarncnt_t	qi_rtbwarnlimit;/* limit for rt blks warnings */
-	struct mutex	qi_quotaofflock;/* to serialize quotaoff */
-	xfs_filblks_t	qi_dqchunklen;	/* # BBs in a chunk of dqs */
-	uint		qi_dqperchunk;	/* # ondisk dqs in above chunk */
+	struct list_lru		qi_lru;
+	int			qi_dquots;
+	struct mutex		qi_quotaofflock;/* to serialize quotaoff */
+	xfs_filblks_t		qi_dqchunklen;	/* # BBs in a chunk of dqs */
+	uint			qi_dqperchunk;	/* # ondisk dq in above chunk */
 	struct xfs_def_quota	qi_usr_default;
 	struct xfs_def_quota	qi_grp_default;
 	struct xfs_def_quota	qi_prj_default;
-	struct shrinker	qi_shrinker;
+	struct shrinker		qi_shrinker;
 };
 
 static inline struct radix_tree_root *
···
 		ASSERT(0);
 	}
 	return NULL;
+}
+
+static inline int
+xfs_dquot_type(struct xfs_dquot *dqp)
+{
+	if (XFS_QM_ISUDQ(dqp))
+		return XFS_DQ_USER;
+	if (XFS_QM_ISGDQ(dqp))
+		return XFS_DQ_GROUP;
+	ASSERT(XFS_QM_ISPDQ(dqp));
+	return XFS_DQ_PROJ;
 }
 
 extern
 void xfs_trans_mod_dquot(struct xfs_trans *tp, struct xfs_dquot *dqp,
···
 extern int xfs_qm_scall_quotaoff(struct xfs_mount *, uint);
 
 static inline struct xfs_def_quota *
-xfs_get_defquota(struct xfs_dquot *dqp, struct xfs_quotainfo *qi)
+xfs_get_defquota(struct xfs_quotainfo *qi, int type)
 {
-	struct xfs_def_quota *defq;
-
-	if (XFS_QM_ISUDQ(dqp))
-		defq = &qi->qi_usr_default;
-	else if (XFS_QM_ISGDQ(dqp))
-		defq = &qi->qi_grp_default;
-	else {
-		ASSERT(XFS_QM_ISPDQ(dqp));
-		defq = &qi->qi_prj_default;
+	switch (type) {
+	case XFS_DQ_USER:
+		return &qi->qi_usr_default;
+	case XFS_DQ_GROUP:
+		return &qi->qi_grp_default;
+	case XFS_DQ_PROJ:
+		return &qi->qi_prj_default;
+	default:
+		ASSERT(0);
+		return NULL;
 	}
-	return defq;
 }
 
 #endif	/* __XFS_QM_H__ */
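The hunk above replaces flag-decoding of a dquot with a lookup keyed by an explicit quota type. A minimal userspace sketch of that pattern (not kernel code; all names here are hypothetical stand-ins for the XFS types) shows why the switch form is simpler for callers that already know the type:

```c
#include <assert.h>
#include <stddef.h>

/* Hypothetical stand-ins for the kernel's quota types and defaults. */
enum dq_type { DQ_USER, DQ_GROUP, DQ_PROJ };

struct def_quota { long btimelimit; };

struct quotainfo {
	struct def_quota usr_default;
	struct def_quota grp_default;
	struct def_quota prj_default;
};

/*
 * Mirrors the new xfs_get_defquota() shape: the caller passes an
 * explicit quota type instead of a dquot whose flags must be decoded.
 */
static struct def_quota *
get_defquota(struct quotainfo *qi, enum dq_type type)
{
	switch (type) {
	case DQ_USER:
		return &qi->usr_default;
	case DQ_GROUP:
		return &qi->grp_default;
	case DQ_PROJ:
		return &qi->prj_default;
	default:
		return NULL;	/* the kernel version asserts here */
	}
}
```

Callers that hold a dquot can still get there by converting flags to a type first, which is what the new `xfs_dquot_type()` helper does.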
fs/xfs/xfs_qm_syscalls.c  +46 -37
···
 		goto out_unlock;
 	}
 
-	ASSERT(ip->i_d.di_nextents == 0);
+	ASSERT(ip->i_df.if_nextents == 0);
 
 	xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
 	error = xfs_trans_commit(tp);
···
 	int		error;
 	uint		qf;
 
-	flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD);
 	/*
-	 * Switching on quota accounting must be done at mount time.
+	 * Switching on quota accounting must be done at mount time,
+	 * only consider quota enforcement stuff here.
 	 */
-	flags &= ~(XFS_ALL_QUOTA_ACCT);
+	flags &= XFS_ALL_QUOTA_ENFD;
 
 	if (flags == 0) {
 		xfs_debug(mp, "%s: zero flags, m_qflags=%x",
···
 		goto out_unlock;
 	}
 
-	defq = xfs_get_defquota(dqp, q);
+	defq = xfs_get_defquota(q, xfs_dquot_type(dqp));
 	xfs_dqunlock(dqp);
 
 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_setqlim, 0, 0, 0, &tp);
···
 	ddq->d_rtbwarns = cpu_to_be16(newlim->d_rt_spc_warns);
 
 	if (id == 0) {
-		/*
-		 * Timelimits for the super user set the relative time
-		 * the other users can be over quota for this file system.
-		 * If it is zero a default is used. Ditto for the default
-		 * soft and hard limit values (already done, above), and
-		 * for warnings.
-		 */
-		if (newlim->d_fieldmask & QC_SPC_TIMER) {
-			q->qi_btimelimit = newlim->d_spc_timer;
-			ddq->d_btimer = cpu_to_be32(newlim->d_spc_timer);
-		}
-		if (newlim->d_fieldmask & QC_INO_TIMER) {
-			q->qi_itimelimit = newlim->d_ino_timer;
-			ddq->d_itimer = cpu_to_be32(newlim->d_ino_timer);
-		}
-		if (newlim->d_fieldmask & QC_RT_SPC_TIMER) {
-			q->qi_rtbtimelimit = newlim->d_rt_spc_timer;
-			ddq->d_rtbtimer = cpu_to_be32(newlim->d_rt_spc_timer);
-		}
 		if (newlim->d_fieldmask & QC_SPC_WARNS)
-			q->qi_bwarnlimit = newlim->d_spc_warns;
+			defq->bwarnlimit = newlim->d_spc_warns;
 		if (newlim->d_fieldmask & QC_INO_WARNS)
-			q->qi_iwarnlimit = newlim->d_ino_warns;
+			defq->iwarnlimit = newlim->d_ino_warns;
 		if (newlim->d_fieldmask & QC_RT_SPC_WARNS)
-			q->qi_rtbwarnlimit = newlim->d_rt_spc_warns;
-	} else {
+			defq->rtbwarnlimit = newlim->d_rt_spc_warns;
+	}
+
+	/*
+	 * Timelimits for the super user set the relative time the other users
+	 * can be over quota for this file system. If it is zero a default is
+	 * used. Ditto for the default soft and hard limit values (already
+	 * done, above), and for warnings.
+	 *
+	 * For other IDs, userspace can bump out the grace period if over
+	 * the soft limit.
+	 */
+	if (newlim->d_fieldmask & QC_SPC_TIMER)
+		ddq->d_btimer = cpu_to_be32(newlim->d_spc_timer);
+	if (newlim->d_fieldmask & QC_INO_TIMER)
+		ddq->d_itimer = cpu_to_be32(newlim->d_ino_timer);
+	if (newlim->d_fieldmask & QC_RT_SPC_TIMER)
+		ddq->d_rtbtimer = cpu_to_be32(newlim->d_rt_spc_timer);
+
+	if (id == 0) {
+		if (newlim->d_fieldmask & QC_SPC_TIMER)
+			defq->btimelimit = newlim->d_spc_timer;
+		if (newlim->d_fieldmask & QC_INO_TIMER)
+			defq->itimelimit = newlim->d_ino_timer;
+		if (newlim->d_fieldmask & QC_RT_SPC_TIMER)
+			defq->rtbtimelimit = newlim->d_rt_spc_timer;
+	}
+
+	if (id != 0) {
 		/*
 		 * If the user is now over quota, start the timelimit.
 		 * The user will not be 'warned'.
···
 	 * is on or off. We don't really want to bother with iterating
 	 * over all ondisk dquots and turning the timers on/off.
 	 */
-		xfs_qm_adjust_dqtimers(mp, ddq);
+		xfs_qm_adjust_dqtimers(mp, dqp);
 	}
 	dqp->dq_flags |= XFS_DQ_DIRTY;
 	xfs_trans_log_dquot(tp, dqp);
···
 STATIC int
 xfs_dqrele_inode(
 	struct xfs_inode	*ip,
-	int			flags,
 	void			*args)
 {
+	uint			*flags = args;
+
 	/* skip quota inodes */
 	if (ip == ip->i_mount->m_quotainfo->qi_uquotaip ||
 	    ip == ip->i_mount->m_quotainfo->qi_gquotaip ||
···
 	}
 
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) {
+	if ((*flags & XFS_UQUOTA_ACCT) && ip->i_udquot) {
 		xfs_qm_dqrele(ip->i_udquot);
 		ip->i_udquot = NULL;
 	}
-	if ((flags & XFS_GQUOTA_ACCT) && ip->i_gdquot) {
+	if ((*flags & XFS_GQUOTA_ACCT) && ip->i_gdquot) {
 		xfs_qm_dqrele(ip->i_gdquot);
 		ip->i_gdquot = NULL;
 	}
-	if ((flags & XFS_PQUOTA_ACCT) && ip->i_pdquot) {
+	if ((*flags & XFS_PQUOTA_ACCT) && ip->i_pdquot) {
 		xfs_qm_dqrele(ip->i_pdquot);
 		ip->i_pdquot = NULL;
 	}
···
  */
 void
 xfs_qm_dqrele_all_inodes(
-	struct xfs_mount *mp,
-	uint		 flags)
+	struct xfs_mount	*mp,
+	uint			flags)
 {
 	ASSERT(mp->m_quotainfo);
-	xfs_inode_ag_iterator_flags(mp, xfs_dqrele_inode, flags, NULL,
-			XFS_AGITER_INEW_WAIT);
+	xfs_inode_walk(mp, XFS_INODE_WALK_INEW_WAIT, xfs_dqrele_inode,
+			&flags, XFS_ICI_NO_TAG);
 }
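The `xfs_dqrele_inode` change above drops the dedicated `int flags` parameter in favor of the generic walker's single opaque `void *args` pointer. A userspace sketch of that callback convention (not kernel code; the walker and inode struct here are made up for illustration):

```c
#include <assert.h>

/* Hypothetical minimal inode for the sketch. */
struct inode { unsigned int dropped; };

/*
 * Callback in the generic-walker style: one opaque argument pointer,
 * cast back to the real type inside, like xfs_dqrele_inode reading
 * *flags from args.
 */
static int
dqrele_inode(struct inode *ip, void *args)
{
	unsigned int *flags = args;	/* recover the typed argument */

	ip->dropped |= *flags;
	return 0;
}

/* Toy stand-in for xfs_inode_walk(): apply fn to every inode. */
static int
inode_walk(struct inode *inodes, int count,
	   int (*fn)(struct inode *, void *), void *args)
{
	for (int i = 0; i < count; i++) {
		int error = fn(&inodes[i], args);
		if (error)
			return error;
	}
	return 0;
}
```

The caller passes `&flags`, exactly as `xfs_qm_dqrele_all_inodes` now does, so one walker signature serves every callback.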
fs/xfs/xfs_quotaops.c  +15 -15
···
 	struct qc_type_state	*tstate,
 	struct xfs_mount	*mp,
 	struct xfs_inode	*ip,
-	xfs_ino_t		ino)
+	xfs_ino_t		ino,
+	struct xfs_def_quota	*defq)
 {
-	struct xfs_quotainfo *q = mp->m_quotainfo;
-	bool tempqip = false;
+	bool			tempqip = false;
 
 	tstate->ino = ino;
 	if (!ip && ino == NULLFSINO)
···
 	}
 	tstate->flags |= QCI_SYSFILE;
 	tstate->blocks = ip->i_d.di_nblocks;
-	tstate->nextents = ip->i_d.di_nextents;
-	tstate->spc_timelimit = (u32)q->qi_btimelimit;
-	tstate->ino_timelimit = (u32)q->qi_itimelimit;
-	tstate->rt_spc_timelimit = (u32)q->qi_rtbtimelimit;
-	tstate->spc_warnlimit = q->qi_bwarnlimit;
-	tstate->ino_warnlimit = q->qi_iwarnlimit;
-	tstate->rt_spc_warnlimit = q->qi_rtbwarnlimit;
+	tstate->nextents = ip->i_df.if_nextents;
+	tstate->spc_timelimit = (u32)defq->btimelimit;
+	tstate->ino_timelimit = (u32)defq->itimelimit;
+	tstate->rt_spc_timelimit = (u32)defq->rtbtimelimit;
+	tstate->spc_warnlimit = defq->bwarnlimit;
+	tstate->ino_warnlimit = defq->iwarnlimit;
+	tstate->rt_spc_warnlimit = defq->rtbwarnlimit;
 	if (tempqip)
 		xfs_irele(ip);
 }
···
 		state->s_state[PRJQUOTA].flags |= QCI_LIMITS_ENFORCED;
 
 	xfs_qm_fill_state(&state->s_state[USRQUOTA], mp, q->qi_uquotaip,
-			  mp->m_sb.sb_uquotino);
+			  mp->m_sb.sb_uquotino, &q->qi_usr_default);
 	xfs_qm_fill_state(&state->s_state[GRPQUOTA], mp, q->qi_gquotaip,
-			  mp->m_sb.sb_gquotino);
+			  mp->m_sb.sb_gquotino, &q->qi_grp_default);
 	xfs_qm_fill_state(&state->s_state[PRJQUOTA], mp, q->qi_pquotaip,
-			  mp->m_sb.sb_pquotino);
+			  mp->m_sb.sb_pquotino, &q->qi_prj_default);
 	return 0;
 }
···
 	int			type,
 	struct qc_info		*info)
 {
-	struct xfs_mount *mp = XFS_M(sb);
-	struct qc_dqblk newlim;
+	struct xfs_mount	*mp = XFS_M(sb);
+	struct qc_dqblk		newlim;
 
 	if (sb_rdonly(sb))
 		return -EROFS;
fs/xfs/xfs_refcount_item.c  +170 -82
···
 #include "xfs_log.h"
 #include "xfs_refcount.h"
 #include "xfs_error.h"
+#include "xfs_log_priv.h"
+#include "xfs_log_recover.h"
 
 kmem_zone_t	*xfs_cui_zone;
 kmem_zone_t	*xfs_cud_zone;
+
+static const struct xfs_item_ops xfs_cui_item_ops;
 
 static inline struct xfs_cui_log_item *CUI_ITEM(struct xfs_log_item *lip)
 {
 	return container_of(lip, struct xfs_cui_log_item, cui_item);
 }
 
-void
+STATIC void
 xfs_cui_item_free(
 	struct xfs_cui_log_item	*cuip)
 {
···
  * committed vs unpin operations in bulk insert operations. Hence the reference
  * count to ensure only the last caller frees the CUI.
  */
-void
+STATIC void
 xfs_cui_release(
 	struct xfs_cui_log_item	*cuip)
 {
 	ASSERT(atomic_read(&cuip->cui_refcount) > 0);
 	if (atomic_dec_and_test(&cuip->cui_refcount)) {
-		xfs_trans_ail_remove(&cuip->cui_item, SHUTDOWN_LOG_IO_ERROR);
+		xfs_trans_ail_delete(&cuip->cui_item, SHUTDOWN_LOG_IO_ERROR);
 		xfs_cui_item_free(cuip);
 	}
 }
···
 	xfs_cui_release(CUI_ITEM(lip));
 }
 
-static const struct xfs_item_ops xfs_cui_item_ops = {
-	.iop_size	= xfs_cui_item_size,
-	.iop_format	= xfs_cui_item_format,
-	.iop_unpin	= xfs_cui_item_unpin,
-	.iop_release	= xfs_cui_item_release,
-};
-
 /*
  * Allocate and initialize an cui item with the given number of extents.
  */
-struct xfs_cui_log_item *
+STATIC struct xfs_cui_log_item *
 xfs_cui_init(
 	struct xfs_mount		*mp,
 	uint				nextents)
···
 		XFS_FSB_TO_AGNO(mp, rb->ri_startblock);
 }
 
-/* Get an CUI. */
-STATIC void *
-xfs_refcount_update_create_intent(
-	struct xfs_trans		*tp,
-	unsigned int			count)
-{
-	struct xfs_cui_log_item		*cuip;
-
-	ASSERT(tp != NULL);
-	ASSERT(count > 0);
-
-	cuip = xfs_cui_init(tp->t_mountp, count);
-	ASSERT(cuip != NULL);
-
-	/*
-	 * Get a log_item_desc to point at the new item.
-	 */
-	xfs_trans_add_item(tp, &cuip->cui_item);
-	return cuip;
-}
-
 /* Set the phys extent flags for this reverse mapping. */
 static void
 xfs_trans_set_refcount_flags(
···
 STATIC void
 xfs_refcount_update_log_item(
 	struct xfs_trans		*tp,
-	void				*intent,
-	struct list_head		*item)
+	struct xfs_cui_log_item		*cuip,
+	struct xfs_refcount_intent	*refc)
 {
-	struct xfs_cui_log_item		*cuip = intent;
-	struct xfs_refcount_intent	*refc;
 	uint				next_extent;
 	struct xfs_phys_extent		*ext;
-
-	refc = container_of(item, struct xfs_refcount_intent, ri_list);
 
 	tp->t_flags |= XFS_TRANS_DIRTY;
 	set_bit(XFS_LI_DIRTY, &cuip->cui_item.li_flags);
···
 	xfs_trans_set_refcount_flags(ext, refc->ri_type);
 }
 
+static struct xfs_log_item *
+xfs_refcount_update_create_intent(
+	struct xfs_trans		*tp,
+	struct list_head		*items,
+	unsigned int			count,
+	bool				sort)
+{
+	struct xfs_mount		*mp = tp->t_mountp;
+	struct xfs_cui_log_item		*cuip = xfs_cui_init(mp, count);
+	struct xfs_refcount_intent	*refc;
+
+	ASSERT(count > 0);
+
+	xfs_trans_add_item(tp, &cuip->cui_item);
+	if (sort)
+		list_sort(mp, items, xfs_refcount_update_diff_items);
+	list_for_each_entry(refc, items, ri_list)
+		xfs_refcount_update_log_item(tp, cuip, refc);
+	return &cuip->cui_item;
+}
+
 /* Get an CUD so we can process all the deferred refcount updates. */
-STATIC void *
+static struct xfs_log_item *
 xfs_refcount_update_create_done(
 	struct xfs_trans		*tp,
-	void				*intent,
+	struct xfs_log_item		*intent,
 	unsigned int			count)
 {
-	return xfs_trans_get_cud(tp, intent);
+	return &xfs_trans_get_cud(tp, CUI_ITEM(intent))->cud_item;
 }
 
 /* Process a deferred refcount update. */
 STATIC int
 xfs_refcount_update_finish_item(
 	struct xfs_trans		*tp,
+	struct xfs_log_item		*done,
 	struct list_head		*item,
-	void				*done_item,
-	void				**state)
+	struct xfs_btree_cur		**state)
 {
 	struct xfs_refcount_intent	*refc;
 	xfs_fsblock_t			new_fsb;
···
 	int				error;
 
 	refc = container_of(item, struct xfs_refcount_intent, ri_list);
-	error = xfs_trans_log_finish_refcount_update(tp, done_item,
-			refc->ri_type,
-			refc->ri_startblock,
-			refc->ri_blockcount,
-			&new_fsb, &new_aglen,
-			(struct xfs_btree_cur **)state);
+	error = xfs_trans_log_finish_refcount_update(tp, CUD_ITEM(done),
+			refc->ri_type, refc->ri_startblock, refc->ri_blockcount,
+			&new_fsb, &new_aglen, state);
+
 	/* Did we run out of reservation? Requeue what we didn't finish. */
 	if (!error && new_aglen > 0) {
 		ASSERT(refc->ri_type == XFS_REFCOUNT_INCREASE ||
···
 	return error;
 }
 
-/* Clean up after processing deferred refcounts. */
-STATIC void
-xfs_refcount_update_finish_cleanup(
-	struct xfs_trans	*tp,
-	void			*state,
-	int			error)
-{
-	struct xfs_btree_cur	*rcur = state;
-
-	xfs_refcount_finish_one_cleanup(tp, rcur, error);
-}
-
 /* Abort all pending CUIs. */
 STATIC void
 xfs_refcount_update_abort_intent(
-	void				*intent)
+	struct xfs_log_item		*intent)
 {
-	xfs_cui_release(intent);
+	xfs_cui_release(CUI_ITEM(intent));
 }
 
 /* Cancel a deferred refcount update. */
···
 
 const struct xfs_defer_op_type xfs_refcount_update_defer_type = {
 	.max_items	= XFS_CUI_MAX_FAST_EXTENTS,
-	.diff_items	= xfs_refcount_update_diff_items,
 	.create_intent	= xfs_refcount_update_create_intent,
 	.abort_intent	= xfs_refcount_update_abort_intent,
-	.log_item	= xfs_refcount_update_log_item,
 	.create_done	= xfs_refcount_update_create_done,
 	.finish_item	= xfs_refcount_update_finish_item,
-	.finish_cleanup = xfs_refcount_update_finish_cleanup,
+	.finish_cleanup = xfs_refcount_finish_one_cleanup,
 	.cancel_item	= xfs_refcount_update_cancel_item,
 };
 
···
  * Process a refcount update intent item that was recovered from the log.
  * We need to update the refcountbt.
  */
-int
-xfs_cui_recover(
-	struct xfs_trans		*parent_tp,
-	struct xfs_cui_log_item		*cuip)
+STATIC int
+xfs_cui_item_recover(
+	struct xfs_log_item		*lip,
+	struct xfs_trans		*parent_tp)
 {
-	int				i;
-	int				error = 0;
-	unsigned int			refc_type;
+	struct xfs_bmbt_irec		irec;
+	struct xfs_cui_log_item		*cuip = CUI_ITEM(lip);
 	struct xfs_phys_extent		*refc;
-	xfs_fsblock_t			startblock_fsb;
-	bool				op_ok;
 	struct xfs_cud_log_item		*cudp;
 	struct xfs_trans		*tp;
 	struct xfs_btree_cur		*rcur = NULL;
-	enum xfs_refcount_intent_type	type;
+	struct xfs_mount		*mp = parent_tp->t_mountp;
+	xfs_fsblock_t			startblock_fsb;
 	xfs_fsblock_t			new_fsb;
 	xfs_extlen_t			new_len;
-	struct xfs_bmbt_irec		irec;
+	unsigned int			refc_type;
+	bool				op_ok;
 	bool				requeue_only = false;
-	struct xfs_mount		*mp = parent_tp->t_mountp;
-
-	ASSERT(!test_bit(XFS_CUI_RECOVERED, &cuip->cui_flags));
+	enum xfs_refcount_intent_type	type;
+	int				i;
+	int				error = 0;
 
 	/*
 	 * First check the validity of the extents described by the
···
 		 * This will pull the CUI from the AIL and
 		 * free the memory associated with it.
 		 */
-		set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags);
 		xfs_cui_release(cuip);
 		return -EFSCORRUPTED;
 	}
···
 	}
 
 	xfs_refcount_finish_one_cleanup(tp, rcur, error);
-	set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags);
 	xfs_defer_move(parent_tp, tp);
 	error = xfs_trans_commit(tp);
 	return error;
···
 	xfs_trans_cancel(tp);
 	return error;
 }
+
+STATIC bool
+xfs_cui_item_match(
+	struct xfs_log_item	*lip,
+	uint64_t		intent_id)
+{
+	return CUI_ITEM(lip)->cui_format.cui_id == intent_id;
+}
+
+static const struct xfs_item_ops xfs_cui_item_ops = {
+	.iop_size	= xfs_cui_item_size,
+	.iop_format	= xfs_cui_item_format,
+	.iop_unpin	= xfs_cui_item_unpin,
+	.iop_release	= xfs_cui_item_release,
+	.iop_recover	= xfs_cui_item_recover,
+	.iop_match	= xfs_cui_item_match,
+};
+
+/*
+ * Copy an CUI format buffer from the given buf, and into the destination
+ * CUI format structure. The CUI/CUD items were designed not to need any
+ * special alignment handling.
+ */
+static int
+xfs_cui_copy_format(
+	struct xfs_log_iovec		*buf,
+	struct xfs_cui_log_format	*dst_cui_fmt)
+{
+	struct xfs_cui_log_format	*src_cui_fmt;
+	uint				len;
+
+	src_cui_fmt = buf->i_addr;
+	len = xfs_cui_log_format_sizeof(src_cui_fmt->cui_nextents);
+
+	if (buf->i_len == len) {
+		memcpy(dst_cui_fmt, src_cui_fmt, len);
+		return 0;
+	}
+	XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
+	return -EFSCORRUPTED;
+}
+
+/*
+ * This routine is called to create an in-core extent refcount update
+ * item from the cui format structure which was logged on disk.
+ * It allocates an in-core cui, copies the extents from the format
+ * structure into it, and adds the cui to the AIL with the given
+ * LSN.
+ */
+STATIC int
+xlog_recover_cui_commit_pass2(
+	struct xlog			*log,
+	struct list_head		*buffer_list,
+	struct xlog_recover_item	*item,
+	xfs_lsn_t			lsn)
+{
+	int				error;
+	struct xfs_mount		*mp = log->l_mp;
+	struct xfs_cui_log_item		*cuip;
+	struct xfs_cui_log_format	*cui_formatp;
+
+	cui_formatp = item->ri_buf[0].i_addr;
+
+	cuip = xfs_cui_init(mp, cui_formatp->cui_nextents);
+	error = xfs_cui_copy_format(&item->ri_buf[0], &cuip->cui_format);
+	if (error) {
+		xfs_cui_item_free(cuip);
+		return error;
+	}
+	atomic_set(&cuip->cui_next_extent, cui_formatp->cui_nextents);
+	/*
+	 * Insert the intent into the AIL directly and drop one reference so
+	 * that finishing or canceling the work will drop the other.
+	 */
+	xfs_trans_ail_insert(log->l_ailp, &cuip->cui_item, lsn);
+	xfs_cui_release(cuip);
+	return 0;
+}
+
+const struct xlog_recover_item_ops xlog_cui_item_ops = {
+	.item_type		= XFS_LI_CUI,
+	.commit_pass2		= xlog_recover_cui_commit_pass2,
+};
+
+/*
+ * This routine is called when an CUD format structure is found in a committed
+ * transaction in the log. Its purpose is to cancel the corresponding CUI if it
+ * was still in the log. To do this it searches the AIL for the CUI with an id
+ * equal to that in the CUD format structure. If we find it we drop the CUD
+ * reference, which removes the CUI from the AIL and frees it.
+ */
+STATIC int
+xlog_recover_cud_commit_pass2(
+	struct xlog			*log,
+	struct list_head		*buffer_list,
+	struct xlog_recover_item	*item,
+	xfs_lsn_t			lsn)
+{
+	struct xfs_cud_log_format	*cud_formatp;
+
+	cud_formatp = item->ri_buf[0].i_addr;
+	if (item->ri_buf[0].i_len != sizeof(struct xfs_cud_log_format)) {
+		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
+		return -EFSCORRUPTED;
+	}
+
+	xlog_recover_release_intent(log, XFS_LI_CUI, cud_formatp->cud_cui_id);
+	return 0;
+}
+
+const struct xlog_recover_item_ops xlog_cud_item_ops = {
+	.item_type		= XFS_LI_CUD,
+	.commit_pass2		= xlog_recover_cud_commit_pass2,
+};
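The pass-2 recovery functions above pair intent (CUI) and done (CUD) items by id: the CUI is inserted into the AIL, and a later CUD cancels it via `iop_match()` and `xlog_recover_release_intent()`. A toy userspace model of that pairing (not the kernel AIL; the table and helpers here are invented for illustration):

```c
#include <assert.h>
#include <stdint.h>

#define MAX_INTENTS 8

/* A CUI-style intent keyed by the id its CUD will carry. */
struct intent { uint64_t id; int live; };

static struct intent ail[MAX_INTENTS];

/* Pass-2 of an intent record: park it, keyed by id. */
static void
intent_insert(uint64_t id)
{
	for (int i = 0; i < MAX_INTENTS; i++) {
		if (!ail[i].live) {
			ail[i].id = id;
			ail[i].live = 1;
			return;
		}
	}
}

/*
 * Pass-2 of a done record: find the intent with a matching id and
 * release it. Returns 1 if a match was found, 0 if the intent was
 * already finished (or never logged).
 */
static int
intent_release(uint64_t id)
{
	for (int i = 0; i < MAX_INTENTS; i++) {
		if (ail[i].live && ail[i].id == id) {
			ail[i].live = 0;
			return 1;
		}
	}
	return 0;
}
```

Whatever intents remain live after replaying all records are the unfinished operations that recovery must re-run, which is exactly the role `iop_recover` plays for the surviving CUIs.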
fs/xfs/xfs_refcount_item.h  -11
···
 #define	XFS_CUI_MAX_FAST_EXTENTS	16
 
 /*
- * Define CUI flag bits. Manipulated by set/clear/test_bit operators.
- */
-#define	XFS_CUI_RECOVERED	1
-
-/*
  * This is the "refcount update intent" log item. It is used to log
  * the fact that some reverse mappings need to change. It is used in
  * conjunction with the "refcount update done" log item described
···
 	struct xfs_log_item		cui_item;
 	atomic_t			cui_refcount;
 	atomic_t			cui_next_extent;
-	unsigned long			cui_flags;	/* misc flags */
 	struct xfs_cui_log_format	cui_format;
 };
 
···
 
 extern struct kmem_zone	*xfs_cui_zone;
 extern struct kmem_zone	*xfs_cud_zone;
-
-struct xfs_cui_log_item *xfs_cui_init(struct xfs_mount *, uint);
-void xfs_cui_item_free(struct xfs_cui_log_item *);
-void xfs_cui_release(struct xfs_cui_log_item *);
-int xfs_cui_recover(struct xfs_trans *parent_tp, struct xfs_cui_log_item *cuip);
 
 #endif	/* __XFS_REFCOUNT_ITEM_H__ */
fs/xfs/xfs_rmap_item.c  +145 -84
···
 #include "xfs_log.h"
 #include "xfs_rmap.h"
 #include "xfs_error.h"
+#include "xfs_log_priv.h"
+#include "xfs_log_recover.h"
 
 kmem_zone_t	*xfs_rui_zone;
 kmem_zone_t	*xfs_rud_zone;
+
+static const struct xfs_item_ops xfs_rui_item_ops;
 
 static inline struct xfs_rui_log_item *RUI_ITEM(struct xfs_log_item *lip)
 {
 	return container_of(lip, struct xfs_rui_log_item, rui_item);
 }
 
-void
+STATIC void
 xfs_rui_item_free(
 	struct xfs_rui_log_item	*ruip)
 {
···
  * committed vs unpin operations in bulk insert operations. Hence the reference
  * count to ensure only the last caller frees the RUI.
  */
-void
+STATIC void
 xfs_rui_release(
 	struct xfs_rui_log_item	*ruip)
 {
 	ASSERT(atomic_read(&ruip->rui_refcount) > 0);
 	if (atomic_dec_and_test(&ruip->rui_refcount)) {
-		xfs_trans_ail_remove(&ruip->rui_item, SHUTDOWN_LOG_IO_ERROR);
+		xfs_trans_ail_delete(&ruip->rui_item, SHUTDOWN_LOG_IO_ERROR);
 		xfs_rui_item_free(ruip);
 	}
 }
···
 	xfs_rui_release(RUI_ITEM(lip));
 }
 
-static const struct xfs_item_ops xfs_rui_item_ops = {
-	.iop_size	= xfs_rui_item_size,
-	.iop_format	= xfs_rui_item_format,
-	.iop_unpin	= xfs_rui_item_unpin,
-	.iop_release	= xfs_rui_item_release,
-};
-
 /*
  * Allocate and initialize an rui item with the given number of extents.
  */
-struct xfs_rui_log_item *
+STATIC struct xfs_rui_log_item *
 xfs_rui_init(
 	struct xfs_mount		*mp,
 	uint				nextents)
···
  * RUI format structure. The RUI/RUD items were designed not to need any
  * special alignment handling.
  */
-int
+STATIC int
 xfs_rui_copy_format(
 	struct xfs_log_iovec		*buf,
 	struct xfs_rui_log_format	*dst_rui_fmt)
···
 		XFS_FSB_TO_AGNO(mp, rb->ri_bmap.br_startblock);
 }
 
-/* Get an RUI. */
-STATIC void *
-xfs_rmap_update_create_intent(
-	struct xfs_trans		*tp,
-	unsigned int			count)
-{
-	struct xfs_rui_log_item		*ruip;
-
-	ASSERT(tp != NULL);
-	ASSERT(count > 0);
-
-	ruip = xfs_rui_init(tp->t_mountp, count);
-	ASSERT(ruip != NULL);
-
-	/*
-	 * Get a log_item_desc to point at the new item.
-	 */
-	xfs_trans_add_item(tp, &ruip->rui_item);
-	return ruip;
-}
-
 /* Log rmap updates in the intent item. */
 STATIC void
 xfs_rmap_update_log_item(
 	struct xfs_trans	*tp,
-	void			*intent,
-	struct list_head	*item)
+	struct xfs_rui_log_item	*ruip,
+	struct xfs_rmap_intent	*rmap)
 {
-	struct xfs_rui_log_item	*ruip = intent;
-	struct xfs_rmap_intent	*rmap;
 	uint			next_extent;
 	struct xfs_map_extent	*map;
-
-	rmap = container_of(item, struct xfs_rmap_intent, ri_list);
 
 	tp->t_flags |= XFS_TRANS_DIRTY;
 	set_bit(XFS_LI_DIRTY, &ruip->rui_item.li_flags);
···
 			rmap->ri_bmap.br_state);
 }
 
+static struct xfs_log_item *
+xfs_rmap_update_create_intent(
+	struct xfs_trans		*tp,
+	struct list_head		*items,
+	unsigned int			count,
+	bool				sort)
+{
+	struct xfs_mount		*mp = tp->t_mountp;
+	struct xfs_rui_log_item		*ruip = xfs_rui_init(mp, count);
+	struct xfs_rmap_intent		*rmap;
+
+	ASSERT(count > 0);
+
+	xfs_trans_add_item(tp, &ruip->rui_item);
+	if (sort)
+		list_sort(mp, items, xfs_rmap_update_diff_items);
+	list_for_each_entry(rmap, items, ri_list)
+		xfs_rmap_update_log_item(tp, ruip, rmap);
+	return &ruip->rui_item;
+}
+
 /* Get an RUD so we can process all the deferred rmap updates. */
-STATIC void *
+static struct xfs_log_item *
 xfs_rmap_update_create_done(
 	struct xfs_trans		*tp,
-	void				*intent,
+	struct xfs_log_item		*intent,
 	unsigned int			count)
 {
-	return xfs_trans_get_rud(tp, intent);
+	return &xfs_trans_get_rud(tp, RUI_ITEM(intent))->rud_item;
 }
 
 /* Process a deferred rmap update. */
 STATIC int
 xfs_rmap_update_finish_item(
 	struct xfs_trans		*tp,
+	struct xfs_log_item		*done,
 	struct list_head		*item,
-	void				*done_item,
-	void				**state)
+	struct xfs_btree_cur		**state)
 {
 	struct xfs_rmap_intent		*rmap;
 	int				error;
 
 	rmap = container_of(item, struct xfs_rmap_intent, ri_list);
-	error = xfs_trans_log_finish_rmap_update(tp, done_item,
-			rmap->ri_type,
-			rmap->ri_owner, rmap->ri_whichfork,
-			rmap->ri_bmap.br_startoff,
-			rmap->ri_bmap.br_startblock,
-			rmap->ri_bmap.br_blockcount,
-			rmap->ri_bmap.br_state,
-			(struct xfs_btree_cur **)state);
+	error = xfs_trans_log_finish_rmap_update(tp, RUD_ITEM(done),
+			rmap->ri_type, rmap->ri_owner, rmap->ri_whichfork,
+			rmap->ri_bmap.br_startoff, rmap->ri_bmap.br_startblock,
+			rmap->ri_bmap.br_blockcount, rmap->ri_bmap.br_state,
+			state);
 	kmem_free(rmap);
 	return error;
-}
-
-/* Clean up after processing deferred rmaps. */
-STATIC void
-xfs_rmap_update_finish_cleanup(
-	struct xfs_trans	*tp,
-	void			*state,
-	int			error)
-{
-	struct xfs_btree_cur	*rcur = state;
-
-	xfs_rmap_finish_one_cleanup(tp, rcur, error);
 }
 
 /* Abort all pending RUIs. */
 STATIC void
 xfs_rmap_update_abort_intent(
-	void				*intent)
+	struct xfs_log_item		*intent)
 {
-	xfs_rui_release(intent);
+	xfs_rui_release(RUI_ITEM(intent));
 }
 
 /* Cancel a deferred rmap update. */
···
 
 const struct xfs_defer_op_type xfs_rmap_update_defer_type = {
 	.max_items	= XFS_RUI_MAX_FAST_EXTENTS,
-	.diff_items	= xfs_rmap_update_diff_items,
 	.create_intent	= xfs_rmap_update_create_intent,
 	.abort_intent	= xfs_rmap_update_abort_intent,
-	.log_item	= xfs_rmap_update_log_item,
 	.create_done	= xfs_rmap_update_create_done,
 	.finish_item	= xfs_rmap_update_finish_item,
-	.finish_cleanup = xfs_rmap_update_finish_cleanup,
+	.finish_cleanup = xfs_rmap_finish_one_cleanup,
 	.cancel_item	= xfs_rmap_update_cancel_item,
 };
 
···
  * Process an rmap update intent item that was recovered from the log.
  * We need to update the rmapbt.
  */
-int
-xfs_rui_recover(
-	struct xfs_mount		*mp,
-	struct xfs_rui_log_item		*ruip)
+STATIC int
+xfs_rui_item_recover(
+	struct xfs_log_item		*lip,
+	struct xfs_trans		*parent_tp)
 {
-	int				i;
-	int				error = 0;
+	struct xfs_rui_log_item		*ruip = RUI_ITEM(lip);
 	struct xfs_map_extent		*rmap;
-	xfs_fsblock_t			startblock_fsb;
-	bool				op_ok;
 	struct xfs_rud_log_item		*rudp;
-	enum xfs_rmap_intent_type	type;
-	int				whichfork;
-	xfs_exntst_t			state;
 	struct xfs_trans		*tp;
 	struct xfs_btree_cur		*rcur = NULL;
-
-	ASSERT(!test_bit(XFS_RUI_RECOVERED, &ruip->rui_flags));
+	struct xfs_mount		*mp = parent_tp->t_mountp;
+	xfs_fsblock_t			startblock_fsb;
+	enum xfs_rmap_intent_type	type;
+	xfs_exntst_t			state;
+	bool				op_ok;
+	int				i;
+	int				whichfork;
+	int				error = 0;
 
 	/*
 	 * First check the validity of the extents described by the
···
 		 * This will pull the RUI from the AIL and
 		 * free the memory associated with it.
 		 */
-		set_bit(XFS_RUI_RECOVERED, &ruip->rui_flags);
 		xfs_rui_release(ruip);
 		return -EFSCORRUPTED;
 	}
···
 	}
 
 	xfs_rmap_finish_one_cleanup(tp, rcur, error);
-	set_bit(XFS_RUI_RECOVERED, &ruip->rui_flags);
 	error = xfs_trans_commit(tp);
 	return error;
 
···
 	xfs_trans_cancel(tp);
 	return error;
 }
+
+STATIC bool
+xfs_rui_item_match(
+	struct xfs_log_item	*lip,
+	uint64_t		intent_id)
+{
+	return RUI_ITEM(lip)->rui_format.rui_id == intent_id;
+}
+
+static const struct xfs_item_ops xfs_rui_item_ops = {
+	.iop_size	= xfs_rui_item_size,
+	.iop_format	= xfs_rui_item_format,
+	.iop_unpin	= xfs_rui_item_unpin,
+	.iop_release	= xfs_rui_item_release,
+	.iop_recover	= xfs_rui_item_recover,
+	.iop_match	= xfs_rui_item_match,
+};
+
+/*
+ * This routine is called to create an in-core extent rmap update
+ * item from the rui format structure which was logged on disk.
+ * It allocates an in-core rui, copies the extents from the format
+ * structure into it, and adds the rui to the AIL with the given
+ * LSN.
+ */
+STATIC int
+xlog_recover_rui_commit_pass2(
+	struct xlog			*log,
+	struct list_head		*buffer_list,
+	struct xlog_recover_item	*item,
+	xfs_lsn_t			lsn)
+{
+	int				error;
+	struct xfs_mount		*mp = log->l_mp;
+	struct xfs_rui_log_item		*ruip;
+	struct xfs_rui_log_format	*rui_formatp;
+
+	rui_formatp = item->ri_buf[0].i_addr;
+
+	ruip = xfs_rui_init(mp, rui_formatp->rui_nextents);
+	error = xfs_rui_copy_format(&item->ri_buf[0], &ruip->rui_format);
+	if (error) {
+		xfs_rui_item_free(ruip);
+		return error;
+	}
+	atomic_set(&ruip->rui_next_extent, rui_formatp->rui_nextents);
+	/*
+	 * Insert the intent into the AIL directly and drop one reference so
+	 * that finishing or canceling the work will drop the other.
+	 */
+	xfs_trans_ail_insert(log->l_ailp, &ruip->rui_item, lsn);
+	xfs_rui_release(ruip);
+	return 0;
+}
+
+const struct xlog_recover_item_ops xlog_rui_item_ops = {
+	.item_type		= XFS_LI_RUI,
+	.commit_pass2		= xlog_recover_rui_commit_pass2,
+};
+
+/*
+ * This routine is called when an RUD format structure is found in a committed
+ * transaction in the log. Its purpose is to cancel the corresponding RUI if it
+ * was still in the log. To do this it searches the AIL for the RUI with an id
+ * equal to that in the RUD format structure. If we find it we drop the RUD
+ * reference, which removes the RUI from the AIL and frees it.
+ */
+STATIC int
+xlog_recover_rud_commit_pass2(
+	struct xlog			*log,
+	struct list_head		*buffer_list,
+	struct xlog_recover_item	*item,
+	xfs_lsn_t			lsn)
+{
+	struct xfs_rud_log_format	*rud_formatp;
+
+	rud_formatp = item->ri_buf[0].i_addr;
+	ASSERT(item->ri_buf[0].i_len == sizeof(struct xfs_rud_log_format));
+
+	xlog_recover_release_intent(log, XFS_LI_RUI, rud_formatp->rud_rui_id);
+	return 0;
+}
+
+const struct xlog_recover_item_ops xlog_rud_item_ops = {
+	.item_type		= XFS_LI_RUD,
+	.commit_pass2		= xlog_recover_rud_commit_pass2,
+};
fs/xfs/xfs_rmap_item.h (-13)

```diff
 #define	XFS_RUI_MAX_FAST_EXTENTS	16

 /*
- * Define RUI flag bits. Manipulated by set/clear/test_bit operators.
- */
-#define	XFS_RUI_RECOVERED	1
-
-/*
  * This is the "rmap update intent" log item. It is used to log the fact that
  * some reverse mappings need to change. It is used in conjunction with the
  * "rmap update done" log item described below.
···
 	struct xfs_log_item		rui_item;
 	atomic_t			rui_refcount;
 	atomic_t			rui_next_extent;
-	unsigned long			rui_flags;	/* misc flags */
 	struct xfs_rui_log_format	rui_format;
 };
···
 extern struct kmem_zone	*xfs_rui_zone;
 extern struct kmem_zone	*xfs_rud_zone;
-
-struct xfs_rui_log_item *xfs_rui_init(struct xfs_mount *, uint);
-int xfs_rui_copy_format(struct xfs_log_iovec *buf,
-		struct xfs_rui_log_format *dst_rui_fmt);
-void xfs_rui_item_free(struct xfs_rui_log_item *);
-void xfs_rui_release(struct xfs_rui_log_item *);
-int xfs_rui_recover(struct xfs_mount *mp, struct xfs_rui_log_item *ruip);

 #endif	/* __XFS_RMAP_ITEM_H__ */
```
fs/xfs/xfs_super.c (+50 -18)

```diff
 static struct xfs_kobj xfs_dbg_kobj;	/* global debug sysfs attrs */
 #endif

+enum xfs_dax_mode {
+	XFS_DAX_INODE = 0,
+	XFS_DAX_ALWAYS = 1,
+	XFS_DAX_NEVER = 2,
+};
+
+static void
+xfs_mount_set_dax_mode(
+	struct xfs_mount	*mp,
+	enum xfs_dax_mode	mode)
+{
+	switch (mode) {
+	case XFS_DAX_INODE:
+		mp->m_flags &= ~(XFS_MOUNT_DAX_ALWAYS | XFS_MOUNT_DAX_NEVER);
+		break;
+	case XFS_DAX_ALWAYS:
+		mp->m_flags |= XFS_MOUNT_DAX_ALWAYS;
+		mp->m_flags &= ~XFS_MOUNT_DAX_NEVER;
+		break;
+	case XFS_DAX_NEVER:
+		mp->m_flags |= XFS_MOUNT_DAX_NEVER;
+		mp->m_flags &= ~XFS_MOUNT_DAX_ALWAYS;
+		break;
+	}
+}
+
+static const struct constant_table dax_param_enums[] = {
+	{"inode",	XFS_DAX_INODE },
+	{"always",	XFS_DAX_ALWAYS },
+	{"never",	XFS_DAX_NEVER },
+	{}
+};
+
 /*
  * Table driven mount option parser.
  */
···
 	Opt_filestreams, Opt_quota, Opt_noquota, Opt_usrquota, Opt_grpquota,
 	Opt_prjquota, Opt_uquota, Opt_gquota, Opt_pquota,
 	Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce,
-	Opt_discard, Opt_nodiscard, Opt_dax,
+	Opt_discard, Opt_nodiscard, Opt_dax, Opt_dax_enum,
 };

 static const struct fs_parameter_spec xfs_fs_parameters[] = {
···
 	fsparam_flag("discard",		Opt_discard),
 	fsparam_flag("nodiscard",	Opt_nodiscard),
 	fsparam_flag("dax",		Opt_dax),
+	fsparam_enum("dax",		Opt_dax_enum, dax_param_enums),
 	{}
 };
···
 		{ XFS_MOUNT_GRPID,		",grpid" },
 		{ XFS_MOUNT_DISCARD,		",discard" },
 		{ XFS_MOUNT_LARGEIO,		",largeio" },
-		{ XFS_MOUNT_DAX,		",dax" },
+		{ XFS_MOUNT_DAX_ALWAYS,		",dax=always" },
+		{ XFS_MOUNT_DAX_NEVER,		",dax=never" },
 		{ 0, NULL }
 	};
 	struct xfs_mount	*mp = XFS_M(root->d_sb);
···
 	statp->f_blocks = sbp->sb_dblocks - lsize;
 	spin_unlock(&mp->m_sb_lock);

-	statp->f_bfree = fdblocks - mp->m_alloc_set_aside;
+	/* make sure statp->f_bfree does not underflow */
+	statp->f_bfree = max_t(int64_t, fdblocks - mp->m_alloc_set_aside, 0);
 	statp->f_bavail = statp->f_bfree;

 	fakeinos = XFS_FSB_TO_INO(mp, statp->f_bfree);
···
  * there is no log replay required to write the inodes to disk - this is the
  * primary difference between a sync and a quiesce.
  *
- * Note: xfs_log_quiesce() stops background log work - the callers must ensure
- * it is started again when appropriate.
+ * We cancel log work early here to ensure all transactions the log worker may
+ * run have finished before we clean up and log the superblock and write an
+ * unmount record. The unfreeze process is responsible for restarting the log
+ * worker correctly.
  */
 void
 xfs_quiesce_attr(
···
 {
 	int	error = 0;

-	/* wait for all modifications to complete */
-	while (atomic_read(&mp->m_active_trans) > 0)
-		delay(100);
+	cancel_delayed_work_sync(&mp->m_log->l_work);

 	/* force the log to unpin objects from the now complete transactions */
 	xfs_log_force(mp, XFS_LOG_SYNC);
···
 	if (error)
 		xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. "
 				"Frozen image may not be consistent.");
-	/*
-	 * Just warn here till VFS can correctly support
-	 * read-only remount without racing.
-	 */
-	WARN_ON(atomic_read(&mp->m_active_trans) != 0);
-
 	xfs_log_quiesce(mp);
 }
···
 		return 0;
 #ifdef CONFIG_FS_DAX
 	case Opt_dax:
-		mp->m_flags |= XFS_MOUNT_DAX;
+		xfs_mount_set_dax_mode(mp, XFS_DAX_ALWAYS);
+		return 0;
+	case Opt_dax_enum:
+		xfs_mount_set_dax_mode(mp, result.uint_32);
 		return 0;
 #endif
 	default:
···
 	if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5)
 		sb->s_flags |= SB_I_VERSION;

-	if (mp->m_flags & XFS_MOUNT_DAX) {
+	if (mp->m_flags & XFS_MOUNT_DAX_ALWAYS) {
 		bool rtdev_is_dax = false, datadev_is_dax;

 		xfs_warn(mp,
···
 		if (!rtdev_is_dax && !datadev_is_dax) {
 			xfs_alert(mp,
 			"DAX unsupported by block device. Turning off DAX.");
-			mp->m_flags &= ~XFS_MOUNT_DAX;
+			xfs_mount_set_dax_mode(mp, XFS_DAX_NEVER);
 		}
 		if (xfs_sb_version_hasreflink(&mp->m_sb)) {
 			xfs_alert(mp,
···
 	INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC);
 	spin_lock_init(&mp->m_perag_lock);
 	mutex_init(&mp->m_growlock);
-	atomic_set(&mp->m_active_trans, 0);
 	INIT_WORK(&mp->m_flush_inodes_work, xfs_flush_inodes_worker);
 	INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
 	INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker);
```
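The new xfs_mount_set_dax_mode() helper above keeps the two DAX override bits mutually exclusive, with both bits clear meaning "decide per inode". A minimal user-space sketch of the same tri-state logic (the flag values and names below are illustrative, not the kernel's XFS_MOUNT_* constants):

```c
#include <assert.h>

/* Illustrative stand-ins for the two mount-flag bits. */
#define MOUNT_DAX_ALWAYS (1u << 0)
#define MOUNT_DAX_NEVER  (1u << 1)

enum dax_mode { DAX_INODE = 0, DAX_ALWAYS = 1, DAX_NEVER = 2 };

/* Set one of three states using two flag bits; the bits are kept
 * mutually exclusive, and clearing both means "per-inode policy". */
static unsigned int set_dax_mode(unsigned int flags, enum dax_mode mode)
{
	switch (mode) {
	case DAX_INODE:
		flags &= ~(MOUNT_DAX_ALWAYS | MOUNT_DAX_NEVER);
		break;
	case DAX_ALWAYS:
		flags |= MOUNT_DAX_ALWAYS;
		flags &= ~MOUNT_DAX_NEVER;
		break;
	case DAX_NEVER:
		flags |= MOUNT_DAX_NEVER;
		flags &= ~MOUNT_DAX_ALWAYS;
		break;
	}
	return flags;
}
```

Routing every state change through one helper is what makes the tri-state safe: no caller can leave both override bits set at once.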
fs/xfs/xfs_symlink.c (+4 -6)

```diff
 	 */
 	xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp);

-	if (resblks)
-		resblks -= XFS_IALLOC_SPACE_RES(mp);
+	resblks -= XFS_IALLOC_SPACE_RES(mp);
 	/*
 	 * If the symlink will fit into the inode, write it inline.
 	 */
···
 		xfs_init_local_fork(ip, XFS_DATA_FORK, target_path, pathlen);

 		ip->i_d.di_size = pathlen;
-		ip->i_d.di_format = XFS_DINODE_FMT_LOCAL;
+		ip->i_df.if_format = XFS_DINODE_FMT_LOCAL;
 		xfs_trans_log_inode(tp, ip, XFS_ILOG_DDATA | XFS_ILOG_CORE);
 	} else {
 		int	offset;
···
 		if (error)
 			goto out_trans_cancel;

-		if (resblks)
-			resblks -= fs_blocks;
+		resblks -= fs_blocks;
 		ip->i_d.di_size = pathlen;
 		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
···
 	 * either 1 or 2 extents and that we can
 	 * free them all in one bunmapi call.
 	 */
-	ASSERT(ip->i_d.di_nextents > 0 && ip->i_d.di_nextents <= 2);
+	ASSERT(ip->i_df.if_nextents > 0 && ip->i_df.if_nextents <= 2);

 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
 	if (error)
```
fs/xfs/xfs_trace.h (+2 -2)

```diff
 		__entry->dev = VFS_I(ip)->i_sb->s_dev;
 		__entry->which = which;
 		__entry->ino = ip->i_ino;
-		__entry->format = ip->i_d.di_format;
-		__entry->nex = ip->i_d.di_nextents;
+		__entry->format = ip->i_df.if_format;
+		__entry->nex = ip->i_df.if_nextents;
 		__entry->broot_size = ip->i_df.if_broot_bytes;
 		__entry->fork_off = XFS_IFORK_BOFF(ip);
 	),
```
fs/xfs/xfs_trans.c (+42 -161)

```diff
 	xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false);

 	trace_xfs_trans_free(tp, _RET_IP_);
-	atomic_dec(&tp->t_mountp->m_active_trans);
 	if (!(tp->t_flags & XFS_TRANS_NO_WRITECOUNT))
 		sb_end_intwrite(tp->t_mountp->m_super);
 	xfs_trans_free_dqinfo(tp);
···
 	xfs_defer_move(ntp, tp);

 	xfs_trans_dup_dqinfo(tp, ntp);
-
-	atomic_inc(&tp->t_mountp->m_active_trans);
 	return ntp;
 }
···
 	 */
 	WARN_ON(resp->tr_logres > 0 &&
 		mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE);
-	atomic_inc(&mp->m_active_trans);

 	tp->t_magic = XFS_TRANS_HEADER_MAGIC;
 	tp->t_flags = flags;
···
 /*
  * Create an empty transaction with no reservation. This is a defensive
- * mechanism for routines that query metadata without actually modifying
- * them -- if the metadata being queried is somehow cross-linked (think a
- * btree block pointer that points higher in the tree), we risk deadlock.
- * However, blocks grabbed as part of a transaction can be re-grabbed.
- * The verifiers will notice the corrupt block and the operation will fail
- * back to userspace without deadlocking.
+ * mechanism for routines that query metadata without actually modifying them --
+ * if the metadata being queried is somehow cross-linked (think a btree block
+ * pointer that points higher in the tree), we risk deadlock. However, blocks
+ * grabbed as part of a transaction can be re-grabbed. The verifiers will
+ * notice the corrupt block and the operation will fail back to userspace
+ * without deadlocking.
  *
- * Note the zero-length reservation; this transaction MUST be cancelled
- * without any dirty data.
+ * Note the zero-length reservation; this transaction MUST be cancelled without
+ * any dirty data.
  *
- * Callers should obtain freeze protection to avoid two conflicts with fs
- * freezing: (1) having active transactions trip the m_active_trans ASSERTs;
- * and (2) grabbing buffers at the same time that freeze is trying to drain
- * the buffer LRU list.
+ * Callers should obtain freeze protection to avoid a conflict with fs freezing
+ * where we can be grabbing buffers at the same time that freeze is trying to
+ * drain the buffer LRU list.
  */
 int
 xfs_trans_alloc_empty(
···
 		  sizeof(sbp->sb_frextents) - 1);
 }

-STATIC int
-xfs_sb_mod8(
-	uint8_t			*field,
-	int8_t			delta)
-{
-	int8_t			counter = *field;
-
-	counter += delta;
-	if (counter < 0) {
-		ASSERT(0);
-		return -EINVAL;
-	}
-	*field = counter;
-	return 0;
-}
-
-STATIC int
-xfs_sb_mod32(
-	uint32_t		*field,
-	int32_t			delta)
-{
-	int32_t			counter = *field;
-
-	counter += delta;
-	if (counter < 0) {
-		ASSERT(0);
-		return -EINVAL;
-	}
-	*field = counter;
-	return 0;
-}
-
-STATIC int
-xfs_sb_mod64(
-	uint64_t		*field,
-	int64_t			delta)
-{
-	int64_t			counter = *field;
-
-	counter += delta;
-	if (counter < 0) {
-		ASSERT(0);
-		return -EINVAL;
-	}
-	*field = counter;
-	return 0;
-}
-
 /*
- * xfs_trans_unreserve_and_mod_sb() is called to release unused reservations
- * and apply superblock counter changes to the in-core superblock.  The
+ * xfs_trans_unreserve_and_mod_sb() is called to release unused reservations and
+ * apply superblock counter changes to the in-core superblock.  The
  * t_res_fdblocks_delta and t_res_frextents_delta fields are explicitly NOT
  * applied to the in-core superblock.  The idea is that that has already been
  * done.
···
  * used block counts are not updated in the on disk superblock. In this case,
  * XFS_TRANS_SB_DIRTY will not be set when the transaction is updated but we
  * still need to update the incore superblock with the changes.
+ *
+ * Deltas for the inode count are +/-64, hence we use a large batch size of 128
+ * so we don't need to take the counter lock on every update.
  */
+#define XFS_ICOUNT_BATCH	128
+
 void
 xfs_trans_unreserve_and_mod_sb(
 	struct xfs_trans	*tp)
···
 	/* apply the per-cpu counters */
 	if (blkdelta) {
 		error = xfs_mod_fdblocks(mp, blkdelta, rsvd);
-		if (error)
-			goto out;
+		ASSERT(!error);
 	}

 	if (idelta) {
-		error = xfs_mod_icount(mp, idelta);
-		if (error)
-			goto out_undo_fdblocks;
+		percpu_counter_add_batch(&mp->m_icount, idelta,
+					 XFS_ICOUNT_BATCH);
+		if (idelta < 0)
+			ASSERT(__percpu_counter_compare(&mp->m_icount, 0,
+							XFS_ICOUNT_BATCH) >= 0);
 	}

 	if (ifreedelta) {
-		error = xfs_mod_ifree(mp, ifreedelta);
-		if (error)
-			goto out_undo_icount;
+		percpu_counter_add(&mp->m_ifree, ifreedelta);
+		if (ifreedelta < 0)
+			ASSERT(percpu_counter_compare(&mp->m_ifree, 0) >= 0);
 	}

 	if (rtxdelta == 0 && !(tp->t_flags & XFS_TRANS_SB_DIRTY))
···
 	/* apply remaining deltas */
 	spin_lock(&mp->m_sb_lock);
-	if (rtxdelta) {
-		error = xfs_sb_mod64(&mp->m_sb.sb_frextents, rtxdelta);
-		if (error)
-			goto out_undo_ifree;
-	}
-
-	if (tp->t_dblocks_delta != 0) {
-		error = xfs_sb_mod64(&mp->m_sb.sb_dblocks, tp->t_dblocks_delta);
-		if (error)
-			goto out_undo_frextents;
-	}
-	if (tp->t_agcount_delta != 0) {
-		error = xfs_sb_mod32(&mp->m_sb.sb_agcount, tp->t_agcount_delta);
-		if (error)
-			goto out_undo_dblocks;
-	}
-	if (tp->t_imaxpct_delta != 0) {
-		error = xfs_sb_mod8(&mp->m_sb.sb_imax_pct, tp->t_imaxpct_delta);
-		if (error)
-			goto out_undo_agcount;
-	}
-	if (tp->t_rextsize_delta != 0) {
-		error = xfs_sb_mod32(&mp->m_sb.sb_rextsize,
-				     tp->t_rextsize_delta);
-		if (error)
-			goto out_undo_imaxpct;
-	}
-	if (tp->t_rbmblocks_delta != 0) {
-		error = xfs_sb_mod32(&mp->m_sb.sb_rbmblocks,
-				     tp->t_rbmblocks_delta);
-		if (error)
-			goto out_undo_rextsize;
-	}
-	if (tp->t_rblocks_delta != 0) {
-		error = xfs_sb_mod64(&mp->m_sb.sb_rblocks, tp->t_rblocks_delta);
-		if (error)
-			goto out_undo_rbmblocks;
-	}
-	if (tp->t_rextents_delta != 0) {
-		error = xfs_sb_mod64(&mp->m_sb.sb_rextents,
-				     tp->t_rextents_delta);
-		if (error)
-			goto out_undo_rblocks;
-	}
-	if (tp->t_rextslog_delta != 0) {
-		error = xfs_sb_mod8(&mp->m_sb.sb_rextslog,
-				    tp->t_rextslog_delta);
-		if (error)
-			goto out_undo_rextents;
-	}
+	mp->m_sb.sb_frextents += rtxdelta;
+	mp->m_sb.sb_dblocks += tp->t_dblocks_delta;
+	mp->m_sb.sb_agcount += tp->t_agcount_delta;
+	mp->m_sb.sb_imax_pct += tp->t_imaxpct_delta;
+	mp->m_sb.sb_rextsize += tp->t_rextsize_delta;
+	mp->m_sb.sb_rbmblocks += tp->t_rbmblocks_delta;
+	mp->m_sb.sb_rblocks += tp->t_rblocks_delta;
+	mp->m_sb.sb_rextents += tp->t_rextents_delta;
+	mp->m_sb.sb_rextslog += tp->t_rextslog_delta;
 	spin_unlock(&mp->m_sb_lock);
-	return;

-out_undo_rextents:
-	if (tp->t_rextents_delta)
-		xfs_sb_mod64(&mp->m_sb.sb_rextents, -tp->t_rextents_delta);
-out_undo_rblocks:
-	if (tp->t_rblocks_delta)
-		xfs_sb_mod64(&mp->m_sb.sb_rblocks, -tp->t_rblocks_delta);
-out_undo_rbmblocks:
-	if (tp->t_rbmblocks_delta)
-		xfs_sb_mod32(&mp->m_sb.sb_rbmblocks, -tp->t_rbmblocks_delta);
-out_undo_rextsize:
-	if (tp->t_rextsize_delta)
-		xfs_sb_mod32(&mp->m_sb.sb_rextsize, -tp->t_rextsize_delta);
-out_undo_imaxpct:
-	if (tp->t_rextsize_delta)
-		xfs_sb_mod8(&mp->m_sb.sb_imax_pct, -tp->t_imaxpct_delta);
-out_undo_agcount:
-	if (tp->t_agcount_delta)
-		xfs_sb_mod32(&mp->m_sb.sb_agcount, -tp->t_agcount_delta);
-out_undo_dblocks:
-	if (tp->t_dblocks_delta)
-		xfs_sb_mod64(&mp->m_sb.sb_dblocks, -tp->t_dblocks_delta);
-out_undo_frextents:
-	if (rtxdelta)
-		xfs_sb_mod64(&mp->m_sb.sb_frextents, -rtxdelta);
-out_undo_ifree:
-	spin_unlock(&mp->m_sb_lock);
-	if (ifreedelta)
-		xfs_mod_ifree(mp, -ifreedelta);
-out_undo_icount:
-	if (idelta)
-		xfs_mod_icount(mp, -idelta);
-out_undo_fdblocks:
-	if (blkdelta)
-		xfs_mod_fdblocks(mp, -blkdelta, rsvd);
-out:
-	ASSERT(error == 0);
+	/*
+	 * Debug checks outside of the spinlock so they don't lock up the
+	 * machine if they fail.
+	 */
+	ASSERT(mp->m_sb.sb_imax_pct >= 0);
+	ASSERT(mp->m_sb.sb_rextslog >= 0);
 	return;
 }
```
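The inode-count change above relies on percpu_counter_add_batch(): updates accumulate in a local delta and only touch the shared total once the batch threshold is crossed, which is why a batch of 128 suits the +/-64 deltas. A single-threaded sketch of the batching idea (illustrative, not the kernel's percpu_counter implementation):

```c
#include <assert.h>

/* A counter that folds local updates into the shared total only when the
 * accumulated delta reaches the batch size, so the expensive, contended
 * global update happens rarely. In the kernel, `delta` would be per-CPU
 * and `total` lock-protected. */
struct batch_counter {
	long long total;	/* shared */
	long long delta;	/* local */
	long long batch;
};

static void counter_add(struct batch_counter *c, long long amount)
{
	c->delta += amount;
	if (c->delta >= c->batch || c->delta <= -c->batch) {
		c->total += c->delta;	/* slow path: take the lock here */
		c->delta = 0;
	}
}

/* An exact sum must fold in the unflushed local delta. */
static long long counter_sum(struct batch_counter *c)
{
	return c->total + c->delta;
}
```

The trade-off is that `total` alone is only approximate (off by up to the batch size), which is why the kernel uses the batched `__percpu_counter_compare()` form for the debug underflow checks.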
fs/xfs/xfs_trans.h (+5 -1)

```diff
 #define	XFS_LI_ABORTED	1
 #define	XFS_LI_FAILED	2
 #define	XFS_LI_DIRTY	3	/* log item dirty in transaction */
+#define	XFS_LI_RECOVERED	4	/* log intent item has been recovered */

 #define XFS_LI_FLAGS \
 	{ (1 << XFS_LI_IN_AIL),		"IN_AIL" }, \
 	{ (1 << XFS_LI_ABORTED),	"ABORTED" }, \
 	{ (1 << XFS_LI_FAILED),		"FAILED" }, \
-	{ (1 << XFS_LI_DIRTY),		"DIRTY" }
+	{ (1 << XFS_LI_DIRTY),		"DIRTY" }, \
+	{ (1 << XFS_LI_RECOVERED),	"RECOVERED" }

 struct xfs_item_ops {
 	unsigned flags;
···
 	void (*iop_release)(struct xfs_log_item *);
 	xfs_lsn_t (*iop_committed)(struct xfs_log_item *, xfs_lsn_t);
 	void (*iop_error)(struct xfs_log_item *, xfs_buf_t *);
+	int (*iop_recover)(struct xfs_log_item *lip, struct xfs_trans *tp);
+	bool (*iop_match)(struct xfs_log_item *item, uint64_t id);
 };

 /*
```
fs/xfs/xfs_trans_ail.c (+56 -23)

```diff
 	xfs_trans_ail_cursor_clear(ailp, lip);
 }

+/*
+ * Requeue a failed buffer for writeback.
+ *
+ * We clear the log item failed state here as well, but we have to be careful
+ * about reference counts because the only active reference counts on the
+ * buffer may be the failed log items. Hence if we clear the log item failed
+ * state before queuing the buffer for IO we can release all active references
+ * to the buffer and free it, leading to use after free problems in
+ * xfs_buf_delwri_queue. It makes no difference to the buffer or log items
+ * which order we process them in - the buffer is locked, and we own the buffer
+ * list so nothing on them is going to change while we are performing this
+ * action.
+ *
+ * Hence we can safely queue the buffer for IO before we clear the failed log
+ * item state, therefore always having an active reference to the buffer and
+ * avoiding the transient zero-reference state that leads to use-after-free.
+ */
+static inline int
+xfsaild_resubmit_item(
+	struct xfs_log_item	*lip,
+	struct list_head	*buffer_list)
+{
+	struct xfs_buf		*bp = lip->li_buf;
+
+	if (!xfs_buf_trylock(bp))
+		return XFS_ITEM_LOCKED;
+
+	if (!xfs_buf_delwri_queue(bp, buffer_list)) {
+		xfs_buf_unlock(bp);
+		return XFS_ITEM_FLUSHING;
+	}
+
+	/* protected by ail_lock */
+	list_for_each_entry(lip, &bp->b_li_list, li_bio_list)
+		xfs_clear_li_failed(lip);
+
+	xfs_buf_unlock(bp);
+	return XFS_ITEM_SUCCESS;
+}
+
 static inline uint
 xfsaild_push_item(
 	struct xfs_ail		*ailp,
···
 	 */
 	if (!lip->li_ops->iop_push)
 		return XFS_ITEM_PINNED;
+	if (test_bit(XFS_LI_FAILED, &lip->li_flags))
+		return xfsaild_resubmit_item(lip, &ailp->ail_buf_list);
 	return lip->li_ops->iop_push(lip, &ailp->ail_buf_list);
 }
···
 	xfs_ail_update_finish(ailp, tail_lsn);
 }

+/* Insert a log item into the AIL. */
+void
+xfs_trans_ail_insert(
+	struct xfs_ail		*ailp,
+	struct xfs_log_item	*lip,
+	xfs_lsn_t		lsn)
+{
+	spin_lock(&ailp->ail_lock);
+	xfs_trans_ail_update_bulk(ailp, NULL, &lip, 1, lsn);
+}
+
 /*
  * Delete one log item from the AIL.
  *
···
 	return 0;
 }

-/**
- * Remove a log items from the AIL
- *
- * @xfs_trans_ail_delete_bulk takes an array of log items that all need to
- * removed from the AIL. The caller is already holding the AIL lock, and done
- * all the checks necessary to ensure the items passed in via @log_items are
- * ready for deletion. This includes checking that the items are in the AIL.
- *
- * For each log item to be removed, unlink it from the AIL, clear the IN_AIL
- * flag from the item and reset the item's lsn to 0. If we remove the first
- * item in the AIL, update the log tail to match the new minimum LSN in the
- * AIL.
- *
- * This function will not drop the AIL lock until all items are removed from
- * the AIL to minimise the amount of lock traffic on the AIL. This does not
- * greatly increase the AIL hold time, but does significantly reduce the amount
- * of traffic on the lock, especially during IO completion.
- *
- * This function must be called with the AIL lock held. The lock is dropped
- * before returning.
- */
 void
 xfs_trans_ail_delete(
-	struct xfs_ail		*ailp,
 	struct xfs_log_item	*lip,
 	int			shutdown_type)
 {
+	struct xfs_ail		*ailp = lip->li_ailp;
 	struct xfs_mount	*mp = ailp->ail_mount;
 	xfs_lsn_t		tail_lsn;

+	spin_lock(&ailp->ail_lock);
 	if (!test_bit(XFS_LI_IN_AIL, &lip->li_flags)) {
 		spin_unlock(&ailp->ail_lock);
-		if (!XFS_FORCED_SHUTDOWN(mp)) {
+		if (shutdown_type && !XFS_FORCED_SHUTDOWN(mp)) {
 			xfs_alert_tag(mp, XFS_PTAG_AILDELETE,
 	"%s: attempting to delete a log item that is not in the AIL",
 					__func__);
···
 		return;
 	}

+	/* xfs_ail_update_finish() drops the AIL lock */
 	tail_lsn = xfs_ail_delete_one(ailp, lip);
 	xfs_ail_update_finish(ailp, tail_lsn);
 }
```
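xfsaild_resubmit_item() above queues the buffer (which takes a new reference) before clearing the failed state (which can drop the last log-item reference), so the reference count never transiently hits zero. The same handoff rule in a minimal sketch (hypothetical names, not kernel code):

```c
#include <assert.h>

struct obj {
	int refcount;
	int freed;
};

static void obj_get(struct obj *o) { o->refcount++; }

static void obj_put(struct obj *o)
{
	if (--o->refcount == 0)
		o->freed = 1;	/* stands in for actually freeing the object */
}

/* Safe handoff: acquire the destination's reference (e.g. the delwri
 * queue) BEFORE dropping the source's (e.g. the failed log item), so the
 * count never passes through zero mid-transfer. Doing the put first would
 * free the object while it is still being handed off. */
static void handoff(struct obj *o)
{
	obj_get(o);
	obj_put(o);
}
```

Reversing the two calls with an initial count of one would free the object before the new reference is taken, which is exactly the use-after-free the comment in the diff describes.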
fs/xfs/xfs_trans_dquot.c (+9 -14)

```diff
 	 */
 	if (d->d_id) {
 		xfs_qm_adjust_dqlimits(tp->t_mountp, dqp);
-		xfs_qm_adjust_dqtimers(tp->t_mountp, d);
+		xfs_qm_adjust_dqtimers(tp->t_mountp, dqp);
 	}

 	dqp->dq_flags |= XFS_DQ_DIRTY;
···
 	xfs_dqlock(dqp);

-	defq = xfs_get_defquota(dqp, q);
+	defq = xfs_get_defquota(q, xfs_dquot_type(dqp));

 	if (flags & XFS_TRANS_DQ_RES_BLKS) {
 		hardlimit = be64_to_cpu(dqp->q_core.d_blk_hardlimit);
···
 			softlimit = defq->bsoftlimit;
 		timer = be32_to_cpu(dqp->q_core.d_btimer);
 		warns = be16_to_cpu(dqp->q_core.d_bwarns);
-		warnlimit = dqp->q_mount->m_quotainfo->qi_bwarnlimit;
+		warnlimit = defq->bwarnlimit;
 		resbcountp = &dqp->q_res_bcount;
 	} else {
 		ASSERT(flags & XFS_TRANS_DQ_RES_RTBLKS);
···
 			softlimit = defq->rtbsoftlimit;
 		timer = be32_to_cpu(dqp->q_core.d_rtbtimer);
 		warns = be16_to_cpu(dqp->q_core.d_rtbwarns);
-		warnlimit = dqp->q_mount->m_quotainfo->qi_rtbwarnlimit;
+		warnlimit = defq->rtbwarnlimit;
 		resbcountp = &dqp->q_res_rtbcount;
 	}
···
 		total_count = be64_to_cpu(dqp->q_core.d_icount) + ninos;
 		timer = be32_to_cpu(dqp->q_core.d_itimer);
 		warns = be16_to_cpu(dqp->q_core.d_iwarns);
-		warnlimit = dqp->q_mount->m_quotainfo->qi_iwarnlimit;
+		warnlimit = defq->iwarnlimit;
 		hardlimit = be64_to_cpu(dqp->q_core.d_ino_hardlimit);
 		if (!hardlimit)
 			hardlimit = defq->ihardlimit;
···
 error_return:
 	xfs_dqunlock(dqp);
-	if (flags & XFS_QMOPT_ENOSPC)
+	if (XFS_QM_ISPDQ(dqp))
 		return -ENOSPC;
 	return -EDQUOT;
 }
···
 	ASSERT(flags & XFS_QMOPT_RESBLK_MASK);

 	if (udqp) {
-		error = xfs_trans_dqresv(tp, mp, udqp, nblks, ninos,
-					(flags & ~XFS_QMOPT_ENOSPC));
+		error = xfs_trans_dqresv(tp, mp, udqp, nblks, ninos, flags);
 		if (error)
 			return error;
 	}
···
 	if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
 		return 0;
-	if (XFS_IS_PQUOTA_ON(mp))
-		flags |= XFS_QMOPT_ENOSPC;

 	ASSERT(!xfs_is_quota_inode(&mp->m_sb, ip->i_ino));

 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-	ASSERT((flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) ==
-	       XFS_TRANS_DQ_RES_RTBLKS ||
-	       (flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) ==
-	       XFS_TRANS_DQ_RES_BLKS);
+	ASSERT((flags & ~(XFS_QMOPT_FORCE_RES)) == XFS_TRANS_DQ_RES_RTBLKS ||
+	       (flags & ~(XFS_QMOPT_FORCE_RES)) == XFS_TRANS_DQ_RES_BLKS);

 	/*
 	 * Reserve nblks against these dquots, with trans as the mediator.
```
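Several of the changes above replace per-mount qi_*warnlimit lookups with the per-type defaults structure; the reservation path also treats a zero per-dquot limit as "unset" and falls back to the default, as in the `if (!hardlimit)` case in the diff. A tiny sketch of that fallback rule (the names here are hypothetical, not the kernel's):

```c
#include <assert.h>

/* Per-filesystem default quota limits for one quota type. */
struct quota_defaults {
	unsigned long long bhardlimit;
	unsigned long long bsoftlimit;
};

/* A per-dquot limit of 0 means "unset", so the default applies. */
static unsigned long long
effective_limit(unsigned long long dq_limit, unsigned long long def_limit)
{
	return dq_limit ? dq_limit : def_limit;
}
```

This keeps administrator-set per-user limits authoritative while letting filesystem-wide defaults cover everyone else.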
fs/xfs/xfs_trans_priv.h (+4 -17)

```diff
 	xfs_trans_ail_update_bulk(ailp, NULL, &lip, 1, lsn);
 }

+void xfs_trans_ail_insert(struct xfs_ail *ailp, struct xfs_log_item *lip,
+		xfs_lsn_t lsn);
+
 xfs_lsn_t xfs_ail_delete_one(struct xfs_ail *ailp, struct xfs_log_item *lip);
 void xfs_ail_update_finish(struct xfs_ail *ailp, xfs_lsn_t old_lsn)
 			__releases(ailp->ail_lock);
-void xfs_trans_ail_delete(struct xfs_ail *ailp, struct xfs_log_item *lip,
-		int shutdown_type);
-
-static inline void
-xfs_trans_ail_remove(
-	struct xfs_log_item	*lip,
-	int			shutdown_type)
-{
-	struct xfs_ail		*ailp = lip->li_ailp;
-
-	spin_lock(&ailp->ail_lock);
-	/* xfs_trans_ail_delete() drops the AIL lock */
-	if (test_bit(XFS_LI_IN_AIL, &lip->li_flags))
-		xfs_trans_ail_delete(ailp, lip, shutdown_type);
-	else
-		spin_unlock(&ailp->ail_lock);
-}
+void xfs_trans_ail_delete(struct xfs_log_item *lip, int shutdown_type);

 void			xfs_ail_push(struct xfs_ail *, xfs_lsn_t);
 void			xfs_ail_push_all(struct xfs_ail *);
```
fs/xfs/xfs_xattr.c (-1)

```diff
 #include "xfs_inode.h"
 #include "xfs_attr.h"
 #include "xfs_acl.h"
-#include "xfs_da_format.h"
 #include "xfs_da_btree.h"

 #include <linux/posix_acl_xattr.h>
```