Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'xfs-5.3-merge-12' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs updates from Darrick Wong:
"In this release there is a significant amount of consolidation and
cleanup in the log code; restructuring of the log to issue struct
bios directly; new bulkstat ioctls to return v5 fs inode information
(and fix all the padding problems of the old ioctl); the beginnings of
multithreaded inode walks (e.g. quotacheck); and a reduction in memory
usage in the online scrub code leading to reduced runtimes.

- Refactor inode geometry calculation into a single structure instead
of open-coding pieces everywhere.

- Add online repair to build options.

- Remove unnecessary function call flags and functions.

- Claim maintainership of various loose xfs documentation and header
files.

- Use struct bio directly for log buffer IOs instead of struct
xfs_buf.

- Reduce log item boilerplate code requirements.

- Merge log item code spread across too many files.

- Further distinguish between log item commits and cancellations.

- Various small cleanups to the ag small allocator.

- Support cgroup-aware writeback

- libxfs refactoring for mkfs cleanup

- Remove unneeded #includes

- Fix a memory allocation miscalculation in the new log bio code

- Fix bisection problems

- Fix a crash in ioend processing caused by tripping over freeing of
preallocated transactions

- Split out a generic inode walk mechanism from the bulkstat code,
hook up all the internal users to use the walking code, then clean
up bulkstat to serve only the bulkstat ioctls.

- Add a multithreaded iwalk implementation to speed up quotacheck on
fast storage with many CPUs.

- Remove unnecessary return values in logging teardown functions.

- Supplement the bstat and inogrp structures with new bulkstat and
inumbers structures that have all the fields we need for v5
filesystem features and none of the padding problems of their
predecessors.

- Wire up new ioctls that use the new structures with a much simpler
bulk_ireq structure at the head instead of the pointer-happy mess we
had before.

- Enable userspace to constrain bulkstat returns to a single AG or a
single special inode so that we can phase out a lot of geometry
guesswork in userspace.

- Reduce memory consumption and zeroing overhead in extended
attribute scrub code.

- Fix some behavioral regressions in the new bulkstat backend code.

- Fix some behavioral regressions in the new log bio code"

* tag 'xfs-5.3-merge-12' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (100 commits)
xfs: chain bios the right way around in xfs_rw_bdev
xfs: bump INUMBERS cursor correctly in xfs_inumbers_walk
xfs: don't update lastino for FSBULKSTAT_SINGLE
xfs: online scrub needn't bother zeroing its temporary buffer
xfs: only allocate memory for scrubbing attributes when we need it
xfs: refactor attr scrub memory allocation function
xfs: refactor extended attribute buffer pointer functions
xfs: attribute scrub should use seen_enough to pass error values
xfs: allow single bulkstat of special inodes
xfs: specify AG in bulk req
xfs: wire up the v5 inumbers ioctl
xfs: wire up new v5 bulkstat ioctls
xfs: introduce v5 inode group structure
xfs: introduce new v5 bulkstat structure
xfs: rename bulkstat functions
xfs: remove various bulk request typedef usage
fs: xfs: xfs_log: Change return type from int to void
xfs: poll waiting for quotacheck
xfs: multithreaded iwalk implementation
xfs: refactor INUMBERS to use iwalk functions
...

+4445 -4745
+4 -4
Documentation/filesystems/xfs-self-describing-metadata.txt
··· 222 222 xfs_foo_read_verify( 223 223 struct xfs_buf *bp) 224 224 { 225 - struct xfs_mount *mp = bp->b_target->bt_mount; 225 + struct xfs_mount *mp = bp->b_mount; 226 226 227 227 if ((xfs_sb_version_hascrc(&mp->m_sb) && 228 228 !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), ··· 245 245 xfs_foo_verify( 246 246 struct xfs_buf *bp) 247 247 { 248 - struct xfs_mount *mp = bp->b_target->bt_mount; 248 + struct xfs_mount *mp = bp->b_mount; 249 249 struct xfs_ondisk_hdr *hdr = bp->b_addr; 250 250 251 251 if (hdr->magic != cpu_to_be32(XFS_FOO_MAGIC)) ··· 272 272 xfs_foo_verify( 273 273 struct xfs_buf *bp) 274 274 { 275 - struct xfs_mount *mp = bp->b_target->bt_mount; 275 + struct xfs_mount *mp = bp->b_mount; 276 276 struct xfs_ondisk_hdr *hdr = bp->b_addr; 277 277 278 278 if (hdr->magic == cpu_to_be32(XFS_FOO_CRC_MAGIC)) { ··· 297 297 xfs_foo_write_verify( 298 298 struct xfs_buf *bp) 299 299 { 300 - struct xfs_mount *mp = bp->b_target->bt_mount; 300 + struct xfs_mount *mp = bp->b_mount; 301 301 struct xfs_buf_log_item *bip = bp->b_fspriv; 302 302 303 303 if (!xfs_foo_verify(bp)) {
+6
MAINTAINERS
··· 17544 17544 T: git git://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git 17545 17545 S: Supported 17546 17546 F: Documentation/filesystems/xfs.txt 17547 + F: Documentation/ABI/testing/sysfs-fs-xfs 17548 + F: Documentation/filesystems/xfs.txt 17549 + F: Documentation/filesystems/xfs-delayed-logging-design.txt 17550 + F: Documentation/filesystems/xfs-self-describing-metadata.txt 17547 17551 F: fs/xfs/ 17552 + F: include/uapi/linux/dqblk_xfs.h 17553 + F: include/uapi/linux/fsmap.h 17548 17554 17549 17555 XILINX AXI ETHERNET DRIVER 17550 17556 M: Anirudha Sarangi <anirudh@xilinx.com>
+4 -5
fs/xfs/Makefile
··· 62 62 xfs_attr_inactive.o \ 63 63 xfs_attr_list.o \ 64 64 xfs_bmap_util.o \ 65 + xfs_bio_io.o \ 65 66 xfs_buf.o \ 66 67 xfs_dir2_readdir.o \ 67 68 xfs_discard.o \ ··· 81 80 xfs_iops.o \ 82 81 xfs_inode.o \ 83 82 xfs_itable.o \ 83 + xfs_iwalk.o \ 84 84 xfs_message.o \ 85 85 xfs_mount.o \ 86 86 xfs_mru_cache.o \ 87 + xfs_pwork.o \ 87 88 xfs_reflink.o \ 88 89 xfs_stats.o \ 89 90 xfs_super.o \ ··· 107 104 xfs_rmap_item.o \ 108 105 xfs_log_recover.o \ 109 106 xfs_trans_ail.o \ 110 - xfs_trans_bmap.o \ 111 107 xfs_trans_buf.o \ 112 - xfs_trans_extfree.o \ 113 - xfs_trans_inode.o \ 114 - xfs_trans_refcount.o \ 115 - xfs_trans_rmap.o \ 108 + xfs_trans_inode.o 116 109 117 110 # optional features 118 111 xfs-$(CONFIG_XFS_QUOTA) += xfs_dquot.o \
-5
fs/xfs/kmem.c
··· 3 3 * Copyright (c) 2000-2005 Silicon Graphics, Inc. 4 4 * All Rights Reserved. 5 5 */ 6 - #include <linux/mm.h> 7 6 #include <linux/sched/mm.h> 8 - #include <linux/highmem.h> 9 - #include <linux/slab.h> 10 - #include <linux/swap.h> 11 - #include <linux/blkdev.h> 12 7 #include <linux/backing-dev.h> 13 8 #include "kmem.h" 14 9 #include "xfs_message.h"
+8
fs/xfs/kmem.h
··· 124 124 return kmem_zone_alloc(zone, flags | KM_ZERO); 125 125 } 126 126 127 + static inline struct page * 128 + kmem_to_page(void *addr) 129 + { 130 + if (is_vmalloc_addr(addr)) 131 + return vmalloc_to_page(addr); 132 + return virt_to_page(addr); 133 + } 134 + 127 135 #endif /* __XFS_SUPPORT_KMEM_H__ */
+88 -16
fs/xfs/libxfs/xfs_ag.c
··· 10 10 #include "xfs_shared.h" 11 11 #include "xfs_format.h" 12 12 #include "xfs_trans_resv.h" 13 + #include "xfs_bit.h" 13 14 #include "xfs_sb.h" 14 15 #include "xfs_mount.h" 15 16 #include "xfs_btree.h" ··· 45 44 return bp; 46 45 } 47 46 47 + static inline bool is_log_ag(struct xfs_mount *mp, struct aghdr_init_data *id) 48 + { 49 + return mp->m_sb.sb_logstart > 0 && 50 + id->agno == XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart); 51 + } 52 + 48 53 /* 49 54 * Generic btree root block init function 50 55 */ ··· 60 53 struct xfs_buf *bp, 61 54 struct aghdr_init_data *id) 62 55 { 63 - xfs_btree_init_block(mp, bp, id->type, 0, 0, id->agno, 0); 56 + xfs_btree_init_block(mp, bp, id->type, 0, 0, id->agno); 57 + } 58 + 59 + /* Finish initializing a free space btree. */ 60 + static void 61 + xfs_freesp_init_recs( 62 + struct xfs_mount *mp, 63 + struct xfs_buf *bp, 64 + struct aghdr_init_data *id) 65 + { 66 + struct xfs_alloc_rec *arec; 67 + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 68 + 69 + arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1); 70 + arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks); 71 + 72 + if (is_log_ag(mp, id)) { 73 + struct xfs_alloc_rec *nrec; 74 + xfs_agblock_t start = XFS_FSB_TO_AGBNO(mp, 75 + mp->m_sb.sb_logstart); 76 + 77 + ASSERT(start >= mp->m_ag_prealloc_blocks); 78 + if (start != mp->m_ag_prealloc_blocks) { 79 + /* 80 + * Modify first record to pad stripe align of log 81 + */ 82 + arec->ar_blockcount = cpu_to_be32(start - 83 + mp->m_ag_prealloc_blocks); 84 + nrec = arec + 1; 85 + 86 + /* 87 + * Insert second record at start of internal log 88 + * which then gets trimmed. 
89 + */ 90 + nrec->ar_startblock = cpu_to_be32( 91 + be32_to_cpu(arec->ar_startblock) + 92 + be32_to_cpu(arec->ar_blockcount)); 93 + arec = nrec; 94 + be16_add_cpu(&block->bb_numrecs, 1); 95 + } 96 + /* 97 + * Change record start to after the internal log 98 + */ 99 + be32_add_cpu(&arec->ar_startblock, mp->m_sb.sb_logblocks); 100 + } 101 + 102 + /* 103 + * Calculate the record block count and check for the case where 104 + * the log might have consumed all available space in the AG. If 105 + * so, reset the record count to 0 to avoid exposure of an invalid 106 + * record start block. 107 + */ 108 + arec->ar_blockcount = cpu_to_be32(id->agsize - 109 + be32_to_cpu(arec->ar_startblock)); 110 + if (!arec->ar_blockcount) 111 + block->bb_numrecs = 0; 64 112 } 65 113 66 114 /* ··· 127 65 struct xfs_buf *bp, 128 66 struct aghdr_init_data *id) 129 67 { 130 - struct xfs_alloc_rec *arec; 131 - 132 - xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 1, id->agno, 0); 133 - arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1); 134 - arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks); 135 - arec->ar_blockcount = cpu_to_be32(id->agsize - 136 - be32_to_cpu(arec->ar_startblock)); 68 + xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 1, id->agno); 69 + xfs_freesp_init_recs(mp, bp, id); 137 70 } 138 71 139 72 static void ··· 137 80 struct xfs_buf *bp, 138 81 struct aghdr_init_data *id) 139 82 { 140 - struct xfs_alloc_rec *arec; 141 - 142 - xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 1, id->agno, 0); 143 - arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1); 144 - arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks); 145 - arec->ar_blockcount = cpu_to_be32(id->agsize - 146 - be32_to_cpu(arec->ar_startblock)); 83 + xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 1, id->agno); 84 + xfs_freesp_init_recs(mp, bp, id); 147 85 } 148 86 149 87 /* ··· 153 101 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 154 102 struct xfs_rmap_rec *rrec; 155 103 156 - 
xfs_btree_init_block(mp, bp, XFS_BTNUM_RMAP, 0, 4, id->agno, 0); 104 + xfs_btree_init_block(mp, bp, XFS_BTNUM_RMAP, 0, 4, id->agno); 157 105 158 106 /* 159 107 * mark the AG header regions as static metadata The BNO ··· 198 146 rrec->rm_startblock = cpu_to_be32(xfs_refc_block(mp)); 199 147 rrec->rm_blockcount = cpu_to_be32(1); 200 148 rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_REFC); 149 + rrec->rm_offset = 0; 150 + be16_add_cpu(&block->bb_numrecs, 1); 151 + } 152 + 153 + /* account for the log space */ 154 + if (is_log_ag(mp, id)) { 155 + rrec = XFS_RMAP_REC_ADDR(block, 156 + be16_to_cpu(block->bb_numrecs) + 1); 157 + rrec->rm_startblock = cpu_to_be32( 158 + XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart)); 159 + rrec->rm_blockcount = cpu_to_be32(mp->m_sb.sb_logblocks); 160 + rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_LOG); 201 161 rrec->rm_offset = 0; 202 162 be16_add_cpu(&block->bb_numrecs, 1); 203 163 } ··· 272 208 xfs_refc_block(mp)); 273 209 agf->agf_refcount_level = cpu_to_be32(1); 274 210 agf->agf_refcount_blocks = cpu_to_be32(1); 211 + } 212 + 213 + if (is_log_ag(mp, id)) { 214 + int64_t logblocks = mp->m_sb.sb_logblocks; 215 + 216 + be32_add_cpu(&agf->agf_freeblks, -logblocks); 217 + agf->agf_longest = cpu_to_be32(id->agsize - 218 + XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart) - logblocks); 275 219 } 276 220 } 277 221
-8
fs/xfs/libxfs/xfs_ag_resv.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_log_format.h" 11 11 #include "xfs_trans_resv.h" 12 - #include "xfs_sb.h" 13 12 #include "xfs_mount.h" 14 - #include "xfs_defer.h" 15 13 #include "xfs_alloc.h" 16 14 #include "xfs_errortag.h" 17 15 #include "xfs_error.h" 18 16 #include "xfs_trace.h" 19 - #include "xfs_cksum.h" 20 17 #include "xfs_trans.h" 21 - #include "xfs_bit.h" 22 - #include "xfs_bmap.h" 23 - #include "xfs_bmap_btree.h" 24 - #include "xfs_ag_resv.h" 25 - #include "xfs_trans_space.h" 26 18 #include "xfs_rmap_btree.h" 27 19 #include "xfs_btree.h" 28 20 #include "xfs_refcount_btree.h"
+109 -118
fs/xfs/libxfs/xfs_alloc.c
··· 13 13 #include "xfs_sb.h" 14 14 #include "xfs_mount.h" 15 15 #include "xfs_defer.h" 16 - #include "xfs_inode.h" 17 16 #include "xfs_btree.h" 18 17 #include "xfs_rmap.h" 19 18 #include "xfs_alloc_btree.h" ··· 20 21 #include "xfs_extent_busy.h" 21 22 #include "xfs_errortag.h" 22 23 #include "xfs_error.h" 23 - #include "xfs_cksum.h" 24 24 #include "xfs_trace.h" 25 25 #include "xfs_trans.h" 26 26 #include "xfs_buf_item.h" ··· 39 41 STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *); 40 42 STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *); 41 43 STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *); 42 - STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *, 43 - xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *); 44 44 45 45 /* 46 46 * Size of the AGFL. For CRC-enabled filesystes we steal a couple of slots in ··· 551 555 xfs_agfl_verify( 552 556 struct xfs_buf *bp) 553 557 { 554 - struct xfs_mount *mp = bp->b_target->bt_mount; 558 + struct xfs_mount *mp = bp->b_mount; 555 559 struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp); 556 560 int i; 557 561 ··· 592 596 xfs_agfl_read_verify( 593 597 struct xfs_buf *bp) 594 598 { 595 - struct xfs_mount *mp = bp->b_target->bt_mount; 599 + struct xfs_mount *mp = bp->b_mount; 596 600 xfs_failaddr_t fa; 597 601 598 602 /* ··· 617 621 xfs_agfl_write_verify( 618 622 struct xfs_buf *bp) 619 623 { 620 - struct xfs_mount *mp = bp->b_target->bt_mount; 624 + struct xfs_mount *mp = bp->b_mount; 621 625 struct xfs_buf_log_item *bip = bp->b_log_item; 622 626 xfs_failaddr_t fa; 623 627 ··· 694 698 /* 695 699 * Allocation group level functions. 696 700 */ 701 + 702 + /* 703 + * Deal with the case where only small freespaces remain. Either return the 704 + * contents of the last freespace record, or allocate space from the freelist if 705 + * there is nothing in the tree. 
706 + */ 707 + STATIC int /* error */ 708 + xfs_alloc_ag_vextent_small( 709 + struct xfs_alloc_arg *args, /* allocation argument structure */ 710 + struct xfs_btree_cur *ccur, /* optional by-size cursor */ 711 + xfs_agblock_t *fbnop, /* result block number */ 712 + xfs_extlen_t *flenp, /* result length */ 713 + int *stat) /* status: 0-freelist, 1-normal/none */ 714 + { 715 + int error = 0; 716 + xfs_agblock_t fbno = NULLAGBLOCK; 717 + xfs_extlen_t flen = 0; 718 + int i = 0; 719 + 720 + /* 721 + * If a cntbt cursor is provided, try to allocate the largest record in 722 + * the tree. Try the AGFL if the cntbt is empty, otherwise fail the 723 + * allocation. Make sure to respect minleft even when pulling from the 724 + * freelist. 725 + */ 726 + if (ccur) 727 + error = xfs_btree_decrement(ccur, 0, &i); 728 + if (error) 729 + goto error; 730 + if (i) { 731 + error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i); 732 + if (error) 733 + goto error; 734 + XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error); 735 + goto out; 736 + } 737 + 738 + if (args->minlen != 1 || args->alignment != 1 || 739 + args->resv == XFS_AG_RESV_AGFL || 740 + (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount) <= 741 + args->minleft)) 742 + goto out; 743 + 744 + error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0); 745 + if (error) 746 + goto error; 747 + if (fbno == NULLAGBLOCK) 748 + goto out; 749 + 750 + xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1, 751 + xfs_alloc_allow_busy_reuse(args->datatype)); 752 + 753 + if (xfs_alloc_is_userdata(args->datatype)) { 754 + struct xfs_buf *bp; 755 + 756 + bp = xfs_btree_get_bufs(args->mp, args->tp, args->agno, fbno); 757 + if (!bp) { 758 + error = -EFSCORRUPTED; 759 + goto error; 760 + } 761 + xfs_trans_binval(args->tp, bp); 762 + } 763 + *fbnop = args->agbno = fbno; 764 + *flenp = args->len = 1; 765 + XFS_WANT_CORRUPTED_GOTO(args->mp, 766 + fbno < be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length), 767 + error); 768 + args->wasfromfl = 1; 
769 + trace_xfs_alloc_small_freelist(args); 770 + 771 + /* 772 + * If we're feeding an AGFL block to something that doesn't live in the 773 + * free space, we need to clear out the OWN_AG rmap. 774 + */ 775 + error = xfs_rmap_free(args->tp, args->agbp, args->agno, fbno, 1, 776 + &XFS_RMAP_OINFO_AG); 777 + if (error) 778 + goto error; 779 + 780 + *stat = 0; 781 + return 0; 782 + 783 + out: 784 + /* 785 + * Can't do the allocation, give up. 786 + */ 787 + if (flen < args->minlen) { 788 + args->agbno = NULLAGBLOCK; 789 + trace_xfs_alloc_small_notenough(args); 790 + flen = 0; 791 + } 792 + *fbnop = fbno; 793 + *flenp = flen; 794 + *stat = 1; 795 + trace_xfs_alloc_small_done(args); 796 + return 0; 797 + 798 + error: 799 + trace_xfs_alloc_small_error(args); 800 + return error; 801 + } 697 802 698 803 /* 699 804 * Allocate a variable extent in the allocation group agno. ··· 1680 1583 } 1681 1584 1682 1585 /* 1683 - * Deal with the case where only small freespaces remain. 1684 - * Either return the contents of the last freespace record, 1685 - * or allocate space from the freelist if there is nothing in the tree. 1686 - */ 1687 - STATIC int /* error */ 1688 - xfs_alloc_ag_vextent_small( 1689 - xfs_alloc_arg_t *args, /* allocation argument structure */ 1690 - xfs_btree_cur_t *ccur, /* by-size cursor */ 1691 - xfs_agblock_t *fbnop, /* result block number */ 1692 - xfs_extlen_t *flenp, /* result length */ 1693 - int *stat) /* status: 0-freelist, 1-normal/none */ 1694 - { 1695 - int error; 1696 - xfs_agblock_t fbno; 1697 - xfs_extlen_t flen; 1698 - int i; 1699 - 1700 - if ((error = xfs_btree_decrement(ccur, 0, &i))) 1701 - goto error0; 1702 - if (i) { 1703 - if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i))) 1704 - goto error0; 1705 - XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); 1706 - } 1707 - /* 1708 - * Nothing in the btree, try the freelist. Make sure 1709 - * to respect minleft even when pulling from the 1710 - * freelist. 
1711 - */ 1712 - else if (args->minlen == 1 && args->alignment == 1 && 1713 - args->resv != XFS_AG_RESV_AGFL && 1714 - (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount) 1715 - > args->minleft)) { 1716 - error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0); 1717 - if (error) 1718 - goto error0; 1719 - if (fbno != NULLAGBLOCK) { 1720 - xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1, 1721 - xfs_alloc_allow_busy_reuse(args->datatype)); 1722 - 1723 - if (xfs_alloc_is_userdata(args->datatype)) { 1724 - xfs_buf_t *bp; 1725 - 1726 - bp = xfs_btree_get_bufs(args->mp, args->tp, 1727 - args->agno, fbno, 0); 1728 - if (!bp) { 1729 - error = -EFSCORRUPTED; 1730 - goto error0; 1731 - } 1732 - xfs_trans_binval(args->tp, bp); 1733 - } 1734 - args->len = 1; 1735 - args->agbno = fbno; 1736 - XFS_WANT_CORRUPTED_GOTO(args->mp, 1737 - args->agbno + args->len <= 1738 - be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length), 1739 - error0); 1740 - args->wasfromfl = 1; 1741 - trace_xfs_alloc_small_freelist(args); 1742 - 1743 - /* 1744 - * If we're feeding an AGFL block to something that 1745 - * doesn't live in the free space, we need to clear 1746 - * out the OWN_AG rmap. 1747 - */ 1748 - error = xfs_rmap_free(args->tp, args->agbp, args->agno, 1749 - fbno, 1, &XFS_RMAP_OINFO_AG); 1750 - if (error) 1751 - goto error0; 1752 - 1753 - *stat = 0; 1754 - return 0; 1755 - } 1756 - /* 1757 - * Nothing in the freelist. 1758 - */ 1759 - else 1760 - flen = 0; 1761 - } 1762 - /* 1763 - * Can't allocate from the freelist for some reason. 1764 - */ 1765 - else { 1766 - fbno = NULLAGBLOCK; 1767 - flen = 0; 1768 - } 1769 - /* 1770 - * Can't do the allocation, give up. 
1771 - */ 1772 - if (flen < args->minlen) { 1773 - args->agbno = NULLAGBLOCK; 1774 - trace_xfs_alloc_small_notenough(args); 1775 - flen = 0; 1776 - } 1777 - *fbnop = fbno; 1778 - *flenp = flen; 1779 - *stat = 1; 1780 - trace_xfs_alloc_small_done(args); 1781 - return 0; 1782 - 1783 - error0: 1784 - trace_xfs_alloc_small_error(args); 1785 - return error; 1786 - } 1787 - 1788 - /* 1789 1586 * Free the extent starting at agno/bno for length. 1790 1587 */ 1791 1588 STATIC int ··· 2086 2095 if (error) 2087 2096 return error; 2088 2097 2089 - bp = xfs_btree_get_bufs(tp->t_mountp, tp, agno, agbno, 0); 2098 + bp = xfs_btree_get_bufs(tp->t_mountp, tp, agno, agbno); 2090 2099 if (!bp) 2091 2100 return -EFSCORRUPTED; 2092 2101 xfs_trans_binval(tp, bp); ··· 2577 2586 xfs_agf_verify( 2578 2587 struct xfs_buf *bp) 2579 2588 { 2580 - struct xfs_mount *mp = bp->b_target->bt_mount; 2589 + struct xfs_mount *mp = bp->b_mount; 2581 2590 struct xfs_agf *agf = XFS_BUF_TO_AGF(bp); 2582 2591 2583 2592 if (xfs_sb_version_hascrc(&mp->m_sb)) { ··· 2635 2644 xfs_agf_read_verify( 2636 2645 struct xfs_buf *bp) 2637 2646 { 2638 - struct xfs_mount *mp = bp->b_target->bt_mount; 2647 + struct xfs_mount *mp = bp->b_mount; 2639 2648 xfs_failaddr_t fa; 2640 2649 2641 2650 if (xfs_sb_version_hascrc(&mp->m_sb) && ··· 2652 2661 xfs_agf_write_verify( 2653 2662 struct xfs_buf *bp) 2654 2663 { 2655 - struct xfs_mount *mp = bp->b_target->bt_mount; 2664 + struct xfs_mount *mp = bp->b_mount; 2656 2665 struct xfs_buf_log_item *bip = bp->b_log_item; 2657 2666 xfs_failaddr_t fa; 2658 2667 ··· 3137 3146 3138 3147 /* 3139 3148 * Walk all the blocks in the AGFL. The @walk_fn can return any negative 3140 - * error code or XFS_BTREE_QUERY_RANGE_ABORT. 3149 + * error code or XFS_ITER_*. 3141 3150 */ 3142 3151 int 3143 3152 xfs_agfl_walk(
+1 -2
fs/xfs/libxfs/xfs_alloc_btree.c
··· 17 17 #include "xfs_extent_busy.h" 18 18 #include "xfs_error.h" 19 19 #include "xfs_trace.h" 20 - #include "xfs_cksum.h" 21 20 #include "xfs_trans.h" 22 21 23 22 ··· 291 292 xfs_allocbt_verify( 292 293 struct xfs_buf *bp) 293 294 { 294 - struct xfs_mount *mp = bp->b_target->bt_mount; 295 + struct xfs_mount *mp = bp->b_mount; 295 296 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 296 297 struct xfs_perag *pag = bp->b_pag; 297 298 xfs_failaddr_t fa;
-5
fs/xfs/libxfs/xfs_attr.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_log_format.h" 11 11 #include "xfs_trans_resv.h" 12 - #include "xfs_bit.h" 13 12 #include "xfs_mount.h" 14 13 #include "xfs_defer.h" 15 14 #include "xfs_da_format.h" 16 15 #include "xfs_da_btree.h" 17 16 #include "xfs_attr_sf.h" 18 17 #include "xfs_inode.h" 19 - #include "xfs_alloc.h" 20 18 #include "xfs_trans.h" 21 - #include "xfs_inode_item.h" 22 19 #include "xfs_bmap.h" 23 - #include "xfs_bmap_util.h" 24 20 #include "xfs_bmap_btree.h" 25 21 #include "xfs_attr.h" 26 22 #include "xfs_attr_leaf.h" 27 23 #include "xfs_attr_remote.h" 28 - #include "xfs_error.h" 29 24 #include "xfs_quota.h" 30 25 #include "xfs_trans_space.h" 31 26 #include "xfs_trace.h"
+7 -1
fs/xfs/libxfs/xfs_attr.h
··· 112 112 struct xfs_inode *dp; /* inode */ 113 113 struct attrlist_cursor_kern *cursor; /* position in list */ 114 114 char *alist; /* output buffer */ 115 - int seen_enough; /* T/F: seen enough of list? */ 115 + 116 + /* 117 + * Abort attribute list iteration if non-zero. Can be used to pass 118 + * error values to the xfs_attr_list caller. 119 + */ 120 + int seen_enough; 121 + 116 122 ssize_t count; /* num used entries */ 117 123 int dupcnt; /* count dup hashvals seen */ 118 124 int bufsize; /* total buffer size */
+6 -9
fs/xfs/libxfs/xfs_attr_leaf.c
··· 10 10 #include "xfs_format.h" 11 11 #include "xfs_log_format.h" 12 12 #include "xfs_trans_resv.h" 13 - #include "xfs_bit.h" 14 13 #include "xfs_sb.h" 15 14 #include "xfs_mount.h" 16 15 #include "xfs_da_format.h" 17 16 #include "xfs_da_btree.h" 18 17 #include "xfs_inode.h" 19 18 #include "xfs_trans.h" 20 - #include "xfs_inode_item.h" 21 19 #include "xfs_bmap_btree.h" 22 20 #include "xfs_bmap.h" 23 21 #include "xfs_attr_sf.h" ··· 25 27 #include "xfs_error.h" 26 28 #include "xfs_trace.h" 27 29 #include "xfs_buf_item.h" 28 - #include "xfs_cksum.h" 29 30 #include "xfs_dir2.h" 30 31 #include "xfs_log.h" 31 32 ··· 237 240 struct xfs_buf *bp) 238 241 { 239 242 struct xfs_attr3_icleaf_hdr ichdr; 240 - struct xfs_mount *mp = bp->b_target->bt_mount; 243 + struct xfs_mount *mp = bp->b_mount; 241 244 struct xfs_attr_leafblock *leaf = bp->b_addr; 242 245 struct xfs_attr_leaf_entry *entries; 243 246 uint32_t end; /* must be 32bit - see below */ ··· 310 313 xfs_attr3_leaf_write_verify( 311 314 struct xfs_buf *bp) 312 315 { 313 - struct xfs_mount *mp = bp->b_target->bt_mount; 316 + struct xfs_mount *mp = bp->b_mount; 314 317 struct xfs_buf_log_item *bip = bp->b_log_item; 315 318 struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr; 316 319 xfs_failaddr_t fa; ··· 340 343 xfs_attr3_leaf_read_verify( 341 344 struct xfs_buf *bp) 342 345 { 343 - struct xfs_mount *mp = bp->b_target->bt_mount; 346 + struct xfs_mount *mp = bp->b_mount; 344 347 xfs_failaddr_t fa; 345 348 346 349 if (xfs_sb_version_hascrc(&mp->m_sb) && ··· 862 865 struct xfs_attr3_icleaf_hdr leafhdr; 863 866 int bytes; 864 867 int i; 865 - struct xfs_mount *mp = bp->b_target->bt_mount; 868 + struct xfs_mount *mp = bp->b_mount; 866 869 867 870 leaf = bp->b_addr; 868 871 xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf); ··· 1522 1525 { 1523 1526 struct xfs_attr3_icleaf_hdr ichdr1; 1524 1527 struct xfs_attr3_icleaf_hdr ichdr2; 1525 - struct xfs_mount *mp = leaf1_bp->b_target->bt_mount; 1528 + struct xfs_mount *mp = 
leaf1_bp->b_mount; 1526 1529 1527 1530 xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr1, leaf1_bp->b_addr); 1528 1531 xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr2, leaf2_bp->b_addr); ··· 2565 2568 { 2566 2569 struct xfs_attr3_icleaf_hdr ichdr; 2567 2570 struct xfs_attr_leaf_entry *entries; 2568 - struct xfs_mount *mp = bp->b_target->bt_mount; 2571 + struct xfs_mount *mp = bp->b_mount; 2569 2572 2570 2573 xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, bp->b_addr); 2571 2574 entries = xfs_attr3_leaf_entryp(bp->b_addr);
+3 -11
fs/xfs/libxfs/xfs_attr_remote.c
··· 16 16 #include "xfs_da_format.h" 17 17 #include "xfs_da_btree.h" 18 18 #include "xfs_inode.h" 19 - #include "xfs_alloc.h" 20 19 #include "xfs_trans.h" 21 - #include "xfs_inode_item.h" 22 20 #include "xfs_bmap.h" 23 - #include "xfs_bmap_util.h" 24 21 #include "xfs_attr.h" 25 - #include "xfs_attr_leaf.h" 26 - #include "xfs_attr_remote.h" 27 - #include "xfs_trans_space.h" 28 22 #include "xfs_trace.h" 29 - #include "xfs_cksum.h" 30 - #include "xfs_buf_item.h" 31 23 #include "xfs_error.h" 32 24 33 25 #define ATTR_RMTVALUE_MAPSIZE 1 /* # of map entries at once */ ··· 103 111 bool check_crc, 104 112 xfs_failaddr_t *failaddr) 105 113 { 106 - struct xfs_mount *mp = bp->b_target->bt_mount; 114 + struct xfs_mount *mp = bp->b_mount; 107 115 char *ptr; 108 116 int len; 109 117 xfs_daddr_t bno; ··· 167 175 xfs_attr3_rmt_write_verify( 168 176 struct xfs_buf *bp) 169 177 { 170 - struct xfs_mount *mp = bp->b_target->bt_mount; 178 + struct xfs_mount *mp = bp->b_mount; 171 179 xfs_failaddr_t fa; 172 180 int blksize = mp->m_attr_geo->blksize; 173 181 char *ptr; ··· 527 535 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock), 528 536 dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount); 529 537 530 - bp = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt, 0); 538 + bp = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt); 531 539 if (!bp) 532 540 return -ENOMEM; 533 541 bp->b_ops = &xfs_attr3_rmt_buf_ops;
-1
fs/xfs/libxfs/xfs_bit.c
··· 5 5 */ 6 6 #include "xfs.h" 7 7 #include "xfs_log_format.h" 8 - #include "xfs_bit.h" 9 8 10 9 /* 11 10 * XFS bit manipulation routines, used in non-realtime code.
+7 -12
fs/xfs/libxfs/xfs_bmap.c
··· 13 13 #include "xfs_sb.h" 14 14 #include "xfs_mount.h" 15 15 #include "xfs_defer.h" 16 - #include "xfs_da_format.h" 17 - #include "xfs_da_btree.h" 18 16 #include "xfs_dir2.h" 19 17 #include "xfs_inode.h" 20 18 #include "xfs_btree.h" 21 19 #include "xfs_trans.h" 22 - #include "xfs_inode_item.h" 23 - #include "xfs_extfree_item.h" 24 20 #include "xfs_alloc.h" 25 21 #include "xfs_bmap.h" 26 22 #include "xfs_bmap_util.h" ··· 28 32 #include "xfs_trans_space.h" 29 33 #include "xfs_buf_item.h" 30 34 #include "xfs_trace.h" 31 - #include "xfs_symlink.h" 32 35 #include "xfs_attr_leaf.h" 33 36 #include "xfs_filestream.h" 34 37 #include "xfs_rmap.h" ··· 365 370 bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno)); 366 371 if (!bp) { 367 372 bp_release = 1; 368 - error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp, 373 + error = xfs_btree_read_bufl(mp, NULL, bno, &bp, 369 374 XFS_BMAP_BTREE_REF, 370 375 &xfs_bmbt_buf_ops); 371 376 if (error) ··· 449 454 bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno)); 450 455 if (!bp) { 451 456 bp_release = 1; 452 - error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp, 457 + error = xfs_btree_read_bufl(mp, NULL, bno, &bp, 453 458 XFS_BMAP_BTREE_REF, 454 459 &xfs_bmbt_buf_ops); 455 460 if (error) ··· 614 619 XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, 615 620 xfs_btree_check_lptr(cur, cbno, 1)); 616 621 #endif 617 - error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp, XFS_BMAP_BTREE_REF, 622 + error = xfs_btree_read_bufl(mp, tp, cbno, &cbp, XFS_BMAP_BTREE_REF, 618 623 &xfs_bmbt_buf_ops); 619 624 if (error) 620 625 return error; ··· 727 732 cur->bc_private.b.allocated++; 728 733 ip->i_d.di_nblocks++; 729 734 xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L); 730 - abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0); 735 + abp = xfs_btree_get_bufl(mp, tp, args.fsbno); 731 736 if (!abp) { 732 737 error = -EFSCORRUPTED; 733 738 goto out_unreserve_dquot; ··· 873 878 ASSERT(args.fsbno != NULLFSBLOCK); 874 879 ASSERT(args.len == 1); 875 880 
tp->t_firstblock = args.fsbno; 876 - bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0); 881 + bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno); 877 882 878 883 /* 879 884 * Initialize the block, copy the data and log the remote buffer. ··· 1198 1203 * pointer (leftmost) at each level. 1199 1204 */ 1200 1205 while (level-- > 0) { 1201 - error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, 1206 + error = xfs_btree_read_bufl(mp, tp, bno, &bp, 1202 1207 XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops); 1203 1208 if (error) 1204 1209 goto out; ··· 1271 1276 */ 1272 1277 if (bno == NULLFSBLOCK) 1273 1278 break; 1274 - error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, 1279 + error = xfs_btree_read_bufl(mp, tp, bno, &bp, 1275 1280 XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops); 1276 1281 if (error) 1277 1282 goto out;
+1 -4
fs/xfs/libxfs/xfs_bmap_btree.c
··· 11 11 #include "xfs_trans_resv.h" 12 12 #include "xfs_bit.h" 13 13 #include "xfs_mount.h" 14 - #include "xfs_defer.h" 15 14 #include "xfs_inode.h" 16 15 #include "xfs_trans.h" 17 - #include "xfs_inode_item.h" 18 16 #include "xfs_alloc.h" 19 17 #include "xfs_btree.h" 20 18 #include "xfs_bmap_btree.h" ··· 20 22 #include "xfs_error.h" 21 23 #include "xfs_quota.h" 22 24 #include "xfs_trace.h" 23 - #include "xfs_cksum.h" 24 25 #include "xfs_rmap.h" 25 26 26 27 /* ··· 408 411 xfs_bmbt_verify( 409 412 struct xfs_buf *bp) 410 413 { 411 - struct xfs_mount *mp = bp->b_target->bt_mount; 414 + struct xfs_mount *mp = bp->b_mount; 412 415 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 413 416 xfs_failaddr_t fa; 414 417 unsigned int level;
+19 -30
fs/xfs/libxfs/xfs_btree.c
··· 11 11 #include "xfs_trans_resv.h" 12 12 #include "xfs_bit.h" 13 13 #include "xfs_mount.h" 14 - #include "xfs_defer.h" 15 14 #include "xfs_inode.h" 16 15 #include "xfs_trans.h" 17 - #include "xfs_inode_item.h" 18 16 #include "xfs_buf_item.h" 19 17 #include "xfs_btree.h" 20 18 #include "xfs_errortag.h" 21 19 #include "xfs_error.h" 22 20 #include "xfs_trace.h" 23 - #include "xfs_cksum.h" 24 21 #include "xfs_alloc.h" 25 22 #include "xfs_log.h" 26 23 ··· 273 276 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 274 277 struct xfs_buf_log_item *bip = bp->b_log_item; 275 278 276 - if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) 279 + if (!xfs_sb_version_hascrc(&bp->b_mount->m_sb)) 277 280 return; 278 281 if (bip) 279 282 block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); ··· 285 288 struct xfs_buf *bp) 286 289 { 287 290 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 288 - struct xfs_mount *mp = bp->b_target->bt_mount; 291 + struct xfs_mount *mp = bp->b_mount; 289 292 290 293 if (xfs_sb_version_hascrc(&mp->m_sb)) { 291 294 if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.l.bb_lsn))) ··· 311 314 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 312 315 struct xfs_buf_log_item *bip = bp->b_log_item; 313 316 314 - if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) 317 + if (!xfs_sb_version_hascrc(&bp->b_mount->m_sb)) 315 318 return; 316 319 if (bip) 317 320 block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); ··· 323 326 struct xfs_buf *bp) 324 327 { 325 328 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 326 - struct xfs_mount *mp = bp->b_target->bt_mount; 329 + struct xfs_mount *mp = bp->b_mount; 327 330 328 331 if (xfs_sb_version_hascrc(&mp->m_sb)) { 329 332 if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn))) ··· 688 691 xfs_btree_get_bufl( 689 692 xfs_mount_t *mp, /* file system mount point */ 690 693 xfs_trans_t *tp, /* transaction pointer */ 691 - xfs_fsblock_t fsbno, /* file system block number */ 692 
- uint lock) /* lock flags for get_buf */ 694 + xfs_fsblock_t fsbno) /* file system block number */ 693 695 { 694 696 xfs_daddr_t d; /* real disk block address */ 695 697 696 698 ASSERT(fsbno != NULLFSBLOCK); 697 699 d = XFS_FSB_TO_DADDR(mp, fsbno); 698 - return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); 700 + return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, 0); 699 701 } 700 702 701 703 /* ··· 706 710 xfs_mount_t *mp, /* file system mount point */ 707 711 xfs_trans_t *tp, /* transaction pointer */ 708 712 xfs_agnumber_t agno, /* allocation group number */ 709 - xfs_agblock_t agbno, /* allocation group block number */ 710 - uint lock) /* lock flags for get_buf */ 713 + xfs_agblock_t agbno) /* allocation group block number */ 711 714 { 712 715 xfs_daddr_t d; /* real disk block address */ 713 716 714 717 ASSERT(agno != NULLAGNUMBER); 715 718 ASSERT(agbno != NULLAGBLOCK); 716 719 d = XFS_AGB_TO_DADDR(mp, agno, agbno); 717 - return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); 720 + return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, 0); 718 721 } 719 722 720 723 /* ··· 840 845 struct xfs_mount *mp, /* file system mount point */ 841 846 struct xfs_trans *tp, /* transaction pointer */ 842 847 xfs_fsblock_t fsbno, /* file system block number */ 843 - uint lock, /* lock flags for read_buf */ 844 848 struct xfs_buf **bpp, /* buffer for fsbno */ 845 849 int refval, /* ref count value for buffer */ 846 850 const struct xfs_buf_ops *ops) ··· 852 858 return -EFSCORRUPTED; 853 859 d = XFS_FSB_TO_DADDR(mp, fsbno); 854 860 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d, 855 - mp->m_bsize, lock, &bp, ops); 861 + mp->m_bsize, 0, &bp, ops); 856 862 if (error) 857 863 return error; 858 864 if (bp) ··· 1179 1185 xfs_btnum_t btnum, 1180 1186 __u16 level, 1181 1187 __u16 numrecs, 1182 - __u64 owner, 1183 - unsigned int flags) 1188 + __u64 owner) 1184 1189 { 1185 1190 xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), 
bp->b_bn, 1186 - btnum, level, numrecs, owner, flags); 1191 + btnum, level, numrecs, owner, 0); 1187 1192 } 1188 1193 1189 1194 STATIC void ··· 1281 1288 xfs_btree_get_buf_block( 1282 1289 struct xfs_btree_cur *cur, 1283 1290 union xfs_btree_ptr *ptr, 1284 - int flags, 1285 1291 struct xfs_btree_block **block, 1286 1292 struct xfs_buf **bpp) 1287 1293 { ··· 1288 1296 xfs_daddr_t d; 1289 1297 int error; 1290 1298 1291 - /* need to sort out how callers deal with failures first */ 1292 - ASSERT(!(flags & XBF_TRYLOCK)); 1293 - 1294 1299 error = xfs_btree_ptr_to_daddr(cur, ptr, &d); 1295 1300 if (error) 1296 1301 return error; 1297 1302 *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d, 1298 - mp->m_bsize, flags); 1303 + mp->m_bsize, 0); 1299 1304 1300 1305 if (!*bpp) 1301 1306 return -ENOMEM; ··· 2695 2706 XFS_BTREE_STATS_INC(cur, alloc); 2696 2707 2697 2708 /* Set up the new block as "right". */ 2698 - error = xfs_btree_get_buf_block(cur, &rptr, 0, &right, &rbp); 2709 + error = xfs_btree_get_buf_block(cur, &rptr, &right, &rbp); 2699 2710 if (error) 2700 2711 goto error0; 2701 2712 ··· 2950 2961 XFS_BTREE_STATS_INC(cur, alloc); 2951 2962 2952 2963 /* Copy the root into a real block. */ 2953 - error = xfs_btree_get_buf_block(cur, &nptr, 0, &cblock, &cbp); 2964 + error = xfs_btree_get_buf_block(cur, &nptr, &cblock, &cbp); 2954 2965 if (error) 2955 2966 goto error0; 2956 2967 ··· 3047 3058 XFS_BTREE_STATS_INC(cur, alloc); 3048 3059 3049 3060 /* Set up the new block. 
*/ 3050 - error = xfs_btree_get_buf_block(cur, &lptr, 0, &new, &nbp); 3061 + error = xfs_btree_get_buf_block(cur, &lptr, &new, &nbp); 3051 3062 if (error) 3052 3063 goto error0; 3053 3064 ··· 4422 4433 struct xfs_buf *bp, 4423 4434 uint64_t owner) 4424 4435 { 4425 - struct xfs_mount *mp = bp->b_target->bt_mount; 4436 + struct xfs_mount *mp = bp->b_mount; 4426 4437 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 4427 4438 4428 4439 if (!xfs_sb_version_hascrc(&mp->m_sb)) ··· 4443 4454 struct xfs_buf *bp, 4444 4455 unsigned int max_recs) 4445 4456 { 4446 - struct xfs_mount *mp = bp->b_target->bt_mount; 4457 + struct xfs_mount *mp = bp->b_mount; 4447 4458 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 4448 4459 4449 4460 /* numrecs verification */ ··· 4473 4484 xfs_btree_sblock_v5hdr_verify( 4474 4485 struct xfs_buf *bp) 4475 4486 { 4476 - struct xfs_mount *mp = bp->b_target->bt_mount; 4487 + struct xfs_mount *mp = bp->b_mount; 4477 4488 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 4478 4489 struct xfs_perag *pag = bp->b_pag; 4479 4490 ··· 4499 4510 struct xfs_buf *bp, 4500 4511 unsigned int max_recs) 4501 4512 { 4502 - struct xfs_mount *mp = bp->b_target->bt_mount; 4513 + struct xfs_mount *mp = bp->b_mount; 4503 4514 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 4504 4515 xfs_agblock_t agno; 4505 4516
+5 -9
fs/xfs/libxfs/xfs_btree.h
··· 301 301 xfs_btree_get_bufl( 302 302 struct xfs_mount *mp, /* file system mount point */ 303 303 struct xfs_trans *tp, /* transaction pointer */ 304 - xfs_fsblock_t fsbno, /* file system block number */ 305 - uint lock); /* lock flags for get_buf */ 304 + xfs_fsblock_t fsbno); /* file system block number */ 306 305 307 306 /* 308 307 * Get a buffer for the block, return it with no data read. ··· 312 313 struct xfs_mount *mp, /* file system mount point */ 313 314 struct xfs_trans *tp, /* transaction pointer */ 314 315 xfs_agnumber_t agno, /* allocation group number */ 315 - xfs_agblock_t agbno, /* allocation group block number */ 316 - uint lock); /* lock flags for get_buf */ 316 + xfs_agblock_t agbno); /* allocation group block number */ 317 317 318 318 /* 319 319 * Check for the cursor referring to the last block at the given level. ··· 343 345 struct xfs_mount *mp, /* file system mount point */ 344 346 struct xfs_trans *tp, /* transaction pointer */ 345 347 xfs_fsblock_t fsbno, /* file system block number */ 346 - uint lock, /* lock flags for read_buf */ 347 348 struct xfs_buf **bpp, /* buffer for fsbno */ 348 349 int refval, /* ref count value for buffer */ 349 350 const struct xfs_buf_ops *ops); ··· 380 383 xfs_btnum_t btnum, 381 384 __u16 level, 382 385 __u16 numrecs, 383 - __u64 owner, 384 - unsigned int flags); 386 + __u64 owner); 385 387 386 388 void 387 389 xfs_btree_init_block_int( ··· 465 469 unsigned long long xfs_btree_calc_size(uint *limits, unsigned long long len); 466 470 467 471 /* return codes */ 468 - #define XFS_BTREE_QUERY_RANGE_CONTINUE 0 /* keep iterating */ 469 - #define XFS_BTREE_QUERY_RANGE_ABORT 1 /* stop iterating */ 472 + #define XFS_BTREE_QUERY_RANGE_CONTINUE (XFS_ITER_CONTINUE) /* keep iterating */ 473 + #define XFS_BTREE_QUERY_RANGE_ABORT (XFS_ITER_ABORT) /* stop iterating */ 470 474 typedef int (*xfs_btree_query_range_fn)(struct xfs_btree_cur *cur, 471 475 union xfs_btree_rec *rec, void *priv); 472 476
+3 -9
fs/xfs/libxfs/xfs_da_btree.c
··· 12 12 #include "xfs_trans_resv.h" 13 13 #include "xfs_bit.h" 14 14 #include "xfs_mount.h" 15 - #include "xfs_da_format.h" 16 - #include "xfs_da_btree.h" 17 15 #include "xfs_dir2.h" 18 16 #include "xfs_dir2_priv.h" 19 17 #include "xfs_inode.h" 20 18 #include "xfs_trans.h" 21 - #include "xfs_inode_item.h" 22 - #include "xfs_alloc.h" 23 19 #include "xfs_bmap.h" 24 - #include "xfs_attr.h" 25 20 #include "xfs_attr_leaf.h" 26 21 #include "xfs_error.h" 27 22 #include "xfs_trace.h" 28 - #include "xfs_cksum.h" 29 23 #include "xfs_buf_item.h" 30 24 #include "xfs_log.h" 31 25 ··· 120 126 struct xfs_buf *bp, 121 127 struct xfs_da3_blkinfo *hdr3) 122 128 { 123 - struct xfs_mount *mp = bp->b_target->bt_mount; 129 + struct xfs_mount *mp = bp->b_mount; 124 130 struct xfs_da_blkinfo *hdr = &hdr3->hdr; 125 131 126 132 if (!xfs_verify_magic16(bp, hdr->magic)) ··· 142 148 xfs_da3_node_verify( 143 149 struct xfs_buf *bp) 144 150 { 145 - struct xfs_mount *mp = bp->b_target->bt_mount; 151 + struct xfs_mount *mp = bp->b_mount; 146 152 struct xfs_da_intnode *hdr = bp->b_addr; 147 153 struct xfs_da3_icnode_hdr ichdr; 148 154 const struct xfs_dir_ops *ops; ··· 180 186 xfs_da3_node_write_verify( 181 187 struct xfs_buf *bp) 182 188 { 183 - struct xfs_mount *mp = bp->b_target->bt_mount; 189 + struct xfs_mount *mp = bp->b_mount; 184 190 struct xfs_buf_log_item *bip = bp->b_log_item; 185 191 struct xfs_da3_node_hdr *hdr3 = bp->b_addr; 186 192 xfs_failaddr_t fa;
-3
fs/xfs/libxfs/xfs_da_format.c
··· 11 11 #include "xfs_log_format.h" 12 12 #include "xfs_trans_resv.h" 13 13 #include "xfs_mount.h" 14 - #include "xfs_da_format.h" 15 - #include "xfs_da_btree.h" 16 14 #include "xfs_inode.h" 17 15 #include "xfs_dir2.h" 18 - #include "xfs_dir2_priv.h" 19 16 20 17 /* 21 18 * Shortform directory ops
-2
fs/xfs/libxfs/xfs_defer.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_log_format.h" 11 11 #include "xfs_trans_resv.h" 12 - #include "xfs_bit.h" 13 - #include "xfs_sb.h" 14 12 #include "xfs_mount.h" 15 13 #include "xfs_defer.h" 16 14 #include "xfs_trans.h"
+1 -5
fs/xfs/libxfs/xfs_dir2.c
··· 5 5 */ 6 6 #include "xfs.h" 7 7 #include "xfs_fs.h" 8 + #include "xfs_shared.h" 8 9 #include "xfs_format.h" 9 10 #include "xfs_log_format.h" 10 11 #include "xfs_trans_resv.h" 11 12 #include "xfs_mount.h" 12 - #include "xfs_defer.h" 13 - #include "xfs_da_format.h" 14 - #include "xfs_da_btree.h" 15 13 #include "xfs_inode.h" 16 14 #include "xfs_trans.h" 17 - #include "xfs_inode_item.h" 18 15 #include "xfs_bmap.h" 19 16 #include "xfs_dir2.h" 20 17 #include "xfs_dir2_priv.h" 21 - #include "xfs_ialloc.h" 22 18 #include "xfs_errortag.h" 23 19 #include "xfs_error.h" 24 20 #include "xfs_trace.h"
+4 -7
fs/xfs/libxfs/xfs_dir2_block.c
··· 6 6 */ 7 7 #include "xfs.h" 8 8 #include "xfs_fs.h" 9 + #include "xfs_shared.h" 9 10 #include "xfs_format.h" 10 11 #include "xfs_log_format.h" 11 12 #include "xfs_trans_resv.h" 12 13 #include "xfs_mount.h" 13 - #include "xfs_da_format.h" 14 - #include "xfs_da_btree.h" 15 14 #include "xfs_inode.h" 16 15 #include "xfs_trans.h" 17 - #include "xfs_inode_item.h" 18 16 #include "xfs_bmap.h" 19 17 #include "xfs_buf_item.h" 20 18 #include "xfs_dir2.h" 21 19 #include "xfs_dir2_priv.h" 22 20 #include "xfs_error.h" 23 21 #include "xfs_trace.h" 24 - #include "xfs_cksum.h" 25 22 #include "xfs_log.h" 26 23 27 24 /* ··· 47 50 xfs_dir3_block_verify( 48 51 struct xfs_buf *bp) 49 52 { 50 - struct xfs_mount *mp = bp->b_target->bt_mount; 53 + struct xfs_mount *mp = bp->b_mount; 51 54 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; 52 55 53 56 if (!xfs_verify_magic(bp, hdr3->magic)) ··· 68 71 xfs_dir3_block_read_verify( 69 72 struct xfs_buf *bp) 70 73 { 71 - struct xfs_mount *mp = bp->b_target->bt_mount; 74 + struct xfs_mount *mp = bp->b_mount; 72 75 xfs_failaddr_t fa; 73 76 74 77 if (xfs_sb_version_hascrc(&mp->m_sb) && ··· 85 88 xfs_dir3_block_write_verify( 86 89 struct xfs_buf *bp) 87 90 { 88 - struct xfs_mount *mp = bp->b_target->bt_mount; 91 + struct xfs_mount *mp = bp->b_mount; 89 92 struct xfs_buf_log_item *bip = bp->b_log_item; 90 93 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; 91 94 xfs_failaddr_t fa;
+5 -9
fs/xfs/libxfs/xfs_dir2_data.c
··· 6 6 */ 7 7 #include "xfs.h" 8 8 #include "xfs_fs.h" 9 + #include "xfs_shared.h" 9 10 #include "xfs_format.h" 10 11 #include "xfs_log_format.h" 11 12 #include "xfs_trans_resv.h" 12 13 #include "xfs_mount.h" 13 - #include "xfs_da_format.h" 14 - #include "xfs_da_btree.h" 15 14 #include "xfs_inode.h" 16 15 #include "xfs_dir2.h" 17 - #include "xfs_dir2_priv.h" 18 16 #include "xfs_error.h" 19 17 #include "xfs_trans.h" 20 18 #include "xfs_buf_item.h" 21 - #include "xfs_cksum.h" 22 19 #include "xfs_log.h" 23 20 24 21 static xfs_failaddr_t xfs_dir2_data_freefind_verify( ··· 47 50 int i; /* leaf index */ 48 51 int lastfree; /* last entry was unused */ 49 52 xfs_dir2_leaf_entry_t *lep=NULL; /* block leaf entries */ 50 - xfs_mount_t *mp; /* filesystem mount point */ 53 + struct xfs_mount *mp = bp->b_mount; 51 54 char *p; /* current data position */ 52 55 int stale; /* count of stale leaves */ 53 56 struct xfs_name name; 54 57 const struct xfs_dir_ops *ops; 55 58 struct xfs_da_geometry *geo; 56 59 57 - mp = bp->b_target->bt_mount; 58 60 geo = mp->m_dir_geo; 59 61 60 62 /* ··· 245 249 xfs_dir3_data_verify( 246 250 struct xfs_buf *bp) 247 251 { 248 - struct xfs_mount *mp = bp->b_target->bt_mount; 252 + struct xfs_mount *mp = bp->b_mount; 249 253 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; 250 254 251 255 if (!xfs_verify_magic(bp, hdr3->magic)) ··· 294 298 xfs_dir3_data_read_verify( 295 299 struct xfs_buf *bp) 296 300 { 297 - struct xfs_mount *mp = bp->b_target->bt_mount; 301 + struct xfs_mount *mp = bp->b_mount; 298 302 xfs_failaddr_t fa; 299 303 300 304 if (xfs_sb_version_hascrc(&mp->m_sb) && ··· 311 315 xfs_dir3_data_write_verify( 312 316 struct xfs_buf *bp) 313 317 { 314 - struct xfs_mount *mp = bp->b_target->bt_mount; 318 + struct xfs_mount *mp = bp->b_mount; 315 319 struct xfs_buf_log_item *bip = bp->b_log_item; 316 320 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; 317 321 xfs_failaddr_t fa;
+4 -7
fs/xfs/libxfs/xfs_dir2_leaf.c
··· 6 6 */ 7 7 #include "xfs.h" 8 8 #include "xfs_fs.h" 9 + #include "xfs_shared.h" 9 10 #include "xfs_format.h" 10 11 #include "xfs_log_format.h" 11 12 #include "xfs_trans_resv.h" 12 13 #include "xfs_mount.h" 13 - #include "xfs_da_format.h" 14 - #include "xfs_da_btree.h" 15 14 #include "xfs_inode.h" 16 15 #include "xfs_bmap.h" 17 16 #include "xfs_dir2.h" ··· 19 20 #include "xfs_trace.h" 20 21 #include "xfs_trans.h" 21 22 #include "xfs_buf_item.h" 22 - #include "xfs_cksum.h" 23 - #include "xfs_log.h" 24 23 25 24 /* 26 25 * Local function declarations. ··· 141 144 xfs_dir3_leaf_verify( 142 145 struct xfs_buf *bp) 143 146 { 144 - struct xfs_mount *mp = bp->b_target->bt_mount; 147 + struct xfs_mount *mp = bp->b_mount; 145 148 struct xfs_dir2_leaf *leaf = bp->b_addr; 146 149 xfs_failaddr_t fa; 147 150 ··· 156 159 xfs_dir3_leaf_read_verify( 157 160 struct xfs_buf *bp) 158 161 { 159 - struct xfs_mount *mp = bp->b_target->bt_mount; 162 + struct xfs_mount *mp = bp->b_mount; 160 163 xfs_failaddr_t fa; 161 164 162 165 if (xfs_sb_version_hascrc(&mp->m_sb) && ··· 173 176 xfs_dir3_leaf_write_verify( 174 177 struct xfs_buf *bp) 175 178 { 176 - struct xfs_mount *mp = bp->b_target->bt_mount; 179 + struct xfs_mount *mp = bp->b_mount; 177 180 struct xfs_buf_log_item *bip = bp->b_log_item; 178 181 struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr; 179 182 xfs_failaddr_t fa;
+4 -6
fs/xfs/libxfs/xfs_dir2_node.c
··· 6 6 */ 7 7 #include "xfs.h" 8 8 #include "xfs_fs.h" 9 + #include "xfs_shared.h" 9 10 #include "xfs_format.h" 10 11 #include "xfs_log_format.h" 11 12 #include "xfs_trans_resv.h" 12 13 #include "xfs_mount.h" 13 - #include "xfs_da_format.h" 14 - #include "xfs_da_btree.h" 15 14 #include "xfs_inode.h" 16 15 #include "xfs_bmap.h" 17 16 #include "xfs_dir2.h" ··· 19 20 #include "xfs_trace.h" 20 21 #include "xfs_trans.h" 21 22 #include "xfs_buf_item.h" 22 - #include "xfs_cksum.h" 23 23 #include "xfs_log.h" 24 24 25 25 /* ··· 82 84 xfs_dir3_free_verify( 83 85 struct xfs_buf *bp) 84 86 { 85 - struct xfs_mount *mp = bp->b_target->bt_mount; 87 + struct xfs_mount *mp = bp->b_mount; 86 88 struct xfs_dir2_free_hdr *hdr = bp->b_addr; 87 89 88 90 if (!xfs_verify_magic(bp, hdr->magic)) ··· 108 110 xfs_dir3_free_read_verify( 109 111 struct xfs_buf *bp) 110 112 { 111 - struct xfs_mount *mp = bp->b_target->bt_mount; 113 + struct xfs_mount *mp = bp->b_mount; 112 114 xfs_failaddr_t fa; 113 115 114 116 if (xfs_sb_version_hascrc(&mp->m_sb) && ··· 125 127 xfs_dir3_free_write_verify( 126 128 struct xfs_buf *bp) 127 129 { 128 - struct xfs_mount *mp = bp->b_target->bt_mount; 130 + struct xfs_mount *mp = bp->b_mount; 129 131 struct xfs_buf_log_item *bip = bp->b_log_item; 130 132 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; 131 133 xfs_failaddr_t fa;
+1 -4
fs/xfs/libxfs/xfs_dir2_sf.c
··· 5 5 */ 6 6 #include "xfs.h" 7 7 #include "xfs_fs.h" 8 + #include "xfs_shared.h" 8 9 #include "xfs_format.h" 9 10 #include "xfs_log_format.h" 10 11 #include "xfs_trans_resv.h" 11 12 #include "xfs_mount.h" 12 - #include "xfs_da_format.h" 13 - #include "xfs_da_btree.h" 14 13 #include "xfs_inode.h" 15 14 #include "xfs_trans.h" 16 - #include "xfs_inode_item.h" 17 - #include "xfs_error.h" 18 15 #include "xfs_dir2.h" 19 16 #include "xfs_dir2_priv.h" 20 17 #include "xfs_trace.h"
+4 -6
fs/xfs/libxfs/xfs_dquot_buf.c
··· 16 16 #include "xfs_trans.h" 17 17 #include "xfs_qm.h" 18 18 #include "xfs_error.h" 19 - #include "xfs_cksum.h" 20 - #include "xfs_trace.h" 21 19 22 20 int 23 21 xfs_calc_dquots_per_chunk( ··· 222 224 xfs_dquot_buf_verify_struct( 223 225 struct xfs_buf *bp) 224 226 { 225 - struct xfs_mount *mp = bp->b_target->bt_mount; 227 + struct xfs_mount *mp = bp->b_mount; 226 228 227 229 return xfs_dquot_buf_verify(mp, bp, false); 228 230 } ··· 231 233 xfs_dquot_buf_read_verify( 232 234 struct xfs_buf *bp) 233 235 { 234 - struct xfs_mount *mp = bp->b_target->bt_mount; 236 + struct xfs_mount *mp = bp->b_mount; 235 237 236 238 if (!xfs_dquot_buf_verify_crc(mp, bp, false)) 237 239 return; ··· 248 250 xfs_dquot_buf_readahead_verify( 249 251 struct xfs_buf *bp) 250 252 { 251 - struct xfs_mount *mp = bp->b_target->bt_mount; 253 + struct xfs_mount *mp = bp->b_mount; 252 254 253 255 if (!xfs_dquot_buf_verify_crc(mp, bp, true) || 254 256 xfs_dquot_buf_verify(mp, bp, true) != NULL) { ··· 266 268 xfs_dquot_buf_write_verify( 267 269 struct xfs_buf *bp) 268 270 { 269 - struct xfs_mount *mp = bp->b_target->bt_mount; 271 + struct xfs_mount *mp = bp->b_mount; 270 272 271 273 xfs_dquot_buf_verify(mp, bp, false); 272 274 }
+1 -1
fs/xfs/libxfs/xfs_format.h
··· 1071 1071 #define XFS_INO_MASK(k) (uint32_t)((1ULL << (k)) - 1) 1072 1072 #define XFS_INO_OFFSET_BITS(mp) (mp)->m_sb.sb_inopblog 1073 1073 #define XFS_INO_AGBNO_BITS(mp) (mp)->m_sb.sb_agblklog 1074 - #define XFS_INO_AGINO_BITS(mp) (mp)->m_agino_log 1074 + #define XFS_INO_AGINO_BITS(mp) ((mp)->m_ino_geo.agino_log) 1075 1075 #define XFS_INO_AGNO_BITS(mp) (mp)->m_agno_log 1076 1076 #define XFS_INO_BITS(mp) \ 1077 1077 XFS_INO_AGNO_BITS(mp) + XFS_INO_AGINO_BITS(mp)
+114 -10
fs/xfs/libxfs/xfs_fs.h
··· 97 97 * For use by backup and restore programs to set the XFS on-disk inode 98 98 * fields di_dmevmask and di_dmstate. These must be set to exactly and 99 99 * only values previously obtained via xfs_bulkstat! (Specifically the 100 - * xfs_bstat_t fields bs_dmevmask and bs_dmstate.) 100 + * struct xfs_bstat fields bs_dmevmask and bs_dmstate.) 101 101 */ 102 102 #ifndef HAVE_FSDMIDATA 103 103 struct fsdmidata { ··· 328 328 __s32 tv_nsec; /* and nanoseconds */ 329 329 } xfs_bstime_t; 330 330 331 - typedef struct xfs_bstat { 331 + struct xfs_bstat { 332 332 __u64 bs_ino; /* inode number */ 333 333 __u16 bs_mode; /* type and mode */ 334 334 __u16 bs_nlink; /* number of links */ ··· 356 356 __u32 bs_dmevmask; /* DMIG event mask */ 357 357 __u16 bs_dmstate; /* DMIG state info */ 358 358 __u16 bs_aextents; /* attribute number of extents */ 359 - } xfs_bstat_t; 359 + }; 360 + 361 + /* New bulkstat structure that reports v5 features and fixes padding issues */ 362 + struct xfs_bulkstat { 363 + uint64_t bs_ino; /* inode number */ 364 + uint64_t bs_size; /* file size */ 365 + 366 + uint64_t bs_blocks; /* number of blocks */ 367 + uint64_t bs_xflags; /* extended flags */ 368 + 369 + uint64_t bs_atime; /* access time, seconds */ 370 + uint64_t bs_mtime; /* modify time, seconds */ 371 + 372 + uint64_t bs_ctime; /* inode change time, seconds */ 373 + uint64_t bs_btime; /* creation time, seconds */ 374 + 375 + uint32_t bs_gen; /* generation count */ 376 + uint32_t bs_uid; /* user id */ 377 + uint32_t bs_gid; /* group id */ 378 + uint32_t bs_projectid; /* project id */ 379 + 380 + uint32_t bs_atime_nsec; /* access time, nanoseconds */ 381 + uint32_t bs_mtime_nsec; /* modify time, nanoseconds */ 382 + uint32_t bs_ctime_nsec; /* inode change time, nanoseconds */ 383 + uint32_t bs_btime_nsec; /* creation time, nanoseconds */ 384 + 385 + uint32_t bs_blksize; /* block size */ 386 + uint32_t bs_rdev; /* device value */ 387 + uint32_t bs_cowextsize_blks; /* cow extent size hint, 
blocks */ 388 + uint32_t bs_extsize_blks; /* extent size hint, blocks */ 389 + 390 + uint32_t bs_nlink; /* number of links */ 391 + uint32_t bs_extents; /* number of extents */ 392 + uint32_t bs_aextents; /* attribute number of extents */ 393 + uint16_t bs_version; /* structure version */ 394 + uint16_t bs_forkoff; /* inode fork offset in bytes */ 395 + 396 + uint16_t bs_sick; /* sick inode metadata */ 397 + uint16_t bs_checked; /* checked inode metadata */ 398 + uint16_t bs_mode; /* type and mode */ 399 + uint16_t bs_pad2; /* zeroed */ 400 + 401 + uint64_t bs_pad[7]; /* zeroed */ 402 + }; 403 + 404 + #define XFS_BULKSTAT_VERSION_V1 (1) 405 + #define XFS_BULKSTAT_VERSION_V5 (5) 360 406 361 407 /* bs_sick flags */ 362 408 #define XFS_BS_SICK_INODE (1 << 0) /* inode core */ ··· 420 374 * to retain compatibility with "old" filesystems). 421 375 */ 422 376 static inline uint32_t 423 - bstat_get_projid(struct xfs_bstat *bs) 377 + bstat_get_projid(const struct xfs_bstat *bs) 424 378 { 425 379 return (uint32_t)bs->bs_projid_hi << 16 | bs->bs_projid_lo; 426 380 } ··· 428 382 /* 429 383 * The user-level BulkStat Request interface structure. 430 384 */ 431 - typedef struct xfs_fsop_bulkreq { 385 + struct xfs_fsop_bulkreq { 432 386 __u64 __user *lastip; /* last inode # pointer */ 433 387 __s32 icount; /* count of entries in buffer */ 434 388 void __user *ubuffer;/* user buffer for inode desc. */ 435 389 __s32 __user *ocount; /* output count pointer */ 436 - } xfs_fsop_bulkreq_t; 437 - 390 + }; 438 391 439 392 /* 440 393 * Structures returned from xfs_inumbers routine (XFS_IOC_FSINUMBERS). 
441 394 */ 442 - typedef struct xfs_inogrp { 395 + struct xfs_inogrp { 443 396 __u64 xi_startino; /* starting inode number */ 444 397 __s32 xi_alloccount; /* # bits set in allocmask */ 445 398 __u64 xi_allocmask; /* mask of allocated inodes */ 446 - } xfs_inogrp_t; 399 + }; 447 400 401 + /* New inumbers structure that reports v5 features and fixes padding issues */ 402 + struct xfs_inumbers { 403 + uint64_t xi_startino; /* starting inode number */ 404 + uint64_t xi_allocmask; /* mask of allocated inodes */ 405 + uint8_t xi_alloccount; /* # bits set in allocmask */ 406 + uint8_t xi_version; /* version */ 407 + uint8_t xi_padding[6]; /* zero */ 408 + }; 409 + 410 + #define XFS_INUMBERS_VERSION_V1 (1) 411 + #define XFS_INUMBERS_VERSION_V5 (5) 412 + 413 + /* Header for bulk inode requests. */ 414 + struct xfs_bulk_ireq { 415 + uint64_t ino; /* I/O: start with this inode */ 416 + uint32_t flags; /* I/O: operation flags */ 417 + uint32_t icount; /* I: count of entries in buffer */ 418 + uint32_t ocount; /* O: count of entries filled out */ 419 + uint32_t agno; /* I: see comment for IREQ_AGNO */ 420 + uint64_t reserved[5]; /* must be zero */ 421 + }; 422 + 423 + /* 424 + * Only return results from the specified @agno. If @ino is zero, start 425 + * with the first inode of @agno. 426 + */ 427 + #define XFS_BULK_IREQ_AGNO (1 << 0) 428 + 429 + /* 430 + * Return bulkstat information for a single inode, where @ino value is a 431 + * special value, not a literal inode number. See the XFS_BULK_IREQ_SPECIAL_* 432 + * values below. Not compatible with XFS_BULK_IREQ_AGNO. 433 + */ 434 + #define XFS_BULK_IREQ_SPECIAL (1 << 1) 435 + 436 + #define XFS_BULK_IREQ_FLAGS_ALL (XFS_BULK_IREQ_AGNO | \ 437 + XFS_BULK_IREQ_SPECIAL) 438 + 439 + /* Operate on the root directory inode. 
*/ 440 + #define XFS_BULK_IREQ_SPECIAL_ROOT (1) 441 + 442 + /* 443 + * ioctl structures for v5 bulkstat and inumbers requests 444 + */ 445 + struct xfs_bulkstat_req { 446 + struct xfs_bulk_ireq hdr; 447 + struct xfs_bulkstat bulkstat[]; 448 + }; 449 + #define XFS_BULKSTAT_REQ_SIZE(nr) (sizeof(struct xfs_bulkstat_req) + \ 450 + (nr) * sizeof(struct xfs_bulkstat)) 451 + 452 + struct xfs_inumbers_req { 453 + struct xfs_bulk_ireq hdr; 454 + struct xfs_inumbers inumbers[]; 455 + }; 456 + #define XFS_INUMBERS_REQ_SIZE(nr) (sizeof(struct xfs_inumbers_req) + \ 457 + (nr) * sizeof(struct xfs_inumbers)) 448 458 449 459 /* 450 460 * Error injection. ··· 631 529 xfs_off_t sx_offset; /* offset into file */ 632 530 xfs_off_t sx_length; /* leng from offset */ 633 531 char sx_pad[16]; /* pad space, unused */ 634 - xfs_bstat_t sx_stat; /* stat of target b4 copy */ 532 + struct xfs_bstat sx_stat; /* stat of target b4 copy */ 635 533 } xfs_swapext_t; 636 534 637 535 /* ··· 803 701 #define XFS_IOC_FSGEOMETRY_V4 _IOR ('X', 124, struct xfs_fsop_geom_v4) 804 702 #define XFS_IOC_GOINGDOWN _IOR ('X', 125, uint32_t) 805 703 #define XFS_IOC_FSGEOMETRY _IOR ('X', 126, struct xfs_fsop_geom) 704 + #define XFS_IOC_BULKSTAT _IOR ('X', 127, struct xfs_bulkstat_req) 705 + #define XFS_IOC_INUMBERS _IOR ('X', 128, struct xfs_inumbers_req) 806 706 /* XFS_IOC_GETFSUUID ---------- deprecated 140 */ 807 707 808 708
+1 -1
fs/xfs/libxfs/xfs_health.h
··· 185 185 186 186 void xfs_fsop_geom_health(struct xfs_mount *mp, struct xfs_fsop_geom *geo); 187 187 void xfs_ag_geom_health(struct xfs_perag *pag, struct xfs_ag_geometry *ageo); 188 - void xfs_bulkstat_health(struct xfs_inode *ip, struct xfs_bstat *bs); 188 + void xfs_bulkstat_health(struct xfs_inode *ip, struct xfs_bulkstat *bs); 189 189 190 190 #endif /* __XFS_HEALTH_H__ */
+163 -82
fs/xfs/libxfs/xfs_ialloc.c
··· 12 12 #include "xfs_bit.h" 13 13 #include "xfs_sb.h" 14 14 #include "xfs_mount.h" 15 - #include "xfs_defer.h" 16 15 #include "xfs_inode.h" 17 16 #include "xfs_btree.h" 18 17 #include "xfs_ialloc.h" 19 18 #include "xfs_ialloc_btree.h" 20 19 #include "xfs_alloc.h" 21 - #include "xfs_rtalloc.h" 22 20 #include "xfs_errortag.h" 23 21 #include "xfs_error.h" 24 22 #include "xfs_bmap.h" 25 - #include "xfs_cksum.h" 26 23 #include "xfs_trans.h" 27 24 #include "xfs_buf_item.h" 28 25 #include "xfs_icreate_item.h" ··· 27 30 #include "xfs_trace.h" 28 31 #include "xfs_log.h" 29 32 #include "xfs_rmap.h" 30 - 31 - 32 - /* 33 - * Allocation group level functions. 34 - */ 35 - int 36 - xfs_ialloc_cluster_alignment( 37 - struct xfs_mount *mp) 38 - { 39 - if (xfs_sb_version_hasalign(&mp->m_sb) && 40 - mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp)) 41 - return mp->m_sb.sb_inoalignmt; 42 - return 1; 43 - } 44 33 45 34 /* 46 35 * Lookup a record by ino in the btree given by cur. ··· 282 299 * sizes, manipulate the inodes in buffers which are multiples of the 283 300 * blocks size. 284 301 */ 285 - nbufs = length / mp->m_blocks_per_cluster; 302 + nbufs = length / M_IGEO(mp)->blocks_per_cluster; 286 303 287 304 /* 288 305 * Figure out what version number to use in the inodes we create. If ··· 326 343 * Get the block. 327 344 */ 328 345 d = XFS_AGB_TO_DADDR(mp, agno, agbno + 329 - (j * mp->m_blocks_per_cluster)); 346 + (j * M_IGEO(mp)->blocks_per_cluster)); 330 347 fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, 331 - mp->m_bsize * mp->m_blocks_per_cluster, 348 + mp->m_bsize * 349 + M_IGEO(mp)->blocks_per_cluster, 332 350 XBF_UNMAPPED); 333 351 if (!fbuf) 334 352 return -ENOMEM; ··· 337 353 /* Initialize the inode buffers and log them appropriately. 
*/ 338 354 fbuf->b_ops = &xfs_inode_buf_ops; 339 355 xfs_buf_zero(fbuf, 0, BBTOB(fbuf->b_length)); 340 - for (i = 0; i < mp->m_inodes_per_cluster; i++) { 356 + for (i = 0; i < M_IGEO(mp)->inodes_per_cluster; i++) { 341 357 int ioffset = i << mp->m_sb.sb_inodelog; 342 358 uint isize = xfs_dinode_size(version); 343 359 ··· 600 616 * Allocate new inodes in the allocation group specified by agbp. 601 617 * Return 0 for success, else error code. 602 618 */ 603 - STATIC int /* error code or 0 */ 619 + STATIC int 604 620 xfs_ialloc_ag_alloc( 605 - xfs_trans_t *tp, /* transaction pointer */ 606 - xfs_buf_t *agbp, /* alloc group buffer */ 607 - int *alloc) 621 + struct xfs_trans *tp, 622 + struct xfs_buf *agbp, 623 + int *alloc) 608 624 { 609 - xfs_agi_t *agi; /* allocation group header */ 610 - xfs_alloc_arg_t args; /* allocation argument structure */ 611 - xfs_agnumber_t agno; 612 - int error; 613 - xfs_agino_t newino; /* new first inode's number */ 614 - xfs_agino_t newlen; /* new number of inodes */ 615 - int isaligned = 0; /* inode allocation at stripe unit */ 616 - /* boundary */ 617 - uint16_t allocmask = (uint16_t) -1; /* init. to full chunk */ 625 + struct xfs_agi *agi; 626 + struct xfs_alloc_arg args; 627 + xfs_agnumber_t agno; 628 + int error; 629 + xfs_agino_t newino; /* new first inode's number */ 630 + xfs_agino_t newlen; /* new number of inodes */ 631 + int isaligned = 0; /* inode allocation at stripe */ 632 + /* unit boundary */ 633 + /* init. 
to full chunk */ 634 + uint16_t allocmask = (uint16_t) -1; 618 635 struct xfs_inobt_rec_incore rec; 619 - struct xfs_perag *pag; 620 - int do_sparse = 0; 636 + struct xfs_perag *pag; 637 + struct xfs_ino_geometry *igeo = M_IGEO(tp->t_mountp); 638 + int do_sparse = 0; 621 639 622 640 memset(&args, 0, sizeof(args)); 623 641 args.tp = tp; ··· 630 644 #ifdef DEBUG 631 645 /* randomly do sparse inode allocations */ 632 646 if (xfs_sb_version_hassparseinodes(&tp->t_mountp->m_sb) && 633 - args.mp->m_ialloc_min_blks < args.mp->m_ialloc_blks) 647 + igeo->ialloc_min_blks < igeo->ialloc_blks) 634 648 do_sparse = prandom_u32() & 1; 635 649 #endif 636 650 ··· 638 652 * Locking will ensure that we don't have two callers in here 639 653 * at one time. 640 654 */ 641 - newlen = args.mp->m_ialloc_inos; 642 - if (args.mp->m_maxicount && 655 + newlen = igeo->ialloc_inos; 656 + if (igeo->maxicount && 643 657 percpu_counter_read_positive(&args.mp->m_icount) + newlen > 644 - args.mp->m_maxicount) 658 + igeo->maxicount) 645 659 return -ENOSPC; 646 - args.minlen = args.maxlen = args.mp->m_ialloc_blks; 660 + args.minlen = args.maxlen = igeo->ialloc_blks; 647 661 /* 648 662 * First try to allocate inodes contiguous with the last-allocated 649 663 * chunk of inodes. If the filesystem is striped, this will fill ··· 653 667 newino = be32_to_cpu(agi->agi_newino); 654 668 agno = be32_to_cpu(agi->agi_seqno); 655 669 args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) + 656 - args.mp->m_ialloc_blks; 670 + igeo->ialloc_blks; 657 671 if (do_sparse) 658 672 goto sparse_alloc; 659 673 if (likely(newino != NULLAGINO && ··· 676 690 * but not to use them in the actual exact allocation. 677 691 */ 678 692 args.alignment = 1; 679 - args.minalignslop = args.mp->m_cluster_align - 1; 693 + args.minalignslop = igeo->cluster_align - 1; 680 694 681 695 /* Allow space for the inode btree to split. 
*/ 682 - args.minleft = args.mp->m_in_maxlevels - 1; 696 + args.minleft = igeo->inobt_maxlevels - 1; 683 697 if ((error = xfs_alloc_vextent(&args))) 684 698 return error; 685 699 ··· 706 720 * pieces, so don't need alignment anyway. 707 721 */ 708 722 isaligned = 0; 709 - if (args.mp->m_sinoalign) { 723 + if (igeo->ialloc_align) { 710 724 ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN)); 711 725 args.alignment = args.mp->m_dalign; 712 726 isaligned = 1; 713 727 } else 714 - args.alignment = args.mp->m_cluster_align; 728 + args.alignment = igeo->cluster_align; 715 729 /* 716 730 * Need to figure out where to allocate the inode blocks. 717 731 * Ideally they should be spaced out through the a.g. ··· 727 741 /* 728 742 * Allow space for the inode btree to split. 729 743 */ 730 - args.minleft = args.mp->m_in_maxlevels - 1; 744 + args.minleft = igeo->inobt_maxlevels - 1; 731 745 if ((error = xfs_alloc_vextent(&args))) 732 746 return error; 733 747 } ··· 740 754 args.type = XFS_ALLOCTYPE_NEAR_BNO; 741 755 args.agbno = be32_to_cpu(agi->agi_root); 742 756 args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); 743 - args.alignment = args.mp->m_cluster_align; 757 + args.alignment = igeo->cluster_align; 744 758 if ((error = xfs_alloc_vextent(&args))) 745 759 return error; 746 760 } ··· 750 764 * the sparse allocation length is smaller than a full chunk. 
751 765 */ 752 766 if (xfs_sb_version_hassparseinodes(&args.mp->m_sb) && 753 - args.mp->m_ialloc_min_blks < args.mp->m_ialloc_blks && 767 + igeo->ialloc_min_blks < igeo->ialloc_blks && 754 768 args.fsbno == NULLFSBLOCK) { 755 769 sparse_alloc: 756 770 args.type = XFS_ALLOCTYPE_NEAR_BNO; ··· 759 773 args.alignment = args.mp->m_sb.sb_spino_align; 760 774 args.prod = 1; 761 775 762 - args.minlen = args.mp->m_ialloc_min_blks; 776 + args.minlen = igeo->ialloc_min_blks; 763 777 args.maxlen = args.minlen; 764 778 765 779 /* ··· 775 789 args.min_agbno = args.mp->m_sb.sb_inoalignmt; 776 790 args.max_agbno = round_down(args.mp->m_sb.sb_agblocks, 777 791 args.mp->m_sb.sb_inoalignmt) - 778 - args.mp->m_ialloc_blks; 792 + igeo->ialloc_blks; 779 793 780 794 error = xfs_alloc_vextent(&args); 781 795 if (error) ··· 992 1006 * space needed for alignment of inode chunks when checking the 993 1007 * longest contiguous free space in the AG - this prevents us 994 1008 * from getting ENOSPC because we have free space larger than 995 - * m_ialloc_blks but alignment constraints prevent us from using 1009 + * ialloc_blks but alignment constraints prevent us from using 996 1010 * it. 997 1011 * 998 1012 * If we can't find an AG with space for full alignment slack to ··· 1001 1015 * if we fail allocation due to alignment issues then it is most 1002 1016 * likely a real ENOSPC condition. 
1003 1017 */ 1004 - ineed = mp->m_ialloc_min_blks; 1018 + ineed = M_IGEO(mp)->ialloc_min_blks; 1005 1019 if (flags && ineed > 1) 1006 - ineed += mp->m_cluster_align; 1020 + ineed += M_IGEO(mp)->cluster_align; 1007 1021 longest = pag->pagf_longest; 1008 1022 if (!longest) 1009 1023 longest = pag->pagf_flcount > 0; ··· 1689 1703 int noroom = 0; 1690 1704 xfs_agnumber_t start_agno; 1691 1705 struct xfs_perag *pag; 1706 + struct xfs_ino_geometry *igeo = M_IGEO(mp); 1692 1707 int okalloc = 1; 1693 1708 1694 1709 if (*IO_agbp) { ··· 1720 1733 * Read rough value of mp->m_icount by percpu_counter_read_positive, 1721 1734 * which will sacrifice the preciseness but improve the performance. 1722 1735 */ 1723 - if (mp->m_maxicount && 1724 - percpu_counter_read_positive(&mp->m_icount) + mp->m_ialloc_inos 1725 - > mp->m_maxicount) { 1736 + if (igeo->maxicount && 1737 + percpu_counter_read_positive(&mp->m_icount) + igeo->ialloc_inos 1738 + > igeo->maxicount) { 1726 1739 noroom = 1; 1727 1740 okalloc = 0; 1728 1741 } ··· 1839 1852 if (!xfs_inobt_issparse(rec->ir_holemask)) { 1840 1853 /* not sparse, calculate extent info directly */ 1841 1854 xfs_bmap_add_free(tp, XFS_AGB_TO_FSB(mp, agno, sagbno), 1842 - mp->m_ialloc_blks, &XFS_RMAP_OINFO_INODES); 1855 + M_IGEO(mp)->ialloc_blks, 1856 + &XFS_RMAP_OINFO_INODES); 1843 1857 return; 1844 1858 } 1845 1859 ··· 2249 2261 2250 2262 /* check that the returned record contains the required inode */ 2251 2263 if (rec.ir_startino > agino || 2252 - rec.ir_startino + mp->m_ialloc_inos <= agino) 2264 + rec.ir_startino + M_IGEO(mp)->ialloc_inos <= agino) 2253 2265 return -EINVAL; 2254 2266 2255 2267 /* for untrusted inodes check it is allocated first */ ··· 2340 2352 * If the inode cluster size is the same as the blocksize or 2341 2353 * smaller we get to the buffer by simple arithmetics. 
2342 2354 */ 2343 - if (mp->m_blocks_per_cluster == 1) { 2355 + if (M_IGEO(mp)->blocks_per_cluster == 1) { 2344 2356 offset = XFS_INO_TO_OFFSET(mp, ino); 2345 2357 ASSERT(offset < mp->m_sb.sb_inopblock); 2346 2358 ··· 2356 2368 * find the location. Otherwise we have to do a btree 2357 2369 * lookup to find the location. 2358 2370 */ 2359 - if (mp->m_inoalign_mask) { 2360 - offset_agbno = agbno & mp->m_inoalign_mask; 2371 + if (M_IGEO(mp)->inoalign_mask) { 2372 + offset_agbno = agbno & M_IGEO(mp)->inoalign_mask; 2361 2373 chunk_agbno = agbno - offset_agbno; 2362 2374 } else { 2363 2375 error = xfs_imap_lookup(mp, tp, agno, agino, agbno, ··· 2369 2381 out_map: 2370 2382 ASSERT(agbno >= chunk_agbno); 2371 2383 cluster_agbno = chunk_agbno + 2372 - ((offset_agbno / mp->m_blocks_per_cluster) * 2373 - mp->m_blocks_per_cluster); 2384 + ((offset_agbno / M_IGEO(mp)->blocks_per_cluster) * 2385 + M_IGEO(mp)->blocks_per_cluster); 2374 2386 offset = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) + 2375 2387 XFS_INO_TO_OFFSET(mp, ino); 2376 2388 2377 2389 imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, cluster_agbno); 2378 - imap->im_len = XFS_FSB_TO_BB(mp, mp->m_blocks_per_cluster); 2390 + imap->im_len = XFS_FSB_TO_BB(mp, M_IGEO(mp)->blocks_per_cluster); 2379 2391 imap->im_boffset = (unsigned short)(offset << mp->m_sb.sb_inodelog); 2380 2392 2381 2393 /* ··· 2394 2406 return -EINVAL; 2395 2407 } 2396 2408 return 0; 2397 - } 2398 - 2399 - /* 2400 - * Compute and fill in value of m_in_maxlevels. 
2401 - */ 2402 - void 2403 - xfs_ialloc_compute_maxlevels( 2404 - xfs_mount_t *mp) /* file system mount structure */ 2405 - { 2406 - uint inodes; 2407 - 2408 - inodes = (1LL << XFS_INO_AGINO_BITS(mp)) >> XFS_INODES_PER_CHUNK_LOG; 2409 - mp->m_in_maxlevels = xfs_btree_compute_maxlevels(mp->m_inobt_mnr, 2410 - inodes); 2411 2409 } 2412 2410 2413 2411 /* ··· 2467 2493 xfs_agi_verify( 2468 2494 struct xfs_buf *bp) 2469 2495 { 2470 - struct xfs_mount *mp = bp->b_target->bt_mount; 2496 + struct xfs_mount *mp = bp->b_mount; 2471 2497 struct xfs_agi *agi = XFS_BUF_TO_AGI(bp); 2472 2498 int i; 2473 2499 ··· 2519 2545 xfs_agi_read_verify( 2520 2546 struct xfs_buf *bp) 2521 2547 { 2522 - struct xfs_mount *mp = bp->b_target->bt_mount; 2548 + struct xfs_mount *mp = bp->b_mount; 2523 2549 xfs_failaddr_t fa; 2524 2550 2525 2551 if (xfs_sb_version_hascrc(&mp->m_sb) && ··· 2536 2562 xfs_agi_write_verify( 2537 2563 struct xfs_buf *bp) 2538 2564 { 2539 - struct xfs_mount *mp = bp->b_target->bt_mount; 2565 + struct xfs_mount *mp = bp->b_mount; 2540 2566 struct xfs_buf_log_item *bip = bp->b_log_item; 2541 2567 xfs_failaddr_t fa; 2542 2568 ··· 2741 2767 *count = ci.count; 2742 2768 *freecount = ci.freecount; 2743 2769 return 0; 2770 + } 2771 + 2772 + /* 2773 + * Initialize inode-related geometry information. 2774 + * 2775 + * Compute the inode btree min and max levels and set maxicount. 2776 + * 2777 + * Set the inode cluster size. This may still be overridden by the file 2778 + * system block size if it is larger than the chosen cluster size. 2779 + * 2780 + * For v5 filesystems, scale the cluster size with the inode size to keep a 2781 + * constant ratio of inode per cluster buffer, but only if mkfs has set the 2782 + * inode alignment value appropriately for larger cluster sizes. 2783 + * 2784 + * Then compute the inode cluster alignment information. 
2785 + */ 2786 + void 2787 + xfs_ialloc_setup_geometry( 2788 + struct xfs_mount *mp) 2789 + { 2790 + struct xfs_sb *sbp = &mp->m_sb; 2791 + struct xfs_ino_geometry *igeo = M_IGEO(mp); 2792 + uint64_t icount; 2793 + uint inodes; 2794 + 2795 + /* Compute inode btree geometry. */ 2796 + igeo->agino_log = sbp->sb_inopblog + sbp->sb_agblklog; 2797 + igeo->inobt_mxr[0] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 1); 2798 + igeo->inobt_mxr[1] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 0); 2799 + igeo->inobt_mnr[0] = igeo->inobt_mxr[0] / 2; 2800 + igeo->inobt_mnr[1] = igeo->inobt_mxr[1] / 2; 2801 + 2802 + igeo->ialloc_inos = max_t(uint16_t, XFS_INODES_PER_CHUNK, 2803 + sbp->sb_inopblock); 2804 + igeo->ialloc_blks = igeo->ialloc_inos >> sbp->sb_inopblog; 2805 + 2806 + if (sbp->sb_spino_align) 2807 + igeo->ialloc_min_blks = sbp->sb_spino_align; 2808 + else 2809 + igeo->ialloc_min_blks = igeo->ialloc_blks; 2810 + 2811 + /* Compute and fill in value of m_ino_geo.inobt_maxlevels. */ 2812 + inodes = (1LL << XFS_INO_AGINO_BITS(mp)) >> XFS_INODES_PER_CHUNK_LOG; 2813 + igeo->inobt_maxlevels = xfs_btree_compute_maxlevels(igeo->inobt_mnr, 2814 + inodes); 2815 + 2816 + /* Set the maximum inode count for this filesystem. */ 2817 + if (sbp->sb_imax_pct) { 2818 + /* 2819 + * Make sure the maximum inode count is a multiple 2820 + * of the units we allocate inodes in. 2821 + */ 2822 + icount = sbp->sb_dblocks * sbp->sb_imax_pct; 2823 + do_div(icount, 100); 2824 + do_div(icount, igeo->ialloc_blks); 2825 + igeo->maxicount = XFS_FSB_TO_INO(mp, 2826 + icount * igeo->ialloc_blks); 2827 + } else { 2828 + igeo->maxicount = 0; 2829 + } 2830 + 2831 + /* 2832 + * Compute the desired size of an inode cluster buffer size, which 2833 + * starts at 8K and (on v5 filesystems) scales up with larger inode 2834 + * sizes. 
2835 + * 2836 + * Preserve the desired inode cluster size because the sparse inodes 2837 + * feature uses that desired size (not the actual size) to compute the 2838 + * sparse inode alignment. The mount code validates this value, so we 2839 + * cannot change the behavior. 2840 + */ 2841 + igeo->inode_cluster_size_raw = XFS_INODE_BIG_CLUSTER_SIZE; 2842 + if (xfs_sb_version_hascrc(&mp->m_sb)) { 2843 + int new_size = igeo->inode_cluster_size_raw; 2844 + 2845 + new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE; 2846 + if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size)) 2847 + igeo->inode_cluster_size_raw = new_size; 2848 + } 2849 + 2850 + /* Calculate inode cluster ratios. */ 2851 + if (igeo->inode_cluster_size_raw > mp->m_sb.sb_blocksize) 2852 + igeo->blocks_per_cluster = XFS_B_TO_FSBT(mp, 2853 + igeo->inode_cluster_size_raw); 2854 + else 2855 + igeo->blocks_per_cluster = 1; 2856 + igeo->inode_cluster_size = XFS_FSB_TO_B(mp, igeo->blocks_per_cluster); 2857 + igeo->inodes_per_cluster = XFS_FSB_TO_INO(mp, igeo->blocks_per_cluster); 2858 + 2859 + /* Calculate inode cluster alignment. */ 2860 + if (xfs_sb_version_hasalign(&mp->m_sb) && 2861 + mp->m_sb.sb_inoalignmt >= igeo->blocks_per_cluster) 2862 + igeo->cluster_align = mp->m_sb.sb_inoalignmt; 2863 + else 2864 + igeo->cluster_align = 1; 2865 + igeo->inoalign_mask = igeo->cluster_align - 1; 2866 + igeo->cluster_align_inodes = XFS_FSB_TO_INO(mp, igeo->cluster_align); 2867 + 2868 + /* 2869 + * If we are using stripe alignment, check whether 2870 + * the stripe unit is a multiple of the inode alignment 2871 + */ 2872 + if (mp->m_dalign && igeo->inoalign_mask && 2873 + !(mp->m_dalign & igeo->inoalign_mask)) 2874 + igeo->ialloc_align = mp->m_dalign; 2875 + else 2876 + igeo->ialloc_align = 0; 2744 2877 }
+1 -17
fs/xfs/libxfs/xfs_ialloc.h
··· 23 23 * sparse chunks */ 24 24 }; 25 25 26 - /* Calculate and return the number of filesystem blocks per inode cluster */ 27 - static inline int 28 - xfs_icluster_size_fsb( 29 - struct xfs_mount *mp) 30 - { 31 - if (mp->m_sb.sb_blocksize >= mp->m_inode_cluster_size) 32 - return 1; 33 - return mp->m_inode_cluster_size >> mp->m_sb.sb_blocklog; 34 - } 35 - 36 26 /* 37 27 * Make an inode pointer out of the buffer/offset. 38 28 */ ··· 84 94 xfs_ino_t ino, /* inode to locate */ 85 95 struct xfs_imap *imap, /* location map structure */ 86 96 uint flags); /* flags for inode btree lookup */ 87 - 88 - /* 89 - * Compute and fill in value of m_in_maxlevels. 90 - */ 91 - void 92 - xfs_ialloc_compute_maxlevels( 93 - struct xfs_mount *mp); /* file system mount structure */ 94 97 95 98 /* 96 99 * Log specified fields for the ag hdr (inode section) ··· 151 168 int *stat); 152 169 153 170 int xfs_ialloc_cluster_alignment(struct xfs_mount *mp); 171 + void xfs_ialloc_setup_geometry(struct xfs_mount *mp); 154 172 155 173 #endif /* __XFS_IALLOC_H__ */
+42 -14
fs/xfs/libxfs/xfs_ialloc_btree.c
··· 11 11 #include "xfs_trans_resv.h" 12 12 #include "xfs_bit.h" 13 13 #include "xfs_mount.h" 14 - #include "xfs_inode.h" 15 14 #include "xfs_btree.h" 16 15 #include "xfs_ialloc.h" 17 16 #include "xfs_ialloc_btree.h" 18 17 #include "xfs_alloc.h" 19 18 #include "xfs_error.h" 20 19 #include "xfs_trace.h" 21 - #include "xfs_cksum.h" 22 20 #include "xfs_trans.h" 23 21 #include "xfs_rmap.h" 24 22 ··· 26 28 struct xfs_btree_cur *cur, 27 29 int level) 28 30 { 29 - return cur->bc_mp->m_inobt_mnr[level != 0]; 31 + return M_IGEO(cur->bc_mp)->inobt_mnr[level != 0]; 30 32 } 31 33 32 34 STATIC struct xfs_btree_cur * ··· 162 164 struct xfs_btree_cur *cur, 163 165 int level) 164 166 { 165 - return cur->bc_mp->m_inobt_mxr[level != 0]; 167 + return M_IGEO(cur->bc_mp)->inobt_mxr[level != 0]; 166 168 } 167 169 168 170 STATIC void ··· 253 255 xfs_inobt_verify( 254 256 struct xfs_buf *bp) 255 257 { 256 - struct xfs_mount *mp = bp->b_target->bt_mount; 258 + struct xfs_mount *mp = bp->b_mount; 257 259 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 258 260 xfs_failaddr_t fa; 259 261 unsigned int level; ··· 279 281 280 282 /* level verification */ 281 283 level = be16_to_cpu(block->bb_level); 282 - if (level >= mp->m_in_maxlevels) 284 + if (level >= M_IGEO(mp)->inobt_maxlevels) 283 285 return __this_address; 284 286 285 - return xfs_btree_sblock_verify(bp, mp->m_inobt_mxr[level != 0]); 287 + return xfs_btree_sblock_verify(bp, 288 + M_IGEO(mp)->inobt_mxr[level != 0]); 286 289 } 287 290 288 291 static void ··· 545 546 xfs_agblock_t agblocks = xfs_ag_block_count(mp, agno); 546 547 547 548 /* Bail out if we're uninitialized, which can happen in mkfs. 
*/ 548 - if (mp->m_inobt_mxr[0] == 0) 549 + if (M_IGEO(mp)->inobt_mxr[0] == 0) 549 550 return 0; 550 551 551 552 /* ··· 557 558 XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart) == agno) 558 559 agblocks -= mp->m_sb.sb_logblocks; 559 560 560 - return xfs_btree_calc_size(mp->m_inobt_mnr, 561 + return xfs_btree_calc_size(M_IGEO(mp)->inobt_mnr, 561 562 (uint64_t)agblocks * mp->m_sb.sb_inopblock / 562 563 XFS_INODES_PER_CHUNK); 564 + } 565 + 566 + /* Read AGI and create inobt cursor. */ 567 + int 568 + xfs_inobt_cur( 569 + struct xfs_mount *mp, 570 + struct xfs_trans *tp, 571 + xfs_agnumber_t agno, 572 + xfs_btnum_t which, 573 + struct xfs_btree_cur **curpp, 574 + struct xfs_buf **agi_bpp) 575 + { 576 + struct xfs_btree_cur *cur; 577 + int error; 578 + 579 + ASSERT(*agi_bpp == NULL); 580 + ASSERT(*curpp == NULL); 581 + 582 + error = xfs_ialloc_read_agi(mp, tp, agno, agi_bpp); 583 + if (error) 584 + return error; 585 + 586 + cur = xfs_inobt_init_cursor(mp, tp, *agi_bpp, agno, which); 587 + if (!cur) { 588 + xfs_trans_brelse(tp, *agi_bpp); 589 + *agi_bpp = NULL; 590 + return -ENOMEM; 591 + } 592 + *curpp = cur; 593 + return 0; 563 594 } 564 595 565 596 static int ··· 600 571 xfs_btnum_t btnum, 601 572 xfs_extlen_t *tree_blocks) 602 573 { 603 - struct xfs_buf *agbp; 604 - struct xfs_btree_cur *cur; 574 + struct xfs_buf *agbp = NULL; 575 + struct xfs_btree_cur *cur = NULL; 605 576 int error; 606 577 607 - error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); 578 + error = xfs_inobt_cur(mp, tp, agno, btnum, &cur, &agbp); 608 579 if (error) 609 580 return error; 610 581 611 - cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, btnum); 612 582 error = xfs_btree_count_blocks(cur, tree_blocks); 613 583 xfs_btree_del_cursor(cur, error); 614 584 xfs_trans_brelse(tp, agbp); ··· 647 619 struct xfs_mount *mp, 648 620 unsigned long long len) 649 621 { 650 - return xfs_btree_calc_size(mp->m_inobt_mnr, len); 622 + return xfs_btree_calc_size(M_IGEO(mp)->inobt_mnr, len); 651 623 }
+3
fs/xfs/libxfs/xfs_ialloc_btree.h
··· 64 64 xfs_agnumber_t agno, xfs_extlen_t *ask, xfs_extlen_t *used); 65 65 extern xfs_extlen_t xfs_iallocbt_calc_size(struct xfs_mount *mp, 66 66 unsigned long long len); 67 + int xfs_inobt_cur(struct xfs_mount *mp, struct xfs_trans *tp, 68 + xfs_agnumber_t agno, xfs_btnum_t btnum, 69 + struct xfs_btree_cur **curpp, struct xfs_buf **agi_bpp); 67 70 68 71 #endif /* __XFS_IALLOC_BTREE_H__ */
+1 -5
fs/xfs/libxfs/xfs_iext_tree.c
··· 3 3 * Copyright (c) 2017 Christoph Hellwig. 4 4 */ 5 5 6 - #include <linux/cache.h> 7 - #include <linux/kernel.h> 8 - #include <linux/slab.h> 9 6 #include "xfs.h" 7 + #include "xfs_shared.h" 10 8 #include "xfs_format.h" 11 9 #include "xfs_bit.h" 12 10 #include "xfs_log_format.h" 13 11 #include "xfs_inode.h" 14 - #include "xfs_inode_fork.h" 15 12 #include "xfs_trans_resv.h" 16 13 #include "xfs_mount.h" 17 - #include "xfs_bmap.h" 18 14 #include "xfs_trace.h" 19 15 20 16 /*
+2 -7
fs/xfs/libxfs/xfs_inode_buf.c
··· 10 10 #include "xfs_log_format.h" 11 11 #include "xfs_trans_resv.h" 12 12 #include "xfs_mount.h" 13 - #include "xfs_defer.h" 14 13 #include "xfs_inode.h" 15 14 #include "xfs_errortag.h" 16 15 #include "xfs_error.h" 17 - #include "xfs_cksum.h" 18 16 #include "xfs_icache.h" 19 17 #include "xfs_trans.h" 20 18 #include "xfs_ialloc.h" ··· 31 33 xfs_buf_t *bp) 32 34 { 33 35 int i; 34 - int j; 35 36 xfs_dinode_t *dip; 36 37 37 - j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog; 38 - 39 - for (i = 0; i < j; i++) { 38 + for (i = 0; i < M_IGEO(mp)->inodes_per_cluster; i++) { 40 39 dip = xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize); 41 40 if (!dip->di_next_unlinked) { 42 41 xfs_alert(mp, ··· 75 80 struct xfs_buf *bp, 76 81 bool readahead) 77 82 { 78 - struct xfs_mount *mp = bp->b_target->bt_mount; 83 + struct xfs_mount *mp = bp->b_mount; 79 84 xfs_agnumber_t agno; 80 85 int i; 81 86 int ni;
+1 -3
fs/xfs/libxfs/xfs_inode_fork.c
··· 3 3 * Copyright (c) 2000-2006 Silicon Graphics, Inc. 4 4 * All Rights Reserved. 5 5 */ 6 - #include <linux/log2.h> 7 6 8 7 #include "xfs.h" 9 8 #include "xfs_fs.h" 9 + #include "xfs_shared.h" 10 10 #include "xfs_format.h" 11 11 #include "xfs_log_format.h" 12 12 #include "xfs_trans_resv.h" ··· 19 19 #include "xfs_bmap.h" 20 20 #include "xfs_error.h" 21 21 #include "xfs_trace.h" 22 - #include "xfs_attr_sf.h" 23 22 #include "xfs_da_format.h" 24 23 #include "xfs_da_btree.h" 25 24 #include "xfs_dir2_priv.h" 26 25 #include "xfs_attr_leaf.h" 27 - #include "xfs_shared.h" 28 26 29 27 kmem_zone_t *xfs_ifork_zone; 30 28
-2
fs/xfs/libxfs/xfs_log_rlimit.c
··· 12 12 #include "xfs_mount.h" 13 13 #include "xfs_da_format.h" 14 14 #include "xfs_trans_space.h" 15 - #include "xfs_inode.h" 16 15 #include "xfs_da_btree.h" 17 - #include "xfs_attr_leaf.h" 18 16 #include "xfs_bmap_btree.h" 19 17 20 18 /*
-2
fs/xfs/libxfs/xfs_refcount.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_log_format.h" 11 11 #include "xfs_trans_resv.h" 12 - #include "xfs_sb.h" 13 12 #include "xfs_mount.h" 14 13 #include "xfs_defer.h" 15 14 #include "xfs_btree.h" ··· 18 19 #include "xfs_errortag.h" 19 20 #include "xfs_error.h" 20 21 #include "xfs_trace.h" 21 - #include "xfs_cksum.h" 22 22 #include "xfs_trans.h" 23 23 #include "xfs_bit.h" 24 24 #include "xfs_refcount.h"
+1 -3
fs/xfs/libxfs/xfs_refcount_btree.c
··· 12 12 #include "xfs_sb.h" 13 13 #include "xfs_mount.h" 14 14 #include "xfs_btree.h" 15 - #include "xfs_bmap.h" 16 15 #include "xfs_refcount_btree.h" 17 16 #include "xfs_alloc.h" 18 17 #include "xfs_error.h" 19 18 #include "xfs_trace.h" 20 - #include "xfs_cksum.h" 21 19 #include "xfs_trans.h" 22 20 #include "xfs_bit.h" 23 21 #include "xfs_rmap.h" ··· 201 203 xfs_refcountbt_verify( 202 204 struct xfs_buf *bp) 203 205 { 204 - struct xfs_mount *mp = bp->b_target->bt_mount; 206 + struct xfs_mount *mp = bp->b_mount; 205 207 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 206 208 struct xfs_perag *pag = bp->b_pag; 207 209 xfs_failaddr_t fa;
-7
fs/xfs/libxfs/xfs_rmap.c
··· 10 10 #include "xfs_log_format.h" 11 11 #include "xfs_trans_resv.h" 12 12 #include "xfs_bit.h" 13 - #include "xfs_sb.h" 14 13 #include "xfs_mount.h" 15 14 #include "xfs_defer.h" 16 - #include "xfs_da_format.h" 17 - #include "xfs_da_btree.h" 18 15 #include "xfs_btree.h" 19 16 #include "xfs_trans.h" 20 17 #include "xfs_alloc.h" 21 18 #include "xfs_rmap.h" 22 19 #include "xfs_rmap_btree.h" 23 - #include "xfs_trans_space.h" 24 20 #include "xfs_trace.h" 25 21 #include "xfs_errortag.h" 26 22 #include "xfs_error.h" 27 - #include "xfs_extent_busy.h" 28 - #include "xfs_bmap.h" 29 23 #include "xfs_inode.h" 30 - #include "xfs_ialloc.h" 31 24 32 25 /* 33 26 * Lookup the first record less than or equal to [bno, len, owner, offset]
+1 -5
fs/xfs/libxfs/xfs_rmap_btree.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_log_format.h" 11 11 #include "xfs_trans_resv.h" 12 - #include "xfs_bit.h" 13 12 #include "xfs_sb.h" 14 13 #include "xfs_mount.h" 15 - #include "xfs_defer.h" 16 - #include "xfs_inode.h" 17 14 #include "xfs_trans.h" 18 15 #include "xfs_alloc.h" 19 16 #include "xfs_btree.h" 20 17 #include "xfs_rmap.h" 21 18 #include "xfs_rmap_btree.h" 22 19 #include "xfs_trace.h" 23 - #include "xfs_cksum.h" 24 20 #include "xfs_error.h" 25 21 #include "xfs_extent_busy.h" 26 22 #include "xfs_ag_resv.h" ··· 288 292 xfs_rmapbt_verify( 289 293 struct xfs_buf *bp) 290 294 { 291 - struct xfs_mount *mp = bp->b_target->bt_mount; 295 + struct xfs_mount *mp = bp->b_mount; 292 296 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 293 297 struct xfs_perag *pag = bp->b_pag; 294 298 xfs_failaddr_t fa;
-8
fs/xfs/libxfs/xfs_rtbitmap.c
··· 13 13 #include "xfs_mount.h" 14 14 #include "xfs_inode.h" 15 15 #include "xfs_bmap.h" 16 - #include "xfs_bmap_util.h" 17 - #include "xfs_bmap_btree.h" 18 - #include "xfs_alloc.h" 19 - #include "xfs_error.h" 20 16 #include "xfs_trans.h" 21 - #include "xfs_trans_space.h" 22 - #include "xfs_trace.h" 23 - #include "xfs_buf.h" 24 - #include "xfs_icache.h" 25 17 #include "xfs_rtalloc.h" 26 18 27 19
+10 -29
fs/xfs/libxfs/xfs_sb.c
··· 10 10 #include "xfs_log_format.h" 11 11 #include "xfs_trans_resv.h" 12 12 #include "xfs_bit.h" 13 - #include "xfs_sb.h" 14 13 #include "xfs_mount.h" 15 - #include "xfs_defer.h" 16 - #include "xfs_inode.h" 17 14 #include "xfs_ialloc.h" 18 15 #include "xfs_alloc.h" 19 16 #include "xfs_error.h" 20 17 #include "xfs_trace.h" 21 - #include "xfs_cksum.h" 22 18 #include "xfs_trans.h" 23 19 #include "xfs_buf_item.h" 24 20 #include "xfs_bmap_btree.h" 25 21 #include "xfs_alloc_btree.h" 26 - #include "xfs_ialloc_btree.h" 27 22 #include "xfs_log.h" 28 23 #include "xfs_rmap_btree.h" 29 - #include "xfs_bmap.h" 30 24 #include "xfs_refcount_btree.h" 31 25 #include "xfs_da_format.h" 32 - #include "xfs_da_btree.h" 33 26 #include "xfs_health.h" 34 27 35 28 /* ··· 679 686 struct xfs_buf *bp) 680 687 { 681 688 struct xfs_sb sb; 682 - struct xfs_mount *mp = bp->b_target->bt_mount; 689 + struct xfs_mount *mp = bp->b_mount; 683 690 struct xfs_dsb *dsb = XFS_BUF_TO_SBP(bp); 684 691 int error; 685 692 ··· 745 752 struct xfs_buf *bp) 746 753 { 747 754 struct xfs_sb sb; 748 - struct xfs_mount *mp = bp->b_target->bt_mount; 755 + struct xfs_mount *mp = bp->b_mount; 749 756 struct xfs_buf_log_item *bip = bp->b_log_item; 750 757 int error; 751 758 ··· 793 800 * 794 801 * Mount initialization code establishing various mount 795 802 * fields from the superblock associated with the given 796 - * mount structure 803 + * mount structure. 804 + * 805 + * Inode geometry are calculated in xfs_ialloc_setup_geometry. 
797 806 */ 798 807 void 799 808 xfs_sb_mount_common( 800 - struct xfs_mount *mp, 801 - struct xfs_sb *sbp) 809 + struct xfs_mount *mp, 810 + struct xfs_sb *sbp) 802 811 { 803 812 mp->m_agfrotor = mp->m_agirotor = 0; 804 813 mp->m_maxagi = mp->m_sb.sb_agcount; ··· 808 813 mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT; 809 814 mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT; 810 815 mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1; 811 - mp->m_agino_log = sbp->sb_inopblog + sbp->sb_agblklog; 812 816 mp->m_blockmask = sbp->sb_blocksize - 1; 813 817 mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG; 814 818 mp->m_blockwmask = mp->m_blockwsize - 1; ··· 816 822 mp->m_alloc_mxr[1] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 0); 817 823 mp->m_alloc_mnr[0] = mp->m_alloc_mxr[0] / 2; 818 824 mp->m_alloc_mnr[1] = mp->m_alloc_mxr[1] / 2; 819 - 820 - mp->m_inobt_mxr[0] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 1); 821 - mp->m_inobt_mxr[1] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 0); 822 - mp->m_inobt_mnr[0] = mp->m_inobt_mxr[0] / 2; 823 - mp->m_inobt_mnr[1] = mp->m_inobt_mxr[1] / 2; 824 825 825 826 mp->m_bmap_dmxr[0] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 1); 826 827 mp->m_bmap_dmxr[1] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 0); ··· 833 844 mp->m_refc_mnr[1] = mp->m_refc_mxr[1] / 2; 834 845 835 846 mp->m_bsize = XFS_FSB_TO_BB(mp, 1); 836 - mp->m_ialloc_inos = max_t(uint16_t, XFS_INODES_PER_CHUNK, 837 - sbp->sb_inopblock); 838 - mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog; 839 - 840 - if (sbp->sb_spino_align) 841 - mp->m_ialloc_min_blks = sbp->sb_spino_align; 842 - else 843 - mp->m_ialloc_min_blks = mp->m_ialloc_blks; 844 847 mp->m_alloc_set_aside = xfs_alloc_set_aside(mp); 845 848 mp->m_ag_max_usable = xfs_alloc_ag_max_usable(mp); 846 849 } ··· 920 939 struct xfs_trans *tp) 921 940 { 922 941 struct xfs_mount *mp = tp->t_mountp; 923 - struct xfs_buf *bp = xfs_trans_getsb(tp, mp, 0); 942 + struct xfs_buf *bp = xfs_trans_getsb(tp, mp); 924 943 925 944 
mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount); 926 945 mp->m_sb.sb_ifree = percpu_counter_sum(&mp->m_ifree); ··· 986 1005 987 1006 bp = xfs_buf_get(mp->m_ddev_targp, 988 1007 XFS_AG_DADDR(mp, agno, XFS_SB_DADDR), 989 - XFS_FSS_TO_BB(mp, 1), 0); 1008 + XFS_FSS_TO_BB(mp, 1)); 990 1009 /* 991 1010 * If we get an error reading or writing alternate superblocks, 992 1011 * continue. xfs_repair chooses the "best" superblock based ··· 1050 1069 if (error) 1051 1070 return error; 1052 1071 1053 - bp = xfs_trans_getsb(tp, mp, 0); 1072 + bp = xfs_trans_getsb(tp, mp); 1054 1073 xfs_log_sb(tp); 1055 1074 xfs_trans_bhold(tp, bp); 1056 1075 xfs_trans_set_sync(tp);
+48 -1
fs/xfs/libxfs/xfs_shared.h
··· 65 65 #define XFS_TRANS_DQ_DIRTY 0x10 /* at least one dquot in trx dirty */ 66 66 #define XFS_TRANS_RESERVE 0x20 /* OK to use reserved data blocks */ 67 67 #define XFS_TRANS_NO_WRITECOUNT 0x40 /* do not elevate SB writecount */ 68 - #define XFS_TRANS_NOFS 0x80 /* pass KM_NOFS to kmem_alloc */ 69 68 /* 70 69 * LOWMODE is used by the allocator to activate the lowspace algorithm - when 71 70 * free space is running low the extent allocator may choose to allocate an ··· 134 135 void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp, 135 136 struct xfs_inode *ip, struct xfs_ifork *ifp); 136 137 xfs_failaddr_t xfs_symlink_shortform_verify(struct xfs_inode *ip); 138 + 139 + /* Computed inode geometry for the filesystem. */ 140 + struct xfs_ino_geometry { 141 + /* Maximum inode count in this filesystem. */ 142 + uint64_t maxicount; 143 + 144 + /* Actual inode cluster buffer size, in bytes. */ 145 + unsigned int inode_cluster_size; 146 + 147 + /* 148 + * Desired inode cluster buffer size, in bytes. This value is not 149 + * rounded up to at least one filesystem block, which is necessary for 150 + * the sole purpose of validating sb_spino_align. Runtime code must 151 + * only ever use inode_cluster_size. 152 + */ 153 + unsigned int inode_cluster_size_raw; 154 + 155 + /* Inode cluster sizes, adjusted to be at least 1 fsb. */ 156 + unsigned int inodes_per_cluster; 157 + unsigned int blocks_per_cluster; 158 + 159 + /* Inode cluster alignment. */ 160 + unsigned int cluster_align; 161 + unsigned int cluster_align_inodes; 162 + unsigned int inoalign_mask; /* mask sb_inoalignmt if used */ 163 + 164 + unsigned int inobt_mxr[2]; /* max inobt btree records */ 165 + unsigned int inobt_mnr[2]; /* min inobt btree records */ 166 + unsigned int inobt_maxlevels; /* max inobt btree levels. */ 167 + 168 + /* Size of inode allocations under normal operation. 
*/ 169 + unsigned int ialloc_inos; 170 + unsigned int ialloc_blks; 171 + 172 + /* Minimum inode blocks for a sparse allocation. */ 173 + unsigned int ialloc_min_blks; 174 + 175 + /* stripe unit inode alignment */ 176 + unsigned int ialloc_align; 177 + 178 + unsigned int agino_log; /* #bits for agino in inum */ 179 + }; 180 + 181 + /* Keep iterating the data structure. */ 182 + #define XFS_ITER_CONTINUE (0) 183 + 184 + /* Stop iterating the data structure. */ 185 + #define XFS_ITER_ABORT (1) 137 186 138 187 #endif /* __XFS_SHARED_H__ */
+8 -9
fs/xfs/libxfs/xfs_trans_resv.c
··· 15 15 #include "xfs_da_btree.h" 16 16 #include "xfs_inode.h" 17 17 #include "xfs_bmap_btree.h" 18 - #include "xfs_ialloc.h" 19 18 #include "xfs_quota.h" 20 19 #include "xfs_trans.h" 21 20 #include "xfs_qm.h" 22 21 #include "xfs_trans_space.h" 23 - #include "xfs_trace.h" 24 22 25 23 #define _ALLOC true 26 24 #define _FREE false ··· 134 136 xfs_calc_inobt_res( 135 137 struct xfs_mount *mp) 136 138 { 137 - return xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + 138 - xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), 139 - XFS_FSB_TO_B(mp, 1)); 139 + return xfs_calc_buf_res(M_IGEO(mp)->inobt_maxlevels, 140 + XFS_FSB_TO_B(mp, 1)) + 141 + xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), 142 + XFS_FSB_TO_B(mp, 1)); 140 143 } 141 144 142 145 /* ··· 166 167 * includes: 167 168 * 168 169 * the allocation btrees: 2 trees * (max depth - 1) * block size 169 - * the inode chunk: m_ialloc_blks * N 170 + * the inode chunk: m_ino_geo.ialloc_blks * N 170 171 * 171 172 * The size N of the inode chunk reservation depends on whether it is for 172 173 * allocation or free and which type of create transaction is in use. An inode ··· 192 193 size = XFS_FSB_TO_B(mp, 1); 193 194 } 194 195 195 - res += xfs_calc_buf_res(mp->m_ialloc_blks, size); 196 + res += xfs_calc_buf_res(M_IGEO(mp)->ialloc_blks, size); 196 197 return res; 197 198 } 198 199 ··· 306 307 struct xfs_mount *mp) 307 308 { 308 309 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + 309 - 2 * max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size); 310 + 2 * M_IGEO(mp)->inode_cluster_size; 310 311 } 311 312 312 313 /* ··· 344 345 xfs_calc_iunlink_add_reservation(xfs_mount_t *mp) 345 346 { 346 347 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + 347 - max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size); 348 + M_IGEO(mp)->inode_cluster_size; 348 349 } 349 350 350 351 /*
+4 -3
fs/xfs/libxfs/xfs_trans_space.h
··· 56 56 #define XFS_DIRREMOVE_SPACE_RES(mp) \ 57 57 XFS_DAREMOVE_SPACE_RES(mp, XFS_DATA_FORK) 58 58 #define XFS_IALLOC_SPACE_RES(mp) \ 59 - ((mp)->m_ialloc_blks + \ 59 + (M_IGEO(mp)->ialloc_blks + \ 60 60 (xfs_sb_version_hasfinobt(&mp->m_sb) ? 2 : 1 * \ 61 - ((mp)->m_in_maxlevels - 1))) 61 + (M_IGEO(mp)->inobt_maxlevels - 1))) 62 62 63 63 /* 64 64 * Space reservation values for various transactions. ··· 94 94 #define XFS_SYMLINK_SPACE_RES(mp,nl,b) \ 95 95 (XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl) + (b)) 96 96 #define XFS_IFREE_SPACE_RES(mp) \ 97 - (xfs_sb_version_hasfinobt(&mp->m_sb) ? (mp)->m_in_maxlevels : 0) 97 + (xfs_sb_version_hasfinobt(&mp->m_sb) ? \ 98 + M_IGEO(mp)->inobt_maxlevels : 0) 98 99 99 100 100 101 #endif /* __XFS_TRANS_SPACE_H__ */
+2 -11
fs/xfs/libxfs/xfs_types.c
··· 7 7 #include "xfs.h" 8 8 #include "xfs_fs.h" 9 9 #include "xfs_format.h" 10 - #include "xfs_log_format.h" 11 10 #include "xfs_shared.h" 12 11 #include "xfs_trans_resv.h" 13 12 #include "xfs_bit.h" 14 - #include "xfs_sb.h" 15 13 #include "xfs_mount.h" 16 - #include "xfs_defer.h" 17 - #include "xfs_inode.h" 18 - #include "xfs_btree.h" 19 - #include "xfs_rmap.h" 20 - #include "xfs_alloc_btree.h" 21 - #include "xfs_alloc.h" 22 - #include "xfs_ialloc.h" 23 14 24 15 /* Find the size of the AG, in blocks. */ 25 16 xfs_agblock_t ··· 78 87 * Calculate the first inode, which will be in the first 79 88 * cluster-aligned block after the AGFL. 80 89 */ 81 - bno = round_up(XFS_AGFL_BLOCK(mp) + 1, mp->m_cluster_align); 90 + bno = round_up(XFS_AGFL_BLOCK(mp) + 1, M_IGEO(mp)->cluster_align); 82 91 *first = XFS_AGB_TO_AGINO(mp, bno); 83 92 84 93 /* 85 94 * Calculate the last inode, which will be at the end of the 86 95 * last (aligned) cluster that can be allocated in the AG. 87 96 */ 88 - bno = round_down(eoag, mp->m_cluster_align); 97 + bno = round_down(eoag, M_IGEO(mp)->cluster_align); 89 98 *last = XFS_AGB_TO_AGINO(mp, bno) - 1; 90 99 } 91 100
+2 -9
fs/xfs/scrub/agheader.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_trans_resv.h" 11 11 #include "xfs_mount.h" 12 - #include "xfs_defer.h" 13 12 #include "xfs_btree.h" 14 - #include "xfs_bit.h" 15 - #include "xfs_log_format.h" 16 - #include "xfs_trans.h" 17 13 #include "xfs_sb.h" 18 - #include "xfs_inode.h" 19 14 #include "xfs_alloc.h" 20 15 #include "xfs_ialloc.h" 21 16 #include "xfs_rmap.h" 22 - #include "scrub/xfs_scrub.h" 23 17 #include "scrub/scrub.h" 24 18 #include "scrub/common.h" 25 - #include "scrub/trace.h" 26 19 27 20 /* Superblock */ 28 21 ··· 639 646 xchk_agfl_block_xref(sc, agbno); 640 647 641 648 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 642 - return XFS_BTREE_QUERY_RANGE_ABORT; 649 + return XFS_ITER_ABORT; 643 650 644 651 return 0; 645 652 } ··· 730 737 /* Check the blocks in the AGFL. */ 731 738 error = xfs_agfl_walk(sc->mp, XFS_BUF_TO_AGF(sc->sa.agf_bp), 732 739 sc->sa.agfl_bp, xchk_agfl_block, &sai); 733 - if (error == XFS_BTREE_QUERY_RANGE_ABORT) { 740 + if (error == XFS_ITER_ABORT) { 734 741 error = 0; 735 742 goto out_free; 736 743 }
-5
fs/xfs/scrub/agheader_repair.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_trans_resv.h" 11 11 #include "xfs_mount.h" 12 - #include "xfs_defer.h" 13 12 #include "xfs_btree.h" 14 - #include "xfs_bit.h" 15 13 #include "xfs_log_format.h" 16 14 #include "xfs_trans.h" 17 15 #include "xfs_sb.h" 18 - #include "xfs_inode.h" 19 16 #include "xfs_alloc.h" 20 17 #include "xfs_alloc_btree.h" 21 18 #include "xfs_ialloc.h" 22 19 #include "xfs_ialloc_btree.h" 23 20 #include "xfs_rmap.h" 24 21 #include "xfs_rmap_btree.h" 25 - #include "xfs_refcount.h" 26 22 #include "xfs_refcount_btree.h" 27 - #include "scrub/xfs_scrub.h" 28 23 #include "scrub/scrub.h" 29 24 #include "scrub/common.h" 30 25 #include "scrub/trace.h"
-7
fs/xfs/scrub/alloc.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_trans_resv.h" 11 11 #include "xfs_mount.h" 12 - #include "xfs_defer.h" 13 12 #include "xfs_btree.h" 14 - #include "xfs_bit.h" 15 - #include "xfs_log_format.h" 16 - #include "xfs_trans.h" 17 - #include "xfs_sb.h" 18 13 #include "xfs_alloc.h" 19 14 #include "xfs_rmap.h" 20 - #include "scrub/xfs_scrub.h" 21 15 #include "scrub/scrub.h" 22 16 #include "scrub/common.h" 23 17 #include "scrub/btree.h" 24 - #include "scrub/trace.h" 25 18 26 19 /* 27 20 * Set us up to scrub free space btrees.
+91 -31
fs/xfs/scrub/attr.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_trans_resv.h" 11 11 #include "xfs_mount.h" 12 - #include "xfs_defer.h" 13 - #include "xfs_btree.h" 14 - #include "xfs_bit.h" 15 12 #include "xfs_log_format.h" 16 - #include "xfs_trans.h" 17 - #include "xfs_sb.h" 18 13 #include "xfs_inode.h" 19 14 #include "xfs_da_format.h" 20 15 #include "xfs_da_btree.h" 21 - #include "xfs_dir2.h" 22 16 #include "xfs_attr.h" 23 17 #include "xfs_attr_leaf.h" 24 - #include "scrub/xfs_scrub.h" 25 18 #include "scrub/scrub.h" 26 19 #include "scrub/common.h" 27 20 #include "scrub/dabtree.h" 28 - #include "scrub/trace.h" 21 + #include "scrub/attr.h" 29 22 30 - #include <linux/posix_acl_xattr.h> 31 - #include <linux/xattr.h> 23 + /* 24 + * Allocate enough memory to hold an attr value and attr block bitmaps, 25 + * reallocating the buffer if necessary. Buffer contents are not preserved 26 + * across a reallocation. 27 + */ 28 + int 29 + xchk_setup_xattr_buf( 30 + struct xfs_scrub *sc, 31 + size_t value_size, 32 + xfs_km_flags_t flags) 33 + { 34 + size_t sz; 35 + struct xchk_xattr_buf *ab = sc->buf; 36 + 37 + /* 38 + * We need enough space to read an xattr value from the file or enough 39 + * space to hold three copies of the xattr free space bitmap. We don't 40 + * need the buffer space for both purposes at the same time. 41 + */ 42 + sz = 3 * sizeof(long) * BITS_TO_LONGS(sc->mp->m_attr_geo->blksize); 43 + sz = max_t(size_t, sz, value_size); 44 + 45 + /* 46 + * If there's already a buffer, figure out if we need to reallocate it 47 + * to accommodate a larger size. 48 + */ 49 + if (ab) { 50 + if (sz <= ab->sz) 51 + return 0; 52 + kmem_free(ab); 53 + sc->buf = NULL; 54 + } 55 + 56 + /* 57 + * Don't zero the buffer upon allocation to avoid runtime overhead. 58 + * All users must be careful never to read uninitialized contents. 
59 + */ 60 + ab = kmem_alloc_large(sizeof(*ab) + sz, flags); 61 + if (!ab) 62 + return -ENOMEM; 63 + 64 + ab->sz = sz; 65 + sc->buf = ab; 66 + return 0; 67 + } 32 68 33 69 /* Set us up to scrub an inode's extended attributes. */ 34 70 int ··· 72 36 struct xfs_scrub *sc, 73 37 struct xfs_inode *ip) 74 38 { 75 - size_t sz; 39 + int error; 76 40 77 41 /* 78 - * Allocate the buffer without the inode lock held. We need enough 79 - * space to read every xattr value in the file or enough space to 80 - * hold three copies of the xattr free space bitmap. (Not both at 81 - * the same time.) 42 + * We failed to get memory while checking attrs, so this time try to 43 + * get all the memory we're ever going to need. Allocate the buffer 44 + * without the inode lock held, which means we can sleep. 82 45 */ 83 - sz = max_t(size_t, XATTR_SIZE_MAX, 3 * sizeof(long) * 84 - BITS_TO_LONGS(sc->mp->m_attr_geo->blksize)); 85 - sc->buf = kmem_zalloc_large(sz, KM_SLEEP); 86 - if (!sc->buf) 87 - return -ENOMEM; 46 + if (sc->flags & XCHK_TRY_HARDER) { 47 + error = xchk_setup_xattr_buf(sc, XATTR_SIZE_MAX, KM_SLEEP); 48 + if (error) 49 + return error; 50 + } 88 51 89 52 return xchk_setup_inode_contents(sc, ip, 0); 90 53 } ··· 118 83 sx = container_of(context, struct xchk_xattr, context); 119 84 120 85 if (xchk_should_terminate(sx->sc, &error)) { 121 - context->seen_enough = 1; 86 + context->seen_enough = error; 122 87 return; 123 88 } 124 89 ··· 134 99 return; 135 100 } 136 101 102 + /* 103 + * Try to allocate enough memory to extrat the attr value. If that 104 + * doesn't work, we overload the seen_enough variable to convey 105 + * the error message back to the main scrub function. 
106 + */ 107 + error = xchk_setup_xattr_buf(sx->sc, valuelen, KM_MAYFAIL); 108 + if (error == -ENOMEM) 109 + error = -EDEADLOCK; 110 + if (error) { 111 + context->seen_enough = error; 112 + return; 113 + } 114 + 137 115 args.flags = ATTR_KERNOTIME; 138 116 if (flags & XFS_ATTR_ROOT) 139 117 args.flags |= ATTR_ROOT; ··· 159 111 args.namelen = namelen; 160 112 args.hashval = xfs_da_hashname(args.name, args.namelen); 161 113 args.trans = context->tp; 162 - args.value = sx->sc->buf; 163 - args.valuelen = XATTR_SIZE_MAX; 114 + args.value = xchk_xattr_valuebuf(sx->sc); 115 + args.valuelen = valuelen; 164 116 165 117 error = xfs_attr_get_ilocked(context->dp, &args); 166 118 if (error == -EEXIST) ··· 173 125 args.blkno); 174 126 fail_xref: 175 127 if (sx->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 176 - context->seen_enough = 1; 128 + context->seen_enough = XFS_ITER_ABORT; 177 129 return; 178 130 } 179 131 ··· 218 170 unsigned long *map, 219 171 struct xfs_attr3_icleaf_hdr *leafhdr) 220 172 { 221 - unsigned long *freemap; 222 - unsigned long *dstmap; 173 + unsigned long *freemap = xchk_xattr_freemap(sc); 174 + unsigned long *dstmap = xchk_xattr_dstmap(sc); 223 175 unsigned int mapsize = sc->mp->m_attr_geo->blksize; 224 176 int i; 225 177 226 178 /* Construct bitmap of freemap contents. */ 227 - freemap = (unsigned long *)sc->buf + BITS_TO_LONGS(mapsize); 228 179 bitmap_zero(freemap, mapsize); 229 180 for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { 230 181 if (!xchk_xattr_set_map(sc, freemap, ··· 233 186 } 234 187 235 188 /* Look for bits that are set in freemap and are marked in use. 
*/ 236 - dstmap = freemap + BITS_TO_LONGS(mapsize); 237 189 return bitmap_and(dstmap, freemap, map, mapsize) == 0; 238 190 } 239 191 ··· 247 201 char *buf_end, 248 202 struct xfs_attr_leafblock *leaf, 249 203 struct xfs_attr3_icleaf_hdr *leafhdr, 250 - unsigned long *usedmap, 251 204 struct xfs_attr_leaf_entry *ent, 252 205 int idx, 253 206 unsigned int *usedbytes, 254 207 __u32 *last_hashval) 255 208 { 256 209 struct xfs_mount *mp = ds->state->mp; 210 + unsigned long *usedmap = xchk_xattr_usedmap(ds->sc); 257 211 char *name_end; 258 212 struct xfs_attr_leaf_name_local *lentry; 259 213 struct xfs_attr_leaf_name_remote *rentry; ··· 313 267 struct xfs_attr_leafblock *leaf = bp->b_addr; 314 268 struct xfs_attr_leaf_entry *ent; 315 269 struct xfs_attr_leaf_entry *entries; 316 - unsigned long *usedmap = ds->sc->buf; 270 + unsigned long *usedmap; 317 271 char *buf_end; 318 272 size_t off; 319 273 __u32 last_hashval = 0; 320 274 unsigned int usedbytes = 0; 321 275 unsigned int hdrsize; 322 276 int i; 277 + int error; 323 278 324 279 if (*last_checked == blk->blkno) 325 280 return 0; 281 + 282 + /* Allocate memory for block usage checking. */ 283 + error = xchk_setup_xattr_buf(ds->sc, 0, KM_MAYFAIL); 284 + if (error == -ENOMEM) 285 + return -EDEADLOCK; 286 + if (error) 287 + return error; 288 + usedmap = xchk_xattr_usedmap(ds->sc); 289 + 326 290 *last_checked = blk->blkno; 327 291 bitmap_zero(usedmap, mp->m_attr_geo->blksize); 328 292 ··· 380 324 381 325 /* Check the entry and nameval. */ 382 326 xchk_xattr_entry(ds, level, buf_end, leaf, &leafhdr, 383 - usedmap, ent, i, &usedbytes, &last_hashval); 327 + ent, i, &usedbytes, &last_hashval); 384 328 385 329 if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 386 330 goto out; ··· 520 464 error = xfs_attr_list_int_ilocked(&sx.context); 521 465 if (!xchk_fblock_process_error(sc, XFS_ATTR_FORK, 0, &error)) 522 466 goto out; 467 + 468 + /* Did our listent function try to return any errors? 
*/ 469 + if (sx.context.seen_enough < 0) 470 + error = sx.context.seen_enough; 523 471 out: 524 472 return error; 525 473 }
+71
fs/xfs/scrub/attr.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-or-later */ 2 + /* 3 + * Copyright (C) 2019 Oracle. All Rights Reserved. 4 + * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 + */ 6 + #ifndef __XFS_SCRUB_ATTR_H__ 7 + #define __XFS_SCRUB_ATTR_H__ 8 + 9 + /* 10 + * Temporary storage for online scrub and repair of extended attributes. 11 + */ 12 + struct xchk_xattr_buf { 13 + /* Size of @buf, in bytes. */ 14 + size_t sz; 15 + 16 + /* 17 + * Memory buffer -- either used for extracting attr values while 18 + * walking the attributes; or for computing attr block bitmaps when 19 + * checking the attribute tree. 20 + * 21 + * Each bitmap contains enough bits to track every byte in an attr 22 + * block (rounded up to the size of an unsigned long). The attr block 23 + * used space bitmap starts at the beginning of the buffer; the free 24 + * space bitmap follows immediately after; and we have a third buffer 25 + * for storing intermediate bitmap results. 26 + */ 27 + uint8_t buf[0]; 28 + }; 29 + 30 + /* A place to store attribute values. */ 31 + static inline uint8_t * 32 + xchk_xattr_valuebuf( 33 + struct xfs_scrub *sc) 34 + { 35 + struct xchk_xattr_buf *ab = sc->buf; 36 + 37 + return ab->buf; 38 + } 39 + 40 + /* A bitmap of space usage computed by walking an attr leaf block. */ 41 + static inline unsigned long * 42 + xchk_xattr_usedmap( 43 + struct xfs_scrub *sc) 44 + { 45 + struct xchk_xattr_buf *ab = sc->buf; 46 + 47 + return (unsigned long *)ab->buf; 48 + } 49 + 50 + /* A bitmap of free space computed by walking attr leaf block free info. */ 51 + static inline unsigned long * 52 + xchk_xattr_freemap( 53 + struct xfs_scrub *sc) 54 + { 55 + return xchk_xattr_usedmap(sc) + 56 + BITS_TO_LONGS(sc->mp->m_attr_geo->blksize); 57 + } 58 + 59 + /* A bitmap used to hold temporary results. 
*/ 60 + static inline unsigned long * 61 + xchk_xattr_dstmap( 62 + struct xfs_scrub *sc) 63 + { 64 + return xchk_xattr_freemap(sc) + 65 + BITS_TO_LONGS(sc->mp->m_attr_geo->blksize); 66 + } 67 + 68 + int xchk_setup_xattr_buf(struct xfs_scrub *sc, size_t value_size, 69 + xfs_km_flags_t flags); 70 + 71 + #endif /* __XFS_SCRUB_ATTR_H__ */
-5
fs/xfs/scrub/bitmap.c
··· 10 10 #include "xfs_trans_resv.h" 11 11 #include "xfs_mount.h" 12 12 #include "xfs_btree.h" 13 - #include "scrub/xfs_scrub.h" 14 - #include "scrub/scrub.h" 15 - #include "scrub/common.h" 16 - #include "scrub/trace.h" 17 - #include "scrub/repair.h" 18 13 #include "scrub/bitmap.h" 19 14 20 15 /*
-8
fs/xfs/scrub/bmap.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_trans_resv.h" 11 11 #include "xfs_mount.h" 12 - #include "xfs_defer.h" 13 12 #include "xfs_btree.h" 14 13 #include "xfs_bit.h" 15 14 #include "xfs_log_format.h" 16 15 #include "xfs_trans.h" 17 - #include "xfs_sb.h" 18 16 #include "xfs_inode.h" 19 - #include "xfs_inode_fork.h" 20 17 #include "xfs_alloc.h" 21 - #include "xfs_rtalloc.h" 22 18 #include "xfs_bmap.h" 23 - #include "xfs_bmap_util.h" 24 19 #include "xfs_bmap_btree.h" 25 20 #include "xfs_rmap.h" 26 21 #include "xfs_rmap_btree.h" 27 - #include "xfs_refcount.h" 28 - #include "scrub/xfs_scrub.h" 29 22 #include "scrub/scrub.h" 30 23 #include "scrub/common.h" 31 24 #include "scrub/btree.h" 32 - #include "scrub/trace.h" 33 25 34 26 /* Set us up with an inode's bmap. */ 35 27 int
-7
fs/xfs/scrub/btree.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_trans_resv.h" 11 11 #include "xfs_mount.h" 12 - #include "xfs_defer.h" 13 12 #include "xfs_btree.h" 14 - #include "xfs_bit.h" 15 - #include "xfs_log_format.h" 16 - #include "xfs_trans.h" 17 - #include "xfs_sb.h" 18 - #include "xfs_inode.h" 19 - #include "xfs_alloc.h" 20 13 #include "scrub/scrub.h" 21 14 #include "scrub/common.h" 22 15 #include "scrub/btree.h"
-8
fs/xfs/scrub/common.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_trans_resv.h" 11 11 #include "xfs_mount.h" 12 - #include "xfs_defer.h" 13 12 #include "xfs_btree.h" 14 - #include "xfs_bit.h" 15 13 #include "xfs_log_format.h" 16 14 #include "xfs_trans.h" 17 15 #include "xfs_sb.h" 18 16 #include "xfs_inode.h" 19 17 #include "xfs_icache.h" 20 - #include "xfs_itable.h" 21 18 #include "xfs_alloc.h" 22 19 #include "xfs_alloc_btree.h" 23 - #include "xfs_bmap.h" 24 - #include "xfs_bmap_btree.h" 25 20 #include "xfs_ialloc.h" 26 21 #include "xfs_ialloc_btree.h" 27 - #include "xfs_refcount.h" 28 22 #include "xfs_refcount_btree.h" 29 23 #include "xfs_rmap.h" 30 24 #include "xfs_rmap_btree.h" ··· 26 32 #include "xfs_trans_priv.h" 27 33 #include "xfs_attr.h" 28 34 #include "xfs_reflink.h" 29 - #include "scrub/xfs_scrub.h" 30 35 #include "scrub/scrub.h" 31 36 #include "scrub/common.h" 32 37 #include "scrub/trace.h" 33 - #include "scrub/btree.h" 34 38 #include "scrub/repair.h" 35 39 #include "scrub/health.h" 36 40
-8
fs/xfs/scrub/dabtree.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_trans_resv.h" 11 11 #include "xfs_mount.h" 12 - #include "xfs_defer.h" 13 - #include "xfs_btree.h" 14 - #include "xfs_bit.h" 15 12 #include "xfs_log_format.h" 16 13 #include "xfs_trans.h" 17 - #include "xfs_sb.h" 18 14 #include "xfs_inode.h" 19 - #include "xfs_inode_fork.h" 20 - #include "xfs_da_format.h" 21 - #include "xfs_da_btree.h" 22 15 #include "xfs_dir2.h" 23 16 #include "xfs_dir2_priv.h" 24 17 #include "xfs_attr_leaf.h" 25 - #include "scrub/xfs_scrub.h" 26 18 #include "scrub/scrub.h" 27 19 #include "scrub/common.h" 28 20 #include "scrub/trace.h"
-10
fs/xfs/scrub/dir.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_trans_resv.h" 11 11 #include "xfs_mount.h" 12 - #include "xfs_defer.h" 13 - #include "xfs_btree.h" 14 - #include "xfs_bit.h" 15 12 #include "xfs_log_format.h" 16 13 #include "xfs_trans.h" 17 - #include "xfs_sb.h" 18 14 #include "xfs_inode.h" 19 15 #include "xfs_icache.h" 20 - #include "xfs_itable.h" 21 - #include "xfs_da_format.h" 22 - #include "xfs_da_btree.h" 23 16 #include "xfs_dir2.h" 24 17 #include "xfs_dir2_priv.h" 25 - #include "xfs_ialloc.h" 26 - #include "scrub/xfs_scrub.h" 27 18 #include "scrub/scrub.h" 28 19 #include "scrub/common.h" 29 - #include "scrub/trace.h" 30 20 #include "scrub/dabtree.h" 31 21 32 22 /* Set us up to scrub directories. */
-12
fs/xfs/scrub/fscounters.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_trans_resv.h" 11 11 #include "xfs_mount.h" 12 - #include "xfs_defer.h" 13 - #include "xfs_btree.h" 14 - #include "xfs_bit.h" 15 - #include "xfs_log_format.h" 16 - #include "xfs_trans.h" 17 12 #include "xfs_sb.h" 18 - #include "xfs_inode.h" 19 13 #include "xfs_alloc.h" 20 14 #include "xfs_ialloc.h" 21 - #include "xfs_rmap.h" 22 - #include "xfs_error.h" 23 - #include "xfs_errortag.h" 24 - #include "xfs_icache.h" 25 15 #include "xfs_health.h" 26 - #include "xfs_bmap.h" 27 - #include "scrub/xfs_scrub.h" 28 16 #include "scrub/scrub.h" 29 17 #include "scrub/common.h" 30 18 #include "scrub/trace.h"
-8
fs/xfs/scrub/health.c
··· 7 7 #include "xfs_fs.h" 8 8 #include "xfs_shared.h" 9 9 #include "xfs_format.h" 10 - #include "xfs_trans_resv.h" 11 - #include "xfs_mount.h" 12 - #include "xfs_defer.h" 13 10 #include "xfs_btree.h" 14 - #include "xfs_bit.h" 15 - #include "xfs_log_format.h" 16 - #include "xfs_trans.h" 17 11 #include "xfs_sb.h" 18 - #include "xfs_inode.h" 19 12 #include "xfs_health.h" 20 13 #include "scrub/scrub.h" 21 - #include "scrub/health.h" 22 14 23 15 /* 24 16 * Scrub and In-Core Filesystem Health Assessments
+11 -17
fs/xfs/scrub/ialloc.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_trans_resv.h" 11 11 #include "xfs_mount.h" 12 - #include "xfs_defer.h" 13 12 #include "xfs_btree.h" 14 - #include "xfs_bit.h" 15 13 #include "xfs_log_format.h" 16 14 #include "xfs_trans.h" 17 - #include "xfs_sb.h" 18 15 #include "xfs_inode.h" 19 - #include "xfs_alloc.h" 20 16 #include "xfs_ialloc.h" 21 17 #include "xfs_ialloc_btree.h" 22 18 #include "xfs_icache.h" 23 19 #include "xfs_rmap.h" 24 - #include "xfs_log.h" 25 - #include "xfs_trans_priv.h" 26 - #include "scrub/xfs_scrub.h" 27 20 #include "scrub/scrub.h" 28 21 #include "scrub/common.h" 29 22 #include "scrub/btree.h" ··· 223 230 int error = 0; 224 231 225 232 nr_inodes = min_t(unsigned int, XFS_INODES_PER_CHUNK, 226 - mp->m_inodes_per_cluster); 233 + M_IGEO(mp)->inodes_per_cluster); 227 234 228 235 /* Map this inode cluster */ 229 236 agbno = XFS_AGINO_TO_AGBNO(mp, irec->ir_startino + cluster_base); ··· 244 251 */ 245 252 ir_holemask = (irec->ir_holemask & cluster_mask); 246 253 imap.im_blkno = XFS_AGB_TO_DADDR(mp, agno, agbno); 247 - imap.im_len = XFS_FSB_TO_BB(mp, mp->m_blocks_per_cluster); 254 + imap.im_len = XFS_FSB_TO_BB(mp, M_IGEO(mp)->blocks_per_cluster); 248 255 imap.im_boffset = XFS_INO_TO_OFFSET(mp, irec->ir_startino) << 249 256 mp->m_sb.sb_inodelog; 250 257 ··· 269 276 /* If any part of this is a hole, skip it. */ 270 277 if (ir_holemask) { 271 278 xchk_xref_is_not_owned_by(bs->sc, agbno, 272 - mp->m_blocks_per_cluster, 279 + M_IGEO(mp)->blocks_per_cluster, 273 280 &XFS_RMAP_OINFO_INODES); 274 281 return 0; 275 282 } 276 283 277 - xchk_xref_is_owned_by(bs->sc, agbno, mp->m_blocks_per_cluster, 284 + xchk_xref_is_owned_by(bs->sc, agbno, M_IGEO(mp)->blocks_per_cluster, 278 285 &XFS_RMAP_OINFO_INODES); 279 286 280 287 /* Grab the inode cluster buffer. 
*/ ··· 326 333 */ 327 334 for (cluster_base = 0; 328 335 cluster_base < XFS_INODES_PER_CHUNK; 329 - cluster_base += bs->sc->mp->m_inodes_per_cluster) { 336 + cluster_base += M_IGEO(bs->sc->mp)->inodes_per_cluster) { 330 337 error = xchk_iallocbt_check_cluster(bs, irec, cluster_base); 331 338 if (error) 332 339 break; ··· 348 355 { 349 356 struct xfs_mount *mp = bs->sc->mp; 350 357 struct xchk_iallocbt *iabt = bs->private; 358 + struct xfs_ino_geometry *igeo = M_IGEO(mp); 351 359 352 360 /* 353 361 * finobt records have different positioning requirements than inobt ··· 366 372 unsigned int imask; 367 373 368 374 imask = min_t(unsigned int, XFS_INODES_PER_CHUNK, 369 - mp->m_cluster_align_inodes) - 1; 375 + igeo->cluster_align_inodes) - 1; 370 376 if (irec->ir_startino & imask) 371 377 xchk_btree_set_corrupt(bs->sc, bs->cur, 0); 372 378 return; ··· 394 400 } 395 401 396 402 /* inobt records must be aligned to cluster and inoalignmnt size. */ 397 - if (irec->ir_startino & (mp->m_cluster_align_inodes - 1)) { 403 + if (irec->ir_startino & (igeo->cluster_align_inodes - 1)) { 398 404 xchk_btree_set_corrupt(bs->sc, bs->cur, 0); 399 405 return; 400 406 } 401 407 402 - if (irec->ir_startino & (mp->m_inodes_per_cluster - 1)) { 408 + if (irec->ir_startino & (igeo->inodes_per_cluster - 1)) { 403 409 xchk_btree_set_corrupt(bs->sc, bs->cur, 0); 404 410 return; 405 411 } 406 412 407 - if (mp->m_inodes_per_cluster <= XFS_INODES_PER_CHUNK) 413 + if (igeo->inodes_per_cluster <= XFS_INODES_PER_CHUNK) 408 414 return; 409 415 410 416 /* ··· 413 419 * after this one. 414 420 */ 415 421 iabt->next_startino = irec->ir_startino + XFS_INODES_PER_CHUNK; 416 - iabt->next_cluster_ino = irec->ir_startino + mp->m_inodes_per_cluster; 422 + iabt->next_cluster_ino = irec->ir_startino + igeo->inodes_per_cluster; 417 423 } 418 424 419 425 /* Scrub an inobt/finobt record. */
-10
fs/xfs/scrub/inode.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_trans_resv.h" 11 11 #include "xfs_mount.h" 12 - #include "xfs_defer.h" 13 12 #include "xfs_btree.h" 14 - #include "xfs_bit.h" 15 13 #include "xfs_log_format.h" 16 - #include "xfs_trans.h" 17 - #include "xfs_sb.h" 18 14 #include "xfs_inode.h" 19 - #include "xfs_icache.h" 20 - #include "xfs_inode_buf.h" 21 - #include "xfs_inode_fork.h" 22 15 #include "xfs_ialloc.h" 23 16 #include "xfs_da_format.h" 24 17 #include "xfs_reflink.h" 25 18 #include "xfs_rmap.h" 26 - #include "xfs_bmap.h" 27 19 #include "xfs_bmap_util.h" 28 - #include "scrub/xfs_scrub.h" 29 20 #include "scrub/scrub.h" 30 21 #include "scrub/common.h" 31 22 #include "scrub/btree.h" 32 - #include "scrub/trace.h" 33 23 34 24 /* 35 25 * Grab total control of the inode metadata. It doesn't matter here if
-8
fs/xfs/scrub/parent.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_trans_resv.h" 11 11 #include "xfs_mount.h" 12 - #include "xfs_defer.h" 13 - #include "xfs_btree.h" 14 - #include "xfs_bit.h" 15 12 #include "xfs_log_format.h" 16 - #include "xfs_trans.h" 17 - #include "xfs_sb.h" 18 13 #include "xfs_inode.h" 19 14 #include "xfs_icache.h" 20 15 #include "xfs_dir2.h" 21 16 #include "xfs_dir2_priv.h" 22 - #include "xfs_ialloc.h" 23 - #include "scrub/xfs_scrub.h" 24 17 #include "scrub/scrub.h" 25 18 #include "scrub/common.h" 26 - #include "scrub/trace.h" 27 19 28 20 /* Set us up to scrub parents. */ 29 21 int
+1 -12
fs/xfs/scrub/quota.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_trans_resv.h" 11 11 #include "xfs_mount.h" 12 - #include "xfs_defer.h" 13 - #include "xfs_btree.h" 14 - #include "xfs_bit.h" 15 12 #include "xfs_log_format.h" 16 13 #include "xfs_trans.h" 17 - #include "xfs_sb.h" 18 14 #include "xfs_inode.h" 19 - #include "xfs_inode_fork.h" 20 - #include "xfs_alloc.h" 21 - #include "xfs_bmap.h" 22 15 #include "xfs_quota.h" 23 16 #include "xfs_qm.h" 24 - #include "xfs_dquot.h" 25 - #include "xfs_dquot_item.h" 26 - #include "scrub/xfs_scrub.h" 27 17 #include "scrub/scrub.h" 28 18 #include "scrub/common.h" 29 - #include "scrub/trace.h" 30 19 31 20 /* Convert a scrub type code to a DQ flag, or return 0 if error. */ 32 21 static inline uint ··· 133 144 if (bsoft > bhard) 134 145 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); 135 146 136 - if (ihard > mp->m_maxicount) 147 + if (ihard > M_IGEO(mp)->maxicount) 137 148 xchk_fblock_set_warning(sc, XFS_DATA_FORK, offset); 138 149 if (isoft > ihard) 139 150 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
-10
fs/xfs/scrub/refcount.c
··· 7 7 #include "xfs_fs.h" 8 8 #include "xfs_shared.h" 9 9 #include "xfs_format.h" 10 - #include "xfs_trans_resv.h" 11 - #include "xfs_mount.h" 12 - #include "xfs_defer.h" 13 10 #include "xfs_btree.h" 14 - #include "xfs_bit.h" 15 - #include "xfs_log_format.h" 16 - #include "xfs_trans.h" 17 - #include "xfs_sb.h" 18 - #include "xfs_alloc.h" 19 11 #include "xfs_rmap.h" 20 12 #include "xfs_refcount.h" 21 - #include "scrub/xfs_scrub.h" 22 13 #include "scrub/scrub.h" 23 14 #include "scrub/common.h" 24 15 #include "scrub/btree.h" 25 - #include "scrub/trace.h" 26 16 27 17 /* 28 18 * Set us up to scrub reference count btrees.
+3 -11
fs/xfs/scrub/repair.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_trans_resv.h" 11 11 #include "xfs_mount.h" 12 - #include "xfs_defer.h" 13 12 #include "xfs_btree.h" 14 - #include "xfs_bit.h" 15 13 #include "xfs_log_format.h" 16 14 #include "xfs_trans.h" 17 15 #include "xfs_sb.h" 18 16 #include "xfs_inode.h" 19 - #include "xfs_icache.h" 20 17 #include "xfs_alloc.h" 21 18 #include "xfs_alloc_btree.h" 22 19 #include "xfs_ialloc.h" 23 20 #include "xfs_ialloc_btree.h" 24 21 #include "xfs_rmap.h" 25 22 #include "xfs_rmap_btree.h" 26 - #include "xfs_refcount.h" 27 23 #include "xfs_refcount_btree.h" 28 24 #include "xfs_extent_busy.h" 29 25 #include "xfs_ag_resv.h" 30 - #include "xfs_trans_space.h" 31 26 #include "xfs_quota.h" 32 - #include "xfs_attr.h" 33 - #include "xfs_reflink.h" 34 - #include "scrub/xfs_scrub.h" 35 27 #include "scrub/scrub.h" 36 28 #include "scrub/common.h" 37 29 #include "scrub/trace.h" ··· 349 357 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, fsb), 350 358 XFS_FSB_TO_BB(mp, 1), 0); 351 359 xfs_buf_zero(bp, 0, BBTOB(bp->b_length)); 352 - xfs_btree_init_block(mp, bp, btnum, 0, 0, sc->sa.agno, 0); 360 + xfs_btree_init_block(mp, bp, btnum, 0, 0, sc->sa.agno); 353 361 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_BTREE_BUF); 354 362 xfs_trans_log_buf(tp, bp, 0, bp->b_length); 355 363 bp->b_ops = ops; ··· 664 672 { 665 673 xfs_agblock_t *agbno = priv; 666 674 667 - return (*agbno == bno) ? XFS_BTREE_QUERY_RANGE_ABORT : 0; 675 + return (*agbno == bno) ? XFS_ITER_ABORT : 0; 668 676 } 669 677 670 678 /* Does this block match the btree information passed in? */ ··· 694 702 if (owner == XFS_RMAP_OWN_AG) { 695 703 error = xfs_agfl_walk(mp, ri->agf, ri->agfl_bp, 696 704 xrep_findroot_agfl_walk, &agbno); 697 - if (error == XFS_BTREE_QUERY_RANGE_ABORT) 705 + if (error == XFS_ITER_ABORT) 698 706 return 0; 699 707 if (error) 700 708 return error;
-9
fs/xfs/scrub/rmap.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_trans_resv.h" 11 11 #include "xfs_mount.h" 12 - #include "xfs_defer.h" 13 12 #include "xfs_btree.h" 14 - #include "xfs_bit.h" 15 - #include "xfs_log_format.h" 16 - #include "xfs_trans.h" 17 - #include "xfs_sb.h" 18 - #include "xfs_alloc.h" 19 - #include "xfs_ialloc.h" 20 13 #include "xfs_rmap.h" 21 14 #include "xfs_refcount.h" 22 - #include "scrub/xfs_scrub.h" 23 15 #include "scrub/scrub.h" 24 16 #include "scrub/common.h" 25 17 #include "scrub/btree.h" 26 - #include "scrub/trace.h" 27 18 28 19 /* 29 20 * Set us up to scrub reverse mapping btrees.
-7
fs/xfs/scrub/rtbitmap.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_trans_resv.h" 11 11 #include "xfs_mount.h" 12 - #include "xfs_defer.h" 13 - #include "xfs_btree.h" 14 - #include "xfs_bit.h" 15 12 #include "xfs_log_format.h" 16 13 #include "xfs_trans.h" 17 - #include "xfs_sb.h" 18 - #include "xfs_alloc.h" 19 14 #include "xfs_rtalloc.h" 20 15 #include "xfs_inode.h" 21 - #include "scrub/xfs_scrub.h" 22 16 #include "scrub/scrub.h" 23 17 #include "scrub/common.h" 24 - #include "scrub/trace.h" 25 18 26 19 /* Set us up with the realtime metadata locked. */ 27 20 int
-20
fs/xfs/scrub/scrub.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_trans_resv.h" 11 11 #include "xfs_mount.h" 12 - #include "xfs_defer.h" 13 - #include "xfs_btree.h" 14 - #include "xfs_bit.h" 15 12 #include "xfs_log_format.h" 16 13 #include "xfs_trans.h" 17 - #include "xfs_sb.h" 18 14 #include "xfs_inode.h" 19 - #include "xfs_icache.h" 20 - #include "xfs_itable.h" 21 - #include "xfs_alloc.h" 22 - #include "xfs_alloc_btree.h" 23 - #include "xfs_bmap.h" 24 - #include "xfs_bmap_btree.h" 25 - #include "xfs_ialloc.h" 26 - #include "xfs_ialloc_btree.h" 27 - #include "xfs_refcount.h" 28 - #include "xfs_refcount_btree.h" 29 - #include "xfs_rmap.h" 30 - #include "xfs_rmap_btree.h" 31 15 #include "xfs_quota.h" 32 16 #include "xfs_qm.h" 33 17 #include "xfs_errortag.h" 34 18 #include "xfs_error.h" 35 - #include "xfs_log.h" 36 - #include "xfs_trans_priv.h" 37 - #include "scrub/xfs_scrub.h" 38 19 #include "scrub/scrub.h" 39 20 #include "scrub/common.h" 40 21 #include "scrub/trace.h" 41 - #include "scrub/btree.h" 42 22 #include "scrub/repair.h" 43 23 #include "scrub/health.h" 44 24
-8
fs/xfs/scrub/symlink.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_trans_resv.h" 11 11 #include "xfs_mount.h" 12 - #include "xfs_defer.h" 13 - #include "xfs_btree.h" 14 - #include "xfs_bit.h" 15 12 #include "xfs_log_format.h" 16 - #include "xfs_trans.h" 17 - #include "xfs_sb.h" 18 13 #include "xfs_inode.h" 19 - #include "xfs_inode_fork.h" 20 14 #include "xfs_symlink.h" 21 - #include "scrub/xfs_scrub.h" 22 15 #include "scrub/scrub.h" 23 16 #include "scrub/common.h" 24 - #include "scrub/trace.h" 25 17 26 18 /* Set us up to scrub a symbolic link. */ 27 19 int
-6
fs/xfs/scrub/trace.c
··· 10 10 #include "xfs_log_format.h" 11 11 #include "xfs_trans_resv.h" 12 12 #include "xfs_mount.h" 13 - #include "xfs_defer.h" 14 - #include "xfs_da_format.h" 15 13 #include "xfs_inode.h" 16 14 #include "xfs_btree.h" 17 - #include "xfs_trans.h" 18 - #include "xfs_bit.h" 19 - #include "scrub/xfs_scrub.h" 20 15 #include "scrub/scrub.h" 21 - #include "scrub/common.h" 22 16 23 17 /* Figure out which block the btree cursor was pointing to. */ 24 18 static inline xfs_fsblock_t
+1 -3
fs/xfs/xfs_acl.c
··· 4 4 * All Rights Reserved. 5 5 */ 6 6 #include "xfs.h" 7 + #include "xfs_shared.h" 7 8 #include "xfs_format.h" 8 9 #include "xfs_log_format.h" 9 10 #include "xfs_trans_resv.h" 10 11 #include "xfs_mount.h" 11 12 #include "xfs_inode.h" 12 - #include "xfs_acl.h" 13 13 #include "xfs_attr.h" 14 14 #include "xfs_trace.h" 15 - #include <linux/slab.h> 16 - #include <linux/xattr.h> 17 15 #include <linux/posix_acl_xattr.h> 18 16 19 17
+65 -56
fs/xfs/xfs_aops.c
··· 12 12 #include "xfs_mount.h" 13 13 #include "xfs_inode.h" 14 14 #include "xfs_trans.h" 15 - #include "xfs_inode_item.h" 16 - #include "xfs_alloc.h" 17 - #include "xfs_error.h" 18 15 #include "xfs_iomap.h" 19 16 #include "xfs_trace.h" 20 17 #include "xfs_bmap.h" 21 18 #include "xfs_bmap_util.h" 22 - #include "xfs_bmap_btree.h" 23 19 #include "xfs_reflink.h" 24 - #include <linux/writeback.h> 25 20 26 21 /* 27 22 * structure owned by writepages passed to individual writepage calls ··· 133 138 struct xfs_trans *tp; 134 139 int error; 135 140 136 - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 137 - XFS_TRANS_NOFS, &tp); 141 + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp); 138 142 if (error) 139 143 return error; 140 144 ··· 234 240 struct xfs_inode *ip = XFS_I(ioend->io_inode); 235 241 xfs_off_t offset = ioend->io_offset; 236 242 size_t size = ioend->io_size; 243 + unsigned int nofs_flag; 237 244 int error; 245 + 246 + /* 247 + * We can allocate memory here while doing writeback on behalf of 248 + * memory reclaim. To avoid memory allocation deadlocks set the 249 + * task-wide nofs context for the following operations. 250 + */ 251 + nofs_flag = memalloc_nofs_save(); 238 252 239 253 /* 240 254 * Just clean up the in-memory strutures if the fs has been shut down. 
··· 284 282 list_del_init(&ioend->io_list); 285 283 xfs_destroy_ioend(ioend, error); 286 284 } 285 + 286 + memalloc_nofs_restore(nofs_flag); 287 287 } 288 288 289 289 /* ··· 294 290 static bool 295 291 xfs_ioend_can_merge( 296 292 struct xfs_ioend *ioend, 297 - int ioend_error, 298 293 struct xfs_ioend *next) 299 294 { 300 - int next_error; 301 - 302 - next_error = blk_status_to_errno(next->io_bio->bi_status); 303 - if (ioend_error != next_error) 295 + if (ioend->io_bio->bi_status != next->io_bio->bi_status) 304 296 return false; 305 297 if ((ioend->io_fork == XFS_COW_FORK) ^ (next->io_fork == XFS_COW_FORK)) 306 298 return false; ··· 305 305 return false; 306 306 if (ioend->io_offset + ioend->io_size != next->io_offset) 307 307 return false; 308 - if (xfs_ioend_is_append(ioend) != xfs_ioend_is_append(next)) 309 - return false; 310 308 return true; 309 + } 310 + 311 + /* 312 + * If the to be merged ioend has a preallocated transaction for file 313 + * size updates we need to ensure the ioend it is merged into also 314 + * has one. If it already has one we can simply cancel the transaction 315 + * as it is guaranteed to be clean. 316 + */ 317 + static void 318 + xfs_ioend_merge_append_transactions( 319 + struct xfs_ioend *ioend, 320 + struct xfs_ioend *next) 321 + { 322 + if (!ioend->io_append_trans) { 323 + ioend->io_append_trans = next->io_append_trans; 324 + next->io_append_trans = NULL; 325 + } else { 326 + xfs_setfilesize_ioend(next, -ECANCELED); 327 + } 311 328 } 312 329 313 330 /* Try to merge adjacent completions. 
*/ ··· 334 317 struct list_head *more_ioends) 335 318 { 336 319 struct xfs_ioend *next_ioend; 337 - int ioend_error; 338 - int error; 339 - 340 - if (list_empty(more_ioends)) 341 - return; 342 - 343 - ioend_error = blk_status_to_errno(ioend->io_bio->bi_status); 344 320 345 321 while (!list_empty(more_ioends)) { 346 322 next_ioend = list_first_entry(more_ioends, struct xfs_ioend, 347 323 io_list); 348 - if (!xfs_ioend_can_merge(ioend, ioend_error, next_ioend)) 324 + if (!xfs_ioend_can_merge(ioend, next_ioend)) 349 325 break; 350 326 list_move_tail(&next_ioend->io_list, &ioend->io_list); 351 327 ioend->io_size += next_ioend->io_size; 352 - if (ioend->io_append_trans) { 353 - error = xfs_setfilesize_ioend(next_ioend, 1); 354 - ASSERT(error == 1); 355 - } 328 + if (next_ioend->io_append_trans) 329 + xfs_ioend_merge_append_transactions(ioend, next_ioend); 356 330 } 357 331 } 358 332 ··· 634 626 * reference to the ioend to ensure that the ioend completion is only done once 635 627 * all bios have been submitted and the ioend is really done. 636 628 * 637 - * If @fail is non-zero, it means that we have a situation where some part of 629 + * If @status is non-zero, it means that we have a situation where some part of 638 630 * the submission process has failed after we have marked paged for writeback 639 631 * and unlocked them. In this situation, we need to fail the bio and ioend 640 632 * rather than submit it to IO. This typically only happens on a filesystem ··· 646 638 struct xfs_ioend *ioend, 647 639 int status) 648 640 { 641 + unsigned int nofs_flag; 642 + 643 + /* 644 + * We can allocate memory here while doing writeback on behalf of 645 + * memory reclaim. To avoid memory allocation deadlocks set the 646 + * task-wide nofs context for the following operations. 647 + */ 648 + nofs_flag = memalloc_nofs_save(); 649 + 649 650 /* Convert CoW extents to regular */ 650 651 if (!status && ioend->io_fork == XFS_COW_FORK) { 651 - /* 652 - * Yuk. 
This can do memory allocation, but is not a 653 - * transactional operation so everything is done in GFP_KERNEL 654 - * context. That can deadlock, because we hold pages in 655 - * writeback state and GFP_KERNEL allocations can block on them. 656 - * Hence we must operate in nofs conditions here. 657 - */ 658 - unsigned nofs_flag; 659 - 660 - nofs_flag = memalloc_nofs_save(); 661 652 status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode), 662 653 ioend->io_offset, ioend->io_size); 663 - memalloc_nofs_restore(nofs_flag); 664 654 } 665 655 666 656 /* Reserve log space if we might write beyond the on-disk inode size. */ ··· 669 663 !ioend->io_append_trans) 670 664 status = xfs_setfilesize_trans_alloc(ioend); 671 665 666 + memalloc_nofs_restore(nofs_flag); 667 + 672 668 ioend->io_bio->bi_private = ioend; 673 669 ioend->io_bio->bi_end_io = xfs_end_bio; 674 - ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc); 675 670 676 671 /* 677 672 * If we are failing the IO now, just mark the ioend with an ··· 686 679 return status; 687 680 } 688 681 689 - ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint; 690 682 submit_bio(ioend->io_bio); 691 683 return 0; 692 684 } ··· 697 691 xfs_exntst_t state, 698 692 xfs_off_t offset, 699 693 struct block_device *bdev, 700 - sector_t sector) 694 + sector_t sector, 695 + struct writeback_control *wbc) 701 696 { 702 697 struct xfs_ioend *ioend; 703 698 struct bio *bio; ··· 706 699 bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &xfs_ioend_bioset); 707 700 bio_set_dev(bio, bdev); 708 701 bio->bi_iter.bi_sector = sector; 702 + bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc); 703 + bio->bi_write_hint = inode->i_write_hint; 704 + wbc_init_bio(wbc, bio); 709 705 710 706 ioend = container_of(bio, struct xfs_ioend, io_inline_bio); 711 707 INIT_LIST_HEAD(&ioend->io_list); ··· 729 719 * so that the bi_private linkage is set up in the right direction for the 730 720 * traversal in xfs_destroy_ioend(). 
731 721 */ 732 - static void 722 + static struct bio * 733 723 xfs_chain_bio( 734 - struct xfs_ioend *ioend, 735 - struct writeback_control *wbc, 736 - struct block_device *bdev, 737 - sector_t sector) 724 + struct bio *prev) 738 725 { 739 726 struct bio *new; 740 727 741 728 new = bio_alloc(GFP_NOFS, BIO_MAX_PAGES); 742 - bio_set_dev(new, bdev); 743 - new->bi_iter.bi_sector = sector; 744 - bio_chain(ioend->io_bio, new); 745 - bio_get(ioend->io_bio); /* for xfs_destroy_ioend */ 746 - ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc); 747 - ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint; 748 - submit_bio(ioend->io_bio); 749 - ioend->io_bio = new; 729 + bio_copy_dev(new, prev);/* also copies over blkcg information */ 730 + new->bi_iter.bi_sector = bio_end_sector(prev); 731 + new->bi_opf = prev->bi_opf; 732 + new->bi_write_hint = prev->bi_write_hint; 733 + 734 + bio_chain(prev, new); 735 + bio_get(prev); /* for xfs_destroy_ioend */ 736 + submit_bio(prev); 737 + return new; 750 738 } 751 739 752 740 /* ··· 780 772 if (wpc->ioend) 781 773 list_add(&wpc->ioend->io_list, iolist); 782 774 wpc->ioend = xfs_alloc_ioend(inode, wpc->fork, 783 - wpc->imap.br_state, offset, bdev, sector); 775 + wpc->imap.br_state, offset, bdev, sector, wbc); 784 776 } 785 777 786 778 merged = __bio_try_merge_page(wpc->ioend->io_bio, page, len, poff, ··· 791 783 792 784 if (!merged) { 793 785 if (bio_full(wpc->ioend->io_bio, len)) 794 - xfs_chain_bio(wpc->ioend, wbc, bdev, sector); 786 + wpc->ioend->io_bio = xfs_chain_bio(wpc->ioend->io_bio); 795 787 bio_add_page(wpc->ioend->io_bio, page, len, poff); 796 788 } 797 789 798 790 wpc->ioend->io_size += len; 791 + wbc_account_io(wbc, page, len); 799 792 } 800 793 801 794 STATIC void
-1
fs/xfs/xfs_aops.h
··· 28 28 29 29 int xfs_setfilesize(struct xfs_inode *ip, xfs_off_t offset, size_t size); 30 30 31 - extern void xfs_count_page_state(struct page *, int *, int *); 32 31 extern struct block_device *xfs_find_bdev_for_inode(struct inode *); 33 32 extern struct dax_device *xfs_find_daxdev_for_inode(struct inode *); 34 33
+1 -6
fs/xfs/xfs_attr_inactive.c
··· 15 15 #include "xfs_da_format.h" 16 16 #include "xfs_da_btree.h" 17 17 #include "xfs_inode.h" 18 - #include "xfs_alloc.h" 19 18 #include "xfs_attr_remote.h" 20 19 #include "xfs_trans.h" 21 - #include "xfs_inode_item.h" 22 20 #include "xfs_bmap.h" 23 21 #include "xfs_attr.h" 24 22 #include "xfs_attr_leaf.h" 25 - #include "xfs_error.h" 26 23 #include "xfs_quota.h" 27 - #include "xfs_trace.h" 28 24 #include "xfs_dir2.h" 29 - #include "xfs_defer.h" 30 25 31 26 /* 32 27 * Look at all the extents for this logical region, ··· 116 121 int size; 117 122 int tmp; 118 123 int i; 119 - struct xfs_mount *mp = bp->b_target->bt_mount; 124 + struct xfs_mount *mp = bp->b_mount; 120 125 121 126 leaf = bp->b_addr; 122 127 xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf);
+1 -6
fs/xfs/xfs_attr_list.c
··· 6 6 */ 7 7 #include "xfs.h" 8 8 #include "xfs_fs.h" 9 + #include "xfs_shared.h" 9 10 #include "xfs_format.h" 10 11 #include "xfs_log_format.h" 11 12 #include "xfs_trans_resv.h" 12 - #include "xfs_bit.h" 13 13 #include "xfs_mount.h" 14 14 #include "xfs_da_format.h" 15 - #include "xfs_da_btree.h" 16 15 #include "xfs_inode.h" 17 16 #include "xfs_trans.h" 18 - #include "xfs_inode_item.h" 19 17 #include "xfs_bmap.h" 20 18 #include "xfs_attr.h" 21 19 #include "xfs_attr_sf.h" 22 - #include "xfs_attr_remote.h" 23 20 #include "xfs_attr_leaf.h" 24 21 #include "xfs_error.h" 25 22 #include "xfs_trace.h" 26 - #include "xfs_buf_item.h" 27 - #include "xfs_cksum.h" 28 23 #include "xfs_dir2.h" 29 24 30 25 STATIC int
+61
fs/xfs/xfs_bio_io.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Copyright (c) 2019 Christoph Hellwig. 4 + */ 5 + #include "xfs.h" 6 + 7 + static inline unsigned int bio_max_vecs(unsigned int count) 8 + { 9 + return min_t(unsigned, howmany(count, PAGE_SIZE), BIO_MAX_PAGES); 10 + } 11 + 12 + int 13 + xfs_rw_bdev( 14 + struct block_device *bdev, 15 + sector_t sector, 16 + unsigned int count, 17 + char *data, 18 + unsigned int op) 19 + 20 + { 21 + unsigned int is_vmalloc = is_vmalloc_addr(data); 22 + unsigned int left = count; 23 + int error; 24 + struct bio *bio; 25 + 26 + if (is_vmalloc && op == REQ_OP_WRITE) 27 + flush_kernel_vmap_range(data, count); 28 + 29 + bio = bio_alloc(GFP_KERNEL, bio_max_vecs(left)); 30 + bio_set_dev(bio, bdev); 31 + bio->bi_iter.bi_sector = sector; 32 + bio->bi_opf = op | REQ_META | REQ_SYNC; 33 + 34 + do { 35 + struct page *page = kmem_to_page(data); 36 + unsigned int off = offset_in_page(data); 37 + unsigned int len = min_t(unsigned, left, PAGE_SIZE - off); 38 + 39 + while (bio_add_page(bio, page, len, off) != len) { 40 + struct bio *prev = bio; 41 + 42 + bio = bio_alloc(GFP_KERNEL, bio_max_vecs(left)); 43 + bio_copy_dev(bio, prev); 44 + bio->bi_iter.bi_sector = bio_end_sector(prev); 45 + bio->bi_opf = prev->bi_opf; 46 + bio_chain(prev, bio); 47 + 48 + submit_bio(prev); 49 + } 50 + 51 + data += len; 52 + left -= len; 53 + } while (left > 0); 54 + 55 + error = submit_bio_wait(bio); 56 + bio_put(bio); 57 + 58 + if (is_vmalloc && op == REQ_OP_READ) 59 + invalidate_kernel_vmap_range(data, count); 60 + return error; 61 + }
+211 -155
fs/xfs/xfs_bmap_item.c
··· 9 9 #include "xfs_log_format.h" 10 10 #include "xfs_trans_resv.h" 11 11 #include "xfs_bit.h" 12 + #include "xfs_shared.h" 12 13 #include "xfs_mount.h" 13 14 #include "xfs_defer.h" 14 15 #include "xfs_inode.h" 15 16 #include "xfs_trans.h" 16 17 #include "xfs_trans_priv.h" 17 - #include "xfs_buf_item.h" 18 18 #include "xfs_bmap_item.h" 19 19 #include "xfs_log.h" 20 20 #include "xfs_bmap.h" 21 21 #include "xfs_icache.h" 22 - #include "xfs_trace.h" 23 22 #include "xfs_bmap_btree.h" 24 23 #include "xfs_trans_space.h" 25 24 ··· 95 96 } 96 97 97 98 /* 98 - * Pinning has no meaning for an bui item, so just return. 99 - */ 100 - STATIC void 101 - xfs_bui_item_pin( 102 - struct xfs_log_item *lip) 103 - { 104 - } 105 - 106 - /* 107 99 * The unpin operation is the last place an BUI is manipulated in the log. It is 108 100 * either inserted in the AIL or aborted in the event of a log I/O error. In 109 101 * either case, the BUI transaction has been successfully committed to make it ··· 113 123 } 114 124 115 125 /* 116 - * BUI items have no locking or pushing. However, since BUIs are pulled from 117 - * the AIL when their corresponding BUDs are committed to disk, their situation 118 - * is very similar to being pinned. Return XFS_ITEM_PINNED so that the caller 119 - * will eventually flush the log. This should help in getting the BUI out of 120 - * the AIL. 121 - */ 122 - STATIC uint 123 - xfs_bui_item_push( 124 - struct xfs_log_item *lip, 125 - struct list_head *buffer_list) 126 - { 127 - return XFS_ITEM_PINNED; 128 - } 129 - 130 - /* 131 126 * The BUI has been either committed or aborted if the transaction has been 132 127 * cancelled. If the transaction was cancelled, an BUD isn't going to be 133 128 * constructed and thus we free the BUI here directly. 
134 129 */ 135 130 STATIC void 136 - xfs_bui_item_unlock( 131 + xfs_bui_item_release( 137 132 struct xfs_log_item *lip) 138 133 { 139 - if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) 140 - xfs_bui_release(BUI_ITEM(lip)); 134 + xfs_bui_release(BUI_ITEM(lip)); 141 135 } 142 136 143 - /* 144 - * The BUI is logged only once and cannot be moved in the log, so simply return 145 - * the lsn at which it's been logged. 146 - */ 147 - STATIC xfs_lsn_t 148 - xfs_bui_item_committed( 149 - struct xfs_log_item *lip, 150 - xfs_lsn_t lsn) 151 - { 152 - return lsn; 153 - } 154 - 155 - /* 156 - * The BUI dependency tracking op doesn't do squat. It can't because 157 - * it doesn't know where the free extent is coming from. The dependency 158 - * tracking has to be handled by the "enclosing" metadata object. For 159 - * example, for inodes, the inode is locked throughout the extent freeing 160 - * so the dependency should be recorded there. 161 - */ 162 - STATIC void 163 - xfs_bui_item_committing( 164 - struct xfs_log_item *lip, 165 - xfs_lsn_t lsn) 166 - { 167 - } 168 - 169 - /* 170 - * This is the ops vector shared by all bui log items. 171 - */ 172 137 static const struct xfs_item_ops xfs_bui_item_ops = { 173 138 .iop_size = xfs_bui_item_size, 174 139 .iop_format = xfs_bui_item_format, 175 - .iop_pin = xfs_bui_item_pin, 176 140 .iop_unpin = xfs_bui_item_unpin, 177 - .iop_unlock = xfs_bui_item_unlock, 178 - .iop_committed = xfs_bui_item_committed, 179 - .iop_push = xfs_bui_item_push, 180 - .iop_committing = xfs_bui_item_committing, 141 + .iop_release = xfs_bui_item_release, 181 142 }; 182 143 183 144 /* ··· 190 249 } 191 250 192 251 /* 193 - * Pinning has no meaning for an bud item, so just return. 194 - */ 195 - STATIC void 196 - xfs_bud_item_pin( 197 - struct xfs_log_item *lip) 198 - { 199 - } 200 - 201 - /* 202 - * Since pinning has no meaning for an bud item, unpinning does 203 - * not either. 
204 - */ 205 - STATIC void 206 - xfs_bud_item_unpin( 207 - struct xfs_log_item *lip, 208 - int remove) 209 - { 210 - } 211 - 212 - /* 213 - * There isn't much you can do to push on an bud item. It is simply stuck 214 - * waiting for the log to be flushed to disk. 215 - */ 216 - STATIC uint 217 - xfs_bud_item_push( 218 - struct xfs_log_item *lip, 219 - struct list_head *buffer_list) 220 - { 221 - return XFS_ITEM_PINNED; 222 - } 223 - 224 - /* 225 252 * The BUD is either committed or aborted if the transaction is cancelled. If 226 253 * the transaction is cancelled, drop our reference to the BUI and free the 227 254 * BUD. 228 255 */ 229 256 STATIC void 230 - xfs_bud_item_unlock( 257 + xfs_bud_item_release( 231 258 struct xfs_log_item *lip) 232 259 { 233 260 struct xfs_bud_log_item *budp = BUD_ITEM(lip); 234 261 235 - if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) { 236 - xfs_bui_release(budp->bud_buip); 237 - kmem_zone_free(xfs_bud_zone, budp); 238 - } 239 - } 240 - 241 - /* 242 - * When the bud item is committed to disk, all we need to do is delete our 243 - * reference to our partner bui item and then free ourselves. Since we're 244 - * freeing ourselves we must return -1 to keep the transaction code from 245 - * further referencing this item. 246 - */ 247 - STATIC xfs_lsn_t 248 - xfs_bud_item_committed( 249 - struct xfs_log_item *lip, 250 - xfs_lsn_t lsn) 251 - { 252 - struct xfs_bud_log_item *budp = BUD_ITEM(lip); 253 - 254 - /* 255 - * Drop the BUI reference regardless of whether the BUD has been 256 - * aborted. Once the BUD transaction is constructed, it is the sole 257 - * responsibility of the BUD to release the BUI (even if the BUI is 258 - * aborted due to log I/O error). 259 - */ 260 262 xfs_bui_release(budp->bud_buip); 261 263 kmem_zone_free(xfs_bud_zone, budp); 262 - 263 - return (xfs_lsn_t)-1; 264 264 } 265 265 266 - /* 267 - * The BUD dependency tracking op doesn't do squat. 
It can't because 268 - * it doesn't know where the free extent is coming from. The dependency 269 - * tracking has to be handled by the "enclosing" metadata object. For 270 - * example, for inodes, the inode is locked throughout the extent freeing 271 - * so the dependency should be recorded there. 272 - */ 273 - STATIC void 274 - xfs_bud_item_committing( 275 - struct xfs_log_item *lip, 276 - xfs_lsn_t lsn) 277 - { 278 - } 279 - 280 - /* 281 - * This is the ops vector shared by all bud log items. 282 - */ 283 266 static const struct xfs_item_ops xfs_bud_item_ops = { 267 + .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED, 284 268 .iop_size = xfs_bud_item_size, 285 269 .iop_format = xfs_bud_item_format, 286 - .iop_pin = xfs_bud_item_pin, 287 - .iop_unpin = xfs_bud_item_unpin, 288 - .iop_unlock = xfs_bud_item_unlock, 289 - .iop_committed = xfs_bud_item_committed, 290 - .iop_push = xfs_bud_item_push, 291 - .iop_committing = xfs_bud_item_committing, 270 + .iop_release = xfs_bud_item_release, 292 271 }; 293 272 294 - /* 295 - * Allocate and initialize an bud item with the given number of extents. 296 - */ 297 - struct xfs_bud_log_item * 298 - xfs_bud_init( 299 - struct xfs_mount *mp, 273 + static struct xfs_bud_log_item * 274 + xfs_trans_get_bud( 275 + struct xfs_trans *tp, 300 276 struct xfs_bui_log_item *buip) 301 - 302 277 { 303 - struct xfs_bud_log_item *budp; 278 + struct xfs_bud_log_item *budp; 304 279 305 280 budp = kmem_zone_zalloc(xfs_bud_zone, KM_SLEEP); 306 - xfs_log_item_init(mp, &budp->bud_item, XFS_LI_BUD, &xfs_bud_item_ops); 281 + xfs_log_item_init(tp->t_mountp, &budp->bud_item, XFS_LI_BUD, 282 + &xfs_bud_item_ops); 307 283 budp->bud_buip = buip; 308 284 budp->bud_format.bud_bui_id = buip->bui_format.bui_id; 309 285 286 + xfs_trans_add_item(tp, &budp->bud_item); 310 287 return budp; 311 288 } 289 + 290 + /* 291 + * Finish an bmap update and log it to the BUD. 
Note that the 292 + * transaction is marked dirty regardless of whether the bmap update 293 + * succeeds or fails to support the BUI/BUD lifecycle rules. 294 + */ 295 + static int 296 + xfs_trans_log_finish_bmap_update( 297 + struct xfs_trans *tp, 298 + struct xfs_bud_log_item *budp, 299 + enum xfs_bmap_intent_type type, 300 + struct xfs_inode *ip, 301 + int whichfork, 302 + xfs_fileoff_t startoff, 303 + xfs_fsblock_t startblock, 304 + xfs_filblks_t *blockcount, 305 + xfs_exntst_t state) 306 + { 307 + int error; 308 + 309 + error = xfs_bmap_finish_one(tp, ip, type, whichfork, startoff, 310 + startblock, blockcount, state); 311 + 312 + /* 313 + * Mark the transaction dirty, even on error. This ensures the 314 + * transaction is aborted, which: 315 + * 316 + * 1.) releases the BUI and frees the BUD 317 + * 2.) shuts down the filesystem 318 + */ 319 + tp->t_flags |= XFS_TRANS_DIRTY; 320 + set_bit(XFS_LI_DIRTY, &budp->bud_item.li_flags); 321 + 322 + return error; 323 + } 324 + 325 + /* Sort bmap intents by inode. */ 326 + static int 327 + xfs_bmap_update_diff_items( 328 + void *priv, 329 + struct list_head *a, 330 + struct list_head *b) 331 + { 332 + struct xfs_bmap_intent *ba; 333 + struct xfs_bmap_intent *bb; 334 + 335 + ba = container_of(a, struct xfs_bmap_intent, bi_list); 336 + bb = container_of(b, struct xfs_bmap_intent, bi_list); 337 + return ba->bi_owner->i_ino - bb->bi_owner->i_ino; 338 + } 339 + 340 + /* Get an BUI. */ 341 + STATIC void * 342 + xfs_bmap_update_create_intent( 343 + struct xfs_trans *tp, 344 + unsigned int count) 345 + { 346 + struct xfs_bui_log_item *buip; 347 + 348 + ASSERT(count == XFS_BUI_MAX_FAST_EXTENTS); 349 + ASSERT(tp != NULL); 350 + 351 + buip = xfs_bui_init(tp->t_mountp); 352 + ASSERT(buip != NULL); 353 + 354 + /* 355 + * Get a log_item_desc to point at the new item. 356 + */ 357 + xfs_trans_add_item(tp, &buip->bui_item); 358 + return buip; 359 + } 360 + 361 + /* Set the map extent flags for this mapping. 
*/ 362 + static void 363 + xfs_trans_set_bmap_flags( 364 + struct xfs_map_extent *bmap, 365 + enum xfs_bmap_intent_type type, 366 + int whichfork, 367 + xfs_exntst_t state) 368 + { 369 + bmap->me_flags = 0; 370 + switch (type) { 371 + case XFS_BMAP_MAP: 372 + case XFS_BMAP_UNMAP: 373 + bmap->me_flags = type; 374 + break; 375 + default: 376 + ASSERT(0); 377 + } 378 + if (state == XFS_EXT_UNWRITTEN) 379 + bmap->me_flags |= XFS_BMAP_EXTENT_UNWRITTEN; 380 + if (whichfork == XFS_ATTR_FORK) 381 + bmap->me_flags |= XFS_BMAP_EXTENT_ATTR_FORK; 382 + } 383 + 384 + /* Log bmap updates in the intent item. */ 385 + STATIC void 386 + xfs_bmap_update_log_item( 387 + struct xfs_trans *tp, 388 + void *intent, 389 + struct list_head *item) 390 + { 391 + struct xfs_bui_log_item *buip = intent; 392 + struct xfs_bmap_intent *bmap; 393 + uint next_extent; 394 + struct xfs_map_extent *map; 395 + 396 + bmap = container_of(item, struct xfs_bmap_intent, bi_list); 397 + 398 + tp->t_flags |= XFS_TRANS_DIRTY; 399 + set_bit(XFS_LI_DIRTY, &buip->bui_item.li_flags); 400 + 401 + /* 402 + * atomic_inc_return gives us the value after the increment; 403 + * we want to use it as an array index so we need to subtract 1 from 404 + * it. 405 + */ 406 + next_extent = atomic_inc_return(&buip->bui_next_extent) - 1; 407 + ASSERT(next_extent < buip->bui_format.bui_nextents); 408 + map = &buip->bui_format.bui_extents[next_extent]; 409 + map->me_owner = bmap->bi_owner->i_ino; 410 + map->me_startblock = bmap->bi_bmap.br_startblock; 411 + map->me_startoff = bmap->bi_bmap.br_startoff; 412 + map->me_len = bmap->bi_bmap.br_blockcount; 413 + xfs_trans_set_bmap_flags(map, bmap->bi_type, bmap->bi_whichfork, 414 + bmap->bi_bmap.br_state); 415 + } 416 + 417 + /* Get an BUD so we can process all the deferred rmap updates. 
*/ 418 + STATIC void * 419 + xfs_bmap_update_create_done( 420 + struct xfs_trans *tp, 421 + void *intent, 422 + unsigned int count) 423 + { 424 + return xfs_trans_get_bud(tp, intent); 425 + } 426 + 427 + /* Process a deferred rmap update. */ 428 + STATIC int 429 + xfs_bmap_update_finish_item( 430 + struct xfs_trans *tp, 431 + struct list_head *item, 432 + void *done_item, 433 + void **state) 434 + { 435 + struct xfs_bmap_intent *bmap; 436 + xfs_filblks_t count; 437 + int error; 438 + 439 + bmap = container_of(item, struct xfs_bmap_intent, bi_list); 440 + count = bmap->bi_bmap.br_blockcount; 441 + error = xfs_trans_log_finish_bmap_update(tp, done_item, 442 + bmap->bi_type, 443 + bmap->bi_owner, bmap->bi_whichfork, 444 + bmap->bi_bmap.br_startoff, 445 + bmap->bi_bmap.br_startblock, 446 + &count, 447 + bmap->bi_bmap.br_state); 448 + if (!error && count > 0) { 449 + ASSERT(bmap->bi_type == XFS_BMAP_UNMAP); 450 + bmap->bi_bmap.br_blockcount = count; 451 + return -EAGAIN; 452 + } 453 + kmem_free(bmap); 454 + return error; 455 + } 456 + 457 + /* Abort all pending BUIs. */ 458 + STATIC void 459 + xfs_bmap_update_abort_intent( 460 + void *intent) 461 + { 462 + xfs_bui_release(intent); 463 + } 464 + 465 + /* Cancel a deferred rmap update. 
*/ 466 + STATIC void 467 + xfs_bmap_update_cancel_item( 468 + struct list_head *item) 469 + { 470 + struct xfs_bmap_intent *bmap; 471 + 472 + bmap = container_of(item, struct xfs_bmap_intent, bi_list); 473 + kmem_free(bmap); 474 + } 475 + 476 + const struct xfs_defer_op_type xfs_bmap_update_defer_type = { 477 + .max_items = XFS_BUI_MAX_FAST_EXTENTS, 478 + .diff_items = xfs_bmap_update_diff_items, 479 + .create_intent = xfs_bmap_update_create_intent, 480 + .abort_intent = xfs_bmap_update_abort_intent, 481 + .log_item = xfs_bmap_update_log_item, 482 + .create_done = xfs_bmap_update_create_done, 483 + .finish_item = xfs_bmap_update_finish_item, 484 + .cancel_item = xfs_bmap_update_cancel_item, 485 + }; 312 486 313 487 /* 314 488 * Process a bmap update intent item that was recovered from the log.
-2
fs/xfs/xfs_bmap_item.h
··· 75 75 extern struct kmem_zone *xfs_bud_zone; 76 76 77 77 struct xfs_bui_log_item *xfs_bui_init(struct xfs_mount *); 78 - struct xfs_bud_log_item *xfs_bud_init(struct xfs_mount *, 79 - struct xfs_bui_log_item *); 80 78 void xfs_bui_item_free(struct xfs_bui_log_item *); 81 79 void xfs_bui_release(struct xfs_bui_log_item *); 82 80 int xfs_bui_recover(struct xfs_trans *parent_tp, struct xfs_bui_log_item *buip);
+3 -8
fs/xfs/xfs_bmap_util.c
··· 12 12 #include "xfs_trans_resv.h" 13 13 #include "xfs_bit.h" 14 14 #include "xfs_mount.h" 15 - #include "xfs_da_format.h" 16 15 #include "xfs_defer.h" 17 16 #include "xfs_inode.h" 18 17 #include "xfs_btree.h" 19 18 #include "xfs_trans.h" 20 - #include "xfs_extfree_item.h" 21 19 #include "xfs_alloc.h" 22 20 #include "xfs_bmap.h" 23 21 #include "xfs_bmap_util.h" ··· 26 28 #include "xfs_trans_space.h" 27 29 #include "xfs_trace.h" 28 30 #include "xfs_icache.h" 29 - #include "xfs_log.h" 30 - #include "xfs_rmap_btree.h" 31 31 #include "xfs_iomap.h" 32 32 #include "xfs_reflink.h" 33 - #include "xfs_refcount.h" 34 33 35 34 /* Kernel only BMAP related definitions and functions */ 36 35 ··· 271 276 struct xfs_btree_block *block, *nextblock; 272 277 int numrecs; 273 278 274 - error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF, 279 + error = xfs_btree_read_bufl(mp, tp, bno, &bp, XFS_BMAP_BTREE_REF, 275 280 &xfs_bmbt_buf_ops); 276 281 if (error) 277 282 return error; ··· 282 287 /* Not at node above leaves, count this level of nodes */ 283 288 nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib); 284 289 while (nextbno != NULLFSBLOCK) { 285 - error = xfs_btree_read_bufl(mp, tp, nextbno, 0, &nbp, 290 + error = xfs_btree_read_bufl(mp, tp, nextbno, &nbp, 286 291 XFS_BMAP_BTREE_REF, 287 292 &xfs_bmbt_buf_ops); 288 293 if (error) ··· 316 321 if (nextbno == NULLFSBLOCK) 317 322 break; 318 323 bno = nextbno; 319 - error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, 324 + error = xfs_btree_read_bufl(mp, tp, bno, &bp, 320 325 XFS_BMAP_BTREE_REF, 321 326 &xfs_bmbt_buf_ops); 322 327 if (error)
+24 -147
fs/xfs/xfs_buf.c
··· 4 4 * All Rights Reserved. 5 5 */ 6 6 #include "xfs.h" 7 - #include <linux/stddef.h> 8 - #include <linux/errno.h> 9 - #include <linux/gfp.h> 10 - #include <linux/pagemap.h> 11 - #include <linux/init.h> 12 - #include <linux/vmalloc.h> 13 - #include <linux/bio.h> 14 - #include <linux/sysctl.h> 15 - #include <linux/proc_fs.h> 16 - #include <linux/workqueue.h> 17 - #include <linux/percpu.h> 18 - #include <linux/blkdev.h> 19 - #include <linux/hash.h> 20 - #include <linux/kthread.h> 21 - #include <linux/migrate.h> 22 7 #include <linux/backing-dev.h> 23 - #include <linux/freezer.h> 24 8 9 + #include "xfs_shared.h" 25 10 #include "xfs_format.h" 26 11 #include "xfs_log_format.h" 27 12 #include "xfs_trans_resv.h" ··· 198 213 } 199 214 } 200 215 201 - struct xfs_buf * 216 + static struct xfs_buf * 202 217 _xfs_buf_alloc( 203 218 struct xfs_buftarg *target, 204 219 struct xfs_buf_map *map, ··· 228 243 sema_init(&bp->b_sema, 0); /* held, no waiters */ 229 244 spin_lock_init(&bp->b_lock); 230 245 bp->b_target = target; 246 + bp->b_mount = target->bt_mount; 231 247 bp->b_flags = flags; 232 248 233 249 /* ··· 249 263 bp->b_maps[i].bm_len = map[i].bm_len; 250 264 bp->b_length += map[i].bm_len; 251 265 } 252 - bp->b_io_length = bp->b_length; 253 266 254 267 atomic_set(&bp->b_pin_count, 0); 255 268 init_waitqueue_head(&bp->b_waiters); 256 269 257 - XFS_STATS_INC(target->bt_mount, xb_create); 270 + XFS_STATS_INC(bp->b_mount, xb_create); 258 271 trace_xfs_buf_init(bp, _RET_IP_); 259 272 260 273 return bp; ··· 410 425 current->comm, current->pid, 411 426 __func__, gfp_mask); 412 427 413 - XFS_STATS_INC(bp->b_target->bt_mount, xb_page_retries); 428 + XFS_STATS_INC(bp->b_mount, xb_page_retries); 414 429 congestion_wait(BLK_RW_ASYNC, HZ/50); 415 430 goto retry; 416 431 } 417 432 418 - XFS_STATS_INC(bp->b_target->bt_mount, xb_page_found); 433 + XFS_STATS_INC(bp->b_mount, xb_page_found); 419 434 420 435 nbytes = min_t(size_t, size, PAGE_SIZE - offset); 421 436 size -= nbytes; ··· 894 909 
return 0; 895 910 } 896 911 897 - /* 898 - * Return a buffer allocated as an empty buffer and associated to external 899 - * memory via xfs_buf_associate_memory() back to it's empty state. 900 - */ 901 - void 902 - xfs_buf_set_empty( 903 - struct xfs_buf *bp, 904 - size_t numblks) 905 - { 906 - if (bp->b_pages) 907 - _xfs_buf_free_pages(bp); 908 - 909 - bp->b_pages = NULL; 910 - bp->b_page_count = 0; 911 - bp->b_addr = NULL; 912 - bp->b_length = numblks; 913 - bp->b_io_length = numblks; 914 - 915 - ASSERT(bp->b_map_count == 1); 916 - bp->b_bn = XFS_BUF_DADDR_NULL; 917 - bp->b_maps[0].bm_bn = XFS_BUF_DADDR_NULL; 918 - bp->b_maps[0].bm_len = bp->b_length; 919 - } 920 - 921 - static inline struct page * 922 - mem_to_page( 923 - void *addr) 924 - { 925 - if ((!is_vmalloc_addr(addr))) { 926 - return virt_to_page(addr); 927 - } else { 928 - return vmalloc_to_page(addr); 929 - } 930 - } 931 - 932 - int 933 - xfs_buf_associate_memory( 934 - xfs_buf_t *bp, 935 - void *mem, 936 - size_t len) 937 - { 938 - int rval; 939 - int i = 0; 940 - unsigned long pageaddr; 941 - unsigned long offset; 942 - size_t buflen; 943 - int page_count; 944 - 945 - pageaddr = (unsigned long)mem & PAGE_MASK; 946 - offset = (unsigned long)mem - pageaddr; 947 - buflen = PAGE_ALIGN(len + offset); 948 - page_count = buflen >> PAGE_SHIFT; 949 - 950 - /* Free any previous set of page pointers */ 951 - if (bp->b_pages) 952 - _xfs_buf_free_pages(bp); 953 - 954 - bp->b_pages = NULL; 955 - bp->b_addr = mem; 956 - 957 - rval = _xfs_buf_get_pages(bp, page_count); 958 - if (rval) 959 - return rval; 960 - 961 - bp->b_offset = offset; 962 - 963 - for (i = 0; i < bp->b_page_count; i++) { 964 - bp->b_pages[i] = mem_to_page((void *)pageaddr); 965 - pageaddr += PAGE_SIZE; 966 - } 967 - 968 - bp->b_io_length = BTOBB(len); 969 - bp->b_length = BTOBB(buflen); 970 - 971 - return 0; 972 - } 973 - 974 912 xfs_buf_t * 975 913 xfs_buf_get_uncached( 976 914 struct xfs_buftarg *target, ··· 1088 1180 trace_xfs_buf_lock(bp, 
_RET_IP_); 1089 1181 1090 1182 if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) 1091 - xfs_log_force(bp->b_target->bt_mount, 0); 1183 + xfs_log_force(bp->b_mount, 0); 1092 1184 down(&bp->b_sema); 1093 1185 1094 1186 trace_xfs_buf_lock_done(bp, _RET_IP_); ··· 1177 1269 struct xfs_buf *bp) 1178 1270 { 1179 1271 INIT_WORK(&bp->b_ioend_work, xfs_buf_ioend_work); 1180 - queue_work(bp->b_ioend_wq, &bp->b_ioend_work); 1272 + queue_work(bp->b_mount->m_buf_workqueue, &bp->b_ioend_work); 1181 1273 } 1182 1274 1183 1275 void ··· 1196 1288 struct xfs_buf *bp, 1197 1289 const char *func) 1198 1290 { 1199 - xfs_alert(bp->b_target->bt_mount, 1291 + xfs_alert(bp->b_mount, 1200 1292 "metadata I/O error in \"%s\" at daddr 0x%llx len %d error %d", 1201 1293 func, (uint64_t)XFS_BUF_ADDR(bp), bp->b_length, 1202 1294 -bp->b_error); ··· 1215 1307 XBF_WRITE_FAIL | XBF_DONE); 1216 1308 1217 1309 error = xfs_buf_submit(bp); 1218 - if (error) { 1219 - xfs_force_shutdown(bp->b_target->bt_mount, 1220 - SHUTDOWN_META_IO_ERROR); 1221 - } 1310 + if (error) 1311 + xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR); 1222 1312 return error; 1223 1313 } 1224 1314 ··· 1342 1436 */ 1343 1437 bp->b_error = 0; 1344 1438 1345 - /* 1346 - * Initialize the I/O completion workqueue if we haven't yet or the 1347 - * submitter has not opted to specify a custom one. 1348 - */ 1349 - if (!bp->b_ioend_wq) 1350 - bp->b_ioend_wq = bp->b_target->bt_mount->m_buf_workqueue; 1351 - 1352 1439 if (bp->b_flags & XBF_WRITE) { 1353 1440 op = REQ_OP_WRITE; 1354 - if (bp->b_flags & XBF_SYNCIO) 1355 - op_flags = REQ_SYNC; 1356 - if (bp->b_flags & XBF_FUA) 1357 - op_flags |= REQ_FUA; 1358 - if (bp->b_flags & XBF_FLUSH) 1359 - op_flags |= REQ_PREFLUSH; 1360 1441 1361 1442 /* 1362 1443 * Run the write verifier callback function if it exists. 
If ··· 1353 1460 if (bp->b_ops) { 1354 1461 bp->b_ops->verify_write(bp); 1355 1462 if (bp->b_error) { 1356 - xfs_force_shutdown(bp->b_target->bt_mount, 1463 + xfs_force_shutdown(bp->b_mount, 1357 1464 SHUTDOWN_CORRUPT_INCORE); 1358 1465 return; 1359 1466 } 1360 1467 } else if (bp->b_bn != XFS_BUF_DADDR_NULL) { 1361 - struct xfs_mount *mp = bp->b_target->bt_mount; 1468 + struct xfs_mount *mp = bp->b_mount; 1362 1469 1363 1470 /* 1364 1471 * non-crc filesystems don't attach verifiers during ··· 1390 1497 * subsequent call. 1391 1498 */ 1392 1499 offset = bp->b_offset; 1393 - size = BBTOB(bp->b_io_length); 1500 + size = BBTOB(bp->b_length); 1394 1501 blk_start_plug(&plug); 1395 1502 for (i = 0; i < bp->b_map_count; i++) { 1396 1503 xfs_buf_ioapply_map(bp, i, &offset, &size, op, op_flags); ··· 1436 1543 ASSERT(!(bp->b_flags & _XBF_DELWRI_Q)); 1437 1544 1438 1545 /* on shutdown we stale and complete the buffer immediately */ 1439 - if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) { 1546 + if (XFS_FORCED_SHUTDOWN(bp->b_mount)) { 1440 1547 xfs_buf_ioerror(bp, -EIO); 1441 1548 bp->b_flags &= ~XBF_DONE; 1442 1549 xfs_buf_stale(bp); ··· 1506 1613 return page_address(page) + (offset & (PAGE_SIZE-1)); 1507 1614 } 1508 1615 1509 - /* 1510 - * Move data into or out of a buffer. 
1511 - */ 1512 1616 void 1513 - xfs_buf_iomove( 1514 - xfs_buf_t *bp, /* buffer to process */ 1515 - size_t boff, /* starting buffer offset */ 1516 - size_t bsize, /* length to copy */ 1517 - void *data, /* data address */ 1518 - xfs_buf_rw_t mode) /* read/write/zero flag */ 1617 + xfs_buf_zero( 1618 + struct xfs_buf *bp, 1619 + size_t boff, 1620 + size_t bsize) 1519 1621 { 1520 1622 size_t bend; 1521 1623 ··· 1523 1635 page_offset = (boff + bp->b_offset) & ~PAGE_MASK; 1524 1636 page = bp->b_pages[page_index]; 1525 1637 csize = min_t(size_t, PAGE_SIZE - page_offset, 1526 - BBTOB(bp->b_io_length) - boff); 1638 + BBTOB(bp->b_length) - boff); 1527 1639 1528 1640 ASSERT((csize + page_offset) <= PAGE_SIZE); 1529 1641 1530 - switch (mode) { 1531 - case XBRW_ZERO: 1532 - memset(page_address(page) + page_offset, 0, csize); 1533 - break; 1534 - case XBRW_READ: 1535 - memcpy(data, page_address(page) + page_offset, csize); 1536 - break; 1537 - case XBRW_WRITE: 1538 - memcpy(page_address(page) + page_offset, data, csize); 1539 - } 1642 + memset(page_address(page) + page_offset, 0, csize); 1540 1643 1541 1644 boff += csize; 1542 - data += csize; 1543 1645 } 1544 1646 } 1545 1647 ··· 2076 2198 * This allows userspace to disrupt buffer caching for debug/testing 2077 2199 * purposes. 
2078 2200 */ 2079 - if (XFS_TEST_ERROR(false, bp->b_target->bt_mount, 2080 - XFS_ERRTAG_BUF_LRU_REF)) 2201 + if (XFS_TEST_ERROR(false, bp->b_mount, XFS_ERRTAG_BUF_LRU_REF)) 2081 2202 lru_ref = 0; 2082 2203 2083 2204 atomic_set(&bp->b_lru_ref, lru_ref); ··· 2092 2215 struct xfs_buf *bp, 2093 2216 __be32 dmagic) 2094 2217 { 2095 - struct xfs_mount *mp = bp->b_target->bt_mount; 2218 + struct xfs_mount *mp = bp->b_mount; 2096 2219 int idx; 2097 2220 2098 2221 idx = xfs_sb_version_hascrc(&mp->m_sb); ··· 2110 2233 struct xfs_buf *bp, 2111 2234 __be16 dmagic) 2112 2235 { 2113 - struct xfs_mount *mp = bp->b_target->bt_mount; 2236 + struct xfs_mount *mp = bp->b_mount; 2114 2237 int idx; 2115 2238 2116 2239 idx = xfs_sb_version_hascrc(&mp->m_sb);
+7 -46
fs/xfs/xfs_buf.h
··· 21 21 22 22 #define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL)) 23 23 24 - typedef enum { 25 - XBRW_READ = 1, /* transfer into target memory */ 26 - XBRW_WRITE = 2, /* transfer from target memory */ 27 - XBRW_ZERO = 3, /* Zero target memory */ 28 - } xfs_buf_rw_t; 29 - 30 24 #define XBF_READ (1 << 0) /* buffer intended for reading from device */ 31 25 #define XBF_WRITE (1 << 1) /* buffer intended for writing to device */ 32 26 #define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */ ··· 28 34 #define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */ 29 35 #define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ 30 36 #define XBF_STALE (1 << 6) /* buffer has been staled, do not find it */ 31 - #define XBF_WRITE_FAIL (1 << 24)/* async writes have failed on this buffer */ 32 - 33 - /* I/O hints for the BIO layer */ 34 - #define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */ 35 - #define XBF_FUA (1 << 11)/* force cache write through mode */ 36 - #define XBF_FLUSH (1 << 12)/* flush the disk cache before a write */ 37 + #define XBF_WRITE_FAIL (1 << 7) /* async writes have failed on this buffer */ 37 38 38 39 /* flags used only as arguments to access routines */ 39 40 #define XBF_TRYLOCK (1 << 16)/* lock requested, but do not wait */ ··· 38 49 #define _XBF_PAGES (1 << 20)/* backed by refcounted pages */ 39 50 #define _XBF_KMEM (1 << 21)/* backed by heap memory */ 40 51 #define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */ 41 - #define _XBF_COMPOUND (1 << 23)/* compound buffer */ 42 52 43 53 typedef unsigned int xfs_buf_flags_t; 44 54 ··· 50 62 { XBF_DONE, "DONE" }, \ 51 63 { XBF_STALE, "STALE" }, \ 52 64 { XBF_WRITE_FAIL, "WRITE_FAIL" }, \ 53 - { XBF_SYNCIO, "SYNCIO" }, \ 54 - { XBF_FUA, "FUA" }, \ 55 - { XBF_FLUSH, "FLUSH" }, \ 56 65 { XBF_TRYLOCK, "TRYLOCK" }, /* should never be set */\ 57 66 { XBF_UNMAPPED, "UNMAPPED" }, /* ditto */\ 58 67 { _XBF_PAGES, "PAGES" }, \ 59 68 { _XBF_KMEM, "KMEM" }, \ 60 - { 
_XBF_DELWRI_Q, "DELWRI_Q" }, \ 61 - { _XBF_COMPOUND, "COMPOUND" } 69 + { _XBF_DELWRI_Q, "DELWRI_Q" } 62 70 63 71 64 72 /* ··· 145 161 wait_queue_head_t b_waiters; /* unpin waiters */ 146 162 struct list_head b_list; 147 163 struct xfs_perag *b_pag; /* contains rbtree root */ 164 + struct xfs_mount *b_mount; 148 165 xfs_buftarg_t *b_target; /* buffer target (device) */ 149 166 void *b_addr; /* virtual address of buffer */ 150 167 struct work_struct b_ioend_work; 151 - struct workqueue_struct *b_ioend_wq; /* I/O completion wq */ 152 168 xfs_buf_iodone_t b_iodone; /* I/O completion function */ 153 169 struct completion b_iowait; /* queue for I/O waiters */ 154 - void *b_log_item; 170 + struct xfs_buf_log_item *b_log_item; 155 171 struct list_head b_li_list; /* Log items list head */ 156 172 struct xfs_trans *b_transp; 157 173 struct page **b_pages; /* array of page pointers */ ··· 159 175 struct xfs_buf_map *b_maps; /* compound buffer map */ 160 176 struct xfs_buf_map __b_map; /* inline compound buffer map */ 161 177 int b_map_count; 162 - int b_io_length; /* IO size in BBs */ 163 178 atomic_t b_pin_count; /* pin count */ 164 179 atomic_t b_io_remaining; /* #outstanding I/O requests */ 165 180 unsigned int b_page_count; /* size of page array */ ··· 192 209 xfs_daddr_t blkno, size_t numblks, 193 210 xfs_buf_flags_t flags); 194 211 195 - struct xfs_buf *_xfs_buf_alloc(struct xfs_buftarg *target, 196 - struct xfs_buf_map *map, int nmaps, 197 - xfs_buf_flags_t flags); 198 - 199 - static inline struct xfs_buf * 200 - xfs_buf_alloc( 201 - struct xfs_buftarg *target, 202 - xfs_daddr_t blkno, 203 - size_t numblks, 204 - xfs_buf_flags_t flags) 205 - { 206 - DEFINE_SINGLE_BUF_MAP(map, blkno, numblks); 207 - return _xfs_buf_alloc(target, &map, 1, flags); 208 - } 209 - 210 212 struct xfs_buf *xfs_buf_get_map(struct xfs_buftarg *target, 211 213 struct xfs_buf_map *map, int nmaps, 212 214 xfs_buf_flags_t flags); ··· 207 239 xfs_buf_get( 208 240 struct xfs_buftarg *target, 209 241 
xfs_daddr_t blkno, 210 - size_t numblks, 211 - xfs_buf_flags_t flags) 242 + size_t numblks) 212 243 { 213 244 DEFINE_SINGLE_BUF_MAP(map, blkno, numblks); 214 - return xfs_buf_get_map(target, &map, 1, flags); 245 + return xfs_buf_get_map(target, &map, 1, 0); 215 246 } 216 247 217 248 static inline struct xfs_buf * ··· 235 268 DEFINE_SINGLE_BUF_MAP(map, blkno, numblks); 236 269 return xfs_buf_readahead_map(target, &map, 1, ops); 237 270 } 238 - 239 - void xfs_buf_set_empty(struct xfs_buf *bp, size_t numblks); 240 - int xfs_buf_associate_memory(struct xfs_buf *bp, void *mem, size_t length); 241 271 242 272 struct xfs_buf *xfs_buf_get_uncached(struct xfs_buftarg *target, size_t numblks, 243 273 int flags); ··· 269 305 return __xfs_buf_submit(bp, wait); 270 306 } 271 307 272 - extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *, 273 - xfs_buf_rw_t); 274 - #define xfs_buf_zero(bp, off, len) \ 275 - xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO) 308 + void xfs_buf_zero(struct xfs_buf *bp, size_t boff, size_t bsize); 276 309 277 310 /* Buffer Utility Routines */ 278 311 extern void *xfs_buf_offset(struct xfs_buf *, size_t);
+18 -22
fs/xfs/xfs_buf_item.c
··· 5 5 */ 6 6 #include "xfs.h" 7 7 #include "xfs_fs.h" 8 + #include "xfs_shared.h" 8 9 #include "xfs_format.h" 9 10 #include "xfs_log_format.h" 10 11 #include "xfs_trans_resv.h" 11 12 #include "xfs_bit.h" 12 - #include "xfs_sb.h" 13 13 #include "xfs_mount.h" 14 14 #include "xfs_trans.h" 15 15 #include "xfs_buf_item.h" 16 16 #include "xfs_trans_priv.h" 17 - #include "xfs_error.h" 18 17 #include "xfs_trace.h" 19 18 #include "xfs_log.h" 20 - #include "xfs_inode.h" 21 19 22 20 23 21 kmem_zone_t *xfs_buf_item_zone; ··· 518 520 /* has a previous flush failed due to IO errors? */ 519 521 if ((bp->b_flags & XBF_WRITE_FAIL) && 520 522 ___ratelimit(&xfs_buf_write_fail_rl_state, "XFS: Failing async write")) { 521 - xfs_warn(bp->b_target->bt_mount, 523 + xfs_warn(bp->b_mount, 522 524 "Failing async write on buffer block 0x%llx. Retrying async write.", 523 525 (long long)bp->b_bn); 524 526 } ··· 592 594 * free the item. 593 595 */ 594 596 STATIC void 595 - xfs_buf_item_unlock( 597 + xfs_buf_item_release( 596 598 struct xfs_log_item *lip) 597 599 { 598 600 struct xfs_buf_log_item *bip = BUF_ITEM(lip); ··· 607 609 &lip->li_flags); 608 610 #endif 609 611 610 - trace_xfs_buf_item_unlock(bip); 612 + trace_xfs_buf_item_release(bip); 611 613 612 614 /* 613 615 * The bli dirty state should match whether the blf has logged segments ··· 635 637 return; 636 638 ASSERT(!stale || aborted); 637 639 xfs_buf_relse(bp); 640 + } 641 + 642 + STATIC void 643 + xfs_buf_item_committing( 644 + struct xfs_log_item *lip, 645 + xfs_lsn_t commit_lsn) 646 + { 647 + return xfs_buf_item_release(lip); 638 648 } 639 649 640 650 /* ··· 677 671 return lsn; 678 672 } 679 673 680 - STATIC void 681 - xfs_buf_item_committing( 682 - struct xfs_log_item *lip, 683 - xfs_lsn_t commit_lsn) 684 - { 685 - } 686 - 687 - /* 688 - * This is the ops vector shared by all buf log items. 
689 - */ 690 674 static const struct xfs_item_ops xfs_buf_item_ops = { 691 675 .iop_size = xfs_buf_item_size, 692 676 .iop_format = xfs_buf_item_format, 693 677 .iop_pin = xfs_buf_item_pin, 694 678 .iop_unpin = xfs_buf_item_unpin, 695 - .iop_unlock = xfs_buf_item_unlock, 679 + .iop_release = xfs_buf_item_release, 680 + .iop_committing = xfs_buf_item_committing, 696 681 .iop_committed = xfs_buf_item_committed, 697 682 .iop_push = xfs_buf_item_push, 698 - .iop_committing = xfs_buf_item_committing 699 683 }; 700 684 701 685 STATIC int ··· 739 743 * this buffer. If we do already have one, there is 740 744 * nothing to do here so return. 741 745 */ 742 - ASSERT(bp->b_target->bt_mount == mp); 746 + ASSERT(bp->b_mount == mp); 743 747 if (bip) { 744 748 ASSERT(bip->bli_item.li_type == XFS_LI_BUF); 745 749 ASSERT(!bp->b_transp); ··· 976 980 */ 977 981 void 978 982 xfs_buf_attach_iodone( 979 - xfs_buf_t *bp, 980 - void (*cb)(xfs_buf_t *, xfs_log_item_t *), 981 - xfs_log_item_t *lip) 983 + struct xfs_buf *bp, 984 + void (*cb)(struct xfs_buf *, struct xfs_log_item *), 985 + struct xfs_log_item *lip) 982 986 { 983 987 ASSERT(xfs_buf_islocked(bp)); 984 988
+3 -3
fs/xfs/xfs_buf_item.h
··· 39 39 * locked, and which 128 byte chunks of the buffer are dirty. 40 40 */ 41 41 struct xfs_buf_log_item { 42 - xfs_log_item_t bli_item; /* common item structure */ 42 + struct xfs_log_item bli_item; /* common item structure */ 43 43 struct xfs_buf *bli_buf; /* real buffer pointer */ 44 44 unsigned int bli_flags; /* misc flags */ 45 45 unsigned int bli_recur; /* lock recursion count */ ··· 55 55 void xfs_buf_item_log(struct xfs_buf_log_item *, uint, uint); 56 56 bool xfs_buf_item_dirty_format(struct xfs_buf_log_item *); 57 57 void xfs_buf_attach_iodone(struct xfs_buf *, 58 - void(*)(struct xfs_buf *, xfs_log_item_t *), 59 - xfs_log_item_t *); 58 + void(*)(struct xfs_buf *, struct xfs_log_item *), 59 + struct xfs_log_item *); 60 60 void xfs_buf_iodone_callbacks(struct xfs_buf *); 61 61 void xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *); 62 62 bool xfs_buf_resubmit_failed_buffers(struct xfs_buf *,
+1 -4
fs/xfs/xfs_dir2_readdir.c
··· 6 6 */ 7 7 #include "xfs.h" 8 8 #include "xfs_fs.h" 9 + #include "xfs_shared.h" 9 10 #include "xfs_format.h" 10 11 #include "xfs_log_format.h" 11 12 #include "xfs_trans_resv.h" 12 - #include "xfs_bit.h" 13 13 #include "xfs_mount.h" 14 - #include "xfs_da_format.h" 15 - #include "xfs_da_btree.h" 16 14 #include "xfs_inode.h" 17 15 #include "xfs_dir2.h" 18 16 #include "xfs_dir2_priv.h" 19 - #include "xfs_error.h" 20 17 #include "xfs_trace.h" 21 18 #include "xfs_bmap.h" 22 19 #include "xfs_trans.h"
+1 -3
fs/xfs/xfs_discard.c
··· 4 4 * All Rights Reserved. 5 5 */ 6 6 #include "xfs.h" 7 + #include "xfs_shared.h" 7 8 #include "xfs_format.h" 8 9 #include "xfs_log_format.h" 9 10 #include "xfs_trans_resv.h" 10 11 #include "xfs_sb.h" 11 12 #include "xfs_mount.h" 12 - #include "xfs_quota.h" 13 - #include "xfs_inode.h" 14 13 #include "xfs_btree.h" 15 14 #include "xfs_alloc_btree.h" 16 15 #include "xfs_alloc.h" 17 16 #include "xfs_error.h" 18 17 #include "xfs_extent_busy.h" 19 - #include "xfs_discard.h" 20 18 #include "xfs_trace.h" 21 19 #include "xfs_log.h" 22 20
+1 -5
fs/xfs/xfs_dquot.c
··· 14 14 #include "xfs_defer.h" 15 15 #include "xfs_inode.h" 16 16 #include "xfs_bmap.h" 17 - #include "xfs_bmap_util.h" 18 - #include "xfs_alloc.h" 19 17 #include "xfs_quota.h" 20 - #include "xfs_error.h" 21 18 #include "xfs_trans.h" 22 19 #include "xfs_buf_item.h" 23 20 #include "xfs_trans_space.h" 24 21 #include "xfs_trans_priv.h" 25 22 #include "xfs_qm.h" 26 - #include "xfs_cksum.h" 27 23 #include "xfs_trace.h" 28 24 #include "xfs_log.h" 29 25 #include "xfs_bmap_btree.h" ··· 1239 1243 /* 1240 1244 * Iterate every dquot of a particular type. The caller must ensure that the 1241 1245 * particular quota type is active. iter_fn can return negative error codes, 1242 - * or XFS_BTREE_QUERY_RANGE_ABORT to indicate that it wants to stop iterating. 1246 + * or XFS_ITER_ABORT to indicate that it wants to stop iterating. 1243 1247 */ 1244 1248 int 1245 1249 xfs_qm_dqiterate(
-1
fs/xfs/xfs_dquot.h
··· 34 34 uint dq_flags; /* various flags (XFS_DQ_*) */ 35 35 struct list_head q_lru; /* global free list of dquots */ 36 36 struct xfs_mount*q_mount; /* filesystem this relates to */ 37 - struct xfs_trans*q_transp; /* trans this belongs to currently */ 38 37 uint q_nrefs; /* # active refs from inodes */ 39 38 xfs_daddr_t q_blkno; /* blkno of dquot buffer */ 40 39 int q_bufoffset; /* off of dq in buffer (# dquots) */
+6 -112
fs/xfs/xfs_dquot_item.c
··· 5 5 */ 6 6 #include "xfs.h" 7 7 #include "xfs_fs.h" 8 + #include "xfs_shared.h" 8 9 #include "xfs_format.h" 9 10 #include "xfs_log_format.h" 10 11 #include "xfs_trans_resv.h" 11 12 #include "xfs_mount.h" 12 13 #include "xfs_inode.h" 13 14 #include "xfs_quota.h" 14 - #include "xfs_error.h" 15 15 #include "xfs_trans.h" 16 16 #include "xfs_buf_item.h" 17 17 #include "xfs_trans_priv.h" ··· 92 92 ASSERT(atomic_read(&dqp->q_pincount) > 0); 93 93 if (atomic_dec_and_test(&dqp->q_pincount)) 94 94 wake_up(&dqp->q_pinwait); 95 - } 96 - 97 - STATIC xfs_lsn_t 98 - xfs_qm_dquot_logitem_committed( 99 - struct xfs_log_item *lip, 100 - xfs_lsn_t lsn) 101 - { 102 - /* 103 - * We always re-log the entire dquot when it becomes dirty, 104 - * so, the latest copy _is_ the only one that matters. 105 - */ 106 - return lsn; 107 95 } 108 96 109 97 /* ··· 197 209 return rval; 198 210 } 199 211 200 - /* 201 - * Unlock the dquot associated with the log item. 202 - * Clear the fields of the dquot and dquot log item that 203 - * are specific to the current transaction. If the 204 - * hold flags is set, do not unlock the dquot. 205 - */ 206 212 STATIC void 207 - xfs_qm_dquot_logitem_unlock( 213 + xfs_qm_dquot_logitem_release( 208 214 struct xfs_log_item *lip) 209 215 { 210 216 struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; 211 217 212 218 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 213 - 214 - /* 215 - * Clear the transaction pointer in the dquot 216 - */ 217 - dqp->q_transp = NULL; 218 219 219 220 /* 220 221 * dquots are never 'held' from getting unlocked at the end of ··· 214 237 xfs_dqunlock(dqp); 215 238 } 216 239 217 - /* 218 - * this needs to stamp an lsn into the dquot, I think. 
219 - * rpc's that look at user dquot's would then have to 220 - * push on the dependency recorded in the dquot 221 - */ 222 240 STATIC void 223 241 xfs_qm_dquot_logitem_committing( 224 242 struct xfs_log_item *lip, 225 - xfs_lsn_t lsn) 243 + xfs_lsn_t commit_lsn) 226 244 { 245 + return xfs_qm_dquot_logitem_release(lip); 227 246 } 228 247 229 - /* 230 - * This is the ops vector for dquots 231 - */ 232 248 static const struct xfs_item_ops xfs_dquot_item_ops = { 233 249 .iop_size = xfs_qm_dquot_logitem_size, 234 250 .iop_format = xfs_qm_dquot_logitem_format, 235 251 .iop_pin = xfs_qm_dquot_logitem_pin, 236 252 .iop_unpin = xfs_qm_dquot_logitem_unpin, 237 - .iop_unlock = xfs_qm_dquot_logitem_unlock, 238 - .iop_committed = xfs_qm_dquot_logitem_committed, 253 + .iop_release = xfs_qm_dquot_logitem_release, 254 + .iop_committing = xfs_qm_dquot_logitem_committing, 239 255 .iop_push = xfs_qm_dquot_logitem_push, 240 - .iop_committing = xfs_qm_dquot_logitem_committing, 241 256 .iop_error = xfs_dquot_item_error 242 257 }; 243 258 ··· 289 320 } 290 321 291 322 /* 292 - * Pinning has no meaning for an quotaoff item, so just return. 293 - */ 294 - STATIC void 295 - xfs_qm_qoff_logitem_pin( 296 - struct xfs_log_item *lip) 297 - { 298 - } 299 - 300 - /* 301 - * Since pinning has no meaning for an quotaoff item, unpinning does 302 - * not either. 303 - */ 304 - STATIC void 305 - xfs_qm_qoff_logitem_unpin( 306 - struct xfs_log_item *lip, 307 - int remove) 308 - { 309 - } 310 - 311 - /* 312 323 * There isn't much you can do to push a quotaoff item. It is simply 313 324 * stuck waiting for the log to be flushed to disk. 314 325 */ ··· 298 349 struct list_head *buffer_list) 299 350 { 300 351 return XFS_ITEM_LOCKED; 301 - } 302 - 303 - /* 304 - * Quotaoff items have no locking or pushing, so return failure 305 - * so that the caller doesn't bother with us. 
306 - */ 307 - STATIC void 308 - xfs_qm_qoff_logitem_unlock( 309 - struct xfs_log_item *lip) 310 - { 311 - } 312 - 313 - /* 314 - * The quotaoff-start-item is logged only once and cannot be moved in the log, 315 - * so simply return the lsn at which it's been logged. 316 - */ 317 - STATIC xfs_lsn_t 318 - xfs_qm_qoff_logitem_committed( 319 - struct xfs_log_item *lip, 320 - xfs_lsn_t lsn) 321 - { 322 - return lsn; 323 352 } 324 353 325 354 STATIC xfs_lsn_t ··· 323 396 return (xfs_lsn_t)-1; 324 397 } 325 398 326 - /* 327 - * XXX rcc - don't know quite what to do with this. I think we can 328 - * just ignore it. The only time that isn't the case is if we allow 329 - * the client to somehow see that quotas have been turned off in which 330 - * we can't allow that to get back until the quotaoff hits the disk. 331 - * So how would that happen? Also, do we need different routines for 332 - * quotaoff start and quotaoff end? I suspect the answer is yes but 333 - * to be sure, I need to look at the recovery code and see how quota off 334 - * recovery is handled (do we roll forward or back or do something else). 335 - * If we roll forwards or backwards, then we need two separate routines, 336 - * one that does nothing and one that stamps in the lsn that matters 337 - * (truly makes the quotaoff irrevocable). If we do something else, 338 - * then maybe we don't need two. 
339 - */ 340 - STATIC void 341 - xfs_qm_qoff_logitem_committing( 342 - struct xfs_log_item *lip, 343 - xfs_lsn_t commit_lsn) 344 - { 345 - } 346 - 347 399 static const struct xfs_item_ops xfs_qm_qoffend_logitem_ops = { 348 400 .iop_size = xfs_qm_qoff_logitem_size, 349 401 .iop_format = xfs_qm_qoff_logitem_format, 350 - .iop_pin = xfs_qm_qoff_logitem_pin, 351 - .iop_unpin = xfs_qm_qoff_logitem_unpin, 352 - .iop_unlock = xfs_qm_qoff_logitem_unlock, 353 402 .iop_committed = xfs_qm_qoffend_logitem_committed, 354 403 .iop_push = xfs_qm_qoff_logitem_push, 355 - .iop_committing = xfs_qm_qoff_logitem_committing 356 404 }; 357 405 358 - /* 359 - * This is the ops vector shared by all quotaoff-start log items. 360 - */ 361 406 static const struct xfs_item_ops xfs_qm_qoff_logitem_ops = { 362 407 .iop_size = xfs_qm_qoff_logitem_size, 363 408 .iop_format = xfs_qm_qoff_logitem_format, 364 - .iop_pin = xfs_qm_qoff_logitem_pin, 365 - .iop_unpin = xfs_qm_qoff_logitem_unpin, 366 - .iop_unlock = xfs_qm_qoff_logitem_unlock, 367 - .iop_committed = xfs_qm_qoff_logitem_committed, 368 409 .iop_push = xfs_qm_qoff_logitem_push, 369 - .iop_committing = xfs_qm_qoff_logitem_committing 370 410 }; 371 411 372 412 /*
+2 -2
fs/xfs/xfs_dquot_item.h
··· 12 12 struct xfs_qoff_logitem; 13 13 14 14 typedef struct xfs_dq_logitem { 15 - xfs_log_item_t qli_item; /* common portion */ 15 + struct xfs_log_item qli_item; /* common portion */ 16 16 struct xfs_dquot *qli_dquot; /* dquot ptr */ 17 17 xfs_lsn_t qli_flush_lsn; /* lsn at last flush */ 18 18 } xfs_dq_logitem_t; 19 19 20 20 typedef struct xfs_qoff_logitem { 21 - xfs_log_item_t qql_item; /* common portion */ 21 + struct xfs_log_item qql_item; /* common portion */ 22 22 struct xfs_qoff_logitem *qql_start_lip; /* qoff-start logitem, if any */ 23 23 unsigned int qql_flags; 24 24 } xfs_qoff_logitem_t;
+2 -1
fs/xfs/xfs_error.c
··· 4 4 * All Rights Reserved. 5 5 */ 6 6 #include "xfs.h" 7 + #include "xfs_shared.h" 7 8 #include "xfs_format.h" 8 9 #include "xfs_fs.h" 9 10 #include "xfs_log_format.h" ··· 354 353 size_t bufsz, 355 354 xfs_failaddr_t failaddr) 356 355 { 357 - struct xfs_mount *mp = bp->b_target->bt_mount; 356 + struct xfs_mount *mp = bp->b_mount; 358 357 xfs_failaddr_t fa; 359 358 int sz; 360 359
+1 -3
fs/xfs/xfs_export.c
··· 4 4 * All Rights Reserved. 5 5 */ 6 6 #include "xfs.h" 7 + #include "xfs_shared.h" 7 8 #include "xfs_format.h" 8 9 #include "xfs_log_format.h" 9 10 #include "xfs_trans_resv.h" 10 11 #include "xfs_mount.h" 11 - #include "xfs_da_format.h" 12 - #include "xfs_da_btree.h" 13 12 #include "xfs_dir2.h" 14 13 #include "xfs_export.h" 15 14 #include "xfs_inode.h" 16 15 #include "xfs_trans.h" 17 16 #include "xfs_inode_item.h" 18 - #include "xfs_trace.h" 19 17 #include "xfs_icache.h" 20 18 #include "xfs_log.h" 21 19 #include "xfs_pnfs.h"
+266 -158
fs/xfs/xfs_extfree_item.c
··· 9 9 #include "xfs_log_format.h" 10 10 #include "xfs_trans_resv.h" 11 11 #include "xfs_bit.h" 12 + #include "xfs_shared.h" 12 13 #include "xfs_mount.h" 14 + #include "xfs_defer.h" 13 15 #include "xfs_trans.h" 14 16 #include "xfs_trans_priv.h" 15 - #include "xfs_buf_item.h" 16 17 #include "xfs_extfree_item.h" 17 18 #include "xfs_log.h" 18 19 #include "xfs_btree.h" 19 20 #include "xfs_rmap.h" 21 + #include "xfs_alloc.h" 22 + #include "xfs_bmap.h" 23 + #include "xfs_trace.h" 20 24 21 25 22 26 kmem_zone_t *xfs_efi_zone; ··· 111 107 112 108 113 109 /* 114 - * Pinning has no meaning for an efi item, so just return. 115 - */ 116 - STATIC void 117 - xfs_efi_item_pin( 118 - struct xfs_log_item *lip) 119 - { 120 - } 121 - 122 - /* 123 110 * The unpin operation is the last place an EFI is manipulated in the log. It is 124 111 * either inserted in the AIL or aborted in the event of a log I/O error. In 125 112 * either case, the EFI transaction has been successfully committed to make it ··· 128 133 } 129 134 130 135 /* 131 - * Efi items have no locking or pushing. However, since EFIs are pulled from 132 - * the AIL when their corresponding EFDs are committed to disk, their situation 133 - * is very similar to being pinned. Return XFS_ITEM_PINNED so that the caller 134 - * will eventually flush the log. This should help in getting the EFI out of 135 - * the AIL. 136 - */ 137 - STATIC uint 138 - xfs_efi_item_push( 139 - struct xfs_log_item *lip, 140 - struct list_head *buffer_list) 141 - { 142 - return XFS_ITEM_PINNED; 143 - } 144 - 145 - /* 146 136 * The EFI has been either committed or aborted if the transaction has been 147 137 * cancelled. If the transaction was cancelled, an EFD isn't going to be 148 138 * constructed and thus we free the EFI here directly. 
149 139 */ 150 140 STATIC void 151 - xfs_efi_item_unlock( 141 + xfs_efi_item_release( 152 142 struct xfs_log_item *lip) 153 143 { 154 - if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) 155 - xfs_efi_release(EFI_ITEM(lip)); 144 + xfs_efi_release(EFI_ITEM(lip)); 156 145 } 157 146 158 - /* 159 - * The EFI is logged only once and cannot be moved in the log, so simply return 160 - * the lsn at which it's been logged. 161 - */ 162 - STATIC xfs_lsn_t 163 - xfs_efi_item_committed( 164 - struct xfs_log_item *lip, 165 - xfs_lsn_t lsn) 166 - { 167 - return lsn; 168 - } 169 - 170 - /* 171 - * The EFI dependency tracking op doesn't do squat. It can't because 172 - * it doesn't know where the free extent is coming from. The dependency 173 - * tracking has to be handled by the "enclosing" metadata object. For 174 - * example, for inodes, the inode is locked throughout the extent freeing 175 - * so the dependency should be recorded there. 176 - */ 177 - STATIC void 178 - xfs_efi_item_committing( 179 - struct xfs_log_item *lip, 180 - xfs_lsn_t lsn) 181 - { 182 - } 183 - 184 - /* 185 - * This is the ops vector shared by all efi log items. 186 - */ 187 147 static const struct xfs_item_ops xfs_efi_item_ops = { 188 148 .iop_size = xfs_efi_item_size, 189 149 .iop_format = xfs_efi_item_format, 190 - .iop_pin = xfs_efi_item_pin, 191 150 .iop_unpin = xfs_efi_item_unpin, 192 - .iop_unlock = xfs_efi_item_unlock, 193 - .iop_committed = xfs_efi_item_committed, 194 - .iop_push = xfs_efi_item_push, 195 - .iop_committing = xfs_efi_item_committing 151 + .iop_release = xfs_efi_item_release, 196 152 }; 197 153 198 154 ··· 295 349 } 296 350 297 351 /* 298 - * Pinning has no meaning for an efd item, so just return. 299 - */ 300 - STATIC void 301 - xfs_efd_item_pin( 302 - struct xfs_log_item *lip) 303 - { 304 - } 305 - 306 - /* 307 - * Since pinning has no meaning for an efd item, unpinning does 308 - * not either. 
309 - */ 310 - STATIC void 311 - xfs_efd_item_unpin( 312 - struct xfs_log_item *lip, 313 - int remove) 314 - { 315 - } 316 - 317 - /* 318 - * There isn't much you can do to push on an efd item. It is simply stuck 319 - * waiting for the log to be flushed to disk. 320 - */ 321 - STATIC uint 322 - xfs_efd_item_push( 323 - struct xfs_log_item *lip, 324 - struct list_head *buffer_list) 325 - { 326 - return XFS_ITEM_PINNED; 327 - } 328 - 329 - /* 330 352 * The EFD is either committed or aborted if the transaction is cancelled. If 331 353 * the transaction is cancelled, drop our reference to the EFI and free the EFD. 332 354 */ 333 355 STATIC void 334 - xfs_efd_item_unlock( 356 + xfs_efd_item_release( 335 357 struct xfs_log_item *lip) 336 358 { 337 359 struct xfs_efd_log_item *efdp = EFD_ITEM(lip); 338 360 339 - if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) { 340 - xfs_efi_release(efdp->efd_efip); 341 - xfs_efd_item_free(efdp); 342 - } 343 - } 344 - 345 - /* 346 - * When the efd item is committed to disk, all we need to do is delete our 347 - * reference to our partner efi item and then free ourselves. Since we're 348 - * freeing ourselves we must return -1 to keep the transaction code from further 349 - * referencing this item. 350 - */ 351 - STATIC xfs_lsn_t 352 - xfs_efd_item_committed( 353 - struct xfs_log_item *lip, 354 - xfs_lsn_t lsn) 355 - { 356 - struct xfs_efd_log_item *efdp = EFD_ITEM(lip); 357 - 358 - /* 359 - * Drop the EFI reference regardless of whether the EFD has been 360 - * aborted. Once the EFD transaction is constructed, it is the sole 361 - * responsibility of the EFD to release the EFI (even if the EFI is 362 - * aborted due to log I/O error). 363 - */ 364 361 xfs_efi_release(efdp->efd_efip); 365 362 xfs_efd_item_free(efdp); 366 - 367 - return (xfs_lsn_t)-1; 368 363 } 369 364 370 - /* 371 - * The EFD dependency tracking op doesn't do squat. It can't because 372 - * it doesn't know where the free extent is coming from. 
The dependency 373 - * tracking has to be handled by the "enclosing" metadata object. For 374 - * example, for inodes, the inode is locked throughout the extent freeing 375 - * so the dependency should be recorded there. 376 - */ 377 - STATIC void 378 - xfs_efd_item_committing( 379 - struct xfs_log_item *lip, 380 - xfs_lsn_t lsn) 381 - { 382 - } 383 - 384 - /* 385 - * This is the ops vector shared by all efd log items. 386 - */ 387 365 static const struct xfs_item_ops xfs_efd_item_ops = { 366 + .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED, 388 367 .iop_size = xfs_efd_item_size, 389 368 .iop_format = xfs_efd_item_format, 390 - .iop_pin = xfs_efd_item_pin, 391 - .iop_unpin = xfs_efd_item_unpin, 392 - .iop_unlock = xfs_efd_item_unlock, 393 - .iop_committed = xfs_efd_item_committed, 394 - .iop_push = xfs_efd_item_push, 395 - .iop_committing = xfs_efd_item_committing 369 + .iop_release = xfs_efd_item_release, 396 370 }; 397 371 398 372 /* 399 - * Allocate and initialize an efd item with the given number of extents. 373 + * Allocate an "extent free done" log item that will hold nextents worth of 374 + * extents. The caller must use all nextents extents, because we are not 375 + * flexible about this at all. 
400 376 */ 401 - struct xfs_efd_log_item * 402 - xfs_efd_init( 403 - struct xfs_mount *mp, 404 - struct xfs_efi_log_item *efip, 405 - uint nextents) 406 - 377 + static struct xfs_efd_log_item * 378 + xfs_trans_get_efd( 379 + struct xfs_trans *tp, 380 + struct xfs_efi_log_item *efip, 381 + unsigned int nextents) 407 382 { 408 - struct xfs_efd_log_item *efdp; 409 - uint size; 383 + struct xfs_efd_log_item *efdp; 410 384 411 385 ASSERT(nextents > 0); 386 + 412 387 if (nextents > XFS_EFD_MAX_FAST_EXTENTS) { 413 - size = (uint)(sizeof(xfs_efd_log_item_t) + 414 - ((nextents - 1) * sizeof(xfs_extent_t))); 415 - efdp = kmem_zalloc(size, KM_SLEEP); 388 + efdp = kmem_zalloc(sizeof(struct xfs_efd_log_item) + 389 + (nextents - 1) * sizeof(struct xfs_extent), 390 + KM_SLEEP); 416 391 } else { 417 392 efdp = kmem_zone_zalloc(xfs_efd_zone, KM_SLEEP); 418 393 } 419 394 420 - xfs_log_item_init(mp, &efdp->efd_item, XFS_LI_EFD, &xfs_efd_item_ops); 395 + xfs_log_item_init(tp->t_mountp, &efdp->efd_item, XFS_LI_EFD, 396 + &xfs_efd_item_ops); 421 397 efdp->efd_efip = efip; 422 398 efdp->efd_format.efd_nextents = nextents; 423 399 efdp->efd_format.efd_efi_id = efip->efi_format.efi_id; 424 400 401 + xfs_trans_add_item(tp, &efdp->efd_item); 425 402 return efdp; 426 403 } 404 + 405 + /* 406 + * Free an extent and log it to the EFD. Note that the transaction is marked 407 + * dirty regardless of whether the extent free succeeds or fails to support the 408 + * EFI/EFD lifecycle rules. 
409 + */ 410 + static int 411 + xfs_trans_free_extent( 412 + struct xfs_trans *tp, 413 + struct xfs_efd_log_item *efdp, 414 + xfs_fsblock_t start_block, 415 + xfs_extlen_t ext_len, 416 + const struct xfs_owner_info *oinfo, 417 + bool skip_discard) 418 + { 419 + struct xfs_mount *mp = tp->t_mountp; 420 + struct xfs_extent *extp; 421 + uint next_extent; 422 + xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, start_block); 423 + xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp, 424 + start_block); 425 + int error; 426 + 427 + trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno, ext_len); 428 + 429 + error = __xfs_free_extent(tp, start_block, ext_len, 430 + oinfo, XFS_AG_RESV_NONE, skip_discard); 431 + /* 432 + * Mark the transaction dirty, even on error. This ensures the 433 + * transaction is aborted, which: 434 + * 435 + * 1.) releases the EFI and frees the EFD 436 + * 2.) shuts down the filesystem 437 + */ 438 + tp->t_flags |= XFS_TRANS_DIRTY; 439 + set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags); 440 + 441 + next_extent = efdp->efd_next_extent; 442 + ASSERT(next_extent < efdp->efd_format.efd_nextents); 443 + extp = &(efdp->efd_format.efd_extents[next_extent]); 444 + extp->ext_start = start_block; 445 + extp->ext_len = ext_len; 446 + efdp->efd_next_extent++; 447 + 448 + return error; 449 + } 450 + 451 + /* Sort bmap items by AG. */ 452 + static int 453 + xfs_extent_free_diff_items( 454 + void *priv, 455 + struct list_head *a, 456 + struct list_head *b) 457 + { 458 + struct xfs_mount *mp = priv; 459 + struct xfs_extent_free_item *ra; 460 + struct xfs_extent_free_item *rb; 461 + 462 + ra = container_of(a, struct xfs_extent_free_item, xefi_list); 463 + rb = container_of(b, struct xfs_extent_free_item, xefi_list); 464 + return XFS_FSB_TO_AGNO(mp, ra->xefi_startblock) - 465 + XFS_FSB_TO_AGNO(mp, rb->xefi_startblock); 466 + } 467 + 468 + /* Get an EFI. 
*/ 469 + STATIC void * 470 + xfs_extent_free_create_intent( 471 + struct xfs_trans *tp, 472 + unsigned int count) 473 + { 474 + struct xfs_efi_log_item *efip; 475 + 476 + ASSERT(tp != NULL); 477 + ASSERT(count > 0); 478 + 479 + efip = xfs_efi_init(tp->t_mountp, count); 480 + ASSERT(efip != NULL); 481 + 482 + /* 483 + * Get a log_item_desc to point at the new item. 484 + */ 485 + xfs_trans_add_item(tp, &efip->efi_item); 486 + return efip; 487 + } 488 + 489 + /* Log a free extent to the intent item. */ 490 + STATIC void 491 + xfs_extent_free_log_item( 492 + struct xfs_trans *tp, 493 + void *intent, 494 + struct list_head *item) 495 + { 496 + struct xfs_efi_log_item *efip = intent; 497 + struct xfs_extent_free_item *free; 498 + uint next_extent; 499 + struct xfs_extent *extp; 500 + 501 + free = container_of(item, struct xfs_extent_free_item, xefi_list); 502 + 503 + tp->t_flags |= XFS_TRANS_DIRTY; 504 + set_bit(XFS_LI_DIRTY, &efip->efi_item.li_flags); 505 + 506 + /* 507 + * atomic_inc_return gives us the value after the increment; 508 + * we want to use it as an array index so we need to subtract 1 from 509 + * it. 510 + */ 511 + next_extent = atomic_inc_return(&efip->efi_next_extent) - 1; 512 + ASSERT(next_extent < efip->efi_format.efi_nextents); 513 + extp = &efip->efi_format.efi_extents[next_extent]; 514 + extp->ext_start = free->xefi_startblock; 515 + extp->ext_len = free->xefi_blockcount; 516 + } 517 + 518 + /* Get an EFD so we can process all the free extents. */ 519 + STATIC void * 520 + xfs_extent_free_create_done( 521 + struct xfs_trans *tp, 522 + void *intent, 523 + unsigned int count) 524 + { 525 + return xfs_trans_get_efd(tp, intent, count); 526 + } 527 + 528 + /* Process a free extent. 
*/ 529 + STATIC int 530 + xfs_extent_free_finish_item( 531 + struct xfs_trans *tp, 532 + struct list_head *item, 533 + void *done_item, 534 + void **state) 535 + { 536 + struct xfs_extent_free_item *free; 537 + int error; 538 + 539 + free = container_of(item, struct xfs_extent_free_item, xefi_list); 540 + error = xfs_trans_free_extent(tp, done_item, 541 + free->xefi_startblock, 542 + free->xefi_blockcount, 543 + &free->xefi_oinfo, free->xefi_skip_discard); 544 + kmem_free(free); 545 + return error; 546 + } 547 + 548 + /* Abort all pending EFIs. */ 549 + STATIC void 550 + xfs_extent_free_abort_intent( 551 + void *intent) 552 + { 553 + xfs_efi_release(intent); 554 + } 555 + 556 + /* Cancel a free extent. */ 557 + STATIC void 558 + xfs_extent_free_cancel_item( 559 + struct list_head *item) 560 + { 561 + struct xfs_extent_free_item *free; 562 + 563 + free = container_of(item, struct xfs_extent_free_item, xefi_list); 564 + kmem_free(free); 565 + } 566 + 567 + const struct xfs_defer_op_type xfs_extent_free_defer_type = { 568 + .max_items = XFS_EFI_MAX_FAST_EXTENTS, 569 + .diff_items = xfs_extent_free_diff_items, 570 + .create_intent = xfs_extent_free_create_intent, 571 + .abort_intent = xfs_extent_free_abort_intent, 572 + .log_item = xfs_extent_free_log_item, 573 + .create_done = xfs_extent_free_create_done, 574 + .finish_item = xfs_extent_free_finish_item, 575 + .cancel_item = xfs_extent_free_cancel_item, 576 + }; 577 + 578 + /* 579 + * AGFL blocks are accounted differently in the reserve pools and are not 580 + * inserted into the busy extent list. 
581 + */ 582 + STATIC int 583 + xfs_agfl_free_finish_item( 584 + struct xfs_trans *tp, 585 + struct list_head *item, 586 + void *done_item, 587 + void **state) 588 + { 589 + struct xfs_mount *mp = tp->t_mountp; 590 + struct xfs_efd_log_item *efdp = done_item; 591 + struct xfs_extent_free_item *free; 592 + struct xfs_extent *extp; 593 + struct xfs_buf *agbp; 594 + int error; 595 + xfs_agnumber_t agno; 596 + xfs_agblock_t agbno; 597 + uint next_extent; 598 + 599 + free = container_of(item, struct xfs_extent_free_item, xefi_list); 600 + ASSERT(free->xefi_blockcount == 1); 601 + agno = XFS_FSB_TO_AGNO(mp, free->xefi_startblock); 602 + agbno = XFS_FSB_TO_AGBNO(mp, free->xefi_startblock); 603 + 604 + trace_xfs_agfl_free_deferred(mp, agno, 0, agbno, free->xefi_blockcount); 605 + 606 + error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp); 607 + if (!error) 608 + error = xfs_free_agfl_block(tp, agno, agbno, agbp, 609 + &free->xefi_oinfo); 610 + 611 + /* 612 + * Mark the transaction dirty, even on error. This ensures the 613 + * transaction is aborted, which: 614 + * 615 + * 1.) releases the EFI and frees the EFD 616 + * 2.) 
shuts down the filesystem 617 + */ 618 + tp->t_flags |= XFS_TRANS_DIRTY; 619 + set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags); 620 + 621 + next_extent = efdp->efd_next_extent; 622 + ASSERT(next_extent < efdp->efd_format.efd_nextents); 623 + extp = &(efdp->efd_format.efd_extents[next_extent]); 624 + extp->ext_start = free->xefi_startblock; 625 + extp->ext_len = free->xefi_blockcount; 626 + efdp->efd_next_extent++; 627 + 628 + kmem_free(free); 629 + return error; 630 + } 631 + 632 + /* sub-type with special handling for AGFL deferred frees */ 633 + const struct xfs_defer_op_type xfs_agfl_free_defer_type = { 634 + .max_items = XFS_EFI_MAX_FAST_EXTENTS, 635 + .diff_items = xfs_extent_free_diff_items, 636 + .create_intent = xfs_extent_free_create_intent, 637 + .abort_intent = xfs_extent_free_abort_intent, 638 + .log_item = xfs_extent_free_log_item, 639 + .create_done = xfs_extent_free_create_done, 640 + .finish_item = xfs_agfl_free_finish_item, 641 + .cancel_item = xfs_extent_free_cancel_item, 642 + }; 427 643 428 644 /* 429 645 * Process an extent free intent item that was recovered from
+2 -4
fs/xfs/xfs_extfree_item.h
··· 51 51 * AIL, so at this point both the EFI and EFD are freed. 52 52 */ 53 53 typedef struct xfs_efi_log_item { 54 - xfs_log_item_t efi_item; 54 + struct xfs_log_item efi_item; 55 55 atomic_t efi_refcount; 56 56 atomic_t efi_next_extent; 57 57 unsigned long efi_flags; /* misc flags */ ··· 64 64 * have been freed. 65 65 */ 66 66 typedef struct xfs_efd_log_item { 67 - xfs_log_item_t efd_item; 67 + struct xfs_log_item efd_item; 68 68 xfs_efi_log_item_t *efd_efip; 69 69 uint efd_next_extent; 70 70 xfs_efd_log_format_t efd_format; ··· 79 79 extern struct kmem_zone *xfs_efd_zone; 80 80 81 81 xfs_efi_log_item_t *xfs_efi_init(struct xfs_mount *, uint); 82 - xfs_efd_log_item_t *xfs_efd_init(struct xfs_mount *, xfs_efi_log_item_t *, 83 - uint); 84 82 int xfs_efi_copy_format(xfs_log_iovec_t *buf, 85 83 xfs_efi_log_format_t *dst_efi_fmt); 86 84 void xfs_efi_item_free(xfs_efi_log_item_t *);
+15 -8
fs/xfs/xfs_file.c
··· 10 10 #include "xfs_log_format.h" 11 11 #include "xfs_trans_resv.h" 12 12 #include "xfs_mount.h" 13 - #include "xfs_da_format.h" 14 - #include "xfs_da_btree.h" 15 13 #include "xfs_inode.h" 16 14 #include "xfs_trans.h" 17 15 #include "xfs_inode_item.h" 18 16 #include "xfs_bmap.h" 19 17 #include "xfs_bmap_util.h" 20 - #include "xfs_error.h" 21 18 #include "xfs_dir2.h" 22 19 #include "xfs_dir2_priv.h" 23 20 #include "xfs_ioctl.h" ··· 25 28 #include "xfs_iomap.h" 26 29 #include "xfs_reflink.h" 27 30 28 - #include <linux/dcache.h> 29 31 #include <linux/falloc.h> 30 - #include <linux/pagevec.h> 31 32 #include <linux/backing-dev.h> 32 33 #include <linux/mman.h> 33 34 ··· 374 379 struct inode *inode = file_inode(iocb->ki_filp); 375 380 struct xfs_inode *ip = XFS_I(inode); 376 381 loff_t offset = iocb->ki_pos; 382 + unsigned int nofs_flag; 377 383 int error = 0; 378 384 379 385 trace_xfs_end_io_direct_write(ip, offset, size); ··· 391 395 */ 392 396 XFS_STATS_ADD(ip->i_mount, xs_write_bytes, size); 393 397 398 + /* 399 + * We can allocate memory here while doing writeback on behalf of 400 + * memory reclaim. To avoid memory allocation deadlocks set the 401 + * task-wide nofs context for the following operations. 402 + */ 403 + nofs_flag = memalloc_nofs_save(); 404 + 394 405 if (flags & IOMAP_DIO_COW) { 395 406 error = xfs_reflink_end_cow(ip, offset, size); 396 407 if (error) 397 - return error; 408 + goto out; 398 409 } 399 410 400 411 /* ··· 410 407 * earlier allows a racing dio read to find unwritten extents before 411 408 * they are converted. 
412 409 */ 413 - if (flags & IOMAP_DIO_UNWRITTEN) 414 - return xfs_iomap_write_unwritten(ip, offset, size, true); 410 + if (flags & IOMAP_DIO_UNWRITTEN) { 411 + error = xfs_iomap_write_unwritten(ip, offset, size, true); 412 + goto out; 413 + } 415 414 416 415 /* 417 416 * We need to update the in-core inode size here so that we don't end up ··· 435 430 spin_unlock(&ip->i_flags_lock); 436 431 } 437 432 433 + out: 434 + memalloc_nofs_restore(nofs_flag); 438 435 return error; 439 436 } 440 437
+1 -4
fs/xfs/xfs_filestream.c
··· 5 5 * All Rights Reserved. 6 6 */ 7 7 #include "xfs.h" 8 + #include "xfs_shared.h" 8 9 #include "xfs_format.h" 9 10 #include "xfs_log_format.h" 10 11 #include "xfs_trans_resv.h" 11 12 #include "xfs_sb.h" 12 13 #include "xfs_mount.h" 13 - #include "xfs_defer.h" 14 14 #include "xfs_inode.h" 15 15 #include "xfs_bmap.h" 16 - #include "xfs_bmap_util.h" 17 16 #include "xfs_alloc.h" 18 17 #include "xfs_mru_cache.h" 19 - #include "xfs_filestream.h" 20 18 #include "xfs_trace.h" 21 19 #include "xfs_ag_resv.h" 22 20 #include "xfs_trans.h" 23 - #include "xfs_shared.h" 24 21 25 22 struct xfs_fstrm_item { 26 23 struct xfs_mru_cache_elem mru;
-4
fs/xfs/xfs_fsmap.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_log_format.h" 11 11 #include "xfs_trans_resv.h" 12 - #include "xfs_sb.h" 13 12 #include "xfs_mount.h" 14 - #include "xfs_defer.h" 15 13 #include "xfs_inode.h" 16 14 #include "xfs_trans.h" 17 - #include "xfs_error.h" 18 15 #include "xfs_btree.h" 19 16 #include "xfs_rmap_btree.h" 20 17 #include "xfs_trace.h" 21 - #include "xfs_log.h" 22 18 #include "xfs_rmap.h" 23 19 #include "xfs_alloc.h" 24 20 #include "xfs_bit.h"
+2 -6
fs/xfs/xfs_fsops.c
··· 11 11 #include "xfs_trans_resv.h" 12 12 #include "xfs_sb.h" 13 13 #include "xfs_mount.h" 14 - #include "xfs_defer.h" 15 14 #include "xfs_trans.h" 16 15 #include "xfs_error.h" 17 - #include "xfs_btree.h" 18 16 #include "xfs_alloc.h" 19 17 #include "xfs_fsops.h" 20 18 #include "xfs_trans_space.h" 21 - #include "xfs_rtalloc.h" 22 - #include "xfs_trace.h" 23 19 #include "xfs_log.h" 24 20 #include "xfs_ag.h" 25 21 #include "xfs_ag_resv.h" ··· 247 251 if (mp->m_sb.sb_imax_pct) { 248 252 uint64_t icount = mp->m_sb.sb_dblocks * mp->m_sb.sb_imax_pct; 249 253 do_div(icount, 100); 250 - mp->m_maxicount = XFS_FSB_TO_INO(mp, icount); 254 + M_IGEO(mp)->maxicount = XFS_FSB_TO_INO(mp, icount); 251 255 } else 252 - mp->m_maxicount = 0; 256 + M_IGEO(mp)->maxicount = 0; 253 257 254 258 /* Update secondary superblocks now the physical grow has completed */ 255 259 error = xfs_update_secondary_sbs(mp);
+3 -1
fs/xfs/xfs_globals.c
··· 4 4 * All Rights Reserved. 5 5 */ 6 6 #include "xfs.h" 7 - #include "xfs_sysctl.h" 8 7 9 8 /* 10 9 * Tunable XFS parameters. xfs_params is required even when CONFIG_SYSCTL=n, ··· 39 40 .bug_on_assert = true, /* assert failures BUG() */ 40 41 #else 41 42 .bug_on_assert = false, /* assert failures WARN() */ 43 + #endif 44 + #ifdef DEBUG 45 + .pwork_threads = -1, /* automatic thread detection */ 42 46 #endif 43 47 };
+1 -5
fs/xfs/xfs_health.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_log_format.h" 11 11 #include "xfs_trans_resv.h" 12 - #include "xfs_bit.h" 13 12 #include "xfs_sb.h" 14 13 #include "xfs_mount.h" 15 - #include "xfs_defer.h" 16 - #include "xfs_da_format.h" 17 - #include "xfs_da_btree.h" 18 14 #include "xfs_inode.h" 19 15 #include "xfs_trace.h" 20 16 #include "xfs_health.h" ··· 369 373 void 370 374 xfs_bulkstat_health( 371 375 struct xfs_inode *ip, 372 - struct xfs_bstat *bs) 376 + struct xfs_bulkstat *bs) 373 377 { 374 378 const struct ioctl_sick_map *m; 375 379 unsigned int sick;
+1 -3
fs/xfs/xfs_icache.c
··· 5 5 */ 6 6 #include "xfs.h" 7 7 #include "xfs_fs.h" 8 + #include "xfs_shared.h" 8 9 #include "xfs_format.h" 9 10 #include "xfs_log_format.h" 10 11 #include "xfs_trans_resv.h" 11 12 #include "xfs_sb.h" 12 13 #include "xfs_mount.h" 13 14 #include "xfs_inode.h" 14 - #include "xfs_error.h" 15 15 #include "xfs_trans.h" 16 16 #include "xfs_trans_priv.h" 17 17 #include "xfs_inode_item.h" ··· 23 23 #include "xfs_dquot.h" 24 24 #include "xfs_reflink.h" 25 25 26 - #include <linux/kthread.h> 27 - #include <linux/freezer.h> 28 26 #include <linux/iversion.h> 29 27 30 28 /*
+4 -71
fs/xfs/xfs_icreate_item.c
··· 6 6 #include "xfs.h" 7 7 #include "xfs_fs.h" 8 8 #include "xfs_shared.h" 9 - #include "xfs_format.h" 10 9 #include "xfs_log_format.h" 11 - #include "xfs_trans_resv.h" 12 - #include "xfs_bit.h" 13 - #include "xfs_mount.h" 14 10 #include "xfs_trans.h" 15 11 #include "xfs_trans_priv.h" 16 - #include "xfs_error.h" 17 12 #include "xfs_icreate_item.h" 18 13 #include "xfs_log.h" 19 14 ··· 51 56 sizeof(struct xfs_icreate_log)); 52 57 } 53 58 54 - 55 - /* Pinning has no meaning for the create item, so just return. */ 56 59 STATIC void 57 - xfs_icreate_item_pin( 60 + xfs_icreate_item_release( 58 61 struct xfs_log_item *lip) 59 62 { 63 + kmem_zone_free(xfs_icreate_zone, ICR_ITEM(lip)); 60 64 } 61 65 62 - 63 - /* pinning has no meaning for the create item, so just return. */ 64 - STATIC void 65 - xfs_icreate_item_unpin( 66 - struct xfs_log_item *lip, 67 - int remove) 68 - { 69 - } 70 - 71 - STATIC void 72 - xfs_icreate_item_unlock( 73 - struct xfs_log_item *lip) 74 - { 75 - struct xfs_icreate_item *icp = ICR_ITEM(lip); 76 - 77 - if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) 78 - kmem_zone_free(xfs_icreate_zone, icp); 79 - return; 80 - } 81 - 82 - /* 83 - * Because we have ordered buffers being tracked in the AIL for the inode 84 - * creation, we don't need the create item after this. Hence we can free 85 - * the log item and return -1 to tell the caller we're done with the item. 86 - */ 87 - STATIC xfs_lsn_t 88 - xfs_icreate_item_committed( 89 - struct xfs_log_item *lip, 90 - xfs_lsn_t lsn) 91 - { 92 - struct xfs_icreate_item *icp = ICR_ITEM(lip); 93 - 94 - kmem_zone_free(xfs_icreate_zone, icp); 95 - return (xfs_lsn_t)-1; 96 - } 97 - 98 - /* item can never get into the AIL */ 99 - STATIC uint 100 - xfs_icreate_item_push( 101 - struct xfs_log_item *lip, 102 - struct list_head *buffer_list) 103 - { 104 - ASSERT(0); 105 - return XFS_ITEM_SUCCESS; 106 - } 107 - 108 - /* Ordered buffers do the dependency tracking here, so this does nothing. 
*/ 109 - STATIC void 110 - xfs_icreate_item_committing( 111 - struct xfs_log_item *lip, 112 - xfs_lsn_t lsn) 113 - { 114 - } 115 - 116 - /* 117 - * This is the ops vector shared by all buf log items. 118 - */ 119 66 static const struct xfs_item_ops xfs_icreate_item_ops = { 67 + .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED, 120 68 .iop_size = xfs_icreate_item_size, 121 69 .iop_format = xfs_icreate_item_format, 122 - .iop_pin = xfs_icreate_item_pin, 123 - .iop_unpin = xfs_icreate_item_unpin, 124 - .iop_push = xfs_icreate_item_push, 125 - .iop_unlock = xfs_icreate_item_unlock, 126 - .iop_committed = xfs_icreate_item_committed, 127 - .iop_committing = xfs_icreate_item_committing, 70 + .iop_release = xfs_icreate_item_release, 128 71 }; 129 72 130 73
+18 -24
fs/xfs/xfs_inode.c
··· 3 3 * Copyright (c) 2000-2006 Silicon Graphics, Inc. 4 4 * All Rights Reserved. 5 5 */ 6 - #include <linux/log2.h> 7 6 #include <linux/iversion.h> 8 7 9 8 #include "xfs.h" ··· 15 16 #include "xfs_mount.h" 16 17 #include "xfs_defer.h" 17 18 #include "xfs_inode.h" 18 - #include "xfs_da_format.h" 19 - #include "xfs_da_btree.h" 20 19 #include "xfs_dir2.h" 21 - #include "xfs_attr_sf.h" 22 20 #include "xfs_attr.h" 23 21 #include "xfs_trans_space.h" 24 22 #include "xfs_trans.h" ··· 28 32 #include "xfs_error.h" 29 33 #include "xfs_quota.h" 30 34 #include "xfs_filestream.h" 31 - #include "xfs_cksum.h" 32 35 #include "xfs_trace.h" 33 36 #include "xfs_icache.h" 34 37 #include "xfs_symlink.h" ··· 35 40 #include "xfs_log.h" 36 41 #include "xfs_bmap_btree.h" 37 42 #include "xfs_reflink.h" 38 - #include "xfs_dir2_priv.h" 39 43 40 44 kmem_zone_t *xfs_inode_zone; 41 45 ··· 435 441 */ 436 442 static void 437 443 xfs_lock_inodes( 438 - xfs_inode_t **ips, 439 - int inodes, 440 - uint lock_mode) 444 + struct xfs_inode **ips, 445 + int inodes, 446 + uint lock_mode) 441 447 { 442 - int attempts = 0, i, j, try_lock; 443 - xfs_log_item_t *lp; 448 + int attempts = 0, i, j, try_lock; 449 + struct xfs_log_item *lp; 444 450 445 451 /* 446 452 * Currently supports between 2 and 5 inodes with exclusive locking. We ··· 479 485 */ 480 486 if (!try_lock) { 481 487 for (j = (i - 1); j >= 0 && !try_lock; j--) { 482 - lp = (xfs_log_item_t *)ips[j]->i_itemp; 488 + lp = &ips[j]->i_itemp->ili_item; 483 489 if (lp && test_bit(XFS_LI_IN_AIL, &lp->li_flags)) 484 490 try_lock++; 485 491 } ··· 545 551 struct xfs_inode *temp; 546 552 uint mode_temp; 547 553 int attempts = 0; 548 - xfs_log_item_t *lp; 554 + struct xfs_log_item *lp; 549 555 550 556 ASSERT(hweight32(ip0_mode) == 1); 551 557 ASSERT(hweight32(ip1_mode) == 1); ··· 579 585 * the second lock. If we can't get it, we must release the first one 580 586 * and try again. 
581 587 */ 582 - lp = (xfs_log_item_t *)ip0->i_itemp; 588 + lp = &ip0->i_itemp->ili_item; 583 589 if (lp && test_bit(XFS_LI_IN_AIL, &lp->li_flags)) { 584 590 if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(ip1_mode, 1))) { 585 591 xfs_iunlock(ip0, ip0_mode); ··· 2531 2537 xfs_inode_log_item_t *iip; 2532 2538 struct xfs_log_item *lip; 2533 2539 struct xfs_perag *pag; 2540 + struct xfs_ino_geometry *igeo = M_IGEO(mp); 2534 2541 xfs_ino_t inum; 2535 2542 2536 2543 inum = xic->first_ino; 2537 2544 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum)); 2538 - nbufs = mp->m_ialloc_blks / mp->m_blocks_per_cluster; 2545 + nbufs = igeo->ialloc_blks / igeo->blocks_per_cluster; 2539 2546 2540 - for (j = 0; j < nbufs; j++, inum += mp->m_inodes_per_cluster) { 2547 + for (j = 0; j < nbufs; j++, inum += igeo->inodes_per_cluster) { 2541 2548 /* 2542 2549 * The allocation bitmap tells us which inodes of the chunk were 2543 2550 * physically allocated. Skip the cluster if an inode falls into ··· 2546 2551 */ 2547 2552 ioffset = inum - xic->first_ino; 2548 2553 if ((xic->alloc & XFS_INOBT_MASK(ioffset)) == 0) { 2549 - ASSERT(ioffset % mp->m_inodes_per_cluster == 0); 2554 + ASSERT(ioffset % igeo->inodes_per_cluster == 0); 2550 2555 continue; 2551 2556 } 2552 2557 ··· 2562 2567 * to mark all the active inodes on the buffer stale. 2563 2568 */ 2564 2569 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, 2565 - mp->m_bsize * mp->m_blocks_per_cluster, 2570 + mp->m_bsize * igeo->blocks_per_cluster, 2566 2571 XBF_UNMAPPED); 2567 2572 2568 2573 if (!bp) ··· 2609 2614 * transaction stale above, which means there is no point in 2610 2615 * even trying to lock them. 
2611 2616 */ 2612 - for (i = 0; i < mp->m_inodes_per_cluster; i++) { 2617 + for (i = 0; i < igeo->inodes_per_cluster; i++) { 2613 2618 retry: 2614 2619 rcu_read_lock(); 2615 2620 ip = radix_tree_lookup(&pag->pag_ici_root, ··· 3467 3472 struct xfs_mount *mp = ip->i_mount; 3468 3473 struct xfs_perag *pag; 3469 3474 unsigned long first_index, mask; 3470 - unsigned long inodes_per_cluster; 3471 3475 int cilist_size; 3472 3476 struct xfs_inode **cilist; 3473 3477 struct xfs_inode *cip; 3478 + struct xfs_ino_geometry *igeo = M_IGEO(mp); 3474 3479 int nr_found; 3475 3480 int clcount = 0; 3476 3481 int i; 3477 3482 3478 3483 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); 3479 3484 3480 - inodes_per_cluster = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog; 3481 - cilist_size = inodes_per_cluster * sizeof(xfs_inode_t *); 3485 + cilist_size = igeo->inodes_per_cluster * sizeof(struct xfs_inode *); 3482 3486 cilist = kmem_alloc(cilist_size, KM_MAYFAIL|KM_NOFS); 3483 3487 if (!cilist) 3484 3488 goto out_put; 3485 3489 3486 - mask = ~(((mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog)) - 1); 3490 + mask = ~(igeo->inodes_per_cluster - 1); 3487 3491 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask; 3488 3492 rcu_read_lock(); 3489 3493 /* really need a gang lookup range call here */ 3490 3494 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)cilist, 3491 - first_index, inodes_per_cluster); 3495 + first_index, igeo->inodes_per_cluster); 3492 3496 if (nr_found == 0) 3493 3497 goto out_free; 3494 3498
+7 -9
fs/xfs/xfs_inode_item.c
··· 5 5 */ 6 6 #include "xfs.h" 7 7 #include "xfs_fs.h" 8 + #include "xfs_shared.h" 8 9 #include "xfs_format.h" 9 10 #include "xfs_log_format.h" 10 11 #include "xfs_trans_resv.h" ··· 13 12 #include "xfs_inode.h" 14 13 #include "xfs_trans.h" 15 14 #include "xfs_inode_item.h" 16 - #include "xfs_error.h" 17 15 #include "xfs_trace.h" 18 16 #include "xfs_trans_priv.h" 19 17 #include "xfs_buf_item.h" ··· 565 565 * Unlock the inode associated with the inode log item. 566 566 */ 567 567 STATIC void 568 - xfs_inode_item_unlock( 568 + xfs_inode_item_release( 569 569 struct xfs_log_item *lip) 570 570 { 571 571 struct xfs_inode_log_item *iip = INODE_ITEM(lip); ··· 621 621 STATIC void 622 622 xfs_inode_item_committing( 623 623 struct xfs_log_item *lip, 624 - xfs_lsn_t lsn) 624 + xfs_lsn_t commit_lsn) 625 625 { 626 - INODE_ITEM(lip)->ili_last_lsn = lsn; 626 + INODE_ITEM(lip)->ili_last_lsn = commit_lsn; 627 + return xfs_inode_item_release(lip); 627 628 } 628 629 629 - /* 630 - * This is the ops vector shared by all buf log items. 631 - */ 632 630 static const struct xfs_item_ops xfs_inode_item_ops = { 633 631 .iop_size = xfs_inode_item_size, 634 632 .iop_format = xfs_inode_item_format, 635 633 .iop_pin = xfs_inode_item_pin, 636 634 .iop_unpin = xfs_inode_item_unpin, 637 - .iop_unlock = xfs_inode_item_unlock, 635 + .iop_release = xfs_inode_item_release, 638 636 .iop_committed = xfs_inode_item_committed, 639 637 .iop_push = xfs_inode_item_push, 640 - .iop_committing = xfs_inode_item_committing, 638 + .iop_committing = xfs_inode_item_committing, 641 639 .iop_error = xfs_inode_item_error 642 640 }; 643 641
+1 -1
fs/xfs/xfs_inode_item.h
··· 14 14 struct xfs_mount; 15 15 16 16 typedef struct xfs_inode_log_item { 17 - xfs_log_item_t ili_item; /* common portion */ 17 + struct xfs_log_item ili_item; /* common portion */ 18 18 struct xfs_inode *ili_inode; /* inode ptr */ 19 19 xfs_lsn_t ili_flush_lsn; /* lsn at last flush */ 20 20 xfs_lsn_t ili_last_lsn; /* lsn at last transaction */
+261 -33
fs/xfs/xfs_ioctl.c
··· 11 11 #include "xfs_trans_resv.h" 12 12 #include "xfs_mount.h" 13 13 #include "xfs_inode.h" 14 - #include "xfs_ioctl.h" 15 - #include "xfs_alloc.h" 16 14 #include "xfs_rtalloc.h" 15 + #include "xfs_iwalk.h" 17 16 #include "xfs_itable.h" 18 17 #include "xfs_error.h" 19 18 #include "xfs_attr.h" ··· 24 25 #include "xfs_export.h" 25 26 #include "xfs_trace.h" 26 27 #include "xfs_icache.h" 27 - #include "xfs_symlink.h" 28 28 #include "xfs_trans.h" 29 29 #include "xfs_acl.h" 30 30 #include "xfs_btree.h" ··· 34 36 #include "xfs_ag.h" 35 37 #include "xfs_health.h" 36 38 37 - #include <linux/capability.h> 38 - #include <linux/cred.h> 39 - #include <linux/dcache.h> 40 39 #include <linux/mount.h> 41 40 #include <linux/namei.h> 42 - #include <linux/pagemap.h> 43 - #include <linux/slab.h> 44 - #include <linux/exportfs.h> 45 41 46 42 /* 47 43 * xfs_find_handle maps from userspace xfs_fsop_handlereq structure to ··· 713 721 return error; 714 722 } 715 723 724 + /* Return 0 on success or positive error */ 725 + int 726 + xfs_fsbulkstat_one_fmt( 727 + struct xfs_ibulk *breq, 728 + const struct xfs_bulkstat *bstat) 729 + { 730 + struct xfs_bstat bs1; 731 + 732 + xfs_bulkstat_to_bstat(breq->mp, &bs1, bstat); 733 + if (copy_to_user(breq->ubuffer, &bs1, sizeof(bs1))) 734 + return -EFAULT; 735 + return xfs_ibulk_advance(breq, sizeof(struct xfs_bstat)); 736 + } 737 + 738 + int 739 + xfs_fsinumbers_fmt( 740 + struct xfs_ibulk *breq, 741 + const struct xfs_inumbers *igrp) 742 + { 743 + struct xfs_inogrp ig1; 744 + 745 + xfs_inumbers_to_inogrp(&ig1, igrp); 746 + if (copy_to_user(breq->ubuffer, &ig1, sizeof(struct xfs_inogrp))) 747 + return -EFAULT; 748 + return xfs_ibulk_advance(breq, sizeof(struct xfs_inogrp)); 749 + } 750 + 716 751 STATIC int 717 - xfs_ioc_bulkstat( 752 + xfs_ioc_fsbulkstat( 718 753 xfs_mount_t *mp, 719 754 unsigned int cmd, 720 755 void __user *arg) 721 756 { 722 - xfs_fsop_bulkreq_t bulkreq; 723 - int count; /* # of records returned */ 724 - xfs_ino_t inlast; /* last 
inode number */ 725 - int done; 757 + struct xfs_fsop_bulkreq bulkreq; 758 + struct xfs_ibulk breq = { 759 + .mp = mp, 760 + .ocount = 0, 761 + }; 762 + xfs_ino_t lastino; 726 763 int error; 727 764 728 765 /* done = 1 if there are more stats to get and if bulkstat */ ··· 763 742 if (XFS_FORCED_SHUTDOWN(mp)) 764 743 return -EIO; 765 744 766 - if (copy_from_user(&bulkreq, arg, sizeof(xfs_fsop_bulkreq_t))) 745 + if (copy_from_user(&bulkreq, arg, sizeof(struct xfs_fsop_bulkreq))) 767 746 return -EFAULT; 768 747 769 - if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64))) 748 + if (copy_from_user(&lastino, bulkreq.lastip, sizeof(__s64))) 770 749 return -EFAULT; 771 750 772 - if ((count = bulkreq.icount) <= 0) 751 + if (bulkreq.icount <= 0) 773 752 return -EINVAL; 774 753 775 754 if (bulkreq.ubuffer == NULL) 776 755 return -EINVAL; 777 756 778 - if (cmd == XFS_IOC_FSINUMBERS) 779 - error = xfs_inumbers(mp, &inlast, &count, 780 - bulkreq.ubuffer, xfs_inumbers_fmt); 781 - else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE) 782 - error = xfs_bulkstat_one(mp, inlast, bulkreq.ubuffer, 783 - sizeof(xfs_bstat_t), NULL, &done); 784 - else /* XFS_IOC_FSBULKSTAT */ 785 - error = xfs_bulkstat(mp, &inlast, &count, xfs_bulkstat_one, 786 - sizeof(xfs_bstat_t), bulkreq.ubuffer, 787 - &done); 757 + breq.ubuffer = bulkreq.ubuffer; 758 + breq.icount = bulkreq.icount; 759 + 760 + /* 761 + * FSBULKSTAT_SINGLE expects that *lastip contains the inode number 762 + * that we want to stat. However, FSINUMBERS and FSBULKSTAT expect 763 + * that *lastip contains either zero or the number of the last inode to 764 + * be examined by the previous call and return results starting with 765 + * the next inode after that. The new bulk request back end functions 766 + * take the inode to start with, so we have to compute the startino 767 + * parameter from lastino to maintain correct function. lastino == 0 768 + * is a special case because it has traditionally meant "first inode 769 + * in filesystem". 
770 + */ 771 + if (cmd == XFS_IOC_FSINUMBERS) { 772 + breq.startino = lastino ? lastino + 1 : 0; 773 + error = xfs_inumbers(&breq, xfs_fsinumbers_fmt); 774 + lastino = breq.startino - 1; 775 + } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE) { 776 + breq.startino = lastino; 777 + breq.icount = 1; 778 + error = xfs_bulkstat_one(&breq, xfs_fsbulkstat_one_fmt); 779 + } else { /* XFS_IOC_FSBULKSTAT */ 780 + breq.startino = lastino ? lastino + 1 : 0; 781 + error = xfs_bulkstat(&breq, xfs_fsbulkstat_one_fmt); 782 + lastino = breq.startino - 1; 783 + } 788 784 789 785 if (error) 790 786 return error; 791 787 792 - if (bulkreq.ocount != NULL) { 793 - if (copy_to_user(bulkreq.lastip, &inlast, 794 - sizeof(xfs_ino_t))) 795 - return -EFAULT; 788 + if (bulkreq.lastip != NULL && 789 + copy_to_user(bulkreq.lastip, &lastino, sizeof(xfs_ino_t))) 790 + return -EFAULT; 796 791 797 - if (copy_to_user(bulkreq.ocount, &count, sizeof(count))) 798 - return -EFAULT; 792 + if (bulkreq.ocount != NULL && 793 + copy_to_user(bulkreq.ocount, &breq.ocount, sizeof(__s32))) 794 + return -EFAULT; 795 + 796 + return 0; 797 + } 798 + 799 + /* Return 0 on success or positive error */ 800 + static int 801 + xfs_bulkstat_fmt( 802 + struct xfs_ibulk *breq, 803 + const struct xfs_bulkstat *bstat) 804 + { 805 + if (copy_to_user(breq->ubuffer, bstat, sizeof(struct xfs_bulkstat))) 806 + return -EFAULT; 807 + return xfs_ibulk_advance(breq, sizeof(struct xfs_bulkstat)); 808 + } 809 + 810 + /* 811 + * Check the incoming bulk request @hdr from userspace and initialize the 812 + * internal @breq bulk request appropriately. Returns 0 if the bulk request 813 + * should proceed; XFS_ITER_ABORT if there's nothing to do; or the usual 814 + * negative error code. 
815 + */ 816 + static int 817 + xfs_bulk_ireq_setup( 818 + struct xfs_mount *mp, 819 + struct xfs_bulk_ireq *hdr, 820 + struct xfs_ibulk *breq, 821 + void __user *ubuffer) 822 + { 823 + if (hdr->icount == 0 || 824 + (hdr->flags & ~XFS_BULK_IREQ_FLAGS_ALL) || 825 + memchr_inv(hdr->reserved, 0, sizeof(hdr->reserved))) 826 + return -EINVAL; 827 + 828 + breq->startino = hdr->ino; 829 + breq->ubuffer = ubuffer; 830 + breq->icount = hdr->icount; 831 + breq->ocount = 0; 832 + breq->flags = 0; 833 + 834 + /* 835 + * The @ino parameter is a special value, so we must look it up here. 836 + * We're not allowed to have IREQ_AGNO, and we only return one inode 837 + * worth of data. 838 + */ 839 + if (hdr->flags & XFS_BULK_IREQ_SPECIAL) { 840 + if (hdr->flags & XFS_BULK_IREQ_AGNO) 841 + return -EINVAL; 842 + 843 + switch (hdr->ino) { 844 + case XFS_BULK_IREQ_SPECIAL_ROOT: 845 + hdr->ino = mp->m_sb.sb_rootino; 846 + break; 847 + default: 848 + return -EINVAL; 849 + } 850 + breq->icount = 1; 799 851 } 852 + 853 + /* 854 + * The IREQ_AGNO flag means that we only want results from a given AG. 855 + * If @hdr->ino is zero, we start iterating in that AG. If @hdr->ino is 856 + * beyond the specified AG then we return no results. 857 + */ 858 + if (hdr->flags & XFS_BULK_IREQ_AGNO) { 859 + if (hdr->agno >= mp->m_sb.sb_agcount) 860 + return -EINVAL; 861 + 862 + if (breq->startino == 0) 863 + breq->startino = XFS_AGINO_TO_INO(mp, hdr->agno, 0); 864 + else if (XFS_INO_TO_AGNO(mp, breq->startino) < hdr->agno) 865 + return -EINVAL; 866 + 867 + breq->flags |= XFS_IBULK_SAME_AG; 868 + 869 + /* Asking for an inode past the end of the AG? We're done! */ 870 + if (XFS_INO_TO_AGNO(mp, breq->startino) > hdr->agno) 871 + return XFS_ITER_ABORT; 872 + } else if (hdr->agno) 873 + return -EINVAL; 874 + 875 + /* Asking for an inode past the end of the FS? We're done! 
*/ 876 + if (XFS_INO_TO_AGNO(mp, breq->startino) >= mp->m_sb.sb_agcount) 877 + return XFS_ITER_ABORT; 878 + 879 + return 0; 880 + } 881 + 882 + /* 883 + * Update the userspace bulk request @hdr to reflect the end state of the 884 + * internal bulk request @breq. 885 + */ 886 + static void 887 + xfs_bulk_ireq_teardown( 888 + struct xfs_bulk_ireq *hdr, 889 + struct xfs_ibulk *breq) 890 + { 891 + hdr->ino = breq->startino; 892 + hdr->ocount = breq->ocount; 893 + } 894 + 895 + /* Handle the v5 bulkstat ioctl. */ 896 + STATIC int 897 + xfs_ioc_bulkstat( 898 + struct xfs_mount *mp, 899 + unsigned int cmd, 900 + struct xfs_bulkstat_req __user *arg) 901 + { 902 + struct xfs_bulk_ireq hdr; 903 + struct xfs_ibulk breq = { 904 + .mp = mp, 905 + }; 906 + int error; 907 + 908 + if (!capable(CAP_SYS_ADMIN)) 909 + return -EPERM; 910 + 911 + if (XFS_FORCED_SHUTDOWN(mp)) 912 + return -EIO; 913 + 914 + if (copy_from_user(&hdr, &arg->hdr, sizeof(hdr))) 915 + return -EFAULT; 916 + 917 + error = xfs_bulk_ireq_setup(mp, &hdr, &breq, arg->bulkstat); 918 + if (error == XFS_ITER_ABORT) 919 + goto out_teardown; 920 + if (error < 0) 921 + return error; 922 + 923 + error = xfs_bulkstat(&breq, xfs_bulkstat_fmt); 924 + if (error) 925 + return error; 926 + 927 + out_teardown: 928 + xfs_bulk_ireq_teardown(&hdr, &breq); 929 + if (copy_to_user(&arg->hdr, &hdr, sizeof(hdr))) 930 + return -EFAULT; 931 + 932 + return 0; 933 + } 934 + 935 + STATIC int 936 + xfs_inumbers_fmt( 937 + struct xfs_ibulk *breq, 938 + const struct xfs_inumbers *igrp) 939 + { 940 + if (copy_to_user(breq->ubuffer, igrp, sizeof(struct xfs_inumbers))) 941 + return -EFAULT; 942 + return xfs_ibulk_advance(breq, sizeof(struct xfs_inumbers)); 943 + } 944 + 945 + /* Handle the v5 inumbers ioctl. 
*/ 946 + STATIC int 947 + xfs_ioc_inumbers( 948 + struct xfs_mount *mp, 949 + unsigned int cmd, 950 + struct xfs_inumbers_req __user *arg) 951 + { 952 + struct xfs_bulk_ireq hdr; 953 + struct xfs_ibulk breq = { 954 + .mp = mp, 955 + }; 956 + int error; 957 + 958 + if (!capable(CAP_SYS_ADMIN)) 959 + return -EPERM; 960 + 961 + if (XFS_FORCED_SHUTDOWN(mp)) 962 + return -EIO; 963 + 964 + if (copy_from_user(&hdr, &arg->hdr, sizeof(hdr))) 965 + return -EFAULT; 966 + 967 + error = xfs_bulk_ireq_setup(mp, &hdr, &breq, arg->inumbers); 968 + if (error == XFS_ITER_ABORT) 969 + goto out_teardown; 970 + if (error < 0) 971 + return error; 972 + 973 + error = xfs_inumbers(&breq, xfs_inumbers_fmt); 974 + if (error) 975 + return error; 976 + 977 + out_teardown: 978 + xfs_bulk_ireq_teardown(&hdr, &breq); 979 + if (copy_to_user(&arg->hdr, &hdr, sizeof(hdr))) 980 + return -EFAULT; 800 981 801 982 return 0; 802 983 } ··· 2149 1926 case XFS_IOC_FSBULKSTAT_SINGLE: 2150 1927 case XFS_IOC_FSBULKSTAT: 2151 1928 case XFS_IOC_FSINUMBERS: 1929 + return xfs_ioc_fsbulkstat(mp, cmd, arg); 1930 + 1931 + case XFS_IOC_BULKSTAT: 2152 1932 return xfs_ioc_bulkstat(mp, cmd, arg); 1933 + case XFS_IOC_INUMBERS: 1934 + return xfs_ioc_inumbers(mp, cmd, arg); 2153 1935 2154 1936 case XFS_IOC_FSGEOMETRY_V1: 2155 1937 return xfs_ioc_fsgeometry(mp, arg, 3);
+8
fs/xfs/xfs_ioctl.h
··· 77 77 uint evmask, 78 78 uint16_t state); 79 79 80 + struct xfs_ibulk; 81 + struct xfs_bstat; 82 + struct xfs_inogrp; 83 + 84 + int xfs_fsbulkstat_one_fmt(struct xfs_ibulk *breq, 85 + const struct xfs_bulkstat *bstat); 86 + int xfs_fsinumbers_fmt(struct xfs_ibulk *breq, const struct xfs_inumbers *igrp); 87 + 80 88 #endif
+81 -82
fs/xfs/xfs_ioctl32.c
··· 3 3 * Copyright (c) 2004-2005 Silicon Graphics, Inc. 4 4 * All Rights Reserved. 5 5 */ 6 - #include <linux/compat.h> 7 - #include <linux/ioctl.h> 8 6 #include <linux/mount.h> 9 - #include <linux/slab.h> 10 - #include <linux/uaccess.h> 11 7 #include <linux/fsmap.h> 12 8 #include "xfs.h" 13 9 #include "xfs_fs.h" 10 + #include "xfs_shared.h" 14 11 #include "xfs_format.h" 15 12 #include "xfs_log_format.h" 16 13 #include "xfs_trans_resv.h" 17 14 #include "xfs_mount.h" 18 15 #include "xfs_inode.h" 16 + #include "xfs_iwalk.h" 19 17 #include "xfs_itable.h" 20 - #include "xfs_error.h" 21 18 #include "xfs_fsops.h" 22 - #include "xfs_alloc.h" 23 19 #include "xfs_rtalloc.h" 24 20 #include "xfs_attr.h" 25 21 #include "xfs_ioctl.h" ··· 80 84 } 81 85 82 86 STATIC int 83 - xfs_inumbers_fmt_compat( 84 - void __user *ubuffer, 85 - const struct xfs_inogrp *buffer, 86 - long count, 87 - long *written) 87 + xfs_fsinumbers_fmt_compat( 88 + struct xfs_ibulk *breq, 89 + const struct xfs_inumbers *ig) 88 90 { 89 - compat_xfs_inogrp_t __user *p32 = ubuffer; 90 - long i; 91 + struct compat_xfs_inogrp __user *p32 = breq->ubuffer; 92 + struct xfs_inogrp ig1; 93 + struct xfs_inogrp *igrp = &ig1; 91 94 92 - for (i = 0; i < count; i++) { 93 - if (put_user(buffer[i].xi_startino, &p32[i].xi_startino) || 94 - put_user(buffer[i].xi_alloccount, &p32[i].xi_alloccount) || 95 - put_user(buffer[i].xi_allocmask, &p32[i].xi_allocmask)) 96 - return -EFAULT; 97 - } 98 - *written = count * sizeof(*p32); 99 - return 0; 95 + xfs_inumbers_to_inogrp(&ig1, ig); 96 + 97 + if (put_user(igrp->xi_startino, &p32->xi_startino) || 98 + put_user(igrp->xi_alloccount, &p32->xi_alloccount) || 99 + put_user(igrp->xi_allocmask, &p32->xi_allocmask)) 100 + return -EFAULT; 101 + 102 + return xfs_ibulk_advance(breq, sizeof(struct compat_xfs_inogrp)); 100 103 } 101 104 102 105 #else 103 - #define xfs_inumbers_fmt_compat xfs_inumbers_fmt 106 + #define xfs_fsinumbers_fmt_compat xfs_fsinumbers_fmt 104 107 #endif /* 
BROKEN_X86_ALIGNMENT */ 105 108 106 109 STATIC int ··· 116 121 return 0; 117 122 } 118 123 119 - /* xfs_bstat_t has differing alignment on intel, & bstime_t sizes everywhere */ 124 + /* 125 + * struct xfs_bstat has differing alignment on intel, & bstime_t sizes 126 + * everywhere 127 + */ 120 128 STATIC int 121 129 xfs_ioctl32_bstat_copyin( 122 - xfs_bstat_t *bstat, 123 - compat_xfs_bstat_t __user *bstat32) 130 + struct xfs_bstat *bstat, 131 + struct compat_xfs_bstat __user *bstat32) 124 132 { 125 133 if (get_user(bstat->bs_ino, &bstat32->bs_ino) || 126 134 get_user(bstat->bs_mode, &bstat32->bs_mode) || ··· 169 171 170 172 /* Return 0 on success or positive error (to xfs_bulkstat()) */ 171 173 STATIC int 172 - xfs_bulkstat_one_fmt_compat( 173 - void __user *ubuffer, 174 - int ubsize, 175 - int *ubused, 176 - const xfs_bstat_t *buffer) 174 + xfs_fsbulkstat_one_fmt_compat( 175 + struct xfs_ibulk *breq, 176 + const struct xfs_bulkstat *bstat) 177 177 { 178 - compat_xfs_bstat_t __user *p32 = ubuffer; 178 + struct compat_xfs_bstat __user *p32 = breq->ubuffer; 179 + struct xfs_bstat bs1; 180 + struct xfs_bstat *buffer = &bs1; 179 181 180 - if (ubsize < sizeof(*p32)) 181 - return -ENOMEM; 182 + xfs_bulkstat_to_bstat(breq->mp, &bs1, bstat); 182 183 183 184 if (put_user(buffer->bs_ino, &p32->bs_ino) || 184 185 put_user(buffer->bs_mode, &p32->bs_mode) || ··· 202 205 put_user(buffer->bs_dmstate, &p32->bs_dmstate) || 203 206 put_user(buffer->bs_aextents, &p32->bs_aextents)) 204 207 return -EFAULT; 205 - if (ubused) 206 - *ubused = sizeof(*p32); 207 - return 0; 208 - } 209 208 210 - STATIC int 211 - xfs_bulkstat_one_compat( 212 - xfs_mount_t *mp, /* mount point for filesystem */ 213 - xfs_ino_t ino, /* inode number to get data for */ 214 - void __user *buffer, /* buffer to place output in */ 215 - int ubsize, /* size of buffer */ 216 - int *ubused, /* bytes used by me */ 217 - int *stat) /* BULKSTAT_RV_... 
*/ 218 - { 219 - return xfs_bulkstat_one_int(mp, ino, buffer, ubsize, 220 - xfs_bulkstat_one_fmt_compat, 221 - ubused, stat); 209 + return xfs_ibulk_advance(breq, sizeof(struct compat_xfs_bstat)); 222 210 } 223 211 224 212 /* copied from xfs_ioctl.c */ 225 213 STATIC int 226 - xfs_compat_ioc_bulkstat( 214 + xfs_compat_ioc_fsbulkstat( 227 215 xfs_mount_t *mp, 228 216 unsigned int cmd, 229 - compat_xfs_fsop_bulkreq_t __user *p32) 217 + struct compat_xfs_fsop_bulkreq __user *p32) 230 218 { 231 219 u32 addr; 232 - xfs_fsop_bulkreq_t bulkreq; 233 - int count; /* # of records returned */ 234 - xfs_ino_t inlast; /* last inode number */ 235 - int done; 220 + struct xfs_fsop_bulkreq bulkreq; 221 + struct xfs_ibulk breq = { 222 + .mp = mp, 223 + .ocount = 0, 224 + }; 225 + xfs_ino_t lastino; 236 226 int error; 237 227 238 228 /* ··· 228 244 * to userpace memory via bulkreq.ubuffer. Normally the compat 229 245 * functions and structure size are the correct ones to use ... 230 246 */ 231 - inumbers_fmt_pf inumbers_func = xfs_inumbers_fmt_compat; 232 - bulkstat_one_pf bs_one_func = xfs_bulkstat_one_compat; 233 - size_t bs_one_size = sizeof(struct compat_xfs_bstat); 247 + inumbers_fmt_pf inumbers_func = xfs_fsinumbers_fmt_compat; 248 + bulkstat_one_fmt_pf bs_one_func = xfs_fsbulkstat_one_fmt_compat; 234 249 235 250 #ifdef CONFIG_X86_X32 236 251 if (in_x32_syscall()) { ··· 241 258 * the data written out in compat layout will not match what 242 259 * x32 userspace expects. 
243 260 */ 244 - inumbers_func = xfs_inumbers_fmt; 245 - bs_one_func = xfs_bulkstat_one; 246 - bs_one_size = sizeof(struct xfs_bstat); 261 + inumbers_func = xfs_fsinumbers_fmt; 262 + bs_one_func = xfs_fsbulkstat_one_fmt; 247 263 } 248 264 #endif 249 265 ··· 266 284 return -EFAULT; 267 285 bulkreq.ocount = compat_ptr(addr); 268 286 269 - if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64))) 287 + if (copy_from_user(&lastino, bulkreq.lastip, sizeof(__s64))) 270 288 return -EFAULT; 271 289 272 - if ((count = bulkreq.icount) <= 0) 290 + if (bulkreq.icount <= 0) 273 291 return -EINVAL; 274 292 275 293 if (bulkreq.ubuffer == NULL) 276 294 return -EINVAL; 277 295 278 - if (cmd == XFS_IOC_FSINUMBERS_32) { 279 - error = xfs_inumbers(mp, &inlast, &count, 280 - bulkreq.ubuffer, inumbers_func); 281 - } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE_32) { 282 - int res; 296 + breq.ubuffer = bulkreq.ubuffer; 297 + breq.icount = bulkreq.icount; 283 298 284 - error = bs_one_func(mp, inlast, bulkreq.ubuffer, 285 - bs_one_size, NULL, &res); 299 + /* 300 + * FSBULKSTAT_SINGLE expects that *lastip contains the inode number 301 + * that we want to stat. However, FSINUMBERS and FSBULKSTAT expect 302 + * that *lastip contains either zero or the number of the last inode to 303 + * be examined by the previous call and return results starting with 304 + * the next inode after that. The new bulk request back end functions 305 + * take the inode to start with, so we have to compute the startino 306 + * parameter from lastino to maintain correct function. lastino == 0 307 + * is a special case because it has traditionally meant "first inode 308 + * in filesystem". 309 + */ 310 + if (cmd == XFS_IOC_FSINUMBERS_32) { 311 + breq.startino = lastino ? 
lastino + 1 : 0; 312 + error = xfs_inumbers(&breq, inumbers_func); 313 + lastino = breq.startino - 1; 314 + } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE_32) { 315 + breq.startino = lastino; 316 + breq.icount = 1; 317 + error = xfs_bulkstat_one(&breq, bs_one_func); 318 + lastino = breq.startino; 286 319 } else if (cmd == XFS_IOC_FSBULKSTAT_32) { 287 - error = xfs_bulkstat(mp, &inlast, &count, 288 - bs_one_func, bs_one_size, 289 - bulkreq.ubuffer, &done); 290 - } else 320 + breq.startino = lastino ? lastino + 1 : 0; 321 + error = xfs_bulkstat(&breq, bs_one_func); 322 + lastino = breq.startino - 1; 323 + } else { 291 324 error = -EINVAL; 325 + } 292 326 if (error) 293 327 return error; 294 328 295 - if (bulkreq.ocount != NULL) { 296 - if (copy_to_user(bulkreq.lastip, &inlast, 297 - sizeof(xfs_ino_t))) 298 - return -EFAULT; 329 + if (bulkreq.lastip != NULL && 330 + copy_to_user(bulkreq.lastip, &lastino, sizeof(xfs_ino_t))) 331 + return -EFAULT; 299 332 300 - if (copy_to_user(bulkreq.ocount, &count, sizeof(count))) 301 - return -EFAULT; 302 - } 333 + if (bulkreq.ocount != NULL && 334 + copy_to_user(bulkreq.ocount, &breq.ocount, sizeof(__s32))) 335 + return -EFAULT; 303 336 304 337 return 0; 305 338 } ··· 574 577 case XFS_IOC_ERROR_CLEARALL: 575 578 case FS_IOC_GETFSMAP: 576 579 case XFS_IOC_SCRUB_METADATA: 580 + case XFS_IOC_BULKSTAT: 581 + case XFS_IOC_INUMBERS: 577 582 return xfs_file_ioctl(filp, cmd, p); 578 583 #if !defined(BROKEN_X86_ALIGNMENT) || defined(CONFIG_X86_X32) 579 584 /* ··· 673 674 case XFS_IOC_FSBULKSTAT_32: 674 675 case XFS_IOC_FSBULKSTAT_SINGLE_32: 675 676 case XFS_IOC_FSINUMBERS_32: 676 - return xfs_compat_ioc_bulkstat(mp, cmd, arg); 677 + return xfs_compat_ioc_fsbulkstat(mp, cmd, arg); 677 678 case XFS_IOC_FD_TO_HANDLE_32: 678 679 case XFS_IOC_PATH_TO_HANDLE_32: 679 680 case XFS_IOC_PATH_TO_FSHANDLE_32: {
+7 -7
fs/xfs/xfs_ioctl32.h
··· 36 36 __s32 tv_nsec; /* and nanoseconds */ 37 37 } compat_xfs_bstime_t; 38 38 39 - typedef struct compat_xfs_bstat { 39 + struct compat_xfs_bstat { 40 40 __u64 bs_ino; /* inode number */ 41 41 __u16 bs_mode; /* type and mode */ 42 42 __u16 bs_nlink; /* number of links */ ··· 61 61 __u32 bs_dmevmask; /* DMIG event mask */ 62 62 __u16 bs_dmstate; /* DMIG state info */ 63 63 __u16 bs_aextents; /* attribute number of extents */ 64 - } __compat_packed compat_xfs_bstat_t; 64 + } __compat_packed; 65 65 66 - typedef struct compat_xfs_fsop_bulkreq { 66 + struct compat_xfs_fsop_bulkreq { 67 67 compat_uptr_t lastip; /* last inode # pointer */ 68 68 __s32 icount; /* count of entries in buffer */ 69 69 compat_uptr_t ubuffer; /* user buffer for inode desc. */ 70 70 compat_uptr_t ocount; /* output count pointer */ 71 - } compat_xfs_fsop_bulkreq_t; 71 + }; 72 72 73 73 #define XFS_IOC_FSBULKSTAT_32 \ 74 74 _IOWR('X', 101, struct compat_xfs_fsop_bulkreq) ··· 106 106 xfs_off_t sx_offset; /* offset into file */ 107 107 xfs_off_t sx_length; /* leng from offset */ 108 108 char sx_pad[16]; /* pad space, unused */ 109 - compat_xfs_bstat_t sx_stat; /* stat of target b4 copy */ 109 + struct compat_xfs_bstat sx_stat; /* stat of target b4 copy */ 110 110 } __compat_packed compat_xfs_swapext_t; 111 111 112 112 #define XFS_IOC_SWAPEXT_32 _IOWR('X', 109, struct compat_xfs_swapext) ··· 201 201 #define XFS_IOC_FSGEOMETRY_V1_32 \ 202 202 _IOR('X', 100, struct compat_xfs_fsop_geom_v1) 203 203 204 - typedef struct compat_xfs_inogrp { 204 + struct compat_xfs_inogrp { 205 205 __u64 xi_startino; /* starting inode number */ 206 206 __s32 xi_alloccount; /* # bits set in allocmask */ 207 207 __u64 xi_allocmask; /* mask of allocated inodes */ 208 - } __attribute__((packed)) compat_xfs_inogrp_t; 208 + } __attribute__((packed)); 209 209 210 210 /* These growfs input structures have padding on the end, so must translate */ 211 211 typedef struct compat_xfs_growfs_data {
+1 -4
fs/xfs/xfs_iomap.c
··· 4 4 * Copyright (c) 2016-2018 Christoph Hellwig. 5 5 * All Rights Reserved. 6 6 */ 7 - #include <linux/iomap.h> 8 7 #include "xfs.h" 9 8 #include "xfs_fs.h" 10 9 #include "xfs_shared.h" ··· 11 12 #include "xfs_log_format.h" 12 13 #include "xfs_trans_resv.h" 13 14 #include "xfs_mount.h" 14 - #include "xfs_defer.h" 15 15 #include "xfs_inode.h" 16 16 #include "xfs_btree.h" 17 17 #include "xfs_bmap_btree.h" ··· 23 25 #include "xfs_inode_item.h" 24 26 #include "xfs_iomap.h" 25 27 #include "xfs_trace.h" 26 - #include "xfs_icache.h" 27 28 #include "xfs_quota.h" 28 29 #include "xfs_dquot_item.h" 29 30 #include "xfs_dquot.h" ··· 776 779 * complete here and might deadlock on the iolock. 777 780 */ 778 781 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 779 - XFS_TRANS_RESERVE | XFS_TRANS_NOFS, &tp); 782 + XFS_TRANS_RESERVE, &tp); 780 783 if (error) 781 784 return error; 782 785
-10
fs/xfs/xfs_iops.c
··· 10 10 #include "xfs_log_format.h" 11 11 #include "xfs_trans_resv.h" 12 12 #include "xfs_mount.h" 13 - #include "xfs_da_format.h" 14 13 #include "xfs_inode.h" 15 - #include "xfs_bmap.h" 16 - #include "xfs_bmap_util.h" 17 14 #include "xfs_acl.h" 18 15 #include "xfs_quota.h" 19 - #include "xfs_error.h" 20 16 #include "xfs_attr.h" 21 17 #include "xfs_trans.h" 22 18 #include "xfs_trace.h" 23 19 #include "xfs_icache.h" 24 20 #include "xfs_symlink.h" 25 - #include "xfs_da_btree.h" 26 21 #include "xfs_dir2.h" 27 - #include "xfs_trans_space.h" 28 22 #include "xfs_iomap.h" 29 - #include "xfs_defer.h" 30 23 31 - #include <linux/capability.h> 32 24 #include <linux/xattr.h> 33 25 #include <linux/posix_acl.h> 34 26 #include <linux/security.h> 35 - #include <linux/iomap.h> 36 - #include <linux/slab.h> 37 27 #include <linux/iversion.h> 38 28 39 29 /*
+255 -500
fs/xfs/xfs_itable.c
··· 14 14 #include "xfs_btree.h" 15 15 #include "xfs_ialloc.h" 16 16 #include "xfs_ialloc_btree.h" 17 + #include "xfs_iwalk.h" 17 18 #include "xfs_itable.h" 18 19 #include "xfs_error.h" 19 - #include "xfs_trace.h" 20 20 #include "xfs_icache.h" 21 21 #include "xfs_health.h" 22 22 23 23 /* 24 - * Return stat information for one inode. 25 - * Return 0 if ok, else errno. 24 + * Bulk Stat 25 + * ========= 26 + * 27 + * Use the inode walking functions to fill out struct xfs_bulkstat for every 28 + * allocated inode, then pass the stat information to some externally provided 29 + * iteration function. 26 30 */ 27 - int 31 + 32 + struct xfs_bstat_chunk { 33 + bulkstat_one_fmt_pf formatter; 34 + struct xfs_ibulk *breq; 35 + struct xfs_bulkstat *buf; 36 + }; 37 + 38 + /* 39 + * Fill out the bulkstat info for a single inode and report it somewhere. 40 + * 41 + * bc->breq->lastino is effectively the inode cursor as we walk through the 42 + * filesystem. Therefore, we update it any time we need to move the cursor 43 + * forward, regardless of whether or not we're sending any bstat information 44 + * back to userspace. If the inode is internal metadata or, has been freed 45 + * out from under us, we just simply keep going. 46 + * 47 + * However, if any other type of error happens we want to stop right where we 48 + * are so that userspace will call back with exact number of the bad inode and 49 + * we can send back an error code. 50 + * 51 + * Note that if the formatter tells us there's no space left in the buffer we 52 + * move the cursor forward and abort the walk. 53 + */ 54 + STATIC int 28 55 xfs_bulkstat_one_int( 29 - struct xfs_mount *mp, /* mount point for filesystem */ 30 - xfs_ino_t ino, /* inode to get data for */ 31 - void __user *buffer, /* buffer to place output in */ 32 - int ubsize, /* size of buffer */ 33 - bulkstat_one_fmt_pf formatter, /* formatter, copy to user */ 34 - int *ubused, /* bytes used by me */ 35 - int *stat) /* BULKSTAT_RV_... 
*/ 56 + struct xfs_mount *mp, 57 + struct xfs_trans *tp, 58 + xfs_ino_t ino, 59 + struct xfs_bstat_chunk *bc) 36 60 { 37 61 struct xfs_icdinode *dic; /* dinode core info pointer */ 38 62 struct xfs_inode *ip; /* incore inode pointer */ 39 63 struct inode *inode; 40 - struct xfs_bstat *buf; /* return buffer */ 41 - int error = 0; /* error value */ 64 + struct xfs_bulkstat *buf = bc->buf; 65 + int error = -EINVAL; 42 66 43 - *stat = BULKSTAT_RV_NOTHING; 67 + if (xfs_internal_inum(mp, ino)) 68 + goto out_advance; 44 69 45 - if (!buffer || xfs_internal_inum(mp, ino)) 46 - return -EINVAL; 47 - 48 - buf = kmem_zalloc(sizeof(*buf), KM_SLEEP | KM_MAYFAIL); 49 - if (!buf) 50 - return -ENOMEM; 51 - 52 - error = xfs_iget(mp, NULL, ino, 70 + error = xfs_iget(mp, tp, ino, 53 71 (XFS_IGET_DONTCACHE | XFS_IGET_UNTRUSTED), 54 72 XFS_ILOCK_SHARED, &ip); 73 + if (error == -ENOENT || error == -EINVAL) 74 + goto out_advance; 55 75 if (error) 56 - goto out_free; 76 + goto out; 57 77 58 78 ASSERT(ip != NULL); 59 79 ASSERT(ip->i_imap.im_blkno != 0); ··· 84 64 /* xfs_iget returns the following without needing 85 65 * further change. 
86 66 */ 87 - buf->bs_projid_lo = dic->di_projid_lo; 88 - buf->bs_projid_hi = dic->di_projid_hi; 67 + buf->bs_projectid = xfs_get_projid(ip); 89 68 buf->bs_ino = ino; 90 69 buf->bs_uid = dic->di_uid; 91 70 buf->bs_gid = dic->di_gid; 92 71 buf->bs_size = dic->di_size; 93 72 94 73 buf->bs_nlink = inode->i_nlink; 95 - buf->bs_atime.tv_sec = inode->i_atime.tv_sec; 96 - buf->bs_atime.tv_nsec = inode->i_atime.tv_nsec; 97 - buf->bs_mtime.tv_sec = inode->i_mtime.tv_sec; 98 - buf->bs_mtime.tv_nsec = inode->i_mtime.tv_nsec; 99 - buf->bs_ctime.tv_sec = inode->i_ctime.tv_sec; 100 - buf->bs_ctime.tv_nsec = inode->i_ctime.tv_nsec; 74 + buf->bs_atime = inode->i_atime.tv_sec; 75 + buf->bs_atime_nsec = inode->i_atime.tv_nsec; 76 + buf->bs_mtime = inode->i_mtime.tv_sec; 77 + buf->bs_mtime_nsec = inode->i_mtime.tv_nsec; 78 + buf->bs_ctime = inode->i_ctime.tv_sec; 79 + buf->bs_ctime_nsec = inode->i_ctime.tv_nsec; 80 + buf->bs_btime = dic->di_crtime.t_sec; 81 + buf->bs_btime_nsec = dic->di_crtime.t_nsec; 101 82 buf->bs_gen = inode->i_generation; 102 83 buf->bs_mode = inode->i_mode; 103 84 104 85 buf->bs_xflags = xfs_ip2xflags(ip); 105 - buf->bs_extsize = dic->di_extsize << mp->m_sb.sb_blocklog; 86 + buf->bs_extsize_blks = dic->di_extsize; 106 87 buf->bs_extents = dic->di_nextents; 107 - memset(buf->bs_pad, 0, sizeof(buf->bs_pad)); 108 88 xfs_bulkstat_health(ip, buf); 109 - buf->bs_dmevmask = dic->di_dmevmask; 110 - buf->bs_dmstate = dic->di_dmstate; 111 89 buf->bs_aextents = dic->di_anextents; 112 90 buf->bs_forkoff = XFS_IFORK_BOFF(ip); 91 + buf->bs_version = XFS_BULKSTAT_VERSION_V5; 113 92 114 93 if (dic->di_version == 3) { 115 94 if (dic->di_flags2 & XFS_DIFLAG2_COWEXTSIZE) 116 - buf->bs_cowextsize = dic->di_cowextsize << 117 - mp->m_sb.sb_blocklog; 95 + buf->bs_cowextsize_blks = dic->di_cowextsize; 118 96 } 119 97 120 98 switch (dic->di_format) { ··· 136 118 xfs_iunlock(ip, XFS_ILOCK_SHARED); 137 119 xfs_irele(ip); 138 120 139 - error = formatter(buffer, ubsize, ubused, buf); 140 - 
if (!error) 141 - *stat = BULKSTAT_RV_DIDONE; 121 + error = bc->formatter(bc->breq, buf); 122 + if (error == XFS_IBULK_ABORT) 123 + goto out_advance; 124 + if (error) 125 + goto out; 142 126 143 - out_free: 144 - kmem_free(buf); 127 + out_advance: 128 + /* 129 + * Advance the cursor to the inode that comes after the one we just 130 + * looked at. We want the caller to move along if the bulkstat 131 + * information was copied successfully; if we tried to grab the inode 132 + * but it's no longer allocated; or if it's internal metadata. 133 + */ 134 + bc->breq->startino = ino + 1; 135 + out: 145 136 return error; 146 137 } 147 138 148 - /* Return 0 on success or positive error */ 149 - STATIC int 150 - xfs_bulkstat_one_fmt( 151 - void __user *ubuffer, 152 - int ubsize, 153 - int *ubused, 154 - const xfs_bstat_t *buffer) 155 - { 156 - if (ubsize < sizeof(*buffer)) 157 - return -ENOMEM; 158 - if (copy_to_user(ubuffer, buffer, sizeof(*buffer))) 159 - return -EFAULT; 160 - if (ubused) 161 - *ubused = sizeof(*buffer); 162 - return 0; 163 - } 164 - 139 + /* Bulkstat a single inode. */ 165 140 int 166 141 xfs_bulkstat_one( 167 - xfs_mount_t *mp, /* mount point for filesystem */ 168 - xfs_ino_t ino, /* inode number to get data for */ 169 - void __user *buffer, /* buffer to place output in */ 170 - int ubsize, /* size of buffer */ 171 - int *ubused, /* bytes used by me */ 172 - int *stat) /* BULKSTAT_RV_... */ 142 + struct xfs_ibulk *breq, 143 + bulkstat_one_fmt_pf formatter) 173 144 { 174 - return xfs_bulkstat_one_int(mp, ino, buffer, ubsize, 175 - xfs_bulkstat_one_fmt, ubused, stat); 176 - } 145 + struct xfs_bstat_chunk bc = { 146 + .formatter = formatter, 147 + .breq = breq, 148 + }; 149 + int error; 177 150 178 - /* 179 - * Loop over all clusters in a chunk for a given incore inode allocation btree 180 - * record. Do a readahead if there are any allocated inodes in that cluster. 
181 - */ 182 - STATIC void 183 - xfs_bulkstat_ichunk_ra( 184 - struct xfs_mount *mp, 185 - xfs_agnumber_t agno, 186 - struct xfs_inobt_rec_incore *irec) 187 - { 188 - xfs_agblock_t agbno; 189 - struct blk_plug plug; 190 - int i; /* inode chunk index */ 151 + ASSERT(breq->icount == 1); 191 152 192 - agbno = XFS_AGINO_TO_AGBNO(mp, irec->ir_startino); 153 + bc.buf = kmem_zalloc(sizeof(struct xfs_bulkstat), 154 + KM_SLEEP | KM_MAYFAIL); 155 + if (!bc.buf) 156 + return -ENOMEM; 193 157 194 - blk_start_plug(&plug); 195 - for (i = 0; i < XFS_INODES_PER_CHUNK; 196 - i += mp->m_inodes_per_cluster, agbno += mp->m_blocks_per_cluster) { 197 - if (xfs_inobt_maskn(i, mp->m_inodes_per_cluster) & 198 - ~irec->ir_free) { 199 - xfs_btree_reada_bufs(mp, agno, agbno, 200 - mp->m_blocks_per_cluster, 201 - &xfs_inode_buf_ops); 202 - } 203 - } 204 - blk_finish_plug(&plug); 205 - } 158 + error = xfs_bulkstat_one_int(breq->mp, NULL, breq->startino, &bc); 206 159 207 - /* 208 - * Lookup the inode chunk that the given inode lives in and then get the record 209 - * if we found the chunk. If the inode was not the last in the chunk and there 210 - * are some left allocated, update the data for the pointed-to record as well as 211 - * return the count of grabbed inodes. 
212 - */ 213 - STATIC int 214 - xfs_bulkstat_grab_ichunk( 215 - struct xfs_btree_cur *cur, /* btree cursor */ 216 - xfs_agino_t agino, /* starting inode of chunk */ 217 - int *icount,/* return # of inodes grabbed */ 218 - struct xfs_inobt_rec_incore *irec) /* btree record */ 219 - { 220 - int idx; /* index into inode chunk */ 221 - int stat; 222 - int error = 0; 223 - 224 - /* Lookup the inode chunk that this inode lives in */ 225 - error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &stat); 226 - if (error) 227 - return error; 228 - if (!stat) { 229 - *icount = 0; 230 - return error; 231 - } 232 - 233 - /* Get the record, should always work */ 234 - error = xfs_inobt_get_rec(cur, irec, &stat); 235 - if (error) 236 - return error; 237 - XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, stat == 1); 238 - 239 - /* Check if the record contains the inode in request */ 240 - if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino) { 241 - *icount = 0; 242 - return 0; 243 - } 244 - 245 - idx = agino - irec->ir_startino + 1; 246 - if (idx < XFS_INODES_PER_CHUNK && 247 - (xfs_inobt_maskn(idx, XFS_INODES_PER_CHUNK - idx) & ~irec->ir_free)) { 248 - int i; 249 - 250 - /* We got a right chunk with some left inodes allocated at it. 251 - * Grab the chunk record. Mark all the uninteresting inodes 252 - * free -- because they're before our start point. 
253 - */ 254 - for (i = 0; i < idx; i++) { 255 - if (XFS_INOBT_MASK(i) & ~irec->ir_free) 256 - irec->ir_freecount++; 257 - } 258 - 259 - irec->ir_free |= xfs_inobt_maskn(0, idx); 260 - *icount = irec->ir_count - irec->ir_freecount; 261 - } 262 - 263 - return 0; 264 - } 265 - 266 - #define XFS_BULKSTAT_UBLEFT(ubleft) ((ubleft) >= statstruct_size) 267 - 268 - struct xfs_bulkstat_agichunk { 269 - char __user **ac_ubuffer;/* pointer into user's buffer */ 270 - int ac_ubleft; /* bytes left in user's buffer */ 271 - int ac_ubelem; /* spaces used in user's buffer */ 272 - }; 273 - 274 - /* 275 - * Process inodes in chunk with a pointer to a formatter function 276 - * that will iget the inode and fill in the appropriate structure. 277 - */ 278 - static int 279 - xfs_bulkstat_ag_ichunk( 280 - struct xfs_mount *mp, 281 - xfs_agnumber_t agno, 282 - struct xfs_inobt_rec_incore *irbp, 283 - bulkstat_one_pf formatter, 284 - size_t statstruct_size, 285 - struct xfs_bulkstat_agichunk *acp, 286 - xfs_agino_t *last_agino) 287 - { 288 - char __user **ubufp = acp->ac_ubuffer; 289 - int chunkidx; 290 - int error = 0; 291 - xfs_agino_t agino = irbp->ir_startino; 292 - 293 - for (chunkidx = 0; chunkidx < XFS_INODES_PER_CHUNK; 294 - chunkidx++, agino++) { 295 - int fmterror; 296 - int ubused; 297 - 298 - /* inode won't fit in buffer, we are done */ 299 - if (acp->ac_ubleft < statstruct_size) 300 - break; 301 - 302 - /* Skip if this inode is free */ 303 - if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free) 304 - continue; 305 - 306 - /* Get the inode and fill in a single buffer */ 307 - ubused = statstruct_size; 308 - error = formatter(mp, XFS_AGINO_TO_INO(mp, agno, agino), 309 - *ubufp, acp->ac_ubleft, &ubused, &fmterror); 310 - 311 - if (fmterror == BULKSTAT_RV_GIVEUP || 312 - (error && error != -ENOENT && error != -EINVAL)) { 313 - acp->ac_ubleft = 0; 314 - ASSERT(error); 315 - break; 316 - } 317 - 318 - /* be careful not to leak error if at end of chunk */ 319 - if (fmterror == 
BULKSTAT_RV_NOTHING || error) { 320 - error = 0; 321 - continue; 322 - } 323 - 324 - *ubufp += ubused; 325 - acp->ac_ubleft -= ubused; 326 - acp->ac_ubelem++; 327 - } 160 + kmem_free(bc.buf); 328 161 329 162 /* 330 - * Post-update *last_agino. At this point, agino will always point one 331 - * inode past the last inode we processed successfully. Hence we 332 - * substract that inode when setting the *last_agino cursor so that we 333 - * return the correct cookie to userspace. On the next bulkstat call, 334 - * the inode under the lastino cookie will be skipped as we have already 335 - * processed it here. 163 + * If we reported one inode to userspace then we abort because we hit 164 + * the end of the buffer. Don't leak that back to userspace. 336 165 */ 337 - *last_agino = agino - 1; 166 + if (error == XFS_IWALK_ABORT) 167 + error = 0; 338 168 339 169 return error; 340 170 } 341 171 342 - /* 343 - * Return stat information in bulk (by-inode) for the filesystem. 344 - */ 345 - int /* error status */ 346 - xfs_bulkstat( 347 - xfs_mount_t *mp, /* mount point for filesystem */ 348 - xfs_ino_t *lastinop, /* last inode returned */ 349 - int *ubcountp, /* size of buffer/count returned */ 350 - bulkstat_one_pf formatter, /* func that'd fill a single buf */ 351 - size_t statstruct_size, /* sizeof struct filling */ 352 - char __user *ubuffer, /* buffer with inode stats */ 353 - int *done) /* 1 if there are more stats to get */ 172 + static int 173 + xfs_bulkstat_iwalk( 174 + struct xfs_mount *mp, 175 + struct xfs_trans *tp, 176 + xfs_ino_t ino, 177 + void *data) 354 178 { 355 - xfs_buf_t *agbp; /* agi header buffer */ 356 - xfs_agino_t agino; /* inode # in allocation group */ 357 - xfs_agnumber_t agno; /* allocation group number */ 358 - xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */ 359 - xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */ 360 - int nirbuf; /* size of irbuf */ 361 - int ubcount; /* size of user's buffer */ 362 - struct 
xfs_bulkstat_agichunk ac; 363 - int error = 0; 179 + int error; 364 180 365 - /* 366 - * Get the last inode value, see if there's nothing to do. 367 - */ 368 - agno = XFS_INO_TO_AGNO(mp, *lastinop); 369 - agino = XFS_INO_TO_AGINO(mp, *lastinop); 370 - if (agno >= mp->m_sb.sb_agcount || 371 - *lastinop != XFS_AGINO_TO_INO(mp, agno, agino)) { 372 - *done = 1; 373 - *ubcountp = 0; 181 + error = xfs_bulkstat_one_int(mp, tp, ino, data); 182 + /* bulkstat just skips over missing inodes */ 183 + if (error == -ENOENT || error == -EINVAL) 374 184 return 0; 375 - } 185 + return error; 186 + } 376 187 377 - ubcount = *ubcountp; /* statstruct's */ 378 - ac.ac_ubuffer = &ubuffer; 379 - ac.ac_ubleft = ubcount * statstruct_size; /* bytes */; 380 - ac.ac_ubelem = 0; 188 + /* 189 + * Check the incoming lastino parameter. 190 + * 191 + * We allow any inode value that could map to physical space inside the 192 + * filesystem because if there are no inodes there, bulkstat moves on to the 193 + * next chunk. In other words, the magic agino value of zero takes us to the 194 + * first chunk in the AG, and an agino value past the end of the AG takes us to 195 + * the first chunk in the next AG. 196 + * 197 + * Therefore we can end early if the requested inode is beyond the end of the 198 + * filesystem or doesn't map properly. 199 + */ 200 + static inline bool 201 + xfs_bulkstat_already_done( 202 + struct xfs_mount *mp, 203 + xfs_ino_t startino) 204 + { 205 + xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino); 206 + xfs_agino_t agino = XFS_INO_TO_AGINO(mp, startino); 381 207 382 - *ubcountp = 0; 383 - *done = 0; 208 + return agno >= mp->m_sb.sb_agcount || 209 + startino != XFS_AGINO_TO_INO(mp, agno, agino); 210 + } 384 211 385 - irbuf = kmem_zalloc_large(PAGE_SIZE * 4, KM_SLEEP); 386 - if (!irbuf) 212 + /* Return stat information in bulk (by-inode) for the filesystem. 
*/ 213 + int 214 + xfs_bulkstat( 215 + struct xfs_ibulk *breq, 216 + bulkstat_one_fmt_pf formatter) 217 + { 218 + struct xfs_bstat_chunk bc = { 219 + .formatter = formatter, 220 + .breq = breq, 221 + }; 222 + int error; 223 + 224 + if (xfs_bulkstat_already_done(breq->mp, breq->startino)) 225 + return 0; 226 + 227 + bc.buf = kmem_zalloc(sizeof(struct xfs_bulkstat), 228 + KM_SLEEP | KM_MAYFAIL); 229 + if (!bc.buf) 387 230 return -ENOMEM; 388 - nirbuf = (PAGE_SIZE * 4) / sizeof(*irbuf); 389 231 390 - /* 391 - * Loop over the allocation groups, starting from the last 392 - * inode returned; 0 means start of the allocation group. 393 - */ 394 - while (agno < mp->m_sb.sb_agcount) { 395 - struct xfs_inobt_rec_incore *irbp = irbuf; 396 - struct xfs_inobt_rec_incore *irbufend = irbuf + nirbuf; 397 - bool end_of_ag = false; 398 - int icount = 0; 399 - int stat; 232 + error = xfs_iwalk(breq->mp, NULL, breq->startino, breq->flags, 233 + xfs_bulkstat_iwalk, breq->icount, &bc); 400 234 401 - error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); 402 - if (error) 403 - break; 404 - /* 405 - * Allocate and initialize a btree cursor for ialloc btree. 406 - */ 407 - cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno, 408 - XFS_BTNUM_INO); 409 - if (agino > 0) { 410 - /* 411 - * In the middle of an allocation group, we need to get 412 - * the remainder of the chunk we're in. 413 - */ 414 - struct xfs_inobt_rec_incore r; 415 - 416 - error = xfs_bulkstat_grab_ichunk(cur, agino, &icount, &r); 417 - if (error) 418 - goto del_cursor; 419 - if (icount) { 420 - irbp->ir_startino = r.ir_startino; 421 - irbp->ir_holemask = r.ir_holemask; 422 - irbp->ir_count = r.ir_count; 423 - irbp->ir_freecount = r.ir_freecount; 424 - irbp->ir_free = r.ir_free; 425 - irbp++; 426 - } 427 - /* Increment to the next record */ 428 - error = xfs_btree_increment(cur, 0, &stat); 429 - } else { 430 - /* Start of ag. 
Lookup the first inode chunk */ 431 - error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &stat); 432 - } 433 - if (error || stat == 0) { 434 - end_of_ag = true; 435 - goto del_cursor; 436 - } 437 - 438 - /* 439 - * Loop through inode btree records in this ag, 440 - * until we run out of inodes or space in the buffer. 441 - */ 442 - while (irbp < irbufend && icount < ubcount) { 443 - struct xfs_inobt_rec_incore r; 444 - 445 - error = xfs_inobt_get_rec(cur, &r, &stat); 446 - if (error || stat == 0) { 447 - end_of_ag = true; 448 - goto del_cursor; 449 - } 450 - 451 - /* 452 - * If this chunk has any allocated inodes, save it. 453 - * Also start read-ahead now for this chunk. 454 - */ 455 - if (r.ir_freecount < r.ir_count) { 456 - xfs_bulkstat_ichunk_ra(mp, agno, &r); 457 - irbp->ir_startino = r.ir_startino; 458 - irbp->ir_holemask = r.ir_holemask; 459 - irbp->ir_count = r.ir_count; 460 - irbp->ir_freecount = r.ir_freecount; 461 - irbp->ir_free = r.ir_free; 462 - irbp++; 463 - icount += r.ir_count - r.ir_freecount; 464 - } 465 - error = xfs_btree_increment(cur, 0, &stat); 466 - if (error || stat == 0) { 467 - end_of_ag = true; 468 - goto del_cursor; 469 - } 470 - cond_resched(); 471 - } 472 - 473 - /* 474 - * Drop the btree buffers and the agi buffer as we can't hold any 475 - * of the locks these represent when calling iget. If there is a 476 - * pending error, then we are done. 477 - */ 478 - del_cursor: 479 - xfs_btree_del_cursor(cur, error); 480 - xfs_buf_relse(agbp); 481 - if (error) 482 - break; 483 - /* 484 - * Now format all the good inodes into the user's buffer. The 485 - * call to xfs_bulkstat_ag_ichunk() sets up the agino pointer 486 - * for the next loop iteration. 
487 - */ 488 - irbufend = irbp; 489 - for (irbp = irbuf; 490 - irbp < irbufend && ac.ac_ubleft >= statstruct_size; 491 - irbp++) { 492 - error = xfs_bulkstat_ag_ichunk(mp, agno, irbp, 493 - formatter, statstruct_size, &ac, 494 - &agino); 495 - if (error) 496 - break; 497 - 498 - cond_resched(); 499 - } 500 - 501 - /* 502 - * If we've run out of space or had a formatting error, we 503 - * are now done 504 - */ 505 - if (ac.ac_ubleft < statstruct_size || error) 506 - break; 507 - 508 - if (end_of_ag) { 509 - agno++; 510 - agino = 0; 511 - } 512 - } 513 - /* 514 - * Done, we're either out of filesystem or space to put the data. 515 - */ 516 - kmem_free(irbuf); 517 - *ubcountp = ac.ac_ubelem; 235 + kmem_free(bc.buf); 518 236 519 237 /* 520 238 * We found some inodes, so clear the error status and return them. ··· 259 505 * triggered again and propagated to userspace as there will be no 260 506 * formatted inodes in the buffer. 261 507 */ 262 - if (ac.ac_ubelem) 508 + if (breq->ocount > 0) 263 509 error = 0; 264 - 265 - /* 266 - * If we ran out of filesystem, lastino will point off the end of 267 - * the filesystem so the next call will return immediately. 268 - */ 269 - *lastinop = XFS_AGINO_TO_INO(mp, agno, agino); 270 - if (agno >= mp->m_sb.sb_agcount) 271 - *done = 1; 272 510 273 511 return error; 274 512 } 275 513 276 - int 277 - xfs_inumbers_fmt( 278 - void __user *ubuffer, /* buffer to write to */ 279 - const struct xfs_inogrp *buffer, /* buffer to read from */ 280 - long count, /* # of elements to read */ 281 - long *written) /* # of bytes written */ 514 + /* Convert bulkstat (v5) to bstat (v1). 
*/ 515 + void 516 + xfs_bulkstat_to_bstat( 517 + struct xfs_mount *mp, 518 + struct xfs_bstat *bs1, 519 + const struct xfs_bulkstat *bstat) 282 520 { 283 - if (copy_to_user(ubuffer, buffer, count * sizeof(*buffer))) 284 - return -EFAULT; 285 - *written = count * sizeof(*buffer); 286 - return 0; 521 + memset(bs1, 0, sizeof(struct xfs_bstat)); 522 + bs1->bs_ino = bstat->bs_ino; 523 + bs1->bs_mode = bstat->bs_mode; 524 + bs1->bs_nlink = bstat->bs_nlink; 525 + bs1->bs_uid = bstat->bs_uid; 526 + bs1->bs_gid = bstat->bs_gid; 527 + bs1->bs_rdev = bstat->bs_rdev; 528 + bs1->bs_blksize = bstat->bs_blksize; 529 + bs1->bs_size = bstat->bs_size; 530 + bs1->bs_atime.tv_sec = bstat->bs_atime; 531 + bs1->bs_mtime.tv_sec = bstat->bs_mtime; 532 + bs1->bs_ctime.tv_sec = bstat->bs_ctime; 533 + bs1->bs_atime.tv_nsec = bstat->bs_atime_nsec; 534 + bs1->bs_mtime.tv_nsec = bstat->bs_mtime_nsec; 535 + bs1->bs_ctime.tv_nsec = bstat->bs_ctime_nsec; 536 + bs1->bs_blocks = bstat->bs_blocks; 537 + bs1->bs_xflags = bstat->bs_xflags; 538 + bs1->bs_extsize = XFS_FSB_TO_B(mp, bstat->bs_extsize_blks); 539 + bs1->bs_extents = bstat->bs_extents; 540 + bs1->bs_gen = bstat->bs_gen; 541 + bs1->bs_projid_lo = bstat->bs_projectid & 0xFFFF; 542 + bs1->bs_forkoff = bstat->bs_forkoff; 543 + bs1->bs_projid_hi = bstat->bs_projectid >> 16; 544 + bs1->bs_sick = bstat->bs_sick; 545 + bs1->bs_checked = bstat->bs_checked; 546 + bs1->bs_cowextsize = XFS_FSB_TO_B(mp, bstat->bs_cowextsize_blks); 547 + bs1->bs_dmevmask = 0; 548 + bs1->bs_dmstate = 0; 549 + bs1->bs_aextents = bstat->bs_aextents; 550 + } 551 + 552 + struct xfs_inumbers_chunk { 553 + inumbers_fmt_pf formatter; 554 + struct xfs_ibulk *breq; 555 + }; 556 + 557 + /* 558 + * INUMBERS 559 + * ======== 560 + * This is how we export inode btree records to userspace, so that XFS tools 561 + * can figure out where inodes are allocated. 562 + */ 563 + 564 + /* 565 + * Format the inode group structure and report it somewhere. 
566 + * 567 + * Similar to xfs_bulkstat_one_int, lastino is the inode cursor as we walk 568 + * through the filesystem so we move it forward unless there was a runtime 569 + * error. If the formatter tells us the buffer is now full we also move the 570 + * cursor forward and abort the walk. 571 + */ 572 + STATIC int 573 + xfs_inumbers_walk( 574 + struct xfs_mount *mp, 575 + struct xfs_trans *tp, 576 + xfs_agnumber_t agno, 577 + const struct xfs_inobt_rec_incore *irec, 578 + void *data) 579 + { 580 + struct xfs_inumbers inogrp = { 581 + .xi_startino = XFS_AGINO_TO_INO(mp, agno, irec->ir_startino), 582 + .xi_alloccount = irec->ir_count - irec->ir_freecount, 583 + .xi_allocmask = ~irec->ir_free, 584 + .xi_version = XFS_INUMBERS_VERSION_V5, 585 + }; 586 + struct xfs_inumbers_chunk *ic = data; 587 + int error; 588 + 589 + error = ic->formatter(ic->breq, &inogrp); 590 + if (error && error != XFS_IBULK_ABORT) 591 + return error; 592 + 593 + ic->breq->startino = XFS_AGINO_TO_INO(mp, agno, irec->ir_startino) + 594 + XFS_INODES_PER_CHUNK; 595 + return error; 287 596 } 288 597 289 598 /* 290 599 * Return inode number table for the filesystem. 
291 600 */ 292 - int /* error status */ 601 + int 293 602 xfs_inumbers( 294 - struct xfs_mount *mp,/* mount point for filesystem */ 295 - xfs_ino_t *lastino,/* last inode returned */ 296 - int *count,/* size of buffer/count returned */ 297 - void __user *ubuffer,/* buffer with inode descriptions */ 603 + struct xfs_ibulk *breq, 298 604 inumbers_fmt_pf formatter) 299 605 { 300 - xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, *lastino); 301 - xfs_agino_t agino = XFS_INO_TO_AGINO(mp, *lastino); 302 - struct xfs_btree_cur *cur = NULL; 303 - struct xfs_buf *agbp = NULL; 304 - struct xfs_inogrp *buffer; 305 - int bcount; 306 - int left = *count; 307 - int bufidx = 0; 606 + struct xfs_inumbers_chunk ic = { 607 + .formatter = formatter, 608 + .breq = breq, 609 + }; 308 610 int error = 0; 309 611 310 - *count = 0; 311 - if (agno >= mp->m_sb.sb_agcount || 312 - *lastino != XFS_AGINO_TO_INO(mp, agno, agino)) 313 - return error; 612 + if (xfs_bulkstat_already_done(breq->mp, breq->startino)) 613 + return 0; 314 614 315 - bcount = min(left, (int)(PAGE_SIZE / sizeof(*buffer))); 316 - buffer = kmem_zalloc(bcount * sizeof(*buffer), KM_SLEEP); 317 - do { 318 - struct xfs_inobt_rec_incore r; 319 - int stat; 615 + error = xfs_inobt_walk(breq->mp, NULL, breq->startino, breq->flags, 616 + xfs_inumbers_walk, breq->icount, &ic); 320 617 321 - if (!agbp) { 322 - error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); 323 - if (error) 324 - break; 325 - 326 - cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno, 327 - XFS_BTNUM_INO); 328 - error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_GE, 329 - &stat); 330 - if (error) 331 - break; 332 - if (!stat) 333 - goto next_ag; 334 - } 335 - 336 - error = xfs_inobt_get_rec(cur, &r, &stat); 337 - if (error) 338 - break; 339 - if (!stat) 340 - goto next_ag; 341 - 342 - agino = r.ir_startino + XFS_INODES_PER_CHUNK - 1; 343 - buffer[bufidx].xi_startino = 344 - XFS_AGINO_TO_INO(mp, agno, r.ir_startino); 345 - buffer[bufidx].xi_alloccount = r.ir_count - 
r.ir_freecount; 346 - buffer[bufidx].xi_allocmask = ~r.ir_free; 347 - if (++bufidx == bcount) { 348 - long written; 349 - 350 - error = formatter(ubuffer, buffer, bufidx, &written); 351 - if (error) 352 - break; 353 - ubuffer += written; 354 - *count += bufidx; 355 - bufidx = 0; 356 - } 357 - if (!--left) 358 - break; 359 - 360 - error = xfs_btree_increment(cur, 0, &stat); 361 - if (error) 362 - break; 363 - if (stat) 364 - continue; 365 - 366 - next_ag: 367 - xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 368 - cur = NULL; 369 - xfs_buf_relse(agbp); 370 - agbp = NULL; 371 - agino = 0; 372 - agno++; 373 - } while (agno < mp->m_sb.sb_agcount); 374 - 375 - if (!error) { 376 - if (bufidx) { 377 - long written; 378 - 379 - error = formatter(ubuffer, buffer, bufidx, &written); 380 - if (!error) 381 - *count += bufidx; 382 - } 383 - *lastino = XFS_AGINO_TO_INO(mp, agno, agino); 384 - } 385 - 386 - kmem_free(buffer); 387 - if (cur) 388 - xfs_btree_del_cursor(cur, error); 389 - if (agbp) 390 - xfs_buf_relse(agbp); 618 + /* 619 + * We found some inode groups, so clear the error status and return 620 + * them. The lastino pointer will point directly at the inode that 621 + * triggered any error that occurred, so on the next call the error 622 + * will be triggered again and propagated to userspace as there will be 623 + * no formatted inode groups in the buffer. 624 + */ 625 + if (breq->ocount > 0) 626 + error = 0; 391 627 392 628 return error; 629 + } 630 + 631 + /* Convert an inumbers (v5) struct to a inogrp (v1) struct. */ 632 + void 633 + xfs_inumbers_to_inogrp( 634 + struct xfs_inogrp *ig1, 635 + const struct xfs_inumbers *ig) 636 + { 637 + ig1->xi_startino = ig->xi_startino; 638 + ig1->xi_alloccount = ig->xi_alloccount; 639 + ig1->xi_allocmask = ig->xi_allocmask; 393 640 }
+39 -67
fs/xfs/xfs_itable.h
··· 5 5 #ifndef __XFS_ITABLE_H__ 6 6 #define __XFS_ITABLE_H__ 7 7 8 - /* 9 - * xfs_bulkstat() is used to fill in xfs_bstat structures as well as dm_stat 10 - * structures (by the dmi library). This is a pointer to a formatter function 11 - * that will iget the inode and fill in the appropriate structure. 12 - * see xfs_bulkstat_one() and xfs_dm_bulkstat_one() in dmapi_xfs.c 13 - */ 14 - typedef int (*bulkstat_one_pf)(struct xfs_mount *mp, 15 - xfs_ino_t ino, 16 - void __user *buffer, 17 - int ubsize, 18 - int *ubused, 19 - int *stat); 8 + /* In-memory representation of a userspace request for batch inode data. */ 9 + struct xfs_ibulk { 10 + struct xfs_mount *mp; 11 + void __user *ubuffer; /* user output buffer */ 12 + xfs_ino_t startino; /* start with this inode */ 13 + unsigned int icount; /* number of elements in ubuffer */ 14 + unsigned int ocount; /* number of records returned */ 15 + unsigned int flags; /* see XFS_IBULK_FLAG_* */ 16 + }; 17 + 18 + /* Only iterate within the same AG as startino */ 19 + #define XFS_IBULK_SAME_AG (XFS_IWALK_SAME_AG) 20 + 21 + /* Return value that means we want to abort the walk. */ 22 + #define XFS_IBULK_ABORT (XFS_IWALK_ABORT) 20 23 21 24 /* 22 - * Values for stat return value. 25 + * Advance the user buffer pointer by one record of the given size. If the 26 + * buffer is now full, return the appropriate error code. 23 27 */ 24 - #define BULKSTAT_RV_NOTHING 0 25 - #define BULKSTAT_RV_DIDONE 1 26 - #define BULKSTAT_RV_GIVEUP 2 28 + static inline int 29 + xfs_ibulk_advance( 30 + struct xfs_ibulk *breq, 31 + size_t bytes) 32 + { 33 + char __user *b = breq->ubuffer; 34 + 35 + breq->ubuffer = b + bytes; 36 + breq->ocount++; 37 + return breq->ocount == breq->icount ? XFS_IBULK_ABORT : 0; 38 + } 27 39 28 40 /* 29 41 * Return stat information in bulk (by-inode) for the filesystem. 
30 42 */ 31 - int /* error status */ 32 - xfs_bulkstat( 33 - xfs_mount_t *mp, /* mount point for filesystem */ 34 - xfs_ino_t *lastino, /* last inode returned */ 35 - int *count, /* size of buffer/count returned */ 36 - bulkstat_one_pf formatter, /* func that'd fill a single buf */ 37 - size_t statstruct_size,/* sizeof struct that we're filling */ 38 - char __user *ubuffer,/* buffer with inode stats */ 39 - int *done); /* 1 if there are more stats to get */ 40 43 41 - typedef int (*bulkstat_one_fmt_pf)( /* used size in bytes or negative error */ 42 - void __user *ubuffer, /* buffer to write to */ 43 - int ubsize, /* remaining user buffer sz */ 44 - int *ubused, /* bytes used by formatter */ 45 - const xfs_bstat_t *buffer); /* buffer to read from */ 44 + typedef int (*bulkstat_one_fmt_pf)(struct xfs_ibulk *breq, 45 + const struct xfs_bulkstat *bstat); 46 46 47 - int 48 - xfs_bulkstat_one_int( 49 - xfs_mount_t *mp, 50 - xfs_ino_t ino, 51 - void __user *buffer, 52 - int ubsize, 53 - bulkstat_one_fmt_pf formatter, 54 - int *ubused, 55 - int *stat); 47 + int xfs_bulkstat_one(struct xfs_ibulk *breq, bulkstat_one_fmt_pf formatter); 48 + int xfs_bulkstat(struct xfs_ibulk *breq, bulkstat_one_fmt_pf formatter); 49 + void xfs_bulkstat_to_bstat(struct xfs_mount *mp, struct xfs_bstat *bs1, 50 + const struct xfs_bulkstat *bstat); 56 51 57 - int 58 - xfs_bulkstat_one( 59 - xfs_mount_t *mp, 60 - xfs_ino_t ino, 61 - void __user *buffer, 62 - int ubsize, 63 - int *ubused, 64 - int *stat); 52 + typedef int (*inumbers_fmt_pf)(struct xfs_ibulk *breq, 53 + const struct xfs_inumbers *igrp); 65 54 66 - typedef int (*inumbers_fmt_pf)( 67 - void __user *ubuffer, /* buffer to write to */ 68 - const xfs_inogrp_t *buffer, /* buffer to read from */ 69 - long count, /* # of elements to read */ 70 - long *written); /* # of bytes written */ 71 - 72 - int 73 - xfs_inumbers_fmt( 74 - void __user *ubuffer, /* buffer to write to */ 75 - const xfs_inogrp_t *buffer, /* buffer to read from */ 76 - long 
count, /* # of elements to read */ 77 - long *written); /* # of bytes written */ 78 - 79 - int /* error status */ 80 - xfs_inumbers( 81 - xfs_mount_t *mp, /* mount point for filesystem */ 82 - xfs_ino_t *last, /* last inode returned */ 83 - int *count, /* size of buffer/count returned */ 84 - void __user *buffer, /* buffer with inode info */ 85 - inumbers_fmt_pf formatter); 55 + int xfs_inumbers(struct xfs_ibulk *breq, inumbers_fmt_pf formatter); 56 + void xfs_inumbers_to_inogrp(struct xfs_inogrp *ig1, 57 + const struct xfs_inumbers *ig); 86 58 87 59 #endif /* __XFS_ITABLE_H__ */
+720
fs/xfs/xfs_iwalk.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-or-later 2 + /* 3 + * Copyright (C) 2019 Oracle. All Rights Reserved. 4 + * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 + */ 6 + #include "xfs.h" 7 + #include "xfs_fs.h" 8 + #include "xfs_shared.h" 9 + #include "xfs_format.h" 10 + #include "xfs_log_format.h" 11 + #include "xfs_trans_resv.h" 12 + #include "xfs_mount.h" 13 + #include "xfs_inode.h" 14 + #include "xfs_btree.h" 15 + #include "xfs_ialloc.h" 16 + #include "xfs_ialloc_btree.h" 17 + #include "xfs_iwalk.h" 18 + #include "xfs_error.h" 19 + #include "xfs_trace.h" 20 + #include "xfs_icache.h" 21 + #include "xfs_health.h" 22 + #include "xfs_trans.h" 23 + #include "xfs_pwork.h" 24 + 25 + /* 26 + * Walking Inodes in the Filesystem 27 + * ================================ 28 + * 29 + * This iterator function walks a subset of filesystem inodes in increasing 30 + * order from @startino until there are no more inodes. For each allocated 31 + * inode it finds, it calls a walk function with the relevant inode number and 32 + * a pointer to caller-provided data. The walk function can return the usual 33 + * negative error code to stop the iteration; 0 to continue the iteration; or 34 + * XFS_IWALK_ABORT to stop the iteration. This return value is returned to the 35 + * caller. 36 + * 37 + * Internally, we allow the walk function to do anything, which means that we 38 + * cannot maintain the inobt cursor or our lock on the AGI buffer. We 39 + * therefore cache the inobt records in kernel memory and only call the walk 40 + * function when our memory buffer is full. @nr_recs is the number of records 41 + * that we've cached, and @sz_recs is the size of our cache. 42 + * 43 + * It is the responsibility of the walk function to ensure it accesses 44 + * allocated inodes, as the inobt records may be stale by the time they are 45 + * acted upon. 
46 + */ 47 + 48 + struct xfs_iwalk_ag { 49 + /* parallel work control data; will be null if single threaded */ 50 + struct xfs_pwork pwork; 51 + 52 + struct xfs_mount *mp; 53 + struct xfs_trans *tp; 54 + 55 + /* Where do we start the traversal? */ 56 + xfs_ino_t startino; 57 + 58 + /* Array of inobt records we cache. */ 59 + struct xfs_inobt_rec_incore *recs; 60 + 61 + /* Number of entries allocated for the @recs array. */ 62 + unsigned int sz_recs; 63 + 64 + /* Number of entries in the @recs array that are in use. */ 65 + unsigned int nr_recs; 66 + 67 + /* Inode walk function and data pointer. */ 68 + xfs_iwalk_fn iwalk_fn; 69 + xfs_inobt_walk_fn inobt_walk_fn; 70 + void *data; 71 + 72 + /* 73 + * Make it look like the inodes up to startino are free so that 74 + * bulkstat can start its inode iteration at the correct place without 75 + * needing to special case everywhere. 76 + */ 77 + unsigned int trim_start:1; 78 + 79 + /* Skip empty inobt records? */ 80 + unsigned int skip_empty:1; 81 + }; 82 + 83 + /* 84 + * Loop over all clusters in a chunk for a given incore inode allocation btree 85 + * record. Do a readahead if there are any allocated inodes in that cluster. 
86 + */ 87 + STATIC void 88 + xfs_iwalk_ichunk_ra( 89 + struct xfs_mount *mp, 90 + xfs_agnumber_t agno, 91 + struct xfs_inobt_rec_incore *irec) 92 + { 93 + struct xfs_ino_geometry *igeo = M_IGEO(mp); 94 + xfs_agblock_t agbno; 95 + struct blk_plug plug; 96 + int i; /* inode chunk index */ 97 + 98 + agbno = XFS_AGINO_TO_AGBNO(mp, irec->ir_startino); 99 + 100 + blk_start_plug(&plug); 101 + for (i = 0; i < XFS_INODES_PER_CHUNK; i += igeo->inodes_per_cluster) { 102 + xfs_inofree_t imask; 103 + 104 + imask = xfs_inobt_maskn(i, igeo->inodes_per_cluster); 105 + if (imask & ~irec->ir_free) { 106 + xfs_btree_reada_bufs(mp, agno, agbno, 107 + igeo->blocks_per_cluster, 108 + &xfs_inode_buf_ops); 109 + } 110 + agbno += igeo->blocks_per_cluster; 111 + } 112 + blk_finish_plug(&plug); 113 + } 114 + 115 + /* 116 + * Set the bits in @irec's free mask that correspond to the inodes before 117 + * @agino so that we skip them. This is how we restart an inode walk that was 118 + * interrupted in the middle of an inode record. 119 + */ 120 + STATIC void 121 + xfs_iwalk_adjust_start( 122 + xfs_agino_t agino, /* starting inode of chunk */ 123 + struct xfs_inobt_rec_incore *irec) /* btree record */ 124 + { 125 + int idx; /* index into inode chunk */ 126 + int i; 127 + 128 + idx = agino - irec->ir_startino; 129 + 130 + /* 131 + * We got a right chunk with some left inodes allocated at it. Grab 132 + * the chunk record. Mark all the uninteresting inodes free because 133 + * they're before our start point. 134 + */ 135 + for (i = 0; i < idx; i++) { 136 + if (XFS_INOBT_MASK(i) & ~irec->ir_free) 137 + irec->ir_freecount++; 138 + } 139 + 140 + irec->ir_free |= xfs_inobt_maskn(0, idx); 141 + } 142 + 143 + /* Allocate memory for a walk. */ 144 + STATIC int 145 + xfs_iwalk_alloc( 146 + struct xfs_iwalk_ag *iwag) 147 + { 148 + size_t size; 149 + 150 + ASSERT(iwag->recs == NULL); 151 + iwag->nr_recs = 0; 152 + 153 + /* Allocate a prefetch buffer for inobt records. 
*/ 154 + size = iwag->sz_recs * sizeof(struct xfs_inobt_rec_incore); 155 + iwag->recs = kmem_alloc(size, KM_MAYFAIL); 156 + if (iwag->recs == NULL) 157 + return -ENOMEM; 158 + 159 + return 0; 160 + } 161 + 162 + /* Free memory we allocated for a walk. */ 163 + STATIC void 164 + xfs_iwalk_free( 165 + struct xfs_iwalk_ag *iwag) 166 + { 167 + kmem_free(iwag->recs); 168 + iwag->recs = NULL; 169 + } 170 + 171 + /* For each inuse inode in each cached inobt record, call our function. */ 172 + STATIC int 173 + xfs_iwalk_ag_recs( 174 + struct xfs_iwalk_ag *iwag) 175 + { 176 + struct xfs_mount *mp = iwag->mp; 177 + struct xfs_trans *tp = iwag->tp; 178 + xfs_ino_t ino; 179 + unsigned int i, j; 180 + xfs_agnumber_t agno; 181 + int error; 182 + 183 + agno = XFS_INO_TO_AGNO(mp, iwag->startino); 184 + for (i = 0; i < iwag->nr_recs; i++) { 185 + struct xfs_inobt_rec_incore *irec = &iwag->recs[i]; 186 + 187 + trace_xfs_iwalk_ag_rec(mp, agno, irec); 188 + 189 + if (xfs_pwork_want_abort(&iwag->pwork)) 190 + return 0; 191 + 192 + if (iwag->inobt_walk_fn) { 193 + error = iwag->inobt_walk_fn(mp, tp, agno, irec, 194 + iwag->data); 195 + if (error) 196 + return error; 197 + } 198 + 199 + if (!iwag->iwalk_fn) 200 + continue; 201 + 202 + for (j = 0; j < XFS_INODES_PER_CHUNK; j++) { 203 + if (xfs_pwork_want_abort(&iwag->pwork)) 204 + return 0; 205 + 206 + /* Skip if this inode is free */ 207 + if (XFS_INOBT_MASK(j) & irec->ir_free) 208 + continue; 209 + 210 + /* Otherwise call our function. */ 211 + ino = XFS_AGINO_TO_INO(mp, agno, irec->ir_startino + j); 212 + error = iwag->iwalk_fn(mp, tp, ino, iwag->data); 213 + if (error) 214 + return error; 215 + } 216 + } 217 + 218 + return 0; 219 + } 220 + 221 + /* Delete cursor and let go of AGI. 
*/ 222 + static inline void 223 + xfs_iwalk_del_inobt( 224 + struct xfs_trans *tp, 225 + struct xfs_btree_cur **curpp, 226 + struct xfs_buf **agi_bpp, 227 + int error) 228 + { 229 + if (*curpp) { 230 + xfs_btree_del_cursor(*curpp, error); 231 + *curpp = NULL; 232 + } 233 + if (*agi_bpp) { 234 + xfs_trans_brelse(tp, *agi_bpp); 235 + *agi_bpp = NULL; 236 + } 237 + } 238 + 239 + /* 240 + * Set ourselves up for walking inobt records starting from a given point in 241 + * the filesystem. 242 + * 243 + * If caller passed in a nonzero start inode number, load the record from the 244 + * inobt and make the record look like all the inodes before agino are free so 245 + * that we skip them, and then move the cursor to the next inobt record. This 246 + * is how we support starting an iwalk in the middle of an inode chunk. 247 + * 248 + * If the caller passed in a start number of zero, move the cursor to the first 249 + * inobt record. 250 + * 251 + * The caller is responsible for cleaning up the cursor and buffer pointer 252 + * regardless of the error status. 253 + */ 254 + STATIC int 255 + xfs_iwalk_ag_start( 256 + struct xfs_iwalk_ag *iwag, 257 + xfs_agnumber_t agno, 258 + xfs_agino_t agino, 259 + struct xfs_btree_cur **curpp, 260 + struct xfs_buf **agi_bpp, 261 + int *has_more) 262 + { 263 + struct xfs_mount *mp = iwag->mp; 264 + struct xfs_trans *tp = iwag->tp; 265 + struct xfs_inobt_rec_incore *irec; 266 + int error; 267 + 268 + /* Set up a fresh cursor and empty the inobt cache. */ 269 + iwag->nr_recs = 0; 270 + error = xfs_inobt_cur(mp, tp, agno, XFS_BTNUM_INO, curpp, agi_bpp); 271 + if (error) 272 + return error; 273 + 274 + /* Starting at the beginning of the AG? That's easy! */ 275 + if (agino == 0) 276 + return xfs_inobt_lookup(*curpp, 0, XFS_LOOKUP_GE, has_more); 277 + 278 + /* 279 + * Otherwise, we have to grab the inobt record where we left off, stuff 280 + * the record into our cache, and then see if there are more records. 
281 + * We require a lookup cache of at least two elements so that the 282 + * caller doesn't have to deal with tearing down the cursor to walk the 283 + * records. 284 + */ 285 + error = xfs_inobt_lookup(*curpp, agino, XFS_LOOKUP_LE, has_more); 286 + if (error) 287 + return error; 288 + 289 + /* 290 + * If the LE lookup at @agino yields no records, jump ahead to the 291 + * inobt cursor increment to see if there are more records to process. 292 + */ 293 + if (!*has_more) 294 + goto out_advance; 295 + 296 + /* Get the record, should always work */ 297 + irec = &iwag->recs[iwag->nr_recs]; 298 + error = xfs_inobt_get_rec(*curpp, irec, has_more); 299 + if (error) 300 + return error; 301 + XFS_WANT_CORRUPTED_RETURN(mp, *has_more == 1); 302 + 303 + /* 304 + * If the LE lookup yielded an inobt record before the cursor position, 305 + * skip it and see if there's another one after it. 306 + */ 307 + if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino) 308 + goto out_advance; 309 + 310 + /* 311 + * If agino fell in the middle of the inode record, make it look like 312 + * the inodes up to agino are free so that we don't return them again. 313 + */ 314 + if (iwag->trim_start) 315 + xfs_iwalk_adjust_start(agino, irec); 316 + 317 + /* 318 + * The prefetch calculation is supposed to give us a large enough inobt 319 + * record cache that grab_ichunk can stage a partial first record and 320 + * the loop body can cache a record without having to check for cache 321 + * space until after it reads an inobt record. 322 + */ 323 + iwag->nr_recs++; 324 + ASSERT(iwag->nr_recs < iwag->sz_recs); 325 + 326 + out_advance: 327 + return xfs_btree_increment(*curpp, 0, has_more); 328 + } 329 + 330 + /* 331 + * The inobt record cache is full, so preserve the inobt cursor state and 332 + * run callbacks on the cached inobt records. 
When we're done, restore the 333 + * cursor state to wherever the cursor would have been had the cache not been 334 + * full (and therefore we could've just incremented the cursor) if *@has_more 335 + * is true. On exit, *@has_more will indicate whether or not the caller should 336 + * try for more inode records. 337 + */ 338 + STATIC int 339 + xfs_iwalk_run_callbacks( 340 + struct xfs_iwalk_ag *iwag, 341 + xfs_agnumber_t agno, 342 + struct xfs_btree_cur **curpp, 343 + struct xfs_buf **agi_bpp, 344 + int *has_more) 345 + { 346 + struct xfs_mount *mp = iwag->mp; 347 + struct xfs_trans *tp = iwag->tp; 348 + struct xfs_inobt_rec_incore *irec; 349 + xfs_agino_t restart; 350 + int error; 351 + 352 + ASSERT(iwag->nr_recs > 0); 353 + 354 + /* Delete cursor but remember the last record we cached... */ 355 + xfs_iwalk_del_inobt(tp, curpp, agi_bpp, 0); 356 + irec = &iwag->recs[iwag->nr_recs - 1]; 357 + restart = irec->ir_startino + XFS_INODES_PER_CHUNK - 1; 358 + 359 + error = xfs_iwalk_ag_recs(iwag); 360 + if (error) 361 + return error; 362 + 363 + /* ...empty the cache... */ 364 + iwag->nr_recs = 0; 365 + 366 + if (!has_more) 367 + return 0; 368 + 369 + /* ...and recreate the cursor just past where we left off. */ 370 + error = xfs_inobt_cur(mp, tp, agno, XFS_BTNUM_INO, curpp, agi_bpp); 371 + if (error) 372 + return error; 373 + 374 + return xfs_inobt_lookup(*curpp, restart, XFS_LOOKUP_GE, has_more); 375 + } 376 + 377 + /* Walk all inodes in a single AG, from @iwag->startino to the end of the AG. */ 378 + STATIC int 379 + xfs_iwalk_ag( 380 + struct xfs_iwalk_ag *iwag) 381 + { 382 + struct xfs_mount *mp = iwag->mp; 383 + struct xfs_trans *tp = iwag->tp; 384 + struct xfs_buf *agi_bp = NULL; 385 + struct xfs_btree_cur *cur = NULL; 386 + xfs_agnumber_t agno; 387 + xfs_agino_t agino; 388 + int has_more; 389 + int error = 0; 390 + 391 + /* Set up our cursor at the right place in the inode btree. 
*/ 392 + agno = XFS_INO_TO_AGNO(mp, iwag->startino); 393 + agino = XFS_INO_TO_AGINO(mp, iwag->startino); 394 + error = xfs_iwalk_ag_start(iwag, agno, agino, &cur, &agi_bp, &has_more); 395 + 396 + while (!error && has_more) { 397 + struct xfs_inobt_rec_incore *irec; 398 + 399 + cond_resched(); 400 + if (xfs_pwork_want_abort(&iwag->pwork)) 401 + goto out; 402 + 403 + /* Fetch the inobt record. */ 404 + irec = &iwag->recs[iwag->nr_recs]; 405 + error = xfs_inobt_get_rec(cur, irec, &has_more); 406 + if (error || !has_more) 407 + break; 408 + 409 + /* No allocated inodes in this chunk; skip it. */ 410 + if (iwag->skip_empty && irec->ir_freecount == irec->ir_count) { 411 + error = xfs_btree_increment(cur, 0, &has_more); 412 + if (error) 413 + break; 414 + continue; 415 + } 416 + 417 + /* 418 + * Start readahead for this inode chunk in anticipation of 419 + * walking the inodes. 420 + */ 421 + if (iwag->iwalk_fn) 422 + xfs_iwalk_ichunk_ra(mp, agno, irec); 423 + 424 + /* 425 + * If there's space in the buffer for more records, increment 426 + * the btree cursor and grab more. 427 + */ 428 + if (++iwag->nr_recs < iwag->sz_recs) { 429 + error = xfs_btree_increment(cur, 0, &has_more); 430 + if (error || !has_more) 431 + break; 432 + continue; 433 + } 434 + 435 + /* 436 + * Otherwise, we need to save cursor state and run the callback 437 + * function on the cached records. The run_callbacks function 438 + * is supposed to return a cursor pointing to the record where 439 + * we would be if we had been able to increment like above. 440 + */ 441 + ASSERT(has_more); 442 + error = xfs_iwalk_run_callbacks(iwag, agno, &cur, &agi_bp, 443 + &has_more); 444 + } 445 + 446 + if (iwag->nr_recs == 0 || error) 447 + goto out; 448 + 449 + /* Walk the unprocessed records in the cache. 
*/ 450 + error = xfs_iwalk_run_callbacks(iwag, agno, &cur, &agi_bp, &has_more); 451 + 452 + out: 453 + xfs_iwalk_del_inobt(tp, &cur, &agi_bp, error); 454 + return error; 455 + } 456 + 457 + /* 458 + * We experimentally determined that the reduction in ioctl call overhead 459 + * diminishes when userspace asks for more than 2048 inodes, so we'll cap 460 + * prefetch at this point. 461 + */ 462 + #define IWALK_MAX_INODE_PREFETCH (2048U) 463 + 464 + /* 465 + * Given the number of inodes to prefetch, set the number of inobt records that 466 + * we cache in memory, which controls the number of inodes we try to read 467 + * ahead. Set the maximum if @inodes == 0. 468 + */ 469 + static inline unsigned int 470 + xfs_iwalk_prefetch( 471 + unsigned int inodes) 472 + { 473 + unsigned int inobt_records; 474 + 475 + /* 476 + * If the caller didn't tell us the number of inodes they wanted, 477 + * assume the maximum prefetch possible for best performance. 478 + * Otherwise, cap prefetch at that maximum so that we don't start an 479 + * absurd amount of prefetch. 480 + */ 481 + if (inodes == 0) 482 + inodes = IWALK_MAX_INODE_PREFETCH; 483 + inodes = min(inodes, IWALK_MAX_INODE_PREFETCH); 484 + 485 + /* Round the inode count up to a full chunk. */ 486 + inodes = round_up(inodes, XFS_INODES_PER_CHUNK); 487 + 488 + /* 489 + * In order to convert the number of inodes to prefetch into an 490 + * estimate of the number of inobt records to cache, we require a 491 + * conversion factor that reflects our expectations of the average 492 + * loading factor of an inode chunk. Based on data gathered, most 493 + * (but not all) filesystems manage to keep the inode chunks totally 494 + * full, so we'll underestimate slightly so that our readahead will 495 + * still deliver the performance we want on aging filesystems: 496 + * 497 + * inobt = inodes / (INODES_PER_CHUNK * (4 / 5)); 498 + * 499 + * The funny math is to avoid integer division. 
500 + */ 501 + inobt_records = (inodes * 5) / (4 * XFS_INODES_PER_CHUNK); 502 + 503 + /* 504 + * Allocate enough space to prefetch at least two inobt records so that 505 + * we can cache both the record where the iwalk started and the next 506 + * record. This simplifies the AG inode walk loop setup code. 507 + */ 508 + return max(inobt_records, 2U); 509 + } 510 + 511 + /* 512 + * Walk all inodes in the filesystem starting from @startino. The @iwalk_fn 513 + * will be called for each allocated inode, being passed the inode's number and 514 + * @data. @max_prefetch controls how many inobt records' worth of inodes we 515 + * try to readahead. 516 + */ 517 + int 518 + xfs_iwalk( 519 + struct xfs_mount *mp, 520 + struct xfs_trans *tp, 521 + xfs_ino_t startino, 522 + unsigned int flags, 523 + xfs_iwalk_fn iwalk_fn, 524 + unsigned int inode_records, 525 + void *data) 526 + { 527 + struct xfs_iwalk_ag iwag = { 528 + .mp = mp, 529 + .tp = tp, 530 + .iwalk_fn = iwalk_fn, 531 + .data = data, 532 + .startino = startino, 533 + .sz_recs = xfs_iwalk_prefetch(inode_records), 534 + .trim_start = 1, 535 + .skip_empty = 1, 536 + .pwork = XFS_PWORK_SINGLE_THREADED, 537 + }; 538 + xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino); 539 + int error; 540 + 541 + ASSERT(agno < mp->m_sb.sb_agcount); 542 + ASSERT(!(flags & ~XFS_IWALK_FLAGS_ALL)); 543 + 544 + error = xfs_iwalk_alloc(&iwag); 545 + if (error) 546 + return error; 547 + 548 + for (; agno < mp->m_sb.sb_agcount; agno++) { 549 + error = xfs_iwalk_ag(&iwag); 550 + if (error) 551 + break; 552 + iwag.startino = XFS_AGINO_TO_INO(mp, agno + 1, 0); 553 + if (flags & XFS_INOBT_WALK_SAME_AG) 554 + break; 555 + } 556 + 557 + xfs_iwalk_free(&iwag); 558 + return error; 559 + } 560 + 561 + /* Run per-thread iwalk work. 
*/ 562 + static int 563 + xfs_iwalk_ag_work( 564 + struct xfs_mount *mp, 565 + struct xfs_pwork *pwork) 566 + { 567 + struct xfs_iwalk_ag *iwag; 568 + int error = 0; 569 + 570 + iwag = container_of(pwork, struct xfs_iwalk_ag, pwork); 571 + if (xfs_pwork_want_abort(pwork)) 572 + goto out; 573 + 574 + error = xfs_iwalk_alloc(iwag); 575 + if (error) 576 + goto out; 577 + 578 + error = xfs_iwalk_ag(iwag); 579 + xfs_iwalk_free(iwag); 580 + out: 581 + kmem_free(iwag); 582 + return error; 583 + } 584 + 585 + /* 586 + * Walk all the inodes in the filesystem using multiple threads to process each 587 + * AG. 588 + */ 589 + int 590 + xfs_iwalk_threaded( 591 + struct xfs_mount *mp, 592 + xfs_ino_t startino, 593 + unsigned int flags, 594 + xfs_iwalk_fn iwalk_fn, 595 + unsigned int inode_records, 596 + bool polled, 597 + void *data) 598 + { 599 + struct xfs_pwork_ctl pctl; 600 + xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino); 601 + unsigned int nr_threads; 602 + int error; 603 + 604 + ASSERT(agno < mp->m_sb.sb_agcount); 605 + ASSERT(!(flags & ~XFS_IWALK_FLAGS_ALL)); 606 + 607 + nr_threads = xfs_pwork_guess_datadev_parallelism(mp); 608 + error = xfs_pwork_init(mp, &pctl, xfs_iwalk_ag_work, "xfs_iwalk", 609 + nr_threads); 610 + if (error) 611 + return error; 612 + 613 + for (; agno < mp->m_sb.sb_agcount; agno++) { 614 + struct xfs_iwalk_ag *iwag; 615 + 616 + if (xfs_pwork_ctl_want_abort(&pctl)) 617 + break; 618 + 619 + iwag = kmem_zalloc(sizeof(struct xfs_iwalk_ag), KM_SLEEP); 620 + iwag->mp = mp; 621 + iwag->iwalk_fn = iwalk_fn; 622 + iwag->data = data; 623 + iwag->startino = startino; 624 + iwag->sz_recs = xfs_iwalk_prefetch(inode_records); 625 + xfs_pwork_queue(&pctl, &iwag->pwork); 626 + startino = XFS_AGINO_TO_INO(mp, agno + 1, 0); 627 + if (flags & XFS_INOBT_WALK_SAME_AG) 628 + break; 629 + } 630 + 631 + if (polled) 632 + xfs_pwork_poll(&pctl); 633 + return xfs_pwork_destroy(&pctl); 634 + } 635 + 636 + /* 637 + * Allow callers to cache up to a page's worth of inobt 
records. This reflects 638 + * the existing inumbers prefetching behavior. Since the inobt walk does not 639 + * itself do anything with the inobt records, we can set a fairly high limit 640 + * here. 641 + */ 642 + #define MAX_INOBT_WALK_PREFETCH \ 643 + (PAGE_SIZE / sizeof(struct xfs_inobt_rec_incore)) 644 + 645 + /* 646 + * Given the number of records that the user wanted, set the number of inobt 647 + * records that we buffer in memory. Set the maximum if @inobt_records == 0. 648 + */ 649 + static inline unsigned int 650 + xfs_inobt_walk_prefetch( 651 + unsigned int inobt_records) 652 + { 653 + /* 654 + * If the caller didn't tell us the number of inobt records they 655 + * wanted, assume the maximum prefetch possible for best performance. 656 + */ 657 + if (inobt_records == 0) 658 + inobt_records = MAX_INOBT_WALK_PREFETCH; 659 + 660 + /* 661 + * Allocate enough space to prefetch at least two inobt records so that 662 + * we can cache both the record where the iwalk started and the next 663 + * record. This simplifies the AG inode walk loop setup code. 664 + */ 665 + inobt_records = max(inobt_records, 2U); 666 + 667 + /* 668 + * Cap prefetch at that maximum so that we don't use an absurd amount 669 + * of memory. 670 + */ 671 + return min_t(unsigned int, inobt_records, MAX_INOBT_WALK_PREFETCH); 672 + } 673 + 674 + /* 675 + * Walk all inode btree records in the filesystem starting from @startino. The 676 + * @inobt_walk_fn will be called for each btree record, being passed the incore 677 + * record and @data. @max_prefetch controls how many inobt records we try to 678 + * cache ahead of time. 
679 + */ 680 + int 681 + xfs_inobt_walk( 682 + struct xfs_mount *mp, 683 + struct xfs_trans *tp, 684 + xfs_ino_t startino, 685 + unsigned int flags, 686 + xfs_inobt_walk_fn inobt_walk_fn, 687 + unsigned int inobt_records, 688 + void *data) 689 + { 690 + struct xfs_iwalk_ag iwag = { 691 + .mp = mp, 692 + .tp = tp, 693 + .inobt_walk_fn = inobt_walk_fn, 694 + .data = data, 695 + .startino = startino, 696 + .sz_recs = xfs_inobt_walk_prefetch(inobt_records), 697 + .pwork = XFS_PWORK_SINGLE_THREADED, 698 + }; 699 + xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino); 700 + int error; 701 + 702 + ASSERT(agno < mp->m_sb.sb_agcount); 703 + ASSERT(!(flags & ~XFS_INOBT_WALK_FLAGS_ALL)); 704 + 705 + error = xfs_iwalk_alloc(&iwag); 706 + if (error) 707 + return error; 708 + 709 + for (; agno < mp->m_sb.sb_agcount; agno++) { 710 + error = xfs_iwalk_ag(&iwag); 711 + if (error) 712 + break; 713 + iwag.startino = XFS_AGINO_TO_INO(mp, agno + 1, 0); 714 + if (flags & XFS_INOBT_WALK_SAME_AG) 715 + break; 716 + } 717 + 718 + xfs_iwalk_free(&iwag); 719 + return error; 720 + }
+46
fs/xfs/xfs_iwalk.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-or-later */ 2 + /* 3 + * Copyright (C) 2019 Oracle. All Rights Reserved. 4 + * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 + */ 6 + #ifndef __XFS_IWALK_H__ 7 + #define __XFS_IWALK_H__ 8 + 9 + /* Walk all inodes in the filesystem starting from @startino. */ 10 + typedef int (*xfs_iwalk_fn)(struct xfs_mount *mp, struct xfs_trans *tp, 11 + xfs_ino_t ino, void *data); 12 + /* Return values for xfs_iwalk_fn. */ 13 + #define XFS_IWALK_CONTINUE (XFS_ITER_CONTINUE) 14 + #define XFS_IWALK_ABORT (XFS_ITER_ABORT) 15 + 16 + int xfs_iwalk(struct xfs_mount *mp, struct xfs_trans *tp, xfs_ino_t startino, 17 + unsigned int flags, xfs_iwalk_fn iwalk_fn, 18 + unsigned int inode_records, void *data); 19 + int xfs_iwalk_threaded(struct xfs_mount *mp, xfs_ino_t startino, 20 + unsigned int flags, xfs_iwalk_fn iwalk_fn, 21 + unsigned int inode_records, bool poll, void *data); 22 + 23 + /* Only iterate inodes within the same AG as @startino. */ 24 + #define XFS_IWALK_SAME_AG (0x1) 25 + 26 + #define XFS_IWALK_FLAGS_ALL (XFS_IWALK_SAME_AG) 27 + 28 + /* Walk all inode btree records in the filesystem starting from @startino. */ 29 + typedef int (*xfs_inobt_walk_fn)(struct xfs_mount *mp, struct xfs_trans *tp, 30 + xfs_agnumber_t agno, 31 + const struct xfs_inobt_rec_incore *irec, 32 + void *data); 33 + /* Return value (for xfs_inobt_walk_fn) that aborts the walk immediately. */ 34 + #define XFS_INOBT_WALK_ABORT (XFS_IWALK_ABORT) 35 + 36 + int xfs_inobt_walk(struct xfs_mount *mp, struct xfs_trans *tp, 37 + xfs_ino_t startino, unsigned int flags, 38 + xfs_inobt_walk_fn inobt_walk_fn, unsigned int inobt_records, 39 + void *data); 40 + 41 + /* Only iterate inobt records within the same AG as @startino. */ 42 + #define XFS_INOBT_WALK_SAME_AG (XFS_IWALK_SAME_AG) 43 + 44 + #define XFS_INOBT_WALK_FLAGS_ALL (XFS_INOBT_WALK_SAME_AG) 45 + 46 + #endif /* __XFS_IWALK_H__ */
+3 -2
fs/xfs/xfs_linux.h
··· 110 110 #define current_restore_flags_nested(sp, f) \ 111 111 (current->flags = ((current->flags & ~(f)) | (*(sp) & (f)))) 112 112 113 - #define spinlock_destroy(lock) 114 - 115 113 #define NBBY 8 /* number of bits per byte */ 116 114 117 115 /* ··· 218 220 do_div(x, y); 219 221 return x; 220 222 } 223 + 224 + int xfs_rw_bdev(struct block_device *bdev, sector_t sector, unsigned int count, 225 + char *data, unsigned int op); 221 226 222 227 #define ASSERT_ALWAYS(expr) \ 223 228 (likely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
+266 -376
fs/xfs/xfs_log.c
··· 16 16 #include "xfs_trans_priv.h" 17 17 #include "xfs_log.h" 18 18 #include "xfs_log_priv.h" 19 - #include "xfs_log_recover.h" 20 - #include "xfs_inode.h" 21 19 #include "xfs_trace.h" 22 - #include "xfs_fsops.h" 23 - #include "xfs_cksum.h" 24 20 #include "xfs_sysfs.h" 25 21 #include "xfs_sb.h" 26 22 #include "xfs_health.h" ··· 41 45 xlog_space_left( 42 46 struct xlog *log, 43 47 atomic64_t *head); 44 - STATIC int 45 - xlog_sync( 46 - struct xlog *log, 47 - struct xlog_in_core *iclog); 48 48 STATIC void 49 49 xlog_dealloc_log( 50 50 struct xlog *log); 51 51 52 52 /* local state machine functions */ 53 - STATIC void xlog_state_done_syncing(xlog_in_core_t *iclog, int); 54 - STATIC void 55 - xlog_state_do_callback( 56 - struct xlog *log, 57 - int aborted, 58 - struct xlog_in_core *iclog); 53 + STATIC void xlog_state_done_syncing( 54 + struct xlog_in_core *iclog, 55 + bool aborted); 59 56 STATIC int 60 57 xlog_state_get_iclog_space( 61 58 struct xlog *log, ··· 96 107 xlog_verify_iclog( 97 108 struct xlog *log, 98 109 struct xlog_in_core *iclog, 99 - int count, 100 - bool syncing); 110 + int count); 101 111 STATIC void 102 112 xlog_verify_tail_lsn( 103 113 struct xlog *log, ··· 105 117 #else 106 118 #define xlog_verify_dest_ptr(a,b) 107 119 #define xlog_verify_grant_tail(a) 108 - #define xlog_verify_iclog(a,b,c,d) 120 + #define xlog_verify_iclog(a,b,c) 109 121 #define xlog_verify_tail_lsn(a,b,c) 110 122 #endif 111 123 ··· 529 541 return lsn; 530 542 } 531 543 532 - /* 533 - * Attaches a new iclog I/O completion callback routine during 534 - * transaction commit. If the log is in error state, a non-zero 535 - * return code is handed back and the caller is responsible for 536 - * executing the callback at an appropriate time. 
537 - */ 538 - int 539 - xfs_log_notify( 540 - struct xlog_in_core *iclog, 541 - xfs_log_callback_t *cb) 542 - { 543 - int abortflg; 544 - 545 - spin_lock(&iclog->ic_callback_lock); 546 - abortflg = (iclog->ic_state & XLOG_STATE_IOERROR); 547 - if (!abortflg) { 548 - ASSERT_ALWAYS((iclog->ic_state == XLOG_STATE_ACTIVE) || 549 - (iclog->ic_state == XLOG_STATE_WANT_SYNC)); 550 - cb->cb_next = NULL; 551 - *(iclog->ic_callback_tail) = cb; 552 - iclog->ic_callback_tail = &(cb->cb_next); 553 - } 554 - spin_unlock(&iclog->ic_callback_lock); 555 - return abortflg; 556 - } 557 - 558 544 int 559 545 xfs_log_release_iclog( 560 546 struct xfs_mount *mp, ··· 769 807 * The mount has failed. Cancel the recovery if it hasn't completed and destroy 770 808 * the log. 771 809 */ 772 - int 810 + void 773 811 xfs_log_mount_cancel( 774 812 struct xfs_mount *mp) 775 813 { 776 - int error; 777 - 778 - error = xlog_recover_cancel(mp->m_log); 814 + xlog_recover_cancel(mp->m_log); 779 815 xfs_log_unmount(mp); 780 - 781 - return error; 782 816 } 783 817 784 818 /* ··· 890 932 * Or, if we are doing a forced umount (typically because of IO errors). 891 933 */ 892 934 if (mp->m_flags & XFS_MOUNT_NORECOVERY || 893 - xfs_readonly_buftarg(log->l_mp->m_logdev_targp)) { 935 + xfs_readonly_buftarg(log->l_targ)) { 894 936 ASSERT(mp->m_flags & XFS_MOUNT_RDONLY); 895 937 return 0; 896 938 } ··· 1202 1244 } 1203 1245 1204 1246 1205 - /* 1206 - * Log function which is called when an io completes. 1207 - * 1208 - * The log manager needs its own routine, in order to control what 1209 - * happens with the buffer after the write completes. 
1210 - */ 1211 1247 static void 1212 - xlog_iodone(xfs_buf_t *bp) 1248 + xlog_ioend_work( 1249 + struct work_struct *work) 1213 1250 { 1214 - struct xlog_in_core *iclog = bp->b_log_item; 1215 - struct xlog *l = iclog->ic_log; 1216 - int aborted = 0; 1251 + struct xlog_in_core *iclog = 1252 + container_of(work, struct xlog_in_core, ic_end_io_work); 1253 + struct xlog *log = iclog->ic_log; 1254 + bool aborted = false; 1255 + int error; 1256 + 1257 + error = blk_status_to_errno(iclog->ic_bio.bi_status); 1258 + #ifdef DEBUG 1259 + /* treat writes with injected CRC errors as failed */ 1260 + if (iclog->ic_fail_crc) 1261 + error = -EIO; 1262 + #endif 1217 1263 1218 1264 /* 1219 - * Race to shutdown the filesystem if we see an error or the iclog is in 1220 - * IOABORT state. The IOABORT state is only set in DEBUG mode to inject 1221 - * CRC errors into log recovery. 1265 + * Race to shutdown the filesystem if we see an error. 1222 1266 */ 1223 - if (XFS_TEST_ERROR(bp->b_error, l->l_mp, XFS_ERRTAG_IODONE_IOERR) || 1224 - iclog->ic_state & XLOG_STATE_IOABORT) { 1225 - if (iclog->ic_state & XLOG_STATE_IOABORT) 1226 - iclog->ic_state &= ~XLOG_STATE_IOABORT; 1227 - 1228 - xfs_buf_ioerror_alert(bp, __func__); 1229 - xfs_buf_stale(bp); 1230 - xfs_force_shutdown(l->l_mp, SHUTDOWN_LOG_IO_ERROR); 1267 + if (XFS_TEST_ERROR(error, log->l_mp, XFS_ERRTAG_IODONE_IOERR)) { 1268 + xfs_alert(log->l_mp, "log I/O error %d", error); 1269 + xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR); 1231 1270 /* 1232 1271 * This flag will be propagated to the trans-committed 1233 1272 * callback routines to let them know that the log-commit 1234 1273 * didn't succeed. 
1235 1274 */ 1236 - aborted = XFS_LI_ABORTED; 1275 + aborted = true; 1237 1276 } else if (iclog->ic_state & XLOG_STATE_IOERROR) { 1238 - aborted = XFS_LI_ABORTED; 1277 + aborted = true; 1239 1278 } 1240 1279 1241 - /* log I/O is always issued ASYNC */ 1242 - ASSERT(bp->b_flags & XBF_ASYNC); 1243 1280 xlog_state_done_syncing(iclog, aborted); 1281 + bio_uninit(&iclog->ic_bio); 1244 1282 1245 1283 /* 1246 - * drop the buffer lock now that we are done. Nothing references 1247 - * the buffer after this, so an unmount waiting on this lock can now 1248 - * tear it down safely. As such, it is unsafe to reference the buffer 1249 - * (bp) after the unlock as we could race with it being freed. 1284 + * Drop the lock to signal that we are done. Nothing references the 1285 + * iclog after this, so an unmount waiting on this lock can now tear it 1286 + * down safely. As such, it is unsafe to reference the iclog after the 1287 + * unlock as we could race with it being freed. 1250 1288 */ 1251 - xfs_buf_unlock(bp); 1289 + up(&iclog->ic_sema); 1252 1290 } 1253 1291 1254 1292 /* ··· 1255 1301 * If the filesystem blocksize is too large, we may need to choose a 1256 1302 * larger size since the directory code currently logs entire blocks. 1257 1303 */ 1258 - 1259 1304 STATIC void 1260 1305 xlog_get_iclog_buffer_size( 1261 1306 struct xfs_mount *mp, 1262 1307 struct xlog *log) 1263 1308 { 1264 - int size; 1265 - int xhdrs; 1266 - 1267 1309 if (mp->m_logbufs <= 0) 1268 - log->l_iclog_bufs = XLOG_MAX_ICLOGS; 1269 - else 1270 - log->l_iclog_bufs = mp->m_logbufs; 1310 + mp->m_logbufs = XLOG_MAX_ICLOGS; 1311 + if (mp->m_logbsize <= 0) 1312 + mp->m_logbsize = XLOG_BIG_RECORD_BSIZE; 1313 + 1314 + log->l_iclog_bufs = mp->m_logbufs; 1315 + log->l_iclog_size = mp->m_logbsize; 1271 1316 1272 1317 /* 1273 - * Buffer size passed in from mount system call. 1318 + * # headers = size / 32k - one header holds cycles from 32k of data. 
1274 1319 */ 1275 - if (mp->m_logbsize > 0) { 1276 - size = log->l_iclog_size = mp->m_logbsize; 1277 - log->l_iclog_size_log = 0; 1278 - while (size != 1) { 1279 - log->l_iclog_size_log++; 1280 - size >>= 1; 1281 - } 1282 - 1283 - if (xfs_sb_version_haslogv2(&mp->m_sb)) { 1284 - /* # headers = size / 32k 1285 - * one header holds cycles from 32k of data 1286 - */ 1287 - 1288 - xhdrs = mp->m_logbsize / XLOG_HEADER_CYCLE_SIZE; 1289 - if (mp->m_logbsize % XLOG_HEADER_CYCLE_SIZE) 1290 - xhdrs++; 1291 - log->l_iclog_hsize = xhdrs << BBSHIFT; 1292 - log->l_iclog_heads = xhdrs; 1293 - } else { 1294 - ASSERT(mp->m_logbsize <= XLOG_BIG_RECORD_BSIZE); 1295 - log->l_iclog_hsize = BBSIZE; 1296 - log->l_iclog_heads = 1; 1297 - } 1298 - goto done; 1299 - } 1300 - 1301 - /* All machines use 32kB buffers by default. */ 1302 - log->l_iclog_size = XLOG_BIG_RECORD_BSIZE; 1303 - log->l_iclog_size_log = XLOG_BIG_RECORD_BSHIFT; 1304 - 1305 - /* the default log size is 16k or 32k which is one header sector */ 1306 - log->l_iclog_hsize = BBSIZE; 1307 - log->l_iclog_heads = 1; 1308 - 1309 - done: 1310 - /* are we being asked to make the sizes selected above visible? 
*/ 1311 - if (mp->m_logbufs == 0) 1312 - mp->m_logbufs = log->l_iclog_bufs; 1313 - if (mp->m_logbsize == 0) 1314 - mp->m_logbsize = log->l_iclog_size; 1315 - } /* xlog_get_iclog_buffer_size */ 1316 - 1320 + log->l_iclog_heads = 1321 + DIV_ROUND_UP(mp->m_logbsize, XLOG_HEADER_CYCLE_SIZE); 1322 + log->l_iclog_hsize = log->l_iclog_heads << BBSHIFT; 1323 + } 1317 1324 1318 1325 void 1319 1326 xfs_log_work_queue( ··· 1337 1422 xlog_rec_header_t *head; 1338 1423 xlog_in_core_t **iclogp; 1339 1424 xlog_in_core_t *iclog, *prev_iclog=NULL; 1340 - xfs_buf_t *bp; 1341 1425 int i; 1342 1426 int error = -ENOMEM; 1343 1427 uint log2_size = 0; ··· 1394 1480 1395 1481 xlog_get_iclog_buffer_size(mp, log); 1396 1482 1397 - /* 1398 - * Use a NULL block for the extra log buffer used during splits so that 1399 - * it will trigger errors if we ever try to do IO on it without first 1400 - * having set it up properly. 1401 - */ 1402 - error = -ENOMEM; 1403 - bp = xfs_buf_alloc(mp->m_logdev_targp, XFS_BUF_DADDR_NULL, 1404 - BTOBB(log->l_iclog_size), XBF_NO_IOACCT); 1405 - if (!bp) 1406 - goto out_free_log; 1407 - 1408 - /* 1409 - * The iclogbuf buffer locks are held over IO but we are not going to do 1410 - * IO yet. Hence unlock the buffer so that the log IO path can grab it 1411 - * when appropriately. 1412 - */ 1413 - ASSERT(xfs_buf_islocked(bp)); 1414 - xfs_buf_unlock(bp); 1415 - 1416 - /* use high priority wq for log I/O completion */ 1417 - bp->b_ioend_wq = mp->m_log_workqueue; 1418 - bp->b_iodone = xlog_iodone; 1419 - log->l_xbuf = bp; 1420 - 1421 1483 spin_lock_init(&log->l_icloglock); 1422 1484 init_waitqueue_head(&log->l_flush_wait); 1423 1485 ··· 1406 1516 * xlog_in_core_t in xfs_log_priv.h for details. 
1407 1517 */ 1408 1518 ASSERT(log->l_iclog_size >= 4096); 1409 - for (i=0; i < log->l_iclog_bufs; i++) { 1410 - *iclogp = kmem_zalloc(sizeof(xlog_in_core_t), KM_MAYFAIL); 1411 - if (!*iclogp) 1519 + for (i = 0; i < log->l_iclog_bufs; i++) { 1520 + size_t bvec_size = howmany(log->l_iclog_size, PAGE_SIZE) * 1521 + sizeof(struct bio_vec); 1522 + 1523 + iclog = kmem_zalloc(sizeof(*iclog) + bvec_size, KM_MAYFAIL); 1524 + if (!iclog) 1412 1525 goto out_free_iclog; 1413 1526 1414 - iclog = *iclogp; 1527 + *iclogp = iclog; 1415 1528 iclog->ic_prev = prev_iclog; 1416 1529 prev_iclog = iclog; 1417 1530 1418 - bp = xfs_buf_get_uncached(mp->m_logdev_targp, 1419 - BTOBB(log->l_iclog_size), 1420 - XBF_NO_IOACCT); 1421 - if (!bp) 1531 + iclog->ic_data = kmem_alloc_large(log->l_iclog_size, 1532 + KM_MAYFAIL); 1533 + if (!iclog->ic_data) 1422 1534 goto out_free_iclog; 1423 - 1424 - ASSERT(xfs_buf_islocked(bp)); 1425 - xfs_buf_unlock(bp); 1426 - 1427 - /* use high priority wq for log I/O completion */ 1428 - bp->b_ioend_wq = mp->m_log_workqueue; 1429 - bp->b_iodone = xlog_iodone; 1430 - iclog->ic_bp = bp; 1431 - iclog->ic_data = bp->b_addr; 1432 1535 #ifdef DEBUG 1433 1536 log->l_iclog_bak[i] = &iclog->ic_header; 1434 1537 #endif ··· 1435 1552 head->h_fmt = cpu_to_be32(XLOG_FMT); 1436 1553 memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t)); 1437 1554 1438 - iclog->ic_size = BBTOB(bp->b_length) - log->l_iclog_hsize; 1555 + iclog->ic_size = log->l_iclog_size - log->l_iclog_hsize; 1439 1556 iclog->ic_state = XLOG_STATE_ACTIVE; 1440 1557 iclog->ic_log = log; 1441 1558 atomic_set(&iclog->ic_refcnt, 0); 1442 1559 spin_lock_init(&iclog->ic_callback_lock); 1443 - iclog->ic_callback_tail = &(iclog->ic_callback); 1560 + INIT_LIST_HEAD(&iclog->ic_callbacks); 1444 1561 iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize; 1445 1562 1446 1563 init_waitqueue_head(&iclog->ic_force_wait); 1447 1564 init_waitqueue_head(&iclog->ic_write_wait); 1565 + 
INIT_WORK(&iclog->ic_end_io_work, xlog_ioend_work); 1566 + sema_init(&iclog->ic_sema, 1); 1448 1567 1449 1568 iclogp = &iclog->ic_next; 1450 1569 } 1451 1570 *iclogp = log->l_iclog; /* complete ring */ 1452 1571 log->l_iclog->ic_prev = prev_iclog; /* re-write 1st prev ptr */ 1453 1572 1573 + log->l_ioend_workqueue = alloc_workqueue("xfs-log/%s", 1574 + WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_HIGHPRI, 0, 1575 + mp->m_fsname); 1576 + if (!log->l_ioend_workqueue) 1577 + goto out_free_iclog; 1578 + 1454 1579 error = xlog_cil_init(log); 1455 1580 if (error) 1456 - goto out_free_iclog; 1581 + goto out_destroy_workqueue; 1457 1582 return log; 1458 1583 1584 + out_destroy_workqueue: 1585 + destroy_workqueue(log->l_ioend_workqueue); 1459 1586 out_free_iclog: 1460 1587 for (iclog = log->l_iclog; iclog; iclog = prev_iclog) { 1461 1588 prev_iclog = iclog->ic_next; 1462 - if (iclog->ic_bp) 1463 - xfs_buf_free(iclog->ic_bp); 1589 + kmem_free(iclog->ic_data); 1464 1590 kmem_free(iclog); 1465 1591 } 1466 - spinlock_destroy(&log->l_icloglock); 1467 - xfs_buf_free(log->l_xbuf); 1468 1592 out_free_log: 1469 1593 kmem_free(log); 1470 1594 out: ··· 1656 1766 return xfs_end_cksum(crc); 1657 1767 } 1658 1768 1659 - /* 1660 - * The bdstrat callback function for log bufs. This gives us a central 1661 - * place to trap bufs in case we get hit by a log I/O error and need to 1662 - * shutdown. Actually, in practice, even when we didn't get a log error, 1663 - * we transition the iclogs to IOERROR state *after* flushing all existing 1664 - * iclogs to disk. This is because we don't want anymore new transactions to be 1665 - * started or completed afterwards. 1666 - * 1667 - * We lock the iclogbufs here so that we can serialise against IO completion 1668 - * during unmount. We might be processing a shutdown triggered during unmount, 1669 - * and that can occur asynchronously to the unmount thread, and hence we need to 1670 - * ensure that completes before tearing down the iclogbufs. 
Hence we need to 1671 - * hold the buffer lock across the log IO to acheive that. 1672 - */ 1673 - STATIC int 1674 - xlog_bdstrat( 1675 - struct xfs_buf *bp) 1769 + static void 1770 + xlog_bio_end_io( 1771 + struct bio *bio) 1676 1772 { 1677 - struct xlog_in_core *iclog = bp->b_log_item; 1773 + struct xlog_in_core *iclog = bio->bi_private; 1678 1774 1679 - xfs_buf_lock(bp); 1680 - if (iclog->ic_state & XLOG_STATE_IOERROR) { 1681 - xfs_buf_ioerror(bp, -EIO); 1682 - xfs_buf_stale(bp); 1683 - xfs_buf_ioend(bp); 1775 + queue_work(iclog->ic_log->l_ioend_workqueue, 1776 + &iclog->ic_end_io_work); 1777 + } 1778 + 1779 + static void 1780 + xlog_map_iclog_data( 1781 + struct bio *bio, 1782 + void *data, 1783 + size_t count) 1784 + { 1785 + do { 1786 + struct page *page = kmem_to_page(data); 1787 + unsigned int off = offset_in_page(data); 1788 + size_t len = min_t(size_t, count, PAGE_SIZE - off); 1789 + 1790 + WARN_ON_ONCE(bio_add_page(bio, page, len, off) != len); 1791 + 1792 + data += len; 1793 + count -= len; 1794 + } while (count); 1795 + } 1796 + 1797 + STATIC void 1798 + xlog_write_iclog( 1799 + struct xlog *log, 1800 + struct xlog_in_core *iclog, 1801 + uint64_t bno, 1802 + unsigned int count, 1803 + bool need_flush) 1804 + { 1805 + ASSERT(bno < log->l_logBBsize); 1806 + 1807 + /* 1808 + * We lock the iclogbufs here so that we can serialise against I/O 1809 + * completion during unmount. We might be processing a shutdown 1810 + * triggered during unmount, and that can occur asynchronously to the 1811 + * unmount thread, and hence we need to ensure that completes before 1812 + * tearing down the iclogbufs. Hence we need to hold the buffer lock 1813 + * across the log IO to archieve that. 1814 + */ 1815 + down(&iclog->ic_sema); 1816 + if (unlikely(iclog->ic_state & XLOG_STATE_IOERROR)) { 1684 1817 /* 1685 1818 * It would seem logical to return EIO here, but we rely on 1686 1819 * the log state machine to propagate I/O errors instead of 1687 - * doing it here. 
Similarly, IO completion will unlock the 1688 - * buffer, so we don't do it here. 1820 + * doing it here. We kick of the state machine and unlock 1821 + * the buffer manually, the code needs to be kept in sync 1822 + * with the I/O completion path. 1689 1823 */ 1690 - return 0; 1824 + xlog_state_done_syncing(iclog, XFS_LI_ABORTED); 1825 + up(&iclog->ic_sema); 1826 + return; 1691 1827 } 1692 1828 1693 - xfs_buf_submit(bp); 1694 - return 0; 1829 + iclog->ic_io_size = count; 1830 + 1831 + bio_init(&iclog->ic_bio, iclog->ic_bvec, howmany(count, PAGE_SIZE)); 1832 + bio_set_dev(&iclog->ic_bio, log->l_targ->bt_bdev); 1833 + iclog->ic_bio.bi_iter.bi_sector = log->l_logBBstart + bno; 1834 + iclog->ic_bio.bi_end_io = xlog_bio_end_io; 1835 + iclog->ic_bio.bi_private = iclog; 1836 + iclog->ic_bio.bi_opf = REQ_OP_WRITE | REQ_META | REQ_SYNC | REQ_FUA; 1837 + if (need_flush) 1838 + iclog->ic_bio.bi_opf |= REQ_PREFLUSH; 1839 + 1840 + xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, iclog->ic_io_size); 1841 + if (is_vmalloc_addr(iclog->ic_data)) 1842 + flush_kernel_vmap_range(iclog->ic_data, iclog->ic_io_size); 1843 + 1844 + /* 1845 + * If this log buffer would straddle the end of the log we will have 1846 + * to split it up into two bios, so that we can continue at the start. 1847 + */ 1848 + if (bno + BTOBB(count) > log->l_logBBsize) { 1849 + struct bio *split; 1850 + 1851 + split = bio_split(&iclog->ic_bio, log->l_logBBsize - bno, 1852 + GFP_NOIO, &fs_bio_set); 1853 + bio_chain(split, &iclog->ic_bio); 1854 + submit_bio(split); 1855 + 1856 + /* restart at logical offset zero for the remainder */ 1857 + iclog->ic_bio.bi_iter.bi_sector = log->l_logBBstart; 1858 + } 1859 + 1860 + submit_bio(&iclog->ic_bio); 1861 + } 1862 + 1863 + /* 1864 + * We need to bump cycle number for the part of the iclog that is 1865 + * written to the start of the log. Watch out for the header magic 1866 + * number case, though. 
1867 + */ 1868 + static void 1869 + xlog_split_iclog( 1870 + struct xlog *log, 1871 + void *data, 1872 + uint64_t bno, 1873 + unsigned int count) 1874 + { 1875 + unsigned int split_offset = BBTOB(log->l_logBBsize - bno); 1876 + unsigned int i; 1877 + 1878 + for (i = split_offset; i < count; i += BBSIZE) { 1879 + uint32_t cycle = get_unaligned_be32(data + i); 1880 + 1881 + if (++cycle == XLOG_HEADER_MAGIC_NUM) 1882 + cycle++; 1883 + put_unaligned_be32(cycle, data + i); 1884 + } 1885 + } 1886 + 1887 + static int 1888 + xlog_calc_iclog_size( 1889 + struct xlog *log, 1890 + struct xlog_in_core *iclog, 1891 + uint32_t *roundoff) 1892 + { 1893 + uint32_t count_init, count; 1894 + bool use_lsunit; 1895 + 1896 + use_lsunit = xfs_sb_version_haslogv2(&log->l_mp->m_sb) && 1897 + log->l_mp->m_sb.sb_logsunit > 1; 1898 + 1899 + /* Add for LR header */ 1900 + count_init = log->l_iclog_hsize + iclog->ic_offset; 1901 + 1902 + /* Round out the log write size */ 1903 + if (use_lsunit) { 1904 + /* we have a v2 stripe unit to use */ 1905 + count = XLOG_LSUNITTOB(log, XLOG_BTOLSUNIT(log, count_init)); 1906 + } else { 1907 + count = BBTOB(BTOBB(count_init)); 1908 + } 1909 + 1910 + ASSERT(count >= count_init); 1911 + *roundoff = count - count_init; 1912 + 1913 + if (use_lsunit) 1914 + ASSERT(*roundoff < log->l_mp->m_sb.sb_logsunit); 1915 + else 1916 + ASSERT(*roundoff < BBTOB(1)); 1917 + return count; 1695 1918 } 1696 1919 1697 1920 /* ··· 1827 1824 * log will require grabbing the lock though. 1828 1825 * 1829 1826 * The entire log manager uses a logical block numbering scheme. Only 1830 - * log_sync (and then only bwrite()) know about the fact that the log may 1831 - * not start with block zero on a given device. The log block start offset 1832 - * is added immediately before calling bwrite(). 1827 + * xlog_write_iclog knows about the fact that the log may not start with 1828 + * block zero on a given device. 
1833 1829 */ 1834 - 1835 - STATIC int 1830 + STATIC void 1836 1831 xlog_sync( 1837 1832 struct xlog *log, 1838 1833 struct xlog_in_core *iclog) 1839 1834 { 1840 - xfs_buf_t *bp; 1841 - int i; 1842 - uint count; /* byte count of bwrite */ 1843 - uint count_init; /* initial count before roundup */ 1844 - int roundoff; /* roundoff to BB or stripe */ 1845 - int split = 0; /* split write into two regions */ 1846 - int error; 1847 - int v2 = xfs_sb_version_haslogv2(&log->l_mp->m_sb); 1848 - int size; 1835 + unsigned int count; /* byte count of bwrite */ 1836 + unsigned int roundoff; /* roundoff to BB or stripe */ 1837 + uint64_t bno; 1838 + unsigned int size; 1839 + bool need_flush = true, split = false; 1849 1840 1850 - XFS_STATS_INC(log->l_mp, xs_log_writes); 1851 1841 ASSERT(atomic_read(&iclog->ic_refcnt) == 0); 1852 1842 1853 - /* Add for LR header */ 1854 - count_init = log->l_iclog_hsize + iclog->ic_offset; 1855 - 1856 - /* Round out the log write size */ 1857 - if (v2 && log->l_mp->m_sb.sb_logsunit > 1) { 1858 - /* we have a v2 stripe unit to use */ 1859 - count = XLOG_LSUNITTOB(log, XLOG_BTOLSUNIT(log, count_init)); 1860 - } else { 1861 - count = BBTOB(BTOBB(count_init)); 1862 - } 1863 - roundoff = count - count_init; 1864 - ASSERT(roundoff >= 0); 1865 - ASSERT((v2 && log->l_mp->m_sb.sb_logsunit > 1 && 1866 - roundoff < log->l_mp->m_sb.sb_logsunit) 1867 - || 1868 - (log->l_mp->m_sb.sb_logsunit <= 1 && 1869 - roundoff < BBTOB(1))); 1843 + count = xlog_calc_iclog_size(log, iclog, &roundoff); 1870 1844 1871 1845 /* move grant heads by roundoff in sync */ 1872 1846 xlog_grant_add_space(log, &log->l_reserve_head.grant, roundoff); ··· 1854 1874 1855 1875 /* real byte length */ 1856 1876 size = iclog->ic_offset; 1857 - if (v2) 1877 + if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) 1858 1878 size += roundoff; 1859 1879 iclog->ic_header.h_len = cpu_to_be32(size); 1860 1880 1861 - bp = iclog->ic_bp; 1862 - XFS_BUF_SET_ADDR(bp, 
BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn))); 1863 - 1881 + XFS_STATS_INC(log->l_mp, xs_log_writes); 1864 1882 XFS_STATS_ADD(log->l_mp, xs_log_blocks, BTOBB(count)); 1865 1883 1884 + bno = BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn)); 1885 + 1866 1886 /* Do we need to split this write into 2 parts? */ 1867 - if (XFS_BUF_ADDR(bp) + BTOBB(count) > log->l_logBBsize) { 1868 - char *dptr; 1869 - 1870 - split = count - (BBTOB(log->l_logBBsize - XFS_BUF_ADDR(bp))); 1871 - count = BBTOB(log->l_logBBsize - XFS_BUF_ADDR(bp)); 1872 - iclog->ic_bwritecnt = 2; 1873 - 1874 - /* 1875 - * Bump the cycle numbers at the start of each block in the 1876 - * part of the iclog that ends up in the buffer that gets 1877 - * written to the start of the log. 1878 - * 1879 - * Watch out for the header magic number case, though. 1880 - */ 1881 - dptr = (char *)&iclog->ic_header + count; 1882 - for (i = 0; i < split; i += BBSIZE) { 1883 - uint32_t cycle = be32_to_cpu(*(__be32 *)dptr); 1884 - if (++cycle == XLOG_HEADER_MAGIC_NUM) 1885 - cycle++; 1886 - *(__be32 *)dptr = cpu_to_be32(cycle); 1887 - 1888 - dptr += BBSIZE; 1889 - } 1890 - } else { 1891 - iclog->ic_bwritecnt = 1; 1887 + if (bno + BTOBB(count) > log->l_logBBsize) { 1888 + xlog_split_iclog(log, &iclog->ic_header, bno, count); 1889 + split = true; 1892 1890 } 1893 1891 1894 1892 /* calculcate the checksum */ ··· 1879 1921 * write on I/O completion and shutdown the fs. The subsequent mount 1880 1922 * detects the bad CRC and attempts to recover. 1881 1923 */ 1924 + #ifdef DEBUG 1882 1925 if (XFS_TEST_ERROR(false, log->l_mp, XFS_ERRTAG_LOG_BAD_CRC)) { 1883 1926 iclog->ic_header.h_crc &= cpu_to_le32(0xAAAAAAAA); 1884 - iclog->ic_state |= XLOG_STATE_IOABORT; 1927 + iclog->ic_fail_crc = true; 1885 1928 xfs_warn(log->l_mp, 1886 1929 "Intentionally corrupted log record at LSN 0x%llx. 
Shutdown imminent.", 1887 1930 be64_to_cpu(iclog->ic_header.h_lsn)); 1888 1931 } 1889 - 1890 - bp->b_io_length = BTOBB(count); 1891 - bp->b_log_item = iclog; 1892 - bp->b_flags &= ~XBF_FLUSH; 1893 - bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE | XBF_FUA); 1932 + #endif 1894 1933 1895 1934 /* 1896 1935 * Flush the data device before flushing the log to make sure all meta ··· 1897 1942 * synchronously here; for an internal log we can simply use the block 1898 1943 * layer state machine for preflushes. 1899 1944 */ 1900 - if (log->l_mp->m_logdev_targp != log->l_mp->m_ddev_targp) 1945 + if (log->l_targ != log->l_mp->m_ddev_targp || split) { 1901 1946 xfs_blkdev_issue_flush(log->l_mp->m_ddev_targp); 1902 - else 1903 - bp->b_flags |= XBF_FLUSH; 1904 - 1905 - ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); 1906 - ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize); 1907 - 1908 - xlog_verify_iclog(log, iclog, count, true); 1909 - 1910 - /* account for log which doesn't start at block #0 */ 1911 - XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart); 1912 - 1913 - /* 1914 - * Don't call xfs_bwrite here. We do log-syncs even when the filesystem 1915 - * is shutting down. 
1916 - */ 1917 - error = xlog_bdstrat(bp); 1918 - if (error) { 1919 - xfs_buf_ioerror_alert(bp, "xlog_sync"); 1920 - return error; 1947 + need_flush = false; 1921 1948 } 1922 - if (split) { 1923 - bp = iclog->ic_log->l_xbuf; 1924 - XFS_BUF_SET_ADDR(bp, 0); /* logical 0 */ 1925 - xfs_buf_associate_memory(bp, 1926 - (char *)&iclog->ic_header + count, split); 1927 - bp->b_log_item = iclog; 1928 - bp->b_flags &= ~XBF_FLUSH; 1929 - bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE | XBF_FUA); 1930 1949 1931 - ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); 1932 - ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize); 1933 - 1934 - /* account for internal log which doesn't start at block #0 */ 1935 - XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart); 1936 - error = xlog_bdstrat(bp); 1937 - if (error) { 1938 - xfs_buf_ioerror_alert(bp, "xlog_sync (split)"); 1939 - return error; 1940 - } 1941 - } 1942 - return 0; 1943 - } /* xlog_sync */ 1950 + xlog_verify_iclog(log, iclog, count); 1951 + xlog_write_iclog(log, iclog, bno, count, need_flush); 1952 + } 1944 1953 1945 1954 /* 1946 1955 * Deallocate a log structure ··· 1924 2005 */ 1925 2006 iclog = log->l_iclog; 1926 2007 for (i = 0; i < log->l_iclog_bufs; i++) { 1927 - xfs_buf_lock(iclog->ic_bp); 1928 - xfs_buf_unlock(iclog->ic_bp); 2008 + down(&iclog->ic_sema); 2009 + up(&iclog->ic_sema); 1929 2010 iclog = iclog->ic_next; 1930 2011 } 1931 2012 1932 - /* 1933 - * Always need to ensure that the extra buffer does not point to memory 1934 - * owned by another log buffer before we free it. Also, cycle the lock 1935 - * first to ensure we've completed IO on it. 
1936 - */ 1937 - xfs_buf_lock(log->l_xbuf); 1938 - xfs_buf_unlock(log->l_xbuf); 1939 - xfs_buf_set_empty(log->l_xbuf, BTOBB(log->l_iclog_size)); 1940 - xfs_buf_free(log->l_xbuf); 1941 - 1942 2013 iclog = log->l_iclog; 1943 2014 for (i = 0; i < log->l_iclog_bufs; i++) { 1944 - xfs_buf_free(iclog->ic_bp); 1945 2015 next_iclog = iclog->ic_next; 2016 + kmem_free(iclog->ic_data); 1946 2017 kmem_free(iclog); 1947 2018 iclog = next_iclog; 1948 2019 } 1949 - spinlock_destroy(&log->l_icloglock); 1950 2020 1951 2021 log->l_mp->m_log = NULL; 2022 + destroy_workqueue(log->l_ioend_workqueue); 1952 2023 kmem_free(log); 1953 2024 } /* xlog_dealloc_log */ 1954 2025 ··· 2519 2610 if (iclog->ic_state == XLOG_STATE_DIRTY) { 2520 2611 iclog->ic_state = XLOG_STATE_ACTIVE; 2521 2612 iclog->ic_offset = 0; 2522 - ASSERT(iclog->ic_callback == NULL); 2613 + ASSERT(list_empty_careful(&iclog->ic_callbacks)); 2523 2614 /* 2524 2615 * If the number of ops in this iclog indicate it just 2525 2616 * contains the dummy transaction, we can ··· 2589 2680 2590 2681 STATIC xfs_lsn_t 2591 2682 xlog_get_lowest_lsn( 2592 - struct xlog *log) 2683 + struct xlog *log) 2593 2684 { 2594 - xlog_in_core_t *lsn_log; 2595 - xfs_lsn_t lowest_lsn, lsn; 2685 + struct xlog_in_core *iclog = log->l_iclog; 2686 + xfs_lsn_t lowest_lsn = 0, lsn; 2596 2687 2597 - lsn_log = log->l_iclog; 2598 - lowest_lsn = 0; 2599 2688 do { 2600 - if (!(lsn_log->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_DIRTY))) { 2601 - lsn = be64_to_cpu(lsn_log->ic_header.h_lsn); 2602 - if ((lsn && !lowest_lsn) || 2603 - (XFS_LSN_CMP(lsn, lowest_lsn) < 0)) { 2689 + if (iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY)) 2690 + continue; 2691 + 2692 + lsn = be64_to_cpu(iclog->ic_header.h_lsn); 2693 + if ((lsn && !lowest_lsn) || XFS_LSN_CMP(lsn, lowest_lsn) < 0) 2604 2694 lowest_lsn = lsn; 2605 - } 2606 - } 2607 - lsn_log = lsn_log->ic_next; 2608 - } while (lsn_log != log->l_iclog); 2695 + } while ((iclog = iclog->ic_next) != log->l_iclog); 2696 + 
2609 2697 return lowest_lsn; 2610 2698 } 2611 - 2612 2699 2613 2700 STATIC void 2614 2701 xlog_state_do_callback( 2615 2702 struct xlog *log, 2616 - int aborted, 2703 + bool aborted, 2617 2704 struct xlog_in_core *ciclog) 2618 2705 { 2619 2706 xlog_in_core_t *iclog; 2620 2707 xlog_in_core_t *first_iclog; /* used to know when we've 2621 2708 * processed all iclogs once */ 2622 - xfs_log_callback_t *cb, *cb_next; 2623 2709 int flushcnt = 0; 2624 2710 xfs_lsn_t lowest_lsn; 2625 2711 int ioerrors; /* counter: iclogs with errors */ ··· 2725 2821 */ 2726 2822 ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn), 2727 2823 be64_to_cpu(iclog->ic_header.h_lsn)) <= 0); 2728 - if (iclog->ic_callback) 2824 + if (!list_empty_careful(&iclog->ic_callbacks)) 2729 2825 atomic64_set(&log->l_last_sync_lsn, 2730 2826 be64_to_cpu(iclog->ic_header.h_lsn)); 2731 2827 ··· 2742 2838 * callbacks being added. 2743 2839 */ 2744 2840 spin_lock(&iclog->ic_callback_lock); 2745 - cb = iclog->ic_callback; 2746 - while (cb) { 2747 - iclog->ic_callback_tail = &(iclog->ic_callback); 2748 - iclog->ic_callback = NULL; 2749 - spin_unlock(&iclog->ic_callback_lock); 2841 + while (!list_empty(&iclog->ic_callbacks)) { 2842 + LIST_HEAD(tmp); 2750 2843 2751 - /* perform callbacks in the order given */ 2752 - for (; cb; cb = cb_next) { 2753 - cb_next = cb->cb_next; 2754 - cb->cb_func(cb->cb_arg, aborted); 2755 - } 2844 + list_splice_init(&iclog->ic_callbacks, &tmp); 2845 + 2846 + spin_unlock(&iclog->ic_callback_lock); 2847 + xlog_cil_process_committed(&tmp, aborted); 2756 2848 spin_lock(&iclog->ic_callback_lock); 2757 - cb = iclog->ic_callback; 2758 2849 } 2759 2850 2760 2851 loopdidcallbacks++; 2761 2852 funcdidcallbacks++; 2762 2853 2763 2854 spin_lock(&log->l_icloglock); 2764 - ASSERT(iclog->ic_callback == NULL); 2765 2855 spin_unlock(&iclog->ic_callback_lock); 2766 2856 if (!(iclog->ic_state & XLOG_STATE_IOERROR)) 2767 2857 iclog->ic_state = XLOG_STATE_DIRTY; ··· 2841 2943 */ 2842 2944 STATIC void 2843 
2945 xlog_state_done_syncing( 2844 - xlog_in_core_t *iclog, 2845 - int aborted) 2946 + struct xlog_in_core *iclog, 2947 + bool aborted) 2846 2948 { 2847 - struct xlog *log = iclog->ic_log; 2949 + struct xlog *log = iclog->ic_log; 2848 2950 2849 2951 spin_lock(&log->l_icloglock); 2850 2952 2851 2953 ASSERT(iclog->ic_state == XLOG_STATE_SYNCING || 2852 2954 iclog->ic_state == XLOG_STATE_IOERROR); 2853 2955 ASSERT(atomic_read(&iclog->ic_refcnt) == 0); 2854 - ASSERT(iclog->ic_bwritecnt == 1 || iclog->ic_bwritecnt == 2); 2855 - 2856 2956 2857 2957 /* 2858 2958 * If we got an error, either on the first buffer, or in the case of ··· 2858 2962 * and none should ever be attempted to be written to disk 2859 2963 * again. 2860 2964 */ 2861 - if (iclog->ic_state != XLOG_STATE_IOERROR) { 2862 - if (--iclog->ic_bwritecnt == 1) { 2863 - spin_unlock(&log->l_icloglock); 2864 - return; 2865 - } 2965 + if (iclog->ic_state != XLOG_STATE_IOERROR) 2866 2966 iclog->ic_state = XLOG_STATE_DONE_SYNC; 2867 - } 2868 2967 2869 2968 /* 2870 2969 * Someone could be sleeping prior to writing out the next ··· 3128 3237 * flags after this point. 
3129 3238 */ 3130 3239 if (sync) 3131 - return xlog_sync(log, iclog); 3240 + xlog_sync(log, iclog); 3132 3241 return 0; 3133 3242 } /* xlog_state_release_iclog */ 3134 3243 ··· 3719 3828 xlog_verify_iclog( 3720 3829 struct xlog *log, 3721 3830 struct xlog_in_core *iclog, 3722 - int count, 3723 - bool syncing) 3831 + int count) 3724 3832 { 3725 3833 xlog_op_header_t *ophead; 3726 3834 xlog_in_core_t *icptr; ··· 3763 3873 /* clientid is only 1 byte */ 3764 3874 p = &ophead->oh_clientid; 3765 3875 field_offset = p - base_ptr; 3766 - if (!syncing || (field_offset & 0x1ff)) { 3876 + if (field_offset & 0x1ff) { 3767 3877 clientid = ophead->oh_clientid; 3768 3878 } else { 3769 3879 idx = BTOBBT((char *)&ophead->oh_clientid - iclog->ic_datap); ··· 3786 3896 /* check length */ 3787 3897 p = &ophead->oh_len; 3788 3898 field_offset = p - base_ptr; 3789 - if (!syncing || (field_offset & 0x1ff)) { 3899 + if (field_offset & 0x1ff) { 3790 3900 op_len = be32_to_cpu(ophead->oh_len); 3791 3901 } else { 3792 3902 idx = BTOBBT((uintptr_t)&ophead->oh_len - ··· 3923 4033 * avoid races. 3924 4034 */ 3925 4035 wake_up_all(&log->l_cilp->xc_commit_wait); 3926 - xlog_state_do_callback(log, XFS_LI_ABORTED, NULL); 4036 + xlog_state_do_callback(log, true, NULL); 3927 4037 3928 4038 #ifdef XFSERRORDEBUG 3929 4039 {
+4 -13
fs/xfs/xfs_log.h
··· 6 6 #ifndef __XFS_LOG_H__ 7 7 #define __XFS_LOG_H__ 8 8 9 + struct xfs_cil_ctx; 10 + 9 11 struct xfs_log_vec { 10 12 struct xfs_log_vec *lv_next; /* next lv in build list */ 11 13 int lv_niovecs; /* number of iovecs in lv */ ··· 74 72 } 75 73 76 74 /* 77 - * Structure used to pass callback function and the function's argument 78 - * to the log manager. 79 - */ 80 - typedef struct xfs_log_callback { 81 - struct xfs_log_callback *cb_next; 82 - void (*cb_func)(void *, int); 83 - void *cb_arg; 84 - } xfs_log_callback_t; 85 - 86 - /* 87 75 * By comparing each component, we don't have to worry about extra 88 76 * endian issues in treating two 32 bit numbers as one 64 bit number 89 77 */ ··· 117 125 xfs_daddr_t start_block, 118 126 int num_bblocks); 119 127 int xfs_log_mount_finish(struct xfs_mount *mp); 120 - int xfs_log_mount_cancel(struct xfs_mount *); 128 + void xfs_log_mount_cancel(struct xfs_mount *); 121 129 xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp); 122 130 xfs_lsn_t xlog_assign_tail_lsn_locked(struct xfs_mount *mp); 123 131 void xfs_log_space_wake(struct xfs_mount *mp); 124 - int xfs_log_notify(struct xlog_in_core *iclog, 125 - struct xfs_log_callback *callback_entry); 126 132 int xfs_log_release_iclog(struct xfs_mount *mp, 127 133 struct xlog_in_core *iclog); 128 134 int xfs_log_reserve(struct xfs_mount *mp, ··· 138 148 139 149 void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, 140 150 xfs_lsn_t *commit_lsn, bool regrant); 151 + void xlog_cil_process_committed(struct list_head *list, bool aborted); 141 152 bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip); 142 153 143 154 void xfs_log_work_queue(struct xfs_mount *mp);
+35 -16
fs/xfs/xfs_log_cil.c
··· 10 10 #include "xfs_shared.h" 11 11 #include "xfs_trans_resv.h" 12 12 #include "xfs_mount.h" 13 - #include "xfs_error.h" 14 - #include "xfs_alloc.h" 15 13 #include "xfs_extent_busy.h" 16 - #include "xfs_discard.h" 17 14 #include "xfs_trans.h" 18 15 #include "xfs_trans_priv.h" 19 16 #include "xfs_log.h" ··· 243 246 * shadow buffer, so update the the pointer to it appropriately. 244 247 */ 245 248 if (!old_lv) { 246 - lv->lv_item->li_ops->iop_pin(lv->lv_item); 249 + if (lv->lv_item->li_ops->iop_pin) 250 + lv->lv_item->li_ops->iop_pin(lv->lv_item); 247 251 lv->lv_item->li_lv_shadow = NULL; 248 252 } else if (old_lv != lv) { 249 253 ASSERT(lv->lv_buf_len != XFS_LOG_VEC_ORDERED); ··· 574 576 */ 575 577 static void 576 578 xlog_cil_committed( 577 - void *args, 578 - int abort) 579 + struct xfs_cil_ctx *ctx, 580 + bool abort) 579 581 { 580 - struct xfs_cil_ctx *ctx = args; 581 582 struct xfs_mount *mp = ctx->cil->xc_log->l_mp; 582 583 583 584 /* ··· 609 612 xlog_discard_busy_extents(mp, ctx); 610 613 else 611 614 kmem_free(ctx); 615 + } 616 + 617 + void 618 + xlog_cil_process_committed( 619 + struct list_head *list, 620 + bool aborted) 621 + { 622 + struct xfs_cil_ctx *ctx; 623 + 624 + while ((ctx = list_first_entry_or_null(list, 625 + struct xfs_cil_ctx, iclog_entry))) { 626 + list_del(&ctx->iclog_entry); 627 + xlog_cil_committed(ctx, aborted); 628 + } 612 629 } 613 630 614 631 /* ··· 846 835 if (commit_lsn == -1) 847 836 goto out_abort; 848 837 849 - /* attach all the transactions w/ busy extents to iclog */ 850 - ctx->log_cb.cb_func = xlog_cil_committed; 851 - ctx->log_cb.cb_arg = ctx; 852 - error = xfs_log_notify(commit_iclog, &ctx->log_cb); 853 - if (error) 838 + spin_lock(&commit_iclog->ic_callback_lock); 839 + if (commit_iclog->ic_state & XLOG_STATE_IOERROR) { 840 + spin_unlock(&commit_iclog->ic_callback_lock); 854 841 goto out_abort; 842 + } 843 + ASSERT_ALWAYS(commit_iclog->ic_state == XLOG_STATE_ACTIVE || 844 + commit_iclog->ic_state == 
XLOG_STATE_WANT_SYNC); 845 + list_add_tail(&ctx->iclog_entry, &commit_iclog->ic_callbacks); 846 + spin_unlock(&commit_iclog->ic_callback_lock); 855 847 856 848 /* 857 849 * now the checkpoint commit is complete and we've attached the ··· 878 864 out_abort_free_ticket: 879 865 xfs_log_ticket_put(tic); 880 866 out_abort: 881 - xlog_cil_committed(ctx, XFS_LI_ABORTED); 867 + xlog_cil_committed(ctx, true); 882 868 return -EIO; 883 869 } 884 870 ··· 998 984 { 999 985 struct xlog *log = mp->m_log; 1000 986 struct xfs_cil *cil = log->l_cilp; 987 + struct xfs_log_item *lip, *next; 1001 988 xfs_lsn_t xc_commit_lsn; 1002 989 1003 990 /* ··· 1023 1008 1024 1009 /* 1025 1010 * Once all the items of the transaction have been copied to the CIL, 1026 - * the items can be unlocked and freed. 1011 + * the items can be unlocked and possibly freed. 1027 1012 * 1028 1013 * This needs to be done before we drop the CIL context lock because we 1029 1014 * have to update state in the log items and unlock them before they go ··· 1032 1017 * the log items. This affects (at least) processing of stale buffers, 1033 1018 * inodes and EFIs. 1034 1019 */ 1035 - xfs_trans_free_items(tp, xc_commit_lsn, false); 1036 - 1020 + trace_xfs_trans_commit_items(tp, _RET_IP_); 1021 + list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) { 1022 + xfs_trans_del_item(lip); 1023 + if (lip->li_ops->iop_committing) 1024 + lip->li_ops->iop_committing(lip, xc_commit_lsn); 1025 + } 1037 1026 xlog_cil_push_background(log); 1038 1027 1039 1028 up_read(&cil->xc_ctx_lock);
+18 -18
fs/xfs/xfs_log_priv.h
··· 10 10 struct xlog; 11 11 struct xlog_ticket; 12 12 struct xfs_mount; 13 - struct xfs_log_callback; 14 13 15 14 /* 16 15 * Flags for log structure ··· 49 50 #define XLOG_STATE_CALLBACK 0x0020 /* Callback functions now */ 50 51 #define XLOG_STATE_DIRTY 0x0040 /* Dirty IC log, not ready for ACTIVE status*/ 51 52 #define XLOG_STATE_IOERROR 0x0080 /* IO error happened in sync'ing log */ 52 - #define XLOG_STATE_IOABORT 0x0100 /* force abort on I/O completion (debug) */ 53 53 #define XLOG_STATE_ALL 0x7FFF /* All possible valid flags */ 54 54 #define XLOG_STATE_NOTUSED 0x8000 /* This IC log not being used */ 55 55 ··· 177 179 * the iclog. 178 180 * - ic_forcewait is used to implement synchronous forcing of the iclog to disk. 179 181 * - ic_next is the pointer to the next iclog in the ring. 180 - * - ic_bp is a pointer to the buffer used to write this incore log to disk. 181 182 * - ic_log is a pointer back to the global log structure. 182 - * - ic_callback is a linked list of callback function/argument pairs to be 183 - * called after an iclog finishes writing. 184 - * - ic_size is the full size of the header plus data. 183 + * - ic_size is the full size of the log buffer, minus the cycle headers. 184 + * - ic_io_size is the size of the currently pending log buffer write, which 185 + * might be smaller than ic_size 185 186 * - ic_offset is the current number of bytes written to in this iclog. 186 187 * - ic_refcnt is bumped when someone is writing to the log. 187 188 * - ic_state is the state of the iclog. ··· 190 193 * structure cacheline aligned. 
The following fields can be contended on 191 194 * by independent processes: 192 195 * 193 - * - ic_callback_* 196 + * - ic_callbacks 194 197 * - ic_refcnt 195 198 * - fields protected by the global l_icloglock 196 199 * ··· 203 206 wait_queue_head_t ic_write_wait; 204 207 struct xlog_in_core *ic_next; 205 208 struct xlog_in_core *ic_prev; 206 - struct xfs_buf *ic_bp; 207 209 struct xlog *ic_log; 208 - int ic_size; 209 - int ic_offset; 210 - int ic_bwritecnt; 210 + u32 ic_size; 211 + u32 ic_io_size; 212 + u32 ic_offset; 211 213 unsigned short ic_state; 212 214 char *ic_datap; /* pointer to iclog data */ 213 215 214 216 /* Callback structures need their own cacheline */ 215 217 spinlock_t ic_callback_lock ____cacheline_aligned_in_smp; 216 - struct xfs_log_callback *ic_callback; 217 - struct xfs_log_callback **ic_callback_tail; 218 + struct list_head ic_callbacks; 218 219 219 220 /* reference counts need their own cacheline */ 220 221 atomic_t ic_refcnt ____cacheline_aligned_in_smp; 221 222 xlog_in_core_2_t *ic_data; 222 223 #define ic_header ic_data->hic_header 224 + #ifdef DEBUG 225 + bool ic_fail_crc : 1; 226 + #endif 227 + struct semaphore ic_sema; 228 + struct work_struct ic_end_io_work; 229 + struct bio ic_bio; 230 + struct bio_vec ic_bvec[]; 223 231 } xlog_in_core_t; 224 232 225 233 /* ··· 245 243 int space_used; /* aggregate size of regions */ 246 244 struct list_head busy_extents; /* busy extents in chkpt */ 247 245 struct xfs_log_vec *lv_chain; /* logvecs being pushed */ 248 - struct xfs_log_callback log_cb; /* completion callback hook. 
*/ 246 + struct list_head iclog_entry; 249 247 struct list_head committing; /* ctx committing list */ 250 248 struct work_struct discard_endio_work; 251 249 }; ··· 352 350 struct xfs_mount *l_mp; /* mount point */ 353 351 struct xfs_ail *l_ailp; /* AIL log is working with */ 354 352 struct xfs_cil *l_cilp; /* CIL log is working with */ 355 - struct xfs_buf *l_xbuf; /* extra buffer for log 356 - * wrapping */ 357 353 struct xfs_buftarg *l_targ; /* buftarg of log */ 354 + struct workqueue_struct *l_ioend_workqueue; /* for I/O completions */ 358 355 struct delayed_work l_work; /* background flush work */ 359 356 uint l_flags; 360 357 uint l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */ ··· 362 361 int l_iclog_heads; /* # of iclog header sectors */ 363 362 uint l_sectBBsize; /* sector size in BBs (2^n) */ 364 363 int l_iclog_size; /* size of log in bytes */ 365 - int l_iclog_size_log; /* log power size of log */ 366 364 int l_iclog_bufs; /* number of iclog buffers */ 367 365 xfs_daddr_t l_logBBstart; /* start block of log */ 368 366 int l_logsize; /* size of log in bytes */ ··· 418 418 extern int 419 419 xlog_recover_finish( 420 420 struct xlog *log); 421 - extern int 421 + extern void 422 422 xlog_recover_cancel(struct xlog *); 423 423 424 424 extern __le32 xlog_cksum(struct xlog *log, struct xlog_rec_header *rhead,
+190 -277
fs/xfs/xfs_log_recover.c
··· 13 13 #include "xfs_sb.h" 14 14 #include "xfs_mount.h" 15 15 #include "xfs_defer.h" 16 - #include "xfs_da_format.h" 17 - #include "xfs_da_btree.h" 18 16 #include "xfs_inode.h" 19 17 #include "xfs_trans.h" 20 18 #include "xfs_log.h" ··· 24 26 #include "xfs_alloc.h" 25 27 #include "xfs_ialloc.h" 26 28 #include "xfs_quota.h" 27 - #include "xfs_cksum.h" 28 29 #include "xfs_trace.h" 29 30 #include "xfs_icache.h" 30 31 #include "xfs_bmap_btree.h" ··· 76 79 * are valid, false otherwise. 77 80 */ 78 81 static inline bool 79 - xlog_verify_bp( 82 + xlog_verify_bno( 80 83 struct xlog *log, 81 84 xfs_daddr_t blk_no, 82 85 int bbcount) ··· 89 92 } 90 93 91 94 /* 92 - * Allocate a buffer to hold log data. The buffer needs to be able 93 - * to map to a range of nbblks basic blocks at any valid (basic 94 - * block) offset within the log. 95 + * Allocate a buffer to hold log data. The buffer needs to be able to map to 96 + * a range of nbblks basic blocks at any valid offset within the log. 95 97 */ 96 - STATIC xfs_buf_t * 97 - xlog_get_bp( 98 + static char * 99 + xlog_alloc_buffer( 98 100 struct xlog *log, 99 101 int nbblks) 100 102 { 101 - struct xfs_buf *bp; 102 - 103 103 /* 104 104 * Pass log block 0 since we don't have an addr yet, buffer will be 105 105 * verified on read. 106 106 */ 107 - if (!xlog_verify_bp(log, 0, nbblks)) { 107 + if (!xlog_verify_bno(log, 0, nbblks)) { 108 108 xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer", 109 109 nbblks); 110 110 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); ··· 109 115 } 110 116 111 117 /* 112 - * We do log I/O in units of log sectors (a power-of-2 113 - * multiple of the basic block size), so we round up the 114 - * requested size to accommodate the basic blocks required 115 - * for complete log sectors. 
118 + * We do log I/O in units of log sectors (a power-of-2 multiple of the 119 + * basic block size), so we round up the requested size to accommodate 120 + * the basic blocks required for complete log sectors. 116 121 * 117 - * In addition, the buffer may be used for a non-sector- 118 - * aligned block offset, in which case an I/O of the 119 - * requested size could extend beyond the end of the 120 - * buffer. If the requested size is only 1 basic block it 121 - * will never straddle a sector boundary, so this won't be 122 - * an issue. Nor will this be a problem if the log I/O is 123 - * done in basic blocks (sector size 1). But otherwise we 124 - * extend the buffer by one extra log sector to ensure 125 - * there's space to accommodate this possibility. 122 + * In addition, the buffer may be used for a non-sector-aligned block 123 + * offset, in which case an I/O of the requested size could extend 124 + * beyond the end of the buffer. If the requested size is only 1 basic 125 + * block it will never straddle a sector boundary, so this won't be an 126 + * issue. Nor will this be a problem if the log I/O is done in basic 127 + * blocks (sector size 1). But otherwise we extend the buffer by one 128 + * extra log sector to ensure there's space to accommodate this 129 + * possibility. 126 130 */ 127 131 if (nbblks > 1 && log->l_sectBBsize > 1) 128 132 nbblks += log->l_sectBBsize; 129 133 nbblks = round_up(nbblks, log->l_sectBBsize); 130 - 131 - bp = xfs_buf_get_uncached(log->l_mp->m_logdev_targp, nbblks, 0); 132 - if (bp) 133 - xfs_buf_unlock(bp); 134 - return bp; 135 - } 136 - 137 - STATIC void 138 - xlog_put_bp( 139 - xfs_buf_t *bp) 140 - { 141 - xfs_buf_free(bp); 134 + return kmem_alloc_large(BBTOB(nbblks), KM_MAYFAIL); 142 135 } 143 136 144 137 /* 145 138 * Return the address of the start of the given block number's data 146 139 * in a log buffer. The buffer covers a log sector-aligned region. 
147 140 */ 148 - STATIC char * 141 + static inline unsigned int 149 142 xlog_align( 150 143 struct xlog *log, 151 - xfs_daddr_t blk_no, 152 - int nbblks, 153 - struct xfs_buf *bp) 144 + xfs_daddr_t blk_no) 154 145 { 155 - xfs_daddr_t offset = blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1); 156 - 157 - ASSERT(offset + nbblks <= bp->b_length); 158 - return bp->b_addr + BBTOB(offset); 146 + return BBTOB(blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1)); 159 147 } 160 148 161 - 162 - /* 163 - * nbblks should be uint, but oh well. Just want to catch that 32-bit length. 164 - */ 165 - STATIC int 166 - xlog_bread_noalign( 167 - struct xlog *log, 168 - xfs_daddr_t blk_no, 169 - int nbblks, 170 - struct xfs_buf *bp) 149 + static int 150 + xlog_do_io( 151 + struct xlog *log, 152 + xfs_daddr_t blk_no, 153 + unsigned int nbblks, 154 + char *data, 155 + unsigned int op) 171 156 { 172 - int error; 157 + int error; 173 158 174 - if (!xlog_verify_bp(log, blk_no, nbblks)) { 159 + if (!xlog_verify_bno(log, blk_no, nbblks)) { 175 160 xfs_warn(log->l_mp, 176 161 "Invalid log block/length (0x%llx, 0x%x) for buffer", 177 162 blk_no, nbblks); ··· 160 187 161 188 blk_no = round_down(blk_no, log->l_sectBBsize); 162 189 nbblks = round_up(nbblks, log->l_sectBBsize); 163 - 164 190 ASSERT(nbblks > 0); 165 - ASSERT(nbblks <= bp->b_length); 166 191 167 - XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); 168 - bp->b_flags |= XBF_READ; 169 - bp->b_io_length = nbblks; 170 - bp->b_error = 0; 171 - 172 - error = xfs_buf_submit(bp); 173 - if (error && !XFS_FORCED_SHUTDOWN(log->l_mp)) 174 - xfs_buf_ioerror_alert(bp, __func__); 192 + error = xfs_rw_bdev(log->l_targ->bt_bdev, log->l_logBBstart + blk_no, 193 + BBTOB(nbblks), data, op); 194 + if (error && !XFS_FORCED_SHUTDOWN(log->l_mp)) { 195 + xfs_alert(log->l_mp, 196 + "log recovery %s I/O error at daddr 0x%llx len %d error %d", 197 + op == REQ_OP_WRITE ? 
"write" : "read", 198 + blk_no, nbblks, error); 199 + } 175 200 return error; 201 + } 202 + 203 + STATIC int 204 + xlog_bread_noalign( 205 + struct xlog *log, 206 + xfs_daddr_t blk_no, 207 + int nbblks, 208 + char *data) 209 + { 210 + return xlog_do_io(log, blk_no, nbblks, data, REQ_OP_READ); 176 211 } 177 212 178 213 STATIC int ··· 188 207 struct xlog *log, 189 208 xfs_daddr_t blk_no, 190 209 int nbblks, 191 - struct xfs_buf *bp, 210 + char *data, 192 211 char **offset) 193 212 { 194 213 int error; 195 214 196 - error = xlog_bread_noalign(log, blk_no, nbblks, bp); 197 - if (error) 198 - return error; 199 - 200 - *offset = xlog_align(log, blk_no, nbblks, bp); 201 - return 0; 215 + error = xlog_do_io(log, blk_no, nbblks, data, REQ_OP_READ); 216 + if (!error) 217 + *offset = data + xlog_align(log, blk_no); 218 + return error; 202 219 } 203 220 204 - /* 205 - * Read at an offset into the buffer. Returns with the buffer in it's original 206 - * state regardless of the result of the read. 207 - */ 208 - STATIC int 209 - xlog_bread_offset( 210 - struct xlog *log, 211 - xfs_daddr_t blk_no, /* block to read from */ 212 - int nbblks, /* blocks to read */ 213 - struct xfs_buf *bp, 214 - char *offset) 215 - { 216 - char *orig_offset = bp->b_addr; 217 - int orig_len = BBTOB(bp->b_length); 218 - int error, error2; 219 - 220 - error = xfs_buf_associate_memory(bp, offset, BBTOB(nbblks)); 221 - if (error) 222 - return error; 223 - 224 - error = xlog_bread_noalign(log, blk_no, nbblks, bp); 225 - 226 - /* must reset buffer pointer even on error */ 227 - error2 = xfs_buf_associate_memory(bp, orig_offset, orig_len); 228 - if (error) 229 - return error; 230 - return error2; 231 - } 232 - 233 - /* 234 - * Write out the buffer at the given block for the given number of blocks. 235 - * The buffer is kept locked across the write and is returned locked. 236 - * This can only be used for synchronous log writes. 
237 - */ 238 221 STATIC int 239 222 xlog_bwrite( 240 223 struct xlog *log, 241 224 xfs_daddr_t blk_no, 242 225 int nbblks, 243 - struct xfs_buf *bp) 226 + char *data) 244 227 { 245 - int error; 246 - 247 - if (!xlog_verify_bp(log, blk_no, nbblks)) { 248 - xfs_warn(log->l_mp, 249 - "Invalid log block/length (0x%llx, 0x%x) for buffer", 250 - blk_no, nbblks); 251 - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); 252 - return -EFSCORRUPTED; 253 - } 254 - 255 - blk_no = round_down(blk_no, log->l_sectBBsize); 256 - nbblks = round_up(nbblks, log->l_sectBBsize); 257 - 258 - ASSERT(nbblks > 0); 259 - ASSERT(nbblks <= bp->b_length); 260 - 261 - XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); 262 - xfs_buf_hold(bp); 263 - xfs_buf_lock(bp); 264 - bp->b_io_length = nbblks; 265 - bp->b_error = 0; 266 - 267 - error = xfs_bwrite(bp); 268 - if (error) 269 - xfs_buf_ioerror_alert(bp, __func__); 270 - xfs_buf_relse(bp); 271 - return error; 228 + return xlog_do_io(log, blk_no, nbblks, data, REQ_OP_WRITE); 272 229 } 273 230 274 231 #ifdef DEBUG ··· 296 377 * We're not going to bother about retrying 297 378 * this during recovery. One strike! 
298 379 */ 299 - if (!XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) { 380 + if (!XFS_FORCED_SHUTDOWN(bp->b_mount)) { 300 381 xfs_buf_ioerror_alert(bp, __func__); 301 - xfs_force_shutdown(bp->b_target->bt_mount, 302 - SHUTDOWN_META_IO_ERROR); 382 + xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR); 303 383 } 304 384 } 305 385 ··· 323 405 STATIC int 324 406 xlog_find_cycle_start( 325 407 struct xlog *log, 326 - struct xfs_buf *bp, 408 + char *buffer, 327 409 xfs_daddr_t first_blk, 328 410 xfs_daddr_t *last_blk, 329 411 uint cycle) ··· 337 419 end_blk = *last_blk; 338 420 mid_blk = BLK_AVG(first_blk, end_blk); 339 421 while (mid_blk != first_blk && mid_blk != end_blk) { 340 - error = xlog_bread(log, mid_blk, 1, bp, &offset); 422 + error = xlog_bread(log, mid_blk, 1, buffer, &offset); 341 423 if (error) 342 424 return error; 343 425 mid_cycle = xlog_get_cycle(offset); ··· 373 455 { 374 456 xfs_daddr_t i, j; 375 457 uint cycle; 376 - xfs_buf_t *bp; 458 + char *buffer; 377 459 xfs_daddr_t bufblks; 378 460 char *buf = NULL; 379 461 int error = 0; ··· 387 469 bufblks = 1 << ffs(nbblks); 388 470 while (bufblks > log->l_logBBsize) 389 471 bufblks >>= 1; 390 - while (!(bp = xlog_get_bp(log, bufblks))) { 472 + while (!(buffer = xlog_alloc_buffer(log, bufblks))) { 391 473 bufblks >>= 1; 392 474 if (bufblks < log->l_sectBBsize) 393 475 return -ENOMEM; ··· 398 480 399 481 bcount = min(bufblks, (start_blk + nbblks - i)); 400 482 401 - error = xlog_bread(log, i, bcount, bp, &buf); 483 + error = xlog_bread(log, i, bcount, buffer, &buf); 402 484 if (error) 403 485 goto out; 404 486 ··· 416 498 *new_blk = -1; 417 499 418 500 out: 419 - xlog_put_bp(bp); 501 + kmem_free(buffer); 420 502 return error; 421 503 } 422 504 ··· 440 522 int extra_bblks) 441 523 { 442 524 xfs_daddr_t i; 443 - xfs_buf_t *bp; 525 + char *buffer; 444 526 char *offset = NULL; 445 527 xlog_rec_header_t *head = NULL; 446 528 int error = 0; ··· 450 532 451 533 ASSERT(start_blk != 0 || *last_blk != start_blk); 452 
534 453 - if (!(bp = xlog_get_bp(log, num_blks))) { 454 - if (!(bp = xlog_get_bp(log, 1))) 535 + buffer = xlog_alloc_buffer(log, num_blks); 536 + if (!buffer) { 537 + buffer = xlog_alloc_buffer(log, 1); 538 + if (!buffer) 455 539 return -ENOMEM; 456 540 smallmem = 1; 457 541 } else { 458 - error = xlog_bread(log, start_blk, num_blks, bp, &offset); 542 + error = xlog_bread(log, start_blk, num_blks, buffer, &offset); 459 543 if (error) 460 544 goto out; 461 545 offset += ((num_blks - 1) << BBSHIFT); ··· 474 554 } 475 555 476 556 if (smallmem) { 477 - error = xlog_bread(log, i, 1, bp, &offset); 557 + error = xlog_bread(log, i, 1, buffer, &offset); 478 558 if (error) 479 559 goto out; 480 560 } ··· 527 607 *last_blk = i; 528 608 529 609 out: 530 - xlog_put_bp(bp); 610 + kmem_free(buffer); 531 611 return error; 532 612 } 533 613 ··· 549 629 struct xlog *log, 550 630 xfs_daddr_t *return_head_blk) 551 631 { 552 - xfs_buf_t *bp; 632 + char *buffer; 553 633 char *offset; 554 634 xfs_daddr_t new_blk, first_blk, start_blk, last_blk, head_blk; 555 635 int num_scan_bblks; ··· 579 659 } 580 660 581 661 first_blk = 0; /* get cycle # of 1st block */ 582 - bp = xlog_get_bp(log, 1); 583 - if (!bp) 662 + buffer = xlog_alloc_buffer(log, 1); 663 + if (!buffer) 584 664 return -ENOMEM; 585 665 586 - error = xlog_bread(log, 0, 1, bp, &offset); 666 + error = xlog_bread(log, 0, 1, buffer, &offset); 587 667 if (error) 588 - goto bp_err; 668 + goto out_free_buffer; 589 669 590 670 first_half_cycle = xlog_get_cycle(offset); 591 671 592 672 last_blk = head_blk = log_bbnum - 1; /* get cycle # of last block */ 593 - error = xlog_bread(log, last_blk, 1, bp, &offset); 673 + error = xlog_bread(log, last_blk, 1, buffer, &offset); 594 674 if (error) 595 - goto bp_err; 675 + goto out_free_buffer; 596 676 597 677 last_half_cycle = xlog_get_cycle(offset); 598 678 ASSERT(last_half_cycle != 0); ··· 660 740 * ^ we want to locate this spot 661 741 */ 662 742 stop_on_cycle = last_half_cycle; 663 - if ((error 
= xlog_find_cycle_start(log, bp, first_blk, 664 - &head_blk, last_half_cycle))) 665 - goto bp_err; 743 + error = xlog_find_cycle_start(log, buffer, first_blk, &head_blk, 744 + last_half_cycle); 745 + if (error) 746 + goto out_free_buffer; 666 747 } 667 748 668 749 /* ··· 683 762 if ((error = xlog_find_verify_cycle(log, 684 763 start_blk, num_scan_bblks, 685 764 stop_on_cycle, &new_blk))) 686 - goto bp_err; 765 + goto out_free_buffer; 687 766 if (new_blk != -1) 688 767 head_blk = new_blk; 689 768 } else { /* need to read 2 parts of log */ ··· 720 799 if ((error = xlog_find_verify_cycle(log, start_blk, 721 800 num_scan_bblks - (int)head_blk, 722 801 (stop_on_cycle - 1), &new_blk))) 723 - goto bp_err; 802 + goto out_free_buffer; 724 803 if (new_blk != -1) { 725 804 head_blk = new_blk; 726 805 goto validate_head; ··· 736 815 if ((error = xlog_find_verify_cycle(log, 737 816 start_blk, (int)head_blk, 738 817 stop_on_cycle, &new_blk))) 739 - goto bp_err; 818 + goto out_free_buffer; 740 819 if (new_blk != -1) 741 820 head_blk = new_blk; 742 821 } ··· 755 834 if (error == 1) 756 835 error = -EIO; 757 836 if (error) 758 - goto bp_err; 837 + goto out_free_buffer; 759 838 } else { 760 839 start_blk = 0; 761 840 ASSERT(head_blk <= INT_MAX); 762 841 error = xlog_find_verify_log_record(log, start_blk, &head_blk, 0); 763 842 if (error < 0) 764 - goto bp_err; 843 + goto out_free_buffer; 765 844 if (error == 1) { 766 845 /* We hit the beginning of the log during our search */ 767 846 start_blk = log_bbnum - (num_scan_bblks - head_blk); ··· 774 853 if (error == 1) 775 854 error = -EIO; 776 855 if (error) 777 - goto bp_err; 856 + goto out_free_buffer; 778 857 if (new_blk != log_bbnum) 779 858 head_blk = new_blk; 780 859 } else if (error) 781 - goto bp_err; 860 + goto out_free_buffer; 782 861 } 783 862 784 - xlog_put_bp(bp); 863 + kmem_free(buffer); 785 864 if (head_blk == log_bbnum) 786 865 *return_head_blk = 0; 787 866 else ··· 794 873 */ 795 874 return 0; 796 875 797 - bp_err: 798 - 
xlog_put_bp(bp); 799 - 876 + out_free_buffer: 877 + kmem_free(buffer); 800 878 if (error) 801 879 xfs_warn(log->l_mp, "failed to find log head"); 802 880 return error; ··· 815 895 xfs_daddr_t head_blk, 816 896 xfs_daddr_t tail_blk, 817 897 int count, 818 - struct xfs_buf *bp, 898 + char *buffer, 819 899 xfs_daddr_t *rblk, 820 900 struct xlog_rec_header **rhead, 821 901 bool *wrapped) ··· 834 914 */ 835 915 end_blk = head_blk > tail_blk ? tail_blk : 0; 836 916 for (i = (int) head_blk - 1; i >= end_blk; i--) { 837 - error = xlog_bread(log, i, 1, bp, &offset); 917 + error = xlog_bread(log, i, 1, buffer, &offset); 838 918 if (error) 839 919 goto out_error; 840 920 ··· 853 933 */ 854 934 if (tail_blk >= head_blk && found != count) { 855 935 for (i = log->l_logBBsize - 1; i >= (int) tail_blk; i--) { 856 - error = xlog_bread(log, i, 1, bp, &offset); 936 + error = xlog_bread(log, i, 1, buffer, &offset); 857 937 if (error) 858 938 goto out_error; 859 939 ··· 889 969 xfs_daddr_t head_blk, 890 970 xfs_daddr_t tail_blk, 891 971 int count, 892 - struct xfs_buf *bp, 972 + char *buffer, 893 973 xfs_daddr_t *rblk, 894 974 struct xlog_rec_header **rhead, 895 975 bool *wrapped) ··· 908 988 */ 909 989 end_blk = head_blk > tail_blk ? 
head_blk : log->l_logBBsize - 1; 910 990 for (i = (int) tail_blk; i <= end_blk; i++) { 911 - error = xlog_bread(log, i, 1, bp, &offset); 991 + error = xlog_bread(log, i, 1, buffer, &offset); 912 992 if (error) 913 993 goto out_error; 914 994 ··· 926 1006 */ 927 1007 if (tail_blk > head_blk && found != count) { 928 1008 for (i = 0; i < (int) head_blk; i++) { 929 - error = xlog_bread(log, i, 1, bp, &offset); 1009 + error = xlog_bread(log, i, 1, buffer, &offset); 930 1010 if (error) 931 1011 goto out_error; 932 1012 ··· 989 1069 int hsize) 990 1070 { 991 1071 struct xlog_rec_header *thead; 992 - struct xfs_buf *bp; 1072 + char *buffer; 993 1073 xfs_daddr_t first_bad; 994 1074 int error = 0; 995 1075 bool wrapped; 996 1076 xfs_daddr_t tmp_tail; 997 1077 xfs_daddr_t orig_tail = *tail_blk; 998 1078 999 - bp = xlog_get_bp(log, 1); 1000 - if (!bp) 1079 + buffer = xlog_alloc_buffer(log, 1); 1080 + if (!buffer) 1001 1081 return -ENOMEM; 1002 1082 1003 1083 /* 1004 1084 * Make sure the tail points to a record (returns positive count on 1005 1085 * success). 1006 1086 */ 1007 - error = xlog_seek_logrec_hdr(log, head_blk, *tail_blk, 1, bp, 1087 + error = xlog_seek_logrec_hdr(log, head_blk, *tail_blk, 1, buffer, 1008 1088 &tmp_tail, &thead, &wrapped); 1009 1089 if (error < 0) 1010 1090 goto out; ··· 1033 1113 break; 1034 1114 1035 1115 /* skip to the next record; returns positive count on success */ 1036 - error = xlog_seek_logrec_hdr(log, head_blk, first_bad, 2, bp, 1037 - &tmp_tail, &thead, &wrapped); 1116 + error = xlog_seek_logrec_hdr(log, head_blk, first_bad, 2, 1117 + buffer, &tmp_tail, &thead, &wrapped); 1038 1118 if (error < 0) 1039 1119 goto out; 1040 1120 ··· 1049 1129 "Tail block (0x%llx) overwrite detected. 
Updated to 0x%llx", 1050 1130 orig_tail, *tail_blk); 1051 1131 out: 1052 - xlog_put_bp(bp); 1132 + kmem_free(buffer); 1053 1133 return error; 1054 1134 } 1055 1135 ··· 1071 1151 struct xlog *log, 1072 1152 xfs_daddr_t *head_blk, /* in/out: unverified head */ 1073 1153 xfs_daddr_t *tail_blk, /* out: tail block */ 1074 - struct xfs_buf *bp, 1154 + char *buffer, 1075 1155 xfs_daddr_t *rhead_blk, /* start blk of last record */ 1076 1156 struct xlog_rec_header **rhead, /* ptr to last record */ 1077 1157 bool *wrapped) /* last rec. wraps phys. log */ 1078 1158 { 1079 1159 struct xlog_rec_header *tmp_rhead; 1080 - struct xfs_buf *tmp_bp; 1160 + char *tmp_buffer; 1081 1161 xfs_daddr_t first_bad; 1082 1162 xfs_daddr_t tmp_rhead_blk; 1083 1163 int found; ··· 1088 1168 * Check the head of the log for torn writes. Search backwards from the 1089 1169 * head until we hit the tail or the maximum number of log record I/Os 1090 1170 * that could have been in flight at one time. Use a temporary buffer so 1091 - * we don't trash the rhead/bp pointers from the caller. 1171 + * we don't trash the rhead/buffer pointers from the caller. 1092 1172 */ 1093 - tmp_bp = xlog_get_bp(log, 1); 1094 - if (!tmp_bp) 1173 + tmp_buffer = xlog_alloc_buffer(log, 1); 1174 + if (!tmp_buffer) 1095 1175 return -ENOMEM; 1096 1176 error = xlog_rseek_logrec_hdr(log, *head_blk, *tail_blk, 1097 - XLOG_MAX_ICLOGS, tmp_bp, &tmp_rhead_blk, 1098 - &tmp_rhead, &tmp_wrapped); 1099 - xlog_put_bp(tmp_bp); 1177 + XLOG_MAX_ICLOGS, tmp_buffer, 1178 + &tmp_rhead_blk, &tmp_rhead, &tmp_wrapped); 1179 + kmem_free(tmp_buffer); 1100 1180 if (error < 0) 1101 1181 return error; 1102 1182 ··· 1125 1205 * (i.e., the records with invalid CRC) if the cycle number 1126 1206 * matches the the current cycle. 
1127 1207 */ 1128 - found = xlog_rseek_logrec_hdr(log, first_bad, *tail_blk, 1, bp, 1129 - rhead_blk, rhead, wrapped); 1208 + found = xlog_rseek_logrec_hdr(log, first_bad, *tail_blk, 1, 1209 + buffer, rhead_blk, rhead, wrapped); 1130 1210 if (found < 0) 1131 1211 return found; 1132 1212 if (found == 0) /* XXX: right thing to do here? */ ··· 1186 1266 xfs_daddr_t *tail_blk, 1187 1267 struct xlog_rec_header *rhead, 1188 1268 xfs_daddr_t rhead_blk, 1189 - struct xfs_buf *bp, 1269 + char *buffer, 1190 1270 bool *clean) 1191 1271 { 1192 1272 struct xlog_op_header *op_head; ··· 1229 1309 if (*head_blk == after_umount_blk && 1230 1310 be32_to_cpu(rhead->h_num_logops) == 1) { 1231 1311 umount_data_blk = xlog_wrap_logbno(log, rhead_blk + hblks); 1232 - error = xlog_bread(log, umount_data_blk, 1, bp, &offset); 1312 + error = xlog_bread(log, umount_data_blk, 1, buffer, &offset); 1233 1313 if (error) 1234 1314 return error; 1235 1315 ··· 1308 1388 { 1309 1389 xlog_rec_header_t *rhead; 1310 1390 char *offset = NULL; 1311 - xfs_buf_t *bp; 1391 + char *buffer; 1312 1392 int error; 1313 1393 xfs_daddr_t rhead_blk; 1314 1394 xfs_lsn_t tail_lsn; ··· 1322 1402 return error; 1323 1403 ASSERT(*head_blk < INT_MAX); 1324 1404 1325 - bp = xlog_get_bp(log, 1); 1326 - if (!bp) 1405 + buffer = xlog_alloc_buffer(log, 1); 1406 + if (!buffer) 1327 1407 return -ENOMEM; 1328 1408 if (*head_blk == 0) { /* special case */ 1329 - error = xlog_bread(log, 0, 1, bp, &offset); 1409 + error = xlog_bread(log, 0, 1, buffer, &offset); 1330 1410 if (error) 1331 1411 goto done; 1332 1412 ··· 1342 1422 * block. This wraps all the way back around to the head so something is 1343 1423 * seriously wrong if we can't find it. 
1344 1424 */ 1345 - error = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp, 1425 + error = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, buffer, 1346 1426 &rhead_blk, &rhead, &wrapped); 1347 1427 if (error < 0) 1348 1428 return error; ··· 1363 1443 * state to determine whether recovery is necessary. 1364 1444 */ 1365 1445 error = xlog_check_unmount_rec(log, head_blk, tail_blk, rhead, 1366 - rhead_blk, bp, &clean); 1446 + rhead_blk, buffer, &clean); 1367 1447 if (error) 1368 1448 goto done; 1369 1449 ··· 1380 1460 if (!clean) { 1381 1461 xfs_daddr_t orig_head = *head_blk; 1382 1462 1383 - error = xlog_verify_head(log, head_blk, tail_blk, bp, 1463 + error = xlog_verify_head(log, head_blk, tail_blk, buffer, 1384 1464 &rhead_blk, &rhead, &wrapped); 1385 1465 if (error) 1386 1466 goto done; ··· 1391 1471 wrapped); 1392 1472 tail_lsn = atomic64_read(&log->l_tail_lsn); 1393 1473 error = xlog_check_unmount_rec(log, head_blk, tail_blk, 1394 - rhead, rhead_blk, bp, 1474 + rhead, rhead_blk, buffer, 1395 1475 &clean); 1396 1476 if (error) 1397 1477 goto done; ··· 1425 1505 * But... if the -device- itself is readonly, just skip this. 1426 1506 * We can't recover this device anyway, so it won't matter. 
1427 1507 */ 1428 - if (!xfs_readonly_buftarg(log->l_mp->m_logdev_targp)) 1508 + if (!xfs_readonly_buftarg(log->l_targ)) 1429 1509 error = xlog_clear_stale_blocks(log, tail_lsn); 1430 1510 1431 1511 done: 1432 - xlog_put_bp(bp); 1512 + kmem_free(buffer); 1433 1513 1434 1514 if (error) 1435 1515 xfs_warn(log->l_mp, "failed to locate log tail"); ··· 1457 1537 struct xlog *log, 1458 1538 xfs_daddr_t *blk_no) 1459 1539 { 1460 - xfs_buf_t *bp; 1540 + char *buffer; 1461 1541 char *offset; 1462 1542 uint first_cycle, last_cycle; 1463 1543 xfs_daddr_t new_blk, last_blk, start_blk; ··· 1467 1547 *blk_no = 0; 1468 1548 1469 1549 /* check totally zeroed log */ 1470 - bp = xlog_get_bp(log, 1); 1471 - if (!bp) 1550 + buffer = xlog_alloc_buffer(log, 1); 1551 + if (!buffer) 1472 1552 return -ENOMEM; 1473 - error = xlog_bread(log, 0, 1, bp, &offset); 1553 + error = xlog_bread(log, 0, 1, buffer, &offset); 1474 1554 if (error) 1475 - goto bp_err; 1555 + goto out_free_buffer; 1476 1556 1477 1557 first_cycle = xlog_get_cycle(offset); 1478 1558 if (first_cycle == 0) { /* completely zeroed log */ 1479 1559 *blk_no = 0; 1480 - xlog_put_bp(bp); 1560 + kmem_free(buffer); 1481 1561 return 1; 1482 1562 } 1483 1563 1484 1564 /* check partially zeroed log */ 1485 - error = xlog_bread(log, log_bbnum-1, 1, bp, &offset); 1565 + error = xlog_bread(log, log_bbnum-1, 1, buffer, &offset); 1486 1566 if (error) 1487 - goto bp_err; 1567 + goto out_free_buffer; 1488 1568 1489 1569 last_cycle = xlog_get_cycle(offset); 1490 1570 if (last_cycle != 0) { /* log completely written to */ 1491 - xlog_put_bp(bp); 1571 + kmem_free(buffer); 1492 1572 return 0; 1493 1573 } 1494 1574 1495 1575 /* we have a partially zeroed log */ 1496 1576 last_blk = log_bbnum-1; 1497 - if ((error = xlog_find_cycle_start(log, bp, 0, &last_blk, 0))) 1498 - goto bp_err; 1577 + error = xlog_find_cycle_start(log, buffer, 0, &last_blk, 0); 1578 + if (error) 1579 + goto out_free_buffer; 1499 1580 1500 1581 /* 1501 1582 * Validate the 
answer. Because there is no way to guarantee that ··· 1519 1598 */ 1520 1599 if ((error = xlog_find_verify_cycle(log, start_blk, 1521 1600 (int)num_scan_bblks, 0, &new_blk))) 1522 - goto bp_err; 1601 + goto out_free_buffer; 1523 1602 if (new_blk != -1) 1524 1603 last_blk = new_blk; 1525 1604 ··· 1531 1610 if (error == 1) 1532 1611 error = -EIO; 1533 1612 if (error) 1534 - goto bp_err; 1613 + goto out_free_buffer; 1535 1614 1536 1615 *blk_no = last_blk; 1537 - bp_err: 1538 - xlog_put_bp(bp); 1616 + out_free_buffer: 1617 + kmem_free(buffer); 1539 1618 if (error) 1540 1619 return error; 1541 1620 return 1; ··· 1578 1657 int tail_block) 1579 1658 { 1580 1659 char *offset; 1581 - xfs_buf_t *bp; 1660 + char *buffer; 1582 1661 int balign, ealign; 1583 1662 int sectbb = log->l_sectBBsize; 1584 1663 int end_block = start_block + blocks; ··· 1595 1674 bufblks = 1 << ffs(blocks); 1596 1675 while (bufblks > log->l_logBBsize) 1597 1676 bufblks >>= 1; 1598 - while (!(bp = xlog_get_bp(log, bufblks))) { 1677 + while (!(buffer = xlog_alloc_buffer(log, bufblks))) { 1599 1678 bufblks >>= 1; 1600 1679 if (bufblks < sectbb) 1601 1680 return -ENOMEM; ··· 1607 1686 */ 1608 1687 balign = round_down(start_block, sectbb); 1609 1688 if (balign != start_block) { 1610 - error = xlog_bread_noalign(log, start_block, 1, bp); 1689 + error = xlog_bread_noalign(log, start_block, 1, buffer); 1611 1690 if (error) 1612 - goto out_put_bp; 1691 + goto out_free_buffer; 1613 1692 1614 1693 j = start_block - balign; 1615 1694 } ··· 1626 1705 */ 1627 1706 ealign = round_down(end_block, sectbb); 1628 1707 if (j == 0 && (start_block + endcount > ealign)) { 1629 - offset = bp->b_addr + BBTOB(ealign - start_block); 1630 - error = xlog_bread_offset(log, ealign, sectbb, 1631 - bp, offset); 1708 + error = xlog_bread_noalign(log, ealign, sectbb, 1709 + buffer + BBTOB(ealign - start_block)); 1632 1710 if (error) 1633 1711 break; 1634 1712 1635 1713 } 1636 1714 1637 - offset = xlog_align(log, start_block, endcount, 
bp); 1715 + offset = buffer + xlog_align(log, start_block); 1638 1716 for (; j < endcount; j++) { 1639 1717 xlog_add_record(log, offset, cycle, i+j, 1640 1718 tail_cycle, tail_block); 1641 1719 offset += BBSIZE; 1642 1720 } 1643 - error = xlog_bwrite(log, start_block, endcount, bp); 1721 + error = xlog_bwrite(log, start_block, endcount, buffer); 1644 1722 if (error) 1645 1723 break; 1646 1724 start_block += endcount; 1647 1725 j = 0; 1648 1726 } 1649 1727 1650 - out_put_bp: 1651 - xlog_put_bp(bp); 1728 + out_free_buffer: 1729 + kmem_free(buffer); 1652 1730 return error; 1653 1731 } 1654 1732 ··· 2082 2162 if (xfs_sb_version_hascrc(&mp->m_sb)) 2083 2163 bp->b_ops = &xfs_inode_buf_ops; 2084 2164 2085 - inodes_per_buf = BBTOB(bp->b_io_length) >> mp->m_sb.sb_inodelog; 2165 + inodes_per_buf = BBTOB(bp->b_length) >> mp->m_sb.sb_inodelog; 2086 2166 for (i = 0; i < inodes_per_buf; i++) { 2087 2167 next_unlinked_offset = (i * mp->m_sb.sb_inodesize) + 2088 2168 offsetof(xfs_dinode_t, di_next_unlinked); ··· 2124 2204 2125 2205 ASSERT(item->ri_buf[item_index].i_addr != NULL); 2126 2206 ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0); 2127 - ASSERT((reg_buf_offset + reg_buf_bytes) <= 2128 - BBTOB(bp->b_io_length)); 2207 + ASSERT((reg_buf_offset + reg_buf_bytes) <= BBTOB(bp->b_length)); 2129 2208 2130 2209 /* 2131 2210 * The current logged region contains a copy of the ··· 2589 2670 ASSERT(nbits > 0); 2590 2671 ASSERT(item->ri_buf[i].i_addr != NULL); 2591 2672 ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0); 2592 - ASSERT(BBTOB(bp->b_io_length) >= 2673 + ASSERT(BBTOB(bp->b_length) >= 2593 2674 ((uint)bit << XFS_BLF_SHIFT) + (nbits << XFS_BLF_SHIFT)); 2594 2675 2595 2676 /* ··· 2801 2882 * 2802 2883 * Also make sure that only inode buffers with good sizes stay in 2803 2884 * the buffer cache. The kernel moves inodes in buffers of 1 block 2804 - * or mp->m_inode_cluster_size bytes, whichever is bigger. 
The inode 2885 + * or inode_cluster_size bytes, whichever is bigger. The inode 2805 2886 * buffers in the log can be a different size if the log was generated 2806 2887 * by an older kernel using unclustered inode buffers or a newer kernel 2807 2888 * running with a different inode cluster size. Regardless, if the 2808 - * the inode buffer size isn't max(blocksize, mp->m_inode_cluster_size) 2809 - * for *our* value of mp->m_inode_cluster_size, then we need to keep 2889 + * the inode buffer size isn't max(blocksize, inode_cluster_size) 2890 + * for *our* value of inode_cluster_size, then we need to keep 2810 2891 * the buffer out of the buffer cache so that the buffer won't 2811 2892 * overlap with future reads of those inodes. 2812 2893 */ 2813 2894 if (XFS_DINODE_MAGIC == 2814 2895 be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) && 2815 - (BBTOB(bp->b_io_length) != max(log->l_mp->m_sb.sb_blocksize, 2816 - (uint32_t)log->l_mp->m_inode_cluster_size))) { 2896 + (BBTOB(bp->b_length) != M_IGEO(log->l_mp)->inode_cluster_size)) { 2817 2897 xfs_buf_stale(bp); 2818 2898 error = xfs_bwrite(bp); 2819 2899 } else { 2820 - ASSERT(bp->b_target->bt_mount == mp); 2900 + ASSERT(bp->b_mount == mp); 2821 2901 bp->b_iodone = xlog_recover_iodone; 2822 2902 xfs_buf_delwri_queue(bp, buffer_list); 2823 2903 } ··· 3178 3260 /* re-generate the checksum. 
*/ 3179 3261 xfs_dinode_calc_crc(log->l_mp, dip); 3180 3262 3181 - ASSERT(bp->b_target->bt_mount == mp); 3263 + ASSERT(bp->b_mount == mp); 3182 3264 bp->b_iodone = xlog_recover_iodone; 3183 3265 xfs_buf_delwri_queue(bp, buffer_list); 3184 3266 ··· 3317 3399 } 3318 3400 3319 3401 ASSERT(dq_f->qlf_size == 2); 3320 - ASSERT(bp->b_target->bt_mount == mp); 3402 + ASSERT(bp->b_mount == mp); 3321 3403 bp->b_iodone = xlog_recover_iodone; 3322 3404 xfs_buf_delwri_queue(bp, buffer_list); 3323 3405 ··· 3381 3463 { 3382 3464 xfs_efd_log_format_t *efd_formatp; 3383 3465 xfs_efi_log_item_t *efip = NULL; 3384 - xfs_log_item_t *lip; 3466 + struct xfs_log_item *lip; 3385 3467 uint64_t efi_id; 3386 3468 struct xfs_ail_cursor cur; 3387 3469 struct xfs_ail *ailp = log->l_ailp; ··· 3767 3849 { 3768 3850 struct xfs_mount *mp = log->l_mp; 3769 3851 struct xfs_icreate_log *icl; 3852 + struct xfs_ino_geometry *igeo = M_IGEO(mp); 3770 3853 xfs_agnumber_t agno; 3771 3854 xfs_agblock_t agbno; 3772 3855 unsigned int count; ··· 3817 3898 3818 3899 /* 3819 3900 * The inode chunk is either full or sparse and we only support 3820 - * m_ialloc_min_blks sized sparse allocations at this time. 3901 + * m_ino_geo.ialloc_min_blks sized sparse allocations at this time. 3821 3902 */ 3822 - if (length != mp->m_ialloc_blks && 3823 - length != mp->m_ialloc_min_blks) { 3903 + if (length != igeo->ialloc_blks && 3904 + length != igeo->ialloc_min_blks) { 3824 3905 xfs_warn(log->l_mp, 3825 3906 "%s: unsupported chunk length", __FUNCTION__); 3826 3907 return -EINVAL; ··· 3840 3921 * buffers for cancellation so we don't overwrite anything written after 3841 3922 * a cancellation. 
3842 3923 */ 3843 - bb_per_cluster = XFS_FSB_TO_BB(mp, mp->m_blocks_per_cluster); 3844 - nbufs = length / mp->m_blocks_per_cluster; 3924 + bb_per_cluster = XFS_FSB_TO_BB(mp, igeo->blocks_per_cluster); 3925 + nbufs = length / igeo->blocks_per_cluster; 3845 3926 for (i = 0, cancel_count = 0; i < nbufs; i++) { 3846 3927 xfs_daddr_t daddr; 3847 3928 3848 3929 daddr = XFS_AGB_TO_DADDR(mp, agno, 3849 - agbno + i * mp->m_blocks_per_cluster); 3930 + agbno + i * igeo->blocks_per_cluster); 3850 3931 if (xlog_check_buffer_cancelled(log, daddr, bb_per_cluster, 0)) 3851 3932 cancel_count++; 3852 3933 } ··· 4875 4956 * A cancel occurs when the mount has failed and we're bailing out. 4876 4957 * Release all pending log intent items so they don't pin the AIL. 4877 4958 */ 4878 - STATIC int 4959 + STATIC void 4879 4960 xlog_recover_cancel_intents( 4880 4961 struct xlog *log) 4881 4962 { 4882 4963 struct xfs_log_item *lip; 4883 - int error = 0; 4884 4964 struct xfs_ail_cursor cur; 4885 4965 struct xfs_ail *ailp; 4886 4966 ··· 4919 5001 4920 5002 xfs_trans_ail_cursor_done(&cur); 4921 5003 spin_unlock(&ailp->ail_lock); 4922 - return error; 4923 5004 } 4924 5005 4925 5006 /* ··· 5224 5307 xfs_daddr_t blk_no, rblk_no; 5225 5308 xfs_daddr_t rhead_blk; 5226 5309 char *offset; 5227 - xfs_buf_t *hbp, *dbp; 5310 + char *hbp, *dbp; 5228 5311 int error = 0, h_size, h_len; 5229 5312 int error2 = 0; 5230 5313 int bblks, split_bblks; ··· 5249 5332 * iclog header and extract the header size from it. Get a 5250 5333 * new hbp that is the correct size. 
5251 5334 */ 5252 - hbp = xlog_get_bp(log, 1); 5335 + hbp = xlog_alloc_buffer(log, 1); 5253 5336 if (!hbp) 5254 5337 return -ENOMEM; 5255 5338 ··· 5291 5374 hblks = h_size / XLOG_HEADER_CYCLE_SIZE; 5292 5375 if (h_size % XLOG_HEADER_CYCLE_SIZE) 5293 5376 hblks++; 5294 - xlog_put_bp(hbp); 5295 - hbp = xlog_get_bp(log, hblks); 5377 + kmem_free(hbp); 5378 + hbp = xlog_alloc_buffer(log, hblks); 5296 5379 } else { 5297 5380 hblks = 1; 5298 5381 } 5299 5382 } else { 5300 5383 ASSERT(log->l_sectBBsize == 1); 5301 5384 hblks = 1; 5302 - hbp = xlog_get_bp(log, 1); 5385 + hbp = xlog_alloc_buffer(log, 1); 5303 5386 h_size = XLOG_BIG_RECORD_BSIZE; 5304 5387 } 5305 5388 5306 5389 if (!hbp) 5307 5390 return -ENOMEM; 5308 - dbp = xlog_get_bp(log, BTOBB(h_size)); 5391 + dbp = xlog_alloc_buffer(log, BTOBB(h_size)); 5309 5392 if (!dbp) { 5310 - xlog_put_bp(hbp); 5393 + kmem_free(hbp); 5311 5394 return -ENOMEM; 5312 5395 } 5313 5396 ··· 5322 5405 /* 5323 5406 * Check for header wrapping around physical end-of-log 5324 5407 */ 5325 - offset = hbp->b_addr; 5408 + offset = hbp; 5326 5409 split_hblks = 0; 5327 5410 wrapped_hblks = 0; 5328 5411 if (blk_no + hblks <= log->l_logBBsize) { ··· 5358 5441 * - order is important. 5359 5442 */ 5360 5443 wrapped_hblks = hblks - split_hblks; 5361 - error = xlog_bread_offset(log, 0, 5362 - wrapped_hblks, hbp, 5444 + error = xlog_bread_noalign(log, 0, 5445 + wrapped_hblks, 5363 5446 offset + BBTOB(split_hblks)); 5364 5447 if (error) 5365 5448 goto bread_err2; ··· 5390 5473 } else { 5391 5474 /* This log record is split across the 5392 5475 * physical end of log */ 5393 - offset = dbp->b_addr; 5476 + offset = dbp; 5394 5477 split_bblks = 0; 5395 5478 if (blk_no != log->l_logBBsize) { 5396 5479 /* some data is before the physical ··· 5419 5502 * _first_, then the log start (LR header end) 5420 5503 * - order is important. 
5421 5504 */ 5422 - error = xlog_bread_offset(log, 0, 5423 - bblks - split_bblks, dbp, 5505 + error = xlog_bread_noalign(log, 0, 5506 + bblks - split_bblks, 5424 5507 offset + BBTOB(split_bblks)); 5425 5508 if (error) 5426 5509 goto bread_err2; ··· 5468 5551 } 5469 5552 5470 5553 bread_err2: 5471 - xlog_put_bp(dbp); 5554 + kmem_free(dbp); 5472 5555 bread_err1: 5473 - xlog_put_bp(hbp); 5556 + kmem_free(hbp); 5474 5557 5475 5558 /* 5476 5559 * Submit buffers that have been added from the last record processed, ··· 5604 5687 * Now that we've finished replaying all buffer and inode 5605 5688 * updates, re-read in the superblock and reverify it. 5606 5689 */ 5607 - bp = xfs_getsb(mp, 0); 5690 + bp = xfs_getsb(mp); 5608 5691 bp->b_flags &= ~(XBF_DONE | XBF_ASYNC); 5609 5692 ASSERT(!(bp->b_flags & XBF_WRITE)); 5610 5693 bp->b_flags |= XBF_READ; ··· 5777 5860 return 0; 5778 5861 } 5779 5862 5780 - int 5863 + void 5781 5864 xlog_recover_cancel( 5782 5865 struct xlog *log) 5783 5866 { 5784 - int error = 0; 5785 - 5786 5867 if (log->l_flags & XLOG_RECOVERY_NEEDED) 5787 - error = xlog_recover_cancel_intents(log); 5788 - 5789 - return error; 5868 + xlog_recover_cancel_intents(log); 5790 5869 } 5791 5870 5792 5871 #if defined(DEBUG)
+1 -1
fs/xfs/xfs_message.c
··· 6 6 #include "xfs.h" 7 7 #include "xfs_fs.h" 8 8 #include "xfs_error.h" 9 + #include "xfs_shared.h" 9 10 #include "xfs_format.h" 10 - #include "xfs_log_format.h" 11 11 #include "xfs_trans_resv.h" 12 12 #include "xfs_mount.h" 13 13
+6 -96
fs/xfs/xfs_mount.c
··· 12 12 #include "xfs_bit.h" 13 13 #include "xfs_sb.h" 14 14 #include "xfs_mount.h" 15 - #include "xfs_defer.h" 16 - #include "xfs_da_format.h" 17 - #include "xfs_da_btree.h" 18 15 #include "xfs_inode.h" 19 16 #include "xfs_dir2.h" 20 17 #include "xfs_ialloc.h" ··· 24 27 #include "xfs_error.h" 25 28 #include "xfs_quota.h" 26 29 #include "xfs_fsops.h" 27 - #include "xfs_trace.h" 28 30 #include "xfs_icache.h" 29 31 #include "xfs_sysfs.h" 30 32 #include "xfs_rmap_btree.h" ··· 426 430 } 427 431 428 432 /* 429 - * Set the maximum inode count for this filesystem 430 - */ 431 - STATIC void 432 - xfs_set_maxicount(xfs_mount_t *mp) 433 - { 434 - xfs_sb_t *sbp = &(mp->m_sb); 435 - uint64_t icount; 436 - 437 - if (sbp->sb_imax_pct) { 438 - /* 439 - * Make sure the maximum inode count is a multiple 440 - * of the units we allocate inodes in. 441 - */ 442 - icount = sbp->sb_dblocks * sbp->sb_imax_pct; 443 - do_div(icount, 100); 444 - do_div(icount, mp->m_ialloc_blks); 445 - mp->m_maxicount = (icount * mp->m_ialloc_blks) << 446 - sbp->sb_inopblog; 447 - } else { 448 - mp->m_maxicount = 0; 449 - } 450 - } 451 - 452 - /* 453 433 * Set the default minimum read and write sizes unless 454 434 * already specified in a mount option. 455 435 * We use smaller I/O sizes when the file system ··· 479 507 do_div(space, 100); 480 508 mp->m_low_space[i] = space * (i + 1); 481 509 } 482 - } 483 - 484 - 485 - /* 486 - * Set whether we're using inode alignment. 
487 - */ 488 - STATIC void 489 - xfs_set_inoalignment(xfs_mount_t *mp) 490 - { 491 - if (xfs_sb_version_hasalign(&mp->m_sb) && 492 - mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp)) 493 - mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1; 494 - else 495 - mp->m_inoalign_mask = 0; 496 - /* 497 - * If we are using stripe alignment, check whether 498 - * the stripe unit is a multiple of the inode alignment 499 - */ 500 - if (mp->m_dalign && mp->m_inoalign_mask && 501 - !(mp->m_dalign & mp->m_inoalign_mask)) 502 - mp->m_sinoalign = mp->m_dalign; 503 - else 504 - mp->m_sinoalign = 0; 505 510 } 506 511 507 512 /* ··· 632 683 { 633 684 struct xfs_sb *sbp = &(mp->m_sb); 634 685 struct xfs_inode *rip; 686 + struct xfs_ino_geometry *igeo = M_IGEO(mp); 635 687 uint64_t resblks; 636 688 uint quotamount = 0; 637 689 uint quotaflags = 0; ··· 699 749 xfs_alloc_compute_maxlevels(mp); 700 750 xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK); 701 751 xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK); 702 - xfs_ialloc_compute_maxlevels(mp); 752 + xfs_ialloc_setup_geometry(mp); 703 753 xfs_rmapbt_compute_maxlevels(mp); 704 754 xfs_refcountbt_compute_maxlevels(mp); 705 - 706 - xfs_set_maxicount(mp); 707 755 708 756 /* enable fail_at_unmount as default */ 709 757 mp->m_fail_unmount = true; ··· 736 788 xfs_set_low_space_thresholds(mp); 737 789 738 790 /* 739 - * Set the inode cluster size. 740 - * This may still be overridden by the file system 741 - * block size if it is larger than the chosen cluster size. 742 - * 743 - * For v5 filesystems, scale the cluster size with the inode size to 744 - * keep a constant ratio of inode per cluster buffer, but only if mkfs 745 - * has set the inode alignment value appropriately for larger cluster 746 - * sizes. 
747 - */ 748 - mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE; 749 - if (xfs_sb_version_hascrc(&mp->m_sb)) { 750 - int new_size = mp->m_inode_cluster_size; 751 - 752 - new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE; 753 - if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size)) 754 - mp->m_inode_cluster_size = new_size; 755 - } 756 - mp->m_blocks_per_cluster = xfs_icluster_size_fsb(mp); 757 - mp->m_inodes_per_cluster = XFS_FSB_TO_INO(mp, mp->m_blocks_per_cluster); 758 - mp->m_cluster_align = xfs_ialloc_cluster_alignment(mp); 759 - mp->m_cluster_align_inodes = XFS_FSB_TO_INO(mp, mp->m_cluster_align); 760 - 761 - /* 762 791 * If enabled, sparse inode chunk alignment is expected to match the 763 792 * cluster size. Full inode chunk alignment must match the chunk size, 764 793 * but that is checked on sb read verification... 765 794 */ 766 795 if (xfs_sb_version_hassparseinodes(&mp->m_sb) && 767 796 mp->m_sb.sb_spino_align != 768 - XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)) { 797 + XFS_B_TO_FSBT(mp, igeo->inode_cluster_size_raw)) { 769 798 xfs_warn(mp, 770 799 "Sparse inode block alignment (%u) must match cluster size (%llu).", 771 800 mp->m_sb.sb_spino_align, 772 - XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)); 801 + XFS_B_TO_FSBT(mp, igeo->inode_cluster_size_raw)); 773 802 error = -EINVAL; 774 803 goto out_remove_uuid; 775 804 } 776 - 777 - /* 778 - * Set inode alignment fields 779 - */ 780 - xfs_set_inoalignment(mp); 781 805 782 806 /* 783 807 * Check that the data (and log if separate) is an ok size. ··· 1305 1385 * xfs_getsb() is called to obtain the buffer for the superblock. 1306 1386 * The buffer is returned locked and read in from disk. 1307 1387 * The buffer should be released with a call to xfs_brelse(). 1308 - * 1309 - * If the flags parameter is BUF_TRYLOCK, then we'll only return 1310 - * the superblock buffer if it can be locked without sleeping. 1311 - * If it can't then we'll return NULL. 
1312 1388 */ 1313 1389 struct xfs_buf * 1314 1390 xfs_getsb( 1315 - struct xfs_mount *mp, 1316 - int flags) 1391 + struct xfs_mount *mp) 1317 1392 { 1318 1393 struct xfs_buf *bp = mp->m_sb_bp; 1319 1394 1320 - if (!xfs_buf_trylock(bp)) { 1321 - if (flags & XBF_TRYLOCK) 1322 - return NULL; 1323 - xfs_buf_lock(bp); 1324 - } 1325 - 1395 + xfs_buf_lock(bp); 1326 1396 xfs_buf_hold(bp); 1327 1397 ASSERT(bp->b_flags & XBF_DONE); 1328 1398 return bp;
+4 -18
fs/xfs/xfs_mount.h
··· 105 105 struct xfs_da_geometry *m_dir_geo; /* directory block geometry */ 106 106 struct xfs_da_geometry *m_attr_geo; /* attribute block geometry */ 107 107 struct xlog *m_log; /* log specific stuff */ 108 + struct xfs_ino_geometry m_ino_geo; /* inode geometry */ 108 109 int m_logbufs; /* number of log buffers */ 109 110 int m_logbsize; /* size of each log buffer */ 110 111 uint m_rsumlevels; /* rt summary levels */ ··· 127 126 uint8_t m_blkbit_log; /* blocklog + NBBY */ 128 127 uint8_t m_blkbb_log; /* blocklog - BBSHIFT */ 129 128 uint8_t m_agno_log; /* log #ag's */ 130 - uint8_t m_agino_log; /* #bits for agino in inum */ 131 - uint m_inode_cluster_size;/* min inode buf size */ 132 - unsigned int m_inodes_per_cluster; 133 - unsigned int m_blocks_per_cluster; 134 - unsigned int m_cluster_align; 135 - unsigned int m_cluster_align_inodes; 136 129 uint m_blockmask; /* sb_blocksize-1 */ 137 130 uint m_blockwsize; /* sb_blocksize in words */ 138 131 uint m_blockwmask; /* blockwsize-1 */ ··· 134 139 uint m_alloc_mnr[2]; /* min alloc btree records */ 135 140 uint m_bmap_dmxr[2]; /* max bmap btree records */ 136 141 uint m_bmap_dmnr[2]; /* min bmap btree records */ 137 - uint m_inobt_mxr[2]; /* max inobt btree records */ 138 - uint m_inobt_mnr[2]; /* min inobt btree records */ 139 142 uint m_rmap_mxr[2]; /* max rmap btree records */ 140 143 uint m_rmap_mnr[2]; /* min rmap btree records */ 141 144 uint m_refc_mxr[2]; /* max refc btree records */ 142 145 uint m_refc_mnr[2]; /* min refc btree records */ 143 146 uint m_ag_maxlevels; /* XFS_AG_MAXLEVELS */ 144 147 uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */ 145 - uint m_in_maxlevels; /* max inobt btree levels. 
*/ 146 148 uint m_rmap_maxlevels; /* max rmap btree levels */ 147 149 uint m_refc_maxlevels; /* max refcount btree level */ 148 150 xfs_extlen_t m_ag_prealloc_blocks; /* reserved ag blocks */ ··· 151 159 int m_fixedfsid[2]; /* unchanged for life of FS */ 152 160 uint64_t m_flags; /* global mount flags */ 153 161 bool m_finobt_nores; /* no per-AG finobt resv. */ 154 - int m_ialloc_inos; /* inodes in inode allocation */ 155 - int m_ialloc_blks; /* blocks in inode allocation */ 156 - int m_ialloc_min_blks;/* min blocks in sparse inode 157 - * allocation */ 158 - int m_inoalign_mask;/* mask sb_inoalignmt if used */ 159 162 uint m_qflags; /* quota status flags */ 160 163 struct xfs_trans_resv m_resv; /* precomputed res values */ 161 - uint64_t m_maxicount; /* maximum inode count */ 162 164 uint64_t m_resblks; /* total reserved blocks */ 163 165 uint64_t m_resblks_avail;/* available reserved blocks */ 164 166 uint64_t m_resblks_save; /* reserved blks @ remount,ro */ 165 167 int m_dalign; /* stripe unit */ 166 168 int m_swidth; /* stripe width */ 167 - int m_sinoalign; /* stripe unit inode alignment */ 168 169 uint8_t m_sectbb_log; /* sectlog - BBSHIFT */ 169 170 const struct xfs_nameops *m_dirnameops; /* vector of dir name ops */ 170 171 const struct xfs_dir_ops *m_dir_inode_ops; /* vector of dir inode ops */ ··· 183 198 struct workqueue_struct *m_unwritten_workqueue; 184 199 struct workqueue_struct *m_cil_workqueue; 185 200 struct workqueue_struct *m_reclaim_workqueue; 186 - struct workqueue_struct *m_log_workqueue; 187 201 struct workqueue_struct *m_eofblocks_workqueue; 188 202 struct workqueue_struct *m_sync_workqueue; 189 203 ··· 209 225 struct xfs_kobj m_errortag_kobj; 210 226 #endif 211 227 } xfs_mount_t; 228 + 229 + #define M_IGEO(mp) (&(mp)->m_ino_geo) 212 230 213 231 /* 214 232 * Flags for m_flags. 
··· 451 465 bool reserved); 452 466 extern int xfs_mod_frextents(struct xfs_mount *mp, int64_t delta); 453 467 454 - extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int); 468 + extern struct xfs_buf *xfs_getsb(xfs_mount_t *); 455 469 extern int xfs_readsb(xfs_mount_t *, int); 456 470 extern void xfs_freesb(xfs_mount_t *); 457 471 extern bool xfs_fs_writable(struct xfs_mount *mp, int level);
+5
fs/xfs/xfs_ondisk.h
··· 146 146 XFS_CHECK_OFFSET(struct xfs_dir3_data_hdr, hdr.magic, 0); 147 147 XFS_CHECK_OFFSET(struct xfs_dir3_free, hdr.hdr.magic, 0); 148 148 XFS_CHECK_OFFSET(struct xfs_attr3_leafblock, hdr.info.hdr, 0); 149 + 150 + XFS_CHECK_STRUCT_SIZE(struct xfs_bulkstat, 192); 151 + XFS_CHECK_STRUCT_SIZE(struct xfs_inumbers, 24); 152 + XFS_CHECK_STRUCT_SIZE(struct xfs_bulkstat_req, 64); 153 + XFS_CHECK_STRUCT_SIZE(struct xfs_inumbers_req, 64); 149 154 } 150 155 151 156 #endif /* __XFS_ONDISK_H */
+1 -8
fs/xfs/xfs_pnfs.c
··· 2 2 /* 3 3 * Copyright (c) 2014 Christoph Hellwig. 4 4 */ 5 - #include <linux/iomap.h> 6 5 #include "xfs.h" 6 + #include "xfs_shared.h" 7 7 #include "xfs_format.h" 8 8 #include "xfs_log_format.h" 9 9 #include "xfs_trans_resv.h" 10 - #include "xfs_sb.h" 11 10 #include "xfs_mount.h" 12 11 #include "xfs_inode.h" 13 12 #include "xfs_trans.h" 14 - #include "xfs_log.h" 15 13 #include "xfs_bmap.h" 16 - #include "xfs_bmap_util.h" 17 - #include "xfs_error.h" 18 14 #include "xfs_iomap.h" 19 - #include "xfs_shared.h" 20 - #include "xfs_bit.h" 21 - #include "xfs_pnfs.h" 22 15 23 16 /* 24 17 * Ensure that we do not have any outstanding pNFS layouts that can be used by
+136
fs/xfs/xfs_pwork.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-or-later 2 + /* 3 + * Copyright (C) 2019 Oracle. All Rights Reserved. 4 + * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 + */ 6 + #include "xfs.h" 7 + #include "xfs_fs.h" 8 + #include "xfs_shared.h" 9 + #include "xfs_format.h" 10 + #include "xfs_log_format.h" 11 + #include "xfs_trans_resv.h" 12 + #include "xfs_mount.h" 13 + #include "xfs_trace.h" 14 + #include "xfs_sysctl.h" 15 + #include "xfs_pwork.h" 16 + #include <linux/nmi.h> 17 + 18 + /* 19 + * Parallel Work Queue 20 + * =================== 21 + * 22 + * Abstract away the details of running a large and "obviously" parallelizable 23 + * task across multiple CPUs. Callers initialize the pwork control object with 24 + * a desired level of parallelization and a work function. Next, they embed 25 + * struct xfs_pwork in whatever structure they use to pass work context to a 26 + * worker thread and queue that pwork. The work function will be passed the 27 + * pwork item when it is run (from process context) and any returned error will 28 + * be recorded in xfs_pwork_ctl.error. Work functions should check for errors 29 + * and abort if necessary; the non-zeroness of xfs_pwork_ctl.error does not 30 + * stop workqueue item processing. 31 + * 32 + * This is the rough equivalent of the xfsprogs workqueue code, though we can't 33 + * reuse that name here. 34 + */ 35 + 36 + /* Invoke our caller's function. */ 37 + static void 38 + xfs_pwork_work( 39 + struct work_struct *work) 40 + { 41 + struct xfs_pwork *pwork; 42 + struct xfs_pwork_ctl *pctl; 43 + int error; 44 + 45 + pwork = container_of(work, struct xfs_pwork, work); 46 + pctl = pwork->pctl; 47 + error = pctl->work_fn(pctl->mp, pwork); 48 + if (error && !pctl->error) 49 + pctl->error = error; 50 + if (atomic_dec_and_test(&pctl->nr_work)) 51 + wake_up(&pctl->poll_wait); 52 + } 53 + 54 + /* 55 + * Set up control data for parallel work. @work_fn is the function that will 56 + * be called. 
@tag will be written into the kernel threads. @nr_threads is 57 + * the level of parallelism desired, or 0 for no limit. 58 + */ 59 + int 60 + xfs_pwork_init( 61 + struct xfs_mount *mp, 62 + struct xfs_pwork_ctl *pctl, 63 + xfs_pwork_work_fn work_fn, 64 + const char *tag, 65 + unsigned int nr_threads) 66 + { 67 + #ifdef DEBUG 68 + if (xfs_globals.pwork_threads >= 0) 69 + nr_threads = xfs_globals.pwork_threads; 70 + #endif 71 + trace_xfs_pwork_init(mp, nr_threads, current->pid); 72 + 73 + pctl->wq = alloc_workqueue("%s-%d", WQ_FREEZABLE, nr_threads, tag, 74 + current->pid); 75 + if (!pctl->wq) 76 + return -ENOMEM; 77 + pctl->work_fn = work_fn; 78 + pctl->error = 0; 79 + pctl->mp = mp; 80 + atomic_set(&pctl->nr_work, 0); 81 + init_waitqueue_head(&pctl->poll_wait); 82 + 83 + return 0; 84 + } 85 + 86 + /* Queue some parallel work. */ 87 + void 88 + xfs_pwork_queue( 89 + struct xfs_pwork_ctl *pctl, 90 + struct xfs_pwork *pwork) 91 + { 92 + INIT_WORK(&pwork->work, xfs_pwork_work); 93 + pwork->pctl = pctl; 94 + atomic_inc(&pctl->nr_work); 95 + queue_work(pctl->wq, &pwork->work); 96 + } 97 + 98 + /* Wait for the work to finish and tear down the control structure. */ 99 + int 100 + xfs_pwork_destroy( 101 + struct xfs_pwork_ctl *pctl) 102 + { 103 + destroy_workqueue(pctl->wq); 104 + pctl->wq = NULL; 105 + return pctl->error; 106 + } 107 + 108 + /* 109 + * Wait for the work to finish by polling completion status and touch the soft 110 + * lockup watchdog. This is for callers such as mount which hold locks. 111 + */ 112 + void 113 + xfs_pwork_poll( 114 + struct xfs_pwork_ctl *pctl) 115 + { 116 + while (wait_event_timeout(pctl->poll_wait, 117 + atomic_read(&pctl->nr_work) == 0, HZ) == 0) 118 + touch_softlockup_watchdog(); 119 + } 120 + 121 + /* 122 + * Return the amount of parallelism that the data device can handle, or 0 for 123 + * no limit. 
124 + */ 125 + unsigned int 126 + xfs_pwork_guess_datadev_parallelism( 127 + struct xfs_mount *mp) 128 + { 129 + struct xfs_buftarg *btp = mp->m_ddev_targp; 130 + 131 + /* 132 + * For now we'll go with the most conservative setting possible, 133 + * which is two threads for an SSD and 1 thread everywhere else. 134 + */ 135 + return blk_queue_nonrot(btp->bt_bdev->bd_queue) ? 2 : 1; 136 + }
+61
fs/xfs/xfs_pwork.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-or-later */ 2 + /* 3 + * Copyright (C) 2019 Oracle. All Rights Reserved. 4 + * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 + */ 6 + #ifndef __XFS_PWORK_H__ 7 + #define __XFS_PWORK_H__ 8 + 9 + struct xfs_pwork; 10 + struct xfs_mount; 11 + 12 + typedef int (*xfs_pwork_work_fn)(struct xfs_mount *mp, struct xfs_pwork *pwork); 13 + 14 + /* 15 + * Parallel work coordination structure. 16 + */ 17 + struct xfs_pwork_ctl { 18 + struct workqueue_struct *wq; 19 + struct xfs_mount *mp; 20 + xfs_pwork_work_fn work_fn; 21 + struct wait_queue_head poll_wait; 22 + atomic_t nr_work; 23 + int error; 24 + }; 25 + 26 + /* 27 + * Embed this parallel work control item inside your own work structure, 28 + * then queue work with it. 29 + */ 30 + struct xfs_pwork { 31 + struct work_struct work; 32 + struct xfs_pwork_ctl *pctl; 33 + }; 34 + 35 + #define XFS_PWORK_SINGLE_THREADED { .pctl = NULL } 36 + 37 + /* Have we been told to abort? */ 38 + static inline bool 39 + xfs_pwork_ctl_want_abort( 40 + struct xfs_pwork_ctl *pctl) 41 + { 42 + return pctl && pctl->error; 43 + } 44 + 45 + /* Have we been told to abort? */ 46 + static inline bool 47 + xfs_pwork_want_abort( 48 + struct xfs_pwork *pwork) 49 + { 50 + return xfs_pwork_ctl_want_abort(pwork->pctl); 51 + } 52 + 53 + int xfs_pwork_init(struct xfs_mount *mp, struct xfs_pwork_ctl *pctl, 54 + xfs_pwork_work_fn work_fn, const char *tag, 55 + unsigned int nr_threads); 56 + void xfs_pwork_queue(struct xfs_pwork_ctl *pctl, struct xfs_pwork *pwork); 57 + int xfs_pwork_destroy(struct xfs_pwork_ctl *pctl); 58 + void xfs_pwork_poll(struct xfs_pwork_ctl *pctl); 59 + unsigned int xfs_pwork_guess_datadev_parallelism(struct xfs_mount *mp); 60 + 61 + #endif /* __XFS_PWORK_H__ */
+21 -47
fs/xfs/xfs_qm.c
··· 13 13 #include "xfs_sb.h" 14 14 #include "xfs_mount.h" 15 15 #include "xfs_inode.h" 16 - #include "xfs_ialloc.h" 17 - #include "xfs_itable.h" 16 + #include "xfs_iwalk.h" 18 17 #include "xfs_quota.h" 19 - #include "xfs_error.h" 20 18 #include "xfs_bmap.h" 21 - #include "xfs_bmap_btree.h" 22 19 #include "xfs_bmap_util.h" 23 20 #include "xfs_trans.h" 24 21 #include "xfs_trans_space.h" 25 22 #include "xfs_qm.h" 26 23 #include "xfs_trace.h" 27 24 #include "xfs_icache.h" 28 - #include "xfs_cksum.h" 29 25 30 26 /* 31 27 * The global quota manager. There is only one of these for the entire ··· 1114 1118 /* ARGSUSED */ 1115 1119 STATIC int 1116 1120 xfs_qm_dqusage_adjust( 1117 - xfs_mount_t *mp, /* mount point for filesystem */ 1118 - xfs_ino_t ino, /* inode number to get data for */ 1119 - void __user *buffer, /* not used */ 1120 - int ubsize, /* not used */ 1121 - int *ubused, /* not used */ 1122 - int *res) /* result code value */ 1121 + struct xfs_mount *mp, 1122 + struct xfs_trans *tp, 1123 + xfs_ino_t ino, 1124 + void *data) 1123 1125 { 1124 - xfs_inode_t *ip; 1125 - xfs_qcnt_t nblks; 1126 - xfs_filblks_t rtblks = 0; /* total rt blks */ 1127 - int error; 1126 + struct xfs_inode *ip; 1127 + xfs_qcnt_t nblks; 1128 + xfs_filblks_t rtblks = 0; /* total rt blks */ 1129 + int error; 1128 1130 1129 1131 ASSERT(XFS_IS_QUOTA_RUNNING(mp)); 1130 1132 ··· 1130 1136 * rootino must have its resources accounted for, not so with the quota 1131 1137 * inodes. 1132 1138 */ 1133 - if (xfs_is_quota_inode(&mp->m_sb, ino)) { 1134 - *res = BULKSTAT_RV_NOTHING; 1135 - return -EINVAL; 1136 - } 1139 + if (xfs_is_quota_inode(&mp->m_sb, ino)) 1140 + return 0; 1137 1141 1138 1142 /* 1139 1143 * We don't _need_ to take the ilock EXCL here because quotacheck runs 1140 1144 * at mount time and therefore nobody will be racing chown/chproj. 
1141 1145 */ 1142 - error = xfs_iget(mp, NULL, ino, XFS_IGET_DONTCACHE, 0, &ip); 1143 - if (error) { 1144 - *res = BULKSTAT_RV_NOTHING; 1146 + error = xfs_iget(mp, tp, ino, XFS_IGET_DONTCACHE, 0, &ip); 1147 + if (error == -EINVAL || error == -ENOENT) 1148 + return 0; 1149 + if (error) 1145 1150 return error; 1146 - } 1147 1151 1148 1152 ASSERT(ip->i_delayed_blks == 0); 1149 1153 ··· 1149 1157 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); 1150 1158 1151 1159 if (!(ifp->if_flags & XFS_IFEXTENTS)) { 1152 - error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK); 1160 + error = xfs_iread_extents(tp, ip, XFS_DATA_FORK); 1153 1161 if (error) 1154 1162 goto error0; 1155 1163 } ··· 1192 1200 goto error0; 1193 1201 } 1194 1202 1195 - xfs_irele(ip); 1196 - *res = BULKSTAT_RV_DIDONE; 1197 - return 0; 1198 - 1199 1203 error0: 1200 1204 xfs_irele(ip); 1201 - *res = BULKSTAT_RV_GIVEUP; 1202 1205 return error; 1203 1206 } 1204 1207 ··· 1257 1270 xfs_qm_quotacheck( 1258 1271 xfs_mount_t *mp) 1259 1272 { 1260 - int done, count, error, error2; 1261 - xfs_ino_t lastino; 1262 - size_t structsz; 1273 + int error, error2; 1263 1274 uint flags; 1264 1275 LIST_HEAD (buffer_list); 1265 1276 struct xfs_inode *uip = mp->m_quotainfo->qi_uquotaip; 1266 1277 struct xfs_inode *gip = mp->m_quotainfo->qi_gquotaip; 1267 1278 struct xfs_inode *pip = mp->m_quotainfo->qi_pquotaip; 1268 1279 1269 - count = INT_MAX; 1270 - structsz = 1; 1271 - lastino = 0; 1272 1280 flags = 0; 1273 1281 1274 1282 ASSERT(uip || gip || pip); ··· 1300 1318 flags |= XFS_PQUOTA_CHKD; 1301 1319 } 1302 1320 1303 - do { 1304 - /* 1305 - * Iterate thru all the inodes in the file system, 1306 - * adjusting the corresponding dquot counters in core. 
1307 - */ 1308 - error = xfs_bulkstat(mp, &lastino, &count, 1309 - xfs_qm_dqusage_adjust, 1310 - structsz, NULL, &done); 1311 - if (error) 1312 - break; 1313 - 1314 - } while (!done); 1321 + error = xfs_iwalk_threaded(mp, 0, 0, xfs_qm_dqusage_adjust, 0, true, 1322 + NULL); 1323 + if (error) 1324 + goto error_return; 1315 1325 1316 1326 /* 1317 1327 * We've made all the changes that we need to make incore. Flush them
+1 -1
fs/xfs/xfs_qm_bhv.c
··· 5 5 */ 6 6 #include "xfs.h" 7 7 #include "xfs_fs.h" 8 + #include "xfs_shared.h" 8 9 #include "xfs_format.h" 9 10 #include "xfs_log_format.h" 10 11 #include "xfs_trans_resv.h" 11 12 #include "xfs_quota.h" 12 13 #include "xfs_mount.h" 13 14 #include "xfs_inode.h" 14 - #include "xfs_error.h" 15 15 #include "xfs_trans.h" 16 16 #include "xfs_qm.h" 17 17
-5
fs/xfs/xfs_qm_syscalls.c
··· 4 4 * All Rights Reserved. 5 5 */ 6 6 7 - #include <linux/capability.h> 8 7 9 8 #include "xfs.h" 10 9 #include "xfs_fs.h" ··· 11 12 #include "xfs_format.h" 12 13 #include "xfs_log_format.h" 13 14 #include "xfs_trans_resv.h" 14 - #include "xfs_bit.h" 15 15 #include "xfs_sb.h" 16 16 #include "xfs_mount.h" 17 17 #include "xfs_inode.h" 18 18 #include "xfs_trans.h" 19 - #include "xfs_error.h" 20 19 #include "xfs_quota.h" 21 20 #include "xfs_qm.h" 22 - #include "xfs_trace.h" 23 21 #include "xfs_icache.h" 24 - #include "xfs_defer.h" 25 22 26 23 STATIC int xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint); 27 24 STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *,
+1 -2
fs/xfs/xfs_quotaops.c
··· 4 4 * All Rights Reserved. 5 5 */ 6 6 #include "xfs.h" 7 + #include "xfs_shared.h" 7 8 #include "xfs_format.h" 8 9 #include "xfs_log_format.h" 9 10 #include "xfs_trans_resv.h" ··· 12 11 #include "xfs_inode.h" 13 12 #include "xfs_quota.h" 14 13 #include "xfs_trans.h" 15 - #include "xfs_trace.h" 16 14 #include "xfs_icache.h" 17 15 #include "xfs_qm.h" 18 - #include <linux/quota.h> 19 16 20 17 21 18 static void
+219 -154
fs/xfs/xfs_refcount_item.c
··· 14 14 #include "xfs_defer.h" 15 15 #include "xfs_trans.h" 16 16 #include "xfs_trans_priv.h" 17 - #include "xfs_buf_item.h" 18 17 #include "xfs_refcount_item.h" 19 18 #include "xfs_log.h" 20 19 #include "xfs_refcount.h" ··· 94 95 } 95 96 96 97 /* 97 - * Pinning has no meaning for an cui item, so just return. 98 - */ 99 - STATIC void 100 - xfs_cui_item_pin( 101 - struct xfs_log_item *lip) 102 - { 103 - } 104 - 105 - /* 106 98 * The unpin operation is the last place an CUI is manipulated in the log. It is 107 99 * either inserted in the AIL or aborted in the event of a log I/O error. In 108 100 * either case, the CUI transaction has been successfully committed to make it ··· 112 122 } 113 123 114 124 /* 115 - * CUI items have no locking or pushing. However, since CUIs are pulled from 116 - * the AIL when their corresponding CUDs are committed to disk, their situation 117 - * is very similar to being pinned. Return XFS_ITEM_PINNED so that the caller 118 - * will eventually flush the log. This should help in getting the CUI out of 119 - * the AIL. 120 - */ 121 - STATIC uint 122 - xfs_cui_item_push( 123 - struct xfs_log_item *lip, 124 - struct list_head *buffer_list) 125 - { 126 - return XFS_ITEM_PINNED; 127 - } 128 - 129 - /* 130 125 * The CUI has been either committed or aborted if the transaction has been 131 126 * cancelled. If the transaction was cancelled, an CUD isn't going to be 132 127 * constructed and thus we free the CUI here directly. 133 128 */ 134 129 STATIC void 135 - xfs_cui_item_unlock( 130 + xfs_cui_item_release( 136 131 struct xfs_log_item *lip) 137 132 { 138 - if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) 139 - xfs_cui_release(CUI_ITEM(lip)); 133 + xfs_cui_release(CUI_ITEM(lip)); 140 134 } 141 135 142 - /* 143 - * The CUI is logged only once and cannot be moved in the log, so simply return 144 - * the lsn at which it's been logged. 
145 - */ 146 - STATIC xfs_lsn_t 147 - xfs_cui_item_committed( 148 - struct xfs_log_item *lip, 149 - xfs_lsn_t lsn) 150 - { 151 - return lsn; 152 - } 153 - 154 - /* 155 - * The CUI dependency tracking op doesn't do squat. It can't because 156 - * it doesn't know where the free extent is coming from. The dependency 157 - * tracking has to be handled by the "enclosing" metadata object. For 158 - * example, for inodes, the inode is locked throughout the extent freeing 159 - * so the dependency should be recorded there. 160 - */ 161 - STATIC void 162 - xfs_cui_item_committing( 163 - struct xfs_log_item *lip, 164 - xfs_lsn_t lsn) 165 - { 166 - } 167 - 168 - /* 169 - * This is the ops vector shared by all cui log items. 170 - */ 171 136 static const struct xfs_item_ops xfs_cui_item_ops = { 172 137 .iop_size = xfs_cui_item_size, 173 138 .iop_format = xfs_cui_item_format, 174 - .iop_pin = xfs_cui_item_pin, 175 139 .iop_unpin = xfs_cui_item_unpin, 176 - .iop_unlock = xfs_cui_item_unlock, 177 - .iop_committed = xfs_cui_item_committed, 178 - .iop_push = xfs_cui_item_push, 179 - .iop_committing = xfs_cui_item_committing, 140 + .iop_release = xfs_cui_item_release, 180 141 }; 181 142 182 143 /* ··· 195 254 } 196 255 197 256 /* 198 - * Pinning has no meaning for an cud item, so just return. 199 - */ 200 - STATIC void 201 - xfs_cud_item_pin( 202 - struct xfs_log_item *lip) 203 - { 204 - } 205 - 206 - /* 207 - * Since pinning has no meaning for an cud item, unpinning does 208 - * not either. 209 - */ 210 - STATIC void 211 - xfs_cud_item_unpin( 212 - struct xfs_log_item *lip, 213 - int remove) 214 - { 215 - } 216 - 217 - /* 218 - * There isn't much you can do to push on an cud item. It is simply stuck 219 - * waiting for the log to be flushed to disk. 
220 - */ 221 - STATIC uint 222 - xfs_cud_item_push( 223 - struct xfs_log_item *lip, 224 - struct list_head *buffer_list) 225 - { 226 - return XFS_ITEM_PINNED; 227 - } 228 - 229 - /* 230 257 * The CUD is either committed or aborted if the transaction is cancelled. If 231 258 * the transaction is cancelled, drop our reference to the CUI and free the 232 259 * CUD. 233 260 */ 234 261 STATIC void 235 - xfs_cud_item_unlock( 262 + xfs_cud_item_release( 236 263 struct xfs_log_item *lip) 237 264 { 238 265 struct xfs_cud_log_item *cudp = CUD_ITEM(lip); 239 266 240 - if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) { 241 - xfs_cui_release(cudp->cud_cuip); 242 - kmem_zone_free(xfs_cud_zone, cudp); 243 - } 244 - } 245 - 246 - /* 247 - * When the cud item is committed to disk, all we need to do is delete our 248 - * reference to our partner cui item and then free ourselves. Since we're 249 - * freeing ourselves we must return -1 to keep the transaction code from 250 - * further referencing this item. 251 - */ 252 - STATIC xfs_lsn_t 253 - xfs_cud_item_committed( 254 - struct xfs_log_item *lip, 255 - xfs_lsn_t lsn) 256 - { 257 - struct xfs_cud_log_item *cudp = CUD_ITEM(lip); 258 - 259 - /* 260 - * Drop the CUI reference regardless of whether the CUD has been 261 - * aborted. Once the CUD transaction is constructed, it is the sole 262 - * responsibility of the CUD to release the CUI (even if the CUI is 263 - * aborted due to log I/O error). 264 - */ 265 267 xfs_cui_release(cudp->cud_cuip); 266 268 kmem_zone_free(xfs_cud_zone, cudp); 267 - 268 - return (xfs_lsn_t)-1; 269 269 } 270 270 271 - /* 272 - * The CUD dependency tracking op doesn't do squat. It can't because 273 - * it doesn't know where the free extent is coming from. The dependency 274 - * tracking has to be handled by the "enclosing" metadata object. For 275 - * example, for inodes, the inode is locked throughout the extent freeing 276 - * so the dependency should be recorded there. 
277 - */ 278 - STATIC void 279 - xfs_cud_item_committing( 280 - struct xfs_log_item *lip, 281 - xfs_lsn_t lsn) 282 - { 283 - } 284 - 285 - /* 286 - * This is the ops vector shared by all cud log items. 287 - */ 288 271 static const struct xfs_item_ops xfs_cud_item_ops = { 272 + .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED, 289 273 .iop_size = xfs_cud_item_size, 290 274 .iop_format = xfs_cud_item_format, 291 - .iop_pin = xfs_cud_item_pin, 292 - .iop_unpin = xfs_cud_item_unpin, 293 - .iop_unlock = xfs_cud_item_unlock, 294 - .iop_committed = xfs_cud_item_committed, 295 - .iop_push = xfs_cud_item_push, 296 - .iop_committing = xfs_cud_item_committing, 275 + .iop_release = xfs_cud_item_release, 297 276 }; 298 277 299 - /* 300 - * Allocate and initialize an cud item with the given number of extents. 301 - */ 302 - struct xfs_cud_log_item * 303 - xfs_cud_init( 304 - struct xfs_mount *mp, 278 + static struct xfs_cud_log_item * 279 + xfs_trans_get_cud( 280 + struct xfs_trans *tp, 305 281 struct xfs_cui_log_item *cuip) 306 - 307 282 { 308 - struct xfs_cud_log_item *cudp; 283 + struct xfs_cud_log_item *cudp; 309 284 310 285 cudp = kmem_zone_zalloc(xfs_cud_zone, KM_SLEEP); 311 - xfs_log_item_init(mp, &cudp->cud_item, XFS_LI_CUD, &xfs_cud_item_ops); 286 + xfs_log_item_init(tp->t_mountp, &cudp->cud_item, XFS_LI_CUD, 287 + &xfs_cud_item_ops); 312 288 cudp->cud_cuip = cuip; 313 289 cudp->cud_format.cud_cui_id = cuip->cui_format.cui_id; 314 290 291 + xfs_trans_add_item(tp, &cudp->cud_item); 315 292 return cudp; 316 293 } 294 + 295 + /* 296 + * Finish an refcount update and log it to the CUD. Note that the 297 + * transaction is marked dirty regardless of whether the refcount 298 + * update succeeds or fails to support the CUI/CUD lifecycle rules. 
299 + */ 300 + static int 301 + xfs_trans_log_finish_refcount_update( 302 + struct xfs_trans *tp, 303 + struct xfs_cud_log_item *cudp, 304 + enum xfs_refcount_intent_type type, 305 + xfs_fsblock_t startblock, 306 + xfs_extlen_t blockcount, 307 + xfs_fsblock_t *new_fsb, 308 + xfs_extlen_t *new_len, 309 + struct xfs_btree_cur **pcur) 310 + { 311 + int error; 312 + 313 + error = xfs_refcount_finish_one(tp, type, startblock, 314 + blockcount, new_fsb, new_len, pcur); 315 + 316 + /* 317 + * Mark the transaction dirty, even on error. This ensures the 318 + * transaction is aborted, which: 319 + * 320 + * 1.) releases the CUI and frees the CUD 321 + * 2.) shuts down the filesystem 322 + */ 323 + tp->t_flags |= XFS_TRANS_DIRTY; 324 + set_bit(XFS_LI_DIRTY, &cudp->cud_item.li_flags); 325 + 326 + return error; 327 + } 328 + 329 + /* Sort refcount intents by AG. */ 330 + static int 331 + xfs_refcount_update_diff_items( 332 + void *priv, 333 + struct list_head *a, 334 + struct list_head *b) 335 + { 336 + struct xfs_mount *mp = priv; 337 + struct xfs_refcount_intent *ra; 338 + struct xfs_refcount_intent *rb; 339 + 340 + ra = container_of(a, struct xfs_refcount_intent, ri_list); 341 + rb = container_of(b, struct xfs_refcount_intent, ri_list); 342 + return XFS_FSB_TO_AGNO(mp, ra->ri_startblock) - 343 + XFS_FSB_TO_AGNO(mp, rb->ri_startblock); 344 + } 345 + 346 + /* Get an CUI. */ 347 + STATIC void * 348 + xfs_refcount_update_create_intent( 349 + struct xfs_trans *tp, 350 + unsigned int count) 351 + { 352 + struct xfs_cui_log_item *cuip; 353 + 354 + ASSERT(tp != NULL); 355 + ASSERT(count > 0); 356 + 357 + cuip = xfs_cui_init(tp->t_mountp, count); 358 + ASSERT(cuip != NULL); 359 + 360 + /* 361 + * Get a log_item_desc to point at the new item. 362 + */ 363 + xfs_trans_add_item(tp, &cuip->cui_item); 364 + return cuip; 365 + } 366 + 367 + /* Set the phys extent flags for this reverse mapping. 
*/ 368 + static void 369 + xfs_trans_set_refcount_flags( 370 + struct xfs_phys_extent *refc, 371 + enum xfs_refcount_intent_type type) 372 + { 373 + refc->pe_flags = 0; 374 + switch (type) { 375 + case XFS_REFCOUNT_INCREASE: 376 + case XFS_REFCOUNT_DECREASE: 377 + case XFS_REFCOUNT_ALLOC_COW: 378 + case XFS_REFCOUNT_FREE_COW: 379 + refc->pe_flags |= type; 380 + break; 381 + default: 382 + ASSERT(0); 383 + } 384 + } 385 + 386 + /* Log refcount updates in the intent item. */ 387 + STATIC void 388 + xfs_refcount_update_log_item( 389 + struct xfs_trans *tp, 390 + void *intent, 391 + struct list_head *item) 392 + { 393 + struct xfs_cui_log_item *cuip = intent; 394 + struct xfs_refcount_intent *refc; 395 + uint next_extent; 396 + struct xfs_phys_extent *ext; 397 + 398 + refc = container_of(item, struct xfs_refcount_intent, ri_list); 399 + 400 + tp->t_flags |= XFS_TRANS_DIRTY; 401 + set_bit(XFS_LI_DIRTY, &cuip->cui_item.li_flags); 402 + 403 + /* 404 + * atomic_inc_return gives us the value after the increment; 405 + * we want to use it as an array index so we need to subtract 1 from 406 + * it. 407 + */ 408 + next_extent = atomic_inc_return(&cuip->cui_next_extent) - 1; 409 + ASSERT(next_extent < cuip->cui_format.cui_nextents); 410 + ext = &cuip->cui_format.cui_extents[next_extent]; 411 + ext->pe_startblock = refc->ri_startblock; 412 + ext->pe_len = refc->ri_blockcount; 413 + xfs_trans_set_refcount_flags(ext, refc->ri_type); 414 + } 415 + 416 + /* Get an CUD so we can process all the deferred refcount updates. */ 417 + STATIC void * 418 + xfs_refcount_update_create_done( 419 + struct xfs_trans *tp, 420 + void *intent, 421 + unsigned int count) 422 + { 423 + return xfs_trans_get_cud(tp, intent); 424 + } 425 + 426 + /* Process a deferred refcount update. 
*/ 427 + STATIC int 428 + xfs_refcount_update_finish_item( 429 + struct xfs_trans *tp, 430 + struct list_head *item, 431 + void *done_item, 432 + void **state) 433 + { 434 + struct xfs_refcount_intent *refc; 435 + xfs_fsblock_t new_fsb; 436 + xfs_extlen_t new_aglen; 437 + int error; 438 + 439 + refc = container_of(item, struct xfs_refcount_intent, ri_list); 440 + error = xfs_trans_log_finish_refcount_update(tp, done_item, 441 + refc->ri_type, 442 + refc->ri_startblock, 443 + refc->ri_blockcount, 444 + &new_fsb, &new_aglen, 445 + (struct xfs_btree_cur **)state); 446 + /* Did we run out of reservation? Requeue what we didn't finish. */ 447 + if (!error && new_aglen > 0) { 448 + ASSERT(refc->ri_type == XFS_REFCOUNT_INCREASE || 449 + refc->ri_type == XFS_REFCOUNT_DECREASE); 450 + refc->ri_startblock = new_fsb; 451 + refc->ri_blockcount = new_aglen; 452 + return -EAGAIN; 453 + } 454 + kmem_free(refc); 455 + return error; 456 + } 457 + 458 + /* Clean up after processing deferred refcounts. */ 459 + STATIC void 460 + xfs_refcount_update_finish_cleanup( 461 + struct xfs_trans *tp, 462 + void *state, 463 + int error) 464 + { 465 + struct xfs_btree_cur *rcur = state; 466 + 467 + xfs_refcount_finish_one_cleanup(tp, rcur, error); 468 + } 469 + 470 + /* Abort all pending CUIs. */ 471 + STATIC void 472 + xfs_refcount_update_abort_intent( 473 + void *intent) 474 + { 475 + xfs_cui_release(intent); 476 + } 477 + 478 + /* Cancel a deferred refcount update. 
*/ 479 + STATIC void 480 + xfs_refcount_update_cancel_item( 481 + struct list_head *item) 482 + { 483 + struct xfs_refcount_intent *refc; 484 + 485 + refc = container_of(item, struct xfs_refcount_intent, ri_list); 486 + kmem_free(refc); 487 + } 488 + 489 + const struct xfs_defer_op_type xfs_refcount_update_defer_type = { 490 + .max_items = XFS_CUI_MAX_FAST_EXTENTS, 491 + .diff_items = xfs_refcount_update_diff_items, 492 + .create_intent = xfs_refcount_update_create_intent, 493 + .abort_intent = xfs_refcount_update_abort_intent, 494 + .log_item = xfs_refcount_update_log_item, 495 + .create_done = xfs_refcount_update_create_done, 496 + .finish_item = xfs_refcount_update_finish_item, 497 + .finish_cleanup = xfs_refcount_update_finish_cleanup, 498 + .cancel_item = xfs_refcount_update_cancel_item, 499 + }; 317 500 318 501 /* 319 502 * Process a refcount update intent item that was recovered from the log.
-2
fs/xfs/xfs_refcount_item.h
··· 78 78 extern struct kmem_zone *xfs_cud_zone; 79 79 80 80 struct xfs_cui_log_item *xfs_cui_init(struct xfs_mount *, uint); 81 - struct xfs_cud_log_item *xfs_cud_init(struct xfs_mount *, 82 - struct xfs_cui_log_item *); 83 81 void xfs_cui_item_free(struct xfs_cui_log_item *); 84 82 void xfs_cui_release(struct xfs_cui_log_item *); 85 83 int xfs_cui_recover(struct xfs_trans *parent_tp, struct xfs_cui_log_item *cuip);
+2 -13
fs/xfs/xfs_reflink.c
··· 11 11 #include "xfs_trans_resv.h" 12 12 #include "xfs_mount.h" 13 13 #include "xfs_defer.h" 14 - #include "xfs_da_format.h" 15 - #include "xfs_da_btree.h" 16 14 #include "xfs_inode.h" 17 15 #include "xfs_trans.h" 18 - #include "xfs_inode_item.h" 19 16 #include "xfs_bmap.h" 20 17 #include "xfs_bmap_util.h" 21 - #include "xfs_error.h" 22 - #include "xfs_dir2.h" 23 - #include "xfs_dir2_priv.h" 24 - #include "xfs_ioctl.h" 25 18 #include "xfs_trace.h" 26 - #include "xfs_log.h" 27 19 #include "xfs_icache.h" 28 - #include "xfs_pnfs.h" 29 20 #include "xfs_btree.h" 30 21 #include "xfs_refcount_btree.h" 31 22 #include "xfs_refcount.h" ··· 24 33 #include "xfs_trans_space.h" 25 34 #include "xfs_bit.h" 26 35 #include "xfs_alloc.h" 27 - #include "xfs_quota_defs.h" 28 36 #include "xfs_quota.h" 29 37 #include "xfs_reflink.h" 30 38 #include "xfs_iomap.h" 31 - #include "xfs_rmap_btree.h" 32 39 #include "xfs_sb.h" 33 40 #include "xfs_ag_resv.h" 34 41 ··· 561 572 562 573 /* Start a rolling transaction to remove the mappings */ 563 574 error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write, 564 - 0, 0, XFS_TRANS_NOFS, &tp); 575 + 0, 0, 0, &tp); 565 576 if (error) 566 577 goto out; 567 578 ··· 620 631 621 632 resblks = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK); 622 633 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 623 - XFS_TRANS_RESERVE | XFS_TRANS_NOFS, &tp); 634 + XFS_TRANS_RESERVE, &tp); 624 635 if (error) 625 636 return error; 626 637
+241 -155
fs/xfs/xfs_rmap_item.c
··· 14 14 #include "xfs_defer.h" 15 15 #include "xfs_trans.h" 16 16 #include "xfs_trans_priv.h" 17 - #include "xfs_buf_item.h" 18 17 #include "xfs_rmap_item.h" 19 18 #include "xfs_log.h" 20 19 #include "xfs_rmap.h" ··· 93 94 } 94 95 95 96 /* 96 - * Pinning has no meaning for an rui item, so just return. 97 - */ 98 - STATIC void 99 - xfs_rui_item_pin( 100 - struct xfs_log_item *lip) 101 - { 102 - } 103 - 104 - /* 105 97 * The unpin operation is the last place an RUI is manipulated in the log. It is 106 98 * either inserted in the AIL or aborted in the event of a log I/O error. In 107 99 * either case, the RUI transaction has been successfully committed to make it ··· 111 121 } 112 122 113 123 /* 114 - * RUI items have no locking or pushing. However, since RUIs are pulled from 115 - * the AIL when their corresponding RUDs are committed to disk, their situation 116 - * is very similar to being pinned. Return XFS_ITEM_PINNED so that the caller 117 - * will eventually flush the log. This should help in getting the RUI out of 118 - * the AIL. 119 - */ 120 - STATIC uint 121 - xfs_rui_item_push( 122 - struct xfs_log_item *lip, 123 - struct list_head *buffer_list) 124 - { 125 - return XFS_ITEM_PINNED; 126 - } 127 - 128 - /* 129 124 * The RUI has been either committed or aborted if the transaction has been 130 125 * cancelled. If the transaction was cancelled, an RUD isn't going to be 131 126 * constructed and thus we free the RUI here directly. 132 127 */ 133 128 STATIC void 134 - xfs_rui_item_unlock( 129 + xfs_rui_item_release( 135 130 struct xfs_log_item *lip) 136 131 { 137 - if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) 138 - xfs_rui_release(RUI_ITEM(lip)); 132 + xfs_rui_release(RUI_ITEM(lip)); 139 133 } 140 134 141 - /* 142 - * The RUI is logged only once and cannot be moved in the log, so simply return 143 - * the lsn at which it's been logged. 
144 - */ 145 - STATIC xfs_lsn_t 146 - xfs_rui_item_committed( 147 - struct xfs_log_item *lip, 148 - xfs_lsn_t lsn) 149 - { 150 - return lsn; 151 - } 152 - 153 - /* 154 - * The RUI dependency tracking op doesn't do squat. It can't because 155 - * it doesn't know where the free extent is coming from. The dependency 156 - * tracking has to be handled by the "enclosing" metadata object. For 157 - * example, for inodes, the inode is locked throughout the extent freeing 158 - * so the dependency should be recorded there. 159 - */ 160 - STATIC void 161 - xfs_rui_item_committing( 162 - struct xfs_log_item *lip, 163 - xfs_lsn_t lsn) 164 - { 165 - } 166 - 167 - /* 168 - * This is the ops vector shared by all rui log items. 169 - */ 170 135 static const struct xfs_item_ops xfs_rui_item_ops = { 171 136 .iop_size = xfs_rui_item_size, 172 137 .iop_format = xfs_rui_item_format, 173 - .iop_pin = xfs_rui_item_pin, 174 138 .iop_unpin = xfs_rui_item_unpin, 175 - .iop_unlock = xfs_rui_item_unlock, 176 - .iop_committed = xfs_rui_item_committed, 177 - .iop_push = xfs_rui_item_push, 178 - .iop_committing = xfs_rui_item_committing, 139 + .iop_release = xfs_rui_item_release, 179 140 }; 180 141 181 142 /* ··· 216 275 } 217 276 218 277 /* 219 - * Pinning has no meaning for an rud item, so just return. 220 - */ 221 - STATIC void 222 - xfs_rud_item_pin( 223 - struct xfs_log_item *lip) 224 - { 225 - } 226 - 227 - /* 228 - * Since pinning has no meaning for an rud item, unpinning does 229 - * not either. 230 - */ 231 - STATIC void 232 - xfs_rud_item_unpin( 233 - struct xfs_log_item *lip, 234 - int remove) 235 - { 236 - } 237 - 238 - /* 239 - * There isn't much you can do to push on an rud item. It is simply stuck 240 - * waiting for the log to be flushed to disk. 
241 - */ 242 - STATIC uint 243 - xfs_rud_item_push( 244 - struct xfs_log_item *lip, 245 - struct list_head *buffer_list) 246 - { 247 - return XFS_ITEM_PINNED; 248 - } 249 - 250 - /* 251 278 * The RUD is either committed or aborted if the transaction is cancelled. If 252 279 * the transaction is cancelled, drop our reference to the RUI and free the 253 280 * RUD. 254 281 */ 255 282 STATIC void 256 - xfs_rud_item_unlock( 283 + xfs_rud_item_release( 257 284 struct xfs_log_item *lip) 258 285 { 259 286 struct xfs_rud_log_item *rudp = RUD_ITEM(lip); 260 287 261 - if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) { 262 - xfs_rui_release(rudp->rud_ruip); 263 - kmem_zone_free(xfs_rud_zone, rudp); 288 + xfs_rui_release(rudp->rud_ruip); 289 + kmem_zone_free(xfs_rud_zone, rudp); 290 + } 291 + 292 + static const struct xfs_item_ops xfs_rud_item_ops = { 293 + .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED, 294 + .iop_size = xfs_rud_item_size, 295 + .iop_format = xfs_rud_item_format, 296 + .iop_release = xfs_rud_item_release, 297 + }; 298 + 299 + static struct xfs_rud_log_item * 300 + xfs_trans_get_rud( 301 + struct xfs_trans *tp, 302 + struct xfs_rui_log_item *ruip) 303 + { 304 + struct xfs_rud_log_item *rudp; 305 + 306 + rudp = kmem_zone_zalloc(xfs_rud_zone, KM_SLEEP); 307 + xfs_log_item_init(tp->t_mountp, &rudp->rud_item, XFS_LI_RUD, 308 + &xfs_rud_item_ops); 309 + rudp->rud_ruip = ruip; 310 + rudp->rud_format.rud_rui_id = ruip->rui_format.rui_id; 311 + 312 + xfs_trans_add_item(tp, &rudp->rud_item); 313 + return rudp; 314 + } 315 + 316 + /* Set the map extent flags for this reverse mapping. 
*/ 317 + static void 318 + xfs_trans_set_rmap_flags( 319 + struct xfs_map_extent *rmap, 320 + enum xfs_rmap_intent_type type, 321 + int whichfork, 322 + xfs_exntst_t state) 323 + { 324 + rmap->me_flags = 0; 325 + if (state == XFS_EXT_UNWRITTEN) 326 + rmap->me_flags |= XFS_RMAP_EXTENT_UNWRITTEN; 327 + if (whichfork == XFS_ATTR_FORK) 328 + rmap->me_flags |= XFS_RMAP_EXTENT_ATTR_FORK; 329 + switch (type) { 330 + case XFS_RMAP_MAP: 331 + rmap->me_flags |= XFS_RMAP_EXTENT_MAP; 332 + break; 333 + case XFS_RMAP_MAP_SHARED: 334 + rmap->me_flags |= XFS_RMAP_EXTENT_MAP_SHARED; 335 + break; 336 + case XFS_RMAP_UNMAP: 337 + rmap->me_flags |= XFS_RMAP_EXTENT_UNMAP; 338 + break; 339 + case XFS_RMAP_UNMAP_SHARED: 340 + rmap->me_flags |= XFS_RMAP_EXTENT_UNMAP_SHARED; 341 + break; 342 + case XFS_RMAP_CONVERT: 343 + rmap->me_flags |= XFS_RMAP_EXTENT_CONVERT; 344 + break; 345 + case XFS_RMAP_CONVERT_SHARED: 346 + rmap->me_flags |= XFS_RMAP_EXTENT_CONVERT_SHARED; 347 + break; 348 + case XFS_RMAP_ALLOC: 349 + rmap->me_flags |= XFS_RMAP_EXTENT_ALLOC; 350 + break; 351 + case XFS_RMAP_FREE: 352 + rmap->me_flags |= XFS_RMAP_EXTENT_FREE; 353 + break; 354 + default: 355 + ASSERT(0); 264 356 } 265 357 } 266 358 267 359 /* 268 - * When the rud item is committed to disk, all we need to do is delete our 269 - * reference to our partner rui item and then free ourselves. Since we're 270 - * freeing ourselves we must return -1 to keep the transaction code from 271 - * further referencing this item. 360 + * Finish an rmap update and log it to the RUD. Note that the transaction is 361 + * marked dirty regardless of whether the rmap update succeeds or fails to 362 + * support the RUI/RUD lifecycle rules. 
272 363 */ 273 - STATIC xfs_lsn_t 274 - xfs_rud_item_committed( 275 - struct xfs_log_item *lip, 276 - xfs_lsn_t lsn) 364 + static int 365 + xfs_trans_log_finish_rmap_update( 366 + struct xfs_trans *tp, 367 + struct xfs_rud_log_item *rudp, 368 + enum xfs_rmap_intent_type type, 369 + uint64_t owner, 370 + int whichfork, 371 + xfs_fileoff_t startoff, 372 + xfs_fsblock_t startblock, 373 + xfs_filblks_t blockcount, 374 + xfs_exntst_t state, 375 + struct xfs_btree_cur **pcur) 277 376 { 278 - struct xfs_rud_log_item *rudp = RUD_ITEM(lip); 377 + int error; 378 + 379 + error = xfs_rmap_finish_one(tp, type, owner, whichfork, startoff, 380 + startblock, blockcount, state, pcur); 279 381 280 382 /* 281 - * Drop the RUI reference regardless of whether the RUD has been 282 - * aborted. Once the RUD transaction is constructed, it is the sole 283 - * responsibility of the RUD to release the RUI (even if the RUI is 284 - * aborted due to log I/O error). 383 + * Mark the transaction dirty, even on error. This ensures the 384 + * transaction is aborted, which: 385 + * 386 + * 1.) releases the RUI and frees the RUD 387 + * 2.) shuts down the filesystem 285 388 */ 286 - xfs_rui_release(rudp->rud_ruip); 287 - kmem_zone_free(xfs_rud_zone, rudp); 389 + tp->t_flags |= XFS_TRANS_DIRTY; 390 + set_bit(XFS_LI_DIRTY, &rudp->rud_item.li_flags); 288 391 289 - return (xfs_lsn_t)-1; 392 + return error; 290 393 } 291 394 292 - /* 293 - * The RUD dependency tracking op doesn't do squat. It can't because 294 - * it doesn't know where the free extent is coming from. The dependency 295 - * tracking has to be handled by the "enclosing" metadata object. For 296 - * example, for inodes, the inode is locked throughout the extent freeing 297 - * so the dependency should be recorded there. 298 - */ 395 + /* Sort rmap intents by AG. 
*/ 396 + static int 397 + xfs_rmap_update_diff_items( 398 + void *priv, 399 + struct list_head *a, 400 + struct list_head *b) 401 + { 402 + struct xfs_mount *mp = priv; 403 + struct xfs_rmap_intent *ra; 404 + struct xfs_rmap_intent *rb; 405 + 406 + ra = container_of(a, struct xfs_rmap_intent, ri_list); 407 + rb = container_of(b, struct xfs_rmap_intent, ri_list); 408 + return XFS_FSB_TO_AGNO(mp, ra->ri_bmap.br_startblock) - 409 + XFS_FSB_TO_AGNO(mp, rb->ri_bmap.br_startblock); 410 + } 411 + 412 + /* Get an RUI. */ 413 + STATIC void * 414 + xfs_rmap_update_create_intent( 415 + struct xfs_trans *tp, 416 + unsigned int count) 417 + { 418 + struct xfs_rui_log_item *ruip; 419 + 420 + ASSERT(tp != NULL); 421 + ASSERT(count > 0); 422 + 423 + ruip = xfs_rui_init(tp->t_mountp, count); 424 + ASSERT(ruip != NULL); 425 + 426 + /* 427 + * Get a log_item_desc to point at the new item. 428 + */ 429 + xfs_trans_add_item(tp, &ruip->rui_item); 430 + return ruip; 431 + } 432 + 433 + /* Log rmap updates in the intent item. */ 299 434 STATIC void 300 - xfs_rud_item_committing( 301 - struct xfs_log_item *lip, 302 - xfs_lsn_t lsn) 435 + xfs_rmap_update_log_item( 436 + struct xfs_trans *tp, 437 + void *intent, 438 + struct list_head *item) 303 439 { 440 + struct xfs_rui_log_item *ruip = intent; 441 + struct xfs_rmap_intent *rmap; 442 + uint next_extent; 443 + struct xfs_map_extent *map; 444 + 445 + rmap = container_of(item, struct xfs_rmap_intent, ri_list); 446 + 447 + tp->t_flags |= XFS_TRANS_DIRTY; 448 + set_bit(XFS_LI_DIRTY, &ruip->rui_item.li_flags); 449 + 450 + /* 451 + * atomic_inc_return gives us the value after the increment; 452 + * we want to use it as an array index so we need to subtract 1 from 453 + * it. 
454 + */ 455 + next_extent = atomic_inc_return(&ruip->rui_next_extent) - 1; 456 + ASSERT(next_extent < ruip->rui_format.rui_nextents); 457 + map = &ruip->rui_format.rui_extents[next_extent]; 458 + map->me_owner = rmap->ri_owner; 459 + map->me_startblock = rmap->ri_bmap.br_startblock; 460 + map->me_startoff = rmap->ri_bmap.br_startoff; 461 + map->me_len = rmap->ri_bmap.br_blockcount; 462 + xfs_trans_set_rmap_flags(map, rmap->ri_type, rmap->ri_whichfork, 463 + rmap->ri_bmap.br_state); 304 464 } 305 465 306 - /* 307 - * This is the ops vector shared by all rud log items. 308 - */ 309 - static const struct xfs_item_ops xfs_rud_item_ops = { 310 - .iop_size = xfs_rud_item_size, 311 - .iop_format = xfs_rud_item_format, 312 - .iop_pin = xfs_rud_item_pin, 313 - .iop_unpin = xfs_rud_item_unpin, 314 - .iop_unlock = xfs_rud_item_unlock, 315 - .iop_committed = xfs_rud_item_committed, 316 - .iop_push = xfs_rud_item_push, 317 - .iop_committing = xfs_rud_item_committing, 466 + /* Get an RUD so we can process all the deferred rmap updates. */ 467 + STATIC void * 468 + xfs_rmap_update_create_done( 469 + struct xfs_trans *tp, 470 + void *intent, 471 + unsigned int count) 472 + { 473 + return xfs_trans_get_rud(tp, intent); 474 + } 475 + 476 + /* Process a deferred rmap update. */ 477 + STATIC int 478 + xfs_rmap_update_finish_item( 479 + struct xfs_trans *tp, 480 + struct list_head *item, 481 + void *done_item, 482 + void **state) 483 + { 484 + struct xfs_rmap_intent *rmap; 485 + int error; 486 + 487 + rmap = container_of(item, struct xfs_rmap_intent, ri_list); 488 + error = xfs_trans_log_finish_rmap_update(tp, done_item, 489 + rmap->ri_type, 490 + rmap->ri_owner, rmap->ri_whichfork, 491 + rmap->ri_bmap.br_startoff, 492 + rmap->ri_bmap.br_startblock, 493 + rmap->ri_bmap.br_blockcount, 494 + rmap->ri_bmap.br_state, 495 + (struct xfs_btree_cur **)state); 496 + kmem_free(rmap); 497 + return error; 498 + } 499 + 500 + /* Clean up after processing deferred rmaps. 
*/ 501 + STATIC void 502 + xfs_rmap_update_finish_cleanup( 503 + struct xfs_trans *tp, 504 + void *state, 505 + int error) 506 + { 507 + struct xfs_btree_cur *rcur = state; 508 + 509 + xfs_rmap_finish_one_cleanup(tp, rcur, error); 510 + } 511 + 512 + /* Abort all pending RUIs. */ 513 + STATIC void 514 + xfs_rmap_update_abort_intent( 515 + void *intent) 516 + { 517 + xfs_rui_release(intent); 518 + } 519 + 520 + /* Cancel a deferred rmap update. */ 521 + STATIC void 522 + xfs_rmap_update_cancel_item( 523 + struct list_head *item) 524 + { 525 + struct xfs_rmap_intent *rmap; 526 + 527 + rmap = container_of(item, struct xfs_rmap_intent, ri_list); 528 + kmem_free(rmap); 529 + } 530 + 531 + const struct xfs_defer_op_type xfs_rmap_update_defer_type = { 532 + .max_items = XFS_RUI_MAX_FAST_EXTENTS, 533 + .diff_items = xfs_rmap_update_diff_items, 534 + .create_intent = xfs_rmap_update_create_intent, 535 + .abort_intent = xfs_rmap_update_abort_intent, 536 + .log_item = xfs_rmap_update_log_item, 537 + .create_done = xfs_rmap_update_create_done, 538 + .finish_item = xfs_rmap_update_finish_item, 539 + .finish_cleanup = xfs_rmap_update_finish_cleanup, 540 + .cancel_item = xfs_rmap_update_cancel_item, 318 541 }; 319 - 320 - /* 321 - * Allocate and initialize an rud item with the given number of extents. 322 - */ 323 - struct xfs_rud_log_item * 324 - xfs_rud_init( 325 - struct xfs_mount *mp, 326 - struct xfs_rui_log_item *ruip) 327 - 328 - { 329 - struct xfs_rud_log_item *rudp; 330 - 331 - rudp = kmem_zone_zalloc(xfs_rud_zone, KM_SLEEP); 332 - xfs_log_item_init(mp, &rudp->rud_item, XFS_LI_RUD, &xfs_rud_item_ops); 333 - rudp->rud_ruip = ruip; 334 - rudp->rud_format.rud_rui_id = ruip->rui_format.rui_id; 335 - 336 - return rudp; 337 - } 338 542 339 543 /* 340 544 * Process an rmap update intent item that was recovered from the log.
-2
fs/xfs/xfs_rmap_item.h
··· 78 78 extern struct kmem_zone *xfs_rud_zone; 79 79 80 80 struct xfs_rui_log_item *xfs_rui_init(struct xfs_mount *, uint); 81 - struct xfs_rud_log_item *xfs_rud_init(struct xfs_mount *, 82 - struct xfs_rui_log_item *); 83 81 int xfs_rui_copy_format(struct xfs_log_iovec *buf, 84 82 struct xfs_rui_log_format *dst_rui_fmt); 85 83 void xfs_rui_item_free(struct xfs_rui_log_item *);
-6
fs/xfs/xfs_rtalloc.c
··· 11 11 #include "xfs_trans_resv.h" 12 12 #include "xfs_bit.h" 13 13 #include "xfs_mount.h" 14 - #include "xfs_defer.h" 15 14 #include "xfs_inode.h" 16 15 #include "xfs_bmap.h" 17 - #include "xfs_bmap_util.h" 18 16 #include "xfs_bmap_btree.h" 19 - #include "xfs_alloc.h" 20 - #include "xfs_error.h" 21 17 #include "xfs_trans.h" 22 18 #include "xfs_trans_space.h" 23 - #include "xfs_trace.h" 24 - #include "xfs_buf.h" 25 19 #include "xfs_icache.h" 26 20 #include "xfs_rtalloc.h" 27 21
-1
fs/xfs/xfs_stats.c
··· 4 4 * All Rights Reserved. 5 5 */ 6 6 #include "xfs.h" 7 - #include <linux/proc_fs.h> 8 7 9 8 struct xstats xfsstats; 10 9
+6 -26
fs/xfs/xfs_super.c
··· 11 11 #include "xfs_trans_resv.h" 12 12 #include "xfs_sb.h" 13 13 #include "xfs_mount.h" 14 - #include "xfs_da_format.h" 15 14 #include "xfs_inode.h" 16 15 #include "xfs_btree.h" 17 16 #include "xfs_bmap.h" 18 17 #include "xfs_alloc.h" 19 - #include "xfs_error.h" 20 18 #include "xfs_fsops.h" 21 19 #include "xfs_trans.h" 22 20 #include "xfs_buf_item.h" 23 21 #include "xfs_log.h" 24 22 #include "xfs_log_priv.h" 25 - #include "xfs_da_btree.h" 26 23 #include "xfs_dir2.h" 27 24 #include "xfs_extfree_item.h" 28 25 #include "xfs_mru_cache.h" ··· 35 38 #include "xfs_refcount_item.h" 36 39 #include "xfs_bmap_item.h" 37 40 #include "xfs_reflink.h" 38 - #include "xfs_defer.h" 39 41 40 - #include <linux/namei.h> 41 - #include <linux/dax.h> 42 - #include <linux/init.h> 43 - #include <linux/slab.h> 44 42 #include <linux/magic.h> 45 - #include <linux/mount.h> 46 - #include <linux/mempool.h> 47 - #include <linux/writeback.h> 48 - #include <linux/kthread.h> 49 - #include <linux/freezer.h> 50 43 #include <linux/parser.h> 51 44 52 45 static const struct super_operations xfs_super_operations; ··· 569 582 * Calculate how much should be reserved for inodes to meet 570 583 * the max inode percentage. Used only for inode32. 
571 584 */ 572 - if (mp->m_maxicount) { 585 + if (M_IGEO(mp)->maxicount) { 573 586 uint64_t icount; 574 587 575 588 icount = sbp->sb_dblocks * sbp->sb_imax_pct; ··· 827 840 if (!mp->m_reclaim_workqueue) 828 841 goto out_destroy_cil; 829 842 830 - mp->m_log_workqueue = alloc_workqueue("xfs-log/%s", 831 - WQ_MEM_RECLAIM|WQ_FREEZABLE|WQ_HIGHPRI, 0, 832 - mp->m_fsname); 833 - if (!mp->m_log_workqueue) 834 - goto out_destroy_reclaim; 835 - 836 843 mp->m_eofblocks_workqueue = alloc_workqueue("xfs-eofblocks/%s", 837 844 WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname); 838 845 if (!mp->m_eofblocks_workqueue) 839 - goto out_destroy_log; 846 + goto out_destroy_reclaim; 840 847 841 848 mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s", WQ_FREEZABLE, 0, 842 849 mp->m_fsname); ··· 841 860 842 861 out_destroy_eofb: 843 862 destroy_workqueue(mp->m_eofblocks_workqueue); 844 - out_destroy_log: 845 - destroy_workqueue(mp->m_log_workqueue); 846 863 out_destroy_reclaim: 847 864 destroy_workqueue(mp->m_reclaim_workqueue); 848 865 out_destroy_cil: ··· 859 880 { 860 881 destroy_workqueue(mp->m_sync_workqueue); 861 882 destroy_workqueue(mp->m_eofblocks_workqueue); 862 - destroy_workqueue(mp->m_log_workqueue); 863 883 destroy_workqueue(mp->m_reclaim_workqueue); 864 884 destroy_workqueue(mp->m_cil_workqueue); 865 885 destroy_workqueue(mp->m_unwritten_workqueue); ··· 1109 1131 1110 1132 fakeinos = XFS_FSB_TO_INO(mp, statp->f_bfree); 1111 1133 statp->f_files = min(icount + fakeinos, (uint64_t)XFS_MAXINUMBER); 1112 - if (mp->m_maxicount) 1134 + if (M_IGEO(mp)->maxicount) 1113 1135 statp->f_files = min_t(typeof(statp->f_files), 1114 1136 statp->f_files, 1115 - mp->m_maxicount); 1137 + M_IGEO(mp)->maxicount); 1116 1138 1117 1139 /* If sb_icount overshot maxicount, report actual allocation */ 1118 1140 statp->f_files = max_t(typeof(statp->f_files), ··· 1663 1685 sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits); 1664 1686 sb->s_max_links = XFS_MAXLINK; 1665 1687 sb->s_time_gran = 1; 
1688 + sb->s_iflags |= SB_I_CGROUPWB; 1689 + 1666 1690 set_posix_acl_flag(sb); 1667 1691 1668 1692 /* version 5 superblocks support inode version counters. */
+14
fs/xfs/xfs_super.h
··· 38 38 # define XFS_SCRUB_STRING 39 39 #endif 40 40 41 + #ifdef CONFIG_XFS_ONLINE_REPAIR 42 + # define XFS_REPAIR_STRING "repair, " 43 + #else 44 + # define XFS_REPAIR_STRING 45 + #endif 46 + 47 + #ifdef CONFIG_XFS_WARN 48 + # define XFS_WARN_STRING "verbose warnings, " 49 + #else 50 + # define XFS_WARN_STRING 51 + #endif 52 + 41 53 #ifdef DEBUG 42 54 # define XFS_DBG_STRING "debug" 43 55 #else ··· 61 49 XFS_SECURITY_STRING \ 62 50 XFS_REALTIME_STRING \ 63 51 XFS_SCRUB_STRING \ 52 + XFS_REPAIR_STRING \ 53 + XFS_WARN_STRING \ 64 54 XFS_DBG_STRING /* DBG must be last */ 65 55 66 56 struct xfs_inode;
-9
fs/xfs/xfs_symlink.c
··· 12 12 #include "xfs_trans_resv.h" 13 13 #include "xfs_bit.h" 14 14 #include "xfs_mount.h" 15 - #include "xfs_da_format.h" 16 - #include "xfs_da_btree.h" 17 - #include "xfs_defer.h" 18 15 #include "xfs_dir2.h" 19 16 #include "xfs_inode.h" 20 - #include "xfs_ialloc.h" 21 - #include "xfs_alloc.h" 22 17 #include "xfs_bmap.h" 23 18 #include "xfs_bmap_btree.h" 24 - #include "xfs_bmap_util.h" 25 - #include "xfs_error.h" 26 19 #include "xfs_quota.h" 27 20 #include "xfs_trans_space.h" 28 21 #include "xfs_trace.h" 29 - #include "xfs_symlink.h" 30 22 #include "xfs_trans.h" 31 - #include "xfs_log.h" 32 23 33 24 /* ----- Kernel only functions below ----- */ 34 25 int
-3
fs/xfs/xfs_sysctl.c
··· 4 4 * All Rights Reserved. 5 5 */ 6 6 #include "xfs.h" 7 - #include <linux/sysctl.h> 8 - #include <linux/proc_fs.h> 9 7 #include "xfs_error.h" 10 - #include "xfs_stats.h" 11 8 12 9 static struct ctl_table_header *xfs_table_header; 13 10
+3
fs/xfs/xfs_sysctl.h
··· 82 82 extern xfs_param_t xfs_params; 83 83 84 84 struct xfs_globals { 85 + #ifdef DEBUG 86 + int pwork_threads; /* parallel workqueue threads */ 87 + #endif 85 88 int log_recovery_delay; /* log recovery delay (secs) */ 86 89 int mount_delay; /* mount setup delay (secs) */ 87 90 bool bug_on_assert; /* BUG() the kernel on assert failure */
+40 -2
fs/xfs/xfs_sysfs.c
··· 10 10 #include "xfs_log_format.h" 11 11 #include "xfs_trans_resv.h" 12 12 #include "xfs_sysfs.h" 13 - #include "xfs_log.h" 14 13 #include "xfs_log_priv.h" 15 - #include "xfs_stats.h" 16 14 #include "xfs_mount.h" 17 15 18 16 struct xfs_sysfs_attr { ··· 204 206 } 205 207 XFS_SYSFS_ATTR_RW(always_cow); 206 208 209 + #ifdef DEBUG 210 + /* 211 + * Override how many threads the parallel work queue is allowed to create. 212 + * This has to be a debug-only global (instead of an errortag) because one of 213 + * the main users of parallel workqueues is mount time quotacheck. 214 + */ 215 + STATIC ssize_t 216 + pwork_threads_store( 217 + struct kobject *kobject, 218 + const char *buf, 219 + size_t count) 220 + { 221 + int ret; 222 + int val; 223 + 224 + ret = kstrtoint(buf, 0, &val); 225 + if (ret) 226 + return ret; 227 + 228 + if (val < -1 || val > num_possible_cpus()) 229 + return -EINVAL; 230 + 231 + xfs_globals.pwork_threads = val; 232 + 233 + return count; 234 + } 235 + 236 + STATIC ssize_t 237 + pwork_threads_show( 238 + struct kobject *kobject, 239 + char *buf) 240 + { 241 + return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.pwork_threads); 242 + } 243 + XFS_SYSFS_ATTR_RW(pwork_threads); 244 + #endif /* DEBUG */ 245 + 207 246 static struct attribute *xfs_dbg_attrs[] = { 208 247 ATTR_LIST(bug_on_assert), 209 248 ATTR_LIST(log_recovery_delay), 210 249 ATTR_LIST(mount_delay), 211 250 ATTR_LIST(always_cow), 251 + #ifdef DEBUG 252 + ATTR_LIST(pwork_threads), 253 + #endif 212 254 NULL, 213 255 }; 214 256
-8
fs/xfs/xfs_trace.c
··· 15 15 #include "xfs_inode.h" 16 16 #include "xfs_btree.h" 17 17 #include "xfs_da_btree.h" 18 - #include "xfs_ialloc.h" 19 - #include "xfs_itable.h" 20 18 #include "xfs_alloc.h" 21 19 #include "xfs_bmap.h" 22 20 #include "xfs_attr.h" 23 - #include "xfs_attr_leaf.h" 24 21 #include "xfs_trans.h" 25 - #include "xfs_log.h" 26 22 #include "xfs_log_priv.h" 27 23 #include "xfs_buf_item.h" 28 24 #include "xfs_quota.h" 29 - #include "xfs_iomap.h" 30 - #include "xfs_aops.h" 31 25 #include "xfs_dquot_item.h" 32 26 #include "xfs_dquot.h" 33 27 #include "xfs_log_recover.h" 34 - #include "xfs_inode_item.h" 35 - #include "xfs_bmap_btree.h" 36 28 #include "xfs_filestream.h" 37 29 #include "xfs_fsmap.h" 38 30
+60 -1
fs/xfs/xfs_trace.h
··· 475 475 DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin); 476 476 DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin); 477 477 DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale); 478 - DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock); 478 + DEFINE_BUF_ITEM_EVENT(xfs_buf_item_release); 479 479 DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed); 480 480 DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push); 481 481 DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf); ··· 3360 3360 DEFINE_TRANS_EVENT(xfs_trans_free); 3361 3361 DEFINE_TRANS_EVENT(xfs_trans_roll); 3362 3362 DEFINE_TRANS_EVENT(xfs_trans_add_item); 3363 + DEFINE_TRANS_EVENT(xfs_trans_commit_items); 3363 3364 DEFINE_TRANS_EVENT(xfs_trans_free_items); 3364 3365 3365 3366 TRACE_EVENT(xfs_iunlink_update_bucket, ··· 3516 3515 TP_ARGS(ip, flags)) 3517 3516 DEFINE_INODE_CORRUPT_EVENT(xfs_inode_mark_sick); 3518 3517 DEFINE_INODE_CORRUPT_EVENT(xfs_inode_mark_healthy); 3518 + 3519 + TRACE_EVENT(xfs_iwalk_ag, 3520 + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, 3521 + xfs_agino_t startino), 3522 + TP_ARGS(mp, agno, startino), 3523 + TP_STRUCT__entry( 3524 + __field(dev_t, dev) 3525 + __field(xfs_agnumber_t, agno) 3526 + __field(xfs_agino_t, startino) 3527 + ), 3528 + TP_fast_assign( 3529 + __entry->dev = mp->m_super->s_dev; 3530 + __entry->agno = agno; 3531 + __entry->startino = startino; 3532 + ), 3533 + TP_printk("dev %d:%d agno %d startino %u", 3534 + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, 3535 + __entry->startino) 3536 + ) 3537 + 3538 + TRACE_EVENT(xfs_iwalk_ag_rec, 3539 + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, 3540 + struct xfs_inobt_rec_incore *irec), 3541 + TP_ARGS(mp, agno, irec), 3542 + TP_STRUCT__entry( 3543 + __field(dev_t, dev) 3544 + __field(xfs_agnumber_t, agno) 3545 + __field(xfs_agino_t, startino) 3546 + __field(uint64_t, freemask) 3547 + ), 3548 + TP_fast_assign( 3549 + __entry->dev = mp->m_super->s_dev; 3550 + __entry->agno = agno; 3551 + __entry->startino = irec->ir_startino; 3552 + __entry->freemask = 
irec->ir_free; 3553 + ), 3554 + TP_printk("dev %d:%d agno %d startino %u freemask 0x%llx", 3555 + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, 3556 + __entry->startino, __entry->freemask) 3557 + ) 3558 + 3559 + TRACE_EVENT(xfs_pwork_init, 3560 + TP_PROTO(struct xfs_mount *mp, unsigned int nr_threads, pid_t pid), 3561 + TP_ARGS(mp, nr_threads, pid), 3562 + TP_STRUCT__entry( 3563 + __field(dev_t, dev) 3564 + __field(unsigned int, nr_threads) 3565 + __field(pid_t, pid) 3566 + ), 3567 + TP_fast_assign( 3568 + __entry->dev = mp->m_super->s_dev; 3569 + __entry->nr_threads = nr_threads; 3570 + __entry->pid = pid; 3571 + ), 3572 + TP_printk("dev %d:%d nr_threads %u pid %u", 3573 + MAJOR(__entry->dev), MINOR(__entry->dev), 3574 + __entry->nr_threads, __entry->pid) 3575 + ) 3519 3576 3520 3577 #endif /* _TRACE_XFS_H */ 3521 3578
+25 -18
fs/xfs/xfs_trans.c
··· 11 11 #include "xfs_log_format.h" 12 12 #include "xfs_trans_resv.h" 13 13 #include "xfs_mount.h" 14 - #include "xfs_inode.h" 15 14 #include "xfs_extent_busy.h" 16 15 #include "xfs_quota.h" 17 16 #include "xfs_trans.h" ··· 263 264 * GFP_NOFS allocation context so that we avoid lockdep false positives 264 265 * by doing GFP_KERNEL allocations inside sb_start_intwrite(). 265 266 */ 266 - tp = kmem_zone_zalloc(xfs_trans_zone, 267 - (flags & XFS_TRANS_NOFS) ? KM_NOFS : KM_SLEEP); 268 - 267 + tp = kmem_zone_zalloc(xfs_trans_zone, KM_SLEEP); 269 268 if (!(flags & XFS_TRANS_NO_WRITECOUNT)) 270 269 sb_start_intwrite(mp->m_super); 271 270 ··· 449 452 xfs_buf_t *bp; 450 453 int whole = 0; 451 454 452 - bp = xfs_trans_getsb(tp, tp->t_mountp, 0); 455 + bp = xfs_trans_getsb(tp, tp->t_mountp); 453 456 sbp = XFS_BUF_TO_SBP(bp); 454 457 455 458 /* ··· 764 767 } 765 768 766 769 /* Detach and unlock all of the items in a transaction */ 767 - void 770 + static void 768 771 xfs_trans_free_items( 769 772 struct xfs_trans *tp, 770 - xfs_lsn_t commit_lsn, 771 773 bool abort) 772 774 { 773 775 struct xfs_log_item *lip, *next; ··· 775 779 776 780 list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) { 777 781 xfs_trans_del_item(lip); 778 - if (commit_lsn != NULLCOMMITLSN) 779 - lip->li_ops->iop_committing(lip, commit_lsn); 780 782 if (abort) 781 783 set_bit(XFS_LI_ABORTED, &lip->li_flags); 782 - lip->li_ops->iop_unlock(lip); 784 + if (lip->li_ops->iop_release) 785 + lip->li_ops->iop_release(lip); 783 786 } 784 787 } 785 788 ··· 799 804 for (i = 0; i < nr_items; i++) { 800 805 struct xfs_log_item *lip = log_items[i]; 801 806 802 - lip->li_ops->iop_unpin(lip, 0); 807 + if (lip->li_ops->iop_unpin) 808 + lip->li_ops->iop_unpin(lip, 0); 803 809 } 804 810 } 805 811 ··· 811 815 * 812 816 * If we are called with the aborted flag set, it is because a log write during 813 817 * a CIL checkpoint commit has failed. 
In this case, all the items in the 814 - * checkpoint have already gone through iop_commited and iop_unlock, which 818 + * checkpoint have already gone through iop_committed and iop_committing, which 815 819 * means that checkpoint commit abort handling is treated exactly the same 816 820 * as an iclog write error even though we haven't started any IO yet. Hence in 817 821 * this case all we need to do is iop_committed processing, followed by an ··· 829 833 struct xfs_ail *ailp, 830 834 struct xfs_log_vec *log_vector, 831 835 xfs_lsn_t commit_lsn, 832 - int aborted) 836 + bool aborted) 833 837 { 834 838 #define LOG_ITEM_BATCH_SIZE 32 835 839 struct xfs_log_item *log_items[LOG_ITEM_BATCH_SIZE]; ··· 848 852 849 853 if (aborted) 850 854 set_bit(XFS_LI_ABORTED, &lip->li_flags); 851 - item_lsn = lip->li_ops->iop_committed(lip, commit_lsn); 855 + 856 + if (lip->li_ops->flags & XFS_ITEM_RELEASE_WHEN_COMMITTED) { 857 + lip->li_ops->iop_release(lip); 858 + continue; 859 + } 860 + 861 + if (lip->li_ops->iop_committed) 862 + item_lsn = lip->li_ops->iop_committed(lip, commit_lsn); 863 + else 864 + item_lsn = commit_lsn; 852 865 853 866 /* item_lsn of -1 means the item needs no further processing */ 854 867 if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0) ··· 869 864 */ 870 865 if (aborted) { 871 866 ASSERT(XFS_FORCED_SHUTDOWN(ailp->ail_mount)); 872 - lip->li_ops->iop_unpin(lip, 1); 867 + if (lip->li_ops->iop_unpin) 868 + lip->li_ops->iop_unpin(lip, 1); 873 869 continue; 874 870 } 875 871 ··· 888 882 xfs_trans_ail_update(ailp, lip, item_lsn); 889 883 else 890 884 spin_unlock(&ailp->ail_lock); 891 - lip->li_ops->iop_unpin(lip, 0); 885 + if (lip->li_ops->iop_unpin) 886 + lip->li_ops->iop_unpin(lip, 0); 892 887 continue; 893 888 } 894 889 ··· 1005 998 tp->t_ticket = NULL; 1006 999 } 1007 1000 current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); 1008 - xfs_trans_free_items(tp, NULLCOMMITLSN, !!error); 1001 + xfs_trans_free_items(tp, !!error); 1009 1002 xfs_trans_free(tp); 
1010 1003 1011 1004 XFS_STATS_INC(mp, xs_trans_empty); ··· 1067 1060 /* mark this thread as no longer being in a transaction */ 1068 1061 current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); 1069 1062 1070 - xfs_trans_free_items(tp, NULLCOMMITLSN, dirty); 1063 + xfs_trans_free_items(tp, dirty); 1071 1064 xfs_trans_free(tp); 1072 1065 } 1073 1066
+18 -52
fs/xfs/xfs_trans.h
··· 27 27 struct xfs_bui_log_item; 28 28 struct xfs_bud_log_item; 29 29 30 - typedef struct xfs_log_item { 30 + struct xfs_log_item { 31 31 struct list_head li_ail; /* AIL pointers */ 32 32 struct list_head li_trans; /* transaction list */ 33 33 xfs_lsn_t li_lsn; /* last on-disk lsn */ ··· 48 48 struct xfs_log_vec *li_lv; /* active log vector */ 49 49 struct xfs_log_vec *li_lv_shadow; /* standby vector */ 50 50 xfs_lsn_t li_seq; /* CIL commit seq */ 51 - } xfs_log_item_t; 51 + }; 52 52 53 53 /* 54 54 * li_flags use the (set/test/clear)_bit atomic interfaces because updates can ··· 67 67 { (1 << XFS_LI_DIRTY), "DIRTY" } 68 68 69 69 struct xfs_item_ops { 70 - void (*iop_size)(xfs_log_item_t *, int *, int *); 71 - void (*iop_format)(xfs_log_item_t *, struct xfs_log_vec *); 72 - void (*iop_pin)(xfs_log_item_t *); 73 - void (*iop_unpin)(xfs_log_item_t *, int remove); 70 + unsigned flags; 71 + void (*iop_size)(struct xfs_log_item *, int *, int *); 72 + void (*iop_format)(struct xfs_log_item *, struct xfs_log_vec *); 73 + void (*iop_pin)(struct xfs_log_item *); 74 + void (*iop_unpin)(struct xfs_log_item *, int remove); 74 75 uint (*iop_push)(struct xfs_log_item *, struct list_head *); 75 - void (*iop_unlock)(xfs_log_item_t *); 76 - xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t); 77 - void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t); 78 - void (*iop_error)(xfs_log_item_t *, xfs_buf_t *); 76 + void (*iop_committing)(struct xfs_log_item *, xfs_lsn_t commit_lsn); 77 + void (*iop_release)(struct xfs_log_item *); 78 + xfs_lsn_t (*iop_committed)(struct xfs_log_item *, xfs_lsn_t); 79 + void (*iop_error)(struct xfs_log_item *, xfs_buf_t *); 79 80 }; 81 + 82 + /* 83 + * Release the log item as soon as committed. This is for items just logging 84 + * intents that never need to be written back in place. 
85 + */ 86 + #define XFS_ITEM_RELEASE_WHEN_COMMITTED (1 << 0) 80 87 81 88 void xfs_log_item_init(struct xfs_mount *mp, struct xfs_log_item *item, 82 89 int type, const struct xfs_item_ops *ops); ··· 210 203 flags, bpp, ops); 211 204 } 212 205 213 - struct xfs_buf *xfs_trans_getsb(xfs_trans_t *, struct xfs_mount *, int); 206 + struct xfs_buf *xfs_trans_getsb(xfs_trans_t *, struct xfs_mount *); 214 207 215 208 void xfs_trans_brelse(xfs_trans_t *, struct xfs_buf *); 216 209 void xfs_trans_bjoin(xfs_trans_t *, struct xfs_buf *); ··· 230 223 bool xfs_trans_buf_is_dirty(struct xfs_buf *bp); 231 224 void xfs_trans_log_inode(xfs_trans_t *, struct xfs_inode *, uint); 232 225 233 - struct xfs_efd_log_item *xfs_trans_get_efd(struct xfs_trans *, 234 - struct xfs_efi_log_item *, 235 - uint); 236 - int xfs_trans_free_extent(struct xfs_trans *, 237 - struct xfs_efd_log_item *, xfs_fsblock_t, 238 - xfs_extlen_t, 239 - const struct xfs_owner_info *, 240 - bool); 241 226 int xfs_trans_commit(struct xfs_trans *); 242 227 int xfs_trans_roll(struct xfs_trans **); 243 228 int xfs_trans_roll_inode(struct xfs_trans **, struct xfs_inode *); ··· 243 244 struct xfs_buf *src_bp); 244 245 245 246 extern kmem_zone_t *xfs_trans_zone; 246 - 247 - /* rmap updates */ 248 - enum xfs_rmap_intent_type; 249 - 250 - struct xfs_rud_log_item *xfs_trans_get_rud(struct xfs_trans *tp, 251 - struct xfs_rui_log_item *ruip); 252 - int xfs_trans_log_finish_rmap_update(struct xfs_trans *tp, 253 - struct xfs_rud_log_item *rudp, enum xfs_rmap_intent_type type, 254 - uint64_t owner, int whichfork, xfs_fileoff_t startoff, 255 - xfs_fsblock_t startblock, xfs_filblks_t blockcount, 256 - xfs_exntst_t state, struct xfs_btree_cur **pcur); 257 - 258 - /* refcount updates */ 259 - enum xfs_refcount_intent_type; 260 - 261 - struct xfs_cud_log_item *xfs_trans_get_cud(struct xfs_trans *tp, 262 - struct xfs_cui_log_item *cuip); 263 - int xfs_trans_log_finish_refcount_update(struct xfs_trans *tp, 264 - struct xfs_cud_log_item 
*cudp, 265 - enum xfs_refcount_intent_type type, xfs_fsblock_t startblock, 266 - xfs_extlen_t blockcount, xfs_fsblock_t *new_fsb, 267 - xfs_extlen_t *new_len, struct xfs_btree_cur **pcur); 268 - 269 - /* mapping updates */ 270 - enum xfs_bmap_intent_type; 271 - 272 - struct xfs_bud_log_item *xfs_trans_get_bud(struct xfs_trans *tp, 273 - struct xfs_bui_log_item *buip); 274 - int xfs_trans_log_finish_bmap_update(struct xfs_trans *tp, 275 - struct xfs_bud_log_item *rudp, enum xfs_bmap_intent_type type, 276 - struct xfs_inode *ip, int whichfork, xfs_fileoff_t startoff, 277 - xfs_fsblock_t startblock, xfs_filblks_t *blockcount, 278 - xfs_exntst_t state); 279 247 280 248 #endif /* __XFS_TRANS_H__ */
+31 -22
fs/xfs/xfs_trans_ail.c
··· 6 6 */ 7 7 #include "xfs.h" 8 8 #include "xfs_fs.h" 9 + #include "xfs_shared.h" 9 10 #include "xfs_format.h" 10 11 #include "xfs_log_format.h" 11 12 #include "xfs_trans_resv.h" ··· 75 74 * Return a pointer to the last item in the AIL. If the AIL is empty, then 76 75 * return NULL. 77 76 */ 78 - static xfs_log_item_t * 77 + static struct xfs_log_item * 79 78 xfs_ail_max( 80 79 struct xfs_ail *ailp) 81 80 { 82 81 if (list_empty(&ailp->ail_head)) 83 82 return NULL; 84 83 85 - return list_entry(ailp->ail_head.prev, xfs_log_item_t, li_ail); 84 + return list_entry(ailp->ail_head.prev, struct xfs_log_item, li_ail); 86 85 } 87 86 88 87 /* 89 88 * Return a pointer to the item which follows the given item in the AIL. If 90 89 * the given item is the last item in the list, then return NULL. 91 90 */ 92 - static xfs_log_item_t * 91 + static struct xfs_log_item * 93 92 xfs_ail_next( 94 - struct xfs_ail *ailp, 95 - xfs_log_item_t *lip) 93 + struct xfs_ail *ailp, 94 + struct xfs_log_item *lip) 96 95 { 97 96 if (lip->li_ail.next == &ailp->ail_head) 98 97 return NULL; 99 98 100 - return list_first_entry(&lip->li_ail, xfs_log_item_t, li_ail); 99 + return list_first_entry(&lip->li_ail, struct xfs_log_item, li_ail); 101 100 } 102 101 103 102 /* ··· 110 109 */ 111 110 xfs_lsn_t 112 111 xfs_ail_min_lsn( 113 - struct xfs_ail *ailp) 112 + struct xfs_ail *ailp) 114 113 { 115 - xfs_lsn_t lsn = 0; 116 - xfs_log_item_t *lip; 114 + xfs_lsn_t lsn = 0; 115 + struct xfs_log_item *lip; 117 116 118 117 spin_lock(&ailp->ail_lock); 119 118 lip = xfs_ail_min(ailp); ··· 129 128 */ 130 129 static xfs_lsn_t 131 130 xfs_ail_max_lsn( 132 - struct xfs_ail *ailp) 131 + struct xfs_ail *ailp) 133 132 { 134 - xfs_lsn_t lsn = 0; 135 - xfs_log_item_t *lip; 133 + xfs_lsn_t lsn = 0; 134 + struct xfs_log_item *lip; 136 135 137 136 spin_lock(&ailp->ail_lock); 138 137 lip = xfs_ail_max(ailp); ··· 217 216 * ascending traversal. Pass a @lsn of zero to initialise the cursor to the 218 217 * first item in the AIL. 
Returns NULL if the list is empty. 219 218 */ 220 - xfs_log_item_t * 219 + struct xfs_log_item * 221 220 xfs_trans_ail_cursor_first( 222 221 struct xfs_ail *ailp, 223 222 struct xfs_ail_cursor *cur, 224 223 xfs_lsn_t lsn) 225 224 { 226 - xfs_log_item_t *lip; 225 + struct xfs_log_item *lip; 227 226 228 227 xfs_trans_ail_cursor_init(ailp, cur); 229 228 ··· 249 248 struct xfs_ail *ailp, 250 249 xfs_lsn_t lsn) 251 250 { 252 - xfs_log_item_t *lip; 251 + struct xfs_log_item *lip; 253 252 254 253 list_for_each_entry_reverse(lip, &ailp->ail_head, li_ail) { 255 254 if (XFS_LSN_CMP(lip->li_lsn, lsn) <= 0) ··· 328 327 */ 329 328 static void 330 329 xfs_ail_delete( 331 - struct xfs_ail *ailp, 332 - xfs_log_item_t *lip) 330 + struct xfs_ail *ailp, 331 + struct xfs_log_item *lip) 333 332 { 334 333 xfs_ail_check(ailp, lip); 335 334 list_del(&lip->li_ail); ··· 348 347 if (XFS_TEST_ERROR(false, ailp->ail_mount, XFS_ERRTAG_LOG_ITEM_PIN)) 349 348 return XFS_ITEM_PINNED; 350 349 350 + /* 351 + * Consider the item pinned if a push callback is not defined so the 352 + * caller will force the log. This should only happen for intent items 353 + * as they are unpinned once the associated done item is committed to 354 + * the on-disk log. 
355 + */ 356 + if (!lip->li_ops->iop_push) 357 + return XFS_ITEM_PINNED; 351 358 return lip->li_ops->iop_push(lip, &ailp->ail_buf_list); 352 359 } 353 360 ··· 365 356 { 366 357 xfs_mount_t *mp = ailp->ail_mount; 367 358 struct xfs_ail_cursor cur; 368 - xfs_log_item_t *lip; 359 + struct xfs_log_item *lip; 369 360 xfs_lsn_t lsn; 370 361 xfs_lsn_t target; 371 362 long tout; ··· 620 611 */ 621 612 void 622 613 xfs_ail_push( 623 - struct xfs_ail *ailp, 624 - xfs_lsn_t threshold_lsn) 614 + struct xfs_ail *ailp, 615 + xfs_lsn_t threshold_lsn) 625 616 { 626 - xfs_log_item_t *lip; 617 + struct xfs_log_item *lip; 627 618 628 619 lip = xfs_ail_min(ailp); 629 620 if (!lip || XFS_FORCED_SHUTDOWN(ailp->ail_mount) || ··· 708 699 int nr_items, 709 700 xfs_lsn_t lsn) __releases(ailp->ail_lock) 710 701 { 711 - xfs_log_item_t *mlip; 702 + struct xfs_log_item *mlip; 712 703 int mlip_changed = 0; 713 704 int i; 714 705 LIST_HEAD(tmp);
-232
fs/xfs/xfs_trans_bmap.c
··· 1 - // SPDX-License-Identifier: GPL-2.0+ 2 - /* 3 - * Copyright (C) 2016 Oracle. All Rights Reserved. 4 - * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 - */ 6 - #include "xfs.h" 7 - #include "xfs_fs.h" 8 - #include "xfs_shared.h" 9 - #include "xfs_format.h" 10 - #include "xfs_log_format.h" 11 - #include "xfs_trans_resv.h" 12 - #include "xfs_mount.h" 13 - #include "xfs_defer.h" 14 - #include "xfs_trans.h" 15 - #include "xfs_trans_priv.h" 16 - #include "xfs_bmap_item.h" 17 - #include "xfs_alloc.h" 18 - #include "xfs_bmap.h" 19 - #include "xfs_inode.h" 20 - 21 - /* 22 - * This routine is called to allocate a "bmap update done" 23 - * log item. 24 - */ 25 - struct xfs_bud_log_item * 26 - xfs_trans_get_bud( 27 - struct xfs_trans *tp, 28 - struct xfs_bui_log_item *buip) 29 - { 30 - struct xfs_bud_log_item *budp; 31 - 32 - budp = xfs_bud_init(tp->t_mountp, buip); 33 - xfs_trans_add_item(tp, &budp->bud_item); 34 - return budp; 35 - } 36 - 37 - /* 38 - * Finish an bmap update and log it to the BUD. Note that the 39 - * transaction is marked dirty regardless of whether the bmap update 40 - * succeeds or fails to support the BUI/BUD lifecycle rules. 41 - */ 42 - int 43 - xfs_trans_log_finish_bmap_update( 44 - struct xfs_trans *tp, 45 - struct xfs_bud_log_item *budp, 46 - enum xfs_bmap_intent_type type, 47 - struct xfs_inode *ip, 48 - int whichfork, 49 - xfs_fileoff_t startoff, 50 - xfs_fsblock_t startblock, 51 - xfs_filblks_t *blockcount, 52 - xfs_exntst_t state) 53 - { 54 - int error; 55 - 56 - error = xfs_bmap_finish_one(tp, ip, type, whichfork, startoff, 57 - startblock, blockcount, state); 58 - 59 - /* 60 - * Mark the transaction dirty, even on error. This ensures the 61 - * transaction is aborted, which: 62 - * 63 - * 1.) releases the BUI and frees the BUD 64 - * 2.) 
shuts down the filesystem 65 - */ 66 - tp->t_flags |= XFS_TRANS_DIRTY; 67 - set_bit(XFS_LI_DIRTY, &budp->bud_item.li_flags); 68 - 69 - return error; 70 - } 71 - 72 - /* Sort bmap intents by inode. */ 73 - static int 74 - xfs_bmap_update_diff_items( 75 - void *priv, 76 - struct list_head *a, 77 - struct list_head *b) 78 - { 79 - struct xfs_bmap_intent *ba; 80 - struct xfs_bmap_intent *bb; 81 - 82 - ba = container_of(a, struct xfs_bmap_intent, bi_list); 83 - bb = container_of(b, struct xfs_bmap_intent, bi_list); 84 - return ba->bi_owner->i_ino - bb->bi_owner->i_ino; 85 - } 86 - 87 - /* Get an BUI. */ 88 - STATIC void * 89 - xfs_bmap_update_create_intent( 90 - struct xfs_trans *tp, 91 - unsigned int count) 92 - { 93 - struct xfs_bui_log_item *buip; 94 - 95 - ASSERT(count == XFS_BUI_MAX_FAST_EXTENTS); 96 - ASSERT(tp != NULL); 97 - 98 - buip = xfs_bui_init(tp->t_mountp); 99 - ASSERT(buip != NULL); 100 - 101 - /* 102 - * Get a log_item_desc to point at the new item. 103 - */ 104 - xfs_trans_add_item(tp, &buip->bui_item); 105 - return buip; 106 - } 107 - 108 - /* Set the map extent flags for this mapping. */ 109 - static void 110 - xfs_trans_set_bmap_flags( 111 - struct xfs_map_extent *bmap, 112 - enum xfs_bmap_intent_type type, 113 - int whichfork, 114 - xfs_exntst_t state) 115 - { 116 - bmap->me_flags = 0; 117 - switch (type) { 118 - case XFS_BMAP_MAP: 119 - case XFS_BMAP_UNMAP: 120 - bmap->me_flags = type; 121 - break; 122 - default: 123 - ASSERT(0); 124 - } 125 - if (state == XFS_EXT_UNWRITTEN) 126 - bmap->me_flags |= XFS_BMAP_EXTENT_UNWRITTEN; 127 - if (whichfork == XFS_ATTR_FORK) 128 - bmap->me_flags |= XFS_BMAP_EXTENT_ATTR_FORK; 129 - } 130 - 131 - /* Log bmap updates in the intent item. 
*/ 132 - STATIC void 133 - xfs_bmap_update_log_item( 134 - struct xfs_trans *tp, 135 - void *intent, 136 - struct list_head *item) 137 - { 138 - struct xfs_bui_log_item *buip = intent; 139 - struct xfs_bmap_intent *bmap; 140 - uint next_extent; 141 - struct xfs_map_extent *map; 142 - 143 - bmap = container_of(item, struct xfs_bmap_intent, bi_list); 144 - 145 - tp->t_flags |= XFS_TRANS_DIRTY; 146 - set_bit(XFS_LI_DIRTY, &buip->bui_item.li_flags); 147 - 148 - /* 149 - * atomic_inc_return gives us the value after the increment; 150 - * we want to use it as an array index so we need to subtract 1 from 151 - * it. 152 - */ 153 - next_extent = atomic_inc_return(&buip->bui_next_extent) - 1; 154 - ASSERT(next_extent < buip->bui_format.bui_nextents); 155 - map = &buip->bui_format.bui_extents[next_extent]; 156 - map->me_owner = bmap->bi_owner->i_ino; 157 - map->me_startblock = bmap->bi_bmap.br_startblock; 158 - map->me_startoff = bmap->bi_bmap.br_startoff; 159 - map->me_len = bmap->bi_bmap.br_blockcount; 160 - xfs_trans_set_bmap_flags(map, bmap->bi_type, bmap->bi_whichfork, 161 - bmap->bi_bmap.br_state); 162 - } 163 - 164 - /* Get an BUD so we can process all the deferred rmap updates. */ 165 - STATIC void * 166 - xfs_bmap_update_create_done( 167 - struct xfs_trans *tp, 168 - void *intent, 169 - unsigned int count) 170 - { 171 - return xfs_trans_get_bud(tp, intent); 172 - } 173 - 174 - /* Process a deferred rmap update. 
*/ 175 - STATIC int 176 - xfs_bmap_update_finish_item( 177 - struct xfs_trans *tp, 178 - struct list_head *item, 179 - void *done_item, 180 - void **state) 181 - { 182 - struct xfs_bmap_intent *bmap; 183 - xfs_filblks_t count; 184 - int error; 185 - 186 - bmap = container_of(item, struct xfs_bmap_intent, bi_list); 187 - count = bmap->bi_bmap.br_blockcount; 188 - error = xfs_trans_log_finish_bmap_update(tp, done_item, 189 - bmap->bi_type, 190 - bmap->bi_owner, bmap->bi_whichfork, 191 - bmap->bi_bmap.br_startoff, 192 - bmap->bi_bmap.br_startblock, 193 - &count, 194 - bmap->bi_bmap.br_state); 195 - if (!error && count > 0) { 196 - ASSERT(bmap->bi_type == XFS_BMAP_UNMAP); 197 - bmap->bi_bmap.br_blockcount = count; 198 - return -EAGAIN; 199 - } 200 - kmem_free(bmap); 201 - return error; 202 - } 203 - 204 - /* Abort all pending BUIs. */ 205 - STATIC void 206 - xfs_bmap_update_abort_intent( 207 - void *intent) 208 - { 209 - xfs_bui_release(intent); 210 - } 211 - 212 - /* Cancel a deferred rmap update. */ 213 - STATIC void 214 - xfs_bmap_update_cancel_item( 215 - struct list_head *item) 216 - { 217 - struct xfs_bmap_intent *bmap; 218 - 219 - bmap = container_of(item, struct xfs_bmap_intent, bi_list); 220 - kmem_free(bmap); 221 - } 222 - 223 - const struct xfs_defer_op_type xfs_bmap_update_defer_type = { 224 - .max_items = XFS_BUI_MAX_FAST_EXTENTS, 225 - .diff_items = xfs_bmap_update_diff_items, 226 - .create_intent = xfs_bmap_update_create_intent, 227 - .abort_intent = xfs_bmap_update_abort_intent, 228 - .log_item = xfs_bmap_update_log_item, 229 - .create_done = xfs_bmap_update_create_done, 230 - .finish_item = xfs_bmap_update_finish_item, 231 - .cancel_item = xfs_bmap_update_cancel_item, 232 - };
+4 -7
fs/xfs/xfs_trans_buf.c
··· 10 10 #include "xfs_log_format.h" 11 11 #include "xfs_trans_resv.h" 12 12 #include "xfs_mount.h" 13 - #include "xfs_inode.h" 14 13 #include "xfs_trans.h" 15 14 #include "xfs_buf_item.h" 16 15 #include "xfs_trans_priv.h" 17 - #include "xfs_error.h" 18 16 #include "xfs_trace.h" 19 17 20 18 /* ··· 172 174 xfs_buf_t * 173 175 xfs_trans_getsb( 174 176 xfs_trans_t *tp, 175 - struct xfs_mount *mp, 176 - int flags) 177 + struct xfs_mount *mp) 177 178 { 178 179 xfs_buf_t *bp; 179 180 struct xfs_buf_log_item *bip; ··· 182 185 * if tp is NULL. 183 186 */ 184 187 if (tp == NULL) 185 - return xfs_getsb(mp, flags); 188 + return xfs_getsb(mp); 186 189 187 190 /* 188 191 * If the superblock buffer already has this transaction ··· 200 203 return bp; 201 204 } 202 205 203 - bp = xfs_getsb(mp, flags); 206 + bp = xfs_getsb(mp); 204 207 if (bp == NULL) 205 208 return NULL; 206 209 ··· 425 428 426 429 /* 427 430 * Mark the buffer as not needing to be unlocked when the buf item's 428 - * iop_unlock() routine is called. The buffer must already be locked 431 + * iop_committing() routine is called. The buffer must already be locked 429 432 * and associated with the given transaction. 430 433 */ 431 434 /* ARGSUSED */
-11
fs/xfs/xfs_trans_dquot.c
··· 11 11 #include "xfs_trans_resv.h" 12 12 #include "xfs_mount.h" 13 13 #include "xfs_inode.h" 14 - #include "xfs_error.h" 15 14 #include "xfs_trans.h" 16 15 #include "xfs_trans_priv.h" 17 16 #include "xfs_quota.h" ··· 28 29 xfs_trans_t *tp, 29 30 xfs_dquot_t *dqp) 30 31 { 31 - ASSERT(dqp->q_transp != tp); 32 32 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 33 33 ASSERT(dqp->q_logitem.qli_dquot == dqp); 34 34 ··· 35 37 * Get a log_item_desc to point at the new item. 36 38 */ 37 39 xfs_trans_add_item(tp, &dqp->q_logitem.qli_item); 38 - 39 - /* 40 - * Initialize d_transp so we can later determine if this dquot is 41 - * associated with this transaction. 42 - */ 43 - dqp->q_transp = tp; 44 40 } 45 - 46 41 47 42 /* 48 43 * This is called to mark the dquot as needing ··· 52 61 xfs_trans_t *tp, 53 62 xfs_dquot_t *dqp) 54 63 { 55 - ASSERT(dqp->q_transp == tp); 56 64 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 57 65 58 66 tp->t_flags |= XFS_TRANS_DIRTY; ··· 337 347 break; 338 348 339 349 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 340 - ASSERT(dqp->q_transp == tp); 341 350 342 351 /* 343 352 * adjust the actual number of blocks used
-286
fs/xfs/xfs_trans_extfree.c
··· 1 - // SPDX-License-Identifier: GPL-2.0 2 - /* 3 - * Copyright (c) 2000,2005 Silicon Graphics, Inc. 4 - * All Rights Reserved. 5 - */ 6 - #include "xfs.h" 7 - #include "xfs_fs.h" 8 - #include "xfs_shared.h" 9 - #include "xfs_format.h" 10 - #include "xfs_log_format.h" 11 - #include "xfs_trans_resv.h" 12 - #include "xfs_bit.h" 13 - #include "xfs_mount.h" 14 - #include "xfs_defer.h" 15 - #include "xfs_trans.h" 16 - #include "xfs_trans_priv.h" 17 - #include "xfs_extfree_item.h" 18 - #include "xfs_alloc.h" 19 - #include "xfs_bmap.h" 20 - #include "xfs_trace.h" 21 - 22 - /* 23 - * This routine is called to allocate an "extent free done" 24 - * log item that will hold nextents worth of extents. The 25 - * caller must use all nextents extents, because we are not 26 - * flexible about this at all. 27 - */ 28 - struct xfs_efd_log_item * 29 - xfs_trans_get_efd(struct xfs_trans *tp, 30 - struct xfs_efi_log_item *efip, 31 - uint nextents) 32 - { 33 - struct xfs_efd_log_item *efdp; 34 - 35 - ASSERT(tp != NULL); 36 - ASSERT(nextents > 0); 37 - 38 - efdp = xfs_efd_init(tp->t_mountp, efip, nextents); 39 - ASSERT(efdp != NULL); 40 - 41 - /* 42 - * Get a log_item_desc to point at the new item. 43 - */ 44 - xfs_trans_add_item(tp, &efdp->efd_item); 45 - return efdp; 46 - } 47 - 48 - /* 49 - * Free an extent and log it to the EFD. Note that the transaction is marked 50 - * dirty regardless of whether the extent free succeeds or fails to support the 51 - * EFI/EFD lifecycle rules. 
52 - */ 53 - int 54 - xfs_trans_free_extent( 55 - struct xfs_trans *tp, 56 - struct xfs_efd_log_item *efdp, 57 - xfs_fsblock_t start_block, 58 - xfs_extlen_t ext_len, 59 - const struct xfs_owner_info *oinfo, 60 - bool skip_discard) 61 - { 62 - struct xfs_mount *mp = tp->t_mountp; 63 - struct xfs_extent *extp; 64 - uint next_extent; 65 - xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, start_block); 66 - xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp, 67 - start_block); 68 - int error; 69 - 70 - trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno, ext_len); 71 - 72 - error = __xfs_free_extent(tp, start_block, ext_len, 73 - oinfo, XFS_AG_RESV_NONE, skip_discard); 74 - /* 75 - * Mark the transaction dirty, even on error. This ensures the 76 - * transaction is aborted, which: 77 - * 78 - * 1.) releases the EFI and frees the EFD 79 - * 2.) shuts down the filesystem 80 - */ 81 - tp->t_flags |= XFS_TRANS_DIRTY; 82 - set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags); 83 - 84 - next_extent = efdp->efd_next_extent; 85 - ASSERT(next_extent < efdp->efd_format.efd_nextents); 86 - extp = &(efdp->efd_format.efd_extents[next_extent]); 87 - extp->ext_start = start_block; 88 - extp->ext_len = ext_len; 89 - efdp->efd_next_extent++; 90 - 91 - return error; 92 - } 93 - 94 - /* Sort bmap items by AG. */ 95 - static int 96 - xfs_extent_free_diff_items( 97 - void *priv, 98 - struct list_head *a, 99 - struct list_head *b) 100 - { 101 - struct xfs_mount *mp = priv; 102 - struct xfs_extent_free_item *ra; 103 - struct xfs_extent_free_item *rb; 104 - 105 - ra = container_of(a, struct xfs_extent_free_item, xefi_list); 106 - rb = container_of(b, struct xfs_extent_free_item, xefi_list); 107 - return XFS_FSB_TO_AGNO(mp, ra->xefi_startblock) - 108 - XFS_FSB_TO_AGNO(mp, rb->xefi_startblock); 109 - } 110 - 111 - /* Get an EFI. 
*/ 112 - STATIC void * 113 - xfs_extent_free_create_intent( 114 - struct xfs_trans *tp, 115 - unsigned int count) 116 - { 117 - struct xfs_efi_log_item *efip; 118 - 119 - ASSERT(tp != NULL); 120 - ASSERT(count > 0); 121 - 122 - efip = xfs_efi_init(tp->t_mountp, count); 123 - ASSERT(efip != NULL); 124 - 125 - /* 126 - * Get a log_item_desc to point at the new item. 127 - */ 128 - xfs_trans_add_item(tp, &efip->efi_item); 129 - return efip; 130 - } 131 - 132 - /* Log a free extent to the intent item. */ 133 - STATIC void 134 - xfs_extent_free_log_item( 135 - struct xfs_trans *tp, 136 - void *intent, 137 - struct list_head *item) 138 - { 139 - struct xfs_efi_log_item *efip = intent; 140 - struct xfs_extent_free_item *free; 141 - uint next_extent; 142 - struct xfs_extent *extp; 143 - 144 - free = container_of(item, struct xfs_extent_free_item, xefi_list); 145 - 146 - tp->t_flags |= XFS_TRANS_DIRTY; 147 - set_bit(XFS_LI_DIRTY, &efip->efi_item.li_flags); 148 - 149 - /* 150 - * atomic_inc_return gives us the value after the increment; 151 - * we want to use it as an array index so we need to subtract 1 from 152 - * it. 153 - */ 154 - next_extent = atomic_inc_return(&efip->efi_next_extent) - 1; 155 - ASSERT(next_extent < efip->efi_format.efi_nextents); 156 - extp = &efip->efi_format.efi_extents[next_extent]; 157 - extp->ext_start = free->xefi_startblock; 158 - extp->ext_len = free->xefi_blockcount; 159 - } 160 - 161 - /* Get an EFD so we can process all the free extents. */ 162 - STATIC void * 163 - xfs_extent_free_create_done( 164 - struct xfs_trans *tp, 165 - void *intent, 166 - unsigned int count) 167 - { 168 - return xfs_trans_get_efd(tp, intent, count); 169 - } 170 - 171 - /* Process a free extent. 
*/ 172 - STATIC int 173 - xfs_extent_free_finish_item( 174 - struct xfs_trans *tp, 175 - struct list_head *item, 176 - void *done_item, 177 - void **state) 178 - { 179 - struct xfs_extent_free_item *free; 180 - int error; 181 - 182 - free = container_of(item, struct xfs_extent_free_item, xefi_list); 183 - error = xfs_trans_free_extent(tp, done_item, 184 - free->xefi_startblock, 185 - free->xefi_blockcount, 186 - &free->xefi_oinfo, free->xefi_skip_discard); 187 - kmem_free(free); 188 - return error; 189 - } 190 - 191 - /* Abort all pending EFIs. */ 192 - STATIC void 193 - xfs_extent_free_abort_intent( 194 - void *intent) 195 - { 196 - xfs_efi_release(intent); 197 - } 198 - 199 - /* Cancel a free extent. */ 200 - STATIC void 201 - xfs_extent_free_cancel_item( 202 - struct list_head *item) 203 - { 204 - struct xfs_extent_free_item *free; 205 - 206 - free = container_of(item, struct xfs_extent_free_item, xefi_list); 207 - kmem_free(free); 208 - } 209 - 210 - const struct xfs_defer_op_type xfs_extent_free_defer_type = { 211 - .max_items = XFS_EFI_MAX_FAST_EXTENTS, 212 - .diff_items = xfs_extent_free_diff_items, 213 - .create_intent = xfs_extent_free_create_intent, 214 - .abort_intent = xfs_extent_free_abort_intent, 215 - .log_item = xfs_extent_free_log_item, 216 - .create_done = xfs_extent_free_create_done, 217 - .finish_item = xfs_extent_free_finish_item, 218 - .cancel_item = xfs_extent_free_cancel_item, 219 - }; 220 - 221 - /* 222 - * AGFL blocks are accounted differently in the reserve pools and are not 223 - * inserted into the busy extent list. 
224 - */ 225 - STATIC int 226 - xfs_agfl_free_finish_item( 227 - struct xfs_trans *tp, 228 - struct list_head *item, 229 - void *done_item, 230 - void **state) 231 - { 232 - struct xfs_mount *mp = tp->t_mountp; 233 - struct xfs_efd_log_item *efdp = done_item; 234 - struct xfs_extent_free_item *free; 235 - struct xfs_extent *extp; 236 - struct xfs_buf *agbp; 237 - int error; 238 - xfs_agnumber_t agno; 239 - xfs_agblock_t agbno; 240 - uint next_extent; 241 - 242 - free = container_of(item, struct xfs_extent_free_item, xefi_list); 243 - ASSERT(free->xefi_blockcount == 1); 244 - agno = XFS_FSB_TO_AGNO(mp, free->xefi_startblock); 245 - agbno = XFS_FSB_TO_AGBNO(mp, free->xefi_startblock); 246 - 247 - trace_xfs_agfl_free_deferred(mp, agno, 0, agbno, free->xefi_blockcount); 248 - 249 - error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp); 250 - if (!error) 251 - error = xfs_free_agfl_block(tp, agno, agbno, agbp, 252 - &free->xefi_oinfo); 253 - 254 - /* 255 - * Mark the transaction dirty, even on error. This ensures the 256 - * transaction is aborted, which: 257 - * 258 - * 1.) releases the EFI and frees the EFD 259 - * 2.) 
shuts down the filesystem 260 - */ 261 - tp->t_flags |= XFS_TRANS_DIRTY; 262 - set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags); 263 - 264 - next_extent = efdp->efd_next_extent; 265 - ASSERT(next_extent < efdp->efd_format.efd_nextents); 266 - extp = &(efdp->efd_format.efd_extents[next_extent]); 267 - extp->ext_start = free->xefi_startblock; 268 - extp->ext_len = free->xefi_blockcount; 269 - efdp->efd_next_extent++; 270 - 271 - kmem_free(free); 272 - return error; 273 - } 274 - 275 - 276 - /* sub-type with special handling for AGFL deferred frees */ 277 - const struct xfs_defer_op_type xfs_agfl_free_defer_type = { 278 - .max_items = XFS_EFI_MAX_FAST_EXTENTS, 279 - .diff_items = xfs_extent_free_diff_items, 280 - .create_intent = xfs_extent_free_create_intent, 281 - .abort_intent = xfs_extent_free_abort_intent, 282 - .log_item = xfs_extent_free_log_item, 283 - .create_done = xfs_extent_free_create_done, 284 - .finish_item = xfs_agfl_free_finish_item, 285 - .cancel_item = xfs_extent_free_cancel_item, 286 - };
-3
fs/xfs/xfs_trans_inode.c
··· 8 8 #include "xfs_shared.h" 9 9 #include "xfs_format.h" 10 10 #include "xfs_log_format.h" 11 - #include "xfs_trans_resv.h" 12 - #include "xfs_mount.h" 13 11 #include "xfs_inode.h" 14 12 #include "xfs_trans.h" 15 13 #include "xfs_trans_priv.h" 16 14 #include "xfs_inode_item.h" 17 - #include "xfs_trace.h" 18 15 19 16 #include <linux/iversion.h> 20 17
+1 -3
fs/xfs/xfs_trans_priv.h
··· 16 16 void xfs_trans_init(struct xfs_mount *); 17 17 void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *); 18 18 void xfs_trans_del_item(struct xfs_log_item *); 19 - void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn, 20 - bool abort); 21 19 void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp); 22 20 23 21 void xfs_trans_committed_bulk(struct xfs_ail *ailp, struct xfs_log_vec *lv, 24 - xfs_lsn_t commit_lsn, int aborted); 22 + xfs_lsn_t commit_lsn, bool aborted); 25 23 /* 26 24 * AIL traversal cursor. 27 25 *
-240
fs/xfs/xfs_trans_refcount.c
··· 1 - // SPDX-License-Identifier: GPL-2.0+ 2 - /* 3 - * Copyright (C) 2016 Oracle. All Rights Reserved. 4 - * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 - */ 6 - #include "xfs.h" 7 - #include "xfs_fs.h" 8 - #include "xfs_shared.h" 9 - #include "xfs_format.h" 10 - #include "xfs_log_format.h" 11 - #include "xfs_trans_resv.h" 12 - #include "xfs_mount.h" 13 - #include "xfs_defer.h" 14 - #include "xfs_trans.h" 15 - #include "xfs_trans_priv.h" 16 - #include "xfs_refcount_item.h" 17 - #include "xfs_alloc.h" 18 - #include "xfs_refcount.h" 19 - 20 - /* 21 - * This routine is called to allocate a "refcount update done" 22 - * log item. 23 - */ 24 - struct xfs_cud_log_item * 25 - xfs_trans_get_cud( 26 - struct xfs_trans *tp, 27 - struct xfs_cui_log_item *cuip) 28 - { 29 - struct xfs_cud_log_item *cudp; 30 - 31 - cudp = xfs_cud_init(tp->t_mountp, cuip); 32 - xfs_trans_add_item(tp, &cudp->cud_item); 33 - return cudp; 34 - } 35 - 36 - /* 37 - * Finish an refcount update and log it to the CUD. Note that the 38 - * transaction is marked dirty regardless of whether the refcount 39 - * update succeeds or fails to support the CUI/CUD lifecycle rules. 40 - */ 41 - int 42 - xfs_trans_log_finish_refcount_update( 43 - struct xfs_trans *tp, 44 - struct xfs_cud_log_item *cudp, 45 - enum xfs_refcount_intent_type type, 46 - xfs_fsblock_t startblock, 47 - xfs_extlen_t blockcount, 48 - xfs_fsblock_t *new_fsb, 49 - xfs_extlen_t *new_len, 50 - struct xfs_btree_cur **pcur) 51 - { 52 - int error; 53 - 54 - error = xfs_refcount_finish_one(tp, type, startblock, 55 - blockcount, new_fsb, new_len, pcur); 56 - 57 - /* 58 - * Mark the transaction dirty, even on error. This ensures the 59 - * transaction is aborted, which: 60 - * 61 - * 1.) releases the CUI and frees the CUD 62 - * 2.) shuts down the filesystem 63 - */ 64 - tp->t_flags |= XFS_TRANS_DIRTY; 65 - set_bit(XFS_LI_DIRTY, &cudp->cud_item.li_flags); 66 - 67 - return error; 68 - } 69 - 70 - /* Sort refcount intents by AG. 
*/ 71 - static int 72 - xfs_refcount_update_diff_items( 73 - void *priv, 74 - struct list_head *a, 75 - struct list_head *b) 76 - { 77 - struct xfs_mount *mp = priv; 78 - struct xfs_refcount_intent *ra; 79 - struct xfs_refcount_intent *rb; 80 - 81 - ra = container_of(a, struct xfs_refcount_intent, ri_list); 82 - rb = container_of(b, struct xfs_refcount_intent, ri_list); 83 - return XFS_FSB_TO_AGNO(mp, ra->ri_startblock) - 84 - XFS_FSB_TO_AGNO(mp, rb->ri_startblock); 85 - } 86 - 87 - /* Get an CUI. */ 88 - STATIC void * 89 - xfs_refcount_update_create_intent( 90 - struct xfs_trans *tp, 91 - unsigned int count) 92 - { 93 - struct xfs_cui_log_item *cuip; 94 - 95 - ASSERT(tp != NULL); 96 - ASSERT(count > 0); 97 - 98 - cuip = xfs_cui_init(tp->t_mountp, count); 99 - ASSERT(cuip != NULL); 100 - 101 - /* 102 - * Get a log_item_desc to point at the new item. 103 - */ 104 - xfs_trans_add_item(tp, &cuip->cui_item); 105 - return cuip; 106 - } 107 - 108 - /* Set the phys extent flags for this reverse mapping. */ 109 - static void 110 - xfs_trans_set_refcount_flags( 111 - struct xfs_phys_extent *refc, 112 - enum xfs_refcount_intent_type type) 113 - { 114 - refc->pe_flags = 0; 115 - switch (type) { 116 - case XFS_REFCOUNT_INCREASE: 117 - case XFS_REFCOUNT_DECREASE: 118 - case XFS_REFCOUNT_ALLOC_COW: 119 - case XFS_REFCOUNT_FREE_COW: 120 - refc->pe_flags |= type; 121 - break; 122 - default: 123 - ASSERT(0); 124 - } 125 - } 126 - 127 - /* Log refcount updates in the intent item. 
*/ 128 - STATIC void 129 - xfs_refcount_update_log_item( 130 - struct xfs_trans *tp, 131 - void *intent, 132 - struct list_head *item) 133 - { 134 - struct xfs_cui_log_item *cuip = intent; 135 - struct xfs_refcount_intent *refc; 136 - uint next_extent; 137 - struct xfs_phys_extent *ext; 138 - 139 - refc = container_of(item, struct xfs_refcount_intent, ri_list); 140 - 141 - tp->t_flags |= XFS_TRANS_DIRTY; 142 - set_bit(XFS_LI_DIRTY, &cuip->cui_item.li_flags); 143 - 144 - /* 145 - * atomic_inc_return gives us the value after the increment; 146 - * we want to use it as an array index so we need to subtract 1 from 147 - * it. 148 - */ 149 - next_extent = atomic_inc_return(&cuip->cui_next_extent) - 1; 150 - ASSERT(next_extent < cuip->cui_format.cui_nextents); 151 - ext = &cuip->cui_format.cui_extents[next_extent]; 152 - ext->pe_startblock = refc->ri_startblock; 153 - ext->pe_len = refc->ri_blockcount; 154 - xfs_trans_set_refcount_flags(ext, refc->ri_type); 155 - } 156 - 157 - /* Get an CUD so we can process all the deferred refcount updates. */ 158 - STATIC void * 159 - xfs_refcount_update_create_done( 160 - struct xfs_trans *tp, 161 - void *intent, 162 - unsigned int count) 163 - { 164 - return xfs_trans_get_cud(tp, intent); 165 - } 166 - 167 - /* Process a deferred refcount update. */ 168 - STATIC int 169 - xfs_refcount_update_finish_item( 170 - struct xfs_trans *tp, 171 - struct list_head *item, 172 - void *done_item, 173 - void **state) 174 - { 175 - struct xfs_refcount_intent *refc; 176 - xfs_fsblock_t new_fsb; 177 - xfs_extlen_t new_aglen; 178 - int error; 179 - 180 - refc = container_of(item, struct xfs_refcount_intent, ri_list); 181 - error = xfs_trans_log_finish_refcount_update(tp, done_item, 182 - refc->ri_type, 183 - refc->ri_startblock, 184 - refc->ri_blockcount, 185 - &new_fsb, &new_aglen, 186 - (struct xfs_btree_cur **)state); 187 - /* Did we run out of reservation? Requeue what we didn't finish. 
*/ 188 - if (!error && new_aglen > 0) { 189 - ASSERT(refc->ri_type == XFS_REFCOUNT_INCREASE || 190 - refc->ri_type == XFS_REFCOUNT_DECREASE); 191 - refc->ri_startblock = new_fsb; 192 - refc->ri_blockcount = new_aglen; 193 - return -EAGAIN; 194 - } 195 - kmem_free(refc); 196 - return error; 197 - } 198 - 199 - /* Clean up after processing deferred refcounts. */ 200 - STATIC void 201 - xfs_refcount_update_finish_cleanup( 202 - struct xfs_trans *tp, 203 - void *state, 204 - int error) 205 - { 206 - struct xfs_btree_cur *rcur = state; 207 - 208 - xfs_refcount_finish_one_cleanup(tp, rcur, error); 209 - } 210 - 211 - /* Abort all pending CUIs. */ 212 - STATIC void 213 - xfs_refcount_update_abort_intent( 214 - void *intent) 215 - { 216 - xfs_cui_release(intent); 217 - } 218 - 219 - /* Cancel a deferred refcount update. */ 220 - STATIC void 221 - xfs_refcount_update_cancel_item( 222 - struct list_head *item) 223 - { 224 - struct xfs_refcount_intent *refc; 225 - 226 - refc = container_of(item, struct xfs_refcount_intent, ri_list); 227 - kmem_free(refc); 228 - } 229 - 230 - const struct xfs_defer_op_type xfs_refcount_update_defer_type = { 231 - .max_items = XFS_CUI_MAX_FAST_EXTENTS, 232 - .diff_items = xfs_refcount_update_diff_items, 233 - .create_intent = xfs_refcount_update_create_intent, 234 - .abort_intent = xfs_refcount_update_abort_intent, 235 - .log_item = xfs_refcount_update_log_item, 236 - .create_done = xfs_refcount_update_create_done, 237 - .finish_item = xfs_refcount_update_finish_item, 238 - .finish_cleanup = xfs_refcount_update_finish_cleanup, 239 - .cancel_item = xfs_refcount_update_cancel_item, 240 - };
-257
fs/xfs/xfs_trans_rmap.c
··· 1 - // SPDX-License-Identifier: GPL-2.0+ 2 - /* 3 - * Copyright (C) 2016 Oracle. All Rights Reserved. 4 - * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 - */ 6 - #include "xfs.h" 7 - #include "xfs_fs.h" 8 - #include "xfs_shared.h" 9 - #include "xfs_format.h" 10 - #include "xfs_log_format.h" 11 - #include "xfs_trans_resv.h" 12 - #include "xfs_mount.h" 13 - #include "xfs_defer.h" 14 - #include "xfs_trans.h" 15 - #include "xfs_trans_priv.h" 16 - #include "xfs_rmap_item.h" 17 - #include "xfs_alloc.h" 18 - #include "xfs_rmap.h" 19 - 20 - /* Set the map extent flags for this reverse mapping. */ 21 - static void 22 - xfs_trans_set_rmap_flags( 23 - struct xfs_map_extent *rmap, 24 - enum xfs_rmap_intent_type type, 25 - int whichfork, 26 - xfs_exntst_t state) 27 - { 28 - rmap->me_flags = 0; 29 - if (state == XFS_EXT_UNWRITTEN) 30 - rmap->me_flags |= XFS_RMAP_EXTENT_UNWRITTEN; 31 - if (whichfork == XFS_ATTR_FORK) 32 - rmap->me_flags |= XFS_RMAP_EXTENT_ATTR_FORK; 33 - switch (type) { 34 - case XFS_RMAP_MAP: 35 - rmap->me_flags |= XFS_RMAP_EXTENT_MAP; 36 - break; 37 - case XFS_RMAP_MAP_SHARED: 38 - rmap->me_flags |= XFS_RMAP_EXTENT_MAP_SHARED; 39 - break; 40 - case XFS_RMAP_UNMAP: 41 - rmap->me_flags |= XFS_RMAP_EXTENT_UNMAP; 42 - break; 43 - case XFS_RMAP_UNMAP_SHARED: 44 - rmap->me_flags |= XFS_RMAP_EXTENT_UNMAP_SHARED; 45 - break; 46 - case XFS_RMAP_CONVERT: 47 - rmap->me_flags |= XFS_RMAP_EXTENT_CONVERT; 48 - break; 49 - case XFS_RMAP_CONVERT_SHARED: 50 - rmap->me_flags |= XFS_RMAP_EXTENT_CONVERT_SHARED; 51 - break; 52 - case XFS_RMAP_ALLOC: 53 - rmap->me_flags |= XFS_RMAP_EXTENT_ALLOC; 54 - break; 55 - case XFS_RMAP_FREE: 56 - rmap->me_flags |= XFS_RMAP_EXTENT_FREE; 57 - break; 58 - default: 59 - ASSERT(0); 60 - } 61 - } 62 - 63 - struct xfs_rud_log_item * 64 - xfs_trans_get_rud( 65 - struct xfs_trans *tp, 66 - struct xfs_rui_log_item *ruip) 67 - { 68 - struct xfs_rud_log_item *rudp; 69 - 70 - rudp = xfs_rud_init(tp->t_mountp, ruip); 71 - xfs_trans_add_item(tp, 
&rudp->rud_item); 72 - return rudp; 73 - } 74 - 75 - /* 76 - * Finish an rmap update and log it to the RUD. Note that the transaction is 77 - * marked dirty regardless of whether the rmap update succeeds or fails to 78 - * support the RUI/RUD lifecycle rules. 79 - */ 80 - int 81 - xfs_trans_log_finish_rmap_update( 82 - struct xfs_trans *tp, 83 - struct xfs_rud_log_item *rudp, 84 - enum xfs_rmap_intent_type type, 85 - uint64_t owner, 86 - int whichfork, 87 - xfs_fileoff_t startoff, 88 - xfs_fsblock_t startblock, 89 - xfs_filblks_t blockcount, 90 - xfs_exntst_t state, 91 - struct xfs_btree_cur **pcur) 92 - { 93 - int error; 94 - 95 - error = xfs_rmap_finish_one(tp, type, owner, whichfork, startoff, 96 - startblock, blockcount, state, pcur); 97 - 98 - /* 99 - * Mark the transaction dirty, even on error. This ensures the 100 - * transaction is aborted, which: 101 - * 102 - * 1.) releases the RUI and frees the RUD 103 - * 2.) shuts down the filesystem 104 - */ 105 - tp->t_flags |= XFS_TRANS_DIRTY; 106 - set_bit(XFS_LI_DIRTY, &rudp->rud_item.li_flags); 107 - 108 - return error; 109 - } 110 - 111 - /* Sort rmap intents by AG. */ 112 - static int 113 - xfs_rmap_update_diff_items( 114 - void *priv, 115 - struct list_head *a, 116 - struct list_head *b) 117 - { 118 - struct xfs_mount *mp = priv; 119 - struct xfs_rmap_intent *ra; 120 - struct xfs_rmap_intent *rb; 121 - 122 - ra = container_of(a, struct xfs_rmap_intent, ri_list); 123 - rb = container_of(b, struct xfs_rmap_intent, ri_list); 124 - return XFS_FSB_TO_AGNO(mp, ra->ri_bmap.br_startblock) - 125 - XFS_FSB_TO_AGNO(mp, rb->ri_bmap.br_startblock); 126 - } 127 - 128 - /* Get an RUI. 
*/ 129 - STATIC void * 130 - xfs_rmap_update_create_intent( 131 - struct xfs_trans *tp, 132 - unsigned int count) 133 - { 134 - struct xfs_rui_log_item *ruip; 135 - 136 - ASSERT(tp != NULL); 137 - ASSERT(count > 0); 138 - 139 - ruip = xfs_rui_init(tp->t_mountp, count); 140 - ASSERT(ruip != NULL); 141 - 142 - /* 143 - * Get a log_item_desc to point at the new item. 144 - */ 145 - xfs_trans_add_item(tp, &ruip->rui_item); 146 - return ruip; 147 - } 148 - 149 - /* Log rmap updates in the intent item. */ 150 - STATIC void 151 - xfs_rmap_update_log_item( 152 - struct xfs_trans *tp, 153 - void *intent, 154 - struct list_head *item) 155 - { 156 - struct xfs_rui_log_item *ruip = intent; 157 - struct xfs_rmap_intent *rmap; 158 - uint next_extent; 159 - struct xfs_map_extent *map; 160 - 161 - rmap = container_of(item, struct xfs_rmap_intent, ri_list); 162 - 163 - tp->t_flags |= XFS_TRANS_DIRTY; 164 - set_bit(XFS_LI_DIRTY, &ruip->rui_item.li_flags); 165 - 166 - /* 167 - * atomic_inc_return gives us the value after the increment; 168 - * we want to use it as an array index so we need to subtract 1 from 169 - * it. 170 - */ 171 - next_extent = atomic_inc_return(&ruip->rui_next_extent) - 1; 172 - ASSERT(next_extent < ruip->rui_format.rui_nextents); 173 - map = &ruip->rui_format.rui_extents[next_extent]; 174 - map->me_owner = rmap->ri_owner; 175 - map->me_startblock = rmap->ri_bmap.br_startblock; 176 - map->me_startoff = rmap->ri_bmap.br_startoff; 177 - map->me_len = rmap->ri_bmap.br_blockcount; 178 - xfs_trans_set_rmap_flags(map, rmap->ri_type, rmap->ri_whichfork, 179 - rmap->ri_bmap.br_state); 180 - } 181 - 182 - /* Get an RUD so we can process all the deferred rmap updates. */ 183 - STATIC void * 184 - xfs_rmap_update_create_done( 185 - struct xfs_trans *tp, 186 - void *intent, 187 - unsigned int count) 188 - { 189 - return xfs_trans_get_rud(tp, intent); 190 - } 191 - 192 - /* Process a deferred rmap update. 
*/ 193 - STATIC int 194 - xfs_rmap_update_finish_item( 195 - struct xfs_trans *tp, 196 - struct list_head *item, 197 - void *done_item, 198 - void **state) 199 - { 200 - struct xfs_rmap_intent *rmap; 201 - int error; 202 - 203 - rmap = container_of(item, struct xfs_rmap_intent, ri_list); 204 - error = xfs_trans_log_finish_rmap_update(tp, done_item, 205 - rmap->ri_type, 206 - rmap->ri_owner, rmap->ri_whichfork, 207 - rmap->ri_bmap.br_startoff, 208 - rmap->ri_bmap.br_startblock, 209 - rmap->ri_bmap.br_blockcount, 210 - rmap->ri_bmap.br_state, 211 - (struct xfs_btree_cur **)state); 212 - kmem_free(rmap); 213 - return error; 214 - } 215 - 216 - /* Clean up after processing deferred rmaps. */ 217 - STATIC void 218 - xfs_rmap_update_finish_cleanup( 219 - struct xfs_trans *tp, 220 - void *state, 221 - int error) 222 - { 223 - struct xfs_btree_cur *rcur = state; 224 - 225 - xfs_rmap_finish_one_cleanup(tp, rcur, error); 226 - } 227 - 228 - /* Abort all pending RUIs. */ 229 - STATIC void 230 - xfs_rmap_update_abort_intent( 231 - void *intent) 232 - { 233 - xfs_rui_release(intent); 234 - } 235 - 236 - /* Cancel a deferred rmap update. */ 237 - STATIC void 238 - xfs_rmap_update_cancel_item( 239 - struct list_head *item) 240 - { 241 - struct xfs_rmap_intent *rmap; 242 - 243 - rmap = container_of(item, struct xfs_rmap_intent, ri_list); 244 - kmem_free(rmap); 245 - } 246 - 247 - const struct xfs_defer_op_type xfs_rmap_update_defer_type = { 248 - .max_items = XFS_RUI_MAX_FAST_EXTENTS, 249 - .diff_items = xfs_rmap_update_diff_items, 250 - .create_intent = xfs_rmap_update_create_intent, 251 - .abort_intent = xfs_rmap_update_abort_intent, 252 - .log_item = xfs_rmap_update_log_item, 253 - .create_done = xfs_rmap_update_create_done, 254 - .finish_item = xfs_rmap_update_finish_item, 255 - .finish_cleanup = xfs_rmap_update_finish_cleanup, 256 - .cancel_item = xfs_rmap_update_cancel_item, 257 - };
+1 -4
fs/xfs/xfs_xattr.c
··· 5 5 */ 6 6 7 7 #include "xfs.h" 8 + #include "xfs_shared.h" 8 9 #include "xfs_format.h" 9 10 #include "xfs_log_format.h" 10 - #include "xfs_trans_resv.h" 11 - #include "xfs_mount.h" 12 11 #include "xfs_da_format.h" 13 12 #include "xfs_inode.h" 14 13 #include "xfs_attr.h" 15 - #include "xfs_attr_leaf.h" 16 - #include "xfs_acl.h" 17 14 18 15 #include <linux/posix_acl_xattr.h> 19 16 #include <linux/xattr.h>