Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'xfs-5.3-merge-12' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs updates from Darrick Wong:
"In this release there is a significant amount of consolidation and
cleanup in the log code; restructuring of the log to issue struct
bios directly; new bulkstat ioctls to return v5 fs inode information
(and fix all the padding problems of the old ioctl); the beginnings of
multithreaded inode walks (e.g. quotacheck); and a reduction in memory
usage in the online scrub code leading to reduced runtimes.

- Refactor inode geometry calculation into a single structure instead
of open-coding pieces everywhere.

- Add online repair to build options.

- Remove unnecessary function call flags and functions.

- Claim maintainership of various loose xfs documentation and header
files.

- Use struct bio directly for log buffer IOs instead of struct
xfs_buf.

- Reduce log item boilerplate code requirements.

- Merge log item code spread across too many files.

- Further distinguish between log item commits and cancellations.

- Various small cleanups to the ag small allocator.

- Support cgroup-aware writeback

- libxfs refactoring for mkfs cleanup

- Remove unneeded #includes

- Fix a memory allocation miscalculation in the new log bio code

- Fix bisection problems

- Fix a crash in ioend processing caused by tripping over freeing of
preallocated transactions

- Split out a generic inode walk mechanism from the bulkstat code,
hook up all the internal users to use the walking code, then clean
up bulkstat to serve only the bulkstat ioctls.

- Add a multithreaded iwalk implementation to speed up quotacheck on
fast storage with many CPUs.

- Remove unnecessary return values in logging teardown functions.

- Supplement the bstat and inogrp structures with new bulkstat and
inumbers structures that have all the fields we need for v5
filesystem features and none of the padding problems of their
predecessors.

- Wire up new ioctls that use the new structures with a much simpler
bulk_ireq structure at the head instead of the pointer-happy mess we
had before.

- Enable userspace to constrain bulkstat returns to a single AG or a
single special inode so that we can phase out a lot of geometry
guesswork in userspace.

- Reduce memory consumption and zeroing overhead in extended
attribute scrub code.

- Fix some behavioral regressions in the new bulkstat backend code.

- Fix some behavioral regressions in the new log bio code"

* tag 'xfs-5.3-merge-12' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (100 commits)
xfs: chain bios the right way around in xfs_rw_bdev
xfs: bump INUMBERS cursor correctly in xfs_inumbers_walk
xfs: don't update lastino for FSBULKSTAT_SINGLE
xfs: online scrub needn't bother zeroing its temporary buffer
xfs: only allocate memory for scrubbing attributes when we need it
xfs: refactor attr scrub memory allocation function
xfs: refactor extended attribute buffer pointer functions
xfs: attribute scrub should use seen_enough to pass error values
xfs: allow single bulkstat of special inodes
xfs: specify AG in bulk req
xfs: wire up the v5 inumbers ioctl
xfs: wire up new v5 bulkstat ioctls
xfs: introduce v5 inode group structure
xfs: introduce new v5 bulkstat structure
xfs: rename bulkstat functions
xfs: remove various bulk request typedef usage
fs: xfs: xfs_log: Change return type from int to void
xfs: poll waiting for quotacheck
xfs: multithreaded iwalk implementation
xfs: refactor INUMBERS to use iwalk functions
...

+4445 -4745
+4 -4
Documentation/filesystems/xfs-self-describing-metadata.txt
··· 222 222 xfs_foo_read_verify( 223 223 struct xfs_buf *bp) 224 224 { 225 - struct xfs_mount *mp = bp->b_target->bt_mount; 225 + struct xfs_mount *mp = bp->b_mount; 226 226 227 227 if ((xfs_sb_version_hascrc(&mp->m_sb) && 228 228 !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), ··· 245 245 xfs_foo_verify( 246 246 struct xfs_buf *bp) 247 247 { 248 - struct xfs_mount *mp = bp->b_target->bt_mount; 248 + struct xfs_mount *mp = bp->b_mount; 249 249 struct xfs_ondisk_hdr *hdr = bp->b_addr; 250 250 251 251 if (hdr->magic != cpu_to_be32(XFS_FOO_MAGIC)) ··· 272 272 xfs_foo_verify( 273 273 struct xfs_buf *bp) 274 274 { 275 - struct xfs_mount *mp = bp->b_target->bt_mount; 275 + struct xfs_mount *mp = bp->b_mount; 276 276 struct xfs_ondisk_hdr *hdr = bp->b_addr; 277 277 278 278 if (hdr->magic == cpu_to_be32(XFS_FOO_CRC_MAGIC)) { ··· 297 297 xfs_foo_write_verify( 298 298 struct xfs_buf *bp) 299 299 { 300 - struct xfs_mount *mp = bp->b_target->bt_mount; 300 + struct xfs_mount *mp = bp->b_mount; 301 301 struct xfs_buf_log_item *bip = bp->b_fspriv; 302 302 303 303 if (!xfs_foo_verify(bp)) {
+6
MAINTAINERS
··· 17544 17544 T: git git://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git 17545 17545 S: Supported 17546 17546 F: Documentation/filesystems/xfs.txt 17547 + F: Documentation/ABI/testing/sysfs-fs-xfs 17548 + F: Documentation/filesystems/xfs.txt 17549 + F: Documentation/filesystems/xfs-delayed-logging-design.txt 17550 + F: Documentation/filesystems/xfs-self-describing-metadata.txt 17547 17551 F: fs/xfs/ 17552 + F: include/uapi/linux/dqblk_xfs.h 17553 + F: include/uapi/linux/fsmap.h 17548 17554 17549 17555 XILINX AXI ETHERNET DRIVER 17550 17556 M: Anirudha Sarangi <anirudh@xilinx.com>
+4 -5
fs/xfs/Makefile
··· 62 62 xfs_attr_inactive.o \ 63 63 xfs_attr_list.o \ 64 64 xfs_bmap_util.o \ 65 + xfs_bio_io.o \ 65 66 xfs_buf.o \ 66 67 xfs_dir2_readdir.o \ 67 68 xfs_discard.o \ ··· 81 80 xfs_iops.o \ 82 81 xfs_inode.o \ 83 82 xfs_itable.o \ 83 + xfs_iwalk.o \ 84 84 xfs_message.o \ 85 85 xfs_mount.o \ 86 86 xfs_mru_cache.o \ 87 + xfs_pwork.o \ 87 88 xfs_reflink.o \ 88 89 xfs_stats.o \ 89 90 xfs_super.o \ ··· 107 104 xfs_rmap_item.o \ 108 105 xfs_log_recover.o \ 109 106 xfs_trans_ail.o \ 110 - xfs_trans_bmap.o \ 111 107 xfs_trans_buf.o \ 112 - xfs_trans_extfree.o \ 113 - xfs_trans_inode.o \ 114 - xfs_trans_refcount.o \ 115 - xfs_trans_rmap.o \ 108 + xfs_trans_inode.o 116 109 117 110 # optional features 118 111 xfs-$(CONFIG_XFS_QUOTA) += xfs_dquot.o \
-5
fs/xfs/kmem.c
··· 3 3 * Copyright (c) 2000-2005 Silicon Graphics, Inc. 4 4 * All Rights Reserved. 5 5 */ 6 - #include <linux/mm.h> 7 6 #include <linux/sched/mm.h> 8 - #include <linux/highmem.h> 9 - #include <linux/slab.h> 10 - #include <linux/swap.h> 11 - #include <linux/blkdev.h> 12 7 #include <linux/backing-dev.h> 13 8 #include "kmem.h" 14 9 #include "xfs_message.h"
+8
fs/xfs/kmem.h
··· 124 124 return kmem_zone_alloc(zone, flags | KM_ZERO); 125 125 } 126 126 127 + static inline struct page * 128 + kmem_to_page(void *addr) 129 + { 130 + if (is_vmalloc_addr(addr)) 131 + return vmalloc_to_page(addr); 132 + return virt_to_page(addr); 133 + } 134 + 127 135 #endif /* __XFS_SUPPORT_KMEM_H__ */
+88 -16
fs/xfs/libxfs/xfs_ag.c
··· 10 10 #include "xfs_shared.h" 11 11 #include "xfs_format.h" 12 12 #include "xfs_trans_resv.h" 13 + #include "xfs_bit.h" 13 14 #include "xfs_sb.h" 14 15 #include "xfs_mount.h" 15 16 #include "xfs_btree.h" ··· 45 44 return bp; 46 45 } 47 46 47 + static inline bool is_log_ag(struct xfs_mount *mp, struct aghdr_init_data *id) 48 + { 49 + return mp->m_sb.sb_logstart > 0 && 50 + id->agno == XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart); 51 + } 52 + 48 53 /* 49 54 * Generic btree root block init function 50 55 */ ··· 60 53 struct xfs_buf *bp, 61 54 struct aghdr_init_data *id) 62 55 { 63 - xfs_btree_init_block(mp, bp, id->type, 0, 0, id->agno, 0); 56 + xfs_btree_init_block(mp, bp, id->type, 0, 0, id->agno); 57 + } 58 + 59 + /* Finish initializing a free space btree. */ 60 + static void 61 + xfs_freesp_init_recs( 62 + struct xfs_mount *mp, 63 + struct xfs_buf *bp, 64 + struct aghdr_init_data *id) 65 + { 66 + struct xfs_alloc_rec *arec; 67 + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 68 + 69 + arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1); 70 + arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks); 71 + 72 + if (is_log_ag(mp, id)) { 73 + struct xfs_alloc_rec *nrec; 74 + xfs_agblock_t start = XFS_FSB_TO_AGBNO(mp, 75 + mp->m_sb.sb_logstart); 76 + 77 + ASSERT(start >= mp->m_ag_prealloc_blocks); 78 + if (start != mp->m_ag_prealloc_blocks) { 79 + /* 80 + * Modify first record to pad stripe align of log 81 + */ 82 + arec->ar_blockcount = cpu_to_be32(start - 83 + mp->m_ag_prealloc_blocks); 84 + nrec = arec + 1; 85 + 86 + /* 87 + * Insert second record at start of internal log 88 + * which then gets trimmed. 
89 + */ 90 + nrec->ar_startblock = cpu_to_be32( 91 + be32_to_cpu(arec->ar_startblock) + 92 + be32_to_cpu(arec->ar_blockcount)); 93 + arec = nrec; 94 + be16_add_cpu(&block->bb_numrecs, 1); 95 + } 96 + /* 97 + * Change record start to after the internal log 98 + */ 99 + be32_add_cpu(&arec->ar_startblock, mp->m_sb.sb_logblocks); 100 + } 101 + 102 + /* 103 + * Calculate the record block count and check for the case where 104 + * the log might have consumed all available space in the AG. If 105 + * so, reset the record count to 0 to avoid exposure of an invalid 106 + * record start block. 107 + */ 108 + arec->ar_blockcount = cpu_to_be32(id->agsize - 109 + be32_to_cpu(arec->ar_startblock)); 110 + if (!arec->ar_blockcount) 111 + block->bb_numrecs = 0; 64 112 } 65 113 66 114 /* ··· 127 65 struct xfs_buf *bp, 128 66 struct aghdr_init_data *id) 129 67 { 130 - struct xfs_alloc_rec *arec; 131 - 132 - xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 1, id->agno, 0); 133 - arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1); 134 - arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks); 135 - arec->ar_blockcount = cpu_to_be32(id->agsize - 136 - be32_to_cpu(arec->ar_startblock)); 68 + xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 1, id->agno); 69 + xfs_freesp_init_recs(mp, bp, id); 137 70 } 138 71 139 72 static void ··· 137 80 struct xfs_buf *bp, 138 81 struct aghdr_init_data *id) 139 82 { 140 - struct xfs_alloc_rec *arec; 141 - 142 - xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 1, id->agno, 0); 143 - arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1); 144 - arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks); 145 - arec->ar_blockcount = cpu_to_be32(id->agsize - 146 - be32_to_cpu(arec->ar_startblock)); 83 + xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 1, id->agno); 84 + xfs_freesp_init_recs(mp, bp, id); 147 85 } 148 86 149 87 /* ··· 153 101 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 154 102 struct xfs_rmap_rec *rrec; 155 103 156 - 
xfs_btree_init_block(mp, bp, XFS_BTNUM_RMAP, 0, 4, id->agno, 0); 104 + xfs_btree_init_block(mp, bp, XFS_BTNUM_RMAP, 0, 4, id->agno); 157 105 158 106 /* 159 107 * mark the AG header regions as static metadata The BNO ··· 198 146 rrec->rm_startblock = cpu_to_be32(xfs_refc_block(mp)); 199 147 rrec->rm_blockcount = cpu_to_be32(1); 200 148 rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_REFC); 149 + rrec->rm_offset = 0; 150 + be16_add_cpu(&block->bb_numrecs, 1); 151 + } 152 + 153 + /* account for the log space */ 154 + if (is_log_ag(mp, id)) { 155 + rrec = XFS_RMAP_REC_ADDR(block, 156 + be16_to_cpu(block->bb_numrecs) + 1); 157 + rrec->rm_startblock = cpu_to_be32( 158 + XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart)); 159 + rrec->rm_blockcount = cpu_to_be32(mp->m_sb.sb_logblocks); 160 + rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_LOG); 201 161 rrec->rm_offset = 0; 202 162 be16_add_cpu(&block->bb_numrecs, 1); 203 163 } ··· 272 208 xfs_refc_block(mp)); 273 209 agf->agf_refcount_level = cpu_to_be32(1); 274 210 agf->agf_refcount_blocks = cpu_to_be32(1); 211 + } 212 + 213 + if (is_log_ag(mp, id)) { 214 + int64_t logblocks = mp->m_sb.sb_logblocks; 215 + 216 + be32_add_cpu(&agf->agf_freeblks, -logblocks); 217 + agf->agf_longest = cpu_to_be32(id->agsize - 218 + XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart) - logblocks); 275 219 } 276 220 } 277 221
-8
fs/xfs/libxfs/xfs_ag_resv.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_log_format.h" 11 11 #include "xfs_trans_resv.h" 12 - #include "xfs_sb.h" 13 12 #include "xfs_mount.h" 14 - #include "xfs_defer.h" 15 13 #include "xfs_alloc.h" 16 14 #include "xfs_errortag.h" 17 15 #include "xfs_error.h" 18 16 #include "xfs_trace.h" 19 - #include "xfs_cksum.h" 20 17 #include "xfs_trans.h" 21 - #include "xfs_bit.h" 22 - #include "xfs_bmap.h" 23 - #include "xfs_bmap_btree.h" 24 - #include "xfs_ag_resv.h" 25 - #include "xfs_trans_space.h" 26 18 #include "xfs_rmap_btree.h" 27 19 #include "xfs_btree.h" 28 20 #include "xfs_refcount_btree.h"
+109 -118
fs/xfs/libxfs/xfs_alloc.c
··· 13 13 #include "xfs_sb.h" 14 14 #include "xfs_mount.h" 15 15 #include "xfs_defer.h" 16 - #include "xfs_inode.h" 17 16 #include "xfs_btree.h" 18 17 #include "xfs_rmap.h" 19 18 #include "xfs_alloc_btree.h" ··· 20 21 #include "xfs_extent_busy.h" 21 22 #include "xfs_errortag.h" 22 23 #include "xfs_error.h" 23 - #include "xfs_cksum.h" 24 24 #include "xfs_trace.h" 25 25 #include "xfs_trans.h" 26 26 #include "xfs_buf_item.h" ··· 39 41 STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *); 40 42 STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *); 41 43 STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *); 42 - STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *, 43 - xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *); 44 44 45 45 /* 46 46 * Size of the AGFL. For CRC-enabled filesystes we steal a couple of slots in ··· 551 555 xfs_agfl_verify( 552 556 struct xfs_buf *bp) 553 557 { 554 - struct xfs_mount *mp = bp->b_target->bt_mount; 558 + struct xfs_mount *mp = bp->b_mount; 555 559 struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp); 556 560 int i; 557 561 ··· 592 596 xfs_agfl_read_verify( 593 597 struct xfs_buf *bp) 594 598 { 595 - struct xfs_mount *mp = bp->b_target->bt_mount; 599 + struct xfs_mount *mp = bp->b_mount; 596 600 xfs_failaddr_t fa; 597 601 598 602 /* ··· 617 621 xfs_agfl_write_verify( 618 622 struct xfs_buf *bp) 619 623 { 620 - struct xfs_mount *mp = bp->b_target->bt_mount; 624 + struct xfs_mount *mp = bp->b_mount; 621 625 struct xfs_buf_log_item *bip = bp->b_log_item; 622 626 xfs_failaddr_t fa; 623 627 ··· 694 698 /* 695 699 * Allocation group level functions. 696 700 */ 701 + 702 + /* 703 + * Deal with the case where only small freespaces remain. Either return the 704 + * contents of the last freespace record, or allocate space from the freelist if 705 + * there is nothing in the tree. 
706 + */ 707 + STATIC int /* error */ 708 + xfs_alloc_ag_vextent_small( 709 + struct xfs_alloc_arg *args, /* allocation argument structure */ 710 + struct xfs_btree_cur *ccur, /* optional by-size cursor */ 711 + xfs_agblock_t *fbnop, /* result block number */ 712 + xfs_extlen_t *flenp, /* result length */ 713 + int *stat) /* status: 0-freelist, 1-normal/none */ 714 + { 715 + int error = 0; 716 + xfs_agblock_t fbno = NULLAGBLOCK; 717 + xfs_extlen_t flen = 0; 718 + int i = 0; 719 + 720 + /* 721 + * If a cntbt cursor is provided, try to allocate the largest record in 722 + * the tree. Try the AGFL if the cntbt is empty, otherwise fail the 723 + * allocation. Make sure to respect minleft even when pulling from the 724 + * freelist. 725 + */ 726 + if (ccur) 727 + error = xfs_btree_decrement(ccur, 0, &i); 728 + if (error) 729 + goto error; 730 + if (i) { 731 + error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i); 732 + if (error) 733 + goto error; 734 + XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error); 735 + goto out; 736 + } 737 + 738 + if (args->minlen != 1 || args->alignment != 1 || 739 + args->resv == XFS_AG_RESV_AGFL || 740 + (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount) <= 741 + args->minleft)) 742 + goto out; 743 + 744 + error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0); 745 + if (error) 746 + goto error; 747 + if (fbno == NULLAGBLOCK) 748 + goto out; 749 + 750 + xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1, 751 + xfs_alloc_allow_busy_reuse(args->datatype)); 752 + 753 + if (xfs_alloc_is_userdata(args->datatype)) { 754 + struct xfs_buf *bp; 755 + 756 + bp = xfs_btree_get_bufs(args->mp, args->tp, args->agno, fbno); 757 + if (!bp) { 758 + error = -EFSCORRUPTED; 759 + goto error; 760 + } 761 + xfs_trans_binval(args->tp, bp); 762 + } 763 + *fbnop = args->agbno = fbno; 764 + *flenp = args->len = 1; 765 + XFS_WANT_CORRUPTED_GOTO(args->mp, 766 + fbno < be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length), 767 + error); 768 + args->wasfromfl = 1; 
769 + trace_xfs_alloc_small_freelist(args); 770 + 771 + /* 772 + * If we're feeding an AGFL block to something that doesn't live in the 773 + * free space, we need to clear out the OWN_AG rmap. 774 + */ 775 + error = xfs_rmap_free(args->tp, args->agbp, args->agno, fbno, 1, 776 + &XFS_RMAP_OINFO_AG); 777 + if (error) 778 + goto error; 779 + 780 + *stat = 0; 781 + return 0; 782 + 783 + out: 784 + /* 785 + * Can't do the allocation, give up. 786 + */ 787 + if (flen < args->minlen) { 788 + args->agbno = NULLAGBLOCK; 789 + trace_xfs_alloc_small_notenough(args); 790 + flen = 0; 791 + } 792 + *fbnop = fbno; 793 + *flenp = flen; 794 + *stat = 1; 795 + trace_xfs_alloc_small_done(args); 796 + return 0; 797 + 798 + error: 799 + trace_xfs_alloc_small_error(args); 800 + return error; 801 + } 697 802 698 803 /* 699 804 * Allocate a variable extent in the allocation group agno. ··· 1680 1583 } 1681 1584 1682 1585 /* 1683 - * Deal with the case where only small freespaces remain. 1684 - * Either return the contents of the last freespace record, 1685 - * or allocate space from the freelist if there is nothing in the tree. 1686 - */ 1687 - STATIC int /* error */ 1688 - xfs_alloc_ag_vextent_small( 1689 - xfs_alloc_arg_t *args, /* allocation argument structure */ 1690 - xfs_btree_cur_t *ccur, /* by-size cursor */ 1691 - xfs_agblock_t *fbnop, /* result block number */ 1692 - xfs_extlen_t *flenp, /* result length */ 1693 - int *stat) /* status: 0-freelist, 1-normal/none */ 1694 - { 1695 - int error; 1696 - xfs_agblock_t fbno; 1697 - xfs_extlen_t flen; 1698 - int i; 1699 - 1700 - if ((error = xfs_btree_decrement(ccur, 0, &i))) 1701 - goto error0; 1702 - if (i) { 1703 - if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i))) 1704 - goto error0; 1705 - XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); 1706 - } 1707 - /* 1708 - * Nothing in the btree, try the freelist. Make sure 1709 - * to respect minleft even when pulling from the 1710 - * freelist. 
1711 - */ 1712 - else if (args->minlen == 1 && args->alignment == 1 && 1713 - args->resv != XFS_AG_RESV_AGFL && 1714 - (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount) 1715 - > args->minleft)) { 1716 - error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0); 1717 - if (error) 1718 - goto error0; 1719 - if (fbno != NULLAGBLOCK) { 1720 - xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1, 1721 - xfs_alloc_allow_busy_reuse(args->datatype)); 1722 - 1723 - if (xfs_alloc_is_userdata(args->datatype)) { 1724 - xfs_buf_t *bp; 1725 - 1726 - bp = xfs_btree_get_bufs(args->mp, args->tp, 1727 - args->agno, fbno, 0); 1728 - if (!bp) { 1729 - error = -EFSCORRUPTED; 1730 - goto error0; 1731 - } 1732 - xfs_trans_binval(args->tp, bp); 1733 - } 1734 - args->len = 1; 1735 - args->agbno = fbno; 1736 - XFS_WANT_CORRUPTED_GOTO(args->mp, 1737 - args->agbno + args->len <= 1738 - be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length), 1739 - error0); 1740 - args->wasfromfl = 1; 1741 - trace_xfs_alloc_small_freelist(args); 1742 - 1743 - /* 1744 - * If we're feeding an AGFL block to something that 1745 - * doesn't live in the free space, we need to clear 1746 - * out the OWN_AG rmap. 1747 - */ 1748 - error = xfs_rmap_free(args->tp, args->agbp, args->agno, 1749 - fbno, 1, &XFS_RMAP_OINFO_AG); 1750 - if (error) 1751 - goto error0; 1752 - 1753 - *stat = 0; 1754 - return 0; 1755 - } 1756 - /* 1757 - * Nothing in the freelist. 1758 - */ 1759 - else 1760 - flen = 0; 1761 - } 1762 - /* 1763 - * Can't allocate from the freelist for some reason. 1764 - */ 1765 - else { 1766 - fbno = NULLAGBLOCK; 1767 - flen = 0; 1768 - } 1769 - /* 1770 - * Can't do the allocation, give up. 
1771 - */ 1772 - if (flen < args->minlen) { 1773 - args->agbno = NULLAGBLOCK; 1774 - trace_xfs_alloc_small_notenough(args); 1775 - flen = 0; 1776 - } 1777 - *fbnop = fbno; 1778 - *flenp = flen; 1779 - *stat = 1; 1780 - trace_xfs_alloc_small_done(args); 1781 - return 0; 1782 - 1783 - error0: 1784 - trace_xfs_alloc_small_error(args); 1785 - return error; 1786 - } 1787 - 1788 - /* 1789 1586 * Free the extent starting at agno/bno for length. 1790 1587 */ 1791 1588 STATIC int ··· 2086 2095 if (error) 2087 2096 return error; 2088 2097 2089 - bp = xfs_btree_get_bufs(tp->t_mountp, tp, agno, agbno, 0); 2098 + bp = xfs_btree_get_bufs(tp->t_mountp, tp, agno, agbno); 2090 2099 if (!bp) 2091 2100 return -EFSCORRUPTED; 2092 2101 xfs_trans_binval(tp, bp); ··· 2577 2586 xfs_agf_verify( 2578 2587 struct xfs_buf *bp) 2579 2588 { 2580 - struct xfs_mount *mp = bp->b_target->bt_mount; 2589 + struct xfs_mount *mp = bp->b_mount; 2581 2590 struct xfs_agf *agf = XFS_BUF_TO_AGF(bp); 2582 2591 2583 2592 if (xfs_sb_version_hascrc(&mp->m_sb)) { ··· 2635 2644 xfs_agf_read_verify( 2636 2645 struct xfs_buf *bp) 2637 2646 { 2638 - struct xfs_mount *mp = bp->b_target->bt_mount; 2647 + struct xfs_mount *mp = bp->b_mount; 2639 2648 xfs_failaddr_t fa; 2640 2649 2641 2650 if (xfs_sb_version_hascrc(&mp->m_sb) && ··· 2652 2661 xfs_agf_write_verify( 2653 2662 struct xfs_buf *bp) 2654 2663 { 2655 - struct xfs_mount *mp = bp->b_target->bt_mount; 2664 + struct xfs_mount *mp = bp->b_mount; 2656 2665 struct xfs_buf_log_item *bip = bp->b_log_item; 2657 2666 xfs_failaddr_t fa; 2658 2667 ··· 3137 3146 3138 3147 /* 3139 3148 * Walk all the blocks in the AGFL. The @walk_fn can return any negative 3140 - * error code or XFS_BTREE_QUERY_RANGE_ABORT. 3149 + * error code or XFS_ITER_*. 3141 3150 */ 3142 3151 int 3143 3152 xfs_agfl_walk(
+1 -2
fs/xfs/libxfs/xfs_alloc_btree.c
··· 17 17 #include "xfs_extent_busy.h" 18 18 #include "xfs_error.h" 19 19 #include "xfs_trace.h" 20 - #include "xfs_cksum.h" 21 20 #include "xfs_trans.h" 22 21 23 22 ··· 291 292 xfs_allocbt_verify( 292 293 struct xfs_buf *bp) 293 294 { 294 - struct xfs_mount *mp = bp->b_target->bt_mount; 295 + struct xfs_mount *mp = bp->b_mount; 295 296 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 296 297 struct xfs_perag *pag = bp->b_pag; 297 298 xfs_failaddr_t fa;
-5
fs/xfs/libxfs/xfs_attr.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_log_format.h" 11 11 #include "xfs_trans_resv.h" 12 - #include "xfs_bit.h" 13 12 #include "xfs_mount.h" 14 13 #include "xfs_defer.h" 15 14 #include "xfs_da_format.h" 16 15 #include "xfs_da_btree.h" 17 16 #include "xfs_attr_sf.h" 18 17 #include "xfs_inode.h" 19 - #include "xfs_alloc.h" 20 18 #include "xfs_trans.h" 21 - #include "xfs_inode_item.h" 22 19 #include "xfs_bmap.h" 23 - #include "xfs_bmap_util.h" 24 20 #include "xfs_bmap_btree.h" 25 21 #include "xfs_attr.h" 26 22 #include "xfs_attr_leaf.h" 27 23 #include "xfs_attr_remote.h" 28 - #include "xfs_error.h" 29 24 #include "xfs_quota.h" 30 25 #include "xfs_trans_space.h" 31 26 #include "xfs_trace.h"
+7 -1
fs/xfs/libxfs/xfs_attr.h
··· 112 112 struct xfs_inode *dp; /* inode */ 113 113 struct attrlist_cursor_kern *cursor; /* position in list */ 114 114 char *alist; /* output buffer */ 115 - int seen_enough; /* T/F: seen enough of list? */ 115 + 116 + /* 117 + * Abort attribute list iteration if non-zero. Can be used to pass 118 + * error values to the xfs_attr_list caller. 119 + */ 120 + int seen_enough; 121 + 116 122 ssize_t count; /* num used entries */ 117 123 int dupcnt; /* count dup hashvals seen */ 118 124 int bufsize; /* total buffer size */
+6 -9
fs/xfs/libxfs/xfs_attr_leaf.c
··· 10 10 #include "xfs_format.h" 11 11 #include "xfs_log_format.h" 12 12 #include "xfs_trans_resv.h" 13 - #include "xfs_bit.h" 14 13 #include "xfs_sb.h" 15 14 #include "xfs_mount.h" 16 15 #include "xfs_da_format.h" 17 16 #include "xfs_da_btree.h" 18 17 #include "xfs_inode.h" 19 18 #include "xfs_trans.h" 20 - #include "xfs_inode_item.h" 21 19 #include "xfs_bmap_btree.h" 22 20 #include "xfs_bmap.h" 23 21 #include "xfs_attr_sf.h" ··· 25 27 #include "xfs_error.h" 26 28 #include "xfs_trace.h" 27 29 #include "xfs_buf_item.h" 28 - #include "xfs_cksum.h" 29 30 #include "xfs_dir2.h" 30 31 #include "xfs_log.h" 31 32 ··· 237 240 struct xfs_buf *bp) 238 241 { 239 242 struct xfs_attr3_icleaf_hdr ichdr; 240 - struct xfs_mount *mp = bp->b_target->bt_mount; 243 + struct xfs_mount *mp = bp->b_mount; 241 244 struct xfs_attr_leafblock *leaf = bp->b_addr; 242 245 struct xfs_attr_leaf_entry *entries; 243 246 uint32_t end; /* must be 32bit - see below */ ··· 310 313 xfs_attr3_leaf_write_verify( 311 314 struct xfs_buf *bp) 312 315 { 313 - struct xfs_mount *mp = bp->b_target->bt_mount; 316 + struct xfs_mount *mp = bp->b_mount; 314 317 struct xfs_buf_log_item *bip = bp->b_log_item; 315 318 struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr; 316 319 xfs_failaddr_t fa; ··· 340 343 xfs_attr3_leaf_read_verify( 341 344 struct xfs_buf *bp) 342 345 { 343 - struct xfs_mount *mp = bp->b_target->bt_mount; 346 + struct xfs_mount *mp = bp->b_mount; 344 347 xfs_failaddr_t fa; 345 348 346 349 if (xfs_sb_version_hascrc(&mp->m_sb) && ··· 862 865 struct xfs_attr3_icleaf_hdr leafhdr; 863 866 int bytes; 864 867 int i; 865 - struct xfs_mount *mp = bp->b_target->bt_mount; 868 + struct xfs_mount *mp = bp->b_mount; 866 869 867 870 leaf = bp->b_addr; 868 871 xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf); ··· 1522 1525 { 1523 1526 struct xfs_attr3_icleaf_hdr ichdr1; 1524 1527 struct xfs_attr3_icleaf_hdr ichdr2; 1525 - struct xfs_mount *mp = leaf1_bp->b_target->bt_mount; 1528 + struct xfs_mount *mp = 
leaf1_bp->b_mount; 1526 1529 1527 1530 xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr1, leaf1_bp->b_addr); 1528 1531 xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr2, leaf2_bp->b_addr); ··· 2565 2568 { 2566 2569 struct xfs_attr3_icleaf_hdr ichdr; 2567 2570 struct xfs_attr_leaf_entry *entries; 2568 - struct xfs_mount *mp = bp->b_target->bt_mount; 2571 + struct xfs_mount *mp = bp->b_mount; 2569 2572 2570 2573 xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, bp->b_addr); 2571 2574 entries = xfs_attr3_leaf_entryp(bp->b_addr);
+3 -11
fs/xfs/libxfs/xfs_attr_remote.c
··· 16 16 #include "xfs_da_format.h" 17 17 #include "xfs_da_btree.h" 18 18 #include "xfs_inode.h" 19 - #include "xfs_alloc.h" 20 19 #include "xfs_trans.h" 21 - #include "xfs_inode_item.h" 22 20 #include "xfs_bmap.h" 23 - #include "xfs_bmap_util.h" 24 21 #include "xfs_attr.h" 25 - #include "xfs_attr_leaf.h" 26 - #include "xfs_attr_remote.h" 27 - #include "xfs_trans_space.h" 28 22 #include "xfs_trace.h" 29 - #include "xfs_cksum.h" 30 - #include "xfs_buf_item.h" 31 23 #include "xfs_error.h" 32 24 33 25 #define ATTR_RMTVALUE_MAPSIZE 1 /* # of map entries at once */ ··· 103 111 bool check_crc, 104 112 xfs_failaddr_t *failaddr) 105 113 { 106 - struct xfs_mount *mp = bp->b_target->bt_mount; 114 + struct xfs_mount *mp = bp->b_mount; 107 115 char *ptr; 108 116 int len; 109 117 xfs_daddr_t bno; ··· 167 175 xfs_attr3_rmt_write_verify( 168 176 struct xfs_buf *bp) 169 177 { 170 - struct xfs_mount *mp = bp->b_target->bt_mount; 178 + struct xfs_mount *mp = bp->b_mount; 171 179 xfs_failaddr_t fa; 172 180 int blksize = mp->m_attr_geo->blksize; 173 181 char *ptr; ··· 527 535 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock), 528 536 dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount); 529 537 530 - bp = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt, 0); 538 + bp = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt); 531 539 if (!bp) 532 540 return -ENOMEM; 533 541 bp->b_ops = &xfs_attr3_rmt_buf_ops;
-1
fs/xfs/libxfs/xfs_bit.c
··· 5 5 */ 6 6 #include "xfs.h" 7 7 #include "xfs_log_format.h" 8 - #include "xfs_bit.h" 9 8 10 9 /* 11 10 * XFS bit manipulation routines, used in non-realtime code.
+7 -12
fs/xfs/libxfs/xfs_bmap.c
··· 13 13 #include "xfs_sb.h" 14 14 #include "xfs_mount.h" 15 15 #include "xfs_defer.h" 16 - #include "xfs_da_format.h" 17 - #include "xfs_da_btree.h" 18 16 #include "xfs_dir2.h" 19 17 #include "xfs_inode.h" 20 18 #include "xfs_btree.h" 21 19 #include "xfs_trans.h" 22 - #include "xfs_inode_item.h" 23 - #include "xfs_extfree_item.h" 24 20 #include "xfs_alloc.h" 25 21 #include "xfs_bmap.h" 26 22 #include "xfs_bmap_util.h" ··· 28 32 #include "xfs_trans_space.h" 29 33 #include "xfs_buf_item.h" 30 34 #include "xfs_trace.h" 31 - #include "xfs_symlink.h" 32 35 #include "xfs_attr_leaf.h" 33 36 #include "xfs_filestream.h" 34 37 #include "xfs_rmap.h" ··· 365 370 bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno)); 366 371 if (!bp) { 367 372 bp_release = 1; 368 - error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp, 373 + error = xfs_btree_read_bufl(mp, NULL, bno, &bp, 369 374 XFS_BMAP_BTREE_REF, 370 375 &xfs_bmbt_buf_ops); 371 376 if (error) ··· 449 454 bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno)); 450 455 if (!bp) { 451 456 bp_release = 1; 452 - error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp, 457 + error = xfs_btree_read_bufl(mp, NULL, bno, &bp, 453 458 XFS_BMAP_BTREE_REF, 454 459 &xfs_bmbt_buf_ops); 455 460 if (error) ··· 614 619 XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, 615 620 xfs_btree_check_lptr(cur, cbno, 1)); 616 621 #endif 617 - error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp, XFS_BMAP_BTREE_REF, 622 + error = xfs_btree_read_bufl(mp, tp, cbno, &cbp, XFS_BMAP_BTREE_REF, 618 623 &xfs_bmbt_buf_ops); 619 624 if (error) 620 625 return error; ··· 727 732 cur->bc_private.b.allocated++; 728 733 ip->i_d.di_nblocks++; 729 734 xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L); 730 - abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0); 735 + abp = xfs_btree_get_bufl(mp, tp, args.fsbno); 731 736 if (!abp) { 732 737 error = -EFSCORRUPTED; 733 738 goto out_unreserve_dquot; ··· 873 878 ASSERT(args.fsbno != NULLFSBLOCK); 874 879 ASSERT(args.len == 1); 875 880 
tp->t_firstblock = args.fsbno; 876 - bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0); 881 + bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno); 877 882 878 883 /* 879 884 * Initialize the block, copy the data and log the remote buffer. ··· 1198 1203 * pointer (leftmost) at each level. 1199 1204 */ 1200 1205 while (level-- > 0) { 1201 - error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, 1206 + error = xfs_btree_read_bufl(mp, tp, bno, &bp, 1202 1207 XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops); 1203 1208 if (error) 1204 1209 goto out; ··· 1271 1276 */ 1272 1277 if (bno == NULLFSBLOCK) 1273 1278 break; 1274 - error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, 1279 + error = xfs_btree_read_bufl(mp, tp, bno, &bp, 1275 1280 XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops); 1276 1281 if (error) 1277 1282 goto out;
+1 -4
fs/xfs/libxfs/xfs_bmap_btree.c
··· 11 11 #include "xfs_trans_resv.h" 12 12 #include "xfs_bit.h" 13 13 #include "xfs_mount.h" 14 - #include "xfs_defer.h" 15 14 #include "xfs_inode.h" 16 15 #include "xfs_trans.h" 17 - #include "xfs_inode_item.h" 18 16 #include "xfs_alloc.h" 19 17 #include "xfs_btree.h" 20 18 #include "xfs_bmap_btree.h" ··· 20 22 #include "xfs_error.h" 21 23 #include "xfs_quota.h" 22 24 #include "xfs_trace.h" 23 - #include "xfs_cksum.h" 24 25 #include "xfs_rmap.h" 25 26 26 27 /* ··· 408 411 xfs_bmbt_verify( 409 412 struct xfs_buf *bp) 410 413 { 411 - struct xfs_mount *mp = bp->b_target->bt_mount; 414 + struct xfs_mount *mp = bp->b_mount; 412 415 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 413 416 xfs_failaddr_t fa; 414 417 unsigned int level;
+19 -30
fs/xfs/libxfs/xfs_btree.c
··· 11 11 #include "xfs_trans_resv.h" 12 12 #include "xfs_bit.h" 13 13 #include "xfs_mount.h" 14 - #include "xfs_defer.h" 15 14 #include "xfs_inode.h" 16 15 #include "xfs_trans.h" 17 - #include "xfs_inode_item.h" 18 16 #include "xfs_buf_item.h" 19 17 #include "xfs_btree.h" 20 18 #include "xfs_errortag.h" 21 19 #include "xfs_error.h" 22 20 #include "xfs_trace.h" 23 - #include "xfs_cksum.h" 24 21 #include "xfs_alloc.h" 25 22 #include "xfs_log.h" 26 23 ··· 273 276 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 274 277 struct xfs_buf_log_item *bip = bp->b_log_item; 275 278 276 - if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) 279 + if (!xfs_sb_version_hascrc(&bp->b_mount->m_sb)) 277 280 return; 278 281 if (bip) 279 282 block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); ··· 285 288 struct xfs_buf *bp) 286 289 { 287 290 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 288 - struct xfs_mount *mp = bp->b_target->bt_mount; 291 + struct xfs_mount *mp = bp->b_mount; 289 292 290 293 if (xfs_sb_version_hascrc(&mp->m_sb)) { 291 294 if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.l.bb_lsn))) ··· 311 314 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 312 315 struct xfs_buf_log_item *bip = bp->b_log_item; 313 316 314 - if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) 317 + if (!xfs_sb_version_hascrc(&bp->b_mount->m_sb)) 315 318 return; 316 319 if (bip) 317 320 block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); ··· 323 326 struct xfs_buf *bp) 324 327 { 325 328 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 326 - struct xfs_mount *mp = bp->b_target->bt_mount; 329 + struct xfs_mount *mp = bp->b_mount; 327 330 328 331 if (xfs_sb_version_hascrc(&mp->m_sb)) { 329 332 if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn))) ··· 688 691 xfs_btree_get_bufl( 689 692 xfs_mount_t *mp, /* file system mount point */ 690 693 xfs_trans_t *tp, /* transaction pointer */ 691 - xfs_fsblock_t fsbno, /* file system block number */ 692 
- uint lock) /* lock flags for get_buf */ 694 + xfs_fsblock_t fsbno) /* file system block number */ 693 695 { 694 696 xfs_daddr_t d; /* real disk block address */ 695 697 696 698 ASSERT(fsbno != NULLFSBLOCK); 697 699 d = XFS_FSB_TO_DADDR(mp, fsbno); 698 - return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); 700 + return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, 0); 699 701 } 700 702 701 703 /* ··· 706 710 xfs_mount_t *mp, /* file system mount point */ 707 711 xfs_trans_t *tp, /* transaction pointer */ 708 712 xfs_agnumber_t agno, /* allocation group number */ 709 - xfs_agblock_t agbno, /* allocation group block number */ 710 - uint lock) /* lock flags for get_buf */ 713 + xfs_agblock_t agbno) /* allocation group block number */ 711 714 { 712 715 xfs_daddr_t d; /* real disk block address */ 713 716 714 717 ASSERT(agno != NULLAGNUMBER); 715 718 ASSERT(agbno != NULLAGBLOCK); 716 719 d = XFS_AGB_TO_DADDR(mp, agno, agbno); 717 - return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); 720 + return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, 0); 718 721 } 719 722 720 723 /* ··· 840 845 struct xfs_mount *mp, /* file system mount point */ 841 846 struct xfs_trans *tp, /* transaction pointer */ 842 847 xfs_fsblock_t fsbno, /* file system block number */ 843 - uint lock, /* lock flags for read_buf */ 844 848 struct xfs_buf **bpp, /* buffer for fsbno */ 845 849 int refval, /* ref count value for buffer */ 846 850 const struct xfs_buf_ops *ops) ··· 852 858 return -EFSCORRUPTED; 853 859 d = XFS_FSB_TO_DADDR(mp, fsbno); 854 860 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d, 855 - mp->m_bsize, lock, &bp, ops); 861 + mp->m_bsize, 0, &bp, ops); 856 862 if (error) 857 863 return error; 858 864 if (bp) ··· 1179 1185 xfs_btnum_t btnum, 1180 1186 __u16 level, 1181 1187 __u16 numrecs, 1182 - __u64 owner, 1183 - unsigned int flags) 1188 + __u64 owner) 1184 1189 { 1185 1190 xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), 
bp->b_bn, 1186 - btnum, level, numrecs, owner, flags); 1191 + btnum, level, numrecs, owner, 0); 1187 1192 } 1188 1193 1189 1194 STATIC void ··· 1281 1288 xfs_btree_get_buf_block( 1282 1289 struct xfs_btree_cur *cur, 1283 1290 union xfs_btree_ptr *ptr, 1284 - int flags, 1285 1291 struct xfs_btree_block **block, 1286 1292 struct xfs_buf **bpp) 1287 1293 { ··· 1288 1296 xfs_daddr_t d; 1289 1297 int error; 1290 1298 1291 - /* need to sort out how callers deal with failures first */ 1292 - ASSERT(!(flags & XBF_TRYLOCK)); 1293 - 1294 1299 error = xfs_btree_ptr_to_daddr(cur, ptr, &d); 1295 1300 if (error) 1296 1301 return error; 1297 1302 *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d, 1298 - mp->m_bsize, flags); 1303 + mp->m_bsize, 0); 1299 1304 1300 1305 if (!*bpp) 1301 1306 return -ENOMEM; ··· 2695 2706 XFS_BTREE_STATS_INC(cur, alloc); 2696 2707 2697 2708 /* Set up the new block as "right". */ 2698 - error = xfs_btree_get_buf_block(cur, &rptr, 0, &right, &rbp); 2709 + error = xfs_btree_get_buf_block(cur, &rptr, &right, &rbp); 2699 2710 if (error) 2700 2711 goto error0; 2701 2712 ··· 2950 2961 XFS_BTREE_STATS_INC(cur, alloc); 2951 2962 2952 2963 /* Copy the root into a real block. */ 2953 - error = xfs_btree_get_buf_block(cur, &nptr, 0, &cblock, &cbp); 2964 + error = xfs_btree_get_buf_block(cur, &nptr, &cblock, &cbp); 2954 2965 if (error) 2955 2966 goto error0; 2956 2967 ··· 3047 3058 XFS_BTREE_STATS_INC(cur, alloc); 3048 3059 3049 3060 /* Set up the new block. 
*/ 3050 - error = xfs_btree_get_buf_block(cur, &lptr, 0, &new, &nbp); 3061 + error = xfs_btree_get_buf_block(cur, &lptr, &new, &nbp); 3051 3062 if (error) 3052 3063 goto error0; 3053 3064 ··· 4422 4433 struct xfs_buf *bp, 4423 4434 uint64_t owner) 4424 4435 { 4425 - struct xfs_mount *mp = bp->b_target->bt_mount; 4436 + struct xfs_mount *mp = bp->b_mount; 4426 4437 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 4427 4438 4428 4439 if (!xfs_sb_version_hascrc(&mp->m_sb)) ··· 4443 4454 struct xfs_buf *bp, 4444 4455 unsigned int max_recs) 4445 4456 { 4446 - struct xfs_mount *mp = bp->b_target->bt_mount; 4457 + struct xfs_mount *mp = bp->b_mount; 4447 4458 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 4448 4459 4449 4460 /* numrecs verification */ ··· 4473 4484 xfs_btree_sblock_v5hdr_verify( 4474 4485 struct xfs_buf *bp) 4475 4486 { 4476 - struct xfs_mount *mp = bp->b_target->bt_mount; 4487 + struct xfs_mount *mp = bp->b_mount; 4477 4488 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 4478 4489 struct xfs_perag *pag = bp->b_pag; 4479 4490 ··· 4499 4510 struct xfs_buf *bp, 4500 4511 unsigned int max_recs) 4501 4512 { 4502 - struct xfs_mount *mp = bp->b_target->bt_mount; 4513 + struct xfs_mount *mp = bp->b_mount; 4503 4514 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 4504 4515 xfs_agblock_t agno; 4505 4516
+5 -9
fs/xfs/libxfs/xfs_btree.h
··· 301 301 xfs_btree_get_bufl( 302 302 struct xfs_mount *mp, /* file system mount point */ 303 303 struct xfs_trans *tp, /* transaction pointer */ 304 - xfs_fsblock_t fsbno, /* file system block number */ 305 - uint lock); /* lock flags for get_buf */ 304 + xfs_fsblock_t fsbno); /* file system block number */ 306 305 307 306 /* 308 307 * Get a buffer for the block, return it with no data read. ··· 312 313 struct xfs_mount *mp, /* file system mount point */ 313 314 struct xfs_trans *tp, /* transaction pointer */ 314 315 xfs_agnumber_t agno, /* allocation group number */ 315 - xfs_agblock_t agbno, /* allocation group block number */ 316 - uint lock); /* lock flags for get_buf */ 316 + xfs_agblock_t agbno); /* allocation group block number */ 317 317 318 318 /* 319 319 * Check for the cursor referring to the last block at the given level. ··· 343 345 struct xfs_mount *mp, /* file system mount point */ 344 346 struct xfs_trans *tp, /* transaction pointer */ 345 347 xfs_fsblock_t fsbno, /* file system block number */ 346 - uint lock, /* lock flags for read_buf */ 347 348 struct xfs_buf **bpp, /* buffer for fsbno */ 348 349 int refval, /* ref count value for buffer */ 349 350 const struct xfs_buf_ops *ops); ··· 380 383 xfs_btnum_t btnum, 381 384 __u16 level, 382 385 __u16 numrecs, 383 - __u64 owner, 384 - unsigned int flags); 386 + __u64 owner); 385 387 386 388 void 387 389 xfs_btree_init_block_int( ··· 465 469 unsigned long long xfs_btree_calc_size(uint *limits, unsigned long long len); 466 470 467 471 /* return codes */ 468 - #define XFS_BTREE_QUERY_RANGE_CONTINUE 0 /* keep iterating */ 469 - #define XFS_BTREE_QUERY_RANGE_ABORT 1 /* stop iterating */ 472 + #define XFS_BTREE_QUERY_RANGE_CONTINUE (XFS_ITER_CONTINUE) /* keep iterating */ 473 + #define XFS_BTREE_QUERY_RANGE_ABORT (XFS_ITER_ABORT) /* stop iterating */ 470 474 typedef int (*xfs_btree_query_range_fn)(struct xfs_btree_cur *cur, 471 475 union xfs_btree_rec *rec, void *priv); 472 476
+3 -9
fs/xfs/libxfs/xfs_da_btree.c
··· 12 12 #include "xfs_trans_resv.h" 13 13 #include "xfs_bit.h" 14 14 #include "xfs_mount.h" 15 - #include "xfs_da_format.h" 16 - #include "xfs_da_btree.h" 17 15 #include "xfs_dir2.h" 18 16 #include "xfs_dir2_priv.h" 19 17 #include "xfs_inode.h" 20 18 #include "xfs_trans.h" 21 - #include "xfs_inode_item.h" 22 - #include "xfs_alloc.h" 23 19 #include "xfs_bmap.h" 24 - #include "xfs_attr.h" 25 20 #include "xfs_attr_leaf.h" 26 21 #include "xfs_error.h" 27 22 #include "xfs_trace.h" 28 - #include "xfs_cksum.h" 29 23 #include "xfs_buf_item.h" 30 24 #include "xfs_log.h" 31 25 ··· 120 126 struct xfs_buf *bp, 121 127 struct xfs_da3_blkinfo *hdr3) 122 128 { 123 - struct xfs_mount *mp = bp->b_target->bt_mount; 129 + struct xfs_mount *mp = bp->b_mount; 124 130 struct xfs_da_blkinfo *hdr = &hdr3->hdr; 125 131 126 132 if (!xfs_verify_magic16(bp, hdr->magic)) ··· 142 148 xfs_da3_node_verify( 143 149 struct xfs_buf *bp) 144 150 { 145 - struct xfs_mount *mp = bp->b_target->bt_mount; 151 + struct xfs_mount *mp = bp->b_mount; 146 152 struct xfs_da_intnode *hdr = bp->b_addr; 147 153 struct xfs_da3_icnode_hdr ichdr; 148 154 const struct xfs_dir_ops *ops; ··· 180 186 xfs_da3_node_write_verify( 181 187 struct xfs_buf *bp) 182 188 { 183 - struct xfs_mount *mp = bp->b_target->bt_mount; 189 + struct xfs_mount *mp = bp->b_mount; 184 190 struct xfs_buf_log_item *bip = bp->b_log_item; 185 191 struct xfs_da3_node_hdr *hdr3 = bp->b_addr; 186 192 xfs_failaddr_t fa;
-3
fs/xfs/libxfs/xfs_da_format.c
··· 11 11 #include "xfs_log_format.h" 12 12 #include "xfs_trans_resv.h" 13 13 #include "xfs_mount.h" 14 - #include "xfs_da_format.h" 15 - #include "xfs_da_btree.h" 16 14 #include "xfs_inode.h" 17 15 #include "xfs_dir2.h" 18 - #include "xfs_dir2_priv.h" 19 16 20 17 /* 21 18 * Shortform directory ops
-2
fs/xfs/libxfs/xfs_defer.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_log_format.h" 11 11 #include "xfs_trans_resv.h" 12 - #include "xfs_bit.h" 13 - #include "xfs_sb.h" 14 12 #include "xfs_mount.h" 15 13 #include "xfs_defer.h" 16 14 #include "xfs_trans.h"
+1 -5
fs/xfs/libxfs/xfs_dir2.c
··· 5 5 */ 6 6 #include "xfs.h" 7 7 #include "xfs_fs.h" 8 + #include "xfs_shared.h" 8 9 #include "xfs_format.h" 9 10 #include "xfs_log_format.h" 10 11 #include "xfs_trans_resv.h" 11 12 #include "xfs_mount.h" 12 - #include "xfs_defer.h" 13 - #include "xfs_da_format.h" 14 - #include "xfs_da_btree.h" 15 13 #include "xfs_inode.h" 16 14 #include "xfs_trans.h" 17 - #include "xfs_inode_item.h" 18 15 #include "xfs_bmap.h" 19 16 #include "xfs_dir2.h" 20 17 #include "xfs_dir2_priv.h" 21 - #include "xfs_ialloc.h" 22 18 #include "xfs_errortag.h" 23 19 #include "xfs_error.h" 24 20 #include "xfs_trace.h"
+4 -7
fs/xfs/libxfs/xfs_dir2_block.c
··· 6 6 */ 7 7 #include "xfs.h" 8 8 #include "xfs_fs.h" 9 + #include "xfs_shared.h" 9 10 #include "xfs_format.h" 10 11 #include "xfs_log_format.h" 11 12 #include "xfs_trans_resv.h" 12 13 #include "xfs_mount.h" 13 - #include "xfs_da_format.h" 14 - #include "xfs_da_btree.h" 15 14 #include "xfs_inode.h" 16 15 #include "xfs_trans.h" 17 - #include "xfs_inode_item.h" 18 16 #include "xfs_bmap.h" 19 17 #include "xfs_buf_item.h" 20 18 #include "xfs_dir2.h" 21 19 #include "xfs_dir2_priv.h" 22 20 #include "xfs_error.h" 23 21 #include "xfs_trace.h" 24 - #include "xfs_cksum.h" 25 22 #include "xfs_log.h" 26 23 27 24 /* ··· 47 50 xfs_dir3_block_verify( 48 51 struct xfs_buf *bp) 49 52 { 50 - struct xfs_mount *mp = bp->b_target->bt_mount; 53 + struct xfs_mount *mp = bp->b_mount; 51 54 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; 52 55 53 56 if (!xfs_verify_magic(bp, hdr3->magic)) ··· 68 71 xfs_dir3_block_read_verify( 69 72 struct xfs_buf *bp) 70 73 { 71 - struct xfs_mount *mp = bp->b_target->bt_mount; 74 + struct xfs_mount *mp = bp->b_mount; 72 75 xfs_failaddr_t fa; 73 76 74 77 if (xfs_sb_version_hascrc(&mp->m_sb) && ··· 85 88 xfs_dir3_block_write_verify( 86 89 struct xfs_buf *bp) 87 90 { 88 - struct xfs_mount *mp = bp->b_target->bt_mount; 91 + struct xfs_mount *mp = bp->b_mount; 89 92 struct xfs_buf_log_item *bip = bp->b_log_item; 90 93 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; 91 94 xfs_failaddr_t fa;
+5 -9
fs/xfs/libxfs/xfs_dir2_data.c
··· 6 6 */ 7 7 #include "xfs.h" 8 8 #include "xfs_fs.h" 9 + #include "xfs_shared.h" 9 10 #include "xfs_format.h" 10 11 #include "xfs_log_format.h" 11 12 #include "xfs_trans_resv.h" 12 13 #include "xfs_mount.h" 13 - #include "xfs_da_format.h" 14 - #include "xfs_da_btree.h" 15 14 #include "xfs_inode.h" 16 15 #include "xfs_dir2.h" 17 - #include "xfs_dir2_priv.h" 18 16 #include "xfs_error.h" 19 17 #include "xfs_trans.h" 20 18 #include "xfs_buf_item.h" 21 - #include "xfs_cksum.h" 22 19 #include "xfs_log.h" 23 20 24 21 static xfs_failaddr_t xfs_dir2_data_freefind_verify( ··· 47 50 int i; /* leaf index */ 48 51 int lastfree; /* last entry was unused */ 49 52 xfs_dir2_leaf_entry_t *lep=NULL; /* block leaf entries */ 50 - xfs_mount_t *mp; /* filesystem mount point */ 53 + struct xfs_mount *mp = bp->b_mount; 51 54 char *p; /* current data position */ 52 55 int stale; /* count of stale leaves */ 53 56 struct xfs_name name; 54 57 const struct xfs_dir_ops *ops; 55 58 struct xfs_da_geometry *geo; 56 59 57 - mp = bp->b_target->bt_mount; 58 60 geo = mp->m_dir_geo; 59 61 60 62 /* ··· 245 249 xfs_dir3_data_verify( 246 250 struct xfs_buf *bp) 247 251 { 248 - struct xfs_mount *mp = bp->b_target->bt_mount; 252 + struct xfs_mount *mp = bp->b_mount; 249 253 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; 250 254 251 255 if (!xfs_verify_magic(bp, hdr3->magic)) ··· 294 298 xfs_dir3_data_read_verify( 295 299 struct xfs_buf *bp) 296 300 { 297 - struct xfs_mount *mp = bp->b_target->bt_mount; 301 + struct xfs_mount *mp = bp->b_mount; 298 302 xfs_failaddr_t fa; 299 303 300 304 if (xfs_sb_version_hascrc(&mp->m_sb) && ··· 311 315 xfs_dir3_data_write_verify( 312 316 struct xfs_buf *bp) 313 317 { 314 - struct xfs_mount *mp = bp->b_target->bt_mount; 318 + struct xfs_mount *mp = bp->b_mount; 315 319 struct xfs_buf_log_item *bip = bp->b_log_item; 316 320 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; 317 321 xfs_failaddr_t fa;
+4 -7
fs/xfs/libxfs/xfs_dir2_leaf.c
··· 6 6 */ 7 7 #include "xfs.h" 8 8 #include "xfs_fs.h" 9 + #include "xfs_shared.h" 9 10 #include "xfs_format.h" 10 11 #include "xfs_log_format.h" 11 12 #include "xfs_trans_resv.h" 12 13 #include "xfs_mount.h" 13 - #include "xfs_da_format.h" 14 - #include "xfs_da_btree.h" 15 14 #include "xfs_inode.h" 16 15 #include "xfs_bmap.h" 17 16 #include "xfs_dir2.h" ··· 19 20 #include "xfs_trace.h" 20 21 #include "xfs_trans.h" 21 22 #include "xfs_buf_item.h" 22 - #include "xfs_cksum.h" 23 - #include "xfs_log.h" 24 23 25 24 /* 26 25 * Local function declarations. ··· 141 144 xfs_dir3_leaf_verify( 142 145 struct xfs_buf *bp) 143 146 { 144 - struct xfs_mount *mp = bp->b_target->bt_mount; 147 + struct xfs_mount *mp = bp->b_mount; 145 148 struct xfs_dir2_leaf *leaf = bp->b_addr; 146 149 xfs_failaddr_t fa; 147 150 ··· 156 159 xfs_dir3_leaf_read_verify( 157 160 struct xfs_buf *bp) 158 161 { 159 - struct xfs_mount *mp = bp->b_target->bt_mount; 162 + struct xfs_mount *mp = bp->b_mount; 160 163 xfs_failaddr_t fa; 161 164 162 165 if (xfs_sb_version_hascrc(&mp->m_sb) && ··· 173 176 xfs_dir3_leaf_write_verify( 174 177 struct xfs_buf *bp) 175 178 { 176 - struct xfs_mount *mp = bp->b_target->bt_mount; 179 + struct xfs_mount *mp = bp->b_mount; 177 180 struct xfs_buf_log_item *bip = bp->b_log_item; 178 181 struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr; 179 182 xfs_failaddr_t fa;
+4 -6
fs/xfs/libxfs/xfs_dir2_node.c
··· 6 6 */ 7 7 #include "xfs.h" 8 8 #include "xfs_fs.h" 9 + #include "xfs_shared.h" 9 10 #include "xfs_format.h" 10 11 #include "xfs_log_format.h" 11 12 #include "xfs_trans_resv.h" 12 13 #include "xfs_mount.h" 13 - #include "xfs_da_format.h" 14 - #include "xfs_da_btree.h" 15 14 #include "xfs_inode.h" 16 15 #include "xfs_bmap.h" 17 16 #include "xfs_dir2.h" ··· 19 20 #include "xfs_trace.h" 20 21 #include "xfs_trans.h" 21 22 #include "xfs_buf_item.h" 22 - #include "xfs_cksum.h" 23 23 #include "xfs_log.h" 24 24 25 25 /* ··· 82 84 xfs_dir3_free_verify( 83 85 struct xfs_buf *bp) 84 86 { 85 - struct xfs_mount *mp = bp->b_target->bt_mount; 87 + struct xfs_mount *mp = bp->b_mount; 86 88 struct xfs_dir2_free_hdr *hdr = bp->b_addr; 87 89 88 90 if (!xfs_verify_magic(bp, hdr->magic)) ··· 108 110 xfs_dir3_free_read_verify( 109 111 struct xfs_buf *bp) 110 112 { 111 - struct xfs_mount *mp = bp->b_target->bt_mount; 113 + struct xfs_mount *mp = bp->b_mount; 112 114 xfs_failaddr_t fa; 113 115 114 116 if (xfs_sb_version_hascrc(&mp->m_sb) && ··· 125 127 xfs_dir3_free_write_verify( 126 128 struct xfs_buf *bp) 127 129 { 128 - struct xfs_mount *mp = bp->b_target->bt_mount; 130 + struct xfs_mount *mp = bp->b_mount; 129 131 struct xfs_buf_log_item *bip = bp->b_log_item; 130 132 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; 131 133 xfs_failaddr_t fa;
+1 -4
fs/xfs/libxfs/xfs_dir2_sf.c
··· 5 5 */ 6 6 #include "xfs.h" 7 7 #include "xfs_fs.h" 8 + #include "xfs_shared.h" 8 9 #include "xfs_format.h" 9 10 #include "xfs_log_format.h" 10 11 #include "xfs_trans_resv.h" 11 12 #include "xfs_mount.h" 12 - #include "xfs_da_format.h" 13 - #include "xfs_da_btree.h" 14 13 #include "xfs_inode.h" 15 14 #include "xfs_trans.h" 16 - #include "xfs_inode_item.h" 17 - #include "xfs_error.h" 18 15 #include "xfs_dir2.h" 19 16 #include "xfs_dir2_priv.h" 20 17 #include "xfs_trace.h"
+4 -6
fs/xfs/libxfs/xfs_dquot_buf.c
··· 16 16 #include "xfs_trans.h" 17 17 #include "xfs_qm.h" 18 18 #include "xfs_error.h" 19 - #include "xfs_cksum.h" 20 - #include "xfs_trace.h" 21 19 22 20 int 23 21 xfs_calc_dquots_per_chunk( ··· 222 224 xfs_dquot_buf_verify_struct( 223 225 struct xfs_buf *bp) 224 226 { 225 - struct xfs_mount *mp = bp->b_target->bt_mount; 227 + struct xfs_mount *mp = bp->b_mount; 226 228 227 229 return xfs_dquot_buf_verify(mp, bp, false); 228 230 } ··· 231 233 xfs_dquot_buf_read_verify( 232 234 struct xfs_buf *bp) 233 235 { 234 - struct xfs_mount *mp = bp->b_target->bt_mount; 236 + struct xfs_mount *mp = bp->b_mount; 235 237 236 238 if (!xfs_dquot_buf_verify_crc(mp, bp, false)) 237 239 return; ··· 248 250 xfs_dquot_buf_readahead_verify( 249 251 struct xfs_buf *bp) 250 252 { 251 - struct xfs_mount *mp = bp->b_target->bt_mount; 253 + struct xfs_mount *mp = bp->b_mount; 252 254 253 255 if (!xfs_dquot_buf_verify_crc(mp, bp, true) || 254 256 xfs_dquot_buf_verify(mp, bp, true) != NULL) { ··· 266 268 xfs_dquot_buf_write_verify( 267 269 struct xfs_buf *bp) 268 270 { 269 - struct xfs_mount *mp = bp->b_target->bt_mount; 271 + struct xfs_mount *mp = bp->b_mount; 270 272 271 273 xfs_dquot_buf_verify(mp, bp, false); 272 274 }
+1 -1
fs/xfs/libxfs/xfs_format.h
··· 1071 1071 #define XFS_INO_MASK(k) (uint32_t)((1ULL << (k)) - 1) 1072 1072 #define XFS_INO_OFFSET_BITS(mp) (mp)->m_sb.sb_inopblog 1073 1073 #define XFS_INO_AGBNO_BITS(mp) (mp)->m_sb.sb_agblklog 1074 - #define XFS_INO_AGINO_BITS(mp) (mp)->m_agino_log 1074 + #define XFS_INO_AGINO_BITS(mp) ((mp)->m_ino_geo.agino_log) 1075 1075 #define XFS_INO_AGNO_BITS(mp) (mp)->m_agno_log 1076 1076 #define XFS_INO_BITS(mp) \ 1077 1077 XFS_INO_AGNO_BITS(mp) + XFS_INO_AGINO_BITS(mp)
+114 -10
fs/xfs/libxfs/xfs_fs.h
··· 97 97 * For use by backup and restore programs to set the XFS on-disk inode 98 98 * fields di_dmevmask and di_dmstate. These must be set to exactly and 99 99 * only values previously obtained via xfs_bulkstat! (Specifically the 100 - * xfs_bstat_t fields bs_dmevmask and bs_dmstate.) 100 + * struct xfs_bstat fields bs_dmevmask and bs_dmstate.) 101 101 */ 102 102 #ifndef HAVE_FSDMIDATA 103 103 struct fsdmidata { ··· 328 328 __s32 tv_nsec; /* and nanoseconds */ 329 329 } xfs_bstime_t; 330 330 331 - typedef struct xfs_bstat { 331 + struct xfs_bstat { 332 332 __u64 bs_ino; /* inode number */ 333 333 __u16 bs_mode; /* type and mode */ 334 334 __u16 bs_nlink; /* number of links */ ··· 356 356 __u32 bs_dmevmask; /* DMIG event mask */ 357 357 __u16 bs_dmstate; /* DMIG state info */ 358 358 __u16 bs_aextents; /* attribute number of extents */ 359 - } xfs_bstat_t; 359 + }; 360 + 361 + /* New bulkstat structure that reports v5 features and fixes padding issues */ 362 + struct xfs_bulkstat { 363 + uint64_t bs_ino; /* inode number */ 364 + uint64_t bs_size; /* file size */ 365 + 366 + uint64_t bs_blocks; /* number of blocks */ 367 + uint64_t bs_xflags; /* extended flags */ 368 + 369 + uint64_t bs_atime; /* access time, seconds */ 370 + uint64_t bs_mtime; /* modify time, seconds */ 371 + 372 + uint64_t bs_ctime; /* inode change time, seconds */ 373 + uint64_t bs_btime; /* creation time, seconds */ 374 + 375 + uint32_t bs_gen; /* generation count */ 376 + uint32_t bs_uid; /* user id */ 377 + uint32_t bs_gid; /* group id */ 378 + uint32_t bs_projectid; /* project id */ 379 + 380 + uint32_t bs_atime_nsec; /* access time, nanoseconds */ 381 + uint32_t bs_mtime_nsec; /* modify time, nanoseconds */ 382 + uint32_t bs_ctime_nsec; /* inode change time, nanoseconds */ 383 + uint32_t bs_btime_nsec; /* creation time, nanoseconds */ 384 + 385 + uint32_t bs_blksize; /* block size */ 386 + uint32_t bs_rdev; /* device value */ 387 + uint32_t bs_cowextsize_blks; /* cow extent size hint, 
blocks */ 388 + uint32_t bs_extsize_blks; /* extent size hint, blocks */ 389 + 390 + uint32_t bs_nlink; /* number of links */ 391 + uint32_t bs_extents; /* number of extents */ 392 + uint32_t bs_aextents; /* attribute number of extents */ 393 + uint16_t bs_version; /* structure version */ 394 + uint16_t bs_forkoff; /* inode fork offset in bytes */ 395 + 396 + uint16_t bs_sick; /* sick inode metadata */ 397 + uint16_t bs_checked; /* checked inode metadata */ 398 + uint16_t bs_mode; /* type and mode */ 399 + uint16_t bs_pad2; /* zeroed */ 400 + 401 + uint64_t bs_pad[7]; /* zeroed */ 402 + }; 403 + 404 + #define XFS_BULKSTAT_VERSION_V1 (1) 405 + #define XFS_BULKSTAT_VERSION_V5 (5) 360 406 361 407 /* bs_sick flags */ 362 408 #define XFS_BS_SICK_INODE (1 << 0) /* inode core */ ··· 420 374 * to retain compatibility with "old" filesystems). 421 375 */ 422 376 static inline uint32_t 423 - bstat_get_projid(struct xfs_bstat *bs) 377 + bstat_get_projid(const struct xfs_bstat *bs) 424 378 { 425 379 return (uint32_t)bs->bs_projid_hi << 16 | bs->bs_projid_lo; 426 380 } ··· 428 382 /* 429 383 * The user-level BulkStat Request interface structure. 430 384 */ 431 - typedef struct xfs_fsop_bulkreq { 385 + struct xfs_fsop_bulkreq { 432 386 __u64 __user *lastip; /* last inode # pointer */ 433 387 __s32 icount; /* count of entries in buffer */ 434 388 void __user *ubuffer;/* user buffer for inode desc. */ 435 389 __s32 __user *ocount; /* output count pointer */ 436 - } xfs_fsop_bulkreq_t; 437 - 390 + }; 438 391 439 392 /* 440 393 * Structures returned from xfs_inumbers routine (XFS_IOC_FSINUMBERS). 
441 394 */ 442 - typedef struct xfs_inogrp { 395 + struct xfs_inogrp { 443 396 __u64 xi_startino; /* starting inode number */ 444 397 __s32 xi_alloccount; /* # bits set in allocmask */ 445 398 __u64 xi_allocmask; /* mask of allocated inodes */ 446 - } xfs_inogrp_t; 399 + }; 447 400 401 + /* New inumbers structure that reports v5 features and fixes padding issues */ 402 + struct xfs_inumbers { 403 + uint64_t xi_startino; /* starting inode number */ 404 + uint64_t xi_allocmask; /* mask of allocated inodes */ 405 + uint8_t xi_alloccount; /* # bits set in allocmask */ 406 + uint8_t xi_version; /* version */ 407 + uint8_t xi_padding[6]; /* zero */ 408 + }; 409 + 410 + #define XFS_INUMBERS_VERSION_V1 (1) 411 + #define XFS_INUMBERS_VERSION_V5 (5) 412 + 413 + /* Header for bulk inode requests. */ 414 + struct xfs_bulk_ireq { 415 + uint64_t ino; /* I/O: start with this inode */ 416 + uint32_t flags; /* I/O: operation flags */ 417 + uint32_t icount; /* I: count of entries in buffer */ 418 + uint32_t ocount; /* O: count of entries filled out */ 419 + uint32_t agno; /* I: see comment for IREQ_AGNO */ 420 + uint64_t reserved[5]; /* must be zero */ 421 + }; 422 + 423 + /* 424 + * Only return results from the specified @agno. If @ino is zero, start 425 + * with the first inode of @agno. 426 + */ 427 + #define XFS_BULK_IREQ_AGNO (1 << 0) 428 + 429 + /* 430 + * Return bulkstat information for a single inode, where @ino value is a 431 + * special value, not a literal inode number. See the XFS_BULK_IREQ_SPECIAL_* 432 + * values below. Not compatible with XFS_BULK_IREQ_AGNO. 433 + */ 434 + #define XFS_BULK_IREQ_SPECIAL (1 << 1) 435 + 436 + #define XFS_BULK_IREQ_FLAGS_ALL (XFS_BULK_IREQ_AGNO | \ 437 + XFS_BULK_IREQ_SPECIAL) 438 + 439 + /* Operate on the root directory inode. 
*/ 440 + #define XFS_BULK_IREQ_SPECIAL_ROOT (1) 441 + 442 + /* 443 + * ioctl structures for v5 bulkstat and inumbers requests 444 + */ 445 + struct xfs_bulkstat_req { 446 + struct xfs_bulk_ireq hdr; 447 + struct xfs_bulkstat bulkstat[]; 448 + }; 449 + #define XFS_BULKSTAT_REQ_SIZE(nr) (sizeof(struct xfs_bulkstat_req) + \ 450 + (nr) * sizeof(struct xfs_bulkstat)) 451 + 452 + struct xfs_inumbers_req { 453 + struct xfs_bulk_ireq hdr; 454 + struct xfs_inumbers inumbers[]; 455 + }; 456 + #define XFS_INUMBERS_REQ_SIZE(nr) (sizeof(struct xfs_inumbers_req) + \ 457 + (nr) * sizeof(struct xfs_inumbers)) 448 458 449 459 /* 450 460 * Error injection. ··· 631 529 xfs_off_t sx_offset; /* offset into file */ 632 530 xfs_off_t sx_length; /* leng from offset */ 633 531 char sx_pad[16]; /* pad space, unused */ 634 - xfs_bstat_t sx_stat; /* stat of target b4 copy */ 532 + struct xfs_bstat sx_stat; /* stat of target b4 copy */ 635 533 } xfs_swapext_t; 636 534 637 535 /* ··· 803 701 #define XFS_IOC_FSGEOMETRY_V4 _IOR ('X', 124, struct xfs_fsop_geom_v4) 804 702 #define XFS_IOC_GOINGDOWN _IOR ('X', 125, uint32_t) 805 703 #define XFS_IOC_FSGEOMETRY _IOR ('X', 126, struct xfs_fsop_geom) 704 + #define XFS_IOC_BULKSTAT _IOR ('X', 127, struct xfs_bulkstat_req) 705 + #define XFS_IOC_INUMBERS _IOR ('X', 128, struct xfs_inumbers_req) 806 706 /* XFS_IOC_GETFSUUID ---------- deprecated 140 */ 807 707 808 708
+1 -1
fs/xfs/libxfs/xfs_health.h
··· 185 185 186 186 void xfs_fsop_geom_health(struct xfs_mount *mp, struct xfs_fsop_geom *geo); 187 187 void xfs_ag_geom_health(struct xfs_perag *pag, struct xfs_ag_geometry *ageo); 188 - void xfs_bulkstat_health(struct xfs_inode *ip, struct xfs_bstat *bs); 188 + void xfs_bulkstat_health(struct xfs_inode *ip, struct xfs_bulkstat *bs); 189 189 190 190 #endif /* __XFS_HEALTH_H__ */
+163 -82
fs/xfs/libxfs/xfs_ialloc.c
··· 12 12 #include "xfs_bit.h" 13 13 #include "xfs_sb.h" 14 14 #include "xfs_mount.h" 15 - #include "xfs_defer.h" 16 15 #include "xfs_inode.h" 17 16 #include "xfs_btree.h" 18 17 #include "xfs_ialloc.h" 19 18 #include "xfs_ialloc_btree.h" 20 19 #include "xfs_alloc.h" 21 - #include "xfs_rtalloc.h" 22 20 #include "xfs_errortag.h" 23 21 #include "xfs_error.h" 24 22 #include "xfs_bmap.h" 25 - #include "xfs_cksum.h" 26 23 #include "xfs_trans.h" 27 24 #include "xfs_buf_item.h" 28 25 #include "xfs_icreate_item.h" ··· 27 30 #include "xfs_trace.h" 28 31 #include "xfs_log.h" 29 32 #include "xfs_rmap.h" 30 - 31 - 32 - /* 33 - * Allocation group level functions. 34 - */ 35 - int 36 - xfs_ialloc_cluster_alignment( 37 - struct xfs_mount *mp) 38 - { 39 - if (xfs_sb_version_hasalign(&mp->m_sb) && 40 - mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp)) 41 - return mp->m_sb.sb_inoalignmt; 42 - return 1; 43 - } 44 33 45 34 /* 46 35 * Lookup a record by ino in the btree given by cur. ··· 282 299 * sizes, manipulate the inodes in buffers which are multiples of the 283 300 * blocks size. 284 301 */ 285 - nbufs = length / mp->m_blocks_per_cluster; 302 + nbufs = length / M_IGEO(mp)->blocks_per_cluster; 286 303 287 304 /* 288 305 * Figure out what version number to use in the inodes we create. If ··· 326 343 * Get the block. 327 344 */ 328 345 d = XFS_AGB_TO_DADDR(mp, agno, agbno + 329 - (j * mp->m_blocks_per_cluster)); 346 + (j * M_IGEO(mp)->blocks_per_cluster)); 330 347 fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, 331 - mp->m_bsize * mp->m_blocks_per_cluster, 348 + mp->m_bsize * 349 + M_IGEO(mp)->blocks_per_cluster, 332 350 XBF_UNMAPPED); 333 351 if (!fbuf) 334 352 return -ENOMEM; ··· 337 353 /* Initialize the inode buffers and log them appropriately. 
*/ 338 354 fbuf->b_ops = &xfs_inode_buf_ops; 339 355 xfs_buf_zero(fbuf, 0, BBTOB(fbuf->b_length)); 340 - for (i = 0; i < mp->m_inodes_per_cluster; i++) { 356 + for (i = 0; i < M_IGEO(mp)->inodes_per_cluster; i++) { 341 357 int ioffset = i << mp->m_sb.sb_inodelog; 342 358 uint isize = xfs_dinode_size(version); 343 359 ··· 600 616 * Allocate new inodes in the allocation group specified by agbp. 601 617 * Return 0 for success, else error code. 602 618 */ 603 - STATIC int /* error code or 0 */ 619 + STATIC int 604 620 xfs_ialloc_ag_alloc( 605 - xfs_trans_t *tp, /* transaction pointer */ 606 - xfs_buf_t *agbp, /* alloc group buffer */ 607 - int *alloc) 621 + struct xfs_trans *tp, 622 + struct xfs_buf *agbp, 623 + int *alloc) 608 624 { 609 - xfs_agi_t *agi; /* allocation group header */ 610 - xfs_alloc_arg_t args; /* allocation argument structure */ 611 - xfs_agnumber_t agno; 612 - int error; 613 - xfs_agino_t newino; /* new first inode's number */ 614 - xfs_agino_t newlen; /* new number of inodes */ 615 - int isaligned = 0; /* inode allocation at stripe unit */ 616 - /* boundary */ 617 - uint16_t allocmask = (uint16_t) -1; /* init. to full chunk */ 625 + struct xfs_agi *agi; 626 + struct xfs_alloc_arg args; 627 + xfs_agnumber_t agno; 628 + int error; 629 + xfs_agino_t newino; /* new first inode's number */ 630 + xfs_agino_t newlen; /* new number of inodes */ 631 + int isaligned = 0; /* inode allocation at stripe */ 632 + /* unit boundary */ 633 + /* init. 
to full chunk */ 634 + uint16_t allocmask = (uint16_t) -1; 618 635 struct xfs_inobt_rec_incore rec; 619 - struct xfs_perag *pag; 620 - int do_sparse = 0; 636 + struct xfs_perag *pag; 637 + struct xfs_ino_geometry *igeo = M_IGEO(tp->t_mountp); 638 + int do_sparse = 0; 621 639 622 640 memset(&args, 0, sizeof(args)); 623 641 args.tp = tp; ··· 630 644 #ifdef DEBUG 631 645 /* randomly do sparse inode allocations */ 632 646 if (xfs_sb_version_hassparseinodes(&tp->t_mountp->m_sb) && 633 - args.mp->m_ialloc_min_blks < args.mp->m_ialloc_blks) 647 + igeo->ialloc_min_blks < igeo->ialloc_blks) 634 648 do_sparse = prandom_u32() & 1; 635 649 #endif 636 650 ··· 638 652 * Locking will ensure that we don't have two callers in here 639 653 * at one time. 640 654 */ 641 - newlen = args.mp->m_ialloc_inos; 642 - if (args.mp->m_maxicount && 655 + newlen = igeo->ialloc_inos; 656 + if (igeo->maxicount && 643 657 percpu_counter_read_positive(&args.mp->m_icount) + newlen > 644 - args.mp->m_maxicount) 658 + igeo->maxicount) 645 659 return -ENOSPC; 646 - args.minlen = args.maxlen = args.mp->m_ialloc_blks; 660 + args.minlen = args.maxlen = igeo->ialloc_blks; 647 661 /* 648 662 * First try to allocate inodes contiguous with the last-allocated 649 663 * chunk of inodes. If the filesystem is striped, this will fill ··· 653 667 newino = be32_to_cpu(agi->agi_newino); 654 668 agno = be32_to_cpu(agi->agi_seqno); 655 669 args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) + 656 - args.mp->m_ialloc_blks; 670 + igeo->ialloc_blks; 657 671 if (do_sparse) 658 672 goto sparse_alloc; 659 673 if (likely(newino != NULLAGINO && ··· 676 690 * but not to use them in the actual exact allocation. 677 691 */ 678 692 args.alignment = 1; 679 - args.minalignslop = args.mp->m_cluster_align - 1; 693 + args.minalignslop = igeo->cluster_align - 1; 680 694 681 695 /* Allow space for the inode btree to split. 
*/ 682 - args.minleft = args.mp->m_in_maxlevels - 1; 696 + args.minleft = igeo->inobt_maxlevels - 1; 683 697 if ((error = xfs_alloc_vextent(&args))) 684 698 return error; 685 699 ··· 706 720 * pieces, so don't need alignment anyway. 707 721 */ 708 722 isaligned = 0; 709 - if (args.mp->m_sinoalign) { 723 + if (igeo->ialloc_align) { 710 724 ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN)); 711 725 args.alignment = args.mp->m_dalign; 712 726 isaligned = 1; 713 727 } else 714 - args.alignment = args.mp->m_cluster_align; 728 + args.alignment = igeo->cluster_align; 715 729 /* 716 730 * Need to figure out where to allocate the inode blocks. 717 731 * Ideally they should be spaced out through the a.g. ··· 727 741 /* 728 742 * Allow space for the inode btree to split. 729 743 */ 730 - args.minleft = args.mp->m_in_maxlevels - 1; 744 + args.minleft = igeo->inobt_maxlevels - 1; 731 745 if ((error = xfs_alloc_vextent(&args))) 732 746 return error; 733 747 } ··· 740 754 args.type = XFS_ALLOCTYPE_NEAR_BNO; 741 755 args.agbno = be32_to_cpu(agi->agi_root); 742 756 args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); 743 - args.alignment = args.mp->m_cluster_align; 757 + args.alignment = igeo->cluster_align; 744 758 if ((error = xfs_alloc_vextent(&args))) 745 759 return error; 746 760 } ··· 750 764 * the sparse allocation length is smaller than a full chunk. 
751 765 */ 752 766 if (xfs_sb_version_hassparseinodes(&args.mp->m_sb) && 753 - args.mp->m_ialloc_min_blks < args.mp->m_ialloc_blks && 767 + igeo->ialloc_min_blks < igeo->ialloc_blks && 754 768 args.fsbno == NULLFSBLOCK) { 755 769 sparse_alloc: 756 770 args.type = XFS_ALLOCTYPE_NEAR_BNO; ··· 759 773 args.alignment = args.mp->m_sb.sb_spino_align; 760 774 args.prod = 1; 761 775 762 - args.minlen = args.mp->m_ialloc_min_blks; 776 + args.minlen = igeo->ialloc_min_blks; 763 777 args.maxlen = args.minlen; 764 778 765 779 /* ··· 775 789 args.min_agbno = args.mp->m_sb.sb_inoalignmt; 776 790 args.max_agbno = round_down(args.mp->m_sb.sb_agblocks, 777 791 args.mp->m_sb.sb_inoalignmt) - 778 - args.mp->m_ialloc_blks; 792 + igeo->ialloc_blks; 779 793 780 794 error = xfs_alloc_vextent(&args); 781 795 if (error) ··· 992 1006 * space needed for alignment of inode chunks when checking the 993 1007 * longest contiguous free space in the AG - this prevents us 994 1008 * from getting ENOSPC because we have free space larger than 995 - * m_ialloc_blks but alignment constraints prevent us from using 1009 + * ialloc_blks but alignment constraints prevent us from using 996 1010 * it. 997 1011 * 998 1012 * If we can't find an AG with space for full alignment slack to ··· 1001 1015 * if we fail allocation due to alignment issues then it is most 1002 1016 * likely a real ENOSPC condition. 
1003 1017 */ 1004 - ineed = mp->m_ialloc_min_blks; 1018 + ineed = M_IGEO(mp)->ialloc_min_blks; 1005 1019 if (flags && ineed > 1) 1006 - ineed += mp->m_cluster_align; 1020 + ineed += M_IGEO(mp)->cluster_align; 1007 1021 longest = pag->pagf_longest; 1008 1022 if (!longest) 1009 1023 longest = pag->pagf_flcount > 0; ··· 1689 1703 int noroom = 0; 1690 1704 xfs_agnumber_t start_agno; 1691 1705 struct xfs_perag *pag; 1706 + struct xfs_ino_geometry *igeo = M_IGEO(mp); 1692 1707 int okalloc = 1; 1693 1708 1694 1709 if (*IO_agbp) { ··· 1720 1733 * Read rough value of mp->m_icount by percpu_counter_read_positive, 1721 1734 * which will sacrifice the preciseness but improve the performance. 1722 1735 */ 1723 - if (mp->m_maxicount && 1724 - percpu_counter_read_positive(&mp->m_icount) + mp->m_ialloc_inos 1725 - > mp->m_maxicount) { 1736 + if (igeo->maxicount && 1737 + percpu_counter_read_positive(&mp->m_icount) + igeo->ialloc_inos 1738 + > igeo->maxicount) { 1726 1739 noroom = 1; 1727 1740 okalloc = 0; 1728 1741 } ··· 1839 1852 if (!xfs_inobt_issparse(rec->ir_holemask)) { 1840 1853 /* not sparse, calculate extent info directly */ 1841 1854 xfs_bmap_add_free(tp, XFS_AGB_TO_FSB(mp, agno, sagbno), 1842 - mp->m_ialloc_blks, &XFS_RMAP_OINFO_INODES); 1855 + M_IGEO(mp)->ialloc_blks, 1856 + &XFS_RMAP_OINFO_INODES); 1843 1857 return; 1844 1858 } 1845 1859 ··· 2249 2261 2250 2262 /* check that the returned record contains the required inode */ 2251 2263 if (rec.ir_startino > agino || 2252 - rec.ir_startino + mp->m_ialloc_inos <= agino) 2264 + rec.ir_startino + M_IGEO(mp)->ialloc_inos <= agino) 2253 2265 return -EINVAL; 2254 2266 2255 2267 /* for untrusted inodes check it is allocated first */ ··· 2340 2352 * If the inode cluster size is the same as the blocksize or 2341 2353 * smaller we get to the buffer by simple arithmetics. 
2342 2354 */ 2343 - if (mp->m_blocks_per_cluster == 1) { 2355 + if (M_IGEO(mp)->blocks_per_cluster == 1) { 2344 2356 offset = XFS_INO_TO_OFFSET(mp, ino); 2345 2357 ASSERT(offset < mp->m_sb.sb_inopblock); 2346 2358 ··· 2356 2368 * find the location. Otherwise we have to do a btree 2357 2369 * lookup to find the location. 2358 2370 */ 2359 - if (mp->m_inoalign_mask) { 2360 - offset_agbno = agbno & mp->m_inoalign_mask; 2371 + if (M_IGEO(mp)->inoalign_mask) { 2372 + offset_agbno = agbno & M_IGEO(mp)->inoalign_mask; 2361 2373 chunk_agbno = agbno - offset_agbno; 2362 2374 } else { 2363 2375 error = xfs_imap_lookup(mp, tp, agno, agino, agbno, ··· 2369 2381 out_map: 2370 2382 ASSERT(agbno >= chunk_agbno); 2371 2383 cluster_agbno = chunk_agbno + 2372 - ((offset_agbno / mp->m_blocks_per_cluster) * 2373 - mp->m_blocks_per_cluster); 2384 + ((offset_agbno / M_IGEO(mp)->blocks_per_cluster) * 2385 + M_IGEO(mp)->blocks_per_cluster); 2374 2386 offset = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) + 2375 2387 XFS_INO_TO_OFFSET(mp, ino); 2376 2388 2377 2389 imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, cluster_agbno); 2378 - imap->im_len = XFS_FSB_TO_BB(mp, mp->m_blocks_per_cluster); 2390 + imap->im_len = XFS_FSB_TO_BB(mp, M_IGEO(mp)->blocks_per_cluster); 2379 2391 imap->im_boffset = (unsigned short)(offset << mp->m_sb.sb_inodelog); 2380 2392 2381 2393 /* ··· 2394 2406 return -EINVAL; 2395 2407 } 2396 2408 return 0; 2397 - } 2398 - 2399 - /* 2400 - * Compute and fill in value of m_in_maxlevels. 
2401 - */ 2402 - void 2403 - xfs_ialloc_compute_maxlevels( 2404 - xfs_mount_t *mp) /* file system mount structure */ 2405 - { 2406 - uint inodes; 2407 - 2408 - inodes = (1LL << XFS_INO_AGINO_BITS(mp)) >> XFS_INODES_PER_CHUNK_LOG; 2409 - mp->m_in_maxlevels = xfs_btree_compute_maxlevels(mp->m_inobt_mnr, 2410 - inodes); 2411 2409 } 2412 2410 2413 2411 /* ··· 2467 2493 xfs_agi_verify( 2468 2494 struct xfs_buf *bp) 2469 2495 { 2470 - struct xfs_mount *mp = bp->b_target->bt_mount; 2496 + struct xfs_mount *mp = bp->b_mount; 2471 2497 struct xfs_agi *agi = XFS_BUF_TO_AGI(bp); 2472 2498 int i; 2473 2499 ··· 2519 2545 xfs_agi_read_verify( 2520 2546 struct xfs_buf *bp) 2521 2547 { 2522 - struct xfs_mount *mp = bp->b_target->bt_mount; 2548 + struct xfs_mount *mp = bp->b_mount; 2523 2549 xfs_failaddr_t fa; 2524 2550 2525 2551 if (xfs_sb_version_hascrc(&mp->m_sb) && ··· 2536 2562 xfs_agi_write_verify( 2537 2563 struct xfs_buf *bp) 2538 2564 { 2539 - struct xfs_mount *mp = bp->b_target->bt_mount; 2565 + struct xfs_mount *mp = bp->b_mount; 2540 2566 struct xfs_buf_log_item *bip = bp->b_log_item; 2541 2567 xfs_failaddr_t fa; 2542 2568 ··· 2741 2767 *count = ci.count; 2742 2768 *freecount = ci.freecount; 2743 2769 return 0; 2770 + } 2771 + 2772 + /* 2773 + * Initialize inode-related geometry information. 2774 + * 2775 + * Compute the inode btree min and max levels and set maxicount. 2776 + * 2777 + * Set the inode cluster size. This may still be overridden by the file 2778 + * system block size if it is larger than the chosen cluster size. 2779 + * 2780 + * For v5 filesystems, scale the cluster size with the inode size to keep a 2781 + * constant ratio of inode per cluster buffer, but only if mkfs has set the 2782 + * inode alignment value appropriately for larger cluster sizes. 2783 + * 2784 + * Then compute the inode cluster alignment information. 
2785 + */ 2786 + void 2787 + xfs_ialloc_setup_geometry( 2788 + struct xfs_mount *mp) 2789 + { 2790 + struct xfs_sb *sbp = &mp->m_sb; 2791 + struct xfs_ino_geometry *igeo = M_IGEO(mp); 2792 + uint64_t icount; 2793 + uint inodes; 2794 + 2795 + /* Compute inode btree geometry. */ 2796 + igeo->agino_log = sbp->sb_inopblog + sbp->sb_agblklog; 2797 + igeo->inobt_mxr[0] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 1); 2798 + igeo->inobt_mxr[1] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 0); 2799 + igeo->inobt_mnr[0] = igeo->inobt_mxr[0] / 2; 2800 + igeo->inobt_mnr[1] = igeo->inobt_mxr[1] / 2; 2801 + 2802 + igeo->ialloc_inos = max_t(uint16_t, XFS_INODES_PER_CHUNK, 2803 + sbp->sb_inopblock); 2804 + igeo->ialloc_blks = igeo->ialloc_inos >> sbp->sb_inopblog; 2805 + 2806 + if (sbp->sb_spino_align) 2807 + igeo->ialloc_min_blks = sbp->sb_spino_align; 2808 + else 2809 + igeo->ialloc_min_blks = igeo->ialloc_blks; 2810 + 2811 + /* Compute and fill in value of m_ino_geo.inobt_maxlevels. */ 2812 + inodes = (1LL << XFS_INO_AGINO_BITS(mp)) >> XFS_INODES_PER_CHUNK_LOG; 2813 + igeo->inobt_maxlevels = xfs_btree_compute_maxlevels(igeo->inobt_mnr, 2814 + inodes); 2815 + 2816 + /* Set the maximum inode count for this filesystem. */ 2817 + if (sbp->sb_imax_pct) { 2818 + /* 2819 + * Make sure the maximum inode count is a multiple 2820 + * of the units we allocate inodes in. 2821 + */ 2822 + icount = sbp->sb_dblocks * sbp->sb_imax_pct; 2823 + do_div(icount, 100); 2824 + do_div(icount, igeo->ialloc_blks); 2825 + igeo->maxicount = XFS_FSB_TO_INO(mp, 2826 + icount * igeo->ialloc_blks); 2827 + } else { 2828 + igeo->maxicount = 0; 2829 + } 2830 + 2831 + /* 2832 + * Compute the desired size of an inode cluster buffer size, which 2833 + * starts at 8K and (on v5 filesystems) scales up with larger inode 2834 + * sizes. 
2835 + * 2836 + * Preserve the desired inode cluster size because the sparse inodes 2837 + * feature uses that desired size (not the actual size) to compute the 2838 + * sparse inode alignment. The mount code validates this value, so we 2839 + * cannot change the behavior. 2840 + */ 2841 + igeo->inode_cluster_size_raw = XFS_INODE_BIG_CLUSTER_SIZE; 2842 + if (xfs_sb_version_hascrc(&mp->m_sb)) { 2843 + int new_size = igeo->inode_cluster_size_raw; 2844 + 2845 + new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE; 2846 + if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size)) 2847 + igeo->inode_cluster_size_raw = new_size; 2848 + } 2849 + 2850 + /* Calculate inode cluster ratios. */ 2851 + if (igeo->inode_cluster_size_raw > mp->m_sb.sb_blocksize) 2852 + igeo->blocks_per_cluster = XFS_B_TO_FSBT(mp, 2853 + igeo->inode_cluster_size_raw); 2854 + else 2855 + igeo->blocks_per_cluster = 1; 2856 + igeo->inode_cluster_size = XFS_FSB_TO_B(mp, igeo->blocks_per_cluster); 2857 + igeo->inodes_per_cluster = XFS_FSB_TO_INO(mp, igeo->blocks_per_cluster); 2858 + 2859 + /* Calculate inode cluster alignment. */ 2860 + if (xfs_sb_version_hasalign(&mp->m_sb) && 2861 + mp->m_sb.sb_inoalignmt >= igeo->blocks_per_cluster) 2862 + igeo->cluster_align = mp->m_sb.sb_inoalignmt; 2863 + else 2864 + igeo->cluster_align = 1; 2865 + igeo->inoalign_mask = igeo->cluster_align - 1; 2866 + igeo->cluster_align_inodes = XFS_FSB_TO_INO(mp, igeo->cluster_align); 2867 + 2868 + /* 2869 + * If we are using stripe alignment, check whether 2870 + * the stripe unit is a multiple of the inode alignment 2871 + */ 2872 + if (mp->m_dalign && igeo->inoalign_mask && 2873 + !(mp->m_dalign & igeo->inoalign_mask)) 2874 + igeo->ialloc_align = mp->m_dalign; 2875 + else 2876 + igeo->ialloc_align = 0; 2744 2877 }
+1 -17
fs/xfs/libxfs/xfs_ialloc.h
··· 23 23 * sparse chunks */ 24 24 }; 25 25 26 - /* Calculate and return the number of filesystem blocks per inode cluster */ 27 - static inline int 28 - xfs_icluster_size_fsb( 29 - struct xfs_mount *mp) 30 - { 31 - if (mp->m_sb.sb_blocksize >= mp->m_inode_cluster_size) 32 - return 1; 33 - return mp->m_inode_cluster_size >> mp->m_sb.sb_blocklog; 34 - } 35 - 36 26 /* 37 27 * Make an inode pointer out of the buffer/offset. 38 28 */ ··· 84 94 xfs_ino_t ino, /* inode to locate */ 85 95 struct xfs_imap *imap, /* location map structure */ 86 96 uint flags); /* flags for inode btree lookup */ 87 - 88 - /* 89 - * Compute and fill in value of m_in_maxlevels. 90 - */ 91 - void 92 - xfs_ialloc_compute_maxlevels( 93 - struct xfs_mount *mp); /* file system mount structure */ 94 97 95 98 /* 96 99 * Log specified fields for the ag hdr (inode section) ··· 151 168 int *stat); 152 169 153 170 int xfs_ialloc_cluster_alignment(struct xfs_mount *mp); 171 + void xfs_ialloc_setup_geometry(struct xfs_mount *mp); 154 172 155 173 #endif /* __XFS_IALLOC_H__ */
+42 -14
fs/xfs/libxfs/xfs_ialloc_btree.c
··· 11 11 #include "xfs_trans_resv.h" 12 12 #include "xfs_bit.h" 13 13 #include "xfs_mount.h" 14 - #include "xfs_inode.h" 15 14 #include "xfs_btree.h" 16 15 #include "xfs_ialloc.h" 17 16 #include "xfs_ialloc_btree.h" 18 17 #include "xfs_alloc.h" 19 18 #include "xfs_error.h" 20 19 #include "xfs_trace.h" 21 - #include "xfs_cksum.h" 22 20 #include "xfs_trans.h" 23 21 #include "xfs_rmap.h" 24 22 ··· 26 28 struct xfs_btree_cur *cur, 27 29 int level) 28 30 { 29 - return cur->bc_mp->m_inobt_mnr[level != 0]; 31 + return M_IGEO(cur->bc_mp)->inobt_mnr[level != 0]; 30 32 } 31 33 32 34 STATIC struct xfs_btree_cur * ··· 162 164 struct xfs_btree_cur *cur, 163 165 int level) 164 166 { 165 - return cur->bc_mp->m_inobt_mxr[level != 0]; 167 + return M_IGEO(cur->bc_mp)->inobt_mxr[level != 0]; 166 168 } 167 169 168 170 STATIC void ··· 253 255 xfs_inobt_verify( 254 256 struct xfs_buf *bp) 255 257 { 256 - struct xfs_mount *mp = bp->b_target->bt_mount; 258 + struct xfs_mount *mp = bp->b_mount; 257 259 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 258 260 xfs_failaddr_t fa; 259 261 unsigned int level; ··· 279 281 280 282 /* level verification */ 281 283 level = be16_to_cpu(block->bb_level); 282 - if (level >= mp->m_in_maxlevels) 284 + if (level >= M_IGEO(mp)->inobt_maxlevels) 283 285 return __this_address; 284 286 285 - return xfs_btree_sblock_verify(bp, mp->m_inobt_mxr[level != 0]); 287 + return xfs_btree_sblock_verify(bp, 288 + M_IGEO(mp)->inobt_mxr[level != 0]); 286 289 } 287 290 288 291 static void ··· 545 546 xfs_agblock_t agblocks = xfs_ag_block_count(mp, agno); 546 547 547 548 /* Bail out if we're uninitialized, which can happen in mkfs. 
*/ 548 - if (mp->m_inobt_mxr[0] == 0) 549 + if (M_IGEO(mp)->inobt_mxr[0] == 0) 549 550 return 0; 550 551 551 552 /* ··· 557 558 XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart) == agno) 558 559 agblocks -= mp->m_sb.sb_logblocks; 559 560 560 - return xfs_btree_calc_size(mp->m_inobt_mnr, 561 + return xfs_btree_calc_size(M_IGEO(mp)->inobt_mnr, 561 562 (uint64_t)agblocks * mp->m_sb.sb_inopblock / 562 563 XFS_INODES_PER_CHUNK); 564 + } 565 + 566 + /* Read AGI and create inobt cursor. */ 567 + int 568 + xfs_inobt_cur( 569 + struct xfs_mount *mp, 570 + struct xfs_trans *tp, 571 + xfs_agnumber_t agno, 572 + xfs_btnum_t which, 573 + struct xfs_btree_cur **curpp, 574 + struct xfs_buf **agi_bpp) 575 + { 576 + struct xfs_btree_cur *cur; 577 + int error; 578 + 579 + ASSERT(*agi_bpp == NULL); 580 + ASSERT(*curpp == NULL); 581 + 582 + error = xfs_ialloc_read_agi(mp, tp, agno, agi_bpp); 583 + if (error) 584 + return error; 585 + 586 + cur = xfs_inobt_init_cursor(mp, tp, *agi_bpp, agno, which); 587 + if (!cur) { 588 + xfs_trans_brelse(tp, *agi_bpp); 589 + *agi_bpp = NULL; 590 + return -ENOMEM; 591 + } 592 + *curpp = cur; 593 + return 0; 563 594 } 564 595 565 596 static int ··· 600 571 xfs_btnum_t btnum, 601 572 xfs_extlen_t *tree_blocks) 602 573 { 603 - struct xfs_buf *agbp; 604 - struct xfs_btree_cur *cur; 574 + struct xfs_buf *agbp = NULL; 575 + struct xfs_btree_cur *cur = NULL; 605 576 int error; 606 577 607 - error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); 578 + error = xfs_inobt_cur(mp, tp, agno, btnum, &cur, &agbp); 608 579 if (error) 609 580 return error; 610 581 611 - cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, btnum); 612 582 error = xfs_btree_count_blocks(cur, tree_blocks); 613 583 xfs_btree_del_cursor(cur, error); 614 584 xfs_trans_brelse(tp, agbp); ··· 647 619 struct xfs_mount *mp, 648 620 unsigned long long len) 649 621 { 650 - return xfs_btree_calc_size(mp->m_inobt_mnr, len); 622 + return xfs_btree_calc_size(M_IGEO(mp)->inobt_mnr, len); 651 623 }
+3
fs/xfs/libxfs/xfs_ialloc_btree.h
··· 64 64 xfs_agnumber_t agno, xfs_extlen_t *ask, xfs_extlen_t *used); 65 65 extern xfs_extlen_t xfs_iallocbt_calc_size(struct xfs_mount *mp, 66 66 unsigned long long len); 67 + int xfs_inobt_cur(struct xfs_mount *mp, struct xfs_trans *tp, 68 + xfs_agnumber_t agno, xfs_btnum_t btnum, 69 + struct xfs_btree_cur **curpp, struct xfs_buf **agi_bpp); 67 70 68 71 #endif /* __XFS_IALLOC_BTREE_H__ */
+1 -5
fs/xfs/libxfs/xfs_iext_tree.c
··· 3 3 * Copyright (c) 2017 Christoph Hellwig. 4 4 */ 5 5 6 - #include <linux/cache.h> 7 - #include <linux/kernel.h> 8 - #include <linux/slab.h> 9 6 #include "xfs.h" 7 + #include "xfs_shared.h" 10 8 #include "xfs_format.h" 11 9 #include "xfs_bit.h" 12 10 #include "xfs_log_format.h" 13 11 #include "xfs_inode.h" 14 - #include "xfs_inode_fork.h" 15 12 #include "xfs_trans_resv.h" 16 13 #include "xfs_mount.h" 17 - #include "xfs_bmap.h" 18 14 #include "xfs_trace.h" 19 15 20 16 /*
+2 -7
fs/xfs/libxfs/xfs_inode_buf.c
··· 10 10 #include "xfs_log_format.h" 11 11 #include "xfs_trans_resv.h" 12 12 #include "xfs_mount.h" 13 - #include "xfs_defer.h" 14 13 #include "xfs_inode.h" 15 14 #include "xfs_errortag.h" 16 15 #include "xfs_error.h" 17 - #include "xfs_cksum.h" 18 16 #include "xfs_icache.h" 19 17 #include "xfs_trans.h" 20 18 #include "xfs_ialloc.h" ··· 31 33 xfs_buf_t *bp) 32 34 { 33 35 int i; 34 - int j; 35 36 xfs_dinode_t *dip; 36 37 37 - j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog; 38 - 39 - for (i = 0; i < j; i++) { 38 + for (i = 0; i < M_IGEO(mp)->inodes_per_cluster; i++) { 40 39 dip = xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize); 41 40 if (!dip->di_next_unlinked) { 42 41 xfs_alert(mp, ··· 75 80 struct xfs_buf *bp, 76 81 bool readahead) 77 82 { 78 - struct xfs_mount *mp = bp->b_target->bt_mount; 83 + struct xfs_mount *mp = bp->b_mount; 79 84 xfs_agnumber_t agno; 80 85 int i; 81 86 int ni;
+1 -3
fs/xfs/libxfs/xfs_inode_fork.c
··· 3 3 * Copyright (c) 2000-2006 Silicon Graphics, Inc. 4 4 * All Rights Reserved. 5 5 */ 6 - #include <linux/log2.h> 7 6 8 7 #include "xfs.h" 9 8 #include "xfs_fs.h" 9 + #include "xfs_shared.h" 10 10 #include "xfs_format.h" 11 11 #include "xfs_log_format.h" 12 12 #include "xfs_trans_resv.h" ··· 19 19 #include "xfs_bmap.h" 20 20 #include "xfs_error.h" 21 21 #include "xfs_trace.h" 22 - #include "xfs_attr_sf.h" 23 22 #include "xfs_da_format.h" 24 23 #include "xfs_da_btree.h" 25 24 #include "xfs_dir2_priv.h" 26 25 #include "xfs_attr_leaf.h" 27 - #include "xfs_shared.h" 28 26 29 27 kmem_zone_t *xfs_ifork_zone; 30 28
-2
fs/xfs/libxfs/xfs_log_rlimit.c
··· 12 12 #include "xfs_mount.h" 13 13 #include "xfs_da_format.h" 14 14 #include "xfs_trans_space.h" 15 - #include "xfs_inode.h" 16 15 #include "xfs_da_btree.h" 17 - #include "xfs_attr_leaf.h" 18 16 #include "xfs_bmap_btree.h" 19 17 20 18 /*
-2
fs/xfs/libxfs/xfs_refcount.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_log_format.h" 11 11 #include "xfs_trans_resv.h" 12 - #include "xfs_sb.h" 13 12 #include "xfs_mount.h" 14 13 #include "xfs_defer.h" 15 14 #include "xfs_btree.h" ··· 18 19 #include "xfs_errortag.h" 19 20 #include "xfs_error.h" 20 21 #include "xfs_trace.h" 21 - #include "xfs_cksum.h" 22 22 #include "xfs_trans.h" 23 23 #include "xfs_bit.h" 24 24 #include "xfs_refcount.h"
+1 -3
fs/xfs/libxfs/xfs_refcount_btree.c
··· 12 12 #include "xfs_sb.h" 13 13 #include "xfs_mount.h" 14 14 #include "xfs_btree.h" 15 - #include "xfs_bmap.h" 16 15 #include "xfs_refcount_btree.h" 17 16 #include "xfs_alloc.h" 18 17 #include "xfs_error.h" 19 18 #include "xfs_trace.h" 20 - #include "xfs_cksum.h" 21 19 #include "xfs_trans.h" 22 20 #include "xfs_bit.h" 23 21 #include "xfs_rmap.h" ··· 201 203 xfs_refcountbt_verify( 202 204 struct xfs_buf *bp) 203 205 { 204 - struct xfs_mount *mp = bp->b_target->bt_mount; 206 + struct xfs_mount *mp = bp->b_mount; 205 207 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 206 208 struct xfs_perag *pag = bp->b_pag; 207 209 xfs_failaddr_t fa;
-7
fs/xfs/libxfs/xfs_rmap.c
··· 10 10 #include "xfs_log_format.h" 11 11 #include "xfs_trans_resv.h" 12 12 #include "xfs_bit.h" 13 - #include "xfs_sb.h" 14 13 #include "xfs_mount.h" 15 14 #include "xfs_defer.h" 16 - #include "xfs_da_format.h" 17 - #include "xfs_da_btree.h" 18 15 #include "xfs_btree.h" 19 16 #include "xfs_trans.h" 20 17 #include "xfs_alloc.h" 21 18 #include "xfs_rmap.h" 22 19 #include "xfs_rmap_btree.h" 23 - #include "xfs_trans_space.h" 24 20 #include "xfs_trace.h" 25 21 #include "xfs_errortag.h" 26 22 #include "xfs_error.h" 27 - #include "xfs_extent_busy.h" 28 - #include "xfs_bmap.h" 29 23 #include "xfs_inode.h" 30 - #include "xfs_ialloc.h" 31 24 32 25 /* 33 26 * Lookup the first record less than or equal to [bno, len, owner, offset]
+1 -5
fs/xfs/libxfs/xfs_rmap_btree.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_log_format.h" 11 11 #include "xfs_trans_resv.h" 12 - #include "xfs_bit.h" 13 12 #include "xfs_sb.h" 14 13 #include "xfs_mount.h" 15 - #include "xfs_defer.h" 16 - #include "xfs_inode.h" 17 14 #include "xfs_trans.h" 18 15 #include "xfs_alloc.h" 19 16 #include "xfs_btree.h" 20 17 #include "xfs_rmap.h" 21 18 #include "xfs_rmap_btree.h" 22 19 #include "xfs_trace.h" 23 - #include "xfs_cksum.h" 24 20 #include "xfs_error.h" 25 21 #include "xfs_extent_busy.h" 26 22 #include "xfs_ag_resv.h" ··· 288 292 xfs_rmapbt_verify( 289 293 struct xfs_buf *bp) 290 294 { 291 - struct xfs_mount *mp = bp->b_target->bt_mount; 295 + struct xfs_mount *mp = bp->b_mount; 292 296 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 293 297 struct xfs_perag *pag = bp->b_pag; 294 298 xfs_failaddr_t fa;
-8
fs/xfs/libxfs/xfs_rtbitmap.c
··· 13 13 #include "xfs_mount.h" 14 14 #include "xfs_inode.h" 15 15 #include "xfs_bmap.h" 16 - #include "xfs_bmap_util.h" 17 - #include "xfs_bmap_btree.h" 18 - #include "xfs_alloc.h" 19 - #include "xfs_error.h" 20 16 #include "xfs_trans.h" 21 - #include "xfs_trans_space.h" 22 - #include "xfs_trace.h" 23 - #include "xfs_buf.h" 24 - #include "xfs_icache.h" 25 17 #include "xfs_rtalloc.h" 26 18 27 19
+10 -29
fs/xfs/libxfs/xfs_sb.c
··· 10 10 #include "xfs_log_format.h" 11 11 #include "xfs_trans_resv.h" 12 12 #include "xfs_bit.h" 13 - #include "xfs_sb.h" 14 13 #include "xfs_mount.h" 15 - #include "xfs_defer.h" 16 - #include "xfs_inode.h" 17 14 #include "xfs_ialloc.h" 18 15 #include "xfs_alloc.h" 19 16 #include "xfs_error.h" 20 17 #include "xfs_trace.h" 21 - #include "xfs_cksum.h" 22 18 #include "xfs_trans.h" 23 19 #include "xfs_buf_item.h" 24 20 #include "xfs_bmap_btree.h" 25 21 #include "xfs_alloc_btree.h" 26 - #include "xfs_ialloc_btree.h" 27 22 #include "xfs_log.h" 28 23 #include "xfs_rmap_btree.h" 29 - #include "xfs_bmap.h" 30 24 #include "xfs_refcount_btree.h" 31 25 #include "xfs_da_format.h" 32 - #include "xfs_da_btree.h" 33 26 #include "xfs_health.h" 34 27 35 28 /* ··· 679 686 struct xfs_buf *bp) 680 687 { 681 688 struct xfs_sb sb; 682 - struct xfs_mount *mp = bp->b_target->bt_mount; 689 + struct xfs_mount *mp = bp->b_mount; 683 690 struct xfs_dsb *dsb = XFS_BUF_TO_SBP(bp); 684 691 int error; 685 692 ··· 745 752 struct xfs_buf *bp) 746 753 { 747 754 struct xfs_sb sb; 748 - struct xfs_mount *mp = bp->b_target->bt_mount; 755 + struct xfs_mount *mp = bp->b_mount; 749 756 struct xfs_buf_log_item *bip = bp->b_log_item; 750 757 int error; 751 758 ··· 793 800 * 794 801 * Mount initialization code establishing various mount 795 802 * fields from the superblock associated with the given 796 - * mount structure 803 + * mount structure. 804 + * 805 + * Inode geometry are calculated in xfs_ialloc_setup_geometry. 
797 806 */ 798 807 void 799 808 xfs_sb_mount_common( 800 - struct xfs_mount *mp, 801 - struct xfs_sb *sbp) 809 + struct xfs_mount *mp, 810 + struct xfs_sb *sbp) 802 811 { 803 812 mp->m_agfrotor = mp->m_agirotor = 0; 804 813 mp->m_maxagi = mp->m_sb.sb_agcount; ··· 808 813 mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT; 809 814 mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT; 810 815 mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1; 811 - mp->m_agino_log = sbp->sb_inopblog + sbp->sb_agblklog; 812 816 mp->m_blockmask = sbp->sb_blocksize - 1; 813 817 mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG; 814 818 mp->m_blockwmask = mp->m_blockwsize - 1; ··· 816 822 mp->m_alloc_mxr[1] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 0); 817 823 mp->m_alloc_mnr[0] = mp->m_alloc_mxr[0] / 2; 818 824 mp->m_alloc_mnr[1] = mp->m_alloc_mxr[1] / 2; 819 - 820 - mp->m_inobt_mxr[0] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 1); 821 - mp->m_inobt_mxr[1] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 0); 822 - mp->m_inobt_mnr[0] = mp->m_inobt_mxr[0] / 2; 823 - mp->m_inobt_mnr[1] = mp->m_inobt_mxr[1] / 2; 824 825 825 826 mp->m_bmap_dmxr[0] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 1); 826 827 mp->m_bmap_dmxr[1] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 0); ··· 833 844 mp->m_refc_mnr[1] = mp->m_refc_mxr[1] / 2; 834 845 835 846 mp->m_bsize = XFS_FSB_TO_BB(mp, 1); 836 - mp->m_ialloc_inos = max_t(uint16_t, XFS_INODES_PER_CHUNK, 837 - sbp->sb_inopblock); 838 - mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog; 839 - 840 - if (sbp->sb_spino_align) 841 - mp->m_ialloc_min_blks = sbp->sb_spino_align; 842 - else 843 - mp->m_ialloc_min_blks = mp->m_ialloc_blks; 844 847 mp->m_alloc_set_aside = xfs_alloc_set_aside(mp); 845 848 mp->m_ag_max_usable = xfs_alloc_ag_max_usable(mp); 846 849 } ··· 920 939 struct xfs_trans *tp) 921 940 { 922 941 struct xfs_mount *mp = tp->t_mountp; 923 - struct xfs_buf *bp = xfs_trans_getsb(tp, mp, 0); 942 + struct xfs_buf *bp = xfs_trans_getsb(tp, mp); 924 943 925 944 
mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount); 926 945 mp->m_sb.sb_ifree = percpu_counter_sum(&mp->m_ifree); ··· 986 1005 987 1006 bp = xfs_buf_get(mp->m_ddev_targp, 988 1007 XFS_AG_DADDR(mp, agno, XFS_SB_DADDR), 989 - XFS_FSS_TO_BB(mp, 1), 0); 1008 + XFS_FSS_TO_BB(mp, 1)); 990 1009 /* 991 1010 * If we get an error reading or writing alternate superblocks, 992 1011 * continue. xfs_repair chooses the "best" superblock based ··· 1050 1069 if (error) 1051 1070 return error; 1052 1071 1053 - bp = xfs_trans_getsb(tp, mp, 0); 1072 + bp = xfs_trans_getsb(tp, mp); 1054 1073 xfs_log_sb(tp); 1055 1074 xfs_trans_bhold(tp, bp); 1056 1075 xfs_trans_set_sync(tp);
+48 -1
fs/xfs/libxfs/xfs_shared.h
··· 65 65 #define XFS_TRANS_DQ_DIRTY 0x10 /* at least one dquot in trx dirty */ 66 66 #define XFS_TRANS_RESERVE 0x20 /* OK to use reserved data blocks */ 67 67 #define XFS_TRANS_NO_WRITECOUNT 0x40 /* do not elevate SB writecount */ 68 - #define XFS_TRANS_NOFS 0x80 /* pass KM_NOFS to kmem_alloc */ 69 68 /* 70 69 * LOWMODE is used by the allocator to activate the lowspace algorithm - when 71 70 * free space is running low the extent allocator may choose to allocate an ··· 134 135 void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp, 135 136 struct xfs_inode *ip, struct xfs_ifork *ifp); 136 137 xfs_failaddr_t xfs_symlink_shortform_verify(struct xfs_inode *ip); 138 + 139 + /* Computed inode geometry for the filesystem. */ 140 + struct xfs_ino_geometry { 141 + /* Maximum inode count in this filesystem. */ 142 + uint64_t maxicount; 143 + 144 + /* Actual inode cluster buffer size, in bytes. */ 145 + unsigned int inode_cluster_size; 146 + 147 + /* 148 + * Desired inode cluster buffer size, in bytes. This value is not 149 + * rounded up to at least one filesystem block, which is necessary for 150 + * the sole purpose of validating sb_spino_align. Runtime code must 151 + * only ever use inode_cluster_size. 152 + */ 153 + unsigned int inode_cluster_size_raw; 154 + 155 + /* Inode cluster sizes, adjusted to be at least 1 fsb. */ 156 + unsigned int inodes_per_cluster; 157 + unsigned int blocks_per_cluster; 158 + 159 + /* Inode cluster alignment. */ 160 + unsigned int cluster_align; 161 + unsigned int cluster_align_inodes; 162 + unsigned int inoalign_mask; /* mask sb_inoalignmt if used */ 163 + 164 + unsigned int inobt_mxr[2]; /* max inobt btree records */ 165 + unsigned int inobt_mnr[2]; /* min inobt btree records */ 166 + unsigned int inobt_maxlevels; /* max inobt btree levels. */ 167 + 168 + /* Size of inode allocations under normal operation. 
*/ 169 + unsigned int ialloc_inos; 170 + unsigned int ialloc_blks; 171 + 172 + /* Minimum inode blocks for a sparse allocation. */ 173 + unsigned int ialloc_min_blks; 174 + 175 + /* stripe unit inode alignment */ 176 + unsigned int ialloc_align; 177 + 178 + unsigned int agino_log; /* #bits for agino in inum */ 179 + }; 180 + 181 + /* Keep iterating the data structure. */ 182 + #define XFS_ITER_CONTINUE (0) 183 + 184 + /* Stop iterating the data structure. */ 185 + #define XFS_ITER_ABORT (1) 137 186 138 187 #endif /* __XFS_SHARED_H__ */
+8 -9
fs/xfs/libxfs/xfs_trans_resv.c
··· 15 15 #include "xfs_da_btree.h" 16 16 #include "xfs_inode.h" 17 17 #include "xfs_bmap_btree.h" 18 - #include "xfs_ialloc.h" 19 18 #include "xfs_quota.h" 20 19 #include "xfs_trans.h" 21 20 #include "xfs_qm.h" 22 21 #include "xfs_trans_space.h" 23 - #include "xfs_trace.h" 24 22 25 23 #define _ALLOC true 26 24 #define _FREE false ··· 134 136 xfs_calc_inobt_res( 135 137 struct xfs_mount *mp) 136 138 { 137 - return xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + 138 - xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), 139 - XFS_FSB_TO_B(mp, 1)); 139 + return xfs_calc_buf_res(M_IGEO(mp)->inobt_maxlevels, 140 + XFS_FSB_TO_B(mp, 1)) + 141 + xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), 142 + XFS_FSB_TO_B(mp, 1)); 140 143 } 141 144 142 145 /* ··· 166 167 * includes: 167 168 * 168 169 * the allocation btrees: 2 trees * (max depth - 1) * block size 169 - * the inode chunk: m_ialloc_blks * N 170 + * the inode chunk: m_ino_geo.ialloc_blks * N 170 171 * 171 172 * The size N of the inode chunk reservation depends on whether it is for 172 173 * allocation or free and which type of create transaction is in use. An inode ··· 192 193 size = XFS_FSB_TO_B(mp, 1); 193 194 } 194 195 195 - res += xfs_calc_buf_res(mp->m_ialloc_blks, size); 196 + res += xfs_calc_buf_res(M_IGEO(mp)->ialloc_blks, size); 196 197 return res; 197 198 } 198 199 ··· 306 307 struct xfs_mount *mp) 307 308 { 308 309 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + 309 - 2 * max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size); 310 + 2 * M_IGEO(mp)->inode_cluster_size; 310 311 } 311 312 312 313 /* ··· 344 345 xfs_calc_iunlink_add_reservation(xfs_mount_t *mp) 345 346 { 346 347 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + 347 - max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size); 348 + M_IGEO(mp)->inode_cluster_size; 348 349 } 349 350 350 351 /*
+4 -3
fs/xfs/libxfs/xfs_trans_space.h
··· 56 56 #define XFS_DIRREMOVE_SPACE_RES(mp) \ 57 57 XFS_DAREMOVE_SPACE_RES(mp, XFS_DATA_FORK) 58 58 #define XFS_IALLOC_SPACE_RES(mp) \ 59 - ((mp)->m_ialloc_blks + \ 59 + (M_IGEO(mp)->ialloc_blks + \ 60 60 (xfs_sb_version_hasfinobt(&mp->m_sb) ? 2 : 1 * \ 61 - ((mp)->m_in_maxlevels - 1))) 61 + (M_IGEO(mp)->inobt_maxlevels - 1))) 62 62 63 63 /* 64 64 * Space reservation values for various transactions. ··· 94 94 #define XFS_SYMLINK_SPACE_RES(mp,nl,b) \ 95 95 (XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl) + (b)) 96 96 #define XFS_IFREE_SPACE_RES(mp) \ 97 - (xfs_sb_version_hasfinobt(&mp->m_sb) ? (mp)->m_in_maxlevels : 0) 97 + (xfs_sb_version_hasfinobt(&mp->m_sb) ? \ 98 + M_IGEO(mp)->inobt_maxlevels : 0) 98 99 99 100 100 101 #endif /* __XFS_TRANS_SPACE_H__ */
+2 -11
fs/xfs/libxfs/xfs_types.c
··· 7 7 #include "xfs.h" 8 8 #include "xfs_fs.h" 9 9 #include "xfs_format.h" 10 - #include "xfs_log_format.h" 11 10 #include "xfs_shared.h" 12 11 #include "xfs_trans_resv.h" 13 12 #include "xfs_bit.h" 14 - #include "xfs_sb.h" 15 13 #include "xfs_mount.h" 16 - #include "xfs_defer.h" 17 - #include "xfs_inode.h" 18 - #include "xfs_btree.h" 19 - #include "xfs_rmap.h" 20 - #include "xfs_alloc_btree.h" 21 - #include "xfs_alloc.h" 22 - #include "xfs_ialloc.h" 23 14 24 15 /* Find the size of the AG, in blocks. */ 25 16 xfs_agblock_t ··· 78 87 * Calculate the first inode, which will be in the first 79 88 * cluster-aligned block after the AGFL. 80 89 */ 81 - bno = round_up(XFS_AGFL_BLOCK(mp) + 1, mp->m_cluster_align); 90 + bno = round_up(XFS_AGFL_BLOCK(mp) + 1, M_IGEO(mp)->cluster_align); 82 91 *first = XFS_AGB_TO_AGINO(mp, bno); 83 92 84 93 /* 85 94 * Calculate the last inode, which will be at the end of the 86 95 * last (aligned) cluster that can be allocated in the AG. 87 96 */ 88 - bno = round_down(eoag, mp->m_cluster_align); 97 + bno = round_down(eoag, M_IGEO(mp)->cluster_align); 89 98 *last = XFS_AGB_TO_AGINO(mp, bno) - 1; 90 99 } 91 100
+2 -9
fs/xfs/scrub/agheader.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_trans_resv.h" 11 11 #include "xfs_mount.h" 12 - #include "xfs_defer.h" 13 12 #include "xfs_btree.h" 14 - #include "xfs_bit.h" 15 - #include "xfs_log_format.h" 16 - #include "xfs_trans.h" 17 13 #include "xfs_sb.h" 18 - #include "xfs_inode.h" 19 14 #include "xfs_alloc.h" 20 15 #include "xfs_ialloc.h" 21 16 #include "xfs_rmap.h" 22 - #include "scrub/xfs_scrub.h" 23 17 #include "scrub/scrub.h" 24 18 #include "scrub/common.h" 25 - #include "scrub/trace.h" 26 19 27 20 /* Superblock */ 28 21 ··· 639 646 xchk_agfl_block_xref(sc, agbno); 640 647 641 648 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 642 - return XFS_BTREE_QUERY_RANGE_ABORT; 649 + return XFS_ITER_ABORT; 643 650 644 651 return 0; 645 652 } ··· 730 737 /* Check the blocks in the AGFL. */ 731 738 error = xfs_agfl_walk(sc->mp, XFS_BUF_TO_AGF(sc->sa.agf_bp), 732 739 sc->sa.agfl_bp, xchk_agfl_block, &sai); 733 - if (error == XFS_BTREE_QUERY_RANGE_ABORT) { 740 + if (error == XFS_ITER_ABORT) { 734 741 error = 0; 735 742 goto out_free; 736 743 }
-5
fs/xfs/scrub/agheader_repair.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_trans_resv.h" 11 11 #include "xfs_mount.h" 12 - #include "xfs_defer.h" 13 12 #include "xfs_btree.h" 14 - #include "xfs_bit.h" 15 13 #include "xfs_log_format.h" 16 14 #include "xfs_trans.h" 17 15 #include "xfs_sb.h" 18 - #include "xfs_inode.h" 19 16 #include "xfs_alloc.h" 20 17 #include "xfs_alloc_btree.h" 21 18 #include "xfs_ialloc.h" 22 19 #include "xfs_ialloc_btree.h" 23 20 #include "xfs_rmap.h" 24 21 #include "xfs_rmap_btree.h" 25 - #include "xfs_refcount.h" 26 22 #include "xfs_refcount_btree.h" 27 - #include "scrub/xfs_scrub.h" 28 23 #include "scrub/scrub.h" 29 24 #include "scrub/common.h" 30 25 #include "scrub/trace.h"
-7
fs/xfs/scrub/alloc.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_trans_resv.h" 11 11 #include "xfs_mount.h" 12 - #include "xfs_defer.h" 13 12 #include "xfs_btree.h" 14 - #include "xfs_bit.h" 15 - #include "xfs_log_format.h" 16 - #include "xfs_trans.h" 17 - #include "xfs_sb.h" 18 13 #include "xfs_alloc.h" 19 14 #include "xfs_rmap.h" 20 - #include "scrub/xfs_scrub.h" 21 15 #include "scrub/scrub.h" 22 16 #include "scrub/common.h" 23 17 #include "scrub/btree.h" 24 - #include "scrub/trace.h" 25 18 26 19 /* 27 20 * Set us up to scrub free space btrees.
+91 -31
fs/xfs/scrub/attr.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_trans_resv.h" 11 11 #include "xfs_mount.h" 12 - #include "xfs_defer.h" 13 - #include "xfs_btree.h" 14 - #include "xfs_bit.h" 15 12 #include "xfs_log_format.h" 16 - #include "xfs_trans.h" 17 - #include "xfs_sb.h" 18 13 #include "xfs_inode.h" 19 14 #include "xfs_da_format.h" 20 15 #include "xfs_da_btree.h" 21 - #include "xfs_dir2.h" 22 16 #include "xfs_attr.h" 23 17 #include "xfs_attr_leaf.h" 24 - #include "scrub/xfs_scrub.h" 25 18 #include "scrub/scrub.h" 26 19 #include "scrub/common.h" 27 20 #include "scrub/dabtree.h" 28 - #include "scrub/trace.h" 21 + #include "scrub/attr.h" 29 22 30 - #include <linux/posix_acl_xattr.h> 31 - #include <linux/xattr.h> 23 + /* 24 + * Allocate enough memory to hold an attr value and attr block bitmaps, 25 + * reallocating the buffer if necessary. Buffer contents are not preserved 26 + * across a reallocation. 27 + */ 28 + int 29 + xchk_setup_xattr_buf( 30 + struct xfs_scrub *sc, 31 + size_t value_size, 32 + xfs_km_flags_t flags) 33 + { 34 + size_t sz; 35 + struct xchk_xattr_buf *ab = sc->buf; 36 + 37 + /* 38 + * We need enough space to read an xattr value from the file or enough 39 + * space to hold three copies of the xattr free space bitmap. We don't 40 + * need the buffer space for both purposes at the same time. 41 + */ 42 + sz = 3 * sizeof(long) * BITS_TO_LONGS(sc->mp->m_attr_geo->blksize); 43 + sz = max_t(size_t, sz, value_size); 44 + 45 + /* 46 + * If there's already a buffer, figure out if we need to reallocate it 47 + * to accommodate a larger size. 48 + */ 49 + if (ab) { 50 + if (sz <= ab->sz) 51 + return 0; 52 + kmem_free(ab); 53 + sc->buf = NULL; 54 + } 55 + 56 + /* 57 + * Don't zero the buffer upon allocation to avoid runtime overhead. 58 + * All users must be careful never to read uninitialized contents. 
59 + */ 60 + ab = kmem_alloc_large(sizeof(*ab) + sz, flags); 61 + if (!ab) 62 + return -ENOMEM; 63 + 64 + ab->sz = sz; 65 + sc->buf = ab; 66 + return 0; 67 + } 32 68 33 69 /* Set us up to scrub an inode's extended attributes. */ 34 70 int ··· 72 36 struct xfs_scrub *sc, 73 37 struct xfs_inode *ip) 74 38 { 75 - size_t sz; 39 + int error; 76 40 77 41 /* 78 - * Allocate the buffer without the inode lock held. We need enough 79 - * space to read every xattr value in the file or enough space to 80 - * hold three copies of the xattr free space bitmap. (Not both at 81 - * the same time.) 42 + * We failed to get memory while checking attrs, so this time try to 43 + * get all the memory we're ever going to need. Allocate the buffer 44 + * without the inode lock held, which means we can sleep. 82 45 */ 83 - sz = max_t(size_t, XATTR_SIZE_MAX, 3 * sizeof(long) * 84 - BITS_TO_LONGS(sc->mp->m_attr_geo->blksize)); 85 - sc->buf = kmem_zalloc_large(sz, KM_SLEEP); 86 - if (!sc->buf) 87 - return -ENOMEM; 46 + if (sc->flags & XCHK_TRY_HARDER) { 47 + error = xchk_setup_xattr_buf(sc, XATTR_SIZE_MAX, KM_SLEEP); 48 + if (error) 49 + return error; 50 + } 88 51 89 52 return xchk_setup_inode_contents(sc, ip, 0); 90 53 } ··· 118 83 sx = container_of(context, struct xchk_xattr, context); 119 84 120 85 if (xchk_should_terminate(sx->sc, &error)) { 121 - context->seen_enough = 1; 86 + context->seen_enough = error; 122 87 return; 123 88 } 124 89 ··· 134 99 return; 135 100 } 136 101 102 + /* 103 + * Try to allocate enough memory to extrat the attr value. If that 104 + * doesn't work, we overload the seen_enough variable to convey 105 + * the error message back to the main scrub function. 
106 + */ 107 + error = xchk_setup_xattr_buf(sx->sc, valuelen, KM_MAYFAIL); 108 + if (error == -ENOMEM) 109 + error = -EDEADLOCK; 110 + if (error) { 111 + context->seen_enough = error; 112 + return; 113 + } 114 + 137 115 args.flags = ATTR_KERNOTIME; 138 116 if (flags & XFS_ATTR_ROOT) 139 117 args.flags |= ATTR_ROOT; ··· 159 111 args.namelen = namelen; 160 112 args.hashval = xfs_da_hashname(args.name, args.namelen); 161 113 args.trans = context->tp; 162 - args.value = sx->sc->buf; 163 - args.valuelen = XATTR_SIZE_MAX; 114 + args.value = xchk_xattr_valuebuf(sx->sc); 115 + args.valuelen = valuelen; 164 116 165 117 error = xfs_attr_get_ilocked(context->dp, &args); 166 118 if (error == -EEXIST) ··· 173 125 args.blkno); 174 126 fail_xref: 175 127 if (sx->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 176 - context->seen_enough = 1; 128 + context->seen_enough = XFS_ITER_ABORT; 177 129 return; 178 130 } 179 131 ··· 218 170 unsigned long *map, 219 171 struct xfs_attr3_icleaf_hdr *leafhdr) 220 172 { 221 - unsigned long *freemap; 222 - unsigned long *dstmap; 173 + unsigned long *freemap = xchk_xattr_freemap(sc); 174 + unsigned long *dstmap = xchk_xattr_dstmap(sc); 223 175 unsigned int mapsize = sc->mp->m_attr_geo->blksize; 224 176 int i; 225 177 226 178 /* Construct bitmap of freemap contents. */ 227 - freemap = (unsigned long *)sc->buf + BITS_TO_LONGS(mapsize); 228 179 bitmap_zero(freemap, mapsize); 229 180 for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { 230 181 if (!xchk_xattr_set_map(sc, freemap, ··· 233 186 } 234 187 235 188 /* Look for bits that are set in freemap and are marked in use. 
*/ 236 - dstmap = freemap + BITS_TO_LONGS(mapsize); 237 189 return bitmap_and(dstmap, freemap, map, mapsize) == 0; 238 190 } 239 191 ··· 247 201 char *buf_end, 248 202 struct xfs_attr_leafblock *leaf, 249 203 struct xfs_attr3_icleaf_hdr *leafhdr, 250 - unsigned long *usedmap, 251 204 struct xfs_attr_leaf_entry *ent, 252 205 int idx, 253 206 unsigned int *usedbytes, 254 207 __u32 *last_hashval) 255 208 { 256 209 struct xfs_mount *mp = ds->state->mp; 210 + unsigned long *usedmap = xchk_xattr_usedmap(ds->sc); 257 211 char *name_end; 258 212 struct xfs_attr_leaf_name_local *lentry; 259 213 struct xfs_attr_leaf_name_remote *rentry; ··· 313 267 struct xfs_attr_leafblock *leaf = bp->b_addr; 314 268 struct xfs_attr_leaf_entry *ent; 315 269 struct xfs_attr_leaf_entry *entries; 316 - unsigned long *usedmap = ds->sc->buf; 270 + unsigned long *usedmap; 317 271 char *buf_end; 318 272 size_t off; 319 273 __u32 last_hashval = 0; 320 274 unsigned int usedbytes = 0; 321 275 unsigned int hdrsize; 322 276 int i; 277 + int error; 323 278 324 279 if (*last_checked == blk->blkno) 325 280 return 0; 281 + 282 + /* Allocate memory for block usage checking. */ 283 + error = xchk_setup_xattr_buf(ds->sc, 0, KM_MAYFAIL); 284 + if (error == -ENOMEM) 285 + return -EDEADLOCK; 286 + if (error) 287 + return error; 288 + usedmap = xchk_xattr_usedmap(ds->sc); 289 + 326 290 *last_checked = blk->blkno; 327 291 bitmap_zero(usedmap, mp->m_attr_geo->blksize); 328 292 ··· 380 324 381 325 /* Check the entry and nameval. */ 382 326 xchk_xattr_entry(ds, level, buf_end, leaf, &leafhdr, 383 - usedmap, ent, i, &usedbytes, &last_hashval); 327 + ent, i, &usedbytes, &last_hashval); 384 328 385 329 if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 386 330 goto out; ··· 520 464 error = xfs_attr_list_int_ilocked(&sx.context); 521 465 if (!xchk_fblock_process_error(sc, XFS_ATTR_FORK, 0, &error)) 522 466 goto out; 467 + 468 + /* Did our listent function try to return any errors? 
*/ 469 + if (sx.context.seen_enough < 0) 470 + error = sx.context.seen_enough; 523 471 out: 524 472 return error; 525 473 }
+71
fs/xfs/scrub/attr.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-or-later */ 2 + /* 3 + * Copyright (C) 2019 Oracle. All Rights Reserved. 4 + * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 + */ 6 + #ifndef __XFS_SCRUB_ATTR_H__ 7 + #define __XFS_SCRUB_ATTR_H__ 8 + 9 + /* 10 + * Temporary storage for online scrub and repair of extended attributes. 11 + */ 12 + struct xchk_xattr_buf { 13 + /* Size of @buf, in bytes. */ 14 + size_t sz; 15 + 16 + /* 17 + * Memory buffer -- either used for extracting attr values while 18 + * walking the attributes; or for computing attr block bitmaps when 19 + * checking the attribute tree. 20 + * 21 + * Each bitmap contains enough bits to track every byte in an attr 22 + * block (rounded up to the size of an unsigned long). The attr block 23 + * used space bitmap starts at the beginning of the buffer; the free 24 + * space bitmap follows immediately after; and we have a third buffer 25 + * for storing intermediate bitmap results. 26 + */ 27 + uint8_t buf[0]; 28 + }; 29 + 30 + /* A place to store attribute values. */ 31 + static inline uint8_t * 32 + xchk_xattr_valuebuf( 33 + struct xfs_scrub *sc) 34 + { 35 + struct xchk_xattr_buf *ab = sc->buf; 36 + 37 + return ab->buf; 38 + } 39 + 40 + /* A bitmap of space usage computed by walking an attr leaf block. */ 41 + static inline unsigned long * 42 + xchk_xattr_usedmap( 43 + struct xfs_scrub *sc) 44 + { 45 + struct xchk_xattr_buf *ab = sc->buf; 46 + 47 + return (unsigned long *)ab->buf; 48 + } 49 + 50 + /* A bitmap of free space computed by walking attr leaf block free info. */ 51 + static inline unsigned long * 52 + xchk_xattr_freemap( 53 + struct xfs_scrub *sc) 54 + { 55 + return xchk_xattr_usedmap(sc) + 56 + BITS_TO_LONGS(sc->mp->m_attr_geo->blksize); 57 + } 58 + 59 + /* A bitmap used to hold temporary results. 
*/ 60 + static inline unsigned long * 61 + xchk_xattr_dstmap( 62 + struct xfs_scrub *sc) 63 + { 64 + return xchk_xattr_freemap(sc) + 65 + BITS_TO_LONGS(sc->mp->m_attr_geo->blksize); 66 + } 67 + 68 + int xchk_setup_xattr_buf(struct xfs_scrub *sc, size_t value_size, 69 + xfs_km_flags_t flags); 70 + 71 + #endif /* __XFS_SCRUB_ATTR_H__ */
-5
fs/xfs/scrub/bitmap.c
··· 10 10 #include "xfs_trans_resv.h" 11 11 #include "xfs_mount.h" 12 12 #include "xfs_btree.h" 13 - #include "scrub/xfs_scrub.h" 14 - #include "scrub/scrub.h" 15 - #include "scrub/common.h" 16 - #include "scrub/trace.h" 17 - #include "scrub/repair.h" 18 13 #include "scrub/bitmap.h" 19 14 20 15 /*
-8
fs/xfs/scrub/bmap.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_trans_resv.h" 11 11 #include "xfs_mount.h" 12 - #include "xfs_defer.h" 13 12 #include "xfs_btree.h" 14 13 #include "xfs_bit.h" 15 14 #include "xfs_log_format.h" 16 15 #include "xfs_trans.h" 17 - #include "xfs_sb.h" 18 16 #include "xfs_inode.h" 19 - #include "xfs_inode_fork.h" 20 17 #include "xfs_alloc.h" 21 - #include "xfs_rtalloc.h" 22 18 #include "xfs_bmap.h" 23 - #include "xfs_bmap_util.h" 24 19 #include "xfs_bmap_btree.h" 25 20 #include "xfs_rmap.h" 26 21 #include "xfs_rmap_btree.h" 27 - #include "xfs_refcount.h" 28 - #include "scrub/xfs_scrub.h" 29 22 #include "scrub/scrub.h" 30 23 #include "scrub/common.h" 31 24 #include "scrub/btree.h" 32 - #include "scrub/trace.h" 33 25 34 26 /* Set us up with an inode's bmap. */ 35 27 int
-7
fs/xfs/scrub/btree.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_trans_resv.h" 11 11 #include "xfs_mount.h" 12 - #include "xfs_defer.h" 13 12 #include "xfs_btree.h" 14 - #include "xfs_bit.h" 15 - #include "xfs_log_format.h" 16 - #include "xfs_trans.h" 17 - #include "xfs_sb.h" 18 - #include "xfs_inode.h" 19 - #include "xfs_alloc.h" 20 13 #include "scrub/scrub.h" 21 14 #include "scrub/common.h" 22 15 #include "scrub/btree.h"
-8
fs/xfs/scrub/common.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_trans_resv.h" 11 11 #include "xfs_mount.h" 12 - #include "xfs_defer.h" 13 12 #include "xfs_btree.h" 14 - #include "xfs_bit.h" 15 13 #include "xfs_log_format.h" 16 14 #include "xfs_trans.h" 17 15 #include "xfs_sb.h" 18 16 #include "xfs_inode.h" 19 17 #include "xfs_icache.h" 20 - #include "xfs_itable.h" 21 18 #include "xfs_alloc.h" 22 19 #include "xfs_alloc_btree.h" 23 - #include "xfs_bmap.h" 24 - #include "xfs_bmap_btree.h" 25 20 #include "xfs_ialloc.h" 26 21 #include "xfs_ialloc_btree.h" 27 - #include "xfs_refcount.h" 28 22 #include "xfs_refcount_btree.h" 29 23 #include "xfs_rmap.h" 30 24 #include "xfs_rmap_btree.h" ··· 26 32 #include "xfs_trans_priv.h" 27 33 #include "xfs_attr.h" 28 34 #include "xfs_reflink.h" 29 - #include "scrub/xfs_scrub.h" 30 35 #include "scrub/scrub.h" 31 36 #include "scrub/common.h" 32 37 #include "scrub/trace.h" 33 - #include "scrub/btree.h" 34 38 #include "scrub/repair.h" 35 39 #include "scrub/health.h" 36 40
-8
fs/xfs/scrub/dabtree.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_trans_resv.h" 11 11 #include "xfs_mount.h" 12 - #include "xfs_defer.h" 13 - #include "xfs_btree.h" 14 - #include "xfs_bit.h" 15 12 #include "xfs_log_format.h" 16 13 #include "xfs_trans.h" 17 - #include "xfs_sb.h" 18 14 #include "xfs_inode.h" 19 - #include "xfs_inode_fork.h" 20 - #include "xfs_da_format.h" 21 - #include "xfs_da_btree.h" 22 15 #include "xfs_dir2.h" 23 16 #include "xfs_dir2_priv.h" 24 17 #include "xfs_attr_leaf.h" 25 - #include "scrub/xfs_scrub.h" 26 18 #include "scrub/scrub.h" 27 19 #include "scrub/common.h" 28 20 #include "scrub/trace.h"
-10
fs/xfs/scrub/dir.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_trans_resv.h" 11 11 #include "xfs_mount.h" 12 - #include "xfs_defer.h" 13 - #include "xfs_btree.h" 14 - #include "xfs_bit.h" 15 12 #include "xfs_log_format.h" 16 13 #include "xfs_trans.h" 17 - #include "xfs_sb.h" 18 14 #include "xfs_inode.h" 19 15 #include "xfs_icache.h" 20 - #include "xfs_itable.h" 21 - #include "xfs_da_format.h" 22 - #include "xfs_da_btree.h" 23 16 #include "xfs_dir2.h" 24 17 #include "xfs_dir2_priv.h" 25 - #include "xfs_ialloc.h" 26 - #include "scrub/xfs_scrub.h" 27 18 #include "scrub/scrub.h" 28 19 #include "scrub/common.h" 29 - #include "scrub/trace.h" 30 20 #include "scrub/dabtree.h" 31 21 32 22 /* Set us up to scrub directories. */
-12
fs/xfs/scrub/fscounters.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_trans_resv.h" 11 11 #include "xfs_mount.h" 12 - #include "xfs_defer.h" 13 - #include "xfs_btree.h" 14 - #include "xfs_bit.h" 15 - #include "xfs_log_format.h" 16 - #include "xfs_trans.h" 17 12 #include "xfs_sb.h" 18 - #include "xfs_inode.h" 19 13 #include "xfs_alloc.h" 20 14 #include "xfs_ialloc.h" 21 - #include "xfs_rmap.h" 22 - #include "xfs_error.h" 23 - #include "xfs_errortag.h" 24 - #include "xfs_icache.h" 25 15 #include "xfs_health.h" 26 - #include "xfs_bmap.h" 27 - #include "scrub/xfs_scrub.h" 28 16 #include "scrub/scrub.h" 29 17 #include "scrub/common.h" 30 18 #include "scrub/trace.h"
-8
fs/xfs/scrub/health.c
··· 7 7 #include "xfs_fs.h" 8 8 #include "xfs_shared.h" 9 9 #include "xfs_format.h" 10 - #include "xfs_trans_resv.h" 11 - #include "xfs_mount.h" 12 - #include "xfs_defer.h" 13 10 #include "xfs_btree.h" 14 - #include "xfs_bit.h" 15 - #include "xfs_log_format.h" 16 - #include "xfs_trans.h" 17 11 #include "xfs_sb.h" 18 - #include "xfs_inode.h" 19 12 #include "xfs_health.h" 20 13 #include "scrub/scrub.h" 21 - #include "scrub/health.h" 22 14 23 15 /* 24 16 * Scrub and In-Core Filesystem Health Assessments
+11 -17
fs/xfs/scrub/ialloc.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_trans_resv.h" 11 11 #include "xfs_mount.h" 12 - #include "xfs_defer.h" 13 12 #include "xfs_btree.h" 14 - #include "xfs_bit.h" 15 13 #include "xfs_log_format.h" 16 14 #include "xfs_trans.h" 17 - #include "xfs_sb.h" 18 15 #include "xfs_inode.h" 19 - #include "xfs_alloc.h" 20 16 #include "xfs_ialloc.h" 21 17 #include "xfs_ialloc_btree.h" 22 18 #include "xfs_icache.h" 23 19 #include "xfs_rmap.h" 24 - #include "xfs_log.h" 25 - #include "xfs_trans_priv.h" 26 - #include "scrub/xfs_scrub.h" 27 20 #include "scrub/scrub.h" 28 21 #include "scrub/common.h" 29 22 #include "scrub/btree.h" ··· 223 230 int error = 0; 224 231 225 232 nr_inodes = min_t(unsigned int, XFS_INODES_PER_CHUNK, 226 - mp->m_inodes_per_cluster); 233 + M_IGEO(mp)->inodes_per_cluster); 227 234 228 235 /* Map this inode cluster */ 229 236 agbno = XFS_AGINO_TO_AGBNO(mp, irec->ir_startino + cluster_base); ··· 244 251 */ 245 252 ir_holemask = (irec->ir_holemask & cluster_mask); 246 253 imap.im_blkno = XFS_AGB_TO_DADDR(mp, agno, agbno); 247 - imap.im_len = XFS_FSB_TO_BB(mp, mp->m_blocks_per_cluster); 254 + imap.im_len = XFS_FSB_TO_BB(mp, M_IGEO(mp)->blocks_per_cluster); 248 255 imap.im_boffset = XFS_INO_TO_OFFSET(mp, irec->ir_startino) << 249 256 mp->m_sb.sb_inodelog; 250 257 ··· 269 276 /* If any part of this is a hole, skip it. */ 270 277 if (ir_holemask) { 271 278 xchk_xref_is_not_owned_by(bs->sc, agbno, 272 - mp->m_blocks_per_cluster, 279 + M_IGEO(mp)->blocks_per_cluster, 273 280 &XFS_RMAP_OINFO_INODES); 274 281 return 0; 275 282 } 276 283 277 - xchk_xref_is_owned_by(bs->sc, agbno, mp->m_blocks_per_cluster, 284 + xchk_xref_is_owned_by(bs->sc, agbno, M_IGEO(mp)->blocks_per_cluster, 278 285 &XFS_RMAP_OINFO_INODES); 279 286 280 287 /* Grab the inode cluster buffer. 
*/ ··· 326 333 */ 327 334 for (cluster_base = 0; 328 335 cluster_base < XFS_INODES_PER_CHUNK; 329 - cluster_base += bs->sc->mp->m_inodes_per_cluster) { 336 + cluster_base += M_IGEO(bs->sc->mp)->inodes_per_cluster) { 330 337 error = xchk_iallocbt_check_cluster(bs, irec, cluster_base); 331 338 if (error) 332 339 break; ··· 348 355 { 349 356 struct xfs_mount *mp = bs->sc->mp; 350 357 struct xchk_iallocbt *iabt = bs->private; 358 + struct xfs_ino_geometry *igeo = M_IGEO(mp); 351 359 352 360 /* 353 361 * finobt records have different positioning requirements than inobt ··· 366 372 unsigned int imask; 367 373 368 374 imask = min_t(unsigned int, XFS_INODES_PER_CHUNK, 369 - mp->m_cluster_align_inodes) - 1; 375 + igeo->cluster_align_inodes) - 1; 370 376 if (irec->ir_startino & imask) 371 377 xchk_btree_set_corrupt(bs->sc, bs->cur, 0); 372 378 return; ··· 394 400 } 395 401 396 402 /* inobt records must be aligned to cluster and inoalignmnt size. */ 397 - if (irec->ir_startino & (mp->m_cluster_align_inodes - 1)) { 403 + if (irec->ir_startino & (igeo->cluster_align_inodes - 1)) { 398 404 xchk_btree_set_corrupt(bs->sc, bs->cur, 0); 399 405 return; 400 406 } 401 407 402 - if (irec->ir_startino & (mp->m_inodes_per_cluster - 1)) { 408 + if (irec->ir_startino & (igeo->inodes_per_cluster - 1)) { 403 409 xchk_btree_set_corrupt(bs->sc, bs->cur, 0); 404 410 return; 405 411 } 406 412 407 - if (mp->m_inodes_per_cluster <= XFS_INODES_PER_CHUNK) 413 + if (igeo->inodes_per_cluster <= XFS_INODES_PER_CHUNK) 408 414 return; 409 415 410 416 /* ··· 413 419 * after this one. 414 420 */ 415 421 iabt->next_startino = irec->ir_startino + XFS_INODES_PER_CHUNK; 416 - iabt->next_cluster_ino = irec->ir_startino + mp->m_inodes_per_cluster; 422 + iabt->next_cluster_ino = irec->ir_startino + igeo->inodes_per_cluster; 417 423 } 418 424 419 425 /* Scrub an inobt/finobt record. */
-10
fs/xfs/scrub/inode.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_trans_resv.h" 11 11 #include "xfs_mount.h" 12 - #include "xfs_defer.h" 13 12 #include "xfs_btree.h" 14 - #include "xfs_bit.h" 15 13 #include "xfs_log_format.h" 16 - #include "xfs_trans.h" 17 - #include "xfs_sb.h" 18 14 #include "xfs_inode.h" 19 - #include "xfs_icache.h" 20 - #include "xfs_inode_buf.h" 21 - #include "xfs_inode_fork.h" 22 15 #include "xfs_ialloc.h" 23 16 #include "xfs_da_format.h" 24 17 #include "xfs_reflink.h" 25 18 #include "xfs_rmap.h" 26 - #include "xfs_bmap.h" 27 19 #include "xfs_bmap_util.h" 28 - #include "scrub/xfs_scrub.h" 29 20 #include "scrub/scrub.h" 30 21 #include "scrub/common.h" 31 22 #include "scrub/btree.h" 32 - #include "scrub/trace.h" 33 23 34 24 /* 35 25 * Grab total control of the inode metadata. It doesn't matter here if
-8
fs/xfs/scrub/parent.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_trans_resv.h" 11 11 #include "xfs_mount.h" 12 - #include "xfs_defer.h" 13 - #include "xfs_btree.h" 14 - #include "xfs_bit.h" 15 12 #include "xfs_log_format.h" 16 - #include "xfs_trans.h" 17 - #include "xfs_sb.h" 18 13 #include "xfs_inode.h" 19 14 #include "xfs_icache.h" 20 15 #include "xfs_dir2.h" 21 16 #include "xfs_dir2_priv.h" 22 - #include "xfs_ialloc.h" 23 - #include "scrub/xfs_scrub.h" 24 17 #include "scrub/scrub.h" 25 18 #include "scrub/common.h" 26 - #include "scrub/trace.h" 27 19 28 20 /* Set us up to scrub parents. */ 29 21 int
+1 -12
fs/xfs/scrub/quota.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_trans_resv.h" 11 11 #include "xfs_mount.h" 12 - #include "xfs_defer.h" 13 - #include "xfs_btree.h" 14 - #include "xfs_bit.h" 15 12 #include "xfs_log_format.h" 16 13 #include "xfs_trans.h" 17 - #include "xfs_sb.h" 18 14 #include "xfs_inode.h" 19 - #include "xfs_inode_fork.h" 20 - #include "xfs_alloc.h" 21 - #include "xfs_bmap.h" 22 15 #include "xfs_quota.h" 23 16 #include "xfs_qm.h" 24 - #include "xfs_dquot.h" 25 - #include "xfs_dquot_item.h" 26 - #include "scrub/xfs_scrub.h" 27 17 #include "scrub/scrub.h" 28 18 #include "scrub/common.h" 29 - #include "scrub/trace.h" 30 19 31 20 /* Convert a scrub type code to a DQ flag, or return 0 if error. */ 32 21 static inline uint ··· 133 144 if (bsoft > bhard) 134 145 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); 135 146 136 - if (ihard > mp->m_maxicount) 147 + if (ihard > M_IGEO(mp)->maxicount) 137 148 xchk_fblock_set_warning(sc, XFS_DATA_FORK, offset); 138 149 if (isoft > ihard) 139 150 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
-10
fs/xfs/scrub/refcount.c
··· 7 7 #include "xfs_fs.h" 8 8 #include "xfs_shared.h" 9 9 #include "xfs_format.h" 10 - #include "xfs_trans_resv.h" 11 - #include "xfs_mount.h" 12 - #include "xfs_defer.h" 13 10 #include "xfs_btree.h" 14 - #include "xfs_bit.h" 15 - #include "xfs_log_format.h" 16 - #include "xfs_trans.h" 17 - #include "xfs_sb.h" 18 - #include "xfs_alloc.h" 19 11 #include "xfs_rmap.h" 20 12 #include "xfs_refcount.h" 21 - #include "scrub/xfs_scrub.h" 22 13 #include "scrub/scrub.h" 23 14 #include "scrub/common.h" 24 15 #include "scrub/btree.h" 25 - #include "scrub/trace.h" 26 16 27 17 /* 28 18 * Set us up to scrub reference count btrees.
+3 -11
fs/xfs/scrub/repair.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_trans_resv.h" 11 11 #include "xfs_mount.h" 12 - #include "xfs_defer.h" 13 12 #include "xfs_btree.h" 14 - #include "xfs_bit.h" 15 13 #include "xfs_log_format.h" 16 14 #include "xfs_trans.h" 17 15 #include "xfs_sb.h" 18 16 #include "xfs_inode.h" 19 - #include "xfs_icache.h" 20 17 #include "xfs_alloc.h" 21 18 #include "xfs_alloc_btree.h" 22 19 #include "xfs_ialloc.h" 23 20 #include "xfs_ialloc_btree.h" 24 21 #include "xfs_rmap.h" 25 22 #include "xfs_rmap_btree.h" 26 - #include "xfs_refcount.h" 27 23 #include "xfs_refcount_btree.h" 28 24 #include "xfs_extent_busy.h" 29 25 #include "xfs_ag_resv.h" 30 - #include "xfs_trans_space.h" 31 26 #include "xfs_quota.h" 32 - #include "xfs_attr.h" 33 - #include "xfs_reflink.h" 34 - #include "scrub/xfs_scrub.h" 35 27 #include "scrub/scrub.h" 36 28 #include "scrub/common.h" 37 29 #include "scrub/trace.h" ··· 349 357 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, fsb), 350 358 XFS_FSB_TO_BB(mp, 1), 0); 351 359 xfs_buf_zero(bp, 0, BBTOB(bp->b_length)); 352 - xfs_btree_init_block(mp, bp, btnum, 0, 0, sc->sa.agno, 0); 360 + xfs_btree_init_block(mp, bp, btnum, 0, 0, sc->sa.agno); 353 361 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_BTREE_BUF); 354 362 xfs_trans_log_buf(tp, bp, 0, bp->b_length); 355 363 bp->b_ops = ops; ··· 664 672 { 665 673 xfs_agblock_t *agbno = priv; 666 674 667 - return (*agbno == bno) ? XFS_BTREE_QUERY_RANGE_ABORT : 0; 675 + return (*agbno == bno) ? XFS_ITER_ABORT : 0; 668 676 } 669 677 670 678 /* Does this block match the btree information passed in? */ ··· 694 702 if (owner == XFS_RMAP_OWN_AG) { 695 703 error = xfs_agfl_walk(mp, ri->agf, ri->agfl_bp, 696 704 xrep_findroot_agfl_walk, &agbno); 697 - if (error == XFS_BTREE_QUERY_RANGE_ABORT) 705 + if (error == XFS_ITER_ABORT) 698 706 return 0; 699 707 if (error) 700 708 return error;
-9
fs/xfs/scrub/rmap.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_trans_resv.h" 11 11 #include "xfs_mount.h" 12 - #include "xfs_defer.h" 13 12 #include "xfs_btree.h" 14 - #include "xfs_bit.h" 15 - #include "xfs_log_format.h" 16 - #include "xfs_trans.h" 17 - #include "xfs_sb.h" 18 - #include "xfs_alloc.h" 19 - #include "xfs_ialloc.h" 20 13 #include "xfs_rmap.h" 21 14 #include "xfs_refcount.h" 22 - #include "scrub/xfs_scrub.h" 23 15 #include "scrub/scrub.h" 24 16 #include "scrub/common.h" 25 17 #include "scrub/btree.h" 26 - #include "scrub/trace.h" 27 18 28 19 /* 29 20 * Set us up to scrub reverse mapping btrees.
-7
fs/xfs/scrub/rtbitmap.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_trans_resv.h" 11 11 #include "xfs_mount.h" 12 - #include "xfs_defer.h" 13 - #include "xfs_btree.h" 14 - #include "xfs_bit.h" 15 12 #include "xfs_log_format.h" 16 13 #include "xfs_trans.h" 17 - #include "xfs_sb.h" 18 - #include "xfs_alloc.h" 19 14 #include "xfs_rtalloc.h" 20 15 #include "xfs_inode.h" 21 - #include "scrub/xfs_scrub.h" 22 16 #include "scrub/scrub.h" 23 17 #include "scrub/common.h" 24 - #include "scrub/trace.h" 25 18 26 19 /* Set us up with the realtime metadata locked. */ 27 20 int
-20
fs/xfs/scrub/scrub.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_trans_resv.h" 11 11 #include "xfs_mount.h" 12 - #include "xfs_defer.h" 13 - #include "xfs_btree.h" 14 - #include "xfs_bit.h" 15 12 #include "xfs_log_format.h" 16 13 #include "xfs_trans.h" 17 - #include "xfs_sb.h" 18 14 #include "xfs_inode.h" 19 - #include "xfs_icache.h" 20 - #include "xfs_itable.h" 21 - #include "xfs_alloc.h" 22 - #include "xfs_alloc_btree.h" 23 - #include "xfs_bmap.h" 24 - #include "xfs_bmap_btree.h" 25 - #include "xfs_ialloc.h" 26 - #include "xfs_ialloc_btree.h" 27 - #include "xfs_refcount.h" 28 - #include "xfs_refcount_btree.h" 29 - #include "xfs_rmap.h" 30 - #include "xfs_rmap_btree.h" 31 15 #include "xfs_quota.h" 32 16 #include "xfs_qm.h" 33 17 #include "xfs_errortag.h" 34 18 #include "xfs_error.h" 35 - #include "xfs_log.h" 36 - #include "xfs_trans_priv.h" 37 - #include "scrub/xfs_scrub.h" 38 19 #include "scrub/scrub.h" 39 20 #include "scrub/common.h" 40 21 #include "scrub/trace.h" 41 - #include "scrub/btree.h" 42 22 #include "scrub/repair.h" 43 23 #include "scrub/health.h" 44 24
-8
fs/xfs/scrub/symlink.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_trans_resv.h" 11 11 #include "xfs_mount.h" 12 - #include "xfs_defer.h" 13 - #include "xfs_btree.h" 14 - #include "xfs_bit.h" 15 12 #include "xfs_log_format.h" 16 - #include "xfs_trans.h" 17 - #include "xfs_sb.h" 18 13 #include "xfs_inode.h" 19 - #include "xfs_inode_fork.h" 20 14 #include "xfs_symlink.h" 21 - #include "scrub/xfs_scrub.h" 22 15 #include "scrub/scrub.h" 23 16 #include "scrub/common.h" 24 - #include "scrub/trace.h" 25 17 26 18 /* Set us up to scrub a symbolic link. */ 27 19 int
-6
fs/xfs/scrub/trace.c
··· 10 10 #include "xfs_log_format.h" 11 11 #include "xfs_trans_resv.h" 12 12 #include "xfs_mount.h" 13 - #include "xfs_defer.h" 14 - #include "xfs_da_format.h" 15 13 #include "xfs_inode.h" 16 14 #include "xfs_btree.h" 17 - #include "xfs_trans.h" 18 - #include "xfs_bit.h" 19 - #include "scrub/xfs_scrub.h" 20 15 #include "scrub/scrub.h" 21 - #include "scrub/common.h" 22 16 23 17 /* Figure out which block the btree cursor was pointing to. */ 24 18 static inline xfs_fsblock_t
+1 -3
fs/xfs/xfs_acl.c
··· 4 4 * All Rights Reserved. 5 5 */ 6 6 #include "xfs.h" 7 + #include "xfs_shared.h" 7 8 #include "xfs_format.h" 8 9 #include "xfs_log_format.h" 9 10 #include "xfs_trans_resv.h" 10 11 #include "xfs_mount.h" 11 12 #include "xfs_inode.h" 12 - #include "xfs_acl.h" 13 13 #include "xfs_attr.h" 14 14 #include "xfs_trace.h" 15 - #include <linux/slab.h> 16 - #include <linux/xattr.h> 17 15 #include <linux/posix_acl_xattr.h> 18 16 19 17
+65 -56
fs/xfs/xfs_aops.c
··· 12 12 #include "xfs_mount.h" 13 13 #include "xfs_inode.h" 14 14 #include "xfs_trans.h" 15 - #include "xfs_inode_item.h" 16 - #include "xfs_alloc.h" 17 - #include "xfs_error.h" 18 15 #include "xfs_iomap.h" 19 16 #include "xfs_trace.h" 20 17 #include "xfs_bmap.h" 21 18 #include "xfs_bmap_util.h" 22 - #include "xfs_bmap_btree.h" 23 19 #include "xfs_reflink.h" 24 - #include <linux/writeback.h> 25 20 26 21 /* 27 22 * structure owned by writepages passed to individual writepage calls ··· 133 138 struct xfs_trans *tp; 134 139 int error; 135 140 136 - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 137 - XFS_TRANS_NOFS, &tp); 141 + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp); 138 142 if (error) 139 143 return error; 140 144 ··· 234 240 struct xfs_inode *ip = XFS_I(ioend->io_inode); 235 241 xfs_off_t offset = ioend->io_offset; 236 242 size_t size = ioend->io_size; 243 + unsigned int nofs_flag; 237 244 int error; 245 + 246 + /* 247 + * We can allocate memory here while doing writeback on behalf of 248 + * memory reclaim. To avoid memory allocation deadlocks set the 249 + * task-wide nofs context for the following operations. 250 + */ 251 + nofs_flag = memalloc_nofs_save(); 238 252 239 253 /* 240 254 * Just clean up the in-memory strutures if the fs has been shut down. 
··· 284 282 list_del_init(&ioend->io_list); 285 283 xfs_destroy_ioend(ioend, error); 286 284 } 285 + 286 + memalloc_nofs_restore(nofs_flag); 287 287 } 288 288 289 289 /* ··· 294 290 static bool 295 291 xfs_ioend_can_merge( 296 292 struct xfs_ioend *ioend, 297 - int ioend_error, 298 293 struct xfs_ioend *next) 299 294 { 300 - int next_error; 301 - 302 - next_error = blk_status_to_errno(next->io_bio->bi_status); 303 - if (ioend_error != next_error) 295 + if (ioend->io_bio->bi_status != next->io_bio->bi_status) 304 296 return false; 305 297 if ((ioend->io_fork == XFS_COW_FORK) ^ (next->io_fork == XFS_COW_FORK)) 306 298 return false; ··· 305 305 return false; 306 306 if (ioend->io_offset + ioend->io_size != next->io_offset) 307 307 return false; 308 - if (xfs_ioend_is_append(ioend) != xfs_ioend_is_append(next)) 309 - return false; 310 308 return true; 309 + } 310 + 311 + /* 312 + * If the to be merged ioend has a preallocated transaction for file 313 + * size updates we need to ensure the ioend it is merged into also 314 + * has one. If it already has one we can simply cancel the transaction 315 + * as it is guaranteed to be clean. 316 + */ 317 + static void 318 + xfs_ioend_merge_append_transactions( 319 + struct xfs_ioend *ioend, 320 + struct xfs_ioend *next) 321 + { 322 + if (!ioend->io_append_trans) { 323 + ioend->io_append_trans = next->io_append_trans; 324 + next->io_append_trans = NULL; 325 + } else { 326 + xfs_setfilesize_ioend(next, -ECANCELED); 327 + } 311 328 } 312 329 313 330 /* Try to merge adjacent completions. 
*/ ··· 334 317 struct list_head *more_ioends) 335 318 { 336 319 struct xfs_ioend *next_ioend; 337 - int ioend_error; 338 - int error; 339 - 340 - if (list_empty(more_ioends)) 341 - return; 342 - 343 - ioend_error = blk_status_to_errno(ioend->io_bio->bi_status); 344 320 345 321 while (!list_empty(more_ioends)) { 346 322 next_ioend = list_first_entry(more_ioends, struct xfs_ioend, 347 323 io_list); 348 - if (!xfs_ioend_can_merge(ioend, ioend_error, next_ioend)) 324 + if (!xfs_ioend_can_merge(ioend, next_ioend)) 349 325 break; 350 326 list_move_tail(&next_ioend->io_list, &ioend->io_list); 351 327 ioend->io_size += next_ioend->io_size; 352 - if (ioend->io_append_trans) { 353 - error = xfs_setfilesize_ioend(next_ioend, 1); 354 - ASSERT(error == 1); 355 - } 328 + if (next_ioend->io_append_trans) 329 + xfs_ioend_merge_append_transactions(ioend, next_ioend); 356 330 } 357 331 } 358 332 ··· 634 626 * reference to the ioend to ensure that the ioend completion is only done once 635 627 * all bios have been submitted and the ioend is really done. 636 628 * 637 - * If @fail is non-zero, it means that we have a situation where some part of 629 + * If @status is non-zero, it means that we have a situation where some part of 638 630 * the submission process has failed after we have marked paged for writeback 639 631 * and unlocked them. In this situation, we need to fail the bio and ioend 640 632 * rather than submit it to IO. This typically only happens on a filesystem ··· 646 638 struct xfs_ioend *ioend, 647 639 int status) 648 640 { 641 + unsigned int nofs_flag; 642 + 643 + /* 644 + * We can allocate memory here while doing writeback on behalf of 645 + * memory reclaim. To avoid memory allocation deadlocks set the 646 + * task-wide nofs context for the following operations. 647 + */ 648 + nofs_flag = memalloc_nofs_save(); 649 + 649 650 /* Convert CoW extents to regular */ 650 651 if (!status && ioend->io_fork == XFS_COW_FORK) { 651 - /* 652 - * Yuk. 
This can do memory allocation, but is not a 653 - * transactional operation so everything is done in GFP_KERNEL 654 - * context. That can deadlock, because we hold pages in 655 - * writeback state and GFP_KERNEL allocations can block on them. 656 - * Hence we must operate in nofs conditions here. 657 - */ 658 - unsigned nofs_flag; 659 - 660 - nofs_flag = memalloc_nofs_save(); 661 652 status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode), 662 653 ioend->io_offset, ioend->io_size); 663 - memalloc_nofs_restore(nofs_flag); 664 654 } 665 655 666 656 /* Reserve log space if we might write beyond the on-disk inode size. */ ··· 669 663 !ioend->io_append_trans) 670 664 status = xfs_setfilesize_trans_alloc(ioend); 671 665 666 + memalloc_nofs_restore(nofs_flag); 667 + 672 668 ioend->io_bio->bi_private = ioend; 673 669 ioend->io_bio->bi_end_io = xfs_end_bio; 674 - ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc); 675 670 676 671 /* 677 672 * If we are failing the IO now, just mark the ioend with an ··· 686 679 return status; 687 680 } 688 681 689 - ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint; 690 682 submit_bio(ioend->io_bio); 691 683 return 0; 692 684 } ··· 697 691 xfs_exntst_t state, 698 692 xfs_off_t offset, 699 693 struct block_device *bdev, 700 - sector_t sector) 694 + sector_t sector, 695 + struct writeback_control *wbc) 701 696 { 702 697 struct xfs_ioend *ioend; 703 698 struct bio *bio; ··· 706 699 bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &xfs_ioend_bioset); 707 700 bio_set_dev(bio, bdev); 708 701 bio->bi_iter.bi_sector = sector; 702 + bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc); 703 + bio->bi_write_hint = inode->i_write_hint; 704 + wbc_init_bio(wbc, bio); 709 705 710 706 ioend = container_of(bio, struct xfs_ioend, io_inline_bio); 711 707 INIT_LIST_HEAD(&ioend->io_list); ··· 729 719 * so that the bi_private linkage is set up in the right direction for the 730 720 * traversal in xfs_destroy_ioend(). 
731 721 */ 732 - static void 722 + static struct bio * 733 723 xfs_chain_bio( 734 - struct xfs_ioend *ioend, 735 - struct writeback_control *wbc, 736 - struct block_device *bdev, 737 - sector_t sector) 724 + struct bio *prev) 738 725 { 739 726 struct bio *new; 740 727 741 728 new = bio_alloc(GFP_NOFS, BIO_MAX_PAGES); 742 - bio_set_dev(new, bdev); 743 - new->bi_iter.bi_sector = sector; 744 - bio_chain(ioend->io_bio, new); 745 - bio_get(ioend->io_bio); /* for xfs_destroy_ioend */ 746 - ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc); 747 - ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint; 748 - submit_bio(ioend->io_bio); 749 - ioend->io_bio = new; 729 + bio_copy_dev(new, prev);/* also copies over blkcg information */ 730 + new->bi_iter.bi_sector = bio_end_sector(prev); 731 + new->bi_opf = prev->bi_opf; 732 + new->bi_write_hint = prev->bi_write_hint; 733 + 734 + bio_chain(prev, new); 735 + bio_get(prev); /* for xfs_destroy_ioend */ 736 + submit_bio(prev); 737 + return new; 750 738 } 751 739 752 740 /* ··· 780 772 if (wpc->ioend) 781 773 list_add(&wpc->ioend->io_list, iolist); 782 774 wpc->ioend = xfs_alloc_ioend(inode, wpc->fork, 783 - wpc->imap.br_state, offset, bdev, sector); 775 + wpc->imap.br_state, offset, bdev, sector, wbc); 784 776 } 785 777 786 778 merged = __bio_try_merge_page(wpc->ioend->io_bio, page, len, poff, ··· 791 783 792 784 if (!merged) { 793 785 if (bio_full(wpc->ioend->io_bio, len)) 794 - xfs_chain_bio(wpc->ioend, wbc, bdev, sector); 786 + wpc->ioend->io_bio = xfs_chain_bio(wpc->ioend->io_bio); 795 787 bio_add_page(wpc->ioend->io_bio, page, len, poff); 796 788 } 797 789 798 790 wpc->ioend->io_size += len; 791 + wbc_account_io(wbc, page, len); 799 792 } 800 793 801 794 STATIC void
-1
fs/xfs/xfs_aops.h
··· 28 28 29 29 int xfs_setfilesize(struct xfs_inode *ip, xfs_off_t offset, size_t size); 30 30 31 - extern void xfs_count_page_state(struct page *, int *, int *); 32 31 extern struct block_device *xfs_find_bdev_for_inode(struct inode *); 33 32 extern struct dax_device *xfs_find_daxdev_for_inode(struct inode *); 34 33
+1 -6
fs/xfs/xfs_attr_inactive.c
··· 15 15 #include "xfs_da_format.h" 16 16 #include "xfs_da_btree.h" 17 17 #include "xfs_inode.h" 18 - #include "xfs_alloc.h" 19 18 #include "xfs_attr_remote.h" 20 19 #include "xfs_trans.h" 21 - #include "xfs_inode_item.h" 22 20 #include "xfs_bmap.h" 23 21 #include "xfs_attr.h" 24 22 #include "xfs_attr_leaf.h" 25 - #include "xfs_error.h" 26 23 #include "xfs_quota.h" 27 - #include "xfs_trace.h" 28 24 #include "xfs_dir2.h" 29 - #include "xfs_defer.h" 30 25 31 26 /* 32 27 * Look at all the extents for this logical region, ··· 116 121 int size; 117 122 int tmp; 118 123 int i; 119 - struct xfs_mount *mp = bp->b_target->bt_mount; 124 + struct xfs_mount *mp = bp->b_mount; 120 125 121 126 leaf = bp->b_addr; 122 127 xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf);
+1 -6
fs/xfs/xfs_attr_list.c
··· 6 6 */ 7 7 #include "xfs.h" 8 8 #include "xfs_fs.h" 9 + #include "xfs_shared.h" 9 10 #include "xfs_format.h" 10 11 #include "xfs_log_format.h" 11 12 #include "xfs_trans_resv.h" 12 - #include "xfs_bit.h" 13 13 #include "xfs_mount.h" 14 14 #include "xfs_da_format.h" 15 - #include "xfs_da_btree.h" 16 15 #include "xfs_inode.h" 17 16 #include "xfs_trans.h" 18 - #include "xfs_inode_item.h" 19 17 #include "xfs_bmap.h" 20 18 #include "xfs_attr.h" 21 19 #include "xfs_attr_sf.h" 22 - #include "xfs_attr_remote.h" 23 20 #include "xfs_attr_leaf.h" 24 21 #include "xfs_error.h" 25 22 #include "xfs_trace.h" 26 - #include "xfs_buf_item.h" 27 - #include "xfs_cksum.h" 28 23 #include "xfs_dir2.h" 29 24 30 25 STATIC int
+61
fs/xfs/xfs_bio_io.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Copyright (c) 2019 Christoph Hellwig. 4 + */ 5 + #include "xfs.h" 6 + 7 + static inline unsigned int bio_max_vecs(unsigned int count) 8 + { 9 + return min_t(unsigned, howmany(count, PAGE_SIZE), BIO_MAX_PAGES); 10 + } 11 + 12 + int 13 + xfs_rw_bdev( 14 + struct block_device *bdev, 15 + sector_t sector, 16 + unsigned int count, 17 + char *data, 18 + unsigned int op) 19 + 20 + { 21 + unsigned int is_vmalloc = is_vmalloc_addr(data); 22 + unsigned int left = count; 23 + int error; 24 + struct bio *bio; 25 + 26 + if (is_vmalloc && op == REQ_OP_WRITE) 27 + flush_kernel_vmap_range(data, count); 28 + 29 + bio = bio_alloc(GFP_KERNEL, bio_max_vecs(left)); 30 + bio_set_dev(bio, bdev); 31 + bio->bi_iter.bi_sector = sector; 32 + bio->bi_opf = op | REQ_META | REQ_SYNC; 33 + 34 + do { 35 + struct page *page = kmem_to_page(data); 36 + unsigned int off = offset_in_page(data); 37 + unsigned int len = min_t(unsigned, left, PAGE_SIZE - off); 38 + 39 + while (bio_add_page(bio, page, len, off) != len) { 40 + struct bio *prev = bio; 41 + 42 + bio = bio_alloc(GFP_KERNEL, bio_max_vecs(left)); 43 + bio_copy_dev(bio, prev); 44 + bio->bi_iter.bi_sector = bio_end_sector(prev); 45 + bio->bi_opf = prev->bi_opf; 46 + bio_chain(prev, bio); 47 + 48 + submit_bio(prev); 49 + } 50 + 51 + data += len; 52 + left -= len; 53 + } while (left > 0); 54 + 55 + error = submit_bio_wait(bio); 56 + bio_put(bio); 57 + 58 + if (is_vmalloc && op == REQ_OP_READ) 59 + invalidate_kernel_vmap_range(data, count); 60 + return error; 61 + }
+211 -155
fs/xfs/xfs_bmap_item.c
··· 9 9 #include "xfs_log_format.h" 10 10 #include "xfs_trans_resv.h" 11 11 #include "xfs_bit.h" 12 + #include "xfs_shared.h" 12 13 #include "xfs_mount.h" 13 14 #include "xfs_defer.h" 14 15 #include "xfs_inode.h" 15 16 #include "xfs_trans.h" 16 17 #include "xfs_trans_priv.h" 17 - #include "xfs_buf_item.h" 18 18 #include "xfs_bmap_item.h" 19 19 #include "xfs_log.h" 20 20 #include "xfs_bmap.h" 21 21 #include "xfs_icache.h" 22 - #include "xfs_trace.h" 23 22 #include "xfs_bmap_btree.h" 24 23 #include "xfs_trans_space.h" 25 24 ··· 95 96 } 96 97 97 98 /* 98 - * Pinning has no meaning for an bui item, so just return. 99 - */ 100 - STATIC void 101 - xfs_bui_item_pin( 102 - struct xfs_log_item *lip) 103 - { 104 - } 105 - 106 - /* 107 99 * The unpin operation is the last place an BUI is manipulated in the log. It is 108 100 * either inserted in the AIL or aborted in the event of a log I/O error. In 109 101 * either case, the BUI transaction has been successfully committed to make it ··· 113 123 } 114 124 115 125 /* 116 - * BUI items have no locking or pushing. However, since BUIs are pulled from 117 - * the AIL when their corresponding BUDs are committed to disk, their situation 118 - * is very similar to being pinned. Return XFS_ITEM_PINNED so that the caller 119 - * will eventually flush the log. This should help in getting the BUI out of 120 - * the AIL. 121 - */ 122 - STATIC uint 123 - xfs_bui_item_push( 124 - struct xfs_log_item *lip, 125 - struct list_head *buffer_list) 126 - { 127 - return XFS_ITEM_PINNED; 128 - } 129 - 130 - /* 131 126 * The BUI has been either committed or aborted if the transaction has been 132 127 * cancelled. If the transaction was cancelled, an BUD isn't going to be 133 128 * constructed and thus we free the BUI here directly. 
134 129 */ 135 130 STATIC void 136 - xfs_bui_item_unlock( 131 + xfs_bui_item_release( 137 132 struct xfs_log_item *lip) 138 133 { 139 - if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) 140 - xfs_bui_release(BUI_ITEM(lip)); 134 + xfs_bui_release(BUI_ITEM(lip)); 141 135 } 142 136 143 - /* 144 - * The BUI is logged only once and cannot be moved in the log, so simply return 145 - * the lsn at which it's been logged. 146 - */ 147 - STATIC xfs_lsn_t 148 - xfs_bui_item_committed( 149 - struct xfs_log_item *lip, 150 - xfs_lsn_t lsn) 151 - { 152 - return lsn; 153 - } 154 - 155 - /* 156 - * The BUI dependency tracking op doesn't do squat. It can't because 157 - * it doesn't know where the free extent is coming from. The dependency 158 - * tracking has to be handled by the "enclosing" metadata object. For 159 - * example, for inodes, the inode is locked throughout the extent freeing 160 - * so the dependency should be recorded there. 161 - */ 162 - STATIC void 163 - xfs_bui_item_committing( 164 - struct xfs_log_item *lip, 165 - xfs_lsn_t lsn) 166 - { 167 - } 168 - 169 - /* 170 - * This is the ops vector shared by all bui log items. 171 - */ 172 137 static const struct xfs_item_ops xfs_bui_item_ops = { 173 138 .iop_size = xfs_bui_item_size, 174 139 .iop_format = xfs_bui_item_format, 175 - .iop_pin = xfs_bui_item_pin, 176 140 .iop_unpin = xfs_bui_item_unpin, 177 - .iop_unlock = xfs_bui_item_unlock, 178 - .iop_committed = xfs_bui_item_committed, 179 - .iop_push = xfs_bui_item_push, 180 - .iop_committing = xfs_bui_item_committing, 141 + .iop_release = xfs_bui_item_release, 181 142 }; 182 143 183 144 /* ··· 190 249 } 191 250 192 251 /* 193 - * Pinning has no meaning for an bud item, so just return. 194 - */ 195 - STATIC void 196 - xfs_bud_item_pin( 197 - struct xfs_log_item *lip) 198 - { 199 - } 200 - 201 - /* 202 - * Since pinning has no meaning for an bud item, unpinning does 203 - * not either. 
204 - */ 205 - STATIC void 206 - xfs_bud_item_unpin( 207 - struct xfs_log_item *lip, 208 - int remove) 209 - { 210 - } 211 - 212 - /* 213 - * There isn't much you can do to push on an bud item. It is simply stuck 214 - * waiting for the log to be flushed to disk. 215 - */ 216 - STATIC uint 217 - xfs_bud_item_push( 218 - struct xfs_log_item *lip, 219 - struct list_head *buffer_list) 220 - { 221 - return XFS_ITEM_PINNED; 222 - } 223 - 224 - /* 225 252 * The BUD is either committed or aborted if the transaction is cancelled. If 226 253 * the transaction is cancelled, drop our reference to the BUI and free the 227 254 * BUD. 228 255 */ 229 256 STATIC void 230 - xfs_bud_item_unlock( 257 + xfs_bud_item_release( 231 258 struct xfs_log_item *lip) 232 259 { 233 260 struct xfs_bud_log_item *budp = BUD_ITEM(lip); 234 261 235 - if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) { 236 - xfs_bui_release(budp->bud_buip); 237 - kmem_zone_free(xfs_bud_zone, budp); 238 - } 239 - } 240 - 241 - /* 242 - * When the bud item is committed to disk, all we need to do is delete our 243 - * reference to our partner bui item and then free ourselves. Since we're 244 - * freeing ourselves we must return -1 to keep the transaction code from 245 - * further referencing this item. 246 - */ 247 - STATIC xfs_lsn_t 248 - xfs_bud_item_committed( 249 - struct xfs_log_item *lip, 250 - xfs_lsn_t lsn) 251 - { 252 - struct xfs_bud_log_item *budp = BUD_ITEM(lip); 253 - 254 - /* 255 - * Drop the BUI reference regardless of whether the BUD has been 256 - * aborted. Once the BUD transaction is constructed, it is the sole 257 - * responsibility of the BUD to release the BUI (even if the BUI is 258 - * aborted due to log I/O error). 259 - */ 260 262 xfs_bui_release(budp->bud_buip); 261 263 kmem_zone_free(xfs_bud_zone, budp); 262 - 263 - return (xfs_lsn_t)-1; 264 264 } 265 265 266 - /* 267 - * The BUD dependency tracking op doesn't do squat. 
It can't because 268 - * it doesn't know where the free extent is coming from. The dependency 269 - * tracking has to be handled by the "enclosing" metadata object. For 270 - * example, for inodes, the inode is locked throughout the extent freeing 271 - * so the dependency should be recorded there. 272 - */ 273 - STATIC void 274 - xfs_bud_item_committing( 275 - struct xfs_log_item *lip, 276 - xfs_lsn_t lsn) 277 - { 278 - } 279 - 280 - /* 281 - * This is the ops vector shared by all bud log items. 282 - */ 283 266 static const struct xfs_item_ops xfs_bud_item_ops = { 267 + .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED, 284 268 .iop_size = xfs_bud_item_size, 285 269 .iop_format = xfs_bud_item_format, 286 - .iop_pin = xfs_bud_item_pin, 287 - .iop_unpin = xfs_bud_item_unpin, 288 - .iop_unlock = xfs_bud_item_unlock, 289 - .iop_committed = xfs_bud_item_committed, 290 - .iop_push = xfs_bud_item_push, 291 - .iop_committing = xfs_bud_item_committing, 270 + .iop_release = xfs_bud_item_release, 292 271 }; 293 272 294 - /* 295 - * Allocate and initialize an bud item with the given number of extents. 296 - */ 297 - struct xfs_bud_log_item * 298 - xfs_bud_init( 299 - struct xfs_mount *mp, 273 + static struct xfs_bud_log_item * 274 + xfs_trans_get_bud( 275 + struct xfs_trans *tp, 300 276 struct xfs_bui_log_item *buip) 301 - 302 277 { 303 - struct xfs_bud_log_item *budp; 278 + struct xfs_bud_log_item *budp; 304 279 305 280 budp = kmem_zone_zalloc(xfs_bud_zone, KM_SLEEP); 306 - xfs_log_item_init(mp, &budp->bud_item, XFS_LI_BUD, &xfs_bud_item_ops); 281 + xfs_log_item_init(tp->t_mountp, &budp->bud_item, XFS_LI_BUD, 282 + &xfs_bud_item_ops); 307 283 budp->bud_buip = buip; 308 284 budp->bud_format.bud_bui_id = buip->bui_format.bui_id; 309 285 286 + xfs_trans_add_item(tp, &budp->bud_item); 310 287 return budp; 311 288 } 289 + 290 + /* 291 + * Finish an bmap update and log it to the BUD. 
Note that the 292 + * transaction is marked dirty regardless of whether the bmap update 293 + * succeeds or fails to support the BUI/BUD lifecycle rules. 294 + */ 295 + static int 296 + xfs_trans_log_finish_bmap_update( 297 + struct xfs_trans *tp, 298 + struct xfs_bud_log_item *budp, 299 + enum xfs_bmap_intent_type type, 300 + struct xfs_inode *ip, 301 + int whichfork, 302 + xfs_fileoff_t startoff, 303 + xfs_fsblock_t startblock, 304 + xfs_filblks_t *blockcount, 305 + xfs_exntst_t state) 306 + { 307 + int error; 308 + 309 + error = xfs_bmap_finish_one(tp, ip, type, whichfork, startoff, 310 + startblock, blockcount, state); 311 + 312 + /* 313 + * Mark the transaction dirty, even on error. This ensures the 314 + * transaction is aborted, which: 315 + * 316 + * 1.) releases the BUI and frees the BUD 317 + * 2.) shuts down the filesystem 318 + */ 319 + tp->t_flags |= XFS_TRANS_DIRTY; 320 + set_bit(XFS_LI_DIRTY, &budp->bud_item.li_flags); 321 + 322 + return error; 323 + } 324 + 325 + /* Sort bmap intents by inode. */ 326 + static int 327 + xfs_bmap_update_diff_items( 328 + void *priv, 329 + struct list_head *a, 330 + struct list_head *b) 331 + { 332 + struct xfs_bmap_intent *ba; 333 + struct xfs_bmap_intent *bb; 334 + 335 + ba = container_of(a, struct xfs_bmap_intent, bi_list); 336 + bb = container_of(b, struct xfs_bmap_intent, bi_list); 337 + return ba->bi_owner->i_ino - bb->bi_owner->i_ino; 338 + } 339 + 340 + /* Get an BUI. */ 341 + STATIC void * 342 + xfs_bmap_update_create_intent( 343 + struct xfs_trans *tp, 344 + unsigned int count) 345 + { 346 + struct xfs_bui_log_item *buip; 347 + 348 + ASSERT(count == XFS_BUI_MAX_FAST_EXTENTS); 349 + ASSERT(tp != NULL); 350 + 351 + buip = xfs_bui_init(tp->t_mountp); 352 + ASSERT(buip != NULL); 353 + 354 + /* 355 + * Get a log_item_desc to point at the new item. 356 + */ 357 + xfs_trans_add_item(tp, &buip->bui_item); 358 + return buip; 359 + } 360 + 361 + /* Set the map extent flags for this mapping. 
*/ 362 + static void 363 + xfs_trans_set_bmap_flags( 364 + struct xfs_map_extent *bmap, 365 + enum xfs_bmap_intent_type type, 366 + int whichfork, 367 + xfs_exntst_t state) 368 + { 369 + bmap->me_flags = 0; 370 + switch (type) { 371 + case XFS_BMAP_MAP: 372 + case XFS_BMAP_UNMAP: 373 + bmap->me_flags = type; 374 + break; 375 + default: 376 + ASSERT(0); 377 + } 378 + if (state == XFS_EXT_UNWRITTEN) 379 + bmap->me_flags |= XFS_BMAP_EXTENT_UNWRITTEN; 380 + if (whichfork == XFS_ATTR_FORK) 381 + bmap->me_flags |= XFS_BMAP_EXTENT_ATTR_FORK; 382 + } 383 + 384 + /* Log bmap updates in the intent item. */ 385 + STATIC void 386 + xfs_bmap_update_log_item( 387 + struct xfs_trans *tp, 388 + void *intent, 389 + struct list_head *item) 390 + { 391 + struct xfs_bui_log_item *buip = intent; 392 + struct xfs_bmap_intent *bmap; 393 + uint next_extent; 394 + struct xfs_map_extent *map; 395 + 396 + bmap = container_of(item, struct xfs_bmap_intent, bi_list); 397 + 398 + tp->t_flags |= XFS_TRANS_DIRTY; 399 + set_bit(XFS_LI_DIRTY, &buip->bui_item.li_flags); 400 + 401 + /* 402 + * atomic_inc_return gives us the value after the increment; 403 + * we want to use it as an array index so we need to subtract 1 from 404 + * it. 405 + */ 406 + next_extent = atomic_inc_return(&buip->bui_next_extent) - 1; 407 + ASSERT(next_extent < buip->bui_format.bui_nextents); 408 + map = &buip->bui_format.bui_extents[next_extent]; 409 + map->me_owner = bmap->bi_owner->i_ino; 410 + map->me_startblock = bmap->bi_bmap.br_startblock; 411 + map->me_startoff = bmap->bi_bmap.br_startoff; 412 + map->me_len = bmap->bi_bmap.br_blockcount; 413 + xfs_trans_set_bmap_flags(map, bmap->bi_type, bmap->bi_whichfork, 414 + bmap->bi_bmap.br_state); 415 + } 416 + 417 + /* Get an BUD so we can process all the deferred rmap updates. 
*/ 418 + STATIC void * 419 + xfs_bmap_update_create_done( 420 + struct xfs_trans *tp, 421 + void *intent, 422 + unsigned int count) 423 + { 424 + return xfs_trans_get_bud(tp, intent); 425 + } 426 + 427 + /* Process a deferred rmap update. */ 428 + STATIC int 429 + xfs_bmap_update_finish_item( 430 + struct xfs_trans *tp, 431 + struct list_head *item, 432 + void *done_item, 433 + void **state) 434 + { 435 + struct xfs_bmap_intent *bmap; 436 + xfs_filblks_t count; 437 + int error; 438 + 439 + bmap = container_of(item, struct xfs_bmap_intent, bi_list); 440 + count = bmap->bi_bmap.br_blockcount; 441 + error = xfs_trans_log_finish_bmap_update(tp, done_item, 442 + bmap->bi_type, 443 + bmap->bi_owner, bmap->bi_whichfork, 444 + bmap->bi_bmap.br_startoff, 445 + bmap->bi_bmap.br_startblock, 446 + &count, 447 + bmap->bi_bmap.br_state); 448 + if (!error && count > 0) { 449 + ASSERT(bmap->bi_type == XFS_BMAP_UNMAP); 450 + bmap->bi_bmap.br_blockcount = count; 451 + return -EAGAIN; 452 + } 453 + kmem_free(bmap); 454 + return error; 455 + } 456 + 457 + /* Abort all pending BUIs. */ 458 + STATIC void 459 + xfs_bmap_update_abort_intent( 460 + void *intent) 461 + { 462 + xfs_bui_release(intent); 463 + } 464 + 465 + /* Cancel a deferred rmap update. 
*/ 466 + STATIC void 467 + xfs_bmap_update_cancel_item( 468 + struct list_head *item) 469 + { 470 + struct xfs_bmap_intent *bmap; 471 + 472 + bmap = container_of(item, struct xfs_bmap_intent, bi_list); 473 + kmem_free(bmap); 474 + } 475 + 476 + const struct xfs_defer_op_type xfs_bmap_update_defer_type = { 477 + .max_items = XFS_BUI_MAX_FAST_EXTENTS, 478 + .diff_items = xfs_bmap_update_diff_items, 479 + .create_intent = xfs_bmap_update_create_intent, 480 + .abort_intent = xfs_bmap_update_abort_intent, 481 + .log_item = xfs_bmap_update_log_item, 482 + .create_done = xfs_bmap_update_create_done, 483 + .finish_item = xfs_bmap_update_finish_item, 484 + .cancel_item = xfs_bmap_update_cancel_item, 485 + }; 312 486 313 487 /* 314 488 * Process a bmap update intent item that was recovered from the log.
-2
fs/xfs/xfs_bmap_item.h
··· 75 75 extern struct kmem_zone *xfs_bud_zone; 76 76 77 77 struct xfs_bui_log_item *xfs_bui_init(struct xfs_mount *); 78 - struct xfs_bud_log_item *xfs_bud_init(struct xfs_mount *, 79 - struct xfs_bui_log_item *); 80 78 void xfs_bui_item_free(struct xfs_bui_log_item *); 81 79 void xfs_bui_release(struct xfs_bui_log_item *); 82 80 int xfs_bui_recover(struct xfs_trans *parent_tp, struct xfs_bui_log_item *buip);
+3 -8
fs/xfs/xfs_bmap_util.c
··· 12 12 #include "xfs_trans_resv.h" 13 13 #include "xfs_bit.h" 14 14 #include "xfs_mount.h" 15 - #include "xfs_da_format.h" 16 15 #include "xfs_defer.h" 17 16 #include "xfs_inode.h" 18 17 #include "xfs_btree.h" 19 18 #include "xfs_trans.h" 20 - #include "xfs_extfree_item.h" 21 19 #include "xfs_alloc.h" 22 20 #include "xfs_bmap.h" 23 21 #include "xfs_bmap_util.h" ··· 26 28 #include "xfs_trans_space.h" 27 29 #include "xfs_trace.h" 28 30 #include "xfs_icache.h" 29 - #include "xfs_log.h" 30 - #include "xfs_rmap_btree.h" 31 31 #include "xfs_iomap.h" 32 32 #include "xfs_reflink.h" 33 - #include "xfs_refcount.h" 34 33 35 34 /* Kernel only BMAP related definitions and functions */ 36 35 ··· 271 276 struct xfs_btree_block *block, *nextblock; 272 277 int numrecs; 273 278 274 - error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF, 279 + error = xfs_btree_read_bufl(mp, tp, bno, &bp, XFS_BMAP_BTREE_REF, 275 280 &xfs_bmbt_buf_ops); 276 281 if (error) 277 282 return error; ··· 282 287 /* Not at node above leaves, count this level of nodes */ 283 288 nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib); 284 289 while (nextbno != NULLFSBLOCK) { 285 - error = xfs_btree_read_bufl(mp, tp, nextbno, 0, &nbp, 290 + error = xfs_btree_read_bufl(mp, tp, nextbno, &nbp, 286 291 XFS_BMAP_BTREE_REF, 287 292 &xfs_bmbt_buf_ops); 288 293 if (error) ··· 316 321 if (nextbno == NULLFSBLOCK) 317 322 break; 318 323 bno = nextbno; 319 - error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, 324 + error = xfs_btree_read_bufl(mp, tp, bno, &bp, 320 325 XFS_BMAP_BTREE_REF, 321 326 &xfs_bmbt_buf_ops); 322 327 if (error)
+24 -147
fs/xfs/xfs_buf.c
··· 4 4 * All Rights Reserved. 5 5 */ 6 6 #include "xfs.h" 7 - #include <linux/stddef.h> 8 - #include <linux/errno.h> 9 - #include <linux/gfp.h> 10 - #include <linux/pagemap.h> 11 - #include <linux/init.h> 12 - #include <linux/vmalloc.h> 13 - #include <linux/bio.h> 14 - #include <linux/sysctl.h> 15 - #include <linux/proc_fs.h> 16 - #include <linux/workqueue.h> 17 - #include <linux/percpu.h> 18 - #include <linux/blkdev.h> 19 - #include <linux/hash.h> 20 - #include <linux/kthread.h> 21 - #include <linux/migrate.h> 22 7 #include <linux/backing-dev.h> 23 - #include <linux/freezer.h> 24 8 9 + #include "xfs_shared.h" 25 10 #include "xfs_format.h" 26 11 #include "xfs_log_format.h" 27 12 #include "xfs_trans_resv.h" ··· 198 213 } 199 214 } 200 215 201 - struct xfs_buf * 216 + static struct xfs_buf * 202 217 _xfs_buf_alloc( 203 218 struct xfs_buftarg *target, 204 219 struct xfs_buf_map *map, ··· 228 243 sema_init(&bp->b_sema, 0); /* held, no waiters */ 229 244 spin_lock_init(&bp->b_lock); 230 245 bp->b_target = target; 246 + bp->b_mount = target->bt_mount; 231 247 bp->b_flags = flags; 232 248 233 249 /* ··· 249 263 bp->b_maps[i].bm_len = map[i].bm_len; 250 264 bp->b_length += map[i].bm_len; 251 265 } 252 - bp->b_io_length = bp->b_length; 253 266 254 267 atomic_set(&bp->b_pin_count, 0); 255 268 init_waitqueue_head(&bp->b_waiters); 256 269 257 - XFS_STATS_INC(target->bt_mount, xb_create); 270 + XFS_STATS_INC(bp->b_mount, xb_create); 258 271 trace_xfs_buf_init(bp, _RET_IP_); 259 272 260 273 return bp; ··· 410 425 current->comm, current->pid, 411 426 __func__, gfp_mask); 412 427 413 - XFS_STATS_INC(bp->b_target->bt_mount, xb_page_retries); 428 + XFS_STATS_INC(bp->b_mount, xb_page_retries); 414 429 congestion_wait(BLK_RW_ASYNC, HZ/50); 415 430 goto retry; 416 431 } 417 432 418 - XFS_STATS_INC(bp->b_target->bt_mount, xb_page_found); 433 + XFS_STATS_INC(bp->b_mount, xb_page_found); 419 434 420 435 nbytes = min_t(size_t, size, PAGE_SIZE - offset); 421 436 size -= nbytes; ··· 894 909 
return 0; 895 910 } 896 911 897 - /* 898 - * Return a buffer allocated as an empty buffer and associated to external 899 - * memory via xfs_buf_associate_memory() back to it's empty state. 900 - */ 901 - void 902 - xfs_buf_set_empty( 903 - struct xfs_buf *bp, 904 - size_t numblks) 905 - { 906 - if (bp->b_pages) 907 - _xfs_buf_free_pages(bp); 908 - 909 - bp->b_pages = NULL; 910 - bp->b_page_count = 0; 911 - bp->b_addr = NULL; 912 - bp->b_length = numblks; 913 - bp->b_io_length = numblks; 914 - 915 - ASSERT(bp->b_map_count == 1); 916 - bp->b_bn = XFS_BUF_DADDR_NULL; 917 - bp->b_maps[0].bm_bn = XFS_BUF_DADDR_NULL; 918 - bp->b_maps[0].bm_len = bp->b_length; 919 - } 920 - 921 - static inline struct page * 922 - mem_to_page( 923 - void *addr) 924 - { 925 - if ((!is_vmalloc_addr(addr))) { 926 - return virt_to_page(addr); 927 - } else { 928 - return vmalloc_to_page(addr); 929 - } 930 - } 931 - 932 - int 933 - xfs_buf_associate_memory( 934 - xfs_buf_t *bp, 935 - void *mem, 936 - size_t len) 937 - { 938 - int rval; 939 - int i = 0; 940 - unsigned long pageaddr; 941 - unsigned long offset; 942 - size_t buflen; 943 - int page_count; 944 - 945 - pageaddr = (unsigned long)mem & PAGE_MASK; 946 - offset = (unsigned long)mem - pageaddr; 947 - buflen = PAGE_ALIGN(len + offset); 948 - page_count = buflen >> PAGE_SHIFT; 949 - 950 - /* Free any previous set of page pointers */ 951 - if (bp->b_pages) 952 - _xfs_buf_free_pages(bp); 953 - 954 - bp->b_pages = NULL; 955 - bp->b_addr = mem; 956 - 957 - rval = _xfs_buf_get_pages(bp, page_count); 958 - if (rval) 959 - return rval; 960 - 961 - bp->b_offset = offset; 962 - 963 - for (i = 0; i < bp->b_page_count; i++) { 964 - bp->b_pages[i] = mem_to_page((void *)pageaddr); 965 - pageaddr += PAGE_SIZE; 966 - } 967 - 968 - bp->b_io_length = BTOBB(len); 969 - bp->b_length = BTOBB(buflen); 970 - 971 - return 0; 972 - } 973 - 974 912 xfs_buf_t * 975 913 xfs_buf_get_uncached( 976 914 struct xfs_buftarg *target, ··· 1088 1180 trace_xfs_buf_lock(bp, 
_RET_IP_); 1089 1181 1090 1182 if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) 1091 - xfs_log_force(bp->b_target->bt_mount, 0); 1183 + xfs_log_force(bp->b_mount, 0); 1092 1184 down(&bp->b_sema); 1093 1185 1094 1186 trace_xfs_buf_lock_done(bp, _RET_IP_); ··· 1177 1269 struct xfs_buf *bp) 1178 1270 { 1179 1271 INIT_WORK(&bp->b_ioend_work, xfs_buf_ioend_work); 1180 - queue_work(bp->b_ioend_wq, &bp->b_ioend_work); 1272 + queue_work(bp->b_mount->m_buf_workqueue, &bp->b_ioend_work); 1181 1273 } 1182 1274 1183 1275 void ··· 1196 1288 struct xfs_buf *bp, 1197 1289 const char *func) 1198 1290 { 1199 - xfs_alert(bp->b_target->bt_mount, 1291 + xfs_alert(bp->b_mount, 1200 1292 "metadata I/O error in \"%s\" at daddr 0x%llx len %d error %d", 1201 1293 func, (uint64_t)XFS_BUF_ADDR(bp), bp->b_length, 1202 1294 -bp->b_error); ··· 1215 1307 XBF_WRITE_FAIL | XBF_DONE); 1216 1308 1217 1309 error = xfs_buf_submit(bp); 1218 - if (error) { 1219 - xfs_force_shutdown(bp->b_target->bt_mount, 1220 - SHUTDOWN_META_IO_ERROR); 1221 - } 1310 + if (error) 1311 + xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR); 1222 1312 return error; 1223 1313 } 1224 1314 ··· 1342 1436 */ 1343 1437 bp->b_error = 0; 1344 1438 1345 - /* 1346 - * Initialize the I/O completion workqueue if we haven't yet or the 1347 - * submitter has not opted to specify a custom one. 1348 - */ 1349 - if (!bp->b_ioend_wq) 1350 - bp->b_ioend_wq = bp->b_target->bt_mount->m_buf_workqueue; 1351 - 1352 1439 if (bp->b_flags & XBF_WRITE) { 1353 1440 op = REQ_OP_WRITE; 1354 - if (bp->b_flags & XBF_SYNCIO) 1355 - op_flags = REQ_SYNC; 1356 - if (bp->b_flags & XBF_FUA) 1357 - op_flags |= REQ_FUA; 1358 - if (bp->b_flags & XBF_FLUSH) 1359 - op_flags |= REQ_PREFLUSH; 1360 1441 1361 1442 /* 1362 1443 * Run the write verifier callback function if it exists. 
If ··· 1353 1460 if (bp->b_ops) { 1354 1461 bp->b_ops->verify_write(bp); 1355 1462 if (bp->b_error) { 1356 - xfs_force_shutdown(bp->b_target->bt_mount, 1463 + xfs_force_shutdown(bp->b_mount, 1357 1464 SHUTDOWN_CORRUPT_INCORE); 1358 1465 return; 1359 1466 } 1360 1467 } else if (bp->b_bn != XFS_BUF_DADDR_NULL) { 1361 - struct xfs_mount *mp = bp->b_target->bt_mount; 1468 + struct xfs_mount *mp = bp->b_mount; 1362 1469 1363 1470 /* 1364 1471 * non-crc filesystems don't attach verifiers during ··· 1390 1497 * subsequent call. 1391 1498 */ 1392 1499 offset = bp->b_offset; 1393 - size = BBTOB(bp->b_io_length); 1500 + size = BBTOB(bp->b_length); 1394 1501 blk_start_plug(&plug); 1395 1502 for (i = 0; i < bp->b_map_count; i++) { 1396 1503 xfs_buf_ioapply_map(bp, i, &offset, &size, op, op_flags); ··· 1436 1543 ASSERT(!(bp->b_flags & _XBF_DELWRI_Q)); 1437 1544 1438 1545 /* on shutdown we stale and complete the buffer immediately */ 1439 - if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) { 1546 + if (XFS_FORCED_SHUTDOWN(bp->b_mount)) { 1440 1547 xfs_buf_ioerror(bp, -EIO); 1441 1548 bp->b_flags &= ~XBF_DONE; 1442 1549 xfs_buf_stale(bp); ··· 1506 1613 return page_address(page) + (offset & (PAGE_SIZE-1)); 1507 1614 } 1508 1615 1509 - /* 1510 - * Move data into or out of a buffer. 
1511 - */ 1512 1616 void 1513 - xfs_buf_iomove( 1514 - xfs_buf_t *bp, /* buffer to process */ 1515 - size_t boff, /* starting buffer offset */ 1516 - size_t bsize, /* length to copy */ 1517 - void *data, /* data address */ 1518 - xfs_buf_rw_t mode) /* read/write/zero flag */ 1617 + xfs_buf_zero( 1618 + struct xfs_buf *bp, 1619 + size_t boff, 1620 + size_t bsize) 1519 1621 { 1520 1622 size_t bend; 1521 1623 ··· 1523 1635 page_offset = (boff + bp->b_offset) & ~PAGE_MASK; 1524 1636 page = bp->b_pages[page_index]; 1525 1637 csize = min_t(size_t, PAGE_SIZE - page_offset, 1526 - BBTOB(bp->b_io_length) - boff); 1638 + BBTOB(bp->b_length) - boff); 1527 1639 1528 1640 ASSERT((csize + page_offset) <= PAGE_SIZE); 1529 1641 1530 - switch (mode) { 1531 - case XBRW_ZERO: 1532 - memset(page_address(page) + page_offset, 0, csize); 1533 - break; 1534 - case XBRW_READ: 1535 - memcpy(data, page_address(page) + page_offset, csize); 1536 - break; 1537 - case XBRW_WRITE: 1538 - memcpy(page_address(page) + page_offset, data, csize); 1539 - } 1642 + memset(page_address(page) + page_offset, 0, csize); 1540 1643 1541 1644 boff += csize; 1542 - data += csize; 1543 1645 } 1544 1646 } 1545 1647 ··· 2076 2198 * This allows userspace to disrupt buffer caching for debug/testing 2077 2199 * purposes. 
2078 2200 */ 2079 - if (XFS_TEST_ERROR(false, bp->b_target->bt_mount, 2080 - XFS_ERRTAG_BUF_LRU_REF)) 2201 + if (XFS_TEST_ERROR(false, bp->b_mount, XFS_ERRTAG_BUF_LRU_REF)) 2081 2202 lru_ref = 0; 2082 2203 2083 2204 atomic_set(&bp->b_lru_ref, lru_ref); ··· 2092 2215 struct xfs_buf *bp, 2093 2216 __be32 dmagic) 2094 2217 { 2095 - struct xfs_mount *mp = bp->b_target->bt_mount; 2218 + struct xfs_mount *mp = bp->b_mount; 2096 2219 int idx; 2097 2220 2098 2221 idx = xfs_sb_version_hascrc(&mp->m_sb); ··· 2110 2233 struct xfs_buf *bp, 2111 2234 __be16 dmagic) 2112 2235 { 2113 - struct xfs_mount *mp = bp->b_target->bt_mount; 2236 + struct xfs_mount *mp = bp->b_mount; 2114 2237 int idx; 2115 2238 2116 2239 idx = xfs_sb_version_hascrc(&mp->m_sb);
+7 -46
fs/xfs/xfs_buf.h
··· 21 21 22 22 #define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL)) 23 23 24 - typedef enum { 25 - XBRW_READ = 1, /* transfer into target memory */ 26 - XBRW_WRITE = 2, /* transfer from target memory */ 27 - XBRW_ZERO = 3, /* Zero target memory */ 28 - } xfs_buf_rw_t; 29 - 30 24 #define XBF_READ (1 << 0) /* buffer intended for reading from device */ 31 25 #define XBF_WRITE (1 << 1) /* buffer intended for writing to device */ 32 26 #define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */ ··· 28 34 #define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */ 29 35 #define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ 30 36 #define XBF_STALE (1 << 6) /* buffer has been staled, do not find it */ 31 - #define XBF_WRITE_FAIL (1 << 24)/* async writes have failed on this buffer */ 32 - 33 - /* I/O hints for the BIO layer */ 34 - #define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */ 35 - #define XBF_FUA (1 << 11)/* force cache write through mode */ 36 - #define XBF_FLUSH (1 << 12)/* flush the disk cache before a write */ 37 + #define XBF_WRITE_FAIL (1 << 7) /* async writes have failed on this buffer */ 37 38 38 39 /* flags used only as arguments to access routines */ 39 40 #define XBF_TRYLOCK (1 << 16)/* lock requested, but do not wait */ ··· 38 49 #define _XBF_PAGES (1 << 20)/* backed by refcounted pages */ 39 50 #define _XBF_KMEM (1 << 21)/* backed by heap memory */ 40 51 #define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */ 41 - #define _XBF_COMPOUND (1 << 23)/* compound buffer */ 42 52 43 53 typedef unsigned int xfs_buf_flags_t; 44 54 ··· 50 62 { XBF_DONE, "DONE" }, \ 51 63 { XBF_STALE, "STALE" }, \ 52 64 { XBF_WRITE_FAIL, "WRITE_FAIL" }, \ 53 - { XBF_SYNCIO, "SYNCIO" }, \ 54 - { XBF_FUA, "FUA" }, \ 55 - { XBF_FLUSH, "FLUSH" }, \ 56 65 { XBF_TRYLOCK, "TRYLOCK" }, /* should never be set */\ 57 66 { XBF_UNMAPPED, "UNMAPPED" }, /* ditto */\ 58 67 { _XBF_PAGES, "PAGES" }, \ 59 68 { _XBF_KMEM, "KMEM" }, \ 60 - { 
_XBF_DELWRI_Q, "DELWRI_Q" }, \ 61 - { _XBF_COMPOUND, "COMPOUND" } 69 + { _XBF_DELWRI_Q, "DELWRI_Q" } 62 70 63 71 64 72 /* ··· 145 161 wait_queue_head_t b_waiters; /* unpin waiters */ 146 162 struct list_head b_list; 147 163 struct xfs_perag *b_pag; /* contains rbtree root */ 164 + struct xfs_mount *b_mount; 148 165 xfs_buftarg_t *b_target; /* buffer target (device) */ 149 166 void *b_addr; /* virtual address of buffer */ 150 167 struct work_struct b_ioend_work; 151 - struct workqueue_struct *b_ioend_wq; /* I/O completion wq */ 152 168 xfs_buf_iodone_t b_iodone; /* I/O completion function */ 153 169 struct completion b_iowait; /* queue for I/O waiters */ 154 - void *b_log_item; 170 + struct xfs_buf_log_item *b_log_item; 155 171 struct list_head b_li_list; /* Log items list head */ 156 172 struct xfs_trans *b_transp; 157 173 struct page **b_pages; /* array of page pointers */ ··· 159 175 struct xfs_buf_map *b_maps; /* compound buffer map */ 160 176 struct xfs_buf_map __b_map; /* inline compound buffer map */ 161 177 int b_map_count; 162 - int b_io_length; /* IO size in BBs */ 163 178 atomic_t b_pin_count; /* pin count */ 164 179 atomic_t b_io_remaining; /* #outstanding I/O requests */ 165 180 unsigned int b_page_count; /* size of page array */ ··· 192 209 xfs_daddr_t blkno, size_t numblks, 193 210 xfs_buf_flags_t flags); 194 211 195 - struct xfs_buf *_xfs_buf_alloc(struct xfs_buftarg *target, 196 - struct xfs_buf_map *map, int nmaps, 197 - xfs_buf_flags_t flags); 198 - 199 - static inline struct xfs_buf * 200 - xfs_buf_alloc( 201 - struct xfs_buftarg *target, 202 - xfs_daddr_t blkno, 203 - size_t numblks, 204 - xfs_buf_flags_t flags) 205 - { 206 - DEFINE_SINGLE_BUF_MAP(map, blkno, numblks); 207 - return _xfs_buf_alloc(target, &map, 1, flags); 208 - } 209 - 210 212 struct xfs_buf *xfs_buf_get_map(struct xfs_buftarg *target, 211 213 struct xfs_buf_map *map, int nmaps, 212 214 xfs_buf_flags_t flags); ··· 207 239 xfs_buf_get( 208 240 struct xfs_buftarg *target, 209 241 
xfs_daddr_t blkno, 210 - size_t numblks, 211 - xfs_buf_flags_t flags) 242 + size_t numblks) 212 243 { 213 244 DEFINE_SINGLE_BUF_MAP(map, blkno, numblks); 214 - return xfs_buf_get_map(target, &map, 1, flags); 245 + return xfs_buf_get_map(target, &map, 1, 0); 215 246 } 216 247 217 248 static inline struct xfs_buf * ··· 235 268 DEFINE_SINGLE_BUF_MAP(map, blkno, numblks); 236 269 return xfs_buf_readahead_map(target, &map, 1, ops); 237 270 } 238 - 239 - void xfs_buf_set_empty(struct xfs_buf *bp, size_t numblks); 240 - int xfs_buf_associate_memory(struct xfs_buf *bp, void *mem, size_t length); 241 271 242 272 struct xfs_buf *xfs_buf_get_uncached(struct xfs_buftarg *target, size_t numblks, 243 273 int flags); ··· 269 305 return __xfs_buf_submit(bp, wait); 270 306 } 271 307 272 - extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *, 273 - xfs_buf_rw_t); 274 - #define xfs_buf_zero(bp, off, len) \ 275 - xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO) 308 + void xfs_buf_zero(struct xfs_buf *bp, size_t boff, size_t bsize); 276 309 277 310 /* Buffer Utility Routines */ 278 311 extern void *xfs_buf_offset(struct xfs_buf *, size_t);
+18 -22
fs/xfs/xfs_buf_item.c
··· 5 5 */ 6 6 #include "xfs.h" 7 7 #include "xfs_fs.h" 8 + #include "xfs_shared.h" 8 9 #include "xfs_format.h" 9 10 #include "xfs_log_format.h" 10 11 #include "xfs_trans_resv.h" 11 12 #include "xfs_bit.h" 12 - #include "xfs_sb.h" 13 13 #include "xfs_mount.h" 14 14 #include "xfs_trans.h" 15 15 #include "xfs_buf_item.h" 16 16 #include "xfs_trans_priv.h" 17 - #include "xfs_error.h" 18 17 #include "xfs_trace.h" 19 18 #include "xfs_log.h" 20 - #include "xfs_inode.h" 21 19 22 20 23 21 kmem_zone_t *xfs_buf_item_zone; ··· 518 520 /* has a previous flush failed due to IO errors? */ 519 521 if ((bp->b_flags & XBF_WRITE_FAIL) && 520 522 ___ratelimit(&xfs_buf_write_fail_rl_state, "XFS: Failing async write")) { 521 - xfs_warn(bp->b_target->bt_mount, 523 + xfs_warn(bp->b_mount, 522 524 "Failing async write on buffer block 0x%llx. Retrying async write.", 523 525 (long long)bp->b_bn); 524 526 } ··· 592 594 * free the item. 593 595 */ 594 596 STATIC void 595 - xfs_buf_item_unlock( 597 + xfs_buf_item_release( 596 598 struct xfs_log_item *lip) 597 599 { 598 600 struct xfs_buf_log_item *bip = BUF_ITEM(lip); ··· 607 609 &lip->li_flags); 608 610 #endif 609 611 610 - trace_xfs_buf_item_unlock(bip); 612 + trace_xfs_buf_item_release(bip); 611 613 612 614 /* 613 615 * The bli dirty state should match whether the blf has logged segments ··· 635 637 return; 636 638 ASSERT(!stale || aborted); 637 639 xfs_buf_relse(bp); 640 + } 641 + 642 + STATIC void 643 + xfs_buf_item_committing( 644 + struct xfs_log_item *lip, 645 + xfs_lsn_t commit_lsn) 646 + { 647 + return xfs_buf_item_release(lip); 638 648 } 639 649 640 650 /* ··· 677 671 return lsn; 678 672 } 679 673 680 - STATIC void 681 - xfs_buf_item_committing( 682 - struct xfs_log_item *lip, 683 - xfs_lsn_t commit_lsn) 684 - { 685 - } 686 - 687 - /* 688 - * This is the ops vector shared by all buf log items. 
689 - */ 690 674 static const struct xfs_item_ops xfs_buf_item_ops = { 691 675 .iop_size = xfs_buf_item_size, 692 676 .iop_format = xfs_buf_item_format, 693 677 .iop_pin = xfs_buf_item_pin, 694 678 .iop_unpin = xfs_buf_item_unpin, 695 - .iop_unlock = xfs_buf_item_unlock, 679 + .iop_release = xfs_buf_item_release, 680 + .iop_committing = xfs_buf_item_committing, 696 681 .iop_committed = xfs_buf_item_committed, 697 682 .iop_push = xfs_buf_item_push, 698 - .iop_committing = xfs_buf_item_committing 699 683 }; 700 684 701 685 STATIC int ··· 739 743 * this buffer. If we do already have one, there is 740 744 * nothing to do here so return. 741 745 */ 742 - ASSERT(bp->b_target->bt_mount == mp); 746 + ASSERT(bp->b_mount == mp); 743 747 if (bip) { 744 748 ASSERT(bip->bli_item.li_type == XFS_LI_BUF); 745 749 ASSERT(!bp->b_transp); ··· 976 980 */ 977 981 void 978 982 xfs_buf_attach_iodone( 979 - xfs_buf_t *bp, 980 - void (*cb)(xfs_buf_t *, xfs_log_item_t *), 981 - xfs_log_item_t *lip) 983 + struct xfs_buf *bp, 984 + void (*cb)(struct xfs_buf *, struct xfs_log_item *), 985 + struct xfs_log_item *lip) 982 986 { 983 987 ASSERT(xfs_buf_islocked(bp)); 984 988
+3 -3
fs/xfs/xfs_buf_item.h
··· 39 39 * locked, and which 128 byte chunks of the buffer are dirty. 40 40 */ 41 41 struct xfs_buf_log_item { 42 - xfs_log_item_t bli_item; /* common item structure */ 42 + struct xfs_log_item bli_item; /* common item structure */ 43 43 struct xfs_buf *bli_buf; /* real buffer pointer */ 44 44 unsigned int bli_flags; /* misc flags */ 45 45 unsigned int bli_recur; /* lock recursion count */ ··· 55 55 void xfs_buf_item_log(struct xfs_buf_log_item *, uint, uint); 56 56 bool xfs_buf_item_dirty_format(struct xfs_buf_log_item *); 57 57 void xfs_buf_attach_iodone(struct xfs_buf *, 58 - void(*)(struct xfs_buf *, xfs_log_item_t *), 59 - xfs_log_item_t *); 58 + void(*)(struct xfs_buf *, struct xfs_log_item *), 59 + struct xfs_log_item *); 60 60 void xfs_buf_iodone_callbacks(struct xfs_buf *); 61 61 void xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *); 62 62 bool xfs_buf_resubmit_failed_buffers(struct xfs_buf *,
+1 -4
fs/xfs/xfs_dir2_readdir.c
··· 6 6 */ 7 7 #include "xfs.h" 8 8 #include "xfs_fs.h" 9 + #include "xfs_shared.h" 9 10 #include "xfs_format.h" 10 11 #include "xfs_log_format.h" 11 12 #include "xfs_trans_resv.h" 12 - #include "xfs_bit.h" 13 13 #include "xfs_mount.h" 14 - #include "xfs_da_format.h" 15 - #include "xfs_da_btree.h" 16 14 #include "xfs_inode.h" 17 15 #include "xfs_dir2.h" 18 16 #include "xfs_dir2_priv.h" 19 - #include "xfs_error.h" 20 17 #include "xfs_trace.h" 21 18 #include "xfs_bmap.h" 22 19 #include "xfs_trans.h"
+1 -3
fs/xfs/xfs_discard.c
··· 4 4 * All Rights Reserved. 5 5 */ 6 6 #include "xfs.h" 7 + #include "xfs_shared.h" 7 8 #include "xfs_format.h" 8 9 #include "xfs_log_format.h" 9 10 #include "xfs_trans_resv.h" 10 11 #include "xfs_sb.h" 11 12 #include "xfs_mount.h" 12 - #include "xfs_quota.h" 13 - #include "xfs_inode.h" 14 13 #include "xfs_btree.h" 15 14 #include "xfs_alloc_btree.h" 16 15 #include "xfs_alloc.h" 17 16 #include "xfs_error.h" 18 17 #include "xfs_extent_busy.h" 19 - #include "xfs_discard.h" 20 18 #include "xfs_trace.h" 21 19 #include "xfs_log.h" 22 20
+1 -5
fs/xfs/xfs_dquot.c
··· 14 14 #include "xfs_defer.h" 15 15 #include "xfs_inode.h" 16 16 #include "xfs_bmap.h" 17 - #include "xfs_bmap_util.h" 18 - #include "xfs_alloc.h" 19 17 #include "xfs_quota.h" 20 - #include "xfs_error.h" 21 18 #include "xfs_trans.h" 22 19 #include "xfs_buf_item.h" 23 20 #include "xfs_trans_space.h" 24 21 #include "xfs_trans_priv.h" 25 22 #include "xfs_qm.h" 26 - #include "xfs_cksum.h" 27 23 #include "xfs_trace.h" 28 24 #include "xfs_log.h" 29 25 #include "xfs_bmap_btree.h" ··· 1239 1243 /* 1240 1244 * Iterate every dquot of a particular type. The caller must ensure that the 1241 1245 * particular quota type is active. iter_fn can return negative error codes, 1242 - * or XFS_BTREE_QUERY_RANGE_ABORT to indicate that it wants to stop iterating. 1246 + * or XFS_ITER_ABORT to indicate that it wants to stop iterating. 1243 1247 */ 1244 1248 int 1245 1249 xfs_qm_dqiterate(
-1
fs/xfs/xfs_dquot.h
··· 34 34 uint dq_flags; /* various flags (XFS_DQ_*) */ 35 35 struct list_head q_lru; /* global free list of dquots */ 36 36 struct xfs_mount*q_mount; /* filesystem this relates to */ 37 - struct xfs_trans*q_transp; /* trans this belongs to currently */ 38 37 uint q_nrefs; /* # active refs from inodes */ 39 38 xfs_daddr_t q_blkno; /* blkno of dquot buffer */ 40 39 int q_bufoffset; /* off of dq in buffer (# dquots) */
+6 -112
fs/xfs/xfs_dquot_item.c
··· 5 5 */ 6 6 #include "xfs.h" 7 7 #include "xfs_fs.h" 8 + #include "xfs_shared.h" 8 9 #include "xfs_format.h" 9 10 #include "xfs_log_format.h" 10 11 #include "xfs_trans_resv.h" 11 12 #include "xfs_mount.h" 12 13 #include "xfs_inode.h" 13 14 #include "xfs_quota.h" 14 - #include "xfs_error.h" 15 15 #include "xfs_trans.h" 16 16 #include "xfs_buf_item.h" 17 17 #include "xfs_trans_priv.h" ··· 92 92 ASSERT(atomic_read(&dqp->q_pincount) > 0); 93 93 if (atomic_dec_and_test(&dqp->q_pincount)) 94 94 wake_up(&dqp->q_pinwait); 95 - } 96 - 97 - STATIC xfs_lsn_t 98 - xfs_qm_dquot_logitem_committed( 99 - struct xfs_log_item *lip, 100 - xfs_lsn_t lsn) 101 - { 102 - /* 103 - * We always re-log the entire dquot when it becomes dirty, 104 - * so, the latest copy _is_ the only one that matters. 105 - */ 106 - return lsn; 107 95 } 108 96 109 97 /* ··· 197 209 return rval; 198 210 } 199 211 200 - /* 201 - * Unlock the dquot associated with the log item. 202 - * Clear the fields of the dquot and dquot log item that 203 - * are specific to the current transaction. If the 204 - * hold flags is set, do not unlock the dquot. 205 - */ 206 212 STATIC void 207 - xfs_qm_dquot_logitem_unlock( 213 + xfs_qm_dquot_logitem_release( 208 214 struct xfs_log_item *lip) 209 215 { 210 216 struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; 211 217 212 218 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 213 - 214 - /* 215 - * Clear the transaction pointer in the dquot 216 - */ 217 - dqp->q_transp = NULL; 218 219 219 220 /* 220 221 * dquots are never 'held' from getting unlocked at the end of ··· 214 237 xfs_dqunlock(dqp); 215 238 } 216 239 217 - /* 218 - * this needs to stamp an lsn into the dquot, I think. 
219 - * rpc's that look at user dquot's would then have to 220 - * push on the dependency recorded in the dquot 221 - */ 222 240 STATIC void 223 241 xfs_qm_dquot_logitem_committing( 224 242 struct xfs_log_item *lip, 225 - xfs_lsn_t lsn) 243 + xfs_lsn_t commit_lsn) 226 244 { 245 + return xfs_qm_dquot_logitem_release(lip); 227 246 } 228 247 229 - /* 230 - * This is the ops vector for dquots 231 - */ 232 248 static const struct xfs_item_ops xfs_dquot_item_ops = { 233 249 .iop_size = xfs_qm_dquot_logitem_size, 234 250 .iop_format = xfs_qm_dquot_logitem_format, 235 251 .iop_pin = xfs_qm_dquot_logitem_pin, 236 252 .iop_unpin = xfs_qm_dquot_logitem_unpin, 237 - .iop_unlock = xfs_qm_dquot_logitem_unlock, 238 - .iop_committed = xfs_qm_dquot_logitem_committed, 253 + .iop_release = xfs_qm_dquot_logitem_release, 254 + .iop_committing = xfs_qm_dquot_logitem_committing, 239 255 .iop_push = xfs_qm_dquot_logitem_push, 240 - .iop_committing = xfs_qm_dquot_logitem_committing, 241 256 .iop_error = xfs_dquot_item_error 242 257 }; 243 258 ··· 289 320 } 290 321 291 322 /* 292 - * Pinning has no meaning for an quotaoff item, so just return. 293 - */ 294 - STATIC void 295 - xfs_qm_qoff_logitem_pin( 296 - struct xfs_log_item *lip) 297 - { 298 - } 299 - 300 - /* 301 - * Since pinning has no meaning for an quotaoff item, unpinning does 302 - * not either. 303 - */ 304 - STATIC void 305 - xfs_qm_qoff_logitem_unpin( 306 - struct xfs_log_item *lip, 307 - int remove) 308 - { 309 - } 310 - 311 - /* 312 323 * There isn't much you can do to push a quotaoff item. It is simply 313 324 * stuck waiting for the log to be flushed to disk. 314 325 */ ··· 298 349 struct list_head *buffer_list) 299 350 { 300 351 return XFS_ITEM_LOCKED; 301 - } 302 - 303 - /* 304 - * Quotaoff items have no locking or pushing, so return failure 305 - * so that the caller doesn't bother with us. 
306 - */ 307 - STATIC void 308 - xfs_qm_qoff_logitem_unlock( 309 - struct xfs_log_item *lip) 310 - { 311 - } 312 - 313 - /* 314 - * The quotaoff-start-item is logged only once and cannot be moved in the log, 315 - * so simply return the lsn at which it's been logged. 316 - */ 317 - STATIC xfs_lsn_t 318 - xfs_qm_qoff_logitem_committed( 319 - struct xfs_log_item *lip, 320 - xfs_lsn_t lsn) 321 - { 322 - return lsn; 323 352 } 324 353 325 354 STATIC xfs_lsn_t ··· 323 396 return (xfs_lsn_t)-1; 324 397 } 325 398 326 - /* 327 - * XXX rcc - don't know quite what to do with this. I think we can 328 - * just ignore it. The only time that isn't the case is if we allow 329 - * the client to somehow see that quotas have been turned off in which 330 - * we can't allow that to get back until the quotaoff hits the disk. 331 - * So how would that happen? Also, do we need different routines for 332 - * quotaoff start and quotaoff end? I suspect the answer is yes but 333 - * to be sure, I need to look at the recovery code and see how quota off 334 - * recovery is handled (do we roll forward or back or do something else). 335 - * If we roll forwards or backwards, then we need two separate routines, 336 - * one that does nothing and one that stamps in the lsn that matters 337 - * (truly makes the quotaoff irrevocable). If we do something else, 338 - * then maybe we don't need two. 
339 - */ 340 - STATIC void 341 - xfs_qm_qoff_logitem_committing( 342 - struct xfs_log_item *lip, 343 - xfs_lsn_t commit_lsn) 344 - { 345 - } 346 - 347 399 static const struct xfs_item_ops xfs_qm_qoffend_logitem_ops = { 348 400 .iop_size = xfs_qm_qoff_logitem_size, 349 401 .iop_format = xfs_qm_qoff_logitem_format, 350 - .iop_pin = xfs_qm_qoff_logitem_pin, 351 - .iop_unpin = xfs_qm_qoff_logitem_unpin, 352 - .iop_unlock = xfs_qm_qoff_logitem_unlock, 353 402 .iop_committed = xfs_qm_qoffend_logitem_committed, 354 403 .iop_push = xfs_qm_qoff_logitem_push, 355 - .iop_committing = xfs_qm_qoff_logitem_committing 356 404 }; 357 405 358 - /* 359 - * This is the ops vector shared by all quotaoff-start log items. 360 - */ 361 406 static const struct xfs_item_ops xfs_qm_qoff_logitem_ops = { 362 407 .iop_size = xfs_qm_qoff_logitem_size, 363 408 .iop_format = xfs_qm_qoff_logitem_format, 364 - .iop_pin = xfs_qm_qoff_logitem_pin, 365 - .iop_unpin = xfs_qm_qoff_logitem_unpin, 366 - .iop_unlock = xfs_qm_qoff_logitem_unlock, 367 - .iop_committed = xfs_qm_qoff_logitem_committed, 368 409 .iop_push = xfs_qm_qoff_logitem_push, 369 - .iop_committing = xfs_qm_qoff_logitem_committing 370 410 }; 371 411 372 412 /*
+2 -2
fs/xfs/xfs_dquot_item.h
··· 12 12 struct xfs_qoff_logitem; 13 13 14 14 typedef struct xfs_dq_logitem { 15 - xfs_log_item_t qli_item; /* common portion */ 15 + struct xfs_log_item qli_item; /* common portion */ 16 16 struct xfs_dquot *qli_dquot; /* dquot ptr */ 17 17 xfs_lsn_t qli_flush_lsn; /* lsn at last flush */ 18 18 } xfs_dq_logitem_t; 19 19 20 20 typedef struct xfs_qoff_logitem { 21 - xfs_log_item_t qql_item; /* common portion */ 21 + struct xfs_log_item qql_item; /* common portion */ 22 22 struct xfs_qoff_logitem *qql_start_lip; /* qoff-start logitem, if any */ 23 23 unsigned int qql_flags; 24 24 } xfs_qoff_logitem_t;
+2 -1
fs/xfs/xfs_error.c
··· 4 4 * All Rights Reserved. 5 5 */ 6 6 #include "xfs.h" 7 + #include "xfs_shared.h" 7 8 #include "xfs_format.h" 8 9 #include "xfs_fs.h" 9 10 #include "xfs_log_format.h" ··· 354 353 size_t bufsz, 355 354 xfs_failaddr_t failaddr) 356 355 { 357 - struct xfs_mount *mp = bp->b_target->bt_mount; 356 + struct xfs_mount *mp = bp->b_mount; 358 357 xfs_failaddr_t fa; 359 358 int sz; 360 359
+1 -3
fs/xfs/xfs_export.c
··· 4 4 * All Rights Reserved. 5 5 */ 6 6 #include "xfs.h" 7 + #include "xfs_shared.h" 7 8 #include "xfs_format.h" 8 9 #include "xfs_log_format.h" 9 10 #include "xfs_trans_resv.h" 10 11 #include "xfs_mount.h" 11 - #include "xfs_da_format.h" 12 - #include "xfs_da_btree.h" 13 12 #include "xfs_dir2.h" 14 13 #include "xfs_export.h" 15 14 #include "xfs_inode.h" 16 15 #include "xfs_trans.h" 17 16 #include "xfs_inode_item.h" 18 - #include "xfs_trace.h" 19 17 #include "xfs_icache.h" 20 18 #include "xfs_log.h" 21 19 #include "xfs_pnfs.h"
+266 -158
fs/xfs/xfs_extfree_item.c
··· 9 9 #include "xfs_log_format.h" 10 10 #include "xfs_trans_resv.h" 11 11 #include "xfs_bit.h" 12 + #include "xfs_shared.h" 12 13 #include "xfs_mount.h" 14 + #include "xfs_defer.h" 13 15 #include "xfs_trans.h" 14 16 #include "xfs_trans_priv.h" 15 - #include "xfs_buf_item.h" 16 17 #include "xfs_extfree_item.h" 17 18 #include "xfs_log.h" 18 19 #include "xfs_btree.h" 19 20 #include "xfs_rmap.h" 21 + #include "xfs_alloc.h" 22 + #include "xfs_bmap.h" 23 + #include "xfs_trace.h" 20 24 21 25 22 26 kmem_zone_t *xfs_efi_zone; ··· 111 107 112 108 113 109 /* 114 - * Pinning has no meaning for an efi item, so just return. 115 - */ 116 - STATIC void 117 - xfs_efi_item_pin( 118 - struct xfs_log_item *lip) 119 - { 120 - } 121 - 122 - /* 123 110 * The unpin operation is the last place an EFI is manipulated in the log. It is 124 111 * either inserted in the AIL or aborted in the event of a log I/O error. In 125 112 * either case, the EFI transaction has been successfully committed to make it ··· 128 133 } 129 134 130 135 /* 131 - * Efi items have no locking or pushing. However, since EFIs are pulled from 132 - * the AIL when their corresponding EFDs are committed to disk, their situation 133 - * is very similar to being pinned. Return XFS_ITEM_PINNED so that the caller 134 - * will eventually flush the log. This should help in getting the EFI out of 135 - * the AIL. 136 - */ 137 - STATIC uint 138 - xfs_efi_item_push( 139 - struct xfs_log_item *lip, 140 - struct list_head *buffer_list) 141 - { 142 - return XFS_ITEM_PINNED; 143 - } 144 - 145 - /* 146 136 * The EFI has been either committed or aborted if the transaction has been 147 137 * cancelled. If the transaction was cancelled, an EFD isn't going to be 148 138 * constructed and thus we free the EFI here directly. 
149 139 */ 150 140 STATIC void 151 - xfs_efi_item_unlock( 141 + xfs_efi_item_release( 152 142 struct xfs_log_item *lip) 153 143 { 154 - if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) 155 - xfs_efi_release(EFI_ITEM(lip)); 144 + xfs_efi_release(EFI_ITEM(lip)); 156 145 } 157 146 158 - /* 159 - * The EFI is logged only once and cannot be moved in the log, so simply return 160 - * the lsn at which it's been logged. 161 - */ 162 - STATIC xfs_lsn_t 163 - xfs_efi_item_committed( 164 - struct xfs_log_item *lip, 165 - xfs_lsn_t lsn) 166 - { 167 - return lsn; 168 - } 169 - 170 - /* 171 - * The EFI dependency tracking op doesn't do squat. It can't because 172 - * it doesn't know where the free extent is coming from. The dependency 173 - * tracking has to be handled by the "enclosing" metadata object. For 174 - * example, for inodes, the inode is locked throughout the extent freeing 175 - * so the dependency should be recorded there. 176 - */ 177 - STATIC void 178 - xfs_efi_item_committing( 179 - struct xfs_log_item *lip, 180 - xfs_lsn_t lsn) 181 - { 182 - } 183 - 184 - /* 185 - * This is the ops vector shared by all efi log items. 186 - */ 187 147 static const struct xfs_item_ops xfs_efi_item_ops = { 188 148 .iop_size = xfs_efi_item_size, 189 149 .iop_format = xfs_efi_item_format, 190 - .iop_pin = xfs_efi_item_pin, 191 150 .iop_unpin = xfs_efi_item_unpin, 192 - .iop_unlock = xfs_efi_item_unlock, 193 - .iop_committed = xfs_efi_item_committed, 194 - .iop_push = xfs_efi_item_push, 195 - .iop_committing = xfs_efi_item_committing 151 + .iop_release = xfs_efi_item_release, 196 152 }; 197 153 198 154 ··· 295 349 } 296 350 297 351 /* 298 - * Pinning has no meaning for an efd item, so just return. 299 - */ 300 - STATIC void 301 - xfs_efd_item_pin( 302 - struct xfs_log_item *lip) 303 - { 304 - } 305 - 306 - /* 307 - * Since pinning has no meaning for an efd item, unpinning does 308 - * not either. 
309 - */ 310 - STATIC void 311 - xfs_efd_item_unpin( 312 - struct xfs_log_item *lip, 313 - int remove) 314 - { 315 - } 316 - 317 - /* 318 - * There isn't much you can do to push on an efd item. It is simply stuck 319 - * waiting for the log to be flushed to disk. 320 - */ 321 - STATIC uint 322 - xfs_efd_item_push( 323 - struct xfs_log_item *lip, 324 - struct list_head *buffer_list) 325 - { 326 - return XFS_ITEM_PINNED; 327 - } 328 - 329 - /* 330 352 * The EFD is either committed or aborted if the transaction is cancelled. If 331 353 * the transaction is cancelled, drop our reference to the EFI and free the EFD. 332 354 */ 333 355 STATIC void 334 - xfs_efd_item_unlock( 356 + xfs_efd_item_release( 335 357 struct xfs_log_item *lip) 336 358 { 337 359 struct xfs_efd_log_item *efdp = EFD_ITEM(lip); 338 360 339 - if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) { 340 - xfs_efi_release(efdp->efd_efip); 341 - xfs_efd_item_free(efdp); 342 - } 343 - } 344 - 345 - /* 346 - * When the efd item is committed to disk, all we need to do is delete our 347 - * reference to our partner efi item and then free ourselves. Since we're 348 - * freeing ourselves we must return -1 to keep the transaction code from further 349 - * referencing this item. 350 - */ 351 - STATIC xfs_lsn_t 352 - xfs_efd_item_committed( 353 - struct xfs_log_item *lip, 354 - xfs_lsn_t lsn) 355 - { 356 - struct xfs_efd_log_item *efdp = EFD_ITEM(lip); 357 - 358 - /* 359 - * Drop the EFI reference regardless of whether the EFD has been 360 - * aborted. Once the EFD transaction is constructed, it is the sole 361 - * responsibility of the EFD to release the EFI (even if the EFI is 362 - * aborted due to log I/O error). 363 - */ 364 361 xfs_efi_release(efdp->efd_efip); 365 362 xfs_efd_item_free(efdp); 366 - 367 - return (xfs_lsn_t)-1; 368 363 } 369 364 370 - /* 371 - * The EFD dependency tracking op doesn't do squat. It can't because 372 - * it doesn't know where the free extent is coming from. 
The dependency 373 - * tracking has to be handled by the "enclosing" metadata object. For 374 - * example, for inodes, the inode is locked throughout the extent freeing 375 - * so the dependency should be recorded there. 376 - */ 377 - STATIC void 378 - xfs_efd_item_committing( 379 - struct xfs_log_item *lip, 380 - xfs_lsn_t lsn) 381 - { 382 - } 383 - 384 - /* 385 - * This is the ops vector shared by all efd log items. 386 - */ 387 365 static const struct xfs_item_ops xfs_efd_item_ops = { 366 + .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED, 388 367 .iop_size = xfs_efd_item_size, 389 368 .iop_format = xfs_efd_item_format, 390 - .iop_pin = xfs_efd_item_pin, 391 - .iop_unpin = xfs_efd_item_unpin, 392 - .iop_unlock = xfs_efd_item_unlock, 393 - .iop_committed = xfs_efd_item_committed, 394 - .iop_push = xfs_efd_item_push, 395 - .iop_committing = xfs_efd_item_committing 369 + .iop_release = xfs_efd_item_release, 396 370 }; 397 371 398 372 /* 399 - * Allocate and initialize an efd item with the given number of extents. 373 + * Allocate an "extent free done" log item that will hold nextents worth of 374 + * extents. The caller must use all nextents extents, because we are not 375 + * flexible about this at all. 
400 376 */ 401 - struct xfs_efd_log_item * 402 - xfs_efd_init( 403 - struct xfs_mount *mp, 404 - struct xfs_efi_log_item *efip, 405 - uint nextents) 406 - 377 + static struct xfs_efd_log_item * 378 + xfs_trans_get_efd( 379 + struct xfs_trans *tp, 380 + struct xfs_efi_log_item *efip, 381 + unsigned int nextents) 407 382 { 408 - struct xfs_efd_log_item *efdp; 409 - uint size; 383 + struct xfs_efd_log_item *efdp; 410 384 411 385 ASSERT(nextents > 0); 386 + 412 387 if (nextents > XFS_EFD_MAX_FAST_EXTENTS) { 413 - size = (uint)(sizeof(xfs_efd_log_item_t) + 414 - ((nextents - 1) * sizeof(xfs_extent_t))); 415 - efdp = kmem_zalloc(size, KM_SLEEP); 388 + efdp = kmem_zalloc(sizeof(struct xfs_efd_log_item) + 389 + (nextents - 1) * sizeof(struct xfs_extent), 390 + KM_SLEEP); 416 391 } else { 417 392 efdp = kmem_zone_zalloc(xfs_efd_zone, KM_SLEEP); 418 393 } 419 394 420 - xfs_log_item_init(mp, &efdp->efd_item, XFS_LI_EFD, &xfs_efd_item_ops); 395 + xfs_log_item_init(tp->t_mountp, &efdp->efd_item, XFS_LI_EFD, 396 + &xfs_efd_item_ops); 421 397 efdp->efd_efip = efip; 422 398 efdp->efd_format.efd_nextents = nextents; 423 399 efdp->efd_format.efd_efi_id = efip->efi_format.efi_id; 424 400 401 + xfs_trans_add_item(tp, &efdp->efd_item); 425 402 return efdp; 426 403 } 404 + 405 + /* 406 + * Free an extent and log it to the EFD. Note that the transaction is marked 407 + * dirty regardless of whether the extent free succeeds or fails to support the 408 + * EFI/EFD lifecycle rules. 
409 + */ 410 + static int 411 + xfs_trans_free_extent( 412 + struct xfs_trans *tp, 413 + struct xfs_efd_log_item *efdp, 414 + xfs_fsblock_t start_block, 415 + xfs_extlen_t ext_len, 416 + const struct xfs_owner_info *oinfo, 417 + bool skip_discard) 418 + { 419 + struct xfs_mount *mp = tp->t_mountp; 420 + struct xfs_extent *extp; 421 + uint next_extent; 422 + xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, start_block); 423 + xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp, 424 + start_block); 425 + int error; 426 + 427 + trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno, ext_len); 428 + 429 + error = __xfs_free_extent(tp, start_block, ext_len, 430 + oinfo, XFS_AG_RESV_NONE, skip_discard); 431 + /* 432 + * Mark the transaction dirty, even on error. This ensures the 433 + * transaction is aborted, which: 434 + * 435 + * 1.) releases the EFI and frees the EFD 436 + * 2.) shuts down the filesystem 437 + */ 438 + tp->t_flags |= XFS_TRANS_DIRTY; 439 + set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags); 440 + 441 + next_extent = efdp->efd_next_extent; 442 + ASSERT(next_extent < efdp->efd_format.efd_nextents); 443 + extp = &(efdp->efd_format.efd_extents[next_extent]); 444 + extp->ext_start = start_block; 445 + extp->ext_len = ext_len; 446 + efdp->efd_next_extent++; 447 + 448 + return error; 449 + } 450 + 451 + /* Sort bmap items by AG. */ 452 + static int 453 + xfs_extent_free_diff_items( 454 + void *priv, 455 + struct list_head *a, 456 + struct list_head *b) 457 + { 458 + struct xfs_mount *mp = priv; 459 + struct xfs_extent_free_item *ra; 460 + struct xfs_extent_free_item *rb; 461 + 462 + ra = container_of(a, struct xfs_extent_free_item, xefi_list); 463 + rb = container_of(b, struct xfs_extent_free_item, xefi_list); 464 + return XFS_FSB_TO_AGNO(mp, ra->xefi_startblock) - 465 + XFS_FSB_TO_AGNO(mp, rb->xefi_startblock); 466 + } 467 + 468 + /* Get an EFI. 
*/ 469 + STATIC void * 470 + xfs_extent_free_create_intent( 471 + struct xfs_trans *tp, 472 + unsigned int count) 473 + { 474 + struct xfs_efi_log_item *efip; 475 + 476 + ASSERT(tp != NULL); 477 + ASSERT(count > 0); 478 + 479 + efip = xfs_efi_init(tp->t_mountp, count); 480 + ASSERT(efip != NULL); 481 + 482 + /* 483 + * Get a log_item_desc to point at the new item. 484 + */ 485 + xfs_trans_add_item(tp, &efip->efi_item); 486 + return efip; 487 + } 488 + 489 + /* Log a free extent to the intent item. */ 490 + STATIC void 491 + xfs_extent_free_log_item( 492 + struct xfs_trans *tp, 493 + void *intent, 494 + struct list_head *item) 495 + { 496 + struct xfs_efi_log_item *efip = intent; 497 + struct xfs_extent_free_item *free; 498 + uint next_extent; 499 + struct xfs_extent *extp; 500 + 501 + free = container_of(item, struct xfs_extent_free_item, xefi_list); 502 + 503 + tp->t_flags |= XFS_TRANS_DIRTY; 504 + set_bit(XFS_LI_DIRTY, &efip->efi_item.li_flags); 505 + 506 + /* 507 + * atomic_inc_return gives us the value after the increment; 508 + * we want to use it as an array index so we need to subtract 1 from 509 + * it. 510 + */ 511 + next_extent = atomic_inc_return(&efip->efi_next_extent) - 1; 512 + ASSERT(next_extent < efip->efi_format.efi_nextents); 513 + extp = &efip->efi_format.efi_extents[next_extent]; 514 + extp->ext_start = free->xefi_startblock; 515 + extp->ext_len = free->xefi_blockcount; 516 + } 517 + 518 + /* Get an EFD so we can process all the free extents. */ 519 + STATIC void * 520 + xfs_extent_free_create_done( 521 + struct xfs_trans *tp, 522 + void *intent, 523 + unsigned int count) 524 + { 525 + return xfs_trans_get_efd(tp, intent, count); 526 + } 527 + 528 + /* Process a free extent. 
*/ 529 + STATIC int 530 + xfs_extent_free_finish_item( 531 + struct xfs_trans *tp, 532 + struct list_head *item, 533 + void *done_item, 534 + void **state) 535 + { 536 + struct xfs_extent_free_item *free; 537 + int error; 538 + 539 + free = container_of(item, struct xfs_extent_free_item, xefi_list); 540 + error = xfs_trans_free_extent(tp, done_item, 541 + free->xefi_startblock, 542 + free->xefi_blockcount, 543 + &free->xefi_oinfo, free->xefi_skip_discard); 544 + kmem_free(free); 545 + return error; 546 + } 547 + 548 + /* Abort all pending EFIs. */ 549 + STATIC void 550 + xfs_extent_free_abort_intent( 551 + void *intent) 552 + { 553 + xfs_efi_release(intent); 554 + } 555 + 556 + /* Cancel a free extent. */ 557 + STATIC void 558 + xfs_extent_free_cancel_item( 559 + struct list_head *item) 560 + { 561 + struct xfs_extent_free_item *free; 562 + 563 + free = container_of(item, struct xfs_extent_free_item, xefi_list); 564 + kmem_free(free); 565 + } 566 + 567 + const struct xfs_defer_op_type xfs_extent_free_defer_type = { 568 + .max_items = XFS_EFI_MAX_FAST_EXTENTS, 569 + .diff_items = xfs_extent_free_diff_items, 570 + .create_intent = xfs_extent_free_create_intent, 571 + .abort_intent = xfs_extent_free_abort_intent, 572 + .log_item = xfs_extent_free_log_item, 573 + .create_done = xfs_extent_free_create_done, 574 + .finish_item = xfs_extent_free_finish_item, 575 + .cancel_item = xfs_extent_free_cancel_item, 576 + }; 577 + 578 + /* 579 + * AGFL blocks are accounted differently in the reserve pools and are not 580 + * inserted into the busy extent list. 
581 + */ 582 + STATIC int 583 + xfs_agfl_free_finish_item( 584 + struct xfs_trans *tp, 585 + struct list_head *item, 586 + void *done_item, 587 + void **state) 588 + { 589 + struct xfs_mount *mp = tp->t_mountp; 590 + struct xfs_efd_log_item *efdp = done_item; 591 + struct xfs_extent_free_item *free; 592 + struct xfs_extent *extp; 593 + struct xfs_buf *agbp; 594 + int error; 595 + xfs_agnumber_t agno; 596 + xfs_agblock_t agbno; 597 + uint next_extent; 598 + 599 + free = container_of(item, struct xfs_extent_free_item, xefi_list); 600 + ASSERT(free->xefi_blockcount == 1); 601 + agno = XFS_FSB_TO_AGNO(mp, free->xefi_startblock); 602 + agbno = XFS_FSB_TO_AGBNO(mp, free->xefi_startblock); 603 + 604 + trace_xfs_agfl_free_deferred(mp, agno, 0, agbno, free->xefi_blockcount); 605 + 606 + error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp); 607 + if (!error) 608 + error = xfs_free_agfl_block(tp, agno, agbno, agbp, 609 + &free->xefi_oinfo); 610 + 611 + /* 612 + * Mark the transaction dirty, even on error. This ensures the 613 + * transaction is aborted, which: 614 + * 615 + * 1.) releases the EFI and frees the EFD 616 + * 2.) 
shuts down the filesystem 617 + */ 618 + tp->t_flags |= XFS_TRANS_DIRTY; 619 + set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags); 620 + 621 + next_extent = efdp->efd_next_extent; 622 + ASSERT(next_extent < efdp->efd_format.efd_nextents); 623 + extp = &(efdp->efd_format.efd_extents[next_extent]); 624 + extp->ext_start = free->xefi_startblock; 625 + extp->ext_len = free->xefi_blockcount; 626 + efdp->efd_next_extent++; 627 + 628 + kmem_free(free); 629 + return error; 630 + } 631 + 632 + /* sub-type with special handling for AGFL deferred frees */ 633 + const struct xfs_defer_op_type xfs_agfl_free_defer_type = { 634 + .max_items = XFS_EFI_MAX_FAST_EXTENTS, 635 + .diff_items = xfs_extent_free_diff_items, 636 + .create_intent = xfs_extent_free_create_intent, 637 + .abort_intent = xfs_extent_free_abort_intent, 638 + .log_item = xfs_extent_free_log_item, 639 + .create_done = xfs_extent_free_create_done, 640 + .finish_item = xfs_agfl_free_finish_item, 641 + .cancel_item = xfs_extent_free_cancel_item, 642 + }; 427 643 428 644 /* 429 645 * Process an extent free intent item that was recovered from
+2 -4
fs/xfs/xfs_extfree_item.h
··· 51 51 * AIL, so at this point both the EFI and EFD are freed. 52 52 */ 53 53 typedef struct xfs_efi_log_item { 54 - xfs_log_item_t efi_item; 54 + struct xfs_log_item efi_item; 55 55 atomic_t efi_refcount; 56 56 atomic_t efi_next_extent; 57 57 unsigned long efi_flags; /* misc flags */ ··· 64 64 * have been freed. 65 65 */ 66 66 typedef struct xfs_efd_log_item { 67 - xfs_log_item_t efd_item; 67 + struct xfs_log_item efd_item; 68 68 xfs_efi_log_item_t *efd_efip; 69 69 uint efd_next_extent; 70 70 xfs_efd_log_format_t efd_format; ··· 79 79 extern struct kmem_zone *xfs_efd_zone; 80 80 81 81 xfs_efi_log_item_t *xfs_efi_init(struct xfs_mount *, uint); 82 - xfs_efd_log_item_t *xfs_efd_init(struct xfs_mount *, xfs_efi_log_item_t *, 83 - uint); 84 82 int xfs_efi_copy_format(xfs_log_iovec_t *buf, 85 83 xfs_efi_log_format_t *dst_efi_fmt); 86 84 void xfs_efi_item_free(xfs_efi_log_item_t *);
+15 -8
fs/xfs/xfs_file.c
··· 10 10 #include "xfs_log_format.h" 11 11 #include "xfs_trans_resv.h" 12 12 #include "xfs_mount.h" 13 - #include "xfs_da_format.h" 14 - #include "xfs_da_btree.h" 15 13 #include "xfs_inode.h" 16 14 #include "xfs_trans.h" 17 15 #include "xfs_inode_item.h" 18 16 #include "xfs_bmap.h" 19 17 #include "xfs_bmap_util.h" 20 - #include "xfs_error.h" 21 18 #include "xfs_dir2.h" 22 19 #include "xfs_dir2_priv.h" 23 20 #include "xfs_ioctl.h" ··· 25 28 #include "xfs_iomap.h" 26 29 #include "xfs_reflink.h" 27 30 28 - #include <linux/dcache.h> 29 31 #include <linux/falloc.h> 30 - #include <linux/pagevec.h> 31 32 #include <linux/backing-dev.h> 32 33 #include <linux/mman.h> 33 34 ··· 374 379 struct inode *inode = file_inode(iocb->ki_filp); 375 380 struct xfs_inode *ip = XFS_I(inode); 376 381 loff_t offset = iocb->ki_pos; 382 + unsigned int nofs_flag; 377 383 int error = 0; 378 384 379 385 trace_xfs_end_io_direct_write(ip, offset, size); ··· 391 395 */ 392 396 XFS_STATS_ADD(ip->i_mount, xs_write_bytes, size); 393 397 398 + /* 399 + * We can allocate memory here while doing writeback on behalf of 400 + * memory reclaim. To avoid memory allocation deadlocks set the 401 + * task-wide nofs context for the following operations. 402 + */ 403 + nofs_flag = memalloc_nofs_save(); 404 + 394 405 if (flags & IOMAP_DIO_COW) { 395 406 error = xfs_reflink_end_cow(ip, offset, size); 396 407 if (error) 397 - return error; 408 + goto out; 398 409 } 399 410 400 411 /* ··· 410 407 * earlier allows a racing dio read to find unwritten extents before 411 408 * they are converted. 
412 409 */ 413 - if (flags & IOMAP_DIO_UNWRITTEN) 414 - return xfs_iomap_write_unwritten(ip, offset, size, true); 410 + if (flags & IOMAP_DIO_UNWRITTEN) { 411 + error = xfs_iomap_write_unwritten(ip, offset, size, true); 412 + goto out; 413 + } 415 414 416 415 /* 417 416 * We need to update the in-core inode size here so that we don't end up ··· 435 430 spin_unlock(&ip->i_flags_lock); 436 431 } 437 432 433 + out: 434 + memalloc_nofs_restore(nofs_flag); 438 435 return error; 439 436 } 440 437
+1 -4
fs/xfs/xfs_filestream.c
··· 5 5 * All Rights Reserved. 6 6 */ 7 7 #include "xfs.h" 8 + #include "xfs_shared.h" 8 9 #include "xfs_format.h" 9 10 #include "xfs_log_format.h" 10 11 #include "xfs_trans_resv.h" 11 12 #include "xfs_sb.h" 12 13 #include "xfs_mount.h" 13 - #include "xfs_defer.h" 14 14 #include "xfs_inode.h" 15 15 #include "xfs_bmap.h" 16 - #include "xfs_bmap_util.h" 17 16 #include "xfs_alloc.h" 18 17 #include "xfs_mru_cache.h" 19 - #include "xfs_filestream.h" 20 18 #include "xfs_trace.h" 21 19 #include "xfs_ag_resv.h" 22 20 #include "xfs_trans.h" 23 - #include "xfs_shared.h" 24 21 25 22 struct xfs_fstrm_item { 26 23 struct xfs_mru_cache_elem mru;
-4
fs/xfs/xfs_fsmap.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_log_format.h" 11 11 #include "xfs_trans_resv.h" 12 - #include "xfs_sb.h" 13 12 #include "xfs_mount.h" 14 - #include "xfs_defer.h" 15 13 #include "xfs_inode.h" 16 14 #include "xfs_trans.h" 17 - #include "xfs_error.h" 18 15 #include "xfs_btree.h" 19 16 #include "xfs_rmap_btree.h" 20 17 #include "xfs_trace.h" 21 - #include "xfs_log.h" 22 18 #include "xfs_rmap.h" 23 19 #include "xfs_alloc.h" 24 20 #include "xfs_bit.h"
+2 -6
fs/xfs/xfs_fsops.c
··· 11 11 #include "xfs_trans_resv.h" 12 12 #include "xfs_sb.h" 13 13 #include "xfs_mount.h" 14 - #include "xfs_defer.h" 15 14 #include "xfs_trans.h" 16 15 #include "xfs_error.h" 17 - #include "xfs_btree.h" 18 16 #include "xfs_alloc.h" 19 17 #include "xfs_fsops.h" 20 18 #include "xfs_trans_space.h" 21 - #include "xfs_rtalloc.h" 22 - #include "xfs_trace.h" 23 19 #include "xfs_log.h" 24 20 #include "xfs_ag.h" 25 21 #include "xfs_ag_resv.h" ··· 247 251 if (mp->m_sb.sb_imax_pct) { 248 252 uint64_t icount = mp->m_sb.sb_dblocks * mp->m_sb.sb_imax_pct; 249 253 do_div(icount, 100); 250 - mp->m_maxicount = XFS_FSB_TO_INO(mp, icount); 254 + M_IGEO(mp)->maxicount = XFS_FSB_TO_INO(mp, icount); 251 255 } else 252 - mp->m_maxicount = 0; 256 + M_IGEO(mp)->maxicount = 0; 253 257 254 258 /* Update secondary superblocks now the physical grow has completed */ 255 259 error = xfs_update_secondary_sbs(mp);
+3 -1
fs/xfs/xfs_globals.c
··· 4 4 * All Rights Reserved. 5 5 */ 6 6 #include "xfs.h" 7 - #include "xfs_sysctl.h" 8 7 9 8 /* 10 9 * Tunable XFS parameters. xfs_params is required even when CONFIG_SYSCTL=n, ··· 39 40 .bug_on_assert = true, /* assert failures BUG() */ 40 41 #else 41 42 .bug_on_assert = false, /* assert failures WARN() */ 43 + #endif 44 + #ifdef DEBUG 45 + .pwork_threads = -1, /* automatic thread detection */ 42 46 #endif 43 47 };
+1 -5
fs/xfs/xfs_health.c
··· 9 9 #include "xfs_format.h" 10 10 #include "xfs_log_format.h" 11 11 #include "xfs_trans_resv.h" 12 - #include "xfs_bit.h" 13 12 #include "xfs_sb.h" 14 13 #include "xfs_mount.h" 15 - #include "xfs_defer.h" 16 - #include "xfs_da_format.h" 17 - #include "xfs_da_btree.h" 18 14 #include "xfs_inode.h" 19 15 #include "xfs_trace.h" 20 16 #include "xfs_health.h" ··· 369 373 void 370 374 xfs_bulkstat_health( 371 375 struct xfs_inode *ip, 372 - struct xfs_bstat *bs) 376 + struct xfs_bulkstat *bs) 373 377 { 374 378 const struct ioctl_sick_map *m; 375 379 unsigned int sick;
+1 -3
fs/xfs/xfs_icache.c
··· 5 5 */ 6 6 #include "xfs.h" 7 7 #include "xfs_fs.h" 8 + #include "xfs_shared.h" 8 9 #include "xfs_format.h" 9 10 #include "xfs_log_format.h" 10 11 #include "xfs_trans_resv.h" 11 12 #include "xfs_sb.h" 12 13 #include "xfs_mount.h" 13 14 #include "xfs_inode.h" 14 - #include "xfs_error.h" 15 15 #include "xfs_trans.h" 16 16 #include "xfs_trans_priv.h" 17 17 #include "xfs_inode_item.h" ··· 23 23 #include "xfs_dquot.h" 24 24 #include "xfs_reflink.h" 25 25 26 - #include <linux/kthread.h> 27 - #include <linux/freezer.h> 28 26 #include <linux/iversion.h> 29 27 30 28 /*
+4 -71
fs/xfs/xfs_icreate_item.c
··· 6 6 #include "xfs.h" 7 7 #include "xfs_fs.h" 8 8 #include "xfs_shared.h" 9 - #include "xfs_format.h" 10 9 #include "xfs_log_format.h" 11 - #include "xfs_trans_resv.h" 12 - #include "xfs_bit.h" 13 - #include "xfs_mount.h" 14 10 #include "xfs_trans.h" 15 11 #include "xfs_trans_priv.h" 16 - #include "xfs_error.h" 17 12 #include "xfs_icreate_item.h" 18 13 #include "xfs_log.h" 19 14 ··· 51 56 sizeof(struct xfs_icreate_log)); 52 57 } 53 58 54 - 55 - /* Pinning has no meaning for the create item, so just return. */ 56 59 STATIC void 57 - xfs_icreate_item_pin( 60 + xfs_icreate_item_release( 58 61 struct xfs_log_item *lip) 59 62 { 63 + kmem_zone_free(xfs_icreate_zone, ICR_ITEM(lip)); 60 64 } 61 65 62 - 63 - /* pinning has no meaning for the create item, so just return. */ 64 - STATIC void 65 - xfs_icreate_item_unpin( 66 - struct xfs_log_item *lip, 67 - int remove) 68 - { 69 - } 70 - 71 - STATIC void 72 - xfs_icreate_item_unlock( 73 - struct xfs_log_item *lip) 74 - { 75 - struct xfs_icreate_item *icp = ICR_ITEM(lip); 76 - 77 - if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) 78 - kmem_zone_free(xfs_icreate_zone, icp); 79 - return; 80 - } 81 - 82 - /* 83 - * Because we have ordered buffers being tracked in the AIL for the inode 84 - * creation, we don't need the create item after this. Hence we can free 85 - * the log item and return -1 to tell the caller we're done with the item. 86 - */ 87 - STATIC xfs_lsn_t 88 - xfs_icreate_item_committed( 89 - struct xfs_log_item *lip, 90 - xfs_lsn_t lsn) 91 - { 92 - struct xfs_icreate_item *icp = ICR_ITEM(lip); 93 - 94 - kmem_zone_free(xfs_icreate_zone, icp); 95 - return (xfs_lsn_t)-1; 96 - } 97 - 98 - /* item can never get into the AIL */ 99 - STATIC uint 100 - xfs_icreate_item_push( 101 - struct xfs_log_item *lip, 102 - struct list_head *buffer_list) 103 - { 104 - ASSERT(0); 105 - return XFS_ITEM_SUCCESS; 106 - } 107 - 108 - /* Ordered buffers do the dependency tracking here, so this does nothing. 
*/ 109 - STATIC void 110 - xfs_icreate_item_committing( 111 - struct xfs_log_item *lip, 112 - xfs_lsn_t lsn) 113 - { 114 - } 115 - 116 - /* 117 - * This is the ops vector shared by all buf log items. 118 - */ 119 66 static const struct xfs_item_ops xfs_icreate_item_ops = { 67 + .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED, 120 68 .iop_size = xfs_icreate_item_size, 121 69 .iop_format = xfs_icreate_item_format, 122 - .iop_pin = xfs_icreate_item_pin, 123 - .iop_unpin = xfs_icreate_item_unpin, 124 - .iop_push = xfs_icreate_item_push, 125 - .iop_unlock = xfs_icreate_item_unlock, 126 - .iop_committed = xfs_icreate_item_committed, 127 - .iop_committing = xfs_icreate_item_committing, 70 + .iop_release = xfs_icreate_item_release, 128 71 }; 129 72 130 73
+18 -24
fs/xfs/xfs_inode.c
··· 3 3 * Copyright (c) 2000-2006 Silicon Graphics, Inc. 4 4 * All Rights Reserved. 5 5 */ 6 - #include <linux/log2.h> 7 6 #include <linux/iversion.h> 8 7 9 8 #include "xfs.h" ··· 15 16 #include "xfs_mount.h" 16 17 #include "xfs_defer.h" 17 18 #include "xfs_inode.h" 18 - #include "xfs_da_format.h" 19 - #include "xfs_da_btree.h" 20 19 #include "xfs_dir2.h" 21 - #include "xfs_attr_sf.h" 22 20 #include "xfs_attr.h" 23 21 #include "xfs_trans_space.h" 24 22 #include "xfs_trans.h" ··· 28 32 #include "xfs_error.h" 29 33 #include "xfs_quota.h" 30 34 #include "xfs_filestream.h" 31 - #include "xfs_cksum.h" 32 35 #include "xfs_trace.h" 33 36 #include "xfs_icache.h" 34 37 #include "xfs_symlink.h" ··· 35 40 #include "xfs_log.h" 36 41 #include "xfs_bmap_btree.h" 37 42 #include "xfs_reflink.h" 38 - #include "xfs_dir2_priv.h" 39 43 40 44 kmem_zone_t *xfs_inode_zone; 41 45 ··· 435 441 */ 436 442 static void 437 443 xfs_lock_inodes( 438 - xfs_inode_t **ips, 439 - int inodes, 440 - uint lock_mode) 444 + struct xfs_inode **ips, 445 + int inodes, 446 + uint lock_mode) 441 447 { 442 - int attempts = 0, i, j, try_lock; 443 - xfs_log_item_t *lp; 448 + int attempts = 0, i, j, try_lock; 449 + struct xfs_log_item *lp; 444 450 445 451 /* 446 452 * Currently supports between 2 and 5 inodes with exclusive locking. We ··· 479 485 */ 480 486 if (!try_lock) { 481 487 for (j = (i - 1); j >= 0 && !try_lock; j--) { 482 - lp = (xfs_log_item_t *)ips[j]->i_itemp; 488 + lp = &ips[j]->i_itemp->ili_item; 483 489 if (lp && test_bit(XFS_LI_IN_AIL, &lp->li_flags)) 484 490 try_lock++; 485 491 } ··· 545 551 struct xfs_inode *temp; 546 552 uint mode_temp; 547 553 int attempts = 0; 548 - xfs_log_item_t *lp; 554 + struct xfs_log_item *lp; 549 555 550 556 ASSERT(hweight32(ip0_mode) == 1); 551 557 ASSERT(hweight32(ip1_mode) == 1); ··· 579 585 * the second lock. If we can't get it, we must release the first one 580 586 * and try again. 
581 587 */ 582 - lp = (xfs_log_item_t *)ip0->i_itemp; 588 + lp = &ip0->i_itemp->ili_item; 583 589 if (lp && test_bit(XFS_LI_IN_AIL, &lp->li_flags)) { 584 590 if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(ip1_mode, 1))) { 585 591 xfs_iunlock(ip0, ip0_mode); ··· 2531 2537 xfs_inode_log_item_t *iip; 2532 2538 struct xfs_log_item *lip; 2533 2539 struct xfs_perag *pag; 2540 + struct xfs_ino_geometry *igeo = M_IGEO(mp); 2534 2541 xfs_ino_t inum; 2535 2542 2536 2543 inum = xic->first_ino; 2537 2544 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum)); 2538 - nbufs = mp->m_ialloc_blks / mp->m_blocks_per_cluster; 2545 + nbufs = igeo->ialloc_blks / igeo->blocks_per_cluster; 2539 2546 2540 - for (j = 0; j < nbufs; j++, inum += mp->m_inodes_per_cluster) { 2547 + for (j = 0; j < nbufs; j++, inum += igeo->inodes_per_cluster) { 2541 2548 /* 2542 2549 * The allocation bitmap tells us which inodes of the chunk were 2543 2550 * physically allocated. Skip the cluster if an inode falls into ··· 2546 2551 */ 2547 2552 ioffset = inum - xic->first_ino; 2548 2553 if ((xic->alloc & XFS_INOBT_MASK(ioffset)) == 0) { 2549 - ASSERT(ioffset % mp->m_inodes_per_cluster == 0); 2554 + ASSERT(ioffset % igeo->inodes_per_cluster == 0); 2550 2555 continue; 2551 2556 } 2552 2557 ··· 2562 2567 * to mark all the active inodes on the buffer stale. 2563 2568 */ 2564 2569 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, 2565 - mp->m_bsize * mp->m_blocks_per_cluster, 2570 + mp->m_bsize * igeo->blocks_per_cluster, 2566 2571 XBF_UNMAPPED); 2567 2572 2568 2573 if (!bp) ··· 2609 2614 * transaction stale above, which means there is no point in 2610 2615 * even trying to lock them. 
2611 2616 */ 2612 - for (i = 0; i < mp->m_inodes_per_cluster; i++) { 2617 + for (i = 0; i < igeo->inodes_per_cluster; i++) { 2613 2618 retry: 2614 2619 rcu_read_lock(); 2615 2620 ip = radix_tree_lookup(&pag->pag_ici_root, ··· 3467 3472 struct xfs_mount *mp = ip->i_mount; 3468 3473 struct xfs_perag *pag; 3469 3474 unsigned long first_index, mask; 3470 - unsigned long inodes_per_cluster; 3471 3475 int cilist_size; 3472 3476 struct xfs_inode **cilist; 3473 3477 struct xfs_inode *cip; 3478 + struct xfs_ino_geometry *igeo = M_IGEO(mp); 3474 3479 int nr_found; 3475 3480 int clcount = 0; 3476 3481 int i; 3477 3482 3478 3483 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); 3479 3484 3480 - inodes_per_cluster = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog; 3481 - cilist_size = inodes_per_cluster * sizeof(xfs_inode_t *); 3485 + cilist_size = igeo->inodes_per_cluster * sizeof(struct xfs_inode *); 3482 3486 cilist = kmem_alloc(cilist_size, KM_MAYFAIL|KM_NOFS); 3483 3487 if (!cilist) 3484 3488 goto out_put; 3485 3489 3486 - mask = ~(((mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog)) - 1); 3490 + mask = ~(igeo->inodes_per_cluster - 1); 3487 3491 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask; 3488 3492 rcu_read_lock(); 3489 3493 /* really need a gang lookup range call here */ 3490 3494 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)cilist, 3491 - first_index, inodes_per_cluster); 3495 + first_index, igeo->inodes_per_cluster); 3492 3496 if (nr_found == 0) 3493 3497 goto out_free; 3494 3498
+7 -9
fs/xfs/xfs_inode_item.c
··· 5 5 */ 6 6 #include "xfs.h" 7 7 #include "xfs_fs.h" 8 + #include "xfs_shared.h" 8 9 #include "xfs_format.h" 9 10 #include "xfs_log_format.h" 10 11 #include "xfs_trans_resv.h" ··· 13 12 #include "xfs_inode.h" 14 13 #include "xfs_trans.h" 15 14 #include "xfs_inode_item.h" 16 - #include "xfs_error.h" 17 15 #include "xfs_trace.h" 18 16 #include "xfs_trans_priv.h" 19 17 #include "xfs_buf_item.h" ··· 565 565 * Unlock the inode associated with the inode log item. 566 566 */ 567 567 STATIC void 568 - xfs_inode_item_unlock( 568 + xfs_inode_item_release( 569 569 struct xfs_log_item *lip) 570 570 { 571 571 struct xfs_inode_log_item *iip = INODE_ITEM(lip); ··· 621 621 STATIC void 622 622 xfs_inode_item_committing( 623 623 struct xfs_log_item *lip, 624 - xfs_lsn_t lsn) 624 + xfs_lsn_t commit_lsn) 625 625 { 626 - INODE_ITEM(lip)->ili_last_lsn = lsn; 626 + INODE_ITEM(lip)->ili_last_lsn = commit_lsn; 627 + return xfs_inode_item_release(lip); 627 628 } 628 629 629 - /* 630 - * This is the ops vector shared by all buf log items. 631 - */ 632 630 static const struct xfs_item_ops xfs_inode_item_ops = { 633 631 .iop_size = xfs_inode_item_size, 634 632 .iop_format = xfs_inode_item_format, 635 633 .iop_pin = xfs_inode_item_pin, 636 634 .iop_unpin = xfs_inode_item_unpin, 637 - .iop_unlock = xfs_inode_item_unlock, 635 + .iop_release = xfs_inode_item_release, 638 636 .iop_committed = xfs_inode_item_committed, 639 637 .iop_push = xfs_inode_item_push, 640 - .iop_committing = xfs_inode_item_committing, 638 + .iop_committing = xfs_inode_item_committing, 641 639 .iop_error = xfs_inode_item_error 642 640 }; 643 641
+1 -1
fs/xfs/xfs_inode_item.h
··· 14 14 struct xfs_mount; 15 15 16 16 typedef struct xfs_inode_log_item { 17 - xfs_log_item_t ili_item; /* common portion */ 17 + struct xfs_log_item ili_item; /* common portion */ 18 18 struct xfs_inode *ili_inode; /* inode ptr */ 19 19 xfs_lsn_t ili_flush_lsn; /* lsn at last flush */ 20 20 xfs_lsn_t ili_last_lsn; /* lsn at last transaction */
+261 -33
fs/xfs/xfs_ioctl.c
··· 11 11 #include "xfs_trans_resv.h" 12 12 #include "xfs_mount.h" 13 13 #include "xfs_inode.h" 14 - #include "xfs_ioctl.h" 15 - #include "xfs_alloc.h" 16 14 #include "xfs_rtalloc.h" 15 + #include "xfs_iwalk.h" 17 16 #include "xfs_itable.h" 18 17 #include "xfs_error.h" 19 18 #include "xfs_attr.h" ··· 24 25 #include "xfs_export.h" 25 26 #include "xfs_trace.h" 26 27 #include "xfs_icache.h" 27 - #include "xfs_symlink.h" 28 28 #include "xfs_trans.h" 29 29 #include "xfs_acl.h" 30 30 #include "xfs_btree.h" ··· 34 36 #include "xfs_ag.h" 35 37 #include "xfs_health.h" 36 38 37 - #include <linux/capability.h> 38 - #include <linux/cred.h> 39 - #include <linux/dcache.h> 40 39 #include <linux/mount.h> 41 40 #include <linux/namei.h> 42 - #include <linux/pagemap.h> 43 - #include <linux/slab.h> 44 - #include <linux/exportfs.h> 45 41 46 42 /* 47 43 * xfs_find_handle maps from userspace xfs_fsop_handlereq structure to ··· 713 721 return error; 714 722 } 715 723 724 + /* Return 0 on success or positive error */ 725 + int 726 + xfs_fsbulkstat_one_fmt( 727 + struct xfs_ibulk *breq, 728 + const struct xfs_bulkstat *bstat) 729 + { 730 + struct xfs_bstat bs1; 731 + 732 + xfs_bulkstat_to_bstat(breq->mp, &bs1, bstat); 733 + if (copy_to_user(breq->ubuffer, &bs1, sizeof(bs1))) 734 + return -EFAULT; 735 + return xfs_ibulk_advance(breq, sizeof(struct xfs_bstat)); 736 + } 737 + 738 + int 739 + xfs_fsinumbers_fmt( 740 + struct xfs_ibulk *breq, 741 + const struct xfs_inumbers *igrp) 742 + { 743 + struct xfs_inogrp ig1; 744 + 745 + xfs_inumbers_to_inogrp(&ig1, igrp); 746 + if (copy_to_user(breq->ubuffer, &ig1, sizeof(struct xfs_inogrp))) 747 + return -EFAULT; 748 + return xfs_ibulk_advance(breq, sizeof(struct xfs_inogrp)); 749 + } 750 + 716 751 STATIC int 717 - xfs_ioc_bulkstat( 752 + xfs_ioc_fsbulkstat( 718 753 xfs_mount_t *mp, 719 754 unsigned int cmd, 720 755 void __user *arg) 721 756 { 722 - xfs_fsop_bulkreq_t bulkreq; 723 - int count; /* # of records returned */ 724 - xfs_ino_t inlast; /* last 
inode number */ 725 - int done; 757 + struct xfs_fsop_bulkreq bulkreq; 758 + struct xfs_ibulk breq = { 759 + .mp = mp, 760 + .ocount = 0, 761 + }; 762 + xfs_ino_t lastino; 726 763 int error; 727 764 728 765 /* done = 1 if there are more stats to get and if bulkstat */ ··· 763 742 if (XFS_FORCED_SHUTDOWN(mp)) 764 743 return -EIO; 765 744 766 - if (copy_from_user(&bulkreq, arg, sizeof(xfs_fsop_bulkreq_t))) 745 + if (copy_from_user(&bulkreq, arg, sizeof(struct xfs_fsop_bulkreq))) 767 746 return -EFAULT; 768 747 769 - if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64))) 748 + if (copy_from_user(&lastino, bulkreq.lastip, sizeof(__s64))) 770 749 return -EFAULT; 771 750 772 - if ((count = bulkreq.icount) <= 0) 751 + if (bulkreq.icount <= 0) 773 752 return -EINVAL; 774 753 775 754 if (bulkreq.ubuffer == NULL) 776 755 return -EINVAL; 777 756 778 - if (cmd == XFS_IOC_FSINUMBERS) 779 - error = xfs_inumbers(mp, &inlast, &count, 780 - bulkreq.ubuffer, xfs_inumbers_fmt); 781 - else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE) 782 - error = xfs_bulkstat_one(mp, inlast, bulkreq.ubuffer, 783 - sizeof(xfs_bstat_t), NULL, &done); 784 - else /* XFS_IOC_FSBULKSTAT */ 785 - error = xfs_bulkstat(mp, &inlast, &count, xfs_bulkstat_one, 786 - sizeof(xfs_bstat_t), bulkreq.ubuffer, 787 - &done); 757 + breq.ubuffer = bulkreq.ubuffer; 758 + breq.icount = bulkreq.icount; 759 + 760 + /* 761 + * FSBULKSTAT_SINGLE expects that *lastip contains the inode number 762 + * that we want to stat. However, FSINUMBERS and FSBULKSTAT expect 763 + * that *lastip contains either zero or the number of the last inode to 764 + * be examined by the previous call and return results starting with 765 + * the next inode after that. The new bulk request back end functions 766 + * take the inode to start with, so we have to compute the startino 767 + * parameter from lastino to maintain correct function. lastino == 0 768 + * is a special case because it has traditionally meant "first inode 769 + * in filesystem". 
770 + */ 771 + if (cmd == XFS_IOC_FSINUMBERS) { 772 + breq.startino = lastino ? lastino + 1 : 0; 773 + error = xfs_inumbers(&breq, xfs_fsinumbers_fmt); 774 + lastino = breq.startino - 1; 775 + } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE) { 776 + breq.startino = lastino; 777 + breq.icount = 1; 778 + error = xfs_bulkstat_one(&breq, xfs_fsbulkstat_one_fmt); 779 + } else { /* XFS_IOC_FSBULKSTAT */ 780 + breq.startino = lastino ? lastino + 1 : 0; 781 + error = xfs_bulkstat(&breq, xfs_fsbulkstat_one_fmt); 782 + lastino = breq.startino - 1; 783 + } 788 784 789 785 if (error) 790 786 return error; 791 787 792 - if (bulkreq.ocount != NULL) { 793 - if (copy_to_user(bulkreq.lastip, &inlast, 794 - sizeof(xfs_ino_t))) 795 - return -EFAULT; 788 + if (bulkreq.lastip != NULL && 789 + copy_to_user(bulkreq.lastip, &lastino, sizeof(xfs_ino_t))) 790 + return -EFAULT; 796 791 797 - if (copy_to_user(bulkreq.ocount, &count, sizeof(count))) 798 - return -EFAULT; 792 + if (bulkreq.ocount != NULL && 793 + copy_to_user(bulkreq.ocount, &breq.ocount, sizeof(__s32))) 794 + return -EFAULT; 795 + 796 + return 0; 797 + } 798 + 799 + /* Return 0 on success or positive error */ 800 + static int 801 + xfs_bulkstat_fmt( 802 + struct xfs_ibulk *breq, 803 + const struct xfs_bulkstat *bstat) 804 + { 805 + if (copy_to_user(breq->ubuffer, bstat, sizeof(struct xfs_bulkstat))) 806 + return -EFAULT; 807 + return xfs_ibulk_advance(breq, sizeof(struct xfs_bulkstat)); 808 + } 809 + 810 + /* 811 + * Check the incoming bulk request @hdr from userspace and initialize the 812 + * internal @breq bulk request appropriately. Returns 0 if the bulk request 813 + * should proceed; XFS_ITER_ABORT if there's nothing to do; or the usual 814 + * negative error code. 
815 + */ 816 + static int 817 + xfs_bulk_ireq_setup( 818 + struct xfs_mount *mp, 819 + struct xfs_bulk_ireq *hdr, 820 + struct xfs_ibulk *breq, 821 + void __user *ubuffer) 822 + { 823 + if (hdr->icount == 0 || 824 + (hdr->flags & ~XFS_BULK_IREQ_FLAGS_ALL) || 825 + memchr_inv(hdr->reserved, 0, sizeof(hdr->reserved))) 826 + return -EINVAL; 827 + 828 + breq->startino = hdr->ino; 829 + breq->ubuffer = ubuffer; 830 + breq->icount = hdr->icount; 831 + breq->ocount = 0; 832 + breq->flags = 0; 833 + 834 + /* 835 + * The @ino parameter is a special value, so we must look it up here. 836 + * We're not allowed to have IREQ_AGNO, and we only return one inode 837 + * worth of data. 838 + */ 839 + if (hdr->flags & XFS_BULK_IREQ_SPECIAL) { 840 + if (hdr->flags & XFS_BULK_IREQ_AGNO) 841 + return -EINVAL; 842 + 843 + switch (hdr->ino) { 844 + case XFS_BULK_IREQ_SPECIAL_ROOT: 845 + hdr->ino = mp->m_sb.sb_rootino; 846 + break; 847 + default: 848 + return -EINVAL; 849 + } 850 + breq->icount = 1; 799 851 } 852 + 853 + /* 854 + * The IREQ_AGNO flag means that we only want results from a given AG. 855 + * If @hdr->ino is zero, we start iterating in that AG. If @hdr->ino is 856 + * beyond the specified AG then we return no results. 857 + */ 858 + if (hdr->flags & XFS_BULK_IREQ_AGNO) { 859 + if (hdr->agno >= mp->m_sb.sb_agcount) 860 + return -EINVAL; 861 + 862 + if (breq->startino == 0) 863 + breq->startino = XFS_AGINO_TO_INO(mp, hdr->agno, 0); 864 + else if (XFS_INO_TO_AGNO(mp, breq->startino) < hdr->agno) 865 + return -EINVAL; 866 + 867 + breq->flags |= XFS_IBULK_SAME_AG; 868 + 869 + /* Asking for an inode past the end of the AG? We're done! */ 870 + if (XFS_INO_TO_AGNO(mp, breq->startino) > hdr->agno) 871 + return XFS_ITER_ABORT; 872 + } else if (hdr->agno) 873 + return -EINVAL; 874 + 875 + /* Asking for an inode past the end of the FS? We're done! 
*/ 876 + if (XFS_INO_TO_AGNO(mp, breq->startino) >= mp->m_sb.sb_agcount) 877 + return XFS_ITER_ABORT; 878 + 879 + return 0; 880 + } 881 + 882 + /* 883 + * Update the userspace bulk request @hdr to reflect the end state of the 884 + * internal bulk request @breq. 885 + */ 886 + static void 887 + xfs_bulk_ireq_teardown( 888 + struct xfs_bulk_ireq *hdr, 889 + struct xfs_ibulk *breq) 890 + { 891 + hdr->ino = breq->startino; 892 + hdr->ocount = breq->ocount; 893 + } 894 + 895 + /* Handle the v5 bulkstat ioctl. */ 896 + STATIC int 897 + xfs_ioc_bulkstat( 898 + struct xfs_mount *mp, 899 + unsigned int cmd, 900 + struct xfs_bulkstat_req __user *arg) 901 + { 902 + struct xfs_bulk_ireq hdr; 903 + struct xfs_ibulk breq = { 904 + .mp = mp, 905 + }; 906 + int error; 907 + 908 + if (!capable(CAP_SYS_ADMIN)) 909 + return -EPERM; 910 + 911 + if (XFS_FORCED_SHUTDOWN(mp)) 912 + return -EIO; 913 + 914 + if (copy_from_user(&hdr, &arg->hdr, sizeof(hdr))) 915 + return -EFAULT; 916 + 917 + error = xfs_bulk_ireq_setup(mp, &hdr, &breq, arg->bulkstat); 918 + if (error == XFS_ITER_ABORT) 919 + goto out_teardown; 920 + if (error < 0) 921 + return error; 922 + 923 + error = xfs_bulkstat(&breq, xfs_bulkstat_fmt); 924 + if (error) 925 + return error; 926 + 927 + out_teardown: 928 + xfs_bulk_ireq_teardown(&hdr, &breq); 929 + if (copy_to_user(&arg->hdr, &hdr, sizeof(hdr))) 930 + return -EFAULT; 931 + 932 + return 0; 933 + } 934 + 935 + STATIC int 936 + xfs_inumbers_fmt( 937 + struct xfs_ibulk *breq, 938 + const struct xfs_inumbers *igrp) 939 + { 940 + if (copy_to_user(breq->ubuffer, igrp, sizeof(struct xfs_inumbers))) 941 + return -EFAULT; 942 + return xfs_ibulk_advance(breq, sizeof(struct xfs_inumbers)); 943 + } 944 + 945 + /* Handle the v5 inumbers ioctl. 
*/ 946 + STATIC int 947 + xfs_ioc_inumbers( 948 + struct xfs_mount *mp, 949 + unsigned int cmd, 950 + struct xfs_inumbers_req __user *arg) 951 + { 952 + struct xfs_bulk_ireq hdr; 953 + struct xfs_ibulk breq = { 954 + .mp = mp, 955 + }; 956 + int error; 957 + 958 + if (!capable(CAP_SYS_ADMIN)) 959 + return -EPERM; 960 + 961 + if (XFS_FORCED_SHUTDOWN(mp)) 962 + return -EIO; 963 + 964 + if (copy_from_user(&hdr, &arg->hdr, sizeof(hdr))) 965 + return -EFAULT; 966 + 967 + error = xfs_bulk_ireq_setup(mp, &hdr, &breq, arg->inumbers); 968 + if (error == XFS_ITER_ABORT) 969 + goto out_teardown; 970 + if (error < 0) 971 + return error; 972 + 973 + error = xfs_inumbers(&breq, xfs_inumbers_fmt); 974 + if (error) 975 + return error; 976 + 977 + out_teardown: 978 + xfs_bulk_ireq_teardown(&hdr, &breq); 979 + if (copy_to_user(&arg->hdr, &hdr, sizeof(hdr))) 980 + return -EFAULT; 800 981 801 982 return 0; 802 983 } ··· 2149 1926 case XFS_IOC_FSBULKSTAT_SINGLE: 2150 1927 case XFS_IOC_FSBULKSTAT: 2151 1928 case XFS_IOC_FSINUMBERS: 1929 + return xfs_ioc_fsbulkstat(mp, cmd, arg); 1930 + 1931 + case XFS_IOC_BULKSTAT: 2152 1932 return xfs_ioc_bulkstat(mp, cmd, arg); 1933 + case XFS_IOC_INUMBERS: 1934 + return xfs_ioc_inumbers(mp, cmd, arg); 2153 1935 2154 1936 case XFS_IOC_FSGEOMETRY_V1: 2155 1937 return xfs_ioc_fsgeometry(mp, arg, 3);
+8
fs/xfs/xfs_ioctl.h
··· 77 77 uint evmask, 78 78 uint16_t state); 79 79 80 + struct xfs_ibulk; 81 + struct xfs_bstat; 82 + struct xfs_inogrp; 83 + 84 + int xfs_fsbulkstat_one_fmt(struct xfs_ibulk *breq, 85 + const struct xfs_bulkstat *bstat); 86 + int xfs_fsinumbers_fmt(struct xfs_ibulk *breq, const struct xfs_inumbers *igrp); 87 + 80 88 #endif
+81 -82
fs/xfs/xfs_ioctl32.c
··· 3 3 * Copyright (c) 2004-2005 Silicon Graphics, Inc. 4 4 * All Rights Reserved. 5 5 */ 6 - #include <linux/compat.h> 7 - #include <linux/ioctl.h> 8 6 #include <linux/mount.h> 9 - #include <linux/slab.h> 10 - #include <linux/uaccess.h> 11 7 #include <linux/fsmap.h> 12 8 #include "xfs.h" 13 9 #include "xfs_fs.h" 10 + #include "xfs_shared.h" 14 11 #include "xfs_format.h" 15 12 #include "xfs_log_format.h" 16 13 #include "xfs_trans_resv.h" 17 14 #include "xfs_mount.h" 18 15 #include "xfs_inode.h" 16 + #include "xfs_iwalk.h" 19 17 #include "xfs_itable.h" 20 - #include "xfs_error.h" 21 18 #include "xfs_fsops.h" 22 - #include "xfs_alloc.h" 23 19 #include "xfs_rtalloc.h" 24 20 #include "xfs_attr.h" 25 21 #include "xfs_ioctl.h" ··· 80 84 } 81 85 82 86 STATIC int 83 - xfs_inumbers_fmt_compat( 84 - void __user *ubuffer, 85 - const struct xfs_inogrp *buffer, 86 - long count, 87 - long *written) 87 + xfs_fsinumbers_fmt_compat( 88 + struct xfs_ibulk *breq, 89 + const struct xfs_inumbers *ig) 88 90 { 89 - compat_xfs_inogrp_t __user *p32 = ubuffer; 90 - long i; 91 + struct compat_xfs_inogrp __user *p32 = breq->ubuffer; 92 + struct xfs_inogrp ig1; 93 + struct xfs_inogrp *igrp = &ig1; 91 94 92 - for (i = 0; i < count; i++) { 93 - if (put_user(buffer[i].xi_startino, &p32[i].xi_startino) || 94 - put_user(buffer[i].xi_alloccount, &p32[i].xi_alloccount) || 95 - put_user(buffer[i].xi_allocmask, &p32[i].xi_allocmask)) 96 - return -EFAULT; 97 - } 98 - *written = count * sizeof(*p32); 99 - return 0; 95 + xfs_inumbers_to_inogrp(&ig1, ig); 96 + 97 + if (put_user(igrp->xi_startino, &p32->xi_startino) || 98 + put_user(igrp->xi_alloccount, &p32->xi_alloccount) || 99 + put_user(igrp->xi_allocmask, &p32->xi_allocmask)) 100 + return -EFAULT; 101 + 102 + return xfs_ibulk_advance(breq, sizeof(struct compat_xfs_inogrp)); 100 103 } 101 104 102 105 #else 103 - #define xfs_inumbers_fmt_compat xfs_inumbers_fmt 106 + #define xfs_fsinumbers_fmt_compat xfs_fsinumbers_fmt 104 107 #endif /* 
BROKEN_X86_ALIGNMENT */ 105 108 106 109 STATIC int ··· 116 121 return 0; 117 122 } 118 123 119 - /* xfs_bstat_t has differing alignment on intel, & bstime_t sizes everywhere */ 124 + /* 125 + * struct xfs_bstat has differing alignment on intel, & bstime_t sizes 126 + * everywhere 127 + */ 120 128 STATIC int 121 129 xfs_ioctl32_bstat_copyin( 122 - xfs_bstat_t *bstat, 123 - compat_xfs_bstat_t __user *bstat32) 130 + struct xfs_bstat *bstat, 131 + struct compat_xfs_bstat __user *bstat32) 124 132 { 125 133 if (get_user(bstat->bs_ino, &bstat32->bs_ino) || 126 134 get_user(bstat->bs_mode, &bstat32->bs_mode) || ··· 169 171 170 172 /* Return 0 on success or positive error (to xfs_bulkstat()) */ 171 173 STATIC int 172 - xfs_bulkstat_one_fmt_compat( 173 - void __user *ubuffer, 174 - int ubsize, 175 - int *ubused, 176 - const xfs_bstat_t *buffer) 174 + xfs_fsbulkstat_one_fmt_compat( 175 + struct xfs_ibulk *breq, 176 + const struct xfs_bulkstat *bstat) 177 177 { 178 - compat_xfs_bstat_t __user *p32 = ubuffer; 178 + struct compat_xfs_bstat __user *p32 = breq->ubuffer; 179 + struct xfs_bstat bs1; 180 + struct xfs_bstat *buffer = &bs1; 179 181 180 - if (ubsize < sizeof(*p32)) 181 - return -ENOMEM; 182 + xfs_bulkstat_to_bstat(breq->mp, &bs1, bstat); 182 183 183 184 if (put_user(buffer->bs_ino, &p32->bs_ino) || 184 185 put_user(buffer->bs_mode, &p32->bs_mode) || ··· 202 205 put_user(buffer->bs_dmstate, &p32->bs_dmstate) || 203 206 put_user(buffer->bs_aextents, &p32->bs_aextents)) 204 207 return -EFAULT; 205 - if (ubused) 206 - *ubused = sizeof(*p32); 207 - return 0; 208 - } 209 208 210 - STATIC int 211 - xfs_bulkstat_one_compat( 212 - xfs_mount_t *mp, /* mount point for filesystem */ 213 - xfs_ino_t ino, /* inode number to get data for */ 214 - void __user *buffer, /* buffer to place output in */ 215 - int ubsize, /* size of buffer */ 216 - int *ubused, /* bytes used by me */ 217 - int *stat) /* BULKSTAT_RV_... 
*/ 218 - { 219 - return xfs_bulkstat_one_int(mp, ino, buffer, ubsize, 220 - xfs_bulkstat_one_fmt_compat, 221 - ubused, stat); 209 + return xfs_ibulk_advance(breq, sizeof(struct compat_xfs_bstat)); 222 210 } 223 211 224 212 /* copied from xfs_ioctl.c */ 225 213 STATIC int 226 - xfs_compat_ioc_bulkstat( 214 + xfs_compat_ioc_fsbulkstat( 227 215 xfs_mount_t *mp, 228 216 unsigned int cmd, 229 - compat_xfs_fsop_bulkreq_t __user *p32) 217 + struct compat_xfs_fsop_bulkreq __user *p32) 230 218 { 231 219 u32 addr; 232 - xfs_fsop_bulkreq_t bulkreq; 233 - int count; /* # of records returned */ 234 - xfs_ino_t inlast; /* last inode number */ 235 - int done; 220 + struct xfs_fsop_bulkreq bulkreq; 221 + struct xfs_ibulk breq = { 222 + .mp = mp, 223 + .ocount = 0, 224 + }; 225 + xfs_ino_t lastino; 236 226 int error; 237 227 238 228 /* ··· 228 244 * to userpace memory via bulkreq.ubuffer. Normally the compat 229 245 * functions and structure size are the correct ones to use ... 230 246 */ 231 - inumbers_fmt_pf inumbers_func = xfs_inumbers_fmt_compat; 232 - bulkstat_one_pf bs_one_func = xfs_bulkstat_one_compat; 233 - size_t bs_one_size = sizeof(struct compat_xfs_bstat); 247 + inumbers_fmt_pf inumbers_func = xfs_fsinumbers_fmt_compat; 248 + bulkstat_one_fmt_pf bs_one_func = xfs_fsbulkstat_one_fmt_compat; 234 249 235 250 #ifdef CONFIG_X86_X32 236 251 if (in_x32_syscall()) { ··· 241 258 * the data written out in compat layout will not match what 242 259 * x32 userspace expects. 
243 260 */ 244 - inumbers_func = xfs_inumbers_fmt; 245 - bs_one_func = xfs_bulkstat_one; 246 - bs_one_size = sizeof(struct xfs_bstat); 261 + inumbers_func = xfs_fsinumbers_fmt; 262 + bs_one_func = xfs_fsbulkstat_one_fmt; 247 263 } 248 264 #endif 249 265 ··· 266 284 return -EFAULT; 267 285 bulkreq.ocount = compat_ptr(addr); 268 286 269 - if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64))) 287 + if (copy_from_user(&lastino, bulkreq.lastip, sizeof(__s64))) 270 288 return -EFAULT; 271 289 272 - if ((count = bulkreq.icount) <= 0) 290 + if (bulkreq.icount <= 0) 273 291 return -EINVAL; 274 292 275 293 if (bulkreq.ubuffer == NULL) 276 294 return -EINVAL; 277 295 278 - if (cmd == XFS_IOC_FSINUMBERS_32) { 279 - error = xfs_inumbers(mp, &inlast, &count, 280 - bulkreq.ubuffer, inumbers_func); 281 - } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE_32) { 282 - int res; 296 + breq.ubuffer = bulkreq.ubuffer; 297 + breq.icount = bulkreq.icount; 283 298 284 - error = bs_one_func(mp, inlast, bulkreq.ubuffer, 285 - bs_one_size, NULL, &res); 299 + /* 300 + * FSBULKSTAT_SINGLE expects that *lastip contains the inode number 301 + * that we want to stat. However, FSINUMBERS and FSBULKSTAT expect 302 + * that *lastip contains either zero or the number of the last inode to 303 + * be examined by the previous call and return results starting with 304 + * the next inode after that. The new bulk request back end functions 305 + * take the inode to start with, so we have to compute the startino 306 + * parameter from lastino to maintain correct function. lastino == 0 307 + * is a special case because it has traditionally meant "first inode 308 + * in filesystem". 309 + */ 310 + if (cmd == XFS_IOC_FSINUMBERS_32) { 311 + breq.startino = lastino ? 
lastino + 1 : 0; 312 + error = xfs_inumbers(&breq, inumbers_func); 313 + lastino = breq.startino - 1; 314 + } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE_32) { 315 + breq.startino = lastino; 316 + breq.icount = 1; 317 + error = xfs_bulkstat_one(&breq, bs_one_func); 318 + lastino = breq.startino; 286 319 } else if (cmd == XFS_IOC_FSBULKSTAT_32) { 287 - error = xfs_bulkstat(mp, &inlast, &count, 288 - bs_one_func, bs_one_size, 289 - bulkreq.ubuffer, &done); 290 - } else 320 + breq.startino = lastino ? lastino + 1 : 0; 321 + error = xfs_bulkstat(&breq, bs_one_func); 322 + lastino = breq.startino - 1; 323 + } else { 291 324 error = -EINVAL; 325 + } 292 326 if (error) 293 327 return error; 294 328 295 - if (bulkreq.ocount != NULL) { 296 - if (copy_to_user(bulkreq.lastip, &inlast, 297 - sizeof(xfs_ino_t))) 298 - return -EFAULT; 329 + if (bulkreq.lastip != NULL && 330 + copy_to_user(bulkreq.lastip, &lastino, sizeof(xfs_ino_t))) 331 + return -EFAULT; 299 332 300 - if (copy_to_user(bulkreq.ocount, &count, sizeof(count))) 301 - return -EFAULT; 302 - } 333 + if (bulkreq.ocount != NULL && 334 + copy_to_user(bulkreq.ocount, &breq.ocount, sizeof(__s32))) 335 + return -EFAULT; 303 336 304 337 return 0; 305 338 } ··· 574 577 case XFS_IOC_ERROR_CLEARALL: 575 578 case FS_IOC_GETFSMAP: 576 579 case XFS_IOC_SCRUB_METADATA: 580 + case XFS_IOC_BULKSTAT: 581 + case XFS_IOC_INUMBERS: 577 582 return xfs_file_ioctl(filp, cmd, p); 578 583 #if !defined(BROKEN_X86_ALIGNMENT) || defined(CONFIG_X86_X32) 579 584 /* ··· 673 674 case XFS_IOC_FSBULKSTAT_32: 674 675 case XFS_IOC_FSBULKSTAT_SINGLE_32: 675 676 case XFS_IOC_FSINUMBERS_32: 676 - return xfs_compat_ioc_bulkstat(mp, cmd, arg); 677 + return xfs_compat_ioc_fsbulkstat(mp, cmd, arg); 677 678 case XFS_IOC_FD_TO_HANDLE_32: 678 679 case XFS_IOC_PATH_TO_HANDLE_32: 679 680 case XFS_IOC_PATH_TO_FSHANDLE_32: {
+7 -7
fs/xfs/xfs_ioctl32.h
··· 36 36 __s32 tv_nsec; /* and nanoseconds */ 37 37 } compat_xfs_bstime_t; 38 38 39 - typedef struct compat_xfs_bstat { 39 + struct compat_xfs_bstat { 40 40 __u64 bs_ino; /* inode number */ 41 41 __u16 bs_mode; /* type and mode */ 42 42 __u16 bs_nlink; /* number of links */ ··· 61 61 __u32 bs_dmevmask; /* DMIG event mask */ 62 62 __u16 bs_dmstate; /* DMIG state info */ 63 63 __u16 bs_aextents; /* attribute number of extents */ 64 - } __compat_packed compat_xfs_bstat_t; 64 + } __compat_packed; 65 65 66 - typedef struct compat_xfs_fsop_bulkreq { 66 + struct compat_xfs_fsop_bulkreq { 67 67 compat_uptr_t lastip; /* last inode # pointer */ 68 68 __s32 icount; /* count of entries in buffer */ 69 69 compat_uptr_t ubuffer; /* user buffer for inode desc. */ 70 70 compat_uptr_t ocount; /* output count pointer */ 71 - } compat_xfs_fsop_bulkreq_t; 71 + }; 72 72 73 73 #define XFS_IOC_FSBULKSTAT_32 \ 74 74 _IOWR('X', 101, struct compat_xfs_fsop_bulkreq) ··· 106 106 xfs_off_t sx_offset; /* offset into file */ 107 107 xfs_off_t sx_length; /* leng from offset */ 108 108 char sx_pad[16]; /* pad space, unused */ 109 - compat_xfs_bstat_t sx_stat; /* stat of target b4 copy */ 109 + struct compat_xfs_bstat sx_stat; /* stat of target b4 copy */ 110 110 } __compat_packed compat_xfs_swapext_t; 111 111 112 112 #define XFS_IOC_SWAPEXT_32 _IOWR('X', 109, struct compat_xfs_swapext) ··· 201 201 #define XFS_IOC_FSGEOMETRY_V1_32 \ 202 202 _IOR('X', 100, struct compat_xfs_fsop_geom_v1) 203 203 204 - typedef struct compat_xfs_inogrp { 204 + struct compat_xfs_inogrp { 205 205 __u64 xi_startino; /* starting inode number */ 206 206 __s32 xi_alloccount; /* # bits set in allocmask */ 207 207 __u64 xi_allocmask; /* mask of allocated inodes */ 208 - } __attribute__((packed)) compat_xfs_inogrp_t; 208 + } __attribute__((packed)); 209 209 210 210 /* These growfs input structures have padding on the end, so must translate */ 211 211 typedef struct compat_xfs_growfs_data {
+1 -4
fs/xfs/xfs_iomap.c
··· 4 4 * Copyright (c) 2016-2018 Christoph Hellwig. 5 5 * All Rights Reserved. 6 6 */ 7 - #include <linux/iomap.h> 8 7 #include "xfs.h" 9 8 #include "xfs_fs.h" 10 9 #include "xfs_shared.h" ··· 11 12 #include "xfs_log_format.h" 12 13 #include "xfs_trans_resv.h" 13 14 #include "xfs_mount.h" 14 - #include "xfs_defer.h" 15 15 #include "xfs_inode.h" 16 16 #include "xfs_btree.h" 17 17 #include "xfs_bmap_btree.h" ··· 23 25 #include "xfs_inode_item.h" 24 26 #include "xfs_iomap.h" 25 27 #include "xfs_trace.h" 26 - #include "xfs_icache.h" 27 28 #include "xfs_quota.h" 28 29 #include "xfs_dquot_item.h" 29 30 #include "xfs_dquot.h" ··· 776 779 * complete here and might deadlock on the iolock. 777 780 */ 778 781 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 779 - XFS_TRANS_RESERVE | XFS_TRANS_NOFS, &tp); 782 + XFS_TRANS_RESERVE, &tp); 780 783 if (error) 781 784 return error; 782 785
-10
fs/xfs/xfs_iops.c
··· 10 10 #include "xfs_log_format.h" 11 11 #include "xfs_trans_resv.h" 12 12 #include "xfs_mount.h" 13 - #include "xfs_da_format.h" 14 13 #include "xfs_inode.h" 15 - #include "xfs_bmap.h" 16 - #include "xfs_bmap_util.h" 17 14 #include "xfs_acl.h" 18 15 #include "xfs_quota.h" 19 - #include "xfs_error.h" 20 16 #include "xfs_attr.h" 21 17 #include "xfs_trans.h" 22 18 #include "xfs_trace.h" 23 19 #include "xfs_icache.h" 24 20 #include "xfs_symlink.h" 25 - #include "xfs_da_btree.h" 26 21 #include "xfs_dir2.h" 27 - #include "xfs_trans_space.h" 28 22 #include "xfs_iomap.h" 29 - #include "xfs_defer.h" 30 23 31 - #include <linux/capability.h> 32 24 #include <linux/xattr.h> 33 25 #include <linux/posix_acl.h> 34 26 #include <linux/security.h> 35 - #include <linux/iomap.h> 36 - #include <linux/slab.h> 37 27 #include <linux/iversion.h> 38 28 39 29 /*
+255 -500
fs/xfs/xfs_itable.c
··· 14 14 #include "xfs_btree.h" 15 15 #include "xfs_ialloc.h" 16 16 #include "xfs_ialloc_btree.h" 17 + #include "xfs_iwalk.h" 17 18 #include "xfs_itable.h" 18 19 #include "xfs_error.h" 19 - #include "xfs_trace.h" 20 20 #include "xfs_icache.h" 21 21 #include "xfs_health.h" 22 22 23 23 /* 24 - * Return stat information for one inode. 25 - * Return 0 if ok, else errno. 24 + * Bulk Stat 25 + * ========= 26 + * 27 + * Use the inode walking functions to fill out struct xfs_bulkstat for every 28 + * allocated inode, then pass the stat information to some externally provided 29 + * iteration function. 26 30 */ 27 - int 31 + 32 + struct xfs_bstat_chunk { 33 + bulkstat_one_fmt_pf formatter; 34 + struct xfs_ibulk *breq; 35 + struct xfs_bulkstat *buf; 36 + }; 37 + 38 + /* 39 + * Fill out the bulkstat info for a single inode and report it somewhere. 40 + * 41 + * bc->breq->lastino is effectively the inode cursor as we walk through the 42 + * filesystem. Therefore, we update it any time we need to move the cursor 43 + * forward, regardless of whether or not we're sending any bstat information 44 + * back to userspace. If the inode is internal metadata or, has been freed 45 + * out from under us, we just simply keep going. 46 + * 47 + * However, if any other type of error happens we want to stop right where we 48 + * are so that userspace will call back with exact number of the bad inode and 49 + * we can send back an error code. 50 + * 51 + * Note that if the formatter tells us there's no space left in the buffer we 52 + * move the cursor forward and abort the walk. 53 + */ 54 + STATIC int 28 55 xfs_bulkstat_one_int( 29 - struct xfs_mount *mp, /* mount point for filesystem */ 30 - xfs_ino_t ino, /* inode to get data for */ 31 - void __user *buffer, /* buffer to place output in */ 32 - int ubsize, /* size of buffer */ 33 - bulkstat_one_fmt_pf formatter, /* formatter, copy to user */ 34 - int *ubused, /* bytes used by me */ 35 - int *stat) /* BULKSTAT_RV_... 
*/ 56 + struct xfs_mount *mp, 57 + struct xfs_trans *tp, 58 + xfs_ino_t ino, 59 + struct xfs_bstat_chunk *bc) 36 60 { 37 61 struct xfs_icdinode *dic; /* dinode core info pointer */ 38 62 struct xfs_inode *ip; /* incore inode pointer */ 39 63 struct inode *inode; 40 - struct xfs_bstat *buf; /* return buffer */ 41 - int error = 0; /* error value */ 64 + struct xfs_bulkstat *buf = bc->buf; 65 + int error = -EINVAL; 42 66 43 - *stat = BULKSTAT_RV_NOTHING; 67 + if (xfs_internal_inum(mp, ino)) 68 + goto out_advance; 44 69 45 - if (!buffer || xfs_internal_inum(mp, ino)) 46 - return -EINVAL; 47 - 48 - buf = kmem_zalloc(sizeof(*buf), KM_SLEEP | KM_MAYFAIL); 49 - if (!buf) 50 - return -ENOMEM; 51 - 52 - error = xfs_iget(mp, NULL, ino, 70 + error = xfs_iget(mp, tp, ino, 53 71 (XFS_IGET_DONTCACHE | XFS_IGET_UNTRUSTED), 54 72 XFS_ILOCK_SHARED, &ip); 73 + if (error == -ENOENT || error == -EINVAL) 74 + goto out_advance; 55 75 if (error) 56 - goto out_free; 76 + goto out; 57 77 58 78 ASSERT(ip != NULL); 59 79 ASSERT(ip->i_imap.im_blkno != 0); ··· 84 64 /* xfs_iget returns the following without needing 85 65 * further change. 
86 66 */ 87 - buf->bs_projid_lo = dic->di_projid_lo; 88 - buf->bs_projid_hi = dic->di_projid_hi; 67 + buf->bs_projectid = xfs_get_projid(ip); 89 68 buf->bs_ino = ino; 90 69 buf->bs_uid = dic->di_uid; 91 70 buf->bs_gid = dic->di_gid; 92 71 buf->bs_size = dic->di_size; 93 72 94 73 buf->bs_nlink = inode->i_nlink; 95 - buf->bs_atime.tv_sec = inode->i_atime.tv_sec; 96 - buf->bs_atime.tv_nsec = inode->i_atime.tv_nsec; 97 - buf->bs_mtime.tv_sec = inode->i_mtime.tv_sec; 98 - buf->bs_mtime.tv_nsec = inode->i_mtime.tv_nsec; 99 - buf->bs_ctime.tv_sec = inode->i_ctime.tv_sec; 100 - buf->bs_ctime.tv_nsec = inode->i_ctime.tv_nsec; 74 + buf->bs_atime = inode->i_atime.tv_sec; 75 + buf->bs_atime_nsec = inode->i_atime.tv_nsec; 76 + buf->bs_mtime = inode->i_mtime.tv_sec; 77 + buf->bs_mtime_nsec = inode->i_mtime.tv_nsec; 78 + buf->bs_ctime = inode->i_ctime.tv_sec; 79 + buf->bs_ctime_nsec = inode->i_ctime.tv_nsec; 80 + buf->bs_btime = dic->di_crtime.t_sec; 81 + buf->bs_btime_nsec = dic->di_crtime.t_nsec; 101 82 buf->bs_gen = inode->i_generation; 102 83 buf->bs_mode = inode->i_mode; 103 84 104 85 buf->bs_xflags = xfs_ip2xflags(ip); 105 - buf->bs_extsize = dic->di_extsize << mp->m_sb.sb_blocklog; 86 + buf->bs_extsize_blks = dic->di_extsize; 106 87 buf->bs_extents = dic->di_nextents; 107 - memset(buf->bs_pad, 0, sizeof(buf->bs_pad)); 108 88 xfs_bulkstat_health(ip, buf); 109 - buf->bs_dmevmask = dic->di_dmevmask; 110 - buf->bs_dmstate = dic->di_dmstate; 111 89 buf->bs_aextents = dic->di_anextents; 112 90 buf->bs_forkoff = XFS_IFORK_BOFF(ip); 91 + buf->bs_version = XFS_BULKSTAT_VERSION_V5; 113 92 114 93 if (dic->di_version == 3) { 115 94 if (dic->di_flags2 & XFS_DIFLAG2_COWEXTSIZE) 116 - buf->bs_cowextsize = dic->di_cowextsize << 117 - mp->m_sb.sb_blocklog; 95 + buf->bs_cowextsize_blks = dic->di_cowextsize; 118 96 } 119 97 120 98 switch (dic->di_format) { ··· 136 118 xfs_iunlock(ip, XFS_ILOCK_SHARED); 137 119 xfs_irele(ip); 138 120 139 - error = formatter(buffer, ubsize, ubused, buf); 140 - 
if (!error) 141 - *stat = BULKSTAT_RV_DIDONE; 121 + error = bc->formatter(bc->breq, buf); 122 + if (error == XFS_IBULK_ABORT) 123 + goto out_advance; 124 + if (error) 125 + goto out; 142 126 143 - out_free: 144 - kmem_free(buf); 127 + out_advance: 128 + /* 129 + * Advance the cursor to the inode that comes after the one we just 130 + * looked at. We want the caller to move along if the bulkstat 131 + * information was copied successfully; if we tried to grab the inode 132 + * but it's no longer allocated; or if it's internal metadata. 133 + */ 134 + bc->breq->startino = ino + 1; 135 + out: 145 136 return error; 146 137 } 147 138 148 - /* Return 0 on success or positive error */ 149 - STATIC int 150 - xfs_bulkstat_one_fmt( 151 - void __user *ubuffer, 152 - int ubsize, 153 - int *ubused, 154 - const xfs_bstat_t *buffer) 155 - { 156 - if (ubsize < sizeof(*buffer)) 157 - return -ENOMEM; 158 - if (copy_to_user(ubuffer, buffer, sizeof(*buffer))) 159 - return -EFAULT; 160 - if (ubused) 161 - *ubused = sizeof(*buffer); 162 - return 0; 163 - } 164 - 139 + /* Bulkstat a single inode. */ 165 140 int 166 141 xfs_bulkstat_one( 167 - xfs_mount_t *mp, /* mount point for filesystem */ 168 - xfs_ino_t ino, /* inode number to get data for */ 169 - void __user *buffer, /* buffer to place output in */ 170 - int ubsize, /* size of buffer */ 171 - int *ubused, /* bytes used by me */ 172 - int *stat) /* BULKSTAT_RV_... */ 142 + struct xfs_ibulk *breq, 143 + bulkstat_one_fmt_pf formatter) 173 144 { 174 - return xfs_bulkstat_one_int(mp, ino, buffer, ubsize, 175 - xfs_bulkstat_one_fmt, ubused, stat); 176 - } 145 + struct xfs_bstat_chunk bc = { 146 + .formatter = formatter, 147 + .breq = breq, 148 + }; 149 + int error; 177 150 178 - /* 179 - * Loop over all clusters in a chunk for a given incore inode allocation btree 180 - * record. Do a readahead if there are any allocated inodes in that cluster. 
181 - */ 182 - STATIC void 183 - xfs_bulkstat_ichunk_ra( 184 - struct xfs_mount *mp, 185 - xfs_agnumber_t agno, 186 - struct xfs_inobt_rec_incore *irec) 187 - { 188 - xfs_agblock_t agbno; 189 - struct blk_plug plug; 190 - int i; /* inode chunk index */ 151 + ASSERT(breq->icount == 1); 191 152 192 - agbno = XFS_AGINO_TO_AGBNO(mp, irec->ir_startino); 153 + bc.buf = kmem_zalloc(sizeof(struct xfs_bulkstat), 154 + KM_SLEEP | KM_MAYFAIL); 155 + if (!bc.buf) 156 + return -ENOMEM; 193 157 194 - blk_start_plug(&plug); 195 - for (i = 0; i < XFS_INODES_PER_CHUNK; 196 - i += mp->m_inodes_per_cluster, agbno += mp->m_blocks_per_cluster) { 197 - if (xfs_inobt_maskn(i, mp->m_inodes_per_cluster) & 198 - ~irec->ir_free) { 199 - xfs_btree_reada_bufs(mp, agno, agbno, 200 - mp->m_blocks_per_cluster, 201 - &xfs_inode_buf_ops); 202 - } 203 - } 204 - blk_finish_plug(&plug); 205 - } 158 + error = xfs_bulkstat_one_int(breq->mp, NULL, breq->startino, &bc); 206 159 207 - /* 208 - * Lookup the inode chunk that the given inode lives in and then get the record 209 - * if we found the chunk. If the inode was not the last in the chunk and there 210 - * are some left allocated, update the data for the pointed-to record as well as 211 - * return the count of grabbed inodes. 
212 - */ 213 - STATIC int 214 - xfs_bulkstat_grab_ichunk( 215 - struct xfs_btree_cur *cur, /* btree cursor */ 216 - xfs_agino_t agino, /* starting inode of chunk */ 217 - int *icount,/* return # of inodes grabbed */ 218 - struct xfs_inobt_rec_incore *irec) /* btree record */ 219 - { 220 - int idx; /* index into inode chunk */ 221 - int stat; 222 - int error = 0; 223 - 224 - /* Lookup the inode chunk that this inode lives in */ 225 - error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &stat); 226 - if (error) 227 - return error; 228 - if (!stat) { 229 - *icount = 0; 230 - return error; 231 - } 232 - 233 - /* Get the record, should always work */ 234 - error = xfs_inobt_get_rec(cur, irec, &stat); 235 - if (error) 236 - return error; 237 - XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, stat == 1); 238 - 239 - /* Check if the record contains the inode in request */ 240 - if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino) { 241 - *icount = 0; 242 - return 0; 243 - } 244 - 245 - idx = agino - irec->ir_startino + 1; 246 - if (idx < XFS_INODES_PER_CHUNK && 247 - (xfs_inobt_maskn(idx, XFS_INODES_PER_CHUNK - idx) & ~irec->ir_free)) { 248 - int i; 249 - 250 - /* We got a right chunk with some left inodes allocated at it. 251 - * Grab the chunk record. Mark all the uninteresting inodes 252 - * free -- because they're before our start point. 
253 - */ 254 - for (i = 0; i < idx; i++) { 255 - if (XFS_INOBT_MASK(i) & ~irec->ir_free) 256 - irec->ir_freecount++; 257 - } 258 - 259 - irec->ir_free |= xfs_inobt_maskn(0, idx); 260 - *icount = irec->ir_count - irec->ir_freecount; 261 - } 262 - 263 - return 0; 264 - } 265 - 266 - #define XFS_BULKSTAT_UBLEFT(ubleft) ((ubleft) >= statstruct_size) 267 - 268 - struct xfs_bulkstat_agichunk { 269 - char __user **ac_ubuffer;/* pointer into user's buffer */ 270 - int ac_ubleft; /* bytes left in user's buffer */ 271 - int ac_ubelem; /* spaces used in user's buffer */ 272 - }; 273 - 274 - /* 275 - * Process inodes in chunk with a pointer to a formatter function 276 - * that will iget the inode and fill in the appropriate structure. 277 - */ 278 - static int 279 - xfs_bulkstat_ag_ichunk( 280 - struct xfs_mount *mp, 281 - xfs_agnumber_t agno, 282 - struct xfs_inobt_rec_incore *irbp, 283 - bulkstat_one_pf formatter, 284 - size_t statstruct_size, 285 - struct xfs_bulkstat_agichunk *acp, 286 - xfs_agino_t *last_agino) 287 - { 288 - char __user **ubufp = acp->ac_ubuffer; 289 - int chunkidx; 290 - int error = 0; 291 - xfs_agino_t agino = irbp->ir_startino; 292 - 293 - for (chunkidx = 0; chunkidx < XFS_INODES_PER_CHUNK; 294 - chunkidx++, agino++) { 295 - int fmterror; 296 - int ubused; 297 - 298 - /* inode won't fit in buffer, we are done */ 299 - if (acp->ac_ubleft < statstruct_size) 300 - break; 301 - 302 - /* Skip if this inode is free */ 303 - if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free) 304 - continue; 305 - 306 - /* Get the inode and fill in a single buffer */ 307 - ubused = statstruct_size; 308 - error = formatter(mp, XFS_AGINO_TO_INO(mp, agno, agino), 309 - *ubufp, acp->ac_ubleft, &ubused, &fmterror); 310 - 311 - if (fmterror == BULKSTAT_RV_GIVEUP || 312 - (error && error != -ENOENT && error != -EINVAL)) { 313 - acp->ac_ubleft = 0; 314 - ASSERT(error); 315 - break; 316 - } 317 - 318 - /* be careful not to leak error if at end of chunk */ 319 - if (fmterror == 
BULKSTAT_RV_NOTHING || error) { 320 - error = 0; 321 - continue; 322 - } 323 - 324 - *ubufp += ubused; 325 - acp->ac_ubleft -= ubused; 326 - acp->ac_ubelem++; 327 - } 160 + kmem_free(bc.buf); 328 161 329 162 /* 330 - * Post-update *last_agino. At this point, agino will always point one 331 - * inode past the last inode we processed successfully. Hence we 332 - * substract that inode when setting the *last_agino cursor so that we 333 - * return the correct cookie to userspace. On the next bulkstat call, 334 - * the inode under the lastino cookie will be skipped as we have already 335 - * processed it here. 163 + * If we reported one inode to userspace then we abort because we hit 164 + * the end of the buffer. Don't leak that back to userspace. 336 165 */ 337 - *last_agino = agino - 1; 166 + if (error == XFS_IWALK_ABORT) 167 + error = 0; 338 168 339 169 return error; 340 170 } 341 171 342 - /* 343 - * Return stat information in bulk (by-inode) for the filesystem. 344 - */ 345 - int /* error status */ 346 - xfs_bulkstat( 347 - xfs_mount_t *mp, /* mount point for filesystem */ 348 - xfs_ino_t *lastinop, /* last inode returned */ 349 - int *ubcountp, /* size of buffer/count returned */ 350 - bulkstat_one_pf formatter, /* func that'd fill a single buf */ 351 - size_t statstruct_size, /* sizeof struct filling */ 352 - char __user *ubuffer, /* buffer with inode stats */ 353 - int *done) /* 1 if there are more stats to get */ 172 + static int 173 + xfs_bulkstat_iwalk( 174 + struct xfs_mount *mp, 175 + struct xfs_trans *tp, 176 + xfs_ino_t ino, 177 + void *data) 354 178 { 355 - xfs_buf_t *agbp; /* agi header buffer */ 356 - xfs_agino_t agino; /* inode # in allocation group */ 357 - xfs_agnumber_t agno; /* allocation group number */ 358 - xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */ 359 - xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */ 360 - int nirbuf; /* size of irbuf */ 361 - int ubcount; /* size of user's buffer */ 362 - struct 
xfs_bulkstat_agichunk ac; 363 - int error = 0; 179 + int error; 364 180 365 - /* 366 - * Get the last inode value, see if there's nothing to do. 367 - */ 368 - agno = XFS_INO_TO_AGNO(mp, *lastinop); 369 - agino = XFS_INO_TO_AGINO(mp, *lastinop); 370 - if (agno >= mp->m_sb.sb_agcount || 371 - *lastinop != XFS_AGINO_TO_INO(mp, agno, agino)) { 372 - *done = 1; 373 - *ubcountp = 0; 181 + error = xfs_bulkstat_one_int(mp, tp, ino, data); 182 + /* bulkstat just skips over missing inodes */ 183 + if (error == -ENOENT || error == -EINVAL) 374 184 return 0; 375 - } 185 + return error; 186 + } 376 187 377 - ubcount = *ubcountp; /* statstruct's */ 378 - ac.ac_ubuffer = &ubuffer; 379 - ac.ac_ubleft = ubcount * statstruct_size; /* bytes */; 380 - ac.ac_ubelem = 0; 188 + /* 189 + * Check the incoming lastino parameter. 190 + * 191 + * We allow any inode value that could map to physical space inside the 192 + * filesystem because if there are no inodes there, bulkstat moves on to the 193 + * next chunk. In other words, the magic agino value of zero takes us to the 194 + * first chunk in the AG, and an agino value past the end of the AG takes us to 195 + * the first chunk in the next AG. 196 + * 197 + * Therefore we can end early if the requested inode is beyond the end of the 198 + * filesystem or doesn't map properly. 199 + */ 200 + static inline bool 201 + xfs_bulkstat_already_done( 202 + struct xfs_mount *mp, 203 + xfs_ino_t startino) 204 + { 205 + xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino); 206 + xfs_agino_t agino = XFS_INO_TO_AGINO(mp, startino); 381 207 382 - *ubcountp = 0; 383 - *done = 0; 208 + return agno >= mp->m_sb.sb_agcount || 209 + startino != XFS_AGINO_TO_INO(mp, agno, agino); 210 + } 384 211 385 - irbuf = kmem_zalloc_large(PAGE_SIZE * 4, KM_SLEEP); 386 - if (!irbuf) 212 + /* Return stat information in bulk (by-inode) for the filesystem. 
*/ 213 + int 214 + xfs_bulkstat( 215 + struct xfs_ibulk *breq, 216 + bulkstat_one_fmt_pf formatter) 217 + { 218 + struct xfs_bstat_chunk bc = { 219 + .formatter = formatter, 220 + .breq = breq, 221 + }; 222 + int error; 223 + 224 + if (xfs_bulkstat_already_done(breq->mp, breq->startino)) 225 + return 0; 226 + 227 + bc.buf = kmem_zalloc(sizeof(struct xfs_bulkstat), 228 + KM_SLEEP | KM_MAYFAIL); 229 + if (!bc.buf) 387 230 return -ENOMEM; 388 - nirbuf = (PAGE_SIZE * 4) / sizeof(*irbuf); 389 231 390 - /* 391 - * Loop over the allocation groups, starting from the last 392 - * inode returned; 0 means start of the allocation group. 393 - */ 394 - while (agno < mp->m_sb.sb_agcount) { 395 - struct xfs_inobt_rec_incore *irbp = irbuf; 396 - struct xfs_inobt_rec_incore *irbufend = irbuf + nirbuf; 397 - bool end_of_ag = false; 398 - int icount = 0; 399 - int stat; 232 + error = xfs_iwalk(breq->mp, NULL, breq->startino, breq->flags, 233 + xfs_bulkstat_iwalk, breq->icount, &bc); 400 234 401 - error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); 402 - if (error) 403 - break; 404 - /* 405 - * Allocate and initialize a btree cursor for ialloc btree. 406 - */ 407 - cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno, 408 - XFS_BTNUM_INO); 409 - if (agino > 0) { 410 - /* 411 - * In the middle of an allocation group, we need to get 412 - * the remainder of the chunk we're in. 413 - */ 414 - struct xfs_inobt_rec_incore r; 415 - 416 - error = xfs_bulkstat_grab_ichunk(cur, agino, &icount, &r); 417 - if (error) 418 - goto del_cursor; 419 - if (icount) { 420 - irbp->ir_startino = r.ir_startino; 421 - irbp->ir_holemask = r.ir_holemask; 422 - irbp->ir_count = r.ir_count; 423 - irbp->ir_freecount = r.ir_freecount; 424 - irbp->ir_free = r.ir_free; 425 - irbp++; 426 - } 427 - /* Increment to the next record */ 428 - error = xfs_btree_increment(cur, 0, &stat); 429 - } else { 430 - /* Start of ag. 
Lookup the first inode chunk */ 431 - error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &stat); 432 - } 433 - if (error || stat == 0) { 434 - end_of_ag = true; 435 - goto del_cursor; 436 - } 437 - 438 - /* 439 - * Loop through inode btree records in this ag, 440 - * until we run out of inodes or space in the buffer. 441 - */ 442 - while (irbp < irbufend && icount < ubcount) { 443 - struct xfs_inobt_rec_incore r; 444 - 445 - error = xfs_inobt_get_rec(cur, &r, &stat); 446 - if (error || stat == 0) { 447 - end_of_ag = true; 448 - goto del_cursor; 449 - } 450 - 451 - /* 452 - * If this chunk has any allocated inodes, save it. 453 - * Also start read-ahead now for this chunk. 454 - */ 455 - if (r.ir_freecount < r.ir_count) { 456 - xfs_bulkstat_ichunk_ra(mp, agno, &r); 457 - irbp->ir_startino = r.ir_startino; 458 - irbp->ir_holemask = r.ir_holemask; 459 - irbp->ir_count = r.ir_count; 460 - irbp->ir_freecount = r.ir_freecount; 461 - irbp->ir_free = r.ir_free; 462 - irbp++; 463 - icount += r.ir_count - r.ir_freecount; 464 - } 465 - error = xfs_btree_increment(cur, 0, &stat); 466 - if (error || stat == 0) { 467 - end_of_ag = true; 468 - goto del_cursor; 469 - } 470 - cond_resched(); 471 - } 472 - 473 - /* 474 - * Drop the btree buffers and the agi buffer as we can't hold any 475 - * of the locks these represent when calling iget. If there is a 476 - * pending error, then we are done. 477 - */ 478 - del_cursor: 479 - xfs_btree_del_cursor(cur, error); 480 - xfs_buf_relse(agbp); 481 - if (error) 482 - break; 483 - /* 484 - * Now format all the good inodes into the user's buffer. The 485 - * call to xfs_bulkstat_ag_ichunk() sets up the agino pointer 486 - * for the next loop iteration. 
487 - */ 488 - irbufend = irbp; 489 - for (irbp = irbuf; 490 - irbp < irbufend && ac.ac_ubleft >= statstruct_size; 491 - irbp++) { 492 - error = xfs_bulkstat_ag_ichunk(mp, agno, irbp, 493 - formatter, statstruct_size, &ac, 494 - &agino); 495 - if (error) 496 - break; 497 - 498 - cond_resched(); 499 - } 500 - 501 - /* 502 - * If we've run out of space or had a formatting error, we 503 - * are now done 504 - */ 505 - if (ac.ac_ubleft < statstruct_size || error) 506 - break; 507 - 508 - if (end_of_ag) { 509 - agno++; 510 - agino = 0; 511 - } 512 - } 513 - /* 514 - * Done, we're either out of filesystem or space to put the data. 515 - */ 516 - kmem_free(irbuf); 517 - *ubcountp = ac.ac_ubelem; 235 + kmem_free(bc.buf); 518 236 519 237 /* 520 238 * We found some inodes, so clear the error status and return them. ··· 259 505 * triggered again and propagated to userspace as there will be no 260 506 * formatted inodes in the buffer. 261 507 */ 262 - if (ac.ac_ubelem) 508 + if (breq->ocount > 0) 263 509 error = 0; 264 - 265 - /* 266 - * If we ran out of filesystem, lastino will point off the end of 267 - * the filesystem so the next call will return immediately. 268 - */ 269 - *lastinop = XFS_AGINO_TO_INO(mp, agno, agino); 270 - if (agno >= mp->m_sb.sb_agcount) 271 - *done = 1; 272 510 273 511 return error; 274 512 } 275 513 276 - int 277 - xfs_inumbers_fmt( 278 - void __user *ubuffer, /* buffer to write to */ 279 - const struct xfs_inogrp *buffer, /* buffer to read from */ 280 - long count, /* # of elements to read */ 281 - long *written) /* # of bytes written */ 514 + /* Convert bulkstat (v5) to bstat (v1). 
*/ 515 + void 516 + xfs_bulkstat_to_bstat( 517 + struct xfs_mount *mp, 518 + struct xfs_bstat *bs1, 519 + const struct xfs_bulkstat *bstat) 282 520 { 283 - if (copy_to_user(ubuffer, buffer, count * sizeof(*buffer))) 284 - return -EFAULT; 285 - *written = count * sizeof(*buffer); 286 - return 0; 521 + memset(bs1, 0, sizeof(struct xfs_bstat)); 522 + bs1->bs_ino = bstat->bs_ino; 523 + bs1->bs_mode = bstat->bs_mode; 524 + bs1->bs_nlink = bstat->bs_nlink; 525 + bs1->bs_uid = bstat->bs_uid; 526 + bs1->bs_gid = bstat->bs_gid; 527 + bs1->bs_rdev = bstat->bs_rdev; 528 + bs1->bs_blksize = bstat->bs_blksize; 529 + bs1->bs_size = bstat->bs_size; 530 + bs1->bs_atime.tv_sec = bstat->bs_atime; 531 + bs1->bs_mtime.tv_sec = bstat->bs_mtime; 532 + bs1->bs_ctime.tv_sec = bstat->bs_ctime; 533 + bs1->bs_atime.tv_nsec = bstat->bs_atime_nsec; 534 + bs1->bs_mtime.tv_nsec = bstat->bs_mtime_nsec; 535 + bs1->bs_ctime.tv_nsec = bstat->bs_ctime_nsec; 536 + bs1->bs_blocks = bstat->bs_blocks; 537 + bs1->bs_xflags = bstat->bs_xflags; 538 + bs1->bs_extsize = XFS_FSB_TO_B(mp, bstat->bs_extsize_blks); 539 + bs1->bs_extents = bstat->bs_extents; 540 + bs1->bs_gen = bstat->bs_gen; 541 + bs1->bs_projid_lo = bstat->bs_projectid & 0xFFFF; 542 + bs1->bs_forkoff = bstat->bs_forkoff; 543 + bs1->bs_projid_hi = bstat->bs_projectid >> 16; 544 + bs1->bs_sick = bstat->bs_sick; 545 + bs1->bs_checked = bstat->bs_checked; 546 + bs1->bs_cowextsize = XFS_FSB_TO_B(mp, bstat->bs_cowextsize_blks); 547 + bs1->bs_dmevmask = 0; 548 + bs1->bs_dmstate = 0; 549 + bs1->bs_aextents = bstat->bs_aextents; 550 + } 551 + 552 + struct xfs_inumbers_chunk { 553 + inumbers_fmt_pf formatter; 554 + struct xfs_ibulk *breq; 555 + }; 556 + 557 + /* 558 + * INUMBERS 559 + * ======== 560 + * This is how we export inode btree records to userspace, so that XFS tools 561 + * can figure out where inodes are allocated. 562 + */ 563 + 564 + /* 565 + * Format the inode group structure and report it somewhere. 
566 + * 567 + * Similar to xfs_bulkstat_one_int, lastino is the inode cursor as we walk 568 + * through the filesystem so we move it forward unless there was a runtime 569 + * error. If the formatter tells us the buffer is now full we also move the 570 + * cursor forward and abort the walk. 571 + */ 572 + STATIC int 573 + xfs_inumbers_walk( 574 + struct xfs_mount *mp, 575 + struct xfs_trans *tp, 576 + xfs_agnumber_t agno, 577 + const struct xfs_inobt_rec_incore *irec, 578 + void *data) 579 + { 580 + struct xfs_inumbers inogrp = { 581 + .xi_startino = XFS_AGINO_TO_INO(mp, agno, irec->ir_startino), 582 + .xi_alloccount = irec->ir_count - irec->ir_freecount, 583 + .xi_allocmask = ~irec->ir_free, 584 + .xi_version = XFS_INUMBERS_VERSION_V5, 585 + }; 586 + struct xfs_inumbers_chunk *ic = data; 587 + int error; 588 + 589 + error = ic->formatter(ic->breq, &inogrp); 590 + if (error && error != XFS_IBULK_ABORT) 591 + return error; 592 + 593 + ic->breq->startino = XFS_AGINO_TO_INO(mp, agno, irec->ir_startino) + 594 + XFS_INODES_PER_CHUNK; 595 + return error; 287 596 } 288 597 289 598 /* 290 599 * Return inode number table for the filesystem. 
291 600 */ 292 - int /* error status */ 601 + int 293 602 xfs_inumbers( 294 - struct xfs_mount *mp,/* mount point for filesystem */ 295 - xfs_ino_t *lastino,/* last inode returned */ 296 - int *count,/* size of buffer/count returned */ 297 - void __user *ubuffer,/* buffer with inode descriptions */ 603 + struct xfs_ibulk *breq, 298 604 inumbers_fmt_pf formatter) 299 605 { 300 - xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, *lastino); 301 - xfs_agino_t agino = XFS_INO_TO_AGINO(mp, *lastino); 302 - struct xfs_btree_cur *cur = NULL; 303 - struct xfs_buf *agbp = NULL; 304 - struct xfs_inogrp *buffer; 305 - int bcount; 306 - int left = *count; 307 - int bufidx = 0; 606 + struct xfs_inumbers_chunk ic = { 607 + .formatter = formatter, 608 + .breq = breq, 609 + }; 308 610 int error = 0; 309 611 310 - *count = 0; 311 - if (agno >= mp->m_sb.sb_agcount || 312 - *lastino != XFS_AGINO_TO_INO(mp, agno, agino)) 313 - return error; 612 + if (xfs_bulkstat_already_done(breq->mp, breq->startino)) 613 + return 0; 314 614 315 - bcount = min(left, (int)(PAGE_SIZE / sizeof(*buffer))); 316 - buffer = kmem_zalloc(bcount * sizeof(*buffer), KM_SLEEP); 317 - do { 318 - struct xfs_inobt_rec_incore r; 319 - int stat; 615 + error = xfs_inobt_walk(breq->mp, NULL, breq->startino, breq->flags, 616 + xfs_inumbers_walk, breq->icount, &ic); 320 617 321 - if (!agbp) { 322 - error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); 323 - if (error) 324 - break; 325 - 326 - cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno, 327 - XFS_BTNUM_INO); 328 - error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_GE, 329 - &stat); 330 - if (error) 331 - break; 332 - if (!stat) 333 - goto next_ag; 334 - } 335 - 336 - error = xfs_inobt_get_rec(cur, &r, &stat); 337 - if (error) 338 - break; 339 - if (!stat) 340 - goto next_ag; 341 - 342 - agino = r.ir_startino + XFS_INODES_PER_CHUNK - 1; 343 - buffer[bufidx].xi_startino = 344 - XFS_AGINO_TO_INO(mp, agno, r.ir_startino); 345 - buffer[bufidx].xi_alloccount = r.ir_count - 
r.ir_freecount; 346 - buffer[bufidx].xi_allocmask = ~r.ir_free; 347 - if (++bufidx == bcount) { 348 - long written; 349 - 350 - error = formatter(ubuffer, buffer, bufidx, &written); 351 - if (error) 352 - break; 353 - ubuffer += written; 354 - *count += bufidx; 355 - bufidx = 0; 356 - } 357 - if (!--left) 358 - break; 359 - 360 - error = xfs_btree_increment(cur, 0, &stat); 361 - if (error) 362 - break; 363 - if (stat) 364 - continue; 365 - 366 - next_ag: 367 - xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 368 - cur = NULL; 369 - xfs_buf_relse(agbp); 370 - agbp = NULL; 371 - agino = 0; 372 - agno++; 373 - } while (agno < mp->m_sb.sb_agcount); 374 - 375 - if (!error) { 376 - if (bufidx) { 377 - long written; 378 - 379 - error = formatter(ubuffer, buffer, bufidx, &written); 380 - if (!error) 381 - *count += bufidx; 382 - } 383 - *lastino = XFS_AGINO_TO_INO(mp, agno, agino); 384 - } 385 - 386 - kmem_free(buffer); 387 - if (cur) 388 - xfs_btree_del_cursor(cur, error); 389 - if (agbp) 390 - xfs_buf_relse(agbp); 618 + /* 619 + * We found some inode groups, so clear the error status and return 620 + * them. The lastino pointer will point directly at the inode that 621 + * triggered any error that occurred, so on the next call the error 622 + * will be triggered again and propagated to userspace as there will be 623 + * no formatted inode groups in the buffer. 624 + */ 625 + if (breq->ocount > 0) 626 + error = 0; 391 627 392 628 return error; 629 + } 630 + 631 + /* Convert an inumbers (v5) struct to a inogrp (v1) struct. */ 632 + void 633 + xfs_inumbers_to_inogrp( 634 + struct xfs_inogrp *ig1, 635 + const struct xfs_inumbers *ig) 636 + { 637 + ig1->xi_startino = ig->xi_startino; 638 + ig1->xi_alloccount = ig->xi_alloccount; 639 + ig1->xi_allocmask = ig->xi_allocmask; 393 640 }
+39 -67
fs/xfs/xfs_itable.h
··· 5 5 #ifndef __XFS_ITABLE_H__ 6 6 #define __XFS_ITABLE_H__ 7 7 8 - /* 9 - * xfs_bulkstat() is used to fill in xfs_bstat structures as well as dm_stat 10 - * structures (by the dmi library). This is a pointer to a formatter function 11 - * that will iget the inode and fill in the appropriate structure. 12 - * see xfs_bulkstat_one() and xfs_dm_bulkstat_one() in dmapi_xfs.c 13 - */ 14 - typedef int (*bulkstat_one_pf)(struct xfs_mount *mp, 15 - xfs_ino_t ino, 16 - void __user *buffer, 17 - int ubsize, 18 - int *ubused, 19 - int *stat); 8 + /* In-memory representation of a userspace request for batch inode data. */ 9 + struct xfs_ibulk { 10 + struct xfs_mount *mp; 11 + void __user *ubuffer; /* user output buffer */ 12 + xfs_ino_t startino; /* start with this inode */ 13 + unsigned int icount; /* number of elements in ubuffer */ 14 + unsigned int ocount; /* number of records returned */ 15 + unsigned int flags; /* see XFS_IBULK_FLAG_* */ 16 + }; 17 + 18 + /* Only iterate within the same AG as startino */ 19 + #define XFS_IBULK_SAME_AG (XFS_IWALK_SAME_AG) 20 + 21 + /* Return value that means we want to abort the walk. */ 22 + #define XFS_IBULK_ABORT (XFS_IWALK_ABORT) 20 23 21 24 /* 22 - * Values for stat return value. 25 + * Advance the user buffer pointer by one record of the given size. If the 26 + * buffer is now full, return the appropriate error code. 23 27 */ 24 - #define BULKSTAT_RV_NOTHING 0 25 - #define BULKSTAT_RV_DIDONE 1 26 - #define BULKSTAT_RV_GIVEUP 2 28 + static inline int 29 + xfs_ibulk_advance( 30 + struct xfs_ibulk *breq, 31 + size_t bytes) 32 + { 33 + char __user *b = breq->ubuffer; 34 + 35 + breq->ubuffer = b + bytes; 36 + breq->ocount++; 37 + return breq->ocount == breq->icount ? XFS_IBULK_ABORT : 0; 38 + } 27 39 28 40 /* 29 41 * Return stat information in bulk (by-inode) for the filesystem. 
30 42 */ 31 - int /* error status */ 32 - xfs_bulkstat( 33 - xfs_mount_t *mp, /* mount point for filesystem */ 34 - xfs_ino_t *lastino, /* last inode returned */ 35 - int *count, /* size of buffer/count returned */ 36 - bulkstat_one_pf formatter, /* func that'd fill a single buf */ 37 - size_t statstruct_size,/* sizeof struct that we're filling */ 38 - char __user *ubuffer,/* buffer with inode stats */ 39 - int *done); /* 1 if there are more stats to get */ 40 43 41 - typedef int (*bulkstat_one_fmt_pf)( /* used size in bytes or negative error */ 42 - void __user *ubuffer, /* buffer to write to */ 43 - int ubsize, /* remaining user buffer sz */ 44 - int *ubused, /* bytes used by formatter */ 45 - const xfs_bstat_t *buffer); /* buffer to read from */ 44 + typedef int (*bulkstat_one_fmt_pf)(struct xfs_ibulk *breq, 45 + const struct xfs_bulkstat *bstat); 46 46 47 - int 48 - xfs_bulkstat_one_int( 49 - xfs_mount_t *mp, 50 - xfs_ino_t ino, 51 - void __user *buffer, 52 - int ubsize, 53 - bulkstat_one_fmt_pf formatter, 54 - int *ubused, 55 - int *stat); 47 + int xfs_bulkstat_one(struct xfs_ibulk *breq, bulkstat_one_fmt_pf formatter); 48 + int xfs_bulkstat(struct xfs_ibulk *breq, bulkstat_one_fmt_pf formatter); 49 + void xfs_bulkstat_to_bstat(struct xfs_mount *mp, struct xfs_bstat *bs1, 50 + const struct xfs_bulkstat *bstat); 56 51 57 - int 58 - xfs_bulkstat_one( 59 - xfs_mount_t *mp, 60 - xfs_ino_t ino, 61 - void __user *buffer, 62 - int ubsize, 63 - int *ubused, 64 - int *stat); 52 + typedef int (*inumbers_fmt_pf)(struct xfs_ibulk *breq, 53 + const struct xfs_inumbers *igrp); 65 54 66 - typedef int (*inumbers_fmt_pf)( 67 - void __user *ubuffer, /* buffer to write to */ 68 - const xfs_inogrp_t *buffer, /* buffer to read from */ 69 - long count, /* # of elements to read */ 70 - long *written); /* # of bytes written */ 71 - 72 - int 73 - xfs_inumbers_fmt( 74 - void __user *ubuffer, /* buffer to write to */ 75 - const xfs_inogrp_t *buffer, /* buffer to read from */ 76 - long 
count, /* # of elements to read */ 77 - long *written); /* # of bytes written */ 78 - 79 - int /* error status */ 80 - xfs_inumbers( 81 - xfs_mount_t *mp, /* mount point for filesystem */ 82 - xfs_ino_t *last, /* last inode returned */ 83 - int *count, /* size of buffer/count returned */ 84 - void __user *buffer, /* buffer with inode info */ 85 - inumbers_fmt_pf formatter); 55 + int xfs_inumbers(struct xfs_ibulk *breq, inumbers_fmt_pf formatter); 56 + void xfs_inumbers_to_inogrp(struct xfs_inogrp *ig1, 57 + const struct xfs_inumbers *ig); 86 58 87 59 #endif /* __XFS_ITABLE_H__ */
+720
fs/xfs/xfs_iwalk.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-or-later 2 + /* 3 + * Copyright (C) 2019 Oracle. All Rights Reserved. 4 + * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 + */ 6 + #include "xfs.h" 7 + #include "xfs_fs.h" 8 + #include "xfs_shared.h" 9 + #include "xfs_format.h" 10 + #include "xfs_log_format.h" 11 + #include "xfs_trans_resv.h" 12 + #include "xfs_mount.h" 13 + #include "xfs_inode.h" 14 + #include "xfs_btree.h" 15 + #include "xfs_ialloc.h" 16 + #include "xfs_ialloc_btree.h" 17 + #include "xfs_iwalk.h" 18 + #include "xfs_error.h" 19 + #include "xfs_trace.h" 20 + #include "xfs_icache.h" 21 + #include "xfs_health.h" 22 + #include "xfs_trans.h" 23 + #include "xfs_pwork.h" 24 + 25 + /* 26 + * Walking Inodes in the Filesystem 27 + * ================================ 28 + * 29 + * This iterator function walks a subset of filesystem inodes in increasing 30 + * order from @startino until there are no more inodes. For each allocated 31 + * inode it finds, it calls a walk function with the relevant inode number and 32 + * a pointer to caller-provided data. The walk function can return the usual 33 + * negative error code to stop the iteration; 0 to continue the iteration; or 34 + * XFS_IWALK_ABORT to stop the iteration. This return value is returned to the 35 + * caller. 36 + * 37 + * Internally, we allow the walk function to do anything, which means that we 38 + * cannot maintain the inobt cursor or our lock on the AGI buffer. We 39 + * therefore cache the inobt records in kernel memory and only call the walk 40 + * function when our memory buffer is full. @nr_recs is the number of records 41 + * that we've cached, and @sz_recs is the size of our cache. 42 + * 43 + * It is the responsibility of the walk function to ensure it accesses 44 + * allocated inodes, as the inobt records may be stale by the time they are 45 + * acted upon. 
46 + */ 47 + 48 + struct xfs_iwalk_ag { 49 + /* parallel work control data; will be null if single threaded */ 50 + struct xfs_pwork pwork; 51 + 52 + struct xfs_mount *mp; 53 + struct xfs_trans *tp; 54 + 55 + /* Where do we start the traversal? */ 56 + xfs_ino_t startino; 57 + 58 + /* Array of inobt records we cache. */ 59 + struct xfs_inobt_rec_incore *recs; 60 + 61 + /* Number of entries allocated for the @recs array. */ 62 + unsigned int sz_recs; 63 + 64 + /* Number of entries in the @recs array that are in use. */ 65 + unsigned int nr_recs; 66 + 67 + /* Inode walk function and data pointer. */ 68 + xfs_iwalk_fn iwalk_fn; 69 + xfs_inobt_walk_fn inobt_walk_fn; 70 + void *data; 71 + 72 + /* 73 + * Make it look like the inodes up to startino are free so that 74 + * bulkstat can start its inode iteration at the correct place without 75 + * needing to special case everywhere. 76 + */ 77 + unsigned int trim_start:1; 78 + 79 + /* Skip empty inobt records? */ 80 + unsigned int skip_empty:1; 81 + }; 82 + 83 + /* 84 + * Loop over all clusters in a chunk for a given incore inode allocation btree 85 + * record. Do a readahead if there are any allocated inodes in that cluster. 
86 + */ 87 + STATIC void 88 + xfs_iwalk_ichunk_ra( 89 + struct xfs_mount *mp, 90 + xfs_agnumber_t agno, 91 + struct xfs_inobt_rec_incore *irec) 92 + { 93 + struct xfs_ino_geometry *igeo = M_IGEO(mp); 94 + xfs_agblock_t agbno; 95 + struct blk_plug plug; 96 + int i; /* inode chunk index */ 97 + 98 + agbno = XFS_AGINO_TO_AGBNO(mp, irec->ir_startino); 99 + 100 + blk_start_plug(&plug); 101 + for (i = 0; i < XFS_INODES_PER_CHUNK; i += igeo->inodes_per_cluster) { 102 + xfs_inofree_t imask; 103 + 104 + imask = xfs_inobt_maskn(i, igeo->inodes_per_cluster); 105 + if (imask & ~irec->ir_free) { 106 + xfs_btree_reada_bufs(mp, agno, agbno, 107 + igeo->blocks_per_cluster, 108 + &xfs_inode_buf_ops); 109 + } 110 + agbno += igeo->blocks_per_cluster; 111 + } 112 + blk_finish_plug(&plug); 113 + } 114 + 115 + /* 116 + * Set the bits in @irec's free mask that correspond to the inodes before 117 + * @agino so that we skip them. This is how we restart an inode walk that was 118 + * interrupted in the middle of an inode record. 119 + */ 120 + STATIC void 121 + xfs_iwalk_adjust_start( 122 + xfs_agino_t agino, /* starting inode of chunk */ 123 + struct xfs_inobt_rec_incore *irec) /* btree record */ 124 + { 125 + int idx; /* index into inode chunk */ 126 + int i; 127 + 128 + idx = agino - irec->ir_startino; 129 + 130 + /* 131 + * We got a right chunk with some left inodes allocated at it. Grab 132 + * the chunk record. Mark all the uninteresting inodes free because 133 + * they're before our start point. 134 + */ 135 + for (i = 0; i < idx; i++) { 136 + if (XFS_INOBT_MASK(i) & ~irec->ir_free) 137 + irec->ir_freecount++; 138 + } 139 + 140 + irec->ir_free |= xfs_inobt_maskn(0, idx); 141 + } 142 + 143 + /* Allocate memory for a walk. */ 144 + STATIC int 145 + xfs_iwalk_alloc( 146 + struct xfs_iwalk_ag *iwag) 147 + { 148 + size_t size; 149 + 150 + ASSERT(iwag->recs == NULL); 151 + iwag->nr_recs = 0; 152 + 153 + /* Allocate a prefetch buffer for inobt records. 
*/ 154 + size = iwag->sz_recs * sizeof(struct xfs_inobt_rec_incore); 155 + iwag->recs = kmem_alloc(size, KM_MAYFAIL); 156 + if (iwag->recs == NULL) 157 + return -ENOMEM; 158 + 159 + return 0; 160 + } 161 + 162 + /* Free memory we allocated for a walk. */ 163 + STATIC void 164 + xfs_iwalk_free( 165 + struct xfs_iwalk_ag *iwag) 166 + { 167 + kmem_free(iwag->recs); 168 + iwag->recs = NULL; 169 + } 170 + 171 + /* For each inuse inode in each cached inobt record, call our function. */ 172 + STATIC int 173 + xfs_iwalk_ag_recs( 174 + struct xfs_iwalk_ag *iwag) 175 + { 176 + struct xfs_mount *mp = iwag->mp; 177 + struct xfs_trans *tp = iwag->tp; 178 + xfs_ino_t ino; 179 + unsigned int i, j; 180 + xfs_agnumber_t agno; 181 + int error; 182 + 183 + agno = XFS_INO_TO_AGNO(mp, iwag->startino); 184 + for (i = 0; i < iwag->nr_recs; i++) { 185 + struct xfs_inobt_rec_incore *irec = &iwag->recs[i]; 186 + 187 + trace_xfs_iwalk_ag_rec(mp, agno, irec); 188 + 189 + if (xfs_pwork_want_abort(&iwag->pwork)) 190 + return 0; 191 + 192 + if (iwag->inobt_walk_fn) { 193 + error = iwag->inobt_walk_fn(mp, tp, agno, irec, 194 + iwag->data); 195 + if (error) 196 + return error; 197 + } 198 + 199 + if (!iwag->iwalk_fn) 200 + continue; 201 + 202 + for (j = 0; j < XFS_INODES_PER_CHUNK; j++) { 203 + if (xfs_pwork_want_abort(&iwag->pwork)) 204 + return 0; 205 + 206 + /* Skip if this inode is free */ 207 + if (XFS_INOBT_MASK(j) & irec->ir_free) 208 + continue; 209 + 210 + /* Otherwise call our function. */ 211 + ino = XFS_AGINO_TO_INO(mp, agno, irec->ir_startino + j); 212 + error = iwag->iwalk_fn(mp, tp, ino, iwag->data); 213 + if (error) 214 + return error; 215 + } 216 + } 217 + 218 + return 0; 219 + } 220 + 221 + /* Delete cursor and let go of AGI. 
*/ 222 + static inline void 223 + xfs_iwalk_del_inobt( 224 + struct xfs_trans *tp, 225 + struct xfs_btree_cur **curpp, 226 + struct xfs_buf **agi_bpp, 227 + int error) 228 + { 229 + if (*curpp) { 230 + xfs_btree_del_cursor(*curpp, error); 231 + *curpp = NULL; 232 + } 233 + if (*agi_bpp) { 234 + xfs_trans_brelse(tp, *agi_bpp); 235 + *agi_bpp = NULL; 236 + } 237 + } 238 + 239 + /* 240 + * Set ourselves up for walking inobt records starting from a given point in 241 + * the filesystem. 242 + * 243 + * If caller passed in a nonzero start inode number, load the record from the 244 + * inobt and make the record look like all the inodes before agino are free so 245 + * that we skip them, and then move the cursor to the next inobt record. This 246 + * is how we support starting an iwalk in the middle of an inode chunk. 247 + * 248 + * If the caller passed in a start number of zero, move the cursor to the first 249 + * inobt record. 250 + * 251 + * The caller is responsible for cleaning up the cursor and buffer pointer 252 + * regardless of the error status. 253 + */ 254 + STATIC int 255 + xfs_iwalk_ag_start( 256 + struct xfs_iwalk_ag *iwag, 257 + xfs_agnumber_t agno, 258 + xfs_agino_t agino, 259 + struct xfs_btree_cur **curpp, 260 + struct xfs_buf **agi_bpp, 261 + int *has_more) 262 + { 263 + struct xfs_mount *mp = iwag->mp; 264 + struct xfs_trans *tp = iwag->tp; 265 + struct xfs_inobt_rec_incore *irec; 266 + int error; 267 + 268 + /* Set up a fresh cursor and empty the inobt cache. */ 269 + iwag->nr_recs = 0; 270 + error = xfs_inobt_cur(mp, tp, agno, XFS_BTNUM_INO, curpp, agi_bpp); 271 + if (error) 272 + return error; 273 + 274 + /* Starting at the beginning of the AG? That's easy! */ 275 + if (agino == 0) 276 + return xfs_inobt_lookup(*curpp, 0, XFS_LOOKUP_GE, has_more); 277 + 278 + /* 279 + * Otherwise, we have to grab the inobt record where we left off, stuff 280 + * the record into our cache, and then see if there are more records. 
281 + * We require a lookup cache of at least two elements so that the 282 + * caller doesn't have to deal with tearing down the cursor to walk the 283 + * records. 284 + */ 285 + error = xfs_inobt_lookup(*curpp, agino, XFS_LOOKUP_LE, has_more); 286 + if (error) 287 + return error; 288 + 289 + /* 290 + * If the LE lookup at @agino yields no records, jump ahead to the 291 + * inobt cursor increment to see if there are more records to process. 292 + */ 293 + if (!*has_more) 294 + goto out_advance; 295 + 296 + /* Get the record, should always work */ 297 + irec = &iwag->recs[iwag->nr_recs]; 298 + error = xfs_inobt_get_rec(*curpp, irec, has_more); 299 + if (error) 300 + return error; 301 + XFS_WANT_CORRUPTED_RETURN(mp, *has_more == 1); 302 + 303 + /* 304 + * If the LE lookup yielded an inobt record before the cursor position, 305 + * skip it and see if there's another one after it. 306 + */ 307 + if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino) 308 + goto out_advance; 309 + 310 + /* 311 + * If agino fell in the middle of the inode record, make it look like 312 + * the inodes up to agino are free so that we don't return them again. 313 + */ 314 + if (iwag->trim_start) 315 + xfs_iwalk_adjust_start(agino, irec); 316 + 317 + /* 318 + * The prefetch calculation is supposed to give us a large enough inobt 319 + * record cache that grab_ichunk can stage a partial first record and 320 + * the loop body can cache a record without having to check for cache 321 + * space until after it reads an inobt record. 322 + */ 323 + iwag->nr_recs++; 324 + ASSERT(iwag->nr_recs < iwag->sz_recs); 325 + 326 + out_advance: 327 + return xfs_btree_increment(*curpp, 0, has_more); 328 + } 329 + 330 + /* 331 + * The inobt record cache is full, so preserve the inobt cursor state and 332 + * run callbacks on the cached inobt records. 
When we're done, restore the 333 + * cursor state to wherever the cursor would have been had the cache not been 334 + * full (and therefore we could've just incremented the cursor) if *@has_more 335 + * is true. On exit, *@has_more will indicate whether or not the caller should 336 + * try for more inode records. 337 + */ 338 + STATIC int 339 + xfs_iwalk_run_callbacks( 340 + struct xfs_iwalk_ag *iwag, 341 + xfs_agnumber_t agno, 342 + struct xfs_btree_cur **curpp, 343 + struct xfs_buf **agi_bpp, 344 + int *has_more) 345 + { 346 + struct xfs_mount *mp = iwag->mp; 347 + struct xfs_trans *tp = iwag->tp; 348 + struct xfs_inobt_rec_incore *irec; 349 + xfs_agino_t restart; 350 + int error; 351 + 352 + ASSERT(iwag->nr_recs > 0); 353 + 354 + /* Delete cursor but remember the last record we cached... */ 355 + xfs_iwalk_del_inobt(tp, curpp, agi_bpp, 0); 356 + irec = &iwag->recs[iwag->nr_recs - 1]; 357 + restart = irec->ir_startino + XFS_INODES_PER_CHUNK - 1; 358 + 359 + error = xfs_iwalk_ag_recs(iwag); 360 + if (error) 361 + return error; 362 + 363 + /* ...empty the cache... */ 364 + iwag->nr_recs = 0; 365 + 366 + if (!has_more) 367 + return 0; 368 + 369 + /* ...and recreate the cursor just past where we left off. */ 370 + error = xfs_inobt_cur(mp, tp, agno, XFS_BTNUM_INO, curpp, agi_bpp); 371 + if (error) 372 + return error; 373 + 374 + return xfs_inobt_lookup(*curpp, restart, XFS_LOOKUP_GE, has_more); 375 + } 376 + 377 + /* Walk all inodes in a single AG, from @iwag->startino to the end of the AG. */ 378 + STATIC int 379 + xfs_iwalk_ag( 380 + struct xfs_iwalk_ag *iwag) 381 + { 382 + struct xfs_mount *mp = iwag->mp; 383 + struct xfs_trans *tp = iwag->tp; 384 + struct xfs_buf *agi_bp = NULL; 385 + struct xfs_btree_cur *cur = NULL; 386 + xfs_agnumber_t agno; 387 + xfs_agino_t agino; 388 + int has_more; 389 + int error = 0; 390 + 391 + /* Set up our cursor at the right place in the inode btree. 
*/ 392 + agno = XFS_INO_TO_AGNO(mp, iwag->startino); 393 + agino = XFS_INO_TO_AGINO(mp, iwag->startino); 394 + error = xfs_iwalk_ag_start(iwag, agno, agino, &cur, &agi_bp, &has_more); 395 + 396 + while (!error && has_more) { 397 + struct xfs_inobt_rec_incore *irec; 398 + 399 + cond_resched(); 400 + if (xfs_pwork_want_abort(&iwag->pwork)) 401 + goto out; 402 + 403 + /* Fetch the inobt record. */ 404 + irec = &iwag->recs[iwag->nr_recs]; 405 + error = xfs_inobt_get_rec(cur, irec, &has_more); 406 + if (error || !has_more) 407 + break; 408 + 409 + /* No allocated inodes in this chunk; skip it. */ 410 + if (iwag->skip_empty && irec->ir_freecount == irec->ir_count) { 411 + error = xfs_btree_increment(cur, 0, &has_more); 412 + if (error) 413 + break; 414 + continue; 415 + } 416 + 417 + /* 418 + * Start readahead for this inode chunk in anticipation of 419 + * walking the inodes. 420 + */ 421 + if (iwag->iwalk_fn) 422 + xfs_iwalk_ichunk_ra(mp, agno, irec); 423 + 424 + /* 425 + * If there's space in the buffer for more records, increment 426 + * the btree cursor and grab more. 427 + */ 428 + if (++iwag->nr_recs < iwag->sz_recs) { 429 + error = xfs_btree_increment(cur, 0, &has_more); 430 + if (error || !has_more) 431 + break; 432 + continue; 433 + } 434 + 435 + /* 436 + * Otherwise, we need to save cursor state and run the callback 437 + * function on the cached records. The run_callbacks function 438 + * is supposed to return a cursor pointing to the record where 439 + * we would be if we had been able to increment like above. 440 + */ 441 + ASSERT(has_more); 442 + error = xfs_iwalk_run_callbacks(iwag, agno, &cur, &agi_bp, 443 + &has_more); 444 + } 445 + 446 + if (iwag->nr_recs == 0 || error) 447 + goto out; 448 + 449 + /* Walk the unprocessed records in the cache. 
*/ 450 + error = xfs_iwalk_run_callbacks(iwag, agno, &cur, &agi_bp, &has_more); 451 + 452 + out: 453 + xfs_iwalk_del_inobt(tp, &cur, &agi_bp, error); 454 + return error; 455 + } 456 + 457 + /* 458 + * We experimentally determined that the reduction in ioctl call overhead 459 + * diminishes when userspace asks for more than 2048 inodes, so we'll cap 460 + * prefetch at this point. 461 + */ 462 + #define IWALK_MAX_INODE_PREFETCH (2048U) 463 + 464 + /* 465 + * Given the number of inodes to prefetch, set the number of inobt records that 466 + * we cache in memory, which controls the number of inodes we try to read 467 + * ahead. Set the maximum if @inodes == 0. 468 + */ 469 + static inline unsigned int 470 + xfs_iwalk_prefetch( 471 + unsigned int inodes) 472 + { 473 + unsigned int inobt_records; 474 + 475 + /* 476 + * If the caller didn't tell us the number of inodes they wanted, 477 + * assume the maximum prefetch possible for best performance. 478 + * Otherwise, cap prefetch at that maximum so that we don't start an 479 + * absurd amount of prefetch. 480 + */ 481 + if (inodes == 0) 482 + inodes = IWALK_MAX_INODE_PREFETCH; 483 + inodes = min(inodes, IWALK_MAX_INODE_PREFETCH); 484 + 485 + /* Round the inode count up to a full chunk. */ 486 + inodes = round_up(inodes, XFS_INODES_PER_CHUNK); 487 + 488 + /* 489 + * In order to convert the number of inodes to prefetch into an 490 + * estimate of the number of inobt records to cache, we require a 491 + * conversion factor that reflects our expectations of the average 492 + * loading factor of an inode chunk. Based on data gathered, most 493 + * (but not all) filesystems manage to keep the inode chunks totally 494 + * full, so we'll underestimate slightly so that our readahead will 495 + * still deliver the performance we want on aging filesystems: 496 + * 497 + * inobt = inodes / (INODES_PER_CHUNK * (4 / 5)); 498 + * 499 + * The funny math is to avoid integer division. 
500 + */ 501 + inobt_records = (inodes * 5) / (4 * XFS_INODES_PER_CHUNK); 502 + 503 + /* 504 + * Allocate enough space to prefetch at least two inobt records so that 505 + * we can cache both the record where the iwalk started and the next 506 + * record. This simplifies the AG inode walk loop setup code. 507 + */ 508 + return max(inobt_records, 2U); 509 + } 510 + 511 + /* 512 + * Walk all inodes in the filesystem starting from @startino. The @iwalk_fn 513 + * will be called for each allocated inode, being passed the inode's number and 514 + * @data. @max_prefetch controls how many inobt records' worth of inodes we 515 + * try to readahead. 516 + */ 517 + int 518 + xfs_iwalk( 519 + struct xfs_mount *mp, 520 + struct xfs_trans *tp, 521 + xfs_ino_t startino, 522 + unsigned int flags, 523 + xfs_iwalk_fn iwalk_fn, 524 + unsigned int inode_records, 525 + void *data) 526 + { 527 + struct xfs_iwalk_ag iwag = { 528 + .mp = mp, 529 + .tp = tp, 530 + .iwalk_fn = iwalk_fn, 531 + .data = data, 532 + .startino = startino, 533 + .sz_recs = xfs_iwalk_prefetch(inode_records), 534 + .trim_start = 1, 535 + .skip_empty = 1, 536 + .pwork = XFS_PWORK_SINGLE_THREADED, 537 + }; 538 + xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino); 539 + int error; 540 + 541 + ASSERT(agno < mp->m_sb.sb_agcount); 542 + ASSERT(!(flags & ~XFS_IWALK_FLAGS_ALL)); 543 + 544 + error = xfs_iwalk_alloc(&iwag); 545 + if (error) 546 + return error; 547 + 548 + for (; agno < mp->m_sb.sb_agcount; agno++) { 549 + error = xfs_iwalk_ag(&iwag); 550 + if (error) 551 + break; 552 + iwag.startino = XFS_AGINO_TO_INO(mp, agno + 1, 0); 553 + if (flags & XFS_INOBT_WALK_SAME_AG) 554 + break; 555 + } 556 + 557 + xfs_iwalk_free(&iwag); 558 + return error; 559 + } 560 + 561 + /* Run per-thread iwalk work. 
*/ 562 + static int 563 + xfs_iwalk_ag_work( 564 + struct xfs_mount *mp, 565 + struct xfs_pwork *pwork) 566 + { 567 + struct xfs_iwalk_ag *iwag; 568 + int error = 0; 569 + 570 + iwag = container_of(pwork, struct xfs_iwalk_ag, pwork); 571 + if (xfs_pwork_want_abort(pwork)) 572 + goto out; 573 + 574 + error = xfs_iwalk_alloc(iwag); 575 + if (error) 576 + goto out; 577 + 578 + error = xfs_iwalk_ag(iwag); 579 + xfs_iwalk_free(iwag); 580 + out: 581 + kmem_free(iwag); 582 + return error; 583 + } 584 + 585 + /* 586 + * Walk all the inodes in the filesystem using multiple threads to process each 587 + * AG. 588 + */ 589 + int 590 + xfs_iwalk_threaded( 591 + struct xfs_mount *mp, 592 + xfs_ino_t startino, 593 + unsigned int flags, 594 + xfs_iwalk_fn iwalk_fn, 595 + unsigned int inode_records, 596 + bool polled, 597 + void *data) 598 + { 599 + struct xfs_pwork_ctl pctl; 600 + xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino); 601 + unsigned int nr_threads; 602 + int error; 603 + 604 + ASSERT(agno < mp->m_sb.sb_agcount); 605 + ASSERT(!(flags & ~XFS_IWALK_FLAGS_ALL)); 606 + 607 + nr_threads = xfs_pwork_guess_datadev_parallelism(mp); 608 + error = xfs_pwork_init(mp, &pctl, xfs_iwalk_ag_work, "xfs_iwalk", 609 + nr_threads); 610 + if (error) 611 + return error; 612 + 613 + for (; agno < mp->m_sb.sb_agcount; agno++) { 614 + struct xfs_iwalk_ag *iwag; 615 + 616 + if (xfs_pwork_ctl_want_abort(&pctl)) 617 + break; 618 + 619 + iwag = kmem_zalloc(sizeof(struct xfs_iwalk_ag), KM_SLEEP); 620 + iwag->mp = mp; 621 + iwag->iwalk_fn = iwalk_fn; 622 + iwag->data = data; 623 + iwag->startino = startino; 624 + iwag->sz_recs = xfs_iwalk_prefetch(inode_records); 625 + xfs_pwork_queue(&pctl, &iwag->pwork); 626 + startino = XFS_AGINO_TO_INO(mp, agno + 1, 0); 627 + if (flags & XFS_INOBT_WALK_SAME_AG) 628 + break; 629 + } 630 + 631 + if (polled) 632 + xfs_pwork_poll(&pctl); 633 + return xfs_pwork_destroy(&pctl); 634 + } 635 + 636 + /* 637 + * Allow callers to cache up to a page's worth of inobt 
records. This reflects 638 + * the existing inumbers prefetching behavior. Since the inobt walk does not 639 + * itself do anything with the inobt records, we can set a fairly high limit 640 + * here. 641 + */ 642 + #define MAX_INOBT_WALK_PREFETCH \ 643 + (PAGE_SIZE / sizeof(struct xfs_inobt_rec_incore)) 644 + 645 + /* 646 + * Given the number of records that the user wanted, set the number of inobt 647 + * records that we buffer in memory. Set the maximum if @inobt_records == 0. 648 + */ 649 + static inline unsigned int 650 + xfs_inobt_walk_prefetch( 651 + unsigned int inobt_records) 652 + { 653 + /* 654 + * If the caller didn't tell us the number of inobt records they 655 + * wanted, assume the maximum prefetch possible for best performance. 656 + */ 657 + if (inobt_records == 0) 658 + inobt_records = MAX_INOBT_WALK_PREFETCH; 659 + 660 + /* 661 + * Allocate enough space to prefetch at least two inobt records so that 662 + * we can cache both the record where the iwalk started and the next 663 + * record. This simplifies the AG inode walk loop setup code. 664 + */ 665 + inobt_records = max(inobt_records, 2U); 666 + 667 + /* 668 + * Cap prefetch at that maximum so that we don't use an absurd amount 669 + * of memory. 670 + */ 671 + return min_t(unsigned int, inobt_records, MAX_INOBT_WALK_PREFETCH); 672 + } 673 + 674 + /* 675 + * Walk all inode btree records in the filesystem starting from @startino. The 676 + * @inobt_walk_fn will be called for each btree record, being passed the incore 677 + * record and @data. @max_prefetch controls how many inobt records we try to 678 + * cache ahead of time. 
679 + */ 680 + int 681 + xfs_inobt_walk( 682 + struct xfs_mount *mp, 683 + struct xfs_trans *tp, 684 + xfs_ino_t startino, 685 + unsigned int flags, 686 + xfs_inobt_walk_fn inobt_walk_fn, 687 + unsigned int inobt_records, 688 + void *data) 689 + { 690 + struct xfs_iwalk_ag iwag = { 691 + .mp = mp, 692 + .tp = tp, 693 + .inobt_walk_fn = inobt_walk_fn, 694 + .data = data, 695 + .startino = startino, 696 + .sz_recs = xfs_inobt_walk_prefetch(inobt_records), 697 + .pwork = XFS_PWORK_SINGLE_THREADED, 698 + }; 699 + xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino); 700 + int error; 701 + 702 + ASSERT(agno < mp->m_sb.sb_agcount); 703 + ASSERT(!(flags & ~XFS_INOBT_WALK_FLAGS_ALL)); 704 + 705 + error = xfs_iwalk_alloc(&iwag); 706 + if (error) 707 + return error; 708 + 709 + for (; agno < mp->m_sb.sb_agcount; agno++) { 710 + error = xfs_iwalk_ag(&iwag); 711 + if (error) 712 + break; 713 + iwag.startino = XFS_AGINO_TO_INO(mp, agno + 1, 0); 714 + if (flags & XFS_INOBT_WALK_SAME_AG) 715 + break; 716 + } 717 + 718 + xfs_iwalk_free(&iwag); 719 + return error; 720 + }
+46
fs/xfs/xfs_iwalk.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-or-later */ 2 + /* 3 + * Copyright (C) 2019 Oracle. All Rights Reserved. 4 + * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 + */ 6 + #ifndef __XFS_IWALK_H__ 7 + #define __XFS_IWALK_H__ 8 + 9 + /* Walk all inodes in the filesystem starting from @startino. */ 10 + typedef int (*xfs_iwalk_fn)(struct xfs_mount *mp, struct xfs_trans *tp, 11 + xfs_ino_t ino, void *data); 12 + /* Return values for xfs_iwalk_fn. */ 13 + #define XFS_IWALK_CONTINUE (XFS_ITER_CONTINUE) 14 + #define XFS_IWALK_ABORT (XFS_ITER_ABORT) 15 + 16 + int xfs_iwalk(struct xfs_mount *mp, struct xfs_trans *tp, xfs_ino_t startino, 17 + unsigned int flags, xfs_iwalk_fn iwalk_fn, 18 + unsigned int inode_records, void *data); 19 + int xfs_iwalk_threaded(struct xfs_mount *mp, xfs_ino_t startino, 20 + unsigned int flags, xfs_iwalk_fn iwalk_fn, 21 + unsigned int inode_records, bool poll, void *data); 22 + 23 + /* Only iterate inodes within the same AG as @startino. */ 24 + #define XFS_IWALK_SAME_AG (0x1) 25 + 26 + #define XFS_IWALK_FLAGS_ALL (XFS_IWALK_SAME_AG) 27 + 28 + /* Walk all inode btree records in the filesystem starting from @startino. */ 29 + typedef int (*xfs_inobt_walk_fn)(struct xfs_mount *mp, struct xfs_trans *tp, 30 + xfs_agnumber_t agno, 31 + const struct xfs_inobt_rec_incore *irec, 32 + void *data); 33 + /* Return value (for xfs_inobt_walk_fn) that aborts the walk immediately. */ 34 + #define XFS_INOBT_WALK_ABORT (XFS_IWALK_ABORT) 35 + 36 + int xfs_inobt_walk(struct xfs_mount *mp, struct xfs_trans *tp, 37 + xfs_ino_t startino, unsigned int flags, 38 + xfs_inobt_walk_fn inobt_walk_fn, unsigned int inobt_records, 39 + void *data); 40 + 41 + /* Only iterate inobt records within the same AG as @startino. */ 42 + #define XFS_INOBT_WALK_SAME_AG (XFS_IWALK_SAME_AG) 43 + 44 + #define XFS_INOBT_WALK_FLAGS_ALL (XFS_INOBT_WALK_SAME_AG) 45 + 46 + #endif /* __XFS_IWALK_H__ */
+3 -2
fs/xfs/xfs_linux.h
··· 110 110 #define current_restore_flags_nested(sp, f) \ 111 111 (current->flags = ((current->flags & ~(f)) | (*(sp) & (f)))) 112 112 113 - #define spinlock_destroy(lock) 114 - 115 113 #define NBBY 8 /* number of bits per byte */ 116 114 117 115 /* ··· 218 220 do_div(x, y); 219 221 return x; 220 222 } 223 + 224 + int xfs_rw_bdev(struct block_device *bdev, sector_t sector, unsigned int count, 225 + char *data, unsigned int op); 221 226 222 227 #define ASSERT_ALWAYS(expr) \ 223 228 (likely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
+266 -376
fs/xfs/xfs_log.c
··· 16 16 #include "xfs_trans_priv.h" 17 17 #include "xfs_log.h" 18 18 #include "xfs_log_priv.h" 19 - #include "xfs_log_recover.h" 20 - #include "xfs_inode.h" 21 19 #include "xfs_trace.h" 22 - #include "xfs_fsops.h" 23 - #include "xfs_cksum.h" 24 20 #include "xfs_sysfs.h" 25 21 #include "xfs_sb.h" 26 22 #include "xfs_health.h" ··· 41 45 xlog_space_left( 42 46 struct xlog *log, 43 47 atomic64_t *head); 44 - STATIC int 45 - xlog_sync( 46 - struct xlog *log, 47 - struct xlog_in_core *iclog); 48 48 STATIC void 49 49 xlog_dealloc_log( 50 50 struct xlog *log); 51 51 52 52 /* local state machine functions */ 53 - STATIC void xlog_state_done_syncing(xlog_in_core_t *iclog, int); 54 - STATIC void 55 - xlog_state_do_callback( 56 - struct xlog *log, 57 - int aborted, 58 - struct xlog_in_core *iclog); 53 + STATIC void xlog_state_done_syncing( 54 + struct xlog_in_core *iclog, 55 + bool aborted); 59 56 STATIC int 60 57 xlog_state_get_iclog_space( 61 58 struct xlog *log, ··· 96 107 xlog_verify_iclog( 97 108 struct xlog *log, 98 109 struct xlog_in_core *iclog, 99 - int count, 100 - bool syncing); 110 + int count); 101 111 STATIC void 102 112 xlog_verify_tail_lsn( 103 113 struct xlog *log, ··· 105 117 #else 106 118 #define xlog_verify_dest_ptr(a,b) 107 119 #define xlog_verify_grant_tail(a) 108 - #define xlog_verify_iclog(a,b,c,d) 120 + #define xlog_verify_iclog(a,b,c) 109 121 #define xlog_verify_tail_lsn(a,b,c) 110 122 #endif 111 123 ··· 529 541 return lsn; 530 542 } 531 543 532 - /* 533 - * Attaches a new iclog I/O completion callback routine during 534 - * transaction commit. If the log is in error state, a non-zero 535 - * return code is handed back and the caller is responsible for 536 - * executing the callback at an appropriate time. 
537 - */ 538 - int 539 - xfs_log_notify( 540 - struct xlog_in_core *iclog, 541 - xfs_log_callback_t *cb) 542 - { 543 - int abortflg; 544 - 545 - spin_lock(&iclog->ic_callback_lock); 546 - abortflg = (iclog->ic_state & XLOG_STATE_IOERROR); 547 - if (!abortflg) { 548 - ASSERT_ALWAYS((iclog->ic_state == XLOG_STATE_ACTIVE) || 549 - (iclog->ic_state == XLOG_STATE_WANT_SYNC)); 550 - cb->cb_next = NULL; 551 - *(iclog->ic_callback_tail) = cb; 552 - iclog->ic_callback_tail = &(cb->cb_next); 553 - } 554 - spin_unlock(&iclog->ic_callback_lock); 555 - return abortflg; 556 - } 557 - 558 544 int 559 545 xfs_log_release_iclog( 560 546 struct xfs_mount *mp, ··· 769 807 * The mount has failed. Cancel the recovery if it hasn't completed and destroy 770 808 * the log. 771 809 */ 772 - int 810 + void 773 811 xfs_log_mount_cancel( 774 812 struct xfs_mount *mp) 775 813 { 776 - int error; 777 - 778 - error = xlog_recover_cancel(mp->m_log); 814 + xlog_recover_cancel(mp->m_log); 779 815 xfs_log_unmount(mp); 780 - 781 - return error; 782 816 } 783 817 784 818 /* ··· 890 932 * Or, if we are doing a forced umount (typically because of IO errors). 891 933 */ 892 934 if (mp->m_flags & XFS_MOUNT_NORECOVERY || 893 - xfs_readonly_buftarg(log->l_mp->m_logdev_targp)) { 935 + xfs_readonly_buftarg(log->l_targ)) { 894 936 ASSERT(mp->m_flags & XFS_MOUNT_RDONLY); 895 937 return 0; 896 938 } ··· 1202 1244 } 1203 1245 1204 1246 1205 - /* 1206 - * Log function which is called when an io completes. 1207 - * 1208 - * The log manager needs its own routine, in order to control what 1209 - * happens with the buffer after the write completes. 
1210 - */ 1211 1247 static void 1212 - xlog_iodone(xfs_buf_t *bp) 1248 + xlog_ioend_work( 1249 + struct work_struct *work) 1213 1250 { 1214 - struct xlog_in_core *iclog = bp->b_log_item; 1215 - struct xlog *l = iclog->ic_log; 1216 - int aborted = 0; 1251 + struct xlog_in_core *iclog = 1252 + container_of(work, struct xlog_in_core, ic_end_io_work); 1253 + struct xlog *log = iclog->ic_log; 1254 + bool aborted = false; 1255 + int error; 1256 + 1257 + error = blk_status_to_errno(iclog->ic_bio.bi_status); 1258 + #ifdef DEBUG 1259 + /* treat writes with injected CRC errors as failed */ 1260 + if (iclog->ic_fail_crc) 1261 + error = -EIO; 1262 + #endif 1217 1263 1218 1264 /* 1219 - * Race to shutdown the filesystem if we see an error or the iclog is in 1220 - * IOABORT state. The IOABORT state is only set in DEBUG mode to inject 1221 - * CRC errors into log recovery. 1265 + * Race to shutdown the filesystem if we see an error. 1222 1266 */ 1223 - if (XFS_TEST_ERROR(bp->b_error, l->l_mp, XFS_ERRTAG_IODONE_IOERR) || 1224 - iclog->ic_state & XLOG_STATE_IOABORT) { 1225 - if (iclog->ic_state & XLOG_STATE_IOABORT) 1226 - iclog->ic_state &= ~XLOG_STATE_IOABORT; 1227 - 1228 - xfs_buf_ioerror_alert(bp, __func__); 1229 - xfs_buf_stale(bp); 1230 - xfs_force_shutdown(l->l_mp, SHUTDOWN_LOG_IO_ERROR); 1267 + if (XFS_TEST_ERROR(error, log->l_mp, XFS_ERRTAG_IODONE_IOERR)) { 1268 + xfs_alert(log->l_mp, "log I/O error %d", error); 1269 + xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR); 1231 1270 /* 1232 1271 * This flag will be propagated to the trans-committed 1233 1272 * callback routines to let them know that the log-commit 1234 1273 * didn't succeed. 
1235 1274 */ 1236 - aborted = XFS_LI_ABORTED; 1275 + aborted = true; 1237 1276 } else if (iclog->ic_state & XLOG_STATE_IOERROR) { 1238 - aborted = XFS_LI_ABORTED; 1277 + aborted = true; 1239 1278 } 1240 1279 1241 - /* log I/O is always issued ASYNC */ 1242 - ASSERT(bp->b_flags & XBF_ASYNC); 1243 1280 xlog_state_done_syncing(iclog, aborted); 1281 + bio_uninit(&iclog->ic_bio); 1244 1282 1245 1283 /* 1246 - * drop the buffer lock now that we are done. Nothing references 1247 - * the buffer after this, so an unmount waiting on this lock can now 1248 - * tear it down safely. As such, it is unsafe to reference the buffer 1249 - * (bp) after the unlock as we could race with it being freed. 1284 + * Drop the lock to signal that we are done. Nothing references the 1285 + * iclog after this, so an unmount waiting on this lock can now tear it 1286 + * down safely. As such, it is unsafe to reference the iclog after the 1287 + * unlock as we could race with it being freed. 1250 1288 */ 1251 - xfs_buf_unlock(bp); 1289 + up(&iclog->ic_sema); 1252 1290 } 1253 1291 1254 1292 /* ··· 1255 1301 * If the filesystem blocksize is too large, we may need to choose a 1256 1302 * larger size since the directory code currently logs entire blocks. 1257 1303 */ 1258 - 1259 1304 STATIC void 1260 1305 xlog_get_iclog_buffer_size( 1261 1306 struct xfs_mount *mp, 1262 1307 struct xlog *log) 1263 1308 { 1264 - int size; 1265 - int xhdrs; 1266 - 1267 1309 if (mp->m_logbufs <= 0) 1268 - log->l_iclog_bufs = XLOG_MAX_ICLOGS; 1269 - else 1270 - log->l_iclog_bufs = mp->m_logbufs; 1310 + mp->m_logbufs = XLOG_MAX_ICLOGS; 1311 + if (mp->m_logbsize <= 0) 1312 + mp->m_logbsize = XLOG_BIG_RECORD_BSIZE; 1313 + 1314 + log->l_iclog_bufs = mp->m_logbufs; 1315 + log->l_iclog_size = mp->m_logbsize; 1271 1316 1272 1317 /* 1273 - * Buffer size passed in from mount system call. 1318 + * # headers = size / 32k - one header holds cycles from 32k of data. 
1274 1319 */ 1275 - if (mp->m_logbsize > 0) { 1276 - size = log->l_iclog_size = mp->m_logbsize; 1277 - log->l_iclog_size_log = 0; 1278 - while (size != 1) { 1279 - log->l_iclog_size_log++; 1280 - size >>= 1; 1281 - } 1282 - 1283 - if (xfs_sb_version_haslogv2(&mp->m_sb)) { 1284 - /* # headers = size / 32k 1285 - * one header holds cycles from 32k of data 1286 - */ 1287 - 1288 - xhdrs = mp->m_logbsize / XLOG_HEADER_CYCLE_SIZE; 1289 - if (mp->m_logbsize % XLOG_HEADER_CYCLE_SIZE) 1290 - xhdrs++; 1291 - log->l_iclog_hsize = xhdrs << BBSHIFT; 1292 - log->l_iclog_heads = xhdrs; 1293 - } else { 1294 - ASSERT(mp->m_logbsize <= XLOG_BIG_RECORD_BSIZE); 1295 - log->l_iclog_hsize = BBSIZE; 1296 - log->l_iclog_heads = 1; 1297 - } 1298 - goto done; 1299 - } 1300 - 1301 - /* All machines use 32kB buffers by default. */ 1302 - log->l_iclog_size = XLOG_BIG_RECORD_BSIZE; 1303 - log->l_iclog_size_log = XLOG_BIG_RECORD_BSHIFT; 1304 - 1305 - /* the default log size is 16k or 32k which is one header sector */ 1306 - log->l_iclog_hsize = BBSIZE; 1307 - log->l_iclog_heads = 1; 1308 - 1309 - done: 1310 - /* are we being asked to make the sizes selected above visible? 
*/ 1311 - if (mp->m_logbufs == 0) 1312 - mp->m_logbufs = log->l_iclog_bufs; 1313 - if (mp->m_logbsize == 0) 1314 - mp->m_logbsize = log->l_iclog_size; 1315 - } /* xlog_get_iclog_buffer_size */ 1316 - 1320 + log->l_iclog_heads = 1321 + DIV_ROUND_UP(mp->m_logbsize, XLOG_HEADER_CYCLE_SIZE); 1322 + log->l_iclog_hsize = log->l_iclog_heads << BBSHIFT; 1323 + } 1317 1324 1318 1325 void 1319 1326 xfs_log_work_queue( ··· 1337 1422 xlog_rec_header_t *head; 1338 1423 xlog_in_core_t **iclogp; 1339 1424 xlog_in_core_t *iclog, *prev_iclog=NULL; 1340 - xfs_buf_t *bp; 1341 1425 int i; 1342 1426 int error = -ENOMEM; 1343 1427 uint log2_size = 0; ··· 1394 1480 1395 1481 xlog_get_iclog_buffer_size(mp, log); 1396 1482 1397 - /* 1398 - * Use a NULL block for the extra log buffer used during splits so that 1399 - * it will trigger errors if we ever try to do IO on it without first 1400 - * having set it up properly. 1401 - */ 1402 - error = -ENOMEM; 1403 - bp = xfs_buf_alloc(mp->m_logdev_targp, XFS_BUF_DADDR_NULL, 1404 - BTOBB(log->l_iclog_size), XBF_NO_IOACCT); 1405 - if (!bp) 1406 - goto out_free_log; 1407 - 1408 - /* 1409 - * The iclogbuf buffer locks are held over IO but we are not going to do 1410 - * IO yet. Hence unlock the buffer so that the log IO path can grab it 1411 - * when appropriately. 1412 - */ 1413 - ASSERT(xfs_buf_islocked(bp)); 1414 - xfs_buf_unlock(bp); 1415 - 1416 - /* use high priority wq for log I/O completion */ 1417 - bp->b_ioend_wq = mp->m_log_workqueue; 1418 - bp->b_iodone = xlog_iodone; 1419 - log->l_xbuf = bp; 1420 - 1421 1483 spin_lock_init(&log->l_icloglock); 1422 1484 init_waitqueue_head(&log->l_flush_wait); 1423 1485 ··· 1406 1516 * xlog_in_core_t in xfs_log_priv.h for details. 
1407 1517 */ 1408 1518 ASSERT(log->l_iclog_size >= 4096); 1409 - for (i=0; i < log->l_iclog_bufs; i++) { 1410 - *iclogp = kmem_zalloc(sizeof(xlog_in_core_t), KM_MAYFAIL); 1411 - if (!*iclogp) 1519 + for (i = 0; i < log->l_iclog_bufs; i++) { 1520 + size_t bvec_size = howmany(log->l_iclog_size, PAGE_SIZE) * 1521 + sizeof(struct bio_vec); 1522 + 1523 + iclog = kmem_zalloc(sizeof(*iclog) + bvec_size, KM_MAYFAIL); 1524 + if (!iclog) 1412 1525 goto out_free_iclog; 1413 1526 1414 - iclog = *iclogp; 1527 + *iclogp = iclog; 1415 1528 iclog->ic_prev = prev_iclog; 1416 1529 prev_iclog = iclog; 1417 1530 1418 - bp = xfs_buf_get_uncached(mp->m_logdev_targp, 1419 - BTOBB(log->l_iclog_size), 1420 - XBF_NO_IOACCT); 1421 - if (!bp) 1531 + iclog->ic_data = kmem_alloc_large(log->l_iclog_size, 1532 + KM_MAYFAIL); 1533 + if (!iclog->ic_data) 1422 1534 goto out_free_iclog; 1423 - 1424 - ASSERT(xfs_buf_islocked(bp)); 1425 - xfs_buf_unlock(bp); 1426 - 1427 - /* use high priority wq for log I/O completion */ 1428 - bp->b_ioend_wq = mp->m_log_workqueue; 1429 - bp->b_iodone = xlog_iodone; 1430 - iclog->ic_bp = bp; 1431 - iclog->ic_data = bp->b_addr; 1432 1535 #ifdef DEBUG 1433 1536 log->l_iclog_bak[i] = &iclog->ic_header; 1434 1537 #endif ··· 1435 1552 head->h_fmt = cpu_to_be32(XLOG_FMT); 1436 1553 memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t)); 1437 1554 1438 - iclog->ic_size = BBTOB(bp->b_length) - log->l_iclog_hsize; 1555 + iclog->ic_size = log->l_iclog_size - log->l_iclog_hsize; 1439 1556 iclog->ic_state = XLOG_STATE_ACTIVE; 1440 1557 iclog->ic_log = log; 1441 1558 atomic_set(&iclog->ic_refcnt, 0); 1442 1559 spin_lock_init(&iclog->ic_callback_lock); 1443 - iclog->ic_callback_tail = &(iclog->ic_callback); 1560 + INIT_LIST_HEAD(&iclog->ic_callbacks); 1444 1561 iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize; 1445 1562 1446 1563 init_waitqueue_head(&iclog->ic_force_wait); 1447 1564 init_waitqueue_head(&iclog->ic_write_wait); 1565 + 
INIT_WORK(&iclog->ic_end_io_work, xlog_ioend_work); 1566 + sema_init(&iclog->ic_sema, 1); 1448 1567 1449 1568 iclogp = &iclog->ic_next; 1450 1569 } 1451 1570 *iclogp = log->l_iclog; /* complete ring */ 1452 1571 log->l_iclog->ic_prev = prev_iclog; /* re-write 1st prev ptr */ 1453 1572 1573 + log->l_ioend_workqueue = alloc_workqueue("xfs-log/%s", 1574 + WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_HIGHPRI, 0, 1575 + mp->m_fsname); 1576 + if (!log->l_ioend_workqueue) 1577 + goto out_free_iclog; 1578 + 1454 1579 error = xlog_cil_init(log); 1455 1580 if (error) 1456 - goto out_free_iclog; 1581 + goto out_destroy_workqueue; 1457 1582 return log; 1458 1583 1584 + out_destroy_workqueue: 1585 + destroy_workqueue(log->l_ioend_workqueue); 1459 1586 out_free_iclog: 1460 1587 for (iclog = log->l_iclog; iclog; iclog = prev_iclog) { 1461 1588 prev_iclog = iclog->ic_next; 1462 - if (iclog->ic_bp) 1463 - xfs_buf_free(iclog->ic_bp); 1589 + kmem_free(iclog->ic_data); 1464 1590 kmem_free(iclog); 1465 1591 } 1466 - spinlock_destroy(&log->l_icloglock); 1467 - xfs_buf_free(log->l_xbuf); 1468 1592 out_free_log: 1469 1593 kmem_free(log); 1470 1594 out: ··· 1656 1766 return xfs_end_cksum(crc); 1657 1767 } 1658 1768 1659 - /* 1660 - * The bdstrat callback function for log bufs. This gives us a central 1661 - * place to trap bufs in case we get hit by a log I/O error and need to 1662 - * shutdown. Actually, in practice, even when we didn't get a log error, 1663 - * we transition the iclogs to IOERROR state *after* flushing all existing 1664 - * iclogs to disk. This is because we don't want anymore new transactions to be 1665 - * started or completed afterwards. 1666 - * 1667 - * We lock the iclogbufs here so that we can serialise against IO completion 1668 - * during unmount. We might be processing a shutdown triggered during unmount, 1669 - * and that can occur asynchronously to the unmount thread, and hence we need to 1670 - * ensure that completes before tearing down the iclogbufs. 
Hence we need to 1671 - * hold the buffer lock across the log IO to acheive that. 1672 - */ 1673 - STATIC int 1674 - xlog_bdstrat( 1675 - struct xfs_buf *bp) 1769 + static void 1770 + xlog_bio_end_io( 1771 + struct bio *bio) 1676 1772 { 1677 - struct xlog_in_core *iclog = bp->b_log_item; 1773 + struct xlog_in_core *iclog = bio->bi_private; 1678 1774 1679 - xfs_buf_lock(bp); 1680 - if (iclog->ic_state & XLOG_STATE_IOERROR) { 1681 - xfs_buf_ioerror(bp, -EIO); 1682 - xfs_buf_stale(bp); 1683 - xfs_buf_ioend(bp); 1775 + queue_work(iclog->ic_log->l_ioend_workqueue, 1776 + &iclog->ic_end_io_work); 1777 + } 1778 + 1779 + static void 1780 + xlog_map_iclog_data( 1781 + struct bio *bio, 1782 + void *data, 1783 + size_t count) 1784 + { 1785 + do { 1786 + struct page *page = kmem_to_page(data); 1787 + unsigned int off = offset_in_page(data); 1788 + size_t len = min_t(size_t, count, PAGE_SIZE - off); 1789 + 1790 + WARN_ON_ONCE(bio_add_page(bio, page, len, off) != len); 1791 + 1792 + data += len; 1793 + count -= len; 1794 + } while (count); 1795 + } 1796 + 1797 + STATIC void 1798 + xlog_write_iclog( 1799 + struct xlog *log, 1800 + struct xlog_in_core *iclog, 1801 + uint64_t bno, 1802 + unsigned int count, 1803 + bool need_flush) 1804 + { 1805 + ASSERT(bno < log->l_logBBsize); 1806 + 1807 + /* 1808 + * We lock the iclogbufs here so that we can serialise against I/O 1809 + * completion during unmount. We might be processing a shutdown 1810 + * triggered during unmount, and that can occur asynchronously to the 1811 + * unmount thread, and hence we need to ensure that completes before 1812 + * tearing down the iclogbufs. Hence we need to hold the buffer lock 1813 + * across the log IO to archieve that. 1814 + */ 1815 + down(&iclog->ic_sema); 1816 + if (unlikely(iclog->ic_state & XLOG_STATE_IOERROR)) { 1684 1817 /* 1685 1818 * It would seem logical to return EIO here, but we rely on 1686 1819 * the log state machine to propagate I/O errors instead of 1687 - * doing it here. 
Similarly, IO completion will unlock the 1688 - * buffer, so we don't do it here. 1820 + * doing it here. We kick of the state machine and unlock 1821 + * the buffer manually, the code needs to be kept in sync 1822 + * with the I/O completion path. 1689 1823 */ 1690 - return 0; 1824 + xlog_state_done_syncing(iclog, XFS_LI_ABORTED); 1825 + up(&iclog->ic_sema); 1826 + return; 1691 1827 } 1692 1828 1693 - xfs_buf_submit(bp); 1694 - return 0; 1829 + iclog->ic_io_size = count; 1830 + 1831 + bio_init(&iclog->ic_bio, iclog->ic_bvec, howmany(count, PAGE_SIZE)); 1832 + bio_set_dev(&iclog->ic_bio, log->l_targ->bt_bdev); 1833 + iclog->ic_bio.bi_iter.bi_sector = log->l_logBBstart + bno; 1834 + iclog->ic_bio.bi_end_io = xlog_bio_end_io; 1835 + iclog->ic_bio.bi_private = iclog; 1836 + iclog->ic_bio.bi_opf = REQ_OP_WRITE | REQ_META | REQ_SYNC | REQ_FUA; 1837 + if (need_flush) 1838 + iclog->ic_bio.bi_opf |= REQ_PREFLUSH; 1839 + 1840 + xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, iclog->ic_io_size); 1841 + if (is_vmalloc_addr(iclog->ic_data)) 1842 + flush_kernel_vmap_range(iclog->ic_data, iclog->ic_io_size); 1843 + 1844 + /* 1845 + * If this log buffer would straddle the end of the log we will have 1846 + * to split it up into two bios, so that we can continue at the start. 1847 + */ 1848 + if (bno + BTOBB(count) > log->l_logBBsize) { 1849 + struct bio *split; 1850 + 1851 + split = bio_split(&iclog->ic_bio, log->l_logBBsize - bno, 1852 + GFP_NOIO, &fs_bio_set); 1853 + bio_chain(split, &iclog->ic_bio); 1854 + submit_bio(split); 1855 + 1856 + /* restart at logical offset zero for the remainder */ 1857 + iclog->ic_bio.bi_iter.bi_sector = log->l_logBBstart; 1858 + } 1859 + 1860 + submit_bio(&iclog->ic_bio); 1861 + } 1862 + 1863 + /* 1864 + * We need to bump cycle number for the part of the iclog that is 1865 + * written to the start of the log. Watch out for the header magic 1866 + * number case, though. 
1867 + */ 1868 + static void 1869 + xlog_split_iclog( 1870 + struct xlog *log, 1871 + void *data, 1872 + uint64_t bno, 1873 + unsigned int count) 1874 + { 1875 + unsigned int split_offset = BBTOB(log->l_logBBsize - bno); 1876 + unsigned int i; 1877 + 1878 + for (i = split_offset; i < count; i += BBSIZE) { 1879 + uint32_t cycle = get_unaligned_be32(data + i); 1880 + 1881 + if (++cycle == XLOG_HEADER_MAGIC_NUM) 1882 + cycle++; 1883 + put_unaligned_be32(cycle, data + i); 1884 + } 1885 + } 1886 + 1887 + static int 1888 + xlog_calc_iclog_size( 1889 + struct xlog *log, 1890 + struct xlog_in_core *iclog, 1891 + uint32_t *roundoff) 1892 + { 1893 + uint32_t count_init, count; 1894 + bool use_lsunit; 1895 + 1896 + use_lsunit = xfs_sb_version_haslogv2(&log->l_mp->m_sb) && 1897 + log->l_mp->m_sb.sb_logsunit > 1; 1898 + 1899 + /* Add for LR header */ 1900 + count_init = log->l_iclog_hsize + iclog->ic_offset; 1901 + 1902 + /* Round out the log write size */ 1903 + if (use_lsunit) { 1904 + /* we have a v2 stripe unit to use */ 1905 + count = XLOG_LSUNITTOB(log, XLOG_BTOLSUNIT(log, count_init)); 1906 + } else { 1907 + count = BBTOB(BTOBB(count_init)); 1908 + } 1909 + 1910 + ASSERT(count >= count_init); 1911 + *roundoff = count - count_init; 1912 + 1913 + if (use_lsunit) 1914 + ASSERT(*roundoff < log->l_mp->m_sb.sb_logsunit); 1915 + else 1916 + ASSERT(*roundoff < BBTOB(1)); 1917 + return count; 1695 1918 } 1696 1919 1697 1920 /* ··· 1827 1824 * log will require grabbing the lock though. 1828 1825 * 1829 1826 * The entire log manager uses a logical block numbering scheme. Only 1830 - * log_sync (and then only bwrite()) know about the fact that the log may 1831 - * not start with block zero on a given device. The log block start offset 1832 - * is added immediately before calling bwrite(). 1827 + * xlog_write_iclog knows about the fact that the log may not start with 1828 + * block zero on a given device. 
1833 1829 */ 1834 - 1835 - STATIC int 1830 + STATIC void 1836 1831 xlog_sync( 1837 1832 struct xlog *log, 1838 1833 struct xlog_in_core *iclog) 1839 1834 { 1840 - xfs_buf_t *bp; 1841 - int i; 1842 - uint count; /* byte count of bwrite */ 1843 - uint count_init; /* initial count before roundup */ 1844 - int roundoff; /* roundoff to BB or stripe */ 1845 - int split = 0; /* split write into two regions */ 1846 - int error; 1847 - int v2 = xfs_sb_version_haslogv2(&log->l_mp->m_sb); 1848 - int size; 1835 + unsigned int count; /* byte count of bwrite */ 1836 + unsigned int roundoff; /* roundoff to BB or stripe */ 1837 + uint64_t bno; 1838 + unsigned int size; 1839 + bool need_flush = true, split = false; 1849 1840 1850 - XFS_STATS_INC(log->l_mp, xs_log_writes); 1851 1841 ASSERT(atomic_read(&iclog->ic_refcnt) == 0); 1852 1842 1853 - /* Add for LR header */ 1854 - count_init = log->l_iclog_hsize + iclog->ic_offset; 1855 - 1856 - /* Round out the log write size */ 1857 - if (v2 && log->l_mp->m_sb.sb_logsunit > 1) { 1858 - /* we have a v2 stripe unit to use */ 1859 - count = XLOG_LSUNITTOB(log, XLOG_BTOLSUNIT(log, count_init)); 1860 - } else { 1861 - count = BBTOB(BTOBB(count_init)); 1862 - } 1863 - roundoff = count - count_init; 1864 - ASSERT(roundoff >= 0); 1865 - ASSERT((v2 && log->l_mp->m_sb.sb_logsunit > 1 && 1866 - roundoff < log->l_mp->m_sb.sb_logsunit) 1867 - || 1868 - (log->l_mp->m_sb.sb_logsunit <= 1 && 1869 - roundoff < BBTOB(1))); 1843 + count = xlog_calc_iclog_size(log, iclog, &roundoff); 1870 1844 1871 1845 /* move grant heads by roundoff in sync */ 1872 1846 xlog_grant_add_space(log, &log->l_reserve_head.grant, roundoff); ··· 1854 1874 1855 1875 /* real byte length */ 1856 1876 size = iclog->ic_offset; 1857 - if (v2) 1877 + if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) 1858 1878 size += roundoff; 1859 1879 iclog->ic_header.h_len = cpu_to_be32(size); 1860 1880 1861 - bp = iclog->ic_bp; 1862 - XFS_BUF_SET_ADDR(bp, 
BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn))); 1863 - 1881 + XFS_STATS_INC(log->l_mp, xs_log_writes); 1864 1882 XFS_STATS_ADD(log->l_mp, xs_log_blocks, BTOBB(count)); 1865 1883 1884 + bno = BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn)); 1885 + 1866 1886 /* Do we need to split this write into 2 parts? */ 1867 - if (XFS_BUF_ADDR(bp) + BTOBB(count) > log->l_logBBsize) { 1868 - char *dptr; 1869 - 1870 - split = count - (BBTOB(log->l_logBBsize - XFS_BUF_ADDR(bp))); 1871 - count = BBTOB(log->l_logBBsize - XFS_BUF_ADDR(bp)); 1872 - iclog->ic_bwritecnt = 2; 1873 - 1874 - /* 1875 - * Bump the cycle numbers at the start of each block in the 1876 - * part of the iclog that ends up in the buffer that gets 1877 - * written to the start of the log. 1878 - * 1879 - * Watch out for the header magic number case, though. 1880 - */ 1881 - dptr = (char *)&iclog->ic_header + count; 1882 - for (i = 0; i < split; i += BBSIZE) { 1883 - uint32_t cycle = be32_to_cpu(*(__be32 *)dptr); 1884 - if (++cycle == XLOG_HEADER_MAGIC_NUM) 1885 - cycle++; 1886 - *(__be32 *)dptr = cpu_to_be32(cycle); 1887 - 1888 - dptr += BBSIZE; 1889 - } 1890 - } else { 1891 - iclog->ic_bwritecnt = 1; 1887 + if (bno + BTOBB(count) > log->l_logBBsize) { 1888 + xlog_split_iclog(log, &iclog->ic_header, bno, count); 1889 + split = true; 1892 1890 } 1893 1891 1894 1892 /* calculcate the checksum */ ··· 1879 1921 * write on I/O completion and shutdown the fs. The subsequent mount 1880 1922 * detects the bad CRC and attempts to recover. 1881 1923 */ 1924 + #ifdef DEBUG 1882 1925 if (XFS_TEST_ERROR(false, log->l_mp, XFS_ERRTAG_LOG_BAD_CRC)) { 1883 1926 iclog->ic_header.h_crc &= cpu_to_le32(0xAAAAAAAA); 1884 - iclog->ic_state |= XLOG_STATE_IOABORT; 1927 + iclog->ic_fail_crc = true; 1885 1928 xfs_warn(log->l_mp, 1886 1929 "Intentionally corrupted log record at LSN 0x%llx. 
Shutdown imminent.", 1887 1930 be64_to_cpu(iclog->ic_header.h_lsn)); 1888 1931 } 1889 - 1890 - bp->b_io_length = BTOBB(count); 1891 - bp->b_log_item = iclog; 1892 - bp->b_flags &= ~XBF_FLUSH; 1893 - bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE | XBF_FUA); 1932 + #endif 1894 1933 1895 1934 /* 1896 1935 * Flush the data device before flushing the log to make sure all meta ··· 1897 1942 * synchronously here; for an internal log we can simply use the block 1898 1943 * layer state machine for preflushes. 1899 1944 */ 1900 - if (log->l_mp->m_logdev_targp != log->l_mp->m_ddev_targp) 1945 + if (log->l_targ != log->l_mp->m_ddev_targp || split) { 1901 1946 xfs_blkdev_issue_flush(log->l_mp->m_ddev_targp); 1902 - else 1903 - bp->b_flags |= XBF_FLUSH; 1904 - 1905 - ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); 1906 - ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize); 1907 - 1908 - xlog_verify_iclog(log, iclog, count, true); 1909 - 1910 - /* account for log which doesn't start at block #0 */ 1911 - XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart); 1912 - 1913 - /* 1914 - * Don't call xfs_bwrite here. We do log-syncs even when the filesystem 1915 - * is shutting down. 
1916 - */ 1917 - error = xlog_bdstrat(bp); 1918 - if (error) { 1919 - xfs_buf_ioerror_alert(bp, "xlog_sync"); 1920 - return error; 1947 + need_flush = false; 1921 1948 } 1922 - if (split) { 1923 - bp = iclog->ic_log->l_xbuf; 1924 - XFS_BUF_SET_ADDR(bp, 0); /* logical 0 */ 1925 - xfs_buf_associate_memory(bp, 1926 - (char *)&iclog->ic_header + count, split); 1927 - bp->b_log_item = iclog; 1928 - bp->b_flags &= ~XBF_FLUSH; 1929 - bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE | XBF_FUA); 1930 1949 1931 - ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); 1932 - ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize); 1933 - 1934 - /* account for internal log which doesn't start at block #0 */ 1935 - XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart); 1936 - error = xlog_bdstrat(bp); 1937 - if (error) { 1938 - xfs_buf_ioerror_alert(bp, "xlog_sync (split)"); 1939 - return error; 1940 - } 1941 - } 1942 - return 0; 1943 - } /* xlog_sync */ 1950 + xlog_verify_iclog(log, iclog, count); 1951 + xlog_write_iclog(log, iclog, bno, count, need_flush); 1952 + } 1944 1953 1945 1954 /* 1946 1955 * Deallocate a log structure ··· 1924 2005 */ 1925 2006 iclog = log->l_iclog; 1926 2007 for (i = 0; i < log->l_iclog_bufs; i++) { 1927 - xfs_buf_lock(iclog->ic_bp); 1928 - xfs_buf_unlock(iclog->ic_bp); 2008 + down(&iclog->ic_sema); 2009 + up(&iclog->ic_sema); 1929 2010 iclog = iclog->ic_next; 1930 2011 } 1931 2012 1932 - /* 1933 - * Always need to ensure that the extra buffer does not point to memory 1934 - * owned by another log buffer before we free it. Also, cycle the lock 1935 - * first to ensure we've completed IO on it. 
1936 - */ 1937 - xfs_buf_lock(log->l_xbuf); 1938 - xfs_buf_unlock(log->l_xbuf); 1939 - xfs_buf_set_empty(log->l_xbuf, BTOBB(log->l_iclog_size)); 1940 - xfs_buf_free(log->l_xbuf); 1941 - 1942 2013 iclog = log->l_iclog; 1943 2014 for (i = 0; i < log->l_iclog_bufs; i++) { 1944 - xfs_buf_free(iclog->ic_bp); 1945 2015 next_iclog = iclog->ic_next; 2016 + kmem_free(iclog->ic_data); 1946 2017 kmem_free(iclog); 1947 2018 iclog = next_iclog; 1948 2019 } 1949 - spinlock_destroy(&log->l_icloglock); 1950 2020 1951 2021 log->l_mp->m_log = NULL; 2022 + destroy_workqueue(log->l_ioend_workqueue); 1952 2023 kmem_free(log); 1953 2024 } /* xlog_dealloc_log */ 1954 2025 ··· 2519 2610 if (iclog->ic_state == XLOG_STATE_DIRTY) { 2520 2611 iclog->ic_state = XLOG_STATE_ACTIVE; 2521 2612 iclog->ic_offset = 0; 2522 - ASSERT(iclog->ic_callback == NULL); 2613 + ASSERT(list_empty_careful(&iclog->ic_callbacks)); 2523 2614 /* 2524 2615 * If the number of ops in this iclog indicate it just 2525 2616 * contains the dummy transaction, we can ··· 2589 2680 2590 2681 STATIC xfs_lsn_t 2591 2682 xlog_get_lowest_lsn( 2592 - struct xlog *log) 2683 + struct xlog *log) 2593 2684 { 2594 - xlog_in_core_t *lsn_log; 2595 - xfs_lsn_t lowest_lsn, lsn; 2685 + struct xlog_in_core *iclog = log->l_iclog; 2686 + xfs_lsn_t lowest_lsn = 0, lsn; 2596 2687 2597 - lsn_log = log->l_iclog; 2598 - lowest_lsn = 0; 2599 2688 do { 2600 - if (!(lsn_log->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_DIRTY))) { 2601 - lsn = be64_to_cpu(lsn_log->ic_header.h_lsn); 2602 - if ((lsn && !lowest_lsn) || 2603 - (XFS_LSN_CMP(lsn, lowest_lsn) < 0)) { 2689 + if (iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY)) 2690 + continue; 2691 + 2692 + lsn = be64_to_cpu(iclog->ic_header.h_lsn); 2693 + if ((lsn && !lowest_lsn) || XFS_LSN_CMP(lsn, lowest_lsn) < 0) 2604 2694 lowest_lsn = lsn; 2605 - } 2606 - } 2607 - lsn_log = lsn_log->ic_next; 2608 - } while (lsn_log != log->l_iclog); 2695 + } while ((iclog = iclog->ic_next) != log->l_iclog); 2696 + 
2609 2697 return lowest_lsn; 2610 2698 } 2611 - 2612 2699 2613 2700 STATIC void 2614 2701 xlog_state_do_callback( 2615 2702 struct xlog *log, 2616 - int aborted, 2703 + bool aborted, 2617 2704 struct xlog_in_core *ciclog) 2618 2705 { 2619 2706 xlog_in_core_t *iclog; 2620 2707 xlog_in_core_t *first_iclog; /* used to know when we've 2621 2708 * processed all iclogs once */ 2622 - xfs_log_callback_t *cb, *cb_next; 2623 2709 int flushcnt = 0; 2624 2710 xfs_lsn_t lowest_lsn; 2625 2711 int ioerrors; /* counter: iclogs with errors */ ··· 2725 2821 */ 2726 2822 ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn), 2727 2823 be64_to_cpu(iclog->ic_header.h_lsn)) <= 0); 2728 - if (iclog->ic_callback) 2824 + if (!list_empty_careful(&iclog->ic_callbacks)) 2729 2825 atomic64_set(&log->l_last_sync_lsn, 2730 2826 be64_to_cpu(iclog->ic_header.h_lsn)); 2731 2827 ··· 2742 2838 * callbacks being added. 2743 2839 */ 2744 2840 spin_lock(&iclog->ic_callback_lock); 2745 - cb = iclog->ic_callback; 2746 - while (cb) { 2747 - iclog->ic_callback_tail = &(iclog->ic_callback); 2748 - iclog->ic_callback = NULL; 2749 - spin_unlock(&iclog->ic_callback_lock); 2841 + while (!list_empty(&iclog->ic_callbacks)) { 2842 + LIST_HEAD(tmp); 2750 2843 2751 - /* perform callbacks in the order given */ 2752 - for (; cb; cb = cb_next) { 2753 - cb_next = cb->cb_next; 2754 - cb->cb_func(cb->cb_arg, aborted); 2755 - } 2844 + list_splice_init(&iclog->ic_callbacks, &tmp); 2845 + 2846 + spin_unlock(&iclog->ic_callback_lock); 2847 + xlog_cil_process_committed(&tmp, aborted); 2756 2848 spin_lock(&iclog->ic_callback_lock); 2757 - cb = iclog->ic_callback; 2758 2849 } 2759 2850 2760 2851 loopdidcallbacks++; 2761 2852 funcdidcallbacks++; 2762 2853 2763 2854 spin_lock(&log->l_icloglock); 2764 - ASSERT(iclog->ic_callback == NULL); 2765 2855 spin_unlock(&iclog->ic_callback_lock); 2766 2856 if (!(iclog->ic_state & XLOG_STATE_IOERROR)) 2767 2857 iclog->ic_state = XLOG_STATE_DIRTY; ··· 2841 2943 */ 2842 2944 STATIC void 2843 
2945 xlog_state_done_syncing( 2844 - xlog_in_core_t *iclog, 2845 - int aborted) 2946 + struct xlog_in_core *iclog, 2947 + bool aborted) 2846 2948 { 2847 - struct xlog *log = iclog->ic_log; 2949 + struct xlog *log = iclog->ic_log; 2848 2950 2849 2951 spin_lock(&log->l_icloglock); 2850 2952 2851 2953 ASSERT(iclog->ic_state == XLOG_STATE_SYNCING || 2852 2954 iclog->ic_state == XLOG_STATE_IOERROR); 2853 2955 ASSERT(atomic_read(&iclog->ic_refcnt) == 0); 2854 - ASSERT(iclog->ic_bwritecnt == 1 || iclog->ic_bwritecnt == 2); 2855 - 2856 2956 2857 2957 /* 2858 2958 * If we got an error, either on the first buffer, or in the case of ··· 2858 2962 * and none should ever be attempted to be written to disk 2859 2963 * again. 2860 2964 */ 2861 - if (iclog->ic_state != XLOG_STATE_IOERROR) { 2862 - if (--iclog->ic_bwritecnt == 1) { 2863 - spin_unlock(&log->l_icloglock); 2864 - return; 2865 - } 2965 + if (iclog->ic_state != XLOG_STATE_IOERROR) 2866 2966 iclog->ic_state = XLOG_STATE_DONE_SYNC; 2867 - } 2868 2967 2869 2968 /* 2870 2969 * Someone could be sleeping prior to writing out the next ··· 3128 3237 * flags after this point. 
3129 3238 */ 3130 3239 if (sync) 3131 - return xlog_sync(log, iclog); 3240 + xlog_sync(log, iclog); 3132 3241 return 0; 3133 3242 } /* xlog_state_release_iclog */ 3134 3243 ··· 3719 3828 xlog_verify_iclog( 3720 3829 struct xlog *log, 3721 3830 struct xlog_in_core *iclog, 3722 - int count, 3723 - bool syncing) 3831 + int count) 3724 3832 { 3725 3833 xlog_op_header_t *ophead; 3726 3834 xlog_in_core_t *icptr; ··· 3763 3873 /* clientid is only 1 byte */ 3764 3874 p = &ophead->oh_clientid; 3765 3875 field_offset = p - base_ptr; 3766 - if (!syncing || (field_offset & 0x1ff)) { 3876 + if (field_offset & 0x1ff) { 3767 3877 clientid = ophead->oh_clientid; 3768 3878 } else { 3769 3879 idx = BTOBBT((char *)&ophead->oh_clientid - iclog->ic_datap); ··· 3786 3896 /* check length */ 3787 3897 p = &ophead->oh_len; 3788 3898 field_offset = p - base_ptr; 3789 - if (!syncing || (field_offset & 0x1ff)) { 3899 + if (field_offset & 0x1ff) { 3790 3900 op_len = be32_to_cpu(ophead->oh_len); 3791 3901 } else { 3792 3902 idx = BTOBBT((uintptr_t)&ophead->oh_len - ··· 3923 4033 * avoid races. 3924 4034 */ 3925 4035 wake_up_all(&log->l_cilp->xc_commit_wait); 3926 - xlog_state_do_callback(log, XFS_LI_ABORTED, NULL); 4036 + xlog_state_do_callback(log, true, NULL); 3927 4037 3928 4038 #ifdef XFSERRORDEBUG 3929 4039 {
+4 -13
fs/xfs/xfs_log.h
··· 6 6 #ifndef __XFS_LOG_H__ 7 7 #define __XFS_LOG_H__ 8 8 9 + struct xfs_cil_ctx; 10 + 9 11 struct xfs_log_vec { 10 12 struct xfs_log_vec *lv_next; /* next lv in build list */ 11 13 int lv_niovecs; /* number of iovecs in lv */ ··· 74 72 } 75 73 76 74 /* 77 - * Structure used to pass callback function and the function's argument 78 - * to the log manager. 79 - */ 80 - typedef struct xfs_log_callback { 81 - struct xfs_log_callback *cb_next; 82 - void (*cb_func)(void *, int); 83 - void *cb_arg; 84 - } xfs_log_callback_t; 85 - 86 - /* 87 75 * By comparing each component, we don't have to worry about extra 88 76 * endian issues in treating two 32 bit numbers as one 64 bit number 89 77 */ ··· 117 125 xfs_daddr_t start_block, 118 126 int num_bblocks); 119 127 int xfs_log_mount_finish(struct xfs_mount *mp); 120 - int xfs_log_mount_cancel(struct xfs_mount *); 128 + void xfs_log_mount_cancel(struct xfs_mount *); 121 129 xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp); 122 130 xfs_lsn_t xlog_assign_tail_lsn_locked(struct xfs_mount *mp); 123 131 void xfs_log_space_wake(struct xfs_mount *mp); 124 - int xfs_log_notify(struct xlog_in_core *iclog, 125 - struct xfs_log_callback *callback_entry); 126 132 int xfs_log_release_iclog(struct xfs_mount *mp, 127 133 struct xlog_in_core *iclog); 128 134 int xfs_log_reserve(struct xfs_mount *mp, ··· 138 148 139 149 void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, 140 150 xfs_lsn_t *commit_lsn, bool regrant); 151 + void xlog_cil_process_committed(struct list_head *list, bool aborted); 141 152 bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip); 142 153 143 154 void xfs_log_work_queue(struct xfs_mount *mp);
+35 -16
fs/xfs/xfs_log_cil.c
··· 10 10 #include "xfs_shared.h" 11 11 #include "xfs_trans_resv.h" 12 12 #include "xfs_mount.h" 13 - #include "xfs_error.h" 14 - #include "xfs_alloc.h" 15 13 #include "xfs_extent_busy.h" 16 - #include "xfs_discard.h" 17 14 #include "xfs_trans.h" 18 15 #include "xfs_trans_priv.h" 19 16 #include "xfs_log.h" ··· 243 246 * shadow buffer, so update the the pointer to it appropriately. 244 247 */ 245 248 if (!old_lv) { 246 - lv->lv_item->li_ops->iop_pin(lv->lv_item); 249 + if (lv->lv_item->li_ops->iop_pin) 250 + lv->lv_item->li_ops->iop_pin(lv->lv_item); 247 251 lv->lv_item->li_lv_shadow = NULL; 248 252 } else if (old_lv != lv) { 249 253 ASSERT(lv->lv_buf_len != XFS_LOG_VEC_ORDERED); ··· 574 576 */ 575 577 static void 576 578 xlog_cil_committed( 577 - void *args, 578 - int abort) 579 + struct xfs_cil_ctx *ctx, 580 + bool abort) 579 581 { 580 - struct xfs_cil_ctx *ctx = args; 581 582 struct xfs_mount *mp = ctx->cil->xc_log->l_mp; 582 583 583 584 /* ··· 609 612 xlog_discard_busy_extents(mp, ctx); 610 613 else 611 614 kmem_free(ctx); 615 + } 616 + 617 + void 618 + xlog_cil_process_committed( 619 + struct list_head *list, 620 + bool aborted) 621 + { 622 + struct xfs_cil_ctx *ctx; 623 + 624 + while ((ctx = list_first_entry_or_null(list, 625 + struct xfs_cil_ctx, iclog_entry))) { 626 + list_del(&ctx->iclog_entry); 627 + xlog_cil_committed(ctx, aborted); 628 + } 612 629 } 613 630 614 631 /* ··· 846 835 if (commit_lsn == -1) 847 836 goto out_abort; 848 837 849 - /* attach all the transactions w/ busy extents to iclog */ 850 - ctx->log_cb.cb_func = xlog_cil_committed; 851 - ctx->log_cb.cb_arg = ctx; 852 - error = xfs_log_notify(commit_iclog, &ctx->log_cb); 853 - if (error) 838 + spin_lock(&commit_iclog->ic_callback_lock); 839 + if (commit_iclog->ic_state & XLOG_STATE_IOERROR) { 840 + spin_unlock(&commit_iclog->ic_callback_lock); 854 841 goto out_abort; 842 + } 843 + ASSERT_ALWAYS(commit_iclog->ic_state == XLOG_STATE_ACTIVE || 844 + commit_iclog->ic_state == 
XLOG_STATE_WANT_SYNC); 845 + list_add_tail(&ctx->iclog_entry, &commit_iclog->ic_callbacks); 846 + spin_unlock(&commit_iclog->ic_callback_lock); 855 847 856 848 /* 857 849 * now the checkpoint commit is complete and we've attached the ··· 878 864 out_abort_free_ticket: 879 865 xfs_log_ticket_put(tic); 880 866 out_abort: 881 - xlog_cil_committed(ctx, XFS_LI_ABORTED); 867 + xlog_cil_committed(ctx, true); 882 868 return -EIO; 883 869 } 884 870 ··· 998 984 { 999 985 struct xlog *log = mp->m_log; 1000 986 struct xfs_cil *cil = log->l_cilp; 987 + struct xfs_log_item *lip, *next; 1001 988 xfs_lsn_t xc_commit_lsn; 1002 989 1003 990 /* ··· 1023 1008 1024 1009 /* 1025 1010 * Once all the items of the transaction have been copied to the CIL, 1026 - * the items can be unlocked and freed. 1011 + * the items can be unlocked and possibly freed. 1027 1012 * 1028 1013 * This needs to be done before we drop the CIL context lock because we 1029 1014 * have to update state in the log items and unlock them before they go ··· 1032 1017 * the log items. This affects (at least) processing of stale buffers, 1033 1018 * inodes and EFIs. 1034 1019 */ 1035 - xfs_trans_free_items(tp, xc_commit_lsn, false); 1036 - 1020 + trace_xfs_trans_commit_items(tp, _RET_IP_); 1021 + list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) { 1022 + xfs_trans_del_item(lip); 1023 + if (lip->li_ops->iop_committing) 1024 + lip->li_ops->iop_committing(lip, xc_commit_lsn); 1025 + } 1037 1026 xlog_cil_push_background(log); 1038 1027 1039 1028 up_read(&cil->xc_ctx_lock);
+18 -18
fs/xfs/xfs_log_priv.h
··· 10 10 struct xlog; 11 11 struct xlog_ticket; 12 12 struct xfs_mount; 13 - struct xfs_log_callback; 14 13 15 14 /* 16 15 * Flags for log structure ··· 49 50 #define XLOG_STATE_CALLBACK 0x0020 /* Callback functions now */ 50 51 #define XLOG_STATE_DIRTY 0x0040 /* Dirty IC log, not ready for ACTIVE status*/ 51 52 #define XLOG_STATE_IOERROR 0x0080 /* IO error happened in sync'ing log */ 52 - #define XLOG_STATE_IOABORT 0x0100 /* force abort on I/O completion (debug) */ 53 53 #define XLOG_STATE_ALL 0x7FFF /* All possible valid flags */ 54 54 #define XLOG_STATE_NOTUSED 0x8000 /* This IC log not being used */ 55 55 ··· 177 179 * the iclog. 178 180 * - ic_forcewait is used to implement synchronous forcing of the iclog to disk. 179 181 * - ic_next is the pointer to the next iclog in the ring. 180 - * - ic_bp is a pointer to the buffer used to write this incore log to disk. 181 182 * - ic_log is a pointer back to the global log structure. 182 - * - ic_callback is a linked list of callback function/argument pairs to be 183 - * called after an iclog finishes writing. 184 - * - ic_size is the full size of the header plus data. 183 + * - ic_size is the full size of the log buffer, minus the cycle headers. 184 + * - ic_io_size is the size of the currently pending log buffer write, which 185 + * might be smaller than ic_size 185 186 * - ic_offset is the current number of bytes written to in this iclog. 186 187 * - ic_refcnt is bumped when someone is writing to the log. 187 188 * - ic_state is the state of the iclog. ··· 190 193 * structure cacheline aligned. 
The following fields can be contended on 191 194 * by independent processes: 192 195 * 193 - * - ic_callback_* 196 + * - ic_callbacks 194 197 * - ic_refcnt 195 198 * - fields protected by the global l_icloglock 196 199 * ··· 203 206 wait_queue_head_t ic_write_wait; 204 207 struct xlog_in_core *ic_next; 205 208 struct xlog_in_core *ic_prev; 206 - struct xfs_buf *ic_bp; 207 209 struct xlog *ic_log; 208 - int ic_size; 209 - int ic_offset; 210 - int ic_bwritecnt; 210 + u32 ic_size; 211 + u32 ic_io_size; 212 + u32 ic_offset; 211 213 unsigned short ic_state; 212 214 char *ic_datap; /* pointer to iclog data */ 213 215 214 216 /* Callback structures need their own cacheline */ 215 217 spinlock_t ic_callback_lock ____cacheline_aligned_in_smp; 216 - struct xfs_log_callback *ic_callback; 217 - struct xfs_log_callback **ic_callback_tail; 218 + struct list_head ic_callbacks; 218 219 219 220 /* reference counts need their own cacheline */ 220 221 atomic_t ic_refcnt ____cacheline_aligned_in_smp; 221 222 xlog_in_core_2_t *ic_data; 222 223 #define ic_header ic_data->hic_header 224 + #ifdef DEBUG 225 + bool ic_fail_crc : 1; 226 + #endif 227 + struct semaphore ic_sema; 228 + struct work_struct ic_end_io_work; 229 + struct bio ic_bio; 230 + struct bio_vec ic_bvec[]; 223 231 } xlog_in_core_t; 224 232 225 233 /* ··· 245 243 int space_used; /* aggregate size of regions */ 246 244 struct list_head busy_extents; /* busy extents in chkpt */ 247 245 struct xfs_log_vec *lv_chain; /* logvecs being pushed */ 248 - struct xfs_log_callback log_cb; /* completion callback hook. 
*/ 246 + struct list_head iclog_entry; 249 247 struct list_head committing; /* ctx committing list */ 250 248 struct work_struct discard_endio_work; 251 249 }; ··· 352 350 struct xfs_mount *l_mp; /* mount point */ 353 351 struct xfs_ail *l_ailp; /* AIL log is working with */ 354 352 struct xfs_cil *l_cilp; /* CIL log is working with */ 355 - struct xfs_buf *l_xbuf; /* extra buffer for log 356 - * wrapping */ 357 353 struct xfs_buftarg *l_targ; /* buftarg of log */ 354 + struct workqueue_struct *l_ioend_workqueue; /* for I/O completions */ 358 355 struct delayed_work l_work; /* background flush work */ 359 356 uint l_flags; 360 357 uint l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */ ··· 362 361 int l_iclog_heads; /* # of iclog header sectors */ 363 362 uint l_sectBBsize; /* sector size in BBs (2^n) */ 364 363 int l_iclog_size; /* size of log in bytes */ 365 - int l_iclog_size_log; /* log power size of log */ 366 364 int l_iclog_bufs; /* number of iclog buffers */ 367 365 xfs_daddr_t l_logBBstart; /* start block of log */ 368 366 int l_logsize; /* size of log in bytes */ ··· 418 418 extern int 419 419 xlog_recover_finish( 420 420 struct xlog *log); 421 - extern int 421 + extern void 422 422 xlog_recover_cancel(struct xlog *); 423 423 424 424 extern __le32 xlog_cksum(struct xlog *log, struct xlog_rec_header *rhead,
+190 -277
fs/xfs/xfs_log_recover.c
··· 13 13 #include "xfs_sb.h" 14 14 #include "xfs_mount.h" 15 15 #include "xfs_defer.h" 16 - #include "xfs_da_format.h" 17 - #include "xfs_da_btree.h" 18 16 #include "xfs_inode.h" 19 17 #include "xfs_trans.h" 20 18 #include "xfs_log.h" ··· 24 26 #include "xfs_alloc.h" 25 27 #include "xfs_ialloc.h" 26 28 #include "xfs_quota.h" 27 - #include "xfs_cksum.h" 28 29 #include "xfs_trace.h" 29 30 #include "xfs_icache.h" 30 31 #include "xfs_bmap_btree.h" ··· 76 79 * are valid, false otherwise. 77 80 */ 78 81 static inline bool 79 - xlog_verify_bp( 82 + xlog_verify_bno( 80 83 struct xlog *log, 81 84 xfs_daddr_t blk_no, 82 85 int bbcount) ··· 89 92 } 90 93 91 94 /* 92 - * Allocate a buffer to hold log data. The buffer needs to be able 93 - * to map to a range of nbblks basic blocks at any valid (basic 94 - * block) offset within the log. 95 + * Allocate a buffer to hold log data. The buffer needs to be able to map to 96 + * a range of nbblks basic blocks at any valid offset within the log. 95 97 */ 96 - STATIC xfs_buf_t * 97 - xlog_get_bp( 98 + static char * 99 + xlog_alloc_buffer( 98 100 struct xlog *log, 99 101 int nbblks) 100 102 { 101 - struct xfs_buf *bp; 102 - 103 103 /* 104 104 * Pass log block 0 since we don't have an addr yet, buffer will be 105 105 * verified on read. 106 106 */ 107 - if (!xlog_verify_bp(log, 0, nbblks)) { 107 + if (!xlog_verify_bno(log, 0, nbblks)) { 108 108 xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer", 109 109 nbblks); 110 110 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); ··· 109 115 } 110 116 111 117 /* 112 - * We do log I/O in units of log sectors (a power-of-2 113 - * multiple of the basic block size), so we round up the 114 - * requested size to accommodate the basic blocks required 115 - * for complete log sectors. 
118 + * We do log I/O in units of log sectors (a power-of-2 multiple of the 119 + * basic block size), so we round up the requested size to accommodate 120 + * the basic blocks required for complete log sectors. 116 121 * 117 - * In addition, the buffer may be used for a non-sector- 118 - * aligned block offset, in which case an I/O of the 119 - * requested size could extend beyond the end of the 120 - * buffer. If the requested size is only 1 basic block it 121 - * will never straddle a sector boundary, so this won't be 122 - * an issue. Nor will this be a problem if the log I/O is 123 - * done in basic blocks (sector size 1). But otherwise we 124 - * extend the buffer by one extra log sector to ensure 125 - * there's space to accommodate this possibility. 122 + * In addition, the buffer may be used for a non-sector-aligned block 123 + * offset, in which case an I/O of the requested size could extend 124 + * beyond the end of the buffer. If the requested size is only 1 basic 125 + * block it will never straddle a sector boundary, so this won't be an 126 + * issue. Nor will this be a problem if the log I/O is done in basic 127 + * blocks (sector size 1). But otherwise we extend the buffer by one 128 + * extra log sector to ensure there's space to accommodate this 129 + * possibility. 126 130 */ 127 131 if (nbblks > 1 && log->l_sectBBsize > 1) 128 132 nbblks += log->l_sectBBsize; 129 133 nbblks = round_up(nbblks, log->l_sectBBsize); 130 - 131 - bp = xfs_buf_get_uncached(log->l_mp->m_logdev_targp, nbblks, 0); 132 - if (bp) 133 - xfs_buf_unlock(bp); 134 - return bp; 135 - } 136 - 137 - STATIC void 138 - xlog_put_bp( 139 - xfs_buf_t *bp) 140 - { 141 - xfs_buf_free(bp); 134 + return kmem_alloc_large(BBTOB(nbblks), KM_MAYFAIL); 142 135 } 143 136 144 137 /* 145 138 * Return the address of the start of the given block number's data 146 139 * in a log buffer. The buffer covers a log sector-aligned region. 
147 140 */ 148 - STATIC char * 141 + static inline unsigned int 149 142 xlog_align( 150 143 struct xlog *log, 151 - xfs_daddr_t blk_no, 152 - int nbblks, 153 - struct xfs_buf *bp) 144 + xfs_daddr_t blk_no) 154 145 { 155 - xfs_daddr_t offset = blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1); 156 - 157 - ASSERT(offset + nbblks <= bp->b_length); 158 - return bp->b_addr + BBTOB(offset); 146 + return BBTOB(blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1)); 159 147 } 160 148 161 - 162 - /* 163 - * nbblks should be uint, but oh well. Just want to catch that 32-bit length. 164 - */ 165 - STATIC int 166 - xlog_bread_noalign( 167 - struct xlog *log, 168 - xfs_daddr_t blk_no, 169 - int nbblks, 170 - struct xfs_buf *bp) 149 + static int 150 + xlog_do_io( 151 + struct xlog *log, 152 + xfs_daddr_t blk_no, 153 + unsigned int nbblks, 154 + char *data, 155 + unsigned int op) 171 156 { 172 - int error; 157 + int error; 173 158 174 - if (!xlog_verify_bp(log, blk_no, nbblks)) { 159 + if (!xlog_verify_bno(log, blk_no, nbblks)) { 175 160 xfs_warn(log->l_mp, 176 161 "Invalid log block/length (0x%llx, 0x%x) for buffer", 177 162 blk_no, nbblks); ··· 160 187 161 188 blk_no = round_down(blk_no, log->l_sectBBsize); 162 189 nbblks = round_up(nbblks, log->l_sectBBsize); 163 - 164 190 ASSERT(nbblks > 0); 165 - ASSERT(nbblks <= bp->b_length); 166 191 167 - XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); 168 - bp->b_flags |= XBF_READ; 169 - bp->b_io_length = nbblks; 170 - bp->b_error = 0; 171 - 172 - error = xfs_buf_submit(bp); 173 - if (error && !XFS_FORCED_SHUTDOWN(log->l_mp)) 174 - xfs_buf_ioerror_alert(bp, __func__); 192 + error = xfs_rw_bdev(log->l_targ->bt_bdev, log->l_logBBstart + blk_no, 193 + BBTOB(nbblks), data, op); 194 + if (error && !XFS_FORCED_SHUTDOWN(log->l_mp)) { 195 + xfs_alert(log->l_mp, 196 + "log recovery %s I/O error at daddr 0x%llx len %d error %d", 197 + op == REQ_OP_WRITE ? 
"write" : "read", 198 + blk_no, nbblks, error); 199 + } 175 200 return error; 201 + } 202 + 203 + STATIC int 204 + xlog_bread_noalign( 205 + struct xlog *log, 206 + xfs_daddr_t blk_no, 207 + int nbblks, 208 + char *data) 209 + { 210 + return xlog_do_io(log, blk_no, nbblks, data, REQ_OP_READ); 176 211 } 177 212 178 213 STATIC int ··· 188 207 struct xlog *log, 189 208 xfs_daddr_t blk_no, 190 209 int nbblks, 191 - struct xfs_buf *bp, 210 + char *data, 192 211 char **offset) 193 212 { 194 213 int error; 195 214 196 - error = xlog_bread_noalign(log, blk_no, nbblks, bp); 197 - if (error) 198 - return error; 199 - 200 - *offset = xlog_align(log, blk_no, nbblks, bp); 201 - return 0; 215 + error = xlog_do_io(log, blk_no, nbblks, data, REQ_OP_READ); 216 + if (!error) 217 + *offset = data + xlog_align(log, blk_no); 218 + return error; 202 219 } 203 220 204 - /* 205 - * Read at an offset into the buffer. Returns with the buffer in it's original 206 - * state regardless of the result of the read. 207 - */ 208 - STATIC int 209 - xlog_bread_offset( 210 - struct xlog *log, 211 - xfs_daddr_t blk_no, /* block to read from */ 212 - int nbblks, /* blocks to read */ 213 - struct xfs_buf *bp, 214 - char *offset) 215 - { 216 - char *orig_offset = bp->b_addr; 217 - int orig_len = BBTOB(bp->b_length); 218 - int error, error2; 219 - 220 - error = xfs_buf_associate_memory(bp, offset, BBTOB(nbblks)); 221 - if (error) 222 - return error; 223 - 224 - error = xlog_bread_noalign(log, blk_no, nbblks, bp); 225 - 226 - /* must reset buffer pointer even on error */ 227 - error2 = xfs_buf_associate_memory(bp, orig_offset, orig_len); 228 - if (error) 229 - return error; 230 - return error2; 231 - } 232 - 233 - /* 234 - * Write out the buffer at the given block for the given number of blocks. 235 - * The buffer is kept locked across the write and is returned locked. 236 - * This can only be used for synchronous log writes. 
237 - */ 238 221 STATIC int 239 222 xlog_bwrite( 240 223 struct xlog *log, 241 224 xfs_daddr_t blk_no, 242 225 int nbblks, 243 - struct xfs_buf *bp) 226 + char *data) 244 227 { 245 - int error; 246 - 247 - if (!xlog_verify_bp(log, blk_no, nbblks)) { 248 - xfs_warn(log->l_mp, 249 - "Invalid log block/length (0x%llx, 0x%x) for buffer", 250 - blk_no, nbblks); 251 - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); 252 - return -EFSCORRUPTED; 253 - } 254 - 255 - blk_no = round_down(blk_no, log->l_sectBBsize); 256 - nbblks = round_up(nbblks, log->l_sectBBsize); 257 - 258 - ASSERT(nbblks > 0); 259 - ASSERT(nbblks <= bp->b_length); 260 - 261 - XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); 262 - xfs_buf_hold(bp); 263 - xfs_buf_lock(bp); 264 - bp->b_io_length = nbblks; 265 - bp->b_error = 0; 266 - 267 - error = xfs_bwrite(bp); 268 - if (error) 269 - xfs_buf_ioerror_alert(bp, __func__); 270 - xfs_buf_relse(bp); 271 - return error; 228 + return xlog_do_io(log, blk_no, nbblks, data, REQ_OP_WRITE); 272 229 } 273 230 274 231 #ifdef DEBUG ··· 296 377 * We're not going to bother about retrying 297 378 * this during recovery. One strike! 
298 379 */ 299 - if (!XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) { 380 + if (!XFS_FORCED_SHUTDOWN(bp->b_mount)) { 300 381 xfs_buf_ioerror_alert(bp, __func__); 301 - xfs_force_shutdown(bp->b_target->bt_mount, 302 - SHUTDOWN_META_IO_ERROR); 382 + xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR); 303 383 } 304 384 } 305 385 ··· 323 405 STATIC int 324 406 xlog_find_cycle_start( 325 407 struct xlog *log, 326 - struct xfs_buf *bp, 408 + char *buffer, 327 409 xfs_daddr_t first_blk, 328 410 xfs_daddr_t *last_blk, 329 411 uint cycle) ··· 337 419 end_blk = *last_blk; 338 420 mid_blk = BLK_AVG(first_blk, end_blk); 339 421 while (mid_blk != first_blk && mid_blk != end_blk) { 340 - error = xlog_bread(log, mid_blk, 1, bp, &offset); 422 + error = xlog_bread(log, mid_blk, 1, buffer, &offset); 341 423 if (error) 342 424 return error; 343 425 mid_cycle = xlog_get_cycle(offset); ··· 373 455 { 374 456 xfs_daddr_t i, j; 375 457 uint cycle; 376 - xfs_buf_t *bp; 458 + char *buffer; 377 459 xfs_daddr_t bufblks; 378 460 char *buf = NULL; 379 461 int error = 0; ··· 387 469 bufblks = 1 << ffs(nbblks); 388 470 while (bufblks > log->l_logBBsize) 389 471 bufblks >>= 1; 390 - while (!(bp = xlog_get_bp(log, bufblks))) { 472 + while (!(buffer = xlog_alloc_buffer(log, bufblks))) { 391 473 bufblks >>= 1; 392 474 if (bufblks < log->l_sectBBsize) 393 475 return -ENOMEM; ··· 398 480 399 481 bcount = min(bufblks, (start_blk + nbblks - i)); 400 482 401 - error = xlog_bread(log, i, bcount, bp, &buf); 483 + error = xlog_bread(log, i, bcount, buffer, &buf); 402 484 if (error) 403 485 goto out; 404 486 ··· 416 498 *new_blk = -1; 417 499 418 500 out: 419 - xlog_put_bp(bp); 501 + kmem_free(buffer); 420 502 return error; 421 503 } 422 504 ··· 440 522 int extra_bblks) 441 523 { 442 524 xfs_daddr_t i; 443 - xfs_buf_t *bp; 525 + char *buffer; 444 526 char *offset = NULL; 445 527 xlog_rec_header_t *head = NULL; 446 528 int error = 0; ··· 450 532 451 533 ASSERT(start_blk != 0 || *last_blk != start_blk); 452 
534 453 - if (!(bp = xlog_get_bp(log, num_blks))) { 454 - if (!(bp = xlog_get_bp(log, 1))) 535 + buffer = xlog_alloc_buffer(log, num_blks); 536 + if (!buffer) { 537 + buffer = xlog_alloc_buffer(log, 1); 538 + if (!buffer) 455 539 return -ENOMEM; 456 540 smallmem = 1; 457 541 } else { 458 - error = xlog_bread(log, start_blk, num_blks, bp, &offset); 542 + error = xlog_bread(log, start_blk, num_blks, buffer, &offset); 459 543 if (error) 460 544 goto out; 461 545 offset += ((num_blks - 1) << BBSHIFT); ··· 474 554 } 475 555 476 556 if (smallmem) { 477 - error = xlog_bread(log, i, 1, bp, &offset); 557 + error = xlog_bread(log, i, 1, buffer, &offset); 478 558 if (error) 479 559 goto out; 480 560 } ··· 527 607 *last_blk = i; 528 608 529 609 out: 530 - xlog_put_bp(bp); 610 + kmem_free(buffer); 531 611 return error; 532 612 } 533 613 ··· 549 629 struct xlog *log, 550 630 xfs_daddr_t *return_head_blk) 551 631 { 552 - xfs_buf_t *bp; 632 + char *buffer; 553 633 char *offset; 554 634 xfs_daddr_t new_blk, first_blk, start_blk, last_blk, head_blk; 555 635 int num_scan_bblks; ··· 579 659 } 580 660 581 661 first_blk = 0; /* get cycle # of 1st block */ 582 - bp = xlog_get_bp(log, 1); 583 - if (!bp) 662 + buffer = xlog_alloc_buffer(log, 1); 663 + if (!buffer) 584 664 return -ENOMEM; 585 665 586 - error = xlog_bread(log, 0, 1, bp, &offset); 666 + error = xlog_bread(log, 0, 1, buffer, &offset); 587 667 if (error) 588 - goto bp_err; 668 + goto out_free_buffer; 589 669 590 670 first_half_cycle = xlog_get_cycle(offset); 591 671 592 672 last_blk = head_blk = log_bbnum - 1; /* get cycle # of last block */ 593 - error = xlog_bread(log, last_blk, 1, bp, &offset); 673 + error = xlog_bread(log, last_blk, 1, buffer, &offset); 594 674 if (error) 595 - goto bp_err; 675 + goto out_free_buffer; 596 676 597 677 last_half_cycle = xlog_get_cycle(offset); 598 678 ASSERT(last_half_cycle != 0); ··· 660 740 * ^ we want to locate this spot 661 741 */ 662 742 stop_on_cycle = last_half_cycle; 663 - if ((error 
= xlog_find_cycle_start(log, bp, first_blk, 664 - &head_blk, last_half_cycle))) 665 - goto bp_err; 743 + error = xlog_find_cycle_start(log, buffer, first_blk, &head_blk, 744 + last_half_cycle); 745 + if (error) 746 + goto out_free_buffer; 666 747 } 667 748 668 749 /* ··· 683 762 if ((error = xlog_find_verify_cycle(log, 684 763 start_blk, num_scan_bblks, 685 764 stop_on_cycle, &new_blk))) 686 - goto bp_err; 765 + goto out_free_buffer; 687 766 if (new_blk != -1) 688 767 head_blk = new_blk; 689 768 } else { /* need to read 2 parts of log */ ··· 720 799 if ((error = xlog_find_verify_cycle(log, start_blk, 721 800 num_scan_bblks - (int)head_blk, 722 801 (stop_on_cycle - 1), &new_blk))) 723 - goto bp_err; 802 + goto out_free_buffer; 724 803 if (new_blk != -1) { 725 804 head_blk = new_blk; 726 805 goto validate_head; ··· 736 815 if ((error = xlog_find_verify_cycle(log, 737 816 start_blk, (int)head_blk, 738 817 stop_on_cycle, &new_blk))) 739 - goto bp_err; 818 + goto out_free_buffer; 740 819 if (new_blk != -1) 741 820 head_blk = new_blk; 742 821 } ··· 755 834 if (error == 1) 756 835 error = -EIO; 757 836 if (error) 758 - goto bp_err; 837 + goto out_free_buffer; 759 838 } else { 760 839 start_blk = 0; 761 840 ASSERT(head_blk <= INT_MAX); 762 841 error = xlog_find_verify_log_record(log, start_blk, &head_blk, 0); 763 842 if (error < 0) 764 - goto bp_err; 843 + goto out_free_buffer; 765 844 if (error == 1) { 766 845 /* We hit the beginning of the log during our search */ 767 846 start_blk = log_bbnum - (num_scan_bblks - head_blk); ··· 774 853 if (error == 1) 775 854 error = -EIO; 776 855 if (error) 777 - goto bp_err; 856 + goto out_free_buffer; 778 857 if (new_blk != log_bbnum) 779 858 head_blk = new_blk; 780 859 } else if (error) 781 - goto bp_err; 860 + goto out_free_buffer; 782 861 } 783 862 784 - xlog_put_bp(bp); 863 + kmem_free(buffer); 785 864 if (head_blk == log_bbnum) 786 865 *return_head_blk = 0; 787 866 else ··· 794 873 */ 795 874 return 0; 796 875 797 - bp_err: 798 - 
xlog_put_bp(bp); 799 - 876 + out_free_buffer: 877 + kmem_free(buffer); 800 878 if (error) 801 879 xfs_warn(log->l_mp, "failed to find log head"); 802 880 return error; ··· 815 895 xfs_daddr_t head_blk, 816 896 xfs_daddr_t tail_blk, 817 897 int count, 818 - struct xfs_buf *bp, 898 + char *buffer, 819 899 xfs_daddr_t *rblk, 820 900 struct xlog_rec_header **rhead, 821 901 bool *wrapped) ··· 834 914 */ 835 915 end_blk = head_blk > tail_blk ? tail_blk : 0; 836 916 for (i = (int) head_blk - 1; i >= end_blk; i--) { 837 - error = xlog_bread(log, i, 1, bp, &offset); 917 + error = xlog_bread(log, i, 1, buffer, &offset); 838 918 if (error) 839 919 goto out_error; 840 920 ··· 853 933 */ 854 934 if (tail_blk >= head_blk && found != count) { 855 935 for (i = log->l_logBBsize - 1; i >= (int) tail_blk; i--) { 856 - error = xlog_bread(log, i, 1, bp, &offset); 936 + error = xlog_bread(log, i, 1, buffer, &offset); 857 937 if (error) 858 938 goto out_error; 859 939 ··· 889 969 xfs_daddr_t head_blk, 890 970 xfs_daddr_t tail_blk, 891 971 int count, 892 - struct xfs_buf *bp, 972 + char *buffer, 893 973 xfs_daddr_t *rblk, 894 974 struct xlog_rec_header **rhead, 895 975 bool *wrapped) ··· 908 988 */ 909 989 end_blk = head_blk > tail_blk ? 
head_blk : log->l_logBBsize - 1; 910 990 for (i = (int) tail_blk; i <= end_blk; i++) { 911 - error = xlog_bread(log, i, 1, bp, &offset); 991 + error = xlog_bread(log, i, 1, buffer, &offset); 912 992 if (error) 913 993 goto out_error; 914 994 ··· 926 1006 */ 927 1007 if (tail_blk > head_blk && found != count) { 928 1008 for (i = 0; i < (int) head_blk; i++) { 929 - error = xlog_bread(log, i, 1, bp, &offset); 1009 + error = xlog_bread(log, i, 1, buffer, &offset); 930 1010 if (error) 931 1011 goto out_error; 932 1012 ··· 989 1069 int hsize) 990 1070 { 991 1071 struct xlog_rec_header *thead; 992 - struct xfs_buf *bp; 1072 + char *buffer; 993 1073 xfs_daddr_t first_bad; 994 1074 int error = 0; 995 1075 bool wrapped; 996 1076 xfs_daddr_t tmp_tail; 997 1077 xfs_daddr_t orig_tail = *tail_blk; 998 1078 999 - bp = xlog_get_bp(log, 1); 1000 - if (!bp) 1079 + buffer = xlog_alloc_buffer(log, 1); 1080 + if (!buffer) 1001 1081 return -ENOMEM; 1002 1082 1003 1083 /* 1004 1084 * Make sure the tail points to a record (returns positive count on 1005 1085 * success). 1006 1086 */ 1007 - error = xlog_seek_logrec_hdr(log, head_blk, *tail_blk, 1, bp, 1087 + error = xlog_seek_logrec_hdr(log, head_blk, *tail_blk, 1, buffer, 1008 1088 &tmp_tail, &thead, &wrapped); 1009 1089 if (error < 0) 1010 1090 goto out; ··· 1033 1113 break; 1034 1114 1035 1115 /* skip to the next record; returns positive count on success */ 1036 - error = xlog_seek_logrec_hdr(log, head_blk, first_bad, 2, bp, 1037 - &tmp_tail, &thead, &wrapped); 1116 + error = xlog_seek_logrec_hdr(log, head_blk, first_bad, 2, 1117 + buffer, &tmp_tail, &thead, &wrapped); 1038 1118 if (error < 0) 1039 1119 goto out; 1040 1120 ··· 1049 1129 "Tail block (0x%llx) overwrite detected. 
Updated to 0x%llx", 1050 1130 orig_tail, *tail_blk); 1051 1131 out: 1052 - xlog_put_bp(bp); 1132 + kmem_free(buffer); 1053 1133 return error; 1054 1134 } 1055 1135 ··· 1071 1151 struct xlog *log, 1072 1152 xfs_daddr_t *head_blk, /* in/out: unverified head */ 1073 1153 xfs_daddr_t *tail_blk, /* out: tail block */ 1074 - struct xfs_buf *bp, 1154 + char *buffer, 1075 1155 xfs_daddr_t *rhead_blk, /* start blk of last record */ 1076 1156 struct xlog_rec_header **rhead, /* ptr to last record */ 1077 1157 bool *wrapped) /* last rec. wraps phys. log */ 1078 1158 { 1079 1159 struct xlog_rec_header *tmp_rhead; 1080 - struct xfs_buf *tmp_bp; 1160 + char *tmp_buffer; 1081 1161 xfs_daddr_t first_bad; 1082 1162 xfs_daddr_t tmp_rhead_blk; 1083 1163 int found; ··· 1088 1168 * Check the head of the log for torn writes. Search backwards from the 1089 1169 * head until we hit the tail or the maximum number of log record I/Os 1090 1170 * that could have been in flight at one time. Use a temporary buffer so 1091 - * we don't trash the rhead/bp pointers from the caller. 1171 + * we don't trash the rhead/buffer pointers from the caller. 1092 1172 */ 1093 - tmp_bp = xlog_get_bp(log, 1); 1094 - if (!tmp_bp) 1173 + tmp_buffer = xlog_alloc_buffer(log, 1); 1174 + if (!tmp_buffer) 1095 1175 return -ENOMEM; 1096 1176 error = xlog_rseek_logrec_hdr(log, *head_blk, *tail_blk, 1097 - XLOG_MAX_ICLOGS, tmp_bp, &tmp_rhead_blk, 1098 - &tmp_rhead, &tmp_wrapped); 1099 - xlog_put_bp(tmp_bp); 1177 + XLOG_MAX_ICLOGS, tmp_buffer, 1178 + &tmp_rhead_blk, &tmp_rhead, &tmp_wrapped); 1179 + kmem_free(tmp_buffer); 1100 1180 if (error < 0) 1101 1181 return error; 1102 1182 ··· 1125 1205 * (i.e., the records with invalid CRC) if the cycle number 1126 1206 * matches the the current cycle. 
1127 1207 */ 1128 - found = xlog_rseek_logrec_hdr(log, first_bad, *tail_blk, 1, bp, 1129 - rhead_blk, rhead, wrapped); 1208 + found = xlog_rseek_logrec_hdr(log, first_bad, *tail_blk, 1, 1209 + buffer, rhead_blk, rhead, wrapped); 1130 1210 if (found < 0) 1131 1211 return found; 1132 1212 if (found == 0) /* XXX: right thing to do here? */ ··· 1186 1266 xfs_daddr_t *tail_blk, 1187 1267 struct xlog_rec_header *rhead, 1188 1268 xfs_daddr_t rhead_blk, 1189 - struct xfs_buf *bp, 1269 + char *buffer, 1190 1270 bool *clean) 1191 1271 { 1192 1272 struct xlog_op_header *op_head; ··· 1229 1309 if (*head_blk == after_umount_blk && 1230 1310 be32_to_cpu(rhead->h_num_logops) == 1) { 1231 1311 umount_data_blk = xlog_wrap_logbno(log, rhead_blk + hblks); 1232 - error = xlog_bread(log, umount_data_blk, 1, bp, &offset); 1312 + error = xlog_bread(log, umount_data_blk, 1, buffer, &offset); 1233 1313 if (error) 1234 1314 return error; 1235 1315 ··· 1308 1388 { 1309 1389 xlog_rec_header_t *rhead; 1310 1390 char *offset = NULL; 1311 - xfs_buf_t *bp; 1391 + char *buffer; 1312 1392 int error; 1313 1393 xfs_daddr_t rhead_blk; 1314 1394 xfs_lsn_t tail_lsn; ··· 1322 1402 return error; 1323 1403 ASSERT(*head_blk < INT_MAX); 1324 1404 1325 - bp = xlog_get_bp(log, 1); 1326 - if (!bp) 1405 + buffer = xlog_alloc_buffer(log, 1); 1406 + if (!buffer) 1327 1407 return -ENOMEM; 1328 1408 if (*head_blk == 0) { /* special case */ 1329 - error = xlog_bread(log, 0, 1, bp, &offset); 1409 + error = xlog_bread(log, 0, 1, buffer, &offset); 1330 1410 if (error) 1331 1411 goto done; 1332 1412 ··· 1342 1422 * block. This wraps all the way back around to the head so something is 1343 1423 * seriously wrong if we can't find it. 
1344 1424 */ 1345 - error = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp, 1425 + error = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, buffer, 1346 1426 &rhead_blk, &rhead, &wrapped); 1347 1427 if (error < 0) 1348 1428 return error; ··· 1363 1443 * state to determine whether recovery is necessary. 1364 1444 */ 1365 1445 error = xlog_check_unmount_rec(log, head_blk, tail_blk, rhead, 1366 - rhead_blk, bp, &clean); 1446 + rhead_blk, buffer, &clean); 1367 1447 if (error) 1368 1448 goto done; 1369 1449 ··· 1380 1460 if (!clean) { 1381 1461 xfs_daddr_t orig_head = *head_blk; 1382 1462 1383 - error = xlog_verify_head(log, head_blk, tail_blk, bp, 1463 + error = xlog_verify_head(log, head_blk, tail_blk, buffer, 1384 1464 &rhead_blk, &rhead, &wrapped); 1385 1465 if (error) 1386 1466 goto done; ··· 1391 1471 wrapped); 1392 1472 tail_lsn = atomic64_read(&log->l_tail_lsn); 1393 1473 error = xlog_check_unmount_rec(log, head_blk, tail_blk, 1394 - rhead, rhead_blk, bp, 1474 + rhead, rhead_blk, buffer, 1395 1475 &clean); 1396 1476 if (error) 1397 1477 goto done; ··· 1425 1505 * But... if the -device- itself is readonly, just skip this. 1426 1506 * We can't recover this device anyway, so it won't matter. 
1427 1507 */ 1428 - if (!xfs_readonly_buftarg(log->l_mp->m_logdev_targp)) 1508 + if (!xfs_readonly_buftarg(log->l_targ)) 1429 1509 error = xlog_clear_stale_blocks(log, tail_lsn); 1430 1510 1431 1511 done: 1432 - xlog_put_bp(bp); 1512 + kmem_free(buffer); 1433 1513 1434 1514 if (error) 1435 1515 xfs_warn(log->l_mp, "failed to locate log tail"); ··· 1457 1537 struct xlog *log, 1458 1538 xfs_daddr_t *blk_no) 1459 1539 { 1460 - xfs_buf_t *bp; 1540 + char *buffer; 1461 1541 char *offset; 1462 1542 uint first_cycle, last_cycle; 1463 1543 xfs_daddr_t new_blk, last_blk, start_blk; ··· 1467 1547 *blk_no = 0; 1468 1548 1469 1549 /* check totally zeroed log */ 1470 - bp = xlog_get_bp(log, 1); 1471 - if (!bp) 1550 + buffer = xlog_alloc_buffer(log, 1); 1551 + if (!buffer) 1472 1552 return -ENOMEM; 1473 - error = xlog_bread(log, 0, 1, bp, &offset); 1553 + error = xlog_bread(log, 0, 1, buffer, &offset); 1474 1554 if (error) 1475 - goto bp_err; 1555 + goto out_free_buffer; 1476 1556 1477 1557 first_cycle = xlog_get_cycle(offset); 1478 1558 if (first_cycle == 0) { /* completely zeroed log */ 1479 1559 *blk_no = 0; 1480 - xlog_put_bp(bp); 1560 + kmem_free(buffer); 1481 1561 return 1; 1482 1562 } 1483 1563 1484 1564 /* check partially zeroed log */ 1485 - error = xlog_bread(log, log_bbnum-1, 1, bp, &offset); 1565 + error = xlog_bread(log, log_bbnum-1, 1, buffer, &offset); 1486 1566 if (error) 1487 - goto bp_err; 1567 + goto out_free_buffer; 1488 1568 1489 1569 last_cycle = xlog_get_cycle(offset); 1490 1570 if (last_cycle != 0) { /* log completely written to */ 1491 - xlog_put_bp(bp); 1571 + kmem_free(buffer); 1492 1572 return 0; 1493 1573 } 1494 1574 1495 1575 /* we have a partially zeroed log */ 1496 1576 last_blk = log_bbnum-1; 1497 - if ((error = xlog_find_cycle_start(log, bp, 0, &last_blk, 0))) 1498 - goto bp_err; 1577 + error = xlog_find_cycle_start(log, buffer, 0, &last_blk, 0); 1578 + if (error) 1579 + goto out_free_buffer; 1499 1580 1500 1581 /* 1501 1582 * Validate the 
answer. Because there is no way to guarantee that ··· 1519 1598 */ 1520 1599 if ((error = xlog_find_verify_cycle(log, start_blk, 1521 1600 (int)num_scan_bblks, 0, &new_blk))) 1522 - goto bp_err; 1601 + goto out_free_buffer; 1523 1602 if (new_blk != -1) 1524 1603 last_blk = new_blk; 1525 1604 ··· 1531 1610 if (error == 1) 1532 1611 error = -EIO; 1533 1612 if (error) 1534 - goto bp_err; 1613 + goto out_free_buffer; 1535 1614 1536 1615 *blk_no = last_blk; 1537 - bp_err: 1538 - xlog_put_bp(bp); 1616 + out_free_buffer: 1617 + kmem_free(buffer); 1539 1618 if (error) 1540 1619 return error; 1541 1620 return 1; ··· 1578 1657 int tail_block) 1579 1658 { 1580 1659 char *offset; 1581 - xfs_buf_t *bp; 1660 + char *buffer; 1582 1661 int balign, ealign; 1583 1662 int sectbb = log->l_sectBBsize; 1584 1663 int end_block = start_block + blocks; ··· 1595 1674 bufblks = 1 << ffs(blocks); 1596 1675 while (bufblks > log->l_logBBsize) 1597 1676 bufblks >>= 1; 1598 - while (!(bp = xlog_get_bp(log, bufblks))) { 1677 + while (!(buffer = xlog_alloc_buffer(log, bufblks))) { 1599 1678 bufblks >>= 1; 1600 1679 if (bufblks < sectbb) 1601 1680 return -ENOMEM; ··· 1607 1686 */ 1608 1687 balign = round_down(start_block, sectbb); 1609 1688 if (balign != start_block) { 1610 - error = xlog_bread_noalign(log, start_block, 1, bp); 1689 + error = xlog_bread_noalign(log, start_block, 1, buffer); 1611 1690 if (error) 1612 - goto out_put_bp; 1691 + goto out_free_buffer; 1613 1692 1614 1693 j = start_block - balign; 1615 1694 } ··· 1626 1705 */ 1627 1706 ealign = round_down(end_block, sectbb); 1628 1707 if (j == 0 && (start_block + endcount > ealign)) { 1629 - offset = bp->b_addr + BBTOB(ealign - start_block); 1630 - error = xlog_bread_offset(log, ealign, sectbb, 1631 - bp, offset); 1708 + error = xlog_bread_noalign(log, ealign, sectbb, 1709 + buffer + BBTOB(ealign - start_block)); 1632 1710 if (error) 1633 1711 break; 1634 1712 1635 1713 } 1636 1714 1637 - offset = xlog_align(log, start_block, endcount, 
bp); 1715 + offset = buffer + xlog_align(log, start_block); 1638 1716 for (; j < endcount; j++) { 1639 1717 xlog_add_record(log, offset, cycle, i+j, 1640 1718 tail_cycle, tail_block); 1641 1719 offset += BBSIZE; 1642 1720 } 1643 - error = xlog_bwrite(log, start_block, endcount, bp); 1721 + error = xlog_bwrite(log, start_block, endcount, buffer); 1644 1722 if (error) 1645 1723 break; 1646 1724 start_block += endcount; 1647 1725 j = 0; 1648 1726 } 1649 1727 1650 - out_put_bp: 1651 - xlog_put_bp(bp); 1728 + out_free_buffer: 1729 + kmem_free(buffer); 1652 1730 return error; 1653 1731 } 1654 1732 ··· 2082 2162 if (xfs_sb_version_hascrc(&mp->m_sb)) 2083 2163 bp->b_ops = &xfs_inode_buf_ops; 2084 2164 2085 - inodes_per_buf = BBTOB(bp->b_io_length) >> mp->m_sb.sb_inodelog; 2165 + inodes_per_buf = BBTOB(bp->b_length) >> mp->m_sb.sb_inodelog; 2086 2166 for (i = 0; i < inodes_per_buf; i++) { 2087 2167 next_unlinked_offset = (i * mp->m_sb.sb_inodesize) + 2088 2168 offsetof(xfs_dinode_t, di_next_unlinked); ··· 2124 2204 2125 2205 ASSERT(item->ri_buf[item_index].i_addr != NULL); 2126 2206 ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0); 2127 - ASSERT((reg_buf_offset + reg_buf_bytes) <= 2128 - BBTOB(bp->b_io_length)); 2207 + ASSERT((reg_buf_offset + reg_buf_bytes) <= BBTOB(bp->b_length)); 2129 2208 2130 2209 /* 2131 2210 * The current logged region contains a copy of the ··· 2589 2670 ASSERT(nbits > 0); 2590 2671 ASSERT(item->ri_buf[i].i_addr != NULL); 2591 2672 ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0); 2592 - ASSERT(BBTOB(bp->b_io_length) >= 2673 + ASSERT(BBTOB(bp->b_length) >= 2593 2674 ((uint)bit << XFS_BLF_SHIFT) + (nbits << XFS_BLF_SHIFT)); 2594 2675 2595 2676 /* ··· 2801 2882 * 2802 2883 * Also make sure that only inode buffers with good sizes stay in 2803 2884 * the buffer cache. The kernel moves inodes in buffers of 1 block 2804 - * or mp->m_inode_cluster_size bytes, whichever is bigger. 
The inode 2885 + * or inode_cluster_size bytes, whichever is bigger. The inode 2805 2886 * buffers in the log can be a different size if the log was generated 2806 2887 * by an older kernel using unclustered inode buffers or a newer kernel 2807 2888 * running with a different inode cluster size. Regardless, if the 2808 - * the inode buffer size isn't max(blocksize, mp->m_inode_cluster_size) 2809 - * for *our* value of mp->m_inode_cluster_size, then we need to keep 2889 + * the inode buffer size isn't max(blocksize, inode_cluster_size) 2890 + * for *our* value of inode_cluster_size, then we need to keep 2810 2891 * the buffer out of the buffer cache so that the buffer won't 2811 2892 * overlap with future reads of those inodes. 2812 2893 */ 2813 2894 if (XFS_DINODE_MAGIC == 2814 2895 be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) && 2815 - (BBTOB(bp->b_io_length) != max(log->l_mp->m_sb.sb_blocksize, 2816 - (uint32_t)log->l_mp->m_inode_cluster_size))) { 2896 + (BBTOB(bp->b_length) != M_IGEO(log->l_mp)->inode_cluster_size)) { 2817 2897 xfs_buf_stale(bp); 2818 2898 error = xfs_bwrite(bp); 2819 2899 } else { 2820 - ASSERT(bp->b_target->bt_mount == mp); 2900 + ASSERT(bp->b_mount == mp); 2821 2901 bp->b_iodone = xlog_recover_iodone; 2822 2902 xfs_buf_delwri_queue(bp, buffer_list); 2823 2903 } ··· 3178 3260 /* re-generate the checksum. 
*/ 3179 3261 xfs_dinode_calc_crc(log->l_mp, dip); 3180 3262 3181 - ASSERT(bp->b_target->bt_mount == mp); 3263 + ASSERT(bp->b_mount == mp); 3182 3264 bp->b_iodone = xlog_recover_iodone; 3183 3265 xfs_buf_delwri_queue(bp, buffer_list); 3184 3266 ··· 3317 3399 } 3318 3400 3319 3401 ASSERT(dq_f->qlf_size == 2); 3320 - ASSERT(bp->b_target->bt_mount == mp); 3402 + ASSERT(bp->b_mount == mp); 3321 3403 bp->b_iodone = xlog_recover_iodone; 3322 3404 xfs_buf_delwri_queue(bp, buffer_list); 3323 3405 ··· 3381 3463 { 3382 3464 xfs_efd_log_format_t *efd_formatp; 3383 3465 xfs_efi_log_item_t *efip = NULL; 3384 - xfs_log_item_t *lip; 3466 + struct xfs_log_item *lip; 3385 3467 uint64_t efi_id; 3386 3468 struct xfs_ail_cursor cur; 3387 3469 struct xfs_ail *ailp = log->l_ailp; ··· 3767 3849 { 3768 3850 struct xfs_mount *mp = log->l_mp; 3769 3851 struct xfs_icreate_log *icl; 3852 + struct xfs_ino_geometry *igeo = M_IGEO(mp); 3770 3853 xfs_agnumber_t agno; 3771 3854 xfs_agblock_t agbno; 3772 3855 unsigned int count; ··· 3817 3898 3818 3899 /* 3819 3900 * The inode chunk is either full or sparse and we only support 3820 - * m_ialloc_min_blks sized sparse allocations at this time. 3901 + * m_ino_geo.ialloc_min_blks sized sparse allocations at this time. 3821 3902 */ 3822 - if (length != mp->m_ialloc_blks && 3823 - length != mp->m_ialloc_min_blks) { 3903 + if (length != igeo->ialloc_blks && 3904 + length != igeo->ialloc_min_blks) { 3824 3905 xfs_warn(log->l_mp, 3825 3906 "%s: unsupported chunk length", __FUNCTION__); 3826 3907 return -EINVAL; ··· 3840 3921 * buffers for cancellation so we don't overwrite anything written after 3841 3922 * a cancellation. 
3842 3923 */ 3843 - bb_per_cluster = XFS_FSB_TO_BB(mp, mp->m_blocks_per_cluster); 3844 - nbufs = length / mp->m_blocks_per_cluster; 3924 + bb_per_cluster = XFS_FSB_TO_BB(mp, igeo->blocks_per_cluster); 3925 + nbufs = length / igeo->blocks_per_cluster; 3845 3926 for (i = 0, cancel_count = 0; i < nbufs; i++) { 3846 3927 xfs_daddr_t daddr; 3847 3928 3848 3929 daddr = XFS_AGB_TO_DADDR(mp, agno, 3849 - agbno + i * mp->m_blocks_per_cluster); 3930 + agbno + i * igeo->blocks_per_cluster); 3850 3931 if (xlog_check_buffer_cancelled(log, daddr, bb_per_cluster, 0)) 3851 3932 cancel_count++; 3852 3933 } ··· 4875 4956 * A cancel occurs when the mount has failed and we're bailing out. 4876 4957 * Release all pending log intent items so they don't pin the AIL. 4877 4958 */ 4878 - STATIC int 4959 + STATIC void 4879 4960 xlog_recover_cancel_intents( 4880 4961 struct xlog *log) 4881 4962 { 4882 4963 struct xfs_log_item *lip; 4883 - int error = 0; 4884 4964 struct xfs_ail_cursor cur; 4885 4965 struct xfs_ail *ailp; 4886 4966 ··· 4919 5001 4920 5002 xfs_trans_ail_cursor_done(&cur); 4921 5003 spin_unlock(&ailp->ail_lock); 4922 - return error; 4923 5004 } 4924 5005 4925 5006 /* ··· 5224 5307 xfs_daddr_t blk_no, rblk_no; 5225 5308 xfs_daddr_t rhead_blk; 5226 5309 char *offset; 5227 - xfs_buf_t *hbp, *dbp; 5310 + char *hbp, *dbp; 5228 5311 int error = 0, h_size, h_len; 5229 5312 int error2 = 0; 5230 5313 int bblks, split_bblks; ··· 5249 5332 * iclog header and extract the header size from it. Get a 5250 5333 * new hbp that is the correct size. 
5251 5334 */ 5252 - hbp = xlog_get_bp(log, 1); 5335 + hbp = xlog_alloc_buffer(log, 1); 5253 5336 if (!hbp) 5254 5337 return -ENOMEM; 5255 5338 ··· 5291 5374 hblks = h_size / XLOG_HEADER_CYCLE_SIZE; 5292 5375 if (h_size % XLOG_HEADER_CYCLE_SIZE) 5293 5376 hblks++; 5294 - xlog_put_bp(hbp); 5295 - hbp = xlog_get_bp(log, hblks); 5377 + kmem_free(hbp); 5378 + hbp = xlog_alloc_buffer(log, hblks); 5296 5379 } else { 5297 5380 hblks = 1; 5298 5381 } 5299 5382 } else { 5300 5383 ASSERT(log->l_sectBBsize == 1); 5301 5384 hblks = 1; 5302 - hbp = xlog_get_bp(log, 1); 5385 + hbp = xlog_alloc_buffer(log, 1); 5303 5386 h_size = XLOG_BIG_RECORD_BSIZE; 5304 5387 } 5305 5388 5306 5389 if (!hbp) 5307 5390 return -ENOMEM; 5308 - dbp = xlog_get_bp(log, BTOBB(h_size)); 5391 + dbp = xlog_alloc_buffer(log, BTOBB(h_size)); 5309 5392 if (!dbp) { 5310 - xlog_put_bp(hbp); 5393 + kmem_free(hbp); 5311 5394 return -ENOMEM; 5312 5395 } 5313 5396 ··· 5322 5405 /* 5323 5406 * Check for header wrapping around physical end-of-log 5324 5407 */ 5325 - offset = hbp->b_addr; 5408 + offset = hbp; 5326 5409 split_hblks = 0; 5327 5410 wrapped_hblks = 0; 5328 5411 if (blk_no + hblks <= log->l_logBBsize) { ··· 5358 5441 * - order is important. 5359 5442 */ 5360 5443 wrapped_hblks = hblks - split_hblks; 5361 - error = xlog_bread_offset(log, 0, 5362 - wrapped_hblks, hbp, 5444 + error = xlog_bread_noalign(log, 0, 5445 + wrapped_hblks, 5363 5446 offset + BBTOB(split_hblks)); 5364 5447 if (error) 5365 5448 goto bread_err2; ··· 5390 5473 } else { 5391 5474 /* This log record is split across the 5392 5475 * physical end of log */ 5393 - offset = dbp->b_addr; 5476 + offset = dbp; 5394 5477 split_bblks = 0; 5395 5478 if (blk_no != log->l_logBBsize) { 5396 5479 /* some data is before the physical ··· 5419 5502 * _first_, then the log start (LR header end) 5420 5503 * - order is important. 
5421 5504 */ 5422 - error = xlog_bread_offset(log, 0, 5423 - bblks - split_bblks, dbp, 5505 + error = xlog_bread_noalign(log, 0, 5506 + bblks - split_bblks, 5424 5507 offset + BBTOB(split_bblks)); 5425 5508 if (error) 5426 5509 goto bread_err2; ··· 5468 5551 } 5469 5552 5470 5553 bread_err2: 5471 - xlog_put_bp(dbp); 5554 + kmem_free(dbp); 5472 5555 bread_err1: 5473 - xlog_put_bp(hbp); 5556 + kmem_free(hbp); 5474 5557 5475 5558 /* 5476 5559 * Submit buffers that have been added from the last record processed, ··· 5604 5687 * Now that we've finished replaying all buffer and inode 5605 5688 * updates, re-read in the superblock and reverify it. 5606 5689 */ 5607 - bp = xfs_getsb(mp, 0); 5690 + bp = xfs_getsb(mp); 5608 5691 bp->b_flags &= ~(XBF_DONE | XBF_ASYNC); 5609 5692 ASSERT(!(bp->b_flags & XBF_WRITE)); 5610 5693 bp->b_flags |= XBF_READ; ··· 5777 5860 return 0; 5778 5861 } 5779 5862 5780 - int 5863 + void 5781 5864 xlog_recover_cancel( 5782 5865 struct xlog *log) 5783 5866 { 5784 - int error = 0; 5785 - 5786 5867 if (log->l_flags & XLOG_RECOVERY_NEEDED) 5787 - error = xlog_recover_cancel_intents(log); 5788 - 5789 - return error; 5868 + xlog_recover_cancel_intents(log); 5790 5869 } 5791 5870 5792 5871 #if defined(DEBUG)
+1 -1
fs/xfs/xfs_message.c
··· 6 6 #include "xfs.h" 7 7 #include "xfs_fs.h" 8 8 #include "xfs_error.h" 9 + #include "xfs_shared.h" 9 10 #include "xfs_format.h" 10 - #include "xfs_log_format.h" 11 11 #include "xfs_trans_resv.h" 12 12 #include "xfs_mount.h" 13 13
+6 -96
fs/xfs/xfs_mount.c
··· 12 12 #include "xfs_bit.h" 13 13 #include "xfs_sb.h" 14 14 #include "xfs_mount.h" 15 - #include "xfs_defer.h" 16 - #include "xfs_da_format.h" 17 - #include "xfs_da_btree.h" 18 15 #include "xfs_inode.h" 19 16 #include "xfs_dir2.h" 20 17 #include "xfs_ialloc.h" ··· 24 27 #include "xfs_error.h" 25 28 #include "xfs_quota.h" 26 29 #include "xfs_fsops.h" 27 - #include "xfs_trace.h" 28 30 #include "xfs_icache.h" 29 31 #include "xfs_sysfs.h" 30 32 #include "xfs_rmap_btree.h" ··· 426 430 } 427 431 428 432 /* 429 - * Set the maximum inode count for this filesystem 430 - */ 431 - STATIC void 432 - xfs_set_maxicount(xfs_mount_t *mp) 433 - { 434 - xfs_sb_t *sbp = &(mp->m_sb); 435 - uint64_t icount; 436 - 437 - if (sbp->sb_imax_pct) { 438 - /* 439 - * Make sure the maximum inode count is a multiple 440 - * of the units we allocate inodes in. 441 - */ 442 - icount = sbp->sb_dblocks * sbp->sb_imax_pct; 443 - do_div(icount, 100); 444 - do_div(icount, mp->m_ialloc_blks); 445 - mp->m_maxicount = (icount * mp->m_ialloc_blks) << 446 - sbp->sb_inopblog; 447 - } else { 448 - mp->m_maxicount = 0; 449 - } 450 - } 451 - 452 - /* 453 433 * Set the default minimum read and write sizes unless 454 434 * already specified in a mount option. 455 435 * We use smaller I/O sizes when the file system ··· 479 507 do_div(space, 100); 480 508 mp->m_low_space[i] = space * (i + 1); 481 509 } 482 - } 483 - 484 - 485 - /* 486 - * Set whether we're using inode alignment. 
487 - */ 488 - STATIC void 489 - xfs_set_inoalignment(xfs_mount_t *mp) 490 - { 491 - if (xfs_sb_version_hasalign(&mp->m_sb) && 492 - mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp)) 493 - mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1; 494 - else 495 - mp->m_inoalign_mask = 0; 496 - /* 497 - * If we are using stripe alignment, check whether 498 - * the stripe unit is a multiple of the inode alignment 499 - */ 500 - if (mp->m_dalign && mp->m_inoalign_mask && 501 - !(mp->m_dalign & mp->m_inoalign_mask)) 502 - mp->m_sinoalign = mp->m_dalign; 503 - else 504 - mp->m_sinoalign = 0; 505 510 } 506 511 507 512 /* ··· 632 683 { 633 684 struct xfs_sb *sbp = &(mp->m_sb); 634 685 struct xfs_inode *rip; 686 + struct xfs_ino_geometry *igeo = M_IGEO(mp); 635 687 uint64_t resblks; 636 688 uint quotamount = 0; 637 689 uint quotaflags = 0; ··· 699 749 xfs_alloc_compute_maxlevels(mp); 700 750 xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK); 701 751 xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK); 702 - xfs_ialloc_compute_maxlevels(mp); 752 + xfs_ialloc_setup_geometry(mp); 703 753 xfs_rmapbt_compute_maxlevels(mp); 704 754 xfs_refcountbt_compute_maxlevels(mp); 705 - 706 - xfs_set_maxicount(mp); 707 755 708 756 /* enable fail_at_unmount as default */ 709 757 mp->m_fail_unmount = true; ··· 736 788 xfs_set_low_space_thresholds(mp); 737 789 738 790 /* 739 - * Set the inode cluster size. 740 - * This may still be overridden by the file system 741 - * block size if it is larger than the chosen cluster size. 742 - * 743 - * For v5 filesystems, scale the cluster size with the inode size to 744 - * keep a constant ratio of inode per cluster buffer, but only if mkfs 745 - * has set the inode alignment value appropriately for larger cluster 746 - * sizes. 
747 - */ 748 - mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE; 749 - if (xfs_sb_version_hascrc(&mp->m_sb)) { 750 - int new_size = mp->m_inode_cluster_size; 751 - 752 - new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE; 753 - if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size)) 754 - mp->m_inode_cluster_size = new_size; 755 - } 756 - mp->m_blocks_per_cluster = xfs_icluster_size_fsb(mp); 757 - mp->m_inodes_per_cluster = XFS_FSB_TO_INO(mp, mp->m_blocks_per_cluster); 758 - mp->m_cluster_align = xfs_ialloc_cluster_alignment(mp); 759 - mp->m_cluster_align_inodes = XFS_FSB_TO_INO(mp, mp->m_cluster_align); 760 - 761 - /* 762 791 * If enabled, sparse inode chunk alignment is expected to match the 763 792 * cluster size. Full inode chunk alignment must match the chunk size, 764 793 * but that is checked on sb read verification... 765 794 */ 766 795 if (xfs_sb_version_hassparseinodes(&mp->m_sb) && 767 796 mp->m_sb.sb_spino_align != 768 - XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)) { 797 + XFS_B_TO_FSBT(mp, igeo->inode_cluster_size_raw)) { 769 798 xfs_warn(mp, 770 799 "Sparse inode block alignment (%u) must match cluster size (%llu).", 771 800 mp->m_sb.sb_spino_align, 772 - XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)); 801 + XFS_B_TO_FSBT(mp, igeo->inode_cluster_size_raw)); 773 802 error = -EINVAL; 774 803 goto out_remove_uuid; 775 804 } 776 - 777 - /* 778 - * Set inode alignment fields 779 - */ 780 - xfs_set_inoalignment(mp); 781 805 782 806 /* 783 807 * Check that the data (and log if separate) is an ok size. ··· 1305 1385 * xfs_getsb() is called to obtain the buffer for the superblock. 1306 1386 * The buffer is returned locked and read in from disk. 1307 1387 * The buffer should be released with a call to xfs_brelse(). 1308 - * 1309 - * If the flags parameter is BUF_TRYLOCK, then we'll only return 1310 - * the superblock buffer if it can be locked without sleeping. 1311 - * If it can't then we'll return NULL. 
1312 1388 */ 1313 1389 struct xfs_buf * 1314 1390 xfs_getsb( 1315 - struct xfs_mount *mp, 1316 - int flags) 1391 + struct xfs_mount *mp) 1317 1392 { 1318 1393 struct xfs_buf *bp = mp->m_sb_bp; 1319 1394 1320 - if (!xfs_buf_trylock(bp)) { 1321 - if (flags & XBF_TRYLOCK) 1322 - return NULL; 1323 - xfs_buf_lock(bp); 1324 - } 1325 - 1395 + xfs_buf_lock(bp); 1326 1396 xfs_buf_hold(bp); 1327 1397 ASSERT(bp->b_flags & XBF_DONE); 1328 1398 return bp;
+4 -18
fs/xfs/xfs_mount.h
··· 105 105 struct xfs_da_geometry *m_dir_geo; /* directory block geometry */ 106 106 struct xfs_da_geometry *m_attr_geo; /* attribute block geometry */ 107 107 struct xlog *m_log; /* log specific stuff */ 108 + struct xfs_ino_geometry m_ino_geo; /* inode geometry */ 108 109 int m_logbufs; /* number of log buffers */ 109 110 int m_logbsize; /* size of each log buffer */ 110 111 uint m_rsumlevels; /* rt summary levels */ ··· 127 126 uint8_t m_blkbit_log; /* blocklog + NBBY */ 128 127 uint8_t m_blkbb_log; /* blocklog - BBSHIFT */ 129 128 uint8_t m_agno_log; /* log #ag's */ 130 - uint8_t m_agino_log; /* #bits for agino in inum */ 131 - uint m_inode_cluster_size;/* min inode buf size */ 132 - unsigned int m_inodes_per_cluster; 133 - unsigned int m_blocks_per_cluster; 134 - unsigned int m_cluster_align; 135 - unsigned int m_cluster_align_inodes; 136 129 uint m_blockmask; /* sb_blocksize-1 */ 137 130 uint m_blockwsize; /* sb_blocksize in words */ 138 131 uint m_blockwmask; /* blockwsize-1 */ ··· 134 139 uint m_alloc_mnr[2]; /* min alloc btree records */ 135 140 uint m_bmap_dmxr[2]; /* max bmap btree records */ 136 141 uint m_bmap_dmnr[2]; /* min bmap btree records */ 137 - uint m_inobt_mxr[2]; /* max inobt btree records */ 138 - uint m_inobt_mnr[2]; /* min inobt btree records */ 139 142 uint m_rmap_mxr[2]; /* max rmap btree records */ 140 143 uint m_rmap_mnr[2]; /* min rmap btree records */ 141 144 uint m_refc_mxr[2]; /* max refc btree records */ 142 145 uint m_refc_mnr[2]; /* min refc btree records */ 143 146 uint m_ag_maxlevels; /* XFS_AG_MAXLEVELS */ 144 147 uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */ 145 - uint m_in_maxlevels; /* max inobt btree levels. 
*/ 146 148 uint m_rmap_maxlevels; /* max rmap btree levels */ 147 149 uint m_refc_maxlevels; /* max refcount btree level */ 148 150 xfs_extlen_t m_ag_prealloc_blocks; /* reserved ag blocks */ ··· 151 159 int m_fixedfsid[2]; /* unchanged for life of FS */ 152 160 uint64_t m_flags; /* global mount flags */ 153 161 bool m_finobt_nores; /* no per-AG finobt resv. */ 154 - int m_ialloc_inos; /* inodes in inode allocation */ 155 - int m_ialloc_blks; /* blocks in inode allocation */ 156 - int m_ialloc_min_blks;/* min blocks in sparse inode 157 - * allocation */ 158 - int m_inoalign_mask;/* mask sb_inoalignmt if used */ 159 162 uint m_qflags; /* quota status flags */ 160 163 struct xfs_trans_resv m_resv; /* precomputed res values */ 161 - uint64_t m_maxicount; /* maximum inode count */ 162 164 uint64_t m_resblks; /* total reserved blocks */ 163 165 uint64_t m_resblks_avail;/* available reserved blocks */ 164 166 uint64_t m_resblks_save; /* reserved blks @ remount,ro */ 165 167 int m_dalign; /* stripe unit */ 166 168 int m_swidth; /* stripe width */ 167 - int m_sinoalign; /* stripe unit inode alignment */ 168 169 uint8_t m_sectbb_log; /* sectlog - BBSHIFT */ 169 170 const struct xfs_nameops *m_dirnameops; /* vector of dir name ops */ 170 171 const struct xfs_dir_ops *m_dir_inode_ops; /* vector of dir inode ops */ ··· 183 198 struct workqueue_struct *m_unwritten_workqueue; 184 199 struct workqueue_struct *m_cil_workqueue; 185 200 struct workqueue_struct *m_reclaim_workqueue; 186 - struct workqueue_struct *m_log_workqueue; 187 201 struct workqueue_struct *m_eofblocks_workqueue; 188 202 struct workqueue_struct *m_sync_workqueue; 189 203 ··· 209 225 struct xfs_kobj m_errortag_kobj; 210 226 #endif 211 227 } xfs_mount_t; 228 + 229 + #define M_IGEO(mp) (&(mp)->m_ino_geo) 212 230 213 231 /* 214 232 * Flags for m_flags. 
··· 451 465 bool reserved); 452 466 extern int xfs_mod_frextents(struct xfs_mount *mp, int64_t delta); 453 467 454 - extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int); 468 + extern struct xfs_buf *xfs_getsb(xfs_mount_t *); 455 469 extern int xfs_readsb(xfs_mount_t *, int); 456 470 extern void xfs_freesb(xfs_mount_t *); 457 471 extern bool xfs_fs_writable(struct xfs_mount *mp, int level);
+5
fs/xfs/xfs_ondisk.h
··· 146 146 XFS_CHECK_OFFSET(struct xfs_dir3_data_hdr, hdr.magic, 0); 147 147 XFS_CHECK_OFFSET(struct xfs_dir3_free, hdr.hdr.magic, 0); 148 148 XFS_CHECK_OFFSET(struct xfs_attr3_leafblock, hdr.info.hdr, 0); 149 + 150 + XFS_CHECK_STRUCT_SIZE(struct xfs_bulkstat, 192); 151 + XFS_CHECK_STRUCT_SIZE(struct xfs_inumbers, 24); 152 + XFS_CHECK_STRUCT_SIZE(struct xfs_bulkstat_req, 64); 153 + XFS_CHECK_STRUCT_SIZE(struct xfs_inumbers_req, 64); 149 154 } 150 155 151 156 #endif /* __XFS_ONDISK_H */
+1 -8
fs/xfs/xfs_pnfs.c
··· 2 2 /* 3 3 * Copyright (c) 2014 Christoph Hellwig. 4 4 */ 5 - #include <linux/iomap.h> 6 5 #include "xfs.h" 6 + #include "xfs_shared.h" 7 7 #include "xfs_format.h" 8 8 #include "xfs_log_format.h" 9 9 #include "xfs_trans_resv.h" 10 - #include "xfs_sb.h" 11 10 #include "xfs_mount.h" 12 11 #include "xfs_inode.h" 13 12 #include "xfs_trans.h" 14 - #include "xfs_log.h" 15 13 #include "xfs_bmap.h" 16 - #include "xfs_bmap_util.h" 17 - #include "xfs_error.h" 18 14 #include "xfs_iomap.h" 19 - #include "xfs_shared.h" 20 - #include "xfs_bit.h" 21 - #include "xfs_pnfs.h" 22 15 23 16 /* 24 17 * Ensure that we do not have any outstanding pNFS layouts that can be used by
+136
fs/xfs/xfs_pwork.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-or-later 2 + /* 3 + * Copyright (C) 2019 Oracle. All Rights Reserved. 4 + * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 + */ 6 + #include "xfs.h" 7 + #include "xfs_fs.h" 8 + #include "xfs_shared.h" 9 + #include "xfs_format.h" 10 + #include "xfs_log_format.h" 11 + #include "xfs_trans_resv.h" 12 + #include "xfs_mount.h" 13 + #include "xfs_trace.h" 14 + #include "xfs_sysctl.h" 15 + #include "xfs_pwork.h" 16 + #include <linux/nmi.h> 17 + 18 + /* 19 + * Parallel Work Queue 20 + * =================== 21 + * 22 + * Abstract away the details of running a large and "obviously" parallelizable 23 + * task across multiple CPUs. Callers initialize the pwork control object with 24 + * a desired level of parallelization and a work function. Next, they embed 25 + * struct xfs_pwork in whatever structure they use to pass work context to a 26 + * worker thread and queue that pwork. The work function will be passed the 27 + * pwork item when it is run (from process context) and any returned error will 28 + * be recorded in xfs_pwork_ctl.error. Work functions should check for errors 29 + * and abort if necessary; the non-zeroness of xfs_pwork_ctl.error does not 30 + * stop workqueue item processing. 31 + * 32 + * This is the rough equivalent of the xfsprogs workqueue code, though we can't 33 + * reuse that name here. 34 + */ 35 + 36 + /* Invoke our caller's function. */ 37 + static void 38 + xfs_pwork_work( 39 + struct work_struct *work) 40 + { 41 + struct xfs_pwork *pwork; 42 + struct xfs_pwork_ctl *pctl; 43 + int error; 44 + 45 + pwork = container_of(work, struct xfs_pwork, work); 46 + pctl = pwork->pctl; 47 + error = pctl->work_fn(pctl->mp, pwork); 48 + if (error && !pctl->error) 49 + pctl->error = error; 50 + if (atomic_dec_and_test(&pctl->nr_work)) 51 + wake_up(&pctl->poll_wait); 52 + } 53 + 54 + /* 55 + * Set up control data for parallel work. @work_fn is the function that will 56 + * be called. 
@tag will be written into the kernel threads. @nr_threads is 57 + * the level of parallelism desired, or 0 for no limit. 58 + */ 59 + int 60 + xfs_pwork_init( 61 + struct xfs_mount *mp, 62 + struct xfs_pwork_ctl *pctl, 63 + xfs_pwork_work_fn work_fn, 64 + const char *tag, 65 + unsigned int nr_threads) 66 + { 67 + #ifdef DEBUG 68 + if (xfs_globals.pwork_threads >= 0) 69 + nr_threads = xfs_globals.pwork_threads; 70 + #endif 71 + trace_xfs_pwork_init(mp, nr_threads, current->pid); 72 + 73 + pctl->wq = alloc_workqueue("%s-%d", WQ_FREEZABLE, nr_threads, tag, 74 + current->pid); 75 + if (!pctl->wq) 76 + return -ENOMEM; 77 + pctl->work_fn = work_fn; 78 + pctl->error = 0; 79 + pctl->mp = mp; 80 + atomic_set(&pctl->nr_work, 0); 81 + init_waitqueue_head(&pctl->poll_wait); 82 + 83 + return 0; 84 + } 85 + 86 + /* Queue some parallel work. */ 87 + void 88 + xfs_pwork_queue( 89 + struct xfs_pwork_ctl *pctl, 90 + struct xfs_pwork *pwork) 91 + { 92 + INIT_WORK(&pwork->work, xfs_pwork_work); 93 + pwork->pctl = pctl; 94 + atomic_inc(&pctl->nr_work); 95 + queue_work(pctl->wq, &pwork->work); 96 + } 97 + 98 + /* Wait for the work to finish and tear down the control structure. */ 99 + int 100 + xfs_pwork_destroy( 101 + struct xfs_pwork_ctl *pctl) 102 + { 103 + destroy_workqueue(pctl->wq); 104 + pctl->wq = NULL; 105 + return pctl->error; 106 + } 107 + 108 + /* 109 + * Wait for the work to finish by polling completion status and touch the soft 110 + * lockup watchdog. This is for callers such as mount which hold locks. 111 + */ 112 + void 113 + xfs_pwork_poll( 114 + struct xfs_pwork_ctl *pctl) 115 + { 116 + while (wait_event_timeout(pctl->poll_wait, 117 + atomic_read(&pctl->nr_work) == 0, HZ) == 0) 118 + touch_softlockup_watchdog(); 119 + } 120 + 121 + /* 122 + * Return the amount of parallelism that the data device can handle, or 0 for 123 + * no limit. 
124 + */ 125 + unsigned int 126 + xfs_pwork_guess_datadev_parallelism( 127 + struct xfs_mount *mp) 128 + { 129 + struct xfs_buftarg *btp = mp->m_ddev_targp; 130 + 131 + /* 132 + * For now we'll go with the most conservative setting possible, 133 + * which is two threads for an SSD and 1 thread everywhere else. 134 + */ 135 + return blk_queue_nonrot(btp->bt_bdev->bd_queue) ? 2 : 1; 136 + }
+61
fs/xfs/xfs_pwork.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-or-later */ 2 + /* 3 + * Copyright (C) 2019 Oracle. All Rights Reserved. 4 + * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 + */ 6 + #ifndef __XFS_PWORK_H__ 7 + #define __XFS_PWORK_H__ 8 + 9 + struct xfs_pwork; 10 + struct xfs_mount; 11 + 12 + typedef int (*xfs_pwork_work_fn)(struct xfs_mount *mp, struct xfs_pwork *pwork); 13 + 14 + /* 15 + * Parallel work coordination structure. 16 + */ 17 + struct xfs_pwork_ctl { 18 + struct workqueue_struct *wq; 19 + struct xfs_mount *mp; 20 + xfs_pwork_work_fn work_fn; 21 + struct wait_queue_head poll_wait; 22 + atomic_t nr_work; 23 + int error; 24 + }; 25 + 26 + /* 27 + * Embed this parallel work control item inside your own work structure, 28 + * then queue work with it. 29 + */ 30 + struct xfs_pwork { 31 + struct work_struct work; 32 + struct xfs_pwork_ctl *pctl; 33 + }; 34 + 35 + #define XFS_PWORK_SINGLE_THREADED { .pctl = NULL } 36 + 37 + /* Have we been told to abort? */ 38 + static inline bool 39 + xfs_pwork_ctl_want_abort( 40 + struct xfs_pwork_ctl *pctl) 41 + { 42 + return pctl && pctl->error; 43 + } 44 + 45 + /* Have we been told to abort? */ 46 + static inline bool 47 + xfs_pwork_want_abort( 48 + struct xfs_pwork *pwork) 49 + { 50 + return xfs_pwork_ctl_want_abort(pwork->pctl); 51 + } 52 + 53 + int xfs_pwork_init(struct xfs_mount *mp, struct xfs_pwork_ctl *pctl, 54 + xfs_pwork_work_fn work_fn, const char *tag, 55 + unsigned int nr_threads); 56 + void xfs_pwork_queue(struct xfs_pwork_ctl *pctl, struct xfs_pwork *pwork); 57 + int xfs_pwork_destroy(struct xfs_pwork_ctl *pctl); 58 + void xfs_pwork_poll(struct xfs_pwork_ctl *pctl); 59 + unsigned int xfs_pwork_guess_datadev_parallelism(struct xfs_mount *mp); 60 + 61 + #endif /* __XFS_PWORK_H__ */
+21 -47
fs/xfs/xfs_qm.c
··· 13 13 #include "xfs_sb.h" 14 14 #include "xfs_mount.h" 15 15 #include "xfs_inode.h" 16 - #include "xfs_ialloc.h" 17 - #include "xfs_itable.h" 16 + #include "xfs_iwalk.h" 18 17 #include "xfs_quota.h" 19 - #include "xfs_error.h" 20 18 #include "xfs_bmap.h" 21 - #include "xfs_bmap_btree.h" 22 19 #include "xfs_bmap_util.h" 23 20 #include "xfs_trans.h" 24 21 #include "xfs_trans_space.h" 25 22 #include "xfs_qm.h" 26 23 #include "xfs_trace.h" 27 24 #include "xfs_icache.h" 28 - #include "xfs_cksum.h" 29 25 30 26 /* 31 27 * The global quota manager. There is only one of these for the entire ··· 1114 1118 /* ARGSUSED */ 1115 1119 STATIC int 1116 1120 xfs_qm_dqusage_adjust( 1117 - xfs_mount_t *mp, /* mount point for filesystem */ 1118 - xfs_ino_t ino, /* inode number to get data for */ 1119 - void __user *buffer, /* not used */ 1120 - int ubsize, /* not used */ 1121 - int *ubused, /* not used */ 1122 - int *res) /* result code value */ 1121 + struct xfs_mount *mp, 1122 + struct xfs_trans *tp, 1123 + xfs_ino_t ino, 1124 + void *data) 1123 1125 { 1124 - xfs_inode_t *ip; 1125 - xfs_qcnt_t nblks; 1126 - xfs_filblks_t rtblks = 0; /* total rt blks */ 1127 - int error; 1126 + struct xfs_inode *ip; 1127 + xfs_qcnt_t nblks; 1128 + xfs_filblks_t rtblks = 0; /* total rt blks */ 1129 + int error; 1128 1130 1129 1131 ASSERT(XFS_IS_QUOTA_RUNNING(mp)); 1130 1132 ··· 1130 1136 * rootino must have its resources accounted for, not so with the quota 1131 1137 * inodes. 1132 1138 */ 1133 - if (xfs_is_quota_inode(&mp->m_sb, ino)) { 1134 - *res = BULKSTAT_RV_NOTHING; 1135 - return -EINVAL; 1136 - } 1139 + if (xfs_is_quota_inode(&mp->m_sb, ino)) 1140 + return 0; 1137 1141 1138 1142 /* 1139 1143 * We don't _need_ to take the ilock EXCL here because quotacheck runs 1140 1144 * at mount time and therefore nobody will be racing chown/chproj. 
1141 1145 */ 1142 - error = xfs_iget(mp, NULL, ino, XFS_IGET_DONTCACHE, 0, &ip); 1143 - if (error) { 1144 - *res = BULKSTAT_RV_NOTHING; 1146 + error = xfs_iget(mp, tp, ino, XFS_IGET_DONTCACHE, 0, &ip); 1147 + if (error == -EINVAL || error == -ENOENT) 1148 + return 0; 1149 + if (error) 1145 1150 return error; 1146 - } 1147 1151 1148 1152 ASSERT(ip->i_delayed_blks == 0); 1149 1153 ··· 1149 1157 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); 1150 1158 1151 1159 if (!(ifp->if_flags & XFS_IFEXTENTS)) { 1152 - error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK); 1160 + error = xfs_iread_extents(tp, ip, XFS_DATA_FORK); 1153 1161 if (error) 1154 1162 goto error0; 1155 1163 } ··· 1192 1200 goto error0; 1193 1201 } 1194 1202 1195 - xfs_irele(ip); 1196 - *res = BULKSTAT_RV_DIDONE; 1197 - return 0; 1198 - 1199 1203 error0: 1200 1204 xfs_irele(ip); 1201 - *res = BULKSTAT_RV_GIVEUP; 1202 1205 return error; 1203 1206 } 1204 1207 ··· 1257 1270 xfs_qm_quotacheck( 1258 1271 xfs_mount_t *mp) 1259 1272 { 1260 - int done, count, error, error2; 1261 - xfs_ino_t lastino; 1262 - size_t structsz; 1273 + int error, error2; 1263 1274 uint flags; 1264 1275 LIST_HEAD (buffer_list); 1265 1276 struct xfs_inode *uip = mp->m_quotainfo->qi_uquotaip; 1266 1277 struct xfs_inode *gip = mp->m_quotainfo->qi_gquotaip; 1267 1278 struct xfs_inode *pip = mp->m_quotainfo->qi_pquotaip; 1268 1279 1269 - count = INT_MAX; 1270 - structsz = 1; 1271 - lastino = 0; 1272 1280 flags = 0; 1273 1281 1274 1282 ASSERT(uip || gip || pip); ··· 1300 1318 flags |= XFS_PQUOTA_CHKD; 1301 1319 } 1302 1320 1303 - do { 1304 - /* 1305 - * Iterate thru all the inodes in the file system, 1306 - * adjusting the corresponding dquot counters in core. 
1307 - */ 1308 - error = xfs_bulkstat(mp, &lastino, &count, 1309 - xfs_qm_dqusage_adjust, 1310 - structsz, NULL, &done); 1311 - if (error) 1312 - break; 1313 - 1314 - } while (!done); 1321 + error = xfs_iwalk_threaded(mp, 0, 0, xfs_qm_dqusage_adjust, 0, true, 1322 + NULL); 1323 + if (error) 1324 + goto error_return; 1315 1325 1316 1326 /* 1317 1327 * We've made all the changes that we need to make incore. Flush them
+1 -1
fs/xfs/xfs_qm_bhv.c
··· 5 5 */ 6 6 #include "xfs.h" 7 7 #include "xfs_fs.h" 8 + #include "xfs_shared.h" 8 9 #include "xfs_format.h" 9 10 #include "xfs_log_format.h" 10 11 #include "xfs_trans_resv.h" 11 12 #include "xfs_quota.h" 12 13 #include "xfs_mount.h" 13 14 #include "xfs_inode.h" 14 - #include "xfs_error.h" 15 15 #include "xfs_trans.h" 16 16 #include "xfs_qm.h" 17 17
-5
fs/xfs/xfs_qm_syscalls.c
··· 4 4 * All Rights Reserved. 5 5 */ 6 6 7 - #include <linux/capability.h> 8 7 9 8 #include "xfs.h" 10 9 #include "xfs_fs.h" ··· 11 12 #include "xfs_format.h" 12 13 #include "xfs_log_format.h" 13 14 #include "xfs_trans_resv.h" 14 - #include "xfs_bit.h" 15 15 #include "xfs_sb.h" 16 16 #include "xfs_mount.h" 17 17 #include "xfs_inode.h" 18 18 #include "xfs_trans.h" 19 - #include "xfs_error.h" 20 19 #include "xfs_quota.h" 21 20 #include "xfs_qm.h" 22 - #include "xfs_trace.h" 23 21 #include "xfs_icache.h" 24 - #include "xfs_defer.h" 25 22 26 23 STATIC int xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint); 27 24 STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *,
+1 -2
fs/xfs/xfs_quotaops.c
··· 4 4 * All Rights Reserved. 5 5 */ 6 6 #include "xfs.h" 7 + #include "xfs_shared.h" 7 8 #include "xfs_format.h" 8 9 #include "xfs_log_format.h" 9 10 #include "xfs_trans_resv.h" ··· 12 11 #include "xfs_inode.h" 13 12 #include "xfs_quota.h" 14 13 #include "xfs_trans.h" 15 - #include "xfs_trace.h" 16 14 #include "xfs_icache.h" 17 15 #include "xfs_qm.h" 18 - #include <linux/quota.h> 19 16 20 17 21 18 static void
+219 -154
fs/xfs/xfs_refcount_item.c
··· 14 14 #include "xfs_defer.h" 15 15 #include "xfs_trans.h" 16 16 #include "xfs_trans_priv.h" 17 - #include "xfs_buf_item.h" 18 17 #include "xfs_refcount_item.h" 19 18 #include "xfs_log.h" 20 19 #include "xfs_refcount.h" ··· 94 95 } 95 96 96 97 /* 97 - * Pinning has no meaning for an cui item, so just return. 98 - */ 99 - STATIC void 100 - xfs_cui_item_pin( 101 - struct xfs_log_item *lip) 102 - { 103 - } 104 - 105 - /* 106 98 * The unpin operation is the last place an CUI is manipulated in the log. It is 107 99 * either inserted in the AIL or aborted in the event of a log I/O error. In 108 100 * either case, the CUI transaction has been successfully committed to make it ··· 112 122 } 113 123 114 124 /* 115 - * CUI items have no locking or pushing. However, since CUIs are pulled from 116 - * the AIL when their corresponding CUDs are committed to disk, their situation 117 - * is very similar to being pinned. Return XFS_ITEM_PINNED so that the caller 118 - * will eventually flush the log. This should help in getting the CUI out of 119 - * the AIL. 120 - */ 121 - STATIC uint 122 - xfs_cui_item_push( 123 - struct xfs_log_item *lip, 124 - struct list_head *buffer_list) 125 - { 126 - return XFS_ITEM_PINNED; 127 - } 128 - 129 - /* 130 125 * The CUI has been either committed or aborted if the transaction has been 131 126 * cancelled. If the transaction was cancelled, an CUD isn't going to be 132 127 * constructed and thus we free the CUI here directly. 133 128 */ 134 129 STATIC void 135 - xfs_cui_item_unlock( 130 + xfs_cui_item_release( 136 131 struct xfs_log_item *lip) 137 132 { 138 - if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) 139 - xfs_cui_release(CUI_ITEM(lip)); 133 + xfs_cui_release(CUI_ITEM(lip)); 140 134 } 141 135 142 - /* 143 - * The CUI is logged only once and cannot be moved in the log, so simply return 144 - * the lsn at which it's been logged. 
145 - */ 146 - STATIC xfs_lsn_t 147 - xfs_cui_item_committed( 148 - struct xfs_log_item *lip, 149 - xfs_lsn_t lsn) 150 - { 151 - return lsn; 152 - } 153 - 154 - /* 155 - * The CUI dependency tracking op doesn't do squat. It can't because 156 - * it doesn't know where the free extent is coming from. The dependency 157 - * tracking has to be handled by the "enclosing" metadata object. For 158 - * example, for inodes, the inode is locked throughout the extent freeing 159 - * so the dependency should be recorded there. 160 - */ 161 - STATIC void 162 - xfs_cui_item_committing( 163 - struct xfs_log_item *lip, 164 - xfs_lsn_t lsn) 165 - { 166 - } 167 - 168 - /* 169 - * This is the ops vector shared by all cui log items. 170 - */ 171 136 static const struct xfs_item_ops xfs_cui_item_ops = { 172 137 .iop_size = xfs_cui_item_size, 173 138 .iop_format = xfs_cui_item_format, 174 - .iop_pin = xfs_cui_item_pin, 175 139 .iop_unpin = xfs_cui_item_unpin, 176 - .iop_unlock = xfs_cui_item_unlock, 177 - .iop_committed = xfs_cui_item_committed, 178 - .iop_push = xfs_cui_item_push, 179 - .iop_committing = xfs_cui_item_committing, 140 + .iop_release = xfs_cui_item_release, 180 141 }; 181 142 182 143 /* ··· 195 254 } 196 255 197 256 /* 198 - * Pinning has no meaning for an cud item, so just return. 199 - */ 200 - STATIC void 201 - xfs_cud_item_pin( 202 - struct xfs_log_item *lip) 203 - { 204 - } 205 - 206 - /* 207 - * Since pinning has no meaning for an cud item, unpinning does 208 - * not either. 209 - */ 210 - STATIC void 211 - xfs_cud_item_unpin( 212 - struct xfs_log_item *lip, 213 - int remove) 214 - { 215 - } 216 - 217 - /* 218 - * There isn't much you can do to push on an cud item. It is simply stuck 219 - * waiting for the log to be flushed to disk. 
220 - */ 221 - STATIC uint 222 - xfs_cud_item_push( 223 - struct xfs_log_item *lip, 224 - struct list_head *buffer_list) 225 - { 226 - return XFS_ITEM_PINNED; 227 - } 228 - 229 - /* 230 257 * The CUD is either committed or aborted if the transaction is cancelled. If 231 258 * the transaction is cancelled, drop our reference to the CUI and free the 232 259 * CUD. 233 260 */ 234 261 STATIC void 235 - xfs_cud_item_unlock( 262 + xfs_cud_item_release( 236 263 struct xfs_log_item *lip) 237 264 { 238 265 struct xfs_cud_log_item *cudp = CUD_ITEM(lip); 239 266 240 - if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) { 241 - xfs_cui_release(cudp->cud_cuip); 242 - kmem_zone_free(xfs_cud_zone, cudp); 243 - } 244 - } 245 - 246 - /* 247 - * When the cud item is committed to disk, all we need to do is delete our 248 - * reference to our partner cui item and then free ourselves. Since we're 249 - * freeing ourselves we must return -1 to keep the transaction code from 250 - * further referencing this item. 251 - */ 252 - STATIC xfs_lsn_t 253 - xfs_cud_item_committed( 254 - struct xfs_log_item *lip, 255 - xfs_lsn_t lsn) 256 - { 257 - struct xfs_cud_log_item *cudp = CUD_ITEM(lip); 258 - 259 - /* 260 - * Drop the CUI reference regardless of whether the CUD has been 261 - * aborted. Once the CUD transaction is constructed, it is the sole 262 - * responsibility of the CUD to release the CUI (even if the CUI is 263 - * aborted due to log I/O error). 264 - */ 265 267 xfs_cui_release(cudp->cud_cuip); 266 268 kmem_zone_free(xfs_cud_zone, cudp); 267 - 268 - return (xfs_lsn_t)-1; 269 269 } 270 270 271 - /* 272 - * The CUD dependency tracking op doesn't do squat. It can't because 273 - * it doesn't know where the free extent is coming from. The dependency 274 - * tracking has to be handled by the "enclosing" metadata object. For 275 - * example, for inodes, the inode is locked throughout the extent freeing 276 - * so the dependency should be recorded there. 
277 - */ 278 - STATIC void 279 - xfs_cud_item_committing( 280 - struct xfs_log_item *lip, 281 - xfs_lsn_t lsn) 282 - { 283 - } 284 - 285 - /* 286 - * This is the ops vector shared by all cud log items. 287 - */ 288 271 static const struct xfs_item_ops xfs_cud_item_ops = { 272 + .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED, 289 273 .iop_size = xfs_cud_item_size, 290 274 .iop_format = xfs_cud_item_format, 291 - .iop_pin = xfs_cud_item_pin, 292 - .iop_unpin = xfs_cud_item_unpin, 293 - .iop_unlock = xfs_cud_item_unlock, 294 - .iop_committed = xfs_cud_item_committed, 295 - .iop_push = xfs_cud_item_push, 296 - .iop_committing = xfs_cud_item_committing, 275 + .iop_release = xfs_cud_item_release, 297 276 }; 298 277 299 - /* 300 - * Allocate and initialize an cud item with the given number of extents. 301 - */ 302 - struct xfs_cud_log_item * 303 - xfs_cud_init( 304 - struct xfs_mount *mp, 278 + static struct xfs_cud_log_item * 279 + xfs_trans_get_cud( 280 + struct xfs_trans *tp, 305 281 struct xfs_cui_log_item *cuip) 306 - 307 282 { 308 - struct xfs_cud_log_item *cudp; 283 + struct xfs_cud_log_item *cudp; 309 284 310 285 cudp = kmem_zone_zalloc(xfs_cud_zone, KM_SLEEP); 311 - xfs_log_item_init(mp, &cudp->cud_item, XFS_LI_CUD, &xfs_cud_item_ops); 286 + xfs_log_item_init(tp->t_mountp, &cudp->cud_item, XFS_LI_CUD, 287 + &xfs_cud_item_ops); 312 288 cudp->cud_cuip = cuip; 313 289 cudp->cud_format.cud_cui_id = cuip->cui_format.cui_id; 314 290 291 + xfs_trans_add_item(tp, &cudp->cud_item); 315 292 return cudp; 316 293 } 294 + 295 + /* 296 + * Finish an refcount update and log it to the CUD. Note that the 297 + * transaction is marked dirty regardless of whether the refcount 298 + * update succeeds or fails to support the CUI/CUD lifecycle rules. 
299 + */ 300 + static int 301 + xfs_trans_log_finish_refcount_update( 302 + struct xfs_trans *tp, 303 + struct xfs_cud_log_item *cudp, 304 + enum xfs_refcount_intent_type type, 305 + xfs_fsblock_t startblock, 306 + xfs_extlen_t blockcount, 307 + xfs_fsblock_t *new_fsb, 308 + xfs_extlen_t *new_len, 309 + struct xfs_btree_cur **pcur) 310 + { 311 + int error; 312 + 313 + error = xfs_refcount_finish_one(tp, type, startblock, 314 + blockcount, new_fsb, new_len, pcur); 315 + 316 + /* 317 + * Mark the transaction dirty, even on error. This ensures the 318 + * transaction is aborted, which: 319 + * 320 + * 1.) releases the CUI and frees the CUD 321 + * 2.) shuts down the filesystem 322 + */ 323 + tp->t_flags |= XFS_TRANS_DIRTY; 324 + set_bit(XFS_LI_DIRTY, &cudp->cud_item.li_flags); 325 + 326 + return error; 327 + } 328 + 329 + /* Sort refcount intents by AG. */ 330 + static int 331 + xfs_refcount_update_diff_items( 332 + void *priv, 333 + struct list_head *a, 334 + struct list_head *b) 335 + { 336 + struct xfs_mount *mp = priv; 337 + struct xfs_refcount_intent *ra; 338 + struct xfs_refcount_intent *rb; 339 + 340 + ra = container_of(a, struct xfs_refcount_intent, ri_list); 341 + rb = container_of(b, struct xfs_refcount_intent, ri_list); 342 + return XFS_FSB_TO_AGNO(mp, ra->ri_startblock) - 343 + XFS_FSB_TO_AGNO(mp, rb->ri_startblock); 344 + } 345 + 346 + /* Get an CUI. */ 347 + STATIC void * 348 + xfs_refcount_update_create_intent( 349 + struct xfs_trans *tp, 350 + unsigned int count) 351 + { 352 + struct xfs_cui_log_item *cuip; 353 + 354 + ASSERT(tp != NULL); 355 + ASSERT(count > 0); 356 + 357 + cuip = xfs_cui_init(tp->t_mountp, count); 358 + ASSERT(cuip != NULL); 359 + 360 + /* 361 + * Get a log_item_desc to point at the new item. 362 + */ 363 + xfs_trans_add_item(tp, &cuip->cui_item); 364 + return cuip; 365 + } 366 + 367 + /* Set the phys extent flags for this reverse mapping. 
*/ 368 + static void 369 + xfs_trans_set_refcount_flags( 370 + struct xfs_phys_extent *refc, 371 + enum xfs_refcount_intent_type type) 372 + { 373 + refc->pe_flags = 0; 374 + switch (type) { 375 + case XFS_REFCOUNT_INCREASE: 376 + case XFS_REFCOUNT_DECREASE: 377 + case XFS_REFCOUNT_ALLOC_COW: 378 + case XFS_REFCOUNT_FREE_COW: 379 + refc->pe_flags |= type; 380 + break; 381 + default: 382 + ASSERT(0); 383 + } 384 + } 385 + 386 + /* Log refcount updates in the intent item. */ 387 + STATIC void 388 + xfs_refcount_update_log_item( 389 + struct xfs_trans *tp, 390 + void *intent, 391 + struct list_head *item) 392 + { 393 + struct xfs_cui_log_item *cuip = intent; 394 + struct xfs_refcount_intent *refc; 395 + uint next_extent; 396 + struct xfs_phys_extent *ext; 397 + 398 + refc = container_of(item, struct xfs_refcount_intent, ri_list); 399 + 400 + tp->t_flags |= XFS_TRANS_DIRTY; 401 + set_bit(XFS_LI_DIRTY, &cuip->cui_item.li_flags); 402 + 403 + /* 404 + * atomic_inc_return gives us the value after the increment; 405 + * we want to use it as an array index so we need to subtract 1 from 406 + * it. 407 + */ 408 + next_extent = atomic_inc_return(&cuip->cui_next_extent) - 1; 409 + ASSERT(next_extent < cuip->cui_format.cui_nextents); 410 + ext = &cuip->cui_format.cui_extents[next_extent]; 411 + ext->pe_startblock = refc->ri_startblock; 412 + ext->pe_len = refc->ri_blockcount; 413 + xfs_trans_set_refcount_flags(ext, refc->ri_type); 414 + } 415 + 416 + /* Get an CUD so we can process all the deferred refcount updates. */ 417 + STATIC void * 418 + xfs_refcount_update_create_done( 419 + struct xfs_trans *tp, 420 + void *intent, 421 + unsigned int count) 422 + { 423 + return xfs_trans_get_cud(tp, intent); 424 + } 425 + 426 + /* Process a deferred refcount update. 
*/ 427 + STATIC int 428 + xfs_refcount_update_finish_item( 429 + struct xfs_trans *tp, 430 + struct list_head *item, 431 + void *done_item, 432 + void **state) 433 + { 434 + struct xfs_refcount_intent *refc; 435 + xfs_fsblock_t new_fsb; 436 + xfs_extlen_t new_aglen; 437 + int error; 438 + 439 + refc = container_of(item, struct xfs_refcount_intent, ri_list); 440 + error = xfs_trans_log_finish_refcount_update(tp, done_item, 441 + refc->ri_type, 442 + refc->ri_startblock, 443 + refc->ri_blockcount, 444 + &new_fsb, &new_aglen, 445 + (struct xfs_btree_cur **)state); 446 + /* Did we run out of reservation? Requeue what we didn't finish. */ 447 + if (!error && new_aglen > 0) { 448 + ASSERT(refc->ri_type == XFS_REFCOUNT_INCREASE || 449 + refc->ri_type == XFS_REFCOUNT_DECREASE); 450 + refc->ri_startblock = new_fsb; 451 + refc->ri_blockcount = new_aglen; 452 + return -EAGAIN; 453 + } 454 + kmem_free(refc); 455 + return error; 456 + } 457 + 458 + /* Clean up after processing deferred refcounts. */ 459 + STATIC void 460 + xfs_refcount_update_finish_cleanup( 461 + struct xfs_trans *tp, 462 + void *state, 463 + int error) 464 + { 465 + struct xfs_btree_cur *rcur = state; 466 + 467 + xfs_refcount_finish_one_cleanup(tp, rcur, error); 468 + } 469 + 470 + /* Abort all pending CUIs. */ 471 + STATIC void 472 + xfs_refcount_update_abort_intent( 473 + void *intent) 474 + { 475 + xfs_cui_release(intent); 476 + } 477 + 478 + /* Cancel a deferred refcount update. 
*/ 479 + STATIC void 480 + xfs_refcount_update_cancel_item( 481 + struct list_head *item) 482 + { 483 + struct xfs_refcount_intent *refc; 484 + 485 + refc = container_of(item, struct xfs_refcount_intent, ri_list); 486 + kmem_free(refc); 487 + } 488 + 489 + const struct xfs_defer_op_type xfs_refcount_update_defer_type = { 490 + .max_items = XFS_CUI_MAX_FAST_EXTENTS, 491 + .diff_items = xfs_refcount_update_diff_items, 492 + .create_intent = xfs_refcount_update_create_intent, 493 + .abort_intent = xfs_refcount_update_abort_intent, 494 + .log_item = xfs_refcount_update_log_item, 495 + .create_done = xfs_refcount_update_create_done, 496 + .finish_item = xfs_refcount_update_finish_item, 497 + .finish_cleanup = xfs_refcount_update_finish_cleanup, 498 + .cancel_item = xfs_refcount_update_cancel_item, 499 + }; 317 500 318 501 /* 319 502 * Process a refcount update intent item that was recovered from the log.
-2
fs/xfs/xfs_refcount_item.h
··· 78 78 extern struct kmem_zone *xfs_cud_zone; 79 79 80 80 struct xfs_cui_log_item *xfs_cui_init(struct xfs_mount *, uint); 81 - struct xfs_cud_log_item *xfs_cud_init(struct xfs_mount *, 82 - struct xfs_cui_log_item *); 83 81 void xfs_cui_item_free(struct xfs_cui_log_item *); 84 82 void xfs_cui_release(struct xfs_cui_log_item *); 85 83 int xfs_cui_recover(struct xfs_trans *parent_tp, struct xfs_cui_log_item *cuip);
+2 -13
fs/xfs/xfs_reflink.c
··· 11 11 #include "xfs_trans_resv.h" 12 12 #include "xfs_mount.h" 13 13 #include "xfs_defer.h" 14 - #include "xfs_da_format.h" 15 - #include "xfs_da_btree.h" 16 14 #include "xfs_inode.h" 17 15 #include "xfs_trans.h" 18 - #include "xfs_inode_item.h" 19 16 #include "xfs_bmap.h" 20 17 #include "xfs_bmap_util.h" 21 - #include "xfs_error.h" 22 - #include "xfs_dir2.h" 23 - #include "xfs_dir2_priv.h" 24 - #include "xfs_ioctl.h" 25 18 #include "xfs_trace.h" 26 - #include "xfs_log.h" 27 19 #include "xfs_icache.h" 28 - #include "xfs_pnfs.h" 29 20 #include "xfs_btree.h" 30 21 #include "xfs_refcount_btree.h" 31 22 #include "xfs_refcount.h" ··· 24 33 #include "xfs_trans_space.h" 25 34 #include "xfs_bit.h" 26 35 #include "xfs_alloc.h" 27 - #include "xfs_quota_defs.h" 28 36 #include "xfs_quota.h" 29 37 #include "xfs_reflink.h" 30 38 #include "xfs_iomap.h" 31 - #include "xfs_rmap_btree.h" 32 39 #include "xfs_sb.h" 33 40 #include "xfs_ag_resv.h" 34 41 ··· 561 572 562 573 /* Start a rolling transaction to remove the mappings */ 563 574 error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write, 564 - 0, 0, XFS_TRANS_NOFS, &tp); 575 + 0, 0, 0, &tp); 565 576 if (error) 566 577 goto out; 567 578 ··· 620 631 621 632 resblks = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK); 622 633 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 623 - XFS_TRANS_RESERVE | XFS_TRANS_NOFS, &tp); 634 + XFS_TRANS_RESERVE, &tp); 624 635 if (error) 625 636 return error; 626 637
+241 -155
fs/xfs/xfs_rmap_item.c
··· 14 14 #include "xfs_defer.h" 15 15 #include "xfs_trans.h" 16 16 #include "xfs_trans_priv.h" 17 - #include "xfs_buf_item.h" 18 17 #include "xfs_rmap_item.h" 19 18 #include "xfs_log.h" 20 19 #include "xfs_rmap.h" ··· 93 94 } 94 95 95 96 /* 96 - * Pinning has no meaning for an rui item, so just return. 97 - */ 98 - STATIC void 99 - xfs_rui_item_pin( 100 - struct xfs_log_item *lip) 101 - { 102 - } 103 - 104 - /* 105 97 * The unpin operation is the last place an RUI is manipulated in the log. It is 106 98 * either inserted in the AIL or aborted in the event of a log I/O error. In 107 99 * either case, the RUI transaction has been successfully committed to make it ··· 111 121 } 112 122 113 123 /* 114 - * RUI items have no locking or pushing. However, since RUIs are pulled from 115 - * the AIL when their corresponding RUDs are committed to disk, their situation 116 - * is very similar to being pinned. Return XFS_ITEM_PINNED so that the caller 117 - * will eventually flush the log. This should help in getting the RUI out of 118 - * the AIL. 119 - */ 120 - STATIC uint 121 - xfs_rui_item_push( 122 - struct xfs_log_item *lip, 123 - struct list_head *buffer_list) 124 - { 125 - return XFS_ITEM_PINNED; 126 - } 127 - 128 - /* 129 124 * The RUI has been either committed or aborted if the transaction has been 130 125 * cancelled. If the transaction was cancelled, an RUD isn't going to be 131 126 * constructed and thus we free the RUI here directly. 132 127 */ 133 128 STATIC void 134 - xfs_rui_item_unlock( 129 + xfs_rui_item_release( 135 130 struct xfs_log_item *lip) 136 131 { 137 - if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) 138 - xfs_rui_release(RUI_ITEM(lip)); 132 + xfs_rui_release(RUI_ITEM(lip)); 139 133 } 140 134 141 - /* 142 - * The RUI is logged only once and cannot be moved in the log, so simply return 143 - * the lsn at which it's been logged. 
144 - */ 145 - STATIC xfs_lsn_t 146 - xfs_rui_item_committed( 147 - struct xfs_log_item *lip, 148 - xfs_lsn_t lsn) 149 - { 150 - return lsn; 151 - } 152 - 153 - /* 154 - * The RUI dependency tracking op doesn't do squat. It can't because 155 - * it doesn't know where the free extent is coming from. The dependency 156 - * tracking has to be handled by the "enclosing" metadata object. For 157 - * example, for inodes, the inode is locked throughout the extent freeing 158 - * so the dependency should be recorded there. 159 - */ 160 - STATIC void 161 - xfs_rui_item_committing( 162 - struct xfs_log_item *lip, 163 - xfs_lsn_t lsn) 164 - { 165 - } 166 - 167 - /* 168 - * This is the ops vector shared by all rui log items. 169 - */ 170 135 static const struct xfs_item_ops xfs_rui_item_ops = { 171 136 .iop_size = xfs_rui_item_size, 172 137 .iop_format = xfs_rui_item_format, 173 - .iop_pin = xfs_rui_item_pin, 174 138 .iop_unpin = xfs_rui_item_unpin, 175 - .iop_unlock = xfs_rui_item_unlock, 176 - .iop_committed = xfs_rui_item_committed, 177 - .iop_push = xfs_rui_item_push, 178 - .iop_committing = xfs_rui_item_committing, 139 + .iop_release = xfs_rui_item_release, 179 140 }; 180 141 181 142 /* ··· 216 275 } 217 276 218 277 /* 219 - * Pinning has no meaning for an rud item, so just return. 220 - */ 221 - STATIC void 222 - xfs_rud_item_pin( 223 - struct xfs_log_item *lip) 224 - { 225 - } 226 - 227 - /* 228 - * Since pinning has no meaning for an rud item, unpinning does 229 - * not either. 230 - */ 231 - STATIC void 232 - xfs_rud_item_unpin( 233 - struct xfs_log_item *lip, 234 - int remove) 235 - { 236 - } 237 - 238 - /* 239 - * There isn't much you can do to push on an rud item. It is simply stuck 240 - * waiting for the log to be flushed to disk. 
241 - */ 242 - STATIC uint 243 - xfs_rud_item_push( 244 - struct xfs_log_item *lip, 245 - struct list_head *buffer_list) 246 - { 247 - return XFS_ITEM_PINNED; 248 - } 249 - 250 - /* 251 278 * The RUD is either committed or aborted if the transaction is cancelled. If 252 279 * the transaction is cancelled, drop our reference to the RUI and free the 253 280 * RUD. 254 281 */ 255 282 STATIC void 256 - xfs_rud_item_unlock( 283 + xfs_rud_item_release( 257 284 struct xfs_log_item *lip) 258 285 { 259 286 struct xfs_rud_log_item *rudp = RUD_ITEM(lip); 260 287 261 - if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) { 262 - xfs_rui_release(rudp->rud_ruip); 263 - kmem_zone_free(xfs_rud_zone, rudp); 288 + xfs_rui_release(rudp->rud_ruip); 289 + kmem_zone_free(xfs_rud_zone, rudp); 290 + } 291 + 292 + static const struct xfs_item_ops xfs_rud_item_ops = { 293 + .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED, 294 + .iop_size = xfs_rud_item_size, 295 + .iop_format = xfs_rud_item_format, 296 + .iop_release = xfs_rud_item_release, 297 + }; 298 + 299 + static struct xfs_rud_log_item * 300 + xfs_trans_get_rud( 301 + struct xfs_trans *tp, 302 + struct xfs_rui_log_item *ruip) 303 + { 304 + struct xfs_rud_log_item *rudp; 305 + 306 + rudp = kmem_zone_zalloc(xfs_rud_zone, KM_SLEEP); 307 + xfs_log_item_init(tp->t_mountp, &rudp->rud_item, XFS_LI_RUD, 308 + &xfs_rud_item_ops); 309 + rudp->rud_ruip = ruip; 310 + rudp->rud_format.rud_rui_id = ruip->rui_format.rui_id; 311 + 312 + xfs_trans_add_item(tp, &rudp->rud_item); 313 + return rudp; 314 + } 315 + 316 + /* Set the map extent flags for this reverse mapping. 
*/ 317 + static void 318 + xfs_trans_set_rmap_flags( 319 + struct xfs_map_extent *rmap, 320 + enum xfs_rmap_intent_type type, 321 + int whichfork, 322 + xfs_exntst_t state) 323 + { 324 + rmap->me_flags = 0; 325 + if (state == XFS_EXT_UNWRITTEN) 326 + rmap->me_flags |= XFS_RMAP_EXTENT_UNWRITTEN; 327 + if (whichfork == XFS_ATTR_FORK) 328 + rmap->me_flags |= XFS_RMAP_EXTENT_ATTR_FORK; 329 + switch (type) { 330 + case XFS_RMAP_MAP: 331 + rmap->me_flags |= XFS_RMAP_EXTENT_MAP; 332 + break; 333 + case XFS_RMAP_MAP_SHARED: 334 + rmap->me_flags |= XFS_RMAP_EXTENT_MAP_SHARED; 335 + break; 336 + case XFS_RMAP_UNMAP: 337 + rmap->me_flags |= XFS_RMAP_EXTENT_UNMAP; 338 + break; 339 + case XFS_RMAP_UNMAP_SHARED: 340 + rmap->me_flags |= XFS_RMAP_EXTENT_UNMAP_SHARED; 341 + break; 342 + case XFS_RMAP_CONVERT: 343 + rmap->me_flags |= XFS_RMAP_EXTENT_CONVERT; 344 + break; 345 + case XFS_RMAP_CONVERT_SHARED: 346 + rmap->me_flags |= XFS_RMAP_EXTENT_CONVERT_SHARED; 347 + break; 348 + case XFS_RMAP_ALLOC: 349 + rmap->me_flags |= XFS_RMAP_EXTENT_ALLOC; 350 + break; 351 + case XFS_RMAP_FREE: 352 + rmap->me_flags |= XFS_RMAP_EXTENT_FREE; 353 + break; 354 + default: 355 + ASSERT(0); 264 356 } 265 357 } 266 358 267 359 /* 268 - * When the rud item is committed to disk, all we need to do is delete our 269 - * reference to our partner rui item and then free ourselves. Since we're 270 - * freeing ourselves we must return -1 to keep the transaction code from 271 - * further referencing this item. 360 + * Finish an rmap update and log it to the RUD. Note that the transaction is 361 + * marked dirty regardless of whether the rmap update succeeds or fails to 362 + * support the RUI/RUD lifecycle rules. 
272 363 */ 273 - STATIC xfs_lsn_t 274 - xfs_rud_item_committed( 275 - struct xfs_log_item *lip, 276 - xfs_lsn_t lsn) 364 + static int 365 + xfs_trans_log_finish_rmap_update( 366 + struct xfs_trans *tp, 367 + struct xfs_rud_log_item *rudp, 368 + enum xfs_rmap_intent_type type, 369 + uint64_t owner, 370 + int whichfork, 371 + xfs_fileoff_t startoff, 372 + xfs_fsblock_t startblock, 373 + xfs_filblks_t blockcount, 374 + xfs_exntst_t state, 375 + struct xfs_btree_cur **pcur) 277 376 { 278 - struct xfs_rud_log_item *rudp = RUD_ITEM(lip); 377 + int error; 378 + 379 + error = xfs_rmap_finish_one(tp, type, owner, whichfork, startoff, 380 + startblock, blockcount, state, pcur); 279 381 280 382 /* 281 - * Drop the RUI reference regardless of whether the RUD has been 282 - * aborted. Once the RUD transaction is constructed, it is the sole 283 - * responsibility of the RUD to release the RUI (even if the RUI is 284 - * aborted due to log I/O error). 383 + * Mark the transaction dirty, even on error. This ensures the 384 + * transaction is aborted, which: 385 + * 386 + * 1.) releases the RUI and frees the RUD 387 + * 2.) shuts down the filesystem 285 388 */ 286 - xfs_rui_release(rudp->rud_ruip); 287 - kmem_zone_free(xfs_rud_zone, rudp); 389 + tp->t_flags |= XFS_TRANS_DIRTY; 390 + set_bit(XFS_LI_DIRTY, &rudp->rud_item.li_flags); 288 391 289 - return (xfs_lsn_t)-1; 392 + return error; 290 393 } 291 394 292 - /* 293 - * The RUD dependency tracking op doesn't do squat. It can't because 294 - * it doesn't know where the free extent is coming from. The dependency 295 - * tracking has to be handled by the "enclosing" metadata object. For 296 - * example, for inodes, the inode is locked throughout the extent freeing 297 - * so the dependency should be recorded there. 298 - */ 395 + /* Sort rmap intents by AG. 
*/ 396 + static int 397 + xfs_rmap_update_diff_items( 398 + void *priv, 399 + struct list_head *a, 400 + struct list_head *b) 401 + { 402 + struct xfs_mount *mp = priv; 403 + struct xfs_rmap_intent *ra; 404 + struct xfs_rmap_intent *rb; 405 + 406 + ra = container_of(a, struct xfs_rmap_intent, ri_list); 407 + rb = container_of(b, struct xfs_rmap_intent, ri_list); 408 + return XFS_FSB_TO_AGNO(mp, ra->ri_bmap.br_startblock) - 409 + XFS_FSB_TO_AGNO(mp, rb->ri_bmap.br_startblock); 410 + } 411 + 412 + /* Get an RUI. */ 413 + STATIC void * 414 + xfs_rmap_update_create_intent( 415 + struct xfs_trans *tp, 416 + unsigned int count) 417 + { 418 + struct xfs_rui_log_item *ruip; 419 + 420 + ASSERT(tp != NULL); 421 + ASSERT(count > 0); 422 + 423 + ruip = xfs_rui_init(tp->t_mountp, count); 424 + ASSERT(ruip != NULL); 425 + 426 + /* 427 + * Get a log_item_desc to point at the new item. 428 + */ 429 + xfs_trans_add_item(tp, &ruip->rui_item); 430 + return ruip; 431 + } 432 + 433 + /* Log rmap updates in the intent item. */ 299 434 STATIC void 300 - xfs_rud_item_committing( 301 - struct xfs_log_item *lip, 302 - xfs_lsn_t lsn) 435 + xfs_rmap_update_log_item( 436 + struct xfs_trans *tp, 437 + void *intent, 438 + struct list_head *item) 303 439 { 440 + struct xfs_rui_log_item *ruip = intent; 441 + struct xfs_rmap_intent *rmap; 442 + uint next_extent; 443 + struct xfs_map_extent *map; 444 + 445 + rmap = container_of(item, struct xfs_rmap_intent, ri_list); 446 + 447 + tp->t_flags |= XFS_TRANS_DIRTY; 448 + set_bit(XFS_LI_DIRTY, &ruip->rui_item.li_flags); 449 + 450 + /* 451 + * atomic_inc_return gives us the value after the increment; 452 + * we want to use it as an array index so we need to subtract 1 from 453 + * it. 
454 + */ 455 + next_extent = atomic_inc_return(&ruip->rui_next_extent) - 1; 456 + ASSERT(next_extent < ruip->rui_format.rui_nextents); 457 + map = &ruip->rui_format.rui_extents[next_extent]; 458 + map->me_owner = rmap->ri_owner; 459 + map->me_startblock = rmap->ri_bmap.br_startblock; 460 + map->me_startoff = rmap->ri_bmap.br_startoff; 461 + map->me_len = rmap->ri_bmap.br_blockcount; 462 + xfs_trans_set_rmap_flags(map, rmap->ri_type, rmap->ri_whichfork, 463 + rmap->ri_bmap.br_state); 304 464 } 305 465 306 - /* 307 - * This is the ops vector shared by all rud log items. 308 - */ 309 - static const struct xfs_item_ops xfs_rud_item_ops = { 310 - .iop_size = xfs_rud_item_size, 311 - .iop_format = xfs_rud_item_format, 312 - .iop_pin = xfs_rud_item_pin, 313 - .iop_unpin = xfs_rud_item_unpin, 314 - .iop_unlock = xfs_rud_item_unlock, 315 - .iop_committed = xfs_rud_item_committed, 316 - .iop_push = xfs_rud_item_push, 317 - .iop_committing = xfs_rud_item_committing, 466 + /* Get an RUD so we can process all the deferred rmap updates. */ 467 + STATIC void * 468 + xfs_rmap_update_create_done( 469 + struct xfs_trans *tp, 470 + void *intent, 471 + unsigned int count) 472 + { 473 + return xfs_trans_get_rud(tp, intent); 474 + } 475 + 476 + /* Process a deferred rmap update. */ 477 + STATIC int 478 + xfs_rmap_update_finish_item( 479 + struct xfs_trans *tp, 480 + struct list_head *item, 481 + void *done_item, 482 + void **state) 483 + { 484 + struct xfs_rmap_intent *rmap; 485 + int error; 486 + 487 + rmap = container_of(item, struct xfs_rmap_intent, ri_list); 488 + error = xfs_trans_log_finish_rmap_update(tp, done_item, 489 + rmap->ri_type, 490 + rmap->ri_owner, rmap->ri_whichfork, 491 + rmap->ri_bmap.br_startoff, 492 + rmap->ri_bmap.br_startblock, 493 + rmap->ri_bmap.br_blockcount, 494 + rmap->ri_bmap.br_state, 495 + (struct xfs_btree_cur **)state); 496 + kmem_free(rmap); 497 + return error; 498 + } 499 + 500 + /* Clean up after processing deferred rmaps. 
*/ 501 + STATIC void 502 + xfs_rmap_update_finish_cleanup( 503 + struct xfs_trans *tp, 504 + void *state, 505 + int error) 506 + { 507 + struct xfs_btree_cur *rcur = state; 508 + 509 + xfs_rmap_finish_one_cleanup(tp, rcur, error); 510 + } 511 + 512 + /* Abort all pending RUIs. */ 513 + STATIC void 514 + xfs_rmap_update_abort_intent( 515 + void *intent) 516 + { 517 + xfs_rui_release(intent); 518 + } 519 + 520 + /* Cancel a deferred rmap update. */ 521 + STATIC void 522 + xfs_rmap_update_cancel_item( 523 + struct list_head *item) 524 + { 525 + struct xfs_rmap_intent *rmap; 526 + 527 + rmap = container_of(item, struct xfs_rmap_intent, ri_list); 528 + kmem_free(rmap); 529 + } 530 + 531 + const struct xfs_defer_op_type xfs_rmap_update_defer_type = { 532 + .max_items = XFS_RUI_MAX_FAST_EXTENTS, 533 + .diff_items = xfs_rmap_update_diff_items, 534 + .create_intent = xfs_rmap_update_create_intent, 535 + .abort_intent = xfs_rmap_update_abort_intent, 536 + .log_item = xfs_rmap_update_log_item, 537 + .create_done = xfs_rmap_update_create_done, 538 + .finish_item = xfs_rmap_update_finish_item, 539 + .finish_cleanup = xfs_rmap_update_finish_cleanup, 540 + .cancel_item = xfs_rmap_update_cancel_item, 318 541 }; 319 - 320 - /* 321 - * Allocate and initialize an rud item with the given number of extents. 322 - */ 323 - struct xfs_rud_log_item * 324 - xfs_rud_init( 325 - struct xfs_mount *mp, 326 - struct xfs_rui_log_item *ruip) 327 - 328 - { 329 - struct xfs_rud_log_item *rudp; 330 - 331 - rudp = kmem_zone_zalloc(xfs_rud_zone, KM_SLEEP); 332 - xfs_log_item_init(mp, &rudp->rud_item, XFS_LI_RUD, &xfs_rud_item_ops); 333 - rudp->rud_ruip = ruip; 334 - rudp->rud_format.rud_rui_id = ruip->rui_format.rui_id; 335 - 336 - return rudp; 337 - } 338 542 339 543 /* 340 544 * Process an rmap update intent item that was recovered from the log.
-2
fs/xfs/xfs_rmap_item.h
··· 78 78 extern struct kmem_zone *xfs_rud_zone; 79 79 80 80 struct xfs_rui_log_item *xfs_rui_init(struct xfs_mount *, uint); 81 - struct xfs_rud_log_item *xfs_rud_init(struct xfs_mount *, 82 - struct xfs_rui_log_item *); 83 81 int xfs_rui_copy_format(struct xfs_log_iovec *buf, 84 82 struct xfs_rui_log_format *dst_rui_fmt); 85 83 void xfs_rui_item_free(struct xfs_rui_log_item *);
-6
fs/xfs/xfs_rtalloc.c
··· 11 11 #include "xfs_trans_resv.h" 12 12 #include "xfs_bit.h" 13 13 #include "xfs_mount.h" 14 - #include "xfs_defer.h" 15 14 #include "xfs_inode.h" 16 15 #include "xfs_bmap.h" 17 - #include "xfs_bmap_util.h" 18 16 #include "xfs_bmap_btree.h" 19 - #include "xfs_alloc.h" 20 - #include "xfs_error.h" 21 17 #include "xfs_trans.h" 22 18 #include "xfs_trans_space.h" 23 - #include "xfs_trace.h" 24 - #include "xfs_buf.h" 25 19 #include "xfs_icache.h" 26 20 #include "xfs_rtalloc.h" 27 21
-1
fs/xfs/xfs_stats.c
··· 4 4 * All Rights Reserved. 5 5 */ 6 6 #include "xfs.h" 7 - #include <linux/proc_fs.h> 8 7 9 8 struct xstats xfsstats; 10 9
+6 -26
fs/xfs/xfs_super.c
··· 11 11 #include "xfs_trans_resv.h" 12 12 #include "xfs_sb.h" 13 13 #include "xfs_mount.h" 14 - #include "xfs_da_format.h" 15 14 #include "xfs_inode.h" 16 15 #include "xfs_btree.h" 17 16 #include "xfs_bmap.h" 18 17 #include "xfs_alloc.h" 19 - #include "xfs_error.h" 20 18 #include "xfs_fsops.h" 21 19 #include "xfs_trans.h" 22 20 #include "xfs_buf_item.h" 23 21 #include "xfs_log.h" 24 22 #include "xfs_log_priv.h" 25 - #include "xfs_da_btree.h" 26 23 #include "xfs_dir2.h" 27 24 #include "xfs_extfree_item.h" 28 25 #include "xfs_mru_cache.h" ··· 35 38 #include "xfs_refcount_item.h" 36 39 #include "xfs_bmap_item.h" 37 40 #include "xfs_reflink.h" 38 - #include "xfs_defer.h" 39 41 40 - #include <linux/namei.h> 41 - #include <linux/dax.h> 42 - #include <linux/init.h> 43 - #include <linux/slab.h> 44 42 #include <linux/magic.h> 45 - #include <linux/mount.h> 46 - #include <linux/mempool.h> 47 - #include <linux/writeback.h> 48 - #include <linux/kthread.h> 49 - #include <linux/freezer.h> 50 43 #include <linux/parser.h> 51 44 52 45 static const struct super_operations xfs_super_operations; ··· 569 582 * Calculate how much should be reserved for inodes to meet 570 583 * the max inode percentage. Used only for inode32. 
571 584 */ 572 - if (mp->m_maxicount) { 585 + if (M_IGEO(mp)->maxicount) { 573 586 uint64_t icount; 574 587 575 588 icount = sbp->sb_dblocks * sbp->sb_imax_pct; ··· 827 840 if (!mp->m_reclaim_workqueue) 828 841 goto out_destroy_cil; 829 842 830 - mp->m_log_workqueue = alloc_workqueue("xfs-log/%s", 831 - WQ_MEM_RECLAIM|WQ_FREEZABLE|WQ_HIGHPRI, 0, 832 - mp->m_fsname); 833 - if (!mp->m_log_workqueue) 834 - goto out_destroy_reclaim; 835 - 836 843 mp->m_eofblocks_workqueue = alloc_workqueue("xfs-eofblocks/%s", 837 844 WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname); 838 845 if (!mp->m_eofblocks_workqueue) 839 - goto out_destroy_log; 846 + goto out_destroy_reclaim; 840 847 841 848 mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s", WQ_FREEZABLE, 0, 842 849 mp->m_fsname); ··· 841 860 842 861 out_destroy_eofb: 843 862 destroy_workqueue(mp->m_eofblocks_workqueue); 844 - out_destroy_log: 845 - destroy_workqueue(mp->m_log_workqueue); 846 863 out_destroy_reclaim: 847 864 destroy_workqueue(mp->m_reclaim_workqueue); 848 865 out_destroy_cil: ··· 859 880 { 860 881 destroy_workqueue(mp->m_sync_workqueue); 861 882 destroy_workqueue(mp->m_eofblocks_workqueue); 862 - destroy_workqueue(mp->m_log_workqueue); 863 883 destroy_workqueue(mp->m_reclaim_workqueue); 864 884 destroy_workqueue(mp->m_cil_workqueue); 865 885 destroy_workqueue(mp->m_unwritten_workqueue); ··· 1109 1131 1110 1132 fakeinos = XFS_FSB_TO_INO(mp, statp->f_bfree); 1111 1133 statp->f_files = min(icount + fakeinos, (uint64_t)XFS_MAXINUMBER); 1112 - if (mp->m_maxicount) 1134 + if (M_IGEO(mp)->maxicount) 1113 1135 statp->f_files = min_t(typeof(statp->f_files), 1114 1136 statp->f_files, 1115 - mp->m_maxicount); 1137 + M_IGEO(mp)->maxicount); 1116 1138 1117 1139 /* If sb_icount overshot maxicount, report actual allocation */ 1118 1140 statp->f_files = max_t(typeof(statp->f_files), ··· 1663 1685 sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits); 1664 1686 sb->s_max_links = XFS_MAXLINK; 1665 1687 sb->s_time_gran = 1; 
1688 + sb->s_iflags |= SB_I_CGROUPWB; 1689 + 1666 1690 set_posix_acl_flag(sb); 1667 1691 1668 1692 /* version 5 superblocks support inode version counters. */
+14
fs/xfs/xfs_super.h
··· 38 38 # define XFS_SCRUB_STRING 39 39 #endif 40 40 41 + #ifdef CONFIG_XFS_ONLINE_REPAIR 42 + # define XFS_REPAIR_STRING "repair, " 43 + #else 44 + # define XFS_REPAIR_STRING 45 + #endif 46 + 47 + #ifdef CONFIG_XFS_WARN 48 + # define XFS_WARN_STRING "verbose warnings, " 49 + #else 50 + # define XFS_WARN_STRING 51 + #endif 52 + 41 53 #ifdef DEBUG 42 54 # define XFS_DBG_STRING "debug" 43 55 #else ··· 61 49 XFS_SECURITY_STRING \ 62 50 XFS_REALTIME_STRING \ 63 51 XFS_SCRUB_STRING \ 52 + XFS_REPAIR_STRING \ 53 + XFS_WARN_STRING \ 64 54 XFS_DBG_STRING /* DBG must be last */ 65 55 66 56 struct xfs_inode;
-9
fs/xfs/xfs_symlink.c
··· 12 12 #include "xfs_trans_resv.h" 13 13 #include "xfs_bit.h" 14 14 #include "xfs_mount.h" 15 - #include "xfs_da_format.h" 16 - #include "xfs_da_btree.h" 17 - #include "xfs_defer.h" 18 15 #include "xfs_dir2.h" 19 16 #include "xfs_inode.h" 20 - #include "xfs_ialloc.h" 21 - #include "xfs_alloc.h" 22 17 #include "xfs_bmap.h" 23 18 #include "xfs_bmap_btree.h" 24 - #include "xfs_bmap_util.h" 25 - #include "xfs_error.h" 26 19 #include "xfs_quota.h" 27 20 #include "xfs_trans_space.h" 28 21 #include "xfs_trace.h" 29 - #include "xfs_symlink.h" 30 22 #include "xfs_trans.h" 31 - #include "xfs_log.h" 32 23 33 24 /* ----- Kernel only functions below ----- */ 34 25 int
-3
fs/xfs/xfs_sysctl.c
··· 4 4 * All Rights Reserved. 5 5 */ 6 6 #include "xfs.h" 7 - #include <linux/sysctl.h> 8 - #include <linux/proc_fs.h> 9 7 #include "xfs_error.h" 10 - #include "xfs_stats.h" 11 8 12 9 static struct ctl_table_header *xfs_table_header; 13 10
+3
fs/xfs/xfs_sysctl.h
··· 82 82 extern xfs_param_t xfs_params; 83 83 84 84 struct xfs_globals { 85 + #ifdef DEBUG 86 + int pwork_threads; /* parallel workqueue threads */ 87 + #endif 85 88 int log_recovery_delay; /* log recovery delay (secs) */ 86 89 int mount_delay; /* mount setup delay (secs) */ 87 90 bool bug_on_assert; /* BUG() the kernel on assert failure */
+40 -2
fs/xfs/xfs_sysfs.c
··· 10 10 #include "xfs_log_format.h" 11 11 #include "xfs_trans_resv.h" 12 12 #include "xfs_sysfs.h" 13 - #include "xfs_log.h" 14 13 #include "xfs_log_priv.h" 15 - #include "xfs_stats.h" 16 14 #include "xfs_mount.h" 17 15 18 16 struct xfs_sysfs_attr { ··· 204 206 } 205 207 XFS_SYSFS_ATTR_RW(always_cow); 206 208 209 + #ifdef DEBUG 210 + /* 211 + * Override how many threads the parallel work queue is allowed to create. 212 + * This has to be a debug-only global (instead of an errortag) because one of 213 + * the main users of parallel workqueues is mount time quotacheck. 214 + */ 215 + STATIC ssize_t 216 + pwork_threads_store( 217 + struct kobject *kobject, 218 + const char *buf, 219 + size_t count) 220 + { 221 + int ret; 222 + int val; 223 + 224 + ret = kstrtoint(buf, 0, &val); 225 + if (ret) 226 + return ret; 227 + 228 + if (val < -1 || val > num_possible_cpus()) 229 + return -EINVAL; 230 + 231 + xfs_globals.pwork_threads = val; 232 + 233 + return count; 234 + } 235 + 236 + STATIC ssize_t 237 + pwork_threads_show( 238 + struct kobject *kobject, 239 + char *buf) 240 + { 241 + return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.pwork_threads); 242 + } 243 + XFS_SYSFS_ATTR_RW(pwork_threads); 244 + #endif /* DEBUG */ 245 + 207 246 static struct attribute *xfs_dbg_attrs[] = { 208 247 ATTR_LIST(bug_on_assert), 209 248 ATTR_LIST(log_recovery_delay), 210 249 ATTR_LIST(mount_delay), 211 250 ATTR_LIST(always_cow), 251 + #ifdef DEBUG 252 + ATTR_LIST(pwork_threads), 253 + #endif 212 254 NULL, 213 255 }; 214 256
-8
fs/xfs/xfs_trace.c
··· 15 15 #include "xfs_inode.h" 16 16 #include "xfs_btree.h" 17 17 #include "xfs_da_btree.h" 18 - #include "xfs_ialloc.h" 19 - #include "xfs_itable.h" 20 18 #include "xfs_alloc.h" 21 19 #include "xfs_bmap.h" 22 20 #include "xfs_attr.h" 23 - #include "xfs_attr_leaf.h" 24 21 #include "xfs_trans.h" 25 - #include "xfs_log.h" 26 22 #include "xfs_log_priv.h" 27 23 #include "xfs_buf_item.h" 28 24 #include "xfs_quota.h" 29 - #include "xfs_iomap.h" 30 - #include "xfs_aops.h" 31 25 #include "xfs_dquot_item.h" 32 26 #include "xfs_dquot.h" 33 27 #include "xfs_log_recover.h" 34 - #include "xfs_inode_item.h" 35 - #include "xfs_bmap_btree.h" 36 28 #include "xfs_filestream.h" 37 29 #include "xfs_fsmap.h" 38 30
+60 -1
fs/xfs/xfs_trace.h
··· 475 475 DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin); 476 476 DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin); 477 477 DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale); 478 - DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock); 478 + DEFINE_BUF_ITEM_EVENT(xfs_buf_item_release); 479 479 DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed); 480 480 DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push); 481 481 DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf); ··· 3360 3360 DEFINE_TRANS_EVENT(xfs_trans_free); 3361 3361 DEFINE_TRANS_EVENT(xfs_trans_roll); 3362 3362 DEFINE_TRANS_EVENT(xfs_trans_add_item); 3363 + DEFINE_TRANS_EVENT(xfs_trans_commit_items); 3363 3364 DEFINE_TRANS_EVENT(xfs_trans_free_items); 3364 3365 3365 3366 TRACE_EVENT(xfs_iunlink_update_bucket, ··· 3516 3515 TP_ARGS(ip, flags)) 3517 3516 DEFINE_INODE_CORRUPT_EVENT(xfs_inode_mark_sick); 3518 3517 DEFINE_INODE_CORRUPT_EVENT(xfs_inode_mark_healthy); 3518 + 3519 + TRACE_EVENT(xfs_iwalk_ag, 3520 + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, 3521 + xfs_agino_t startino), 3522 + TP_ARGS(mp, agno, startino), 3523 + TP_STRUCT__entry( 3524 + __field(dev_t, dev) 3525 + __field(xfs_agnumber_t, agno) 3526 + __field(xfs_agino_t, startino) 3527 + ), 3528 + TP_fast_assign( 3529 + __entry->dev = mp->m_super->s_dev; 3530 + __entry->agno = agno; 3531 + __entry->startino = startino; 3532 + ), 3533 + TP_printk("dev %d:%d agno %d startino %u", 3534 + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, 3535 + __entry->startino) 3536 + ) 3537 + 3538 + TRACE_EVENT(xfs_iwalk_ag_rec, 3539 + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, 3540 + struct xfs_inobt_rec_incore *irec), 3541 + TP_ARGS(mp, agno, irec), 3542 + TP_STRUCT__entry( 3543 + __field(dev_t, dev) 3544 + __field(xfs_agnumber_t, agno) 3545 + __field(xfs_agino_t, startino) 3546 + __field(uint64_t, freemask) 3547 + ), 3548 + TP_fast_assign( 3549 + __entry->dev = mp->m_super->s_dev; 3550 + __entry->agno = agno; 3551 + __entry->startino = irec->ir_startino; 3552 + __entry->freemask = 
irec->ir_free; 3553 + ), 3554 + TP_printk("dev %d:%d agno %d startino %u freemask 0x%llx", 3555 + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, 3556 + __entry->startino, __entry->freemask) 3557 + ) 3558 + 3559 + TRACE_EVENT(xfs_pwork_init, 3560 + TP_PROTO(struct xfs_mount *mp, unsigned int nr_threads, pid_t pid), 3561 + TP_ARGS(mp, nr_threads, pid), 3562 + TP_STRUCT__entry( 3563 + __field(dev_t, dev) 3564 + __field(unsigned int, nr_threads) 3565 + __field(pid_t, pid) 3566 + ), 3567 + TP_fast_assign( 3568 + __entry->dev = mp->m_super->s_dev; 3569 + __entry->nr_threads = nr_threads; 3570 + __entry->pid = pid; 3571 + ), 3572 + TP_printk("dev %d:%d nr_threads %u pid %u", 3573 + MAJOR(__entry->dev), MINOR(__entry->dev), 3574 + __entry->nr_threads, __entry->pid) 3575 + ) 3519 3576 3520 3577 #endif /* _TRACE_XFS_H */ 3521 3578
+25 -18
fs/xfs/xfs_trans.c
··· 11 11 #include "xfs_log_format.h" 12 12 #include "xfs_trans_resv.h" 13 13 #include "xfs_mount.h" 14 - #include "xfs_inode.h" 15 14 #include "xfs_extent_busy.h" 16 15 #include "xfs_quota.h" 17 16 #include "xfs_trans.h" ··· 263 264 * GFP_NOFS allocation context so that we avoid lockdep false positives 264 265 * by doing GFP_KERNEL allocations inside sb_start_intwrite(). 265 266 */ 266 - tp = kmem_zone_zalloc(xfs_trans_zone, 267 - (flags & XFS_TRANS_NOFS) ? KM_NOFS : KM_SLEEP); 268 - 267 + tp = kmem_zone_zalloc(xfs_trans_zone, KM_SLEEP); 269 268 if (!(flags & XFS_TRANS_NO_WRITECOUNT)) 270 269 sb_start_intwrite(mp->m_super); 271 270 ··· 449 452 xfs_buf_t *bp; 450 453 int whole = 0; 451 454 452 - bp = xfs_trans_getsb(tp, tp->t_mountp, 0); 455 + bp = xfs_trans_getsb(tp, tp->t_mountp); 453 456 sbp = XFS_BUF_TO_SBP(bp); 454 457 455 458 /* ··· 764 767 } 765 768 766 769 /* Detach and unlock all of the items in a transaction */ 767 - void 770 + static void 768 771 xfs_trans_free_items( 769 772 struct xfs_trans *tp, 770 - xfs_lsn_t commit_lsn, 771 773 bool abort) 772 774 { 773 775 struct xfs_log_item *lip, *next; ··· 775 779 776 780 list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) { 777 781 xfs_trans_del_item(lip); 778 - if (commit_lsn != NULLCOMMITLSN) 779 - lip->li_ops->iop_committing(lip, commit_lsn); 780 782 if (abort) 781 783 set_bit(XFS_LI_ABORTED, &lip->li_flags); 782 - lip->li_ops->iop_unlock(lip); 784 + if (lip->li_ops->iop_release) 785 + lip->li_ops->iop_release(lip); 783 786 } 784 787 } 785 788 ··· 799 804 for (i = 0; i < nr_items; i++) { 800 805 struct xfs_log_item *lip = log_items[i]; 801 806 802 - lip->li_ops->iop_unpin(lip, 0); 807 + if (lip->li_ops->iop_unpin) 808 + lip->li_ops->iop_unpin(lip, 0); 803 809 } 804 810 } 805 811 ··· 811 815 * 812 816 * If we are called with the aborted flag set, it is because a log write during 813 817 * a CIL checkpoint commit has failed. 
In this case, all the items in the 814 - * checkpoint have already gone through iop_commited and iop_unlock, which 818 + * checkpoint have already gone through iop_committed and iop_committing, which 815 819 * means that checkpoint commit abort handling is treated exactly the same 816 820 * as an iclog write error even though we haven't started any IO yet. Hence in 817 821 * this case all we need to do is iop_committed processing, followed by an ··· 829 833 struct xfs_ail *ailp, 830 834 struct xfs_log_vec *log_vector, 831 835 xfs_lsn_t commit_lsn, 832 - int aborted) 836 + bool aborted) 833 837 { 834 838 #define LOG_ITEM_BATCH_SIZE 32 835 839 struct xfs_log_item *log_items[LOG_ITEM_BATCH_SIZE]; ··· 848 852 849 853 if (aborted) 850 854 set_bit(XFS_LI_ABORTED, &lip->li_flags); 851 - item_lsn = lip->li_ops->iop_committed(lip, commit_lsn); 855 + 856 + if (lip->li_ops->flags & XFS_ITEM_RELEASE_WHEN_COMMITTED) { 857 + lip->li_ops->iop_release(lip); 858 + continue; 859 + } 860 + 861 + if (lip->li_ops->iop_committed) 862 + item_lsn = lip->li_ops->iop_committed(lip, commit_lsn); 863 + else 864 + item_lsn = commit_lsn; 852 865 853 866 /* item_lsn of -1 means the item needs no further processing */ 854 867 if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0) ··· 869 864 */ 870 865 if (aborted) { 871 866 ASSERT(XFS_FORCED_SHUTDOWN(ailp->ail_mount)); 872 - lip->li_ops->iop_unpin(lip, 1); 867 + if (lip->li_ops->iop_unpin) 868 + lip->li_ops->iop_unpin(lip, 1); 873 869 continue; 874 870 } 875 871 ··· 888 882 xfs_trans_ail_update(ailp, lip, item_lsn); 889 883 else 890 884 spin_unlock(&ailp->ail_lock); 891 - lip->li_ops->iop_unpin(lip, 0); 885 + if (lip->li_ops->iop_unpin) 886 + lip->li_ops->iop_unpin(lip, 0); 892 887 continue; 893 888 } 894 889 ··· 1005 998 tp->t_ticket = NULL; 1006 999 } 1007 1000 current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); 1008 - xfs_trans_free_items(tp, NULLCOMMITLSN, !!error); 1001 + xfs_trans_free_items(tp, !!error); 1009 1002 xfs_trans_free(tp); 
1010 1003 1011 1004 XFS_STATS_INC(mp, xs_trans_empty); ··· 1067 1060 /* mark this thread as no longer being in a transaction */ 1068 1061 current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); 1069 1062 1070 - xfs_trans_free_items(tp, NULLCOMMITLSN, dirty); 1063 + xfs_trans_free_items(tp, dirty); 1071 1064 xfs_trans_free(tp); 1072 1065 } 1073 1066
+18 -52
fs/xfs/xfs_trans.h
··· 27 27 struct xfs_bui_log_item; 28 28 struct xfs_bud_log_item; 29 29 30 - typedef struct xfs_log_item { 30 + struct xfs_log_item { 31 31 struct list_head li_ail; /* AIL pointers */ 32 32 struct list_head li_trans; /* transaction list */ 33 33 xfs_lsn_t li_lsn; /* last on-disk lsn */ ··· 48 48 struct xfs_log_vec *li_lv; /* active log vector */ 49 49 struct xfs_log_vec *li_lv_shadow; /* standby vector */ 50 50 xfs_lsn_t li_seq; /* CIL commit seq */ 51 - } xfs_log_item_t; 51 + }; 52 52 53 53 /* 54 54 * li_flags use the (set/test/clear)_bit atomic interfaces because updates can ··· 67 67 { (1 << XFS_LI_DIRTY), "DIRTY" } 68 68 69 69 struct xfs_item_ops { 70 - void (*iop_size)(xfs_log_item_t *, int *, int *); 71 - void (*iop_format)(xfs_log_item_t *, struct xfs_log_vec *); 72 - void (*iop_pin)(xfs_log_item_t *); 73 - void (*iop_unpin)(xfs_log_item_t *, int remove); 70 + unsigned flags; 71 + void (*iop_size)(struct xfs_log_item *, int *, int *); 72 + void (*iop_format)(struct xfs_log_item *, struct xfs_log_vec *); 73 + void (*iop_pin)(struct xfs_log_item *); 74 + void (*iop_unpin)(struct xfs_log_item *, int remove); 74 75 uint (*iop_push)(struct xfs_log_item *, struct list_head *); 75 - void (*iop_unlock)(xfs_log_item_t *); 76 - xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t); 77 - void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t); 78 - void (*iop_error)(xfs_log_item_t *, xfs_buf_t *); 76 + void (*iop_committing)(struct xfs_log_item *, xfs_lsn_t commit_lsn); 77 + void (*iop_release)(struct xfs_log_item *); 78 + xfs_lsn_t (*iop_committed)(struct xfs_log_item *, xfs_lsn_t); 79 + void (*iop_error)(struct xfs_log_item *, xfs_buf_t *); 79 80 }; 81 + 82 + /* 83 + * Release the log item as soon as committed. This is for items just logging 84 + * intents that never need to be written back in place. 
85 + */ 86 + #define XFS_ITEM_RELEASE_WHEN_COMMITTED (1 << 0) 80 87 81 88 void xfs_log_item_init(struct xfs_mount *mp, struct xfs_log_item *item, 82 89 int type, const struct xfs_item_ops *ops); ··· 210 203 flags, bpp, ops); 211 204 } 212 205 213 - struct xfs_buf *xfs_trans_getsb(xfs_trans_t *, struct xfs_mount *, int); 206 + struct xfs_buf *xfs_trans_getsb(xfs_trans_t *, struct xfs_mount *); 214 207 215 208 void xfs_trans_brelse(xfs_trans_t *, struct xfs_buf *); 216 209 void xfs_trans_bjoin(xfs_trans_t *, struct xfs_buf *); ··· 230 223 bool xfs_trans_buf_is_dirty(struct xfs_buf *bp); 231 224 void xfs_trans_log_inode(xfs_trans_t *, struct xfs_inode *, uint); 232 225 233 - struct xfs_efd_log_item *xfs_trans_get_efd(struct xfs_trans *, 234 - struct xfs_efi_log_item *, 235 - uint); 236 - int xfs_trans_free_extent(struct xfs_trans *, 237 - struct xfs_efd_log_item *, xfs_fsblock_t, 238 - xfs_extlen_t, 239 - const struct xfs_owner_info *, 240 - bool); 241 226 int xfs_trans_commit(struct xfs_trans *); 242 227 int xfs_trans_roll(struct xfs_trans **); 243 228 int xfs_trans_roll_inode(struct xfs_trans **, struct xfs_inode *); ··· 243 244 struct xfs_buf *src_bp); 244 245 245 246 extern kmem_zone_t *xfs_trans_zone; 246 - 247 - /* rmap updates */ 248 - enum xfs_rmap_intent_type; 249 - 250 - struct xfs_rud_log_item *xfs_trans_get_rud(struct xfs_trans *tp, 251 - struct xfs_rui_log_item *ruip); 252 - int xfs_trans_log_finish_rmap_update(struct xfs_trans *tp, 253 - struct xfs_rud_log_item *rudp, enum xfs_rmap_intent_type type, 254 - uint64_t owner, int whichfork, xfs_fileoff_t startoff, 255 - xfs_fsblock_t startblock, xfs_filblks_t blockcount, 256 - xfs_exntst_t state, struct xfs_btree_cur **pcur); 257 - 258 - /* refcount updates */ 259 - enum xfs_refcount_intent_type; 260 - 261 - struct xfs_cud_log_item *xfs_trans_get_cud(struct xfs_trans *tp, 262 - struct xfs_cui_log_item *cuip); 263 - int xfs_trans_log_finish_refcount_update(struct xfs_trans *tp, 264 - struct xfs_cud_log_item 
*cudp, 265 - enum xfs_refcount_intent_type type, xfs_fsblock_t startblock, 266 - xfs_extlen_t blockcount, xfs_fsblock_t *new_fsb, 267 - xfs_extlen_t *new_len, struct xfs_btree_cur **pcur); 268 - 269 - /* mapping updates */ 270 - enum xfs_bmap_intent_type; 271 - 272 - struct xfs_bud_log_item *xfs_trans_get_bud(struct xfs_trans *tp, 273 - struct xfs_bui_log_item *buip); 274 - int xfs_trans_log_finish_bmap_update(struct xfs_trans *tp, 275 - struct xfs_bud_log_item *rudp, enum xfs_bmap_intent_type type, 276 - struct xfs_inode *ip, int whichfork, xfs_fileoff_t startoff, 277 - xfs_fsblock_t startblock, xfs_filblks_t *blockcount, 278 - xfs_exntst_t state); 279 247 280 248 #endif /* __XFS_TRANS_H__ */
+31 -22
fs/xfs/xfs_trans_ail.c
··· 6 6 */ 7 7 #include "xfs.h" 8 8 #include "xfs_fs.h" 9 + #include "xfs_shared.h" 9 10 #include "xfs_format.h" 10 11 #include "xfs_log_format.h" 11 12 #include "xfs_trans_resv.h" ··· 75 74 * Return a pointer to the last item in the AIL. If the AIL is empty, then 76 75 * return NULL. 77 76 */ 78 - static xfs_log_item_t * 77 + static struct xfs_log_item * 79 78 xfs_ail_max( 80 79 struct xfs_ail *ailp) 81 80 { 82 81 if (list_empty(&ailp->ail_head)) 83 82 return NULL; 84 83 85 - return list_entry(ailp->ail_head.prev, xfs_log_item_t, li_ail); 84 + return list_entry(ailp->ail_head.prev, struct xfs_log_item, li_ail); 86 85 } 87 86 88 87 /* 89 88 * Return a pointer to the item which follows the given item in the AIL. If 90 89 * the given item is the last item in the list, then return NULL. 91 90 */ 92 - static xfs_log_item_t * 91 + static struct xfs_log_item * 93 92 xfs_ail_next( 94 - struct xfs_ail *ailp, 95 - xfs_log_item_t *lip) 93 + struct xfs_ail *ailp, 94 + struct xfs_log_item *lip) 96 95 { 97 96 if (lip->li_ail.next == &ailp->ail_head) 98 97 return NULL; 99 98 100 - return list_first_entry(&lip->li_ail, xfs_log_item_t, li_ail); 99 + return list_first_entry(&lip->li_ail, struct xfs_log_item, li_ail); 101 100 } 102 101 103 102 /* ··· 110 109 */ 111 110 xfs_lsn_t 112 111 xfs_ail_min_lsn( 113 - struct xfs_ail *ailp) 112 + struct xfs_ail *ailp) 114 113 { 115 - xfs_lsn_t lsn = 0; 116 - xfs_log_item_t *lip; 114 + xfs_lsn_t lsn = 0; 115 + struct xfs_log_item *lip; 117 116 118 117 spin_lock(&ailp->ail_lock); 119 118 lip = xfs_ail_min(ailp); ··· 129 128 */ 130 129 static xfs_lsn_t 131 130 xfs_ail_max_lsn( 132 - struct xfs_ail *ailp) 131 + struct xfs_ail *ailp) 133 132 { 134 - xfs_lsn_t lsn = 0; 135 - xfs_log_item_t *lip; 133 + xfs_lsn_t lsn = 0; 134 + struct xfs_log_item *lip; 136 135 137 136 spin_lock(&ailp->ail_lock); 138 137 lip = xfs_ail_max(ailp); ··· 217 216 * ascending traversal. Pass a @lsn of zero to initialise the cursor to the 218 217 * first item in the AIL. 
Returns NULL if the list is empty. 219 218 */ 220 - xfs_log_item_t * 219 + struct xfs_log_item * 221 220 xfs_trans_ail_cursor_first( 222 221 struct xfs_ail *ailp, 223 222 struct xfs_ail_cursor *cur, 224 223 xfs_lsn_t lsn) 225 224 { 226 - xfs_log_item_t *lip; 225 + struct xfs_log_item *lip; 227 226 228 227 xfs_trans_ail_cursor_init(ailp, cur); 229 228 ··· 249 248 struct xfs_ail *ailp, 250 249 xfs_lsn_t lsn) 251 250 { 252 - xfs_log_item_t *lip; 251 + struct xfs_log_item *lip; 253 252 254 253 list_for_each_entry_reverse(lip, &ailp->ail_head, li_ail) { 255 254 if (XFS_LSN_CMP(lip->li_lsn, lsn) <= 0) ··· 328 327 */ 329 328 static void 330 329 xfs_ail_delete( 331 - struct xfs_ail *ailp, 332 - xfs_log_item_t *lip) 330 + struct xfs_ail *ailp, 331 + struct xfs_log_item *lip) 333 332 { 334 333 xfs_ail_check(ailp, lip); 335 334 list_del(&lip->li_ail); ··· 348 347 if (XFS_TEST_ERROR(false, ailp->ail_mount, XFS_ERRTAG_LOG_ITEM_PIN)) 349 348 return XFS_ITEM_PINNED; 350 349 350 + /* 351 + * Consider the item pinned if a push callback is not defined so the 352 + * caller will force the log. This should only happen for intent items 353 + * as they are unpinned once the associated done item is committed to 354 + * the on-disk log. 
355 + */ 356 + if (!lip->li_ops->iop_push) 357 + return XFS_ITEM_PINNED; 351 358 return lip->li_ops->iop_push(lip, &ailp->ail_buf_list); 352 359 } 353 360 ··· 365 356 { 366 357 xfs_mount_t *mp = ailp->ail_mount; 367 358 struct xfs_ail_cursor cur; 368 - xfs_log_item_t *lip; 359 + struct xfs_log_item *lip; 369 360 xfs_lsn_t lsn; 370 361 xfs_lsn_t target; 371 362 long tout; ··· 620 611 */ 621 612 void 622 613 xfs_ail_push( 623 - struct xfs_ail *ailp, 624 - xfs_lsn_t threshold_lsn) 614 + struct xfs_ail *ailp, 615 + xfs_lsn_t threshold_lsn) 625 616 { 626 - xfs_log_item_t *lip; 617 + struct xfs_log_item *lip; 627 618 628 619 lip = xfs_ail_min(ailp); 629 620 if (!lip || XFS_FORCED_SHUTDOWN(ailp->ail_mount) || ··· 708 699 int nr_items, 709 700 xfs_lsn_t lsn) __releases(ailp->ail_lock) 710 701 { 711 - xfs_log_item_t *mlip; 702 + struct xfs_log_item *mlip; 712 703 int mlip_changed = 0; 713 704 int i; 714 705 LIST_HEAD(tmp);
-232
fs/xfs/xfs_trans_bmap.c
··· 1 - // SPDX-License-Identifier: GPL-2.0+ 2 - /* 3 - * Copyright (C) 2016 Oracle. All Rights Reserved. 4 - * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 - */ 6 - #include "xfs.h" 7 - #include "xfs_fs.h" 8 - #include "xfs_shared.h" 9 - #include "xfs_format.h" 10 - #include "xfs_log_format.h" 11 - #include "xfs_trans_resv.h" 12 - #include "xfs_mount.h" 13 - #include "xfs_defer.h" 14 - #include "xfs_trans.h" 15 - #include "xfs_trans_priv.h" 16 - #include "xfs_bmap_item.h" 17 - #include "xfs_alloc.h" 18 - #include "xfs_bmap.h" 19 - #include "xfs_inode.h" 20 - 21 - /* 22 - * This routine is called to allocate a "bmap update done" 23 - * log item. 24 - */ 25 - struct xfs_bud_log_item * 26 - xfs_trans_get_bud( 27 - struct xfs_trans *tp, 28 - struct xfs_bui_log_item *buip) 29 - { 30 - struct xfs_bud_log_item *budp; 31 - 32 - budp = xfs_bud_init(tp->t_mountp, buip); 33 - xfs_trans_add_item(tp, &budp->bud_item); 34 - return budp; 35 - } 36 - 37 - /* 38 - * Finish an bmap update and log it to the BUD. Note that the 39 - * transaction is marked dirty regardless of whether the bmap update 40 - * succeeds or fails to support the BUI/BUD lifecycle rules. 41 - */ 42 - int 43 - xfs_trans_log_finish_bmap_update( 44 - struct xfs_trans *tp, 45 - struct xfs_bud_log_item *budp, 46 - enum xfs_bmap_intent_type type, 47 - struct xfs_inode *ip, 48 - int whichfork, 49 - xfs_fileoff_t startoff, 50 - xfs_fsblock_t startblock, 51 - xfs_filblks_t *blockcount, 52 - xfs_exntst_t state) 53 - { 54 - int error; 55 - 56 - error = xfs_bmap_finish_one(tp, ip, type, whichfork, startoff, 57 - startblock, blockcount, state); 58 - 59 - /* 60 - * Mark the transaction dirty, even on error. This ensures the 61 - * transaction is aborted, which: 62 - * 63 - * 1.) releases the BUI and frees the BUD 64 - * 2.) 
shuts down the filesystem 65 - */ 66 - tp->t_flags |= XFS_TRANS_DIRTY; 67 - set_bit(XFS_LI_DIRTY, &budp->bud_item.li_flags); 68 - 69 - return error; 70 - } 71 - 72 - /* Sort bmap intents by inode. */ 73 - static int 74 - xfs_bmap_update_diff_items( 75 - void *priv, 76 - struct list_head *a, 77 - struct list_head *b) 78 - { 79 - struct xfs_bmap_intent *ba; 80 - struct xfs_bmap_intent *bb; 81 - 82 - ba = container_of(a, struct xfs_bmap_intent, bi_list); 83 - bb = container_of(b, struct xfs_bmap_intent, bi_list); 84 - return ba->bi_owner->i_ino - bb->bi_owner->i_ino; 85 - } 86 - 87 - /* Get an BUI. */ 88 - STATIC void * 89 - xfs_bmap_update_create_intent( 90 - struct xfs_trans *tp, 91 - unsigned int count) 92 - { 93 - struct xfs_bui_log_item *buip; 94 - 95 - ASSERT(count == XFS_BUI_MAX_FAST_EXTENTS); 96 - ASSERT(tp != NULL); 97 - 98 - buip = xfs_bui_init(tp->t_mountp); 99 - ASSERT(buip != NULL); 100 - 101 - /* 102 - * Get a log_item_desc to point at the new item. 103 - */ 104 - xfs_trans_add_item(tp, &buip->bui_item); 105 - return buip; 106 - } 107 - 108 - /* Set the map extent flags for this mapping. */ 109 - static void 110 - xfs_trans_set_bmap_flags( 111 - struct xfs_map_extent *bmap, 112 - enum xfs_bmap_intent_type type, 113 - int whichfork, 114 - xfs_exntst_t state) 115 - { 116 - bmap->me_flags = 0; 117 - switch (type) { 118 - case XFS_BMAP_MAP: 119 - case XFS_BMAP_UNMAP: 120 - bmap->me_flags = type; 121 - break; 122 - default: 123 - ASSERT(0); 124 - } 125 - if (state == XFS_EXT_UNWRITTEN) 126 - bmap->me_flags |= XFS_BMAP_EXTENT_UNWRITTEN; 127 - if (whichfork == XFS_ATTR_FORK) 128 - bmap->me_flags |= XFS_BMAP_EXTENT_ATTR_FORK; 129 - } 130 - 131 - /* Log bmap updates in the intent item. 
*/ 132 - STATIC void 133 - xfs_bmap_update_log_item( 134 - struct xfs_trans *tp, 135 - void *intent, 136 - struct list_head *item) 137 - { 138 - struct xfs_bui_log_item *buip = intent; 139 - struct xfs_bmap_intent *bmap; 140 - uint next_extent; 141 - struct xfs_map_extent *map; 142 - 143 - bmap = container_of(item, struct xfs_bmap_intent, bi_list); 144 - 145 - tp->t_flags |= XFS_TRANS_DIRTY; 146 - set_bit(XFS_LI_DIRTY, &buip->bui_item.li_flags); 147 - 148 - /* 149 - * atomic_inc_return gives us the value after the increment; 150 - * we want to use it as an array index so we need to subtract 1 from 151 - * it. 152 - */ 153 - next_extent = atomic_inc_return(&buip->bui_next_extent) - 1; 154 - ASSERT(next_extent < buip->bui_format.bui_nextents); 155 - map = &buip->bui_format.bui_extents[next_extent]; 156 - map->me_owner = bmap->bi_owner->i_ino; 157 - map->me_startblock = bmap->bi_bmap.br_startblock; 158 - map->me_startoff = bmap->bi_bmap.br_startoff; 159 - map->me_len = bmap->bi_bmap.br_blockcount; 160 - xfs_trans_set_bmap_flags(map, bmap->bi_type, bmap->bi_whichfork, 161 - bmap->bi_bmap.br_state); 162 - } 163 - 164 - /* Get an BUD so we can process all the deferred rmap updates. */ 165 - STATIC void * 166 - xfs_bmap_update_create_done( 167 - struct xfs_trans *tp, 168 - void *intent, 169 - unsigned int count) 170 - { 171 - return xfs_trans_get_bud(tp, intent); 172 - } 173 - 174 - /* Process a deferred rmap update. 
*/ 175 - STATIC int 176 - xfs_bmap_update_finish_item( 177 - struct xfs_trans *tp, 178 - struct list_head *item, 179 - void *done_item, 180 - void **state) 181 - { 182 - struct xfs_bmap_intent *bmap; 183 - xfs_filblks_t count; 184 - int error; 185 - 186 - bmap = container_of(item, struct xfs_bmap_intent, bi_list); 187 - count = bmap->bi_bmap.br_blockcount; 188 - error = xfs_trans_log_finish_bmap_update(tp, done_item, 189 - bmap->bi_type, 190 - bmap->bi_owner, bmap->bi_whichfork, 191 - bmap->bi_bmap.br_startoff, 192 - bmap->bi_bmap.br_startblock, 193 - &count, 194 - bmap->bi_bmap.br_state); 195 - if (!error && count > 0) { 196 - ASSERT(bmap->bi_type == XFS_BMAP_UNMAP); 197 - bmap->bi_bmap.br_blockcount = count; 198 - return -EAGAIN; 199 - } 200 - kmem_free(bmap); 201 - return error; 202 - } 203 - 204 - /* Abort all pending BUIs. */ 205 - STATIC void 206 - xfs_bmap_update_abort_intent( 207 - void *intent) 208 - { 209 - xfs_bui_release(intent); 210 - } 211 - 212 - /* Cancel a deferred rmap update. */ 213 - STATIC void 214 - xfs_bmap_update_cancel_item( 215 - struct list_head *item) 216 - { 217 - struct xfs_bmap_intent *bmap; 218 - 219 - bmap = container_of(item, struct xfs_bmap_intent, bi_list); 220 - kmem_free(bmap); 221 - } 222 - 223 - const struct xfs_defer_op_type xfs_bmap_update_defer_type = { 224 - .max_items = XFS_BUI_MAX_FAST_EXTENTS, 225 - .diff_items = xfs_bmap_update_diff_items, 226 - .create_intent = xfs_bmap_update_create_intent, 227 - .abort_intent = xfs_bmap_update_abort_intent, 228 - .log_item = xfs_bmap_update_log_item, 229 - .create_done = xfs_bmap_update_create_done, 230 - .finish_item = xfs_bmap_update_finish_item, 231 - .cancel_item = xfs_bmap_update_cancel_item, 232 - };
+4 -7
fs/xfs/xfs_trans_buf.c
··· 10 10 #include "xfs_log_format.h" 11 11 #include "xfs_trans_resv.h" 12 12 #include "xfs_mount.h" 13 - #include "xfs_inode.h" 14 13 #include "xfs_trans.h" 15 14 #include "xfs_buf_item.h" 16 15 #include "xfs_trans_priv.h" 17 - #include "xfs_error.h" 18 16 #include "xfs_trace.h" 19 17 20 18 /* ··· 172 174 xfs_buf_t * 173 175 xfs_trans_getsb( 174 176 xfs_trans_t *tp, 175 - struct xfs_mount *mp, 176 - int flags) 177 + struct xfs_mount *mp) 177 178 { 178 179 xfs_buf_t *bp; 179 180 struct xfs_buf_log_item *bip; ··· 182 185 * if tp is NULL. 183 186 */ 184 187 if (tp == NULL) 185 - return xfs_getsb(mp, flags); 188 + return xfs_getsb(mp); 186 189 187 190 /* 188 191 * If the superblock buffer already has this transaction ··· 200 203 return bp; 201 204 } 202 205 203 - bp = xfs_getsb(mp, flags); 206 + bp = xfs_getsb(mp); 204 207 if (bp == NULL) 205 208 return NULL; 206 209 ··· 425 428 426 429 /* 427 430 * Mark the buffer as not needing to be unlocked when the buf item's 428 - * iop_unlock() routine is called. The buffer must already be locked 431 + * iop_committing() routine is called. The buffer must already be locked 429 432 * and associated with the given transaction. 430 433 */ 431 434 /* ARGSUSED */
-11
fs/xfs/xfs_trans_dquot.c
··· 11 11 #include "xfs_trans_resv.h" 12 12 #include "xfs_mount.h" 13 13 #include "xfs_inode.h" 14 - #include "xfs_error.h" 15 14 #include "xfs_trans.h" 16 15 #include "xfs_trans_priv.h" 17 16 #include "xfs_quota.h" ··· 28 29 xfs_trans_t *tp, 29 30 xfs_dquot_t *dqp) 30 31 { 31 - ASSERT(dqp->q_transp != tp); 32 32 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 33 33 ASSERT(dqp->q_logitem.qli_dquot == dqp); 34 34 ··· 35 37 * Get a log_item_desc to point at the new item. 36 38 */ 37 39 xfs_trans_add_item(tp, &dqp->q_logitem.qli_item); 38 - 39 - /* 40 - * Initialize d_transp so we can later determine if this dquot is 41 - * associated with this transaction. 42 - */ 43 - dqp->q_transp = tp; 44 40 } 45 - 46 41 47 42 /* 48 43 * This is called to mark the dquot as needing ··· 52 61 xfs_trans_t *tp, 53 62 xfs_dquot_t *dqp) 54 63 { 55 - ASSERT(dqp->q_transp == tp); 56 64 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 57 65 58 66 tp->t_flags |= XFS_TRANS_DIRTY; ··· 337 347 break; 338 348 339 349 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 340 - ASSERT(dqp->q_transp == tp); 341 350 342 351 /* 343 352 * adjust the actual number of blocks used
-286
fs/xfs/xfs_trans_extfree.c
··· 1 - // SPDX-License-Identifier: GPL-2.0 2 - /* 3 - * Copyright (c) 2000,2005 Silicon Graphics, Inc. 4 - * All Rights Reserved. 5 - */ 6 - #include "xfs.h" 7 - #include "xfs_fs.h" 8 - #include "xfs_shared.h" 9 - #include "xfs_format.h" 10 - #include "xfs_log_format.h" 11 - #include "xfs_trans_resv.h" 12 - #include "xfs_bit.h" 13 - #include "xfs_mount.h" 14 - #include "xfs_defer.h" 15 - #include "xfs_trans.h" 16 - #include "xfs_trans_priv.h" 17 - #include "xfs_extfree_item.h" 18 - #include "xfs_alloc.h" 19 - #include "xfs_bmap.h" 20 - #include "xfs_trace.h" 21 - 22 - /* 23 - * This routine is called to allocate an "extent free done" 24 - * log item that will hold nextents worth of extents. The 25 - * caller must use all nextents extents, because we are not 26 - * flexible about this at all. 27 - */ 28 - struct xfs_efd_log_item * 29 - xfs_trans_get_efd(struct xfs_trans *tp, 30 - struct xfs_efi_log_item *efip, 31 - uint nextents) 32 - { 33 - struct xfs_efd_log_item *efdp; 34 - 35 - ASSERT(tp != NULL); 36 - ASSERT(nextents > 0); 37 - 38 - efdp = xfs_efd_init(tp->t_mountp, efip, nextents); 39 - ASSERT(efdp != NULL); 40 - 41 - /* 42 - * Get a log_item_desc to point at the new item. 43 - */ 44 - xfs_trans_add_item(tp, &efdp->efd_item); 45 - return efdp; 46 - } 47 - 48 - /* 49 - * Free an extent and log it to the EFD. Note that the transaction is marked 50 - * dirty regardless of whether the extent free succeeds or fails to support the 51 - * EFI/EFD lifecycle rules. 
52 - */ 53 - int 54 - xfs_trans_free_extent( 55 - struct xfs_trans *tp, 56 - struct xfs_efd_log_item *efdp, 57 - xfs_fsblock_t start_block, 58 - xfs_extlen_t ext_len, 59 - const struct xfs_owner_info *oinfo, 60 - bool skip_discard) 61 - { 62 - struct xfs_mount *mp = tp->t_mountp; 63 - struct xfs_extent *extp; 64 - uint next_extent; 65 - xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, start_block); 66 - xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp, 67 - start_block); 68 - int error; 69 - 70 - trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno, ext_len); 71 - 72 - error = __xfs_free_extent(tp, start_block, ext_len, 73 - oinfo, XFS_AG_RESV_NONE, skip_discard); 74 - /* 75 - * Mark the transaction dirty, even on error. This ensures the 76 - * transaction is aborted, which: 77 - * 78 - * 1.) releases the EFI and frees the EFD 79 - * 2.) shuts down the filesystem 80 - */ 81 - tp->t_flags |= XFS_TRANS_DIRTY; 82 - set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags); 83 - 84 - next_extent = efdp->efd_next_extent; 85 - ASSERT(next_extent < efdp->efd_format.efd_nextents); 86 - extp = &(efdp->efd_format.efd_extents[next_extent]); 87 - extp->ext_start = start_block; 88 - extp->ext_len = ext_len; 89 - efdp->efd_next_extent++; 90 - 91 - return error; 92 - } 93 - 94 - /* Sort bmap items by AG. */ 95 - static int 96 - xfs_extent_free_diff_items( 97 - void *priv, 98 - struct list_head *a, 99 - struct list_head *b) 100 - { 101 - struct xfs_mount *mp = priv; 102 - struct xfs_extent_free_item *ra; 103 - struct xfs_extent_free_item *rb; 104 - 105 - ra = container_of(a, struct xfs_extent_free_item, xefi_list); 106 - rb = container_of(b, struct xfs_extent_free_item, xefi_list); 107 - return XFS_FSB_TO_AGNO(mp, ra->xefi_startblock) - 108 - XFS_FSB_TO_AGNO(mp, rb->xefi_startblock); 109 - } 110 - 111 - /* Get an EFI. 
*/ 112 - STATIC void * 113 - xfs_extent_free_create_intent( 114 - struct xfs_trans *tp, 115 - unsigned int count) 116 - { 117 - struct xfs_efi_log_item *efip; 118 - 119 - ASSERT(tp != NULL); 120 - ASSERT(count > 0); 121 - 122 - efip = xfs_efi_init(tp->t_mountp, count); 123 - ASSERT(efip != NULL); 124 - 125 - /* 126 - * Get a log_item_desc to point at the new item. 127 - */ 128 - xfs_trans_add_item(tp, &efip->efi_item); 129 - return efip; 130 - } 131 - 132 - /* Log a free extent to the intent item. */ 133 - STATIC void 134 - xfs_extent_free_log_item( 135 - struct xfs_trans *tp, 136 - void *intent, 137 - struct list_head *item) 138 - { 139 - struct xfs_efi_log_item *efip = intent; 140 - struct xfs_extent_free_item *free; 141 - uint next_extent; 142 - struct xfs_extent *extp; 143 - 144 - free = container_of(item, struct xfs_extent_free_item, xefi_list); 145 - 146 - tp->t_flags |= XFS_TRANS_DIRTY; 147 - set_bit(XFS_LI_DIRTY, &efip->efi_item.li_flags); 148 - 149 - /* 150 - * atomic_inc_return gives us the value after the increment; 151 - * we want to use it as an array index so we need to subtract 1 from 152 - * it. 153 - */ 154 - next_extent = atomic_inc_return(&efip->efi_next_extent) - 1; 155 - ASSERT(next_extent < efip->efi_format.efi_nextents); 156 - extp = &efip->efi_format.efi_extents[next_extent]; 157 - extp->ext_start = free->xefi_startblock; 158 - extp->ext_len = free->xefi_blockcount; 159 - } 160 - 161 - /* Get an EFD so we can process all the free extents. */ 162 - STATIC void * 163 - xfs_extent_free_create_done( 164 - struct xfs_trans *tp, 165 - void *intent, 166 - unsigned int count) 167 - { 168 - return xfs_trans_get_efd(tp, intent, count); 169 - } 170 - 171 - /* Process a free extent. 
*/ 172 - STATIC int 173 - xfs_extent_free_finish_item( 174 - struct xfs_trans *tp, 175 - struct list_head *item, 176 - void *done_item, 177 - void **state) 178 - { 179 - struct xfs_extent_free_item *free; 180 - int error; 181 - 182 - free = container_of(item, struct xfs_extent_free_item, xefi_list); 183 - error = xfs_trans_free_extent(tp, done_item, 184 - free->xefi_startblock, 185 - free->xefi_blockcount, 186 - &free->xefi_oinfo, free->xefi_skip_discard); 187 - kmem_free(free); 188 - return error; 189 - } 190 - 191 - /* Abort all pending EFIs. */ 192 - STATIC void 193 - xfs_extent_free_abort_intent( 194 - void *intent) 195 - { 196 - xfs_efi_release(intent); 197 - } 198 - 199 - /* Cancel a free extent. */ 200 - STATIC void 201 - xfs_extent_free_cancel_item( 202 - struct list_head *item) 203 - { 204 - struct xfs_extent_free_item *free; 205 - 206 - free = container_of(item, struct xfs_extent_free_item, xefi_list); 207 - kmem_free(free); 208 - } 209 - 210 - const struct xfs_defer_op_type xfs_extent_free_defer_type = { 211 - .max_items = XFS_EFI_MAX_FAST_EXTENTS, 212 - .diff_items = xfs_extent_free_diff_items, 213 - .create_intent = xfs_extent_free_create_intent, 214 - .abort_intent = xfs_extent_free_abort_intent, 215 - .log_item = xfs_extent_free_log_item, 216 - .create_done = xfs_extent_free_create_done, 217 - .finish_item = xfs_extent_free_finish_item, 218 - .cancel_item = xfs_extent_free_cancel_item, 219 - }; 220 - 221 - /* 222 - * AGFL blocks are accounted differently in the reserve pools and are not 223 - * inserted into the busy extent list. 
224 - */ 225 - STATIC int 226 - xfs_agfl_free_finish_item( 227 - struct xfs_trans *tp, 228 - struct list_head *item, 229 - void *done_item, 230 - void **state) 231 - { 232 - struct xfs_mount *mp = tp->t_mountp; 233 - struct xfs_efd_log_item *efdp = done_item; 234 - struct xfs_extent_free_item *free; 235 - struct xfs_extent *extp; 236 - struct xfs_buf *agbp; 237 - int error; 238 - xfs_agnumber_t agno; 239 - xfs_agblock_t agbno; 240 - uint next_extent; 241 - 242 - free = container_of(item, struct xfs_extent_free_item, xefi_list); 243 - ASSERT(free->xefi_blockcount == 1); 244 - agno = XFS_FSB_TO_AGNO(mp, free->xefi_startblock); 245 - agbno = XFS_FSB_TO_AGBNO(mp, free->xefi_startblock); 246 - 247 - trace_xfs_agfl_free_deferred(mp, agno, 0, agbno, free->xefi_blockcount); 248 - 249 - error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp); 250 - if (!error) 251 - error = xfs_free_agfl_block(tp, agno, agbno, agbp, 252 - &free->xefi_oinfo); 253 - 254 - /* 255 - * Mark the transaction dirty, even on error. This ensures the 256 - * transaction is aborted, which: 257 - * 258 - * 1.) releases the EFI and frees the EFD 259 - * 2.) 
shuts down the filesystem 260 - */ 261 - tp->t_flags |= XFS_TRANS_DIRTY; 262 - set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags); 263 - 264 - next_extent = efdp->efd_next_extent; 265 - ASSERT(next_extent < efdp->efd_format.efd_nextents); 266 - extp = &(efdp->efd_format.efd_extents[next_extent]); 267 - extp->ext_start = free->xefi_startblock; 268 - extp->ext_len = free->xefi_blockcount; 269 - efdp->efd_next_extent++; 270 - 271 - kmem_free(free); 272 - return error; 273 - } 274 - 275 - 276 - /* sub-type with special handling for AGFL deferred frees */ 277 - const struct xfs_defer_op_type xfs_agfl_free_defer_type = { 278 - .max_items = XFS_EFI_MAX_FAST_EXTENTS, 279 - .diff_items = xfs_extent_free_diff_items, 280 - .create_intent = xfs_extent_free_create_intent, 281 - .abort_intent = xfs_extent_free_abort_intent, 282 - .log_item = xfs_extent_free_log_item, 283 - .create_done = xfs_extent_free_create_done, 284 - .finish_item = xfs_agfl_free_finish_item, 285 - .cancel_item = xfs_extent_free_cancel_item, 286 - };
-3
fs/xfs/xfs_trans_inode.c
··· 8 8 #include "xfs_shared.h" 9 9 #include "xfs_format.h" 10 10 #include "xfs_log_format.h" 11 - #include "xfs_trans_resv.h" 12 - #include "xfs_mount.h" 13 11 #include "xfs_inode.h" 14 12 #include "xfs_trans.h" 15 13 #include "xfs_trans_priv.h" 16 14 #include "xfs_inode_item.h" 17 - #include "xfs_trace.h" 18 15 19 16 #include <linux/iversion.h> 20 17
+1 -3
fs/xfs/xfs_trans_priv.h
··· 16 16 void xfs_trans_init(struct xfs_mount *); 17 17 void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *); 18 18 void xfs_trans_del_item(struct xfs_log_item *); 19 - void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn, 20 - bool abort); 21 19 void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp); 22 20 23 21 void xfs_trans_committed_bulk(struct xfs_ail *ailp, struct xfs_log_vec *lv, 24 - xfs_lsn_t commit_lsn, int aborted); 22 + xfs_lsn_t commit_lsn, bool aborted); 25 23 /* 26 24 * AIL traversal cursor. 27 25 *
-240
fs/xfs/xfs_trans_refcount.c
··· 1 - // SPDX-License-Identifier: GPL-2.0+ 2 - /* 3 - * Copyright (C) 2016 Oracle. All Rights Reserved. 4 - * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 - */ 6 - #include "xfs.h" 7 - #include "xfs_fs.h" 8 - #include "xfs_shared.h" 9 - #include "xfs_format.h" 10 - #include "xfs_log_format.h" 11 - #include "xfs_trans_resv.h" 12 - #include "xfs_mount.h" 13 - #include "xfs_defer.h" 14 - #include "xfs_trans.h" 15 - #include "xfs_trans_priv.h" 16 - #include "xfs_refcount_item.h" 17 - #include "xfs_alloc.h" 18 - #include "xfs_refcount.h" 19 - 20 - /* 21 - * This routine is called to allocate a "refcount update done" 22 - * log item. 23 - */ 24 - struct xfs_cud_log_item * 25 - xfs_trans_get_cud( 26 - struct xfs_trans *tp, 27 - struct xfs_cui_log_item *cuip) 28 - { 29 - struct xfs_cud_log_item *cudp; 30 - 31 - cudp = xfs_cud_init(tp->t_mountp, cuip); 32 - xfs_trans_add_item(tp, &cudp->cud_item); 33 - return cudp; 34 - } 35 - 36 - /* 37 - * Finish an refcount update and log it to the CUD. Note that the 38 - * transaction is marked dirty regardless of whether the refcount 39 - * update succeeds or fails to support the CUI/CUD lifecycle rules. 40 - */ 41 - int 42 - xfs_trans_log_finish_refcount_update( 43 - struct xfs_trans *tp, 44 - struct xfs_cud_log_item *cudp, 45 - enum xfs_refcount_intent_type type, 46 - xfs_fsblock_t startblock, 47 - xfs_extlen_t blockcount, 48 - xfs_fsblock_t *new_fsb, 49 - xfs_extlen_t *new_len, 50 - struct xfs_btree_cur **pcur) 51 - { 52 - int error; 53 - 54 - error = xfs_refcount_finish_one(tp, type, startblock, 55 - blockcount, new_fsb, new_len, pcur); 56 - 57 - /* 58 - * Mark the transaction dirty, even on error. This ensures the 59 - * transaction is aborted, which: 60 - * 61 - * 1.) releases the CUI and frees the CUD 62 - * 2.) shuts down the filesystem 63 - */ 64 - tp->t_flags |= XFS_TRANS_DIRTY; 65 - set_bit(XFS_LI_DIRTY, &cudp->cud_item.li_flags); 66 - 67 - return error; 68 - } 69 - 70 - /* Sort refcount intents by AG. 
*/ 71 - static int 72 - xfs_refcount_update_diff_items( 73 - void *priv, 74 - struct list_head *a, 75 - struct list_head *b) 76 - { 77 - struct xfs_mount *mp = priv; 78 - struct xfs_refcount_intent *ra; 79 - struct xfs_refcount_intent *rb; 80 - 81 - ra = container_of(a, struct xfs_refcount_intent, ri_list); 82 - rb = container_of(b, struct xfs_refcount_intent, ri_list); 83 - return XFS_FSB_TO_AGNO(mp, ra->ri_startblock) - 84 - XFS_FSB_TO_AGNO(mp, rb->ri_startblock); 85 - } 86 - 87 - /* Get an CUI. */ 88 - STATIC void * 89 - xfs_refcount_update_create_intent( 90 - struct xfs_trans *tp, 91 - unsigned int count) 92 - { 93 - struct xfs_cui_log_item *cuip; 94 - 95 - ASSERT(tp != NULL); 96 - ASSERT(count > 0); 97 - 98 - cuip = xfs_cui_init(tp->t_mountp, count); 99 - ASSERT(cuip != NULL); 100 - 101 - /* 102 - * Get a log_item_desc to point at the new item. 103 - */ 104 - xfs_trans_add_item(tp, &cuip->cui_item); 105 - return cuip; 106 - } 107 - 108 - /* Set the phys extent flags for this reverse mapping. */ 109 - static void 110 - xfs_trans_set_refcount_flags( 111 - struct xfs_phys_extent *refc, 112 - enum xfs_refcount_intent_type type) 113 - { 114 - refc->pe_flags = 0; 115 - switch (type) { 116 - case XFS_REFCOUNT_INCREASE: 117 - case XFS_REFCOUNT_DECREASE: 118 - case XFS_REFCOUNT_ALLOC_COW: 119 - case XFS_REFCOUNT_FREE_COW: 120 - refc->pe_flags |= type; 121 - break; 122 - default: 123 - ASSERT(0); 124 - } 125 - } 126 - 127 - /* Log refcount updates in the intent item. 
*/ 128 - STATIC void 129 - xfs_refcount_update_log_item( 130 - struct xfs_trans *tp, 131 - void *intent, 132 - struct list_head *item) 133 - { 134 - struct xfs_cui_log_item *cuip = intent; 135 - struct xfs_refcount_intent *refc; 136 - uint next_extent; 137 - struct xfs_phys_extent *ext; 138 - 139 - refc = container_of(item, struct xfs_refcount_intent, ri_list); 140 - 141 - tp->t_flags |= XFS_TRANS_DIRTY; 142 - set_bit(XFS_LI_DIRTY, &cuip->cui_item.li_flags); 143 - 144 - /* 145 - * atomic_inc_return gives us the value after the increment; 146 - * we want to use it as an array index so we need to subtract 1 from 147 - * it. 148 - */ 149 - next_extent = atomic_inc_return(&cuip->cui_next_extent) - 1; 150 - ASSERT(next_extent < cuip->cui_format.cui_nextents); 151 - ext = &cuip->cui_format.cui_extents[next_extent]; 152 - ext->pe_startblock = refc->ri_startblock; 153 - ext->pe_len = refc->ri_blockcount; 154 - xfs_trans_set_refcount_flags(ext, refc->ri_type); 155 - } 156 - 157 - /* Get an CUD so we can process all the deferred refcount updates. */ 158 - STATIC void * 159 - xfs_refcount_update_create_done( 160 - struct xfs_trans *tp, 161 - void *intent, 162 - unsigned int count) 163 - { 164 - return xfs_trans_get_cud(tp, intent); 165 - } 166 - 167 - /* Process a deferred refcount update. */ 168 - STATIC int 169 - xfs_refcount_update_finish_item( 170 - struct xfs_trans *tp, 171 - struct list_head *item, 172 - void *done_item, 173 - void **state) 174 - { 175 - struct xfs_refcount_intent *refc; 176 - xfs_fsblock_t new_fsb; 177 - xfs_extlen_t new_aglen; 178 - int error; 179 - 180 - refc = container_of(item, struct xfs_refcount_intent, ri_list); 181 - error = xfs_trans_log_finish_refcount_update(tp, done_item, 182 - refc->ri_type, 183 - refc->ri_startblock, 184 - refc->ri_blockcount, 185 - &new_fsb, &new_aglen, 186 - (struct xfs_btree_cur **)state); 187 - /* Did we run out of reservation? Requeue what we didn't finish. 
*/ 188 - if (!error && new_aglen > 0) { 189 - ASSERT(refc->ri_type == XFS_REFCOUNT_INCREASE || 190 - refc->ri_type == XFS_REFCOUNT_DECREASE); 191 - refc->ri_startblock = new_fsb; 192 - refc->ri_blockcount = new_aglen; 193 - return -EAGAIN; 194 - } 195 - kmem_free(refc); 196 - return error; 197 - } 198 - 199 - /* Clean up after processing deferred refcounts. */ 200 - STATIC void 201 - xfs_refcount_update_finish_cleanup( 202 - struct xfs_trans *tp, 203 - void *state, 204 - int error) 205 - { 206 - struct xfs_btree_cur *rcur = state; 207 - 208 - xfs_refcount_finish_one_cleanup(tp, rcur, error); 209 - } 210 - 211 - /* Abort all pending CUIs. */ 212 - STATIC void 213 - xfs_refcount_update_abort_intent( 214 - void *intent) 215 - { 216 - xfs_cui_release(intent); 217 - } 218 - 219 - /* Cancel a deferred refcount update. */ 220 - STATIC void 221 - xfs_refcount_update_cancel_item( 222 - struct list_head *item) 223 - { 224 - struct xfs_refcount_intent *refc; 225 - 226 - refc = container_of(item, struct xfs_refcount_intent, ri_list); 227 - kmem_free(refc); 228 - } 229 - 230 - const struct xfs_defer_op_type xfs_refcount_update_defer_type = { 231 - .max_items = XFS_CUI_MAX_FAST_EXTENTS, 232 - .diff_items = xfs_refcount_update_diff_items, 233 - .create_intent = xfs_refcount_update_create_intent, 234 - .abort_intent = xfs_refcount_update_abort_intent, 235 - .log_item = xfs_refcount_update_log_item, 236 - .create_done = xfs_refcount_update_create_done, 237 - .finish_item = xfs_refcount_update_finish_item, 238 - .finish_cleanup = xfs_refcount_update_finish_cleanup, 239 - .cancel_item = xfs_refcount_update_cancel_item, 240 - };
-257
fs/xfs/xfs_trans_rmap.c
··· 1 - // SPDX-License-Identifier: GPL-2.0+ 2 - /* 3 - * Copyright (C) 2016 Oracle. All Rights Reserved. 4 - * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 - */ 6 - #include "xfs.h" 7 - #include "xfs_fs.h" 8 - #include "xfs_shared.h" 9 - #include "xfs_format.h" 10 - #include "xfs_log_format.h" 11 - #include "xfs_trans_resv.h" 12 - #include "xfs_mount.h" 13 - #include "xfs_defer.h" 14 - #include "xfs_trans.h" 15 - #include "xfs_trans_priv.h" 16 - #include "xfs_rmap_item.h" 17 - #include "xfs_alloc.h" 18 - #include "xfs_rmap.h" 19 - 20 - /* Set the map extent flags for this reverse mapping. */ 21 - static void 22 - xfs_trans_set_rmap_flags( 23 - struct xfs_map_extent *rmap, 24 - enum xfs_rmap_intent_type type, 25 - int whichfork, 26 - xfs_exntst_t state) 27 - { 28 - rmap->me_flags = 0; 29 - if (state == XFS_EXT_UNWRITTEN) 30 - rmap->me_flags |= XFS_RMAP_EXTENT_UNWRITTEN; 31 - if (whichfork == XFS_ATTR_FORK) 32 - rmap->me_flags |= XFS_RMAP_EXTENT_ATTR_FORK; 33 - switch (type) { 34 - case XFS_RMAP_MAP: 35 - rmap->me_flags |= XFS_RMAP_EXTENT_MAP; 36 - break; 37 - case XFS_RMAP_MAP_SHARED: 38 - rmap->me_flags |= XFS_RMAP_EXTENT_MAP_SHARED; 39 - break; 40 - case XFS_RMAP_UNMAP: 41 - rmap->me_flags |= XFS_RMAP_EXTENT_UNMAP; 42 - break; 43 - case XFS_RMAP_UNMAP_SHARED: 44 - rmap->me_flags |= XFS_RMAP_EXTENT_UNMAP_SHARED; 45 - break; 46 - case XFS_RMAP_CONVERT: 47 - rmap->me_flags |= XFS_RMAP_EXTENT_CONVERT; 48 - break; 49 - case XFS_RMAP_CONVERT_SHARED: 50 - rmap->me_flags |= XFS_RMAP_EXTENT_CONVERT_SHARED; 51 - break; 52 - case XFS_RMAP_ALLOC: 53 - rmap->me_flags |= XFS_RMAP_EXTENT_ALLOC; 54 - break; 55 - case XFS_RMAP_FREE: 56 - rmap->me_flags |= XFS_RMAP_EXTENT_FREE; 57 - break; 58 - default: 59 - ASSERT(0); 60 - } 61 - } 62 - 63 - struct xfs_rud_log_item * 64 - xfs_trans_get_rud( 65 - struct xfs_trans *tp, 66 - struct xfs_rui_log_item *ruip) 67 - { 68 - struct xfs_rud_log_item *rudp; 69 - 70 - rudp = xfs_rud_init(tp->t_mountp, ruip); 71 - xfs_trans_add_item(tp, 
&rudp->rud_item); 72 - return rudp; 73 - } 74 - 75 - /* 76 - * Finish an rmap update and log it to the RUD. Note that the transaction is 77 - * marked dirty regardless of whether the rmap update succeeds or fails to 78 - * support the RUI/RUD lifecycle rules. 79 - */ 80 - int 81 - xfs_trans_log_finish_rmap_update( 82 - struct xfs_trans *tp, 83 - struct xfs_rud_log_item *rudp, 84 - enum xfs_rmap_intent_type type, 85 - uint64_t owner, 86 - int whichfork, 87 - xfs_fileoff_t startoff, 88 - xfs_fsblock_t startblock, 89 - xfs_filblks_t blockcount, 90 - xfs_exntst_t state, 91 - struct xfs_btree_cur **pcur) 92 - { 93 - int error; 94 - 95 - error = xfs_rmap_finish_one(tp, type, owner, whichfork, startoff, 96 - startblock, blockcount, state, pcur); 97 - 98 - /* 99 - * Mark the transaction dirty, even on error. This ensures the 100 - * transaction is aborted, which: 101 - * 102 - * 1.) releases the RUI and frees the RUD 103 - * 2.) shuts down the filesystem 104 - */ 105 - tp->t_flags |= XFS_TRANS_DIRTY; 106 - set_bit(XFS_LI_DIRTY, &rudp->rud_item.li_flags); 107 - 108 - return error; 109 - } 110 - 111 - /* Sort rmap intents by AG. */ 112 - static int 113 - xfs_rmap_update_diff_items( 114 - void *priv, 115 - struct list_head *a, 116 - struct list_head *b) 117 - { 118 - struct xfs_mount *mp = priv; 119 - struct xfs_rmap_intent *ra; 120 - struct xfs_rmap_intent *rb; 121 - 122 - ra = container_of(a, struct xfs_rmap_intent, ri_list); 123 - rb = container_of(b, struct xfs_rmap_intent, ri_list); 124 - return XFS_FSB_TO_AGNO(mp, ra->ri_bmap.br_startblock) - 125 - XFS_FSB_TO_AGNO(mp, rb->ri_bmap.br_startblock); 126 - } 127 - 128 - /* Get an RUI. 
*/ 129 - STATIC void * 130 - xfs_rmap_update_create_intent( 131 - struct xfs_trans *tp, 132 - unsigned int count) 133 - { 134 - struct xfs_rui_log_item *ruip; 135 - 136 - ASSERT(tp != NULL); 137 - ASSERT(count > 0); 138 - 139 - ruip = xfs_rui_init(tp->t_mountp, count); 140 - ASSERT(ruip != NULL); 141 - 142 - /* 143 - * Get a log_item_desc to point at the new item. 144 - */ 145 - xfs_trans_add_item(tp, &ruip->rui_item); 146 - return ruip; 147 - } 148 - 149 - /* Log rmap updates in the intent item. */ 150 - STATIC void 151 - xfs_rmap_update_log_item( 152 - struct xfs_trans *tp, 153 - void *intent, 154 - struct list_head *item) 155 - { 156 - struct xfs_rui_log_item *ruip = intent; 157 - struct xfs_rmap_intent *rmap; 158 - uint next_extent; 159 - struct xfs_map_extent *map; 160 - 161 - rmap = container_of(item, struct xfs_rmap_intent, ri_list); 162 - 163 - tp->t_flags |= XFS_TRANS_DIRTY; 164 - set_bit(XFS_LI_DIRTY, &ruip->rui_item.li_flags); 165 - 166 - /* 167 - * atomic_inc_return gives us the value after the increment; 168 - * we want to use it as an array index so we need to subtract 1 from 169 - * it. 170 - */ 171 - next_extent = atomic_inc_return(&ruip->rui_next_extent) - 1; 172 - ASSERT(next_extent < ruip->rui_format.rui_nextents); 173 - map = &ruip->rui_format.rui_extents[next_extent]; 174 - map->me_owner = rmap->ri_owner; 175 - map->me_startblock = rmap->ri_bmap.br_startblock; 176 - map->me_startoff = rmap->ri_bmap.br_startoff; 177 - map->me_len = rmap->ri_bmap.br_blockcount; 178 - xfs_trans_set_rmap_flags(map, rmap->ri_type, rmap->ri_whichfork, 179 - rmap->ri_bmap.br_state); 180 - } 181 - 182 - /* Get an RUD so we can process all the deferred rmap updates. */ 183 - STATIC void * 184 - xfs_rmap_update_create_done( 185 - struct xfs_trans *tp, 186 - void *intent, 187 - unsigned int count) 188 - { 189 - return xfs_trans_get_rud(tp, intent); 190 - } 191 - 192 - /* Process a deferred rmap update. 
*/ 193 - STATIC int 194 - xfs_rmap_update_finish_item( 195 - struct xfs_trans *tp, 196 - struct list_head *item, 197 - void *done_item, 198 - void **state) 199 - { 200 - struct xfs_rmap_intent *rmap; 201 - int error; 202 - 203 - rmap = container_of(item, struct xfs_rmap_intent, ri_list); 204 - error = xfs_trans_log_finish_rmap_update(tp, done_item, 205 - rmap->ri_type, 206 - rmap->ri_owner, rmap->ri_whichfork, 207 - rmap->ri_bmap.br_startoff, 208 - rmap->ri_bmap.br_startblock, 209 - rmap->ri_bmap.br_blockcount, 210 - rmap->ri_bmap.br_state, 211 - (struct xfs_btree_cur **)state); 212 - kmem_free(rmap); 213 - return error; 214 - } 215 - 216 - /* Clean up after processing deferred rmaps. */ 217 - STATIC void 218 - xfs_rmap_update_finish_cleanup( 219 - struct xfs_trans *tp, 220 - void *state, 221 - int error) 222 - { 223 - struct xfs_btree_cur *rcur = state; 224 - 225 - xfs_rmap_finish_one_cleanup(tp, rcur, error); 226 - } 227 - 228 - /* Abort all pending RUIs. */ 229 - STATIC void 230 - xfs_rmap_update_abort_intent( 231 - void *intent) 232 - { 233 - xfs_rui_release(intent); 234 - } 235 - 236 - /* Cancel a deferred rmap update. */ 237 - STATIC void 238 - xfs_rmap_update_cancel_item( 239 - struct list_head *item) 240 - { 241 - struct xfs_rmap_intent *rmap; 242 - 243 - rmap = container_of(item, struct xfs_rmap_intent, ri_list); 244 - kmem_free(rmap); 245 - } 246 - 247 - const struct xfs_defer_op_type xfs_rmap_update_defer_type = { 248 - .max_items = XFS_RUI_MAX_FAST_EXTENTS, 249 - .diff_items = xfs_rmap_update_diff_items, 250 - .create_intent = xfs_rmap_update_create_intent, 251 - .abort_intent = xfs_rmap_update_abort_intent, 252 - .log_item = xfs_rmap_update_log_item, 253 - .create_done = xfs_rmap_update_create_done, 254 - .finish_item = xfs_rmap_update_finish_item, 255 - .finish_cleanup = xfs_rmap_update_finish_cleanup, 256 - .cancel_item = xfs_rmap_update_cancel_item, 257 - };
+1 -4
fs/xfs/xfs_xattr.c
··· 5 5 */ 6 6 7 7 #include "xfs.h" 8 + #include "xfs_shared.h" 8 9 #include "xfs_format.h" 9 10 #include "xfs_log_format.h" 10 - #include "xfs_trans_resv.h" 11 - #include "xfs_mount.h" 12 11 #include "xfs_da_format.h" 13 12 #include "xfs_inode.h" 14 13 #include "xfs_attr.h" 15 - #include "xfs_attr_leaf.h" 16 - #include "xfs_acl.h" 17 14 18 15 #include <linux/posix_acl_xattr.h> 19 16 #include <linux/xattr.h>