Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'xfs-4.15-merge-1' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs updates from Darrick Wong:
"xfs: great scads of new stuff for 4.15.

This merge cycle, we're making some substantive changes to XFS: the
in-core extent mappings have been refactored to use proper iterators
and a btree to handle heavily fragmented files without needing
high-order memory allocations; there are some important log recovery
bug fixes; and the first part of the online fsck functionality has
landed.

(The online fsck feature is disabled by default and more pieces of it
will be coming in future release cycles.)

This giant pile of patches has been put through a full xfstests pass
over the weekend and a quick xfstests pass against this morning's
master, with no major failures reported.

New in this version:

- Refactor the incore extent map manipulations to use a cursor
instead of directly modifying extent data.

- Refactor the incore extent map cursor to use an in-memory btree
instead of a single high-order allocation. This eliminates a major
source of complaints about insufficient memory when opening a
heavily fragmented file on a system whose memory is also heavily
fragmented.

- Fix a longstanding bug where deleting a file with a complex
extended attribute btree incorrectly handled memory pointers, which
could lead to memory corruption.

- Improve metadata validation to eliminate crashing problems found
while fuzzing xfs.

- Move the error injection tag definitions into libxfs to be shared
with userspace components.

- Fix some log recovery bugs where we'd underflow the log block
position vector and incorrectly fail log recovery.

- Drain the buffer lru after log recovery to force recovered buffers
back through the verifiers after mount. On a v4 filesystem the log
never attaches verifiers during log replay (v5 does), so we could
end up with buffers that are marked verified but have never actually
been verified.

- Fix various other bugs.

- Introduce the first part of a new online fsck tool. The new fsck
tool will be able to iterate every piece of metadata in the
filesystem to look for obvious errors and corruptions. In the next
release cycle the checking will be extended to cross-reference with
the other fs metadata, so this feature should only be used by
developers in the meantime"
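The cursor-based extent walk described in the bullets above replaces direct index arithmetic on a flat in-core extent array. As a rough userspace sketch of the iteration pattern (all structs and helpers here are hypothetical stand-ins, not the kernel API; only the shape of `for_each_xfs_iext` mirrors the macro this series introduces):

```c
#include <stddef.h>

/* hypothetical in-memory extent record and cursor, for illustration only */
struct ext {
	unsigned long	startoff;	/* file offset of the mapping */
	unsigned long	blockcount;	/* length of the mapping */
	struct ext	*next;
};

struct ext_cursor {
	struct ext	*cur;
};

static void ext_first(struct ext *head, struct ext_cursor *icur)
{
	icur->cur = head;
}

static int ext_get(struct ext_cursor *icur, struct ext *rec)
{
	if (!icur->cur)
		return 0;
	*rec = *icur->cur;	/* copy the record out, like xfs_iext_get_extent() */
	return 1;
}

static void ext_next(struct ext_cursor *icur)
{
	if (icur->cur)
		icur->cur = icur->cur->next;
}

/* shaped like the kernel's for_each_xfs_iext() */
#define for_each_ext(head, icur, rec) \
	for (ext_first((head), (icur)); ext_get((icur), (rec)); ext_next(icur))

/* example consumer: total blocks mapped by the fork */
static unsigned long ext_total_blocks(struct ext *head)
{
	struct ext_cursor icur;
	struct ext rec;
	unsigned long total = 0;

	for_each_ext(head, &icur, &rec)
		total += rec.blockcount;
	return total;
}
```

The point of the cursor is that callers never see how the records are stored, so the backing store can become a btree without touching any of the walkers.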

* tag 'xfs-4.15-merge-1' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (131 commits)
xfs: on failed mount, force-reclaim inodes after unmounting quota controls
xfs: check the uniqueness of the AGFL entries
xfs: remove u_int* type usage
xfs: handle zero entries case in xfs_iext_rebalance_leaf
xfs: add comments documenting the rebalance algorithm
xfs: trivial indentation fixup for xfs_iext_remove_node
xfs: remove a superflous assignment in xfs_iext_remove_node
xfs: add some comments to xfs_iext_insert/xfs_iext_insert_node
xfs: fix number of records handling in xfs_iext_split_leaf
fs/xfs: Remove NULL check before kmem_cache_destroy
xfs: only check da node header padding on v5 filesystems
xfs: fix btree scrub deref check
xfs: fix uninitialized return values in scrub code
xfs: pass inode number to xfs_scrub_ino_set_{preen,warning}
xfs: refactor the directory data block bestfree checks
xfs: mark xlog_verify_dest_ptr STATIC
xfs: mark xlog_recover_check_summary STATIC
xfs: mark xfs_btree_check_lblock and xfs_btree_check_ptr static
xfs: remove unreachable error injection code in xfs_qm_dqget
xfs: remove unused debug counts for xfs_lock_inodes
...

+11001 -4040
fs/xfs/Kconfig (+17)
···
 
 	  If unsure, say N.
 
+config XFS_ONLINE_SCRUB
+	bool "XFS online metadata check support"
+	default n
+	depends on XFS_FS
+	help
+	  If you say Y here you will be able to check metadata on a
+	  mounted XFS filesystem. This feature is intended to reduce
+	  filesystem downtime by supplementing xfs_repair. The key
+	  advantage here is to look for problems proactively so that
+	  they can be dealt with in a controlled manner.
+
+	  This feature is considered EXPERIMENTAL. Use with caution!
+
+	  See the xfs_scrub man page in section 8 for additional information.
+
+	  If unsure, say N.
+
 config XFS_WARN
 	bool "XFS Verbose Warnings"
 	depends on XFS_FS && !XFS_DEBUG
fs/xfs/Makefile (+29)
···
 	xfs_dquot_buf.o \
 	xfs_ialloc.o \
 	xfs_ialloc_btree.o \
+	xfs_iext_tree.o \
 	xfs_inode_fork.o \
 	xfs_inode_buf.o \
 	xfs_log_rlimit.o \
···
 xfs-$(CONFIG_SYSCTL) += xfs_sysctl.o
 xfs-$(CONFIG_COMPAT) += xfs_ioctl32.o
 xfs-$(CONFIG_EXPORTFS_BLOCK_OPS) += xfs_pnfs.o
+
+# online scrub/repair
+ifeq ($(CONFIG_XFS_ONLINE_SCRUB),y)
+
+# Tracepoints like to blow up, so build that before everything else
+
+xfs-y += $(addprefix scrub/, \
+	trace.o \
+	agheader.o \
+	alloc.o \
+	attr.o \
+	bmap.o \
+	btree.o \
+	common.o \
+	dabtree.o \
+	dir.o \
+	ialloc.o \
+	inode.o \
+	parent.o \
+	refcount.o \
+	rmap.o \
+	scrub.o \
+	symlink.o \
+	)
+
+xfs-$(CONFIG_XFS_RT) += scrub/rtbitmap.o
+xfs-$(CONFIG_XFS_QUOTA) += scrub/quota.o
+endif
fs/xfs/kmem.h (+1 -2)
···
 static inline void
 kmem_zone_destroy(kmem_zone_t *zone)
 {
-	if (zone)
-		kmem_cache_destroy(zone);
+	kmem_cache_destroy(zone);
 }
 
 extern void *kmem_zone_alloc(kmem_zone_t *, xfs_km_flags_t);
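The kmem.h hunk works because the callee, kmem_cache_destroy(), itself tolerates a NULL pointer in current kernels, so the wrapper's own check is redundant. A userspace sketch of the same pattern (struct cache, cache_destroy() and zone_destroy() are illustrative stand-ins, not kernel APIs; a bool return is added only so the behavior is observable):

```c
#include <stdbool.h>
#include <stdlib.h>

struct cache {
	int objsize;
};

/*
 * The callee tolerates NULL, just as free() does; it reports whether
 * anything was actually freed.
 */
static bool cache_destroy(struct cache *c)
{
	if (!c)
		return false;
	free(c);
	return true;
}

/* the wrapper no longer needs its own "if (zone)" check */
static bool zone_destroy(struct cache *zone)
{
	return cache_destroy(zone);
}
```

Pushing the NULL check into the lowest-level destructor is what lets every caller shed its defensive check, which is exactly the cleanup the fs/xfs commit in the shortlog ("Remove NULL check before kmem_cache_destroy") performs.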
fs/xfs/libxfs/xfs_ag_resv.c (+1)
···
 #include "xfs_mount.h"
 #include "xfs_defer.h"
 #include "xfs_alloc.h"
+#include "xfs_errortag.h"
 #include "xfs_error.h"
 #include "xfs_trace.h"
 #include "xfs_cksum.h"
fs/xfs/libxfs/xfs_alloc.c (+50)
···
 #include "xfs_alloc_btree.h"
 #include "xfs_alloc.h"
 #include "xfs_extent_busy.h"
+#include "xfs_errortag.h"
 #include "xfs_error.h"
 #include "xfs_cksum.h"
 #include "xfs_trace.h"
···
 	query.priv = priv;
 	query.fn = fn;
 	return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query);
+}
+
+/* Find the size of the AG, in blocks. */
+xfs_agblock_t
+xfs_ag_block_count(
+	struct xfs_mount	*mp,
+	xfs_agnumber_t		agno)
+{
+	ASSERT(agno < mp->m_sb.sb_agcount);
+
+	if (agno < mp->m_sb.sb_agcount - 1)
+		return mp->m_sb.sb_agblocks;
+	return mp->m_sb.sb_dblocks - (agno * mp->m_sb.sb_agblocks);
+}
+
+/*
+ * Verify that an AG block number pointer neither points outside the AG
+ * nor points at static metadata.
+ */
+bool
+xfs_verify_agbno(
+	struct xfs_mount	*mp,
+	xfs_agnumber_t		agno,
+	xfs_agblock_t		agbno)
+{
+	xfs_agblock_t		eoag;
+
+	eoag = xfs_ag_block_count(mp, agno);
+	if (agbno >= eoag)
+		return false;
+	if (agbno <= XFS_AGFL_BLOCK(mp))
+		return false;
+	return true;
+}
+
+/*
+ * Verify that an FS block number pointer neither points outside the
+ * filesystem nor points at static AG metadata.
+ */
+bool
+xfs_verify_fsbno(
+	struct xfs_mount	*mp,
+	xfs_fsblock_t		fsbno)
+{
+	xfs_agnumber_t		agno = XFS_FSB_TO_AGNO(mp, fsbno);
+
+	if (agno >= mp->m_sb.sb_agcount)
+		return false;
+	return xfs_verify_agbno(mp, agno, XFS_FSB_TO_AGBNO(mp, fsbno));
 }
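The arithmetic in the new xfs_ag_block_count() is easy to check in isolation: every allocation group except the last is sb_agblocks long, and the last AG gets whatever remains of sb_dblocks. A standalone model (struct model_sb and model_ag_block_count() are hypothetical stand-ins that mirror only the math above, not the kernel types):

```c
#include <stdint.h>

/* hypothetical stand-in for the sb_agcount/sb_agblocks/sb_dblocks fields */
struct model_sb {
	uint32_t	agcount;	/* number of allocation groups */
	uint32_t	agblocks;	/* blocks per full-sized AG */
	uint64_t	dblocks;	/* total data blocks in the fs */
};

/* every AG but the last is full-sized; the last holds the remainder */
static uint64_t model_ag_block_count(const struct model_sb *sb, uint32_t agno)
{
	if (agno < sb->agcount - 1)
		return sb->agblocks;
	return sb->dblocks - (uint64_t)agno * sb->agblocks;
}
```

For a 350-block filesystem laid out as four 100-block AGs, AGs 0 through 2 are each 100 blocks and AG 3 holds the remaining 50; the scrub code uses exactly this bound when validating AG block pointers.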
fs/xfs/libxfs/xfs_alloc.h (+4)
···
 		xfs_alloc_query_range_fn fn, void *priv);
 int xfs_alloc_query_all(struct xfs_btree_cur *cur, xfs_alloc_query_range_fn fn,
 		void *priv);
+xfs_agblock_t xfs_ag_block_count(struct xfs_mount *mp, xfs_agnumber_t agno);
+bool xfs_verify_agbno(struct xfs_mount *mp, xfs_agnumber_t agno,
+		xfs_agblock_t agbno);
+bool xfs_verify_fsbno(struct xfs_mount *mp, xfs_fsblock_t fsbno);
 
 #endif	/* __XFS_ALLOC_H__ */
fs/xfs/libxfs/xfs_attr_leaf.c (+1 -5)
···
 	/* rounded down */
 	offset = (XFS_LITINO(mp, dp->i_d.di_version) - bytes) >> 3;
 
-	switch (dp->i_d.di_format) {
-	case XFS_DINODE_FMT_DEV:
+	if (dp->i_d.di_format == XFS_DINODE_FMT_DEV) {
 		minforkoff = roundup(sizeof(xfs_dev_t), 8) >> 3;
-		return (offset >= minforkoff) ? minforkoff : 0;
-	case XFS_DINODE_FMT_UUID:
-		minforkoff = roundup(sizeof(uuid_t), 8) >> 3;
 		return (offset >= minforkoff) ? minforkoff : 0;
 	}
 
fs/xfs/libxfs/xfs_bmap.c (+832 -1263)
···
 #include "xfs_bmap_util.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_rtalloc.h"
+#include "xfs_errortag.h"
 #include "xfs_error.h"
 #include "xfs_quota.h"
 #include "xfs_trans_space.h"
···
 STATIC int				/* error */
 xfs_bmbt_lookup_eq(
 	struct xfs_btree_cur	*cur,
-	xfs_fileoff_t		off,
-	xfs_fsblock_t		bno,
-	xfs_filblks_t		len,
+	struct xfs_bmbt_irec	*irec,
 	int			*stat)	/* success/failure */
 {
-	cur->bc_rec.b.br_startoff = off;
-	cur->bc_rec.b.br_startblock = bno;
-	cur->bc_rec.b.br_blockcount = len;
+	cur->bc_rec.b = *irec;
 	return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
 }
 
 STATIC int				/* error */
-xfs_bmbt_lookup_ge(
+xfs_bmbt_lookup_first(
 	struct xfs_btree_cur	*cur,
-	xfs_fileoff_t		off,
-	xfs_fsblock_t		bno,
-	xfs_filblks_t		len,
 	int			*stat)	/* success/failure */
 {
-	cur->bc_rec.b.br_startoff = off;
-	cur->bc_rec.b.br_startblock = bno;
-	cur->bc_rec.b.br_blockcount = len;
+	cur->bc_rec.b.br_startoff = 0;
+	cur->bc_rec.b.br_startblock = 0;
+	cur->bc_rec.b.br_blockcount = 0;
 	return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
 }
 
···
 }
 
 /*
- * Update the record referred to by cur to the value given
- * by [off, bno, len, state].
+ * Update the record referred to by cur to the value given by irec.
  * This either works (return 0) or gets an EFSCORRUPTED error.
  */
 STATIC int
 xfs_bmbt_update(
 	struct xfs_btree_cur	*cur,
-	xfs_fileoff_t		off,
-	xfs_fsblock_t		bno,
-	xfs_filblks_t		len,
-	xfs_exntst_t		state)
+	struct xfs_bmbt_irec	*irec)
 {
 	union xfs_btree_rec	rec;
 
-	xfs_bmbt_disk_set_allf(&rec.bmbt, off, bno, len, state);
+	xfs_bmbt_disk_set_all(&rec.bmbt, irec);
 	return xfs_btree_update(cur, &rec);
 }
 
···
 {
 	if (whichfork == XFS_ATTR_FORK &&
 	    ip->i_d.di_format != XFS_DINODE_FMT_DEV &&
-	    ip->i_d.di_format != XFS_DINODE_FMT_UUID &&
 	    ip->i_d.di_format != XFS_DINODE_FMT_BTREE) {
 		uint dfl_forkoff = xfs_default_attroffset(ip) >> 3;
 
···
 }
 
 /*
- * Add bmap trace insert entries for all the contents of the extent records.
- */
-void
-xfs_bmap_trace_exlist(
-	xfs_inode_t	*ip,		/* incore inode pointer */
-	xfs_extnum_t	cnt,		/* count of entries in the list */
-	int		whichfork,	/* data or attr or cow fork */
-	unsigned long	caller_ip)
-{
-	xfs_extnum_t	idx;		/* extent record index */
-	xfs_ifork_t	*ifp;		/* inode fork pointer */
-	int		state = 0;
-
-	if (whichfork == XFS_ATTR_FORK)
-		state |= BMAP_ATTRFORK;
-	else if (whichfork == XFS_COW_FORK)
-		state |= BMAP_COWFORK;
-
-	ifp = XFS_IFORK_PTR(ip, whichfork);
-	ASSERT(cnt == xfs_iext_count(ifp));
-	for (idx = 0; idx < cnt; idx++)
-		trace_xfs_extlist(ip, idx, state, caller_ip);
-}
-
-/*
  * Validate that the bmbt_irecs being returned from bmapi are valid
  * given the caller's original parameters.  Specifically check the
  * ranges of the returned irecs to ensure that they only extend beyond
···
 	cbno = be64_to_cpu(*pp);
 	*logflagsp = 0;
 #ifdef DEBUG
-	if ((error = xfs_btree_check_lptr(cur, cbno, 1)))
-		return error;
+	XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
+			xfs_btree_check_lptr(cur, cbno, 1));
 #endif
 	error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp, XFS_BMAP_BTREE_REF,
 			&xfs_bmbt_buf_ops);
···
 	xfs_bmbt_rec_t		*arp;	/* child record pointer */
 	struct xfs_btree_block	*block;	/* btree root block */
 	xfs_btree_cur_t		*cur;	/* bmap btree cursor */
-	xfs_bmbt_rec_host_t	*ep;	/* extent record pointer */
 	int			error;	/* error return value */
-	xfs_extnum_t		i, cnt;	/* extent record index */
 	xfs_ifork_t		*ifp;	/* inode fork pointer */
 	xfs_bmbt_key_t		*kp;	/* root block key pointer */
 	xfs_mount_t		*mp;	/* mount structure */
-	xfs_extnum_t		nextents; /* number of file extents */
 	xfs_bmbt_ptr_t		*pp;	/* root block address pointer */
+	struct xfs_iext_cursor	icur;
+	struct xfs_bmbt_irec	rec;
+	xfs_extnum_t		cnt = 0;
 
 	mp = ip->i_mount;
 	ASSERT(whichfork != XFS_COW_FORK);
···
 			XFS_BTNUM_BMAP, 0, 0, ip->i_ino,
 			XFS_BTREE_LONG_PTRS);
 
-	arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
-	nextents = xfs_iext_count(ifp);
-	for (cnt = i = 0; i < nextents; i++) {
-		ep = xfs_iext_get_ext(ifp, i);
-		if (!isnullstartblock(xfs_bmbt_get_startblock(ep))) {
-			arp->l0 = cpu_to_be64(ep->l0);
-			arp->l1 = cpu_to_be64(ep->l1);
-			arp++; cnt++;
-		}
+	for_each_xfs_iext(ifp, &icur, &rec) {
+		if (isnullstartblock(rec.br_startblock))
+			continue;
+		arp = XFS_BMBT_REC_ADDR(mp, ablock, 1 + cnt);
+		xfs_bmbt_disk_set_all(arp, &rec);
+		cnt++;
 	}
 	ASSERT(cnt == XFS_IFORK_NEXTENTS(ip, whichfork));
 	xfs_btree_set_numrecs(ablock, cnt);
···
 	xfs_bmap_forkoff_reset(ip, whichfork);
 	ifp->if_flags &= ~XFS_IFINLINE;
 	ifp->if_flags |= XFS_IFEXTENTS;
+	ifp->if_u1.if_root = NULL;
+	ifp->if_height = 0;
 	XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
 }
 
···
 	xfs_alloc_arg_t	args;		/* allocation arguments */
 	xfs_buf_t	*bp;		/* buffer for extent block */
 	struct xfs_bmbt_irec rec;
+	struct xfs_iext_cursor icur;
 
 	/*
 	 * We don't want to deal with the case of keeping inode data inline yet.
···
 
 	flags = 0;
 	error = 0;
-	ASSERT((ifp->if_flags & (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) ==
-	       XFS_IFINLINE);
+	ASSERT((ifp->if_flags & (XFS_IFINLINE|XFS_IFEXTENTS)) == XFS_IFINLINE);
 	memset(&args, 0, sizeof(args));
 	args.tp = tp;
 	args.mp = ip->i_mount;
···
 	xfs_bmap_local_to_extents_empty(ip, whichfork);
 	flags |= XFS_ILOG_CORE;
 
+	ifp->if_u1.if_root = NULL;
+	ifp->if_height = 0;
+
 	rec.br_startoff = 0;
 	rec.br_startblock = args.fsbno;
 	rec.br_blockcount = 1;
 	rec.br_state = XFS_EXT_NORM;
-	xfs_iext_insert(ip, 0, 1, &rec, 0);
+	xfs_iext_first(ifp, &icur);
+	xfs_iext_insert(ip, &icur, &rec, 0);
 
-	trace_xfs_bmap_post_update(ip, 0,
-			whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0,
-			_THIS_IP_);
 	XFS_IFORK_NEXT_SET(ip, whichfork, 1);
 	ip->i_d.di_nblocks = 1;
 	xfs_trans_mod_dquot_byino(tp, ip,
···
 	cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);
 	cur->bc_private.b.dfops = dfops;
 	cur->bc_private.b.firstblock = *firstblock;
-	if ((error = xfs_bmbt_lookup_ge(cur, 0, 0, 0, &stat)))
+	error = xfs_bmbt_lookup_first(cur, &stat);
+	if (error)
 		goto error0;
 	/* must be at least one entry */
 	XFS_WANT_CORRUPTED_GOTO(mp, stat == 1, error0);
···
 	case XFS_DINODE_FMT_DEV:
 		ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3;
 		break;
-	case XFS_DINODE_FMT_UUID:
-		ip->i_d.di_forkoff = roundup(sizeof(uuid_t), 8) >> 3;
-		break;
 	case XFS_DINODE_FMT_LOCAL:
 	case XFS_DINODE_FMT_EXTENTS:
 	case XFS_DINODE_FMT_BTREE:
···
  */
 
 /*
- * Read in the extents to if_extents.
- * All inode fields are set up by caller, we just traverse the btree
- * and copy the records in. If the file system cannot contain unwritten
- * extents, the records are checked for no "state" flags.
+ * Read in extents from a btree-format inode.
  */
-int					/* error */
-xfs_bmap_read_extents(
-	xfs_trans_t		*tp,	/* transaction pointer */
-	xfs_inode_t		*ip,	/* incore inode */
-	int			whichfork) /* data or attr fork */
+int
+xfs_iread_extents(
+	struct xfs_trans	*tp,
+	struct xfs_inode	*ip,
+	int			whichfork)
 {
-	struct xfs_btree_block	*block;	/* current btree block */
-	xfs_fsblock_t		bno;	/* block # of "block" */
-	xfs_buf_t		*bp;	/* buffer for "block" */
-	int			error;	/* error return value */
-	xfs_extnum_t		i, j;	/* index into the extents list */
-	xfs_ifork_t		*ifp;	/* fork structure */
-	int			level;	/* btree level, for checking */
-	xfs_mount_t		*mp;	/* file system mount structure */
-	__be64			*pp;	/* pointer to block address */
-	/* REFERENCED */
-	xfs_extnum_t		room;	/* number of entries there's room for */
+	struct xfs_mount	*mp = ip->i_mount;
+	int			state = xfs_bmap_fork_to_state(whichfork);
+	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
+	xfs_extnum_t		nextents = XFS_IFORK_NEXTENTS(ip, whichfork);
+	struct xfs_btree_block	*block = ifp->if_broot;
+	struct xfs_iext_cursor	icur;
+	struct xfs_bmbt_irec	new;
+	xfs_fsblock_t		bno;
+	struct xfs_buf		*bp;
+	xfs_extnum_t		i, j;
+	int			level;
+	__be64			*pp;
+	int			error;
 
-	mp = ip->i_mount;
-	ifp = XFS_IFORK_PTR(ip, whichfork);
-	block = ifp->if_broot;
+	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+
+	if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
+		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
+		return -EFSCORRUPTED;
+	}
+
 	/*
 	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
 	 */
···
 		error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
 				XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
 		if (error)
-			return error;
+			goto out;
 		block = XFS_BUF_TO_BLOCK(bp);
 		if (level == 0)
 			break;
 		pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
 		bno = be64_to_cpu(*pp);
 		XFS_WANT_CORRUPTED_GOTO(mp,
-			XFS_FSB_SANITY_CHECK(mp, bno), error0);
+			XFS_FSB_SANITY_CHECK(mp, bno), out_brelse);
 		xfs_trans_brelse(tp, bp);
 	}
+
 	/*
 	 * Here with bp and block set to the leftmost leaf node in the tree.
 	 */
-	room = xfs_iext_count(ifp);
 	i = 0;
+	xfs_iext_first(ifp, &icur);
+
 	/*
 	 * Loop over all leaf nodes. Copy information to the extent records.
 	 */
···
 		xfs_extnum_t	num_recs;
 
 		num_recs = xfs_btree_get_numrecs(block);
-		if (unlikely(i + num_recs > room)) {
-			ASSERT(i + num_recs <= room);
+		if (unlikely(i + num_recs > nextents)) {
+			ASSERT(i + num_recs <= nextents);
 			xfs_warn(ip->i_mount,
 				"corrupt dinode %Lu, (btree extents).",
 				(unsigned long long) ip->i_ino);
-			XFS_CORRUPTION_ERROR("xfs_bmap_read_extents(1)",
+			XFS_CORRUPTION_ERROR(__func__,
 				XFS_ERRLEVEL_LOW, ip->i_mount, block);
-			goto error0;
+			error = -EFSCORRUPTED;
+			goto out_brelse;
 		}
 		/*
 		 * Read-ahead the next leaf block, if any.
···
 		 * Copy records into the extent records.
 		 */
 		frp = XFS_BMBT_REC_ADDR(mp, block, 1);
-		for (j = 0; j < num_recs; j++, i++, frp++) {
-			xfs_bmbt_rec_host_t *trp = xfs_iext_get_ext(ifp, i);
-			trp->l0 = be64_to_cpu(frp->l0);
-			trp->l1 = be64_to_cpu(frp->l1);
-			if (!xfs_bmbt_validate_extent(mp, whichfork, trp)) {
+		for (j = 0; j < num_recs; j++, frp++, i++) {
+			xfs_bmbt_disk_get_all(frp, &new);
+			if (!xfs_bmbt_validate_extent(mp, whichfork, &new)) {
 				XFS_ERROR_REPORT("xfs_bmap_read_extents(2)",
 						XFS_ERRLEVEL_LOW, mp);
-				goto error0;
+				error = -EFSCORRUPTED;
+				goto out_brelse;
 			}
+			xfs_iext_insert(ip, &icur, &new, state);
+			trace_xfs_read_extent(ip, &icur, state, _THIS_IP_);
+			xfs_iext_next(ifp, &icur);
 		}
 		xfs_trans_brelse(tp, bp);
 		bno = nextbno;
···
 		error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
 				XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
 		if (error)
-			return error;
+			goto out;
 		block = XFS_BUF_TO_BLOCK(bp);
 	}
-	if (i != XFS_IFORK_NEXTENTS(ip, whichfork))
-		return -EFSCORRUPTED;
+
+	if (i != XFS_IFORK_NEXTENTS(ip, whichfork)) {
+		error = -EFSCORRUPTED;
+		goto out;
+	}
 	ASSERT(i == xfs_iext_count(ifp));
-	XFS_BMAP_TRACE_EXLIST(ip, i, whichfork);
+
+	ifp->if_flags |= XFS_IFEXTENTS;
 	return 0;
-error0:
+
+out_brelse:
 	xfs_trans_brelse(tp, bp);
-	return -EFSCORRUPTED;
+out:
+	xfs_iext_destroy(ifp);
+	return error;
 }
 
 /*
- * Returns the file-relative block number of the first unused block(s)
- * in the file with at least "len" logically contiguous blocks free.
- * This is the lowest-address hole if the file has holes, else the first block
- * past the end of file.
- * Return 0 if the file is currently local (in-inode).
+ * Returns the relative block number of the first unused block(s) in the given
+ * fork with at least "len" logically contiguous blocks free.  This is the
+ * lowest-address hole if the fork has holes, else the first block past the end
+ * of fork.  Return 0 if the fork is currently local (in-inode).
  */
 int						/* error */
 xfs_bmap_first_unused(
-	xfs_trans_t	*tp,			/* transaction pointer */
-	xfs_inode_t	*ip,			/* incore inode */
-	xfs_extlen_t	len,			/* size of hole to find */
-	xfs_fileoff_t	*first_unused,		/* unused block */
-	int		whichfork)		/* data or attr fork */
+	struct xfs_trans	*tp,		/* transaction pointer */
+	struct xfs_inode	*ip,		/* incore inode */
+	xfs_extlen_t		len,		/* size of hole to find */
+	xfs_fileoff_t		*first_unused,	/* unused block */
+	int			whichfork)	/* data or attr fork */
 {
-	int		error;			/* error return value */
-	int		idx;			/* extent record index */
-	xfs_ifork_t	*ifp;			/* inode fork pointer */
-	xfs_fileoff_t	lastaddr;		/* last block number seen */
-	xfs_fileoff_t	lowest;			/* lowest useful block */
-	xfs_fileoff_t	max;			/* starting useful block */
-	xfs_extnum_t	nextents;		/* number of extent entries */
+	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
+	struct xfs_bmbt_irec	got;
+	struct xfs_iext_cursor	icur;
+	xfs_fileoff_t		lastaddr = 0;
+	xfs_fileoff_t		lowest, max;
+	int			error;
 
 	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE ||
 	       XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ||
 	       XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
+
 	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
 		*first_unused = 0;
 		return 0;
 	}
-	ifp = XFS_IFORK_PTR(ip, whichfork);
-	if (!(ifp->if_flags & XFS_IFEXTENTS) &&
-	    (error = xfs_iread_extents(tp, ip, whichfork)))
-		return error;
-	lowest = *first_unused;
-	nextents = xfs_iext_count(ifp);
-	for (idx = 0, lastaddr = 0, max = lowest; idx < nextents; idx++) {
-		struct xfs_bmbt_irec got;
 
-		xfs_iext_get_extent(ifp, idx, &got);
+	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+		error = xfs_iread_extents(tp, ip, whichfork);
+		if (error)
+			return error;
+	}
 
+	lowest = max = *first_unused;
+	for_each_xfs_iext(ifp, &icur, &got) {
 		/*
 		 * See if the hole before this extent will work.
 		 */
 		if (got.br_startoff >= lowest + len &&
-		    got.br_startoff - max >= len) {
-			*first_unused = max;
-			return 0;
-		}
+		    got.br_startoff - max >= len)
+			break;
 		lastaddr = got.br_startoff + got.br_blockcount;
 		max = XFS_FILEOFF_MAX(lastaddr, lowest);
 	}
+
 	*first_unused = max;
 	return 0;
 }
···
 {
 	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
 	struct xfs_bmbt_irec	got;
-	xfs_extnum_t		idx;
+	struct xfs_iext_cursor	icur;
 	int			error;
 
 	switch (XFS_IFORK_FORMAT(ip, whichfork)) {
···
 			return error;
 	}
 
-	if (xfs_iext_lookup_extent(ip, ifp, *last_block - 1, &idx, &got)) {
-		if (got.br_startoff <= *last_block - 1)
-			return 0;
-	}
-
-	if (xfs_iext_get_extent(ifp, idx - 1, &got)) {
-		*last_block = got.br_startoff + got.br_blockcount;
-		return 0;
-	}
-
-	*last_block = 0;
+	if (!xfs_iext_lookup_extent_before(ip, ifp, last_block, &icur, &got))
+		*last_block = 0;
 	return 0;
 }
···
 	int			*is_empty)
 {
 	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
+	struct xfs_iext_cursor	icur;
 	int			error;
-	int			nextents;
 
 	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
 		error = xfs_iread_extents(tp, ip, whichfork);
···
 			return error;
 	}
 
-	nextents = xfs_iext_count(ifp);
-	if (nextents == 0) {
+	xfs_iext_last(ifp, &icur);
+	if (!xfs_iext_get_extent(ifp, &icur, rec))
 		*is_empty = 1;
-		return 0;
-	}
-
-	xfs_bmbt_get_all(xfs_iext_get_ext(ifp, nextents - 1), rec);
-	*is_empty = 0;
+	else
+		*is_empty = 0;
 	return 0;
 }
···
 	xfs_inode_t	*ip,		/* incore inode */
 	int		whichfork)	/* data or attr fork */
 {
-	xfs_bmbt_rec_host_t *ep;	/* ptr to fork's extent */
 	xfs_ifork_t	*ifp;		/* inode fork pointer */
 	int		rval;		/* return value */
 	xfs_bmbt_irec_t	s;		/* internal version of extent */
+	struct xfs_iext_cursor icur;
 
 #ifndef DEBUG
 	if (whichfork == XFS_DATA_FORK)
···
 		return 0;
 	ifp = XFS_IFORK_PTR(ip, whichfork);
 	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
-	ep = xfs_iext_get_ext(ifp, 0);
-	xfs_bmbt_get_all(ep, &s);
+	xfs_iext_first(ifp, &icur);
+	xfs_iext_get_extent(ifp, &icur, &s);
 	rval = s.br_startoff == 0 && s.br_blockcount == 1;
 	if (rval && whichfork == XFS_DATA_FORK)
 		ASSERT(XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize);
···
 	int			whichfork)
 {
 	struct xfs_bmbt_irec	*new = &bma->got;
-	int			diff;	/* temp value */
-	xfs_bmbt_rec_host_t	*ep;	/* extent entry for idx */
 	int			error;	/* error return value */
 	int			i;	/* temp state */
 	xfs_ifork_t		*ifp;	/* inode fork pointer */
···
 	xfs_bmbt_irec_t		r[3];	/* neighbor extent entries */
 					/* left is 0, right is 1, prev is 2 */
 	int			rval=0;	/* return value (logging flags) */
-	int			state = 0;/* state bits, accessed thru macros */
+	int			state = xfs_bmap_fork_to_state(whichfork);
 	xfs_filblks_t		da_new; /* new count del alloc blocks used */
 	xfs_filblks_t		da_old; /* old count del alloc blocks used */
 	xfs_filblks_t		temp=0;	/* value for da_new calculations */
-	xfs_filblks_t		temp2=0;/* value for da_new calculations */
 	int			tmp_rval;	/* partial logging flags */
 	struct xfs_mount	*mp;
 	xfs_extnum_t		*nextents;
+	struct xfs_bmbt_irec	old;
 
 	mp = bma->ip->i_mount;
 	ifp = XFS_IFORK_PTR(bma->ip, whichfork);
···
 	nextents = (whichfork == XFS_COW_FORK ? &bma->ip->i_cnextents :
 						&bma->ip->i_d.di_nextents);
 
-	ASSERT(bma->idx >= 0);
-	ASSERT(bma->idx <= xfs_iext_count(ifp));
 	ASSERT(!isnullstartblock(new->br_startblock));
 	ASSERT(!bma->cur ||
 	       (bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
···
 #define RIGHT		r[1]
 #define PREV		r[2]
 
-	if (whichfork == XFS_COW_FORK)
-		state |= BMAP_COWFORK;
-
 	/*
 	 * Set up a bunch of variables to make the tests simpler.
 	 */
-	ep = xfs_iext_get_ext(ifp, bma->idx);
-	xfs_bmbt_get_all(ep, &PREV);
+	xfs_iext_get_extent(ifp, &bma->icur, &PREV);
 	new_endoff = new->br_startoff + new->br_blockcount;
+	ASSERT(isnullstartblock(PREV.br_startblock));
 	ASSERT(PREV.br_startoff <= new->br_startoff);
 	ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
···
 	 * Check and set flags if this segment has a left neighbor.
 	 * Don't set contiguous if the combined extent would be too large.
 	 */
-	if (bma->idx > 0) {
+	if (xfs_iext_peek_prev_extent(ifp, &bma->icur, &LEFT)) {
 		state |= BMAP_LEFT_VALID;
-		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1), &LEFT);
-
 		if (isnullstartblock(LEFT.br_startblock))
 			state |= BMAP_LEFT_DELAY;
 	}
···
 	 * Don't set contiguous if the combined extent would be too large.
 	 * Also check for all-three-contiguous being too large.
 	 */
-	if (bma->idx < xfs_iext_count(ifp) - 1) {
+	if (xfs_iext_peek_next_extent(ifp, &bma->icur, &RIGHT)) {
 		state |= BMAP_RIGHT_VALID;
-		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx + 1), &RIGHT);
-
 		if (isnullstartblock(RIGHT.br_startblock))
 			state |= BMAP_RIGHT_DELAY;
 	}
···
 		 * Filling in all of a previously delayed allocation extent.
 		 * The left and right neighbors are both contiguous with new.
 		 */
-		bma->idx--;
-		trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
-		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx),
-			LEFT.br_blockcount + PREV.br_blockcount +
-			RIGHT.br_blockcount);
-		trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
+		LEFT.br_blockcount += PREV.br_blockcount + RIGHT.br_blockcount;
 
-		xfs_iext_remove(bma->ip, bma->idx + 1, 2, state);
+		xfs_iext_remove(bma->ip, &bma->icur, state);
+		xfs_iext_remove(bma->ip, &bma->icur, state);
+		xfs_iext_prev(ifp, &bma->icur);
+		xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
 		(*nextents)--;
+
 		if (bma->cur == NULL)
 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
 		else {
 			rval = XFS_ILOG_CORE;
-			error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff,
-					RIGHT.br_startblock,
-					RIGHT.br_blockcount, &i);
+			error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i);
 			if (error)
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
···
 			if (error)
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
-			error = xfs_bmbt_update(bma->cur, LEFT.br_startoff,
-					LEFT.br_startblock,
-					LEFT.br_blockcount +
-					PREV.br_blockcount +
-					RIGHT.br_blockcount, LEFT.br_state);
+			error = xfs_bmbt_update(bma->cur, &LEFT);
 			if (error)
 				goto done;
 		}
···
 		 * Filling in all of a previously delayed allocation extent.
 		 * The left neighbor is contiguous, the right is not.
 		 */
-		bma->idx--;
+		old = LEFT;
+		LEFT.br_blockcount += PREV.br_blockcount;
 
-		trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
-		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx),
-			LEFT.br_blockcount + PREV.br_blockcount);
-		trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
+		xfs_iext_remove(bma->ip, &bma->icur, state);
+		xfs_iext_prev(ifp, &bma->icur);
+		xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
 
-		xfs_iext_remove(bma->ip, bma->idx + 1, 1, state);
 		if (bma->cur == NULL)
 			rval = XFS_ILOG_DEXT;
 		else {
 			rval = 0;
-			error = xfs_bmbt_lookup_eq(bma->cur, LEFT.br_startoff,
-					LEFT.br_startblock, LEFT.br_blockcount,
-					&i);
+			error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
 			if (error)
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
-			error = xfs_bmbt_update(bma->cur, LEFT.br_startoff,
-					LEFT.br_startblock,
-					LEFT.br_blockcount +
-					PREV.br_blockcount, LEFT.br_state);
+			error = xfs_bmbt_update(bma->cur, &LEFT);
 			if (error)
 				goto done;
 		}
···
 		 * Filling in all of a previously delayed allocation extent.
 		 * The right neighbor is contiguous, the left is not.
 		 */
-		trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
-		xfs_bmbt_set_startblock(ep, new->br_startblock);
-		xfs_bmbt_set_blockcount(ep,
-			PREV.br_blockcount + RIGHT.br_blockcount);
-		trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
+		PREV.br_startblock = new->br_startblock;
+		PREV.br_blockcount += RIGHT.br_blockcount;
 
-		xfs_iext_remove(bma->ip, bma->idx + 1, 1, state);
+		xfs_iext_next(ifp, &bma->icur);
+		xfs_iext_remove(bma->ip, &bma->icur, state);
+		xfs_iext_prev(ifp, &bma->icur);
+		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
+
 		if (bma->cur == NULL)
 			rval = XFS_ILOG_DEXT;
 		else {
 			rval = 0;
-			error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff,
-					RIGHT.br_startblock,
-					RIGHT.br_blockcount, &i);
+			error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i);
 			if (error)
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
-			error = xfs_bmbt_update(bma->cur, PREV.br_startoff,
-					new->br_startblock,
-					PREV.br_blockcount +
-					RIGHT.br_blockcount, PREV.br_state);
+			error = xfs_bmbt_update(bma->cur, &PREV);
 			if (error)
 				goto done;
 		}
···
 		 * Neither the left nor right neighbors are contiguous with
 		 * the new one.
 		 */
-		trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
-		xfs_bmbt_set_startblock(ep, new->br_startblock);
-		xfs_bmbt_set_state(ep, new->br_state);
-		trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
+		PREV.br_startblock = new->br_startblock;
+		PREV.br_state = new->br_state;
+		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
 
 		(*nextents)++;
 		if (bma->cur == NULL)
 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
 		else {
 			rval = XFS_ILOG_CORE;
-			error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff,
-					new->br_startblock, new->br_blockcount,
-					&i);
+			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
 			if (error)
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
-			bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
 			error = xfs_btree_insert(bma->cur, &i);
 			if (error)
 				goto done;
···
 		 * Filling in the first part of a previous delayed allocation.
 		 * The left neighbor is contiguous.
1759 1830 */ 1760 - trace_xfs_bmap_pre_update(bma->ip, bma->idx - 1, state, _THIS_IP_); 1761 - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx - 1), 1762 - LEFT.br_blockcount + new->br_blockcount); 1763 - xfs_bmbt_set_startoff(ep, 1764 - PREV.br_startoff + new->br_blockcount); 1765 - trace_xfs_bmap_post_update(bma->ip, bma->idx - 1, state, _THIS_IP_); 1766 - 1831 + old = LEFT; 1767 1832 temp = PREV.br_blockcount - new->br_blockcount; 1768 - trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); 1769 - xfs_bmbt_set_blockcount(ep, temp); 1833 + da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp), 1834 + startblockval(PREV.br_startblock)); 1835 + 1836 + LEFT.br_blockcount += new->br_blockcount; 1837 + 1838 + PREV.br_blockcount = temp; 1839 + PREV.br_startoff += new->br_blockcount; 1840 + PREV.br_startblock = nullstartblock(da_new); 1841 + 1842 + xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV); 1843 + xfs_iext_prev(ifp, &bma->icur); 1844 + xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT); 1845 + 1770 1846 if (bma->cur == NULL) 1771 1847 rval = XFS_ILOG_DEXT; 1772 1848 else { 1773 1849 rval = 0; 1774 - error = xfs_bmbt_lookup_eq(bma->cur, LEFT.br_startoff, 1775 - LEFT.br_startblock, LEFT.br_blockcount, 1776 - &i); 1850 + error = xfs_bmbt_lookup_eq(bma->cur, &old, &i); 1777 1851 if (error) 1778 1852 goto done; 1779 1853 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 1780 - error = xfs_bmbt_update(bma->cur, LEFT.br_startoff, 1781 - LEFT.br_startblock, 1782 - LEFT.br_blockcount + 1783 - new->br_blockcount, 1784 - LEFT.br_state); 1854 + error = xfs_bmbt_update(bma->cur, &LEFT); 1785 1855 if (error) 1786 1856 goto done; 1787 1857 } 1788 - da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp), 1789 - startblockval(PREV.br_startblock)); 1790 - xfs_bmbt_set_startblock(ep, nullstartblock(da_new)); 1791 - trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); 1792 - 1793 - bma->idx--; 1794 1858 break; 1795 1859 1796 1860 
case BMAP_LEFT_FILLING: ··· 1791 1869 * Filling in the first part of a previous delayed allocation. 1792 1870 * The left neighbor is not contiguous. 1793 1871 */ 1794 - trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); 1795 - xfs_bmbt_set_startoff(ep, new_endoff); 1796 - temp = PREV.br_blockcount - new->br_blockcount; 1797 - xfs_bmbt_set_blockcount(ep, temp); 1798 - xfs_iext_insert(bma->ip, bma->idx, 1, new, state); 1872 + xfs_iext_update_extent(bma->ip, state, &bma->icur, new); 1799 1873 (*nextents)++; 1800 1874 if (bma->cur == NULL) 1801 1875 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 1802 1876 else { 1803 1877 rval = XFS_ILOG_CORE; 1804 - error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff, 1805 - new->br_startblock, new->br_blockcount, 1806 - &i); 1878 + error = xfs_bmbt_lookup_eq(bma->cur, new, &i); 1807 1879 if (error) 1808 1880 goto done; 1809 1881 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); 1810 - bma->cur->bc_rec.b.br_state = XFS_EXT_NORM; 1811 1882 error = xfs_btree_insert(bma->cur, &i); 1812 1883 if (error) 1813 1884 goto done; ··· 1815 1900 if (error) 1816 1901 goto done; 1817 1902 } 1903 + 1904 + temp = PREV.br_blockcount - new->br_blockcount; 1818 1905 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp), 1819 1906 startblockval(PREV.br_startblock) - 1820 1907 (bma->cur ? bma->cur->bc_private.b.allocated : 0)); 1821 - ep = xfs_iext_get_ext(ifp, bma->idx + 1); 1822 - xfs_bmbt_set_startblock(ep, nullstartblock(da_new)); 1823 - trace_xfs_bmap_post_update(bma->ip, bma->idx + 1, state, _THIS_IP_); 1908 + 1909 + PREV.br_startoff = new_endoff; 1910 + PREV.br_blockcount = temp; 1911 + PREV.br_startblock = nullstartblock(da_new); 1912 + xfs_iext_next(ifp, &bma->icur); 1913 + xfs_iext_insert(bma->ip, &bma->icur, &PREV, state); 1914 + xfs_iext_prev(ifp, &bma->icur); 1824 1915 break; 1825 1916 1826 1917 case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: ··· 1834 1913 * Filling in the last part of a previous delayed allocation. 
1835 1914 * The right neighbor is contiguous with the new allocation. 1836 1915 */ 1837 - temp = PREV.br_blockcount - new->br_blockcount; 1838 - trace_xfs_bmap_pre_update(bma->ip, bma->idx + 1, state, _THIS_IP_); 1839 - xfs_bmbt_set_blockcount(ep, temp); 1840 - xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, bma->idx + 1), 1841 - new->br_startoff, new->br_startblock, 1842 - new->br_blockcount + RIGHT.br_blockcount, 1843 - RIGHT.br_state); 1844 - trace_xfs_bmap_post_update(bma->ip, bma->idx + 1, state, _THIS_IP_); 1916 + old = RIGHT; 1917 + RIGHT.br_startoff = new->br_startoff; 1918 + RIGHT.br_startblock = new->br_startblock; 1919 + RIGHT.br_blockcount += new->br_blockcount; 1920 + 1845 1921 if (bma->cur == NULL) 1846 1922 rval = XFS_ILOG_DEXT; 1847 1923 else { 1848 1924 rval = 0; 1849 - error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff, 1850 - RIGHT.br_startblock, 1851 - RIGHT.br_blockcount, &i); 1925 + error = xfs_bmbt_lookup_eq(bma->cur, &old, &i); 1852 1926 if (error) 1853 1927 goto done; 1854 1928 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 1855 - error = xfs_bmbt_update(bma->cur, new->br_startoff, 1856 - new->br_startblock, 1857 - new->br_blockcount + 1858 - RIGHT.br_blockcount, 1859 - RIGHT.br_state); 1929 + error = xfs_bmbt_update(bma->cur, &RIGHT); 1860 1930 if (error) 1861 1931 goto done; 1862 1932 } 1863 1933 1934 + temp = PREV.br_blockcount - new->br_blockcount; 1864 1935 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp), 1865 1936 startblockval(PREV.br_startblock)); 1866 - trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); 1867 - xfs_bmbt_set_startblock(ep, nullstartblock(da_new)); 1868 - trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); 1869 1937 1870 - bma->idx++; 1938 + PREV.br_blockcount = temp; 1939 + PREV.br_startblock = nullstartblock(da_new); 1940 + 1941 + xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV); 1942 + xfs_iext_next(ifp, &bma->icur); 1943 + xfs_iext_update_extent(bma->ip, state, &bma->icur, 
&RIGHT); 1871 1944 break; 1872 1945 1873 1946 case BMAP_RIGHT_FILLING: ··· 1869 1954 * Filling in the last part of a previous delayed allocation. 1870 1955 * The right neighbor is not contiguous. 1871 1956 */ 1872 - temp = PREV.br_blockcount - new->br_blockcount; 1873 - trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); 1874 - xfs_bmbt_set_blockcount(ep, temp); 1875 - xfs_iext_insert(bma->ip, bma->idx + 1, 1, new, state); 1957 + xfs_iext_update_extent(bma->ip, state, &bma->icur, new); 1876 1958 (*nextents)++; 1877 1959 if (bma->cur == NULL) 1878 1960 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 1879 1961 else { 1880 1962 rval = XFS_ILOG_CORE; 1881 - error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff, 1882 - new->br_startblock, new->br_blockcount, 1883 - &i); 1963 + error = xfs_bmbt_lookup_eq(bma->cur, new, &i); 1884 1964 if (error) 1885 1965 goto done; 1886 1966 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); 1887 - bma->cur->bc_rec.b.br_state = XFS_EXT_NORM; 1888 1967 error = xfs_btree_insert(bma->cur, &i); 1889 1968 if (error) 1890 1969 goto done; ··· 1893 1984 if (error) 1894 1985 goto done; 1895 1986 } 1987 + 1988 + temp = PREV.br_blockcount - new->br_blockcount; 1896 1989 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp), 1897 1990 startblockval(PREV.br_startblock) - 1898 1991 (bma->cur ? 
bma->cur->bc_private.b.allocated : 0)); 1899 - ep = xfs_iext_get_ext(ifp, bma->idx); 1900 - xfs_bmbt_set_startblock(ep, nullstartblock(da_new)); 1901 - trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); 1902 1992 1903 - bma->idx++; 1993 + PREV.br_startblock = nullstartblock(da_new); 1994 + PREV.br_blockcount = temp; 1995 + xfs_iext_insert(bma->ip, &bma->icur, &PREV, state); 1996 + xfs_iext_next(ifp, &bma->icur); 1904 1997 break; 1905 1998 1906 1999 case 0: ··· 1926 2015 * PREV @ idx LEFT RIGHT 1927 2016 * inserted at idx + 1 1928 2017 */ 1929 - temp = new->br_startoff - PREV.br_startoff; 1930 - temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff; 1931 - trace_xfs_bmap_pre_update(bma->ip, bma->idx, 0, _THIS_IP_); 1932 - xfs_bmbt_set_blockcount(ep, temp); /* truncate PREV */ 2018 + old = PREV; 2019 + 2020 + /* LEFT is the new middle */ 1933 2021 LEFT = *new; 2022 + 2023 + /* RIGHT is the new right */ 1934 2024 RIGHT.br_state = PREV.br_state; 1935 - RIGHT.br_startblock = nullstartblock( 1936 - (int)xfs_bmap_worst_indlen(bma->ip, temp2)); 1937 2025 RIGHT.br_startoff = new_endoff; 1938 - RIGHT.br_blockcount = temp2; 1939 - /* insert LEFT (r[0]) and RIGHT (r[1]) at the same time */ 1940 - xfs_iext_insert(bma->ip, bma->idx + 1, 2, &LEFT, state); 2026 + RIGHT.br_blockcount = 2027 + PREV.br_startoff + PREV.br_blockcount - new_endoff; 2028 + RIGHT.br_startblock = 2029 + nullstartblock(xfs_bmap_worst_indlen(bma->ip, 2030 + RIGHT.br_blockcount)); 2031 + 2032 + /* truncate PREV */ 2033 + PREV.br_blockcount = new->br_startoff - PREV.br_startoff; 2034 + PREV.br_startblock = 2035 + nullstartblock(xfs_bmap_worst_indlen(bma->ip, 2036 + PREV.br_blockcount)); 2037 + xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV); 2038 + 2039 + xfs_iext_next(ifp, &bma->icur); 2040 + xfs_iext_insert(bma->ip, &bma->icur, &RIGHT, state); 2041 + xfs_iext_insert(bma->ip, &bma->icur, &LEFT, state); 1941 2042 (*nextents)++; 2043 + 1942 2044 if (bma->cur == NULL) 1943 2045 rval 
= XFS_ILOG_CORE | XFS_ILOG_DEXT; 1944 2046 else { 1945 2047 rval = XFS_ILOG_CORE; 1946 - error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff, 1947 - new->br_startblock, new->br_blockcount, 1948 - &i); 2048 + error = xfs_bmbt_lookup_eq(bma->cur, new, &i); 1949 2049 if (error) 1950 2050 goto done; 1951 2051 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); 1952 - bma->cur->bc_rec.b.br_state = XFS_EXT_NORM; 1953 2052 error = xfs_btree_insert(bma->cur, &i); 1954 2053 if (error) 1955 2054 goto done; ··· 1974 2053 if (error) 1975 2054 goto done; 1976 2055 } 1977 - temp = xfs_bmap_worst_indlen(bma->ip, temp); 1978 - temp2 = xfs_bmap_worst_indlen(bma->ip, temp2); 1979 - diff = (int)(temp + temp2 - 1980 - (startblockval(PREV.br_startblock) - 1981 - (bma->cur ? 1982 - bma->cur->bc_private.b.allocated : 0))); 1983 - if (diff > 0) { 1984 - error = xfs_mod_fdblocks(bma->ip->i_mount, 1985 - -((int64_t)diff), false); 1986 - ASSERT(!error); 1987 - if (error) 1988 - goto done; 1989 - } 1990 2056 1991 - ep = xfs_iext_get_ext(ifp, bma->idx); 1992 - xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); 1993 - trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); 1994 - trace_xfs_bmap_pre_update(bma->ip, bma->idx + 2, state, _THIS_IP_); 1995 - xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, bma->idx + 2), 1996 - nullstartblock((int)temp2)); 1997 - trace_xfs_bmap_post_update(bma->ip, bma->idx + 2, state, _THIS_IP_); 1998 - 1999 - bma->idx++; 2000 - da_new = temp + temp2; 2057 + da_new = startblockval(PREV.br_startblock) + 2058 + startblockval(RIGHT.br_startblock); 2001 2059 break; 2002 2060 2003 2061 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: ··· 2010 2110 goto done; 2011 2111 } 2012 2112 2013 - /* adjust for changes in reserved delayed indirect blocks */ 2014 - if (da_old || da_new) { 2015 - temp = da_new; 2016 - if (bma->cur) 2017 - temp += bma->cur->bc_private.b.allocated; 2018 - if (temp < da_old) 2019 - xfs_mod_fdblocks(bma->ip->i_mount, 2020 - 
(int64_t)(da_old - temp), false); 2113 + if (bma->cur) { 2114 + da_new += bma->cur->bc_private.b.allocated; 2115 + bma->cur->bc_private.b.allocated = 0; 2021 2116 } 2022 2117 2023 - /* clear out the allocated field, done with it now in any case. */ 2024 - if (bma->cur) 2025 - bma->cur->bc_private.b.allocated = 0; 2118 + /* adjust for changes in reserved delayed indirect blocks */ 2119 + if (da_new != da_old) { 2120 + ASSERT(state == 0 || da_new < da_old); 2121 + error = xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new), 2122 + false); 2123 + } 2026 2124 2027 2125 xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork); 2028 2126 done: ··· 2040 2142 struct xfs_trans *tp, 2041 2143 xfs_inode_t *ip, /* incore inode pointer */ 2042 2144 int whichfork, 2043 - xfs_extnum_t *idx, /* extent number to update/insert */ 2145 + struct xfs_iext_cursor *icur, 2044 2146 xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ 2045 2147 xfs_bmbt_irec_t *new, /* new data to add to file extents */ 2046 2148 xfs_fsblock_t *first, /* pointer to firstblock variable */ ··· 2048 2150 int *logflagsp) /* inode logging flags */ 2049 2151 { 2050 2152 xfs_btree_cur_t *cur; /* btree cursor */ 2051 - xfs_bmbt_rec_host_t *ep; /* extent entry for idx */ 2052 2153 int error; /* error return value */ 2053 2154 int i; /* temp state */ 2054 2155 xfs_ifork_t *ifp; /* inode fork pointer */ 2055 2156 xfs_fileoff_t new_endoff; /* end offset of new entry */ 2056 - xfs_exntst_t newext; /* new extent state */ 2057 - xfs_exntst_t oldext; /* old extent state */ 2058 2157 xfs_bmbt_irec_t r[3]; /* neighbor extent entries */ 2059 2158 /* left is 0, right is 1, prev is 2 */ 2060 2159 int rval=0; /* return value (logging flags) */ 2061 - int state = 0;/* state bits, accessed thru macros */ 2160 + int state = xfs_bmap_fork_to_state(whichfork); 2062 2161 struct xfs_mount *mp = ip->i_mount; 2162 + struct xfs_bmbt_irec old; 2063 2163 2064 2164 *logflagsp = 0; 2065 2165 2066 2166 cur = *curp; 2067 2167 ifp = 
XFS_IFORK_PTR(ip, whichfork); 2068 - if (whichfork == XFS_COW_FORK) 2069 - state |= BMAP_COWFORK; 2070 2168 2071 - ASSERT(*idx >= 0); 2072 - ASSERT(*idx <= xfs_iext_count(ifp)); 2073 2169 ASSERT(!isnullstartblock(new->br_startblock)); 2074 2170 2075 2171 XFS_STATS_INC(mp, xs_add_exlist); ··· 2076 2184 * Set up a bunch of variables to make the tests simpler. 2077 2185 */ 2078 2186 error = 0; 2079 - ep = xfs_iext_get_ext(ifp, *idx); 2080 - xfs_bmbt_get_all(ep, &PREV); 2081 - newext = new->br_state; 2082 - oldext = (newext == XFS_EXT_UNWRITTEN) ? 2083 - XFS_EXT_NORM : XFS_EXT_UNWRITTEN; 2084 - ASSERT(PREV.br_state == oldext); 2187 + xfs_iext_get_extent(ifp, icur, &PREV); 2188 + ASSERT(new->br_state != PREV.br_state); 2085 2189 new_endoff = new->br_startoff + new->br_blockcount; 2086 2190 ASSERT(PREV.br_startoff <= new->br_startoff); 2087 2191 ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff); ··· 2095 2207 * Check and set flags if this segment has a left neighbor. 2096 2208 * Don't set contiguous if the combined extent would be too large. 2097 2209 */ 2098 - if (*idx > 0) { 2210 + if (xfs_iext_peek_prev_extent(ifp, icur, &LEFT)) { 2099 2211 state |= BMAP_LEFT_VALID; 2100 - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &LEFT); 2101 - 2102 2212 if (isnullstartblock(LEFT.br_startblock)) 2103 2213 state |= BMAP_LEFT_DELAY; 2104 2214 } ··· 2104 2218 if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) && 2105 2219 LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff && 2106 2220 LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock && 2107 - LEFT.br_state == newext && 2221 + LEFT.br_state == new->br_state && 2108 2222 LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN) 2109 2223 state |= BMAP_LEFT_CONTIG; 2110 2224 ··· 2113 2227 * Don't set contiguous if the combined extent would be too large. 2114 2228 * Also check for all-three-contiguous being too large. 
2115 2229 */ 2116 - if (*idx < xfs_iext_count(ifp) - 1) { 2230 + if (xfs_iext_peek_next_extent(ifp, icur, &RIGHT)) { 2117 2231 state |= BMAP_RIGHT_VALID; 2118 - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT); 2119 2232 if (isnullstartblock(RIGHT.br_startblock)) 2120 2233 state |= BMAP_RIGHT_DELAY; 2121 2234 } ··· 2122 2237 if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) && 2123 2238 new_endoff == RIGHT.br_startoff && 2124 2239 new->br_startblock + new->br_blockcount == RIGHT.br_startblock && 2125 - newext == RIGHT.br_state && 2240 + new->br_state == RIGHT.br_state && 2126 2241 new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN && 2127 2242 ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING | 2128 2243 BMAP_RIGHT_FILLING)) != ··· 2143 2258 * Setting all of a previous oldext extent to newext. 2144 2259 * The left and right neighbors are both contiguous with new. 2145 2260 */ 2146 - --*idx; 2261 + LEFT.br_blockcount += PREV.br_blockcount + RIGHT.br_blockcount; 2147 2262 2148 - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); 2149 - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), 2150 - LEFT.br_blockcount + PREV.br_blockcount + 2151 - RIGHT.br_blockcount); 2152 - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); 2153 - 2154 - xfs_iext_remove(ip, *idx + 1, 2, state); 2263 + xfs_iext_remove(ip, icur, state); 2264 + xfs_iext_remove(ip, icur, state); 2265 + xfs_iext_prev(ifp, icur); 2266 + xfs_iext_update_extent(ip, state, icur, &LEFT); 2155 2267 XFS_IFORK_NEXT_SET(ip, whichfork, 2156 2268 XFS_IFORK_NEXTENTS(ip, whichfork) - 2); 2157 2269 if (cur == NULL) 2158 2270 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 2159 2271 else { 2160 2272 rval = XFS_ILOG_CORE; 2161 - if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff, 2162 - RIGHT.br_startblock, 2163 - RIGHT.br_blockcount, &i))) 2273 + error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i); 2274 + if (error) 2164 2275 goto done; 2165 2276 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2166 2277 if 
((error = xfs_btree_delete(cur, &i))) ··· 2171 2290 if ((error = xfs_btree_decrement(cur, 0, &i))) 2172 2291 goto done; 2173 2292 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2174 - if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, 2175 - LEFT.br_startblock, 2176 - LEFT.br_blockcount + PREV.br_blockcount + 2177 - RIGHT.br_blockcount, LEFT.br_state))) 2293 + error = xfs_bmbt_update(cur, &LEFT); 2294 + if (error) 2178 2295 goto done; 2179 2296 } 2180 2297 break; ··· 2182 2303 * Setting all of a previous oldext extent to newext. 2183 2304 * The left neighbor is contiguous, the right is not. 2184 2305 */ 2185 - --*idx; 2306 + LEFT.br_blockcount += PREV.br_blockcount; 2186 2307 2187 - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); 2188 - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), 2189 - LEFT.br_blockcount + PREV.br_blockcount); 2190 - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); 2191 - 2192 - xfs_iext_remove(ip, *idx + 1, 1, state); 2308 + xfs_iext_remove(ip, icur, state); 2309 + xfs_iext_prev(ifp, icur); 2310 + xfs_iext_update_extent(ip, state, icur, &LEFT); 2193 2311 XFS_IFORK_NEXT_SET(ip, whichfork, 2194 2312 XFS_IFORK_NEXTENTS(ip, whichfork) - 1); 2195 2313 if (cur == NULL) 2196 2314 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 2197 2315 else { 2198 2316 rval = XFS_ILOG_CORE; 2199 - if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff, 2200 - PREV.br_startblock, PREV.br_blockcount, 2201 - &i))) 2317 + error = xfs_bmbt_lookup_eq(cur, &PREV, &i); 2318 + if (error) 2202 2319 goto done; 2203 2320 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2204 2321 if ((error = xfs_btree_delete(cur, &i))) ··· 2203 2328 if ((error = xfs_btree_decrement(cur, 0, &i))) 2204 2329 goto done; 2205 2330 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2206 - if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, 2207 - LEFT.br_startblock, 2208 - LEFT.br_blockcount + PREV.br_blockcount, 2209 - LEFT.br_state))) 2331 + error = xfs_bmbt_update(cur, &LEFT); 2332 + if (error) 2210 2333 
goto done; 2211 2334 } 2212 2335 break; ··· 2214 2341 * Setting all of a previous oldext extent to newext. 2215 2342 * The right neighbor is contiguous, the left is not. 2216 2343 */ 2217 - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); 2218 - xfs_bmbt_set_blockcount(ep, 2219 - PREV.br_blockcount + RIGHT.br_blockcount); 2220 - xfs_bmbt_set_state(ep, newext); 2221 - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); 2222 - xfs_iext_remove(ip, *idx + 1, 1, state); 2344 + PREV.br_blockcount += RIGHT.br_blockcount; 2345 + PREV.br_state = new->br_state; 2346 + 2347 + xfs_iext_next(ifp, icur); 2348 + xfs_iext_remove(ip, icur, state); 2349 + xfs_iext_prev(ifp, icur); 2350 + xfs_iext_update_extent(ip, state, icur, &PREV); 2351 + 2223 2352 XFS_IFORK_NEXT_SET(ip, whichfork, 2224 2353 XFS_IFORK_NEXTENTS(ip, whichfork) - 1); 2225 2354 if (cur == NULL) 2226 2355 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 2227 2356 else { 2228 2357 rval = XFS_ILOG_CORE; 2229 - if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff, 2230 - RIGHT.br_startblock, 2231 - RIGHT.br_blockcount, &i))) 2358 + error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i); 2359 + if (error) 2232 2360 goto done; 2233 2361 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2234 2362 if ((error = xfs_btree_delete(cur, &i))) ··· 2238 2364 if ((error = xfs_btree_decrement(cur, 0, &i))) 2239 2365 goto done; 2240 2366 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2241 - if ((error = xfs_bmbt_update(cur, new->br_startoff, 2242 - new->br_startblock, 2243 - new->br_blockcount + RIGHT.br_blockcount, 2244 - newext))) 2367 + error = xfs_bmbt_update(cur, &PREV); 2368 + if (error) 2245 2369 goto done; 2246 2370 } 2247 2371 break; ··· 2250 2378 * Neither the left nor right neighbors are contiguous with 2251 2379 * the new one. 
2252 2380 */ 2253 - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); 2254 - xfs_bmbt_set_state(ep, newext); 2255 - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); 2381 + PREV.br_state = new->br_state; 2382 + xfs_iext_update_extent(ip, state, icur, &PREV); 2256 2383 2257 2384 if (cur == NULL) 2258 2385 rval = XFS_ILOG_DEXT; 2259 2386 else { 2260 2387 rval = 0; 2261 - if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff, 2262 - new->br_startblock, new->br_blockcount, 2263 - &i))) 2388 + error = xfs_bmbt_lookup_eq(cur, new, &i); 2389 + if (error) 2264 2390 goto done; 2265 2391 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2266 - if ((error = xfs_bmbt_update(cur, new->br_startoff, 2267 - new->br_startblock, new->br_blockcount, 2268 - newext))) 2392 + error = xfs_bmbt_update(cur, &PREV); 2393 + if (error) 2269 2394 goto done; 2270 2395 } 2271 2396 break; ··· 2272 2403 * Setting the first part of a previous oldext extent to newext. 2273 2404 * The left neighbor is contiguous. 2274 2405 */ 2275 - trace_xfs_bmap_pre_update(ip, *idx - 1, state, _THIS_IP_); 2276 - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx - 1), 2277 - LEFT.br_blockcount + new->br_blockcount); 2278 - xfs_bmbt_set_startoff(ep, 2279 - PREV.br_startoff + new->br_blockcount); 2280 - trace_xfs_bmap_post_update(ip, *idx - 1, state, _THIS_IP_); 2406 + LEFT.br_blockcount += new->br_blockcount; 2281 2407 2282 - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); 2283 - xfs_bmbt_set_startblock(ep, 2284 - new->br_startblock + new->br_blockcount); 2285 - xfs_bmbt_set_blockcount(ep, 2286 - PREV.br_blockcount - new->br_blockcount); 2287 - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); 2408 + old = PREV; 2409 + PREV.br_startoff += new->br_blockcount; 2410 + PREV.br_startblock += new->br_blockcount; 2411 + PREV.br_blockcount -= new->br_blockcount; 2288 2412 2289 - --*idx; 2413 + xfs_iext_update_extent(ip, state, icur, &PREV); 2414 + xfs_iext_prev(ifp, icur); 2415 + xfs_iext_update_extent(ip, 
state, icur, &LEFT); 2290 2416 2291 2417 if (cur == NULL) 2292 2418 rval = XFS_ILOG_DEXT; 2293 2419 else { 2294 2420 rval = 0; 2295 - if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff, 2296 - PREV.br_startblock, PREV.br_blockcount, 2297 - &i))) 2421 + error = xfs_bmbt_lookup_eq(cur, &old, &i); 2422 + if (error) 2298 2423 goto done; 2299 2424 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2300 - if ((error = xfs_bmbt_update(cur, 2301 - PREV.br_startoff + new->br_blockcount, 2302 - PREV.br_startblock + new->br_blockcount, 2303 - PREV.br_blockcount - new->br_blockcount, 2304 - oldext))) 2425 + error = xfs_bmbt_update(cur, &PREV); 2426 + if (error) 2305 2427 goto done; 2306 - if ((error = xfs_btree_decrement(cur, 0, &i))) 2428 + error = xfs_btree_decrement(cur, 0, &i); 2429 + if (error) 2307 2430 goto done; 2308 - error = xfs_bmbt_update(cur, LEFT.br_startoff, 2309 - LEFT.br_startblock, 2310 - LEFT.br_blockcount + new->br_blockcount, 2311 - LEFT.br_state); 2431 + error = xfs_bmbt_update(cur, &LEFT); 2312 2432 if (error) 2313 2433 goto done; 2314 2434 } ··· 2308 2450 * Setting the first part of a previous oldext extent to newext. 2309 2451 * The left neighbor is not contiguous. 
2310 2452 */ 2311 - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); 2312 - ASSERT(ep && xfs_bmbt_get_state(ep) == oldext); 2313 - xfs_bmbt_set_startoff(ep, new_endoff); 2314 - xfs_bmbt_set_blockcount(ep, 2315 - PREV.br_blockcount - new->br_blockcount); 2316 - xfs_bmbt_set_startblock(ep, 2317 - new->br_startblock + new->br_blockcount); 2318 - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); 2453 + old = PREV; 2454 + PREV.br_startoff += new->br_blockcount; 2455 + PREV.br_startblock += new->br_blockcount; 2456 + PREV.br_blockcount -= new->br_blockcount; 2319 2457 2320 - xfs_iext_insert(ip, *idx, 1, new, state); 2458 + xfs_iext_update_extent(ip, state, icur, &PREV); 2459 + xfs_iext_insert(ip, icur, new, state); 2321 2460 XFS_IFORK_NEXT_SET(ip, whichfork, 2322 2461 XFS_IFORK_NEXTENTS(ip, whichfork) + 1); 2323 2462 if (cur == NULL) 2324 2463 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 2325 2464 else { 2326 2465 rval = XFS_ILOG_CORE; 2327 - if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff, 2328 - PREV.br_startblock, PREV.br_blockcount, 2329 - &i))) 2466 + error = xfs_bmbt_lookup_eq(cur, &old, &i); 2467 + if (error) 2330 2468 goto done; 2331 2469 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2332 - if ((error = xfs_bmbt_update(cur, 2333 - PREV.br_startoff + new->br_blockcount, 2334 - PREV.br_startblock + new->br_blockcount, 2335 - PREV.br_blockcount - new->br_blockcount, 2336 - oldext))) 2470 + error = xfs_bmbt_update(cur, &PREV); 2471 + if (error) 2337 2472 goto done; 2338 2473 cur->bc_rec.b = *new; 2339 2474 if ((error = xfs_btree_insert(cur, &i))) ··· 2340 2489 * Setting the last part of a previous oldext extent to newext. 2341 2490 * The right neighbor is contiguous with the new allocation. 
2342 2491 */ 2343 - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); 2344 - xfs_bmbt_set_blockcount(ep, 2345 - PREV.br_blockcount - new->br_blockcount); 2346 - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); 2492 + old = PREV; 2493 + PREV.br_blockcount -= new->br_blockcount; 2347 2494 2348 - ++*idx; 2495 + RIGHT.br_startoff = new->br_startoff; 2496 + RIGHT.br_startblock = new->br_startblock; 2497 + RIGHT.br_blockcount += new->br_blockcount; 2349 2498 2350 - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); 2351 - xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx), 2352 - new->br_startoff, new->br_startblock, 2353 - new->br_blockcount + RIGHT.br_blockcount, newext); 2354 - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); 2499 + xfs_iext_update_extent(ip, state, icur, &PREV); 2500 + xfs_iext_next(ifp, icur); 2501 + xfs_iext_update_extent(ip, state, icur, &RIGHT); 2355 2502 2356 2503 if (cur == NULL) 2357 2504 rval = XFS_ILOG_DEXT; 2358 2505 else { 2359 2506 rval = 0; 2360 - if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff, 2361 - PREV.br_startblock, 2362 - PREV.br_blockcount, &i))) 2507 + error = xfs_bmbt_lookup_eq(cur, &old, &i); 2508 + if (error) 2363 2509 goto done; 2364 2510 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2365 - if ((error = xfs_bmbt_update(cur, PREV.br_startoff, 2366 - PREV.br_startblock, 2367 - PREV.br_blockcount - new->br_blockcount, 2368 - oldext))) 2511 + error = xfs_bmbt_update(cur, &PREV); 2512 + if (error) 2369 2513 goto done; 2370 - if ((error = xfs_btree_increment(cur, 0, &i))) 2514 + error = xfs_btree_increment(cur, 0, &i); 2515 + if (error) 2371 2516 goto done; 2372 - if ((error = xfs_bmbt_update(cur, new->br_startoff, 2373 - new->br_startblock, 2374 - new->br_blockcount + RIGHT.br_blockcount, 2375 - newext))) 2517 + error = xfs_bmbt_update(cur, &RIGHT); 2518 + if (error) 2376 2519 goto done; 2377 2520 } 2378 2521 break; ··· 2376 2531 * Setting the last part of a previous oldext extent to newext. 
2377 2532 * The right neighbor is not contiguous. 2378 2533 */ 2379 - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); 2380 - xfs_bmbt_set_blockcount(ep, 2381 - PREV.br_blockcount - new->br_blockcount); 2382 - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); 2534 + old = PREV; 2535 + PREV.br_blockcount -= new->br_blockcount; 2383 2536 2384 - ++*idx; 2385 - xfs_iext_insert(ip, *idx, 1, new, state); 2537 + xfs_iext_update_extent(ip, state, icur, &PREV); 2538 + xfs_iext_next(ifp, icur); 2539 + xfs_iext_insert(ip, icur, new, state); 2386 2540 2387 2541 XFS_IFORK_NEXT_SET(ip, whichfork, 2388 2542 XFS_IFORK_NEXTENTS(ip, whichfork) + 1); ··· 2389 2545 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 2390 2546 else { 2391 2547 rval = XFS_ILOG_CORE; 2392 - if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff, 2393 - PREV.br_startblock, PREV.br_blockcount, 2394 - &i))) 2548 + error = xfs_bmbt_lookup_eq(cur, &old, &i); 2549 + if (error) 2395 2550 goto done; 2396 2551 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2397 - if ((error = xfs_bmbt_update(cur, PREV.br_startoff, 2398 - PREV.br_startblock, 2399 - PREV.br_blockcount - new->br_blockcount, 2400 - oldext))) 2552 + error = xfs_bmbt_update(cur, &PREV); 2553 + if (error) 2401 2554 goto done; 2402 - if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff, 2403 - new->br_startblock, new->br_blockcount, 2404 - &i))) 2555 + error = xfs_bmbt_lookup_eq(cur, new, &i); 2556 + if (error) 2405 2557 goto done; 2406 2558 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); 2407 - cur->bc_rec.b.br_state = XFS_EXT_NORM; 2408 2559 if ((error = xfs_btree_insert(cur, &i))) 2409 2560 goto done; 2410 2561 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); ··· 2412 2573 * newext. Contiguity is impossible here. 2413 2574 * One extent becomes three extents. 
 	 */
-	trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
-	xfs_bmbt_set_blockcount(ep,
-		new->br_startoff - PREV.br_startoff);
-	trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+	old = PREV;
+	PREV.br_blockcount = new->br_startoff - PREV.br_startoff;

 	r[0] = *new;
 	r[1].br_startoff = new_endoff;
 	r[1].br_blockcount =
-		PREV.br_startoff + PREV.br_blockcount - new_endoff;
+		old.br_startoff + old.br_blockcount - new_endoff;
 	r[1].br_startblock = new->br_startblock + new->br_blockcount;
-	r[1].br_state = oldext;
+	r[1].br_state = PREV.br_state;

-	++*idx;
-	xfs_iext_insert(ip, *idx, 2, &r[0], state);
+	xfs_iext_update_extent(ip, state, icur, &PREV);
+	xfs_iext_next(ifp, icur);
+	xfs_iext_insert(ip, icur, &r[1], state);
+	xfs_iext_insert(ip, icur, &r[0], state);

 	XFS_IFORK_NEXT_SET(ip, whichfork,
 		XFS_IFORK_NEXTENTS(ip, whichfork) + 2);
@@ -2433 +2594 @@
 		rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
 	else {
 		rval = XFS_ILOG_CORE;
-		if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
-				PREV.br_startblock, PREV.br_blockcount,
-				&i)))
+		error = xfs_bmbt_lookup_eq(cur, &old, &i);
+		if (error)
 			goto done;
 		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 		/* new right extent - oldext */
-		if ((error = xfs_bmbt_update(cur, r[1].br_startoff,
-				r[1].br_startblock, r[1].br_blockcount,
-				r[1].br_state)))
+		error = xfs_bmbt_update(cur, &r[1]);
+		if (error)
 			goto done;
 		/* new left extent - oldext */
 		cur->bc_rec.b = PREV;
-		cur->bc_rec.b.br_blockcount =
-			new->br_startoff - PREV.br_startoff;
 		if ((error = xfs_btree_insert(cur, &i)))
 			goto done;
 		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
@@ -2451 +2616 @@
 		 * we are about to insert as we can't trust it after
 		 * the previous insert.
 		 */
-		if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
-				new->br_startblock, new->br_blockcount,
-				&i)))
+		error = xfs_bmbt_lookup_eq(cur, new, &i);
+		if (error)
 			goto done;
 		XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
 		/* new middle extent - newext */
-		cur->bc_rec.b.br_state = new->br_state;
 		if ((error = xfs_btree_insert(cur, &i)))
 			goto done;
 		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
@@ -2514 +2681 @@
 xfs_bmap_add_extent_hole_delay(
 	xfs_inode_t		*ip,	/* incore inode pointer */
 	int			whichfork,
-	xfs_extnum_t		*idx,	/* extent number to update/insert */
+	struct xfs_iext_cursor	*icur,
 	xfs_bmbt_irec_t		*new)	/* new data to add to file extents */
 {
 	xfs_ifork_t		*ifp;	/* inode fork pointer */
@@ -2522 +2689 @@
 	xfs_filblks_t		newlen=0;	/* new indirect size */
 	xfs_filblks_t		oldlen=0;	/* old indirect size */
 	xfs_bmbt_irec_t		right;	/* right neighbor extent entry */
-	int			state;	/* state bits, accessed thru macros */
-	xfs_filblks_t		temp=0;	/* temp for indirect calculations */
+	int			state = xfs_bmap_fork_to_state(whichfork);
+	xfs_filblks_t		temp;	/* temp for indirect calculations */

 	ifp = XFS_IFORK_PTR(ip, whichfork);
-	state = 0;
-	if (whichfork == XFS_COW_FORK)
-		state |= BMAP_COWFORK;
 	ASSERT(isnullstartblock(new->br_startblock));

 	/*
 	 * Check and set flags if this segment has a left neighbor
 	 */
-	if (*idx > 0) {
+	if (xfs_iext_peek_prev_extent(ifp, icur, &left)) {
 		state |= BMAP_LEFT_VALID;
-		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &left);
-
 		if (isnullstartblock(left.br_startblock))
 			state |= BMAP_LEFT_DELAY;
 	}
@@ -2541 +2713 @@
 	 * Check and set flags if the current (right) segment exists.
 	 * If it doesn't exist, we're converting the hole at end-of-file.
 	 */
-	if (*idx < xfs_iext_count(ifp)) {
+	if (xfs_iext_get_extent(ifp, icur, &right)) {
 		state |= BMAP_RIGHT_VALID;
-		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right);
-
 		if (isnullstartblock(right.br_startblock))
 			state |= BMAP_RIGHT_DELAY;
 	}
@@ -2574 +2748 @@
 		 * on the left and on the right.
 		 * Merge all three into a single extent record.
 		 */
-		--*idx;
 		temp = left.br_blockcount + new->br_blockcount +
 			right.br_blockcount;

-		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
-		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp);
 		oldlen = startblockval(left.br_startblock) +
 			startblockval(new->br_startblock) +
 			startblockval(right.br_startblock);
 		newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
 					 oldlen);
-		xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx),
-			nullstartblock((int)newlen));
-		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+		left.br_startblock = nullstartblock(newlen);
+		left.br_blockcount = temp;

-		xfs_iext_remove(ip, *idx + 1, 1, state);
+		xfs_iext_remove(ip, icur, state);
+		xfs_iext_prev(ifp, icur);
+		xfs_iext_update_extent(ip, state, icur, &left);
 		break;

 	case BMAP_LEFT_CONTIG:
@@ -2596 +2772 @@
 		 * on the left.
 		 * Merge the new allocation with the left neighbor.
 		 */
-		--*idx;
 		temp = left.br_blockcount + new->br_blockcount;

-		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
-		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp);
 		oldlen = startblockval(left.br_startblock) +
 			startblockval(new->br_startblock);
 		newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
 					 oldlen);
-		xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx),
-			nullstartblock((int)newlen));
-		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+		left.br_blockcount = temp;
+		left.br_startblock = nullstartblock(newlen);
+
+		xfs_iext_prev(ifp, icur);
+		xfs_iext_update_extent(ip, state, icur, &left);
 		break;

 	case BMAP_RIGHT_CONTIG:
@@ -2615 +2792 @@
 		 * on the right.
 		 * Merge the new allocation with the right neighbor.
 		 */
-		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
 		temp = new->br_blockcount + right.br_blockcount;
 		oldlen = startblockval(new->br_startblock) +
 			startblockval(right.br_startblock);
 		newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
 					 oldlen);
-		xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx),
-			new->br_startoff,
-			nullstartblock((int)newlen), temp, right.br_state);
-		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+		right.br_startoff = new->br_startoff;
+		right.br_startblock = nullstartblock(newlen);
+		right.br_blockcount = temp;
+		xfs_iext_update_extent(ip, state, icur, &right);
 		break;

 	case 0:
@@ -2633 +2811 @@
 		 * Insert a new entry.
 		 */
 		oldlen = newlen = 0;
-		xfs_iext_insert(ip, *idx, 1, new, state);
+		xfs_iext_insert(ip, icur, new, state);
 		break;
 	}
 	if (oldlen != newlen) {
@@ -2654 +2832 @@
 	struct xfs_trans	*tp,
 	struct xfs_inode	*ip,
 	int			whichfork,
-	xfs_extnum_t		*idx,
+	struct xfs_iext_cursor	*icur,
 	struct xfs_btree_cur	**curp,
 	struct xfs_bmbt_irec	*new,
 	xfs_fsblock_t		*first,
@@ -2669 +2847 @@
 	xfs_bmbt_irec_t		left;	/* left neighbor extent entry */
 	xfs_bmbt_irec_t		right;	/* right neighbor extent entry */
 	int			rval=0;	/* return value (logging flags) */
-	int			state;	/* state bits, accessed thru macros */
+	int			state = xfs_bmap_fork_to_state(whichfork);
+	struct xfs_bmbt_irec	old;

-	ASSERT(*idx >= 0);
-	ASSERT(*idx <= xfs_iext_count(ifp));
 	ASSERT(!isnullstartblock(new->br_startblock));
 	ASSERT(!cur || !(cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));

 	XFS_STATS_INC(mp, xs_add_exlist);

-	state = 0;
-	if (whichfork == XFS_ATTR_FORK)
-		state |= BMAP_ATTRFORK;
-	if (whichfork == XFS_COW_FORK)
-		state |= BMAP_COWFORK;
-
 	/*
 	 * Check and set flags if this segment has a left neighbor.
 	 */
-	if (*idx > 0) {
+	if (xfs_iext_peek_prev_extent(ifp, icur, &left)) {
 		state |= BMAP_LEFT_VALID;
-		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &left);
 		if (isnullstartblock(left.br_startblock))
 			state |= BMAP_LEFT_DELAY;
 	}
@@ -2690 +2876 @@
 	 * Check and set flags if this segment has a current value.
 	 * Not true if we're inserting into the "hole" at eof.
 	 */
-	if (*idx < xfs_iext_count(ifp)) {
+	if (xfs_iext_get_extent(ifp, icur, &right)) {
 		state |= BMAP_RIGHT_VALID;
-		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right);
 		if (isnullstartblock(right.br_startblock))
 			state |= BMAP_RIGHT_DELAY;
 	}
@@ -2728 +2915 @@
 		 * left and on the right.
 		 * Merge all three into a single extent record.
 		 */
-		--*idx;
-		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
-		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx),
-			left.br_blockcount + new->br_blockcount +
-			right.br_blockcount);
-		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+		left.br_blockcount += new->br_blockcount + right.br_blockcount;

-		xfs_iext_remove(ip, *idx + 1, 1, state);
+		xfs_iext_remove(ip, icur, state);
+		xfs_iext_prev(ifp, icur);
+		xfs_iext_update_extent(ip, state, icur, &left);

 		XFS_IFORK_NEXT_SET(ip, whichfork,
 			XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
@@ -2740 +2930 @@
 			rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
 		} else {
 			rval = XFS_ILOG_CORE;
-			error = xfs_bmbt_lookup_eq(cur, right.br_startoff,
-					right.br_startblock, right.br_blockcount,
-					&i);
+			error = xfs_bmbt_lookup_eq(cur, &right, &i);
 			if (error)
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
@@ -2752 +2944 @@
 			if (error)
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
-			error = xfs_bmbt_update(cur, left.br_startoff,
-					left.br_startblock,
-					left.br_blockcount +
-						new->br_blockcount +
-						right.br_blockcount,
-					left.br_state);
+			error = xfs_bmbt_update(cur, &left);
 			if (error)
 				goto done;
 		}
@@ -2764 +2961 @@
 		 * on the left.
 		 * Merge the new allocation with the left neighbor.
 		 */
-		--*idx;
-		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
-		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx),
-			left.br_blockcount + new->br_blockcount);
-		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+		old = left;
+		left.br_blockcount += new->br_blockcount;
+
+		xfs_iext_prev(ifp, icur);
+		xfs_iext_update_extent(ip, state, icur, &left);

 		if (cur == NULL) {
 			rval = xfs_ilog_fext(whichfork);
 		} else {
 			rval = 0;
-			error = xfs_bmbt_lookup_eq(cur, left.br_startoff,
-					left.br_startblock, left.br_blockcount,
-					&i);
+			error = xfs_bmbt_lookup_eq(cur, &old, &i);
 			if (error)
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
-			error = xfs_bmbt_update(cur, left.br_startoff,
-					left.br_startblock,
-					left.br_blockcount +
-						new->br_blockcount,
-					left.br_state);
+			error = xfs_bmbt_update(cur, &left);
 			if (error)
 				goto done;
 		}
@@ -2790 +2993 @@
 		 * on the right.
 		 * Merge the new allocation with the right neighbor.
 		 */
-		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
-		xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx),
-			new->br_startoff, new->br_startblock,
-			new->br_blockcount + right.br_blockcount,
-			right.br_state);
-		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+		old = right;
+
+		right.br_startoff = new->br_startoff;
+		right.br_startblock = new->br_startblock;
+		right.br_blockcount += new->br_blockcount;
+		xfs_iext_update_extent(ip, state, icur, &right);

 		if (cur == NULL) {
 			rval = xfs_ilog_fext(whichfork);
 		} else {
 			rval = 0;
-			error = xfs_bmbt_lookup_eq(cur,
-					right.br_startoff,
-					right.br_startblock,
-					right.br_blockcount, &i);
+			error = xfs_bmbt_lookup_eq(cur, &old, &i);
 			if (error)
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
-			error = xfs_bmbt_update(cur, new->br_startoff,
-					new->br_startblock,
-					new->br_blockcount +
-						right.br_blockcount,
-					right.br_state);
+			error = xfs_bmbt_update(cur, &right);
 			if (error)
 				goto done;
 		}
@@ -2817 +3027 @@
 		 * real allocation.
 		 * Insert a new entry.
 		 */
-		xfs_iext_insert(ip, *idx, 1, new, state);
+		xfs_iext_insert(ip, icur, new, state);
 		XFS_IFORK_NEXT_SET(ip, whichfork,
 			XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
 		if (cur == NULL) {
 			rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
 		} else {
 			rval = XFS_ILOG_CORE;
-			error = xfs_bmbt_lookup_eq(cur,
-					new->br_startoff,
-					new->br_startblock,
-					new->br_blockcount, &i);
+			error = xfs_bmbt_lookup_eq(cur, new, &i);
 			if (error)
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
-			cur->bc_rec.b.br_state = new->br_state;
 			error = xfs_btree_insert(cur, &i);
 			if (error)
 				goto done;
@@ -3767 +3981 @@
 	struct xfs_bmbt_irec	got;
 	xfs_fileoff_t		obno;
 	xfs_fileoff_t		end;
-	xfs_extnum_t		idx;
+	struct xfs_iext_cursor	icur;
 	int			error;
 	bool			eof = false;
 	int			n = 0;
@@ -3809 +4023 @@
 			return error;
 	}

-	if (!xfs_iext_lookup_extent(ip, ifp, bno, &idx, &got))
+	if (!xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got))
 		eof = true;
 	end = bno + len;
 	obno = bno;
@@ -3841 +4055 @@
 			break;

 		/* Else go on to the next record.
 		 */
-		if (!xfs_iext_get_extent(ifp, ++idx, &got))
+		if (!xfs_iext_next_extent(ifp, &icur, &got))
 			eof = true;
 	}
 	*nmap = n;
@@ -3869 +4083 @@
 	xfs_filblks_t		len,
 	xfs_filblks_t		prealloc,
 	struct xfs_bmbt_irec	*got,
-	xfs_extnum_t		*lastx,
+	struct xfs_iext_cursor	*icur,
 	int			eof)
 {
 	struct xfs_mount	*mp = ip->i_mount;
@@ -3899 +4113 @@
 	if (extsz) {
 		struct xfs_bmbt_irec	prev;

-		if (!xfs_iext_get_extent(ifp, *lastx - 1, &prev))
+		if (!xfs_iext_peek_prev_extent(ifp, icur, &prev))
 			prev.br_startoff = NULLFILEOFF;

 		error = xfs_bmap_extsize_align(mp, got, &prev, extsz, rt, eof,
@@ -3948 +4162 @@
 	got->br_blockcount = alen;
 	got->br_state = XFS_EXT_NORM;

-	xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got);
+	xfs_bmap_add_extent_hole_delay(ip, whichfork, icur, got);

 	/*
 	 * Tag the inode if blocks were preallocated. Note that COW fork
@@ -3993 +4207 @@
 	if (bma->wasdel) {
 		bma->length = (xfs_extlen_t)bma->got.br_blockcount;
 		bma->offset = bma->got.br_startoff;
-		if (bma->idx) {
-			xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1),
-					 &bma->prev);
-		}
+		xfs_iext_peek_prev_extent(ifp, &bma->icur, &bma->prev);
 	} else {
 		bma->length = XFS_FILBLKS_MIN(bma->length, MAXEXTLEN);
 		if (!bma->eof)
@@ -4078 +4295 @@
 		error = xfs_bmap_add_extent_delay_real(bma, whichfork);
 	else
 		error = xfs_bmap_add_extent_hole_real(bma->tp, bma->ip,
-				whichfork, &bma->idx, &bma->cur, &bma->got,
+				whichfork, &bma->icur, &bma->cur, &bma->got,
 				bma->firstblock, bma->dfops, &bma->logflags);

 	bma->logflags |= tmp_logflags;
@@ -4090 +4307 @@
 	 * or xfs_bmap_add_extent_hole_real might have merged it into one of
 	 * the neighbouring ones.
 	 */
-	xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &bma->got);
+	xfs_iext_get_extent(ifp, &bma->icur, &bma->got);

 	ASSERT(bma->got.br_startoff <= bma->offset);
 	ASSERT(bma->got.br_startoff + bma->got.br_blockcount >=
@@ -4148 +4365 @@
 	}

 	error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, whichfork,
-			&bma->idx, &bma->cur, mval, bma->firstblock, bma->dfops,
-			&tmp_logflags);
+			&bma->icur, &bma->cur, mval, bma->firstblock,
+			bma->dfops, &tmp_logflags);
 	/*
 	 * Log the inode core unconditionally in the unwritten extent conversion
 	 * path because the conversion might not have done so (e.g., if the
@@ -4171 +4388 @@
 	 * xfs_bmap_add_extent_unwritten_real might have merged it into one
 	 * of the neighbouring ones.
 	 */
-	xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &bma->got);
+	xfs_iext_get_extent(ifp, &bma->icur, &bma->got);

 	/*
 	 * We may have combined previously unwritten space with written space,
@@ -4290 +4507 @@
 	end = bno + len;
 	obno = bno;

-	if (!xfs_iext_lookup_extent(ip, ifp, bno, &bma.idx, &bma.got))
+	if (!xfs_iext_lookup_extent(ip, ifp, bno, &bma.icur, &bma.got))
 		eof = true;
-	if (!xfs_iext_get_extent(ifp, bma.idx - 1, &bma.prev))
+	if (!xfs_iext_peek_prev_extent(ifp, &bma.icur, &bma.prev))
 		bma.prev.br_startoff = NULLFILEOFF;
 	bma.tp = tp;
 	bma.ip = ip;
@@ -4334 +4551 @@
 		 * First, deal with the hole before the allocated space
 		 * that we found, if any.
 		 */
-		if (need_alloc || wasdelay) {
+		if ((need_alloc || wasdelay) &&
+		    !(flags & XFS_BMAPI_CONVERT_ONLY)) {
 			bma.eof = eof;
 			bma.conv = !!(flags & XFS_BMAPI_CONVERT);
 			bma.wasdel = wasdelay;
@@ -4398 +4614 @@

 		/* Else go on to the next record.
 		 */
 		bma.prev = bma.got;
-		if (!xfs_iext_get_extent(ifp, ++bma.idx, &bma.got))
+		if (!xfs_iext_next_extent(ifp, &bma.icur, &bma.got))
 			eof = true;
 	}
 	*nmap = n;
@@ -4471 +4687 @@
 	struct xfs_btree_cur	*cur = NULL;
 	xfs_fsblock_t		firstblock = NULLFSBLOCK;
 	struct xfs_bmbt_irec	got;
-	xfs_extnum_t		idx;
+	struct xfs_iext_cursor	icur;
 	int			logflags = 0, error;

 	ASSERT(len > 0);
@@ -4495 +4711 @@
 		return error;
 	}

-	if (xfs_iext_lookup_extent(ip, ifp, bno, &idx, &got)) {
+	if (xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got)) {
 		/* make sure we only reflink into a hole. */
 		ASSERT(got.br_startoff > bno);
 		ASSERT(got.br_startoff - bno >= len);
@@ -4516 +4732 @@
 	got.br_blockcount = len;
 	got.br_state = XFS_EXT_NORM;

-	error = xfs_bmap_add_extent_hole_real(tp, ip, XFS_DATA_FORK, &idx, &cur,
-			&got, &firstblock, dfops, &logflags);
+	error = xfs_bmap_add_extent_hole_real(tp, ip, XFS_DATA_FORK, &icur,
+			&cur, &got, &firstblock, dfops, &logflags);
 	if (error)
 		goto error0;
@@ -4633 +4849 @@
 xfs_bmap_del_extent_delay(
 	struct xfs_inode	*ip,
 	int			whichfork,
-	xfs_extnum_t		*idx,
+	struct xfs_iext_cursor	*icur,
 	struct xfs_bmbt_irec	*got,
 	struct xfs_bmbt_irec	*del)
 {
@@ -4643 +4859 @@
 	int64_t			da_old, da_new, da_diff = 0;
 	xfs_fileoff_t		del_endoff, got_endoff;
 	xfs_filblks_t		got_indlen, new_indlen, stolen;
-	int			error = 0, state = 0;
+	int			state = xfs_bmap_fork_to_state(whichfork);
+	int			error = 0;
 	bool			isrt;

 	XFS_STATS_INC(mp, xs_del_exlist);
@@ -4655 +4870 @@
 	da_old = startblockval(got->br_startblock);
 	da_new = 0;

-	ASSERT(*idx >= 0);
-	ASSERT(*idx <= xfs_iext_count(ifp));
 	ASSERT(del->br_blockcount > 0);
 	ASSERT(got->br_startoff <= del->br_startoff);
 	ASSERT(got_endoff >= del_endoff);
@@ -4678 +4895 @@
 		return error;
 	ip->i_delayed_blks -= del->br_blockcount;

-	if (whichfork == XFS_COW_FORK)
-		state |= BMAP_COWFORK;
-
 	if (got->br_startoff == del->br_startoff)
-		state |= BMAP_LEFT_CONTIG;
+		state |= BMAP_LEFT_FILLING;
 	if (got_endoff == del_endoff)
-		state |= BMAP_RIGHT_CONTIG;
+		state |= BMAP_RIGHT_FILLING;

-	switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
-	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
+	switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
+	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
 		/*
 		 * Matches the whole extent.  Delete the entry.
 		 */
-		xfs_iext_remove(ip, *idx, 1, state);
-		--*idx;
+		xfs_iext_remove(ip, icur, state);
+		xfs_iext_prev(ifp, icur);
 		break;
-	case BMAP_LEFT_CONTIG:
+	case BMAP_LEFT_FILLING:
 		/*
 		 * Deleting the first part of the extent.
 		 */
-		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
 		got->br_startoff = del_endoff;
 		got->br_blockcount -= del->br_blockcount;
 		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
 				got->br_blockcount), da_old);
 		got->br_startblock = nullstartblock((int)da_new);
-		xfs_iext_update_extent(ifp, *idx, got);
-		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+		xfs_iext_update_extent(ip, state, icur, got);
 		break;
-	case BMAP_RIGHT_CONTIG:
+	case BMAP_RIGHT_FILLING:
 		/*
 		 * Deleting the last part of the extent.
 		 */
-		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
 		got->br_blockcount = got->br_blockcount - del->br_blockcount;
 		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
 				got->br_blockcount), da_old);
 		got->br_startblock = nullstartblock((int)da_new);
-		xfs_iext_update_extent(ifp, *idx, got);
-		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+		xfs_iext_update_extent(ip, state, icur, got);
 		break;
 	case 0:
 		/*
@@ -4722 +4946 @@
 		 * Warn if either of the new indlen reservations is zero as this
 		 * can lead to delalloc problems.
 		 */
-		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
-
 		got->br_blockcount = del->br_startoff - got->br_startoff;
 		got_indlen = xfs_bmap_worst_indlen(ip, got->br_blockcount);
@@ -4733 +4959 @@
 						       del->br_blockcount);

 		got->br_startblock = nullstartblock((int)got_indlen);
-		xfs_iext_update_extent(ifp, *idx, got);
-		trace_xfs_bmap_post_update(ip, *idx, 0, _THIS_IP_);

 		new.br_startoff = del_endoff;
 		new.br_state = got->br_state;
 		new.br_startblock = nullstartblock((int)new_indlen);

-		++*idx;
-		xfs_iext_insert(ip, *idx, 1, &new, state);
+		xfs_iext_update_extent(ip, state, icur, got);
+		xfs_iext_next(ifp, icur);
+		xfs_iext_insert(ip, icur, &new, state);

 		da_new = got_indlen + new_indlen - stolen;
 		del->br_blockcount -= stolen;
@@ -4759 +4986 @@
 void
 xfs_bmap_del_extent_cow(
 	struct xfs_inode	*ip,
-	xfs_extnum_t		*idx,
+	struct xfs_iext_cursor	*icur,
 	struct xfs_bmbt_irec	*got,
 	struct xfs_bmbt_irec	*del)
 {
@@ -4774 +5001 @@
 	del_endoff = del->br_startoff + del->br_blockcount;
 	got_endoff = got->br_startoff + got->br_blockcount;

-	ASSERT(*idx >= 0);
-	ASSERT(*idx <= xfs_iext_count(ifp));
 	ASSERT(del->br_blockcount > 0);
 	ASSERT(got->br_startoff <= del->br_startoff);
 	ASSERT(got_endoff >= del_endoff);
 	ASSERT(!isnullstartblock(got->br_startblock));

 	if (got->br_startoff == del->br_startoff)
-		state |= BMAP_LEFT_CONTIG;
+		state |= BMAP_LEFT_FILLING;
 	if (got_endoff == del_endoff)
-		state |= BMAP_RIGHT_CONTIG;
+		state |= BMAP_RIGHT_FILLING;

-	switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
-	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
+	switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
+	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
 		/*
 		 * Matches the whole extent.  Delete the entry.
 		 */
-		xfs_iext_remove(ip, *idx, 1, state);
-		--*idx;
+		xfs_iext_remove(ip, icur, state);
+		xfs_iext_prev(ifp, icur);
 		break;
-	case BMAP_LEFT_CONTIG:
+	case BMAP_LEFT_FILLING:
 		/*
 		 * Deleting the first part of the extent.
 		 */
-		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
 		got->br_startoff = del_endoff;
 		got->br_blockcount -= del->br_blockcount;
 		got->br_startblock = del->br_startblock + del->br_blockcount;
-		xfs_iext_update_extent(ifp, *idx, got);
-		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+		xfs_iext_update_extent(ip, state, icur, got);
 		break;
-	case BMAP_RIGHT_CONTIG:
+	case BMAP_RIGHT_FILLING:
 		/*
 		 * Deleting the last part of the extent.
 		 */
-		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
 		got->br_blockcount -= del->br_blockcount;
-		xfs_iext_update_extent(ifp, *idx, got);
-		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+		xfs_iext_update_extent(ip, state, icur, got);
 		break;
 	case 0:
 		/*
 		 * Deleting the middle of the extent.
 		 */
-		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
 		got->br_blockcount = del->br_startoff - got->br_startoff;
-		xfs_iext_update_extent(ifp, *idx, got);
-		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);

 		new.br_startoff = del_endoff;
 		new.br_blockcount = got_endoff - del_endoff;
 		new.br_state = got->br_state;
 		new.br_startblock = del->br_startblock + del->br_blockcount;

-		++*idx;
-		xfs_iext_insert(ip, *idx, 1, &new, state);
+		xfs_iext_update_extent(ip, state, icur, got);
+		xfs_iext_next(ifp, icur);
+		xfs_iext_insert(ip, icur, &new, state);
 		break;
 	}
 }

 /*
  * Called by xfs_bmapi to update file extent records and the btree
- * after removing space (or undoing a delayed allocation).
+ * after removing space.
  */
 STATIC int				/* error */
-xfs_bmap_del_extent(
+xfs_bmap_del_extent_real(
 	xfs_inode_t		*ip,	/* incore inode pointer */
 	xfs_trans_t		*tp,	/* current transaction pointer */
-	xfs_extnum_t		*idx,	/* extent number to update/delete */
+	struct xfs_iext_cursor	*icur,
 	struct xfs_defer_ops	*dfops,	/* list of extents to be freed */
 	xfs_btree_cur_t		*cur,	/* if null, not a btree */
 	xfs_bmbt_irec_t		*del,	/* data to remove from extents */
@@ -4842 +5077 @@
 	int			whichfork, /* data or attr fork */
 	int			bflags)	/* bmapi flags */
 {
-	xfs_filblks_t		da_new;	/* new delay-alloc indirect blocks */
-	xfs_filblks_t		da_old;	/* old delay-alloc indirect blocks */
 	xfs_fsblock_t		del_endblock=0;	/* first block past del */
 	xfs_fileoff_t		del_endoff;	/* first offset past del */
-	int			delay;	/* current block is delayed allocated */
 	int			do_fx;	/* free extent at end of routine */
-	xfs_bmbt_rec_host_t	*ep;	/* current extent entry pointer */
 	int
 				error;	/* error return value */
-	int			flags;	/* inode logging flags */
-	xfs_bmbt_irec_t		got;	/* current extent entry */
+	int			flags = 0;/* inode logging flags */
+	struct xfs_bmbt_irec	got;	/* current extent entry */
 	xfs_fileoff_t		got_endoff;	/* first offset past got */
 	int			i;	/* temp state */
 	xfs_ifork_t		*ifp;	/* inode fork pointer */
@@ -4856 +5095 @@
 	xfs_bmbt_irec_t		new;	/* new record to be inserted */
 	/* REFERENCED */
 	uint			qfield;	/* quota field to update */
-	xfs_filblks_t		temp;	/* for indirect length calculations */
-	xfs_filblks_t		temp2;	/* for indirect length calculations */
-	int			state = 0;
+	int			state = xfs_bmap_fork_to_state(whichfork);
+	struct xfs_bmbt_irec	old;

 	mp = ip->i_mount;
 	XFS_STATS_INC(mp, xs_del_exlist);

-	if (whichfork == XFS_ATTR_FORK)
-		state |= BMAP_ATTRFORK;
-	else if (whichfork == XFS_COW_FORK)
-		state |= BMAP_COWFORK;
-
 	ifp = XFS_IFORK_PTR(ip, whichfork);
-	ASSERT((*idx >= 0) && (*idx < xfs_iext_count(ifp)));
 	ASSERT(del->br_blockcount > 0);
-	ep = xfs_iext_get_ext(ifp, *idx);
-	xfs_bmbt_get_all(ep, &got);
+	xfs_iext_get_extent(ifp, icur, &got);
 	ASSERT(got.br_startoff <= del->br_startoff);
 	del_endoff = del->br_startoff + del->br_blockcount;
 	got_endoff = got.br_startoff + got.br_blockcount;
 	ASSERT(got_endoff >= del_endoff);
-	delay = isnullstartblock(got.br_startblock);
-	ASSERT(isnullstartblock(del->br_startblock) == delay);
-	flags = 0;
+	ASSERT(!isnullstartblock(got.br_startblock));
 	qfield = 0;
 	error = 0;
-	/*
-	 * If deleting a real allocation, must free up the disk space.
-	 */
-	if (!delay) {
-		flags = XFS_ILOG_CORE;
-		/*
-		 * Realtime allocation.  Free it and record di_nblocks update.
-		 */
-		if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) {
-			xfs_fsblock_t	bno;
-			xfs_filblks_t	len;

-			ASSERT(do_mod(del->br_blockcount,
-				      mp->m_sb.sb_rextsize) == 0);
-			ASSERT(do_mod(del->br_startblock,
-				      mp->m_sb.sb_rextsize) == 0);
-			bno = del->br_startblock;
-			len = del->br_blockcount;
-			do_div(bno, mp->m_sb.sb_rextsize);
-			do_div(len, mp->m_sb.sb_rextsize);
-			error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len);
-			if (error)
-				goto done;
-			do_fx = 0;
-			nblks = len * mp->m_sb.sb_rextsize;
-			qfield = XFS_TRANS_DQ_RTBCOUNT;
-		}
-		/*
-		 * Ordinary allocation.
-		 */
-		else {
-			do_fx = 1;
-			nblks = del->br_blockcount;
-			qfield = XFS_TRANS_DQ_BCOUNT;
-		}
-		/*
-		 * Set up del_endblock and cur for later.
-		 */
-		del_endblock = del->br_startblock + del->br_blockcount;
-		if (cur) {
-			if ((error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
-					got.br_startblock, got.br_blockcount,
-					&i)))
-				goto done;
-			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
-		}
-		da_old = da_new = 0;
-	} else {
-		da_old = startblockval(got.br_startblock);
-		da_new = 0;
-		nblks = 0;
+	/*
+	 * If it's the case where the directory code is running with no block
+	 * reservation, and the deleted block is in the middle of its extent,
+	 * and the resulting insert of an extent would cause transformation to
+	 * btree format, then reject it.  The calling code will then swap blocks
+	 * around instead.  We have to do this now, rather than waiting for the
+	 * conversion to btree format, since the transaction will be dirty then.
+	 */
+	if (tp->t_blk_res == 0 &&
+	    XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
+	    XFS_IFORK_NEXTENTS(ip, whichfork) >=
+			XFS_IFORK_MAXEXT(ip, whichfork) &&
+	    del->br_startoff > got.br_startoff && del_endoff < got_endoff)
+		return -ENOSPC;
+
+	flags = XFS_ILOG_CORE;
+	if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) {
+		xfs_fsblock_t	bno;
+		xfs_filblks_t	len;
+
+		ASSERT(do_mod(del->br_blockcount, mp->m_sb.sb_rextsize) == 0);
+		ASSERT(do_mod(del->br_startblock, mp->m_sb.sb_rextsize) == 0);
+		bno = del->br_startblock;
+		len = del->br_blockcount;
+		do_div(bno, mp->m_sb.sb_rextsize);
+		do_div(len, mp->m_sb.sb_rextsize);
+		error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len);
+		if (error)
+			goto done;
 		do_fx = 0;
+		nblks = len * mp->m_sb.sb_rextsize;
+		qfield = XFS_TRANS_DQ_RTBCOUNT;
+	} else {
+		do_fx = 1;
+		nblks = del->br_blockcount;
+		qfield = XFS_TRANS_DQ_BCOUNT;
 	}

-	/*
-	 * Set flag value to use in switch statement.
-	 * Left-contig is 2, right-contig is 1.
-	 */
-	switch (((got.br_startoff == del->br_startoff) << 1) |
-		(got_endoff == del_endoff)) {
-	case 3:
+	del_endblock = del->br_startblock + del->br_blockcount;
+	if (cur) {
+		error = xfs_bmbt_lookup_eq(cur, &got, &i);
+		if (error)
+			goto done;
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+	}
+
+	if (got.br_startoff == del->br_startoff)
+		state |= BMAP_LEFT_FILLING;
+	if (got_endoff == del_endoff)
+		state |= BMAP_RIGHT_FILLING;
+
+	switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
+	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
 		/*
 		 * Matches the whole extent.  Delete the entry.
         */
-        trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
-        xfs_iext_remove(ip, *idx, 1,
-            whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0);
-        --*idx;
-        if (delay)
-            break;
-
+        xfs_iext_remove(ip, icur, state);
+        xfs_iext_prev(ifp, icur);
         XFS_IFORK_NEXT_SET(ip, whichfork,
             XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
         flags |= XFS_ILOG_CORE;
···
             goto done;
         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
         break;
-
-    case 2:
+    case BMAP_LEFT_FILLING:
         /*
          * Deleting the first part of the extent.
         */
-        trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
-        xfs_bmbt_set_startoff(ep, del_endoff);
-        temp = got.br_blockcount - del->br_blockcount;
-        xfs_bmbt_set_blockcount(ep, temp);
-        if (delay) {
-            temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
-                da_old);
-            xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
-            trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
-            da_new = temp;
-            break;
-        }
-        xfs_bmbt_set_startblock(ep, del_endblock);
-        trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+        got.br_startoff = del_endoff;
+        got.br_startblock = del_endblock;
+        got.br_blockcount -= del->br_blockcount;
+        xfs_iext_update_extent(ip, state, icur, &got);
         if (!cur) {
             flags |= xfs_ilog_fext(whichfork);
             break;
         }
-        if ((error = xfs_bmbt_update(cur, del_endoff, del_endblock,
-                got.br_blockcount - del->br_blockcount,
-                got.br_state)))
+        error = xfs_bmbt_update(cur, &got);
+        if (error)
             goto done;
         break;
-
-    case 1:
+    case BMAP_RIGHT_FILLING:
         /*
          * Deleting the last part of the extent.
         */
-        temp = got.br_blockcount - del->br_blockcount;
-        trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
-        xfs_bmbt_set_blockcount(ep, temp);
-        if (delay) {
-            temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
-                da_old);
-            xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
-            trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
-            da_new = temp;
-            break;
-        }
-        trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+        got.br_blockcount -= del->br_blockcount;
+        xfs_iext_update_extent(ip, state, icur, &got);
         if (!cur) {
             flags |= xfs_ilog_fext(whichfork);
             break;
         }
-        if ((error = xfs_bmbt_update(cur, got.br_startoff,
-                got.br_startblock,
-                got.br_blockcount - del->br_blockcount,
-                got.br_state)))
+        error = xfs_bmbt_update(cur, &got);
+        if (error)
             goto done;
         break;
-
     case 0:
         /*
          * Deleting the middle of the extent.
         */
-        temp = del->br_startoff - got.br_startoff;
-        trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
-        xfs_bmbt_set_blockcount(ep, temp);
+        old = got;
+
+        got.br_blockcount = del->br_startoff - got.br_startoff;
+        xfs_iext_update_extent(ip, state, icur, &got);
+
         new.br_startoff = del_endoff;
-        temp2 = got_endoff - del_endoff;
-        new.br_blockcount = temp2;
+        new.br_blockcount = got_endoff - del_endoff;
         new.br_state = got.br_state;
-        if (!delay) {
-            new.br_startblock = del_endblock;
-            flags |= XFS_ILOG_CORE;
-            if (cur) {
-                if ((error = xfs_bmbt_update(cur,
-                        got.br_startoff,
-                        got.br_startblock, temp,
-                        got.br_state)))
+        new.br_startblock = del_endblock;
+
+        flags |= XFS_ILOG_CORE;
+        if (cur) {
+            error = xfs_bmbt_update(cur, &got);
+            if (error)
+                goto done;
+            error = xfs_btree_increment(cur, 0, &i);
+            if (error)
+                goto done;
+            cur->bc_rec.b = new;
+            error = xfs_btree_insert(cur, &i);
+            if (error && error != -ENOSPC)
+                goto done;
+            /*
+             * If get no-space back from btree insert, it tried a
+             * split, and we have a zero block reservation. Fix up
+             * our state and return the error.
+             */
+            if (error == -ENOSPC) {
+                /*
+                 * Reset the cursor, don't trust it after any
+                 * insert operation.
+                 */
+                error = xfs_bmbt_lookup_eq(cur, &got, &i);
+                if (error)
                     goto done;
-                if ((error = xfs_btree_increment(cur, 0, &i)))
-                    goto done;
-                cur->bc_rec.b = new;
-                error = xfs_btree_insert(cur, &i);
-                if (error && error != -ENOSPC)
+                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+                /*
+                 * Update the btree record back
+                 * to the original value.
+                 */
+                error = xfs_bmbt_update(cur, &old);
+                if (error)
                     goto done;
                 /*
-                 * If get no-space back from btree insert,
-                 * it tried a split, and we have a zero
-                 * block reservation.
-                 * Fix up our state and return the error.
+                 * Reset the extent record back
+                 * to the original value.
                  */
-                if (error == -ENOSPC) {
-                    /*
-                     * Reset the cursor, don't trust
-                     * it after any insert operation.
-                     */
-                    if ((error = xfs_bmbt_lookup_eq(cur,
-                            got.br_startoff,
-                            got.br_startblock,
-                            temp, &i)))
-                        goto done;
-                    XFS_WANT_CORRUPTED_GOTO(mp,
-                        i == 1, done);
-                    /*
-                     * Update the btree record back
-                     * to the original value.
-                     */
-                    if ((error = xfs_bmbt_update(cur,
-                            got.br_startoff,
-                            got.br_startblock,
-                            got.br_blockcount,
-                            got.br_state)))
-                        goto done;
-                    /*
-                     * Reset the extent record back
-                     * to the original value.
-                     */
-                    xfs_bmbt_set_blockcount(ep,
-                        got.br_blockcount);
-                    flags = 0;
-                    error = -ENOSPC;
-                    goto done;
-                }
-                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
-            } else
-                flags |= xfs_ilog_fext(whichfork);
-            XFS_IFORK_NEXT_SET(ip, whichfork,
-                XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
-        } else {
-            xfs_filblks_t stolen;
-            ASSERT(whichfork == XFS_DATA_FORK);
-
-            /*
-             * Distribute the original indlen reservation across the
-             * two new extents. Steal blocks from the deleted extent
-             * if necessary. Stealing blocks simply fudges the
-             * fdblocks accounting in xfs_bunmapi().
-             */
-            temp = xfs_bmap_worst_indlen(ip, got.br_blockcount);
-            temp2 = xfs_bmap_worst_indlen(ip, new.br_blockcount);
-            stolen = xfs_bmap_split_indlen(da_old, &temp, &temp2,
-                    del->br_blockcount);
-            da_new = temp + temp2 - stolen;
-            del->br_blockcount -= stolen;
-
-            /*
-             * Set the reservation for each extent. Warn if either
-             * is zero as this can lead to delalloc problems.
-             */
-            WARN_ON_ONCE(!temp || !temp2);
-            xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
-            new.br_startblock = nullstartblock((int)temp2);
-        }
-        trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
-        xfs_iext_insert(ip, *idx + 1, 1, &new, state);
-        ++*idx;
+                xfs_iext_update_extent(ip, state, icur, &old);
+                flags = 0;
+                error = -ENOSPC;
+                goto done;
+            }
+            XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+        } else
+            flags |= xfs_ilog_fext(whichfork);
+        XFS_IFORK_NEXT_SET(ip, whichfork,
+            XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
+        xfs_iext_next(ifp, icur);
+        xfs_iext_insert(ip, icur, &new, state);
         break;
     }
 
     /* remove reverse mapping */
-    if (!delay) {
-        error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, del);
-        if (error)
-            goto done;
-    }
+    error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, del);
+    if (error)
+        goto done;
 
     /*
      * If we need to, add to list of extents to delete.
···
     if (qfield && !(bflags & XFS_BMAPI_REMAP))
         xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks);
 
-    /*
-     * Account for change in delayed indirect blocks.
-     * Nothing to do for disk quota accounting here.
-     */
-    ASSERT(da_old >= da_new);
-    if (da_old > da_new)
-        xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new), false);
 done:
     *logflagsp = flags;
     return error;
···
 __xfs_bunmapi(
     xfs_trans_t *tp,            /* transaction pointer */
     struct xfs_inode *ip,       /* incore inode */
-    xfs_fileoff_t bno,          /* starting offset to unmap */
+    xfs_fileoff_t start,        /* first file offset deleted */
     xfs_filblks_t *rlen,        /* i/o: amount remaining */
     int flags,                  /* misc flags */
     xfs_extnum_t nexts,         /* number of extents max */
···
     xfs_bmbt_irec_t got;        /* current extent record */
     xfs_ifork_t *ifp;           /* inode fork pointer */
     int isrt;                   /* freeing in rt area */
-    xfs_extnum_t lastx;         /* last extent index used */
     int logflags;               /* transaction logging flags */
     xfs_extlen_t mod;           /* rt extent offset */
     xfs_mount_t *mp;            /* mount structure */
-    xfs_fileoff_t start;        /* first file offset deleted */
     int tmp_logflags;           /* partial logging flags */
     int wasdel;                 /* was a delayed alloc extent */
     int whichfork;              /* data or attribute fork */
···
     xfs_filblks_t len = *rlen;  /* length to unmap in file */
     xfs_fileoff_t max_len;
     xfs_agnumber_t prev_agno = NULLAGNUMBER, agno;
+    xfs_fileoff_t end;
+    struct xfs_iext_cursor icur;
+    bool done = false;
 
-    trace_xfs_bunmap(ip, bno, len, flags, _RET_IP_);
+    trace_xfs_bunmap(ip, start, len, flags, _RET_IP_);
 
     whichfork = xfs_bmapi_whichfork(flags);
     ASSERT(whichfork != XFS_COW_FORK);
···
     }
     XFS_STATS_INC(mp, xs_blk_unmap);
     isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
-    start = bno;
-    bno = start + len - 1;
+    end = start + len;
 
-    /*
-     * Check to see if the given block number is past the end of the
-     * file, back up to the last block if so...
-     */
-    if (!xfs_iext_lookup_extent(ip, ifp, bno, &lastx, &got)) {
-        ASSERT(lastx > 0);
-        xfs_iext_get_extent(ifp, --lastx, &got);
-        bno = got.br_startoff + got.br_blockcount - 1;
+    if (!xfs_iext_lookup_extent_before(ip, ifp, &end, &icur, &got)) {
+        *rlen = 0;
+        return 0;
     }
+    end--;
 
     logflags = 0;
     if (ifp->if_flags & XFS_IFBROOT) {
···
     }
 
     extno = 0;
-    while (bno != (xfs_fileoff_t)-1 && bno >= start && lastx >= 0 &&
+    while (end != (xfs_fileoff_t)-1 && end >= start &&
            (nexts == 0 || extno < nexts) && max_len > 0) {
         /*
-         * Is the found extent after a hole in which bno lives?
+         * Is the found extent after a hole in which end lives?
          * Just back up to the previous extent, if so.
          */
-        if (got.br_startoff > bno) {
-            if (--lastx < 0)
-                break;
-            xfs_iext_get_extent(ifp, lastx, &got);
+        if (got.br_startoff > end &&
+            !xfs_iext_prev_extent(ifp, &icur, &got)) {
+            done = true;
+            break;
         }
         /*
          * Is the last block of this extent before the range
          * we're supposed to delete? If so, we're done.
          */
-        bno = XFS_FILEOFF_MIN(bno,
+        end = XFS_FILEOFF_MIN(end,
             got.br_startoff + got.br_blockcount - 1);
-        if (bno < start)
+        if (end < start)
             break;
         /*
          * Then deal with the (possibly delayed) allocated space
···
             if (!wasdel)
                 del.br_startblock += start - got.br_startoff;
         }
-        if (del.br_startoff + del.br_blockcount > bno + 1)
-            del.br_blockcount = bno + 1 - del.br_startoff;
+        if (del.br_startoff + del.br_blockcount > end + 1)
+            del.br_blockcount = end + 1 - del.br_startoff;
 
         /* How much can we safely unmap? */
         if (max_len < del.br_blockcount) {
···
              * This piece is unwritten, or we're not
              * using unwritten extents. Skip over it.
              */
-            ASSERT(bno >= mod);
-            bno -= mod > del.br_blockcount ?
+            ASSERT(end >= mod);
+            end -= mod > del.br_blockcount ?
                 del.br_blockcount : mod;
-            if (bno < got.br_startoff) {
-                if (--lastx >= 0)
-                    xfs_bmbt_get_all(xfs_iext_get_ext(
-                        ifp, lastx), &got);
+            if (end < got.br_startoff &&
+                !xfs_iext_prev_extent(ifp, &icur, &got)) {
+                done = true;
+                break;
             }
             continue;
         }
···
             }
             del.br_state = XFS_EXT_UNWRITTEN;
             error = xfs_bmap_add_extent_unwritten_real(tp, ip,
-                    whichfork, &lastx, &cur, &del,
+                    whichfork, &icur, &cur, &del,
                     firstblock, dfops, &logflags);
             if (error)
                 goto error0;
···
              * Can't make it unwritten. There isn't
              * a full extent here so just skip it.
              */
-            ASSERT(bno >= del.br_blockcount);
-            bno -= del.br_blockcount;
-            if (got.br_startoff > bno && --lastx >= 0)
-                xfs_iext_get_extent(ifp, lastx, &got);
+            ASSERT(end >= del.br_blockcount);
+            end -= del.br_blockcount;
+            if (got.br_startoff > end &&
+                !xfs_iext_prev_extent(ifp, &icur, &got)) {
+                done = true;
+                break;
+            }
             continue;
         } else if (del.br_state == XFS_EXT_UNWRITTEN) {
             struct xfs_bmbt_irec prev;
···
              * Unwrite the killed part of that one and
              * try again.
              */
-            ASSERT(lastx > 0);
-            xfs_iext_get_extent(ifp, lastx - 1, &prev);
+            if (!xfs_iext_prev_extent(ifp, &icur, &prev))
+                ASSERT(0);
             ASSERT(prev.br_state == XFS_EXT_NORM);
             ASSERT(!isnullstartblock(prev.br_startblock));
             ASSERT(del.br_startblock ==
···
                 prev.br_startoff = start;
             }
             prev.br_state = XFS_EXT_UNWRITTEN;
-            lastx--;
             error = xfs_bmap_add_extent_unwritten_real(tp,
-                    ip, whichfork, &lastx, &cur,
+                    ip, whichfork, &icur, &cur,
                     &prev, firstblock, dfops,
                     &logflags);
             if (error)
···
             ASSERT(del.br_state == XFS_EXT_NORM);
             del.br_state = XFS_EXT_UNWRITTEN;
             error = xfs_bmap_add_extent_unwritten_real(tp,
-                    ip, whichfork, &lastx, &cur,
+                    ip, whichfork, &icur, &cur,
                     &del, firstblock, dfops,
                     &logflags);
             if (error)
···
             }
         }
 
-        /*
-         * If it's the case where the directory code is running
-         * with no block reservation, and the deleted block is in
-         * the middle of its extent, and the resulting insert
-         * of an extent would cause transformation to btree format,
-         * then reject it. The calling code will then swap
-         * blocks around instead.
-         * We have to do this now, rather than waiting for the
-         * conversion to btree format, since the transaction
-         * will be dirty.
-         */
-        if (!wasdel && tp->t_blk_res == 0 &&
-            XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
-            XFS_IFORK_NEXTENTS(ip, whichfork) >= /* Note the >= */
-            XFS_IFORK_MAXEXT(ip, whichfork) &&
-            del.br_startoff > got.br_startoff &&
-            del.br_startoff + del.br_blockcount <
-            got.br_startoff + got.br_blockcount) {
-            error = -ENOSPC;
-            goto error0;
+        if (wasdel) {
+            error = xfs_bmap_del_extent_delay(ip, whichfork, &icur,
+                    &got, &del);
+        } else {
+            error = xfs_bmap_del_extent_real(ip, tp, &icur, dfops,
+                    cur, &del, &tmp_logflags, whichfork,
+                    flags);
+            logflags |= tmp_logflags;
         }
 
-        /*
-         * Unreserve quota and update realtime free space, if
-         * appropriate. If delayed allocation, update the inode delalloc
-         * counter now and wait to update the sb counters as
-         * xfs_bmap_del_extent() might need to borrow some blocks.
-         */
-        if (wasdel) {
-            ASSERT(startblockval(del.br_startblock) > 0);
-            if (isrt) {
-                xfs_filblks_t rtexts;
-
-                rtexts = XFS_FSB_TO_B(mp, del.br_blockcount);
-                do_div(rtexts, mp->m_sb.sb_rextsize);
-                xfs_mod_frextents(mp, (int64_t)rtexts);
-                (void)xfs_trans_reserve_quota_nblks(NULL,
-                    ip, -((long)del.br_blockcount), 0,
-                    XFS_QMOPT_RES_RTBLKS);
-            } else {
-                (void)xfs_trans_reserve_quota_nblks(NULL,
-                    ip, -((long)del.br_blockcount), 0,
-                    XFS_QMOPT_RES_REGBLKS);
-            }
-            ip->i_delayed_blks -= del.br_blockcount;
-            if (cur)
-                cur->bc_private.b.flags |=
-                    XFS_BTCUR_BPRV_WASDEL;
-        } else if (cur)
-            cur->bc_private.b.flags &= ~XFS_BTCUR_BPRV_WASDEL;
-
-        error = xfs_bmap_del_extent(ip, tp, &lastx, dfops, cur, &del,
-                &tmp_logflags, whichfork, flags);
-        logflags |= tmp_logflags;
         if (error)
             goto error0;
 
-        if (!isrt && wasdel)
-            xfs_mod_fdblocks(mp, (int64_t)del.br_blockcount, false);
-
         max_len -= del.br_blockcount;
-        bno = del.br_startoff - 1;
+        end = del.br_startoff - 1;
 nodelete:
         /*
          * If not done go on to the next (previous) record.
         */
-        if (bno != (xfs_fileoff_t)-1 && bno >= start) {
-            if (lastx >= 0) {
-                xfs_iext_get_extent(ifp, lastx, &got);
-                if (got.br_startoff > bno && --lastx >= 0)
-                    xfs_iext_get_extent(ifp, lastx, &got);
+        if (end != (xfs_fileoff_t)-1 && end >= start) {
+            if (!xfs_iext_get_extent(ifp, &icur, &got) ||
+                (got.br_startoff > end &&
+                 !xfs_iext_prev_extent(ifp, &icur, &got))) {
+                done = true;
+                break;
             }
             extno++;
         }
     }
-    if (bno == (xfs_fileoff_t)-1 || bno < start || lastx < 0)
+    if (done || end == (xfs_fileoff_t)-1 || end < start)
         *rlen = 0;
     else
-        *rlen = bno - start + 1;
+        *rlen = end - start + 1;
 
     /*
      * Convert to a btree if necessary.
···
     struct xfs_inode *ip,
     int whichfork,
     xfs_fileoff_t shift,            /* shift fsb */
-    int current_ext,                /* idx of gotp */
+    struct xfs_iext_cursor *icur,
     struct xfs_bmbt_irec *got,      /* extent to shift */
     struct xfs_bmbt_irec *left,     /* preceding extent */
     struct xfs_btree_cur *cur,
     int *logflags,                  /* output */
     struct xfs_defer_ops *dfops)
 {
-    struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
     struct xfs_bmbt_irec new;
     xfs_filblks_t blockcount;
     int error, i;
···
     }
 
     /* lookup and remove the extent to merge */
-    error = xfs_bmbt_lookup_eq(cur, got->br_startoff, got->br_startblock,
-            got->br_blockcount, &i);
+    error = xfs_bmbt_lookup_eq(cur, got, &i);
     if (error)
         return error;
     XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
···
     XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 
     /* lookup and update size of the previous extent */
-    error = xfs_bmbt_lookup_eq(cur, left->br_startoff, left->br_startblock,
-            left->br_blockcount, &i);
+    error = xfs_bmbt_lookup_eq(cur, left, &i);
     if (error)
         return error;
     XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 
-    error = xfs_bmbt_update(cur, new.br_startoff, new.br_startblock,
-            new.br_blockcount, new.br_state);
+    error = xfs_bmbt_update(cur, &new);
     if (error)
         return error;
 
 done:
-    xfs_iext_update_extent(ifp, current_ext - 1, &new);
-    xfs_iext_remove(ip, current_ext, 1, 0);
+    xfs_iext_remove(ip, icur, 0);
+    xfs_iext_prev(XFS_IFORK_PTR(ip, whichfork), icur);
+    xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), icur,
+        &new);
 
     /* update reverse mapping. rmap functions merge the rmaps for us */
     error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, got);
···
         return error;
     return xfs_rmap_map_extent(mp, dfops, ip, whichfork, &new);
 }
 
-/*
- * Shift a single extent.
- */
-STATIC int
-xfs_bmse_shift_one(
-    struct xfs_inode *ip,
-    int whichfork,
-    xfs_fileoff_t offset_shift_fsb,
-    int *current_ext,
-    struct xfs_bmbt_irec *got,
-    struct xfs_btree_cur *cur,
-    int *logflags,
-    enum shift_direction direction,
-    struct xfs_defer_ops *dfops)
+static int
+xfs_bmap_shift_update_extent(
+    struct xfs_inode *ip,
+    int whichfork,
+    struct xfs_iext_cursor *icur,
+    struct xfs_bmbt_irec *got,
+    struct xfs_btree_cur *cur,
+    int *logflags,
+    struct xfs_defer_ops *dfops,
+    xfs_fileoff_t startoff)
 {
-    struct xfs_ifork *ifp;
-    struct xfs_mount *mp;
-    xfs_fileoff_t startoff;
-    struct xfs_bmbt_irec adj_irec, new;
-    int error;
-    int i;
-    int total_extents;
+    struct xfs_mount *mp = ip->i_mount;
+    struct xfs_bmbt_irec prev = *got;
+    int error, i;
 
-    mp = ip->i_mount;
-    ifp = XFS_IFORK_PTR(ip, whichfork);
-    total_extents = xfs_iext_count(ifp);
-
-    /* delalloc extents should be prevented by caller */
-    XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got->br_startblock));
-
-    if (direction == SHIFT_LEFT) {
-        startoff = got->br_startoff - offset_shift_fsb;
-
-        /*
-         * Check for merge if we've got an extent to the left,
-         * otherwise make sure there's enough room at the start
-         * of the file for the shift.
-         */
-        if (!*current_ext) {
-            if (got->br_startoff < offset_shift_fsb)
-                return -EINVAL;
-            goto update_current_ext;
-        }
-
-        /*
-         * grab the left extent and check for a large enough hole.
-         */
-        xfs_iext_get_extent(ifp, *current_ext - 1, &adj_irec);
-        if (startoff < adj_irec.br_startoff + adj_irec.br_blockcount)
-            return -EINVAL;
-
-        /* check whether to merge the extent or shift it down */
-        if (xfs_bmse_can_merge(&adj_irec, got, offset_shift_fsb)) {
-            return xfs_bmse_merge(ip, whichfork, offset_shift_fsb,
-                    *current_ext, got, &adj_irec,
-                    cur, logflags, dfops);
-        }
-    } else {
-        startoff = got->br_startoff + offset_shift_fsb;
-        /* nothing to move if this is the last extent */
-        if (*current_ext >= (total_extents - 1))
-            goto update_current_ext;
-
-        /*
-         * If this is not the last extent in the file, make sure there
-         * is enough room between current extent and next extent for
-         * accommodating the shift.
-         */
-        xfs_iext_get_extent(ifp, *current_ext + 1, &adj_irec);
-        if (startoff + got->br_blockcount > adj_irec.br_startoff)
-            return -EINVAL;
-
-        /*
-         * Unlike a left shift (which involves a hole punch),
-         * a right shift does not modify extent neighbors
-         * in any way. We should never find mergeable extents
-         * in this scenario. Check anyways and warn if we
-         * encounter two extents that could be one.
-         */
-        if (xfs_bmse_can_merge(got, &adj_irec, offset_shift_fsb))
-            WARN_ON_ONCE(1);
-    }
-
-    /*
-     * Increment the extent index for the next iteration, update the start
-     * offset of the in-core extent and update the btree if applicable.
-     */
-update_current_ext:
     *logflags |= XFS_ILOG_CORE;
 
-    new = *got;
-    new.br_startoff = startoff;
+    got->br_startoff = startoff;
 
     if (cur) {
-        error = xfs_bmbt_lookup_eq(cur, got->br_startoff,
-                got->br_startblock, got->br_blockcount, &i);
+        error = xfs_bmbt_lookup_eq(cur, &prev, &i);
         if (error)
             return error;
         XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 
-        error = xfs_bmbt_update(cur, new.br_startoff,
-                new.br_startblock, new.br_blockcount,
-                new.br_state);
+        error = xfs_bmbt_update(cur, got);
         if (error)
             return error;
     } else {
         *logflags |= XFS_ILOG_DEXT;
     }
 
-    xfs_iext_update_extent(ifp, *current_ext, &new);
-
-    if (direction == SHIFT_LEFT)
-        (*current_ext)++;
-    else
-        (*current_ext)--;
+    xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), icur,
+        got);
 
     /* update reverse mapping */
-    error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, got);
+    error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, &prev);
     if (error)
         return error;
-    return xfs_rmap_map_extent(mp, dfops, ip, whichfork, &new);
+    return xfs_rmap_map_extent(mp, dfops, ip, whichfork, got);
 }
 
-/*
- * Shift extent records to the left/right to cover/create a hole.
- *
- * The maximum number of extents to be shifted in a single operation is
- * @num_exts. @stop_fsb specifies the file offset at which to stop shift and the
- * file offset where we've left off is returned in @next_fsb. @offset_shift_fsb
- * is the length by which each extent is shifted. If there is no hole to shift
- * the extents into, this will be considered invalid operation and we abort
- * immediately.
- */
 int
-xfs_bmap_shift_extents(
+xfs_bmap_collapse_extents(
     struct xfs_trans *tp,
     struct xfs_inode *ip,
     xfs_fileoff_t *next_fsb,
     xfs_fileoff_t offset_shift_fsb,
-    int *done,
+    bool *done,
     xfs_fileoff_t stop_fsb,
     xfs_fsblock_t *firstblock,
-    struct xfs_defer_ops *dfops,
-    enum shift_direction direction,
-    int num_exts)
+    struct xfs_defer_ops *dfops)
 {
-    struct xfs_btree_cur *cur = NULL;
-    struct xfs_bmbt_irec got;
-    struct xfs_mount *mp = ip->i_mount;
-    struct xfs_ifork *ifp;
-    xfs_extnum_t nexts = 0;
-    xfs_extnum_t current_ext;
-    xfs_extnum_t total_extents;
-    xfs_extnum_t stop_extent;
-    int error = 0;
-    int whichfork = XFS_DATA_FORK;
-    int logflags = 0;
+    int whichfork = XFS_DATA_FORK;
+    struct xfs_mount *mp = ip->i_mount;
+    struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
+    struct xfs_btree_cur *cur = NULL;
+    struct xfs_bmbt_irec got, prev;
+    struct xfs_iext_cursor icur;
+    xfs_fileoff_t new_startoff;
+    int error = 0;
+    int logflags = 0;
 
     if (unlikely(XFS_TEST_ERROR(
         (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
          XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
          mp, XFS_ERRTAG_BMAPIFORMAT))) {
-        XFS_ERROR_REPORT("xfs_bmap_shift_extents",
-                XFS_ERRLEVEL_LOW, mp);
+        XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
         return -EFSCORRUPTED;
     }
 
     if (XFS_FORCED_SHUTDOWN(mp))
         return -EIO;
 
-    ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
-    ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-    ASSERT(direction == SHIFT_LEFT || direction == SHIFT_RIGHT);
+    ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL));
 
-    ifp = XFS_IFORK_PTR(ip, whichfork);
     if (!(ifp->if_flags & XFS_IFEXTENTS)) {
-        /* Read in all the extents */
         error = xfs_iread_extents(tp, ip, whichfork);
         if (error)
             return error;
···
         cur->bc_private.b.flags = 0;
     }
 
-    /*
-     * There may be delalloc extents in the data fork before the range we
-     * are collapsing out, so we cannot use the count of real extents here.
-     * Instead we have to calculate it from the incore fork.
-     */
-    total_extents = xfs_iext_count(ifp);
-    if (total_extents == 0) {
-        *done = 1;
+    if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &icur, &got)) {
+        *done = true;
+        goto del_cursor;
+    }
+    XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got.br_startblock));
+
+    new_startoff = got.br_startoff - offset_shift_fsb;
+    if (xfs_iext_peek_prev_extent(ifp, &icur, &prev)) {
+        if (new_startoff < prev.br_startoff + prev.br_blockcount) {
+            error = -EINVAL;
+            goto del_cursor;
+        }
+
+        if (xfs_bmse_can_merge(&prev, &got, offset_shift_fsb)) {
+            error = xfs_bmse_merge(ip, whichfork, offset_shift_fsb,
+                    &icur, &got, &prev, cur, &logflags,
+                    dfops);
+            if (error)
+                goto del_cursor;
+            goto done;
+        }
+    } else {
+        if (got.br_startoff < offset_shift_fsb) {
+            error = -EINVAL;
+            goto del_cursor;
+        }
+    }
+
+    error = xfs_bmap_shift_update_extent(ip, whichfork, &icur, &got, cur,
+            &logflags, dfops, new_startoff);
+    if (error)
+        goto del_cursor;
+
+done:
+    if (!xfs_iext_next_extent(ifp, &icur, &got)) {
+        *done = true;
         goto del_cursor;
     }
 
-    /*
-     * In case of first right shift, we need to initialize next_fsb
-     */
-    if (*next_fsb == NULLFSBLOCK) {
-        ASSERT(direction == SHIFT_RIGHT);
-
-        current_ext = total_extents - 1;
-        xfs_iext_get_extent(ifp, current_ext, &got);
-        if (stop_fsb > got.br_startoff) {
-            *done = 1;
-            goto del_cursor;
-        }
-        *next_fsb = got.br_startoff;
-    } else {
-        /*
-         * Look up the extent index for the fsb where we start shifting. We can
-         * henceforth iterate with current_ext as extent list changes are locked
-         * out via ilock.
-         *
-         * If next_fsb lies in a hole beyond which there are no extents we are
-         * done.
-         */
-        if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &current_ext,
-                &got)) {
-            *done = 1;
-            goto del_cursor;
-        }
-    }
-
-    /* Lookup the extent index at which we have to stop */
-    if (direction == SHIFT_RIGHT) {
-        struct xfs_bmbt_irec s;
-
-        xfs_iext_lookup_extent(ip, ifp, stop_fsb, &stop_extent, &s);
-        /* Make stop_extent exclusive of shift range */
-        stop_extent--;
-        if (current_ext <= stop_extent) {
-            error = -EIO;
-            goto del_cursor;
-        }
-    } else {
-        stop_extent = total_extents;
-        if (current_ext >= stop_extent) {
-            error = -EIO;
-            goto del_cursor;
-        }
-    }
-
-    while (nexts++ < num_exts) {
-        error = xfs_bmse_shift_one(ip, whichfork, offset_shift_fsb,
-                &current_ext, &got, cur, &logflags,
-                direction, dfops);
-        if (error)
-            goto del_cursor;
-        /*
-         * If there was an extent merge during the shift, the extent
-         * count can change. Update the total and grade the next record.
-         */
-        if (direction == SHIFT_LEFT) {
-            total_extents = xfs_iext_count(ifp);
-            stop_extent = total_extents;
-        }
-
-        if (current_ext == stop_extent) {
-            *done = 1;
-            *next_fsb = NULLFSBLOCK;
-            break;
-        }
-        xfs_iext_get_extent(ifp, current_ext, &got);
-    }
-
-    if (!*done)
-        *next_fsb = got.br_startoff;
-
+    *next_fsb = got.br_startoff;
 del_cursor:
     if (cur)
         xfs_btree_del_cursor(cur,
             error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
-
     if (logflags)
         xfs_trans_log_inode(tp, ip, logflags);
+    return error;
+}
 
+int
+xfs_bmap_insert_extents(
+    struct xfs_trans *tp,
+    struct xfs_inode *ip,
+    xfs_fileoff_t *next_fsb,
+    xfs_fileoff_t offset_shift_fsb,
+    bool *done,
+    xfs_fileoff_t stop_fsb,
+    xfs_fsblock_t *firstblock,
+    struct xfs_defer_ops *dfops)
+{
+    int whichfork = XFS_DATA_FORK;
+    struct xfs_mount *mp = ip->i_mount;
+    struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
+    struct xfs_btree_cur *cur = NULL;
+    struct xfs_bmbt_irec got, next;
+    struct xfs_iext_cursor icur;
+    xfs_fileoff_t new_startoff;
+    int error = 0;
+    int logflags = 0;
+
+    if (unlikely(XFS_TEST_ERROR(
+        (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
+         XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
+         mp, XFS_ERRTAG_BMAPIFORMAT))) {
+        XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
+        return -EFSCORRUPTED;
+    }
+
+    if (XFS_FORCED_SHUTDOWN(mp))
+        return -EIO;
+
+    ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL));
+
+    if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+        error = xfs_iread_extents(tp, ip, whichfork);
+        if (error)
+            return error;
+    }
+
+    if (ifp->if_flags & XFS_IFBROOT) {
+        cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
+        cur->bc_private.b.firstblock = *firstblock;
+        cur->bc_private.b.dfops = dfops;
+        cur->bc_private.b.flags = 0;
+    }
+
+    if (*next_fsb == NULLFSBLOCK) {
+        xfs_iext_last(ifp, &icur);
+        if (!xfs_iext_get_extent(ifp, &icur, &got) ||
+            stop_fsb > got.br_startoff) {
+            *done = true;
+            goto del_cursor;
+        }
+    } else {
+        if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &icur, &got)) {
+            *done = true;
+            goto del_cursor;
+        }
+    }
+    XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got.br_startblock));
+
+    if (stop_fsb >= got.br_startoff + got.br_blockcount) {
+        error = -EIO;
+        goto del_cursor;
+    }
+
+    new_startoff = got.br_startoff + offset_shift_fsb;
+    if (xfs_iext_peek_next_extent(ifp, &icur, &next)) {
+        if (new_startoff + got.br_blockcount > next.br_startoff) {
+            error = -EINVAL;
+            goto del_cursor;
+        }
+
+        /*
+         * Unlike a left shift (which involves a hole punch), a right
+         * shift does not modify extent neighbors in any way. We should
+         * never find mergeable extents in this scenario. Check anyways
+         * and warn if we encounter two extents that could be one.
+         */
+        if (xfs_bmse_can_merge(&got, &next, offset_shift_fsb))
+            WARN_ON_ONCE(1);
+    }
+
+    error = xfs_bmap_shift_update_extent(ip, whichfork, &icur, &got, cur,
+            &logflags, dfops, new_startoff);
+    if (error)
+        goto del_cursor;
+
+    if (!xfs_iext_prev_extent(ifp, &icur, &got) ||
+        stop_fsb >= got.br_startoff + got.br_blockcount) {
+        *done = true;
+        goto del_cursor;
+    }
+
+    *next_fsb = got.br_startoff;
+del_cursor:
+    if (cur)
+        xfs_btree_del_cursor(cur,
+            error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+    if (logflags)
+        xfs_trans_log_inode(tp, ip, logflags);
     return error;
 }
 
 /*
- * Splits an extent into two extents at split_fsb block such that it is
- * the first block of the current_ext. @current_ext is a target extent
- * to be split. @split_fsb is a block where the extents is split.
- * If split_fsb lies in a hole or the first block of extents, just return 0.
+ * Splits an extent into two extents at split_fsb block such that it is the
+ * first block of the current_ext. @ext is a target extent to be split.
6298 + * @split_fsb is a block where the extents is split. If split_fsb lies in a 6299 + * hole or the first block of extents, just return 0. 5762 6300 */ 5763 6301 STATIC int 5764 6302 xfs_bmap_split_extent_at( ··· 5833 6255 struct xfs_mount *mp = ip->i_mount; 5834 6256 struct xfs_ifork *ifp; 5835 6257 xfs_fsblock_t gotblkcnt; /* new block count for got */ 5836 - xfs_extnum_t current_ext; 6258 + struct xfs_iext_cursor icur; 5837 6259 int error = 0; 5838 6260 int logflags = 0; 5839 6261 int i = 0; ··· 5861 6283 /* 5862 6284 * If there are not extents, or split_fsb lies in a hole we are done. 5863 6285 */ 5864 - if (!xfs_iext_lookup_extent(ip, ifp, split_fsb, &current_ext, &got) || 6286 + if (!xfs_iext_lookup_extent(ip, ifp, split_fsb, &icur, &got) || 5865 6287 got.br_startoff >= split_fsb) 5866 6288 return 0; 5867 6289 ··· 5876 6298 cur->bc_private.b.firstblock = *firstfsb; 5877 6299 cur->bc_private.b.dfops = dfops; 5878 6300 cur->bc_private.b.flags = 0; 5879 - error = xfs_bmbt_lookup_eq(cur, got.br_startoff, 5880 - got.br_startblock, 5881 - got.br_blockcount, 5882 - &i); 6301 + error = xfs_bmbt_lookup_eq(cur, &got, &i); 5883 6302 if (error) 5884 6303 goto del_cursor; 5885 6304 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, del_cursor); 5886 6305 } 5887 6306 5888 6307 got.br_blockcount = gotblkcnt; 5889 - xfs_iext_update_extent(ifp, current_ext, &got); 6308 + xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), &icur, 6309 + &got); 5890 6310 5891 6311 logflags = XFS_ILOG_CORE; 5892 6312 if (cur) { 5893 - error = xfs_bmbt_update(cur, got.br_startoff, 5894 - got.br_startblock, 5895 - got.br_blockcount, 5896 - got.br_state); 6313 + error = xfs_bmbt_update(cur, &got); 5897 6314 if (error) 5898 6315 goto del_cursor; 5899 6316 } else 5900 6317 logflags |= XFS_ILOG_DEXT; 5901 6318 5902 6319 /* Add new extent */ 5903 - current_ext++; 5904 - xfs_iext_insert(ip, current_ext, 1, &new, 0); 6320 + xfs_iext_next(ifp, &icur); 6321 + xfs_iext_insert(ip, &icur, &new, 0); 5905 6322 
XFS_IFORK_NEXT_SET(ip, whichfork, 5906 6323 XFS_IFORK_NEXTENTS(ip, whichfork) + 1); 5907 6324 5908 6325 if (cur) { 5909 - error = xfs_bmbt_lookup_eq(cur, new.br_startoff, 5910 - new.br_startblock, new.br_blockcount, 5911 - &i); 6326 + error = xfs_bmbt_lookup_eq(cur, &new, &i); 5912 6327 if (error) 5913 6328 goto del_cursor; 5914 6329 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, del_cursor); 5915 - cur->bc_rec.b.br_state = new.br_state; 5916 - 5917 6330 error = xfs_btree_insert(cur, &i); 5918 6331 if (error) 5919 6332 goto del_cursor;
+31 -35
fs/xfs/libxfs/xfs_bmap.h
···
 	xfs_fsblock_t		blkno;	/* starting block of new extent */
 
 	struct xfs_btree_cur	*cur;	/* btree cursor */
-	xfs_extnum_t		idx;	/* current extent index */
+	struct xfs_iext_cursor	icur;	/* incore extent cursor */
 	int			nallocs;/* number of extents alloc'd */
 	int			logflags;/* flags for transaction logging */
···
 /* Only convert delalloc space, don't allocate entirely new extents */
 #define XFS_BMAPI_DELALLOC	0x400
 
+/* Only convert unwritten extents, don't allocate new blocks */
+#define XFS_BMAPI_CONVERT_ONLY	0x800
+
 #define XFS_BMAPI_FLAGS \
 	{ XFS_BMAPI_ENTIRE,	"ENTIRE" }, \
 	{ XFS_BMAPI_METADATA,	"METADATA" }, \
···
 	{ XFS_BMAPI_ZERO,	"ZERO" }, \
 	{ XFS_BMAPI_REMAP,	"REMAP" }, \
 	{ XFS_BMAPI_COWFORK,	"COWFORK" }, \
-	{ XFS_BMAPI_DELALLOC,	"DELALLOC" }
+	{ XFS_BMAPI_DELALLOC,	"DELALLOC" }, \
+	{ XFS_BMAPI_CONVERT_ONLY, "CONVERT_ONLY" }
 
 
 static inline int xfs_bmapi_aflag(int w)
···
 		!isnullstartblock(irec->br_startblock);
 }
 
-/*
- * This macro is used to determine how many extents will be shifted
- * in one write transaction. We could require two splits,
- * an extent move on the first and an extent merge on the second,
- * So it is proper that one extent is shifted inside write transaction
- * at a time.
- */
-#define XFS_BMAP_MAX_SHIFT_EXTENTS	1
-
-enum shift_direction {
-	SHIFT_LEFT = 0,
-	SHIFT_RIGHT,
-};
-
-#ifdef DEBUG
-void	xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,
-		int whichfork, unsigned long caller_ip);
-#define	XFS_BMAP_TRACE_EXLIST(ip,c,w) \
-	xfs_bmap_trace_exlist(ip,c,w, _THIS_IP_)
-#else
-#define	XFS_BMAP_TRACE_EXLIST(ip,c,w)
-#endif
-
 void	xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno,
 		xfs_filblks_t len);
 void	xfs_trim_extent_eof(struct xfs_bmbt_irec *, struct xfs_inode *);
···
 int	xfs_bmap_last_offset(struct xfs_inode *ip, xfs_fileoff_t *unused,
 		int whichfork);
 int	xfs_bmap_one_block(struct xfs_inode *ip, int whichfork);
-int	xfs_bmap_read_extents(struct xfs_trans *tp, struct xfs_inode *ip,
-		int whichfork);
 int	xfs_bmapi_read(struct xfs_inode *ip, xfs_fileoff_t bno,
 		xfs_filblks_t len, struct xfs_bmbt_irec *mval,
 		int *nmap, int flags);
···
 		xfs_extnum_t nexts, xfs_fsblock_t *firstblock,
 		struct xfs_defer_ops *dfops, int *done);
 int	xfs_bmap_del_extent_delay(struct xfs_inode *ip, int whichfork,
-		xfs_extnum_t *idx, struct xfs_bmbt_irec *got,
+		struct xfs_iext_cursor *cur, struct xfs_bmbt_irec *got,
 		struct xfs_bmbt_irec *del);
-void	xfs_bmap_del_extent_cow(struct xfs_inode *ip, xfs_extnum_t *idx,
-		struct xfs_bmbt_irec *got, struct xfs_bmbt_irec *del);
+void	xfs_bmap_del_extent_cow(struct xfs_inode *ip,
+		struct xfs_iext_cursor *cur, struct xfs_bmbt_irec *got,
+		struct xfs_bmbt_irec *del);
 uint	xfs_default_attroffset(struct xfs_inode *ip);
-int	xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip,
+int	xfs_bmap_collapse_extents(struct xfs_trans *tp, struct xfs_inode *ip,
 		xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb,
-		int *done, xfs_fileoff_t stop_fsb, xfs_fsblock_t *firstblock,
-		struct xfs_defer_ops *dfops, enum shift_direction direction,
-		int num_exts);
+		bool *done, xfs_fileoff_t stop_fsb, xfs_fsblock_t *firstblock,
+		struct xfs_defer_ops *dfops);
+int	xfs_bmap_insert_extents(struct xfs_trans *tp, struct xfs_inode *ip,
+		xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb,
+		bool *done, xfs_fileoff_t stop_fsb, xfs_fsblock_t *firstblock,
+		struct xfs_defer_ops *dfops);
 int	xfs_bmap_split_extent(struct xfs_inode *ip, xfs_fileoff_t split_offset);
 int	xfs_bmapi_reserve_delalloc(struct xfs_inode *ip, int whichfork,
 		xfs_fileoff_t off, xfs_filblks_t len, xfs_filblks_t prealloc,
-		struct xfs_bmbt_irec *got, xfs_extnum_t *lastx, int eof);
+		struct xfs_bmbt_irec *got, struct xfs_iext_cursor *cur,
+		int eof);
 
 enum xfs_bmap_intent_type {
 	XFS_BMAP_MAP = 1,
···
 		struct xfs_inode *ip, struct xfs_bmbt_irec *imap);
 int	xfs_bmap_unmap_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
 		struct xfs_inode *ip, struct xfs_bmbt_irec *imap);
+
+static inline int xfs_bmap_fork_to_state(int whichfork)
+{
+	switch (whichfork) {
+	case XFS_ATTR_FORK:
+		return BMAP_ATTRFORK;
+	case XFS_COW_FORK:
+		return BMAP_COWFORK;
+	default:
+		return 0;
+	}
+}
 
 #endif	/* __XFS_BMAP_H__ */
+27 -219
fs/xfs/libxfs/xfs_bmap_btree.c
···
 #include "xfs_rmap.h"
 
 /*
- * Determine the extent state.
- */
-/* ARGSUSED */
-STATIC xfs_exntst_t
-xfs_extent_state(
-	xfs_filblks_t		blks,
-	int			extent_flag)
-{
-	if (extent_flag) {
-		ASSERT(blks != 0);	/* saved for DMIG */
-		return XFS_EXT_UNWRITTEN;
-	}
-	return XFS_EXT_NORM;
-}
-
-/*
  * Convert on-disk form of btree root to in-memory form.
 */
 void
···
 	memcpy(tpp, fpp, sizeof(*fpp) * dmxr);
 }
 
-/*
- * Convert a compressed bmap extent record to an uncompressed form.
- * This code must be in sync with the routines xfs_bmbt_get_startoff,
- * xfs_bmbt_get_startblock, xfs_bmbt_get_blockcount and xfs_bmbt_get_state.
- */
-STATIC void
-__xfs_bmbt_get_all(
-	uint64_t		l0,
-	uint64_t		l1,
-	xfs_bmbt_irec_t		*s)
-{
-	int			ext_flag;
-	xfs_exntst_t		st;
-
-	ext_flag = (int)(l0 >> (64 - BMBT_EXNTFLAG_BITLEN));
-	s->br_startoff = ((xfs_fileoff_t)l0 &
-			 xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
-	s->br_startblock = (((xfs_fsblock_t)l0 & xfs_mask64lo(9)) << 43) |
-			   (((xfs_fsblock_t)l1) >> 21);
-	s->br_blockcount = (xfs_filblks_t)(l1 & xfs_mask64lo(21));
-	/* This is xfs_extent_state() in-line */
-	if (ext_flag) {
-		ASSERT(s->br_blockcount != 0);	/* saved for DMIG */
-		st = XFS_EXT_UNWRITTEN;
-	} else
-		st = XFS_EXT_NORM;
-	s->br_state = st;
-}
-
 void
-xfs_bmbt_get_all(
-	xfs_bmbt_rec_host_t	*r,
-	xfs_bmbt_irec_t		*s)
+xfs_bmbt_disk_get_all(
+	struct xfs_bmbt_rec	*rec,
+	struct xfs_bmbt_irec	*irec)
 {
-	__xfs_bmbt_get_all(r->l0, r->l1, s);
-}
+	uint64_t		l0 = get_unaligned_be64(&rec->l0);
+	uint64_t		l1 = get_unaligned_be64(&rec->l1);
 
-/*
- * Extract the blockcount field from an in memory bmap extent record.
- */
-xfs_filblks_t
-xfs_bmbt_get_blockcount(
-	xfs_bmbt_rec_host_t	*r)
-{
-	return (xfs_filblks_t)(r->l1 & xfs_mask64lo(21));
-}
-
-/*
- * Extract the startblock field from an in memory bmap extent record.
- */
-xfs_fsblock_t
-xfs_bmbt_get_startblock(
-	xfs_bmbt_rec_host_t	*r)
-{
-	return (((xfs_fsblock_t)r->l0 & xfs_mask64lo(9)) << 43) |
-	       (((xfs_fsblock_t)r->l1) >> 21);
-}
-
-/*
- * Extract the startoff field from an in memory bmap extent record.
- */
-xfs_fileoff_t
-xfs_bmbt_get_startoff(
-	xfs_bmbt_rec_host_t	*r)
-{
-	return ((xfs_fileoff_t)r->l0 &
-		xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
-}
-
-xfs_exntst_t
-xfs_bmbt_get_state(
-	xfs_bmbt_rec_host_t	*r)
-{
-	int	ext_flag;
-
-	ext_flag = (int)((r->l0) >> (64 - BMBT_EXNTFLAG_BITLEN));
-	return xfs_extent_state(xfs_bmbt_get_blockcount(r),
-				ext_flag);
+	irec->br_startoff = (l0 & xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
+	irec->br_startblock = ((l0 & xfs_mask64lo(9)) << 43) | (l1 >> 21);
+	irec->br_blockcount = l1 & xfs_mask64lo(21);
+	if (l0 >> (64 - BMBT_EXNTFLAG_BITLEN))
+		irec->br_state = XFS_EXT_UNWRITTEN;
+	else
+		irec->br_state = XFS_EXT_NORM;
 }
 
 /*
···
 		xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
 }
 
-
-/*
- * Set all the fields in a bmap extent record from the arguments.
- */
-void
-xfs_bmbt_set_allf(
-	xfs_bmbt_rec_host_t	*r,
-	xfs_fileoff_t		startoff,
-	xfs_fsblock_t		startblock,
-	xfs_filblks_t		blockcount,
-	xfs_exntst_t		state)
-{
-	int			extent_flag = (state == XFS_EXT_NORM) ? 0 : 1;
-
-	ASSERT(state == XFS_EXT_NORM || state == XFS_EXT_UNWRITTEN);
-	ASSERT((startoff & xfs_mask64hi(64-BMBT_STARTOFF_BITLEN)) == 0);
-	ASSERT((blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN)) == 0);
-
-	ASSERT((startblock & xfs_mask64hi(64-BMBT_STARTBLOCK_BITLEN)) == 0);
-
-	r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) |
-		((xfs_bmbt_rec_base_t)startoff << 9) |
-		((xfs_bmbt_rec_base_t)startblock >> 43);
-	r->l1 = ((xfs_bmbt_rec_base_t)startblock << 21) |
-		((xfs_bmbt_rec_base_t)blockcount &
-		(xfs_bmbt_rec_base_t)xfs_mask64lo(21));
-}
-
 /*
  * Set all the fields in a bmap extent record from the uncompressed form.
 */
 void
-xfs_bmbt_set_all(
-	xfs_bmbt_rec_host_t	*r,
-	xfs_bmbt_irec_t		*s)
-{
-	xfs_bmbt_set_allf(r, s->br_startoff, s->br_startblock,
-			  s->br_blockcount, s->br_state);
-}
-
-
-/*
- * Set all the fields in a disk format bmap extent record from the arguments.
- */
-void
-xfs_bmbt_disk_set_allf(
-	xfs_bmbt_rec_t		*r,
-	xfs_fileoff_t		startoff,
-	xfs_fsblock_t		startblock,
-	xfs_filblks_t		blockcount,
-	xfs_exntst_t		state)
-{
-	int			extent_flag = (state == XFS_EXT_NORM) ? 0 : 1;
-
-	ASSERT(state == XFS_EXT_NORM || state == XFS_EXT_UNWRITTEN);
-	ASSERT((startoff & xfs_mask64hi(64-BMBT_STARTOFF_BITLEN)) == 0);
-	ASSERT((blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN)) == 0);
-	ASSERT((startblock & xfs_mask64hi(64-BMBT_STARTBLOCK_BITLEN)) == 0);
-
-	r->l0 = cpu_to_be64(
-		((xfs_bmbt_rec_base_t)extent_flag << 63) |
-		 ((xfs_bmbt_rec_base_t)startoff << 9) |
-		 ((xfs_bmbt_rec_base_t)startblock >> 43));
-	r->l1 = cpu_to_be64(
-		((xfs_bmbt_rec_base_t)startblock << 21) |
-		 ((xfs_bmbt_rec_base_t)blockcount &
-		  (xfs_bmbt_rec_base_t)xfs_mask64lo(21)));
-}
-
-/*
- * Set all the fields in a bmap extent record from the uncompressed form.
- */
-STATIC void
 xfs_bmbt_disk_set_all(
-	xfs_bmbt_rec_t		*r,
-	xfs_bmbt_irec_t		*s)
+	struct xfs_bmbt_rec	*r,
+	struct xfs_bmbt_irec	*s)
 {
-	xfs_bmbt_disk_set_allf(r, s->br_startoff, s->br_startblock,
-			       s->br_blockcount, s->br_state);
-}
+	int			extent_flag = (s->br_state != XFS_EXT_NORM);
 
-/*
- * Set the blockcount field in a bmap extent record.
- */
-void
-xfs_bmbt_set_blockcount(
-	xfs_bmbt_rec_host_t	*r,
-	xfs_filblks_t		v)
-{
-	ASSERT((v & xfs_mask64hi(43)) == 0);
-	r->l1 = (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64hi(43)) |
-		(xfs_bmbt_rec_base_t)(v & xfs_mask64lo(21));
-}
+	ASSERT(s->br_state == XFS_EXT_NORM || s->br_state == XFS_EXT_UNWRITTEN);
+	ASSERT(!(s->br_startoff & xfs_mask64hi(64-BMBT_STARTOFF_BITLEN)));
+	ASSERT(!(s->br_blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN)));
+	ASSERT(!(s->br_startblock & xfs_mask64hi(64-BMBT_STARTBLOCK_BITLEN)));
 
-/*
- * Set the startblock field in a bmap extent record.
- */
-void
-xfs_bmbt_set_startblock(
-	xfs_bmbt_rec_host_t	*r,
-	xfs_fsblock_t		v)
-{
-	ASSERT((v & xfs_mask64hi(12)) == 0);
-	r->l0 = (r->l0 & (xfs_bmbt_rec_base_t)xfs_mask64hi(55)) |
-		(xfs_bmbt_rec_base_t)(v >> 43);
-	r->l1 = (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64lo(21)) |
-		(xfs_bmbt_rec_base_t)(v << 21);
-}
-
-/*
- * Set the startoff field in a bmap extent record.
- */
-void
-xfs_bmbt_set_startoff(
-	xfs_bmbt_rec_host_t	*r,
-	xfs_fileoff_t		v)
-{
-	ASSERT((v & xfs_mask64hi(9)) == 0);
-	r->l0 = (r->l0 & (xfs_bmbt_rec_base_t) xfs_mask64hi(1)) |
-		((xfs_bmbt_rec_base_t)v << 9) |
-		(r->l0 & (xfs_bmbt_rec_base_t)xfs_mask64lo(9));
-}
-
-/*
- * Set the extent state field in a bmap extent record.
- */
-void
-xfs_bmbt_set_state(
-	xfs_bmbt_rec_host_t	*r,
-	xfs_exntst_t		v)
-{
-	ASSERT(v == XFS_EXT_NORM || v == XFS_EXT_UNWRITTEN);
-	if (v == XFS_EXT_NORM)
-		r->l0 &= xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN);
-	else
-		r->l0 |= xfs_mask64hi(BMBT_EXNTFLAG_BITLEN);
+	put_unaligned_be64(
+		((xfs_bmbt_rec_base_t)extent_flag << 63) |
+		 ((xfs_bmbt_rec_base_t)s->br_startoff << 9) |
+		 ((xfs_bmbt_rec_base_t)s->br_startblock >> 43), &r->l0);
+	put_unaligned_be64(
+		((xfs_bmbt_rec_base_t)s->br_startblock << 21) |
+		 ((xfs_bmbt_rec_base_t)s->br_blockcount &
+		  (xfs_bmbt_rec_base_t)xfs_mask64lo(21)), &r->l1);
 }
 
 /*
+4 -18
fs/xfs/libxfs/xfs_bmap_btree.h
···
 */
 extern void xfs_bmdr_to_bmbt(struct xfs_inode *, xfs_bmdr_block_t *, int,
 			struct xfs_btree_block *, int);
-extern void xfs_bmbt_get_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s);
-extern xfs_filblks_t xfs_bmbt_get_blockcount(xfs_bmbt_rec_host_t *r);
-extern xfs_fsblock_t xfs_bmbt_get_startblock(xfs_bmbt_rec_host_t *r);
-extern xfs_fileoff_t xfs_bmbt_get_startoff(xfs_bmbt_rec_host_t *r);
-extern xfs_exntst_t xfs_bmbt_get_state(xfs_bmbt_rec_host_t *r);
 
+void xfs_bmbt_disk_set_all(struct xfs_bmbt_rec *r, struct xfs_bmbt_irec *s);
 extern xfs_filblks_t xfs_bmbt_disk_get_blockcount(xfs_bmbt_rec_t *r);
 extern xfs_fileoff_t xfs_bmbt_disk_get_startoff(xfs_bmbt_rec_t *r);
-
-extern void xfs_bmbt_set_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s);
-extern void xfs_bmbt_set_allf(xfs_bmbt_rec_host_t *r, xfs_fileoff_t o,
-			xfs_fsblock_t b, xfs_filblks_t c, xfs_exntst_t v);
-extern void xfs_bmbt_set_blockcount(xfs_bmbt_rec_host_t *r, xfs_filblks_t v);
-extern void xfs_bmbt_set_startblock(xfs_bmbt_rec_host_t *r, xfs_fsblock_t v);
-extern void xfs_bmbt_set_startoff(xfs_bmbt_rec_host_t *r, xfs_fileoff_t v);
-extern void xfs_bmbt_set_state(xfs_bmbt_rec_host_t *r, xfs_exntst_t v);
-
-extern void xfs_bmbt_disk_set_allf(xfs_bmbt_rec_t *r, xfs_fileoff_t o,
-			xfs_fsblock_t b, xfs_filblks_t c, xfs_exntst_t v);
+extern void xfs_bmbt_disk_get_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s);
 
 extern void xfs_bmbt_to_bmdr(struct xfs_mount *, struct xfs_btree_block *, int,
 			xfs_bmdr_block_t *, int);
···
 * Check that the extent does not contain an invalid unwritten extent flag.
 */
 static inline bool xfs_bmbt_validate_extent(struct xfs_mount *mp, int whichfork,
-		struct xfs_bmbt_rec_host *ep)
+		struct xfs_bmbt_irec *irec)
 {
-	if (ep->l0 >> (64 - BMBT_EXNTFLAG_BITLEN) == 0)
+	if (irec->br_state == XFS_EXT_NORM)
 		return true;
 	if (whichfork == XFS_DATA_FORK &&
 	    xfs_sb_version_hasextflgbit(&mp->m_sb))
+150 -111
fs/xfs/libxfs/xfs_btree.c
···
 #include "xfs_inode_item.h"
 #include "xfs_buf_item.h"
 #include "xfs_btree.h"
+#include "xfs_errortag.h"
 #include "xfs_error.h"
 #include "xfs_trace.h"
 #include "xfs_cksum.h"
···
 	return magic;
 }
 
-STATIC int				/* error (0 or EFSCORRUPTED) */
-xfs_btree_check_lblock(
-	struct xfs_btree_cur	*cur,	/* btree cursor */
-	struct xfs_btree_block	*block,	/* btree long form block pointer */
-	int			level,	/* level of the btree block */
-	struct xfs_buf		*bp)	/* buffer for block, if any */
+/*
+ * Check a long btree block header.  Return the address of the failing check,
+ * or NULL if everything is ok.
+ */
+xfs_failaddr_t
+__xfs_btree_check_lblock(
+	struct xfs_btree_cur	*cur,
+	struct xfs_btree_block	*block,
+	int			level,
+	struct xfs_buf		*bp)
 {
-	int			lblock_ok = 1;	/* block passes checks */
-	struct xfs_mount	*mp;	/* file system mount point */
+	struct xfs_mount	*mp = cur->bc_mp;
 	xfs_btnum_t		btnum = cur->bc_btnum;
-	int			crc;
-
-	mp = cur->bc_mp;
-	crc = xfs_sb_version_hascrc(&mp->m_sb);
+	int			crc = xfs_sb_version_hascrc(&mp->m_sb);
 
 	if (crc) {
-		lblock_ok = lblock_ok &&
-			uuid_equal(&block->bb_u.l.bb_uuid,
-				   &mp->m_sb.sb_meta_uuid) &&
-			block->bb_u.l.bb_blkno == cpu_to_be64(
-				bp ? bp->b_bn : XFS_BUF_DADDR_NULL);
+		if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid))
+			return __this_address;
+		if (block->bb_u.l.bb_blkno !=
+		    cpu_to_be64(bp ? bp->b_bn : XFS_BUF_DADDR_NULL))
+			return __this_address;
+		if (block->bb_u.l.bb_pad != cpu_to_be32(0))
+			return __this_address;
 	}
 
-	lblock_ok = lblock_ok &&
-		be32_to_cpu(block->bb_magic) == xfs_btree_magic(crc, btnum) &&
-		be16_to_cpu(block->bb_level) == level &&
-		be16_to_cpu(block->bb_numrecs) <=
-			cur->bc_ops->get_maxrecs(cur, level) &&
-		block->bb_u.l.bb_leftsib &&
-		(block->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK) ||
-		 XFS_FSB_SANITY_CHECK(mp,
-			be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
-		block->bb_u.l.bb_rightsib &&
-		(block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK) ||
-		 XFS_FSB_SANITY_CHECK(mp,
-			be64_to_cpu(block->bb_u.l.bb_rightsib)));
+	if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(crc, btnum))
+		return __this_address;
+	if (be16_to_cpu(block->bb_level) != level)
+		return __this_address;
+	if (be16_to_cpu(block->bb_numrecs) >
+	    cur->bc_ops->get_maxrecs(cur, level))
+		return __this_address;
+	if (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK) &&
+	    !xfs_btree_check_lptr(cur, be64_to_cpu(block->bb_u.l.bb_leftsib),
+			level + 1))
+		return __this_address;
+	if (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK) &&
+	    !xfs_btree_check_lptr(cur, be64_to_cpu(block->bb_u.l.bb_rightsib),
+			level + 1))
+		return __this_address;
 
-	if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
+	return NULL;
+}
+
+/* Check a long btree block header. */
+static int
+xfs_btree_check_lblock(
+	struct xfs_btree_cur	*cur,
+	struct xfs_btree_block	*block,
+	int			level,
+	struct xfs_buf		*bp)
+{
+	struct xfs_mount	*mp = cur->bc_mp;
+	xfs_failaddr_t		fa;
+
+	fa = __xfs_btree_check_lblock(cur, block, level, bp);
+	if (unlikely(XFS_TEST_ERROR(fa != NULL, mp,
 			XFS_ERRTAG_BTREE_CHECK_LBLOCK))) {
 		if (bp)
 			trace_xfs_btree_corrupt(bp, _RET_IP_);
···
 	return 0;
 }
 
-STATIC int				/* error (0 or EFSCORRUPTED) */
-xfs_btree_check_sblock(
-	struct xfs_btree_cur	*cur,	/* btree cursor */
-	struct xfs_btree_block	*block,	/* btree short form block pointer */
-	int			level,	/* level of the btree block */
-	struct xfs_buf		*bp)	/* buffer containing block */
+/*
+ * Check a short btree block header.  Return the address of the failing check,
+ * or NULL if everything is ok.
+ */
+xfs_failaddr_t
+__xfs_btree_check_sblock(
+	struct xfs_btree_cur	*cur,
+	struct xfs_btree_block	*block,
+	int			level,
+	struct xfs_buf		*bp)
 {
-	struct xfs_mount	*mp;	/* file system mount point */
-	struct xfs_buf		*agbp;	/* buffer for ag. freespace struct */
-	struct xfs_agf		*agf;	/* ag. freespace structure */
-	xfs_agblock_t		agflen;	/* native ag. freespace length */
-	int			sblock_ok = 1; /* block passes checks */
+	struct xfs_mount	*mp = cur->bc_mp;
 	xfs_btnum_t		btnum = cur->bc_btnum;
-	int			crc;
-
-	mp = cur->bc_mp;
-	crc = xfs_sb_version_hascrc(&mp->m_sb);
-	agbp = cur->bc_private.a.agbp;
-	agf = XFS_BUF_TO_AGF(agbp);
-	agflen = be32_to_cpu(agf->agf_length);
+	int			crc = xfs_sb_version_hascrc(&mp->m_sb);
 
 	if (crc) {
-		sblock_ok = sblock_ok &&
-			uuid_equal(&block->bb_u.s.bb_uuid,
-				   &mp->m_sb.sb_meta_uuid) &&
-			block->bb_u.s.bb_blkno == cpu_to_be64(
-				bp ? bp->b_bn : XFS_BUF_DADDR_NULL);
+		if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
+			return __this_address;
+		if (block->bb_u.s.bb_blkno !=
+		    cpu_to_be64(bp ? bp->b_bn : XFS_BUF_DADDR_NULL))
+			return __this_address;
 	}
 
-	sblock_ok = sblock_ok &&
-		be32_to_cpu(block->bb_magic) == xfs_btree_magic(crc, btnum) &&
-		be16_to_cpu(block->bb_level) == level &&
-		be16_to_cpu(block->bb_numrecs) <=
-			cur->bc_ops->get_maxrecs(cur, level) &&
-		(block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) ||
-		 be32_to_cpu(block->bb_u.s.bb_leftsib) < agflen) &&
-		block->bb_u.s.bb_leftsib &&
-		(block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) ||
-		 be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) &&
-		block->bb_u.s.bb_rightsib;
+	if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(crc, btnum))
+		return __this_address;
+	if (be16_to_cpu(block->bb_level) != level)
+		return __this_address;
+	if (be16_to_cpu(block->bb_numrecs) >
+	    cur->bc_ops->get_maxrecs(cur, level))
+		return __this_address;
+	if (block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK) &&
+	    !xfs_btree_check_sptr(cur, be32_to_cpu(block->bb_u.s.bb_leftsib),
+			level + 1))
+		return __this_address;
+	if (block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK) &&
+	    !xfs_btree_check_sptr(cur, be32_to_cpu(block->bb_u.s.bb_rightsib),
+			level + 1))
+		return __this_address;
 
-	if (unlikely(XFS_TEST_ERROR(!sblock_ok, mp,
+	return NULL;
+}
+
+/* Check a short btree block header. */
+STATIC int
+xfs_btree_check_sblock(
+	struct xfs_btree_cur	*cur,
+	struct xfs_btree_block	*block,
+	int			level,
+	struct xfs_buf		*bp)
+{
+	struct xfs_mount	*mp = cur->bc_mp;
+	xfs_failaddr_t		fa;
+
+	fa = __xfs_btree_check_sblock(cur, block, level, bp);
+	if (unlikely(XFS_TEST_ERROR(fa != NULL, mp,
 			XFS_ERRTAG_BTREE_CHECK_SBLOCK))) {
 		if (bp)
 			trace_xfs_btree_corrupt(bp, _RET_IP_);
···
 	return xfs_btree_check_sblock(cur, block, level, bp);
 }
 
-/*
- * Check that (long) pointer is ok.
- */
-int					/* error (0 or EFSCORRUPTED) */
+/* Check that this long pointer is valid and points within the fs. */
+bool
 xfs_btree_check_lptr(
-	struct xfs_btree_cur	*cur,	/* btree cursor */
-	xfs_fsblock_t		bno,	/* btree block disk address */
-	int			level)	/* btree block level */
+	struct xfs_btree_cur	*cur,
+	xfs_fsblock_t		fsbno,
+	int			level)
 {
-	XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
-		level > 0 &&
-		bno != NULLFSBLOCK &&
-		XFS_FSB_SANITY_CHECK(cur->bc_mp, bno));
-	return 0;
+	if (level <= 0)
+		return false;
+	return xfs_verify_fsbno(cur->bc_mp, fsbno);
+}
+
+/* Check that this short pointer is valid and points within the AG. */
+bool
+xfs_btree_check_sptr(
+	struct xfs_btree_cur	*cur,
+	xfs_agblock_t		agbno,
+	int			level)
+{
+	if (level <= 0)
+		return false;
+	return xfs_verify_agbno(cur->bc_mp, cur->bc_private.a.agno, agbno);
 }
 
 #ifdef DEBUG
 /*
- * Check that (short) pointer is ok.
+ * Check that a given (indexed) btree pointer at a certain level of a
+ * btree is valid and doesn't point past where it should.
 */
-STATIC int				/* error (0 or EFSCORRUPTED) */
-xfs_btree_check_sptr(
-	struct xfs_btree_cur	*cur,	/* btree cursor */
-	xfs_agblock_t		bno,	/* btree block disk address */
-	int			level)	/* btree block level */
-{
-	xfs_agblock_t		agblocks = cur->bc_mp->m_sb.sb_agblocks;
-
-	XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
-		level > 0 &&
-		bno != NULLAGBLOCK &&
-		bno != 0 &&
-		bno < agblocks);
-	return 0;
-}
-
-/*
- * Check that block ptr is ok.
- */
-STATIC int				/* error (0 or EFSCORRUPTED) */
+static int
 xfs_btree_check_ptr(
-	struct xfs_btree_cur	*cur,	/* btree cursor */
-	union xfs_btree_ptr	*ptr,	/* btree block disk address */
-	int			index,	/* offset from ptr to check */
-	int			level)	/* btree block level */
+	struct xfs_btree_cur	*cur,
+	union xfs_btree_ptr	*ptr,
+	int			index,
+	int			level)
 {
 	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
-		return xfs_btree_check_lptr(cur,
-				be64_to_cpu((&ptr->l)[index]), level);
+		XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
+				xfs_btree_check_lptr(cur,
+					be64_to_cpu((&ptr->l)[index]), level));
 	} else {
-		return xfs_btree_check_sptr(cur,
-				be32_to_cpu((&ptr->s)[index]), level);
+		XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
+				xfs_btree_check_sptr(cur,
+					be32_to_cpu((&ptr->s)[index]), level));
 	}
+
+	return 0;
 }
 #endif
 
···
 	}
 }
 
-STATIC int
+bool
 xfs_btree_ptr_is_null(
 	struct xfs_btree_cur	*cur,
 	union xfs_btree_ptr	*ptr)
···
 /*
  * Get/set/init sibling pointers
 */
-STATIC void
+void
 xfs_btree_get_sibling(
 	struct xfs_btree_cur	*cur,
 	struct xfs_btree_block	*block,
···
 }
 
 /* Find the high key storage area from a regular key. */
-STATIC union xfs_btree_key *
+union xfs_btree_key *
 xfs_btree_high_key_from_key(
 	struct xfs_btree_cur	*cur,
 	union xfs_btree_key	*key)
···
 }
 
 /* Derive the keys for any btree block. */
-STATIC void
+void
 xfs_btree_get_keys(
 	struct xfs_btree_cur	*cur,
 	struct xfs_btree_block	*block,
···
 	*blocks = 0;
 	return xfs_btree_visit_blocks(cur, xfs_btree_count_blocks_helper,
 			blocks);
+}
+
+/* Compare two btree pointers. */
+int64_t
+xfs_btree_diff_two_ptrs(
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_ptr	*a,
+	const union xfs_btree_ptr	*b)
+{
+	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+		return (int64_t)be64_to_cpu(a->l) - be64_to_cpu(b->l);
+	return (int64_t)be32_to_cpu(a->s) - be32_to_cpu(b->s);
 }
+30 -2
fs/xfs/libxfs/xfs_btree.h
···
 */
 #define	XFS_BUF_TO_BLOCK(bp)	((struct xfs_btree_block *)((bp)->b_addr))
 
+/*
+ * Internal long and short btree block checks.  They return NULL if the
+ * block is ok or the address of the failed check otherwise.
+ */
+xfs_failaddr_t __xfs_btree_check_lblock(struct xfs_btree_cur *cur,
+		struct xfs_btree_block *block, int level, struct xfs_buf *bp);
+xfs_failaddr_t __xfs_btree_check_sblock(struct xfs_btree_cur *cur,
+		struct xfs_btree_block *block, int level, struct xfs_buf *bp);
 
 /*
  * Check that block header is ok.
···
 /*
  * Check that (long) pointer is ok.
 */
-int					/* error (0 or EFSCORRUPTED) */
+bool					/* error (0 or EFSCORRUPTED) */
 xfs_btree_check_lptr(
 	struct xfs_btree_cur	*cur,	/* btree cursor */
-	xfs_fsblock_t		ptr,	/* btree block disk address */
+	xfs_fsblock_t		fsbno,	/* btree block disk address */
+	int			level);	/* btree block level */
+
+/*
+ * Check that (short) pointer is ok.
+ */
+bool					/* error (0 or EFSCORRUPTED) */
+xfs_btree_check_sptr(
+	struct xfs_btree_cur	*cur,	/* btree cursor */
+	xfs_agblock_t		agbno,	/* btree block disk address */
 	int			level);	/* btree block level */
 
 /*
···
 		union xfs_btree_ptr *pp, struct xfs_btree_block **blkp);
 struct xfs_btree_block *xfs_btree_get_block(struct xfs_btree_cur *cur,
 		int level, struct xfs_buf **bpp);
+bool xfs_btree_ptr_is_null(struct xfs_btree_cur *cur, union xfs_btree_ptr *ptr);
+int64_t xfs_btree_diff_two_ptrs(struct xfs_btree_cur *cur,
+		const union xfs_btree_ptr *a,
+		const union xfs_btree_ptr *b);
+void xfs_btree_get_sibling(struct xfs_btree_cur *cur,
+		struct xfs_btree_block *block,
+		union xfs_btree_ptr *ptr, int lr);
+void xfs_btree_get_keys(struct xfs_btree_cur *cur,
+		struct xfs_btree_block *block, union xfs_btree_key *key);
+union xfs_btree_key *xfs_btree_high_key_from_key(struct xfs_btree_cur *cur,
+		union xfs_btree_key *key);
 
 #endif	/* __XFS_BTREE_H__ */
fs/xfs/libxfs/xfs_da_btree.c  (+21 -1)
···
 	int			max;
 	int			error;
 	int			retval;
+	unsigned int		expected_level = 0;
 	struct xfs_inode	*dp = state->args->dp;
 
 	args = state->args;
···
 	 * Descend thru the B-tree searching each level for the right
 	 * node to use, until the right hashval is found.
 	 */
-	blkno = (args->whichfork == XFS_DATA_FORK)? args->geo->leafblk : 0;
+	blkno = args->geo->leafblk;
 	for (blk = &state->path.blk[0], state->path.active = 1;
 		 state->path.active <= XFS_DA_NODE_MAXDEPTH;
 		 blk++, state->path.active++) {
···
 		node = blk->bp->b_addr;
 		dp->d_ops->node_hdr_from_disk(&nodehdr, node);
 		btree = dp->d_ops->node_tree_p(node);
+
+		/* Tree taller than we can handle; bail out! */
+		if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH)
+			return -EFSCORRUPTED;
+
+		/* Check the level from the root. */
+		if (blkno == args->geo->leafblk)
+			expected_level = nodehdr.level - 1;
+		else if (expected_level != nodehdr.level)
+			return -EFSCORRUPTED;
+		else
+			expected_level--;
 
 		max = nodehdr.count;
 		blk->hashval = be32_to_cpu(btree[max - 1].hashval);
···
 		blk->index = probe;
 		blkno = be32_to_cpu(btree[probe].before);
+
+		/* We can't point back to the root. */
+		if (blkno == args->geo->leafblk)
+			return -EFSCORRUPTED;
 	}
+
+	if (expected_level != 0)
+		return -EFSCORRUPTED;
 
 	/*
 	 * A leaf block that ends in the hashval that we are interested in
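The expected_level bookkeeping added above can be sketched in isolation: the root node declares the tree height, every node after it must sit exactly one level lower than its parent, and the descent must bottom out just above the leaves. A userspace sketch under those assumptions (`check_levels` and the `-1` error code are illustrative, standing in for `-EFSCORRUPTED`; they are not kernel API):

```c
#include <assert.h>

/*
 * Walk a list of node levels as read during a top-down descent and apply
 * the same checks the xfs_da3_node_lookup_int() hunk above now does:
 * reject over-tall trees and any child whose level isn't one less than
 * its parent's.  Returns 0 if the chain is consistent, -1 otherwise.
 */
static int check_levels(const int *levels, int n, int maxdepth)
{
	int expected_level = 0;
	int i;

	for (i = 0; i < n; i++) {
		if (levels[i] >= maxdepth)
			return -1;
		if (i == 0)			/* root states the expectation */
			expected_level = levels[i] - 1;
		else if (expected_level != levels[i])
			return -1;
		else
			expected_level--;
	}
	/* the last node visited must have been just above the leaves */
	return expected_level == 0 ? 0 : -1;
}
```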
fs/xfs/libxfs/xfs_dir2.c  (+6 -16)
···
 #include "xfs_bmap.h"
 #include "xfs_dir2.h"
 #include "xfs_dir2_priv.h"
+#include "xfs_ialloc.h"
+#include "xfs_errortag.h"
 #include "xfs_error.h"
 #include "xfs_trace.h"
···
 /*
  * Convert inode mode to directory entry filetype
  */
-unsigned char xfs_mode_to_ftype(int mode)
+unsigned char
+xfs_mode_to_ftype(
+	int		mode)
 {
 	switch (mode & S_IFMT) {
 	case S_IFREG:
···
 	xfs_mount_t	*mp,
 	xfs_ino_t	ino)
 {
-	xfs_agblock_t	agblkno;
-	xfs_agino_t	agino;
-	xfs_agnumber_t	agno;
-	int		ino_ok;
-	int		ioff;
+	bool		ino_ok = xfs_verify_dir_ino(mp, ino);
 
-	agno = XFS_INO_TO_AGNO(mp, ino);
-	agblkno = XFS_INO_TO_AGBNO(mp, ino);
-	ioff = XFS_INO_TO_OFFSET(mp, ino);
-	agino = XFS_OFFBNO_TO_AGINO(mp, agblkno, ioff);
-	ino_ok =
-		agno < mp->m_sb.sb_agcount &&
-		agblkno < mp->m_sb.sb_agblocks &&
-		agblkno != 0 &&
-		ioff < (1 << mp->m_sb.sb_inopblog) &&
-		XFS_AGINO_TO_INO(mp, agno, agino) == ino;
 	if (unlikely(XFS_TEST_ERROR(!ino_ok, mp, XFS_ERRTAG_DIR_INO_VALIDATE))) {
 		xfs_warn(mp, "Invalid inode number 0x%Lx",
 				(unsigned long long) ino);
fs/xfs/libxfs/xfs_dir2.h  (+17)
···
 			sizeof(struct xfs_dir2_leaf_tail));
 }
 
+/*
+ * The Linux API doesn't pass down the total size of the buffer
+ * we read into down to the filesystem.  With the filldir concept
+ * it's not needed for correct information, but the XFS dir2 leaf
+ * code wants an estimate of the buffer size to calculate its
+ * readahead window and size the buffers used for mapping to
+ * physical blocks.
+ *
+ * Try to give it an estimate that's good enough, maybe at some
+ * point we can change the ->readdir prototype to include the
+ * buffer size.  For now we use the current glibc buffer size.
+ * musl libc hardcodes 2k and dietlibc uses PAGE_SIZE.
+ */
+#define XFS_READDIR_BUFSIZE	(32768)
+
+unsigned char xfs_dir3_get_dtype(struct xfs_mount *mp, uint8_t filetype);
+
 #endif	/* __XFS_DIR2_H__ */
fs/xfs/libxfs/xfs_errortag.h  (+106, new file)
+/*
+ * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
+ * Copyright (C) 2017 Oracle.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#ifndef __XFS_ERRORTAG_H_
+#define __XFS_ERRORTAG_H_
+
+/*
+ * error injection tags - the labels can be anything you want
+ * but each tag should have its own unique number
+ */
+
+#define XFS_ERRTAG_NOERROR			0
+#define XFS_ERRTAG_IFLUSH_1			1
+#define XFS_ERRTAG_IFLUSH_2			2
+#define XFS_ERRTAG_IFLUSH_3			3
+#define XFS_ERRTAG_IFLUSH_4			4
+#define XFS_ERRTAG_IFLUSH_5			5
+#define XFS_ERRTAG_IFLUSH_6			6
+#define XFS_ERRTAG_DA_READ_BUF			7
+#define XFS_ERRTAG_BTREE_CHECK_LBLOCK		8
+#define XFS_ERRTAG_BTREE_CHECK_SBLOCK		9
+#define XFS_ERRTAG_ALLOC_READ_AGF		10
+#define XFS_ERRTAG_IALLOC_READ_AGI		11
+#define XFS_ERRTAG_ITOBP_INOTOBP		12
+#define XFS_ERRTAG_IUNLINK			13
+#define XFS_ERRTAG_IUNLINK_REMOVE		14
+#define XFS_ERRTAG_DIR_INO_VALIDATE		15
+#define XFS_ERRTAG_BULKSTAT_READ_CHUNK		16
+#define XFS_ERRTAG_IODONE_IOERR			17
+#define XFS_ERRTAG_STRATREAD_IOERR		18
+#define XFS_ERRTAG_STRATCMPL_IOERR		19
+#define XFS_ERRTAG_DIOWRITE_IOERR		20
+#define XFS_ERRTAG_BMAPIFORMAT			21
+#define XFS_ERRTAG_FREE_EXTENT			22
+#define XFS_ERRTAG_RMAP_FINISH_ONE		23
+#define XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE	24
+#define XFS_ERRTAG_REFCOUNT_FINISH_ONE		25
+#define XFS_ERRTAG_BMAP_FINISH_ONE		26
+#define XFS_ERRTAG_AG_RESV_CRITICAL		27
+/*
+ * DEBUG mode instrumentation to test and/or trigger delayed allocation
+ * block killing in the event of failed writes.  When enabled, all
+ * buffered writes are silently dropped and handled as if they failed.
+ * All delalloc blocks in the range of the write (including pre-existing
+ * delalloc blocks!) are tossed as part of the write failure error
+ * handling sequence.
+ */
+#define XFS_ERRTAG_DROP_WRITES			28
+#define XFS_ERRTAG_LOG_BAD_CRC			29
+#define XFS_ERRTAG_LOG_ITEM_PIN			30
+#define XFS_ERRTAG_BUF_LRU_REF			31
+#define XFS_ERRTAG_MAX				32
+
+/*
+ * Random factors for above tags, 1 means always, 2 means 1/2 time, etc.
+ */
+#define XFS_RANDOM_DEFAULT			100
+#define XFS_RANDOM_IFLUSH_1			XFS_RANDOM_DEFAULT
+#define XFS_RANDOM_IFLUSH_2			XFS_RANDOM_DEFAULT
+#define XFS_RANDOM_IFLUSH_3			XFS_RANDOM_DEFAULT
+#define XFS_RANDOM_IFLUSH_4			XFS_RANDOM_DEFAULT
+#define XFS_RANDOM_IFLUSH_5			XFS_RANDOM_DEFAULT
+#define XFS_RANDOM_IFLUSH_6			XFS_RANDOM_DEFAULT
+#define XFS_RANDOM_DA_READ_BUF			XFS_RANDOM_DEFAULT
+#define XFS_RANDOM_BTREE_CHECK_LBLOCK		(XFS_RANDOM_DEFAULT/4)
+#define XFS_RANDOM_BTREE_CHECK_SBLOCK		XFS_RANDOM_DEFAULT
+#define XFS_RANDOM_ALLOC_READ_AGF		XFS_RANDOM_DEFAULT
+#define XFS_RANDOM_IALLOC_READ_AGI		XFS_RANDOM_DEFAULT
+#define XFS_RANDOM_ITOBP_INOTOBP		XFS_RANDOM_DEFAULT
+#define XFS_RANDOM_IUNLINK			XFS_RANDOM_DEFAULT
+#define XFS_RANDOM_IUNLINK_REMOVE		XFS_RANDOM_DEFAULT
+#define XFS_RANDOM_DIR_INO_VALIDATE		XFS_RANDOM_DEFAULT
+#define XFS_RANDOM_BULKSTAT_READ_CHUNK		XFS_RANDOM_DEFAULT
+#define XFS_RANDOM_IODONE_IOERR			(XFS_RANDOM_DEFAULT/10)
+#define XFS_RANDOM_STRATREAD_IOERR		(XFS_RANDOM_DEFAULT/10)
+#define XFS_RANDOM_STRATCMPL_IOERR		(XFS_RANDOM_DEFAULT/10)
+#define XFS_RANDOM_DIOWRITE_IOERR		(XFS_RANDOM_DEFAULT/10)
+#define XFS_RANDOM_BMAPIFORMAT			XFS_RANDOM_DEFAULT
+#define XFS_RANDOM_FREE_EXTENT			1
+#define XFS_RANDOM_RMAP_FINISH_ONE		1
+#define XFS_RANDOM_REFCOUNT_CONTINUE_UPDATE	1
+#define XFS_RANDOM_REFCOUNT_FINISH_ONE		1
+#define XFS_RANDOM_BMAP_FINISH_ONE		1
+#define XFS_RANDOM_AG_RESV_CRITICAL		4
+#define XFS_RANDOM_DROP_WRITES			1
+#define XFS_RANDOM_LOG_BAD_CRC			1
+#define XFS_RANDOM_LOG_ITEM_PIN			1
+#define XFS_RANDOM_BUF_LRU_REF			2
+
+#endif /* __XFS_ERRORTAG_H_ */
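The XFS_RANDOM_* values are trigger frequencies: 1 fires on every opportunity, 2 on roughly half of them, and 100 (XFS_RANDOM_DEFAULT) on about one in a hundred. A deterministic userspace sketch of that contract, using a counter in place of the kernel's random draw so the behaviour is reproducible (`should_inject` is a made-up helper, not kernel code):

```c
#include <assert.h>

/*
 * Model "trigger 1 time in freq": fire on every freq-th opportunity
 * instead of randomly.  freq == 0 means the tag is disabled, matching
 * the convention that an unset tag never injects an error.
 */
static int should_inject(unsigned int opportunity, unsigned int freq)
{
	if (freq == 0)
		return 0;
	return (opportunity % freq) == 0;
}
```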
fs/xfs/libxfs/xfs_format.h  (+10 -27)
···
 	return false;
 }
 
+static inline bool xfs_sb_version_hasrealtime(struct xfs_sb *sbp)
+{
+	return sbp->sb_rblocks > 0;
+}
+
 /*
  * Detect a mismatched features2 field.  Older kernels read/wrote
  * this into the wrong slot, so to be safe we keep them in sync.
···
 /*
  * V5 superblock specific feature checks
  */
-static inline int xfs_sb_version_hascrc(struct xfs_sb *sbp)
+static inline bool xfs_sb_version_hascrc(struct xfs_sb *sbp)
 {
 	return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5;
 }
 
-static inline int xfs_sb_version_has_pquotino(struct xfs_sb *sbp)
+static inline bool xfs_sb_version_has_pquotino(struct xfs_sb *sbp)
 {
 	return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5;
 }
···
 		(sbp->sb_features2 & XFS_SB_VERSION2_FTYPE));
 }
 
-static inline int xfs_sb_version_hasfinobt(xfs_sb_t *sbp)
+static inline bool xfs_sb_version_hasfinobt(xfs_sb_t *sbp)
 {
 	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) &&
 		(sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FINOBT);
···
 	XFS_DINODE_FMT_LOCAL,		/* bulk data */
 	XFS_DINODE_FMT_EXTENTS,		/* struct xfs_bmbt_rec */
 	XFS_DINODE_FMT_BTREE,		/* struct xfs_bmdr_block */
-	XFS_DINODE_FMT_UUID		/* uuid_t */
+	XFS_DINODE_FMT_UUID		/* added long ago, but never used */
 } xfs_dinode_fmt_t;
 
 /*
···
 * Dquot and dquot block format definitions
 */
 #define XFS_DQUOT_MAGIC		0x4451		/* 'DQ' */
-#define XFS_DQUOT_VERSION	(u_int8_t)0x01	/* latest version number */
+#define XFS_DQUOT_VERSION	(uint8_t)0x01	/* latest version number */
 
 /*
  * This is the main portion of the on-disk representation of quota
···
 typedef uint64_t	xfs_bmbt_rec_base_t;	/* use this for casts */
 typedef xfs_bmbt_rec_t xfs_bmdr_rec_t;
 
-typedef struct xfs_bmbt_rec_host {
-	uint64_t		l0, l1;
-} xfs_bmbt_rec_host_t;
-
 /*
  * Values and macros for delayed-allocation startblock fields.
  */
···
 {
 	return (xfs_filblks_t)((x) & ~STARTBLOCKMASK);
 }
-
-/*
- * Possible extent states.
- */
-typedef enum {
-	XFS_EXT_NORM, XFS_EXT_UNWRITTEN,
-} xfs_exntst_t;
-
-/*
- * Incore version of above.
- */
-typedef struct xfs_bmbt_irec
-{
-	xfs_fileoff_t	br_startoff;	/* starting file offset */
-	xfs_fsblock_t	br_startblock;	/* starting block number */
-	xfs_filblks_t	br_blockcount;	/* number of blocks */
-	xfs_exntst_t	br_state;	/* extent state */
-} xfs_bmbt_irec_t;
 
 /*
  * Key structure for non-leaf levels of the tree.
fs/xfs/libxfs/xfs_fs.h  (+77)
···
 #define XFS_FSOP_GOING_FLAGS_LOGFLUSH		0x1	/* flush log but not data */
 #define XFS_FSOP_GOING_FLAGS_NOLOGFLUSH		0x2	/* don't flush log nor data */
 
+/* metadata scrubbing */
+struct xfs_scrub_metadata {
+	__u32 sm_type;		/* What to check? */
+	__u32 sm_flags;		/* flags; see below. */
+	__u64 sm_ino;		/* inode number. */
+	__u32 sm_gen;		/* inode generation. */
+	__u32 sm_agno;		/* ag number. */
+	__u64 sm_reserved[5];	/* pad to 64 bytes */
+};
+
+/*
+ * Metadata types and flags for scrub operation.
+ */
+
+/* Scrub subcommands. */
+#define XFS_SCRUB_TYPE_PROBE	0	/* presence test ioctl */
+#define XFS_SCRUB_TYPE_SB	1	/* superblock */
+#define XFS_SCRUB_TYPE_AGF	2	/* AG free header */
+#define XFS_SCRUB_TYPE_AGFL	3	/* AG free list */
+#define XFS_SCRUB_TYPE_AGI	4	/* AG inode header */
+#define XFS_SCRUB_TYPE_BNOBT	5	/* freesp by block btree */
+#define XFS_SCRUB_TYPE_CNTBT	6	/* freesp by length btree */
+#define XFS_SCRUB_TYPE_INOBT	7	/* inode btree */
+#define XFS_SCRUB_TYPE_FINOBT	8	/* free inode btree */
+#define XFS_SCRUB_TYPE_RMAPBT	9	/* reverse mapping btree */
+#define XFS_SCRUB_TYPE_REFCNTBT	10	/* reference count btree */
+#define XFS_SCRUB_TYPE_INODE	11	/* inode record */
+#define XFS_SCRUB_TYPE_BMBTD	12	/* data fork block mapping */
+#define XFS_SCRUB_TYPE_BMBTA	13	/* attr fork block mapping */
+#define XFS_SCRUB_TYPE_BMBTC	14	/* CoW fork block mapping */
+#define XFS_SCRUB_TYPE_DIR	15	/* directory */
+#define XFS_SCRUB_TYPE_XATTR	16	/* extended attribute */
+#define XFS_SCRUB_TYPE_SYMLINK	17	/* symbolic link */
+#define XFS_SCRUB_TYPE_PARENT	18	/* parent pointers */
+#define XFS_SCRUB_TYPE_RTBITMAP	19	/* realtime bitmap */
+#define XFS_SCRUB_TYPE_RTSUM	20	/* realtime summary */
+#define XFS_SCRUB_TYPE_UQUOTA	21	/* user quotas */
+#define XFS_SCRUB_TYPE_GQUOTA	22	/* group quotas */
+#define XFS_SCRUB_TYPE_PQUOTA	23	/* project quotas */
+
+/* Number of scrub subcommands. */
+#define XFS_SCRUB_TYPE_NR	24
+
+/* i: Repair this metadata. */
+#define XFS_SCRUB_IFLAG_REPAIR		(1 << 0)
+
+/* o: Metadata object needs repair. */
+#define XFS_SCRUB_OFLAG_CORRUPT		(1 << 1)
+
+/*
+ * o: Metadata object could be optimized.  It's not corrupt, but
+ *    we could improve on it somehow.
+ */
+#define XFS_SCRUB_OFLAG_PREEN		(1 << 2)
+
+/* o: Cross-referencing failed. */
+#define XFS_SCRUB_OFLAG_XFAIL		(1 << 3)
+
+/* o: Metadata object disagrees with cross-referenced metadata. */
+#define XFS_SCRUB_OFLAG_XCORRUPT	(1 << 4)
+
+/* o: Scan was not complete. */
+#define XFS_SCRUB_OFLAG_INCOMPLETE	(1 << 5)
+
+/* o: Metadata object looked funny but isn't corrupt. */
+#define XFS_SCRUB_OFLAG_WARNING		(1 << 6)
+
+#define XFS_SCRUB_FLAGS_IN	(XFS_SCRUB_IFLAG_REPAIR)
+#define XFS_SCRUB_FLAGS_OUT	(XFS_SCRUB_OFLAG_CORRUPT | \
+				 XFS_SCRUB_OFLAG_PREEN | \
+				 XFS_SCRUB_OFLAG_XFAIL | \
+				 XFS_SCRUB_OFLAG_XCORRUPT | \
+				 XFS_SCRUB_OFLAG_INCOMPLETE | \
+				 XFS_SCRUB_OFLAG_WARNING)
+#define XFS_SCRUB_FLAGS_ALL	(XFS_SCRUB_FLAGS_IN | XFS_SCRUB_FLAGS_OUT)
+
 /*
  * ioctl limits
  */
···
 #define XFS_IOC_ZERO_RANGE	_IOW ('X', 57, struct xfs_flock64)
 #define XFS_IOC_FREE_EOFBLOCKS	_IOR ('X', 58, struct xfs_fs_eofblocks)
 /*	XFS_IOC_GETFSMAP ------ hoisted 59         */
+#define XFS_IOC_SCRUB_METADATA	_IOWR('X', 60, struct xfs_scrub_metadata)
 
 /*
  * ioctl commands that replace IRIX syssgi()'s
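The XFS_SCRUB_FLAGS_IN/OUT split above partitions sm_flags between userspace and kernel: a caller may only set input flags on the way in, and results come back through the output flags. A hedged sketch of that validation using the same bit assignments (`scrub_flags_valid` is illustrative, not the kernel's actual checker):

```c
#include <assert.h>

/* Same bit positions as the XFS_SCRUB_* flags above, renamed to make
 * clear this is a standalone sketch. */
#define SCRUB_IFLAG_REPAIR	(1u << 0)	/* i: ask for repair */
#define SCRUB_OFLAG_CORRUPT	(1u << 1)	/* o: needs repair */
#define SCRUB_OFLAG_PREEN	(1u << 2)	/* o: could be optimized */

#define SCRUB_FLAGS_IN		(SCRUB_IFLAG_REPAIR)
#define SCRUB_FLAGS_OUT		(SCRUB_OFLAG_CORRUPT | SCRUB_OFLAG_PREEN)

/* Reject any request that sets output or unknown bits on the way in. */
static int scrub_flags_valid(unsigned int sm_flags)
{
	return (sm_flags & ~SCRUB_FLAGS_IN) == 0;
}
```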
fs/xfs/libxfs/xfs_ialloc.c  (+91)
···
 #include "xfs_ialloc_btree.h"
 #include "xfs_alloc.h"
 #include "xfs_rtalloc.h"
+#include "xfs_errortag.h"
 #include "xfs_error.h"
 #include "xfs_bmap.h"
 #include "xfs_cksum.h"
···
 	if (bp)
 		xfs_trans_brelse(tp, bp);
 	return 0;
+}
+
+/* Calculate the first and last possible inode number in an AG. */
+void
+xfs_ialloc_agino_range(
+	struct xfs_mount	*mp,
+	xfs_agnumber_t		agno,
+	xfs_agino_t		*first,
+	xfs_agino_t		*last)
+{
+	xfs_agblock_t		bno;
+	xfs_agblock_t		eoag;
+
+	eoag = xfs_ag_block_count(mp, agno);
+
+	/*
+	 * Calculate the first inode, which will be in the first
+	 * cluster-aligned block after the AGFL.
+	 */
+	bno = round_up(XFS_AGFL_BLOCK(mp) + 1,
+			xfs_ialloc_cluster_alignment(mp));
+	*first = XFS_OFFBNO_TO_AGINO(mp, bno, 0);
+
+	/*
+	 * Calculate the last inode, which will be at the end of the
+	 * last (aligned) cluster that can be allocated in the AG.
+	 */
+	bno = round_down(eoag, xfs_ialloc_cluster_alignment(mp));
+	*last = XFS_OFFBNO_TO_AGINO(mp, bno, 0) - 1;
+}
+
+/*
+ * Verify that an AG inode number pointer neither points outside the AG
+ * nor points at static metadata.
+ */
+bool
+xfs_verify_agino(
+	struct xfs_mount	*mp,
+	xfs_agnumber_t		agno,
+	xfs_agino_t		agino)
+{
+	xfs_agino_t		first;
+	xfs_agino_t		last;
+
+	xfs_ialloc_agino_range(mp, agno, &first, &last);
+	return agino >= first && agino <= last;
+}
+
+/*
+ * Verify that an FS inode number pointer neither points outside the
+ * filesystem nor points at static AG metadata.
+ */
+bool
+xfs_verify_ino(
+	struct xfs_mount	*mp,
+	xfs_ino_t		ino)
+{
+	xfs_agnumber_t		agno = XFS_INO_TO_AGNO(mp, ino);
+	xfs_agino_t		agino = XFS_INO_TO_AGINO(mp, ino);
+
+	if (agno >= mp->m_sb.sb_agcount)
+		return false;
+	if (XFS_AGINO_TO_INO(mp, agno, agino) != ino)
+		return false;
+	return xfs_verify_agino(mp, agno, agino);
+}
+
+/* Is this an internal inode number? */
+bool
+xfs_internal_inum(
+	struct xfs_mount	*mp,
+	xfs_ino_t		ino)
+{
+	return ino == mp->m_sb.sb_rbmino || ino == mp->m_sb.sb_rsumino ||
+		(xfs_sb_version_hasquota(&mp->m_sb) &&
+		 xfs_is_quota_inode(&mp->m_sb, ino));
+}
+
+/*
+ * Verify that a directory entry's inode number doesn't point at an internal
+ * inode, empty space, or static AG metadata.
+ */
+bool
+xfs_verify_dir_ino(
+	struct xfs_mount	*mp,
+	xfs_ino_t		ino)
+{
+	if (xfs_internal_inum(mp, ino))
+		return false;
+	return xfs_verify_ino(mp, ino);
 }
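xfs_ialloc_agino_range() above leans on two alignment helpers: round the first candidate block after the AGFL up to the cluster alignment, and round the end-of-AG block down to it. A self-contained sketch of those helpers with made-up geometry numbers (the kernel's round_up()/round_down() macros require power-of-two alignments; the versions below accept any non-zero alignment):

```c
#include <assert.h>
#include <stdint.h>

/* Smallest multiple of align that is >= x. */
static uint32_t round_up_u32(uint32_t x, uint32_t align)
{
	return ((x + align - 1) / align) * align;
}

/* Largest multiple of align that is <= x. */
static uint32_t round_down_u32(uint32_t x, uint32_t align)
{
	return (x / align) * align;
}
```

With a hypothetical cluster alignment of 4 blocks and an AGFL ending at block 4, the first inode cluster would start at block `round_up_u32(5, 4)` and the last at `round_down_u32(eoag, 4)`.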
fs/xfs/libxfs/xfs_ialloc.h  (+7)
···
 		struct xfs_inobt_rec_incore *irec);
 
 int xfs_ialloc_cluster_alignment(struct xfs_mount *mp);
+void xfs_ialloc_agino_range(struct xfs_mount *mp, xfs_agnumber_t agno,
+		xfs_agino_t *first, xfs_agino_t *last);
+bool xfs_verify_agino(struct xfs_mount *mp, xfs_agnumber_t agno,
+		xfs_agino_t agino);
+bool xfs_verify_ino(struct xfs_mount *mp, xfs_ino_t ino);
+bool xfs_internal_inum(struct xfs_mount *mp, xfs_ino_t ino);
+bool xfs_verify_dir_ino(struct xfs_mount *mp, xfs_ino_t ino);
 
 #endif	/* __XFS_IALLOC_H__ */
fs/xfs/libxfs/xfs_iext_tree.c  (+1043, new file; listing truncated below)
+/*
+ * Copyright (c) 2017 Christoph Hellwig.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/cache.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include "xfs.h"
+#include "xfs_format.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_inode.h"
+#include "xfs_inode_fork.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_trace.h"
+
+/*
+ * In-core extent record layout:
+ *
+ * +-------+----------------------------+
+ * | 00:53 | all 54 bits of startoff    |
+ * | 54:63 | low 10 bits of startblock  |
+ * +-------+----------------------------+
+ * | 00:20 | all 21 bits of length      |
+ * |    21 | unwritten extent bit       |
+ * | 22:63 | high 42 bits of startblock |
+ * +-------+----------------------------+
+ */
+#define XFS_IEXT_STARTOFF_MASK		xfs_mask64lo(BMBT_STARTOFF_BITLEN)
+#define XFS_IEXT_LENGTH_MASK		xfs_mask64lo(BMBT_BLOCKCOUNT_BITLEN)
+#define XFS_IEXT_STARTBLOCK_MASK	xfs_mask64lo(BMBT_STARTBLOCK_BITLEN)
+
+struct xfs_iext_rec {
+	uint64_t			lo;
+	uint64_t			hi;
+};
+
+/*
+ * Given that the length can't be a zero, only an empty hi value indicates an
+ * unused record.
+ */
+static bool xfs_iext_rec_is_empty(struct xfs_iext_rec *rec)
+{
+	return rec->hi == 0;
+}
+
+static inline void xfs_iext_rec_clear(struct xfs_iext_rec *rec)
+{
+	rec->lo = 0;
+	rec->hi = 0;
+}
+
+static void
+xfs_iext_set(
+	struct xfs_iext_rec	*rec,
+	struct xfs_bmbt_irec	*irec)
+{
+	ASSERT((irec->br_startoff & ~XFS_IEXT_STARTOFF_MASK) == 0);
+	ASSERT((irec->br_blockcount & ~XFS_IEXT_LENGTH_MASK) == 0);
+	ASSERT((irec->br_startblock & ~XFS_IEXT_STARTBLOCK_MASK) == 0);
+
+	rec->lo = irec->br_startoff & XFS_IEXT_STARTOFF_MASK;
+	rec->hi = irec->br_blockcount & XFS_IEXT_LENGTH_MASK;
+
+	rec->lo |= (irec->br_startblock << 54);
+	rec->hi |= ((irec->br_startblock & ~xfs_mask64lo(10)) << (22 - 10));
+
+	if (irec->br_state == XFS_EXT_UNWRITTEN)
+		rec->hi |= (1 << 21);
+}
+
+static void
+xfs_iext_get(
+	struct xfs_bmbt_irec	*irec,
+	struct xfs_iext_rec	*rec)
+{
+	irec->br_startoff = rec->lo & XFS_IEXT_STARTOFF_MASK;
+	irec->br_blockcount = rec->hi & XFS_IEXT_LENGTH_MASK;
+
+	irec->br_startblock = rec->lo >> 54;
+	irec->br_startblock |= (rec->hi & xfs_mask64hi(42)) >> (22 - 10);
+
+	if (rec->hi & (1 << 21))
+		irec->br_state = XFS_EXT_UNWRITTEN;
+	else
+		irec->br_state = XFS_EXT_NORM;
+}
+
+enum {
+	NODE_SIZE	= 256,
+	KEYS_PER_NODE	= NODE_SIZE / (sizeof(uint64_t) + sizeof(void *)),
+	RECS_PER_LEAF	= (NODE_SIZE - (2 * sizeof(struct xfs_iext_leaf *))) /
+				sizeof(struct xfs_iext_rec),
+};
+
+/*
+ * In-core extent btree block layout:
+ *
+ * There are two types of blocks in the btree: leaf and inner (non-leaf) blocks.
+ *
+ * The leaf blocks are made up by %KEYS_PER_NODE extent records, which each
+ * contain the startoffset, blockcount, startblock and unwritten extent flag.
+ * See above for the exact format, followed by pointers to the previous and next
+ * leaf blocks (if there are any).
+ *
+ * The inner (non-leaf) blocks first contain KEYS_PER_NODE lookup keys, followed
+ * by an equal number of pointers to the btree blocks at the next lower level.
+ *
+ *		+-------+-------+-------+-------+-------+----------+----------+
+ * Leaf:	| rec 1 | rec 2 | rec 3 | rec 4 | rec N | prev-ptr | next-ptr |
+ *		+-------+-------+-------+-------+-------+----------+----------+
+ *
+ *		+-------+-------+-------+-------+-------+-------+------+-------+
+ * Inner:	| key 1 | key 2 | key 3 | key N | ptr 1 | ptr 2 | ptr3 | ptr N |
+ *		+-------+-------+-------+-------+-------+-------+------+-------+
+ */
+struct xfs_iext_node {
+	uint64_t		keys[KEYS_PER_NODE];
+#define XFS_IEXT_KEY_INVALID	(1ULL << 63)
+	void			*ptrs[KEYS_PER_NODE];
+};
+
+struct xfs_iext_leaf {
+	struct xfs_iext_rec	recs[RECS_PER_LEAF];
+	struct xfs_iext_leaf	*prev;
+	struct xfs_iext_leaf	*next;
+};
+
+inline xfs_extnum_t xfs_iext_count(struct xfs_ifork *ifp)
+{
+	return ifp->if_bytes / sizeof(struct xfs_iext_rec);
+}
+
+static inline int xfs_iext_max_recs(struct xfs_ifork *ifp)
+{
+	if (ifp->if_height == 1)
+		return xfs_iext_count(ifp);
+	return RECS_PER_LEAF;
+}
+
+static inline struct xfs_iext_rec *cur_rec(struct xfs_iext_cursor *cur)
+{
+	return &cur->leaf->recs[cur->pos];
+}
+
+static inline bool xfs_iext_valid(struct xfs_ifork *ifp,
+		struct xfs_iext_cursor *cur)
+{
+	if (!cur->leaf)
+		return false;
+	if (cur->pos < 0 || cur->pos >= xfs_iext_max_recs(ifp))
+		return false;
+	if (xfs_iext_rec_is_empty(cur_rec(cur)))
+		return false;
+	return true;
+}
+
+static void *
+xfs_iext_find_first_leaf(
+	struct xfs_ifork	*ifp)
+{
+	struct xfs_iext_node	*node = ifp->if_u1.if_root;
+	int			height;
+
+	if (!ifp->if_height)
+		return NULL;
+
+	for (height = ifp->if_height; height > 1; height--) {
+		node = node->ptrs[0];
+		ASSERT(node);
+	}
+
+	return node;
+}
+
+static void *
+xfs_iext_find_last_leaf(
+	struct xfs_ifork	*ifp)
+{
+	struct xfs_iext_node	*node = ifp->if_u1.if_root;
+	int			height, i;
+
+	if (!ifp->if_height)
+		return NULL;
+
+	for (height = ifp->if_height; height > 1; height--) {
+		for (i = 1; i < KEYS_PER_NODE; i++)
+			if (!node->ptrs[i])
+				break;
+		node = node->ptrs[i - 1];
+		ASSERT(node);
+	}
+
+	return node;
+}
+
+void
+xfs_iext_first(
+	struct xfs_ifork	*ifp,
+	struct xfs_iext_cursor	*cur)
+{
+	cur->pos = 0;
+	cur->leaf = xfs_iext_find_first_leaf(ifp);
+}
+
+void
+xfs_iext_last(
+	struct xfs_ifork	*ifp,
+	struct xfs_iext_cursor	*cur)
+{
+	int			i;
+
+	cur->leaf = xfs_iext_find_last_leaf(ifp);
+	if (!cur->leaf) {
+		cur->pos = 0;
+		return;
+	}
+
+	for (i = 1; i < xfs_iext_max_recs(ifp); i++) {
+		if (xfs_iext_rec_is_empty(&cur->leaf->recs[i]))
+			break;
+	}
+	cur->pos = i - 1;
+}
+
+void
+xfs_iext_next(
+	struct xfs_ifork	*ifp,
+	struct xfs_iext_cursor	*cur)
+{
+	if (!cur->leaf) {
+		ASSERT(cur->pos <= 0 || cur->pos >= RECS_PER_LEAF);
+		xfs_iext_first(ifp, cur);
+		return;
+	}
+
+	ASSERT(cur->pos >= 0);
+	ASSERT(cur->pos < xfs_iext_max_recs(ifp));
+
+	cur->pos++;
+	if (ifp->if_height > 1 && !xfs_iext_valid(ifp, cur) &&
+	    cur->leaf->next) {
+		cur->leaf = cur->leaf->next;
+		cur->pos = 0;
+	}
+}
+
+void
+xfs_iext_prev(
+	struct xfs_ifork	*ifp,
+	struct xfs_iext_cursor	*cur)
+{
+	if (!cur->leaf) {
+		ASSERT(cur->pos <= 0 || cur->pos >= RECS_PER_LEAF);
+		xfs_iext_last(ifp, cur);
+		return;
+	}
+
+	ASSERT(cur->pos >= 0);
+	ASSERT(cur->pos <= RECS_PER_LEAF);
+
+recurse:
+	do {
+		cur->pos--;
+		if (xfs_iext_valid(ifp, cur))
+			return;
+	} while (cur->pos > 0);
+
+	if (ifp->if_height > 1 && cur->leaf->prev) {
+		cur->leaf = cur->leaf->prev;
+		cur->pos = RECS_PER_LEAF;
+		goto recurse;
+	}
+}
+
+static inline int
+xfs_iext_key_cmp(
+	struct xfs_iext_node	*node,
+	int			n,
+	xfs_fileoff_t		offset)
+{
+	if (node->keys[n] > offset)
+		return 1;
+	if (node->keys[n] < offset)
+		return -1;
+	return 0;
+}
+
+static inline int
+xfs_iext_rec_cmp(
+	struct xfs_iext_rec	*rec,
+	xfs_fileoff_t		offset)
+{
+	uint64_t		rec_offset = rec->lo & XFS_IEXT_STARTOFF_MASK;
+	u32			rec_len = rec->hi & XFS_IEXT_LENGTH_MASK;
+
+	if (rec_offset > offset)
+		return 1;
+	if (rec_offset + rec_len <= offset)
+		return -1;
+	return 0;
+}
+
+static void *
+xfs_iext_find_level(
+	struct xfs_ifork	*ifp,
+	xfs_fileoff_t		offset,
+	int			level)
+{
+	struct xfs_iext_node	*node = ifp->if_u1.if_root;
+	int			height, i;
+
+	if (!ifp->if_height)
+		return NULL;
+
+	for (height = ifp->if_height; height > level; height--) {
+		for (i = 1; i < KEYS_PER_NODE; i++)
+			if (xfs_iext_key_cmp(node, i, offset) > 0)
+				break;
+
+		node = node->ptrs[i - 1];
+		if (!node)
+			break;
+	}
+
+	return node;
+}
+
+static int
+xfs_iext_node_pos(
+	struct xfs_iext_node	*node,
+	xfs_fileoff_t		offset)
+{
+	int			i;
+
+	for (i = 1; i < KEYS_PER_NODE; i++) {
+		if (xfs_iext_key_cmp(node, i, offset) > 0)
+			break;
+	}
+
+	return i - 1;
+}
+
+static int
+xfs_iext_node_insert_pos(
+	struct xfs_iext_node	*node,
+	xfs_fileoff_t		offset)
+{
+	int			i;
+
+	for (i = 0; i < KEYS_PER_NODE; i++) {
+		if (xfs_iext_key_cmp(node, i, offset) > 0)
+			return i;
+	}
+
+	return KEYS_PER_NODE;
+}
+
+static int
+xfs_iext_node_nr_entries(
+	struct xfs_iext_node	*node,
+	int			start)
+{
+	int			i;
+
+	for (i = start; i < KEYS_PER_NODE; i++) {
+		if (node->keys[i] == XFS_IEXT_KEY_INVALID)
+			break;
+	}
+
+	return i;
+}
+
+static int
+xfs_iext_leaf_nr_entries(
+	struct xfs_ifork	*ifp,
+	struct xfs_iext_leaf	*leaf,
+	int			start)
+{
+	int			i;
+
+	for (i = start; i < xfs_iext_max_recs(ifp); i++) {
+		if (xfs_iext_rec_is_empty(&leaf->recs[i]))
+			break;
+	}
+
+	return i;
+}
+
+static inline uint64_t
+xfs_iext_leaf_key(
+	struct xfs_iext_leaf	*leaf,
+	int			n)
+{
+	return leaf->recs[n].lo & XFS_IEXT_STARTOFF_MASK;
+}
+
+static void
+xfs_iext_grow(
+	struct xfs_ifork	*ifp)
+{
+	struct xfs_iext_node	*node = kmem_zalloc(NODE_SIZE, KM_NOFS);
+	int			i;
+
+	if (ifp->if_height == 1) {
+		struct xfs_iext_leaf *prev = ifp->if_u1.if_root;
+
+		node->keys[0] = xfs_iext_leaf_key(prev, 0);
+		node->ptrs[0] = prev;
+	} else {
+		struct xfs_iext_node *prev = ifp->if_u1.if_root;
+
+		ASSERT(ifp->if_height > 1);
+
+		node->keys[0] = prev->keys[0];
+		node->ptrs[0] = prev;
+	}
+
+	for (i = 1; i < KEYS_PER_NODE; i++)
+		node->keys[i] = XFS_IEXT_KEY_INVALID;
+
+	ifp->if_u1.if_root = node;
+	ifp->if_height++;
+}
+
+static void
+xfs_iext_update_node(
+	struct xfs_ifork	*ifp,
+	xfs_fileoff_t		old_offset,
+	xfs_fileoff_t		new_offset,
+	int			level,
+	void			*ptr)
+{
+	struct xfs_iext_node	*node = ifp->if_u1.if_root;
+	int			height, i;
+
+	for (height = ifp->if_height; height > level; height--) {
+		for (i = 0; i < KEYS_PER_NODE; i++) {
+			if (i > 0 && xfs_iext_key_cmp(node, i, old_offset) > 0)
+				break;
+			if (node->keys[i] == old_offset)
+				node->keys[i] = new_offset;
+		}
+		node = node->ptrs[i - 1];
+		ASSERT(node);
+	}
+
+	ASSERT(node == ptr);
+}
+
+static struct xfs_iext_node *
+xfs_iext_split_node(
+	struct xfs_iext_node	**nodep,
+	int			*pos,
+	int			*nr_entries)
+{
+	struct xfs_iext_node	*node = *nodep;
+	struct xfs_iext_node	*new = kmem_zalloc(NODE_SIZE, KM_NOFS);
+	const int		nr_move = KEYS_PER_NODE / 2;
+	int			nr_keep = nr_move + (KEYS_PER_NODE & 1);
+	int			i = 0;
+
+	/* for sequential append operations just spill over into the new node */
+	if (*pos == KEYS_PER_NODE) {
+		*nodep = new;
+		*pos = 0;
+		*nr_entries = 0;
+		goto done;
+	}
+
+	for (i = 0; i < nr_move; i++) {
+		new->keys[i] = node->keys[nr_keep + i];
+		new->ptrs[i] = node->ptrs[nr_keep + i];
+
+		node->keys[nr_keep + i] = XFS_IEXT_KEY_INVALID;
+		node->ptrs[nr_keep + i] = NULL;
+	}
+
+	if (*pos >= nr_keep) {
+		*nodep = new;
+		*pos -= nr_keep;
+		*nr_entries = nr_move;
+	} else {
+		*nr_entries = nr_keep;
+	}
+done:
+	for (; i < KEYS_PER_NODE; i++)
+		new->keys[i] = XFS_IEXT_KEY_INVALID;
+	return new;
+}
+
+static void
+xfs_iext_insert_node(
+	struct xfs_ifork	*ifp,
+	uint64_t		offset,
+	void			*ptr,
+	int			level)
+{
+	struct xfs_iext_node	*node, *new;
+	int			i, pos, nr_entries;
+
+again:
+	if (ifp->if_height < level)
+		xfs_iext_grow(ifp);
+
+	new = NULL;
+	node = xfs_iext_find_level(ifp, offset, level);
+	pos = xfs_iext_node_insert_pos(node, offset);
+	nr_entries = xfs_iext_node_nr_entries(node, pos);
+
+	ASSERT(pos >= nr_entries || xfs_iext_key_cmp(node, pos, offset) != 0);
+	ASSERT(nr_entries <= KEYS_PER_NODE);
+
+	if (nr_entries == KEYS_PER_NODE)
+		new = xfs_iext_split_node(&node, &pos, &nr_entries);
+
+	/*
+	 * Update the pointers in higher levels if the first entry changes
+	 * in an existing node.
+	 */
+	if (node != new && pos == 0 && nr_entries > 0)
+		xfs_iext_update_node(ifp, node->keys[0], offset, level, node);
+
+	for (i = nr_entries; i > pos; i--) {
+		node->keys[i] = node->keys[i - 1];
+		node->ptrs[i] = node->ptrs[i - 1];
+	}
+	node->keys[pos] = offset;
+	node->ptrs[pos] = ptr;
+
+	if (new) {
+		offset = new->keys[0];
+		ptr = new;
+		level++;
+		goto again;
+	}
+}
+
+static struct xfs_iext_leaf *
+xfs_iext_split_leaf(
+	struct xfs_iext_cursor	*cur,
+	int			*nr_entries)
+{
+	struct xfs_iext_leaf	*leaf = cur->leaf;
+	struct xfs_iext_leaf	*new = kmem_zalloc(NODE_SIZE, KM_NOFS);
+	const int		nr_move = RECS_PER_LEAF / 2;
+	int			nr_keep = nr_move + (RECS_PER_LEAF & 1);
+	int			i;
+
+	/* for sequential append operations just spill over into the new node */
+	if (cur->pos == RECS_PER_LEAF) {
+		cur->leaf = new;
+		cur->pos = 0;
+		*nr_entries = 0;
+		goto done;
+	}
+
+	for (i = 0; i < nr_move; i++) {
+		new->recs[i] = leaf->recs[nr_keep + i];
+		xfs_iext_rec_clear(&leaf->recs[nr_keep + i]);
+	}
+
+	if (cur->pos >= nr_keep) {
+		cur->leaf = new;
+		cur->pos -= nr_keep;
+		*nr_entries = nr_move;
+	} else {
+		*nr_entries = nr_keep;
+	}
+done:
+	if (leaf->next)
+		leaf->next->prev = new;
+	new->next = leaf->next;
+	new->prev = leaf;
+	leaf->next = new;
+	return new;
+}
+
+static void
+xfs_iext_alloc_root(
+	struct xfs_ifork	*ifp,
+	struct xfs_iext_cursor	*cur)
+{
+	ASSERT(ifp->if_bytes == 0);
+
+	ifp->if_u1.if_root = kmem_zalloc(sizeof(struct xfs_iext_rec), KM_NOFS);
+	ifp->if_height = 1;
+
+	/* now that we have a node step into it */
+	cur->leaf = ifp->if_u1.if_root;
+	cur->pos = 0;
+}
+
+static void
+xfs_iext_realloc_root(
+	struct xfs_ifork	*ifp,
+	struct xfs_iext_cursor	*cur)
+{
+	size_t new_size = ifp->if_bytes + sizeof(struct xfs_iext_rec);
+	void *new;
+
+	/* account for the prev/next pointers */
+	if (new_size / sizeof(struct xfs_iext_rec) == RECS_PER_LEAF)
+		new_size = NODE_SIZE;
+
+	new = kmem_realloc(ifp->if_u1.if_root, new_size, KM_NOFS);
+	memset(new + ifp->if_bytes, 0, new_size - ifp->if_bytes);
+	ifp->if_u1.if_root = new;
+	cur->leaf = new;
+}
+
+void
+xfs_iext_insert(
+	struct xfs_inode	*ip,
+	struct xfs_iext_cursor	*cur,
+	struct xfs_bmbt_irec	*irec,
+	int			state)
+{
+	struct xfs_ifork	*ifp = xfs_iext_state_to_fork(ip, state);
+	xfs_fileoff_t		offset = irec->br_startoff;
+	struct xfs_iext_leaf	*new = NULL;
+	int			nr_entries, i;
+
+	trace_xfs_iext_insert(ip, cur, state, _RET_IP_);
+
+	if (ifp->if_height == 0)
+		xfs_iext_alloc_root(ifp, cur);
+	else if (ifp->if_height == 1)
+		xfs_iext_realloc_root(ifp, cur);
+
+	nr_entries = xfs_iext_leaf_nr_entries(ifp, cur->leaf, cur->pos);
+	ASSERT(nr_entries <= RECS_PER_LEAF);
+	ASSERT(cur->pos >= nr_entries ||
+	       xfs_iext_rec_cmp(cur_rec(cur), irec->br_startoff) != 0);
+
+	if (nr_entries == RECS_PER_LEAF)
+		new = xfs_iext_split_leaf(cur, &nr_entries);
+
+	/*
+	 * Update the pointers in higher levels if the first entry changes
653 + */ 654 + if (cur->leaf != new && cur->pos == 0 && nr_entries > 0) { 655 + xfs_iext_update_node(ifp, xfs_iext_leaf_key(cur->leaf, 0), 656 + offset, 1, cur->leaf); 657 + } 658 + 659 + for (i = nr_entries; i > cur->pos; i--) 660 + cur->leaf->recs[i] = cur->leaf->recs[i - 1]; 661 + xfs_iext_set(cur_rec(cur), irec); 662 + ifp->if_bytes += sizeof(struct xfs_iext_rec); 663 + 664 + if (new) 665 + xfs_iext_insert_node(ifp, xfs_iext_leaf_key(new, 0), new, 2); 666 + } 667 + 668 + static struct xfs_iext_node * 669 + xfs_iext_rebalance_node( 670 + struct xfs_iext_node *parent, 671 + int *pos, 672 + struct xfs_iext_node *node, 673 + int nr_entries) 674 + { 675 + /* 676 + * If the neighbouring nodes are completely full, or have different 677 + * parents, we might never be able to merge our node, and will only 678 + * delete it once the number of entries hits zero. 679 + */ 680 + if (nr_entries == 0) 681 + return node; 682 + 683 + if (*pos > 0) { 684 + struct xfs_iext_node *prev = parent->ptrs[*pos - 1]; 685 + int nr_prev = xfs_iext_node_nr_entries(prev, 0), i; 686 + 687 + if (nr_prev + nr_entries <= KEYS_PER_NODE) { 688 + for (i = 0; i < nr_entries; i++) { 689 + prev->keys[nr_prev + i] = node->keys[i]; 690 + prev->ptrs[nr_prev + i] = node->ptrs[i]; 691 + } 692 + return node; 693 + } 694 + } 695 + 696 + if (*pos + 1 < xfs_iext_node_nr_entries(parent, *pos)) { 697 + struct xfs_iext_node *next = parent->ptrs[*pos + 1]; 698 + int nr_next = xfs_iext_node_nr_entries(next, 0), i; 699 + 700 + if (nr_entries + nr_next <= KEYS_PER_NODE) { 701 + /* 702 + * Merge the next node into this node so that we don't 703 + * have to do an additional update of the keys in the 704 + * higher levels. 
705 + */ 706 + for (i = 0; i < nr_next; i++) { 707 + node->keys[nr_entries + i] = next->keys[i]; 708 + node->ptrs[nr_entries + i] = next->ptrs[i]; 709 + } 710 + 711 + ++*pos; 712 + return next; 713 + } 714 + } 715 + 716 + return NULL; 717 + } 718 + 719 + static void 720 + xfs_iext_remove_node( 721 + struct xfs_ifork *ifp, 722 + xfs_fileoff_t offset, 723 + void *victim) 724 + { 725 + struct xfs_iext_node *node, *parent; 726 + int level = 2, pos, nr_entries, i; 727 + 728 + ASSERT(level <= ifp->if_height); 729 + node = xfs_iext_find_level(ifp, offset, level); 730 + pos = xfs_iext_node_pos(node, offset); 731 + again: 732 + ASSERT(node->ptrs[pos]); 733 + ASSERT(node->ptrs[pos] == victim); 734 + kmem_free(victim); 735 + 736 + nr_entries = xfs_iext_node_nr_entries(node, pos) - 1; 737 + offset = node->keys[0]; 738 + for (i = pos; i < nr_entries; i++) { 739 + node->keys[i] = node->keys[i + 1]; 740 + node->ptrs[i] = node->ptrs[i + 1]; 741 + } 742 + node->keys[nr_entries] = XFS_IEXT_KEY_INVALID; 743 + node->ptrs[nr_entries] = NULL; 744 + 745 + if (pos == 0 && nr_entries > 0) { 746 + xfs_iext_update_node(ifp, offset, node->keys[0], level, node); 747 + offset = node->keys[0]; 748 + } 749 + 750 + if (nr_entries >= KEYS_PER_NODE / 2) 751 + return; 752 + 753 + if (level < ifp->if_height) { 754 + /* 755 + * If we aren't at the root yet try to find a neighbour node to 756 + * merge with (or delete the node if it is empty), and then 757 + * recurse up to the next level. 
758 + */ 759 + level++; 760 + parent = xfs_iext_find_level(ifp, offset, level); 761 + pos = xfs_iext_node_pos(parent, offset); 762 + 763 + ASSERT(pos != KEYS_PER_NODE); 764 + ASSERT(parent->ptrs[pos] == node); 765 + 766 + node = xfs_iext_rebalance_node(parent, &pos, node, nr_entries); 767 + if (node) { 768 + victim = node; 769 + node = parent; 770 + goto again; 771 + } 772 + } else if (nr_entries == 1) { 773 + /* 774 + * If we are at the root and only one entry is left we can just 775 + * free this node and update the root pointer. 776 + */ 777 + ASSERT(node == ifp->if_u1.if_root); 778 + ifp->if_u1.if_root = node->ptrs[0]; 779 + ifp->if_height--; 780 + kmem_free(node); 781 + } 782 + } 783 + 784 + static void 785 + xfs_iext_rebalance_leaf( 786 + struct xfs_ifork *ifp, 787 + struct xfs_iext_cursor *cur, 788 + struct xfs_iext_leaf *leaf, 789 + xfs_fileoff_t offset, 790 + int nr_entries) 791 + { 792 + /* 793 + * If the neighbouring nodes are completely full we might never be able 794 + * to merge our node, and will only delete it once the number of 795 + * entries hits zero. 796 + */ 797 + if (nr_entries == 0) 798 + goto remove_node; 799 + 800 + if (leaf->prev) { 801 + int nr_prev = xfs_iext_leaf_nr_entries(ifp, leaf->prev, 0), i; 802 + 803 + if (nr_prev + nr_entries <= RECS_PER_LEAF) { 804 + for (i = 0; i < nr_entries; i++) 805 + leaf->prev->recs[nr_prev + i] = leaf->recs[i]; 806 + 807 + if (cur->leaf == leaf) { 808 + cur->leaf = leaf->prev; 809 + cur->pos += nr_prev; 810 + } 811 + goto remove_node; 812 + } 813 + } 814 + 815 + if (leaf->next) { 816 + int nr_next = xfs_iext_leaf_nr_entries(ifp, leaf->next, 0), i; 817 + 818 + if (nr_entries + nr_next <= RECS_PER_LEAF) { 819 + /* 820 + * Merge the next node into this node so that we don't 821 + * have to do an additional update of the keys in the 822 + * higher levels. 
823 + */ 824 + for (i = 0; i < nr_next; i++) { 825 + leaf->recs[nr_entries + i] = 826 + leaf->next->recs[i]; 827 + } 828 + 829 + if (cur->leaf == leaf->next) { 830 + cur->leaf = leaf; 831 + cur->pos += nr_entries; 832 + } 833 + 834 + offset = xfs_iext_leaf_key(leaf->next, 0); 835 + leaf = leaf->next; 836 + goto remove_node; 837 + } 838 + } 839 + 840 + return; 841 + remove_node: 842 + if (leaf->prev) 843 + leaf->prev->next = leaf->next; 844 + if (leaf->next) 845 + leaf->next->prev = leaf->prev; 846 + xfs_iext_remove_node(ifp, offset, leaf); 847 + } 848 + 849 + static void 850 + xfs_iext_free_last_leaf( 851 + struct xfs_ifork *ifp) 852 + { 853 + ifp->if_height--; 854 + kmem_free(ifp->if_u1.if_root); 855 + ifp->if_u1.if_root = NULL; 856 + } 857 + 858 + void 859 + xfs_iext_remove( 860 + struct xfs_inode *ip, 861 + struct xfs_iext_cursor *cur, 862 + int state) 863 + { 864 + struct xfs_ifork *ifp = xfs_iext_state_to_fork(ip, state); 865 + struct xfs_iext_leaf *leaf = cur->leaf; 866 + xfs_fileoff_t offset = xfs_iext_leaf_key(leaf, 0); 867 + int i, nr_entries; 868 + 869 + trace_xfs_iext_remove(ip, cur, state, _RET_IP_); 870 + 871 + ASSERT(ifp->if_height > 0); 872 + ASSERT(ifp->if_u1.if_root != NULL); 873 + ASSERT(xfs_iext_valid(ifp, cur)); 874 + 875 + nr_entries = xfs_iext_leaf_nr_entries(ifp, leaf, cur->pos) - 1; 876 + for (i = cur->pos; i < nr_entries; i++) 877 + leaf->recs[i] = leaf->recs[i + 1]; 878 + xfs_iext_rec_clear(&leaf->recs[nr_entries]); 879 + ifp->if_bytes -= sizeof(struct xfs_iext_rec); 880 + 881 + if (cur->pos == 0 && nr_entries > 0) { 882 + xfs_iext_update_node(ifp, offset, xfs_iext_leaf_key(leaf, 0), 1, 883 + leaf); 884 + offset = xfs_iext_leaf_key(leaf, 0); 885 + } else if (cur->pos == nr_entries) { 886 + if (ifp->if_height > 1 && leaf->next) 887 + cur->leaf = leaf->next; 888 + else 889 + cur->leaf = NULL; 890 + cur->pos = 0; 891 + } 892 + 893 + if (nr_entries >= RECS_PER_LEAF / 2) 894 + return; 895 + 896 + if (ifp->if_height > 1) 897 +
xfs_iext_rebalance_leaf(ifp, cur, leaf, offset, nr_entries); 898 + else if (nr_entries == 0) 899 + xfs_iext_free_last_leaf(ifp); 900 + } 901 + 902 + /* 903 + * Lookup the extent covering bno. 904 + * 905 + * If there is an extent covering bno return the extent index, and store the 906 + * expanded extent structure in *gotp, and the extent cursor in *cur. 907 + * If there is no extent covering bno, but there is an extent after it (e.g. 908 + * it lies in a hole) return that extent in *gotp and its cursor in *cur 909 + * instead. 910 + * If bno is beyond the last extent return false, and return an invalid 911 + * cursor value. 912 + */ 913 + bool 914 + xfs_iext_lookup_extent( 915 + struct xfs_inode *ip, 916 + struct xfs_ifork *ifp, 917 + xfs_fileoff_t offset, 918 + struct xfs_iext_cursor *cur, 919 + struct xfs_bmbt_irec *gotp) 920 + { 921 + XFS_STATS_INC(ip->i_mount, xs_look_exlist); 922 + 923 + cur->leaf = xfs_iext_find_level(ifp, offset, 1); 924 + if (!cur->leaf) { 925 + cur->pos = 0; 926 + return false; 927 + } 928 + 929 + for (cur->pos = 0; cur->pos < xfs_iext_max_recs(ifp); cur->pos++) { 930 + struct xfs_iext_rec *rec = cur_rec(cur); 931 + 932 + if (xfs_iext_rec_is_empty(rec)) 933 + break; 934 + if (xfs_iext_rec_cmp(rec, offset) >= 0) 935 + goto found; 936 + } 937 + 938 + /* Try looking in the next node for an entry > offset */ 939 + if (ifp->if_height == 1 || !cur->leaf->next) 940 + return false; 941 + cur->leaf = cur->leaf->next; 942 + cur->pos = 0; 943 + if (!xfs_iext_valid(ifp, cur)) 944 + return false; 945 + found: 946 + xfs_iext_get(gotp, cur_rec(cur)); 947 + return true; 948 + } 949 + 950 + /* 951 + * Returns the last extent before end, and if this extent doesn't cover 952 + * end, update end to the end of the extent. 
953 + */ 954 + bool 955 + xfs_iext_lookup_extent_before( 956 + struct xfs_inode *ip, 957 + struct xfs_ifork *ifp, 958 + xfs_fileoff_t *end, 959 + struct xfs_iext_cursor *cur, 960 + struct xfs_bmbt_irec *gotp) 961 + { 962 + /* could be optimized to not even look up the next on a match.. */ 963 + if (xfs_iext_lookup_extent(ip, ifp, *end - 1, cur, gotp) && 964 + gotp->br_startoff <= *end - 1) 965 + return true; 966 + if (!xfs_iext_prev_extent(ifp, cur, gotp)) 967 + return false; 968 + *end = gotp->br_startoff + gotp->br_blockcount; 969 + return true; 970 + } 971 + 972 + void 973 + xfs_iext_update_extent( 974 + struct xfs_inode *ip, 975 + int state, 976 + struct xfs_iext_cursor *cur, 977 + struct xfs_bmbt_irec *new) 978 + { 979 + struct xfs_ifork *ifp = xfs_iext_state_to_fork(ip, state); 980 + 981 + if (cur->pos == 0) { 982 + struct xfs_bmbt_irec old; 983 + 984 + xfs_iext_get(&old, cur_rec(cur)); 985 + if (new->br_startoff != old.br_startoff) { 986 + xfs_iext_update_node(ifp, old.br_startoff, 987 + new->br_startoff, 1, cur->leaf); 988 + } 989 + } 990 + 991 + trace_xfs_bmap_pre_update(ip, cur, state, _RET_IP_); 992 + xfs_iext_set(cur_rec(cur), new); 993 + trace_xfs_bmap_post_update(ip, cur, state, _RET_IP_); 994 + } 995 + 996 + /* 997 + * Return true if the cursor points at an extent and return the extent structure 998 + * in gotp. Else return false. 999 + */ 1000 + bool 1001 + xfs_iext_get_extent( 1002 + struct xfs_ifork *ifp, 1003 + struct xfs_iext_cursor *cur, 1004 + struct xfs_bmbt_irec *gotp) 1005 + { 1006 + if (!xfs_iext_valid(ifp, cur)) 1007 + return false; 1008 + xfs_iext_get(gotp, cur_rec(cur)); 1009 + return true; 1010 + } 1011 + 1012 + /* 1013 + * This is a recursive function, because of that we need to be extremely 1014 + * careful with stack usage. 
1015 + */ 1016 + static void 1017 + xfs_iext_destroy_node( 1018 + struct xfs_iext_node *node, 1019 + int level) 1020 + { 1021 + int i; 1022 + 1023 + if (level > 1) { 1024 + for (i = 0; i < KEYS_PER_NODE; i++) { 1025 + if (node->keys[i] == XFS_IEXT_KEY_INVALID) 1026 + break; 1027 + xfs_iext_destroy_node(node->ptrs[i], level - 1); 1028 + } 1029 + } 1030 + 1031 + kmem_free(node); 1032 + } 1033 + 1034 + void 1035 + xfs_iext_destroy( 1036 + struct xfs_ifork *ifp) 1037 + { 1038 + xfs_iext_destroy_node(ifp->if_u1.if_root, ifp->if_height); 1039 + 1040 + ifp->if_bytes = 0; 1041 + ifp->if_height = 0; 1042 + ifp->if_u1.if_root = NULL; 1043 + }
fs/xfs/libxfs/xfs_inode_buf.c (+1)
··· 24 24 #include "xfs_mount.h" 25 25 #include "xfs_defer.h" 26 26 #include "xfs_inode.h" 27 + #include "xfs_errortag.h" 27 28 #include "xfs_error.h" 28 29 #include "xfs_cksum.h" 29 30 #include "xfs_icache.h"
fs/xfs/libxfs/xfs_inode_fork.c (+66 -1267)
··· 42 42 STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int); 43 43 STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int); 44 44 45 + static inline dev_t xfs_to_linux_dev_t(xfs_dev_t dev) 46 + { 47 + return MKDEV(sysv_major(dev) & 0x1ff, sysv_minor(dev)); 48 + } 49 + 45 50 /* 46 - * Move inode type and inode format specific information from the 47 - * on-disk inode to the in-core inode. For fifos, devs, and sockets 48 - * this means set if_rdev to the proper value. For files, directories, 49 - * and symlinks this means to bring in the in-line data or extent 50 - * pointers. For a file in B-tree format, only the root is immediately 51 - * brought in-core. The rest will be in-lined in if_extents when it 52 - * is first referenced (see xfs_iread_extents()). 51 + * Copy inode type and data and attr format specific information from the 52 + * on-disk inode to the in-core inode and fork structures. For fifos, devices, 53 + * and sockets this means set i_rdev to the proper value. For files, 54 + * directories, and symlinks this means to bring in the in-line data or extent 55 + * pointers as well as the attribute fork. For a fork in B-tree format, only 56 + * the root is immediately brought in-core. The rest will be read in later when 57 + * first referenced (see xfs_iread_extents()). 
53 58 */ 54 59 int 55 60 xfs_iformat_fork( 56 - xfs_inode_t *ip, 57 - xfs_dinode_t *dip) 61 + struct xfs_inode *ip, 62 + struct xfs_dinode *dip) 58 63 { 59 - xfs_attr_shortform_t *atp; 64 + struct inode *inode = VFS_I(ip); 65 + struct xfs_attr_shortform *atp; 60 66 int size; 61 67 int error = 0; 62 68 xfs_fsize_t di_size; ··· 101 95 return -EFSCORRUPTED; 102 96 } 103 97 104 - if (unlikely(xfs_is_reflink_inode(ip) && 105 - (VFS_I(ip)->i_mode & S_IFMT) != S_IFREG)) { 98 + if (unlikely(xfs_is_reflink_inode(ip) && !S_ISREG(inode->i_mode))) { 106 99 xfs_warn(ip->i_mount, 107 100 "corrupt dinode %llu, wrong file type for reflink.", 108 101 ip->i_ino); ··· 120 115 return -EFSCORRUPTED; 121 116 } 122 117 123 - switch (VFS_I(ip)->i_mode & S_IFMT) { 118 + switch (inode->i_mode & S_IFMT) { 124 119 case S_IFIFO: 125 120 case S_IFCHR: 126 121 case S_IFBLK: ··· 131 126 return -EFSCORRUPTED; 132 127 } 133 128 ip->i_d.di_size = 0; 134 - ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip); 129 + inode->i_rdev = xfs_to_linux_dev_t(xfs_dinode_get_rdev(dip)); 135 130 break; 136 131 137 132 case S_IFREG: ··· 189 184 return error; 190 185 191 186 /* Check inline dir contents. 
*/ 192 - if (S_ISDIR(VFS_I(ip)->i_mode) && 193 - dip->di_format == XFS_DINODE_FMT_LOCAL) { 187 + if (S_ISDIR(inode->i_mode) && dip->di_format == XFS_DINODE_FMT_LOCAL) { 194 188 error = xfs_dir2_sf_verify(ip); 195 189 if (error) { 196 190 xfs_idestroy_fork(ip, XFS_DATA_FORK); ··· 269 265 if (zero_terminate) 270 266 mem_size++; 271 267 272 - if (size == 0) 273 - ifp->if_u1.if_data = NULL; 274 - else if (mem_size <= sizeof(ifp->if_u2.if_inline_data)) 275 - ifp->if_u1.if_data = ifp->if_u2.if_inline_data; 276 - else { 268 + if (size) { 277 269 real_size = roundup(mem_size, 4); 278 270 ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP | KM_NOFS); 279 - } 280 - 281 - if (size) { 282 271 memcpy(ifp->if_u1.if_data, data, size); 283 272 if (zero_terminate) 284 273 ifp->if_u1.if_data[size] = '\0'; 274 + } else { 275 + ifp->if_u1.if_data = NULL; 285 276 } 286 277 287 278 ifp->if_bytes = size; ··· 287 288 288 289 /* 289 290 * The file is in-lined in the on-disk inode. 290 - * If it fits into if_inline_data, then copy 291 - * it there, otherwise allocate a buffer for it 292 - * and copy the data there. Either way, set 293 - * if_data to point at the data. 294 - * If we allocate a buffer for the data, make 295 - * sure that its size is a multiple of 4 and 296 - * record the real size in i_real_bytes. 297 291 */ 298 292 STATIC int 299 293 xfs_iformat_local( ··· 316 324 317 325 /* 318 326 * The file consists of a set of extents all of which fit into the on-disk 319 - * inode. If there are few enough extents to fit into the if_inline_ext, then 320 - * copy them there. Otherwise allocate a buffer for them and copy them into it. 321 - * Either way, set if_extents to point at the extents. 327 + * inode. 
322 328 */ 323 329 STATIC int 324 330 xfs_iformat_extents( ··· 326 336 { 327 337 struct xfs_mount *mp = ip->i_mount; 328 338 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); 339 + int state = xfs_bmap_fork_to_state(whichfork); 329 340 int nex = XFS_DFORK_NEXTENTS(dip, whichfork); 330 341 int size = nex * sizeof(xfs_bmbt_rec_t); 342 + struct xfs_iext_cursor icur; 331 343 struct xfs_bmbt_rec *dp; 344 + struct xfs_bmbt_irec new; 332 345 int i; 333 346 334 347 /* ··· 347 354 } 348 355 349 356 ifp->if_real_bytes = 0; 350 - if (nex == 0) 351 - ifp->if_u1.if_extents = NULL; 352 - else if (nex <= XFS_INLINE_EXTS) 353 - ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; 354 - else 355 - xfs_iext_add(ifp, 0, nex); 356 - 357 - ifp->if_bytes = size; 357 + ifp->if_bytes = 0; 358 + ifp->if_u1.if_root = NULL; 359 + ifp->if_height = 0; 358 360 if (size) { 359 361 dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork); 362 + 363 + xfs_iext_first(ifp, &icur); 360 364 for (i = 0; i < nex; i++, dp++) { 361 - xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i); 362 - ep->l0 = get_unaligned_be64(&dp->l0); 363 - ep->l1 = get_unaligned_be64(&dp->l1); 364 - if (!xfs_bmbt_validate_extent(mp, whichfork, ep)) { 365 + xfs_bmbt_disk_get_all(dp, &new); 366 + if (!xfs_bmbt_validate_extent(mp, whichfork, &new)) { 365 367 XFS_ERROR_REPORT("xfs_iformat_extents(2)", 366 368 XFS_ERRLEVEL_LOW, mp); 367 369 return -EFSCORRUPTED; 368 370 } 371 + 372 + xfs_iext_insert(ip, &icur, &new, state); 373 + trace_xfs_read_extent(ip, &icur, state, _THIS_IP_); 374 + xfs_iext_next(ifp, &icur); 369 375 } 370 - XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork); 371 376 } 372 377 ifp->if_flags |= XFS_IFEXTENTS; 373 378 return 0; ··· 431 440 ifp->if_flags &= ~XFS_IFEXTENTS; 432 441 ifp->if_flags |= XFS_IFBROOT; 433 442 443 + ifp->if_real_bytes = 0; 444 + ifp->if_bytes = 0; 445 + ifp->if_u1.if_root = NULL; 446 + ifp->if_height = 0; 434 447 return 0; 435 448 } 436 449 437 - /* 438 - * Read in extents from a btree-format inode. 
439 - * Allocate and fill in if_extents. Real work is done in xfs_bmap.c. 440 - */ 441 - int 442 - xfs_iread_extents( 443 - xfs_trans_t *tp, 444 - xfs_inode_t *ip, 445 - int whichfork) 446 - { 447 - int error; 448 - xfs_ifork_t *ifp; 449 - xfs_extnum_t nextents; 450 - 451 - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 452 - 453 - if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) { 454 - XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW, 455 - ip->i_mount); 456 - return -EFSCORRUPTED; 457 - } 458 - nextents = XFS_IFORK_NEXTENTS(ip, whichfork); 459 - ifp = XFS_IFORK_PTR(ip, whichfork); 460 - 461 - /* 462 - * We know that the size is valid (it's checked in iformat_btree) 463 - */ 464 - ifp->if_bytes = ifp->if_real_bytes = 0; 465 - xfs_iext_add(ifp, 0, nextents); 466 - error = xfs_bmap_read_extents(tp, ip, whichfork); 467 - if (error) { 468 - xfs_iext_destroy(ifp); 469 - return error; 470 - } 471 - ifp->if_flags |= XFS_IFEXTENTS; 472 - return 0; 473 - } 474 450 /* 475 451 * Reallocate the space for if_broot based on the number of records 476 452 * being added or deleted as indicated in rec_diff. Move the records ··· 602 644 ASSERT(new_size >= 0); 603 645 604 646 if (new_size == 0) { 605 - if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { 606 - kmem_free(ifp->if_u1.if_data); 607 - } 647 + kmem_free(ifp->if_u1.if_data); 608 648 ifp->if_u1.if_data = NULL; 609 - real_size = 0; 610 - } else if (new_size <= sizeof(ifp->if_u2.if_inline_data)) { 611 - /* 612 - * If the valid extents/data can fit in if_inline_ext/data, 613 - * copy them from the malloc'd vector and free it. 
614 - */ 615 - if (ifp->if_u1.if_data == NULL) { 616 - ifp->if_u1.if_data = ifp->if_u2.if_inline_data; 617 - } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { 618 - ASSERT(ifp->if_real_bytes != 0); 619 - memcpy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data, 620 - new_size); 621 - kmem_free(ifp->if_u1.if_data); 622 - ifp->if_u1.if_data = ifp->if_u2.if_inline_data; 623 - } 624 649 real_size = 0; 625 650 } else { 626 651 /* ··· 618 677 ASSERT(ifp->if_real_bytes == 0); 619 678 ifp->if_u1.if_data = kmem_alloc(real_size, 620 679 KM_SLEEP | KM_NOFS); 621 - } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { 680 + } else { 622 681 /* 623 682 * Only do the realloc if the underlying size 624 683 * is really changing. ··· 629 688 real_size, 630 689 KM_SLEEP | KM_NOFS); 631 690 } 632 - } else { 633 - ASSERT(ifp->if_real_bytes == 0); 634 - ifp->if_u1.if_data = kmem_alloc(real_size, 635 - KM_SLEEP | KM_NOFS); 636 - memcpy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data, 637 - ifp->if_bytes); 638 691 } 639 692 } 640 693 ifp->if_real_bytes = real_size; ··· 656 721 * so check and free it up if we do. 
657 722 */ 658 723 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { 659 - if ((ifp->if_u1.if_data != ifp->if_u2.if_inline_data) && 660 - (ifp->if_u1.if_data != NULL)) { 724 + if (ifp->if_u1.if_data != NULL) { 661 725 ASSERT(ifp->if_real_bytes != 0); 662 726 kmem_free(ifp->if_u1.if_data); 663 727 ifp->if_u1.if_data = NULL; 664 728 ifp->if_real_bytes = 0; 665 729 } 666 - } else if ((ifp->if_flags & XFS_IFEXTENTS) && 667 - ((ifp->if_flags & XFS_IFEXTIREC) || 668 - ((ifp->if_u1.if_extents != NULL) && 669 - (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext)))) { 670 - ASSERT(ifp->if_real_bytes != 0); 730 + } else if ((ifp->if_flags & XFS_IFEXTENTS) && ifp->if_height) { 671 731 xfs_iext_destroy(ifp); 672 732 } 673 - ASSERT(ifp->if_u1.if_extents == NULL || 674 - ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext); 733 + 675 734 ASSERT(ifp->if_real_bytes == 0); 735 + 676 736 if (whichfork == XFS_ATTR_FORK) { 677 737 kmem_zone_free(xfs_ifork_zone, ip->i_afp); 678 738 ip->i_afp = NULL; ··· 677 747 } 678 748 } 679 749 680 - /* Count number of incore extents based on if_bytes */ 681 - xfs_extnum_t 682 - xfs_iext_count(struct xfs_ifork *ifp) 683 - { 684 - return ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 685 - } 686 - 687 750 /* 688 751 * Convert in-core extents to on-disk form 689 - * 690 - * For either the data or attr fork in extent format, we need to endian convert 691 - * the in-core extent as we place them into the on-disk inode. 692 752 * 693 753 * In the case of the data fork, the in-core and on-disk fork sizes can be 694 754 * different due to delayed allocation extents. 
We only copy on-disk extents ··· 688 768 */ 689 769 int 690 770 xfs_iextents_copy( 691 - xfs_inode_t *ip, 692 - xfs_bmbt_rec_t *dp, 771 + struct xfs_inode *ip, 772 + struct xfs_bmbt_rec *dp, 693 773 int whichfork) 694 774 { 695 - int copied; 696 - int i; 697 - xfs_ifork_t *ifp; 698 - int nrecs; 699 - xfs_fsblock_t start_block; 775 + int state = xfs_bmap_fork_to_state(whichfork); 776 + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); 777 + struct xfs_iext_cursor icur; 778 + struct xfs_bmbt_irec rec; 779 + int copied = 0; 700 780 701 - ifp = XFS_IFORK_PTR(ip, whichfork); 702 - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 781 + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)); 703 782 ASSERT(ifp->if_bytes > 0); 704 783 705 - nrecs = xfs_iext_count(ifp); 706 - XFS_BMAP_TRACE_EXLIST(ip, nrecs, whichfork); 707 - ASSERT(nrecs > 0); 708 - 709 - /* 710 - * There are some delayed allocation extents in the 711 - * inode, so copy the extents one at a time and skip 712 - * the delayed ones. There must be at least one 713 - * non-delayed extent. 714 - */ 715 - copied = 0; 716 - for (i = 0; i < nrecs; i++) { 717 - xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i); 718 - 719 - ASSERT(xfs_bmbt_validate_extent(ip->i_mount, whichfork, ep)); 720 - 721 - start_block = xfs_bmbt_get_startblock(ep); 722 - if (isnullstartblock(start_block)) { 723 - /* 724 - * It's a delayed allocation extent, so skip it. 
725 - */ 784 + for_each_xfs_iext(ifp, &icur, &rec) { 785 + if (isnullstartblock(rec.br_startblock)) 726 786 continue; 727 - } 728 - 729 - /* Translate to on disk format */ 730 - put_unaligned_be64(ep->l0, &dp->l0); 731 - put_unaligned_be64(ep->l1, &dp->l1); 787 + ASSERT(xfs_bmbt_validate_extent(ip->i_mount, whichfork, &rec)); 788 + xfs_bmbt_disk_set_all(dp, &rec); 789 + trace_xfs_write_extent(ip, &icur, state, _RET_IP_); 790 + copied += sizeof(struct xfs_bmbt_rec); 732 791 dp++; 733 - copied++; 734 792 } 735 - ASSERT(copied != 0); 736 793 737 - return (copied * (uint)sizeof(xfs_bmbt_rec_t)); 794 + ASSERT(copied > 0); 795 + ASSERT(copied <= ifp->if_bytes); 796 + return copied; 738 797 } 739 798 740 799 /* ··· 771 872 !(iip->ili_fields & extflag[whichfork])); 772 873 if ((iip->ili_fields & extflag[whichfork]) && 773 874 (ifp->if_bytes > 0)) { 774 - ASSERT(xfs_iext_get_ext(ifp, 0)); 775 875 ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0); 776 876 (void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp, 777 877 whichfork); ··· 792 894 case XFS_DINODE_FMT_DEV: 793 895 if (iip->ili_fields & XFS_ILOG_DEV) { 794 896 ASSERT(whichfork == XFS_DATA_FORK); 795 - xfs_dinode_put_rdev(dip, ip->i_df.if_u2.if_rdev); 796 - } 797 - break; 798 - 799 - case XFS_DINODE_FMT_UUID: 800 - if (iip->ili_fields & XFS_ILOG_UUID) { 801 - ASSERT(whichfork == XFS_DATA_FORK); 802 - memcpy(XFS_DFORK_DPTR(dip), 803 - &ip->i_df.if_u2.if_uuid, 804 - sizeof(uuid_t)); 897 + xfs_dinode_put_rdev(dip, sysv_encode_dev(VFS_I(ip)->i_rdev)); 805 898 } 806 899 break; 807 900 808 901 default: 809 902 ASSERT(0); 810 903 break; 811 - } 812 - } 813 - 814 - /* 815 - * Return a pointer to the extent record at file index idx. 
- */
-xfs_bmbt_rec_host_t *
-xfs_iext_get_ext(
-	xfs_ifork_t	*ifp,		/* inode fork pointer */
-	xfs_extnum_t	idx)		/* index of target extent */
-{
-	ASSERT(idx >= 0);
-	ASSERT(idx < xfs_iext_count(ifp));
-
-	if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) {
-		return ifp->if_u1.if_ext_irec->er_extbuf;
-	} else if (ifp->if_flags & XFS_IFEXTIREC) {
-		xfs_ext_irec_t	*erp;		/* irec pointer */
-		int		erp_idx = 0;	/* irec index */
-		xfs_extnum_t	page_idx = idx;	/* ext index in target list */
-
-		erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0);
-		return &erp->er_extbuf[page_idx];
-	} else if (ifp->if_bytes) {
-		return &ifp->if_u1.if_extents[idx];
-	} else {
-		return NULL;
 	}
 }

···

 }

 /*
- * Insert new item(s) into the extent records for incore inode
- * fork 'ifp'.  'count' new items are inserted at index 'idx'.
- */
-void
-xfs_iext_insert(
-	xfs_inode_t	*ip,		/* incore inode pointer */
-	xfs_extnum_t	idx,		/* starting index of new items */
-	xfs_extnum_t	count,		/* number of inserted items */
-	xfs_bmbt_irec_t	*new,		/* items to insert */
-	int		state)		/* type of extent conversion */
-{
-	xfs_ifork_t	*ifp = xfs_iext_state_to_fork(ip, state);
-	xfs_extnum_t	i;		/* extent record index */
-
-	trace_xfs_iext_insert(ip, idx, new, state, _RET_IP_);
-
-	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
-	xfs_iext_add(ifp, idx, count);
-	for (i = idx; i < idx + count; i++, new++)
-		xfs_bmbt_set_all(xfs_iext_get_ext(ifp, i), new);
-}
-
-/*
- * This is called when the amount of space required for incore file
- * extents needs to be increased.  The ext_diff parameter stores the
- * number of new extents being added and the idx parameter contains
- * the extent index where the new extents will be added.  If the new
- * extents are being appended, then we just need to (re)allocate and
- * initialize the space.  Otherwise, if the new extents are being
- * inserted into the middle of the existing entries, a bit more work
- * is required to make room for the new extents to be inserted.  The
- * caller is responsible for filling in the new extent entries upon
- * return.
- */
-void
-xfs_iext_add(
-	xfs_ifork_t	*ifp,		/* inode fork pointer */
-	xfs_extnum_t	idx,		/* index to begin adding exts */
-	int		ext_diff)	/* number of extents to add */
-{
-	int		byte_diff;	/* new bytes being added */
-	int		new_size;	/* size of extents after adding */
-	xfs_extnum_t	nextents;	/* number of extents in file */
-
-	nextents = xfs_iext_count(ifp);
-	ASSERT((idx >= 0) && (idx <= nextents));
-	byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t);
-	new_size = ifp->if_bytes + byte_diff;
-	/*
-	 * If the new number of extents (nextents + ext_diff)
-	 * fits inside the inode, then continue to use the inline
-	 * extent buffer.
-	 */
-	if (nextents + ext_diff <= XFS_INLINE_EXTS) {
-		if (idx < nextents) {
-			memmove(&ifp->if_u2.if_inline_ext[idx + ext_diff],
-				&ifp->if_u2.if_inline_ext[idx],
-				(nextents - idx) * sizeof(xfs_bmbt_rec_t));
-			memset(&ifp->if_u2.if_inline_ext[idx], 0, byte_diff);
-		}
-		ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
-		ifp->if_real_bytes = 0;
-	}
-	/*
-	 * Otherwise use a linear (direct) extent list.
-	 * If the extents are currently inside the inode,
-	 * xfs_iext_realloc_direct will switch us from
-	 * inline to direct extent allocation mode.
-	 */
-	else if (nextents + ext_diff <= XFS_LINEAR_EXTS) {
-		xfs_iext_realloc_direct(ifp, new_size);
-		if (idx < nextents) {
-			memmove(&ifp->if_u1.if_extents[idx + ext_diff],
-				&ifp->if_u1.if_extents[idx],
-				(nextents - idx) * sizeof(xfs_bmbt_rec_t));
-			memset(&ifp->if_u1.if_extents[idx], 0, byte_diff);
-		}
-	}
-	/* Indirection array */
-	else {
-		xfs_ext_irec_t	*erp;
-		int		erp_idx = 0;
-		int		page_idx = idx;
-
-		ASSERT(nextents + ext_diff > XFS_LINEAR_EXTS);
-		if (ifp->if_flags & XFS_IFEXTIREC) {
-			erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 1);
-		} else {
-			xfs_iext_irec_init(ifp);
-			ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-			erp = ifp->if_u1.if_ext_irec;
-		}
-		/* Extents fit in target extent page */
-		if (erp && erp->er_extcount + ext_diff <= XFS_LINEAR_EXTS) {
-			if (page_idx < erp->er_extcount) {
-				memmove(&erp->er_extbuf[page_idx + ext_diff],
-					&erp->er_extbuf[page_idx],
-					(erp->er_extcount - page_idx) *
-					sizeof(xfs_bmbt_rec_t));
-				memset(&erp->er_extbuf[page_idx], 0, byte_diff);
-			}
-			erp->er_extcount += ext_diff;
-			xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
-		}
-		/* Insert a new extent page */
-		else if (erp) {
-			xfs_iext_add_indirect_multi(ifp,
-				erp_idx, page_idx, ext_diff);
-		}
-		/*
-		 * If extent(s) are being appended to the last page in
-		 * the indirection array and the new extent(s) don't fit
-		 * in the page, then erp is NULL and erp_idx is set to
-		 * the next index needed in the indirection array.
-		 */
-		else {
-			uint	count = ext_diff;
-
-			while (count) {
-				erp = xfs_iext_irec_new(ifp, erp_idx);
-				erp->er_extcount = min(count, XFS_LINEAR_EXTS);
-				count -= erp->er_extcount;
-				if (count)
-					erp_idx++;
-			}
-		}
-	}
-	ifp->if_bytes = new_size;
-}
-
-/*
- * This is called when incore extents are being added to the indirection
- * array and the new extents do not fit in the target extent list.  The
- * erp_idx parameter contains the irec index for the target extent list
- * in the indirection array, and the idx parameter contains the extent
- * index within the list.  The number of extents being added is stored
- * in the count parameter.
- *
- *    |-------|   |-------|
- *    |       |   |       |    idx - number of extents before idx
- *    |  idx  |   | count |
- *    |       |   |       |    count - number of extents being inserted at idx
- *    |-------|   |-------|
- *    | count |   | nex2  |    nex2 - number of extents after idx + count
- *    |-------|   |-------|
- */
-void
-xfs_iext_add_indirect_multi(
-	xfs_ifork_t	*ifp,			/* inode fork pointer */
-	int		erp_idx,		/* target extent irec index */
-	xfs_extnum_t	idx,			/* index within target list */
-	int		count)			/* new extents being added */
-{
-	int		byte_diff;		/* new bytes being added */
-	xfs_ext_irec_t	*erp;			/* pointer to irec entry */
-	xfs_extnum_t	ext_diff;		/* number of extents to add */
-	xfs_extnum_t	ext_cnt;		/* new extents still needed */
-	xfs_extnum_t	nex2;			/* extents after idx + count */
-	xfs_bmbt_rec_t	*nex2_ep = NULL;	/* temp list for nex2 extents */
-	int		nlists;			/* number of irec's (lists) */
-
-	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-	erp = &ifp->if_u1.if_ext_irec[erp_idx];
-	nex2 = erp->er_extcount - idx;
-	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-
-	/*
-	 * Save second part of target extent list
-	 * (all extents past idx)
-	 */
-	if (nex2) {
-		byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
-		nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_NOFS);
-		memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff);
-		erp->er_extcount -= nex2;
-		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2);
-		memset(&erp->er_extbuf[idx], 0, byte_diff);
-	}
-
-	/*
-	 * Add the new extents to the end of the target
-	 * list, then allocate new irec record(s) and
-	 * extent buffer(s) as needed to store the rest
-	 * of the new extents.
-	 */
-	ext_cnt = count;
-	ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS - erp->er_extcount);
-	if (ext_diff) {
-		erp->er_extcount += ext_diff;
-		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
-		ext_cnt -= ext_diff;
-	}
-	while (ext_cnt) {
-		erp_idx++;
-		erp = xfs_iext_irec_new(ifp, erp_idx);
-		ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS);
-		erp->er_extcount = ext_diff;
-		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
-		ext_cnt -= ext_diff;
-	}
-
-	/* Add nex2 extents back to indirection array */
-	if (nex2) {
-		xfs_extnum_t	ext_avail;
-		int		i;
-
-		byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
-		ext_avail = XFS_LINEAR_EXTS - erp->er_extcount;
-		i = 0;
-		/*
-		 * If nex2 extents fit in the current page, append
-		 * nex2_ep after the new extents.
-		 */
-		if (nex2 <= ext_avail) {
-			i = erp->er_extcount;
-		}
-		/*
-		 * Otherwise, check if space is available in the
-		 * next page.
-		 */
-		else if ((erp_idx < nlists - 1) &&
-			 (nex2 <= (ext_avail = XFS_LINEAR_EXTS -
-			  ifp->if_u1.if_ext_irec[erp_idx+1].er_extcount))) {
-			erp_idx++;
-			erp++;
-			/* Create a hole for nex2 extents */
-			memmove(&erp->er_extbuf[nex2], erp->er_extbuf,
-				erp->er_extcount * sizeof(xfs_bmbt_rec_t));
-		}
-		/*
-		 * Final choice, create a new extent page for
-		 * nex2 extents.
-		 */
-		else {
-			erp_idx++;
-			erp = xfs_iext_irec_new(ifp, erp_idx);
-		}
-		memmove(&erp->er_extbuf[i], nex2_ep, byte_diff);
-		kmem_free(nex2_ep);
-		erp->er_extcount += nex2;
-		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2);
-	}
-}
-
-/*
- * This is called when the amount of space required for incore file
- * extents needs to be decreased.  The ext_diff parameter stores the
- * number of extents to be removed and the idx parameter contains
- * the extent index where the extents will be removed from.
- *
- * If the amount of space needed has decreased below the linear
- * limit, XFS_IEXT_BUFSZ, then switch to using the contiguous
- * extent array.  Otherwise, use kmem_realloc() to adjust the
- * size to what is needed.
- */
-void
-xfs_iext_remove(
-	xfs_inode_t	*ip,		/* incore inode pointer */
-	xfs_extnum_t	idx,		/* index to begin removing exts */
-	int		ext_diff,	/* number of extents to remove */
-	int		state)		/* type of extent conversion */
-{
-	xfs_ifork_t	*ifp = xfs_iext_state_to_fork(ip, state);
-	xfs_extnum_t	nextents;	/* number of extents in file */
-	int		new_size;	/* size of extents after removal */
-
-	trace_xfs_iext_remove(ip, idx, state, _RET_IP_);
-
-	ASSERT(ext_diff > 0);
-	nextents = xfs_iext_count(ifp);
-	new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t);
-
-	if (new_size == 0) {
-		xfs_iext_destroy(ifp);
-	} else if (ifp->if_flags & XFS_IFEXTIREC) {
-		xfs_iext_remove_indirect(ifp, idx, ext_diff);
-	} else if (ifp->if_real_bytes) {
-		xfs_iext_remove_direct(ifp, idx, ext_diff);
-	} else {
-		xfs_iext_remove_inline(ifp, idx, ext_diff);
-	}
-	ifp->if_bytes = new_size;
-}
-
-/*
- * This removes ext_diff extents from the inline buffer, beginning
- * at extent index idx.
- */
-void
-xfs_iext_remove_inline(
-	xfs_ifork_t	*ifp,		/* inode fork pointer */
-	xfs_extnum_t	idx,		/* index to begin removing exts */
-	int		ext_diff)	/* number of extents to remove */
-{
-	int		nextents;	/* number of extents in file */
-
-	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
-	ASSERT(idx < XFS_INLINE_EXTS);
-	nextents = xfs_iext_count(ifp);
-	ASSERT(((nextents - ext_diff) > 0) &&
-	       (nextents - ext_diff) < XFS_INLINE_EXTS);
-
-	if (idx + ext_diff < nextents) {
-		memmove(&ifp->if_u2.if_inline_ext[idx],
-			&ifp->if_u2.if_inline_ext[idx + ext_diff],
-			(nextents - (idx + ext_diff)) *
-			sizeof(xfs_bmbt_rec_t));
-		memset(&ifp->if_u2.if_inline_ext[nextents - ext_diff],
-			0, ext_diff * sizeof(xfs_bmbt_rec_t));
-	} else {
-		memset(&ifp->if_u2.if_inline_ext[idx], 0,
-			ext_diff * sizeof(xfs_bmbt_rec_t));
-	}
-}
-
-/*
- * This removes ext_diff extents from a linear (direct) extent list,
- * beginning at extent index idx.  If the extents are being removed
- * from the end of the list (i.e. truncate) then we just need to re-
- * allocate the list to remove the extra space.  Otherwise, if the
- * extents are being removed from the middle of the existing extent
- * entries, then we first need to move the extent records beginning
- * at idx + ext_diff up in the list to overwrite the records being
- * removed, then remove the extra space via kmem_realloc.
- */
-void
-xfs_iext_remove_direct(
-	xfs_ifork_t	*ifp,		/* inode fork pointer */
-	xfs_extnum_t	idx,		/* index to begin removing exts */
-	int		ext_diff)	/* number of extents to remove */
-{
-	xfs_extnum_t	nextents;	/* number of extents in file */
-	int		new_size;	/* size of extents after removal */
-
-	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
-	new_size = ifp->if_bytes -
-		(ext_diff * sizeof(xfs_bmbt_rec_t));
-	nextents = xfs_iext_count(ifp);
-
-	if (new_size == 0) {
-		xfs_iext_destroy(ifp);
-		return;
-	}
-	/* Move extents up in the list (if needed) */
-	if (idx + ext_diff < nextents) {
-		memmove(&ifp->if_u1.if_extents[idx],
-			&ifp->if_u1.if_extents[idx + ext_diff],
-			(nextents - (idx + ext_diff)) *
-			sizeof(xfs_bmbt_rec_t));
-	}
-	memset(&ifp->if_u1.if_extents[nextents - ext_diff],
-		0, ext_diff * sizeof(xfs_bmbt_rec_t));
-	/*
-	 * Reallocate the direct extent list.  If the extents
-	 * will fit inside the inode then xfs_iext_realloc_direct
-	 * will switch from direct to inline extent allocation
-	 * mode for us.
-	 */
-	xfs_iext_realloc_direct(ifp, new_size);
-	ifp->if_bytes = new_size;
-}
-
-/*
- * This is called when incore extents are being removed from the
- * indirection array and the extents being removed span multiple extent
- * buffers.  The idx parameter contains the file extent index where we
- * want to begin removing extents, and the count parameter contains
- * how many extents need to be removed.
- *
- *    |-------|   |-------|
- *    | nex1  |   |       |    nex1 - number of extents before idx
- *    |-------|   | count |
- *    |       |   |       |    count - number of extents being removed at idx
- *    | count |   |-------|
- *    |       |   | nex2  |    nex2 - number of extents after idx + count
- *    |-------|   |-------|
- */
-void
-xfs_iext_remove_indirect(
-	xfs_ifork_t	*ifp,		/* inode fork pointer */
-	xfs_extnum_t	idx,		/* index to begin removing extents */
-	int		count)		/* number of extents to remove */
-{
-	xfs_ext_irec_t	*erp;		/* indirection array pointer */
-	int		erp_idx = 0;	/* indirection array index */
-	xfs_extnum_t	ext_cnt;	/* extents left to remove */
-	xfs_extnum_t	ext_diff;	/* extents to remove in current list */
-	xfs_extnum_t	nex1;		/* number of extents before idx */
-	xfs_extnum_t	nex2;		/* extents after idx + count */
-	int		page_idx = idx;	/* index in target extent list */
-
-	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-	erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0);
-	ASSERT(erp != NULL);
-	nex1 = page_idx;
-	ext_cnt = count;
-	while (ext_cnt) {
-		nex2 = MAX((erp->er_extcount - (nex1 + ext_cnt)), 0);
-		ext_diff = MIN(ext_cnt, (erp->er_extcount - nex1));
-		/*
-		 * Check for deletion of entire list;
-		 * xfs_iext_irec_remove() updates extent offsets.
-		 */
-		if (ext_diff == erp->er_extcount) {
-			xfs_iext_irec_remove(ifp, erp_idx);
-			ext_cnt -= ext_diff;
-			nex1 = 0;
-			if (ext_cnt) {
-				ASSERT(erp_idx < ifp->if_real_bytes /
-					XFS_IEXT_BUFSZ);
-				erp = &ifp->if_u1.if_ext_irec[erp_idx];
-				nex1 = 0;
-				continue;
-			} else {
-				break;
-			}
-		}
-		/* Move extents up (if needed) */
-		if (nex2) {
-			memmove(&erp->er_extbuf[nex1],
-				&erp->er_extbuf[nex1 + ext_diff],
-				nex2 * sizeof(xfs_bmbt_rec_t));
-		}
-		/* Zero out rest of page */
-		memset(&erp->er_extbuf[nex1 + nex2], 0, (XFS_IEXT_BUFSZ -
-			((nex1 + nex2) * sizeof(xfs_bmbt_rec_t))));
-		/* Update remaining counters */
-		erp->er_extcount -= ext_diff;
-		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -ext_diff);
-		ext_cnt -= ext_diff;
-		nex1 = 0;
-		erp_idx++;
-		erp++;
-	}
-	ifp->if_bytes -= count * sizeof(xfs_bmbt_rec_t);
-	xfs_iext_irec_compact(ifp);
-}
-
-/*
- * Create, destroy, or resize a linear (direct) block of extents.
- */
-void
-xfs_iext_realloc_direct(
-	xfs_ifork_t	*ifp,		/* inode fork pointer */
-	int		new_size)	/* new size of extents after adding */
-{
-	int		rnew_size;	/* real new size of extents */
-
-	rnew_size = new_size;
-
-	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC) ||
-		((new_size >= 0) && (new_size <= XFS_IEXT_BUFSZ) &&
-		 (new_size != ifp->if_real_bytes)));
-
-	/* Free extent records */
-	if (new_size == 0) {
-		xfs_iext_destroy(ifp);
-	}
-	/* Resize direct extent list and zero any new bytes */
-	else if (ifp->if_real_bytes) {
-		/* Check if extents will fit inside the inode */
-		if (new_size <= XFS_INLINE_EXTS * sizeof(xfs_bmbt_rec_t)) {
-			xfs_iext_direct_to_inline(ifp, new_size /
-				(uint)sizeof(xfs_bmbt_rec_t));
-			ifp->if_bytes = new_size;
-			return;
-		}
-		if (!is_power_of_2(new_size)) {
-			rnew_size = roundup_pow_of_two(new_size);
-		}
-		if (rnew_size != ifp->if_real_bytes) {
-			ifp->if_u1.if_extents =
-				kmem_realloc(ifp->if_u1.if_extents,
-						rnew_size, KM_NOFS);
-		}
-		if (rnew_size > ifp->if_real_bytes) {
-			memset(&ifp->if_u1.if_extents[ifp->if_bytes /
-				(uint)sizeof(xfs_bmbt_rec_t)], 0,
-				rnew_size - ifp->if_real_bytes);
-		}
-	}
-	/* Switch from the inline extent buffer to a direct extent list */
-	else {
-		if (!is_power_of_2(new_size)) {
-			rnew_size = roundup_pow_of_two(new_size);
-		}
-		xfs_iext_inline_to_direct(ifp, rnew_size);
-	}
-	ifp->if_real_bytes = rnew_size;
-	ifp->if_bytes = new_size;
-}
-
-/*
- * Switch from linear (direct) extent records to inline buffer.
- */
-void
-xfs_iext_direct_to_inline(
-	xfs_ifork_t	*ifp,		/* inode fork pointer */
-	xfs_extnum_t	nextents)	/* number of extents in file */
-{
-	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
-	ASSERT(nextents <= XFS_INLINE_EXTS);
-	/*
-	 * The inline buffer was zeroed when we switched
-	 * from inline to direct extent allocation mode,
-	 * so we don't need to clear it here.
-	 */
-	memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents,
-		nextents * sizeof(xfs_bmbt_rec_t));
-	kmem_free(ifp->if_u1.if_extents);
-	ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
-	ifp->if_real_bytes = 0;
-}
-
-/*
- * Switch from inline buffer to linear (direct) extent records.
- * new_size should already be rounded up to the next power of 2
- * by the caller (when appropriate), so use new_size as it is.
- * However, since new_size may be rounded up, we can't update
- * if_bytes here.  It is the caller's responsibility to update
- * if_bytes upon return.
- */
-void
-xfs_iext_inline_to_direct(
-	xfs_ifork_t	*ifp,		/* inode fork pointer */
-	int		new_size)	/* number of extents in file */
-{
-	ifp->if_u1.if_extents = kmem_alloc(new_size, KM_NOFS);
-	memset(ifp->if_u1.if_extents, 0, new_size);
-	if (ifp->if_bytes) {
-		memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext,
-			ifp->if_bytes);
-		memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
-			sizeof(xfs_bmbt_rec_t));
-	}
-	ifp->if_real_bytes = new_size;
-}
-
-/*
- * Resize an extent indirection array to new_size bytes.
- */
-STATIC void
-xfs_iext_realloc_indirect(
-	xfs_ifork_t	*ifp,		/* inode fork pointer */
-	int		new_size)	/* new indirection array size */
-{
-	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-	ASSERT(ifp->if_real_bytes);
-	ASSERT((new_size >= 0) &&
-	       (new_size != ((ifp->if_real_bytes / XFS_IEXT_BUFSZ) *
-			     sizeof(xfs_ext_irec_t))));
-	if (new_size == 0) {
-		xfs_iext_destroy(ifp);
-	} else {
-		ifp->if_u1.if_ext_irec =
-			kmem_realloc(ifp->if_u1.if_ext_irec, new_size, KM_NOFS);
-	}
-}
-
-/*
- * Switch from indirection array to linear (direct) extent allocations.
- */
-STATIC void
-xfs_iext_indirect_to_direct(
-	xfs_ifork_t	*ifp)		/* inode fork pointer */
-{
-	xfs_bmbt_rec_host_t *ep;	/* extent record pointer */
-	xfs_extnum_t	nextents;	/* number of extents in file */
-	int		size;		/* size of file extents */
-
-	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-	nextents = xfs_iext_count(ifp);
-	ASSERT(nextents <= XFS_LINEAR_EXTS);
-	size = nextents * sizeof(xfs_bmbt_rec_t);
-
-	xfs_iext_irec_compact_pages(ifp);
-	ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ);
-
-	ep = ifp->if_u1.if_ext_irec->er_extbuf;
-	kmem_free(ifp->if_u1.if_ext_irec);
-	ifp->if_flags &= ~XFS_IFEXTIREC;
-	ifp->if_u1.if_extents = ep;
-	ifp->if_bytes = size;
-	if (nextents < XFS_LINEAR_EXTS) {
-		xfs_iext_realloc_direct(ifp, size);
-	}
-}
-
-/*
- * Remove all records from the indirection array.
- */
-STATIC void
-xfs_iext_irec_remove_all(
-	struct xfs_ifork	*ifp)
-{
-	int		nlists;
-	int		i;
-
-	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-	for (i = 0; i < nlists; i++)
-		kmem_free(ifp->if_u1.if_ext_irec[i].er_extbuf);
-	kmem_free(ifp->if_u1.if_ext_irec);
-	ifp->if_flags &= ~XFS_IFEXTIREC;
-}
-
-/*
- * Free incore file extents.
- */
-void
-xfs_iext_destroy(
-	xfs_ifork_t	*ifp)		/* inode fork pointer */
-{
-	if (ifp->if_flags & XFS_IFEXTIREC) {
-		xfs_iext_irec_remove_all(ifp);
-	} else if (ifp->if_real_bytes) {
-		kmem_free(ifp->if_u1.if_extents);
-	} else if (ifp->if_bytes) {
-		memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
-			sizeof(xfs_bmbt_rec_t));
-	}
-	ifp->if_u1.if_extents = NULL;
-	ifp->if_real_bytes = 0;
-	ifp->if_bytes = 0;
-}
-
-/*
- * Return a pointer to the extent record for file system block bno.
- */
-xfs_bmbt_rec_host_t *			/* pointer to found extent record */
-xfs_iext_bno_to_ext(
-	xfs_ifork_t	*ifp,		/* inode fork pointer */
-	xfs_fileoff_t	bno,		/* block number to search for */
-	xfs_extnum_t	*idxp)		/* index of target extent */
-{
-	xfs_bmbt_rec_host_t *base;	/* pointer to first extent */
-	xfs_filblks_t	blockcount = 0;	/* number of blocks in extent */
-	xfs_bmbt_rec_host_t *ep = NULL;	/* pointer to target extent */
-	xfs_ext_irec_t	*erp = NULL;	/* indirection array pointer */
-	int		high;		/* upper boundary in search */
-	xfs_extnum_t	idx = 0;	/* index of target extent */
-	int		low;		/* lower boundary in search */
-	xfs_extnum_t	nextents;	/* number of file extents */
-	xfs_fileoff_t	startoff = 0;	/* start offset of extent */
-
-	nextents = xfs_iext_count(ifp);
-	if (nextents == 0) {
-		*idxp = 0;
-		return NULL;
-	}
-	low = 0;
-	if (ifp->if_flags & XFS_IFEXTIREC) {
-		/* Find target extent list */
-		int	erp_idx = 0;
-		erp = xfs_iext_bno_to_irec(ifp, bno, &erp_idx);
-		base = erp->er_extbuf;
-		high = erp->er_extcount - 1;
-	} else {
-		base = ifp->if_u1.if_extents;
-		high = nextents - 1;
-	}
-	/* Binary search extent records */
-	while (low <= high) {
-		idx = (low + high) >> 1;
-		ep = base + idx;
-		startoff = xfs_bmbt_get_startoff(ep);
-		blockcount = xfs_bmbt_get_blockcount(ep);
-		if (bno < startoff) {
-			high = idx - 1;
-		} else if (bno >= startoff + blockcount) {
-			low = idx + 1;
-		} else {
-			/* Convert back to file-based extent index */
-			if (ifp->if_flags & XFS_IFEXTIREC) {
-				idx += erp->er_extoff;
-			}
-			*idxp = idx;
-			return ep;
-		}
-	}
-	/* Convert back to file-based extent index */
-	if (ifp->if_flags & XFS_IFEXTIREC) {
-		idx += erp->er_extoff;
-	}
-	if (bno >= startoff + blockcount) {
-		if (++idx == nextents) {
-			ep = NULL;
-		} else {
-			ep = xfs_iext_get_ext(ifp, idx);
-		}
-	}
-	*idxp = idx;
-	return ep;
-}
-
-/*
- * Return a pointer to the indirection array entry containing the
- * extent record for filesystem block bno.  Store the index of the
- * target irec in *erp_idxp.
- */
-xfs_ext_irec_t *			/* pointer to found extent record */
-xfs_iext_bno_to_irec(
-	xfs_ifork_t	*ifp,		/* inode fork pointer */
-	xfs_fileoff_t	bno,		/* block number to search for */
-	int		*erp_idxp)	/* irec index of target ext list */
-{
-	xfs_ext_irec_t	*erp = NULL;	/* indirection array pointer */
-	xfs_ext_irec_t	*erp_next;	/* next indirection array entry */
-	int		erp_idx;	/* indirection array index */
-	int		nlists;		/* number of extent irec's (lists) */
-	int		high;		/* binary search upper limit */
-	int		low;		/* binary search lower limit */
-
-	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-	erp_idx = 0;
-	low = 0;
-	high = nlists - 1;
-	while (low <= high) {
-		erp_idx = (low + high) >> 1;
-		erp = &ifp->if_u1.if_ext_irec[erp_idx];
-		erp_next = erp_idx < nlists - 1 ? erp + 1 : NULL;
-		if (bno < xfs_bmbt_get_startoff(erp->er_extbuf)) {
-			high = erp_idx - 1;
-		} else if (erp_next && bno >=
-			   xfs_bmbt_get_startoff(erp_next->er_extbuf)) {
-			low = erp_idx + 1;
-		} else {
-			break;
-		}
-	}
-	*erp_idxp = erp_idx;
-	return erp;
-}
-
-/*
- * Return a pointer to the indirection array entry containing the
- * extent record at file extent index *idxp.  Store the index of the
- * target irec in *erp_idxp and store the page index of the target
- * extent record in *idxp.
- */
-xfs_ext_irec_t *
-xfs_iext_idx_to_irec(
-	xfs_ifork_t	*ifp,		/* inode fork pointer */
-	xfs_extnum_t	*idxp,		/* extent index (file -> page) */
-	int		*erp_idxp,	/* pointer to target irec */
-	int		realloc)	/* new bytes were just added */
-{
-	xfs_ext_irec_t	*prev;		/* pointer to previous irec */
-	xfs_ext_irec_t	*erp = NULL;	/* pointer to current irec */
-	int		erp_idx;	/* indirection array index */
-	int		nlists;		/* number of irec's (ex lists) */
-	int		high;		/* binary search upper limit */
-	int		low;		/* binary search lower limit */
-	xfs_extnum_t	page_idx = *idxp; /* extent index in target list */
-
-	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-	ASSERT(page_idx >= 0);
-	ASSERT(page_idx <= xfs_iext_count(ifp));
-	ASSERT(page_idx < xfs_iext_count(ifp) || realloc);
-
-	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-	erp_idx = 0;
-	low = 0;
-	high = nlists - 1;
-
-	/* Binary search extent irec's */
-	while (low <= high) {
-		erp_idx = (low + high) >> 1;
-		erp = &ifp->if_u1.if_ext_irec[erp_idx];
-		prev = erp_idx > 0 ? erp - 1 : NULL;
-		if (page_idx < erp->er_extoff || (page_idx == erp->er_extoff &&
-		    realloc && prev && prev->er_extcount < XFS_LINEAR_EXTS)) {
-			high = erp_idx - 1;
-		} else if (page_idx > erp->er_extoff + erp->er_extcount ||
-			   (page_idx == erp->er_extoff + erp->er_extcount &&
-			    !realloc)) {
-			low = erp_idx + 1;
-		} else if (page_idx == erp->er_extoff + erp->er_extcount &&
-			   erp->er_extcount == XFS_LINEAR_EXTS) {
-			ASSERT(realloc);
-			page_idx = 0;
-			erp_idx++;
-			erp = erp_idx < nlists ? erp + 1 : NULL;
-			break;
-		} else {
-			page_idx -= erp->er_extoff;
-			break;
-		}
-	}
-	*idxp = page_idx;
-	*erp_idxp = erp_idx;
-	return erp;
-}
-
-/*
- * Allocate and initialize an indirection array once the space needed
- * for incore extents increases above XFS_IEXT_BUFSZ.
- */
-void
-xfs_iext_irec_init(
-	xfs_ifork_t	*ifp)		/* inode fork pointer */
-{
-	xfs_ext_irec_t	*erp;		/* indirection array pointer */
-	xfs_extnum_t	nextents;	/* number of extents in file */
-
-	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
-	nextents = xfs_iext_count(ifp);
-	ASSERT(nextents <= XFS_LINEAR_EXTS);
-
-	erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS);
-
-	if (nextents == 0) {
-		ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
-	} else if (!ifp->if_real_bytes) {
-		xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ);
-	} else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) {
-		xfs_iext_realloc_direct(ifp, XFS_IEXT_BUFSZ);
-	}
-	erp->er_extbuf = ifp->if_u1.if_extents;
-	erp->er_extcount = nextents;
-	erp->er_extoff = 0;
-
-	ifp->if_flags |= XFS_IFEXTIREC;
-	ifp->if_real_bytes = XFS_IEXT_BUFSZ;
-	ifp->if_bytes = nextents * sizeof(xfs_bmbt_rec_t);
-	ifp->if_u1.if_ext_irec = erp;
-
-	return;
-}
-
-/*
- * Allocate and initialize a new entry in the indirection array.
- */
-xfs_ext_irec_t *
-xfs_iext_irec_new(
-	xfs_ifork_t	*ifp,		/* inode fork pointer */
-	int		erp_idx)	/* index for new irec */
-{
-	xfs_ext_irec_t	*erp;		/* indirection array pointer */
-	int		i;		/* loop counter */
-	int		nlists;		/* number of irec's (ex lists) */
-
-	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-
-	/* Resize indirection array */
-	xfs_iext_realloc_indirect(ifp, ++nlists *
-				  sizeof(xfs_ext_irec_t));
-	/*
-	 * Move records down in the array so the
-	 * new page can use erp_idx.
-	 */
-	erp = ifp->if_u1.if_ext_irec;
-	for (i = nlists - 1; i > erp_idx; i--) {
-		memmove(&erp[i], &erp[i-1], sizeof(xfs_ext_irec_t));
-	}
-	ASSERT(i == erp_idx);
-
-	/* Initialize new extent record */
-	erp = ifp->if_u1.if_ext_irec;
-	erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
-	ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
-	memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ);
-	erp[erp_idx].er_extcount = 0;
-	erp[erp_idx].er_extoff = erp_idx > 0 ?
-		erp[erp_idx-1].er_extoff + erp[erp_idx-1].er_extcount : 0;
-	return (&erp[erp_idx]);
-}
-
-/*
- * Remove a record from the indirection array.
- */
-void
-xfs_iext_irec_remove(
-	xfs_ifork_t	*ifp,		/* inode fork pointer */
-	int		erp_idx)	/* irec index to remove */
-{
-	xfs_ext_irec_t	*erp;		/* indirection array pointer */
-	int		i;		/* loop counter */
-	int		nlists;		/* number of irec's (ex lists) */
-
-	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-	erp = &ifp->if_u1.if_ext_irec[erp_idx];
-	if (erp->er_extbuf) {
-		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1,
-			-erp->er_extcount);
-		kmem_free(erp->er_extbuf);
-	}
-	/* Compact extent records */
-	erp = ifp->if_u1.if_ext_irec;
-	for (i = erp_idx; i < nlists - 1; i++) {
-		memmove(&erp[i], &erp[i+1], sizeof(xfs_ext_irec_t));
-	}
-	/*
-	 * Manually free the last extent record from the indirection
-	 * array.  A call to xfs_iext_realloc_indirect() with a size
-	 * of zero would result in a call to xfs_iext_destroy() which
-	 * would in turn call this function again, creating a nasty
-	 * infinite loop.
-	 */
-	if (--nlists) {
-		xfs_iext_realloc_indirect(ifp,
-			nlists * sizeof(xfs_ext_irec_t));
-	} else {
-		kmem_free(ifp->if_u1.if_ext_irec);
-	}
-	ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
-}
-
-/*
- * This is called to clean up large amounts of unused memory allocated
- * by the indirection array.  Before compacting anything though, verify
- * that the indirection array is still needed and switch back to the
- * linear extent list (or even the inline buffer) if possible.  The
- * compaction policy is as follows:
- *
- * Full Compaction:    Extents fit into a single page (or inline buffer)
- * Partial Compaction: Extents occupy less than 50% of allocated space
- * No Compaction:      Extents occupy at least 50% of allocated space
- */
-void
-xfs_iext_irec_compact(
-	xfs_ifork_t	*ifp)		/* inode fork pointer */
-{
-	xfs_extnum_t	nextents;	/* number of extents in file */
-	int		nlists;		/* number of irec's (ex lists) */
-
-	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-	nextents = xfs_iext_count(ifp);
-
-	if (nextents == 0) {
-		xfs_iext_destroy(ifp);
-	} else if (nextents <= XFS_INLINE_EXTS) {
-		xfs_iext_indirect_to_direct(ifp);
-		xfs_iext_direct_to_inline(ifp, nextents);
-	} else if (nextents <= XFS_LINEAR_EXTS) {
-		xfs_iext_indirect_to_direct(ifp);
-	} else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) {
-		xfs_iext_irec_compact_pages(ifp);
-	}
-}
-
-/*
- * Combine extents from neighboring extent pages.
- */
-void
-xfs_iext_irec_compact_pages(
-	xfs_ifork_t	*ifp)		/* inode fork pointer */
-{
-	xfs_ext_irec_t	*erp, *erp_next;/* pointers to irec entries */
-	int		erp_idx = 0;	/* indirection array index */
-	int		nlists;		/* number of irec's (ex lists) */
-
-	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-	while (erp_idx < nlists - 1) {
-		erp = &ifp->if_u1.if_ext_irec[erp_idx];
-		erp_next = erp + 1;
-		if (erp_next->er_extcount <=
-		    (XFS_LINEAR_EXTS - erp->er_extcount)) {
-			memcpy(&erp->er_extbuf[erp->er_extcount],
-				erp_next->er_extbuf, erp_next->er_extcount *
-				sizeof(xfs_bmbt_rec_t));
-			erp->er_extcount += erp_next->er_extcount;
-			/*
-			 * Free page before removing extent record
-			 * so er_extoffs don't get modified in
-			 * xfs_iext_irec_remove.
-			 */
-			kmem_free(erp_next->er_extbuf);
-			erp_next->er_extbuf = NULL;
-			xfs_iext_irec_remove(ifp, erp_idx + 1);
-			nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-		} else {
-			erp_idx++;
-		}
-	}
-}
-
-/*
- * This is called to update the er_extoff field in the indirection
- * array when extents have been added or removed from one of the
- * extent lists.  erp_idx contains the irec index to begin updating
- * at and ext_diff contains the number of extents that were added
- * or removed.
- */
-void
-xfs_iext_irec_update_extoffs(
-	xfs_ifork_t	*ifp,		/* inode fork pointer */
-	int		erp_idx,	/* irec index to update */
-	int		ext_diff)	/* number of new extents */
-{
-	int		i;		/* loop counter */
-	int		nlists;		/* number of irec's (ex lists) */
-
-	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-	for (i = erp_idx; i < nlists; i++) {
-		ifp->if_u1.if_ext_irec[i].er_extoff += ext_diff;
-	}
-}
-
-/*
  * Initialize an inode's copy-on-write fork.
  */
 void

···

 	ip->i_cowfp->if_flags = XFS_IFEXTENTS;
 	ip->i_cformat = XFS_DINODE_FMT_EXTENTS;
 	ip->i_cnextents = 0;
-}
-
-/*
- * Lookup the extent covering bno.
- *
- * If there is an extent covering bno return the extent index, and store the
- * expanded extent structure in *gotp, and the extent index in *idxp.
- * If there is no extent covering bno, but there is an extent after it (e.g.
- * it lies in a hole) return that extent in *gotp and its index in *idxp
- * instead.
- * If bno is beyond the last extent return false, and return the index after
- * the last valid index in *idxp.
- */
-bool
-xfs_iext_lookup_extent(
-	struct xfs_inode	*ip,
-	struct xfs_ifork	*ifp,
-	xfs_fileoff_t		bno,
-	xfs_extnum_t		*idxp,
-	struct xfs_bmbt_irec	*gotp)
-{
-	struct xfs_bmbt_rec_host *ep;
-
-	XFS_STATS_INC(ip->i_mount, xs_look_exlist);
-
-	ep = xfs_iext_bno_to_ext(ifp, bno, idxp);
-	if (!ep)
-		return false;
-	xfs_bmbt_get_all(ep, gotp);
-	return true;
-}
-
-/*
- * Return true if there is an extent at index idx, and return the expanded
- * extent structure at idx in that case.  Else return false.
868 - */ 869 - bool 870 - xfs_iext_get_extent( 871 - struct xfs_ifork *ifp, 872 - xfs_extnum_t idx, 873 - struct xfs_bmbt_irec *gotp) 874 - { 875 - if (idx < 0 || idx >= xfs_iext_count(ifp)) 876 - return false; 877 - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), gotp); 878 - return true; 879 - } 880 - 881 - void 882 - xfs_iext_update_extent( 883 - struct xfs_ifork *ifp, 884 - xfs_extnum_t idx, 885 - struct xfs_bmbt_irec *gotp) 886 - { 887 - ASSERT(idx >= 0); 888 - ASSERT(idx < xfs_iext_count(ifp)); 889 - 890 - xfs_bmbt_set_all(xfs_iext_get_ext(ifp, idx), gotp); 891 1976 }
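The removed xfs_iext_lookup_extent comment above states the lookup contract that the new cursor-based code must preserve: return the extent covering bno; if bno sits in a hole, return the next extent instead; if bno is past the last extent, return false and leave the index one past the end. A minimal userspace sketch of that contract over a plain sorted array (all names here are illustrative, not XFS API):

```c
#include <stdbool.h>
#include <stdint.h>

/* Toy stand-in for an incore extent mapping. */
struct irec {
	uint64_t	startoff;	/* first file block of the mapping */
	uint64_t	blockcount;	/* length of the mapping in blocks */
};

/*
 * Same contract as the old xfs_iext_lookup_extent: return the extent
 * covering bno, else the first extent after bno, else false with *idxp
 * set one past the last valid index.  The list is sorted by startoff
 * and has no overlaps, so the first extent ending beyond bno is the
 * answer in both of the "true" cases.
 */
static bool lookup_extent(const struct irec *ext, int nextents,
			  uint64_t bno, int *idxp, struct irec *gotp)
{
	int i;

	for (i = 0; i < nextents; i++) {
		if (ext[i].startoff + ext[i].blockcount > bno) {
			*idxp = i;
			*gotp = ext[i];
			return true;
		}
	}
	*idxp = nextents;		/* index after the last valid one */
	return false;
}
```

The real code walks a btree rather than scanning linearly, but the three-way result (covering extent / next extent / false) is the same.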
+65 -73
fs/xfs/libxfs/xfs_inode_fork.h
···
  struct xfs_dinode;

  /*
- * The following xfs_ext_irec_t struct introduces a second (top) level
- * to the in-core extent allocation scheme. These structs are allocated
- * in a contiguous block, creating an indirection array where each entry
- * (irec) contains a pointer to a buffer of in-core extent records which
- * it manages. Each extent buffer is 4k in size, since 4k is the system
- * page size on Linux i386 and systems with larger page sizes don't seem
- * to gain much, if anything, by using their native page size as the
- * extent buffer size. Also, using 4k extent buffers everywhere provides
- * a consistent interface for CXFS across different platforms.
- *
- * There is currently no limit on the number of irec's (extent lists)
- * allowed, so heavily fragmented files may require an indirection array
- * which spans multiple system pages of memory. The number of extents
- * which would require this amount of contiguous memory is very large
- * and should not cause problems in the foreseeable future. However,
- * if the memory needed for the contiguous array ever becomes a problem,
- * it is possible that a third level of indirection may be required.
- */
- typedef struct xfs_ext_irec {
- 	xfs_bmbt_rec_host_t *er_extbuf;	/* block of extent records */
- 	xfs_extnum_t	er_extoff;	/* extent offset in file */
- 	xfs_extnum_t	er_extcount;	/* number of extents in page/block */
- } xfs_ext_irec_t;
-
- /*
  * File incore extent information, present for each of data & attr forks.
  */
- #define	XFS_IEXT_BUFSZ		4096
- #define	XFS_LINEAR_EXTS		(XFS_IEXT_BUFSZ / (uint)sizeof(xfs_bmbt_rec_t))
- #define	XFS_INLINE_EXTS		2
- #define	XFS_INLINE_DATA		32
  typedef struct xfs_ifork {
  	int			if_bytes;	/* bytes in if_u1 */
  	int			if_real_bytes;	/* bytes allocated in if_u1 */
  	struct xfs_btree_block	*if_broot;	/* file's incore btree root */
  	short			if_broot_bytes;	/* bytes allocated for root */
  	unsigned char		if_flags;	/* per-fork flags */
+ 	int			if_height;	/* height of the extent tree */
  	union {
- 		xfs_bmbt_rec_host_t *if_extents;/* linear map file exts */
- 		xfs_ext_irec_t	*if_ext_irec;	/* irec map file exts */
+ 		void		*if_root;	/* extent tree root */
  		char		*if_data;	/* inline file data */
  	} if_u1;
- 	union {
- 		xfs_bmbt_rec_host_t if_inline_ext[XFS_INLINE_EXTS];
- 				/* very small file extents */
- 		char		if_inline_data[XFS_INLINE_DATA];
- 				/* very small file data */
- 		xfs_dev_t	if_rdev;	/* dev number if special */
- 		uuid_t		if_uuid;	/* mount point value */
- 	} if_u2;
  } xfs_ifork_t;

  /*
···
  #define	XFS_IFINLINE	0x01	/* Inline data is read in */
  #define	XFS_IFEXTENTS	0x02	/* All extent pointers are read in */
  #define	XFS_IFBROOT	0x04	/* i_broot points to the bmap b-tree root */
- #define	XFS_IFEXTIREC	0x08	/* Indirection array of extent blocks */

  /*
  * Fork handling.
···
  			int);
  void		xfs_init_local_fork(struct xfs_inode *, int, const void *, int);

- struct xfs_bmbt_rec_host *
- 		xfs_iext_get_ext(struct xfs_ifork *, xfs_extnum_t);
- xfs_extnum_t	xfs_iext_count(struct xfs_ifork *);
- void		xfs_iext_insert(struct xfs_inode *, xfs_extnum_t, xfs_extnum_t,
- 			struct xfs_bmbt_irec *, int);
- void		xfs_iext_add(struct xfs_ifork *, xfs_extnum_t, int);
- void		xfs_iext_add_indirect_multi(struct xfs_ifork *, int,
- 			xfs_extnum_t, int);
- void		xfs_iext_remove(struct xfs_inode *, xfs_extnum_t, int, int);
- void		xfs_iext_remove_inline(struct xfs_ifork *, xfs_extnum_t, int);
- void		xfs_iext_remove_direct(struct xfs_ifork *, xfs_extnum_t, int);
- void		xfs_iext_remove_indirect(struct xfs_ifork *, xfs_extnum_t, int);
- void		xfs_iext_realloc_direct(struct xfs_ifork *, int);
- void		xfs_iext_direct_to_inline(struct xfs_ifork *, xfs_extnum_t);
- void		xfs_iext_inline_to_direct(struct xfs_ifork *, int);
+ xfs_extnum_t	xfs_iext_count(struct xfs_ifork *ifp);
+ void		xfs_iext_insert(struct xfs_inode *, struct xfs_iext_cursor *cur,
+ 			struct xfs_bmbt_irec *, int);
+ void		xfs_iext_remove(struct xfs_inode *, struct xfs_iext_cursor *,
+ 			int);
  void		xfs_iext_destroy(struct xfs_ifork *);
- struct xfs_bmbt_rec_host *
- 		xfs_iext_bno_to_ext(struct xfs_ifork *, xfs_fileoff_t, int *);
- struct xfs_ext_irec *
- 		xfs_iext_bno_to_irec(struct xfs_ifork *, xfs_fileoff_t, int *);
- struct xfs_ext_irec *
- 		xfs_iext_idx_to_irec(struct xfs_ifork *, xfs_extnum_t *, int *,
- 			int);
- void		xfs_iext_irec_init(struct xfs_ifork *);
- struct xfs_ext_irec *
- 		xfs_iext_irec_new(struct xfs_ifork *, int);
- void		xfs_iext_irec_remove(struct xfs_ifork *, int);
- void		xfs_iext_irec_compact(struct xfs_ifork *);
- void		xfs_iext_irec_compact_pages(struct xfs_ifork *);
- void		xfs_iext_irec_compact_full(struct xfs_ifork *);
- void		xfs_iext_irec_update_extoffs(struct xfs_ifork *, int, int);

  bool		xfs_iext_lookup_extent(struct xfs_inode *ip,
  			struct xfs_ifork *ifp, xfs_fileoff_t bno,
- 			xfs_extnum_t *idxp, struct xfs_bmbt_irec *gotp);
- bool		xfs_iext_get_extent(struct xfs_ifork *ifp, xfs_extnum_t idx,
+ 			struct xfs_iext_cursor *cur,
  			struct xfs_bmbt_irec *gotp);
- void		xfs_iext_update_extent(struct xfs_ifork *ifp, xfs_extnum_t idx,
+ bool		xfs_iext_lookup_extent_before(struct xfs_inode *ip,
+ 			struct xfs_ifork *ifp, xfs_fileoff_t *end,
+ 			struct xfs_iext_cursor *cur,
  			struct xfs_bmbt_irec *gotp);
+ bool		xfs_iext_get_extent(struct xfs_ifork *ifp,
+ 			struct xfs_iext_cursor *cur,
+ 			struct xfs_bmbt_irec *gotp);
+ void		xfs_iext_update_extent(struct xfs_inode *ip, int state,
+ 			struct xfs_iext_cursor *cur,
+ 			struct xfs_bmbt_irec *gotp);
+
+ void		xfs_iext_first(struct xfs_ifork *, struct xfs_iext_cursor *);
+ void		xfs_iext_last(struct xfs_ifork *, struct xfs_iext_cursor *);
+ void		xfs_iext_next(struct xfs_ifork *, struct xfs_iext_cursor *);
+ void		xfs_iext_prev(struct xfs_ifork *, struct xfs_iext_cursor *);
+
+ static inline bool xfs_iext_next_extent(struct xfs_ifork *ifp,
+ 		struct xfs_iext_cursor *cur, struct xfs_bmbt_irec *gotp)
+ {
+ 	xfs_iext_next(ifp, cur);
+ 	return xfs_iext_get_extent(ifp, cur, gotp);
+ }
+
+ static inline bool xfs_iext_prev_extent(struct xfs_ifork *ifp,
+ 		struct xfs_iext_cursor *cur, struct xfs_bmbt_irec *gotp)
+ {
+ 	xfs_iext_prev(ifp, cur);
+ 	return xfs_iext_get_extent(ifp, cur, gotp);
+ }
+
+ /*
+ * Return the extent after cur in gotp without updating the cursor.
+ */
+ static inline bool xfs_iext_peek_next_extent(struct xfs_ifork *ifp,
+ 		struct xfs_iext_cursor *cur, struct xfs_bmbt_irec *gotp)
+ {
+ 	struct xfs_iext_cursor ncur = *cur;
+
+ 	xfs_iext_next(ifp, &ncur);
+ 	return xfs_iext_get_extent(ifp, &ncur, gotp);
+ }
+
+ /*
+ * Return the extent before cur in gotp without updating the cursor.
+ */
+ static inline bool xfs_iext_peek_prev_extent(struct xfs_ifork *ifp,
+ 		struct xfs_iext_cursor *cur, struct xfs_bmbt_irec *gotp)
+ {
+ 	struct xfs_iext_cursor ncur = *cur;
+
+ 	xfs_iext_prev(ifp, &ncur);
+ 	return xfs_iext_get_extent(ifp, &ncur, gotp);
+ }
+
+ #define for_each_xfs_iext(ifp, ext, got)	\
+ 	for (xfs_iext_first((ifp), (ext)); \
+ 	     xfs_iext_get_extent((ifp), (ext), (got)); \
+ 	     xfs_iext_next((ifp), (ext)))

  extern struct kmem_zone	*xfs_ifork_zone;

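The new header replaces raw extent indices with an opaque cursor, and implements "peek" by copying the small cursor struct into a scratch copy before stepping it, exactly as xfs_iext_peek_next_extent does above. A hedged userspace sketch of that pattern over a flat array (the names here are illustrative stand-ins, not the XFS functions):

```c
#include <stdbool.h>

/* Illustrative stand-ins: a flat array plays the role of the extent tree. */
struct list {
	const int	*vals;
	int		nr;
};

struct cursor {
	int		pos;	/* position within the list */
};

static void cur_first(struct cursor *cur) { cur->pos = 0; }
static void cur_next(struct cursor *cur) { cur->pos++; }

/* Like xfs_iext_get_extent: false means the cursor is off the end. */
static bool cur_get(const struct list *l, const struct cursor *cur, int *out)
{
	if (cur->pos < 0 || cur->pos >= l->nr)
		return false;
	*out = l->vals[cur->pos];
	return true;
}

/*
 * Peek at the next element without moving the caller's cursor, the same
 * way xfs_iext_peek_next_extent copies *cur into a scratch cursor.
 * Because the cursor is a tiny value type, the copy is cheap.
 */
static bool cur_peek_next(const struct list *l, const struct cursor *cur,
			  int *out)
{
	struct cursor ncur = *cur;	/* scratch copy; *cur is untouched */

	cur_next(&ncur);
	return cur_get(l, &ncur, out);
}
```

The for_each_xfs_iext macro is the same three pieces (first, get, next) rolled into a for loop.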
+12 -12
fs/xfs/libxfs/xfs_log_format.h
···
  * (if any) is indicated in the ilf_dsize field. Changes to this structure
  * must be added on to the end.
  */
- typedef struct xfs_inode_log_format {
+ struct xfs_inode_log_format {
  	uint16_t		ilf_type;	/* inode log item type */
  	uint16_t		ilf_size;	/* size of this item */
  	uint32_t		ilf_fields;	/* flags for fields logged */
···
  	uint64_t		ilf_ino;	/* inode number */
  	union {
  		uint32_t	ilfu_rdev;	/* rdev value for dev inode*/
- 		uuid_t		ilfu_uuid;	/* mount point value */
+ 		u8		__pad[16];	/* unused */
  	} ilf_u;
  	int64_t			ilf_blkno;	/* blkno of inode buffer */
  	int32_t			ilf_len;	/* len of inode buffer */
  	int32_t			ilf_boffset;	/* off of inode in buffer */
- } xfs_inode_log_format_t;
+ };

  /*
  * Old 32 bit systems will log in this format without the 64 bit
···
  	uint64_t		ilf_ino;	/* inode number */
  	union {
  		uint32_t	ilfu_rdev;	/* rdev value for dev inode*/
- 		uuid_t		ilfu_uuid;	/* mount point value */
+ 		u8		__pad[16];	/* unused */
  	} ilf_u;
  	int64_t			ilf_blkno;	/* blkno of inode buffer */
  	int32_t			ilf_len;	/* len of inode buffer */
···
  #define	XFS_ILOG_DEXT	0x004	/* log i_df.if_extents */
  #define	XFS_ILOG_DBROOT	0x008	/* log i_df.i_broot */
  #define	XFS_ILOG_DEV	0x010	/* log the dev field */
- #define	XFS_ILOG_UUID	0x020	/* log the uuid field */
+ #define	XFS_ILOG_UUID	0x020	/* added long ago, but never used */
  #define	XFS_ILOG_ADATA	0x040	/* log i_af.if_data */
  #define	XFS_ILOG_AEXT	0x080	/* log i_af.if_extents */
  #define	XFS_ILOG_ABROOT	0x100	/* log i_af.i_broot */
···
  #define	XFS_ILOG_NONCORE	(XFS_ILOG_DDATA | XFS_ILOG_DEXT | \
  				 XFS_ILOG_DBROOT | XFS_ILOG_DEV | \
- 				 XFS_ILOG_UUID | XFS_ILOG_ADATA | \
- 				 XFS_ILOG_AEXT | XFS_ILOG_ABROOT | \
- 				 XFS_ILOG_DOWNER | XFS_ILOG_AOWNER)
+ 				 XFS_ILOG_ADATA | XFS_ILOG_AEXT | \
+ 				 XFS_ILOG_ABROOT | XFS_ILOG_DOWNER | \
+ 				 XFS_ILOG_AOWNER)

  #define	XFS_ILOG_DFORK		(XFS_ILOG_DDATA | XFS_ILOG_DEXT | \
  				 XFS_ILOG_DBROOT)
···
  #define	XFS_ILOG_ALL		(XFS_ILOG_CORE | XFS_ILOG_DDATA | \
  				 XFS_ILOG_DEXT | XFS_ILOG_DBROOT | \
- 				 XFS_ILOG_DEV | XFS_ILOG_UUID | \
- 				 XFS_ILOG_ADATA | XFS_ILOG_AEXT | \
- 				 XFS_ILOG_ABROOT | XFS_ILOG_TIMESTAMP | \
- 				 XFS_ILOG_DOWNER | XFS_ILOG_AOWNER)
+ 				 XFS_ILOG_DEV | XFS_ILOG_ADATA | \
+ 				 XFS_ILOG_AEXT | XFS_ILOG_ABROOT | \
+ 				 XFS_ILOG_TIMESTAMP | XFS_ILOG_DOWNER | \
+ 				 XFS_ILOG_AOWNER)

  static inline int xfs_ilog_fbroot(int w)
  {
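The log-format change above keeps the XFS_ILOG_UUID bit value reserved (so old logs still parse) while removing it from the composite masks, since nothing logs the uuid field anymore. A small sketch of how those bitmasks compose, using the flag values shown in the diff (XFS_ILOG_DDATA's value 0x002 is not in the hunk and is assumed here; DOWNER/AOWNER are omitted for the same reason, so the NONCORE mask below is a subset of the real one):

```c
/* Inode log flag values from xfs_log_format.h (DDATA assumed 0x002). */
enum {
	XFS_ILOG_DDATA	= 0x002,
	XFS_ILOG_DEXT	= 0x004,
	XFS_ILOG_DBROOT	= 0x008,
	XFS_ILOG_DEV	= 0x010,
	XFS_ILOG_UUID	= 0x020,	/* reserved, no longer in any mask */
	XFS_ILOG_ADATA	= 0x040,
	XFS_ILOG_AEXT	= 0x080,
	XFS_ILOG_ABROOT	= 0x100,
};

/* The data-fork mask exactly as the header composes it. */
#define XFS_ILOG_DFORK		(XFS_ILOG_DDATA | XFS_ILOG_DEXT | \
				 XFS_ILOG_DBROOT)

/* Post-patch NONCORE composition (owner bits omitted): no UUID bit. */
#define ILOG_NONCORE_SUBSET	(XFS_ILOG_DDATA | XFS_ILOG_DEXT | \
				 XFS_ILOG_DBROOT | XFS_ILOG_DEV | \
				 XFS_ILOG_ADATA | XFS_ILOG_AEXT | \
				 XFS_ILOG_ABROOT)
```

Testing a flag is then a simple AND: `(ilf_fields & XFS_ILOG_DFORK)` tells you whether any data-fork change was logged.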
+1
fs/xfs/libxfs/xfs_refcount.c
···
  #include "xfs_bmap.h"
  #include "xfs_refcount_btree.h"
  #include "xfs_alloc.h"
+ #include "xfs_errortag.h"
  #include "xfs_error.h"
  #include "xfs_trace.h"
  #include "xfs_cksum.h"
+1
fs/xfs/libxfs/xfs_rmap.c
···
  #include "xfs_rmap_btree.h"
  #include "xfs_trans_space.h"
  #include "xfs_trace.h"
+ #include "xfs_errortag.h"
  #include "xfs_error.h"
  #include "xfs_extent_busy.h"
  #include "xfs_bmap.h"
+12 -1
fs/xfs/libxfs/xfs_rtbitmap.c
···
  	/*
  	 * Compute a mask of relevant bits.
  	 */
- 	bit = 0;
  	mask = ((xfs_rtword_t)1 << lastbit) - 1;
  	/*
  	 * Set/clear the active bits.
···
  	keys[0].ar_blockcount = keys[1].ar_blockcount = 0;

  	return xfs_rtalloc_query_range(tp, &keys[0], &keys[1], fn, priv);
+ }
+
+ /*
+ * Verify that a realtime block number pointer doesn't point off the
+ * end of the realtime device.
+ */
+ bool
+ xfs_verify_rtbno(
+ 	struct xfs_mount	*mp,
+ 	xfs_rtblock_t		rtbno)
+ {
+ 	return rtbno < mp->m_sb.sb_rblocks;
  }
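xfs_verify_rtbno above is one of the small typed bounds verifiers this series adds for the scrubber: an unsigned compare against the device size rejects both out-of-range and "negative" (wrapped) block numbers in one test. A standalone sketch of the same check (struct and function names here are stand-ins):

```c
#include <stdbool.h>
#include <stdint.h>

/* Minimal stand-in for the mount geometry the real helper consults. */
struct mount {
	uint64_t	sb_rblocks;	/* blocks in the realtime device */
};

/*
 * Mirrors xfs_verify_rtbno: a realtime block number is valid iff it
 * falls inside the realtime device.  Because rtbno is unsigned, a
 * wrapped "negative" value becomes huge and fails the same compare.
 */
static bool verify_rtbno(const struct mount *mp, uint64_t rtbno)
{
	return rtbno < mp->sb_rblocks;
}
```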
+22
fs/xfs/libxfs/xfs_types.h
···
  typedef int64_t	xfs_sfiloff_t;	/* signed block number in a file */

  /*
+ * New verifiers will return the instruction address of the failing check.
+ * NULL means everything is ok.
+ */
+ typedef void *	xfs_failaddr_t;
+
+ /*
  * Null values for the types.
  */
  #define	NULLFSBLOCK	((xfs_fsblock_t)-1)
···
  #define	XFS_NBWORD	(1 << XFS_NBWORDLOG)
  #define	XFS_WORDMASK	((1 << XFS_WORDLOG) - 1)

+ struct xfs_iext_cursor {
+ 	struct xfs_iext_leaf	*leaf;
+ 	int			pos;
+ };
+
+ typedef enum {
+ 	XFS_EXT_NORM, XFS_EXT_UNWRITTEN,
+ } xfs_exntst_t;
+
+ typedef struct xfs_bmbt_irec
+ {
+ 	xfs_fileoff_t	br_startoff;	/* starting file offset */
+ 	xfs_fsblock_t	br_startblock;	/* starting block number */
+ 	xfs_filblks_t	br_blockcount;	/* number of blocks */
+ 	xfs_exntst_t	br_state;	/* extent state */
+ } xfs_bmbt_irec_t;

  #endif	/* __XFS_TYPES_H__ */
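The new xfs_failaddr_t convention above makes verifiers report *which* check failed: return NULL when everything is ok, otherwise a distinct address identifying the failing check, which the caller can log. A hedged userspace sketch of that calling convention (the kernel derives the address from the code location; this sketch uses static marker objects instead, and all names are illustrative):

```c
#include <stddef.h>
#include <stdint.h>

typedef void *failaddr_t;	/* NULL means everything is ok */

/* Illustrative record with two invariants to check. */
struct rec {
	uint64_t	startoff;
	uint64_t	blockcount;
};

/*
 * Distinct static objects stand in for the per-check instruction
 * addresses the real verifiers return; each failed check yields a
 * different non-NULL pointer, so failures are distinguishable in logs.
 */
static char bad_blockcount, bad_startoff;

static failaddr_t verify_rec(const struct rec *r, uint64_t max_fileoff)
{
	if (r->blockcount == 0)
		return &bad_blockcount;	/* zero-length mapping */
	if (r->startoff + r->blockcount > max_fileoff)
		return &bad_startoff;	/* mapping past end of file space */
	return NULL;			/* all checks passed */
}
```

Callers then treat non-NULL as "corrupt" and can print the returned address to pinpoint the check, which is cheaper than threading an error string through every verifier.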
+658
fs/xfs/scrub/agheader.c
···
+ /*
+  * Copyright (C) 2017 Oracle.  All Rights Reserved.
+  *
+  * Author: Darrick J. Wong <darrick.wong@oracle.com>
+  *
+  * This program is free software; you can redistribute it and/or
+  * modify it under the terms of the GNU General Public License
+  * as published by the Free Software Foundation; either version 2
+  * of the License, or (at your option) any later version.
+  *
+  * This program is distributed in the hope that it would be useful,
+  * but WITHOUT ANY WARRANTY; without even the implied warranty of
+  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  * GNU General Public License for more details.
+  *
+  * You should have received a copy of the GNU General Public License
+  * along with this program; if not, write the Free Software Foundation,
+  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+  */
+ #include "xfs.h"
+ #include "xfs_fs.h"
+ #include "xfs_shared.h"
+ #include "xfs_format.h"
+ #include "xfs_trans_resv.h"
+ #include "xfs_mount.h"
+ #include "xfs_defer.h"
+ #include "xfs_btree.h"
+ #include "xfs_bit.h"
+ #include "xfs_log_format.h"
+ #include "xfs_trans.h"
+ #include "xfs_sb.h"
+ #include "xfs_inode.h"
+ #include "xfs_alloc.h"
+ #include "xfs_ialloc.h"
+ #include "scrub/xfs_scrub.h"
+ #include "scrub/scrub.h"
+ #include "scrub/common.h"
+ #include "scrub/trace.h"
+
+ /*
+  * Set up scrub to check all the static metadata in each AG.
+  * This means the SB, AGF, AGI, and AGFL headers.
+  */
+ int
+ xfs_scrub_setup_ag_header(
+ 	struct xfs_scrub_context	*sc,
+ 	struct xfs_inode		*ip)
+ {
+ 	struct xfs_mount		*mp = sc->mp;
+
+ 	if (sc->sm->sm_agno >= mp->m_sb.sb_agcount ||
+ 	    sc->sm->sm_ino || sc->sm->sm_gen)
+ 		return -EINVAL;
+ 	return xfs_scrub_setup_fs(sc, ip);
+ }
+
+ /* Walk all the blocks in the AGFL. */
+ int
+ xfs_scrub_walk_agfl(
+ 	struct xfs_scrub_context	*sc,
+ 	int				(*fn)(struct xfs_scrub_context *,
+ 					      xfs_agblock_t bno, void *),
+ 	void				*priv)
+ {
+ 	struct xfs_agf			*agf;
+ 	__be32				*agfl_bno;
+ 	struct xfs_mount		*mp = sc->mp;
+ 	unsigned int			flfirst;
+ 	unsigned int			fllast;
+ 	int				i;
+ 	int				error;
+
+ 	agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
+ 	agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, sc->sa.agfl_bp);
+ 	flfirst = be32_to_cpu(agf->agf_flfirst);
+ 	fllast = be32_to_cpu(agf->agf_fllast);
+
+ 	/* Nothing to walk in an empty AGFL. */
+ 	if (agf->agf_flcount == cpu_to_be32(0))
+ 		return 0;
+
+ 	/* first to last is a consecutive list. */
+ 	if (fllast >= flfirst) {
+ 		for (i = flfirst; i <= fllast; i++) {
+ 			error = fn(sc, be32_to_cpu(agfl_bno[i]), priv);
+ 			if (error)
+ 				return error;
+ 			if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ 				return error;
+ 		}
+
+ 		return 0;
+ 	}
+
+ 	/* first to the end */
+ 	for (i = flfirst; i < XFS_AGFL_SIZE(mp); i++) {
+ 		error = fn(sc, be32_to_cpu(agfl_bno[i]), priv);
+ 		if (error)
+ 			return error;
+ 		if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ 			return error;
+ 	}
+
+ 	/* the start to last. */
+ 	for (i = 0; i <= fllast; i++) {
+ 		error = fn(sc, be32_to_cpu(agfl_bno[i]), priv);
+ 		if (error)
+ 			return error;
+ 		if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ 			return error;
+ 	}
+
+ 	return 0;
+ }
+
+ /* Superblock */
+
+ /*
+  * Scrub the filesystem superblock.
+  *
+  * Note: We do /not/ attempt to check AG 0's superblock.  Mount is
+  * responsible for validating all the geometry information in sb 0, so
+  * if the filesystem is capable of initiating online scrub, then clearly
+  * sb 0 is ok and we can use its information to check everything else.
+  */
+ int
+ xfs_scrub_superblock(
+ 	struct xfs_scrub_context	*sc)
+ {
+ 	struct xfs_mount		*mp = sc->mp;
+ 	struct xfs_buf			*bp;
+ 	struct xfs_dsb			*sb;
+ 	xfs_agnumber_t			agno;
+ 	uint32_t			v2_ok;
+ 	__be32				features_mask;
+ 	int				error;
+ 	__be16				vernum_mask;
+
+ 	agno = sc->sm->sm_agno;
+ 	if (agno == 0)
+ 		return 0;
+
+ 	error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp,
+ 		  XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
+ 		  XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_sb_buf_ops);
+ 	if (!xfs_scrub_process_error(sc, agno, XFS_SB_BLOCK(mp), &error))
+ 		return error;
+
+ 	sb = XFS_BUF_TO_SBP(bp);
+
+ 	/*
+ 	 * Verify the geometries match.  Fields that are permanently
+ 	 * set by mkfs are checked; fields that can be updated later
+ 	 * (and are not propagated to backup superblocks) are preen
+ 	 * checked.
+ 	 */
+ 	if (sb->sb_blocksize != cpu_to_be32(mp->m_sb.sb_blocksize))
+ 		xfs_scrub_block_set_corrupt(sc, bp);
+
+ 	if (sb->sb_dblocks != cpu_to_be64(mp->m_sb.sb_dblocks))
+ 		xfs_scrub_block_set_corrupt(sc, bp);
+
+ 	if (sb->sb_rblocks != cpu_to_be64(mp->m_sb.sb_rblocks))
+ 		xfs_scrub_block_set_corrupt(sc, bp);
+
+ 	if (sb->sb_rextents != cpu_to_be64(mp->m_sb.sb_rextents))
+ 		xfs_scrub_block_set_corrupt(sc, bp);
+
+ 	if (!uuid_equal(&sb->sb_uuid, &mp->m_sb.sb_uuid))
+ 		xfs_scrub_block_set_preen(sc, bp);
+
+ 	if (sb->sb_logstart != cpu_to_be64(mp->m_sb.sb_logstart))
+ 		xfs_scrub_block_set_corrupt(sc, bp);
+
+ 	if (sb->sb_rootino != cpu_to_be64(mp->m_sb.sb_rootino))
+ 		xfs_scrub_block_set_preen(sc, bp);
+
+ 	if (sb->sb_rbmino != cpu_to_be64(mp->m_sb.sb_rbmino))
+ 		xfs_scrub_block_set_preen(sc, bp);
+
+ 	if (sb->sb_rsumino != cpu_to_be64(mp->m_sb.sb_rsumino))
+ 		xfs_scrub_block_set_preen(sc, bp);
+
+ 	if (sb->sb_rextsize != cpu_to_be32(mp->m_sb.sb_rextsize))
+ 		xfs_scrub_block_set_corrupt(sc, bp);
+
+ 	if (sb->sb_agblocks != cpu_to_be32(mp->m_sb.sb_agblocks))
+ 		xfs_scrub_block_set_corrupt(sc, bp);
+
+ 	if (sb->sb_agcount != cpu_to_be32(mp->m_sb.sb_agcount))
+ 		xfs_scrub_block_set_corrupt(sc, bp);
+
+ 	if (sb->sb_rbmblocks != cpu_to_be32(mp->m_sb.sb_rbmblocks))
+ 		xfs_scrub_block_set_corrupt(sc, bp);
+
+ 	if (sb->sb_logblocks != cpu_to_be32(mp->m_sb.sb_logblocks))
+ 		xfs_scrub_block_set_corrupt(sc, bp);
+
+ 	/* Check sb_versionnum bits that are set at mkfs time. */
+ 	vernum_mask = cpu_to_be16(~XFS_SB_VERSION_OKBITS |
+ 				  XFS_SB_VERSION_NUMBITS |
+ 				  XFS_SB_VERSION_ALIGNBIT |
+ 				  XFS_SB_VERSION_DALIGNBIT |
+ 				  XFS_SB_VERSION_SHAREDBIT |
+ 				  XFS_SB_VERSION_LOGV2BIT |
+ 				  XFS_SB_VERSION_SECTORBIT |
+ 				  XFS_SB_VERSION_EXTFLGBIT |
+ 				  XFS_SB_VERSION_DIRV2BIT);
+ 	if ((sb->sb_versionnum & vernum_mask) !=
+ 	    (cpu_to_be16(mp->m_sb.sb_versionnum) & vernum_mask))
+ 		xfs_scrub_block_set_corrupt(sc, bp);
+
+ 	/* Check sb_versionnum bits that can be set after mkfs time. */
+ 	vernum_mask = cpu_to_be16(XFS_SB_VERSION_ATTRBIT |
+ 				  XFS_SB_VERSION_NLINKBIT |
+ 				  XFS_SB_VERSION_QUOTABIT);
+ 	if ((sb->sb_versionnum & vernum_mask) !=
+ 	    (cpu_to_be16(mp->m_sb.sb_versionnum) & vernum_mask))
+ 		xfs_scrub_block_set_preen(sc, bp);
+
+ 	if (sb->sb_sectsize != cpu_to_be16(mp->m_sb.sb_sectsize))
+ 		xfs_scrub_block_set_corrupt(sc, bp);
+
+ 	if (sb->sb_inodesize != cpu_to_be16(mp->m_sb.sb_inodesize))
+ 		xfs_scrub_block_set_corrupt(sc, bp);
+
+ 	if (sb->sb_inopblock != cpu_to_be16(mp->m_sb.sb_inopblock))
+ 		xfs_scrub_block_set_corrupt(sc, bp);
+
+ 	if (memcmp(sb->sb_fname, mp->m_sb.sb_fname, sizeof(sb->sb_fname)))
+ 		xfs_scrub_block_set_preen(sc, bp);
+
+ 	if (sb->sb_blocklog != mp->m_sb.sb_blocklog)
+ 		xfs_scrub_block_set_corrupt(sc, bp);
+
+ 	if (sb->sb_sectlog != mp->m_sb.sb_sectlog)
+ 		xfs_scrub_block_set_corrupt(sc, bp);
+
+ 	if (sb->sb_inodelog != mp->m_sb.sb_inodelog)
+ 		xfs_scrub_block_set_corrupt(sc, bp);
+
+ 	if (sb->sb_inopblog != mp->m_sb.sb_inopblog)
+ 		xfs_scrub_block_set_corrupt(sc, bp);
+
+ 	if (sb->sb_agblklog != mp->m_sb.sb_agblklog)
+ 		xfs_scrub_block_set_corrupt(sc, bp);
+
+ 	if (sb->sb_rextslog != mp->m_sb.sb_rextslog)
+ 		xfs_scrub_block_set_corrupt(sc, bp);
+
+ 	if (sb->sb_imax_pct != mp->m_sb.sb_imax_pct)
+ 		xfs_scrub_block_set_preen(sc, bp);
+
+ 	/*
+ 	 * Skip the summary counters since we track them in memory anyway.
+ 	 * sb_icount, sb_ifree, sb_fdblocks, sb_frexents
+ 	 */
+
+ 	if (sb->sb_uquotino != cpu_to_be64(mp->m_sb.sb_uquotino))
+ 		xfs_scrub_block_set_preen(sc, bp);
+
+ 	if (sb->sb_gquotino != cpu_to_be64(mp->m_sb.sb_gquotino))
+ 		xfs_scrub_block_set_preen(sc, bp);
+
+ 	/*
+ 	 * Skip the quota flags since repair will force quotacheck.
+ 	 * sb_qflags
+ 	 */
+
+ 	if (sb->sb_flags != mp->m_sb.sb_flags)
+ 		xfs_scrub_block_set_corrupt(sc, bp);
+
+ 	if (sb->sb_shared_vn != mp->m_sb.sb_shared_vn)
+ 		xfs_scrub_block_set_corrupt(sc, bp);
+
+ 	if (sb->sb_inoalignmt != cpu_to_be32(mp->m_sb.sb_inoalignmt))
+ 		xfs_scrub_block_set_corrupt(sc, bp);
+
+ 	if (sb->sb_unit != cpu_to_be32(mp->m_sb.sb_unit))
+ 		xfs_scrub_block_set_preen(sc, bp);
+
+ 	if (sb->sb_width != cpu_to_be32(mp->m_sb.sb_width))
+ 		xfs_scrub_block_set_preen(sc, bp);
+
+ 	if (sb->sb_dirblklog != mp->m_sb.sb_dirblklog)
+ 		xfs_scrub_block_set_corrupt(sc, bp);
+
+ 	if (sb->sb_logsectlog != mp->m_sb.sb_logsectlog)
+ 		xfs_scrub_block_set_corrupt(sc, bp);
+
+ 	if (sb->sb_logsectsize != cpu_to_be16(mp->m_sb.sb_logsectsize))
+ 		xfs_scrub_block_set_corrupt(sc, bp);
+
+ 	if (sb->sb_logsunit != cpu_to_be32(mp->m_sb.sb_logsunit))
+ 		xfs_scrub_block_set_corrupt(sc, bp);
+
+ 	/* Do we see any invalid bits in sb_features2? */
+ 	if (!xfs_sb_version_hasmorebits(&mp->m_sb)) {
+ 		if (sb->sb_features2 != 0)
+ 			xfs_scrub_block_set_corrupt(sc, bp);
+ 	} else {
+ 		v2_ok = XFS_SB_VERSION2_OKBITS;
+ 		if (XFS_SB_VERSION_NUM(&mp->m_sb) >= XFS_SB_VERSION_5)
+ 			v2_ok |= XFS_SB_VERSION2_CRCBIT;
+
+ 		if (!!(sb->sb_features2 & cpu_to_be32(~v2_ok)))
+ 			xfs_scrub_block_set_corrupt(sc, bp);
+
+ 		if (sb->sb_features2 != sb->sb_bad_features2)
+ 			xfs_scrub_block_set_preen(sc, bp);
+ 	}
+
+ 	/* Check sb_features2 flags that are set at mkfs time. */
+ 	features_mask = cpu_to_be32(XFS_SB_VERSION2_LAZYSBCOUNTBIT |
+ 				    XFS_SB_VERSION2_PROJID32BIT |
+ 				    XFS_SB_VERSION2_CRCBIT |
+ 				    XFS_SB_VERSION2_FTYPE);
+ 	if ((sb->sb_features2 & features_mask) !=
+ 	    (cpu_to_be32(mp->m_sb.sb_features2) & features_mask))
+ 		xfs_scrub_block_set_corrupt(sc, bp);
+
+ 	/* Check sb_features2 flags that can be set after mkfs time. */
+ 	features_mask = cpu_to_be32(XFS_SB_VERSION2_ATTR2BIT);
+ 	if ((sb->sb_features2 & features_mask) !=
+ 	    (cpu_to_be32(mp->m_sb.sb_features2) & features_mask))
+ 		xfs_scrub_block_set_corrupt(sc, bp);
+
+ 	if (!xfs_sb_version_hascrc(&mp->m_sb)) {
+ 		/* all v5 fields must be zero */
+ 		if (memchr_inv(&sb->sb_features_compat, 0,
+ 				sizeof(struct xfs_dsb) -
+ 				offsetof(struct xfs_dsb, sb_features_compat)))
+ 			xfs_scrub_block_set_corrupt(sc, bp);
+ 	} else {
+ 		/* Check compat flags; all are set at mkfs time. */
+ 		features_mask = cpu_to_be32(XFS_SB_FEAT_COMPAT_UNKNOWN);
+ 		if ((sb->sb_features_compat & features_mask) !=
+ 		    (cpu_to_be32(mp->m_sb.sb_features_compat) & features_mask))
+ 			xfs_scrub_block_set_corrupt(sc, bp);
+
+ 		/* Check ro compat flags; all are set at mkfs time. */
+ 		features_mask = cpu_to_be32(XFS_SB_FEAT_RO_COMPAT_UNKNOWN |
+ 					    XFS_SB_FEAT_RO_COMPAT_FINOBT |
+ 					    XFS_SB_FEAT_RO_COMPAT_RMAPBT |
+ 					    XFS_SB_FEAT_RO_COMPAT_REFLINK);
+ 		if ((sb->sb_features_ro_compat & features_mask) !=
+ 		    (cpu_to_be32(mp->m_sb.sb_features_ro_compat) &
+ 		     features_mask))
+ 			xfs_scrub_block_set_corrupt(sc, bp);
+
+ 		/* Check incompat flags; all are set at mkfs time. */
+ 		features_mask = cpu_to_be32(XFS_SB_FEAT_INCOMPAT_UNKNOWN |
+ 					    XFS_SB_FEAT_INCOMPAT_FTYPE |
+ 					    XFS_SB_FEAT_INCOMPAT_SPINODES |
+ 					    XFS_SB_FEAT_INCOMPAT_META_UUID);
+ 		if ((sb->sb_features_incompat & features_mask) !=
+ 		    (cpu_to_be32(mp->m_sb.sb_features_incompat) &
+ 		     features_mask))
+ 			xfs_scrub_block_set_corrupt(sc, bp);
+
+ 		/* Check log incompat flags; all are set at mkfs time. */
+ 		features_mask = cpu_to_be32(XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN);
+ 		if ((sb->sb_features_log_incompat & features_mask) !=
+ 		    (cpu_to_be32(mp->m_sb.sb_features_log_incompat) &
+ 		     features_mask))
+ 			xfs_scrub_block_set_corrupt(sc, bp);
+
+ 		/* Don't care about sb_crc */
+
+ 		if (sb->sb_spino_align != cpu_to_be32(mp->m_sb.sb_spino_align))
+ 			xfs_scrub_block_set_corrupt(sc, bp);
+
+ 		if (sb->sb_pquotino != cpu_to_be64(mp->m_sb.sb_pquotino))
+ 			xfs_scrub_block_set_preen(sc, bp);
+
+ 		/* Don't care about sb_lsn */
+ 	}
+
+ 	if (xfs_sb_version_hasmetauuid(&mp->m_sb)) {
+ 		/* The metadata UUID must be the same for all supers */
+ 		if (!uuid_equal(&sb->sb_meta_uuid, &mp->m_sb.sb_meta_uuid))
+ 			xfs_scrub_block_set_corrupt(sc, bp);
+ 	}
+
+ 	/* Everything else must be zero. */
+ 	if (memchr_inv(sb + 1, 0,
+ 			BBTOB(bp->b_length) - sizeof(struct xfs_dsb)))
+ 		xfs_scrub_block_set_corrupt(sc, bp);
+
+ 	return error;
+ }
+
+ /* AGF */
+
+ /* Scrub the AGF. */
+ int
+ xfs_scrub_agf(
+ 	struct xfs_scrub_context	*sc)
+ {
+ 	struct xfs_mount		*mp = sc->mp;
+ 	struct xfs_agf			*agf;
+ 	xfs_agnumber_t			agno;
+ 	xfs_agblock_t			agbno;
+ 	xfs_agblock_t			eoag;
+ 	xfs_agblock_t			agfl_first;
+ 	xfs_agblock_t			agfl_last;
+ 	xfs_agblock_t			agfl_count;
+ 	xfs_agblock_t			fl_count;
+ 	int				level;
+ 	int				error = 0;
+
+ 	agno = sc->sa.agno = sc->sm->sm_agno;
+ 	error = xfs_scrub_ag_read_headers(sc, agno, &sc->sa.agi_bp,
+ 			&sc->sa.agf_bp, &sc->sa.agfl_bp);
+ 	if (!xfs_scrub_process_error(sc, agno, XFS_AGF_BLOCK(sc->mp), &error))
+ 		goto out;
+
+ 	agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
+
+ 	/* Check the AG length */
+ 	eoag = be32_to_cpu(agf->agf_length);
+ 	if (eoag != xfs_ag_block_count(mp, agno))
+ 		xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
+
+ 	/* Check the AGF btree roots and levels */
+ 	agbno = be32_to_cpu(agf->agf_roots[XFS_BTNUM_BNO]);
+ 	if (!xfs_verify_agbno(mp, agno, agbno))
+ 		xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
+
+ 	agbno = be32_to_cpu(agf->agf_roots[XFS_BTNUM_CNT]);
+ 	if (!xfs_verify_agbno(mp, agno, agbno))
+ 		xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
+
+ 	level = be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]);
+ 	if (level <= 0 || level > XFS_BTREE_MAXLEVELS)
+ 		xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
+
+ 	level = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]);
+ 	if (level <= 0 || level > XFS_BTREE_MAXLEVELS)
+ 		xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
+
+ 	if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
+ 		agbno = be32_to_cpu(agf->agf_roots[XFS_BTNUM_RMAP]);
+ 		if (!xfs_verify_agbno(mp, agno, agbno))
+ 			xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
+
+ 		level = be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]);
+ 		if (level <= 0 || level > XFS_BTREE_MAXLEVELS)
+ 			xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
+ 	}
+
+ 	if (xfs_sb_version_hasreflink(&mp->m_sb)) {
+ 		agbno = be32_to_cpu(agf->agf_refcount_root);
+ 		if (!xfs_verify_agbno(mp, agno, agbno))
+ 			xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
+
+ 		level = be32_to_cpu(agf->agf_refcount_level);
+ 		if (level <= 0 || level > XFS_BTREE_MAXLEVELS)
+ 			xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
+ 	}
+
+ 	/* Check the AGFL counters */
+ 	agfl_first = be32_to_cpu(agf->agf_flfirst);
+ 	agfl_last = be32_to_cpu(agf->agf_fllast);
+ 	agfl_count = be32_to_cpu(agf->agf_flcount);
+ 	if (agfl_last > agfl_first)
+ 		fl_count = agfl_last - agfl_first + 1;
+ 	else
+ 		fl_count = XFS_AGFL_SIZE(mp) - agfl_first + agfl_last + 1;
+ 	if (agfl_count != 0 && fl_count != agfl_count)
+ 		xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
+
+ out:
+ 	return error;
+ }
+
+ /* AGFL */
+
+ struct xfs_scrub_agfl_info {
+ 	unsigned int			sz_entries;
+ 	unsigned int			nr_entries;
+ 	xfs_agblock_t			*entries;
+ };
+
+ /* Scrub an AGFL block. */
+ STATIC int
+ xfs_scrub_agfl_block(
+ 	struct xfs_scrub_context	*sc,
+ 	xfs_agblock_t			agbno,
+ 	void				*priv)
+ {
+ 	struct xfs_mount		*mp = sc->mp;
+ 	struct xfs_scrub_agfl_info	*sai = priv;
+ 	xfs_agnumber_t			agno = sc->sa.agno;
+
+ 	if (xfs_verify_agbno(mp, agno, agbno) &&
+ 	    sai->nr_entries < sai->sz_entries)
+ 		sai->entries[sai->nr_entries++] = agbno;
+ 	else
+ 		xfs_scrub_block_set_corrupt(sc, sc->sa.agfl_bp);
+
+ 	return 0;
+ }
+
+ static int
+ xfs_scrub_agblock_cmp(
+ 	const void		*pa,
+ 	const void		*pb)
+ {
+ 	const xfs_agblock_t	*a = pa;
+ 	const xfs_agblock_t	*b = pb;
+
+ 	return (int)*a - (int)*b;
+ }
+
+ /* Scrub the AGFL. */
+ int
+ xfs_scrub_agfl(
+ 	struct xfs_scrub_context	*sc)
+ {
+ 	struct xfs_scrub_agfl_info	sai = { 0 };
+ 	struct xfs_agf			*agf;
+ 	xfs_agnumber_t			agno;
+ 	unsigned int			agflcount;
+ 	unsigned int			i;
+ 	int				error;
+
+ 	agno = sc->sa.agno = sc->sm->sm_agno;
+ 	error = xfs_scrub_ag_read_headers(sc, agno, &sc->sa.agi_bp,
+ 			&sc->sa.agf_bp, &sc->sa.agfl_bp);
+ 	if (!xfs_scrub_process_error(sc, agno, XFS_AGFL_BLOCK(sc->mp), &error))
+ 		goto out;
+ 	if (!sc->sa.agf_bp)
+ 		return -EFSCORRUPTED;
+
+ 	/* Allocate buffer to ensure uniqueness of AGFL entries. */
+ 	agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
+ 	agflcount = be32_to_cpu(agf->agf_flcount);
+ 	if (agflcount > XFS_AGFL_SIZE(sc->mp)) {
+ 		xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
+ 		goto out;
+ 	}
+ 	sai.sz_entries = agflcount;
+ 	sai.entries = kmem_zalloc(sizeof(xfs_agblock_t) * agflcount, KM_NOFS);
+ 	if (!sai.entries) {
+ 		error = -ENOMEM;
+ 		goto out;
+ 	}
+
+ 	/* Check the blocks in the AGFL. */
+ 	error = xfs_scrub_walk_agfl(sc, xfs_scrub_agfl_block, &sai);
+ 	if (error)
+ 		goto out_free;
+
+ 	if (agflcount != sai.nr_entries) {
+ 		xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
+ 		goto out_free;
+ 	}
+
+ 	/* Sort entries, check for duplicates. */
+ 	sort(sai.entries, sai.nr_entries, sizeof(sai.entries[0]),
+ 			xfs_scrub_agblock_cmp, NULL);
+ 	for (i = 1; i < sai.nr_entries; i++) {
+ 		if (sai.entries[i] == sai.entries[i - 1]) {
+ 			xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
+ 			break;
+ 		}
+ 	}
+
+ out_free:
+ 	kmem_free(sai.entries);
+ out:
+ 	return error;
+ }
+
+ /* AGI */
+
+ /* Scrub the AGI. */
+ int
+ xfs_scrub_agi(
+ 	struct xfs_scrub_context	*sc)
+ {
+ 	struct xfs_mount		*mp = sc->mp;
+ 	struct xfs_agi			*agi;
+ 	xfs_agnumber_t			agno;
+ 	xfs_agblock_t			agbno;
+ 	xfs_agblock_t			eoag;
+ 	xfs_agino_t			agino;
+ 	xfs_agino_t			first_agino;
+ 	xfs_agino_t			last_agino;
+ 	xfs_agino_t			icount;
+ 	int				i;
+ 	int				level;
+ 	int				error = 0;
+
+ 	agno = sc->sa.agno = sc->sm->sm_agno;
+ 	error = xfs_scrub_ag_read_headers(sc, agno, &sc->sa.agi_bp,
+ 			&sc->sa.agf_bp, &sc->sa.agfl_bp);
+ 	if (!xfs_scrub_process_error(sc, agno, XFS_AGI_BLOCK(sc->mp), &error))
+ 		goto out;
+
+ 	agi = XFS_BUF_TO_AGI(sc->sa.agi_bp);
+
+ 	/* Check the AG length */
+ 	eoag = be32_to_cpu(agi->agi_length);
+ 	if (eoag != xfs_ag_block_count(mp, agno))
+ 		xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
+
+ 	/* Check btree roots and levels */
+ 	agbno = be32_to_cpu(agi->agi_root);
+ 	if (!xfs_verify_agbno(mp, agno, agbno))
+ 		xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
+
+ 	level = be32_to_cpu(agi->agi_level);
+ 	if (level <= 0 || level > XFS_BTREE_MAXLEVELS)
+ 		xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
+
+ 	if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
+ 		agbno = be32_to_cpu(agi->agi_free_root);
+ 		if (!xfs_verify_agbno(mp, agno, agbno))
+ 			xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
+
+ 		level = be32_to_cpu(agi->agi_free_level);
+ 		if (level <= 0 || level > XFS_BTREE_MAXLEVELS)
+ 			xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
+ 	}
+
+ 	/* Check inode counters */
+ 	xfs_ialloc_agino_range(mp, agno, &first_agino, &last_agino);
+ 	icount = be32_to_cpu(agi->agi_count);
+ 	if (icount > last_agino - first_agino + 1 ||
+ 	    icount < be32_to_cpu(agi->agi_freecount))
+ 		xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
+
+ 	/* Check inode pointers */
+ 	agino = be32_to_cpu(agi->agi_newino);
+ 	if (agino != NULLAGINO &&
!xfs_verify_agino(mp, agno, agino)) 638 + xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp); 639 + 640 + agino = be32_to_cpu(agi->agi_dirino); 641 + if (agino != NULLAGINO && !xfs_verify_agino(mp, agno, agino)) 642 + xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp); 643 + 644 + /* Check unlinked inode buckets */ 645 + for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++) { 646 + agino = be32_to_cpu(agi->agi_unlinked[i]); 647 + if (agino == NULLAGINO) 648 + continue; 649 + if (!xfs_verify_agino(mp, agno, agino)) 650 + xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp); 651 + } 652 + 653 + if (agi->agi_pad32 != cpu_to_be32(0)) 654 + xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp); 655 + 656 + out: 657 + return error; 658 + }
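The AGFL counter check in xfs_scrub_agf() above treats the free list as a ring buffer: agf_flfirst and agf_fllast wrap around XFS_AGFL_SIZE(mp). A standalone sketch of that wrap-around arithmetic (hypothetical helper name, and a made-up ring size standing in for XFS_AGFL_SIZE, which really depends on sector size):

```c
#include <assert.h>
#include <stdint.h>

/*
 * Count the active entries in a circular free list, mirroring the
 * agf_flfirst/agf_fllast arithmetic in xfs_scrub_agf().  "size"
 * stands in for XFS_AGFL_SIZE(mp); the helper name is illustrative.
 */
static uint32_t
agfl_active_count(uint32_t first, uint32_t last, uint32_t size)
{
	if (last > first)
		return last - first + 1;	/* list does not wrap */
	return size - first + last + 1;		/* wrapped past the end */
}
```

The scrubber compares this computed length against the on-disk agf_flcount and flags the AGF as corrupt on mismatch (unless the list is empty).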
fs/xfs/scrub/alloc.c (+102 lines)
/*
 * Copyright (C) 2017 Oracle.  All Rights Reserved.
 *
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_alloc.h"
#include "xfs_rmap.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
#include "scrub/trace.h"

/*
 * Set us up to scrub free space btrees.
 */
int
xfs_scrub_setup_ag_allocbt(
	struct xfs_scrub_context	*sc,
	struct xfs_inode		*ip)
{
	return xfs_scrub_setup_ag_btree(sc, ip, false);
}

/* Free space btree scrubber. */

/* Scrub a bnobt/cntbt record. */
STATIC int
xfs_scrub_allocbt_rec(
	struct xfs_scrub_btree	*bs,
	union xfs_btree_rec	*rec)
{
	struct xfs_mount	*mp = bs->cur->bc_mp;
	xfs_agnumber_t		agno = bs->cur->bc_private.a.agno;
	xfs_agblock_t		bno;
	xfs_extlen_t		len;
	int			error = 0;

	bno = be32_to_cpu(rec->alloc.ar_startblock);
	len = be32_to_cpu(rec->alloc.ar_blockcount);

	if (bno + len <= bno ||
	    !xfs_verify_agbno(mp, agno, bno) ||
	    !xfs_verify_agbno(mp, agno, bno + len - 1))
		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);

	return error;
}

/* Scrub the freespace btrees for some AG. */
STATIC int
xfs_scrub_allocbt(
	struct xfs_scrub_context	*sc,
	xfs_btnum_t			which)
{
	struct xfs_owner_info	oinfo;
	struct xfs_btree_cur	*cur;

	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG);
	cur = which == XFS_BTNUM_BNO ? sc->sa.bno_cur : sc->sa.cnt_cur;
	return xfs_scrub_btree(sc, cur, xfs_scrub_allocbt_rec, &oinfo, NULL);
}

int
xfs_scrub_bnobt(
	struct xfs_scrub_context	*sc)
{
	return xfs_scrub_allocbt(sc, XFS_BTNUM_BNO);
}

int
xfs_scrub_cntbt(
	struct xfs_scrub_context	*sc)
{
	return xfs_scrub_allocbt(sc, XFS_BTNUM_CNT);
}
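The `bno + len <= bno` test in xfs_scrub_allocbt_rec() rejects both zero-length records and records whose endpoint wraps the 32-bit AG block number space. A minimal sketch of just that overflow check (hypothetical helper name; the real code additionally range-checks both endpoints with xfs_verify_agbno()):

```c
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/*
 * Mirror of the record sanity test in xfs_scrub_allocbt_rec(): a free
 * extent is plausible only if it has nonzero length and bno + len does
 * not wrap around in 32-bit arithmetic.  Unsigned overflow is
 * well-defined (mod 2^32), so a wrapped sum lands at or below bno.
 */
static bool
alloc_rec_plausible(uint32_t bno, uint32_t len)
{
	return bno + len > bno;
}
```

Because the sum is computed in the same 32-bit type as the on-disk fields, a single comparison catches both degenerate cases without any widening.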
fs/xfs/scrub/attr.c (+471 lines)
/*
 * Copyright (C) 2017 Oracle.  All Rights Reserved.
 *
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_inode.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_dir2.h"
#include "xfs_attr.h"
#include "xfs_attr_leaf.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/dabtree.h"
#include "scrub/trace.h"

#include <linux/posix_acl_xattr.h>
#include <linux/xattr.h>

/* Set us up to scrub an inode's extended attributes. */
int
xfs_scrub_setup_xattr(
	struct xfs_scrub_context	*sc,
	struct xfs_inode		*ip)
{
	size_t				sz;

	/*
	 * Allocate the buffer without the inode lock held.  We need enough
	 * space to read every xattr value in the file or enough space to
	 * hold three copies of the xattr free space bitmap.  (Not both at
	 * the same time.)
	 */
	sz = max_t(size_t, XATTR_SIZE_MAX, 3 * sizeof(long) *
			BITS_TO_LONGS(sc->mp->m_attr_geo->blksize));
	sc->buf = kmem_zalloc_large(sz, KM_SLEEP);
	if (!sc->buf)
		return -ENOMEM;

	return xfs_scrub_setup_inode_contents(sc, ip, 0);
}

/* Extended Attributes */

struct xfs_scrub_xattr {
	struct xfs_attr_list_context	context;
	struct xfs_scrub_context	*sc;
};

/*
 * Check that an extended attribute key can be looked up by hash.
 *
 * We use the XFS attribute list iterator (i.e. xfs_attr_list_int_ilocked)
 * to call this function for every attribute key in an inode.  Once
 * we're here, we load the attribute value to see if any errors happen,
 * or if we get more or less data than we expected.
 */
static void
xfs_scrub_xattr_listent(
	struct xfs_attr_list_context	*context,
	int				flags,
	unsigned char			*name,
	int				namelen,
	int				valuelen)
{
	struct xfs_scrub_xattr		*sx;
	struct xfs_da_args		args = { NULL };
	int				error = 0;

	sx = container_of(context, struct xfs_scrub_xattr, context);

	if (flags & XFS_ATTR_INCOMPLETE) {
		/* Incomplete attr key, just mark the inode for preening. */
		xfs_scrub_ino_set_preen(sx->sc, context->dp->i_ino, NULL);
		return;
	}

	args.flags = ATTR_KERNOTIME;
	if (flags & XFS_ATTR_ROOT)
		args.flags |= ATTR_ROOT;
	else if (flags & XFS_ATTR_SECURE)
		args.flags |= ATTR_SECURE;
	args.geo = context->dp->i_mount->m_attr_geo;
	args.whichfork = XFS_ATTR_FORK;
	args.dp = context->dp;
	args.name = name;
	args.namelen = namelen;
	args.hashval = xfs_da_hashname(args.name, args.namelen);
	args.trans = context->tp;
	args.value = sx->sc->buf;
	args.valuelen = XATTR_SIZE_MAX;

	error = xfs_attr_get_ilocked(context->dp, &args);
	if (error == -EEXIST)
		error = 0;
	if (!xfs_scrub_fblock_process_error(sx->sc, XFS_ATTR_FORK, args.blkno,
			&error))
		goto fail_xref;
	if (args.valuelen != valuelen)
		xfs_scrub_fblock_set_corrupt(sx->sc, XFS_ATTR_FORK,
				args.blkno);

fail_xref:
	return;
}

/*
 * Mark a range [start, start+len) in this map.  Returns true if the
 * region was free, and false if there's a conflict or a problem.
 *
 * Within a char, the lowest bit of the char represents the byte with
 * the smallest address
 */
STATIC bool
xfs_scrub_xattr_set_map(
	struct xfs_scrub_context	*sc,
	unsigned long			*map,
	unsigned int			start,
	unsigned int			len)
{
	unsigned int			mapsize = sc->mp->m_attr_geo->blksize;
	bool				ret = true;

	if (start >= mapsize)
		return false;
	if (start + len > mapsize) {
		len = mapsize - start;
		ret = false;
	}

	if (find_next_bit(map, mapsize, start) < start + len)
		ret = false;
	bitmap_set(map, start, len);

	return ret;
}

/*
 * Check the leaf freemap from the usage bitmap.  Returns false if the
 * attr freemap has problems or points to used space.
 */
STATIC bool
xfs_scrub_xattr_check_freemap(
	struct xfs_scrub_context	*sc,
	unsigned long			*map,
	struct xfs_attr3_icleaf_hdr	*leafhdr)
{
	unsigned long			*freemap;
	unsigned long			*dstmap;
	unsigned int			mapsize = sc->mp->m_attr_geo->blksize;
	int				i;

	/* Construct bitmap of freemap contents. */
	freemap = (unsigned long *)sc->buf + BITS_TO_LONGS(mapsize);
	bitmap_zero(freemap, mapsize);
	for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
		if (!xfs_scrub_xattr_set_map(sc, freemap,
				leafhdr->freemap[i].base,
				leafhdr->freemap[i].size))
			return false;
	}

	/* Look for bits that are set in freemap and are marked in use. */
	dstmap = freemap + BITS_TO_LONGS(mapsize);
	return bitmap_and(dstmap, freemap, map, mapsize) == 0;
}

/*
 * Check this leaf entry's relations to everything else.
 * Returns the number of bytes used for the name/value data.
 */
STATIC void
xfs_scrub_xattr_entry(
	struct xfs_scrub_da_btree	*ds,
	int				level,
	char				*buf_end,
	struct xfs_attr_leafblock	*leaf,
	struct xfs_attr3_icleaf_hdr	*leafhdr,
	unsigned long			*usedmap,
	struct xfs_attr_leaf_entry	*ent,
	int				idx,
	unsigned int			*usedbytes,
	__u32				*last_hashval)
{
	struct xfs_mount		*mp = ds->state->mp;
	char				*name_end;
	struct xfs_attr_leaf_name_local	*lentry;
	struct xfs_attr_leaf_name_remote *rentry;
	unsigned int			nameidx;
	unsigned int			namesize;

	if (ent->pad2 != 0)
		xfs_scrub_da_set_corrupt(ds, level);

	/* Hash values in order? */
	if (be32_to_cpu(ent->hashval) < *last_hashval)
		xfs_scrub_da_set_corrupt(ds, level);
	*last_hashval = be32_to_cpu(ent->hashval);

	nameidx = be16_to_cpu(ent->nameidx);
	if (nameidx < leafhdr->firstused ||
	    nameidx >= mp->m_attr_geo->blksize) {
		xfs_scrub_da_set_corrupt(ds, level);
		return;
	}

	/* Check the name information. */
	if (ent->flags & XFS_ATTR_LOCAL) {
		lentry = xfs_attr3_leaf_name_local(leaf, idx);
		namesize = xfs_attr_leaf_entsize_local(lentry->namelen,
				be16_to_cpu(lentry->valuelen));
		name_end = (char *)lentry + namesize;
		if (lentry->namelen == 0)
			xfs_scrub_da_set_corrupt(ds, level);
	} else {
		rentry = xfs_attr3_leaf_name_remote(leaf, idx);
		namesize = xfs_attr_leaf_entsize_remote(rentry->namelen);
		name_end = (char *)rentry + namesize;
		if (rentry->namelen == 0 || rentry->valueblk == 0)
			xfs_scrub_da_set_corrupt(ds, level);
	}
	if (name_end > buf_end)
		xfs_scrub_da_set_corrupt(ds, level);

	if (!xfs_scrub_xattr_set_map(ds->sc, usedmap, nameidx, namesize))
		xfs_scrub_da_set_corrupt(ds, level);
	if (!(ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
		*usedbytes += namesize;
}

/* Scrub an attribute leaf. */
STATIC int
xfs_scrub_xattr_block(
	struct xfs_scrub_da_btree	*ds,
	int				level)
{
	struct xfs_attr3_icleaf_hdr	leafhdr;
	struct xfs_mount		*mp = ds->state->mp;
	struct xfs_da_state_blk		*blk = &ds->state->path.blk[level];
	struct xfs_buf			*bp = blk->bp;
	xfs_dablk_t			*last_checked = ds->private;
	struct xfs_attr_leafblock	*leaf = bp->b_addr;
	struct xfs_attr_leaf_entry	*ent;
	struct xfs_attr_leaf_entry	*entries;
	unsigned long			*usedmap = ds->sc->buf;
	char				*buf_end;
	size_t				off;
	__u32				last_hashval = 0;
	unsigned int			usedbytes = 0;
	unsigned int			hdrsize;
	int				i;

	if (*last_checked == blk->blkno)
		return 0;
	*last_checked = blk->blkno;
	bitmap_zero(usedmap, mp->m_attr_geo->blksize);

	/* Check all the padding. */
	if (xfs_sb_version_hascrc(&ds->sc->mp->m_sb)) {
		struct xfs_attr3_leafblock	*leaf = bp->b_addr;

		if (leaf->hdr.pad1 != 0 || leaf->hdr.pad2 != 0 ||
		    leaf->hdr.info.hdr.pad != 0)
			xfs_scrub_da_set_corrupt(ds, level);
	} else {
		if (leaf->hdr.pad1 != 0 || leaf->hdr.info.pad != 0)
			xfs_scrub_da_set_corrupt(ds, level);
	}

	/* Check the leaf header */
	xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf);
	hdrsize = xfs_attr3_leaf_hdr_size(leaf);

	if (leafhdr.usedbytes > mp->m_attr_geo->blksize)
		xfs_scrub_da_set_corrupt(ds, level);
	if (leafhdr.firstused > mp->m_attr_geo->blksize)
		xfs_scrub_da_set_corrupt(ds, level);
	if (leafhdr.firstused < hdrsize)
		xfs_scrub_da_set_corrupt(ds, level);
	if (!xfs_scrub_xattr_set_map(ds->sc, usedmap, 0, hdrsize))
		xfs_scrub_da_set_corrupt(ds, level);

	if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		goto out;

	entries = xfs_attr3_leaf_entryp(leaf);
	if ((char *)&entries[leafhdr.count] > (char *)leaf + leafhdr.firstused)
		xfs_scrub_da_set_corrupt(ds, level);

	buf_end = (char *)bp->b_addr + mp->m_attr_geo->blksize;
	for (i = 0, ent = entries; i < leafhdr.count; ent++, i++) {
		/* Mark the leaf entry itself. */
		off = (char *)ent - (char *)leaf;
		if (!xfs_scrub_xattr_set_map(ds->sc, usedmap, off,
				sizeof(xfs_attr_leaf_entry_t))) {
			xfs_scrub_da_set_corrupt(ds, level);
			goto out;
		}

		/* Check the entry and nameval. */
		xfs_scrub_xattr_entry(ds, level, buf_end, leaf, &leafhdr,
				usedmap, ent, i, &usedbytes, &last_hashval);

		if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
			goto out;
	}

	if (!xfs_scrub_xattr_check_freemap(ds->sc, usedmap, &leafhdr))
		xfs_scrub_da_set_corrupt(ds, level);

	if (leafhdr.usedbytes != usedbytes)
		xfs_scrub_da_set_corrupt(ds, level);

out:
	return 0;
}

/* Scrub an attribute btree record. */
STATIC int
xfs_scrub_xattr_rec(
	struct xfs_scrub_da_btree	*ds,
	int				level,
	void				*rec)
{
	struct xfs_mount		*mp = ds->state->mp;
	struct xfs_attr_leaf_entry	*ent = rec;
	struct xfs_da_state_blk		*blk;
	struct xfs_attr_leaf_name_local	*lentry;
	struct xfs_attr_leaf_name_remote *rentry;
	struct xfs_buf			*bp;
	xfs_dahash_t			calc_hash;
	xfs_dahash_t			hash;
	int				nameidx;
	int				hdrsize;
	unsigned int			badflags;
	int				error;

	blk = &ds->state->path.blk[level];

	/* Check the whole block, if necessary. */
	error = xfs_scrub_xattr_block(ds, level);
	if (error)
		goto out;
	if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		goto out;

	/* Check the hash of the entry. */
	error = xfs_scrub_da_btree_hash(ds, level, &ent->hashval);
	if (error)
		goto out;

	/* Find the attr entry's location. */
	bp = blk->bp;
	hdrsize = xfs_attr3_leaf_hdr_size(bp->b_addr);
	nameidx = be16_to_cpu(ent->nameidx);
	if (nameidx < hdrsize || nameidx >= mp->m_attr_geo->blksize) {
		xfs_scrub_da_set_corrupt(ds, level);
		goto out;
	}

	/* Retrieve the entry and check it. */
	hash = be32_to_cpu(ent->hashval);
	badflags = ~(XFS_ATTR_LOCAL | XFS_ATTR_ROOT | XFS_ATTR_SECURE |
			XFS_ATTR_INCOMPLETE);
	if ((ent->flags & badflags) != 0)
		xfs_scrub_da_set_corrupt(ds, level);
	if (ent->flags & XFS_ATTR_LOCAL) {
		lentry = (struct xfs_attr_leaf_name_local *)
				(((char *)bp->b_addr) + nameidx);
		if (lentry->namelen <= 0) {
			xfs_scrub_da_set_corrupt(ds, level);
			goto out;
		}
		calc_hash = xfs_da_hashname(lentry->nameval, lentry->namelen);
	} else {
		rentry = (struct xfs_attr_leaf_name_remote *)
				(((char *)bp->b_addr) + nameidx);
		if (rentry->namelen <= 0) {
			xfs_scrub_da_set_corrupt(ds, level);
			goto out;
		}
		calc_hash = xfs_da_hashname(rentry->name, rentry->namelen);
	}
	if (calc_hash != hash)
		xfs_scrub_da_set_corrupt(ds, level);

out:
	return error;
}

/* Scrub the extended attribute metadata. */
int
xfs_scrub_xattr(
	struct xfs_scrub_context	*sc)
{
	struct xfs_scrub_xattr		sx;
	struct attrlist_cursor_kern	cursor = { 0 };
	xfs_dablk_t			last_checked = -1U;
	int				error = 0;

	if (!xfs_inode_hasattr(sc->ip))
		return -ENOENT;

	memset(&sx, 0, sizeof(sx));
	/* Check attribute tree structure */
	error = xfs_scrub_da_btree(sc, XFS_ATTR_FORK, xfs_scrub_xattr_rec,
			&last_checked);
	if (error)
		goto out;

	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		goto out;

	/* Check that every attr key can also be looked up by hash. */
	sx.context.dp = sc->ip;
	sx.context.cursor = &cursor;
	sx.context.resynch = 1;
	sx.context.put_listent = xfs_scrub_xattr_listent;
	sx.context.tp = sc->tp;
	sx.context.flags = ATTR_INCOMPLETE;
	sx.sc = sc;

	/*
	 * Look up every xattr in this file by name.
	 *
	 * Use the backend implementation of xfs_attr_list to call
	 * xfs_scrub_xattr_listent on every attribute key in this inode.
	 * In other words, we use the same iterator/callback mechanism
	 * that listattr uses to scrub extended attributes, though in our
	 * _listent function, we check the value of the attribute.
	 *
	 * The VFS only locks i_rwsem when modifying attrs, so keep all
	 * three locks held because that's the only way to ensure we're
	 * the only thread poking into the da btree.  We traverse the da
	 * btree while holding a leaf buffer locked for the xattr name
	 * iteration, which doesn't really follow the usual buffer
	 * locking order.
	 */
	error = xfs_attr_list_int_ilocked(&sx.context);
	if (!xfs_scrub_fblock_process_error(sc, XFS_ATTR_FORK, 0, &error))
		goto out;
out:
	return error;
}
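xfs_scrub_xattr_set_map() detects doubly-claimed space inside an attr leaf block by marking byte ranges in a bitmap and reporting a conflict when a range runs off the map or touches an already-marked region. A simplified model of that logic (one byte per bit for clarity and a fixed map size; the kernel code uses find_next_bit()/bitmap_set() on arrays of unsigned long, with mapsize taken from m_attr_geo->blksize):

```c
#include <assert.h>
#include <stdbool.h>

#define MAPSIZE	64	/* stands in for m_attr_geo->blksize */

static unsigned char map[MAPSIZE];	/* zero-initialized usage map */

/*
 * Mark [start, start+len) used.  Return false if the range is out of
 * bounds or overlaps space that some earlier caller already claimed,
 * mirroring the conflict detection in xfs_scrub_xattr_set_map().
 */
static bool
set_map(unsigned char *m, unsigned int start, unsigned int len)
{
	bool		ret = true;
	unsigned int	i;

	if (start >= MAPSIZE)
		return false;
	if (start + len > MAPSIZE) {
		len = MAPSIZE - start;
		ret = false;
	}

	for (i = start; i < start + len; i++) {
		if (m[i])
			ret = false;	/* overlap: doubly-claimed space */
		m[i] = 1;
	}
	return ret;
}
```

The scrubber applies this to the leaf header, every entry, every name/value region, and finally the freemap, so any two structures claiming the same bytes trip the corruption flag.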
fs/xfs/scrub/bmap.c (+363 lines)
··· 1 + /* 2 + * Copyright (C) 2017 Oracle. All Rights Reserved. 3 + * 4 + * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 + * 6 + * This program is free software; you can redistribute it and/or 7 + * modify it under the terms of the GNU General Public License 8 + * as published by the Free Software Foundation; either version 2 9 + * of the License, or (at your option) any later version. 10 + * 11 + * This program is distributed in the hope that it would be useful, 12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 + * GNU General Public License for more details. 15 + * 16 + * You should have received a copy of the GNU General Public License 17 + * along with this program; if not, write the Free Software Foundation, 18 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 19 + */ 20 + #include "xfs.h" 21 + #include "xfs_fs.h" 22 + #include "xfs_shared.h" 23 + #include "xfs_format.h" 24 + #include "xfs_trans_resv.h" 25 + #include "xfs_mount.h" 26 + #include "xfs_defer.h" 27 + #include "xfs_btree.h" 28 + #include "xfs_bit.h" 29 + #include "xfs_log_format.h" 30 + #include "xfs_trans.h" 31 + #include "xfs_sb.h" 32 + #include "xfs_inode.h" 33 + #include "xfs_inode_fork.h" 34 + #include "xfs_alloc.h" 35 + #include "xfs_rtalloc.h" 36 + #include "xfs_bmap.h" 37 + #include "xfs_bmap_util.h" 38 + #include "xfs_bmap_btree.h" 39 + #include "xfs_rmap.h" 40 + #include "scrub/xfs_scrub.h" 41 + #include "scrub/scrub.h" 42 + #include "scrub/common.h" 43 + #include "scrub/btree.h" 44 + #include "scrub/trace.h" 45 + 46 + /* Set us up with an inode's bmap. 
*/ 47 + int 48 + xfs_scrub_setup_inode_bmap( 49 + struct xfs_scrub_context *sc, 50 + struct xfs_inode *ip) 51 + { 52 + struct xfs_mount *mp = sc->mp; 53 + int error; 54 + 55 + error = xfs_scrub_get_inode(sc, ip); 56 + if (error) 57 + goto out; 58 + 59 + sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL; 60 + xfs_ilock(sc->ip, sc->ilock_flags); 61 + 62 + /* 63 + * We don't want any ephemeral data fork updates sitting around 64 + * while we inspect block mappings, so wait for directio to finish 65 + * and flush dirty data if we have delalloc reservations. 66 + */ 67 + if (S_ISREG(VFS_I(sc->ip)->i_mode) && 68 + sc->sm->sm_type == XFS_SCRUB_TYPE_BMBTD) { 69 + inode_dio_wait(VFS_I(sc->ip)); 70 + error = filemap_write_and_wait(VFS_I(sc->ip)->i_mapping); 71 + if (error) 72 + goto out; 73 + } 74 + 75 + /* Got the inode, lock it and we're ready to go. */ 76 + error = xfs_scrub_trans_alloc(sc->sm, mp, &sc->tp); 77 + if (error) 78 + goto out; 79 + sc->ilock_flags |= XFS_ILOCK_EXCL; 80 + xfs_ilock(sc->ip, XFS_ILOCK_EXCL); 81 + 82 + out: 83 + /* scrub teardown will unlock and release the inode */ 84 + return error; 85 + } 86 + 87 + /* 88 + * Inode fork block mapping (BMBT) scrubber. 89 + * More complex than the others because we have to scrub 90 + * all the extents regardless of whether or not the fork 91 + * is in btree format. 92 + */ 93 + 94 + struct xfs_scrub_bmap_info { 95 + struct xfs_scrub_context *sc; 96 + xfs_fileoff_t lastoff; 97 + bool is_rt; 98 + bool is_shared; 99 + int whichfork; 100 + }; 101 + 102 + /* Scrub a single extent record. */ 103 + STATIC int 104 + xfs_scrub_bmap_extent( 105 + struct xfs_inode *ip, 106 + struct xfs_btree_cur *cur, 107 + struct xfs_scrub_bmap_info *info, 108 + struct xfs_bmbt_irec *irec) 109 + { 110 + struct xfs_mount *mp = info->sc->mp; 111 + struct xfs_buf *bp = NULL; 112 + int error = 0; 113 + 114 + if (cur) 115 + xfs_btree_get_block(cur, 0, &bp); 116 + 117 + /* 118 + * Check for out-of-order extents. 
This record could have come 119 + * from the incore list, for which there is no ordering check. 120 + */ 121 + if (irec->br_startoff < info->lastoff) 122 + xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork, 123 + irec->br_startoff); 124 + 125 + /* There should never be a "hole" extent in either extent list. */ 126 + if (irec->br_startblock == HOLESTARTBLOCK) 127 + xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork, 128 + irec->br_startoff); 129 + 130 + /* 131 + * Check for delalloc extents. We never iterate the ones in the 132 + * in-core extent scan, and we should never see these in the bmbt. 133 + */ 134 + if (isnullstartblock(irec->br_startblock)) 135 + xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork, 136 + irec->br_startoff); 137 + 138 + /* Make sure the extent points to a valid place. */ 139 + if (irec->br_startblock + irec->br_blockcount <= irec->br_startblock) 140 + xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork, 141 + irec->br_startoff); 142 + if (info->is_rt && 143 + (!xfs_verify_rtbno(mp, irec->br_startblock) || 144 + !xfs_verify_rtbno(mp, irec->br_startblock + 145 + irec->br_blockcount - 1))) 146 + xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork, 147 + irec->br_startoff); 148 + if (!info->is_rt && 149 + (!xfs_verify_fsbno(mp, irec->br_startblock) || 150 + !xfs_verify_fsbno(mp, irec->br_startblock + 151 + irec->br_blockcount - 1))) 152 + xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork, 153 + irec->br_startoff); 154 + 155 + /* We don't allow unwritten extents on attr forks. */ 156 + if (irec->br_state == XFS_EXT_UNWRITTEN && 157 + info->whichfork == XFS_ATTR_FORK) 158 + xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork, 159 + irec->br_startoff); 160 + 161 + info->lastoff = irec->br_startoff + irec->br_blockcount; 162 + return error; 163 + } 164 + 165 + /* Scrub a bmbt record. 
*/ 166 + STATIC int 167 + xfs_scrub_bmapbt_rec( 168 + struct xfs_scrub_btree *bs, 169 + union xfs_btree_rec *rec) 170 + { 171 + struct xfs_bmbt_irec irec; 172 + struct xfs_scrub_bmap_info *info = bs->private; 173 + struct xfs_inode *ip = bs->cur->bc_private.b.ip; 174 + struct xfs_buf *bp = NULL; 175 + struct xfs_btree_block *block; 176 + uint64_t owner; 177 + int i; 178 + 179 + /* 180 + * Check the owners of the btree blocks up to the level below 181 + * the root since the verifiers don't do that. 182 + */ 183 + if (xfs_sb_version_hascrc(&bs->cur->bc_mp->m_sb) && 184 + bs->cur->bc_ptrs[0] == 1) { 185 + for (i = 0; i < bs->cur->bc_nlevels - 1; i++) { 186 + block = xfs_btree_get_block(bs->cur, i, &bp); 187 + owner = be64_to_cpu(block->bb_u.l.bb_owner); 188 + if (owner != ip->i_ino) 189 + xfs_scrub_fblock_set_corrupt(bs->sc, 190 + info->whichfork, 0); 191 + } 192 + } 193 + 194 + /* Set up the in-core record and scrub it. */ 195 + xfs_bmbt_disk_get_all(&rec->bmbt, &irec); 196 + return xfs_scrub_bmap_extent(ip, bs->cur, info, &irec); 197 + } 198 + 199 + /* Scan the btree records. */ 200 + STATIC int 201 + xfs_scrub_bmap_btree( 202 + struct xfs_scrub_context *sc, 203 + int whichfork, 204 + struct xfs_scrub_bmap_info *info) 205 + { 206 + struct xfs_owner_info oinfo; 207 + struct xfs_mount *mp = sc->mp; 208 + struct xfs_inode *ip = sc->ip; 209 + struct xfs_btree_cur *cur; 210 + int error; 211 + 212 + cur = xfs_bmbt_init_cursor(mp, sc->tp, ip, whichfork); 213 + xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork); 214 + error = xfs_scrub_btree(sc, cur, xfs_scrub_bmapbt_rec, &oinfo, info); 215 + xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : 216 + XFS_BTREE_NOERROR); 217 + return error; 218 + } 219 + 220 + /* 221 + * Scrub an inode fork's block mappings. 222 + * 223 + * First we scan every record in every btree block, if applicable. 224 + * Then we unconditionally scan the incore extent cache. 
225 + */ 226 + STATIC int 227 + xfs_scrub_bmap( 228 + struct xfs_scrub_context *sc, 229 + int whichfork) 230 + { 231 + struct xfs_bmbt_irec irec; 232 + struct xfs_scrub_bmap_info info = { NULL }; 233 + struct xfs_mount *mp = sc->mp; 234 + struct xfs_inode *ip = sc->ip; 235 + struct xfs_ifork *ifp; 236 + xfs_fileoff_t endoff; 237 + struct xfs_iext_cursor icur; 238 + bool found; 239 + int error = 0; 240 + 241 + ifp = XFS_IFORK_PTR(ip, whichfork); 242 + 243 + info.is_rt = whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip); 244 + info.whichfork = whichfork; 245 + info.is_shared = whichfork == XFS_DATA_FORK && xfs_is_reflink_inode(ip); 246 + info.sc = sc; 247 + 248 + switch (whichfork) { 249 + case XFS_COW_FORK: 250 + /* Non-existent CoW forks are ignorable. */ 251 + if (!ifp) 252 + goto out; 253 + /* No CoW forks on non-reflink inodes/filesystems. */ 254 + if (!xfs_is_reflink_inode(ip)) { 255 + xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino, NULL); 256 + goto out; 257 + } 258 + break; 259 + case XFS_ATTR_FORK: 260 + if (!ifp) 261 + goto out; 262 + if (!xfs_sb_version_hasattr(&mp->m_sb) && 263 + !xfs_sb_version_hasattr2(&mp->m_sb)) 264 + xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino, NULL); 265 + break; 266 + default: 267 + ASSERT(whichfork == XFS_DATA_FORK); 268 + break; 269 + } 270 + 271 + /* Check the fork values */ 272 + switch (XFS_IFORK_FORMAT(ip, whichfork)) { 273 + case XFS_DINODE_FMT_UUID: 274 + case XFS_DINODE_FMT_DEV: 275 + case XFS_DINODE_FMT_LOCAL: 276 + /* No mappings to check. 
*/ 277 + goto out; 278 + case XFS_DINODE_FMT_EXTENTS: 279 + if (!(ifp->if_flags & XFS_IFEXTENTS)) { 280 + xfs_scrub_fblock_set_corrupt(sc, whichfork, 0); 281 + goto out; 282 + } 283 + break; 284 + case XFS_DINODE_FMT_BTREE: 285 + if (whichfork == XFS_COW_FORK) { 286 + xfs_scrub_fblock_set_corrupt(sc, whichfork, 0); 287 + goto out; 288 + } 289 + 290 + error = xfs_scrub_bmap_btree(sc, whichfork, &info); 291 + if (error) 292 + goto out; 293 + break; 294 + default: 295 + xfs_scrub_fblock_set_corrupt(sc, whichfork, 0); 296 + goto out; 297 + } 298 + 299 + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 300 + goto out; 301 + 302 + /* Now try to scrub the in-memory extent list. */ 303 + if (!(ifp->if_flags & XFS_IFEXTENTS)) { 304 + error = xfs_iread_extents(sc->tp, ip, whichfork); 305 + if (!xfs_scrub_fblock_process_error(sc, whichfork, 0, &error)) 306 + goto out; 307 + } 308 + 309 + /* Find the offset of the last extent in the mapping. */ 310 + error = xfs_bmap_last_offset(ip, &endoff, whichfork); 311 + if (!xfs_scrub_fblock_process_error(sc, whichfork, 0, &error)) 312 + goto out; 313 + 314 + /* Scrub extent records. */ 315 + info.lastoff = 0; 316 + ifp = XFS_IFORK_PTR(ip, whichfork); 317 + for (found = xfs_iext_lookup_extent(ip, ifp, 0, &icur, &irec); 318 + found != 0; 319 + found = xfs_iext_next_extent(ifp, &icur, &irec)) { 320 + if (xfs_scrub_should_terminate(sc, &error)) 321 + break; 322 + if (isnullstartblock(irec.br_startblock)) 323 + continue; 324 + if (irec.br_startoff >= endoff) { 325 + xfs_scrub_fblock_set_corrupt(sc, whichfork, 326 + irec.br_startoff); 327 + goto out; 328 + } 329 + error = xfs_scrub_bmap_extent(ip, NULL, &info, &irec); 330 + if (error) 331 + goto out; 332 + } 333 + 334 + out: 335 + return error; 336 + } 337 + 338 + /* Scrub an inode's data fork. */ 339 + int 340 + xfs_scrub_bmap_data( 341 + struct xfs_scrub_context *sc) 342 + { 343 + return xfs_scrub_bmap(sc, XFS_DATA_FORK); 344 + } 345 + 346 + /* Scrub an inode's attr fork. 
*/ 347 + int 348 + xfs_scrub_bmap_attr( 349 + struct xfs_scrub_context *sc) 350 + { 351 + return xfs_scrub_bmap(sc, XFS_ATTR_FORK); 352 + } 353 + 354 + /* Scrub an inode's CoW fork. */ 355 + int 356 + xfs_scrub_bmap_cow( 357 + struct xfs_scrub_context *sc) 358 + { 359 + if (!xfs_is_reflink_inode(sc->ip)) 360 + return -ENOENT; 361 + 362 + return xfs_scrub_bmap(sc, XFS_COW_FORK); 363 + }
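The extent-walk loop in xfs_scrub_bmap() above — iterate the in-core records, skip delalloc mappings, and flag anything starting at or past the last mapped offset — can be sketched in userspace C. The `struct irec` and `scrub_extents` names here are hypothetical stand-ins, and the overlap check folds in what the real code defers to xfs_scrub_bmap_extent():

```c
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* Hypothetical, simplified stand-in for struct xfs_bmbt_irec. */
struct irec {
	uint64_t br_startoff;   /* file offset of mapping */
	uint64_t br_blockcount; /* length in blocks */
	bool     delalloc;      /* stand-in for isnullstartblock() */
};

/*
 * Walk a sorted extent list the way xfs_scrub_bmap() walks the iext
 * cursor: skip delalloc records, and flag any record that starts at or
 * beyond the last mapped offset or overlaps its predecessor.
 * Returns the number of records flagged as corrupt.
 */
static int
scrub_extents(const struct irec *recs, size_t nrecs, uint64_t endoff)
{
	uint64_t lastoff = 0;
	int corrupt = 0;

	for (size_t i = 0; i < nrecs; i++) {
		if (recs[i].delalloc)
			continue;	/* not mapped yet; nothing to check */
		if (recs[i].br_startoff >= endoff ||
		    recs[i].br_startoff < lastoff) {
			corrupt++;
			continue;
		}
		lastoff = recs[i].br_startoff + recs[i].br_blockcount;
	}
	return corrupt;
}
```

This is only a model of the control flow; the kernel version also cross-checks each record against the rmapbt and realtime bitmaps.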
+516
fs/xfs/scrub/btree.c
··· 1 + /* 2 + * Copyright (C) 2017 Oracle. All Rights Reserved. 3 + * 4 + * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 + * 6 + * This program is free software; you can redistribute it and/or 7 + * modify it under the terms of the GNU General Public License 8 + * as published by the Free Software Foundation; either version 2 9 + * of the License, or (at your option) any later version. 10 + * 11 + * This program is distributed in the hope that it would be useful, 12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 + * GNU General Public License for more details. 15 + * 16 + * You should have received a copy of the GNU General Public License 17 + * along with this program; if not, write the Free Software Foundation, 18 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 19 + */ 20 + #include "xfs.h" 21 + #include "xfs_fs.h" 22 + #include "xfs_shared.h" 23 + #include "xfs_format.h" 24 + #include "xfs_trans_resv.h" 25 + #include "xfs_mount.h" 26 + #include "xfs_defer.h" 27 + #include "xfs_btree.h" 28 + #include "xfs_bit.h" 29 + #include "xfs_log_format.h" 30 + #include "xfs_trans.h" 31 + #include "xfs_sb.h" 32 + #include "xfs_inode.h" 33 + #include "xfs_alloc.h" 34 + #include "scrub/scrub.h" 35 + #include "scrub/common.h" 36 + #include "scrub/btree.h" 37 + #include "scrub/trace.h" 38 + 39 + /* btree scrubbing */ 40 + 41 + /* 42 + * Check for btree operation errors. See the section about handling 43 + * operational errors in common.c. 44 + */ 45 + bool 46 + xfs_scrub_btree_process_error( 47 + struct xfs_scrub_context *sc, 48 + struct xfs_btree_cur *cur, 49 + int level, 50 + int *error) 51 + { 52 + if (*error == 0) 53 + return true; 54 + 55 + switch (*error) { 56 + case -EDEADLOCK: 57 + /* Used to restart an op with deadlock avoidance. 
*/ 58 + trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error); 59 + break; 60 + case -EFSBADCRC: 61 + case -EFSCORRUPTED: 62 + /* Note the badness but don't abort. */ 63 + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; 64 + *error = 0; 65 + /* fall through */ 66 + default: 67 + if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) 68 + trace_xfs_scrub_ifork_btree_op_error(sc, cur, level, 69 + *error, __return_address); 70 + else 71 + trace_xfs_scrub_btree_op_error(sc, cur, level, 72 + *error, __return_address); 73 + break; 74 + } 75 + return false; 76 + } 77 + 78 + /* Record btree block corruption. */ 79 + void 80 + xfs_scrub_btree_set_corrupt( 81 + struct xfs_scrub_context *sc, 82 + struct xfs_btree_cur *cur, 83 + int level) 84 + { 85 + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; 86 + 87 + if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) 88 + trace_xfs_scrub_ifork_btree_error(sc, cur, level, 89 + __return_address); 90 + else 91 + trace_xfs_scrub_btree_error(sc, cur, level, 92 + __return_address); 93 + } 94 + 95 + /* 96 + * Make sure this record is in order and doesn't stray outside of the parent 97 + * keys. 98 + */ 99 + STATIC void 100 + xfs_scrub_btree_rec( 101 + struct xfs_scrub_btree *bs) 102 + { 103 + struct xfs_btree_cur *cur = bs->cur; 104 + union xfs_btree_rec *rec; 105 + union xfs_btree_key key; 106 + union xfs_btree_key hkey; 107 + union xfs_btree_key *keyp; 108 + struct xfs_btree_block *block; 109 + struct xfs_btree_block *keyblock; 110 + struct xfs_buf *bp; 111 + 112 + block = xfs_btree_get_block(cur, 0, &bp); 113 + rec = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block); 114 + 115 + trace_xfs_scrub_btree_rec(bs->sc, cur, 0); 116 + 117 + /* If this isn't the first record, are they in order? 
*/ 118 + if (!bs->firstrec && !cur->bc_ops->recs_inorder(cur, &bs->lastrec, rec)) 119 + xfs_scrub_btree_set_corrupt(bs->sc, cur, 0); 120 + bs->firstrec = false; 121 + memcpy(&bs->lastrec, rec, cur->bc_ops->rec_len); 122 + 123 + if (cur->bc_nlevels == 1) 124 + return; 125 + 126 + /* Is this at least as large as the parent low key? */ 127 + cur->bc_ops->init_key_from_rec(&key, rec); 128 + keyblock = xfs_btree_get_block(cur, 1, &bp); 129 + keyp = xfs_btree_key_addr(cur, cur->bc_ptrs[1], keyblock); 130 + if (cur->bc_ops->diff_two_keys(cur, &key, keyp) < 0) 131 + xfs_scrub_btree_set_corrupt(bs->sc, cur, 1); 132 + 133 + if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING)) 134 + return; 135 + 136 + /* Is this no larger than the parent high key? */ 137 + cur->bc_ops->init_high_key_from_rec(&hkey, rec); 138 + keyp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[1], keyblock); 139 + if (cur->bc_ops->diff_two_keys(cur, keyp, &hkey) < 0) 140 + xfs_scrub_btree_set_corrupt(bs->sc, cur, 1); 141 + } 142 + 143 + /* 144 + * Make sure this key is in order and doesn't stray outside of the parent 145 + * keys. 146 + */ 147 + STATIC void 148 + xfs_scrub_btree_key( 149 + struct xfs_scrub_btree *bs, 150 + int level) 151 + { 152 + struct xfs_btree_cur *cur = bs->cur; 153 + union xfs_btree_key *key; 154 + union xfs_btree_key *keyp; 155 + struct xfs_btree_block *block; 156 + struct xfs_btree_block *keyblock; 157 + struct xfs_buf *bp; 158 + 159 + block = xfs_btree_get_block(cur, level, &bp); 160 + key = xfs_btree_key_addr(cur, cur->bc_ptrs[level], block); 161 + 162 + trace_xfs_scrub_btree_key(bs->sc, cur, level); 163 + 164 + /* If this isn't the first key, are they in order? 
*/ 165 + if (!bs->firstkey[level] && 166 + !cur->bc_ops->keys_inorder(cur, &bs->lastkey[level], key)) 167 + xfs_scrub_btree_set_corrupt(bs->sc, cur, level); 168 + bs->firstkey[level] = false; 169 + memcpy(&bs->lastkey[level], key, cur->bc_ops->key_len); 170 + 171 + if (level + 1 >= cur->bc_nlevels) 172 + return; 173 + 174 + /* Is this at least as large as the parent low key? */ 175 + keyblock = xfs_btree_get_block(cur, level + 1, &bp); 176 + keyp = xfs_btree_key_addr(cur, cur->bc_ptrs[level + 1], keyblock); 177 + if (cur->bc_ops->diff_two_keys(cur, key, keyp) < 0) 178 + xfs_scrub_btree_set_corrupt(bs->sc, cur, level); 179 + 180 + if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING)) 181 + return; 182 + 183 + /* Is this no larger than the parent high key? */ 184 + key = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level], block); 185 + keyp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level + 1], keyblock); 186 + if (cur->bc_ops->diff_two_keys(cur, keyp, key) < 0) 187 + xfs_scrub_btree_set_corrupt(bs->sc, cur, level); 188 + } 189 + 190 + /* 191 + * Check a btree pointer. Returns true if it's ok to use this pointer. 192 + * Callers do not need to set the corrupt flag. 193 + */ 194 + static bool 195 + xfs_scrub_btree_ptr_ok( 196 + struct xfs_scrub_btree *bs, 197 + int level, 198 + union xfs_btree_ptr *ptr) 199 + { 200 + bool res; 201 + 202 + /* A btree rooted in an inode has no block pointer to the root. */ 203 + if ((bs->cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && 204 + level == bs->cur->bc_nlevels) 205 + return true; 206 + 207 + /* Otherwise, check the pointers. */ 208 + if (bs->cur->bc_flags & XFS_BTREE_LONG_PTRS) 209 + res = xfs_btree_check_lptr(bs->cur, be64_to_cpu(ptr->l), level); 210 + else 211 + res = xfs_btree_check_sptr(bs->cur, be32_to_cpu(ptr->s), level); 212 + if (!res) 213 + xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, level); 214 + 215 + return res; 216 + } 217 + 218 + /* Check that a btree block's sibling matches what we expect it. 
*/ 219 + STATIC int 220 + xfs_scrub_btree_block_check_sibling( 221 + struct xfs_scrub_btree *bs, 222 + int level, 223 + int direction, 224 + union xfs_btree_ptr *sibling) 225 + { 226 + struct xfs_btree_cur *cur = bs->cur; 227 + struct xfs_btree_block *pblock; 228 + struct xfs_buf *pbp; 229 + struct xfs_btree_cur *ncur = NULL; 230 + union xfs_btree_ptr *pp; 231 + int success; 232 + int error; 233 + 234 + error = xfs_btree_dup_cursor(cur, &ncur); 235 + if (!xfs_scrub_btree_process_error(bs->sc, cur, level + 1, &error) || 236 + !ncur) 237 + return error; 238 + 239 + /* 240 + * If the pointer is null, we shouldn't be able to move the upper 241 + * level pointer anywhere. 242 + */ 243 + if (xfs_btree_ptr_is_null(cur, sibling)) { 244 + if (direction > 0) 245 + error = xfs_btree_increment(ncur, level + 1, &success); 246 + else 247 + error = xfs_btree_decrement(ncur, level + 1, &success); 248 + if (error == 0 && success) 249 + xfs_scrub_btree_set_corrupt(bs->sc, cur, level); 250 + error = 0; 251 + goto out; 252 + } 253 + 254 + /* Increment upper level pointer. */ 255 + if (direction > 0) 256 + error = xfs_btree_increment(ncur, level + 1, &success); 257 + else 258 + error = xfs_btree_decrement(ncur, level + 1, &success); 259 + if (!xfs_scrub_btree_process_error(bs->sc, cur, level + 1, &error)) 260 + goto out; 261 + if (!success) { 262 + xfs_scrub_btree_set_corrupt(bs->sc, cur, level + 1); 263 + goto out; 264 + } 265 + 266 + /* Compare upper level pointer to sibling pointer. */ 267 + pblock = xfs_btree_get_block(ncur, level + 1, &pbp); 268 + pp = xfs_btree_ptr_addr(ncur, ncur->bc_ptrs[level + 1], pblock); 269 + if (!xfs_scrub_btree_ptr_ok(bs, level + 1, pp)) 270 + goto out; 271 + 272 + if (xfs_btree_diff_two_ptrs(cur, pp, sibling)) 273 + xfs_scrub_btree_set_corrupt(bs->sc, cur, level); 274 + out: 275 + xfs_btree_del_cursor(ncur, XFS_BTREE_ERROR); 276 + return error; 277 + } 278 + 279 + /* Check the siblings of a btree block. 
 */
280 + STATIC int
281 + xfs_scrub_btree_block_check_siblings(
282 + struct xfs_scrub_btree *bs,
283 + struct xfs_btree_block *block)
284 + {
285 + struct xfs_btree_cur *cur = bs->cur;
286 + union xfs_btree_ptr leftsib;
287 + union xfs_btree_ptr rightsib;
288 + int level;
289 + int error = 0;
290 +
291 + xfs_btree_get_sibling(cur, block, &leftsib, XFS_BB_LEFTSIB);
292 + xfs_btree_get_sibling(cur, block, &rightsib, XFS_BB_RIGHTSIB);
293 + level = xfs_btree_get_level(block);
294 +
295 + /* Root block should never have siblings. */
296 + if (level == cur->bc_nlevels - 1) {
297 + if (!xfs_btree_ptr_is_null(cur, &leftsib) ||
298 + !xfs_btree_ptr_is_null(cur, &rightsib))
299 + xfs_scrub_btree_set_corrupt(bs->sc, cur, level);
300 + goto out;
301 + }
302 +
303 + /*
304 + * Do the left and right sibling pointers match the adjacent
305 + * parent-level pointers?
306 + * (These functions absorb error codes for us.)
307 + */
308 + error = xfs_scrub_btree_block_check_sibling(bs, level, -1, &leftsib);
309 + if (error)
310 + return error;
311 + error = xfs_scrub_btree_block_check_sibling(bs, level, 1, &rightsib);
312 + if (error)
313 + return error;
314 + out:
315 + return error;
316 + }
317 +
318 + /*
319 + * Grab and scrub a btree block given a btree pointer. Returns block
320 + * and buffer pointers (if applicable) if they're ok to use.
321 + */ 322 + STATIC int 323 + xfs_scrub_btree_get_block( 324 + struct xfs_scrub_btree *bs, 325 + int level, 326 + union xfs_btree_ptr *pp, 327 + struct xfs_btree_block **pblock, 328 + struct xfs_buf **pbp) 329 + { 330 + void *failed_at; 331 + int error; 332 + 333 + *pblock = NULL; 334 + *pbp = NULL; 335 + 336 + error = xfs_btree_lookup_get_block(bs->cur, level, pp, pblock); 337 + if (!xfs_scrub_btree_process_error(bs->sc, bs->cur, level, &error) || 338 + !*pblock) 339 + return error; 340 + 341 + xfs_btree_get_block(bs->cur, level, pbp); 342 + if (bs->cur->bc_flags & XFS_BTREE_LONG_PTRS) 343 + failed_at = __xfs_btree_check_lblock(bs->cur, *pblock, 344 + level, *pbp); 345 + else 346 + failed_at = __xfs_btree_check_sblock(bs->cur, *pblock, 347 + level, *pbp); 348 + if (failed_at) { 349 + xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, level); 350 + return 0; 351 + } 352 + 353 + /* 354 + * Check the block's siblings; this function absorbs error codes 355 + * for us. 356 + */ 357 + return xfs_scrub_btree_block_check_siblings(bs, *pblock); 358 + } 359 + 360 + /* 361 + * Check that the low and high keys of this block match the keys stored 362 + * in the parent block. 363 + */ 364 + STATIC void 365 + xfs_scrub_btree_block_keys( 366 + struct xfs_scrub_btree *bs, 367 + int level, 368 + struct xfs_btree_block *block) 369 + { 370 + union xfs_btree_key block_keys; 371 + struct xfs_btree_cur *cur = bs->cur; 372 + union xfs_btree_key *high_bk; 373 + union xfs_btree_key *parent_keys; 374 + union xfs_btree_key *high_pk; 375 + struct xfs_btree_block *parent_block; 376 + struct xfs_buf *bp; 377 + 378 + if (level >= cur->bc_nlevels - 1) 379 + return; 380 + 381 + /* Calculate the keys for this block. */ 382 + xfs_btree_get_keys(cur, block, &block_keys); 383 + 384 + /* Obtain the parent's copy of the keys for this block. 
*/ 385 + parent_block = xfs_btree_get_block(cur, level + 1, &bp); 386 + parent_keys = xfs_btree_key_addr(cur, cur->bc_ptrs[level + 1], 387 + parent_block); 388 + 389 + if (cur->bc_ops->diff_two_keys(cur, &block_keys, parent_keys) != 0) 390 + xfs_scrub_btree_set_corrupt(bs->sc, cur, 1); 391 + 392 + if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING)) 393 + return; 394 + 395 + /* Get high keys */ 396 + high_bk = xfs_btree_high_key_from_key(cur, &block_keys); 397 + high_pk = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level + 1], 398 + parent_block); 399 + 400 + if (cur->bc_ops->diff_two_keys(cur, high_bk, high_pk) != 0) 401 + xfs_scrub_btree_set_corrupt(bs->sc, cur, 1); 402 + } 403 + 404 + /* 405 + * Visit all nodes and leaves of a btree. Check that all pointers and 406 + * records are in order, that the keys reflect the records, and use a callback 407 + * so that the caller can verify individual records. 408 + */ 409 + int 410 + xfs_scrub_btree( 411 + struct xfs_scrub_context *sc, 412 + struct xfs_btree_cur *cur, 413 + xfs_scrub_btree_rec_fn scrub_fn, 414 + struct xfs_owner_info *oinfo, 415 + void *private) 416 + { 417 + struct xfs_scrub_btree bs = { NULL }; 418 + union xfs_btree_ptr ptr; 419 + union xfs_btree_ptr *pp; 420 + union xfs_btree_rec *recp; 421 + struct xfs_btree_block *block; 422 + int level; 423 + struct xfs_buf *bp; 424 + int i; 425 + int error = 0; 426 + 427 + /* Initialize scrub state */ 428 + bs.cur = cur; 429 + bs.scrub_rec = scrub_fn; 430 + bs.oinfo = oinfo; 431 + bs.firstrec = true; 432 + bs.private = private; 433 + bs.sc = sc; 434 + for (i = 0; i < XFS_BTREE_MAXLEVELS; i++) 435 + bs.firstkey[i] = true; 436 + INIT_LIST_HEAD(&bs.to_check); 437 + 438 + /* Don't try to check a tree with a height we can't handle. */ 439 + if (cur->bc_nlevels > XFS_BTREE_MAXLEVELS) { 440 + xfs_scrub_btree_set_corrupt(sc, cur, 0); 441 + goto out; 442 + } 443 + 444 + /* 445 + * Load the root of the btree. The helper function absorbs 446 + * error codes for us. 
447 + */ 448 + level = cur->bc_nlevels - 1; 449 + cur->bc_ops->init_ptr_from_cur(cur, &ptr); 450 + if (!xfs_scrub_btree_ptr_ok(&bs, cur->bc_nlevels, &ptr)) 451 + goto out; 452 + error = xfs_scrub_btree_get_block(&bs, level, &ptr, &block, &bp); 453 + if (error || !block) 454 + goto out; 455 + 456 + cur->bc_ptrs[level] = 1; 457 + 458 + while (level < cur->bc_nlevels) { 459 + block = xfs_btree_get_block(cur, level, &bp); 460 + 461 + if (level == 0) { 462 + /* End of leaf, pop back towards the root. */ 463 + if (cur->bc_ptrs[level] > 464 + be16_to_cpu(block->bb_numrecs)) { 465 + xfs_scrub_btree_block_keys(&bs, level, block); 466 + if (level < cur->bc_nlevels - 1) 467 + cur->bc_ptrs[level + 1]++; 468 + level++; 469 + continue; 470 + } 471 + 472 + /* Records in order for scrub? */ 473 + xfs_scrub_btree_rec(&bs); 474 + 475 + /* Call out to the record checker. */ 476 + recp = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block); 477 + error = bs.scrub_rec(&bs, recp); 478 + if (error) 479 + break; 480 + if (xfs_scrub_should_terminate(sc, &error) || 481 + (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) 482 + break; 483 + 484 + cur->bc_ptrs[level]++; 485 + continue; 486 + } 487 + 488 + /* End of node, pop back towards the root. */ 489 + if (cur->bc_ptrs[level] > be16_to_cpu(block->bb_numrecs)) { 490 + xfs_scrub_btree_block_keys(&bs, level, block); 491 + if (level < cur->bc_nlevels - 1) 492 + cur->bc_ptrs[level + 1]++; 493 + level++; 494 + continue; 495 + } 496 + 497 + /* Keys in order for scrub? */ 498 + xfs_scrub_btree_key(&bs, level); 499 + 500 + /* Drill another level deeper. */ 501 + pp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[level], block); 502 + if (!xfs_scrub_btree_ptr_ok(&bs, level, pp)) { 503 + cur->bc_ptrs[level]++; 504 + continue; 505 + } 506 + level--; 507 + error = xfs_scrub_btree_get_block(&bs, level, pp, &block, &bp); 508 + if (error || !block) 509 + goto out; 510 + 511 + cur->bc_ptrs[level] = 1; 512 + } 513 + 514 + out: 515 + return error; 516 + }
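The iterative traversal in xfs_scrub_btree() above — per-level 1-based position pointers, popping toward the root when a block is exhausted and descending when a pointer is taken — can be modelled with a small in-memory tree. `struct node` and `walk_btree` here are hypothetical stand-ins for the cursor machinery, not the kernel API:

```c
#include <assert.h>
#include <stddef.h>

#define MAXLEVELS 8	/* stand-in for XFS_BTREE_MAXLEVELS */

/* Hypothetical in-memory block: leaves hold records, nodes hold children. */
struct node {
	int nrecs;		/* entries in this block */
	struct node *child[8];	/* node children (unused in leaves) */
	int rec[8];		/* leaf records (unused in nodes) */
};

/*
 * Visit every leaf record without recursion, the way xfs_scrub_btree()
 * does: keep a cursor of 1-based positions per level (like cur->bc_ptrs),
 * pop toward the root when a block is exhausted, drill deeper otherwise.
 * Records are copied into out[]; the count is returned.
 */
static int
walk_btree(struct node *root, int nlevels, int *out)
{
	struct node *path[MAXLEVELS];
	int ptrs[MAXLEVELS];
	int level = nlevels - 1;
	int n = 0;

	path[level] = root;
	ptrs[level] = 1;

	while (level < nlevels) {
		struct node *blk = path[level];

		if (ptrs[level] > blk->nrecs) {
			/* Block exhausted: pop back toward the root. */
			if (level < nlevels - 1)
				ptrs[level + 1]++;
			level++;
			continue;
		}
		if (level == 0) {
			/* Leaf: emit the record and move right. */
			out[n++] = blk->rec[ptrs[level] - 1];
			ptrs[level]++;
			continue;
		}
		/* Node: drill one level deeper. */
		path[level - 1] = blk->child[ptrs[level] - 1];
		level--;
		ptrs[level] = 1;
	}
	return n;
}
```

The same pop/descend skeleton appears twice in the kernel loop (once for leaves, once for interior nodes) because the record checks differ per level.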
+57
fs/xfs/scrub/btree.h
··· 1 + /* 2 + * Copyright (C) 2017 Oracle. All Rights Reserved. 3 + * 4 + * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 + * 6 + * This program is free software; you can redistribute it and/or 7 + * modify it under the terms of the GNU General Public License 8 + * as published by the Free Software Foundation; either version 2 9 + * of the License, or (at your option) any later version. 10 + * 11 + * This program is distributed in the hope that it would be useful, 12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 + * GNU General Public License for more details. 15 + * 16 + * You should have received a copy of the GNU General Public License 17 + * along with this program; if not, write the Free Software Foundation, 18 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 19 + */ 20 + #ifndef __XFS_SCRUB_BTREE_H__ 21 + #define __XFS_SCRUB_BTREE_H__ 22 + 23 + /* btree scrub */ 24 + 25 + /* Check for btree operation errors. */ 26 + bool xfs_scrub_btree_process_error(struct xfs_scrub_context *sc, 27 + struct xfs_btree_cur *cur, int level, int *error); 28 + 29 + /* Check for btree corruption. 
*/ 30 + void xfs_scrub_btree_set_corrupt(struct xfs_scrub_context *sc, 31 + struct xfs_btree_cur *cur, int level); 32 + 33 + struct xfs_scrub_btree; 34 + typedef int (*xfs_scrub_btree_rec_fn)( 35 + struct xfs_scrub_btree *bs, 36 + union xfs_btree_rec *rec); 37 + 38 + struct xfs_scrub_btree { 39 + /* caller-provided scrub state */ 40 + struct xfs_scrub_context *sc; 41 + struct xfs_btree_cur *cur; 42 + xfs_scrub_btree_rec_fn scrub_rec; 43 + struct xfs_owner_info *oinfo; 44 + void *private; 45 + 46 + /* internal scrub state */ 47 + union xfs_btree_rec lastrec; 48 + bool firstrec; 49 + union xfs_btree_key lastkey[XFS_BTREE_MAXLEVELS]; 50 + bool firstkey[XFS_BTREE_MAXLEVELS]; 51 + struct list_head to_check; 52 + }; 53 + int xfs_scrub_btree(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur, 54 + xfs_scrub_btree_rec_fn scrub_fn, 55 + struct xfs_owner_info *oinfo, void *private); 56 + 57 + #endif /* __XFS_SCRUB_BTREE_H__ */
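The `firstrec`/`lastrec` state in struct xfs_scrub_btree above drives a simple ordering check: skip the comparison for the first record seen, then compare each record against the remembered predecessor. A minimal userspace sketch, with hypothetical `struct rec` and `check_order` names:

```c
#include <assert.h>
#include <stdbool.h>
#include <string.h>

/* Hypothetical record and per-walk state, mirroring struct xfs_scrub_btree. */
struct rec { int start; int len; };

struct walk_state {
	struct rec lastrec;
	bool firstrec;
	int corrupt;
};

/*
 * Per-record ordering check, like xfs_scrub_btree_rec(): compare each
 * record against the previous one via a caller-supplied predicate
 * (standing in for cur->bc_ops->recs_inorder), then remember it.
 */
static void
check_order(struct walk_state *ws, const struct rec *r,
	    bool (*inorder)(const struct rec *, const struct rec *))
{
	if (!ws->firstrec && !inorder(&ws->lastrec, r))
		ws->corrupt++;
	ws->firstrec = false;
	memcpy(&ws->lastrec, r, sizeof(*r));
}

/* Records are in order if they ascend and do not overlap. */
static bool
recs_inorder(const struct rec *a, const struct rec *b)
{
	return a->start + a->len <= b->start;
}
```

The per-level `firstkey[]`/`lastkey[]` arrays apply the same pattern to interior keys, one slot per btree level.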
+574
fs/xfs/scrub/common.c
··· 1 + /* 2 + * Copyright (C) 2017 Oracle. All Rights Reserved. 3 + * 4 + * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 + * 6 + * This program is free software; you can redistribute it and/or 7 + * modify it under the terms of the GNU General Public License 8 + * as published by the Free Software Foundation; either version 2 9 + * of the License, or (at your option) any later version. 10 + * 11 + * This program is distributed in the hope that it would be useful, 12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 + * GNU General Public License for more details. 15 + * 16 + * You should have received a copy of the GNU General Public License 17 + * along with this program; if not, write the Free Software Foundation, 18 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 19 + */ 20 + #include "xfs.h" 21 + #include "xfs_fs.h" 22 + #include "xfs_shared.h" 23 + #include "xfs_format.h" 24 + #include "xfs_trans_resv.h" 25 + #include "xfs_mount.h" 26 + #include "xfs_defer.h" 27 + #include "xfs_btree.h" 28 + #include "xfs_bit.h" 29 + #include "xfs_log_format.h" 30 + #include "xfs_trans.h" 31 + #include "xfs_sb.h" 32 + #include "xfs_inode.h" 33 + #include "xfs_icache.h" 34 + #include "xfs_itable.h" 35 + #include "xfs_alloc.h" 36 + #include "xfs_alloc_btree.h" 37 + #include "xfs_bmap.h" 38 + #include "xfs_bmap_btree.h" 39 + #include "xfs_ialloc.h" 40 + #include "xfs_ialloc_btree.h" 41 + #include "xfs_refcount.h" 42 + #include "xfs_refcount_btree.h" 43 + #include "xfs_rmap.h" 44 + #include "xfs_rmap_btree.h" 45 + #include "xfs_log.h" 46 + #include "xfs_trans_priv.h" 47 + #include "scrub/xfs_scrub.h" 48 + #include "scrub/scrub.h" 49 + #include "scrub/common.h" 50 + #include "scrub/trace.h" 51 + #include "scrub/btree.h" 52 + 53 + /* Common code for the metadata scrubbers. */ 54 + 55 + /* 56 + * Handling operational errors. 
57 + * 58 + * The *_process_error() family of functions are used to process error return 59 + * codes from functions called as part of a scrub operation. 60 + * 61 + * If there's no error, we return true to tell the caller that it's ok 62 + * to move on to the next check in its list. 63 + * 64 + * For non-verifier errors (e.g. ENOMEM) we return false to tell the 65 + * caller that something bad happened, and we preserve *error so that 66 + * the caller can return the *error up the stack to userspace. 67 + * 68 + * Verifier errors (EFSBADCRC/EFSCORRUPTED) are recorded by setting 69 + * OFLAG_CORRUPT in sm_flags and the *error is cleared. In other words, 70 + * we track verifier errors (and failed scrub checks) via OFLAG_CORRUPT, 71 + * not via return codes. We return false to tell the caller that 72 + * something bad happened. Since the error has been cleared, the caller 73 + * will (presumably) return that zero and scrubbing will move on to 74 + * whatever's next. 75 + * 76 + * ftrace can be used to record the precise metadata location and the 77 + * approximate code location of the failed operation. 78 + */ 79 + 80 + /* Check for operational errors. */ 81 + bool 82 + xfs_scrub_process_error( 83 + struct xfs_scrub_context *sc, 84 + xfs_agnumber_t agno, 85 + xfs_agblock_t bno, 86 + int *error) 87 + { 88 + switch (*error) { 89 + case 0: 90 + return true; 91 + case -EDEADLOCK: 92 + /* Used to restart an op with deadlock avoidance. */ 93 + trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error); 94 + break; 95 + case -EFSBADCRC: 96 + case -EFSCORRUPTED: 97 + /* Note the badness but don't abort. */ 98 + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; 99 + *error = 0; 100 + /* fall through */ 101 + default: 102 + trace_xfs_scrub_op_error(sc, agno, bno, *error, 103 + __return_address); 104 + break; 105 + } 106 + return false; 107 + } 108 + 109 + /* Check for operational errors for a file offset. 
*/ 110 + bool 111 + xfs_scrub_fblock_process_error( 112 + struct xfs_scrub_context *sc, 113 + int whichfork, 114 + xfs_fileoff_t offset, 115 + int *error) 116 + { 117 + switch (*error) { 118 + case 0: 119 + return true; 120 + case -EDEADLOCK: 121 + /* Used to restart an op with deadlock avoidance. */ 122 + trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error); 123 + break; 124 + case -EFSBADCRC: 125 + case -EFSCORRUPTED: 126 + /* Note the badness but don't abort. */ 127 + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; 128 + *error = 0; 129 + /* fall through */ 130 + default: 131 + trace_xfs_scrub_file_op_error(sc, whichfork, offset, *error, 132 + __return_address); 133 + break; 134 + } 135 + return false; 136 + } 137 + 138 + /* 139 + * Handling scrub corruption/optimization/warning checks. 140 + * 141 + * The *_set_{corrupt,preen,warning}() family of functions are used to 142 + * record the presence of metadata that is incorrect (corrupt), could be 143 + * optimized somehow (preen), or should be flagged for administrative 144 + * review but is not incorrect (warn). 145 + * 146 + * ftrace can be used to record the precise metadata location and 147 + * approximate code location of the failed check. 148 + */ 149 + 150 + /* Record a block which could be optimized. */ 151 + void 152 + xfs_scrub_block_set_preen( 153 + struct xfs_scrub_context *sc, 154 + struct xfs_buf *bp) 155 + { 156 + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN; 157 + trace_xfs_scrub_block_preen(sc, bp->b_bn, __return_address); 158 + } 159 + 160 + /* 161 + * Record an inode which could be optimized. The trace data will 162 + * include the block given by bp if bp is given; otherwise it will use 163 + * the block location of the inode record itself. 164 + */ 165 + void 166 + xfs_scrub_ino_set_preen( 167 + struct xfs_scrub_context *sc, 168 + xfs_ino_t ino, 169 + struct xfs_buf *bp) 170 + { 171 + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN; 172 + trace_xfs_scrub_ino_preen(sc, ino, bp ? 
bp->b_bn : 0,
173 + __return_address);
174 + }
175 +
176 + /* Record a corrupt block. */
177 + void
178 + xfs_scrub_block_set_corrupt(
179 + struct xfs_scrub_context *sc,
180 + struct xfs_buf *bp)
181 + {
182 + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
183 + trace_xfs_scrub_block_error(sc, bp->b_bn, __return_address);
184 + }
185 +
186 + /*
187 + * Record a corrupt inode. The trace data will include the block given
188 + * by bp if bp is given; otherwise it will use the block location of the
189 + * inode record itself.
190 + */
191 + void
192 + xfs_scrub_ino_set_corrupt(
193 + struct xfs_scrub_context *sc,
194 + xfs_ino_t ino,
195 + struct xfs_buf *bp)
196 + {
197 + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
198 + trace_xfs_scrub_ino_error(sc, ino, bp ? bp->b_bn : 0, __return_address);
199 + }
200 +
201 + /* Record corruption in a block indexed by a file fork. */
202 + void
203 + xfs_scrub_fblock_set_corrupt(
204 + struct xfs_scrub_context *sc,
205 + int whichfork,
206 + xfs_fileoff_t offset)
207 + {
208 + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
209 + trace_xfs_scrub_fblock_error(sc, whichfork, offset, __return_address);
210 + }
211 +
212 + /*
213 + * Warn about inodes that need administrative review but are not
214 + * incorrect.
215 + */
216 + void
217 + xfs_scrub_ino_set_warning(
218 + struct xfs_scrub_context *sc,
219 + xfs_ino_t ino,
220 + struct xfs_buf *bp)
221 + {
222 + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING;
223 + trace_xfs_scrub_ino_warning(sc, ino, bp ? bp->b_bn : 0,
224 + __return_address);
225 + }
226 +
227 + /* Warn about a block indexed by a file fork that needs review. */
228 + void
229 + xfs_scrub_fblock_set_warning(
230 + struct xfs_scrub_context *sc,
231 + int whichfork,
232 + xfs_fileoff_t offset)
233 + {
234 + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING;
235 + trace_xfs_scrub_fblock_warning(sc, whichfork, offset, __return_address);
236 + }
237 +
238 + /* Signal an incomplete scrub.
*/ 239 + void 240 + xfs_scrub_set_incomplete( 241 + struct xfs_scrub_context *sc) 242 + { 243 + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_INCOMPLETE; 244 + trace_xfs_scrub_incomplete(sc, __return_address); 245 + } 246 + 247 + /* 248 + * AG scrubbing 249 + * 250 + * These helpers facilitate locking an allocation group's header 251 + * buffers, setting up cursors for all btrees that are present, and 252 + * cleaning everything up once we're through. 253 + */ 254 + 255 + /* Decide if we want to return an AG header read failure. */ 256 + static inline bool 257 + want_ag_read_header_failure( 258 + struct xfs_scrub_context *sc, 259 + unsigned int type) 260 + { 261 + /* Return all AG header read failures when scanning btrees. */ 262 + if (sc->sm->sm_type != XFS_SCRUB_TYPE_AGF && 263 + sc->sm->sm_type != XFS_SCRUB_TYPE_AGFL && 264 + sc->sm->sm_type != XFS_SCRUB_TYPE_AGI) 265 + return true; 266 + /* 267 + * If we're scanning a given type of AG header, we only want to 268 + * see read failures from that specific header. We'd like the 269 + * other headers to cross-check them, but this isn't required. 270 + */ 271 + if (sc->sm->sm_type == type) 272 + return true; 273 + return false; 274 + } 275 + 276 + /* 277 + * Grab all the headers for an AG. 278 + * 279 + * The headers should be released by xfs_scrub_ag_free, but as a fail 280 + * safe we attach all the buffers we grab to the scrub transaction so 281 + * they'll all be freed when we cancel it. 
282 + */ 283 + int 284 + xfs_scrub_ag_read_headers( 285 + struct xfs_scrub_context *sc, 286 + xfs_agnumber_t agno, 287 + struct xfs_buf **agi, 288 + struct xfs_buf **agf, 289 + struct xfs_buf **agfl) 290 + { 291 + struct xfs_mount *mp = sc->mp; 292 + int error; 293 + 294 + error = xfs_ialloc_read_agi(mp, sc->tp, agno, agi); 295 + if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGI)) 296 + goto out; 297 + 298 + error = xfs_alloc_read_agf(mp, sc->tp, agno, 0, agf); 299 + if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGF)) 300 + goto out; 301 + 302 + error = xfs_alloc_read_agfl(mp, sc->tp, agno, agfl); 303 + if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGFL)) 304 + goto out; 305 + 306 + out: 307 + return error; 308 + } 309 + 310 + /* Release all the AG btree cursors. */ 311 + void 312 + xfs_scrub_ag_btcur_free( 313 + struct xfs_scrub_ag *sa) 314 + { 315 + if (sa->refc_cur) 316 + xfs_btree_del_cursor(sa->refc_cur, XFS_BTREE_ERROR); 317 + if (sa->rmap_cur) 318 + xfs_btree_del_cursor(sa->rmap_cur, XFS_BTREE_ERROR); 319 + if (sa->fino_cur) 320 + xfs_btree_del_cursor(sa->fino_cur, XFS_BTREE_ERROR); 321 + if (sa->ino_cur) 322 + xfs_btree_del_cursor(sa->ino_cur, XFS_BTREE_ERROR); 323 + if (sa->cnt_cur) 324 + xfs_btree_del_cursor(sa->cnt_cur, XFS_BTREE_ERROR); 325 + if (sa->bno_cur) 326 + xfs_btree_del_cursor(sa->bno_cur, XFS_BTREE_ERROR); 327 + 328 + sa->refc_cur = NULL; 329 + sa->rmap_cur = NULL; 330 + sa->fino_cur = NULL; 331 + sa->ino_cur = NULL; 332 + sa->bno_cur = NULL; 333 + sa->cnt_cur = NULL; 334 + } 335 + 336 + /* Initialize all the btree cursors for an AG. */ 337 + int 338 + xfs_scrub_ag_btcur_init( 339 + struct xfs_scrub_context *sc, 340 + struct xfs_scrub_ag *sa) 341 + { 342 + struct xfs_mount *mp = sc->mp; 343 + xfs_agnumber_t agno = sa->agno; 344 + 345 + if (sa->agf_bp) { 346 + /* Set up a bnobt cursor for cross-referencing. 
*/ 347 + sa->bno_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp, 348 + agno, XFS_BTNUM_BNO); 349 + if (!sa->bno_cur) 350 + goto err; 351 + 352 + /* Set up a cntbt cursor for cross-referencing. */ 353 + sa->cnt_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp, 354 + agno, XFS_BTNUM_CNT); 355 + if (!sa->cnt_cur) 356 + goto err; 357 + } 358 + 359 + /* Set up a inobt cursor for cross-referencing. */ 360 + if (sa->agi_bp) { 361 + sa->ino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp, 362 + agno, XFS_BTNUM_INO); 363 + if (!sa->ino_cur) 364 + goto err; 365 + } 366 + 367 + /* Set up a finobt cursor for cross-referencing. */ 368 + if (sa->agi_bp && xfs_sb_version_hasfinobt(&mp->m_sb)) { 369 + sa->fino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp, 370 + agno, XFS_BTNUM_FINO); 371 + if (!sa->fino_cur) 372 + goto err; 373 + } 374 + 375 + /* Set up a rmapbt cursor for cross-referencing. */ 376 + if (sa->agf_bp && xfs_sb_version_hasrmapbt(&mp->m_sb)) { 377 + sa->rmap_cur = xfs_rmapbt_init_cursor(mp, sc->tp, sa->agf_bp, 378 + agno); 379 + if (!sa->rmap_cur) 380 + goto err; 381 + } 382 + 383 + /* Set up a refcountbt cursor for cross-referencing. */ 384 + if (sa->agf_bp && xfs_sb_version_hasreflink(&mp->m_sb)) { 385 + sa->refc_cur = xfs_refcountbt_init_cursor(mp, sc->tp, 386 + sa->agf_bp, agno, NULL); 387 + if (!sa->refc_cur) 388 + goto err; 389 + } 390 + 391 + return 0; 392 + err: 393 + return -ENOMEM; 394 + } 395 + 396 + /* Release the AG header context and btree cursors. 
 */
397 + void
398 + xfs_scrub_ag_free(
399 + struct xfs_scrub_context *sc,
400 + struct xfs_scrub_ag *sa)
401 + {
402 + xfs_scrub_ag_btcur_free(sa);
403 + if (sa->agfl_bp) {
404 + xfs_trans_brelse(sc->tp, sa->agfl_bp);
405 + sa->agfl_bp = NULL;
406 + }
407 + if (sa->agf_bp) {
408 + xfs_trans_brelse(sc->tp, sa->agf_bp);
409 + sa->agf_bp = NULL;
410 + }
411 + if (sa->agi_bp) {
412 + xfs_trans_brelse(sc->tp, sa->agi_bp);
413 + sa->agi_bp = NULL;
414 + }
415 + sa->agno = NULLAGNUMBER;
416 + }
417 +
418 + /*
419 + * For scrub, grab the AGI and the AGF headers, in that order. Locking
420 + * order requires us to get the AGI before the AGF. We use the
421 + * transaction to avoid deadlocking on crosslinked metadata buffers;
422 + * either the caller passes one in (bmap scrub) or we have to create a
423 + * transaction ourselves.
424 + */
425 + int
426 + xfs_scrub_ag_init(
427 + struct xfs_scrub_context *sc,
428 + xfs_agnumber_t agno,
429 + struct xfs_scrub_ag *sa)
430 + {
431 + int error;
432 +
433 + sa->agno = agno;
434 + error = xfs_scrub_ag_read_headers(sc, agno, &sa->agi_bp,
435 + &sa->agf_bp, &sa->agfl_bp);
436 + if (error)
437 + return error;
438 +
439 + return xfs_scrub_ag_btcur_init(sc, sa);
440 + }
441 +
442 + /* Per-scrubber setup functions */
443 +
444 + /* Set us up with a transaction and an empty context. */
445 + int
446 + xfs_scrub_setup_fs(
447 + struct xfs_scrub_context *sc,
448 + struct xfs_inode *ip)
449 + {
450 + return xfs_scrub_trans_alloc(sc->sm, sc->mp, &sc->tp);
451 + }
452 +
453 + /* Set us up with AG headers and btree cursors. */
454 + int
455 + xfs_scrub_setup_ag_btree(
456 + struct xfs_scrub_context *sc,
457 + struct xfs_inode *ip,
458 + bool force_log)
459 + {
460 + struct xfs_mount *mp = sc->mp;
461 + int error;
462 +
463 + /*
464 + * If the caller asks us to checkpoint the log, do so. This
465 + * expensive operation should be performed infrequently and only
466 + * as a last resort.
Any caller that sets force_log should 467 + * document why they need to do so. 468 + */ 469 + if (force_log) { 470 + error = xfs_scrub_checkpoint_log(mp); 471 + if (error) 472 + return error; 473 + } 474 + 475 + error = xfs_scrub_setup_ag_header(sc, ip); 476 + if (error) 477 + return error; 478 + 479 + return xfs_scrub_ag_init(sc, sc->sm->sm_agno, &sc->sa); 480 + } 481 + 482 + /* Push everything out of the log onto disk. */ 483 + int 484 + xfs_scrub_checkpoint_log( 485 + struct xfs_mount *mp) 486 + { 487 + int error; 488 + 489 + error = _xfs_log_force(mp, XFS_LOG_SYNC, NULL); 490 + if (error) 491 + return error; 492 + xfs_ail_push_all_sync(mp->m_ail); 493 + return 0; 494 + } 495 + 496 + /* 497 + * Given an inode and the scrub control structure, grab either the 498 + * inode referenced in the control structure or the inode passed in. 499 + * The inode is not locked. 500 + */ 501 + int 502 + xfs_scrub_get_inode( 503 + struct xfs_scrub_context *sc, 504 + struct xfs_inode *ip_in) 505 + { 506 + struct xfs_mount *mp = sc->mp; 507 + struct xfs_inode *ip = NULL; 508 + int error; 509 + 510 + /* 511 + * If userspace passed us an AG number or a generation number 512 + * without an inode number, they haven't got a clue so bail out 513 + * immediately. 514 + */ 515 + if (sc->sm->sm_agno || (sc->sm->sm_gen && !sc->sm->sm_ino)) 516 + return -EINVAL; 517 + 518 + /* We want to scan the inode we already had opened. */ 519 + if (sc->sm->sm_ino == 0 || sc->sm->sm_ino == ip_in->i_ino) { 520 + sc->ip = ip_in; 521 + return 0; 522 + } 523 + 524 + /* Look up the inode, see if the generation number matches. */ 525 + if (xfs_internal_inum(mp, sc->sm->sm_ino)) 526 + return -ENOENT; 527 + error = xfs_iget(mp, NULL, sc->sm->sm_ino, 528 + XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE, 0, &ip); 529 + if (error == -ENOENT || error == -EINVAL) { 530 + /* inode doesn't exist... 
*/ 531 + return -ENOENT; 532 + } else if (error) { 533 + trace_xfs_scrub_op_error(sc, 534 + XFS_INO_TO_AGNO(mp, sc->sm->sm_ino), 535 + XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino), 536 + error, __return_address); 537 + return error; 538 + } 539 + if (VFS_I(ip)->i_generation != sc->sm->sm_gen) { 540 + iput(VFS_I(ip)); 541 + return -ENOENT; 542 + } 543 + 544 + sc->ip = ip; 545 + return 0; 546 + } 547 + 548 + /* Set us up to scrub a file's contents. */ 549 + int 550 + xfs_scrub_setup_inode_contents( 551 + struct xfs_scrub_context *sc, 552 + struct xfs_inode *ip, 553 + unsigned int resblks) 554 + { 555 + struct xfs_mount *mp = sc->mp; 556 + int error; 557 + 558 + error = xfs_scrub_get_inode(sc, ip); 559 + if (error) 560 + return error; 561 + 562 + /* Got the inode, lock it and we're ready to go. */ 563 + sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL; 564 + xfs_ilock(sc->ip, sc->ilock_flags); 565 + error = xfs_scrub_trans_alloc(sc->sm, mp, &sc->tp); 566 + if (error) 567 + goto out; 568 + sc->ilock_flags |= XFS_ILOCK_EXCL; 569 + xfs_ilock(sc->ip, XFS_ILOCK_EXCL); 570 + 571 + out: 572 + /* scrub teardown will unlock and release the inode for us */ 573 + return error; 574 + }
+144
fs/xfs/scrub/common.h
··· 1 + /* 2 + * Copyright (C) 2017 Oracle. All Rights Reserved. 3 + * 4 + * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 + * 6 + * This program is free software; you can redistribute it and/or 7 + * modify it under the terms of the GNU General Public License 8 + * as published by the Free Software Foundation; either version 2 9 + * of the License, or (at your option) any later version. 10 + * 11 + * This program is distributed in the hope that it would be useful, 12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 + * GNU General Public License for more details. 15 + * 16 + * You should have received a copy of the GNU General Public License 17 + * along with this program; if not, write the Free Software Foundation, 18 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 19 + */ 20 + #ifndef __XFS_SCRUB_COMMON_H__ 21 + #define __XFS_SCRUB_COMMON_H__ 22 + 23 + /* 24 + * We /could/ terminate a scrub/repair operation early. If we're not 25 + * in a good place to continue (fatal signal, etc.) then bail out. 26 + * Note that we're careful not to make any judgements about *error. 27 + */ 28 + static inline bool 29 + xfs_scrub_should_terminate( 30 + struct xfs_scrub_context *sc, 31 + int *error) 32 + { 33 + if (fatal_signal_pending(current)) { 34 + if (*error == 0) 35 + *error = -EAGAIN; 36 + return true; 37 + } 38 + return false; 39 + } 40 + 41 + /* 42 + * Grab an empty transaction so that we can re-grab locked buffers if 43 + * one of our btrees turns out to be cyclic. 
44 + */ 45 + static inline int 46 + xfs_scrub_trans_alloc( 47 + struct xfs_scrub_metadata *sm, 48 + struct xfs_mount *mp, 49 + struct xfs_trans **tpp) 50 + { 51 + return xfs_trans_alloc_empty(mp, tpp); 52 + } 53 + 54 + bool xfs_scrub_process_error(struct xfs_scrub_context *sc, xfs_agnumber_t agno, 55 + xfs_agblock_t bno, int *error); 56 + bool xfs_scrub_fblock_process_error(struct xfs_scrub_context *sc, int whichfork, 57 + xfs_fileoff_t offset, int *error); 58 + 59 + void xfs_scrub_block_set_preen(struct xfs_scrub_context *sc, 60 + struct xfs_buf *bp); 61 + void xfs_scrub_ino_set_preen(struct xfs_scrub_context *sc, xfs_ino_t ino, 62 + struct xfs_buf *bp); 63 + 64 + void xfs_scrub_block_set_corrupt(struct xfs_scrub_context *sc, 65 + struct xfs_buf *bp); 66 + void xfs_scrub_ino_set_corrupt(struct xfs_scrub_context *sc, xfs_ino_t ino, 67 + struct xfs_buf *bp); 68 + void xfs_scrub_fblock_set_corrupt(struct xfs_scrub_context *sc, int whichfork, 69 + xfs_fileoff_t offset); 70 + 71 + void xfs_scrub_ino_set_warning(struct xfs_scrub_context *sc, xfs_ino_t ino, 72 + struct xfs_buf *bp); 73 + void xfs_scrub_fblock_set_warning(struct xfs_scrub_context *sc, int whichfork, 74 + xfs_fileoff_t offset); 75 + 76 + void xfs_scrub_set_incomplete(struct xfs_scrub_context *sc); 77 + int xfs_scrub_checkpoint_log(struct xfs_mount *mp); 78 + 79 + /* Setup functions */ 80 + int xfs_scrub_setup_fs(struct xfs_scrub_context *sc, struct xfs_inode *ip); 81 + int xfs_scrub_setup_ag_header(struct xfs_scrub_context *sc, 82 + struct xfs_inode *ip); 83 + int xfs_scrub_setup_ag_allocbt(struct xfs_scrub_context *sc, 84 + struct xfs_inode *ip); 85 + int xfs_scrub_setup_ag_iallocbt(struct xfs_scrub_context *sc, 86 + struct xfs_inode *ip); 87 + int xfs_scrub_setup_ag_rmapbt(struct xfs_scrub_context *sc, 88 + struct xfs_inode *ip); 89 + int xfs_scrub_setup_ag_refcountbt(struct xfs_scrub_context *sc, 90 + struct xfs_inode *ip); 91 + int xfs_scrub_setup_inode(struct xfs_scrub_context *sc, 92 + struct 
xfs_inode *ip); 93 + int xfs_scrub_setup_inode_bmap(struct xfs_scrub_context *sc, 94 + struct xfs_inode *ip); 95 + int xfs_scrub_setup_inode_bmap_data(struct xfs_scrub_context *sc, 96 + struct xfs_inode *ip); 97 + int xfs_scrub_setup_directory(struct xfs_scrub_context *sc, 98 + struct xfs_inode *ip); 99 + int xfs_scrub_setup_xattr(struct xfs_scrub_context *sc, 100 + struct xfs_inode *ip); 101 + int xfs_scrub_setup_symlink(struct xfs_scrub_context *sc, 102 + struct xfs_inode *ip); 103 + int xfs_scrub_setup_parent(struct xfs_scrub_context *sc, 104 + struct xfs_inode *ip); 105 + #ifdef CONFIG_XFS_RT 106 + int xfs_scrub_setup_rt(struct xfs_scrub_context *sc, struct xfs_inode *ip); 107 + #else 108 + static inline int 109 + xfs_scrub_setup_rt(struct xfs_scrub_context *sc, struct xfs_inode *ip) 110 + { 111 + return -ENOENT; 112 + } 113 + #endif 114 + #ifdef CONFIG_XFS_QUOTA 115 + int xfs_scrub_setup_quota(struct xfs_scrub_context *sc, struct xfs_inode *ip); 116 + #else 117 + static inline int 118 + xfs_scrub_setup_quota(struct xfs_scrub_context *sc, struct xfs_inode *ip) 119 + { 120 + return -ENOENT; 121 + } 122 + #endif 123 + 124 + void xfs_scrub_ag_free(struct xfs_scrub_context *sc, struct xfs_scrub_ag *sa); 125 + int xfs_scrub_ag_init(struct xfs_scrub_context *sc, xfs_agnumber_t agno, 126 + struct xfs_scrub_ag *sa); 127 + int xfs_scrub_ag_read_headers(struct xfs_scrub_context *sc, xfs_agnumber_t agno, 128 + struct xfs_buf **agi, struct xfs_buf **agf, 129 + struct xfs_buf **agfl); 130 + void xfs_scrub_ag_btcur_free(struct xfs_scrub_ag *sa); 131 + int xfs_scrub_ag_btcur_init(struct xfs_scrub_context *sc, 132 + struct xfs_scrub_ag *sa); 133 + int xfs_scrub_walk_agfl(struct xfs_scrub_context *sc, 134 + int (*fn)(struct xfs_scrub_context *, xfs_agblock_t bno, 135 + void *), 136 + void *priv); 137 + 138 + int xfs_scrub_setup_ag_btree(struct xfs_scrub_context *sc, 139 + struct xfs_inode *ip, bool force_log); 140 + int xfs_scrub_get_inode(struct xfs_scrub_context *sc, struct 
xfs_inode *ip_in); 141 + int xfs_scrub_setup_inode_contents(struct xfs_scrub_context *sc, 142 + struct xfs_inode *ip, unsigned int resblks); 143 + 144 + #endif /* __XFS_SCRUB_COMMON_H__ */
+591
fs/xfs/scrub/dabtree.c
··· 1 + /* 2 + * Copyright (C) 2017 Oracle. All Rights Reserved. 3 + * 4 + * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 + * 6 + * This program is free software; you can redistribute it and/or 7 + * modify it under the terms of the GNU General Public License 8 + * as published by the Free Software Foundation; either version 2 9 + * of the License, or (at your option) any later version. 10 + * 11 + * This program is distributed in the hope that it would be useful, 12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 + * GNU General Public License for more details. 15 + * 16 + * You should have received a copy of the GNU General Public License 17 + * along with this program; if not, write the Free Software Foundation, 18 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 19 + */ 20 + #include "xfs.h" 21 + #include "xfs_fs.h" 22 + #include "xfs_shared.h" 23 + #include "xfs_format.h" 24 + #include "xfs_trans_resv.h" 25 + #include "xfs_mount.h" 26 + #include "xfs_defer.h" 27 + #include "xfs_btree.h" 28 + #include "xfs_bit.h" 29 + #include "xfs_log_format.h" 30 + #include "xfs_trans.h" 31 + #include "xfs_sb.h" 32 + #include "xfs_inode.h" 33 + #include "xfs_inode_fork.h" 34 + #include "xfs_da_format.h" 35 + #include "xfs_da_btree.h" 36 + #include "xfs_dir2.h" 37 + #include "xfs_dir2_priv.h" 38 + #include "xfs_attr_leaf.h" 39 + #include "scrub/xfs_scrub.h" 40 + #include "scrub/scrub.h" 41 + #include "scrub/common.h" 42 + #include "scrub/trace.h" 43 + #include "scrub/dabtree.h" 44 + 45 + /* Directory/Attribute Btree */ 46 + 47 + /* 48 + * Check for da btree operation errors. See the section about handling 49 + * operational errors in common.c. 
50 + */ 51 + bool 52 + xfs_scrub_da_process_error( 53 + struct xfs_scrub_da_btree *ds, 54 + int level, 55 + int *error) 56 + { 57 + struct xfs_scrub_context *sc = ds->sc; 58 + 59 + if (*error == 0) 60 + return true; 61 + 62 + switch (*error) { 63 + case -EDEADLOCK: 64 + /* Used to restart an op with deadlock avoidance. */ 65 + trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error); 66 + break; 67 + case -EFSBADCRC: 68 + case -EFSCORRUPTED: 69 + /* Note the badness but don't abort. */ 70 + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; 71 + *error = 0; 72 + /* fall through */ 73 + default: 74 + trace_xfs_scrub_file_op_error(sc, ds->dargs.whichfork, 75 + xfs_dir2_da_to_db(ds->dargs.geo, 76 + ds->state->path.blk[level].blkno), 77 + *error, __return_address); 78 + break; 79 + } 80 + return false; 81 + } 82 + 83 + /* 84 + * Check for da btree corruption. See the section about handling 85 + * operational errors in common.c. 86 + */ 87 + void 88 + xfs_scrub_da_set_corrupt( 89 + struct xfs_scrub_da_btree *ds, 90 + int level) 91 + { 92 + struct xfs_scrub_context *sc = ds->sc; 93 + 94 + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; 95 + 96 + trace_xfs_scrub_fblock_error(sc, ds->dargs.whichfork, 97 + xfs_dir2_da_to_db(ds->dargs.geo, 98 + ds->state->path.blk[level].blkno), 99 + __return_address); 100 + } 101 + 102 + /* Find an entry at a certain level in a da btree. */ 103 + STATIC void * 104 + xfs_scrub_da_btree_entry( 105 + struct xfs_scrub_da_btree *ds, 106 + int level, 107 + int rec) 108 + { 109 + char *ents; 110 + struct xfs_da_state_blk *blk; 111 + void *baddr; 112 + 113 + /* Dispatch the entry finding function. 
*/ 114 + blk = &ds->state->path.blk[level]; 115 + baddr = blk->bp->b_addr; 116 + switch (blk->magic) { 117 + case XFS_ATTR_LEAF_MAGIC: 118 + case XFS_ATTR3_LEAF_MAGIC: 119 + ents = (char *)xfs_attr3_leaf_entryp(baddr); 120 + return ents + (rec * sizeof(struct xfs_attr_leaf_entry)); 121 + case XFS_DIR2_LEAFN_MAGIC: 122 + case XFS_DIR3_LEAFN_MAGIC: 123 + ents = (char *)ds->dargs.dp->d_ops->leaf_ents_p(baddr); 124 + return ents + (rec * sizeof(struct xfs_dir2_leaf_entry)); 125 + case XFS_DIR2_LEAF1_MAGIC: 126 + case XFS_DIR3_LEAF1_MAGIC: 127 + ents = (char *)ds->dargs.dp->d_ops->leaf_ents_p(baddr); 128 + return ents + (rec * sizeof(struct xfs_dir2_leaf_entry)); 129 + case XFS_DA_NODE_MAGIC: 130 + case XFS_DA3_NODE_MAGIC: 131 + ents = (char *)ds->dargs.dp->d_ops->node_tree_p(baddr); 132 + return ents + (rec * sizeof(struct xfs_da_node_entry)); 133 + } 134 + 135 + return NULL; 136 + } 137 + 138 + /* Scrub a da btree hash (key). */ 139 + int 140 + xfs_scrub_da_btree_hash( 141 + struct xfs_scrub_da_btree *ds, 142 + int level, 143 + __be32 *hashp) 144 + { 145 + struct xfs_da_state_blk *blks; 146 + struct xfs_da_node_entry *entry; 147 + xfs_dahash_t hash; 148 + xfs_dahash_t parent_hash; 149 + 150 + /* Is this hash in order? */ 151 + hash = be32_to_cpu(*hashp); 152 + if (hash < ds->hashes[level]) 153 + xfs_scrub_da_set_corrupt(ds, level); 154 + ds->hashes[level] = hash; 155 + 156 + if (level == 0) 157 + return 0; 158 + 159 + /* Is this hash no larger than the parent hash? */ 160 + blks = ds->state->path.blk; 161 + entry = xfs_scrub_da_btree_entry(ds, level - 1, blks[level - 1].index); 162 + parent_hash = be32_to_cpu(entry->hashval); 163 + if (parent_hash < hash) 164 + xfs_scrub_da_set_corrupt(ds, level); 165 + 166 + return 0; 167 + } 168 + 169 + /* 170 + * Check a da btree pointer. Returns true if it's ok to use this 171 + * pointer. 
172 + */ 173 + STATIC bool 174 + xfs_scrub_da_btree_ptr_ok( 175 + struct xfs_scrub_da_btree *ds, 176 + int level, 177 + xfs_dablk_t blkno) 178 + { 179 + if (blkno < ds->lowest || (ds->highest != 0 && blkno >= ds->highest)) { 180 + xfs_scrub_da_set_corrupt(ds, level); 181 + return false; 182 + } 183 + 184 + return true; 185 + } 186 + 187 + /* 188 + * The da btree scrubber can handle leaf1 blocks as a degenerate 189 + * form of leafn blocks. Since the regular da code doesn't handle 190 + * leaf1, we must multiplex the verifiers. 191 + */ 192 + static void 193 + xfs_scrub_da_btree_read_verify( 194 + struct xfs_buf *bp) 195 + { 196 + struct xfs_da_blkinfo *info = bp->b_addr; 197 + 198 + switch (be16_to_cpu(info->magic)) { 199 + case XFS_DIR2_LEAF1_MAGIC: 200 + case XFS_DIR3_LEAF1_MAGIC: 201 + bp->b_ops = &xfs_dir3_leaf1_buf_ops; 202 + bp->b_ops->verify_read(bp); 203 + return; 204 + default: 205 + /* 206 + * xfs_da3_node_buf_ops already know how to handle 207 + * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks. 208 + */ 209 + bp->b_ops = &xfs_da3_node_buf_ops; 210 + bp->b_ops->verify_read(bp); 211 + return; 212 + } 213 + } 214 + static void 215 + xfs_scrub_da_btree_write_verify( 216 + struct xfs_buf *bp) 217 + { 218 + struct xfs_da_blkinfo *info = bp->b_addr; 219 + 220 + switch (be16_to_cpu(info->magic)) { 221 + case XFS_DIR2_LEAF1_MAGIC: 222 + case XFS_DIR3_LEAF1_MAGIC: 223 + bp->b_ops = &xfs_dir3_leaf1_buf_ops; 224 + bp->b_ops->verify_write(bp); 225 + return; 226 + default: 227 + /* 228 + * xfs_da3_node_buf_ops already know how to handle 229 + * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks. 230 + */ 231 + bp->b_ops = &xfs_da3_node_buf_ops; 232 + bp->b_ops->verify_write(bp); 233 + return; 234 + } 235 + } 236 + 237 + static const struct xfs_buf_ops xfs_scrub_da_btree_buf_ops = { 238 + .name = "xfs_scrub_da_btree", 239 + .verify_read = xfs_scrub_da_btree_read_verify, 240 + .verify_write = xfs_scrub_da_btree_write_verify, 241 + }; 242 + 243 + /* Check a block's sibling. 
*/ 244 + STATIC int 245 + xfs_scrub_da_btree_block_check_sibling( 246 + struct xfs_scrub_da_btree *ds, 247 + int level, 248 + int direction, 249 + xfs_dablk_t sibling) 250 + { 251 + int retval; 252 + int error; 253 + 254 + memcpy(&ds->state->altpath, &ds->state->path, 255 + sizeof(ds->state->altpath)); 256 + 257 + /* 258 + * If the pointer is null, we shouldn't be able to move the upper 259 + * level pointer anywhere. 260 + */ 261 + if (sibling == 0) { 262 + error = xfs_da3_path_shift(ds->state, &ds->state->altpath, 263 + direction, false, &retval); 264 + if (error == 0 && retval == 0) 265 + xfs_scrub_da_set_corrupt(ds, level); 266 + error = 0; 267 + goto out; 268 + } 269 + 270 + /* Move the alternate cursor one block in the direction given. */ 271 + error = xfs_da3_path_shift(ds->state, &ds->state->altpath, 272 + direction, false, &retval); 273 + if (!xfs_scrub_da_process_error(ds, level, &error)) 274 + return error; 275 + if (retval) { 276 + xfs_scrub_da_set_corrupt(ds, level); 277 + return error; 278 + } 279 + 280 + /* Compare upper level pointer to sibling pointer. */ 281 + if (ds->state->altpath.blk[level].blkno != sibling) 282 + xfs_scrub_da_set_corrupt(ds, level); 283 + xfs_trans_brelse(ds->dargs.trans, ds->state->altpath.blk[level].bp); 284 + out: 285 + return error; 286 + } 287 + 288 + /* Check a block's sibling pointers. */ 289 + STATIC int 290 + xfs_scrub_da_btree_block_check_siblings( 291 + struct xfs_scrub_da_btree *ds, 292 + int level, 293 + struct xfs_da_blkinfo *hdr) 294 + { 295 + xfs_dablk_t forw; 296 + xfs_dablk_t back; 297 + int error = 0; 298 + 299 + forw = be32_to_cpu(hdr->forw); 300 + back = be32_to_cpu(hdr->back); 301 + 302 + /* Top level blocks should not have sibling pointers. */ 303 + if (level == 0) { 304 + if (forw != 0 || back != 0) 305 + xfs_scrub_da_set_corrupt(ds, level); 306 + return 0; 307 + } 308 + 309 + /* 310 + * Check back (left) and forw (right) pointers. These functions 311 + * absorb error codes for us. 
312 + */ 313 + error = xfs_scrub_da_btree_block_check_sibling(ds, level, 0, back); 314 + if (error) 315 + goto out; 316 + error = xfs_scrub_da_btree_block_check_sibling(ds, level, 1, forw); 317 + 318 + out: 319 + memset(&ds->state->altpath, 0, sizeof(ds->state->altpath)); 320 + return error; 321 + } 322 + 323 + /* Load a dir/attribute block from a btree. */ 324 + STATIC int 325 + xfs_scrub_da_btree_block( 326 + struct xfs_scrub_da_btree *ds, 327 + int level, 328 + xfs_dablk_t blkno) 329 + { 330 + struct xfs_da_state_blk *blk; 331 + struct xfs_da_intnode *node; 332 + struct xfs_da_node_entry *btree; 333 + struct xfs_da3_blkinfo *hdr3; 334 + struct xfs_da_args *dargs = &ds->dargs; 335 + struct xfs_inode *ip = ds->dargs.dp; 336 + xfs_ino_t owner; 337 + int *pmaxrecs; 338 + struct xfs_da3_icnode_hdr nodehdr; 339 + int error = 0; 340 + 341 + blk = &ds->state->path.blk[level]; 342 + ds->state->path.active = level + 1; 343 + 344 + /* Release old block. */ 345 + if (blk->bp) { 346 + xfs_trans_brelse(dargs->trans, blk->bp); 347 + blk->bp = NULL; 348 + } 349 + 350 + /* Check the pointer. */ 351 + blk->blkno = blkno; 352 + if (!xfs_scrub_da_btree_ptr_ok(ds, level, blkno)) 353 + goto out_nobuf; 354 + 355 + /* Read the buffer. */ 356 + error = xfs_da_read_buf(dargs->trans, dargs->dp, blk->blkno, -2, 357 + &blk->bp, dargs->whichfork, 358 + &xfs_scrub_da_btree_buf_ops); 359 + if (!xfs_scrub_da_process_error(ds, level, &error)) 360 + goto out_nobuf; 361 + 362 + /* 363 + * We didn't find a dir btree root block, which means that 364 + * there's no LEAF1/LEAFN tree (at least not where it's supposed 365 + * to be), so jump out now. 366 + */ 367 + if (ds->dargs.whichfork == XFS_DATA_FORK && level == 0 && 368 + blk->bp == NULL) 369 + goto out_nobuf; 370 + 371 + /* It's /not/ ok for attr trees not to have a da btree. 
*/ 372 + if (blk->bp == NULL) { 373 + xfs_scrub_da_set_corrupt(ds, level); 374 + goto out_nobuf; 375 + } 376 + 377 + hdr3 = blk->bp->b_addr; 378 + blk->magic = be16_to_cpu(hdr3->hdr.magic); 379 + pmaxrecs = &ds->maxrecs[level]; 380 + 381 + /* We only started zeroing the header on v5 filesystems. */ 382 + if (xfs_sb_version_hascrc(&ds->sc->mp->m_sb) && hdr3->hdr.pad) 383 + xfs_scrub_da_set_corrupt(ds, level); 384 + 385 + /* Check the owner. */ 386 + if (xfs_sb_version_hascrc(&ip->i_mount->m_sb)) { 387 + owner = be64_to_cpu(hdr3->owner); 388 + if (owner != ip->i_ino) 389 + xfs_scrub_da_set_corrupt(ds, level); 390 + } 391 + 392 + /* Check the siblings. */ 393 + error = xfs_scrub_da_btree_block_check_siblings(ds, level, &hdr3->hdr); 394 + if (error) 395 + goto out; 396 + 397 + /* Interpret the buffer. */ 398 + switch (blk->magic) { 399 + case XFS_ATTR_LEAF_MAGIC: 400 + case XFS_ATTR3_LEAF_MAGIC: 401 + xfs_trans_buf_set_type(dargs->trans, blk->bp, 402 + XFS_BLFT_ATTR_LEAF_BUF); 403 + blk->magic = XFS_ATTR_LEAF_MAGIC; 404 + blk->hashval = xfs_attr_leaf_lasthash(blk->bp, pmaxrecs); 405 + if (ds->tree_level != 0) 406 + xfs_scrub_da_set_corrupt(ds, level); 407 + break; 408 + case XFS_DIR2_LEAFN_MAGIC: 409 + case XFS_DIR3_LEAFN_MAGIC: 410 + xfs_trans_buf_set_type(dargs->trans, blk->bp, 411 + XFS_BLFT_DIR_LEAFN_BUF); 412 + blk->magic = XFS_DIR2_LEAFN_MAGIC; 413 + blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs); 414 + if (ds->tree_level != 0) 415 + xfs_scrub_da_set_corrupt(ds, level); 416 + break; 417 + case XFS_DIR2_LEAF1_MAGIC: 418 + case XFS_DIR3_LEAF1_MAGIC: 419 + xfs_trans_buf_set_type(dargs->trans, blk->bp, 420 + XFS_BLFT_DIR_LEAF1_BUF); 421 + blk->magic = XFS_DIR2_LEAF1_MAGIC; 422 + blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs); 423 + if (ds->tree_level != 0) 424 + xfs_scrub_da_set_corrupt(ds, level); 425 + break; 426 + case XFS_DA_NODE_MAGIC: 427 + case XFS_DA3_NODE_MAGIC: 428 + xfs_trans_buf_set_type(dargs->trans, blk->bp, 429 + 
XFS_BLFT_DA_NODE_BUF); 430 + blk->magic = XFS_DA_NODE_MAGIC; 431 + node = blk->bp->b_addr; 432 + ip->d_ops->node_hdr_from_disk(&nodehdr, node); 433 + btree = ip->d_ops->node_tree_p(node); 434 + *pmaxrecs = nodehdr.count; 435 + blk->hashval = be32_to_cpu(btree[*pmaxrecs - 1].hashval); 436 + if (level == 0) { 437 + if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) { 438 + xfs_scrub_da_set_corrupt(ds, level); 439 + goto out_freebp; 440 + } 441 + ds->tree_level = nodehdr.level; 442 + } else { 443 + if (ds->tree_level != nodehdr.level) { 444 + xfs_scrub_da_set_corrupt(ds, level); 445 + goto out_freebp; 446 + } 447 + } 448 + 449 + /* XXX: Check hdr3.pad32 once we know how to fix it. */ 450 + break; 451 + default: 452 + xfs_scrub_da_set_corrupt(ds, level); 453 + goto out_freebp; 454 + } 455 + 456 + out: 457 + return error; 458 + out_freebp: 459 + xfs_trans_brelse(dargs->trans, blk->bp); 460 + blk->bp = NULL; 461 + out_nobuf: 462 + blk->blkno = 0; 463 + return error; 464 + } 465 + 466 + /* Visit all nodes and leaves of a da btree. */ 467 + int 468 + xfs_scrub_da_btree( 469 + struct xfs_scrub_context *sc, 470 + int whichfork, 471 + xfs_scrub_da_btree_rec_fn scrub_fn, 472 + void *private) 473 + { 474 + struct xfs_scrub_da_btree ds = {}; 475 + struct xfs_mount *mp = sc->mp; 476 + struct xfs_da_state_blk *blks; 477 + struct xfs_da_node_entry *key; 478 + void *rec; 479 + xfs_dablk_t blkno; 480 + int level; 481 + int error; 482 + 483 + /* Skip short format data structures; no btree to scan. */ 484 + if (XFS_IFORK_FORMAT(sc->ip, whichfork) != XFS_DINODE_FMT_EXTENTS && 485 + XFS_IFORK_FORMAT(sc->ip, whichfork) != XFS_DINODE_FMT_BTREE) 486 + return 0; 487 + 488 + /* Set up initial da state. 
*/ 489 + ds.dargs.dp = sc->ip; 490 + ds.dargs.whichfork = whichfork; 491 + ds.dargs.trans = sc->tp; 492 + ds.dargs.op_flags = XFS_DA_OP_OKNOENT; 493 + ds.state = xfs_da_state_alloc(); 494 + ds.state->args = &ds.dargs; 495 + ds.state->mp = mp; 496 + ds.sc = sc; 497 + ds.private = private; 498 + if (whichfork == XFS_ATTR_FORK) { 499 + ds.dargs.geo = mp->m_attr_geo; 500 + ds.lowest = 0; 501 + ds.highest = 0; 502 + } else { 503 + ds.dargs.geo = mp->m_dir_geo; 504 + ds.lowest = ds.dargs.geo->leafblk; 505 + ds.highest = ds.dargs.geo->freeblk; 506 + } 507 + blkno = ds.lowest; 508 + level = 0; 509 + 510 + /* Find the root of the da tree, if present. */ 511 + blks = ds.state->path.blk; 512 + error = xfs_scrub_da_btree_block(&ds, level, blkno); 513 + if (error) 514 + goto out_state; 515 + /* 516 + * We didn't find a block at ds.lowest, which means that there's 517 + * no LEAF1/LEAFN tree (at least not where it's supposed to be), 518 + * so jump out now. 519 + */ 520 + if (blks[level].bp == NULL) 521 + goto out_state; 522 + 523 + blks[level].index = 0; 524 + while (level >= 0 && level < XFS_DA_NODE_MAXDEPTH) { 525 + /* Handle leaf block. */ 526 + if (blks[level].magic != XFS_DA_NODE_MAGIC) { 527 + /* End of leaf, pop back towards the root. */ 528 + if (blks[level].index >= ds.maxrecs[level]) { 529 + if (level > 0) 530 + blks[level - 1].index++; 531 + ds.tree_level++; 532 + level--; 533 + continue; 534 + } 535 + 536 + /* Dispatch record scrubbing. */ 537 + rec = xfs_scrub_da_btree_entry(&ds, level, 538 + blks[level].index); 539 + error = scrub_fn(&ds, level, rec); 540 + if (error) 541 + break; 542 + if (xfs_scrub_should_terminate(sc, &error) || 543 + (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) 544 + break; 545 + 546 + blks[level].index++; 547 + continue; 548 + } 549 + 550 + 551 + /* End of node, pop back towards the root. 
*/ 552 + if (blks[level].index >= ds.maxrecs[level]) { 553 + if (level > 0) 554 + blks[level - 1].index++; 555 + ds.tree_level++; 556 + level--; 557 + continue; 558 + } 559 + 560 + /* Hashes in order for scrub? */ 561 + key = xfs_scrub_da_btree_entry(&ds, level, blks[level].index); 562 + error = xfs_scrub_da_btree_hash(&ds, level, &key->hashval); 563 + if (error) 564 + goto out; 565 + 566 + /* Drill another level deeper. */ 567 + blkno = be32_to_cpu(key->before); 568 + level++; 569 + ds.tree_level--; 570 + error = xfs_scrub_da_btree_block(&ds, level, blkno); 571 + if (error) 572 + goto out; 573 + if (blks[level].bp == NULL) 574 + goto out; 575 + 576 + blks[level].index = 0; 577 + } 578 + 579 + out: 580 + /* Release all the buffers we're tracking. */ 581 + for (level = 0; level < XFS_DA_NODE_MAXDEPTH; level++) { 582 + if (blks[level].bp == NULL) 583 + continue; 584 + xfs_trans_brelse(sc->tp, blks[level].bp); 585 + blks[level].bp = NULL; 586 + } 587 + 588 + out_state: 589 + xfs_da_state_free(ds.state); 590 + return error; 591 + }
+59
fs/xfs/scrub/dabtree.h
··· 1 + /* 2 + * Copyright (C) 2017 Oracle. All Rights Reserved. 3 + * 4 + * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 + * 6 + * This program is free software; you can redistribute it and/or 7 + * modify it under the terms of the GNU General Public License 8 + * as published by the Free Software Foundation; either version 2 9 + * of the License, or (at your option) any later version. 10 + * 11 + * This program is distributed in the hope that it would be useful, 12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 + * GNU General Public License for more details. 15 + * 16 + * You should have received a copy of the GNU General Public License 17 + * along with this program; if not, write the Free Software Foundation, 18 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 19 + */ 20 + #ifndef __XFS_SCRUB_DABTREE_H__ 21 + #define __XFS_SCRUB_DABTREE_H__ 22 + 23 + /* dir/attr btree */ 24 + 25 + struct xfs_scrub_da_btree { 26 + struct xfs_da_args dargs; 27 + xfs_dahash_t hashes[XFS_DA_NODE_MAXDEPTH]; 28 + int maxrecs[XFS_DA_NODE_MAXDEPTH]; 29 + struct xfs_da_state *state; 30 + struct xfs_scrub_context *sc; 31 + void *private; 32 + 33 + /* 34 + * Lowest and highest directory block address in which we expect 35 + * to find dir/attr btree node blocks. For a directory this 36 + * (presumably) means between LEAF_OFFSET and FREE_OFFSET; for 37 + * attributes there is no limit. 38 + */ 39 + xfs_dablk_t lowest; 40 + xfs_dablk_t highest; 41 + 42 + int tree_level; 43 + }; 44 + 45 + typedef int (*xfs_scrub_da_btree_rec_fn)(struct xfs_scrub_da_btree *ds, 46 + int level, void *rec); 47 + 48 + /* Check for da btree operation errors. */ 49 + bool xfs_scrub_da_process_error(struct xfs_scrub_da_btree *ds, int level, int *error); 50 + 51 + /* Check for da btree corruption. 
*/ 52 + void xfs_scrub_da_set_corrupt(struct xfs_scrub_da_btree *ds, int level); 53 + 54 + int xfs_scrub_da_btree_hash(struct xfs_scrub_da_btree *ds, int level, 55 + __be32 *hashp); 56 + int xfs_scrub_da_btree(struct xfs_scrub_context *sc, int whichfork, 57 + xfs_scrub_da_btree_rec_fn scrub_fn, void *private); 58 + 59 + #endif /* __XFS_SCRUB_DABTREE_H__ */
+816
fs/xfs/scrub/dir.c
··· 1 + /* 2 + * Copyright (C) 2017 Oracle. All Rights Reserved. 3 + * 4 + * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 + * 6 + * This program is free software; you can redistribute it and/or 7 + * modify it under the terms of the GNU General Public License 8 + * as published by the Free Software Foundation; either version 2 9 + * of the License, or (at your option) any later version. 10 + * 11 + * This program is distributed in the hope that it would be useful, 12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 + * GNU General Public License for more details. 15 + * 16 + * You should have received a copy of the GNU General Public License 17 + * along with this program; if not, write the Free Software Foundation, 18 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 19 + */ 20 + #include "xfs.h" 21 + #include "xfs_fs.h" 22 + #include "xfs_shared.h" 23 + #include "xfs_format.h" 24 + #include "xfs_trans_resv.h" 25 + #include "xfs_mount.h" 26 + #include "xfs_defer.h" 27 + #include "xfs_btree.h" 28 + #include "xfs_bit.h" 29 + #include "xfs_log_format.h" 30 + #include "xfs_trans.h" 31 + #include "xfs_sb.h" 32 + #include "xfs_inode.h" 33 + #include "xfs_icache.h" 34 + #include "xfs_itable.h" 35 + #include "xfs_da_format.h" 36 + #include "xfs_da_btree.h" 37 + #include "xfs_dir2.h" 38 + #include "xfs_dir2_priv.h" 39 + #include "xfs_ialloc.h" 40 + #include "scrub/xfs_scrub.h" 41 + #include "scrub/scrub.h" 42 + #include "scrub/common.h" 43 + #include "scrub/trace.h" 44 + #include "scrub/dabtree.h" 45 + 46 + /* Set us up to scrub directories. */ 47 + int 48 + xfs_scrub_setup_directory( 49 + struct xfs_scrub_context *sc, 50 + struct xfs_inode *ip) 51 + { 52 + return xfs_scrub_setup_inode_contents(sc, ip, 0); 53 + } 54 + 55 + /* Directories */ 56 + 57 + /* Scrub a directory entry. 
*/ 58 + 59 + struct xfs_scrub_dir_ctx { 60 + /* VFS fill-directory iterator */ 61 + struct dir_context dir_iter; 62 + 63 + struct xfs_scrub_context *sc; 64 + }; 65 + 66 + /* Check that an inode's mode matches a given DT_ type. */ 67 + STATIC int 68 + xfs_scrub_dir_check_ftype( 69 + struct xfs_scrub_dir_ctx *sdc, 70 + xfs_fileoff_t offset, 71 + xfs_ino_t inum, 72 + int dtype) 73 + { 74 + struct xfs_mount *mp = sdc->sc->mp; 75 + struct xfs_inode *ip; 76 + int ino_dtype; 77 + int error = 0; 78 + 79 + if (!xfs_sb_version_hasftype(&mp->m_sb)) { 80 + if (dtype != DT_UNKNOWN && dtype != DT_DIR) 81 + xfs_scrub_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, 82 + offset); 83 + goto out; 84 + } 85 + 86 + /* 87 + * Grab the inode pointed to by the dirent. We release the 88 + * inode before we cancel the scrub transaction. Since we 89 + * don't know a priori that releasing the inode won't trigger 90 + * eofblocks cleanup (which allocates what would be a nested 91 + * transaction), we can't use DONTCACHE here because DONTCACHE 92 + * inodes can trigger immediate inactive cleanup of the inode. 93 + */ 94 + error = xfs_iget(mp, sdc->sc->tp, inum, 0, 0, &ip); 95 + if (!xfs_scrub_fblock_process_error(sdc->sc, XFS_DATA_FORK, offset, 96 + &error)) 97 + goto out; 98 + 99 + /* Convert mode to the DT_* values that dir_emit uses. */ 100 + ino_dtype = xfs_dir3_get_dtype(mp, 101 + xfs_mode_to_ftype(VFS_I(ip)->i_mode)); 102 + if (ino_dtype != dtype) 103 + xfs_scrub_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset); 104 + iput(VFS_I(ip)); 105 + out: 106 + return error; 107 + } 108 + 109 + /* 110 + * Scrub a single directory entry. 111 + * 112 + * We use the VFS directory iterator (i.e. readdir) to call this 113 + * function for every directory entry in a directory. Once we're here, 114 + * we check the inode number to make sure it's sane, then we check that 115 + * we can look up this filename. Finally, we check the ftype.
116 + */ 117 + STATIC int 118 + xfs_scrub_dir_actor( 119 + struct dir_context *dir_iter, 120 + const char *name, 121 + int namelen, 122 + loff_t pos, 123 + u64 ino, 124 + unsigned type) 125 + { 126 + struct xfs_mount *mp; 127 + struct xfs_inode *ip; 128 + struct xfs_scrub_dir_ctx *sdc; 129 + struct xfs_name xname; 130 + xfs_ino_t lookup_ino; 131 + xfs_dablk_t offset; 132 + int error = 0; 133 + 134 + sdc = container_of(dir_iter, struct xfs_scrub_dir_ctx, dir_iter); 135 + ip = sdc->sc->ip; 136 + mp = ip->i_mount; 137 + offset = xfs_dir2_db_to_da(mp->m_dir_geo, 138 + xfs_dir2_dataptr_to_db(mp->m_dir_geo, pos)); 139 + 140 + /* Does this inode number make sense? */ 141 + if (!xfs_verify_dir_ino(mp, ino)) { 142 + xfs_scrub_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset); 143 + goto out; 144 + } 145 + 146 + if (!strncmp(".", name, namelen)) { 147 + /* If this is "." then check that the inum matches the dir. */ 148 + if (xfs_sb_version_hasftype(&mp->m_sb) && type != DT_DIR) 149 + xfs_scrub_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, 150 + offset); 151 + if (ino != ip->i_ino) 152 + xfs_scrub_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, 153 + offset); 154 + } else if (!strncmp("..", name, namelen)) { 155 + /* 156 + * If this is ".." in the root inode, check that the inum 157 + * matches this dir. 158 + */ 159 + if (xfs_sb_version_hasftype(&mp->m_sb) && type != DT_DIR) 160 + xfs_scrub_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, 161 + offset); 162 + if (ip->i_ino == mp->m_sb.sb_rootino && ino != ip->i_ino) 163 + xfs_scrub_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, 164 + offset); 165 + } 166 + 167 + /* Verify that we can look up this name by hash. 
*/ 168 + xname.name = name; 169 + xname.len = namelen; 170 + xname.type = XFS_DIR3_FT_UNKNOWN; 171 + 172 + error = xfs_dir_lookup(sdc->sc->tp, ip, &xname, &lookup_ino, NULL); 173 + if (!xfs_scrub_fblock_process_error(sdc->sc, XFS_DATA_FORK, offset, 174 + &error)) 175 + goto fail_xref; 176 + if (lookup_ino != ino) { 177 + xfs_scrub_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset); 178 + goto out; 179 + } 180 + 181 + /* Verify the file type. This function absorbs error codes. */ 182 + error = xfs_scrub_dir_check_ftype(sdc, offset, lookup_ino, type); 183 + if (error) 184 + goto out; 185 + out: 186 + return error; 187 + fail_xref: 188 + return error; 189 + } 190 + 191 + /* Scrub a directory btree record. */ 192 + STATIC int 193 + xfs_scrub_dir_rec( 194 + struct xfs_scrub_da_btree *ds, 195 + int level, 196 + void *rec) 197 + { 198 + struct xfs_mount *mp = ds->state->mp; 199 + struct xfs_dir2_leaf_entry *ent = rec; 200 + struct xfs_inode *dp = ds->dargs.dp; 201 + struct xfs_dir2_data_entry *dent; 202 + struct xfs_buf *bp; 203 + xfs_ino_t ino; 204 + xfs_dablk_t rec_bno; 205 + xfs_dir2_db_t db; 206 + xfs_dir2_data_aoff_t off; 207 + xfs_dir2_dataptr_t ptr; 208 + xfs_dahash_t calc_hash; 209 + xfs_dahash_t hash; 210 + unsigned int tag; 211 + int error; 212 + 213 + /* Check the hash of the entry. */ 214 + error = xfs_scrub_da_btree_hash(ds, level, &ent->hashval); 215 + if (error) 216 + goto out; 217 + 218 + /* Valid hash pointer? */ 219 + ptr = be32_to_cpu(ent->address); 220 + if (ptr == 0) 221 + return 0; 222 + 223 + /* Find the directory entry's location. 
*/ 224 + db = xfs_dir2_dataptr_to_db(mp->m_dir_geo, ptr); 225 + off = xfs_dir2_dataptr_to_off(mp->m_dir_geo, ptr); 226 + rec_bno = xfs_dir2_db_to_da(mp->m_dir_geo, db); 227 + 228 + if (rec_bno >= mp->m_dir_geo->leafblk) { 229 + xfs_scrub_da_set_corrupt(ds, level); 230 + goto out; 231 + } 232 + error = xfs_dir3_data_read(ds->dargs.trans, dp, rec_bno, -2, &bp); 233 + if (!xfs_scrub_fblock_process_error(ds->sc, XFS_DATA_FORK, rec_bno, 234 + &error)) 235 + goto out; 236 + if (!bp) { 237 + xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno); 238 + goto out; 239 + } 240 + 241 + /* Retrieve the entry, sanity check it, and compare hashes. */ 242 + dent = (struct xfs_dir2_data_entry *)(((char *)bp->b_addr) + off); 243 + ino = be64_to_cpu(dent->inumber); 244 + hash = be32_to_cpu(ent->hashval); 245 + tag = be16_to_cpup(dp->d_ops->data_entry_tag_p(dent)); 246 + if (!xfs_verify_dir_ino(mp, ino) || tag != off) 247 + xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno); 248 + if (dent->namelen == 0) { 249 + xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno); 250 + goto out_relse; 251 + } 252 + calc_hash = xfs_da_hashname(dent->name, dent->namelen); 253 + if (calc_hash != hash) 254 + xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno); 255 + 256 + out_relse: 257 + xfs_trans_brelse(ds->dargs.trans, bp); 258 + out: 259 + return error; 260 + } 261 + 262 + /* 263 + * Is this unused entry either in the bestfree or smaller than all of 264 + * them? We've already checked that the bestfrees are sorted longest to 265 + * shortest, and that there aren't any bogus entries. 
266 + */ 267 + STATIC void 268 + xfs_scrub_directory_check_free_entry( 269 + struct xfs_scrub_context *sc, 270 + xfs_dablk_t lblk, 271 + struct xfs_dir2_data_free *bf, 272 + struct xfs_dir2_data_unused *dup) 273 + { 274 + struct xfs_dir2_data_free *dfp; 275 + unsigned int dup_length; 276 + 277 + dup_length = be16_to_cpu(dup->length); 278 + 279 + /* Unused entry is shorter than any of the bestfrees */ 280 + if (dup_length < be16_to_cpu(bf[XFS_DIR2_DATA_FD_COUNT - 1].length)) 281 + return; 282 + 283 + for (dfp = &bf[XFS_DIR2_DATA_FD_COUNT - 1]; dfp >= bf; dfp--) 284 + if (dup_length == be16_to_cpu(dfp->length)) 285 + return; 286 + 287 + /* Unused entry should be in the bestfrees but wasn't found. */ 288 + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); 289 + } 290 + 291 + /* Check free space info in a directory data block. */ 292 + STATIC int 293 + xfs_scrub_directory_data_bestfree( 294 + struct xfs_scrub_context *sc, 295 + xfs_dablk_t lblk, 296 + bool is_block) 297 + { 298 + struct xfs_dir2_data_unused *dup; 299 + struct xfs_dir2_data_free *dfp; 300 + struct xfs_buf *bp; 301 + struct xfs_dir2_data_free *bf; 302 + struct xfs_mount *mp = sc->mp; 303 + const struct xfs_dir_ops *d_ops; 304 + char *ptr; 305 + char *endptr; 306 + u16 tag; 307 + unsigned int nr_bestfrees = 0; 308 + unsigned int nr_frees = 0; 309 + unsigned int smallest_bestfree; 310 + int newlen; 311 + int offset; 312 + int error; 313 + 314 + d_ops = sc->ip->d_ops; 315 + 316 + if (is_block) { 317 + /* dir block format */ 318 + if (lblk != XFS_B_TO_FSBT(mp, XFS_DIR2_DATA_OFFSET)) 319 + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); 320 + error = xfs_dir3_block_read(sc->tp, sc->ip, &bp); 321 + } else { 322 + /* dir data format */ 323 + error = xfs_dir3_data_read(sc->tp, sc->ip, lblk, -1, &bp); 324 + } 325 + if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error)) 326 + goto out; 327 + 328 + /* XXX: Check xfs_dir3_data_hdr.pad is zero once we start setting it. 
*/ 329 + 330 + /* Do the bestfrees correspond to actual free space? */ 331 + bf = d_ops->data_bestfree_p(bp->b_addr); 332 + smallest_bestfree = UINT_MAX; 333 + for (dfp = &bf[0]; dfp < &bf[XFS_DIR2_DATA_FD_COUNT]; dfp++) { 334 + offset = be16_to_cpu(dfp->offset); 335 + if (offset == 0) 336 + continue; 337 + if (offset >= mp->m_dir_geo->blksize) { 338 + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); 339 + goto out_buf; 340 + } 341 + dup = (struct xfs_dir2_data_unused *)(bp->b_addr + offset); 342 + tag = be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)); 343 + 344 + /* bestfree doesn't match the entry it points at? */ 345 + if (dup->freetag != cpu_to_be16(XFS_DIR2_DATA_FREE_TAG) || 346 + be16_to_cpu(dup->length) != be16_to_cpu(dfp->length) || 347 + tag != ((char *)dup - (char *)bp->b_addr)) { 348 + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); 349 + goto out_buf; 350 + } 351 + 352 + /* bestfree records should be ordered largest to smallest */ 353 + if (smallest_bestfree < be16_to_cpu(dfp->length)) { 354 + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); 355 + goto out_buf; 356 + } 357 + 358 + smallest_bestfree = be16_to_cpu(dfp->length); 359 + nr_bestfrees++; 360 + } 361 + 362 + /* Make sure the bestfrees are actually the best free spaces. */ 363 + ptr = (char *)d_ops->data_entry_p(bp->b_addr); 364 + if (is_block) { 365 + struct xfs_dir2_block_tail *btp; 366 + 367 + btp = xfs_dir2_block_tail_p(mp->m_dir_geo, bp->b_addr); 368 + endptr = (char *)xfs_dir2_block_leaf_p(btp); 369 + } else 370 + endptr = (char *)bp->b_addr + BBTOB(bp->b_length); 371 + 372 + /* Iterate the entries, stopping when we hit or go past the end. 
*/ 373 + while (ptr < endptr) { 374 + dup = (struct xfs_dir2_data_unused *)ptr; 375 + /* Skip real entries */ 376 + if (dup->freetag != cpu_to_be16(XFS_DIR2_DATA_FREE_TAG)) { 377 + struct xfs_dir2_data_entry *dep; 378 + 379 + dep = (struct xfs_dir2_data_entry *)ptr; 380 + newlen = d_ops->data_entsize(dep->namelen); 381 + if (newlen <= 0) { 382 + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 383 + lblk); 384 + goto out_buf; 385 + } 386 + ptr += newlen; 387 + continue; 388 + } 389 + 390 + /* Spot check this free entry */ 391 + tag = be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)); 392 + if (tag != ((char *)dup - (char *)bp->b_addr)) 393 + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); 394 + 395 + /* 396 + * Either this entry is a bestfree or it's smaller than 397 + * any of the bestfrees. 398 + */ 399 + xfs_scrub_directory_check_free_entry(sc, lblk, bf, dup); 400 + 401 + /* Move on. */ 402 + newlen = be16_to_cpu(dup->length); 403 + if (newlen <= 0) { 404 + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); 405 + goto out_buf; 406 + } 407 + ptr += newlen; 408 + if (ptr <= endptr) 409 + nr_frees++; 410 + } 411 + 412 + /* We're required to fill all the space. */ 413 + if (ptr != endptr) 414 + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); 415 + 416 + /* Did we see at least as many free slots as there are bestfrees? */ 417 + if (nr_frees < nr_bestfrees) 418 + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); 419 + out_buf: 420 + xfs_trans_brelse(sc->tp, bp); 421 + out: 422 + return error; 423 + } 424 + 425 + /* 426 + * Does the free space length in the free space index block ($len) match 427 + * the longest length in the directory data block's bestfree array? 428 + * Assume that we've already checked that the data block's bestfree 429 + * array is in order. 
430 + */ 431 + STATIC void 432 + xfs_scrub_directory_check_freesp( 433 + struct xfs_scrub_context *sc, 434 + xfs_dablk_t lblk, 435 + struct xfs_buf *dbp, 436 + unsigned int len) 437 + { 438 + struct xfs_dir2_data_free *dfp; 439 + 440 + dfp = sc->ip->d_ops->data_bestfree_p(dbp->b_addr); 441 + 442 + if (len != be16_to_cpu(dfp->length)) 443 + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); 444 + 445 + if (len > 0 && be16_to_cpu(dfp->offset) == 0) 446 + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); 447 + } 448 + 449 + /* Check free space info in a directory leaf1 block. */ 450 + STATIC int 451 + xfs_scrub_directory_leaf1_bestfree( 452 + struct xfs_scrub_context *sc, 453 + struct xfs_da_args *args, 454 + xfs_dablk_t lblk) 455 + { 456 + struct xfs_dir3_icleaf_hdr leafhdr; 457 + struct xfs_dir2_leaf_entry *ents; 458 + struct xfs_dir2_leaf_tail *ltp; 459 + struct xfs_dir2_leaf *leaf; 460 + struct xfs_buf *dbp; 461 + struct xfs_buf *bp; 462 + const struct xfs_dir_ops *d_ops = sc->ip->d_ops; 463 + struct xfs_da_geometry *geo = sc->mp->m_dir_geo; 464 + __be16 *bestp; 465 + __u16 best; 466 + __u32 hash; 467 + __u32 lasthash = 0; 468 + __u32 bestcount; 469 + unsigned int stale = 0; 470 + int i; 471 + int error; 472 + 473 + /* Read the free space block. 
*/ 474 + error = xfs_dir3_leaf_read(sc->tp, sc->ip, lblk, -1, &bp); 475 + if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error)) 476 + goto out; 477 + 478 + leaf = bp->b_addr; 479 + d_ops->leaf_hdr_from_disk(&leafhdr, leaf); 480 + ents = d_ops->leaf_ents_p(leaf); 481 + ltp = xfs_dir2_leaf_tail_p(geo, leaf); 482 + bestcount = be32_to_cpu(ltp->bestcount); 483 + bestp = xfs_dir2_leaf_bests_p(ltp); 484 + 485 + if (xfs_sb_version_hascrc(&sc->mp->m_sb)) { 486 + struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr; 487 + 488 + if (hdr3->pad != cpu_to_be32(0)) 489 + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); 490 + } 491 + 492 + /* 493 + * There should be as many bestfree slots as there are dir data 494 + * blocks that can fit under i_size. 495 + */ 496 + if (bestcount != xfs_dir2_byte_to_db(geo, sc->ip->i_d.di_size)) { 497 + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); 498 + goto out; 499 + } 500 + 501 + /* Is the leaf count even remotely sane? */ 502 + if (leafhdr.count > d_ops->leaf_max_ents(geo)) { 503 + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); 504 + goto out; 505 + } 506 + 507 + /* Leaves and bests don't overlap in leaf format. */ 508 + if ((char *)&ents[leafhdr.count] > (char *)bestp) { 509 + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); 510 + goto out; 511 + } 512 + 513 + /* Check hash value order, count stale entries. */ 514 + for (i = 0; i < leafhdr.count; i++) { 515 + hash = be32_to_cpu(ents[i].hashval); 516 + if (i > 0 && lasthash > hash) 517 + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); 518 + lasthash = hash; 519 + if (ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) 520 + stale++; 521 + } 522 + if (leafhdr.stale != stale) 523 + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); 524 + 525 + /* Check all the bestfree entries. 
*/ 526 + for (i = 0; i < bestcount; i++, bestp++) { 527 + best = be16_to_cpu(*bestp); 528 + if (best == NULLDATAOFF) 529 + continue; 530 + error = xfs_dir3_data_read(sc->tp, sc->ip, 531 + i * args->geo->fsbcount, -1, &dbp); 532 + if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, 533 + &error)) 534 + continue; 535 + xfs_scrub_directory_check_freesp(sc, lblk, dbp, best); 536 + xfs_trans_brelse(sc->tp, dbp); 537 + } 538 + out: 539 + return error; 540 + } 541 + 542 + /* Check free space info in a directory freespace block. */ 543 + STATIC int 544 + xfs_scrub_directory_free_bestfree( 545 + struct xfs_scrub_context *sc, 546 + struct xfs_da_args *args, 547 + xfs_dablk_t lblk) 548 + { 549 + struct xfs_dir3_icfree_hdr freehdr; 550 + struct xfs_buf *dbp; 551 + struct xfs_buf *bp; 552 + __be16 *bestp; 553 + __u16 best; 554 + unsigned int stale = 0; 555 + int i; 556 + int error; 557 + 558 + /* Read the free space block */ 559 + error = xfs_dir2_free_read(sc->tp, sc->ip, lblk, &bp); 560 + if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error)) 561 + goto out; 562 + 563 + if (xfs_sb_version_hascrc(&sc->mp->m_sb)) { 564 + struct xfs_dir3_free_hdr *hdr3 = bp->b_addr; 565 + 566 + if (hdr3->pad != cpu_to_be32(0)) 567 + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); 568 + } 569 + 570 + /* Check all the entries. 
*/ 571 + sc->ip->d_ops->free_hdr_from_disk(&freehdr, bp->b_addr); 572 + bestp = sc->ip->d_ops->free_bests_p(bp->b_addr); 573 + for (i = 0; i < freehdr.nvalid; i++, bestp++) { 574 + best = be16_to_cpu(*bestp); 575 + if (best == NULLDATAOFF) { 576 + stale++; 577 + continue; 578 + } 579 + error = xfs_dir3_data_read(sc->tp, sc->ip, 580 + (freehdr.firstdb + i) * args->geo->fsbcount, 581 + -1, &dbp); 582 + if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, 583 + &error)) 584 + continue; 585 + xfs_scrub_directory_check_freesp(sc, lblk, dbp, best); 586 + xfs_trans_brelse(sc->tp, dbp); 587 + } 588 + 589 + if (freehdr.nused + stale != freehdr.nvalid) 590 + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); 591 + out: 592 + return error; 593 + } 594 + 595 + /* Check free space information in directories. */ 596 + STATIC int 597 + xfs_scrub_directory_blocks( 598 + struct xfs_scrub_context *sc) 599 + { 600 + struct xfs_bmbt_irec got; 601 + struct xfs_da_args args; 602 + struct xfs_ifork *ifp; 603 + struct xfs_mount *mp = sc->mp; 604 + xfs_fileoff_t leaf_lblk; 605 + xfs_fileoff_t free_lblk; 606 + xfs_fileoff_t lblk; 607 + struct xfs_iext_cursor icur; 608 + xfs_dablk_t dabno; 609 + bool found; 610 + int is_block = 0; 611 + int error; 612 + 613 + /* Ignore local format directories. */ 614 + if (sc->ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS && 615 + sc->ip->i_d.di_format != XFS_DINODE_FMT_BTREE) 616 + return 0; 617 + 618 + ifp = XFS_IFORK_PTR(sc->ip, XFS_DATA_FORK); 619 + lblk = XFS_B_TO_FSB(mp, XFS_DIR2_DATA_OFFSET); 620 + leaf_lblk = XFS_B_TO_FSB(mp, XFS_DIR2_LEAF_OFFSET); 621 + free_lblk = XFS_B_TO_FSB(mp, XFS_DIR2_FREE_OFFSET); 622 + 623 + /* Is this a block dir? */ 624 + args.dp = sc->ip; 625 + args.geo = mp->m_dir_geo; 626 + args.trans = sc->tp; 627 + error = xfs_dir2_isblock(&args, &is_block); 628 + if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error)) 629 + goto out; 630 + 631 + /* Iterate all the data extents in the directory... 
*/ 632 + found = xfs_iext_lookup_extent(sc->ip, ifp, lblk, &icur, &got); 633 + while (found) { 634 + /* Block directories only have a single block at offset 0. */ 635 + if (is_block && 636 + (got.br_startoff > 0 || 637 + got.br_blockcount != args.geo->fsbcount)) { 638 + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 639 + got.br_startoff); 640 + break; 641 + } 642 + 643 + /* No more data blocks... */ 644 + if (got.br_startoff >= leaf_lblk) 645 + break; 646 + 647 + /* 648 + * Check each data block's bestfree data. 649 + * 650 + * Iterate all the fsbcount-aligned block offsets in 651 + * this directory. The directory block reading code is 652 + * smart enough to do its own bmap lookups to handle 653 + * discontiguous directory blocks. When we're done 654 + * with the extent record, re-query the bmap at the 655 + * next fsbcount-aligned offset to avoid redundant 656 + * block checks. 657 + */ 658 + for (lblk = roundup((xfs_dablk_t)got.br_startoff, 659 + args.geo->fsbcount); 660 + lblk < got.br_startoff + got.br_blockcount; 661 + lblk += args.geo->fsbcount) { 662 + error = xfs_scrub_directory_data_bestfree(sc, lblk, 663 + is_block); 664 + if (error) 665 + goto out; 666 + } 667 + dabno = got.br_startoff + got.br_blockcount; 668 + lblk = roundup(dabno, args.geo->fsbcount); 669 + found = xfs_iext_lookup_extent(sc->ip, ifp, lblk, &icur, &got); 670 + } 671 + 672 + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 673 + goto out; 674 + 675 + /* Look for a leaf1 block, which has free info. 
*/ 676 + if (xfs_iext_lookup_extent(sc->ip, ifp, leaf_lblk, &icur, &got) && 677 + got.br_startoff == leaf_lblk && 678 + got.br_blockcount == args.geo->fsbcount && 679 + !xfs_iext_next_extent(ifp, &icur, &got)) { 680 + if (is_block) { 681 + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); 682 + goto out; 683 + } 684 + error = xfs_scrub_directory_leaf1_bestfree(sc, &args, 685 + leaf_lblk); 686 + if (error) 687 + goto out; 688 + } 689 + 690 + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 691 + goto out; 692 + 693 + /* Scan for free blocks */ 694 + lblk = free_lblk; 695 + found = xfs_iext_lookup_extent(sc->ip, ifp, lblk, &icur, &got); 696 + while (found) { 697 + /* 698 + * Dirs can't have blocks mapped above 2^32. 699 + * Single-block dirs shouldn't even be here. 700 + */ 701 + lblk = got.br_startoff; 702 + if (lblk & ~0xFFFFFFFFULL) { 703 + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); 704 + goto out; 705 + } 706 + if (is_block) { 707 + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); 708 + goto out; 709 + } 710 + 711 + /* 712 + * Check each dir free block's bestfree data. 713 + * 714 + * Iterate all the fsbcount-aligned block offsets in 715 + * this directory. The directory block reading code is 716 + * smart enough to do its own bmap lookups to handle 717 + * discontiguous directory blocks. When we're done 718 + * with the extent record, re-query the bmap at the 719 + * next fsbcount-aligned offset to avoid redundant 720 + * block checks. 
721 + */ 722 + for (lblk = roundup((xfs_dablk_t)got.br_startoff, 723 + args.geo->fsbcount); 724 + lblk < got.br_startoff + got.br_blockcount; 725 + lblk += args.geo->fsbcount) { 726 + error = xfs_scrub_directory_free_bestfree(sc, &args, 727 + lblk); 728 + if (error) 729 + goto out; 730 + } 731 + dabno = got.br_startoff + got.br_blockcount; 732 + lblk = roundup(dabno, args.geo->fsbcount); 733 + found = xfs_iext_lookup_extent(sc->ip, ifp, lblk, &icur, &got); 734 + } 735 + out: 736 + return error; 737 + } 738 + 739 + /* Scrub a whole directory. */ 740 + int 741 + xfs_scrub_directory( 742 + struct xfs_scrub_context *sc) 743 + { 744 + struct xfs_scrub_dir_ctx sdc = { 745 + .dir_iter.actor = xfs_scrub_dir_actor, 746 + .dir_iter.pos = 0, 747 + .sc = sc, 748 + }; 749 + size_t bufsize; 750 + loff_t oldpos; 751 + int error = 0; 752 + 753 + if (!S_ISDIR(VFS_I(sc->ip)->i_mode)) 754 + return -ENOENT; 755 + 756 + /* Plausible size? */ 757 + if (sc->ip->i_d.di_size < xfs_dir2_sf_hdr_size(0)) { 758 + xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino, NULL); 759 + goto out; 760 + } 761 + 762 + /* Check directory tree structure */ 763 + error = xfs_scrub_da_btree(sc, XFS_DATA_FORK, xfs_scrub_dir_rec, NULL); 764 + if (error) 765 + return error; 766 + 767 + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 768 + return error; 769 + 770 + /* Check the freespace. */ 771 + error = xfs_scrub_directory_blocks(sc); 772 + if (error) 773 + return error; 774 + 775 + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 776 + return error; 777 + 778 + /* 779 + * Check that every dirent we see can also be looked up by hash. 780 + * Userspace usually asks for a 32k buffer, so we will too. 781 + */ 782 + bufsize = (size_t)min_t(loff_t, XFS_READDIR_BUFSIZE, 783 + sc->ip->i_d.di_size); 784 + 785 + /* 786 + * Look up every name in this directory by hash. 787 + * 788 + * Use the xfs_readdir function to call xfs_scrub_dir_actor on 789 + * every directory entry in this directory. 
In _actor, we check 790 + * the name, inode number, and ftype (if applicable) of the 791 + * entry. xfs_readdir uses the VFS filldir functions to provide 792 + * iteration context. 793 + * 794 + * The VFS grabs a read or write lock via i_rwsem before it reads 795 + * or writes to a directory. If we've gotten this far we've 796 + * already obtained IOLOCK_EXCL, which (since 4.10) is the same as 797 + * getting a write lock on i_rwsem. Therefore, it is safe for us 798 + * to drop the ILOCK here in order to reuse the _readdir and 799 + * _dir_lookup routines, which do their own ILOCK locking. 800 + */ 801 + oldpos = 0; 802 + sc->ilock_flags &= ~XFS_ILOCK_EXCL; 803 + xfs_iunlock(sc->ip, XFS_ILOCK_EXCL); 804 + while (true) { 805 + error = xfs_readdir(sc->tp, sc->ip, &sdc.dir_iter, bufsize); 806 + if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, 807 + &error)) 808 + goto out; 809 + if (oldpos == sdc.dir_iter.pos) 810 + break; 811 + oldpos = sdc.dir_iter.pos; 812 + } 813 + 814 + out: 815 + return error; 816 + }
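The directory scrubber above repeatedly applies one invariant: an unused (free) entry in a data block is consistent only if its length matches one of the recorded bestfree spans, or is shorter than the smallest of them (see xfs_scrub_directory_check_free_entry). A minimal userspace sketch of that membership check, using plain host-endian lengths instead of the on-disk big-endian xfs_dir2_data_free records (the name bestfree_covers is hypothetical, not a kernel function):

```c
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define FD_COUNT 3 /* XFS keeps the 3 longest free spans per data block */

/*
 * Simplified model of xfs_scrub_directory_check_free_entry(): the
 * bestfree array bf[] is assumed already verified to be sorted longest
 * to shortest.  A free entry of length dup_length is consistent if it
 * is shorter than every recorded span, or equal to one of them.
 */
static bool bestfree_covers(const uint16_t bf[FD_COUNT], uint16_t dup_length)
{
	int i;

	/* Shorter than the smallest bestfree: need not be recorded. */
	if (dup_length < bf[FD_COUNT - 1])
		return true;

	/* Otherwise it must match one of the recorded spans. */
	for (i = FD_COUNT - 1; i >= 0; i--)
		if (dup_length == bf[i])
			return true;

	/* Should have been in the bestfrees but wasn't: corruption. */
	return false;
}
```

In the kernel, a false result here maps to xfs_scrub_fblock_set_corrupt() rather than a return value; the sketch only isolates the arithmetic of the check.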
+337
fs/xfs/scrub/ialloc.c
··· 1 + /* 2 + * Copyright (C) 2017 Oracle. All Rights Reserved. 3 + * 4 + * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 + * 6 + * This program is free software; you can redistribute it and/or 7 + * modify it under the terms of the GNU General Public License 8 + * as published by the Free Software Foundation; either version 2 9 + * of the License, or (at your option) any later version. 10 + * 11 + * This program is distributed in the hope that it would be useful, 12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 + * GNU General Public License for more details. 15 + * 16 + * You should have received a copy of the GNU General Public License 17 + * along with this program; if not, write the Free Software Foundation, 18 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 19 + */ 20 + #include "xfs.h" 21 + #include "xfs_fs.h" 22 + #include "xfs_shared.h" 23 + #include "xfs_format.h" 24 + #include "xfs_trans_resv.h" 25 + #include "xfs_mount.h" 26 + #include "xfs_defer.h" 27 + #include "xfs_btree.h" 28 + #include "xfs_bit.h" 29 + #include "xfs_log_format.h" 30 + #include "xfs_trans.h" 31 + #include "xfs_sb.h" 32 + #include "xfs_inode.h" 33 + #include "xfs_alloc.h" 34 + #include "xfs_ialloc.h" 35 + #include "xfs_ialloc_btree.h" 36 + #include "xfs_icache.h" 37 + #include "xfs_rmap.h" 38 + #include "xfs_log.h" 39 + #include "xfs_trans_priv.h" 40 + #include "scrub/xfs_scrub.h" 41 + #include "scrub/scrub.h" 42 + #include "scrub/common.h" 43 + #include "scrub/btree.h" 44 + #include "scrub/trace.h" 45 + 46 + /* 47 + * Set us up to scrub inode btrees. 48 + * If we detect a discrepancy between the inobt and the inode, 49 + * try again after forcing logged inode cores out to disk. 
50 + */ 51 + int 52 + xfs_scrub_setup_ag_iallocbt( 53 + struct xfs_scrub_context *sc, 54 + struct xfs_inode *ip) 55 + { 56 + return xfs_scrub_setup_ag_btree(sc, ip, sc->try_harder); 57 + } 58 + 59 + /* Inode btree scrubber. */ 60 + 61 + /* Is this chunk worth checking? */ 62 + STATIC bool 63 + xfs_scrub_iallocbt_chunk( 64 + struct xfs_scrub_btree *bs, 65 + struct xfs_inobt_rec_incore *irec, 66 + xfs_agino_t agino, 67 + xfs_extlen_t len) 68 + { 69 + struct xfs_mount *mp = bs->cur->bc_mp; 70 + xfs_agnumber_t agno = bs->cur->bc_private.a.agno; 71 + xfs_agblock_t bno; 72 + 73 + bno = XFS_AGINO_TO_AGBNO(mp, agino); 74 + if (bno + len <= bno || 75 + !xfs_verify_agbno(mp, agno, bno) || 76 + !xfs_verify_agbno(mp, agno, bno + len - 1)) 77 + xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); 78 + 79 + return true; 80 + } 81 + 82 + /* Count the number of free inodes. */ 83 + static unsigned int 84 + xfs_scrub_iallocbt_freecount( 85 + xfs_inofree_t freemask) 86 + { 87 + BUILD_BUG_ON(sizeof(freemask) != sizeof(__u64)); 88 + return hweight64(freemask); 89 + } 90 + 91 + /* Check a particular inode with ir_free. 
*/ 92 + STATIC int 93 + xfs_scrub_iallocbt_check_cluster_freemask( 94 + struct xfs_scrub_btree *bs, 95 + xfs_ino_t fsino, 96 + xfs_agino_t chunkino, 97 + xfs_agino_t clusterino, 98 + struct xfs_inobt_rec_incore *irec, 99 + struct xfs_buf *bp) 100 + { 101 + struct xfs_dinode *dip; 102 + struct xfs_mount *mp = bs->cur->bc_mp; 103 + bool inode_is_free = false; 104 + bool freemask_ok; 105 + bool inuse; 106 + int error = 0; 107 + 108 + if (xfs_scrub_should_terminate(bs->sc, &error)) 109 + return error; 110 + 111 + dip = xfs_buf_offset(bp, clusterino * mp->m_sb.sb_inodesize); 112 + if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC || 113 + (dip->di_version >= 3 && 114 + be64_to_cpu(dip->di_ino) != fsino + clusterino)) { 115 + xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); 116 + goto out; 117 + } 118 + 119 + if (irec->ir_free & XFS_INOBT_MASK(chunkino + clusterino)) 120 + inode_is_free = true; 121 + error = xfs_icache_inode_is_allocated(mp, bs->cur->bc_tp, 122 + fsino + clusterino, &inuse); 123 + if (error == -ENODATA) { 124 + /* Not cached, just read the disk buffer */ 125 + freemask_ok = inode_is_free ^ !!(dip->di_mode); 126 + if (!bs->sc->try_harder && !freemask_ok) 127 + return -EDEADLOCK; 128 + } else if (error < 0) { 129 + /* 130 + * Inode is only half assembled, or there was an IO error, 131 + * or the verifier failed, so don't bother trying to check. 132 + * The inode scrubber can deal with this. 133 + */ 134 + goto out; 135 + } else { 136 + /* Inode is all there. */ 137 + freemask_ok = inode_is_free ^ inuse; 138 + } 139 + if (!freemask_ok) 140 + xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); 141 + out: 142 + return 0; 143 + } 144 + 145 + /* Make sure the free mask is consistent with what the inodes think. 
*/ 146 + STATIC int 147 + xfs_scrub_iallocbt_check_freemask( 148 + struct xfs_scrub_btree *bs, 149 + struct xfs_inobt_rec_incore *irec) 150 + { 151 + struct xfs_owner_info oinfo; 152 + struct xfs_imap imap; 153 + struct xfs_mount *mp = bs->cur->bc_mp; 154 + struct xfs_dinode *dip; 155 + struct xfs_buf *bp; 156 + xfs_ino_t fsino; 157 + xfs_agino_t nr_inodes; 158 + xfs_agino_t agino; 159 + xfs_agino_t chunkino; 160 + xfs_agino_t clusterino; 161 + xfs_agblock_t agbno; 162 + int blks_per_cluster; 163 + uint16_t holemask; 164 + uint16_t ir_holemask; 165 + int error = 0; 166 + 167 + /* Make sure the freemask matches the inode records. */ 168 + blks_per_cluster = xfs_icluster_size_fsb(mp); 169 + nr_inodes = XFS_OFFBNO_TO_AGINO(mp, blks_per_cluster, 0); 170 + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES); 171 + 172 + for (agino = irec->ir_startino; 173 + agino < irec->ir_startino + XFS_INODES_PER_CHUNK; 174 + agino += blks_per_cluster * mp->m_sb.sb_inopblock) { 175 + fsino = XFS_AGINO_TO_INO(mp, bs->cur->bc_private.a.agno, agino); 176 + chunkino = agino - irec->ir_startino; 177 + agbno = XFS_AGINO_TO_AGBNO(mp, agino); 178 + 179 + /* Compute the holemask mask for this cluster. */ 180 + for (clusterino = 0, holemask = 0; clusterino < nr_inodes; 181 + clusterino += XFS_INODES_PER_HOLEMASK_BIT) 182 + holemask |= XFS_INOBT_MASK((chunkino + clusterino) / 183 + XFS_INODES_PER_HOLEMASK_BIT); 184 + 185 + /* The whole cluster must be a hole or not a hole. */ 186 + ir_holemask = (irec->ir_holemask & holemask); 187 + if (ir_holemask != holemask && ir_holemask != 0) { 188 + xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); 189 + continue; 190 + } 191 + 192 + /* If any part of this is a hole, skip it. */ 193 + if (ir_holemask) 194 + continue; 195 + 196 + /* Grab the inode cluster buffer. 
*/ 197 + imap.im_blkno = XFS_AGB_TO_DADDR(mp, bs->cur->bc_private.a.agno, 198 + agbno); 199 + imap.im_len = XFS_FSB_TO_BB(mp, blks_per_cluster); 200 + imap.im_boffset = 0; 201 + 202 + error = xfs_imap_to_bp(mp, bs->cur->bc_tp, &imap, 203 + &dip, &bp, 0, 0); 204 + if (!xfs_scrub_btree_process_error(bs->sc, bs->cur, 0, &error)) 205 + continue; 206 + 207 + /* Which inodes are free? */ 208 + for (clusterino = 0; clusterino < nr_inodes; clusterino++) { 209 + error = xfs_scrub_iallocbt_check_cluster_freemask(bs, 210 + fsino, chunkino, clusterino, irec, bp); 211 + if (error) { 212 + xfs_trans_brelse(bs->cur->bc_tp, bp); 213 + return error; 214 + } 215 + } 216 + 217 + xfs_trans_brelse(bs->cur->bc_tp, bp); 218 + } 219 + 220 + return error; 221 + } 222 + 223 + /* Scrub an inobt/finobt record. */ 224 + STATIC int 225 + xfs_scrub_iallocbt_rec( 226 + struct xfs_scrub_btree *bs, 227 + union xfs_btree_rec *rec) 228 + { 229 + struct xfs_mount *mp = bs->cur->bc_mp; 230 + struct xfs_inobt_rec_incore irec; 231 + uint64_t holes; 232 + xfs_agnumber_t agno = bs->cur->bc_private.a.agno; 233 + xfs_agino_t agino; 234 + xfs_agblock_t agbno; 235 + xfs_extlen_t len; 236 + int holecount; 237 + int i; 238 + int error = 0; 239 + unsigned int real_freecount; 240 + uint16_t holemask; 241 + 242 + xfs_inobt_btrec_to_irec(mp, rec, &irec); 243 + 244 + if (irec.ir_count > XFS_INODES_PER_CHUNK || 245 + irec.ir_freecount > XFS_INODES_PER_CHUNK) 246 + xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); 247 + 248 + real_freecount = irec.ir_freecount + 249 + (XFS_INODES_PER_CHUNK - irec.ir_count); 250 + if (real_freecount != xfs_scrub_iallocbt_freecount(irec.ir_free)) 251 + xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); 252 + 253 + agino = irec.ir_startino; 254 + /* Record has to be properly aligned within the AG. 
*/ 255 + if (!xfs_verify_agino(mp, agno, agino) || 256 + !xfs_verify_agino(mp, agno, agino + XFS_INODES_PER_CHUNK - 1)) { 257 + xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); 258 + goto out; 259 + } 260 + 261 + /* Make sure this record is aligned to cluster and inoalignment size. */ 262 + agbno = XFS_AGINO_TO_AGBNO(mp, irec.ir_startino); 263 + if ((agbno & (xfs_ialloc_cluster_alignment(mp) - 1)) || 264 + (agbno & (xfs_icluster_size_fsb(mp) - 1))) 265 + xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); 266 + 267 + /* Handle non-sparse inodes */ 268 + if (!xfs_inobt_issparse(irec.ir_holemask)) { 269 + len = XFS_B_TO_FSB(mp, 270 + XFS_INODES_PER_CHUNK * mp->m_sb.sb_inodesize); 271 + if (irec.ir_count != XFS_INODES_PER_CHUNK) 272 + xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); 273 + 274 + if (!xfs_scrub_iallocbt_chunk(bs, &irec, agino, len)) 275 + goto out; 276 + goto check_freemask; 277 + } 278 + 279 + /* Check each chunk of a sparse inode cluster. */ 280 + holemask = irec.ir_holemask; 281 + holecount = 0; 282 + len = XFS_B_TO_FSB(mp, 283 + XFS_INODES_PER_HOLEMASK_BIT * mp->m_sb.sb_inodesize); 284 + holes = ~xfs_inobt_irec_to_allocmask(&irec); 285 + if ((holes & irec.ir_free) != holes || 286 + irec.ir_freecount > irec.ir_count) 287 + xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); 288 + 289 + for (i = 0; i < XFS_INOBT_HOLEMASK_BITS; i++) { 290 + if (holemask & 1) 291 + holecount += XFS_INODES_PER_HOLEMASK_BIT; 292 + else if (!xfs_scrub_iallocbt_chunk(bs, &irec, agino, len)) 293 + break; 294 + holemask >>= 1; 295 + agino += XFS_INODES_PER_HOLEMASK_BIT; 296 + } 297 + 298 + if (holecount > XFS_INODES_PER_CHUNK || 299 + holecount + irec.ir_count != XFS_INODES_PER_CHUNK) 300 + xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); 301 + 302 + check_freemask: 303 + error = xfs_scrub_iallocbt_check_freemask(bs, &irec); 304 + if (error) 305 + goto out; 306 + 307 + out: 308 + return error; 309 + } 310 + 311 + /* Scrub the inode btrees for some AG.
*/ 312 + STATIC int 313 + xfs_scrub_iallocbt( 314 + struct xfs_scrub_context *sc, 315 + xfs_btnum_t which) 316 + { 317 + struct xfs_btree_cur *cur; 318 + struct xfs_owner_info oinfo; 319 + 320 + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT); 321 + cur = which == XFS_BTNUM_INO ? sc->sa.ino_cur : sc->sa.fino_cur; 322 + return xfs_scrub_btree(sc, cur, xfs_scrub_iallocbt_rec, &oinfo, NULL); 323 + } 324 + 325 + int 326 + xfs_scrub_inobt( 327 + struct xfs_scrub_context *sc) 328 + { 329 + return xfs_scrub_iallocbt(sc, XFS_BTNUM_INO); 330 + } 331 + 332 + int 333 + xfs_scrub_finobt( 334 + struct xfs_scrub_context *sc) 335 + { 336 + return xfs_scrub_iallocbt(sc, XFS_BTNUM_FINO); 337 + }
fs/xfs/scrub/inode.c (+611)
··· 1 + /* 2 + * Copyright (C) 2017 Oracle. All Rights Reserved. 3 + * 4 + * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 + * 6 + * This program is free software; you can redistribute it and/or 7 + * modify it under the terms of the GNU General Public License 8 + * as published by the Free Software Foundation; either version 2 9 + * of the License, or (at your option) any later version. 10 + * 11 + * This program is distributed in the hope that it would be useful, 12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 + * GNU General Public License for more details. 15 + * 16 + * You should have received a copy of the GNU General Public License 17 + * along with this program; if not, write the Free Software Foundation, 18 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 19 + */ 20 + #include "xfs.h" 21 + #include "xfs_fs.h" 22 + #include "xfs_shared.h" 23 + #include "xfs_format.h" 24 + #include "xfs_trans_resv.h" 25 + #include "xfs_mount.h" 26 + #include "xfs_defer.h" 27 + #include "xfs_btree.h" 28 + #include "xfs_bit.h" 29 + #include "xfs_log_format.h" 30 + #include "xfs_trans.h" 31 + #include "xfs_sb.h" 32 + #include "xfs_inode.h" 33 + #include "xfs_icache.h" 34 + #include "xfs_inode_buf.h" 35 + #include "xfs_inode_fork.h" 36 + #include "xfs_ialloc.h" 37 + #include "xfs_da_format.h" 38 + #include "xfs_reflink.h" 39 + #include "scrub/xfs_scrub.h" 40 + #include "scrub/scrub.h" 41 + #include "scrub/common.h" 42 + #include "scrub/trace.h" 43 + 44 + /* 45 + * Grab total control of the inode metadata. It doesn't matter here if 46 + * the file data is still changing; exclusive access to the metadata is 47 + * the goal. 48 + */ 49 + int 50 + xfs_scrub_setup_inode( 51 + struct xfs_scrub_context *sc, 52 + struct xfs_inode *ip) 53 + { 54 + struct xfs_mount *mp = sc->mp; 55 + int error; 56 + 57 + /* 58 + * Try to get the inode. 
If the verifiers fail, we try again 59 + * in raw mode. 60 + */ 61 + error = xfs_scrub_get_inode(sc, ip); 62 + switch (error) { 63 + case 0: 64 + break; 65 + case -EFSCORRUPTED: 66 + case -EFSBADCRC: 67 + return 0; 68 + default: 69 + return error; 70 + } 71 + 72 + /* Got the inode, lock it and we're ready to go. */ 73 + sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL; 74 + xfs_ilock(sc->ip, sc->ilock_flags); 75 + error = xfs_scrub_trans_alloc(sc->sm, mp, &sc->tp); 76 + if (error) 77 + goto out; 78 + sc->ilock_flags |= XFS_ILOCK_EXCL; 79 + xfs_ilock(sc->ip, XFS_ILOCK_EXCL); 80 + 81 + out: 82 + /* scrub teardown will unlock and release the inode for us */ 83 + return error; 84 + } 85 + 86 + /* Inode core */ 87 + 88 + /* 89 + * Validate di_extsize hint. 90 + * 91 + * The rules are documented at xfs_ioctl_setattr_check_extsize(). 92 + * These functions must be kept in sync with each other. 93 + */ 94 + STATIC void 95 + xfs_scrub_inode_extsize( 96 + struct xfs_scrub_context *sc, 97 + struct xfs_buf *bp, 98 + struct xfs_dinode *dip, 99 + xfs_ino_t ino, 100 + uint16_t mode, 101 + uint16_t flags) 102 + { 103 + struct xfs_mount *mp = sc->mp; 104 + bool rt_flag; 105 + bool hint_flag; 106 + bool inherit_flag; 107 + uint32_t extsize; 108 + uint32_t extsize_bytes; 109 + uint32_t blocksize_bytes; 110 + 111 + rt_flag = (flags & XFS_DIFLAG_REALTIME); 112 + hint_flag = (flags & XFS_DIFLAG_EXTSIZE); 113 + inherit_flag = (flags & XFS_DIFLAG_EXTSZINHERIT); 114 + extsize = be32_to_cpu(dip->di_extsize); 115 + extsize_bytes = XFS_FSB_TO_B(sc->mp, extsize); 116 + 117 + if (rt_flag) 118 + blocksize_bytes = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog; 119 + else 120 + blocksize_bytes = mp->m_sb.sb_blocksize; 121 + 122 + if ((hint_flag || inherit_flag) && !(S_ISDIR(mode) || S_ISREG(mode))) 123 + goto bad; 124 + 125 + if (hint_flag && !S_ISREG(mode)) 126 + goto bad; 127 + 128 + if (inherit_flag && !S_ISDIR(mode)) 129 + goto bad; 130 + 131 + if ((hint_flag || inherit_flag) && extsize == 
0) 132 + goto bad; 133 + 134 + if (!(hint_flag || inherit_flag) && extsize != 0) 135 + goto bad; 136 + 137 + if (extsize_bytes % blocksize_bytes) 138 + goto bad; 139 + 140 + if (extsize > MAXEXTLEN) 141 + goto bad; 142 + 143 + if (!rt_flag && extsize > mp->m_sb.sb_agblocks / 2) 144 + goto bad; 145 + 146 + return; 147 + bad: 148 + xfs_scrub_ino_set_corrupt(sc, ino, bp); 149 + } 150 + 151 + /* 152 + * Validate di_cowextsize hint. 153 + * 154 + * The rules are documented at xfs_ioctl_setattr_check_cowextsize(). 155 + * These functions must be kept in sync with each other. 156 + */ 157 + STATIC void 158 + xfs_scrub_inode_cowextsize( 159 + struct xfs_scrub_context *sc, 160 + struct xfs_buf *bp, 161 + struct xfs_dinode *dip, 162 + xfs_ino_t ino, 163 + uint16_t mode, 164 + uint16_t flags, 165 + uint64_t flags2) 166 + { 167 + struct xfs_mount *mp = sc->mp; 168 + bool rt_flag; 169 + bool hint_flag; 170 + uint32_t extsize; 171 + uint32_t extsize_bytes; 172 + 173 + rt_flag = (flags & XFS_DIFLAG_REALTIME); 174 + hint_flag = (flags2 & XFS_DIFLAG2_COWEXTSIZE); 175 + extsize = be32_to_cpu(dip->di_cowextsize); 176 + extsize_bytes = XFS_FSB_TO_B(sc->mp, extsize); 177 + 178 + if (hint_flag && !xfs_sb_version_hasreflink(&mp->m_sb)) 179 + goto bad; 180 + 181 + if (hint_flag && !(S_ISDIR(mode) || S_ISREG(mode))) 182 + goto bad; 183 + 184 + if (hint_flag && extsize == 0) 185 + goto bad; 186 + 187 + if (!hint_flag && extsize != 0) 188 + goto bad; 189 + 190 + if (hint_flag && rt_flag) 191 + goto bad; 192 + 193 + if (extsize_bytes % mp->m_sb.sb_blocksize) 194 + goto bad; 195 + 196 + if (extsize > MAXEXTLEN) 197 + goto bad; 198 + 199 + if (extsize > mp->m_sb.sb_agblocks / 2) 200 + goto bad; 201 + 202 + return; 203 + bad: 204 + xfs_scrub_ino_set_corrupt(sc, ino, bp); 205 + } 206 + 207 + /* Make sure the di_flags make sense for the inode. 
*/ 208 + STATIC void 209 + xfs_scrub_inode_flags( 210 + struct xfs_scrub_context *sc, 211 + struct xfs_buf *bp, 212 + struct xfs_dinode *dip, 213 + xfs_ino_t ino, 214 + uint16_t mode, 215 + uint16_t flags) 216 + { 217 + struct xfs_mount *mp = sc->mp; 218 + 219 + if (flags & ~XFS_DIFLAG_ANY) 220 + goto bad; 221 + 222 + /* rt flags require rt device */ 223 + if ((flags & (XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT)) && 224 + !mp->m_rtdev_targp) 225 + goto bad; 226 + 227 + /* new rt bitmap flag only valid for rbmino */ 228 + if ((flags & XFS_DIFLAG_NEWRTBM) && ino != mp->m_sb.sb_rbmino) 229 + goto bad; 230 + 231 + /* directory-only flags */ 232 + if ((flags & (XFS_DIFLAG_RTINHERIT | 233 + XFS_DIFLAG_EXTSZINHERIT | 234 + XFS_DIFLAG_PROJINHERIT | 235 + XFS_DIFLAG_NOSYMLINKS)) && 236 + !S_ISDIR(mode)) 237 + goto bad; 238 + 239 + /* file-only flags */ 240 + if ((flags & (XFS_DIFLAG_REALTIME | FS_XFLAG_EXTSIZE)) && 241 + !S_ISREG(mode)) 242 + goto bad; 243 + 244 + /* filestreams and rt make no sense */ 245 + if ((flags & XFS_DIFLAG_FILESTREAM) && (flags & XFS_DIFLAG_REALTIME)) 246 + goto bad; 247 + 248 + return; 249 + bad: 250 + xfs_scrub_ino_set_corrupt(sc, ino, bp); 251 + } 252 + 253 + /* Make sure the di_flags2 make sense for the inode. */ 254 + STATIC void 255 + xfs_scrub_inode_flags2( 256 + struct xfs_scrub_context *sc, 257 + struct xfs_buf *bp, 258 + struct xfs_dinode *dip, 259 + xfs_ino_t ino, 260 + uint16_t mode, 261 + uint16_t flags, 262 + uint64_t flags2) 263 + { 264 + struct xfs_mount *mp = sc->mp; 265 + 266 + if (flags2 & ~XFS_DIFLAG2_ANY) 267 + goto bad; 268 + 269 + /* reflink flag requires reflink feature */ 270 + if ((flags2 & XFS_DIFLAG2_REFLINK) && 271 + !xfs_sb_version_hasreflink(&mp->m_sb)) 272 + goto bad; 273 + 274 + /* cowextsize flag is checked w.r.t. 
mode separately */ 275 + 276 + /* file/dir-only flags */ 277 + if ((flags2 & XFS_DIFLAG2_DAX) && !(S_ISREG(mode) || S_ISDIR(mode))) 278 + goto bad; 279 + 280 + /* file-only flags */ 281 + if ((flags2 & XFS_DIFLAG2_REFLINK) && !S_ISREG(mode)) 282 + goto bad; 283 + 284 + /* realtime and reflink make no sense, currently */ 285 + if ((flags & XFS_DIFLAG_REALTIME) && (flags2 & XFS_DIFLAG2_REFLINK)) 286 + goto bad; 287 + 288 + /* dax and reflink make no sense, currently */ 289 + if ((flags2 & XFS_DIFLAG2_DAX) && (flags2 & XFS_DIFLAG2_REFLINK)) 290 + goto bad; 291 + 292 + return; 293 + bad: 294 + xfs_scrub_ino_set_corrupt(sc, ino, bp); 295 + } 296 + 297 + /* Scrub all the ondisk inode fields. */ 298 + STATIC void 299 + xfs_scrub_dinode( 300 + struct xfs_scrub_context *sc, 301 + struct xfs_buf *bp, 302 + struct xfs_dinode *dip, 303 + xfs_ino_t ino) 304 + { 305 + struct xfs_mount *mp = sc->mp; 306 + size_t fork_recs; 307 + unsigned long long isize; 308 + uint64_t flags2; 309 + uint32_t nextents; 310 + uint16_t flags; 311 + uint16_t mode; 312 + 313 + flags = be16_to_cpu(dip->di_flags); 314 + if (dip->di_version >= 3) 315 + flags2 = be64_to_cpu(dip->di_flags2); 316 + else 317 + flags2 = 0; 318 + 319 + /* di_mode */ 320 + mode = be16_to_cpu(dip->di_mode); 321 + if (mode & ~(S_IALLUGO | S_IFMT)) 322 + xfs_scrub_ino_set_corrupt(sc, ino, bp); 323 + 324 + /* v1/v2 fields */ 325 + switch (dip->di_version) { 326 + case 1: 327 + /* 328 + * We autoconvert v1 inodes into v2 inodes on writeout, 329 + * so just mark this inode for preening. 
330 + */ 331 + xfs_scrub_ino_set_preen(sc, ino, bp); 332 + break; 333 + case 2: 334 + case 3: 335 + if (dip->di_onlink != 0) 336 + xfs_scrub_ino_set_corrupt(sc, ino, bp); 337 + 338 + if (dip->di_mode == 0 && sc->ip) 339 + xfs_scrub_ino_set_corrupt(sc, ino, bp); 340 + 341 + if (dip->di_projid_hi != 0 && 342 + !xfs_sb_version_hasprojid32bit(&mp->m_sb)) 343 + xfs_scrub_ino_set_corrupt(sc, ino, bp); 344 + break; 345 + default: 346 + xfs_scrub_ino_set_corrupt(sc, ino, bp); 347 + return; 348 + } 349 + 350 + /* 351 + * di_uid/di_gid -- -1 isn't invalid, but there's no way that 352 + * userspace could have created that. 353 + */ 354 + if (dip->di_uid == cpu_to_be32(-1U) || 355 + dip->di_gid == cpu_to_be32(-1U)) 356 + xfs_scrub_ino_set_warning(sc, ino, bp); 357 + 358 + /* di_format */ 359 + switch (dip->di_format) { 360 + case XFS_DINODE_FMT_DEV: 361 + if (!S_ISCHR(mode) && !S_ISBLK(mode) && 362 + !S_ISFIFO(mode) && !S_ISSOCK(mode)) 363 + xfs_scrub_ino_set_corrupt(sc, ino, bp); 364 + break; 365 + case XFS_DINODE_FMT_LOCAL: 366 + if (!S_ISDIR(mode) && !S_ISLNK(mode)) 367 + xfs_scrub_ino_set_corrupt(sc, ino, bp); 368 + break; 369 + case XFS_DINODE_FMT_EXTENTS: 370 + if (!S_ISREG(mode) && !S_ISDIR(mode) && !S_ISLNK(mode)) 371 + xfs_scrub_ino_set_corrupt(sc, ino, bp); 372 + break; 373 + case XFS_DINODE_FMT_BTREE: 374 + if (!S_ISREG(mode) && !S_ISDIR(mode)) 375 + xfs_scrub_ino_set_corrupt(sc, ino, bp); 376 + break; 377 + case XFS_DINODE_FMT_UUID: 378 + default: 379 + xfs_scrub_ino_set_corrupt(sc, ino, bp); 380 + break; 381 + } 382 + 383 + /* 384 + * di_size. xfs_dinode_verify checks for things that screw up 385 + * the VFS such as the upper bit being set and zero-length 386 + * symlinks/directories, but we can do more here. 
387 + */ 388 + isize = be64_to_cpu(dip->di_size); 389 + if (isize & (1ULL << 63)) 390 + xfs_scrub_ino_set_corrupt(sc, ino, bp); 391 + 392 + /* Devices, fifos, and sockets must have zero size */ 393 + if (!S_ISDIR(mode) && !S_ISREG(mode) && !S_ISLNK(mode) && isize != 0) 394 + xfs_scrub_ino_set_corrupt(sc, ino, bp); 395 + 396 + /* Directories can't be larger than the data section size (32G) */ 397 + if (S_ISDIR(mode) && (isize == 0 || isize >= XFS_DIR2_SPACE_SIZE)) 398 + xfs_scrub_ino_set_corrupt(sc, ino, bp); 399 + 400 + /* Symlinks can't be larger than SYMLINK_MAXLEN */ 401 + if (S_ISLNK(mode) && (isize == 0 || isize >= XFS_SYMLINK_MAXLEN)) 402 + xfs_scrub_ino_set_corrupt(sc, ino, bp); 403 + 404 + /* 405 + * Warn if the running kernel can't handle the kinds of offsets 406 + * needed to deal with the file size. In other words, if the 407 + * pagecache can't cache all the blocks in this file due to 408 + * overly large offsets, flag the inode for admin review. 409 + */ 410 + if (isize >= mp->m_super->s_maxbytes) 411 + xfs_scrub_ino_set_warning(sc, ino, bp); 412 + 413 + /* di_nblocks */ 414 + if (flags2 & XFS_DIFLAG2_REFLINK) { 415 + ; /* nblocks can exceed dblocks */ 416 + } else if (flags & XFS_DIFLAG_REALTIME) { 417 + /* 418 + * nblocks is the sum of data extents (in the rtdev), 419 + * attr extents (in the datadev), and both forks' bmbt 420 + * blocks (in the datadev). This clumsy check is the 421 + * best we can do without cross-referencing with the 422 + * inode forks. 
423 + */ 424 + if (be64_to_cpu(dip->di_nblocks) >= 425 + mp->m_sb.sb_dblocks + mp->m_sb.sb_rblocks) 426 + xfs_scrub_ino_set_corrupt(sc, ino, bp); 427 + } else { 428 + if (be64_to_cpu(dip->di_nblocks) >= mp->m_sb.sb_dblocks) 429 + xfs_scrub_ino_set_corrupt(sc, ino, bp); 430 + } 431 + 432 + xfs_scrub_inode_flags(sc, bp, dip, ino, mode, flags); 433 + 434 + xfs_scrub_inode_extsize(sc, bp, dip, ino, mode, flags); 435 + 436 + /* di_nextents */ 437 + nextents = be32_to_cpu(dip->di_nextents); 438 + fork_recs = XFS_DFORK_DSIZE(dip, mp) / sizeof(struct xfs_bmbt_rec); 439 + switch (dip->di_format) { 440 + case XFS_DINODE_FMT_EXTENTS: 441 + if (nextents > fork_recs) 442 + xfs_scrub_ino_set_corrupt(sc, ino, bp); 443 + break; 444 + case XFS_DINODE_FMT_BTREE: 445 + if (nextents <= fork_recs) 446 + xfs_scrub_ino_set_corrupt(sc, ino, bp); 447 + break; 448 + default: 449 + if (nextents != 0) 450 + xfs_scrub_ino_set_corrupt(sc, ino, bp); 451 + break; 452 + } 453 + 454 + /* di_forkoff */ 455 + if (XFS_DFORK_APTR(dip) >= (char *)dip + mp->m_sb.sb_inodesize) 456 + xfs_scrub_ino_set_corrupt(sc, ino, bp); 457 + if (dip->di_anextents != 0 && dip->di_forkoff == 0) 458 + xfs_scrub_ino_set_corrupt(sc, ino, bp); 459 + if (dip->di_forkoff == 0 && dip->di_aformat != XFS_DINODE_FMT_EXTENTS) 460 + xfs_scrub_ino_set_corrupt(sc, ino, bp); 461 + 462 + /* di_aformat */ 463 + if (dip->di_aformat != XFS_DINODE_FMT_LOCAL && 464 + dip->di_aformat != XFS_DINODE_FMT_EXTENTS && 465 + dip->di_aformat != XFS_DINODE_FMT_BTREE) 466 + xfs_scrub_ino_set_corrupt(sc, ino, bp); 467 + 468 + /* di_anextents */ 469 + nextents = be16_to_cpu(dip->di_anextents); 470 + fork_recs = XFS_DFORK_ASIZE(dip, mp) / sizeof(struct xfs_bmbt_rec); 471 + switch (dip->di_aformat) { 472 + case XFS_DINODE_FMT_EXTENTS: 473 + if (nextents > fork_recs) 474 + xfs_scrub_ino_set_corrupt(sc, ino, bp); 475 + break; 476 + case XFS_DINODE_FMT_BTREE: 477 + if (nextents <= fork_recs) 478 + xfs_scrub_ino_set_corrupt(sc, ino, bp); 479 + break; 480 + 
default: 481 + if (nextents != 0) 482 + xfs_scrub_ino_set_corrupt(sc, ino, bp); 483 + } 484 + 485 + if (dip->di_version >= 3) { 486 + xfs_scrub_inode_flags2(sc, bp, dip, ino, mode, flags, flags2); 487 + xfs_scrub_inode_cowextsize(sc, bp, dip, ino, mode, flags, 488 + flags2); 489 + } 490 + } 491 + 492 + /* Map and read a raw inode. */ 493 + STATIC int 494 + xfs_scrub_inode_map_raw( 495 + struct xfs_scrub_context *sc, 496 + xfs_ino_t ino, 497 + struct xfs_buf **bpp, 498 + struct xfs_dinode **dipp) 499 + { 500 + struct xfs_imap imap; 501 + struct xfs_mount *mp = sc->mp; 502 + struct xfs_buf *bp = NULL; 503 + struct xfs_dinode *dip; 504 + int error; 505 + 506 + error = xfs_imap(mp, sc->tp, ino, &imap, XFS_IGET_UNTRUSTED); 507 + if (error == -EINVAL) { 508 + /* 509 + * Inode could have gotten deleted out from under us; 510 + * just forget about it. 511 + */ 512 + error = -ENOENT; 513 + goto out; 514 + } 515 + if (!xfs_scrub_process_error(sc, XFS_INO_TO_AGNO(mp, ino), 516 + XFS_INO_TO_AGBNO(mp, ino), &error)) 517 + goto out; 518 + 519 + error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp, 520 + imap.im_blkno, imap.im_len, XBF_UNMAPPED, &bp, 521 + NULL); 522 + if (!xfs_scrub_process_error(sc, XFS_INO_TO_AGNO(mp, ino), 523 + XFS_INO_TO_AGBNO(mp, ino), &error)) 524 + goto out; 525 + 526 + /* 527 + * Is this really an inode? We disabled verifiers in the above 528 + * xfs_trans_read_buf call because the inode buffer verifier 529 + * fails on /any/ inode record in the inode cluster with a bad 530 + * magic or version number, not just the one that we're 531 + * checking. Therefore, grab the buffer unconditionally, attach 532 + * the inode verifiers by hand, and run the inode verifier only 533 + * on the one inode we want. 
534 + */ 535 + bp->b_ops = &xfs_inode_buf_ops; 536 + dip = xfs_buf_offset(bp, imap.im_boffset); 537 + if (!xfs_dinode_verify(mp, ino, dip) || 538 + !xfs_dinode_good_version(mp, dip->di_version)) { 539 + xfs_scrub_ino_set_corrupt(sc, ino, bp); 540 + goto out_buf; 541 + } 542 + 543 + /* ...and is it the one we asked for? */ 544 + if (be32_to_cpu(dip->di_gen) != sc->sm->sm_gen) { 545 + error = -ENOENT; 546 + goto out_buf; 547 + } 548 + 549 + *dipp = dip; 550 + *bpp = bp; 551 + out: 552 + return error; 553 + out_buf: 554 + xfs_trans_brelse(sc->tp, bp); 555 + return error; 556 + } 557 + 558 + /* Scrub an inode. */ 559 + int 560 + xfs_scrub_inode( 561 + struct xfs_scrub_context *sc) 562 + { 563 + struct xfs_dinode di; 564 + struct xfs_mount *mp = sc->mp; 565 + struct xfs_buf *bp = NULL; 566 + struct xfs_dinode *dip; 567 + xfs_ino_t ino; 568 + 569 + bool has_shared; 570 + int error = 0; 571 + 572 + /* Did we get the in-core inode, or are we doing this manually? */ 573 + if (sc->ip) { 574 + ino = sc->ip->i_ino; 575 + xfs_inode_to_disk(sc->ip, &di, 0); 576 + dip = &di; 577 + } else { 578 + /* Map & read inode. */ 579 + ino = sc->sm->sm_ino; 580 + error = xfs_scrub_inode_map_raw(sc, ino, &bp, &dip); 581 + if (error || !bp) 582 + goto out; 583 + } 584 + 585 + xfs_scrub_dinode(sc, bp, dip, ino); 586 + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 587 + goto out; 588 + 589 + /* Now let's do the things that require a live inode. */ 590 + if (!sc->ip) 591 + goto out; 592 + 593 + /* 594 + * Does this inode have the reflink flag set but no shared extents? 595 + * Set the preening flag if this is the case. 
596 + */ 597 + if (xfs_is_reflink_inode(sc->ip)) { 598 + error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip, 599 + &has_shared); 600 + if (!xfs_scrub_process_error(sc, XFS_INO_TO_AGNO(mp, ino), 601 + XFS_INO_TO_AGBNO(mp, ino), &error)) 602 + goto out; 603 + if (!has_shared) 604 + xfs_scrub_ino_set_preen(sc, ino, bp); 605 + } 606 + 607 + out: 608 + if (bp) 609 + xfs_trans_brelse(sc->tp, bp); 610 + return error; 611 + }
fs/xfs/scrub/parent.c (+317)
··· 1 + /* 2 + * Copyright (C) 2017 Oracle. All Rights Reserved. 3 + * 4 + * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 + * 6 + * This program is free software; you can redistribute it and/or 7 + * modify it under the terms of the GNU General Public License 8 + * as published by the Free Software Foundation; either version 2 9 + * of the License, or (at your option) any later version. 10 + * 11 + * This program is distributed in the hope that it would be useful, 12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 + * GNU General Public License for more details. 15 + * 16 + * You should have received a copy of the GNU General Public License 17 + * along with this program; if not, write the Free Software Foundation, 18 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 19 + */ 20 + #include "xfs.h" 21 + #include "xfs_fs.h" 22 + #include "xfs_shared.h" 23 + #include "xfs_format.h" 24 + #include "xfs_trans_resv.h" 25 + #include "xfs_mount.h" 26 + #include "xfs_defer.h" 27 + #include "xfs_btree.h" 28 + #include "xfs_bit.h" 29 + #include "xfs_log_format.h" 30 + #include "xfs_trans.h" 31 + #include "xfs_sb.h" 32 + #include "xfs_inode.h" 33 + #include "xfs_icache.h" 34 + #include "xfs_dir2.h" 35 + #include "xfs_dir2_priv.h" 36 + #include "xfs_ialloc.h" 37 + #include "scrub/xfs_scrub.h" 38 + #include "scrub/scrub.h" 39 + #include "scrub/common.h" 40 + #include "scrub/trace.h" 41 + 42 + /* Set us up to scrub parents. */ 43 + int 44 + xfs_scrub_setup_parent( 45 + struct xfs_scrub_context *sc, 46 + struct xfs_inode *ip) 47 + { 48 + return xfs_scrub_setup_inode_contents(sc, ip, 0); 49 + } 50 + 51 + /* Parent pointers */ 52 + 53 + /* Look for an entry in a parent pointing to this inode. 
*/ 54 + 55 + struct xfs_scrub_parent_ctx { 56 + struct dir_context dc; 57 + xfs_ino_t ino; 58 + xfs_nlink_t nlink; 59 + }; 60 + 61 + /* Look for a single entry in a directory pointing to an inode. */ 62 + STATIC int 63 + xfs_scrub_parent_actor( 64 + struct dir_context *dc, 65 + const char *name, 66 + int namelen, 67 + loff_t pos, 68 + u64 ino, 69 + unsigned type) 70 + { 71 + struct xfs_scrub_parent_ctx *spc; 72 + 73 + spc = container_of(dc, struct xfs_scrub_parent_ctx, dc); 74 + if (spc->ino == ino) 75 + spc->nlink++; 76 + return 0; 77 + } 78 + 79 + /* Count the number of dentries in the parent dir that point to this inode. */ 80 + STATIC int 81 + xfs_scrub_parent_count_parent_dentries( 82 + struct xfs_scrub_context *sc, 83 + struct xfs_inode *parent, 84 + xfs_nlink_t *nlink) 85 + { 86 + struct xfs_scrub_parent_ctx spc = { 87 + .dc.actor = xfs_scrub_parent_actor, 88 + .dc.pos = 0, 89 + .ino = sc->ip->i_ino, 90 + .nlink = 0, 91 + }; 92 + size_t bufsize; 93 + loff_t oldpos; 94 + uint lock_mode; 95 + int error = 0; 96 + 97 + /* 98 + * If there are any blocks, read-ahead block 0 as we're almost 99 + * certain to have the next operation be a read there. This is 100 + * how we guarantee that the parent's extent map has been loaded, 101 + * if there is one. 102 + */ 103 + lock_mode = xfs_ilock_data_map_shared(parent); 104 + if (parent->i_d.di_nextents > 0) 105 + error = xfs_dir3_data_readahead(parent, 0, -1); 106 + xfs_iunlock(parent, lock_mode); 107 + if (error) 108 + return error; 109 + 110 + /* 111 + * Iterate the parent dir to confirm that there is 112 + * exactly one entry pointing back to the inode being 113 + * scanned. 
114 + bufsize = (size_t)min_t(loff_t, XFS_READDIR_BUFSIZE, 115 + parent->i_d.di_size); 116 + oldpos = 0; 117 + while (true) { 118 + error = xfs_readdir(sc->tp, parent, &spc.dc, bufsize); 119 + if (error) 120 + goto out; 121 + if (oldpos == spc.dc.pos) 122 + break; 123 + oldpos = spc.dc.pos; 124 + } 125 + *nlink = spc.nlink; 126 + out: 127 + return error; 128 + } 129 + 130 + /* 131 + * Given the inode number of the alleged parent of the inode being 132 + * scrubbed, try to validate that the parent has exactly one directory 133 + * entry pointing back to the inode being scrubbed. 134 + */ 135 + STATIC int 136 + xfs_scrub_parent_validate( 137 + struct xfs_scrub_context *sc, 138 + xfs_ino_t dnum, 139 + bool *try_again) 140 + { 141 + struct xfs_mount *mp = sc->mp; 142 + struct xfs_inode *dp = NULL; 143 + xfs_nlink_t expected_nlink; 144 + xfs_nlink_t nlink; 145 + int error = 0; 146 + 147 + *try_again = false; 148 + 149 + 150 + /* '..' must not point to ourselves. */ 151 + if (sc->ip->i_ino == dnum) { 152 + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); 153 + goto out; 154 + } 155 + 156 + /* 157 + * If we're an unlinked directory, the parent /won't/ have a link 158 + * to us. Otherwise, it should have one link. 159 + */ 160 + expected_nlink = VFS_I(sc->ip)->i_nlink == 0 ? 0 : 1; 161 + 162 + /* 163 + * Grab this parent inode. We release the inode before we 164 + * cancel the scrub transaction. Since we don't know a 165 + priori that releasing the inode won't trigger eofblocks 166 + cleanup (which allocates what would be a nested transaction) 167 + if the parent pointer erroneously points to a file, we 168 + can't use DONTCACHE here because DONTCACHE inodes can trigger 169 + immediate inactive cleanup of the inode. 
170 + */ 171 + error = xfs_iget(mp, sc->tp, dnum, 0, 0, &dp); 172 + if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error)) 173 + goto out; 174 + if (dp == sc->ip) { 175 + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); 176 + goto out_rele; 177 + } 178 + 179 + /* 180 + * We prefer to keep the inode locked while we lock and search 181 + * its alleged parent for a forward reference. If we can grab 182 + * the iolock, validate the pointers and we're done. We must 183 + * use nowait here to avoid an ABBA deadlock on the parent and 184 + * the child inodes. 185 + */ 186 + if (xfs_ilock_nowait(dp, XFS_IOLOCK_SHARED)) { 187 + error = xfs_scrub_parent_count_parent_dentries(sc, dp, &nlink); 188 + if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, 189 + &error)) 190 + goto out_unlock; 191 + if (nlink != expected_nlink) 192 + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); 193 + goto out_unlock; 194 + } 195 + 196 + /* 197 + * The game changes if we get here. We failed to lock the parent, 198 + * so we're going to try to verify both pointers while only holding 199 + * one lock so as to avoid deadlocking with something that's actually 200 + * trying to traverse down the directory tree. 201 + */ 202 + xfs_iunlock(sc->ip, sc->ilock_flags); 203 + sc->ilock_flags = 0; 204 + xfs_ilock(dp, XFS_IOLOCK_SHARED); 205 + 206 + /* Go looking for our dentry. */ 207 + error = xfs_scrub_parent_count_parent_dentries(sc, dp, &nlink); 208 + if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error)) 209 + goto out_unlock; 210 + 211 + /* Drop the parent lock, relock this inode. */ 212 + xfs_iunlock(dp, XFS_IOLOCK_SHARED); 213 + sc->ilock_flags = XFS_IOLOCK_EXCL; 214 + xfs_ilock(sc->ip, sc->ilock_flags); 215 + 216 + /* 217 + * If we're an unlinked directory, the parent /won't/ have a link 218 + * to us. Otherwise, it should have one link. We have to re-set 219 + * it here because we dropped the lock on sc->ip. 
220 + */ 221 + expected_nlink = VFS_I(sc->ip)->i_nlink == 0 ? 0 : 1; 222 + 223 + /* Look up '..' to see if the inode changed. */ 224 + error = xfs_dir_lookup(sc->tp, sc->ip, &xfs_name_dotdot, &dnum, NULL); 225 + if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error)) 226 + goto out_rele; 227 + 228 + /* Drat, parent changed. Try again! */ 229 + if (dnum != dp->i_ino) { 230 + iput(VFS_I(dp)); 231 + *try_again = true; 232 + return 0; 233 + } 234 + iput(VFS_I(dp)); 235 + 236 + /* 237 + * '..' didn't change, so check that there was only one entry 238 + * for us in the parent. 239 + */ 240 + if (nlink != expected_nlink) 241 + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); 242 + return error; 243 + 244 + out_unlock: 245 + xfs_iunlock(dp, XFS_IOLOCK_SHARED); 246 + out_rele: 247 + iput(VFS_I(dp)); 248 + out: 249 + return error; 250 + } 251 + 252 + /* Scrub a parent pointer. */ 253 + int 254 + xfs_scrub_parent( 255 + struct xfs_scrub_context *sc) 256 + { 257 + struct xfs_mount *mp = sc->mp; 258 + xfs_ino_t dnum; 259 + bool try_again; 260 + int tries = 0; 261 + int error = 0; 262 + 263 + /* 264 + * If we're a directory, check that the '..' link points up to 265 + * a directory that has one entry pointing to us. 266 + */ 267 + if (!S_ISDIR(VFS_I(sc->ip)->i_mode)) 268 + return -ENOENT; 269 + 270 + /* We're not a special inode, are we? */ 271 + if (!xfs_verify_dir_ino(mp, sc->ip->i_ino)) { 272 + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); 273 + goto out; 274 + } 275 + 276 + /* 277 + * The VFS grabs a read or write lock via i_rwsem before it reads 278 + * or writes to a directory. If we've gotten this far we've 279 + * already obtained IOLOCK_EXCL, which (since 4.10) is the same as 280 + * getting a write lock on i_rwsem. Therefore, it is safe for us 281 + * to drop the ILOCK here in order to do directory lookups. 
282 + */ 283 + sc->ilock_flags &= ~(XFS_ILOCK_EXCL | XFS_MMAPLOCK_EXCL); 284 + xfs_iunlock(sc->ip, XFS_ILOCK_EXCL | XFS_MMAPLOCK_EXCL); 285 + 286 + /* Look up '..' */ 287 + error = xfs_dir_lookup(sc->tp, sc->ip, &xfs_name_dotdot, &dnum, NULL); 288 + if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error)) 289 + goto out; 290 + if (!xfs_verify_dir_ino(mp, dnum)) { 291 + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); 292 + goto out; 293 + } 294 + 295 + /* Is this the root dir? Then '..' must point to itself. */ 296 + if (sc->ip == mp->m_rootip) { 297 + if (sc->ip->i_ino != mp->m_sb.sb_rootino || 298 + sc->ip->i_ino != dnum) 299 + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); 300 + goto out; 301 + } 302 + 303 + do { 304 + error = xfs_scrub_parent_validate(sc, dnum, &try_again); 305 + if (error) 306 + goto out; 307 + } while (try_again && ++tries < 20); 308 + 309 + /* 310 + * We gave it our best shot but failed, so mark this scrub 311 + * incomplete. Userspace can decide if it wants to try again. 312 + */ 313 + if (try_again && tries == 20) 314 + xfs_scrub_set_incomplete(sc); 315 + out: 316 + return error; 317 + }
fs/xfs/scrub/quota.c (+304)
··· 1 + /* 2 + * Copyright (C) 2017 Oracle. All Rights Reserved. 3 + * 4 + * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 + * 6 + * This program is free software; you can redistribute it and/or 7 + * modify it under the terms of the GNU General Public License 8 + * as published by the Free Software Foundation; either version 2 9 + * of the License, or (at your option) any later version. 10 + * 11 + * This program is distributed in the hope that it would be useful, 12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 + * GNU General Public License for more details. 15 + * 16 + * You should have received a copy of the GNU General Public License 17 + * along with this program; if not, write the Free Software Foundation, 18 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 19 + */ 20 + #include "xfs.h" 21 + #include "xfs_fs.h" 22 + #include "xfs_shared.h" 23 + #include "xfs_format.h" 24 + #include "xfs_trans_resv.h" 25 + #include "xfs_mount.h" 26 + #include "xfs_defer.h" 27 + #include "xfs_btree.h" 28 + #include "xfs_bit.h" 29 + #include "xfs_log_format.h" 30 + #include "xfs_trans.h" 31 + #include "xfs_sb.h" 32 + #include "xfs_inode.h" 33 + #include "xfs_inode_fork.h" 34 + #include "xfs_alloc.h" 35 + #include "xfs_bmap.h" 36 + #include "xfs_quota.h" 37 + #include "xfs_qm.h" 38 + #include "xfs_dquot.h" 39 + #include "xfs_dquot_item.h" 40 + #include "scrub/xfs_scrub.h" 41 + #include "scrub/scrub.h" 42 + #include "scrub/common.h" 43 + #include "scrub/trace.h" 44 + 45 + /* Convert a scrub type code to a DQ flag, or return 0 if error. 
*/ 46 + static inline uint 47 + xfs_scrub_quota_to_dqtype( 48 + struct xfs_scrub_context *sc) 49 + { 50 + switch (sc->sm->sm_type) { 51 + case XFS_SCRUB_TYPE_UQUOTA: 52 + return XFS_DQ_USER; 53 + case XFS_SCRUB_TYPE_GQUOTA: 54 + return XFS_DQ_GROUP; 55 + case XFS_SCRUB_TYPE_PQUOTA: 56 + return XFS_DQ_PROJ; 57 + default: 58 + return 0; 59 + } 60 + } 61 + 62 + /* Set us up to scrub a quota. */ 63 + int 64 + xfs_scrub_setup_quota( 65 + struct xfs_scrub_context *sc, 66 + struct xfs_inode *ip) 67 + { 68 + uint dqtype; 69 + 70 + /* 71 + * If userspace gave us an AG number or inode data, they don't 72 + * know what they're doing. Get out. 73 + */ 74 + if (sc->sm->sm_agno || sc->sm->sm_ino || sc->sm->sm_gen) 75 + return -EINVAL; 76 + 77 + dqtype = xfs_scrub_quota_to_dqtype(sc); 78 + if (dqtype == 0) 79 + return -EINVAL; 80 + if (!xfs_this_quota_on(sc->mp, dqtype)) 81 + return -ENOENT; 82 + return 0; 83 + } 84 + 85 + /* Quotas. */ 86 + 87 + /* Scrub the fields in an individual quota item. */ 88 + STATIC void 89 + xfs_scrub_quota_item( 90 + struct xfs_scrub_context *sc, 91 + uint dqtype, 92 + struct xfs_dquot *dq, 93 + xfs_dqid_t id) 94 + { 95 + struct xfs_mount *mp = sc->mp; 96 + struct xfs_disk_dquot *d = &dq->q_core; 97 + struct xfs_quotainfo *qi = mp->m_quotainfo; 98 + xfs_fileoff_t offset; 99 + unsigned long long bsoft; 100 + unsigned long long isoft; 101 + unsigned long long rsoft; 102 + unsigned long long bhard; 103 + unsigned long long ihard; 104 + unsigned long long rhard; 105 + unsigned long long bcount; 106 + unsigned long long icount; 107 + unsigned long long rcount; 108 + xfs_ino_t fs_icount; 109 + 110 + offset = id * qi->qi_dqperchunk; 111 + 112 + /* 113 + * We fed $id and DQNEXT into the xfs_qm_dqget call, which means 114 + * that the actual dquot we got must either have the same id or 115 + * the next higher id. 
116 + */ 117 + if (id > be32_to_cpu(d->d_id)) 118 + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); 119 + 120 + /* Did we get the dquot type we wanted? */ 121 + if (dqtype != (d->d_flags & XFS_DQ_ALLTYPES)) 122 + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); 123 + 124 + if (d->d_pad0 != cpu_to_be32(0) || d->d_pad != cpu_to_be16(0)) 125 + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); 126 + 127 + /* Check the limits. */ 128 + bhard = be64_to_cpu(d->d_blk_hardlimit); 129 + ihard = be64_to_cpu(d->d_ino_hardlimit); 130 + rhard = be64_to_cpu(d->d_rtb_hardlimit); 131 + 132 + bsoft = be64_to_cpu(d->d_blk_softlimit); 133 + isoft = be64_to_cpu(d->d_ino_softlimit); 134 + rsoft = be64_to_cpu(d->d_rtb_softlimit); 135 + 136 + /* 137 + * Warn if the hard limits are larger than the fs. 138 + * Administrators can do this, though in production this seems 139 + * suspect, which is why we flag it for review. 140 + * 141 + * Complain about corruption if the soft limit is greater than 142 + * the hard limit. 143 + */ 144 + if (bhard > mp->m_sb.sb_dblocks) 145 + xfs_scrub_fblock_set_warning(sc, XFS_DATA_FORK, offset); 146 + if (bsoft > bhard) 147 + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); 148 + 149 + if (ihard > mp->m_maxicount) 150 + xfs_scrub_fblock_set_warning(sc, XFS_DATA_FORK, offset); 151 + if (isoft > ihard) 152 + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); 153 + 154 + if (rhard > mp->m_sb.sb_rblocks) 155 + xfs_scrub_fblock_set_warning(sc, XFS_DATA_FORK, offset); 156 + if (rsoft > rhard) 157 + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); 158 + 159 + /* Check the resource counts. */ 160 + bcount = be64_to_cpu(d->d_bcount); 161 + icount = be64_to_cpu(d->d_icount); 162 + rcount = be64_to_cpu(d->d_rtbcount); 163 + fs_icount = percpu_counter_sum(&mp->m_icount); 164 + 165 + /* 166 + * Check that usage doesn't exceed physical limits. 
However, on 167 + * a reflink filesystem we're allowed to exceed physical space 168 + * if there are no quota limits. 169 + */ 170 + if (xfs_sb_version_hasreflink(&mp->m_sb)) { 171 + if (mp->m_sb.sb_dblocks < bcount) 172 + xfs_scrub_fblock_set_warning(sc, XFS_DATA_FORK, 173 + offset); 174 + } else { 175 + if (mp->m_sb.sb_dblocks < bcount) 176 + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 177 + offset); 178 + } 179 + if (icount > fs_icount || rcount > mp->m_sb.sb_rblocks) 180 + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); 181 + 182 + /* 183 + * We can violate the hard limits if the admin suddenly sets a 184 + * lower limit than the actual usage. However, we flag it for 185 + * admin review. 186 + */ 187 + if (id != 0 && bhard != 0 && bcount > bhard) 188 + xfs_scrub_fblock_set_warning(sc, XFS_DATA_FORK, offset); 189 + if (id != 0 && ihard != 0 && icount > ihard) 190 + xfs_scrub_fblock_set_warning(sc, XFS_DATA_FORK, offset); 191 + if (id != 0 && rhard != 0 && rcount > rhard) 192 + xfs_scrub_fblock_set_warning(sc, XFS_DATA_FORK, offset); 193 + } 194 + 195 + /* Scrub all of a quota type's items. */ 196 + int 197 + xfs_scrub_quota( 198 + struct xfs_scrub_context *sc) 199 + { 200 + struct xfs_bmbt_irec irec = { 0 }; 201 + struct xfs_mount *mp = sc->mp; 202 + struct xfs_inode *ip; 203 + struct xfs_quotainfo *qi = mp->m_quotainfo; 204 + struct xfs_dquot *dq; 205 + xfs_fileoff_t max_dqid_off; 206 + xfs_fileoff_t off = 0; 207 + xfs_dqid_t id = 0; 208 + uint dqtype; 209 + int nimaps; 210 + int error; 211 + 212 + if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) 213 + return -ENOENT; 214 + 215 + mutex_lock(&qi->qi_quotaofflock); 216 + dqtype = xfs_scrub_quota_to_dqtype(sc); 217 + if (!xfs_this_quota_on(sc->mp, dqtype)) { 218 + error = -ENOENT; 219 + goto out_unlock_quota; 220 + } 221 + 222 + /* Attach to the quota inode and set sc->ip so that reporting works. 
*/ 223 + ip = xfs_quota_inode(sc->mp, dqtype); 224 + sc->ip = ip; 225 + 226 + /* Look for problem extents. */ 227 + xfs_ilock(ip, XFS_ILOCK_EXCL); 228 + if (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) { 229 + xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino, NULL); 230 + goto out_unlock_inode; 231 + } 232 + max_dqid_off = ((xfs_dqid_t)-1) / qi->qi_dqperchunk; 233 + while (1) { 234 + if (xfs_scrub_should_terminate(sc, &error)) 235 + break; 236 + 237 + off = irec.br_startoff + irec.br_blockcount; 238 + nimaps = 1; 239 + error = xfs_bmapi_read(ip, off, -1, &irec, &nimaps, 240 + XFS_BMAPI_ENTIRE); 241 + if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, off, 242 + &error)) 243 + goto out_unlock_inode; 244 + if (!nimaps) 245 + break; 246 + if (irec.br_startblock == HOLESTARTBLOCK) 247 + continue; 248 + 249 + /* Check the extent record doesn't point to crap. */ 250 + if (irec.br_startblock + irec.br_blockcount <= 251 + irec.br_startblock) 252 + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 253 + irec.br_startoff); 254 + if (!xfs_verify_fsbno(mp, irec.br_startblock) || 255 + !xfs_verify_fsbno(mp, irec.br_startblock + 256 + irec.br_blockcount - 1)) 257 + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 258 + irec.br_startoff); 259 + 260 + /* 261 + * Unwritten extents or blocks mapped above the highest 262 + * quota id shouldn't happen. 263 + */ 264 + if (isnullstartblock(irec.br_startblock) || 265 + irec.br_startoff > max_dqid_off || 266 + irec.br_startoff + irec.br_blockcount > max_dqid_off + 1) 267 + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, off); 268 + } 269 + xfs_iunlock(ip, XFS_ILOCK_EXCL); 270 + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 271 + goto out; 272 + 273 + /* Check all the quota items. 
*/ 274 + while (id < ((xfs_dqid_t)-1ULL)) { 275 + if (xfs_scrub_should_terminate(sc, &error)) 276 + break; 277 + 278 + error = xfs_qm_dqget(mp, NULL, id, dqtype, XFS_QMOPT_DQNEXT, 279 + &dq); 280 + if (error == -ENOENT) 281 + break; 282 + if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 283 + id * qi->qi_dqperchunk, &error)) 284 + break; 285 + 286 + xfs_scrub_quota_item(sc, dqtype, dq, id); 287 + 288 + id = be32_to_cpu(dq->q_core.d_id) + 1; 289 + xfs_qm_dqput(dq); 290 + if (!id) 291 + break; 292 + } 293 + 294 + out: 295 + /* We set sc->ip earlier, so make sure we clear it now. */ 296 + sc->ip = NULL; 297 + out_unlock_quota: 298 + mutex_unlock(&qi->qi_quotaofflock); 299 + return error; 300 + 301 + out_unlock_inode: 302 + xfs_iunlock(ip, XFS_ILOCK_EXCL); 303 + goto out; 304 + }
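The limit checks in xfs_scrub_quota_item() above reduce to two rules: a soft limit above its hard limit is corruption, while a hard limit larger than the filesystem is merely flagged for administrator review. A standalone sketch of that rule (check_quota_limit is a hypothetical helper for illustration, not a kernel function):

```c
#include <assert.h>
#include <stdint.h>

/* Classification mirroring the scrubber's warning/corruption distinction. */
enum limit_check { LIMIT_OK, LIMIT_WARN, LIMIT_CORRUPT };

/*
 * Sanity-check one quota limit pair the way xfs_scrub_quota_item() does:
 * a soft limit above its hard limit is outright corruption, while a hard
 * limit larger than the filesystem's capacity is only suspicious.
 */
static enum limit_check
check_quota_limit(uint64_t soft, uint64_t hard, uint64_t fs_capacity)
{
	if (soft > hard)
		return LIMIT_CORRUPT;
	if (hard > fs_capacity)
		return LIMIT_WARN;
	return LIMIT_OK;
}
```

Unlike this sketch, the kernel code runs both checks independently, so a record can be flagged corrupt and warned about in the same pass.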
fs/xfs/scrub/refcount.c (+99 lines)
/*
 * Copyright (C) 2017 Oracle. All Rights Reserved.
 *
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_alloc.h"
#include "xfs_rmap.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
#include "scrub/trace.h"

/*
 * Set us up to scrub reference count btrees.
 */
int
xfs_scrub_setup_ag_refcountbt(
	struct xfs_scrub_context	*sc,
	struct xfs_inode		*ip)
{
	return xfs_scrub_setup_ag_btree(sc, ip, false);
}

/* Reference count btree scrubber. */

/* Scrub a refcountbt record. */
STATIC int
xfs_scrub_refcountbt_rec(
	struct xfs_scrub_btree		*bs,
	union xfs_btree_rec		*rec)
{
	struct xfs_mount		*mp = bs->cur->bc_mp;
	xfs_agnumber_t			agno = bs->cur->bc_private.a.agno;
	xfs_agblock_t			bno;
	xfs_extlen_t			len;
	xfs_nlink_t			refcount;
	bool				has_cowflag;
	int				error = 0;

	bno = be32_to_cpu(rec->refc.rc_startblock);
	len = be32_to_cpu(rec->refc.rc_blockcount);
	refcount = be32_to_cpu(rec->refc.rc_refcount);

	/* Only CoW records can have refcount == 1. */
	has_cowflag = (bno & XFS_REFC_COW_START);
	if ((refcount == 1 && !has_cowflag) || (refcount != 1 && has_cowflag))
		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);

	/* Check the extent. */
	bno &= ~XFS_REFC_COW_START;
	if (bno + len <= bno ||
	    !xfs_verify_agbno(mp, agno, bno) ||
	    !xfs_verify_agbno(mp, agno, bno + len - 1))
		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);

	if (refcount == 0)
		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);

	return error;
}

/* Scrub the refcount btree for some AG. */
int
xfs_scrub_refcountbt(
	struct xfs_scrub_context	*sc)
{
	struct xfs_owner_info		oinfo;

	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_REFC);
	return xfs_scrub_btree(sc, sc->sa.refc_cur, xfs_scrub_refcountbt_rec,
			&oinfo, NULL);
}
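The CoW-flag rule in xfs_scrub_refcountbt_rec can be checked in isolation: a reference count of exactly 1 is legal only for copy-on-write staging extents, which are tagged by a flag bit in the start block. A minimal sketch (refc_rec_valid is an illustrative helper; the flag value assumes XFS_REFC_COW_START is the high bit of the 32-bit start block, as in the kernel headers):

```c
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Assumed value of XFS_REFC_COW_START: high bit marks a CoW staging extent. */
#define REFC_COW_START (1U << 31)

/*
 * Mirror the refcountbt record rule: only CoW staging extents may have a
 * reference count of exactly 1, CoW extents must have exactly that, and a
 * refcount of zero is never valid.
 */
static bool
refc_rec_valid(uint32_t startblock, uint32_t refcount)
{
	bool has_cowflag = (startblock & REFC_COW_START) != 0;

	if (refcount == 1 && !has_cowflag)
		return false;
	if (refcount != 1 && has_cowflag)
		return false;
	return refcount != 0;
}
```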
fs/xfs/scrub/rmap.c (+138 lines)
··· 1 + /* 2 + * Copyright (C) 2017 Oracle. All Rights Reserved. 3 + * 4 + * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 + * 6 + * This program is free software; you can redistribute it and/or 7 + * modify it under the terms of the GNU General Public License 8 + * as published by the Free Software Foundation; either version 2 9 + * of the License, or (at your option) any later version. 10 + * 11 + * This program is distributed in the hope that it would be useful, 12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 + * GNU General Public License for more details. 15 + * 16 + * You should have received a copy of the GNU General Public License 17 + * along with this program; if not, write the Free Software Foundation, 18 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 19 + */ 20 + #include "xfs.h" 21 + #include "xfs_fs.h" 22 + #include "xfs_shared.h" 23 + #include "xfs_format.h" 24 + #include "xfs_trans_resv.h" 25 + #include "xfs_mount.h" 26 + #include "xfs_defer.h" 27 + #include "xfs_btree.h" 28 + #include "xfs_bit.h" 29 + #include "xfs_log_format.h" 30 + #include "xfs_trans.h" 31 + #include "xfs_sb.h" 32 + #include "xfs_alloc.h" 33 + #include "xfs_ialloc.h" 34 + #include "xfs_rmap.h" 35 + #include "scrub/xfs_scrub.h" 36 + #include "scrub/scrub.h" 37 + #include "scrub/common.h" 38 + #include "scrub/btree.h" 39 + #include "scrub/trace.h" 40 + 41 + /* 42 + * Set us up to scrub reverse mapping btrees. 43 + */ 44 + int 45 + xfs_scrub_setup_ag_rmapbt( 46 + struct xfs_scrub_context *sc, 47 + struct xfs_inode *ip) 48 + { 49 + return xfs_scrub_setup_ag_btree(sc, ip, false); 50 + } 51 + 52 + /* Reverse-mapping scrubber. */ 53 + 54 + /* Scrub an rmapbt record. 
*/ 55 + STATIC int 56 + xfs_scrub_rmapbt_rec( 57 + struct xfs_scrub_btree *bs, 58 + union xfs_btree_rec *rec) 59 + { 60 + struct xfs_mount *mp = bs->cur->bc_mp; 61 + struct xfs_rmap_irec irec; 62 + xfs_agnumber_t agno = bs->cur->bc_private.a.agno; 63 + bool non_inode; 64 + bool is_unwritten; 65 + bool is_bmbt; 66 + bool is_attr; 67 + int error; 68 + 69 + error = xfs_rmap_btrec_to_irec(rec, &irec); 70 + if (!xfs_scrub_btree_process_error(bs->sc, bs->cur, 0, &error)) 71 + goto out; 72 + 73 + /* Check extent. */ 74 + if (irec.rm_startblock + irec.rm_blockcount <= irec.rm_startblock) 75 + xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); 76 + 77 + if (irec.rm_owner == XFS_RMAP_OWN_FS) { 78 + /* 79 + * xfs_verify_agbno returns false for static fs metadata. 80 + * Since that only exists at the start of the AG, validate 81 + * that by hand. 82 + */ 83 + if (irec.rm_startblock != 0 || 84 + irec.rm_blockcount != XFS_AGFL_BLOCK(mp) + 1) 85 + xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); 86 + } else { 87 + /* 88 + * Otherwise we must point somewhere past the static metadata 89 + * but before the end of the FS. Run the regular check. 90 + */ 91 + if (!xfs_verify_agbno(mp, agno, irec.rm_startblock) || 92 + !xfs_verify_agbno(mp, agno, irec.rm_startblock + 93 + irec.rm_blockcount - 1)) 94 + xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); 95 + } 96 + 97 + /* Check flags. 
*/ 98 + non_inode = XFS_RMAP_NON_INODE_OWNER(irec.rm_owner); 99 + is_bmbt = irec.rm_flags & XFS_RMAP_BMBT_BLOCK; 100 + is_attr = irec.rm_flags & XFS_RMAP_ATTR_FORK; 101 + is_unwritten = irec.rm_flags & XFS_RMAP_UNWRITTEN; 102 + 103 + if (is_bmbt && irec.rm_offset != 0) 104 + xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); 105 + 106 + if (non_inode && irec.rm_offset != 0) 107 + xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); 108 + 109 + if (is_unwritten && (is_bmbt || non_inode || is_attr)) 110 + xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); 111 + 112 + if (non_inode && (is_bmbt || is_unwritten || is_attr)) 113 + xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); 114 + 115 + if (!non_inode) { 116 + if (!xfs_verify_ino(mp, irec.rm_owner)) 117 + xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); 118 + } else { 119 + /* Non-inode owner within the magic values? */ 120 + if (irec.rm_owner <= XFS_RMAP_OWN_MIN || 121 + irec.rm_owner > XFS_RMAP_OWN_FS) 122 + xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); 123 + } 124 + out: 125 + return error; 126 + } 127 + 128 + /* Scrub the rmap btree for some AG. */ 129 + int 130 + xfs_scrub_rmapbt( 131 + struct xfs_scrub_context *sc) 132 + { 133 + struct xfs_owner_info oinfo; 134 + 135 + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG); 136 + return xfs_scrub_btree(sc, sc->sa.rmap_cur, xfs_scrub_rmapbt_rec, 137 + &oinfo, NULL); 138 + }
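The flag checks in xfs_scrub_rmapbt_rec encode a small consistency matrix: bmbt blocks and non-inode owners carry no file offset, the unwritten flag only applies to plain file data, and non-inode owners take no flags at all. A standalone sketch of just that matrix (rmap_flags_valid and the RM_* bit values are illustrative stand-ins for the XFS_RMAP_* flags):

```c
#include <assert.h>
#include <stdbool.h>

/* Hypothetical stand-ins for the XFS_RMAP_* flag bits. */
#define RM_ATTR_FORK	(1u << 0)
#define RM_BMBT_BLOCK	(1u << 1)
#define RM_UNWRITTEN	(1u << 2)

/*
 * Mirror the rmapbt flag rules: bmbt and non-inode records carry no
 * offset, unwritten only applies to file data, and non-inode owners
 * (AG metadata, log, etc.) take no flags at all.
 */
static bool
rmap_flags_valid(bool non_inode, unsigned int flags, unsigned long long offset)
{
	bool is_attr = flags & RM_ATTR_FORK;
	bool is_bmbt = flags & RM_BMBT_BLOCK;
	bool is_unwritten = flags & RM_UNWRITTEN;

	if (is_bmbt && offset != 0)
		return false;
	if (non_inode && offset != 0)
		return false;
	if (is_unwritten && (is_bmbt || non_inode || is_attr))
		return false;
	if (non_inode && (is_bmbt || is_unwritten || is_attr))
		return false;
	return true;
}
```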
fs/xfs/scrub/rtbitmap.c (+108 lines)
··· 1 + /* 2 + * Copyright (C) 2017 Oracle. All Rights Reserved. 3 + * 4 + * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 + * 6 + * This program is free software; you can redistribute it and/or 7 + * modify it under the terms of the GNU General Public License 8 + * as published by the Free Software Foundation; either version 2 9 + * of the License, or (at your option) any later version. 10 + * 11 + * This program is distributed in the hope that it would be useful, 12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 + * GNU General Public License for more details. 15 + * 16 + * You should have received a copy of the GNU General Public License 17 + * along with this program; if not, write the Free Software Foundation, 18 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 19 + */ 20 + #include "xfs.h" 21 + #include "xfs_fs.h" 22 + #include "xfs_shared.h" 23 + #include "xfs_format.h" 24 + #include "xfs_trans_resv.h" 25 + #include "xfs_mount.h" 26 + #include "xfs_defer.h" 27 + #include "xfs_btree.h" 28 + #include "xfs_bit.h" 29 + #include "xfs_log_format.h" 30 + #include "xfs_trans.h" 31 + #include "xfs_sb.h" 32 + #include "xfs_alloc.h" 33 + #include "xfs_rtalloc.h" 34 + #include "xfs_inode.h" 35 + #include "scrub/xfs_scrub.h" 36 + #include "scrub/scrub.h" 37 + #include "scrub/common.h" 38 + #include "scrub/trace.h" 39 + 40 + /* Set us up with the realtime metadata locked. */ 41 + int 42 + xfs_scrub_setup_rt( 43 + struct xfs_scrub_context *sc, 44 + struct xfs_inode *ip) 45 + { 46 + struct xfs_mount *mp = sc->mp; 47 + int error = 0; 48 + 49 + /* 50 + * If userspace gave us an AG number or inode data, they don't 51 + * know what they're doing. Get out. 
52 + */ 53 + if (sc->sm->sm_agno || sc->sm->sm_ino || sc->sm->sm_gen) 54 + return -EINVAL; 55 + 56 + error = xfs_scrub_setup_fs(sc, ip); 57 + if (error) 58 + return error; 59 + 60 + sc->ilock_flags = XFS_ILOCK_EXCL | XFS_ILOCK_RTBITMAP; 61 + sc->ip = mp->m_rbmip; 62 + xfs_ilock(sc->ip, sc->ilock_flags); 63 + 64 + return 0; 65 + } 66 + 67 + /* Realtime bitmap. */ 68 + 69 + /* Scrub a free extent record from the realtime bitmap. */ 70 + STATIC int 71 + xfs_scrub_rtbitmap_rec( 72 + struct xfs_trans *tp, 73 + struct xfs_rtalloc_rec *rec, 74 + void *priv) 75 + { 76 + struct xfs_scrub_context *sc = priv; 77 + 78 + if (rec->ar_startblock + rec->ar_blockcount <= rec->ar_startblock || 79 + !xfs_verify_rtbno(sc->mp, rec->ar_startblock) || 80 + !xfs_verify_rtbno(sc->mp, rec->ar_startblock + 81 + rec->ar_blockcount - 1)) 82 + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); 83 + return 0; 84 + } 85 + 86 + /* Scrub the realtime bitmap. */ 87 + int 88 + xfs_scrub_rtbitmap( 89 + struct xfs_scrub_context *sc) 90 + { 91 + int error; 92 + 93 + error = xfs_rtalloc_query_all(sc->tp, xfs_scrub_rtbitmap_rec, sc); 94 + if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error)) 95 + goto out; 96 + 97 + out: 98 + return error; 99 + } 100 + 101 + /* Scrub the realtime summary. */ 102 + int 103 + xfs_scrub_rtsummary( 104 + struct xfs_scrub_context *sc) 105 + { 106 + /* XXX: implement this some day */ 107 + return -ENOENT; 108 + }
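The extent test in xfs_scrub_rtbitmap_rec uses the same idiom that appears throughout these scrubbers: `start + len <= start` catches both unsigned wraparound and a zero length in one comparison. A simplified sketch (extent_sane is an illustrative helper; the kernel uses xfs_verify_rtbno rather than a raw block-count bound):

```c
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/*
 * With unsigned arithmetic and len > 0, start + len can only be <= start
 * if the addition wrapped around; len == 0 also fails, which is what we
 * want for an extent record. Then verify both endpoints are on-device.
 */
static bool
extent_sane(uint64_t start, uint64_t len, uint64_t fs_blocks)
{
	if (start + len <= start)		/* wraparound or len == 0 */
		return false;
	if (start >= fs_blocks || start + len - 1 >= fs_blocks)
		return false;			/* runs past end of device */
	return true;
}
```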
fs/xfs/scrub/scrub.c (+392 lines)
··· 1 + /* 2 + * Copyright (C) 2017 Oracle. All Rights Reserved. 3 + * 4 + * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 + * 6 + * This program is free software; you can redistribute it and/or 7 + * modify it under the terms of the GNU General Public License 8 + * as published by the Free Software Foundation; either version 2 9 + * of the License, or (at your option) any later version. 10 + * 11 + * This program is distributed in the hope that it would be useful, 12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 + * GNU General Public License for more details. 15 + * 16 + * You should have received a copy of the GNU General Public License 17 + * along with this program; if not, write the Free Software Foundation, 18 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 19 + */ 20 + #include "xfs.h" 21 + #include "xfs_fs.h" 22 + #include "xfs_shared.h" 23 + #include "xfs_format.h" 24 + #include "xfs_trans_resv.h" 25 + #include "xfs_mount.h" 26 + #include "xfs_defer.h" 27 + #include "xfs_btree.h" 28 + #include "xfs_bit.h" 29 + #include "xfs_log_format.h" 30 + #include "xfs_trans.h" 31 + #include "xfs_sb.h" 32 + #include "xfs_inode.h" 33 + #include "xfs_icache.h" 34 + #include "xfs_itable.h" 35 + #include "xfs_alloc.h" 36 + #include "xfs_alloc_btree.h" 37 + #include "xfs_bmap.h" 38 + #include "xfs_bmap_btree.h" 39 + #include "xfs_ialloc.h" 40 + #include "xfs_ialloc_btree.h" 41 + #include "xfs_refcount.h" 42 + #include "xfs_refcount_btree.h" 43 + #include "xfs_rmap.h" 44 + #include "xfs_rmap_btree.h" 45 + #include "scrub/xfs_scrub.h" 46 + #include "scrub/scrub.h" 47 + #include "scrub/common.h" 48 + #include "scrub/trace.h" 49 + #include "scrub/scrub.h" 50 + #include "scrub/btree.h" 51 + 52 + /* 53 + * Online Scrub and Repair 54 + * 55 + * Traditionally, XFS (the kernel driver) did not know how to check or 56 + * repair on-disk data structures. 
That task was left to the xfs_check 57 + * and xfs_repair tools, both of which require taking the filesystem 58 + * offline for a thorough but time consuming examination. Online 59 + * scrub & repair, on the other hand, enables us to check the metadata 60 + * for obvious errors while carefully stepping around the filesystem's 61 + * ongoing operations, locking rules, etc. 62 + * 63 + * Given that most XFS metadata consist of records stored in a btree, 64 + * most of the checking functions iterate the btree blocks themselves 65 + * looking for irregularities. When a record block is encountered, each 66 + * record can be checked for obviously bad values. Record values can 67 + * also be cross-referenced against other btrees to look for potential 68 + * misunderstandings between pieces of metadata. 69 + * 70 + * It is expected that the checkers responsible for per-AG metadata 71 + * structures will lock the AG headers (AGI, AGF, AGFL), iterate the 72 + * metadata structure, and perform any relevant cross-referencing before 73 + * unlocking the AG and returning the results to userspace. These 74 + * scrubbers must not keep an AG locked for too long to avoid tying up 75 + * the block and inode allocators. 76 + * 77 + * Block maps and b-trees rooted in an inode present a special challenge 78 + * because they can involve extents from any AG. The general scrubber 79 + * structure of lock -> check -> xref -> unlock still holds, but AG 80 + * locking order rules /must/ be obeyed to avoid deadlocks. The 81 + * ordering rule, of course, is that we must lock in increasing AG 82 + * order. Helper functions are provided to track which AG headers we've 83 + * already locked. If we detect an imminent locking order violation, we 84 + * can signal a potential deadlock, in which case the scrubber can jump 85 + * out to the top level, lock all the AGs in order, and retry the scrub. 
86 + * 87 + * For file data (directories, extended attributes, symlinks) scrub, we 88 + * can simply lock the inode and walk the data. For btree data 89 + * (directories and attributes) we follow the same btree-scrubbing 90 + * strategy outlined previously to check the records. 91 + * 92 + * We use a bit of trickery with transactions to avoid buffer deadlocks 93 + * if there is a cycle in the metadata. The basic problem is that 94 + * travelling down a btree involves locking the current buffer at each 95 + * tree level. If a pointer should somehow point back to a buffer that 96 + * we've already examined, we will deadlock due to the second buffer 97 + * locking attempt. Note however that grabbing a buffer in transaction 98 + * context links the locked buffer to the transaction. If we try to 99 + * re-grab the buffer in the context of the same transaction, we avoid 100 + * the second lock attempt and continue. Between the verifier and the 101 + * scrubber, something will notice that something is amiss and report 102 + * the corruption. Therefore, each scrubber will allocate an empty 103 + * transaction, attach buffers to it, and cancel the transaction at the 104 + * end of the scrub run. Cancelling a non-dirty transaction simply 105 + * unlocks the buffers. 106 + * 107 + * There are four pieces of data that scrub can communicate to 108 + * userspace. The first is the error code (errno), which can be used to 109 + * communicate operational errors in performing the scrub. There are 110 + * also three flags that can be set in the scrub context. If the data 111 + * structure itself is corrupt, the CORRUPT flag will be set. If 112 + * the metadata is correct but otherwise suboptimal, the PREEN flag 113 + * will be set. 114 + */ 115 + 116 + /* 117 + * Scrub probe -- userspace uses this to probe if we're willing to scrub 118 + * or repair a given mountpoint. This will be used by xfs_scrub to 119 + * probe the kernel's abilities to scrub (and repair) the metadata. 
We 120 + * do this by validating the ioctl inputs from userspace, preparing the 121 + * filesystem for a scrub (or a repair) operation, and immediately 122 + * returning to userspace. Userspace can use the returned errno and 123 + * structure state to decide (in broad terms) if scrub/repair are 124 + * supported by the running kernel. 125 + */ 126 + static int 127 + xfs_scrub_probe( 128 + struct xfs_scrub_context *sc) 129 + { 130 + int error = 0; 131 + 132 + if (sc->sm->sm_ino || sc->sm->sm_agno) 133 + return -EINVAL; 134 + if (xfs_scrub_should_terminate(sc, &error)) 135 + return error; 136 + 137 + return 0; 138 + } 139 + 140 + /* Scrub setup and teardown */ 141 + 142 + /* Free all the resources and finish the transactions. */ 143 + STATIC int 144 + xfs_scrub_teardown( 145 + struct xfs_scrub_context *sc, 146 + struct xfs_inode *ip_in, 147 + int error) 148 + { 149 + xfs_scrub_ag_free(sc, &sc->sa); 150 + if (sc->tp) { 151 + xfs_trans_cancel(sc->tp); 152 + sc->tp = NULL; 153 + } 154 + if (sc->ip) { 155 + xfs_iunlock(sc->ip, sc->ilock_flags); 156 + if (sc->ip != ip_in && 157 + !xfs_internal_inum(sc->mp, sc->ip->i_ino)) 158 + iput(VFS_I(sc->ip)); 159 + sc->ip = NULL; 160 + } 161 + if (sc->buf) { 162 + kmem_free(sc->buf); 163 + sc->buf = NULL; 164 + } 165 + return error; 166 + } 167 + 168 + /* Scrubbing dispatch. 
*/ 169 + 170 + static const struct xfs_scrub_meta_ops meta_scrub_ops[] = { 171 + { /* ioctl presence test */ 172 + .setup = xfs_scrub_setup_fs, 173 + .scrub = xfs_scrub_probe, 174 + }, 175 + { /* superblock */ 176 + .setup = xfs_scrub_setup_ag_header, 177 + .scrub = xfs_scrub_superblock, 178 + }, 179 + { /* agf */ 180 + .setup = xfs_scrub_setup_ag_header, 181 + .scrub = xfs_scrub_agf, 182 + }, 183 + { /* agfl */ 184 + .setup = xfs_scrub_setup_ag_header, 185 + .scrub = xfs_scrub_agfl, 186 + }, 187 + { /* agi */ 188 + .setup = xfs_scrub_setup_ag_header, 189 + .scrub = xfs_scrub_agi, 190 + }, 191 + { /* bnobt */ 192 + .setup = xfs_scrub_setup_ag_allocbt, 193 + .scrub = xfs_scrub_bnobt, 194 + }, 195 + { /* cntbt */ 196 + .setup = xfs_scrub_setup_ag_allocbt, 197 + .scrub = xfs_scrub_cntbt, 198 + }, 199 + { /* inobt */ 200 + .setup = xfs_scrub_setup_ag_iallocbt, 201 + .scrub = xfs_scrub_inobt, 202 + }, 203 + { /* finobt */ 204 + .setup = xfs_scrub_setup_ag_iallocbt, 205 + .scrub = xfs_scrub_finobt, 206 + .has = xfs_sb_version_hasfinobt, 207 + }, 208 + { /* rmapbt */ 209 + .setup = xfs_scrub_setup_ag_rmapbt, 210 + .scrub = xfs_scrub_rmapbt, 211 + .has = xfs_sb_version_hasrmapbt, 212 + }, 213 + { /* refcountbt */ 214 + .setup = xfs_scrub_setup_ag_refcountbt, 215 + .scrub = xfs_scrub_refcountbt, 216 + .has = xfs_sb_version_hasreflink, 217 + }, 218 + { /* inode record */ 219 + .setup = xfs_scrub_setup_inode, 220 + .scrub = xfs_scrub_inode, 221 + }, 222 + { /* inode data fork */ 223 + .setup = xfs_scrub_setup_inode_bmap, 224 + .scrub = xfs_scrub_bmap_data, 225 + }, 226 + { /* inode attr fork */ 227 + .setup = xfs_scrub_setup_inode_bmap, 228 + .scrub = xfs_scrub_bmap_attr, 229 + }, 230 + { /* inode CoW fork */ 231 + .setup = xfs_scrub_setup_inode_bmap, 232 + .scrub = xfs_scrub_bmap_cow, 233 + }, 234 + { /* directory */ 235 + .setup = xfs_scrub_setup_directory, 236 + .scrub = xfs_scrub_directory, 237 + }, 238 + { /* extended attributes */ 239 + .setup = xfs_scrub_setup_xattr, 
240 + .scrub = xfs_scrub_xattr, 241 + }, 242 + { /* symbolic link */ 243 + .setup = xfs_scrub_setup_symlink, 244 + .scrub = xfs_scrub_symlink, 245 + }, 246 + { /* parent pointers */ 247 + .setup = xfs_scrub_setup_parent, 248 + .scrub = xfs_scrub_parent, 249 + }, 250 + { /* realtime bitmap */ 251 + .setup = xfs_scrub_setup_rt, 252 + .scrub = xfs_scrub_rtbitmap, 253 + .has = xfs_sb_version_hasrealtime, 254 + }, 255 + { /* realtime summary */ 256 + .setup = xfs_scrub_setup_rt, 257 + .scrub = xfs_scrub_rtsummary, 258 + .has = xfs_sb_version_hasrealtime, 259 + }, 260 + { /* user quota */ 261 + .setup = xfs_scrub_setup_quota, 262 + .scrub = xfs_scrub_quota, 263 + }, 264 + { /* group quota */ 265 + .setup = xfs_scrub_setup_quota, 266 + .scrub = xfs_scrub_quota, 267 + }, 268 + { /* project quota */ 269 + .setup = xfs_scrub_setup_quota, 270 + .scrub = xfs_scrub_quota, 271 + }, 272 + }; 273 + 274 + /* This isn't a stable feature, warn once per day. */ 275 + static inline void 276 + xfs_scrub_experimental_warning( 277 + struct xfs_mount *mp) 278 + { 279 + static struct ratelimit_state scrub_warning = RATELIMIT_STATE_INIT( 280 + "xfs_scrub_warning", 86400 * HZ, 1); 281 + ratelimit_set_flags(&scrub_warning, RATELIMIT_MSG_ON_RELEASE); 282 + 283 + if (__ratelimit(&scrub_warning)) 284 + xfs_alert(mp, 285 + "EXPERIMENTAL online scrub feature in use. Use at your own risk!"); 286 + } 287 + 288 + /* Dispatch metadata scrubbing. */ 289 + int 290 + xfs_scrub_metadata( 291 + struct xfs_inode *ip, 292 + struct xfs_scrub_metadata *sm) 293 + { 294 + struct xfs_scrub_context sc; 295 + struct xfs_mount *mp = ip->i_mount; 296 + const struct xfs_scrub_meta_ops *ops; 297 + bool try_harder = false; 298 + int error = 0; 299 + 300 + trace_xfs_scrub_start(ip, sm, error); 301 + 302 + /* Forbidden if we are shut down or mounted norecovery. 
*/ 303 + error = -ESHUTDOWN; 304 + if (XFS_FORCED_SHUTDOWN(mp)) 305 + goto out; 306 + error = -ENOTRECOVERABLE; 307 + if (mp->m_flags & XFS_MOUNT_NORECOVERY) 308 + goto out; 309 + 310 + /* Check our inputs. */ 311 + error = -EINVAL; 312 + sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT; 313 + if (sm->sm_flags & ~XFS_SCRUB_FLAGS_IN) 314 + goto out; 315 + if (memchr_inv(sm->sm_reserved, 0, sizeof(sm->sm_reserved))) 316 + goto out; 317 + 318 + /* Do we know about this type of metadata? */ 319 + error = -ENOENT; 320 + if (sm->sm_type >= XFS_SCRUB_TYPE_NR) 321 + goto out; 322 + ops = &meta_scrub_ops[sm->sm_type]; 323 + if (ops->scrub == NULL) 324 + goto out; 325 + 326 + /* 327 + * We won't scrub any filesystem that doesn't have the ability 328 + * to record unwritten extents. The option was made default in 329 + * 2003, removed from mkfs in 2007, and cannot be disabled in 330 + * v5, so if we find a filesystem without this flag it's either 331 + * really old or totally unsupported. Avoid it either way. 332 + * We also don't support v1-v3 filesystems, which aren't 333 + * mountable. 334 + */ 335 + error = -EOPNOTSUPP; 336 + if (!xfs_sb_version_hasextflgbit(&mp->m_sb)) 337 + goto out; 338 + 339 + /* Does this fs even support this type of metadata? */ 340 + error = -ENOENT; 341 + if (ops->has && !ops->has(&mp->m_sb)) 342 + goto out; 343 + 344 + /* We don't know how to repair anything yet. */ 345 + error = -EOPNOTSUPP; 346 + if (sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) 347 + goto out; 348 + 349 + xfs_scrub_experimental_warning(mp); 350 + 351 + retry_op: 352 + /* Set up for the operation. */ 353 + memset(&sc, 0, sizeof(sc)); 354 + sc.mp = ip->i_mount; 355 + sc.sm = sm; 356 + sc.ops = ops; 357 + sc.try_harder = try_harder; 358 + sc.sa.agno = NULLAGNUMBER; 359 + error = sc.ops->setup(&sc, ip); 360 + if (error) 361 + goto out_teardown; 362 + 363 + /* Scrub for errors. 
*/ 364 + error = sc.ops->scrub(&sc); 365 + if (!try_harder && error == -EDEADLOCK) { 366 + /* 367 + * Scrubbers return -EDEADLOCK to mean 'try harder'. 368 + * Tear down everything we hold, then set up again with 369 + * preparation for worst-case scenarios. 370 + */ 371 + error = xfs_scrub_teardown(&sc, ip, 0); 372 + if (error) 373 + goto out; 374 + try_harder = true; 375 + goto retry_op; 376 + } else if (error) 377 + goto out_teardown; 378 + 379 + if (sc.sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT | 380 + XFS_SCRUB_OFLAG_XCORRUPT)) 381 + xfs_alert_ratelimited(mp, "Corruption detected during scrub."); 382 + 383 + out_teardown: 384 + error = xfs_scrub_teardown(&sc, ip, error); 385 + out: 386 + trace_xfs_scrub_done(ip, sm, error); 387 + if (error == -EFSCORRUPTED || error == -EFSBADCRC) { 388 + sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; 389 + error = 0; 390 + } 391 + return error; 392 + }
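The -EDEADLOCK retry protocol in xfs_scrub_metadata can be sketched outside the kernel: a scrubber that would violate AG locking order bails with -EDEADLOCK, and the dispatcher tears down, sets try_harder, and re-runs exactly once with worst-case locking. Here scrub_once and dispatch_scrub are illustrative stand-ins for sc.ops->scrub and the dispatcher, not kernel functions:

```c
#include <assert.h>
#include <errno.h>
#include <stdbool.h>

/* Stand-in scrubber: fail the optimistic pass, succeed the careful one. */
static int
scrub_once(bool try_harder, int *calls)
{
	(*calls)++;
	return try_harder ? 0 : -EDEADLOCK;
}

/* Stand-in dispatcher: -EDEADLOCK means "tear down and try harder". */
static int
dispatch_scrub(int *calls)
{
	bool try_harder = false;
	int error;

retry_op:
	error = scrub_once(try_harder, calls);
	if (!try_harder && error == -EDEADLOCK) {
		try_harder = true;	/* lock everything up front next time */
		goto retry_op;
	}
	return error;
}
```

The !try_harder guard matters: without it, a scrubber that keeps returning -EDEADLOCK would loop forever instead of failing after the second pass.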
fs/xfs/scrub/scrub.h (+115 lines)
··· 1 + /* 2 + * Copyright (C) 2017 Oracle. All Rights Reserved. 3 + * 4 + * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 + * 6 + * This program is free software; you can redistribute it and/or 7 + * modify it under the terms of the GNU General Public License 8 + * as published by the Free Software Foundation; either version 2 9 + * of the License, or (at your option) any later version. 10 + * 11 + * This program is distributed in the hope that it would be useful, 12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 + * GNU General Public License for more details. 15 + * 16 + * You should have received a copy of the GNU General Public License 17 + * along with this program; if not, write the Free Software Foundation, 18 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 19 + */ 20 + #ifndef __XFS_SCRUB_SCRUB_H__ 21 + #define __XFS_SCRUB_SCRUB_H__ 22 + 23 + struct xfs_scrub_context; 24 + 25 + struct xfs_scrub_meta_ops { 26 + /* Acquire whatever resources are needed for the operation. */ 27 + int (*setup)(struct xfs_scrub_context *, 28 + struct xfs_inode *); 29 + 30 + /* Examine metadata for errors. */ 31 + int (*scrub)(struct xfs_scrub_context *); 32 + 33 + /* Decide if we even have this piece of metadata. */ 34 + bool (*has)(struct xfs_sb *); 35 + }; 36 + 37 + /* Buffer pointers and btree cursors for an entire AG. */ 38 + struct xfs_scrub_ag { 39 + xfs_agnumber_t agno; 40 + 41 + /* AG btree roots */ 42 + struct xfs_buf *agf_bp; 43 + struct xfs_buf *agfl_bp; 44 + struct xfs_buf *agi_bp; 45 + 46 + /* AG btrees */ 47 + struct xfs_btree_cur *bno_cur; 48 + struct xfs_btree_cur *cnt_cur; 49 + struct xfs_btree_cur *ino_cur; 50 + struct xfs_btree_cur *fino_cur; 51 + struct xfs_btree_cur *rmap_cur; 52 + struct xfs_btree_cur *refc_cur; 53 + }; 54 + 55 + struct xfs_scrub_context { 56 + /* General scrub state. 
*/ 57 + struct xfs_mount *mp; 58 + struct xfs_scrub_metadata *sm; 59 + const struct xfs_scrub_meta_ops *ops; 60 + struct xfs_trans *tp; 61 + struct xfs_inode *ip; 62 + void *buf; 63 + uint ilock_flags; 64 + bool try_harder; 65 + 66 + /* State tracking for single-AG operations. */ 67 + struct xfs_scrub_ag sa; 68 + }; 69 + 70 + /* Metadata scrubbers */ 71 + int xfs_scrub_tester(struct xfs_scrub_context *sc); 72 + int xfs_scrub_superblock(struct xfs_scrub_context *sc); 73 + int xfs_scrub_agf(struct xfs_scrub_context *sc); 74 + int xfs_scrub_agfl(struct xfs_scrub_context *sc); 75 + int xfs_scrub_agi(struct xfs_scrub_context *sc); 76 + int xfs_scrub_bnobt(struct xfs_scrub_context *sc); 77 + int xfs_scrub_cntbt(struct xfs_scrub_context *sc); 78 + int xfs_scrub_inobt(struct xfs_scrub_context *sc); 79 + int xfs_scrub_finobt(struct xfs_scrub_context *sc); 80 + int xfs_scrub_rmapbt(struct xfs_scrub_context *sc); 81 + int xfs_scrub_refcountbt(struct xfs_scrub_context *sc); 82 + int xfs_scrub_inode(struct xfs_scrub_context *sc); 83 + int xfs_scrub_bmap_data(struct xfs_scrub_context *sc); 84 + int xfs_scrub_bmap_attr(struct xfs_scrub_context *sc); 85 + int xfs_scrub_bmap_cow(struct xfs_scrub_context *sc); 86 + int xfs_scrub_directory(struct xfs_scrub_context *sc); 87 + int xfs_scrub_xattr(struct xfs_scrub_context *sc); 88 + int xfs_scrub_symlink(struct xfs_scrub_context *sc); 89 + int xfs_scrub_parent(struct xfs_scrub_context *sc); 90 + #ifdef CONFIG_XFS_RT 91 + int xfs_scrub_rtbitmap(struct xfs_scrub_context *sc); 92 + int xfs_scrub_rtsummary(struct xfs_scrub_context *sc); 93 + #else 94 + static inline int 95 + xfs_scrub_rtbitmap(struct xfs_scrub_context *sc) 96 + { 97 + return -ENOENT; 98 + } 99 + static inline int 100 + xfs_scrub_rtsummary(struct xfs_scrub_context *sc) 101 + { 102 + return -ENOENT; 103 + } 104 + #endif 105 + #ifdef CONFIG_XFS_QUOTA 106 + int xfs_scrub_quota(struct xfs_scrub_context *sc); 107 + #else 108 + static inline int 109 + xfs_scrub_quota(struct 
xfs_scrub_context *sc) 110 + { 111 + return -ENOENT; 112 + } 113 + #endif 114 + 115 + #endif /* __XFS_SCRUB_SCRUB_H__ */
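The `xfs_scrub_meta_ops` table above pairs a `setup` hook (acquire resources) with a `scrub` hook (examine metadata), so the dispatcher never needs to know which metadata type it is driving. A sketch of that ops-table dispatch, with hypothetical `demo_*` names:

```c
#include <stddef.h>

struct demo_ctx;

/* Per-type operations: acquire resources, then examine the metadata. */
struct demo_meta_ops {
	int (*setup)(struct demo_ctx *);
	int (*scrub)(struct demo_ctx *);
};

struct demo_ctx {
	const struct demo_meta_ops *ops;
	int setup_done;
};

static int demo_setup(struct demo_ctx *c) { c->setup_done = 1; return 0; }
static int demo_scrub(struct demo_ctx *c) { return c->setup_done ? 0 : -1; }

static const struct demo_meta_ops demo_ops = {
	.setup	= demo_setup,
	.scrub	= demo_scrub,
};

/* Generic driver: setup first, scrub only if setup succeeded. */
static int demo_run(struct demo_ctx *c)
{
	int error = c->ops->setup(c);

	if (error)
		return error;
	return c->ops->scrub(c);
}
```

Adding a new metadata scrubber then means filling in one more ops table, not touching the driver.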
+92
fs/xfs/scrub/symlink.c
··· 1 + /* 2 + * Copyright (C) 2017 Oracle. All Rights Reserved. 3 + * 4 + * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 + * 6 + * This program is free software; you can redistribute it and/or 7 + * modify it under the terms of the GNU General Public License 8 + * as published by the Free Software Foundation; either version 2 9 + * of the License, or (at your option) any later version. 10 + * 11 + * This program is distributed in the hope that it would be useful, 12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 + * GNU General Public License for more details. 15 + * 16 + * You should have received a copy of the GNU General Public License 17 + * along with this program; if not, write the Free Software Foundation, 18 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 19 + */ 20 + #include "xfs.h" 21 + #include "xfs_fs.h" 22 + #include "xfs_shared.h" 23 + #include "xfs_format.h" 24 + #include "xfs_trans_resv.h" 25 + #include "xfs_mount.h" 26 + #include "xfs_defer.h" 27 + #include "xfs_btree.h" 28 + #include "xfs_bit.h" 29 + #include "xfs_log_format.h" 30 + #include "xfs_trans.h" 31 + #include "xfs_sb.h" 32 + #include "xfs_inode.h" 33 + #include "xfs_inode_fork.h" 34 + #include "xfs_symlink.h" 35 + #include "scrub/xfs_scrub.h" 36 + #include "scrub/scrub.h" 37 + #include "scrub/common.h" 38 + #include "scrub/trace.h" 39 + 40 + /* Set us up to scrub a symbolic link. */ 41 + int 42 + xfs_scrub_setup_symlink( 43 + struct xfs_scrub_context *sc, 44 + struct xfs_inode *ip) 45 + { 46 + /* Allocate the buffer without the inode lock held. */ 47 + sc->buf = kmem_zalloc_large(XFS_SYMLINK_MAXLEN + 1, KM_SLEEP); 48 + if (!sc->buf) 49 + return -ENOMEM; 50 + 51 + return xfs_scrub_setup_inode_contents(sc, ip, 0); 52 + } 53 + 54 + /* Symbolic links. 
*/ 55 + 56 + int 57 + xfs_scrub_symlink( 58 + struct xfs_scrub_context *sc) 59 + { 60 + struct xfs_inode *ip = sc->ip; 61 + struct xfs_ifork *ifp; 62 + loff_t len; 63 + int error = 0; 64 + 65 + if (!S_ISLNK(VFS_I(ip)->i_mode)) 66 + return -ENOENT; 67 + ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); 68 + len = ip->i_d.di_size; 69 + 70 + /* Plausible size? */ 71 + if (len > XFS_SYMLINK_MAXLEN || len <= 0) { 72 + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); 73 + goto out; 74 + } 75 + 76 + /* Inline symlink? */ 77 + if (ifp->if_flags & XFS_IFINLINE) { 78 + if (len > XFS_IFORK_DSIZE(ip) || 79 + len > strnlen(ifp->if_u1.if_data, XFS_IFORK_DSIZE(ip))) 80 + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); 81 + goto out; 82 + } 83 + 84 + /* Remote symlink; must read the contents. */ 85 + error = xfs_readlink_bmap_ilocked(sc->ip, sc->buf); 86 + if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error)) 87 + goto out; 88 + if (strnlen(sc->buf, XFS_SYMLINK_MAXLEN) < len) 89 + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); 90 + out: 91 + return error; 92 + }
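The symlink scrubber above applies two checks to the recorded target length: it must be positive and within the format limit, and the bytes actually stored must be at least that long (`strnlen(buf, MAX) < len` marks corruption). A userspace sketch of the same predicate, with a hypothetical `DEMO_SYMLINK_MAXLEN` in place of `XFS_SYMLINK_MAXLEN`:

```c
#include <stdbool.h>
#include <string.h>

#define DEMO_SYMLINK_MAXLEN	1024	/* stands in for XFS_SYMLINK_MAXLEN */

/*
 * A recorded symlink length is plausible only if it is positive, within
 * the format limit, and no longer than the target string actually stored.
 */
static bool demo_symlink_len_ok(const char *target, long long len)
{
	if (len <= 0 || len > DEMO_SYMLINK_MAXLEN)
		return false;
	return strnlen(target, DEMO_SYMLINK_MAXLEN) >= (size_t)len;
}
```

A stored target shorter than the on-disk size field means the length metadata and the data fork disagree, which is exactly what the scrubber flags.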
+59
fs/xfs/scrub/trace.c
··· 1 + /* 2 + * Copyright (C) 2017 Oracle. All Rights Reserved. 3 + * 4 + * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 + * 6 + * This program is free software; you can redistribute it and/or 7 + * modify it under the terms of the GNU General Public License 8 + * as published by the Free Software Foundation; either version 2 9 + * of the License, or (at your option) any later version. 10 + * 11 + * This program is distributed in the hope that it would be useful, 12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 + * GNU General Public License for more details. 15 + * 16 + * You should have received a copy of the GNU General Public License 17 + * along with this program; if not, write the Free Software Foundation, 18 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 19 + */ 20 + #include "xfs.h" 21 + #include "xfs_fs.h" 22 + #include "xfs_shared.h" 23 + #include "xfs_format.h" 24 + #include "xfs_log_format.h" 25 + #include "xfs_trans_resv.h" 26 + #include "xfs_mount.h" 27 + #include "xfs_defer.h" 28 + #include "xfs_da_format.h" 29 + #include "xfs_defer.h" 30 + #include "xfs_inode.h" 31 + #include "xfs_btree.h" 32 + #include "xfs_trans.h" 33 + #include "xfs_bit.h" 34 + #include "scrub/xfs_scrub.h" 35 + #include "scrub/scrub.h" 36 + #include "scrub/common.h" 37 + 38 + /* Figure out which block the btree cursor was pointing to. 
*/ 39 + static inline xfs_fsblock_t 40 + xfs_scrub_btree_cur_fsbno( 41 + struct xfs_btree_cur *cur, 42 + int level) 43 + { 44 + if (level < cur->bc_nlevels && cur->bc_bufs[level]) 45 + return XFS_DADDR_TO_FSB(cur->bc_mp, cur->bc_bufs[level]->b_bn); 46 + else if (level == cur->bc_nlevels - 1 && 47 + cur->bc_flags & XFS_BTREE_LONG_PTRS) 48 + return XFS_INO_TO_FSB(cur->bc_mp, cur->bc_private.b.ip->i_ino); 49 + else if (!(cur->bc_flags & XFS_BTREE_LONG_PTRS)) 50 + return XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_private.a.agno, 0); 51 + return NULLFSBLOCK; 52 + } 53 + 54 + /* 55 + * We include this last to have the helpers above available for the trace 56 + * event implementations. 57 + */ 58 + #define CREATE_TRACE_POINTS 59 + #include "scrub/trace.h"
+499
fs/xfs/scrub/trace.h
··· 1 + /* 2 + * Copyright (C) 2017 Oracle. All Rights Reserved. 3 + * 4 + * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 + * 6 + * This program is free software; you can redistribute it and/or 7 + * modify it under the terms of the GNU General Public License 8 + * as published by the Free Software Foundation; either version 2 9 + * of the License, or (at your option) any later version. 10 + * 11 + * This program is distributed in the hope that it would be useful, 12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 + * GNU General Public License for more details. 15 + * 16 + * You should have received a copy of the GNU General Public License 17 + * along with this program; if not, write the Free Software Foundation, 18 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 19 + */ 20 + #undef TRACE_SYSTEM 21 + #define TRACE_SYSTEM xfs_scrub 22 + 23 + #if !defined(_TRACE_XFS_SCRUB_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) 24 + #define _TRACE_XFS_SCRUB_TRACE_H 25 + 26 + #include <linux/tracepoint.h> 27 + #include "xfs_bit.h" 28 + 29 + DECLARE_EVENT_CLASS(xfs_scrub_class, 30 + TP_PROTO(struct xfs_inode *ip, struct xfs_scrub_metadata *sm, 31 + int error), 32 + TP_ARGS(ip, sm, error), 33 + TP_STRUCT__entry( 34 + __field(dev_t, dev) 35 + __field(xfs_ino_t, ino) 36 + __field(unsigned int, type) 37 + __field(xfs_agnumber_t, agno) 38 + __field(xfs_ino_t, inum) 39 + __field(unsigned int, gen) 40 + __field(unsigned int, flags) 41 + __field(int, error) 42 + ), 43 + TP_fast_assign( 44 + __entry->dev = ip->i_mount->m_super->s_dev; 45 + __entry->ino = ip->i_ino; 46 + __entry->type = sm->sm_type; 47 + __entry->agno = sm->sm_agno; 48 + __entry->inum = sm->sm_ino; 49 + __entry->gen = sm->sm_gen; 50 + __entry->flags = sm->sm_flags; 51 + __entry->error = error; 52 + ), 53 + TP_printk("dev %d:%d ino %llu type %u agno %u inum %llu gen %u flags 0x%x error %d", 54 + 
MAJOR(__entry->dev), MINOR(__entry->dev), 55 + __entry->ino, 56 + __entry->type, 57 + __entry->agno, 58 + __entry->inum, 59 + __entry->gen, 60 + __entry->flags, 61 + __entry->error) 62 + ) 63 + #define DEFINE_SCRUB_EVENT(name) \ 64 + DEFINE_EVENT(xfs_scrub_class, name, \ 65 + TP_PROTO(struct xfs_inode *ip, struct xfs_scrub_metadata *sm, \ 66 + int error), \ 67 + TP_ARGS(ip, sm, error)) 68 + 69 + DEFINE_SCRUB_EVENT(xfs_scrub_start); 70 + DEFINE_SCRUB_EVENT(xfs_scrub_done); 71 + DEFINE_SCRUB_EVENT(xfs_scrub_deadlock_retry); 72 + 73 + TRACE_EVENT(xfs_scrub_op_error, 74 + TP_PROTO(struct xfs_scrub_context *sc, xfs_agnumber_t agno, 75 + xfs_agblock_t bno, int error, void *ret_ip), 76 + TP_ARGS(sc, agno, bno, error, ret_ip), 77 + TP_STRUCT__entry( 78 + __field(dev_t, dev) 79 + __field(unsigned int, type) 80 + __field(xfs_agnumber_t, agno) 81 + __field(xfs_agblock_t, bno) 82 + __field(int, error) 83 + __field(void *, ret_ip) 84 + ), 85 + TP_fast_assign( 86 + __entry->dev = sc->mp->m_super->s_dev; 87 + __entry->type = sc->sm->sm_type; 88 + __entry->agno = agno; 89 + __entry->bno = bno; 90 + __entry->error = error; 91 + __entry->ret_ip = ret_ip; 92 + ), 93 + TP_printk("dev %d:%d type %u agno %u agbno %u error %d ret_ip %pF", 94 + MAJOR(__entry->dev), MINOR(__entry->dev), 95 + __entry->type, 96 + __entry->agno, 97 + __entry->bno, 98 + __entry->error, 99 + __entry->ret_ip) 100 + ); 101 + 102 + TRACE_EVENT(xfs_scrub_file_op_error, 103 + TP_PROTO(struct xfs_scrub_context *sc, int whichfork, 104 + xfs_fileoff_t offset, int error, void *ret_ip), 105 + TP_ARGS(sc, whichfork, offset, error, ret_ip), 106 + TP_STRUCT__entry( 107 + __field(dev_t, dev) 108 + __field(xfs_ino_t, ino) 109 + __field(int, whichfork) 110 + __field(unsigned int, type) 111 + __field(xfs_fileoff_t, offset) 112 + __field(int, error) 113 + __field(void *, ret_ip) 114 + ), 115 + TP_fast_assign( 116 + __entry->dev = sc->ip->i_mount->m_super->s_dev; 117 + __entry->ino = sc->ip->i_ino; 118 + __entry->whichfork = 
whichfork; 119 + __entry->type = sc->sm->sm_type; 120 + __entry->offset = offset; 121 + __entry->error = error; 122 + __entry->ret_ip = ret_ip; 123 + ), 124 + TP_printk("dev %d:%d ino %llu fork %d type %u offset %llu error %d ret_ip %pF", 125 + MAJOR(__entry->dev), MINOR(__entry->dev), 126 + __entry->ino, 127 + __entry->whichfork, 128 + __entry->type, 129 + __entry->offset, 130 + __entry->error, 131 + __entry->ret_ip) 132 + ); 133 + 134 + DECLARE_EVENT_CLASS(xfs_scrub_block_error_class, 135 + TP_PROTO(struct xfs_scrub_context *sc, xfs_daddr_t daddr, void *ret_ip), 136 + TP_ARGS(sc, daddr, ret_ip), 137 + TP_STRUCT__entry( 138 + __field(dev_t, dev) 139 + __field(unsigned int, type) 140 + __field(xfs_agnumber_t, agno) 141 + __field(xfs_agblock_t, bno) 142 + __field(void *, ret_ip) 143 + ), 144 + TP_fast_assign( 145 + xfs_fsblock_t fsbno; 146 + xfs_agnumber_t agno; 147 + xfs_agblock_t bno; 148 + 149 + fsbno = XFS_DADDR_TO_FSB(sc->mp, daddr); 150 + agno = XFS_FSB_TO_AGNO(sc->mp, fsbno); 151 + bno = XFS_FSB_TO_AGBNO(sc->mp, fsbno); 152 + 153 + __entry->dev = sc->mp->m_super->s_dev; 154 + __entry->type = sc->sm->sm_type; 155 + __entry->agno = agno; 156 + __entry->bno = bno; 157 + __entry->ret_ip = ret_ip; 158 + ), 159 + TP_printk("dev %d:%d type %u agno %u agbno %u ret_ip %pF", 160 + MAJOR(__entry->dev), MINOR(__entry->dev), 161 + __entry->type, 162 + __entry->agno, 163 + __entry->bno, 164 + __entry->ret_ip) 165 + ) 166 + 167 + #define DEFINE_SCRUB_BLOCK_ERROR_EVENT(name) \ 168 + DEFINE_EVENT(xfs_scrub_block_error_class, name, \ 169 + TP_PROTO(struct xfs_scrub_context *sc, xfs_daddr_t daddr, \ 170 + void *ret_ip), \ 171 + TP_ARGS(sc, daddr, ret_ip)) 172 + 173 + DEFINE_SCRUB_BLOCK_ERROR_EVENT(xfs_scrub_block_error); 174 + DEFINE_SCRUB_BLOCK_ERROR_EVENT(xfs_scrub_block_preen); 175 + 176 + DECLARE_EVENT_CLASS(xfs_scrub_ino_error_class, 177 + TP_PROTO(struct xfs_scrub_context *sc, xfs_ino_t ino, xfs_daddr_t daddr, 178 + void *ret_ip), 179 + TP_ARGS(sc, ino, daddr, ret_ip), 
180 + TP_STRUCT__entry( 181 + __field(dev_t, dev) 182 + __field(xfs_ino_t, ino) 183 + __field(unsigned int, type) 184 + __field(xfs_agnumber_t, agno) 185 + __field(xfs_agblock_t, bno) 186 + __field(void *, ret_ip) 187 + ), 188 + TP_fast_assign( 189 + xfs_fsblock_t fsbno; 190 + xfs_agnumber_t agno; 191 + xfs_agblock_t bno; 192 + 193 + if (daddr) { 194 + fsbno = XFS_DADDR_TO_FSB(sc->mp, daddr); 195 + agno = XFS_FSB_TO_AGNO(sc->mp, fsbno); 196 + bno = XFS_FSB_TO_AGBNO(sc->mp, fsbno); 197 + } else { 198 + agno = XFS_INO_TO_AGNO(sc->mp, ino); 199 + bno = XFS_AGINO_TO_AGBNO(sc->mp, 200 + XFS_INO_TO_AGINO(sc->mp, ino)); 201 + } 202 + 203 + __entry->dev = sc->mp->m_super->s_dev; 204 + __entry->ino = ino; 205 + __entry->type = sc->sm->sm_type; 206 + __entry->agno = agno; 207 + __entry->bno = bno; 208 + __entry->ret_ip = ret_ip; 209 + ), 210 + TP_printk("dev %d:%d ino %llu type %u agno %u agbno %u ret_ip %pF", 211 + MAJOR(__entry->dev), MINOR(__entry->dev), 212 + __entry->ino, 213 + __entry->type, 214 + __entry->agno, 215 + __entry->bno, 216 + __entry->ret_ip) 217 + ) 218 + 219 + #define DEFINE_SCRUB_INO_ERROR_EVENT(name) \ 220 + DEFINE_EVENT(xfs_scrub_ino_error_class, name, \ 221 + TP_PROTO(struct xfs_scrub_context *sc, xfs_ino_t ino, \ 222 + xfs_daddr_t daddr, void *ret_ip), \ 223 + TP_ARGS(sc, ino, daddr, ret_ip)) 224 + 225 + DEFINE_SCRUB_INO_ERROR_EVENT(xfs_scrub_ino_error); 226 + DEFINE_SCRUB_INO_ERROR_EVENT(xfs_scrub_ino_preen); 227 + DEFINE_SCRUB_INO_ERROR_EVENT(xfs_scrub_ino_warning); 228 + 229 + DECLARE_EVENT_CLASS(xfs_scrub_fblock_error_class, 230 + TP_PROTO(struct xfs_scrub_context *sc, int whichfork, 231 + xfs_fileoff_t offset, void *ret_ip), 232 + TP_ARGS(sc, whichfork, offset, ret_ip), 233 + TP_STRUCT__entry( 234 + __field(dev_t, dev) 235 + __field(xfs_ino_t, ino) 236 + __field(int, whichfork) 237 + __field(unsigned int, type) 238 + __field(xfs_fileoff_t, offset) 239 + __field(void *, ret_ip) 240 + ), 241 + TP_fast_assign( 242 + __entry->dev = 
sc->ip->i_mount->m_super->s_dev; 243 + __entry->ino = sc->ip->i_ino; 244 + __entry->whichfork = whichfork; 245 + __entry->type = sc->sm->sm_type; 246 + __entry->offset = offset; 247 + __entry->ret_ip = ret_ip; 248 + ), 249 + TP_printk("dev %d:%d ino %llu fork %d type %u offset %llu ret_ip %pF", 250 + MAJOR(__entry->dev), MINOR(__entry->dev), 251 + __entry->ino, 252 + __entry->whichfork, 253 + __entry->type, 254 + __entry->offset, 255 + __entry->ret_ip) 256 + ); 257 + 258 + #define DEFINE_SCRUB_FBLOCK_ERROR_EVENT(name) \ 259 + DEFINE_EVENT(xfs_scrub_fblock_error_class, name, \ 260 + TP_PROTO(struct xfs_scrub_context *sc, int whichfork, \ 261 + xfs_fileoff_t offset, void *ret_ip), \ 262 + TP_ARGS(sc, whichfork, offset, ret_ip)) 263 + 264 + DEFINE_SCRUB_FBLOCK_ERROR_EVENT(xfs_scrub_fblock_error); 265 + DEFINE_SCRUB_FBLOCK_ERROR_EVENT(xfs_scrub_fblock_warning); 266 + 267 + TRACE_EVENT(xfs_scrub_incomplete, 268 + TP_PROTO(struct xfs_scrub_context *sc, void *ret_ip), 269 + TP_ARGS(sc, ret_ip), 270 + TP_STRUCT__entry( 271 + __field(dev_t, dev) 272 + __field(unsigned int, type) 273 + __field(void *, ret_ip) 274 + ), 275 + TP_fast_assign( 276 + __entry->dev = sc->mp->m_super->s_dev; 277 + __entry->type = sc->sm->sm_type; 278 + __entry->ret_ip = ret_ip; 279 + ), 280 + TP_printk("dev %d:%d type %u ret_ip %pF", 281 + MAJOR(__entry->dev), MINOR(__entry->dev), 282 + __entry->type, 283 + __entry->ret_ip) 284 + ); 285 + 286 + TRACE_EVENT(xfs_scrub_btree_op_error, 287 + TP_PROTO(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur, 288 + int level, int error, void *ret_ip), 289 + TP_ARGS(sc, cur, level, error, ret_ip), 290 + TP_STRUCT__entry( 291 + __field(dev_t, dev) 292 + __field(unsigned int, type) 293 + __field(xfs_btnum_t, btnum) 294 + __field(int, level) 295 + __field(xfs_agnumber_t, agno) 296 + __field(xfs_agblock_t, bno) 297 + __field(int, ptr); 298 + __field(int, error) 299 + __field(void *, ret_ip) 300 + ), 301 + TP_fast_assign( 302 + xfs_fsblock_t fsbno = 
xfs_scrub_btree_cur_fsbno(cur, level); 303 + 304 + __entry->dev = sc->mp->m_super->s_dev; 305 + __entry->type = sc->sm->sm_type; 306 + __entry->btnum = cur->bc_btnum; 307 + __entry->level = level; 308 + __entry->agno = XFS_FSB_TO_AGNO(cur->bc_mp, fsbno); 309 + __entry->bno = XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno); 310 + __entry->ptr = cur->bc_ptrs[level]; 311 + __entry->error = error; 312 + __entry->ret_ip = ret_ip; 313 + ), 314 + TP_printk("dev %d:%d type %u btnum %d level %d ptr %d agno %u agbno %u error %d ret_ip %pF", 315 + MAJOR(__entry->dev), MINOR(__entry->dev), 316 + __entry->type, 317 + __entry->btnum, 318 + __entry->level, 319 + __entry->ptr, 320 + __entry->agno, 321 + __entry->bno, 322 + __entry->error, 323 + __entry->ret_ip) 324 + ); 325 + 326 + TRACE_EVENT(xfs_scrub_ifork_btree_op_error, 327 + TP_PROTO(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur, 328 + int level, int error, void *ret_ip), 329 + TP_ARGS(sc, cur, level, error, ret_ip), 330 + TP_STRUCT__entry( 331 + __field(dev_t, dev) 332 + __field(xfs_ino_t, ino) 333 + __field(int, whichfork) 334 + __field(unsigned int, type) 335 + __field(xfs_btnum_t, btnum) 336 + __field(int, level) 337 + __field(int, ptr) 338 + __field(xfs_agnumber_t, agno) 339 + __field(xfs_agblock_t, bno) 340 + __field(int, error) 341 + __field(void *, ret_ip) 342 + ), 343 + TP_fast_assign( 344 + xfs_fsblock_t fsbno = xfs_scrub_btree_cur_fsbno(cur, level); 345 + __entry->dev = sc->mp->m_super->s_dev; 346 + __entry->ino = sc->ip->i_ino; 347 + __entry->whichfork = cur->bc_private.b.whichfork; 348 + __entry->type = sc->sm->sm_type; 349 + __entry->btnum = cur->bc_btnum; 350 + __entry->level = level; 351 + __entry->ptr = cur->bc_ptrs[level]; 352 + __entry->agno = XFS_FSB_TO_AGNO(cur->bc_mp, fsbno); 353 + __entry->bno = XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno); 354 + __entry->error = error; 355 + __entry->ret_ip = ret_ip; 356 + ), 357 + TP_printk("dev %d:%d ino %llu fork %d type %u btnum %d level %d ptr %d agno %u agbno %u error %d 
ret_ip %pF", 358 + MAJOR(__entry->dev), MINOR(__entry->dev), 359 + __entry->ino, 360 + __entry->whichfork, 361 + __entry->type, 362 + __entry->btnum, 363 + __entry->level, 364 + __entry->ptr, 365 + __entry->agno, 366 + __entry->bno, 367 + __entry->error, 368 + __entry->ret_ip) 369 + ); 370 + 371 + TRACE_EVENT(xfs_scrub_btree_error, 372 + TP_PROTO(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur, 373 + int level, void *ret_ip), 374 + TP_ARGS(sc, cur, level, ret_ip), 375 + TP_STRUCT__entry( 376 + __field(dev_t, dev) 377 + __field(unsigned int, type) 378 + __field(xfs_btnum_t, btnum) 379 + __field(int, level) 380 + __field(xfs_agnumber_t, agno) 381 + __field(xfs_agblock_t, bno) 382 + __field(int, ptr); 383 + __field(void *, ret_ip) 384 + ), 385 + TP_fast_assign( 386 + xfs_fsblock_t fsbno = xfs_scrub_btree_cur_fsbno(cur, level); 387 + __entry->dev = sc->mp->m_super->s_dev; 388 + __entry->type = sc->sm->sm_type; 389 + __entry->btnum = cur->bc_btnum; 390 + __entry->level = level; 391 + __entry->agno = XFS_FSB_TO_AGNO(cur->bc_mp, fsbno); 392 + __entry->bno = XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno); 393 + __entry->ptr = cur->bc_ptrs[level]; 394 + __entry->ret_ip = ret_ip; 395 + ), 396 + TP_printk("dev %d:%d type %u btnum %d level %d ptr %d agno %u agbno %u ret_ip %pF", 397 + MAJOR(__entry->dev), MINOR(__entry->dev), 398 + __entry->type, 399 + __entry->btnum, 400 + __entry->level, 401 + __entry->ptr, 402 + __entry->agno, 403 + __entry->bno, 404 + __entry->ret_ip) 405 + ); 406 + 407 + TRACE_EVENT(xfs_scrub_ifork_btree_error, 408 + TP_PROTO(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur, 409 + int level, void *ret_ip), 410 + TP_ARGS(sc, cur, level, ret_ip), 411 + TP_STRUCT__entry( 412 + __field(dev_t, dev) 413 + __field(xfs_ino_t, ino) 414 + __field(int, whichfork) 415 + __field(unsigned int, type) 416 + __field(xfs_btnum_t, btnum) 417 + __field(int, level) 418 + __field(xfs_agnumber_t, agno) 419 + __field(xfs_agblock_t, bno) 420 + __field(int, ptr); 421 + 
__field(void *, ret_ip) 422 + ), 423 + TP_fast_assign( 424 + xfs_fsblock_t fsbno = xfs_scrub_btree_cur_fsbno(cur, level); 425 + __entry->dev = sc->mp->m_super->s_dev; 426 + __entry->ino = sc->ip->i_ino; 427 + __entry->whichfork = cur->bc_private.b.whichfork; 428 + __entry->type = sc->sm->sm_type; 429 + __entry->btnum = cur->bc_btnum; 430 + __entry->level = level; 431 + __entry->agno = XFS_FSB_TO_AGNO(cur->bc_mp, fsbno); 432 + __entry->bno = XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno); 433 + __entry->ptr = cur->bc_ptrs[level]; 434 + __entry->ret_ip = ret_ip; 435 + ), 436 + TP_printk("dev %d:%d ino %llu fork %d type %u btnum %d level %d ptr %d agno %u agbno %u ret_ip %pF", 437 + MAJOR(__entry->dev), MINOR(__entry->dev), 438 + __entry->ino, 439 + __entry->whichfork, 440 + __entry->type, 441 + __entry->btnum, 442 + __entry->level, 443 + __entry->ptr, 444 + __entry->agno, 445 + __entry->bno, 446 + __entry->ret_ip) 447 + ); 448 + 449 + DECLARE_EVENT_CLASS(xfs_scrub_sbtree_class, 450 + TP_PROTO(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur, 451 + int level), 452 + TP_ARGS(sc, cur, level), 453 + TP_STRUCT__entry( 454 + __field(dev_t, dev) 455 + __field(int, type) 456 + __field(xfs_btnum_t, btnum) 457 + __field(xfs_agnumber_t, agno) 458 + __field(xfs_agblock_t, bno) 459 + __field(int, level) 460 + __field(int, nlevels) 461 + __field(int, ptr) 462 + ), 463 + TP_fast_assign( 464 + xfs_fsblock_t fsbno = xfs_scrub_btree_cur_fsbno(cur, level); 465 + 466 + __entry->dev = sc->mp->m_super->s_dev; 467 + __entry->type = sc->sm->sm_type; 468 + __entry->btnum = cur->bc_btnum; 469 + __entry->agno = XFS_FSB_TO_AGNO(cur->bc_mp, fsbno); 470 + __entry->bno = XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno); 471 + __entry->level = level; 472 + __entry->nlevels = cur->bc_nlevels; 473 + __entry->ptr = cur->bc_ptrs[level]; 474 + ), 475 + TP_printk("dev %d:%d type %u btnum %d agno %u agbno %u level %d nlevels %d ptr %d", 476 + MAJOR(__entry->dev), MINOR(__entry->dev), 477 + __entry->type, 478 + 
__entry->btnum, 479 + __entry->agno, 480 + __entry->bno, 481 + __entry->level, 482 + __entry->nlevels, 483 + __entry->ptr) 484 + ) 485 + #define DEFINE_SCRUB_SBTREE_EVENT(name) \ 486 + DEFINE_EVENT(xfs_scrub_sbtree_class, name, \ 487 + TP_PROTO(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur, \ 488 + int level), \ 489 + TP_ARGS(sc, cur, level)) 490 + 491 + DEFINE_SCRUB_SBTREE_EVENT(xfs_scrub_btree_rec); 492 + DEFINE_SCRUB_SBTREE_EVENT(xfs_scrub_btree_key); 493 + 494 + #endif /* _TRACE_XFS_SCRUB_TRACE_H */ 495 + 496 + #undef TRACE_INCLUDE_PATH 497 + #define TRACE_INCLUDE_PATH . 498 + #define TRACE_INCLUDE_FILE scrub/trace 499 + #include <trace/define_trace.h>
+29
fs/xfs/scrub/xfs_scrub.h
··· 1 + /* 2 + * Copyright (C) 2017 Oracle. All Rights Reserved. 3 + * 4 + * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 + * 6 + * This program is free software; you can redistribute it and/or 7 + * modify it under the terms of the GNU General Public License 8 + * as published by the Free Software Foundation; either version 2 9 + * of the License, or (at your option) any later version. 10 + * 11 + * This program is distributed in the hope that it would be useful, 12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 + * GNU General Public License for more details. 15 + * 16 + * You should have received a copy of the GNU General Public License 17 + * along with this program; if not, write the Free Software Foundation, 18 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 19 + */ 20 + #ifndef __XFS_SCRUB_H__ 21 + #define __XFS_SCRUB_H__ 22 + 23 + #ifndef CONFIG_XFS_ONLINE_SCRUB 24 + # define xfs_scrub_metadata(ip, sm) (-ENOTTY) 25 + #else 26 + int xfs_scrub_metadata(struct xfs_inode *ip, struct xfs_scrub_metadata *sm); 27 + #endif /* CONFIG_XFS_ONLINE_SCRUB */ 28 + 29 + #endif /* __XFS_SCRUB_H__ */
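`xfs_scrub.h` above uses the common kernel pattern for compiled-out features: when the config option is off, the entry point becomes a macro that evaluates to -ENOTTY, so callers need no `#ifdef` of their own. A sketch of the pattern with a hypothetical `DEMO_ONLINE_SCRUB` option:

```c
#include <errno.h>

/* Uncomment to mimic building with the feature enabled. */
/* #define DEMO_ONLINE_SCRUB 1 */

#ifndef DEMO_ONLINE_SCRUB
/* Feature compiled out: callers get -ENOTTY without any ifdefs. */
# define demo_scrub_metadata(ip, sm)	(-ENOTTY)
#else
int demo_scrub_metadata(void *ip, void *sm);
#endif
```

-ENOTTY is the conventional "this ioctl is not supported here" return, which is apt since scrub is driven through an ioctl.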
-1
fs/xfs/xfs.h
··· 19 19 #define __XFS_H__ 20 20 21 21 #ifdef CONFIG_XFS_DEBUG 22 - #define STATIC 23 22 #define DEBUG 1 24 23 #define XFS_BUF_LOCK_TRACKING 1 25 24 #endif
+4 -1
fs/xfs/xfs_attr.h
··· 48 48 #define ATTR_KERNOTIME 0x1000 /* [kernel] don't update inode timestamps */ 49 49 #define ATTR_KERNOVAL 0x2000 /* [kernel] get attr size only, not value */ 50 50 51 + #define ATTR_INCOMPLETE 0x4000 /* [kernel] return INCOMPLETE attr keys */ 52 + 51 53 #define XFS_ATTR_FLAGS \ 52 54 { ATTR_DONTFOLLOW, "DONTFOLLOW" }, \ 53 55 { ATTR_ROOT, "ROOT" }, \ ··· 58 56 { ATTR_CREATE, "CREATE" }, \ 59 57 { ATTR_REPLACE, "REPLACE" }, \ 60 58 { ATTR_KERNOTIME, "KERNOTIME" }, \ 61 - { ATTR_KERNOVAL, "KERNOVAL" } 59 + { ATTR_KERNOVAL, "KERNOVAL" }, \ 60 + { ATTR_INCOMPLETE, "INCOMPLETE" } 62 61 63 62 /* 64 63 * The maximum size (into the kernel or returned from the kernel) of an
+33 -36
fs/xfs/xfs_attr_inactive.c
··· 251 251 * traversal of the tree so we may deal with many blocks 252 252 * before we come back to this one. 253 253 */ 254 - error = xfs_da3_node_read(*trans, dp, child_fsb, -2, &child_bp, 255 - XFS_ATTR_FORK); 254 + error = xfs_da3_node_read(*trans, dp, child_fsb, -1, &child_bp, 255 + XFS_ATTR_FORK); 256 256 if (error) 257 257 return error; 258 - if (child_bp) { 259 - /* save for re-read later */ 260 - child_blkno = XFS_BUF_ADDR(child_bp); 261 258 262 - /* 263 - * Invalidate the subtree, however we have to. 264 - */ 265 - info = child_bp->b_addr; 266 - switch (info->magic) { 267 - case cpu_to_be16(XFS_DA_NODE_MAGIC): 268 - case cpu_to_be16(XFS_DA3_NODE_MAGIC): 269 - error = xfs_attr3_node_inactive(trans, dp, 270 - child_bp, level + 1); 271 - break; 272 - case cpu_to_be16(XFS_ATTR_LEAF_MAGIC): 273 - case cpu_to_be16(XFS_ATTR3_LEAF_MAGIC): 274 - error = xfs_attr3_leaf_inactive(trans, dp, 275 - child_bp); 276 - break; 277 - default: 278 - error = -EIO; 279 - xfs_trans_brelse(*trans, child_bp); 280 - break; 281 - } 282 - if (error) 283 - return error; 259 + /* save for re-read later */ 260 + child_blkno = XFS_BUF_ADDR(child_bp); 284 261 285 - /* 286 - * Remove the subsidiary block from the cache 287 - * and from the log. 288 - */ 289 - error = xfs_da_get_buf(*trans, dp, 0, child_blkno, 290 - &child_bp, XFS_ATTR_FORK); 291 - if (error) 292 - return error; 293 - xfs_trans_binval(*trans, child_bp); 262 + /* 263 + * Invalidate the subtree, however we have to. 
264 + */ 265 + info = child_bp->b_addr; 266 + switch (info->magic) { 267 + case cpu_to_be16(XFS_DA_NODE_MAGIC): 268 + case cpu_to_be16(XFS_DA3_NODE_MAGIC): 269 + error = xfs_attr3_node_inactive(trans, dp, child_bp, 270 + level + 1); 271 + break; 272 + case cpu_to_be16(XFS_ATTR_LEAF_MAGIC): 273 + case cpu_to_be16(XFS_ATTR3_LEAF_MAGIC): 274 + error = xfs_attr3_leaf_inactive(trans, dp, child_bp); 275 + break; 276 + default: 277 + error = -EIO; 278 + xfs_trans_brelse(*trans, child_bp); 279 + break; 294 280 } 281 + if (error) 282 + return error; 283 + 284 + /* 285 + * Remove the subsidiary block from the cache and from the log. 286 + */ 287 + error = xfs_da_get_buf(*trans, dp, 0, child_blkno, &child_bp, 288 + XFS_ATTR_FORK); 289 + if (error) 290 + return error; 291 + xfs_trans_binval(*trans, child_bp); 295 292 296 293 /* 297 294 * If we're not done, re-read the parent to get the next
+106 -55
fs/xfs/xfs_attr_list.c
··· 204 204 return 0; 205 205 } 206 206 207 + /* 208 + * We didn't find the block & hash mentioned in the cursor state, so 209 + * walk down the attr btree looking for the hash. 210 + */ 207 211 STATIC int 208 - xfs_attr_node_list(xfs_attr_list_context_t *context) 212 + xfs_attr_node_list_lookup( 213 + struct xfs_attr_list_context *context, 214 + struct attrlist_cursor_kern *cursor, 215 + struct xfs_buf **pbp) 209 216 { 210 - attrlist_cursor_kern_t *cursor; 211 - xfs_attr_leafblock_t *leaf; 212 - xfs_da_intnode_t *node; 213 - struct xfs_attr3_icleaf_hdr leafhdr; 214 - struct xfs_da3_icnode_hdr nodehdr; 215 - struct xfs_da_node_entry *btree; 216 - int error, i; 217 - struct xfs_buf *bp; 218 - struct xfs_inode *dp = context->dp; 219 - struct xfs_mount *mp = dp->i_mount; 217 + struct xfs_da3_icnode_hdr nodehdr; 218 + struct xfs_da_intnode *node; 219 + struct xfs_da_node_entry *btree; 220 + struct xfs_inode *dp = context->dp; 221 + struct xfs_mount *mp = dp->i_mount; 222 + struct xfs_trans *tp = context->tp; 223 + struct xfs_buf *bp; 224 + int i; 225 + int error = 0; 226 + unsigned int expected_level = 0; 227 + uint16_t magic; 228 + 229 + ASSERT(*pbp == NULL); 230 + cursor->blkno = 0; 231 + for (;;) { 232 + error = xfs_da3_node_read(tp, dp, cursor->blkno, -1, &bp, 233 + XFS_ATTR_FORK); 234 + if (error) 235 + return error; 236 + node = bp->b_addr; 237 + magic = be16_to_cpu(node->hdr.info.magic); 238 + if (magic == XFS_ATTR_LEAF_MAGIC || 239 + magic == XFS_ATTR3_LEAF_MAGIC) 240 + break; 241 + if (magic != XFS_DA_NODE_MAGIC && 242 + magic != XFS_DA3_NODE_MAGIC) { 243 + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, 244 + node); 245 + goto out_corruptbuf; 246 + } 247 + 248 + dp->d_ops->node_hdr_from_disk(&nodehdr, node); 249 + 250 + /* Tree taller than we can handle; bail out! */ 251 + if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) 252 + goto out_corruptbuf; 253 + 254 + /* Check the level from the root node. 
*/ 255 + if (cursor->blkno == 0) 256 + expected_level = nodehdr.level - 1; 257 + else if (expected_level != nodehdr.level) 258 + goto out_corruptbuf; 259 + else 260 + expected_level--; 261 + 262 + btree = dp->d_ops->node_tree_p(node); 263 + for (i = 0; i < nodehdr.count; btree++, i++) { 264 + if (cursor->hashval <= be32_to_cpu(btree->hashval)) { 265 + cursor->blkno = be32_to_cpu(btree->before); 266 + trace_xfs_attr_list_node_descend(context, 267 + btree); 268 + break; 269 + } 270 + } 271 + xfs_trans_brelse(tp, bp); 272 + 273 + if (i == nodehdr.count) 274 + return 0; 275 + 276 + /* We can't point back to the root. */ 277 + if (cursor->blkno == 0) 278 + return -EFSCORRUPTED; 279 + } 280 + 281 + if (expected_level != 0) 282 + goto out_corruptbuf; 283 + 284 + *pbp = bp; 285 + return 0; 286 + 287 + out_corruptbuf: 288 + xfs_trans_brelse(tp, bp); 289 + return -EFSCORRUPTED; 290 + } 291 + 292 + STATIC int 293 + xfs_attr_node_list( 294 + struct xfs_attr_list_context *context) 295 + { 296 + struct xfs_attr3_icleaf_hdr leafhdr; 297 + struct attrlist_cursor_kern *cursor; 298 + struct xfs_attr_leafblock *leaf; 299 + struct xfs_da_intnode *node; 300 + struct xfs_buf *bp; 301 + struct xfs_inode *dp = context->dp; 302 + struct xfs_mount *mp = dp->i_mount; 303 + int error; 220 304 221 305 trace_xfs_attr_node_list(context); 222 306 ··· 361 277 * Note that start of node block is same as start of leaf block. 
362 278 */ 363 279 if (bp == NULL) { 364 - cursor->blkno = 0; 365 - for (;;) { 366 - uint16_t magic; 367 - 368 - error = xfs_da3_node_read(context->tp, dp, 369 - cursor->blkno, -1, &bp, 370 - XFS_ATTR_FORK); 371 - if (error) 372 - return error; 373 - node = bp->b_addr; 374 - magic = be16_to_cpu(node->hdr.info.magic); 375 - if (magic == XFS_ATTR_LEAF_MAGIC || 376 - magic == XFS_ATTR3_LEAF_MAGIC) 377 - break; 378 - if (magic != XFS_DA_NODE_MAGIC && 379 - magic != XFS_DA3_NODE_MAGIC) { 380 - XFS_CORRUPTION_ERROR("xfs_attr_node_list(3)", 381 - XFS_ERRLEVEL_LOW, 382 - context->dp->i_mount, 383 - node); 384 - xfs_trans_brelse(context->tp, bp); 385 - return -EFSCORRUPTED; 386 - } 387 - 388 - dp->d_ops->node_hdr_from_disk(&nodehdr, node); 389 - btree = dp->d_ops->node_tree_p(node); 390 - for (i = 0; i < nodehdr.count; btree++, i++) { 391 - if (cursor->hashval 392 - <= be32_to_cpu(btree->hashval)) { 393 - cursor->blkno = be32_to_cpu(btree->before); 394 - trace_xfs_attr_list_node_descend(context, 395 - btree); 396 - break; 397 - } 398 - } 399 - if (i == nodehdr.count) { 400 - xfs_trans_brelse(context->tp, bp); 401 - return 0; 402 - } 403 - xfs_trans_brelse(context->tp, bp); 404 - } 280 + error = xfs_attr_node_list_lookup(context, cursor, &bp); 281 + if (error || !bp) 282 + return error; 405 283 } 406 284 ASSERT(bp != NULL); 407 285 ··· 453 407 cursor->offset = 0; 454 408 } 455 409 456 - if (entry->flags & XFS_ATTR_INCOMPLETE) 410 + if ((entry->flags & XFS_ATTR_INCOMPLETE) && 411 + !(context->flags & ATTR_INCOMPLETE)) 457 412 continue; /* skip incomplete entries */ 458 413 459 414 if (entry->flags & XFS_ATTR_LOCAL) { ··· 546 499 #define ATTR_ENTBASESIZE /* minimum bytes used by an attr */ \ 547 500 (((struct attrlist_ent *) 0)->a_name - (char *) 0) 548 501 #define ATTR_ENTSIZE(namelen) /* actual bytes used by an attr */ \ 549 - ((ATTR_ENTBASESIZE + (namelen) + 1 + sizeof(u_int32_t)-1) \ 550 - & ~(sizeof(u_int32_t)-1)) 502 + ((ATTR_ENTBASESIZE + (namelen) + 1 + 
sizeof(uint32_t)-1) \ 503 + & ~(sizeof(uint32_t)-1)) 551 504 552 505 /* 553 506 * Format an attribute and copy it out to the user's buffer. ··· 628 581 return -EINVAL; 629 582 if ((cursor->initted == 0) && 630 583 (cursor->hashval || cursor->blkno || cursor->offset)) 584 + return -EINVAL; 585 + 586 + /* Only internal consumers can retrieve incomplete attrs. */ 587 + if (flags & ATTR_INCOMPLETE) 631 588 return -EINVAL; 632 589 633 590 /*
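The ATTR_ENTSIZE() macro in the hunk above rounds each attrlist entry (entry base, name bytes, NUL terminator) up to a uint32_t boundary. Spelled out as a plain function for clarity (a sketch, not kernel code; the base size is passed in rather than computed from struct layout):

```c
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/*
 * Same computation as ATTR_ENTSIZE(namelen): account for the name and
 * its NUL terminator, then round up to a 4-byte (uint32_t) boundary.
 */
static size_t attr_entsize(size_t entbase, size_t namelen)
{
	size_t raw = entbase + namelen + 1;	/* name + NUL */

	return (raw + sizeof(uint32_t) - 1) & ~(sizeof(uint32_t) - 1);
}
```

The round-up keeps successive entries in the user's buffer naturally aligned for the 32-bit fields that precede the name.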
+317 -455
fs/xfs/xfs_bmap_util.c
··· 229 229 struct xfs_ifork *ifp, 230 230 xfs_filblks_t *count) 231 231 { 232 + struct xfs_iext_cursor icur; 232 233 struct xfs_bmbt_irec got; 233 - xfs_extnum_t numrecs = 0, i = 0; 234 + xfs_extnum_t numrecs = 0; 234 235 235 - while (xfs_iext_get_extent(ifp, i++, &got)) { 236 + for_each_xfs_iext(ifp, &icur, &got) { 236 237 if (!isnullstartblock(got.br_startblock)) { 237 238 *count += got.br_blockcount; 238 239 numrecs++; 239 240 } 240 241 } 242 + 241 243 return numrecs; 242 244 } 243 245 ··· 407 405 return 0; 408 406 } 409 407 410 - /* 411 - * returns 1 for success, 0 if we failed to map the extent. 412 - */ 413 - STATIC int 414 - xfs_getbmapx_fix_eof_hole( 415 - xfs_inode_t *ip, /* xfs incore inode pointer */ 416 - int whichfork, 417 - struct getbmapx *out, /* output structure */ 418 - int prealloced, /* this is a file with 419 - * preallocated data space */ 420 - int64_t end, /* last block requested */ 421 - xfs_fsblock_t startblock, 422 - bool moretocome) 408 + static int 409 + xfs_getbmap_report_one( 410 + struct xfs_inode *ip, 411 + struct getbmapx *bmv, 412 + struct kgetbmap *out, 413 + int64_t bmv_end, 414 + struct xfs_bmbt_irec *got) 423 415 { 424 - int64_t fixlen; 425 - xfs_mount_t *mp; /* file system mount point */ 426 - xfs_ifork_t *ifp; /* inode fork pointer */ 427 - xfs_extnum_t lastx; /* last extent pointer */ 428 - xfs_fileoff_t fileblock; 416 + struct kgetbmap *p = out + bmv->bmv_entries; 417 + bool shared = false, trimmed = false; 418 + int error; 429 419 430 - if (startblock == HOLESTARTBLOCK) { 431 - mp = ip->i_mount; 432 - out->bmv_block = -1; 433 - fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, XFS_ISIZE(ip))); 434 - fixlen -= out->bmv_offset; 435 - if (prealloced && out->bmv_offset + out->bmv_length == end) { 436 - /* Came to hole at EOF. Trim it. 
*/ 437 - if (fixlen <= 0) 438 - return 0; 439 - out->bmv_length = fixlen; 440 - } 441 - } else { 442 - if (startblock == DELAYSTARTBLOCK) 443 - out->bmv_block = -2; 444 - else 445 - out->bmv_block = xfs_fsb_to_db(ip, startblock); 446 - fileblock = XFS_BB_TO_FSB(ip->i_mount, out->bmv_offset); 447 - ifp = XFS_IFORK_PTR(ip, whichfork); 448 - if (!moretocome && 449 - xfs_iext_bno_to_ext(ifp, fileblock, &lastx) && 450 - (lastx == xfs_iext_count(ifp) - 1)) 451 - out->bmv_oflags |= BMV_OF_LAST; 452 - } 453 - 454 - return 1; 455 - } 456 - 457 - /* Adjust the reported bmap around shared/unshared extent transitions. */ 458 - STATIC int 459 - xfs_getbmap_adjust_shared( 460 - struct xfs_inode *ip, 461 - int whichfork, 462 - struct xfs_bmbt_irec *map, 463 - struct getbmapx *out, 464 - struct xfs_bmbt_irec *next_map) 465 - { 466 - struct xfs_mount *mp = ip->i_mount; 467 - xfs_agnumber_t agno; 468 - xfs_agblock_t agbno; 469 - xfs_agblock_t ebno; 470 - xfs_extlen_t elen; 471 - xfs_extlen_t nlen; 472 - int error; 473 - 474 - next_map->br_startblock = NULLFSBLOCK; 475 - next_map->br_startoff = NULLFILEOFF; 476 - next_map->br_blockcount = 0; 477 - 478 - /* Only written data blocks can be shared. */ 479 - if (!xfs_is_reflink_inode(ip) || 480 - whichfork != XFS_DATA_FORK || 481 - !xfs_bmap_is_real_extent(map)) 482 - return 0; 483 - 484 - agno = XFS_FSB_TO_AGNO(mp, map->br_startblock); 485 - agbno = XFS_FSB_TO_AGBNO(mp, map->br_startblock); 486 - error = xfs_reflink_find_shared(mp, NULL, agno, agbno, 487 - map->br_blockcount, &ebno, &elen, true); 420 + error = xfs_reflink_trim_around_shared(ip, got, &shared, &trimmed); 488 421 if (error) 489 422 return error; 490 423 491 - if (ebno == NULLAGBLOCK) { 492 - /* No shared blocks at all. */ 493 - return 0; 494 - } else if (agbno == ebno) { 424 + if (isnullstartblock(got->br_startblock) || 425 + got->br_startblock == DELAYSTARTBLOCK) { 495 426 /* 496 - * Shared extent at (agbno, elen). 
Shrink the reported 497 - * extent length and prepare to move the start of map[i] 498 - * to agbno+elen, with the aim of (re)formatting the new 499 - * map[i] the next time through the inner loop. 427 + * Delalloc extents that start beyond EOF can occur due to 428 + * speculative EOF allocation when the delalloc extent is larger 429 + * than the largest freespace extent at conversion time. These 430 + * extents cannot be converted by data writeback, so can exist 431 + * here even if we are not supposed to be finding delalloc 432 + * extents. 500 433 */ 501 - out->bmv_length = XFS_FSB_TO_BB(mp, elen); 502 - out->bmv_oflags |= BMV_OF_SHARED; 503 - if (elen != map->br_blockcount) { 504 - *next_map = *map; 505 - next_map->br_startblock += elen; 506 - next_map->br_startoff += elen; 507 - next_map->br_blockcount -= elen; 508 - } 509 - map->br_blockcount -= elen; 434 + if (got->br_startoff < XFS_B_TO_FSB(ip->i_mount, XFS_ISIZE(ip))) 435 + ASSERT((bmv->bmv_iflags & BMV_IF_DELALLOC) != 0); 436 + 437 + p->bmv_oflags |= BMV_OF_DELALLOC; 438 + p->bmv_block = -2; 510 439 } else { 511 - /* 512 - * There's an unshared extent (agbno, ebno - agbno) 513 - * followed by shared extent at (ebno, elen). Shrink 514 - * the reported extent length to cover only the unshared 515 - * extent and prepare to move up the start of map[i] to 516 - * ebno, with the aim of (re)formatting the new map[i] 517 - * the next time through the inner loop. 
518 - */ 519 - *next_map = *map; 520 - nlen = ebno - agbno; 521 - out->bmv_length = XFS_FSB_TO_BB(mp, nlen); 522 - next_map->br_startblock += nlen; 523 - next_map->br_startoff += nlen; 524 - next_map->br_blockcount -= nlen; 525 - map->br_blockcount -= nlen; 440 + p->bmv_block = xfs_fsb_to_db(ip, got->br_startblock); 526 441 } 527 442 443 + if (got->br_state == XFS_EXT_UNWRITTEN && 444 + (bmv->bmv_iflags & BMV_IF_PREALLOC)) 445 + p->bmv_oflags |= BMV_OF_PREALLOC; 446 + 447 + if (shared) 448 + p->bmv_oflags |= BMV_OF_SHARED; 449 + 450 + p->bmv_offset = XFS_FSB_TO_BB(ip->i_mount, got->br_startoff); 451 + p->bmv_length = XFS_FSB_TO_BB(ip->i_mount, got->br_blockcount); 452 + 453 + bmv->bmv_offset = p->bmv_offset + p->bmv_length; 454 + bmv->bmv_length = max(0LL, bmv_end - bmv->bmv_offset); 455 + bmv->bmv_entries++; 528 456 return 0; 457 + } 458 + 459 + static void 460 + xfs_getbmap_report_hole( 461 + struct xfs_inode *ip, 462 + struct getbmapx *bmv, 463 + struct kgetbmap *out, 464 + int64_t bmv_end, 465 + xfs_fileoff_t bno, 466 + xfs_fileoff_t end) 467 + { 468 + struct kgetbmap *p = out + bmv->bmv_entries; 469 + 470 + if (bmv->bmv_iflags & BMV_IF_NO_HOLES) 471 + return; 472 + 473 + p->bmv_block = -1; 474 + p->bmv_offset = XFS_FSB_TO_BB(ip->i_mount, bno); 475 + p->bmv_length = XFS_FSB_TO_BB(ip->i_mount, end - bno); 476 + 477 + bmv->bmv_offset = p->bmv_offset + p->bmv_length; 478 + bmv->bmv_length = max(0LL, bmv_end - bmv->bmv_offset); 479 + bmv->bmv_entries++; 480 + } 481 + 482 + static inline bool 483 + xfs_getbmap_full( 484 + struct getbmapx *bmv) 485 + { 486 + return bmv->bmv_length == 0 || bmv->bmv_entries >= bmv->bmv_count - 1; 487 + } 488 + 489 + static bool 490 + xfs_getbmap_next_rec( 491 + struct xfs_bmbt_irec *rec, 492 + xfs_fileoff_t total_end) 493 + { 494 + xfs_fileoff_t end = rec->br_startoff + rec->br_blockcount; 495 + 496 + if (end == total_end) 497 + return false; 498 + 499 + rec->br_startoff += rec->br_blockcount; 500 + if 
(!isnullstartblock(rec->br_startblock) && 501 + rec->br_startblock != DELAYSTARTBLOCK) 502 + rec->br_startblock += rec->br_blockcount; 503 + rec->br_blockcount = total_end - end; 504 + return true; 529 505 } 530 506 531 507 /* ··· 515 535 */ 516 536 int /* error code */ 517 537 xfs_getbmap( 518 - xfs_inode_t *ip, 538 + struct xfs_inode *ip, 519 539 struct getbmapx *bmv, /* user bmap structure */ 520 - xfs_bmap_format_t formatter, /* format to user */ 521 - void *arg) /* formatter arg */ 540 + struct kgetbmap *out) 522 541 { 523 - int64_t bmvend; /* last block requested */ 524 - int error = 0; /* return value */ 525 - int64_t fixlen; /* length for -1 case */ 526 - int i; /* extent number */ 527 - int lock; /* lock state */ 528 - xfs_bmbt_irec_t *map; /* buffer for user's data */ 529 - xfs_mount_t *mp; /* file system mount point */ 530 - int nex; /* # of user extents can do */ 531 - int subnex; /* # of bmapi's can do */ 532 - int nmap; /* number of map entries */ 533 - struct getbmapx *out; /* output structure */ 534 - int whichfork; /* data or attr fork */ 535 - int prealloced; /* this is a file with 536 - * preallocated data space */ 537 - int iflags; /* interface flags */ 538 - int bmapi_flags; /* flags for xfs_bmapi */ 539 - int cur_ext = 0; 540 - struct xfs_bmbt_irec inject_map; 542 + struct xfs_mount *mp = ip->i_mount; 543 + int iflags = bmv->bmv_iflags; 544 + int whichfork, lock, error = 0; 545 + int64_t bmv_end, max_len; 546 + xfs_fileoff_t bno, first_bno; 547 + struct xfs_ifork *ifp; 548 + struct xfs_bmbt_irec got, rec; 549 + xfs_filblks_t len; 550 + struct xfs_iext_cursor icur; 541 551 542 - mp = ip->i_mount; 543 - iflags = bmv->bmv_iflags; 544 - 552 + if (bmv->bmv_iflags & ~BMV_IF_VALID) 553 + return -EINVAL; 545 554 #ifndef DEBUG 546 555 /* Only allow CoW fork queries if we're debugging. 
*/ 547 556 if (iflags & BMV_IF_COWFORK) ··· 539 570 if ((iflags & BMV_IF_ATTRFORK) && (iflags & BMV_IF_COWFORK)) 540 571 return -EINVAL; 541 572 573 + if (bmv->bmv_length < -1) 574 + return -EINVAL; 575 + bmv->bmv_entries = 0; 576 + if (bmv->bmv_length == 0) 577 + return 0; 578 + 542 579 if (iflags & BMV_IF_ATTRFORK) 543 580 whichfork = XFS_ATTR_FORK; 544 581 else if (iflags & BMV_IF_COWFORK) 545 582 whichfork = XFS_COW_FORK; 546 583 else 547 584 whichfork = XFS_DATA_FORK; 548 - 549 - switch (whichfork) { 550 - case XFS_ATTR_FORK: 551 - if (XFS_IFORK_Q(ip)) { 552 - if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS && 553 - ip->i_d.di_aformat != XFS_DINODE_FMT_BTREE && 554 - ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) 555 - return -EINVAL; 556 - } else if (unlikely( 557 - ip->i_d.di_aformat != 0 && 558 - ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS)) { 559 - XFS_ERROR_REPORT("xfs_getbmap", XFS_ERRLEVEL_LOW, 560 - ip->i_mount); 561 - return -EFSCORRUPTED; 562 - } 563 - 564 - prealloced = 0; 565 - fixlen = 1LL << 32; 566 - break; 567 - case XFS_COW_FORK: 568 - if (ip->i_cformat != XFS_DINODE_FMT_EXTENTS) 569 - return -EINVAL; 570 - 571 - if (xfs_get_cowextsz_hint(ip)) { 572 - prealloced = 1; 573 - fixlen = mp->m_super->s_maxbytes; 574 - } else { 575 - prealloced = 0; 576 - fixlen = XFS_ISIZE(ip); 577 - } 578 - break; 579 - default: 580 - /* Local format data forks report no extents. 
*/ 581 - if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL) { 582 - bmv->bmv_entries = 0; 583 - return 0; 584 - } 585 - if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS && 586 - ip->i_d.di_format != XFS_DINODE_FMT_BTREE) 587 - return -EINVAL; 588 - 589 - if (xfs_get_extsz_hint(ip) || 590 - ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){ 591 - prealloced = 1; 592 - fixlen = mp->m_super->s_maxbytes; 593 - } else { 594 - prealloced = 0; 595 - fixlen = XFS_ISIZE(ip); 596 - } 597 - break; 598 - } 599 - 600 - if (bmv->bmv_length == -1) { 601 - fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, fixlen)); 602 - bmv->bmv_length = 603 - max_t(int64_t, fixlen - bmv->bmv_offset, 0); 604 - } else if (bmv->bmv_length == 0) { 605 - bmv->bmv_entries = 0; 606 - return 0; 607 - } else if (bmv->bmv_length < 0) { 608 - return -EINVAL; 609 - } 610 - 611 - nex = bmv->bmv_count - 1; 612 - if (nex <= 0) 613 - return -EINVAL; 614 - bmvend = bmv->bmv_offset + bmv->bmv_length; 615 - 616 - 617 - if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx)) 618 - return -ENOMEM; 619 - out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0); 620 - if (!out) 621 - return -ENOMEM; 585 + ifp = XFS_IFORK_PTR(ip, whichfork); 622 586 623 587 xfs_ilock(ip, XFS_IOLOCK_SHARED); 624 588 switch (whichfork) { 589 + case XFS_ATTR_FORK: 590 + if (!XFS_IFORK_Q(ip)) 591 + goto out_unlock_iolock; 592 + 593 + max_len = 1LL << 32; 594 + lock = xfs_ilock_attr_map_shared(ip); 595 + break; 596 + case XFS_COW_FORK: 597 + /* No CoW fork? 
Just return */ 598 + if (!ifp) 599 + goto out_unlock_iolock; 600 + 601 + if (xfs_get_cowextsz_hint(ip)) 602 + max_len = mp->m_super->s_maxbytes; 603 + else 604 + max_len = XFS_ISIZE(ip); 605 + 606 + lock = XFS_ILOCK_SHARED; 607 + xfs_ilock(ip, lock); 608 + break; 625 609 case XFS_DATA_FORK: 626 610 if (!(iflags & BMV_IF_DELALLOC) && 627 611 (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size)) { ··· 592 670 */ 593 671 } 594 672 673 + if (xfs_get_extsz_hint(ip) || 674 + (ip->i_d.di_flags & 675 + (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND))) 676 + max_len = mp->m_super->s_maxbytes; 677 + else 678 + max_len = XFS_ISIZE(ip); 679 + 595 680 lock = xfs_ilock_data_map_shared(ip); 596 681 break; 597 - case XFS_COW_FORK: 598 - lock = XFS_ILOCK_SHARED; 599 - xfs_ilock(ip, lock); 600 - break; 601 - case XFS_ATTR_FORK: 602 - lock = xfs_ilock_attr_map_shared(ip); 603 - break; 604 682 } 605 683 606 - /* 607 - * Don't let nex be bigger than the number of extents 608 - * we can have assuming alternating holes and real extents. 609 - */ 610 - if (nex > XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1) 611 - nex = XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1; 612 - 613 - bmapi_flags = xfs_bmapi_aflag(whichfork); 614 - if (!(iflags & BMV_IF_PREALLOC)) 615 - bmapi_flags |= XFS_BMAPI_IGSTATE; 616 - 617 - /* 618 - * Allocate enough space to handle "subnex" maps at a time. 619 - */ 620 - error = -ENOMEM; 621 - subnex = 16; 622 - map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS); 623 - if (!map) 684 + switch (XFS_IFORK_FORMAT(ip, whichfork)) { 685 + case XFS_DINODE_FMT_EXTENTS: 686 + case XFS_DINODE_FMT_BTREE: 687 + break; 688 + case XFS_DINODE_FMT_LOCAL: 689 + /* Local format inode forks report no extents. 
*/ 624 690 goto out_unlock_ilock; 625 - 626 - bmv->bmv_entries = 0; 627 - 628 - if (XFS_IFORK_NEXTENTS(ip, whichfork) == 0 && 629 - (whichfork == XFS_ATTR_FORK || !(iflags & BMV_IF_DELALLOC))) { 630 - error = 0; 631 - goto out_free_map; 691 + default: 692 + error = -EINVAL; 693 + goto out_unlock_ilock; 632 694 } 633 695 634 - do { 635 - nmap = (nex> subnex) ? subnex : nex; 636 - error = xfs_bmapi_read(ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset), 637 - XFS_BB_TO_FSB(mp, bmv->bmv_length), 638 - map, &nmap, bmapi_flags); 696 + if (bmv->bmv_length == -1) { 697 + max_len = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, max_len)); 698 + bmv->bmv_length = max(0LL, max_len - bmv->bmv_offset); 699 + } 700 + 701 + bmv_end = bmv->bmv_offset + bmv->bmv_length; 702 + 703 + first_bno = bno = XFS_BB_TO_FSBT(mp, bmv->bmv_offset); 704 + len = XFS_BB_TO_FSB(mp, bmv->bmv_length); 705 + 706 + if (!(ifp->if_flags & XFS_IFEXTENTS)) { 707 + error = xfs_iread_extents(NULL, ip, whichfork); 639 708 if (error) 640 - goto out_free_map; 641 - ASSERT(nmap <= subnex); 709 + goto out_unlock_ilock; 710 + } 642 711 643 - for (i = 0; i < nmap && bmv->bmv_length && 644 - cur_ext < bmv->bmv_count - 1; i++) { 645 - out[cur_ext].bmv_oflags = 0; 646 - if (map[i].br_state == XFS_EXT_UNWRITTEN) 647 - out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC; 648 - else if (map[i].br_startblock == DELAYSTARTBLOCK) 649 - out[cur_ext].bmv_oflags |= BMV_OF_DELALLOC; 650 - out[cur_ext].bmv_offset = 651 - XFS_FSB_TO_BB(mp, map[i].br_startoff); 652 - out[cur_ext].bmv_length = 653 - XFS_FSB_TO_BB(mp, map[i].br_blockcount); 654 - out[cur_ext].bmv_unused1 = 0; 655 - out[cur_ext].bmv_unused2 = 0; 712 + if (!xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got)) { 713 + /* 714 + * Report a whole-file hole if the delalloc flag is set to 715 + * stay compatible with the old implementation. 
716 + */ 717 + if (iflags & BMV_IF_DELALLOC) 718 + xfs_getbmap_report_hole(ip, bmv, out, bmv_end, bno, 719 + XFS_B_TO_FSB(mp, XFS_ISIZE(ip))); 720 + goto out_unlock_ilock; 721 + } 656 722 657 - /* 658 - * delayed allocation extents that start beyond EOF can 659 - * occur due to speculative EOF allocation when the 660 - * delalloc extent is larger than the largest freespace 661 - * extent at conversion time. These extents cannot be 662 - * converted by data writeback, so can exist here even 663 - * if we are not supposed to be finding delalloc 664 - * extents. 665 - */ 666 - if (map[i].br_startblock == DELAYSTARTBLOCK && 667 - map[i].br_startoff < XFS_B_TO_FSB(mp, XFS_ISIZE(ip))) 668 - ASSERT((iflags & BMV_IF_DELALLOC) != 0); 723 + while (!xfs_getbmap_full(bmv)) { 724 + xfs_trim_extent(&got, first_bno, len); 669 725 670 - if (map[i].br_startblock == HOLESTARTBLOCK && 671 - whichfork == XFS_ATTR_FORK) { 672 - /* came to the end of attribute fork */ 673 - out[cur_ext].bmv_oflags |= BMV_OF_LAST; 674 - goto out_free_map; 675 - } 676 - 677 - /* Is this a shared block? */ 678 - error = xfs_getbmap_adjust_shared(ip, whichfork, 679 - &map[i], &out[cur_ext], &inject_map); 680 - if (error) 681 - goto out_free_map; 682 - 683 - if (!xfs_getbmapx_fix_eof_hole(ip, whichfork, 684 - &out[cur_ext], prealloced, bmvend, 685 - map[i].br_startblock, 686 - inject_map.br_startblock != NULLFSBLOCK)) 687 - goto out_free_map; 688 - 689 - bmv->bmv_offset = 690 - out[cur_ext].bmv_offset + 691 - out[cur_ext].bmv_length; 692 - bmv->bmv_length = 693 - max_t(int64_t, 0, bmvend - bmv->bmv_offset); 694 - 695 - /* 696 - * In case we don't want to return the hole, 697 - * don't increase cur_ext so that we can reuse 698 - * it in the next loop. 
699 - */ 700 - if ((iflags & BMV_IF_NO_HOLES) && 701 - map[i].br_startblock == HOLESTARTBLOCK) { 702 - memset(&out[cur_ext], 0, sizeof(out[cur_ext])); 703 - continue; 704 - } 705 - 706 - /* 707 - * In order to report shared extents accurately, 708 - * we report each distinct shared/unshared part 709 - * of a single bmbt record using multiple bmap 710 - * extents. To make that happen, we iterate the 711 - * same map array item multiple times, each 712 - * time trimming out the subextent that we just 713 - * reported. 714 - * 715 - * Because of this, we must check the out array 716 - * index (cur_ext) directly against bmv_count-1 717 - * to avoid overflows. 718 - */ 719 - if (inject_map.br_startblock != NULLFSBLOCK) { 720 - map[i] = inject_map; 721 - i--; 722 - } 723 - bmv->bmv_entries++; 724 - cur_ext++; 726 + /* 727 + * Report an entry for a hole if this extent doesn't directly 728 + * follow the previous one. 729 + */ 730 + if (got.br_startoff > bno) { 731 + xfs_getbmap_report_hole(ip, bmv, out, bmv_end, bno, 732 + got.br_startoff); 733 + if (xfs_getbmap_full(bmv)) 734 + break; 725 735 } 726 - } while (nmap && bmv->bmv_length && cur_ext < bmv->bmv_count - 1); 727 736 728 - out_free_map: 729 - kmem_free(map); 730 - out_unlock_ilock: 731 - xfs_iunlock(ip, lock); 732 - out_unlock_iolock: 733 - xfs_iunlock(ip, XFS_IOLOCK_SHARED); 737 + /* 738 + * In order to report shared extents accurately, we report each 739 + * distinct shared / unshared part of a single bmbt record with 740 + * an individual getbmapx record. 
741 + */ 742 + bno = got.br_startoff + got.br_blockcount; 743 + rec = got; 744 + do { 745 + error = xfs_getbmap_report_one(ip, bmv, out, bmv_end, 746 + &rec); 747 + if (error || xfs_getbmap_full(bmv)) 748 + goto out_unlock_ilock; 749 + } while (xfs_getbmap_next_rec(&rec, bno)); 734 750 735 - for (i = 0; i < cur_ext; i++) { 736 - /* format results & advance arg */ 737 - error = formatter(&arg, &out[i]); 738 - if (error) 751 + if (!xfs_iext_next_extent(ifp, &icur, &got)) { 752 + xfs_fileoff_t end = XFS_B_TO_FSB(mp, XFS_ISIZE(ip)); 753 + 754 + out[bmv->bmv_entries - 1].bmv_oflags |= BMV_OF_LAST; 755 + 756 + if (whichfork != XFS_ATTR_FORK && bno < end && 757 + !xfs_getbmap_full(bmv)) { 758 + xfs_getbmap_report_hole(ip, bmv, out, bmv_end, 759 + bno, end); 760 + } 761 + break; 762 + } 763 + 764 + if (bno >= first_bno + len) 739 765 break; 740 766 } 741 767 742 - kmem_free(out); 768 + out_unlock_ilock: 769 + xfs_iunlock(ip, lock); 770 + out_unlock_iolock: 771 + xfs_iunlock(ip, XFS_IOLOCK_SHARED); 743 772 return error; 744 773 } 745 774 ··· 1262 1389 1263 1390 } 1264 1391 1265 - /* 1266 - * @next_fsb will keep track of the extent currently undergoing shift. 1267 - * @stop_fsb will keep track of the extent at which we have to stop. 1268 - * If we are shifting left, we will start with block (offset + len) and 1269 - * shift each extent till last extent. 1270 - * If we are shifting right, we will start with last extent inside file space 1271 - * and continue until we reach the block corresponding to offset. 
1272 - */ 1273 1392 static int 1274 - xfs_shift_file_space( 1275 - struct xfs_inode *ip, 1276 - xfs_off_t offset, 1277 - xfs_off_t len, 1278 - enum shift_direction direction) 1393 + xfs_prepare_shift( 1394 + struct xfs_inode *ip, 1395 + loff_t offset) 1279 1396 { 1280 - int done = 0; 1281 - struct xfs_mount *mp = ip->i_mount; 1282 - struct xfs_trans *tp; 1283 1397 int error; 1284 - struct xfs_defer_ops dfops; 1285 - xfs_fsblock_t first_block; 1286 - xfs_fileoff_t stop_fsb; 1287 - xfs_fileoff_t next_fsb; 1288 - xfs_fileoff_t shift_fsb; 1289 - uint resblks; 1290 - 1291 - ASSERT(direction == SHIFT_LEFT || direction == SHIFT_RIGHT); 1292 - 1293 - if (direction == SHIFT_LEFT) { 1294 - /* 1295 - * Reserve blocks to cover potential extent merges after left 1296 - * shift operations. 1297 - */ 1298 - resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); 1299 - next_fsb = XFS_B_TO_FSB(mp, offset + len); 1300 - stop_fsb = XFS_B_TO_FSB(mp, VFS_I(ip)->i_size); 1301 - } else { 1302 - /* 1303 - * If right shift, delegate the work of initialization of 1304 - * next_fsb to xfs_bmap_shift_extent as it has ilock held. 1305 - */ 1306 - resblks = 0; 1307 - next_fsb = NULLFSBLOCK; 1308 - stop_fsb = XFS_B_TO_FSB(mp, offset); 1309 - } 1310 - 1311 - shift_fsb = XFS_B_TO_FSB(mp, len); 1312 1398 1313 1399 /* 1314 1400 * Trim eofblocks to avoid shifting uninitialized post-eof preallocation ··· 1283 1451 * Writeback and invalidate cache for the remainder of the file as we're 1284 1452 * about to shift down every extent from offset to EOF. 1285 1453 */ 1286 - error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, 1287 - offset, -1); 1454 + error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, offset, -1); 1288 1455 if (error) 1289 1456 return error; 1290 1457 error = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping, ··· 1303 1472 return error; 1304 1473 } 1305 1474 1306 - /* 1307 - * The extent shifting code works on extent granularity. 
So, if 1308 - * stop_fsb is not the starting block of extent, we need to split 1309 - * the extent at stop_fsb. 1310 - */ 1311 - if (direction == SHIFT_RIGHT) { 1312 - error = xfs_bmap_split_extent(ip, stop_fsb); 1313 - if (error) 1314 - return error; 1315 - } 1316 - 1317 - while (!error && !done) { 1318 - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, 1319 - &tp); 1320 - if (error) 1321 - break; 1322 - 1323 - xfs_ilock(ip, XFS_ILOCK_EXCL); 1324 - error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot, 1325 - ip->i_gdquot, ip->i_pdquot, resblks, 0, 1326 - XFS_QMOPT_RES_REGBLKS); 1327 - if (error) 1328 - goto out_trans_cancel; 1329 - 1330 - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 1331 - 1332 - xfs_defer_init(&dfops, &first_block); 1333 - 1334 - /* 1335 - * We are using the write transaction in which max 2 bmbt 1336 - * updates are allowed 1337 - */ 1338 - error = xfs_bmap_shift_extents(tp, ip, &next_fsb, shift_fsb, 1339 - &done, stop_fsb, &first_block, &dfops, 1340 - direction, XFS_BMAP_MAX_SHIFT_EXTENTS); 1341 - if (error) 1342 - goto out_bmap_cancel; 1343 - 1344 - error = xfs_defer_finish(&tp, &dfops); 1345 - if (error) 1346 - goto out_bmap_cancel; 1347 - 1348 - error = xfs_trans_commit(tp); 1349 - } 1350 - 1351 - return error; 1352 - 1353 - out_bmap_cancel: 1354 - xfs_defer_cancel(&dfops); 1355 - out_trans_cancel: 1356 - xfs_trans_cancel(tp); 1357 - return error; 1475 + return 0; 1358 1476 } 1359 1477 1360 1478 /* ··· 1324 1544 xfs_off_t offset, 1325 1545 xfs_off_t len) 1326 1546 { 1327 - int error; 1547 + struct xfs_mount *mp = ip->i_mount; 1548 + struct xfs_trans *tp; 1549 + int error; 1550 + struct xfs_defer_ops dfops; 1551 + xfs_fsblock_t first_block; 1552 + xfs_fileoff_t stop_fsb = XFS_B_TO_FSB(mp, VFS_I(ip)->i_size); 1553 + xfs_fileoff_t next_fsb = XFS_B_TO_FSB(mp, offset + len); 1554 + xfs_fileoff_t shift_fsb = XFS_B_TO_FSB(mp, len); 1555 + uint resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); 1556 + bool done = false; 1328 1557 1329 1558 
ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); 1559 + ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL)); 1560 + 1330 1561 trace_xfs_collapse_file_space(ip); 1331 1562 1332 1563 error = xfs_free_file_space(ip, offset, len); 1333 1564 if (error) 1334 1565 return error; 1335 1566 1336 - return xfs_shift_file_space(ip, offset, len, SHIFT_LEFT); 1567 + error = xfs_prepare_shift(ip, offset); 1568 + if (error) 1569 + return error; 1570 + 1571 + while (!error && !done) { 1572 + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, 1573 + &tp); 1574 + if (error) 1575 + break; 1576 + 1577 + xfs_ilock(ip, XFS_ILOCK_EXCL); 1578 + error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot, 1579 + ip->i_gdquot, ip->i_pdquot, resblks, 0, 1580 + XFS_QMOPT_RES_REGBLKS); 1581 + if (error) 1582 + goto out_trans_cancel; 1583 + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 1584 + 1585 + xfs_defer_init(&dfops, &first_block); 1586 + error = xfs_bmap_collapse_extents(tp, ip, &next_fsb, shift_fsb, 1587 + &done, stop_fsb, &first_block, &dfops); 1588 + if (error) 1589 + goto out_bmap_cancel; 1590 + 1591 + error = xfs_defer_finish(&tp, &dfops); 1592 + if (error) 1593 + goto out_bmap_cancel; 1594 + error = xfs_trans_commit(tp); 1595 + } 1596 + 1597 + return error; 1598 + 1599 + out_bmap_cancel: 1600 + xfs_defer_cancel(&dfops); 1601 + out_trans_cancel: 1602 + xfs_trans_cancel(tp); 1603 + return error; 1337 1604 } 1338 1605 1339 1606 /* ··· 1401 1574 loff_t offset, 1402 1575 loff_t len) 1403 1576 { 1577 + struct xfs_mount *mp = ip->i_mount; 1578 + struct xfs_trans *tp; 1579 + int error; 1580 + struct xfs_defer_ops dfops; 1581 + xfs_fsblock_t first_block; 1582 + xfs_fileoff_t stop_fsb = XFS_B_TO_FSB(mp, offset); 1583 + xfs_fileoff_t next_fsb = NULLFSBLOCK; 1584 + xfs_fileoff_t shift_fsb = XFS_B_TO_FSB(mp, len); 1585 + bool done = false; 1586 + 1404 1587 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); 1588 + ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL)); 1589 + 1405 1590 trace_xfs_insert_file_space(ip); 1406 
1591 1407 - return xfs_shift_file_space(ip, offset, len, SHIFT_RIGHT); 1592 + error = xfs_prepare_shift(ip, offset); 1593 + if (error) 1594 + return error; 1595 + 1596 + /* 1597 + * The extent shifting code works on extent granularity. So, if stop_fsb 1598 + * is not the starting block of extent, we need to split the extent at 1599 + * stop_fsb. 1600 + */ 1601 + error = xfs_bmap_split_extent(ip, stop_fsb); 1602 + if (error) 1603 + return error; 1604 + 1605 + while (!error && !done) { 1606 + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, 1607 + &tp); 1608 + if (error) 1609 + break; 1610 + 1611 + xfs_ilock(ip, XFS_ILOCK_EXCL); 1612 + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 1613 + xfs_defer_init(&dfops, &first_block); 1614 + error = xfs_bmap_insert_extents(tp, ip, &next_fsb, shift_fsb, 1615 + &done, stop_fsb, &first_block, &dfops); 1616 + if (error) 1617 + goto out_bmap_cancel; 1618 + 1619 + error = xfs_defer_finish(&tp, &dfops); 1620 + if (error) 1621 + goto out_bmap_cancel; 1622 + error = xfs_trans_commit(tp); 1623 + } 1624 + 1625 + return error; 1626 + 1627 + out_bmap_cancel: 1628 + xfs_defer_cancel(&dfops); 1629 + xfs_trans_cancel(tp); 1630 + return error; 1408 1631 } 1409 1632 1410 1633 /* ··· 1709 1832 xfs_filblks_t aforkblks = 0; 1710 1833 xfs_filblks_t taforkblks = 0; 1711 1834 xfs_extnum_t junk; 1712 - xfs_extnum_t nextents; 1713 1835 uint64_t tmp; 1714 1836 int error; 1715 1837 ··· 1783 1907 1784 1908 switch (ip->i_d.di_format) { 1785 1909 case XFS_DINODE_FMT_EXTENTS: 1786 - /* 1787 - * If the extents fit in the inode, fix the pointer. Otherwise 1788 - * it's already NULL or pointing to the extent. 
1789 - */ 1790 - nextents = xfs_iext_count(&ip->i_df); 1791 - if (nextents <= XFS_INLINE_EXTS) 1792 - ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; 1793 1910 (*src_log_flags) |= XFS_ILOG_DEXT; 1794 1911 break; 1795 1912 case XFS_DINODE_FMT_BTREE: ··· 1794 1925 1795 1926 switch (tip->i_d.di_format) { 1796 1927 case XFS_DINODE_FMT_EXTENTS: 1797 - /* 1798 - * If the extents fit in the inode, fix the pointer. Otherwise 1799 - * it's already NULL or pointing to the extent. 1800 - */ 1801 - nextents = xfs_iext_count(&tip->i_df); 1802 - if (nextents <= XFS_INLINE_EXTS) 1803 - tifp->if_u1.if_extents = tifp->if_u2.if_inline_ext; 1804 1928 (*target_log_flags) |= XFS_ILOG_DEXT; 1805 1929 break; 1806 1930 case XFS_DINODE_FMT_BTREE:
+7 -3
fs/xfs/xfs_bmap_util.h
··· 47 47 int xfs_bmap_punch_delalloc_range(struct xfs_inode *ip, 48 48 xfs_fileoff_t start_fsb, xfs_fileoff_t length); 49 49 50 - /* bmap to userspace formatter - copy to user & advance pointer */ 51 - typedef int (*xfs_bmap_format_t)(void **, struct getbmapx *); 50 + struct kgetbmap { 51 + __s64 bmv_offset; /* file offset of segment in blocks */ 52 + __s64 bmv_block; /* starting block (64-bit daddr_t) */ 53 + __s64 bmv_length; /* length of segment, blocks */ 54 + __s32 bmv_oflags; /* output flags */ 55 + }; 52 56 int xfs_getbmap(struct xfs_inode *ip, struct getbmapx *bmv, 53 - xfs_bmap_format_t formatter, void *arg); 57 + struct kgetbmap *out); 54 58 55 59 /* functions in xfs_bmap.c that are only needed by xfs_bmap_util.c */ 56 60 int xfs_bmap_extsize_align(struct xfs_mount *mp, struct xfs_bmbt_irec *gotp,
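The kgetbmap fields above are in 512-byte basic blocks ("BB", i.e. daddr_t units), while bmbt records count filesystem blocks, so xfs_getbmap() converts constantly with XFS_FSB_TO_BB()/XFS_BB_TO_FSBT(). Those macros reduce to shifts by (blocklog - BBSHIFT); a sketch assuming 4096-byte filesystem blocks (blocklog 12):

```c
#include <assert.h>
#include <stdint.h>

#define BBSHIFT 9	/* 512-byte basic blocks */

/* Filesystem blocks to basic blocks: multiply by blocks-per-fsblock. */
static uint64_t fsb_to_bb(unsigned int blocklog, uint64_t fsb)
{
	return fsb << (blocklog - BBSHIFT);
}

/* Basic blocks to filesystem blocks, truncating like XFS_BB_TO_FSBT. */
static uint64_t bb_to_fsbt(unsigned int blocklog, uint64_t bb)
{
	return bb >> (blocklog - BBSHIFT);
}
```

The truncating direction is used to turn bmv_offset into a starting fsblock, so a request beginning mid-block still covers the extent containing that offset.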
+16
fs/xfs/xfs_buf.c
··· 42 42 #include "xfs_mount.h" 43 43 #include "xfs_trace.h" 44 44 #include "xfs_log.h" 45 + #include "xfs_errortag.h" 46 + #include "xfs_error.h" 45 47 46 48 static kmem_zone_t *xfs_buf_zone; 47 49 ··· 2130 2128 xfs_buf_terminate(void) 2131 2129 { 2132 2130 kmem_zone_destroy(xfs_buf_zone); 2131 + } 2132 + 2133 + void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref) 2134 + { 2135 + /* 2136 + * Set the lru reference count to 0 based on the error injection tag. 2137 + * This allows userspace to disrupt buffer caching for debug/testing 2138 + * purposes. 2139 + */ 2140 + if (XFS_TEST_ERROR(false, bp->b_target->bt_mount, 2141 + XFS_ERRTAG_BUF_LRU_REF)) 2142 + lru_ref = 0; 2143 + 2144 + atomic_set(&bp->b_lru_ref, lru_ref); 2133 2145 }
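The out-of-lined xfs_buf_set_ref() above consults the new buf_lru_ref error tag: when the tag is armed, the buffer's LRU reference count is pinned at zero so the cache drops the buffer immediately. A toy model of that behavior (the random one-in-N firing of the real XFS_TEST_ERROR() is collapsed to a plain flag here):

```c
#include <assert.h>
#include <stdbool.h>

/* Stands in for the per-mount XFS_ERRTAG_BUF_LRU_REF knob. */
static bool errortag_armed;

struct buf {
	int lru_ref;
};

/*
 * Mirror of the new xfs_buf_set_ref(): with the tag armed, force the
 * LRU reference to zero to disrupt caching for debug/testing.
 */
static void buf_set_ref(struct buf *bp, int lru_ref)
{
	if (errortag_armed)
		lru_ref = 0;
	bp->lru_ref = lru_ref;
}
```

Forcing every buffer straight back through reclaim exercises the read verifiers far more often, which is the point of the tag.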
+1 -4
fs/xfs/xfs_buf.h
··· 352 352 #define XFS_BUF_ADDR(bp) ((bp)->b_maps[0].bm_bn) 353 353 #define XFS_BUF_SET_ADDR(bp, bno) ((bp)->b_maps[0].bm_bn = (xfs_daddr_t)(bno)) 354 354 355 - static inline void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref) 356 - { 357 - atomic_set(&bp->b_lru_ref, lru_ref); 358 - } 355 + void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref); 359 356 360 357 static inline int xfs_buf_ispinned(struct xfs_buf *bp) 361 358 {
+5 -5
fs/xfs/xfs_dir2_readdir.c
··· 41 41 DT_FIFO, DT_SOCK, DT_LNK, DT_WHT, 42 42 }; 43 43 44 - static unsigned char 44 + unsigned char 45 45 xfs_dir3_get_dtype( 46 46 struct xfs_mount *mp, 47 47 uint8_t filetype) ··· 266 266 xfs_dablk_t next_ra; 267 267 xfs_dablk_t map_off; 268 268 xfs_dablk_t last_da; 269 - xfs_extnum_t idx; 269 + struct xfs_iext_cursor icur; 270 270 int ra_want; 271 271 int error = 0; 272 272 ··· 283 283 */ 284 284 last_da = xfs_dir2_byte_to_da(geo, XFS_DIR2_LEAF_OFFSET); 285 285 map_off = xfs_dir2_db_to_da(geo, xfs_dir2_byte_to_db(geo, *cur_off)); 286 - if (!xfs_iext_lookup_extent(dp, ifp, map_off, &idx, &map)) 286 + if (!xfs_iext_lookup_extent(dp, ifp, map_off, &icur, &map)) 287 287 goto out; 288 288 if (map.br_startoff >= last_da) 289 289 goto out; ··· 311 311 if (next_ra >= last_da) 312 312 goto out_no_ra; 313 313 if (map.br_blockcount < geo->fsbcount && 314 - !xfs_iext_get_extent(ifp, ++idx, &map)) 314 + !xfs_iext_next_extent(ifp, &icur, &map)) 315 315 goto out_no_ra; 316 316 if (map.br_startoff >= last_da) 317 317 goto out_no_ra; ··· 334 334 ra_want -= geo->fsbcount; 335 335 next_ra += geo->fsbcount; 336 336 } 337 - if (!xfs_iext_get_extent(ifp, ++idx, &map)) { 337 + if (!xfs_iext_next_extent(ifp, &icur, &map)) { 338 338 *ra_blk = last_da; 339 339 break; 340 340 }
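The readdir readahead code above (like the bmap, dquot, and attr code elsewhere in this merge) now walks extents through an opaque xfs_iext_cursor instead of a raw extent index. A hypothetical flat-array model of the lookup/next contract (the real cursor walks the new in-memory btree; names and signatures here are illustrative only):

```c
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

struct irec {
	uint64_t startoff;	/* file offset, in fs blocks */
	uint64_t blockcount;
};

struct iext_cursor {
	size_t pos;		/* opaque to callers in the real API */
};

/* Find the extent containing @off, or the first one after it. */
static int iext_lookup_extent(const struct irec *recs, size_t nrecs,
			      uint64_t off, struct iext_cursor *cur,
			      struct irec *out)
{
	for (cur->pos = 0; cur->pos < nrecs; cur->pos++) {
		if (off < recs[cur->pos].startoff +
			  recs[cur->pos].blockcount) {
			*out = recs[cur->pos];
			return 1;
		}
	}
	return 0;
}

/* Step the cursor to the next extent, if any. */
static int iext_next_extent(const struct irec *recs, size_t nrecs,
			    struct iext_cursor *cur, struct irec *out)
{
	if (++cur->pos >= nrecs)
		return 0;
	*out = recs[cur->pos];
	return 1;
}
```

Keeping the position inside a cursor lets the backing store change (flat array, then btree) without touching any of the call sites converted in this series.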
+2 -19
fs/xfs/xfs_dquot.c
··· 53 53 * otherwise by the lowest id first, see xfs_dqlock2. 54 54 */ 55 55 56 - #ifdef DEBUG 57 - xfs_buftarg_t *xfs_dqerror_target; 58 - int xfs_do_dqerror; 59 - int xfs_dqreq_num; 60 - int xfs_dqerror_mod = 33; 61 - #endif 62 - 63 56 struct kmem_zone *xfs_qm_dqtrxzone; 64 57 static struct kmem_zone *xfs_qm_dqzone; 65 58 ··· 696 703 xfs_dqid_t next_id = *id + 1; /* simple advance */ 697 704 uint lock_flags; 698 705 struct xfs_bmbt_irec got; 699 - xfs_extnum_t idx; 706 + struct xfs_iext_cursor cur; 700 707 xfs_fsblock_t start; 701 708 int error = 0; 702 709 ··· 720 727 return error; 721 728 } 722 729 723 - if (xfs_iext_lookup_extent(quotip, &quotip->i_df, start, &idx, &got)) { 730 + if (xfs_iext_lookup_extent(quotip, &quotip->i_df, start, &cur, &got)) { 724 731 /* contiguous chunk, bump startoff for the id calculation */ 725 732 if (got.br_startoff < start) 726 733 got.br_startoff = start; ··· 763 770 return -ESRCH; 764 771 } 765 772 766 - #ifdef DEBUG 767 - if (xfs_do_dqerror) { 768 - if ((xfs_dqerror_target == mp->m_ddev_targp) && 769 - (xfs_dqreq_num++ % xfs_dqerror_mod) == 0) { 770 - xfs_debug(mp, "Returning error in dqget"); 771 - return -EIO; 772 - } 773 - } 774 - 775 773 ASSERT(type == XFS_DQ_USER || 776 774 type == XFS_DQ_PROJ || 777 775 type == XFS_DQ_GROUP); ··· 770 786 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 771 787 ASSERT(xfs_inode_dquot(ip, type) == NULL); 772 788 } 773 - #endif 774 789 775 790 restart: 776 791 mutex_lock(&qi->qi_tree_lock);
+5 -1
fs/xfs/xfs_error.c
··· 21 21 #include "xfs_log_format.h" 22 22 #include "xfs_trans_resv.h" 23 23 #include "xfs_mount.h" 24 + #include "xfs_errortag.h" 24 25 #include "xfs_error.h" 25 26 #include "xfs_sysfs.h" 26 27 ··· 59 58 XFS_RANDOM_DROP_WRITES, 60 59 XFS_RANDOM_LOG_BAD_CRC, 61 60 XFS_RANDOM_LOG_ITEM_PIN, 61 + XFS_RANDOM_BUF_LRU_REF, 62 62 }; 63 63 64 64 struct xfs_errortag_attr { ··· 165 163 XFS_ERRORTAG_ATTR_RW(drop_writes, XFS_ERRTAG_DROP_WRITES); 166 164 XFS_ERRORTAG_ATTR_RW(log_bad_crc, XFS_ERRTAG_LOG_BAD_CRC); 167 165 XFS_ERRORTAG_ATTR_RW(log_item_pin, XFS_ERRTAG_LOG_ITEM_PIN); 166 + XFS_ERRORTAG_ATTR_RW(buf_lru_ref, XFS_ERRTAG_BUF_LRU_REF); 168 167 169 168 static struct attribute *xfs_errortag_attrs[] = { 170 169 XFS_ERRORTAG_ATTR_LIST(noerror), ··· 199 196 XFS_ERRORTAG_ATTR_LIST(drop_writes), 200 197 XFS_ERRORTAG_ATTR_LIST(log_bad_crc), 201 198 XFS_ERRORTAG_ATTR_LIST(log_item_pin), 199 + XFS_ERRORTAG_ATTR_LIST(buf_lru_ref), 202 200 NULL, 203 201 }; 204 202 205 - struct kobj_type xfs_errortag_ktype = { 203 + static struct kobj_type xfs_errortag_ktype = { 206 204 .release = xfs_sysfs_release, 207 205 .sysfs_ops = &xfs_errortag_sysfs_ops, 208 206 .default_attrs = xfs_errortag_attrs,
-81
fs/xfs/xfs_error.h
··· 63 63 } \ 64 64 } 65 65 66 - /* 67 - * error injection tags - the labels can be anything you want 68 - * but each tag should have its own unique number 69 - */ 70 - 71 - #define XFS_ERRTAG_NOERROR 0 72 - #define XFS_ERRTAG_IFLUSH_1 1 73 - #define XFS_ERRTAG_IFLUSH_2 2 74 - #define XFS_ERRTAG_IFLUSH_3 3 75 - #define XFS_ERRTAG_IFLUSH_4 4 76 - #define XFS_ERRTAG_IFLUSH_5 5 77 - #define XFS_ERRTAG_IFLUSH_6 6 78 - #define XFS_ERRTAG_DA_READ_BUF 7 79 - #define XFS_ERRTAG_BTREE_CHECK_LBLOCK 8 80 - #define XFS_ERRTAG_BTREE_CHECK_SBLOCK 9 81 - #define XFS_ERRTAG_ALLOC_READ_AGF 10 82 - #define XFS_ERRTAG_IALLOC_READ_AGI 11 83 - #define XFS_ERRTAG_ITOBP_INOTOBP 12 84 - #define XFS_ERRTAG_IUNLINK 13 85 - #define XFS_ERRTAG_IUNLINK_REMOVE 14 86 - #define XFS_ERRTAG_DIR_INO_VALIDATE 15 87 - #define XFS_ERRTAG_BULKSTAT_READ_CHUNK 16 88 - #define XFS_ERRTAG_IODONE_IOERR 17 89 - #define XFS_ERRTAG_STRATREAD_IOERR 18 90 - #define XFS_ERRTAG_STRATCMPL_IOERR 19 91 - #define XFS_ERRTAG_DIOWRITE_IOERR 20 92 - #define XFS_ERRTAG_BMAPIFORMAT 21 93 - #define XFS_ERRTAG_FREE_EXTENT 22 94 - #define XFS_ERRTAG_RMAP_FINISH_ONE 23 95 - #define XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE 24 96 - #define XFS_ERRTAG_REFCOUNT_FINISH_ONE 25 97 - #define XFS_ERRTAG_BMAP_FINISH_ONE 26 98 - #define XFS_ERRTAG_AG_RESV_CRITICAL 27 99 - /* 100 - * DEBUG mode instrumentation to test and/or trigger delayed allocation 101 - * block killing in the event of failed writes. When enabled, all 102 - * buffered writes are silenty dropped and handled as if they failed. 103 - * All delalloc blocks in the range of the write (including pre-existing 104 - * delalloc blocks!) are tossed as part of the write failure error 105 - * handling sequence. 106 - */ 107 - #define XFS_ERRTAG_DROP_WRITES 28 108 - #define XFS_ERRTAG_LOG_BAD_CRC 29 109 - #define XFS_ERRTAG_LOG_ITEM_PIN 30 110 - #define XFS_ERRTAG_MAX 31 111 - 112 - /* 113 - * Random factors for above tags, 1 means always, 2 means 1/2 time, etc. 
114 - */ 115 - #define XFS_RANDOM_DEFAULT 100 116 - #define XFS_RANDOM_IFLUSH_1 XFS_RANDOM_DEFAULT 117 - #define XFS_RANDOM_IFLUSH_2 XFS_RANDOM_DEFAULT 118 - #define XFS_RANDOM_IFLUSH_3 XFS_RANDOM_DEFAULT 119 - #define XFS_RANDOM_IFLUSH_4 XFS_RANDOM_DEFAULT 120 - #define XFS_RANDOM_IFLUSH_5 XFS_RANDOM_DEFAULT 121 - #define XFS_RANDOM_IFLUSH_6 XFS_RANDOM_DEFAULT 122 - #define XFS_RANDOM_DA_READ_BUF XFS_RANDOM_DEFAULT 123 - #define XFS_RANDOM_BTREE_CHECK_LBLOCK (XFS_RANDOM_DEFAULT/4) 124 - #define XFS_RANDOM_BTREE_CHECK_SBLOCK XFS_RANDOM_DEFAULT 125 - #define XFS_RANDOM_ALLOC_READ_AGF XFS_RANDOM_DEFAULT 126 - #define XFS_RANDOM_IALLOC_READ_AGI XFS_RANDOM_DEFAULT 127 - #define XFS_RANDOM_ITOBP_INOTOBP XFS_RANDOM_DEFAULT 128 - #define XFS_RANDOM_IUNLINK XFS_RANDOM_DEFAULT 129 - #define XFS_RANDOM_IUNLINK_REMOVE XFS_RANDOM_DEFAULT 130 - #define XFS_RANDOM_DIR_INO_VALIDATE XFS_RANDOM_DEFAULT 131 - #define XFS_RANDOM_BULKSTAT_READ_CHUNK XFS_RANDOM_DEFAULT 132 - #define XFS_RANDOM_IODONE_IOERR (XFS_RANDOM_DEFAULT/10) 133 - #define XFS_RANDOM_STRATREAD_IOERR (XFS_RANDOM_DEFAULT/10) 134 - #define XFS_RANDOM_STRATCMPL_IOERR (XFS_RANDOM_DEFAULT/10) 135 - #define XFS_RANDOM_DIOWRITE_IOERR (XFS_RANDOM_DEFAULT/10) 136 - #define XFS_RANDOM_BMAPIFORMAT XFS_RANDOM_DEFAULT 137 - #define XFS_RANDOM_FREE_EXTENT 1 138 - #define XFS_RANDOM_RMAP_FINISH_ONE 1 139 - #define XFS_RANDOM_REFCOUNT_CONTINUE_UPDATE 1 140 - #define XFS_RANDOM_REFCOUNT_FINISH_ONE 1 141 - #define XFS_RANDOM_BMAP_FINISH_ONE 1 142 - #define XFS_RANDOM_AG_RESV_CRITICAL 4 143 - #define XFS_RANDOM_DROP_WRITES 1 144 - #define XFS_RANDOM_LOG_BAD_CRC 1 145 - #define XFS_RANDOM_LOG_ITEM_PIN 1 146 - 147 66 #ifdef DEBUG 148 67 extern int xfs_errortag_init(struct xfs_mount *mp); 149 68 extern void xfs_errortag_del(struct xfs_mount *mp);
+1 -1
fs/xfs/xfs_file.c
··· 984 984 * point we can change the ->readdir prototype to include the 985 985 * buffer size. For now we use the current glibc buffer size. 986 986 */ 987 - bufsize = (size_t)min_t(loff_t, 32768, ip->i_d.di_size); 987 + bufsize = (size_t)min_t(loff_t, XFS_READDIR_BUFSIZE, ip->i_d.di_size); 988 988 989 989 return xfs_readdir(NULL, ip, ctx, bufsize); 990 990 }
+1 -1
fs/xfs/xfs_icache.c
@@ -610,7 +610,7 @@
 	} else {
 		rcu_read_unlock();
 		if (flags & XFS_IGET_INCORE) {
-			error = -ENOENT;
+			error = -ENODATA;
 			goto out_error_or_again;
 		}
 		XFS_STATS_INC(mp, xs_ig_missed);
+6 -27
fs/xfs/xfs_inode.c
··· 39 39 #include "xfs_ialloc.h" 40 40 #include "xfs_bmap.h" 41 41 #include "xfs_bmap_util.h" 42 + #include "xfs_errortag.h" 42 43 #include "xfs_error.h" 43 44 #include "xfs_quota.h" 44 45 #include "xfs_filestream.h" ··· 385 384 } 386 385 #endif 387 386 388 - #ifdef DEBUG 389 - int xfs_locked_n; 390 - int xfs_small_retries; 391 - int xfs_middle_retries; 392 - int xfs_lots_retries; 393 - int xfs_lock_delays; 394 - #endif 395 - 396 387 /* 397 388 * xfs_lockdep_subclass_ok() is only used in an ASSERT, so is only called when 398 389 * DEBUG or XFS_WARN is set. And MAX_LOCKDEP_SUBCLASSES is then only defined ··· 537 544 538 545 if ((attempts % 5) == 0) { 539 546 delay(1); /* Don't just spin the CPU */ 540 - #ifdef DEBUG 541 - xfs_lock_delays++; 542 - #endif 543 547 } 544 548 i = 0; 545 549 try_lock = 0; 546 550 goto again; 547 551 } 548 - 549 - #ifdef DEBUG 550 - if (attempts) { 551 - if (attempts < 5) xfs_small_retries++; 552 - else if (attempts < 100) xfs_middle_retries++; 553 - else xfs_lots_retries++; 554 - } else { 555 - xfs_locked_n++; 556 - } 557 - #endif 558 552 } 559 553 560 554 /* ··· 747 767 xfs_inode_t *pip, 748 768 umode_t mode, 749 769 xfs_nlink_t nlink, 750 - xfs_dev_t rdev, 770 + dev_t rdev, 751 771 prid_t prid, 752 772 int okalloc, 753 773 xfs_buf_t **ialloc_context, ··· 799 819 set_nlink(inode, nlink); 800 820 ip->i_d.di_uid = xfs_kuid_to_uid(current_fsuid()); 801 821 ip->i_d.di_gid = xfs_kgid_to_gid(current_fsgid()); 822 + inode->i_rdev = rdev; 802 823 xfs_set_projid(ip, prid); 803 824 804 825 if (pip && XFS_INHERIT_GID(pip)) { ··· 848 867 case S_IFBLK: 849 868 case S_IFSOCK: 850 869 ip->i_d.di_format = XFS_DINODE_FMT_DEV; 851 - ip->i_df.if_u2.if_rdev = rdev; 852 870 ip->i_df.if_flags = 0; 853 871 flags |= XFS_ILOG_DEV; 854 872 break; ··· 913 933 ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS; 914 934 ip->i_df.if_flags = XFS_IFEXTENTS; 915 935 ip->i_df.if_bytes = ip->i_df.if_real_bytes = 0; 916 - ip->i_df.if_u1.if_extents = NULL; 936 + 
ip->i_df.if_u1.if_root = NULL; 917 937 break; 918 938 default: 919 939 ASSERT(0); ··· 955 975 the inode. */ 956 976 umode_t mode, 957 977 xfs_nlink_t nlink, 958 - xfs_dev_t rdev, 978 + dev_t rdev, 959 979 prid_t prid, /* project id */ 960 980 int okalloc, /* ok to allocate new space */ 961 981 xfs_inode_t **ipp, /* pointer to inode; it will be ··· 1127 1147 xfs_inode_t *dp, 1128 1148 struct xfs_name *name, 1129 1149 umode_t mode, 1130 - xfs_dev_t rdev, 1150 + dev_t rdev, 1131 1151 xfs_inode_t **ipp) 1132 1152 { 1133 1153 int is_dir = S_ISDIR(mode); ··· 1163 1183 return error; 1164 1184 1165 1185 if (is_dir) { 1166 - rdev = 0; 1167 1186 resblks = XFS_MKDIR_SPACE_RES(mp, name->len); 1168 1187 tres = &M_RES(mp)->tr_mkdir; 1169 1188 } else {
+2 -2
fs/xfs/xfs_inode.h
@@ -391,7 +391,7 @@
 int		xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,
			   struct xfs_inode **ipp, struct xfs_name *ci_name);
 int		xfs_create(struct xfs_inode *dp, struct xfs_name *name,
-			   umode_t mode, xfs_dev_t rdev, struct xfs_inode **ipp);
+			   umode_t mode, dev_t rdev, struct xfs_inode **ipp);
 int		xfs_create_tmpfile(struct xfs_inode *dp, struct dentry *dentry,
			   umode_t mode, struct xfs_inode **ipp);
 int		xfs_remove(struct xfs_inode *dp, struct xfs_name *name,
@@ -428,7 +428,7 @@
 xfs_extlen_t	xfs_get_cowextsz_hint(struct xfs_inode *ip);
 
 int xfs_dir_ialloc(struct xfs_trans **, struct xfs_inode *, umode_t,
-		   xfs_nlink_t, xfs_dev_t, prid_t, int,
+		   xfs_nlink_t, dev_t, prid_t, int,
		   struct xfs_inode **, int *);
 
 /* from xfs_file.c */
+7 -22
fs/xfs/xfs_inode_item.c
··· 72 72 break; 73 73 74 74 case XFS_DINODE_FMT_DEV: 75 - case XFS_DINODE_FMT_UUID: 76 75 break; 77 76 default: 78 77 ASSERT(0); ··· 155 156 switch (ip->i_d.di_format) { 156 157 case XFS_DINODE_FMT_EXTENTS: 157 158 iip->ili_fields &= 158 - ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | 159 - XFS_ILOG_DEV | XFS_ILOG_UUID); 159 + ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | XFS_ILOG_DEV); 160 160 161 161 if ((iip->ili_fields & XFS_ILOG_DEXT) && 162 162 ip->i_d.di_nextents > 0 && 163 163 ip->i_df.if_bytes > 0) { 164 164 struct xfs_bmbt_rec *p; 165 165 166 - ASSERT(ip->i_df.if_u1.if_extents != NULL); 167 166 ASSERT(xfs_iext_count(&ip->i_df) > 0); 168 167 169 168 p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IEXT); ··· 178 181 break; 179 182 case XFS_DINODE_FMT_BTREE: 180 183 iip->ili_fields &= 181 - ~(XFS_ILOG_DDATA | XFS_ILOG_DEXT | 182 - XFS_ILOG_DEV | XFS_ILOG_UUID); 184 + ~(XFS_ILOG_DDATA | XFS_ILOG_DEXT | XFS_ILOG_DEV); 183 185 184 186 if ((iip->ili_fields & XFS_ILOG_DBROOT) && 185 187 ip->i_df.if_broot_bytes > 0) { ··· 196 200 break; 197 201 case XFS_DINODE_FMT_LOCAL: 198 202 iip->ili_fields &= 199 - ~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT | 200 - XFS_ILOG_DEV | XFS_ILOG_UUID); 203 + ~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT | XFS_ILOG_DEV); 201 204 if ((iip->ili_fields & XFS_ILOG_DDATA) && 202 205 ip->i_df.if_bytes > 0) { 203 206 /* ··· 219 224 break; 220 225 case XFS_DINODE_FMT_DEV: 221 226 iip->ili_fields &= 222 - ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | 223 - XFS_ILOG_DEXT | XFS_ILOG_UUID); 227 + ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | XFS_ILOG_DEXT); 224 228 if (iip->ili_fields & XFS_ILOG_DEV) 225 - ilf->ilf_u.ilfu_rdev = ip->i_df.if_u2.if_rdev; 226 - break; 227 - case XFS_DINODE_FMT_UUID: 228 - iip->ili_fields &= 229 - ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | 230 - XFS_ILOG_DEXT | XFS_ILOG_DEV); 231 - if (iip->ili_fields & XFS_ILOG_UUID) 232 - ilf->ilf_u.ilfu_uuid = ip->i_df.if_u2.if_uuid; 229 + ilf->ilf_u.ilfu_rdev = sysv_encode_dev(VFS_I(ip)->i_rdev); 233 230 break; 234 231 default: 235 232 
ASSERT(0); ··· 251 264 252 265 ASSERT(xfs_iext_count(ip->i_afp) == 253 266 ip->i_d.di_anextents); 254 - ASSERT(ip->i_afp->if_u1.if_extents != NULL); 255 267 256 268 p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_EXT); 257 269 data_bytes = xfs_iextents_copy(ip, p, XFS_ATTR_FORK); ··· 427 441 ilf->ilf_dsize = 0; 428 442 ilf->ilf_asize = 0; 429 443 ilf->ilf_pad = 0; 430 - uuid_copy(&ilf->ilf_u.ilfu_uuid, &uuid_null); 444 + memset(&ilf->ilf_u, 0, sizeof(ilf->ilf_u)); 431 445 432 446 xlog_finish_iovec(lv, vecp, sizeof(*ilf)); 433 447 ··· 878 892 in_f->ilf_asize = in_f32->ilf_asize; 879 893 in_f->ilf_dsize = in_f32->ilf_dsize; 880 894 in_f->ilf_ino = in_f32->ilf_ino; 881 - /* copy biggest field of ilf_u */ 882 - uuid_copy(&in_f->ilf_u.ilfu_uuid, &in_f32->ilf_u.ilfu_uuid); 895 + memcpy(&in_f->ilf_u, &in_f32->ilf_u, sizeof(in_f->ilf_u)); 883 896 in_f->ilf_blkno = in_f32->ilf_blkno; 884 897 in_f->ilf_len = in_f32->ilf_len; 885 898 in_f->ilf_boffset = in_f32->ilf_boffset;
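With `if_u2.if_rdev` removed from the data fork, the inode logging path above now encodes `VFS_I(ip)->i_rdev` explicitly with sysv_encode_dev() when it fills in `ilfu_rdev`. Assuming the kernel's sysv packing (an 18-bit minor in the low bits with the major number above it), a round-trip sketch of that encoding looks like this (helper names are stand-ins for the kdev_t.h inlines):

```c
#include <stdint.h>

/* Assumed sysv dev_t packing: minor in bits 0-17, major in bits 18+. */
static uint32_t sysv_encode(uint32_t major, uint32_t minor)
{
	return (minor & 0x3ffff) | (major << 18);
}

static uint32_t sysv_dec_major(uint32_t dev)
{
	return (dev >> 18) & 0x3fff;	/* 14-bit major */
}

static uint32_t sysv_dec_minor(uint32_t dev)
{
	return dev & 0x3ffff;		/* 18-bit minor */
}
```

Storing the VFS `i_rdev` and encoding only at logging/bulkstat time removes the last user of the old union field.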
+1 -1
fs/xfs/xfs_inode_item.h
@@ -48,7 +48,7 @@
 extern void xfs_istale_done(struct xfs_buf *, struct xfs_log_item *);
 extern void xfs_iflush_abort(struct xfs_inode *, bool);
 extern int xfs_inode_item_format_convert(xfs_log_iovec_t *,
-					 xfs_inode_log_format_t *);
+					 struct xfs_inode_log_format *);
 
 extern struct kmem_zone *xfs_ili_zone;
 
+94 -66
fs/xfs/xfs_ioctl.c
··· 44 44 #include "xfs_btree.h" 45 45 #include <linux/fsmap.h> 46 46 #include "xfs_fsmap.h" 47 + #include "scrub/xfs_scrub.h" 47 48 48 49 #include <linux/capability.h> 49 50 #include <linux/cred.h> ··· 311 310 int 312 311 xfs_set_dmattrs( 313 312 xfs_inode_t *ip, 314 - u_int evmask, 315 - u_int16_t state) 313 + uint evmask, 314 + uint16_t state) 316 315 { 317 316 xfs_mount_t *mp = ip->i_mount; 318 317 xfs_trans_t *tp; ··· 1202 1201 * 8. for non-realtime files, the extent size hint must be limited 1203 1202 * to half the AG size to avoid alignment extending the extent beyond the 1204 1203 * limits of the AG. 1204 + * 1205 + * Please keep this function in sync with xfs_scrub_inode_extsize. 1205 1206 */ 1206 1207 static int 1207 1208 xfs_ioctl_setattr_check_extsize( ··· 1260 1257 * 5. Extent size must be a multiple of the appropriate block size. 1261 1258 * 6. The extent size hint must be limited to half the AG size to avoid 1262 1259 * alignment extending the extent beyond the limits of the AG. 1260 + * 1261 + * Please keep this function in sync with xfs_scrub_inode_cowextsize. 
1263 1262 */ 1264 1263 static int 1265 1264 xfs_ioctl_setattr_check_cowextsize( ··· 1545 1540 return error; 1546 1541 } 1547 1542 1548 - STATIC int 1549 - xfs_getbmap_format(void **ap, struct getbmapx *bmv) 1543 + static bool 1544 + xfs_getbmap_format( 1545 + struct kgetbmap *p, 1546 + struct getbmapx __user *u, 1547 + size_t recsize) 1550 1548 { 1551 - struct getbmap __user *base = (struct getbmap __user *)*ap; 1552 - 1553 - /* copy only getbmap portion (not getbmapx) */ 1554 - if (copy_to_user(base, bmv, sizeof(struct getbmap))) 1555 - return -EFAULT; 1556 - 1557 - *ap += sizeof(struct getbmap); 1558 - return 0; 1549 + if (put_user(p->bmv_offset, &u->bmv_offset) || 1550 + put_user(p->bmv_block, &u->bmv_block) || 1551 + put_user(p->bmv_length, &u->bmv_length) || 1552 + put_user(0, &u->bmv_count) || 1553 + put_user(0, &u->bmv_entries)) 1554 + return false; 1555 + if (recsize < sizeof(struct getbmapx)) 1556 + return true; 1557 + if (put_user(0, &u->bmv_iflags) || 1558 + put_user(p->bmv_oflags, &u->bmv_oflags) || 1559 + put_user(0, &u->bmv_unused1) || 1560 + put_user(0, &u->bmv_unused2)) 1561 + return false; 1562 + return true; 1559 1563 } 1560 1564 1561 1565 STATIC int ··· 1574 1560 void __user *arg) 1575 1561 { 1576 1562 struct getbmapx bmx = { 0 }; 1577 - int error; 1563 + struct kgetbmap *buf; 1564 + size_t recsize; 1565 + int error, i; 1578 1566 1579 - /* struct getbmap is a strict subset of struct getbmapx. */ 1580 - if (copy_from_user(&bmx, arg, offsetof(struct getbmapx, bmv_iflags))) 1567 + switch (cmd) { 1568 + case XFS_IOC_GETBMAPA: 1569 + bmx.bmv_iflags = BMV_IF_ATTRFORK; 1570 + /*FALLTHRU*/ 1571 + case XFS_IOC_GETBMAP: 1572 + if (file->f_mode & FMODE_NOCMTIME) 1573 + bmx.bmv_iflags |= BMV_IF_NO_DMAPI_READ; 1574 + /* struct getbmap is a strict subset of struct getbmapx. 
*/ 1575 + recsize = sizeof(struct getbmap); 1576 + break; 1577 + case XFS_IOC_GETBMAPX: 1578 + recsize = sizeof(struct getbmapx); 1579 + break; 1580 + default: 1581 + return -EINVAL; 1582 + } 1583 + 1584 + if (copy_from_user(&bmx, arg, recsize)) 1581 1585 return -EFAULT; 1582 1586 1583 1587 if (bmx.bmv_count < 2) 1584 1588 return -EINVAL; 1589 + if (bmx.bmv_count > ULONG_MAX / recsize) 1590 + return -ENOMEM; 1585 1591 1586 - bmx.bmv_iflags = (cmd == XFS_IOC_GETBMAPA ? BMV_IF_ATTRFORK : 0); 1587 - if (file->f_mode & FMODE_NOCMTIME) 1588 - bmx.bmv_iflags |= BMV_IF_NO_DMAPI_READ; 1592 + buf = kmem_zalloc_large(bmx.bmv_count * sizeof(*buf), 0); 1593 + if (!buf) 1594 + return -ENOMEM; 1589 1595 1590 - error = xfs_getbmap(XFS_I(file_inode(file)), &bmx, xfs_getbmap_format, 1591 - (__force struct getbmap *)arg+1); 1596 + error = xfs_getbmap(XFS_I(file_inode(file)), &bmx, buf); 1592 1597 if (error) 1593 - return error; 1598 + goto out_free_buf; 1594 1599 1595 - /* copy back header - only size of getbmap */ 1596 - if (copy_to_user(arg, &bmx, sizeof(struct getbmap))) 1597 - return -EFAULT; 1598 - return 0; 1599 - } 1600 + error = -EFAULT; 1601 + if (copy_to_user(arg, &bmx, recsize)) 1602 + goto out_free_buf; 1603 + arg += recsize; 1600 1604 1601 - STATIC int 1602 - xfs_getbmapx_format(void **ap, struct getbmapx *bmv) 1603 - { 1604 - struct getbmapx __user *base = (struct getbmapx __user *)*ap; 1605 + for (i = 0; i < bmx.bmv_entries; i++) { 1606 + if (!xfs_getbmap_format(buf + i, arg, recsize)) 1607 + goto out_free_buf; 1608 + arg += recsize; 1609 + } 1605 1610 1606 - if (copy_to_user(base, bmv, sizeof(struct getbmapx))) 1607 - return -EFAULT; 1608 - 1609 - *ap += sizeof(struct getbmapx); 1610 - return 0; 1611 - } 1612 - 1613 - STATIC int 1614 - xfs_ioc_getbmapx( 1615 - struct xfs_inode *ip, 1616 - void __user *arg) 1617 - { 1618 - struct getbmapx bmx; 1619 - int error; 1620 - 1621 - if (copy_from_user(&bmx, arg, sizeof(bmx))) 1622 - return -EFAULT; 1623 - 1624 - if 
(bmx.bmv_count < 2) 1625 - return -EINVAL; 1626 - 1627 - if (bmx.bmv_iflags & (~BMV_IF_VALID)) 1628 - return -EINVAL; 1629 - 1630 - error = xfs_getbmap(ip, &bmx, xfs_getbmapx_format, 1631 - (__force struct getbmapx *)arg+1); 1632 - if (error) 1633 - return error; 1634 - 1635 - /* copy back header */ 1636 - if (copy_to_user(arg, &bmx, sizeof(struct getbmapx))) 1637 - return -EFAULT; 1638 - 1611 + error = 0; 1612 + out_free_buf: 1613 + kmem_free(buf); 1639 1614 return 0; 1640 1615 } 1641 1616 ··· 1701 1698 head.fmh_entries = xhead.fmh_entries; 1702 1699 head.fmh_oflags = xhead.fmh_oflags; 1703 1700 if (copy_to_user(arg, &head, sizeof(struct fsmap_head))) 1701 + return -EFAULT; 1702 + 1703 + return 0; 1704 + } 1705 + 1706 + STATIC int 1707 + xfs_ioc_scrub_metadata( 1708 + struct xfs_inode *ip, 1709 + void __user *arg) 1710 + { 1711 + struct xfs_scrub_metadata scrub; 1712 + int error; 1713 + 1714 + if (!capable(CAP_SYS_ADMIN)) 1715 + return -EPERM; 1716 + 1717 + if (copy_from_user(&scrub, arg, sizeof(scrub))) 1718 + return -EFAULT; 1719 + 1720 + error = xfs_scrub_metadata(ip, &scrub); 1721 + if (error) 1722 + return error; 1723 + 1724 + if (copy_to_user(arg, &scrub, sizeof(scrub))) 1704 1725 return -EFAULT; 1705 1726 1706 1727 return 0; ··· 1905 1878 1906 1879 case XFS_IOC_GETBMAP: 1907 1880 case XFS_IOC_GETBMAPA: 1908 - return xfs_ioc_getbmap(filp, cmd, arg); 1909 - 1910 1881 case XFS_IOC_GETBMAPX: 1911 - return xfs_ioc_getbmapx(ip, arg); 1882 + return xfs_ioc_getbmap(filp, cmd, arg); 1912 1883 1913 1884 case FS_IOC_GETFSMAP: 1914 1885 return xfs_ioc_getfsmap(ip, arg); 1886 + 1887 + case XFS_IOC_SCRUB_METADATA: 1888 + return xfs_ioc_scrub_metadata(ip, arg); 1915 1889 1916 1890 case XFS_IOC_FD_TO_HANDLE: 1917 1891 case XFS_IOC_PATH_TO_HANDLE:
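The rewritten xfs_ioc_getbmap() above allocates one kernel buffer sized by the user-supplied `bmv_count`, so it first bounds that count: at least two entries (header plus one record), and small enough that `count * recsize` cannot overflow. A sketch of that guard (the helper name is illustrative only):

```c
#include <limits.h>
#include <stdbool.h>
#include <stddef.h>

/* Sketch of the sizing checks in the getbmap rewrite: reject counts
 * that are too small to be useful or large enough that multiplying by
 * the per-record size would wrap around. */
static bool getbmap_count_ok(unsigned long count, size_t recsize)
{
	if (count < 2)
		return false;		/* -EINVAL in the real ioctl */
	if (count > ULONG_MAX / recsize)
		return false;		/* -ENOMEM: size would overflow */
	return true;
}
```

Dividing the limit rather than multiplying the count is the standard way to check for multiplication overflow without needing a wider integer type.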
+2 -2
fs/xfs/xfs_ioctl.h
@@ -86,7 +86,7 @@
 extern int
 xfs_set_dmattrs(
 	struct xfs_inode	*ip,
-	u_int			evmask,
-	u_int16_t		state);
+	uint			evmask,
+	uint16_t		state);
 
 #endif
+1
fs/xfs/xfs_ioctl32.c
@@ -556,6 +556,7 @@
 	case XFS_IOC_ERROR_INJECTION:
 	case XFS_IOC_ERROR_CLEARALL:
 	case FS_IOC_GETFSMAP:
+	case XFS_IOC_SCRUB_METADATA:
 		return xfs_file_ioctl(filp, cmd, p);
 #ifndef BROKEN_X86_ALIGNMENT
 	/* These are handled fine if no alignment issues */
+9 -6
fs/xfs/xfs_iomap.c
··· 30 30 #include "xfs_bmap_btree.h" 31 31 #include "xfs_bmap.h" 32 32 #include "xfs_bmap_util.h" 33 + #include "xfs_errortag.h" 33 34 #include "xfs_error.h" 34 35 #include "xfs_trans.h" 35 36 #include "xfs_trans_space.h" ··· 390 389 struct xfs_inode *ip, 391 390 loff_t offset, 392 391 loff_t count, 393 - xfs_extnum_t idx) 392 + struct xfs_iext_cursor *icur) 394 393 { 395 394 struct xfs_mount *mp = ip->i_mount; 396 395 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); ··· 415 414 */ 416 415 if ((mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) || 417 416 XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_dalign) || 418 - !xfs_iext_get_extent(ifp, idx - 1, &prev) || 417 + !xfs_iext_peek_prev_extent(ifp, icur, &prev) || 419 418 prev.br_startoff + prev.br_blockcount < offset_fsb) 420 419 return mp->m_writeio_blocks; 421 420 ··· 533 532 xfs_fileoff_t end_fsb; 534 533 int error = 0, eof = 0; 535 534 struct xfs_bmbt_irec got; 536 - xfs_extnum_t idx; 535 + struct xfs_iext_cursor icur; 537 536 xfs_fsblock_t prealloc_blocks = 0; 538 537 539 538 ASSERT(!XFS_IS_REALTIME_INODE(ip)); ··· 558 557 goto out_unlock; 559 558 } 560 559 561 - eof = !xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got); 560 + eof = !xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got); 562 561 if (!eof && got.br_startoff <= offset_fsb) { 563 562 if (xfs_is_reflink_inode(ip)) { 564 563 bool shared; ··· 592 591 end_fsb = min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb); 593 592 594 593 if (eof) { 595 - prealloc_blocks = xfs_iomap_prealloc_size(ip, offset, count, idx); 594 + prealloc_blocks = xfs_iomap_prealloc_size(ip, offset, count, 595 + &icur); 596 596 if (prealloc_blocks) { 597 597 xfs_extlen_t align; 598 598 xfs_off_t end_offset; ··· 615 613 616 614 retry: 617 615 error = xfs_bmapi_reserve_delalloc(ip, XFS_DATA_FORK, offset_fsb, 618 - end_fsb - offset_fsb, prealloc_blocks, &got, &idx, eof); 616 + end_fsb - offset_fsb, prealloc_blocks, &got, &icur, 617 + eof); 619 618 switch (error) { 620 619 case 0: 621 
620 break;
+21 -31
fs/xfs/xfs_iops.c
··· 160 160 if (S_ISCHR(mode) || S_ISBLK(mode)) { 161 161 if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff)) 162 162 return -EINVAL; 163 - rdev = sysv_encode_dev(rdev); 164 163 } else { 165 164 rdev = 0; 166 165 } ··· 534 535 case S_IFBLK: 535 536 case S_IFCHR: 536 537 stat->blksize = BLKDEV_IOSIZE; 537 - stat->rdev = MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff, 538 - sysv_minor(ip->i_df.if_u2.if_rdev)); 538 + stat->rdev = inode->i_rdev; 539 539 break; 540 540 default: 541 541 if (XFS_IS_REALTIME_INODE(ip)) { ··· 884 886 return error; 885 887 886 888 /* 887 - * We are going to log the inode size change in this transaction so 888 - * any previous writes that are beyond the on disk EOF and the new 889 - * EOF that have not been written out need to be written here. If we 890 - * do not write the data out, we expose ourselves to the null files 891 - * problem. Note that this includes any block zeroing we did above; 892 - * otherwise those blocks may not be zeroed after a crash. 893 - */ 894 - if (did_zeroing || 895 - (newsize > ip->i_d.di_size && oldsize != ip->i_d.di_size)) { 896 - error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, 897 - ip->i_d.di_size, newsize); 898 - if (error) 899 - return error; 900 - } 901 - 902 - /* 903 889 * We've already locked out new page faults, so now we can safely remove 904 890 * pages from the page cache knowing they won't get refaulted until we 905 891 * drop the XFS_MMAP_EXCL lock after the extent manipulations are ··· 899 917 * user visible changes). There's not much we can do about this, except 900 918 * to hope that the caller sees ENOMEM and retries the truncate 901 919 * operation. 920 + * 921 + * And we update in-core i_size and truncate page cache beyond newsize 922 + * before writeback the [di_size, newsize] range, so we're guaranteed 923 + * not to write stale data past the new EOF on truncate down. 
902 924 */ 903 925 truncate_setsize(inode, newsize); 926 + 927 + /* 928 + * We are going to log the inode size change in this transaction so 929 + * any previous writes that are beyond the on disk EOF and the new 930 + * EOF that have not been written out need to be written here. If we 931 + * do not write the data out, we expose ourselves to the null files 932 + * problem. Note that this includes any block zeroing we did above; 933 + * otherwise those blocks may not be zeroed after a crash. 934 + */ 935 + if (did_zeroing || 936 + (newsize > ip->i_d.di_size && oldsize != ip->i_d.di_size)) { 937 + error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, 938 + ip->i_d.di_size, newsize - 1); 939 + if (error) 940 + return error; 941 + } 904 942 905 943 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp); 906 944 if (error) ··· 1232 1230 1233 1231 inode->i_uid = xfs_uid_to_kuid(ip->i_d.di_uid); 1234 1232 inode->i_gid = xfs_gid_to_kgid(ip->i_d.di_gid); 1235 - 1236 - switch (inode->i_mode & S_IFMT) { 1237 - case S_IFBLK: 1238 - case S_IFCHR: 1239 - inode->i_rdev = 1240 - MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff, 1241 - sysv_minor(ip->i_df.if_u2.if_rdev)); 1242 - break; 1243 - default: 1244 - inode->i_rdev = 0; 1245 - break; 1246 - } 1247 1233 1248 1234 i_size_write(inode, ip->i_d.di_size); 1249 1235 xfs_diflags_to_iflags(inode, ip);
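Note the writeback range in the reordered truncate path above now ends at `newsize - 1` rather than `newsize`: filemap_write_and_wait_range() takes an inclusive end offset, so passing `newsize` would flush one byte past the new EOF. A toy helper showing the inclusive-end arithmetic (name is illustrative):

```c
#include <stdint.h>

/* For an inclusive byte range [start, end_incl], the length is
 * end_incl - start + 1; to cover exactly [start, newsize) you must
 * pass newsize - 1 as the inclusive end. */
static uint64_t inclusive_range_len(uint64_t start, uint64_t end_incl)
{
	return end_incl - start + 1;
}
```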
+1 -12
fs/xfs/xfs_itable.c
··· 31 31 #include "xfs_trace.h" 32 32 #include "xfs_icache.h" 33 33 34 - int 35 - xfs_internal_inum( 36 - xfs_mount_t *mp, 37 - xfs_ino_t ino) 38 - { 39 - return (ino == mp->m_sb.sb_rbmino || ino == mp->m_sb.sb_rsumino || 40 - (xfs_sb_version_hasquota(&mp->m_sb) && 41 - xfs_is_quota_inode(&mp->m_sb, ino))); 42 - } 43 - 44 34 /* 45 35 * Return stat information for one inode. 46 36 * Return 0 if ok, else errno. ··· 109 119 110 120 switch (dic->di_format) { 111 121 case XFS_DINODE_FMT_DEV: 112 - buf->bs_rdev = ip->i_df.if_u2.if_rdev; 122 + buf->bs_rdev = sysv_encode_dev(inode->i_rdev); 113 123 buf->bs_blksize = BLKDEV_IOSIZE; 114 124 buf->bs_blocks = 0; 115 125 break; 116 126 case XFS_DINODE_FMT_LOCAL: 117 - case XFS_DINODE_FMT_UUID: 118 127 buf->bs_rdev = 0; 119 128 buf->bs_blksize = mp->m_sb.sb_blocksize; 120 129 buf->bs_blocks = 0;
-2
fs/xfs/xfs_itable.h
@@ -96,6 +96,4 @@
 	void			__user *buffer,	/* buffer with inode info */
 	inumbers_fmt_pf		formatter);
 
-int xfs_internal_inum(struct xfs_mount *mp, xfs_ino_t ino);
-
 #endif	/* __XFS_ITABLE_H__ */
+9 -12
fs/xfs/xfs_linux.h
··· 142 142 #define SYNCHRONIZE() barrier() 143 143 #define __return_address __builtin_return_address(0) 144 144 145 + /* 146 + * Return the address of a label. Use barrier() so that the optimizer 147 + * won't reorder code to refactor the error jumpouts into a single 148 + * return, which throws off the reported address. 149 + */ 150 + #define __this_address ({ __label__ __here; __here: barrier(); &&__here; }) 151 + 145 152 #define XFS_PROJID_DEFAULT 0 146 153 147 154 #define MIN(a,b) (min(a,b)) ··· 250 243 #define ASSERT(expr) \ 251 244 (likely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) 252 245 253 - #ifndef STATIC 254 - # define STATIC noinline 255 - #endif 256 - 257 246 #else /* !DEBUG */ 258 247 259 248 #ifdef XFS_WARN ··· 257 254 #define ASSERT(expr) \ 258 255 (likely(expr) ? (void)0 : asswarn(#expr, __FILE__, __LINE__)) 259 256 260 - #ifndef STATIC 261 - # define STATIC static noinline 262 - #endif 263 - 264 257 #else /* !DEBUG && !XFS_WARN */ 265 258 266 259 #define ASSERT(expr) ((void)0) 267 260 268 - #ifndef STATIC 269 - # define STATIC static noinline 270 - #endif 271 - 272 261 #endif /* XFS_WARN */ 273 262 #endif /* DEBUG */ 263 + 264 + #define STATIC static noinline 274 265 275 266 #ifdef CONFIG_XFS_RT 276 267
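The new `__this_address` macro above uses the GNU C "labels as values" extension: `&&label` yields the address of a local label, and the barrier keeps the optimizer from merging the error jumpouts into one return (which would make every failure report the same address). A standalone demonstration of the mechanism (GCC/Clang only; names are illustrative):

```c
/* Demonstrate labels-as-values: a local label's address, taken with
 * the GNU &&label extension. The empty asm acts like barrier() so the
 * compiler cannot fold the label into adjacent code. */
static void *code_addr(void)
{
	__label__ here;
here:
	__asm__ volatile("" ::: "memory");
	return &&here;
}
```

In the kernel this gives metadata verifiers a cheap, precise "where did the check fail" pointer without the cost of `__func__`/`__LINE__` plumbing at every check site.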
+31 -2
fs/xfs/xfs_log.c
··· 22 22 #include "xfs_log_format.h" 23 23 #include "xfs_trans_resv.h" 24 24 #include "xfs_mount.h" 25 + #include "xfs_errortag.h" 25 26 #include "xfs_error.h" 26 27 #include "xfs_trans.h" 27 28 #include "xfs_trans_priv.h" ··· 609 608 xfs_daddr_t blk_offset, 610 609 int num_bblks) 611 610 { 611 + bool fatal = xfs_sb_version_hascrc(&mp->m_sb); 612 612 int error = 0; 613 613 int min_logfsbs; 614 614 ··· 661 659 XFS_FSB_TO_B(mp, mp->m_sb.sb_logblocks), 662 660 XFS_MAX_LOG_BYTES); 663 661 error = -EINVAL; 662 + } else if (mp->m_sb.sb_logsunit > 1 && 663 + mp->m_sb.sb_logsunit % mp->m_sb.sb_blocksize) { 664 + xfs_warn(mp, 665 + "log stripe unit %u bytes must be a multiple of block size", 666 + mp->m_sb.sb_logsunit); 667 + error = -EINVAL; 668 + fatal = true; 664 669 } 665 670 if (error) { 666 - if (xfs_sb_version_hascrc(&mp->m_sb)) { 671 + /* 672 + * Log check errors are always fatal on v5; or whenever bad 673 + * metadata leads to a crash. 674 + */ 675 + if (fatal) { 667 676 xfs_crit(mp, "AAIEEE! Log failed size checks. Abort!"); 668 677 ASSERT(0); 669 678 goto out_free_log; ··· 757 744 { 758 745 int error = 0; 759 746 bool readonly = (mp->m_flags & XFS_MOUNT_RDONLY); 747 + bool recovered = mp->m_log->l_flags & XLOG_RECOVERY_NEEDED; 760 748 761 749 if (mp->m_flags & XFS_MOUNT_NORECOVERY) { 762 750 ASSERT(mp->m_flags & XFS_MOUNT_RDONLY); ··· 793 779 xfs_log_work_queue(mp); 794 780 mp->m_super->s_flags &= ~MS_ACTIVE; 795 781 evict_inodes(mp->m_super); 782 + 783 + /* 784 + * Drain the buffer LRU after log recovery. This is required for v4 785 + * filesystems to avoid leaving around buffers with NULL verifier ops, 786 + * but we do it unconditionally to make sure we're always in a clean 787 + * cache state after mount. 788 + * 789 + * Don't push in the error case because the AIL may have pending intents 790 + * that aren't removed until recovery is cancelled. 
791 + */ 792 + if (!error && recovered) { 793 + xfs_log_force(mp, XFS_LOG_SYNC); 794 + xfs_ail_push_all_sync(mp->m_ail); 795 + } 796 + xfs_wait_buftarg(mp->m_ddev_targp); 796 797 797 798 if (readonly) 798 799 mp->m_flags |= XFS_MOUNT_RDONLY; ··· 3763 3734 * one of the iclogs. This uses backup pointers stored in a different 3764 3735 * part of the log in case we trash the log structure. 3765 3736 */ 3766 - void 3737 + STATIC void 3767 3738 xlog_verify_dest_ptr( 3768 3739 struct xlog *log, 3769 3740 void *ptr)
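The xfs_log.c hunk above adds a mount-time sanity check: a log stripe unit larger than one basic block must be an exact multiple of the filesystem block size, and a violation is fatal even on v4 filesystems. A sketch of just that predicate (helper name is illustrative):

```c
#include <stdbool.h>

/* Sketch of the added logsunit check: values of 0 or 1 mean "no
 * stripe unit" and are fine; anything larger must divide evenly by
 * the filesystem block size. */
static bool logsunit_valid(unsigned int sunit, unsigned int blocksize)
{
	return sunit <= 1 || (sunit % blocksize) == 0;
}
```

Accepting a misaligned stripe unit would let corrupt superblock metadata steer log writes to unaligned offsets, hence the check is promoted to a hard mount failure.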
+32 -30
fs/xfs/xfs_log_recover.c
···
  */
 
 /*
- * Verify the given count of basic blocks is valid number of blocks
- * to specify for an operation involving the given XFS log buffer.
- * Returns nonzero if the count is valid, 0 otherwise.
+ * Verify the log-relative block number and length in basic blocks are valid for
+ * an operation involving the given XFS log buffer.  Returns true if the fields
+ * are valid, false otherwise.
  */
-
-static inline int
-xlog_buf_bbcount_valid(
+static inline bool
+xlog_verify_bp(
 	struct xlog	*log,
+	xfs_daddr_t	blk_no,
 	int		bbcount)
 {
-	return bbcount > 0 && bbcount <= log->l_logBBsize;
+	if (blk_no < 0 || blk_no >= log->l_logBBsize)
+		return false;
+	if (bbcount <= 0 || (blk_no + bbcount) > log->l_logBBsize)
+		return false;
+	return true;
 }
 
 /*
···
 {
 	struct xfs_buf	*bp;
 
-	if (!xlog_buf_bbcount_valid(log, nbblks)) {
+	/*
+	 * Pass log block 0 since we don't have an addr yet, buffer will be
+	 * verified on read.
+	 */
+	if (!xlog_verify_bp(log, 0, nbblks)) {
 		xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer",
 			nbblks);
 		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
···
 {
 	int		error;
 
-	if (!xlog_buf_bbcount_valid(log, nbblks)) {
-		xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer",
-			nbblks);
+	if (!xlog_verify_bp(log, blk_no, nbblks)) {
+		xfs_warn(log->l_mp,
+			 "Invalid log block/length (0x%llx, 0x%x) for buffer",
+			 blk_no, nbblks);
 		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
 		return -EFSCORRUPTED;
 	}
···
 {
 	int		error;
 
-	if (!xlog_buf_bbcount_valid(log, nbblks)) {
-		xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer",
-			nbblks);
+	if (!xlog_verify_bp(log, blk_no, nbblks)) {
+		xfs_warn(log->l_mp,
+			 "Invalid log block/length (0x%llx, 0x%x) for buffer",
+			 blk_no, nbblks);
 		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
 		return -EFSCORRUPTED;
 	}
···
 	 * in the in-core log.  The following number can be made tighter if
 	 * we actually look at the block size of the filesystem.
 	 */
-	num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log);
+	num_scan_bblks = min_t(int, log_bbnum, XLOG_TOTAL_REC_SHIFT(log));
 	if (head_blk >= num_scan_bblks) {
 		/*
 		 * We are guaranteed that the entire check can be performed
···
 	struct xlog_recover_item	*item,
 	xfs_lsn_t			current_lsn)
 {
-	xfs_inode_log_format_t	*in_f;
+	struct xfs_inode_log_format	*in_f;
 	xfs_mount_t		*mp = log->l_mp;
 	xfs_buf_t		*bp;
 	xfs_dinode_t		*dip;
···
 	uint			isize;
 	int			need_free = 0;
 
-	if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) {
+	if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) {
 		in_f = item->ri_buf[0].i_addr;
 	} else {
-		in_f = kmem_alloc(sizeof(xfs_inode_log_format_t), KM_SLEEP);
+		in_f = kmem_alloc(sizeof(struct xfs_inode_log_format), KM_SLEEP);
 		need_free = 1;
 		error = xfs_inode_item_format_convert(&item->ri_buf[0], in_f);
 		if (error)
···
 	}
 
 	fields = in_f->ilf_fields;
-	switch (fields & (XFS_ILOG_DEV | XFS_ILOG_UUID)) {
-	case XFS_ILOG_DEV:
+	if (fields & XFS_ILOG_DEV)
 		xfs_dinode_put_rdev(dip, in_f->ilf_u.ilfu_rdev);
-		break;
-	case XFS_ILOG_UUID:
-		memcpy(XFS_DFORK_DPTR(dip),
-		       &in_f->ilf_u.ilfu_uuid,
-		       sizeof(uuid_t));
-		break;
-	}
 
 	if (in_f->ilf_size == 2)
 		goto out_owner_change;
···
 	char			*dp,
 	int			len)
 {
-	xfs_inode_log_format_t	*in_f;			/* any will do */
+	struct xfs_inode_log_format	*in_f;		/* any will do */
 	xlog_recover_item_t	*item;
 	char			*ptr;
···
 
 	ptr = kmem_alloc(len, KM_SLEEP);
 	memcpy(ptr, dp, len);
-	in_f = (xfs_inode_log_format_t *)ptr;
+	in_f = (struct xfs_inode_log_format *)ptr;
 
 	/* take the tail entry */
 	item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list);
···
  * Read all of the agf and agi counters and check that they
  * are consistent with the superblock counters.
  */
-void
+STATIC void
 xlog_recover_check_summary(
 	struct xlog	*log)
 {
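The tightened `xlog_verify_bp()` check above validates both the starting log block and the block count against the log size, where the old helper only looked at the count. The same bounds logic can be sketched in userspace (names here are illustrative, not the kernel's):

```c
#include <stdbool.h>
#include <stdint.h>

/*
 * Sketch of the xlog_verify_bp() bounds check: the start block must lie
 * inside [0, log_size_bb) and the range must not run past the end of the
 * log. Parameter names are hypothetical.
 */
static bool verify_log_range(int64_t blk_no, int bbcount, int log_size_bb)
{
	if (blk_no < 0 || blk_no >= log_size_bb)
		return false;
	if (bbcount <= 0 || blk_no + bbcount > log_size_bb)
		return false;
	return true;
}
```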
+13 -2
fs/xfs/xfs_mount.c
···
 	xfs_rtunmount_inodes(mp);
 out_rele_rip:
 	IRELE(rip);
-	cancel_delayed_work_sync(&mp->m_reclaim_work);
-	xfs_reclaim_inodes(mp, SYNC_WAIT);
 	/* Clean out dquots that might be in memory after quotacheck. */
 	xfs_qm_unmount(mp);
+	/*
+	 * Cancel all delayed reclaim work and reclaim the inodes directly.
+	 * We have to do this /after/ rtunmount and qm_unmount because those
+	 * two will have scheduled delayed reclaim for the rt/quota inodes.
+	 *
+	 * This is slightly different from the unmountfs call sequence
+	 * because we could be tearing down a partially set up mount.  In
+	 * particular, if log_mount_finish fails we bail out without calling
+	 * qm_unmount_quotas and therefore rely on qm_unmount to release the
+	 * quota inodes.
+	 */
+	cancel_delayed_work_sync(&mp->m_reclaim_work);
+	xfs_reclaim_inodes(mp, SYNC_WAIT);
 out_log_dealloc:
 	mp->m_flags |= XFS_MOUNT_UNMOUNTING;
 	xfs_log_mount_cancel(mp);
+52 -56
fs/xfs/xfs_reflink.c
···
 	struct xfs_bmbt_irec	got;
 	int			error = 0;
 	bool			eof = false, trimmed;
-	xfs_extnum_t		idx;
+	struct xfs_iext_cursor	icur;
 
 	/*
 	 * Search the COW fork extent list first.  This serves two purposes:
···
 	 * tree.
 	 */
 
-	if (!xfs_iext_lookup_extent(ip, ifp, imap->br_startoff, &idx, &got))
+	if (!xfs_iext_lookup_extent(ip, ifp, imap->br_startoff, &icur, &got))
 		eof = true;
 	if (!eof && got.br_startoff <= imap->br_startoff) {
 		trace_xfs_reflink_cow_found(ip, imap);
···
 		return error;
 
 	error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, imap->br_startoff,
-			imap->br_blockcount, 0, &got, &idx, eof);
+			imap->br_blockcount, 0, &got, &icur, eof);
 	if (error == -ENOSPC || error == -EDQUOT)
 		trace_xfs_reflink_cow_enospc(ip, imap);
 	if (error)
···
 	xfs_off_t		offset,
 	xfs_off_t		count)
 {
-	struct xfs_bmbt_irec	got;
-	struct xfs_defer_ops	dfops;
 	struct xfs_mount	*mp = ip->i_mount;
-	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
 	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
 	xfs_fileoff_t		end_fsb = XFS_B_TO_FSB(mp, offset + count);
-	xfs_extnum_t		idx;
-	bool			found;
-	int			error = 0;
+	xfs_filblks_t		count_fsb = end_fsb - offset_fsb;
+	struct xfs_bmbt_irec	imap;
+	struct xfs_defer_ops	dfops;
+	xfs_fsblock_t		first_block = NULLFSBLOCK;
+	int			nimaps = 1, error = 0;
+
+	ASSERT(count != 0);
 
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
-
-	/* Convert all the extents to real from unwritten. */
-	for (found = xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got);
-	     found && got.br_startoff < end_fsb;
-	     found = xfs_iext_get_extent(ifp, ++idx, &got)) {
-		error = xfs_reflink_convert_cow_extent(ip, &got, offset_fsb,
-				end_fsb - offset_fsb, &dfops);
-		if (error)
-			break;
-	}
-
-	/* Finish up. */
+	error = xfs_bmapi_write(NULL, ip, offset_fsb, count_fsb,
+			XFS_BMAPI_COWFORK | XFS_BMAPI_CONVERT |
+			XFS_BMAPI_CONVERT_ONLY, &first_block, 0, &imap, &nimaps,
+			&dfops);
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	return error;
 }
···
 	bool			trimmed;
 	xfs_filblks_t		resaligned;
 	xfs_extlen_t		resblks = 0;
-	xfs_extnum_t		idx;
+	struct xfs_iext_cursor	icur;
 
 retry:
 	ASSERT(xfs_is_reflink_inode(ip));
···
 	 * Even if the extent is not shared we might have a preallocation for
 	 * it in the COW fork.  If so use it.
 	 */
-	if (xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &idx, &got) &&
+	if (xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &got) &&
 	    got.br_startoff <= offset_fsb) {
 		*shared = true;
···
 	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
 	xfs_fileoff_t		offset_fsb;
 	struct xfs_bmbt_irec	got;
-	xfs_extnum_t		idx;
+	struct xfs_iext_cursor	icur;
 
 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED));
 	ASSERT(xfs_is_reflink_inode(ip));
 
 	offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
-	if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got))
+	if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got))
 		return false;
 	if (got.br_startoff > offset_fsb)
 		return false;
···
 {
 	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
 	struct xfs_bmbt_irec	got;
-	xfs_extnum_t		idx;
+	struct xfs_iext_cursor	icur;
 
 	if (!xfs_is_reflink_inode(ip))
 		return;
 
 	/* Find the extent in the CoW fork. */
-	if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got))
+	if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got))
 		return;
 
 	/* This is the extent before; try sliding up one. */
 	if (got.br_startoff < offset_fsb) {
-		if (!xfs_iext_get_extent(ifp, idx + 1, &got))
+		if (!xfs_iext_next_extent(ifp, &icur, &got))
 			return;
 	}
···
 {
 	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
 	struct xfs_bmbt_irec	got, del;
-	xfs_extnum_t		idx;
+	struct xfs_iext_cursor	icur;
 	xfs_fsblock_t		firstfsb;
 	struct xfs_defer_ops	dfops;
 	int			error = 0;
 
 	if (!xfs_is_reflink_inode(ip))
 		return 0;
-	if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got))
+	if (!xfs_iext_lookup_extent_before(ip, ifp, &end_fsb, &icur, &got))
 		return 0;
 
-	while (got.br_startoff < end_fsb) {
+	/* Walk backwards until we're out of the I/O range... */
+	while (got.br_startoff + got.br_blockcount > offset_fsb) {
 		del = got;
 		xfs_trim_extent(&del, offset_fsb, end_fsb - offset_fsb);
+
+		/* Extent delete may have bumped ext forward */
+		if (!del.br_blockcount) {
+			xfs_iext_prev(ifp, &icur);
+			goto next_extent;
+		}
+
 		trace_xfs_reflink_cancel_cow(ip, &del);
 
 		if (isnullstartblock(del.br_startblock)) {
 			error = xfs_bmap_del_extent_delay(ip, XFS_COW_FORK,
-					&idx, &got, &del);
+					&icur, &got, &del);
 			if (error)
 				break;
 		} else if (del.br_state == XFS_EXT_UNWRITTEN || cancel_real) {
···
 			}
 
 			/* Remove the mapping from the CoW fork. */
-			xfs_bmap_del_extent_cow(ip, &idx, &got, &del);
+			xfs_bmap_del_extent_cow(ip, &icur, &got, &del);
 		}
-
-		if (!xfs_iext_get_extent(ifp, ++idx, &got))
+next_extent:
+		if (!xfs_iext_get_extent(ifp, &icur, &got))
 			break;
 	}
 
···
 	int			error;
 	unsigned int		resblks;
 	xfs_filblks_t		rlen;
-	xfs_extnum_t		idx;
+	struct xfs_iext_cursor	icur;
 
 	trace_xfs_reflink_end_cow(ip, offset, count);
 
···
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	xfs_trans_ijoin(tp, ip, 0);
 
-	/* If there is a hole at end_fsb - 1 go to the previous extent */
-	if (!xfs_iext_lookup_extent(ip, ifp, end_fsb - 1, &idx, &got) ||
-	    got.br_startoff > end_fsb) {
-		/*
-		 * In case of racing, overlapping AIO writes no COW extents
-		 * might be left by the time I/O completes for the loser of
-		 * the race.  In that case we are done.
-		 */
-		if (idx <= 0)
-			goto out_cancel;
-		xfs_iext_get_extent(ifp, --idx, &got);
-	}
+	/*
+	 * In case of racing, overlapping AIO writes no COW extents might be
+	 * left by the time I/O completes for the loser of the race.  In that
+	 * case we are done.
+	 */
+	if (!xfs_iext_lookup_extent_before(ip, ifp, &end_fsb, &icur, &got))
+		goto out_cancel;
 
 	/* Walk backwards until we're out of the I/O range... */
 	while (got.br_startoff + got.br_blockcount > offset_fsb) {
 		del = got;
 		xfs_trim_extent(&del, offset_fsb, end_fsb - offset_fsb);
 
-		/* Extent delete may have bumped idx forward */
+		/* Extent delete may have bumped ext forward */
 		if (!del.br_blockcount) {
-			idx--;
+			xfs_iext_prev(ifp, &icur);
 			goto next_extent;
 		}
 
···
 		 * allocated but have not yet been involved in a write.
 		 */
 		if (got.br_state == XFS_EXT_UNWRITTEN) {
-			idx--;
+			xfs_iext_prev(ifp, &icur);
 			goto next_extent;
 		}
 
···
 			goto out_defer;
 
 		/* Remove the mapping from the CoW fork. */
-		xfs_bmap_del_extent_cow(ip, &idx, &got, &del);
+		xfs_bmap_del_extent_cow(ip, &icur, &got, &del);
 
 		xfs_defer_ijoin(&dfops, ip);
 		error = xfs_defer_finish(&tp, &dfops);
 		if (error)
 			goto out_defer;
 next_extent:
-		if (!xfs_iext_get_extent(ifp, idx, &got))
+		if (!xfs_iext_get_extent(ifp, &icur, &got))
 			break;
 	}
 
···
 	xfs_extlen_t		aglen;
 	xfs_agblock_t		rbno;
 	xfs_extlen_t		rlen;
-	xfs_extnum_t		idx;
+	struct xfs_iext_cursor	icur;
 	bool			found;
 	int			error;
 
···
 	}
 
 	*has_shared = false;
-	found = xfs_iext_lookup_extent(ip, ifp, 0, &idx, &got);
+	found = xfs_iext_lookup_extent(ip, ifp, 0, &icur, &got);
 	while (found) {
 		if (isnullstartblock(got.br_startblock) ||
 		    got.br_state != XFS_EXT_NORM)
···
 			return 0;
 		}
 next:
-		found = xfs_iext_get_extent(ifp, ++idx, &got);
+		found = xfs_iext_next_extent(ifp, &icur, &got);
 	}
 
 	return 0;
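The reflink changes above replace raw `idx` arithmetic with an opaque extent cursor plus lookup/next/prev helpers. The shape of that backwards walk (as in `xfs_reflink_end_cow()`) can be modeled over a plain sorted array; everything here is a toy with invented names, while the real cursor walks an in-memory btree of extent leaves:

```c
#include <stdbool.h>

/* Toy extent record and cursor, standing in for xfs_bmbt_irec and
 * xfs_iext_cursor. */
struct extent { long startoff, blockcount; };
struct cursor { const struct extent *ext; int pos, nr; };

static bool cur_get(struct cursor *c, struct extent *out)
{
	if (c->pos < 0 || c->pos >= c->nr)
		return false;
	*out = c->ext[c->pos];
	return true;
}

/* Position the cursor at the last extent starting before 'end', roughly
 * what xfs_iext_lookup_extent_before() does for the COW fork. */
static bool lookup_before(struct cursor *c, long end, struct extent *out)
{
	c->pos = c->nr - 1;
	while (c->pos >= 0 && c->ext[c->pos].startoff >= end)
		c->pos--;
	return cur_get(c, out);
}

/* Count extents overlapping [start, end) by walking backwards; this is the
 * loop shape used by xfs_reflink_end_cow()/xfs_reflink_cancel_cow_blocks(). */
static int count_overlaps(struct cursor *c, long start, long end)
{
	struct extent got;
	int n = 0;

	if (!lookup_before(c, end, &got))
		return 0;
	while (got.startoff + got.blockcount > start) {
		n++;
		c->pos--;			/* xfs_iext_prev() analogue */
		if (!cur_get(c, &got))
			break;
	}
	return n;
}
```

The design point mirrored here is that callers never see an integer index they can increment past a leaf boundary; all movement goes through the cursor helpers.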
+2
fs/xfs/xfs_rtalloc.h
···
 int xfs_rtalloc_query_all(struct xfs_trans *tp,
 			  xfs_rtalloc_query_range_fn fn,
 			  void *priv);
+bool xfs_verify_rtbno(struct xfs_mount *mp, xfs_rtblock_t rtbno);
 #else
 # define xfs_rtallocate_extent(t,b,min,max,l,f,p,rb)	(ENOSYS)
 # define xfs_rtfree_extent(t,b,l)			(ENOSYS)
···
 # define xfs_rtalloc_query_range(t,l,h,f,p)		(ENOSYS)
 # define xfs_rtalloc_query_all(t,f,p)			(ENOSYS)
 # define xfs_rtbuf_get(m,t,b,i,p)			(ENOSYS)
+# define xfs_verify_rtbno(m, r)			(false)
 static inline int		/* error */
 xfs_rtmount_init(
 	xfs_mount_t	*mp)	/* file system mount structure */
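The newly declared `xfs_verify_rtbno()` predicate (stubbed to `false` when realtime support is compiled out) is a bounds check on candidate block numbers. A minimal sketch of what such a verifier has to guarantee, with illustrative parameter names rather than the kernel's mount-struct signature:

```c
#include <stdbool.h>
#include <stdint.h>

/*
 * Hypothetical mirror of a realtime block-number verifier: a block number
 * is valid only if it lies strictly below the size of the realtime device.
 * With no realtime device (zero blocks), nothing validates.
 */
static bool rtbno_in_bounds(uint64_t rtbno, uint64_t rt_nblocks)
{
	return rtbno < rt_nblocks;
}
```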
+15 -49
fs/xfs/xfs_trace.h
···
 		  __entry->bt_before)
 );
 
-TRACE_EVENT(xfs_iext_insert,
-	TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx,
-		 struct xfs_bmbt_irec *r, int state, unsigned long caller_ip),
-	TP_ARGS(ip, idx, r, state, caller_ip),
-	TP_STRUCT__entry(
-		__field(dev_t, dev)
-		__field(xfs_ino_t, ino)
-		__field(xfs_extnum_t, idx)
-		__field(xfs_fileoff_t, startoff)
-		__field(xfs_fsblock_t, startblock)
-		__field(xfs_filblks_t, blockcount)
-		__field(xfs_exntst_t, state)
-		__field(int, bmap_state)
-		__field(unsigned long, caller_ip)
-	),
-	TP_fast_assign(
-		__entry->dev = VFS_I(ip)->i_sb->s_dev;
-		__entry->ino = ip->i_ino;
-		__entry->idx = idx;
-		__entry->startoff = r->br_startoff;
-		__entry->startblock = r->br_startblock;
-		__entry->blockcount = r->br_blockcount;
-		__entry->state = r->br_state;
-		__entry->bmap_state = state;
-		__entry->caller_ip = caller_ip;
-	),
-	TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
-		  "offset %lld block %lld count %lld flag %d caller %ps",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->ino,
-		  __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
-		  (long)__entry->idx,
-		  __entry->startoff,
-		  (int64_t)__entry->startblock,
-		  __entry->blockcount,
-		  __entry->state,
-		  (char *)__entry->caller_ip)
-);
-
 DECLARE_EVENT_CLASS(xfs_bmap_class,
-	TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state,
+	TP_PROTO(struct xfs_inode *ip, struct xfs_iext_cursor *cur, int state,
 		 unsigned long caller_ip),
-	TP_ARGS(ip, idx, state, caller_ip),
+	TP_ARGS(ip, cur, state, caller_ip),
 	TP_STRUCT__entry(
 		__field(dev_t, dev)
 		__field(xfs_ino_t, ino)
-		__field(xfs_extnum_t, idx)
+		__field(void *, leaf);
+		__field(int, pos);
 		__field(xfs_fileoff_t, startoff)
 		__field(xfs_fsblock_t, startblock)
 		__field(xfs_filblks_t, blockcount)
···
 		struct xfs_bmbt_irec r;
 
 		ifp = xfs_iext_state_to_fork(ip, state);
-		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &r);
+		xfs_iext_get_extent(ifp, cur, &r);
 		__entry->dev = VFS_I(ip)->i_sb->s_dev;
 		__entry->ino = ip->i_ino;
-		__entry->idx = idx;
+		__entry->leaf = cur->leaf;
+		__entry->pos = cur->pos;
 		__entry->startoff = r.br_startoff;
 		__entry->startblock = r.br_startblock;
 		__entry->blockcount = r.br_blockcount;
···
 		__entry->bmap_state = state;
 		__entry->caller_ip = caller_ip;
 	),
-	TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
+	TP_printk("dev %d:%d ino 0x%llx state %s cur 0x%p/%d "
 		  "offset %lld block %lld count %lld flag %d caller %ps",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->ino,
 		  __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
-		  (long)__entry->idx,
+		  __entry->leaf,
+		  __entry->pos,
 		  __entry->startoff,
 		  (int64_t)__entry->startblock,
 		  __entry->blockcount,
···
 
 #define DEFINE_BMAP_EVENT(name) \
 DEFINE_EVENT(xfs_bmap_class, name, \
-	TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state, \
+	TP_PROTO(struct xfs_inode *ip, struct xfs_iext_cursor *cur, int state, \
 		 unsigned long caller_ip), \
-	TP_ARGS(ip, idx, state, caller_ip))
+	TP_ARGS(ip, cur, state, caller_ip))
+DEFINE_BMAP_EVENT(xfs_iext_insert);
 DEFINE_BMAP_EVENT(xfs_iext_remove);
 DEFINE_BMAP_EVENT(xfs_bmap_pre_update);
 DEFINE_BMAP_EVENT(xfs_bmap_post_update);
-DEFINE_BMAP_EVENT(xfs_extlist);
+DEFINE_BMAP_EVENT(xfs_read_extent);
+DEFINE_BMAP_EVENT(xfs_write_extent);
 
 DECLARE_EVENT_CLASS(xfs_buf_class,
 	TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip),
+19 -3
fs/xfs/xfs_trans_ail.c
···
 #include "xfs_trans.h"
 #include "xfs_trans_priv.h"
 #include "xfs_trace.h"
+#include "xfs_errortag.h"
 #include "xfs_error.h"
 #include "xfs_log.h"
 
···
 	current->flags |= PF_MEMALLOC;
 	set_freezable();
 
-	while (!kthread_should_stop()) {
+	while (1) {
 		if (tout && tout <= 20)
-			__set_current_state(TASK_KILLABLE);
+			set_current_state(TASK_KILLABLE);
 		else
-			__set_current_state(TASK_INTERRUPTIBLE);
+			set_current_state(TASK_INTERRUPTIBLE);
+
+		/*
+		 * Check kthread_should_stop() after we set the task state
+		 * to guarantee that we either see the stop bit and exit or
+		 * the task state is reset to runnable such that it's not
+		 * scheduled out indefinitely and detects the stop bit at
+		 * next iteration.
+		 *
+		 * A memory barrier is included in above task state set to
+		 * serialize again kthread_stop().
+		 */
+		if (kthread_should_stop()) {
+			__set_current_state(TASK_RUNNING);
+			break;
+		}
 
 		spin_lock(&ailp->xa_lock);
 
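The xfsaild change above closes a shutdown race: the stop bit must be tested only after the task has published its intention to sleep, so a concurrent `kthread_stop()` either is seen by the check or resets the task to runnable. The same ordering discipline exists in userspace condition-variable code, where the stop flag is only examined under the lock the sleeper will wait on. A sketch with invented names (compile with `-pthread`):

```c
#include <pthread.h>
#include <stdbool.h>

/*
 * Userspace analogue of the xfsaild fix: the worker re-checks the stop flag
 * only while holding the lock it sleeps under. Checking before taking the
 * lock would leave a window where a stop request plus wakeup could be lost
 * and the worker would sleep forever.
 */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t wake = PTHREAD_COND_INITIALIZER;
static bool should_stop;

static void *worker(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	/* "Set state first, then test the stop bit": the flag is only ever
	 * set with the lock held, so this check cannot race with the sleep. */
	while (!should_stop)
		pthread_cond_wait(&wake, &lock);
	pthread_mutex_unlock(&lock);
	return NULL;
}

/* Start a worker, request it to stop, and join it; returns 0 on success.
 * With the check-under-lock ordering the join can never hang. */
static int run_and_stop_worker(void)
{
	pthread_t t;

	should_stop = false;
	if (pthread_create(&t, NULL, worker, NULL) != 0)
		return -1;
	pthread_mutex_lock(&lock);
	should_stop = true;		/* the kthread_stop() analogue */
	pthread_cond_signal(&wake);
	pthread_mutex_unlock(&lock);
	return pthread_join(t, NULL);
}
```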