Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

xfs: support the XFS_BTNUM_FINOBT free inode btree type

Define the AGI fields for the finobt root/level and add magic
numbers. Update the btree code to add support for the new
XFS_BTNUM_FINOBT inode btree.

The finobt root block is reserved immediately following the inobt
root block in the AG. Update XFS_PREALLOC_BLOCKS() to determine the
starting AG data block based on whether finobt support is enabled.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>

Authored by Brian Foster and committed by Dave Chinner
aafc3c24 8e2c84df

+156 -30
+22 -14
fs/xfs/xfs_ag.h
··· 160 160 * still being referenced. 161 161 */ 162 162 __be32 agi_unlinked[XFS_AGI_UNLINKED_BUCKETS]; 163 - 163 + /* 164 + * This marks the end of logging region 1 and start of logging region 2. 165 + */ 164 166 uuid_t agi_uuid; /* uuid of filesystem */ 165 167 __be32 agi_crc; /* crc of agi sector */ 166 168 __be32 agi_pad32; 167 169 __be64 agi_lsn; /* last write sequence */ 170 + 171 + __be32 agi_free_root; /* root of the free inode btree */ 172 + __be32 agi_free_level;/* levels in free inode btree */ 168 173 169 174 /* structure must be padded to 64 bit alignment */ 170 175 } xfs_agi_t; 171 176 172 177 #define XFS_AGI_CRC_OFF offsetof(struct xfs_agi, agi_crc) 173 178 174 - #define XFS_AGI_MAGICNUM 0x00000001 175 - #define XFS_AGI_VERSIONNUM 0x00000002 176 - #define XFS_AGI_SEQNO 0x00000004 177 - #define XFS_AGI_LENGTH 0x00000008 178 - #define XFS_AGI_COUNT 0x00000010 179 - #define XFS_AGI_ROOT 0x00000020 180 - #define XFS_AGI_LEVEL 0x00000040 181 - #define XFS_AGI_FREECOUNT 0x00000080 182 - #define XFS_AGI_NEWINO 0x00000100 183 - #define XFS_AGI_DIRINO 0x00000200 184 - #define XFS_AGI_UNLINKED 0x00000400 185 - #define XFS_AGI_NUM_BITS 11 186 - #define XFS_AGI_ALL_BITS ((1 << XFS_AGI_NUM_BITS) - 1) 179 + #define XFS_AGI_MAGICNUM (1 << 0) 180 + #define XFS_AGI_VERSIONNUM (1 << 1) 181 + #define XFS_AGI_SEQNO (1 << 2) 182 + #define XFS_AGI_LENGTH (1 << 3) 183 + #define XFS_AGI_COUNT (1 << 4) 184 + #define XFS_AGI_ROOT (1 << 5) 185 + #define XFS_AGI_LEVEL (1 << 6) 186 + #define XFS_AGI_FREECOUNT (1 << 7) 187 + #define XFS_AGI_NEWINO (1 << 8) 188 + #define XFS_AGI_DIRINO (1 << 9) 189 + #define XFS_AGI_UNLINKED (1 << 10) 190 + #define XFS_AGI_NUM_BITS_R1 11 /* end of the 1st agi logging region */ 191 + #define XFS_AGI_ALL_BITS_R1 ((1 << XFS_AGI_NUM_BITS_R1) - 1) 192 + #define XFS_AGI_FREE_ROOT (1 << 11) 193 + #define XFS_AGI_FREE_LEVEL (1 << 12) 194 + #define XFS_AGI_NUM_BITS_R2 13 187 195 188 196 /* disk block (xfs_daddr_t) in the AG */ 189 197 #define 
XFS_AGI_DADDR(mp) ((xfs_daddr_t)(2 << (mp)->m_sectbb_log))
+4 -2
fs/xfs/xfs_btree.c
··· 43 43 * Btree magic numbers. 44 44 */ 45 45 static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = { 46 - { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC }, 46 + { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC, 47 + XFS_FIBT_MAGIC }, 47 48 { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, 48 - XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC } 49 + XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC } 49 50 }; 50 51 #define xfs_btree_magic(cur) \ 51 52 xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum] ··· 1116 1115 xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF); 1117 1116 break; 1118 1117 case XFS_BTNUM_INO: 1118 + case XFS_BTNUM_FINO: 1119 1119 xfs_buf_set_ref(bp, XFS_INO_BTREE_REF); 1120 1120 break; 1121 1121 case XFS_BTNUM_BMAP:
+3
fs/xfs/xfs_btree.h
··· 62 62 #define XFS_BTNUM_CNT ((xfs_btnum_t)XFS_BTNUM_CNTi) 63 63 #define XFS_BTNUM_BMAP ((xfs_btnum_t)XFS_BTNUM_BMAPi) 64 64 #define XFS_BTNUM_INO ((xfs_btnum_t)XFS_BTNUM_INOi) 65 + #define XFS_BTNUM_FINO ((xfs_btnum_t)XFS_BTNUM_FINOi) 65 66 66 67 /* 67 68 * For logging record fields. ··· 93 92 case XFS_BTNUM_CNT: __XFS_BTREE_STATS_INC(abtc, stat); break; \ 94 93 case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(bmbt, stat); break; \ 95 94 case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(ibt, stat); break; \ 95 + case XFS_BTNUM_FINO: __XFS_BTREE_STATS_INC(fibt, stat); break; \ 96 96 case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ 97 97 } \ 98 98 } while (0) ··· 107 105 case XFS_BTNUM_CNT: __XFS_BTREE_STATS_ADD(abtc, stat, val); break; \ 108 106 case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_ADD(bmbt, stat, val); break; \ 109 107 case XFS_BTNUM_INO: __XFS_BTREE_STATS_ADD(ibt, stat, val); break; \ 108 + case XFS_BTNUM_FINO: __XFS_BTREE_STATS_ADD(fibt, stat, val); break; \ 110 109 case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ 111 110 } \ 112 111 } while (0)
+13 -1
fs/xfs/xfs_format.h
··· 202 202 */ 203 203 #define XFS_IBT_MAGIC 0x49414254 /* 'IABT' */ 204 204 #define XFS_IBT_CRC_MAGIC 0x49414233 /* 'IAB3' */ 205 + #define XFS_FIBT_MAGIC 0x46494254 /* 'FIBT' */ 206 + #define XFS_FIBT_CRC_MAGIC 0x46494233 /* 'FIB3' */ 205 207 206 208 typedef __uint64_t xfs_inofree_t; 207 209 #define XFS_INODES_PER_CHUNK (NBBY * sizeof(xfs_inofree_t)) ··· 246 244 * block numbers in the AG. 247 245 */ 248 246 #define XFS_IBT_BLOCK(mp) ((xfs_agblock_t)(XFS_CNT_BLOCK(mp) + 1)) 249 - #define XFS_PREALLOC_BLOCKS(mp) ((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1)) 247 + #define XFS_FIBT_BLOCK(mp) ((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1)) 248 + 249 + /* 250 + * The first data block of an AG depends on whether the filesystem was formatted 251 + * with the finobt feature. If so, account for the finobt reserved root btree 252 + * block. 253 + */ 254 + #define XFS_PREALLOC_BLOCKS(mp) \ 255 + (xfs_sb_version_hasfinobt(&((mp)->m_sb)) ? \ 256 + XFS_FIBT_BLOCK(mp) + 1 : \ 257 + XFS_IBT_BLOCK(mp) + 1) 250 258 251 259 252 260
+35 -9
fs/xfs/xfs_ialloc.c
··· 1488 1488 } 1489 1489 1490 1490 /* 1491 - * Log specified fields for the ag hdr (inode section) 1491 + * Log specified fields for the ag hdr (inode section). The growth of the agi 1492 + * structure over time requires that we interpret the buffer as two logical 1493 + * regions delineated by the end of the unlinked list. This is due to the size 1494 + * of the hash table and its location in the middle of the agi. 1495 + * 1496 + * For example, a request to log a field before agi_unlinked and a field after 1497 + * agi_unlinked could cause us to log the entire hash table and use an excessive 1498 + * amount of log space. To avoid this behavior, log the region up through 1499 + * agi_unlinked in one call and the region after agi_unlinked through the end of 1500 + * the structure in another. 1492 1501 */ 1493 1502 void 1494 1503 xfs_ialloc_log_agi( ··· 1520 1511 offsetof(xfs_agi_t, agi_newino), 1521 1512 offsetof(xfs_agi_t, agi_dirino), 1522 1513 offsetof(xfs_agi_t, agi_unlinked), 1514 + offsetof(xfs_agi_t, agi_free_root), 1515 + offsetof(xfs_agi_t, agi_free_level), 1523 1516 sizeof(xfs_agi_t) 1524 1517 }; 1525 1518 #ifdef DEBUG ··· 1530 1519 agi = XFS_BUF_TO_AGI(bp); 1531 1520 ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); 1532 1521 #endif 1533 - /* 1534 - * Compute byte offsets for the first and last fields. 1535 - */ 1536 - xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS, &first, &last); 1537 - /* 1538 - * Log the allocation group inode header buffer. 1539 - */ 1522 + 1540 1523 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGI_BUF); 1541 - xfs_trans_log_buf(tp, bp, first, last); 1524 + 1525 + /* 1526 + * Compute byte offsets for the first and last fields in the first 1527 + * region and log the agi buffer. This only logs up through 1528 + * agi_unlinked. 
1529 + */ 1530 + if (fields & XFS_AGI_ALL_BITS_R1) { 1531 + xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS_R1, 1532 + &first, &last); 1533 + xfs_trans_log_buf(tp, bp, first, last); 1534 + } 1535 + 1536 + /* 1537 + * Mask off the bits in the first region and calculate the first and 1538 + * last field offsets for any bits in the second region. 1539 + */ 1540 + fields &= ~XFS_AGI_ALL_BITS_R1; 1541 + if (fields) { 1542 + xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS_R2, 1543 + &first, &last); 1544 + xfs_trans_log_buf(tp, bp, first, last); 1545 + } 1542 1546 } 1543 1547 1544 1548 #ifdef DEBUG
+58 -2
fs/xfs/xfs_ialloc_btree.c
··· 67 67 xfs_ialloc_log_agi(cur->bc_tp, agbp, XFS_AGI_ROOT | XFS_AGI_LEVEL); 68 68 } 69 69 70 + STATIC void 71 + xfs_finobt_set_root( 72 + struct xfs_btree_cur *cur, 73 + union xfs_btree_ptr *nptr, 74 + int inc) /* level change */ 75 + { 76 + struct xfs_buf *agbp = cur->bc_private.a.agbp; 77 + struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); 78 + 79 + agi->agi_free_root = nptr->s; 80 + be32_add_cpu(&agi->agi_free_level, inc); 81 + xfs_ialloc_log_agi(cur->bc_tp, agbp, 82 + XFS_AGI_FREE_ROOT | XFS_AGI_FREE_LEVEL); 83 + } 84 + 70 85 STATIC int 71 86 xfs_inobt_alloc_block( 72 87 struct xfs_btree_cur *cur, ··· 189 174 ptr->s = agi->agi_root; 190 175 } 191 176 177 + STATIC void 178 + xfs_finobt_init_ptr_from_cur( 179 + struct xfs_btree_cur *cur, 180 + union xfs_btree_ptr *ptr) 181 + { 182 + struct xfs_agi *agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp); 183 + 184 + ASSERT(cur->bc_private.a.agno == be32_to_cpu(agi->agi_seqno)); 185 + ptr->s = agi->agi_free_root; 186 + } 187 + 192 188 STATIC __int64_t 193 189 xfs_inobt_key_diff( 194 190 struct xfs_btree_cur *cur, ··· 230 204 */ 231 205 switch (block->bb_magic) { 232 206 case cpu_to_be32(XFS_IBT_CRC_MAGIC): 207 + case cpu_to_be32(XFS_FIBT_CRC_MAGIC): 233 208 if (!xfs_sb_version_hascrc(&mp->m_sb)) 234 209 return false; 235 210 if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid)) ··· 242 215 return false; 243 216 /* fall through */ 244 217 case cpu_to_be32(XFS_IBT_MAGIC): 218 + case cpu_to_be32(XFS_FIBT_MAGIC): 245 219 break; 246 220 default: 247 221 return 0; ··· 346 318 #endif 347 319 }; 348 320 321 + static const struct xfs_btree_ops xfs_finobt_ops = { 322 + .rec_len = sizeof(xfs_inobt_rec_t), 323 + .key_len = sizeof(xfs_inobt_key_t), 324 + 325 + .dup_cursor = xfs_inobt_dup_cursor, 326 + .set_root = xfs_finobt_set_root, 327 + .alloc_block = xfs_inobt_alloc_block, 328 + .free_block = xfs_inobt_free_block, 329 + .get_minrecs = xfs_inobt_get_minrecs, 330 + .get_maxrecs = xfs_inobt_get_maxrecs, 331 + .init_key_from_rec = 
xfs_inobt_init_key_from_rec, 332 + .init_rec_from_key = xfs_inobt_init_rec_from_key, 333 + .init_rec_from_cur = xfs_inobt_init_rec_from_cur, 334 + .init_ptr_from_cur = xfs_finobt_init_ptr_from_cur, 335 + .key_diff = xfs_inobt_key_diff, 336 + .buf_ops = &xfs_inobt_buf_ops, 337 + #if defined(DEBUG) || defined(XFS_WARN) 338 + .keys_inorder = xfs_inobt_keys_inorder, 339 + .recs_inorder = xfs_inobt_recs_inorder, 340 + #endif 341 + }; 342 + 349 343 /* 350 344 * Allocate a new inode btree cursor. 351 345 */ ··· 386 336 387 337 cur->bc_tp = tp; 388 338 cur->bc_mp = mp; 389 - cur->bc_nlevels = be32_to_cpu(agi->agi_level); 390 339 cur->bc_btnum = btnum; 340 + if (btnum == XFS_BTNUM_INO) { 341 + cur->bc_nlevels = be32_to_cpu(agi->agi_level); 342 + cur->bc_ops = &xfs_inobt_ops; 343 + } else { 344 + cur->bc_nlevels = be32_to_cpu(agi->agi_free_level); 345 + cur->bc_ops = &xfs_finobt_ops; 346 + } 347 + 391 348 cur->bc_blocklog = mp->m_sb.sb_blocklog; 392 349 393 - cur->bc_ops = &xfs_inobt_ops; 394 350 if (xfs_sb_version_hascrc(&mp->m_sb)) 395 351 cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; 396 352
+2
fs/xfs/xfs_log_recover.c
··· 2138 2138 bp->b_ops = &xfs_allocbt_buf_ops; 2139 2139 break; 2140 2140 case XFS_IBT_CRC_MAGIC: 2141 + case XFS_FIBT_CRC_MAGIC: 2141 2142 case XFS_IBT_MAGIC: 2143 + case XFS_FIBT_MAGIC: 2142 2144 bp->b_ops = &xfs_inobt_buf_ops; 2143 2145 break; 2144 2146 case XFS_BMAP_CRC_MAGIC:
+1
fs/xfs/xfs_stats.c
··· 59 59 { "abtc2", XFSSTAT_END_ABTC_V2 }, 60 60 { "bmbt2", XFSSTAT_END_BMBT_V2 }, 61 61 { "ibt2", XFSSTAT_END_IBT_V2 }, 62 + { "fibt2", XFSSTAT_END_FIBT_V2 }, 62 63 /* we print both series of quota information together */ 63 64 { "qm", XFSSTAT_END_QM }, 64 65 };
+17 -1
fs/xfs/xfs_stats.h
··· 183 183 __uint32_t xs_ibt_2_alloc; 184 184 __uint32_t xs_ibt_2_free; 185 185 __uint32_t xs_ibt_2_moves; 186 - #define XFSSTAT_END_XQMSTAT (XFSSTAT_END_IBT_V2+6) 186 + #define XFSSTAT_END_FIBT_V2 (XFSSTAT_END_IBT_V2+15) 187 + __uint32_t xs_fibt_2_lookup; 188 + __uint32_t xs_fibt_2_compare; 189 + __uint32_t xs_fibt_2_insrec; 190 + __uint32_t xs_fibt_2_delrec; 191 + __uint32_t xs_fibt_2_newroot; 192 + __uint32_t xs_fibt_2_killroot; 193 + __uint32_t xs_fibt_2_increment; 194 + __uint32_t xs_fibt_2_decrement; 195 + __uint32_t xs_fibt_2_lshift; 196 + __uint32_t xs_fibt_2_rshift; 197 + __uint32_t xs_fibt_2_split; 198 + __uint32_t xs_fibt_2_join; 199 + __uint32_t xs_fibt_2_alloc; 200 + __uint32_t xs_fibt_2_free; 201 + __uint32_t xs_fibt_2_moves; 202 + #define XFSSTAT_END_XQMSTAT (XFSSTAT_END_FIBT_V2+6) 187 203 __uint32_t xs_qm_dqreclaims; 188 204 __uint32_t xs_qm_dqreclaim_misses; 189 205 __uint32_t xs_qm_dquot_dups;
+1 -1
fs/xfs/xfs_types.h
··· 134 134 135 135 typedef enum { 136 136 XFS_BTNUM_BNOi, XFS_BTNUM_CNTi, XFS_BTNUM_BMAPi, XFS_BTNUM_INOi, 137 - XFS_BTNUM_MAX 137 + XFS_BTNUM_FINOi, XFS_BTNUM_MAX 138 138 } xfs_btnum_t; 139 139 140 140 struct xfs_name {