···149149{150150 xfs_agblock_t bno;151151 xfs_extlen_t len;152152+ xfs_extlen_t diff;152153153154 /* Trim busy sections out of found extent */154155 xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len);155156157157+ /*158158+ * If we have a largish extent that happens to start before min_agbno,159159+ * see if we can shift it into range...160160+ */161161+ if (bno < args->min_agbno && bno + len > args->min_agbno) {162162+ diff = args->min_agbno - bno;163163+ if (len > diff) {164164+ bno += diff;165165+ len -= diff;166166+ }167167+ }168168+156169 if (args->alignment > 1 && len >= args->minlen) {157170 xfs_agblock_t aligned_bno = roundup(bno, args->alignment);158158- xfs_extlen_t diff = aligned_bno - bno;171171+172172+ diff = aligned_bno - bno;159173160174 *resbno = aligned_bno;161175 *reslen = diff >= len ? 0 : len - diff;···809795 * The good extent is closer than this one.810796 */811797 if (!dir) {798798+ if (*sbnoa > args->max_agbno)799799+ goto out_use_good;812800 if (*sbnoa >= args->agbno + gdiff)813801 goto out_use_good;814802 } else {803803+ if (*sbnoa < args->min_agbno)804804+ goto out_use_good;815805 if (*sbnoa <= args->agbno - gdiff)816806 goto out_use_good;817807 }···901883902884 dofirst = prandom_u32() & 1;903885#endif886886+887887+ /* handle uninitialized agbno range so caller doesn't have to */888888+ if (!args->min_agbno && !args->max_agbno)889889+ args->max_agbno = args->mp->m_sb.sb_agblocks - 1;890890+ ASSERT(args->min_agbno <= args->max_agbno);891891+892892+ /* clamp agbno to the range if it's outside */893893+ if (args->agbno < args->min_agbno)894894+ args->agbno = args->min_agbno;895895+ if (args->agbno > args->max_agbno)896896+ args->agbno = args->max_agbno;904897905898restart:906899 bno_cur_lt = NULL;···1004975 xfs_alloc_compute_aligned(args, ltbno, ltlen,1005976 &ltbnoa, &ltlena);1006977 if (ltlena < args->minlen)978978+ continue;979979+ if (ltbnoa < args->min_agbno || ltbnoa > args->max_agbno)1007980 continue;1008981 args->len = 
XFS_EXTLEN_MIN(ltlena, args->maxlen);1009982 xfs_alloc_fix_len(args);···11271096 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);11281097 xfs_alloc_compute_aligned(args, ltbno, ltlen,11291098 &ltbnoa, &ltlena);11301130- if (ltlena >= args->minlen)10991099+ if (ltlena >= args->minlen && ltbnoa >= args->min_agbno)11311100 break;11321101 if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))11331102 goto error0;11341134- if (!i) {11031103+ if (!i || ltbnoa < args->min_agbno) {11351104 xfs_btree_del_cursor(bno_cur_lt,11361105 XFS_BTREE_NOERROR);11371106 bno_cur_lt = NULL;···11431112 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);11441113 xfs_alloc_compute_aligned(args, gtbno, gtlen,11451114 &gtbnoa, &gtlena);11461146- if (gtlena >= args->minlen)11151115+ if (gtlena >= args->minlen && gtbnoa <= args->max_agbno)11471116 break;11481117 if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))11491118 goto error0;11501150- if (!i) {11191119+ if (!i || gtbnoa > args->max_agbno) {11511120 xfs_btree_del_cursor(bno_cur_gt,11521121 XFS_BTREE_NOERROR);11531122 bno_cur_gt = NULL;···12471216 ASSERT(ltnew >= ltbno);12481217 ASSERT(ltnew + rlen <= ltbnoa + ltlena);12491218 ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));12191219+ ASSERT(ltnew >= args->min_agbno && ltnew <= args->max_agbno);12501220 args->agbno = ltnew;1251122112521222 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
+2
fs/xfs/libxfs/xfs_alloc.h
···112112 xfs_extlen_t total; /* total blocks needed in xaction */113113 xfs_extlen_t alignment; /* align answer to multiple of this */114114 xfs_extlen_t minalignslop; /* slop for minlen+alignment calcs */115115+ xfs_agblock_t min_agbno; /* set an agbno range for NEAR allocs */116116+ xfs_agblock_t max_agbno; /* ... */115117 xfs_extlen_t len; /* output: actual size of extent */116118 xfs_alloctype_t type; /* allocation type XFS_ALLOCTYPE_... */117119 xfs_alloctype_t otype; /* original allocation type */
+42-6
fs/xfs/libxfs/xfs_format.h
···170170 __uint32_t sb_features_log_incompat;171171172172 __uint32_t sb_crc; /* superblock crc */173173- __uint32_t sb_pad;173173+ xfs_extlen_t sb_spino_align; /* sparse inode chunk alignment */174174175175 xfs_ino_t sb_pquotino; /* project quota inode */176176 xfs_lsn_t sb_lsn; /* last write sequence */···256256 __be32 sb_features_log_incompat;257257258258 __le32 sb_crc; /* superblock crc */259259- __be32 sb_pad;259259+ __be32 sb_spino_align; /* sparse inode chunk alignment */260260261261 __be64 sb_pquotino; /* project quota inode */262262 __be64 sb_lsn; /* last write sequence */···457457}458458459459#define XFS_SB_FEAT_INCOMPAT_FTYPE (1 << 0) /* filetype in dirent */460460+#define XFS_SB_FEAT_INCOMPAT_SPINODES (1 << 1) /* sparse inode chunks */460461#define XFS_SB_FEAT_INCOMPAT_ALL \461461- (XFS_SB_FEAT_INCOMPAT_FTYPE)462462+ (XFS_SB_FEAT_INCOMPAT_FTYPE| \463463+ XFS_SB_FEAT_INCOMPAT_SPINODES)462464463465#define XFS_SB_FEAT_INCOMPAT_UNKNOWN ~XFS_SB_FEAT_INCOMPAT_ALL464466static inline bool···506504{507505 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) &&508506 (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FINOBT);507507+}508508+509509+static inline bool xfs_sb_version_hassparseinodes(struct xfs_sb *sbp)510510+{511511+ return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&512512+ xfs_sb_has_incompat_feature(sbp, XFS_SB_FEAT_INCOMPAT_SPINODES);509513}510514511515/*···12241216#define XFS_INOBT_ALL_FREE ((xfs_inofree_t)-1)12251217#define XFS_INOBT_MASK(i) ((xfs_inofree_t)1 << (i))1226121812191219+#define XFS_INOBT_HOLEMASK_FULL 0 /* holemask for full chunk */12201220+#define XFS_INOBT_HOLEMASK_BITS (NBBY * sizeof(__uint16_t))12211221+#define XFS_INODES_PER_HOLEMASK_BIT \12221222+ (XFS_INODES_PER_CHUNK / (NBBY * sizeof(__uint16_t)))12231223+12271224static inline xfs_inofree_t xfs_inobt_maskn(int i, int n)12281225{12291226 return ((n >= XFS_INODES_PER_CHUNK ? 
0 : XFS_INOBT_MASK(n)) - 1) << i;12301227}1231122812321229/*12331233- * Data record structure12301230+ * The on-disk inode record structure has two formats. The original "full"12311231+ * format uses a 4-byte freecount. The "sparse" format uses a 1-byte freecount12321232+ * and replaces the 3 high-order freecount bytes with the holemask and inode12331233+ * count.12341234+ *12351235+ * The holemask of the sparse record format allows an inode chunk to have holes12361236+ * that refer to blocks not owned by the inode record. This facilitates inode12371237+ * allocation in the event of severe free space fragmentation.12341238 */12351239typedef struct xfs_inobt_rec {12361240 __be32 ir_startino; /* starting inode number */12371237- __be32 ir_freecount; /* count of free inodes (set bits) */12411241+ union {12421242+ struct {12431243+ __be32 ir_freecount; /* count of free inodes */12441244+ } f;12451245+ struct {12461246+ __be16 ir_holemask;/* hole mask for sparse chunks */12471247+ __u8 ir_count; /* total inode count */12481248+ __u8 ir_freecount; /* count of free inodes */12491249+ } sp;12501250+ } ir_u;12381251 __be64 ir_free; /* free inode mask */12391252} xfs_inobt_rec_t;1240125312411254typedef struct xfs_inobt_rec_incore {12421255 xfs_agino_t ir_startino; /* starting inode number */12431243- __int32_t ir_freecount; /* count of free inodes (set bits) */12561256+ __uint16_t ir_holemask; /* hole mask for sparse chunks */12571257+ __uint8_t ir_count; /* total inode count */12581258+ __uint8_t ir_freecount; /* count of free inodes (set bits) */12441259 xfs_inofree_t ir_free; /* free inode mask */12451260} xfs_inobt_rec_incore_t;1246126112621262+static inline bool xfs_inobt_issparse(uint16_t holemask)12631263+{12641264+ /* non-zero holemask represents a sparse rec. */12651265+ return holemask;12661266+}1247126712481268/*12491269 * Key structure
+1
fs/xfs/libxfs/xfs_fs.h
···239239#define XFS_FSOP_GEOM_FLAGS_V5SB 0x8000 /* version 5 superblock */240240#define XFS_FSOP_GEOM_FLAGS_FTYPE 0x10000 /* inode directory types */241241#define XFS_FSOP_GEOM_FLAGS_FINOBT 0x20000 /* free inode btree */242242+#define XFS_FSOP_GEOM_FLAGS_SPINODES 0x40000 /* sparse inode chunks */242243243244/*244245 * Minimum and maximum sizes needed for growth checks.
+494-47
fs/xfs/libxfs/xfs_ialloc.c
···6565 int *stat) /* success/failure */6666{6767 cur->bc_rec.i.ir_startino = ino;6868+ cur->bc_rec.i.ir_holemask = 0;6969+ cur->bc_rec.i.ir_count = 0;6870 cur->bc_rec.i.ir_freecount = 0;6971 cur->bc_rec.i.ir_free = 0;7072 return xfs_btree_lookup(cur, dir, stat);···8482 union xfs_btree_rec rec;85838684 rec.inobt.ir_startino = cpu_to_be32(irec->ir_startino);8787- rec.inobt.ir_freecount = cpu_to_be32(irec->ir_freecount);8585+ if (xfs_sb_version_hassparseinodes(&cur->bc_mp->m_sb)) {8686+ rec.inobt.ir_u.sp.ir_holemask = cpu_to_be16(irec->ir_holemask);8787+ rec.inobt.ir_u.sp.ir_count = irec->ir_count;8888+ rec.inobt.ir_u.sp.ir_freecount = irec->ir_freecount;8989+ } else {9090+ /* ir_holemask/ir_count not supported on-disk */9191+ rec.inobt.ir_u.f.ir_freecount = cpu_to_be32(irec->ir_freecount);9292+ }8893 rec.inobt.ir_free = cpu_to_be64(irec->ir_free);8994 return xfs_btree_update(cur, &rec);9095}···109100 int error;110101111102 error = xfs_btree_get_rec(cur, &rec, stat);112112- if (!error && *stat == 1) {113113- irec->ir_startino = be32_to_cpu(rec->inobt.ir_startino);114114- irec->ir_freecount = be32_to_cpu(rec->inobt.ir_freecount);115115- irec->ir_free = be64_to_cpu(rec->inobt.ir_free);103103+ if (error || *stat == 0)104104+ return error;105105+106106+ irec->ir_startino = be32_to_cpu(rec->inobt.ir_startino);107107+ if (xfs_sb_version_hassparseinodes(&cur->bc_mp->m_sb)) {108108+ irec->ir_holemask = be16_to_cpu(rec->inobt.ir_u.sp.ir_holemask);109109+ irec->ir_count = rec->inobt.ir_u.sp.ir_count;110110+ irec->ir_freecount = rec->inobt.ir_u.sp.ir_freecount;111111+ } else {112112+ /*113113+ * ir_holemask/ir_count not supported on-disk. 
Fill in hardcoded114114+ * values for full inode chunks.115115+ */116116+ irec->ir_holemask = XFS_INOBT_HOLEMASK_FULL;117117+ irec->ir_count = XFS_INODES_PER_CHUNK;118118+ irec->ir_freecount =119119+ be32_to_cpu(rec->inobt.ir_u.f.ir_freecount);116120 }117117- return error;121121+ irec->ir_free = be64_to_cpu(rec->inobt.ir_free);122122+123123+ return 0;118124}119125120126/*···138114STATIC int139115xfs_inobt_insert_rec(140116 struct xfs_btree_cur *cur,117117+ __uint16_t holemask,118118+ __uint8_t count,141119 __int32_t freecount,142120 xfs_inofree_t free,143121 int *stat)144122{123123+ cur->bc_rec.i.ir_holemask = holemask;124124+ cur->bc_rec.i.ir_count = count;145125 cur->bc_rec.i.ir_freecount = freecount;146126 cur->bc_rec.i.ir_free = free;147127 return xfs_btree_insert(cur, stat);···182154 }183155 ASSERT(i == 0);184156185185- error = xfs_inobt_insert_rec(cur, XFS_INODES_PER_CHUNK,157157+ error = xfs_inobt_insert_rec(cur, XFS_INOBT_HOLEMASK_FULL,158158+ XFS_INODES_PER_CHUNK,159159+ XFS_INODES_PER_CHUNK,186160 XFS_INOBT_ALL_FREE, &i);187161 if (error) {188162 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);···250220 struct xfs_mount *mp,251221 struct xfs_trans *tp,252222 struct list_head *buffer_list,223223+ int icount,253224 xfs_agnumber_t agno,254225 xfs_agblock_t agbno,255226 xfs_agblock_t length,···306275 * they track in the AIL as if they were physically logged.307276 */308277 if (tp)309309- xfs_icreate_log(tp, agno, agbno, mp->m_ialloc_inos,278278+ xfs_icreate_log(tp, agno, agbno, icount,310279 mp->m_sb.sb_inodesize, length, gen);311280 } else312281 version = 2;···378347}379348380349/*350350+ * Align startino and allocmask for a recently allocated sparse chunk such that351351+ * they are fit for insertion (or merge) into the on-disk inode btrees.352352+ *353353+ * Background:354354+ *355355+ * When enabled, sparse inode support increases the inode alignment from cluster356356+ * size to inode chunk size. 
This means that the minimum range between two357357+ * non-adjacent inode records in the inobt is large enough for a full inode358358+ * record. This allows for cluster sized, cluster aligned block allocation359359+ * without need to worry about whether the resulting inode record overlaps with360360+ * another record in the tree. Without this basic rule, we would have to deal361361+ * with the consequences of overlap by potentially undoing recent allocations in362362+ * the inode allocation codepath.363363+ *364364+ * Because of this alignment rule (which is enforced on mount), there are two365365+ * inobt possibilities for newly allocated sparse chunks. One is that the366366+ * aligned inode record for the chunk covers a range of inodes not already367367+ * covered in the inobt (i.e., it is safe to insert a new sparse record). The368368+ * other is that a record already exists at the aligned startino that considers369369+ * the newly allocated range as sparse. In the latter case, record content is370370+ * merged in hope that sparse inode chunks fill to full chunks over time.371371+ */372372+STATIC void373373+xfs_align_sparse_ino(374374+ struct xfs_mount *mp,375375+ xfs_agino_t *startino,376376+ uint16_t *allocmask)377377+{378378+ xfs_agblock_t agbno;379379+ xfs_agblock_t mod;380380+ int offset;381381+382382+ agbno = XFS_AGINO_TO_AGBNO(mp, *startino);383383+ mod = agbno % mp->m_sb.sb_inoalignmt;384384+ if (!mod)385385+ return;386386+387387+ /* calculate the inode offset and align startino */388388+ offset = mod << mp->m_sb.sb_inopblog;389389+ *startino -= offset;390390+391391+ /*392392+ * Since startino has been aligned down, left shift allocmask such that393393+ * it continues to represent the same physical inodes relative to the394394+ * new startino.395395+ */396396+ *allocmask <<= offset / XFS_INODES_PER_HOLEMASK_BIT;397397+}398398+399399+/*400400+ * Determine whether the source inode record can merge into the target. 
Both401401+ * records must be sparse, the inode ranges must match and there must be no402402+ * allocation overlap between the records.403403+ */404404+STATIC bool405405+__xfs_inobt_can_merge(406406+ struct xfs_inobt_rec_incore *trec, /* tgt record */407407+ struct xfs_inobt_rec_incore *srec) /* src record */408408+{409409+ uint64_t talloc;410410+ uint64_t salloc;411411+412412+ /* records must cover the same inode range */413413+ if (trec->ir_startino != srec->ir_startino)414414+ return false;415415+416416+ /* both records must be sparse */417417+ if (!xfs_inobt_issparse(trec->ir_holemask) ||418418+ !xfs_inobt_issparse(srec->ir_holemask))419419+ return false;420420+421421+ /* both records must track some inodes */422422+ if (!trec->ir_count || !srec->ir_count)423423+ return false;424424+425425+ /* can't exceed capacity of a full record */426426+ if (trec->ir_count + srec->ir_count > XFS_INODES_PER_CHUNK)427427+ return false;428428+429429+ /* verify there is no allocation overlap */430430+ talloc = xfs_inobt_irec_to_allocmask(trec);431431+ salloc = xfs_inobt_irec_to_allocmask(srec);432432+ if (talloc & salloc)433433+ return false;434434+435435+ return true;436436+}437437+438438+/*439439+ * Merge the source inode record into the target. The caller must call440440+ * __xfs_inobt_can_merge() to ensure the merge is valid.441441+ */442442+STATIC void443443+__xfs_inobt_rec_merge(444444+ struct xfs_inobt_rec_incore *trec, /* target */445445+ struct xfs_inobt_rec_incore *srec) /* src */446446+{447447+ ASSERT(trec->ir_startino == srec->ir_startino);448448+449449+ /* combine the counts */450450+ trec->ir_count += srec->ir_count;451451+ trec->ir_freecount += srec->ir_freecount;452452+453453+ /*454454+ * Merge the holemask and free mask. For both fields, 0 bits refer to455455+ * allocated inodes. 
We combine the allocated ranges with bitwise AND.456456+ */457457+ trec->ir_holemask &= srec->ir_holemask;458458+ trec->ir_free &= srec->ir_free;459459+}460460+461461+/*462462+ * Insert a new sparse inode chunk into the associated inode btree. The inode463463+ * record for the sparse chunk is pre-aligned to a startino that should match464464+ * any pre-existing sparse inode record in the tree. This allows sparse chunks465465+ * to fill over time.466466+ *467467+ * This function supports two modes of handling preexisting records depending on468468+ * the merge flag. If merge is true, the provided record is merged with the469469+ * existing record and updated in place. The merged record is returned in nrec.470470+ * If merge is false, an existing record is replaced with the provided record.471471+ * If no preexisting record exists, the provided record is always inserted.472472+ *473473+ * It is considered corruption if a merge is requested and not possible. Given474474+ * the sparse inode alignment constraints, this should never happen.475475+ */476476+STATIC int477477+xfs_inobt_insert_sprec(478478+ struct xfs_mount *mp,479479+ struct xfs_trans *tp,480480+ struct xfs_buf *agbp,481481+ int btnum,482482+ struct xfs_inobt_rec_incore *nrec, /* in/out: new/merged rec. 
*/483483+ bool merge) /* merge or replace */484484+{485485+ struct xfs_btree_cur *cur;486486+ struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);487487+ xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno);488488+ int error;489489+ int i;490490+ struct xfs_inobt_rec_incore rec;491491+492492+ cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, btnum);493493+494494+ /* the new record is pre-aligned so we know where to look */495495+ error = xfs_inobt_lookup(cur, nrec->ir_startino, XFS_LOOKUP_EQ, &i);496496+ if (error)497497+ goto error;498498+ /* if nothing there, insert a new record and return */499499+ if (i == 0) {500500+ error = xfs_inobt_insert_rec(cur, nrec->ir_holemask,501501+ nrec->ir_count, nrec->ir_freecount,502502+ nrec->ir_free, &i);503503+ if (error)504504+ goto error;505505+ XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error);506506+507507+ goto out;508508+ }509509+510510+ /*511511+ * A record exists at this startino. Merge or replace the record512512+ * depending on what we've been asked to do.513513+ */514514+ if (merge) {515515+ error = xfs_inobt_get_rec(cur, &rec, &i);516516+ if (error)517517+ goto error;518518+ XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error);519519+ XFS_WANT_CORRUPTED_GOTO(mp,520520+ rec.ir_startino == nrec->ir_startino,521521+ error);522522+523523+ /*524524+ * This should never fail. 
If we have coexisting records that525525+ * cannot merge, something is seriously wrong.526526+ */527527+ XFS_WANT_CORRUPTED_GOTO(mp, __xfs_inobt_can_merge(nrec, &rec),528528+ error);529529+530530+ trace_xfs_irec_merge_pre(mp, agno, rec.ir_startino,531531+ rec.ir_holemask, nrec->ir_startino,532532+ nrec->ir_holemask);533533+534534+ /* merge to nrec to output the updated record */535535+ __xfs_inobt_rec_merge(nrec, &rec);536536+537537+ trace_xfs_irec_merge_post(mp, agno, nrec->ir_startino,538538+ nrec->ir_holemask);539539+540540+ error = xfs_inobt_rec_check_count(mp, nrec);541541+ if (error)542542+ goto error;543543+ }544544+545545+ error = xfs_inobt_update(cur, nrec);546546+ if (error)547547+ goto error;548548+549549+out:550550+ xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);551551+ return 0;552552+error:553553+ xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);554554+ return error;555555+}556556+557557+/*381558 * Allocate new inodes in the allocation group specified by agbp.382559 * Return 0 for success, else error code.383560 */···603364 xfs_agino_t newlen; /* new number of inodes */604365 int isaligned = 0; /* inode allocation at stripe unit */605366 /* boundary */367367+ uint16_t allocmask = (uint16_t) -1; /* init. 
to full chunk */368368+ struct xfs_inobt_rec_incore rec;606369 struct xfs_perag *pag;370370+371371+ int do_sparse = 0;372372+373373+#ifdef DEBUG374374+ /* randomly do sparse inode allocations */375375+ if (xfs_sb_version_hassparseinodes(&tp->t_mountp->m_sb))376376+ do_sparse = prandom_u32() & 1;377377+#endif607378608379 memset(&args, 0, sizeof(args));609380 args.tp = tp;610381 args.mp = tp->t_mountp;382382+ args.fsbno = NULLFSBLOCK;611383612384 /*613385 * Locking will ensure that we don't have two callers in here···640390 agno = be32_to_cpu(agi->agi_seqno);641391 args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) +642392 args.mp->m_ialloc_blks;393393+ if (do_sparse)394394+ goto sparse_alloc;643395 if (likely(newino != NULLAGINO &&644396 (args.agbno < be32_to_cpu(agi->agi_length)))) {645397 args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);···680428 * subsequent requests.681429 */682430 args.minalignslop = 0;683683- } else684684- args.fsbno = NULLFSBLOCK;431431+ }685432686433 if (unlikely(args.fsbno == NULLFSBLOCK)) {687434 /*···731480 return error;732481 }733482483483+ /*484484+ * Finally, try a sparse allocation if the filesystem supports it and485485+ * the sparse allocation length is smaller than a full chunk.486486+ */487487+ if (xfs_sb_version_hassparseinodes(&args.mp->m_sb) &&488488+ args.mp->m_ialloc_min_blks < args.mp->m_ialloc_blks &&489489+ args.fsbno == NULLFSBLOCK) {490490+sparse_alloc:491491+ args.type = XFS_ALLOCTYPE_NEAR_BNO;492492+ args.agbno = be32_to_cpu(agi->agi_root);493493+ args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);494494+ args.alignment = args.mp->m_sb.sb_spino_align;495495+ args.prod = 1;496496+497497+ args.minlen = args.mp->m_ialloc_min_blks;498498+ args.maxlen = args.minlen;499499+500500+ /*501501+ * The inode record will be aligned to full chunk size. 
We must502502+ * prevent sparse allocation from AG boundaries that result in503503+ * invalid inode records, such as records that start at agbno 0504504+ * or extend beyond the AG.505505+ *506506+ * Set min agbno to the first aligned, non-zero agbno and max to507507+ * the last aligned agbno that is at least one full chunk from508508+ * the end of the AG.509509+ */510510+ args.min_agbno = args.mp->m_sb.sb_inoalignmt;511511+ args.max_agbno = round_down(args.mp->m_sb.sb_agblocks,512512+ args.mp->m_sb.sb_inoalignmt) -513513+ args.mp->m_ialloc_blks;514514+515515+ error = xfs_alloc_vextent(&args);516516+ if (error)517517+ return error;518518+519519+ newlen = args.len << args.mp->m_sb.sb_inopblog;520520+ allocmask = (1 << (newlen / XFS_INODES_PER_HOLEMASK_BIT)) - 1;521521+ }522522+734523 if (args.fsbno == NULLFSBLOCK) {735524 *alloc = 0;736525 return 0;···786495 * rather than a linear progression to prevent the next generation787496 * number from being easily guessable.788497 */789789- error = xfs_ialloc_inode_init(args.mp, tp, NULL, agno, args.agbno,790790- args.len, prandom_u32());498498+ error = xfs_ialloc_inode_init(args.mp, tp, NULL, newlen, agno,499499+ args.agbno, args.len, prandom_u32());791500792501 if (error)793502 return error;···795504 * Convert the results.796505 */797506 newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0);507507+508508+ if (xfs_inobt_issparse(~allocmask)) {509509+ /*510510+ * We've allocated a sparse chunk. Align the startino and mask.511511+ */512512+ xfs_align_sparse_ino(args.mp, &newino, &allocmask);513513+514514+ rec.ir_startino = newino;515515+ rec.ir_holemask = ~allocmask;516516+ rec.ir_count = newlen;517517+ rec.ir_freecount = newlen;518518+ rec.ir_free = XFS_INOBT_ALL_FREE;519519+520520+ /*521521+ * Insert the sparse record into the inobt and allow for a merge522522+ * if necessary. 
If a merge does occur, rec is updated to the523523+ * merged record.524524+ */525525+ error = xfs_inobt_insert_sprec(args.mp, tp, agbp, XFS_BTNUM_INO,526526+ &rec, true);527527+ if (error == -EFSCORRUPTED) {528528+ xfs_alert(args.mp,529529+ "invalid sparse inode record: ino 0x%llx holemask 0x%x count %u",530530+ XFS_AGINO_TO_INO(args.mp, agno,531531+ rec.ir_startino),532532+ rec.ir_holemask, rec.ir_count);533533+ xfs_force_shutdown(args.mp, SHUTDOWN_CORRUPT_INCORE);534534+ }535535+ if (error)536536+ return error;537537+538538+ /*539539+ * We can't merge the part we've just allocated as for the inobt540540+ * due to finobt semantics. The original record may or may not541541+ * exist independent of whether physical inodes exist in this542542+ * sparse chunk.543543+ *544544+ * We must update the finobt record based on the inobt record.545545+ * rec contains the fully merged and up to date inobt record546546+ * from the previous call. Set merge false to replace any547547+ * existing record with this one.548548+ */549549+ if (xfs_sb_version_hasfinobt(&args.mp->m_sb)) {550550+ error = xfs_inobt_insert_sprec(args.mp, tp, agbp,551551+ XFS_BTNUM_FINO, &rec,552552+ false);553553+ if (error)554554+ return error;555555+ }556556+ } else {557557+ /* full chunk - insert new records to both btrees */558558+ error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen,559559+ XFS_BTNUM_INO);560560+ if (error)561561+ return error;562562+563563+ if (xfs_sb_version_hasfinobt(&args.mp->m_sb)) {564564+ error = xfs_inobt_insert(args.mp, tp, agbp, newino,565565+ newlen, XFS_BTNUM_FINO);566566+ if (error)567567+ return error;568568+ }569569+ }570570+571571+ /*572572+ * Update AGI counts and newino.573573+ */798574 be32_add_cpu(&agi->agi_count, newlen);799575 be32_add_cpu(&agi->agi_freecount, newlen);800576 pag = xfs_perag_get(args.mp, agno);···869511 xfs_perag_put(pag);870512 agi->agi_newino = cpu_to_be32(newino);871513872872- /*873873- * Insert records describing the new inode chunk into 
the btrees.874874- */875875- error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen,876876- XFS_BTNUM_INO);877877- if (error)878878- return error;879879-880880- if (xfs_sb_version_hasfinobt(&args.mp->m_sb)) {881881- error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen,882882- XFS_BTNUM_FINO);883883- if (error)884884- return error;885885- }886514 /*887515 * Log allocation group header fields888516 */···989645 * if we fail allocation due to alignment issues then it is most990646 * likely a real ENOSPC condition.991647 */992992- ineed = mp->m_ialloc_blks;648648+ ineed = mp->m_ialloc_min_blks;993649 if (flags && ineed > 1)994650 ineed += xfs_ialloc_cluster_alignment(mp);995651 longest = pag->pagf_longest;···1073729 }10747301075731 return 0;732732+}733733+734734+/*735735+ * Return the offset of the first free inode in the record. If the inode chunk736736+ * is sparsely allocated, we convert the record holemask to inode granularity737737+ * and mask off the unallocated regions from the inode free mask.738738+ */739739+STATIC int740740+xfs_inobt_first_free_inode(741741+ struct xfs_inobt_rec_incore *rec)742742+{743743+ xfs_inofree_t realfree;744744+745745+ /* if there are no holes, return the first available offset */746746+ if (!xfs_inobt_issparse(rec->ir_holemask))747747+ return xfs_lowbit64(rec->ir_free);748748+749749+ realfree = xfs_inobt_irec_to_allocmask(rec);750750+ realfree &= rec->ir_free;751751+752752+ return xfs_lowbit64(realfree);1076753}10777541078755/*···1326961 }13279621328963alloc_inode:13291329- offset = xfs_lowbit64(rec.ir_free);964964+ offset = xfs_inobt_first_free_inode(&rec);1330965 ASSERT(offset >= 0);1331966 ASSERT(offset < XFS_INODES_PER_CHUNK);1332967 ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %···15751210 if (error)15761211 goto error_cur;1577121215781578- offset = xfs_lowbit64(rec.ir_free);12131213+ offset = xfs_inobt_first_free_inode(&rec);15791214 ASSERT(offset >= 0);15801215 ASSERT(offset < XFS_INODES_PER_CHUNK);15811216 
ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %···18041439 return error;18051440}1806144114421442+/*14431443+ * Free the blocks of an inode chunk. We must consider that the inode chunk14441444+ * might be sparse and only free the regions that are allocated as part of the14451445+ * chunk.14461446+ */14471447+STATIC void14481448+xfs_difree_inode_chunk(14491449+ struct xfs_mount *mp,14501450+ xfs_agnumber_t agno,14511451+ struct xfs_inobt_rec_incore *rec,14521452+ struct xfs_bmap_free *flist)14531453+{14541454+ xfs_agblock_t sagbno = XFS_AGINO_TO_AGBNO(mp, rec->ir_startino);14551455+ int startidx, endidx;14561456+ int nextbit;14571457+ xfs_agblock_t agbno;14581458+ int contigblk;14591459+ DECLARE_BITMAP(holemask, XFS_INOBT_HOLEMASK_BITS);14601460+14611461+ if (!xfs_inobt_issparse(rec->ir_holemask)) {14621462+ /* not sparse, calculate extent info directly */14631463+ xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, agno,14641464+ XFS_AGINO_TO_AGBNO(mp, rec->ir_startino)),14651465+ mp->m_ialloc_blks, flist, mp);14661466+ return;14671467+ }14681468+14691469+ /* holemask is only 16-bits (fits in an unsigned long) */14701470+ ASSERT(sizeof(rec->ir_holemask) <= sizeof(holemask[0]));14711471+ holemask[0] = rec->ir_holemask;14721472+14731473+ /*14741474+ * Find contiguous ranges of zeroes (i.e., allocated regions) in the14751475+ * holemask and convert the start/end index of each range to an extent.14761476+ * We start with the start and end index both pointing at the first 0 in14771477+ * the mask.14781478+ */14791479+ startidx = endidx = find_first_zero_bit(holemask,14801480+ XFS_INOBT_HOLEMASK_BITS);14811481+ nextbit = startidx + 1;14821482+ while (startidx < XFS_INOBT_HOLEMASK_BITS) {14831483+ nextbit = find_next_zero_bit(holemask, XFS_INOBT_HOLEMASK_BITS,14841484+ nextbit);14851485+ /*14861486+ * If the next zero bit is contiguous, update the end index of14871487+ * the current range and continue.14881488+ */14891489+ if (nextbit != XFS_INOBT_HOLEMASK_BITS &&14901490+ nextbit == 
endidx + 1) {14911491+ endidx = nextbit;14921492+ goto next;14931493+ }14941494+14951495+ /*14961496+ * nextbit is not contiguous with the current end index. Convert14971497+ * the current start/end to an extent and add it to the free14981498+ * list.14991499+ */15001500+ agbno = sagbno + (startidx * XFS_INODES_PER_HOLEMASK_BIT) /15011501+ mp->m_sb.sb_inopblock;15021502+ contigblk = ((endidx - startidx + 1) *15031503+ XFS_INODES_PER_HOLEMASK_BIT) /15041504+ mp->m_sb.sb_inopblock;15051505+15061506+ ASSERT(agbno % mp->m_sb.sb_spino_align == 0);15071507+ ASSERT(contigblk % mp->m_sb.sb_spino_align == 0);15081508+ xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, agno, agbno), contigblk,15091509+ flist, mp);15101510+15111511+ /* reset range to current bit and carry on... */15121512+ startidx = endidx = nextbit;15131513+15141514+next:15151515+ nextbit++;15161516+ }15171517+}15181518+18071519STATIC int18081520xfs_difree_inobt(18091521 struct xfs_mount *mp,···18881446 struct xfs_buf *agbp,18891447 xfs_agino_t agino,18901448 struct xfs_bmap_free *flist,18911891- int *deleted,18921892- xfs_ino_t *first_ino,14491449+ struct xfs_icluster *xic,18931450 struct xfs_inobt_rec_incore *orec)18941451{18951452 struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);···19421501 rec.ir_freecount++;1943150219441503 /*19451945- * When an inode cluster is free, it becomes eligible for removal15041504+ * When an inode chunk is free, it becomes eligible for removal. 
Don't15051505+ * remove the chunk if the block size is large enough for multiple inode15061506+ * chunks (that might not be free).19461507 */19471508 if (!(mp->m_flags & XFS_MOUNT_IKEEP) &&19481948- (rec.ir_freecount == mp->m_ialloc_inos)) {19491949-19501950- *deleted = 1;19511951- *first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino);15091509+ rec.ir_free == XFS_INOBT_ALL_FREE &&15101510+ mp->m_sb.sb_inopblock <= XFS_INODES_PER_CHUNK) {15111511+ xic->deleted = 1;15121512+ xic->first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino);15131513+ xic->alloc = xfs_inobt_irec_to_allocmask(&rec);1952151419531515 /*19541516 * Remove the inode cluster from the AGI B+Tree, adjust the19551517 * AGI and Superblock inode counts, and mark the disk space19561518 * to be freed when the transaction is committed.19571519 */19581958- ilen = mp->m_ialloc_inos;15201520+ ilen = rec.ir_freecount;19591521 be32_add_cpu(&agi->agi_count, -ilen);19601522 be32_add_cpu(&agi->agi_freecount, -(ilen - 1));19611523 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT);···19741530 goto error0;19751531 }1976153219771977- xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, agno,19781978- XFS_AGINO_TO_AGBNO(mp, rec.ir_startino)),19791979- mp->m_ialloc_blks, flist, mp);15331533+ xfs_difree_inode_chunk(mp, agno, &rec, flist);19801534 } else {19811981- *deleted = 0;15351535+ xic->deleted = 0;1982153619831537 error = xfs_inobt_update(cur, &rec);19841538 if (error) {···20411599 */20421600 XFS_WANT_CORRUPTED_GOTO(mp, ibtrec->ir_freecount == 1, error);2043160120442044- error = xfs_inobt_insert_rec(cur, ibtrec->ir_freecount,16021602+ error = xfs_inobt_insert_rec(cur, ibtrec->ir_holemask,16031603+ ibtrec->ir_count,16041604+ ibtrec->ir_freecount,20451605 ibtrec->ir_free, &i);20461606 if (error)20471607 goto error;···20781634 * free inode. 
Hence, if all of the inodes are free and we aren't20791635 * keeping inode chunks permanently on disk, remove the record.20801636 * Otherwise, update the record with the new information.16371637+ *16381638+ * Note that we currently can't free chunks when the block size is large16391639+ * enough for multiple chunks. Leave the finobt record to remain in sync16401640+ * with the inobt.20811641 */20822082- if (rec.ir_freecount == mp->m_ialloc_inos &&16421642+ if (rec.ir_free == XFS_INOBT_ALL_FREE &&16431643+ mp->m_sb.sb_inopblock <= XFS_INODES_PER_CHUNK &&20831644 !(mp->m_flags & XFS_MOUNT_IKEEP)) {20841645 error = xfs_btree_delete(cur, &i);20851646 if (error)···21201671 struct xfs_trans *tp, /* transaction pointer */21211672 xfs_ino_t inode, /* inode to be freed */21221673 struct xfs_bmap_free *flist, /* extents to free */21232123- int *deleted,/* set if inode cluster was deleted */21242124- xfs_ino_t *first_ino)/* first inode in deleted cluster */16741674+ struct xfs_icluster *xic) /* cluster info if deleted */21251675{21261676 /* REFERENCED */21271677 xfs_agblock_t agbno; /* block number containing inode */···21711723 /*21721724 * Fix up the inode allocation btree.21731725 */21742174- error = xfs_difree_inobt(mp, tp, agbp, agino, flist, deleted, first_ino,21752175- &rec);17261726+ error = xfs_difree_inobt(mp, tp, agbp, agino, flist, xic, &rec);21761727 if (error)21771728 goto error0;21781729
+9-3
fs/xfs/libxfs/xfs_ialloc.h
···2828/* Move inodes in clusters of this size */2929#define XFS_INODE_BIG_CLUSTER_SIZE 819230303131+struct xfs_icluster {3232+ bool deleted; /* record is deleted */3333+ xfs_ino_t first_ino; /* first inode number */3434+ uint64_t alloc; /* inode phys. allocation bitmap for3535+ * sparse chunks */3636+};3737+3138/* Calculate and return the number of filesystem blocks per inode cluster */3239static inline int3340xfs_icluster_size_fsb(···9790 struct xfs_trans *tp, /* transaction pointer */9891 xfs_ino_t inode, /* inode to be freed */9992 struct xfs_bmap_free *flist, /* extents to free */100100- int *deleted, /* set if inode cluster was deleted */101101- xfs_ino_t *first_ino); /* first inode in deleted cluster */9393+ struct xfs_icluster *ifree); /* cluster info if deleted */1029410395/*10496 * Return the location of the inode in imap, for mapping it into a buffer.···162156 * Inode chunk initialisation routine163157 */164158int xfs_ialloc_inode_init(struct xfs_mount *mp, struct xfs_trans *tp,165165- struct list_head *buffer_list,159159+ struct list_head *buffer_list, int icount,166160 xfs_agnumber_t agno, xfs_agblock_t agbno,167161 xfs_agblock_t length, unsigned int gen);168162
+92-1
fs/xfs/libxfs/xfs_ialloc_btree.c
···167167 union xfs_btree_rec *rec)168168{169169 rec->inobt.ir_startino = cpu_to_be32(cur->bc_rec.i.ir_startino);170170- rec->inobt.ir_freecount = cpu_to_be32(cur->bc_rec.i.ir_freecount);170170+ if (xfs_sb_version_hassparseinodes(&cur->bc_mp->m_sb)) {171171+ rec->inobt.ir_u.sp.ir_holemask =172172+ cpu_to_be16(cur->bc_rec.i.ir_holemask);173173+ rec->inobt.ir_u.sp.ir_count = cur->bc_rec.i.ir_count;174174+ rec->inobt.ir_u.sp.ir_freecount = cur->bc_rec.i.ir_freecount;175175+ } else {176176+ /* ir_holemask/ir_count not supported on-disk */177177+ rec->inobt.ir_u.f.ir_freecount =178178+ cpu_to_be32(cur->bc_rec.i.ir_freecount);179179+ }171180 rec->inobt.ir_free = cpu_to_be64(cur->bc_rec.i.ir_free);172181}173182···427418 return blocklen / sizeof(xfs_inobt_rec_t);428419 return blocklen / (sizeof(xfs_inobt_key_t) + sizeof(xfs_inobt_ptr_t));429420}421421+422422+/*423423+ * Convert the inode record holemask to an inode allocation bitmap. The inode424424+ * allocation bitmap is inode granularity and specifies whether an inode is425425+ * physically allocated on disk (not whether the inode is considered allocated426426+ * or free by the fs).427427+ *428428+ * A bit value of 1 means the inode is allocated, a value of 0 means it is free.429429+ */430430+uint64_t431431+xfs_inobt_irec_to_allocmask(432432+ struct xfs_inobt_rec_incore *rec)433433+{434434+ uint64_t bitmap = 0;435435+ uint64_t inodespbit;436436+ int nextbit;437437+ uint allocbitmap;438438+439439+ /*440440+ * The holemask has 16-bits for a 64 inode record. Therefore each441441+ * holemask bit represents multiple inodes. Create a mask of bits to set442442+ * in the allocmask for each holemask bit.443443+ */444444+ inodespbit = (1 << XFS_INODES_PER_HOLEMASK_BIT) - 1;445445+446446+ /*447447+ * Allocated inodes are represented by 0 bits in holemask. Invert the 0448448+ * bits to 1 and convert to a uint so we can use xfs_next_bit(). 
Mask449449+ * anything beyond the 16 holemask bits since this casts to a larger450450+ * type.451451+ */452452+ allocbitmap = ~rec->ir_holemask & ((1 << XFS_INOBT_HOLEMASK_BITS) - 1);453453+454454+ /*455455+ * allocbitmap is the inverted holemask so every set bit represents456456+ * allocated inodes. To expand from 16-bit holemask granularity to457457+ * 64-bit (e.g., bit-per-inode), set inodespbit bits in the target458458+ * bitmap for every holemask bit.459459+ */460460+ nextbit = xfs_next_bit(&allocbitmap, 1, 0);461461+ while (nextbit != -1) {462462+ ASSERT(nextbit < (sizeof(rec->ir_holemask) * NBBY));463463+464464+ bitmap |= (inodespbit <<465465+ (nextbit * XFS_INODES_PER_HOLEMASK_BIT));466466+467467+ nextbit = xfs_next_bit(&allocbitmap, 1, nextbit + 1);468468+ }469469+470470+ return bitmap;471471+}472472+473473+#if defined(DEBUG) || defined(XFS_WARN)474474+/*475475+ * Verify that an in-core inode record has a valid inode count.476476+ */477477+int478478+xfs_inobt_rec_check_count(479479+ struct xfs_mount *mp,480480+ struct xfs_inobt_rec_incore *rec)481481+{482482+ int inocount = 0;483483+ int nextbit = 0;484484+ uint64_t allocbmap;485485+ int wordsz;486486+487487+ wordsz = sizeof(allocbmap) / sizeof(unsigned int);488488+ allocbmap = xfs_inobt_irec_to_allocmask(rec);489489+490490+ nextbit = xfs_next_bit((uint *) &allocbmap, wordsz, nextbit);491491+ while (nextbit != -1) {492492+ inocount++;493493+ nextbit = xfs_next_bit((uint *) &allocbmap, wordsz,494494+ nextbit + 1);495495+ }496496+497497+ if (inocount != rec->ir_count)498498+ return -EFSCORRUPTED;499499+500500+ return 0;501501+}502502+#endif /* DEBUG */
···22352235 */22362236STATIC int22372237xfs_ifree_cluster(22382238- xfs_inode_t *free_ip,22392239- xfs_trans_t *tp,22402240- xfs_ino_t inum)22382238+ xfs_inode_t *free_ip,22392239+ xfs_trans_t *tp,22402240+ struct xfs_icluster *xic)22412241{22422242 xfs_mount_t *mp = free_ip->i_mount;22432243 int blks_per_cluster;···22502250 xfs_inode_log_item_t *iip;22512251 xfs_log_item_t *lip;22522252 struct xfs_perag *pag;22532253+ xfs_ino_t inum;2253225422552255+ inum = xic->first_ino;22542256 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum));22552257 blks_per_cluster = xfs_icluster_size_fsb(mp);22562258 inodes_per_cluster = blks_per_cluster << mp->m_sb.sb_inopblog;22572259 nbufs = mp->m_ialloc_blks / blks_per_cluster;2258226022592261 for (j = 0; j < nbufs; j++, inum += inodes_per_cluster) {22622262+ /*22632263+ * The allocation bitmap tells us which inodes of the chunk were22642264+ * physically allocated. Skip the cluster if an inode falls into22652265+ * a sparse region.22662266+ */22672267+ if ((xic->alloc & XFS_INOBT_MASK(inum - xic->first_ino)) == 0) {22682268+ ASSERT(((inum - xic->first_ino) %22692269+ inodes_per_cluster) == 0);22702270+ continue;22712271+ }22722272+22602273 blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),22612274 XFS_INO_TO_AGBNO(mp, inum));22622275···24272414 xfs_bmap_free_t *flist)24282415{24292416 int error;24302430- int delete;24312431- xfs_ino_t first_ino;24172417+ struct xfs_icluster xic = { 0 };2432241824332419 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));24342420 ASSERT(ip->i_d.di_nlink == 0);···24432431 if (error)24442432 return error;2445243324462446- error = xfs_difree(tp, ip->i_ino, flist, &delete, &first_ino);24342434+ error = xfs_difree(tp, ip->i_ino, flist, &xic);24472435 if (error)24482436 return error;24492437···24602448 ip->i_d.di_gen++;24612449 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);2462245024632463- if (delete)24642464- error = xfs_ifree_cluster(ip, tp, first_ino);24512451+ if (xic.deleted)24522452+ error = 
xfs_ifree_cluster(ip, tp, &xic);2465245324662454 return error;24672455}
+8-5
fs/xfs/xfs_itable.c
···252252 }253253254254 irec->ir_free |= xfs_inobt_maskn(0, idx);255255- *icount = XFS_INODES_PER_CHUNK - irec->ir_freecount;255255+ *icount = irec->ir_count - irec->ir_freecount;256256 }257257258258 return 0;···415415 goto del_cursor;416416 if (icount) {417417 irbp->ir_startino = r.ir_startino;418418+ irbp->ir_holemask = r.ir_holemask;419419+ irbp->ir_count = r.ir_count;418420 irbp->ir_freecount = r.ir_freecount;419421 irbp->ir_free = r.ir_free;420422 irbp++;···449447 * If this chunk has any allocated inodes, save it.450448 * Also start read-ahead now for this chunk.451449 */452452- if (r.ir_freecount < XFS_INODES_PER_CHUNK) {450450+ if (r.ir_freecount < r.ir_count) {453451 xfs_bulkstat_ichunk_ra(mp, agno, &r);454452 irbp->ir_startino = r.ir_startino;453453+ irbp->ir_holemask = r.ir_holemask;454454+ irbp->ir_count = r.ir_count;455455 irbp->ir_freecount = r.ir_freecount;456456 irbp->ir_free = r.ir_free;457457 irbp++;458458- icount += XFS_INODES_PER_CHUNK - r.ir_freecount;458458+ icount += r.ir_count - r.ir_freecount;459459 }460460 error = xfs_btree_increment(cur, 0, &stat);461461 if (error || stat == 0) {···603599 agino = r.ir_startino + XFS_INODES_PER_CHUNK - 1;604600 buffer[bufidx].xi_startino =605601 XFS_AGINO_TO_INO(mp, agno, r.ir_startino);606606- buffer[bufidx].xi_alloccount =607607- XFS_INODES_PER_CHUNK - r.ir_freecount;602602+ buffer[bufidx].xi_alloccount = r.ir_count - r.ir_freecount;608603 buffer[bufidx].xi_allocmask = ~r.ir_free;609604 if (++bufidx == bcount) {610605 long written;
+18-8
fs/xfs/xfs_log_recover.c
···30683068 return -EINVAL;30693069 }3070307030713071- /* existing allocation is fixed value */30723072- ASSERT(count == mp->m_ialloc_inos);30733073- ASSERT(length == mp->m_ialloc_blks);30743074- if (count != mp->m_ialloc_inos ||30753075- length != mp->m_ialloc_blks) {30763076- xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count 2");30713071+ /*30723072+ * The inode chunk is either full or sparse and we only support30733073+ * m_ialloc_min_blks sized sparse allocations at this time.30743074+ */30753075+ if (length != mp->m_ialloc_blks &&30763076+ length != mp->m_ialloc_min_blks) {30773077+ xfs_warn(log->l_mp,30783078+ "%s: unsupported chunk length", __FUNCTION__);30793079+ return -EINVAL;30803080+ }30813081+30823082+ /* verify inode count is consistent with extent length */30833083+ if ((count >> mp->m_sb.sb_inopblog) != length) {30843084+ xfs_warn(log->l_mp,30853085+ "%s: inconsistent inode count and chunk length",30863086+ __FUNCTION__);30773087 return -EINVAL;30783088 }30793089···31013091 XFS_AGB_TO_DADDR(mp, agno, agbno), length, 0))31023092 return 0;3103309331043104- xfs_ialloc_inode_init(mp, NULL, buffer_list, agno, agbno, length,31053105- be32_to_cpu(icl->icl_gen));30943094+ xfs_ialloc_inode_init(mp, NULL, buffer_list, count, agno, agbno, length,30953095+ be32_to_cpu(icl->icl_gen));31063096 return 0;31073097}31083098
+16
fs/xfs/xfs_mount.c
···725725 }726726727727 /*728728+ * If enabled, sparse inode chunk alignment is expected to match the729729+ * cluster size. Full inode chunk alignment must match the chunk size,730730+ * but that is checked on sb read verification...731731+ */732732+ if (xfs_sb_version_hassparseinodes(&mp->m_sb) &&733733+ mp->m_sb.sb_spino_align !=734734+ XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)) {735735+ xfs_warn(mp,736736+ "Sparse inode block alignment (%u) must match cluster size (%llu).",737737+ mp->m_sb.sb_spino_align,738738+ XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size));739739+ error = -EINVAL;740740+ goto out_remove_uuid;741741+ }742742+743743+ /*728744 * Set inode alignment fields729745 */730746 xfs_set_inoalignment(mp);
+2
fs/xfs/xfs_mount.h
···101101 __uint64_t m_flags; /* global mount flags */102102 int m_ialloc_inos; /* inodes in inode allocation */103103 int m_ialloc_blks; /* blocks in inode allocation */104104+ int m_ialloc_min_blks;/* min blocks in sparse inode105105+ * allocation */104106 int m_inoalign_mask;/* mask sb_inoalignmt if used */105107 uint m_qflags; /* quota status flags */106108 struct xfs_trans_resv m_resv; /* precomputed res values */