Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v3.3 1565 lines 44 kB view raw
1/* 2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. 3 * All Rights Reserved. 4 * 5 * This program is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU General Public License as 7 * published by the Free Software Foundation. 8 * 9 * This program is distributed in the hope that it would be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write the Free Software Foundation, 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17 */ 18#include "xfs.h" 19#include "xfs_fs.h" 20#include "xfs_types.h" 21#include "xfs_bit.h" 22#include "xfs_log.h" 23#include "xfs_inum.h" 24#include "xfs_trans.h" 25#include "xfs_sb.h" 26#include "xfs_ag.h" 27#include "xfs_mount.h" 28#include "xfs_bmap_btree.h" 29#include "xfs_alloc_btree.h" 30#include "xfs_ialloc_btree.h" 31#include "xfs_dinode.h" 32#include "xfs_inode.h" 33#include "xfs_btree.h" 34#include "xfs_ialloc.h" 35#include "xfs_alloc.h" 36#include "xfs_rtalloc.h" 37#include "xfs_error.h" 38#include "xfs_bmap.h" 39 40 41/* 42 * Allocation group level functions. 43 */ 44static inline int 45xfs_ialloc_cluster_alignment( 46 xfs_alloc_arg_t *args) 47{ 48 if (xfs_sb_version_hasalign(&args->mp->m_sb) && 49 args->mp->m_sb.sb_inoalignmt >= 50 XFS_B_TO_FSBT(args->mp, XFS_INODE_CLUSTER_SIZE(args->mp))) 51 return args->mp->m_sb.sb_inoalignmt; 52 return 1; 53} 54 55/* 56 * Lookup a record by ino in the btree given by cur. 57 */ 58int /* error */ 59xfs_inobt_lookup( 60 struct xfs_btree_cur *cur, /* btree cursor */ 61 xfs_agino_t ino, /* starting inode of chunk */ 62 xfs_lookup_t dir, /* <=, >=, == */ 63 int *stat) /* success/failure */ 64{ 65 cur->bc_rec.i.ir_startino = ino; 66 cur->bc_rec.i.ir_freecount = 0; 67 cur->bc_rec.i.ir_free = 0; 68 return xfs_btree_lookup(cur, dir, stat); 69} 70 71/* 72 * Update the record referred to by cur to the value given. 73 * This either works (return 0) or gets an EFSCORRUPTED error. 74 */ 75STATIC int /* error */ 76xfs_inobt_update( 77 struct xfs_btree_cur *cur, /* btree cursor */ 78 xfs_inobt_rec_incore_t *irec) /* btree record */ 79{ 80 union xfs_btree_rec rec; 81 82 rec.inobt.ir_startino = cpu_to_be32(irec->ir_startino); 83 rec.inobt.ir_freecount = cpu_to_be32(irec->ir_freecount); 84 rec.inobt.ir_free = cpu_to_be64(irec->ir_free); 85 return xfs_btree_update(cur, &rec); 86} 87 88/* 89 * Get the data from the pointed-to record. 90 */ 91int /* error */ 92xfs_inobt_get_rec( 93 struct xfs_btree_cur *cur, /* btree cursor */ 94 xfs_inobt_rec_incore_t *irec, /* btree record */ 95 int *stat) /* output: success/failure */ 96{ 97 union xfs_btree_rec *rec; 98 int error; 99 100 error = xfs_btree_get_rec(cur, &rec, stat); 101 if (!error && *stat == 1) { 102 irec->ir_startino = be32_to_cpu(rec->inobt.ir_startino); 103 irec->ir_freecount = be32_to_cpu(rec->inobt.ir_freecount); 104 irec->ir_free = be64_to_cpu(rec->inobt.ir_free); 105 } 106 return error; 107} 108 109/* 110 * Verify that the number of free inodes in the AGI is correct. 111 */ 112#ifdef DEBUG 113STATIC int 114xfs_check_agi_freecount( 115 struct xfs_btree_cur *cur, 116 struct xfs_agi *agi) 117{ 118 if (cur->bc_nlevels == 1) { 119 xfs_inobt_rec_incore_t rec; 120 int freecount = 0; 121 int error; 122 int i; 123 124 error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i); 125 if (error) 126 return error; 127 128 do { 129 error = xfs_inobt_get_rec(cur, &rec, &i); 130 if (error) 131 return error; 132 133 if (i) { 134 freecount += rec.ir_freecount; 135 error = xfs_btree_increment(cur, 0, &i); 136 if (error) 137 return error; 138 } 139 } while (i == 1); 140 141 if (!XFS_FORCED_SHUTDOWN(cur->bc_mp)) 142 ASSERT(freecount == be32_to_cpu(agi->agi_freecount)); 143 } 144 return 0; 145} 146#else 147#define xfs_check_agi_freecount(cur, agi) 0 148#endif 149 150/* 151 * Initialise a new set of inodes. 152 */ 153STATIC int 154xfs_ialloc_inode_init( 155 struct xfs_mount *mp, 156 struct xfs_trans *tp, 157 xfs_agnumber_t agno, 158 xfs_agblock_t agbno, 159 xfs_agblock_t length, 160 unsigned int gen) 161{ 162 struct xfs_buf *fbuf; 163 struct xfs_dinode *free; 164 int blks_per_cluster, nbufs, ninodes; 165 int version; 166 int i, j; 167 xfs_daddr_t d; 168 169 /* 170 * Loop over the new block(s), filling in the inodes. 171 * For small block sizes, manipulate the inodes in buffers 172 * which are multiples of the blocks size. 173 */ 174 if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) { 175 blks_per_cluster = 1; 176 nbufs = length; 177 ninodes = mp->m_sb.sb_inopblock; 178 } else { 179 blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) / 180 mp->m_sb.sb_blocksize; 181 nbufs = length / blks_per_cluster; 182 ninodes = blks_per_cluster * mp->m_sb.sb_inopblock; 183 } 184 185 /* 186 * Figure out what version number to use in the inodes we create. 187 * If the superblock version has caught up to the one that supports 188 * the new inode format, then use the new inode version. Otherwise 189 * use the old version so that old kernels will continue to be 190 * able to use the file system. 191 */ 192 if (xfs_sb_version_hasnlink(&mp->m_sb)) 193 version = 2; 194 else 195 version = 1; 196 197 for (j = 0; j < nbufs; j++) { 198 /* 199 * Get the block. 200 */ 201 d = XFS_AGB_TO_DADDR(mp, agno, agbno + (j * blks_per_cluster)); 202 fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, 203 mp->m_bsize * blks_per_cluster, 204 XBF_LOCK); 205 if (!fbuf) 206 return ENOMEM; 207 /* 208 * Initialize all inodes in this buffer and then log them. 209 * 210 * XXX: It would be much better if we had just one transaction 211 * to log a whole cluster of inodes instead of all the 212 * individual transactions causing a lot of log traffic. 213 */ 214 xfs_buf_zero(fbuf, 0, ninodes << mp->m_sb.sb_inodelog); 215 for (i = 0; i < ninodes; i++) { 216 int ioffset = i << mp->m_sb.sb_inodelog; 217 uint isize = sizeof(struct xfs_dinode); 218 219 free = xfs_make_iptr(mp, fbuf, i); 220 free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC); 221 free->di_version = version; 222 free->di_gen = cpu_to_be32(gen); 223 free->di_next_unlinked = cpu_to_be32(NULLAGINO); 224 xfs_trans_log_buf(tp, fbuf, ioffset, ioffset + isize - 1); 225 } 226 xfs_trans_inode_alloc_buf(tp, fbuf); 227 } 228 return 0; 229} 230 231/* 232 * Allocate new inodes in the allocation group specified by agbp. 233 * Return 0 for success, else error code. 234 */ 235STATIC int /* error code or 0 */ 236xfs_ialloc_ag_alloc( 237 xfs_trans_t *tp, /* transaction pointer */ 238 xfs_buf_t *agbp, /* alloc group buffer */ 239 int *alloc) 240{ 241 xfs_agi_t *agi; /* allocation group header */ 242 xfs_alloc_arg_t args; /* allocation argument structure */ 243 xfs_btree_cur_t *cur; /* inode btree cursor */ 244 xfs_agnumber_t agno; 245 int error; 246 int i; 247 xfs_agino_t newino; /* new first inode's number */ 248 xfs_agino_t newlen; /* new number of inodes */ 249 xfs_agino_t thisino; /* current inode number, for loop */ 250 int isaligned = 0; /* inode allocation at stripe unit */ 251 /* boundary */ 252 struct xfs_perag *pag; 253 254 args.tp = tp; 255 args.mp = tp->t_mountp; 256 257 /* 258 * Locking will ensure that we don't have two callers in here 259 * at one time. 260 */ 261 newlen = XFS_IALLOC_INODES(args.mp); 262 if (args.mp->m_maxicount && 263 args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount) 264 return XFS_ERROR(ENOSPC); 265 args.minlen = args.maxlen = XFS_IALLOC_BLOCKS(args.mp); 266 /* 267 * First try to allocate inodes contiguous with the last-allocated 268 * chunk of inodes. If the filesystem is striped, this will fill 269 * an entire stripe unit with inodes. 270 */ 271 agi = XFS_BUF_TO_AGI(agbp); 272 newino = be32_to_cpu(agi->agi_newino); 273 agno = be32_to_cpu(agi->agi_seqno); 274 args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) + 275 XFS_IALLOC_BLOCKS(args.mp); 276 if (likely(newino != NULLAGINO && 277 (args.agbno < be32_to_cpu(agi->agi_length)))) { 278 args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); 279 args.type = XFS_ALLOCTYPE_THIS_BNO; 280 args.mod = args.total = args.wasdel = args.isfl = 281 args.userdata = args.minalignslop = 0; 282 args.prod = 1; 283 284 /* 285 * We need to take into account alignment here to ensure that 286 * we don't modify the free list if we fail to have an exact 287 * block. If we don't have an exact match, and every oher 288 * attempt allocation attempt fails, we'll end up cancelling 289 * a dirty transaction and shutting down. 290 * 291 * For an exact allocation, alignment must be 1, 292 * however we need to take cluster alignment into account when 293 * fixing up the freelist. Use the minalignslop field to 294 * indicate that extra blocks might be required for alignment, 295 * but not to use them in the actual exact allocation. 296 */ 297 args.alignment = 1; 298 args.minalignslop = xfs_ialloc_cluster_alignment(&args) - 1; 299 300 /* Allow space for the inode btree to split. */ 301 args.minleft = args.mp->m_in_maxlevels - 1; 302 if ((error = xfs_alloc_vextent(&args))) 303 return error; 304 } else 305 args.fsbno = NULLFSBLOCK; 306 307 if (unlikely(args.fsbno == NULLFSBLOCK)) { 308 /* 309 * Set the alignment for the allocation. 310 * If stripe alignment is turned on then align at stripe unit 311 * boundary. 312 * If the cluster size is smaller than a filesystem block 313 * then we're doing I/O for inodes in filesystem block size 314 * pieces, so don't need alignment anyway. 315 */ 316 isaligned = 0; 317 if (args.mp->m_sinoalign) { 318 ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN)); 319 args.alignment = args.mp->m_dalign; 320 isaligned = 1; 321 } else 322 args.alignment = xfs_ialloc_cluster_alignment(&args); 323 /* 324 * Need to figure out where to allocate the inode blocks. 325 * Ideally they should be spaced out through the a.g. 326 * For now, just allocate blocks up front. 327 */ 328 args.agbno = be32_to_cpu(agi->agi_root); 329 args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); 330 /* 331 * Allocate a fixed-size extent of inodes. 332 */ 333 args.type = XFS_ALLOCTYPE_NEAR_BNO; 334 args.mod = args.total = args.wasdel = args.isfl = 335 args.userdata = args.minalignslop = 0; 336 args.prod = 1; 337 /* 338 * Allow space for the inode btree to split. 339 */ 340 args.minleft = args.mp->m_in_maxlevels - 1; 341 if ((error = xfs_alloc_vextent(&args))) 342 return error; 343 } 344 345 /* 346 * If stripe alignment is turned on, then try again with cluster 347 * alignment. 348 */ 349 if (isaligned && args.fsbno == NULLFSBLOCK) { 350 args.type = XFS_ALLOCTYPE_NEAR_BNO; 351 args.agbno = be32_to_cpu(agi->agi_root); 352 args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); 353 args.alignment = xfs_ialloc_cluster_alignment(&args); 354 if ((error = xfs_alloc_vextent(&args))) 355 return error; 356 } 357 358 if (args.fsbno == NULLFSBLOCK) { 359 *alloc = 0; 360 return 0; 361 } 362 ASSERT(args.len == args.minlen); 363 364 /* 365 * Stamp and write the inode buffers. 366 * 367 * Seed the new inode cluster with a random generation number. This 368 * prevents short-term reuse of generation numbers if a chunk is 369 * freed and then immediately reallocated. We use random numbers 370 * rather than a linear progression to prevent the next generation 371 * number from being easily guessable. 372 */ 373 error = xfs_ialloc_inode_init(args.mp, tp, agno, args.agbno, 374 args.len, random32()); 375 376 if (error) 377 return error; 378 /* 379 * Convert the results. 380 */ 381 newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0); 382 be32_add_cpu(&agi->agi_count, newlen); 383 be32_add_cpu(&agi->agi_freecount, newlen); 384 pag = xfs_perag_get(args.mp, agno); 385 pag->pagi_freecount += newlen; 386 xfs_perag_put(pag); 387 agi->agi_newino = cpu_to_be32(newino); 388 389 /* 390 * Insert records describing the new inode chunk into the btree. 391 */ 392 cur = xfs_inobt_init_cursor(args.mp, tp, agbp, agno); 393 for (thisino = newino; 394 thisino < newino + newlen; 395 thisino += XFS_INODES_PER_CHUNK) { 396 cur->bc_rec.i.ir_startino = thisino; 397 cur->bc_rec.i.ir_freecount = XFS_INODES_PER_CHUNK; 398 cur->bc_rec.i.ir_free = XFS_INOBT_ALL_FREE; 399 error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, &i); 400 if (error) { 401 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 402 return error; 403 } 404 ASSERT(i == 0); 405 error = xfs_btree_insert(cur, &i); 406 if (error) { 407 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 408 return error; 409 } 410 ASSERT(i == 1); 411 } 412 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); 413 /* 414 * Log allocation group header fields 415 */ 416 xfs_ialloc_log_agi(tp, agbp, 417 XFS_AGI_COUNT | XFS_AGI_FREECOUNT | XFS_AGI_NEWINO); 418 /* 419 * Modify/log superblock values for inode count and inode free count. 420 */ 421 xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, (long)newlen); 422 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, (long)newlen); 423 *alloc = 1; 424 return 0; 425} 426 427STATIC xfs_agnumber_t 428xfs_ialloc_next_ag( 429 xfs_mount_t *mp) 430{ 431 xfs_agnumber_t agno; 432 433 spin_lock(&mp->m_agirotor_lock); 434 agno = mp->m_agirotor; 435 if (++mp->m_agirotor == mp->m_maxagi) 436 mp->m_agirotor = 0; 437 spin_unlock(&mp->m_agirotor_lock); 438 439 return agno; 440} 441 442/* 443 * Select an allocation group to look for a free inode in, based on the parent 444 * inode and then mode. Return the allocation group buffer. 445 */ 446STATIC xfs_buf_t * /* allocation group buffer */ 447xfs_ialloc_ag_select( 448 xfs_trans_t *tp, /* transaction pointer */ 449 xfs_ino_t parent, /* parent directory inode number */ 450 umode_t mode, /* bits set to indicate file type */ 451 int okalloc) /* ok to allocate more space */ 452{ 453 xfs_buf_t *agbp; /* allocation group header buffer */ 454 xfs_agnumber_t agcount; /* number of ag's in the filesystem */ 455 xfs_agnumber_t agno; /* current ag number */ 456 int flags; /* alloc buffer locking flags */ 457 xfs_extlen_t ineed; /* blocks needed for inode allocation */ 458 xfs_extlen_t longest = 0; /* longest extent available */ 459 xfs_mount_t *mp; /* mount point structure */ 460 int needspace; /* file mode implies space allocated */ 461 xfs_perag_t *pag; /* per allocation group data */ 462 xfs_agnumber_t pagno; /* parent (starting) ag number */ 463 464 /* 465 * Files of these types need at least one block if length > 0 466 * (and they won't fit in the inode, but that's hard to figure out). 467 */ 468 needspace = S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode); 469 mp = tp->t_mountp; 470 agcount = mp->m_maxagi; 471 if (S_ISDIR(mode)) 472 pagno = xfs_ialloc_next_ag(mp); 473 else { 474 pagno = XFS_INO_TO_AGNO(mp, parent); 475 if (pagno >= agcount) 476 pagno = 0; 477 } 478 ASSERT(pagno < agcount); 479 /* 480 * Loop through allocation groups, looking for one with a little 481 * free space in it. Note we don't look for free inodes, exactly. 482 * Instead, we include whether there is a need to allocate inodes 483 * to mean that blocks must be allocated for them, 484 * if none are currently free. 485 */ 486 agno = pagno; 487 flags = XFS_ALLOC_FLAG_TRYLOCK; 488 for (;;) { 489 pag = xfs_perag_get(mp, agno); 490 if (!pag->pagi_init) { 491 if (xfs_ialloc_read_agi(mp, tp, agno, &agbp)) { 492 agbp = NULL; 493 goto nextag; 494 } 495 } else 496 agbp = NULL; 497 498 if (!pag->pagi_inodeok) { 499 xfs_ialloc_next_ag(mp); 500 goto unlock_nextag; 501 } 502 503 /* 504 * Is there enough free space for the file plus a block 505 * of inodes (if we need to allocate some)? 506 */ 507 ineed = pag->pagi_freecount ? 0 : XFS_IALLOC_BLOCKS(mp); 508 if (ineed && !pag->pagf_init) { 509 if (agbp == NULL && 510 xfs_ialloc_read_agi(mp, tp, agno, &agbp)) { 511 agbp = NULL; 512 goto nextag; 513 } 514 (void)xfs_alloc_pagf_init(mp, tp, agno, flags); 515 } 516 if (!ineed || pag->pagf_init) { 517 if (ineed && !(longest = pag->pagf_longest)) 518 longest = pag->pagf_flcount > 0; 519 if (!ineed || 520 (pag->pagf_freeblks >= needspace + ineed && 521 longest >= ineed && 522 okalloc)) { 523 if (agbp == NULL && 524 xfs_ialloc_read_agi(mp, tp, agno, &agbp)) { 525 agbp = NULL; 526 goto nextag; 527 } 528 xfs_perag_put(pag); 529 return agbp; 530 } 531 } 532unlock_nextag: 533 if (agbp) 534 xfs_trans_brelse(tp, agbp); 535nextag: 536 xfs_perag_put(pag); 537 /* 538 * No point in iterating over the rest, if we're shutting 539 * down. 540 */ 541 if (XFS_FORCED_SHUTDOWN(mp)) 542 return NULL; 543 agno++; 544 if (agno >= agcount) 545 agno = 0; 546 if (agno == pagno) { 547 if (flags == 0) 548 return NULL; 549 flags = 0; 550 } 551 } 552} 553 554/* 555 * Try to retrieve the next record to the left/right from the current one. 556 */ 557STATIC int 558xfs_ialloc_next_rec( 559 struct xfs_btree_cur *cur, 560 xfs_inobt_rec_incore_t *rec, 561 int *done, 562 int left) 563{ 564 int error; 565 int i; 566 567 if (left) 568 error = xfs_btree_decrement(cur, 0, &i); 569 else 570 error = xfs_btree_increment(cur, 0, &i); 571 572 if (error) 573 return error; 574 *done = !i; 575 if (i) { 576 error = xfs_inobt_get_rec(cur, rec, &i); 577 if (error) 578 return error; 579 XFS_WANT_CORRUPTED_RETURN(i == 1); 580 } 581 582 return 0; 583} 584 585STATIC int 586xfs_ialloc_get_rec( 587 struct xfs_btree_cur *cur, 588 xfs_agino_t agino, 589 xfs_inobt_rec_incore_t *rec, 590 int *done, 591 int left) 592{ 593 int error; 594 int i; 595 596 error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_EQ, &i); 597 if (error) 598 return error; 599 *done = !i; 600 if (i) { 601 error = xfs_inobt_get_rec(cur, rec, &i); 602 if (error) 603 return error; 604 XFS_WANT_CORRUPTED_RETURN(i == 1); 605 } 606 607 return 0; 608} 609 610/* 611 * Visible inode allocation functions. 612 */ 613 614/* 615 * Allocate an inode on disk. 616 * Mode is used to tell whether the new inode will need space, and whether 617 * it is a directory. 618 * 619 * The arguments IO_agbp and alloc_done are defined to work within 620 * the constraint of one allocation per transaction. 621 * xfs_dialloc() is designed to be called twice if it has to do an 622 * allocation to make more free inodes. On the first call, 623 * IO_agbp should be set to NULL. If an inode is available, 624 * i.e., xfs_dialloc() did not need to do an allocation, an inode 625 * number is returned. In this case, IO_agbp would be set to the 626 * current ag_buf and alloc_done set to false. 627 * If an allocation needed to be done, xfs_dialloc would return 628 * the current ag_buf in IO_agbp and set alloc_done to true. 629 * The caller should then commit the current transaction, allocate a new 630 * transaction, and call xfs_dialloc() again, passing in the previous 631 * value of IO_agbp. IO_agbp should be held across the transactions. 632 * Since the agbp is locked across the two calls, the second call is 633 * guaranteed to have a free inode available. 634 * 635 * Once we successfully pick an inode its number is returned and the 636 * on-disk data structures are updated. The inode itself is not read 637 * in, since doing so would break ordering constraints with xfs_reclaim. 638 */ 639int 640xfs_dialloc( 641 xfs_trans_t *tp, /* transaction pointer */ 642 xfs_ino_t parent, /* parent inode (directory) */ 643 umode_t mode, /* mode bits for new inode */ 644 int okalloc, /* ok to allocate more space */ 645 xfs_buf_t **IO_agbp, /* in/out ag header's buffer */ 646 boolean_t *alloc_done, /* true if we needed to replenish 647 inode freelist */ 648 xfs_ino_t *inop) /* inode number allocated */ 649{ 650 xfs_agnumber_t agcount; /* number of allocation groups */ 651 xfs_buf_t *agbp; /* allocation group header's buffer */ 652 xfs_agnumber_t agno; /* allocation group number */ 653 xfs_agi_t *agi; /* allocation group header structure */ 654 xfs_btree_cur_t *cur; /* inode allocation btree cursor */ 655 int error; /* error return value */ 656 int i; /* result code */ 657 int ialloced; /* inode allocation status */ 658 int noroom = 0; /* no space for inode blk allocation */ 659 xfs_ino_t ino; /* fs-relative inode to be returned */ 660 /* REFERENCED */ 661 int j; /* result code */ 662 xfs_mount_t *mp; /* file system mount structure */ 663 int offset; /* index of inode in chunk */ 664 xfs_agino_t pagino; /* parent's AG relative inode # */ 665 xfs_agnumber_t pagno; /* parent's AG number */ 666 xfs_inobt_rec_incore_t rec; /* inode allocation record */ 667 xfs_agnumber_t tagno; /* testing allocation group number */ 668 xfs_btree_cur_t *tcur; /* temp cursor */ 669 xfs_inobt_rec_incore_t trec; /* temp inode allocation record */ 670 struct xfs_perag *pag; 671 672 673 if (*IO_agbp == NULL) { 674 /* 675 * We do not have an agbp, so select an initial allocation 676 * group for inode allocation. 677 */ 678 agbp = xfs_ialloc_ag_select(tp, parent, mode, okalloc); 679 /* 680 * Couldn't find an allocation group satisfying the 681 * criteria, give up. 682 */ 683 if (!agbp) { 684 *inop = NULLFSINO; 685 return 0; 686 } 687 agi = XFS_BUF_TO_AGI(agbp); 688 ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); 689 } else { 690 /* 691 * Continue where we left off before. In this case, we 692 * know that the allocation group has free inodes. 693 */ 694 agbp = *IO_agbp; 695 agi = XFS_BUF_TO_AGI(agbp); 696 ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); 697 ASSERT(be32_to_cpu(agi->agi_freecount) > 0); 698 } 699 mp = tp->t_mountp; 700 agcount = mp->m_sb.sb_agcount; 701 agno = be32_to_cpu(agi->agi_seqno); 702 tagno = agno; 703 pagno = XFS_INO_TO_AGNO(mp, parent); 704 pagino = XFS_INO_TO_AGINO(mp, parent); 705 706 /* 707 * If we have already hit the ceiling of inode blocks then clear 708 * okalloc so we scan all available agi structures for a free 709 * inode. 710 */ 711 712 if (mp->m_maxicount && 713 mp->m_sb.sb_icount + XFS_IALLOC_INODES(mp) > mp->m_maxicount) { 714 noroom = 1; 715 okalloc = 0; 716 } 717 718 /* 719 * Loop until we find an allocation group that either has free inodes 720 * or in which we can allocate some inodes. Iterate through the 721 * allocation groups upward, wrapping at the end. 722 */ 723 *alloc_done = B_FALSE; 724 while (!agi->agi_freecount) { 725 /* 726 * Don't do anything if we're not supposed to allocate 727 * any blocks, just go on to the next ag. 728 */ 729 if (okalloc) { 730 /* 731 * Try to allocate some new inodes in the allocation 732 * group. 733 */ 734 if ((error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced))) { 735 xfs_trans_brelse(tp, agbp); 736 if (error == ENOSPC) { 737 *inop = NULLFSINO; 738 return 0; 739 } else 740 return error; 741 } 742 if (ialloced) { 743 /* 744 * We successfully allocated some inodes, return 745 * the current context to the caller so that it 746 * can commit the current transaction and call 747 * us again where we left off. 748 */ 749 ASSERT(be32_to_cpu(agi->agi_freecount) > 0); 750 *alloc_done = B_TRUE; 751 *IO_agbp = agbp; 752 *inop = NULLFSINO; 753 return 0; 754 } 755 } 756 /* 757 * If it failed, give up on this ag. 758 */ 759 xfs_trans_brelse(tp, agbp); 760 /* 761 * Go on to the next ag: get its ag header. 762 */ 763nextag: 764 if (++tagno == agcount) 765 tagno = 0; 766 if (tagno == agno) { 767 *inop = NULLFSINO; 768 return noroom ? ENOSPC : 0; 769 } 770 pag = xfs_perag_get(mp, tagno); 771 if (pag->pagi_inodeok == 0) { 772 xfs_perag_put(pag); 773 goto nextag; 774 } 775 error = xfs_ialloc_read_agi(mp, tp, tagno, &agbp); 776 xfs_perag_put(pag); 777 if (error) 778 goto nextag; 779 agi = XFS_BUF_TO_AGI(agbp); 780 ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); 781 } 782 /* 783 * Here with an allocation group that has a free inode. 784 * Reset agno since we may have chosen a new ag in the 785 * loop above. 786 */ 787 agno = tagno; 788 *IO_agbp = NULL; 789 pag = xfs_perag_get(mp, agno); 790 791 restart_pagno: 792 cur = xfs_inobt_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno)); 793 /* 794 * If pagino is 0 (this is the root inode allocation) use newino. 795 * This must work because we've just allocated some. 796 */ 797 if (!pagino) 798 pagino = be32_to_cpu(agi->agi_newino); 799 800 error = xfs_check_agi_freecount(cur, agi); 801 if (error) 802 goto error0; 803 804 /* 805 * If in the same AG as the parent, try to get near the parent. 806 */ 807 if (pagno == agno) { 808 int doneleft; /* done, to the left */ 809 int doneright; /* done, to the right */ 810 int searchdistance = 10; 811 812 error = xfs_inobt_lookup(cur, pagino, XFS_LOOKUP_LE, &i); 813 if (error) 814 goto error0; 815 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 816 817 error = xfs_inobt_get_rec(cur, &rec, &j); 818 if (error) 819 goto error0; 820 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 821 822 if (rec.ir_freecount > 0) { 823 /* 824 * Found a free inode in the same chunk 825 * as the parent, done. 826 */ 827 goto alloc_inode; 828 } 829 830 831 /* 832 * In the same AG as parent, but parent's chunk is full. 833 */ 834 835 /* duplicate the cursor, search left & right simultaneously */ 836 error = xfs_btree_dup_cursor(cur, &tcur); 837 if (error) 838 goto error0; 839 840 /* 841 * Skip to last blocks looked up if same parent inode. 842 */ 843 if (pagino != NULLAGINO && 844 pag->pagl_pagino == pagino && 845 pag->pagl_leftrec != NULLAGINO && 846 pag->pagl_rightrec != NULLAGINO) { 847 error = xfs_ialloc_get_rec(tcur, pag->pagl_leftrec, 848 &trec, &doneleft, 1); 849 if (error) 850 goto error1; 851 852 error = xfs_ialloc_get_rec(cur, pag->pagl_rightrec, 853 &rec, &doneright, 0); 854 if (error) 855 goto error1; 856 } else { 857 /* search left with tcur, back up 1 record */ 858 error = xfs_ialloc_next_rec(tcur, &trec, &doneleft, 1); 859 if (error) 860 goto error1; 861 862 /* search right with cur, go forward 1 record. */ 863 error = xfs_ialloc_next_rec(cur, &rec, &doneright, 0); 864 if (error) 865 goto error1; 866 } 867 868 /* 869 * Loop until we find an inode chunk with a free inode. 870 */ 871 while (!doneleft || !doneright) { 872 int useleft; /* using left inode chunk this time */ 873 874 if (!--searchdistance) { 875 /* 876 * Not in range - save last search 877 * location and allocate a new inode 878 */ 879 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); 880 pag->pagl_leftrec = trec.ir_startino; 881 pag->pagl_rightrec = rec.ir_startino; 882 pag->pagl_pagino = pagino; 883 goto newino; 884 } 885 886 /* figure out the closer block if both are valid. */ 887 if (!doneleft && !doneright) { 888 useleft = pagino - 889 (trec.ir_startino + XFS_INODES_PER_CHUNK - 1) < 890 rec.ir_startino - pagino; 891 } else { 892 useleft = !doneleft; 893 } 894 895 /* free inodes to the left? */ 896 if (useleft && trec.ir_freecount) { 897 rec = trec; 898 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); 899 cur = tcur; 900 901 pag->pagl_leftrec = trec.ir_startino; 902 pag->pagl_rightrec = rec.ir_startino; 903 pag->pagl_pagino = pagino; 904 goto alloc_inode; 905 } 906 907 /* free inodes to the right? */ 908 if (!useleft && rec.ir_freecount) { 909 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); 910 911 pag->pagl_leftrec = trec.ir_startino; 912 pag->pagl_rightrec = rec.ir_startino; 913 pag->pagl_pagino = pagino; 914 goto alloc_inode; 915 } 916 917 /* get next record to check */ 918 if (useleft) { 919 error = xfs_ialloc_next_rec(tcur, &trec, 920 &doneleft, 1); 921 } else { 922 error = xfs_ialloc_next_rec(cur, &rec, 923 &doneright, 0); 924 } 925 if (error) 926 goto error1; 927 } 928 929 /* 930 * We've reached the end of the btree. because 931 * we are only searching a small chunk of the 932 * btree each search, there is obviously free 933 * inodes closer to the parent inode than we 934 * are now. restart the search again. 935 */ 936 pag->pagl_pagino = NULLAGINO; 937 pag->pagl_leftrec = NULLAGINO; 938 pag->pagl_rightrec = NULLAGINO; 939 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); 940 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); 941 goto restart_pagno; 942 } 943 944 /* 945 * In a different AG from the parent. 946 * See if the most recently allocated block has any free. 947 */ 948newino: 949 if (agi->agi_newino != cpu_to_be32(NULLAGINO)) { 950 error = xfs_inobt_lookup(cur, be32_to_cpu(agi->agi_newino), 951 XFS_LOOKUP_EQ, &i); 952 if (error) 953 goto error0; 954 955 if (i == 1) { 956 error = xfs_inobt_get_rec(cur, &rec, &j); 957 if (error) 958 goto error0; 959 960 if (j == 1 && rec.ir_freecount > 0) { 961 /* 962 * The last chunk allocated in the group 963 * still has a free inode. 964 */ 965 goto alloc_inode; 966 } 967 } 968 } 969 970 /* 971 * None left in the last group, search the whole AG 972 */ 973 error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i); 974 if (error) 975 goto error0; 976 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 977 978 for (;;) { 979 error = xfs_inobt_get_rec(cur, &rec, &i); 980 if (error) 981 goto error0; 982 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 983 if (rec.ir_freecount > 0) 984 break; 985 error = xfs_btree_increment(cur, 0, &i); 986 if (error) 987 goto error0; 988 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 989 } 990 991alloc_inode: 992 offset = xfs_ialloc_find_free(&rec.ir_free); 993 ASSERT(offset >= 0); 994 ASSERT(offset < XFS_INODES_PER_CHUNK); 995 ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) % 996 XFS_INODES_PER_CHUNK) == 0); 997 ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset); 998 rec.ir_free &= ~XFS_INOBT_MASK(offset); 999 rec.ir_freecount--; 1000 error = xfs_inobt_update(cur, &rec); 1001 if (error) 1002 goto error0; 1003 be32_add_cpu(&agi->agi_freecount, -1); 1004 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); 1005 pag->pagi_freecount--; 1006 1007 error = xfs_check_agi_freecount(cur, agi); 1008 if (error) 1009 goto error0; 1010 1011 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); 1012 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1); 1013 xfs_perag_put(pag); 1014 *inop = ino; 1015 return 0; 1016error1: 1017 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); 1018error0: 1019 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 1020 xfs_perag_put(pag); 1021 return error; 1022} 1023 1024/* 1025 * Free disk inode. Carefully avoids touching the incore inode, all 1026 * manipulations incore are the caller's responsibility. 1027 * The on-disk inode is not changed by this operation, only the 1028 * btree (free inode mask) is changed. 1029 */ 1030int 1031xfs_difree( 1032 xfs_trans_t *tp, /* transaction pointer */ 1033 xfs_ino_t inode, /* inode to be freed */ 1034 xfs_bmap_free_t *flist, /* extents to free */ 1035 int *delete, /* set if inode cluster was deleted */ 1036 xfs_ino_t *first_ino) /* first inode in deleted cluster */ 1037{ 1038 /* REFERENCED */ 1039 xfs_agblock_t agbno; /* block number containing inode */ 1040 xfs_buf_t *agbp; /* buffer containing allocation group header */ 1041 xfs_agino_t agino; /* inode number relative to allocation group */ 1042 xfs_agnumber_t agno; /* allocation group number */ 1043 xfs_agi_t *agi; /* allocation group header */ 1044 xfs_btree_cur_t *cur; /* inode btree cursor */ 1045 int error; /* error return value */ 1046 int i; /* result code */ 1047 int ilen; /* inodes in an inode cluster */ 1048 xfs_mount_t *mp; /* mount structure for filesystem */ 1049 int off; /* offset of inode in inode chunk */ 1050 xfs_inobt_rec_incore_t rec; /* btree record */ 1051 struct xfs_perag *pag; 1052 1053 mp = tp->t_mountp; 1054 1055 /* 1056 * Break up inode number into its components. 1057 */ 1058 agno = XFS_INO_TO_AGNO(mp, inode); 1059 if (agno >= mp->m_sb.sb_agcount) { 1060 xfs_warn(mp, "%s: agno >= mp->m_sb.sb_agcount (%d >= %d).", 1061 __func__, agno, mp->m_sb.sb_agcount); 1062 ASSERT(0); 1063 return XFS_ERROR(EINVAL); 1064 } 1065 agino = XFS_INO_TO_AGINO(mp, inode); 1066 if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) { 1067 xfs_warn(mp, "%s: inode != XFS_AGINO_TO_INO() (%llu != %llu).", 1068 __func__, (unsigned long long)inode, 1069 (unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino)); 1070 ASSERT(0); 1071 return XFS_ERROR(EINVAL); 1072 } 1073 agbno = XFS_AGINO_TO_AGBNO(mp, agino); 1074 if (agbno >= mp->m_sb.sb_agblocks) { 1075 xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).", 1076 __func__, agbno, mp->m_sb.sb_agblocks); 1077 ASSERT(0); 1078 return XFS_ERROR(EINVAL); 1079 } 1080 /* 1081 * Get the allocation group header. 1082 */ 1083 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); 1084 if (error) { 1085 xfs_warn(mp, "%s: xfs_ialloc_read_agi() returned error %d.", 1086 __func__, error); 1087 return error; 1088 } 1089 agi = XFS_BUF_TO_AGI(agbp); 1090 ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); 1091 ASSERT(agbno < be32_to_cpu(agi->agi_length)); 1092 /* 1093 * Initialize the cursor. 1094 */ 1095 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); 1096 1097 error = xfs_check_agi_freecount(cur, agi); 1098 if (error) 1099 goto error0; 1100 1101 /* 1102 * Look for the entry describing this inode. 1103 */ 1104 if ((error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i))) { 1105 xfs_warn(mp, "%s: xfs_inobt_lookup() returned error %d.", 1106 __func__, error); 1107 goto error0; 1108 } 1109 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1110 error = xfs_inobt_get_rec(cur, &rec, &i); 1111 if (error) { 1112 xfs_warn(mp, "%s: xfs_inobt_get_rec() returned error %d.", 1113 __func__, error); 1114 goto error0; 1115 } 1116 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1117 /* 1118 * Get the offset in the inode chunk. 1119 */ 1120 off = agino - rec.ir_startino; 1121 ASSERT(off >= 0 && off < XFS_INODES_PER_CHUNK); 1122 ASSERT(!(rec.ir_free & XFS_INOBT_MASK(off))); 1123 /* 1124 * Mark the inode free & increment the count. 1125 */ 1126 rec.ir_free |= XFS_INOBT_MASK(off); 1127 rec.ir_freecount++; 1128 1129 /* 1130 * When an inode cluster is free, it becomes eligible for removal 1131 */ 1132 if (!(mp->m_flags & XFS_MOUNT_IKEEP) && 1133 (rec.ir_freecount == XFS_IALLOC_INODES(mp))) { 1134 1135 *delete = 1; 1136 *first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino); 1137 1138 /* 1139 * Remove the inode cluster from the AGI B+Tree, adjust the 1140 * AGI and Superblock inode counts, and mark the disk space 1141 * to be freed when the transaction is committed. 1142 */ 1143 ilen = XFS_IALLOC_INODES(mp); 1144 be32_add_cpu(&agi->agi_count, -ilen); 1145 be32_add_cpu(&agi->agi_freecount, -(ilen - 1)); 1146 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT); 1147 pag = xfs_perag_get(mp, agno); 1148 pag->pagi_freecount -= ilen - 1; 1149 xfs_perag_put(pag); 1150 xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen); 1151 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1)); 1152 1153 if ((error = xfs_btree_delete(cur, &i))) { 1154 xfs_warn(mp, "%s: xfs_btree_delete returned error %d.", 1155 __func__, error); 1156 goto error0; 1157 } 1158 1159 xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, 1160 agno, XFS_INO_TO_AGBNO(mp,rec.ir_startino)), 1161 XFS_IALLOC_BLOCKS(mp), flist, mp); 1162 } else { 1163 *delete = 0; 1164 1165 error = xfs_inobt_update(cur, &rec); 1166 if (error) { 1167 xfs_warn(mp, "%s: xfs_inobt_update returned error %d.", 1168 __func__, error); 1169 goto error0; 1170 } 1171 1172 /* 1173 * Change the inode free counts and log the ag/sb changes. 1174 */ 1175 be32_add_cpu(&agi->agi_freecount, 1); 1176 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); 1177 pag = xfs_perag_get(mp, agno); 1178 pag->pagi_freecount++; 1179 xfs_perag_put(pag); 1180 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1); 1181 } 1182 1183 error = xfs_check_agi_freecount(cur, agi); 1184 if (error) 1185 goto error0; 1186 1187 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); 1188 return 0; 1189 1190error0: 1191 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 1192 return error; 1193} 1194 1195STATIC int 1196xfs_imap_lookup( 1197 struct xfs_mount *mp, 1198 struct xfs_trans *tp, 1199 xfs_agnumber_t agno, 1200 xfs_agino_t agino, 1201 xfs_agblock_t agbno, 1202 xfs_agblock_t *chunk_agbno, 1203 xfs_agblock_t *offset_agbno, 1204 int flags) 1205{ 1206 struct xfs_inobt_rec_incore rec; 1207 struct xfs_btree_cur *cur; 1208 struct xfs_buf *agbp; 1209 int error; 1210 int i; 1211 1212 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); 1213 if (error) { 1214 xfs_alert(mp, 1215 "%s: xfs_ialloc_read_agi() returned error %d, agno %d", 1216 __func__, error, agno); 1217 return error; 1218 } 1219 1220 /* 1221 * Lookup the inode record for the given agino. If the record cannot be 1222 * found, then it's an invalid inode number and we should abort. Once 1223 * we have a record, we need to ensure it contains the inode number 1224 * we are looking up. 1225 */ 1226 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); 1227 error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i); 1228 if (!error) { 1229 if (i) 1230 error = xfs_inobt_get_rec(cur, &rec, &i); 1231 if (!error && i == 0) 1232 error = EINVAL; 1233 } 1234 1235 xfs_trans_brelse(tp, agbp); 1236 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); 1237 if (error) 1238 return error; 1239 1240 /* check that the returned record contains the required inode */ 1241 if (rec.ir_startino > agino || 1242 rec.ir_startino + XFS_IALLOC_INODES(mp) <= agino) 1243 return EINVAL; 1244 1245 /* for untrusted inodes check it is allocated first */ 1246 if ((flags & XFS_IGET_UNTRUSTED) && 1247 (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino))) 1248 return EINVAL; 1249 1250 *chunk_agbno = XFS_AGINO_TO_AGBNO(mp, rec.ir_startino); 1251 *offset_agbno = agbno - *chunk_agbno; 1252 return 0; 1253} 1254 1255/* 1256 * Return the location of the inode in imap, for mapping it into a buffer. 1257 */ 1258int 1259xfs_imap( 1260 xfs_mount_t *mp, /* file system mount structure */ 1261 xfs_trans_t *tp, /* transaction pointer */ 1262 xfs_ino_t ino, /* inode to locate */ 1263 struct xfs_imap *imap, /* location map structure */ 1264 uint flags) /* flags for inode btree lookup */ 1265{ 1266 xfs_agblock_t agbno; /* block number of inode in the alloc group */ 1267 xfs_agino_t agino; /* inode number within alloc group */ 1268 xfs_agnumber_t agno; /* allocation group number */ 1269 int blks_per_cluster; /* num blocks per inode cluster */ 1270 xfs_agblock_t chunk_agbno; /* first block in inode chunk */ 1271 xfs_agblock_t cluster_agbno; /* first block in inode cluster */ 1272 int error; /* error code */ 1273 int offset; /* index of inode in its buffer */ 1274 int offset_agbno; /* blks from chunk start to inode */ 1275 1276 ASSERT(ino != NULLFSINO); 1277 1278 /* 1279 * Split up the inode number into its parts. 1280 */ 1281 agno = XFS_INO_TO_AGNO(mp, ino); 1282 agino = XFS_INO_TO_AGINO(mp, ino); 1283 agbno = XFS_AGINO_TO_AGBNO(mp, agino); 1284 if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks || 1285 ino != XFS_AGINO_TO_INO(mp, agno, agino)) { 1286#ifdef DEBUG 1287 /* 1288 * Don't output diagnostic information for untrusted inodes 1289 * as they can be invalid without implying corruption. 1290 */ 1291 if (flags & XFS_IGET_UNTRUSTED) 1292 return XFS_ERROR(EINVAL); 1293 if (agno >= mp->m_sb.sb_agcount) { 1294 xfs_alert(mp, 1295 "%s: agno (%d) >= mp->m_sb.sb_agcount (%d)", 1296 __func__, agno, mp->m_sb.sb_agcount); 1297 } 1298 if (agbno >= mp->m_sb.sb_agblocks) { 1299 xfs_alert(mp, 1300 "%s: agbno (0x%llx) >= mp->m_sb.sb_agblocks (0x%lx)", 1301 __func__, (unsigned long long)agbno, 1302 (unsigned long)mp->m_sb.sb_agblocks); 1303 } 1304 if (ino != XFS_AGINO_TO_INO(mp, agno, agino)) { 1305 xfs_alert(mp, 1306 "%s: ino (0x%llx) != XFS_AGINO_TO_INO() (0x%llx)", 1307 __func__, ino, 1308 XFS_AGINO_TO_INO(mp, agno, agino)); 1309 } 1310 xfs_stack_trace(); 1311#endif /* DEBUG */ 1312 return XFS_ERROR(EINVAL); 1313 } 1314 1315 blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog; 1316 1317 /* 1318 * For bulkstat and handle lookups, we have an untrusted inode number 1319 * that we have to verify is valid. We cannot do this just by reading 1320 * the inode buffer as it may have been unlinked and removed leaving 1321 * inodes in stale state on disk. Hence we have to do a btree lookup 1322 * in all cases where an untrusted inode number is passed. 1323 */ 1324 if (flags & XFS_IGET_UNTRUSTED) { 1325 error = xfs_imap_lookup(mp, tp, agno, agino, agbno, 1326 &chunk_agbno, &offset_agbno, flags); 1327 if (error) 1328 return error; 1329 goto out_map; 1330 } 1331 1332 /* 1333 * If the inode cluster size is the same as the blocksize or 1334 * smaller we get to the buffer by simple arithmetics. 1335 */ 1336 if (XFS_INODE_CLUSTER_SIZE(mp) <= mp->m_sb.sb_blocksize) { 1337 offset = XFS_INO_TO_OFFSET(mp, ino); 1338 ASSERT(offset < mp->m_sb.sb_inopblock); 1339 1340 imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, agbno); 1341 imap->im_len = XFS_FSB_TO_BB(mp, 1); 1342 imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog); 1343 return 0; 1344 } 1345 1346 /* 1347 * If the inode chunks are aligned then use simple maths to 1348 * find the location. Otherwise we have to do a btree 1349 * lookup to find the location. 1350 */ 1351 if (mp->m_inoalign_mask) { 1352 offset_agbno = agbno & mp->m_inoalign_mask; 1353 chunk_agbno = agbno - offset_agbno; 1354 } else { 1355 error = xfs_imap_lookup(mp, tp, agno, agino, agbno, 1356 &chunk_agbno, &offset_agbno, flags); 1357 if (error) 1358 return error; 1359 } 1360 1361out_map: 1362 ASSERT(agbno >= chunk_agbno); 1363 cluster_agbno = chunk_agbno + 1364 ((offset_agbno / blks_per_cluster) * blks_per_cluster); 1365 offset = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) + 1366 XFS_INO_TO_OFFSET(mp, ino); 1367 1368 imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, cluster_agbno); 1369 imap->im_len = XFS_FSB_TO_BB(mp, blks_per_cluster); 1370 imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog); 1371 1372 /* 1373 * If the inode number maps to a block outside the bounds 1374 * of the file system then return NULL rather than calling 1375 * read_buf and panicing when we get an error from the 1376 * driver. 1377 */ 1378 if ((imap->im_blkno + imap->im_len) > 1379 XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) { 1380 xfs_alert(mp, 1381 "%s: (im_blkno (0x%llx) + im_len (0x%llx)) > sb_dblocks (0x%llx)", 1382 __func__, (unsigned long long) imap->im_blkno, 1383 (unsigned long long) imap->im_len, 1384 XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)); 1385 return XFS_ERROR(EINVAL); 1386 } 1387 return 0; 1388} 1389 1390/* 1391 * Compute and fill in value of m_in_maxlevels. 1392 */ 1393void 1394xfs_ialloc_compute_maxlevels( 1395 xfs_mount_t *mp) /* file system mount structure */ 1396{ 1397 int level; 1398 uint maxblocks; 1399 uint maxleafents; 1400 int minleafrecs; 1401 int minnoderecs; 1402 1403 maxleafents = (1LL << XFS_INO_AGINO_BITS(mp)) >> 1404 XFS_INODES_PER_CHUNK_LOG; 1405 minleafrecs = mp->m_alloc_mnr[0]; 1406 minnoderecs = mp->m_alloc_mnr[1]; 1407 maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs; 1408 for (level = 1; maxblocks > 1; level++) 1409 maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs; 1410 mp->m_in_maxlevels = level; 1411} 1412 1413/* 1414 * Log specified fields for the ag hdr (inode section) 1415 */ 1416void 1417xfs_ialloc_log_agi( 1418 xfs_trans_t *tp, /* transaction pointer */ 1419 xfs_buf_t *bp, /* allocation group header buffer */ 1420 int fields) /* bitmask of fields to log */ 1421{ 1422 int first; /* first byte number */ 1423 int last; /* last byte number */ 1424 static const short offsets[] = { /* field starting offsets */ 1425 /* keep in sync with bit definitions */ 1426 offsetof(xfs_agi_t, agi_magicnum), 1427 offsetof(xfs_agi_t, agi_versionnum), 1428 offsetof(xfs_agi_t, agi_seqno), 1429 offsetof(xfs_agi_t, agi_length), 1430 offsetof(xfs_agi_t, agi_count), 1431 offsetof(xfs_agi_t, agi_root), 1432 offsetof(xfs_agi_t, agi_level), 1433 offsetof(xfs_agi_t, agi_freecount), 1434 offsetof(xfs_agi_t, agi_newino), 1435 offsetof(xfs_agi_t, agi_dirino), 1436 offsetof(xfs_agi_t, agi_unlinked), 1437 sizeof(xfs_agi_t) 1438 }; 1439#ifdef DEBUG 1440 xfs_agi_t *agi; /* allocation group header */ 1441 1442 agi = XFS_BUF_TO_AGI(bp); 1443 ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); 1444#endif 1445 /* 1446 * Compute byte offsets for the first and last fields. 1447 */ 1448 xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS, &first, &last); 1449 /* 1450 * Log the allocation group inode header buffer. 1451 */ 1452 xfs_trans_log_buf(tp, bp, first, last); 1453} 1454 1455#ifdef DEBUG 1456STATIC void 1457xfs_check_agi_unlinked( 1458 struct xfs_agi *agi) 1459{ 1460 int i; 1461 1462 for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++) 1463 ASSERT(agi->agi_unlinked[i]); 1464} 1465#else 1466#define xfs_check_agi_unlinked(agi) 1467#endif 1468 1469/* 1470 * Read in the allocation group header (inode allocation section) 1471 */ 1472int 1473xfs_read_agi( 1474 struct xfs_mount *mp, /* file system mount structure */ 1475 struct xfs_trans *tp, /* transaction pointer */ 1476 xfs_agnumber_t agno, /* allocation group number */ 1477 struct xfs_buf **bpp) /* allocation group hdr buf */ 1478{ 1479 struct xfs_agi *agi; /* allocation group header */ 1480 int agi_ok; /* agi is consistent */ 1481 int error; 1482 1483 ASSERT(agno != NULLAGNUMBER); 1484 1485 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, 1486 XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)), 1487 XFS_FSS_TO_BB(mp, 1), 0, bpp); 1488 if (error) 1489 return error; 1490 1491 ASSERT(!xfs_buf_geterror(*bpp)); 1492 agi = XFS_BUF_TO_AGI(*bpp); 1493 1494 /* 1495 * Validate the magic number of the agi block. 1496 */ 1497 agi_ok = agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC) && 1498 XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)) && 1499 be32_to_cpu(agi->agi_seqno) == agno; 1500 if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI, 1501 XFS_RANDOM_IALLOC_READ_AGI))) { 1502 XFS_CORRUPTION_ERROR("xfs_read_agi", XFS_ERRLEVEL_LOW, 1503 mp, agi); 1504 xfs_trans_brelse(tp, *bpp); 1505 return XFS_ERROR(EFSCORRUPTED); 1506 } 1507 1508 xfs_buf_set_ref(*bpp, XFS_AGI_REF); 1509 1510 xfs_check_agi_unlinked(agi); 1511 return 0; 1512} 1513 1514int 1515xfs_ialloc_read_agi( 1516 struct xfs_mount *mp, /* file system mount structure */ 1517 struct xfs_trans *tp, /* transaction pointer */ 1518 xfs_agnumber_t agno, /* allocation group number */ 1519 struct xfs_buf **bpp) /* allocation group hdr buf */ 1520{ 1521 struct xfs_agi *agi; /* allocation group header */ 1522 struct xfs_perag *pag; /* per allocation group data */ 1523 int error; 1524 1525 error = xfs_read_agi(mp, tp, agno, bpp); 1526 if (error) 1527 return error; 1528 1529 agi = XFS_BUF_TO_AGI(*bpp); 1530 pag = xfs_perag_get(mp, agno); 1531 if (!pag->pagi_init) { 1532 pag->pagi_freecount = be32_to_cpu(agi->agi_freecount); 1533 pag->pagi_count = be32_to_cpu(agi->agi_count); 1534 pag->pagi_init = 1; 1535 } 1536 1537 /* 1538 * It's possible for these to be out of sync if 1539 * we are in the middle of a forced shutdown. 1540 */ 1541 ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) || 1542 XFS_FORCED_SHUTDOWN(mp)); 1543 xfs_perag_put(pag); 1544 return 0; 1545} 1546 1547/* 1548 * Read in the agi to initialise the per-ag data in the mount structure 1549 */ 1550int 1551xfs_ialloc_pagi_init( 1552 xfs_mount_t *mp, /* file system mount structure */ 1553 xfs_trans_t *tp, /* transaction pointer */ 1554 xfs_agnumber_t agno) /* allocation group number */ 1555{ 1556 xfs_buf_t *bp = NULL; 1557 int error; 1558 1559 error = xfs_ialloc_read_agi(mp, tp, agno, &bp); 1560 if (error) 1561 return error; 1562 if (bp) 1563 xfs_trans_brelse(tp, bp); 1564 return 0; 1565}