Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v3.8 1604 lines 43 kB view raw
1/* 2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. 3 * All Rights Reserved. 4 * 5 * This program is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU General Public License as 7 * published by the Free Software Foundation. 8 * 9 * This program is distributed in the hope that it would be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write the Free Software Foundation, 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17 */ 18#include "xfs.h" 19#include "xfs_fs.h" 20#include "xfs_types.h" 21#include "xfs_bit.h" 22#include "xfs_log.h" 23#include "xfs_inum.h" 24#include "xfs_trans.h" 25#include "xfs_sb.h" 26#include "xfs_ag.h" 27#include "xfs_mount.h" 28#include "xfs_bmap_btree.h" 29#include "xfs_alloc_btree.h" 30#include "xfs_ialloc_btree.h" 31#include "xfs_dinode.h" 32#include "xfs_inode.h" 33#include "xfs_btree.h" 34#include "xfs_ialloc.h" 35#include "xfs_alloc.h" 36#include "xfs_rtalloc.h" 37#include "xfs_error.h" 38#include "xfs_bmap.h" 39 40 41/* 42 * Allocation group level functions. 43 */ 44static inline int 45xfs_ialloc_cluster_alignment( 46 xfs_alloc_arg_t *args) 47{ 48 if (xfs_sb_version_hasalign(&args->mp->m_sb) && 49 args->mp->m_sb.sb_inoalignmt >= 50 XFS_B_TO_FSBT(args->mp, XFS_INODE_CLUSTER_SIZE(args->mp))) 51 return args->mp->m_sb.sb_inoalignmt; 52 return 1; 53} 54 55/* 56 * Lookup a record by ino in the btree given by cur. 57 */ 58int /* error */ 59xfs_inobt_lookup( 60 struct xfs_btree_cur *cur, /* btree cursor */ 61 xfs_agino_t ino, /* starting inode of chunk */ 62 xfs_lookup_t dir, /* <=, >=, == */ 63 int *stat) /* success/failure */ 64{ 65 cur->bc_rec.i.ir_startino = ino; 66 cur->bc_rec.i.ir_freecount = 0; 67 cur->bc_rec.i.ir_free = 0; 68 return xfs_btree_lookup(cur, dir, stat); 69} 70 71/* 72 * Update the record referred to by cur to the value given. 73 * This either works (return 0) or gets an EFSCORRUPTED error. 74 */ 75STATIC int /* error */ 76xfs_inobt_update( 77 struct xfs_btree_cur *cur, /* btree cursor */ 78 xfs_inobt_rec_incore_t *irec) /* btree record */ 79{ 80 union xfs_btree_rec rec; 81 82 rec.inobt.ir_startino = cpu_to_be32(irec->ir_startino); 83 rec.inobt.ir_freecount = cpu_to_be32(irec->ir_freecount); 84 rec.inobt.ir_free = cpu_to_be64(irec->ir_free); 85 return xfs_btree_update(cur, &rec); 86} 87 88/* 89 * Get the data from the pointed-to record. 90 */ 91int /* error */ 92xfs_inobt_get_rec( 93 struct xfs_btree_cur *cur, /* btree cursor */ 94 xfs_inobt_rec_incore_t *irec, /* btree record */ 95 int *stat) /* output: success/failure */ 96{ 97 union xfs_btree_rec *rec; 98 int error; 99 100 error = xfs_btree_get_rec(cur, &rec, stat); 101 if (!error && *stat == 1) { 102 irec->ir_startino = be32_to_cpu(rec->inobt.ir_startino); 103 irec->ir_freecount = be32_to_cpu(rec->inobt.ir_freecount); 104 irec->ir_free = be64_to_cpu(rec->inobt.ir_free); 105 } 106 return error; 107} 108 109/* 110 * Verify that the number of free inodes in the AGI is correct. 111 */ 112#ifdef DEBUG 113STATIC int 114xfs_check_agi_freecount( 115 struct xfs_btree_cur *cur, 116 struct xfs_agi *agi) 117{ 118 if (cur->bc_nlevels == 1) { 119 xfs_inobt_rec_incore_t rec; 120 int freecount = 0; 121 int error; 122 int i; 123 124 error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i); 125 if (error) 126 return error; 127 128 do { 129 error = xfs_inobt_get_rec(cur, &rec, &i); 130 if (error) 131 return error; 132 133 if (i) { 134 freecount += rec.ir_freecount; 135 error = xfs_btree_increment(cur, 0, &i); 136 if (error) 137 return error; 138 } 139 } while (i == 1); 140 141 if (!XFS_FORCED_SHUTDOWN(cur->bc_mp)) 142 ASSERT(freecount == be32_to_cpu(agi->agi_freecount)); 143 } 144 return 0; 145} 146#else 147#define xfs_check_agi_freecount(cur, agi) 0 148#endif 149 150/* 151 * Initialise a new set of inodes. 152 */ 153STATIC int 154xfs_ialloc_inode_init( 155 struct xfs_mount *mp, 156 struct xfs_trans *tp, 157 xfs_agnumber_t agno, 158 xfs_agblock_t agbno, 159 xfs_agblock_t length, 160 unsigned int gen) 161{ 162 struct xfs_buf *fbuf; 163 struct xfs_dinode *free; 164 int blks_per_cluster, nbufs, ninodes; 165 int version; 166 int i, j; 167 xfs_daddr_t d; 168 169 /* 170 * Loop over the new block(s), filling in the inodes. 171 * For small block sizes, manipulate the inodes in buffers 172 * which are multiples of the blocks size. 173 */ 174 if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) { 175 blks_per_cluster = 1; 176 nbufs = length; 177 ninodes = mp->m_sb.sb_inopblock; 178 } else { 179 blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) / 180 mp->m_sb.sb_blocksize; 181 nbufs = length / blks_per_cluster; 182 ninodes = blks_per_cluster * mp->m_sb.sb_inopblock; 183 } 184 185 /* 186 * Figure out what version number to use in the inodes we create. 187 * If the superblock version has caught up to the one that supports 188 * the new inode format, then use the new inode version. Otherwise 189 * use the old version so that old kernels will continue to be 190 * able to use the file system. 191 */ 192 if (xfs_sb_version_hasnlink(&mp->m_sb)) 193 version = 2; 194 else 195 version = 1; 196 197 for (j = 0; j < nbufs; j++) { 198 /* 199 * Get the block. 200 */ 201 d = XFS_AGB_TO_DADDR(mp, agno, agbno + (j * blks_per_cluster)); 202 fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, 203 mp->m_bsize * blks_per_cluster, 204 XBF_UNMAPPED); 205 if (!fbuf) 206 return ENOMEM; 207 /* 208 * Initialize all inodes in this buffer and then log them. 209 * 210 * XXX: It would be much better if we had just one transaction 211 * to log a whole cluster of inodes instead of all the 212 * individual transactions causing a lot of log traffic. 213 */ 214 fbuf->b_ops = &xfs_inode_buf_ops; 215 xfs_buf_zero(fbuf, 0, ninodes << mp->m_sb.sb_inodelog); 216 for (i = 0; i < ninodes; i++) { 217 int ioffset = i << mp->m_sb.sb_inodelog; 218 uint isize = sizeof(struct xfs_dinode); 219 220 free = xfs_make_iptr(mp, fbuf, i); 221 free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC); 222 free->di_version = version; 223 free->di_gen = cpu_to_be32(gen); 224 free->di_next_unlinked = cpu_to_be32(NULLAGINO); 225 xfs_trans_log_buf(tp, fbuf, ioffset, ioffset + isize - 1); 226 } 227 xfs_trans_inode_alloc_buf(tp, fbuf); 228 } 229 return 0; 230} 231 232/* 233 * Allocate new inodes in the allocation group specified by agbp. 234 * Return 0 for success, else error code. 235 */ 236STATIC int /* error code or 0 */ 237xfs_ialloc_ag_alloc( 238 xfs_trans_t *tp, /* transaction pointer */ 239 xfs_buf_t *agbp, /* alloc group buffer */ 240 int *alloc) 241{ 242 xfs_agi_t *agi; /* allocation group header */ 243 xfs_alloc_arg_t args; /* allocation argument structure */ 244 xfs_btree_cur_t *cur; /* inode btree cursor */ 245 xfs_agnumber_t agno; 246 int error; 247 int i; 248 xfs_agino_t newino; /* new first inode's number */ 249 xfs_agino_t newlen; /* new number of inodes */ 250 xfs_agino_t thisino; /* current inode number, for loop */ 251 int isaligned = 0; /* inode allocation at stripe unit */ 252 /* boundary */ 253 struct xfs_perag *pag; 254 255 memset(&args, 0, sizeof(args)); 256 args.tp = tp; 257 args.mp = tp->t_mountp; 258 259 /* 260 * Locking will ensure that we don't have two callers in here 261 * at one time. 262 */ 263 newlen = XFS_IALLOC_INODES(args.mp); 264 if (args.mp->m_maxicount && 265 args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount) 266 return XFS_ERROR(ENOSPC); 267 args.minlen = args.maxlen = XFS_IALLOC_BLOCKS(args.mp); 268 /* 269 * First try to allocate inodes contiguous with the last-allocated 270 * chunk of inodes. If the filesystem is striped, this will fill 271 * an entire stripe unit with inodes. 272 */ 273 agi = XFS_BUF_TO_AGI(agbp); 274 newino = be32_to_cpu(agi->agi_newino); 275 agno = be32_to_cpu(agi->agi_seqno); 276 args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) + 277 XFS_IALLOC_BLOCKS(args.mp); 278 if (likely(newino != NULLAGINO && 279 (args.agbno < be32_to_cpu(agi->agi_length)))) { 280 args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); 281 args.type = XFS_ALLOCTYPE_THIS_BNO; 282 args.mod = args.total = args.wasdel = args.isfl = 283 args.userdata = args.minalignslop = 0; 284 args.prod = 1; 285 286 /* 287 * We need to take into account alignment here to ensure that 288 * we don't modify the free list if we fail to have an exact 289 * block. If we don't have an exact match, and every oher 290 * attempt allocation attempt fails, we'll end up cancelling 291 * a dirty transaction and shutting down. 292 * 293 * For an exact allocation, alignment must be 1, 294 * however we need to take cluster alignment into account when 295 * fixing up the freelist. Use the minalignslop field to 296 * indicate that extra blocks might be required for alignment, 297 * but not to use them in the actual exact allocation. 298 */ 299 args.alignment = 1; 300 args.minalignslop = xfs_ialloc_cluster_alignment(&args) - 1; 301 302 /* Allow space for the inode btree to split. */ 303 args.minleft = args.mp->m_in_maxlevels - 1; 304 if ((error = xfs_alloc_vextent(&args))) 305 return error; 306 } else 307 args.fsbno = NULLFSBLOCK; 308 309 if (unlikely(args.fsbno == NULLFSBLOCK)) { 310 /* 311 * Set the alignment for the allocation. 312 * If stripe alignment is turned on then align at stripe unit 313 * boundary. 314 * If the cluster size is smaller than a filesystem block 315 * then we're doing I/O for inodes in filesystem block size 316 * pieces, so don't need alignment anyway. 317 */ 318 isaligned = 0; 319 if (args.mp->m_sinoalign) { 320 ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN)); 321 args.alignment = args.mp->m_dalign; 322 isaligned = 1; 323 } else 324 args.alignment = xfs_ialloc_cluster_alignment(&args); 325 /* 326 * Need to figure out where to allocate the inode blocks. 327 * Ideally they should be spaced out through the a.g. 328 * For now, just allocate blocks up front. 329 */ 330 args.agbno = be32_to_cpu(agi->agi_root); 331 args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); 332 /* 333 * Allocate a fixed-size extent of inodes. 334 */ 335 args.type = XFS_ALLOCTYPE_NEAR_BNO; 336 args.mod = args.total = args.wasdel = args.isfl = 337 args.userdata = args.minalignslop = 0; 338 args.prod = 1; 339 /* 340 * Allow space for the inode btree to split. 341 */ 342 args.minleft = args.mp->m_in_maxlevels - 1; 343 if ((error = xfs_alloc_vextent(&args))) 344 return error; 345 } 346 347 /* 348 * If stripe alignment is turned on, then try again with cluster 349 * alignment. 350 */ 351 if (isaligned && args.fsbno == NULLFSBLOCK) { 352 args.type = XFS_ALLOCTYPE_NEAR_BNO; 353 args.agbno = be32_to_cpu(agi->agi_root); 354 args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); 355 args.alignment = xfs_ialloc_cluster_alignment(&args); 356 if ((error = xfs_alloc_vextent(&args))) 357 return error; 358 } 359 360 if (args.fsbno == NULLFSBLOCK) { 361 *alloc = 0; 362 return 0; 363 } 364 ASSERT(args.len == args.minlen); 365 366 /* 367 * Stamp and write the inode buffers. 368 * 369 * Seed the new inode cluster with a random generation number. This 370 * prevents short-term reuse of generation numbers if a chunk is 371 * freed and then immediately reallocated. We use random numbers 372 * rather than a linear progression to prevent the next generation 373 * number from being easily guessable. 374 */ 375 error = xfs_ialloc_inode_init(args.mp, tp, agno, args.agbno, 376 args.len, random32()); 377 378 if (error) 379 return error; 380 /* 381 * Convert the results. 382 */ 383 newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0); 384 be32_add_cpu(&agi->agi_count, newlen); 385 be32_add_cpu(&agi->agi_freecount, newlen); 386 pag = xfs_perag_get(args.mp, agno); 387 pag->pagi_freecount += newlen; 388 xfs_perag_put(pag); 389 agi->agi_newino = cpu_to_be32(newino); 390 391 /* 392 * Insert records describing the new inode chunk into the btree. 393 */ 394 cur = xfs_inobt_init_cursor(args.mp, tp, agbp, agno); 395 for (thisino = newino; 396 thisino < newino + newlen; 397 thisino += XFS_INODES_PER_CHUNK) { 398 cur->bc_rec.i.ir_startino = thisino; 399 cur->bc_rec.i.ir_freecount = XFS_INODES_PER_CHUNK; 400 cur->bc_rec.i.ir_free = XFS_INOBT_ALL_FREE; 401 error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, &i); 402 if (error) { 403 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 404 return error; 405 } 406 ASSERT(i == 0); 407 error = xfs_btree_insert(cur, &i); 408 if (error) { 409 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 410 return error; 411 } 412 ASSERT(i == 1); 413 } 414 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); 415 /* 416 * Log allocation group header fields 417 */ 418 xfs_ialloc_log_agi(tp, agbp, 419 XFS_AGI_COUNT | XFS_AGI_FREECOUNT | XFS_AGI_NEWINO); 420 /* 421 * Modify/log superblock values for inode count and inode free count. 422 */ 423 xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, (long)newlen); 424 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, (long)newlen); 425 *alloc = 1; 426 return 0; 427} 428 429STATIC xfs_agnumber_t 430xfs_ialloc_next_ag( 431 xfs_mount_t *mp) 432{ 433 xfs_agnumber_t agno; 434 435 spin_lock(&mp->m_agirotor_lock); 436 agno = mp->m_agirotor; 437 if (++mp->m_agirotor >= mp->m_maxagi) 438 mp->m_agirotor = 0; 439 spin_unlock(&mp->m_agirotor_lock); 440 441 return agno; 442} 443 444/* 445 * Select an allocation group to look for a free inode in, based on the parent 446 * inode and then mode. Return the allocation group buffer. 447 */ 448STATIC xfs_agnumber_t 449xfs_ialloc_ag_select( 450 xfs_trans_t *tp, /* transaction pointer */ 451 xfs_ino_t parent, /* parent directory inode number */ 452 umode_t mode, /* bits set to indicate file type */ 453 int okalloc) /* ok to allocate more space */ 454{ 455 xfs_agnumber_t agcount; /* number of ag's in the filesystem */ 456 xfs_agnumber_t agno; /* current ag number */ 457 int flags; /* alloc buffer locking flags */ 458 xfs_extlen_t ineed; /* blocks needed for inode allocation */ 459 xfs_extlen_t longest = 0; /* longest extent available */ 460 xfs_mount_t *mp; /* mount point structure */ 461 int needspace; /* file mode implies space allocated */ 462 xfs_perag_t *pag; /* per allocation group data */ 463 xfs_agnumber_t pagno; /* parent (starting) ag number */ 464 int error; 465 466 /* 467 * Files of these types need at least one block if length > 0 468 * (and they won't fit in the inode, but that's hard to figure out). 469 */ 470 needspace = S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode); 471 mp = tp->t_mountp; 472 agcount = mp->m_maxagi; 473 if (S_ISDIR(mode)) 474 pagno = xfs_ialloc_next_ag(mp); 475 else { 476 pagno = XFS_INO_TO_AGNO(mp, parent); 477 if (pagno >= agcount) 478 pagno = 0; 479 } 480 481 ASSERT(pagno < agcount); 482 483 /* 484 * Loop through allocation groups, looking for one with a little 485 * free space in it. Note we don't look for free inodes, exactly. 486 * Instead, we include whether there is a need to allocate inodes 487 * to mean that blocks must be allocated for them, 488 * if none are currently free. 489 */ 490 agno = pagno; 491 flags = XFS_ALLOC_FLAG_TRYLOCK; 492 for (;;) { 493 pag = xfs_perag_get(mp, agno); 494 if (!pag->pagi_inodeok) { 495 xfs_ialloc_next_ag(mp); 496 goto nextag; 497 } 498 499 if (!pag->pagi_init) { 500 error = xfs_ialloc_pagi_init(mp, tp, agno); 501 if (error) 502 goto nextag; 503 } 504 505 if (pag->pagi_freecount) { 506 xfs_perag_put(pag); 507 return agno; 508 } 509 510 if (!okalloc) 511 goto nextag; 512 513 if (!pag->pagf_init) { 514 error = xfs_alloc_pagf_init(mp, tp, agno, flags); 515 if (error) 516 goto nextag; 517 } 518 519 /* 520 * Is there enough free space for the file plus a block of 521 * inodes? (if we need to allocate some)? 522 */ 523 ineed = XFS_IALLOC_BLOCKS(mp); 524 longest = pag->pagf_longest; 525 if (!longest) 526 longest = pag->pagf_flcount > 0; 527 528 if (pag->pagf_freeblks >= needspace + ineed && 529 longest >= ineed) { 530 xfs_perag_put(pag); 531 return agno; 532 } 533nextag: 534 xfs_perag_put(pag); 535 /* 536 * No point in iterating over the rest, if we're shutting 537 * down. 538 */ 539 if (XFS_FORCED_SHUTDOWN(mp)) 540 return NULLAGNUMBER; 541 agno++; 542 if (agno >= agcount) 543 agno = 0; 544 if (agno == pagno) { 545 if (flags == 0) 546 return NULLAGNUMBER; 547 flags = 0; 548 } 549 } 550} 551 552/* 553 * Try to retrieve the next record to the left/right from the current one. 554 */ 555STATIC int 556xfs_ialloc_next_rec( 557 struct xfs_btree_cur *cur, 558 xfs_inobt_rec_incore_t *rec, 559 int *done, 560 int left) 561{ 562 int error; 563 int i; 564 565 if (left) 566 error = xfs_btree_decrement(cur, 0, &i); 567 else 568 error = xfs_btree_increment(cur, 0, &i); 569 570 if (error) 571 return error; 572 *done = !i; 573 if (i) { 574 error = xfs_inobt_get_rec(cur, rec, &i); 575 if (error) 576 return error; 577 XFS_WANT_CORRUPTED_RETURN(i == 1); 578 } 579 580 return 0; 581} 582 583STATIC int 584xfs_ialloc_get_rec( 585 struct xfs_btree_cur *cur, 586 xfs_agino_t agino, 587 xfs_inobt_rec_incore_t *rec, 588 int *done, 589 int left) 590{ 591 int error; 592 int i; 593 594 error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_EQ, &i); 595 if (error) 596 return error; 597 *done = !i; 598 if (i) { 599 error = xfs_inobt_get_rec(cur, rec, &i); 600 if (error) 601 return error; 602 XFS_WANT_CORRUPTED_RETURN(i == 1); 603 } 604 605 return 0; 606} 607 608/* 609 * Allocate an inode. 610 * 611 * The caller selected an AG for us, and made sure that free inodes are 612 * available. 613 */ 614STATIC int 615xfs_dialloc_ag( 616 struct xfs_trans *tp, 617 struct xfs_buf *agbp, 618 xfs_ino_t parent, 619 xfs_ino_t *inop) 620{ 621 struct xfs_mount *mp = tp->t_mountp; 622 struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); 623 xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); 624 xfs_agnumber_t pagno = XFS_INO_TO_AGNO(mp, parent); 625 xfs_agino_t pagino = XFS_INO_TO_AGINO(mp, parent); 626 struct xfs_perag *pag; 627 struct xfs_btree_cur *cur, *tcur; 628 struct xfs_inobt_rec_incore rec, trec; 629 xfs_ino_t ino; 630 int error; 631 int offset; 632 int i, j; 633 634 pag = xfs_perag_get(mp, agno); 635 636 ASSERT(pag->pagi_init); 637 ASSERT(pag->pagi_inodeok); 638 ASSERT(pag->pagi_freecount > 0); 639 640 restart_pagno: 641 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); 642 /* 643 * If pagino is 0 (this is the root inode allocation) use newino. 644 * This must work because we've just allocated some. 645 */ 646 if (!pagino) 647 pagino = be32_to_cpu(agi->agi_newino); 648 649 error = xfs_check_agi_freecount(cur, agi); 650 if (error) 651 goto error0; 652 653 /* 654 * If in the same AG as the parent, try to get near the parent. 655 */ 656 if (pagno == agno) { 657 int doneleft; /* done, to the left */ 658 int doneright; /* done, to the right */ 659 int searchdistance = 10; 660 661 error = xfs_inobt_lookup(cur, pagino, XFS_LOOKUP_LE, &i); 662 if (error) 663 goto error0; 664 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 665 666 error = xfs_inobt_get_rec(cur, &rec, &j); 667 if (error) 668 goto error0; 669 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 670 671 if (rec.ir_freecount > 0) { 672 /* 673 * Found a free inode in the same chunk 674 * as the parent, done. 675 */ 676 goto alloc_inode; 677 } 678 679 680 /* 681 * In the same AG as parent, but parent's chunk is full. 682 */ 683 684 /* duplicate the cursor, search left & right simultaneously */ 685 error = xfs_btree_dup_cursor(cur, &tcur); 686 if (error) 687 goto error0; 688 689 /* 690 * Skip to last blocks looked up if same parent inode. 691 */ 692 if (pagino != NULLAGINO && 693 pag->pagl_pagino == pagino && 694 pag->pagl_leftrec != NULLAGINO && 695 pag->pagl_rightrec != NULLAGINO) { 696 error = xfs_ialloc_get_rec(tcur, pag->pagl_leftrec, 697 &trec, &doneleft, 1); 698 if (error) 699 goto error1; 700 701 error = xfs_ialloc_get_rec(cur, pag->pagl_rightrec, 702 &rec, &doneright, 0); 703 if (error) 704 goto error1; 705 } else { 706 /* search left with tcur, back up 1 record */ 707 error = xfs_ialloc_next_rec(tcur, &trec, &doneleft, 1); 708 if (error) 709 goto error1; 710 711 /* search right with cur, go forward 1 record. */ 712 error = xfs_ialloc_next_rec(cur, &rec, &doneright, 0); 713 if (error) 714 goto error1; 715 } 716 717 /* 718 * Loop until we find an inode chunk with a free inode. 719 */ 720 while (!doneleft || !doneright) { 721 int useleft; /* using left inode chunk this time */ 722 723 if (!--searchdistance) { 724 /* 725 * Not in range - save last search 726 * location and allocate a new inode 727 */ 728 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); 729 pag->pagl_leftrec = trec.ir_startino; 730 pag->pagl_rightrec = rec.ir_startino; 731 pag->pagl_pagino = pagino; 732 goto newino; 733 } 734 735 /* figure out the closer block if both are valid. */ 736 if (!doneleft && !doneright) { 737 useleft = pagino - 738 (trec.ir_startino + XFS_INODES_PER_CHUNK - 1) < 739 rec.ir_startino - pagino; 740 } else { 741 useleft = !doneleft; 742 } 743 744 /* free inodes to the left? */ 745 if (useleft && trec.ir_freecount) { 746 rec = trec; 747 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); 748 cur = tcur; 749 750 pag->pagl_leftrec = trec.ir_startino; 751 pag->pagl_rightrec = rec.ir_startino; 752 pag->pagl_pagino = pagino; 753 goto alloc_inode; 754 } 755 756 /* free inodes to the right? */ 757 if (!useleft && rec.ir_freecount) { 758 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); 759 760 pag->pagl_leftrec = trec.ir_startino; 761 pag->pagl_rightrec = rec.ir_startino; 762 pag->pagl_pagino = pagino; 763 goto alloc_inode; 764 } 765 766 /* get next record to check */ 767 if (useleft) { 768 error = xfs_ialloc_next_rec(tcur, &trec, 769 &doneleft, 1); 770 } else { 771 error = xfs_ialloc_next_rec(cur, &rec, 772 &doneright, 0); 773 } 774 if (error) 775 goto error1; 776 } 777 778 /* 779 * We've reached the end of the btree. because 780 * we are only searching a small chunk of the 781 * btree each search, there is obviously free 782 * inodes closer to the parent inode than we 783 * are now. restart the search again. 784 */ 785 pag->pagl_pagino = NULLAGINO; 786 pag->pagl_leftrec = NULLAGINO; 787 pag->pagl_rightrec = NULLAGINO; 788 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); 789 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); 790 goto restart_pagno; 791 } 792 793 /* 794 * In a different AG from the parent. 795 * See if the most recently allocated block has any free. 796 */ 797newino: 798 if (agi->agi_newino != cpu_to_be32(NULLAGINO)) { 799 error = xfs_inobt_lookup(cur, be32_to_cpu(agi->agi_newino), 800 XFS_LOOKUP_EQ, &i); 801 if (error) 802 goto error0; 803 804 if (i == 1) { 805 error = xfs_inobt_get_rec(cur, &rec, &j); 806 if (error) 807 goto error0; 808 809 if (j == 1 && rec.ir_freecount > 0) { 810 /* 811 * The last chunk allocated in the group 812 * still has a free inode. 813 */ 814 goto alloc_inode; 815 } 816 } 817 } 818 819 /* 820 * None left in the last group, search the whole AG 821 */ 822 error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i); 823 if (error) 824 goto error0; 825 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 826 827 for (;;) { 828 error = xfs_inobt_get_rec(cur, &rec, &i); 829 if (error) 830 goto error0; 831 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 832 if (rec.ir_freecount > 0) 833 break; 834 error = xfs_btree_increment(cur, 0, &i); 835 if (error) 836 goto error0; 837 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 838 } 839 840alloc_inode: 841 offset = xfs_lowbit64(rec.ir_free); 842 ASSERT(offset >= 0); 843 ASSERT(offset < XFS_INODES_PER_CHUNK); 844 ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) % 845 XFS_INODES_PER_CHUNK) == 0); 846 ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset); 847 rec.ir_free &= ~XFS_INOBT_MASK(offset); 848 rec.ir_freecount--; 849 error = xfs_inobt_update(cur, &rec); 850 if (error) 851 goto error0; 852 be32_add_cpu(&agi->agi_freecount, -1); 853 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); 854 pag->pagi_freecount--; 855 856 error = xfs_check_agi_freecount(cur, agi); 857 if (error) 858 goto error0; 859 860 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); 861 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1); 862 xfs_perag_put(pag); 863 *inop = ino; 864 return 0; 865error1: 866 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); 867error0: 868 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 869 xfs_perag_put(pag); 870 return error; 871} 872 873/* 874 * Allocate an inode on disk. 875 * 876 * Mode is used to tell whether the new inode will need space, and whether it 877 * is a directory. 878 * 879 * This function is designed to be called twice if it has to do an allocation 880 * to make more free inodes. On the first call, *IO_agbp should be set to NULL. 881 * If an inode is available without having to performn an allocation, an inode 882 * number is returned. In this case, *IO_agbp is set to NULL. If an allocation 883 * needs to be done, xfs_dialloc returns the current AGI buffer in *IO_agbp. 884 * The caller should then commit the current transaction, allocate a 885 * new transaction, and call xfs_dialloc() again, passing in the previous value 886 * of *IO_agbp. IO_agbp should be held across the transactions. Since the AGI 887 * buffer is locked across the two calls, the second call is guaranteed to have 888 * a free inode available. 889 * 890 * Once we successfully pick an inode its number is returned and the on-disk 891 * data structures are updated. The inode itself is not read in, since doing so 892 * would break ordering constraints with xfs_reclaim. 893 */ 894int 895xfs_dialloc( 896 struct xfs_trans *tp, 897 xfs_ino_t parent, 898 umode_t mode, 899 int okalloc, 900 struct xfs_buf **IO_agbp, 901 xfs_ino_t *inop) 902{ 903 struct xfs_mount *mp = tp->t_mountp; 904 struct xfs_buf *agbp; 905 xfs_agnumber_t agno; 906 int error; 907 int ialloced; 908 int noroom = 0; 909 xfs_agnumber_t start_agno; 910 struct xfs_perag *pag; 911 912 if (*IO_agbp) { 913 /* 914 * If the caller passes in a pointer to the AGI buffer, 915 * continue where we left off before. In this case, we 916 * know that the allocation group has free inodes. 917 */ 918 agbp = *IO_agbp; 919 goto out_alloc; 920 } 921 922 /* 923 * We do not have an agbp, so select an initial allocation 924 * group for inode allocation. 925 */ 926 start_agno = xfs_ialloc_ag_select(tp, parent, mode, okalloc); 927 if (start_agno == NULLAGNUMBER) { 928 *inop = NULLFSINO; 929 return 0; 930 } 931 932 /* 933 * If we have already hit the ceiling of inode blocks then clear 934 * okalloc so we scan all available agi structures for a free 935 * inode. 936 */ 937 if (mp->m_maxicount && 938 mp->m_sb.sb_icount + XFS_IALLOC_INODES(mp) > mp->m_maxicount) { 939 noroom = 1; 940 okalloc = 0; 941 } 942 943 /* 944 * Loop until we find an allocation group that either has free inodes 945 * or in which we can allocate some inodes. Iterate through the 946 * allocation groups upward, wrapping at the end. 947 */ 948 agno = start_agno; 949 for (;;) { 950 pag = xfs_perag_get(mp, agno); 951 if (!pag->pagi_inodeok) { 952 xfs_ialloc_next_ag(mp); 953 goto nextag; 954 } 955 956 if (!pag->pagi_init) { 957 error = xfs_ialloc_pagi_init(mp, tp, agno); 958 if (error) 959 goto out_error; 960 } 961 962 /* 963 * Do a first racy fast path check if this AG is usable. 964 */ 965 if (!pag->pagi_freecount && !okalloc) 966 goto nextag; 967 968 /* 969 * Then read in the AGI buffer and recheck with the AGI buffer 970 * lock held. 971 */ 972 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); 973 if (error) 974 goto out_error; 975 976 if (pag->pagi_freecount) { 977 xfs_perag_put(pag); 978 goto out_alloc; 979 } 980 981 if (!okalloc) 982 goto nextag_relse_buffer; 983 984 985 error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced); 986 if (error) { 987 xfs_trans_brelse(tp, agbp); 988 989 if (error != ENOSPC) 990 goto out_error; 991 992 xfs_perag_put(pag); 993 *inop = NULLFSINO; 994 return 0; 995 } 996 997 if (ialloced) { 998 /* 999 * We successfully allocated some inodes, return 1000 * the current context to the caller so that it 1001 * can commit the current transaction and call 1002 * us again where we left off. 1003 */ 1004 ASSERT(pag->pagi_freecount > 0); 1005 xfs_perag_put(pag); 1006 1007 *IO_agbp = agbp; 1008 *inop = NULLFSINO; 1009 return 0; 1010 } 1011 1012nextag_relse_buffer: 1013 xfs_trans_brelse(tp, agbp); 1014nextag: 1015 xfs_perag_put(pag); 1016 if (++agno == mp->m_sb.sb_agcount) 1017 agno = 0; 1018 if (agno == start_agno) { 1019 *inop = NULLFSINO; 1020 return noroom ? ENOSPC : 0; 1021 } 1022 } 1023 1024out_alloc: 1025 *IO_agbp = NULL; 1026 return xfs_dialloc_ag(tp, agbp, parent, inop); 1027out_error: 1028 xfs_perag_put(pag); 1029 return XFS_ERROR(error); 1030} 1031 1032/* 1033 * Free disk inode. Carefully avoids touching the incore inode, all 1034 * manipulations incore are the caller's responsibility. 1035 * The on-disk inode is not changed by this operation, only the 1036 * btree (free inode mask) is changed. 1037 */ 1038int 1039xfs_difree( 1040 xfs_trans_t *tp, /* transaction pointer */ 1041 xfs_ino_t inode, /* inode to be freed */ 1042 xfs_bmap_free_t *flist, /* extents to free */ 1043 int *delete, /* set if inode cluster was deleted */ 1044 xfs_ino_t *first_ino) /* first inode in deleted cluster */ 1045{ 1046 /* REFERENCED */ 1047 xfs_agblock_t agbno; /* block number containing inode */ 1048 xfs_buf_t *agbp; /* buffer containing allocation group header */ 1049 xfs_agino_t agino; /* inode number relative to allocation group */ 1050 xfs_agnumber_t agno; /* allocation group number */ 1051 xfs_agi_t *agi; /* allocation group header */ 1052 xfs_btree_cur_t *cur; /* inode btree cursor */ 1053 int error; /* error return value */ 1054 int i; /* result code */ 1055 int ilen; /* inodes in an inode cluster */ 1056 xfs_mount_t *mp; /* mount structure for filesystem */ 1057 int off; /* offset of inode in inode chunk */ 1058 xfs_inobt_rec_incore_t rec; /* btree record */ 1059 struct xfs_perag *pag; 1060 1061 mp = tp->t_mountp; 1062 1063 /* 1064 * Break up inode number into its components. 1065 */ 1066 agno = XFS_INO_TO_AGNO(mp, inode); 1067 if (agno >= mp->m_sb.sb_agcount) { 1068 xfs_warn(mp, "%s: agno >= mp->m_sb.sb_agcount (%d >= %d).", 1069 __func__, agno, mp->m_sb.sb_agcount); 1070 ASSERT(0); 1071 return XFS_ERROR(EINVAL); 1072 } 1073 agino = XFS_INO_TO_AGINO(mp, inode); 1074 if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) { 1075 xfs_warn(mp, "%s: inode != XFS_AGINO_TO_INO() (%llu != %llu).", 1076 __func__, (unsigned long long)inode, 1077 (unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino)); 1078 ASSERT(0); 1079 return XFS_ERROR(EINVAL); 1080 } 1081 agbno = XFS_AGINO_TO_AGBNO(mp, agino); 1082 if (agbno >= mp->m_sb.sb_agblocks) { 1083 xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).", 1084 __func__, agbno, mp->m_sb.sb_agblocks); 1085 ASSERT(0); 1086 return XFS_ERROR(EINVAL); 1087 } 1088 /* 1089 * Get the allocation group header. 1090 */ 1091 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); 1092 if (error) { 1093 xfs_warn(mp, "%s: xfs_ialloc_read_agi() returned error %d.", 1094 __func__, error); 1095 return error; 1096 } 1097 agi = XFS_BUF_TO_AGI(agbp); 1098 ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); 1099 ASSERT(agbno < be32_to_cpu(agi->agi_length)); 1100 /* 1101 * Initialize the cursor. 1102 */ 1103 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); 1104 1105 error = xfs_check_agi_freecount(cur, agi); 1106 if (error) 1107 goto error0; 1108 1109 /* 1110 * Look for the entry describing this inode. 1111 */ 1112 if ((error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i))) { 1113 xfs_warn(mp, "%s: xfs_inobt_lookup() returned error %d.", 1114 __func__, error); 1115 goto error0; 1116 } 1117 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1118 error = xfs_inobt_get_rec(cur, &rec, &i); 1119 if (error) { 1120 xfs_warn(mp, "%s: xfs_inobt_get_rec() returned error %d.", 1121 __func__, error); 1122 goto error0; 1123 } 1124 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1125 /* 1126 * Get the offset in the inode chunk. 1127 */ 1128 off = agino - rec.ir_startino; 1129 ASSERT(off >= 0 && off < XFS_INODES_PER_CHUNK); 1130 ASSERT(!(rec.ir_free & XFS_INOBT_MASK(off))); 1131 /* 1132 * Mark the inode free & increment the count. 1133 */ 1134 rec.ir_free |= XFS_INOBT_MASK(off); 1135 rec.ir_freecount++; 1136 1137 /* 1138 * When an inode cluster is free, it becomes eligible for removal 1139 */ 1140 if (!(mp->m_flags & XFS_MOUNT_IKEEP) && 1141 (rec.ir_freecount == XFS_IALLOC_INODES(mp))) { 1142 1143 *delete = 1; 1144 *first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino); 1145 1146 /* 1147 * Remove the inode cluster from the AGI B+Tree, adjust the 1148 * AGI and Superblock inode counts, and mark the disk space 1149 * to be freed when the transaction is committed. 1150 */ 1151 ilen = XFS_IALLOC_INODES(mp); 1152 be32_add_cpu(&agi->agi_count, -ilen); 1153 be32_add_cpu(&agi->agi_freecount, -(ilen - 1)); 1154 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT); 1155 pag = xfs_perag_get(mp, agno); 1156 pag->pagi_freecount -= ilen - 1; 1157 xfs_perag_put(pag); 1158 xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen); 1159 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1)); 1160 1161 if ((error = xfs_btree_delete(cur, &i))) { 1162 xfs_warn(mp, "%s: xfs_btree_delete returned error %d.", 1163 __func__, error); 1164 goto error0; 1165 } 1166 1167 xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, 1168 agno, XFS_INO_TO_AGBNO(mp,rec.ir_startino)), 1169 XFS_IALLOC_BLOCKS(mp), flist, mp); 1170 } else { 1171 *delete = 0; 1172 1173 error = xfs_inobt_update(cur, &rec); 1174 if (error) { 1175 xfs_warn(mp, "%s: xfs_inobt_update returned error %d.", 1176 __func__, error); 1177 goto error0; 1178 } 1179 1180 /* 1181 * Change the inode free counts and log the ag/sb changes. 1182 */ 1183 be32_add_cpu(&agi->agi_freecount, 1); 1184 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); 1185 pag = xfs_perag_get(mp, agno); 1186 pag->pagi_freecount++; 1187 xfs_perag_put(pag); 1188 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1); 1189 } 1190 1191 error = xfs_check_agi_freecount(cur, agi); 1192 if (error) 1193 goto error0; 1194 1195 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); 1196 return 0; 1197 1198error0: 1199 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 1200 return error; 1201} 1202 1203STATIC int 1204xfs_imap_lookup( 1205 struct xfs_mount *mp, 1206 struct xfs_trans *tp, 1207 xfs_agnumber_t agno, 1208 xfs_agino_t agino, 1209 xfs_agblock_t agbno, 1210 xfs_agblock_t *chunk_agbno, 1211 xfs_agblock_t *offset_agbno, 1212 int flags) 1213{ 1214 struct xfs_inobt_rec_incore rec; 1215 struct xfs_btree_cur *cur; 1216 struct xfs_buf *agbp; 1217 int error; 1218 int i; 1219 1220 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); 1221 if (error) { 1222 xfs_alert(mp, 1223 "%s: xfs_ialloc_read_agi() returned error %d, agno %d", 1224 __func__, error, agno); 1225 return error; 1226 } 1227 1228 /* 1229 * Lookup the inode record for the given agino. If the record cannot be 1230 * found, then it's an invalid inode number and we should abort. Once 1231 * we have a record, we need to ensure it contains the inode number 1232 * we are looking up. 1233 */ 1234 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); 1235 error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i); 1236 if (!error) { 1237 if (i) 1238 error = xfs_inobt_get_rec(cur, &rec, &i); 1239 if (!error && i == 0) 1240 error = EINVAL; 1241 } 1242 1243 xfs_trans_brelse(tp, agbp); 1244 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); 1245 if (error) 1246 return error; 1247 1248 /* check that the returned record contains the required inode */ 1249 if (rec.ir_startino > agino || 1250 rec.ir_startino + XFS_IALLOC_INODES(mp) <= agino) 1251 return EINVAL; 1252 1253 /* for untrusted inodes check it is allocated first */ 1254 if ((flags & XFS_IGET_UNTRUSTED) && 1255 (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino))) 1256 return EINVAL; 1257 1258 *chunk_agbno = XFS_AGINO_TO_AGBNO(mp, rec.ir_startino); 1259 *offset_agbno = agbno - *chunk_agbno; 1260 return 0; 1261} 1262 1263/* 1264 * Return the location of the inode in imap, for mapping it into a buffer. 1265 */ 1266int 1267xfs_imap( 1268 xfs_mount_t *mp, /* file system mount structure */ 1269 xfs_trans_t *tp, /* transaction pointer */ 1270 xfs_ino_t ino, /* inode to locate */ 1271 struct xfs_imap *imap, /* location map structure */ 1272 uint flags) /* flags for inode btree lookup */ 1273{ 1274 xfs_agblock_t agbno; /* block number of inode in the alloc group */ 1275 xfs_agino_t agino; /* inode number within alloc group */ 1276 xfs_agnumber_t agno; /* allocation group number */ 1277 int blks_per_cluster; /* num blocks per inode cluster */ 1278 xfs_agblock_t chunk_agbno; /* first block in inode chunk */ 1279 xfs_agblock_t cluster_agbno; /* first block in inode cluster */ 1280 int error; /* error code */ 1281 int offset; /* index of inode in its buffer */ 1282 int offset_agbno; /* blks from chunk start to inode */ 1283 1284 ASSERT(ino != NULLFSINO); 1285 1286 /* 1287 * Split up the inode number into its parts. 1288 */ 1289 agno = XFS_INO_TO_AGNO(mp, ino); 1290 agino = XFS_INO_TO_AGINO(mp, ino); 1291 agbno = XFS_AGINO_TO_AGBNO(mp, agino); 1292 if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks || 1293 ino != XFS_AGINO_TO_INO(mp, agno, agino)) { 1294#ifdef DEBUG 1295 /* 1296 * Don't output diagnostic information for untrusted inodes 1297 * as they can be invalid without implying corruption. 1298 */ 1299 if (flags & XFS_IGET_UNTRUSTED) 1300 return XFS_ERROR(EINVAL); 1301 if (agno >= mp->m_sb.sb_agcount) { 1302 xfs_alert(mp, 1303 "%s: agno (%d) >= mp->m_sb.sb_agcount (%d)", 1304 __func__, agno, mp->m_sb.sb_agcount); 1305 } 1306 if (agbno >= mp->m_sb.sb_agblocks) { 1307 xfs_alert(mp, 1308 "%s: agbno (0x%llx) >= mp->m_sb.sb_agblocks (0x%lx)", 1309 __func__, (unsigned long long)agbno, 1310 (unsigned long)mp->m_sb.sb_agblocks); 1311 } 1312 if (ino != XFS_AGINO_TO_INO(mp, agno, agino)) { 1313 xfs_alert(mp, 1314 "%s: ino (0x%llx) != XFS_AGINO_TO_INO() (0x%llx)", 1315 __func__, ino, 1316 XFS_AGINO_TO_INO(mp, agno, agino)); 1317 } 1318 xfs_stack_trace(); 1319#endif /* DEBUG */ 1320 return XFS_ERROR(EINVAL); 1321 } 1322 1323 blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog; 1324 1325 /* 1326 * For bulkstat and handle lookups, we have an untrusted inode number 1327 * that we have to verify is valid. We cannot do this just by reading 1328 * the inode buffer as it may have been unlinked and removed leaving 1329 * inodes in stale state on disk. Hence we have to do a btree lookup 1330 * in all cases where an untrusted inode number is passed. 1331 */ 1332 if (flags & XFS_IGET_UNTRUSTED) { 1333 error = xfs_imap_lookup(mp, tp, agno, agino, agbno, 1334 &chunk_agbno, &offset_agbno, flags); 1335 if (error) 1336 return error; 1337 goto out_map; 1338 } 1339 1340 /* 1341 * If the inode cluster size is the same as the blocksize or 1342 * smaller we get to the buffer by simple arithmetics. 1343 */ 1344 if (XFS_INODE_CLUSTER_SIZE(mp) <= mp->m_sb.sb_blocksize) { 1345 offset = XFS_INO_TO_OFFSET(mp, ino); 1346 ASSERT(offset < mp->m_sb.sb_inopblock); 1347 1348 imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, agbno); 1349 imap->im_len = XFS_FSB_TO_BB(mp, 1); 1350 imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog); 1351 return 0; 1352 } 1353 1354 /* 1355 * If the inode chunks are aligned then use simple maths to 1356 * find the location. Otherwise we have to do a btree 1357 * lookup to find the location. 1358 */ 1359 if (mp->m_inoalign_mask) { 1360 offset_agbno = agbno & mp->m_inoalign_mask; 1361 chunk_agbno = agbno - offset_agbno; 1362 } else { 1363 error = xfs_imap_lookup(mp, tp, agno, agino, agbno, 1364 &chunk_agbno, &offset_agbno, flags); 1365 if (error) 1366 return error; 1367 } 1368 1369out_map: 1370 ASSERT(agbno >= chunk_agbno); 1371 cluster_agbno = chunk_agbno + 1372 ((offset_agbno / blks_per_cluster) * blks_per_cluster); 1373 offset = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) + 1374 XFS_INO_TO_OFFSET(mp, ino); 1375 1376 imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, cluster_agbno); 1377 imap->im_len = XFS_FSB_TO_BB(mp, blks_per_cluster); 1378 imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog); 1379 1380 /* 1381 * If the inode number maps to a block outside the bounds 1382 * of the file system then return NULL rather than calling 1383 * read_buf and panicing when we get an error from the 1384 * driver. 1385 */ 1386 if ((imap->im_blkno + imap->im_len) > 1387 XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) { 1388 xfs_alert(mp, 1389 "%s: (im_blkno (0x%llx) + im_len (0x%llx)) > sb_dblocks (0x%llx)", 1390 __func__, (unsigned long long) imap->im_blkno, 1391 (unsigned long long) imap->im_len, 1392 XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)); 1393 return XFS_ERROR(EINVAL); 1394 } 1395 return 0; 1396} 1397 1398/* 1399 * Compute and fill in value of m_in_maxlevels. 1400 */ 1401void 1402xfs_ialloc_compute_maxlevels( 1403 xfs_mount_t *mp) /* file system mount structure */ 1404{ 1405 int level; 1406 uint maxblocks; 1407 uint maxleafents; 1408 int minleafrecs; 1409 int minnoderecs; 1410 1411 maxleafents = (1LL << XFS_INO_AGINO_BITS(mp)) >> 1412 XFS_INODES_PER_CHUNK_LOG; 1413 minleafrecs = mp->m_alloc_mnr[0]; 1414 minnoderecs = mp->m_alloc_mnr[1]; 1415 maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs; 1416 for (level = 1; maxblocks > 1; level++) 1417 maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs; 1418 mp->m_in_maxlevels = level; 1419} 1420 1421/* 1422 * Log specified fields for the ag hdr (inode section) 1423 */ 1424void 1425xfs_ialloc_log_agi( 1426 xfs_trans_t *tp, /* transaction pointer */ 1427 xfs_buf_t *bp, /* allocation group header buffer */ 1428 int fields) /* bitmask of fields to log */ 1429{ 1430 int first; /* first byte number */ 1431 int last; /* last byte number */ 1432 static const short offsets[] = { /* field starting offsets */ 1433 /* keep in sync with bit definitions */ 1434 offsetof(xfs_agi_t, agi_magicnum), 1435 offsetof(xfs_agi_t, agi_versionnum), 1436 offsetof(xfs_agi_t, agi_seqno), 1437 offsetof(xfs_agi_t, agi_length), 1438 offsetof(xfs_agi_t, agi_count), 1439 offsetof(xfs_agi_t, agi_root), 1440 offsetof(xfs_agi_t, agi_level), 1441 offsetof(xfs_agi_t, agi_freecount), 1442 offsetof(xfs_agi_t, agi_newino), 1443 offsetof(xfs_agi_t, agi_dirino), 1444 offsetof(xfs_agi_t, agi_unlinked), 1445 sizeof(xfs_agi_t) 1446 }; 1447#ifdef DEBUG 1448 xfs_agi_t *agi; /* allocation group header */ 1449 1450 agi = XFS_BUF_TO_AGI(bp); 1451 ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); 1452#endif 1453 /* 1454 * Compute byte offsets for the first and last fields. 1455 */ 1456 xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS, &first, &last); 1457 /* 1458 * Log the allocation group inode header buffer. 1459 */ 1460 xfs_trans_log_buf(tp, bp, first, last); 1461} 1462 1463#ifdef DEBUG 1464STATIC void 1465xfs_check_agi_unlinked( 1466 struct xfs_agi *agi) 1467{ 1468 int i; 1469 1470 for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++) 1471 ASSERT(agi->agi_unlinked[i]); 1472} 1473#else 1474#define xfs_check_agi_unlinked(agi) 1475#endif 1476 1477static void 1478xfs_agi_verify( 1479 struct xfs_buf *bp) 1480{ 1481 struct xfs_mount *mp = bp->b_target->bt_mount; 1482 struct xfs_agi *agi = XFS_BUF_TO_AGI(bp); 1483 int agi_ok; 1484 1485 /* 1486 * Validate the magic number of the agi block. 1487 */ 1488 agi_ok = agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC) && 1489 XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)); 1490 1491 /* 1492 * during growfs operations, the perag is not fully initialised, 1493 * so we can't use it for any useful checking. growfs ensures we can't 1494 * use it by using uncached buffers that don't have the perag attached 1495 * so we can detect and avoid this problem. 1496 */ 1497 if (bp->b_pag) 1498 agi_ok = agi_ok && be32_to_cpu(agi->agi_seqno) == 1499 bp->b_pag->pag_agno; 1500 1501 if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI, 1502 XFS_RANDOM_IALLOC_READ_AGI))) { 1503 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, agi); 1504 xfs_buf_ioerror(bp, EFSCORRUPTED); 1505 } 1506 xfs_check_agi_unlinked(agi); 1507} 1508 1509static void 1510xfs_agi_read_verify( 1511 struct xfs_buf *bp) 1512{ 1513 xfs_agi_verify(bp); 1514} 1515 1516static void 1517xfs_agi_write_verify( 1518 struct xfs_buf *bp) 1519{ 1520 xfs_agi_verify(bp); 1521} 1522 1523const struct xfs_buf_ops xfs_agi_buf_ops = { 1524 .verify_read = xfs_agi_read_verify, 1525 .verify_write = xfs_agi_write_verify, 1526}; 1527 1528/* 1529 * Read in the allocation group header (inode allocation section) 1530 */ 1531int 1532xfs_read_agi( 1533 struct xfs_mount *mp, /* file system mount structure */ 1534 struct xfs_trans *tp, /* transaction pointer */ 1535 xfs_agnumber_t agno, /* allocation group number */ 1536 struct xfs_buf **bpp) /* allocation group hdr buf */ 1537{ 1538 int error; 1539 1540 ASSERT(agno != NULLAGNUMBER); 1541 1542 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, 1543 XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)), 1544 XFS_FSS_TO_BB(mp, 1), 0, bpp, &xfs_agi_buf_ops); 1545 if (error) 1546 return error; 1547 1548 ASSERT(!xfs_buf_geterror(*bpp)); 1549 xfs_buf_set_ref(*bpp, XFS_AGI_REF); 1550 return 0; 1551} 1552 1553int 1554xfs_ialloc_read_agi( 1555 struct xfs_mount *mp, /* file system mount structure */ 1556 struct xfs_trans *tp, /* transaction pointer */ 1557 xfs_agnumber_t agno, /* allocation group number */ 1558 struct xfs_buf **bpp) /* allocation group hdr buf */ 1559{ 1560 struct xfs_agi *agi; /* allocation group header */ 1561 struct xfs_perag *pag; /* per allocation group data */ 1562 int error; 1563 1564 error = xfs_read_agi(mp, tp, agno, bpp); 1565 if (error) 1566 return error; 1567 1568 agi = XFS_BUF_TO_AGI(*bpp); 1569 pag = xfs_perag_get(mp, agno); 1570 if (!pag->pagi_init) { 1571 pag->pagi_freecount = be32_to_cpu(agi->agi_freecount); 1572 pag->pagi_count = be32_to_cpu(agi->agi_count); 1573 pag->pagi_init = 1; 1574 } 1575 1576 /* 1577 * It's possible for these to be out of sync if 1578 * we are in the middle of a forced shutdown. 1579 */ 1580 ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) || 1581 XFS_FORCED_SHUTDOWN(mp)); 1582 xfs_perag_put(pag); 1583 return 0; 1584} 1585 1586/* 1587 * Read in the agi to initialise the per-ag data in the mount structure 1588 */ 1589int 1590xfs_ialloc_pagi_init( 1591 xfs_mount_t *mp, /* file system mount structure */ 1592 xfs_trans_t *tp, /* transaction pointer */ 1593 xfs_agnumber_t agno) /* allocation group number */ 1594{ 1595 xfs_buf_t *bp = NULL; 1596 int error; 1597 1598 error = xfs_ialloc_read_agi(mp, tp, agno, &bp); 1599 if (error) 1600 return error; 1601 if (bp) 1602 xfs_trans_brelse(tp, bp); 1603 return 0; 1604}