Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v3.12-rc5 1694 lines 46 kB view raw
1/* 2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. 3 * All Rights Reserved. 4 * 5 * This program is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU General Public License as 7 * published by the Free Software Foundation. 8 * 9 * This program is distributed in the hope that it would be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write the Free Software Foundation, 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17 */ 18#include "xfs.h" 19#include "xfs_fs.h" 20#include "xfs_types.h" 21#include "xfs_bit.h" 22#include "xfs_log.h" 23#include "xfs_inum.h" 24#include "xfs_trans.h" 25#include "xfs_sb.h" 26#include "xfs_ag.h" 27#include "xfs_mount.h" 28#include "xfs_bmap_btree.h" 29#include "xfs_alloc_btree.h" 30#include "xfs_ialloc_btree.h" 31#include "xfs_dinode.h" 32#include "xfs_inode.h" 33#include "xfs_btree.h" 34#include "xfs_ialloc.h" 35#include "xfs_alloc.h" 36#include "xfs_rtalloc.h" 37#include "xfs_error.h" 38#include "xfs_bmap.h" 39#include "xfs_cksum.h" 40#include "xfs_buf_item.h" 41#include "xfs_icreate_item.h" 42#include "xfs_icache.h" 43 44 45/* 46 * Allocation group level functions. 47 */ 48static inline int 49xfs_ialloc_cluster_alignment( 50 xfs_alloc_arg_t *args) 51{ 52 if (xfs_sb_version_hasalign(&args->mp->m_sb) && 53 args->mp->m_sb.sb_inoalignmt >= 54 XFS_B_TO_FSBT(args->mp, XFS_INODE_CLUSTER_SIZE(args->mp))) 55 return args->mp->m_sb.sb_inoalignmt; 56 return 1; 57} 58 59/* 60 * Lookup a record by ino in the btree given by cur. 61 */ 62int /* error */ 63xfs_inobt_lookup( 64 struct xfs_btree_cur *cur, /* btree cursor */ 65 xfs_agino_t ino, /* starting inode of chunk */ 66 xfs_lookup_t dir, /* <=, >=, == */ 67 int *stat) /* success/failure */ 68{ 69 cur->bc_rec.i.ir_startino = ino; 70 cur->bc_rec.i.ir_freecount = 0; 71 cur->bc_rec.i.ir_free = 0; 72 return xfs_btree_lookup(cur, dir, stat); 73} 74 75/* 76 * Update the record referred to by cur to the value given. 77 * This either works (return 0) or gets an EFSCORRUPTED error. 78 */ 79STATIC int /* error */ 80xfs_inobt_update( 81 struct xfs_btree_cur *cur, /* btree cursor */ 82 xfs_inobt_rec_incore_t *irec) /* btree record */ 83{ 84 union xfs_btree_rec rec; 85 86 rec.inobt.ir_startino = cpu_to_be32(irec->ir_startino); 87 rec.inobt.ir_freecount = cpu_to_be32(irec->ir_freecount); 88 rec.inobt.ir_free = cpu_to_be64(irec->ir_free); 89 return xfs_btree_update(cur, &rec); 90} 91 92/* 93 * Get the data from the pointed-to record. 94 */ 95int /* error */ 96xfs_inobt_get_rec( 97 struct xfs_btree_cur *cur, /* btree cursor */ 98 xfs_inobt_rec_incore_t *irec, /* btree record */ 99 int *stat) /* output: success/failure */ 100{ 101 union xfs_btree_rec *rec; 102 int error; 103 104 error = xfs_btree_get_rec(cur, &rec, stat); 105 if (!error && *stat == 1) { 106 irec->ir_startino = be32_to_cpu(rec->inobt.ir_startino); 107 irec->ir_freecount = be32_to_cpu(rec->inobt.ir_freecount); 108 irec->ir_free = be64_to_cpu(rec->inobt.ir_free); 109 } 110 return error; 111} 112 113/* 114 * Verify that the number of free inodes in the AGI is correct. 115 */ 116#ifdef DEBUG 117STATIC int 118xfs_check_agi_freecount( 119 struct xfs_btree_cur *cur, 120 struct xfs_agi *agi) 121{ 122 if (cur->bc_nlevels == 1) { 123 xfs_inobt_rec_incore_t rec; 124 int freecount = 0; 125 int error; 126 int i; 127 128 error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i); 129 if (error) 130 return error; 131 132 do { 133 error = xfs_inobt_get_rec(cur, &rec, &i); 134 if (error) 135 return error; 136 137 if (i) { 138 freecount += rec.ir_freecount; 139 error = xfs_btree_increment(cur, 0, &i); 140 if (error) 141 return error; 142 } 143 } while (i == 1); 144 145 if (!XFS_FORCED_SHUTDOWN(cur->bc_mp)) 146 ASSERT(freecount == be32_to_cpu(agi->agi_freecount)); 147 } 148 return 0; 149} 150#else 151#define xfs_check_agi_freecount(cur, agi) 0 152#endif 153 154/* 155 * Initialise a new set of inodes. When called without a transaction context 156 * (e.g. from recovery) we initiate a delayed write of the inode buffers rather 157 * than logging them (which in a transaction context puts them into the AIL 158 * for writeback rather than the xfsbufd queue). 159 */ 160int 161xfs_ialloc_inode_init( 162 struct xfs_mount *mp, 163 struct xfs_trans *tp, 164 struct list_head *buffer_list, 165 xfs_agnumber_t agno, 166 xfs_agblock_t agbno, 167 xfs_agblock_t length, 168 unsigned int gen) 169{ 170 struct xfs_buf *fbuf; 171 struct xfs_dinode *free; 172 int blks_per_cluster, nbufs, ninodes; 173 int version; 174 int i, j; 175 xfs_daddr_t d; 176 xfs_ino_t ino = 0; 177 178 /* 179 * Loop over the new block(s), filling in the inodes. 180 * For small block sizes, manipulate the inodes in buffers 181 * which are multiples of the blocks size. 182 */ 183 if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) { 184 blks_per_cluster = 1; 185 nbufs = length; 186 ninodes = mp->m_sb.sb_inopblock; 187 } else { 188 blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) / 189 mp->m_sb.sb_blocksize; 190 nbufs = length / blks_per_cluster; 191 ninodes = blks_per_cluster * mp->m_sb.sb_inopblock; 192 } 193 194 /* 195 * Figure out what version number to use in the inodes we create. If 196 * the superblock version has caught up to the one that supports the new 197 * inode format, then use the new inode version. Otherwise use the old 198 * version so that old kernels will continue to be able to use the file 199 * system. 200 * 201 * For v3 inodes, we also need to write the inode number into the inode, 202 * so calculate the first inode number of the chunk here as 203 * XFS_OFFBNO_TO_AGINO() only works within a filesystem block, not 204 * across multiple filesystem blocks (such as a cluster) and so cannot 205 * be used in the cluster buffer loop below. 206 * 207 * Further, because we are writing the inode directly into the buffer 208 * and calculating a CRC on the entire inode, we have ot log the entire 209 * inode so that the entire range the CRC covers is present in the log. 210 * That means for v3 inode we log the entire buffer rather than just the 211 * inode cores. 212 */ 213 if (xfs_sb_version_hascrc(&mp->m_sb)) { 214 version = 3; 215 ino = XFS_AGINO_TO_INO(mp, agno, 216 XFS_OFFBNO_TO_AGINO(mp, agbno, 0)); 217 218 /* 219 * log the initialisation that is about to take place as an 220 * logical operation. This means the transaction does not 221 * need to log the physical changes to the inode buffers as log 222 * recovery will know what initialisation is actually needed. 223 * Hence we only need to log the buffers as "ordered" buffers so 224 * they track in the AIL as if they were physically logged. 225 */ 226 if (tp) 227 xfs_icreate_log(tp, agno, agbno, XFS_IALLOC_INODES(mp), 228 mp->m_sb.sb_inodesize, length, gen); 229 } else if (xfs_sb_version_hasnlink(&mp->m_sb)) 230 version = 2; 231 else 232 version = 1; 233 234 for (j = 0; j < nbufs; j++) { 235 /* 236 * Get the block. 237 */ 238 d = XFS_AGB_TO_DADDR(mp, agno, agbno + (j * blks_per_cluster)); 239 fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, 240 mp->m_bsize * blks_per_cluster, 241 XBF_UNMAPPED); 242 if (!fbuf) 243 return ENOMEM; 244 245 /* Initialize the inode buffers and log them appropriately. */ 246 fbuf->b_ops = &xfs_inode_buf_ops; 247 xfs_buf_zero(fbuf, 0, BBTOB(fbuf->b_length)); 248 for (i = 0; i < ninodes; i++) { 249 int ioffset = i << mp->m_sb.sb_inodelog; 250 uint isize = xfs_dinode_size(version); 251 252 free = xfs_make_iptr(mp, fbuf, i); 253 free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC); 254 free->di_version = version; 255 free->di_gen = cpu_to_be32(gen); 256 free->di_next_unlinked = cpu_to_be32(NULLAGINO); 257 258 if (version == 3) { 259 free->di_ino = cpu_to_be64(ino); 260 ino++; 261 uuid_copy(&free->di_uuid, &mp->m_sb.sb_uuid); 262 xfs_dinode_calc_crc(mp, free); 263 } else if (tp) { 264 /* just log the inode core */ 265 xfs_trans_log_buf(tp, fbuf, ioffset, 266 ioffset + isize - 1); 267 } 268 } 269 270 if (tp) { 271 /* 272 * Mark the buffer as an inode allocation buffer so it 273 * sticks in AIL at the point of this allocation 274 * transaction. This ensures the they are on disk before 275 * the tail of the log can be moved past this 276 * transaction (i.e. by preventing relogging from moving 277 * it forward in the log). 278 */ 279 xfs_trans_inode_alloc_buf(tp, fbuf); 280 if (version == 3) { 281 /* 282 * Mark the buffer as ordered so that they are 283 * not physically logged in the transaction but 284 * still tracked in the AIL as part of the 285 * transaction and pin the log appropriately. 286 */ 287 xfs_trans_ordered_buf(tp, fbuf); 288 xfs_trans_log_buf(tp, fbuf, 0, 289 BBTOB(fbuf->b_length) - 1); 290 } 291 } else { 292 fbuf->b_flags |= XBF_DONE; 293 xfs_buf_delwri_queue(fbuf, buffer_list); 294 xfs_buf_relse(fbuf); 295 } 296 } 297 return 0; 298} 299 300/* 301 * Allocate new inodes in the allocation group specified by agbp. 302 * Return 0 for success, else error code. 303 */ 304STATIC int /* error code or 0 */ 305xfs_ialloc_ag_alloc( 306 xfs_trans_t *tp, /* transaction pointer */ 307 xfs_buf_t *agbp, /* alloc group buffer */ 308 int *alloc) 309{ 310 xfs_agi_t *agi; /* allocation group header */ 311 xfs_alloc_arg_t args; /* allocation argument structure */ 312 xfs_btree_cur_t *cur; /* inode btree cursor */ 313 xfs_agnumber_t agno; 314 int error; 315 int i; 316 xfs_agino_t newino; /* new first inode's number */ 317 xfs_agino_t newlen; /* new number of inodes */ 318 xfs_agino_t thisino; /* current inode number, for loop */ 319 int isaligned = 0; /* inode allocation at stripe unit */ 320 /* boundary */ 321 struct xfs_perag *pag; 322 323 memset(&args, 0, sizeof(args)); 324 args.tp = tp; 325 args.mp = tp->t_mountp; 326 327 /* 328 * Locking will ensure that we don't have two callers in here 329 * at one time. 330 */ 331 newlen = XFS_IALLOC_INODES(args.mp); 332 if (args.mp->m_maxicount && 333 args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount) 334 return XFS_ERROR(ENOSPC); 335 args.minlen = args.maxlen = XFS_IALLOC_BLOCKS(args.mp); 336 /* 337 * First try to allocate inodes contiguous with the last-allocated 338 * chunk of inodes. If the filesystem is striped, this will fill 339 * an entire stripe unit with inodes. 340 */ 341 agi = XFS_BUF_TO_AGI(agbp); 342 newino = be32_to_cpu(agi->agi_newino); 343 agno = be32_to_cpu(agi->agi_seqno); 344 args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) + 345 XFS_IALLOC_BLOCKS(args.mp); 346 if (likely(newino != NULLAGINO && 347 (args.agbno < be32_to_cpu(agi->agi_length)))) { 348 args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); 349 args.type = XFS_ALLOCTYPE_THIS_BNO; 350 args.prod = 1; 351 352 /* 353 * We need to take into account alignment here to ensure that 354 * we don't modify the free list if we fail to have an exact 355 * block. If we don't have an exact match, and every oher 356 * attempt allocation attempt fails, we'll end up cancelling 357 * a dirty transaction and shutting down. 358 * 359 * For an exact allocation, alignment must be 1, 360 * however we need to take cluster alignment into account when 361 * fixing up the freelist. Use the minalignslop field to 362 * indicate that extra blocks might be required for alignment, 363 * but not to use them in the actual exact allocation. 364 */ 365 args.alignment = 1; 366 args.minalignslop = xfs_ialloc_cluster_alignment(&args) - 1; 367 368 /* Allow space for the inode btree to split. */ 369 args.minleft = args.mp->m_in_maxlevels - 1; 370 if ((error = xfs_alloc_vextent(&args))) 371 return error; 372 } else 373 args.fsbno = NULLFSBLOCK; 374 375 if (unlikely(args.fsbno == NULLFSBLOCK)) { 376 /* 377 * Set the alignment for the allocation. 378 * If stripe alignment is turned on then align at stripe unit 379 * boundary. 380 * If the cluster size is smaller than a filesystem block 381 * then we're doing I/O for inodes in filesystem block size 382 * pieces, so don't need alignment anyway. 383 */ 384 isaligned = 0; 385 if (args.mp->m_sinoalign) { 386 ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN)); 387 args.alignment = args.mp->m_dalign; 388 isaligned = 1; 389 } else 390 args.alignment = xfs_ialloc_cluster_alignment(&args); 391 /* 392 * Need to figure out where to allocate the inode blocks. 393 * Ideally they should be spaced out through the a.g. 394 * For now, just allocate blocks up front. 395 */ 396 args.agbno = be32_to_cpu(agi->agi_root); 397 args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); 398 /* 399 * Allocate a fixed-size extent of inodes. 400 */ 401 args.type = XFS_ALLOCTYPE_NEAR_BNO; 402 args.prod = 1; 403 /* 404 * Allow space for the inode btree to split. 405 */ 406 args.minleft = args.mp->m_in_maxlevels - 1; 407 if ((error = xfs_alloc_vextent(&args))) 408 return error; 409 } 410 411 /* 412 * If stripe alignment is turned on, then try again with cluster 413 * alignment. 414 */ 415 if (isaligned && args.fsbno == NULLFSBLOCK) { 416 args.type = XFS_ALLOCTYPE_NEAR_BNO; 417 args.agbno = be32_to_cpu(agi->agi_root); 418 args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); 419 args.alignment = xfs_ialloc_cluster_alignment(&args); 420 if ((error = xfs_alloc_vextent(&args))) 421 return error; 422 } 423 424 if (args.fsbno == NULLFSBLOCK) { 425 *alloc = 0; 426 return 0; 427 } 428 ASSERT(args.len == args.minlen); 429 430 /* 431 * Stamp and write the inode buffers. 432 * 433 * Seed the new inode cluster with a random generation number. This 434 * prevents short-term reuse of generation numbers if a chunk is 435 * freed and then immediately reallocated. We use random numbers 436 * rather than a linear progression to prevent the next generation 437 * number from being easily guessable. 438 */ 439 error = xfs_ialloc_inode_init(args.mp, tp, NULL, agno, args.agbno, 440 args.len, prandom_u32()); 441 442 if (error) 443 return error; 444 /* 445 * Convert the results. 446 */ 447 newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0); 448 be32_add_cpu(&agi->agi_count, newlen); 449 be32_add_cpu(&agi->agi_freecount, newlen); 450 pag = xfs_perag_get(args.mp, agno); 451 pag->pagi_freecount += newlen; 452 xfs_perag_put(pag); 453 agi->agi_newino = cpu_to_be32(newino); 454 455 /* 456 * Insert records describing the new inode chunk into the btree. 457 */ 458 cur = xfs_inobt_init_cursor(args.mp, tp, agbp, agno); 459 for (thisino = newino; 460 thisino < newino + newlen; 461 thisino += XFS_INODES_PER_CHUNK) { 462 cur->bc_rec.i.ir_startino = thisino; 463 cur->bc_rec.i.ir_freecount = XFS_INODES_PER_CHUNK; 464 cur->bc_rec.i.ir_free = XFS_INOBT_ALL_FREE; 465 error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, &i); 466 if (error) { 467 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 468 return error; 469 } 470 ASSERT(i == 0); 471 error = xfs_btree_insert(cur, &i); 472 if (error) { 473 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 474 return error; 475 } 476 ASSERT(i == 1); 477 } 478 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); 479 /* 480 * Log allocation group header fields 481 */ 482 xfs_ialloc_log_agi(tp, agbp, 483 XFS_AGI_COUNT | XFS_AGI_FREECOUNT | XFS_AGI_NEWINO); 484 /* 485 * Modify/log superblock values for inode count and inode free count. 486 */ 487 xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, (long)newlen); 488 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, (long)newlen); 489 *alloc = 1; 490 return 0; 491} 492 493STATIC xfs_agnumber_t 494xfs_ialloc_next_ag( 495 xfs_mount_t *mp) 496{ 497 xfs_agnumber_t agno; 498 499 spin_lock(&mp->m_agirotor_lock); 500 agno = mp->m_agirotor; 501 if (++mp->m_agirotor >= mp->m_maxagi) 502 mp->m_agirotor = 0; 503 spin_unlock(&mp->m_agirotor_lock); 504 505 return agno; 506} 507 508/* 509 * Select an allocation group to look for a free inode in, based on the parent 510 * inode and the mode. Return the allocation group buffer. 511 */ 512STATIC xfs_agnumber_t 513xfs_ialloc_ag_select( 514 xfs_trans_t *tp, /* transaction pointer */ 515 xfs_ino_t parent, /* parent directory inode number */ 516 umode_t mode, /* bits set to indicate file type */ 517 int okalloc) /* ok to allocate more space */ 518{ 519 xfs_agnumber_t agcount; /* number of ag's in the filesystem */ 520 xfs_agnumber_t agno; /* current ag number */ 521 int flags; /* alloc buffer locking flags */ 522 xfs_extlen_t ineed; /* blocks needed for inode allocation */ 523 xfs_extlen_t longest = 0; /* longest extent available */ 524 xfs_mount_t *mp; /* mount point structure */ 525 int needspace; /* file mode implies space allocated */ 526 xfs_perag_t *pag; /* per allocation group data */ 527 xfs_agnumber_t pagno; /* parent (starting) ag number */ 528 int error; 529 530 /* 531 * Files of these types need at least one block if length > 0 532 * (and they won't fit in the inode, but that's hard to figure out). 533 */ 534 needspace = S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode); 535 mp = tp->t_mountp; 536 agcount = mp->m_maxagi; 537 if (S_ISDIR(mode)) 538 pagno = xfs_ialloc_next_ag(mp); 539 else { 540 pagno = XFS_INO_TO_AGNO(mp, parent); 541 if (pagno >= agcount) 542 pagno = 0; 543 } 544 545 ASSERT(pagno < agcount); 546 547 /* 548 * Loop through allocation groups, looking for one with a little 549 * free space in it. Note we don't look for free inodes, exactly. 550 * Instead, we include whether there is a need to allocate inodes 551 * to mean that blocks must be allocated for them, 552 * if none are currently free. 553 */ 554 agno = pagno; 555 flags = XFS_ALLOC_FLAG_TRYLOCK; 556 for (;;) { 557 pag = xfs_perag_get(mp, agno); 558 if (!pag->pagi_inodeok) { 559 xfs_ialloc_next_ag(mp); 560 goto nextag; 561 } 562 563 if (!pag->pagi_init) { 564 error = xfs_ialloc_pagi_init(mp, tp, agno); 565 if (error) 566 goto nextag; 567 } 568 569 if (pag->pagi_freecount) { 570 xfs_perag_put(pag); 571 return agno; 572 } 573 574 if (!okalloc) 575 goto nextag; 576 577 if (!pag->pagf_init) { 578 error = xfs_alloc_pagf_init(mp, tp, agno, flags); 579 if (error) 580 goto nextag; 581 } 582 583 /* 584 * Is there enough free space for the file plus a block of 585 * inodes? (if we need to allocate some)? 586 */ 587 ineed = XFS_IALLOC_BLOCKS(mp); 588 longest = pag->pagf_longest; 589 if (!longest) 590 longest = pag->pagf_flcount > 0; 591 592 if (pag->pagf_freeblks >= needspace + ineed && 593 longest >= ineed) { 594 xfs_perag_put(pag); 595 return agno; 596 } 597nextag: 598 xfs_perag_put(pag); 599 /* 600 * No point in iterating over the rest, if we're shutting 601 * down. 602 */ 603 if (XFS_FORCED_SHUTDOWN(mp)) 604 return NULLAGNUMBER; 605 agno++; 606 if (agno >= agcount) 607 agno = 0; 608 if (agno == pagno) { 609 if (flags == 0) 610 return NULLAGNUMBER; 611 flags = 0; 612 } 613 } 614} 615 616/* 617 * Try to retrieve the next record to the left/right from the current one. 618 */ 619STATIC int 620xfs_ialloc_next_rec( 621 struct xfs_btree_cur *cur, 622 xfs_inobt_rec_incore_t *rec, 623 int *done, 624 int left) 625{ 626 int error; 627 int i; 628 629 if (left) 630 error = xfs_btree_decrement(cur, 0, &i); 631 else 632 error = xfs_btree_increment(cur, 0, &i); 633 634 if (error) 635 return error; 636 *done = !i; 637 if (i) { 638 error = xfs_inobt_get_rec(cur, rec, &i); 639 if (error) 640 return error; 641 XFS_WANT_CORRUPTED_RETURN(i == 1); 642 } 643 644 return 0; 645} 646 647STATIC int 648xfs_ialloc_get_rec( 649 struct xfs_btree_cur *cur, 650 xfs_agino_t agino, 651 xfs_inobt_rec_incore_t *rec, 652 int *done) 653{ 654 int error; 655 int i; 656 657 error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_EQ, &i); 658 if (error) 659 return error; 660 *done = !i; 661 if (i) { 662 error = xfs_inobt_get_rec(cur, rec, &i); 663 if (error) 664 return error; 665 XFS_WANT_CORRUPTED_RETURN(i == 1); 666 } 667 668 return 0; 669} 670 671/* 672 * Allocate an inode. 673 * 674 * The caller selected an AG for us, and made sure that free inodes are 675 * available. 676 */ 677STATIC int 678xfs_dialloc_ag( 679 struct xfs_trans *tp, 680 struct xfs_buf *agbp, 681 xfs_ino_t parent, 682 xfs_ino_t *inop) 683{ 684 struct xfs_mount *mp = tp->t_mountp; 685 struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); 686 xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); 687 xfs_agnumber_t pagno = XFS_INO_TO_AGNO(mp, parent); 688 xfs_agino_t pagino = XFS_INO_TO_AGINO(mp, parent); 689 struct xfs_perag *pag; 690 struct xfs_btree_cur *cur, *tcur; 691 struct xfs_inobt_rec_incore rec, trec; 692 xfs_ino_t ino; 693 int error; 694 int offset; 695 int i, j; 696 697 pag = xfs_perag_get(mp, agno); 698 699 ASSERT(pag->pagi_init); 700 ASSERT(pag->pagi_inodeok); 701 ASSERT(pag->pagi_freecount > 0); 702 703 restart_pagno: 704 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); 705 /* 706 * If pagino is 0 (this is the root inode allocation) use newino. 707 * This must work because we've just allocated some. 708 */ 709 if (!pagino) 710 pagino = be32_to_cpu(agi->agi_newino); 711 712 error = xfs_check_agi_freecount(cur, agi); 713 if (error) 714 goto error0; 715 716 /* 717 * If in the same AG as the parent, try to get near the parent. 718 */ 719 if (pagno == agno) { 720 int doneleft; /* done, to the left */ 721 int doneright; /* done, to the right */ 722 int searchdistance = 10; 723 724 error = xfs_inobt_lookup(cur, pagino, XFS_LOOKUP_LE, &i); 725 if (error) 726 goto error0; 727 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 728 729 error = xfs_inobt_get_rec(cur, &rec, &j); 730 if (error) 731 goto error0; 732 XFS_WANT_CORRUPTED_GOTO(j == 1, error0); 733 734 if (rec.ir_freecount > 0) { 735 /* 736 * Found a free inode in the same chunk 737 * as the parent, done. 738 */ 739 goto alloc_inode; 740 } 741 742 743 /* 744 * In the same AG as parent, but parent's chunk is full. 745 */ 746 747 /* duplicate the cursor, search left & right simultaneously */ 748 error = xfs_btree_dup_cursor(cur, &tcur); 749 if (error) 750 goto error0; 751 752 /* 753 * Skip to last blocks looked up if same parent inode. 754 */ 755 if (pagino != NULLAGINO && 756 pag->pagl_pagino == pagino && 757 pag->pagl_leftrec != NULLAGINO && 758 pag->pagl_rightrec != NULLAGINO) { 759 error = xfs_ialloc_get_rec(tcur, pag->pagl_leftrec, 760 &trec, &doneleft); 761 if (error) 762 goto error1; 763 764 error = xfs_ialloc_get_rec(cur, pag->pagl_rightrec, 765 &rec, &doneright); 766 if (error) 767 goto error1; 768 } else { 769 /* search left with tcur, back up 1 record */ 770 error = xfs_ialloc_next_rec(tcur, &trec, &doneleft, 1); 771 if (error) 772 goto error1; 773 774 /* search right with cur, go forward 1 record. */ 775 error = xfs_ialloc_next_rec(cur, &rec, &doneright, 0); 776 if (error) 777 goto error1; 778 } 779 780 /* 781 * Loop until we find an inode chunk with a free inode. 782 */ 783 while (!doneleft || !doneright) { 784 int useleft; /* using left inode chunk this time */ 785 786 if (!--searchdistance) { 787 /* 788 * Not in range - save last search 789 * location and allocate a new inode 790 */ 791 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); 792 pag->pagl_leftrec = trec.ir_startino; 793 pag->pagl_rightrec = rec.ir_startino; 794 pag->pagl_pagino = pagino; 795 goto newino; 796 } 797 798 /* figure out the closer block if both are valid. */ 799 if (!doneleft && !doneright) { 800 useleft = pagino - 801 (trec.ir_startino + XFS_INODES_PER_CHUNK - 1) < 802 rec.ir_startino - pagino; 803 } else { 804 useleft = !doneleft; 805 } 806 807 /* free inodes to the left? */ 808 if (useleft && trec.ir_freecount) { 809 rec = trec; 810 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); 811 cur = tcur; 812 813 pag->pagl_leftrec = trec.ir_startino; 814 pag->pagl_rightrec = rec.ir_startino; 815 pag->pagl_pagino = pagino; 816 goto alloc_inode; 817 } 818 819 /* free inodes to the right? */ 820 if (!useleft && rec.ir_freecount) { 821 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); 822 823 pag->pagl_leftrec = trec.ir_startino; 824 pag->pagl_rightrec = rec.ir_startino; 825 pag->pagl_pagino = pagino; 826 goto alloc_inode; 827 } 828 829 /* get next record to check */ 830 if (useleft) { 831 error = xfs_ialloc_next_rec(tcur, &trec, 832 &doneleft, 1); 833 } else { 834 error = xfs_ialloc_next_rec(cur, &rec, 835 &doneright, 0); 836 } 837 if (error) 838 goto error1; 839 } 840 841 /* 842 * We've reached the end of the btree. because 843 * we are only searching a small chunk of the 844 * btree each search, there is obviously free 845 * inodes closer to the parent inode than we 846 * are now. restart the search again. 847 */ 848 pag->pagl_pagino = NULLAGINO; 849 pag->pagl_leftrec = NULLAGINO; 850 pag->pagl_rightrec = NULLAGINO; 851 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); 852 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); 853 goto restart_pagno; 854 } 855 856 /* 857 * In a different AG from the parent. 858 * See if the most recently allocated block has any free. 859 */ 860newino: 861 if (agi->agi_newino != cpu_to_be32(NULLAGINO)) { 862 error = xfs_inobt_lookup(cur, be32_to_cpu(agi->agi_newino), 863 XFS_LOOKUP_EQ, &i); 864 if (error) 865 goto error0; 866 867 if (i == 1) { 868 error = xfs_inobt_get_rec(cur, &rec, &j); 869 if (error) 870 goto error0; 871 872 if (j == 1 && rec.ir_freecount > 0) { 873 /* 874 * The last chunk allocated in the group 875 * still has a free inode. 876 */ 877 goto alloc_inode; 878 } 879 } 880 } 881 882 /* 883 * None left in the last group, search the whole AG 884 */ 885 error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i); 886 if (error) 887 goto error0; 888 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 889 890 for (;;) { 891 error = xfs_inobt_get_rec(cur, &rec, &i); 892 if (error) 893 goto error0; 894 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 895 if (rec.ir_freecount > 0) 896 break; 897 error = xfs_btree_increment(cur, 0, &i); 898 if (error) 899 goto error0; 900 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 901 } 902 903alloc_inode: 904 offset = xfs_lowbit64(rec.ir_free); 905 ASSERT(offset >= 0); 906 ASSERT(offset < XFS_INODES_PER_CHUNK); 907 ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) % 908 XFS_INODES_PER_CHUNK) == 0); 909 ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset); 910 rec.ir_free &= ~XFS_INOBT_MASK(offset); 911 rec.ir_freecount--; 912 error = xfs_inobt_update(cur, &rec); 913 if (error) 914 goto error0; 915 be32_add_cpu(&agi->agi_freecount, -1); 916 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); 917 pag->pagi_freecount--; 918 919 error = xfs_check_agi_freecount(cur, agi); 920 if (error) 921 goto error0; 922 923 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); 924 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1); 925 xfs_perag_put(pag); 926 *inop = ino; 927 return 0; 928error1: 929 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); 930error0: 931 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 932 xfs_perag_put(pag); 933 return error; 934} 935 936/* 937 * Allocate an inode on disk. 938 * 939 * Mode is used to tell whether the new inode will need space, and whether it 940 * is a directory. 941 * 942 * This function is designed to be called twice if it has to do an allocation 943 * to make more free inodes. On the first call, *IO_agbp should be set to NULL. 944 * If an inode is available without having to performn an allocation, an inode 945 * number is returned. In this case, *IO_agbp is set to NULL. If an allocation 946 * needs to be done, xfs_dialloc returns the current AGI buffer in *IO_agbp. 947 * The caller should then commit the current transaction, allocate a 948 * new transaction, and call xfs_dialloc() again, passing in the previous value 949 * of *IO_agbp. IO_agbp should be held across the transactions. Since the AGI 950 * buffer is locked across the two calls, the second call is guaranteed to have 951 * a free inode available. 952 * 953 * Once we successfully pick an inode its number is returned and the on-disk 954 * data structures are updated. The inode itself is not read in, since doing so 955 * would break ordering constraints with xfs_reclaim. 956 */ 957int 958xfs_dialloc( 959 struct xfs_trans *tp, 960 xfs_ino_t parent, 961 umode_t mode, 962 int okalloc, 963 struct xfs_buf **IO_agbp, 964 xfs_ino_t *inop) 965{ 966 struct xfs_mount *mp = tp->t_mountp; 967 struct xfs_buf *agbp; 968 xfs_agnumber_t agno; 969 int error; 970 int ialloced; 971 int noroom = 0; 972 xfs_agnumber_t start_agno; 973 struct xfs_perag *pag; 974 975 if (*IO_agbp) { 976 /* 977 * If the caller passes in a pointer to the AGI buffer, 978 * continue where we left off before. In this case, we 979 * know that the allocation group has free inodes. 980 */ 981 agbp = *IO_agbp; 982 goto out_alloc; 983 } 984 985 /* 986 * We do not have an agbp, so select an initial allocation 987 * group for inode allocation. 988 */ 989 start_agno = xfs_ialloc_ag_select(tp, parent, mode, okalloc); 990 if (start_agno == NULLAGNUMBER) { 991 *inop = NULLFSINO; 992 return 0; 993 } 994 995 /* 996 * If we have already hit the ceiling of inode blocks then clear 997 * okalloc so we scan all available agi structures for a free 998 * inode. 999 */ 1000 if (mp->m_maxicount && 1001 mp->m_sb.sb_icount + XFS_IALLOC_INODES(mp) > mp->m_maxicount) { 1002 noroom = 1; 1003 okalloc = 0; 1004 } 1005 1006 /* 1007 * Loop until we find an allocation group that either has free inodes 1008 * or in which we can allocate some inodes. Iterate through the 1009 * allocation groups upward, wrapping at the end. 1010 */ 1011 agno = start_agno; 1012 for (;;) { 1013 pag = xfs_perag_get(mp, agno); 1014 if (!pag->pagi_inodeok) { 1015 xfs_ialloc_next_ag(mp); 1016 goto nextag; 1017 } 1018 1019 if (!pag->pagi_init) { 1020 error = xfs_ialloc_pagi_init(mp, tp, agno); 1021 if (error) 1022 goto out_error; 1023 } 1024 1025 /* 1026 * Do a first racy fast path check if this AG is usable. 1027 */ 1028 if (!pag->pagi_freecount && !okalloc) 1029 goto nextag; 1030 1031 /* 1032 * Then read in the AGI buffer and recheck with the AGI buffer 1033 * lock held. 1034 */ 1035 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); 1036 if (error) 1037 goto out_error; 1038 1039 if (pag->pagi_freecount) { 1040 xfs_perag_put(pag); 1041 goto out_alloc; 1042 } 1043 1044 if (!okalloc) 1045 goto nextag_relse_buffer; 1046 1047 1048 error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced); 1049 if (error) { 1050 xfs_trans_brelse(tp, agbp); 1051 1052 if (error != ENOSPC) 1053 goto out_error; 1054 1055 xfs_perag_put(pag); 1056 *inop = NULLFSINO; 1057 return 0; 1058 } 1059 1060 if (ialloced) { 1061 /* 1062 * We successfully allocated some inodes, return 1063 * the current context to the caller so that it 1064 * can commit the current transaction and call 1065 * us again where we left off. 1066 */ 1067 ASSERT(pag->pagi_freecount > 0); 1068 xfs_perag_put(pag); 1069 1070 *IO_agbp = agbp; 1071 *inop = NULLFSINO; 1072 return 0; 1073 } 1074 1075nextag_relse_buffer: 1076 xfs_trans_brelse(tp, agbp); 1077nextag: 1078 xfs_perag_put(pag); 1079 if (++agno == mp->m_sb.sb_agcount) 1080 agno = 0; 1081 if (agno == start_agno) { 1082 *inop = NULLFSINO; 1083 return noroom ? ENOSPC : 0; 1084 } 1085 } 1086 1087out_alloc: 1088 *IO_agbp = NULL; 1089 return xfs_dialloc_ag(tp, agbp, parent, inop); 1090out_error: 1091 xfs_perag_put(pag); 1092 return XFS_ERROR(error); 1093} 1094 1095/* 1096 * Free disk inode. Carefully avoids touching the incore inode, all 1097 * manipulations incore are the caller's responsibility. 1098 * The on-disk inode is not changed by this operation, only the 1099 * btree (free inode mask) is changed. 1100 */ 1101int 1102xfs_difree( 1103 xfs_trans_t *tp, /* transaction pointer */ 1104 xfs_ino_t inode, /* inode to be freed */ 1105 xfs_bmap_free_t *flist, /* extents to free */ 1106 int *delete, /* set if inode cluster was deleted */ 1107 xfs_ino_t *first_ino) /* first inode in deleted cluster */ 1108{ 1109 /* REFERENCED */ 1110 xfs_agblock_t agbno; /* block number containing inode */ 1111 xfs_buf_t *agbp; /* buffer containing allocation group header */ 1112 xfs_agino_t agino; /* inode number relative to allocation group */ 1113 xfs_agnumber_t agno; /* allocation group number */ 1114 xfs_agi_t *agi; /* allocation group header */ 1115 xfs_btree_cur_t *cur; /* inode btree cursor */ 1116 int error; /* error return value */ 1117 int i; /* result code */ 1118 int ilen; /* inodes in an inode cluster */ 1119 xfs_mount_t *mp; /* mount structure for filesystem */ 1120 int off; /* offset of inode in inode chunk */ 1121 xfs_inobt_rec_incore_t rec; /* btree record */ 1122 struct xfs_perag *pag; 1123 1124 mp = tp->t_mountp; 1125 1126 /* 1127 * Break up inode number into its components. 1128 */ 1129 agno = XFS_INO_TO_AGNO(mp, inode); 1130 if (agno >= mp->m_sb.sb_agcount) { 1131 xfs_warn(mp, "%s: agno >= mp->m_sb.sb_agcount (%d >= %d).", 1132 __func__, agno, mp->m_sb.sb_agcount); 1133 ASSERT(0); 1134 return XFS_ERROR(EINVAL); 1135 } 1136 agino = XFS_INO_TO_AGINO(mp, inode); 1137 if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) { 1138 xfs_warn(mp, "%s: inode != XFS_AGINO_TO_INO() (%llu != %llu).", 1139 __func__, (unsigned long long)inode, 1140 (unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino)); 1141 ASSERT(0); 1142 return XFS_ERROR(EINVAL); 1143 } 1144 agbno = XFS_AGINO_TO_AGBNO(mp, agino); 1145 if (agbno >= mp->m_sb.sb_agblocks) { 1146 xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).", 1147 __func__, agbno, mp->m_sb.sb_agblocks); 1148 ASSERT(0); 1149 return XFS_ERROR(EINVAL); 1150 } 1151 /* 1152 * Get the allocation group header. 1153 */ 1154 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); 1155 if (error) { 1156 xfs_warn(mp, "%s: xfs_ialloc_read_agi() returned error %d.", 1157 __func__, error); 1158 return error; 1159 } 1160 agi = XFS_BUF_TO_AGI(agbp); 1161 ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); 1162 ASSERT(agbno < be32_to_cpu(agi->agi_length)); 1163 /* 1164 * Initialize the cursor. 1165 */ 1166 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); 1167 1168 error = xfs_check_agi_freecount(cur, agi); 1169 if (error) 1170 goto error0; 1171 1172 /* 1173 * Look for the entry describing this inode. 1174 */ 1175 if ((error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i))) { 1176 xfs_warn(mp, "%s: xfs_inobt_lookup() returned error %d.", 1177 __func__, error); 1178 goto error0; 1179 } 1180 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1181 error = xfs_inobt_get_rec(cur, &rec, &i); 1182 if (error) { 1183 xfs_warn(mp, "%s: xfs_inobt_get_rec() returned error %d.", 1184 __func__, error); 1185 goto error0; 1186 } 1187 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1188 /* 1189 * Get the offset in the inode chunk. 1190 */ 1191 off = agino - rec.ir_startino; 1192 ASSERT(off >= 0 && off < XFS_INODES_PER_CHUNK); 1193 ASSERT(!(rec.ir_free & XFS_INOBT_MASK(off))); 1194 /* 1195 * Mark the inode free & increment the count. 1196 */ 1197 rec.ir_free |= XFS_INOBT_MASK(off); 1198 rec.ir_freecount++; 1199 1200 /* 1201 * When an inode cluster is free, it becomes eligible for removal 1202 */ 1203 if (!(mp->m_flags & XFS_MOUNT_IKEEP) && 1204 (rec.ir_freecount == XFS_IALLOC_INODES(mp))) { 1205 1206 *delete = 1; 1207 *first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino); 1208 1209 /* 1210 * Remove the inode cluster from the AGI B+Tree, adjust the 1211 * AGI and Superblock inode counts, and mark the disk space 1212 * to be freed when the transaction is committed. 1213 */ 1214 ilen = XFS_IALLOC_INODES(mp); 1215 be32_add_cpu(&agi->agi_count, -ilen); 1216 be32_add_cpu(&agi->agi_freecount, -(ilen - 1)); 1217 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT); 1218 pag = xfs_perag_get(mp, agno); 1219 pag->pagi_freecount -= ilen - 1; 1220 xfs_perag_put(pag); 1221 xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen); 1222 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1)); 1223 1224 if ((error = xfs_btree_delete(cur, &i))) { 1225 xfs_warn(mp, "%s: xfs_btree_delete returned error %d.", 1226 __func__, error); 1227 goto error0; 1228 } 1229 1230 xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, 1231 agno, XFS_INO_TO_AGBNO(mp,rec.ir_startino)), 1232 XFS_IALLOC_BLOCKS(mp), flist, mp); 1233 } else { 1234 *delete = 0; 1235 1236 error = xfs_inobt_update(cur, &rec); 1237 if (error) { 1238 xfs_warn(mp, "%s: xfs_inobt_update returned error %d.", 1239 __func__, error); 1240 goto error0; 1241 } 1242 1243 /* 1244 * Change the inode free counts and log the ag/sb changes. 1245 */ 1246 be32_add_cpu(&agi->agi_freecount, 1); 1247 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); 1248 pag = xfs_perag_get(mp, agno); 1249 pag->pagi_freecount++; 1250 xfs_perag_put(pag); 1251 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1); 1252 } 1253 1254 error = xfs_check_agi_freecount(cur, agi); 1255 if (error) 1256 goto error0; 1257 1258 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); 1259 return 0; 1260 1261error0: 1262 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 1263 return error; 1264} 1265 1266STATIC int 1267xfs_imap_lookup( 1268 struct xfs_mount *mp, 1269 struct xfs_trans *tp, 1270 xfs_agnumber_t agno, 1271 xfs_agino_t agino, 1272 xfs_agblock_t agbno, 1273 xfs_agblock_t *chunk_agbno, 1274 xfs_agblock_t *offset_agbno, 1275 int flags) 1276{ 1277 struct xfs_inobt_rec_incore rec; 1278 struct xfs_btree_cur *cur; 1279 struct xfs_buf *agbp; 1280 int error; 1281 int i; 1282 1283 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); 1284 if (error) { 1285 xfs_alert(mp, 1286 "%s: xfs_ialloc_read_agi() returned error %d, agno %d", 1287 __func__, error, agno); 1288 return error; 1289 } 1290 1291 /* 1292 * Lookup the inode record for the given agino. If the record cannot be 1293 * found, then it's an invalid inode number and we should abort. Once 1294 * we have a record, we need to ensure it contains the inode number 1295 * we are looking up. 1296 */ 1297 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); 1298 error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i); 1299 if (!error) { 1300 if (i) 1301 error = xfs_inobt_get_rec(cur, &rec, &i); 1302 if (!error && i == 0) 1303 error = EINVAL; 1304 } 1305 1306 xfs_trans_brelse(tp, agbp); 1307 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); 1308 if (error) 1309 return error; 1310 1311 /* check that the returned record contains the required inode */ 1312 if (rec.ir_startino > agino || 1313 rec.ir_startino + XFS_IALLOC_INODES(mp) <= agino) 1314 return EINVAL; 1315 1316 /* for untrusted inodes check it is allocated first */ 1317 if ((flags & XFS_IGET_UNTRUSTED) && 1318 (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino))) 1319 return EINVAL; 1320 1321 *chunk_agbno = XFS_AGINO_TO_AGBNO(mp, rec.ir_startino); 1322 *offset_agbno = agbno - *chunk_agbno; 1323 return 0; 1324} 1325 1326/* 1327 * Return the location of the inode in imap, for mapping it into a buffer. 1328 */ 1329int 1330xfs_imap( 1331 xfs_mount_t *mp, /* file system mount structure */ 1332 xfs_trans_t *tp, /* transaction pointer */ 1333 xfs_ino_t ino, /* inode to locate */ 1334 struct xfs_imap *imap, /* location map structure */ 1335 uint flags) /* flags for inode btree lookup */ 1336{ 1337 xfs_agblock_t agbno; /* block number of inode in the alloc group */ 1338 xfs_agino_t agino; /* inode number within alloc group */ 1339 xfs_agnumber_t agno; /* allocation group number */ 1340 int blks_per_cluster; /* num blocks per inode cluster */ 1341 xfs_agblock_t chunk_agbno; /* first block in inode chunk */ 1342 xfs_agblock_t cluster_agbno; /* first block in inode cluster */ 1343 int error; /* error code */ 1344 int offset; /* index of inode in its buffer */ 1345 xfs_agblock_t offset_agbno; /* blks from chunk start to inode */ 1346 1347 ASSERT(ino != NULLFSINO); 1348 1349 /* 1350 * Split up the inode number into its parts. 1351 */ 1352 agno = XFS_INO_TO_AGNO(mp, ino); 1353 agino = XFS_INO_TO_AGINO(mp, ino); 1354 agbno = XFS_AGINO_TO_AGBNO(mp, agino); 1355 if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks || 1356 ino != XFS_AGINO_TO_INO(mp, agno, agino)) { 1357#ifdef DEBUG 1358 /* 1359 * Don't output diagnostic information for untrusted inodes 1360 * as they can be invalid without implying corruption. 1361 */ 1362 if (flags & XFS_IGET_UNTRUSTED) 1363 return XFS_ERROR(EINVAL); 1364 if (agno >= mp->m_sb.sb_agcount) { 1365 xfs_alert(mp, 1366 "%s: agno (%d) >= mp->m_sb.sb_agcount (%d)", 1367 __func__, agno, mp->m_sb.sb_agcount); 1368 } 1369 if (agbno >= mp->m_sb.sb_agblocks) { 1370 xfs_alert(mp, 1371 "%s: agbno (0x%llx) >= mp->m_sb.sb_agblocks (0x%lx)", 1372 __func__, (unsigned long long)agbno, 1373 (unsigned long)mp->m_sb.sb_agblocks); 1374 } 1375 if (ino != XFS_AGINO_TO_INO(mp, agno, agino)) { 1376 xfs_alert(mp, 1377 "%s: ino (0x%llx) != XFS_AGINO_TO_INO() (0x%llx)", 1378 __func__, ino, 1379 XFS_AGINO_TO_INO(mp, agno, agino)); 1380 } 1381 xfs_stack_trace(); 1382#endif /* DEBUG */ 1383 return XFS_ERROR(EINVAL); 1384 } 1385 1386 blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog; 1387 1388 /* 1389 * For bulkstat and handle lookups, we have an untrusted inode number 1390 * that we have to verify is valid. We cannot do this just by reading 1391 * the inode buffer as it may have been unlinked and removed leaving 1392 * inodes in stale state on disk. Hence we have to do a btree lookup 1393 * in all cases where an untrusted inode number is passed. 1394 */ 1395 if (flags & XFS_IGET_UNTRUSTED) { 1396 error = xfs_imap_lookup(mp, tp, agno, agino, agbno, 1397 &chunk_agbno, &offset_agbno, flags); 1398 if (error) 1399 return error; 1400 goto out_map; 1401 } 1402 1403 /* 1404 * If the inode cluster size is the same as the blocksize or 1405 * smaller we get to the buffer by simple arithmetics. 1406 */ 1407 if (XFS_INODE_CLUSTER_SIZE(mp) <= mp->m_sb.sb_blocksize) { 1408 offset = XFS_INO_TO_OFFSET(mp, ino); 1409 ASSERT(offset < mp->m_sb.sb_inopblock); 1410 1411 imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, agbno); 1412 imap->im_len = XFS_FSB_TO_BB(mp, 1); 1413 imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog); 1414 return 0; 1415 } 1416 1417 /* 1418 * If the inode chunks are aligned then use simple maths to 1419 * find the location. Otherwise we have to do a btree 1420 * lookup to find the location. 1421 */ 1422 if (mp->m_inoalign_mask) { 1423 offset_agbno = agbno & mp->m_inoalign_mask; 1424 chunk_agbno = agbno - offset_agbno; 1425 } else { 1426 error = xfs_imap_lookup(mp, tp, agno, agino, agbno, 1427 &chunk_agbno, &offset_agbno, flags); 1428 if (error) 1429 return error; 1430 } 1431 1432out_map: 1433 ASSERT(agbno >= chunk_agbno); 1434 cluster_agbno = chunk_agbno + 1435 ((offset_agbno / blks_per_cluster) * blks_per_cluster); 1436 offset = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) + 1437 XFS_INO_TO_OFFSET(mp, ino); 1438 1439 imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, cluster_agbno); 1440 imap->im_len = XFS_FSB_TO_BB(mp, blks_per_cluster); 1441 imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog); 1442 1443 /* 1444 * If the inode number maps to a block outside the bounds 1445 * of the file system then return NULL rather than calling 1446 * read_buf and panicing when we get an error from the 1447 * driver. 1448 */ 1449 if ((imap->im_blkno + imap->im_len) > 1450 XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) { 1451 xfs_alert(mp, 1452 "%s: (im_blkno (0x%llx) + im_len (0x%llx)) > sb_dblocks (0x%llx)", 1453 __func__, (unsigned long long) imap->im_blkno, 1454 (unsigned long long) imap->im_len, 1455 XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)); 1456 return XFS_ERROR(EINVAL); 1457 } 1458 return 0; 1459} 1460 1461/* 1462 * Compute and fill in value of m_in_maxlevels. 1463 */ 1464void 1465xfs_ialloc_compute_maxlevels( 1466 xfs_mount_t *mp) /* file system mount structure */ 1467{ 1468 int level; 1469 uint maxblocks; 1470 uint maxleafents; 1471 int minleafrecs; 1472 int minnoderecs; 1473 1474 maxleafents = (1LL << XFS_INO_AGINO_BITS(mp)) >> 1475 XFS_INODES_PER_CHUNK_LOG; 1476 minleafrecs = mp->m_alloc_mnr[0]; 1477 minnoderecs = mp->m_alloc_mnr[1]; 1478 maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs; 1479 for (level = 1; maxblocks > 1; level++) 1480 maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs; 1481 mp->m_in_maxlevels = level; 1482} 1483 1484/* 1485 * Log specified fields for the ag hdr (inode section) 1486 */ 1487void 1488xfs_ialloc_log_agi( 1489 xfs_trans_t *tp, /* transaction pointer */ 1490 xfs_buf_t *bp, /* allocation group header buffer */ 1491 int fields) /* bitmask of fields to log */ 1492{ 1493 int first; /* first byte number */ 1494 int last; /* last byte number */ 1495 static const short offsets[] = { /* field starting offsets */ 1496 /* keep in sync with bit definitions */ 1497 offsetof(xfs_agi_t, agi_magicnum), 1498 offsetof(xfs_agi_t, agi_versionnum), 1499 offsetof(xfs_agi_t, agi_seqno), 1500 offsetof(xfs_agi_t, agi_length), 1501 offsetof(xfs_agi_t, agi_count), 1502 offsetof(xfs_agi_t, agi_root), 1503 offsetof(xfs_agi_t, agi_level), 1504 offsetof(xfs_agi_t, agi_freecount), 1505 offsetof(xfs_agi_t, agi_newino), 1506 offsetof(xfs_agi_t, agi_dirino), 1507 offsetof(xfs_agi_t, agi_unlinked), 1508 sizeof(xfs_agi_t) 1509 }; 1510#ifdef DEBUG 1511 xfs_agi_t *agi; /* allocation group header */ 1512 1513 agi = XFS_BUF_TO_AGI(bp); 1514 ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); 1515#endif 1516 /* 1517 * Compute byte offsets for the first and last fields. 1518 */ 1519 xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS, &first, &last); 1520 /* 1521 * Log the allocation group inode header buffer. 1522 */ 1523 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGI_BUF); 1524 xfs_trans_log_buf(tp, bp, first, last); 1525} 1526 1527#ifdef DEBUG 1528STATIC void 1529xfs_check_agi_unlinked( 1530 struct xfs_agi *agi) 1531{ 1532 int i; 1533 1534 for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++) 1535 ASSERT(agi->agi_unlinked[i]); 1536} 1537#else 1538#define xfs_check_agi_unlinked(agi) 1539#endif 1540 1541static bool 1542xfs_agi_verify( 1543 struct xfs_buf *bp) 1544{ 1545 struct xfs_mount *mp = bp->b_target->bt_mount; 1546 struct xfs_agi *agi = XFS_BUF_TO_AGI(bp); 1547 1548 if (xfs_sb_version_hascrc(&mp->m_sb) && 1549 !uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_uuid)) 1550 return false; 1551 /* 1552 * Validate the magic number of the agi block. 1553 */ 1554 if (agi->agi_magicnum != cpu_to_be32(XFS_AGI_MAGIC)) 1555 return false; 1556 if (!XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum))) 1557 return false; 1558 1559 /* 1560 * during growfs operations, the perag is not fully initialised, 1561 * so we can't use it for any useful checking. growfs ensures we can't 1562 * use it by using uncached buffers that don't have the perag attached 1563 * so we can detect and avoid this problem. 1564 */ 1565 if (bp->b_pag && be32_to_cpu(agi->agi_seqno) != bp->b_pag->pag_agno) 1566 return false; 1567 1568 xfs_check_agi_unlinked(agi); 1569 return true; 1570} 1571 1572static void 1573xfs_agi_read_verify( 1574 struct xfs_buf *bp) 1575{ 1576 struct xfs_mount *mp = bp->b_target->bt_mount; 1577 int agi_ok = 1; 1578 1579 if (xfs_sb_version_hascrc(&mp->m_sb)) 1580 agi_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), 1581 offsetof(struct xfs_agi, agi_crc)); 1582 agi_ok = agi_ok && xfs_agi_verify(bp); 1583 1584 if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI, 1585 XFS_RANDOM_IALLOC_READ_AGI))) { 1586 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); 1587 xfs_buf_ioerror(bp, EFSCORRUPTED); 1588 } 1589} 1590 1591static void 1592xfs_agi_write_verify( 1593 struct xfs_buf *bp) 1594{ 1595 struct xfs_mount *mp = bp->b_target->bt_mount; 1596 struct xfs_buf_log_item *bip = bp->b_fspriv; 1597 1598 if (!xfs_agi_verify(bp)) { 1599 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); 1600 xfs_buf_ioerror(bp, EFSCORRUPTED); 1601 return; 1602 } 1603 1604 if (!xfs_sb_version_hascrc(&mp->m_sb)) 1605 return; 1606 1607 if (bip) 1608 XFS_BUF_TO_AGI(bp)->agi_lsn = cpu_to_be64(bip->bli_item.li_lsn); 1609 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), 1610 offsetof(struct xfs_agi, agi_crc)); 1611} 1612 1613const struct xfs_buf_ops xfs_agi_buf_ops = { 1614 .verify_read = xfs_agi_read_verify, 1615 .verify_write = xfs_agi_write_verify, 1616}; 1617 1618/* 1619 * Read in the allocation group header (inode allocation section) 1620 */ 1621int 1622xfs_read_agi( 1623 struct xfs_mount *mp, /* file system mount structure */ 1624 struct xfs_trans *tp, /* transaction pointer */ 1625 xfs_agnumber_t agno, /* allocation group number */ 1626 struct xfs_buf **bpp) /* allocation group hdr buf */ 1627{ 1628 int error; 1629 1630 ASSERT(agno != NULLAGNUMBER); 1631 1632 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, 1633 XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)), 1634 XFS_FSS_TO_BB(mp, 1), 0, bpp, &xfs_agi_buf_ops); 1635 if (error) 1636 return error; 1637 1638 ASSERT(!xfs_buf_geterror(*bpp)); 1639 xfs_buf_set_ref(*bpp, XFS_AGI_REF); 1640 return 0; 1641} 1642 1643int 1644xfs_ialloc_read_agi( 1645 struct xfs_mount *mp, /* file system mount structure */ 1646 struct xfs_trans *tp, /* transaction pointer */ 1647 xfs_agnumber_t agno, /* allocation group number */ 1648 struct xfs_buf **bpp) /* allocation group hdr buf */ 1649{ 1650 struct xfs_agi *agi; /* allocation group header */ 1651 struct xfs_perag *pag; /* per allocation group data */ 1652 int error; 1653 1654 error = xfs_read_agi(mp, tp, agno, bpp); 1655 if (error) 1656 return error; 1657 1658 agi = XFS_BUF_TO_AGI(*bpp); 1659 pag = xfs_perag_get(mp, agno); 1660 if (!pag->pagi_init) { 1661 pag->pagi_freecount = be32_to_cpu(agi->agi_freecount); 1662 pag->pagi_count = be32_to_cpu(agi->agi_count); 1663 pag->pagi_init = 1; 1664 } 1665 1666 /* 1667 * It's possible for these to be out of sync if 1668 * we are in the middle of a forced shutdown. 1669 */ 1670 ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) || 1671 XFS_FORCED_SHUTDOWN(mp)); 1672 xfs_perag_put(pag); 1673 return 0; 1674} 1675 1676/* 1677 * Read in the agi to initialise the per-ag data in the mount structure 1678 */ 1679int 1680xfs_ialloc_pagi_init( 1681 xfs_mount_t *mp, /* file system mount structure */ 1682 xfs_trans_t *tp, /* transaction pointer */ 1683 xfs_agnumber_t agno) /* allocation group number */ 1684{ 1685 xfs_buf_t *bp = NULL; 1686 int error; 1687 1688 error = xfs_ialloc_read_agi(mp, tp, agno, &bp); 1689 if (error) 1690 return error; 1691 if (bp) 1692 xfs_trans_brelse(tp, bp); 1693 return 0; 1694}