/* Snapshot of fs/ext3/resize.c as of v3.3-rc4 (1120 lines, 35 kB). */
1/* 2 * linux/fs/ext3/resize.c 3 * 4 * Support for resizing an ext3 filesystem while it is mounted. 5 * 6 * Copyright (C) 2001, 2002 Andreas Dilger <adilger@clusterfs.com> 7 * 8 * This could probably be made into a module, because it is not often in use. 9 */ 10 11 12#define EXT3FS_DEBUG 13 14#include <linux/ext3_jbd.h> 15 16#include <linux/errno.h> 17#include <linux/slab.h> 18 19 20#define outside(b, first, last) ((b) < (first) || (b) >= (last)) 21#define inside(b, first, last) ((b) >= (first) && (b) < (last)) 22 23static int verify_group_input(struct super_block *sb, 24 struct ext3_new_group_data *input) 25{ 26 struct ext3_sb_info *sbi = EXT3_SB(sb); 27 struct ext3_super_block *es = sbi->s_es; 28 ext3_fsblk_t start = le32_to_cpu(es->s_blocks_count); 29 ext3_fsblk_t end = start + input->blocks_count; 30 unsigned group = input->group; 31 ext3_fsblk_t itend = input->inode_table + sbi->s_itb_per_group; 32 unsigned overhead = ext3_bg_has_super(sb, group) ? 33 (1 + ext3_bg_num_gdb(sb, group) + 34 le16_to_cpu(es->s_reserved_gdt_blocks)) : 0; 35 ext3_fsblk_t metaend = start + overhead; 36 struct buffer_head *bh = NULL; 37 ext3_grpblk_t free_blocks_count; 38 int err = -EINVAL; 39 40 input->free_blocks_count = free_blocks_count = 41 input->blocks_count - 2 - overhead - sbi->s_itb_per_group; 42 43 if (test_opt(sb, DEBUG)) 44 printk(KERN_DEBUG "EXT3-fs: adding %s group %u: %u blocks " 45 "(%d free, %u reserved)\n", 46 ext3_bg_has_super(sb, input->group) ? 
"normal" : 47 "no-super", input->group, input->blocks_count, 48 free_blocks_count, input->reserved_blocks); 49 50 if (group != sbi->s_groups_count) 51 ext3_warning(sb, __func__, 52 "Cannot add at group %u (only %lu groups)", 53 input->group, sbi->s_groups_count); 54 else if ((start - le32_to_cpu(es->s_first_data_block)) % 55 EXT3_BLOCKS_PER_GROUP(sb)) 56 ext3_warning(sb, __func__, "Last group not full"); 57 else if (input->reserved_blocks > input->blocks_count / 5) 58 ext3_warning(sb, __func__, "Reserved blocks too high (%u)", 59 input->reserved_blocks); 60 else if (free_blocks_count < 0) 61 ext3_warning(sb, __func__, "Bad blocks count %u", 62 input->blocks_count); 63 else if (!(bh = sb_bread(sb, end - 1))) 64 ext3_warning(sb, __func__, 65 "Cannot read last block ("E3FSBLK")", 66 end - 1); 67 else if (outside(input->block_bitmap, start, end)) 68 ext3_warning(sb, __func__, 69 "Block bitmap not in group (block %u)", 70 input->block_bitmap); 71 else if (outside(input->inode_bitmap, start, end)) 72 ext3_warning(sb, __func__, 73 "Inode bitmap not in group (block %u)", 74 input->inode_bitmap); 75 else if (outside(input->inode_table, start, end) || 76 outside(itend - 1, start, end)) 77 ext3_warning(sb, __func__, 78 "Inode table not in group (blocks %u-"E3FSBLK")", 79 input->inode_table, itend - 1); 80 else if (input->inode_bitmap == input->block_bitmap) 81 ext3_warning(sb, __func__, 82 "Block bitmap same as inode bitmap (%u)", 83 input->block_bitmap); 84 else if (inside(input->block_bitmap, input->inode_table, itend)) 85 ext3_warning(sb, __func__, 86 "Block bitmap (%u) in inode table (%u-"E3FSBLK")", 87 input->block_bitmap, input->inode_table, itend-1); 88 else if (inside(input->inode_bitmap, input->inode_table, itend)) 89 ext3_warning(sb, __func__, 90 "Inode bitmap (%u) in inode table (%u-"E3FSBLK")", 91 input->inode_bitmap, input->inode_table, itend-1); 92 else if (inside(input->block_bitmap, start, metaend)) 93 ext3_warning(sb, __func__, 94 "Block bitmap (%u) in GDT 
table" 95 " ("E3FSBLK"-"E3FSBLK")", 96 input->block_bitmap, start, metaend - 1); 97 else if (inside(input->inode_bitmap, start, metaend)) 98 ext3_warning(sb, __func__, 99 "Inode bitmap (%u) in GDT table" 100 " ("E3FSBLK"-"E3FSBLK")", 101 input->inode_bitmap, start, metaend - 1); 102 else if (inside(input->inode_table, start, metaend) || 103 inside(itend - 1, start, metaend)) 104 ext3_warning(sb, __func__, 105 "Inode table (%u-"E3FSBLK") overlaps" 106 "GDT table ("E3FSBLK"-"E3FSBLK")", 107 input->inode_table, itend - 1, start, metaend - 1); 108 else 109 err = 0; 110 brelse(bh); 111 112 return err; 113} 114 115static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, 116 ext3_fsblk_t blk) 117{ 118 struct buffer_head *bh; 119 int err; 120 121 bh = sb_getblk(sb, blk); 122 if (!bh) 123 return ERR_PTR(-EIO); 124 if ((err = ext3_journal_get_write_access(handle, bh))) { 125 brelse(bh); 126 bh = ERR_PTR(err); 127 } else { 128 lock_buffer(bh); 129 memset(bh->b_data, 0, sb->s_blocksize); 130 set_buffer_uptodate(bh); 131 unlock_buffer(bh); 132 } 133 134 return bh; 135} 136 137/* 138 * To avoid calling the atomic setbit hundreds or thousands of times, we only 139 * need to use it within a single byte (to ensure we get endianness right). 140 * We can use memset for the rest of the bitmap as there are no other users. 141 */ 142static void mark_bitmap_end(int start_bit, int end_bit, char *bitmap) 143{ 144 int i; 145 146 if (start_bit >= end_bit) 147 return; 148 149 ext3_debug("mark end bits +%d through +%d used\n", start_bit, end_bit); 150 for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++) 151 ext3_set_bit(i, bitmap); 152 if (i < end_bit) 153 memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3); 154} 155 156/* 157 * If we have fewer than thresh credits, extend by EXT3_MAX_TRANS_DATA. 158 * If that fails, restart the transaction & regain write access for the 159 * buffer head which is used for block_bitmap modifications. 
160 */ 161static int extend_or_restart_transaction(handle_t *handle, int thresh, 162 struct buffer_head *bh) 163{ 164 int err; 165 166 if (handle->h_buffer_credits >= thresh) 167 return 0; 168 169 err = ext3_journal_extend(handle, EXT3_MAX_TRANS_DATA); 170 if (err < 0) 171 return err; 172 if (err) { 173 err = ext3_journal_restart(handle, EXT3_MAX_TRANS_DATA); 174 if (err) 175 return err; 176 err = ext3_journal_get_write_access(handle, bh); 177 if (err) 178 return err; 179 } 180 181 return 0; 182} 183 184/* 185 * Set up the block and inode bitmaps, and the inode table for the new group. 186 * This doesn't need to be part of the main transaction, since we are only 187 * changing blocks outside the actual filesystem. We still do journaling to 188 * ensure the recovery is correct in case of a failure just after resize. 189 * If any part of this fails, we simply abort the resize. 190 */ 191static int setup_new_group_blocks(struct super_block *sb, 192 struct ext3_new_group_data *input) 193{ 194 struct ext3_sb_info *sbi = EXT3_SB(sb); 195 ext3_fsblk_t start = ext3_group_first_block_no(sb, input->group); 196 int reserved_gdb = ext3_bg_has_super(sb, input->group) ? 
197 le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0; 198 unsigned long gdblocks = ext3_bg_num_gdb(sb, input->group); 199 struct buffer_head *bh; 200 handle_t *handle; 201 ext3_fsblk_t block; 202 ext3_grpblk_t bit; 203 int i; 204 int err = 0, err2; 205 206 /* This transaction may be extended/restarted along the way */ 207 handle = ext3_journal_start_sb(sb, EXT3_MAX_TRANS_DATA); 208 209 if (IS_ERR(handle)) 210 return PTR_ERR(handle); 211 212 mutex_lock(&sbi->s_resize_lock); 213 if (input->group != sbi->s_groups_count) { 214 err = -EBUSY; 215 goto exit_journal; 216 } 217 218 if (IS_ERR(bh = bclean(handle, sb, input->block_bitmap))) { 219 err = PTR_ERR(bh); 220 goto exit_journal; 221 } 222 223 if (ext3_bg_has_super(sb, input->group)) { 224 ext3_debug("mark backup superblock %#04lx (+0)\n", start); 225 ext3_set_bit(0, bh->b_data); 226 } 227 228 /* Copy all of the GDT blocks into the backup in this group */ 229 for (i = 0, bit = 1, block = start + 1; 230 i < gdblocks; i++, block++, bit++) { 231 struct buffer_head *gdb; 232 233 ext3_debug("update backup group %#04lx (+%d)\n", block, bit); 234 235 err = extend_or_restart_transaction(handle, 1, bh); 236 if (err) 237 goto exit_bh; 238 239 gdb = sb_getblk(sb, block); 240 if (!gdb) { 241 err = -EIO; 242 goto exit_bh; 243 } 244 if ((err = ext3_journal_get_write_access(handle, gdb))) { 245 brelse(gdb); 246 goto exit_bh; 247 } 248 lock_buffer(gdb); 249 memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size); 250 set_buffer_uptodate(gdb); 251 unlock_buffer(gdb); 252 err = ext3_journal_dirty_metadata(handle, gdb); 253 if (err) { 254 brelse(gdb); 255 goto exit_bh; 256 } 257 ext3_set_bit(bit, bh->b_data); 258 brelse(gdb); 259 } 260 261 /* Zero out all of the reserved backup group descriptor table blocks */ 262 for (i = 0, bit = gdblocks + 1, block = start + bit; 263 i < reserved_gdb; i++, block++, bit++) { 264 struct buffer_head *gdb; 265 266 ext3_debug("clear reserved block %#04lx (+%d)\n", block, bit); 267 268 err = 
extend_or_restart_transaction(handle, 1, bh); 269 if (err) 270 goto exit_bh; 271 272 if (IS_ERR(gdb = bclean(handle, sb, block))) { 273 err = PTR_ERR(gdb); 274 goto exit_bh; 275 } 276 err = ext3_journal_dirty_metadata(handle, gdb); 277 if (err) { 278 brelse(gdb); 279 goto exit_bh; 280 } 281 ext3_set_bit(bit, bh->b_data); 282 brelse(gdb); 283 } 284 ext3_debug("mark block bitmap %#04x (+%ld)\n", input->block_bitmap, 285 input->block_bitmap - start); 286 ext3_set_bit(input->block_bitmap - start, bh->b_data); 287 ext3_debug("mark inode bitmap %#04x (+%ld)\n", input->inode_bitmap, 288 input->inode_bitmap - start); 289 ext3_set_bit(input->inode_bitmap - start, bh->b_data); 290 291 /* Zero out all of the inode table blocks */ 292 for (i = 0, block = input->inode_table, bit = block - start; 293 i < sbi->s_itb_per_group; i++, bit++, block++) { 294 struct buffer_head *it; 295 296 ext3_debug("clear inode block %#04lx (+%d)\n", block, bit); 297 298 err = extend_or_restart_transaction(handle, 1, bh); 299 if (err) 300 goto exit_bh; 301 302 if (IS_ERR(it = bclean(handle, sb, block))) { 303 err = PTR_ERR(it); 304 goto exit_bh; 305 } 306 err = ext3_journal_dirty_metadata(handle, it); 307 if (err) { 308 brelse(it); 309 goto exit_bh; 310 } 311 brelse(it); 312 ext3_set_bit(bit, bh->b_data); 313 } 314 315 err = extend_or_restart_transaction(handle, 2, bh); 316 if (err) 317 goto exit_bh; 318 319 mark_bitmap_end(input->blocks_count, EXT3_BLOCKS_PER_GROUP(sb), 320 bh->b_data); 321 err = ext3_journal_dirty_metadata(handle, bh); 322 if (err) 323 goto exit_bh; 324 brelse(bh); 325 326 /* Mark unused entries in inode bitmap used */ 327 ext3_debug("clear inode bitmap %#04x (+%ld)\n", 328 input->inode_bitmap, input->inode_bitmap - start); 329 if (IS_ERR(bh = bclean(handle, sb, input->inode_bitmap))) { 330 err = PTR_ERR(bh); 331 goto exit_journal; 332 } 333 334 mark_bitmap_end(EXT3_INODES_PER_GROUP(sb), EXT3_BLOCKS_PER_GROUP(sb), 335 bh->b_data); 336 err = ext3_journal_dirty_metadata(handle, bh); 
337exit_bh: 338 brelse(bh); 339 340exit_journal: 341 mutex_unlock(&sbi->s_resize_lock); 342 if ((err2 = ext3_journal_stop(handle)) && !err) 343 err = err2; 344 345 return err; 346} 347 348/* 349 * Iterate through the groups which hold BACKUP superblock/GDT copies in an 350 * ext3 filesystem. The counters should be initialized to 1, 5, and 7 before 351 * calling this for the first time. In a sparse filesystem it will be the 352 * sequence of powers of 3, 5, and 7: 1, 3, 5, 7, 9, 25, 27, 49, 81, ... 353 * For a non-sparse filesystem it will be every group: 1, 2, 3, 4, ... 354 */ 355static unsigned ext3_list_backups(struct super_block *sb, unsigned *three, 356 unsigned *five, unsigned *seven) 357{ 358 unsigned *min = three; 359 int mult = 3; 360 unsigned ret; 361 362 if (!EXT3_HAS_RO_COMPAT_FEATURE(sb, 363 EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)) { 364 ret = *min; 365 *min += 1; 366 return ret; 367 } 368 369 if (*five < *min) { 370 min = five; 371 mult = 5; 372 } 373 if (*seven < *min) { 374 min = seven; 375 mult = 7; 376 } 377 378 ret = *min; 379 *min *= mult; 380 381 return ret; 382} 383 384/* 385 * Check that all of the backup GDT blocks are held in the primary GDT block. 386 * It is assumed that they are stored in group order. Returns the number of 387 * groups in current filesystem that have BACKUPS, or -ve error code. 
388 */ 389static int verify_reserved_gdb(struct super_block *sb, 390 struct buffer_head *primary) 391{ 392 const ext3_fsblk_t blk = primary->b_blocknr; 393 const unsigned long end = EXT3_SB(sb)->s_groups_count; 394 unsigned three = 1; 395 unsigned five = 5; 396 unsigned seven = 7; 397 unsigned grp; 398 __le32 *p = (__le32 *)primary->b_data; 399 int gdbackups = 0; 400 401 while ((grp = ext3_list_backups(sb, &three, &five, &seven)) < end) { 402 if (le32_to_cpu(*p++) != grp * EXT3_BLOCKS_PER_GROUP(sb) + blk){ 403 ext3_warning(sb, __func__, 404 "reserved GDT "E3FSBLK 405 " missing grp %d ("E3FSBLK")", 406 blk, grp, 407 grp * EXT3_BLOCKS_PER_GROUP(sb) + blk); 408 return -EINVAL; 409 } 410 if (++gdbackups > EXT3_ADDR_PER_BLOCK(sb)) 411 return -EFBIG; 412 } 413 414 return gdbackups; 415} 416 417/* 418 * Called when we need to bring a reserved group descriptor table block into 419 * use from the resize inode. The primary copy of the new GDT block currently 420 * is an indirect block (under the double indirect block in the resize inode). 421 * The new backup GDT blocks will be stored as leaf blocks in this indirect 422 * block, in group order. Even though we know all the block numbers we need, 423 * we check to ensure that the resize inode has actually reserved these blocks. 424 * 425 * Don't need to update the block bitmaps because the blocks are still in use. 426 * 427 * We get all of the error cases out of the way, so that we are sure to not 428 * fail once we start modifying the data on disk, because JBD has no rollback. 
429 */ 430static int add_new_gdb(handle_t *handle, struct inode *inode, 431 struct ext3_new_group_data *input, 432 struct buffer_head **primary) 433{ 434 struct super_block *sb = inode->i_sb; 435 struct ext3_super_block *es = EXT3_SB(sb)->s_es; 436 unsigned long gdb_num = input->group / EXT3_DESC_PER_BLOCK(sb); 437 ext3_fsblk_t gdblock = EXT3_SB(sb)->s_sbh->b_blocknr + 1 + gdb_num; 438 struct buffer_head **o_group_desc, **n_group_desc; 439 struct buffer_head *dind; 440 int gdbackups; 441 struct ext3_iloc iloc; 442 __le32 *data; 443 int err; 444 445 if (test_opt(sb, DEBUG)) 446 printk(KERN_DEBUG 447 "EXT3-fs: ext3_add_new_gdb: adding group block %lu\n", 448 gdb_num); 449 450 /* 451 * If we are not using the primary superblock/GDT copy don't resize, 452 * because the user tools have no way of handling this. Probably a 453 * bad time to do it anyways. 454 */ 455 if (EXT3_SB(sb)->s_sbh->b_blocknr != 456 le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block)) { 457 ext3_warning(sb, __func__, 458 "won't resize using backup superblock at %llu", 459 (unsigned long long)EXT3_SB(sb)->s_sbh->b_blocknr); 460 return -EPERM; 461 } 462 463 *primary = sb_bread(sb, gdblock); 464 if (!*primary) 465 return -EIO; 466 467 if ((gdbackups = verify_reserved_gdb(sb, *primary)) < 0) { 468 err = gdbackups; 469 goto exit_bh; 470 } 471 472 data = EXT3_I(inode)->i_data + EXT3_DIND_BLOCK; 473 dind = sb_bread(sb, le32_to_cpu(*data)); 474 if (!dind) { 475 err = -EIO; 476 goto exit_bh; 477 } 478 479 data = (__le32 *)dind->b_data; 480 if (le32_to_cpu(data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)]) != gdblock) { 481 ext3_warning(sb, __func__, 482 "new group %u GDT block "E3FSBLK" not reserved", 483 input->group, gdblock); 484 err = -EINVAL; 485 goto exit_dind; 486 } 487 488 if ((err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh))) 489 goto exit_dind; 490 491 if ((err = ext3_journal_get_write_access(handle, *primary))) 492 goto exit_sbh; 493 494 if ((err = ext3_journal_get_write_access(handle, 
dind))) 495 goto exit_primary; 496 497 /* ext3_reserve_inode_write() gets a reference on the iloc */ 498 if ((err = ext3_reserve_inode_write(handle, inode, &iloc))) 499 goto exit_dindj; 500 501 n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *), 502 GFP_NOFS); 503 if (!n_group_desc) { 504 err = -ENOMEM; 505 ext3_warning (sb, __func__, 506 "not enough memory for %lu groups", gdb_num + 1); 507 goto exit_inode; 508 } 509 510 /* 511 * Finally, we have all of the possible failures behind us... 512 * 513 * Remove new GDT block from inode double-indirect block and clear out 514 * the new GDT block for use (which also "frees" the backup GDT blocks 515 * from the reserved inode). We don't need to change the bitmaps for 516 * these blocks, because they are marked as in-use from being in the 517 * reserved inode, and will become GDT blocks (primary and backup). 518 */ 519 data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)] = 0; 520 err = ext3_journal_dirty_metadata(handle, dind); 521 if (err) 522 goto exit_group_desc; 523 brelse(dind); 524 dind = NULL; 525 inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9; 526 err = ext3_mark_iloc_dirty(handle, inode, &iloc); 527 if (err) 528 goto exit_group_desc; 529 memset((*primary)->b_data, 0, sb->s_blocksize); 530 err = ext3_journal_dirty_metadata(handle, *primary); 531 if (err) 532 goto exit_group_desc; 533 534 o_group_desc = EXT3_SB(sb)->s_group_desc; 535 memcpy(n_group_desc, o_group_desc, 536 EXT3_SB(sb)->s_gdb_count * sizeof(struct buffer_head *)); 537 n_group_desc[gdb_num] = *primary; 538 EXT3_SB(sb)->s_group_desc = n_group_desc; 539 EXT3_SB(sb)->s_gdb_count++; 540 kfree(o_group_desc); 541 542 le16_add_cpu(&es->s_reserved_gdt_blocks, -1); 543 err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); 544 if (err) 545 goto exit_inode; 546 547 return 0; 548 549exit_group_desc: 550 kfree(n_group_desc); 551exit_inode: 552 //ext3_journal_release_buffer(handle, iloc.bh); 553 brelse(iloc.bh); 554exit_dindj: 555 
//ext3_journal_release_buffer(handle, dind); 556exit_primary: 557 //ext3_journal_release_buffer(handle, *primary); 558exit_sbh: 559 //ext3_journal_release_buffer(handle, *primary); 560exit_dind: 561 brelse(dind); 562exit_bh: 563 brelse(*primary); 564 565 ext3_debug("leaving with error %d\n", err); 566 return err; 567} 568 569/* 570 * Called when we are adding a new group which has a backup copy of each of 571 * the GDT blocks (i.e. sparse group) and there are reserved GDT blocks. 572 * We need to add these reserved backup GDT blocks to the resize inode, so 573 * that they are kept for future resizing and not allocated to files. 574 * 575 * Each reserved backup GDT block will go into a different indirect block. 576 * The indirect blocks are actually the primary reserved GDT blocks, 577 * so we know in advance what their block numbers are. We only get the 578 * double-indirect block to verify it is pointing to the primary reserved 579 * GDT blocks so we don't overwrite a data block by accident. The reserved 580 * backup GDT blocks are stored in their reserved primary GDT block. 
581 */ 582static int reserve_backup_gdb(handle_t *handle, struct inode *inode, 583 struct ext3_new_group_data *input) 584{ 585 struct super_block *sb = inode->i_sb; 586 int reserved_gdb =le16_to_cpu(EXT3_SB(sb)->s_es->s_reserved_gdt_blocks); 587 struct buffer_head **primary; 588 struct buffer_head *dind; 589 struct ext3_iloc iloc; 590 ext3_fsblk_t blk; 591 __le32 *data, *end; 592 int gdbackups = 0; 593 int res, i; 594 int err; 595 596 primary = kmalloc(reserved_gdb * sizeof(*primary), GFP_NOFS); 597 if (!primary) 598 return -ENOMEM; 599 600 data = EXT3_I(inode)->i_data + EXT3_DIND_BLOCK; 601 dind = sb_bread(sb, le32_to_cpu(*data)); 602 if (!dind) { 603 err = -EIO; 604 goto exit_free; 605 } 606 607 blk = EXT3_SB(sb)->s_sbh->b_blocknr + 1 + EXT3_SB(sb)->s_gdb_count; 608 data = (__le32 *)dind->b_data + (EXT3_SB(sb)->s_gdb_count % 609 EXT3_ADDR_PER_BLOCK(sb)); 610 end = (__le32 *)dind->b_data + EXT3_ADDR_PER_BLOCK(sb); 611 612 /* Get each reserved primary GDT block and verify it holds backups */ 613 for (res = 0; res < reserved_gdb; res++, blk++) { 614 if (le32_to_cpu(*data) != blk) { 615 ext3_warning(sb, __func__, 616 "reserved block "E3FSBLK 617 " not at offset %ld", 618 blk, 619 (long)(data - (__le32 *)dind->b_data)); 620 err = -EINVAL; 621 goto exit_bh; 622 } 623 primary[res] = sb_bread(sb, blk); 624 if (!primary[res]) { 625 err = -EIO; 626 goto exit_bh; 627 } 628 if ((gdbackups = verify_reserved_gdb(sb, primary[res])) < 0) { 629 brelse(primary[res]); 630 err = gdbackups; 631 goto exit_bh; 632 } 633 if (++data >= end) 634 data = (__le32 *)dind->b_data; 635 } 636 637 for (i = 0; i < reserved_gdb; i++) { 638 if ((err = ext3_journal_get_write_access(handle, primary[i]))) { 639 /* 640 int j; 641 for (j = 0; j < i; j++) 642 ext3_journal_release_buffer(handle, primary[j]); 643 */ 644 goto exit_bh; 645 } 646 } 647 648 if ((err = ext3_reserve_inode_write(handle, inode, &iloc))) 649 goto exit_bh; 650 651 /* 652 * Finally we can add each of the reserved backup GDT blocks 
from 653 * the new group to its reserved primary GDT block. 654 */ 655 blk = input->group * EXT3_BLOCKS_PER_GROUP(sb); 656 for (i = 0; i < reserved_gdb; i++) { 657 int err2; 658 data = (__le32 *)primary[i]->b_data; 659 /* printk("reserving backup %lu[%u] = %lu\n", 660 primary[i]->b_blocknr, gdbackups, 661 blk + primary[i]->b_blocknr); */ 662 data[gdbackups] = cpu_to_le32(blk + primary[i]->b_blocknr); 663 err2 = ext3_journal_dirty_metadata(handle, primary[i]); 664 if (!err) 665 err = err2; 666 } 667 inode->i_blocks += reserved_gdb * sb->s_blocksize >> 9; 668 ext3_mark_iloc_dirty(handle, inode, &iloc); 669 670exit_bh: 671 while (--res >= 0) 672 brelse(primary[res]); 673 brelse(dind); 674 675exit_free: 676 kfree(primary); 677 678 return err; 679} 680 681/* 682 * Update the backup copies of the ext3 metadata. These don't need to be part 683 * of the main resize transaction, because e2fsck will re-write them if there 684 * is a problem (basically only OOM will cause a problem). However, we 685 * _should_ update the backups if possible, in case the primary gets trashed 686 * for some reason and we need to run e2fsck from a backup superblock. The 687 * important part is that the new block and inode counts are in the backup 688 * superblocks, and the location of the new group metadata in the GDT backups. 689 * 690 * We do not need take the s_resize_lock for this, because these 691 * blocks are not otherwise touched by the filesystem code when it is 692 * mounted. We don't need to worry about last changing from 693 * sbi->s_groups_count, because the worst that can happen is that we 694 * do not copy the full number of backups at this time. The resize 695 * which changed s_groups_count will backup again. 
696 */ 697static void update_backups(struct super_block *sb, 698 int blk_off, char *data, int size) 699{ 700 struct ext3_sb_info *sbi = EXT3_SB(sb); 701 const unsigned long last = sbi->s_groups_count; 702 const int bpg = EXT3_BLOCKS_PER_GROUP(sb); 703 unsigned three = 1; 704 unsigned five = 5; 705 unsigned seven = 7; 706 unsigned group; 707 int rest = sb->s_blocksize - size; 708 handle_t *handle; 709 int err = 0, err2; 710 711 handle = ext3_journal_start_sb(sb, EXT3_MAX_TRANS_DATA); 712 if (IS_ERR(handle)) { 713 group = 1; 714 err = PTR_ERR(handle); 715 goto exit_err; 716 } 717 718 while ((group = ext3_list_backups(sb, &three, &five, &seven)) < last) { 719 struct buffer_head *bh; 720 721 /* Out of journal space, and can't get more - abort - so sad */ 722 if (handle->h_buffer_credits == 0 && 723 ext3_journal_extend(handle, EXT3_MAX_TRANS_DATA) && 724 (err = ext3_journal_restart(handle, EXT3_MAX_TRANS_DATA))) 725 break; 726 727 bh = sb_getblk(sb, group * bpg + blk_off); 728 if (!bh) { 729 err = -EIO; 730 break; 731 } 732 ext3_debug("update metadata backup %#04lx\n", 733 (unsigned long)bh->b_blocknr); 734 if ((err = ext3_journal_get_write_access(handle, bh))) { 735 brelse(bh); 736 break; 737 } 738 lock_buffer(bh); 739 memcpy(bh->b_data, data, size); 740 if (rest) 741 memset(bh->b_data + size, 0, rest); 742 set_buffer_uptodate(bh); 743 unlock_buffer(bh); 744 err = ext3_journal_dirty_metadata(handle, bh); 745 brelse(bh); 746 if (err) 747 break; 748 } 749 if ((err2 = ext3_journal_stop(handle)) && !err) 750 err = err2; 751 752 /* 753 * Ugh! Need to have e2fsck write the backup copies. It is too 754 * late to revert the resize, we shouldn't fail just because of 755 * the backup copies (they are only needed in case of corruption). 756 * 757 * However, if we got here we have a journal problem too, so we 758 * can't really start a transaction to mark the superblock. 
759 * Chicken out and just set the flag on the hope it will be written 760 * to disk, and if not - we will simply wait until next fsck. 761 */ 762exit_err: 763 if (err) { 764 ext3_warning(sb, __func__, 765 "can't update backup for group %d (err %d), " 766 "forcing fsck on next reboot", group, err); 767 sbi->s_mount_state &= ~EXT3_VALID_FS; 768 sbi->s_es->s_state &= cpu_to_le16(~EXT3_VALID_FS); 769 mark_buffer_dirty(sbi->s_sbh); 770 } 771} 772 773/* Add group descriptor data to an existing or new group descriptor block. 774 * Ensure we handle all possible error conditions _before_ we start modifying 775 * the filesystem, because we cannot abort the transaction and not have it 776 * write the data to disk. 777 * 778 * If we are on a GDT block boundary, we need to get the reserved GDT block. 779 * Otherwise, we may need to add backup GDT blocks for a sparse group. 780 * 781 * We only need to hold the superblock lock while we are actually adding 782 * in the new group's counts to the superblock. Prior to that we have 783 * not really "added" the group at all. We re-check that we are still 784 * adding in the last group in case things have changed since verifying. 785 */ 786int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input) 787{ 788 struct ext3_sb_info *sbi = EXT3_SB(sb); 789 struct ext3_super_block *es = sbi->s_es; 790 int reserved_gdb = ext3_bg_has_super(sb, input->group) ? 
791 le16_to_cpu(es->s_reserved_gdt_blocks) : 0; 792 struct buffer_head *primary = NULL; 793 struct ext3_group_desc *gdp; 794 struct inode *inode = NULL; 795 handle_t *handle; 796 int gdb_off, gdb_num; 797 int err, err2; 798 799 gdb_num = input->group / EXT3_DESC_PER_BLOCK(sb); 800 gdb_off = input->group % EXT3_DESC_PER_BLOCK(sb); 801 802 if (gdb_off == 0 && !EXT3_HAS_RO_COMPAT_FEATURE(sb, 803 EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)) { 804 ext3_warning(sb, __func__, 805 "Can't resize non-sparse filesystem further"); 806 return -EPERM; 807 } 808 809 if (le32_to_cpu(es->s_blocks_count) + input->blocks_count < 810 le32_to_cpu(es->s_blocks_count)) { 811 ext3_warning(sb, __func__, "blocks_count overflow\n"); 812 return -EINVAL; 813 } 814 815 if (le32_to_cpu(es->s_inodes_count) + EXT3_INODES_PER_GROUP(sb) < 816 le32_to_cpu(es->s_inodes_count)) { 817 ext3_warning(sb, __func__, "inodes_count overflow\n"); 818 return -EINVAL; 819 } 820 821 if (reserved_gdb || gdb_off == 0) { 822 if (!EXT3_HAS_COMPAT_FEATURE(sb, 823 EXT3_FEATURE_COMPAT_RESIZE_INODE) 824 || !le16_to_cpu(es->s_reserved_gdt_blocks)) { 825 ext3_warning(sb, __func__, 826 "No reserved GDT blocks, can't resize"); 827 return -EPERM; 828 } 829 inode = ext3_iget(sb, EXT3_RESIZE_INO); 830 if (IS_ERR(inode)) { 831 ext3_warning(sb, __func__, 832 "Error opening resize inode"); 833 return PTR_ERR(inode); 834 } 835 } 836 837 if ((err = verify_group_input(sb, input))) 838 goto exit_put; 839 840 if ((err = setup_new_group_blocks(sb, input))) 841 goto exit_put; 842 843 /* 844 * We will always be modifying at least the superblock and a GDT 845 * block. If we are adding a group past the last current GDT block, 846 * we will also modify the inode and the dindirect block. If we 847 * are adding a group with superblock/GDT backups we will also 848 * modify each of the reserved GDT dindirect blocks. 849 */ 850 handle = ext3_journal_start_sb(sb, 851 ext3_bg_has_super(sb, input->group) ? 
852 3 + reserved_gdb : 4); 853 if (IS_ERR(handle)) { 854 err = PTR_ERR(handle); 855 goto exit_put; 856 } 857 858 mutex_lock(&sbi->s_resize_lock); 859 if (input->group != sbi->s_groups_count) { 860 ext3_warning(sb, __func__, 861 "multiple resizers run on filesystem!"); 862 err = -EBUSY; 863 goto exit_journal; 864 } 865 866 if ((err = ext3_journal_get_write_access(handle, sbi->s_sbh))) 867 goto exit_journal; 868 869 /* 870 * We will only either add reserved group blocks to a backup group 871 * or remove reserved blocks for the first group in a new group block. 872 * Doing both would be mean more complex code, and sane people don't 873 * use non-sparse filesystems anymore. This is already checked above. 874 */ 875 if (gdb_off) { 876 primary = sbi->s_group_desc[gdb_num]; 877 if ((err = ext3_journal_get_write_access(handle, primary))) 878 goto exit_journal; 879 880 if (reserved_gdb && ext3_bg_num_gdb(sb, input->group) && 881 (err = reserve_backup_gdb(handle, inode, input))) 882 goto exit_journal; 883 } else if ((err = add_new_gdb(handle, inode, input, &primary))) 884 goto exit_journal; 885 886 /* 887 * OK, now we've set up the new group. Time to make it active. 888 * 889 * We do not lock all allocations via s_resize_lock 890 * so we have to be safe wrt. concurrent accesses the group 891 * data. So we need to be careful to set all of the relevant 892 * group descriptor data etc. *before* we enable the group. 893 * 894 * The key field here is sbi->s_groups_count: as long as 895 * that retains its old value, nobody is going to access the new 896 * group. 897 * 898 * So first we update all the descriptor metadata for the new 899 * group; then we update the total disk blocks count; then we 900 * update the groups count to enable the group; then finally we 901 * update the free space counts so that the system can start 902 * using the new disk blocks. 
903 */ 904 905 /* Update group descriptor block for new group */ 906 gdp = (struct ext3_group_desc *)primary->b_data + gdb_off; 907 908 gdp->bg_block_bitmap = cpu_to_le32(input->block_bitmap); 909 gdp->bg_inode_bitmap = cpu_to_le32(input->inode_bitmap); 910 gdp->bg_inode_table = cpu_to_le32(input->inode_table); 911 gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count); 912 gdp->bg_free_inodes_count = cpu_to_le16(EXT3_INODES_PER_GROUP(sb)); 913 914 /* 915 * Make the new blocks and inodes valid next. We do this before 916 * increasing the group count so that once the group is enabled, 917 * all of its blocks and inodes are already valid. 918 * 919 * We always allocate group-by-group, then block-by-block or 920 * inode-by-inode within a group, so enabling these 921 * blocks/inodes before the group is live won't actually let us 922 * allocate the new space yet. 923 */ 924 le32_add_cpu(&es->s_blocks_count, input->blocks_count); 925 le32_add_cpu(&es->s_inodes_count, EXT3_INODES_PER_GROUP(sb)); 926 927 /* 928 * We need to protect s_groups_count against other CPUs seeing 929 * inconsistent state in the superblock. 930 * 931 * The precise rules we use are: 932 * 933 * * Writers of s_groups_count *must* hold s_resize_lock 934 * AND 935 * * Writers must perform a smp_wmb() after updating all dependent 936 * data and before modifying the groups count 937 * 938 * * Readers must hold s_resize_lock over the access 939 * OR 940 * * Readers must perform an smp_rmb() after reading the groups count 941 * and before reading any dependent data. 942 * 943 * NB. These rules can be relaxed when checking the group count 944 * while freeing data, as we can only allocate from a block 945 * group after serialising against the group count, and we can 946 * only then free after serialising in turn against that 947 * allocation. 
948 */ 949 smp_wmb(); 950 951 /* Update the global fs size fields */ 952 sbi->s_groups_count++; 953 954 err = ext3_journal_dirty_metadata(handle, primary); 955 if (err) 956 goto exit_journal; 957 958 /* Update the reserved block counts only once the new group is 959 * active. */ 960 le32_add_cpu(&es->s_r_blocks_count, input->reserved_blocks); 961 962 /* Update the free space counts */ 963 percpu_counter_add(&sbi->s_freeblocks_counter, 964 input->free_blocks_count); 965 percpu_counter_add(&sbi->s_freeinodes_counter, 966 EXT3_INODES_PER_GROUP(sb)); 967 968 err = ext3_journal_dirty_metadata(handle, sbi->s_sbh); 969 970exit_journal: 971 mutex_unlock(&sbi->s_resize_lock); 972 if ((err2 = ext3_journal_stop(handle)) && !err) 973 err = err2; 974 if (!err) { 975 update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, 976 sizeof(struct ext3_super_block)); 977 update_backups(sb, primary->b_blocknr, primary->b_data, 978 primary->b_size); 979 } 980exit_put: 981 iput(inode); 982 return err; 983} /* ext3_group_add */ 984 985/* Extend the filesystem to the new number of blocks specified. This entry 986 * point is only used to extend the current filesystem to the end of the last 987 * existing group. It can be accessed via ioctl, or by "remount,resize=<size>" 988 * for emergencies (because it has no dependencies on reserved blocks). 989 * 990 * If we _really_ wanted, we could use default values to call ext3_group_add() 991 * allow the "remount" trick to work for arbitrary resizing, assuming enough 992 * GDT blocks are reserved to grow to the desired size. 
993 */ 994int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es, 995 ext3_fsblk_t n_blocks_count) 996{ 997 ext3_fsblk_t o_blocks_count; 998 ext3_grpblk_t last; 999 ext3_grpblk_t add; 1000 struct buffer_head * bh; 1001 handle_t *handle; 1002 int err; 1003 unsigned long freed_blocks; 1004 1005 /* We don't need to worry about locking wrt other resizers just 1006 * yet: we're going to revalidate es->s_blocks_count after 1007 * taking the s_resize_lock below. */ 1008 o_blocks_count = le32_to_cpu(es->s_blocks_count); 1009 1010 if (test_opt(sb, DEBUG)) 1011 printk(KERN_DEBUG "EXT3-fs: extending last group from "E3FSBLK 1012 " up to "E3FSBLK" blocks\n", 1013 o_blocks_count, n_blocks_count); 1014 1015 if (n_blocks_count == 0 || n_blocks_count == o_blocks_count) 1016 return 0; 1017 1018 if (n_blocks_count > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { 1019 printk(KERN_ERR "EXT3-fs: filesystem on %s:" 1020 " too large to resize to "E3FSBLK" blocks safely\n", 1021 sb->s_id, n_blocks_count); 1022 if (sizeof(sector_t) < 8) 1023 ext3_warning(sb, __func__, 1024 "CONFIG_LBDAF not enabled\n"); 1025 return -EINVAL; 1026 } 1027 1028 if (n_blocks_count < o_blocks_count) { 1029 ext3_warning(sb, __func__, 1030 "can't shrink FS - resize aborted"); 1031 return -EBUSY; 1032 } 1033 1034 /* Handle the remaining blocks in the last group only. 
*/ 1035 last = (o_blocks_count - le32_to_cpu(es->s_first_data_block)) % 1036 EXT3_BLOCKS_PER_GROUP(sb); 1037 1038 if (last == 0) { 1039 ext3_warning(sb, __func__, 1040 "need to use ext2online to resize further"); 1041 return -EPERM; 1042 } 1043 1044 add = EXT3_BLOCKS_PER_GROUP(sb) - last; 1045 1046 if (o_blocks_count + add < o_blocks_count) { 1047 ext3_warning(sb, __func__, "blocks_count overflow"); 1048 return -EINVAL; 1049 } 1050 1051 if (o_blocks_count + add > n_blocks_count) 1052 add = n_blocks_count - o_blocks_count; 1053 1054 if (o_blocks_count + add < n_blocks_count) 1055 ext3_warning(sb, __func__, 1056 "will only finish group ("E3FSBLK 1057 " blocks, %u new)", 1058 o_blocks_count + add, add); 1059 1060 /* See if the device is actually as big as what was requested */ 1061 bh = sb_bread(sb, o_blocks_count + add -1); 1062 if (!bh) { 1063 ext3_warning(sb, __func__, 1064 "can't read last block, resize aborted"); 1065 return -ENOSPC; 1066 } 1067 brelse(bh); 1068 1069 /* We will update the superblock, one block bitmap, and 1070 * one group descriptor via ext3_free_blocks(). 
1071 */ 1072 handle = ext3_journal_start_sb(sb, 3); 1073 if (IS_ERR(handle)) { 1074 err = PTR_ERR(handle); 1075 ext3_warning(sb, __func__, "error %d on journal start",err); 1076 goto exit_put; 1077 } 1078 1079 mutex_lock(&EXT3_SB(sb)->s_resize_lock); 1080 if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) { 1081 ext3_warning(sb, __func__, 1082 "multiple resizers run on filesystem!"); 1083 mutex_unlock(&EXT3_SB(sb)->s_resize_lock); 1084 ext3_journal_stop(handle); 1085 err = -EBUSY; 1086 goto exit_put; 1087 } 1088 1089 if ((err = ext3_journal_get_write_access(handle, 1090 EXT3_SB(sb)->s_sbh))) { 1091 ext3_warning(sb, __func__, 1092 "error %d on journal write access", err); 1093 mutex_unlock(&EXT3_SB(sb)->s_resize_lock); 1094 ext3_journal_stop(handle); 1095 goto exit_put; 1096 } 1097 es->s_blocks_count = cpu_to_le32(o_blocks_count + add); 1098 err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); 1099 mutex_unlock(&EXT3_SB(sb)->s_resize_lock); 1100 if (err) { 1101 ext3_warning(sb, __func__, 1102 "error %d on journal dirty metadata", err); 1103 ext3_journal_stop(handle); 1104 goto exit_put; 1105 } 1106 ext3_debug("freeing blocks "E3FSBLK" through "E3FSBLK"\n", 1107 o_blocks_count, o_blocks_count + add); 1108 ext3_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks); 1109 ext3_debug("freed blocks "E3FSBLK" through "E3FSBLK"\n", 1110 o_blocks_count, o_blocks_count + add); 1111 if ((err = ext3_journal_stop(handle))) 1112 goto exit_put; 1113 if (test_opt(sb, DEBUG)) 1114 printk(KERN_DEBUG "EXT3-fs: extended group to %u blocks\n", 1115 le32_to_cpu(es->s_blocks_count)); 1116 update_backups(sb, EXT3_SB(sb)->s_sbh->b_blocknr, (char *)es, 1117 sizeof(struct ext3_super_block)); 1118exit_put: 1119 return err; 1120} /* ext3_group_extend */