Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
fs/ext3/balloc.c at v3.3-rc3 (2163 lines, 64 kB)
1/* 2 * linux/fs/ext3/balloc.c 3 * 4 * Copyright (C) 1992, 1993, 1994, 1995 5 * Remy Card (card@masi.ibp.fr) 6 * Laboratoire MASI - Institut Blaise Pascal 7 * Universite Pierre et Marie Curie (Paris VI) 8 * 9 * Enhanced block allocation by Stephen Tweedie (sct@redhat.com), 1993 10 * Big-endian to little-endian byte-swapping/bitmaps by 11 * David S. Miller (davem@caip.rutgers.edu), 1995 12 */ 13 14#include <linux/time.h> 15#include <linux/capability.h> 16#include <linux/fs.h> 17#include <linux/slab.h> 18#include <linux/jbd.h> 19#include <linux/ext3_fs.h> 20#include <linux/ext3_jbd.h> 21#include <linux/quotaops.h> 22#include <linux/buffer_head.h> 23#include <linux/blkdev.h> 24#include <trace/events/ext3.h> 25 26/* 27 * balloc.c contains the blocks allocation and deallocation routines 28 */ 29 30/* 31 * The free blocks are managed by bitmaps. A file system contains several 32 * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap 33 * block for inodes, N blocks for the inode table and data blocks. 34 * 35 * The file system contains group descriptors which are located after the 36 * super block. Each descriptor contains the number of the bitmap block and 37 * the free blocks count in the block. The descriptors are loaded in memory 38 * when a file system is mounted (see ext3_fill_super). 39 */ 40 41 42#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) 43 44/* 45 * Calculate the block group number and offset, given a block number 46 */ 47static void ext3_get_group_no_and_offset(struct super_block *sb, 48 ext3_fsblk_t blocknr, unsigned long *blockgrpp, ext3_grpblk_t *offsetp) 49{ 50 struct ext3_super_block *es = EXT3_SB(sb)->s_es; 51 52 blocknr = blocknr - le32_to_cpu(es->s_first_data_block); 53 if (offsetp) 54 *offsetp = blocknr % EXT3_BLOCKS_PER_GROUP(sb); 55 if (blockgrpp) 56 *blockgrpp = blocknr / EXT3_BLOCKS_PER_GROUP(sb); 57} 58 59/** 60 * ext3_get_group_desc() -- load group descriptor from disk 61 * @sb: super block 62 * @block_group: given block group 63 * @bh: pointer to the buffer head to store the block 64 * group descriptor 65 */ 66struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb, 67 unsigned int block_group, 68 struct buffer_head ** bh) 69{ 70 unsigned long group_desc; 71 unsigned long offset; 72 struct ext3_group_desc * desc; 73 struct ext3_sb_info *sbi = EXT3_SB(sb); 74 75 if (block_group >= sbi->s_groups_count) { 76 ext3_error (sb, "ext3_get_group_desc", 77 "block_group >= groups_count - " 78 "block_group = %d, groups_count = %lu", 79 block_group, sbi->s_groups_count); 80 81 return NULL; 82 } 83 smp_rmb(); 84 85 group_desc = block_group >> EXT3_DESC_PER_BLOCK_BITS(sb); 86 offset = block_group & (EXT3_DESC_PER_BLOCK(sb) - 1); 87 if (!sbi->s_group_desc[group_desc]) { 88 ext3_error (sb, "ext3_get_group_desc", 89 "Group descriptor not loaded - " 90 "block_group = %d, group_desc = %lu, desc = %lu", 91 block_group, group_desc, offset); 92 return NULL; 93 } 94 95 desc = (struct ext3_group_desc *) sbi->s_group_desc[group_desc]->b_data; 96 if (bh) 97 *bh = sbi->s_group_desc[group_desc]; 98 return desc + offset; 99} 100 101static int ext3_valid_block_bitmap(struct super_block *sb, 102 struct ext3_group_desc *desc, 103 unsigned int block_group, 104 struct buffer_head *bh) 105{ 106 ext3_grpblk_t offset; 107 ext3_grpblk_t next_zero_bit; 108 ext3_fsblk_t bitmap_blk; 109 ext3_fsblk_t group_first_block; 110 111 group_first_block = ext3_group_first_block_no(sb, block_group); 112 113 /* check whether block bitmap block number is set 
*/ 114 bitmap_blk = le32_to_cpu(desc->bg_block_bitmap); 115 offset = bitmap_blk - group_first_block; 116 if (!ext3_test_bit(offset, bh->b_data)) 117 /* bad block bitmap */ 118 goto err_out; 119 120 /* check whether the inode bitmap block number is set */ 121 bitmap_blk = le32_to_cpu(desc->bg_inode_bitmap); 122 offset = bitmap_blk - group_first_block; 123 if (!ext3_test_bit(offset, bh->b_data)) 124 /* bad block bitmap */ 125 goto err_out; 126 127 /* check whether the inode table block number is set */ 128 bitmap_blk = le32_to_cpu(desc->bg_inode_table); 129 offset = bitmap_blk - group_first_block; 130 next_zero_bit = ext3_find_next_zero_bit(bh->b_data, 131 offset + EXT3_SB(sb)->s_itb_per_group, 132 offset); 133 if (next_zero_bit >= offset + EXT3_SB(sb)->s_itb_per_group) 134 /* good bitmap for inode tables */ 135 return 1; 136 137err_out: 138 ext3_error(sb, __func__, 139 "Invalid block bitmap - " 140 "block_group = %d, block = %lu", 141 block_group, bitmap_blk); 142 return 0; 143} 144 145/** 146 * read_block_bitmap() 147 * @sb: super block 148 * @block_group: given block group 149 * 150 * Read the bitmap for a given block_group,and validate the 151 * bits for block/inode/inode tables are set in the bitmaps 152 * 153 * Return buffer_head on success or NULL in case of failure. 154 */ 155static struct buffer_head * 156read_block_bitmap(struct super_block *sb, unsigned int block_group) 157{ 158 struct ext3_group_desc * desc; 159 struct buffer_head * bh = NULL; 160 ext3_fsblk_t bitmap_blk; 161 162 desc = ext3_get_group_desc(sb, block_group, NULL); 163 if (!desc) 164 return NULL; 165 trace_ext3_read_block_bitmap(sb, block_group); 166 bitmap_blk = le32_to_cpu(desc->bg_block_bitmap); 167 bh = sb_getblk(sb, bitmap_blk); 168 if (unlikely(!bh)) { 169 ext3_error(sb, __func__, 170 "Cannot read block bitmap - " 171 "block_group = %d, block_bitmap = %u", 172 block_group, le32_to_cpu(desc->bg_block_bitmap)); 173 return NULL; 174 } 175 if (likely(bh_uptodate_or_lock(bh))) 176 return bh; 177 178 if (bh_submit_read(bh) < 0) { 179 brelse(bh); 180 ext3_error(sb, __func__, 181 "Cannot read block bitmap - " 182 "block_group = %d, block_bitmap = %u", 183 block_group, le32_to_cpu(desc->bg_block_bitmap)); 184 return NULL; 185 } 186 ext3_valid_block_bitmap(sb, desc, block_group, bh); 187 /* 188 * file system mounted not to panic on error, continue with corrupt 189 * bitmap 190 */ 191 return bh; 192} 193/* 194 * The reservation window structure operations 195 * -------------------------------------------- 196 * Operations include: 197 * dump, find, add, remove, is_empty, find_next_reservable_window, etc. 198 * 199 * We use a red-black tree to represent per-filesystem reservation 200 * windows. 201 * 202 */ 203 204/** 205 * __rsv_window_dump() -- Dump the filesystem block allocation reservation map 206 * @rb_root: root of per-filesystem reservation rb tree 207 * @verbose: verbose mode 208 * @fn: function which wishes to dump the reservation map 209 * 210 * If verbose is turned on, it will print the whole block reservation 211 * windows(start, end). Otherwise, it will only print out the "bad" windows, 212 * those windows that overlap with their immediate neighbors. 
213 */ 214#if 1 215static void __rsv_window_dump(struct rb_root *root, int verbose, 216 const char *fn) 217{ 218 struct rb_node *n; 219 struct ext3_reserve_window_node *rsv, *prev; 220 int bad; 221 222restart: 223 n = rb_first(root); 224 bad = 0; 225 prev = NULL; 226 227 printk("Block Allocation Reservation Windows Map (%s):\n", fn); 228 while (n) { 229 rsv = rb_entry(n, struct ext3_reserve_window_node, rsv_node); 230 if (verbose) 231 printk("reservation window 0x%p " 232 "start: %lu, end: %lu\n", 233 rsv, rsv->rsv_start, rsv->rsv_end); 234 if (rsv->rsv_start && rsv->rsv_start >= rsv->rsv_end) { 235 printk("Bad reservation %p (start >= end)\n", 236 rsv); 237 bad = 1; 238 } 239 if (prev && prev->rsv_end >= rsv->rsv_start) { 240 printk("Bad reservation %p (prev->end >= start)\n", 241 rsv); 242 bad = 1; 243 } 244 if (bad) { 245 if (!verbose) { 246 printk("Restarting reservation walk in verbose mode\n"); 247 verbose = 1; 248 goto restart; 249 } 250 } 251 n = rb_next(n); 252 prev = rsv; 253 } 254 printk("Window map complete.\n"); 255 BUG_ON(bad); 256} 257#define rsv_window_dump(root, verbose) \ 258 __rsv_window_dump((root), (verbose), __func__) 259#else 260#define rsv_window_dump(root, verbose) do {} while (0) 261#endif 262 263/** 264 * goal_in_my_reservation() 265 * @rsv: inode's reservation window 266 * @grp_goal: given goal block relative to the allocation block group 267 * @group: the current allocation block group 268 * @sb: filesystem super block 269 * 270 * Test if the given goal block (group relative) is within the file's 271 * own block reservation window range. 272 * 273 * If the reservation window is outside the goal allocation group, return 0; 274 * grp_goal (given goal block) could be -1, which means no specific 275 * goal block. In this case, always return 1. 276 * If the goal block is within the reservation window, return 1; 277 * otherwise, return 0; 278 */ 279static int 280goal_in_my_reservation(struct ext3_reserve_window *rsv, ext3_grpblk_t grp_goal, 281 unsigned int group, struct super_block * sb) 282{ 283 ext3_fsblk_t group_first_block, group_last_block; 284 285 group_first_block = ext3_group_first_block_no(sb, group); 286 group_last_block = group_first_block + (EXT3_BLOCKS_PER_GROUP(sb) - 1); 287 288 if ((rsv->_rsv_start > group_last_block) || 289 (rsv->_rsv_end < group_first_block)) 290 return 0; 291 if ((grp_goal >= 0) && ((grp_goal + group_first_block < rsv->_rsv_start) 292 || (grp_goal + group_first_block > rsv->_rsv_end))) 293 return 0; 294 return 1; 295} 296 297/** 298 * search_reserve_window() 299 * @rb_root: root of reservation tree 300 * @goal: target allocation block 301 * 302 * Find the reserved window which includes the goal, or the previous one 303 * if the goal is not in any window. 304 * Returns NULL if there are no windows or if all windows start after the goal. 305 */ 306static struct ext3_reserve_window_node * 307search_reserve_window(struct rb_root *root, ext3_fsblk_t goal) 308{ 309 struct rb_node *n = root->rb_node; 310 struct ext3_reserve_window_node *rsv; 311 312 if (!n) 313 return NULL; 314 315 do { 316 rsv = rb_entry(n, struct ext3_reserve_window_node, rsv_node); 317 318 if (goal < rsv->rsv_start) 319 n = n->rb_left; 320 else if (goal > rsv->rsv_end) 321 n = n->rb_right; 322 else 323 return rsv; 324 } while (n); 325 /* 326 * We've fallen off the end of the tree: the goal wasn't inside 327 * any particular node. OK, the previous node must be to one 328 * side of the interval containing the goal. If it's the RHS, 329 * we need to back up one. 
330 */ 331 if (rsv->rsv_start > goal) { 332 n = rb_prev(&rsv->rsv_node); 333 rsv = rb_entry(n, struct ext3_reserve_window_node, rsv_node); 334 } 335 return rsv; 336} 337 338/** 339 * ext3_rsv_window_add() -- Insert a window to the block reservation rb tree. 340 * @sb: super block 341 * @rsv: reservation window to add 342 * 343 * Must be called with rsv_lock hold. 344 */ 345void ext3_rsv_window_add(struct super_block *sb, 346 struct ext3_reserve_window_node *rsv) 347{ 348 struct rb_root *root = &EXT3_SB(sb)->s_rsv_window_root; 349 struct rb_node *node = &rsv->rsv_node; 350 ext3_fsblk_t start = rsv->rsv_start; 351 352 struct rb_node ** p = &root->rb_node; 353 struct rb_node * parent = NULL; 354 struct ext3_reserve_window_node *this; 355 356 trace_ext3_rsv_window_add(sb, rsv); 357 while (*p) 358 { 359 parent = *p; 360 this = rb_entry(parent, struct ext3_reserve_window_node, rsv_node); 361 362 if (start < this->rsv_start) 363 p = &(*p)->rb_left; 364 else if (start > this->rsv_end) 365 p = &(*p)->rb_right; 366 else { 367 rsv_window_dump(root, 1); 368 BUG(); 369 } 370 } 371 372 rb_link_node(node, parent, p); 373 rb_insert_color(node, root); 374} 375 376/** 377 * ext3_rsv_window_remove() -- unlink a window from the reservation rb tree 378 * @sb: super block 379 * @rsv: reservation window to remove 380 * 381 * Mark the block reservation window as not allocated, and unlink it 382 * from the filesystem reservation window rb tree. Must be called with 383 * rsv_lock hold. 384 */ 385static void rsv_window_remove(struct super_block *sb, 386 struct ext3_reserve_window_node *rsv) 387{ 388 rsv->rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; 389 rsv->rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; 390 rsv->rsv_alloc_hit = 0; 391 rb_erase(&rsv->rsv_node, &EXT3_SB(sb)->s_rsv_window_root); 392} 393 394/* 395 * rsv_is_empty() -- Check if the reservation window is allocated. 396 * @rsv: given reservation window to check 397 * 398 * returns 1 if the end block is EXT3_RESERVE_WINDOW_NOT_ALLOCATED. 399 */ 400static inline int rsv_is_empty(struct ext3_reserve_window *rsv) 401{ 402 /* a valid reservation end block could not be 0 */ 403 return rsv->_rsv_end == EXT3_RESERVE_WINDOW_NOT_ALLOCATED; 404} 405 406/** 407 * ext3_init_block_alloc_info() 408 * @inode: file inode structure 409 * 410 * Allocate and initialize the reservation window structure, and 411 * link the window to the ext3 inode structure at last 412 * 413 * The reservation window structure is only dynamically allocated 414 * and linked to ext3 inode the first time the open file 415 * needs a new block. So, before every ext3_new_block(s) call, for 416 * regular files, we should check whether the reservation window 417 * structure exists or not. In the latter case, this function is called. 418 * Fail to do so will result in block reservation being turned off for that 419 * open file. 420 * 421 * This function is called from ext3_get_blocks_handle(), also called 422 * when setting the reservation window size through ioctl before the file 423 * is open for write (needs block allocation). 424 * 425 * Needs truncate_mutex protection prior to call this function. 
426 */ 427void ext3_init_block_alloc_info(struct inode *inode) 428{ 429 struct ext3_inode_info *ei = EXT3_I(inode); 430 struct ext3_block_alloc_info *block_i; 431 struct super_block *sb = inode->i_sb; 432 433 block_i = kmalloc(sizeof(*block_i), GFP_NOFS); 434 if (block_i) { 435 struct ext3_reserve_window_node *rsv = &block_i->rsv_window_node; 436 437 rsv->rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; 438 rsv->rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; 439 440 /* 441 * if filesystem is mounted with NORESERVATION, the goal 442 * reservation window size is set to zero to indicate 443 * block reservation is off 444 */ 445 if (!test_opt(sb, RESERVATION)) 446 rsv->rsv_goal_size = 0; 447 else 448 rsv->rsv_goal_size = EXT3_DEFAULT_RESERVE_BLOCKS; 449 rsv->rsv_alloc_hit = 0; 450 block_i->last_alloc_logical_block = 0; 451 block_i->last_alloc_physical_block = 0; 452 } 453 ei->i_block_alloc_info = block_i; 454} 455 456/** 457 * ext3_discard_reservation() 458 * @inode: inode 459 * 460 * Discard(free) block reservation window on last file close, or truncate 461 * or at last iput(). 462 * 463 * It is being called in three cases: 464 * ext3_release_file(): last writer close the file 465 * ext3_clear_inode(): last iput(), when nobody link to this file. 466 * ext3_truncate(): when the block indirect map is about to change. 467 * 468 */ 469void ext3_discard_reservation(struct inode *inode) 470{ 471 struct ext3_inode_info *ei = EXT3_I(inode); 472 struct ext3_block_alloc_info *block_i = ei->i_block_alloc_info; 473 struct ext3_reserve_window_node *rsv; 474 spinlock_t *rsv_lock = &EXT3_SB(inode->i_sb)->s_rsv_window_lock; 475 476 if (!block_i) 477 return; 478 479 rsv = &block_i->rsv_window_node; 480 if (!rsv_is_empty(&rsv->rsv_window)) { 481 spin_lock(rsv_lock); 482 if (!rsv_is_empty(&rsv->rsv_window)) { 483 trace_ext3_discard_reservation(inode, rsv); 484 rsv_window_remove(inode->i_sb, rsv); 485 } 486 spin_unlock(rsv_lock); 487 } 488} 489 490/** 491 * ext3_free_blocks_sb() -- Free given blocks and update quota 492 * @handle: handle to this transaction 493 * @sb: super block 494 * @block: start physcial block to free 495 * @count: number of blocks to free 496 * @pdquot_freed_blocks: pointer to quota 497 */ 498void ext3_free_blocks_sb(handle_t *handle, struct super_block *sb, 499 ext3_fsblk_t block, unsigned long count, 500 unsigned long *pdquot_freed_blocks) 501{ 502 struct buffer_head *bitmap_bh = NULL; 503 struct buffer_head *gd_bh; 504 unsigned long block_group; 505 ext3_grpblk_t bit; 506 unsigned long i; 507 unsigned long overflow; 508 struct ext3_group_desc * desc; 509 struct ext3_super_block * es; 510 struct ext3_sb_info *sbi; 511 int err = 0, ret; 512 ext3_grpblk_t group_freed; 513 514 *pdquot_freed_blocks = 0; 515 sbi = EXT3_SB(sb); 516 es = sbi->s_es; 517 if (block < le32_to_cpu(es->s_first_data_block) || 518 block + count < block || 519 block + count > le32_to_cpu(es->s_blocks_count)) { 520 ext3_error (sb, "ext3_free_blocks", 521 "Freeing blocks not in datazone - " 522 "block = "E3FSBLK", count = %lu", block, count); 523 goto error_return; 524 } 525 526 ext3_debug ("freeing block(s) %lu-%lu\n", block, block + count - 1); 527 528do_more: 529 overflow = 0; 530 block_group = (block - le32_to_cpu(es->s_first_data_block)) / 531 EXT3_BLOCKS_PER_GROUP(sb); 532 bit = (block - le32_to_cpu(es->s_first_data_block)) % 533 EXT3_BLOCKS_PER_GROUP(sb); 534 /* 535 * Check to see if we are freeing blocks across a group 536 * boundary. 
537 */ 538 if (bit + count > EXT3_BLOCKS_PER_GROUP(sb)) { 539 overflow = bit + count - EXT3_BLOCKS_PER_GROUP(sb); 540 count -= overflow; 541 } 542 brelse(bitmap_bh); 543 bitmap_bh = read_block_bitmap(sb, block_group); 544 if (!bitmap_bh) 545 goto error_return; 546 desc = ext3_get_group_desc (sb, block_group, &gd_bh); 547 if (!desc) 548 goto error_return; 549 550 if (in_range (le32_to_cpu(desc->bg_block_bitmap), block, count) || 551 in_range (le32_to_cpu(desc->bg_inode_bitmap), block, count) || 552 in_range (block, le32_to_cpu(desc->bg_inode_table), 553 sbi->s_itb_per_group) || 554 in_range (block + count - 1, le32_to_cpu(desc->bg_inode_table), 555 sbi->s_itb_per_group)) { 556 ext3_error (sb, "ext3_free_blocks", 557 "Freeing blocks in system zones - " 558 "Block = "E3FSBLK", count = %lu", 559 block, count); 560 goto error_return; 561 } 562 563 /* 564 * We are about to start releasing blocks in the bitmap, 565 * so we need undo access. 566 */ 567 /* @@@ check errors */ 568 BUFFER_TRACE(bitmap_bh, "getting undo access"); 569 err = ext3_journal_get_undo_access(handle, bitmap_bh); 570 if (err) 571 goto error_return; 572 573 /* 574 * We are about to modify some metadata. Call the journal APIs 575 * to unshare ->b_data if a currently-committing transaction is 576 * using it 577 */ 578 BUFFER_TRACE(gd_bh, "get_write_access"); 579 err = ext3_journal_get_write_access(handle, gd_bh); 580 if (err) 581 goto error_return; 582 583 jbd_lock_bh_state(bitmap_bh); 584 585 for (i = 0, group_freed = 0; i < count; i++) { 586 /* 587 * An HJ special. This is expensive... 588 */ 589#ifdef CONFIG_JBD_DEBUG 590 jbd_unlock_bh_state(bitmap_bh); 591 { 592 struct buffer_head *debug_bh; 593 debug_bh = sb_find_get_block(sb, block + i); 594 if (debug_bh) { 595 BUFFER_TRACE(debug_bh, "Deleted!"); 596 if (!bh2jh(bitmap_bh)->b_committed_data) 597 BUFFER_TRACE(debug_bh, 598 "No committed data in bitmap"); 599 BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap"); 600 __brelse(debug_bh); 601 } 602 } 603 jbd_lock_bh_state(bitmap_bh); 604#endif 605 if (need_resched()) { 606 jbd_unlock_bh_state(bitmap_bh); 607 cond_resched(); 608 jbd_lock_bh_state(bitmap_bh); 609 } 610 /* @@@ This prevents newly-allocated data from being 611 * freed and then reallocated within the same 612 * transaction. 613 * 614 * Ideally we would want to allow that to happen, but to 615 * do so requires making journal_forget() capable of 616 * revoking the queued write of a data block, which 617 * implies blocking on the journal lock. *forget() 618 * cannot block due to truncate races. 619 * 620 * Eventually we can fix this by making journal_forget() 621 * return a status indicating whether or not it was able 622 * to revoke the buffer. On successful revoke, it is 623 * safe not to set the allocation bit in the committed 624 * bitmap, because we know that there is no outstanding 625 * activity on the buffer any more and so it is safe to 626 * reallocate it. 627 */ 628 BUFFER_TRACE(bitmap_bh, "set in b_committed_data"); 629 J_ASSERT_BH(bitmap_bh, 630 bh2jh(bitmap_bh)->b_committed_data != NULL); 631 ext3_set_bit_atomic(sb_bgl_lock(sbi, block_group), bit + i, 632 bh2jh(bitmap_bh)->b_committed_data); 633 634 /* 635 * We clear the bit in the bitmap after setting the committed 636 * data bit, because this is the reverse order to that which 637 * the allocator uses. 
638 */ 639 BUFFER_TRACE(bitmap_bh, "clear bit"); 640 if (!ext3_clear_bit_atomic(sb_bgl_lock(sbi, block_group), 641 bit + i, bitmap_bh->b_data)) { 642 jbd_unlock_bh_state(bitmap_bh); 643 ext3_error(sb, __func__, 644 "bit already cleared for block "E3FSBLK, 645 block + i); 646 jbd_lock_bh_state(bitmap_bh); 647 BUFFER_TRACE(bitmap_bh, "bit already cleared"); 648 } else { 649 group_freed++; 650 } 651 } 652 jbd_unlock_bh_state(bitmap_bh); 653 654 spin_lock(sb_bgl_lock(sbi, block_group)); 655 le16_add_cpu(&desc->bg_free_blocks_count, group_freed); 656 spin_unlock(sb_bgl_lock(sbi, block_group)); 657 percpu_counter_add(&sbi->s_freeblocks_counter, count); 658 659 /* We dirtied the bitmap block */ 660 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); 661 err = ext3_journal_dirty_metadata(handle, bitmap_bh); 662 663 /* And the group descriptor block */ 664 BUFFER_TRACE(gd_bh, "dirtied group descriptor block"); 665 ret = ext3_journal_dirty_metadata(handle, gd_bh); 666 if (!err) err = ret; 667 *pdquot_freed_blocks += group_freed; 668 669 if (overflow && !err) { 670 block += count; 671 count = overflow; 672 goto do_more; 673 } 674 675error_return: 676 brelse(bitmap_bh); 677 ext3_std_error(sb, err); 678 return; 679} 680 681/** 682 * ext3_free_blocks() -- Free given blocks and update quota 683 * @handle: handle for this transaction 684 * @inode: inode 685 * @block: start physical block to free 686 * @count: number of blocks to count 687 */ 688void ext3_free_blocks(handle_t *handle, struct inode *inode, 689 ext3_fsblk_t block, unsigned long count) 690{ 691 struct super_block *sb = inode->i_sb; 692 unsigned long dquot_freed_blocks; 693 694 trace_ext3_free_blocks(inode, block, count); 695 ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks); 696 if (dquot_freed_blocks) 697 dquot_free_block(inode, dquot_freed_blocks); 698 return; 699} 700 701/** 702 * ext3_test_allocatable() 703 * @nr: given allocation block group 704 * @bh: bufferhead contains the bitmap of the given block group 705 * 706 * For ext3 allocations, we must not reuse any blocks which are 707 * allocated in the bitmap buffer's "last committed data" copy. This 708 * prevents deletes from freeing up the page for reuse until we have 709 * committed the delete transaction. 710 * 711 * If we didn't do this, then deleting something and reallocating it as 712 * data would allow the old block to be overwritten before the 713 * transaction committed (because we force data to disk before commit). 714 * This would lead to corruption if we crashed between overwriting the 715 * data and committing the delete. 716 * 717 * @@@ We may want to make this allocation behaviour conditional on 718 * data-writes at some point, and disable it for metadata allocations or 719 * sync-data inodes. 
720 */ 721static int ext3_test_allocatable(ext3_grpblk_t nr, struct buffer_head *bh) 722{ 723 int ret; 724 struct journal_head *jh = bh2jh(bh); 725 726 if (ext3_test_bit(nr, bh->b_data)) 727 return 0; 728 729 jbd_lock_bh_state(bh); 730 if (!jh->b_committed_data) 731 ret = 1; 732 else 733 ret = !ext3_test_bit(nr, jh->b_committed_data); 734 jbd_unlock_bh_state(bh); 735 return ret; 736} 737 738/** 739 * bitmap_search_next_usable_block() 740 * @start: the starting block (group relative) of the search 741 * @bh: bufferhead contains the block group bitmap 742 * @maxblocks: the ending block (group relative) of the reservation 743 * 744 * The bitmap search --- search forward alternately through the actual 745 * bitmap on disk and the last-committed copy in journal, until we find a 746 * bit free in both bitmaps. 747 */ 748static ext3_grpblk_t 749bitmap_search_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh, 750 ext3_grpblk_t maxblocks) 751{ 752 ext3_grpblk_t next; 753 struct journal_head *jh = bh2jh(bh); 754 755 while (start < maxblocks) { 756 next = ext3_find_next_zero_bit(bh->b_data, maxblocks, start); 757 if (next >= maxblocks) 758 return -1; 759 if (ext3_test_allocatable(next, bh)) 760 return next; 761 jbd_lock_bh_state(bh); 762 if (jh->b_committed_data) 763 start = ext3_find_next_zero_bit(jh->b_committed_data, 764 maxblocks, next); 765 jbd_unlock_bh_state(bh); 766 } 767 return -1; 768} 769 770/** 771 * find_next_usable_block() 772 * @start: the starting block (group relative) to find next 773 * allocatable block in bitmap. 774 * @bh: bufferhead contains the block group bitmap 775 * @maxblocks: the ending block (group relative) for the search 776 * 777 * Find an allocatable block in a bitmap. We honor both the bitmap and 778 * its last-committed copy (if that exists), and perform the "most 779 * appropriate allocation" algorithm of looking for a free block near 780 * the initial goal; then for a free byte somewhere in the bitmap; then 781 * for any free bit in the bitmap. 782 */ 783static ext3_grpblk_t 784find_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh, 785 ext3_grpblk_t maxblocks) 786{ 787 ext3_grpblk_t here, next; 788 char *p, *r; 789 790 if (start > 0) { 791 /* 792 * The goal was occupied; search forward for a free 793 * block within the next XX blocks. 794 * 795 * end_goal is more or less random, but it has to be 796 * less than EXT3_BLOCKS_PER_GROUP. Aligning up to the 797 * next 64-bit boundary is simple.. 
798 */ 799 ext3_grpblk_t end_goal = (start + 63) & ~63; 800 if (end_goal > maxblocks) 801 end_goal = maxblocks; 802 here = ext3_find_next_zero_bit(bh->b_data, end_goal, start); 803 if (here < end_goal && ext3_test_allocatable(here, bh)) 804 return here; 805 ext3_debug("Bit not found near goal\n"); 806 } 807 808 here = start; 809 if (here < 0) 810 here = 0; 811 812 p = bh->b_data + (here >> 3); 813 r = memscan(p, 0, ((maxblocks + 7) >> 3) - (here >> 3)); 814 next = (r - bh->b_data) << 3; 815 816 if (next < maxblocks && next >= start && ext3_test_allocatable(next, bh)) 817 return next; 818 819 /* 820 * The bitmap search --- search forward alternately through the actual 821 * bitmap and the last-committed copy until we find a bit free in 822 * both 823 */ 824 here = bitmap_search_next_usable_block(here, bh, maxblocks); 825 return here; 826} 827 828/** 829 * claim_block() 830 * @lock: the spin lock for this block group 831 * @block: the free block (group relative) to allocate 832 * @bh: the buffer_head contains the block group bitmap 833 * 834 * We think we can allocate this block in this bitmap. Try to set the bit. 835 * If that succeeds then check that nobody has allocated and then freed the 836 * block since we saw that is was not marked in b_committed_data. If it _was_ 837 * allocated and freed then clear the bit in the bitmap again and return 838 * zero (failure). 839 */ 840static inline int 841claim_block(spinlock_t *lock, ext3_grpblk_t block, struct buffer_head *bh) 842{ 843 struct journal_head *jh = bh2jh(bh); 844 int ret; 845 846 if (ext3_set_bit_atomic(lock, block, bh->b_data)) 847 return 0; 848 jbd_lock_bh_state(bh); 849 if (jh->b_committed_data && ext3_test_bit(block,jh->b_committed_data)) { 850 ext3_clear_bit_atomic(lock, block, bh->b_data); 851 ret = 0; 852 } else { 853 ret = 1; 854 } 855 jbd_unlock_bh_state(bh); 856 return ret; 857} 858 859/** 860 * ext3_try_to_allocate() 861 * @sb: superblock 862 * @handle: handle to this transaction 863 * @group: given allocation block group 864 * @bitmap_bh: bufferhead holds the block bitmap 865 * @grp_goal: given target block within the group 866 * @count: target number of blocks to allocate 867 * @my_rsv: reservation window 868 * 869 * Attempt to allocate blocks within a give range. Set the range of allocation 870 * first, then find the first free bit(s) from the bitmap (within the range), 871 * and at last, allocate the blocks by claiming the found free bit as allocated. 872 * 873 * To set the range of this allocation: 874 * if there is a reservation window, only try to allocate block(s) from the 875 * file's own reservation window; 876 * Otherwise, the allocation range starts from the give goal block, ends at 877 * the block group's last block. 878 * 879 * If we failed to allocate the desired block then we may end up crossing to a 880 * new bitmap. In that case we must release write access to the old one via 881 * ext3_journal_release_buffer(), else we'll run out of credits. 
882 */ 883static ext3_grpblk_t 884ext3_try_to_allocate(struct super_block *sb, handle_t *handle, int group, 885 struct buffer_head *bitmap_bh, ext3_grpblk_t grp_goal, 886 unsigned long *count, struct ext3_reserve_window *my_rsv) 887{ 888 ext3_fsblk_t group_first_block; 889 ext3_grpblk_t start, end; 890 unsigned long num = 0; 891 892 /* we do allocation within the reservation window if we have a window */ 893 if (my_rsv) { 894 group_first_block = ext3_group_first_block_no(sb, group); 895 if (my_rsv->_rsv_start >= group_first_block) 896 start = my_rsv->_rsv_start - group_first_block; 897 else 898 /* reservation window cross group boundary */ 899 start = 0; 900 end = my_rsv->_rsv_end - group_first_block + 1; 901 if (end > EXT3_BLOCKS_PER_GROUP(sb)) 902 /* reservation window crosses group boundary */ 903 end = EXT3_BLOCKS_PER_GROUP(sb); 904 if ((start <= grp_goal) && (grp_goal < end)) 905 start = grp_goal; 906 else 907 grp_goal = -1; 908 } else { 909 if (grp_goal > 0) 910 start = grp_goal; 911 else 912 start = 0; 913 end = EXT3_BLOCKS_PER_GROUP(sb); 914 } 915 916 BUG_ON(start > EXT3_BLOCKS_PER_GROUP(sb)); 917 918repeat: 919 if (grp_goal < 0 || !ext3_test_allocatable(grp_goal, bitmap_bh)) { 920 grp_goal = find_next_usable_block(start, bitmap_bh, end); 921 if (grp_goal < 0) 922 goto fail_access; 923 if (!my_rsv) { 924 int i; 925 926 for (i = 0; i < 7 && grp_goal > start && 927 ext3_test_allocatable(grp_goal - 1, 928 bitmap_bh); 929 i++, grp_goal--) 930 ; 931 } 932 } 933 start = grp_goal; 934 935 if (!claim_block(sb_bgl_lock(EXT3_SB(sb), group), 936 grp_goal, bitmap_bh)) { 937 /* 938 * The block was allocated by another thread, or it was 939 * allocated and then freed by another thread 940 */ 941 start++; 942 grp_goal++; 943 if (start >= end) 944 goto fail_access; 945 goto repeat; 946 } 947 num++; 948 grp_goal++; 949 while (num < *count && grp_goal < end 950 && ext3_test_allocatable(grp_goal, bitmap_bh) 951 && claim_block(sb_bgl_lock(EXT3_SB(sb), group), 952 grp_goal, bitmap_bh)) { 953 num++; 954 grp_goal++; 955 } 956 *count = num; 957 return grp_goal - num; 958fail_access: 959 *count = num; 960 return -1; 961} 962 963/** 964 * find_next_reservable_window(): 965 * find a reservable space within the given range. 966 * It does not allocate the reservation window for now: 967 * alloc_new_reservation() will do the work later. 968 * 969 * @search_head: the head of the searching list; 970 * This is not necessarily the list head of the whole filesystem 971 * 972 * We have both head and start_block to assist the search 973 * for the reservable space. The list starts from head, 974 * but we will shift to the place where start_block is, 975 * then start from there, when looking for a reservable space. 976 * 977 * @my_rsv: the reservation window 978 * 979 * @sb: the super block 980 * 981 * @start_block: the first block we consider to start 982 * the real search from 983 * 984 * @last_block: 985 * the maximum block number that our goal reservable space 986 * could start from. This is normally the last block in this 987 * group. The search will end when we found the start of next 988 * possible reservable space is out of this boundary. 989 * This could handle the cross boundary reservation window 990 * request. 991 * 992 * basically we search from the given range, rather than the whole 993 * reservation double linked list, (start_block, last_block) 994 * to find a free region that is of my size and has not 995 * been reserved. 
996 * 997 */ 998static int find_next_reservable_window( 999 struct ext3_reserve_window_node *search_head, 1000 struct ext3_reserve_window_node *my_rsv, 1001 struct super_block * sb, 1002 ext3_fsblk_t start_block, 1003 ext3_fsblk_t last_block) 1004{ 1005 struct rb_node *next; 1006 struct ext3_reserve_window_node *rsv, *prev; 1007 ext3_fsblk_t cur; 1008 int size = my_rsv->rsv_goal_size; 1009 1010 /* TODO: make the start of the reservation window byte-aligned */ 1011 /* cur = *start_block & ~7;*/ 1012 cur = start_block; 1013 rsv = search_head; 1014 if (!rsv) 1015 return -1; 1016 1017 while (1) { 1018 if (cur <= rsv->rsv_end) 1019 cur = rsv->rsv_end + 1; 1020 1021 /* TODO? 1022 * in the case we could not find a reservable space 1023 * that is what is expected, during the re-search, we could 1024 * remember what's the largest reservable space we could have 1025 * and return that one. 1026 * 1027 * For now it will fail if we could not find the reservable 1028 * space with expected-size (or more)... 1029 */ 1030 if (cur > last_block) 1031 return -1; /* fail */ 1032 1033 prev = rsv; 1034 next = rb_next(&rsv->rsv_node); 1035 rsv = rb_entry(next,struct ext3_reserve_window_node,rsv_node); 1036 1037 /* 1038 * Reached the last reservation, we can just append to the 1039 * previous one. 1040 */ 1041 if (!next) 1042 break; 1043 1044 if (cur + size <= rsv->rsv_start) { 1045 /* 1046 * Found a reserveable space big enough. We could 1047 * have a reservation across the group boundary here 1048 */ 1049 break; 1050 } 1051 } 1052 /* 1053 * we come here either : 1054 * when we reach the end of the whole list, 1055 * and there is empty reservable space after last entry in the list. 1056 * append it to the end of the list. 1057 * 1058 * or we found one reservable space in the middle of the list, 1059 * return the reservation window that we could append to. 1060 * succeed. 1061 */ 1062 1063 if ((prev != my_rsv) && (!rsv_is_empty(&my_rsv->rsv_window))) 1064 rsv_window_remove(sb, my_rsv); 1065 1066 /* 1067 * Let's book the whole available window for now. We will check the 1068 * disk bitmap later and then, if there are free blocks then we adjust 1069 * the window size if it's larger than requested. 1070 * Otherwise, we will remove this node from the tree next time 1071 * call find_next_reservable_window. 1072 */ 1073 my_rsv->rsv_start = cur; 1074 my_rsv->rsv_end = cur + size - 1; 1075 my_rsv->rsv_alloc_hit = 0; 1076 1077 if (prev != my_rsv) 1078 ext3_rsv_window_add(sb, my_rsv); 1079 1080 return 0; 1081} 1082 1083/** 1084 * alloc_new_reservation()--allocate a new reservation window 1085 * 1086 * To make a new reservation, we search part of the filesystem 1087 * reservation list (the list that inside the group). We try to 1088 * allocate a new reservation window near the allocation goal, 1089 * or the beginning of the group, if there is no goal. 1090 * 1091 * We first find a reservable space after the goal, then from 1092 * there, we check the bitmap for the first free block after 1093 * it. If there is no free block until the end of group, then the 1094 * whole group is full, we failed. Otherwise, check if the free 1095 * block is inside the expected reservable space, if so, we 1096 * succeed. 1097 * If the first free block is outside the reservable space, then 1098 * start from the first free block, we search for next available 1099 * space, and go on. 
1100 * 1101 * on succeed, a new reservation will be found and inserted into the list 1102 * It contains at least one free block, and it does not overlap with other 1103 * reservation windows. 1104 * 1105 * failed: we failed to find a reservation window in this group 1106 * 1107 * @my_rsv: the reservation window 1108 * 1109 * @grp_goal: The goal (group-relative). It is where the search for a 1110 * free reservable space should start from. 1111 * if we have a grp_goal(grp_goal >0 ), then start from there, 1112 * no grp_goal(grp_goal = -1), we start from the first block 1113 * of the group. 1114 * 1115 * @sb: the super block 1116 * @group: the group we are trying to allocate in 1117 * @bitmap_bh: the block group block bitmap 1118 * 1119 */ 1120static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv, 1121 ext3_grpblk_t grp_goal, struct super_block *sb, 1122 unsigned int group, struct buffer_head *bitmap_bh) 1123{ 1124 struct ext3_reserve_window_node *search_head; 1125 ext3_fsblk_t group_first_block, group_end_block, start_block; 1126 ext3_grpblk_t first_free_block; 1127 struct rb_root *fs_rsv_root = &EXT3_SB(sb)->s_rsv_window_root; 1128 unsigned long size; 1129 int ret; 1130 spinlock_t *rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock; 1131 1132 group_first_block = ext3_group_first_block_no(sb, group); 1133 group_end_block = group_first_block + (EXT3_BLOCKS_PER_GROUP(sb) - 1); 1134 1135 if (grp_goal < 0) 1136 start_block = group_first_block; 1137 else 1138 start_block = grp_goal + group_first_block; 1139 1140 trace_ext3_alloc_new_reservation(sb, start_block); 1141 size = my_rsv->rsv_goal_size; 1142 1143 if (!rsv_is_empty(&my_rsv->rsv_window)) { 1144 /* 1145 * if the old reservation is cross group boundary 1146 * and if the goal is inside the old reservation window, 1147 * we will come here when we just failed to allocate from 1148 * the first part of the window. We still have another part 1149 * that belongs to the next group. In this case, there is no 1150 * point to discard our window and try to allocate a new one 1151 * in this group(which will fail). we should 1152 * keep the reservation window, just simply move on. 1153 * 1154 * Maybe we could shift the start block of the reservation 1155 * window to the first block of next group. 1156 */ 1157 1158 if ((my_rsv->rsv_start <= group_end_block) && 1159 (my_rsv->rsv_end > group_end_block) && 1160 (start_block >= my_rsv->rsv_start)) 1161 return -1; 1162 1163 if ((my_rsv->rsv_alloc_hit > 1164 (my_rsv->rsv_end - my_rsv->rsv_start + 1) / 2)) { 1165 /* 1166 * if the previously allocation hit ratio is 1167 * greater than 1/2, then we double the size of 1168 * the reservation window the next time, 1169 * otherwise we keep the same size window 1170 */ 1171 size = size * 2; 1172 if (size > EXT3_MAX_RESERVE_BLOCKS) 1173 size = EXT3_MAX_RESERVE_BLOCKS; 1174 my_rsv->rsv_goal_size= size; 1175 } 1176 } 1177 1178 spin_lock(rsv_lock); 1179 /* 1180 * shift the search start to the window near the goal block 1181 */ 1182 search_head = search_reserve_window(fs_rsv_root, start_block); 1183 1184 /* 1185 * find_next_reservable_window() simply finds a reservable window 1186 * inside the given range(start_block, group_end_block). 1187 * 1188 * To make sure the reservation window has a free bit inside it, we 1189 * need to check the bitmap after we found a reservable window. 
1190 */ 1191retry: 1192 ret = find_next_reservable_window(search_head, my_rsv, sb, 1193 start_block, group_end_block); 1194 1195 if (ret == -1) { 1196 if (!rsv_is_empty(&my_rsv->rsv_window)) 1197 rsv_window_remove(sb, my_rsv); 1198 spin_unlock(rsv_lock); 1199 return -1; 1200 } 1201 1202 /* 1203 * On success, find_next_reservable_window() returns the 1204 * reservation window where there is a reservable space after it. 1205 * Before we reserve this reservable space, we need 1206 * to make sure there is at least a free block inside this region. 1207 * 1208 * searching the first free bit on the block bitmap and copy of 1209 * last committed bitmap alternatively, until we found a allocatable 1210 * block. Search start from the start block of the reservable space 1211 * we just found. 1212 */ 1213 spin_unlock(rsv_lock); 1214 first_free_block = bitmap_search_next_usable_block( 1215 my_rsv->rsv_start - group_first_block, 1216 bitmap_bh, group_end_block - group_first_block + 1); 1217 1218 if (first_free_block < 0) { 1219 /* 1220 * no free block left on the bitmap, no point 1221 * to reserve the space. return failed. 1222 */ 1223 spin_lock(rsv_lock); 1224 if (!rsv_is_empty(&my_rsv->rsv_window)) 1225 rsv_window_remove(sb, my_rsv); 1226 spin_unlock(rsv_lock); 1227 return -1; /* failed */ 1228 } 1229 1230 start_block = first_free_block + group_first_block; 1231 /* 1232 * check if the first free block is within the 1233 * free space we just reserved 1234 */ 1235 if (start_block >= my_rsv->rsv_start && 1236 start_block <= my_rsv->rsv_end) { 1237 trace_ext3_reserved(sb, start_block, my_rsv); 1238 return 0; /* success */ 1239 } 1240 /* 1241 * if the first free bit we found is out of the reservable space 1242 * continue search for next reservable space, 1243 * start from where the free block is, 1244 * we also shift the list head to where we stopped last time 1245 */ 1246 search_head = my_rsv; 1247 spin_lock(rsv_lock); 1248 goto retry; 1249} 1250 1251/** 1252 * try_to_extend_reservation() 1253 * @my_rsv: given reservation window 1254 * @sb: super block 1255 * @size: the delta to extend 1256 * 1257 * Attempt to expand the reservation window large enough to have 1258 * required number of free blocks 1259 * 1260 * Since ext3_try_to_allocate() will always allocate blocks within 1261 * the reservation window range, if the window size is too small, 1262 * multiple blocks allocation has to stop at the end of the reservation 1263 * window. 
To make this more efficient, given the total number of 1264 * blocks needed and the current size of the window, we try to 1265 * expand the reservation window size if necessary on a best-effort 1266 * basis before ext3_new_blocks() tries to allocate blocks, 1267 */ 1268static void try_to_extend_reservation(struct ext3_reserve_window_node *my_rsv, 1269 struct super_block *sb, int size) 1270{ 1271 struct ext3_reserve_window_node *next_rsv; 1272 struct rb_node *next; 1273 spinlock_t *rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock; 1274 1275 if (!spin_trylock(rsv_lock)) 1276 return; 1277 1278 next = rb_next(&my_rsv->rsv_node); 1279 1280 if (!next) 1281 my_rsv->rsv_end += size; 1282 else { 1283 next_rsv = rb_entry(next, struct ext3_reserve_window_node, rsv_node); 1284 1285 if ((next_rsv->rsv_start - my_rsv->rsv_end - 1) >= size) 1286 my_rsv->rsv_end += size; 1287 else 1288 my_rsv->rsv_end = next_rsv->rsv_start - 1; 1289 } 1290 spin_unlock(rsv_lock); 1291} 1292 1293/** 1294 * ext3_try_to_allocate_with_rsv() 1295 * @sb: superblock 1296 * @handle: handle to this transaction 1297 * @group: given allocation block group 1298 * @bitmap_bh: bufferhead holds the block bitmap 1299 * @grp_goal: given target block within the group 1300 * @my_rsv: reservation window 1301 * @count: target number of blocks to allocate 1302 * @errp: pointer to store the error code 1303 * 1304 * This is the main function used to allocate a new block and its reservation 1305 * window. 1306 * 1307 * Each time when a new block allocation is need, first try to allocate from 1308 * its own reservation. If it does not have a reservation window, instead of 1309 * looking for a free bit on bitmap first, then look up the reservation list to 1310 * see if it is inside somebody else's reservation window, we try to allocate a 1311 * reservation window for it starting from the goal first. Then do the block 1312 * allocation within the reservation window. 1313 * 1314 * This will avoid keeping on searching the reservation list again and 1315 * again when somebody is looking for a free block (without 1316 * reservation), and there are lots of free blocks, but they are all 1317 * being reserved. 1318 * 1319 * We use a red-black tree for the per-filesystem reservation list. 1320 * 1321 */ 1322static ext3_grpblk_t 1323ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle, 1324 unsigned int group, struct buffer_head *bitmap_bh, 1325 ext3_grpblk_t grp_goal, 1326 struct ext3_reserve_window_node * my_rsv, 1327 unsigned long *count, int *errp) 1328{ 1329 ext3_fsblk_t group_first_block, group_last_block; 1330 ext3_grpblk_t ret = 0; 1331 int fatal; 1332 unsigned long num = *count; 1333 1334 *errp = 0; 1335 1336 /* 1337 * Make sure we use undo access for the bitmap, because it is critical 1338 * that we do the frozen_data COW on bitmap buffers in all cases even 1339 * if the buffer is in BJ_Forget state in the committing transaction. 
1340 */ 1341 BUFFER_TRACE(bitmap_bh, "get undo access for new block"); 1342 fatal = ext3_journal_get_undo_access(handle, bitmap_bh); 1343 if (fatal) { 1344 *errp = fatal; 1345 return -1; 1346 } 1347 1348 /* 1349 * we don't deal with reservation when 1350 * filesystem is mounted without reservation 1351 * or the file is not a regular file 1352 * or last attempt to allocate a block with reservation turned on failed 1353 */ 1354 if (my_rsv == NULL ) { 1355 ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, 1356 grp_goal, count, NULL); 1357 goto out; 1358 } 1359 /* 1360 * grp_goal is a group relative block number (if there is a goal) 1361 * 0 <= grp_goal < EXT3_BLOCKS_PER_GROUP(sb) 1362 * first block is a filesystem wide block number 1363 * first block is the block number of the first block in this group 1364 */ 1365 group_first_block = ext3_group_first_block_no(sb, group); 1366 group_last_block = group_first_block + (EXT3_BLOCKS_PER_GROUP(sb) - 1); 1367 1368 /* 1369 * Basically we will allocate a new block from inode's reservation 1370 * window. 1371 * 1372 * We need to allocate a new reservation window, if: 1373 * a) inode does not have a reservation window; or 1374 * b) last attempt to allocate a block from existing reservation 1375 * failed; or 1376 * c) we come here with a goal and with a reservation window 1377 * 1378 * We do not need to allocate a new reservation window if we come here 1379 * at the beginning with a goal and the goal is inside the window, or 1380 * we don't have a goal but already have a reservation window. 1381 * then we could go to allocate from the reservation window directly. 1382 */ 1383 while (1) { 1384 if (rsv_is_empty(&my_rsv->rsv_window) || (ret < 0) || 1385 !goal_in_my_reservation(&my_rsv->rsv_window, 1386 grp_goal, group, sb)) { 1387 if (my_rsv->rsv_goal_size < *count) 1388 my_rsv->rsv_goal_size = *count; 1389 ret = alloc_new_reservation(my_rsv, grp_goal, sb, 1390 group, bitmap_bh); 1391 if (ret < 0) 1392 break; /* failed */ 1393 1394 if (!goal_in_my_reservation(&my_rsv->rsv_window, 1395 grp_goal, group, sb)) 1396 grp_goal = -1; 1397 } else if (grp_goal >= 0) { 1398 int curr = my_rsv->rsv_end - 1399 (grp_goal + group_first_block) + 1; 1400 1401 if (curr < *count) 1402 try_to_extend_reservation(my_rsv, sb, 1403 *count - curr); 1404 } 1405 1406 if ((my_rsv->rsv_start > group_last_block) || 1407 (my_rsv->rsv_end < group_first_block)) { 1408 rsv_window_dump(&EXT3_SB(sb)->s_rsv_window_root, 1); 1409 BUG(); 1410 } 1411 ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, 1412 grp_goal, &num, &my_rsv->rsv_window); 1413 if (ret >= 0) { 1414 my_rsv->rsv_alloc_hit += num; 1415 *count = num; 1416 break; /* succeed */ 1417 } 1418 num = *count; 1419 } 1420out: 1421 if (ret >= 0) { 1422 BUFFER_TRACE(bitmap_bh, "journal_dirty_metadata for " 1423 "bitmap block"); 1424 fatal = ext3_journal_dirty_metadata(handle, bitmap_bh); 1425 if (fatal) { 1426 *errp = fatal; 1427 return -1; 1428 } 1429 return ret; 1430 } 1431 1432 BUFFER_TRACE(bitmap_bh, "journal_release_buffer"); 1433 ext3_journal_release_buffer(handle, bitmap_bh); 1434 return ret; 1435} 1436 1437/** 1438 * ext3_has_free_blocks() 1439 * @sbi: in-core super block structure. 1440 * 1441 * Check if filesystem has at least 1 free block available for allocation. 
1442 */ 1443static int ext3_has_free_blocks(struct ext3_sb_info *sbi, int use_reservation) 1444{ 1445 ext3_fsblk_t free_blocks, root_blocks; 1446 1447 free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); 1448 root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count); 1449 if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) && 1450 !use_reservation && sbi->s_resuid != current_fsuid() && 1451 (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) { 1452 return 0; 1453 } 1454 return 1; 1455} 1456 1457/** 1458 * ext3_should_retry_alloc() 1459 * @sb: super block 1460 * @retries number of attemps has been made 1461 * 1462 * ext3_should_retry_alloc() is called when ENOSPC is returned, and if 1463 * it is profitable to retry the operation, this function will wait 1464 * for the current or committing transaction to complete, and then 1465 * return TRUE. 1466 * 1467 * if the total number of retries exceed three times, return FALSE. 1468 */ 1469int ext3_should_retry_alloc(struct super_block *sb, int *retries) 1470{ 1471 if (!ext3_has_free_blocks(EXT3_SB(sb), 0) || (*retries)++ > 3) 1472 return 0; 1473 1474 jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id); 1475 1476 return journal_force_commit_nested(EXT3_SB(sb)->s_journal); 1477} 1478 1479/** 1480 * ext3_new_blocks() -- core block(s) allocation function 1481 * @handle: handle to this transaction 1482 * @inode: file inode 1483 * @goal: given target block(filesystem wide) 1484 * @count: target number of blocks to allocate 1485 * @errp: error code 1486 * 1487 * ext3_new_blocks uses a goal block to assist allocation. It tries to 1488 * allocate block(s) from the block group contains the goal block first. If that 1489 * fails, it will try to allocate block(s) from other block groups without 1490 * any specific goal block. 1491 * 1492 */ 1493ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode, 1494 ext3_fsblk_t goal, unsigned long *count, int *errp) 1495{ 1496 struct buffer_head *bitmap_bh = NULL; 1497 struct buffer_head *gdp_bh; 1498 int group_no; 1499 int goal_group; 1500 ext3_grpblk_t grp_target_blk; /* blockgroup relative goal block */ 1501 ext3_grpblk_t grp_alloc_blk; /* blockgroup-relative allocated block*/ 1502 ext3_fsblk_t ret_block; /* filesyetem-wide allocated block */ 1503 int bgi; /* blockgroup iteration index */ 1504 int fatal = 0, err; 1505 int performed_allocation = 0; 1506 ext3_grpblk_t free_blocks; /* number of free blocks in a group */ 1507 struct super_block *sb; 1508 struct ext3_group_desc *gdp; 1509 struct ext3_super_block *es; 1510 struct ext3_sb_info *sbi; 1511 struct ext3_reserve_window_node *my_rsv = NULL; 1512 struct ext3_block_alloc_info *block_i; 1513 unsigned short windowsz = 0; 1514#ifdef EXT3FS_DEBUG 1515 static int goal_hits, goal_attempts; 1516#endif 1517 unsigned long ngroups; 1518 unsigned long num = *count; 1519 1520 *errp = -ENOSPC; 1521 sb = inode->i_sb; 1522 1523 /* 1524 * Check quota for allocation of this block. 
1525 */ 1526 err = dquot_alloc_block(inode, num); 1527 if (err) { 1528 *errp = err; 1529 return 0; 1530 } 1531 1532 trace_ext3_request_blocks(inode, goal, num); 1533 1534 sbi = EXT3_SB(sb); 1535 es = sbi->s_es; 1536 ext3_debug("goal=%lu.\n", goal); 1537 /* 1538 * Allocate a block from reservation only when 1539 * filesystem is mounted with reservation(default,-o reservation), and 1540 * it's a regular file, and 1541 * the desired window size is greater than 0 (One could use ioctl 1542 * command EXT3_IOC_SETRSVSZ to set the window size to 0 to turn off 1543 * reservation on that particular file) 1544 */ 1545 block_i = EXT3_I(inode)->i_block_alloc_info; 1546 if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0)) 1547 my_rsv = &block_i->rsv_window_node; 1548 1549 if (!ext3_has_free_blocks(sbi, IS_NOQUOTA(inode))) { 1550 *errp = -ENOSPC; 1551 goto out; 1552 } 1553 1554 /* 1555 * First, test whether the goal block is free. 1556 */ 1557 if (goal < le32_to_cpu(es->s_first_data_block) || 1558 goal >= le32_to_cpu(es->s_blocks_count)) 1559 goal = le32_to_cpu(es->s_first_data_block); 1560 group_no = (goal - le32_to_cpu(es->s_first_data_block)) / 1561 EXT3_BLOCKS_PER_GROUP(sb); 1562 goal_group = group_no; 1563retry_alloc: 1564 gdp = ext3_get_group_desc(sb, group_no, &gdp_bh); 1565 if (!gdp) 1566 goto io_error; 1567 1568 free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); 1569 /* 1570 * if there is not enough free blocks to make a new resevation 1571 * turn off reservation for this allocation 1572 */ 1573 if (my_rsv && (free_blocks < windowsz) 1574 && (free_blocks > 0) 1575 && (rsv_is_empty(&my_rsv->rsv_window))) 1576 my_rsv = NULL; 1577 1578 if (free_blocks > 0) { 1579 grp_target_blk = ((goal - le32_to_cpu(es->s_first_data_block)) % 1580 EXT3_BLOCKS_PER_GROUP(sb)); 1581 bitmap_bh = read_block_bitmap(sb, group_no); 1582 if (!bitmap_bh) 1583 goto io_error; 1584 grp_alloc_blk = ext3_try_to_allocate_with_rsv(sb, handle, 1585 group_no, bitmap_bh, grp_target_blk, 1586 my_rsv, &num, &fatal); 1587 if (fatal) 1588 goto out; 1589 if (grp_alloc_blk >= 0) 1590 goto allocated; 1591 } 1592 1593 ngroups = EXT3_SB(sb)->s_groups_count; 1594 smp_rmb(); 1595 1596 /* 1597 * Now search the rest of the groups. We assume that 1598 * group_no and gdp correctly point to the last group visited. 1599 */ 1600 for (bgi = 0; bgi < ngroups; bgi++) { 1601 group_no++; 1602 if (group_no >= ngroups) 1603 group_no = 0; 1604 gdp = ext3_get_group_desc(sb, group_no, &gdp_bh); 1605 if (!gdp) 1606 goto io_error; 1607 free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); 1608 /* 1609 * skip this group (and avoid loading bitmap) if there 1610 * are no free blocks 1611 */ 1612 if (!free_blocks) 1613 continue; 1614 /* 1615 * skip this group if the number of 1616 * free blocks is less than half of the reservation 1617 * window size. 1618 */ 1619 if (my_rsv && (free_blocks <= (windowsz/2))) 1620 continue; 1621 1622 brelse(bitmap_bh); 1623 bitmap_bh = read_block_bitmap(sb, group_no); 1624 if (!bitmap_bh) 1625 goto io_error; 1626 /* 1627 * try to allocate block(s) from this group, without a goal(-1). 
1628 */ 1629 grp_alloc_blk = ext3_try_to_allocate_with_rsv(sb, handle, 1630 group_no, bitmap_bh, -1, my_rsv, 1631 &num, &fatal); 1632 if (fatal) 1633 goto out; 1634 if (grp_alloc_blk >= 0) 1635 goto allocated; 1636 } 1637 /* 1638 * We may end up a bogus earlier ENOSPC error due to 1639 * filesystem is "full" of reservations, but 1640 * there maybe indeed free blocks available on disk 1641 * In this case, we just forget about the reservations 1642 * just do block allocation as without reservations. 1643 */ 1644 if (my_rsv) { 1645 my_rsv = NULL; 1646 windowsz = 0; 1647 group_no = goal_group; 1648 goto retry_alloc; 1649 } 1650 /* No space left on the device */ 1651 *errp = -ENOSPC; 1652 goto out; 1653 1654allocated: 1655 1656 ext3_debug("using block group %d(%d)\n", 1657 group_no, gdp->bg_free_blocks_count); 1658 1659 BUFFER_TRACE(gdp_bh, "get_write_access"); 1660 fatal = ext3_journal_get_write_access(handle, gdp_bh); 1661 if (fatal) 1662 goto out; 1663 1664 ret_block = grp_alloc_blk + ext3_group_first_block_no(sb, group_no); 1665 1666 if (in_range(le32_to_cpu(gdp->bg_block_bitmap), ret_block, num) || 1667 in_range(le32_to_cpu(gdp->bg_inode_bitmap), ret_block, num) || 1668 in_range(ret_block, le32_to_cpu(gdp->bg_inode_table), 1669 EXT3_SB(sb)->s_itb_per_group) || 1670 in_range(ret_block + num - 1, le32_to_cpu(gdp->bg_inode_table), 1671 EXT3_SB(sb)->s_itb_per_group)) { 1672 ext3_error(sb, "ext3_new_block", 1673 "Allocating block in system zone - " 1674 "blocks from "E3FSBLK", length %lu", 1675 ret_block, num); 1676 /* 1677 * claim_block() marked the blocks we allocated as in use. So we 1678 * may want to selectively mark some of the blocks as free. 1679 */ 1680 goto retry_alloc; 1681 } 1682 1683 performed_allocation = 1; 1684 1685#ifdef CONFIG_JBD_DEBUG 1686 { 1687 struct buffer_head *debug_bh; 1688 1689 /* Record bitmap buffer state in the newly allocated block */ 1690 debug_bh = sb_find_get_block(sb, ret_block); 1691 if (debug_bh) { 1692 BUFFER_TRACE(debug_bh, "state when allocated"); 1693 BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap state"); 1694 brelse(debug_bh); 1695 } 1696 } 1697 jbd_lock_bh_state(bitmap_bh); 1698 spin_lock(sb_bgl_lock(sbi, group_no)); 1699 if (buffer_jbd(bitmap_bh) && bh2jh(bitmap_bh)->b_committed_data) { 1700 int i; 1701 1702 for (i = 0; i < num; i++) { 1703 if (ext3_test_bit(grp_alloc_blk+i, 1704 bh2jh(bitmap_bh)->b_committed_data)) { 1705 printk("%s: block was unexpectedly set in " 1706 "b_committed_data\n", __func__); 1707 } 1708 } 1709 } 1710 ext3_debug("found bit %d\n", grp_alloc_blk); 1711 spin_unlock(sb_bgl_lock(sbi, group_no)); 1712 jbd_unlock_bh_state(bitmap_bh); 1713#endif 1714 1715 if (ret_block + num - 1 >= le32_to_cpu(es->s_blocks_count)) { 1716 ext3_error(sb, "ext3_new_block", 1717 "block("E3FSBLK") >= blocks count(%d) - " 1718 "block_group = %d, es == %p ", ret_block, 1719 le32_to_cpu(es->s_blocks_count), group_no, es); 1720 goto out; 1721 } 1722 1723 /* 1724 * It is up to the caller to add the new buffer to a journal 1725 * list of some description. We don't know in advance whether 1726 * the caller wants to use it as metadata or data. 1727 */ 1728 ext3_debug("allocating block %lu. 
Goal hits %d of %d.\n", 1729 ret_block, goal_hits, goal_attempts); 1730 1731 spin_lock(sb_bgl_lock(sbi, group_no)); 1732 le16_add_cpu(&gdp->bg_free_blocks_count, -num); 1733 spin_unlock(sb_bgl_lock(sbi, group_no)); 1734 percpu_counter_sub(&sbi->s_freeblocks_counter, num); 1735 1736 BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor"); 1737 err = ext3_journal_dirty_metadata(handle, gdp_bh); 1738 if (!fatal) 1739 fatal = err; 1740 1741 if (fatal) 1742 goto out; 1743 1744 *errp = 0; 1745 brelse(bitmap_bh); 1746 dquot_free_block(inode, *count-num); 1747 *count = num; 1748 1749 trace_ext3_allocate_blocks(inode, goal, num, 1750 (unsigned long long)ret_block); 1751 1752 return ret_block; 1753 1754io_error: 1755 *errp = -EIO; 1756out: 1757 if (fatal) { 1758 *errp = fatal; 1759 ext3_std_error(sb, fatal); 1760 } 1761 /* 1762 * Undo the block allocation 1763 */ 1764 if (!performed_allocation) 1765 dquot_free_block(inode, *count); 1766 brelse(bitmap_bh); 1767 return 0; 1768} 1769 1770ext3_fsblk_t ext3_new_block(handle_t *handle, struct inode *inode, 1771 ext3_fsblk_t goal, int *errp) 1772{ 1773 unsigned long count = 1; 1774 1775 return ext3_new_blocks(handle, inode, goal, &count, errp); 1776} 1777 1778/** 1779 * ext3_count_free_blocks() -- count filesystem free blocks 1780 * @sb: superblock 1781 * 1782 * Adds up the number of free blocks from each block group. 1783 */ 1784ext3_fsblk_t ext3_count_free_blocks(struct super_block *sb) 1785{ 1786 ext3_fsblk_t desc_count; 1787 struct ext3_group_desc *gdp; 1788 int i; 1789 unsigned long ngroups = EXT3_SB(sb)->s_groups_count; 1790#ifdef EXT3FS_DEBUG 1791 struct ext3_super_block *es; 1792 ext3_fsblk_t bitmap_count; 1793 unsigned long x; 1794 struct buffer_head *bitmap_bh = NULL; 1795 1796 es = EXT3_SB(sb)->s_es; 1797 desc_count = 0; 1798 bitmap_count = 0; 1799 gdp = NULL; 1800 1801 smp_rmb(); 1802 for (i = 0; i < ngroups; i++) { 1803 gdp = ext3_get_group_desc(sb, i, NULL); 1804 if (!gdp) 1805 continue; 1806 desc_count += le16_to_cpu(gdp->bg_free_blocks_count); 1807 brelse(bitmap_bh); 1808 bitmap_bh = read_block_bitmap(sb, i); 1809 if (bitmap_bh == NULL) 1810 continue; 1811 1812 x = ext3_count_free(bitmap_bh, sb->s_blocksize); 1813 printk("group %d: stored = %d, counted = %lu\n", 1814 i, le16_to_cpu(gdp->bg_free_blocks_count), x); 1815 bitmap_count += x; 1816 } 1817 brelse(bitmap_bh); 1818 printk("ext3_count_free_blocks: stored = "E3FSBLK 1819 ", computed = "E3FSBLK", "E3FSBLK"\n", 1820 le32_to_cpu(es->s_free_blocks_count), 1821 desc_count, bitmap_count); 1822 return bitmap_count; 1823#else 1824 desc_count = 0; 1825 smp_rmb(); 1826 for (i = 0; i < ngroups; i++) { 1827 gdp = ext3_get_group_desc(sb, i, NULL); 1828 if (!gdp) 1829 continue; 1830 desc_count += le16_to_cpu(gdp->bg_free_blocks_count); 1831 } 1832 1833 return desc_count; 1834#endif 1835} 1836 1837static inline int test_root(int a, int b) 1838{ 1839 int num = b; 1840 1841 while (a > num) 1842 num *= b; 1843 return num == a; 1844} 1845 1846static int ext3_group_sparse(int group) 1847{ 1848 if (group <= 1) 1849 return 1; 1850 if (!(group & 1)) 1851 return 0; 1852 return (test_root(group, 7) || test_root(group, 5) || 1853 test_root(group, 3)); 1854} 1855 1856/** 1857 * ext3_bg_has_super - number of blocks used by the superblock in group 1858 * @sb: superblock for filesystem 1859 * @group: group number to check 1860 * 1861 * Return the number of blocks used by the superblock (primary or backup) 1862 * in this group. Currently this will be only 0 or 1. 
/**
 * ext3_bg_has_super - number of blocks used by the superblock in group
 * @sb: superblock for filesystem
 * @group: group number to check
 *
 * Return the number of blocks used by the superblock (primary or backup)
 * in this group.  Currently this will be only 0 or 1.
 */
int ext3_bg_has_super(struct super_block *sb, int group)
{
	if (EXT3_HAS_RO_COMPAT_FEATURE(sb,
				EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER) &&
	    !ext3_group_sparse(group))
		return 0;
	return 1;
}

static unsigned long ext3_bg_num_gdb_meta(struct super_block *sb, int group)
{
	unsigned long metagroup = group / EXT3_DESC_PER_BLOCK(sb);
	unsigned long first = metagroup * EXT3_DESC_PER_BLOCK(sb);
	unsigned long last = first + EXT3_DESC_PER_BLOCK(sb) - 1;

	if (group == first || group == first + 1 || group == last)
		return 1;
	return 0;
}

static unsigned long ext3_bg_num_gdb_nometa(struct super_block *sb, int group)
{
	return ext3_bg_has_super(sb, group) ? EXT3_SB(sb)->s_gdb_count : 0;
}

/**
 * ext3_bg_num_gdb - number of blocks used by the group table in group
 * @sb: superblock for filesystem
 * @group: group number to check
 *
 * Return the number of blocks used by the group descriptor table
 * (primary or backup) in this group.  In the future there may be a
 * different number of descriptor blocks in each group.
 */
unsigned long ext3_bg_num_gdb(struct super_block *sb, int group)
{
	unsigned long first_meta_bg =
			le32_to_cpu(EXT3_SB(sb)->s_es->s_first_meta_bg);
	unsigned long metagroup = group / EXT3_DESC_PER_BLOCK(sb);

	if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_META_BG) ||
	    metagroup < first_meta_bg)
		return ext3_bg_num_gdb_nometa(sb, group);

	return ext3_bg_num_gdb_meta(sb, group);
}
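/*
 * Illustrative sketch (a standalone userspace program, not part of the
 * original file; DESC_PER_BLOCK of 128 assumes 4 KiB blocks and 32-byte
 * descriptors): with META_BG, groups are batched into metagroups of
 * EXT3_DESC_PER_BLOCK groups, and each metagroup keeps copies of its
 * single descriptor block only in its first, second and last group, so
 * the full table no longer has to fit after every backup superblock.
 * For the second metagroup this prints groups 128, 129 and 255.
 */
#include <stdio.h>

#define DESC_PER_BLOCK	128	/* assumed: 4096-byte block / 32-byte desc */

static int meta_bg_num_gdb(unsigned long group)
{
	unsigned long metagroup = group / DESC_PER_BLOCK;
	unsigned long first = metagroup * DESC_PER_BLOCK;
	unsigned long last = first + DESC_PER_BLOCK - 1;

	return (group == first || group == first + 1 || group == last) ? 1 : 0;
}

int main(void)
{
	unsigned long group;

	for (group = 128; group < 256; group++)
		if (meta_bg_num_gdb(group))
			printf("group %lu carries a descriptor block\n", group);
	return 0;
}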
/**
 * ext3_trim_all_free() -- trim all free space in an allocation group
 * @sb:		super block for file system
 * @group:	allocation group to trim
 * @start:	first group block to examine
 * @max:	last group block to examine
 * @minblocks:	minimum extent block count
 *
 * ext3_trim_all_free walks through the group's block bitmap searching for
 * free blocks.  When a free block is found, it claims it and every
 * consecutive free block after it to build the largest possible free
 * extent, stopping at the first used block.  It then issues a TRIM command
 * on this extent and releases the extent in the block bitmap again.  This
 * is repeated until the whole group has been scanned.
 */
static ext3_grpblk_t ext3_trim_all_free(struct super_block *sb,
					unsigned int group,
					ext3_grpblk_t start, ext3_grpblk_t max,
					ext3_grpblk_t minblocks)
{
	handle_t *handle;
	ext3_grpblk_t next, free_blocks, bit, freed, count = 0;
	ext3_fsblk_t discard_block;
	struct ext3_sb_info *sbi;
	struct buffer_head *gdp_bh, *bitmap_bh = NULL;
	struct ext3_group_desc *gdp;
	int err = 0, ret = 0;

	/*
	 * We will update one block bitmap, and one group descriptor
	 */
	handle = ext3_journal_start_sb(sb, 2);
	if (IS_ERR(handle))
		return PTR_ERR(handle);

	bitmap_bh = read_block_bitmap(sb, group);
	if (!bitmap_bh) {
		err = -EIO;
		goto err_out;
	}

	BUFFER_TRACE(bitmap_bh, "getting undo access");
	err = ext3_journal_get_undo_access(handle, bitmap_bh);
	if (err)
		goto err_out;

	gdp = ext3_get_group_desc(sb, group, &gdp_bh);
	if (!gdp) {
		err = -EIO;
		goto err_out;
	}

	BUFFER_TRACE(gdp_bh, "get_write_access");
	err = ext3_journal_get_write_access(handle, gdp_bh);
	if (err)
		goto err_out;

	free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
	sbi = EXT3_SB(sb);

	/* Walk through the whole group */
	while (start < max) {
		start = bitmap_search_next_usable_block(start, bitmap_bh, max);
		if (start < 0)
			break;
		next = start;

		/*
		 * Allocate contiguous free extents by setting bits in the
		 * block bitmap
		 */
		while (next < max
			&& claim_block(sb_bgl_lock(sbi, group),
					next, bitmap_bh)) {
			next++;
		}

		/* We did not claim any blocks */
		if (next == start)
			continue;

		discard_block = (ext3_fsblk_t)start +
				ext3_group_first_block_no(sb, group);

		/* Update counters */
		spin_lock(sb_bgl_lock(sbi, group));
		le16_add_cpu(&gdp->bg_free_blocks_count, start - next);
		spin_unlock(sb_bgl_lock(sbi, group));
		percpu_counter_sub(&sbi->s_freeblocks_counter, next - start);

		free_blocks -= next - start;
		/* Do not issue a TRIM on extents smaller than minblocks */
		if ((next - start) < minblocks)
			goto free_extent;

		trace_ext3_discard_blocks(sb, discard_block, next - start);
		/* Send the TRIM command down to the device */
		err = sb_issue_discard(sb, discard_block, next - start,
				       GFP_NOFS, 0);
		count += (next - start);
free_extent:
		freed = 0;

		/*
		 * Clear bits in the bitmap
		 */
		for (bit = start; bit < next; bit++) {
			BUFFER_TRACE(bitmap_bh, "clear bit");
			if (!ext3_clear_bit_atomic(sb_bgl_lock(sbi, group),
						bit, bitmap_bh->b_data)) {
				ext3_error(sb, __func__,
					"bit already cleared for block "E3FSBLK,
					(unsigned long)bit);
				BUFFER_TRACE(bitmap_bh, "bit already cleared");
			} else {
				freed++;
			}
		}

		/* Update counters */
		spin_lock(sb_bgl_lock(sbi, group));
		le16_add_cpu(&gdp->bg_free_blocks_count, freed);
		spin_unlock(sb_bgl_lock(sbi, group));
		percpu_counter_add(&sbi->s_freeblocks_counter, freed);

		start = next;
		if (err < 0) {
			if (err != -EOPNOTSUPP)
				ext3_warning(sb, __func__, "Discard command "
					     "returned error %d\n", err);
			break;
		}

		if (fatal_signal_pending(current)) {
			err = -ERESTARTSYS;
			break;
		}

		cond_resched();

		/* No more suitable extents */
		if (free_blocks < minblocks)
			break;
	}

	/* We dirtied the bitmap block */
	BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
	ret = ext3_journal_dirty_metadata(handle, bitmap_bh);
	if (!err)
		err = ret;

	/* And the group descriptor block */
	BUFFER_TRACE(gdp_bh, "dirtied group descriptor block");
	ret = ext3_journal_dirty_metadata(handle, gdp_bh);
	if (!err)
		err = ret;

	ext3_debug("trimmed %d blocks in the group %d\n",
		   count, group);

err_out:
	if (err)
		count = err;
	ext3_journal_stop(handle);
	brelse(bitmap_bh);

	return count;
}
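/*
 * Illustrative sketch (a standalone userspace program over an in-memory
 * bitmap, not part of the original file): the same walk as
 * ext3_trim_all_free() without journalling or locking - find a free bit,
 * greedily claim the run of free bits after it, "discard" the run if it
 * is at least minblocks long, then clear the claimed bits again.
 */
#include <stdio.h>

#define NBITS	32

static int test_bit(const unsigned char *map, int bit)
{
	return (map[bit / 8] >> (bit % 8)) & 1;
}

static void change_bit(unsigned char *map, int bit, int set)
{
	if (set)
		map[bit / 8] |= 1 << (bit % 8);
	else
		map[bit / 8] &= ~(1 << (bit % 8));
}

int main(void)
{
	/* set bits = in use; this bitmap has two free runs: 4..9 and 20..23 */
	unsigned char bitmap[NBITS / 8] = { 0x0F, 0xFC, 0x0F, 0xFF };
	int start = 0, next, minblocks = 3;

	while (start < NBITS) {
		while (start < NBITS && test_bit(bitmap, start))
			start++;			/* find first free bit */
		next = start;
		while (next < NBITS && !test_bit(bitmap, next)) {
			change_bit(bitmap, next, 1);	/* claim it */
			next++;
		}
		if (next == start)
			break;
		if (next - start >= minblocks)
			printf("discard blocks %d..%d\n", start, next - 1);
		while (start < next)
			change_bit(bitmap, start++, 0);	/* release it again */
	}
	return 0;
}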
/**
 * ext3_trim_fs() -- trim ioctl handler
 * @sb:		superblock for filesystem
 * @range:	fstrim_range structure; the byte range to trim and the
 *		minimum extent length, in bytes
 *
 * ext3_trim_fs walks all allocation groups that contain bytes from
 * range->start to range->start + range->len and calls ext3_trim_all_free()
 * on each of them to trim all free space in the group.
 */
int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range)
{
	ext3_grpblk_t last_block, first_block, free_blocks;
	unsigned long first_group, last_group;
	unsigned long group, ngroups;
	struct ext3_group_desc *gdp;
	struct ext3_super_block *es = EXT3_SB(sb)->s_es;
	uint64_t start, len, minlen, trimmed;
	ext3_fsblk_t max_blks = le32_to_cpu(es->s_blocks_count);
	int ret = 0;

	start = (range->start >> sb->s_blocksize_bits) +
		le32_to_cpu(es->s_first_data_block);
	len = range->len >> sb->s_blocksize_bits;
	minlen = range->minlen >> sb->s_blocksize_bits;
	trimmed = 0;

	if (unlikely(minlen > EXT3_BLOCKS_PER_GROUP(sb)))
		return -EINVAL;
	if (start >= max_blks)
		return -EINVAL;
	if (start + len > max_blks)
		len = max_blks - start;

	ngroups = EXT3_SB(sb)->s_groups_count;
	smp_rmb();

	/* Determine first and last group to examine based on start and len */
	ext3_get_group_no_and_offset(sb, (ext3_fsblk_t) start,
				     &first_group, &first_block);
	ext3_get_group_no_and_offset(sb, (ext3_fsblk_t) (start + len),
				     &last_group, &last_block);
	last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group;
	last_block = EXT3_BLOCKS_PER_GROUP(sb);

	if (first_group > last_group)
		return -EINVAL;

	for (group = first_group; group <= last_group; group++) {
		gdp = ext3_get_group_desc(sb, group, NULL);
		if (!gdp)
			break;

		free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
		if (free_blocks < minlen)
			continue;

		/*
		 * For all groups except the last one, last_block is always
		 * EXT3_BLOCKS_PER_GROUP(sb); we only need to change it for
		 * the last group, in which case first_block + len <
		 * EXT3_BLOCKS_PER_GROUP(sb).
		 */
		if (first_block + len < EXT3_BLOCKS_PER_GROUP(sb))
			last_block = first_block + len;
		len -= last_block - first_block;

		ret = ext3_trim_all_free(sb, group, first_block,
					 last_block, minlen);
		if (ret < 0)
			break;

		trimmed += ret;
		first_block = 0;
	}

	if (ret >= 0)
		ret = 0;
	range->len = trimmed * sb->s_blocksize;

	return ret;
}
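/*
 * Illustrative sketch (a standalone userspace program, not part of the
 * original file): ext3_trim_fs() is reached through the FITRIM ioctl, the
 * same interface fstrim(8) uses.  The mount point argument and the 1 MiB
 * minlen are assumed example values; the kernel rewrites range.len with
 * the number of bytes actually trimmed.
 */
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>		/* FITRIM, struct fstrim_range */

int main(int argc, char **argv)
{
	struct fstrim_range range;
	int fd;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <mountpoint>\n", argv[0]);
		return 1;
	}

	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	memset(&range, 0, sizeof(range));
	range.start = 0;
	range.len = (__u64)-1;		/* whole filesystem */
	range.minlen = 1024 * 1024;	/* skip extents smaller than 1 MiB */

	if (ioctl(fd, FITRIM, &range) < 0) {
		perror("FITRIM");
		close(fd);
		return 1;
	}

	printf("trimmed %llu bytes\n", (unsigned long long)range.len);
	close(fd);
	return 0;
}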