Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ext2 reservations

Val's cross-port of the ext3 reservations code into ext2.

[mbligh@mbligh.org: Small type error for printk]
[akpm@linux-foundation.org: fix types, sync with ext3]
[mbligh@mbligh.org: Bring ext2 reservations code in line with latest ext3]
[akpm@linux-foundation.org: kill noisy printk]
[akpm@linux-foundation.org: remember to dirty the gdp's block]
[akpm@linux-foundation.org: cross-port the missed 5dea5176e5c32ef9f0d1a41d28427b3bf6881b3a]
[akpm@linux-foundation.org: cross-port e6022603b9aa7d61d20b392e69edcdbbc1789969]
[akpm@linux-foundation.org: Port the omitted 08fb306fe63d98eb86e3b16f4cc21816fa47f18e]
[akpm@linux-foundation.org: Backport the missed 20acaa18d0c002fec180956f87adeb3f11f635a6]
[akpm@linux-foundation.org: fixes]
[cmm@us.ibm.com: fix reservation extension]
[bunk@stusta.de: make ext2_get_blocks() static]
[hugh@veritas.com: fix hang]
[hugh@veritas.com: ext2_new_blocks should reset the reservation window size]
[hugh@veritas.com: ext2 balloc: fix off-by-one against rsv_end]
[hugh@veritas.com: grp_goal 0 is a genuine goal (unlike -1), so ext2_try_to_allocate_with_rsv should treat it as such]
[hugh@veritas.com: rbtree usage cleanup]
[pbadari@us.ibm.com: Fix for ext2 reservation]
[bunk@kernel.org: remove fs/ext2/balloc.c:reserve_blocks()]
[hugh@veritas.com: ext2 balloc: use io_error label]
Cc: "Martin J. Bligh" <mbligh@mbligh.org>
Cc: Valerie Henson <val_henson@linux.intel.com>
Cc: Mingming Cao <cmm@us.ibm.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Martin J. Bligh and committed by
Linus Torvalds
a686cd89 369f2389

+1569 -500
+1088 -249
fs/ext2/balloc.c
··· 133 133 return NULL; 134 134 } 135 135 136 - /* 137 - * Set sb->s_dirt here because the superblock was "logically" altered. We 138 - * need to recalculate its free blocks count and flush it out. 139 - */ 140 - static int reserve_blocks(struct super_block *sb, int count) 141 - { 142 - struct ext2_sb_info *sbi = EXT2_SB(sb); 143 - struct ext2_super_block *es = sbi->s_es; 144 - unsigned free_blocks; 145 - unsigned root_blocks; 146 - 147 - free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); 148 - root_blocks = le32_to_cpu(es->s_r_blocks_count); 149 - 150 - if (free_blocks < count) 151 - count = free_blocks; 152 - 153 - if (free_blocks < root_blocks + count && !capable(CAP_SYS_RESOURCE) && 154 - sbi->s_resuid != current->fsuid && 155 - (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) { 156 - /* 157 - * We are too close to reserve and we are not privileged. 158 - * Can we allocate anything at all? 159 - */ 160 - if (free_blocks > root_blocks) 161 - count = free_blocks - root_blocks; 162 - else 163 - return 0; 164 - } 165 - 166 - percpu_counter_sub(&sbi->s_freeblocks_counter, count); 167 - sb->s_dirt = 1; 168 - return count; 169 - } 170 - 171 136 static void release_blocks(struct super_block *sb, int count) 172 137 { 173 138 if (count) { ··· 143 178 } 144 179 } 145 180 146 - static int group_reserve_blocks(struct ext2_sb_info *sbi, int group_no, 147 - struct ext2_group_desc *desc, struct buffer_head *bh, int count) 148 - { 149 - unsigned free_blocks; 150 - 151 - if (!desc->bg_free_blocks_count) 152 - return 0; 153 - 154 - spin_lock(sb_bgl_lock(sbi, group_no)); 155 - free_blocks = le16_to_cpu(desc->bg_free_blocks_count); 156 - if (free_blocks < count) 157 - count = free_blocks; 158 - desc->bg_free_blocks_count = cpu_to_le16(free_blocks - count); 159 - spin_unlock(sb_bgl_lock(sbi, group_no)); 160 - mark_buffer_dirty(bh); 161 - return count; 162 - } 163 - 164 - static void group_release_blocks(struct super_block *sb, int group_no, 181 + static 
void group_adjust_blocks(struct super_block *sb, int group_no, 165 182 struct ext2_group_desc *desc, struct buffer_head *bh, int count) 166 183 { 167 184 if (count) { ··· 159 212 } 160 213 } 161 214 162 - /* Free given blocks, update quota and i_blocks field */ 215 + /* 216 + * The reservation window structure operations 217 + * -------------------------------------------- 218 + * Operations include: 219 + * dump, find, add, remove, is_empty, find_next_reservable_window, etc. 220 + * 221 + * We use a red-black tree to represent per-filesystem reservation 222 + * windows. 223 + * 224 + */ 225 + 226 + /** 227 + * __rsv_window_dump() -- Dump the filesystem block allocation reservation map 228 + * @rb_root: root of per-filesystem reservation rb tree 229 + * @verbose: verbose mode 230 + * @fn: function which wishes to dump the reservation map 231 + * 232 + * If verbose is turned on, it will print the whole block reservation 233 + * windows(start, end). Otherwise, it will only print out the "bad" windows, 234 + * those windows that overlap with their immediate neighbors. 
235 + */ 236 + #if 1 237 + static void __rsv_window_dump(struct rb_root *root, int verbose, 238 + const char *fn) 239 + { 240 + struct rb_node *n; 241 + struct ext2_reserve_window_node *rsv, *prev; 242 + int bad; 243 + 244 + restart: 245 + n = rb_first(root); 246 + bad = 0; 247 + prev = NULL; 248 + 249 + printk("Block Allocation Reservation Windows Map (%s):\n", fn); 250 + while (n) { 251 + rsv = rb_entry(n, struct ext2_reserve_window_node, rsv_node); 252 + if (verbose) 253 + printk("reservation window 0x%p " 254 + "start: %lu, end: %lu\n", 255 + rsv, rsv->rsv_start, rsv->rsv_end); 256 + if (rsv->rsv_start && rsv->rsv_start >= rsv->rsv_end) { 257 + printk("Bad reservation %p (start >= end)\n", 258 + rsv); 259 + bad = 1; 260 + } 261 + if (prev && prev->rsv_end >= rsv->rsv_start) { 262 + printk("Bad reservation %p (prev->end >= start)\n", 263 + rsv); 264 + bad = 1; 265 + } 266 + if (bad) { 267 + if (!verbose) { 268 + printk("Restarting reservation walk in verbose mode\n"); 269 + verbose = 1; 270 + goto restart; 271 + } 272 + } 273 + n = rb_next(n); 274 + prev = rsv; 275 + } 276 + printk("Window map complete.\n"); 277 + if (bad) 278 + BUG(); 279 + } 280 + #define rsv_window_dump(root, verbose) \ 281 + __rsv_window_dump((root), (verbose), __FUNCTION__) 282 + #else 283 + #define rsv_window_dump(root, verbose) do {} while (0) 284 + #endif 285 + 286 + /** 287 + * goal_in_my_reservation() 288 + * @rsv: inode's reservation window 289 + * @grp_goal: given goal block relative to the allocation block group 290 + * @group: the current allocation block group 291 + * @sb: filesystem super block 292 + * 293 + * Test if the given goal block (group relative) is within the file's 294 + * own block reservation window range. 295 + * 296 + * If the reservation window is outside the goal allocation group, return 0; 297 + * grp_goal (given goal block) could be -1, which means no specific 298 + * goal block. In this case, always return 1. 
299 + * If the goal block is within the reservation window, return 1; 300 + * otherwise, return 0; 301 + */ 302 + static int 303 + goal_in_my_reservation(struct ext2_reserve_window *rsv, ext2_grpblk_t grp_goal, 304 + unsigned int group, struct super_block * sb) 305 + { 306 + ext2_fsblk_t group_first_block, group_last_block; 307 + 308 + group_first_block = ext2_group_first_block_no(sb, group); 309 + group_last_block = group_first_block + EXT2_BLOCKS_PER_GROUP(sb) - 1; 310 + 311 + if ((rsv->_rsv_start > group_last_block) || 312 + (rsv->_rsv_end < group_first_block)) 313 + return 0; 314 + if ((grp_goal >= 0) && ((grp_goal + group_first_block < rsv->_rsv_start) 315 + || (grp_goal + group_first_block > rsv->_rsv_end))) 316 + return 0; 317 + return 1; 318 + } 319 + 320 + /** 321 + * search_reserve_window() 322 + * @rb_root: root of reservation tree 323 + * @goal: target allocation block 324 + * 325 + * Find the reserved window which includes the goal, or the previous one 326 + * if the goal is not in any window. 327 + * Returns NULL if there are no windows or if all windows start after the goal. 328 + */ 329 + static struct ext2_reserve_window_node * 330 + search_reserve_window(struct rb_root *root, ext2_fsblk_t goal) 331 + { 332 + struct rb_node *n = root->rb_node; 333 + struct ext2_reserve_window_node *rsv; 334 + 335 + if (!n) 336 + return NULL; 337 + 338 + do { 339 + rsv = rb_entry(n, struct ext2_reserve_window_node, rsv_node); 340 + 341 + if (goal < rsv->rsv_start) 342 + n = n->rb_left; 343 + else if (goal > rsv->rsv_end) 344 + n = n->rb_right; 345 + else 346 + return rsv; 347 + } while (n); 348 + /* 349 + * We've fallen off the end of the tree: the goal wasn't inside 350 + * any particular node. OK, the previous node must be to one 351 + * side of the interval containing the goal. If it's the RHS, 352 + * we need to back up one. 
353 + */ 354 + if (rsv->rsv_start > goal) { 355 + n = rb_prev(&rsv->rsv_node); 356 + rsv = rb_entry(n, struct ext2_reserve_window_node, rsv_node); 357 + } 358 + return rsv; 359 + } 360 + 361 + /* 362 + * ext2_rsv_window_add() -- Insert a window to the block reservation rb tree. 363 + * @sb: super block 364 + * @rsv: reservation window to add 365 + * 366 + * Must be called with rsv_lock held. 367 + */ 368 + void ext2_rsv_window_add(struct super_block *sb, 369 + struct ext2_reserve_window_node *rsv) 370 + { 371 + struct rb_root *root = &EXT2_SB(sb)->s_rsv_window_root; 372 + struct rb_node *node = &rsv->rsv_node; 373 + ext2_fsblk_t start = rsv->rsv_start; 374 + 375 + struct rb_node ** p = &root->rb_node; 376 + struct rb_node * parent = NULL; 377 + struct ext2_reserve_window_node *this; 378 + 379 + while (*p) 380 + { 381 + parent = *p; 382 + this = rb_entry(parent, struct ext2_reserve_window_node, rsv_node); 383 + 384 + if (start < this->rsv_start) 385 + p = &(*p)->rb_left; 386 + else if (start > this->rsv_end) 387 + p = &(*p)->rb_right; 388 + else { 389 + rsv_window_dump(root, 1); 390 + BUG(); 391 + } 392 + } 393 + 394 + rb_link_node(node, parent, p); 395 + rb_insert_color(node, root); 396 + } 397 + 398 + /** 399 + * rsv_window_remove() -- unlink a window from the reservation rb tree 400 + * @sb: super block 401 + * @rsv: reservation window to remove 402 + * 403 + * Mark the block reservation window as not allocated, and unlink it 404 + * from the filesystem reservation window rb tree. Must be called with 405 + * rsv_lock held. 406 + */ 407 + static void rsv_window_remove(struct super_block *sb, 408 + struct ext2_reserve_window_node *rsv) 409 + { 410 + rsv->rsv_start = EXT2_RESERVE_WINDOW_NOT_ALLOCATED; 411 + rsv->rsv_end = EXT2_RESERVE_WINDOW_NOT_ALLOCATED; 412 + rsv->rsv_alloc_hit = 0; 413 + rb_erase(&rsv->rsv_node, &EXT2_SB(sb)->s_rsv_window_root); 414 + } 415 + 416 + /* 417 + * rsv_is_empty() -- Check if the reservation window is allocated. 
418 + * @rsv: given reservation window to check 419 + * 420 + * returns 1 if the end block is EXT2_RESERVE_WINDOW_NOT_ALLOCATED. 421 + */ 422 + static inline int rsv_is_empty(struct ext2_reserve_window *rsv) 423 + { 424 + /* a valid reservation end block could not be 0 */ 425 + return (rsv->_rsv_end == EXT2_RESERVE_WINDOW_NOT_ALLOCATED); 426 + } 427 + 428 + /** 429 + * ext2_init_block_alloc_info() 430 + * @inode: file inode structure 431 + * 432 + * Allocate and initialize the reservation window structure, and 433 + * link the window to the ext2 inode structure at last 434 + * 435 + * The reservation window structure is only dynamically allocated 436 + * and linked to ext2 inode the first time the open file 437 + * needs a new block. So, before every ext2_new_block(s) call, for 438 + * regular files, we should check whether the reservation window 439 + * structure exists or not. In the latter case, this function is called. 440 + * Fail to do so will result in block reservation being turned off for that 441 + * open file. 442 + * 443 + * This function is called from ext2_get_blocks_handle(), also called 444 + * when setting the reservation window size through ioctl before the file 445 + * is open for write (needs block allocation). 446 + * 447 + * Needs truncate_mutex protection prior to calling this function. 
448 + */ 449 + void ext2_init_block_alloc_info(struct inode *inode) 450 + { 451 + struct ext2_inode_info *ei = EXT2_I(inode); 452 + struct ext2_block_alloc_info *block_i = ei->i_block_alloc_info; 453 + struct super_block *sb = inode->i_sb; 454 + 455 + block_i = kmalloc(sizeof(*block_i), GFP_NOFS); 456 + if (block_i) { 457 + struct ext2_reserve_window_node *rsv = &block_i->rsv_window_node; 458 + 459 + rsv->rsv_start = EXT2_RESERVE_WINDOW_NOT_ALLOCATED; 460 + rsv->rsv_end = EXT2_RESERVE_WINDOW_NOT_ALLOCATED; 461 + 462 + /* 463 + * if filesystem is mounted with NORESERVATION, the goal 464 + * reservation window size is set to zero to indicate 465 + * block reservation is off 466 + */ 467 + if (!test_opt(sb, RESERVATION)) 468 + rsv->rsv_goal_size = 0; 469 + else 470 + rsv->rsv_goal_size = EXT2_DEFAULT_RESERVE_BLOCKS; 471 + rsv->rsv_alloc_hit = 0; 472 + block_i->last_alloc_logical_block = 0; 473 + block_i->last_alloc_physical_block = 0; 474 + } 475 + ei->i_block_alloc_info = block_i; 476 + } 477 + 478 + /** 479 + * ext2_discard_reservation() 480 + * @inode: inode 481 + * 482 + * Discard(free) block reservation window on last file close, or truncate 483 + * or at last iput(). 484 + * 485 + * It is being called in three cases: 486 + * ext2_release_file(): last writer closes the file 487 + * ext2_clear_inode(): last iput(), when nobody links to this file. 488 + * ext2_truncate(): when the block indirect map is about to change. 
489 + */ 490 + void ext2_discard_reservation(struct inode *inode) 491 + { 492 + struct ext2_inode_info *ei = EXT2_I(inode); 493 + struct ext2_block_alloc_info *block_i = ei->i_block_alloc_info; 494 + struct ext2_reserve_window_node *rsv; 495 + spinlock_t *rsv_lock = &EXT2_SB(inode->i_sb)->s_rsv_window_lock; 496 + 497 + if (!block_i) 498 + return; 499 + 500 + rsv = &block_i->rsv_window_node; 501 + if (!rsv_is_empty(&rsv->rsv_window)) { 502 + spin_lock(rsv_lock); 503 + if (!rsv_is_empty(&rsv->rsv_window)) 504 + rsv_window_remove(inode->i_sb, rsv); 505 + spin_unlock(rsv_lock); 506 + } 507 + } 508 + 509 + /** 510 + * ext2_free_blocks_sb() -- Free given blocks and update quota and i_blocks 511 + * @inode: inode 512 + * @block: start physcial block to free 513 + * @count: number of blocks to free 514 + */ 163 515 void ext2_free_blocks (struct inode * inode, unsigned long block, 164 516 unsigned long count) 165 517 { ··· 533 287 if (sb->s_flags & MS_SYNCHRONOUS) 534 288 sync_dirty_buffer(bitmap_bh); 535 289 536 - group_release_blocks(sb, block_group, desc, bh2, group_freed); 290 + group_adjust_blocks(sb, block_group, desc, bh2, group_freed); 537 291 freed += group_freed; 538 292 539 293 if (overflow) { ··· 547 301 DQUOT_FREE_BLOCK(inode, freed); 548 302 } 549 303 550 - static int grab_block(spinlock_t *lock, char *map, unsigned size, int goal) 304 + /** 305 + * bitmap_search_next_usable_block() 306 + * @start: the starting block (group relative) of the search 307 + * @bh: bufferhead contains the block group bitmap 308 + * @maxblocks: the ending block (group relative) of the reservation 309 + * 310 + * The bitmap search --- search forward through the actual bitmap on disk until 311 + * we find a bit free. 
312 + */ 313 + static ext2_grpblk_t 314 + bitmap_search_next_usable_block(ext2_grpblk_t start, struct buffer_head *bh, 315 + ext2_grpblk_t maxblocks) 551 316 { 552 - int k; 317 + ext2_grpblk_t next; 318 + 319 + next = ext2_find_next_zero_bit(bh->b_data, maxblocks, start); 320 + if (next >= maxblocks) 321 + return -1; 322 + return next; 323 + } 324 + 325 + /** 326 + * find_next_usable_block() 327 + * @start: the starting block (group relative) to find next 328 + * allocatable block in bitmap. 329 + * @bh: bufferhead contains the block group bitmap 330 + * @maxblocks: the ending block (group relative) for the search 331 + * 332 + * Find an allocatable block in a bitmap. We perform the "most 333 + * appropriate allocation" algorithm of looking for a free block near 334 + * the initial goal; then for a free byte somewhere in the bitmap; 335 + * then for any free bit in the bitmap. 336 + */ 337 + static ext2_grpblk_t 338 + find_next_usable_block(int start, struct buffer_head *bh, int maxblocks) 339 + { 340 + ext2_grpblk_t here, next; 553 341 char *p, *r; 554 342 555 - if (!ext2_test_bit(goal, map)) 556 - goto got_it; 557 - 558 - repeat: 559 - if (goal) { 343 + if (start > 0) { 560 344 /* 561 345 * The goal was occupied; search forward for a free 562 346 * block within the next XX blocks. ··· 595 319 * less than EXT2_BLOCKS_PER_GROUP. Aligning up to the 596 320 * next 64-bit boundary is simple.. 597 321 */ 598 - k = (goal + 63) & ~63; 599 - goal = ext2_find_next_zero_bit(map, k, goal); 600 - if (goal < k) 601 - goto got_it; 602 - /* 603 - * Search in the remainder of the current group. 
604 - */ 322 + ext2_grpblk_t end_goal = (start + 63) & ~63; 323 + if (end_goal > maxblocks) 324 + end_goal = maxblocks; 325 + here = ext2_find_next_zero_bit(bh->b_data, end_goal, start); 326 + if (here < end_goal) 327 + return here; 328 + ext2_debug("Bit not found near goal\n"); 605 329 } 606 330 607 - p = map + (goal >> 3); 608 - r = memscan(p, 0, (size - goal + 7) >> 3); 609 - k = (r - map) << 3; 610 - if (k < size) { 611 - /* 612 - * We have succeeded in finding a free byte in the block 613 - * bitmap. Now search backwards to find the start of this 614 - * group of free blocks - won't take more than 7 iterations. 615 - */ 616 - for (goal = k; goal && !ext2_test_bit (goal - 1, map); goal--) 617 - ; 618 - goto got_it; 619 - } 331 + here = start; 332 + if (here < 0) 333 + here = 0; 620 334 621 - k = ext2_find_next_zero_bit ((u32 *)map, size, goal); 622 - if (k < size) { 623 - goal = k; 624 - goto got_it; 625 - } 626 - return -1; 627 - got_it: 628 - if (ext2_set_bit_atomic(lock, goal, (void *) map)) 629 - goto repeat; 630 - return goal; 335 + p = ((char *)bh->b_data) + (here >> 3); 336 + r = memscan(p, 0, ((maxblocks + 7) >> 3) - (here >> 3)); 337 + next = (r - ((char *)bh->b_data)) << 3; 338 + 339 + if (next < maxblocks && next >= here) 340 + return next; 341 + 342 + here = bitmap_search_next_usable_block(here, bh, maxblocks); 343 + return here; 631 344 } 632 345 633 346 /* 634 - * ext2_new_block uses a goal block to assist allocation. If the goal is 347 + * ext2_try_to_allocate() 348 + * @sb: superblock 349 + * @handle: handle to this transaction 350 + * @group: given allocation block group 351 + * @bitmap_bh: bufferhead holds the block bitmap 352 + * @grp_goal: given target block within the group 353 + * @count: target number of blocks to allocate 354 + * @my_rsv: reservation window 355 + * 356 + * Attempt to allocate blocks within a give range. 
Set the range of allocation 357 + * first, then find the first free bit(s) from the bitmap (within the range), 358 + * and at last, allocate the blocks by claiming the found free bit as allocated. 359 + * 360 + * To set the range of this allocation: 361 + * if there is a reservation window, only try to allocate block(s) 362 + * from the file's own reservation window; 363 + * Otherwise, the allocation range starts from the give goal block, 364 + * ends at the block group's last block. 365 + * 366 + * If we failed to allocate the desired block then we may end up crossing to a 367 + * new bitmap. 368 + */ 369 + static int 370 + ext2_try_to_allocate(struct super_block *sb, int group, 371 + struct buffer_head *bitmap_bh, ext2_grpblk_t grp_goal, 372 + unsigned long *count, 373 + struct ext2_reserve_window *my_rsv) 374 + { 375 + ext2_fsblk_t group_first_block; 376 + ext2_grpblk_t start, end; 377 + unsigned long num = 0; 378 + 379 + /* we do allocation within the reservation window if we have a window */ 380 + if (my_rsv) { 381 + group_first_block = ext2_group_first_block_no(sb, group); 382 + if (my_rsv->_rsv_start >= group_first_block) 383 + start = my_rsv->_rsv_start - group_first_block; 384 + else 385 + /* reservation window cross group boundary */ 386 + start = 0; 387 + end = my_rsv->_rsv_end - group_first_block + 1; 388 + if (end > EXT2_BLOCKS_PER_GROUP(sb)) 389 + /* reservation window crosses group boundary */ 390 + end = EXT2_BLOCKS_PER_GROUP(sb); 391 + if ((start <= grp_goal) && (grp_goal < end)) 392 + start = grp_goal; 393 + else 394 + grp_goal = -1; 395 + } else { 396 + if (grp_goal > 0) 397 + start = grp_goal; 398 + else 399 + start = 0; 400 + end = EXT2_BLOCKS_PER_GROUP(sb); 401 + } 402 + 403 + BUG_ON(start > EXT2_BLOCKS_PER_GROUP(sb)); 404 + 405 + repeat: 406 + if (grp_goal < 0) { 407 + grp_goal = find_next_usable_block(start, bitmap_bh, end); 408 + if (grp_goal < 0) 409 + goto fail_access; 410 + if (!my_rsv) { 411 + int i; 412 + 413 + for (i = 0; i < 7 && 
grp_goal > start && 414 + !ext2_test_bit(grp_goal - 1, 415 + bitmap_bh->b_data); 416 + i++, grp_goal--) 417 + ; 418 + } 419 + } 420 + start = grp_goal; 421 + 422 + if (ext2_set_bit_atomic(sb_bgl_lock(EXT2_SB(sb), group), grp_goal, 423 + bitmap_bh->b_data)) { 424 + /* 425 + * The block was allocated by another thread, or it was 426 + * allocated and then freed by another thread 427 + */ 428 + start++; 429 + grp_goal++; 430 + if (start >= end) 431 + goto fail_access; 432 + goto repeat; 433 + } 434 + num++; 435 + grp_goal++; 436 + while (num < *count && grp_goal < end 437 + && !ext2_set_bit_atomic(sb_bgl_lock(EXT2_SB(sb), group), 438 + grp_goal, bitmap_bh->b_data)) { 439 + num++; 440 + grp_goal++; 441 + } 442 + *count = num; 443 + return grp_goal - num; 444 + fail_access: 445 + *count = num; 446 + return -1; 447 + } 448 + 449 + /** 450 + * find_next_reservable_window(): 451 + * find a reservable space within the given range. 452 + * It does not allocate the reservation window for now: 453 + * alloc_new_reservation() will do the work later. 454 + * 455 + * @search_head: the head of the searching list; 456 + * This is not necessarily the list head of the whole filesystem 457 + * 458 + * We have both head and start_block to assist the search 459 + * for the reservable space. The list starts from head, 460 + * but we will shift to the place where start_block is, 461 + * then start from there, when looking for a reservable space. 462 + * 463 + * @size: the target new reservation window size 464 + * 465 + * @group_first_block: the first block we consider to start 466 + * the real search from 467 + * 468 + * @last_block: 469 + * the maximum block number that our goal reservable space 470 + * could start from. This is normally the last block in this 471 + * group. The search will end when we found the start of next 472 + * possible reservable space is out of this boundary. 473 + * This could handle the cross boundary reservation window 474 + * request. 
475 + * 476 + * basically we search from the given range, rather than the whole 477 + * reservation double linked list, (start_block, last_block) 478 + * to find a free region that is of my size and has not 479 + * been reserved. 480 + * 481 + */ 482 + static int find_next_reservable_window( 483 + struct ext2_reserve_window_node *search_head, 484 + struct ext2_reserve_window_node *my_rsv, 485 + struct super_block * sb, 486 + ext2_fsblk_t start_block, 487 + ext2_fsblk_t last_block) 488 + { 489 + struct rb_node *next; 490 + struct ext2_reserve_window_node *rsv, *prev; 491 + ext2_fsblk_t cur; 492 + int size = my_rsv->rsv_goal_size; 493 + 494 + /* TODO: make the start of the reservation window byte-aligned */ 495 + /* cur = *start_block & ~7;*/ 496 + cur = start_block; 497 + rsv = search_head; 498 + if (!rsv) 499 + return -1; 500 + 501 + while (1) { 502 + if (cur <= rsv->rsv_end) 503 + cur = rsv->rsv_end + 1; 504 + 505 + /* TODO? 506 + * in the case we could not find a reservable space 507 + * that is what is expected, during the re-search, we could 508 + * remember what's the largest reservable space we could have 509 + * and return that one. 510 + * 511 + * For now it will fail if we could not find the reservable 512 + * space with expected-size (or more)... 513 + */ 514 + if (cur > last_block) 515 + return -1; /* fail */ 516 + 517 + prev = rsv; 518 + next = rb_next(&rsv->rsv_node); 519 + rsv = rb_entry(next,struct ext2_reserve_window_node,rsv_node); 520 + 521 + /* 522 + * Reached the last reservation, we can just append to the 523 + * previous one. 524 + */ 525 + if (!next) 526 + break; 527 + 528 + if (cur + size <= rsv->rsv_start) { 529 + /* 530 + * Found a reserveable space big enough. We could 531 + * have a reservation across the group boundary here 532 + */ 533 + break; 534 + } 535 + } 536 + /* 537 + * we come here either : 538 + * when we reach the end of the whole list, 539 + * and there is empty reservable space after last entry in the list. 
540 + * append it to the end of the list. 541 + * 542 + * or we found one reservable space in the middle of the list, 543 + * return the reservation window that we could append to. 544 + * succeed. 545 + */ 546 + 547 + if ((prev != my_rsv) && (!rsv_is_empty(&my_rsv->rsv_window))) 548 + rsv_window_remove(sb, my_rsv); 549 + 550 + /* 551 + * Let's book the whole avaliable window for now. We will check the 552 + * disk bitmap later and then, if there are free blocks then we adjust 553 + * the window size if it's larger than requested. 554 + * Otherwise, we will remove this node from the tree next time 555 + * call find_next_reservable_window. 556 + */ 557 + my_rsv->rsv_start = cur; 558 + my_rsv->rsv_end = cur + size - 1; 559 + my_rsv->rsv_alloc_hit = 0; 560 + 561 + if (prev != my_rsv) 562 + ext2_rsv_window_add(sb, my_rsv); 563 + 564 + return 0; 565 + } 566 + 567 + /** 568 + * alloc_new_reservation()--allocate a new reservation window 569 + * 570 + * To make a new reservation, we search part of the filesystem 571 + * reservation list (the list that inside the group). We try to 572 + * allocate a new reservation window near the allocation goal, 573 + * or the beginning of the group, if there is no goal. 574 + * 575 + * We first find a reservable space after the goal, then from 576 + * there, we check the bitmap for the first free block after 577 + * it. If there is no free block until the end of group, then the 578 + * whole group is full, we failed. Otherwise, check if the free 579 + * block is inside the expected reservable space, if so, we 580 + * succeed. 581 + * If the first free block is outside the reservable space, then 582 + * start from the first free block, we search for next available 583 + * space, and go on. 584 + * 585 + * on succeed, a new reservation will be found and inserted into the list 586 + * It contains at least one free block, and it does not overlap with other 587 + * reservation windows. 
588 + * 589 + * failed: we failed to find a reservation window in this group 590 + * 591 + * @rsv: the reservation 592 + * 593 + * @grp_goal: The goal (group-relative). It is where the search for a 594 + * free reservable space should start from. 595 + * if we have a goal(goal >0 ), then start from there, 596 + * no goal(goal = -1), we start from the first block 597 + * of the group. 598 + * 599 + * @sb: the super block 600 + * @group: the group we are trying to allocate in 601 + * @bitmap_bh: the block group block bitmap 602 + * 603 + */ 604 + static int alloc_new_reservation(struct ext2_reserve_window_node *my_rsv, 605 + ext2_grpblk_t grp_goal, struct super_block *sb, 606 + unsigned int group, struct buffer_head *bitmap_bh) 607 + { 608 + struct ext2_reserve_window_node *search_head; 609 + ext2_fsblk_t group_first_block, group_end_block, start_block; 610 + ext2_grpblk_t first_free_block; 611 + struct rb_root *fs_rsv_root = &EXT2_SB(sb)->s_rsv_window_root; 612 + unsigned long size; 613 + int ret; 614 + spinlock_t *rsv_lock = &EXT2_SB(sb)->s_rsv_window_lock; 615 + 616 + group_first_block = ext2_group_first_block_no(sb, group); 617 + group_end_block = group_first_block + (EXT2_BLOCKS_PER_GROUP(sb) - 1); 618 + 619 + if (grp_goal < 0) 620 + start_block = group_first_block; 621 + else 622 + start_block = grp_goal + group_first_block; 623 + 624 + size = my_rsv->rsv_goal_size; 625 + 626 + if (!rsv_is_empty(&my_rsv->rsv_window)) { 627 + /* 628 + * if the old reservation is cross group boundary 629 + * and if the goal is inside the old reservation window, 630 + * we will come here when we just failed to allocate from 631 + * the first part of the window. We still have another part 632 + * that belongs to the next group. In this case, there is no 633 + * point to discard our window and try to allocate a new one 634 + * in this group(which will fail). we should 635 + * keep the reservation window, just simply move on. 
636 + * 637 + * Maybe we could shift the start block of the reservation 638 + * window to the first block of next group. 639 + */ 640 + 641 + if ((my_rsv->rsv_start <= group_end_block) && 642 + (my_rsv->rsv_end > group_end_block) && 643 + (start_block >= my_rsv->rsv_start)) 644 + return -1; 645 + 646 + if ((my_rsv->rsv_alloc_hit > 647 + (my_rsv->rsv_end - my_rsv->rsv_start + 1) / 2)) { 648 + /* 649 + * if the previously allocation hit ratio is 650 + * greater than 1/2, then we double the size of 651 + * the reservation window the next time, 652 + * otherwise we keep the same size window 653 + */ 654 + size = size * 2; 655 + if (size > EXT2_MAX_RESERVE_BLOCKS) 656 + size = EXT2_MAX_RESERVE_BLOCKS; 657 + my_rsv->rsv_goal_size= size; 658 + } 659 + } 660 + 661 + spin_lock(rsv_lock); 662 + /* 663 + * shift the search start to the window near the goal block 664 + */ 665 + search_head = search_reserve_window(fs_rsv_root, start_block); 666 + 667 + /* 668 + * find_next_reservable_window() simply finds a reservable window 669 + * inside the given range(start_block, group_end_block). 670 + * 671 + * To make sure the reservation window has a free bit inside it, we 672 + * need to check the bitmap after we found a reservable window. 673 + */ 674 + retry: 675 + ret = find_next_reservable_window(search_head, my_rsv, sb, 676 + start_block, group_end_block); 677 + 678 + if (ret == -1) { 679 + if (!rsv_is_empty(&my_rsv->rsv_window)) 680 + rsv_window_remove(sb, my_rsv); 681 + spin_unlock(rsv_lock); 682 + return -1; 683 + } 684 + 685 + /* 686 + * On success, find_next_reservable_window() returns the 687 + * reservation window where there is a reservable space after it. 688 + * Before we reserve this reservable space, we need 689 + * to make sure there is at least a free block inside this region. 690 + * 691 + * Search the first free bit on the block bitmap. Search starts from 692 + * the start block of the reservable space we just found. 
693 + */ 694 + spin_unlock(rsv_lock); 695 + first_free_block = bitmap_search_next_usable_block( 696 + my_rsv->rsv_start - group_first_block, 697 + bitmap_bh, group_end_block - group_first_block + 1); 698 + 699 + if (first_free_block < 0) { 700 + /* 701 + * no free block left on the bitmap, no point 702 + * to reserve the space. return failed. 703 + */ 704 + spin_lock(rsv_lock); 705 + if (!rsv_is_empty(&my_rsv->rsv_window)) 706 + rsv_window_remove(sb, my_rsv); 707 + spin_unlock(rsv_lock); 708 + return -1; /* failed */ 709 + } 710 + 711 + start_block = first_free_block + group_first_block; 712 + /* 713 + * check if the first free block is within the 714 + * free space we just reserved 715 + */ 716 + if (start_block >= my_rsv->rsv_start && start_block <= my_rsv->rsv_end) 717 + return 0; /* success */ 718 + /* 719 + * if the first free bit we found is out of the reservable space 720 + * continue search for next reservable space, 721 + * start from where the free block is, 722 + * we also shift the list head to where we stopped last time 723 + */ 724 + search_head = my_rsv; 725 + spin_lock(rsv_lock); 726 + goto retry; 727 + } 728 + 729 + /** 730 + * try_to_extend_reservation() 731 + * @my_rsv: given reservation window 732 + * @sb: super block 733 + * @size: the delta to extend 734 + * 735 + * Attempt to expand the reservation window large enough to have 736 + * required number of free blocks 737 + * 738 + * Since ext2_try_to_allocate() will always allocate blocks within 739 + * the reservation window range, if the window size is too small, 740 + * multiple blocks allocation has to stop at the end of the reservation 741 + * window. To make this more efficient, given the total number of 742 + * blocks needed and the current size of the window, we try to 743 + * expand the reservation window size if necessary on a best-effort 744 + * basis before ext2_new_blocks() tries to allocate blocks. 
745 + */ 746 + static void try_to_extend_reservation(struct ext2_reserve_window_node *my_rsv, 747 + struct super_block *sb, int size) 748 + { 749 + struct ext2_reserve_window_node *next_rsv; 750 + struct rb_node *next; 751 + spinlock_t *rsv_lock = &EXT2_SB(sb)->s_rsv_window_lock; 752 + 753 + if (!spin_trylock(rsv_lock)) 754 + return; 755 + 756 + next = rb_next(&my_rsv->rsv_node); 757 + 758 + if (!next) 759 + my_rsv->rsv_end += size; 760 + else { 761 + next_rsv = rb_entry(next, struct ext2_reserve_window_node, rsv_node); 762 + 763 + if ((next_rsv->rsv_start - my_rsv->rsv_end - 1) >= size) 764 + my_rsv->rsv_end += size; 765 + else 766 + my_rsv->rsv_end = next_rsv->rsv_start - 1; 767 + } 768 + spin_unlock(rsv_lock); 769 + } 770 + 771 + /** 772 + * ext2_try_to_allocate_with_rsv() 773 + * @sb: superblock 774 + * @group: given allocation block group 775 + * @bitmap_bh: bufferhead holds the block bitmap 776 + * @grp_goal: given target block within the group 777 + * @count: target number of blocks to allocate 778 + * @my_rsv: reservation window 779 + * 780 + * This is the main function used to allocate a new block and its reservation 781 + * window. 782 + * 783 + * Each time when a new block allocation is need, first try to allocate from 784 + * its own reservation. If it does not have a reservation window, instead of 785 + * looking for a free bit on bitmap first, then look up the reservation list to 786 + * see if it is inside somebody else's reservation window, we try to allocate a 787 + * reservation window for it starting from the goal first. Then do the block 788 + * allocation within the reservation window. 789 + * 790 + * This will avoid keeping on searching the reservation list again and 791 + * again when somebody is looking for a free block (without 792 + * reservation), and there are lots of free blocks, but they are all 793 + * being reserved. 794 + * 795 + * We use a red-black tree for the per-filesystem reservation list. 
796 + */ 797 + static ext2_grpblk_t 798 + ext2_try_to_allocate_with_rsv(struct super_block *sb, unsigned int group, 799 + struct buffer_head *bitmap_bh, ext2_grpblk_t grp_goal, 800 + struct ext2_reserve_window_node * my_rsv, 801 + unsigned long *count) 802 + { 803 + ext2_fsblk_t group_first_block, group_last_block; 804 + ext2_grpblk_t ret = 0; 805 + unsigned long num = *count; 806 + 807 + /* 808 + * we don't deal with reservation when 809 + * filesystem is mounted without reservation 810 + * or the file is not a regular file 811 + * or last attempt to allocate a block with reservation turned on failed 812 + */ 813 + if (my_rsv == NULL) { 814 + return ext2_try_to_allocate(sb, group, bitmap_bh, 815 + grp_goal, count, NULL); 816 + } 817 + /* 818 + * grp_goal is a group relative block number (if there is a goal) 819 + * 0 <= grp_goal < EXT2_BLOCKS_PER_GROUP(sb) 820 + * first block is a filesystem wide block number 821 + * first block is the block number of the first block in this group 822 + */ 823 + group_first_block = ext2_group_first_block_no(sb, group); 824 + group_last_block = group_first_block + (EXT2_BLOCKS_PER_GROUP(sb) - 1); 825 + 826 + /* 827 + * Basically we will allocate a new block from inode's reservation 828 + * window. 829 + * 830 + * We need to allocate a new reservation window, if: 831 + * a) inode does not have a reservation window; or 832 + * b) last attempt to allocate a block from existing reservation 833 + * failed; or 834 + * c) we come here with a goal and with a reservation window 835 + * 836 + * We do not need to allocate a new reservation window if we come here 837 + * at the beginning with a goal and the goal is inside the window, or 838 + * we don't have a goal but already have a reservation window. 839 + * then we could go to allocate from the reservation window directly. 
840 + */ 841 + while (1) { 842 + if (rsv_is_empty(&my_rsv->rsv_window) || (ret < 0) || 843 + !goal_in_my_reservation(&my_rsv->rsv_window, 844 + grp_goal, group, sb)) { 845 + if (my_rsv->rsv_goal_size < *count) 846 + my_rsv->rsv_goal_size = *count; 847 + ret = alloc_new_reservation(my_rsv, grp_goal, sb, 848 + group, bitmap_bh); 849 + if (ret < 0) 850 + break; /* failed */ 851 + 852 + if (!goal_in_my_reservation(&my_rsv->rsv_window, 853 + grp_goal, group, sb)) 854 + grp_goal = -1; 855 + } else if (grp_goal >= 0) { 856 + int curr = my_rsv->rsv_end - 857 + (grp_goal + group_first_block) + 1; 858 + 859 + if (curr < *count) 860 + try_to_extend_reservation(my_rsv, sb, 861 + *count - curr); 862 + } 863 + 864 + if ((my_rsv->rsv_start > group_last_block) || 865 + (my_rsv->rsv_end < group_first_block)) { 866 + rsv_window_dump(&EXT2_SB(sb)->s_rsv_window_root, 1); 867 + BUG(); 868 + } 869 + ret = ext2_try_to_allocate(sb, group, bitmap_bh, grp_goal, 870 + &num, &my_rsv->rsv_window); 871 + if (ret >= 0) { 872 + my_rsv->rsv_alloc_hit += num; 873 + *count = num; 874 + break; /* succeed */ 875 + } 876 + num = *count; 877 + } 878 + return ret; 879 + } 880 + 881 + /** 882 + * ext2_has_free_blocks() 883 + * @sbi: in-core super block structure. 884 + * 885 + * Check if filesystem has at least 1 free block available for allocation. 
886 + */ 887 + static int ext2_has_free_blocks(struct ext2_sb_info *sbi) 888 + { 889 + ext2_fsblk_t free_blocks, root_blocks; 890 + 891 + free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); 892 + root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count); 893 + if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) && 894 + sbi->s_resuid != current->fsuid && 895 + (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) { 896 + return 0; 897 + } 898 + return 1; 899 + } 900 + 901 + /* 902 + * ext2_new_blocks() -- core block(s) allocation function 903 + * @inode: file inode 904 + * @goal: given target block(filesystem wide) 905 + * @count: target number of blocks to allocate 906 + * @errp: error code 907 + * 908 + * ext2_new_blocks uses a goal block to assist allocation. If the goal is 635 909 * free, or there is a free block within 32 blocks of the goal, that block 636 910 * is allocated. Otherwise a forward search is made for a free block; within 637 911 * each block group the search first looks for an entire free byte in the block 638 912 * bitmap, and then for any free bit if that fails. 639 913 * This function also updates quota and i_blocks field. 
640 914 */ 641 - int ext2_new_block(struct inode *inode, unsigned long goal, 642 - u32 *prealloc_count, u32 *prealloc_block, int *err) 915 + ext2_fsblk_t ext2_new_blocks(struct inode *inode, ext2_fsblk_t goal, 916 + unsigned long *count, int *errp) 643 917 { 644 918 struct buffer_head *bitmap_bh = NULL; 645 - struct buffer_head *gdp_bh; /* bh2 */ 646 - struct ext2_group_desc *desc; 647 - int group_no; /* i */ 648 - int ret_block; /* j */ 649 - int group_idx; /* k */ 650 - int target_block; /* tmp */ 651 - int block = 0; 652 - struct super_block *sb = inode->i_sb; 653 - struct ext2_sb_info *sbi = EXT2_SB(sb); 654 - struct ext2_super_block *es = sbi->s_es; 655 - unsigned group_size = EXT2_BLOCKS_PER_GROUP(sb); 656 - unsigned prealloc_goal = es->s_prealloc_blocks; 657 - unsigned group_alloc = 0, es_alloc, dq_alloc; 658 - int nr_scanned_groups; 919 + struct buffer_head *gdp_bh; 920 + int group_no; 921 + int goal_group; 922 + ext2_grpblk_t grp_target_blk; /* blockgroup relative goal block */ 923 + ext2_grpblk_t grp_alloc_blk; /* blockgroup-relative allocated block*/ 924 + ext2_fsblk_t ret_block; /* filesyetem-wide allocated block */ 925 + int bgi; /* blockgroup iteration index */ 926 + int performed_allocation = 0; 927 + ext2_grpblk_t free_blocks; /* number of free blocks in a group */ 928 + struct super_block *sb; 929 + struct ext2_group_desc *gdp; 930 + struct ext2_super_block *es; 931 + struct ext2_sb_info *sbi; 932 + struct ext2_reserve_window_node *my_rsv = NULL; 933 + struct ext2_block_alloc_info *block_i; 934 + unsigned short windowsz = 0; 935 + unsigned long ngroups; 936 + unsigned long num = *count; 659 937 660 - if (!prealloc_goal--) 661 - prealloc_goal = EXT2_DEFAULT_PREALLOC_BLOCKS - 1; 662 - if (!prealloc_count || *prealloc_count) 663 - prealloc_goal = 0; 938 + *errp = -ENOSPC; 939 + sb = inode->i_sb; 940 + if (!sb) { 941 + printk("ext2_new_blocks: nonexistent device"); 942 + return 0; 943 + } 664 944 665 - if (DQUOT_ALLOC_BLOCK(inode, 1)) { 666 - *err = 
-EDQUOT; 945 + /* 946 + * Check quota for allocation of this block. 947 + */ 948 + if (DQUOT_ALLOC_BLOCK(inode, num)) { 949 + *errp = -EDQUOT; 950 + return 0; 951 + } 952 + 953 + sbi = EXT2_SB(sb); 954 + es = EXT2_SB(sb)->s_es; 955 + ext2_debug("goal=%lu.\n", goal); 956 + /* 957 + * Allocate a block from reservation only when 958 + * filesystem is mounted with reservation(default,-o reservation), and 959 + * it's a regular file, and 960 + * the desired window size is greater than 0 (One could use ioctl 961 + * command EXT2_IOC_SETRSVSZ to set the window size to 0 to turn off 962 + * reservation on that particular file) 963 + */ 964 + block_i = EXT2_I(inode)->i_block_alloc_info; 965 + if (block_i) { 966 + windowsz = block_i->rsv_window_node.rsv_goal_size; 967 + if (windowsz > 0) 968 + my_rsv = &block_i->rsv_window_node; 969 + } 970 + 971 + if (!ext2_has_free_blocks(sbi)) { 972 + *errp = -ENOSPC; 667 973 goto out; 668 974 } 669 975 670 - while (prealloc_goal && DQUOT_PREALLOC_BLOCK(inode, prealloc_goal)) 671 - prealloc_goal--; 672 - 673 - dq_alloc = prealloc_goal + 1; 674 - es_alloc = reserve_blocks(sb, dq_alloc); 675 - if (!es_alloc) { 676 - *err = -ENOSPC; 677 - goto out_dquot; 678 - } 679 - 680 - ext2_debug ("goal=%lu.\n", goal); 681 - 976 + /* 977 + * First, test whether the goal block is free. 978 + */ 682 979 if (goal < le32_to_cpu(es->s_first_data_block) || 683 980 goal >= le32_to_cpu(es->s_blocks_count)) 684 981 goal = le32_to_cpu(es->s_first_data_block); 685 - group_no = (goal - le32_to_cpu(es->s_first_data_block)) / group_size; 686 - desc = ext2_get_group_desc (sb, group_no, &gdp_bh); 687 - if (!desc) { 688 - /* 689 - * gdp_bh may still be uninitialised. But group_release_blocks 690 - * will not touch it because group_alloc is zero. 
691 - */ 982 + group_no = (goal - le32_to_cpu(es->s_first_data_block)) / 983 + EXT2_BLOCKS_PER_GROUP(sb); 984 + goal_group = group_no; 985 + retry_alloc: 986 + gdp = ext2_get_group_desc(sb, group_no, &gdp_bh); 987 + if (!gdp) 692 988 goto io_error; 989 + 990 + free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); 991 + /* 992 + * if there are not enough free blocks to make a new reservation, 993 + * turn off reservation for this allocation 994 + */ 995 + if (my_rsv && (free_blocks < windowsz) 996 + && (rsv_is_empty(&my_rsv->rsv_window))) 997 + my_rsv = NULL; 998 + 999 + if (free_blocks > 0) { 1000 + grp_target_blk = ((goal - le32_to_cpu(es->s_first_data_block)) % 1001 + EXT2_BLOCKS_PER_GROUP(sb)); 1002 + bitmap_bh = read_block_bitmap(sb, group_no); 1003 + if (!bitmap_bh) 1004 + goto io_error; 1005 + grp_alloc_blk = ext2_try_to_allocate_with_rsv(sb, group_no, 1006 + bitmap_bh, grp_target_blk, 1007 + my_rsv, &num); 1008 + if (grp_alloc_blk >= 0) 1009 + goto allocated; 693 1010 } 694 1011 695 - group_alloc = group_reserve_blocks(sbi, group_no, desc, 696 - gdp_bh, es_alloc); 697 - if (group_alloc) { 698 - ret_block = ((goal - le32_to_cpu(es->s_first_data_block)) % 699 - group_size); 1012 + ngroups = EXT2_SB(sb)->s_groups_count; 1013 + smp_rmb(); 1014 + 1015 + /* 1016 + * Now search the rest of the groups. We assume that 1017 + * i and gdp correctly point to the last group visited. 1018 + */ 1019 + for (bgi = 0; bgi < ngroups; bgi++) { 1020 + group_no++; 1021 + if (group_no >= ngroups) 1022 + group_no = 0; 1023 + gdp = ext2_get_group_desc(sb, group_no, &gdp_bh); 1024 + if (!gdp) 1025 + goto io_error; 1026 + 1027 + free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); 1028 + /* 1029 + * skip this group if the number of 1030 + * free blocks is less than half of the reservation 1031 + * window size. 
1032 + */ 1033 + if (free_blocks <= (windowsz/2)) 1034 + continue; 1035 + 700 1036 brelse(bitmap_bh); 701 1037 bitmap_bh = read_block_bitmap(sb, group_no); 702 1038 if (!bitmap_bh) 703 1039 goto io_error; 704 - 705 - ext2_debug("goal is at %d:%d.\n", group_no, ret_block); 706 - 707 - ret_block = grab_block(sb_bgl_lock(sbi, group_no), 708 - bitmap_bh->b_data, group_size, ret_block); 709 - if (ret_block >= 0) 710 - goto got_block; 711 - group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc); 712 - group_alloc = 0; 1040 + /* 1041 + * try to allocate block(s) from this group, without a goal(-1). 1042 + */ 1043 + grp_alloc_blk = ext2_try_to_allocate_with_rsv(sb, group_no, 1044 + bitmap_bh, -1, my_rsv, &num); 1045 + if (grp_alloc_blk >= 0) 1046 + goto allocated; 713 1047 } 714 - 715 - ext2_debug ("Bit not found in block group %d.\n", group_no); 716 - 717 1048 /* 718 - * Now search the rest of the groups. We assume that 719 - * i and desc correctly point to the last group visited. 1049 + * We may end up with a bogus earlier ENOSPC error due to 1050 + * the filesystem being "full" of reservations, but 1051 + * there may indeed be free blocks available on disk. 1052 + * In this case, we just forget about the reservations 1053 + * and just do block allocation as without reservations. 
720 1054 */ 721 - nr_scanned_groups = 0; 722 - retry: 723 - for (group_idx = 0; !group_alloc && 724 - group_idx < sbi->s_groups_count; group_idx++) { 725 - group_no++; 726 - if (group_no >= sbi->s_groups_count) 727 - group_no = 0; 728 - desc = ext2_get_group_desc(sb, group_no, &gdp_bh); 729 - if (!desc) 730 - goto io_error; 731 - group_alloc = group_reserve_blocks(sbi, group_no, desc, 732 - gdp_bh, es_alloc); 1055 + if (my_rsv) { 1056 + my_rsv = NULL; 1057 + windowsz = 0; 1058 + group_no = goal_group; 1059 + goto retry_alloc; 733 1060 } 734 - if (!group_alloc) { 735 - *err = -ENOSPC; 736 - goto out_release; 737 - } 738 - brelse(bitmap_bh); 739 - bitmap_bh = read_block_bitmap(sb, group_no); 740 - if (!bitmap_bh) 741 - goto io_error; 1061 + /* No space left on the device */ 1062 + *errp = -ENOSPC; 1063 + goto out; 742 1064 743 - ret_block = grab_block(sb_bgl_lock(sbi, group_no), bitmap_bh->b_data, 744 - group_size, 0); 745 - if (ret_block < 0) { 746 - /* 747 - * If a free block counter is corrupted we can loop inifintely. 748 - * Detect that here. 749 - */ 750 - nr_scanned_groups++; 751 - if (nr_scanned_groups > 2 * sbi->s_groups_count) { 752 - ext2_error(sb, "ext2_new_block", 753 - "corrupted free blocks counters"); 754 - goto io_error; 755 - } 756 - /* 757 - * Someone else grabbed the last free block in this blockgroup 758 - * before us. Retry the scan. 
759 - */ 760 - group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc); 761 - group_alloc = 0; 762 - goto retry; 763 - } 1065 + allocated: 764 1066 765 - got_block: 766 1067 ext2_debug("using block group %d(%d)\n", 767 - group_no, desc->bg_free_blocks_count); 1068 + group_no, gdp->bg_free_blocks_count); 768 1069 769 - target_block = ret_block + group_no * group_size + 770 - le32_to_cpu(es->s_first_data_block); 1070 + ret_block = grp_alloc_blk + ext2_group_first_block_no(sb, group_no); 771 1071 772 - if (target_block == le32_to_cpu(desc->bg_block_bitmap) || 773 - target_block == le32_to_cpu(desc->bg_inode_bitmap) || 774 - in_range(target_block, le32_to_cpu(desc->bg_inode_table), 775 - sbi->s_itb_per_group)) 776 - ext2_error (sb, "ext2_new_block", 1072 + if (in_range(le32_to_cpu(gdp->bg_block_bitmap), ret_block, num) || 1073 + in_range(le32_to_cpu(gdp->bg_inode_bitmap), ret_block, num) || 1074 + in_range(ret_block, le32_to_cpu(gdp->bg_inode_table), 1075 + EXT2_SB(sb)->s_itb_per_group) || 1076 + in_range(ret_block + num - 1, le32_to_cpu(gdp->bg_inode_table), 1077 + EXT2_SB(sb)->s_itb_per_group)) 1078 + ext2_error(sb, "ext2_new_blocks", 777 1079 "Allocating block in system zone - " 778 - "block = %u", target_block); 1080 + "blocks from "E2FSBLK", length %lu", 1081 + ret_block, num); 779 1082 780 - if (target_block >= le32_to_cpu(es->s_blocks_count)) { 781 - ext2_error (sb, "ext2_new_block", 782 - "block(%d) >= blocks count(%d) - " 1083 + performed_allocation = 1; 1084 + 1085 + if (ret_block + num - 1 >= le32_to_cpu(es->s_blocks_count)) { 1086 + ext2_error(sb, "ext2_new_blocks", 1087 + "block("E2FSBLK") >= blocks count(%d) - " 783 1088 "block_group = %d, es == %p ", ret_block, 784 1089 le32_to_cpu(es->s_blocks_count), group_no, es); 785 - goto io_error; 1090 + goto out; 786 1091 } 787 - block = target_block; 788 1092 789 - /* OK, we _had_ allocated something */ 790 - ext2_debug("found bit %d\n", ret_block); 791 - 792 - dq_alloc--; 793 - es_alloc--; 794 - 
group_alloc--; 795 - 796 - /* 797 - * Do block preallocation now if required. 798 - */ 799 - write_lock(&EXT2_I(inode)->i_meta_lock); 800 - if (group_alloc && !*prealloc_count) { 801 - unsigned n; 802 - 803 - for (n = 0; n < group_alloc && ++ret_block < group_size; n++) { 804 - if (ext2_set_bit_atomic(sb_bgl_lock(sbi, group_no), 805 - ret_block, 806 - (void*) bitmap_bh->b_data)) 807 - break; 808 - } 809 - *prealloc_block = block + 1; 810 - *prealloc_count = n; 811 - es_alloc -= n; 812 - dq_alloc -= n; 813 - group_alloc -= n; 814 - } 815 - write_unlock(&EXT2_I(inode)->i_meta_lock); 1093 + group_adjust_blocks(sb, group_no, gdp, gdp_bh, -num); 1094 + percpu_counter_sub(&sbi->s_freeblocks_counter, num); 816 1095 817 1096 mark_buffer_dirty(bitmap_bh); 818 1097 if (sb->s_flags & MS_SYNCHRONOUS) 819 1098 sync_dirty_buffer(bitmap_bh); 820 1099 821 - ext2_debug ("allocating block %d. ", block); 822 - 823 - *err = 0; 824 - out_release: 825 - group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc); 826 - release_blocks(sb, es_alloc); 827 - out_dquot: 828 - DQUOT_FREE_BLOCK(inode, dq_alloc); 829 - out: 1100 + *errp = 0; 830 1101 brelse(bitmap_bh); 831 - return block; 1102 + DQUOT_FREE_BLOCK(inode, *count-num); 1103 + *count = num; 1104 + return ret_block; 832 1105 833 1106 io_error: 834 - *err = -EIO; 835 - goto out_release; 1107 + *errp = -EIO; 1108 + out: 1109 + /* 1110 + * Undo the block allocation 1111 + */ 1112 + if (!performed_allocation) 1113 + DQUOT_FREE_BLOCK(inode, *count); 1114 + brelse(bitmap_bh); 1115 + return 0; 1116 + } 1117 + 1118 + ext2_fsblk_t ext2_new_block(struct inode *inode, unsigned long goal, int *errp) 1119 + { 1120 + unsigned long count = 1; 1121 + 1122 + return ext2_new_blocks(inode, goal, &count, errp); 836 1123 } 837 1124 838 1125 #ifdef EXT2FS_DEBUG
+18 -18
fs/ext2/ext2.h
··· 33 33 */ 34 34 __u32 i_block_group; 35 35 36 - /* 37 - * i_next_alloc_block is the logical (file-relative) number of the 38 - * most-recently-allocated block in this file. Yes, it is misnamed. 39 - * We use this for detecting linearly ascending allocation requests. 40 - */ 41 - __u32 i_next_alloc_block; 36 + /* block reservation info */ 37 + struct ext2_block_alloc_info *i_block_alloc_info; 42 38 43 - /* 44 - * i_next_alloc_goal is the *physical* companion to i_next_alloc_block. 45 - * it the the physical block number of the block which was most-recently 46 - * allocated to this file. This give us the goal (target) for the next 47 - * allocation when we detect linearly ascending requests. 48 - */ 49 - __u32 i_next_alloc_goal; 50 - __u32 i_prealloc_block; 51 - __u32 i_prealloc_count; 52 39 __u32 i_dir_start_lookup; 53 40 #ifdef CONFIG_EXT2_FS_XATTR 54 41 /* ··· 52 65 struct posix_acl *i_default_acl; 53 66 #endif 54 67 rwlock_t i_meta_lock; 68 + 69 + /* 70 + * truncate_mutex is for serialising ext2_truncate() against 71 + * ext2_getblock(). It also protects the internals of the inode's 72 + * reservation data structures: ext2_reserve_window and 73 + * ext2_reserve_window_node. 
74 + */ 75 + struct mutex truncate_mutex; 55 76 struct inode vfs_inode; 77 + struct list_head i_orphan; /* unlinked but open inodes */ 56 78 }; 57 79 58 80 /* ··· 87 91 /* balloc.c */ 88 92 extern int ext2_bg_has_super(struct super_block *sb, int group); 89 93 extern unsigned long ext2_bg_num_gdb(struct super_block *sb, int group); 90 - extern int ext2_new_block (struct inode *, unsigned long, 91 - __u32 *, __u32 *, int *); 94 + extern ext2_fsblk_t ext2_new_block(struct inode *, unsigned long, int *); 95 + extern ext2_fsblk_t ext2_new_blocks(struct inode *, unsigned long, 96 + unsigned long *, int *); 92 97 extern void ext2_free_blocks (struct inode *, unsigned long, 93 98 unsigned long); 94 99 extern unsigned long ext2_count_free_blocks (struct super_block *); ··· 98 101 extern struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb, 99 102 unsigned int block_group, 100 103 struct buffer_head ** bh); 104 + extern void ext2_discard_reservation (struct inode *); 105 + extern int ext2_should_retry_alloc(struct super_block *sb, int *retries); 106 + extern void ext2_init_block_alloc_info(struct inode *); 107 + extern void ext2_rsv_window_add(struct super_block *sb, struct ext2_reserve_window_node *rsv); 101 108 102 109 /* dir.c */ 103 110 extern int ext2_add_link (struct dentry *, struct inode *); ··· 129 128 extern void ext2_put_inode (struct inode *); 130 129 extern void ext2_delete_inode (struct inode *); 131 130 extern int ext2_sync_inode (struct inode *); 132 - extern void ext2_discard_prealloc (struct inode *); 133 131 extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int); 134 132 extern void ext2_truncate (struct inode *); 135 133 extern int ext2_setattr (struct dentry *, struct iattr *);
+5 -2
fs/ext2/file.c
··· 30 30 */ 31 31 static int ext2_release_file (struct inode * inode, struct file * filp) 32 32 { 33 - if (filp->f_mode & FMODE_WRITE) 34 - ext2_discard_prealloc (inode); 33 + if (filp->f_mode & FMODE_WRITE) { 34 + mutex_lock(&EXT2_I(inode)->truncate_mutex); 35 + ext2_discard_reservation(inode); 36 + mutex_unlock(&EXT2_I(inode)->truncate_mutex); 37 + } 35 38 return 0; 36 39 } 37 40
+1 -4
fs/ext2/ialloc.c
··· 581 581 ei->i_file_acl = 0; 582 582 ei->i_dir_acl = 0; 583 583 ei->i_dtime = 0; 584 + ei->i_block_alloc_info = NULL; 584 585 ei->i_block_group = group; 585 - ei->i_next_alloc_block = 0; 586 - ei->i_next_alloc_goal = 0; 587 - ei->i_prealloc_block = 0; 588 - ei->i_prealloc_count = 0; 589 586 ei->i_dir_start_lookup = 0; 590 587 ei->i_state = EXT2_STATE_NEW; 591 588 ext2_set_inode_flags(inode);
+307 -215
fs/ext2/inode.c
··· 54 54 } 55 55 56 56 /* 57 - * Called at each iput(). 58 - * 59 - * The inode may be "bad" if ext2_read_inode() saw an error from 60 - * ext2_get_inode(), so we need to check that to avoid freeing random disk 61 - * blocks. 62 - */ 63 - void ext2_put_inode(struct inode *inode) 64 - { 65 - if (!is_bad_inode(inode)) 66 - ext2_discard_prealloc(inode); 67 - } 68 - 69 - /* 70 57 * Called at the last iput() if i_nlink is zero. 71 58 */ 72 59 void ext2_delete_inode (struct inode * inode) ··· 74 87 return; 75 88 no_delete: 76 89 clear_inode(inode); /* We must guarantee clearing of inode... */ 77 - } 78 - 79 - void ext2_discard_prealloc (struct inode * inode) 80 - { 81 - #ifdef EXT2_PREALLOCATE 82 - struct ext2_inode_info *ei = EXT2_I(inode); 83 - write_lock(&ei->i_meta_lock); 84 - if (ei->i_prealloc_count) { 85 - unsigned short total = ei->i_prealloc_count; 86 - unsigned long block = ei->i_prealloc_block; 87 - ei->i_prealloc_count = 0; 88 - ei->i_prealloc_block = 0; 89 - write_unlock(&ei->i_meta_lock); 90 - ext2_free_blocks (inode, block, total); 91 - return; 92 - } else 93 - write_unlock(&ei->i_meta_lock); 94 - #endif 95 - } 96 - 97 - static int ext2_alloc_block (struct inode * inode, unsigned long goal, int *err) 98 - { 99 - #ifdef EXT2FS_DEBUG 100 - static unsigned long alloc_hits, alloc_attempts; 101 - #endif 102 - unsigned long result; 103 - 104 - 105 - #ifdef EXT2_PREALLOCATE 106 - struct ext2_inode_info *ei = EXT2_I(inode); 107 - write_lock(&ei->i_meta_lock); 108 - if (ei->i_prealloc_count && 109 - (goal == ei->i_prealloc_block || goal + 1 == ei->i_prealloc_block)) 110 - { 111 - result = ei->i_prealloc_block++; 112 - ei->i_prealloc_count--; 113 - write_unlock(&ei->i_meta_lock); 114 - ext2_debug ("preallocation hit (%lu/%lu).\n", 115 - ++alloc_hits, ++alloc_attempts); 116 - } else { 117 - write_unlock(&ei->i_meta_lock); 118 - ext2_discard_prealloc (inode); 119 - ext2_debug ("preallocation miss (%lu/%lu).\n", 120 - alloc_hits, ++alloc_attempts); 121 - if 
(S_ISREG(inode->i_mode)) 122 - result = ext2_new_block (inode, goal, 123 - &ei->i_prealloc_count, 124 - &ei->i_prealloc_block, err); 125 - else 126 - result = ext2_new_block(inode, goal, NULL, NULL, err); 127 - } 128 - #else 129 - result = ext2_new_block (inode, goal, 0, 0, err); 130 - #endif 131 - return result; 132 90 } 133 91 134 92 typedef struct { ··· 160 228 ext2_warning (inode->i_sb, "ext2_block_to_path", "block > big"); 161 229 } 162 230 if (boundary) 163 - *boundary = (i_block & (ptrs - 1)) == (final - 1); 231 + *boundary = final - 1 - (i_block & (ptrs - 1)); 232 + 164 233 return n; 165 234 } 166 235 ··· 288 355 * @block: block we want 289 356 * @chain: chain of indirect blocks 290 357 * @partial: pointer to the last triple within a chain 291 - * @goal: place to store the result. 292 358 * 293 - * Normally this function find the prefered place for block allocation, 294 - * stores it in *@goal and returns zero. If the branch had been changed 295 - * under us we return -EAGAIN. 359 + * Returns preferred place for a block (the goal). 296 360 */ 297 361 298 362 static inline int ext2_find_goal(struct inode *inode, 299 363 long block, 300 364 Indirect chain[4], 301 - Indirect *partial, 302 - unsigned long *goal) 365 + Indirect *partial) 303 366 { 304 - struct ext2_inode_info *ei = EXT2_I(inode); 305 - write_lock(&ei->i_meta_lock); 306 - if ((block == ei->i_next_alloc_block + 1) && ei->i_next_alloc_goal) { 307 - ei->i_next_alloc_block++; 308 - ei->i_next_alloc_goal++; 309 - } 310 - if (verify_chain(chain, partial)) { 311 - /* 312 - * try the heuristic for sequential allocation, 313 - * failing that at least try to get decent locality. 
314 - */ 315 - if (block == ei->i_next_alloc_block) 316 - *goal = ei->i_next_alloc_goal; 317 - if (!*goal) 318 - *goal = ext2_find_near(inode, partial); 319 - write_unlock(&ei->i_meta_lock); 320 - return 0; 367 + struct ext2_block_alloc_info *block_i; 368 + 369 + block_i = EXT2_I(inode)->i_block_alloc_info; 370 + 371 + /* 372 + * try the heuristic for sequential allocation, 373 + * failing that at least try to get decent locality. 374 + */ 375 + if (block_i && (block == block_i->last_alloc_logical_block + 1) 376 + && (block_i->last_alloc_physical_block != 0)) { 377 + return block_i->last_alloc_physical_block + 1; 321 378 } 322 - write_unlock(&ei->i_meta_lock); 323 - return -EAGAIN; 379 + 380 + return ext2_find_near(inode, partial); 381 + } 382 + 383 + /** 384 + * ext2_blks_to_allocate: Look up the block map and count the number 385 + * of direct blocks need to be allocated for the given branch. 386 + * 387 + * @branch: chain of indirect blocks 388 + * @k: number of blocks need for indirect blocks 389 + * @blks: number of data blocks to be mapped. 390 + * @blocks_to_boundary: the offset in the indirect block 391 + * 392 + * return the total number of blocks to be allocate, including the 393 + * direct and indirect blocks. 
394 + */ 395 + static int 396 + ext2_blks_to_allocate(Indirect * branch, int k, unsigned long blks, 397 + int blocks_to_boundary) 398 + { 399 + unsigned long count = 0; 400 + 401 + /* 402 + * Simple case: the [t,d]Indirect block(s) have not been allocated yet, 403 + * so it's clear the blocks on that path have not been allocated 404 + */ 405 + if (k > 0) { 406 + /* right now we don't handle cross-boundary allocation */ 407 + if (blks < blocks_to_boundary + 1) 408 + count += blks; 409 + else 410 + count += blocks_to_boundary + 1; 411 + return count; 412 + } 413 + 414 + count++; 415 + while (count < blks && count <= blocks_to_boundary 416 + && le32_to_cpu(*(branch[0].p + count)) == 0) { 417 + count++; 418 + } 419 + return count; 420 + } 421 + 422 + /** 423 + * ext2_alloc_blocks: allocate multiple blocks needed for a branch 424 + * @indirect_blks: the number of blocks we need to allocate for indirect 425 + * blocks 426 + * 427 + * @new_blocks: on return it will store the new block numbers for 428 + * the indirect blocks(if needed) and the first direct block, 429 + * @blks: on return it will store the total number of allocated 430 + * direct blocks 431 + */ 432 + static int ext2_alloc_blocks(struct inode *inode, 433 + ext2_fsblk_t goal, int indirect_blks, int blks, 434 + ext2_fsblk_t new_blocks[4], int *err) 435 + { 436 + int target, i; 437 + unsigned long count = 0; 438 + int index = 0; 439 + ext2_fsblk_t current_block = 0; 440 + int ret = 0; 441 + 442 + /* 443 + * Here we try to allocate the requested multiple blocks at once, 444 + * on a best-effort basis. 445 + * To build a branch, we should allocate blocks for 446 + * the indirect blocks(if not allocated yet), and at least 447 + * the first direct block of this branch. 
That's the 448 + * minimum number of blocks need to allocate(required) 449 + */ 450 + target = blks + indirect_blks; 451 + 452 + while (1) { 453 + count = target; 454 + /* allocating blocks for indirect blocks and direct blocks */ 455 + current_block = ext2_new_blocks(inode,goal,&count,err); 456 + if (*err) 457 + goto failed_out; 458 + 459 + target -= count; 460 + /* allocate blocks for indirect blocks */ 461 + while (index < indirect_blks && count) { 462 + new_blocks[index++] = current_block++; 463 + count--; 464 + } 465 + 466 + if (count > 0) 467 + break; 468 + } 469 + 470 + /* save the new block number for the first direct block */ 471 + new_blocks[index] = current_block; 472 + 473 + /* total number of blocks allocated for direct blocks */ 474 + ret = count; 475 + *err = 0; 476 + return ret; 477 + failed_out: 478 + for (i = 0; i <index; i++) 479 + ext2_free_blocks(inode, new_blocks[i], 1); 480 + return ret; 324 481 } 325 482 326 483 /** ··· 439 416 */ 440 417 441 418 static int ext2_alloc_branch(struct inode *inode, 442 - int num, 443 - unsigned long goal, 444 - int *offsets, 445 - Indirect *branch) 419 + int indirect_blks, int *blks, ext2_fsblk_t goal, 420 + int *offsets, Indirect *branch) 446 421 { 447 422 int blocksize = inode->i_sb->s_blocksize; 448 - int n = 0; 449 - int err; 450 - int i; 451 - int parent = ext2_alloc_block(inode, goal, &err); 423 + int i, n = 0; 424 + int err = 0; 425 + struct buffer_head *bh; 426 + int num; 427 + ext2_fsblk_t new_blocks[4]; 428 + ext2_fsblk_t current_block; 452 429 453 - branch[0].key = cpu_to_le32(parent); 454 - if (parent) for (n = 1; n < num; n++) { 455 - struct buffer_head *bh; 456 - /* Allocate the next block */ 457 - int nr = ext2_alloc_block(inode, parent, &err); 458 - if (!nr) 459 - break; 460 - branch[n].key = cpu_to_le32(nr); 430 + num = ext2_alloc_blocks(inode, goal, indirect_blks, 431 + *blks, new_blocks, &err); 432 + if (err) 433 + return err; 434 + 435 + branch[0].key = cpu_to_le32(new_blocks[0]); 436 + /* 
437 + * metadata blocks and data blocks are allocated. 438 + */ 439 + for (n = 1; n <= indirect_blks; n++) { 461 440 /* 462 - * Get buffer_head for parent block, zero it out and set 463 - * the pointer to new one, then send parent to disk. 441 + * Get buffer_head for parent block, zero it out 442 + * and set the pointer to new one, then send 443 + * parent to disk. 464 444 */ 465 - bh = sb_getblk(inode->i_sb, parent); 466 - if (!bh) { 467 - err = -EIO; 468 - break; 469 - } 445 + bh = sb_getblk(inode->i_sb, new_blocks[n-1]); 446 + branch[n].bh = bh; 470 447 lock_buffer(bh); 471 448 memset(bh->b_data, 0, blocksize); 472 - branch[n].bh = bh; 473 449 branch[n].p = (__le32 *) bh->b_data + offsets[n]; 450 + branch[n].key = cpu_to_le32(new_blocks[n]); 474 451 *branch[n].p = branch[n].key; 452 + if ( n == indirect_blks) { 453 + current_block = new_blocks[n]; 454 + /* 455 + * End of chain, update the last new metablock of 456 + * the chain to point to the new allocated 457 + * data blocks numbers 458 + */ 459 + for (i=1; i < num; i++) 460 + *(branch[n].p + i) = cpu_to_le32(++current_block); 461 + } 475 462 set_buffer_uptodate(bh); 476 463 unlock_buffer(bh); 477 464 mark_buffer_dirty_inode(bh, inode); ··· 491 458 */ 492 459 if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode)) 493 460 sync_dirty_buffer(bh); 494 - parent = nr; 495 461 } 496 - if (n == num) 497 - return 0; 498 - 499 - /* Allocation failed, free what we already allocated */ 500 - for (i = 1; i < n; i++) 501 - bforget(branch[i].bh); 502 - for (i = 0; i < n; i++) 503 - ext2_free_blocks(inode, le32_to_cpu(branch[i].key), 1); 462 + *blks = num; 504 463 return err; 505 464 } 506 465 507 466 /** 508 - * ext2_splice_branch - splice the allocated branch onto inode. 
509 - * @inode: owner 510 - * @block: (logical) number of block we are adding 511 - * @chain: chain of indirect blocks (with a missing link - see 512 - * ext2_alloc_branch) 513 - * @where: location of missing link 514 - * @num: number of blocks we are adding 467 + * ext2_splice_branch - splice the allocated branch onto inode. 468 + * @inode: owner 469 + * @block: (logical) number of block we are adding 470 + * @chain: chain of indirect blocks (with a missing link - see 471 + * ext2_alloc_branch) 472 + * @where: location of missing link 473 + * @num: number of indirect blocks we are adding 474 + * @blks: number of direct blocks we are adding 515 475 * 516 - * This function verifies that chain (up to the missing link) had not 517 - * changed, fills the missing link and does all housekeeping needed in 518 - * inode (->i_blocks, etc.). In case of success we end up with the full 519 - * chain to new block and return 0. Otherwise (== chain had been changed) 520 - * we free the new blocks (forgetting their buffer_heads, indeed) and 521 - * return -EAGAIN. 476 + * This function fills the missing link and does all housekeeping needed in 477 + * inode (->i_blocks, etc.). In case of success we end up with the full 478 + * chain to new block and return 0. 
522 479 */ 523 - 524 - static inline int ext2_splice_branch(struct inode *inode, 525 - long block, 526 - Indirect chain[4], 527 - Indirect *where, 528 - int num) 480 + static void ext2_splice_branch(struct inode *inode, 481 + long block, Indirect *where, int num, int blks) 529 482 { 530 - struct ext2_inode_info *ei = EXT2_I(inode); 531 483 int i; 484 + struct ext2_block_alloc_info *block_i; 485 + ext2_fsblk_t current_block; 532 486 533 - /* Verify that place we are splicing to is still there and vacant */ 487 + block_i = EXT2_I(inode)->i_block_alloc_info; 534 488 535 - write_lock(&ei->i_meta_lock); 536 - if (!verify_chain(chain, where-1) || *where->p) 537 - goto changed; 538 - 489 + /* XXX LOCKING probably should have i_meta_lock ?*/ 539 490 /* That's it */ 540 491 541 492 *where->p = where->key; 542 - ei->i_next_alloc_block = block; 543 - ei->i_next_alloc_goal = le32_to_cpu(where[num-1].key); 544 493 545 - write_unlock(&ei->i_meta_lock); 494 + /* 495 + * Update the host buffer_head or inode to point to more just allocated 496 + * direct blocks blocks 497 + */ 498 + if (num == 0 && blks > 1) { 499 + current_block = le32_to_cpu(where->key) + 1; 500 + for (i = 1; i < blks; i++) 501 + *(where->p + i ) = cpu_to_le32(current_block++); 502 + } 503 + 504 + /* 505 + * update the most recently allocated logical & physical block 506 + * in i_block_alloc_info, to assist find the proper goal block for next 507 + * allocation 508 + */ 509 + if (block_i) { 510 + block_i->last_alloc_logical_block = block + blks - 1; 511 + block_i->last_alloc_physical_block = 512 + le32_to_cpu(where[num].key) + blks - 1; 513 + } 546 514 547 515 /* We are done with atomic stuff, now do the rest of housekeeping */ 548 - 549 - inode->i_ctime = CURRENT_TIME_SEC; 550 516 551 517 /* had we spliced it onto indirect block? 
*/ 552 518 if (where->bh) 553 519 mark_buffer_dirty_inode(where->bh, inode); 554 520 521 + inode->i_ctime = CURRENT_TIME_SEC; 555 522 mark_inode_dirty(inode); 556 - return 0; 557 - 558 - changed: 559 - write_unlock(&ei->i_meta_lock); 560 - for (i = 1; i < num; i++) 561 - bforget(where[i].bh); 562 - for (i = 0; i < num; i++) 563 - ext2_free_blocks(inode, le32_to_cpu(where[i].key), 1); 564 - return -EAGAIN; 565 523 } 566 524 567 525 /* ··· 566 542 * That has a nice additional property: no special recovery from the failed 567 543 * allocations is needed - we simply release blocks and do not touch anything 568 544 * reachable from inode. 545 + * 546 + * `handle' can be NULL if create == 0. 547 + * 548 + * The BKL may not be held on entry here. Be sure to take it early. 549 + * return > 0, # of blocks mapped or allocated. 550 + * return = 0, if plain lookup failed. 551 + * return < 0, error case. 569 552 */ 570 - 571 - int ext2_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) 553 + static int ext2_get_blocks(struct inode *inode, 554 + sector_t iblock, unsigned long maxblocks, 555 + struct buffer_head *bh_result, 556 + int create) 572 557 { 573 558 int err = -EIO; 574 559 int offsets[4]; 575 560 Indirect chain[4]; 576 561 Indirect *partial; 577 - unsigned long goal; 578 - int left; 579 - int boundary = 0; 580 - int depth = ext2_block_to_path(inode, iblock, offsets, &boundary); 562 + ext2_fsblk_t goal; 563 + int indirect_blks; 564 + int blocks_to_boundary = 0; 565 + int depth; 566 + struct ext2_inode_info *ei = EXT2_I(inode); 567 + int count = 0; 568 + ext2_fsblk_t first_block = 0; 569 + 570 + depth = ext2_block_to_path(inode,iblock,offsets,&blocks_to_boundary); 581 571 582 572 if (depth == 0) 583 - goto out; 584 - 573 + return (err); 585 574 reread: 586 575 partial = ext2_get_branch(inode, depth, offsets, chain, &err); 587 576 588 577 /* Simplest case - block found, no allocation needed */ 589 578 if (!partial) { 590 - got_it: 
591 - map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key)); 592 - if (boundary) 593 - set_buffer_boundary(bh_result); 594 - /* Clean up and exit */ 595 - partial = chain+depth-1; /* the whole chain */ 596 - goto cleanup; 579 + first_block = le32_to_cpu(chain[depth - 1].key); 580 + clear_buffer_new(bh_result); /* What's this do? */ 581 + count++; 582 + /*map more blocks*/ 583 + while (count < maxblocks && count <= blocks_to_boundary) { 584 + ext2_fsblk_t blk; 585 + 586 + if (!verify_chain(chain, partial)) { 587 + /* 588 + * Indirect block might be removed by 589 + * truncate while we were reading it. 590 + * Handling of that case: forget what we've 591 + * got now, go to reread. 592 + */ 593 + count = 0; 594 + goto changed; 595 + } 596 + blk = le32_to_cpu(*(chain[depth-1].p + count)); 597 + if (blk == first_block + count) 598 + count++; 599 + else 600 + break; 601 + } 602 + goto got_it; 597 603 } 598 604 599 605 /* Next simple case - plain lookup or failed read of indirect block */ 600 - if (!create || err == -EIO) { 601 - cleanup: 602 - while (partial > chain) { 603 - brelse(partial->bh); 604 - partial--; 605 - } 606 - out: 607 - return err; 608 - } 606 + if (!create || err == -EIO) 607 + goto cleanup; 608 + 609 + mutex_lock(&ei->truncate_mutex); 609 610 610 611 /* 611 - * Indirect block might be removed by truncate while we were 612 - * reading it. Handling of that case (forget what we've got and 613 - * reread) is taken out of the main path. 612 + * Okay, we need to do block allocation. 
Lazily initialize the block 613 + * allocation info here if necessary 614 + */ 615 + if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info)) 616 + ext2_init_block_alloc_info(inode); 617 + 618 + goal = ext2_find_goal(inode, iblock, chain, partial); 619 + 620 + /* the number of blocks need to allocate for [d,t]indirect blocks */ 621 + indirect_blks = (chain + depth) - partial - 1; 622 + /* 623 + * Next look up the indirect map to count the totoal number of 624 + * direct blocks to allocate for this branch. 614 625 */ 615 - if (err == -EAGAIN) 616 - goto changed; 626 + count = ext2_blks_to_allocate(partial, indirect_blks, 627 + maxblocks, blocks_to_boundary); 628 + /* 629 + * XXX ???? Block out ext2_truncate while we alter the tree 630 + */ 631 + err = ext2_alloc_branch(inode, indirect_blks, &count, goal, 632 + offsets + (partial - chain), partial); 617 633 618 - goal = 0; 619 - if (ext2_find_goal(inode, iblock, chain, partial, &goal) < 0) 620 - goto changed; 621 - 622 - left = (chain + depth) - partial; 623 - err = ext2_alloc_branch(inode, left, goal, 624 - offsets+(partial-chain), partial); 625 - if (err) 634 + if (err) { 635 + mutex_unlock(&ei->truncate_mutex); 626 636 goto cleanup; 637 + } 627 638 628 639 if (ext2_use_xip(inode->i_sb)) { 629 640 /* ··· 666 607 */ 667 608 err = ext2_clear_xip_target (inode, 668 609 le32_to_cpu(chain[depth-1].key)); 669 - if (err) 610 + if (err) { 611 + mutex_unlock(&ei->truncate_mutex); 670 612 goto cleanup; 613 + } 671 614 } 672 615 673 - if (ext2_splice_branch(inode, iblock, chain, partial, left) < 0) 674 - goto changed; 675 - 616 + ext2_splice_branch(inode, iblock, partial, indirect_blks, count); 617 + mutex_unlock(&ei->truncate_mutex); 676 618 set_buffer_new(bh_result); 677 - goto got_it; 678 - 619 + got_it: 620 + map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key)); 621 + if (count > blocks_to_boundary) 622 + set_buffer_boundary(bh_result); 623 + err = count; 624 + /* Clean up and exit */ 625 + partial = chain + 
depth - 1; /* the whole chain */ 626 + cleanup: 627 + while (partial > chain) { 628 + brelse(partial->bh); 629 + partial--; 630 + } 631 + return err; 679 632 changed: 680 633 while (partial > chain) { 681 634 brelse(partial->bh); 682 635 partial--; 683 636 } 684 637 goto reread; 638 + } 639 + 640 + int ext2_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) 641 + { 642 + unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; 643 + int ret = ext2_get_blocks(inode, iblock, max_blocks, 644 + bh_result, create); 645 + if (ret > 0) { 646 + bh_result->b_size = (ret << inode->i_blkbits); 647 + ret = 0; 648 + } 649 + return ret; 650 + 685 651 } 686 652 687 653 static int ext2_writepage(struct page *page, struct writeback_control *wbc) ··· 997 913 ext2_free_data(inode, p, q); 998 914 } 999 915 1000 - void ext2_truncate (struct inode * inode) 916 + void ext2_truncate(struct inode *inode) 1001 917 { 1002 918 __le32 *i_data = EXT2_I(inode)->i_data; 919 + struct ext2_inode_info *ei = EXT2_I(inode); 1003 920 int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb); 1004 921 int offsets[4]; 1005 922 Indirect chain[4]; ··· 1018 933 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) 1019 934 return; 1020 935 1021 - ext2_discard_prealloc(inode); 1022 - 1023 936 blocksize = inode->i_sb->s_blocksize; 1024 937 iblock = (inode->i_size + blocksize-1) 1025 938 >> EXT2_BLOCK_SIZE_BITS(inode->i_sb); ··· 1034 951 n = ext2_block_to_path(inode, iblock, offsets, NULL); 1035 952 if (n == 0) 1036 953 return; 954 + 955 + /* 956 + * From here we block out all ext2_get_block() callers who want to 957 + * modify the block allocation tree. 
958 + */ 959 + mutex_lock(&ei->truncate_mutex); 1037 960 1038 961 if (n == 1) { 1039 962 ext2_free_data(inode, i_data+offsets[0], ··· 1093 1004 case EXT2_TIND_BLOCK: 1094 1005 ; 1095 1006 } 1007 + 1008 + ext2_discard_reservation(inode); 1009 + 1010 + mutex_unlock(&ei->truncate_mutex); 1096 1011 inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; 1097 1012 if (inode_needs_sync(inode)) { 1098 1013 sync_mapping_buffers(inode->i_mapping); ··· 1197 1104 ei->i_acl = EXT2_ACL_NOT_CACHED; 1198 1105 ei->i_default_acl = EXT2_ACL_NOT_CACHED; 1199 1106 #endif 1107 + ei->i_block_alloc_info = NULL; 1108 + 1200 1109 if (IS_ERR(raw_inode)) 1201 1110 goto bad_inode; 1202 1111 ··· 1240 1145 ei->i_dtime = 0; 1241 1146 inode->i_generation = le32_to_cpu(raw_inode->i_generation); 1242 1147 ei->i_state = 0; 1243 - ei->i_next_alloc_block = 0; 1244 - ei->i_next_alloc_goal = 0; 1245 - ei->i_prealloc_count = 0; 1246 1148 ei->i_block_group = (ino - 1) / EXT2_INODES_PER_GROUP(inode->i_sb); 1247 1149 ei->i_dir_start_lookup = 0; 1248 1150
+45
fs/ext2/ioctl.c
··· 22 22 { 23 23 struct ext2_inode_info *ei = EXT2_I(inode); 24 24 unsigned int flags; 25 + unsigned short rsv_window_size; 25 26 26 27 ext2_debug ("cmd = %u, arg = %lu\n", cmd, arg); 27 28 ··· 84 83 inode->i_ctime = CURRENT_TIME_SEC; 85 84 mark_inode_dirty(inode); 86 85 return 0; 86 + case EXT2_IOC_GETRSVSZ: 87 + if (test_opt(inode->i_sb, RESERVATION) 88 + && S_ISREG(inode->i_mode) 89 + && ei->i_block_alloc_info) { 90 + rsv_window_size = ei->i_block_alloc_info->rsv_window_node.rsv_goal_size; 91 + return put_user(rsv_window_size, (int __user *)arg); 92 + } 93 + return -ENOTTY; 94 + case EXT2_IOC_SETRSVSZ: { 95 + 96 + if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode)) 97 + return -ENOTTY; 98 + 99 + if (IS_RDONLY(inode)) 100 + return -EROFS; 101 + 102 + if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) 103 + return -EACCES; 104 + 105 + if (get_user(rsv_window_size, (int __user *)arg)) 106 + return -EFAULT; 107 + 108 + if (rsv_window_size > EXT2_MAX_RESERVE_BLOCKS) 109 + rsv_window_size = EXT2_MAX_RESERVE_BLOCKS; 110 + 111 + /* 112 + * need to allocate reservation structure for this inode 113 + * before set the window size 114 + */ 115 + /* 116 + * XXX What lock should protect the rsv_goal_size? 117 + * Accessed in ext2_get_block only. ext3 uses i_truncate. 118 + */ 119 + mutex_lock(&ei->truncate_mutex); 120 + if (!ei->i_block_alloc_info) 121 + ext2_init_block_alloc_info(inode); 122 + 123 + if (ei->i_block_alloc_info){ 124 + struct ext2_reserve_window_node *rsv = &ei->i_block_alloc_info->rsv_window_node; 125 + rsv->rsv_goal_size = rsv_window_size; 126 + } 127 + mutex_unlock(&ei->truncate_mutex); 128 + return 0; 129 + } 87 130 default: 88 131 return -ENOTTY; 89 132 }
+37 -4
fs/ext2/super.c
··· 149 149 ei->i_acl = EXT2_ACL_NOT_CACHED; 150 150 ei->i_default_acl = EXT2_ACL_NOT_CACHED; 151 151 #endif 152 + ei->i_block_alloc_info = NULL; 152 153 ei->vfs_inode.i_version = 1; 153 154 return &ei->vfs_inode; 154 155 } ··· 167 166 #ifdef CONFIG_EXT2_FS_XATTR 168 167 init_rwsem(&ei->xattr_sem); 169 168 #endif 169 + mutex_init(&ei->truncate_mutex); 170 170 inode_init_once(&ei->vfs_inode); 171 171 } 172 172 ··· 190 188 191 189 static void ext2_clear_inode(struct inode *inode) 192 190 { 191 + struct ext2_block_alloc_info *rsv = EXT2_I(inode)->i_block_alloc_info; 193 192 #ifdef CONFIG_EXT2_FS_POSIX_ACL 194 193 struct ext2_inode_info *ei = EXT2_I(inode); 195 194 ··· 203 200 ei->i_default_acl = EXT2_ACL_NOT_CACHED; 204 201 } 205 202 #endif 203 + ext2_discard_reservation(inode); 204 + EXT2_I(inode)->i_block_alloc_info = NULL; 205 + if (unlikely(rsv)) 206 + kfree(rsv); 206 207 } 207 208 208 209 static int ext2_show_options(struct seq_file *seq, struct vfsmount *vfs) ··· 298 291 .destroy_inode = ext2_destroy_inode, 299 292 .read_inode = ext2_read_inode, 300 293 .write_inode = ext2_write_inode, 301 - .put_inode = ext2_put_inode, 302 294 .delete_inode = ext2_delete_inode, 303 295 .put_super = ext2_put_super, 304 296 .write_super = ext2_write_super, ··· 385 379 Opt_err_ro, Opt_nouid32, Opt_nocheck, Opt_debug, 386 380 Opt_oldalloc, Opt_orlov, Opt_nobh, Opt_user_xattr, Opt_nouser_xattr, 387 381 Opt_acl, Opt_noacl, Opt_xip, Opt_ignore, Opt_err, Opt_quota, 388 - Opt_usrquota, Opt_grpquota 382 + Opt_usrquota, Opt_grpquota, Opt_reservation, Opt_noreservation 389 383 }; 390 384 391 385 static match_table_t tokens = { ··· 417 411 {Opt_ignore, "noquota"}, 418 412 {Opt_quota, "quota"}, 419 413 {Opt_usrquota, "usrquota"}, 414 + {Opt_reservation, "reservation"}, 415 + {Opt_noreservation, "noreservation"}, 420 416 {Opt_err, NULL} 421 417 }; 422 418 ··· 551 543 break; 552 544 #endif 553 545 546 + case Opt_reservation: 547 + set_opt(sbi->s_mount_opt, RESERVATION); 548 + 
printk("reservations ON\n"); 549 + break; 550 + case Opt_noreservation: 551 + clear_opt(sbi->s_mount_opt, RESERVATION); 552 + printk("reservations OFF\n"); 553 + break; 554 554 case Opt_ignore: 555 555 break; 556 556 default: ··· 800 784 sbi->s_resuid = le16_to_cpu(es->s_def_resuid); 801 785 sbi->s_resgid = le16_to_cpu(es->s_def_resgid); 802 786 787 + set_opt(sbi->s_mount_opt, RESERVATION); 788 + 803 789 if (!parse_options ((char *) data, sbi)) 804 790 goto failed_mount; 805 791 ··· 982 964 sbi->s_gdb_count = db_count; 983 965 get_random_bytes(&sbi->s_next_generation, sizeof(u32)); 984 966 spin_lock_init(&sbi->s_next_gen_lock); 967 + 968 + /* per fileystem reservation list head & lock */ 969 + spin_lock_init(&sbi->s_rsv_window_lock); 970 + sbi->s_rsv_window_root = RB_ROOT; 971 + /* 972 + * Add a single, static dummy reservation to the start of the 973 + * reservation window list --- it gives us a placeholder for 974 + * append-at-start-of-list which makes the allocation logic 975 + * _much_ simpler. 976 + */ 977 + sbi->s_rsv_window_head.rsv_start = EXT2_RESERVE_WINDOW_NOT_ALLOCATED; 978 + sbi->s_rsv_window_head.rsv_end = EXT2_RESERVE_WINDOW_NOT_ALLOCATED; 979 + sbi->s_rsv_window_head.rsv_alloc_hit = 0; 980 + sbi->s_rsv_window_head.rsv_goal_size = 0; 981 + ext2_rsv_window_add(sb, &sbi->s_rsv_window_head); 985 982 986 983 err = percpu_counter_init(&sbi->s_freeblocks_counter, 987 984 ext2_count_free_blocks(sb)); ··· 1293 1260 1294 1261 tmp_bh.b_state = 0; 1295 1262 err = ext2_get_block(inode, blk, &tmp_bh, 0); 1296 - if (err) 1263 + if (err < 0) 1297 1264 return err; 1298 1265 if (!buffer_mapped(&tmp_bh)) /* A hole? */ 1299 1266 memset(data, 0, tocopy); ··· 1332 1299 1333 1300 tmp_bh.b_state = 0; 1334 1301 err = ext2_get_block(inode, blk, &tmp_bh, 1); 1335 - if (err) 1302 + if (err < 0) 1336 1303 goto out; 1337 1304 if (offset || tocopy != EXT2_BLOCK_SIZE(sb)) 1338 1305 bh = sb_bread(sb, tmp_bh.b_blocknr);
+1 -2
fs/ext2/xattr.c
··· 664 664 s_first_data_block) + 665 665 EXT2_I(inode)->i_block_group * 666 666 EXT2_BLOCKS_PER_GROUP(sb); 667 - int block = ext2_new_block(inode, goal, 668 - NULL, NULL, &error); 667 + int block = ext2_new_block(inode, goal, &error); 669 668 if (error) 670 669 goto cleanup; 671 670 ea_idebug(inode, "creating block %d", block);
+17 -6
include/linux/ext2_fs.h
··· 29 29 #undef EXT2FS_DEBUG 30 30 31 31 /* 32 - * Define EXT2_PREALLOCATE to preallocate data blocks for expanding files 32 + * Define EXT2_RESERVATION to reserve data blocks for expanding files 33 33 */ 34 - #define EXT2_PREALLOCATE 35 - #define EXT2_DEFAULT_PREALLOC_BLOCKS 8 36 - 34 + #define EXT2_DEFAULT_RESERVE_BLOCKS 8 35 + /*max window size: 1024(direct blocks) + 3([t,d]indirect blocks) */ 36 + #define EXT2_MAX_RESERVE_BLOCKS 1027 37 + #define EXT2_RESERVE_WINDOW_NOT_ALLOCATED 0 37 38 /* 38 39 * The second extended file system version 39 40 */ ··· 201 200 #define EXT2_IOC_SETFLAGS FS_IOC_SETFLAGS 202 201 #define EXT2_IOC_GETVERSION FS_IOC_GETVERSION 203 202 #define EXT2_IOC_SETVERSION FS_IOC_SETVERSION 203 + #define EXT2_IOC_GETRSVSZ _IOR('f', 5, long) 204 + #define EXT2_IOC_SETRSVSZ _IOW('f', 6, long) 204 205 205 206 /* 206 207 * ioctl commands in 32 bit emulation ··· 320 317 #define EXT2_MOUNT_XATTR_USER 0x004000 /* Extended user attributes */ 321 318 #define EXT2_MOUNT_POSIX_ACL 0x008000 /* POSIX Access Control Lists */ 322 319 #define EXT2_MOUNT_XIP 0x010000 /* Execute in place */ 323 - #define EXT2_MOUNT_USRQUOTA 0x020000 /* user quota */ 324 - #define EXT2_MOUNT_GRPQUOTA 0x040000 /* group quota */ 320 + #define EXT2_MOUNT_USRQUOTA 0x020000 /* user quota */ 321 + #define EXT2_MOUNT_GRPQUOTA 0x040000 /* group quota */ 322 + #define EXT2_MOUNT_RESERVATION 0x080000 /* Preallocation */ 325 323 326 324 327 325 #define clear_opt(o, opt) o &= ~EXT2_MOUNT_##opt ··· 561 557 #define EXT2_DIR_ROUND (EXT2_DIR_PAD - 1) 562 558 #define EXT2_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT2_DIR_ROUND) & \ 563 559 ~EXT2_DIR_ROUND) 560 + 561 + static inline ext2_fsblk_t 562 + ext2_group_first_block_no(struct super_block *sb, unsigned long group_no) 563 + { 564 + return group_no * (ext2_fsblk_t)EXT2_BLOCKS_PER_GROUP(sb) + 565 + le32_to_cpu(EXT2_SB(sb)->s_es->s_first_data_block); 566 + } 564 567 565 568 #endif /* _LINUX_EXT2_FS_H */
+50
include/linux/ext2_fs_sb.h
··· 18 18 19 19 #include <linux/blockgroup_lock.h> 20 20 #include <linux/percpu_counter.h> 21 + #include <linux/rbtree.h> 22 + 23 + /* XXX Here for now... not interested in restructing headers JUST now */ 24 + 25 + /* data type for block offset of block group */ 26 + typedef int ext2_grpblk_t; 27 + 28 + /* data type for filesystem-wide blocks number */ 29 + typedef unsigned long ext2_fsblk_t; 30 + 31 + #define E2FSBLK "%lu" 32 + 33 + struct ext2_reserve_window { 34 + ext2_fsblk_t _rsv_start; /* First byte reserved */ 35 + ext2_fsblk_t _rsv_end; /* Last byte reserved or 0 */ 36 + }; 37 + 38 + struct ext2_reserve_window_node { 39 + struct rb_node rsv_node; 40 + __u32 rsv_goal_size; 41 + __u32 rsv_alloc_hit; 42 + struct ext2_reserve_window rsv_window; 43 + }; 44 + 45 + struct ext2_block_alloc_info { 46 + /* information about reservation window */ 47 + struct ext2_reserve_window_node rsv_window_node; 48 + /* 49 + * was i_next_alloc_block in ext2_inode_info 50 + * is the logical (file-relative) number of the 51 + * most-recently-allocated block in this file. 52 + * We use this for detecting linearly ascending allocation requests. 53 + */ 54 + __u32 last_alloc_logical_block; 55 + /* 56 + * Was i_next_alloc_goal in ext2_inode_info 57 + * is the *physical* companion to i_next_alloc_block. 58 + * it the the physical block number of the block which was most-recentl 59 + * allocated to this file. This give us the goal (target) for the next 60 + * allocation when we detect linearly ascending requests. 
61 + */ 62 + ext2_fsblk_t last_alloc_physical_block; 63 + }; 64 + 65 + #define rsv_start rsv_window._rsv_start 66 + #define rsv_end rsv_window._rsv_end 21 67 22 68 /* 23 69 * second extended-fs super-block data in memory ··· 102 56 struct percpu_counter s_freeinodes_counter; 103 57 struct percpu_counter s_dirs_counter; 104 58 struct blockgroup_lock s_blockgroup_lock; 59 + /* root of the per fs reservation window tree */ 60 + spinlock_t s_rsv_window_lock; 61 + struct rb_root s_rsv_window_root; 62 + struct ext2_reserve_window_node s_rsv_window_head; 105 63 }; 106 64 107 65 #endif /* _LINUX_EXT2_FS_SB */