at v2.6.13 988 lines 31 kB view raw
1/* 2 * lcnalloc.c - Cluster (de)allocation code. Part of the Linux-NTFS project. 3 * 4 * Copyright (c) 2004-2005 Anton Altaparmakov 5 * 6 * This program/include file is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License as published 8 * by the Free Software Foundation; either version 2 of the License, or 9 * (at your option) any later version. 10 * 11 * This program/include file is distributed in the hope that it will be 12 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty 13 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program (in the main directory of the Linux-NTFS 18 * distribution in the file COPYING); if not, write to the Free Software 19 * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 20 */ 21 22#ifdef NTFS_RW 23 24#include <linux/pagemap.h> 25 26#include "lcnalloc.h" 27#include "debug.h" 28#include "bitmap.h" 29#include "inode.h" 30#include "volume.h" 31#include "attrib.h" 32#include "malloc.h" 33#include "aops.h" 34#include "ntfs.h" 35 36/** 37 * ntfs_cluster_free_from_rl_nolock - free clusters from runlist 38 * @vol: mounted ntfs volume on which to free the clusters 39 * @rl: runlist describing the clusters to free 40 * 41 * Free all the clusters described by the runlist @rl on the volume @vol. In 42 * the case of an error being returned, at least some of the clusters were not 43 * freed. 44 * 45 * Return 0 on success and -errno on error. 46 * 47 * Locking: - The volume lcn bitmap must be locked for writing on entry and is 48 * left locked on return. 49 */ 50int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol, 51 const runlist_element *rl) 52{ 53 struct inode *lcnbmp_vi = vol->lcnbmp_ino; 54 int ret = 0; 55 56 ntfs_debug("Entering."); 57 for (; rl->length; rl++) { 58 int err; 59 60 if (rl->lcn < 0) 61 continue; 62 err = ntfs_bitmap_clear_run(lcnbmp_vi, rl->lcn, rl->length); 63 if (unlikely(err && (!ret || ret == -ENOMEM) && ret != err)) 64 ret = err; 65 } 66 ntfs_debug("Done."); 67 return ret; 68} 69 70/** 71 * ntfs_cluster_alloc - allocate clusters on an ntfs volume 72 * @vol: mounted ntfs volume on which to allocate the clusters 73 * @start_vcn: vcn to use for the first allocated cluster 74 * @count: number of clusters to allocate 75 * @start_lcn: starting lcn at which to allocate the clusters (or -1 if none) 76 * @zone: zone from which to allocate the clusters 77 * 78 * Allocate @count clusters preferably starting at cluster @start_lcn or at the 79 * current allocator position if @start_lcn is -1, on the mounted ntfs volume 80 * @vol. @zone is either DATA_ZONE for allocation of normal clusters or 81 * MFT_ZONE for allocation of clusters for the master file table, i.e. the 82 * $MFT/$DATA attribute. 83 * 84 * @start_vcn specifies the vcn of the first allocated cluster. This makes 85 * merging the resulting runlist with the old runlist easier. 86 * 87 * You need to check the return value with IS_ERR(). If this is false, the 88 * function was successful and the return value is a runlist describing the 89 * allocated cluster(s). If IS_ERR() is true, the function failed and 90 * PTR_ERR() gives you the error code. 91 * 92 * Notes on the allocation algorithm 93 * ================================= 94 * 95 * There are two data zones. First is the area between the end of the mft zone 96 * and the end of the volume, and second is the area between the start of the 97 * volume and the start of the mft zone. On unmodified/standard NTFS 1.x 98 * volumes, the second data zone does not exist due to the mft zone being 99 * expanded to cover the start of the volume in order to reserve space for the 100 * mft bitmap attribute. 101 * 102 * This is not the prettiest function but the complexity stems from the need of 103 * implementing the mft vs data zoned approach and from the fact that we have 104 * access to the lcn bitmap in portions of up to 8192 bytes at a time, so we 105 * need to cope with crossing over boundaries of two buffers. Further, the 106 * fact that the allocator allows for caller supplied hints as to the location 107 * of where allocation should begin and the fact that the allocator keeps track 108 * of where in the data zones the next natural allocation should occur, 109 * contribute to the complexity of the function. But it should all be 110 * worthwhile, because this allocator should: 1) be a full implementation of 111 * the MFT zone approach used by Windows NT, 2) cause reduction in 112 * fragmentation, and 3) be speedy in allocations (the code is not optimized 113 * for speed, but the algorithm is, so further speed improvements are probably 114 * possible). 115 * 116 * FIXME: We should be monitoring cluster allocation and increment the MFT zone 117 * size dynamically but this is something for the future. We will just cause 118 * heavier fragmentation by not doing it and I am not even sure Windows would 119 * grow the MFT zone dynamically, so it might even be correct not to do this. 120 * The overhead in doing dynamic MFT zone expansion would be very large and 121 * unlikely worth the effort. (AIA) 122 * 123 * TODO: I have added in double the required zone position pointer wrap around 124 * logic which can be optimized to having only one of the two logic sets. 125 * However, having the double logic will work fine, but if we have only one of 126 * the sets and we get it wrong somewhere, then we get into trouble, so 127 * removing the duplicate logic requires _very_ careful consideration of _all_ 128 * possible code paths. So at least for now, I am leaving the double logic - 129 * better safe than sorry... (AIA) 130 * 131 * Locking: - The volume lcn bitmap must be unlocked on entry and is unlocked 132 * on return. 133 * - This function takes the volume lcn bitmap lock for writing and 134 * modifies the bitmap contents. 135 */ 136runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, const VCN start_vcn, 137 const s64 count, const LCN start_lcn, 138 const NTFS_CLUSTER_ALLOCATION_ZONES zone) 139{ 140 LCN zone_start, zone_end, bmp_pos, bmp_initial_pos, last_read_pos, lcn; 141 LCN prev_lcn = 0, prev_run_len = 0, mft_zone_size; 142 s64 clusters; 143 loff_t i_size; 144 struct inode *lcnbmp_vi; 145 runlist_element *rl = NULL; 146 struct address_space *mapping; 147 struct page *page = NULL; 148 u8 *buf, *byte; 149 int err = 0, rlpos, rlsize, buf_size; 150 u8 pass, done_zones, search_zone, need_writeback = 0, bit; 151 152 ntfs_debug("Entering for start_vcn 0x%llx, count 0x%llx, start_lcn " 153 "0x%llx, zone %s_ZONE.", (unsigned long long)start_vcn, 154 (unsigned long long)count, 155 (unsigned long long)start_lcn, 156 zone == MFT_ZONE ? "MFT" : "DATA"); 157 BUG_ON(!vol); 158 lcnbmp_vi = vol->lcnbmp_ino; 159 BUG_ON(!lcnbmp_vi); 160 BUG_ON(start_vcn < 0); 161 BUG_ON(count < 0); 162 BUG_ON(start_lcn < -1); 163 BUG_ON(zone < FIRST_ZONE); 164 BUG_ON(zone > LAST_ZONE); 165 166 /* Return empty runlist if @count == 0 */ 167 // FIXME: Do we want to just return NULL instead? (AIA) 168 if (!count) { 169 rl = ntfs_malloc_nofs(PAGE_SIZE); 170 if (!rl) 171 return ERR_PTR(-ENOMEM); 172 rl[0].vcn = start_vcn; 173 rl[0].lcn = LCN_RL_NOT_MAPPED; 174 rl[0].length = 0; 175 return rl; 176 } 177 /* Take the lcnbmp lock for writing. */ 178 down_write(&vol->lcnbmp_lock); 179 /* 180 * If no specific @start_lcn was requested, use the current data zone 181 * position, otherwise use the requested @start_lcn but make sure it 182 * lies outside the mft zone. Also set done_zones to 0 (no zones done) 183 * and pass depending on whether we are starting inside a zone (1) or 184 * at the beginning of a zone (2). If requesting from the MFT_ZONE, 185 * we either start at the current position within the mft zone or at 186 * the specified position. If the latter is out of bounds then we start 187 * at the beginning of the MFT_ZONE. 188 */ 189 done_zones = 0; 190 pass = 1; 191 /* 192 * zone_start and zone_end are the current search range. search_zone 193 * is 1 for mft zone, 2 for data zone 1 (end of mft zone till end of 194 * volume) and 4 for data zone 2 (start of volume till start of mft 195 * zone). 196 */ 197 zone_start = start_lcn; 198 if (zone_start < 0) { 199 if (zone == DATA_ZONE) 200 zone_start = vol->data1_zone_pos; 201 else 202 zone_start = vol->mft_zone_pos; 203 if (!zone_start) { 204 /* 205 * Zone starts at beginning of volume which means a 206 * single pass is sufficient. 207 */ 208 pass = 2; 209 } 210 } else if (zone == DATA_ZONE && zone_start >= vol->mft_zone_start && 211 zone_start < vol->mft_zone_end) { 212 zone_start = vol->mft_zone_end; 213 /* 214 * Starting at beginning of data1_zone which means a single 215 * pass in this zone is sufficient. 216 */ 217 pass = 2; 218 } else if (zone == MFT_ZONE && (zone_start < vol->mft_zone_start || 219 zone_start >= vol->mft_zone_end)) { 220 zone_start = vol->mft_lcn; 221 if (!vol->mft_zone_end) 222 zone_start = 0; 223 /* 224 * Starting at beginning of volume which means a single pass 225 * is sufficient. 226 */ 227 pass = 2; 228 } 229 if (zone == MFT_ZONE) { 230 zone_end = vol->mft_zone_end; 231 search_zone = 1; 232 } else /* if (zone == DATA_ZONE) */ { 233 /* Skip searching the mft zone. */ 234 done_zones |= 1; 235 if (zone_start >= vol->mft_zone_end) { 236 zone_end = vol->nr_clusters; 237 search_zone = 2; 238 } else { 239 zone_end = vol->mft_zone_start; 240 search_zone = 4; 241 } 242 } 243 /* 244 * bmp_pos is the current bit position inside the bitmap. We use 245 * bmp_initial_pos to determine whether or not to do a zone switch. 246 */ 247 bmp_pos = bmp_initial_pos = zone_start; 248 249 /* Loop until all clusters are allocated, i.e. clusters == 0. */ 250 clusters = count; 251 rlpos = rlsize = 0; 252 mapping = lcnbmp_vi->i_mapping; 253 i_size = i_size_read(lcnbmp_vi); 254 while (1) { 255 ntfs_debug("Start of outer while loop: done_zones 0x%x, " 256 "search_zone %i, pass %i, zone_start 0x%llx, " 257 "zone_end 0x%llx, bmp_initial_pos 0x%llx, " 258 "bmp_pos 0x%llx, rlpos %i, rlsize %i.", 259 done_zones, search_zone, pass, 260 (unsigned long long)zone_start, 261 (unsigned long long)zone_end, 262 (unsigned long long)bmp_initial_pos, 263 (unsigned long long)bmp_pos, rlpos, rlsize); 264 /* Loop until we run out of free clusters. */ 265 last_read_pos = bmp_pos >> 3; 266 ntfs_debug("last_read_pos 0x%llx.", 267 (unsigned long long)last_read_pos); 268 if (last_read_pos > i_size) { 269 ntfs_debug("End of attribute reached. " 270 "Skipping to zone_pass_done."); 271 goto zone_pass_done; 272 } 273 if (likely(page)) { 274 if (need_writeback) { 275 ntfs_debug("Marking page dirty."); 276 flush_dcache_page(page); 277 set_page_dirty(page); 278 need_writeback = 0; 279 } 280 ntfs_unmap_page(page); 281 } 282 page = ntfs_map_page(mapping, last_read_pos >> 283 PAGE_CACHE_SHIFT); 284 if (IS_ERR(page)) { 285 err = PTR_ERR(page); 286 ntfs_error(vol->sb, "Failed to map page."); 287 goto out; 288 } 289 buf_size = last_read_pos & ~PAGE_CACHE_MASK; 290 buf = page_address(page) + buf_size; 291 buf_size = PAGE_CACHE_SIZE - buf_size; 292 if (unlikely(last_read_pos + buf_size > i_size)) 293 buf_size = i_size - last_read_pos; 294 buf_size <<= 3; 295 lcn = bmp_pos & 7; 296 bmp_pos &= ~(LCN)7; 297 ntfs_debug("Before inner while loop: buf_size %i, lcn 0x%llx, " 298 "bmp_pos 0x%llx, need_writeback %i.", buf_size, 299 (unsigned long long)lcn, 300 (unsigned long long)bmp_pos, need_writeback); 301 while (lcn < buf_size && lcn + bmp_pos < zone_end) { 302 byte = buf + (lcn >> 3); 303 ntfs_debug("In inner while loop: buf_size %i, " 304 "lcn 0x%llx, bmp_pos 0x%llx, " 305 "need_writeback %i, byte ofs 0x%x, " 306 "*byte 0x%x.", buf_size, 307 (unsigned long long)lcn, 308 (unsigned long long)bmp_pos, 309 need_writeback, 310 (unsigned int)(lcn >> 3), 311 (unsigned int)*byte); 312 /* Skip full bytes. */ 313 if (*byte == 0xff) { 314 lcn = (lcn + 8) & ~(LCN)7; 315 ntfs_debug("Continuing while loop 1."); 316 continue; 317 } 318 bit = 1 << (lcn & 7); 319 ntfs_debug("bit %i.", bit); 320 /* If the bit is already set, go onto the next one. */ 321 if (*byte & bit) { 322 lcn++; 323 ntfs_debug("Continuing while loop 2."); 324 continue; 325 } 326 /* 327 * Allocate more memory if needed, including space for 328 * the terminator element. 329 * ntfs_malloc_nofs() operates on whole pages only. 330 */ 331 if ((rlpos + 2) * sizeof(*rl) > rlsize) { 332 runlist_element *rl2; 333 334 ntfs_debug("Reallocating memory."); 335 if (!rl) 336 ntfs_debug("First free bit is at LCN " 337 "0x%llx.", 338 (unsigned long long) 339 (lcn + bmp_pos)); 340 rl2 = ntfs_malloc_nofs(rlsize + (int)PAGE_SIZE); 341 if (unlikely(!rl2)) { 342 err = -ENOMEM; 343 ntfs_error(vol->sb, "Failed to " 344 "allocate memory."); 345 goto out; 346 } 347 memcpy(rl2, rl, rlsize); 348 ntfs_free(rl); 349 rl = rl2; 350 rlsize += PAGE_SIZE; 351 ntfs_debug("Reallocated memory, rlsize 0x%x.", 352 rlsize); 353 } 354 /* Allocate the bitmap bit. */ 355 *byte |= bit; 356 /* We need to write this bitmap page to disk. */ 357 need_writeback = 1; 358 ntfs_debug("*byte 0x%x, need_writeback is set.", 359 (unsigned int)*byte); 360 /* 361 * Coalesce with previous run if adjacent LCNs. 362 * Otherwise, append a new run. 363 */ 364 ntfs_debug("Adding run (lcn 0x%llx, len 0x%llx), " 365 "prev_lcn 0x%llx, lcn 0x%llx, " 366 "bmp_pos 0x%llx, prev_run_len 0x%llx, " 367 "rlpos %i.", 368 (unsigned long long)(lcn + bmp_pos), 369 1ULL, (unsigned long long)prev_lcn, 370 (unsigned long long)lcn, 371 (unsigned long long)bmp_pos, 372 (unsigned long long)prev_run_len, 373 rlpos); 374 if (prev_lcn == lcn + bmp_pos - prev_run_len && rlpos) { 375 ntfs_debug("Coalescing to run (lcn 0x%llx, " 376 "len 0x%llx).", 377 (unsigned long long) 378 rl[rlpos - 1].lcn, 379 (unsigned long long) 380 rl[rlpos - 1].length); 381 rl[rlpos - 1].length = ++prev_run_len; 382 ntfs_debug("Run now (lcn 0x%llx, len 0x%llx), " 383 "prev_run_len 0x%llx.", 384 (unsigned long long) 385 rl[rlpos - 1].lcn, 386 (unsigned long long) 387 rl[rlpos - 1].length, 388 (unsigned long long) 389 prev_run_len); 390 } else { 391 if (likely(rlpos)) { 392 ntfs_debug("Adding new run, (previous " 393 "run lcn 0x%llx, " 394 "len 0x%llx).", 395 (unsigned long long) 396 rl[rlpos - 1].lcn, 397 (unsigned long long) 398 rl[rlpos - 1].length); 399 rl[rlpos].vcn = rl[rlpos - 1].vcn + 400 prev_run_len; 401 } else { 402 ntfs_debug("Adding new run, is first " 403 "run."); 404 rl[rlpos].vcn = start_vcn; 405 } 406 rl[rlpos].lcn = prev_lcn = lcn + bmp_pos; 407 rl[rlpos].length = prev_run_len = 1; 408 rlpos++; 409 } 410 /* Done? */ 411 if (!--clusters) { 412 LCN tc; 413 /* 414 * Update the current zone position. Positions 415 * of already scanned zones have been updated 416 * during the respective zone switches. 417 */ 418 tc = lcn + bmp_pos + 1; 419 ntfs_debug("Done. Updating current zone " 420 "position, tc 0x%llx, " 421 "search_zone %i.", 422 (unsigned long long)tc, 423 search_zone); 424 switch (search_zone) { 425 case 1: 426 ntfs_debug("Before checks, " 427 "vol->mft_zone_pos " 428 "0x%llx.", 429 (unsigned long long) 430 vol->mft_zone_pos); 431 if (tc >= vol->mft_zone_end) { 432 vol->mft_zone_pos = 433 vol->mft_lcn; 434 if (!vol->mft_zone_end) 435 vol->mft_zone_pos = 0; 436 } else if ((bmp_initial_pos >= 437 vol->mft_zone_pos || 438 tc > vol->mft_zone_pos) 439 && tc >= vol->mft_lcn) 440 vol->mft_zone_pos = tc; 441 ntfs_debug("After checks, " 442 "vol->mft_zone_pos " 443 "0x%llx.", 444 (unsigned long long) 445 vol->mft_zone_pos); 446 break; 447 case 2: 448 ntfs_debug("Before checks, " 449 "vol->data1_zone_pos " 450 "0x%llx.", 451 (unsigned long long) 452 vol->data1_zone_pos); 453 if (tc >= vol->nr_clusters) 454 vol->data1_zone_pos = 455 vol->mft_zone_end; 456 else if ((bmp_initial_pos >= 457 vol->data1_zone_pos || 458 tc > vol->data1_zone_pos) 459 && tc >= vol->mft_zone_end) 460 vol->data1_zone_pos = tc; 461 ntfs_debug("After checks, " 462 "vol->data1_zone_pos " 463 "0x%llx.", 464 (unsigned long long) 465 vol->data1_zone_pos); 466 break; 467 case 4: 468 ntfs_debug("Before checks, " 469 "vol->data2_zone_pos " 470 "0x%llx.", 471 (unsigned long long) 472 vol->data2_zone_pos); 473 if (tc >= vol->mft_zone_start) 474 vol->data2_zone_pos = 0; 475 else if (bmp_initial_pos >= 476 vol->data2_zone_pos || 477 tc > vol->data2_zone_pos) 478 vol->data2_zone_pos = tc; 479 ntfs_debug("After checks, " 480 "vol->data2_zone_pos " 481 "0x%llx.", 482 (unsigned long long) 483 vol->data2_zone_pos); 484 break; 485 default: 486 BUG(); 487 } 488 ntfs_debug("Finished. Going to out."); 489 goto out; 490 } 491 lcn++; 492 } 493 bmp_pos += buf_size; 494 ntfs_debug("After inner while loop: buf_size 0x%x, lcn " 495 "0x%llx, bmp_pos 0x%llx, need_writeback %i.", 496 buf_size, (unsigned long long)lcn, 497 (unsigned long long)bmp_pos, need_writeback); 498 if (bmp_pos < zone_end) { 499 ntfs_debug("Continuing outer while loop, " 500 "bmp_pos 0x%llx, zone_end 0x%llx.", 501 (unsigned long long)bmp_pos, 502 (unsigned long long)zone_end); 503 continue; 504 } 505zone_pass_done: /* Finished with the current zone pass. */ 506 ntfs_debug("At zone_pass_done, pass %i.", pass); 507 if (pass == 1) { 508 /* 509 * Now do pass 2, scanning the first part of the zone 510 * we omitted in pass 1. 511 */ 512 pass = 2; 513 zone_end = zone_start; 514 switch (search_zone) { 515 case 1: /* mft_zone */ 516 zone_start = vol->mft_zone_start; 517 break; 518 case 2: /* data1_zone */ 519 zone_start = vol->mft_zone_end; 520 break; 521 case 4: /* data2_zone */ 522 zone_start = 0; 523 break; 524 default: 525 BUG(); 526 } 527 /* Sanity check. */ 528 if (zone_end < zone_start) 529 zone_end = zone_start; 530 bmp_pos = zone_start; 531 ntfs_debug("Continuing outer while loop, pass 2, " 532 "zone_start 0x%llx, zone_end 0x%llx, " 533 "bmp_pos 0x%llx.", 534 (unsigned long long)zone_start, 535 (unsigned long long)zone_end, 536 (unsigned long long)bmp_pos); 537 continue; 538 } /* pass == 2 */ 539done_zones_check: 540 ntfs_debug("At done_zones_check, search_zone %i, done_zones " 541 "before 0x%x, done_zones after 0x%x.", 542 search_zone, done_zones, 543 done_zones | search_zone); 544 done_zones |= search_zone; 545 if (done_zones < 7) { 546 ntfs_debug("Switching zone."); 547 /* Now switch to the next zone we haven't done yet. */ 548 pass = 1; 549 switch (search_zone) { 550 case 1: 551 ntfs_debug("Switching from mft zone to data1 " 552 "zone."); 553 /* Update mft zone position. */ 554 if (rlpos) { 555 LCN tc; 556 557 ntfs_debug("Before checks, " 558 "vol->mft_zone_pos " 559 "0x%llx.", 560 (unsigned long long) 561 vol->mft_zone_pos); 562 tc = rl[rlpos - 1].lcn + 563 rl[rlpos - 1].length; 564 if (tc >= vol->mft_zone_end) { 565 vol->mft_zone_pos = 566 vol->mft_lcn; 567 if (!vol->mft_zone_end) 568 vol->mft_zone_pos = 0; 569 } else if ((bmp_initial_pos >= 570 vol->mft_zone_pos || 571 tc > vol->mft_zone_pos) 572 && tc >= vol->mft_lcn) 573 vol->mft_zone_pos = tc; 574 ntfs_debug("After checks, " 575 "vol->mft_zone_pos " 576 "0x%llx.", 577 (unsigned long long) 578 vol->mft_zone_pos); 579 } 580 /* Switch from mft zone to data1 zone. */ 581switch_to_data1_zone: search_zone = 2; 582 zone_start = bmp_initial_pos = 583 vol->data1_zone_pos; 584 zone_end = vol->nr_clusters; 585 if (zone_start == vol->mft_zone_end) 586 pass = 2; 587 if (zone_start >= zone_end) { 588 vol->data1_zone_pos = zone_start = 589 vol->mft_zone_end; 590 pass = 2; 591 } 592 break; 593 case 2: 594 ntfs_debug("Switching from data1 zone to " 595 "data2 zone."); 596 /* Update data1 zone position. */ 597 if (rlpos) { 598 LCN tc; 599 600 ntfs_debug("Before checks, " 601 "vol->data1_zone_pos " 602 "0x%llx.", 603 (unsigned long long) 604 vol->data1_zone_pos); 605 tc = rl[rlpos - 1].lcn + 606 rl[rlpos - 1].length; 607 if (tc >= vol->nr_clusters) 608 vol->data1_zone_pos = 609 vol->mft_zone_end; 610 else if ((bmp_initial_pos >= 611 vol->data1_zone_pos || 612 tc > vol->data1_zone_pos) 613 && tc >= vol->mft_zone_end) 614 vol->data1_zone_pos = tc; 615 ntfs_debug("After checks, " 616 "vol->data1_zone_pos " 617 "0x%llx.", 618 (unsigned long long) 619 vol->data1_zone_pos); 620 } 621 /* Switch from data1 zone to data2 zone. */ 622 search_zone = 4; 623 zone_start = bmp_initial_pos = 624 vol->data2_zone_pos; 625 zone_end = vol->mft_zone_start; 626 if (!zone_start) 627 pass = 2; 628 if (zone_start >= zone_end) { 629 vol->data2_zone_pos = zone_start = 630 bmp_initial_pos = 0; 631 pass = 2; 632 } 633 break; 634 case 4: 635 ntfs_debug("Switching from data2 zone to " 636 "data1 zone."); 637 /* Update data2 zone position. */ 638 if (rlpos) { 639 LCN tc; 640 641 ntfs_debug("Before checks, " 642 "vol->data2_zone_pos " 643 "0x%llx.", 644 (unsigned long long) 645 vol->data2_zone_pos); 646 tc = rl[rlpos - 1].lcn + 647 rl[rlpos - 1].length; 648 if (tc >= vol->mft_zone_start) 649 vol->data2_zone_pos = 0; 650 else if (bmp_initial_pos >= 651 vol->data2_zone_pos || 652 tc > vol->data2_zone_pos) 653 vol->data2_zone_pos = tc; 654 ntfs_debug("After checks, " 655 "vol->data2_zone_pos " 656 "0x%llx.", 657 (unsigned long long) 658 vol->data2_zone_pos); 659 } 660 /* Switch from data2 zone to data1 zone. */ 661 goto switch_to_data1_zone; 662 default: 663 BUG(); 664 } 665 ntfs_debug("After zone switch, search_zone %i, " 666 "pass %i, bmp_initial_pos 0x%llx, " 667 "zone_start 0x%llx, zone_end 0x%llx.", 668 search_zone, pass, 669 (unsigned long long)bmp_initial_pos, 670 (unsigned long long)zone_start, 671 (unsigned long long)zone_end); 672 bmp_pos = zone_start; 673 if (zone_start == zone_end) { 674 ntfs_debug("Empty zone, going to " 675 "done_zones_check."); 676 /* Empty zone. Don't bother searching it. */ 677 goto done_zones_check; 678 } 679 ntfs_debug("Continuing outer while loop."); 680 continue; 681 } /* done_zones == 7 */ 682 ntfs_debug("All zones are finished."); 683 /* 684 * All zones are finished! If DATA_ZONE, shrink mft zone. If 685 * MFT_ZONE, we have really run out of space. 686 */ 687 mft_zone_size = vol->mft_zone_end - vol->mft_zone_start; 688 ntfs_debug("vol->mft_zone_start 0x%llx, vol->mft_zone_end " 689 "0x%llx, mft_zone_size 0x%llx.", 690 (unsigned long long)vol->mft_zone_start, 691 (unsigned long long)vol->mft_zone_end, 692 (unsigned long long)mft_zone_size); 693 if (zone == MFT_ZONE || mft_zone_size <= 0) { 694 ntfs_debug("No free clusters left, going to out."); 695 /* Really no more space left on device. */ 696 err = -ENOSPC; 697 goto out; 698 } /* zone == DATA_ZONE && mft_zone_size > 0 */ 699 ntfs_debug("Shrinking mft zone."); 700 zone_end = vol->mft_zone_end; 701 mft_zone_size >>= 1; 702 if (mft_zone_size > 0) 703 vol->mft_zone_end = vol->mft_zone_start + mft_zone_size; 704 else /* mft zone and data2 zone no longer exist. */ 705 vol->data2_zone_pos = vol->mft_zone_start = 706 vol->mft_zone_end = 0; 707 if (vol->mft_zone_pos >= vol->mft_zone_end) { 708 vol->mft_zone_pos = vol->mft_lcn; 709 if (!vol->mft_zone_end) 710 vol->mft_zone_pos = 0; 711 } 712 bmp_pos = zone_start = bmp_initial_pos = 713 vol->data1_zone_pos = vol->mft_zone_end; 714 search_zone = 2; 715 pass = 2; 716 done_zones &= ~2; 717 ntfs_debug("After shrinking mft zone, mft_zone_size 0x%llx, " 718 "vol->mft_zone_start 0x%llx, " 719 "vol->mft_zone_end 0x%llx, " 720 "vol->mft_zone_pos 0x%llx, search_zone 2, " 721 "pass 2, dones_zones 0x%x, zone_start 0x%llx, " 722 "zone_end 0x%llx, vol->data1_zone_pos 0x%llx, " 723 "continuing outer while loop.", 724 (unsigned long long)mft_zone_size, 725 (unsigned long long)vol->mft_zone_start, 726 (unsigned long long)vol->mft_zone_end, 727 (unsigned long long)vol->mft_zone_pos, 728 done_zones, (unsigned long long)zone_start, 729 (unsigned long long)zone_end, 730 (unsigned long long)vol->data1_zone_pos); 731 } 732 ntfs_debug("After outer while loop."); 733out: 734 ntfs_debug("At out."); 735 /* Add runlist terminator element. */ 736 if (likely(rl)) { 737 rl[rlpos].vcn = rl[rlpos - 1].vcn + rl[rlpos - 1].length; 738 rl[rlpos].lcn = LCN_RL_NOT_MAPPED; 739 rl[rlpos].length = 0; 740 } 741 if (likely(page && !IS_ERR(page))) { 742 if (need_writeback) { 743 ntfs_debug("Marking page dirty."); 744 flush_dcache_page(page); 745 set_page_dirty(page); 746 need_writeback = 0; 747 } 748 ntfs_unmap_page(page); 749 } 750 if (likely(!err)) { 751 up_write(&vol->lcnbmp_lock); 752 ntfs_debug("Done."); 753 return rl; 754 } 755 ntfs_error(vol->sb, "Failed to allocate clusters, aborting " 756 "(error %i).", err); 757 if (rl) { 758 int err2; 759 760 if (err == -ENOSPC) 761 ntfs_debug("Not enough space to complete allocation, " 762 "err -ENOSPC, first free lcn 0x%llx, " 763 "could allocate up to 0x%llx " 764 "clusters.", 765 (unsigned long long)rl[0].lcn, 766 (unsigned long long)(count - clusters)); 767 /* Deallocate all allocated clusters. */ 768 ntfs_debug("Attempting rollback..."); 769 err2 = ntfs_cluster_free_from_rl_nolock(vol, rl); 770 if (err2) { 771 ntfs_error(vol->sb, "Failed to rollback (error %i). " 772 "Leaving inconsistent metadata! " 773 "Unmount and run chkdsk.", err2); 774 NVolSetErrors(vol); 775 } 776 /* Free the runlist. */ 777 ntfs_free(rl); 778 } else if (err == -ENOSPC) 779 ntfs_debug("No space left at all, err = -ENOSPC, first free " 780 "lcn = 0x%llx.", 781 (long long)vol->data1_zone_pos); 782 up_write(&vol->lcnbmp_lock); 783 return ERR_PTR(err); 784} 785 786/** 787 * __ntfs_cluster_free - free clusters on an ntfs volume 788 * @vi: vfs inode whose runlist describes the clusters to free 789 * @start_vcn: vcn in the runlist of @vi at which to start freeing clusters 790 * @count: number of clusters to free or -1 for all clusters 791 * @is_rollback: if TRUE this is a rollback operation 792 * 793 * Free @count clusters starting at the cluster @start_vcn in the runlist 794 * described by the vfs inode @vi. 795 * 796 * If @count is -1, all clusters from @start_vcn to the end of the runlist are 797 * deallocated. Thus, to completely free all clusters in a runlist, use 798 * @start_vcn = 0 and @count = -1. 799 * 800 * @is_rollback should always be FALSE, it is for internal use to rollback 801 * errors. You probably want to use ntfs_cluster_free() instead. 802 * 803 * Note, ntfs_cluster_free() does not modify the runlist at all, so the caller 804 * has to deal with it later. 805 * 806 * Return the number of deallocated clusters (not counting sparse ones) on 807 * success and -errno on error. 808 * 809 * Locking: - The runlist described by @vi must be unlocked on entry and is 810 * unlocked on return. 811 * - This function takes the runlist lock of @vi for reading and 812 * sometimes for writing and sometimes modifies the runlist. 813 * - The volume lcn bitmap must be unlocked on entry and is unlocked 814 * on return. 815 * - This function takes the volume lcn bitmap lock for writing and 816 * modifies the bitmap contents. 817 */ 818s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count, 819 const BOOL is_rollback) 820{ 821 s64 delta, to_free, total_freed, real_freed; 822 ntfs_inode *ni; 823 ntfs_volume *vol; 824 struct inode *lcnbmp_vi; 825 runlist_element *rl; 826 int err; 827 828 BUG_ON(!vi); 829 ntfs_debug("Entering for i_ino 0x%lx, start_vcn 0x%llx, count " 830 "0x%llx.%s", vi->i_ino, (unsigned long long)start_vcn, 831 (unsigned long long)count, 832 is_rollback ? " (rollback)" : ""); 833 ni = NTFS_I(vi); 834 vol = ni->vol; 835 lcnbmp_vi = vol->lcnbmp_ino; 836 BUG_ON(!lcnbmp_vi); 837 BUG_ON(start_vcn < 0); 838 BUG_ON(count < -1); 839 /* 840 * Lock the lcn bitmap for writing but only if not rolling back. We 841 * must hold the lock all the way including through rollback otherwise 842 * rollback is not possible because once we have cleared a bit and 843 * dropped the lock, anyone could have set the bit again, thus 844 * allocating the cluster for another use. 845 */ 846 if (likely(!is_rollback)) 847 down_write(&vol->lcnbmp_lock); 848 849 total_freed = real_freed = 0; 850 851 down_read(&ni->runlist.lock); 852 rl = ntfs_attr_find_vcn_nolock(ni, start_vcn, FALSE); 853 if (IS_ERR(rl)) { 854 if (!is_rollback) 855 ntfs_error(vol->sb, "Failed to find first runlist " 856 "element (error %li), aborting.", 857 PTR_ERR(rl)); 858 err = PTR_ERR(rl); 859 goto err_out; 860 } 861 if (unlikely(rl->lcn < LCN_HOLE)) { 862 if (!is_rollback) 863 ntfs_error(vol->sb, "First runlist element has " 864 "invalid lcn, aborting."); 865 err = -EIO; 866 goto err_out; 867 } 868 /* Find the starting cluster inside the run that needs freeing. */ 869 delta = start_vcn - rl->vcn; 870 871 /* The number of clusters in this run that need freeing. */ 872 to_free = rl->length - delta; 873 if (count >= 0 && to_free > count) 874 to_free = count; 875 876 if (likely(rl->lcn >= 0)) { 877 /* Do the actual freeing of the clusters in this run. */ 878 err = ntfs_bitmap_set_bits_in_run(lcnbmp_vi, rl->lcn + delta, 879 to_free, likely(!is_rollback) ? 0 : 1); 880 if (unlikely(err)) { 881 if (!is_rollback) 882 ntfs_error(vol->sb, "Failed to clear first run " 883 "(error %i), aborting.", err); 884 goto err_out; 885 } 886 /* We have freed @to_free real clusters. */ 887 real_freed = to_free; 888 }; 889 /* Go to the next run and adjust the number of clusters left to free. */ 890 ++rl; 891 if (count >= 0) 892 count -= to_free; 893 894 /* Keep track of the total "freed" clusters, including sparse ones. */ 895 total_freed = to_free; 896 /* 897 * Loop over the remaining runs, using @count as a capping value, and 898 * free them. 899 */ 900 for (; rl->length && count != 0; ++rl) { 901 if (unlikely(rl->lcn < LCN_HOLE)) { 902 VCN vcn; 903 904 /* Attempt to map runlist. */ 905 vcn = rl->vcn; 906 rl = ntfs_attr_find_vcn_nolock(ni, vcn, FALSE); 907 if (IS_ERR(rl)) { 908 err = PTR_ERR(rl); 909 if (!is_rollback) 910 ntfs_error(vol->sb, "Failed to map " 911 "runlist fragment or " 912 "failed to find " 913 "subsequent runlist " 914 "element."); 915 goto err_out; 916 } 917 if (unlikely(rl->lcn < LCN_HOLE)) { 918 if (!is_rollback) 919 ntfs_error(vol->sb, "Runlist element " 920 "has invalid lcn " 921 "(0x%llx).", 922 (unsigned long long) 923 rl->lcn); 924 err = -EIO; 925 goto err_out; 926 } 927 } 928 /* The number of clusters in this run that need freeing. */ 929 to_free = rl->length; 930 if (count >= 0 && to_free > count) 931 to_free = count; 932 933 if (likely(rl->lcn >= 0)) { 934 /* Do the actual freeing of the clusters in the run. */ 935 err = ntfs_bitmap_set_bits_in_run(lcnbmp_vi, rl->lcn, 936 to_free, likely(!is_rollback) ? 0 : 1); 937 if (unlikely(err)) { 938 if (!is_rollback) 939 ntfs_error(vol->sb, "Failed to clear " 940 "subsequent run."); 941 goto err_out; 942 } 943 /* We have freed @to_free real clusters. */ 944 real_freed += to_free; 945 } 946 /* Adjust the number of clusters left to free. */ 947 if (count >= 0) 948 count -= to_free; 949 950 /* Update the total done clusters. */ 951 total_freed += to_free; 952 } 953 up_read(&ni->runlist.lock); 954 if (likely(!is_rollback)) 955 up_write(&vol->lcnbmp_lock); 956 957 BUG_ON(count > 0); 958 959 /* We are done. Return the number of actually freed clusters. */ 960 ntfs_debug("Done."); 961 return real_freed; 962err_out: 963 up_read(&ni->runlist.lock); 964 if (is_rollback) 965 return err; 966 /* If no real clusters were freed, no need to rollback. */ 967 if (!real_freed) { 968 up_write(&vol->lcnbmp_lock); 969 return err; 970 } 971 /* 972 * Attempt to rollback and if that succeeds just return the error code. 973 * If rollback fails, set the volume errors flag, emit an error 974 * message, and return the error code. 975 */ 976 delta = __ntfs_cluster_free(vi, start_vcn, total_freed, TRUE); 977 if (delta < 0) { 978 ntfs_error(vol->sb, "Failed to rollback (error %i). Leaving " 979 "inconsistent metadata! Unmount and run " 980 "chkdsk.", (int)delta); 981 NVolSetErrors(vol); 982 } 983 up_write(&vol->lcnbmp_lock); 984 ntfs_error(vol->sb, "Aborting (error %i).", err); 985 return err; 986} 987 988#endif /* NTFS_RW */