Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'linux-next' of git://git.infradead.org/ubifs-2.6

* 'linux-next' of git://git.infradead.org/ubifs-2.6: (52 commits)
UBIFS: switch to dynamic printks
UBIFS: fix kernel-doc comments
UBIFS: fix extremely rare mount failure
UBIFS: simplify LEB recovery function further
UBIFS: always cleanup the recovered LEB
UBIFS: clean up LEB recovery function
UBIFS: fix-up free space on mount if flag is set
UBIFS: add the fixup function
UBIFS: add a superblock flag for free space fix-up
UBIFS: share the next_log_lnum helper
UBIFS: expect corruption only in last journal head LEBs
UBIFS: synchronize write-buffer before switching to the next bud
UBIFS: remove BUG statement
UBIFS: change bud replay function conventions
UBIFS: substitute the replay tree with a replay list
UBIFS: simplify replay
UBIFS: store free and dirty space in the bud replay entry
UBIFS: remove unnecessary stack variable
UBIFS: double check that buds are replied in order
UBIFS: make 2 functions static
...

+1115 -932
+2 -24
Documentation/filesystems/ubifs.txt
··· 115 115 Module Parameters for Debugging 116 116 =============================== 117 117 118 - When UBIFS has been compiled with debugging enabled, there are 3 module 118 + When UBIFS has been compiled with debugging enabled, there are 2 module 119 119 parameters that are available to control aspects of testing and debugging. 120 - The parameters are unsigned integers where each bit controls an option. 121 - The parameters are: 122 - 123 - debug_msgs Selects which debug messages to display, as follows: 124 - 125 - Message Type Flag value 126 - 127 - General messages 1 128 - Journal messages 2 129 - Mount messages 4 130 - Commit messages 8 131 - LEB search messages 16 132 - Budgeting messages 32 133 - Garbage collection messages 64 134 - Tree Node Cache (TNC) messages 128 135 - LEB properties (lprops) messages 256 136 - Input/output messages 512 137 - Log messages 1024 138 - Scan messages 2048 139 - Recovery messages 4096 140 120 141 121 debug_chks Selects extra checks that UBIFS can do while running: 142 122 ··· 134 154 135 155 Test mode Flag value 136 156 137 - Force in-the-gaps method 2 138 157 Failure mode for recovery testing 4 139 158 140 - For example, set debug_msgs to 5 to display General messages and Mount 141 - messages. 159 + For example, set debug_chks to 3 to enable general and TNC checks. 142 160 143 161 144 162 References
+51 -51
fs/ubifs/budget.c
··· 106 106 long long liab; 107 107 108 108 spin_lock(&c->space_lock); 109 - liab = c->budg_idx_growth + c->budg_data_growth + c->budg_dd_growth; 109 + liab = c->bi.idx_growth + c->bi.data_growth + c->bi.dd_growth; 110 110 spin_unlock(&c->space_lock); 111 111 return liab; 112 112 } ··· 180 180 int idx_lebs; 181 181 long long idx_size; 182 182 183 - idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx; 183 + idx_size = c->bi.old_idx_sz + c->bi.idx_growth + c->bi.uncommitted_idx; 184 184 /* And make sure we have thrice the index size of space reserved */ 185 185 idx_size += idx_size << 1; 186 186 /* ··· 292 292 * budgeted index space to the size of the current index, multiplies this by 3, 293 293 * and makes sure this does not exceed the amount of free LEBs. 294 294 * 295 - * Notes about @c->min_idx_lebs and @c->lst.idx_lebs variables: 295 + * Notes about @c->bi.min_idx_lebs and @c->lst.idx_lebs variables: 296 296 * o @c->lst.idx_lebs is the number of LEBs the index currently uses. It might 297 297 * be large, because UBIFS does not do any index consolidation as long as 298 298 * there is free space. IOW, the index may take a lot of LEBs, but the LEBs 299 299 * will contain a lot of dirt. 300 - * o @c->min_idx_lebs is the number of LEBS the index presumably takes. IOW, 301 - * the index may be consolidated to take up to @c->min_idx_lebs LEBs. 300 + * o @c->bi.min_idx_lebs is the number of LEBS the index presumably takes. IOW, 301 + * the index may be consolidated to take up to @c->bi.min_idx_lebs LEBs. 302 302 * 303 303 * This function returns zero in case of success, and %-ENOSPC in case of 304 304 * failure. 
··· 343 343 c->lst.taken_empty_lebs; 344 344 if (unlikely(rsvd_idx_lebs > lebs)) { 345 345 dbg_budg("out of indexing space: min_idx_lebs %d (old %d), " 346 - "rsvd_idx_lebs %d", min_idx_lebs, c->min_idx_lebs, 346 + "rsvd_idx_lebs %d", min_idx_lebs, c->bi.min_idx_lebs, 347 347 rsvd_idx_lebs); 348 348 return -ENOSPC; 349 349 } 350 350 351 351 available = ubifs_calc_available(c, min_idx_lebs); 352 - outstanding = c->budg_data_growth + c->budg_dd_growth; 352 + outstanding = c->bi.data_growth + c->bi.dd_growth; 353 353 354 354 if (unlikely(available < outstanding)) { 355 355 dbg_budg("out of data space: available %lld, outstanding %lld", ··· 360 360 if (available - outstanding <= c->rp_size && !can_use_rp(c)) 361 361 return -ENOSPC; 362 362 363 - c->min_idx_lebs = min_idx_lebs; 363 + c->bi.min_idx_lebs = min_idx_lebs; 364 364 return 0; 365 365 } 366 366 ··· 393 393 { 394 394 int data_growth; 395 395 396 - data_growth = req->new_ino ? c->inode_budget : 0; 396 + data_growth = req->new_ino ? c->bi.inode_budget : 0; 397 397 if (req->new_page) 398 - data_growth += c->page_budget; 398 + data_growth += c->bi.page_budget; 399 399 if (req->new_dent) 400 - data_growth += c->dent_budget; 400 + data_growth += c->bi.dent_budget; 401 401 data_growth += req->new_ino_d; 402 402 return data_growth; 403 403 } ··· 413 413 { 414 414 int dd_growth; 415 415 416 - dd_growth = req->dirtied_page ? c->page_budget : 0; 416 + dd_growth = req->dirtied_page ? 
c->bi.page_budget : 0; 417 417 418 418 if (req->dirtied_ino) 419 - dd_growth += c->inode_budget << (req->dirtied_ino - 1); 419 + dd_growth += c->bi.inode_budget << (req->dirtied_ino - 1); 420 420 if (req->mod_dent) 421 - dd_growth += c->dent_budget; 421 + dd_growth += c->bi.dent_budget; 422 422 dd_growth += req->dirtied_ino_d; 423 423 return dd_growth; 424 424 } ··· 460 460 461 461 again: 462 462 spin_lock(&c->space_lock); 463 - ubifs_assert(c->budg_idx_growth >= 0); 464 - ubifs_assert(c->budg_data_growth >= 0); 465 - ubifs_assert(c->budg_dd_growth >= 0); 463 + ubifs_assert(c->bi.idx_growth >= 0); 464 + ubifs_assert(c->bi.data_growth >= 0); 465 + ubifs_assert(c->bi.dd_growth >= 0); 466 466 467 - if (unlikely(c->nospace) && (c->nospace_rp || !can_use_rp(c))) { 467 + if (unlikely(c->bi.nospace) && (c->bi.nospace_rp || !can_use_rp(c))) { 468 468 dbg_budg("no space"); 469 469 spin_unlock(&c->space_lock); 470 470 return -ENOSPC; 471 471 } 472 472 473 - c->budg_idx_growth += idx_growth; 474 - c->budg_data_growth += data_growth; 475 - c->budg_dd_growth += dd_growth; 473 + c->bi.idx_growth += idx_growth; 474 + c->bi.data_growth += data_growth; 475 + c->bi.dd_growth += dd_growth; 476 476 477 477 err = do_budget_space(c); 478 478 if (likely(!err)) { ··· 484 484 } 485 485 486 486 /* Restore the old values */ 487 - c->budg_idx_growth -= idx_growth; 488 - c->budg_data_growth -= data_growth; 489 - c->budg_dd_growth -= dd_growth; 487 + c->bi.idx_growth -= idx_growth; 488 + c->bi.data_growth -= data_growth; 489 + c->bi.dd_growth -= dd_growth; 490 490 spin_unlock(&c->space_lock); 491 491 492 492 if (req->fast) { ··· 506 506 goto again; 507 507 } 508 508 dbg_budg("FS is full, -ENOSPC"); 509 - c->nospace = 1; 509 + c->bi.nospace = 1; 510 510 if (can_use_rp(c) || c->rp_size == 0) 511 - c->nospace_rp = 1; 511 + c->bi.nospace_rp = 1; 512 512 smp_wmb(); 513 513 } else 514 514 ubifs_err("cannot budget space, error %d", err); ··· 523 523 * This function releases the space budgeted by 
'ubifs_budget_space()'. Note, 524 524 * since the index changes (which were budgeted for in @req->idx_growth) will 525 525 * only be written to the media on commit, this function moves the index budget 526 - * from @c->budg_idx_growth to @c->budg_uncommitted_idx. The latter will be 527 - * zeroed by the commit operation. 526 + * from @c->bi.idx_growth to @c->bi.uncommitted_idx. The latter will be zeroed 527 + * by the commit operation. 528 528 */ 529 529 void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req) 530 530 { ··· 553 553 if (!req->data_growth && !req->dd_growth) 554 554 return; 555 555 556 - c->nospace = c->nospace_rp = 0; 556 + c->bi.nospace = c->bi.nospace_rp = 0; 557 557 smp_wmb(); 558 558 559 559 spin_lock(&c->space_lock); 560 - c->budg_idx_growth -= req->idx_growth; 561 - c->budg_uncommitted_idx += req->idx_growth; 562 - c->budg_data_growth -= req->data_growth; 563 - c->budg_dd_growth -= req->dd_growth; 564 - c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); 560 + c->bi.idx_growth -= req->idx_growth; 561 + c->bi.uncommitted_idx += req->idx_growth; 562 + c->bi.data_growth -= req->data_growth; 563 + c->bi.dd_growth -= req->dd_growth; 564 + c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c); 565 565 566 - ubifs_assert(c->budg_idx_growth >= 0); 567 - ubifs_assert(c->budg_data_growth >= 0); 568 - ubifs_assert(c->budg_dd_growth >= 0); 569 - ubifs_assert(c->min_idx_lebs < c->main_lebs); 570 - ubifs_assert(!(c->budg_idx_growth & 7)); 571 - ubifs_assert(!(c->budg_data_growth & 7)); 572 - ubifs_assert(!(c->budg_dd_growth & 7)); 566 + ubifs_assert(c->bi.idx_growth >= 0); 567 + ubifs_assert(c->bi.data_growth >= 0); 568 + ubifs_assert(c->bi.dd_growth >= 0); 569 + ubifs_assert(c->bi.min_idx_lebs < c->main_lebs); 570 + ubifs_assert(!(c->bi.idx_growth & 7)); 571 + ubifs_assert(!(c->bi.data_growth & 7)); 572 + ubifs_assert(!(c->bi.dd_growth & 7)); 573 573 spin_unlock(&c->space_lock); 574 574 } 575 575 ··· 586 586 { 587 587 spin_lock(&c->space_lock); 
588 588 /* Release the index growth reservation */ 589 - c->budg_idx_growth -= c->max_idx_node_sz << UBIFS_BLOCKS_PER_PAGE_SHIFT; 589 + c->bi.idx_growth -= c->max_idx_node_sz << UBIFS_BLOCKS_PER_PAGE_SHIFT; 590 590 /* Release the data growth reservation */ 591 - c->budg_data_growth -= c->page_budget; 591 + c->bi.data_growth -= c->bi.page_budget; 592 592 /* Increase the dirty data growth reservation instead */ 593 - c->budg_dd_growth += c->page_budget; 593 + c->bi.dd_growth += c->bi.page_budget; 594 594 /* And re-calculate the indexing space reservation */ 595 - c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); 595 + c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c); 596 596 spin_unlock(&c->space_lock); 597 597 } 598 598 ··· 612 612 613 613 memset(&req, 0, sizeof(struct ubifs_budget_req)); 614 614 /* The "no space" flags will be cleared because dd_growth is > 0 */ 615 - req.dd_growth = c->inode_budget + ALIGN(ui->data_len, 8); 615 + req.dd_growth = c->bi.inode_budget + ALIGN(ui->data_len, 8); 616 616 ubifs_release_budget(c, &req); 617 617 } 618 618 ··· 682 682 int rsvd_idx_lebs, lebs; 683 683 long long available, outstanding, free; 684 684 685 - ubifs_assert(c->min_idx_lebs == ubifs_calc_min_idx_lebs(c)); 686 - outstanding = c->budg_data_growth + c->budg_dd_growth; 687 - available = ubifs_calc_available(c, c->min_idx_lebs); 685 + ubifs_assert(c->bi.min_idx_lebs == ubifs_calc_min_idx_lebs(c)); 686 + outstanding = c->bi.data_growth + c->bi.dd_growth; 687 + available = ubifs_calc_available(c, c->bi.min_idx_lebs); 688 688 689 689 /* 690 690 * When reporting free space to user-space, UBIFS guarantees that it is ··· 697 697 * Note, the calculations below are similar to what we have in 698 698 * 'do_budget_space()', so refer there for comments. 
699 699 */ 700 - if (c->min_idx_lebs > c->lst.idx_lebs) 701 - rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs; 700 + if (c->bi.min_idx_lebs > c->lst.idx_lebs) 701 + rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs; 702 702 else 703 703 rsvd_idx_lebs = 0; 704 704 lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
+1 -1
fs/ubifs/commit.c
··· 182 182 c->mst_node->root_len = cpu_to_le32(zroot.len); 183 183 c->mst_node->ihead_lnum = cpu_to_le32(c->ihead_lnum); 184 184 c->mst_node->ihead_offs = cpu_to_le32(c->ihead_offs); 185 - c->mst_node->index_size = cpu_to_le64(c->old_idx_sz); 185 + c->mst_node->index_size = cpu_to_le64(c->bi.old_idx_sz); 186 186 c->mst_node->lpt_lnum = cpu_to_le32(c->lpt_lnum); 187 187 c->mst_node->lpt_offs = cpu_to_le32(c->lpt_offs); 188 188 c->mst_node->nhead_lnum = cpu_to_le32(c->nhead_lnum);
+108 -59
fs/ubifs/debug.c
··· 34 34 #include <linux/moduleparam.h> 35 35 #include <linux/debugfs.h> 36 36 #include <linux/math64.h> 37 - #include <linux/slab.h> 38 37 39 38 #ifdef CONFIG_UBIFS_FS_DEBUG 40 39 ··· 42 43 static char dbg_key_buf0[128]; 43 44 static char dbg_key_buf1[128]; 44 45 45 - unsigned int ubifs_msg_flags; 46 46 unsigned int ubifs_chk_flags; 47 47 unsigned int ubifs_tst_flags; 48 48 49 - module_param_named(debug_msgs, ubifs_msg_flags, uint, S_IRUGO | S_IWUSR); 50 49 module_param_named(debug_chks, ubifs_chk_flags, uint, S_IRUGO | S_IWUSR); 51 50 module_param_named(debug_tsts, ubifs_tst_flags, uint, S_IRUGO | S_IWUSR); 52 51 53 - MODULE_PARM_DESC(debug_msgs, "Debug message type flags"); 54 52 MODULE_PARM_DESC(debug_chks, "Debug check flags"); 55 53 MODULE_PARM_DESC(debug_tsts, "Debug special test flags"); 56 54 ··· 313 317 printk(KERN_DEBUG "\tflags %#x\n", sup_flags); 314 318 printk(KERN_DEBUG "\t big_lpt %u\n", 315 319 !!(sup_flags & UBIFS_FLG_BIGLPT)); 320 + printk(KERN_DEBUG "\t space_fixup %u\n", 321 + !!(sup_flags & UBIFS_FLG_SPACE_FIXUP)); 316 322 printk(KERN_DEBUG "\tmin_io_size %u\n", 317 323 le32_to_cpu(sup->min_io_size)); 318 324 printk(KERN_DEBUG "\tleb_size %u\n", ··· 600 602 spin_unlock(&dbg_lock); 601 603 } 602 604 603 - void dbg_dump_budg(struct ubifs_info *c) 605 + void dbg_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi) 604 606 { 605 607 int i; 606 608 struct rb_node *rb; ··· 608 610 struct ubifs_gced_idx_leb *idx_gc; 609 611 long long available, outstanding, free; 610 612 611 - ubifs_assert(spin_is_locked(&c->space_lock)); 613 + spin_lock(&c->space_lock); 612 614 spin_lock(&dbg_lock); 613 - printk(KERN_DEBUG "(pid %d) Budgeting info: budg_data_growth %lld, " 614 - "budg_dd_growth %lld, budg_idx_growth %lld\n", current->pid, 615 - c->budg_data_growth, c->budg_dd_growth, c->budg_idx_growth); 616 - printk(KERN_DEBUG "\tdata budget sum %lld, total budget sum %lld, " 617 - "freeable_cnt %d\n", c->budg_data_growth + c->budg_dd_growth, 618 - 
c->budg_data_growth + c->budg_dd_growth + c->budg_idx_growth, 619 - c->freeable_cnt); 620 - printk(KERN_DEBUG "\tmin_idx_lebs %d, old_idx_sz %lld, " 621 - "calc_idx_sz %lld, idx_gc_cnt %d\n", c->min_idx_lebs, 622 - c->old_idx_sz, c->calc_idx_sz, c->idx_gc_cnt); 615 + printk(KERN_DEBUG "(pid %d) Budgeting info: data budget sum %lld, " 616 + "total budget sum %lld\n", current->pid, 617 + bi->data_growth + bi->dd_growth, 618 + bi->data_growth + bi->dd_growth + bi->idx_growth); 619 + printk(KERN_DEBUG "\tbudg_data_growth %lld, budg_dd_growth %lld, " 620 + "budg_idx_growth %lld\n", bi->data_growth, bi->dd_growth, 621 + bi->idx_growth); 622 + printk(KERN_DEBUG "\tmin_idx_lebs %d, old_idx_sz %llu, " 623 + "uncommitted_idx %lld\n", bi->min_idx_lebs, bi->old_idx_sz, 624 + bi->uncommitted_idx); 625 + printk(KERN_DEBUG "\tpage_budget %d, inode_budget %d, dent_budget %d\n", 626 + bi->page_budget, bi->inode_budget, bi->dent_budget); 627 + printk(KERN_DEBUG "\tnospace %u, nospace_rp %u\n", 628 + bi->nospace, bi->nospace_rp); 629 + printk(KERN_DEBUG "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n", 630 + c->dark_wm, c->dead_wm, c->max_idx_node_sz); 631 + 632 + if (bi != &c->bi) 633 + /* 634 + * If we are dumping saved budgeting data, do not print 635 + * additional information which is about the current state, not 636 + * the old one which corresponded to the saved budgeting data. 
637 + */ 638 + goto out_unlock; 639 + 640 + printk(KERN_DEBUG "\tfreeable_cnt %d, calc_idx_sz %lld, idx_gc_cnt %d\n", 641 + c->freeable_cnt, c->calc_idx_sz, c->idx_gc_cnt); 623 642 printk(KERN_DEBUG "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, " 624 643 "clean_zn_cnt %ld\n", atomic_long_read(&c->dirty_pg_cnt), 625 644 atomic_long_read(&c->dirty_zn_cnt), 626 645 atomic_long_read(&c->clean_zn_cnt)); 627 - printk(KERN_DEBUG "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n", 628 - c->dark_wm, c->dead_wm, c->max_idx_node_sz); 629 646 printk(KERN_DEBUG "\tgc_lnum %d, ihead_lnum %d\n", 630 647 c->gc_lnum, c->ihead_lnum); 648 + 631 649 /* If we are in R/O mode, journal heads do not exist */ 632 650 if (c->jheads) 633 651 for (i = 0; i < c->jhead_cnt; i++) ··· 662 648 printk(KERN_DEBUG "\tcommit state %d\n", c->cmt_state); 663 649 664 650 /* Print budgeting predictions */ 665 - available = ubifs_calc_available(c, c->min_idx_lebs); 666 - outstanding = c->budg_data_growth + c->budg_dd_growth; 651 + available = ubifs_calc_available(c, c->bi.min_idx_lebs); 652 + outstanding = c->bi.data_growth + c->bi.dd_growth; 667 653 free = ubifs_get_free_space_nolock(c); 668 654 printk(KERN_DEBUG "Budgeting predictions:\n"); 669 655 printk(KERN_DEBUG "\tavailable: %lld, outstanding %lld, free %lld\n", 670 656 available, outstanding, free); 657 + out_unlock: 671 658 spin_unlock(&dbg_lock); 659 + spin_unlock(&c->space_lock); 672 660 } 673 661 674 662 void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp) ··· 745 729 if (bud->lnum == lp->lnum) { 746 730 int head = 0; 747 731 for (i = 0; i < c->jhead_cnt; i++) { 748 - if (lp->lnum == c->jheads[i].wbuf.lnum) { 732 + /* 733 + * Note, if we are in R/O mode or in the middle 734 + * of mounting/re-mounting, the write-buffers do 735 + * not exist. 
736 + */ 737 + if (c->jheads && 738 + lp->lnum == c->jheads[i].wbuf.lnum) { 749 739 printk(KERN_CONT ", jhead %s", 750 740 dbg_jhead(i)); 751 741 head = 1; ··· 998 976 999 977 spin_lock(&c->space_lock); 1000 978 memcpy(&d->saved_lst, &c->lst, sizeof(struct ubifs_lp_stats)); 979 + memcpy(&d->saved_bi, &c->bi, sizeof(struct ubifs_budg_info)); 980 + d->saved_idx_gc_cnt = c->idx_gc_cnt; 1001 981 1002 982 /* 1003 983 * We use a dirty hack here and zero out @c->freeable_cnt, because it ··· 1066 1042 out: 1067 1043 ubifs_msg("saved lprops statistics dump"); 1068 1044 dbg_dump_lstats(&d->saved_lst); 1069 - ubifs_get_lp_stats(c, &lst); 1070 - 1045 + ubifs_msg("saved budgeting info dump"); 1046 + dbg_dump_budg(c, &d->saved_bi); 1047 + ubifs_msg("saved idx_gc_cnt %d", d->saved_idx_gc_cnt); 1071 1048 ubifs_msg("current lprops statistics dump"); 1049 + ubifs_get_lp_stats(c, &lst); 1072 1050 dbg_dump_lstats(&lst); 1073 - 1074 - spin_lock(&c->space_lock); 1075 - dbg_dump_budg(c); 1076 - spin_unlock(&c->space_lock); 1051 + ubifs_msg("current budgeting info dump"); 1052 + dbg_dump_budg(c, &c->bi); 1077 1053 dump_stack(); 1078 1054 return -EINVAL; 1079 1055 } ··· 1817 1793 struct rb_node **p, *parent = NULL; 1818 1794 struct fsck_inode *fscki; 1819 1795 ino_t inum = key_inum_flash(c, &ino->key); 1796 + struct inode *inode; 1797 + struct ubifs_inode *ui; 1820 1798 1821 1799 p = &fsckd->inodes.rb_node; 1822 1800 while (*p) { ··· 1842 1816 if (!fscki) 1843 1817 return ERR_PTR(-ENOMEM); 1844 1818 1819 + inode = ilookup(c->vfs_sb, inum); 1820 + 1845 1821 fscki->inum = inum; 1846 - fscki->nlink = le32_to_cpu(ino->nlink); 1847 - fscki->size = le64_to_cpu(ino->size); 1848 - fscki->xattr_cnt = le32_to_cpu(ino->xattr_cnt); 1849 - fscki->xattr_sz = le32_to_cpu(ino->xattr_size); 1850 - fscki->xattr_nms = le32_to_cpu(ino->xattr_names); 1851 - fscki->mode = le32_to_cpu(ino->mode); 1822 + /* 1823 + * If the inode is present in the VFS inode cache, use it instead of 1824 + * the on-flash inode 
which might be out-of-date. E.g., the size might 1825 + * be out-of-date. If we do not do this, the following may happen, for 1826 + * example: 1827 + * 1. A power cut happens 1828 + * 2. We mount the file-system R/O, the replay process fixes up the 1829 + * inode size in the VFS cache, but not on-flash. 1830 + * 3. 'check_leaf()' fails because it hits a data node beyond inode 1831 + * size. 1832 + */ 1833 + if (!inode) { 1834 + fscki->nlink = le32_to_cpu(ino->nlink); 1835 + fscki->size = le64_to_cpu(ino->size); 1836 + fscki->xattr_cnt = le32_to_cpu(ino->xattr_cnt); 1837 + fscki->xattr_sz = le32_to_cpu(ino->xattr_size); 1838 + fscki->xattr_nms = le32_to_cpu(ino->xattr_names); 1839 + fscki->mode = le32_to_cpu(ino->mode); 1840 + } else { 1841 + ui = ubifs_inode(inode); 1842 + fscki->nlink = inode->i_nlink; 1843 + fscki->size = inode->i_size; 1844 + fscki->xattr_cnt = ui->xattr_cnt; 1845 + fscki->xattr_sz = ui->xattr_size; 1846 + fscki->xattr_nms = ui->xattr_names; 1847 + fscki->mode = inode->i_mode; 1848 + iput(inode); 1849 + } 1850 + 1852 1851 if (S_ISDIR(fscki->mode)) { 1853 1852 fscki->calc_sz = UBIFS_INO_NODE_SZ; 1854 1853 fscki->calc_cnt = 2; 1855 1854 } 1855 + 1856 1856 rb_link_node(&fscki->rb, parent, p); 1857 1857 rb_insert_color(&fscki->rb, &fsckd->inodes); 1858 + 1858 1859 return fscki; 1859 1860 } 1860 1861 ··· 2474 2421 hashb = key_block(c, &sb->key); 2475 2422 2476 2423 if (hasha > hashb) { 2477 - ubifs_err("larger hash %u goes before %u", hasha, hashb); 2424 + ubifs_err("larger hash %u goes before %u", 2425 + hasha, hashb); 2478 2426 goto error_dump; 2479 2427 } 2480 2428 } ··· 2491 2437 return 0; 2492 2438 } 2493 2439 2494 - static int invocation_cnt; 2495 - 2496 2440 int dbg_force_in_the_gaps(void) 2497 2441 { 2498 - if (!dbg_force_in_the_gaps_enabled) 2442 + if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) 2499 2443 return 0; 2500 - /* Force in-the-gaps every 8th commit */ 2501 - return !((invocation_cnt++) & 0x7); 2444 + 2445 + return !(random32() & 7); 2502 
2446 } 2503 2447 2504 2448 /* Failure mode for recovery testing */ ··· 2684 2632 int len, int check) 2685 2633 { 2686 2634 if (in_failure_mode(desc)) 2687 - return -EIO; 2635 + return -EROFS; 2688 2636 return ubi_leb_read(desc, lnum, buf, offset, len, check); 2689 2637 } 2690 2638 ··· 2694 2642 int err, failing; 2695 2643 2696 2644 if (in_failure_mode(desc)) 2697 - return -EIO; 2645 + return -EROFS; 2698 2646 failing = do_fail(desc, lnum, 1); 2699 2647 if (failing) 2700 2648 cut_data(buf, len); ··· 2702 2650 if (err) 2703 2651 return err; 2704 2652 if (failing) 2705 - return -EIO; 2653 + return -EROFS; 2706 2654 return 0; 2707 2655 } 2708 2656 ··· 2712 2660 int err; 2713 2661 2714 2662 if (do_fail(desc, lnum, 1)) 2715 - return -EIO; 2663 + return -EROFS; 2716 2664 err = ubi_leb_change(desc, lnum, buf, len, dtype); 2717 2665 if (err) 2718 2666 return err; 2719 2667 if (do_fail(desc, lnum, 1)) 2720 - return -EIO; 2668 + return -EROFS; 2721 2669 return 0; 2722 2670 } 2723 2671 ··· 2726 2674 int err; 2727 2675 2728 2676 if (do_fail(desc, lnum, 0)) 2729 - return -EIO; 2677 + return -EROFS; 2730 2678 err = ubi_leb_erase(desc, lnum); 2731 2679 if (err) 2732 2680 return err; 2733 2681 if (do_fail(desc, lnum, 0)) 2734 - return -EIO; 2682 + return -EROFS; 2735 2683 return 0; 2736 2684 } 2737 2685 ··· 2740 2688 int err; 2741 2689 2742 2690 if (do_fail(desc, lnum, 0)) 2743 - return -EIO; 2691 + return -EROFS; 2744 2692 err = ubi_leb_unmap(desc, lnum); 2745 2693 if (err) 2746 2694 return err; 2747 2695 if (do_fail(desc, lnum, 0)) 2748 - return -EIO; 2696 + return -EROFS; 2749 2697 return 0; 2750 2698 } 2751 2699 2752 2700 int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum) 2753 2701 { 2754 2702 if (in_failure_mode(desc)) 2755 - return -EIO; 2703 + return -EROFS; 2756 2704 return ubi_is_mapped(desc, lnum); 2757 2705 } 2758 2706 ··· 2761 2709 int err; 2762 2710 2763 2711 if (do_fail(desc, lnum, 0)) 2764 - return -EIO; 2712 + return -EROFS; 2765 2713 err = ubi_leb_map(desc, 
lnum, dtype); 2766 2714 if (err) 2767 2715 return err; 2768 2716 if (do_fail(desc, lnum, 0)) 2769 - return -EIO; 2717 + return -EROFS; 2770 2718 return 0; 2771 2719 } 2772 2720 ··· 2836 2784 static int open_debugfs_file(struct inode *inode, struct file *file) 2837 2785 { 2838 2786 file->private_data = inode->i_private; 2839 - return 0; 2787 + return nonseekable_open(inode, file); 2840 2788 } 2841 2789 2842 2790 static ssize_t write_debugfs_file(struct file *file, const char __user *buf, ··· 2847 2795 2848 2796 if (file->f_path.dentry == d->dfs_dump_lprops) 2849 2797 dbg_dump_lprops(c); 2850 - else if (file->f_path.dentry == d->dfs_dump_budg) { 2851 - spin_lock(&c->space_lock); 2852 - dbg_dump_budg(c); 2853 - spin_unlock(&c->space_lock); 2854 - } else if (file->f_path.dentry == d->dfs_dump_tnc) { 2798 + else if (file->f_path.dentry == d->dfs_dump_budg) 2799 + dbg_dump_budg(c, &c->bi); 2800 + else if (file->f_path.dentry == d->dfs_dump_tnc) { 2855 2801 mutex_lock(&c->tnc_mutex); 2856 2802 dbg_dump_tnc(c); 2857 2803 mutex_unlock(&c->tnc_mutex); 2858 2804 } else 2859 2805 return -EINVAL; 2860 2806 2861 - *ppos += count; 2862 2807 return count; 2863 2808 } 2864 2809 ··· 2863 2814 .open = open_debugfs_file, 2864 2815 .write = write_debugfs_file, 2865 2816 .owner = THIS_MODULE, 2866 - .llseek = default_llseek, 2817 + .llseek = no_llseek, 2867 2818 }; 2868 2819 2869 2820 /**
+64 -114
fs/ubifs/debug.h
··· 31 31 32 32 #ifdef CONFIG_UBIFS_FS_DEBUG 33 33 34 + #include <linux/random.h> 35 + 34 36 /** 35 37 * ubifs_debug_info - per-FS debugging information. 36 38 * @old_zroot: old index root - used by 'dbg_check_old_index()' ··· 52 50 * @new_ihead_offs: used by debugging to check @c->ihead_offs 53 51 * 54 52 * @saved_lst: saved lprops statistics (used by 'dbg_save_space_info()') 55 - * @saved_free: saved free space (used by 'dbg_save_space_info()') 53 + * @saved_bi: saved budgeting information 54 + * @saved_free: saved amount of free space 55 + * @saved_idx_gc_cnt: saved value of @c->idx_gc_cnt 56 56 * 57 - * dfs_dir_name: name of debugfs directory containing this file-system's files 58 - * dfs_dir: direntry object of the file-system debugfs directory 59 - * dfs_dump_lprops: "dump lprops" debugfs knob 60 - * dfs_dump_budg: "dump budgeting information" debugfs knob 61 - * dfs_dump_tnc: "dump TNC" debugfs knob 57 + * @dfs_dir_name: name of debugfs directory containing this file-system's files 58 + * @dfs_dir: direntry object of the file-system debugfs directory 59 + * @dfs_dump_lprops: "dump lprops" debugfs knob 60 + * @dfs_dump_budg: "dump budgeting information" debugfs knob 61 + * @dfs_dump_tnc: "dump TNC" debugfs knob 62 62 */ 63 63 struct ubifs_debug_info { 64 64 struct ubifs_zbranch old_zroot; ··· 80 76 int new_ihead_offs; 81 77 82 78 struct ubifs_lp_stats saved_lst; 79 + struct ubifs_budg_info saved_bi; 83 80 long long saved_free; 81 + int saved_idx_gc_cnt; 84 82 85 83 char dfs_dir_name[100]; 86 84 struct dentry *dfs_dir; ··· 107 101 } \ 108 102 } while (0) 109 103 110 - #define dbg_dump_stack() do { \ 111 - if (!dbg_failure_mode) \ 112 - dump_stack(); \ 113 - } while (0) 114 - 115 - /* Generic debugging messages */ 116 - #define dbg_msg(fmt, ...) 
do { \ 117 - spin_lock(&dbg_lock); \ 118 - printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", current->pid, \ 119 - __func__, ##__VA_ARGS__); \ 120 - spin_unlock(&dbg_lock); \ 121 - } while (0) 122 - 123 - #define dbg_do_msg(typ, fmt, ...) do { \ 124 - if (ubifs_msg_flags & typ) \ 125 - dbg_msg(fmt, ##__VA_ARGS__); \ 126 - } while (0) 104 + #define dbg_dump_stack() dump_stack() 127 105 128 106 #define dbg_err(fmt, ...) do { \ 129 107 spin_lock(&dbg_lock); \ ··· 127 137 #define DBGKEY(key) dbg_key_str0(c, (key)) 128 138 #define DBGKEY1(key) dbg_key_str1(c, (key)) 129 139 140 + #define ubifs_dbg_msg(type, fmt, ...) do { \ 141 + spin_lock(&dbg_lock); \ 142 + pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__); \ 143 + spin_unlock(&dbg_lock); \ 144 + } while (0) 145 + 146 + /* Just a debugging messages not related to any specific UBIFS subsystem */ 147 + #define dbg_msg(fmt, ...) ubifs_dbg_msg("msg", fmt, ##__VA_ARGS__) 130 148 /* General messages */ 131 - #define dbg_gen(fmt, ...) dbg_do_msg(UBIFS_MSG_GEN, fmt, ##__VA_ARGS__) 132 - 149 + #define dbg_gen(fmt, ...) ubifs_dbg_msg("gen", fmt, ##__VA_ARGS__) 133 150 /* Additional journal messages */ 134 - #define dbg_jnl(fmt, ...) dbg_do_msg(UBIFS_MSG_JNL, fmt, ##__VA_ARGS__) 135 - 151 + #define dbg_jnl(fmt, ...) ubifs_dbg_msg("jnl", fmt, ##__VA_ARGS__) 136 152 /* Additional TNC messages */ 137 - #define dbg_tnc(fmt, ...) dbg_do_msg(UBIFS_MSG_TNC, fmt, ##__VA_ARGS__) 138 - 153 + #define dbg_tnc(fmt, ...) ubifs_dbg_msg("tnc", fmt, ##__VA_ARGS__) 139 154 /* Additional lprops messages */ 140 - #define dbg_lp(fmt, ...) dbg_do_msg(UBIFS_MSG_LP, fmt, ##__VA_ARGS__) 141 - 155 + #define dbg_lp(fmt, ...) ubifs_dbg_msg("lp", fmt, ##__VA_ARGS__) 142 156 /* Additional LEB find messages */ 143 - #define dbg_find(fmt, ...) dbg_do_msg(UBIFS_MSG_FIND, fmt, ##__VA_ARGS__) 144 - 157 + #define dbg_find(fmt, ...) ubifs_dbg_msg("find", fmt, ##__VA_ARGS__) 145 158 /* Additional mount messages */ 146 - #define dbg_mnt(fmt, ...) 
dbg_do_msg(UBIFS_MSG_MNT, fmt, ##__VA_ARGS__) 147 - 159 + #define dbg_mnt(fmt, ...) ubifs_dbg_msg("mnt", fmt, ##__VA_ARGS__) 148 160 /* Additional I/O messages */ 149 - #define dbg_io(fmt, ...) dbg_do_msg(UBIFS_MSG_IO, fmt, ##__VA_ARGS__) 150 - 161 + #define dbg_io(fmt, ...) ubifs_dbg_msg("io", fmt, ##__VA_ARGS__) 151 162 /* Additional commit messages */ 152 - #define dbg_cmt(fmt, ...) dbg_do_msg(UBIFS_MSG_CMT, fmt, ##__VA_ARGS__) 153 - 163 + #define dbg_cmt(fmt, ...) ubifs_dbg_msg("cmt", fmt, ##__VA_ARGS__) 154 164 /* Additional budgeting messages */ 155 - #define dbg_budg(fmt, ...) dbg_do_msg(UBIFS_MSG_BUDG, fmt, ##__VA_ARGS__) 156 - 165 + #define dbg_budg(fmt, ...) ubifs_dbg_msg("budg", fmt, ##__VA_ARGS__) 157 166 /* Additional log messages */ 158 - #define dbg_log(fmt, ...) dbg_do_msg(UBIFS_MSG_LOG, fmt, ##__VA_ARGS__) 159 - 167 + #define dbg_log(fmt, ...) ubifs_dbg_msg("log", fmt, ##__VA_ARGS__) 160 168 /* Additional gc messages */ 161 - #define dbg_gc(fmt, ...) dbg_do_msg(UBIFS_MSG_GC, fmt, ##__VA_ARGS__) 162 - 169 + #define dbg_gc(fmt, ...) ubifs_dbg_msg("gc", fmt, ##__VA_ARGS__) 163 170 /* Additional scan messages */ 164 - #define dbg_scan(fmt, ...) dbg_do_msg(UBIFS_MSG_SCAN, fmt, ##__VA_ARGS__) 165 - 171 + #define dbg_scan(fmt, ...) ubifs_dbg_msg("scan", fmt, ##__VA_ARGS__) 166 172 /* Additional recovery messages */ 167 - #define dbg_rcvry(fmt, ...) dbg_do_msg(UBIFS_MSG_RCVRY, fmt, ##__VA_ARGS__) 168 - 169 - /* 170 - * Debugging message type flags. 
171 - * 172 - * UBIFS_MSG_GEN: general messages 173 - * UBIFS_MSG_JNL: journal messages 174 - * UBIFS_MSG_MNT: mount messages 175 - * UBIFS_MSG_CMT: commit messages 176 - * UBIFS_MSG_FIND: LEB find messages 177 - * UBIFS_MSG_BUDG: budgeting messages 178 - * UBIFS_MSG_GC: garbage collection messages 179 - * UBIFS_MSG_TNC: TNC messages 180 - * UBIFS_MSG_LP: lprops messages 181 - * UBIFS_MSG_IO: I/O messages 182 - * UBIFS_MSG_LOG: log messages 183 - * UBIFS_MSG_SCAN: scan messages 184 - * UBIFS_MSG_RCVRY: recovery messages 185 - */ 186 - enum { 187 - UBIFS_MSG_GEN = 0x1, 188 - UBIFS_MSG_JNL = 0x2, 189 - UBIFS_MSG_MNT = 0x4, 190 - UBIFS_MSG_CMT = 0x8, 191 - UBIFS_MSG_FIND = 0x10, 192 - UBIFS_MSG_BUDG = 0x20, 193 - UBIFS_MSG_GC = 0x40, 194 - UBIFS_MSG_TNC = 0x80, 195 - UBIFS_MSG_LP = 0x100, 196 - UBIFS_MSG_IO = 0x200, 197 - UBIFS_MSG_LOG = 0x400, 198 - UBIFS_MSG_SCAN = 0x800, 199 - UBIFS_MSG_RCVRY = 0x1000, 200 - }; 173 + #define dbg_rcvry(fmt, ...) ubifs_dbg_msg("rcvry", fmt, ##__VA_ARGS__) 201 174 202 175 /* 203 176 * Debugging check flags. ··· 186 233 /* 187 234 * Special testing flags. 
188 235 * 189 - * UBIFS_TST_FORCE_IN_THE_GAPS: force the use of in-the-gaps method 190 236 * UBIFS_TST_RCVRY: failure mode for recovery testing 191 237 */ 192 238 enum { 193 - UBIFS_TST_FORCE_IN_THE_GAPS = 0x2, 194 239 UBIFS_TST_RCVRY = 0x4, 195 240 }; 196 241 ··· 213 262 int offs); 214 263 void dbg_dump_budget_req(const struct ubifs_budget_req *req); 215 264 void dbg_dump_lstats(const struct ubifs_lp_stats *lst); 216 - void dbg_dump_budg(struct ubifs_info *c); 265 + void dbg_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi); 217 266 void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp); 218 267 void dbg_dump_lprops(struct ubifs_info *c); 219 268 void dbg_dump_lpt_info(struct ubifs_info *c); ··· 255 304 int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head); 256 305 257 306 /* Force the use of in-the-gaps method for testing */ 258 - 259 - #define dbg_force_in_the_gaps_enabled \ 260 - (ubifs_tst_flags & UBIFS_TST_FORCE_IN_THE_GAPS) 261 - 307 + static inline int dbg_force_in_the_gaps_enabled(void) 308 + { 309 + return ubifs_chk_flags & UBIFS_CHK_GEN; 310 + } 262 311 int dbg_force_in_the_gaps(void); 263 312 264 313 /* Failure mode for recovery testing */ 265 - 266 314 #define dbg_failure_mode (ubifs_tst_flags & UBIFS_TST_RCVRY) 267 315 268 316 #ifndef UBIFS_DBG_PRESERVE_UBI 269 - 270 317 #define ubi_leb_read dbg_leb_read 271 318 #define ubi_leb_write dbg_leb_write 272 319 #define ubi_leb_change dbg_leb_change ··· 272 323 #define ubi_leb_unmap dbg_leb_unmap 273 324 #define ubi_is_mapped dbg_is_mapped 274 325 #define ubi_leb_map dbg_leb_map 275 - 276 326 #endif 277 327 278 328 int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, ··· 318 370 __func__, __LINE__, current->pid); \ 319 371 } while (0) 320 372 321 - #define dbg_err(fmt, ...) do { \ 322 - if (0) \ 323 - ubifs_err(fmt, ##__VA_ARGS__); \ 373 + #define dbg_err(fmt, ...) 
do { \ 374 + if (0) \ 375 + ubifs_err(fmt, ##__VA_ARGS__); \ 324 376 } while (0) 325 377 326 - #define dbg_msg(fmt, ...) do { \ 327 - if (0) \ 328 - printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", \ 329 - current->pid, __func__, ##__VA_ARGS__); \ 378 + #define ubifs_dbg_msg(fmt, ...) do { \ 379 + if (0) \ 380 + pr_debug(fmt "\n", ##__VA_ARGS__); \ 330 381 } while (0) 331 382 332 383 #define dbg_dump_stack() 333 384 #define ubifs_assert_cmt_locked(c) 334 385 335 - #define dbg_gen(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 336 - #define dbg_jnl(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 337 - #define dbg_tnc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 338 - #define dbg_lp(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 339 - #define dbg_find(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 340 - #define dbg_mnt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 341 - #define dbg_io(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 342 - #define dbg_cmt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 343 - #define dbg_budg(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 344 - #define dbg_log(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 345 - #define dbg_gc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 346 - #define dbg_scan(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 347 - #define dbg_rcvry(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 386 + #define dbg_msg(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) 387 + #define dbg_gen(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) 388 + #define dbg_jnl(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) 389 + #define dbg_tnc(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) 390 + #define dbg_lp(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) 391 + #define dbg_find(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) 392 + #define dbg_mnt(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) 393 + #define dbg_io(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) 394 + #define dbg_cmt(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) 395 + #define dbg_budg(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) 396 + #define dbg_log(fmt, ...) 
ubifs_dbg_msg(fmt, ##__VA_ARGS__) 397 + #define dbg_gc(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) 398 + #define dbg_scan(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) 399 + #define dbg_rcvry(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) 348 400 349 401 #define DBGKEY(key) ((char *)(key)) 350 402 #define DBGKEY1(key) ((char *)(key)) ··· 368 420 dbg_dump_budget_req(const struct ubifs_budget_req *req) { return; } 369 421 static inline void 370 422 dbg_dump_lstats(const struct ubifs_lp_stats *lst) { return; } 371 - static inline void dbg_dump_budg(struct ubifs_info *c) { return; } 423 + static inline void 424 + dbg_dump_budg(struct ubifs_info *c, 425 + const struct ubifs_budg_info *bi) { return; } 372 426 static inline void dbg_dump_lprop(const struct ubifs_info *c, 373 427 const struct ubifs_lprops *lp) { return; } 374 428 static inline void dbg_dump_lprops(struct ubifs_info *c) { return; } ··· 432 482 struct list_head *head) { return 0; } 433 483 434 484 static inline int dbg_force_in_the_gaps(void) { return 0; } 435 - #define dbg_force_in_the_gaps_enabled 0 436 - #define dbg_failure_mode 0 485 + #define dbg_force_in_the_gaps_enabled() 0 486 + #define dbg_failure_mode 0 437 487 438 488 static inline int dbg_debugfs_init(void) { return 0; } 439 489 static inline void dbg_debugfs_exit(void) { return; }
+2 -2
fs/ubifs/dir.c
··· 603 603 ubifs_release_budget(c, &req); 604 604 else { 605 605 /* We've deleted something - clean the "no space" flags */ 606 - c->nospace = c->nospace_rp = 0; 606 + c->bi.nospace = c->bi.nospace_rp = 0; 607 607 smp_wmb(); 608 608 } 609 609 return 0; ··· 693 693 ubifs_release_budget(c, &req); 694 694 else { 695 695 /* We've deleted something - clean the "no space" flags */ 696 - c->nospace = c->nospace_rp = 0; 696 + c->bi.nospace = c->bi.nospace_rp = 0; 697 697 smp_wmb(); 698 698 } 699 699 return 0;
+16 -12
fs/ubifs/file.c
··· 212 212 */ 213 213 static void release_existing_page_budget(struct ubifs_info *c) 214 214 { 215 - struct ubifs_budget_req req = { .dd_growth = c->page_budget}; 215 + struct ubifs_budget_req req = { .dd_growth = c->bi.page_budget}; 216 216 217 217 ubifs_release_budget(c, &req); 218 218 } ··· 971 971 * the page locked, and it locks @ui_mutex. However, write-back does take inode 972 972 * @i_mutex, which means other VFS operations may be run on this inode at the 973 973 * same time. And the problematic one is truncation to smaller size, from where 974 - * we have to call 'truncate_setsize()', which first changes @inode->i_size, then 975 - * drops the truncated pages. And while dropping the pages, it takes the page 976 - * lock. This means that 'do_truncation()' cannot call 'truncate_setsize()' with 977 - * @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'. This 978 - * means that @inode->i_size is changed while @ui_mutex is unlocked. 974 + * we have to call 'truncate_setsize()', which first changes @inode->i_size, 975 + * then drops the truncated pages. And while dropping the pages, it takes the 976 + * page lock. This means that 'do_truncation()' cannot call 'truncate_setsize()' 977 + * with @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'. 978 + * This means that @inode->i_size is changed while @ui_mutex is unlocked. 979 979 * 980 980 * XXX(truncate): with the new truncate sequence this is not true anymore, 981 981 * and the calls to truncate_setsize can be move around freely. 
They should ··· 1189 1189 if (budgeted) 1190 1190 ubifs_release_budget(c, &req); 1191 1191 else { 1192 - c->nospace = c->nospace_rp = 0; 1192 + c->bi.nospace = c->bi.nospace_rp = 0; 1193 1193 smp_wmb(); 1194 1194 } 1195 1195 return err; ··· 1312 1312 1313 1313 dbg_gen("syncing inode %lu", inode->i_ino); 1314 1314 1315 - if (inode->i_sb->s_flags & MS_RDONLY) 1315 + if (c->ro_mount) 1316 + /* 1317 + * For some really strange reasons VFS does not filter out 1318 + * 'fsync()' for R/O mounted file-systems as per 2.6.39. 1319 + */ 1316 1320 return 0; 1317 1321 1318 1322 /* ··· 1436 1432 } 1437 1433 1438 1434 /* 1439 - * mmap()d file has taken write protection fault and is being made 1440 - * writable. UBIFS must ensure page is budgeted for. 1435 + * mmap()d file has taken write protection fault and is being made writable. 1436 + * UBIFS must ensure page is budgeted for. 1441 1437 */ 1442 - static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) 1438 + static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, 1439 + struct vm_fault *vmf) 1443 1440 { 1444 1441 struct page *page = vmf->page; 1445 1442 struct inode *inode = vma->vm_file->f_path.dentry->d_inode; ··· 1541 1536 { 1542 1537 int err; 1543 1538 1544 - /* 'generic_file_mmap()' takes care of NOMMU case */ 1545 1539 err = generic_file_mmap(file, vma); 1546 1540 if (err) 1547 1541 return err;
+5 -5
fs/ubifs/find.c
··· 252 252 * But if the index takes fewer LEBs than it is reserved for it, 253 253 * this function must avoid picking those reserved LEBs. 254 254 */ 255 - if (c->min_idx_lebs >= c->lst.idx_lebs) { 256 - rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs; 255 + if (c->bi.min_idx_lebs >= c->lst.idx_lebs) { 256 + rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs; 257 257 exclude_index = 1; 258 258 } 259 259 spin_unlock(&c->space_lock); ··· 276 276 pick_free = 0; 277 277 } else { 278 278 spin_lock(&c->space_lock); 279 - exclude_index = (c->min_idx_lebs >= c->lst.idx_lebs); 279 + exclude_index = (c->bi.min_idx_lebs >= c->lst.idx_lebs); 280 280 spin_unlock(&c->space_lock); 281 281 } 282 282 ··· 501 501 502 502 /* Check if there are enough empty LEBs for commit */ 503 503 spin_lock(&c->space_lock); 504 - if (c->min_idx_lebs > c->lst.idx_lebs) 505 - rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs; 504 + if (c->bi.min_idx_lebs > c->lst.idx_lebs) 505 + rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs; 506 506 else 507 507 rsvd_idx_lebs = 0; 508 508 lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
+38 -33
fs/ubifs/gc.c
··· 100 100 if (err) 101 101 return err; 102 102 103 + err = ubifs_wbuf_sync_nolock(wbuf); 104 + if (err) 105 + return err; 106 + 103 107 err = ubifs_add_bud_to_log(c, GCHD, gc_lnum, 0); 104 108 if (err) 105 109 return err; ··· 122 118 * This function compares data nodes @a and @b. Returns %1 if @a has greater 123 119 * inode or block number, and %-1 otherwise. 124 120 */ 125 - int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) 121 + static int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) 126 122 { 127 123 ino_t inuma, inumb; 128 124 struct ubifs_info *c = priv; ··· 165 161 * first and sorted by length in descending order. Directory entry nodes go 166 162 * after inode nodes and are sorted in ascending hash valuer order. 167 163 */ 168 - int nondata_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) 164 + static int nondata_nodes_cmp(void *priv, struct list_head *a, 165 + struct list_head *b) 169 166 { 170 167 ino_t inuma, inumb; 171 168 struct ubifs_info *c = priv; ··· 478 473 ubifs_assert(c->gc_lnum != lnum); 479 474 ubifs_assert(wbuf->lnum != lnum); 480 475 476 + if (lp->free + lp->dirty == c->leb_size) { 477 + /* Special case - a free LEB */ 478 + dbg_gc("LEB %d is free, return it", lp->lnum); 479 + ubifs_assert(!(lp->flags & LPROPS_INDEX)); 480 + 481 + if (lp->free != c->leb_size) { 482 + /* 483 + * Write buffers must be sync'd before unmapping 484 + * freeable LEBs, because one of them may contain data 485 + * which obsoletes something in 'lp->pnum'. 
486 + */ 487 + err = gc_sync_wbufs(c); 488 + if (err) 489 + return err; 490 + err = ubifs_change_one_lp(c, lp->lnum, c->leb_size, 491 + 0, 0, 0, 0); 492 + if (err) 493 + return err; 494 + } 495 + err = ubifs_leb_unmap(c, lp->lnum); 496 + if (err) 497 + return err; 498 + 499 + if (c->gc_lnum == -1) { 500 + c->gc_lnum = lnum; 501 + return LEB_RETAINED; 502 + } 503 + 504 + return LEB_FREED; 505 + } 506 + 481 507 /* 482 508 * We scan the entire LEB even though we only really need to scan up to 483 509 * (c->leb_size - lp->free). ··· 717 681 dbg_gc("found LEB %d: free %d, dirty %d, sum %d " 718 682 "(min. space %d)", lp.lnum, lp.free, lp.dirty, 719 683 lp.free + lp.dirty, min_space); 720 - 721 - if (lp.free + lp.dirty == c->leb_size) { 722 - /* An empty LEB was returned */ 723 - dbg_gc("LEB %d is free, return it", lp.lnum); 724 - /* 725 - * ubifs_find_dirty_leb() doesn't return freeable index 726 - * LEBs. 727 - */ 728 - ubifs_assert(!(lp.flags & LPROPS_INDEX)); 729 - if (lp.free != c->leb_size) { 730 - /* 731 - * Write buffers must be sync'd before 732 - * unmapping freeable LEBs, because one of them 733 - * may contain data which obsoletes something 734 - * in 'lp.pnum'. 735 - */ 736 - ret = gc_sync_wbufs(c); 737 - if (ret) 738 - goto out; 739 - ret = ubifs_change_one_lp(c, lp.lnum, 740 - c->leb_size, 0, 0, 0, 741 - 0); 742 - if (ret) 743 - goto out; 744 - } 745 - ret = ubifs_leb_unmap(c, lp.lnum); 746 - if (ret) 747 - goto out; 748 - ret = lp.lnum; 749 - break; 750 - } 751 684 752 685 space_before = c->leb_size - wbuf->offs - wbuf->used; 753 686 if (wbuf->lnum == -1)
+13 -20
fs/ubifs/io.c
··· 393 393 ubifs_assert(wbuf->size % c->min_io_size == 0); 394 394 ubifs_assert(!c->ro_media && !c->ro_mount); 395 395 if (c->leb_size - wbuf->offs >= c->max_write_size) 396 - ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size )); 396 + ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size)); 397 397 398 398 if (c->ro_error) 399 399 return -EROFS; ··· 452 452 * @dtype: data type 453 453 * 454 454 * This function targets the write-buffer to logical eraseblock @lnum:@offs. 455 - * The write-buffer is synchronized if it is not empty. Returns zero in case of 456 - * success and a negative error code in case of failure. 455 + * The write-buffer has to be empty. Returns zero in case of success and a 456 + * negative error code in case of failure. 457 457 */ 458 458 int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, 459 459 int dtype) ··· 465 465 ubifs_assert(offs >= 0 && offs <= c->leb_size); 466 466 ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7)); 467 467 ubifs_assert(lnum != wbuf->lnum); 468 - 469 - if (wbuf->used > 0) { 470 - int err = ubifs_wbuf_sync_nolock(wbuf); 471 - 472 - if (err) 473 - return err; 474 - } 468 + ubifs_assert(wbuf->used == 0); 475 469 476 470 spin_lock(&wbuf->lock); 477 471 wbuf->lnum = lnum; ··· 567 573 int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) 568 574 { 569 575 struct ubifs_info *c = wbuf->c; 570 - int err, written, n, aligned_len = ALIGN(len, 8), offs; 576 + int err, written, n, aligned_len = ALIGN(len, 8); 571 577 572 578 dbg_io("%d bytes (%s) to jhead %s wbuf at LEB %d:%d", len, 573 579 dbg_ntype(((struct ubifs_ch *)buf)->node_type), ··· 582 588 ubifs_assert(mutex_is_locked(&wbuf->io_mutex)); 583 589 ubifs_assert(!c->ro_media && !c->ro_mount); 584 590 if (c->leb_size - wbuf->offs >= c->max_write_size) 585 - ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size )); 591 + ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size)); 586 592 587 593 if 
(c->leb_size - wbuf->offs - wbuf->used < aligned_len) { 588 594 err = -ENOSPC; ··· 630 636 goto exit; 631 637 } 632 638 633 - offs = wbuf->offs; 634 639 written = 0; 635 640 636 641 if (wbuf->used) { ··· 646 653 if (err) 647 654 goto out; 648 655 649 - offs += wbuf->size; 656 + wbuf->offs += wbuf->size; 650 657 len -= wbuf->avail; 651 658 aligned_len -= wbuf->avail; 652 659 written += wbuf->avail; ··· 665 672 if (err) 666 673 goto out; 667 674 668 - offs += wbuf->size; 675 + wbuf->offs += wbuf->size; 669 676 len -= wbuf->size; 670 677 aligned_len -= wbuf->size; 671 678 written += wbuf->size; ··· 680 687 n = aligned_len >> c->max_write_shift; 681 688 if (n) { 682 689 n <<= c->max_write_shift; 683 - dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, offs); 684 - err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, offs, n, 685 - wbuf->dtype); 690 + dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, 691 + wbuf->offs); 692 + err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, 693 + wbuf->offs, n, wbuf->dtype); 686 694 if (err) 687 695 goto out; 688 - offs += n; 696 + wbuf->offs += n; 689 697 aligned_len -= n; 690 698 len -= n; 691 699 written += n; ··· 701 707 */ 702 708 memcpy(wbuf->buf, buf + written, len); 703 709 704 - wbuf->offs = offs; 705 710 if (c->leb_size - wbuf->offs >= c->max_write_size) 706 711 wbuf->size = c->max_write_size; 707 712 else
+16 -13
fs/ubifs/journal.c
··· 141 141 * LEB with some empty space. 142 142 */ 143 143 lnum = ubifs_find_free_space(c, len, &offs, squeeze); 144 - if (lnum >= 0) { 145 - /* Found an LEB, add it to the journal head */ 146 - err = ubifs_add_bud_to_log(c, jhead, lnum, offs); 147 - if (err) 148 - goto out_return; 149 - /* A new bud was successfully allocated and added to the log */ 144 + if (lnum >= 0) 150 145 goto out; 151 - } 152 146 153 147 err = lnum; 154 148 if (err != -ENOSPC) ··· 197 203 return 0; 198 204 } 199 205 200 - err = ubifs_add_bud_to_log(c, jhead, lnum, 0); 201 - if (err) 202 - goto out_return; 203 206 offs = 0; 204 207 205 208 out: 209 + /* 210 + * Make sure we synchronize the write-buffer before we add the new bud 211 + * to the log. Otherwise we may have a power cut after the log 212 + * reference node for the last bud (@lnum) is written but before the 213 + * write-buffer data are written to the next-to-last bud 214 + * (@wbuf->lnum). And the effect would be that the recovery would see 215 + * that there is corruption in the next-to-last bud. 216 + */ 217 + err = ubifs_wbuf_sync_nolock(wbuf); 218 + if (err) 219 + goto out_return; 220 + err = ubifs_add_bud_to_log(c, jhead, lnum, offs); 221 + if (err) 222 + goto out_return; 206 223 err = ubifs_wbuf_seek_nolock(wbuf, lnum, offs, wbuf->dtype); 207 224 if (err) 208 225 goto out_unlock; ··· 385 380 if (err == -ENOSPC) { 386 381 /* This are some budgeting problems, print useful information */ 387 382 down_write(&c->commit_sem); 388 - spin_lock(&c->space_lock); 389 383 dbg_dump_stack(); 390 - dbg_dump_budg(c); 391 - spin_unlock(&c->space_lock); 384 + dbg_dump_budg(c, &c->bi); 392 385 dbg_dump_lprops(c); 393 386 cmt_retries = dbg_check_lprops(c); 394 387 up_write(&c->commit_sem);
+7 -21
fs/ubifs/log.c
··· 100 100 } 101 101 102 102 /** 103 - * next_log_lnum - switch to the next log LEB. 104 - * @c: UBIFS file-system description object 105 - * @lnum: current log LEB 106 - */ 107 - static inline int next_log_lnum(const struct ubifs_info *c, int lnum) 108 - { 109 - lnum += 1; 110 - if (lnum > c->log_last) 111 - lnum = UBIFS_LOG_LNUM; 112 - 113 - return lnum; 114 - } 115 - 116 - /** 117 103 * empty_log_bytes - calculate amount of empty space in the log. 118 104 * @c: UBIFS file-system description object 119 105 */ ··· 243 257 ref->jhead = cpu_to_le32(jhead); 244 258 245 259 if (c->lhead_offs > c->leb_size - c->ref_node_alsz) { 246 - c->lhead_lnum = next_log_lnum(c, c->lhead_lnum); 260 + c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum); 247 261 c->lhead_offs = 0; 248 262 } 249 263 ··· 411 425 412 426 /* Switch to the next log LEB */ 413 427 if (c->lhead_offs) { 414 - c->lhead_lnum = next_log_lnum(c, c->lhead_lnum); 428 + c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum); 415 429 c->lhead_offs = 0; 416 430 } 417 431 ··· 432 446 433 447 c->lhead_offs += len; 434 448 if (c->lhead_offs == c->leb_size) { 435 - c->lhead_lnum = next_log_lnum(c, c->lhead_lnum); 449 + c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum); 436 450 c->lhead_offs = 0; 437 451 } 438 452 ··· 519 533 } 520 534 mutex_lock(&c->log_mutex); 521 535 for (lnum = old_ltail_lnum; lnum != c->ltail_lnum; 522 - lnum = next_log_lnum(c, lnum)) { 536 + lnum = ubifs_next_log_lnum(c, lnum)) { 523 537 dbg_log("unmap log LEB %d", lnum); 524 538 err = ubifs_leb_unmap(c, lnum); 525 539 if (err) ··· 628 642 err = ubifs_leb_change(c, *lnum, buf, sz, UBI_SHORTTERM); 629 643 if (err) 630 644 return err; 631 - *lnum = next_log_lnum(c, *lnum); 645 + *lnum = ubifs_next_log_lnum(c, *lnum); 632 646 *offs = 0; 633 647 } 634 648 memcpy(buf + *offs, node, len); ··· 698 712 ubifs_scan_destroy(sleb); 699 713 if (lnum == c->lhead_lnum) 700 714 break; 701 - lnum = next_log_lnum(c, lnum); 715 + lnum = ubifs_next_log_lnum(c, 
lnum); 702 716 } 703 717 if (offs) { 704 718 int sz = ALIGN(offs, c->min_io_size); ··· 718 732 /* Unmap remaining LEBs */ 719 733 lnum = write_lnum; 720 734 do { 721 - lnum = next_log_lnum(c, lnum); 735 + lnum = ubifs_next_log_lnum(c, lnum); 722 736 err = ubifs_leb_unmap(c, lnum); 723 737 if (err) 724 738 return err;
+45 -70
fs/ubifs/lprops.c
··· 1007 1007 } 1008 1008 1009 1009 /** 1010 - * struct scan_check_data - data provided to scan callback function. 1011 - * @lst: LEB properties statistics 1012 - * @err: error code 1013 - */ 1014 - struct scan_check_data { 1015 - struct ubifs_lp_stats lst; 1016 - int err; 1017 - }; 1018 - 1019 - /** 1020 1010 * scan_check_cb - scan callback. 1021 1011 * @c: the UBIFS file-system description object 1022 1012 * @lp: LEB properties to scan 1023 1013 * @in_tree: whether the LEB properties are in main memory 1024 - * @data: information passed to and from the caller of the scan 1014 + * @lst: lprops statistics to update 1025 1015 * 1026 1016 * This function returns a code that indicates whether the scan should continue 1027 1017 * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree ··· 1020 1030 */ 1021 1031 static int scan_check_cb(struct ubifs_info *c, 1022 1032 const struct ubifs_lprops *lp, int in_tree, 1023 - struct scan_check_data *data) 1033 + struct ubifs_lp_stats *lst) 1024 1034 { 1025 1035 struct ubifs_scan_leb *sleb; 1026 1036 struct ubifs_scan_node *snod; 1027 - struct ubifs_lp_stats *lst = &data->lst; 1028 1037 int cat, lnum = lp->lnum, is_idx = 0, used = 0, free, dirty, ret; 1029 1038 void *buf = NULL; 1030 1039 ··· 1033 1044 if (cat != (lp->flags & LPROPS_CAT_MASK)) { 1034 1045 ubifs_err("bad LEB category %d expected %d", 1035 1046 (lp->flags & LPROPS_CAT_MASK), cat); 1036 - goto out; 1047 + return -EINVAL; 1037 1048 } 1038 1049 } 1039 1050 ··· 1067 1078 } 1068 1079 if (!found) { 1069 1080 ubifs_err("bad LPT list (category %d)", cat); 1070 - goto out; 1081 + return -EINVAL; 1071 1082 } 1072 1083 } 1073 1084 } ··· 1079 1090 if ((lp->hpos != -1 && heap->arr[lp->hpos]->lnum != lnum) || 1080 1091 lp != heap->arr[lp->hpos]) { 1081 1092 ubifs_err("bad LPT heap (category %d)", cat); 1082 - goto out; 1093 + return -EINVAL; 1083 1094 } 1084 1095 } 1085 1096 1086 1097 buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); 1087 - if (!buf) { 
1088 - ubifs_err("cannot allocate memory to scan LEB %d", lnum); 1089 - goto out; 1098 + if (!buf) 1099 + return -ENOMEM; 1100 + 1101 + /* 1102 + * After an unclean unmount, empty and freeable LEBs 1103 + * may contain garbage - do not scan them. 1104 + */ 1105 + if (lp->free == c->leb_size) { 1106 + lst->empty_lebs += 1; 1107 + lst->total_free += c->leb_size; 1108 + lst->total_dark += ubifs_calc_dark(c, c->leb_size); 1109 + return LPT_SCAN_CONTINUE; 1110 + } 1111 + if (lp->free + lp->dirty == c->leb_size && 1112 + !(lp->flags & LPROPS_INDEX)) { 1113 + lst->total_free += lp->free; 1114 + lst->total_dirty += lp->dirty; 1115 + lst->total_dark += ubifs_calc_dark(c, c->leb_size); 1116 + return LPT_SCAN_CONTINUE; 1090 1117 } 1091 1118 1092 1119 sleb = ubifs_scan(c, lnum, 0, buf, 0); 1093 1120 if (IS_ERR(sleb)) { 1094 - /* 1095 - * After an unclean unmount, empty and freeable LEBs 1096 - * may contain garbage. 1097 - */ 1098 - if (lp->free == c->leb_size) { 1099 - ubifs_err("scan errors were in empty LEB " 1100 - "- continuing checking"); 1101 - lst->empty_lebs += 1; 1102 - lst->total_free += c->leb_size; 1103 - lst->total_dark += ubifs_calc_dark(c, c->leb_size); 1104 - ret = LPT_SCAN_CONTINUE; 1105 - goto exit; 1121 + ret = PTR_ERR(sleb); 1122 + if (ret == -EUCLEAN) { 1123 + dbg_dump_lprops(c); 1124 + dbg_dump_budg(c, &c->bi); 1106 1125 } 1107 - 1108 - if (lp->free + lp->dirty == c->leb_size && 1109 - !(lp->flags & LPROPS_INDEX)) { 1110 - ubifs_err("scan errors were in freeable LEB " 1111 - "- continuing checking"); 1112 - lst->total_free += lp->free; 1113 - lst->total_dirty += lp->dirty; 1114 - lst->total_dark += ubifs_calc_dark(c, c->leb_size); 1115 - ret = LPT_SCAN_CONTINUE; 1116 - goto exit; 1117 - } 1118 - data->err = PTR_ERR(sleb); 1119 - ret = LPT_SCAN_STOP; 1120 - goto exit; 1126 + goto out; 1121 1127 } 1122 1128 1123 1129 is_idx = -1; ··· 1230 1246 } 1231 1247 1232 1248 ubifs_scan_destroy(sleb); 1233 - ret = LPT_SCAN_CONTINUE; 1234 - exit: 1235 1249 vfree(buf); 
1236 - return ret; 1250 + return LPT_SCAN_CONTINUE; 1237 1251 1238 1252 out_print: 1239 1253 ubifs_err("bad accounting of LEB %d: free %d, dirty %d flags %#x, " ··· 1240 1258 dbg_dump_leb(c, lnum); 1241 1259 out_destroy: 1242 1260 ubifs_scan_destroy(sleb); 1261 + ret = -EINVAL; 1243 1262 out: 1244 1263 vfree(buf); 1245 - data->err = -EINVAL; 1246 - return LPT_SCAN_STOP; 1264 + return ret; 1247 1265 } 1248 1266 1249 1267 /** ··· 1260 1278 int dbg_check_lprops(struct ubifs_info *c) 1261 1279 { 1262 1280 int i, err; 1263 - struct scan_check_data data; 1264 - struct ubifs_lp_stats *lst = &data.lst; 1281 + struct ubifs_lp_stats lst; 1265 1282 1266 1283 if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) 1267 1284 return 0; ··· 1275 1294 return err; 1276 1295 } 1277 1296 1278 - memset(lst, 0, sizeof(struct ubifs_lp_stats)); 1279 - 1280 - data.err = 0; 1297 + memset(&lst, 0, sizeof(struct ubifs_lp_stats)); 1281 1298 err = ubifs_lpt_scan_nolock(c, c->main_first, c->leb_cnt - 1, 1282 1299 (ubifs_lpt_scan_callback)scan_check_cb, 1283 - &data); 1300 + &lst); 1284 1301 if (err && err != -ENOSPC) 1285 1302 goto out; 1286 - if (data.err) { 1287 - err = data.err; 1288 - goto out; 1289 - } 1290 1303 1291 - if (lst->empty_lebs != c->lst.empty_lebs || 1292 - lst->idx_lebs != c->lst.idx_lebs || 1293 - lst->total_free != c->lst.total_free || 1294 - lst->total_dirty != c->lst.total_dirty || 1295 - lst->total_used != c->lst.total_used) { 1304 + if (lst.empty_lebs != c->lst.empty_lebs || 1305 + lst.idx_lebs != c->lst.idx_lebs || 1306 + lst.total_free != c->lst.total_free || 1307 + lst.total_dirty != c->lst.total_dirty || 1308 + lst.total_used != c->lst.total_used) { 1296 1309 ubifs_err("bad overall accounting"); 1297 1310 ubifs_err("calculated: empty_lebs %d, idx_lebs %d, " 1298 1311 "total_free %lld, total_dirty %lld, total_used %lld", 1299 - lst->empty_lebs, lst->idx_lebs, lst->total_free, 1300 - lst->total_dirty, lst->total_used); 1312 + lst.empty_lebs, lst.idx_lebs, lst.total_free, 1313 + 
lst.total_dirty, lst.total_used); 1301 1314 ubifs_err("read from lprops: empty_lebs %d, idx_lebs %d, " 1302 1315 "total_free %lld, total_dirty %lld, total_used %lld", 1303 1316 c->lst.empty_lebs, c->lst.idx_lebs, c->lst.total_free, ··· 1300 1325 goto out; 1301 1326 } 1302 1327 1303 - if (lst->total_dead != c->lst.total_dead || 1304 - lst->total_dark != c->lst.total_dark) { 1328 + if (lst.total_dead != c->lst.total_dead || 1329 + lst.total_dark != c->lst.total_dark) { 1305 1330 ubifs_err("bad dead/dark space accounting"); 1306 1331 ubifs_err("calculated: total_dead %lld, total_dark %lld", 1307 - lst->total_dead, lst->total_dark); 1332 + lst.total_dead, lst.total_dark); 1308 1333 ubifs_err("read from lprops: total_dead %lld, total_dark %lld", 1309 1334 c->lst.total_dead, c->lst.total_dark); 1310 1335 err = -EINVAL;
+54 -1
fs/ubifs/lpt_commit.c
··· 29 29 #include <linux/slab.h> 30 30 #include "ubifs.h" 31 31 32 + #ifdef CONFIG_UBIFS_FS_DEBUG 33 + static int dbg_populate_lsave(struct ubifs_info *c); 34 + #else 35 + #define dbg_populate_lsave(c) 0 36 + #endif 37 + 32 38 /** 33 39 * first_dirty_cnode - find first dirty cnode. 34 40 * @c: UBIFS file-system description object ··· 592 586 if (nnode->nbranch[iip].lnum) 593 587 break; 594 588 } 595 - } while (iip >= UBIFS_LPT_FANOUT); 589 + } while (iip >= UBIFS_LPT_FANOUT); 596 590 597 591 /* Go right */ 598 592 nnode = ubifs_get_nnode(c, nnode, iip); ··· 821 815 c->lpt_drty_flgs |= LSAVE_DIRTY; 822 816 ubifs_add_lpt_dirt(c, c->lsave_lnum, c->lsave_sz); 823 817 } 818 + 819 + if (dbg_populate_lsave(c)) 820 + return; 821 + 824 822 list_for_each_entry(lprops, &c->empty_list, list) { 825 823 c->lsave[cnt++] = lprops->lnum; 826 824 if (cnt >= c->lsave_cnt) ··· 2002 1992 dump_lpt_leb(c, i + c->lpt_first); 2003 1993 printk(KERN_DEBUG "(pid %d) finish dumping all LPT LEBs\n", 2004 1994 current->pid); 1995 + } 1996 + 1997 + /** 1998 + * dbg_populate_lsave - debugging version of 'populate_lsave()' 1999 + * @c: UBIFS file-system description object 2000 + * 2001 + * This is a debugging version for 'populate_lsave()' which populates lsave 2002 + * with random LEBs instead of useful LEBs, which is good for test coverage. 2003 + * Returns zero if lsave has not been populated (this debugging feature is 2004 + * disabled) an non-zero if lsave has been populated. 
2005 + */ 2006 + static int dbg_populate_lsave(struct ubifs_info *c) 2007 + { 2008 + struct ubifs_lprops *lprops; 2009 + struct ubifs_lpt_heap *heap; 2010 + int i; 2011 + 2012 + if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) 2013 + return 0; 2014 + if (random32() & 3) 2015 + return 0; 2016 + 2017 + for (i = 0; i < c->lsave_cnt; i++) 2018 + c->lsave[i] = c->main_first; 2019 + 2020 + list_for_each_entry(lprops, &c->empty_list, list) 2021 + c->lsave[random32() % c->lsave_cnt] = lprops->lnum; 2022 + list_for_each_entry(lprops, &c->freeable_list, list) 2023 + c->lsave[random32() % c->lsave_cnt] = lprops->lnum; 2024 + list_for_each_entry(lprops, &c->frdi_idx_list, list) 2025 + c->lsave[random32() % c->lsave_cnt] = lprops->lnum; 2026 + 2027 + heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1]; 2028 + for (i = 0; i < heap->cnt; i++) 2029 + c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum; 2030 + heap = &c->lpt_heap[LPROPS_DIRTY - 1]; 2031 + for (i = 0; i < heap->cnt; i++) 2032 + c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum; 2033 + heap = &c->lpt_heap[LPROPS_FREE - 1]; 2034 + for (i = 0; i < heap->cnt; i++) 2035 + c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum; 2036 + 2037 + return 1; 2005 2038 } 2006 2039 2007 2040 #endif /* CONFIG_UBIFS_FS_DEBUG */
+4 -4
fs/ubifs/master.c
··· 148 148 } 149 149 150 150 main_sz = (long long)c->main_lebs * c->leb_size; 151 - if (c->old_idx_sz & 7 || c->old_idx_sz >= main_sz) { 151 + if (c->bi.old_idx_sz & 7 || c->bi.old_idx_sz >= main_sz) { 152 152 err = 9; 153 153 goto out; 154 154 } ··· 218 218 } 219 219 220 220 if (c->lst.total_dead + c->lst.total_dark + 221 - c->lst.total_used + c->old_idx_sz > main_sz) { 221 + c->lst.total_used + c->bi.old_idx_sz > main_sz) { 222 222 err = 21; 223 223 goto out; 224 224 } ··· 286 286 c->gc_lnum = le32_to_cpu(c->mst_node->gc_lnum); 287 287 c->ihead_lnum = le32_to_cpu(c->mst_node->ihead_lnum); 288 288 c->ihead_offs = le32_to_cpu(c->mst_node->ihead_offs); 289 - c->old_idx_sz = le64_to_cpu(c->mst_node->index_size); 289 + c->bi.old_idx_sz = le64_to_cpu(c->mst_node->index_size); 290 290 c->lpt_lnum = le32_to_cpu(c->mst_node->lpt_lnum); 291 291 c->lpt_offs = le32_to_cpu(c->mst_node->lpt_offs); 292 292 c->nhead_lnum = le32_to_cpu(c->mst_node->nhead_lnum); ··· 305 305 c->lst.total_dead = le64_to_cpu(c->mst_node->total_dead); 306 306 c->lst.total_dark = le64_to_cpu(c->mst_node->total_dark); 307 307 308 - c->calc_idx_sz = c->old_idx_sz; 308 + c->calc_idx_sz = c->bi.old_idx_sz; 309 309 310 310 if (c->mst_node->flags & cpu_to_le32(UBIFS_MST_NO_ORPHS)) 311 311 c->no_orphs = 1;
+17
fs/ubifs/misc.h
··· 340 340 mutex_unlock(&c->lp_mutex); 341 341 } 342 342 343 + /** 344 + * ubifs_next_log_lnum - switch to the next log LEB. 345 + * @c: UBIFS file-system description object 346 + * @lnum: current log LEB 347 + * 348 + * This helper function returns the log LEB number which goes next after LEB 349 + * 'lnum'. 350 + */ 351 + static inline int ubifs_next_log_lnum(const struct ubifs_info *c, int lnum) 352 + { 353 + lnum += 1; 354 + if (lnum > c->log_last) 355 + lnum = UBIFS_LOG_LNUM; 356 + 357 + return lnum; 358 + } 359 + 343 360 #endif /* __UBIFS_MISC_H__ */
+2 -1
fs/ubifs/orphan.c
··· 673 673 sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1); 674 674 if (IS_ERR(sleb)) { 675 675 if (PTR_ERR(sleb) == -EUCLEAN) 676 - sleb = ubifs_recover_leb(c, lnum, 0, c->sbuf, 0); 676 + sleb = ubifs_recover_leb(c, lnum, 0, 677 + c->sbuf, 0); 677 678 if (IS_ERR(sleb)) { 678 679 err = PTR_ERR(sleb); 679 680 break;
+178 -176
fs/ubifs/recovery.c
··· 564 564 } 565 565 566 566 /** 567 - * drop_incomplete_group - drop nodes from an incomplete group. 567 + * drop_last_node - drop the last node or group of nodes. 568 568 * @sleb: scanned LEB information 569 569 * @offs: offset of dropped nodes is returned here 570 + * @grouped: non-zero if whole group of nodes have to be dropped 570 571 * 571 - * This function returns %1 if nodes are dropped and %0 otherwise. 572 + * This is a helper function for 'ubifs_recover_leb()' which drops the last 573 + * node of the scanned LEB or the last group of nodes if @grouped is not zero. 574 + * This function returns %1 if a node was dropped and %0 otherwise. 572 575 */ 573 - static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs) 576 + static int drop_last_node(struct ubifs_scan_leb *sleb, int *offs, int grouped) 574 577 { 575 578 int dropped = 0; 576 579 ··· 592 589 kfree(snod); 593 590 sleb->nodes_cnt -= 1; 594 591 dropped = 1; 592 + if (!grouped) 593 + break; 595 594 } 596 595 return dropped; 597 596 } ··· 614 609 struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, 615 610 int offs, void *sbuf, int grouped) 616 611 { 617 - int err, len = c->leb_size - offs, need_clean = 0, quiet = 1; 618 - int empty_chkd = 0, start = offs; 612 + int ret = 0, err, len = c->leb_size - offs, start = offs, min_io_unit; 619 613 struct ubifs_scan_leb *sleb; 620 614 void *buf = sbuf + offs; 621 615 ··· 624 620 if (IS_ERR(sleb)) 625 621 return sleb; 626 622 627 - if (sleb->ecc) 628 - need_clean = 1; 629 - 623 + ubifs_assert(len >= 8); 630 624 while (len >= 8) { 631 - int ret; 632 - 633 625 dbg_scan("look at LEB %d:%d (%d bytes left)", 634 626 lnum, offs, len); 635 627 ··· 635 635 * Scan quietly until there is an error from which we cannot 636 636 * recover 637 637 */ 638 - ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet); 639 - 638 + ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 0); 640 639 if (ret == SCANNED_A_NODE) { 641 640 /* A valid node, and not 
a padding node */ 642 641 struct ubifs_ch *ch = buf; ··· 648 649 offs += node_len; 649 650 buf += node_len; 650 651 len -= node_len; 651 - continue; 652 - } 653 - 654 - if (ret > 0) { 652 + } else if (ret > 0) { 655 653 /* Padding bytes or a valid padding node */ 656 654 offs += ret; 657 655 buf += ret; 658 656 len -= ret; 659 - continue; 660 - } 661 - 662 - if (ret == SCANNED_EMPTY_SPACE) { 663 - if (!is_empty(buf, len)) { 664 - if (!is_last_write(c, buf, offs)) 665 - break; 666 - clean_buf(c, &buf, lnum, &offs, &len); 667 - need_clean = 1; 668 - } 669 - empty_chkd = 1; 657 + } else if (ret == SCANNED_EMPTY_SPACE || 658 + ret == SCANNED_GARBAGE || 659 + ret == SCANNED_A_BAD_PAD_NODE || 660 + ret == SCANNED_A_CORRUPT_NODE) { 661 + dbg_rcvry("found corruption - %d", ret); 670 662 break; 671 - } 672 - 673 - if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE) 674 - if (is_last_write(c, buf, offs)) { 675 - clean_buf(c, &buf, lnum, &offs, &len); 676 - need_clean = 1; 677 - empty_chkd = 1; 678 - break; 679 - } 680 - 681 - if (ret == SCANNED_A_CORRUPT_NODE) 682 - if (no_more_nodes(c, buf, len, lnum, offs)) { 683 - clean_buf(c, &buf, lnum, &offs, &len); 684 - need_clean = 1; 685 - empty_chkd = 1; 686 - break; 687 - } 688 - 689 - if (quiet) { 690 - /* Redo the last scan but noisily */ 691 - quiet = 0; 692 - continue; 693 - } 694 - 695 - switch (ret) { 696 - case SCANNED_GARBAGE: 697 - dbg_err("garbage"); 698 - goto corrupted; 699 - case SCANNED_A_CORRUPT_NODE: 700 - case SCANNED_A_BAD_PAD_NODE: 701 - dbg_err("bad node"); 702 - goto corrupted; 703 - default: 704 - dbg_err("unknown"); 663 + } else { 664 + dbg_err("unexpected return value %d", ret); 705 665 err = -EINVAL; 706 666 goto error; 707 667 } 708 668 } 709 669 710 - if (!empty_chkd && !is_empty(buf, len)) { 711 - if (is_last_write(c, buf, offs)) { 712 - clean_buf(c, &buf, lnum, &offs, &len); 713 - need_clean = 1; 714 - } else { 670 + if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE) { 671 + if 
(!is_last_write(c, buf, offs)) 672 + goto corrupted_rescan; 673 + } else if (ret == SCANNED_A_CORRUPT_NODE) { 674 + if (!no_more_nodes(c, buf, len, lnum, offs)) 675 + goto corrupted_rescan; 676 + } else if (!is_empty(buf, len)) { 677 + if (!is_last_write(c, buf, offs)) { 715 678 int corruption = first_non_ff(buf, len); 716 679 717 680 /* ··· 689 728 } 690 729 } 691 730 692 - /* Drop nodes from incomplete group */ 693 - if (grouped && drop_incomplete_group(sleb, &offs)) { 694 - buf = sbuf + offs; 695 - len = c->leb_size - offs; 696 - clean_buf(c, &buf, lnum, &offs, &len); 697 - need_clean = 1; 698 - } 731 + min_io_unit = round_down(offs, c->min_io_size); 732 + if (grouped) 733 + /* 734 + * If nodes are grouped, always drop the incomplete group at 735 + * the end. 736 + */ 737 + drop_last_node(sleb, &offs, 1); 699 738 700 - if (offs % c->min_io_size) { 701 - clean_buf(c, &buf, lnum, &offs, &len); 702 - need_clean = 1; 703 - } 739 + /* 740 + * While we are in the middle of the same min. I/O unit keep dropping 741 + * nodes. So basically, what we want is to make sure that the last min. 742 + * I/O unit where we saw the corruption is dropped completely with all 743 + * the uncorrupted node which may possibly sit there. 744 + * 745 + * In other words, let's name the min. I/O unit where the corruption 746 + * starts B, and the previous min. I/O unit A. The below code tries to 747 + * deal with a situation when half of B contains valid nodes or the end 748 + * of a valid node, and the second half of B contains corrupted data or 749 + * garbage. This means that UBIFS had been writing to B just before the 750 + * power cut happened. I do not know how realistic is this scenario 751 + * that half of the min. I/O unit had been written successfully and the 752 + * other half not, but this is possible in our 'failure mode emulation' 753 + * infrastructure at least. 754 + * 755 + * So what is the problem, why we need to drop those nodes? 
Whey can't 756 + * we just clean-up the second half of B by putting a padding node 757 + * there? We can, and this works fine with one exception which was 758 + * reproduced with power cut emulation testing and happens extremely 759 + * rarely. The description follows, but it is worth noting that that is 760 + * only about the GC head, so we could do this trick only if the bud 761 + * belongs to the GC head, but it does not seem to be worth an 762 + * additional "if" statement. 763 + * 764 + * So, imagine the file-system is full, we run GC which is moving valid 765 + * nodes from LEB X to LEB Y (obviously, LEB Y is the current GC head 766 + * LEB). The @c->gc_lnum is -1, which means that GC will retain LEB X 767 + * and will try to continue. Imagine that LEB X is currently the 768 + * dirtiest LEB, and the amount of used space in LEB Y is exactly the 769 + * same as amount of free space in LEB X. 770 + * 771 + * And a power cut happens when nodes are moved from LEB X to LEB Y. We 772 + * are here trying to recover LEB Y which is the GC head LEB. We find 773 + * the min. I/O unit B as described above. Then we clean-up LEB Y by 774 + * padding min. I/O unit. And later 'ubifs_rcvry_gc_commit()' function 775 + * fails, because it cannot find a dirty LEB which could be GC'd into 776 + * LEB Y! Even LEB X does not match because the amount of valid nodes 777 + * there does not fit the free space in LEB Y any more! And this is 778 + * because of the padding node which we added to LEB Y. The 779 + * user-visible effect of this which I once observed and analysed is 780 + * that we cannot mount the file-system with -ENOSPC error. 781 + * 782 + * So obviously, to make sure that situation does not happen we should 783 + * free min. I/O unit B in LEB Y completely and the last used min. I/O 784 + * unit in LEB Y should be A. This is basically what the below code 785 + * tries to do. 
786 + */ 787 + while (min_io_unit == round_down(offs, c->min_io_size) && 788 + min_io_unit != offs && 789 + drop_last_node(sleb, &offs, grouped)); 704 790 791 + buf = sbuf + offs; 792 + len = c->leb_size - offs; 793 + 794 + clean_buf(c, &buf, lnum, &offs, &len); 705 795 ubifs_end_scan(c, sleb, lnum, offs); 706 796 707 - if (need_clean) { 708 - err = fix_unclean_leb(c, sleb, start); 709 - if (err) 710 - goto error; 711 - } 797 + err = fix_unclean_leb(c, sleb, start); 798 + if (err) 799 + goto error; 712 800 713 801 return sleb; 714 802 803 + corrupted_rescan: 804 + /* Re-scan the corrupted data with verbose messages */ 805 + dbg_err("corruptio %d", ret); 806 + ubifs_scan_a_node(c, buf, len, lnum, offs, 1); 715 807 corrupted: 716 808 ubifs_scanned_corruption(c, lnum, offs, buf); 717 809 err = -EUCLEAN; ··· 1084 1070 } 1085 1071 1086 1072 /** 1073 + * grab_empty_leb - grab an empty LEB to use as GC LEB and run commit. 1074 + * @c: UBIFS file-system description object 1075 + * 1076 + * This is a helper function for 'ubifs_rcvry_gc_commit()' which grabs an empty 1077 + * LEB to be used as GC LEB (@c->gc_lnum), and then runs the commit. Returns 1078 + * zero in case of success and a negative error code in case of failure. 1079 + */ 1080 + static int grab_empty_leb(struct ubifs_info *c) 1081 + { 1082 + int lnum, err; 1083 + 1084 + /* 1085 + * Note, it is very important to first search for an empty LEB and then 1086 + * run the commit, not vice-versa. The reason is that there might be 1087 + * only one empty LEB at the moment, the one which has been the 1088 + * @c->gc_lnum just before the power cut happened. During the regular 1089 + * UBIFS operation (not now) @c->gc_lnum is marked as "taken", so no 1090 + * one but GC can grab it. But at this moment this single empty LEB is 1091 + * not marked as taken, so if we run commit - what happens? Right, the 1092 + * commit will grab it and write the index there. 
Remember that the 1093 + * index always expands as long as there is free space, and it only 1094 + * starts consolidating when we run out of space. 1095 + * 1096 + * IOW, if we run commit now, we might not be able to find a free LEB 1097 + * after this. 1098 + */ 1099 + lnum = ubifs_find_free_leb_for_idx(c); 1100 + if (lnum < 0) { 1101 + dbg_err("could not find an empty LEB"); 1102 + dbg_dump_lprops(c); 1103 + dbg_dump_budg(c, &c->bi); 1104 + return lnum; 1105 + } 1106 + 1107 + /* Reset the index flag */ 1108 + err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0, 1109 + LPROPS_INDEX, 0); 1110 + if (err) 1111 + return err; 1112 + 1113 + c->gc_lnum = lnum; 1114 + dbg_rcvry("found empty LEB %d, run commit", lnum); 1115 + 1116 + return ubifs_run_commit(c); 1117 + } 1118 + 1119 + /** 1087 1120 * ubifs_rcvry_gc_commit - recover the GC LEB number and run the commit. 1088 1121 * @c: UBIFS file-system description object 1089 1122 * ··· 1152 1091 { 1153 1092 struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; 1154 1093 struct ubifs_lprops lp; 1155 - int lnum, err; 1094 + int err; 1095 + 1096 + dbg_rcvry("GC head LEB %d, offs %d", wbuf->lnum, wbuf->offs); 1156 1097 1157 1098 c->gc_lnum = -1; 1158 - if (wbuf->lnum == -1) { 1159 - dbg_rcvry("no GC head LEB"); 1160 - goto find_free; 1161 - } 1162 - /* 1163 - * See whether the used space in the dirtiest LEB fits in the GC head 1164 - * LEB. 1165 - */ 1166 - if (wbuf->offs == c->leb_size) { 1167 - dbg_rcvry("no room in GC head LEB"); 1168 - goto find_free; 1169 - } 1099 + if (wbuf->lnum == -1 || wbuf->offs == c->leb_size) 1100 + return grab_empty_leb(c); 1101 + 1170 1102 err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2); 1171 1103 if (err) { 1172 - /* 1173 - * There are no dirty or empty LEBs subject to here being 1174 - * enough for the index. Try to use 1175 - * 'ubifs_find_free_leb_for_idx()', which will return any empty 1176 - * LEBs (ignoring index requirements). 
If the index then 1177 - * doesn't have enough LEBs the recovery commit will fail - 1178 - * which is the same result anyway i.e. recovery fails. So 1179 - * there is no problem ignoring index requirements and just 1180 - * grabbing a free LEB since we have already established there 1181 - * is not a dirty LEB we could have used instead. 1182 - */ 1183 - if (err == -ENOSPC) { 1184 - dbg_rcvry("could not find a dirty LEB"); 1185 - goto find_free; 1186 - } 1187 - return err; 1104 + if (err != -ENOSPC) 1105 + return err; 1106 + 1107 + dbg_rcvry("could not find a dirty LEB"); 1108 + return grab_empty_leb(c); 1188 1109 } 1110 + 1189 1111 ubifs_assert(!(lp.flags & LPROPS_INDEX)); 1190 - lnum = lp.lnum; 1191 - if (lp.free + lp.dirty == c->leb_size) { 1192 - /* An empty LEB was returned */ 1193 - if (lp.free != c->leb_size) { 1194 - err = ubifs_change_one_lp(c, lnum, c->leb_size, 1195 - 0, 0, 0, 0); 1196 - if (err) 1197 - return err; 1198 - } 1199 - err = ubifs_leb_unmap(c, lnum); 1200 - if (err) 1201 - return err; 1202 - c->gc_lnum = lnum; 1203 - dbg_rcvry("allocated LEB %d for GC", lnum); 1204 - /* Run the commit */ 1205 - dbg_rcvry("committing"); 1206 - return ubifs_run_commit(c); 1207 - } 1208 - /* 1209 - * There was no empty LEB so the used space in the dirtiest LEB must fit 1210 - * in the GC head LEB. 1211 - */ 1212 - if (lp.free + lp.dirty < wbuf->offs) { 1213 - dbg_rcvry("LEB %d doesn't fit in GC head LEB %d:%d", 1214 - lnum, wbuf->lnum, wbuf->offs); 1215 - err = ubifs_return_leb(c, lnum); 1216 - if (err) 1217 - return err; 1218 - goto find_free; 1219 - } 1112 + ubifs_assert(lp.free + lp.dirty >= wbuf->offs); 1113 + 1220 1114 /* 1221 1115 * We run the commit before garbage collection otherwise subsequent 1222 1116 * mounts will see the GC and orphan deletion in a different order. 
··· 1180 1164 err = ubifs_run_commit(c); 1181 1165 if (err) 1182 1166 return err; 1183 - /* 1184 - * The data in the dirtiest LEB fits in the GC head LEB, so do the GC 1185 - * - use locking to keep 'ubifs_assert()' happy. 1186 - */ 1187 - dbg_rcvry("GC'ing LEB %d", lnum); 1167 + 1168 + dbg_rcvry("GC'ing LEB %d", lp.lnum); 1188 1169 mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); 1189 1170 err = ubifs_garbage_collect_leb(c, &lp); 1190 1171 if (err >= 0) { ··· 1197 1184 err = -EINVAL; 1198 1185 return err; 1199 1186 } 1200 - if (err != LEB_RETAINED) { 1201 - dbg_err("GC returned %d", err); 1187 + 1188 + ubifs_assert(err == LEB_RETAINED); 1189 + if (err != LEB_RETAINED) 1202 1190 return -EINVAL; 1203 - } 1191 + 1204 1192 err = ubifs_leb_unmap(c, c->gc_lnum); 1205 1193 if (err) 1206 1194 return err; 1207 - dbg_rcvry("allocated LEB %d for GC", lnum); 1208 - return 0; 1209 1195 1210 - find_free: 1211 - /* 1212 - * There is no GC head LEB or the free space in the GC head LEB is too 1213 - * small, or there are not dirty LEBs. Allocate gc_lnum by calling 1214 - * 'ubifs_find_free_leb_for_idx()' so GC is not run. 
1215 - */ 1216 - lnum = ubifs_find_free_leb_for_idx(c); 1217 - if (lnum < 0) { 1218 - dbg_err("could not find an empty LEB"); 1219 - return lnum; 1220 - } 1221 - /* And reset the index flag */ 1222 - err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0, 1223 - LPROPS_INDEX, 0); 1224 - if (err) 1225 - return err; 1226 - c->gc_lnum = lnum; 1227 - dbg_rcvry("allocated LEB %d for GC", lnum); 1228 - /* Run the commit */ 1229 - dbg_rcvry("committing"); 1230 - return ubifs_run_commit(c); 1196 + dbg_rcvry("allocated LEB %d for GC", lp.lnum); 1197 + return 0; 1231 1198 } 1232 1199 1233 1200 /** ··· 1449 1456 err = ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN); 1450 1457 if (err) 1451 1458 goto out; 1452 - dbg_rcvry("inode %lu at %d:%d size %lld -> %lld ", 1459 + dbg_rcvry("inode %lu at %d:%d size %lld -> %lld", 1453 1460 (unsigned long)e->inum, lnum, offs, i_size, e->d_size); 1454 1461 return 0; 1455 1462 ··· 1498 1505 e->i_size = le64_to_cpu(ino->size); 1499 1506 } 1500 1507 } 1508 + 1501 1509 if (e->exists && e->i_size < e->d_size) { 1502 - if (!e->inode && c->ro_mount) { 1510 + if (c->ro_mount) { 1503 1511 /* Fix the inode size and pin it in memory */ 1504 1512 struct inode *inode; 1513 + struct ubifs_inode *ui; 1514 + 1515 + ubifs_assert(!e->inode); 1505 1516 1506 1517 inode = ubifs_iget(c->vfs_sb, e->inum); 1507 1518 if (IS_ERR(inode)) 1508 1519 return PTR_ERR(inode); 1520 + 1521 + ui = ubifs_inode(inode); 1509 1522 if (inode->i_size < e->d_size) { 1510 1523 dbg_rcvry("ino %lu size %lld -> %lld", 1511 1524 (unsigned long)e->inum, 1512 - e->d_size, inode->i_size); 1525 + inode->i_size, e->d_size); 1513 1526 inode->i_size = e->d_size; 1514 - ubifs_inode(inode)->ui_size = e->d_size; 1527 + ui->ui_size = e->d_size; 1528 + ui->synced_i_size = e->d_size; 1515 1529 e->inode = inode; 1516 1530 this = rb_next(this); 1517 1531 continue; ··· 1533 1533 iput(e->inode); 1534 1534 } 1535 1535 } 1536 + 1536 1537 this = rb_next(this); 1537 1538 rb_erase(&e->rb, 
&c->size_tree); 1538 1539 kfree(e); 1539 1540 } 1541 + 1540 1542 return 0; 1541 1543 }
+231 -235
fs/ubifs/replay.c
··· 33 33 */ 34 34 35 35 #include "ubifs.h" 36 - 37 - /* 38 - * Replay flags. 39 - * 40 - * REPLAY_DELETION: node was deleted 41 - * REPLAY_REF: node is a reference node 42 - */ 43 - enum { 44 - REPLAY_DELETION = 1, 45 - REPLAY_REF = 2, 46 - }; 36 + #include <linux/list_sort.h> 47 37 48 38 /** 49 - * struct replay_entry - replay tree entry. 39 + * struct replay_entry - replay list entry. 50 40 * @lnum: logical eraseblock number of the node 51 41 * @offs: node offset 52 42 * @len: node length 43 + * @deletion: non-zero if this entry corresponds to a node deletion 53 44 * @sqnum: node sequence number 54 - * @flags: replay flags 55 - * @rb: links the replay tree 45 + * @list: links the replay list 56 46 * @key: node key 57 47 * @nm: directory entry name 58 48 * @old_size: truncation old size 59 49 * @new_size: truncation new size 60 - * @free: amount of free space in a bud 61 - * @dirty: amount of dirty space in a bud from padding and deletion nodes 62 - * @jhead: journal head number of the bud 63 50 * 64 - * UBIFS journal replay must compare node sequence numbers, which means it must 65 - * build a tree of node information to insert into the TNC. 51 + * The replay process first scans all buds and builds the replay list, then 52 + * sorts the replay list in nodes sequence number order, and then inserts all 53 + * the replay entries to the TNC. 66 54 */ 67 55 struct replay_entry { 68 56 int lnum; 69 57 int offs; 70 58 int len; 59 + unsigned int deletion:1; 71 60 unsigned long long sqnum; 72 - int flags; 73 - struct rb_node rb; 61 + struct list_head list; 74 62 union ubifs_key key; 75 63 union { 76 64 struct qstr nm; 77 65 struct { 78 66 loff_t old_size; 79 67 loff_t new_size; 80 - }; 81 - struct { 82 - int free; 83 - int dirty; 84 - int jhead; 85 68 }; 86 69 }; 87 70 }; ··· 73 90 * struct bud_entry - entry in the list of buds to replay. 
74 91 * @list: next bud in the list 75 92 * @bud: bud description object 76 - * @free: free bytes in the bud 77 93 * @sqnum: reference node sequence number 94 + * @free: free bytes in the bud 95 + * @dirty: dirty bytes in the bud 78 96 */ 79 97 struct bud_entry { 80 98 struct list_head list; 81 99 struct ubifs_bud *bud; 82 - int free; 83 100 unsigned long long sqnum; 101 + int free; 102 + int dirty; 84 103 }; 85 104 86 105 /** 87 106 * set_bud_lprops - set free and dirty space used by a bud. 88 107 * @c: UBIFS file-system description object 89 - * @r: replay entry of bud 108 + * @b: bud entry which describes the bud 109 + * 110 + * This function makes sure the LEB properties of bud @b are set correctly 111 + * after the replay. Returns zero in case of success and a negative error code 112 + * in case of failure. 90 113 */ 91 - static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r) 114 + static int set_bud_lprops(struct ubifs_info *c, struct bud_entry *b) 92 115 { 93 116 const struct ubifs_lprops *lp; 94 117 int err = 0, dirty; 95 118 96 119 ubifs_get_lprops(c); 97 120 98 - lp = ubifs_lpt_lookup_dirty(c, r->lnum); 121 + lp = ubifs_lpt_lookup_dirty(c, b->bud->lnum); 99 122 if (IS_ERR(lp)) { 100 123 err = PTR_ERR(lp); 101 124 goto out; 102 125 } 103 126 104 127 dirty = lp->dirty; 105 - if (r->offs == 0 && (lp->free != c->leb_size || lp->dirty != 0)) { 128 + if (b->bud->start == 0 && (lp->free != c->leb_size || lp->dirty != 0)) { 106 129 /* 107 130 * The LEB was added to the journal with a starting offset of 108 131 * zero which means the LEB must have been empty. The LEB 109 - * property values should be lp->free == c->leb_size and 110 - * lp->dirty == 0, but that is not the case. The reason is that 111 - * the LEB was garbage collected. 
The garbage collector resets 112 - * the free and dirty space without recording it anywhere except 113 - * lprops, so if there is not a commit then lprops does not have 114 - * that information next time the file system is mounted. 132 + * property values should be @lp->free == @c->leb_size and 133 + * @lp->dirty == 0, but that is not the case. The reason is that 134 + * the LEB had been garbage collected before it became the bud, 135 + * and there was not commit inbetween. The garbage collector 136 + * resets the free and dirty space without recording it 137 + * anywhere except lprops, so if there was no commit then 138 + * lprops does not have that information. 115 139 * 116 140 * We do not need to adjust free space because the scan has told 117 141 * us the exact value which is recorded in the replay entry as 118 - * r->free. 142 + * @b->free. 119 143 * 120 144 * However we do need to subtract from the dirty space the 121 145 * amount of space that the garbage collector reclaimed, which 122 146 * is the whole LEB minus the amount of space that was free. 
123 147 */ 124 - dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum, 148 + dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum, 125 149 lp->free, lp->dirty); 126 - dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum, 150 + dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum, 127 151 lp->free, lp->dirty); 128 152 dirty -= c->leb_size - lp->free; 129 153 /* ··· 142 152 */ 143 153 if (dirty != 0) 144 154 dbg_msg("LEB %d lp: %d free %d dirty " 145 - "replay: %d free %d dirty", r->lnum, lp->free, 146 - lp->dirty, r->free, r->dirty); 155 + "replay: %d free %d dirty", b->bud->lnum, 156 + lp->free, lp->dirty, b->free, b->dirty); 147 157 } 148 - lp = ubifs_change_lp(c, lp, r->free, dirty + r->dirty, 158 + lp = ubifs_change_lp(c, lp, b->free, dirty + b->dirty, 149 159 lp->flags | LPROPS_TAKEN, 0); 150 160 if (IS_ERR(lp)) { 151 161 err = PTR_ERR(lp); ··· 153 163 } 154 164 155 165 /* Make sure the journal head points to the latest bud */ 156 - err = ubifs_wbuf_seek_nolock(&c->jheads[r->jhead].wbuf, r->lnum, 157 - c->leb_size - r->free, UBI_SHORTTERM); 166 + err = ubifs_wbuf_seek_nolock(&c->jheads[b->bud->jhead].wbuf, 167 + b->bud->lnum, c->leb_size - b->free, 168 + UBI_SHORTTERM); 158 169 159 170 out: 160 171 ubifs_release_lprops(c); 161 172 return err; 173 + } 174 + 175 + /** 176 + * set_buds_lprops - set free and dirty space for all replayed buds. 177 + * @c: UBIFS file-system description object 178 + * 179 + * This function sets LEB properties for all replayed buds. Returns zero in 180 + * case of success and a negative error code in case of failure. 
181 + */ 182 + static int set_buds_lprops(struct ubifs_info *c) 183 + { 184 + struct bud_entry *b; 185 + int err; 186 + 187 + list_for_each_entry(b, &c->replay_buds, list) { 188 + err = set_bud_lprops(c, b); 189 + if (err) 190 + return err; 191 + } 192 + 193 + return 0; 162 194 } 163 195 164 196 /** ··· 219 207 */ 220 208 static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) 221 209 { 222 - int err, deletion = ((r->flags & REPLAY_DELETION) != 0); 210 + int err; 223 211 224 - dbg_mnt("LEB %d:%d len %d flgs %d sqnum %llu %s", r->lnum, 225 - r->offs, r->len, r->flags, r->sqnum, DBGKEY(&r->key)); 212 + dbg_mnt("LEB %d:%d len %d deletion %d sqnum %llu %s", r->lnum, 213 + r->offs, r->len, r->deletion, r->sqnum, DBGKEY(&r->key)); 226 214 227 215 /* Set c->replay_sqnum to help deal with dangling branches. */ 228 216 c->replay_sqnum = r->sqnum; 229 217 230 - if (r->flags & REPLAY_REF) 231 - err = set_bud_lprops(c, r); 232 - else if (is_hash_key(c, &r->key)) { 233 - if (deletion) 218 + if (is_hash_key(c, &r->key)) { 219 + if (r->deletion) 234 220 err = ubifs_tnc_remove_nm(c, &r->key, &r->nm); 235 221 else 236 222 err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs, 237 223 r->len, &r->nm); 238 224 } else { 239 - if (deletion) 225 + if (r->deletion) 240 226 switch (key_type(c, &r->key)) { 241 227 case UBIFS_INO_KEY: 242 228 { ··· 257 247 return err; 258 248 259 249 if (c->need_recovery) 260 - err = ubifs_recover_size_accum(c, &r->key, deletion, 250 + err = ubifs_recover_size_accum(c, &r->key, r->deletion, 261 251 r->new_size); 262 252 } 263 253 ··· 265 255 } 266 256 267 257 /** 268 - * destroy_replay_tree - destroy the replay. 269 - * @c: UBIFS file-system description object 258 + * replay_entries_cmp - compare 2 replay entries. 259 + * @priv: UBIFS file-system description object 260 + * @a: first replay entry 261 + * @a: second replay entry 270 262 * 271 - * Destroy the replay tree. 
263 + * This is a comparios function for 'list_sort()' which compares 2 replay 264 + * entries @a and @b by comparing their sequence numer. Returns %1 if @a has 265 + * greater sequence number and %-1 otherwise. 272 266 */ 273 - static void destroy_replay_tree(struct ubifs_info *c) 267 + static int replay_entries_cmp(void *priv, struct list_head *a, 268 + struct list_head *b) 274 269 { 275 - struct rb_node *this = c->replay_tree.rb_node; 276 - struct replay_entry *r; 270 + struct replay_entry *ra, *rb; 277 271 278 - while (this) { 279 - if (this->rb_left) { 280 - this = this->rb_left; 281 - continue; 282 - } else if (this->rb_right) { 283 - this = this->rb_right; 284 - continue; 285 - } 286 - r = rb_entry(this, struct replay_entry, rb); 287 - this = rb_parent(this); 288 - if (this) { 289 - if (this->rb_left == &r->rb) 290 - this->rb_left = NULL; 291 - else 292 - this->rb_right = NULL; 293 - } 294 - if (is_hash_key(c, &r->key)) 295 - kfree(r->nm.name); 296 - kfree(r); 297 - } 298 - c->replay_tree = RB_ROOT; 272 + cond_resched(); 273 + if (a == b) 274 + return 0; 275 + 276 + ra = list_entry(a, struct replay_entry, list); 277 + rb = list_entry(b, struct replay_entry, list); 278 + ubifs_assert(ra->sqnum != rb->sqnum); 279 + if (ra->sqnum > rb->sqnum) 280 + return 1; 281 + return -1; 299 282 } 300 283 301 284 /** 302 - * apply_replay_tree - apply the replay tree to the TNC. 285 + * apply_replay_list - apply the replay list to the TNC. 303 286 * @c: UBIFS file-system description object 304 287 * 305 - * Apply the replay tree. 306 - * Returns zero in case of success and a negative error code in case of 307 - * failure. 288 + * Apply all entries in the replay list to the TNC. Returns zero in case of 289 + * success and a negative error code in case of failure. 
308 290 */ 309 - static int apply_replay_tree(struct ubifs_info *c) 291 + static int apply_replay_list(struct ubifs_info *c) 310 292 { 311 - struct rb_node *this = rb_first(&c->replay_tree); 293 + struct replay_entry *r; 294 + int err; 312 295 313 - while (this) { 314 - struct replay_entry *r; 315 - int err; 296 + list_sort(c, &c->replay_list, &replay_entries_cmp); 316 297 298 + list_for_each_entry(r, &c->replay_list, list) { 317 299 cond_resched(); 318 300 319 - r = rb_entry(this, struct replay_entry, rb); 320 301 err = apply_replay_entry(c, r); 321 302 if (err) 322 303 return err; 323 - this = rb_next(this); 324 304 } 305 + 325 306 return 0; 326 307 } 327 308 328 309 /** 329 - * insert_node - insert a node to the replay tree. 310 + * destroy_replay_list - destroy the replay. 311 + * @c: UBIFS file-system description object 312 + * 313 + * Destroy the replay list. 314 + */ 315 + static void destroy_replay_list(struct ubifs_info *c) 316 + { 317 + struct replay_entry *r, *tmp; 318 + 319 + list_for_each_entry_safe(r, tmp, &c->replay_list, list) { 320 + if (is_hash_key(c, &r->key)) 321 + kfree(r->nm.name); 322 + list_del(&r->list); 323 + kfree(r); 324 + } 325 + } 326 + 327 + /** 328 + * insert_node - insert a node to the replay list 330 329 * @c: UBIFS file-system description object 331 330 * @lnum: node logical eraseblock number 332 331 * @offs: node offset ··· 347 328 * @old_size: truncation old size 348 329 * @new_size: truncation new size 349 330 * 350 - * This function inserts a scanned non-direntry node to the replay tree. The 351 - * replay tree is an RB-tree containing @struct replay_entry elements which are 352 - * indexed by the sequence number. The replay tree is applied at the very end 353 - * of the replay process. Since the tree is sorted in sequence number order, 354 - * the older modifications are applied first. This function returns zero in 355 - * case of success and a negative error code in case of failure. 
331 + * This function inserts a scanned non-direntry node to the replay list. The 332 + * replay list contains @struct replay_entry elements, and we sort this list in 333 + * sequence number order before applying it. The replay list is applied at the 334 + * very end of the replay process. Since the list is sorted in sequence number 335 + * order, the older modifications are applied first. This function returns zero 336 + * in case of success and a negative error code in case of failure. 356 337 */ 357 338 static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, 358 339 union ubifs_key *key, unsigned long long sqnum, 359 340 int deletion, int *used, loff_t old_size, 360 341 loff_t new_size) 361 342 { 362 - struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; 363 343 struct replay_entry *r; 344 + 345 + dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); 364 346 365 347 if (key_inum(c, key) >= c->highest_inum) 366 348 c->highest_inum = key_inum(c, key); 367 - 368 - dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); 369 - while (*p) { 370 - parent = *p; 371 - r = rb_entry(parent, struct replay_entry, rb); 372 - if (sqnum < r->sqnum) { 373 - p = &(*p)->rb_left; 374 - continue; 375 - } else if (sqnum > r->sqnum) { 376 - p = &(*p)->rb_right; 377 - continue; 378 - } 379 - ubifs_err("duplicate sqnum in replay"); 380 - return -EINVAL; 381 - } 382 349 383 350 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); 384 351 if (!r) ··· 375 370 r->lnum = lnum; 376 371 r->offs = offs; 377 372 r->len = len; 373 + r->deletion = !!deletion; 378 374 r->sqnum = sqnum; 379 - r->flags = (deletion ? 
REPLAY_DELETION : 0); 375 + key_copy(c, key, &r->key); 380 376 r->old_size = old_size; 381 377 r->new_size = new_size; 382 - key_copy(c, key, &r->key); 383 378 384 - rb_link_node(&r->rb, parent, p); 385 - rb_insert_color(&r->rb, &c->replay_tree); 379 + list_add_tail(&r->list, &c->replay_list); 386 380 return 0; 387 381 } 388 382 389 383 /** 390 - * insert_dent - insert a directory entry node into the replay tree. 384 + * insert_dent - insert a directory entry node into the replay list. 391 385 * @c: UBIFS file-system description object 392 386 * @lnum: node logical eraseblock number 393 387 * @offs: node offset ··· 398 394 * @deletion: non-zero if this is a deletion 399 395 * @used: number of bytes in use in a LEB 400 396 * 401 - * This function inserts a scanned directory entry node to the replay tree. 402 - * Returns zero in case of success and a negative error code in case of 403 - * failure. 404 - * 405 - * This function is also used for extended attribute entries because they are 406 - * implemented as directory entry nodes. 397 + * This function inserts a scanned directory entry node or an extended 398 + * attribute entry to the replay list. Returns zero in case of success and a 399 + * negative error code in case of failure. 
407 400 */ 408 401 static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len, 409 402 union ubifs_key *key, const char *name, int nlen, 410 403 unsigned long long sqnum, int deletion, int *used) 411 404 { 412 - struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; 413 405 struct replay_entry *r; 414 406 char *nbuf; 415 407 408 + dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); 416 409 if (key_inum(c, key) >= c->highest_inum) 417 410 c->highest_inum = key_inum(c, key); 418 - 419 - dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); 420 - while (*p) { 421 - parent = *p; 422 - r = rb_entry(parent, struct replay_entry, rb); 423 - if (sqnum < r->sqnum) { 424 - p = &(*p)->rb_left; 425 - continue; 426 - } 427 - if (sqnum > r->sqnum) { 428 - p = &(*p)->rb_right; 429 - continue; 430 - } 431 - ubifs_err("duplicate sqnum in replay"); 432 - return -EINVAL; 433 - } 434 411 435 412 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); 436 413 if (!r) 437 414 return -ENOMEM; 415 + 438 416 nbuf = kmalloc(nlen + 1, GFP_KERNEL); 439 417 if (!nbuf) { 440 418 kfree(r); ··· 428 442 r->lnum = lnum; 429 443 r->offs = offs; 430 444 r->len = len; 445 + r->deletion = !!deletion; 431 446 r->sqnum = sqnum; 447 + key_copy(c, key, &r->key); 432 448 r->nm.len = nlen; 433 449 memcpy(nbuf, name, nlen); 434 450 nbuf[nlen] = '\0'; 435 451 r->nm.name = nbuf; 436 - r->flags = (deletion ? REPLAY_DELETION : 0); 437 - key_copy(c, key, &r->key); 438 452 439 - ubifs_assert(!*p); 440 - rb_link_node(&r->rb, parent, p); 441 - rb_insert_color(&r->rb, &c->replay_tree); 453 + list_add_tail(&r->list, &c->replay_list); 442 454 return 0; 443 455 } 444 456 ··· 473 489 } 474 490 475 491 /** 492 + * is_last_bud - check if the bud is the last in the journal head. 493 + * @c: UBIFS file-system description object 494 + * @bud: bud description object 495 + * 496 + * This function checks if bud @bud is the last bud in its journal head. 
This 497 + * information is then used by 'replay_bud()' to decide whether the bud can 498 + * have corruptions or not. Indeed, only last buds can be corrupted by power 499 + * cuts. Returns %1 if this is the last bud, and %0 if not. 500 + */ 501 + static int is_last_bud(struct ubifs_info *c, struct ubifs_bud *bud) 502 + { 503 + struct ubifs_jhead *jh = &c->jheads[bud->jhead]; 504 + struct ubifs_bud *next; 505 + uint32_t data; 506 + int err; 507 + 508 + if (list_is_last(&bud->list, &jh->buds_list)) 509 + return 1; 510 + 511 + /* 512 + * The following is a quirk to make sure we work correctly with UBIFS 513 + * images used with older UBIFS. 514 + * 515 + * Normally, the last bud will be the last in the journal head's list 516 + * of bud. However, there is one exception if the UBIFS image belongs 517 + * to older UBIFS. This is fairly unlikely: one would need to use old 518 + * UBIFS, then have a power cut exactly at the right point, and then 519 + * try to mount this image with new UBIFS. 520 + * 521 + * The exception is: it is possible to have 2 buds A and B, A goes 522 + * before B, and B is the last, bud B is contains no data, and bud A is 523 + * corrupted at the end. The reason is that in older versions when the 524 + * journal code switched the next bud (from A to B), it first added a 525 + * log reference node for the new bud (B), and only after this it 526 + * synchronized the write-buffer of current bud (A). But later this was 527 + * changed and UBIFS started to always synchronize the write-buffer of 528 + * the bud (A) before writing the log reference for the new bud (B). 529 + * 530 + * But because older UBIFS always synchronized A's write-buffer before 531 + * writing to B, we can recognize this exceptional situation but 532 + * checking the contents of bud B - if it is empty, then A can be 533 + * treated as the last and we can recover it. 534 + * 535 + * TODO: remove this piece of code in a couple of years (today it is 536 + * 16.05.2011). 
537 + */ 538 + next = list_entry(bud->list.next, struct ubifs_bud, list); 539 + if (!list_is_last(&next->list, &jh->buds_list)) 540 + return 0; 541 + 542 + err = ubi_read(c->ubi, next->lnum, (char *)&data, 543 + next->start, 4); 544 + if (err) 545 + return 0; 546 + 547 + return data == 0xFFFFFFFF; 548 + } 549 + 550 + /** 476 551 * replay_bud - replay a bud logical eraseblock. 477 552 * @c: UBIFS file-system description object 478 - * @lnum: bud logical eraseblock number to replay 479 - * @offs: bud start offset 480 - * @jhead: journal head to which this bud belongs 481 - * @free: amount of free space in the bud is returned here 482 - * @dirty: amount of dirty space from padding and deletion nodes is returned 483 - * here 553 + * @b: bud entry which describes the bud 484 554 * 485 - * This function returns zero in case of success and a negative error code in 486 - * case of failure. 555 + * This function replays bud @bud, recovers it if needed, and adds all nodes 556 + * from this bud to the replay list. Returns zero in case of success and a 557 + * negative error code in case of failure. 
487 558 */ 488 - static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, 489 - int *free, int *dirty) 559 + static int replay_bud(struct ubifs_info *c, struct bud_entry *b) 490 560 { 491 - int err = 0, used = 0; 561 + int is_last = is_last_bud(c, b->bud); 562 + int err = 0, used = 0, lnum = b->bud->lnum, offs = b->bud->start; 492 563 struct ubifs_scan_leb *sleb; 493 564 struct ubifs_scan_node *snod; 494 - struct ubifs_bud *bud; 495 565 496 - dbg_mnt("replay bud LEB %d, head %d", lnum, jhead); 497 - if (c->need_recovery) 498 - sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, jhead != GCHD); 566 + dbg_mnt("replay bud LEB %d, head %d, offs %d, is_last %d", 567 + lnum, b->bud->jhead, offs, is_last); 568 + 569 + if (c->need_recovery && is_last) 570 + /* 571 + * Recover only last LEBs in the journal heads, because power 572 + * cuts may cause corruptions only in these LEBs, because only 573 + * these LEBs could possibly be written to at the power cut 574 + * time. 575 + */ 576 + sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, 577 + b->bud->jhead != GCHD); 499 578 else 500 579 sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0); 501 580 if (IS_ERR(sleb)) ··· 674 627 goto out; 675 628 } 676 629 677 - bud = ubifs_search_bud(c, lnum); 678 - if (!bud) 679 - BUG(); 680 - 630 + ubifs_assert(ubifs_search_bud(c, lnum)); 681 631 ubifs_assert(sleb->endpt - offs >= used); 682 632 ubifs_assert(sleb->endpt % c->min_io_size == 0); 683 633 684 - *dirty = sleb->endpt - offs - used; 685 - *free = c->leb_size - sleb->endpt; 634 + b->dirty = sleb->endpt - offs - used; 635 + b->free = c->leb_size - sleb->endpt; 636 + dbg_mnt("bud LEB %d replied: dirty %d, free %d", lnum, b->dirty, b->free); 686 637 687 638 out: 688 639 ubifs_scan_destroy(sleb); ··· 694 649 } 695 650 696 651 /** 697 - * insert_ref_node - insert a reference node to the replay tree. 
698 - * @c: UBIFS file-system description object 699 - * @lnum: node logical eraseblock number 700 - * @offs: node offset 701 - * @sqnum: sequence number 702 - * @free: amount of free space in bud 703 - * @dirty: amount of dirty space from padding and deletion nodes 704 - * @jhead: journal head number for the bud 705 - * 706 - * This function inserts a reference node to the replay tree and returns zero 707 - * in case of success or a negative error code in case of failure. 708 - */ 709 - static int insert_ref_node(struct ubifs_info *c, int lnum, int offs, 710 - unsigned long long sqnum, int free, int dirty, 711 - int jhead) 712 - { 713 - struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; 714 - struct replay_entry *r; 715 - 716 - dbg_mnt("add ref LEB %d:%d", lnum, offs); 717 - while (*p) { 718 - parent = *p; 719 - r = rb_entry(parent, struct replay_entry, rb); 720 - if (sqnum < r->sqnum) { 721 - p = &(*p)->rb_left; 722 - continue; 723 - } else if (sqnum > r->sqnum) { 724 - p = &(*p)->rb_right; 725 - continue; 726 - } 727 - ubifs_err("duplicate sqnum in replay tree"); 728 - return -EINVAL; 729 - } 730 - 731 - r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); 732 - if (!r) 733 - return -ENOMEM; 734 - 735 - r->lnum = lnum; 736 - r->offs = offs; 737 - r->sqnum = sqnum; 738 - r->flags = REPLAY_REF; 739 - r->free = free; 740 - r->dirty = dirty; 741 - r->jhead = jhead; 742 - 743 - rb_link_node(&r->rb, parent, p); 744 - rb_insert_color(&r->rb, &c->replay_tree); 745 - return 0; 746 - } 747 - 748 - /** 749 652 * replay_buds - replay all buds. 
750 653 * @c: UBIFS file-system description object 751 654 * ··· 703 710 static int replay_buds(struct ubifs_info *c) 704 711 { 705 712 struct bud_entry *b; 706 - int err, uninitialized_var(free), uninitialized_var(dirty); 713 + int err; 714 + unsigned long long prev_sqnum = 0; 707 715 708 716 list_for_each_entry(b, &c->replay_buds, list) { 709 - err = replay_bud(c, b->bud->lnum, b->bud->start, b->bud->jhead, 710 - &free, &dirty); 717 + err = replay_bud(c, b); 711 718 if (err) 712 719 return err; 713 - err = insert_ref_node(c, b->bud->lnum, b->bud->start, b->sqnum, 714 - free, dirty, b->bud->jhead); 715 - if (err) 716 - return err; 720 + 721 + ubifs_assert(b->sqnum > prev_sqnum); 722 + prev_sqnum = b->sqnum; 717 723 } 718 724 719 725 return 0; ··· 1052 1060 if (err) 1053 1061 goto out; 1054 1062 1055 - err = apply_replay_tree(c); 1063 + err = apply_replay_list(c); 1064 + if (err) 1065 + goto out; 1066 + 1067 + err = set_buds_lprops(c); 1056 1068 if (err) 1057 1069 goto out; 1058 1070 1059 1071 /* 1060 - * UBIFS budgeting calculations use @c->budg_uncommitted_idx variable 1061 - * to roughly estimate index growth. Things like @c->min_idx_lebs 1072 + * UBIFS budgeting calculations use @c->bi.uncommitted_idx variable 1073 + * to roughly estimate index growth. Things like @c->bi.min_idx_lebs 1062 1074 * depend on it. This means we have to initialize it to make sure 1063 1075 * budgeting works properly. 
1064 1076 */ 1065 - c->budg_uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt); 1066 - c->budg_uncommitted_idx *= c->max_idx_node_sz; 1077 + c->bi.uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt); 1078 + c->bi.uncommitted_idx *= c->max_idx_node_sz; 1067 1079 1068 1080 ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery); 1069 1081 dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, " 1070 1082 "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum, 1071 1083 (unsigned long)c->highest_inum); 1072 1084 out: 1073 - destroy_replay_tree(c); 1085 + destroy_replay_list(c); 1074 1086 destroy_bud_list(c); 1075 1087 c->replaying = 0; 1076 1088 return err;
+152 -1
fs/ubifs/sb.c
··· 475 475 * @c: UBIFS file-system description object 476 476 * 477 477 * This function returns a pointer to the superblock node or a negative error 478 - * code. 478 + * code. Note, the user of this function is responsible of kfree()'ing the 479 + * returned superblock buffer. 479 480 */ 480 481 struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c) 481 482 { ··· 617 616 c->vfs_sb->s_time_gran = le32_to_cpu(sup->time_gran); 618 617 memcpy(&c->uuid, &sup->uuid, 16); 619 618 c->big_lpt = !!(sup_flags & UBIFS_FLG_BIGLPT); 619 + c->space_fixup = !!(sup_flags & UBIFS_FLG_SPACE_FIXUP); 620 620 621 621 /* Automatically increase file system size to the maximum size */ 622 622 c->old_leb_cnt = c->leb_cnt; ··· 650 648 err = validate_sb(c, sup); 651 649 out: 652 650 kfree(sup); 651 + return err; 652 + } 653 + 654 + /** 655 + * fixup_leb - fixup/unmap an LEB containing free space. 656 + * @c: UBIFS file-system description object 657 + * @lnum: the LEB number to fix up 658 + * @len: number of used bytes in LEB (starting at offset 0) 659 + * 660 + * This function reads the contents of the given LEB number @lnum, then fixes 661 + * it up, so that empty min. I/O units in the end of LEB are actually erased on 662 + * flash (rather than being just all-0xff real data). If the LEB is completely 663 + * empty, it is simply unmapped. 
664 + */ 665 + static int fixup_leb(struct ubifs_info *c, int lnum, int len) 666 + { 667 + int err; 668 + 669 + ubifs_assert(len >= 0); 670 + ubifs_assert(len % c->min_io_size == 0); 671 + ubifs_assert(len < c->leb_size); 672 + 673 + if (len == 0) { 674 + dbg_mnt("unmap empty LEB %d", lnum); 675 + return ubi_leb_unmap(c->ubi, lnum); 676 + } 677 + 678 + dbg_mnt("fixup LEB %d, data len %d", lnum, len); 679 + err = ubi_read(c->ubi, lnum, c->sbuf, 0, len); 680 + if (err) 681 + return err; 682 + 683 + return ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN); 684 + } 685 + 686 + /** 687 + * fixup_free_space - find & remap all LEBs containing free space. 688 + * @c: UBIFS file-system description object 689 + * 690 + * This function walks through all LEBs in the filesystem and fiexes up those 691 + * containing free/empty space. 692 + */ 693 + static int fixup_free_space(struct ubifs_info *c) 694 + { 695 + int lnum, err = 0; 696 + struct ubifs_lprops *lprops; 697 + 698 + ubifs_get_lprops(c); 699 + 700 + /* Fixup LEBs in the master area */ 701 + for (lnum = UBIFS_MST_LNUM; lnum < UBIFS_LOG_LNUM; lnum++) { 702 + err = fixup_leb(c, lnum, c->mst_offs + c->mst_node_alsz); 703 + if (err) 704 + goto out; 705 + } 706 + 707 + /* Unmap unused log LEBs */ 708 + lnum = ubifs_next_log_lnum(c, c->lhead_lnum); 709 + while (lnum != c->ltail_lnum) { 710 + err = fixup_leb(c, lnum, 0); 711 + if (err) 712 + goto out; 713 + lnum = ubifs_next_log_lnum(c, lnum); 714 + } 715 + 716 + /* Fixup the current log head */ 717 + err = fixup_leb(c, c->lhead_lnum, c->lhead_offs); 718 + if (err) 719 + goto out; 720 + 721 + /* Fixup LEBs in the LPT area */ 722 + for (lnum = c->lpt_first; lnum <= c->lpt_last; lnum++) { 723 + int free = c->ltab[lnum - c->lpt_first].free; 724 + 725 + if (free > 0) { 726 + err = fixup_leb(c, lnum, c->leb_size - free); 727 + if (err) 728 + goto out; 729 + } 730 + } 731 + 732 + /* Unmap LEBs in the orphans area */ 733 + for (lnum = c->orph_first; lnum <= c->orph_last; 
lnum++) { 734 + err = fixup_leb(c, lnum, 0); 735 + if (err) 736 + goto out; 737 + } 738 + 739 + /* Fixup LEBs in the main area */ 740 + for (lnum = c->main_first; lnum < c->leb_cnt; lnum++) { 741 + lprops = ubifs_lpt_lookup(c, lnum); 742 + if (IS_ERR(lprops)) { 743 + err = PTR_ERR(lprops); 744 + goto out; 745 + } 746 + 747 + if (lprops->free > 0) { 748 + err = fixup_leb(c, lnum, c->leb_size - lprops->free); 749 + if (err) 750 + goto out; 751 + } 752 + } 753 + 754 + out: 755 + ubifs_release_lprops(c); 756 + return err; 757 + } 758 + 759 + /** 760 + * ubifs_fixup_free_space - find & fix all LEBs with free space. 761 + * @c: UBIFS file-system description object 762 + * 763 + * This function fixes up LEBs containing free space on first mount, if the 764 + * appropriate flag was set when the FS was created. Each LEB with one or more 765 + * empty min. I/O unit (i.e. free-space-count > 0) is re-written, to make sure 766 + * the free space is actually erased. E.g., this is necessary for some NAND 767 + * chips, since the free space may have been programmed like real "0xff" data 768 + * (generating a non-0xff ECC), causing future writes to the not-really-erased 769 + * NAND pages to behave badly. After the space is fixed up, the superblock flag 770 + * is cleared, so that this is skipped for all future mounts. 
771 + */ 772 + int ubifs_fixup_free_space(struct ubifs_info *c) 773 + { 774 + int err; 775 + struct ubifs_sb_node *sup; 776 + 777 + ubifs_assert(c->space_fixup); 778 + ubifs_assert(!c->ro_mount); 779 + 780 + ubifs_msg("start fixing up free space"); 781 + 782 + err = fixup_free_space(c); 783 + if (err) 784 + return err; 785 + 786 + sup = ubifs_read_sb_node(c); 787 + if (IS_ERR(sup)) 788 + return PTR_ERR(sup); 789 + 790 + /* Free-space fixup is no longer required */ 791 + c->space_fixup = 0; 792 + sup->flags &= cpu_to_le32(~UBIFS_FLG_SPACE_FIXUP); 793 + 794 + err = ubifs_write_sb_node(c, sup); 795 + kfree(sup); 796 + if (err) 797 + return err; 798 + 799 + ubifs_msg("free space fixup complete"); 653 800 return err; 654 801 }
+30 -16
fs/ubifs/super.c
··· 375 375 ubifs_release_dirty_inode_budget(c, ui); 376 376 else { 377 377 /* We've deleted something - clean the "no space" flags */ 378 - c->nospace = c->nospace_rp = 0; 378 + c->bi.nospace = c->bi.nospace_rp = 0; 379 379 smp_wmb(); 380 380 } 381 381 done: ··· 694 694 * be compressed and direntries are of the maximum size. 695 695 * 696 696 * Note, data, which may be stored in inodes is budgeted separately, so 697 - * it is not included into 'c->inode_budget'. 697 + * it is not included into 'c->bi.inode_budget'. 698 698 */ 699 - c->page_budget = UBIFS_MAX_DATA_NODE_SZ * UBIFS_BLOCKS_PER_PAGE; 700 - c->inode_budget = UBIFS_INO_NODE_SZ; 701 - c->dent_budget = UBIFS_MAX_DENT_NODE_SZ; 699 + c->bi.page_budget = UBIFS_MAX_DATA_NODE_SZ * UBIFS_BLOCKS_PER_PAGE; 700 + c->bi.inode_budget = UBIFS_INO_NODE_SZ; 701 + c->bi.dent_budget = UBIFS_MAX_DENT_NODE_SZ; 702 702 703 703 /* 704 704 * When the amount of flash space used by buds becomes ··· 742 742 { 743 743 long long tmp64; 744 744 745 - c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); 745 + c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c); 746 746 c->report_rp_size = ubifs_reported_space(c, c->rp_size); 747 747 748 748 /* ··· 1144 1144 { 1145 1145 ubifs_assert(c->dark_wm > 0); 1146 1146 if (c->lst.total_free + c->lst.total_dirty < c->dark_wm) { 1147 - ubifs_err("insufficient free space to mount in read/write mode"); 1148 - dbg_dump_budg(c); 1147 + ubifs_err("insufficient free space to mount in R/W mode"); 1148 + dbg_dump_budg(c, &c->bi); 1149 1149 dbg_dump_lprops(c); 1150 1150 return -ENOSPC; 1151 1151 } ··· 1304 1304 if (err) 1305 1305 goto out_lpt; 1306 1306 1307 - err = dbg_check_idx_size(c, c->old_idx_sz); 1307 + err = dbg_check_idx_size(c, c->bi.old_idx_sz); 1308 1308 if (err) 1309 1309 goto out_lpt; 1310 1310 ··· 1313 1313 goto out_journal; 1314 1314 1315 1315 /* Calculate 'min_idx_lebs' after journal replay */ 1316 - c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); 1316 + c->bi.min_idx_lebs = 
ubifs_calc_min_idx_lebs(c); 1317 1317 1318 1318 err = ubifs_mount_orphans(c, c->need_recovery, c->ro_mount); 1319 1319 if (err) ··· 1396 1396 } else 1397 1397 ubifs_assert(c->lst.taken_empty_lebs > 0); 1398 1398 1399 + if (!c->ro_mount && c->space_fixup) { 1400 + err = ubifs_fixup_free_space(c); 1401 + if (err) 1402 + goto out_infos; 1403 + } 1404 + 1399 1405 err = dbg_check_filesystem(c); 1400 1406 if (err) 1401 1407 goto out_infos; ··· 1448 1442 c->main_lebs, c->main_first, c->leb_cnt - 1); 1449 1443 dbg_msg("index LEBs: %d", c->lst.idx_lebs); 1450 1444 dbg_msg("total index bytes: %lld (%lld KiB, %lld MiB)", 1451 - c->old_idx_sz, c->old_idx_sz >> 10, c->old_idx_sz >> 20); 1445 + c->bi.old_idx_sz, c->bi.old_idx_sz >> 10, 1446 + c->bi.old_idx_sz >> 20); 1452 1447 dbg_msg("key hash type: %d", c->key_hash_type); 1453 1448 dbg_msg("tree fanout: %d", c->fanout); 1454 1449 dbg_msg("reserved GC LEB: %d", c->gc_lnum); ··· 1463 1456 dbg_msg("node sizes: ref %zu, cmt. start %zu, orph %zu", 1464 1457 UBIFS_REF_NODE_SZ, UBIFS_CS_NODE_SZ, UBIFS_ORPH_NODE_SZ); 1465 1458 dbg_msg("max. node sizes: data %zu, inode %zu dentry %zu, idx %d", 1466 - UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ, 1459 + UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ, 1467 1460 UBIFS_MAX_DENT_NODE_SZ, ubifs_idx_node_sz(c, c->fanout)); 1468 1461 dbg_msg("dead watermark: %d", c->dead_wm); 1469 1462 dbg_msg("dark watermark: %d", c->dark_wm); ··· 1591 1584 } 1592 1585 sup->leb_cnt = cpu_to_le32(c->leb_cnt); 1593 1586 err = ubifs_write_sb_node(c, sup); 1587 + kfree(sup); 1594 1588 if (err) 1595 1589 goto out; 1596 1590 } ··· 1692 1684 */ 1693 1685 err = dbg_check_space_info(c); 1694 1686 } 1687 + 1688 + if (c->space_fixup) { 1689 + err = ubifs_fixup_free_space(c); 1690 + if (err) 1691 + goto out; 1692 + } 1693 + 1695 1694 mutex_unlock(&c->umount_mutex); 1696 1695 return err; 1697 1696 ··· 1781 1766 * to write them back because of I/O errors. 
1782 1767 */ 1783 1768 if (!c->ro_error) { 1784 - ubifs_assert(atomic_long_read(&c->dirty_pg_cnt) == 0); 1785 - ubifs_assert(c->budg_idx_growth == 0); 1786 - ubifs_assert(c->budg_dd_growth == 0); 1787 - ubifs_assert(c->budg_data_growth == 0); 1769 + ubifs_assert(c->bi.idx_growth == 0); 1770 + ubifs_assert(c->bi.dd_growth == 0); 1771 + ubifs_assert(c->bi.data_growth == 0); 1788 1772 } 1789 1773 1790 1774 /*
+5 -5
fs/ubifs/tnc.c
··· 2557 2557 if (err) { 2558 2558 /* Ensure the znode is dirtied */ 2559 2559 if (znode->cnext || !ubifs_zn_dirty(znode)) { 2560 - znode = dirty_cow_bottom_up(c, znode); 2561 - if (IS_ERR(znode)) { 2562 - err = PTR_ERR(znode); 2563 - goto out_unlock; 2564 - } 2560 + znode = dirty_cow_bottom_up(c, znode); 2561 + if (IS_ERR(znode)) { 2562 + err = PTR_ERR(znode); 2563 + goto out_unlock; 2564 + } 2565 2565 } 2566 2566 err = tnc_delete(c, znode, n); 2567 2567 }
+8 -10
fs/ubifs/tnc_commit.c
··· 377 377 c->gap_lebs = NULL; 378 378 return err; 379 379 } 380 - if (!dbg_force_in_the_gaps_enabled) { 380 + if (dbg_force_in_the_gaps_enabled()) { 381 381 /* 382 382 * Do not print scary warnings if the debugging 383 383 * option which forces in-the-gaps is enabled. 384 384 */ 385 - ubifs_err("out of space"); 386 - spin_lock(&c->space_lock); 387 - dbg_dump_budg(c); 388 - spin_unlock(&c->space_lock); 385 + ubifs_warn("out of space"); 386 + dbg_dump_budg(c, &c->bi); 389 387 dbg_dump_lprops(c); 390 388 } 391 389 /* Try to commit anyway */ ··· 794 796 spin_lock(&c->space_lock); 795 797 /* 796 798 * Although we have not finished committing yet, update size of the 797 - * committed index ('c->old_idx_sz') and zero out the index growth 799 + * committed index ('c->bi.old_idx_sz') and zero out the index growth 798 800 * budget. It is OK to do this now, because we've reserved all the 799 801 * space which is needed to commit the index, and it is save for the 800 802 * budgeting subsystem to assume the index is already committed, 801 803 * even though it is not. 802 804 */ 803 - ubifs_assert(c->min_idx_lebs == ubifs_calc_min_idx_lebs(c)); 804 - c->old_idx_sz = c->calc_idx_sz; 805 - c->budg_uncommitted_idx = 0; 806 - c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); 805 + ubifs_assert(c->bi.min_idx_lebs == ubifs_calc_min_idx_lebs(c)); 806 + c->bi.old_idx_sz = c->calc_idx_sz; 807 + c->bi.uncommitted_idx = 0; 808 + c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c); 807 809 spin_unlock(&c->space_lock); 808 810 mutex_unlock(&c->tnc_mutex); 809 811
+16 -14
fs/ubifs/ubifs-media.h
··· 408 408 * Superblock flags. 409 409 * 410 410 * UBIFS_FLG_BIGLPT: if "big" LPT model is used if set 411 + * UBIFS_FLG_SPACE_FIXUP: first-mount "fixup" of free space within LEBs needed 411 412 */ 412 413 enum { 413 414 UBIFS_FLG_BIGLPT = 0x02, 415 + UBIFS_FLG_SPACE_FIXUP = 0x04, 414 416 }; 415 417 416 418 /** ··· 436 434 __u8 node_type; 437 435 __u8 group_type; 438 436 __u8 padding[2]; 439 - } __attribute__ ((packed)); 437 + } __packed; 440 438 441 439 /** 442 440 * union ubifs_dev_desc - device node descriptor. ··· 450 448 union ubifs_dev_desc { 451 449 __le32 new; 452 450 __le64 huge; 453 - } __attribute__ ((packed)); 451 + } __packed; 454 452 455 453 /** 456 454 * struct ubifs_ino_node - inode node. ··· 511 509 __le16 compr_type; 512 510 __u8 padding2[26]; /* Watch 'zero_ino_node_unused()' if changing! */ 513 511 __u8 data[]; 514 - } __attribute__ ((packed)); 512 + } __packed; 515 513 516 514 /** 517 515 * struct ubifs_dent_node - directory entry node. ··· 536 534 __le16 nlen; 537 535 __u8 padding2[4]; /* Watch 'zero_dent_node_unused()' if changing! */ 538 536 __u8 name[]; 539 - } __attribute__ ((packed)); 537 + } __packed; 540 538 541 539 /** 542 540 * struct ubifs_data_node - data node. ··· 557 555 __le16 compr_type; 558 556 __u8 padding[2]; /* Watch 'zero_data_node_unused()' if changing! */ 559 557 __u8 data[]; 560 - } __attribute__ ((packed)); 558 + } __packed; 561 559 562 560 /** 563 561 * struct ubifs_trun_node - truncation node. ··· 577 575 __u8 padding[12]; /* Watch 'zero_trun_node_unused()' if changing! */ 578 576 __le64 old_size; 579 577 __le64 new_size; 580 - } __attribute__ ((packed)); 578 + } __packed; 581 579 582 580 /** 583 581 * struct ubifs_pad_node - padding node. ··· 588 586 struct ubifs_pad_node { 589 587 struct ubifs_ch ch; 590 588 __le32 pad_len; 591 - } __attribute__ ((packed)); 589 + } __packed; 592 590 593 591 /** 594 592 * struct ubifs_sb_node - superblock node. 
··· 646 644 __u8 uuid[16]; 647 645 __le32 ro_compat_version; 648 646 __u8 padding2[3968]; 649 - } __attribute__ ((packed)); 647 + } __packed; 650 648 651 649 /** 652 650 * struct ubifs_mst_node - master node. ··· 713 711 __le32 idx_lebs; 714 712 __le32 leb_cnt; 715 713 __u8 padding[344]; 716 - } __attribute__ ((packed)); 714 + } __packed; 717 715 718 716 /** 719 717 * struct ubifs_ref_node - logical eraseblock reference node. ··· 729 727 __le32 offs; 730 728 __le32 jhead; 731 729 __u8 padding[28]; 732 - } __attribute__ ((packed)); 730 + } __packed; 733 731 734 732 /** 735 733 * struct ubifs_branch - key/reference/length branch ··· 743 741 __le32 offs; 744 742 __le32 len; 745 743 __u8 key[]; 746 - } __attribute__ ((packed)); 744 + } __packed; 747 745 748 746 /** 749 747 * struct ubifs_idx_node - indexing node. ··· 757 755 __le16 child_cnt; 758 756 __le16 level; 759 757 __u8 branches[]; 760 - } __attribute__ ((packed)); 758 + } __packed; 761 759 762 760 /** 763 761 * struct ubifs_cs_node - commit start node. ··· 767 765 struct ubifs_cs_node { 768 766 struct ubifs_ch ch; 769 767 __le64 cmt_no; 770 - } __attribute__ ((packed)); 768 + } __packed; 771 769 772 770 /** 773 771 * struct ubifs_orph_node - orphan node. ··· 779 777 struct ubifs_ch ch; 780 778 __le64 cmt_no; 781 779 __le64 inos[]; 782 - } __attribute__ ((packed)); 780 + } __packed; 783 781 784 782 #endif /* __UBIFS_MEDIA_H__ */
+46 -40
fs/ubifs/ubifs.h
··· 389 389 * The @ui_size is a "shadow" variable for @inode->i_size and UBIFS uses 390 390 * @ui_size instead of @inode->i_size. The reason for this is that UBIFS cannot 391 391 * make sure @inode->i_size is always changed under @ui_mutex, because it 392 - * cannot call 'truncate_setsize()' with @ui_mutex locked, because it would deadlock 393 - * with 'ubifs_writepage()' (see file.c). All the other inode fields are 394 - * changed under @ui_mutex, so they do not need "shadow" fields. Note, one 392 + * cannot call 'truncate_setsize()' with @ui_mutex locked, because it would 393 + * deadlock with 'ubifs_writepage()' (see file.c). All the other inode fields 394 + * are changed under @ui_mutex, so they do not need "shadow" fields. Note, one 395 395 * could consider to rework locking and base it on "shadow" fields. 396 396 */ 397 397 struct ubifs_inode { ··· 937 937 unsigned int compr_type:2; 938 938 }; 939 939 940 + /** 941 + * struct ubifs_budg_info - UBIFS budgeting information. 942 + * @idx_growth: amount of bytes budgeted for index growth 943 + * @data_growth: amount of bytes budgeted for cached data 944 + * @dd_growth: amount of bytes budgeted for cached data that will make 945 + * other data dirty 946 + * @uncommitted_idx: amount of bytes were budgeted for growth of the index, but 947 + * which still have to be taken into account because the index 948 + * has not been committed so far 949 + * @old_idx_sz: size of index on flash 950 + * @min_idx_lebs: minimum number of LEBs required for the index 951 + * @nospace: non-zero if the file-system does not have flash space (used as 952 + * optimization) 953 + * @nospace_rp: the same as @nospace, but additionally means that even reserved 954 + * pool is full 955 + * @page_budget: budget for a page (constant, nenver changed after mount) 956 + * @inode_budget: budget for an inode (constant, nenver changed after mount) 957 + * @dent_budget: budget for a directory entry (constant, nenver changed after 958 + * mount) 959 + 
*/ 960 + struct ubifs_budg_info { 961 + long long idx_growth; 962 + long long data_growth; 963 + long long dd_growth; 964 + long long uncommitted_idx; 965 + unsigned long long old_idx_sz; 966 + int min_idx_lebs; 967 + unsigned int nospace:1; 968 + unsigned int nospace_rp:1; 969 + int page_budget; 970 + int inode_budget; 971 + int dent_budget; 972 + }; 973 + 940 974 struct ubifs_debug_info; 941 975 942 976 /** ··· 1014 980 * @cmt_wq: wait queue to sleep on if the log is full and a commit is running 1015 981 * 1016 982 * @big_lpt: flag that LPT is too big to write whole during commit 983 + * @space_fixup: flag indicating that free space in LEBs needs to be cleaned up 1017 984 * @no_chk_data_crc: do not check CRCs when reading data nodes (except during 1018 985 * recovery) 1019 986 * @bulk_read: enable bulk-reads ··· 1092 1057 * @dirty_zn_cnt: number of dirty znodes 1093 1058 * @clean_zn_cnt: number of clean znodes 1094 1059 * 1095 - * @budg_idx_growth: amount of bytes budgeted for index growth 1096 - * @budg_data_growth: amount of bytes budgeted for cached data 1097 - * @budg_dd_growth: amount of bytes budgeted for cached data that will make 1098 - * other data dirty 1099 - * @budg_uncommitted_idx: amount of bytes were budgeted for growth of the index, 1100 - * but which still have to be taken into account because 1101 - * the index has not been committed so far 1102 - * @space_lock: protects @budg_idx_growth, @budg_data_growth, @budg_dd_growth, 1103 - * @budg_uncommited_idx, @min_idx_lebs, @old_idx_sz, @lst, 1104 - * @nospace, and @nospace_rp; 1105 - * @min_idx_lebs: minimum number of LEBs required for the index 1106 - * @old_idx_sz: size of index on flash 1060 + * @space_lock: protects @bi and @lst 1061 + * @lst: lprops statistics 1062 + * @bi: budgeting information 1107 1063 * @calc_idx_sz: temporary variable which is used to calculate new index size 1108 1064 * (contains accurate new index size at end of TNC commit start) 1109 - * @lst: lprops statistics 1110 - * 
@nospace: non-zero if the file-system does not have flash space (used as 1111 - * optimization) 1112 - * @nospace_rp: the same as @nospace, but additionally means that even reserved 1113 - * pool is full 1114 - * 1115 - * @page_budget: budget for a page 1116 - * @inode_budget: budget for an inode 1117 - * @dent_budget: budget for a directory entry 1118 1065 * 1119 1066 * @ref_node_alsz: size of the LEB reference node aligned to the min. flash 1120 - * I/O unit 1067 + * I/O unit 1121 1068 * @mst_node_alsz: master node aligned size 1122 1069 * @min_idx_node_sz: minimum indexing node aligned on 8-bytes boundary 1123 1070 * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary ··· 1206 1189 * @replaying: %1 during journal replay 1207 1190 * @mounting: %1 while mounting 1208 1191 * @remounting_rw: %1 while re-mounting from R/O mode to R/W mode 1209 - * @replay_tree: temporary tree used during journal replay 1210 1192 * @replay_list: temporary list used during journal replay 1211 1193 * @replay_buds: list of buds to replay 1212 1194 * @cs_sqnum: sequence number of first node in the log (commit start node) ··· 1254 1238 wait_queue_head_t cmt_wq; 1255 1239 1256 1240 unsigned int big_lpt:1; 1241 + unsigned int space_fixup:1; 1257 1242 unsigned int no_chk_data_crc:1; 1258 1243 unsigned int bulk_read:1; 1259 1244 unsigned int default_compr:2; ··· 1325 1308 atomic_long_t dirty_zn_cnt; 1326 1309 atomic_long_t clean_zn_cnt; 1327 1310 1328 - long long budg_idx_growth; 1329 - long long budg_data_growth; 1330 - long long budg_dd_growth; 1331 - long long budg_uncommitted_idx; 1332 1311 spinlock_t space_lock; 1333 - int min_idx_lebs; 1334 - unsigned long long old_idx_sz; 1335 - unsigned long long calc_idx_sz; 1336 1312 struct ubifs_lp_stats lst; 1337 - unsigned int nospace:1; 1338 - unsigned int nospace_rp:1; 1339 - 1340 - int page_budget; 1341 - int inode_budget; 1342 - int dent_budget; 1313 + struct ubifs_budg_info bi; 1314 + unsigned long long calc_idx_sz; 1343 1315 
1344 1316 int ref_node_alsz; 1345 1317 int mst_node_alsz; ··· 1436 1430 unsigned int replaying:1; 1437 1431 unsigned int mounting:1; 1438 1432 unsigned int remounting_rw:1; 1439 - struct rb_root replay_tree; 1440 1433 struct list_head replay_list; 1441 1434 struct list_head replay_buds; 1442 1435 unsigned long long cs_sqnum; ··· 1633 1628 int ubifs_read_superblock(struct ubifs_info *c); 1634 1629 struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c); 1635 1630 int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup); 1631 + int ubifs_fixup_free_space(struct ubifs_info *c); 1636 1632 1637 1633 /* replay.c */ 1638 1634 int ubifs_validate_entry(struct ubifs_info *c,
+4 -4
fs/ubifs/xattr.c
··· 80 80 SECURITY_XATTR, 81 81 }; 82 82 83 - static const struct inode_operations none_inode_operations; 84 - static const struct file_operations none_file_operations; 83 + static const struct inode_operations empty_iops; 84 + static const struct file_operations empty_fops; 85 85 86 86 /** 87 87 * create_xattr - create an extended attribute. ··· 131 131 132 132 /* Re-define all operations to be "nothing" */ 133 133 inode->i_mapping->a_ops = &empty_aops; 134 - inode->i_op = &none_inode_operations; 135 - inode->i_fop = &none_file_operations; 134 + inode->i_op = &empty_iops; 135 + inode->i_fop = &empty_fops; 136 136 137 137 inode->i_flags |= S_SYNC | S_NOATIME | S_NOCMTIME | S_NOQUOTA; 138 138 ui = ubifs_inode(inode);