/* Source-viewer banner: at v3.19-rc2, 3163 lines, 87 kB (view raw) */
1/* 2 * linux/fs/ext3/super.c 3 * 4 * Copyright (C) 1992, 1993, 1994, 1995 5 * Remy Card (card@masi.ibp.fr) 6 * Laboratoire MASI - Institut Blaise Pascal 7 * Universite Pierre et Marie Curie (Paris VI) 8 * 9 * from 10 * 11 * linux/fs/minix/inode.c 12 * 13 * Copyright (C) 1991, 1992 Linus Torvalds 14 * 15 * Big-endian to little-endian byte-swapping/bitmaps by 16 * David S. Miller (davem@caip.rutgers.edu), 1995 17 */ 18 19#include <linux/module.h> 20#include <linux/blkdev.h> 21#include <linux/parser.h> 22#include <linux/exportfs.h> 23#include <linux/statfs.h> 24#include <linux/random.h> 25#include <linux/mount.h> 26#include <linux/quotaops.h> 27#include <linux/seq_file.h> 28#include <linux/log2.h> 29#include <linux/cleancache.h> 30#include <linux/namei.h> 31 32#include <asm/uaccess.h> 33 34#define CREATE_TRACE_POINTS 35 36#include "ext3.h" 37#include "xattr.h" 38#include "acl.h" 39#include "namei.h" 40 41#ifdef CONFIG_EXT3_DEFAULTS_TO_ORDERED 42 #define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_ORDERED_DATA 43#else 44 #define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_WRITEBACK_DATA 45#endif 46 47static int ext3_load_journal(struct super_block *, struct ext3_super_block *, 48 unsigned long journal_devnum); 49static int ext3_create_journal(struct super_block *, struct ext3_super_block *, 50 unsigned int); 51static int ext3_commit_super(struct super_block *sb, 52 struct ext3_super_block *es, 53 int sync); 54static void ext3_mark_recovery_complete(struct super_block * sb, 55 struct ext3_super_block * es); 56static void ext3_clear_journal_err(struct super_block * sb, 57 struct ext3_super_block * es); 58static int ext3_sync_fs(struct super_block *sb, int wait); 59static const char *ext3_decode_error(struct super_block * sb, int errno, 60 char nbuf[16]); 61static int ext3_remount (struct super_block * sb, int * flags, char * data); 62static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf); 63static int ext3_unfreeze(struct super_block *sb); 64static int 
ext3_freeze(struct super_block *sb); 65 66/* 67 * Wrappers for journal_start/end. 68 */ 69handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks) 70{ 71 journal_t *journal; 72 73 if (sb->s_flags & MS_RDONLY) 74 return ERR_PTR(-EROFS); 75 76 /* Special case here: if the journal has aborted behind our 77 * backs (eg. EIO in the commit thread), then we still need to 78 * take the FS itself readonly cleanly. */ 79 journal = EXT3_SB(sb)->s_journal; 80 if (is_journal_aborted(journal)) { 81 ext3_abort(sb, __func__, 82 "Detected aborted journal"); 83 return ERR_PTR(-EROFS); 84 } 85 86 return journal_start(journal, nblocks); 87} 88 89int __ext3_journal_stop(const char *where, handle_t *handle) 90{ 91 struct super_block *sb; 92 int err; 93 int rc; 94 95 sb = handle->h_transaction->t_journal->j_private; 96 err = handle->h_err; 97 rc = journal_stop(handle); 98 99 if (!err) 100 err = rc; 101 if (err) 102 __ext3_std_error(sb, where, err); 103 return err; 104} 105 106void ext3_journal_abort_handle(const char *caller, const char *err_fn, 107 struct buffer_head *bh, handle_t *handle, int err) 108{ 109 char nbuf[16]; 110 const char *errstr = ext3_decode_error(NULL, err, nbuf); 111 112 if (bh) 113 BUFFER_TRACE(bh, "abort"); 114 115 if (!handle->h_err) 116 handle->h_err = err; 117 118 if (is_handle_aborted(handle)) 119 return; 120 121 printk(KERN_ERR "EXT3-fs: %s: aborting transaction: %s in %s\n", 122 caller, errstr, err_fn); 123 124 journal_abort_handle(handle); 125} 126 127void ext3_msg(struct super_block *sb, const char *prefix, 128 const char *fmt, ...) 129{ 130 struct va_format vaf; 131 va_list args; 132 133 va_start(args, fmt); 134 135 vaf.fmt = fmt; 136 vaf.va = &args; 137 138 printk("%sEXT3-fs (%s): %pV\n", prefix, sb->s_id, &vaf); 139 140 va_end(args); 141} 142 143/* Deal with the reporting of failure conditions on a filesystem such as 144 * inconsistencies detected or read IO failures. 
145 * 146 * On ext2, we can store the error state of the filesystem in the 147 * superblock. That is not possible on ext3, because we may have other 148 * write ordering constraints on the superblock which prevent us from 149 * writing it out straight away; and given that the journal is about to 150 * be aborted, we can't rely on the current, or future, transactions to 151 * write out the superblock safely. 152 * 153 * We'll just use the journal_abort() error code to record an error in 154 * the journal instead. On recovery, the journal will complain about 155 * that error until we've noted it down and cleared it. 156 */ 157 158static void ext3_handle_error(struct super_block *sb) 159{ 160 struct ext3_super_block *es = EXT3_SB(sb)->s_es; 161 162 EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; 163 es->s_state |= cpu_to_le16(EXT3_ERROR_FS); 164 165 if (sb->s_flags & MS_RDONLY) 166 return; 167 168 if (!test_opt (sb, ERRORS_CONT)) { 169 journal_t *journal = EXT3_SB(sb)->s_journal; 170 171 set_opt(EXT3_SB(sb)->s_mount_opt, ABORT); 172 if (journal) 173 journal_abort(journal, -EIO); 174 } 175 if (test_opt (sb, ERRORS_RO)) { 176 ext3_msg(sb, KERN_CRIT, 177 "error: remounting filesystem read-only"); 178 /* 179 * Make sure updated value of ->s_mount_state will be visible 180 * before ->s_flags update. 181 */ 182 smp_wmb(); 183 sb->s_flags |= MS_RDONLY; 184 } 185 ext3_commit_super(sb, es, 1); 186 if (test_opt(sb, ERRORS_PANIC)) 187 panic("EXT3-fs (%s): panic forced after error\n", 188 sb->s_id); 189} 190 191void ext3_error(struct super_block *sb, const char *function, 192 const char *fmt, ...) 
193{ 194 struct va_format vaf; 195 va_list args; 196 197 va_start(args, fmt); 198 199 vaf.fmt = fmt; 200 vaf.va = &args; 201 202 printk(KERN_CRIT "EXT3-fs error (device %s): %s: %pV\n", 203 sb->s_id, function, &vaf); 204 205 va_end(args); 206 207 ext3_handle_error(sb); 208} 209 210static const char *ext3_decode_error(struct super_block * sb, int errno, 211 char nbuf[16]) 212{ 213 char *errstr = NULL; 214 215 switch (errno) { 216 case -EIO: 217 errstr = "IO failure"; 218 break; 219 case -ENOMEM: 220 errstr = "Out of memory"; 221 break; 222 case -EROFS: 223 if (!sb || EXT3_SB(sb)->s_journal->j_flags & JFS_ABORT) 224 errstr = "Journal has aborted"; 225 else 226 errstr = "Readonly filesystem"; 227 break; 228 default: 229 /* If the caller passed in an extra buffer for unknown 230 * errors, textualise them now. Else we just return 231 * NULL. */ 232 if (nbuf) { 233 /* Check for truncated error codes... */ 234 if (snprintf(nbuf, 16, "error %d", -errno) >= 0) 235 errstr = nbuf; 236 } 237 break; 238 } 239 240 return errstr; 241} 242 243/* __ext3_std_error decodes expected errors from journaling functions 244 * automatically and invokes the appropriate error response. */ 245 246void __ext3_std_error (struct super_block * sb, const char * function, 247 int errno) 248{ 249 char nbuf[16]; 250 const char *errstr; 251 252 /* Special case: if the error is EROFS, and we're not already 253 * inside a transaction, then there's really no point in logging 254 * an error. */ 255 if (errno == -EROFS && journal_current_handle() == NULL && 256 (sb->s_flags & MS_RDONLY)) 257 return; 258 259 errstr = ext3_decode_error(sb, errno, nbuf); 260 ext3_msg(sb, KERN_CRIT, "error in %s: %s", function, errstr); 261 262 ext3_handle_error(sb); 263} 264 265/* 266 * ext3_abort is a much stronger failure handler than ext3_error. The 267 * abort function may be used to deal with unrecoverable failures such 268 * as journal IO errors or ENOMEM at a critical moment in log management. 
269 * 270 * We unconditionally force the filesystem into an ABORT|READONLY state, 271 * unless the error response on the fs has been set to panic in which 272 * case we take the easy way out and panic immediately. 273 */ 274 275void ext3_abort(struct super_block *sb, const char *function, 276 const char *fmt, ...) 277{ 278 struct va_format vaf; 279 va_list args; 280 281 va_start(args, fmt); 282 283 vaf.fmt = fmt; 284 vaf.va = &args; 285 286 printk(KERN_CRIT "EXT3-fs (%s): error: %s: %pV\n", 287 sb->s_id, function, &vaf); 288 289 va_end(args); 290 291 if (test_opt(sb, ERRORS_PANIC)) 292 panic("EXT3-fs: panic from previous error\n"); 293 294 if (sb->s_flags & MS_RDONLY) 295 return; 296 297 ext3_msg(sb, KERN_CRIT, 298 "error: remounting filesystem read-only"); 299 EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; 300 set_opt(EXT3_SB(sb)->s_mount_opt, ABORT); 301 /* 302 * Make sure updated value of ->s_mount_state will be visible 303 * before ->s_flags update. 304 */ 305 smp_wmb(); 306 sb->s_flags |= MS_RDONLY; 307 308 if (EXT3_SB(sb)->s_journal) 309 journal_abort(EXT3_SB(sb)->s_journal, -EIO); 310} 311 312void ext3_warning(struct super_block *sb, const char *function, 313 const char *fmt, ...) 
314{ 315 struct va_format vaf; 316 va_list args; 317 318 va_start(args, fmt); 319 320 vaf.fmt = fmt; 321 vaf.va = &args; 322 323 printk(KERN_WARNING "EXT3-fs (%s): warning: %s: %pV\n", 324 sb->s_id, function, &vaf); 325 326 va_end(args); 327} 328 329void ext3_update_dynamic_rev(struct super_block *sb) 330{ 331 struct ext3_super_block *es = EXT3_SB(sb)->s_es; 332 333 if (le32_to_cpu(es->s_rev_level) > EXT3_GOOD_OLD_REV) 334 return; 335 336 ext3_msg(sb, KERN_WARNING, 337 "warning: updating to rev %d because of " 338 "new feature flag, running e2fsck is recommended", 339 EXT3_DYNAMIC_REV); 340 341 es->s_first_ino = cpu_to_le32(EXT3_GOOD_OLD_FIRST_INO); 342 es->s_inode_size = cpu_to_le16(EXT3_GOOD_OLD_INODE_SIZE); 343 es->s_rev_level = cpu_to_le32(EXT3_DYNAMIC_REV); 344 /* leave es->s_feature_*compat flags alone */ 345 /* es->s_uuid will be set by e2fsck if empty */ 346 347 /* 348 * The rest of the superblock fields should be zero, and if not it 349 * means they are likely already in use, so leave them alone. We 350 * can leave it up to e2fsck to clean up any inconsistencies there. 
351 */ 352} 353 354/* 355 * Open the external journal device 356 */ 357static struct block_device *ext3_blkdev_get(dev_t dev, struct super_block *sb) 358{ 359 struct block_device *bdev; 360 char b[BDEVNAME_SIZE]; 361 362 bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb); 363 if (IS_ERR(bdev)) 364 goto fail; 365 return bdev; 366 367fail: 368 ext3_msg(sb, KERN_ERR, "error: failed to open journal device %s: %ld", 369 __bdevname(dev, b), PTR_ERR(bdev)); 370 371 return NULL; 372} 373 374/* 375 * Release the journal device 376 */ 377static void ext3_blkdev_put(struct block_device *bdev) 378{ 379 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); 380} 381 382static void ext3_blkdev_remove(struct ext3_sb_info *sbi) 383{ 384 struct block_device *bdev; 385 bdev = sbi->journal_bdev; 386 if (bdev) { 387 ext3_blkdev_put(bdev); 388 sbi->journal_bdev = NULL; 389 } 390} 391 392static inline struct inode *orphan_list_entry(struct list_head *l) 393{ 394 return &list_entry(l, struct ext3_inode_info, i_orphan)->vfs_inode; 395} 396 397static void dump_orphan_list(struct super_block *sb, struct ext3_sb_info *sbi) 398{ 399 struct list_head *l; 400 401 ext3_msg(sb, KERN_ERR, "error: sb orphan head is %d", 402 le32_to_cpu(sbi->s_es->s_last_orphan)); 403 404 ext3_msg(sb, KERN_ERR, "sb_info orphan list:"); 405 list_for_each(l, &sbi->s_orphan) { 406 struct inode *inode = orphan_list_entry(l); 407 ext3_msg(sb, KERN_ERR, " " 408 "inode %s:%lu at %p: mode %o, nlink %d, next %d\n", 409 inode->i_sb->s_id, inode->i_ino, inode, 410 inode->i_mode, inode->i_nlink, 411 NEXT_ORPHAN(inode)); 412 } 413} 414 415static void ext3_put_super (struct super_block * sb) 416{ 417 struct ext3_sb_info *sbi = EXT3_SB(sb); 418 struct ext3_super_block *es = sbi->s_es; 419 int i, err; 420 421 dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); 422 ext3_xattr_put_super(sb); 423 err = journal_destroy(sbi->s_journal); 424 sbi->s_journal = NULL; 425 if (err < 0) 426 ext3_abort(sb, 
__func__, "Couldn't clean up the journal"); 427 428 if (!(sb->s_flags & MS_RDONLY)) { 429 EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); 430 es->s_state = cpu_to_le16(sbi->s_mount_state); 431 BUFFER_TRACE(sbi->s_sbh, "marking dirty"); 432 mark_buffer_dirty(sbi->s_sbh); 433 ext3_commit_super(sb, es, 1); 434 } 435 436 for (i = 0; i < sbi->s_gdb_count; i++) 437 brelse(sbi->s_group_desc[i]); 438 kfree(sbi->s_group_desc); 439 percpu_counter_destroy(&sbi->s_freeblocks_counter); 440 percpu_counter_destroy(&sbi->s_freeinodes_counter); 441 percpu_counter_destroy(&sbi->s_dirs_counter); 442 brelse(sbi->s_sbh); 443#ifdef CONFIG_QUOTA 444 for (i = 0; i < EXT3_MAXQUOTAS; i++) 445 kfree(sbi->s_qf_names[i]); 446#endif 447 448 /* Debugging code just in case the in-memory inode orphan list 449 * isn't empty. The on-disk one can be non-empty if we've 450 * detected an error and taken the fs readonly, but the 451 * in-memory list had better be clean by this point. */ 452 if (!list_empty(&sbi->s_orphan)) 453 dump_orphan_list(sb, sbi); 454 J_ASSERT(list_empty(&sbi->s_orphan)); 455 456 invalidate_bdev(sb->s_bdev); 457 if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) { 458 /* 459 * Invalidate the journal device's buffers. We don't want them 460 * floating about in memory - the physical journal device may 461 * hotswapped, and it breaks the `ro-after' testing code. 
462 */ 463 sync_blockdev(sbi->journal_bdev); 464 invalidate_bdev(sbi->journal_bdev); 465 ext3_blkdev_remove(sbi); 466 } 467 sb->s_fs_info = NULL; 468 kfree(sbi->s_blockgroup_lock); 469 kfree(sbi); 470} 471 472static struct kmem_cache *ext3_inode_cachep; 473 474/* 475 * Called inside transaction, so use GFP_NOFS 476 */ 477static struct inode *ext3_alloc_inode(struct super_block *sb) 478{ 479 struct ext3_inode_info *ei; 480 481 ei = kmem_cache_alloc(ext3_inode_cachep, GFP_NOFS); 482 if (!ei) 483 return NULL; 484 ei->i_block_alloc_info = NULL; 485 ei->vfs_inode.i_version = 1; 486 atomic_set(&ei->i_datasync_tid, 0); 487 atomic_set(&ei->i_sync_tid, 0); 488#ifdef CONFIG_QUOTA 489 memset(&ei->i_dquot, 0, sizeof(ei->i_dquot)); 490#endif 491 492 return &ei->vfs_inode; 493} 494 495static int ext3_drop_inode(struct inode *inode) 496{ 497 int drop = generic_drop_inode(inode); 498 499 trace_ext3_drop_inode(inode, drop); 500 return drop; 501} 502 503static void ext3_i_callback(struct rcu_head *head) 504{ 505 struct inode *inode = container_of(head, struct inode, i_rcu); 506 kmem_cache_free(ext3_inode_cachep, EXT3_I(inode)); 507} 508 509static void ext3_destroy_inode(struct inode *inode) 510{ 511 if (!list_empty(&(EXT3_I(inode)->i_orphan))) { 512 printk("EXT3 Inode %p: orphan list check failed!\n", 513 EXT3_I(inode)); 514 print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4, 515 EXT3_I(inode), sizeof(struct ext3_inode_info), 516 false); 517 dump_stack(); 518 } 519 call_rcu(&inode->i_rcu, ext3_i_callback); 520} 521 522static void init_once(void *foo) 523{ 524 struct ext3_inode_info *ei = (struct ext3_inode_info *) foo; 525 526 INIT_LIST_HEAD(&ei->i_orphan); 527#ifdef CONFIG_EXT3_FS_XATTR 528 init_rwsem(&ei->xattr_sem); 529#endif 530 mutex_init(&ei->truncate_mutex); 531 inode_init_once(&ei->vfs_inode); 532} 533 534static int __init init_inodecache(void) 535{ 536 ext3_inode_cachep = kmem_cache_create("ext3_inode_cache", 537 sizeof(struct ext3_inode_info), 538 0, 
(SLAB_RECLAIM_ACCOUNT| 539 SLAB_MEM_SPREAD), 540 init_once); 541 if (ext3_inode_cachep == NULL) 542 return -ENOMEM; 543 return 0; 544} 545 546static void destroy_inodecache(void) 547{ 548 /* 549 * Make sure all delayed rcu free inodes are flushed before we 550 * destroy cache. 551 */ 552 rcu_barrier(); 553 kmem_cache_destroy(ext3_inode_cachep); 554} 555 556static inline void ext3_show_quota_options(struct seq_file *seq, struct super_block *sb) 557{ 558#if defined(CONFIG_QUOTA) 559 struct ext3_sb_info *sbi = EXT3_SB(sb); 560 561 if (sbi->s_jquota_fmt) { 562 char *fmtname = ""; 563 564 switch (sbi->s_jquota_fmt) { 565 case QFMT_VFS_OLD: 566 fmtname = "vfsold"; 567 break; 568 case QFMT_VFS_V0: 569 fmtname = "vfsv0"; 570 break; 571 case QFMT_VFS_V1: 572 fmtname = "vfsv1"; 573 break; 574 } 575 seq_printf(seq, ",jqfmt=%s", fmtname); 576 } 577 578 if (sbi->s_qf_names[USRQUOTA]) 579 seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); 580 581 if (sbi->s_qf_names[GRPQUOTA]) 582 seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); 583 584 if (test_opt(sb, USRQUOTA)) 585 seq_puts(seq, ",usrquota"); 586 587 if (test_opt(sb, GRPQUOTA)) 588 seq_puts(seq, ",grpquota"); 589#endif 590} 591 592static char *data_mode_string(unsigned long mode) 593{ 594 switch (mode) { 595 case EXT3_MOUNT_JOURNAL_DATA: 596 return "journal"; 597 case EXT3_MOUNT_ORDERED_DATA: 598 return "ordered"; 599 case EXT3_MOUNT_WRITEBACK_DATA: 600 return "writeback"; 601 } 602 return "unknown"; 603} 604 605/* 606 * Show an option if 607 * - it's set to a non-default value OR 608 * - if the per-sb default is different from the global default 609 */ 610static int ext3_show_options(struct seq_file *seq, struct dentry *root) 611{ 612 struct super_block *sb = root->d_sb; 613 struct ext3_sb_info *sbi = EXT3_SB(sb); 614 struct ext3_super_block *es = sbi->s_es; 615 unsigned long def_mount_opts; 616 617 def_mount_opts = le32_to_cpu(es->s_default_mount_opts); 618 619 if (sbi->s_sb_block != 1) 620 
seq_printf(seq, ",sb=%lu", sbi->s_sb_block); 621 if (test_opt(sb, MINIX_DF)) 622 seq_puts(seq, ",minixdf"); 623 if (test_opt(sb, GRPID)) 624 seq_puts(seq, ",grpid"); 625 if (!test_opt(sb, GRPID) && (def_mount_opts & EXT3_DEFM_BSDGROUPS)) 626 seq_puts(seq, ",nogrpid"); 627 if (!uid_eq(sbi->s_resuid, make_kuid(&init_user_ns, EXT3_DEF_RESUID)) || 628 le16_to_cpu(es->s_def_resuid) != EXT3_DEF_RESUID) { 629 seq_printf(seq, ",resuid=%u", 630 from_kuid_munged(&init_user_ns, sbi->s_resuid)); 631 } 632 if (!gid_eq(sbi->s_resgid, make_kgid(&init_user_ns, EXT3_DEF_RESGID)) || 633 le16_to_cpu(es->s_def_resgid) != EXT3_DEF_RESGID) { 634 seq_printf(seq, ",resgid=%u", 635 from_kgid_munged(&init_user_ns, sbi->s_resgid)); 636 } 637 if (test_opt(sb, ERRORS_RO)) { 638 int def_errors = le16_to_cpu(es->s_errors); 639 640 if (def_errors == EXT3_ERRORS_PANIC || 641 def_errors == EXT3_ERRORS_CONTINUE) { 642 seq_puts(seq, ",errors=remount-ro"); 643 } 644 } 645 if (test_opt(sb, ERRORS_CONT)) 646 seq_puts(seq, ",errors=continue"); 647 if (test_opt(sb, ERRORS_PANIC)) 648 seq_puts(seq, ",errors=panic"); 649 if (test_opt(sb, NO_UID32)) 650 seq_puts(seq, ",nouid32"); 651 if (test_opt(sb, DEBUG)) 652 seq_puts(seq, ",debug"); 653#ifdef CONFIG_EXT3_FS_XATTR 654 if (test_opt(sb, XATTR_USER)) 655 seq_puts(seq, ",user_xattr"); 656 if (!test_opt(sb, XATTR_USER) && 657 (def_mount_opts & EXT3_DEFM_XATTR_USER)) { 658 seq_puts(seq, ",nouser_xattr"); 659 } 660#endif 661#ifdef CONFIG_EXT3_FS_POSIX_ACL 662 if (test_opt(sb, POSIX_ACL)) 663 seq_puts(seq, ",acl"); 664 if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT3_DEFM_ACL)) 665 seq_puts(seq, ",noacl"); 666#endif 667 if (!test_opt(sb, RESERVATION)) 668 seq_puts(seq, ",noreservation"); 669 if (sbi->s_commit_interval) { 670 seq_printf(seq, ",commit=%u", 671 (unsigned) (sbi->s_commit_interval / HZ)); 672 } 673 674 /* 675 * Always display barrier state so it's clear what the status is. 
676 */ 677 seq_puts(seq, ",barrier="); 678 seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0"); 679 seq_printf(seq, ",data=%s", data_mode_string(test_opt(sb, DATA_FLAGS))); 680 if (test_opt(sb, DATA_ERR_ABORT)) 681 seq_puts(seq, ",data_err=abort"); 682 683 if (test_opt(sb, NOLOAD)) 684 seq_puts(seq, ",norecovery"); 685 686 ext3_show_quota_options(seq, sb); 687 688 return 0; 689} 690 691 692static struct inode *ext3_nfs_get_inode(struct super_block *sb, 693 u64 ino, u32 generation) 694{ 695 struct inode *inode; 696 697 if (ino < EXT3_FIRST_INO(sb) && ino != EXT3_ROOT_INO) 698 return ERR_PTR(-ESTALE); 699 if (ino > le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count)) 700 return ERR_PTR(-ESTALE); 701 702 /* iget isn't really right if the inode is currently unallocated!! 703 * 704 * ext3_read_inode will return a bad_inode if the inode had been 705 * deleted, so we should be safe. 706 * 707 * Currently we don't know the generation for parent directory, so 708 * a generation of 0 means "accept any" 709 */ 710 inode = ext3_iget(sb, ino); 711 if (IS_ERR(inode)) 712 return ERR_CAST(inode); 713 if (generation && inode->i_generation != generation) { 714 iput(inode); 715 return ERR_PTR(-ESTALE); 716 } 717 718 return inode; 719} 720 721static struct dentry *ext3_fh_to_dentry(struct super_block *sb, struct fid *fid, 722 int fh_len, int fh_type) 723{ 724 return generic_fh_to_dentry(sb, fid, fh_len, fh_type, 725 ext3_nfs_get_inode); 726} 727 728static struct dentry *ext3_fh_to_parent(struct super_block *sb, struct fid *fid, 729 int fh_len, int fh_type) 730{ 731 return generic_fh_to_parent(sb, fid, fh_len, fh_type, 732 ext3_nfs_get_inode); 733} 734 735/* 736 * Try to release metadata pages (indirect blocks, directories) which are 737 * mapped via the block device. Since these pages could have journal heads 738 * which would prevent try_to_free_buffers() from freeing them, we must use 739 * jbd layer's try_to_free_buffers() function to release them. 
740 */ 741static int bdev_try_to_free_page(struct super_block *sb, struct page *page, 742 gfp_t wait) 743{ 744 journal_t *journal = EXT3_SB(sb)->s_journal; 745 746 WARN_ON(PageChecked(page)); 747 if (!page_has_buffers(page)) 748 return 0; 749 if (journal) 750 return journal_try_to_free_buffers(journal, page, 751 wait & ~__GFP_WAIT); 752 return try_to_free_buffers(page); 753} 754 755#ifdef CONFIG_QUOTA 756#define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group") 757#define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) 758 759static int ext3_write_dquot(struct dquot *dquot); 760static int ext3_acquire_dquot(struct dquot *dquot); 761static int ext3_release_dquot(struct dquot *dquot); 762static int ext3_mark_dquot_dirty(struct dquot *dquot); 763static int ext3_write_info(struct super_block *sb, int type); 764static int ext3_quota_on(struct super_block *sb, int type, int format_id, 765 struct path *path); 766static int ext3_quota_on_mount(struct super_block *sb, int type); 767static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data, 768 size_t len, loff_t off); 769static ssize_t ext3_quota_write(struct super_block *sb, int type, 770 const char *data, size_t len, loff_t off); 771static struct dquot **ext3_get_dquots(struct inode *inode) 772{ 773 return EXT3_I(inode)->i_dquot; 774} 775 776static const struct dquot_operations ext3_quota_operations = { 777 .write_dquot = ext3_write_dquot, 778 .acquire_dquot = ext3_acquire_dquot, 779 .release_dquot = ext3_release_dquot, 780 .mark_dirty = ext3_mark_dquot_dirty, 781 .write_info = ext3_write_info, 782 .alloc_dquot = dquot_alloc, 783 .destroy_dquot = dquot_destroy, 784}; 785 786static const struct quotactl_ops ext3_qctl_operations = { 787 .quota_on = ext3_quota_on, 788 .quota_off = dquot_quota_off, 789 .quota_sync = dquot_quota_sync, 790 .get_info = dquot_get_dqinfo, 791 .set_info = dquot_set_dqinfo, 792 .get_dqblk = dquot_get_dqblk, 793 .set_dqblk = dquot_set_dqblk 794}; 795#endif 796 
797static const struct super_operations ext3_sops = { 798 .alloc_inode = ext3_alloc_inode, 799 .destroy_inode = ext3_destroy_inode, 800 .write_inode = ext3_write_inode, 801 .dirty_inode = ext3_dirty_inode, 802 .drop_inode = ext3_drop_inode, 803 .evict_inode = ext3_evict_inode, 804 .put_super = ext3_put_super, 805 .sync_fs = ext3_sync_fs, 806 .freeze_fs = ext3_freeze, 807 .unfreeze_fs = ext3_unfreeze, 808 .statfs = ext3_statfs, 809 .remount_fs = ext3_remount, 810 .show_options = ext3_show_options, 811#ifdef CONFIG_QUOTA 812 .quota_read = ext3_quota_read, 813 .quota_write = ext3_quota_write, 814 .get_dquots = ext3_get_dquots, 815#endif 816 .bdev_try_to_free_page = bdev_try_to_free_page, 817}; 818 819static const struct export_operations ext3_export_ops = { 820 .fh_to_dentry = ext3_fh_to_dentry, 821 .fh_to_parent = ext3_fh_to_parent, 822 .get_parent = ext3_get_parent, 823}; 824 825enum { 826 Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, 827 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, 828 Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, 829 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, 830 Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh, 831 Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, 832 Opt_journal_path, 833 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, 834 Opt_data_err_abort, Opt_data_err_ignore, 835 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 836 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, 837 Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, 838 Opt_resize, Opt_usrquota, Opt_grpquota 839}; 840 841static const match_table_t tokens = { 842 {Opt_bsd_df, "bsddf"}, 843 {Opt_minix_df, "minixdf"}, 844 {Opt_grpid, "grpid"}, 845 {Opt_grpid, "bsdgroups"}, 846 {Opt_nogrpid, "nogrpid"}, 847 {Opt_nogrpid, "sysvgroups"}, 848 {Opt_resgid, "resgid=%u"}, 849 {Opt_resuid, "resuid=%u"}, 850 {Opt_sb, "sb=%u"}, 851 
{Opt_err_cont, "errors=continue"}, 852 {Opt_err_panic, "errors=panic"}, 853 {Opt_err_ro, "errors=remount-ro"}, 854 {Opt_nouid32, "nouid32"}, 855 {Opt_nocheck, "nocheck"}, 856 {Opt_nocheck, "check=none"}, 857 {Opt_debug, "debug"}, 858 {Opt_oldalloc, "oldalloc"}, 859 {Opt_orlov, "orlov"}, 860 {Opt_user_xattr, "user_xattr"}, 861 {Opt_nouser_xattr, "nouser_xattr"}, 862 {Opt_acl, "acl"}, 863 {Opt_noacl, "noacl"}, 864 {Opt_reservation, "reservation"}, 865 {Opt_noreservation, "noreservation"}, 866 {Opt_noload, "noload"}, 867 {Opt_noload, "norecovery"}, 868 {Opt_nobh, "nobh"}, 869 {Opt_bh, "bh"}, 870 {Opt_commit, "commit=%u"}, 871 {Opt_journal_update, "journal=update"}, 872 {Opt_journal_inum, "journal=%u"}, 873 {Opt_journal_dev, "journal_dev=%u"}, 874 {Opt_journal_path, "journal_path=%s"}, 875 {Opt_abort, "abort"}, 876 {Opt_data_journal, "data=journal"}, 877 {Opt_data_ordered, "data=ordered"}, 878 {Opt_data_writeback, "data=writeback"}, 879 {Opt_data_err_abort, "data_err=abort"}, 880 {Opt_data_err_ignore, "data_err=ignore"}, 881 {Opt_offusrjquota, "usrjquota="}, 882 {Opt_usrjquota, "usrjquota=%s"}, 883 {Opt_offgrpjquota, "grpjquota="}, 884 {Opt_grpjquota, "grpjquota=%s"}, 885 {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, 886 {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, 887 {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"}, 888 {Opt_grpquota, "grpquota"}, 889 {Opt_noquota, "noquota"}, 890 {Opt_quota, "quota"}, 891 {Opt_usrquota, "usrquota"}, 892 {Opt_barrier, "barrier=%u"}, 893 {Opt_barrier, "barrier"}, 894 {Opt_nobarrier, "nobarrier"}, 895 {Opt_resize, "resize"}, 896 {Opt_err, NULL}, 897}; 898 899static ext3_fsblk_t get_sb_block(void **data, struct super_block *sb) 900{ 901 ext3_fsblk_t sb_block; 902 char *options = (char *) *data; 903 904 if (!options || strncmp(options, "sb=", 3) != 0) 905 return 1; /* Default location */ 906 options += 3; 907 /*todo: use simple_strtoll with >32bit ext3 */ 908 sb_block = simple_strtoul(options, &options, 0); 909 if (*options && *options != ',') { 910 ext3_msg(sb, KERN_ERR, 
"error: invalid sb specification: %s", 911 (char *) *data); 912 return 1; 913 } 914 if (*options == ',') 915 options++; 916 *data = (void *) options; 917 return sb_block; 918} 919 920#ifdef CONFIG_QUOTA 921static int set_qf_name(struct super_block *sb, int qtype, substring_t *args) 922{ 923 struct ext3_sb_info *sbi = EXT3_SB(sb); 924 char *qname; 925 926 if (sb_any_quota_loaded(sb) && 927 !sbi->s_qf_names[qtype]) { 928 ext3_msg(sb, KERN_ERR, 929 "Cannot change journaled " 930 "quota options when quota turned on"); 931 return 0; 932 } 933 qname = match_strdup(args); 934 if (!qname) { 935 ext3_msg(sb, KERN_ERR, 936 "Not enough memory for storing quotafile name"); 937 return 0; 938 } 939 if (sbi->s_qf_names[qtype]) { 940 int same = !strcmp(sbi->s_qf_names[qtype], qname); 941 942 kfree(qname); 943 if (!same) { 944 ext3_msg(sb, KERN_ERR, 945 "%s quota file already specified", 946 QTYPE2NAME(qtype)); 947 } 948 return same; 949 } 950 if (strchr(qname, '/')) { 951 ext3_msg(sb, KERN_ERR, 952 "quotafile must be on filesystem root"); 953 kfree(qname); 954 return 0; 955 } 956 sbi->s_qf_names[qtype] = qname; 957 set_opt(sbi->s_mount_opt, QUOTA); 958 return 1; 959} 960 961static int clear_qf_name(struct super_block *sb, int qtype) { 962 963 struct ext3_sb_info *sbi = EXT3_SB(sb); 964 965 if (sb_any_quota_loaded(sb) && 966 sbi->s_qf_names[qtype]) { 967 ext3_msg(sb, KERN_ERR, "Cannot change journaled quota options" 968 " when quota turned on"); 969 return 0; 970 } 971 if (sbi->s_qf_names[qtype]) { 972 kfree(sbi->s_qf_names[qtype]); 973 sbi->s_qf_names[qtype] = NULL; 974 } 975 return 1; 976} 977#endif 978 979static int parse_options (char *options, struct super_block *sb, 980 unsigned int *inum, unsigned long *journal_devnum, 981 ext3_fsblk_t *n_blocks_count, int is_remount) 982{ 983 struct ext3_sb_info *sbi = EXT3_SB(sb); 984 char * p; 985 substring_t args[MAX_OPT_ARGS]; 986 int data_opt = 0; 987 int option; 988 kuid_t uid; 989 kgid_t gid; 990 char *journal_path; 991 struct inode 
*journal_inode; 992 struct path path; 993 int error; 994 995#ifdef CONFIG_QUOTA 996 int qfmt; 997#endif 998 999 if (!options) 1000 return 1; 1001 1002 while ((p = strsep (&options, ",")) != NULL) { 1003 int token; 1004 if (!*p) 1005 continue; 1006 /* 1007 * Initialize args struct so we know whether arg was 1008 * found; some options take optional arguments. 1009 */ 1010 args[0].to = args[0].from = NULL; 1011 token = match_token(p, tokens, args); 1012 switch (token) { 1013 case Opt_bsd_df: 1014 clear_opt (sbi->s_mount_opt, MINIX_DF); 1015 break; 1016 case Opt_minix_df: 1017 set_opt (sbi->s_mount_opt, MINIX_DF); 1018 break; 1019 case Opt_grpid: 1020 set_opt (sbi->s_mount_opt, GRPID); 1021 break; 1022 case Opt_nogrpid: 1023 clear_opt (sbi->s_mount_opt, GRPID); 1024 break; 1025 case Opt_resuid: 1026 if (match_int(&args[0], &option)) 1027 return 0; 1028 uid = make_kuid(current_user_ns(), option); 1029 if (!uid_valid(uid)) { 1030 ext3_msg(sb, KERN_ERR, "Invalid uid value %d", option); 1031 return 0; 1032 1033 } 1034 sbi->s_resuid = uid; 1035 break; 1036 case Opt_resgid: 1037 if (match_int(&args[0], &option)) 1038 return 0; 1039 gid = make_kgid(current_user_ns(), option); 1040 if (!gid_valid(gid)) { 1041 ext3_msg(sb, KERN_ERR, "Invalid gid value %d", option); 1042 return 0; 1043 } 1044 sbi->s_resgid = gid; 1045 break; 1046 case Opt_sb: 1047 /* handled by get_sb_block() instead of here */ 1048 /* *sb_block = match_int(&args[0]); */ 1049 break; 1050 case Opt_err_panic: 1051 clear_opt (sbi->s_mount_opt, ERRORS_CONT); 1052 clear_opt (sbi->s_mount_opt, ERRORS_RO); 1053 set_opt (sbi->s_mount_opt, ERRORS_PANIC); 1054 break; 1055 case Opt_err_ro: 1056 clear_opt (sbi->s_mount_opt, ERRORS_CONT); 1057 clear_opt (sbi->s_mount_opt, ERRORS_PANIC); 1058 set_opt (sbi->s_mount_opt, ERRORS_RO); 1059 break; 1060 case Opt_err_cont: 1061 clear_opt (sbi->s_mount_opt, ERRORS_RO); 1062 clear_opt (sbi->s_mount_opt, ERRORS_PANIC); 1063 set_opt (sbi->s_mount_opt, ERRORS_CONT); 1064 break; 1065 case 
Opt_nouid32: 1066 set_opt (sbi->s_mount_opt, NO_UID32); 1067 break; 1068 case Opt_nocheck: 1069 clear_opt (sbi->s_mount_opt, CHECK); 1070 break; 1071 case Opt_debug: 1072 set_opt (sbi->s_mount_opt, DEBUG); 1073 break; 1074 case Opt_oldalloc: 1075 ext3_msg(sb, KERN_WARNING, 1076 "Ignoring deprecated oldalloc option"); 1077 break; 1078 case Opt_orlov: 1079 ext3_msg(sb, KERN_WARNING, 1080 "Ignoring deprecated orlov option"); 1081 break; 1082#ifdef CONFIG_EXT3_FS_XATTR 1083 case Opt_user_xattr: 1084 set_opt (sbi->s_mount_opt, XATTR_USER); 1085 break; 1086 case Opt_nouser_xattr: 1087 clear_opt (sbi->s_mount_opt, XATTR_USER); 1088 break; 1089#else 1090 case Opt_user_xattr: 1091 case Opt_nouser_xattr: 1092 ext3_msg(sb, KERN_INFO, 1093 "(no)user_xattr options not supported"); 1094 break; 1095#endif 1096#ifdef CONFIG_EXT3_FS_POSIX_ACL 1097 case Opt_acl: 1098 set_opt(sbi->s_mount_opt, POSIX_ACL); 1099 break; 1100 case Opt_noacl: 1101 clear_opt(sbi->s_mount_opt, POSIX_ACL); 1102 break; 1103#else 1104 case Opt_acl: 1105 case Opt_noacl: 1106 ext3_msg(sb, KERN_INFO, 1107 "(no)acl options not supported"); 1108 break; 1109#endif 1110 case Opt_reservation: 1111 set_opt(sbi->s_mount_opt, RESERVATION); 1112 break; 1113 case Opt_noreservation: 1114 clear_opt(sbi->s_mount_opt, RESERVATION); 1115 break; 1116 case Opt_journal_update: 1117 /* @@@ FIXME */ 1118 /* Eventually we will want to be able to create 1119 a journal file here. For now, only allow the 1120 user to specify an existing inode to be the 1121 journal file. 
*/ 1122 if (is_remount) { 1123 ext3_msg(sb, KERN_ERR, "error: cannot specify " 1124 "journal on remount"); 1125 return 0; 1126 } 1127 set_opt (sbi->s_mount_opt, UPDATE_JOURNAL); 1128 break; 1129 case Opt_journal_inum: 1130 if (is_remount) { 1131 ext3_msg(sb, KERN_ERR, "error: cannot specify " 1132 "journal on remount"); 1133 return 0; 1134 } 1135 if (match_int(&args[0], &option)) 1136 return 0; 1137 *inum = option; 1138 break; 1139 case Opt_journal_dev: 1140 if (is_remount) { 1141 ext3_msg(sb, KERN_ERR, "error: cannot specify " 1142 "journal on remount"); 1143 return 0; 1144 } 1145 if (match_int(&args[0], &option)) 1146 return 0; 1147 *journal_devnum = option; 1148 break; 1149 case Opt_journal_path: 1150 if (is_remount) { 1151 ext3_msg(sb, KERN_ERR, "error: cannot specify " 1152 "journal on remount"); 1153 return 0; 1154 } 1155 1156 journal_path = match_strdup(&args[0]); 1157 if (!journal_path) { 1158 ext3_msg(sb, KERN_ERR, "error: could not dup " 1159 "journal device string"); 1160 return 0; 1161 } 1162 1163 error = kern_path(journal_path, LOOKUP_FOLLOW, &path); 1164 if (error) { 1165 ext3_msg(sb, KERN_ERR, "error: could not find " 1166 "journal device path: error %d", error); 1167 kfree(journal_path); 1168 return 0; 1169 } 1170 1171 journal_inode = path.dentry->d_inode; 1172 if (!S_ISBLK(journal_inode->i_mode)) { 1173 ext3_msg(sb, KERN_ERR, "error: journal path %s " 1174 "is not a block device", journal_path); 1175 path_put(&path); 1176 kfree(journal_path); 1177 return 0; 1178 } 1179 1180 *journal_devnum = new_encode_dev(journal_inode->i_rdev); 1181 path_put(&path); 1182 kfree(journal_path); 1183 break; 1184 case Opt_noload: 1185 set_opt (sbi->s_mount_opt, NOLOAD); 1186 break; 1187 case Opt_commit: 1188 if (match_int(&args[0], &option)) 1189 return 0; 1190 if (option < 0) 1191 return 0; 1192 if (option == 0) 1193 option = JBD_DEFAULT_MAX_COMMIT_AGE; 1194 sbi->s_commit_interval = HZ * option; 1195 break; 1196 case Opt_data_journal: 1197 data_opt = 
EXT3_MOUNT_JOURNAL_DATA; 1198 goto datacheck; 1199 case Opt_data_ordered: 1200 data_opt = EXT3_MOUNT_ORDERED_DATA; 1201 goto datacheck; 1202 case Opt_data_writeback: 1203 data_opt = EXT3_MOUNT_WRITEBACK_DATA; 1204 datacheck: 1205 if (is_remount) { 1206 if (test_opt(sb, DATA_FLAGS) == data_opt) 1207 break; 1208 ext3_msg(sb, KERN_ERR, 1209 "error: cannot change " 1210 "data mode on remount. The filesystem " 1211 "is mounted in data=%s mode and you " 1212 "try to remount it in data=%s mode.", 1213 data_mode_string(test_opt(sb, 1214 DATA_FLAGS)), 1215 data_mode_string(data_opt)); 1216 return 0; 1217 } else { 1218 clear_opt(sbi->s_mount_opt, DATA_FLAGS); 1219 sbi->s_mount_opt |= data_opt; 1220 } 1221 break; 1222 case Opt_data_err_abort: 1223 set_opt(sbi->s_mount_opt, DATA_ERR_ABORT); 1224 break; 1225 case Opt_data_err_ignore: 1226 clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT); 1227 break; 1228#ifdef CONFIG_QUOTA 1229 case Opt_usrjquota: 1230 if (!set_qf_name(sb, USRQUOTA, &args[0])) 1231 return 0; 1232 break; 1233 case Opt_grpjquota: 1234 if (!set_qf_name(sb, GRPQUOTA, &args[0])) 1235 return 0; 1236 break; 1237 case Opt_offusrjquota: 1238 if (!clear_qf_name(sb, USRQUOTA)) 1239 return 0; 1240 break; 1241 case Opt_offgrpjquota: 1242 if (!clear_qf_name(sb, GRPQUOTA)) 1243 return 0; 1244 break; 1245 case Opt_jqfmt_vfsold: 1246 qfmt = QFMT_VFS_OLD; 1247 goto set_qf_format; 1248 case Opt_jqfmt_vfsv0: 1249 qfmt = QFMT_VFS_V0; 1250 goto set_qf_format; 1251 case Opt_jqfmt_vfsv1: 1252 qfmt = QFMT_VFS_V1; 1253set_qf_format: 1254 if (sb_any_quota_loaded(sb) && 1255 sbi->s_jquota_fmt != qfmt) { 1256 ext3_msg(sb, KERN_ERR, "error: cannot change " 1257 "journaled quota options when " 1258 "quota turned on."); 1259 return 0; 1260 } 1261 sbi->s_jquota_fmt = qfmt; 1262 break; 1263 case Opt_quota: 1264 case Opt_usrquota: 1265 set_opt(sbi->s_mount_opt, QUOTA); 1266 set_opt(sbi->s_mount_opt, USRQUOTA); 1267 break; 1268 case Opt_grpquota: 1269 set_opt(sbi->s_mount_opt, QUOTA); 1270 
set_opt(sbi->s_mount_opt, GRPQUOTA); 1271 break; 1272 case Opt_noquota: 1273 if (sb_any_quota_loaded(sb)) { 1274 ext3_msg(sb, KERN_ERR, "error: cannot change " 1275 "quota options when quota turned on."); 1276 return 0; 1277 } 1278 clear_opt(sbi->s_mount_opt, QUOTA); 1279 clear_opt(sbi->s_mount_opt, USRQUOTA); 1280 clear_opt(sbi->s_mount_opt, GRPQUOTA); 1281 break; 1282#else 1283 case Opt_quota: 1284 case Opt_usrquota: 1285 case Opt_grpquota: 1286 ext3_msg(sb, KERN_ERR, 1287 "error: quota options not supported."); 1288 break; 1289 case Opt_usrjquota: 1290 case Opt_grpjquota: 1291 case Opt_offusrjquota: 1292 case Opt_offgrpjquota: 1293 case Opt_jqfmt_vfsold: 1294 case Opt_jqfmt_vfsv0: 1295 case Opt_jqfmt_vfsv1: 1296 ext3_msg(sb, KERN_ERR, 1297 "error: journaled quota options not " 1298 "supported."); 1299 break; 1300 case Opt_noquota: 1301 break; 1302#endif 1303 case Opt_abort: 1304 set_opt(sbi->s_mount_opt, ABORT); 1305 break; 1306 case Opt_nobarrier: 1307 clear_opt(sbi->s_mount_opt, BARRIER); 1308 break; 1309 case Opt_barrier: 1310 if (args[0].from) { 1311 if (match_int(&args[0], &option)) 1312 return 0; 1313 } else 1314 option = 1; /* No argument, default to 1 */ 1315 if (option) 1316 set_opt(sbi->s_mount_opt, BARRIER); 1317 else 1318 clear_opt(sbi->s_mount_opt, BARRIER); 1319 break; 1320 case Opt_ignore: 1321 break; 1322 case Opt_resize: 1323 if (!is_remount) { 1324 ext3_msg(sb, KERN_ERR, 1325 "error: resize option only available " 1326 "for remount"); 1327 return 0; 1328 } 1329 if (match_int(&args[0], &option) != 0) 1330 return 0; 1331 *n_blocks_count = option; 1332 break; 1333 case Opt_nobh: 1334 ext3_msg(sb, KERN_WARNING, 1335 "warning: ignoring deprecated nobh option"); 1336 break; 1337 case Opt_bh: 1338 ext3_msg(sb, KERN_WARNING, 1339 "warning: ignoring deprecated bh option"); 1340 break; 1341 default: 1342 ext3_msg(sb, KERN_ERR, 1343 "error: unrecognized mount option \"%s\" " 1344 "or missing value", p); 1345 return 0; 1346 } 1347 } 1348#ifdef CONFIG_QUOTA 
1349 if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { 1350 if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA]) 1351 clear_opt(sbi->s_mount_opt, USRQUOTA); 1352 if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA]) 1353 clear_opt(sbi->s_mount_opt, GRPQUOTA); 1354 1355 if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) { 1356 ext3_msg(sb, KERN_ERR, "error: old and new quota " 1357 "format mixing."); 1358 return 0; 1359 } 1360 1361 if (!sbi->s_jquota_fmt) { 1362 ext3_msg(sb, KERN_ERR, "error: journaled quota format " 1363 "not specified."); 1364 return 0; 1365 } 1366 } 1367#endif 1368 return 1; 1369} 1370 1371static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es, 1372 int read_only) 1373{ 1374 struct ext3_sb_info *sbi = EXT3_SB(sb); 1375 int res = 0; 1376 1377 if (le32_to_cpu(es->s_rev_level) > EXT3_MAX_SUPP_REV) { 1378 ext3_msg(sb, KERN_ERR, 1379 "error: revision level too high, " 1380 "forcing read-only mode"); 1381 res = MS_RDONLY; 1382 } 1383 if (read_only) 1384 return res; 1385 if (!(sbi->s_mount_state & EXT3_VALID_FS)) 1386 ext3_msg(sb, KERN_WARNING, 1387 "warning: mounting unchecked fs, " 1388 "running e2fsck is recommended"); 1389 else if ((sbi->s_mount_state & EXT3_ERROR_FS)) 1390 ext3_msg(sb, KERN_WARNING, 1391 "warning: mounting fs with errors, " 1392 "running e2fsck is recommended"); 1393 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) > 0 && 1394 le16_to_cpu(es->s_mnt_count) >= 1395 le16_to_cpu(es->s_max_mnt_count)) 1396 ext3_msg(sb, KERN_WARNING, 1397 "warning: maximal mount count reached, " 1398 "running e2fsck is recommended"); 1399 else if (le32_to_cpu(es->s_checkinterval) && 1400 (le32_to_cpu(es->s_lastcheck) + 1401 le32_to_cpu(es->s_checkinterval) <= get_seconds())) 1402 ext3_msg(sb, KERN_WARNING, 1403 "warning: checktime reached, " 1404 "running e2fsck is recommended"); 1405#if 0 1406 /* @@@ We _will_ want to clear the valid bit if we find 1407 inconsistencies, to force a fsck at reboot. 
But for 1408 a plain journaled filesystem we can keep it set as 1409 valid forever! :) */ 1410 es->s_state &= cpu_to_le16(~EXT3_VALID_FS); 1411#endif 1412 if (!le16_to_cpu(es->s_max_mnt_count)) 1413 es->s_max_mnt_count = cpu_to_le16(EXT3_DFL_MAX_MNT_COUNT); 1414 le16_add_cpu(&es->s_mnt_count, 1); 1415 es->s_mtime = cpu_to_le32(get_seconds()); 1416 ext3_update_dynamic_rev(sb); 1417 EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); 1418 1419 ext3_commit_super(sb, es, 1); 1420 if (test_opt(sb, DEBUG)) 1421 ext3_msg(sb, KERN_INFO, "[bs=%lu, gc=%lu, " 1422 "bpg=%lu, ipg=%lu, mo=%04lx]", 1423 sb->s_blocksize, 1424 sbi->s_groups_count, 1425 EXT3_BLOCKS_PER_GROUP(sb), 1426 EXT3_INODES_PER_GROUP(sb), 1427 sbi->s_mount_opt); 1428 1429 if (EXT3_SB(sb)->s_journal->j_inode == NULL) { 1430 char b[BDEVNAME_SIZE]; 1431 ext3_msg(sb, KERN_INFO, "using external journal on %s", 1432 bdevname(EXT3_SB(sb)->s_journal->j_dev, b)); 1433 } else { 1434 ext3_msg(sb, KERN_INFO, "using internal journal"); 1435 } 1436 cleancache_init_fs(sb); 1437 return res; 1438} 1439 1440/* Called at mount-time, super-block is locked */ 1441static int ext3_check_descriptors(struct super_block *sb) 1442{ 1443 struct ext3_sb_info *sbi = EXT3_SB(sb); 1444 int i; 1445 1446 ext3_debug ("Checking group descriptors"); 1447 1448 for (i = 0; i < sbi->s_groups_count; i++) { 1449 struct ext3_group_desc *gdp = ext3_get_group_desc(sb, i, NULL); 1450 ext3_fsblk_t first_block = ext3_group_first_block_no(sb, i); 1451 ext3_fsblk_t last_block; 1452 1453 if (i == sbi->s_groups_count - 1) 1454 last_block = le32_to_cpu(sbi->s_es->s_blocks_count) - 1; 1455 else 1456 last_block = first_block + 1457 (EXT3_BLOCKS_PER_GROUP(sb) - 1); 1458 1459 if (le32_to_cpu(gdp->bg_block_bitmap) < first_block || 1460 le32_to_cpu(gdp->bg_block_bitmap) > last_block) 1461 { 1462 ext3_error (sb, "ext3_check_descriptors", 1463 "Block bitmap for group %d" 1464 " not in group (block %lu)!", 1465 i, (unsigned long) 1466 
le32_to_cpu(gdp->bg_block_bitmap)); 1467 return 0; 1468 } 1469 if (le32_to_cpu(gdp->bg_inode_bitmap) < first_block || 1470 le32_to_cpu(gdp->bg_inode_bitmap) > last_block) 1471 { 1472 ext3_error (sb, "ext3_check_descriptors", 1473 "Inode bitmap for group %d" 1474 " not in group (block %lu)!", 1475 i, (unsigned long) 1476 le32_to_cpu(gdp->bg_inode_bitmap)); 1477 return 0; 1478 } 1479 if (le32_to_cpu(gdp->bg_inode_table) < first_block || 1480 le32_to_cpu(gdp->bg_inode_table) + sbi->s_itb_per_group - 1 > 1481 last_block) 1482 { 1483 ext3_error (sb, "ext3_check_descriptors", 1484 "Inode table for group %d" 1485 " not in group (block %lu)!", 1486 i, (unsigned long) 1487 le32_to_cpu(gdp->bg_inode_table)); 1488 return 0; 1489 } 1490 } 1491 1492 sbi->s_es->s_free_blocks_count=cpu_to_le32(ext3_count_free_blocks(sb)); 1493 sbi->s_es->s_free_inodes_count=cpu_to_le32(ext3_count_free_inodes(sb)); 1494 return 1; 1495} 1496 1497 1498/* ext3_orphan_cleanup() walks a singly-linked list of inodes (starting at 1499 * the superblock) which were deleted from all directories, but held open by 1500 * a process at the time of a crash. We walk the list and try to delete these 1501 * inodes at recovery time (only with a read-write filesystem). 1502 * 1503 * In order to keep the orphan inode chain consistent during traversal (in 1504 * case of crash during recovery), we link each inode into the superblock 1505 * orphan list_head and handle it the same way as an inode deletion during 1506 * normal operation (which journals the operations for us). 1507 * 1508 * We only do an iget() and an iput() on each inode, which is very safe if we 1509 * accidentally point at an in-use or already deleted inode. The worst that 1510 * can happen in this case is that we get a "bit already cleared" message from 1511 * ext3_free_inode(). 
The only reason we would point at a wrong inode is if 1512 * e2fsck was run on this filesystem, and it must have already done the orphan 1513 * inode cleanup for us, so we can safely abort without any further action. 1514 */ 1515static void ext3_orphan_cleanup (struct super_block * sb, 1516 struct ext3_super_block * es) 1517{ 1518 unsigned int s_flags = sb->s_flags; 1519 int nr_orphans = 0, nr_truncates = 0; 1520#ifdef CONFIG_QUOTA 1521 int i; 1522#endif 1523 if (!es->s_last_orphan) { 1524 jbd_debug(4, "no orphan inodes to clean up\n"); 1525 return; 1526 } 1527 1528 if (bdev_read_only(sb->s_bdev)) { 1529 ext3_msg(sb, KERN_ERR, "error: write access " 1530 "unavailable, skipping orphan cleanup."); 1531 return; 1532 } 1533 1534 /* Check if feature set allows readwrite operations */ 1535 if (EXT3_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP)) { 1536 ext3_msg(sb, KERN_INFO, "Skipping orphan cleanup due to " 1537 "unknown ROCOMPAT features"); 1538 return; 1539 } 1540 1541 if (EXT3_SB(sb)->s_mount_state & EXT3_ERROR_FS) { 1542 /* don't clear list on RO mount w/ errors */ 1543 if (es->s_last_orphan && !(s_flags & MS_RDONLY)) { 1544 jbd_debug(1, "Errors on filesystem, " 1545 "clearing orphan list.\n"); 1546 es->s_last_orphan = 0; 1547 } 1548 jbd_debug(1, "Skipping orphan recovery on fs with errors.\n"); 1549 return; 1550 } 1551 1552 if (s_flags & MS_RDONLY) { 1553 ext3_msg(sb, KERN_INFO, "orphan cleanup on readonly fs"); 1554 sb->s_flags &= ~MS_RDONLY; 1555 } 1556#ifdef CONFIG_QUOTA 1557 /* Needed for iput() to work correctly and not trash data */ 1558 sb->s_flags |= MS_ACTIVE; 1559 /* Turn on quotas so that they are updated correctly */ 1560 for (i = 0; i < EXT3_MAXQUOTAS; i++) { 1561 if (EXT3_SB(sb)->s_qf_names[i]) { 1562 int ret = ext3_quota_on_mount(sb, i); 1563 if (ret < 0) 1564 ext3_msg(sb, KERN_ERR, 1565 "error: cannot turn on journaled " 1566 "quota: %d", ret); 1567 } 1568 } 1569#endif 1570 1571 while (es->s_last_orphan) { 1572 struct inode *inode; 1573 
1574 inode = ext3_orphan_get(sb, le32_to_cpu(es->s_last_orphan)); 1575 if (IS_ERR(inode)) { 1576 es->s_last_orphan = 0; 1577 break; 1578 } 1579 1580 list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan); 1581 dquot_initialize(inode); 1582 if (inode->i_nlink) { 1583 printk(KERN_DEBUG 1584 "%s: truncating inode %lu to %Ld bytes\n", 1585 __func__, inode->i_ino, inode->i_size); 1586 jbd_debug(2, "truncating inode %lu to %Ld bytes\n", 1587 inode->i_ino, inode->i_size); 1588 ext3_truncate(inode); 1589 nr_truncates++; 1590 } else { 1591 printk(KERN_DEBUG 1592 "%s: deleting unreferenced inode %lu\n", 1593 __func__, inode->i_ino); 1594 jbd_debug(2, "deleting unreferenced inode %lu\n", 1595 inode->i_ino); 1596 nr_orphans++; 1597 } 1598 iput(inode); /* The delete magic happens here! */ 1599 } 1600 1601#define PLURAL(x) (x), ((x)==1) ? "" : "s" 1602 1603 if (nr_orphans) 1604 ext3_msg(sb, KERN_INFO, "%d orphan inode%s deleted", 1605 PLURAL(nr_orphans)); 1606 if (nr_truncates) 1607 ext3_msg(sb, KERN_INFO, "%d truncate%s cleaned up", 1608 PLURAL(nr_truncates)); 1609#ifdef CONFIG_QUOTA 1610 /* Turn quotas off */ 1611 for (i = 0; i < EXT3_MAXQUOTAS; i++) { 1612 if (sb_dqopt(sb)->files[i]) 1613 dquot_quota_off(sb, i); 1614 } 1615#endif 1616 sb->s_flags = s_flags; /* Restore MS_RDONLY status */ 1617} 1618 1619/* 1620 * Maximal file size. There is a direct, and {,double-,triple-}indirect 1621 * block limit, and also a limit of (2^32 - 1) 512-byte sectors in i_blocks. 1622 * We need to be 1 filesystem block less than the 2^32 sector limit. 
1623 */ 1624static loff_t ext3_max_size(int bits) 1625{ 1626 loff_t res = EXT3_NDIR_BLOCKS; 1627 int meta_blocks; 1628 loff_t upper_limit; 1629 1630 /* This is calculated to be the largest file size for a 1631 * dense, file such that the total number of 1632 * sectors in the file, including data and all indirect blocks, 1633 * does not exceed 2^32 -1 1634 * __u32 i_blocks representing the total number of 1635 * 512 bytes blocks of the file 1636 */ 1637 upper_limit = (1LL << 32) - 1; 1638 1639 /* total blocks in file system block size */ 1640 upper_limit >>= (bits - 9); 1641 1642 1643 /* indirect blocks */ 1644 meta_blocks = 1; 1645 /* double indirect blocks */ 1646 meta_blocks += 1 + (1LL << (bits-2)); 1647 /* tripple indirect blocks */ 1648 meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2))); 1649 1650 upper_limit -= meta_blocks; 1651 upper_limit <<= bits; 1652 1653 res += 1LL << (bits-2); 1654 res += 1LL << (2*(bits-2)); 1655 res += 1LL << (3*(bits-2)); 1656 res <<= bits; 1657 if (res > upper_limit) 1658 res = upper_limit; 1659 1660 if (res > MAX_LFS_FILESIZE) 1661 res = MAX_LFS_FILESIZE; 1662 1663 return res; 1664} 1665 1666static ext3_fsblk_t descriptor_loc(struct super_block *sb, 1667 ext3_fsblk_t logic_sb_block, 1668 int nr) 1669{ 1670 struct ext3_sb_info *sbi = EXT3_SB(sb); 1671 unsigned long bg, first_meta_bg; 1672 int has_super = 0; 1673 1674 first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg); 1675 1676 if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_META_BG) || 1677 nr < first_meta_bg) 1678 return (logic_sb_block + nr + 1); 1679 bg = sbi->s_desc_per_block * nr; 1680 if (ext3_bg_has_super(sb, bg)) 1681 has_super = 1; 1682 return (has_super + ext3_group_first_block_no(sb, bg)); 1683} 1684 1685 1686static int ext3_fill_super (struct super_block *sb, void *data, int silent) 1687{ 1688 struct buffer_head * bh; 1689 struct ext3_super_block *es = NULL; 1690 struct ext3_sb_info *sbi; 1691 ext3_fsblk_t block; 1692 ext3_fsblk_t sb_block = 
get_sb_block(&data, sb); 1693 ext3_fsblk_t logic_sb_block; 1694 unsigned long offset = 0; 1695 unsigned int journal_inum = 0; 1696 unsigned long journal_devnum = 0; 1697 unsigned long def_mount_opts; 1698 struct inode *root; 1699 int blocksize; 1700 int hblock; 1701 int db_count; 1702 int i; 1703 int needs_recovery; 1704 int ret = -EINVAL; 1705 __le32 features; 1706 int err; 1707 1708 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 1709 if (!sbi) 1710 return -ENOMEM; 1711 1712 sbi->s_blockgroup_lock = 1713 kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL); 1714 if (!sbi->s_blockgroup_lock) { 1715 kfree(sbi); 1716 return -ENOMEM; 1717 } 1718 sb->s_fs_info = sbi; 1719 sbi->s_sb_block = sb_block; 1720 1721 blocksize = sb_min_blocksize(sb, EXT3_MIN_BLOCK_SIZE); 1722 if (!blocksize) { 1723 ext3_msg(sb, KERN_ERR, "error: unable to set blocksize"); 1724 goto out_fail; 1725 } 1726 1727 /* 1728 * The ext3 superblock will not be buffer aligned for other than 1kB 1729 * block sizes. We need to calculate the offset from buffer start. 
1730 */ 1731 if (blocksize != EXT3_MIN_BLOCK_SIZE) { 1732 logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize; 1733 offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize; 1734 } else { 1735 logic_sb_block = sb_block; 1736 } 1737 1738 if (!(bh = sb_bread(sb, logic_sb_block))) { 1739 ext3_msg(sb, KERN_ERR, "error: unable to read superblock"); 1740 goto out_fail; 1741 } 1742 /* 1743 * Note: s_es must be initialized as soon as possible because 1744 * some ext3 macro-instructions depend on its value 1745 */ 1746 es = (struct ext3_super_block *) (bh->b_data + offset); 1747 sbi->s_es = es; 1748 sb->s_magic = le16_to_cpu(es->s_magic); 1749 if (sb->s_magic != EXT3_SUPER_MAGIC) 1750 goto cantfind_ext3; 1751 1752 /* Set defaults before we parse the mount options */ 1753 def_mount_opts = le32_to_cpu(es->s_default_mount_opts); 1754 if (def_mount_opts & EXT3_DEFM_DEBUG) 1755 set_opt(sbi->s_mount_opt, DEBUG); 1756 if (def_mount_opts & EXT3_DEFM_BSDGROUPS) 1757 set_opt(sbi->s_mount_opt, GRPID); 1758 if (def_mount_opts & EXT3_DEFM_UID16) 1759 set_opt(sbi->s_mount_opt, NO_UID32); 1760#ifdef CONFIG_EXT3_FS_XATTR 1761 if (def_mount_opts & EXT3_DEFM_XATTR_USER) 1762 set_opt(sbi->s_mount_opt, XATTR_USER); 1763#endif 1764#ifdef CONFIG_EXT3_FS_POSIX_ACL 1765 if (def_mount_opts & EXT3_DEFM_ACL) 1766 set_opt(sbi->s_mount_opt, POSIX_ACL); 1767#endif 1768 if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_DATA) 1769 set_opt(sbi->s_mount_opt, JOURNAL_DATA); 1770 else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_ORDERED) 1771 set_opt(sbi->s_mount_opt, ORDERED_DATA); 1772 else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_WBACK) 1773 set_opt(sbi->s_mount_opt, WRITEBACK_DATA); 1774 1775 if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_PANIC) 1776 set_opt(sbi->s_mount_opt, ERRORS_PANIC); 1777 else if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_CONTINUE) 1778 set_opt(sbi->s_mount_opt, ERRORS_CONT); 1779 else 1780 set_opt(sbi->s_mount_opt, 
ERRORS_RO); 1781 1782 sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid)); 1783 sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid)); 1784 1785 /* enable barriers by default */ 1786 set_opt(sbi->s_mount_opt, BARRIER); 1787 set_opt(sbi->s_mount_opt, RESERVATION); 1788 1789 if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum, 1790 NULL, 0)) 1791 goto failed_mount; 1792 1793 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 1794 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); 1795 1796 if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV && 1797 (EXT3_HAS_COMPAT_FEATURE(sb, ~0U) || 1798 EXT3_HAS_RO_COMPAT_FEATURE(sb, ~0U) || 1799 EXT3_HAS_INCOMPAT_FEATURE(sb, ~0U))) 1800 ext3_msg(sb, KERN_WARNING, 1801 "warning: feature flags set on rev 0 fs, " 1802 "running e2fsck is recommended"); 1803 /* 1804 * Check feature flags regardless of the revision level, since we 1805 * previously didn't change the revision level when setting the flags, 1806 * so there is a chance incompat flags are set on a rev 0 filesystem. 
1807 */ 1808 features = EXT3_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP); 1809 if (features) { 1810 ext3_msg(sb, KERN_ERR, 1811 "error: couldn't mount because of unsupported " 1812 "optional features (%x)", le32_to_cpu(features)); 1813 goto failed_mount; 1814 } 1815 features = EXT3_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP); 1816 if (!(sb->s_flags & MS_RDONLY) && features) { 1817 ext3_msg(sb, KERN_ERR, 1818 "error: couldn't mount RDWR because of unsupported " 1819 "optional features (%x)", le32_to_cpu(features)); 1820 goto failed_mount; 1821 } 1822 blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); 1823 1824 if (blocksize < EXT3_MIN_BLOCK_SIZE || 1825 blocksize > EXT3_MAX_BLOCK_SIZE) { 1826 ext3_msg(sb, KERN_ERR, 1827 "error: couldn't mount because of unsupported " 1828 "filesystem blocksize %d", blocksize); 1829 goto failed_mount; 1830 } 1831 1832 hblock = bdev_logical_block_size(sb->s_bdev); 1833 if (sb->s_blocksize != blocksize) { 1834 /* 1835 * Make sure the blocksize for the filesystem is larger 1836 * than the hardware sectorsize for the machine. 
1837 */ 1838 if (blocksize < hblock) { 1839 ext3_msg(sb, KERN_ERR, 1840 "error: fsblocksize %d too small for " 1841 "hardware sectorsize %d", blocksize, hblock); 1842 goto failed_mount; 1843 } 1844 1845 brelse (bh); 1846 if (!sb_set_blocksize(sb, blocksize)) { 1847 ext3_msg(sb, KERN_ERR, 1848 "error: bad blocksize %d", blocksize); 1849 goto out_fail; 1850 } 1851 logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize; 1852 offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize; 1853 bh = sb_bread(sb, logic_sb_block); 1854 if (!bh) { 1855 ext3_msg(sb, KERN_ERR, 1856 "error: can't read superblock on 2nd try"); 1857 goto failed_mount; 1858 } 1859 es = (struct ext3_super_block *)(bh->b_data + offset); 1860 sbi->s_es = es; 1861 if (es->s_magic != cpu_to_le16(EXT3_SUPER_MAGIC)) { 1862 ext3_msg(sb, KERN_ERR, 1863 "error: magic mismatch"); 1864 goto failed_mount; 1865 } 1866 } 1867 1868 sb->s_maxbytes = ext3_max_size(sb->s_blocksize_bits); 1869 1870 if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV) { 1871 sbi->s_inode_size = EXT3_GOOD_OLD_INODE_SIZE; 1872 sbi->s_first_ino = EXT3_GOOD_OLD_FIRST_INO; 1873 } else { 1874 sbi->s_inode_size = le16_to_cpu(es->s_inode_size); 1875 sbi->s_first_ino = le32_to_cpu(es->s_first_ino); 1876 if ((sbi->s_inode_size < EXT3_GOOD_OLD_INODE_SIZE) || 1877 (!is_power_of_2(sbi->s_inode_size)) || 1878 (sbi->s_inode_size > blocksize)) { 1879 ext3_msg(sb, KERN_ERR, 1880 "error: unsupported inode size: %d", 1881 sbi->s_inode_size); 1882 goto failed_mount; 1883 } 1884 } 1885 sbi->s_frag_size = EXT3_MIN_FRAG_SIZE << 1886 le32_to_cpu(es->s_log_frag_size); 1887 if (blocksize != sbi->s_frag_size) { 1888 ext3_msg(sb, KERN_ERR, 1889 "error: fragsize %lu != blocksize %u (unsupported)", 1890 sbi->s_frag_size, blocksize); 1891 goto failed_mount; 1892 } 1893 sbi->s_frags_per_block = 1; 1894 sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); 1895 sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group); 1896 sbi->s_inodes_per_group = 
le32_to_cpu(es->s_inodes_per_group); 1897 if (EXT3_INODE_SIZE(sb) == 0 || EXT3_INODES_PER_GROUP(sb) == 0) 1898 goto cantfind_ext3; 1899 sbi->s_inodes_per_block = blocksize / EXT3_INODE_SIZE(sb); 1900 if (sbi->s_inodes_per_block == 0) 1901 goto cantfind_ext3; 1902 sbi->s_itb_per_group = sbi->s_inodes_per_group / 1903 sbi->s_inodes_per_block; 1904 sbi->s_desc_per_block = blocksize / sizeof(struct ext3_group_desc); 1905 sbi->s_sbh = bh; 1906 sbi->s_mount_state = le16_to_cpu(es->s_state); 1907 sbi->s_addr_per_block_bits = ilog2(EXT3_ADDR_PER_BLOCK(sb)); 1908 sbi->s_desc_per_block_bits = ilog2(EXT3_DESC_PER_BLOCK(sb)); 1909 for (i=0; i < 4; i++) 1910 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); 1911 sbi->s_def_hash_version = es->s_def_hash_version; 1912 i = le32_to_cpu(es->s_flags); 1913 if (i & EXT2_FLAGS_UNSIGNED_HASH) 1914 sbi->s_hash_unsigned = 3; 1915 else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) { 1916#ifdef __CHAR_UNSIGNED__ 1917 es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH); 1918 sbi->s_hash_unsigned = 3; 1919#else 1920 es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH); 1921#endif 1922 } 1923 1924 if (sbi->s_blocks_per_group > blocksize * 8) { 1925 ext3_msg(sb, KERN_ERR, 1926 "#blocks per group too big: %lu", 1927 sbi->s_blocks_per_group); 1928 goto failed_mount; 1929 } 1930 if (sbi->s_frags_per_group > blocksize * 8) { 1931 ext3_msg(sb, KERN_ERR, 1932 "error: #fragments per group too big: %lu", 1933 sbi->s_frags_per_group); 1934 goto failed_mount; 1935 } 1936 if (sbi->s_inodes_per_group > blocksize * 8) { 1937 ext3_msg(sb, KERN_ERR, 1938 "error: #inodes per group too big: %lu", 1939 sbi->s_inodes_per_group); 1940 goto failed_mount; 1941 } 1942 1943 err = generic_check_addressable(sb->s_blocksize_bits, 1944 le32_to_cpu(es->s_blocks_count)); 1945 if (err) { 1946 ext3_msg(sb, KERN_ERR, 1947 "error: filesystem is too large to mount safely"); 1948 if (sizeof(sector_t) < 8) 1949 ext3_msg(sb, KERN_ERR, 1950 "error: CONFIG_LBDAF not enabled"); 1951 ret 
= err; 1952 goto failed_mount; 1953 } 1954 1955 if (EXT3_BLOCKS_PER_GROUP(sb) == 0) 1956 goto cantfind_ext3; 1957 sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) - 1958 le32_to_cpu(es->s_first_data_block) - 1) 1959 / EXT3_BLOCKS_PER_GROUP(sb)) + 1; 1960 db_count = DIV_ROUND_UP(sbi->s_groups_count, EXT3_DESC_PER_BLOCK(sb)); 1961 sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *), 1962 GFP_KERNEL); 1963 if (sbi->s_group_desc == NULL) { 1964 ext3_msg(sb, KERN_ERR, 1965 "error: not enough memory"); 1966 ret = -ENOMEM; 1967 goto failed_mount; 1968 } 1969 1970 bgl_lock_init(sbi->s_blockgroup_lock); 1971 1972 for (i = 0; i < db_count; i++) { 1973 block = descriptor_loc(sb, logic_sb_block, i); 1974 sbi->s_group_desc[i] = sb_bread(sb, block); 1975 if (!sbi->s_group_desc[i]) { 1976 ext3_msg(sb, KERN_ERR, 1977 "error: can't read group descriptor %d", i); 1978 db_count = i; 1979 goto failed_mount2; 1980 } 1981 } 1982 if (!ext3_check_descriptors (sb)) { 1983 ext3_msg(sb, KERN_ERR, 1984 "error: group descriptors corrupted"); 1985 goto failed_mount2; 1986 } 1987 sbi->s_gdb_count = db_count; 1988 get_random_bytes(&sbi->s_next_generation, sizeof(u32)); 1989 spin_lock_init(&sbi->s_next_gen_lock); 1990 1991 /* per fileystem reservation list head & lock */ 1992 spin_lock_init(&sbi->s_rsv_window_lock); 1993 sbi->s_rsv_window_root = RB_ROOT; 1994 /* Add a single, static dummy reservation to the start of the 1995 * reservation window list --- it gives us a placeholder for 1996 * append-at-start-of-list which makes the allocation logic 1997 * _much_ simpler. 
*/ 1998 sbi->s_rsv_window_head.rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; 1999 sbi->s_rsv_window_head.rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; 2000 sbi->s_rsv_window_head.rsv_alloc_hit = 0; 2001 sbi->s_rsv_window_head.rsv_goal_size = 0; 2002 ext3_rsv_window_add(sb, &sbi->s_rsv_window_head); 2003 2004 /* 2005 * set up enough so that it can read an inode 2006 */ 2007 sb->s_op = &ext3_sops; 2008 sb->s_export_op = &ext3_export_ops; 2009 sb->s_xattr = ext3_xattr_handlers; 2010#ifdef CONFIG_QUOTA 2011 sb->s_qcop = &ext3_qctl_operations; 2012 sb->dq_op = &ext3_quota_operations; 2013 sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; 2014#endif 2015 memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid)); 2016 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ 2017 mutex_init(&sbi->s_orphan_lock); 2018 mutex_init(&sbi->s_resize_lock); 2019 2020 sb->s_root = NULL; 2021 2022 needs_recovery = (es->s_last_orphan != 0 || 2023 EXT3_HAS_INCOMPAT_FEATURE(sb, 2024 EXT3_FEATURE_INCOMPAT_RECOVER)); 2025 2026 /* 2027 * The first inode we look at is the journal inode. Don't try 2028 * root first: it may be modified in the journal! 2029 */ 2030 if (!test_opt(sb, NOLOAD) && 2031 EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) { 2032 if (ext3_load_journal(sb, es, journal_devnum)) 2033 goto failed_mount2; 2034 } else if (journal_inum) { 2035 if (ext3_create_journal(sb, es, journal_inum)) 2036 goto failed_mount2; 2037 } else { 2038 if (!silent) 2039 ext3_msg(sb, KERN_ERR, 2040 "error: no journal found. 
" 2041 "mounting ext3 over ext2?"); 2042 goto failed_mount2; 2043 } 2044 err = percpu_counter_init(&sbi->s_freeblocks_counter, 2045 ext3_count_free_blocks(sb), GFP_KERNEL); 2046 if (!err) { 2047 err = percpu_counter_init(&sbi->s_freeinodes_counter, 2048 ext3_count_free_inodes(sb), GFP_KERNEL); 2049 } 2050 if (!err) { 2051 err = percpu_counter_init(&sbi->s_dirs_counter, 2052 ext3_count_dirs(sb), GFP_KERNEL); 2053 } 2054 if (err) { 2055 ext3_msg(sb, KERN_ERR, "error: insufficient memory"); 2056 ret = err; 2057 goto failed_mount3; 2058 } 2059 2060 /* We have now updated the journal if required, so we can 2061 * validate the data journaling mode. */ 2062 switch (test_opt(sb, DATA_FLAGS)) { 2063 case 0: 2064 /* No mode set, assume a default based on the journal 2065 capabilities: ORDERED_DATA if the journal can 2066 cope, else JOURNAL_DATA */ 2067 if (journal_check_available_features 2068 (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)) 2069 set_opt(sbi->s_mount_opt, DEFAULT_DATA_MODE); 2070 else 2071 set_opt(sbi->s_mount_opt, JOURNAL_DATA); 2072 break; 2073 2074 case EXT3_MOUNT_ORDERED_DATA: 2075 case EXT3_MOUNT_WRITEBACK_DATA: 2076 if (!journal_check_available_features 2077 (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)) { 2078 ext3_msg(sb, KERN_ERR, 2079 "error: journal does not support " 2080 "requested data journaling mode"); 2081 goto failed_mount3; 2082 } 2083 default: 2084 break; 2085 } 2086 2087 /* 2088 * The journal_load will have done any necessary log recovery, 2089 * so we can safely mount the rest of the filesystem now. 
2090 */ 2091 2092 root = ext3_iget(sb, EXT3_ROOT_INO); 2093 if (IS_ERR(root)) { 2094 ext3_msg(sb, KERN_ERR, "error: get root inode failed"); 2095 ret = PTR_ERR(root); 2096 goto failed_mount3; 2097 } 2098 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { 2099 iput(root); 2100 ext3_msg(sb, KERN_ERR, "error: corrupt root inode, run e2fsck"); 2101 goto failed_mount3; 2102 } 2103 sb->s_root = d_make_root(root); 2104 if (!sb->s_root) { 2105 ext3_msg(sb, KERN_ERR, "error: get root dentry failed"); 2106 ret = -ENOMEM; 2107 goto failed_mount3; 2108 } 2109 2110 if (ext3_setup_super(sb, es, sb->s_flags & MS_RDONLY)) 2111 sb->s_flags |= MS_RDONLY; 2112 2113 EXT3_SB(sb)->s_mount_state |= EXT3_ORPHAN_FS; 2114 ext3_orphan_cleanup(sb, es); 2115 EXT3_SB(sb)->s_mount_state &= ~EXT3_ORPHAN_FS; 2116 if (needs_recovery) { 2117 ext3_mark_recovery_complete(sb, es); 2118 ext3_msg(sb, KERN_INFO, "recovery complete"); 2119 } 2120 ext3_msg(sb, KERN_INFO, "mounted filesystem with %s data mode", 2121 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal": 2122 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? 
"ordered": 2123 "writeback"); 2124 2125 return 0; 2126 2127cantfind_ext3: 2128 if (!silent) 2129 ext3_msg(sb, KERN_INFO, 2130 "error: can't find ext3 filesystem on dev %s.", 2131 sb->s_id); 2132 goto failed_mount; 2133 2134failed_mount3: 2135 percpu_counter_destroy(&sbi->s_freeblocks_counter); 2136 percpu_counter_destroy(&sbi->s_freeinodes_counter); 2137 percpu_counter_destroy(&sbi->s_dirs_counter); 2138 journal_destroy(sbi->s_journal); 2139failed_mount2: 2140 for (i = 0; i < db_count; i++) 2141 brelse(sbi->s_group_desc[i]); 2142 kfree(sbi->s_group_desc); 2143failed_mount: 2144#ifdef CONFIG_QUOTA 2145 for (i = 0; i < EXT3_MAXQUOTAS; i++) 2146 kfree(sbi->s_qf_names[i]); 2147#endif 2148 ext3_blkdev_remove(sbi); 2149 brelse(bh); 2150out_fail: 2151 sb->s_fs_info = NULL; 2152 kfree(sbi->s_blockgroup_lock); 2153 kfree(sbi); 2154 return ret; 2155} 2156 2157/* 2158 * Setup any per-fs journal parameters now. We'll do this both on 2159 * initial mount, once the journal has been initialised but before we've 2160 * done any recovery; and again on any subsequent remount. 2161 */ 2162static void ext3_init_journal_params(struct super_block *sb, journal_t *journal) 2163{ 2164 struct ext3_sb_info *sbi = EXT3_SB(sb); 2165 2166 if (sbi->s_commit_interval) 2167 journal->j_commit_interval = sbi->s_commit_interval; 2168 /* We could also set up an ext3-specific default for the commit 2169 * interval here, but for now we'll just fall back to the jbd 2170 * default. 
*/ 2171 2172 spin_lock(&journal->j_state_lock); 2173 if (test_opt(sb, BARRIER)) 2174 journal->j_flags |= JFS_BARRIER; 2175 else 2176 journal->j_flags &= ~JFS_BARRIER; 2177 if (test_opt(sb, DATA_ERR_ABORT)) 2178 journal->j_flags |= JFS_ABORT_ON_SYNCDATA_ERR; 2179 else 2180 journal->j_flags &= ~JFS_ABORT_ON_SYNCDATA_ERR; 2181 spin_unlock(&journal->j_state_lock); 2182} 2183 2184static journal_t *ext3_get_journal(struct super_block *sb, 2185 unsigned int journal_inum) 2186{ 2187 struct inode *journal_inode; 2188 journal_t *journal; 2189 2190 /* First, test for the existence of a valid inode on disk. Bad 2191 * things happen if we iget() an unused inode, as the subsequent 2192 * iput() will try to delete it. */ 2193 2194 journal_inode = ext3_iget(sb, journal_inum); 2195 if (IS_ERR(journal_inode)) { 2196 ext3_msg(sb, KERN_ERR, "error: no journal found"); 2197 return NULL; 2198 } 2199 if (!journal_inode->i_nlink) { 2200 make_bad_inode(journal_inode); 2201 iput(journal_inode); 2202 ext3_msg(sb, KERN_ERR, "error: journal inode is deleted"); 2203 return NULL; 2204 } 2205 2206 jbd_debug(2, "Journal inode found at %p: %Ld bytes\n", 2207 journal_inode, journal_inode->i_size); 2208 if (!S_ISREG(journal_inode->i_mode)) { 2209 ext3_msg(sb, KERN_ERR, "error: invalid journal inode"); 2210 iput(journal_inode); 2211 return NULL; 2212 } 2213 2214 journal = journal_init_inode(journal_inode); 2215 if (!journal) { 2216 ext3_msg(sb, KERN_ERR, "error: could not load journal inode"); 2217 iput(journal_inode); 2218 return NULL; 2219 } 2220 journal->j_private = sb; 2221 ext3_init_journal_params(sb, journal); 2222 return journal; 2223} 2224 2225static journal_t *ext3_get_dev_journal(struct super_block *sb, 2226 dev_t j_dev) 2227{ 2228 struct buffer_head * bh; 2229 journal_t *journal; 2230 ext3_fsblk_t start; 2231 ext3_fsblk_t len; 2232 int hblock, blocksize; 2233 ext3_fsblk_t sb_block; 2234 unsigned long offset; 2235 struct ext3_super_block * es; 2236 struct block_device *bdev; 2237 2238 bdev = 
ext3_blkdev_get(j_dev, sb); 2239 if (bdev == NULL) 2240 return NULL; 2241 2242 blocksize = sb->s_blocksize; 2243 hblock = bdev_logical_block_size(bdev); 2244 if (blocksize < hblock) { 2245 ext3_msg(sb, KERN_ERR, 2246 "error: blocksize too small for journal device"); 2247 goto out_bdev; 2248 } 2249 2250 sb_block = EXT3_MIN_BLOCK_SIZE / blocksize; 2251 offset = EXT3_MIN_BLOCK_SIZE % blocksize; 2252 set_blocksize(bdev, blocksize); 2253 if (!(bh = __bread(bdev, sb_block, blocksize))) { 2254 ext3_msg(sb, KERN_ERR, "error: couldn't read superblock of " 2255 "external journal"); 2256 goto out_bdev; 2257 } 2258 2259 es = (struct ext3_super_block *) (bh->b_data + offset); 2260 if ((le16_to_cpu(es->s_magic) != EXT3_SUPER_MAGIC) || 2261 !(le32_to_cpu(es->s_feature_incompat) & 2262 EXT3_FEATURE_INCOMPAT_JOURNAL_DEV)) { 2263 ext3_msg(sb, KERN_ERR, "error: external journal has " 2264 "bad superblock"); 2265 brelse(bh); 2266 goto out_bdev; 2267 } 2268 2269 if (memcmp(EXT3_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { 2270 ext3_msg(sb, KERN_ERR, "error: journal UUID does not match"); 2271 brelse(bh); 2272 goto out_bdev; 2273 } 2274 2275 len = le32_to_cpu(es->s_blocks_count); 2276 start = sb_block + 1; 2277 brelse(bh); /* we're done with the superblock */ 2278 2279 journal = journal_init_dev(bdev, sb->s_bdev, 2280 start, len, blocksize); 2281 if (!journal) { 2282 ext3_msg(sb, KERN_ERR, 2283 "error: failed to create device journal"); 2284 goto out_bdev; 2285 } 2286 journal->j_private = sb; 2287 if (!bh_uptodate_or_lock(journal->j_sb_buffer)) { 2288 if (bh_submit_read(journal->j_sb_buffer)) { 2289 ext3_msg(sb, KERN_ERR, "I/O error on journal device"); 2290 goto out_journal; 2291 } 2292 } 2293 if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { 2294 ext3_msg(sb, KERN_ERR, 2295 "error: external journal has more than one " 2296 "user (unsupported) - %d", 2297 be32_to_cpu(journal->j_superblock->s_nr_users)); 2298 goto out_journal; 2299 } 2300 EXT3_SB(sb)->journal_bdev = bdev; 
2301 ext3_init_journal_params(sb, journal); 2302 return journal; 2303out_journal: 2304 journal_destroy(journal); 2305out_bdev: 2306 ext3_blkdev_put(bdev); 2307 return NULL; 2308} 2309 2310static int ext3_load_journal(struct super_block *sb, 2311 struct ext3_super_block *es, 2312 unsigned long journal_devnum) 2313{ 2314 journal_t *journal; 2315 unsigned int journal_inum = le32_to_cpu(es->s_journal_inum); 2316 dev_t journal_dev; 2317 int err = 0; 2318 int really_read_only; 2319 2320 if (journal_devnum && 2321 journal_devnum != le32_to_cpu(es->s_journal_dev)) { 2322 ext3_msg(sb, KERN_INFO, "external journal device major/minor " 2323 "numbers have changed"); 2324 journal_dev = new_decode_dev(journal_devnum); 2325 } else 2326 journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev)); 2327 2328 really_read_only = bdev_read_only(sb->s_bdev); 2329 2330 /* 2331 * Are we loading a blank journal or performing recovery after a 2332 * crash? For recovery, we need to check in advance whether we 2333 * can get read-write access to the device. 
2334 */ 2335 2336 if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER)) { 2337 if (sb->s_flags & MS_RDONLY) { 2338 ext3_msg(sb, KERN_INFO, 2339 "recovery required on readonly filesystem"); 2340 if (really_read_only) { 2341 ext3_msg(sb, KERN_ERR, "error: write access " 2342 "unavailable, cannot proceed"); 2343 return -EROFS; 2344 } 2345 ext3_msg(sb, KERN_INFO, 2346 "write access will be enabled during recovery"); 2347 } 2348 } 2349 2350 if (journal_inum && journal_dev) { 2351 ext3_msg(sb, KERN_ERR, "error: filesystem has both journal " 2352 "and inode journals"); 2353 return -EINVAL; 2354 } 2355 2356 if (journal_inum) { 2357 if (!(journal = ext3_get_journal(sb, journal_inum))) 2358 return -EINVAL; 2359 } else { 2360 if (!(journal = ext3_get_dev_journal(sb, journal_dev))) 2361 return -EINVAL; 2362 } 2363 2364 if (!(journal->j_flags & JFS_BARRIER)) 2365 printk(KERN_INFO "EXT3-fs: barriers not enabled\n"); 2366 2367 if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { 2368 err = journal_update_format(journal); 2369 if (err) { 2370 ext3_msg(sb, KERN_ERR, "error updating journal"); 2371 journal_destroy(journal); 2372 return err; 2373 } 2374 } 2375 2376 if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER)) 2377 err = journal_wipe(journal, !really_read_only); 2378 if (!err) 2379 err = journal_load(journal); 2380 2381 if (err) { 2382 ext3_msg(sb, KERN_ERR, "error loading journal"); 2383 journal_destroy(journal); 2384 return err; 2385 } 2386 2387 EXT3_SB(sb)->s_journal = journal; 2388 ext3_clear_journal_err(sb, es); 2389 2390 if (!really_read_only && journal_devnum && 2391 journal_devnum != le32_to_cpu(es->s_journal_dev)) { 2392 es->s_journal_dev = cpu_to_le32(journal_devnum); 2393 2394 /* Make sure we flush the recovery flag to disk. 
*/ 2395 ext3_commit_super(sb, es, 1); 2396 } 2397 2398 return 0; 2399} 2400 2401static int ext3_create_journal(struct super_block *sb, 2402 struct ext3_super_block *es, 2403 unsigned int journal_inum) 2404{ 2405 journal_t *journal; 2406 int err; 2407 2408 if (sb->s_flags & MS_RDONLY) { 2409 ext3_msg(sb, KERN_ERR, 2410 "error: readonly filesystem when trying to " 2411 "create journal"); 2412 return -EROFS; 2413 } 2414 2415 journal = ext3_get_journal(sb, journal_inum); 2416 if (!journal) 2417 return -EINVAL; 2418 2419 ext3_msg(sb, KERN_INFO, "creating new journal on inode %u", 2420 journal_inum); 2421 2422 err = journal_create(journal); 2423 if (err) { 2424 ext3_msg(sb, KERN_ERR, "error creating journal"); 2425 journal_destroy(journal); 2426 return -EIO; 2427 } 2428 2429 EXT3_SB(sb)->s_journal = journal; 2430 2431 ext3_update_dynamic_rev(sb); 2432 EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); 2433 EXT3_SET_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL); 2434 2435 es->s_journal_inum = cpu_to_le32(journal_inum); 2436 2437 /* Make sure we flush the recovery flag to disk. */ 2438 ext3_commit_super(sb, es, 1); 2439 2440 return 0; 2441} 2442 2443static int ext3_commit_super(struct super_block *sb, 2444 struct ext3_super_block *es, 2445 int sync) 2446{ 2447 struct buffer_head *sbh = EXT3_SB(sb)->s_sbh; 2448 int error = 0; 2449 2450 if (!sbh) 2451 return error; 2452 2453 if (buffer_write_io_error(sbh)) { 2454 /* 2455 * Oh, dear. A previous attempt to write the 2456 * superblock failed. This could happen because the 2457 * USB device was yanked out. Or it could happen to 2458 * be a transient write error and maybe the block will 2459 * be remapped. Nothing we can do but to retry the 2460 * write and hope for the best. 
2461 */ 2462 ext3_msg(sb, KERN_ERR, "previous I/O error to " 2463 "superblock detected"); 2464 clear_buffer_write_io_error(sbh); 2465 set_buffer_uptodate(sbh); 2466 } 2467 /* 2468 * If the file system is mounted read-only, don't update the 2469 * superblock write time. This avoids updating the superblock 2470 * write time when we are mounting the root file system 2471 * read/only but we need to replay the journal; at that point, 2472 * for people who are east of GMT and who make their clock 2473 * tick in localtime for Windows bug-for-bug compatibility, 2474 * the clock is set in the future, and this will cause e2fsck 2475 * to complain and force a full file system check. 2476 */ 2477 if (!(sb->s_flags & MS_RDONLY)) 2478 es->s_wtime = cpu_to_le32(get_seconds()); 2479 es->s_free_blocks_count = cpu_to_le32(ext3_count_free_blocks(sb)); 2480 es->s_free_inodes_count = cpu_to_le32(ext3_count_free_inodes(sb)); 2481 BUFFER_TRACE(sbh, "marking dirty"); 2482 mark_buffer_dirty(sbh); 2483 if (sync) { 2484 error = sync_dirty_buffer(sbh); 2485 if (buffer_write_io_error(sbh)) { 2486 ext3_msg(sb, KERN_ERR, "I/O error while writing " 2487 "superblock"); 2488 clear_buffer_write_io_error(sbh); 2489 set_buffer_uptodate(sbh); 2490 } 2491 } 2492 return error; 2493} 2494 2495 2496/* 2497 * Have we just finished recovery? If so, and if we are mounting (or 2498 * remounting) the filesystem readonly, then we will end up with a 2499 * consistent fs on disk. Record that fact. 
2500 */ 2501static void ext3_mark_recovery_complete(struct super_block * sb, 2502 struct ext3_super_block * es) 2503{ 2504 journal_t *journal = EXT3_SB(sb)->s_journal; 2505 2506 journal_lock_updates(journal); 2507 if (journal_flush(journal) < 0) 2508 goto out; 2509 2510 if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) && 2511 sb->s_flags & MS_RDONLY) { 2512 EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); 2513 ext3_commit_super(sb, es, 1); 2514 } 2515 2516out: 2517 journal_unlock_updates(journal); 2518} 2519 2520/* 2521 * If we are mounting (or read-write remounting) a filesystem whose journal 2522 * has recorded an error from a previous lifetime, move that error to the 2523 * main filesystem now. 2524 */ 2525static void ext3_clear_journal_err(struct super_block *sb, 2526 struct ext3_super_block *es) 2527{ 2528 journal_t *journal; 2529 int j_errno; 2530 const char *errstr; 2531 2532 journal = EXT3_SB(sb)->s_journal; 2533 2534 /* 2535 * Now check for any error status which may have been recorded in the 2536 * journal by a prior ext3_error() or ext3_abort() 2537 */ 2538 2539 j_errno = journal_errno(journal); 2540 if (j_errno) { 2541 char nbuf[16]; 2542 2543 errstr = ext3_decode_error(sb, j_errno, nbuf); 2544 ext3_warning(sb, __func__, "Filesystem error recorded " 2545 "from previous mount: %s", errstr); 2546 ext3_warning(sb, __func__, "Marking fs in need of " 2547 "filesystem check."); 2548 2549 EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; 2550 es->s_state |= cpu_to_le16(EXT3_ERROR_FS); 2551 ext3_commit_super (sb, es, 1); 2552 2553 journal_clear_err(journal); 2554 } 2555} 2556 2557/* 2558 * Force the running and committing transactions to commit, 2559 * and wait on the commit. 
2560 */ 2561int ext3_force_commit(struct super_block *sb) 2562{ 2563 journal_t *journal; 2564 int ret; 2565 2566 if (sb->s_flags & MS_RDONLY) 2567 return 0; 2568 2569 journal = EXT3_SB(sb)->s_journal; 2570 ret = ext3_journal_force_commit(journal); 2571 return ret; 2572} 2573 2574static int ext3_sync_fs(struct super_block *sb, int wait) 2575{ 2576 tid_t target; 2577 2578 trace_ext3_sync_fs(sb, wait); 2579 /* 2580 * Writeback quota in non-journalled quota case - journalled quota has 2581 * no dirty dquots 2582 */ 2583 dquot_writeback_dquots(sb, -1); 2584 if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) { 2585 if (wait) 2586 log_wait_commit(EXT3_SB(sb)->s_journal, target); 2587 } 2588 return 0; 2589} 2590 2591/* 2592 * LVM calls this function before a (read-only) snapshot is created. This 2593 * gives us a chance to flush the journal completely and mark the fs clean. 2594 */ 2595static int ext3_freeze(struct super_block *sb) 2596{ 2597 int error = 0; 2598 journal_t *journal; 2599 2600 if (!(sb->s_flags & MS_RDONLY)) { 2601 journal = EXT3_SB(sb)->s_journal; 2602 2603 /* Now we set up the journal barrier. */ 2604 journal_lock_updates(journal); 2605 2606 /* 2607 * We don't want to clear needs_recovery flag when we failed 2608 * to flush the journal. 2609 */ 2610 error = journal_flush(journal); 2611 if (error < 0) 2612 goto out; 2613 2614 /* Journal blocked and flushed, clear needs_recovery flag. */ 2615 EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); 2616 error = ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1); 2617 if (error) 2618 goto out; 2619 } 2620 return 0; 2621 2622out: 2623 journal_unlock_updates(journal); 2624 return error; 2625} 2626 2627/* 2628 * Called by LVM after the snapshot is done. We need to reset the RECOVER 2629 * flag here, even though the filesystem is not technically dirty yet. 
2630 */ 2631static int ext3_unfreeze(struct super_block *sb) 2632{ 2633 if (!(sb->s_flags & MS_RDONLY)) { 2634 /* Reser the needs_recovery flag before the fs is unlocked. */ 2635 EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); 2636 ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1); 2637 journal_unlock_updates(EXT3_SB(sb)->s_journal); 2638 } 2639 return 0; 2640} 2641 2642static int ext3_remount (struct super_block * sb, int * flags, char * data) 2643{ 2644 struct ext3_super_block * es; 2645 struct ext3_sb_info *sbi = EXT3_SB(sb); 2646 ext3_fsblk_t n_blocks_count = 0; 2647 unsigned long old_sb_flags; 2648 struct ext3_mount_options old_opts; 2649 int enable_quota = 0; 2650 int err; 2651#ifdef CONFIG_QUOTA 2652 int i; 2653#endif 2654 2655 sync_filesystem(sb); 2656 2657 /* Store the original options */ 2658 old_sb_flags = sb->s_flags; 2659 old_opts.s_mount_opt = sbi->s_mount_opt; 2660 old_opts.s_resuid = sbi->s_resuid; 2661 old_opts.s_resgid = sbi->s_resgid; 2662 old_opts.s_commit_interval = sbi->s_commit_interval; 2663#ifdef CONFIG_QUOTA 2664 old_opts.s_jquota_fmt = sbi->s_jquota_fmt; 2665 for (i = 0; i < EXT3_MAXQUOTAS; i++) 2666 if (sbi->s_qf_names[i]) { 2667 old_opts.s_qf_names[i] = kstrdup(sbi->s_qf_names[i], 2668 GFP_KERNEL); 2669 if (!old_opts.s_qf_names[i]) { 2670 int j; 2671 2672 for (j = 0; j < i; j++) 2673 kfree(old_opts.s_qf_names[j]); 2674 return -ENOMEM; 2675 } 2676 } else 2677 old_opts.s_qf_names[i] = NULL; 2678#endif 2679 2680 /* 2681 * Allow the "check" option to be passed as a remount option. 2682 */ 2683 if (!parse_options(data, sb, NULL, NULL, &n_blocks_count, 1)) { 2684 err = -EINVAL; 2685 goto restore_opts; 2686 } 2687 2688 if (test_opt(sb, ABORT)) 2689 ext3_abort(sb, __func__, "Abort forced by user"); 2690 2691 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 2692 (test_opt(sb, POSIX_ACL) ? 
MS_POSIXACL : 0); 2693 2694 es = sbi->s_es; 2695 2696 ext3_init_journal_params(sb, sbi->s_journal); 2697 2698 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) || 2699 n_blocks_count > le32_to_cpu(es->s_blocks_count)) { 2700 if (test_opt(sb, ABORT)) { 2701 err = -EROFS; 2702 goto restore_opts; 2703 } 2704 2705 if (*flags & MS_RDONLY) { 2706 err = dquot_suspend(sb, -1); 2707 if (err < 0) 2708 goto restore_opts; 2709 2710 /* 2711 * First of all, the unconditional stuff we have to do 2712 * to disable replay of the journal when we next remount 2713 */ 2714 sb->s_flags |= MS_RDONLY; 2715 2716 /* 2717 * OK, test if we are remounting a valid rw partition 2718 * readonly, and if so set the rdonly flag and then 2719 * mark the partition as valid again. 2720 */ 2721 if (!(es->s_state & cpu_to_le16(EXT3_VALID_FS)) && 2722 (sbi->s_mount_state & EXT3_VALID_FS)) 2723 es->s_state = cpu_to_le16(sbi->s_mount_state); 2724 2725 ext3_mark_recovery_complete(sb, es); 2726 } else { 2727 __le32 ret; 2728 if ((ret = EXT3_HAS_RO_COMPAT_FEATURE(sb, 2729 ~EXT3_FEATURE_RO_COMPAT_SUPP))) { 2730 ext3_msg(sb, KERN_WARNING, 2731 "warning: couldn't remount RDWR " 2732 "because of unsupported optional " 2733 "features (%x)", le32_to_cpu(ret)); 2734 err = -EROFS; 2735 goto restore_opts; 2736 } 2737 2738 /* 2739 * If we have an unprocessed orphan list hanging 2740 * around from a previously readonly bdev mount, 2741 * require a full umount & mount for now. 2742 */ 2743 if (es->s_last_orphan) { 2744 ext3_msg(sb, KERN_WARNING, "warning: couldn't " 2745 "remount RDWR because of unprocessed " 2746 "orphan inode list. Please " 2747 "umount & mount instead."); 2748 err = -EINVAL; 2749 goto restore_opts; 2750 } 2751 2752 /* 2753 * Mounting a RDONLY partition read-write, so reread 2754 * and store the current valid flag. (It may have 2755 * been changed by e2fsck since we originally mounted 2756 * the partition.) 
2757 */ 2758 ext3_clear_journal_err(sb, es); 2759 sbi->s_mount_state = le16_to_cpu(es->s_state); 2760 if ((err = ext3_group_extend(sb, es, n_blocks_count))) 2761 goto restore_opts; 2762 if (!ext3_setup_super (sb, es, 0)) 2763 sb->s_flags &= ~MS_RDONLY; 2764 enable_quota = 1; 2765 } 2766 } 2767#ifdef CONFIG_QUOTA 2768 /* Release old quota file names */ 2769 for (i = 0; i < EXT3_MAXQUOTAS; i++) 2770 kfree(old_opts.s_qf_names[i]); 2771#endif 2772 if (enable_quota) 2773 dquot_resume(sb, -1); 2774 return 0; 2775restore_opts: 2776 sb->s_flags = old_sb_flags; 2777 sbi->s_mount_opt = old_opts.s_mount_opt; 2778 sbi->s_resuid = old_opts.s_resuid; 2779 sbi->s_resgid = old_opts.s_resgid; 2780 sbi->s_commit_interval = old_opts.s_commit_interval; 2781#ifdef CONFIG_QUOTA 2782 sbi->s_jquota_fmt = old_opts.s_jquota_fmt; 2783 for (i = 0; i < EXT3_MAXQUOTAS; i++) { 2784 kfree(sbi->s_qf_names[i]); 2785 sbi->s_qf_names[i] = old_opts.s_qf_names[i]; 2786 } 2787#endif 2788 return err; 2789} 2790 2791static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf) 2792{ 2793 struct super_block *sb = dentry->d_sb; 2794 struct ext3_sb_info *sbi = EXT3_SB(sb); 2795 struct ext3_super_block *es = sbi->s_es; 2796 u64 fsid; 2797 2798 if (test_opt(sb, MINIX_DF)) { 2799 sbi->s_overhead_last = 0; 2800 } else if (sbi->s_blocks_last != le32_to_cpu(es->s_blocks_count)) { 2801 unsigned long ngroups = sbi->s_groups_count, i; 2802 ext3_fsblk_t overhead = 0; 2803 smp_rmb(); 2804 2805 /* 2806 * Compute the overhead (FS structures). This is constant 2807 * for a given filesystem unless the number of block groups 2808 * changes so we cache the previous value until it does. 2809 */ 2810 2811 /* 2812 * All of the blocks before first_data_block are 2813 * overhead 2814 */ 2815 overhead = le32_to_cpu(es->s_first_data_block); 2816 2817 /* 2818 * Add the overhead attributed to the superblock and 2819 * block group descriptors. 
If the sparse superblocks 2820 * feature is turned on, then not all groups have this. 2821 */ 2822 for (i = 0; i < ngroups; i++) { 2823 overhead += ext3_bg_has_super(sb, i) + 2824 ext3_bg_num_gdb(sb, i); 2825 cond_resched(); 2826 } 2827 2828 /* 2829 * Every block group has an inode bitmap, a block 2830 * bitmap, and an inode table. 2831 */ 2832 overhead += ngroups * (2 + sbi->s_itb_per_group); 2833 2834 /* Add the internal journal blocks as well */ 2835 if (sbi->s_journal && !sbi->journal_bdev) 2836 overhead += sbi->s_journal->j_maxlen; 2837 2838 sbi->s_overhead_last = overhead; 2839 smp_wmb(); 2840 sbi->s_blocks_last = le32_to_cpu(es->s_blocks_count); 2841 } 2842 2843 buf->f_type = EXT3_SUPER_MAGIC; 2844 buf->f_bsize = sb->s_blocksize; 2845 buf->f_blocks = le32_to_cpu(es->s_blocks_count) - sbi->s_overhead_last; 2846 buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter); 2847 buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count); 2848 if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count)) 2849 buf->f_bavail = 0; 2850 buf->f_files = le32_to_cpu(es->s_inodes_count); 2851 buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter); 2852 buf->f_namelen = EXT3_NAME_LEN; 2853 fsid = le64_to_cpup((void *)es->s_uuid) ^ 2854 le64_to_cpup((void *)es->s_uuid + sizeof(u64)); 2855 buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; 2856 buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; 2857 return 0; 2858} 2859 2860/* Helper function for writing quotas on sync - we need to start transaction before quota file 2861 * is locked for write. 
Otherwise the are possible deadlocks: 2862 * Process 1 Process 2 2863 * ext3_create() quota_sync() 2864 * journal_start() write_dquot() 2865 * dquot_initialize() down(dqio_mutex) 2866 * down(dqio_mutex) journal_start() 2867 * 2868 */ 2869 2870#ifdef CONFIG_QUOTA 2871 2872static inline struct inode *dquot_to_inode(struct dquot *dquot) 2873{ 2874 return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type]; 2875} 2876 2877static int ext3_write_dquot(struct dquot *dquot) 2878{ 2879 int ret, err; 2880 handle_t *handle; 2881 struct inode *inode; 2882 2883 inode = dquot_to_inode(dquot); 2884 handle = ext3_journal_start(inode, 2885 EXT3_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); 2886 if (IS_ERR(handle)) 2887 return PTR_ERR(handle); 2888 ret = dquot_commit(dquot); 2889 err = ext3_journal_stop(handle); 2890 if (!ret) 2891 ret = err; 2892 return ret; 2893} 2894 2895static int ext3_acquire_dquot(struct dquot *dquot) 2896{ 2897 int ret, err; 2898 handle_t *handle; 2899 2900 handle = ext3_journal_start(dquot_to_inode(dquot), 2901 EXT3_QUOTA_INIT_BLOCKS(dquot->dq_sb)); 2902 if (IS_ERR(handle)) 2903 return PTR_ERR(handle); 2904 ret = dquot_acquire(dquot); 2905 err = ext3_journal_stop(handle); 2906 if (!ret) 2907 ret = err; 2908 return ret; 2909} 2910 2911static int ext3_release_dquot(struct dquot *dquot) 2912{ 2913 int ret, err; 2914 handle_t *handle; 2915 2916 handle = ext3_journal_start(dquot_to_inode(dquot), 2917 EXT3_QUOTA_DEL_BLOCKS(dquot->dq_sb)); 2918 if (IS_ERR(handle)) { 2919 /* Release dquot anyway to avoid endless cycle in dqput() */ 2920 dquot_release(dquot); 2921 return PTR_ERR(handle); 2922 } 2923 ret = dquot_release(dquot); 2924 err = ext3_journal_stop(handle); 2925 if (!ret) 2926 ret = err; 2927 return ret; 2928} 2929 2930static int ext3_mark_dquot_dirty(struct dquot *dquot) 2931{ 2932 /* Are we journaling quotas? 
*/ 2933 if (EXT3_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || 2934 EXT3_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { 2935 dquot_mark_dquot_dirty(dquot); 2936 return ext3_write_dquot(dquot); 2937 } else { 2938 return dquot_mark_dquot_dirty(dquot); 2939 } 2940} 2941 2942static int ext3_write_info(struct super_block *sb, int type) 2943{ 2944 int ret, err; 2945 handle_t *handle; 2946 2947 /* Data block + inode block */ 2948 handle = ext3_journal_start(sb->s_root->d_inode, 2); 2949 if (IS_ERR(handle)) 2950 return PTR_ERR(handle); 2951 ret = dquot_commit_info(sb, type); 2952 err = ext3_journal_stop(handle); 2953 if (!ret) 2954 ret = err; 2955 return ret; 2956} 2957 2958/* 2959 * Turn on quotas during mount time - we need to find 2960 * the quota file and such... 2961 */ 2962static int ext3_quota_on_mount(struct super_block *sb, int type) 2963{ 2964 return dquot_quota_on_mount(sb, EXT3_SB(sb)->s_qf_names[type], 2965 EXT3_SB(sb)->s_jquota_fmt, type); 2966} 2967 2968/* 2969 * Standard function to be called on quota_on 2970 */ 2971static int ext3_quota_on(struct super_block *sb, int type, int format_id, 2972 struct path *path) 2973{ 2974 int err; 2975 2976 if (!test_opt(sb, QUOTA)) 2977 return -EINVAL; 2978 2979 /* Quotafile not on the same filesystem? */ 2980 if (path->dentry->d_sb != sb) 2981 return -EXDEV; 2982 /* Journaling quota? */ 2983 if (EXT3_SB(sb)->s_qf_names[type]) { 2984 /* Quotafile not of fs root? */ 2985 if (path->dentry->d_parent != sb->s_root) 2986 ext3_msg(sb, KERN_WARNING, 2987 "warning: Quota file not on filesystem root. " 2988 "Journaled quota will not work."); 2989 } 2990 2991 /* 2992 * When we journal data on quota file, we have to flush journal to see 2993 * all updates to the file when we bypass pagecache... 2994 */ 2995 if (ext3_should_journal_data(path->dentry->d_inode)) { 2996 /* 2997 * We don't need to lock updates but journal_flush() could 2998 * otherwise be livelocked... 
2999 */ 3000 journal_lock_updates(EXT3_SB(sb)->s_journal); 3001 err = journal_flush(EXT3_SB(sb)->s_journal); 3002 journal_unlock_updates(EXT3_SB(sb)->s_journal); 3003 if (err) 3004 return err; 3005 } 3006 3007 return dquot_quota_on(sb, type, format_id, path); 3008} 3009 3010/* Read data from quotafile - avoid pagecache and such because we cannot afford 3011 * acquiring the locks... As quota files are never truncated and quota code 3012 * itself serializes the operations (and no one else should touch the files) 3013 * we don't have to be afraid of races */ 3014static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data, 3015 size_t len, loff_t off) 3016{ 3017 struct inode *inode = sb_dqopt(sb)->files[type]; 3018 sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb); 3019 int err = 0; 3020 int offset = off & (sb->s_blocksize - 1); 3021 int tocopy; 3022 size_t toread; 3023 struct buffer_head *bh; 3024 loff_t i_size = i_size_read(inode); 3025 3026 if (off > i_size) 3027 return 0; 3028 if (off+len > i_size) 3029 len = i_size-off; 3030 toread = len; 3031 while (toread > 0) { 3032 tocopy = sb->s_blocksize - offset < toread ? 3033 sb->s_blocksize - offset : toread; 3034 bh = ext3_bread(NULL, inode, blk, 0, &err); 3035 if (err) 3036 return err; 3037 if (!bh) /* A hole? 
*/ 3038 memset(data, 0, tocopy); 3039 else 3040 memcpy(data, bh->b_data+offset, tocopy); 3041 brelse(bh); 3042 offset = 0; 3043 toread -= tocopy; 3044 data += tocopy; 3045 blk++; 3046 } 3047 return len; 3048} 3049 3050/* Write to quotafile (we know the transaction is already started and has 3051 * enough credits) */ 3052static ssize_t ext3_quota_write(struct super_block *sb, int type, 3053 const char *data, size_t len, loff_t off) 3054{ 3055 struct inode *inode = sb_dqopt(sb)->files[type]; 3056 sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb); 3057 int err = 0; 3058 int offset = off & (sb->s_blocksize - 1); 3059 int journal_quota = EXT3_SB(sb)->s_qf_names[type] != NULL; 3060 struct buffer_head *bh; 3061 handle_t *handle = journal_current_handle(); 3062 3063 if (!handle) { 3064 ext3_msg(sb, KERN_WARNING, 3065 "warning: quota write (off=%llu, len=%llu)" 3066 " cancelled because transaction is not started.", 3067 (unsigned long long)off, (unsigned long long)len); 3068 return -EIO; 3069 } 3070 3071 /* 3072 * Since we account only one data block in transaction credits, 3073 * then it is impossible to cross a block boundary. 
3074 */ 3075 if (sb->s_blocksize - offset < len) { 3076 ext3_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)" 3077 " cancelled because not block aligned", 3078 (unsigned long long)off, (unsigned long long)len); 3079 return -EIO; 3080 } 3081 bh = ext3_bread(handle, inode, blk, 1, &err); 3082 if (!bh) 3083 goto out; 3084 if (journal_quota) { 3085 err = ext3_journal_get_write_access(handle, bh); 3086 if (err) { 3087 brelse(bh); 3088 goto out; 3089 } 3090 } 3091 lock_buffer(bh); 3092 memcpy(bh->b_data+offset, data, len); 3093 flush_dcache_page(bh->b_page); 3094 unlock_buffer(bh); 3095 if (journal_quota) 3096 err = ext3_journal_dirty_metadata(handle, bh); 3097 else { 3098 /* Always do at least ordered writes for quotas */ 3099 err = ext3_journal_dirty_data(handle, bh); 3100 mark_buffer_dirty(bh); 3101 } 3102 brelse(bh); 3103out: 3104 if (err) 3105 return err; 3106 if (inode->i_size < off + len) { 3107 i_size_write(inode, off + len); 3108 EXT3_I(inode)->i_disksize = inode->i_size; 3109 } 3110 inode->i_version++; 3111 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 3112 ext3_mark_inode_dirty(handle, inode); 3113 return len; 3114} 3115 3116#endif 3117 3118static struct dentry *ext3_mount(struct file_system_type *fs_type, 3119 int flags, const char *dev_name, void *data) 3120{ 3121 return mount_bdev(fs_type, flags, dev_name, data, ext3_fill_super); 3122} 3123 3124static struct file_system_type ext3_fs_type = { 3125 .owner = THIS_MODULE, 3126 .name = "ext3", 3127 .mount = ext3_mount, 3128 .kill_sb = kill_block_super, 3129 .fs_flags = FS_REQUIRES_DEV, 3130}; 3131MODULE_ALIAS_FS("ext3"); 3132 3133static int __init init_ext3_fs(void) 3134{ 3135 int err = init_ext3_xattr(); 3136 if (err) 3137 return err; 3138 err = init_inodecache(); 3139 if (err) 3140 goto out1; 3141 err = register_filesystem(&ext3_fs_type); 3142 if (err) 3143 goto out; 3144 return 0; 3145out: 3146 destroy_inodecache(); 3147out1: 3148 exit_ext3_xattr(); 3149 return err; 3150} 3151 3152static void __exit 
exit_ext3_fs(void) 3153{ 3154 unregister_filesystem(&ext3_fs_type); 3155 destroy_inodecache(); 3156 exit_ext3_xattr(); 3157} 3158 3159MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); 3160MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions"); 3161MODULE_LICENSE("GPL"); 3162module_init(init_ext3_fs) 3163module_exit(exit_ext3_fs)