Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
at v2.6.19-rc4 2756 lines 78 kB view raw
1/* 2 * linux/fs/ext3/super.c 3 * 4 * Copyright (C) 1992, 1993, 1994, 1995 5 * Remy Card (card@masi.ibp.fr) 6 * Laboratoire MASI - Institut Blaise Pascal 7 * Universite Pierre et Marie Curie (Paris VI) 8 * 9 * from 10 * 11 * linux/fs/minix/inode.c 12 * 13 * Copyright (C) 1991, 1992 Linus Torvalds 14 * 15 * Big-endian to little-endian byte-swapping/bitmaps by 16 * David S. Miller (davem@caip.rutgers.edu), 1995 17 */ 18 19#include <linux/module.h> 20#include <linux/string.h> 21#include <linux/fs.h> 22#include <linux/time.h> 23#include <linux/jbd.h> 24#include <linux/ext3_fs.h> 25#include <linux/ext3_jbd.h> 26#include <linux/slab.h> 27#include <linux/init.h> 28#include <linux/blkdev.h> 29#include <linux/parser.h> 30#include <linux/smp_lock.h> 31#include <linux/buffer_head.h> 32#include <linux/vfs.h> 33#include <linux/random.h> 34#include <linux/mount.h> 35#include <linux/namei.h> 36#include <linux/quotaops.h> 37#include <linux/seq_file.h> 38 39#include <asm/uaccess.h> 40 41#include "xattr.h" 42#include "acl.h" 43#include "namei.h" 44 45static int ext3_load_journal(struct super_block *, struct ext3_super_block *, 46 unsigned long journal_devnum); 47static int ext3_create_journal(struct super_block *, struct ext3_super_block *, 48 unsigned int); 49static void ext3_commit_super (struct super_block * sb, 50 struct ext3_super_block * es, 51 int sync); 52static void ext3_mark_recovery_complete(struct super_block * sb, 53 struct ext3_super_block * es); 54static void ext3_clear_journal_err(struct super_block * sb, 55 struct ext3_super_block * es); 56static int ext3_sync_fs(struct super_block *sb, int wait); 57static const char *ext3_decode_error(struct super_block * sb, int errno, 58 char nbuf[16]); 59static int ext3_remount (struct super_block * sb, int * flags, char * data); 60static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf); 61static void ext3_unlockfs(struct super_block *sb); 62static void ext3_write_super (struct super_block * sb); 63static void 
ext3_write_super_lockfs(struct super_block *sb); 64 65/* 66 * Wrappers for journal_start/end. 67 * 68 * The only special thing we need to do here is to make sure that all 69 * journal_end calls result in the superblock being marked dirty, so 70 * that sync() will call the filesystem's write_super callback if 71 * appropriate. 72 */ 73handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks) 74{ 75 journal_t *journal; 76 77 if (sb->s_flags & MS_RDONLY) 78 return ERR_PTR(-EROFS); 79 80 /* Special case here: if the journal has aborted behind our 81 * backs (eg. EIO in the commit thread), then we still need to 82 * take the FS itself readonly cleanly. */ 83 journal = EXT3_SB(sb)->s_journal; 84 if (is_journal_aborted(journal)) { 85 ext3_abort(sb, __FUNCTION__, 86 "Detected aborted journal"); 87 return ERR_PTR(-EROFS); 88 } 89 90 return journal_start(journal, nblocks); 91} 92 93/* 94 * The only special thing we need to do here is to make sure that all 95 * journal_stop calls result in the superblock being marked dirty, so 96 * that sync() will call the filesystem's write_super callback if 97 * appropriate. 
98 */ 99int __ext3_journal_stop(const char *where, handle_t *handle) 100{ 101 struct super_block *sb; 102 int err; 103 int rc; 104 105 sb = handle->h_transaction->t_journal->j_private; 106 err = handle->h_err; 107 rc = journal_stop(handle); 108 109 if (!err) 110 err = rc; 111 if (err) 112 __ext3_std_error(sb, where, err); 113 return err; 114} 115 116void ext3_journal_abort_handle(const char *caller, const char *err_fn, 117 struct buffer_head *bh, handle_t *handle, int err) 118{ 119 char nbuf[16]; 120 const char *errstr = ext3_decode_error(NULL, err, nbuf); 121 122 if (bh) 123 BUFFER_TRACE(bh, "abort"); 124 125 if (!handle->h_err) 126 handle->h_err = err; 127 128 if (is_handle_aborted(handle)) 129 return; 130 131 printk(KERN_ERR "%s: aborting transaction: %s in %s\n", 132 caller, errstr, err_fn); 133 134 journal_abort_handle(handle); 135} 136 137/* Deal with the reporting of failure conditions on a filesystem such as 138 * inconsistencies detected or read IO failures. 139 * 140 * On ext2, we can store the error state of the filesystem in the 141 * superblock. That is not possible on ext3, because we may have other 142 * write ordering constraints on the superblock which prevent us from 143 * writing it out straight away; and given that the journal is about to 144 * be aborted, we can't rely on the current, or future, transactions to 145 * write out the superblock safely. 146 * 147 * We'll just use the journal_abort() error code to record an error in 148 * the journal instead. On recovery, the journal will compain about 149 * that error until we've noted it down and cleared it. 
150 */ 151 152static void ext3_handle_error(struct super_block *sb) 153{ 154 struct ext3_super_block *es = EXT3_SB(sb)->s_es; 155 156 EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; 157 es->s_state |= cpu_to_le16(EXT3_ERROR_FS); 158 159 if (sb->s_flags & MS_RDONLY) 160 return; 161 162 if (!test_opt (sb, ERRORS_CONT)) { 163 journal_t *journal = EXT3_SB(sb)->s_journal; 164 165 EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT; 166 if (journal) 167 journal_abort(journal, -EIO); 168 } 169 if (test_opt (sb, ERRORS_RO)) { 170 printk (KERN_CRIT "Remounting filesystem read-only\n"); 171 sb->s_flags |= MS_RDONLY; 172 } 173 ext3_commit_super(sb, es, 1); 174 if (test_opt(sb, ERRORS_PANIC)) 175 panic("EXT3-fs (device %s): panic forced after error\n", 176 sb->s_id); 177} 178 179void ext3_error (struct super_block * sb, const char * function, 180 const char * fmt, ...) 181{ 182 va_list args; 183 184 va_start(args, fmt); 185 printk(KERN_CRIT "EXT3-fs error (device %s): %s: ",sb->s_id, function); 186 vprintk(fmt, args); 187 printk("\n"); 188 va_end(args); 189 190 ext3_handle_error(sb); 191} 192 193static const char *ext3_decode_error(struct super_block * sb, int errno, 194 char nbuf[16]) 195{ 196 char *errstr = NULL; 197 198 switch (errno) { 199 case -EIO: 200 errstr = "IO failure"; 201 break; 202 case -ENOMEM: 203 errstr = "Out of memory"; 204 break; 205 case -EROFS: 206 if (!sb || EXT3_SB(sb)->s_journal->j_flags & JFS_ABORT) 207 errstr = "Journal has aborted"; 208 else 209 errstr = "Readonly filesystem"; 210 break; 211 default: 212 /* If the caller passed in an extra buffer for unknown 213 * errors, textualise them now. Else we just return 214 * NULL. */ 215 if (nbuf) { 216 /* Check for truncated error codes... */ 217 if (snprintf(nbuf, 16, "error %d", -errno) >= 0) 218 errstr = nbuf; 219 } 220 break; 221 } 222 223 return errstr; 224} 225 226/* __ext3_std_error decodes expected errors from journaling functions 227 * automatically and invokes the appropriate error response. 
*/ 228 229void __ext3_std_error (struct super_block * sb, const char * function, 230 int errno) 231{ 232 char nbuf[16]; 233 const char *errstr; 234 235 /* Special case: if the error is EROFS, and we're not already 236 * inside a transaction, then there's really no point in logging 237 * an error. */ 238 if (errno == -EROFS && journal_current_handle() == NULL && 239 (sb->s_flags & MS_RDONLY)) 240 return; 241 242 errstr = ext3_decode_error(sb, errno, nbuf); 243 printk (KERN_CRIT "EXT3-fs error (device %s) in %s: %s\n", 244 sb->s_id, function, errstr); 245 246 ext3_handle_error(sb); 247} 248 249/* 250 * ext3_abort is a much stronger failure handler than ext3_error. The 251 * abort function may be used to deal with unrecoverable failures such 252 * as journal IO errors or ENOMEM at a critical moment in log management. 253 * 254 * We unconditionally force the filesystem into an ABORT|READONLY state, 255 * unless the error response on the fs has been set to panic in which 256 * case we take the easy way out and panic immediately. 257 */ 258 259void ext3_abort (struct super_block * sb, const char * function, 260 const char * fmt, ...) 261{ 262 va_list args; 263 264 printk (KERN_CRIT "ext3_abort called.\n"); 265 266 va_start(args, fmt); 267 printk(KERN_CRIT "EXT3-fs error (device %s): %s: ",sb->s_id, function); 268 vprintk(fmt, args); 269 printk("\n"); 270 va_end(args); 271 272 if (test_opt(sb, ERRORS_PANIC)) 273 panic("EXT3-fs panic from previous error\n"); 274 275 if (sb->s_flags & MS_RDONLY) 276 return; 277 278 printk(KERN_CRIT "Remounting filesystem read-only\n"); 279 EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; 280 sb->s_flags |= MS_RDONLY; 281 EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT; 282 journal_abort(EXT3_SB(sb)->s_journal, -EIO); 283} 284 285void ext3_warning (struct super_block * sb, const char * function, 286 const char * fmt, ...) 
287{ 288 va_list args; 289 290 va_start(args, fmt); 291 printk(KERN_WARNING "EXT3-fs warning (device %s): %s: ", 292 sb->s_id, function); 293 vprintk(fmt, args); 294 printk("\n"); 295 va_end(args); 296} 297 298void ext3_update_dynamic_rev(struct super_block *sb) 299{ 300 struct ext3_super_block *es = EXT3_SB(sb)->s_es; 301 302 if (le32_to_cpu(es->s_rev_level) > EXT3_GOOD_OLD_REV) 303 return; 304 305 ext3_warning(sb, __FUNCTION__, 306 "updating to rev %d because of new feature flag, " 307 "running e2fsck is recommended", 308 EXT3_DYNAMIC_REV); 309 310 es->s_first_ino = cpu_to_le32(EXT3_GOOD_OLD_FIRST_INO); 311 es->s_inode_size = cpu_to_le16(EXT3_GOOD_OLD_INODE_SIZE); 312 es->s_rev_level = cpu_to_le32(EXT3_DYNAMIC_REV); 313 /* leave es->s_feature_*compat flags alone */ 314 /* es->s_uuid will be set by e2fsck if empty */ 315 316 /* 317 * The rest of the superblock fields should be zero, and if not it 318 * means they are likely already in use, so leave them alone. We 319 * can leave it up to e2fsck to clean up any inconsistencies there. 
320 */ 321} 322 323/* 324 * Open the external journal device 325 */ 326static struct block_device *ext3_blkdev_get(dev_t dev) 327{ 328 struct block_device *bdev; 329 char b[BDEVNAME_SIZE]; 330 331 bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE); 332 if (IS_ERR(bdev)) 333 goto fail; 334 return bdev; 335 336fail: 337 printk(KERN_ERR "EXT3: failed to open journal device %s: %ld\n", 338 __bdevname(dev, b), PTR_ERR(bdev)); 339 return NULL; 340} 341 342/* 343 * Release the journal device 344 */ 345static int ext3_blkdev_put(struct block_device *bdev) 346{ 347 bd_release(bdev); 348 return blkdev_put(bdev); 349} 350 351static int ext3_blkdev_remove(struct ext3_sb_info *sbi) 352{ 353 struct block_device *bdev; 354 int ret = -ENODEV; 355 356 bdev = sbi->journal_bdev; 357 if (bdev) { 358 ret = ext3_blkdev_put(bdev); 359 sbi->journal_bdev = NULL; 360 } 361 return ret; 362} 363 364static inline struct inode *orphan_list_entry(struct list_head *l) 365{ 366 return &list_entry(l, struct ext3_inode_info, i_orphan)->vfs_inode; 367} 368 369static void dump_orphan_list(struct super_block *sb, struct ext3_sb_info *sbi) 370{ 371 struct list_head *l; 372 373 printk(KERN_ERR "sb orphan head is %d\n", 374 le32_to_cpu(sbi->s_es->s_last_orphan)); 375 376 printk(KERN_ERR "sb_info orphan list:\n"); 377 list_for_each(l, &sbi->s_orphan) { 378 struct inode *inode = orphan_list_entry(l); 379 printk(KERN_ERR " " 380 "inode %s:%lu at %p: mode %o, nlink %d, next %d\n", 381 inode->i_sb->s_id, inode->i_ino, inode, 382 inode->i_mode, inode->i_nlink, 383 NEXT_ORPHAN(inode)); 384 } 385} 386 387static void ext3_put_super (struct super_block * sb) 388{ 389 struct ext3_sb_info *sbi = EXT3_SB(sb); 390 struct ext3_super_block *es = sbi->s_es; 391 int i; 392 393 ext3_xattr_put_super(sb); 394 journal_destroy(sbi->s_journal); 395 if (!(sb->s_flags & MS_RDONLY)) { 396 EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); 397 es->s_state = cpu_to_le16(sbi->s_mount_state); 398 BUFFER_TRACE(sbi->s_sbh, 
"marking dirty"); 399 mark_buffer_dirty(sbi->s_sbh); 400 ext3_commit_super(sb, es, 1); 401 } 402 403 for (i = 0; i < sbi->s_gdb_count; i++) 404 brelse(sbi->s_group_desc[i]); 405 kfree(sbi->s_group_desc); 406 percpu_counter_destroy(&sbi->s_freeblocks_counter); 407 percpu_counter_destroy(&sbi->s_freeinodes_counter); 408 percpu_counter_destroy(&sbi->s_dirs_counter); 409 brelse(sbi->s_sbh); 410#ifdef CONFIG_QUOTA 411 for (i = 0; i < MAXQUOTAS; i++) 412 kfree(sbi->s_qf_names[i]); 413#endif 414 415 /* Debugging code just in case the in-memory inode orphan list 416 * isn't empty. The on-disk one can be non-empty if we've 417 * detected an error and taken the fs readonly, but the 418 * in-memory list had better be clean by this point. */ 419 if (!list_empty(&sbi->s_orphan)) 420 dump_orphan_list(sb, sbi); 421 J_ASSERT(list_empty(&sbi->s_orphan)); 422 423 invalidate_bdev(sb->s_bdev, 0); 424 if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) { 425 /* 426 * Invalidate the journal device's buffers. We don't want them 427 * floating about in memory - the physical journal device may 428 * hotswapped, and it breaks the `ro-after' testing code. 
429 */ 430 sync_blockdev(sbi->journal_bdev); 431 invalidate_bdev(sbi->journal_bdev, 0); 432 ext3_blkdev_remove(sbi); 433 } 434 sb->s_fs_info = NULL; 435 kfree(sbi); 436 return; 437} 438 439static kmem_cache_t *ext3_inode_cachep; 440 441/* 442 * Called inside transaction, so use GFP_NOFS 443 */ 444static struct inode *ext3_alloc_inode(struct super_block *sb) 445{ 446 struct ext3_inode_info *ei; 447 448 ei = kmem_cache_alloc(ext3_inode_cachep, SLAB_NOFS); 449 if (!ei) 450 return NULL; 451#ifdef CONFIG_EXT3_FS_POSIX_ACL 452 ei->i_acl = EXT3_ACL_NOT_CACHED; 453 ei->i_default_acl = EXT3_ACL_NOT_CACHED; 454#endif 455 ei->i_block_alloc_info = NULL; 456 ei->vfs_inode.i_version = 1; 457 return &ei->vfs_inode; 458} 459 460static void ext3_destroy_inode(struct inode *inode) 461{ 462 kmem_cache_free(ext3_inode_cachep, EXT3_I(inode)); 463} 464 465static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) 466{ 467 struct ext3_inode_info *ei = (struct ext3_inode_info *) foo; 468 469 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 470 SLAB_CTOR_CONSTRUCTOR) { 471 INIT_LIST_HEAD(&ei->i_orphan); 472#ifdef CONFIG_EXT3_FS_XATTR 473 init_rwsem(&ei->xattr_sem); 474#endif 475 mutex_init(&ei->truncate_mutex); 476 inode_init_once(&ei->vfs_inode); 477 } 478} 479 480static int init_inodecache(void) 481{ 482 ext3_inode_cachep = kmem_cache_create("ext3_inode_cache", 483 sizeof(struct ext3_inode_info), 484 0, (SLAB_RECLAIM_ACCOUNT| 485 SLAB_MEM_SPREAD), 486 init_once, NULL); 487 if (ext3_inode_cachep == NULL) 488 return -ENOMEM; 489 return 0; 490} 491 492static void destroy_inodecache(void) 493{ 494 kmem_cache_destroy(ext3_inode_cachep); 495} 496 497static void ext3_clear_inode(struct inode *inode) 498{ 499 struct ext3_block_alloc_info *rsv = EXT3_I(inode)->i_block_alloc_info; 500#ifdef CONFIG_EXT3_FS_POSIX_ACL 501 if (EXT3_I(inode)->i_acl && 502 EXT3_I(inode)->i_acl != EXT3_ACL_NOT_CACHED) { 503 posix_acl_release(EXT3_I(inode)->i_acl); 504 EXT3_I(inode)->i_acl = 
EXT3_ACL_NOT_CACHED; 505 } 506 if (EXT3_I(inode)->i_default_acl && 507 EXT3_I(inode)->i_default_acl != EXT3_ACL_NOT_CACHED) { 508 posix_acl_release(EXT3_I(inode)->i_default_acl); 509 EXT3_I(inode)->i_default_acl = EXT3_ACL_NOT_CACHED; 510 } 511#endif 512 ext3_discard_reservation(inode); 513 EXT3_I(inode)->i_block_alloc_info = NULL; 514 if (unlikely(rsv)) 515 kfree(rsv); 516} 517 518static inline void ext3_show_quota_options(struct seq_file *seq, struct super_block *sb) 519{ 520#if defined(CONFIG_QUOTA) 521 struct ext3_sb_info *sbi = EXT3_SB(sb); 522 523 if (sbi->s_jquota_fmt) 524 seq_printf(seq, ",jqfmt=%s", 525 (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold": "vfsv0"); 526 527 if (sbi->s_qf_names[USRQUOTA]) 528 seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); 529 530 if (sbi->s_qf_names[GRPQUOTA]) 531 seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); 532 533 if (sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA) 534 seq_puts(seq, ",usrquota"); 535 536 if (sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA) 537 seq_puts(seq, ",grpquota"); 538#endif 539} 540 541static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs) 542{ 543 struct super_block *sb = vfs->mnt_sb; 544 545 if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA) 546 seq_puts(seq, ",data=journal"); 547 else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA) 548 seq_puts(seq, ",data=ordered"); 549 else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA) 550 seq_puts(seq, ",data=writeback"); 551 552 ext3_show_quota_options(seq, sb); 553 554 return 0; 555} 556 557 558static struct dentry *ext3_get_dentry(struct super_block *sb, void *vobjp) 559{ 560 __u32 *objp = vobjp; 561 unsigned long ino = objp[0]; 562 __u32 generation = objp[1]; 563 struct inode *inode; 564 struct dentry *result; 565 566 if (ino < EXT3_FIRST_INO(sb) && ino != EXT3_ROOT_INO) 567 return ERR_PTR(-ESTALE); 568 if (ino > le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count)) 569 return ERR_PTR(-ESTALE); 570 571 /* 
iget isn't really right if the inode is currently unallocated!! 572 * 573 * ext3_read_inode will return a bad_inode if the inode had been 574 * deleted, so we should be safe. 575 * 576 * Currently we don't know the generation for parent directory, so 577 * a generation of 0 means "accept any" 578 */ 579 inode = iget(sb, ino); 580 if (inode == NULL) 581 return ERR_PTR(-ENOMEM); 582 if (is_bad_inode(inode) || 583 (generation && inode->i_generation != generation)) { 584 iput(inode); 585 return ERR_PTR(-ESTALE); 586 } 587 /* now to find a dentry. 588 * If possible, get a well-connected one 589 */ 590 result = d_alloc_anon(inode); 591 if (!result) { 592 iput(inode); 593 return ERR_PTR(-ENOMEM); 594 } 595 return result; 596} 597 598#ifdef CONFIG_QUOTA 599#define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group") 600#define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) 601 602static int ext3_dquot_initialize(struct inode *inode, int type); 603static int ext3_dquot_drop(struct inode *inode); 604static int ext3_write_dquot(struct dquot *dquot); 605static int ext3_acquire_dquot(struct dquot *dquot); 606static int ext3_release_dquot(struct dquot *dquot); 607static int ext3_mark_dquot_dirty(struct dquot *dquot); 608static int ext3_write_info(struct super_block *sb, int type); 609static int ext3_quota_on(struct super_block *sb, int type, int format_id, char *path); 610static int ext3_quota_on_mount(struct super_block *sb, int type); 611static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data, 612 size_t len, loff_t off); 613static ssize_t ext3_quota_write(struct super_block *sb, int type, 614 const char *data, size_t len, loff_t off); 615 616static struct dquot_operations ext3_quota_operations = { 617 .initialize = ext3_dquot_initialize, 618 .drop = ext3_dquot_drop, 619 .alloc_space = dquot_alloc_space, 620 .alloc_inode = dquot_alloc_inode, 621 .free_space = dquot_free_space, 622 .free_inode = dquot_free_inode, 623 .transfer = 
dquot_transfer, 624 .write_dquot = ext3_write_dquot, 625 .acquire_dquot = ext3_acquire_dquot, 626 .release_dquot = ext3_release_dquot, 627 .mark_dirty = ext3_mark_dquot_dirty, 628 .write_info = ext3_write_info 629}; 630 631static struct quotactl_ops ext3_qctl_operations = { 632 .quota_on = ext3_quota_on, 633 .quota_off = vfs_quota_off, 634 .quota_sync = vfs_quota_sync, 635 .get_info = vfs_get_dqinfo, 636 .set_info = vfs_set_dqinfo, 637 .get_dqblk = vfs_get_dqblk, 638 .set_dqblk = vfs_set_dqblk 639}; 640#endif 641 642static struct super_operations ext3_sops = { 643 .alloc_inode = ext3_alloc_inode, 644 .destroy_inode = ext3_destroy_inode, 645 .read_inode = ext3_read_inode, 646 .write_inode = ext3_write_inode, 647 .dirty_inode = ext3_dirty_inode, 648 .delete_inode = ext3_delete_inode, 649 .put_super = ext3_put_super, 650 .write_super = ext3_write_super, 651 .sync_fs = ext3_sync_fs, 652 .write_super_lockfs = ext3_write_super_lockfs, 653 .unlockfs = ext3_unlockfs, 654 .statfs = ext3_statfs, 655 .remount_fs = ext3_remount, 656 .clear_inode = ext3_clear_inode, 657 .show_options = ext3_show_options, 658#ifdef CONFIG_QUOTA 659 .quota_read = ext3_quota_read, 660 .quota_write = ext3_quota_write, 661#endif 662}; 663 664static struct export_operations ext3_export_ops = { 665 .get_parent = ext3_get_parent, 666 .get_dentry = ext3_get_dentry, 667}; 668 669enum { 670 Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, 671 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, 672 Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, 673 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, 674 Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh, 675 Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, 676 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, 677 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 678 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, 679 Opt_ignore, Opt_barrier, 
Opt_err, Opt_resize, Opt_usrquota, 680 Opt_grpquota 681}; 682 683static match_table_t tokens = { 684 {Opt_bsd_df, "bsddf"}, 685 {Opt_minix_df, "minixdf"}, 686 {Opt_grpid, "grpid"}, 687 {Opt_grpid, "bsdgroups"}, 688 {Opt_nogrpid, "nogrpid"}, 689 {Opt_nogrpid, "sysvgroups"}, 690 {Opt_resgid, "resgid=%u"}, 691 {Opt_resuid, "resuid=%u"}, 692 {Opt_sb, "sb=%u"}, 693 {Opt_err_cont, "errors=continue"}, 694 {Opt_err_panic, "errors=panic"}, 695 {Opt_err_ro, "errors=remount-ro"}, 696 {Opt_nouid32, "nouid32"}, 697 {Opt_nocheck, "nocheck"}, 698 {Opt_nocheck, "check=none"}, 699 {Opt_debug, "debug"}, 700 {Opt_oldalloc, "oldalloc"}, 701 {Opt_orlov, "orlov"}, 702 {Opt_user_xattr, "user_xattr"}, 703 {Opt_nouser_xattr, "nouser_xattr"}, 704 {Opt_acl, "acl"}, 705 {Opt_noacl, "noacl"}, 706 {Opt_reservation, "reservation"}, 707 {Opt_noreservation, "noreservation"}, 708 {Opt_noload, "noload"}, 709 {Opt_nobh, "nobh"}, 710 {Opt_bh, "bh"}, 711 {Opt_commit, "commit=%u"}, 712 {Opt_journal_update, "journal=update"}, 713 {Opt_journal_inum, "journal=%u"}, 714 {Opt_journal_dev, "journal_dev=%u"}, 715 {Opt_abort, "abort"}, 716 {Opt_data_journal, "data=journal"}, 717 {Opt_data_ordered, "data=ordered"}, 718 {Opt_data_writeback, "data=writeback"}, 719 {Opt_offusrjquota, "usrjquota="}, 720 {Opt_usrjquota, "usrjquota=%s"}, 721 {Opt_offgrpjquota, "grpjquota="}, 722 {Opt_grpjquota, "grpjquota=%s"}, 723 {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, 724 {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, 725 {Opt_grpquota, "grpquota"}, 726 {Opt_noquota, "noquota"}, 727 {Opt_quota, "quota"}, 728 {Opt_usrquota, "usrquota"}, 729 {Opt_barrier, "barrier=%u"}, 730 {Opt_err, NULL}, 731 {Opt_resize, "resize"}, 732}; 733 734static ext3_fsblk_t get_sb_block(void **data) 735{ 736 ext3_fsblk_t sb_block; 737 char *options = (char *) *data; 738 739 if (!options || strncmp(options, "sb=", 3) != 0) 740 return 1; /* Default location */ 741 options += 3; 742 /*todo: use simple_strtoll with >32bit ext3 */ 743 sb_block = simple_strtoul(options, &options, 
0); 744 if (*options && *options != ',') { 745 printk("EXT3-fs: Invalid sb specification: %s\n", 746 (char *) *data); 747 return 1; 748 } 749 if (*options == ',') 750 options++; 751 *data = (void *) options; 752 return sb_block; 753} 754 755static int parse_options (char *options, struct super_block *sb, 756 unsigned int *inum, unsigned long *journal_devnum, 757 ext3_fsblk_t *n_blocks_count, int is_remount) 758{ 759 struct ext3_sb_info *sbi = EXT3_SB(sb); 760 char * p; 761 substring_t args[MAX_OPT_ARGS]; 762 int data_opt = 0; 763 int option; 764#ifdef CONFIG_QUOTA 765 int qtype; 766 char *qname; 767#endif 768 769 if (!options) 770 return 1; 771 772 while ((p = strsep (&options, ",")) != NULL) { 773 int token; 774 if (!*p) 775 continue; 776 777 token = match_token(p, tokens, args); 778 switch (token) { 779 case Opt_bsd_df: 780 clear_opt (sbi->s_mount_opt, MINIX_DF); 781 break; 782 case Opt_minix_df: 783 set_opt (sbi->s_mount_opt, MINIX_DF); 784 break; 785 case Opt_grpid: 786 set_opt (sbi->s_mount_opt, GRPID); 787 break; 788 case Opt_nogrpid: 789 clear_opt (sbi->s_mount_opt, GRPID); 790 break; 791 case Opt_resuid: 792 if (match_int(&args[0], &option)) 793 return 0; 794 sbi->s_resuid = option; 795 break; 796 case Opt_resgid: 797 if (match_int(&args[0], &option)) 798 return 0; 799 sbi->s_resgid = option; 800 break; 801 case Opt_sb: 802 /* handled by get_sb_block() instead of here */ 803 /* *sb_block = match_int(&args[0]); */ 804 break; 805 case Opt_err_panic: 806 clear_opt (sbi->s_mount_opt, ERRORS_CONT); 807 clear_opt (sbi->s_mount_opt, ERRORS_RO); 808 set_opt (sbi->s_mount_opt, ERRORS_PANIC); 809 break; 810 case Opt_err_ro: 811 clear_opt (sbi->s_mount_opt, ERRORS_CONT); 812 clear_opt (sbi->s_mount_opt, ERRORS_PANIC); 813 set_opt (sbi->s_mount_opt, ERRORS_RO); 814 break; 815 case Opt_err_cont: 816 clear_opt (sbi->s_mount_opt, ERRORS_RO); 817 clear_opt (sbi->s_mount_opt, ERRORS_PANIC); 818 set_opt (sbi->s_mount_opt, ERRORS_CONT); 819 break; 820 case Opt_nouid32: 821 
set_opt (sbi->s_mount_opt, NO_UID32); 822 break; 823 case Opt_nocheck: 824 clear_opt (sbi->s_mount_opt, CHECK); 825 break; 826 case Opt_debug: 827 set_opt (sbi->s_mount_opt, DEBUG); 828 break; 829 case Opt_oldalloc: 830 set_opt (sbi->s_mount_opt, OLDALLOC); 831 break; 832 case Opt_orlov: 833 clear_opt (sbi->s_mount_opt, OLDALLOC); 834 break; 835#ifdef CONFIG_EXT3_FS_XATTR 836 case Opt_user_xattr: 837 set_opt (sbi->s_mount_opt, XATTR_USER); 838 break; 839 case Opt_nouser_xattr: 840 clear_opt (sbi->s_mount_opt, XATTR_USER); 841 break; 842#else 843 case Opt_user_xattr: 844 case Opt_nouser_xattr: 845 printk("EXT3 (no)user_xattr options not supported\n"); 846 break; 847#endif 848#ifdef CONFIG_EXT3_FS_POSIX_ACL 849 case Opt_acl: 850 set_opt(sbi->s_mount_opt, POSIX_ACL); 851 break; 852 case Opt_noacl: 853 clear_opt(sbi->s_mount_opt, POSIX_ACL); 854 break; 855#else 856 case Opt_acl: 857 case Opt_noacl: 858 printk("EXT3 (no)acl options not supported\n"); 859 break; 860#endif 861 case Opt_reservation: 862 set_opt(sbi->s_mount_opt, RESERVATION); 863 break; 864 case Opt_noreservation: 865 clear_opt(sbi->s_mount_opt, RESERVATION); 866 break; 867 case Opt_journal_update: 868 /* @@@ FIXME */ 869 /* Eventually we will want to be able to create 870 a journal file here. For now, only allow the 871 user to specify an existing inode to be the 872 journal file. 
*/ 873 if (is_remount) { 874 printk(KERN_ERR "EXT3-fs: cannot specify " 875 "journal on remount\n"); 876 return 0; 877 } 878 set_opt (sbi->s_mount_opt, UPDATE_JOURNAL); 879 break; 880 case Opt_journal_inum: 881 if (is_remount) { 882 printk(KERN_ERR "EXT3-fs: cannot specify " 883 "journal on remount\n"); 884 return 0; 885 } 886 if (match_int(&args[0], &option)) 887 return 0; 888 *inum = option; 889 break; 890 case Opt_journal_dev: 891 if (is_remount) { 892 printk(KERN_ERR "EXT3-fs: cannot specify " 893 "journal on remount\n"); 894 return 0; 895 } 896 if (match_int(&args[0], &option)) 897 return 0; 898 *journal_devnum = option; 899 break; 900 case Opt_noload: 901 set_opt (sbi->s_mount_opt, NOLOAD); 902 break; 903 case Opt_commit: 904 if (match_int(&args[0], &option)) 905 return 0; 906 if (option < 0) 907 return 0; 908 if (option == 0) 909 option = JBD_DEFAULT_MAX_COMMIT_AGE; 910 sbi->s_commit_interval = HZ * option; 911 break; 912 case Opt_data_journal: 913 data_opt = EXT3_MOUNT_JOURNAL_DATA; 914 goto datacheck; 915 case Opt_data_ordered: 916 data_opt = EXT3_MOUNT_ORDERED_DATA; 917 goto datacheck; 918 case Opt_data_writeback: 919 data_opt = EXT3_MOUNT_WRITEBACK_DATA; 920 datacheck: 921 if (is_remount) { 922 if ((sbi->s_mount_opt & EXT3_MOUNT_DATA_FLAGS) 923 != data_opt) { 924 printk(KERN_ERR 925 "EXT3-fs: cannot change data " 926 "mode on remount\n"); 927 return 0; 928 } 929 } else { 930 sbi->s_mount_opt &= ~EXT3_MOUNT_DATA_FLAGS; 931 sbi->s_mount_opt |= data_opt; 932 } 933 break; 934#ifdef CONFIG_QUOTA 935 case Opt_usrjquota: 936 qtype = USRQUOTA; 937 goto set_qf_name; 938 case Opt_grpjquota: 939 qtype = GRPQUOTA; 940set_qf_name: 941 if (sb_any_quota_enabled(sb)) { 942 printk(KERN_ERR 943 "EXT3-fs: Cannot change journalled " 944 "quota options when quota turned on.\n"); 945 return 0; 946 } 947 qname = match_strdup(&args[0]); 948 if (!qname) { 949 printk(KERN_ERR 950 "EXT3-fs: not enough memory for " 951 "storing quotafile name.\n"); 952 return 0; 953 } 954 if 
(sbi->s_qf_names[qtype] && 955 strcmp(sbi->s_qf_names[qtype], qname)) { 956 printk(KERN_ERR 957 "EXT3-fs: %s quota file already " 958 "specified.\n", QTYPE2NAME(qtype)); 959 kfree(qname); 960 return 0; 961 } 962 sbi->s_qf_names[qtype] = qname; 963 if (strchr(sbi->s_qf_names[qtype], '/')) { 964 printk(KERN_ERR 965 "EXT3-fs: quotafile must be on " 966 "filesystem root.\n"); 967 kfree(sbi->s_qf_names[qtype]); 968 sbi->s_qf_names[qtype] = NULL; 969 return 0; 970 } 971 set_opt(sbi->s_mount_opt, QUOTA); 972 break; 973 case Opt_offusrjquota: 974 qtype = USRQUOTA; 975 goto clear_qf_name; 976 case Opt_offgrpjquota: 977 qtype = GRPQUOTA; 978clear_qf_name: 979 if (sb_any_quota_enabled(sb)) { 980 printk(KERN_ERR "EXT3-fs: Cannot change " 981 "journalled quota options when " 982 "quota turned on.\n"); 983 return 0; 984 } 985 /* 986 * The space will be released later when all options 987 * are confirmed to be correct 988 */ 989 sbi->s_qf_names[qtype] = NULL; 990 break; 991 case Opt_jqfmt_vfsold: 992 sbi->s_jquota_fmt = QFMT_VFS_OLD; 993 break; 994 case Opt_jqfmt_vfsv0: 995 sbi->s_jquota_fmt = QFMT_VFS_V0; 996 break; 997 case Opt_quota: 998 case Opt_usrquota: 999 set_opt(sbi->s_mount_opt, QUOTA); 1000 set_opt(sbi->s_mount_opt, USRQUOTA); 1001 break; 1002 case Opt_grpquota: 1003 set_opt(sbi->s_mount_opt, QUOTA); 1004 set_opt(sbi->s_mount_opt, GRPQUOTA); 1005 break; 1006 case Opt_noquota: 1007 if (sb_any_quota_enabled(sb)) { 1008 printk(KERN_ERR "EXT3-fs: Cannot change quota " 1009 "options when quota turned on.\n"); 1010 return 0; 1011 } 1012 clear_opt(sbi->s_mount_opt, QUOTA); 1013 clear_opt(sbi->s_mount_opt, USRQUOTA); 1014 clear_opt(sbi->s_mount_opt, GRPQUOTA); 1015 break; 1016#else 1017 case Opt_quota: 1018 case Opt_usrquota: 1019 case Opt_grpquota: 1020 case Opt_usrjquota: 1021 case Opt_grpjquota: 1022 case Opt_offusrjquota: 1023 case Opt_offgrpjquota: 1024 case Opt_jqfmt_vfsold: 1025 case Opt_jqfmt_vfsv0: 1026 printk(KERN_ERR 1027 "EXT3-fs: journalled quota options not " 
1028 "supported.\n"); 1029 break; 1030 case Opt_noquota: 1031 break; 1032#endif 1033 case Opt_abort: 1034 set_opt(sbi->s_mount_opt, ABORT); 1035 break; 1036 case Opt_barrier: 1037 if (match_int(&args[0], &option)) 1038 return 0; 1039 if (option) 1040 set_opt(sbi->s_mount_opt, BARRIER); 1041 else 1042 clear_opt(sbi->s_mount_opt, BARRIER); 1043 break; 1044 case Opt_ignore: 1045 break; 1046 case Opt_resize: 1047 if (!is_remount) { 1048 printk("EXT3-fs: resize option only available " 1049 "for remount\n"); 1050 return 0; 1051 } 1052 if (match_int(&args[0], &option) != 0) 1053 return 0; 1054 *n_blocks_count = option; 1055 break; 1056 case Opt_nobh: 1057 set_opt(sbi->s_mount_opt, NOBH); 1058 break; 1059 case Opt_bh: 1060 clear_opt(sbi->s_mount_opt, NOBH); 1061 break; 1062 default: 1063 printk (KERN_ERR 1064 "EXT3-fs: Unrecognized mount option \"%s\" " 1065 "or missing value\n", p); 1066 return 0; 1067 } 1068 } 1069#ifdef CONFIG_QUOTA 1070 if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { 1071 if ((sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA) && 1072 sbi->s_qf_names[USRQUOTA]) 1073 clear_opt(sbi->s_mount_opt, USRQUOTA); 1074 1075 if ((sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA) && 1076 sbi->s_qf_names[GRPQUOTA]) 1077 clear_opt(sbi->s_mount_opt, GRPQUOTA); 1078 1079 if ((sbi->s_qf_names[USRQUOTA] && 1080 (sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA)) || 1081 (sbi->s_qf_names[GRPQUOTA] && 1082 (sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA))) { 1083 printk(KERN_ERR "EXT3-fs: old and new quota " 1084 "format mixing.\n"); 1085 return 0; 1086 } 1087 1088 if (!sbi->s_jquota_fmt) { 1089 printk(KERN_ERR "EXT3-fs: journalled quota format " 1090 "not specified.\n"); 1091 return 0; 1092 } 1093 } else { 1094 if (sbi->s_jquota_fmt) { 1095 printk(KERN_ERR "EXT3-fs: journalled quota format " 1096 "specified with no journalling " 1097 "enabled.\n"); 1098 return 0; 1099 } 1100 } 1101#endif 1102 return 1; 1103} 1104 1105static int ext3_setup_super(struct super_block *sb, struct ext3_super_block 
*es, 1106 int read_only) 1107{ 1108 struct ext3_sb_info *sbi = EXT3_SB(sb); 1109 int res = 0; 1110 1111 if (le32_to_cpu(es->s_rev_level) > EXT3_MAX_SUPP_REV) { 1112 printk (KERN_ERR "EXT3-fs warning: revision level too high, " 1113 "forcing read-only mode\n"); 1114 res = MS_RDONLY; 1115 } 1116 if (read_only) 1117 return res; 1118 if (!(sbi->s_mount_state & EXT3_VALID_FS)) 1119 printk (KERN_WARNING "EXT3-fs warning: mounting unchecked fs, " 1120 "running e2fsck is recommended\n"); 1121 else if ((sbi->s_mount_state & EXT3_ERROR_FS)) 1122 printk (KERN_WARNING 1123 "EXT3-fs warning: mounting fs with errors, " 1124 "running e2fsck is recommended\n"); 1125 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 && 1126 le16_to_cpu(es->s_mnt_count) >= 1127 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) 1128 printk (KERN_WARNING 1129 "EXT3-fs warning: maximal mount count reached, " 1130 "running e2fsck is recommended\n"); 1131 else if (le32_to_cpu(es->s_checkinterval) && 1132 (le32_to_cpu(es->s_lastcheck) + 1133 le32_to_cpu(es->s_checkinterval) <= get_seconds())) 1134 printk (KERN_WARNING 1135 "EXT3-fs warning: checktime reached, " 1136 "running e2fsck is recommended\n"); 1137#if 0 1138 /* @@@ We _will_ want to clear the valid bit if we find 1139 inconsistencies, to force a fsck at reboot. But for 1140 a plain journaled filesystem we can keep it set as 1141 valid forever! 
:) */ 1142 es->s_state = cpu_to_le16(le16_to_cpu(es->s_state) & ~EXT3_VALID_FS); 1143#endif 1144 if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) 1145 es->s_max_mnt_count = cpu_to_le16(EXT3_DFL_MAX_MNT_COUNT); 1146 es->s_mnt_count=cpu_to_le16(le16_to_cpu(es->s_mnt_count) + 1); 1147 es->s_mtime = cpu_to_le32(get_seconds()); 1148 ext3_update_dynamic_rev(sb); 1149 EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); 1150 1151 ext3_commit_super(sb, es, 1); 1152 if (test_opt(sb, DEBUG)) 1153 printk(KERN_INFO "[EXT3 FS bs=%lu, gc=%lu, " 1154 "bpg=%lu, ipg=%lu, mo=%04lx]\n", 1155 sb->s_blocksize, 1156 sbi->s_groups_count, 1157 EXT3_BLOCKS_PER_GROUP(sb), 1158 EXT3_INODES_PER_GROUP(sb), 1159 sbi->s_mount_opt); 1160 1161 printk(KERN_INFO "EXT3 FS on %s, ", sb->s_id); 1162 if (EXT3_SB(sb)->s_journal->j_inode == NULL) { 1163 char b[BDEVNAME_SIZE]; 1164 1165 printk("external journal on %s\n", 1166 bdevname(EXT3_SB(sb)->s_journal->j_dev, b)); 1167 } else { 1168 printk("internal journal\n"); 1169 } 1170 return res; 1171} 1172

/*
 * Called at mount-time, super-block is locked.
 *
 * Walk every block group descriptor and verify that the group's block
 * bitmap, inode bitmap and inode table all lie inside that group's block
 * range [first_block, last_block].  The first field found out of range is
 * reported through ext3_error() and the walk stops.
 *
 * When every descriptor passes, the free block and free inode totals in
 * the superblock are refreshed from ext3_count_free_blocks() and
 * ext3_count_free_inodes().
 *
 * Returns 1 if all descriptors are sane, 0 otherwise.
 */
static int ext3_check_descriptors (struct super_block * sb)
{
	struct ext3_sb_info *sbi = EXT3_SB(sb);
	ext3_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
	ext3_fsblk_t last_block;
	struct ext3_group_desc * gdp = NULL;
	int desc_block = 0;
	int i;

	ext3_debug ("Checking group descriptors");

	for (i = 0; i < sbi->s_groups_count; i++)
	{
		/* The final group ends at the last block of the filesystem. */
		if (i == sbi->s_groups_count - 1)
			last_block = le32_to_cpu(sbi->s_es->s_blocks_count) - 1;
		else
			last_block = first_block +
				(EXT3_BLOCKS_PER_GROUP(sb) - 1);

		/* Move on to the next descriptor buffer when one is used up. */
		if ((i % EXT3_DESC_PER_BLOCK(sb)) == 0)
			gdp = (struct ext3_group_desc *)
					sbi->s_group_desc[desc_block++]->b_data;
		if (le32_to_cpu(gdp->bg_block_bitmap) < first_block ||
		    le32_to_cpu(gdp->bg_block_bitmap) > last_block)
		{
			ext3_error (sb, "ext3_check_descriptors",
				    "Block bitmap for group %d"
				    " not in group (block %lu)!",
				    i, (unsigned long)
					le32_to_cpu(gdp->bg_block_bitmap));
			return 0;
		}
		if (le32_to_cpu(gdp->bg_inode_bitmap) < first_block ||
		    le32_to_cpu(gdp->bg_inode_bitmap) > last_block)
		{
			ext3_error (sb, "ext3_check_descriptors",
				    "Inode bitmap for group %d"
				    " not in group (block %lu)!",
				    i, (unsigned long)
					le32_to_cpu(gdp->bg_inode_bitmap));
			return 0;
		}
		/*
		 * The inode table spans s_itb_per_group blocks starting at
		 * bg_inode_table, so its final block is
		 * bg_inode_table + s_itb_per_group - 1.  Comparing the
		 * one-past-the-end block against last_block would wrongly
		 * reject a table that ends exactly on the group's last block.
		 */
		if (le32_to_cpu(gdp->bg_inode_table) < first_block ||
		    le32_to_cpu(gdp->bg_inode_table) + sbi->s_itb_per_group - 1 >
		    last_block)
		{
			ext3_error (sb, "ext3_check_descriptors",
				    "Inode table for group %d"
				    " not in group (block %lu)!",
				    i, (unsigned long)
					le32_to_cpu(gdp->bg_inode_table));
			return 0;
		}
		first_block += EXT3_BLOCKS_PER_GROUP(sb);
		gdp++;
	}

	sbi->s_es->s_free_blocks_count=cpu_to_le32(ext3_count_free_blocks(sb));
	sbi->s_es->s_free_inodes_count=cpu_to_le32(ext3_count_free_inodes(sb));
	return 1;
}

1235 1236 1237/* ext3_orphan_cleanup() walks a singly-linked list of inodes (starting at 1238 * the superblock) which were deleted from all directories, but held open by 1239 * a process at the time of a crash. We walk the list and try to delete these 1240 * inodes at recovery time (only with a read-write filesystem). 1241 * 1242 * In order to keep the orphan inode chain consistent during traversal (in 1243 * case of crash during recovery), we link each inode into the superblock 1244 * orphan list_head and handle it the same way as an inode deletion during 1245 * normal operation (which journals the operations for us). 1246 * 1247 * We only do an iget() and an iput() on each inode, which is very safe if we 1248 * accidentally point at an in-use or already deleted inode. The worst that 1249 * can happen in this case is that we get a "bit already cleared" message from 1250 * ext3_free_inode().
The only reason we would point at a wrong inode is if 1251 * e2fsck was run on this filesystem, and it must have already done the orphan 1252 * inode cleanup for us, so we can safely abort without any further action. 1253 */ 1254static void ext3_orphan_cleanup (struct super_block * sb, 1255 struct ext3_super_block * es) 1256{ 1257 unsigned int s_flags = sb->s_flags; 1258 int nr_orphans = 0, nr_truncates = 0; 1259#ifdef CONFIG_QUOTA 1260 int i; 1261#endif 1262 if (!es->s_last_orphan) { 1263 jbd_debug(4, "no orphan inodes to clean up\n"); 1264 return; 1265 } 1266 1267 if (EXT3_SB(sb)->s_mount_state & EXT3_ERROR_FS) { 1268 if (es->s_last_orphan) 1269 jbd_debug(1, "Errors on filesystem, " 1270 "clearing orphan list.\n"); 1271 es->s_last_orphan = 0; 1272 jbd_debug(1, "Skipping orphan recovery on fs with errors.\n"); 1273 return; 1274 } 1275 1276 if (s_flags & MS_RDONLY) { 1277 printk(KERN_INFO "EXT3-fs: %s: orphan cleanup on readonly fs\n", 1278 sb->s_id); 1279 sb->s_flags &= ~MS_RDONLY; 1280 } 1281#ifdef CONFIG_QUOTA 1282 /* Needed for iput() to work correctly and not trash data */ 1283 sb->s_flags |= MS_ACTIVE; 1284 /* Turn on quotas so that they are updated correctly */ 1285 for (i = 0; i < MAXQUOTAS; i++) { 1286 if (EXT3_SB(sb)->s_qf_names[i]) { 1287 int ret = ext3_quota_on_mount(sb, i); 1288 if (ret < 0) 1289 printk(KERN_ERR 1290 "EXT3-fs: Cannot turn on journalled " 1291 "quota: error %d\n", ret); 1292 } 1293 } 1294#endif 1295 1296 while (es->s_last_orphan) { 1297 struct inode *inode; 1298 1299 if (!(inode = 1300 ext3_orphan_get(sb, le32_to_cpu(es->s_last_orphan)))) { 1301 es->s_last_orphan = 0; 1302 break; 1303 } 1304 1305 list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan); 1306 DQUOT_INIT(inode); 1307 if (inode->i_nlink) { 1308 printk(KERN_DEBUG 1309 "%s: truncating inode %lu to %Ld bytes\n", 1310 __FUNCTION__, inode->i_ino, inode->i_size); 1311 jbd_debug(2, "truncating inode %lu to %Ld bytes\n", 1312 inode->i_ino, inode->i_size); 1313 
ext3_truncate(inode); 1314 nr_truncates++; 1315 } else { 1316 printk(KERN_DEBUG 1317 "%s: deleting unreferenced inode %lu\n", 1318 __FUNCTION__, inode->i_ino); 1319 jbd_debug(2, "deleting unreferenced inode %lu\n", 1320 inode->i_ino); 1321 nr_orphans++; 1322 } 1323 iput(inode); /* The delete magic happens here! */ 1324 } 1325 1326#define PLURAL(x) (x), ((x)==1) ? "" : "s" 1327 1328 if (nr_orphans) 1329 printk(KERN_INFO "EXT3-fs: %s: %d orphan inode%s deleted\n", 1330 sb->s_id, PLURAL(nr_orphans)); 1331 if (nr_truncates) 1332 printk(KERN_INFO "EXT3-fs: %s: %d truncate%s cleaned up\n", 1333 sb->s_id, PLURAL(nr_truncates)); 1334#ifdef CONFIG_QUOTA 1335 /* Turn quotas off */ 1336 for (i = 0; i < MAXQUOTAS; i++) { 1337 if (sb_dqopt(sb)->files[i]) 1338 vfs_quota_off(sb, i); 1339 } 1340#endif 1341 sb->s_flags = s_flags; /* Restore MS_RDONLY status */ 1342} 1343 1344#define log2(n) ffz(~(n)) 1345 1346/* 1347 * Maximal file size. There is a direct, and {,double-,triple-}indirect 1348 * block limit, and also a limit of (2^32 - 1) 512-byte sectors in i_blocks. 1349 * We need to be 1 filesystem block less than the 2^32 sector limit. 1350 */ 1351static loff_t ext3_max_size(int bits) 1352{ 1353 loff_t res = EXT3_NDIR_BLOCKS; 1354 /* This constant is calculated to be the largest file size for a 1355 * dense, 4k-blocksize file such that the total number of 1356 * sectors in the file, including data and all indirect blocks, 1357 * does not exceed 2^32. 
*/ 1358 const loff_t upper_limit = 0x1ff7fffd000LL; 1359 1360 res += 1LL << (bits-2); 1361 res += 1LL << (2*(bits-2)); 1362 res += 1LL << (3*(bits-2)); 1363 res <<= bits; 1364 if (res > upper_limit) 1365 res = upper_limit; 1366 return res; 1367} 1368 1369static ext3_fsblk_t descriptor_loc(struct super_block *sb, 1370 ext3_fsblk_t logic_sb_block, 1371 int nr) 1372{ 1373 struct ext3_sb_info *sbi = EXT3_SB(sb); 1374 unsigned long bg, first_meta_bg; 1375 int has_super = 0; 1376 1377 first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg); 1378 1379 if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_META_BG) || 1380 nr < first_meta_bg) 1381 return (logic_sb_block + nr + 1); 1382 bg = sbi->s_desc_per_block * nr; 1383 if (ext3_bg_has_super(sb, bg)) 1384 has_super = 1; 1385 return (has_super + ext3_group_first_block_no(sb, bg)); 1386} 1387 1388 1389static int ext3_fill_super (struct super_block *sb, void *data, int silent) 1390{ 1391 struct buffer_head * bh; 1392 struct ext3_super_block *es = NULL; 1393 struct ext3_sb_info *sbi; 1394 ext3_fsblk_t block; 1395 ext3_fsblk_t sb_block = get_sb_block(&data); 1396 ext3_fsblk_t logic_sb_block; 1397 unsigned long offset = 0; 1398 unsigned int journal_inum = 0; 1399 unsigned long journal_devnum = 0; 1400 unsigned long def_mount_opts; 1401 struct inode *root; 1402 int blocksize; 1403 int hblock; 1404 int db_count; 1405 int i; 1406 int needs_recovery; 1407 __le32 features; 1408 1409 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 1410 if (!sbi) 1411 return -ENOMEM; 1412 sb->s_fs_info = sbi; 1413 sbi->s_mount_opt = 0; 1414 sbi->s_resuid = EXT3_DEF_RESUID; 1415 sbi->s_resgid = EXT3_DEF_RESGID; 1416 1417 unlock_kernel(); 1418 1419 blocksize = sb_min_blocksize(sb, EXT3_MIN_BLOCK_SIZE); 1420 if (!blocksize) { 1421 printk(KERN_ERR "EXT3-fs: unable to set blocksize\n"); 1422 goto out_fail; 1423 } 1424 1425 /* 1426 * The ext3 superblock will not be buffer aligned for other than 1kB 1427 * block sizes. 
We need to calculate the offset from buffer start. 1428 */ 1429 if (blocksize != EXT3_MIN_BLOCK_SIZE) { 1430 logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize; 1431 offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize; 1432 } else { 1433 logic_sb_block = sb_block; 1434 } 1435 1436 if (!(bh = sb_bread(sb, logic_sb_block))) { 1437 printk (KERN_ERR "EXT3-fs: unable to read superblock\n"); 1438 goto out_fail; 1439 } 1440 /* 1441 * Note: s_es must be initialized as soon as possible because 1442 * some ext3 macro-instructions depend on its value 1443 */ 1444 es = (struct ext3_super_block *) (((char *)bh->b_data) + offset); 1445 sbi->s_es = es; 1446 sb->s_magic = le16_to_cpu(es->s_magic); 1447 if (sb->s_magic != EXT3_SUPER_MAGIC) 1448 goto cantfind_ext3; 1449 1450 /* Set defaults before we parse the mount options */ 1451 def_mount_opts = le32_to_cpu(es->s_default_mount_opts); 1452 if (def_mount_opts & EXT3_DEFM_DEBUG) 1453 set_opt(sbi->s_mount_opt, DEBUG); 1454 if (def_mount_opts & EXT3_DEFM_BSDGROUPS) 1455 set_opt(sbi->s_mount_opt, GRPID); 1456 if (def_mount_opts & EXT3_DEFM_UID16) 1457 set_opt(sbi->s_mount_opt, NO_UID32); 1458 if (def_mount_opts & EXT3_DEFM_XATTR_USER) 1459 set_opt(sbi->s_mount_opt, XATTR_USER); 1460 if (def_mount_opts & EXT3_DEFM_ACL) 1461 set_opt(sbi->s_mount_opt, POSIX_ACL); 1462 if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_DATA) 1463 sbi->s_mount_opt |= EXT3_MOUNT_JOURNAL_DATA; 1464 else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_ORDERED) 1465 sbi->s_mount_opt |= EXT3_MOUNT_ORDERED_DATA; 1466 else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_WBACK) 1467 sbi->s_mount_opt |= EXT3_MOUNT_WRITEBACK_DATA; 1468 1469 if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_PANIC) 1470 set_opt(sbi->s_mount_opt, ERRORS_PANIC); 1471 else if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_RO) 1472 set_opt(sbi->s_mount_opt, ERRORS_RO); 1473 else 1474 set_opt(sbi->s_mount_opt, ERRORS_CONT); 1475 1476 sbi->s_resuid 
= le16_to_cpu(es->s_def_resuid); 1477 sbi->s_resgid = le16_to_cpu(es->s_def_resgid); 1478 1479 set_opt(sbi->s_mount_opt, RESERVATION); 1480 1481 if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum, 1482 NULL, 0)) 1483 goto failed_mount; 1484 1485 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 1486 ((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); 1487 1488 if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV && 1489 (EXT3_HAS_COMPAT_FEATURE(sb, ~0U) || 1490 EXT3_HAS_RO_COMPAT_FEATURE(sb, ~0U) || 1491 EXT3_HAS_INCOMPAT_FEATURE(sb, ~0U))) 1492 printk(KERN_WARNING 1493 "EXT3-fs warning: feature flags set on rev 0 fs, " 1494 "running e2fsck is recommended\n"); 1495 /* 1496 * Check feature flags regardless of the revision level, since we 1497 * previously didn't change the revision level when setting the flags, 1498 * so there is a chance incompat flags are set on a rev 0 filesystem. 1499 */ 1500 features = EXT3_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP); 1501 if (features) { 1502 printk(KERN_ERR "EXT3-fs: %s: couldn't mount because of " 1503 "unsupported optional features (%x).\n", 1504 sb->s_id, le32_to_cpu(features)); 1505 goto failed_mount; 1506 } 1507 features = EXT3_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP); 1508 if (!(sb->s_flags & MS_RDONLY) && features) { 1509 printk(KERN_ERR "EXT3-fs: %s: couldn't mount RDWR because of " 1510 "unsupported optional features (%x).\n", 1511 sb->s_id, le32_to_cpu(features)); 1512 goto failed_mount; 1513 } 1514 blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); 1515 1516 if (blocksize < EXT3_MIN_BLOCK_SIZE || 1517 blocksize > EXT3_MAX_BLOCK_SIZE) { 1518 printk(KERN_ERR 1519 "EXT3-fs: Unsupported filesystem blocksize %d on %s.\n", 1520 blocksize, sb->s_id); 1521 goto failed_mount; 1522 } 1523 1524 hblock = bdev_hardsect_size(sb->s_bdev); 1525 if (sb->s_blocksize != blocksize) { 1526 /* 1527 * Make sure the blocksize for the filesystem is larger 1528 * than the hardware 
sectorsize for the machine. 1529 */ 1530 if (blocksize < hblock) { 1531 printk(KERN_ERR "EXT3-fs: blocksize %d too small for " 1532 "device blocksize %d.\n", blocksize, hblock); 1533 goto failed_mount; 1534 } 1535 1536 brelse (bh); 1537 sb_set_blocksize(sb, blocksize); 1538 logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize; 1539 offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize; 1540 bh = sb_bread(sb, logic_sb_block); 1541 if (!bh) { 1542 printk(KERN_ERR 1543 "EXT3-fs: Can't read superblock on 2nd try.\n"); 1544 goto failed_mount; 1545 } 1546 es = (struct ext3_super_block *)(((char *)bh->b_data) + offset); 1547 sbi->s_es = es; 1548 if (es->s_magic != cpu_to_le16(EXT3_SUPER_MAGIC)) { 1549 printk (KERN_ERR 1550 "EXT3-fs: Magic mismatch, very weird !\n"); 1551 goto failed_mount; 1552 } 1553 } 1554 1555 sb->s_maxbytes = ext3_max_size(sb->s_blocksize_bits); 1556 1557 if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV) { 1558 sbi->s_inode_size = EXT3_GOOD_OLD_INODE_SIZE; 1559 sbi->s_first_ino = EXT3_GOOD_OLD_FIRST_INO; 1560 } else { 1561 sbi->s_inode_size = le16_to_cpu(es->s_inode_size); 1562 sbi->s_first_ino = le32_to_cpu(es->s_first_ino); 1563 if ((sbi->s_inode_size < EXT3_GOOD_OLD_INODE_SIZE) || 1564 (sbi->s_inode_size & (sbi->s_inode_size - 1)) || 1565 (sbi->s_inode_size > blocksize)) { 1566 printk (KERN_ERR 1567 "EXT3-fs: unsupported inode size: %d\n", 1568 sbi->s_inode_size); 1569 goto failed_mount; 1570 } 1571 } 1572 sbi->s_frag_size = EXT3_MIN_FRAG_SIZE << 1573 le32_to_cpu(es->s_log_frag_size); 1574 if (blocksize != sbi->s_frag_size) { 1575 printk(KERN_ERR 1576 "EXT3-fs: fragsize %lu != blocksize %u (unsupported)\n", 1577 sbi->s_frag_size, blocksize); 1578 goto failed_mount; 1579 } 1580 sbi->s_frags_per_block = 1; 1581 sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); 1582 sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group); 1583 sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); 1584 if (EXT3_INODE_SIZE(sb) == 
0) 1585 goto cantfind_ext3; 1586 sbi->s_inodes_per_block = blocksize / EXT3_INODE_SIZE(sb); 1587 if (sbi->s_inodes_per_block == 0) 1588 goto cantfind_ext3; 1589 sbi->s_itb_per_group = sbi->s_inodes_per_group / 1590 sbi->s_inodes_per_block; 1591 sbi->s_desc_per_block = blocksize / sizeof(struct ext3_group_desc); 1592 sbi->s_sbh = bh; 1593 sbi->s_mount_state = le16_to_cpu(es->s_state); 1594 sbi->s_addr_per_block_bits = log2(EXT3_ADDR_PER_BLOCK(sb)); 1595 sbi->s_desc_per_block_bits = log2(EXT3_DESC_PER_BLOCK(sb)); 1596 for (i=0; i < 4; i++) 1597 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); 1598 sbi->s_def_hash_version = es->s_def_hash_version; 1599 1600 if (sbi->s_blocks_per_group > blocksize * 8) { 1601 printk (KERN_ERR 1602 "EXT3-fs: #blocks per group too big: %lu\n", 1603 sbi->s_blocks_per_group); 1604 goto failed_mount; 1605 } 1606 if (sbi->s_frags_per_group > blocksize * 8) { 1607 printk (KERN_ERR 1608 "EXT3-fs: #fragments per group too big: %lu\n", 1609 sbi->s_frags_per_group); 1610 goto failed_mount; 1611 } 1612 if (sbi->s_inodes_per_group > blocksize * 8) { 1613 printk (KERN_ERR 1614 "EXT3-fs: #inodes per group too big: %lu\n", 1615 sbi->s_inodes_per_group); 1616 goto failed_mount; 1617 } 1618 1619 if (le32_to_cpu(es->s_blocks_count) > 1620 (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { 1621 printk(KERN_ERR "EXT3-fs: filesystem on %s:" 1622 " too large to mount safely\n", sb->s_id); 1623 if (sizeof(sector_t) < 8) 1624 printk(KERN_WARNING "EXT3-fs: CONFIG_LBD not " 1625 "enabled\n"); 1626 goto failed_mount; 1627 } 1628 1629 if (EXT3_BLOCKS_PER_GROUP(sb) == 0) 1630 goto cantfind_ext3; 1631 sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) - 1632 le32_to_cpu(es->s_first_data_block) - 1) 1633 / EXT3_BLOCKS_PER_GROUP(sb)) + 1; 1634 db_count = (sbi->s_groups_count + EXT3_DESC_PER_BLOCK(sb) - 1) / 1635 EXT3_DESC_PER_BLOCK(sb); 1636 sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *), 1637 GFP_KERNEL); 1638 if (sbi->s_group_desc == 
NULL) { 1639 printk (KERN_ERR "EXT3-fs: not enough memory\n"); 1640 goto failed_mount; 1641 } 1642 1643 bgl_lock_init(&sbi->s_blockgroup_lock); 1644 1645 for (i = 0; i < db_count; i++) { 1646 block = descriptor_loc(sb, logic_sb_block, i); 1647 sbi->s_group_desc[i] = sb_bread(sb, block); 1648 if (!sbi->s_group_desc[i]) { 1649 printk (KERN_ERR "EXT3-fs: " 1650 "can't read group descriptor %d\n", i); 1651 db_count = i; 1652 goto failed_mount2; 1653 } 1654 } 1655 if (!ext3_check_descriptors (sb)) { 1656 printk(KERN_ERR "EXT3-fs: group descriptors corrupted!\n"); 1657 goto failed_mount2; 1658 } 1659 sbi->s_gdb_count = db_count; 1660 get_random_bytes(&sbi->s_next_generation, sizeof(u32)); 1661 spin_lock_init(&sbi->s_next_gen_lock); 1662 1663 percpu_counter_init(&sbi->s_freeblocks_counter, 1664 ext3_count_free_blocks(sb)); 1665 percpu_counter_init(&sbi->s_freeinodes_counter, 1666 ext3_count_free_inodes(sb)); 1667 percpu_counter_init(&sbi->s_dirs_counter, 1668 ext3_count_dirs(sb)); 1669 1670 /* per fileystem reservation list head & lock */ 1671 spin_lock_init(&sbi->s_rsv_window_lock); 1672 sbi->s_rsv_window_root = RB_ROOT; 1673 /* Add a single, static dummy reservation to the start of the 1674 * reservation window list --- it gives us a placeholder for 1675 * append-at-start-of-list which makes the allocation logic 1676 * _much_ simpler. 
*/ 1677 sbi->s_rsv_window_head.rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; 1678 sbi->s_rsv_window_head.rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; 1679 sbi->s_rsv_window_head.rsv_alloc_hit = 0; 1680 sbi->s_rsv_window_head.rsv_goal_size = 0; 1681 ext3_rsv_window_add(sb, &sbi->s_rsv_window_head); 1682 1683 /* 1684 * set up enough so that it can read an inode 1685 */ 1686 sb->s_op = &ext3_sops; 1687 sb->s_export_op = &ext3_export_ops; 1688 sb->s_xattr = ext3_xattr_handlers; 1689#ifdef CONFIG_QUOTA 1690 sb->s_qcop = &ext3_qctl_operations; 1691 sb->dq_op = &ext3_quota_operations; 1692#endif 1693 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ 1694 1695 sb->s_root = NULL; 1696 1697 needs_recovery = (es->s_last_orphan != 0 || 1698 EXT3_HAS_INCOMPAT_FEATURE(sb, 1699 EXT3_FEATURE_INCOMPAT_RECOVER)); 1700 1701 /* 1702 * The first inode we look at is the journal inode. Don't try 1703 * root first: it may be modified in the journal! 1704 */ 1705 if (!test_opt(sb, NOLOAD) && 1706 EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) { 1707 if (ext3_load_journal(sb, es, journal_devnum)) 1708 goto failed_mount3; 1709 } else if (journal_inum) { 1710 if (ext3_create_journal(sb, es, journal_inum)) 1711 goto failed_mount3; 1712 } else { 1713 if (!silent) 1714 printk (KERN_ERR 1715 "ext3: No journal on filesystem on %s\n", 1716 sb->s_id); 1717 goto failed_mount3; 1718 } 1719 1720 /* We have now updated the journal if required, so we can 1721 * validate the data journaling mode. 
*/ 1722 switch (test_opt(sb, DATA_FLAGS)) { 1723 case 0: 1724 /* No mode set, assume a default based on the journal 1725 capabilities: ORDERED_DATA if the journal can 1726 cope, else JOURNAL_DATA */ 1727 if (journal_check_available_features 1728 (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)) 1729 set_opt(sbi->s_mount_opt, ORDERED_DATA); 1730 else 1731 set_opt(sbi->s_mount_opt, JOURNAL_DATA); 1732 break; 1733 1734 case EXT3_MOUNT_ORDERED_DATA: 1735 case EXT3_MOUNT_WRITEBACK_DATA: 1736 if (!journal_check_available_features 1737 (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)) { 1738 printk(KERN_ERR "EXT3-fs: Journal does not support " 1739 "requested data journaling mode\n"); 1740 goto failed_mount4; 1741 } 1742 default: 1743 break; 1744 } 1745 1746 if (test_opt(sb, NOBH)) { 1747 if (!(test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)) { 1748 printk(KERN_WARNING "EXT3-fs: Ignoring nobh option - " 1749 "its supported only with writeback mode\n"); 1750 clear_opt(sbi->s_mount_opt, NOBH); 1751 } 1752 } 1753 /* 1754 * The journal_load will have done any necessary log recovery, 1755 * so we can safely mount the rest of the filesystem now. 1756 */ 1757 1758 root = iget(sb, EXT3_ROOT_INO); 1759 sb->s_root = d_alloc_root(root); 1760 if (!sb->s_root) { 1761 printk(KERN_ERR "EXT3-fs: get root inode failed\n"); 1762 iput(root); 1763 goto failed_mount4; 1764 } 1765 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { 1766 dput(sb->s_root); 1767 sb->s_root = NULL; 1768 printk(KERN_ERR "EXT3-fs: corrupt root inode, run e2fsck\n"); 1769 goto failed_mount4; 1770 } 1771 1772 ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY); 1773 /* 1774 * akpm: core read_super() calls in here with the superblock locked. 1775 * That deadlocks, because orphan cleanup needs to lock the superblock 1776 * in numerous places. 
Here we just pop the lock - it's relatively 1777 * harmless, because we are now ready to accept write_super() requests, 1778 * and aviro says that's the only reason for hanging onto the 1779 * superblock lock. 1780 */ 1781 EXT3_SB(sb)->s_mount_state |= EXT3_ORPHAN_FS; 1782 ext3_orphan_cleanup(sb, es); 1783 EXT3_SB(sb)->s_mount_state &= ~EXT3_ORPHAN_FS; 1784 if (needs_recovery) 1785 printk (KERN_INFO "EXT3-fs: recovery complete.\n"); 1786 ext3_mark_recovery_complete(sb, es); 1787 printk (KERN_INFO "EXT3-fs: mounted filesystem with %s data mode.\n", 1788 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal": 1789 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered": 1790 "writeback"); 1791 1792 lock_kernel(); 1793 return 0; 1794 1795cantfind_ext3: 1796 if (!silent) 1797 printk(KERN_ERR "VFS: Can't find ext3 filesystem on dev %s.\n", 1798 sb->s_id); 1799 goto failed_mount; 1800 1801failed_mount4: 1802 journal_destroy(sbi->s_journal); 1803failed_mount3: 1804 percpu_counter_destroy(&sbi->s_freeblocks_counter); 1805 percpu_counter_destroy(&sbi->s_freeinodes_counter); 1806 percpu_counter_destroy(&sbi->s_dirs_counter); 1807failed_mount2: 1808 for (i = 0; i < db_count; i++) 1809 brelse(sbi->s_group_desc[i]); 1810 kfree(sbi->s_group_desc); 1811failed_mount: 1812#ifdef CONFIG_QUOTA 1813 for (i = 0; i < MAXQUOTAS; i++) 1814 kfree(sbi->s_qf_names[i]); 1815#endif 1816 ext3_blkdev_remove(sbi); 1817 brelse(bh); 1818out_fail: 1819 sb->s_fs_info = NULL; 1820 kfree(sbi); 1821 lock_kernel(); 1822 return -EINVAL; 1823} 1824 1825/* 1826 * Setup any per-fs journal parameters now. We'll do this both on 1827 * initial mount, once the journal has been initialised but before we've 1828 * done any recovery; and again on any subsequent remount. 
1829 */ 1830static void ext3_init_journal_params(struct super_block *sb, journal_t *journal) 1831{ 1832 struct ext3_sb_info *sbi = EXT3_SB(sb); 1833 1834 if (sbi->s_commit_interval) 1835 journal->j_commit_interval = sbi->s_commit_interval; 1836 /* We could also set up an ext3-specific default for the commit 1837 * interval here, but for now we'll just fall back to the jbd 1838 * default. */ 1839 1840 spin_lock(&journal->j_state_lock); 1841 if (test_opt(sb, BARRIER)) 1842 journal->j_flags |= JFS_BARRIER; 1843 else 1844 journal->j_flags &= ~JFS_BARRIER; 1845 spin_unlock(&journal->j_state_lock); 1846} 1847 1848static journal_t *ext3_get_journal(struct super_block *sb, 1849 unsigned int journal_inum) 1850{ 1851 struct inode *journal_inode; 1852 journal_t *journal; 1853 1854 /* First, test for the existence of a valid inode on disk. Bad 1855 * things happen if we iget() an unused inode, as the subsequent 1856 * iput() will try to delete it. */ 1857 1858 journal_inode = iget(sb, journal_inum); 1859 if (!journal_inode) { 1860 printk(KERN_ERR "EXT3-fs: no journal found.\n"); 1861 return NULL; 1862 } 1863 if (!journal_inode->i_nlink) { 1864 make_bad_inode(journal_inode); 1865 iput(journal_inode); 1866 printk(KERN_ERR "EXT3-fs: journal inode is deleted.\n"); 1867 return NULL; 1868 } 1869 1870 jbd_debug(2, "Journal inode found at %p: %Ld bytes\n", 1871 journal_inode, journal_inode->i_size); 1872 if (is_bad_inode(journal_inode) || !S_ISREG(journal_inode->i_mode)) { 1873 printk(KERN_ERR "EXT3-fs: invalid journal inode.\n"); 1874 iput(journal_inode); 1875 return NULL; 1876 } 1877 1878 journal = journal_init_inode(journal_inode); 1879 if (!journal) { 1880 printk(KERN_ERR "EXT3-fs: Could not load journal inode\n"); 1881 iput(journal_inode); 1882 return NULL; 1883 } 1884 journal->j_private = sb; 1885 ext3_init_journal_params(sb, journal); 1886 return journal; 1887} 1888 1889static journal_t *ext3_get_dev_journal(struct super_block *sb, 1890 dev_t j_dev) 1891{ 1892 struct 
buffer_head * bh; 1893 journal_t *journal; 1894 ext3_fsblk_t start; 1895 ext3_fsblk_t len; 1896 int hblock, blocksize; 1897 ext3_fsblk_t sb_block; 1898 unsigned long offset; 1899 struct ext3_super_block * es; 1900 struct block_device *bdev; 1901 1902 bdev = ext3_blkdev_get(j_dev); 1903 if (bdev == NULL) 1904 return NULL; 1905 1906 if (bd_claim(bdev, sb)) { 1907 printk(KERN_ERR 1908 "EXT3: failed to claim external journal device.\n"); 1909 blkdev_put(bdev); 1910 return NULL; 1911 } 1912 1913 blocksize = sb->s_blocksize; 1914 hblock = bdev_hardsect_size(bdev); 1915 if (blocksize < hblock) { 1916 printk(KERN_ERR 1917 "EXT3-fs: blocksize too small for journal device.\n"); 1918 goto out_bdev; 1919 } 1920 1921 sb_block = EXT3_MIN_BLOCK_SIZE / blocksize; 1922 offset = EXT3_MIN_BLOCK_SIZE % blocksize; 1923 set_blocksize(bdev, blocksize); 1924 if (!(bh = __bread(bdev, sb_block, blocksize))) { 1925 printk(KERN_ERR "EXT3-fs: couldn't read superblock of " 1926 "external journal\n"); 1927 goto out_bdev; 1928 } 1929 1930 es = (struct ext3_super_block *) (((char *)bh->b_data) + offset); 1931 if ((le16_to_cpu(es->s_magic) != EXT3_SUPER_MAGIC) || 1932 !(le32_to_cpu(es->s_feature_incompat) & 1933 EXT3_FEATURE_INCOMPAT_JOURNAL_DEV)) { 1934 printk(KERN_ERR "EXT3-fs: external journal has " 1935 "bad superblock\n"); 1936 brelse(bh); 1937 goto out_bdev; 1938 } 1939 1940 if (memcmp(EXT3_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { 1941 printk(KERN_ERR "EXT3-fs: journal UUID does not match\n"); 1942 brelse(bh); 1943 goto out_bdev; 1944 } 1945 1946 len = le32_to_cpu(es->s_blocks_count); 1947 start = sb_block + 1; 1948 brelse(bh); /* we're done with the superblock */ 1949 1950 journal = journal_init_dev(bdev, sb->s_bdev, 1951 start, len, blocksize); 1952 if (!journal) { 1953 printk(KERN_ERR "EXT3-fs: failed to create device journal\n"); 1954 goto out_bdev; 1955 } 1956 journal->j_private = sb; 1957 ll_rw_block(READ, 1, &journal->j_sb_buffer); 1958 wait_on_buffer(journal->j_sb_buffer); 
1959 if (!buffer_uptodate(journal->j_sb_buffer)) { 1960 printk(KERN_ERR "EXT3-fs: I/O error on journal device\n"); 1961 goto out_journal; 1962 } 1963 if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { 1964 printk(KERN_ERR "EXT3-fs: External journal has more than one " 1965 "user (unsupported) - %d\n", 1966 be32_to_cpu(journal->j_superblock->s_nr_users)); 1967 goto out_journal; 1968 } 1969 EXT3_SB(sb)->journal_bdev = bdev; 1970 ext3_init_journal_params(sb, journal); 1971 return journal; 1972out_journal: 1973 journal_destroy(journal); 1974out_bdev: 1975 ext3_blkdev_put(bdev); 1976 return NULL; 1977} 1978 1979static int ext3_load_journal(struct super_block *sb, 1980 struct ext3_super_block *es, 1981 unsigned long journal_devnum) 1982{ 1983 journal_t *journal; 1984 unsigned int journal_inum = le32_to_cpu(es->s_journal_inum); 1985 dev_t journal_dev; 1986 int err = 0; 1987 int really_read_only; 1988 1989 if (journal_devnum && 1990 journal_devnum != le32_to_cpu(es->s_journal_dev)) { 1991 printk(KERN_INFO "EXT3-fs: external journal device major/minor " 1992 "numbers have changed\n"); 1993 journal_dev = new_decode_dev(journal_devnum); 1994 } else 1995 journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev)); 1996 1997 really_read_only = bdev_read_only(sb->s_bdev); 1998 1999 /* 2000 * Are we loading a blank journal or performing recovery after a 2001 * crash? For recovery, we need to check in advance whether we 2002 * can get read-write access to the device. 
2003 */ 2004 2005 if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER)) { 2006 if (sb->s_flags & MS_RDONLY) { 2007 printk(KERN_INFO "EXT3-fs: INFO: recovery " 2008 "required on readonly filesystem.\n"); 2009 if (really_read_only) { 2010 printk(KERN_ERR "EXT3-fs: write access " 2011 "unavailable, cannot proceed.\n"); 2012 return -EROFS; 2013 } 2014 printk (KERN_INFO "EXT3-fs: write access will " 2015 "be enabled during recovery.\n"); 2016 } 2017 } 2018 2019 if (journal_inum && journal_dev) { 2020 printk(KERN_ERR "EXT3-fs: filesystem has both journal " 2021 "and inode journals!\n"); 2022 return -EINVAL; 2023 } 2024 2025 if (journal_inum) { 2026 if (!(journal = ext3_get_journal(sb, journal_inum))) 2027 return -EINVAL; 2028 } else { 2029 if (!(journal = ext3_get_dev_journal(sb, journal_dev))) 2030 return -EINVAL; 2031 } 2032 2033 if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { 2034 err = journal_update_format(journal); 2035 if (err) { 2036 printk(KERN_ERR "EXT3-fs: error updating journal.\n"); 2037 journal_destroy(journal); 2038 return err; 2039 } 2040 } 2041 2042 if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER)) 2043 err = journal_wipe(journal, !really_read_only); 2044 if (!err) 2045 err = journal_load(journal); 2046 2047 if (err) { 2048 printk(KERN_ERR "EXT3-fs: error loading journal.\n"); 2049 journal_destroy(journal); 2050 return err; 2051 } 2052 2053 EXT3_SB(sb)->s_journal = journal; 2054 ext3_clear_journal_err(sb, es); 2055 2056 if (journal_devnum && 2057 journal_devnum != le32_to_cpu(es->s_journal_dev)) { 2058 es->s_journal_dev = cpu_to_le32(journal_devnum); 2059 sb->s_dirt = 1; 2060 2061 /* Make sure we flush the recovery flag to disk. 
*/ 2062 ext3_commit_super(sb, es, 1); 2063 } 2064 2065 return 0; 2066} 2067 2068static int ext3_create_journal(struct super_block * sb, 2069 struct ext3_super_block * es, 2070 unsigned int journal_inum) 2071{ 2072 journal_t *journal; 2073 2074 if (sb->s_flags & MS_RDONLY) { 2075 printk(KERN_ERR "EXT3-fs: readonly filesystem when trying to " 2076 "create journal.\n"); 2077 return -EROFS; 2078 } 2079 2080 if (!(journal = ext3_get_journal(sb, journal_inum))) 2081 return -EINVAL; 2082 2083 printk(KERN_INFO "EXT3-fs: creating new journal on inode %u\n", 2084 journal_inum); 2085 2086 if (journal_create(journal)) { 2087 printk(KERN_ERR "EXT3-fs: error creating journal.\n"); 2088 journal_destroy(journal); 2089 return -EIO; 2090 } 2091 2092 EXT3_SB(sb)->s_journal = journal; 2093 2094 ext3_update_dynamic_rev(sb); 2095 EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); 2096 EXT3_SET_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL); 2097 2098 es->s_journal_inum = cpu_to_le32(journal_inum); 2099 sb->s_dirt = 1; 2100 2101 /* Make sure we flush the recovery flag to disk. */ 2102 ext3_commit_super(sb, es, 1); 2103 2104 return 0; 2105} 2106 2107static void ext3_commit_super (struct super_block * sb, 2108 struct ext3_super_block * es, 2109 int sync) 2110{ 2111 struct buffer_head *sbh = EXT3_SB(sb)->s_sbh; 2112 2113 if (!sbh) 2114 return; 2115 es->s_wtime = cpu_to_le32(get_seconds()); 2116 es->s_free_blocks_count = cpu_to_le32(ext3_count_free_blocks(sb)); 2117 es->s_free_inodes_count = cpu_to_le32(ext3_count_free_inodes(sb)); 2118 BUFFER_TRACE(sbh, "marking dirty"); 2119 mark_buffer_dirty(sbh); 2120 if (sync) 2121 sync_dirty_buffer(sbh); 2122} 2123 2124 2125/* 2126 * Have we just finished recovery? If so, and if we are mounting (or 2127 * remounting) the filesystem readonly, then we will end up with a 2128 * consistent fs on disk. Record that fact. 
2129 */ 2130static void ext3_mark_recovery_complete(struct super_block * sb, 2131 struct ext3_super_block * es) 2132{ 2133 journal_t *journal = EXT3_SB(sb)->s_journal; 2134 2135 journal_lock_updates(journal); 2136 journal_flush(journal); 2137 if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) && 2138 sb->s_flags & MS_RDONLY) { 2139 EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); 2140 sb->s_dirt = 0; 2141 ext3_commit_super(sb, es, 1); 2142 } 2143 journal_unlock_updates(journal); 2144} 2145 2146/* 2147 * If we are mounting (or read-write remounting) a filesystem whose journal 2148 * has recorded an error from a previous lifetime, move that error to the 2149 * main filesystem now. 2150 */ 2151static void ext3_clear_journal_err(struct super_block * sb, 2152 struct ext3_super_block * es) 2153{ 2154 journal_t *journal; 2155 int j_errno; 2156 const char *errstr; 2157 2158 journal = EXT3_SB(sb)->s_journal; 2159 2160 /* 2161 * Now check for any error status which may have been recorded in the 2162 * journal by a prior ext3_error() or ext3_abort() 2163 */ 2164 2165 j_errno = journal_errno(journal); 2166 if (j_errno) { 2167 char nbuf[16]; 2168 2169 errstr = ext3_decode_error(sb, j_errno, nbuf); 2170 ext3_warning(sb, __FUNCTION__, "Filesystem error recorded " 2171 "from previous mount: %s", errstr); 2172 ext3_warning(sb, __FUNCTION__, "Marking fs in need of " 2173 "filesystem check."); 2174 2175 EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; 2176 es->s_state |= cpu_to_le16(EXT3_ERROR_FS); 2177 ext3_commit_super (sb, es, 1); 2178 2179 journal_clear_err(journal); 2180 } 2181} 2182 2183/* 2184 * Force the running and committing transactions to commit, 2185 * and wait on the commit. 
2186 */ 2187int ext3_force_commit(struct super_block *sb) 2188{ 2189 journal_t *journal; 2190 int ret; 2191 2192 if (sb->s_flags & MS_RDONLY) 2193 return 0; 2194 2195 journal = EXT3_SB(sb)->s_journal; 2196 sb->s_dirt = 0; 2197 ret = ext3_journal_force_commit(journal); 2198 return ret; 2199} 2200 2201/* 2202 * Ext3 always journals updates to the superblock itself, so we don't 2203 * have to propagate any other updates to the superblock on disk at this 2204 * point. Just start an async writeback to get the buffers on their way 2205 * to the disk. 2206 * 2207 * This implicitly triggers the writebehind on sync(). 2208 */ 2209 2210static void ext3_write_super (struct super_block * sb) 2211{ 2212 if (mutex_trylock(&sb->s_lock) != 0) 2213 BUG(); 2214 sb->s_dirt = 0; 2215} 2216 2217static int ext3_sync_fs(struct super_block *sb, int wait) 2218{ 2219 tid_t target; 2220 2221 sb->s_dirt = 0; 2222 if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) { 2223 if (wait) 2224 log_wait_commit(EXT3_SB(sb)->s_journal, target); 2225 } 2226 return 0; 2227} 2228 2229/* 2230 * LVM calls this function before a (read-only) snapshot is created. This 2231 * gives us a chance to flush the journal completely and mark the fs clean. 2232 */ 2233static void ext3_write_super_lockfs(struct super_block *sb) 2234{ 2235 sb->s_dirt = 0; 2236 2237 if (!(sb->s_flags & MS_RDONLY)) { 2238 journal_t *journal = EXT3_SB(sb)->s_journal; 2239 2240 /* Now we set up the journal barrier. */ 2241 journal_lock_updates(journal); 2242 journal_flush(journal); 2243 2244 /* Journal blocked and flushed, clear needs_recovery flag. */ 2245 EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); 2246 ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1); 2247 } 2248} 2249 2250/* 2251 * Called by LVM after the snapshot is done. We need to reset the RECOVER 2252 * flag here, even though the filesystem is not technically dirty yet. 
2253 */ 2254static void ext3_unlockfs(struct super_block *sb) 2255{ 2256 if (!(sb->s_flags & MS_RDONLY)) { 2257 lock_super(sb); 2258 /* Reser the needs_recovery flag before the fs is unlocked. */ 2259 EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); 2260 ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1); 2261 unlock_super(sb); 2262 journal_unlock_updates(EXT3_SB(sb)->s_journal); 2263 } 2264} 2265 2266static int ext3_remount (struct super_block * sb, int * flags, char * data) 2267{ 2268 struct ext3_super_block * es; 2269 struct ext3_sb_info *sbi = EXT3_SB(sb); 2270 ext3_fsblk_t n_blocks_count = 0; 2271 unsigned long old_sb_flags; 2272 struct ext3_mount_options old_opts; 2273 int err; 2274#ifdef CONFIG_QUOTA 2275 int i; 2276#endif 2277 2278 /* Store the original options */ 2279 old_sb_flags = sb->s_flags; 2280 old_opts.s_mount_opt = sbi->s_mount_opt; 2281 old_opts.s_resuid = sbi->s_resuid; 2282 old_opts.s_resgid = sbi->s_resgid; 2283 old_opts.s_commit_interval = sbi->s_commit_interval; 2284#ifdef CONFIG_QUOTA 2285 old_opts.s_jquota_fmt = sbi->s_jquota_fmt; 2286 for (i = 0; i < MAXQUOTAS; i++) 2287 old_opts.s_qf_names[i] = sbi->s_qf_names[i]; 2288#endif 2289 2290 /* 2291 * Allow the "check" option to be passed as a remount option. 2292 */ 2293 if (!parse_options(data, sb, NULL, NULL, &n_blocks_count, 1)) { 2294 err = -EINVAL; 2295 goto restore_opts; 2296 } 2297 2298 if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) 2299 ext3_abort(sb, __FUNCTION__, "Abort forced by user"); 2300 2301 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 2302 ((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? 
MS_POSIXACL : 0); 2303 2304 es = sbi->s_es; 2305 2306 ext3_init_journal_params(sb, sbi->s_journal); 2307 2308 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) || 2309 n_blocks_count > le32_to_cpu(es->s_blocks_count)) { 2310 if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) { 2311 err = -EROFS; 2312 goto restore_opts; 2313 } 2314 2315 if (*flags & MS_RDONLY) { 2316 /* 2317 * First of all, the unconditional stuff we have to do 2318 * to disable replay of the journal when we next remount 2319 */ 2320 sb->s_flags |= MS_RDONLY; 2321 2322 /* 2323 * OK, test if we are remounting a valid rw partition 2324 * readonly, and if so set the rdonly flag and then 2325 * mark the partition as valid again. 2326 */ 2327 if (!(es->s_state & cpu_to_le16(EXT3_VALID_FS)) && 2328 (sbi->s_mount_state & EXT3_VALID_FS)) 2329 es->s_state = cpu_to_le16(sbi->s_mount_state); 2330 2331 ext3_mark_recovery_complete(sb, es); 2332 } else { 2333 __le32 ret; 2334 if ((ret = EXT3_HAS_RO_COMPAT_FEATURE(sb, 2335 ~EXT3_FEATURE_RO_COMPAT_SUPP))) { 2336 printk(KERN_WARNING "EXT3-fs: %s: couldn't " 2337 "remount RDWR because of unsupported " 2338 "optional features (%x).\n", 2339 sb->s_id, le32_to_cpu(ret)); 2340 err = -EROFS; 2341 goto restore_opts; 2342 } 2343 /* 2344 * Mounting a RDONLY partition read-write, so reread 2345 * and store the current valid flag. (It may have 2346 * been changed by e2fsck since we originally mounted 2347 * the partition.) 
2348 */ 2349 ext3_clear_journal_err(sb, es); 2350 sbi->s_mount_state = le16_to_cpu(es->s_state); 2351 if ((err = ext3_group_extend(sb, es, n_blocks_count))) 2352 goto restore_opts; 2353 if (!ext3_setup_super (sb, es, 0)) 2354 sb->s_flags &= ~MS_RDONLY; 2355 } 2356 } 2357#ifdef CONFIG_QUOTA 2358 /* Release old quota file names */ 2359 for (i = 0; i < MAXQUOTAS; i++) 2360 if (old_opts.s_qf_names[i] && 2361 old_opts.s_qf_names[i] != sbi->s_qf_names[i]) 2362 kfree(old_opts.s_qf_names[i]); 2363#endif 2364 return 0; 2365restore_opts: 2366 sb->s_flags = old_sb_flags; 2367 sbi->s_mount_opt = old_opts.s_mount_opt; 2368 sbi->s_resuid = old_opts.s_resuid; 2369 sbi->s_resgid = old_opts.s_resgid; 2370 sbi->s_commit_interval = old_opts.s_commit_interval; 2371#ifdef CONFIG_QUOTA 2372 sbi->s_jquota_fmt = old_opts.s_jquota_fmt; 2373 for (i = 0; i < MAXQUOTAS; i++) { 2374 if (sbi->s_qf_names[i] && 2375 old_opts.s_qf_names[i] != sbi->s_qf_names[i]) 2376 kfree(sbi->s_qf_names[i]); 2377 sbi->s_qf_names[i] = old_opts.s_qf_names[i]; 2378 } 2379#endif 2380 return err; 2381} 2382 2383static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf) 2384{ 2385 struct super_block *sb = dentry->d_sb; 2386 struct ext3_sb_info *sbi = EXT3_SB(sb); 2387 struct ext3_super_block *es = sbi->s_es; 2388 ext3_fsblk_t overhead; 2389 int i; 2390 2391 if (test_opt (sb, MINIX_DF)) 2392 overhead = 0; 2393 else { 2394 unsigned long ngroups; 2395 ngroups = EXT3_SB(sb)->s_groups_count; 2396 smp_rmb(); 2397 2398 /* 2399 * Compute the overhead (FS structures) 2400 */ 2401 2402 /* 2403 * All of the blocks before first_data_block are 2404 * overhead 2405 */ 2406 overhead = le32_to_cpu(es->s_first_data_block); 2407 2408 /* 2409 * Add the overhead attributed to the superblock and 2410 * block group descriptors. If the sparse superblocks 2411 * feature is turned on, then not all groups have this. 
2412 */ 2413 for (i = 0; i < ngroups; i++) { 2414 overhead += ext3_bg_has_super(sb, i) + 2415 ext3_bg_num_gdb(sb, i); 2416 cond_resched(); 2417 } 2418 2419 /* 2420 * Every block group has an inode bitmap, a block 2421 * bitmap, and an inode table. 2422 */ 2423 overhead += (ngroups * (2 + EXT3_SB(sb)->s_itb_per_group)); 2424 } 2425 2426 buf->f_type = EXT3_SUPER_MAGIC; 2427 buf->f_bsize = sb->s_blocksize; 2428 buf->f_blocks = le32_to_cpu(es->s_blocks_count) - overhead; 2429 buf->f_bfree = percpu_counter_sum(&sbi->s_freeblocks_counter); 2430 buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count); 2431 if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count)) 2432 buf->f_bavail = 0; 2433 buf->f_files = le32_to_cpu(es->s_inodes_count); 2434 buf->f_ffree = percpu_counter_sum(&sbi->s_freeinodes_counter); 2435 buf->f_namelen = EXT3_NAME_LEN; 2436 return 0; 2437} 2438 2439/* Helper function for writing quotas on sync - we need to start transaction before quota file 2440 * is locked for write. 
Otherwise the are possible deadlocks: 2441 * Process 1 Process 2 2442 * ext3_create() quota_sync() 2443 * journal_start() write_dquot() 2444 * DQUOT_INIT() down(dqio_mutex) 2445 * down(dqio_mutex) journal_start() 2446 * 2447 */ 2448 2449#ifdef CONFIG_QUOTA 2450 2451static inline struct inode *dquot_to_inode(struct dquot *dquot) 2452{ 2453 return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type]; 2454} 2455 2456static int ext3_dquot_initialize(struct inode *inode, int type) 2457{ 2458 handle_t *handle; 2459 int ret, err; 2460 2461 /* We may create quota structure so we need to reserve enough blocks */ 2462 handle = ext3_journal_start(inode, 2*EXT3_QUOTA_INIT_BLOCKS(inode->i_sb)); 2463 if (IS_ERR(handle)) 2464 return PTR_ERR(handle); 2465 ret = dquot_initialize(inode, type); 2466 err = ext3_journal_stop(handle); 2467 if (!ret) 2468 ret = err; 2469 return ret; 2470} 2471 2472static int ext3_dquot_drop(struct inode *inode) 2473{ 2474 handle_t *handle; 2475 int ret, err; 2476 2477 /* We may delete quota structure so we need to reserve enough blocks */ 2478 handle = ext3_journal_start(inode, 2*EXT3_QUOTA_DEL_BLOCKS(inode->i_sb)); 2479 if (IS_ERR(handle)) 2480 return PTR_ERR(handle); 2481 ret = dquot_drop(inode); 2482 err = ext3_journal_stop(handle); 2483 if (!ret) 2484 ret = err; 2485 return ret; 2486} 2487 2488static int ext3_write_dquot(struct dquot *dquot) 2489{ 2490 int ret, err; 2491 handle_t *handle; 2492 struct inode *inode; 2493 2494 inode = dquot_to_inode(dquot); 2495 handle = ext3_journal_start(inode, 2496 EXT3_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); 2497 if (IS_ERR(handle)) 2498 return PTR_ERR(handle); 2499 ret = dquot_commit(dquot); 2500 err = ext3_journal_stop(handle); 2501 if (!ret) 2502 ret = err; 2503 return ret; 2504} 2505 2506static int ext3_acquire_dquot(struct dquot *dquot) 2507{ 2508 int ret, err; 2509 handle_t *handle; 2510 2511 handle = ext3_journal_start(dquot_to_inode(dquot), 2512 EXT3_QUOTA_INIT_BLOCKS(dquot->dq_sb)); 2513 if (IS_ERR(handle)) 2514 return 
PTR_ERR(handle); 2515 ret = dquot_acquire(dquot); 2516 err = ext3_journal_stop(handle); 2517 if (!ret) 2518 ret = err; 2519 return ret; 2520} 2521 2522static int ext3_release_dquot(struct dquot *dquot) 2523{ 2524 int ret, err; 2525 handle_t *handle; 2526 2527 handle = ext3_journal_start(dquot_to_inode(dquot), 2528 EXT3_QUOTA_DEL_BLOCKS(dquot->dq_sb)); 2529 if (IS_ERR(handle)) 2530 return PTR_ERR(handle); 2531 ret = dquot_release(dquot); 2532 err = ext3_journal_stop(handle); 2533 if (!ret) 2534 ret = err; 2535 return ret; 2536} 2537 2538static int ext3_mark_dquot_dirty(struct dquot *dquot) 2539{ 2540 /* Are we journalling quotas? */ 2541 if (EXT3_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || 2542 EXT3_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { 2543 dquot_mark_dquot_dirty(dquot); 2544 return ext3_write_dquot(dquot); 2545 } else { 2546 return dquot_mark_dquot_dirty(dquot); 2547 } 2548} 2549 2550static int ext3_write_info(struct super_block *sb, int type) 2551{ 2552 int ret, err; 2553 handle_t *handle; 2554 2555 /* Data block + inode block */ 2556 handle = ext3_journal_start(sb->s_root->d_inode, 2); 2557 if (IS_ERR(handle)) 2558 return PTR_ERR(handle); 2559 ret = dquot_commit_info(sb, type); 2560 err = ext3_journal_stop(handle); 2561 if (!ret) 2562 ret = err; 2563 return ret; 2564} 2565 2566/* 2567 * Turn on quotas during mount time - we need to find 2568 * the quota file and such... 2569 */ 2570static int ext3_quota_on_mount(struct super_block *sb, int type) 2571{ 2572 return vfs_quota_on_mount(sb, EXT3_SB(sb)->s_qf_names[type], 2573 EXT3_SB(sb)->s_jquota_fmt, type); 2574} 2575 2576/* 2577 * Standard function to be called on quota_on 2578 */ 2579static int ext3_quota_on(struct super_block *sb, int type, int format_id, 2580 char *path) 2581{ 2582 int err; 2583 struct nameidata nd; 2584 2585 if (!test_opt(sb, QUOTA)) 2586 return -EINVAL; 2587 /* Not journalling quota? 
*/ 2588 if (!EXT3_SB(sb)->s_qf_names[USRQUOTA] && 2589 !EXT3_SB(sb)->s_qf_names[GRPQUOTA]) 2590 return vfs_quota_on(sb, type, format_id, path); 2591 err = path_lookup(path, LOOKUP_FOLLOW, &nd); 2592 if (err) 2593 return err; 2594 /* Quotafile not on the same filesystem? */ 2595 if (nd.mnt->mnt_sb != sb) { 2596 path_release(&nd); 2597 return -EXDEV; 2598 } 2599 /* Quotafile not of fs root? */ 2600 if (nd.dentry->d_parent->d_inode != sb->s_root->d_inode) 2601 printk(KERN_WARNING 2602 "EXT3-fs: Quota file not on filesystem root. " 2603 "Journalled quota will not work.\n"); 2604 path_release(&nd); 2605 return vfs_quota_on(sb, type, format_id, path); 2606} 2607 2608/* Read data from quotafile - avoid pagecache and such because we cannot afford 2609 * acquiring the locks... As quota files are never truncated and quota code 2610 * itself serializes the operations (and noone else should touch the files) 2611 * we don't have to be afraid of races */ 2612static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data, 2613 size_t len, loff_t off) 2614{ 2615 struct inode *inode = sb_dqopt(sb)->files[type]; 2616 sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb); 2617 int err = 0; 2618 int offset = off & (sb->s_blocksize - 1); 2619 int tocopy; 2620 size_t toread; 2621 struct buffer_head *bh; 2622 loff_t i_size = i_size_read(inode); 2623 2624 if (off > i_size) 2625 return 0; 2626 if (off+len > i_size) 2627 len = i_size-off; 2628 toread = len; 2629 while (toread > 0) { 2630 tocopy = sb->s_blocksize - offset < toread ? 2631 sb->s_blocksize - offset : toread; 2632 bh = ext3_bread(NULL, inode, blk, 0, &err); 2633 if (err) 2634 return err; 2635 if (!bh) /* A hole? 
*/ 2636 memset(data, 0, tocopy); 2637 else 2638 memcpy(data, bh->b_data+offset, tocopy); 2639 brelse(bh); 2640 offset = 0; 2641 toread -= tocopy; 2642 data += tocopy; 2643 blk++; 2644 } 2645 return len; 2646} 2647 2648/* Write to quotafile (we know the transaction is already started and has 2649 * enough credits) */ 2650static ssize_t ext3_quota_write(struct super_block *sb, int type, 2651 const char *data, size_t len, loff_t off) 2652{ 2653 struct inode *inode = sb_dqopt(sb)->files[type]; 2654 sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb); 2655 int err = 0; 2656 int offset = off & (sb->s_blocksize - 1); 2657 int tocopy; 2658 int journal_quota = EXT3_SB(sb)->s_qf_names[type] != NULL; 2659 size_t towrite = len; 2660 struct buffer_head *bh; 2661 handle_t *handle = journal_current_handle(); 2662 2663 mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); 2664 while (towrite > 0) { 2665 tocopy = sb->s_blocksize - offset < towrite ? 2666 sb->s_blocksize - offset : towrite; 2667 bh = ext3_bread(handle, inode, blk, 1, &err); 2668 if (!bh) 2669 goto out; 2670 if (journal_quota) { 2671 err = ext3_journal_get_write_access(handle, bh); 2672 if (err) { 2673 brelse(bh); 2674 goto out; 2675 } 2676 } 2677 lock_buffer(bh); 2678 memcpy(bh->b_data+offset, data, tocopy); 2679 flush_dcache_page(bh->b_page); 2680 unlock_buffer(bh); 2681 if (journal_quota) 2682 err = ext3_journal_dirty_metadata(handle, bh); 2683 else { 2684 /* Always do at least ordered writes for quotas */ 2685 err = ext3_journal_dirty_data(handle, bh); 2686 mark_buffer_dirty(bh); 2687 } 2688 brelse(bh); 2689 if (err) 2690 goto out; 2691 offset = 0; 2692 towrite -= tocopy; 2693 data += tocopy; 2694 blk++; 2695 } 2696out: 2697 if (len == towrite) 2698 return err; 2699 if (inode->i_size < off+len-towrite) { 2700 i_size_write(inode, off+len-towrite); 2701 EXT3_I(inode)->i_disksize = inode->i_size; 2702 } 2703 inode->i_version++; 2704 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 2705 ext3_mark_inode_dirty(handle, inode); 
2706 mutex_unlock(&inode->i_mutex); 2707 return len - towrite; 2708} 2709 2710#endif 2711 2712static int ext3_get_sb(struct file_system_type *fs_type, 2713 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 2714{ 2715 return get_sb_bdev(fs_type, flags, dev_name, data, ext3_fill_super, mnt); 2716} 2717 2718static struct file_system_type ext3_fs_type = { 2719 .owner = THIS_MODULE, 2720 .name = "ext3", 2721 .get_sb = ext3_get_sb, 2722 .kill_sb = kill_block_super, 2723 .fs_flags = FS_REQUIRES_DEV, 2724}; 2725 2726static int __init init_ext3_fs(void) 2727{ 2728 int err = init_ext3_xattr(); 2729 if (err) 2730 return err; 2731 err = init_inodecache(); 2732 if (err) 2733 goto out1; 2734 err = register_filesystem(&ext3_fs_type); 2735 if (err) 2736 goto out; 2737 return 0; 2738out: 2739 destroy_inodecache(); 2740out1: 2741 exit_ext3_xattr(); 2742 return err; 2743} 2744 2745static void __exit exit_ext3_fs(void) 2746{ 2747 unregister_filesystem(&ext3_fs_type); 2748 destroy_inodecache(); 2749 exit_ext3_xattr(); 2750} 2751 2752MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); 2753MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions"); 2754MODULE_LICENSE("GPL"); 2755module_init(init_ext3_fs) 2756module_exit(exit_ext3_fs)