fs/ext3/super.c at v3.6-rc4

tjh.dev / kernel
Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
kernel / fs / ext3 / super.c
at v3.6-rc4 3094 lines 86 kB view raw
wrap content
   1/*
   2 *  linux/fs/ext3/super.c
   3 *
   4 * Copyright (C) 1992, 1993, 1994, 1995
   5 * Remy Card (card@masi.ibp.fr)
   6 * Laboratoire MASI - Institut Blaise Pascal
   7 * Universite Pierre et Marie Curie (Paris VI)
   8 *
   9 *  from
  10 *
  11 *  linux/fs/minix/inode.c
  12 *
  13 *  Copyright (C) 1991, 1992  Linus Torvalds
  14 *
  15 *  Big-endian to little-endian byte-swapping/bitmaps by
  16 *        David S. Miller (davem@caip.rutgers.edu), 1995
  17 */
  18
  19#include <linux/module.h>
  20#include <linux/blkdev.h>
  21#include <linux/parser.h>
  22#include <linux/exportfs.h>
  23#include <linux/statfs.h>
  24#include <linux/random.h>
  25#include <linux/mount.h>
  26#include <linux/quotaops.h>
  27#include <linux/seq_file.h>
  28#include <linux/log2.h>
  29#include <linux/cleancache.h>
  30
  31#include <asm/uaccess.h>
  32
  33#define CREATE_TRACE_POINTS
  34
  35#include "ext3.h"
  36#include "xattr.h"
  37#include "acl.h"
  38#include "namei.h"
  39
  40#ifdef CONFIG_EXT3_DEFAULTS_TO_ORDERED
  41  #define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_ORDERED_DATA
  42#else
  43  #define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_WRITEBACK_DATA
  44#endif
  45
  46static int ext3_load_journal(struct super_block *, struct ext3_super_block *,
  47			     unsigned long journal_devnum);
  48static int ext3_create_journal(struct super_block *, struct ext3_super_block *,
  49			       unsigned int);
  50static int ext3_commit_super(struct super_block *sb,
  51			       struct ext3_super_block *es,
  52			       int sync);
  53static void ext3_mark_recovery_complete(struct super_block * sb,
  54					struct ext3_super_block * es);
  55static void ext3_clear_journal_err(struct super_block * sb,
  56				   struct ext3_super_block * es);
  57static int ext3_sync_fs(struct super_block *sb, int wait);
  58static const char *ext3_decode_error(struct super_block * sb, int errno,
  59				     char nbuf[16]);
  60static int ext3_remount (struct super_block * sb, int * flags, char * data);
  61static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf);
  62static int ext3_unfreeze(struct super_block *sb);
  63static int ext3_freeze(struct super_block *sb);
  64
  65/*
  66 * Wrappers for journal_start/end.
  67 */
  68handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks)
  69{
  70	journal_t *journal;
  71
  72	if (sb->s_flags & MS_RDONLY)
  73		return ERR_PTR(-EROFS);
  74
  75	/* Special case here: if the journal has aborted behind our
  76	 * backs (eg. EIO in the commit thread), then we still need to
  77	 * take the FS itself readonly cleanly. */
  78	journal = EXT3_SB(sb)->s_journal;
  79	if (is_journal_aborted(journal)) {
  80		ext3_abort(sb, __func__,
  81			   "Detected aborted journal");
  82		return ERR_PTR(-EROFS);
  83	}
  84
  85	return journal_start(journal, nblocks);
  86}
  87
  88int __ext3_journal_stop(const char *where, handle_t *handle)
  89{
  90	struct super_block *sb;
  91	int err;
  92	int rc;
  93
  94	sb = handle->h_transaction->t_journal->j_private;
  95	err = handle->h_err;
  96	rc = journal_stop(handle);
  97
  98	if (!err)
  99		err = rc;
 100	if (err)
 101		__ext3_std_error(sb, where, err);
 102	return err;
 103}
 104
 105void ext3_journal_abort_handle(const char *caller, const char *err_fn,
 106		struct buffer_head *bh, handle_t *handle, int err)
 107{
 108	char nbuf[16];
 109	const char *errstr = ext3_decode_error(NULL, err, nbuf);
 110
 111	if (bh)
 112		BUFFER_TRACE(bh, "abort");
 113
 114	if (!handle->h_err)
 115		handle->h_err = err;
 116
 117	if (is_handle_aborted(handle))
 118		return;
 119
 120	printk(KERN_ERR "EXT3-fs: %s: aborting transaction: %s in %s\n",
 121		caller, errstr, err_fn);
 122
 123	journal_abort_handle(handle);
 124}
 125
 126void ext3_msg(struct super_block *sb, const char *prefix,
 127		const char *fmt, ...)
 128{
 129	struct va_format vaf;
 130	va_list args;
 131
 132	va_start(args, fmt);
 133
 134	vaf.fmt = fmt;
 135	vaf.va = &args;
 136
 137	printk("%sEXT3-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
 138
 139	va_end(args);
 140}
 141
 142/* Deal with the reporting of failure conditions on a filesystem such as
 143 * inconsistencies detected or read IO failures.
 144 *
 145 * On ext2, we can store the error state of the filesystem in the
 146 * superblock.  That is not possible on ext3, because we may have other
 147 * write ordering constraints on the superblock which prevent us from
 148 * writing it out straight away; and given that the journal is about to
 149 * be aborted, we can't rely on the current, or future, transactions to
 150 * write out the superblock safely.
 151 *
 152 * We'll just use the journal_abort() error code to record an error in
 153 * the journal instead.  On recovery, the journal will complain about
 154 * that error until we've noted it down and cleared it.
 155 */
 156
 157static void ext3_handle_error(struct super_block *sb)
 158{
 159	struct ext3_super_block *es = EXT3_SB(sb)->s_es;
 160
 161	EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
 162	es->s_state |= cpu_to_le16(EXT3_ERROR_FS);
 163
 164	if (sb->s_flags & MS_RDONLY)
 165		return;
 166
 167	if (!test_opt (sb, ERRORS_CONT)) {
 168		journal_t *journal = EXT3_SB(sb)->s_journal;
 169
 170		set_opt(EXT3_SB(sb)->s_mount_opt, ABORT);
 171		if (journal)
 172			journal_abort(journal, -EIO);
 173	}
 174	if (test_opt (sb, ERRORS_RO)) {
 175		ext3_msg(sb, KERN_CRIT,
 176			"error: remounting filesystem read-only");
 177		sb->s_flags |= MS_RDONLY;
 178	}
 179	ext3_commit_super(sb, es, 1);
 180	if (test_opt(sb, ERRORS_PANIC))
 181		panic("EXT3-fs (%s): panic forced after error\n",
 182			sb->s_id);
 183}
 184
 185void ext3_error(struct super_block *sb, const char *function,
 186		const char *fmt, ...)
 187{
 188	struct va_format vaf;
 189	va_list args;
 190
 191	va_start(args, fmt);
 192
 193	vaf.fmt = fmt;
 194	vaf.va = &args;
 195
 196	printk(KERN_CRIT "EXT3-fs error (device %s): %s: %pV\n",
 197	       sb->s_id, function, &vaf);
 198
 199	va_end(args);
 200
 201	ext3_handle_error(sb);
 202}
 203
 204static const char *ext3_decode_error(struct super_block * sb, int errno,
 205				     char nbuf[16])
 206{
 207	char *errstr = NULL;
 208
 209	switch (errno) {
 210	case -EIO:
 211		errstr = "IO failure";
 212		break;
 213	case -ENOMEM:
 214		errstr = "Out of memory";
 215		break;
 216	case -EROFS:
 217		if (!sb || EXT3_SB(sb)->s_journal->j_flags & JFS_ABORT)
 218			errstr = "Journal has aborted";
 219		else
 220			errstr = "Readonly filesystem";
 221		break;
 222	default:
 223		/* If the caller passed in an extra buffer for unknown
 224		 * errors, textualise them now.  Else we just return
 225		 * NULL. */
 226		if (nbuf) {
 227			/* Check for truncated error codes... */
 228			if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
 229				errstr = nbuf;
 230		}
 231		break;
 232	}
 233
 234	return errstr;
 235}
 236
 237/* __ext3_std_error decodes expected errors from journaling functions
 238 * automatically and invokes the appropriate error response.  */
 239
 240void __ext3_std_error (struct super_block * sb, const char * function,
 241		       int errno)
 242{
 243	char nbuf[16];
 244	const char *errstr;
 245
 246	/* Special case: if the error is EROFS, and we're not already
 247	 * inside a transaction, then there's really no point in logging
 248	 * an error. */
 249	if (errno == -EROFS && journal_current_handle() == NULL &&
 250	    (sb->s_flags & MS_RDONLY))
 251		return;
 252
 253	errstr = ext3_decode_error(sb, errno, nbuf);
 254	ext3_msg(sb, KERN_CRIT, "error in %s: %s", function, errstr);
 255
 256	ext3_handle_error(sb);
 257}
 258
 259/*
 260 * ext3_abort is a much stronger failure handler than ext3_error.  The
 261 * abort function may be used to deal with unrecoverable failures such
 262 * as journal IO errors or ENOMEM at a critical moment in log management.
 263 *
 264 * We unconditionally force the filesystem into an ABORT|READONLY state,
 265 * unless the error response on the fs has been set to panic in which
 266 * case we take the easy way out and panic immediately.
 267 */
 268
 269void ext3_abort(struct super_block *sb, const char *function,
 270		 const char *fmt, ...)
 271{
 272	struct va_format vaf;
 273	va_list args;
 274
 275	va_start(args, fmt);
 276
 277	vaf.fmt = fmt;
 278	vaf.va = &args;
 279
 280	printk(KERN_CRIT "EXT3-fs (%s): error: %s: %pV\n",
 281	       sb->s_id, function, &vaf);
 282
 283	va_end(args);
 284
 285	if (test_opt(sb, ERRORS_PANIC))
 286		panic("EXT3-fs: panic from previous error\n");
 287
 288	if (sb->s_flags & MS_RDONLY)
 289		return;
 290
 291	ext3_msg(sb, KERN_CRIT,
 292		"error: remounting filesystem read-only");
 293	EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
 294	sb->s_flags |= MS_RDONLY;
 295	set_opt(EXT3_SB(sb)->s_mount_opt, ABORT);
 296	if (EXT3_SB(sb)->s_journal)
 297		journal_abort(EXT3_SB(sb)->s_journal, -EIO);
 298}
 299
 300void ext3_warning(struct super_block *sb, const char *function,
 301		  const char *fmt, ...)
 302{
 303	struct va_format vaf;
 304	va_list args;
 305
 306	va_start(args, fmt);
 307
 308	vaf.fmt = fmt;
 309	vaf.va = &args;
 310
 311	printk(KERN_WARNING "EXT3-fs (%s): warning: %s: %pV\n",
 312	       sb->s_id, function, &vaf);
 313
 314	va_end(args);
 315}
 316
 317void ext3_update_dynamic_rev(struct super_block *sb)
 318{
 319	struct ext3_super_block *es = EXT3_SB(sb)->s_es;
 320
 321	if (le32_to_cpu(es->s_rev_level) > EXT3_GOOD_OLD_REV)
 322		return;
 323
 324	ext3_msg(sb, KERN_WARNING,
 325		"warning: updating to rev %d because of "
 326		"new feature flag, running e2fsck is recommended",
 327		EXT3_DYNAMIC_REV);
 328
 329	es->s_first_ino = cpu_to_le32(EXT3_GOOD_OLD_FIRST_INO);
 330	es->s_inode_size = cpu_to_le16(EXT3_GOOD_OLD_INODE_SIZE);
 331	es->s_rev_level = cpu_to_le32(EXT3_DYNAMIC_REV);
 332	/* leave es->s_feature_*compat flags alone */
 333	/* es->s_uuid will be set by e2fsck if empty */
 334
 335	/*
 336	 * The rest of the superblock fields should be zero, and if not it
 337	 * means they are likely already in use, so leave them alone.  We
 338	 * can leave it up to e2fsck to clean up any inconsistencies there.
 339	 */
 340}
 341
 342/*
 343 * Open the external journal device
 344 */
 345static struct block_device *ext3_blkdev_get(dev_t dev, struct super_block *sb)
 346{
 347	struct block_device *bdev;
 348	char b[BDEVNAME_SIZE];
 349
 350	bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb);
 351	if (IS_ERR(bdev))
 352		goto fail;
 353	return bdev;
 354
 355fail:
 356	ext3_msg(sb, "error: failed to open journal device %s: %ld",
 357		__bdevname(dev, b), PTR_ERR(bdev));
 358
 359	return NULL;
 360}
 361
 362/*
 363 * Release the journal device
 364 */
 365static int ext3_blkdev_put(struct block_device *bdev)
 366{
 367	return blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
 368}
 369
 370static int ext3_blkdev_remove(struct ext3_sb_info *sbi)
 371{
 372	struct block_device *bdev;
 373	int ret = -ENODEV;
 374
 375	bdev = sbi->journal_bdev;
 376	if (bdev) {
 377		ret = ext3_blkdev_put(bdev);
 378		sbi->journal_bdev = NULL;
 379	}
 380	return ret;
 381}
 382
 383static inline struct inode *orphan_list_entry(struct list_head *l)
 384{
 385	return &list_entry(l, struct ext3_inode_info, i_orphan)->vfs_inode;
 386}
 387
 388static void dump_orphan_list(struct super_block *sb, struct ext3_sb_info *sbi)
 389{
 390	struct list_head *l;
 391
 392	ext3_msg(sb, KERN_ERR, "error: sb orphan head is %d",
 393	       le32_to_cpu(sbi->s_es->s_last_orphan));
 394
 395	ext3_msg(sb, KERN_ERR, "sb_info orphan list:");
 396	list_for_each(l, &sbi->s_orphan) {
 397		struct inode *inode = orphan_list_entry(l);
 398		ext3_msg(sb, KERN_ERR, "  "
 399		       "inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
 400		       inode->i_sb->s_id, inode->i_ino, inode,
 401		       inode->i_mode, inode->i_nlink,
 402		       NEXT_ORPHAN(inode));
 403	}
 404}
 405
/*
 * Tear down the ext3 private state of @sb (wired up as .put_super in
 * ext3_sops).
 *
 * The sequence is: disable quota usage and limits, release xattr state,
 * destroy the journal, write the final superblock state (only when not
 * read-only), release buffers, counters and quota file names, check the
 * in-memory orphan list, invalidate the block device(s) and free the
 * sb_info.  The order of these steps is left exactly as found.
 */
static void ext3_put_super (struct super_block * sb)
{
	struct ext3_sb_info *sbi = EXT3_SB(sb);
	struct ext3_super_block *es = sbi->s_es;
	int i, err;

	dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
	ext3_xattr_put_super(sb);
	err = journal_destroy(sbi->s_journal);
	/* Clear the pointer so ext3_abort() below sees no journal to abort. */
	sbi->s_journal = NULL;
	if (err < 0)
		ext3_abort(sb, __func__, "Couldn't clean up the journal");

	/*
	 * ext3_abort() flags the superblock MS_RDONLY, so after a failed
	 * journal_destroy() this final superblock update is skipped.
	 */
	if (!(sb->s_flags & MS_RDONLY)) {
		EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
		es->s_state = cpu_to_le16(sbi->s_mount_state);
		BUFFER_TRACE(sbi->s_sbh, "marking dirty");
		mark_buffer_dirty(sbi->s_sbh);
		/* Third argument is the "sync" flag of ext3_commit_super(). */
		ext3_commit_super(sb, es, 1);
	}

	/* Release the group descriptor buffers and the array holding them. */
	for (i = 0; i < sbi->s_gdb_count; i++)
		brelse(sbi->s_group_desc[i]);
	kfree(sbi->s_group_desc);
	percpu_counter_destroy(&sbi->s_freeblocks_counter);
	percpu_counter_destroy(&sbi->s_freeinodes_counter);
	percpu_counter_destroy(&sbi->s_dirs_counter);
	brelse(sbi->s_sbh);
#ifdef CONFIG_QUOTA
	/* Journaled quota file names set through the usrjquota=/grpjquota= options. */
	for (i = 0; i < MAXQUOTAS; i++)
		kfree(sbi->s_qf_names[i]);
#endif

	/* Debugging code just in case the in-memory inode orphan list
	 * isn't empty.  The on-disk one can be non-empty if we've
	 * detected an error and taken the fs readonly, but the
	 * in-memory list had better be clean by this point. */
	if (!list_empty(&sbi->s_orphan))
		dump_orphan_list(sb, sbi);
	J_ASSERT(list_empty(&sbi->s_orphan));

	invalidate_bdev(sb->s_bdev);
	if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
		/*
		 * Invalidate the journal device's buffers.  We don't want them
		 * floating about in memory - the physical journal device may
		 * be hotswapped, and it breaks the `ro-after' testing code.
		 */
		sync_blockdev(sbi->journal_bdev);
		invalidate_bdev(sbi->journal_bdev);
		ext3_blkdev_remove(sbi);
	}
	/* Detach the private info from the superblock before freeing it. */
	sb->s_fs_info = NULL;
	kfree(sbi->s_blockgroup_lock);
	kfree(sbi);
}
 462
 463static struct kmem_cache *ext3_inode_cachep;
 464
 465/*
 466 * Called inside transaction, so use GFP_NOFS
 467 */
 468static struct inode *ext3_alloc_inode(struct super_block *sb)
 469{
 470	struct ext3_inode_info *ei;
 471
 472	ei = kmem_cache_alloc(ext3_inode_cachep, GFP_NOFS);
 473	if (!ei)
 474		return NULL;
 475	ei->i_block_alloc_info = NULL;
 476	ei->vfs_inode.i_version = 1;
 477	atomic_set(&ei->i_datasync_tid, 0);
 478	atomic_set(&ei->i_sync_tid, 0);
 479	return &ei->vfs_inode;
 480}
 481
 482static int ext3_drop_inode(struct inode *inode)
 483{
 484	int drop = generic_drop_inode(inode);
 485
 486	trace_ext3_drop_inode(inode, drop);
 487	return drop;
 488}
 489
 490static void ext3_i_callback(struct rcu_head *head)
 491{
 492	struct inode *inode = container_of(head, struct inode, i_rcu);
 493	kmem_cache_free(ext3_inode_cachep, EXT3_I(inode));
 494}
 495
 496static void ext3_destroy_inode(struct inode *inode)
 497{
 498	if (!list_empty(&(EXT3_I(inode)->i_orphan))) {
 499		printk("EXT3 Inode %p: orphan list check failed!\n",
 500			EXT3_I(inode));
 501		print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
 502				EXT3_I(inode), sizeof(struct ext3_inode_info),
 503				false);
 504		dump_stack();
 505	}
 506	call_rcu(&inode->i_rcu, ext3_i_callback);
 507}
 508
 509static void init_once(void *foo)
 510{
 511	struct ext3_inode_info *ei = (struct ext3_inode_info *) foo;
 512
 513	INIT_LIST_HEAD(&ei->i_orphan);
 514#ifdef CONFIG_EXT3_FS_XATTR
 515	init_rwsem(&ei->xattr_sem);
 516#endif
 517	mutex_init(&ei->truncate_mutex);
 518	inode_init_once(&ei->vfs_inode);
 519}
 520
 521static int init_inodecache(void)
 522{
 523	ext3_inode_cachep = kmem_cache_create("ext3_inode_cache",
 524					     sizeof(struct ext3_inode_info),
 525					     0, (SLAB_RECLAIM_ACCOUNT|
 526						SLAB_MEM_SPREAD),
 527					     init_once);
 528	if (ext3_inode_cachep == NULL)
 529		return -ENOMEM;
 530	return 0;
 531}
 532
/* Destroy the slab cache created by init_inodecache(). */
static void destroy_inodecache(void)
{
	kmem_cache_destroy(ext3_inode_cachep);
}
 537
 538static inline void ext3_show_quota_options(struct seq_file *seq, struct super_block *sb)
 539{
 540#if defined(CONFIG_QUOTA)
 541	struct ext3_sb_info *sbi = EXT3_SB(sb);
 542
 543	if (sbi->s_jquota_fmt) {
 544		char *fmtname = "";
 545
 546		switch (sbi->s_jquota_fmt) {
 547		case QFMT_VFS_OLD:
 548			fmtname = "vfsold";
 549			break;
 550		case QFMT_VFS_V0:
 551			fmtname = "vfsv0";
 552			break;
 553		case QFMT_VFS_V1:
 554			fmtname = "vfsv1";
 555			break;
 556		}
 557		seq_printf(seq, ",jqfmt=%s", fmtname);
 558	}
 559
 560	if (sbi->s_qf_names[USRQUOTA])
 561		seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);
 562
 563	if (sbi->s_qf_names[GRPQUOTA])
 564		seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]);
 565
 566	if (test_opt(sb, USRQUOTA))
 567		seq_puts(seq, ",usrquota");
 568
 569	if (test_opt(sb, GRPQUOTA))
 570		seq_puts(seq, ",grpquota");
 571#endif
 572}
 573
 574static char *data_mode_string(unsigned long mode)
 575{
 576	switch (mode) {
 577	case EXT3_MOUNT_JOURNAL_DATA:
 578		return "journal";
 579	case EXT3_MOUNT_ORDERED_DATA:
 580		return "ordered";
 581	case EXT3_MOUNT_WRITEBACK_DATA:
 582		return "writeback";
 583	}
 584	return "unknown";
 585}
 586
 587/*
 588 * Show an option if
 589 *  - it's set to a non-default value OR
 590 *  - if the per-sb default is different from the global default
 591 */
 592static int ext3_show_options(struct seq_file *seq, struct dentry *root)
 593{
 594	struct super_block *sb = root->d_sb;
 595	struct ext3_sb_info *sbi = EXT3_SB(sb);
 596	struct ext3_super_block *es = sbi->s_es;
 597	unsigned long def_mount_opts;
 598
 599	def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
 600
 601	if (sbi->s_sb_block != 1)
 602		seq_printf(seq, ",sb=%lu", sbi->s_sb_block);
 603	if (test_opt(sb, MINIX_DF))
 604		seq_puts(seq, ",minixdf");
 605	if (test_opt(sb, GRPID))
 606		seq_puts(seq, ",grpid");
 607	if (!test_opt(sb, GRPID) && (def_mount_opts & EXT3_DEFM_BSDGROUPS))
 608		seq_puts(seq, ",nogrpid");
 609	if (!uid_eq(sbi->s_resuid, make_kuid(&init_user_ns, EXT3_DEF_RESUID)) ||
 610	    le16_to_cpu(es->s_def_resuid) != EXT3_DEF_RESUID) {
 611		seq_printf(seq, ",resuid=%u",
 612				from_kuid_munged(&init_user_ns, sbi->s_resuid));
 613	}
 614	if (!gid_eq(sbi->s_resgid, make_kgid(&init_user_ns, EXT3_DEF_RESGID)) ||
 615	    le16_to_cpu(es->s_def_resgid) != EXT3_DEF_RESGID) {
 616		seq_printf(seq, ",resgid=%u",
 617				from_kgid_munged(&init_user_ns, sbi->s_resgid));
 618	}
 619	if (test_opt(sb, ERRORS_RO)) {
 620		int def_errors = le16_to_cpu(es->s_errors);
 621
 622		if (def_errors == EXT3_ERRORS_PANIC ||
 623		    def_errors == EXT3_ERRORS_CONTINUE) {
 624			seq_puts(seq, ",errors=remount-ro");
 625		}
 626	}
 627	if (test_opt(sb, ERRORS_CONT))
 628		seq_puts(seq, ",errors=continue");
 629	if (test_opt(sb, ERRORS_PANIC))
 630		seq_puts(seq, ",errors=panic");
 631	if (test_opt(sb, NO_UID32))
 632		seq_puts(seq, ",nouid32");
 633	if (test_opt(sb, DEBUG))
 634		seq_puts(seq, ",debug");
 635#ifdef CONFIG_EXT3_FS_XATTR
 636	if (test_opt(sb, XATTR_USER))
 637		seq_puts(seq, ",user_xattr");
 638	if (!test_opt(sb, XATTR_USER) &&
 639	    (def_mount_opts & EXT3_DEFM_XATTR_USER)) {
 640		seq_puts(seq, ",nouser_xattr");
 641	}
 642#endif
 643#ifdef CONFIG_EXT3_FS_POSIX_ACL
 644	if (test_opt(sb, POSIX_ACL))
 645		seq_puts(seq, ",acl");
 646	if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT3_DEFM_ACL))
 647		seq_puts(seq, ",noacl");
 648#endif
 649	if (!test_opt(sb, RESERVATION))
 650		seq_puts(seq, ",noreservation");
 651	if (sbi->s_commit_interval) {
 652		seq_printf(seq, ",commit=%u",
 653			   (unsigned) (sbi->s_commit_interval / HZ));
 654	}
 655
 656	/*
 657	 * Always display barrier state so it's clear what the status is.
 658	 */
 659	seq_puts(seq, ",barrier=");
 660	seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0");
 661	seq_printf(seq, ",data=%s", data_mode_string(test_opt(sb, DATA_FLAGS)));
 662	if (test_opt(sb, DATA_ERR_ABORT))
 663		seq_puts(seq, ",data_err=abort");
 664
 665	if (test_opt(sb, NOLOAD))
 666		seq_puts(seq, ",norecovery");
 667
 668	ext3_show_quota_options(seq, sb);
 669
 670	return 0;
 671}
 672
 673
 674static struct inode *ext3_nfs_get_inode(struct super_block *sb,
 675		u64 ino, u32 generation)
 676{
 677	struct inode *inode;
 678
 679	if (ino < EXT3_FIRST_INO(sb) && ino != EXT3_ROOT_INO)
 680		return ERR_PTR(-ESTALE);
 681	if (ino > le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count))
 682		return ERR_PTR(-ESTALE);
 683
 684	/* iget isn't really right if the inode is currently unallocated!!
 685	 *
 686	 * ext3_read_inode will return a bad_inode if the inode had been
 687	 * deleted, so we should be safe.
 688	 *
 689	 * Currently we don't know the generation for parent directory, so
 690	 * a generation of 0 means "accept any"
 691	 */
 692	inode = ext3_iget(sb, ino);
 693	if (IS_ERR(inode))
 694		return ERR_CAST(inode);
 695	if (generation && inode->i_generation != generation) {
 696		iput(inode);
 697		return ERR_PTR(-ESTALE);
 698	}
 699
 700	return inode;
 701}
 702
 703static struct dentry *ext3_fh_to_dentry(struct super_block *sb, struct fid *fid,
 704		int fh_len, int fh_type)
 705{
 706	return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
 707				    ext3_nfs_get_inode);
 708}
 709
 710static struct dentry *ext3_fh_to_parent(struct super_block *sb, struct fid *fid,
 711		int fh_len, int fh_type)
 712{
 713	return generic_fh_to_parent(sb, fid, fh_len, fh_type,
 714				    ext3_nfs_get_inode);
 715}
 716
 717/*
 718 * Try to release metadata pages (indirect blocks, directories) which are
 719 * mapped via the block device.  Since these pages could have journal heads
 720 * which would prevent try_to_free_buffers() from freeing them, we must use
 721 * jbd layer's try_to_free_buffers() function to release them.
 722 */
 723static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
 724				 gfp_t wait)
 725{
 726	journal_t *journal = EXT3_SB(sb)->s_journal;
 727
 728	WARN_ON(PageChecked(page));
 729	if (!page_has_buffers(page))
 730		return 0;
 731	if (journal)
 732		return journal_try_to_free_buffers(journal, page, 
 733						   wait & ~__GFP_WAIT);
 734	return try_to_free_buffers(page);
 735}
 736
 737#ifdef CONFIG_QUOTA
 738#define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group")
 739#define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
 740
 741static int ext3_write_dquot(struct dquot *dquot);
 742static int ext3_acquire_dquot(struct dquot *dquot);
 743static int ext3_release_dquot(struct dquot *dquot);
 744static int ext3_mark_dquot_dirty(struct dquot *dquot);
 745static int ext3_write_info(struct super_block *sb, int type);
 746static int ext3_quota_on(struct super_block *sb, int type, int format_id,
 747			 struct path *path);
 748static int ext3_quota_on_mount(struct super_block *sb, int type);
 749static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data,
 750			       size_t len, loff_t off);
 751static ssize_t ext3_quota_write(struct super_block *sb, int type,
 752				const char *data, size_t len, loff_t off);
 753
/*
 * dquot operations table for ext3 (only built with CONFIG_QUOTA).
 * The first five hooks are ext3 functions declared above; allocation
 * and destruction of dquots use the generic dquot_alloc/dquot_destroy.
 */
static const struct dquot_operations ext3_quota_operations = {
	.write_dquot	= ext3_write_dquot,
	.acquire_dquot	= ext3_acquire_dquot,
	.release_dquot	= ext3_release_dquot,
	.mark_dirty	= ext3_mark_dquot_dirty,
	.write_info	= ext3_write_info,
	.alloc_dquot	= dquot_alloc,
	.destroy_dquot	= dquot_destroy,
};
 763
/*
 * quotactl operations table for ext3 (only built with CONFIG_QUOTA).
 * Only .quota_on is an ext3 function; every other entry points at a
 * generic dquot_* helper.
 */
static const struct quotactl_ops ext3_qctl_operations = {
	.quota_on	= ext3_quota_on,
	.quota_off	= dquot_quota_off,
	.quota_sync	= dquot_quota_sync,
	.get_info	= dquot_get_dqinfo,
	.set_info	= dquot_set_dqinfo,
	.get_dqblk	= dquot_get_dqblk,
	.set_dqblk	= dquot_set_dqblk
};
 773#endif
 774
/*
 * Superblock operations table for ext3.  The quota read/write hooks are
 * only present when CONFIG_QUOTA is enabled.
 */
static const struct super_operations ext3_sops = {
	.alloc_inode	= ext3_alloc_inode,
	.destroy_inode	= ext3_destroy_inode,
	.write_inode	= ext3_write_inode,
	.dirty_inode	= ext3_dirty_inode,
	.drop_inode	= ext3_drop_inode,
	.evict_inode	= ext3_evict_inode,
	.put_super	= ext3_put_super,
	.sync_fs	= ext3_sync_fs,
	.freeze_fs	= ext3_freeze,
	.unfreeze_fs	= ext3_unfreeze,
	.statfs		= ext3_statfs,
	.remount_fs	= ext3_remount,
	.show_options	= ext3_show_options,
#ifdef CONFIG_QUOTA
	.quota_read	= ext3_quota_read,
	.quota_write	= ext3_quota_write,
#endif
	.bdev_try_to_free_page = bdev_try_to_free_page,
};
 795
/*
 * Export operations table for ext3: file-handle decoding goes through
 * the ext3_fh_to_* wrappers defined above, parent lookup through
 * ext3_get_parent.
 */
static const struct export_operations ext3_export_ops = {
	.fh_to_dentry = ext3_fh_to_dentry,
	.fh_to_parent = ext3_fh_to_parent,
	.get_parent = ext3_get_parent,
};
 801
/*
 * Token identifiers for the mount options listed in the tokens table
 * below; parse_options() switches on them.  Opt_err is the token paired
 * with the NULL pattern that terminates the table.
 */
enum {
	Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
	Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
	Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov,
	Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
	Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh,
	Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
	Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
	Opt_data_err_abort, Opt_data_err_ignore,
	Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
	Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
	Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err,
	Opt_resize, Opt_usrquota, Opt_grpquota
};
 816
/*
 * Mount option patterns handed to match_token() by parse_options().
 * Several spellings map to the same token (e.g. "grpid"/"bsdgroups",
 * "noload"/"norecovery").
 *
 * NOTE(review): match_token() presumably returns the first matching
 * entry, so the relative order of overlapping patterns (the bare
 * "usrjquota=" before "usrjquota=%s", "journal=update" before
 * "journal=%u", "barrier=%u" before "barrier") is assumed to be
 * significant - confirm before reordering.
 */
static const match_table_t tokens = {
	{Opt_bsd_df, "bsddf"},
	{Opt_minix_df, "minixdf"},
	{Opt_grpid, "grpid"},
	{Opt_grpid, "bsdgroups"},
	{Opt_nogrpid, "nogrpid"},
	{Opt_nogrpid, "sysvgroups"},
	{Opt_resgid, "resgid=%u"},
	{Opt_resuid, "resuid=%u"},
	{Opt_sb, "sb=%u"},
	{Opt_err_cont, "errors=continue"},
	{Opt_err_panic, "errors=panic"},
	{Opt_err_ro, "errors=remount-ro"},
	{Opt_nouid32, "nouid32"},
	{Opt_nocheck, "nocheck"},
	{Opt_nocheck, "check=none"},
	{Opt_debug, "debug"},
	{Opt_oldalloc, "oldalloc"},
	{Opt_orlov, "orlov"},
	{Opt_user_xattr, "user_xattr"},
	{Opt_nouser_xattr, "nouser_xattr"},
	{Opt_acl, "acl"},
	{Opt_noacl, "noacl"},
	{Opt_reservation, "reservation"},
	{Opt_noreservation, "noreservation"},
	{Opt_noload, "noload"},
	{Opt_noload, "norecovery"},
	{Opt_nobh, "nobh"},
	{Opt_bh, "bh"},
	{Opt_commit, "commit=%u"},
	{Opt_journal_update, "journal=update"},
	{Opt_journal_inum, "journal=%u"},
	{Opt_journal_dev, "journal_dev=%u"},
	{Opt_abort, "abort"},
	{Opt_data_journal, "data=journal"},
	{Opt_data_ordered, "data=ordered"},
	{Opt_data_writeback, "data=writeback"},
	{Opt_data_err_abort, "data_err=abort"},
	{Opt_data_err_ignore, "data_err=ignore"},
	{Opt_offusrjquota, "usrjquota="},
	{Opt_usrjquota, "usrjquota=%s"},
	{Opt_offgrpjquota, "grpjquota="},
	{Opt_grpjquota, "grpjquota=%s"},
	{Opt_jqfmt_vfsold, "jqfmt=vfsold"},
	{Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
	{Opt_jqfmt_vfsv1, "jqfmt=vfsv1"},
	{Opt_grpquota, "grpquota"},
	{Opt_noquota, "noquota"},
	{Opt_quota, "quota"},
	{Opt_usrquota, "usrquota"},
	{Opt_barrier, "barrier=%u"},
	{Opt_barrier, "barrier"},
	{Opt_nobarrier, "nobarrier"},
	{Opt_resize, "resize"},
	{Opt_err, NULL},
};
 873
 874static ext3_fsblk_t get_sb_block(void **data, struct super_block *sb)
 875{
 876	ext3_fsblk_t	sb_block;
 877	char		*options = (char *) *data;
 878
 879	if (!options || strncmp(options, "sb=", 3) != 0)
 880		return 1;	/* Default location */
 881	options += 3;
 882	/*todo: use simple_strtoll with >32bit ext3 */
 883	sb_block = simple_strtoul(options, &options, 0);
 884	if (*options && *options != ',') {
 885		ext3_msg(sb, "error: invalid sb specification: %s",
 886		       (char *) *data);
 887		return 1;
 888	}
 889	if (*options == ',')
 890		options++;
 891	*data = (void *) options;
 892	return sb_block;
 893}
 894
 895#ifdef CONFIG_QUOTA
 896static int set_qf_name(struct super_block *sb, int qtype, substring_t *args)
 897{
 898	struct ext3_sb_info *sbi = EXT3_SB(sb);
 899	char *qname;
 900
 901	if (sb_any_quota_loaded(sb) &&
 902		!sbi->s_qf_names[qtype]) {
 903		ext3_msg(sb, KERN_ERR,
 904			"Cannot change journaled "
 905			"quota options when quota turned on");
 906		return 0;
 907	}
 908	qname = match_strdup(args);
 909	if (!qname) {
 910		ext3_msg(sb, KERN_ERR,
 911			"Not enough memory for storing quotafile name");
 912		return 0;
 913	}
 914	if (sbi->s_qf_names[qtype] &&
 915		strcmp(sbi->s_qf_names[qtype], qname)) {
 916		ext3_msg(sb, KERN_ERR,
 917			"%s quota file already specified", QTYPE2NAME(qtype));
 918		kfree(qname);
 919		return 0;
 920	}
 921	sbi->s_qf_names[qtype] = qname;
 922	if (strchr(sbi->s_qf_names[qtype], '/')) {
 923		ext3_msg(sb, KERN_ERR,
 924			"quotafile must be on filesystem root");
 925		kfree(sbi->s_qf_names[qtype]);
 926		sbi->s_qf_names[qtype] = NULL;
 927		return 0;
 928	}
 929	set_opt(sbi->s_mount_opt, QUOTA);
 930	return 1;
 931}
 932
 933static int clear_qf_name(struct super_block *sb, int qtype) {
 934
 935	struct ext3_sb_info *sbi = EXT3_SB(sb);
 936
 937	if (sb_any_quota_loaded(sb) &&
 938		sbi->s_qf_names[qtype]) {
 939		ext3_msg(sb, KERN_ERR, "Cannot change journaled quota options"
 940			" when quota turned on");
 941		return 0;
 942	}
 943	/*
 944	 * The space will be released later when all options are confirmed
 945	 * to be correct
 946	 */
 947	sbi->s_qf_names[qtype] = NULL;
 948	return 1;
 949}
 950#endif
 951
 952static int parse_options (char *options, struct super_block *sb,
 953			  unsigned int *inum, unsigned long *journal_devnum,
 954			  ext3_fsblk_t *n_blocks_count, int is_remount)
 955{
 956	struct ext3_sb_info *sbi = EXT3_SB(sb);
 957	char * p;
 958	substring_t args[MAX_OPT_ARGS];
 959	int data_opt = 0;
 960	int option;
 961	kuid_t uid;
 962	kgid_t gid;
 963#ifdef CONFIG_QUOTA
 964	int qfmt;
 965#endif
 966
 967	if (!options)
 968		return 1;
 969
 970	while ((p = strsep (&options, ",")) != NULL) {
 971		int token;
 972		if (!*p)
 973			continue;
 974		/*
 975		 * Initialize args struct so we know whether arg was
 976		 * found; some options take optional arguments.
 977		 */
 978		args[0].to = args[0].from = 0;
 979		token = match_token(p, tokens, args);
 980		switch (token) {
 981		case Opt_bsd_df:
 982			clear_opt (sbi->s_mount_opt, MINIX_DF);
 983			break;
 984		case Opt_minix_df:
 985			set_opt (sbi->s_mount_opt, MINIX_DF);
 986			break;
 987		case Opt_grpid:
 988			set_opt (sbi->s_mount_opt, GRPID);
 989			break;
 990		case Opt_nogrpid:
 991			clear_opt (sbi->s_mount_opt, GRPID);
 992			break;
 993		case Opt_resuid:
 994			if (match_int(&args[0], &option))
 995				return 0;
 996			uid = make_kuid(current_user_ns(), option);
 997			if (!uid_valid(uid)) {
 998				ext3_msg(sb, KERN_ERR, "Invalid uid value %d", option);
 999				return -1;
1000
1001			}
1002			sbi->s_resuid = uid;
1003			break;
1004		case Opt_resgid:
1005			if (match_int(&args[0], &option))
1006				return 0;
1007			gid = make_kgid(current_user_ns(), option);
1008			if (!gid_valid(gid)) {
1009				ext3_msg(sb, KERN_ERR, "Invalid gid value %d", option);
1010				return -1;
1011			}
1012			sbi->s_resgid = gid;
1013			break;
1014		case Opt_sb:
1015			/* handled by get_sb_block() instead of here */
1016			/* *sb_block = match_int(&args[0]); */
1017			break;
1018		case Opt_err_panic:
1019			clear_opt (sbi->s_mount_opt, ERRORS_CONT);
1020			clear_opt (sbi->s_mount_opt, ERRORS_RO);
1021			set_opt (sbi->s_mount_opt, ERRORS_PANIC);
1022			break;
1023		case Opt_err_ro:
1024			clear_opt (sbi->s_mount_opt, ERRORS_CONT);
1025			clear_opt (sbi->s_mount_opt, ERRORS_PANIC);
1026			set_opt (sbi->s_mount_opt, ERRORS_RO);
1027			break;
1028		case Opt_err_cont:
1029			clear_opt (sbi->s_mount_opt, ERRORS_RO);
1030			clear_opt (sbi->s_mount_opt, ERRORS_PANIC);
1031			set_opt (sbi->s_mount_opt, ERRORS_CONT);
1032			break;
1033		case Opt_nouid32:
1034			set_opt (sbi->s_mount_opt, NO_UID32);
1035			break;
1036		case Opt_nocheck:
1037			clear_opt (sbi->s_mount_opt, CHECK);
1038			break;
1039		case Opt_debug:
1040			set_opt (sbi->s_mount_opt, DEBUG);
1041			break;
1042		case Opt_oldalloc:
1043			ext3_msg(sb, KERN_WARNING,
1044				"Ignoring deprecated oldalloc option");
1045			break;
1046		case Opt_orlov:
1047			ext3_msg(sb, KERN_WARNING,
1048				"Ignoring deprecated orlov option");
1049			break;
1050#ifdef CONFIG_EXT3_FS_XATTR
1051		case Opt_user_xattr:
1052			set_opt (sbi->s_mount_opt, XATTR_USER);
1053			break;
1054		case Opt_nouser_xattr:
1055			clear_opt (sbi->s_mount_opt, XATTR_USER);
1056			break;
1057#else
1058		case Opt_user_xattr:
1059		case Opt_nouser_xattr:
1060			ext3_msg(sb, KERN_INFO,
1061				"(no)user_xattr options not supported");
1062			break;
1063#endif
1064#ifdef CONFIG_EXT3_FS_POSIX_ACL
1065		case Opt_acl:
1066			set_opt(sbi->s_mount_opt, POSIX_ACL);
1067			break;
1068		case Opt_noacl:
1069			clear_opt(sbi->s_mount_opt, POSIX_ACL);
1070			break;
1071#else
1072		case Opt_acl:
1073		case Opt_noacl:
1074			ext3_msg(sb, KERN_INFO,
1075				"(no)acl options not supported");
1076			break;
1077#endif
1078		case Opt_reservation:
1079			set_opt(sbi->s_mount_opt, RESERVATION);
1080			break;
1081		case Opt_noreservation:
1082			clear_opt(sbi->s_mount_opt, RESERVATION);
1083			break;
1084		case Opt_journal_update:
1085			/* @@@ FIXME */
1086			/* Eventually we will want to be able to create
1087			   a journal file here.  For now, only allow the
1088			   user to specify an existing inode to be the
1089			   journal file. */
1090			if (is_remount) {
1091				ext3_msg(sb, KERN_ERR, "error: cannot specify "
1092					"journal on remount");
1093				return 0;
1094			}
1095			set_opt (sbi->s_mount_opt, UPDATE_JOURNAL);
1096			break;
1097		case Opt_journal_inum:
1098			if (is_remount) {
1099				ext3_msg(sb, KERN_ERR, "error: cannot specify "
1100				       "journal on remount");
1101				return 0;
1102			}
1103			if (match_int(&args[0], &option))
1104				return 0;
1105			*inum = option;
1106			break;
1107		case Opt_journal_dev:
1108			if (is_remount) {
1109				ext3_msg(sb, KERN_ERR, "error: cannot specify "
1110				       "journal on remount");
1111				return 0;
1112			}
1113			if (match_int(&args[0], &option))
1114				return 0;
1115			*journal_devnum = option;
1116			break;
1117		case Opt_noload:
1118			set_opt (sbi->s_mount_opt, NOLOAD);
1119			break;
1120		case Opt_commit:
1121			if (match_int(&args[0], &option))
1122				return 0;
1123			if (option < 0)
1124				return 0;
1125			if (option == 0)
1126				option = JBD_DEFAULT_MAX_COMMIT_AGE;
1127			sbi->s_commit_interval = HZ * option;
1128			break;
1129		case Opt_data_journal:
1130			data_opt = EXT3_MOUNT_JOURNAL_DATA;
1131			goto datacheck;
1132		case Opt_data_ordered:
1133			data_opt = EXT3_MOUNT_ORDERED_DATA;
1134			goto datacheck;
1135		case Opt_data_writeback:
1136			data_opt = EXT3_MOUNT_WRITEBACK_DATA;
1137		datacheck:
1138			if (is_remount) {
1139				if (test_opt(sb, DATA_FLAGS) == data_opt)
1140					break;
1141				ext3_msg(sb, KERN_ERR,
1142					"error: cannot change "
1143					"data mode on remount. The filesystem "
1144					"is mounted in data=%s mode and you "
1145					"try to remount it in data=%s mode.",
1146					data_mode_string(test_opt(sb,
1147							DATA_FLAGS)),
1148					data_mode_string(data_opt));
1149				return 0;
1150			} else {
1151				clear_opt(sbi->s_mount_opt, DATA_FLAGS);
1152				sbi->s_mount_opt |= data_opt;
1153			}
1154			break;
1155		case Opt_data_err_abort:
1156			set_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
1157			break;
1158		case Opt_data_err_ignore:
1159			clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
1160			break;
1161#ifdef CONFIG_QUOTA
1162		case Opt_usrjquota:
1163			if (!set_qf_name(sb, USRQUOTA, &args[0]))
1164				return 0;
1165			break;
1166		case Opt_grpjquota:
1167			if (!set_qf_name(sb, GRPQUOTA, &args[0]))
1168				return 0;
1169			break;
1170		case Opt_offusrjquota:
1171			if (!clear_qf_name(sb, USRQUOTA))
1172				return 0;
1173			break;
1174		case Opt_offgrpjquota:
1175			if (!clear_qf_name(sb, GRPQUOTA))
1176				return 0;
1177			break;
1178		case Opt_jqfmt_vfsold:
1179			qfmt = QFMT_VFS_OLD;
1180			goto set_qf_format;
1181		case Opt_jqfmt_vfsv0:
1182			qfmt = QFMT_VFS_V0;
1183			goto set_qf_format;
1184		case Opt_jqfmt_vfsv1:
1185			qfmt = QFMT_VFS_V1;
1186set_qf_format:
1187			if (sb_any_quota_loaded(sb) &&
1188			    sbi->s_jquota_fmt != qfmt) {
1189				ext3_msg(sb, KERN_ERR, "error: cannot change "
1190					"journaled quota options when "
1191					"quota turned on.");
1192				return 0;
1193			}
1194			sbi->s_jquota_fmt = qfmt;
1195			break;
1196		case Opt_quota:
1197		case Opt_usrquota:
1198			set_opt(sbi->s_mount_opt, QUOTA);
1199			set_opt(sbi->s_mount_opt, USRQUOTA);
1200			break;
1201		case Opt_grpquota:
1202			set_opt(sbi->s_mount_opt, QUOTA);
1203			set_opt(sbi->s_mount_opt, GRPQUOTA);
1204			break;
1205		case Opt_noquota:
1206			if (sb_any_quota_loaded(sb)) {
1207				ext3_msg(sb, KERN_ERR, "error: cannot change "
1208					"quota options when quota turned on.");
1209				return 0;
1210			}
1211			clear_opt(sbi->s_mount_opt, QUOTA);
1212			clear_opt(sbi->s_mount_opt, USRQUOTA);
1213			clear_opt(sbi->s_mount_opt, GRPQUOTA);
1214			break;
1215#else
1216		case Opt_quota:
1217		case Opt_usrquota:
1218		case Opt_grpquota:
1219			ext3_msg(sb, KERN_ERR,
1220				"error: quota options not supported.");
1221			break;
1222		case Opt_usrjquota:
1223		case Opt_grpjquota:
1224		case Opt_offusrjquota:
1225		case Opt_offgrpjquota:
1226		case Opt_jqfmt_vfsold:
1227		case Opt_jqfmt_vfsv0:
1228		case Opt_jqfmt_vfsv1:
1229			ext3_msg(sb, KERN_ERR,
1230				"error: journaled quota options not "
1231				"supported.");
1232			break;
1233		case Opt_noquota:
1234			break;
1235#endif
1236		case Opt_abort:
1237			set_opt(sbi->s_mount_opt, ABORT);
1238			break;
1239		case Opt_nobarrier:
1240			clear_opt(sbi->s_mount_opt, BARRIER);
1241			break;
1242		case Opt_barrier:
1243			if (args[0].from) {
1244				if (match_int(&args[0], &option))
1245					return 0;
1246			} else
1247				option = 1;	/* No argument, default to 1 */
1248			if (option)
1249				set_opt(sbi->s_mount_opt, BARRIER);
1250			else
1251				clear_opt(sbi->s_mount_opt, BARRIER);
1252			break;
1253		case Opt_ignore:
1254			break;
1255		case Opt_resize:
1256			if (!is_remount) {
1257				ext3_msg(sb, KERN_ERR,
1258					"error: resize option only available "
1259					"for remount");
1260				return 0;
1261			}
1262			if (match_int(&args[0], &option) != 0)
1263				return 0;
1264			*n_blocks_count = option;
1265			break;
1266		case Opt_nobh:
1267			ext3_msg(sb, KERN_WARNING,
1268				"warning: ignoring deprecated nobh option");
1269			break;
1270		case Opt_bh:
1271			ext3_msg(sb, KERN_WARNING,
1272				"warning: ignoring deprecated bh option");
1273			break;
1274		default:
1275			ext3_msg(sb, KERN_ERR,
1276				"error: unrecognized mount option \"%s\" "
1277				"or missing value", p);
1278			return 0;
1279		}
1280	}
1281#ifdef CONFIG_QUOTA
1282	if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
1283		if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA])
1284			clear_opt(sbi->s_mount_opt, USRQUOTA);
1285		if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA])
1286			clear_opt(sbi->s_mount_opt, GRPQUOTA);
1287
1288		if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) {
1289			ext3_msg(sb, KERN_ERR, "error: old and new quota "
1290					"format mixing.");
1291			return 0;
1292		}
1293
1294		if (!sbi->s_jquota_fmt) {
1295			ext3_msg(sb, KERN_ERR, "error: journaled quota format "
1296					"not specified.");
1297			return 0;
1298		}
1299	} else {
1300		if (sbi->s_jquota_fmt) {
1301			ext3_msg(sb, KERN_ERR, "error: journaled quota format "
1302					"specified with no journaling "
1303					"enabled.");
1304			return 0;
1305		}
1306	}
1307#endif
1308	return 1;
1309}
1310
1311static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es,
1312			    int read_only)
1313{
1314	struct ext3_sb_info *sbi = EXT3_SB(sb);
1315	int res = 0;
1316
1317	if (le32_to_cpu(es->s_rev_level) > EXT3_MAX_SUPP_REV) {
1318		ext3_msg(sb, KERN_ERR,
1319			"error: revision level too high, "
1320			"forcing read-only mode");
1321		res = MS_RDONLY;
1322	}
1323	if (read_only)
1324		return res;
1325	if (!(sbi->s_mount_state & EXT3_VALID_FS))
1326		ext3_msg(sb, KERN_WARNING,
1327			"warning: mounting unchecked fs, "
1328			"running e2fsck is recommended");
1329	else if ((sbi->s_mount_state & EXT3_ERROR_FS))
1330		ext3_msg(sb, KERN_WARNING,
1331			"warning: mounting fs with errors, "
1332			"running e2fsck is recommended");
1333	else if ((__s16) le16_to_cpu(es->s_max_mnt_count) > 0 &&
1334		 le16_to_cpu(es->s_mnt_count) >=
1335			le16_to_cpu(es->s_max_mnt_count))
1336		ext3_msg(sb, KERN_WARNING,
1337			"warning: maximal mount count reached, "
1338			"running e2fsck is recommended");
1339	else if (le32_to_cpu(es->s_checkinterval) &&
1340		(le32_to_cpu(es->s_lastcheck) +
1341			le32_to_cpu(es->s_checkinterval) <= get_seconds()))
1342		ext3_msg(sb, KERN_WARNING,
1343			"warning: checktime reached, "
1344			"running e2fsck is recommended");
1345#if 0
1346		/* @@@ We _will_ want to clear the valid bit if we find
1347                   inconsistencies, to force a fsck at reboot.  But for
1348                   a plain journaled filesystem we can keep it set as
1349                   valid forever! :) */
1350	es->s_state &= cpu_to_le16(~EXT3_VALID_FS);
1351#endif
1352	if (!le16_to_cpu(es->s_max_mnt_count))
1353		es->s_max_mnt_count = cpu_to_le16(EXT3_DFL_MAX_MNT_COUNT);
1354	le16_add_cpu(&es->s_mnt_count, 1);
1355	es->s_mtime = cpu_to_le32(get_seconds());
1356	ext3_update_dynamic_rev(sb);
1357	EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
1358
1359	ext3_commit_super(sb, es, 1);
1360	if (test_opt(sb, DEBUG))
1361		ext3_msg(sb, KERN_INFO, "[bs=%lu, gc=%lu, "
1362				"bpg=%lu, ipg=%lu, mo=%04lx]",
1363			sb->s_blocksize,
1364			sbi->s_groups_count,
1365			EXT3_BLOCKS_PER_GROUP(sb),
1366			EXT3_INODES_PER_GROUP(sb),
1367			sbi->s_mount_opt);
1368
1369	if (EXT3_SB(sb)->s_journal->j_inode == NULL) {
1370		char b[BDEVNAME_SIZE];
1371		ext3_msg(sb, KERN_INFO, "using external journal on %s",
1372			bdevname(EXT3_SB(sb)->s_journal->j_dev, b));
1373	} else {
1374		ext3_msg(sb, KERN_INFO, "using internal journal");
1375	}
1376	cleancache_init_fs(sb);
1377	return res;
1378}
1379
1380/* Called at mount-time, super-block is locked */
1381static int ext3_check_descriptors(struct super_block *sb)
1382{
1383	struct ext3_sb_info *sbi = EXT3_SB(sb);
1384	int i;
1385
1386	ext3_debug ("Checking group descriptors");
1387
1388	for (i = 0; i < sbi->s_groups_count; i++) {
1389		struct ext3_group_desc *gdp = ext3_get_group_desc(sb, i, NULL);
1390		ext3_fsblk_t first_block = ext3_group_first_block_no(sb, i);
1391		ext3_fsblk_t last_block;
1392
1393		if (i == sbi->s_groups_count - 1)
1394			last_block = le32_to_cpu(sbi->s_es->s_blocks_count) - 1;
1395		else
1396			last_block = first_block +
1397				(EXT3_BLOCKS_PER_GROUP(sb) - 1);
1398
1399		if (le32_to_cpu(gdp->bg_block_bitmap) < first_block ||
1400		    le32_to_cpu(gdp->bg_block_bitmap) > last_block)
1401		{
1402			ext3_error (sb, "ext3_check_descriptors",
1403				    "Block bitmap for group %d"
1404				    " not in group (block %lu)!",
1405				    i, (unsigned long)
1406					le32_to_cpu(gdp->bg_block_bitmap));
1407			return 0;
1408		}
1409		if (le32_to_cpu(gdp->bg_inode_bitmap) < first_block ||
1410		    le32_to_cpu(gdp->bg_inode_bitmap) > last_block)
1411		{
1412			ext3_error (sb, "ext3_check_descriptors",
1413				    "Inode bitmap for group %d"
1414				    " not in group (block %lu)!",
1415				    i, (unsigned long)
1416					le32_to_cpu(gdp->bg_inode_bitmap));
1417			return 0;
1418		}
1419		if (le32_to_cpu(gdp->bg_inode_table) < first_block ||
1420		    le32_to_cpu(gdp->bg_inode_table) + sbi->s_itb_per_group - 1 >
1421		    last_block)
1422		{
1423			ext3_error (sb, "ext3_check_descriptors",
1424				    "Inode table for group %d"
1425				    " not in group (block %lu)!",
1426				    i, (unsigned long)
1427					le32_to_cpu(gdp->bg_inode_table));
1428			return 0;
1429		}
1430	}
1431
1432	sbi->s_es->s_free_blocks_count=cpu_to_le32(ext3_count_free_blocks(sb));
1433	sbi->s_es->s_free_inodes_count=cpu_to_le32(ext3_count_free_inodes(sb));
1434	return 1;
1435}
1436
1437
1438/* ext3_orphan_cleanup() walks a singly-linked list of inodes (starting at
1439 * the superblock) which were deleted from all directories, but held open by
1440 * a process at the time of a crash.  We walk the list and try to delete these
1441 * inodes at recovery time (only with a read-write filesystem).
1442 *
1443 * In order to keep the orphan inode chain consistent during traversal (in
1444 * case of crash during recovery), we link each inode into the superblock
1445 * orphan list_head and handle it the same way as an inode deletion during
1446 * normal operation (which journals the operations for us).
1447 *
1448 * We only do an iget() and an iput() on each inode, which is very safe if we
1449 * accidentally point at an in-use or already deleted inode.  The worst that
1450 * can happen in this case is that we get a "bit already cleared" message from
1451 * ext3_free_inode().  The only reason we would point at a wrong inode is if
1452 * e2fsck was run on this filesystem, and it must have already done the orphan
1453 * inode cleanup for us, so we can safely abort without any further action.
1454 */
1455static void ext3_orphan_cleanup (struct super_block * sb,
1456				 struct ext3_super_block * es)
1457{
1458	unsigned int s_flags = sb->s_flags;
1459	int nr_orphans = 0, nr_truncates = 0;
1460#ifdef CONFIG_QUOTA
1461	int i;
1462#endif
1463	if (!es->s_last_orphan) {
1464		jbd_debug(4, "no orphan inodes to clean up\n");
1465		return;
1466	}
1467
1468	if (bdev_read_only(sb->s_bdev)) {
1469		ext3_msg(sb, KERN_ERR, "error: write access "
1470			"unavailable, skipping orphan cleanup.");
1471		return;
1472	}
1473
1474	/* Check if feature set allows readwrite operations */
1475	if (EXT3_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP)) {
1476		ext3_msg(sb, KERN_INFO, "Skipping orphan cleanup due to "
1477			 "unknown ROCOMPAT features");
1478		return;
1479	}
1480
1481	if (EXT3_SB(sb)->s_mount_state & EXT3_ERROR_FS) {
1482		if (es->s_last_orphan)
1483			jbd_debug(1, "Errors on filesystem, "
1484				  "clearing orphan list.\n");
1485		es->s_last_orphan = 0;
1486		jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
1487		return;
1488	}
1489
1490	if (s_flags & MS_RDONLY) {
1491		ext3_msg(sb, KERN_INFO, "orphan cleanup on readonly fs");
1492		sb->s_flags &= ~MS_RDONLY;
1493	}
1494#ifdef CONFIG_QUOTA
1495	/* Needed for iput() to work correctly and not trash data */
1496	sb->s_flags |= MS_ACTIVE;
1497	/* Turn on quotas so that they are updated correctly */
1498	for (i = 0; i < MAXQUOTAS; i++) {
1499		if (EXT3_SB(sb)->s_qf_names[i]) {
1500			int ret = ext3_quota_on_mount(sb, i);
1501			if (ret < 0)
1502				ext3_msg(sb, KERN_ERR,
1503					"error: cannot turn on journaled "
1504					"quota: %d", ret);
1505		}
1506	}
1507#endif
1508
1509	while (es->s_last_orphan) {
1510		struct inode *inode;
1511
1512		inode = ext3_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
1513		if (IS_ERR(inode)) {
1514			es->s_last_orphan = 0;
1515			break;
1516		}
1517
1518		list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan);
1519		dquot_initialize(inode);
1520		if (inode->i_nlink) {
1521			printk(KERN_DEBUG
1522				"%s: truncating inode %lu to %Ld bytes\n",
1523				__func__, inode->i_ino, inode->i_size);
1524			jbd_debug(2, "truncating inode %lu to %Ld bytes\n",
1525				  inode->i_ino, inode->i_size);
1526			ext3_truncate(inode);
1527			nr_truncates++;
1528		} else {
1529			printk(KERN_DEBUG
1530				"%s: deleting unreferenced inode %lu\n",
1531				__func__, inode->i_ino);
1532			jbd_debug(2, "deleting unreferenced inode %lu\n",
1533				  inode->i_ino);
1534			nr_orphans++;
1535		}
1536		iput(inode);  /* The delete magic happens here! */
1537	}
1538
1539#define PLURAL(x) (x), ((x)==1) ? "" : "s"
1540
1541	if (nr_orphans)
1542		ext3_msg(sb, KERN_INFO, "%d orphan inode%s deleted",
1543		       PLURAL(nr_orphans));
1544	if (nr_truncates)
1545		ext3_msg(sb, KERN_INFO, "%d truncate%s cleaned up",
1546		       PLURAL(nr_truncates));
1547#ifdef CONFIG_QUOTA
1548	/* Turn quotas off */
1549	for (i = 0; i < MAXQUOTAS; i++) {
1550		if (sb_dqopt(sb)->files[i])
1551			dquot_quota_off(sb, i);
1552	}
1553#endif
1554	sb->s_flags = s_flags; /* Restore MS_RDONLY status */
1555}
1556
1557/*
1558 * Maximal file size.  There is a direct, and {,double-,triple-}indirect
1559 * block limit, and also a limit of (2^32 - 1) 512-byte sectors in i_blocks.
1560 * We need to be 1 filesystem block less than the 2^32 sector limit.
1561 */
1562static loff_t ext3_max_size(int bits)
1563{
1564	loff_t res = EXT3_NDIR_BLOCKS;
1565	int meta_blocks;
1566	loff_t upper_limit;
1567
1568	/* This is calculated to be the largest file size for a
1569	 * dense, file such that the total number of
1570	 * sectors in the file, including data and all indirect blocks,
1571	 * does not exceed 2^32 -1
1572	 * __u32 i_blocks representing the total number of
1573	 * 512 bytes blocks of the file
1574	 */
1575	upper_limit = (1LL << 32) - 1;
1576
1577	/* total blocks in file system block size */
1578	upper_limit >>= (bits - 9);
1579
1580
1581	/* indirect blocks */
1582	meta_blocks = 1;
1583	/* double indirect blocks */
1584	meta_blocks += 1 + (1LL << (bits-2));
1585	/* tripple indirect blocks */
1586	meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2)));
1587
1588	upper_limit -= meta_blocks;
1589	upper_limit <<= bits;
1590
1591	res += 1LL << (bits-2);
1592	res += 1LL << (2*(bits-2));
1593	res += 1LL << (3*(bits-2));
1594	res <<= bits;
1595	if (res > upper_limit)
1596		res = upper_limit;
1597
1598	if (res > MAX_LFS_FILESIZE)
1599		res = MAX_LFS_FILESIZE;
1600
1601	return res;
1602}
1603
1604static ext3_fsblk_t descriptor_loc(struct super_block *sb,
1605				    ext3_fsblk_t logic_sb_block,
1606				    int nr)
1607{
1608	struct ext3_sb_info *sbi = EXT3_SB(sb);
1609	unsigned long bg, first_meta_bg;
1610	int has_super = 0;
1611
1612	first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
1613
1614	if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_META_BG) ||
1615	    nr < first_meta_bg)
1616		return (logic_sb_block + nr + 1);
1617	bg = sbi->s_desc_per_block * nr;
1618	if (ext3_bg_has_super(sb, bg))
1619		has_super = 1;
1620	return (has_super + ext3_group_first_block_no(sb, bg));
1621}
1622
1623
1624static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1625{
1626	struct buffer_head * bh;
1627	struct ext3_super_block *es = NULL;
1628	struct ext3_sb_info *sbi;
1629	ext3_fsblk_t block;
1630	ext3_fsblk_t sb_block = get_sb_block(&data, sb);
1631	ext3_fsblk_t logic_sb_block;
1632	unsigned long offset = 0;
1633	unsigned int journal_inum = 0;
1634	unsigned long journal_devnum = 0;
1635	unsigned long def_mount_opts;
1636	struct inode *root;
1637	int blocksize;
1638	int hblock;
1639	int db_count;
1640	int i;
1641	int needs_recovery;
1642	int ret = -EINVAL;
1643	__le32 features;
1644	int err;
1645
1646	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
1647	if (!sbi)
1648		return -ENOMEM;
1649
1650	sbi->s_blockgroup_lock =
1651		kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
1652	if (!sbi->s_blockgroup_lock) {
1653		kfree(sbi);
1654		return -ENOMEM;
1655	}
1656	sb->s_fs_info = sbi;
1657	sbi->s_mount_opt = 0;
1658	sbi->s_resuid = make_kuid(&init_user_ns, EXT3_DEF_RESUID);
1659	sbi->s_resgid = make_kgid(&init_user_ns, EXT3_DEF_RESGID);
1660	sbi->s_sb_block = sb_block;
1661
1662	blocksize = sb_min_blocksize(sb, EXT3_MIN_BLOCK_SIZE);
1663	if (!blocksize) {
1664		ext3_msg(sb, KERN_ERR, "error: unable to set blocksize");
1665		goto out_fail;
1666	}
1667
1668	/*
1669	 * The ext3 superblock will not be buffer aligned for other than 1kB
1670	 * block sizes.  We need to calculate the offset from buffer start.
1671	 */
1672	if (blocksize != EXT3_MIN_BLOCK_SIZE) {
1673		logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize;
1674		offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize;
1675	} else {
1676		logic_sb_block = sb_block;
1677	}
1678
1679	if (!(bh = sb_bread(sb, logic_sb_block))) {
1680		ext3_msg(sb, KERN_ERR, "error: unable to read superblock");
1681		goto out_fail;
1682	}
1683	/*
1684	 * Note: s_es must be initialized as soon as possible because
1685	 *       some ext3 macro-instructions depend on its value
1686	 */
1687	es = (struct ext3_super_block *) (bh->b_data + offset);
1688	sbi->s_es = es;
1689	sb->s_magic = le16_to_cpu(es->s_magic);
1690	if (sb->s_magic != EXT3_SUPER_MAGIC)
1691		goto cantfind_ext3;
1692
1693	/* Set defaults before we parse the mount options */
1694	def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
1695	if (def_mount_opts & EXT3_DEFM_DEBUG)
1696		set_opt(sbi->s_mount_opt, DEBUG);
1697	if (def_mount_opts & EXT3_DEFM_BSDGROUPS)
1698		set_opt(sbi->s_mount_opt, GRPID);
1699	if (def_mount_opts & EXT3_DEFM_UID16)
1700		set_opt(sbi->s_mount_opt, NO_UID32);
1701#ifdef CONFIG_EXT3_FS_XATTR
1702	if (def_mount_opts & EXT3_DEFM_XATTR_USER)
1703		set_opt(sbi->s_mount_opt, XATTR_USER);
1704#endif
1705#ifdef CONFIG_EXT3_FS_POSIX_ACL
1706	if (def_mount_opts & EXT3_DEFM_ACL)
1707		set_opt(sbi->s_mount_opt, POSIX_ACL);
1708#endif
1709	if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_DATA)
1710		set_opt(sbi->s_mount_opt, JOURNAL_DATA);
1711	else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_ORDERED)
1712		set_opt(sbi->s_mount_opt, ORDERED_DATA);
1713	else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_WBACK)
1714		set_opt(sbi->s_mount_opt, WRITEBACK_DATA);
1715
1716	if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_PANIC)
1717		set_opt(sbi->s_mount_opt, ERRORS_PANIC);
1718	else if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_CONTINUE)
1719		set_opt(sbi->s_mount_opt, ERRORS_CONT);
1720	else
1721		set_opt(sbi->s_mount_opt, ERRORS_RO);
1722
1723	sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid));
1724	sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid));
1725
1726	/* enable barriers by default */
1727	set_opt(sbi->s_mount_opt, BARRIER);
1728	set_opt(sbi->s_mount_opt, RESERVATION);
1729
1730	if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum,
1731			    NULL, 0))
1732		goto failed_mount;
1733
1734	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
1735		(test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
1736
1737	if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV &&
1738	    (EXT3_HAS_COMPAT_FEATURE(sb, ~0U) ||
1739	     EXT3_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
1740	     EXT3_HAS_INCOMPAT_FEATURE(sb, ~0U)))
1741		ext3_msg(sb, KERN_WARNING,
1742			"warning: feature flags set on rev 0 fs, "
1743			"running e2fsck is recommended");
1744	/*
1745	 * Check feature flags regardless of the revision level, since we
1746	 * previously didn't change the revision level when setting the flags,
1747	 * so there is a chance incompat flags are set on a rev 0 filesystem.
1748	 */
1749	features = EXT3_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP);
1750	if (features) {
1751		ext3_msg(sb, KERN_ERR,
1752			"error: couldn't mount because of unsupported "
1753			"optional features (%x)", le32_to_cpu(features));
1754		goto failed_mount;
1755	}
1756	features = EXT3_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP);
1757	if (!(sb->s_flags & MS_RDONLY) && features) {
1758		ext3_msg(sb, KERN_ERR,
1759			"error: couldn't mount RDWR because of unsupported "
1760			"optional features (%x)", le32_to_cpu(features));
1761		goto failed_mount;
1762	}
1763	blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
1764
1765	if (blocksize < EXT3_MIN_BLOCK_SIZE ||
1766	    blocksize > EXT3_MAX_BLOCK_SIZE) {
1767		ext3_msg(sb, KERN_ERR,
1768			"error: couldn't mount because of unsupported "
1769			"filesystem blocksize %d", blocksize);
1770		goto failed_mount;
1771	}
1772
1773	hblock = bdev_logical_block_size(sb->s_bdev);
1774	if (sb->s_blocksize != blocksize) {
1775		/*
1776		 * Make sure the blocksize for the filesystem is larger
1777		 * than the hardware sectorsize for the machine.
1778		 */
1779		if (blocksize < hblock) {
1780			ext3_msg(sb, KERN_ERR,
1781				"error: fsblocksize %d too small for "
1782				"hardware sectorsize %d", blocksize, hblock);
1783			goto failed_mount;
1784		}
1785
1786		brelse (bh);
1787		if (!sb_set_blocksize(sb, blocksize)) {
1788			ext3_msg(sb, KERN_ERR,
1789				"error: bad blocksize %d", blocksize);
1790			goto out_fail;
1791		}
1792		logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize;
1793		offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize;
1794		bh = sb_bread(sb, logic_sb_block);
1795		if (!bh) {
1796			ext3_msg(sb, KERN_ERR,
1797			       "error: can't read superblock on 2nd try");
1798			goto failed_mount;
1799		}
1800		es = (struct ext3_super_block *)(bh->b_data + offset);
1801		sbi->s_es = es;
1802		if (es->s_magic != cpu_to_le16(EXT3_SUPER_MAGIC)) {
1803			ext3_msg(sb, KERN_ERR,
1804				"error: magic mismatch");
1805			goto failed_mount;
1806		}
1807	}
1808
1809	sb->s_maxbytes = ext3_max_size(sb->s_blocksize_bits);
1810
1811	if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV) {
1812		sbi->s_inode_size = EXT3_GOOD_OLD_INODE_SIZE;
1813		sbi->s_first_ino = EXT3_GOOD_OLD_FIRST_INO;
1814	} else {
1815		sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
1816		sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
1817		if ((sbi->s_inode_size < EXT3_GOOD_OLD_INODE_SIZE) ||
1818		    (!is_power_of_2(sbi->s_inode_size)) ||
1819		    (sbi->s_inode_size > blocksize)) {
1820			ext3_msg(sb, KERN_ERR,
1821				"error: unsupported inode size: %d",
1822				sbi->s_inode_size);
1823			goto failed_mount;
1824		}
1825	}
1826	sbi->s_frag_size = EXT3_MIN_FRAG_SIZE <<
1827				   le32_to_cpu(es->s_log_frag_size);
1828	if (blocksize != sbi->s_frag_size) {
1829		ext3_msg(sb, KERN_ERR,
1830		       "error: fragsize %lu != blocksize %u (unsupported)",
1831		       sbi->s_frag_size, blocksize);
1832		goto failed_mount;
1833	}
1834	sbi->s_frags_per_block = 1;
1835	sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
1836	sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group);
1837	sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
1838	if (EXT3_INODE_SIZE(sb) == 0 || EXT3_INODES_PER_GROUP(sb) == 0)
1839		goto cantfind_ext3;
1840	sbi->s_inodes_per_block = blocksize / EXT3_INODE_SIZE(sb);
1841	if (sbi->s_inodes_per_block == 0)
1842		goto cantfind_ext3;
1843	sbi->s_itb_per_group = sbi->s_inodes_per_group /
1844					sbi->s_inodes_per_block;
1845	sbi->s_desc_per_block = blocksize / sizeof(struct ext3_group_desc);
1846	sbi->s_sbh = bh;
1847	sbi->s_mount_state = le16_to_cpu(es->s_state);
1848	sbi->s_addr_per_block_bits = ilog2(EXT3_ADDR_PER_BLOCK(sb));
1849	sbi->s_desc_per_block_bits = ilog2(EXT3_DESC_PER_BLOCK(sb));
1850	for (i=0; i < 4; i++)
1851		sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
1852	sbi->s_def_hash_version = es->s_def_hash_version;
1853	i = le32_to_cpu(es->s_flags);
1854	if (i & EXT2_FLAGS_UNSIGNED_HASH)
1855		sbi->s_hash_unsigned = 3;
1856	else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
1857#ifdef __CHAR_UNSIGNED__
1858		es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
1859		sbi->s_hash_unsigned = 3;
1860#else
1861		es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
1862#endif
1863	}
1864
1865	if (sbi->s_blocks_per_group > blocksize * 8) {
1866		ext3_msg(sb, KERN_ERR,
1867			"#blocks per group too big: %lu",
1868			sbi->s_blocks_per_group);
1869		goto failed_mount;
1870	}
1871	if (sbi->s_frags_per_group > blocksize * 8) {
1872		ext3_msg(sb, KERN_ERR,
1873			"error: #fragments per group too big: %lu",
1874			sbi->s_frags_per_group);
1875		goto failed_mount;
1876	}
1877	if (sbi->s_inodes_per_group > blocksize * 8) {
1878		ext3_msg(sb, KERN_ERR,
1879			"error: #inodes per group too big: %lu",
1880			sbi->s_inodes_per_group);
1881		goto failed_mount;
1882	}
1883
1884	err = generic_check_addressable(sb->s_blocksize_bits,
1885					le32_to_cpu(es->s_blocks_count));
1886	if (err) {
1887		ext3_msg(sb, KERN_ERR,
1888			"error: filesystem is too large to mount safely");
1889		if (sizeof(sector_t) < 8)
1890			ext3_msg(sb, KERN_ERR,
1891				"error: CONFIG_LBDAF not enabled");
1892		ret = err;
1893		goto failed_mount;
1894	}
1895
1896	if (EXT3_BLOCKS_PER_GROUP(sb) == 0)
1897		goto cantfind_ext3;
1898	sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) -
1899			       le32_to_cpu(es->s_first_data_block) - 1)
1900				       / EXT3_BLOCKS_PER_GROUP(sb)) + 1;
1901	db_count = DIV_ROUND_UP(sbi->s_groups_count, EXT3_DESC_PER_BLOCK(sb));
1902	sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *),
1903				    GFP_KERNEL);
1904	if (sbi->s_group_desc == NULL) {
1905		ext3_msg(sb, KERN_ERR,
1906			"error: not enough memory");
1907		ret = -ENOMEM;
1908		goto failed_mount;
1909	}
1910
1911	bgl_lock_init(sbi->s_blockgroup_lock);
1912
1913	for (i = 0; i < db_count; i++) {
1914		block = descriptor_loc(sb, logic_sb_block, i);
1915		sbi->s_group_desc[i] = sb_bread(sb, block);
1916		if (!sbi->s_group_desc[i]) {
1917			ext3_msg(sb, KERN_ERR,
1918				"error: can't read group descriptor %d", i);
1919			db_count = i;
1920			goto failed_mount2;
1921		}
1922	}
1923	if (!ext3_check_descriptors (sb)) {
1924		ext3_msg(sb, KERN_ERR,
1925			"error: group descriptors corrupted");
1926		goto failed_mount2;
1927	}
1928	sbi->s_gdb_count = db_count;
1929	get_random_bytes(&sbi->s_next_generation, sizeof(u32));
1930	spin_lock_init(&sbi->s_next_gen_lock);
1931
1932	/* per fileystem reservation list head & lock */
1933	spin_lock_init(&sbi->s_rsv_window_lock);
1934	sbi->s_rsv_window_root = RB_ROOT;
1935	/* Add a single, static dummy reservation to the start of the
1936	 * reservation window list --- it gives us a placeholder for
1937	 * append-at-start-of-list which makes the allocation logic
1938	 * _much_ simpler. */
1939	sbi->s_rsv_window_head.rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
1940	sbi->s_rsv_window_head.rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
1941	sbi->s_rsv_window_head.rsv_alloc_hit = 0;
1942	sbi->s_rsv_window_head.rsv_goal_size = 0;
1943	ext3_rsv_window_add(sb, &sbi->s_rsv_window_head);
1944
1945	/*
1946	 * set up enough so that it can read an inode
1947	 */
1948	sb->s_op = &ext3_sops;
1949	sb->s_export_op = &ext3_export_ops;
1950	sb->s_xattr = ext3_xattr_handlers;
1951#ifdef CONFIG_QUOTA
1952	sb->s_qcop = &ext3_qctl_operations;
1953	sb->dq_op = &ext3_quota_operations;
1954#endif
1955	memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
1956	INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
1957	mutex_init(&sbi->s_orphan_lock);
1958	mutex_init(&sbi->s_resize_lock);
1959
1960	sb->s_root = NULL;
1961
1962	needs_recovery = (es->s_last_orphan != 0 ||
1963			  EXT3_HAS_INCOMPAT_FEATURE(sb,
1964				    EXT3_FEATURE_INCOMPAT_RECOVER));
1965
1966	/*
1967	 * The first inode we look at is the journal inode.  Don't try
1968	 * root first: it may be modified in the journal!
1969	 */
1970	if (!test_opt(sb, NOLOAD) &&
1971	    EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) {
1972		if (ext3_load_journal(sb, es, journal_devnum))
1973			goto failed_mount2;
1974	} else if (journal_inum) {
1975		if (ext3_create_journal(sb, es, journal_inum))
1976			goto failed_mount2;
1977	} else {
1978		if (!silent)
1979			ext3_msg(sb, KERN_ERR,
1980				"error: no journal found. "
1981				"mounting ext3 over ext2?");
1982		goto failed_mount2;
1983	}
1984	err = percpu_counter_init(&sbi->s_freeblocks_counter,
1985			ext3_count_free_blocks(sb));
1986	if (!err) {
1987		err = percpu_counter_init(&sbi->s_freeinodes_counter,
1988				ext3_count_free_inodes(sb));
1989	}
1990	if (!err) {
1991		err = percpu_counter_init(&sbi->s_dirs_counter,
1992				ext3_count_dirs(sb));
1993	}
1994	if (err) {
1995		ext3_msg(sb, KERN_ERR, "error: insufficient memory");
1996		ret = err;
1997		goto failed_mount3;
1998	}
1999
2000	/* We have now updated the journal if required, so we can
2001	 * validate the data journaling mode. */
2002	switch (test_opt(sb, DATA_FLAGS)) {
2003	case 0:
2004		/* No mode set, assume a default based on the journal
2005                   capabilities: ORDERED_DATA if the journal can
2006                   cope, else JOURNAL_DATA */
2007		if (journal_check_available_features
2008		    (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE))
2009			set_opt(sbi->s_mount_opt, DEFAULT_DATA_MODE);
2010		else
2011			set_opt(sbi->s_mount_opt, JOURNAL_DATA);
2012		break;
2013
2014	case EXT3_MOUNT_ORDERED_DATA:
2015	case EXT3_MOUNT_WRITEBACK_DATA:
2016		if (!journal_check_available_features
2017		    (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)) {
2018			ext3_msg(sb, KERN_ERR,
2019				"error: journal does not support "
2020				"requested data journaling mode");
2021			goto failed_mount3;
2022		}
2023	default:
2024		break;
2025	}
2026
2027	/*
2028	 * The journal_load will have done any necessary log recovery,
2029	 * so we can safely mount the rest of the filesystem now.
2030	 */
2031
2032	root = ext3_iget(sb, EXT3_ROOT_INO);
2033	if (IS_ERR(root)) {
2034		ext3_msg(sb, KERN_ERR, "error: get root inode failed");
2035		ret = PTR_ERR(root);
2036		goto failed_mount3;
2037	}
2038	if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
2039		iput(root);
2040		ext3_msg(sb, KERN_ERR, "error: corrupt root inode, run e2fsck");
2041		goto failed_mount3;
2042	}
2043	sb->s_root = d_make_root(root);
2044	if (!sb->s_root) {
2045		ext3_msg(sb, KERN_ERR, "error: get root dentry failed");
2046		ret = -ENOMEM;
2047		goto failed_mount3;
2048	}
2049
2050	if (ext3_setup_super(sb, es, sb->s_flags & MS_RDONLY))
2051		sb->s_flags |= MS_RDONLY;
2052
2053	EXT3_SB(sb)->s_mount_state |= EXT3_ORPHAN_FS;
2054	ext3_orphan_cleanup(sb, es);
2055	EXT3_SB(sb)->s_mount_state &= ~EXT3_ORPHAN_FS;
2056	if (needs_recovery) {
2057		ext3_mark_recovery_complete(sb, es);
2058		ext3_msg(sb, KERN_INFO, "recovery complete");
2059	}
2060	ext3_msg(sb, KERN_INFO, "mounted filesystem with %s data mode",
2061		test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal":
2062		test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered":
2063		"writeback");
2064
2065	return 0;
2066
2067cantfind_ext3:
2068	if (!silent)
2069		ext3_msg(sb, KERN_INFO,
2070			"error: can't find ext3 filesystem on dev %s.",
2071		       sb->s_id);
2072	goto failed_mount;
2073
2074failed_mount3:
2075	percpu_counter_destroy(&sbi->s_freeblocks_counter);
2076	percpu_counter_destroy(&sbi->s_freeinodes_counter);
2077	percpu_counter_destroy(&sbi->s_dirs_counter);
2078	journal_destroy(sbi->s_journal);
2079failed_mount2:
2080	for (i = 0; i < db_count; i++)
2081		brelse(sbi->s_group_desc[i]);
2082	kfree(sbi->s_group_desc);
2083failed_mount:
2084#ifdef CONFIG_QUOTA
2085	for (i = 0; i < MAXQUOTAS; i++)
2086		kfree(sbi->s_qf_names[i]);
2087#endif
2088	ext3_blkdev_remove(sbi);
2089	brelse(bh);
2090out_fail:
2091	sb->s_fs_info = NULL;
2092	kfree(sbi->s_blockgroup_lock);
2093	kfree(sbi);
2094	return ret;
2095}
2096
2097/*
2098 * Setup any per-fs journal parameters now.  We'll do this both on
2099 * initial mount, once the journal has been initialised but before we've
2100 * done any recovery; and again on any subsequent remount.
2101 */
2102static void ext3_init_journal_params(struct super_block *sb, journal_t *journal)
2103{
2104	struct ext3_sb_info *sbi = EXT3_SB(sb);
2105
2106	if (sbi->s_commit_interval)
2107		journal->j_commit_interval = sbi->s_commit_interval;
2108	/* We could also set up an ext3-specific default for the commit
2109	 * interval here, but for now we'll just fall back to the jbd
2110	 * default. */
2111
2112	spin_lock(&journal->j_state_lock);
2113	if (test_opt(sb, BARRIER))
2114		journal->j_flags |= JFS_BARRIER;
2115	else
2116		journal->j_flags &= ~JFS_BARRIER;
2117	if (test_opt(sb, DATA_ERR_ABORT))
2118		journal->j_flags |= JFS_ABORT_ON_SYNCDATA_ERR;
2119	else
2120		journal->j_flags &= ~JFS_ABORT_ON_SYNCDATA_ERR;
2121	spin_unlock(&journal->j_state_lock);
2122}
2123
2124static journal_t *ext3_get_journal(struct super_block *sb,
2125				   unsigned int journal_inum)
2126{
2127	struct inode *journal_inode;
2128	journal_t *journal;
2129
2130	/* First, test for the existence of a valid inode on disk.  Bad
2131	 * things happen if we iget() an unused inode, as the subsequent
2132	 * iput() will try to delete it. */
2133
2134	journal_inode = ext3_iget(sb, journal_inum);
2135	if (IS_ERR(journal_inode)) {
2136		ext3_msg(sb, KERN_ERR, "error: no journal found");
2137		return NULL;
2138	}
2139	if (!journal_inode->i_nlink) {
2140		make_bad_inode(journal_inode);
2141		iput(journal_inode);
2142		ext3_msg(sb, KERN_ERR, "error: journal inode is deleted");
2143		return NULL;
2144	}
2145
2146	jbd_debug(2, "Journal inode found at %p: %Ld bytes\n",
2147		  journal_inode, journal_inode->i_size);
2148	if (!S_ISREG(journal_inode->i_mode)) {
2149		ext3_msg(sb, KERN_ERR, "error: invalid journal inode");
2150		iput(journal_inode);
2151		return NULL;
2152	}
2153
2154	journal = journal_init_inode(journal_inode);
2155	if (!journal) {
2156		ext3_msg(sb, KERN_ERR, "error: could not load journal inode");
2157		iput(journal_inode);
2158		return NULL;
2159	}
2160	journal->j_private = sb;
2161	ext3_init_journal_params(sb, journal);
2162	return journal;
2163}
2164
2165static journal_t *ext3_get_dev_journal(struct super_block *sb,
2166				       dev_t j_dev)
2167{
2168	struct buffer_head * bh;
2169	journal_t *journal;
2170	ext3_fsblk_t start;
2171	ext3_fsblk_t len;
2172	int hblock, blocksize;
2173	ext3_fsblk_t sb_block;
2174	unsigned long offset;
2175	struct ext3_super_block * es;
2176	struct block_device *bdev;
2177
2178	bdev = ext3_blkdev_get(j_dev, sb);
2179	if (bdev == NULL)
2180		return NULL;
2181
2182	blocksize = sb->s_blocksize;
2183	hblock = bdev_logical_block_size(bdev);
2184	if (blocksize < hblock) {
2185		ext3_msg(sb, KERN_ERR,
2186			"error: blocksize too small for journal device");
2187		goto out_bdev;
2188	}
2189
2190	sb_block = EXT3_MIN_BLOCK_SIZE / blocksize;
2191	offset = EXT3_MIN_BLOCK_SIZE % blocksize;
2192	set_blocksize(bdev, blocksize);
2193	if (!(bh = __bread(bdev, sb_block, blocksize))) {
2194		ext3_msg(sb, KERN_ERR, "error: couldn't read superblock of "
2195			"external journal");
2196		goto out_bdev;
2197	}
2198
2199	es = (struct ext3_super_block *) (bh->b_data + offset);
2200	if ((le16_to_cpu(es->s_magic) != EXT3_SUPER_MAGIC) ||
2201	    !(le32_to_cpu(es->s_feature_incompat) &
2202	      EXT3_FEATURE_INCOMPAT_JOURNAL_DEV)) {
2203		ext3_msg(sb, KERN_ERR, "error: external journal has "
2204			"bad superblock");
2205		brelse(bh);
2206		goto out_bdev;
2207	}
2208
2209	if (memcmp(EXT3_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
2210		ext3_msg(sb, KERN_ERR, "error: journal UUID does not match");
2211		brelse(bh);
2212		goto out_bdev;
2213	}
2214
2215	len = le32_to_cpu(es->s_blocks_count);
2216	start = sb_block + 1;
2217	brelse(bh);	/* we're done with the superblock */
2218
2219	journal = journal_init_dev(bdev, sb->s_bdev,
2220					start, len, blocksize);
2221	if (!journal) {
2222		ext3_msg(sb, KERN_ERR,
2223			"error: failed to create device journal");
2224		goto out_bdev;
2225	}
2226	journal->j_private = sb;
2227	if (!bh_uptodate_or_lock(journal->j_sb_buffer)) {
2228		if (bh_submit_read(journal->j_sb_buffer)) {
2229			ext3_msg(sb, KERN_ERR, "I/O error on journal device");
2230			goto out_journal;
2231		}
2232	}
2233	if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
2234		ext3_msg(sb, KERN_ERR,
2235			"error: external journal has more than one "
2236			"user (unsupported) - %d",
2237			be32_to_cpu(journal->j_superblock->s_nr_users));
2238		goto out_journal;
2239	}
2240	EXT3_SB(sb)->journal_bdev = bdev;
2241	ext3_init_journal_params(sb, journal);
2242	return journal;
2243out_journal:
2244	journal_destroy(journal);
2245out_bdev:
2246	ext3_blkdev_put(bdev);
2247	return NULL;
2248}
2249
2250static int ext3_load_journal(struct super_block *sb,
2251			     struct ext3_super_block *es,
2252			     unsigned long journal_devnum)
2253{
2254	journal_t *journal;
2255	unsigned int journal_inum = le32_to_cpu(es->s_journal_inum);
2256	dev_t journal_dev;
2257	int err = 0;
2258	int really_read_only;
2259
2260	if (journal_devnum &&
2261	    journal_devnum != le32_to_cpu(es->s_journal_dev)) {
2262		ext3_msg(sb, KERN_INFO, "external journal device major/minor "
2263			"numbers have changed");
2264		journal_dev = new_decode_dev(journal_devnum);
2265	} else
2266		journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
2267
2268	really_read_only = bdev_read_only(sb->s_bdev);
2269
2270	/*
2271	 * Are we loading a blank journal or performing recovery after a
2272	 * crash?  For recovery, we need to check in advance whether we
2273	 * can get read-write access to the device.
2274	 */
2275
2276	if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER)) {
2277		if (sb->s_flags & MS_RDONLY) {
2278			ext3_msg(sb, KERN_INFO,
2279				"recovery required on readonly filesystem");
2280			if (really_read_only) {
2281				ext3_msg(sb, KERN_ERR, "error: write access "
2282					"unavailable, cannot proceed");
2283				return -EROFS;
2284			}
2285			ext3_msg(sb, KERN_INFO,
2286				"write access will be enabled during recovery");
2287		}
2288	}
2289
2290	if (journal_inum && journal_dev) {
2291		ext3_msg(sb, KERN_ERR, "error: filesystem has both journal "
2292		       "and inode journals");
2293		return -EINVAL;
2294	}
2295
2296	if (journal_inum) {
2297		if (!(journal = ext3_get_journal(sb, journal_inum)))
2298			return -EINVAL;
2299	} else {
2300		if (!(journal = ext3_get_dev_journal(sb, journal_dev)))
2301			return -EINVAL;
2302	}
2303
2304	if (!(journal->j_flags & JFS_BARRIER))
2305		printk(KERN_INFO "EXT3-fs: barriers not enabled\n");
2306
2307	if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) {
2308		err = journal_update_format(journal);
2309		if (err)  {
2310			ext3_msg(sb, KERN_ERR, "error updating journal");
2311			journal_destroy(journal);
2312			return err;
2313		}
2314	}
2315
2316	if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER))
2317		err = journal_wipe(journal, !really_read_only);
2318	if (!err)
2319		err = journal_load(journal);
2320
2321	if (err) {
2322		ext3_msg(sb, KERN_ERR, "error loading journal");
2323		journal_destroy(journal);
2324		return err;
2325	}
2326
2327	EXT3_SB(sb)->s_journal = journal;
2328	ext3_clear_journal_err(sb, es);
2329
2330	if (!really_read_only && journal_devnum &&
2331	    journal_devnum != le32_to_cpu(es->s_journal_dev)) {
2332		es->s_journal_dev = cpu_to_le32(journal_devnum);
2333
2334		/* Make sure we flush the recovery flag to disk. */
2335		ext3_commit_super(sb, es, 1);
2336	}
2337
2338	return 0;
2339}
2340
2341static int ext3_create_journal(struct super_block *sb,
2342			       struct ext3_super_block *es,
2343			       unsigned int journal_inum)
2344{
2345	journal_t *journal;
2346	int err;
2347
2348	if (sb->s_flags & MS_RDONLY) {
2349		ext3_msg(sb, KERN_ERR,
2350			"error: readonly filesystem when trying to "
2351			"create journal");
2352		return -EROFS;
2353	}
2354
2355	journal = ext3_get_journal(sb, journal_inum);
2356	if (!journal)
2357		return -EINVAL;
2358
2359	ext3_msg(sb, KERN_INFO, "creating new journal on inode %u",
2360	       journal_inum);
2361
2362	err = journal_create(journal);
2363	if (err) {
2364		ext3_msg(sb, KERN_ERR, "error creating journal");
2365		journal_destroy(journal);
2366		return -EIO;
2367	}
2368
2369	EXT3_SB(sb)->s_journal = journal;
2370
2371	ext3_update_dynamic_rev(sb);
2372	EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
2373	EXT3_SET_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL);
2374
2375	es->s_journal_inum = cpu_to_le32(journal_inum);
2376
2377	/* Make sure we flush the recovery flag to disk. */
2378	ext3_commit_super(sb, es, 1);
2379
2380	return 0;
2381}
2382
2383static int ext3_commit_super(struct super_block *sb,
2384			       struct ext3_super_block *es,
2385			       int sync)
2386{
2387	struct buffer_head *sbh = EXT3_SB(sb)->s_sbh;
2388	int error = 0;
2389
2390	if (!sbh)
2391		return error;
2392
2393	if (buffer_write_io_error(sbh)) {
2394		/*
2395		 * Oh, dear.  A previous attempt to write the
2396		 * superblock failed.  This could happen because the
2397		 * USB device was yanked out.  Or it could happen to
2398		 * be a transient write error and maybe the block will
2399		 * be remapped.  Nothing we can do but to retry the
2400		 * write and hope for the best.
2401		 */
2402		ext3_msg(sb, KERN_ERR, "previous I/O error to "
2403		       "superblock detected");
2404		clear_buffer_write_io_error(sbh);
2405		set_buffer_uptodate(sbh);
2406	}
2407	/*
2408	 * If the file system is mounted read-only, don't update the
2409	 * superblock write time.  This avoids updating the superblock
2410	 * write time when we are mounting the root file system
2411	 * read/only but we need to replay the journal; at that point,
2412	 * for people who are east of GMT and who make their clock
2413	 * tick in localtime for Windows bug-for-bug compatibility,
2414	 * the clock is set in the future, and this will cause e2fsck
2415	 * to complain and force a full file system check.
2416	 */
2417	if (!(sb->s_flags & MS_RDONLY))
2418		es->s_wtime = cpu_to_le32(get_seconds());
2419	es->s_free_blocks_count = cpu_to_le32(ext3_count_free_blocks(sb));
2420	es->s_free_inodes_count = cpu_to_le32(ext3_count_free_inodes(sb));
2421	BUFFER_TRACE(sbh, "marking dirty");
2422	mark_buffer_dirty(sbh);
2423	if (sync) {
2424		error = sync_dirty_buffer(sbh);
2425		if (buffer_write_io_error(sbh)) {
2426			ext3_msg(sb, KERN_ERR, "I/O error while writing "
2427			       "superblock");
2428			clear_buffer_write_io_error(sbh);
2429			set_buffer_uptodate(sbh);
2430		}
2431	}
2432	return error;
2433}
2434
2435
2436/*
2437 * Have we just finished recovery?  If so, and if we are mounting (or
2438 * remounting) the filesystem readonly, then we will end up with a
2439 * consistent fs on disk.  Record that fact.
2440 */
2441static void ext3_mark_recovery_complete(struct super_block * sb,
2442					struct ext3_super_block * es)
2443{
2444	journal_t *journal = EXT3_SB(sb)->s_journal;
2445
2446	journal_lock_updates(journal);
2447	if (journal_flush(journal) < 0)
2448		goto out;
2449
2450	if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) &&
2451	    sb->s_flags & MS_RDONLY) {
2452		EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
2453		ext3_commit_super(sb, es, 1);
2454	}
2455
2456out:
2457	journal_unlock_updates(journal);
2458}
2459
2460/*
2461 * If we are mounting (or read-write remounting) a filesystem whose journal
2462 * has recorded an error from a previous lifetime, move that error to the
2463 * main filesystem now.
2464 */
2465static void ext3_clear_journal_err(struct super_block *sb,
2466				   struct ext3_super_block *es)
2467{
2468	journal_t *journal;
2469	int j_errno;
2470	const char *errstr;
2471
2472	journal = EXT3_SB(sb)->s_journal;
2473
2474	/*
2475	 * Now check for any error status which may have been recorded in the
2476	 * journal by a prior ext3_error() or ext3_abort()
2477	 */
2478
2479	j_errno = journal_errno(journal);
2480	if (j_errno) {
2481		char nbuf[16];
2482
2483		errstr = ext3_decode_error(sb, j_errno, nbuf);
2484		ext3_warning(sb, __func__, "Filesystem error recorded "
2485			     "from previous mount: %s", errstr);
2486		ext3_warning(sb, __func__, "Marking fs in need of "
2487			     "filesystem check.");
2488
2489		EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
2490		es->s_state |= cpu_to_le16(EXT3_ERROR_FS);
2491		ext3_commit_super (sb, es, 1);
2492
2493		journal_clear_err(journal);
2494	}
2495}
2496
2497/*
2498 * Force the running and committing transactions to commit,
2499 * and wait on the commit.
2500 */
2501int ext3_force_commit(struct super_block *sb)
2502{
2503	journal_t *journal;
2504	int ret;
2505
2506	if (sb->s_flags & MS_RDONLY)
2507		return 0;
2508
2509	journal = EXT3_SB(sb)->s_journal;
2510	ret = ext3_journal_force_commit(journal);
2511	return ret;
2512}
2513
2514static int ext3_sync_fs(struct super_block *sb, int wait)
2515{
2516	tid_t target;
2517
2518	trace_ext3_sync_fs(sb, wait);
2519	/*
2520	 * Writeback quota in non-journalled quota case - journalled quota has
2521	 * no dirty dquots
2522	 */
2523	dquot_writeback_dquots(sb, -1);
2524	if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) {
2525		if (wait)
2526			log_wait_commit(EXT3_SB(sb)->s_journal, target);
2527	}
2528	return 0;
2529}
2530
2531/*
2532 * LVM calls this function before a (read-only) snapshot is created.  This
2533 * gives us a chance to flush the journal completely and mark the fs clean.
2534 */
2535static int ext3_freeze(struct super_block *sb)
2536{
2537	int error = 0;
2538	journal_t *journal;
2539
2540	if (!(sb->s_flags & MS_RDONLY)) {
2541		journal = EXT3_SB(sb)->s_journal;
2542
2543		/* Now we set up the journal barrier. */
2544		journal_lock_updates(journal);
2545
2546		/*
2547		 * We don't want to clear needs_recovery flag when we failed
2548		 * to flush the journal.
2549		 */
2550		error = journal_flush(journal);
2551		if (error < 0)
2552			goto out;
2553
2554		/* Journal blocked and flushed, clear needs_recovery flag. */
2555		EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
2556		error = ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1);
2557		if (error)
2558			goto out;
2559	}
2560	return 0;
2561
2562out:
2563	journal_unlock_updates(journal);
2564	return error;
2565}
2566
2567/*
2568 * Called by LVM after the snapshot is done.  We need to reset the RECOVER
2569 * flag here, even though the filesystem is not technically dirty yet.
2570 */
2571static int ext3_unfreeze(struct super_block *sb)
2572{
2573	if (!(sb->s_flags & MS_RDONLY)) {
2574		lock_super(sb);
2575		/* Reser the needs_recovery flag before the fs is unlocked. */
2576		EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
2577		ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1);
2578		unlock_super(sb);
2579		journal_unlock_updates(EXT3_SB(sb)->s_journal);
2580	}
2581	return 0;
2582}
2583
2584static int ext3_remount (struct super_block * sb, int * flags, char * data)
2585{
2586	struct ext3_super_block * es;
2587	struct ext3_sb_info *sbi = EXT3_SB(sb);
2588	ext3_fsblk_t n_blocks_count = 0;
2589	unsigned long old_sb_flags;
2590	struct ext3_mount_options old_opts;
2591	int enable_quota = 0;
2592	int err;
2593#ifdef CONFIG_QUOTA
2594	int i;
2595#endif
2596
2597	/* Store the original options */
2598	lock_super(sb);
2599	old_sb_flags = sb->s_flags;
2600	old_opts.s_mount_opt = sbi->s_mount_opt;
2601	old_opts.s_resuid = sbi->s_resuid;
2602	old_opts.s_resgid = sbi->s_resgid;
2603	old_opts.s_commit_interval = sbi->s_commit_interval;
2604#ifdef CONFIG_QUOTA
2605	old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
2606	for (i = 0; i < MAXQUOTAS; i++)
2607		old_opts.s_qf_names[i] = sbi->s_qf_names[i];
2608#endif
2609
2610	/*
2611	 * Allow the "check" option to be passed as a remount option.
2612	 */
2613	if (!parse_options(data, sb, NULL, NULL, &n_blocks_count, 1)) {
2614		err = -EINVAL;
2615		goto restore_opts;
2616	}
2617
2618	if (test_opt(sb, ABORT))
2619		ext3_abort(sb, __func__, "Abort forced by user");
2620
2621	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
2622		(test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
2623
2624	es = sbi->s_es;
2625
2626	ext3_init_journal_params(sb, sbi->s_journal);
2627
2628	if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) ||
2629		n_blocks_count > le32_to_cpu(es->s_blocks_count)) {
2630		if (test_opt(sb, ABORT)) {
2631			err = -EROFS;
2632			goto restore_opts;
2633		}
2634
2635		if (*flags & MS_RDONLY) {
2636			err = dquot_suspend(sb, -1);
2637			if (err < 0)
2638				goto restore_opts;
2639
2640			/*
2641			 * First of all, the unconditional stuff we have to do
2642			 * to disable replay of the journal when we next remount
2643			 */
2644			sb->s_flags |= MS_RDONLY;
2645
2646			/*
2647			 * OK, test if we are remounting a valid rw partition
2648			 * readonly, and if so set the rdonly flag and then
2649			 * mark the partition as valid again.
2650			 */
2651			if (!(es->s_state & cpu_to_le16(EXT3_VALID_FS)) &&
2652			    (sbi->s_mount_state & EXT3_VALID_FS))
2653				es->s_state = cpu_to_le16(sbi->s_mount_state);
2654
2655			ext3_mark_recovery_complete(sb, es);
2656		} else {
2657			__le32 ret;
2658			if ((ret = EXT3_HAS_RO_COMPAT_FEATURE(sb,
2659					~EXT3_FEATURE_RO_COMPAT_SUPP))) {
2660				ext3_msg(sb, KERN_WARNING,
2661					"warning: couldn't remount RDWR "
2662					"because of unsupported optional "
2663					"features (%x)", le32_to_cpu(ret));
2664				err = -EROFS;
2665				goto restore_opts;
2666			}
2667
2668			/*
2669			 * If we have an unprocessed orphan list hanging
2670			 * around from a previously readonly bdev mount,
2671			 * require a full umount & mount for now.
2672			 */
2673			if (es->s_last_orphan) {
2674				ext3_msg(sb, KERN_WARNING, "warning: couldn't "
2675				       "remount RDWR because of unprocessed "
2676				       "orphan inode list.  Please "
2677				       "umount & mount instead.");
2678				err = -EINVAL;
2679				goto restore_opts;
2680			}
2681
2682			/*
2683			 * Mounting a RDONLY partition read-write, so reread
2684			 * and store the current valid flag.  (It may have
2685			 * been changed by e2fsck since we originally mounted
2686			 * the partition.)
2687			 */
2688			ext3_clear_journal_err(sb, es);
2689			sbi->s_mount_state = le16_to_cpu(es->s_state);
2690			if ((err = ext3_group_extend(sb, es, n_blocks_count)))
2691				goto restore_opts;
2692			if (!ext3_setup_super (sb, es, 0))
2693				sb->s_flags &= ~MS_RDONLY;
2694			enable_quota = 1;
2695		}
2696	}
2697#ifdef CONFIG_QUOTA
2698	/* Release old quota file names */
2699	for (i = 0; i < MAXQUOTAS; i++)
2700		if (old_opts.s_qf_names[i] &&
2701		    old_opts.s_qf_names[i] != sbi->s_qf_names[i])
2702			kfree(old_opts.s_qf_names[i]);
2703#endif
2704	unlock_super(sb);
2705
2706	if (enable_quota)
2707		dquot_resume(sb, -1);
2708	return 0;
2709restore_opts:
2710	sb->s_flags = old_sb_flags;
2711	sbi->s_mount_opt = old_opts.s_mount_opt;
2712	sbi->s_resuid = old_opts.s_resuid;
2713	sbi->s_resgid = old_opts.s_resgid;
2714	sbi->s_commit_interval = old_opts.s_commit_interval;
2715#ifdef CONFIG_QUOTA
2716	sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
2717	for (i = 0; i < MAXQUOTAS; i++) {
2718		if (sbi->s_qf_names[i] &&
2719		    old_opts.s_qf_names[i] != sbi->s_qf_names[i])
2720			kfree(sbi->s_qf_names[i]);
2721		sbi->s_qf_names[i] = old_opts.s_qf_names[i];
2722	}
2723#endif
2724	unlock_super(sb);
2725	return err;
2726}
2727
2728static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf)
2729{
2730	struct super_block *sb = dentry->d_sb;
2731	struct ext3_sb_info *sbi = EXT3_SB(sb);
2732	struct ext3_super_block *es = sbi->s_es;
2733	u64 fsid;
2734
2735	if (test_opt(sb, MINIX_DF)) {
2736		sbi->s_overhead_last = 0;
2737	} else if (sbi->s_blocks_last != le32_to_cpu(es->s_blocks_count)) {
2738		unsigned long ngroups = sbi->s_groups_count, i;
2739		ext3_fsblk_t overhead = 0;
2740		smp_rmb();
2741
2742		/*
2743		 * Compute the overhead (FS structures).  This is constant
2744		 * for a given filesystem unless the number of block groups
2745		 * changes so we cache the previous value until it does.
2746		 */
2747
2748		/*
2749		 * All of the blocks before first_data_block are
2750		 * overhead
2751		 */
2752		overhead = le32_to_cpu(es->s_first_data_block);
2753
2754		/*
2755		 * Add the overhead attributed to the superblock and
2756		 * block group descriptors.  If the sparse superblocks
2757		 * feature is turned on, then not all groups have this.
2758		 */
2759		for (i = 0; i < ngroups; i++) {
2760			overhead += ext3_bg_has_super(sb, i) +
2761				ext3_bg_num_gdb(sb, i);
2762			cond_resched();
2763		}
2764
2765		/*
2766		 * Every block group has an inode bitmap, a block
2767		 * bitmap, and an inode table.
2768		 */
2769		overhead += ngroups * (2 + sbi->s_itb_per_group);
2770		sbi->s_overhead_last = overhead;
2771		smp_wmb();
2772		sbi->s_blocks_last = le32_to_cpu(es->s_blocks_count);
2773	}
2774
2775	buf->f_type = EXT3_SUPER_MAGIC;
2776	buf->f_bsize = sb->s_blocksize;
2777	buf->f_blocks = le32_to_cpu(es->s_blocks_count) - sbi->s_overhead_last;
2778	buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter);
2779	buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count);
2780	if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count))
2781		buf->f_bavail = 0;
2782	buf->f_files = le32_to_cpu(es->s_inodes_count);
2783	buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
2784	buf->f_namelen = EXT3_NAME_LEN;
2785	fsid = le64_to_cpup((void *)es->s_uuid) ^
2786	       le64_to_cpup((void *)es->s_uuid + sizeof(u64));
2787	buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
2788	buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
2789	return 0;
2790}
2791
/* Helper function for writing quotas on sync - we need to start transaction before quota file
 * is locked for write. Otherwise there are possible deadlocks:
2794 * Process 1                         Process 2
2795 * ext3_create()                     quota_sync()
2796 *   journal_start()                   write_dquot()
2797 *   dquot_initialize()                       down(dqio_mutex)
2798 *     down(dqio_mutex)                    journal_start()
2799 *
2800 */
2801
2802#ifdef CONFIG_QUOTA
2803
2804static inline struct inode *dquot_to_inode(struct dquot *dquot)
2805{
2806	return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type];
2807}
2808
2809static int ext3_write_dquot(struct dquot *dquot)
2810{
2811	int ret, err;
2812	handle_t *handle;
2813	struct inode *inode;
2814
2815	inode = dquot_to_inode(dquot);
2816	handle = ext3_journal_start(inode,
2817					EXT3_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
2818	if (IS_ERR(handle))
2819		return PTR_ERR(handle);
2820	ret = dquot_commit(dquot);
2821	err = ext3_journal_stop(handle);
2822	if (!ret)
2823		ret = err;
2824	return ret;
2825}
2826
2827static int ext3_acquire_dquot(struct dquot *dquot)
2828{
2829	int ret, err;
2830	handle_t *handle;
2831
2832	handle = ext3_journal_start(dquot_to_inode(dquot),
2833					EXT3_QUOTA_INIT_BLOCKS(dquot->dq_sb));
2834	if (IS_ERR(handle))
2835		return PTR_ERR(handle);
2836	ret = dquot_acquire(dquot);
2837	err = ext3_journal_stop(handle);
2838	if (!ret)
2839		ret = err;
2840	return ret;
2841}
2842
2843static int ext3_release_dquot(struct dquot *dquot)
2844{
2845	int ret, err;
2846	handle_t *handle;
2847
2848	handle = ext3_journal_start(dquot_to_inode(dquot),
2849					EXT3_QUOTA_DEL_BLOCKS(dquot->dq_sb));
2850	if (IS_ERR(handle)) {
2851		/* Release dquot anyway to avoid endless cycle in dqput() */
2852		dquot_release(dquot);
2853		return PTR_ERR(handle);
2854	}
2855	ret = dquot_release(dquot);
2856	err = ext3_journal_stop(handle);
2857	if (!ret)
2858		ret = err;
2859	return ret;
2860}
2861
2862static int ext3_mark_dquot_dirty(struct dquot *dquot)
2863{
2864	/* Are we journaling quotas? */
2865	if (EXT3_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] ||
2866	    EXT3_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
2867		dquot_mark_dquot_dirty(dquot);
2868		return ext3_write_dquot(dquot);
2869	} else {
2870		return dquot_mark_dquot_dirty(dquot);
2871	}
2872}
2873
2874static int ext3_write_info(struct super_block *sb, int type)
2875{
2876	int ret, err;
2877	handle_t *handle;
2878
2879	/* Data block + inode block */
2880	handle = ext3_journal_start(sb->s_root->d_inode, 2);
2881	if (IS_ERR(handle))
2882		return PTR_ERR(handle);
2883	ret = dquot_commit_info(sb, type);
2884	err = ext3_journal_stop(handle);
2885	if (!ret)
2886		ret = err;
2887	return ret;
2888}
2889
2890/*
2891 * Turn on quotas during mount time - we need to find
2892 * the quota file and such...
2893 */
2894static int ext3_quota_on_mount(struct super_block *sb, int type)
2895{
2896	return dquot_quota_on_mount(sb, EXT3_SB(sb)->s_qf_names[type],
2897					EXT3_SB(sb)->s_jquota_fmt, type);
2898}
2899
2900/*
2901 * Standard function to be called on quota_on
2902 */
2903static int ext3_quota_on(struct super_block *sb, int type, int format_id,
2904			 struct path *path)
2905{
2906	int err;
2907
2908	if (!test_opt(sb, QUOTA))
2909		return -EINVAL;
2910
2911	/* Quotafile not on the same filesystem? */
2912	if (path->dentry->d_sb != sb)
2913		return -EXDEV;
2914	/* Journaling quota? */
2915	if (EXT3_SB(sb)->s_qf_names[type]) {
2916		/* Quotafile not of fs root? */
2917		if (path->dentry->d_parent != sb->s_root)
2918			ext3_msg(sb, KERN_WARNING,
2919				"warning: Quota file not on filesystem root. "
2920				"Journaled quota will not work.");
2921	}
2922
2923	/*
2924	 * When we journal data on quota file, we have to flush journal to see
2925	 * all updates to the file when we bypass pagecache...
2926	 */
2927	if (ext3_should_journal_data(path->dentry->d_inode)) {
2928		/*
2929		 * We don't need to lock updates but journal_flush() could
2930		 * otherwise be livelocked...
2931		 */
2932		journal_lock_updates(EXT3_SB(sb)->s_journal);
2933		err = journal_flush(EXT3_SB(sb)->s_journal);
2934		journal_unlock_updates(EXT3_SB(sb)->s_journal);
2935		if (err)
2936			return err;
2937	}
2938
2939	return dquot_quota_on(sb, type, format_id, path);
2940}
2941
2942/* Read data from quotafile - avoid pagecache and such because we cannot afford
2943 * acquiring the locks... As quota files are never truncated and quota code
2944 * itself serializes the operations (and no one else should touch the files)
2945 * we don't have to be afraid of races */
2946static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data,
2947			       size_t len, loff_t off)
2948{
2949	struct inode *inode = sb_dqopt(sb)->files[type];
2950	sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb);
2951	int err = 0;
2952	int offset = off & (sb->s_blocksize - 1);
2953	int tocopy;
2954	size_t toread;
2955	struct buffer_head *bh;
2956	loff_t i_size = i_size_read(inode);
2957
2958	if (off > i_size)
2959		return 0;
2960	if (off+len > i_size)
2961		len = i_size-off;
2962	toread = len;
2963	while (toread > 0) {
2964		tocopy = sb->s_blocksize - offset < toread ?
2965				sb->s_blocksize - offset : toread;
2966		bh = ext3_bread(NULL, inode, blk, 0, &err);
2967		if (err)
2968			return err;
2969		if (!bh)	/* A hole? */
2970			memset(data, 0, tocopy);
2971		else
2972			memcpy(data, bh->b_data+offset, tocopy);
2973		brelse(bh);
2974		offset = 0;
2975		toread -= tocopy;
2976		data += tocopy;
2977		blk++;
2978	}
2979	return len;
2980}
2981
2982/* Write to quotafile (we know the transaction is already started and has
2983 * enough credits) */
2984static ssize_t ext3_quota_write(struct super_block *sb, int type,
2985				const char *data, size_t len, loff_t off)
2986{
2987	struct inode *inode = sb_dqopt(sb)->files[type];
2988	sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb);
2989	int err = 0;
2990	int offset = off & (sb->s_blocksize - 1);
2991	int journal_quota = EXT3_SB(sb)->s_qf_names[type] != NULL;
2992	struct buffer_head *bh;
2993	handle_t *handle = journal_current_handle();
2994
2995	if (!handle) {
2996		ext3_msg(sb, KERN_WARNING,
2997			"warning: quota write (off=%llu, len=%llu)"
2998			" cancelled because transaction is not started.",
2999			(unsigned long long)off, (unsigned long long)len);
3000		return -EIO;
3001	}
3002
3003	/*
3004	 * Since we account only one data block in transaction credits,
3005	 * then it is impossible to cross a block boundary.
3006	 */
3007	if (sb->s_blocksize - offset < len) {
3008		ext3_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
3009			" cancelled because not block aligned",
3010			(unsigned long long)off, (unsigned long long)len);
3011		return -EIO;
3012	}
3013	bh = ext3_bread(handle, inode, blk, 1, &err);
3014	if (!bh)
3015		goto out;
3016	if (journal_quota) {
3017		err = ext3_journal_get_write_access(handle, bh);
3018		if (err) {
3019			brelse(bh);
3020			goto out;
3021		}
3022	}
3023	lock_buffer(bh);
3024	memcpy(bh->b_data+offset, data, len);
3025	flush_dcache_page(bh->b_page);
3026	unlock_buffer(bh);
3027	if (journal_quota)
3028		err = ext3_journal_dirty_metadata(handle, bh);
3029	else {
3030		/* Always do at least ordered writes for quotas */
3031		err = ext3_journal_dirty_data(handle, bh);
3032		mark_buffer_dirty(bh);
3033	}
3034	brelse(bh);
3035out:
3036	if (err)
3037		return err;
3038	if (inode->i_size < off + len) {
3039		i_size_write(inode, off + len);
3040		EXT3_I(inode)->i_disksize = inode->i_size;
3041	}
3042	inode->i_version++;
3043	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
3044	ext3_mark_inode_dirty(handle, inode);
3045	return len;
3046}
3047
3048#endif
3049
3050static struct dentry *ext3_mount(struct file_system_type *fs_type,
3051	int flags, const char *dev_name, void *data)
3052{
3053	return mount_bdev(fs_type, flags, dev_name, data, ext3_fill_super);
3054}
3055
3056static struct file_system_type ext3_fs_type = {
3057	.owner		= THIS_MODULE,
3058	.name		= "ext3",
3059	.mount		= ext3_mount,
3060	.kill_sb	= kill_block_super,
3061	.fs_flags	= FS_REQUIRES_DEV,
3062};
3063
3064static int __init init_ext3_fs(void)
3065{
3066	int err = init_ext3_xattr();
3067	if (err)
3068		return err;
3069	err = init_inodecache();
3070	if (err)
3071		goto out1;
3072        err = register_filesystem(&ext3_fs_type);
3073	if (err)
3074		goto out;
3075	return 0;
3076out:
3077	destroy_inodecache();
3078out1:
3079	exit_ext3_xattr();
3080	return err;
3081}
3082
3083static void __exit exit_ext3_fs(void)
3084{
3085	unregister_filesystem(&ext3_fs_type);
3086	destroy_inodecache();
3087	exit_ext3_xattr();
3088}
3089
3090MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
3091MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions");
3092MODULE_LICENSE("GPL");
3093module_init(init_ext3_fs)
3094module_exit(exit_ext3_fs)