filesystem freeze: add error handling of write_super_lockfs/unlockfs

Currently, ext3 in mainline Linux doesn't have a freeze feature that
suspends write requests. As a result, we cannot use the storage device's
features (snapshot and replication) to take a backup that preserves the
filesystem's consistency while it is mounted.

In many cases, a commercial filesystem (e.g. VxFS) has the freeze feature,
and it is used to get a consistent backup.

If Linux's standard filesystem ext3 has the freeze feature, we can do it
without a commercial filesystem.

So I have implemented the ioctls of the freeze feature.
I think we can take a consistent backup with the following steps.
1. Freeze the filesystem with the freeze ioctl.
2. Separate the replication volume or create the snapshot
with the storage device's feature.
3. Unfreeze the filesystem with the unfreeze ioctl.
4. Take the backup from the separated replication volume
or the snapshot.

This patch:

VFS:
Changed the type of write_super_lockfs and unlockfs from "void"
to "int" so that they can return an error.
Renamed the super block operations write_super_lockfs and unlockfs to
freeze_fs and unfreeze_fs to avoid confusion.

ext3, ext4, xfs, gfs2, jfs:
Changed the type of write_super_lockfs and unlockfs from "void"
to "int" so that write_super_lockfs returns an error if needed,
and unlockfs always returns 0.

reiserfs:
Changed the type of write_super_lockfs and unlockfs from "void"
to "int" so that they always return 0 (success) to keep the current behavior.

Signed-off-by: Takashi Sato <t-sato@yk.jp.nec.com>
Signed-off-by: Masayuki Hamaguchi <m-hamaguchi@ys.jp.nec.com>
Cc: <xfs-masters@oss.sgi.com>
Cc: <linux-ext4@vger.kernel.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Dave Kleikamp <shaggy@austin.ibm.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Alasdair G Kergon <agk@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Takashi Sato and committed by
Linus Torvalds
c4be0c1d 69347a23

+107 -68
+4 -4
Documentation/filesystems/Locking
··· 97 97 void (*put_super) (struct super_block *); 98 98 void (*write_super) (struct super_block *); 99 99 int (*sync_fs)(struct super_block *sb, int wait); 100 - void (*write_super_lockfs) (struct super_block *); 101 - void (*unlockfs) (struct super_block *); 100 + int (*freeze_fs) (struct super_block *); 101 + int (*unfreeze_fs) (struct super_block *); 102 102 int (*statfs) (struct dentry *, struct kstatfs *); 103 103 int (*remount_fs) (struct super_block *, int *, char *); 104 104 void (*clear_inode) (struct inode *); ··· 119 119 put_super: yes yes no 120 120 write_super: no yes read 121 121 sync_fs: no no read 122 - write_super_lockfs: ? 123 - unlockfs: ? 122 + freeze_fs: ? 123 + unfreeze_fs: ? 124 124 statfs: no no no 125 125 remount_fs: yes yes maybe (see below) 126 126 clear_inode: no
+4 -4
Documentation/filesystems/vfs.txt
··· 210 210 void (*put_super) (struct super_block *); 211 211 void (*write_super) (struct super_block *); 212 212 int (*sync_fs)(struct super_block *sb, int wait); 213 - void (*write_super_lockfs) (struct super_block *); 214 - void (*unlockfs) (struct super_block *); 213 + int (*freeze_fs) (struct super_block *); 214 + int (*unfreeze_fs) (struct super_block *); 215 215 int (*statfs) (struct dentry *, struct kstatfs *); 216 216 int (*remount_fs) (struct super_block *, int *, char *); 217 217 void (*clear_inode) (struct inode *); ··· 270 270 a superblock. The second parameter indicates whether the method 271 271 should wait until the write out has been completed. Optional. 272 272 273 - write_super_lockfs: called when VFS is locking a filesystem and 273 + freeze_fs: called when VFS is locking a filesystem and 274 274 forcing it into a consistent state. This method is currently 275 275 used by the Logical Volume Manager (LVM). 276 276 277 - unlockfs: called when VFS is unlocking a filesystem and making it writable 277 + unfreeze_fs: called when VFS is unlocking a filesystem and making it writable 278 278 again. 279 279 280 280 statfs: called when the VFS needs to get filesystem statistics. This
+4 -4
fs/buffer.c
··· 221 221 222 222 sync_blockdev(sb->s_bdev); 223 223 224 - if (sb->s_op->write_super_lockfs) 225 - sb->s_op->write_super_lockfs(sb); 224 + if (sb->s_op->freeze_fs) 225 + sb->s_op->freeze_fs(sb); 226 226 } 227 227 228 228 sync_blockdev(bdev); ··· 242 242 if (sb) { 243 243 BUG_ON(sb->s_bdev != bdev); 244 244 245 - if (sb->s_op->unlockfs) 246 - sb->s_op->unlockfs(sb); 245 + if (sb->s_op->unfreeze_fs) 246 + sb->s_op->unfreeze_fs(sb); 247 247 sb->s_frozen = SB_UNFROZEN; 248 248 smp_wmb(); 249 249 wake_up(&sb->s_wait_unfrozen);
+29 -16
fs/ext3/super.c
··· 48 48 unsigned long journal_devnum); 49 49 static int ext3_create_journal(struct super_block *, struct ext3_super_block *, 50 50 unsigned int); 51 - static void ext3_commit_super (struct super_block * sb, 52 - struct ext3_super_block * es, 51 + static int ext3_commit_super(struct super_block *sb, 52 + struct ext3_super_block *es, 53 53 int sync); 54 54 static void ext3_mark_recovery_complete(struct super_block * sb, 55 55 struct ext3_super_block * es); ··· 60 60 char nbuf[16]); 61 61 static int ext3_remount (struct super_block * sb, int * flags, char * data); 62 62 static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf); 63 - static void ext3_unlockfs(struct super_block *sb); 63 + static int ext3_unfreeze(struct super_block *sb); 64 64 static void ext3_write_super (struct super_block * sb); 65 - static void ext3_write_super_lockfs(struct super_block *sb); 65 + static int ext3_freeze(struct super_block *sb); 66 66 67 67 /* 68 68 * Wrappers for journal_start/end. ··· 759 759 .put_super = ext3_put_super, 760 760 .write_super = ext3_write_super, 761 761 .sync_fs = ext3_sync_fs, 762 - .write_super_lockfs = ext3_write_super_lockfs, 763 - .unlockfs = ext3_unlockfs, 762 + .freeze_fs = ext3_freeze, 763 + .unfreeze_fs = ext3_unfreeze, 764 764 .statfs = ext3_statfs, 765 765 .remount_fs = ext3_remount, 766 766 .clear_inode = ext3_clear_inode, ··· 2311 2311 return 0; 2312 2312 } 2313 2313 2314 - static void ext3_commit_super (struct super_block * sb, 2315 - struct ext3_super_block * es, 2314 + static int ext3_commit_super(struct super_block *sb, 2315 + struct ext3_super_block *es, 2316 2316 int sync) 2317 2317 { 2318 2318 struct buffer_head *sbh = EXT3_SB(sb)->s_sbh; 2319 + int error = 0; 2319 2320 2320 2321 if (!sbh) 2321 - return; 2322 + return error; 2322 2323 es->s_wtime = cpu_to_le32(get_seconds()); 2323 2324 es->s_free_blocks_count = cpu_to_le32(ext3_count_free_blocks(sb)); 2324 2325 es->s_free_inodes_count = cpu_to_le32(ext3_count_free_inodes(sb)); 2325 
2326 BUFFER_TRACE(sbh, "marking dirty"); 2326 2327 mark_buffer_dirty(sbh); 2327 2328 if (sync) 2328 - sync_dirty_buffer(sbh); 2329 + error = sync_dirty_buffer(sbh); 2330 + return error; 2329 2331 } 2330 2332 2331 2333 ··· 2441 2439 * LVM calls this function before a (read-only) snapshot is created. This 2442 2440 * gives us a chance to flush the journal completely and mark the fs clean. 2443 2441 */ 2444 - static void ext3_write_super_lockfs(struct super_block *sb) 2442 + static int ext3_freeze(struct super_block *sb) 2445 2443 { 2444 + int error = 0; 2445 + journal_t *journal; 2446 2446 sb->s_dirt = 0; 2447 2447 2448 2448 if (!(sb->s_flags & MS_RDONLY)) { 2449 - journal_t *journal = EXT3_SB(sb)->s_journal; 2449 + journal = EXT3_SB(sb)->s_journal; 2450 2450 2451 2451 /* Now we set up the journal barrier. */ 2452 2452 journal_lock_updates(journal); ··· 2457 2453 * We don't want to clear needs_recovery flag when we failed 2458 2454 * to flush the journal. 2459 2455 */ 2460 - if (journal_flush(journal) < 0) 2461 - return; 2456 + error = journal_flush(journal); 2457 + if (error < 0) 2458 + goto out; 2462 2459 2463 2460 /* Journal blocked and flushed, clear needs_recovery flag. */ 2464 2461 EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); 2465 - ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1); 2462 + error = ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1); 2463 + if (error) 2464 + goto out; 2466 2465 } 2466 + return 0; 2467 + 2468 + out: 2469 + journal_unlock_updates(journal); 2470 + return error; 2467 2471 } 2468 2472 2469 2473 /* 2470 2474 * Called by LVM after the snapshot is done. We need to reset the RECOVER 2471 2475 * flag here, even though the filesystem is not technically dirty yet. 
2472 2476 */ 2473 - static void ext3_unlockfs(struct super_block *sb) 2477 + static int ext3_unfreeze(struct super_block *sb) 2474 2478 { 2475 2479 if (!(sb->s_flags & MS_RDONLY)) { 2476 2480 lock_super(sb); ··· 2488 2476 unlock_super(sb); 2489 2477 journal_unlock_updates(EXT3_SB(sb)->s_journal); 2490 2478 } 2479 + return 0; 2491 2480 } 2492 2481 2493 2482 static int ext3_remount (struct super_block * sb, int * flags, char * data)
+31 -14
fs/ext4/super.c
··· 51 51 52 52 static int ext4_load_journal(struct super_block *, struct ext4_super_block *, 53 53 unsigned long journal_devnum); 54 - static void ext4_commit_super(struct super_block *sb, 54 + static int ext4_commit_super(struct super_block *sb, 55 55 struct ext4_super_block *es, int sync); 56 56 static void ext4_mark_recovery_complete(struct super_block *sb, 57 57 struct ext4_super_block *es); ··· 62 62 char nbuf[16]); 63 63 static int ext4_remount(struct super_block *sb, int *flags, char *data); 64 64 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); 65 - static void ext4_unlockfs(struct super_block *sb); 65 + static int ext4_unfreeze(struct super_block *sb); 66 66 static void ext4_write_super(struct super_block *sb); 67 - static void ext4_write_super_lockfs(struct super_block *sb); 67 + static int ext4_freeze(struct super_block *sb); 68 68 69 69 70 70 ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, ··· 978 978 .put_super = ext4_put_super, 979 979 .write_super = ext4_write_super, 980 980 .sync_fs = ext4_sync_fs, 981 - .write_super_lockfs = ext4_write_super_lockfs, 982 - .unlockfs = ext4_unlockfs, 981 + .freeze_fs = ext4_freeze, 982 + .unfreeze_fs = ext4_unfreeze, 983 983 .statfs = ext4_statfs, 984 984 .remount_fs = ext4_remount, 985 985 .clear_inode = ext4_clear_inode, ··· 2888 2888 return 0; 2889 2889 } 2890 2890 2891 - static void ext4_commit_super(struct super_block *sb, 2891 + static int ext4_commit_super(struct super_block *sb, 2892 2892 struct ext4_super_block *es, int sync) 2893 2893 { 2894 2894 struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; 2895 + int error = 0; 2895 2896 2896 2897 if (!sbh) 2897 - return; 2898 + return error; 2898 2899 if (buffer_write_io_error(sbh)) { 2899 2900 /* 2900 2901 * Oh, dear. 
A previous attempt to write the ··· 2919 2918 BUFFER_TRACE(sbh, "marking dirty"); 2920 2919 mark_buffer_dirty(sbh); 2921 2920 if (sync) { 2922 - sync_dirty_buffer(sbh); 2923 - if (buffer_write_io_error(sbh)) { 2921 + error = sync_dirty_buffer(sbh); 2922 + if (error) 2923 + return error; 2924 + 2925 + error = buffer_write_io_error(sbh); 2926 + if (error) { 2924 2927 printk(KERN_ERR "EXT4-fs: I/O error while writing " 2925 2928 "superblock for %s.\n", sb->s_id); 2926 2929 clear_buffer_write_io_error(sbh); 2927 2930 set_buffer_uptodate(sbh); 2928 2931 } 2929 2932 } 2933 + return error; 2930 2934 } 2931 2935 2932 2936 ··· 3064 3058 * LVM calls this function before a (read-only) snapshot is created. This 3065 3059 * gives us a chance to flush the journal completely and mark the fs clean. 3066 3060 */ 3067 - static void ext4_write_super_lockfs(struct super_block *sb) 3061 + static int ext4_freeze(struct super_block *sb) 3068 3062 { 3063 + int error = 0; 3064 + journal_t *journal; 3069 3065 sb->s_dirt = 0; 3070 3066 3071 3067 if (!(sb->s_flags & MS_RDONLY)) { 3072 - journal_t *journal = EXT4_SB(sb)->s_journal; 3068 + journal = EXT4_SB(sb)->s_journal; 3073 3069 3074 3070 if (journal) { 3075 3071 /* Now we set up the journal barrier. */ ··· 3081 3073 * We don't want to clear needs_recovery flag when we 3082 3074 * failed to flush the journal. 3083 3075 */ 3084 - if (jbd2_journal_flush(journal) < 0) 3085 - return; 3076 + error = jbd2_journal_flush(journal); 3077 + if (error < 0) 3078 + goto out; 3086 3079 } 3087 3080 3088 3081 /* Journal blocked and flushed, clear needs_recovery flag. 
*/ 3089 3082 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 3090 3083 ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1); 3084 + error = ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1); 3085 + if (error) 3086 + goto out; 3091 3087 } 3088 + return 0; 3089 + out: 3090 + jbd2_journal_unlock_updates(journal); 3091 + return error; 3092 3092 } 3093 3093 3094 3094 /* 3095 3095 * Called by LVM after the snapshot is done. We need to reset the RECOVER 3096 3096 * flag here, even though the filesystem is not technically dirty yet. 3097 3097 */ 3098 - static void ext4_unlockfs(struct super_block *sb) 3098 + static int ext4_unfreeze(struct super_block *sb) 3099 3099 { 3100 3100 if (EXT4_SB(sb)->s_journal && !(sb->s_flags & MS_RDONLY)) { 3101 3101 lock_super(sb); ··· 3113 3097 unlock_super(sb); 3114 3098 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 3115 3099 } 3100 + return 0; 3116 3101 } 3117 3102 3118 3103 static int ext4_remount(struct super_block *sb, int *flags, char *data)
+9 -7
fs/gfs2/ops_super.c
··· 211 211 } 212 212 213 213 /** 214 - * gfs2_write_super_lockfs - prevent further writes to the filesystem 214 + * gfs2_freeze - prevent further writes to the filesystem 215 215 * @sb: the VFS structure for the filesystem 216 216 * 217 217 */ 218 218 219 - static void gfs2_write_super_lockfs(struct super_block *sb) 219 + static int gfs2_freeze(struct super_block *sb) 220 220 { 221 221 struct gfs2_sbd *sdp = sb->s_fs_info; 222 222 int error; 223 223 224 224 if (test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) 225 - return; 225 + return -EINVAL; 226 226 227 227 for (;;) { 228 228 error = gfs2_freeze_fs(sdp); ··· 242 242 fs_err(sdp, "retrying...\n"); 243 243 msleep(1000); 244 244 } 245 + return 0; 245 246 } 246 247 247 248 /** 248 - * gfs2_unlockfs - reallow writes to the filesystem 249 + * gfs2_unfreeze - reallow writes to the filesystem 249 250 * @sb: the VFS structure for the filesystem 250 251 * 251 252 */ 252 253 253 - static void gfs2_unlockfs(struct super_block *sb) 254 + static int gfs2_unfreeze(struct super_block *sb) 254 255 { 255 256 gfs2_unfreeze_fs(sb->s_fs_info); 257 + return 0; 256 258 } 257 259 258 260 /** ··· 690 688 .put_super = gfs2_put_super, 691 689 .write_super = gfs2_write_super, 692 690 .sync_fs = gfs2_sync_fs, 693 - .write_super_lockfs = gfs2_write_super_lockfs, 694 - .unlockfs = gfs2_unlockfs, 691 + .freeze_fs = gfs2_freeze, 692 + .unfreeze_fs = gfs2_unfreeze, 695 693 .statfs = gfs2_statfs, 696 694 .remount_fs = gfs2_remount_fs, 697 695 .clear_inode = gfs2_clear_inode,
+6 -4
fs/jfs/super.c
··· 543 543 return ret; 544 544 } 545 545 546 - static void jfs_write_super_lockfs(struct super_block *sb) 546 + static int jfs_freeze(struct super_block *sb) 547 547 { 548 548 struct jfs_sb_info *sbi = JFS_SBI(sb); 549 549 struct jfs_log *log = sbi->log; ··· 553 553 lmLogShutdown(log); 554 554 updateSuper(sb, FM_CLEAN); 555 555 } 556 + return 0; 556 557 } 557 558 558 - static void jfs_unlockfs(struct super_block *sb) 559 + static int jfs_unfreeze(struct super_block *sb) 559 560 { 560 561 struct jfs_sb_info *sbi = JFS_SBI(sb); 561 562 struct jfs_log *log = sbi->log; ··· 569 568 else 570 569 txResume(sb); 571 570 } 571 + return 0; 572 572 } 573 573 574 574 static int jfs_get_sb(struct file_system_type *fs_type, ··· 737 735 .delete_inode = jfs_delete_inode, 738 736 .put_super = jfs_put_super, 739 737 .sync_fs = jfs_sync_fs, 740 - .write_super_lockfs = jfs_write_super_lockfs, 741 - .unlockfs = jfs_unlockfs, 738 + .freeze_fs = jfs_freeze, 739 + .unfreeze_fs = jfs_unfreeze, 742 740 .statfs = jfs_statfs, 743 741 .remount_fs = jfs_remount, 744 742 .show_options = jfs_show_options,
+6 -4
fs/reiserfs/super.c
··· 83 83 reiserfs_sync_fs(s, 1); 84 84 } 85 85 86 - static void reiserfs_write_super_lockfs(struct super_block *s) 86 + static int reiserfs_freeze(struct super_block *s) 87 87 { 88 88 struct reiserfs_transaction_handle th; 89 89 reiserfs_write_lock(s); ··· 101 101 } 102 102 s->s_dirt = 0; 103 103 reiserfs_write_unlock(s); 104 + return 0; 104 105 } 105 106 106 - static void reiserfs_unlockfs(struct super_block *s) 107 + static int reiserfs_unfreeze(struct super_block *s) 107 108 { 108 109 reiserfs_allow_writes(s); 110 + return 0; 109 111 } 110 112 111 113 extern const struct in_core_key MAX_IN_CORE_KEY; ··· 615 613 .put_super = reiserfs_put_super, 616 614 .write_super = reiserfs_write_super, 617 615 .sync_fs = reiserfs_sync_fs, 618 - .write_super_lockfs = reiserfs_write_super_lockfs, 619 - .unlockfs = reiserfs_unlockfs, 616 + .freeze_fs = reiserfs_freeze, 617 + .unfreeze_fs = reiserfs_unfreeze, 620 618 .statfs = reiserfs_statfs, 621 619 .remount_fs = reiserfs_remount, 622 620 .show_options = generic_show_options,
+4 -4
fs/xfs/linux-2.6/xfs_super.c
··· 1269 1269 * need to take care of the metadata. Once that's done write a dummy 1270 1270 * record to dirty the log in case of a crash while frozen. 1271 1271 */ 1272 - STATIC void 1273 - xfs_fs_lockfs( 1272 + STATIC int 1273 + xfs_fs_freeze( 1274 1274 struct super_block *sb) 1275 1275 { 1276 1276 struct xfs_mount *mp = XFS_M(sb); 1277 1277 1278 1278 xfs_quiesce_attr(mp); 1279 - xfs_fs_log_dummy(mp); 1279 + return -xfs_fs_log_dummy(mp); 1280 1280 } 1281 1281 1282 1282 STATIC int ··· 1557 1557 .put_super = xfs_fs_put_super, 1558 1558 .write_super = xfs_fs_write_super, 1559 1559 .sync_fs = xfs_fs_sync_super, 1560 - .write_super_lockfs = xfs_fs_lockfs, 1560 + .freeze_fs = xfs_fs_freeze, 1561 1561 .statfs = xfs_fs_statfs, 1562 1562 .remount_fs = xfs_fs_remount, 1563 1563 .show_options = xfs_fs_show_options,
+7 -4
fs/xfs/xfs_fsops.c
··· 595 595 return 0; 596 596 } 597 597 598 - void 598 + int 599 599 xfs_fs_log_dummy( 600 600 xfs_mount_t *mp) 601 601 { 602 602 xfs_trans_t *tp; 603 603 xfs_inode_t *ip; 604 + int error; 604 605 605 606 tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1); 606 - if (xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0)) { 607 + error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0); 608 + if (error) { 607 609 xfs_trans_cancel(tp, 0); 608 - return; 610 + return error; 609 611 } 610 612 611 613 ip = mp->m_rootip; ··· 617 615 xfs_trans_ihold(tp, ip); 618 616 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 619 617 xfs_trans_set_sync(tp); 620 - xfs_trans_commit(tp, 0); 618 + error = xfs_trans_commit(tp, 0); 621 619 622 620 xfs_iunlock(ip, XFS_ILOCK_EXCL); 621 + return error; 623 622 } 624 623 625 624 int
+1 -1
fs/xfs/xfs_fsops.h
··· 25 25 extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval, 26 26 xfs_fsop_resblks_t *outval); 27 27 extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags); 28 - extern void xfs_fs_log_dummy(xfs_mount_t *mp); 28 + extern int xfs_fs_log_dummy(xfs_mount_t *mp); 29 29 30 30 #endif /* __XFS_FSOPS_H__ */
+2 -2
include/linux/fs.h
··· 1377 1377 void (*put_super) (struct super_block *); 1378 1378 void (*write_super) (struct super_block *); 1379 1379 int (*sync_fs)(struct super_block *sb, int wait); 1380 - void (*write_super_lockfs) (struct super_block *); 1381 - void (*unlockfs) (struct super_block *); 1380 + int (*freeze_fs) (struct super_block *); 1381 + int (*unfreeze_fs) (struct super_block *); 1382 1382 int (*statfs) (struct dentry *, struct kstatfs *); 1383 1383 int (*remount_fs) (struct super_block *, int *, char *); 1384 1384 void (*clear_inode) (struct inode *);