Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ext4: prevent parallel resizers by atomic bit ops

Before this patch, parallel resizers are allowed and protected by a
mutex lock. Actually, there is no need to support parallel resizers, so
this patch prevents parallel resizers by atomic bit ops, like
lock_page() and unlock_page() do.

To do this, the patch removes the mutex lock s_resize_lock from struct
ext4_sb_info and adds an unsigned long field named s_resize_flags,
which indicates whether there is a resizer.

Signed-off-by: Yongqiang Yang <xiaoqiangnk@gmail.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

authored by

Yongqiang Yang and committed by
Theodore Ts'o
8f82f840 2d859db3

+36 -40
+6 -1
fs/ext4/ext4.h
··· 1127 1127 struct journal_s *s_journal; 1128 1128 struct list_head s_orphan; 1129 1129 struct mutex s_orphan_lock; 1130 - struct mutex s_resize_lock; 1130 + unsigned long s_resize_flags; /* Flags indicating if there 1131 + is a resizer */ 1131 1132 unsigned long s_commit_interval; 1132 1133 u32 s_max_batch_time; 1133 1134 u32 s_min_batch_time; ··· 2269 2268 EXT4_WQ_HASH_SZ]) 2270 2269 extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ]; 2271 2270 extern struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ]; 2271 + 2272 + #define EXT4_RESIZING 0 2273 + extern int ext4_resize_begin(struct super_block *sb); 2274 + extern void ext4_resize_end(struct super_block *sb); 2272 2275 2273 2276 #endif /* __KERNEL__ */ 2274 2277
+8 -4
fs/ext4/ioctl.c
··· 202 202 struct super_block *sb = inode->i_sb; 203 203 int err, err2=0; 204 204 205 - if (!capable(CAP_SYS_RESOURCE)) 206 - return -EPERM; 205 + err = ext4_resize_begin(sb); 206 + if (err) 207 + return err; 207 208 208 209 if (get_user(n_blocks_count, (__u32 __user *)arg)) 209 210 return -EFAULT; ··· 222 221 if (err == 0) 223 222 err = err2; 224 223 mnt_drop_write(filp->f_path.mnt); 224 + ext4_resize_end(sb); 225 225 226 226 return err; 227 227 } ··· 273 271 struct super_block *sb = inode->i_sb; 274 272 int err, err2=0; 275 273 276 - if (!capable(CAP_SYS_RESOURCE)) 277 - return -EPERM; 274 + err = ext4_resize_begin(sb); 275 + if (err) 276 + return err; 278 277 279 278 if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg, 280 279 sizeof(input))) ··· 294 291 if (err == 0) 295 292 err = err2; 296 293 mnt_drop_write(filp->f_path.mnt); 294 + ext4_resize_end(sb); 297 295 298 296 return err; 299 297 }
+21 -34
fs/ext4/resize.c
··· 16 16 17 17 #include "ext4_jbd2.h" 18 18 19 + int ext4_resize_begin(struct super_block *sb) 20 + { 21 + int ret = 0; 22 + 23 + if (!capable(CAP_SYS_RESOURCE)) 24 + return -EPERM; 25 + 26 + if (test_and_set_bit_lock(EXT4_RESIZING, &EXT4_SB(sb)->s_resize_flags)) 27 + ret = -EBUSY; 28 + 29 + return ret; 30 + } 31 + 32 + void ext4_resize_end(struct super_block *sb) 33 + { 34 + clear_bit_unlock(EXT4_RESIZING, &EXT4_SB(sb)->s_resize_flags); 35 + smp_mb__after_clear_bit(); 36 + } 37 + 19 38 #define outside(b, first, last) ((b) < (first) || (b) >= (last)) 20 39 #define inside(b, first, last) ((b) >= (first) && (b) < (last)) 21 40 ··· 200 181 if (IS_ERR(handle)) 201 182 return PTR_ERR(handle); 202 183 203 - mutex_lock(&sbi->s_resize_lock); 204 - if (input->group != sbi->s_groups_count) { 205 - err = -EBUSY; 206 - goto exit_journal; 207 - } 184 + BUG_ON(input->group != sbi->s_groups_count); 208 185 209 186 if (IS_ERR(bh = bclean(handle, sb, input->block_bitmap))) { 210 187 err = PTR_ERR(bh); ··· 300 285 brelse(bh); 301 286 302 287 exit_journal: 303 - mutex_unlock(&sbi->s_resize_lock); 304 288 if ((err2 = ext4_journal_stop(handle)) && !err) 305 289 err = err2; 306 290 ··· 813 799 goto exit_put; 814 800 } 815 801 816 - mutex_lock(&sbi->s_resize_lock); 817 - if (input->group != sbi->s_groups_count) { 818 - ext4_warning(sb, "multiple resizers run on filesystem!"); 819 - err = -EBUSY; 820 - goto exit_journal; 821 - } 822 - 823 802 if ((err = ext4_journal_get_write_access(handle, sbi->s_sbh))) 824 803 goto exit_journal; 825 804 ··· 836 829 /* 837 830 * OK, now we've set up the new group. Time to make it active. 838 831 * 839 - * We do not lock all allocations via s_resize_lock 840 832 * so we have to be safe wrt. concurrent accesses the group 841 833 * data. So we need to be careful to set all of the relevant 842 834 * group descriptor data etc. *before* we enable the group. 
··· 892 886 * 893 887 * The precise rules we use are: 894 888 * 895 - * * Writers of s_groups_count *must* hold s_resize_lock 896 - * AND 897 889 * * Writers must perform a smp_wmb() after updating all dependent 898 890 * data and before modifying the groups count 899 891 * 900 - * * Readers must hold s_resize_lock over the access 901 - * OR 902 892 * * Readers must perform an smp_rmb() after reading the groups count 903 893 * and before reading any dependent data. 904 894 * ··· 939 937 ext4_handle_dirty_super(handle, sb); 940 938 941 939 exit_journal: 942 - mutex_unlock(&sbi->s_resize_lock); 943 940 if ((err2 = ext4_journal_stop(handle)) && !err) 944 941 err = err2; 945 942 if (!err) { ··· 973 972 int err; 974 973 ext4_group_t group; 975 974 976 - /* We don't need to worry about locking wrt other resizers just 977 - * yet: we're going to revalidate es->s_blocks_count after 978 - * taking the s_resize_lock below. */ 979 975 o_blocks_count = ext4_blocks_count(es); 980 976 981 977 if (test_opt(sb, DEBUG)) ··· 993 995 994 996 if (n_blocks_count < o_blocks_count) { 995 997 ext4_warning(sb, "can't shrink FS - resize aborted"); 996 - return -EBUSY; 998 + return -EINVAL; 997 999 } 998 1000 999 1001 /* Handle the remaining blocks in the last group only. 
*/ ··· 1036 1038 goto exit_put; 1037 1039 } 1038 1040 1039 - mutex_lock(&EXT4_SB(sb)->s_resize_lock); 1040 - if (o_blocks_count != ext4_blocks_count(es)) { 1041 - ext4_warning(sb, "multiple resizers run on filesystem!"); 1042 - mutex_unlock(&EXT4_SB(sb)->s_resize_lock); 1043 - ext4_journal_stop(handle); 1044 - err = -EBUSY; 1045 - goto exit_put; 1046 - } 1047 - 1048 1041 if ((err = ext4_journal_get_write_access(handle, 1049 1042 EXT4_SB(sb)->s_sbh))) { 1050 1043 ext4_warning(sb, "error %d on journal write access", err); 1051 - mutex_unlock(&EXT4_SB(sb)->s_resize_lock); 1052 1044 ext4_journal_stop(handle); 1053 1045 goto exit_put; 1054 1046 } 1055 1047 ext4_blocks_count_set(es, o_blocks_count + add); 1056 - mutex_unlock(&EXT4_SB(sb)->s_resize_lock); 1057 1048 ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, 1058 1049 o_blocks_count + add); 1059 1050 /* We add the blocks to the bitmap and set the group need init bit */
+1 -1
fs/ext4/super.c
··· 3500 3500 3501 3501 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ 3502 3502 mutex_init(&sbi->s_orphan_lock); 3503 - mutex_init(&sbi->s_resize_lock); 3503 + sbi->s_resize_flags = 0; 3504 3504 3505 3505 sb->s_root = NULL; 3506 3506