Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
ext4: fix data corruption regression by reverting commit 6de9843dab3f
ext4: Allow indirect-block file to grow the file size to max file size
ext4: allow an active handle to be started when freezing
ext4: sync the directory inode in ext4_sync_parent()
ext4: init timer earlier to avoid a kernel panic in __save_error_info
jbd2: fix potential memory leak on transaction commit
ext4: fix a double free in ext4_register_li_request
ext4: fix credits computing for indirect mapped files
ext4: remove unnecessary [cm]time update of quota file
jbd2: move bdget out of critical section

+102 -35
+2 -2
fs/ext4/ext4_jbd2.h
··· 86 86 87 87 #ifdef CONFIG_QUOTA 88 88 /* Amount of blocks needed for quota update - we know that the structure was 89 - * allocated so we need to update only inode+data */ 90 - #define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 2 : 0) 89 + * allocated so we need to update only data block */ 90 + #define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 1 : 0) 91 91 /* Amount of blocks needed for quota insert/delete - we do some block writes 92 92 * but inode, sb and group updates are done only once */ 93 93 #define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\
+14 -3
fs/ext4/fsync.c
··· 125 125 * the parent directory's parent as well, and so on recursively, if 126 126 * they are also freshly created. 127 127 */ 128 - static void ext4_sync_parent(struct inode *inode) 128 + static int ext4_sync_parent(struct inode *inode) 129 129 { 130 + struct writeback_control wbc; 130 131 struct dentry *dentry = NULL; 132 + int ret = 0; 131 133 132 134 while (inode && ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) { 133 135 ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY); ··· 138 136 if (!dentry || !dentry->d_parent || !dentry->d_parent->d_inode) 139 137 break; 140 138 inode = dentry->d_parent->d_inode; 141 - sync_mapping_buffers(inode->i_mapping); 139 + ret = sync_mapping_buffers(inode->i_mapping); 140 + if (ret) 141 + break; 142 + memset(&wbc, 0, sizeof(wbc)); 143 + wbc.sync_mode = WB_SYNC_ALL; 144 + wbc.nr_to_write = 0; /* only write out the inode */ 145 + ret = sync_inode(inode, &wbc); 146 + if (ret) 147 + break; 142 148 } 149 + return ret; 143 150 } 144 151 145 152 /* ··· 187 176 if (!journal) { 188 177 ret = generic_file_fsync(file, datasync); 189 178 if (!ret && !list_empty(&inode->i_dentry)) 190 - ext4_sync_parent(inode); 179 + ret = ext4_sync_parent(inode); 191 180 goto out; 192 181 } 193 182
+23 -12
fs/ext4/inode.c
··· 2502 2502 * for partial write. 2503 2503 */ 2504 2504 set_buffer_new(bh); 2505 + set_buffer_mapped(bh); 2505 2506 } 2506 2507 return 0; 2507 2508 } ··· 4430 4429 Indirect chain[4]; 4431 4430 Indirect *partial; 4432 4431 __le32 nr = 0; 4433 - int n; 4434 - ext4_lblk_t last_block; 4432 + int n = 0; 4433 + ext4_lblk_t last_block, max_block; 4435 4434 unsigned blocksize = inode->i_sb->s_blocksize; 4436 4435 4437 4436 trace_ext4_truncate_enter(inode); ··· 4456 4455 4457 4456 last_block = (inode->i_size + blocksize-1) 4458 4457 >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); 4458 + max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1) 4459 + >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); 4459 4460 4460 4461 if (inode->i_size & (blocksize - 1)) 4461 4462 if (ext4_block_truncate_page(handle, mapping, inode->i_size)) 4462 4463 goto out_stop; 4463 4464 4464 - n = ext4_block_to_path(inode, last_block, offsets, NULL); 4465 - if (n == 0) 4466 - goto out_stop; /* error */ 4465 + if (last_block != max_block) { 4466 + n = ext4_block_to_path(inode, last_block, offsets, NULL); 4467 + if (n == 0) 4468 + goto out_stop; /* error */ 4469 + } 4467 4470 4468 4471 /* 4469 4472 * OK. This truncate is going to happen. We add the inode to the ··· 4498 4493 */ 4499 4494 ei->i_disksize = inode->i_size; 4500 4495 4501 - if (n == 1) { /* direct blocks */ 4496 + if (last_block == max_block) { 4497 + /* 4498 + * It is unnecessary to free any data blocks if last_block is 4499 + * equal to the indirect block limit. 
4500 + */ 4501 + goto out_unlock; 4502 + } else if (n == 1) { /* direct blocks */ 4502 4503 ext4_free_data(handle, inode, NULL, i_data+offsets[0], 4503 4504 i_data + EXT4_NDIR_BLOCKS); 4504 4505 goto do_indirects; ··· 4564 4553 ; 4565 4554 } 4566 4555 4556 + out_unlock: 4567 4557 up_write(&ei->i_data_sem); 4568 4558 inode->i_mtime = inode->i_ctime = ext4_current_time(inode); 4569 4559 ext4_mark_inode_dirty(handle, inode); ··· 5410 5398 /* if nrblocks are contiguous */ 5411 5399 if (chunk) { 5412 5400 /* 5413 - * With N contiguous data blocks, it need at most 5414 - * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) indirect blocks 5415 - * 2 dindirect blocks 5416 - * 1 tindirect block 5401 + * With N contiguous data blocks, we need at most 5402 + * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) + 1 indirect blocks, 5403 + * 2 dindirect blocks, and 1 tindirect block 5417 5404 */ 5418 - indirects = nrblocks / EXT4_ADDR_PER_BLOCK(inode->i_sb); 5419 - return indirects + 3; 5405 + return DIV_ROUND_UP(nrblocks, 5406 + EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4; 5420 5407 } 5421 5408 /* 5422 5409 * if nrblocks are not contiguous, worse case, each block touch
+58 -16
fs/ext4/super.c
··· 242 242 * journal_end calls result in the superblock being marked dirty, so 243 243 * that sync() will call the filesystem's write_super callback if 244 244 * appropriate. 245 + * 246 + * To avoid j_barrier hold in userspace when a user calls freeze(), 247 + * ext4 prevents a new handle from being started by s_frozen, which 248 + * is in an upper layer. 245 249 */ 246 250 handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) 247 251 { 248 252 journal_t *journal; 253 + handle_t *handle; 249 254 250 255 if (sb->s_flags & MS_RDONLY) 251 256 return ERR_PTR(-EROFS); 252 257 253 - vfs_check_frozen(sb, SB_FREEZE_TRANS); 254 - /* Special case here: if the journal has aborted behind our 255 - * backs (eg. EIO in the commit thread), then we still need to 256 - * take the FS itself readonly cleanly. */ 257 258 journal = EXT4_SB(sb)->s_journal; 258 - if (journal) { 259 - if (is_journal_aborted(journal)) { 260 - ext4_abort(sb, "Detected aborted journal"); 261 - return ERR_PTR(-EROFS); 262 - } 263 - return jbd2_journal_start(journal, nblocks); 259 + handle = ext4_journal_current_handle(); 260 + 261 + /* 262 + * If a handle has been started, it should be allowed to 263 + * finish, otherwise deadlock could happen between freeze 264 + * and others(e.g. truncate) due to the restart of the 265 + * journal handle if the filesystem is frozen and active 266 + * handles are not stopped. 267 + */ 268 + if (!handle) 269 + vfs_check_frozen(sb, SB_FREEZE_TRANS); 270 + 271 + if (!journal) 272 + return ext4_get_nojournal(); 273 + /* 274 + * Special case here: if the journal has aborted behind our 275 + * backs (eg. EIO in the commit thread), then we still need to 276 + * take the FS itself readonly cleanly. 
277 + */ 278 + if (is_journal_aborted(journal)) { 279 + ext4_abort(sb, "Detected aborted journal"); 280 + return ERR_PTR(-EROFS); 264 281 } 265 - return ext4_get_nojournal(); 282 + return jbd2_journal_start(journal, nblocks); 266 283 } 267 284 268 285 /* ··· 2992 2975 mutex_unlock(&ext4_li_info->li_list_mtx); 2993 2976 2994 2977 sbi->s_li_request = elr; 2978 + /* 2979 + * set elr to NULL here since it has been inserted to 2980 + * the request_list and the removal and free of it is 2981 + * handled by ext4_clear_request_list from now on. 2982 + */ 2983 + elr = NULL; 2995 2984 2996 2985 if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) { 2997 2986 ret = ext4_run_lazyinit_thread(); ··· 3408 3385 get_random_bytes(&sbi->s_next_generation, sizeof(u32)); 3409 3386 spin_lock_init(&sbi->s_next_gen_lock); 3410 3387 3388 + init_timer(&sbi->s_err_report); 3389 + sbi->s_err_report.function = print_daily_error_info; 3390 + sbi->s_err_report.data = (unsigned long) sb; 3391 + 3411 3392 err = percpu_counter_init(&sbi->s_freeblocks_counter, 3412 3393 ext4_count_free_blocks(sb)); 3413 3394 if (!err) { ··· 3673 3646 "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts, 3674 3647 *sbi->s_es->s_mount_opts ? "; " : "", orig_data); 3675 3648 3676 - init_timer(&sbi->s_err_report); 3677 - sbi->s_err_report.function = print_daily_error_info; 3678 - sbi->s_err_report.data = (unsigned long) sb; 3679 3649 if (es->s_error_count) 3680 3650 mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */ 3681 3651 ··· 3696 3672 sbi->s_journal = NULL; 3697 3673 } 3698 3674 failed_mount3: 3675 + del_timer(&sbi->s_err_report); 3699 3676 if (sbi->s_flex_groups) { 3700 3677 if (is_vmalloc_addr(sbi->s_flex_groups)) 3701 3678 vfree(sbi->s_flex_groups); ··· 4163 4138 /* 4164 4139 * LVM calls this function before a (read-only) snapshot is created. This 4165 4140 * gives us a chance to flush the journal completely and mark the fs clean. 
4141 + * 4142 + * Note that only this function cannot bring a filesystem to be in a clean 4143 + * state independently, because ext4 prevents a new handle from being started 4144 + * by @sb->s_frozen, which stays in an upper layer. It thus needs help from 4145 + * the upper layer. 4166 4146 */ 4167 4147 static int ext4_freeze(struct super_block *sb) 4168 4148 { ··· 4644 4614 4645 4615 static int ext4_quota_off(struct super_block *sb, int type) 4646 4616 { 4617 + struct inode *inode = sb_dqopt(sb)->files[type]; 4618 + handle_t *handle; 4619 + 4647 4620 /* Force all delayed allocation blocks to be allocated. 4648 4621 * Caller already holds s_umount sem */ 4649 4622 if (test_opt(sb, DELALLOC)) 4650 4623 sync_filesystem(sb); 4651 4624 4625 + /* Update modification times of quota files when userspace can 4626 + * start looking at them */ 4627 + handle = ext4_journal_start(inode, 1); 4628 + if (IS_ERR(handle)) 4629 + goto out; 4630 + inode->i_mtime = inode->i_ctime = CURRENT_TIME; 4631 + ext4_mark_inode_dirty(handle, inode); 4632 + ext4_journal_stop(handle); 4633 + 4634 + out: 4652 4635 return dquot_quota_off(sb, type); 4653 4636 } 4654 4637 ··· 4757 4714 if (inode->i_size < off + len) { 4758 4715 i_size_write(inode, off + len); 4759 4716 EXT4_I(inode)->i_disksize = inode->i_size; 4717 + ext4_mark_inode_dirty(handle, inode); 4760 4718 } 4761 - inode->i_mtime = inode->i_ctime = CURRENT_TIME; 4762 - ext4_mark_inode_dirty(handle, inode); 4763 4719 mutex_unlock(&inode->i_mutex); 4764 4720 return len; 4765 4721 }
+3 -1
fs/jbd2/commit.c
··· 105 105 int ret; 106 106 struct timespec now = current_kernel_time(); 107 107 108 + *cbh = NULL; 109 + 108 110 if (is_journal_aborted(journal)) 109 111 return 0; 110 112 ··· 808 806 if (err) 809 807 __jbd2_journal_abort_hard(journal); 810 808 } 811 - if (!err && !is_journal_aborted(journal)) 809 + if (cbh) 812 810 err = journal_wait_on_commit_record(journal, cbh); 813 811 if (JBD2_HAS_INCOMPAT_FEATURE(journal, 814 812 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT) &&
+2 -1
fs/jbd2/journal.c
··· 2413 2413 new_dev = kmalloc(sizeof(struct devname_cache), GFP_KERNEL); 2414 2414 if (!new_dev) 2415 2415 return "NODEV-ALLOCFAILURE"; /* Something non-NULL */ 2416 + bd = bdget(device); 2416 2417 spin_lock(&devname_cache_lock); 2417 2418 if (devcache[i]) { 2418 2419 if (devcache[i]->device == device) { 2419 2420 kfree(new_dev); 2421 + bdput(bd); 2420 2422 ret = devcache[i]->devname; 2421 2423 spin_unlock(&devname_cache_lock); 2422 2424 return ret; ··· 2427 2425 } 2428 2426 devcache[i] = new_dev; 2429 2427 devcache[i]->device = device; 2430 - bd = bdget(device); 2431 2428 if (bd) { 2432 2429 bdevname(bd, devcache[i]->devname); 2433 2430 bdput(bd);