Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
ext4: Add new "development flag" to the ext4 filesystem
ext4: Don't panic in case of corrupt bitmap
ext4: allocate struct ext4_allocation_context from a kmem cache
JBD2: Clear buffer_ordered flag for barriered IO request on success
ext4: Fix Direct I/O locking
ext4: Fix circular locking dependency with migrate and rm.
allow in-inode EAs on ext4 root inode
ext4: Fix null bh pointer dereference in mballoc
ext4: Don't set EXTENTS_FL flag for fast symlinks
JBD2: Use the incompat macro for testing the incompat feature.
jbd2: Fix reference counting on the journal commit block's buffer head
[PATCH] jbd: Remove useless loop when writing commit record
jbd2: Add error check to journal_wait_on_commit_record to avoid oops

+270 -177
+54 -61
fs/ext4/inode.c
··· 892 892 return err; 893 893 } 894 894 895 - #define DIO_CREDITS (EXT4_RESERVE_TRANS_BLOCKS + 32) 895 + /* Maximum number of blocks we map for direct IO at once. */ 896 + #define DIO_MAX_BLOCKS 4096 897 + /* 898 + * Number of credits we need for writing DIO_MAX_BLOCKS: 899 + * We need sb + group descriptor + bitmap + inode -> 4 900 + * For B blocks with A block pointers per block we need: 901 + * 1 (triple ind.) + (B/A/A + 2) (doubly ind.) + (B/A + 2) (indirect). 902 + * If we plug in 4096 for B and 256 for A (for 1KB block size), we get 25. 903 + */ 904 + #define DIO_CREDITS 25 896 905 897 906 int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, 898 907 unsigned long max_blocks, struct buffer_head *bh, ··· 948 939 struct buffer_head *bh_result, int create) 949 940 { 950 941 handle_t *handle = ext4_journal_current_handle(); 951 - int ret = 0; 942 + int ret = 0, started = 0; 952 943 unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; 953 944 954 - if (!create) 955 - goto get_block; /* A read */ 956 - 957 - if (max_blocks == 1) 958 - goto get_block; /* A single block get */ 959 - 960 - if (handle->h_transaction->t_state == T_LOCKED) { 961 - /* 962 - * Huge direct-io writes can hold off commits for long 963 - * periods of time. Let this commit run. 964 - */ 965 - ext4_journal_stop(handle); 966 - handle = ext4_journal_start(inode, DIO_CREDITS); 967 - if (IS_ERR(handle)) 945 + if (create && !handle) { 946 + /* Direct IO write... */ 947 + if (max_blocks > DIO_MAX_BLOCKS) 948 + max_blocks = DIO_MAX_BLOCKS; 949 + handle = ext4_journal_start(inode, DIO_CREDITS + 950 + 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb)); 951 + if (IS_ERR(handle)) { 968 952 ret = PTR_ERR(handle); 969 - goto get_block; 970 - } 971 - 972 - if (handle->h_buffer_credits <= EXT4_RESERVE_TRANS_BLOCKS) { 973 - /* 974 - * Getting low on buffer credits... 
975 - */ 976 - ret = ext4_journal_extend(handle, DIO_CREDITS); 977 - if (ret > 0) { 978 - /* 979 - * Couldn't extend the transaction. Start a new one. 980 - */ 981 - ret = ext4_journal_restart(handle, DIO_CREDITS); 953 + goto out; 982 954 } 955 + started = 1; 983 956 } 984 957 985 - get_block: 986 - if (ret == 0) { 987 - ret = ext4_get_blocks_wrap(handle, inode, iblock, 958 + ret = ext4_get_blocks_wrap(handle, inode, iblock, 988 959 max_blocks, bh_result, create, 0); 989 - if (ret > 0) { 990 - bh_result->b_size = (ret << inode->i_blkbits); 991 - ret = 0; 992 - } 960 + if (ret > 0) { 961 + bh_result->b_size = (ret << inode->i_blkbits); 962 + ret = 0; 993 963 } 964 + if (started) 965 + ext4_journal_stop(handle); 966 + out: 994 967 return ret; 995 968 } 996 969 ··· 1662 1671 * if the machine crashes during the write. 1663 1672 * 1664 1673 * If the O_DIRECT write is intantiating holes inside i_size and the machine 1665 - * crashes then stale disk data _may_ be exposed inside the file. 1674 + * crashes then stale disk data _may_ be exposed inside the file. But current 1675 + * VFS code falls back into buffered path in that case so we are safe. 
1666 1676 */ 1667 1677 static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, 1668 1678 const struct iovec *iov, loff_t offset, ··· 1672 1680 struct file *file = iocb->ki_filp; 1673 1681 struct inode *inode = file->f_mapping->host; 1674 1682 struct ext4_inode_info *ei = EXT4_I(inode); 1675 - handle_t *handle = NULL; 1683 + handle_t *handle; 1676 1684 ssize_t ret; 1677 1685 int orphan = 0; 1678 1686 size_t count = iov_length(iov, nr_segs); ··· 1680 1688 if (rw == WRITE) { 1681 1689 loff_t final_size = offset + count; 1682 1690 1683 - handle = ext4_journal_start(inode, DIO_CREDITS); 1684 - if (IS_ERR(handle)) { 1685 - ret = PTR_ERR(handle); 1686 - goto out; 1687 - } 1688 1691 if (final_size > inode->i_size) { 1692 + /* Credits for sb + inode write */ 1693 + handle = ext4_journal_start(inode, 2); 1694 + if (IS_ERR(handle)) { 1695 + ret = PTR_ERR(handle); 1696 + goto out; 1697 + } 1689 1698 ret = ext4_orphan_add(handle, inode); 1690 - if (ret) 1691 - goto out_stop; 1699 + if (ret) { 1700 + ext4_journal_stop(handle); 1701 + goto out; 1702 + } 1692 1703 orphan = 1; 1693 1704 ei->i_disksize = inode->i_size; 1705 + ext4_journal_stop(handle); 1694 1706 } 1695 1707 } 1696 1708 ··· 1702 1706 offset, nr_segs, 1703 1707 ext4_get_block, NULL); 1704 1708 1705 - /* 1706 - * Reacquire the handle: ext4_get_block() can restart the transaction 1707 - */ 1708 - handle = ext4_journal_current_handle(); 1709 - 1710 - out_stop: 1711 - if (handle) { 1709 + if (orphan) { 1712 1710 int err; 1713 1711 1714 - if (orphan && inode->i_nlink) 1712 + /* Credits for sb + inode write */ 1713 + handle = ext4_journal_start(inode, 2); 1714 + if (IS_ERR(handle)) { 1715 + /* This is really bad luck. We've written the data 1716 + * but cannot extend i_size. Bail out and pretend 1717 + * the write failed... 
*/ 1718 + ret = PTR_ERR(handle); 1719 + goto out; 1720 + } 1721 + if (inode->i_nlink) 1715 1722 ext4_orphan_del(handle, inode); 1716 - if (orphan && ret > 0) { 1723 + if (ret > 0) { 1717 1724 loff_t end = offset + ret; 1718 1725 if (end > inode->i_size) { 1719 1726 ei->i_disksize = end; ··· 2757 2758 ei->i_data[block] = raw_inode->i_block[block]; 2758 2759 INIT_LIST_HEAD(&ei->i_orphan); 2759 2760 2760 - if (inode->i_ino >= EXT4_FIRST_INO(inode->i_sb) + 1 && 2761 - EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { 2762 - /* 2763 - * When mke2fs creates big inodes it does not zero out 2764 - * the unused bytes above EXT4_GOOD_OLD_INODE_SIZE, 2765 - * so ignore those first few inodes. 2766 - */ 2761 + if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { 2767 2762 ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); 2768 2763 if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > 2769 2764 EXT4_INODE_SIZE(inode->i_sb)) {
+104 -60
fs/ext4/mballoc.c
··· 420 420 #define MB_DEFAULT_GROUP_PREALLOC 512 421 421 422 422 static struct kmem_cache *ext4_pspace_cachep; 423 + static struct kmem_cache *ext4_ac_cachep; 423 424 424 425 #ifdef EXT4_BB_MAX_BLOCKS 425 426 #undef EXT4_BB_MAX_BLOCKS ··· 681 680 { 682 681 char *bb; 683 682 684 - /* FIXME!! is this needed */ 685 683 BUG_ON(EXT4_MB_BITMAP(e4b) == EXT4_MB_BUDDY(e4b)); 686 684 BUG_ON(max == NULL); 687 685 ··· 964 964 grp->bb_fragments = fragments; 965 965 966 966 if (free != grp->bb_free) { 967 - printk(KERN_DEBUG 967 + ext4_error(sb, __FUNCTION__, 968 968 "EXT4-fs: group %lu: %u blocks in bitmap, %u in gd\n", 969 969 group, free, grp->bb_free); 970 970 grp->bb_free = free; ··· 1821 1821 i = ext4_find_next_zero_bit(bitmap, 1822 1822 EXT4_BLOCKS_PER_GROUP(sb), i); 1823 1823 if (i >= EXT4_BLOCKS_PER_GROUP(sb)) { 1824 - BUG_ON(free != 0); 1824 + /* 1825 + * IF we corrupt the bitmap we won't find any 1826 + * free blocks even though group info says we 1827 + * we have free blocks 1828 + */ 1829 + ext4_error(sb, __FUNCTION__, "%d free blocks as per " 1830 + "group info. But bitmap says 0\n", 1831 + free); 1825 1832 break; 1826 1833 } 1827 1834 1828 1835 mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex); 1829 1836 BUG_ON(ex.fe_len <= 0); 1830 - BUG_ON(free < ex.fe_len); 1837 + if (free < ex.fe_len) { 1838 + ext4_error(sb, __FUNCTION__, "%d free blocks as per " 1839 + "group info. 
But got %d blocks\n", 1840 + free, ex.fe_len); 1841 + } 1831 1842 1832 1843 ext4_mb_measure_extent(ac, &ex, e4b); 1833 1844 ··· 2970 2959 if (ext4_pspace_cachep == NULL) 2971 2960 return -ENOMEM; 2972 2961 2962 + ext4_ac_cachep = 2963 + kmem_cache_create("ext4_alloc_context", 2964 + sizeof(struct ext4_allocation_context), 2965 + 0, SLAB_RECLAIM_ACCOUNT, NULL); 2966 + if (ext4_ac_cachep == NULL) { 2967 + kmem_cache_destroy(ext4_pspace_cachep); 2968 + return -ENOMEM; 2969 + } 2973 2970 #ifdef CONFIG_PROC_FS 2974 2971 proc_root_ext4 = proc_mkdir(EXT4_ROOT, proc_root_fs); 2975 2972 if (proc_root_ext4 == NULL) 2976 2973 printk(KERN_ERR "EXT4-fs: Unable to create %s\n", EXT4_ROOT); 2977 2974 #endif 2978 - 2979 2975 return 0; 2980 2976 } 2981 2977 ··· 2990 2972 { 2991 2973 /* XXX: synchronize_rcu(); */ 2992 2974 kmem_cache_destroy(ext4_pspace_cachep); 2975 + kmem_cache_destroy(ext4_ac_cachep); 2993 2976 #ifdef CONFIG_PROC_FS 2994 2977 remove_proc_entry(EXT4_ROOT, proc_root_fs); 2995 2978 #endif ··· 3088 3069 3089 3070 out_err: 3090 3071 sb->s_dirt = 1; 3091 - put_bh(bitmap_bh); 3072 + brelse(bitmap_bh); 3092 3073 return err; 3093 3074 } 3094 3075 ··· 3373 3354 ac->ac_pa = pa; 3374 3355 3375 3356 /* we don't correct pa_pstart or pa_plen here to avoid 3376 - * possible race when tte group is being loaded concurrently 3357 + * possible race when the group is being loaded concurrently 3377 3358 * instead we correct pa later, after blocks are marked 3378 - * in on-disk bitmap -- see ext4_mb_release_context() */ 3379 - /* 3380 - * FIXME!! but the other CPUs can look at this particular 3381 - * pa and think that it have enought free blocks if we 3382 - * don't update pa_free here right ? 
3359 + * in on-disk bitmap -- see ext4_mb_release_context() 3360 + * Other CPUs are prevented from allocating from this pa by lg_mutex 3383 3361 */ 3384 3362 mb_debug("use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa); 3385 3363 } ··· 3715 3699 struct buffer_head *bitmap_bh, 3716 3700 struct ext4_prealloc_space *pa) 3717 3701 { 3718 - struct ext4_allocation_context ac; 3702 + struct ext4_allocation_context *ac; 3719 3703 struct super_block *sb = e4b->bd_sb; 3720 3704 struct ext4_sb_info *sbi = EXT4_SB(sb); 3721 3705 unsigned long end; ··· 3731 3715 BUG_ON(group != e4b->bd_group && pa->pa_len != 0); 3732 3716 end = bit + pa->pa_len; 3733 3717 3734 - ac.ac_sb = sb; 3735 - ac.ac_inode = pa->pa_inode; 3736 - ac.ac_op = EXT4_MB_HISTORY_DISCARD; 3718 + ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); 3719 + 3720 + if (ac) { 3721 + ac->ac_sb = sb; 3722 + ac->ac_inode = pa->pa_inode; 3723 + ac->ac_op = EXT4_MB_HISTORY_DISCARD; 3724 + } 3737 3725 3738 3726 while (bit < end) { 3739 3727 bit = ext4_find_next_zero_bit(bitmap_bh->b_data, end, bit); ··· 3753 3733 (unsigned) group); 3754 3734 free += next - bit; 3755 3735 3756 - ac.ac_b_ex.fe_group = group; 3757 - ac.ac_b_ex.fe_start = bit; 3758 - ac.ac_b_ex.fe_len = next - bit; 3759 - ac.ac_b_ex.fe_logical = 0; 3760 - ext4_mb_store_history(&ac); 3736 + if (ac) { 3737 + ac->ac_b_ex.fe_group = group; 3738 + ac->ac_b_ex.fe_start = bit; 3739 + ac->ac_b_ex.fe_len = next - bit; 3740 + ac->ac_b_ex.fe_logical = 0; 3741 + ext4_mb_store_history(ac); 3742 + } 3761 3743 3762 3744 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); 3763 3745 bit = next + 1; 3764 3746 } 3765 3747 if (free != pa->pa_free) { 3766 - printk(KERN_ERR "pa %p: logic %lu, phys. %lu, len %lu\n", 3748 + printk(KERN_CRIT "pa %p: logic %lu, phys. 
%lu, len %lu\n", 3767 3749 pa, (unsigned long) pa->pa_lstart, 3768 3750 (unsigned long) pa->pa_pstart, 3769 3751 (unsigned long) pa->pa_len); 3770 - printk(KERN_ERR "free %u, pa_free %u\n", free, pa->pa_free); 3752 + ext4_error(sb, __FUNCTION__, "free %u, pa_free %u\n", 3753 + free, pa->pa_free); 3771 3754 } 3772 - BUG_ON(free != pa->pa_free); 3773 3755 atomic_add(free, &sbi->s_mb_discarded); 3756 + if (ac) 3757 + kmem_cache_free(ext4_ac_cachep, ac); 3774 3758 3775 3759 return err; 3776 3760 } ··· 3782 3758 static int ext4_mb_release_group_pa(struct ext4_buddy *e4b, 3783 3759 struct ext4_prealloc_space *pa) 3784 3760 { 3785 - struct ext4_allocation_context ac; 3761 + struct ext4_allocation_context *ac; 3786 3762 struct super_block *sb = e4b->bd_sb; 3787 3763 ext4_group_t group; 3788 3764 ext4_grpblk_t bit; 3789 3765 3790 - ac.ac_op = EXT4_MB_HISTORY_DISCARD; 3766 + ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); 3767 + 3768 + if (ac) 3769 + ac->ac_op = EXT4_MB_HISTORY_DISCARD; 3791 3770 3792 3771 BUG_ON(pa->pa_deleted == 0); 3793 3772 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); ··· 3798 3771 mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len); 3799 3772 atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded); 3800 3773 3801 - ac.ac_sb = sb; 3802 - ac.ac_inode = NULL; 3803 - ac.ac_b_ex.fe_group = group; 3804 - ac.ac_b_ex.fe_start = bit; 3805 - ac.ac_b_ex.fe_len = pa->pa_len; 3806 - ac.ac_b_ex.fe_logical = 0; 3807 - ext4_mb_store_history(&ac); 3774 + if (ac) { 3775 + ac->ac_sb = sb; 3776 + ac->ac_inode = NULL; 3777 + ac->ac_b_ex.fe_group = group; 3778 + ac->ac_b_ex.fe_start = bit; 3779 + ac->ac_b_ex.fe_len = pa->pa_len; 3780 + ac->ac_b_ex.fe_logical = 0; 3781 + ext4_mb_store_history(ac); 3782 + kmem_cache_free(ext4_ac_cachep, ac); 3783 + } 3808 3784 3809 3785 return 0; 3810 3786 } ··· 4261 4231 ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, 4262 4232 struct ext4_allocation_request *ar, int *errp) 4263 4233 { 4264 - struct ext4_allocation_context 
ac; 4234 + struct ext4_allocation_context *ac = NULL; 4265 4235 struct ext4_sb_info *sbi; 4266 4236 struct super_block *sb; 4267 4237 ext4_fsblk_t block = 0; ··· 4287 4257 } 4288 4258 inquota = ar->len; 4289 4259 4260 + ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); 4261 + if (!ac) { 4262 + *errp = -ENOMEM; 4263 + return 0; 4264 + } 4265 + 4290 4266 ext4_mb_poll_new_transaction(sb, handle); 4291 4267 4292 - *errp = ext4_mb_initialize_context(&ac, ar); 4268 + *errp = ext4_mb_initialize_context(ac, ar); 4293 4269 if (*errp) { 4294 4270 ar->len = 0; 4295 4271 goto out; 4296 4272 } 4297 4273 4298 - ac.ac_op = EXT4_MB_HISTORY_PREALLOC; 4299 - if (!ext4_mb_use_preallocated(&ac)) { 4274 + ac->ac_op = EXT4_MB_HISTORY_PREALLOC; 4275 + if (!ext4_mb_use_preallocated(ac)) { 4300 4276 4301 - ac.ac_op = EXT4_MB_HISTORY_ALLOC; 4302 - ext4_mb_normalize_request(&ac, ar); 4277 + ac->ac_op = EXT4_MB_HISTORY_ALLOC; 4278 + ext4_mb_normalize_request(ac, ar); 4303 4279 4304 4280 repeat: 4305 4281 /* allocate space in core */ 4306 - ext4_mb_regular_allocator(&ac); 4282 + ext4_mb_regular_allocator(ac); 4307 4283 4308 4284 /* as we've just preallocated more space than 4309 4285 * user requested orinally, we store allocated 4310 4286 * space in a special descriptor */ 4311 - if (ac.ac_status == AC_STATUS_FOUND && 4312 - ac.ac_o_ex.fe_len < ac.ac_b_ex.fe_len) 4313 - ext4_mb_new_preallocation(&ac); 4287 + if (ac->ac_status == AC_STATUS_FOUND && 4288 + ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len) 4289 + ext4_mb_new_preallocation(ac); 4314 4290 } 4315 4291 4316 - if (likely(ac.ac_status == AC_STATUS_FOUND)) { 4317 - ext4_mb_mark_diskspace_used(&ac, handle); 4292 + if (likely(ac->ac_status == AC_STATUS_FOUND)) { 4293 + ext4_mb_mark_diskspace_used(ac, handle); 4318 4294 *errp = 0; 4319 - block = ext4_grp_offs_to_block(sb, &ac.ac_b_ex); 4320 - ar->len = ac.ac_b_ex.fe_len; 4295 + block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex); 4296 + ar->len = ac->ac_b_ex.fe_len; 4321 4297 } else { 4322 - freed = 
ext4_mb_discard_preallocations(sb, ac.ac_o_ex.fe_len); 4298 + freed = ext4_mb_discard_preallocations(sb, ac->ac_o_ex.fe_len); 4323 4299 if (freed) 4324 4300 goto repeat; 4325 4301 *errp = -ENOSPC; 4326 - ac.ac_b_ex.fe_len = 0; 4302 + ac->ac_b_ex.fe_len = 0; 4327 4303 ar->len = 0; 4328 - ext4_mb_show_ac(&ac); 4304 + ext4_mb_show_ac(ac); 4329 4305 } 4330 4306 4331 - ext4_mb_release_context(&ac); 4307 + ext4_mb_release_context(ac); 4332 4308 4333 4309 out: 4334 4310 if (ar->len < inquota) 4335 4311 DQUOT_FREE_BLOCK(ar->inode, inquota - ar->len); 4336 4312 4313 + kmem_cache_free(ext4_ac_cachep, ac); 4337 4314 return block; 4338 4315 } 4339 4316 static void ext4_mb_poll_new_transaction(struct super_block *sb, ··· 4442 4405 unsigned long block, unsigned long count, 4443 4406 int metadata, unsigned long *freed) 4444 4407 { 4445 - struct buffer_head *bitmap_bh = 0; 4408 + struct buffer_head *bitmap_bh = NULL; 4446 4409 struct super_block *sb = inode->i_sb; 4447 - struct ext4_allocation_context ac; 4410 + struct ext4_allocation_context *ac = NULL; 4448 4411 struct ext4_group_desc *gdp; 4449 4412 struct ext4_super_block *es; 4450 4413 unsigned long overflow; ··· 4473 4436 4474 4437 ext4_debug("freeing block %lu\n", block); 4475 4438 4476 - ac.ac_op = EXT4_MB_HISTORY_FREE; 4477 - ac.ac_inode = inode; 4478 - ac.ac_sb = sb; 4439 + ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); 4440 + if (ac) { 4441 + ac->ac_op = EXT4_MB_HISTORY_FREE; 4442 + ac->ac_inode = inode; 4443 + ac->ac_sb = sb; 4444 + } 4479 4445 4480 4446 do_more: 4481 4447 overflow = 0; ··· 4544 4504 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); 4545 4505 err = ext4_journal_dirty_metadata(handle, bitmap_bh); 4546 4506 4547 - ac.ac_b_ex.fe_group = block_group; 4548 - ac.ac_b_ex.fe_start = bit; 4549 - ac.ac_b_ex.fe_len = count; 4550 - ext4_mb_store_history(&ac); 4507 + if (ac) { 4508 + ac->ac_b_ex.fe_group = block_group; 4509 + ac->ac_b_ex.fe_start = bit; 4510 + ac->ac_b_ex.fe_len = count; 4511 + 
ext4_mb_store_history(ac); 4512 + } 4551 4513 4552 4514 if (metadata) { 4553 4515 /* blocks being freed are metadata. these blocks shouldn't ··· 4590 4548 error_return: 4591 4549 brelse(bitmap_bh); 4592 4550 ext4_std_error(sb, err); 4551 + if (ac) 4552 + kmem_cache_free(ext4_ac_cachep, ac); 4593 4553 return; 4594 4554 }
+80 -43
fs/ext4/migrate.c
··· 61 61 retval = ext4_journal_restart(handle, needed); 62 62 if (retval) 63 63 goto err_out; 64 - } 65 - if (needed) { 64 + } else if (needed) { 66 65 retval = ext4_journal_extend(handle, needed); 67 - if (retval != 0) { 66 + if (retval) { 68 67 /* 69 68 * IF not able to extend the journal restart the journal 70 69 */ ··· 219 220 220 221 } 221 222 223 + static int extend_credit_for_blkdel(handle_t *handle, struct inode *inode) 224 + { 225 + int retval = 0, needed; 226 + 227 + if (handle->h_buffer_credits > EXT4_RESERVE_TRANS_BLOCKS) 228 + return 0; 229 + /* 230 + * We are freeing a blocks. During this we touch 231 + * superblock, group descriptor and block bitmap. 232 + * So allocate a credit of 3. We may update 233 + * quota (user and group). 234 + */ 235 + needed = 3 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); 236 + 237 + if (ext4_journal_extend(handle, needed) != 0) 238 + retval = ext4_journal_restart(handle, needed); 239 + 240 + return retval; 241 + } 242 + 222 243 static int free_dind_blocks(handle_t *handle, 223 244 struct inode *inode, __le32 i_data) 224 245 { ··· 253 234 254 235 tmp_idata = (__le32 *)bh->b_data; 255 236 for (i = 0; i < max_entries; i++) { 256 - if (tmp_idata[i]) 237 + if (tmp_idata[i]) { 238 + extend_credit_for_blkdel(handle, inode); 257 239 ext4_free_blocks(handle, inode, 258 240 le32_to_cpu(tmp_idata[i]), 1, 1); 241 + } 259 242 } 260 243 put_bh(bh); 244 + extend_credit_for_blkdel(handle, inode); 261 245 ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1); 262 246 return 0; 263 247 } ··· 289 267 } 290 268 } 291 269 put_bh(bh); 270 + extend_credit_for_blkdel(handle, inode); 292 271 ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1); 293 272 return 0; 294 273 } 295 274 296 - static int free_ind_block(handle_t *handle, struct inode *inode) 275 + static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data) 297 276 { 298 277 int retval; 299 - struct ext4_inode_info *ei = EXT4_I(inode); 300 278 301 - if 
(ei->i_data[EXT4_IND_BLOCK]) 279 + /* ei->i_data[EXT4_IND_BLOCK] */ 280 + if (i_data[0]) { 281 + extend_credit_for_blkdel(handle, inode); 302 282 ext4_free_blocks(handle, inode, 303 - le32_to_cpu(ei->i_data[EXT4_IND_BLOCK]), 1, 1); 283 + le32_to_cpu(i_data[0]), 1, 1); 284 + } 304 285 305 - if (ei->i_data[EXT4_DIND_BLOCK]) { 306 - retval = free_dind_blocks(handle, inode, 307 - ei->i_data[EXT4_DIND_BLOCK]); 286 + /* ei->i_data[EXT4_DIND_BLOCK] */ 287 + if (i_data[1]) { 288 + retval = free_dind_blocks(handle, inode, i_data[1]); 308 289 if (retval) 309 290 return retval; 310 291 } 311 292 312 - if (ei->i_data[EXT4_TIND_BLOCK]) { 313 - retval = free_tind_blocks(handle, inode, 314 - ei->i_data[EXT4_TIND_BLOCK]); 293 + /* ei->i_data[EXT4_TIND_BLOCK] */ 294 + if (i_data[2]) { 295 + retval = free_tind_blocks(handle, inode, i_data[2]); 315 296 if (retval) 316 297 return retval; 317 298 } ··· 322 297 } 323 298 324 299 static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, 325 - struct inode *tmp_inode, int retval) 300 + struct inode *tmp_inode) 326 301 { 302 + int retval; 303 + __le32 i_data[3]; 327 304 struct ext4_inode_info *ei = EXT4_I(inode); 328 305 struct ext4_inode_info *tmp_ei = EXT4_I(tmp_inode); 329 - 330 - retval = free_ind_block(handle, inode); 331 - if (retval) 332 - goto err_out; 333 306 334 307 /* 335 308 * One credit accounted for writing the ··· 340 317 goto err_out; 341 318 } 342 319 320 + i_data[0] = ei->i_data[EXT4_IND_BLOCK]; 321 + i_data[1] = ei->i_data[EXT4_DIND_BLOCK]; 322 + i_data[2] = ei->i_data[EXT4_TIND_BLOCK]; 323 + 324 + down_write(&EXT4_I(inode)->i_data_sem); 343 325 /* 344 326 * We have the extent map build with the tmp inode. 
345 327 * Now copy the i_data across ··· 364 336 spin_lock(&inode->i_lock); 365 337 inode->i_blocks += tmp_inode->i_blocks; 366 338 spin_unlock(&inode->i_lock); 339 + up_write(&EXT4_I(inode)->i_data_sem); 367 340 341 + /* 342 + * We mark the inode dirty after, because we decrement the 343 + * i_blocks when freeing the indirect meta-data blocks 344 + */ 345 + retval = free_ind_block(handle, inode, i_data); 368 346 ext4_mark_inode_dirty(handle, inode); 347 + 369 348 err_out: 370 349 return retval; 371 350 } ··· 400 365 } 401 366 } 402 367 put_bh(bh); 368 + extend_credit_for_blkdel(handle, inode); 403 369 ext4_free_blocks(handle, inode, block, 1, 1); 404 370 return retval; 405 371 } ··· 450 414 if ((EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) 451 415 return -EINVAL; 452 416 453 - down_write(&EXT4_I(inode)->i_data_sem); 417 + if (S_ISLNK(inode->i_mode) && inode->i_blocks == 0) 418 + /* 419 + * don't migrate fast symlink 420 + */ 421 + return retval; 422 + 454 423 handle = ext4_journal_start(inode, 455 424 EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 456 425 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + ··· 489 448 ext4_orphan_add(handle, tmp_inode); 490 449 ext4_journal_stop(handle); 491 450 492 - ei = EXT4_I(inode); 493 - i_data = ei->i_data; 494 - memset(&lb, 0, sizeof(lb)); 495 - 496 - /* 32 bit block address 4 bytes */ 497 - max_entries = inode->i_sb->s_blocksize >> 2; 498 - 499 451 /* 500 452 * start with one credit accounted for 501 453 * superblock modification. ··· 497 463 * trascation that created the inode. Later as and 498 464 * when we add extents we extent the journal 499 465 */ 466 + /* 467 + * inode_mutex prevent write and truncate on the file. Read still goes 468 + * through. We take i_data_sem in ext4_ext_swap_inode_data before we 469 + * switch the inode format to prevent read. 
470 + */ 471 + mutex_lock(&(inode->i_mutex)); 500 472 handle = ext4_journal_start(inode, 1); 473 + 474 + ei = EXT4_I(inode); 475 + i_data = ei->i_data; 476 + memset(&lb, 0, sizeof(lb)); 477 + 478 + /* 32 bit block address 4 bytes */ 479 + max_entries = inode->i_sb->s_blocksize >> 2; 501 480 for (i = 0; i < EXT4_NDIR_BLOCKS; i++, blk_count++) { 502 481 if (i_data[i]) { 503 482 retval = update_extent_range(handle, tmp_inode, ··· 548 501 */ 549 502 retval = finish_range(handle, tmp_inode, &lb); 550 503 err_out: 551 - /* 552 - * We are either freeing extent information or indirect 553 - * blocks. During this we touch superblock, group descriptor 554 - * and block bitmap. Later we mark the tmp_inode dirty 555 - * via ext4_ext_tree_init. So allocate a credit of 4 556 - * We may update quota (user and group). 557 - * 558 - * FIXME!! we may be touching bitmaps in different block groups. 559 - */ 560 - if (ext4_journal_extend(handle, 561 - 4 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb)) != 0) 562 - ext4_journal_restart(handle, 563 - 4 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb)); 564 504 if (retval) 565 505 /* 566 506 * Failure case delete the extent information with the ··· 556 522 free_ext_block(handle, tmp_inode); 557 523 else 558 524 retval = ext4_ext_swap_inode_data(handle, inode, 559 - tmp_inode, retval); 525 + tmp_inode); 526 + 527 + /* We mark the tmp_inode dirty via ext4_ext_tree_init. */ 528 + if (ext4_journal_extend(handle, 1) != 0) 529 + ext4_journal_restart(handle, 1); 560 530 561 531 /* 562 532 * Mark the tmp_inode as of size zero ··· 588 550 tmp_inode->i_nlink = 0; 589 551 590 552 ext4_journal_stop(handle); 591 - 592 - up_write(&EXT4_I(inode)->i_data_sem); 553 + mutex_unlock(&(inode->i_mutex)); 593 554 594 555 if (tmp_inode) 595 556 iput(tmp_inode);
+1
fs/ext4/namei.c
··· 2223 2223 inode->i_op = &ext4_fast_symlink_inode_operations; 2224 2224 memcpy((char*)&EXT4_I(inode)->i_data,symname,l); 2225 2225 inode->i_size = l-1; 2226 + EXT4_I(inode)->i_flags &= ~EXT4_EXTENTS_FL; 2226 2227 } 2227 2228 EXT4_I(inode)->i_disksize = inode->i_size; 2228 2229 err = ext4_add_nondir(handle, dentry, inode);
+11
fs/ext4/super.c
··· 1919 1919 printk(KERN_WARNING 1920 1920 "EXT4-fs warning: feature flags set on rev 0 fs, " 1921 1921 "running e2fsck is recommended\n"); 1922 + 1923 + /* 1924 + * Since ext4 is still considered development code, we require 1925 + * that the TEST_FILESYS flag in s->flags be set. 1926 + */ 1927 + if (!(le32_to_cpu(es->s_flags) & EXT2_FLAGS_TEST_FILESYS)) { 1928 + printk(KERN_WARNING "EXT4-fs: %s: not marked " 1929 + "OK to use with test code.\n", sb->s_id); 1930 + goto failed_mount; 1931 + } 1932 + 1922 1933 /* 1923 1934 * Check feature flags regardless of the revision level, since we 1924 1935 * previously didn't change the revision level when setting the flags,
+6 -8
fs/jbd/commit.c
··· 104 104 { 105 105 struct journal_head *descriptor; 106 106 struct buffer_head *bh; 107 - int i, ret; 107 + journal_header_t *header; 108 + int ret; 108 109 int barrier_done = 0; 109 110 110 111 if (is_journal_aborted(journal)) ··· 117 116 118 117 bh = jh2bh(descriptor); 119 118 120 - /* AKPM: buglet - add `i' to tmp! */ 121 - for (i = 0; i < bh->b_size; i += 512) { 122 - journal_header_t *tmp = (journal_header_t*)bh->b_data; 123 - tmp->h_magic = cpu_to_be32(JFS_MAGIC_NUMBER); 124 - tmp->h_blocktype = cpu_to_be32(JFS_COMMIT_BLOCK); 125 - tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid); 126 - } 119 + header = (journal_header_t *)(bh->b_data); 120 + header->h_magic = cpu_to_be32(JFS_MAGIC_NUMBER); 121 + header->h_blocktype = cpu_to_be32(JFS_COMMIT_BLOCK); 122 + header->h_sequence = cpu_to_be32(commit_transaction->t_tid); 127 123 128 124 JBUFFER_TRACE(descriptor, "write commit block"); 129 125 set_buffer_dirty(bh);
+6 -4
fs/jbd2/commit.c
··· 136 136 137 137 JBUFFER_TRACE(descriptor, "submit commit block"); 138 138 lock_buffer(bh); 139 - 139 + get_bh(bh); 140 140 set_buffer_dirty(bh); 141 141 set_buffer_uptodate(bh); 142 142 bh->b_end_io = journal_end_buffer_io_sync; 143 143 144 144 if (journal->j_flags & JBD2_BARRIER && 145 - !JBD2_HAS_COMPAT_FEATURE(journal, 145 + !JBD2_HAS_INCOMPAT_FEATURE(journal, 146 146 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { 147 147 set_buffer_ordered(bh); 148 148 barrier_done = 1; 149 149 } 150 150 ret = submit_bh(WRITE, bh); 151 + if (barrier_done) 152 + clear_buffer_ordered(bh); 151 153 152 154 /* is it possible for another commit to fail at roughly 153 155 * the same time as this one? If so, we don't want to ··· 168 166 spin_unlock(&journal->j_state_lock); 169 167 170 168 /* And try again, without the barrier */ 171 - clear_buffer_ordered(bh); 172 169 set_buffer_uptodate(bh); 173 170 set_buffer_dirty(bh); 174 171 ret = submit_bh(WRITE, bh); ··· 873 872 if (err) 874 873 __jbd2_journal_abort_hard(journal); 875 874 } 876 - err = journal_wait_on_commit_record(cbh); 875 + if (!err && !is_journal_aborted(journal)) 876 + err = journal_wait_on_commit_record(cbh); 877 877 878 878 if (err) 879 879 jbd2_journal_abort(journal, err);
+1 -1
fs/jbd2/recovery.c
··· 641 641 if (chksum_err) { 642 642 info->end_transaction = next_commit_ID; 643 643 644 - if (!JBD2_HAS_COMPAT_FEATURE(journal, 644 + if (!JBD2_HAS_INCOMPAT_FEATURE(journal, 645 645 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)){ 646 646 printk(KERN_ERR 647 647 "JBD: Transaction %u "
+7
include/linux/ext4_fs.h
··· 490 490 #define EXT4_ORPHAN_FS 0x0004 /* Orphans being recovered */ 491 491 492 492 /* 493 + * Misc. filesystem flags 494 + */ 495 + #define EXT2_FLAGS_SIGNED_HASH 0x0001 /* Signed dirhash in use */ 496 + #define EXT2_FLAGS_UNSIGNED_HASH 0x0002 /* Unsigned dirhash in use */ 497 + #define EXT2_FLAGS_TEST_FILESYS 0x0004 /* to test development code */ 498 + 499 + /* 493 500 * Mount flags 494 501 */ 495 502 #define EXT4_MOUNT_CHECK 0x00001 /* Do mount-time checks */