Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:
"Major changes for 3.14 include support for the newly added ZERO_RANGE
and COLLAPSE_RANGE fallocate operations, and scalability improvements
in the jbd2 layer and in xattr handling when the extended attributes
spill over into an external block.

Other than that, the usual clean ups and minor bug fixes"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (42 commits)
ext4: fix premature freeing of partial clusters split across leaf blocks
ext4: remove unneeded test of ret variable
ext4: fix comment typo
ext4: make ext4_block_zero_page_range static
ext4: atomically set inode->i_flags in ext4_set_inode_flags()
ext4: optimize Hurd tests when reading/writing inodes
ext4: kill i_version support for Hurd-castrated file systems
ext4: each filesystem creates and uses its own mb_cache
fs/mbcache.c: decouple the locking of local from global data
fs/mbcache.c: change block and index hash chain to hlist_bl_node
ext4: Introduce FALLOC_FL_ZERO_RANGE flag for fallocate
ext4: refactor ext4_fallocate code
ext4: Update inode i_size after the preallocation
ext4: fix partial cluster handling for bigalloc file systems
ext4: delete path dealloc code in ext4_ext_handle_uninitialized_extents
ext4: only call sync_filesystem() when remounting read-only
fs: push sync_filesystem() down to the file system's remount_fs()
jbd2: improve error messages for inconsistent journal heads
jbd2: minimize region locked by j_list_lock in jbd2_journal_forget()
jbd2: minimize region locked by j_list_lock in journal_get_create_access()
...

+1515 -502
+1
fs/adfs/super.c
··· 212 212 213 213 static int adfs_remount(struct super_block *sb, int *flags, char *data) 214 214 { 215 + sync_filesystem(sb); 215 216 *flags |= MS_NODIRATIME; 216 217 return parse_options(sb, data); 217 218 }
+1
fs/affs/super.c
··· 530 530 531 531 pr_debug("AFFS: remount(flags=0x%x,opts=\"%s\")\n",*flags,data); 532 532 533 + sync_filesystem(sb); 533 534 *flags |= MS_NODIRATIME; 534 535 535 536 memcpy(volume, sbi->s_volume, 32);
+1
fs/befs/linuxvfs.c
··· 913 913 static int 914 914 befs_remount(struct super_block *sb, int *flags, char *data) 915 915 { 916 + sync_filesystem(sb); 916 917 if (!(*flags & MS_RDONLY)) 917 918 return -EINVAL; 918 919 return 0;
+1
fs/btrfs/super.c
··· 1380 1380 unsigned int old_metadata_ratio = fs_info->metadata_ratio; 1381 1381 int ret; 1382 1382 1383 + sync_filesystem(sb); 1383 1384 btrfs_remount_prepare(fs_info); 1384 1385 1385 1386 ret = btrfs_parse_options(root, data);
+1
fs/cifs/cifsfs.c
··· 541 541 542 542 static int cifs_remount(struct super_block *sb, int *flags, char *data) 543 543 { 544 + sync_filesystem(sb); 544 545 *flags |= MS_NODIRATIME; 545 546 return 0; 546 547 }
+1
fs/coda/inode.c
··· 96 96 97 97 static int coda_remount(struct super_block *sb, int *flags, char *data) 98 98 { 99 + sync_filesystem(sb); 99 100 *flags |= MS_NOATIME; 100 101 return 0; 101 102 }
+1
fs/cramfs/inode.c
··· 243 243 244 244 static int cramfs_remount(struct super_block *sb, int *flags, char *data) 245 245 { 246 + sync_filesystem(sb); 246 247 *flags |= MS_RDONLY; 247 248 return 0; 248 249 }
+1
fs/debugfs/inode.c
··· 218 218 int err; 219 219 struct debugfs_fs_info *fsi = sb->s_fs_info; 220 220 221 + sync_filesystem(sb); 221 222 err = debugfs_parse_options(data, &fsi->mount_opts); 222 223 if (err) 223 224 goto fail;
+1
fs/devpts/inode.c
··· 313 313 struct pts_fs_info *fsi = DEVPTS_SB(sb); 314 314 struct pts_mount_opts *opts = &fsi->mount_opts; 315 315 316 + sync_filesystem(sb); 316 317 err = parse_mount_options(data, PARSE_REMOUNT, opts); 317 318 318 319 /*
+1
fs/efs/super.c
··· 114 114 115 115 static int efs_remount(struct super_block *sb, int *flags, char *data) 116 116 { 117 + sync_filesystem(sb); 117 118 *flags |= MS_RDONLY; 118 119 return 0; 119 120 }
+1
fs/ext2/super.c
··· 1254 1254 unsigned long old_sb_flags; 1255 1255 int err; 1256 1256 1257 + sync_filesystem(sb); 1257 1258 spin_lock(&sbi->s_lock); 1258 1259 1259 1260 /* Store the old options */
+2
fs/ext3/super.c
··· 2649 2649 int i; 2650 2650 #endif 2651 2651 2652 + sync_filesystem(sb); 2653 + 2652 2654 /* Store the original options */ 2653 2655 old_sb_flags = sb->s_flags; 2654 2656 old_opts.s_mount_opt = sbi->s_mount_opt;
+9 -2
fs/ext4/ext4.h
··· 31 31 #include <linux/percpu_counter.h> 32 32 #include <linux/ratelimit.h> 33 33 #include <crypto/hash.h> 34 + #include <linux/falloc.h> 34 35 #ifdef __KERNEL__ 35 36 #include <linux/compat.h> 36 37 #endif ··· 568 567 #define EXT4_GET_BLOCKS_NO_LOCK 0x0100 569 568 /* Do not put hole in extent cache */ 570 569 #define EXT4_GET_BLOCKS_NO_PUT_HOLE 0x0200 570 + /* Convert written extents to unwritten */ 571 + #define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN 0x0400 571 572 572 573 /* 573 574 * The bit position of these flags must not overlap with any of the ··· 1001 998 #define EXT4_MOUNT2_STD_GROUP_SIZE 0x00000002 /* We have standard group 1002 999 size of blocksize * 8 1003 1000 blocks */ 1001 + #define EXT4_MOUNT2_HURD_COMPAT 0x00000004 /* Support HURD-castrated 1002 + file systems */ 1004 1003 1005 1004 #define clear_opt(sb, opt) EXT4_SB(sb)->s_mount_opt &= \ 1006 1005 ~EXT4_MOUNT_##opt ··· 1331 1326 struct list_head s_es_lru; 1332 1327 unsigned long s_es_last_sorted; 1333 1328 struct percpu_counter s_extent_cache_cnt; 1329 + struct mb_cache *s_mb_cache; 1334 1330 spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp; 1335 1331 1336 1332 /* Ratelimit ext4 messages. 
*/ ··· 2139 2133 extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); 2140 2134 extern int ext4_block_truncate_page(handle_t *handle, 2141 2135 struct address_space *mapping, loff_t from); 2142 - extern int ext4_block_zero_page_range(handle_t *handle, 2143 - struct address_space *mapping, loff_t from, loff_t length); 2144 2136 extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, 2145 2137 loff_t lstart, loff_t lend); 2146 2138 extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); ··· 2761 2757 extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 2762 2758 __u64 start, __u64 len); 2763 2759 extern int ext4_ext_precache(struct inode *inode); 2760 + extern int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len); 2764 2761 2765 2762 /* move_extent.c */ 2766 2763 extern void ext4_double_down_write_data_sem(struct inode *first, ··· 2771 2766 extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, 2772 2767 __u64 start_orig, __u64 start_donor, 2773 2768 __u64 len, __u64 *moved_len); 2769 + extern int mext_next_extent(struct inode *inode, struct ext4_ext_path *path, 2770 + struct ext4_extent **extent); 2774 2771 2775 2772 /* page-io.c */ 2776 2773 extern int __init ext4_init_pageio(void);
+10
fs/ext4/ext4_jbd2.c
··· 259 259 if (WARN_ON_ONCE(err)) { 260 260 ext4_journal_abort_handle(where, line, __func__, bh, 261 261 handle, err); 262 + if (inode == NULL) { 263 + pr_err("EXT4: jbd2_journal_dirty_metadata " 264 + "failed: handle type %u started at " 265 + "line %u, credits %u/%u, errcode %d", 266 + handle->h_type, 267 + handle->h_line_no, 268 + handle->h_requested_credits, 269 + handle->h_buffer_credits, err); 270 + return err; 271 + } 262 272 ext4_error_inode(inode, where, line, 263 273 bh->b_blocknr, 264 274 "journal_dirty_metadata failed: "
+707 -113
fs/ext4/extents.c
··· 37 37 #include <linux/quotaops.h> 38 38 #include <linux/string.h> 39 39 #include <linux/slab.h> 40 - #include <linux/falloc.h> 41 40 #include <asm/uaccess.h> 42 41 #include <linux/fiemap.h> 43 42 #include "ext4_jbd2.h" ··· 1690 1691 * the extent that was written properly split out and conversion to 1691 1692 * initialized is trivial. 1692 1693 */ 1693 - if (ext4_ext_is_uninitialized(ex1) || ext4_ext_is_uninitialized(ex2)) 1694 + if (ext4_ext_is_uninitialized(ex1) != ext4_ext_is_uninitialized(ex2)) 1694 1695 return 0; 1695 1696 1696 1697 ext1_ee_len = ext4_ext_get_actual_len(ex1); ··· 1706 1707 * this can result in the top bit of ee_len being set. 1707 1708 */ 1708 1709 if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN) 1710 + return 0; 1711 + if (ext4_ext_is_uninitialized(ex1) && 1712 + (ext4_test_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN) || 1713 + atomic_read(&EXT4_I(inode)->i_unwritten) || 1714 + (ext1_ee_len + ext2_ee_len > EXT_UNINIT_MAX_LEN))) 1709 1715 return 0; 1710 1716 #ifdef AGGRESSIVE_TEST 1711 1717 if (ext1_ee_len >= 4) ··· 1735 1731 { 1736 1732 struct ext4_extent_header *eh; 1737 1733 unsigned int depth, len; 1738 - int merge_done = 0; 1734 + int merge_done = 0, uninit; 1739 1735 1740 1736 depth = ext_depth(inode); 1741 1737 BUG_ON(path[depth].p_hdr == NULL); ··· 1745 1741 if (!ext4_can_extents_be_merged(inode, ex, ex + 1)) 1746 1742 break; 1747 1743 /* merge with next extent! 
*/ 1744 + uninit = ext4_ext_is_uninitialized(ex); 1748 1745 ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) 1749 1746 + ext4_ext_get_actual_len(ex + 1)); 1747 + if (uninit) 1748 + ext4_ext_mark_uninitialized(ex); 1750 1749 1751 1750 if (ex + 1 < EXT_LAST_EXTENT(eh)) { 1752 1751 len = (EXT_LAST_EXTENT(eh) - ex - 1) ··· 1903 1896 struct ext4_ext_path *npath = NULL; 1904 1897 int depth, len, err; 1905 1898 ext4_lblk_t next; 1906 - int mb_flags = 0; 1899 + int mb_flags = 0, uninit; 1907 1900 1908 1901 if (unlikely(ext4_ext_get_actual_len(newext) == 0)) { 1909 1902 EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0"); ··· 1953 1946 path + depth); 1954 1947 if (err) 1955 1948 return err; 1956 - 1949 + uninit = ext4_ext_is_uninitialized(ex); 1957 1950 ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) 1958 1951 + ext4_ext_get_actual_len(newext)); 1952 + if (uninit) 1953 + ext4_ext_mark_uninitialized(ex); 1959 1954 eh = path[depth].p_hdr; 1960 1955 nearex = ex; 1961 1956 goto merge; ··· 1980 1971 if (err) 1981 1972 return err; 1982 1973 1974 + uninit = ext4_ext_is_uninitialized(ex); 1983 1975 ex->ee_block = newext->ee_block; 1984 1976 ext4_ext_store_pblock(ex, ext4_ext_pblock(newext)); 1985 1977 ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) 1986 1978 + ext4_ext_get_actual_len(newext)); 1979 + if (uninit) 1980 + ext4_ext_mark_uninitialized(ex); 1987 1981 eh = path[depth].p_hdr; 1988 1982 nearex = ex; 1989 1983 goto merge; ··· 2597 2585 ex_ee_block = le32_to_cpu(ex->ee_block); 2598 2586 ex_ee_len = ext4_ext_get_actual_len(ex); 2599 2587 2588 + /* 2589 + * If we're starting with an extent other than the last one in the 2590 + * node, we need to see if it shares a cluster with the extent to 2591 + * the right (towards the end of the file). If its leftmost cluster 2592 + * is this extent's rightmost cluster and it is not cluster aligned, 2593 + * we'll mark it as a partial that is not to be deallocated. 
2594 + */ 2595 + 2596 + if (ex != EXT_LAST_EXTENT(eh)) { 2597 + ext4_fsblk_t current_pblk, right_pblk; 2598 + long long current_cluster, right_cluster; 2599 + 2600 + current_pblk = ext4_ext_pblock(ex) + ex_ee_len - 1; 2601 + current_cluster = (long long)EXT4_B2C(sbi, current_pblk); 2602 + right_pblk = ext4_ext_pblock(ex + 1); 2603 + right_cluster = (long long)EXT4_B2C(sbi, right_pblk); 2604 + if (current_cluster == right_cluster && 2605 + EXT4_PBLK_COFF(sbi, right_pblk)) 2606 + *partial_cluster = -right_cluster; 2607 + } 2608 + 2600 2609 trace_ext4_ext_rm_leaf(inode, start, ex, *partial_cluster); 2601 2610 2602 2611 while (ex >= EXT_FIRST_EXTENT(eh) && ··· 2743 2710 err = ext4_ext_correct_indexes(handle, inode, path); 2744 2711 2745 2712 /* 2746 - * Free the partial cluster only if the current extent does not 2747 - * reference it. Otherwise we might free used cluster. 2713 + * If there's a partial cluster and at least one extent remains in 2714 + * the leaf, free the partial cluster if it isn't shared with the 2715 + * current extent. If there's a partial cluster and no extents 2716 + * remain in the leaf, it can't be freed here. It can only be 2717 + * freed when it's possible to determine if it's not shared with 2718 + * any other extent - when the next leaf is processed or when space 2719 + * removal is complete. 2748 2720 */ 2749 - if (*partial_cluster > 0 && 2721 + if (*partial_cluster > 0 && eh->eh_entries && 2750 2722 (EXT4_B2C(sbi, ext4_ext_pblock(ex) + ex_ee_len - 1) != 2751 2723 *partial_cluster)) { 2752 2724 int flags = get_default_free_blocks_flags(inode); ··· 3607 3569 * b> Splits in two extents: Write is happening at either end of the extent 3608 3570 * c> Splits in three extents: Somone is writing in middle of the extent 3609 3571 * 3572 + * This works the same way in the case of initialized -> unwritten conversion. 3573 + * 3610 3574 * One of more index blocks maybe needed if the extent tree grow after 3611 3575 * the uninitialized extent split. 
To prevent ENOSPC occur at the IO 3612 3576 * complete, we need to split the uninitialized extent before DIO submit ··· 3619 3579 * 3620 3580 * Returns the size of uninitialized extent to be written on success. 3621 3581 */ 3622 - static int ext4_split_unwritten_extents(handle_t *handle, 3582 + static int ext4_split_convert_extents(handle_t *handle, 3623 3583 struct inode *inode, 3624 3584 struct ext4_map_blocks *map, 3625 3585 struct ext4_ext_path *path, ··· 3631 3591 unsigned int ee_len; 3632 3592 int split_flag = 0, depth; 3633 3593 3634 - ext_debug("ext4_split_unwritten_extents: inode %lu, logical" 3635 - "block %llu, max_blocks %u\n", inode->i_ino, 3636 - (unsigned long long)map->m_lblk, map->m_len); 3594 + ext_debug("%s: inode %lu, logical block %llu, max_blocks %u\n", 3595 + __func__, inode->i_ino, 3596 + (unsigned long long)map->m_lblk, map->m_len); 3637 3597 3638 3598 eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> 3639 3599 inode->i_sb->s_blocksize_bits; ··· 3648 3608 ee_block = le32_to_cpu(ex->ee_block); 3649 3609 ee_len = ext4_ext_get_actual_len(ex); 3650 3610 3651 - split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; 3652 - split_flag |= EXT4_EXT_MARK_UNINIT2; 3653 - if (flags & EXT4_GET_BLOCKS_CONVERT) 3654 - split_flag |= EXT4_EXT_DATA_VALID2; 3611 + /* Convert to unwritten */ 3612 + if (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN) { 3613 + split_flag |= EXT4_EXT_DATA_VALID1; 3614 + /* Convert to initialized */ 3615 + } else if (flags & EXT4_GET_BLOCKS_CONVERT) { 3616 + split_flag |= ee_block + ee_len <= eof_block ? 
3617 + EXT4_EXT_MAY_ZEROOUT : 0; 3618 + split_flag |= (EXT4_EXT_MARK_UNINIT2 | EXT4_EXT_DATA_VALID2); 3619 + } 3655 3620 flags |= EXT4_GET_BLOCKS_PRE_IO; 3656 3621 return ext4_split_extent(handle, inode, path, map, split_flag, flags); 3657 3622 } 3623 + 3624 + static int ext4_convert_initialized_extents(handle_t *handle, 3625 + struct inode *inode, 3626 + struct ext4_map_blocks *map, 3627 + struct ext4_ext_path *path) 3628 + { 3629 + struct ext4_extent *ex; 3630 + ext4_lblk_t ee_block; 3631 + unsigned int ee_len; 3632 + int depth; 3633 + int err = 0; 3634 + 3635 + depth = ext_depth(inode); 3636 + ex = path[depth].p_ext; 3637 + ee_block = le32_to_cpu(ex->ee_block); 3638 + ee_len = ext4_ext_get_actual_len(ex); 3639 + 3640 + ext_debug("%s: inode %lu, logical" 3641 + "block %llu, max_blocks %u\n", __func__, inode->i_ino, 3642 + (unsigned long long)ee_block, ee_len); 3643 + 3644 + if (ee_block != map->m_lblk || ee_len > map->m_len) { 3645 + err = ext4_split_convert_extents(handle, inode, map, path, 3646 + EXT4_GET_BLOCKS_CONVERT_UNWRITTEN); 3647 + if (err < 0) 3648 + goto out; 3649 + ext4_ext_drop_refs(path); 3650 + path = ext4_ext_find_extent(inode, map->m_lblk, path, 0); 3651 + if (IS_ERR(path)) { 3652 + err = PTR_ERR(path); 3653 + goto out; 3654 + } 3655 + depth = ext_depth(inode); 3656 + ex = path[depth].p_ext; 3657 + } 3658 + 3659 + err = ext4_ext_get_access(handle, inode, path + depth); 3660 + if (err) 3661 + goto out; 3662 + /* first mark the extent as uninitialized */ 3663 + ext4_ext_mark_uninitialized(ex); 3664 + 3665 + /* note: ext4_ext_correct_indexes() isn't needed here because 3666 + * borders are not changed 3667 + */ 3668 + ext4_ext_try_to_merge(handle, inode, path, ex); 3669 + 3670 + /* Mark modified extent as dirty */ 3671 + err = ext4_ext_dirty(handle, inode, path + path->p_depth); 3672 + out: 3673 + ext4_ext_show_leaf(inode, path); 3674 + return err; 3675 + } 3676 + 3658 3677 3659 3678 static int ext4_convert_unwritten_extents_endio(handle_t *handle, 
3660 3679 struct inode *inode, ··· 3748 3649 inode->i_ino, (unsigned long long)ee_block, ee_len, 3749 3650 (unsigned long long)map->m_lblk, map->m_len); 3750 3651 #endif 3751 - err = ext4_split_unwritten_extents(handle, inode, map, path, 3752 - EXT4_GET_BLOCKS_CONVERT); 3652 + err = ext4_split_convert_extents(handle, inode, map, path, 3653 + EXT4_GET_BLOCKS_CONVERT); 3753 3654 if (err < 0) 3754 3655 goto out; 3755 3656 ext4_ext_drop_refs(path); ··· 3950 3851 } 3951 3852 3952 3853 static int 3854 + ext4_ext_convert_initialized_extent(handle_t *handle, struct inode *inode, 3855 + struct ext4_map_blocks *map, 3856 + struct ext4_ext_path *path, int flags, 3857 + unsigned int allocated, ext4_fsblk_t newblock) 3858 + { 3859 + int ret = 0; 3860 + int err = 0; 3861 + 3862 + /* 3863 + * Make sure that the extent is no bigger than we support with 3864 + * uninitialized extent 3865 + */ 3866 + if (map->m_len > EXT_UNINIT_MAX_LEN) 3867 + map->m_len = EXT_UNINIT_MAX_LEN / 2; 3868 + 3869 + ret = ext4_convert_initialized_extents(handle, inode, map, 3870 + path); 3871 + if (ret >= 0) { 3872 + ext4_update_inode_fsync_trans(handle, inode, 1); 3873 + err = check_eofblocks_fl(handle, inode, map->m_lblk, 3874 + path, map->m_len); 3875 + } else 3876 + err = ret; 3877 + map->m_flags |= EXT4_MAP_UNWRITTEN; 3878 + if (allocated > map->m_len) 3879 + allocated = map->m_len; 3880 + map->m_len = allocated; 3881 + 3882 + return err ? 
err : allocated; 3883 + } 3884 + 3885 + static int 3953 3886 ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, 3954 3887 struct ext4_map_blocks *map, 3955 3888 struct ext4_ext_path *path, int flags, ··· 4008 3877 4009 3878 /* get_block() before submit the IO, split the extent */ 4010 3879 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { 4011 - ret = ext4_split_unwritten_extents(handle, inode, map, 4012 - path, flags); 3880 + ret = ext4_split_convert_extents(handle, inode, map, 3881 + path, flags | EXT4_GET_BLOCKS_CONVERT); 4013 3882 if (ret <= 0) 4014 3883 goto out; 4015 3884 /* ··· 4124 3993 map->m_pblk = newblock; 4125 3994 map->m_len = allocated; 4126 3995 out2: 4127 - if (path) { 4128 - ext4_ext_drop_refs(path); 4129 - kfree(path); 4130 - } 4131 3996 return err ? err : allocated; 4132 3997 } 4133 3998 ··· 4255 4128 struct ext4_extent newex, *ex, *ex2; 4256 4129 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 4257 4130 ext4_fsblk_t newblock = 0; 4258 - int free_on_err = 0, err = 0, depth; 4131 + int free_on_err = 0, err = 0, depth, ret; 4259 4132 unsigned int allocated = 0, offset = 0; 4260 4133 unsigned int allocated_clusters = 0; 4261 4134 struct ext4_allocation_request ar; ··· 4297 4170 ext4_fsblk_t ee_start = ext4_ext_pblock(ex); 4298 4171 unsigned short ee_len; 4299 4172 4173 + 4300 4174 /* 4301 4175 * Uninitialized extents are treated as holes, except that 4302 4176 * we split out initialized portions during a write. ··· 4314 4186 ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk, 4315 4187 ee_block, ee_len, newblock); 4316 4188 4317 - if (!ext4_ext_is_uninitialized(ex)) 4189 + /* 4190 + * If the extent is initialized check whether the 4191 + * caller wants to convert it to unwritten. 
4192 + */ 4193 + if ((!ext4_ext_is_uninitialized(ex)) && 4194 + (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) { 4195 + allocated = ext4_ext_convert_initialized_extent( 4196 + handle, inode, map, path, flags, 4197 + allocated, newblock); 4198 + goto out2; 4199 + } else if (!ext4_ext_is_uninitialized(ex)) 4318 4200 goto out; 4319 4201 4320 - allocated = ext4_ext_handle_uninitialized_extents( 4202 + ret = ext4_ext_handle_uninitialized_extents( 4321 4203 handle, inode, map, path, flags, 4322 4204 allocated, newblock); 4323 - goto out3; 4205 + if (ret < 0) 4206 + err = ret; 4207 + else 4208 + allocated = ret; 4209 + goto out2; 4324 4210 } 4325 4211 } 4326 4212 ··· 4615 4473 kfree(path); 4616 4474 } 4617 4475 4618 - out3: 4619 4476 trace_ext4_ext_map_blocks_exit(inode, flags, map, 4620 4477 err ? err : allocated); 4621 4478 ext4_es_lru_add(inode); ··· 4655 4514 ext4_std_error(inode->i_sb, err); 4656 4515 } 4657 4516 4658 - static void ext4_falloc_update_inode(struct inode *inode, 4659 - int mode, loff_t new_size, int update_ctime) 4517 + static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, 4518 + ext4_lblk_t len, int flags, int mode) 4660 4519 { 4661 - struct timespec now; 4520 + struct inode *inode = file_inode(file); 4521 + handle_t *handle; 4522 + int ret = 0; 4523 + int ret2 = 0; 4524 + int retries = 0; 4525 + struct ext4_map_blocks map; 4526 + unsigned int credits; 4662 4527 4663 - if (update_ctime) { 4664 - now = current_fs_time(inode->i_sb); 4665 - if (!timespec_equal(&inode->i_ctime, &now)) 4666 - inode->i_ctime = now; 4667 - } 4528 + map.m_lblk = offset; 4668 4529 /* 4669 - * Update only when preallocation was requested beyond 4670 - * the file size. 4530 + * Don't normalize the request if it can fit in one extent so 4531 + * that it doesn't get unnecessarily split into multiple 4532 + * extents. 
4671 4533 */ 4672 - if (!(mode & FALLOC_FL_KEEP_SIZE)) { 4534 + if (len <= EXT_UNINIT_MAX_LEN) 4535 + flags |= EXT4_GET_BLOCKS_NO_NORMALIZE; 4536 + 4537 + /* 4538 + * credits to insert 1 extent into extent tree 4539 + */ 4540 + credits = ext4_chunk_trans_blocks(inode, len); 4541 + 4542 + retry: 4543 + while (ret >= 0 && ret < len) { 4544 + map.m_lblk = map.m_lblk + ret; 4545 + map.m_len = len = len - ret; 4546 + handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, 4547 + credits); 4548 + if (IS_ERR(handle)) { 4549 + ret = PTR_ERR(handle); 4550 + break; 4551 + } 4552 + ret = ext4_map_blocks(handle, inode, &map, flags); 4553 + if (ret <= 0) { 4554 + ext4_debug("inode #%lu: block %u: len %u: " 4555 + "ext4_ext_map_blocks returned %d", 4556 + inode->i_ino, map.m_lblk, 4557 + map.m_len, ret); 4558 + ext4_mark_inode_dirty(handle, inode); 4559 + ret2 = ext4_journal_stop(handle); 4560 + break; 4561 + } 4562 + ret2 = ext4_journal_stop(handle); 4563 + if (ret2) 4564 + break; 4565 + } 4566 + if (ret == -ENOSPC && 4567 + ext4_should_retry_alloc(inode->i_sb, &retries)) { 4568 + ret = 0; 4569 + goto retry; 4570 + } 4571 + 4572 + return ret > 0 ? ret2 : ret; 4573 + } 4574 + 4575 + static long ext4_zero_range(struct file *file, loff_t offset, 4576 + loff_t len, int mode) 4577 + { 4578 + struct inode *inode = file_inode(file); 4579 + handle_t *handle = NULL; 4580 + unsigned int max_blocks; 4581 + loff_t new_size = 0; 4582 + int ret = 0; 4583 + int flags; 4584 + int partial; 4585 + loff_t start, end; 4586 + ext4_lblk_t lblk; 4587 + struct address_space *mapping = inode->i_mapping; 4588 + unsigned int blkbits = inode->i_blkbits; 4589 + 4590 + trace_ext4_zero_range(inode, offset, len, mode); 4591 + 4592 + /* 4593 + * Write out all dirty pages to avoid race conditions 4594 + * Then release them. 
4595 + */ 4596 + if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { 4597 + ret = filemap_write_and_wait_range(mapping, offset, 4598 + offset + len - 1); 4599 + if (ret) 4600 + return ret; 4601 + } 4602 + 4603 + /* 4604 + * Round up offset. This is not fallocate, we neet to zero out 4605 + * blocks, so convert interior block aligned part of the range to 4606 + * unwritten and possibly manually zero out unaligned parts of the 4607 + * range. 4608 + */ 4609 + start = round_up(offset, 1 << blkbits); 4610 + end = round_down((offset + len), 1 << blkbits); 4611 + 4612 + if (start < offset || end > offset + len) 4613 + return -EINVAL; 4614 + partial = (offset + len) & ((1 << blkbits) - 1); 4615 + 4616 + lblk = start >> blkbits; 4617 + max_blocks = (end >> blkbits); 4618 + if (max_blocks < lblk) 4619 + max_blocks = 0; 4620 + else 4621 + max_blocks -= lblk; 4622 + 4623 + flags = EXT4_GET_BLOCKS_CREATE_UNINIT_EXT | 4624 + EXT4_GET_BLOCKS_CONVERT_UNWRITTEN; 4625 + if (mode & FALLOC_FL_KEEP_SIZE) 4626 + flags |= EXT4_GET_BLOCKS_KEEP_SIZE; 4627 + 4628 + mutex_lock(&inode->i_mutex); 4629 + 4630 + /* 4631 + * Indirect files do not support unwritten extnets 4632 + */ 4633 + if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { 4634 + ret = -EOPNOTSUPP; 4635 + goto out_mutex; 4636 + } 4637 + 4638 + if (!(mode & FALLOC_FL_KEEP_SIZE) && 4639 + offset + len > i_size_read(inode)) { 4640 + new_size = offset + len; 4641 + ret = inode_newsize_ok(inode, new_size); 4642 + if (ret) 4643 + goto out_mutex; 4644 + /* 4645 + * If we have a partial block after EOF we have to allocate 4646 + * the entire block. 
4647 + */ 4648 + if (partial) 4649 + max_blocks += 1; 4650 + } 4651 + 4652 + if (max_blocks > 0) { 4653 + 4654 + /* Now release the pages and zero block aligned part of pages*/ 4655 + truncate_pagecache_range(inode, start, end - 1); 4656 + 4657 + /* Wait all existing dio workers, newcomers will block on i_mutex */ 4658 + ext4_inode_block_unlocked_dio(inode); 4659 + inode_dio_wait(inode); 4660 + 4661 + /* 4662 + * Remove entire range from the extent status tree. 4663 + */ 4664 + ret = ext4_es_remove_extent(inode, lblk, max_blocks); 4665 + if (ret) 4666 + goto out_dio; 4667 + 4668 + ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags, 4669 + mode); 4670 + if (ret) 4671 + goto out_dio; 4672 + } 4673 + 4674 + handle = ext4_journal_start(inode, EXT4_HT_MISC, 4); 4675 + if (IS_ERR(handle)) { 4676 + ret = PTR_ERR(handle); 4677 + ext4_std_error(inode->i_sb, ret); 4678 + goto out_dio; 4679 + } 4680 + 4681 + inode->i_mtime = inode->i_ctime = ext4_current_time(inode); 4682 + 4683 + if (new_size) { 4673 4684 if (new_size > i_size_read(inode)) 4674 4685 i_size_write(inode, new_size); 4675 4686 if (new_size > EXT4_I(inode)->i_disksize) 4676 4687 ext4_update_i_disksize(inode, new_size); 4677 4688 } else { 4678 4689 /* 4679 - * Mark that we allocate beyond EOF so the subsequent truncate 4680 - * can proceed even if the new size is the same as i_size. 4681 - */ 4682 - if (new_size > i_size_read(inode)) 4690 + * Mark that we allocate beyond EOF so the subsequent truncate 4691 + * can proceed even if the new size is the same as i_size. 
4692 + */ 4693 + if ((offset + len) > i_size_read(inode)) 4683 4694 ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS); 4684 4695 } 4685 4696 4697 + ext4_mark_inode_dirty(handle, inode); 4698 + 4699 + /* Zero out partial block at the edges of the range */ 4700 + ret = ext4_zero_partial_blocks(handle, inode, offset, len); 4701 + 4702 + if (file->f_flags & O_SYNC) 4703 + ext4_handle_sync(handle); 4704 + 4705 + ext4_journal_stop(handle); 4706 + out_dio: 4707 + ext4_inode_resume_unlocked_dio(inode); 4708 + out_mutex: 4709 + mutex_unlock(&inode->i_mutex); 4710 + return ret; 4686 4711 } 4687 4712 4688 4713 /* ··· 4862 4555 { 4863 4556 struct inode *inode = file_inode(file); 4864 4557 handle_t *handle; 4865 - loff_t new_size; 4558 + loff_t new_size = 0; 4866 4559 unsigned int max_blocks; 4867 4560 int ret = 0; 4868 - int ret2 = 0; 4869 - int retries = 0; 4870 4561 int flags; 4871 - struct ext4_map_blocks map; 4872 - unsigned int credits, blkbits = inode->i_blkbits; 4562 + ext4_lblk_t lblk; 4563 + struct timespec tv; 4564 + unsigned int blkbits = inode->i_blkbits; 4873 4565 4874 4566 /* Return error if mode is not supported */ 4875 - if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) 4567 + if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | 4568 + FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE)) 4876 4569 return -EOPNOTSUPP; 4877 4570 4878 4571 if (mode & FALLOC_FL_PUNCH_HOLE) 4879 4572 return ext4_punch_hole(inode, offset, len); 4573 + 4574 + if (mode & FALLOC_FL_COLLAPSE_RANGE) 4575 + return ext4_collapse_range(inode, offset, len); 4880 4576 4881 4577 ret = ext4_convert_inline_data(inode); 4882 4578 if (ret) ··· 4892 4582 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) 4893 4583 return -EOPNOTSUPP; 4894 4584 4585 + if (mode & FALLOC_FL_ZERO_RANGE) 4586 + return ext4_zero_range(file, offset, len, mode); 4587 + 4895 4588 trace_ext4_fallocate_enter(inode, offset, len, mode); 4896 - map.m_lblk = offset >> blkbits; 4589 + lblk = offset >> blkbits; 4897 
4590 /* 4898 4591 * We can't just convert len to max_blocks because 4899 4592 * If blocksize = 4096 offset = 3072 and len = 2048 4900 4593 */ 4901 4594 max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) 4902 - - map.m_lblk; 4903 - /* 4904 - * credits to insert 1 extent into extent tree 4905 - */ 4906 - credits = ext4_chunk_trans_blocks(inode, max_blocks); 4907 - mutex_lock(&inode->i_mutex); 4908 - ret = inode_newsize_ok(inode, (len + offset)); 4909 - if (ret) { 4910 - mutex_unlock(&inode->i_mutex); 4911 - trace_ext4_fallocate_exit(inode, offset, max_blocks, ret); 4912 - return ret; 4913 - } 4595 + - lblk; 4596 + 4914 4597 flags = EXT4_GET_BLOCKS_CREATE_UNINIT_EXT; 4915 4598 if (mode & FALLOC_FL_KEEP_SIZE) 4916 4599 flags |= EXT4_GET_BLOCKS_KEEP_SIZE; 4917 - /* 4918 - * Don't normalize the request if it can fit in one extent so 4919 - * that it doesn't get unnecessarily split into multiple 4920 - * extents. 4921 - */ 4922 - if (len <= EXT_UNINIT_MAX_LEN << blkbits) 4923 - flags |= EXT4_GET_BLOCKS_NO_NORMALIZE; 4924 4600 4925 - retry: 4926 - while (ret >= 0 && ret < max_blocks) { 4927 - map.m_lblk = map.m_lblk + ret; 4928 - map.m_len = max_blocks = max_blocks - ret; 4929 - handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, 4930 - credits); 4931 - if (IS_ERR(handle)) { 4932 - ret = PTR_ERR(handle); 4933 - break; 4934 - } 4935 - ret = ext4_map_blocks(handle, inode, &map, flags); 4936 - if (ret <= 0) { 4937 - #ifdef EXT4FS_DEBUG 4938 - ext4_warning(inode->i_sb, 4939 - "inode #%lu: block %u: len %u: " 4940 - "ext4_ext_map_blocks returned %d", 4941 - inode->i_ino, map.m_lblk, 4942 - map.m_len, ret); 4943 - #endif 4944 - ext4_mark_inode_dirty(handle, inode); 4945 - ret2 = ext4_journal_stop(handle); 4946 - break; 4947 - } 4948 - if ((map.m_lblk + ret) >= (EXT4_BLOCK_ALIGN(offset + len, 4949 - blkbits) >> blkbits)) 4950 - new_size = offset + len; 4951 - else 4952 - new_size = ((loff_t) map.m_lblk + ret) << blkbits; 4601 + mutex_lock(&inode->i_mutex); 4953 
4602 4954 - ext4_falloc_update_inode(inode, mode, new_size, 4955 - (map.m_flags & EXT4_MAP_NEW)); 4956 - ext4_mark_inode_dirty(handle, inode); 4957 - if ((file->f_flags & O_SYNC) && ret >= max_blocks) 4958 - ext4_handle_sync(handle); 4959 - ret2 = ext4_journal_stop(handle); 4960 - if (ret2) 4961 - break; 4603 + if (!(mode & FALLOC_FL_KEEP_SIZE) && 4604 + offset + len > i_size_read(inode)) { 4605 + new_size = offset + len; 4606 + ret = inode_newsize_ok(inode, new_size); 4607 + if (ret) 4608 + goto out; 4962 4609 } 4963 - if (ret == -ENOSPC && 4964 - ext4_should_retry_alloc(inode->i_sb, &retries)) { 4965 - ret = 0; 4966 - goto retry; 4610 + 4611 + ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags, mode); 4612 + if (ret) 4613 + goto out; 4614 + 4615 + handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); 4616 + if (IS_ERR(handle)) 4617 + goto out; 4618 + 4619 + tv = inode->i_ctime = ext4_current_time(inode); 4620 + 4621 + if (new_size) { 4622 + if (new_size > i_size_read(inode)) { 4623 + i_size_write(inode, new_size); 4624 + inode->i_mtime = tv; 4625 + } 4626 + if (new_size > EXT4_I(inode)->i_disksize) 4627 + ext4_update_i_disksize(inode, new_size); 4628 + } else { 4629 + /* 4630 + * Mark that we allocate beyond EOF so the subsequent truncate 4631 + * can proceed even if the new size is the same as i_size. 4632 + */ 4633 + if ((offset + len) > i_size_read(inode)) 4634 + ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS); 4967 4635 } 4636 + ext4_mark_inode_dirty(handle, inode); 4637 + if (file->f_flags & O_SYNC) 4638 + ext4_handle_sync(handle); 4639 + 4640 + ext4_journal_stop(handle); 4641 + out: 4968 4642 mutex_unlock(&inode->i_mutex); 4969 - trace_ext4_fallocate_exit(inode, offset, max_blocks, 4970 - ret > 0 ? ret2 : ret); 4971 - return ret > 0 ? 
ret2 : ret; 4643 + trace_ext4_fallocate_exit(inode, offset, max_blocks, ret); 4644 + return ret; 4972 4645 } 4973 4646 4974 4647 /* ··· 5161 4868 } 5162 4869 ext4_es_lru_add(inode); 5163 4870 return error; 4871 + } 4872 + 4873 + /* 4874 + * ext4_access_path: 4875 + * Function to access the path buffer for marking it dirty. 4876 + * It also checks if there are sufficient credits left in the journal handle 4877 + * to update path. 4878 + */ 4879 + static int 4880 + ext4_access_path(handle_t *handle, struct inode *inode, 4881 + struct ext4_ext_path *path) 4882 + { 4883 + int credits, err; 4884 + 4885 + if (!ext4_handle_valid(handle)) 4886 + return 0; 4887 + 4888 + /* 4889 + * Check if need to extend journal credits 4890 + * 3 for leaf, sb, and inode plus 2 (bmap and group 4891 + * descriptor) for each block group; assume two block 4892 + * groups 4893 + */ 4894 + if (handle->h_buffer_credits < 7) { 4895 + credits = ext4_writepage_trans_blocks(inode); 4896 + err = ext4_ext_truncate_extend_restart(handle, inode, credits); 4897 + /* EAGAIN is success */ 4898 + if (err && err != -EAGAIN) 4899 + return err; 4900 + } 4901 + 4902 + err = ext4_ext_get_access(handle, inode, path); 4903 + return err; 4904 + } 4905 + 4906 + /* 4907 + * ext4_ext_shift_path_extents: 4908 + * Shift the extents of a path structure lying between path[depth].p_ext 4909 + * and EXT_LAST_EXTENT(path[depth].p_hdr) downwards, by subtracting shift 4910 + * from starting block for each extent. 
4911 + */ 4912 + static int 4913 + ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift, 4914 + struct inode *inode, handle_t *handle, 4915 + ext4_lblk_t *start) 4916 + { 4917 + int depth, err = 0; 4918 + struct ext4_extent *ex_start, *ex_last; 4919 + bool update = 0; 4920 + depth = path->p_depth; 4921 + 4922 + while (depth >= 0) { 4923 + if (depth == path->p_depth) { 4924 + ex_start = path[depth].p_ext; 4925 + if (!ex_start) 4926 + return -EIO; 4927 + 4928 + ex_last = EXT_LAST_EXTENT(path[depth].p_hdr); 4929 + if (!ex_last) 4930 + return -EIO; 4931 + 4932 + err = ext4_access_path(handle, inode, path + depth); 4933 + if (err) 4934 + goto out; 4935 + 4936 + if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr)) 4937 + update = 1; 4938 + 4939 + *start = ex_last->ee_block + 4940 + ext4_ext_get_actual_len(ex_last); 4941 + 4942 + while (ex_start <= ex_last) { 4943 + ex_start->ee_block -= shift; 4944 + if (ex_start > 4945 + EXT_FIRST_EXTENT(path[depth].p_hdr)) { 4946 + if (ext4_ext_try_to_merge_right(inode, 4947 + path, ex_start - 1)) 4948 + ex_last--; 4949 + } 4950 + ex_start++; 4951 + } 4952 + err = ext4_ext_dirty(handle, inode, path + depth); 4953 + if (err) 4954 + goto out; 4955 + 4956 + if (--depth < 0 || !update) 4957 + break; 4958 + } 4959 + 4960 + /* Update index too */ 4961 + err = ext4_access_path(handle, inode, path + depth); 4962 + if (err) 4963 + goto out; 4964 + 4965 + path[depth].p_idx->ei_block -= shift; 4966 + err = ext4_ext_dirty(handle, inode, path + depth); 4967 + if (err) 4968 + goto out; 4969 + 4970 + /* we are done if current index is not a starting index */ 4971 + if (path[depth].p_idx != EXT_FIRST_INDEX(path[depth].p_hdr)) 4972 + break; 4973 + 4974 + depth--; 4975 + } 4976 + 4977 + out: 4978 + return err; 4979 + } 4980 + 4981 + /* 4982 + * ext4_ext_shift_extents: 4983 + * All the extents which lies in the range from start to the last allocated 4984 + * block for the file are shifted downwards by shift blocks. 
4985 + * On success, 0 is returned, error otherwise. 4986 + */ 4987 + static int 4988 + ext4_ext_shift_extents(struct inode *inode, handle_t *handle, 4989 + ext4_lblk_t start, ext4_lblk_t shift) 4990 + { 4991 + struct ext4_ext_path *path; 4992 + int ret = 0, depth; 4993 + struct ext4_extent *extent; 4994 + ext4_lblk_t stop_block, current_block; 4995 + ext4_lblk_t ex_start, ex_end; 4996 + 4997 + /* Let path point to the last extent */ 4998 + path = ext4_ext_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0); 4999 + if (IS_ERR(path)) 5000 + return PTR_ERR(path); 5001 + 5002 + depth = path->p_depth; 5003 + extent = path[depth].p_ext; 5004 + if (!extent) { 5005 + ext4_ext_drop_refs(path); 5006 + kfree(path); 5007 + return ret; 5008 + } 5009 + 5010 + stop_block = extent->ee_block + ext4_ext_get_actual_len(extent); 5011 + ext4_ext_drop_refs(path); 5012 + kfree(path); 5013 + 5014 + /* Nothing to shift, if hole is at the end of file */ 5015 + if (start >= stop_block) 5016 + return ret; 5017 + 5018 + /* 5019 + * Don't start shifting extents until we make sure the hole is big 5020 + * enough to accomodate the shift. 
5021 + */ 5022 + path = ext4_ext_find_extent(inode, start - 1, NULL, 0); 5023 + depth = path->p_depth; 5024 + extent = path[depth].p_ext; 5025 + ex_start = extent->ee_block; 5026 + ex_end = extent->ee_block + ext4_ext_get_actual_len(extent); 5027 + ext4_ext_drop_refs(path); 5028 + kfree(path); 5029 + 5030 + if ((start == ex_start && shift > ex_start) || 5031 + (shift > start - ex_end)) 5032 + return -EINVAL; 5033 + 5034 + /* Its safe to start updating extents */ 5035 + while (start < stop_block) { 5036 + path = ext4_ext_find_extent(inode, start, NULL, 0); 5037 + if (IS_ERR(path)) 5038 + return PTR_ERR(path); 5039 + depth = path->p_depth; 5040 + extent = path[depth].p_ext; 5041 + current_block = extent->ee_block; 5042 + if (start > current_block) { 5043 + /* Hole, move to the next extent */ 5044 + ret = mext_next_extent(inode, path, &extent); 5045 + if (ret != 0) { 5046 + ext4_ext_drop_refs(path); 5047 + kfree(path); 5048 + if (ret == 1) 5049 + ret = 0; 5050 + break; 5051 + } 5052 + } 5053 + ret = ext4_ext_shift_path_extents(path, shift, inode, 5054 + handle, &start); 5055 + ext4_ext_drop_refs(path); 5056 + kfree(path); 5057 + if (ret) 5058 + break; 5059 + } 5060 + 5061 + return ret; 5062 + } 5063 + 5064 + /* 5065 + * ext4_collapse_range: 5066 + * This implements the fallocate's collapse range functionality for ext4 5067 + * Returns: 0 and non-zero on error. 5068 + */ 5069 + int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) 5070 + { 5071 + struct super_block *sb = inode->i_sb; 5072 + ext4_lblk_t punch_start, punch_stop; 5073 + handle_t *handle; 5074 + unsigned int credits; 5075 + loff_t new_size; 5076 + int ret; 5077 + 5078 + BUG_ON(offset + len > i_size_read(inode)); 5079 + 5080 + /* Collapse range works only on fs block size aligned offsets. 
*/ 5081 + if (offset & (EXT4_BLOCK_SIZE(sb) - 1) || 5082 + len & (EXT4_BLOCK_SIZE(sb) - 1)) 5083 + return -EINVAL; 5084 + 5085 + if (!S_ISREG(inode->i_mode)) 5086 + return -EOPNOTSUPP; 5087 + 5088 + trace_ext4_collapse_range(inode, offset, len); 5089 + 5090 + punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb); 5091 + punch_stop = (offset + len) >> EXT4_BLOCK_SIZE_BITS(sb); 5092 + 5093 + /* Write out all dirty pages */ 5094 + ret = filemap_write_and_wait_range(inode->i_mapping, offset, -1); 5095 + if (ret) 5096 + return ret; 5097 + 5098 + /* Take mutex lock */ 5099 + mutex_lock(&inode->i_mutex); 5100 + 5101 + /* It's not possible punch hole on append only file */ 5102 + if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) { 5103 + ret = -EPERM; 5104 + goto out_mutex; 5105 + } 5106 + 5107 + if (IS_SWAPFILE(inode)) { 5108 + ret = -ETXTBSY; 5109 + goto out_mutex; 5110 + } 5111 + 5112 + /* Currently just for extent based files */ 5113 + if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { 5114 + ret = -EOPNOTSUPP; 5115 + goto out_mutex; 5116 + } 5117 + 5118 + truncate_pagecache_range(inode, offset, -1); 5119 + 5120 + /* Wait for existing dio to complete */ 5121 + ext4_inode_block_unlocked_dio(inode); 5122 + inode_dio_wait(inode); 5123 + 5124 + credits = ext4_writepage_trans_blocks(inode); 5125 + handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); 5126 + if (IS_ERR(handle)) { 5127 + ret = PTR_ERR(handle); 5128 + goto out_dio; 5129 + } 5130 + 5131 + down_write(&EXT4_I(inode)->i_data_sem); 5132 + ext4_discard_preallocations(inode); 5133 + 5134 + ret = ext4_es_remove_extent(inode, punch_start, 5135 + EXT_MAX_BLOCKS - punch_start - 1); 5136 + if (ret) { 5137 + up_write(&EXT4_I(inode)->i_data_sem); 5138 + goto out_stop; 5139 + } 5140 + 5141 + ret = ext4_ext_remove_space(inode, punch_start, punch_stop - 1); 5142 + if (ret) { 5143 + up_write(&EXT4_I(inode)->i_data_sem); 5144 + goto out_stop; 5145 + } 5146 + 5147 + ret = ext4_ext_shift_extents(inode, handle, punch_stop, 5148 
+ punch_stop - punch_start); 5149 + if (ret) { 5150 + up_write(&EXT4_I(inode)->i_data_sem); 5151 + goto out_stop; 5152 + } 5153 + 5154 + new_size = i_size_read(inode) - len; 5155 + truncate_setsize(inode, new_size); 5156 + EXT4_I(inode)->i_disksize = new_size; 5157 + 5158 + ext4_discard_preallocations(inode); 5159 + up_write(&EXT4_I(inode)->i_data_sem); 5160 + if (IS_SYNC(inode)) 5161 + ext4_handle_sync(handle); 5162 + inode->i_mtime = inode->i_ctime = ext4_current_time(inode); 5163 + ext4_mark_inode_dirty(handle, inode); 5164 + 5165 + out_stop: 5166 + ext4_journal_stop(handle); 5167 + out_dio: 5168 + ext4_inode_resume_unlocked_dio(inode); 5169 + out_mutex: 5170 + mutex_unlock(&inode->i_mutex); 5171 + return ret; 5164 5172 }
+13 -15
fs/ext4/extents_status.c
··· 184 184 while (node) { 185 185 struct extent_status *es; 186 186 es = rb_entry(node, struct extent_status, rb_node); 187 - printk(KERN_DEBUG " [%u/%u) %llu %llx", 187 + printk(KERN_DEBUG " [%u/%u) %llu %x", 188 188 es->es_lblk, es->es_len, 189 189 ext4_es_pblock(es), ext4_es_status(es)); 190 190 node = rb_next(node); ··· 445 445 pr_warn("ES insert assertion failed for " 446 446 "inode: %lu we can find an extent " 447 447 "at block [%d/%d/%llu/%c], but we " 448 - "want to add an delayed/hole extent " 449 - "[%d/%d/%llu/%llx]\n", 448 + "want to add a delayed/hole extent " 449 + "[%d/%d/%llu/%x]\n", 450 450 inode->i_ino, ee_block, ee_len, 451 451 ee_start, ee_status ? 'u' : 'w', 452 452 es->es_lblk, es->es_len, ··· 486 486 if (!ext4_es_is_delayed(es) && !ext4_es_is_hole(es)) { 487 487 pr_warn("ES insert assertion failed for inode: %lu " 488 488 "can't find an extent at block %d but we want " 489 - "to add an written/unwritten extent " 490 - "[%d/%d/%llu/%llx]\n", inode->i_ino, 489 + "to add a written/unwritten extent " 490 + "[%d/%d/%llu/%x]\n", inode->i_ino, 491 491 es->es_lblk, es->es_lblk, es->es_len, 492 492 ext4_es_pblock(es), ext4_es_status(es)); 493 493 } ··· 524 524 */ 525 525 pr_warn("ES insert assertion failed for inode: %lu " 526 526 "We can find blocks but we want to add a " 527 - "delayed/hole extent [%d/%d/%llu/%llx]\n", 527 + "delayed/hole extent [%d/%d/%llu/%x]\n", 528 528 inode->i_ino, es->es_lblk, es->es_len, 529 529 ext4_es_pblock(es), ext4_es_status(es)); 530 530 return; ··· 554 554 if (ext4_es_is_written(es)) { 555 555 pr_warn("ES insert assertion failed for inode: %lu " 556 556 "We can't find the block but we want to add " 557 - "an written extent [%d/%d/%llu/%llx]\n", 557 + "a written extent [%d/%d/%llu/%x]\n", 558 558 inode->i_ino, es->es_lblk, es->es_len, 559 559 ext4_es_pblock(es), ext4_es_status(es)); 560 560 return; ··· 658 658 659 659 newes.es_lblk = lblk; 660 660 newes.es_len = len; 661 - ext4_es_store_pblock(&newes, pblk); 662 - 
ext4_es_store_status(&newes, status); 661 + ext4_es_store_pblock_status(&newes, pblk, status); 663 662 trace_ext4_es_insert_extent(inode, &newes); 664 663 665 664 ext4_es_insert_extent_check(inode, &newes); ··· 698 699 699 700 newes.es_lblk = lblk; 700 701 newes.es_len = len; 701 - ext4_es_store_pblock(&newes, pblk); 702 - ext4_es_store_status(&newes, status); 702 + ext4_es_store_pblock_status(&newes, pblk, status); 703 703 trace_ext4_es_cache_extent(inode, &newes); 704 704 705 705 if (!len) ··· 810 812 811 813 newes.es_lblk = end + 1; 812 814 newes.es_len = len2; 815 + block = 0x7FDEADBEEF; 813 816 if (ext4_es_is_written(&orig_es) || 814 - ext4_es_is_unwritten(&orig_es)) { 817 + ext4_es_is_unwritten(&orig_es)) 815 818 block = ext4_es_pblock(&orig_es) + 816 819 orig_es.es_len - len2; 817 - ext4_es_store_pblock(&newes, block); 818 - } 819 - ext4_es_store_status(&newes, ext4_es_status(&orig_es)); 820 + ext4_es_store_pblock_status(&newes, block, 821 + ext4_es_status(&orig_es)); 820 822 err = __es_insert_extent(inode, &newes); 821 823 if (err) { 822 824 es->es_lblk = orig_es.es_lblk;
+9
fs/ext4/extents_status.h
··· 129 129 (es->es_pblk & ~ES_MASK)); 130 130 } 131 131 132 + static inline void ext4_es_store_pblock_status(struct extent_status *es, 133 + ext4_fsblk_t pb, 134 + unsigned int status) 135 + { 136 + es->es_pblk = (((ext4_fsblk_t) 137 + (status & EXTENT_STATUS_FLAGS) << ES_SHIFT) | 138 + (pb & ~ES_MASK)); 139 + } 140 + 132 141 extern void ext4_es_register_shrinker(struct ext4_sb_info *sbi); 133 142 extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi); 134 143 extern void ext4_es_lru_add(struct inode *inode);
+75 -45
fs/ext4/inode.c
··· 504 504 { 505 505 struct extent_status es; 506 506 int retval; 507 + int ret = 0; 507 508 #ifdef ES_AGGRESSIVE_TEST 508 509 struct ext4_map_blocks orig_map; 509 510 ··· 515 514 ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u," 516 515 "logical block %lu\n", inode->i_ino, flags, map->m_len, 517 516 (unsigned long) map->m_lblk); 517 + 518 + /* 519 + * ext4_map_blocks returns an int, and m_len is an unsigned int 520 + */ 521 + if (unlikely(map->m_len > INT_MAX)) 522 + map->m_len = INT_MAX; 518 523 519 524 /* Lookup extent status tree firstly */ 520 525 if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) { ··· 560 553 EXT4_GET_BLOCKS_KEEP_SIZE); 561 554 } 562 555 if (retval > 0) { 563 - int ret; 564 556 unsigned int status; 565 557 566 558 if (unlikely(retval != map->m_len)) { ··· 586 580 587 581 found: 588 582 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { 589 - int ret = check_block_validity(inode, map); 583 + ret = check_block_validity(inode, map); 590 584 if (ret != 0) 591 585 return ret; 592 586 } ··· 603 597 * with buffer head unmapped. 
604 598 */ 605 599 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) 606 - return retval; 600 + /* 601 + * If we need to convert extent to unwritten 602 + * we continue and do the actual work in 603 + * ext4_ext_map_blocks() 604 + */ 605 + if (!(flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) 606 + return retval; 607 607 608 608 /* 609 609 * Here we clear m_flags because after allocating an new extent, ··· 665 653 ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED); 666 654 667 655 if (retval > 0) { 668 - int ret; 669 656 unsigned int status; 670 657 671 658 if (unlikely(retval != map->m_len)) { ··· 699 688 has_zeroout: 700 689 up_write((&EXT4_I(inode)->i_data_sem)); 701 690 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { 702 - int ret = check_block_validity(inode, map); 691 + ret = check_block_validity(inode, map); 703 692 if (ret != 0) 704 693 return ret; 705 694 } ··· 3324 3313 } 3325 3314 3326 3315 /* 3327 - * ext4_block_truncate_page() zeroes out a mapping from file offset `from' 3328 - * up to the end of the block which corresponds to `from'. 3329 - * This required during truncate. We need to physically zero the tail end 3330 - * of that block so it doesn't yield old data if the file is later grown. 3331 - */ 3332 - int ext4_block_truncate_page(handle_t *handle, 3333 - struct address_space *mapping, loff_t from) 3334 - { 3335 - unsigned offset = from & (PAGE_CACHE_SIZE-1); 3336 - unsigned length; 3337 - unsigned blocksize; 3338 - struct inode *inode = mapping->host; 3339 - 3340 - blocksize = inode->i_sb->s_blocksize; 3341 - length = blocksize - (offset & (blocksize - 1)); 3342 - 3343 - return ext4_block_zero_page_range(handle, mapping, from, length); 3344 - } 3345 - 3346 - /* 3347 3316 * ext4_block_zero_page_range() zeros out a mapping of length 'length' 3348 3317 * starting from file offset 'from'. The range to be zero'd must 3349 3318 * be contained with in one block. 
If the specified range exceeds 3350 3319 * the end of the block it will be shortened to end of the block 3351 3320 * that cooresponds to 'from' 3352 3321 */ 3353 - int ext4_block_zero_page_range(handle_t *handle, 3322 + static int ext4_block_zero_page_range(handle_t *handle, 3354 3323 struct address_space *mapping, loff_t from, loff_t length) 3355 3324 { 3356 3325 ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; ··· 3420 3429 return err; 3421 3430 } 3422 3431 3432 + /* 3433 + * ext4_block_truncate_page() zeroes out a mapping from file offset `from' 3434 + * up to the end of the block which corresponds to `from'. 3435 + * This required during truncate. We need to physically zero the tail end 3436 + * of that block so it doesn't yield old data if the file is later grown. 3437 + */ 3438 + int ext4_block_truncate_page(handle_t *handle, 3439 + struct address_space *mapping, loff_t from) 3440 + { 3441 + unsigned offset = from & (PAGE_CACHE_SIZE-1); 3442 + unsigned length; 3443 + unsigned blocksize; 3444 + struct inode *inode = mapping->host; 3445 + 3446 + blocksize = inode->i_sb->s_blocksize; 3447 + length = blocksize - (offset & (blocksize - 1)); 3448 + 3449 + return ext4_block_zero_page_range(handle, mapping, from, length); 3450 + } 3451 + 3423 3452 int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, 3424 3453 loff_t lstart, loff_t length) 3425 3454 { ··· 3513 3502 if (!S_ISREG(inode->i_mode)) 3514 3503 return -EOPNOTSUPP; 3515 3504 3516 - trace_ext4_punch_hole(inode, offset, length); 3505 + trace_ext4_punch_hole(inode, offset, length, 0); 3517 3506 3518 3507 /* 3519 3508 * Write out all dirty pages to avoid race conditions ··· 3620 3609 up_write(&EXT4_I(inode)->i_data_sem); 3621 3610 if (IS_SYNC(inode)) 3622 3611 ext4_handle_sync(handle); 3612 + 3613 + /* Now release the pages again to reduce race window */ 3614 + if (last_block_offset > first_block_offset) 3615 + truncate_pagecache_range(inode, first_block_offset, 3616 + last_block_offset); 3617 + 3623 
3618 inode->i_mtime = inode->i_ctime = ext4_current_time(inode); 3624 3619 ext4_mark_inode_dirty(handle, inode); 3625 3620 out_stop: ··· 3699 3682 3700 3683 /* 3701 3684 * There is a possibility that we're either freeing the inode 3702 - * or it completely new indode. In those cases we might not 3685 + * or it's a completely new inode. In those cases we might not 3703 3686 * have i_mutex locked because it's not necessary. 3704 3687 */ 3705 3688 if (!(inode->i_state & (I_NEW|I_FREEING))) ··· 3951 3934 new_fl |= S_NOATIME; 3952 3935 if (flags & EXT4_DIRSYNC_FL) 3953 3936 new_fl |= S_DIRSYNC; 3954 - set_mask_bits(&inode->i_flags, 3955 - S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC, new_fl); 3937 + inode_set_flags(inode, new_fl, 3938 + S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); 3956 3939 } 3957 3940 3958 3941 /* Propagate flags from i_flags to EXT4_I(inode)->i_flags */ ··· 4171 4154 EXT4_INODE_GET_XTIME(i_atime, inode, raw_inode); 4172 4155 EXT4_EINODE_GET_XTIME(i_crtime, ei, raw_inode); 4173 4156 4174 - inode->i_version = le32_to_cpu(raw_inode->i_disk_version); 4175 - if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { 4176 - if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi)) 4177 - inode->i_version |= 4178 - (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32; 4157 + if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) { 4158 + inode->i_version = le32_to_cpu(raw_inode->i_disk_version); 4159 + if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { 4160 + if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi)) 4161 + inode->i_version |= 4162 + (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32; 4163 + } 4179 4164 } 4180 4165 4181 4166 ret = 0; ··· 4347 4328 goto out_brelse; 4348 4329 raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); 4349 4330 raw_inode->i_flags = cpu_to_le32(ei->i_flags & 0xFFFFFFFF); 4350 - if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != 4351 - cpu_to_le32(EXT4_OS_HURD)) 4331 + if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) 
4352 4332 raw_inode->i_file_acl_high = 4353 4333 cpu_to_le16(ei->i_file_acl >> 32); 4354 4334 raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl); ··· 4392 4374 raw_inode->i_block[block] = ei->i_data[block]; 4393 4375 } 4394 4376 4395 - raw_inode->i_disk_version = cpu_to_le32(inode->i_version); 4396 - if (ei->i_extra_isize) { 4397 - if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi)) 4398 - raw_inode->i_version_hi = 4399 - cpu_to_le32(inode->i_version >> 32); 4400 - raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); 4377 + if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) { 4378 + raw_inode->i_disk_version = cpu_to_le32(inode->i_version); 4379 + if (ei->i_extra_isize) { 4380 + if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi)) 4381 + raw_inode->i_version_hi = 4382 + cpu_to_le32(inode->i_version >> 32); 4383 + raw_inode->i_extra_isize = 4384 + cpu_to_le16(ei->i_extra_isize); 4385 + } 4401 4386 } 4402 4387 4403 4388 ext4_inode_csum_set(inode, raw_inode, ei); ··· 4467 4446 return -EIO; 4468 4447 } 4469 4448 4470 - if (wbc->sync_mode != WB_SYNC_ALL) 4449 + /* 4450 + * No need to force transaction in WB_SYNC_NONE mode. Also 4451 + * ext4_sync_fs() will force the commit after everything is 4452 + * written. 4453 + */ 4454 + if (wbc->sync_mode != WB_SYNC_ALL || wbc->for_sync) 4471 4455 return 0; 4472 4456 4473 4457 err = ext4_force_commit(inode->i_sb); ··· 4482 4456 err = __ext4_get_inode_loc(inode, &iloc, 0); 4483 4457 if (err) 4484 4458 return err; 4485 - if (wbc->sync_mode == WB_SYNC_ALL) 4459 + /* 4460 + * sync(2) will flush the whole buffer cache. No need to do 4461 + * it here separately for each inode. 4462 + */ 4463 + if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync) 4486 4464 sync_dirty_buffer(iloc.bh); 4487 4465 if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { 4488 4466 EXT4_ERROR_INODE_BLOCK(inode, iloc.bh->b_blocknr,
+6 -18
fs/ext4/ioctl.c
··· 104 104 struct ext4_inode_info *ei_bl; 105 105 struct ext4_sb_info *sbi = EXT4_SB(sb); 106 106 107 - if (inode->i_nlink != 1 || !S_ISREG(inode->i_mode)) { 108 - err = -EINVAL; 109 - goto swap_boot_out; 110 - } 107 + if (inode->i_nlink != 1 || !S_ISREG(inode->i_mode)) 108 + return -EINVAL; 111 109 112 - if (!inode_owner_or_capable(inode) || !capable(CAP_SYS_ADMIN)) { 113 - err = -EPERM; 114 - goto swap_boot_out; 115 - } 110 + if (!inode_owner_or_capable(inode) || !capable(CAP_SYS_ADMIN)) 111 + return -EPERM; 116 112 117 113 inode_bl = ext4_iget(sb, EXT4_BOOT_LOADER_INO); 118 - if (IS_ERR(inode_bl)) { 119 - err = PTR_ERR(inode_bl); 120 - goto swap_boot_out; 121 - } 114 + if (IS_ERR(inode_bl)) 115 + return PTR_ERR(inode_bl); 122 116 ei_bl = EXT4_I(inode_bl); 123 117 124 118 filemap_flush(inode->i_mapping); ··· 187 193 ext4_mark_inode_dirty(handle, inode); 188 194 } 189 195 } 190 - 191 196 ext4_journal_stop(handle); 192 - 193 197 ext4_double_up_write_data_sem(inode, inode_bl); 194 198 195 199 journal_err_out: 196 200 ext4_inode_resume_unlocked_dio(inode); 197 201 ext4_inode_resume_unlocked_dio(inode_bl); 198 - 199 202 unlock_two_nondirectories(inode, inode_bl); 200 - 201 203 iput(inode_bl); 202 - 203 - swap_boot_out: 204 204 return err; 205 205 } 206 206
+4 -3
fs/ext4/mballoc.c
··· 1808 1808 ext4_lock_group(ac->ac_sb, group); 1809 1809 max = mb_find_extent(e4b, ac->ac_g_ex.fe_start, 1810 1810 ac->ac_g_ex.fe_len, &ex); 1811 + ex.fe_logical = 0xDEADFA11; /* debug value */ 1811 1812 1812 1813 if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) { 1813 1814 ext4_fsblk_t start; ··· 1937 1936 */ 1938 1937 break; 1939 1938 } 1940 - 1939 + ex.fe_logical = 0xDEADC0DE; /* debug value */ 1941 1940 ext4_mb_measure_extent(ac, &ex, e4b); 1942 1941 1943 1942 i += ex.fe_len; ··· 1978 1977 max = mb_find_extent(e4b, i, sbi->s_stripe, &ex); 1979 1978 if (max >= sbi->s_stripe) { 1980 1979 ac->ac_found++; 1980 + ex.fe_logical = 0xDEADF00D; /* debug value */ 1981 1981 ac->ac_b_ex = ex; 1982 1982 ext4_mb_use_best_found(ac, e4b); 1983 1983 break; ··· 4008 4006 (unsigned long)ac->ac_b_ex.fe_len, 4009 4007 (unsigned long)ac->ac_b_ex.fe_logical, 4010 4008 (int)ac->ac_criteria); 4011 - ext4_msg(ac->ac_sb, KERN_ERR, "%lu scanned, %d found", 4012 - ac->ac_ex_scanned, ac->ac_found); 4009 + ext4_msg(ac->ac_sb, KERN_ERR, "%d found", ac->ac_found); 4013 4010 ext4_msg(ac->ac_sb, KERN_ERR, "groups: "); 4014 4011 ngroups = ext4_get_groups_count(sb); 4015 4012 for (i = 0; i < ngroups; i++) {
+1 -3
fs/ext4/mballoc.h
··· 48 48 } \ 49 49 } while (0) 50 50 #else 51 - #define mb_debug(n, fmt, a...) 51 + #define mb_debug(n, fmt, a...) no_printk(fmt, ## a) 52 52 #endif 53 53 54 54 #define EXT4_MB_HISTORY_ALLOC 1 /* allocation */ ··· 175 175 /* copy of the best found extent taken before preallocation efforts */ 176 176 struct ext4_free_extent ac_f_ex; 177 177 178 - /* number of iterations done. we have to track to limit searching */ 179 - unsigned long ac_ex_scanned; 180 178 __u16 ac_groups_scanned; 181 179 __u16 ac_found; 182 180 __u16 ac_tail;
+2 -3
fs/ext4/move_extent.c
··· 76 76 * ext4_ext_path structure refers to the last extent, or a negative error 77 77 * value on failure. 78 78 */ 79 - static int 79 + int 80 80 mext_next_extent(struct inode *inode, struct ext4_ext_path *path, 81 81 struct ext4_extent **extent) 82 82 { ··· 861 861 } 862 862 if (!buffer_mapped(bh)) { 863 863 zero_user(page, block_start, blocksize); 864 - if (!err) 865 - set_buffer_uptodate(bh); 864 + set_buffer_uptodate(bh); 866 865 continue; 867 866 } 868 867 }
+31 -9
fs/ext4/super.c
··· 59 59 static struct ext4_lazy_init *ext4_li_info; 60 60 static struct mutex ext4_li_mtx; 61 61 static struct ext4_features *ext4_feat; 62 + static int ext4_mballoc_ready; 62 63 63 64 static int ext4_load_journal(struct super_block *, struct ext4_super_block *, 64 65 unsigned long journal_devnum); ··· 846 845 invalidate_bdev(sbi->journal_bdev); 847 846 ext4_blkdev_remove(sbi); 848 847 } 848 + if (sbi->s_mb_cache) { 849 + ext4_xattr_destroy_cache(sbi->s_mb_cache); 850 + sbi->s_mb_cache = NULL; 851 + } 849 852 if (sbi->s_mmp_tsk) 850 853 kthread_stop(sbi->s_mmp_tsk); 851 854 sb->s_fs_info = NULL; ··· 945 940 inode_init_once(&ei->vfs_inode); 946 941 } 947 942 948 - static int init_inodecache(void) 943 + static int __init init_inodecache(void) 949 944 { 950 945 ext4_inode_cachep = kmem_cache_create("ext4_inode_cache", 951 946 sizeof(struct ext4_inode_info), ··· 3580 3575 "feature flags set on rev 0 fs, " 3581 3576 "running e2fsck is recommended"); 3582 3577 3578 + if (es->s_creator_os == cpu_to_le32(EXT4_OS_HURD)) { 3579 + set_opt2(sb, HURD_COMPAT); 3580 + if (EXT4_HAS_INCOMPAT_FEATURE(sb, 3581 + EXT4_FEATURE_INCOMPAT_64BIT)) { 3582 + ext4_msg(sb, KERN_ERR, 3583 + "The Hurd can't support 64-bit file systems"); 3584 + goto failed_mount; 3585 + } 3586 + } 3587 + 3583 3588 if (IS_EXT2_SB(sb)) { 3584 3589 if (ext2_feature_set_ok(sb)) 3585 3590 ext4_msg(sb, KERN_INFO, "mounting ext2 file system " ··· 4025 4010 percpu_counter_set(&sbi->s_dirtyclusters_counter, 0); 4026 4011 4027 4012 no_journal: 4013 + if (ext4_mballoc_ready) { 4014 + sbi->s_mb_cache = ext4_xattr_create_cache(sb->s_id); 4015 + if (!sbi->s_mb_cache) { 4016 + ext4_msg(sb, KERN_ERR, "Failed to create an mb_cache"); 4017 + goto failed_mount_wq; 4018 + } 4019 + } 4020 + 4028 4021 /* 4029 4022 * Get the # of file system overhead blocks from the 4030 4023 * superblock if present. 
··· 4858 4835 } 4859 4836 4860 4837 if (*flags & MS_RDONLY) { 4838 + err = sync_filesystem(sb); 4839 + if (err < 0) 4840 + goto restore_opts; 4861 4841 err = dquot_suspend(sb, -1); 4862 4842 if (err < 0) 4863 4843 goto restore_opts; ··· 5542 5516 5543 5517 err = ext4_init_mballoc(); 5544 5518 if (err) 5545 - goto out3; 5546 - 5547 - err = ext4_init_xattr(); 5548 - if (err) 5549 5519 goto out2; 5520 + else 5521 + ext4_mballoc_ready = 1; 5550 5522 err = init_inodecache(); 5551 5523 if (err) 5552 5524 goto out1; ··· 5560 5536 unregister_as_ext3(); 5561 5537 destroy_inodecache(); 5562 5538 out1: 5563 - ext4_exit_xattr(); 5564 - out2: 5539 + ext4_mballoc_ready = 0; 5565 5540 ext4_exit_mballoc(); 5566 - out3: 5541 + out2: 5567 5542 ext4_exit_feat_adverts(); 5568 5543 out4: 5569 5544 if (ext4_proc_root) ··· 5585 5562 unregister_as_ext3(); 5586 5563 unregister_filesystem(&ext4_fs_type); 5587 5564 destroy_inodecache(); 5588 - ext4_exit_xattr(); 5589 5565 ext4_exit_mballoc(); 5590 5566 ext4_exit_feat_adverts(); 5591 5567 remove_proc_entry("fs/ext4", NULL);
+32 -27
fs/ext4/xattr.c
··· 81 81 # define ea_bdebug(bh, fmt, ...) no_printk(fmt, ##__VA_ARGS__) 82 82 #endif 83 83 84 - static void ext4_xattr_cache_insert(struct buffer_head *); 84 + static void ext4_xattr_cache_insert(struct mb_cache *, struct buffer_head *); 85 85 static struct buffer_head *ext4_xattr_cache_find(struct inode *, 86 86 struct ext4_xattr_header *, 87 87 struct mb_cache_entry **); ··· 89 89 struct ext4_xattr_entry *); 90 90 static int ext4_xattr_list(struct dentry *dentry, char *buffer, 91 91 size_t buffer_size); 92 - 93 - static struct mb_cache *ext4_xattr_cache; 94 92 95 93 static const struct xattr_handler *ext4_xattr_handler_map[] = { 96 94 [EXT4_XATTR_INDEX_USER] = &ext4_xattr_user_handler, ··· 114 116 #endif 115 117 NULL 116 118 }; 119 + 120 + #define EXT4_GET_MB_CACHE(inode) (((struct ext4_sb_info *) \ 121 + inode->i_sb->s_fs_info)->s_mb_cache) 117 122 118 123 static __le32 ext4_xattr_block_csum(struct inode *inode, 119 124 sector_t block_nr, ··· 266 265 struct ext4_xattr_entry *entry; 267 266 size_t size; 268 267 int error; 268 + struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); 269 269 270 270 ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", 271 271 name_index, name, buffer, (long)buffer_size); ··· 288 286 error = -EIO; 289 287 goto cleanup; 290 288 } 291 - ext4_xattr_cache_insert(bh); 289 + ext4_xattr_cache_insert(ext4_mb_cache, bh); 292 290 entry = BFIRST(bh); 293 291 error = ext4_xattr_find_entry(&entry, name_index, name, bh->b_size, 1); 294 292 if (error == -EIO) ··· 411 409 struct inode *inode = dentry->d_inode; 412 410 struct buffer_head *bh = NULL; 413 411 int error; 412 + struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); 414 413 415 414 ea_idebug(inode, "buffer=%p, buffer_size=%ld", 416 415 buffer, (long)buffer_size); ··· 433 430 error = -EIO; 434 431 goto cleanup; 435 432 } 436 - ext4_xattr_cache_insert(bh); 433 + ext4_xattr_cache_insert(ext4_mb_cache, bh); 437 434 error = ext4_xattr_list_entries(dentry, BFIRST(bh), buffer, 
buffer_size); 438 435 439 436 cleanup: ··· 529 526 { 530 527 struct mb_cache_entry *ce = NULL; 531 528 int error = 0; 529 + struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); 532 530 533 - ce = mb_cache_entry_get(ext4_xattr_cache, bh->b_bdev, bh->b_blocknr); 531 + ce = mb_cache_entry_get(ext4_mb_cache, bh->b_bdev, bh->b_blocknr); 534 532 error = ext4_journal_get_write_access(handle, bh); 535 533 if (error) 536 534 goto out; ··· 571 567 size_t *min_offs, void *base, int *total) 572 568 { 573 569 for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { 574 - *total += EXT4_XATTR_LEN(last->e_name_len); 575 570 if (!last->e_value_block && last->e_value_size) { 576 571 size_t offs = le16_to_cpu(last->e_value_offs); 577 572 if (offs < *min_offs) 578 573 *min_offs = offs; 579 574 } 575 + if (total) 576 + *total += EXT4_XATTR_LEN(last->e_name_len); 580 577 } 581 578 return (*min_offs - ((void *)last - base) - sizeof(__u32)); 582 579 } ··· 750 745 struct ext4_xattr_search *s = &bs->s; 751 746 struct mb_cache_entry *ce = NULL; 752 747 int error = 0; 748 + struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); 753 749 754 750 #define header(x) ((struct ext4_xattr_header *)(x)) 755 751 756 752 if (i->value && i->value_len > sb->s_blocksize) 757 753 return -ENOSPC; 758 754 if (s->base) { 759 - ce = mb_cache_entry_get(ext4_xattr_cache, bs->bh->b_bdev, 755 + ce = mb_cache_entry_get(ext4_mb_cache, bs->bh->b_bdev, 760 756 bs->bh->b_blocknr); 761 757 error = ext4_journal_get_write_access(handle, bs->bh); 762 758 if (error) ··· 775 769 if (!IS_LAST_ENTRY(s->first)) 776 770 ext4_xattr_rehash(header(s->base), 777 771 s->here); 778 - ext4_xattr_cache_insert(bs->bh); 772 + ext4_xattr_cache_insert(ext4_mb_cache, 773 + bs->bh); 779 774 } 780 775 unlock_buffer(bs->bh); 781 776 if (error == -EIO) ··· 912 905 memcpy(new_bh->b_data, s->base, new_bh->b_size); 913 906 set_buffer_uptodate(new_bh); 914 907 unlock_buffer(new_bh); 915 - ext4_xattr_cache_insert(new_bh); 908 + 
ext4_xattr_cache_insert(ext4_mb_cache, new_bh); 916 909 error = ext4_handle_dirty_xattr_block(handle, 917 910 inode, new_bh); 918 911 if (error) ··· 1235 1228 struct ext4_xattr_block_find *bs = NULL; 1236 1229 char *buffer = NULL, *b_entry_name = NULL; 1237 1230 size_t min_offs, free; 1238 - int total_ino, total_blk; 1231 + int total_ino; 1239 1232 void *base, *start, *end; 1240 1233 int extra_isize = 0, error = 0, tried_min_extra_isize = 0; 1241 1234 int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize); ··· 1293 1286 first = BFIRST(bh); 1294 1287 end = bh->b_data + bh->b_size; 1295 1288 min_offs = end - base; 1296 - free = ext4_xattr_free_space(first, &min_offs, base, 1297 - &total_blk); 1289 + free = ext4_xattr_free_space(first, &min_offs, base, NULL); 1298 1290 if (free < new_extra_isize) { 1299 1291 if (!tried_min_extra_isize && s_min_extra_isize) { 1300 1292 tried_min_extra_isize++; ··· 1501 1495 * Returns 0, or a negative error number on failure. 1502 1496 */ 1503 1497 static void 1504 - ext4_xattr_cache_insert(struct buffer_head *bh) 1498 + ext4_xattr_cache_insert(struct mb_cache *ext4_mb_cache, struct buffer_head *bh) 1505 1499 { 1506 1500 __u32 hash = le32_to_cpu(BHDR(bh)->h_hash); 1507 1501 struct mb_cache_entry *ce; 1508 1502 int error; 1509 1503 1510 - ce = mb_cache_entry_alloc(ext4_xattr_cache, GFP_NOFS); 1504 + ce = mb_cache_entry_alloc(ext4_mb_cache, GFP_NOFS); 1511 1505 if (!ce) { 1512 1506 ea_bdebug(bh, "out of memory"); 1513 1507 return; ··· 1579 1573 { 1580 1574 __u32 hash = le32_to_cpu(header->h_hash); 1581 1575 struct mb_cache_entry *ce; 1576 + struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); 1582 1577 1583 1578 if (!header->h_hash) 1584 1579 return NULL; /* never share */ 1585 1580 ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); 1586 1581 again: 1587 - ce = mb_cache_entry_find_first(ext4_xattr_cache, inode->i_sb->s_bdev, 1582 + ce = mb_cache_entry_find_first(ext4_mb_cache, 
inode->i_sb->s_bdev, 1588 1583 hash); 1589 1584 while (ce) { 1590 1585 struct buffer_head *bh; ··· 1683 1676 1684 1677 #undef BLOCK_HASH_SHIFT 1685 1678 1686 - int __init 1687 - ext4_init_xattr(void) 1679 + #define HASH_BUCKET_BITS 10 1680 + 1681 + struct mb_cache * 1682 + ext4_xattr_create_cache(char *name) 1688 1683 { 1689 - ext4_xattr_cache = mb_cache_create("ext4_xattr", 6); 1690 - if (!ext4_xattr_cache) 1691 - return -ENOMEM; 1692 - return 0; 1684 + return mb_cache_create(name, HASH_BUCKET_BITS); 1693 1685 } 1694 1686 1695 - void 1696 - ext4_exit_xattr(void) 1687 + void ext4_xattr_destroy_cache(struct mb_cache *cache) 1697 1688 { 1698 - if (ext4_xattr_cache) 1699 - mb_cache_destroy(ext4_xattr_cache); 1700 - ext4_xattr_cache = NULL; 1689 + if (cache) 1690 + mb_cache_destroy(cache); 1701 1691 } 1692 +
+3 -3
fs/ext4/xattr.h
··· 110 110 extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, 111 111 struct ext4_inode *raw_inode, handle_t *handle); 112 112 113 - extern int __init ext4_init_xattr(void); 114 - extern void ext4_exit_xattr(void); 115 - 116 113 extern const struct xattr_handler *ext4_xattr_handlers[]; 117 114 118 115 extern int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i, ··· 120 123 extern int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode, 121 124 struct ext4_xattr_info *i, 122 125 struct ext4_xattr_ibody_find *is); 126 + 127 + extern struct mb_cache *ext4_xattr_create_cache(char *name); 128 + extern void ext4_xattr_destroy_cache(struct mb_cache *); 123 129 124 130 #ifdef CONFIG_EXT4_FS_SECURITY 125 131 extern int ext4_init_security(handle_t *handle, struct inode *inode,
+2
fs/f2fs/super.c
··· 568 568 struct f2fs_mount_info org_mount_opt; 569 569 int err, active_logs; 570 570 571 + sync_filesystem(sb); 572 + 571 573 /* 572 574 * Save the old mount options in case we 573 575 * need to restore them.
+2
fs/fat/inode.c
··· 635 635 struct msdos_sb_info *sbi = MSDOS_SB(sb); 636 636 *flags |= MS_NODIRATIME | (sbi->options.isvfat ? 0 : MS_NOATIME); 637 637 638 + sync_filesystem(sb); 639 + 638 640 /* make sure we update state on remount. */ 639 641 new_rdonly = *flags & MS_RDONLY; 640 642 if (new_rdonly != (sb->s_flags & MS_RDONLY)) {
+1
fs/freevxfs/vxfs_super.c
··· 124 124 125 125 static int vxfs_remount(struct super_block *sb, int *flags, char *data) 126 126 { 127 + sync_filesystem(sb); 127 128 *flags |= MS_RDONLY; 128 129 return 0; 129 130 }
+1
fs/fuse/inode.c
··· 135 135 136 136 static int fuse_remount_fs(struct super_block *sb, int *flags, char *data) 137 137 { 138 + sync_filesystem(sb); 138 139 if (*flags & MS_MANDLOCK) 139 140 return -EINVAL; 140 141
+2
fs/gfs2/super.c
··· 1167 1167 struct gfs2_tune *gt = &sdp->sd_tune; 1168 1168 int error; 1169 1169 1170 + sync_filesystem(sb); 1171 + 1170 1172 spin_lock(&gt->gt_spin); 1171 1173 args.ar_commit = gt->gt_logd_secs; 1172 1174 args.ar_quota_quantum = gt->gt_quota_quantum;
+1
fs/hfs/super.c
··· 112 112 113 113 static int hfs_remount(struct super_block *sb, int *flags, char *data) 114 114 { 115 + sync_filesystem(sb); 115 116 *flags |= MS_NODIRATIME; 116 117 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) 117 118 return 0;
+1
fs/hfsplus/super.c
··· 323 323 324 324 static int hfsplus_remount(struct super_block *sb, int *flags, char *data) 325 325 { 326 + sync_filesystem(sb); 326 327 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) 327 328 return 0; 328 329 if (!(*flags & MS_RDONLY)) {
+2
fs/hpfs/super.c
··· 421 421 struct hpfs_sb_info *sbi = hpfs_sb(s); 422 422 char *new_opts = kstrdup(data, GFP_KERNEL); 423 423 424 + sync_filesystem(s); 425 + 424 426 *flags |= MS_NOATIME; 425 427 426 428 hpfs_lock(s);
+31
fs/inode.c
··· 1898 1898 wake_up_bit(&inode->i_state, __I_DIO_WAKEUP); 1899 1899 } 1900 1900 EXPORT_SYMBOL(inode_dio_done); 1901 + 1902 + /* 1903 + * inode_set_flags - atomically set some inode flags 1904 + * 1905 + * Note: the caller should be holding i_mutex, or else be sure that 1906 + * they have exclusive access to the inode structure (i.e., while the 1907 + * inode is being instantiated). The reason for the cmpxchg() loop 1908 + * --- which wouldn't be necessary if all code paths which modify 1909 + * i_flags actually followed this rule, is that there is at least one 1910 + * code path which doesn't today --- for example, 1911 + * __generic_file_aio_write() calls file_remove_suid() without holding 1912 + * i_mutex --- so we use cmpxchg() out of an abundance of caution. 1913 + * 1914 + * In the long run, i_mutex is overkill, and we should probably look 1915 + * at using the i_lock spinlock to protect i_flags, and then make sure 1916 + * it is so documented in include/linux/fs.h and that all code follows 1917 + * the locking convention!! 1918 + */ 1919 + void inode_set_flags(struct inode *inode, unsigned int flags, 1920 + unsigned int mask) 1921 + { 1922 + unsigned int old_flags, new_flags; 1923 + 1924 + WARN_ON_ONCE(flags & ~mask); 1925 + do { 1926 + old_flags = ACCESS_ONCE(inode->i_flags); 1927 + new_flags = (old_flags & ~mask) | flags; 1928 + } while (unlikely(cmpxchg(&inode->i_flags, old_flags, 1929 + new_flags) != old_flags)); 1930 + } 1931 + EXPORT_SYMBOL(inode_set_flags);
+1
fs/isofs/inode.c
··· 117 117 118 118 static int isofs_remount(struct super_block *sb, int *flags, char *data) 119 119 { 120 + sync_filesystem(sb); 120 121 if (!(*flags & MS_RDONLY)) 121 122 return -EROFS; 122 123 return 0;
+38 -39
fs/jbd2/commit.c
··· 555 555 blk_start_plug(&plug); 556 556 jbd2_journal_write_revoke_records(journal, commit_transaction, 557 557 &log_bufs, WRITE_SYNC); 558 - blk_finish_plug(&plug); 559 558 560 559 jbd_debug(3, "JBD2: commit phase 2b\n"); 561 560 ··· 581 582 err = 0; 582 583 bufs = 0; 583 584 descriptor = NULL; 584 - blk_start_plug(&plug); 585 585 while (commit_transaction->t_buffers) { 586 586 587 587 /* Find the next buffer to be journaled... */ ··· 1065 1067 goto restart_loop; 1066 1068 } 1067 1069 1070 + /* Add the transaction to the checkpoint list 1071 + * __journal_remove_checkpoint() can not destroy transaction 1072 + * under us because it is not marked as T_FINISHED yet */ 1073 + if (journal->j_checkpoint_transactions == NULL) { 1074 + journal->j_checkpoint_transactions = commit_transaction; 1075 + commit_transaction->t_cpnext = commit_transaction; 1076 + commit_transaction->t_cpprev = commit_transaction; 1077 + } else { 1078 + commit_transaction->t_cpnext = 1079 + journal->j_checkpoint_transactions; 1080 + commit_transaction->t_cpprev = 1081 + commit_transaction->t_cpnext->t_cpprev; 1082 + commit_transaction->t_cpnext->t_cpprev = 1083 + commit_transaction; 1084 + commit_transaction->t_cpprev->t_cpnext = 1085 + commit_transaction; 1086 + } 1087 + spin_unlock(&journal->j_list_lock); 1088 + 1068 1089 /* Done with this transaction! 
*/ 1069 1090 1070 1091 jbd_debug(3, "JBD2: commit phase 7\n"); ··· 1102 1085 atomic_read(&commit_transaction->t_handle_count); 1103 1086 trace_jbd2_run_stats(journal->j_fs_dev->bd_dev, 1104 1087 commit_transaction->t_tid, &stats.run); 1105 - 1106 - /* 1107 - * Calculate overall stats 1108 - */ 1109 - spin_lock(&journal->j_history_lock); 1110 - journal->j_stats.ts_tid++; 1111 - if (commit_transaction->t_requested) 1112 - journal->j_stats.ts_requested++; 1113 - journal->j_stats.run.rs_wait += stats.run.rs_wait; 1114 - journal->j_stats.run.rs_request_delay += stats.run.rs_request_delay; 1115 - journal->j_stats.run.rs_running += stats.run.rs_running; 1116 - journal->j_stats.run.rs_locked += stats.run.rs_locked; 1117 - journal->j_stats.run.rs_flushing += stats.run.rs_flushing; 1118 - journal->j_stats.run.rs_logging += stats.run.rs_logging; 1119 - journal->j_stats.run.rs_handle_count += stats.run.rs_handle_count; 1120 - journal->j_stats.run.rs_blocks += stats.run.rs_blocks; 1121 - journal->j_stats.run.rs_blocks_logged += stats.run.rs_blocks_logged; 1122 - spin_unlock(&journal->j_history_lock); 1088 + stats.ts_requested = (commit_transaction->t_requested) ? 
1 : 0; 1123 1089 1124 1090 commit_transaction->t_state = T_COMMIT_CALLBACK; 1125 1091 J_ASSERT(commit_transaction == journal->j_committing_transaction); ··· 1122 1122 1123 1123 write_unlock(&journal->j_state_lock); 1124 1124 1125 - if (journal->j_checkpoint_transactions == NULL) { 1126 - journal->j_checkpoint_transactions = commit_transaction; 1127 - commit_transaction->t_cpnext = commit_transaction; 1128 - commit_transaction->t_cpprev = commit_transaction; 1129 - } else { 1130 - commit_transaction->t_cpnext = 1131 - journal->j_checkpoint_transactions; 1132 - commit_transaction->t_cpprev = 1133 - commit_transaction->t_cpnext->t_cpprev; 1134 - commit_transaction->t_cpnext->t_cpprev = 1135 - commit_transaction; 1136 - commit_transaction->t_cpprev->t_cpnext = 1137 - commit_transaction; 1138 - } 1139 - spin_unlock(&journal->j_list_lock); 1140 - /* Drop all spin_locks because commit_callback may be block. 1141 - * __journal_remove_checkpoint() can not destroy transaction 1142 - * under us because it is not marked as T_FINISHED yet */ 1143 1125 if (journal->j_commit_callback) 1144 1126 journal->j_commit_callback(journal, commit_transaction); 1145 1127 ··· 1132 1150 write_lock(&journal->j_state_lock); 1133 1151 spin_lock(&journal->j_list_lock); 1134 1152 commit_transaction->t_state = T_FINISHED; 1135 - /* Recheck checkpoint lists after j_list_lock was dropped */ 1153 + /* Check if the transaction can be dropped now that we are finished */ 1136 1154 if (commit_transaction->t_checkpoint_list == NULL && 1137 1155 commit_transaction->t_checkpoint_io_list == NULL) { 1138 1156 __jbd2_journal_drop_transaction(journal, commit_transaction); ··· 1141 1159 spin_unlock(&journal->j_list_lock); 1142 1160 write_unlock(&journal->j_state_lock); 1143 1161 wake_up(&journal->j_wait_done_commit); 1162 + 1163 + /* 1164 + * Calculate overall stats 1165 + */ 1166 + spin_lock(&journal->j_history_lock); 1167 + journal->j_stats.ts_tid++; 1168 + journal->j_stats.ts_requested += stats.ts_requested; 
1169 + journal->j_stats.run.rs_wait += stats.run.rs_wait; 1170 + journal->j_stats.run.rs_request_delay += stats.run.rs_request_delay; 1171 + journal->j_stats.run.rs_running += stats.run.rs_running; 1172 + journal->j_stats.run.rs_locked += stats.run.rs_locked; 1173 + journal->j_stats.run.rs_flushing += stats.run.rs_flushing; 1174 + journal->j_stats.run.rs_logging += stats.run.rs_logging; 1175 + journal->j_stats.run.rs_handle_count += stats.run.rs_handle_count; 1176 + journal->j_stats.run.rs_blocks += stats.run.rs_blocks; 1177 + journal->j_stats.run.rs_blocks_logged += stats.run.rs_blocks_logged; 1178 + spin_unlock(&journal->j_history_lock); 1144 1179 }
+5 -5
fs/jbd2/journal.c
··· 122 122 #endif 123 123 124 124 /* Checksumming functions */ 125 - int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb) 125 + static int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb) 126 126 { 127 127 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 128 128 return 1; ··· 143 143 return cpu_to_be32(csum); 144 144 } 145 145 146 - int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb) 146 + static int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb) 147 147 { 148 148 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 149 149 return 1; ··· 151 151 return sb->s_checksum == jbd2_superblock_csum(j, sb); 152 152 } 153 153 154 - void jbd2_superblock_csum_set(journal_t *j, journal_superblock_t *sb) 154 + static void jbd2_superblock_csum_set(journal_t *j, journal_superblock_t *sb) 155 155 { 156 156 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 157 157 return; ··· 302 302 journal->j_flags |= JBD2_UNMOUNT; 303 303 304 304 while (journal->j_task) { 305 - wake_up(&journal->j_wait_commit); 306 305 write_unlock(&journal->j_state_lock); 306 + wake_up(&journal->j_wait_commit); 307 307 wait_event(journal->j_wait_done_commit, journal->j_task == NULL); 308 308 write_lock(&journal->j_state_lock); 309 309 } ··· 710 710 while (tid_gt(tid, journal->j_commit_sequence)) { 711 711 jbd_debug(1, "JBD2: want %d, j_commit_sequence=%d\n", 712 712 tid, journal->j_commit_sequence); 713 - wake_up(&journal->j_wait_commit); 714 713 read_unlock(&journal->j_state_lock); 714 + wake_up(&journal->j_wait_commit); 715 715 wait_event(journal->j_wait_done_commit, 716 716 !tid_gt(tid, journal->j_commit_sequence)); 717 717 read_lock(&journal->j_state_lock);
+22 -24
fs/jbd2/transaction.c
··· 1073 1073 * reused here. 1074 1074 */ 1075 1075 jbd_lock_bh_state(bh); 1076 - spin_lock(&journal->j_list_lock); 1077 1076 J_ASSERT_JH(jh, (jh->b_transaction == transaction || 1078 1077 jh->b_transaction == NULL || 1079 1078 (jh->b_transaction == journal->j_committing_transaction && ··· 1095 1096 jh->b_modified = 0; 1096 1097 1097 1098 JBUFFER_TRACE(jh, "file as BJ_Reserved"); 1099 + spin_lock(&journal->j_list_lock); 1098 1100 __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved); 1099 1101 } else if (jh->b_transaction == journal->j_committing_transaction) { 1100 1102 /* first access by this transaction */ 1101 1103 jh->b_modified = 0; 1102 1104 1103 1105 JBUFFER_TRACE(jh, "set next transaction"); 1106 + spin_lock(&journal->j_list_lock); 1104 1107 jh->b_next_transaction = transaction; 1105 1108 } 1106 1109 spin_unlock(&journal->j_list_lock); ··· 1313 1312 journal->j_running_transaction)) { 1314 1313 printk(KERN_ERR "JBD2: %s: " 1315 1314 "jh->b_transaction (%llu, %p, %u) != " 1316 - "journal->j_running_transaction (%p, %u)", 1315 + "journal->j_running_transaction (%p, %u)\n", 1317 1316 journal->j_devname, 1318 1317 (unsigned long long) bh->b_blocknr, 1319 1318 jh->b_transaction, ··· 1336 1335 */ 1337 1336 if (jh->b_transaction != transaction) { 1338 1337 JBUFFER_TRACE(jh, "already on other transaction"); 1339 - if (unlikely(jh->b_transaction != 1340 - journal->j_committing_transaction)) { 1341 - printk(KERN_ERR "JBD2: %s: " 1342 - "jh->b_transaction (%llu, %p, %u) != " 1343 - "journal->j_committing_transaction (%p, %u)", 1338 + if (unlikely(((jh->b_transaction != 1339 + journal->j_committing_transaction)) || 1340 + (jh->b_next_transaction != transaction))) { 1341 + printk(KERN_ERR "jbd2_journal_dirty_metadata: %s: " 1342 + "bad jh for block %llu: " 1343 + "transaction (%p, %u), " 1344 + "jh->b_transaction (%p, %u), " 1345 + "jh->b_next_transaction (%p, %u), jlist %u\n", 1344 1346 journal->j_devname, 1345 1347 (unsigned long long) bh->b_blocknr, 1348 + 
transaction, transaction->t_tid, 1346 1349 jh->b_transaction, 1347 - jh->b_transaction ? jh->b_transaction->t_tid : 0, 1348 - journal->j_committing_transaction, 1349 - journal->j_committing_transaction ? 1350 - journal->j_committing_transaction->t_tid : 0); 1351 - ret = -EINVAL; 1352 - } 1353 - if (unlikely(jh->b_next_transaction != transaction)) { 1354 - printk(KERN_ERR "JBD2: %s: " 1355 - "jh->b_next_transaction (%llu, %p, %u) != " 1356 - "transaction (%p, %u)", 1357 - journal->j_devname, 1358 - (unsigned long long) bh->b_blocknr, 1350 + jh->b_transaction ? 1351 + jh->b_transaction->t_tid : 0, 1359 1352 jh->b_next_transaction, 1360 1353 jh->b_next_transaction ? 1361 1354 jh->b_next_transaction->t_tid : 0, 1362 - transaction, transaction->t_tid); 1355 + jh->b_jlist); 1356 + WARN_ON(1); 1363 1357 ret = -EINVAL; 1364 1358 } 1365 1359 /* And this case is illegal: we can't reuse another ··· 1411 1415 BUFFER_TRACE(bh, "entry"); 1412 1416 1413 1417 jbd_lock_bh_state(bh); 1414 - spin_lock(&journal->j_list_lock); 1415 1418 1416 1419 if (!buffer_jbd(bh)) 1417 1420 goto not_jbd; ··· 1463 1468 * we know to remove the checkpoint after we commit. 
1464 1469 */ 1465 1470 1471 + spin_lock(&journal->j_list_lock); 1466 1472 if (jh->b_cp_transaction) { 1467 1473 __jbd2_journal_temp_unlink_buffer(jh); 1468 1474 __jbd2_journal_file_buffer(jh, transaction, BJ_Forget); ··· 1476 1480 goto drop; 1477 1481 } 1478 1482 } 1483 + spin_unlock(&journal->j_list_lock); 1479 1484 } else if (jh->b_transaction) { 1480 1485 J_ASSERT_JH(jh, (jh->b_transaction == 1481 1486 journal->j_committing_transaction)); ··· 1488 1491 1489 1492 if (jh->b_next_transaction) { 1490 1493 J_ASSERT(jh->b_next_transaction == transaction); 1494 + spin_lock(&journal->j_list_lock); 1491 1495 jh->b_next_transaction = NULL; 1496 + spin_unlock(&journal->j_list_lock); 1492 1497 1493 1498 /* 1494 1499 * only drop a reference if this transaction modified ··· 1502 1503 } 1503 1504 1504 1505 not_jbd: 1505 - spin_unlock(&journal->j_list_lock); 1506 1506 jbd_unlock_bh_state(bh); 1507 1507 __brelse(bh); 1508 1508 drop: ··· 1819 1821 if (buffer_locked(bh) || buffer_dirty(bh)) 1820 1822 goto out; 1821 1823 1822 - if (jh->b_next_transaction != NULL) 1824 + if (jh->b_next_transaction != NULL || jh->b_transaction != NULL) 1823 1825 goto out; 1824 1826 1825 1827 spin_lock(&journal->j_list_lock); 1826 - if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) { 1828 + if (jh->b_cp_transaction != NULL) { 1827 1829 /* written-back checkpointed metadata buffer */ 1828 1830 JBUFFER_TRACE(jh, "remove from checkpoint list"); 1829 1831 __jbd2_journal_remove_checkpoint(jh);
+1
fs/jffs2/super.c
··· 243 243 struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); 244 244 int err; 245 245 246 + sync_filesystem(sb); 246 247 err = jffs2_parse_options(c, data); 247 248 if (err) 248 249 return -EINVAL;
+1
fs/jfs/super.c
··· 418 418 int flag = JFS_SBI(sb)->flag; 419 419 int ret; 420 420 421 + sync_filesystem(sb); 421 422 if (!parse_options(data, sb, &newLVSize, &flag)) { 422 423 return -EINVAL; 423 424 }
+386 -154
fs/mbcache.c
··· 26 26 * back on the lru list. 27 27 */ 28 28 29 + /* 30 + * Lock descriptions and usage: 31 + * 32 + * Each hash chain of both the block and index hash tables now contains 33 + * a built-in lock used to serialize accesses to the hash chain. 34 + * 35 + * Accesses to global data structures mb_cache_list and mb_cache_lru_list 36 + * are serialized via the global spinlock mb_cache_spinlock. 37 + * 38 + * Each mb_cache_entry contains a spinlock, e_entry_lock, to serialize 39 + * accesses to its local data, such as e_used and e_queued. 40 + * 41 + * Lock ordering: 42 + * 43 + * Each block hash chain's lock has the highest lock order, followed by an 44 + * index hash chain's lock, mb_cache_bg_lock (used to implement mb_cache_entry's 45 + * lock), and mb_cach_spinlock, with the lowest order. While holding 46 + * either a block or index hash chain lock, a thread can acquire an 47 + * mc_cache_bg_lock, which in turn can also acquire mb_cache_spinlock. 48 + * 49 + * Synchronization: 50 + * 51 + * Since both mb_cache_entry_get and mb_cache_entry_find scan the block and 52 + * index hash chian, it needs to lock the corresponding hash chain. For each 53 + * mb_cache_entry within the chain, it needs to lock the mb_cache_entry to 54 + * prevent either any simultaneous release or free on the entry and also 55 + * to serialize accesses to either the e_used or e_queued member of the entry. 56 + * 57 + * To avoid having a dangling reference to an already freed 58 + * mb_cache_entry, an mb_cache_entry is only freed when it is not on a 59 + * block hash chain and also no longer being referenced, both e_used, 60 + * and e_queued are 0's. When an mb_cache_entry is explicitly freed it is 61 + * first removed from a block hash chain. 
62 + */ 63 + 29 64 #include <linux/kernel.h> 30 65 #include <linux/module.h> 31 66 ··· 69 34 #include <linux/mm.h> 70 35 #include <linux/slab.h> 71 36 #include <linux/sched.h> 72 - #include <linux/init.h> 37 + #include <linux/list_bl.h> 73 38 #include <linux/mbcache.h> 74 - 39 + #include <linux/init.h> 40 + #include <linux/blockgroup_lock.h> 75 41 76 42 #ifdef MB_CACHE_DEBUG 77 43 # define mb_debug(f...) do { \ ··· 93 57 94 58 #define MB_CACHE_WRITER ((unsigned short)~0U >> 1) 95 59 60 + #define MB_CACHE_ENTRY_LOCK_BITS __builtin_log2(NR_BG_LOCKS) 61 + #define MB_CACHE_ENTRY_LOCK_INDEX(ce) \ 62 + (hash_long((unsigned long)ce, MB_CACHE_ENTRY_LOCK_BITS)) 63 + 96 64 static DECLARE_WAIT_QUEUE_HEAD(mb_cache_queue); 97 - 65 + static struct blockgroup_lock *mb_cache_bg_lock; 66 + static struct kmem_cache *mb_cache_kmem_cache; 67 + 98 68 MODULE_AUTHOR("Andreas Gruenbacher <a.gruenbacher@computer.org>"); 99 69 MODULE_DESCRIPTION("Meta block cache (for extended attributes)"); 100 70 MODULE_LICENSE("GPL"); ··· 128 86 static LIST_HEAD(mb_cache_lru_list); 129 87 static DEFINE_SPINLOCK(mb_cache_spinlock); 130 88 89 + static inline void 90 + __spin_lock_mb_cache_entry(struct mb_cache_entry *ce) 91 + { 92 + spin_lock(bgl_lock_ptr(mb_cache_bg_lock, 93 + MB_CACHE_ENTRY_LOCK_INDEX(ce))); 94 + } 95 + 96 + static inline void 97 + __spin_unlock_mb_cache_entry(struct mb_cache_entry *ce) 98 + { 99 + spin_unlock(bgl_lock_ptr(mb_cache_bg_lock, 100 + MB_CACHE_ENTRY_LOCK_INDEX(ce))); 101 + } 102 + 131 103 static inline int 132 - __mb_cache_entry_is_hashed(struct mb_cache_entry *ce) 104 + __mb_cache_entry_is_block_hashed(struct mb_cache_entry *ce) 133 105 { 134 - return !list_empty(&ce->e_block_list); 106 + return !hlist_bl_unhashed(&ce->e_block_list); 135 107 } 136 108 137 109 138 - static void 139 - __mb_cache_entry_unhash(struct mb_cache_entry *ce) 110 + static inline void 111 + __mb_cache_entry_unhash_block(struct mb_cache_entry *ce) 140 112 { 141 - if (__mb_cache_entry_is_hashed(ce)) { 
142 - list_del_init(&ce->e_block_list); 143 - list_del(&ce->e_index.o_list); 144 - } 113 + if (__mb_cache_entry_is_block_hashed(ce)) 114 + hlist_bl_del_init(&ce->e_block_list); 145 115 } 146 116 117 + static inline int 118 + __mb_cache_entry_is_index_hashed(struct mb_cache_entry *ce) 119 + { 120 + return !hlist_bl_unhashed(&ce->e_index.o_list); 121 + } 122 + 123 + static inline void 124 + __mb_cache_entry_unhash_index(struct mb_cache_entry *ce) 125 + { 126 + if (__mb_cache_entry_is_index_hashed(ce)) 127 + hlist_bl_del_init(&ce->e_index.o_list); 128 + } 129 + 130 + /* 131 + * __mb_cache_entry_unhash_unlock() 132 + * 133 + * This function is called to unhash both the block and index hash 134 + * chain. 135 + * It assumes both the block and index hash chain is locked upon entry. 136 + * It also unlock both hash chains both exit 137 + */ 138 + static inline void 139 + __mb_cache_entry_unhash_unlock(struct mb_cache_entry *ce) 140 + { 141 + __mb_cache_entry_unhash_index(ce); 142 + hlist_bl_unlock(ce->e_index_hash_p); 143 + __mb_cache_entry_unhash_block(ce); 144 + hlist_bl_unlock(ce->e_block_hash_p); 145 + } 147 146 148 147 static void 149 148 __mb_cache_entry_forget(struct mb_cache_entry *ce, gfp_t gfp_mask) 150 149 { 151 150 struct mb_cache *cache = ce->e_cache; 152 151 153 - mb_assert(!(ce->e_used || ce->e_queued)); 152 + mb_assert(!(ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt))); 154 153 kmem_cache_free(cache->c_entry_cache, ce); 155 154 atomic_dec(&cache->c_entry_count); 156 155 } 157 156 158 - 159 157 static void 160 - __mb_cache_entry_release_unlock(struct mb_cache_entry *ce) 161 - __releases(mb_cache_spinlock) 158 + __mb_cache_entry_release(struct mb_cache_entry *ce) 162 159 { 160 + /* First lock the entry to serialize access to its local data. */ 161 + __spin_lock_mb_cache_entry(ce); 163 162 /* Wake up all processes queuing for this cache entry. 
*/ 164 163 if (ce->e_queued) 165 164 wake_up_all(&mb_cache_queue); 166 165 if (ce->e_used >= MB_CACHE_WRITER) 167 166 ce->e_used -= MB_CACHE_WRITER; 167 + /* 168 + * Make sure that all cache entries on lru_list have 169 + * both e_used and e_qued of 0s. 170 + */ 168 171 ce->e_used--; 169 - if (!(ce->e_used || ce->e_queued)) { 170 - if (!__mb_cache_entry_is_hashed(ce)) 172 + if (!(ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt))) { 173 + if (!__mb_cache_entry_is_block_hashed(ce)) { 174 + __spin_unlock_mb_cache_entry(ce); 171 175 goto forget; 172 - mb_assert(list_empty(&ce->e_lru_list)); 173 - list_add_tail(&ce->e_lru_list, &mb_cache_lru_list); 176 + } 177 + /* 178 + * Need access to lru list, first drop entry lock, 179 + * then reacquire the lock in the proper order. 180 + */ 181 + spin_lock(&mb_cache_spinlock); 182 + if (list_empty(&ce->e_lru_list)) 183 + list_add_tail(&ce->e_lru_list, &mb_cache_lru_list); 184 + spin_unlock(&mb_cache_spinlock); 174 185 } 175 - spin_unlock(&mb_cache_spinlock); 186 + __spin_unlock_mb_cache_entry(ce); 176 187 return; 177 188 forget: 178 - spin_unlock(&mb_cache_spinlock); 189 + mb_assert(list_empty(&ce->e_lru_list)); 179 190 __mb_cache_entry_forget(ce, GFP_KERNEL); 180 191 } 181 - 182 192 183 193 /* 184 194 * mb_cache_shrink_scan() memory pressure callback ··· 254 160 255 161 mb_debug("trying to free %d entries", nr_to_scan); 256 162 spin_lock(&mb_cache_spinlock); 257 - while (nr_to_scan-- && !list_empty(&mb_cache_lru_list)) { 163 + while ((nr_to_scan-- > 0) && !list_empty(&mb_cache_lru_list)) { 258 164 struct mb_cache_entry *ce = 259 165 list_entry(mb_cache_lru_list.next, 260 - struct mb_cache_entry, e_lru_list); 261 - list_move_tail(&ce->e_lru_list, &free_list); 262 - __mb_cache_entry_unhash(ce); 263 - freed++; 166 + struct mb_cache_entry, e_lru_list); 167 + list_del_init(&ce->e_lru_list); 168 + if (ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt)) 169 + continue; 170 + spin_unlock(&mb_cache_spinlock); 171 + /* 
Prevent any find or get operation on the entry */ 172 + hlist_bl_lock(ce->e_block_hash_p); 173 + hlist_bl_lock(ce->e_index_hash_p); 174 + /* Ignore if it is touched by a find/get */ 175 + if (ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt) || 176 + !list_empty(&ce->e_lru_list)) { 177 + hlist_bl_unlock(ce->e_index_hash_p); 178 + hlist_bl_unlock(ce->e_block_hash_p); 179 + spin_lock(&mb_cache_spinlock); 180 + continue; 181 + } 182 + __mb_cache_entry_unhash_unlock(ce); 183 + list_add_tail(&ce->e_lru_list, &free_list); 184 + spin_lock(&mb_cache_spinlock); 264 185 } 265 186 spin_unlock(&mb_cache_spinlock); 187 + 266 188 list_for_each_entry_safe(entry, tmp, &free_list, e_lru_list) { 267 189 __mb_cache_entry_forget(entry, gfp_mask); 190 + freed++; 268 191 } 269 192 return freed; 270 193 } ··· 326 215 int n, bucket_count = 1 << bucket_bits; 327 216 struct mb_cache *cache = NULL; 328 217 218 + if (!mb_cache_bg_lock) { 219 + mb_cache_bg_lock = kmalloc(sizeof(struct blockgroup_lock), 220 + GFP_KERNEL); 221 + if (!mb_cache_bg_lock) 222 + return NULL; 223 + bgl_lock_init(mb_cache_bg_lock); 224 + } 225 + 329 226 cache = kmalloc(sizeof(struct mb_cache), GFP_KERNEL); 330 227 if (!cache) 331 228 return NULL; 332 229 cache->c_name = name; 333 230 atomic_set(&cache->c_entry_count, 0); 334 231 cache->c_bucket_bits = bucket_bits; 335 - cache->c_block_hash = kmalloc(bucket_count * sizeof(struct list_head), 336 - GFP_KERNEL); 232 + cache->c_block_hash = kmalloc(bucket_count * 233 + sizeof(struct hlist_bl_head), GFP_KERNEL); 337 234 if (!cache->c_block_hash) 338 235 goto fail; 339 236 for (n=0; n<bucket_count; n++) 340 - INIT_LIST_HEAD(&cache->c_block_hash[n]); 341 - cache->c_index_hash = kmalloc(bucket_count * sizeof(struct list_head), 342 - GFP_KERNEL); 237 + INIT_HLIST_BL_HEAD(&cache->c_block_hash[n]); 238 + cache->c_index_hash = kmalloc(bucket_count * 239 + sizeof(struct hlist_bl_head), GFP_KERNEL); 343 240 if (!cache->c_index_hash) 344 241 goto fail; 345 242 for (n=0; 
n<bucket_count; n++) 346 - INIT_LIST_HEAD(&cache->c_index_hash[n]); 347 - cache->c_entry_cache = kmem_cache_create(name, 348 - sizeof(struct mb_cache_entry), 0, 349 - SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL); 350 - if (!cache->c_entry_cache) 351 - goto fail2; 243 + INIT_HLIST_BL_HEAD(&cache->c_index_hash[n]); 244 + if (!mb_cache_kmem_cache) { 245 + mb_cache_kmem_cache = kmem_cache_create(name, 246 + sizeof(struct mb_cache_entry), 0, 247 + SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL); 248 + if (!mb_cache_kmem_cache) 249 + goto fail2; 250 + } 251 + cache->c_entry_cache = mb_cache_kmem_cache; 352 252 353 253 /* 354 254 * Set an upper limit on the number of cache entries so that the hash ··· 395 273 mb_cache_shrink(struct block_device *bdev) 396 274 { 397 275 LIST_HEAD(free_list); 398 - struct list_head *l, *ltmp; 276 + struct list_head *l; 277 + struct mb_cache_entry *ce, *tmp; 399 278 279 + l = &mb_cache_lru_list; 400 280 spin_lock(&mb_cache_spinlock); 401 - list_for_each_safe(l, ltmp, &mb_cache_lru_list) { 402 - struct mb_cache_entry *ce = 403 - list_entry(l, struct mb_cache_entry, e_lru_list); 281 + while (!list_is_last(l, &mb_cache_lru_list)) { 282 + l = l->next; 283 + ce = list_entry(l, struct mb_cache_entry, e_lru_list); 404 284 if (ce->e_bdev == bdev) { 405 - list_move_tail(&ce->e_lru_list, &free_list); 406 - __mb_cache_entry_unhash(ce); 285 + list_del_init(&ce->e_lru_list); 286 + if (ce->e_used || ce->e_queued || 287 + atomic_read(&ce->e_refcnt)) 288 + continue; 289 + spin_unlock(&mb_cache_spinlock); 290 + /* 291 + * Prevent any find or get operation on the entry. 
292 + */ 293 + hlist_bl_lock(ce->e_block_hash_p); 294 + hlist_bl_lock(ce->e_index_hash_p); 295 + /* Ignore if it is touched by a find/get */ 296 + if (ce->e_used || ce->e_queued || 297 + atomic_read(&ce->e_refcnt) || 298 + !list_empty(&ce->e_lru_list)) { 299 + hlist_bl_unlock(ce->e_index_hash_p); 300 + hlist_bl_unlock(ce->e_block_hash_p); 301 + l = &mb_cache_lru_list; 302 + spin_lock(&mb_cache_spinlock); 303 + continue; 304 + } 305 + __mb_cache_entry_unhash_unlock(ce); 306 + mb_assert(!(ce->e_used || ce->e_queued || 307 + atomic_read(&ce->e_refcnt))); 308 + list_add_tail(&ce->e_lru_list, &free_list); 309 + l = &mb_cache_lru_list; 310 + spin_lock(&mb_cache_spinlock); 407 311 } 408 312 } 409 313 spin_unlock(&mb_cache_spinlock); 410 - list_for_each_safe(l, ltmp, &free_list) { 411 - __mb_cache_entry_forget(list_entry(l, struct mb_cache_entry, 412 - e_lru_list), GFP_KERNEL); 314 + 315 + list_for_each_entry_safe(ce, tmp, &free_list, e_lru_list) { 316 + __mb_cache_entry_forget(ce, GFP_KERNEL); 413 317 } 414 318 } 415 319 ··· 451 303 mb_cache_destroy(struct mb_cache *cache) 452 304 { 453 305 LIST_HEAD(free_list); 454 - struct list_head *l, *ltmp; 306 + struct mb_cache_entry *ce, *tmp; 455 307 456 308 spin_lock(&mb_cache_spinlock); 457 - list_for_each_safe(l, ltmp, &mb_cache_lru_list) { 458 - struct mb_cache_entry *ce = 459 - list_entry(l, struct mb_cache_entry, e_lru_list); 460 - if (ce->e_cache == cache) { 309 + list_for_each_entry_safe(ce, tmp, &mb_cache_lru_list, e_lru_list) { 310 + if (ce->e_cache == cache) 461 311 list_move_tail(&ce->e_lru_list, &free_list); 462 - __mb_cache_entry_unhash(ce); 463 - } 464 312 } 465 313 list_del(&cache->c_cache_list); 466 314 spin_unlock(&mb_cache_spinlock); 467 315 468 - list_for_each_safe(l, ltmp, &free_list) { 469 - __mb_cache_entry_forget(list_entry(l, struct mb_cache_entry, 470 - e_lru_list), GFP_KERNEL); 316 + list_for_each_entry_safe(ce, tmp, &free_list, e_lru_list) { 317 + list_del_init(&ce->e_lru_list); 318 + /* 319 + * Prevent 
any find or get operation on the entry. 320 + */ 321 + hlist_bl_lock(ce->e_block_hash_p); 322 + hlist_bl_lock(ce->e_index_hash_p); 323 + mb_assert(!(ce->e_used || ce->e_queued || 324 + atomic_read(&ce->e_refcnt))); 325 + __mb_cache_entry_unhash_unlock(ce); 326 + __mb_cache_entry_forget(ce, GFP_KERNEL); 471 327 } 472 328 473 329 if (atomic_read(&cache->c_entry_count) > 0) { ··· 480 328 atomic_read(&cache->c_entry_count)); 481 329 } 482 330 483 - kmem_cache_destroy(cache->c_entry_cache); 484 - 331 + if (list_empty(&mb_cache_list)) { 332 + kmem_cache_destroy(mb_cache_kmem_cache); 333 + mb_cache_kmem_cache = NULL; 334 + } 485 335 kfree(cache->c_index_hash); 486 336 kfree(cache->c_block_hash); 487 337 kfree(cache); ··· 500 346 struct mb_cache_entry * 501 347 mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags) 502 348 { 503 - struct mb_cache_entry *ce = NULL; 349 + struct mb_cache_entry *ce; 504 350 505 351 if (atomic_read(&cache->c_entry_count) >= cache->c_max_entries) { 352 + struct list_head *l; 353 + 354 + l = &mb_cache_lru_list; 506 355 spin_lock(&mb_cache_spinlock); 507 - if (!list_empty(&mb_cache_lru_list)) { 508 - ce = list_entry(mb_cache_lru_list.next, 509 - struct mb_cache_entry, e_lru_list); 510 - list_del_init(&ce->e_lru_list); 511 - __mb_cache_entry_unhash(ce); 356 + while (!list_is_last(l, &mb_cache_lru_list)) { 357 + l = l->next; 358 + ce = list_entry(l, struct mb_cache_entry, e_lru_list); 359 + if (ce->e_cache == cache) { 360 + list_del_init(&ce->e_lru_list); 361 + if (ce->e_used || ce->e_queued || 362 + atomic_read(&ce->e_refcnt)) 363 + continue; 364 + spin_unlock(&mb_cache_spinlock); 365 + /* 366 + * Prevent any find or get operation on the 367 + * entry. 
368 + */ 369 + hlist_bl_lock(ce->e_block_hash_p); 370 + hlist_bl_lock(ce->e_index_hash_p); 371 + /* Ignore if it is touched by a find/get */ 372 + if (ce->e_used || ce->e_queued || 373 + atomic_read(&ce->e_refcnt) || 374 + !list_empty(&ce->e_lru_list)) { 375 + hlist_bl_unlock(ce->e_index_hash_p); 376 + hlist_bl_unlock(ce->e_block_hash_p); 377 + l = &mb_cache_lru_list; 378 + spin_lock(&mb_cache_spinlock); 379 + continue; 380 + } 381 + mb_assert(list_empty(&ce->e_lru_list)); 382 + mb_assert(!(ce->e_used || ce->e_queued || 383 + atomic_read(&ce->e_refcnt))); 384 + __mb_cache_entry_unhash_unlock(ce); 385 + goto found; 386 + } 512 387 } 513 388 spin_unlock(&mb_cache_spinlock); 514 389 } 515 - if (!ce) { 516 - ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags); 517 - if (!ce) 518 - return NULL; 519 - atomic_inc(&cache->c_entry_count); 520 - INIT_LIST_HEAD(&ce->e_lru_list); 521 - INIT_LIST_HEAD(&ce->e_block_list); 522 - ce->e_cache = cache; 523 - ce->e_queued = 0; 524 - } 390 + 391 + ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags); 392 + if (!ce) 393 + return NULL; 394 + atomic_inc(&cache->c_entry_count); 395 + INIT_LIST_HEAD(&ce->e_lru_list); 396 + INIT_HLIST_BL_NODE(&ce->e_block_list); 397 + INIT_HLIST_BL_NODE(&ce->e_index.o_list); 398 + ce->e_cache = cache; 399 + ce->e_queued = 0; 400 + atomic_set(&ce->e_refcnt, 0); 401 + found: 402 + ce->e_block_hash_p = &cache->c_block_hash[0]; 403 + ce->e_index_hash_p = &cache->c_index_hash[0]; 525 404 ce->e_used = 1 + MB_CACHE_WRITER; 526 405 return ce; 527 406 } ··· 580 393 { 581 394 struct mb_cache *cache = ce->e_cache; 582 395 unsigned int bucket; 583 - struct list_head *l; 584 - int error = -EBUSY; 396 + struct hlist_bl_node *l; 397 + struct hlist_bl_head *block_hash_p; 398 + struct hlist_bl_head *index_hash_p; 399 + struct mb_cache_entry *lce; 585 400 401 + mb_assert(ce); 586 402 bucket = hash_long((unsigned long)bdev + (block & 0xffffffff), 587 403 cache->c_bucket_bits); 588 - spin_lock(&mb_cache_spinlock); 589 - 
list_for_each_prev(l, &cache->c_block_hash[bucket]) { 590 - struct mb_cache_entry *ce = 591 - list_entry(l, struct mb_cache_entry, e_block_list); 592 - if (ce->e_bdev == bdev && ce->e_block == block) 593 - goto out; 404 + block_hash_p = &cache->c_block_hash[bucket]; 405 + hlist_bl_lock(block_hash_p); 406 + hlist_bl_for_each_entry(lce, l, block_hash_p, e_block_list) { 407 + if (lce->e_bdev == bdev && lce->e_block == block) { 408 + hlist_bl_unlock(block_hash_p); 409 + return -EBUSY; 410 + } 594 411 } 595 - __mb_cache_entry_unhash(ce); 412 + mb_assert(!__mb_cache_entry_is_block_hashed(ce)); 413 + __mb_cache_entry_unhash_block(ce); 414 + __mb_cache_entry_unhash_index(ce); 596 415 ce->e_bdev = bdev; 597 416 ce->e_block = block; 598 - list_add(&ce->e_block_list, &cache->c_block_hash[bucket]); 417 + ce->e_block_hash_p = block_hash_p; 599 418 ce->e_index.o_key = key; 419 + hlist_bl_add_head(&ce->e_block_list, block_hash_p); 420 + hlist_bl_unlock(block_hash_p); 600 421 bucket = hash_long(key, cache->c_bucket_bits); 601 - list_add(&ce->e_index.o_list, &cache->c_index_hash[bucket]); 602 - error = 0; 603 - out: 604 - spin_unlock(&mb_cache_spinlock); 605 - return error; 422 + index_hash_p = &cache->c_index_hash[bucket]; 423 + hlist_bl_lock(index_hash_p); 424 + ce->e_index_hash_p = index_hash_p; 425 + hlist_bl_add_head(&ce->e_index.o_list, index_hash_p); 426 + hlist_bl_unlock(index_hash_p); 427 + return 0; 606 428 } 607 429 608 430 ··· 625 429 void 626 430 mb_cache_entry_release(struct mb_cache_entry *ce) 627 431 { 628 - spin_lock(&mb_cache_spinlock); 629 - __mb_cache_entry_release_unlock(ce); 432 + __mb_cache_entry_release(ce); 630 433 } 631 434 632 435 633 436 /* 634 437 * mb_cache_entry_free() 635 438 * 636 - * This is equivalent to the sequence mb_cache_entry_takeout() -- 637 - * mb_cache_entry_release(). 
638 439 */ 639 440 void 640 441 mb_cache_entry_free(struct mb_cache_entry *ce) 641 442 { 642 - spin_lock(&mb_cache_spinlock); 443 + mb_assert(ce); 643 444 mb_assert(list_empty(&ce->e_lru_list)); 644 - __mb_cache_entry_unhash(ce); 645 - __mb_cache_entry_release_unlock(ce); 445 + hlist_bl_lock(ce->e_index_hash_p); 446 + __mb_cache_entry_unhash_index(ce); 447 + hlist_bl_unlock(ce->e_index_hash_p); 448 + hlist_bl_lock(ce->e_block_hash_p); 449 + __mb_cache_entry_unhash_block(ce); 450 + hlist_bl_unlock(ce->e_block_hash_p); 451 + __mb_cache_entry_release(ce); 646 452 } 647 453 648 454 ··· 661 463 sector_t block) 662 464 { 663 465 unsigned int bucket; 664 - struct list_head *l; 466 + struct hlist_bl_node *l; 665 467 struct mb_cache_entry *ce; 468 + struct hlist_bl_head *block_hash_p; 666 469 667 470 bucket = hash_long((unsigned long)bdev + (block & 0xffffffff), 668 471 cache->c_bucket_bits); 669 - spin_lock(&mb_cache_spinlock); 670 - list_for_each(l, &cache->c_block_hash[bucket]) { 671 - ce = list_entry(l, struct mb_cache_entry, e_block_list); 472 + block_hash_p = &cache->c_block_hash[bucket]; 473 + /* First serialize access to the block corresponding hash chain. */ 474 + hlist_bl_lock(block_hash_p); 475 + hlist_bl_for_each_entry(ce, l, block_hash_p, e_block_list) { 476 + mb_assert(ce->e_block_hash_p == block_hash_p); 672 477 if (ce->e_bdev == bdev && ce->e_block == block) { 673 - DEFINE_WAIT(wait); 674 - 675 - if (!list_empty(&ce->e_lru_list)) 676 - list_del_init(&ce->e_lru_list); 677 - 678 - while (ce->e_used > 0) { 679 - ce->e_queued++; 680 - prepare_to_wait(&mb_cache_queue, &wait, 681 - TASK_UNINTERRUPTIBLE); 682 - spin_unlock(&mb_cache_spinlock); 683 - schedule(); 684 - spin_lock(&mb_cache_spinlock); 685 - ce->e_queued--; 478 + /* 479 + * Prevent a free from removing the entry. 
480 + */ 481 + atomic_inc(&ce->e_refcnt); 482 + hlist_bl_unlock(block_hash_p); 483 + __spin_lock_mb_cache_entry(ce); 484 + atomic_dec(&ce->e_refcnt); 485 + if (ce->e_used > 0) { 486 + DEFINE_WAIT(wait); 487 + while (ce->e_used > 0) { 488 + ce->e_queued++; 489 + prepare_to_wait(&mb_cache_queue, &wait, 490 + TASK_UNINTERRUPTIBLE); 491 + __spin_unlock_mb_cache_entry(ce); 492 + schedule(); 493 + __spin_lock_mb_cache_entry(ce); 494 + ce->e_queued--; 495 + } 496 + finish_wait(&mb_cache_queue, &wait); 686 497 } 687 - finish_wait(&mb_cache_queue, &wait); 688 498 ce->e_used += 1 + MB_CACHE_WRITER; 499 + __spin_unlock_mb_cache_entry(ce); 689 500 690 - if (!__mb_cache_entry_is_hashed(ce)) { 691 - __mb_cache_entry_release_unlock(ce); 501 + if (!list_empty(&ce->e_lru_list)) { 502 + spin_lock(&mb_cache_spinlock); 503 + list_del_init(&ce->e_lru_list); 504 + spin_unlock(&mb_cache_spinlock); 505 + } 506 + if (!__mb_cache_entry_is_block_hashed(ce)) { 507 + __mb_cache_entry_release(ce); 692 508 return NULL; 693 509 } 694 - goto cleanup; 510 + return ce; 695 511 } 696 512 } 697 - ce = NULL; 698 - 699 - cleanup: 700 - spin_unlock(&mb_cache_spinlock); 701 - return ce; 513 + hlist_bl_unlock(block_hash_p); 514 + return NULL; 702 515 } 703 516 704 517 #if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) 705 518 706 519 static struct mb_cache_entry * 707 - __mb_cache_entry_find(struct list_head *l, struct list_head *head, 520 + __mb_cache_entry_find(struct hlist_bl_node *l, struct hlist_bl_head *head, 708 521 struct block_device *bdev, unsigned int key) 709 522 { 710 - while (l != head) { 523 + 524 + /* The index hash chain is alredy acquire by caller. 
*/ 525 + while (l != NULL) { 711 526 struct mb_cache_entry *ce = 712 - list_entry(l, struct mb_cache_entry, e_index.o_list); 527 + hlist_bl_entry(l, struct mb_cache_entry, 528 + e_index.o_list); 529 + mb_assert(ce->e_index_hash_p == head); 713 530 if (ce->e_bdev == bdev && ce->e_index.o_key == key) { 714 - DEFINE_WAIT(wait); 715 - 716 - if (!list_empty(&ce->e_lru_list)) 717 - list_del_init(&ce->e_lru_list); 718 - 531 + /* 532 + * Prevent a free from removing the entry. 533 + */ 534 + atomic_inc(&ce->e_refcnt); 535 + hlist_bl_unlock(head); 536 + __spin_lock_mb_cache_entry(ce); 537 + atomic_dec(&ce->e_refcnt); 538 + ce->e_used++; 719 539 /* Incrementing before holding the lock gives readers 720 540 priority over writers. */ 721 - ce->e_used++; 722 - while (ce->e_used >= MB_CACHE_WRITER) { 723 - ce->e_queued++; 724 - prepare_to_wait(&mb_cache_queue, &wait, 725 - TASK_UNINTERRUPTIBLE); 726 - spin_unlock(&mb_cache_spinlock); 727 - schedule(); 728 - spin_lock(&mb_cache_spinlock); 729 - ce->e_queued--; 730 - } 731 - finish_wait(&mb_cache_queue, &wait); 541 + if (ce->e_used >= MB_CACHE_WRITER) { 542 + DEFINE_WAIT(wait); 732 543 733 - if (!__mb_cache_entry_is_hashed(ce)) { 734 - __mb_cache_entry_release_unlock(ce); 544 + while (ce->e_used >= MB_CACHE_WRITER) { 545 + ce->e_queued++; 546 + prepare_to_wait(&mb_cache_queue, &wait, 547 + TASK_UNINTERRUPTIBLE); 548 + __spin_unlock_mb_cache_entry(ce); 549 + schedule(); 550 + __spin_lock_mb_cache_entry(ce); 551 + ce->e_queued--; 552 + } 553 + finish_wait(&mb_cache_queue, &wait); 554 + } 555 + __spin_unlock_mb_cache_entry(ce); 556 + if (!list_empty(&ce->e_lru_list)) { 735 557 spin_lock(&mb_cache_spinlock); 558 + list_del_init(&ce->e_lru_list); 559 + spin_unlock(&mb_cache_spinlock); 560 + } 561 + if (!__mb_cache_entry_is_block_hashed(ce)) { 562 + __mb_cache_entry_release(ce); 736 563 return ERR_PTR(-EAGAIN); 737 564 } 738 565 return ce; 739 566 } 740 567 l = l->next; 741 568 } 569 + hlist_bl_unlock(head); 742 570 return NULL; 743 571 
} 744 572 ··· 786 562 unsigned int key) 787 563 { 788 564 unsigned int bucket = hash_long(key, cache->c_bucket_bits); 789 - struct list_head *l; 790 - struct mb_cache_entry *ce; 565 + struct hlist_bl_node *l; 566 + struct mb_cache_entry *ce = NULL; 567 + struct hlist_bl_head *index_hash_p; 791 568 792 - spin_lock(&mb_cache_spinlock); 793 - l = cache->c_index_hash[bucket].next; 794 - ce = __mb_cache_entry_find(l, &cache->c_index_hash[bucket], bdev, key); 795 - spin_unlock(&mb_cache_spinlock); 569 + index_hash_p = &cache->c_index_hash[bucket]; 570 + hlist_bl_lock(index_hash_p); 571 + if (!hlist_bl_empty(index_hash_p)) { 572 + l = hlist_bl_first(index_hash_p); 573 + ce = __mb_cache_entry_find(l, index_hash_p, bdev, key); 574 + } else 575 + hlist_bl_unlock(index_hash_p); 796 576 return ce; 797 577 } 798 578 ··· 825 597 { 826 598 struct mb_cache *cache = prev->e_cache; 827 599 unsigned int bucket = hash_long(key, cache->c_bucket_bits); 828 - struct list_head *l; 600 + struct hlist_bl_node *l; 829 601 struct mb_cache_entry *ce; 602 + struct hlist_bl_head *index_hash_p; 830 603 831 - spin_lock(&mb_cache_spinlock); 604 + index_hash_p = &cache->c_index_hash[bucket]; 605 + mb_assert(prev->e_index_hash_p == index_hash_p); 606 + hlist_bl_lock(index_hash_p); 607 + mb_assert(!hlist_bl_empty(index_hash_p)); 832 608 l = prev->e_index.o_list.next; 833 - ce = __mb_cache_entry_find(l, &cache->c_index_hash[bucket], bdev, key); 834 - __mb_cache_entry_release_unlock(prev); 609 + ce = __mb_cache_entry_find(l, index_hash_p, bdev, key); 610 + __mb_cache_entry_release(prev); 835 611 return ce; 836 612 } 837 613
+1
fs/minix/inode.c
··· 123 123 struct minix_sb_info * sbi = minix_sb(sb); 124 124 struct minix_super_block * ms; 125 125 126 + sync_filesystem(sb); 126 127 ms = sbi->s_ms; 127 128 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) 128 129 return 0;
+1
fs/ncpfs/inode.c
··· 99 99 100 100 static int ncp_remount(struct super_block *sb, int *flags, char* data) 101 101 { 102 + sync_filesystem(sb); 102 103 *flags |= MS_NODIRATIME; 103 104 return 0; 104 105 }
+2
fs/nfs/super.c
··· 2215 2215 struct nfs4_mount_data *options4 = (struct nfs4_mount_data *)raw_data; 2216 2216 u32 nfsvers = nfss->nfs_client->rpc_ops->version; 2217 2217 2218 + sync_filesystem(sb); 2219 + 2218 2220 /* 2219 2221 * Userspace mount programs that send binary options generally send 2220 2222 * them populated with default values. We have no way to know which
+1
fs/nilfs2/super.c
··· 1129 1129 unsigned long old_mount_opt; 1130 1130 int err; 1131 1131 1132 + sync_filesystem(sb); 1132 1133 old_sb_flags = sb->s_flags; 1133 1134 old_mount_opt = nilfs->ns_mount_opt; 1134 1135
+2
fs/ntfs/super.c
··· 468 468 469 469 ntfs_debug("Entering with remount options string: %s", opt); 470 470 471 + sync_filesystem(sb); 472 + 471 473 #ifndef NTFS_RW 472 474 /* For read-only compiled driver, enforce read-only flag. */ 473 475 *flags |= MS_RDONLY;
+2
fs/ocfs2/super.c
··· 634 634 struct ocfs2_super *osb = OCFS2_SB(sb); 635 635 u32 tmp; 636 636 637 + sync_filesystem(sb); 638 + 637 639 if (!ocfs2_parse_options(sb, data, &parsed_options, 1) || 638 640 !ocfs2_check_set_options(sb, &parsed_options)) { 639 641 ret = -EINVAL;
+1
fs/openpromfs/inode.c
··· 368 368 369 369 static int openprom_remount(struct super_block *sb, int *flags, char *data) 370 370 { 371 + sync_filesystem(sb); 371 372 *flags |= MS_NOATIME; 372 373 return 0; 373 374 }
+2
fs/proc/root.c
··· 92 92 int proc_remount(struct super_block *sb, int *flags, char *data) 93 93 { 94 94 struct pid_namespace *pid = sb->s_fs_info; 95 + 96 + sync_filesystem(sb); 95 97 return !proc_parse_options(data, pid); 96 98 } 97 99
+1
fs/pstore/inode.c
··· 249 249 250 250 static int pstore_remount(struct super_block *sb, int *flags, char *data) 251 251 { 252 + sync_filesystem(sb); 252 253 parse_options(data); 253 254 254 255 return 0;
+1
fs/qnx4/inode.c
··· 44 44 { 45 45 struct qnx4_sb_info *qs; 46 46 47 + sync_filesystem(sb); 47 48 qs = qnx4_sb(sb); 48 49 qs->Version = QNX4_VERSION; 49 50 *flags |= MS_RDONLY;
+1
fs/qnx6/inode.c
··· 55 55 56 56 static int qnx6_remount(struct super_block *sb, int *flags, char *data) 57 57 { 58 + sync_filesystem(sb); 58 59 *flags |= MS_RDONLY; 59 60 return 0; 60 61 }
+1
fs/reiserfs/super.c
··· 1318 1318 int i; 1319 1319 #endif 1320 1320 1321 + sync_filesystem(s); 1321 1322 reiserfs_write_lock(s); 1322 1323 1323 1324 #ifdef CONFIG_QUOTA
+1
fs/romfs/super.c
··· 432 432 */ 433 433 static int romfs_remount(struct super_block *sb, int *flags, char *data) 434 434 { 435 + sync_filesystem(sb); 435 436 *flags |= MS_RDONLY; 436 437 return 0; 437 438 }
+1
fs/squashfs/super.c
··· 371 371 372 372 static int squashfs_remount(struct super_block *sb, int *flags, char *data) 373 373 { 374 + sync_filesystem(sb); 374 375 *flags |= MS_RDONLY; 375 376 return 0; 376 377 }
-2
fs/super.c
··· 719 719 } 720 720 } 721 721 722 - sync_filesystem(sb); 723 - 724 722 if (sb->s_op->remount_fs) { 725 723 retval = sb->s_op->remount_fs(sb, &flags, data); 726 724 if (retval) {
+1
fs/sysv/inode.c
··· 60 60 { 61 61 struct sysv_sb_info *sbi = SYSV_SB(sb); 62 62 63 + sync_filesystem(sb); 63 64 if (sbi->s_forced_ro) 64 65 *flags |= MS_RDONLY; 65 66 return 0;
+1
fs/ubifs/super.c
··· 1827 1827 int err; 1828 1828 struct ubifs_info *c = sb->s_fs_info; 1829 1829 1830 + sync_filesystem(sb); 1830 1831 dbg_gen("old flags %#lx, new flags %#x", sb->s_flags, *flags); 1831 1832 1832 1833 err = ubifs_parse_options(c, data, 1);
+1
fs/udf/super.c
··· 646 646 int error = 0; 647 647 struct logicalVolIntegrityDescImpUse *lvidiu = udf_sb_lvidiu(sb); 648 648 649 + sync_filesystem(sb); 649 650 if (lvidiu) { 650 651 int write_rev = le16_to_cpu(lvidiu->minUDFWriteRev); 651 652 if (write_rev > UDF_MAX_WRITE_VERSION && !(*flags & MS_RDONLY))
+1
fs/ufs/super.c
··· 1280 1280 unsigned new_mount_opt, ufstype; 1281 1281 unsigned flags; 1282 1282 1283 + sync_filesystem(sb); 1283 1284 lock_ufs(sb); 1284 1285 mutex_lock(&UFS_SB(sb)->s_lock); 1285 1286 uspi = UFS_SB(sb)->s_uspi;
+1
fs/xfs/xfs_super.c
··· 1197 1197 char *p; 1198 1198 int error; 1199 1199 1200 + sync_filesystem(sb); 1200 1201 while ((p = strsep(&options, ",")) != NULL) { 1201 1202 int token; 1202 1203
+3
include/linux/fs.h
··· 2572 2572 void inode_dio_wait(struct inode *inode); 2573 2573 void inode_dio_done(struct inode *inode); 2574 2574 2575 + extern void inode_set_flags(struct inode *inode, unsigned int flags, 2576 + unsigned int mask); 2577 + 2575 2578 extern const struct file_operations generic_ro_fops; 2576 2579 2577 2580 #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m))
+7 -5
include/linux/mbcache.h
··· 3 3 4 4 (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org> 5 5 */ 6 - 7 6 struct mb_cache_entry { 8 7 struct list_head e_lru_list; 9 8 struct mb_cache *e_cache; 10 9 unsigned short e_used; 11 10 unsigned short e_queued; 11 + atomic_t e_refcnt; 12 12 struct block_device *e_bdev; 13 13 sector_t e_block; 14 - struct list_head e_block_list; 14 + struct hlist_bl_node e_block_list; 15 15 struct { 16 - struct list_head o_list; 16 + struct hlist_bl_node o_list; 17 17 unsigned int o_key; 18 18 } e_index; 19 + struct hlist_bl_head *e_block_hash_p; 20 + struct hlist_bl_head *e_index_hash_p; 19 21 }; 20 22 21 23 struct mb_cache { ··· 27 25 int c_max_entries; 28 26 int c_bucket_bits; 29 27 struct kmem_cache *c_entry_cache; 30 - struct list_head *c_block_hash; 31 - struct list_head *c_index_hash; 28 + struct hlist_bl_head *c_block_hash; 29 + struct hlist_bl_head *c_index_hash; 32 30 }; 33 31 34 32 /* Functions on caches */
+70 -32
include/trace/events/ext4.h
··· 16 16 struct ext4_map_blocks; 17 17 struct extent_status; 18 18 19 + /* shim until we merge in the xfs_collapse_range branch */ 20 + #ifndef FALLOC_FL_COLLAPSE_RANGE 21 + #define FALLOC_FL_COLLAPSE_RANGE 0x08 22 + #endif 23 + 24 + #ifndef FALLOC_FL_ZERO_RANGE 25 + #define FALLOC_FL_ZERO_RANGE 0x10 26 + #endif 27 + 19 28 #define EXT4_I(inode) (container_of(inode, struct ext4_inode_info, vfs_inode)) 20 29 21 30 #define show_mballoc_flags(flags) __print_flags(flags, "|", \ ··· 76 67 { EXTENT_STATUS_UNWRITTEN, "U" }, \ 77 68 { EXTENT_STATUS_DELAYED, "D" }, \ 78 69 { EXTENT_STATUS_HOLE, "H" }) 70 + 71 + #define show_falloc_mode(mode) __print_flags(mode, "|", \ 72 + { FALLOC_FL_KEEP_SIZE, "KEEP_SIZE"}, \ 73 + { FALLOC_FL_PUNCH_HOLE, "PUNCH_HOLE"}, \ 74 + { FALLOC_FL_NO_HIDE_STALE, "NO_HIDE_STALE"}, \ 75 + { FALLOC_FL_COLLAPSE_RANGE, "COLLAPSE_RANGE"}, \ 76 + { FALLOC_FL_ZERO_RANGE, "ZERO_RANGE"}) 79 77 80 78 81 79 TRACE_EVENT(ext4_free_inode, ··· 1344 1328 __entry->rw, __entry->ret) 1345 1329 ); 1346 1330 1347 - TRACE_EVENT(ext4_fallocate_enter, 1331 + DECLARE_EVENT_CLASS(ext4__fallocate_mode, 1348 1332 TP_PROTO(struct inode *inode, loff_t offset, loff_t len, int mode), 1349 1333 1350 1334 TP_ARGS(inode, offset, len, mode), ··· 1352 1336 TP_STRUCT__entry( 1353 1337 __field( dev_t, dev ) 1354 1338 __field( ino_t, ino ) 1355 - __field( loff_t, pos ) 1356 - __field( loff_t, len ) 1339 + __field( loff_t, offset ) 1340 + __field( loff_t, len ) 1357 1341 __field( int, mode ) 1358 1342 ), 1359 1343 1360 1344 TP_fast_assign( 1361 1345 __entry->dev = inode->i_sb->s_dev; 1362 1346 __entry->ino = inode->i_ino; 1363 - __entry->pos = offset; 1347 + __entry->offset = offset; 1364 1348 __entry->len = len; 1365 1349 __entry->mode = mode; 1366 1350 ), 1367 1351 1368 - TP_printk("dev %d,%d ino %lu pos %lld len %lld mode %d", 1352 + TP_printk("dev %d,%d ino %lu offset %lld len %lld mode %s", 1369 1353 MAJOR(__entry->dev), MINOR(__entry->dev), 1370 - (unsigned long) __entry->ino, 
__entry->pos, 1371 - __entry->len, __entry->mode) 1354 + (unsigned long) __entry->ino, 1355 + __entry->offset, __entry->len, 1356 + show_falloc_mode(__entry->mode)) 1357 + ); 1358 + 1359 + DEFINE_EVENT(ext4__fallocate_mode, ext4_fallocate_enter, 1360 + 1361 + TP_PROTO(struct inode *inode, loff_t offset, loff_t len, int mode), 1362 + 1363 + TP_ARGS(inode, offset, len, mode) 1364 + ); 1365 + 1366 + DEFINE_EVENT(ext4__fallocate_mode, ext4_punch_hole, 1367 + 1368 + TP_PROTO(struct inode *inode, loff_t offset, loff_t len, int mode), 1369 + 1370 + TP_ARGS(inode, offset, len, mode) 1371 + ); 1372 + 1373 + DEFINE_EVENT(ext4__fallocate_mode, ext4_zero_range, 1374 + 1375 + TP_PROTO(struct inode *inode, loff_t offset, loff_t len, int mode), 1376 + 1377 + TP_ARGS(inode, offset, len, mode) 1372 1378 ); 1373 1379 1374 1380 TRACE_EVENT(ext4_fallocate_exit, ··· 1420 1382 (unsigned long) __entry->ino, 1421 1383 __entry->pos, __entry->blocks, 1422 1384 __entry->ret) 1423 - ); 1424 - 1425 - TRACE_EVENT(ext4_punch_hole, 1426 - TP_PROTO(struct inode *inode, loff_t offset, loff_t len), 1427 - 1428 - TP_ARGS(inode, offset, len), 1429 - 1430 - TP_STRUCT__entry( 1431 - __field( dev_t, dev ) 1432 - __field( ino_t, ino ) 1433 - __field( loff_t, offset ) 1434 - __field( loff_t, len ) 1435 - ), 1436 - 1437 - TP_fast_assign( 1438 - __entry->dev = inode->i_sb->s_dev; 1439 - __entry->ino = inode->i_ino; 1440 - __entry->offset = offset; 1441 - __entry->len = len; 1442 - ), 1443 - 1444 - TP_printk("dev %d,%d ino %lu offset %lld len %lld", 1445 - MAJOR(__entry->dev), MINOR(__entry->dev), 1446 - (unsigned long) __entry->ino, 1447 - __entry->offset, __entry->len) 1448 1385 ); 1449 1386 1450 1387 TRACE_EVENT(ext4_unlink_enter, ··· 2421 2408 TP_printk("dev %d,%d shrunk_nr %d cache_cnt %d", 2422 2409 MAJOR(__entry->dev), MINOR(__entry->dev), 2423 2410 __entry->shrunk_nr, __entry->cache_cnt) 2411 + ); 2412 + 2413 + TRACE_EVENT(ext4_collapse_range, 2414 + TP_PROTO(struct inode *inode, loff_t offset, loff_t 
len), 2415 + 2416 + TP_ARGS(inode, offset, len), 2417 + 2418 + TP_STRUCT__entry( 2419 + __field(dev_t, dev) 2420 + __field(ino_t, ino) 2421 + __field(loff_t, offset) 2422 + __field(loff_t, len) 2423 + ), 2424 + 2425 + TP_fast_assign( 2426 + __entry->dev = inode->i_sb->s_dev; 2427 + __entry->ino = inode->i_ino; 2428 + __entry->offset = offset; 2429 + __entry->len = len; 2430 + ), 2431 + 2432 + TP_printk("dev %d,%d ino %lu offset %lld len %lld", 2433 + MAJOR(__entry->dev), MINOR(__entry->dev), 2434 + (unsigned long) __entry->ino, 2435 + __entry->offset, __entry->len) 2424 2436 ); 2425 2437 2426 2438 #endif /* _TRACE_EXT4_H */