Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

+3 -2

fs/ext4/balloc.c

··· 591 591 ret = ext4_mb_new_blocks(handle, &ar, errp); 592 592 if (count) 593 593 *count = ar.len; 594 - 595 594 /* 596 - * Account for the allocated meta blocks 595 + * Account for the allocated meta blocks. We will never 596 + * fail EDQUOT for metadata, but we do account for it. 597 597 */ 598 598 if (!(*errp) && EXT4_I(inode)->i_delalloc_reserved_flag) { 599 599 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 600 600 EXT4_I(inode)->i_allocated_meta_blocks += ar.len; 601 601 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 602 + dquot_alloc_block_nofail(inode, ar.len); 602 603 } 603 604 return ret; 604 605 }

+2 -2

fs/ext4/block_validity.c

··· 72 72 else if (start_blk >= (entry->start_blk + entry->count)) 73 73 n = &(*n)->rb_right; 74 74 else { 75 - if (start_blk + count > (entry->start_blk + 75 + if (start_blk + count > (entry->start_blk + 76 76 entry->count)) 77 - entry->count = (start_blk + count - 77 + entry->count = (start_blk + count - 78 78 entry->start_blk); 79 79 new_node = *n; 80 80 new_entry = rb_entry(new_node, struct ext4_system_zone,

+12 -14

fs/ext4/dir.c

··· 83 83 error_msg = "inode out of bounds"; 84 84 85 85 if (error_msg != NULL) 86 - __ext4_error(dir->i_sb, function, 87 - "bad entry in directory #%lu: %s - block=%llu" 86 + ext4_error_inode(function, dir, 87 + "bad entry in directory: %s - block=%llu" 88 88 "offset=%u(%u), inode=%u, rec_len=%d, name_len=%d", 89 - dir->i_ino, error_msg, 90 - (unsigned long long) bh->b_blocknr, 89 + error_msg, (unsigned long long) bh->b_blocknr, 91 90 (unsigned) (offset%bh->b_size), offset, 92 91 le32_to_cpu(de->inode), 93 92 rlen, de->name_len); ··· 110 111 111 112 if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb, 112 113 EXT4_FEATURE_COMPAT_DIR_INDEX) && 113 - ((EXT4_I(inode)->i_flags & EXT4_INDEX_FL) || 114 + ((ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) || 114 115 ((inode->i_size >> sb->s_blocksize_bits) == 1))) { 115 116 err = ext4_dx_readdir(filp, dirent, filldir); 116 117 if (err != ERR_BAD_DX_DIR) { ··· 121 122 * We don't set the inode dirty flag since it's not 122 123 * critical that it get flushed back to the disk. 
123 124 */ 124 - EXT4_I(filp->f_path.dentry->d_inode)->i_flags &= ~EXT4_INDEX_FL; 125 + ext4_clear_inode_flag(filp->f_path.dentry->d_inode, EXT4_INODE_INDEX); 125 126 } 126 127 stored = 0; 127 128 offset = filp->f_pos & (sb->s_blocksize - 1); 128 129 129 130 while (!error && !stored && filp->f_pos < inode->i_size) { 130 - ext4_lblk_t blk = filp->f_pos >> EXT4_BLOCK_SIZE_BITS(sb); 131 - struct buffer_head map_bh; 131 + struct ext4_map_blocks map; 132 132 struct buffer_head *bh = NULL; 133 133 134 - map_bh.b_state = 0; 135 - err = ext4_get_blocks(NULL, inode, blk, 1, &map_bh, 0); 134 + map.m_lblk = filp->f_pos >> EXT4_BLOCK_SIZE_BITS(sb); 135 + map.m_len = 1; 136 + err = ext4_map_blocks(NULL, inode, &map, 0); 136 137 if (err > 0) { 137 - pgoff_t index = map_bh.b_blocknr >> 138 + pgoff_t index = map.m_pblk >> 138 139 (PAGE_CACHE_SHIFT - inode->i_blkbits); 139 140 if (!ra_has_index(&filp->f_ra, index)) 140 141 page_cache_sync_readahead( ··· 142 143 &filp->f_ra, filp, 143 144 index, 1); 144 145 filp->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; 145 - bh = ext4_bread(NULL, inode, blk, 0, &err); 146 + bh = ext4_bread(NULL, inode, map.m_lblk, 0, &err); 146 147 } 147 148 148 149 /* ··· 151 152 */ 152 153 if (!bh) { 153 154 if (!dir_has_error) { 154 - ext4_error(sb, "directory #%lu " 155 + EXT4_ERROR_INODE(inode, "directory " 155 156 "contains a hole at offset %Lu", 156 - inode->i_ino, 157 157 (unsigned long long) filp->f_pos); 158 158 dir_has_error = 1; 159 159 }

+145 -22

fs/ext4/ext4.h

··· 29 29 #include <linux/wait.h> 30 30 #include <linux/blockgroup_lock.h> 31 31 #include <linux/percpu_counter.h> 32 + #ifdef __KERNEL__ 33 + #include <linux/compat.h> 34 + #endif 32 35 33 36 /* 34 37 * The fourth extended filesystem constants/structures ··· 57 54 #endif 58 55 59 56 #define EXT4_ERROR_INODE(inode, fmt, a...) \ 60 - ext4_error_inode(__func__, (inode), (fmt), ## a); 57 + ext4_error_inode(__func__, (inode), (fmt), ## a) 61 58 62 59 #define EXT4_ERROR_FILE(file, fmt, a...) \ 63 - ext4_error_file(__func__, (file), (fmt), ## a); 60 + ext4_error_file(__func__, (file), (fmt), ## a) 64 61 65 62 /* data type for block offset of block group */ 66 63 typedef int ext4_grpblk_t; ··· 75 72 typedef unsigned int ext4_group_t; 76 73 77 74 /* 78 - * Flags used in mballoc's allocation_context flags field. 75 + * Flags used in mballoc's allocation_context flags field. 79 76 * 80 77 * Also used to show what's going on for debugging purposes when the 81 78 * flag field is exported via the traceport interface ··· 126 123 ext4_fsblk_t pright; 127 124 /* flags. see above EXT4_MB_HINT_* */ 128 125 unsigned int flags; 126 + }; 127 + 128 + /* 129 + * Logical to physical block mapping, used by ext4_map_blocks() 130 + * 131 + * This structure is used to pass requests into ext4_map_blocks() as 132 + * well as to store the information returned by ext4_map_blocks(). It 133 + * takes less room on the stack than a struct buffer_head. 
134 + */ 135 + #define EXT4_MAP_NEW (1 << BH_New) 136 + #define EXT4_MAP_MAPPED (1 << BH_Mapped) 137 + #define EXT4_MAP_UNWRITTEN (1 << BH_Unwritten) 138 + #define EXT4_MAP_BOUNDARY (1 << BH_Boundary) 139 + #define EXT4_MAP_UNINIT (1 << BH_Uninit) 140 + #define EXT4_MAP_FLAGS (EXT4_MAP_NEW | EXT4_MAP_MAPPED |\ 141 + EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\ 142 + EXT4_MAP_UNINIT) 143 + 144 + struct ext4_map_blocks { 145 + ext4_fsblk_t m_pblk; 146 + ext4_lblk_t m_lblk; 147 + unsigned int m_len; 148 + unsigned int m_flags; 129 149 }; 130 150 131 151 /* ··· 347 321 return flags & EXT4_OTHER_FLMASK; 348 322 } 349 323 324 + /* 325 + * Inode flags used for atomic set/get 326 + */ 327 + enum { 328 + EXT4_INODE_SECRM = 0, /* Secure deletion */ 329 + EXT4_INODE_UNRM = 1, /* Undelete */ 330 + EXT4_INODE_COMPR = 2, /* Compress file */ 331 + EXT4_INODE_SYNC = 3, /* Synchronous updates */ 332 + EXT4_INODE_IMMUTABLE = 4, /* Immutable file */ 333 + EXT4_INODE_APPEND = 5, /* writes to file may only append */ 334 + EXT4_INODE_NODUMP = 6, /* do not dump file */ 335 + EXT4_INODE_NOATIME = 7, /* do not update atime */ 336 + /* Reserved for compression usage... 
*/ 337 + EXT4_INODE_DIRTY = 8, 338 + EXT4_INODE_COMPRBLK = 9, /* One or more compressed clusters */ 339 + EXT4_INODE_NOCOMPR = 10, /* Don't compress */ 340 + EXT4_INODE_ECOMPR = 11, /* Compression error */ 341 + /* End compression flags --- maybe not all used */ 342 + EXT4_INODE_INDEX = 12, /* hash-indexed directory */ 343 + EXT4_INODE_IMAGIC = 13, /* AFS directory */ 344 + EXT4_INODE_JOURNAL_DATA = 14, /* file data should be journaled */ 345 + EXT4_INODE_NOTAIL = 15, /* file tail should not be merged */ 346 + EXT4_INODE_DIRSYNC = 16, /* dirsync behaviour (directories only) */ 347 + EXT4_INODE_TOPDIR = 17, /* Top of directory hierarchies*/ 348 + EXT4_INODE_HUGE_FILE = 18, /* Set to each huge file */ 349 + EXT4_INODE_EXTENTS = 19, /* Inode uses extents */ 350 + EXT4_INODE_EA_INODE = 21, /* Inode used for large EA */ 351 + EXT4_INODE_EOFBLOCKS = 22, /* Blocks allocated beyond EOF */ 352 + EXT4_INODE_RESERVED = 31, /* reserved for ext4 lib */ 353 + }; 354 + 355 + #define TEST_FLAG_VALUE(FLAG) (EXT4_##FLAG##_FL == (1 << EXT4_INODE_##FLAG)) 356 + #define CHECK_FLAG_VALUE(FLAG) if (!TEST_FLAG_VALUE(FLAG)) { \ 357 + printk(KERN_EMERG "EXT4 flag fail: " #FLAG ": %d %d\n", \ 358 + EXT4_##FLAG##_FL, EXT4_INODE_##FLAG); BUG_ON(1); } 359 + 360 + /* 361 + * Since it's pretty easy to mix up bit numbers and hex values, and we 362 + * can't do a compile-time test for ENUM values, we use a run-time 363 + * test to make sure that EXT4_XXX_FL is consistent with respect to 364 + * EXT4_INODE_XXX. If all is well the printk and BUG_ON will all drop 365 + * out so it won't cost any extra space in the compiled kernel image. 366 + * But it's important that these values are the same, since we are 367 + * using EXT4_INODE_XXX to test for the flag values, but EXT4_XX_FL 368 + * must be consistent with the values of FS_XXX_FL defined in 369 + * include/linux/fs.h and the on-disk values found in ext2, ext3, and 370 + * ext4 filesystems, and of course the values defined in e2fsprogs. 
371 + * 372 + * It's not paranoia if the Murphy's Law really *is* out to get you. :-) 373 + */ 374 + static inline void ext4_check_flag_values(void) 375 + { 376 + CHECK_FLAG_VALUE(SECRM); 377 + CHECK_FLAG_VALUE(UNRM); 378 + CHECK_FLAG_VALUE(COMPR); 379 + CHECK_FLAG_VALUE(SYNC); 380 + CHECK_FLAG_VALUE(IMMUTABLE); 381 + CHECK_FLAG_VALUE(APPEND); 382 + CHECK_FLAG_VALUE(NODUMP); 383 + CHECK_FLAG_VALUE(NOATIME); 384 + CHECK_FLAG_VALUE(DIRTY); 385 + CHECK_FLAG_VALUE(COMPRBLK); 386 + CHECK_FLAG_VALUE(NOCOMPR); 387 + CHECK_FLAG_VALUE(ECOMPR); 388 + CHECK_FLAG_VALUE(INDEX); 389 + CHECK_FLAG_VALUE(IMAGIC); 390 + CHECK_FLAG_VALUE(JOURNAL_DATA); 391 + CHECK_FLAG_VALUE(NOTAIL); 392 + CHECK_FLAG_VALUE(DIRSYNC); 393 + CHECK_FLAG_VALUE(TOPDIR); 394 + CHECK_FLAG_VALUE(HUGE_FILE); 395 + CHECK_FLAG_VALUE(EXTENTS); 396 + CHECK_FLAG_VALUE(EA_INODE); 397 + CHECK_FLAG_VALUE(EOFBLOCKS); 398 + CHECK_FLAG_VALUE(RESERVED); 399 + } 400 + 350 401 /* Used to pass group descriptor data when online resize is done */ 351 402 struct ext4_new_group_input { 352 403 __u32 group; /* Group number for this data */ ··· 434 331 __u16 reserved_blocks; /* Number of reserved blocks in this group */ 435 332 __u16 unused; 436 333 }; 334 + 335 + #if defined(__KERNEL__) && defined(CONFIG_COMPAT) 336 + struct compat_ext4_new_group_input { 337 + u32 group; 338 + compat_u64 block_bitmap; 339 + compat_u64 inode_bitmap; 340 + compat_u64 inode_table; 341 + u32 blocks_count; 342 + u16 reserved_blocks; 343 + u16 unused; 344 + }; 345 + #endif 437 346 438 347 /* The struct ext4_new_group_input in kernel space, with free_blocks_count */ 439 348 struct ext4_new_group_data { ··· 470 355 #define EXT4_GET_BLOCKS_CREATE_UNINIT_EXT (EXT4_GET_BLOCKS_UNINIT_EXT|\ 471 356 EXT4_GET_BLOCKS_CREATE) 472 357 /* Caller is from the delayed allocation writeout path, 473 - so set the magic i_delalloc_reserve_flag after taking the 358 + so set the magic i_delalloc_reserve_flag after taking the 474 359 inode allocation semaphore for */ 475 360 
#define EXT4_GET_BLOCKS_DELALLOC_RESERVE 0x0004 476 361 /* caller is from the direct IO path, request to creation of an ··· 513 398 #define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12) 514 399 #define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent) 515 400 401 + #if defined(__KERNEL__) && defined(CONFIG_COMPAT) 516 402 /* 517 403 * ioctl commands in 32 bit emulation 518 404 */ ··· 524 408 #define EXT4_IOC32_GETRSVSZ _IOR('f', 5, int) 525 409 #define EXT4_IOC32_SETRSVSZ _IOW('f', 6, int) 526 410 #define EXT4_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int) 411 + #define EXT4_IOC32_GROUP_ADD _IOW('f', 8, struct compat_ext4_new_group_input) 527 412 #ifdef CONFIG_JBD2_DEBUG 528 413 #define EXT4_IOC32_WAIT_FOR_READONLY _IOR('f', 99, int) 529 414 #endif 530 415 #define EXT4_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION 531 416 #define EXT4_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION 417 + #endif 532 418 533 419 534 420 /* ··· 734 616 */ 735 617 struct ext4_inode_info { 736 618 __le32 i_data[15]; /* unconverted */ 737 - __u32 i_flags; 738 - ext4_fsblk_t i_file_acl; 739 619 __u32 i_dtime; 620 + ext4_fsblk_t i_file_acl; 740 621 741 622 /* 742 623 * i_block_group is the number of the block group which contains ··· 746 629 */ 747 630 ext4_group_t i_block_group; 748 631 unsigned long i_state_flags; /* Dynamic state flags */ 632 + unsigned long i_flags; 749 633 750 634 ext4_lblk_t i_dir_start_lookup; 751 635 #ifdef CONFIG_EXT4_FS_XATTR ··· 1180 1062 EXT4_STATE_DA_ALLOC_CLOSE, /* Alloc DA blks on close */ 1181 1063 EXT4_STATE_EXT_MIGRATE, /* Inode is migrating */ 1182 1064 EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/ 1065 + EXT4_STATE_NEWENTRY, /* File just added to dir */ 1183 1066 }; 1184 1067 1185 - static inline int ext4_test_inode_state(struct inode *inode, int bit) 1186 - { 1187 - return test_bit(bit, &EXT4_I(inode)->i_state_flags); 1068 + #define EXT4_INODE_BIT_FNS(name, field) \ 1069 + static inline int ext4_test_inode_##name(struct inode *inode, int bit) \ 1070 + { \ 1071 + 
return test_bit(bit, &EXT4_I(inode)->i_##field); \ 1072 + } \ 1073 + static inline void ext4_set_inode_##name(struct inode *inode, int bit) \ 1074 + { \ 1075 + set_bit(bit, &EXT4_I(inode)->i_##field); \ 1076 + } \ 1077 + static inline void ext4_clear_inode_##name(struct inode *inode, int bit) \ 1078 + { \ 1079 + clear_bit(bit, &EXT4_I(inode)->i_##field); \ 1188 1080 } 1189 1081 1190 - static inline void ext4_set_inode_state(struct inode *inode, int bit) 1191 - { 1192 - set_bit(bit, &EXT4_I(inode)->i_state_flags); 1193 - } 1194 - 1195 - static inline void ext4_clear_inode_state(struct inode *inode, int bit) 1196 - { 1197 - clear_bit(bit, &EXT4_I(inode)->i_state_flags); 1198 - } 1082 + EXT4_INODE_BIT_FNS(flag, flags) 1083 + EXT4_INODE_BIT_FNS(state, state_flags) 1199 1084 #else 1200 1085 /* Assume that user mode programs are passing in an ext4fs superblock, not 1201 1086 * a kernel struct super_block. This will allow us to call the feature-test ··· 1385 1264 1386 1265 #define is_dx(dir) (EXT4_HAS_COMPAT_FEATURE(dir->i_sb, \ 1387 1266 EXT4_FEATURE_COMPAT_DIR_INDEX) && \ 1388 - (EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) 1267 + ext4_test_inode_flag((dir), EXT4_INODE_INDEX)) 1389 1268 #define EXT4_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT4_LINK_MAX) 1390 1269 #define EXT4_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1) 1391 1270 ··· 1799 1678 ext4_grpblk_t bb_first_free; /* first free block */ 1800 1679 ext4_grpblk_t bb_free; /* total free blocks */ 1801 1680 ext4_grpblk_t bb_fragments; /* nr of freespace fragments */ 1681 + ext4_grpblk_t bb_largest_free_order;/* order of largest frag in BG */ 1802 1682 struct list_head bb_prealloc_list; 1803 1683 #ifdef DOUBLE_CHECK 1804 1684 void *bb_bitmap; ··· 1894 1772 extern int ext4_ext_writepage_trans_blocks(struct inode *, int); 1895 1773 extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, 1896 1774 int chunk); 1897 - extern int ext4_ext_get_blocks(handle_t *handle, struct inode 
*inode, 1898 - ext4_lblk_t iblock, unsigned int max_blocks, 1899 - struct buffer_head *bh_result, int flags); 1775 + extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, 1776 + struct ext4_map_blocks *map, int flags); 1900 1777 extern void ext4_ext_truncate(struct inode *); 1901 1778 extern void ext4_ext_init(struct super_block *); 1902 1779 extern void ext4_ext_release(struct super_block *); ··· 1903 1782 loff_t len); 1904 1783 extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, 1905 1784 ssize_t len); 1785 + extern int ext4_map_blocks(handle_t *handle, struct inode *inode, 1786 + struct ext4_map_blocks *map, int flags); 1906 1787 extern int ext4_get_blocks(handle_t *handle, struct inode *inode, 1907 1788 sector_t block, unsigned int max_blocks, 1908 1789 struct buffer_head *bh, int flags);

+4 -4

fs/ext4/ext4_jbd2.h

··· 273 273 return 1; 274 274 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) 275 275 return 1; 276 - if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL) 276 + if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA)) 277 277 return 1; 278 278 return 0; 279 279 } ··· 284 284 return 0; 285 285 if (!S_ISREG(inode->i_mode)) 286 286 return 0; 287 - if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL) 287 + if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA)) 288 288 return 0; 289 289 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) 290 290 return 1; ··· 297 297 return 0; 298 298 if (EXT4_JOURNAL(inode) == NULL) 299 299 return 1; 300 - if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL) 300 + if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA)) 301 301 return 0; 302 302 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) 303 303 return 1; ··· 321 321 return 0; 322 322 if (!S_ISREG(inode->i_mode)) 323 323 return 0; 324 - if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) 324 + if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) 325 325 return 0; 326 326 if (ext4_should_journal_data(inode)) 327 327 return 0;

+228 -189

fs/ext4/extents.c

··· 107 107 if (err <= 0) 108 108 return err; 109 109 err = ext4_truncate_restart_trans(handle, inode, needed); 110 - /* 111 - * We have dropped i_data_sem so someone might have cached again 112 - * an extent we are going to truncate. 113 - */ 114 - ext4_ext_invalidate_cache(inode); 110 + if (err == 0) 111 + err = -EAGAIN; 115 112 116 113 return err; 117 114 } ··· 182 185 if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) { 183 186 /* 184 187 * If there are at least EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME 185 - * block groups per flexgroup, reserve the first block 186 - * group for directories and special files. Regular 188 + * block groups per flexgroup, reserve the first block 189 + * group for directories and special files. Regular 187 190 * files will start at the second block group. This 188 - * tends to speed up directory access and improves 191 + * tends to speed up directory access and improves 189 192 * fsck times. 190 193 */ 191 194 block_group &= ~(flex_size-1); ··· 436 439 return 0; 437 440 438 441 corrupted: 439 - __ext4_error(inode->i_sb, function, 440 - "bad header/extent in inode #%lu: %s - magic %x, " 442 + ext4_error_inode(function, inode, 443 + "bad header/extent: %s - magic %x, " 441 444 "entries %u, max %u(%u), depth %u(%u)", 442 - inode->i_ino, error_msg, le16_to_cpu(eh->eh_magic), 445 + error_msg, le16_to_cpu(eh->eh_magic), 443 446 le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max), 444 447 max, le16_to_cpu(eh->eh_depth), depth); 445 448 ··· 1619 1622 merge_done = 1; 1620 1623 WARN_ON(eh->eh_entries == 0); 1621 1624 if (!eh->eh_entries) 1622 - ext4_error(inode->i_sb, 1623 - "inode#%lu, eh->eh_entries = 0!", 1624 - inode->i_ino); 1625 + EXT4_ERROR_INODE(inode, "eh->eh_entries = 0!"); 1625 1626 } 1626 1627 1627 1628 return merge_done; ··· 2034 2039 struct ext4_ext_cache *cex; 2035 2040 int ret = EXT4_EXT_CACHE_NO; 2036 2041 2037 - /* 2042 + /* 2038 2043 * We borrow i_block_reservation_lock to protect i_cached_extent 2039 2044 */ 2040 2045 
spin_lock(&EXT4_I(inode)->i_block_reservation_lock); ··· 2356 2361 int depth = ext_depth(inode); 2357 2362 struct ext4_ext_path *path; 2358 2363 handle_t *handle; 2359 - int i = 0, err = 0; 2364 + int i, err; 2360 2365 2361 2366 ext_debug("truncate since %u\n", start); 2362 2367 ··· 2365 2370 if (IS_ERR(handle)) 2366 2371 return PTR_ERR(handle); 2367 2372 2373 + again: 2368 2374 ext4_ext_invalidate_cache(inode); 2369 2375 2370 2376 /* 2371 2377 * We start scanning from right side, freeing all the blocks 2372 2378 * after i_size and walking into the tree depth-wise. 2373 2379 */ 2380 + depth = ext_depth(inode); 2374 2381 path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_NOFS); 2375 2382 if (path == NULL) { 2376 2383 ext4_journal_stop(handle); 2377 2384 return -ENOMEM; 2378 2385 } 2386 + path[0].p_depth = depth; 2379 2387 path[0].p_hdr = ext_inode_hdr(inode); 2380 2388 if (ext4_ext_check(inode, path[0].p_hdr, depth)) { 2381 2389 err = -EIO; 2382 2390 goto out; 2383 2391 } 2384 - path[0].p_depth = depth; 2392 + i = err = 0; 2385 2393 2386 2394 while (i >= 0 && err == 0) { 2387 2395 if (i == depth) { ··· 2478 2480 out: 2479 2481 ext4_ext_drop_refs(path); 2480 2482 kfree(path); 2483 + if (err == -EAGAIN) 2484 + goto again; 2481 2485 ext4_journal_stop(handle); 2482 2486 2483 2487 return err; ··· 2544 2544 /* FIXME!! 
we need to try to merge to left or right after zero-out */ 2545 2545 static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) 2546 2546 { 2547 - int ret = -EIO; 2547 + int ret; 2548 2548 struct bio *bio; 2549 2549 int blkbits, blocksize; 2550 2550 sector_t ee_pblock; ··· 2568 2568 len = ee_len; 2569 2569 2570 2570 bio = bio_alloc(GFP_NOIO, len); 2571 + if (!bio) 2572 + return -ENOMEM; 2573 + 2571 2574 bio->bi_sector = ee_pblock; 2572 2575 bio->bi_bdev = inode->i_sb->s_bdev; 2573 2576 ··· 2598 2595 submit_bio(WRITE, bio); 2599 2596 wait_for_completion(&event); 2600 2597 2601 - if (test_bit(BIO_UPTODATE, &bio->bi_flags)) 2602 - ret = 0; 2603 - else { 2604 - ret = -EIO; 2605 - break; 2598 + if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { 2599 + bio_put(bio); 2600 + return -EIO; 2606 2601 } 2607 2602 bio_put(bio); 2608 2603 ee_len -= done; 2609 2604 ee_pblock += done << (blkbits - 9); 2610 2605 } 2611 - return ret; 2606 + return 0; 2612 2607 } 2613 2608 2614 2609 #define EXT4_EXT_ZERO_LEN 7 2615 2610 /* 2616 - * This function is called by ext4_ext_get_blocks() if someone tries to write 2611 + * This function is called by ext4_ext_map_blocks() if someone tries to write 2617 2612 * to an uninitialized extent. It may result in splitting the uninitialized 2618 2613 * extent into multiple extents (upto three - one initialized and two 2619 2614 * uninitialized). 
··· 2621 2620 * c> Splits in three extents: Somone is writing in middle of the extent 2622 2621 */ 2623 2622 static int ext4_ext_convert_to_initialized(handle_t *handle, 2624 - struct inode *inode, 2625 - struct ext4_ext_path *path, 2626 - ext4_lblk_t iblock, 2627 - unsigned int max_blocks) 2623 + struct inode *inode, 2624 + struct ext4_map_blocks *map, 2625 + struct ext4_ext_path *path) 2628 2626 { 2629 2627 struct ext4_extent *ex, newex, orig_ex; 2630 2628 struct ext4_extent *ex1 = NULL; 2631 2629 struct ext4_extent *ex2 = NULL; 2632 2630 struct ext4_extent *ex3 = NULL; 2633 2631 struct ext4_extent_header *eh; 2634 - ext4_lblk_t ee_block; 2632 + ext4_lblk_t ee_block, eof_block; 2635 2633 unsigned int allocated, ee_len, depth; 2636 2634 ext4_fsblk_t newblock; 2637 2635 int err = 0; 2638 2636 int ret = 0; 2637 + int may_zeroout; 2638 + 2639 + ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical" 2640 + "block %llu, max_blocks %u\n", inode->i_ino, 2641 + (unsigned long long)map->m_lblk, map->m_len); 2642 + 2643 + eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> 2644 + inode->i_sb->s_blocksize_bits; 2645 + if (eof_block < map->m_lblk + map->m_len) 2646 + eof_block = map->m_lblk + map->m_len; 2639 2647 2640 2648 depth = ext_depth(inode); 2641 2649 eh = path[depth].p_hdr; 2642 2650 ex = path[depth].p_ext; 2643 2651 ee_block = le32_to_cpu(ex->ee_block); 2644 2652 ee_len = ext4_ext_get_actual_len(ex); 2645 - allocated = ee_len - (iblock - ee_block); 2646 - newblock = iblock - ee_block + ext_pblock(ex); 2653 + allocated = ee_len - (map->m_lblk - ee_block); 2654 + newblock = map->m_lblk - ee_block + ext_pblock(ex); 2655 + 2647 2656 ex2 = ex; 2648 2657 orig_ex.ee_block = ex->ee_block; 2649 2658 orig_ex.ee_len = cpu_to_le16(ee_len); 2650 2659 ext4_ext_store_pblock(&orig_ex, ext_pblock(ex)); 2651 2660 2661 + /* 2662 + * It is safe to convert extent to initialized via explicit 2663 + * zeroout only if extent is fully insde i_size or new_size. 
2664 + */ 2665 + may_zeroout = ee_block + ee_len <= eof_block; 2666 + 2652 2667 err = ext4_ext_get_access(handle, inode, path + depth); 2653 2668 if (err) 2654 2669 goto out; 2655 2670 /* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */ 2656 - if (ee_len <= 2*EXT4_EXT_ZERO_LEN) { 2671 + if (ee_len <= 2*EXT4_EXT_ZERO_LEN && may_zeroout) { 2657 2672 err = ext4_ext_zeroout(inode, &orig_ex); 2658 2673 if (err) 2659 2674 goto fix_extent_len; ··· 2682 2665 return allocated; 2683 2666 } 2684 2667 2685 - /* ex1: ee_block to iblock - 1 : uninitialized */ 2686 - if (iblock > ee_block) { 2668 + /* ex1: ee_block to map->m_lblk - 1 : uninitialized */ 2669 + if (map->m_lblk > ee_block) { 2687 2670 ex1 = ex; 2688 - ex1->ee_len = cpu_to_le16(iblock - ee_block); 2671 + ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); 2689 2672 ext4_ext_mark_uninitialized(ex1); 2690 2673 ex2 = &newex; 2691 2674 } ··· 2694 2677 * we insert ex3, if ex1 is NULL. This is to avoid temporary 2695 2678 * overlap of blocks. 2696 2679 */ 2697 - if (!ex1 && allocated > max_blocks) 2698 - ex2->ee_len = cpu_to_le16(max_blocks); 2680 + if (!ex1 && allocated > map->m_len) 2681 + ex2->ee_len = cpu_to_le16(map->m_len); 2699 2682 /* ex3: to ee_block + ee_len : uninitialised */ 2700 - if (allocated > max_blocks) { 2683 + if (allocated > map->m_len) { 2701 2684 unsigned int newdepth; 2702 2685 /* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */ 2703 - if (allocated <= EXT4_EXT_ZERO_LEN) { 2686 + if (allocated <= EXT4_EXT_ZERO_LEN && may_zeroout) { 2704 2687 /* 2705 - * iblock == ee_block is handled by the zerouout 2688 + * map->m_lblk == ee_block is handled by the zerouout 2706 2689 * at the beginning. 2707 2690 * Mark first half uninitialized. 
2708 2691 * Mark second half initialized and zero out the ··· 2715 2698 ext4_ext_dirty(handle, inode, path + depth); 2716 2699 2717 2700 ex3 = &newex; 2718 - ex3->ee_block = cpu_to_le32(iblock); 2701 + ex3->ee_block = cpu_to_le32(map->m_lblk); 2719 2702 ext4_ext_store_pblock(ex3, newblock); 2720 2703 ex3->ee_len = cpu_to_le16(allocated); 2721 2704 err = ext4_ext_insert_extent(handle, inode, path, ··· 2728 2711 ex->ee_len = orig_ex.ee_len; 2729 2712 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); 2730 2713 ext4_ext_dirty(handle, inode, path + depth); 2731 - /* blocks available from iblock */ 2714 + /* blocks available from map->m_lblk */ 2732 2715 return allocated; 2733 2716 2734 2717 } else if (err) ··· 2750 2733 */ 2751 2734 depth = ext_depth(inode); 2752 2735 ext4_ext_drop_refs(path); 2753 - path = ext4_ext_find_extent(inode, 2754 - iblock, path); 2736 + path = ext4_ext_find_extent(inode, map->m_lblk, 2737 + path); 2755 2738 if (IS_ERR(path)) { 2756 2739 err = PTR_ERR(path); 2757 2740 return err; ··· 2771 2754 return allocated; 2772 2755 } 2773 2756 ex3 = &newex; 2774 - ex3->ee_block = cpu_to_le32(iblock + max_blocks); 2775 - ext4_ext_store_pblock(ex3, newblock + max_blocks); 2776 - ex3->ee_len = cpu_to_le16(allocated - max_blocks); 2757 + ex3->ee_block = cpu_to_le32(map->m_lblk + map->m_len); 2758 + ext4_ext_store_pblock(ex3, newblock + map->m_len); 2759 + ex3->ee_len = cpu_to_le16(allocated - map->m_len); 2777 2760 ext4_ext_mark_uninitialized(ex3); 2778 2761 err = ext4_ext_insert_extent(handle, inode, path, ex3, 0); 2779 - if (err == -ENOSPC) { 2762 + if (err == -ENOSPC && may_zeroout) { 2780 2763 err = ext4_ext_zeroout(inode, &orig_ex); 2781 2764 if (err) 2782 2765 goto fix_extent_len; ··· 2786 2769 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); 2787 2770 ext4_ext_dirty(handle, inode, path + depth); 2788 2771 /* zeroed the full extent */ 2789 - /* blocks available from iblock */ 2772 + /* blocks available from map->m_lblk */ 2790 2773 return allocated; 2791 
2774 2792 2775 } else if (err) ··· 2800 2783 * update the extent length after successful insert of the 2801 2784 * split extent 2802 2785 */ 2803 - orig_ex.ee_len = cpu_to_le16(ee_len - 2804 - ext4_ext_get_actual_len(ex3)); 2786 + ee_len -= ext4_ext_get_actual_len(ex3); 2787 + orig_ex.ee_len = cpu_to_le16(ee_len); 2788 + may_zeroout = ee_block + ee_len <= eof_block; 2789 + 2805 2790 depth = newdepth; 2806 2791 ext4_ext_drop_refs(path); 2807 - path = ext4_ext_find_extent(inode, iblock, path); 2792 + path = ext4_ext_find_extent(inode, map->m_lblk, path); 2808 2793 if (IS_ERR(path)) { 2809 2794 err = PTR_ERR(path); 2810 2795 goto out; ··· 2820 2801 if (err) 2821 2802 goto out; 2822 2803 2823 - allocated = max_blocks; 2804 + allocated = map->m_len; 2824 2805 2825 2806 /* If extent has less than EXT4_EXT_ZERO_LEN and we are trying 2826 2807 * to insert a extent in the middle zerout directly 2827 2808 * otherwise give the extent a chance to merge to left 2828 2809 */ 2829 2810 if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN && 2830 - iblock != ee_block) { 2811 + map->m_lblk != ee_block && may_zeroout) { 2831 2812 err = ext4_ext_zeroout(inode, &orig_ex); 2832 2813 if (err) 2833 2814 goto fix_extent_len; ··· 2837 2818 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); 2838 2819 ext4_ext_dirty(handle, inode, path + depth); 2839 2820 /* zero out the first half */ 2840 - /* blocks available from iblock */ 2821 + /* blocks available from map->m_lblk */ 2841 2822 return allocated; 2842 2823 } 2843 2824 } ··· 2848 2829 */ 2849 2830 if (ex1 && ex1 != ex) { 2850 2831 ex1 = ex; 2851 - ex1->ee_len = cpu_to_le16(iblock - ee_block); 2832 + ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); 2852 2833 ext4_ext_mark_uninitialized(ex1); 2853 2834 ex2 = &newex; 2854 2835 } 2855 - /* ex2: iblock to iblock + maxblocks-1 : initialised */ 2856 - ex2->ee_block = cpu_to_le32(iblock); 2836 + /* ex2: map->m_lblk to map->m_lblk + maxblocks-1 : initialised */ 2837 + ex2->ee_block = 
cpu_to_le32(map->m_lblk); 2857 2838 ext4_ext_store_pblock(ex2, newblock); 2858 2839 ex2->ee_len = cpu_to_le16(allocated); 2859 2840 if (ex2 != ex) ··· 2896 2877 goto out; 2897 2878 insert: 2898 2879 err = ext4_ext_insert_extent(handle, inode, path, &newex, 0); 2899 - if (err == -ENOSPC) { 2880 + if (err == -ENOSPC && may_zeroout) { 2900 2881 err = ext4_ext_zeroout(inode, &orig_ex); 2901 2882 if (err) 2902 2883 goto fix_extent_len; ··· 2923 2904 } 2924 2905 2925 2906 /* 2926 - * This function is called by ext4_ext_get_blocks() from 2907 + * This function is called by ext4_ext_map_blocks() from 2927 2908 * ext4_get_blocks_dio_write() when DIO to write 2928 2909 * to an uninitialized extent. 2929 2910 * ··· 2946 2927 */ 2947 2928 static int ext4_split_unwritten_extents(handle_t *handle, 2948 2929 struct inode *inode, 2930 + struct ext4_map_blocks *map, 2949 2931 struct ext4_ext_path *path, 2950 - ext4_lblk_t iblock, 2951 - unsigned int max_blocks, 2952 2932 int flags) 2953 2933 { 2954 2934 struct ext4_extent *ex, newex, orig_ex; ··· 2955 2937 struct ext4_extent *ex2 = NULL; 2956 2938 struct ext4_extent *ex3 = NULL; 2957 2939 struct ext4_extent_header *eh; 2958 - ext4_lblk_t ee_block; 2940 + ext4_lblk_t ee_block, eof_block; 2959 2941 unsigned int allocated, ee_len, depth; 2960 2942 ext4_fsblk_t newblock; 2961 2943 int err = 0; 2944 + int may_zeroout; 2962 2945 2963 - ext_debug("ext4_split_unwritten_extents: inode %lu," 2964 - "iblock %llu, max_blocks %u\n", inode->i_ino, 2965 - (unsigned long long)iblock, max_blocks); 2946 + ext_debug("ext4_split_unwritten_extents: inode %lu, logical" 2947 + "block %llu, max_blocks %u\n", inode->i_ino, 2948 + (unsigned long long)map->m_lblk, map->m_len); 2949 + 2950 + eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> 2951 + inode->i_sb->s_blocksize_bits; 2952 + if (eof_block < map->m_lblk + map->m_len) 2953 + eof_block = map->m_lblk + map->m_len; 2954 + 2966 2955 depth = ext_depth(inode); 2967 2956 eh = path[depth].p_hdr; 
2968 2957 ex = path[depth].p_ext; 2969 2958 ee_block = le32_to_cpu(ex->ee_block); 2970 2959 ee_len = ext4_ext_get_actual_len(ex); 2971 - allocated = ee_len - (iblock - ee_block); 2972 - newblock = iblock - ee_block + ext_pblock(ex); 2960 + allocated = ee_len - (map->m_lblk - ee_block); 2961 + newblock = map->m_lblk - ee_block + ext_pblock(ex); 2962 + 2973 2963 ex2 = ex; 2974 2964 orig_ex.ee_block = ex->ee_block; 2975 2965 orig_ex.ee_len = cpu_to_le16(ee_len); 2976 2966 ext4_ext_store_pblock(&orig_ex, ext_pblock(ex)); 2977 2967 2978 2968 /* 2969 + * It is safe to convert extent to initialized via explicit 2970 + * zeroout only if extent is fully insde i_size or new_size. 2971 + */ 2972 + may_zeroout = ee_block + ee_len <= eof_block; 2973 + 2974 + /* 2979 2975 * If the uninitialized extent begins at the same logical 2980 2976 * block where the write begins, and the write completely 2981 2977 * covers the extent, then we don't need to split it. 2982 2978 */ 2983 - if ((iblock == ee_block) && (allocated <= max_blocks)) 2979 + if ((map->m_lblk == ee_block) && (allocated <= map->m_len)) 2984 2980 return allocated; 2985 2981 2986 2982 err = ext4_ext_get_access(handle, inode, path + depth); 2987 2983 if (err) 2988 2984 goto out; 2989 - /* ex1: ee_block to iblock - 1 : uninitialized */ 2990 - if (iblock > ee_block) { 2985 + /* ex1: ee_block to map->m_lblk - 1 : uninitialized */ 2986 + if (map->m_lblk > ee_block) { 2991 2987 ex1 = ex; 2992 - ex1->ee_len = cpu_to_le16(iblock - ee_block); 2988 + ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); 2993 2989 ext4_ext_mark_uninitialized(ex1); 2994 2990 ex2 = &newex; 2995 2991 } ··· 3012 2980 * we insert ex3, if ex1 is NULL. This is to avoid temporary 3013 2981 * overlap of blocks. 
3014 2982 */ 3015 - if (!ex1 && allocated > max_blocks) 3016 - ex2->ee_len = cpu_to_le16(max_blocks); 2983 + if (!ex1 && allocated > map->m_len) 2984 + ex2->ee_len = cpu_to_le16(map->m_len); 3017 2985 /* ex3: to ee_block + ee_len : uninitialised */ 3018 - if (allocated > max_blocks) { 2986 + if (allocated > map->m_len) { 3019 2987 unsigned int newdepth; 3020 2988 ex3 = &newex; 3021 - ex3->ee_block = cpu_to_le32(iblock + max_blocks); 3022 - ext4_ext_store_pblock(ex3, newblock + max_blocks); 3023 - ex3->ee_len = cpu_to_le16(allocated - max_blocks); 2989 + ex3->ee_block = cpu_to_le32(map->m_lblk + map->m_len); 2990 + ext4_ext_store_pblock(ex3, newblock + map->m_len); 2991 + ex3->ee_len = cpu_to_le16(allocated - map->m_len); 3024 2992 ext4_ext_mark_uninitialized(ex3); 3025 2993 err = ext4_ext_insert_extent(handle, inode, path, ex3, flags); 3026 - if (err == -ENOSPC) { 2994 + if (err == -ENOSPC && may_zeroout) { 3027 2995 err = ext4_ext_zeroout(inode, &orig_ex); 3028 2996 if (err) 3029 2997 goto fix_extent_len; ··· 3033 3001 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); 3034 3002 ext4_ext_dirty(handle, inode, path + depth); 3035 3003 /* zeroed the full extent */ 3036 - /* blocks available from iblock */ 3004 + /* blocks available from map->m_lblk */ 3037 3005 return allocated; 3038 3006 3039 3007 } else if (err) ··· 3047 3015 * update the extent length after successful insert of the 3048 3016 * split extent 3049 3017 */ 3050 - orig_ex.ee_len = cpu_to_le16(ee_len - 3051 - ext4_ext_get_actual_len(ex3)); 3018 + ee_len -= ext4_ext_get_actual_len(ex3); 3019 + orig_ex.ee_len = cpu_to_le16(ee_len); 3020 + may_zeroout = ee_block + ee_len <= eof_block; 3021 + 3052 3022 depth = newdepth; 3053 3023 ext4_ext_drop_refs(path); 3054 - path = ext4_ext_find_extent(inode, iblock, path); 3024 + path = ext4_ext_find_extent(inode, map->m_lblk, path); 3055 3025 if (IS_ERR(path)) { 3056 3026 err = PTR_ERR(path); 3057 3027 goto out; ··· 3067 3033 if (err) 3068 3034 goto out; 3069 3035 3070 
- allocated = max_blocks; 3036 + allocated = map->m_len; 3071 3037 } 3072 3038 /* 3073 3039 * If there was a change of depth as part of the ··· 3076 3042 */ 3077 3043 if (ex1 && ex1 != ex) { 3078 3044 ex1 = ex; 3079 - ex1->ee_len = cpu_to_le16(iblock - ee_block); 3045 + ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); 3080 3046 ext4_ext_mark_uninitialized(ex1); 3081 3047 ex2 = &newex; 3082 3048 } 3083 3049 /* 3084 - * ex2: iblock to iblock + maxblocks-1 : to be direct IO written, 3085 - * uninitialised still. 3050 + * ex2: map->m_lblk to map->m_lblk + map->m_len-1 : to be written 3051 + * using direct I/O, uninitialised still. 3086 3052 */ 3087 - ex2->ee_block = cpu_to_le32(iblock); 3053 + ex2->ee_block = cpu_to_le32(map->m_lblk); 3088 3054 ext4_ext_store_pblock(ex2, newblock); 3089 3055 ex2->ee_len = cpu_to_le16(allocated); 3090 3056 ext4_ext_mark_uninitialized(ex2); ··· 3096 3062 goto out; 3097 3063 insert: 3098 3064 err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); 3099 - if (err == -ENOSPC) { 3065 + if (err == -ENOSPC && may_zeroout) { 3100 3066 err = ext4_ext_zeroout(inode, &orig_ex); 3101 3067 if (err) 3102 3068 goto fix_extent_len; ··· 3186 3152 3187 3153 static int 3188 3154 ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, 3189 - ext4_lblk_t iblock, unsigned int max_blocks, 3155 + struct ext4_map_blocks *map, 3190 3156 struct ext4_ext_path *path, int flags, 3191 - unsigned int allocated, struct buffer_head *bh_result, 3192 - ext4_fsblk_t newblock) 3157 + unsigned int allocated, ext4_fsblk_t newblock) 3193 3158 { 3194 3159 int ret = 0; 3195 3160 int err = 0; ··· 3196 3163 3197 3164 ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical" 3198 3165 "block %llu, max_blocks %u, flags %d, allocated %u", 3199 - inode->i_ino, (unsigned long long)iblock, max_blocks, 3166 + inode->i_ino, (unsigned long long)map->m_lblk, map->m_len, 3200 3167 flags, allocated); 3201 3168 ext4_ext_show_leaf(inode, path); 3202 
3169 3203 3170 /* get_block() before submit the IO, split the extent */ 3204 3171 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { 3205 - ret = ext4_split_unwritten_extents(handle, 3206 - inode, path, iblock, 3207 - max_blocks, flags); 3172 + ret = ext4_split_unwritten_extents(handle, inode, map, 3173 + path, flags); 3208 3174 /* 3209 3175 * Flag the inode(non aio case) or end_io struct (aio case) 3210 3176 * that this IO needs to convertion to written when IO is ··· 3214 3182 else 3215 3183 ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); 3216 3184 if (ext4_should_dioread_nolock(inode)) 3217 - set_buffer_uninit(bh_result); 3185 + map->m_flags |= EXT4_MAP_UNINIT; 3218 3186 goto out; 3219 3187 } 3220 3188 /* IO end_io complete, convert the filled extent to written */ ··· 3242 3210 * the buffer head will be unmapped so that 3243 3211 * a read from the block returns 0s. 3244 3212 */ 3245 - set_buffer_unwritten(bh_result); 3213 + map->m_flags |= EXT4_MAP_UNWRITTEN; 3246 3214 goto out1; 3247 3215 } 3248 3216 3249 3217 /* buffered write, writepage time, convert*/ 3250 - ret = ext4_ext_convert_to_initialized(handle, inode, 3251 - path, iblock, 3252 - max_blocks); 3218 + ret = ext4_ext_convert_to_initialized(handle, inode, map, path); 3253 3219 if (ret >= 0) 3254 3220 ext4_update_inode_fsync_trans(handle, inode, 1); 3255 3221 out: ··· 3256 3226 goto out2; 3257 3227 } else 3258 3228 allocated = ret; 3259 - set_buffer_new(bh_result); 3229 + map->m_flags |= EXT4_MAP_NEW; 3260 3230 /* 3261 3231 * if we allocated more blocks than requested 3262 3232 * we need to make sure we unmap the extra block ··· 3264 3234 * unmapped later when we find the buffer_head marked 3265 3235 * new. 
3266 3236 */ 3267 - if (allocated > max_blocks) { 3237 + if (allocated > map->m_len) { 3268 3238 unmap_underlying_metadata_blocks(inode->i_sb->s_bdev, 3269 - newblock + max_blocks, 3270 - allocated - max_blocks); 3271 - allocated = max_blocks; 3239 + newblock + map->m_len, 3240 + allocated - map->m_len); 3241 + allocated = map->m_len; 3272 3242 } 3273 3243 3274 3244 /* ··· 3282 3252 ext4_da_update_reserve_space(inode, allocated, 0); 3283 3253 3284 3254 map_out: 3285 - set_buffer_mapped(bh_result); 3255 + map->m_flags |= EXT4_MAP_MAPPED; 3286 3256 out1: 3287 - if (allocated > max_blocks) 3288 - allocated = max_blocks; 3257 + if (allocated > map->m_len) 3258 + allocated = map->m_len; 3289 3259 ext4_ext_show_leaf(inode, path); 3290 - bh_result->b_bdev = inode->i_sb->s_bdev; 3291 - bh_result->b_blocknr = newblock; 3260 + map->m_pblk = newblock; 3261 + map->m_len = allocated; 3292 3262 out2: 3293 3263 if (path) { 3294 3264 ext4_ext_drop_refs(path); ··· 3314 3284 * 3315 3285 * return < 0, error case. 
3316 3286 */ 3317 - int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, 3318 - ext4_lblk_t iblock, 3319 - unsigned int max_blocks, struct buffer_head *bh_result, 3320 - int flags) 3287 + int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, 3288 + struct ext4_map_blocks *map, int flags) 3321 3289 { 3322 3290 struct ext4_ext_path *path = NULL; 3323 3291 struct ext4_extent_header *eh; 3324 3292 struct ext4_extent newex, *ex, *last_ex; 3325 3293 ext4_fsblk_t newblock; 3326 - int err = 0, depth, ret, cache_type; 3294 + int i, err = 0, depth, ret, cache_type; 3327 3295 unsigned int allocated = 0; 3328 3296 struct ext4_allocation_request ar; 3329 3297 ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; 3330 3298 3331 - __clear_bit(BH_New, &bh_result->b_state); 3332 3299 ext_debug("blocks %u/%u requested for inode %lu\n", 3333 - iblock, max_blocks, inode->i_ino); 3300 + map->m_lblk, map->m_len, inode->i_ino); 3334 3301 3335 3302 /* check in cache */ 3336 - cache_type = ext4_ext_in_cache(inode, iblock, &newex); 3303 + cache_type = ext4_ext_in_cache(inode, map->m_lblk, &newex); 3337 3304 if (cache_type) { 3338 3305 if (cache_type == EXT4_EXT_CACHE_GAP) { 3339 3306 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { ··· 3343 3316 /* we should allocate requested block */ 3344 3317 } else if (cache_type == EXT4_EXT_CACHE_EXTENT) { 3345 3318 /* block is already allocated */ 3346 - newblock = iblock 3319 + newblock = map->m_lblk 3347 3320 - le32_to_cpu(newex.ee_block) 3348 3321 + ext_pblock(&newex); 3349 3322 /* number of remaining blocks in the extent */ 3350 3323 allocated = ext4_ext_get_actual_len(&newex) - 3351 - (iblock - le32_to_cpu(newex.ee_block)); 3324 + (map->m_lblk - le32_to_cpu(newex.ee_block)); 3352 3325 goto out; 3353 3326 } else { 3354 3327 BUG(); ··· 3356 3329 } 3357 3330 3358 3331 /* find extent for this block */ 3359 - path = ext4_ext_find_extent(inode, iblock, NULL); 3332 + path = ext4_ext_find_extent(inode, map->m_lblk, NULL); 3360 3333 if 
(IS_ERR(path)) { 3361 3334 err = PTR_ERR(path); 3362 3335 path = NULL; ··· 3372 3345 */ 3373 3346 if (unlikely(path[depth].p_ext == NULL && depth != 0)) { 3374 3347 EXT4_ERROR_INODE(inode, "bad extent address " 3375 - "iblock: %d, depth: %d pblock %lld", 3376 - iblock, depth, path[depth].p_block); 3348 + "lblock: %lu, depth: %d pblock %lld", 3349 + (unsigned long) map->m_lblk, depth, 3350 + path[depth].p_block); 3377 3351 err = -EIO; 3378 3352 goto out2; 3379 3353 } ··· 3392 3364 */ 3393 3365 ee_len = ext4_ext_get_actual_len(ex); 3394 3366 /* if found extent covers block, simply return it */ 3395 - if (in_range(iblock, ee_block, ee_len)) { 3396 - newblock = iblock - ee_block + ee_start; 3367 + if (in_range(map->m_lblk, ee_block, ee_len)) { 3368 + newblock = map->m_lblk - ee_block + ee_start; 3397 3369 /* number of remaining blocks in the extent */ 3398 - allocated = ee_len - (iblock - ee_block); 3399 - ext_debug("%u fit into %u:%d -> %llu\n", iblock, 3400 - ee_block, ee_len, newblock); 3370 + allocated = ee_len - (map->m_lblk - ee_block); 3371 + ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk, 3372 + ee_block, ee_len, newblock); 3401 3373 3402 3374 /* Do not put uninitialized extent in the cache */ 3403 3375 if (!ext4_ext_is_uninitialized(ex)) { ··· 3407 3379 goto out; 3408 3380 } 3409 3381 ret = ext4_ext_handle_uninitialized_extents(handle, 3410 - inode, iblock, max_blocks, path, 3411 - flags, allocated, bh_result, newblock); 3382 + inode, map, path, flags, allocated, 3383 + newblock); 3412 3384 return ret; 3413 3385 } 3414 3386 } ··· 3422 3394 * put just found gap into cache to speed up 3423 3395 * subsequent requests 3424 3396 */ 3425 - ext4_ext_put_gap_in_cache(inode, path, iblock); 3397 + ext4_ext_put_gap_in_cache(inode, path, map->m_lblk); 3426 3398 goto out2; 3427 3399 } 3428 3400 /* ··· 3430 3402 */ 3431 3403 3432 3404 /* find neighbour allocated blocks */ 3433 - ar.lleft = iblock; 3405 + ar.lleft = map->m_lblk; 3434 3406 err = 
ext4_ext_search_left(inode, path, &ar.lleft, &ar.pleft); 3435 3407 if (err) 3436 3408 goto out2; 3437 - ar.lright = iblock; 3409 + ar.lright = map->m_lblk; 3438 3410 err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright); 3439 3411 if (err) 3440 3412 goto out2; ··· 3445 3417 * EXT_INIT_MAX_LEN and for an uninitialized extent this limit is 3446 3418 * EXT_UNINIT_MAX_LEN. 3447 3419 */ 3448 - if (max_blocks > EXT_INIT_MAX_LEN && 3420 + if (map->m_len > EXT_INIT_MAX_LEN && 3449 3421 !(flags & EXT4_GET_BLOCKS_UNINIT_EXT)) 3450 - max_blocks = EXT_INIT_MAX_LEN; 3451 - else if (max_blocks > EXT_UNINIT_MAX_LEN && 3422 + map->m_len = EXT_INIT_MAX_LEN; 3423 + else if (map->m_len > EXT_UNINIT_MAX_LEN && 3452 3424 (flags & EXT4_GET_BLOCKS_UNINIT_EXT)) 3453 - max_blocks = EXT_UNINIT_MAX_LEN; 3425 + map->m_len = EXT_UNINIT_MAX_LEN; 3454 3426 3455 - /* Check if we can really insert (iblock)::(iblock+max_blocks) extent */ 3456 - newex.ee_block = cpu_to_le32(iblock); 3457 - newex.ee_len = cpu_to_le16(max_blocks); 3427 + /* Check if we can really insert (m_lblk)::(m_lblk + m_len) extent */ 3428 + newex.ee_block = cpu_to_le32(map->m_lblk); 3429 + newex.ee_len = cpu_to_le16(map->m_len); 3458 3430 err = ext4_ext_check_overlap(inode, &newex, path); 3459 3431 if (err) 3460 3432 allocated = ext4_ext_get_actual_len(&newex); 3461 3433 else 3462 - allocated = max_blocks; 3434 + allocated = map->m_len; 3463 3435 3464 3436 /* allocate new block */ 3465 3437 ar.inode = inode; 3466 - ar.goal = ext4_ext_find_goal(inode, path, iblock); 3467 - ar.logical = iblock; 3438 + ar.goal = ext4_ext_find_goal(inode, path, map->m_lblk); 3439 + ar.logical = map->m_lblk; 3468 3440 ar.len = allocated; 3469 3441 if (S_ISREG(inode->i_mode)) 3470 3442 ar.flags = EXT4_MB_HINT_DATA; ··· 3498 3470 EXT4_STATE_DIO_UNWRITTEN); 3499 3471 } 3500 3472 if (ext4_should_dioread_nolock(inode)) 3501 - set_buffer_uninit(bh_result); 3473 + map->m_flags |= EXT4_MAP_UNINIT; 3502 3474 } 3503 3475 3504 - if 
(unlikely(EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL)) { 3476 + if (unlikely(ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))) { 3505 3477 if (unlikely(!eh->eh_entries)) { 3506 3478 EXT4_ERROR_INODE(inode, 3507 - "eh->eh_entries == 0 ee_block %d", 3508 - ex->ee_block); 3479 + "eh->eh_entries == 0 and " 3480 + "EOFBLOCKS_FL set"); 3509 3481 err = -EIO; 3510 3482 goto out2; 3511 3483 } 3512 3484 last_ex = EXT_LAST_EXTENT(eh); 3513 - if (iblock + ar.len > le32_to_cpu(last_ex->ee_block) 3514 - + ext4_ext_get_actual_len(last_ex)) 3515 - EXT4_I(inode)->i_flags &= ~EXT4_EOFBLOCKS_FL; 3485 + /* 3486 + * If the current leaf block was reached by looking at 3487 + * the last index block all the way down the tree, and 3488 + * we are extending the inode beyond the last extent 3489 + * in the current leaf block, then clear the 3490 + * EOFBLOCKS_FL flag. 3491 + */ 3492 + for (i = depth-1; i >= 0; i--) { 3493 + if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr)) 3494 + break; 3495 + } 3496 + if ((i < 0) && 3497 + (map->m_lblk + ar.len > le32_to_cpu(last_ex->ee_block) + 3498 + ext4_ext_get_actual_len(last_ex))) 3499 + ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS); 3516 3500 } 3517 3501 err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); 3518 3502 if (err) { ··· 3540 3500 /* previous routine could use block we allocated */ 3541 3501 newblock = ext_pblock(&newex); 3542 3502 allocated = ext4_ext_get_actual_len(&newex); 3543 - if (allocated > max_blocks) 3544 - allocated = max_blocks; 3545 - set_buffer_new(bh_result); 3503 + if (allocated > map->m_len) 3504 + allocated = map->m_len; 3505 + map->m_flags |= EXT4_MAP_NEW; 3546 3506 3547 3507 /* 3548 3508 * Update reserved blocks/metadata blocks after successful ··· 3556 3516 * when it is _not_ an uninitialized extent. 
3557 3517 */ 3558 3518 if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) { 3559 - ext4_ext_put_in_cache(inode, iblock, allocated, newblock, 3519 + ext4_ext_put_in_cache(inode, map->m_lblk, allocated, newblock, 3560 3520 EXT4_EXT_CACHE_EXTENT); 3561 3521 ext4_update_inode_fsync_trans(handle, inode, 1); 3562 3522 } else 3563 3523 ext4_update_inode_fsync_trans(handle, inode, 0); 3564 3524 out: 3565 - if (allocated > max_blocks) 3566 - allocated = max_blocks; 3525 + if (allocated > map->m_len) 3526 + allocated = map->m_len; 3567 3527 ext4_ext_show_leaf(inode, path); 3568 - set_buffer_mapped(bh_result); 3569 - bh_result->b_bdev = inode->i_sb->s_bdev; 3570 - bh_result->b_blocknr = newblock; 3528 + map->m_flags |= EXT4_MAP_MAPPED; 3529 + map->m_pblk = newblock; 3530 + map->m_len = allocated; 3571 3531 out2: 3572 3532 if (path) { 3573 3533 ext4_ext_drop_refs(path); ··· 3665 3625 * can proceed even if the new size is the same as i_size. 3666 3626 */ 3667 3627 if (new_size > i_size_read(inode)) 3668 - EXT4_I(inode)->i_flags |= EXT4_EOFBLOCKS_FL; 3628 + ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS); 3669 3629 } 3670 3630 3671 3631 } ··· 3680 3640 long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) 3681 3641 { 3682 3642 handle_t *handle; 3683 - ext4_lblk_t block; 3684 3643 loff_t new_size; 3685 3644 unsigned int max_blocks; 3686 3645 int ret = 0; 3687 3646 int ret2 = 0; 3688 3647 int retries = 0; 3689 - struct buffer_head map_bh; 3648 + struct ext4_map_blocks map; 3690 3649 unsigned int credits, blkbits = inode->i_blkbits; 3691 3650 3692 3651 /* 3693 3652 * currently supporting (pre)allocate mode for extent-based 3694 3653 * files _only_ 3695 3654 */ 3696 - if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) 3655 + if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) 3697 3656 return -EOPNOTSUPP; 3698 3657 3699 3658 /* preallocation to directories is currently not supported */ 3700 3659 if (S_ISDIR(inode->i_mode)) 3701 3660 return -ENODEV; 3702 3661 
3703 - block = offset >> blkbits; 3662 + map.m_lblk = offset >> blkbits; 3704 3663 /* 3705 3664 * We can't just convert len to max_blocks because 3706 3665 * If blocksize = 4096 offset = 3072 and len = 2048 3707 3666 */ 3708 3667 max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) 3709 - - block; 3668 + - map.m_lblk; 3710 3669 /* 3711 3670 * credits to insert 1 extent into extent tree 3712 3671 */ 3713 3672 credits = ext4_chunk_trans_blocks(inode, max_blocks); 3714 3673 mutex_lock(&inode->i_mutex); 3674 + ret = inode_newsize_ok(inode, (len + offset)); 3675 + if (ret) { 3676 + mutex_unlock(&inode->i_mutex); 3677 + return ret; 3678 + } 3715 3679 retry: 3716 3680 while (ret >= 0 && ret < max_blocks) { 3717 - block = block + ret; 3718 - max_blocks = max_blocks - ret; 3681 + map.m_lblk = map.m_lblk + ret; 3682 + map.m_len = max_blocks = max_blocks - ret; 3719 3683 handle = ext4_journal_start(inode, credits); 3720 3684 if (IS_ERR(handle)) { 3721 3685 ret = PTR_ERR(handle); 3722 3686 break; 3723 3687 } 3724 - map_bh.b_state = 0; 3725 - ret = ext4_get_blocks(handle, inode, block, 3726 - max_blocks, &map_bh, 3688 + ret = ext4_map_blocks(handle, inode, &map, 3727 3689 EXT4_GET_BLOCKS_CREATE_UNINIT_EXT); 3728 3690 if (ret <= 0) { 3729 3691 #ifdef EXT4FS_DEBUG 3730 3692 WARN_ON(ret <= 0); 3731 - printk(KERN_ERR "%s: ext4_ext_get_blocks " 3693 + printk(KERN_ERR "%s: ext4_ext_map_blocks " 3732 3694 "returned error inode#%lu, block=%u, " 3733 3695 "max_blocks=%u", __func__, 3734 3696 inode->i_ino, block, max_blocks); ··· 3739 3697 ret2 = ext4_journal_stop(handle); 3740 3698 break; 3741 3699 } 3742 - if ((block + ret) >= (EXT4_BLOCK_ALIGN(offset + len, 3700 + if ((map.m_lblk + ret) >= (EXT4_BLOCK_ALIGN(offset + len, 3743 3701 blkbits) >> blkbits)) 3744 3702 new_size = offset + len; 3745 3703 else 3746 - new_size = (block + ret) << blkbits; 3704 + new_size = (map.m_lblk + ret) << blkbits; 3747 3705 3748 3706 ext4_falloc_update_inode(inode, mode, new_size, 3749 - 
buffer_new(&map_bh)); 3707 + (map.m_flags & EXT4_MAP_NEW)); 3750 3708 ext4_mark_inode_dirty(handle, inode); 3751 3709 ret2 = ext4_journal_stop(handle); 3752 3710 if (ret2) ··· 3775 3733 ssize_t len) 3776 3734 { 3777 3735 handle_t *handle; 3778 - ext4_lblk_t block; 3779 3736 unsigned int max_blocks; 3780 3737 int ret = 0; 3781 3738 int ret2 = 0; 3782 - struct buffer_head map_bh; 3739 + struct ext4_map_blocks map; 3783 3740 unsigned int credits, blkbits = inode->i_blkbits; 3784 3741 3785 - block = offset >> blkbits; 3742 + map.m_lblk = offset >> blkbits; 3786 3743 /* 3787 3744 * We can't just convert len to max_blocks because 3788 3745 * If blocksize = 4096 offset = 3072 and len = 2048 3789 3746 */ 3790 - max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) 3791 - - block; 3747 + max_blocks = ((EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) - 3748 + map.m_lblk); 3792 3749 /* 3793 3750 * credits to insert 1 extent into extent tree 3794 3751 */ 3795 3752 credits = ext4_chunk_trans_blocks(inode, max_blocks); 3796 3753 while (ret >= 0 && ret < max_blocks) { 3797 - block = block + ret; 3798 - max_blocks = max_blocks - ret; 3754 + map.m_lblk += ret; 3755 + map.m_len = (max_blocks -= ret); 3799 3756 handle = ext4_journal_start(inode, credits); 3800 3757 if (IS_ERR(handle)) { 3801 3758 ret = PTR_ERR(handle); 3802 3759 break; 3803 3760 } 3804 - map_bh.b_state = 0; 3805 - ret = ext4_get_blocks(handle, inode, block, 3806 - max_blocks, &map_bh, 3761 + ret = ext4_map_blocks(handle, inode, &map, 3807 3762 EXT4_GET_BLOCKS_IO_CONVERT_EXT); 3808 3763 if (ret <= 0) { 3809 3764 WARN_ON(ret <= 0); 3810 - printk(KERN_ERR "%s: ext4_ext_get_blocks " 3765 + printk(KERN_ERR "%s: ext4_ext_map_blocks " 3811 3766 "returned error inode#%lu, block=%u, " 3812 3767 "max_blocks=%u", __func__, 3813 - inode->i_ino, block, max_blocks); 3768 + inode->i_ino, map.m_lblk, map.m_len); 3814 3769 } 3815 3770 ext4_mark_inode_dirty(handle, inode); 3816 3771 ret2 = ext4_journal_stop(handle); 
··· 3937 3898 int error = 0; 3938 3899 3939 3900 /* fallback to generic here if not in extents fmt */ 3940 - if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) 3901 + if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) 3941 3902 return generic_block_fiemap(inode, fieinfo, start, len, 3942 3903 ext4_get_block); 3943 3904

+1 -1

fs/ext4/file.c

··· 66 66 * is smaller than s_maxbytes, which is for extent-mapped files. 67 67 */ 68 68 69 - if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) { 69 + if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { 70 70 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 71 71 size_t length = iov_length(iov, nr_segs); 72 72

+31 -4

fs/ext4/fsync.c

··· 35 35 #include <trace/events/ext4.h> 36 36 37 37 /* 38 + * If we're not journaling and this is a just-created file, we have to 39 + * sync our parent directory (if it was freshly created) since 40 + * otherwise it will only be written by writeback, leaving a huge 41 + * window during which a crash may lose the file. This may apply for 42 + * the parent directory's parent as well, and so on recursively, if 43 + * they are also freshly created. 44 + */ 45 + static void ext4_sync_parent(struct inode *inode) 46 + { 47 + struct dentry *dentry = NULL; 48 + 49 + while (inode && ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) { 50 + ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY); 51 + dentry = list_entry(inode->i_dentry.next, 52 + struct dentry, d_alias); 53 + if (!dentry || !dentry->d_parent || !dentry->d_parent->d_inode) 54 + break; 55 + inode = dentry->d_parent->d_inode; 56 + sync_mapping_buffers(inode->i_mapping); 57 + } 58 + } 59 + 60 + /* 38 61 * akpm: A new design for ext4_sync_file(). 39 62 * 40 63 * This is only called from sys_fsync(), sys_fdatasync() and sys_msync(). ··· 89 66 ret = flush_completed_IO(inode); 90 67 if (ret < 0) 91 68 return ret; 92 - 93 - if (!journal) 94 - return simple_fsync(file, dentry, datasync); 69 + 70 + if (!journal) { 71 + ret = simple_fsync(file, dentry, datasync); 72 + if (!ret && !list_empty(&inode->i_dentry)) 73 + ext4_sync_parent(inode); 74 + return ret; 75 + } 95 76 96 77 /* 97 78 * data=writeback,ordered: ··· 129 102 (journal->j_flags & JBD2_BARRIER)) 130 103 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, 131 104 NULL, BLKDEV_IFL_WAIT); 132 - jbd2_log_wait_commit(journal, commit_tid); 105 + ret = jbd2_log_wait_commit(journal, commit_tid); 133 106 } else if (journal->j_flags & JBD2_BARRIER) 134 107 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL, 135 108 BLKDEV_IFL_WAIT);

+42 -49

fs/ext4/ialloc.c

··· 240 240 if (fatal) 241 241 goto error_return; 242 242 243 - /* Ok, now we can actually update the inode bitmaps.. */ 244 - cleared = ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group), 245 - bit, bitmap_bh->b_data); 246 - if (!cleared) 247 - ext4_error(sb, "bit already cleared for inode %lu", ino); 248 - else { 249 - gdp = ext4_get_group_desc(sb, block_group, &bh2); 250 - 243 + fatal = -ESRCH; 244 + gdp = ext4_get_group_desc(sb, block_group, &bh2); 245 + if (gdp) { 251 246 BUFFER_TRACE(bh2, "get_write_access"); 252 247 fatal = ext4_journal_get_write_access(handle, bh2); 253 - if (fatal) goto error_return; 254 - 255 - if (gdp) { 256 - ext4_lock_group(sb, block_group); 257 - count = ext4_free_inodes_count(sb, gdp) + 1; 258 - ext4_free_inodes_set(sb, gdp, count); 259 - if (is_directory) { 260 - count = ext4_used_dirs_count(sb, gdp) - 1; 261 - ext4_used_dirs_set(sb, gdp, count); 262 - if (sbi->s_log_groups_per_flex) { 263 - ext4_group_t f; 264 - 265 - f = ext4_flex_group(sbi, block_group); 266 - atomic_dec(&sbi->s_flex_groups[f].used_dirs); 267 - } 268 - 269 - } 270 - gdp->bg_checksum = ext4_group_desc_csum(sbi, 271 - block_group, gdp); 272 - ext4_unlock_group(sb, block_group); 273 - percpu_counter_inc(&sbi->s_freeinodes_counter); 274 - if (is_directory) 275 - percpu_counter_dec(&sbi->s_dirs_counter); 276 - 277 - if (sbi->s_log_groups_per_flex) { 278 - ext4_group_t f; 279 - 280 - f = ext4_flex_group(sbi, block_group); 281 - atomic_inc(&sbi->s_flex_groups[f].free_inodes); 282 - } 283 - } 284 - BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata"); 285 - err = ext4_handle_dirty_metadata(handle, NULL, bh2); 286 - if (!fatal) fatal = err; 287 248 } 288 - BUFFER_TRACE(bitmap_bh, "call ext4_handle_dirty_metadata"); 289 - err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); 290 - if (!fatal) 291 - fatal = err; 292 - sb->s_dirt = 1; 249 + ext4_lock_group(sb, block_group); 250 + cleared = ext4_clear_bit(bit, bitmap_bh->b_data); 251 + if (fatal || !cleared) { 
252 + ext4_unlock_group(sb, block_group); 253 + goto out; 254 + } 255 + 256 + count = ext4_free_inodes_count(sb, gdp) + 1; 257 + ext4_free_inodes_set(sb, gdp, count); 258 + if (is_directory) { 259 + count = ext4_used_dirs_count(sb, gdp) - 1; 260 + ext4_used_dirs_set(sb, gdp, count); 261 + percpu_counter_dec(&sbi->s_dirs_counter); 262 + } 263 + gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); 264 + ext4_unlock_group(sb, block_group); 265 + 266 + percpu_counter_inc(&sbi->s_freeinodes_counter); 267 + if (sbi->s_log_groups_per_flex) { 268 + ext4_group_t f = ext4_flex_group(sbi, block_group); 269 + 270 + atomic_inc(&sbi->s_flex_groups[f].free_inodes); 271 + if (is_directory) 272 + atomic_dec(&sbi->s_flex_groups[f].used_dirs); 273 + } 274 + BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata"); 275 + fatal = ext4_handle_dirty_metadata(handle, NULL, bh2); 276 + out: 277 + if (cleared) { 278 + BUFFER_TRACE(bitmap_bh, "call ext4_handle_dirty_metadata"); 279 + err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); 280 + if (!fatal) 281 + fatal = err; 282 + sb->s_dirt = 1; 283 + } else 284 + ext4_error(sb, "bit already cleared for inode %lu", ino); 285 + 293 286 error_return: 294 287 brelse(bitmap_bh); 295 288 ext4_std_error(sb, fatal); ··· 492 499 493 500 if (S_ISDIR(mode) && 494 501 ((parent == sb->s_root->d_inode) || 495 - (EXT4_I(parent)->i_flags & EXT4_TOPDIR_FL))) { 502 + (ext4_test_inode_flag(parent, EXT4_INODE_TOPDIR)))) { 496 503 int best_ndir = inodes_per_group; 497 504 int ret = -1; 498 505 ··· 1034 1041 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) { 1035 1042 /* set extent flag only for directory, file and normal symlink*/ 1036 1043 if (S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) { 1037 - EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL; 1044 + ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS); 1038 1045 ext4_ext_tree_init(handle, inode); 1039 1046 } 1040 1047 }

+373 -354

fs/ext4/inode.c

··· 149 149 int ret; 150 150 151 151 /* 152 - * Drop i_data_sem to avoid deadlock with ext4_get_blocks At this 152 + * Drop i_data_sem to avoid deadlock with ext4_map_blocks. At this 153 153 * moment, get_block can be called only for blocks inside i_size since 154 154 * page cache has been already dropped and writes are blocked by 155 155 * i_mutex. So we can safely drop the i_data_sem here. ··· 348 348 if (blk && 349 349 unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), 350 350 blk, 1))) { 351 - __ext4_error(inode->i_sb, function, 352 - "invalid block reference %u " 353 - "in inode #%lu", blk, inode->i_ino); 351 + ext4_error_inode(function, inode, 352 + "invalid block reference %u", blk); 354 353 return -EIO; 355 354 } 356 355 } ··· 784 785 /* Allocation failed, free what we already allocated */ 785 786 ext4_free_blocks(handle, inode, 0, new_blocks[0], 1, 0); 786 787 for (i = 1; i <= n ; i++) { 787 - /* 788 + /* 788 789 * branch[i].bh is newly allocated, so there is no 789 790 * need to revoke the block, which is why we don't 790 791 * need to set EXT4_FREE_BLOCKS_METADATA. ··· 874 875 875 876 err_out: 876 877 for (i = 1; i <= num; i++) { 877 - /* 878 + /* 878 879 * branch[i].bh is newly allocated, so there is no 879 880 * need to revoke the block, which is why we don't 880 881 * need to set EXT4_FREE_BLOCKS_METADATA. ··· 889 890 } 890 891 891 892 /* 892 - * The ext4_ind_get_blocks() function handles non-extents inodes 893 + * The ext4_ind_map_blocks() function handles non-extents inodes 893 894 * (i.e., using the traditional indirect/double-indirect i_blocks 894 - * scheme) for ext4_get_blocks(). 895 + * scheme) for ext4_map_blocks(). 895 896 * 896 897 * Allocation strategy is simple: if we have to allocate something, we will 897 898 * have to go the whole way to leaf. So let's do it before attaching anything ··· 916 917 * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system 917 918 * blocks. 
918 919 */ 919 - static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, 920 - ext4_lblk_t iblock, unsigned int maxblocks, 921 - struct buffer_head *bh_result, 920 + static int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, 921 + struct ext4_map_blocks *map, 922 922 int flags) 923 923 { 924 924 int err = -EIO; ··· 931 933 int count = 0; 932 934 ext4_fsblk_t first_block = 0; 933 935 934 - J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)); 936 + J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))); 935 937 J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0); 936 - depth = ext4_block_to_path(inode, iblock, offsets, 938 + depth = ext4_block_to_path(inode, map->m_lblk, offsets, 937 939 &blocks_to_boundary); 938 940 939 941 if (depth == 0) ··· 944 946 /* Simplest case - block found, no allocation needed */ 945 947 if (!partial) { 946 948 first_block = le32_to_cpu(chain[depth - 1].key); 947 - clear_buffer_new(bh_result); 948 949 count++; 949 950 /*map more blocks*/ 950 - while (count < maxblocks && count <= blocks_to_boundary) { 951 + while (count < map->m_len && count <= blocks_to_boundary) { 951 952 ext4_fsblk_t blk; 952 953 953 954 blk = le32_to_cpu(*(chain[depth-1].p + count)); ··· 966 969 /* 967 970 * Okay, we need to do block allocation. 968 971 */ 969 - goal = ext4_find_goal(inode, iblock, partial); 972 + goal = ext4_find_goal(inode, map->m_lblk, partial); 970 973 971 974 /* the number of blocks need to allocate for [d,t]indirect blocks */ 972 975 indirect_blks = (chain + depth) - partial - 1; ··· 976 979 * direct blocks to allocate for this branch. 
977 980 */ 978 981 count = ext4_blks_to_allocate(partial, indirect_blks, 979 - maxblocks, blocks_to_boundary); 982 + map->m_len, blocks_to_boundary); 980 983 /* 981 984 * Block out ext4_truncate while we alter the tree 982 985 */ 983 - err = ext4_alloc_branch(handle, inode, iblock, indirect_blks, 986 + err = ext4_alloc_branch(handle, inode, map->m_lblk, indirect_blks, 984 987 &count, goal, 985 988 offsets + (partial - chain), partial); 986 989 ··· 992 995 * may need to return -EAGAIN upwards in the worst case. --sct 993 996 */ 994 997 if (!err) 995 - err = ext4_splice_branch(handle, inode, iblock, 998 + err = ext4_splice_branch(handle, inode, map->m_lblk, 996 999 partial, indirect_blks, count); 997 1000 if (err) 998 1001 goto cleanup; 999 1002 1000 - set_buffer_new(bh_result); 1003 + map->m_flags |= EXT4_MAP_NEW; 1001 1004 1002 1005 ext4_update_inode_fsync_trans(handle, inode, 1); 1003 1006 got_it: 1004 - map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key)); 1007 + map->m_flags |= EXT4_MAP_MAPPED; 1008 + map->m_pblk = le32_to_cpu(chain[depth-1].key); 1009 + map->m_len = count; 1005 1010 if (count > blocks_to_boundary) 1006 - set_buffer_boundary(bh_result); 1011 + map->m_flags |= EXT4_MAP_BOUNDARY; 1007 1012 err = count; 1008 1013 /* Clean up and exit */ 1009 1014 partial = chain + depth - 1; /* the whole chain */ ··· 1015 1016 brelse(partial->bh); 1016 1017 partial--; 1017 1018 } 1018 - BUFFER_TRACE(bh_result, "returned"); 1019 1019 out: 1020 1020 return err; 1021 1021 } ··· 1059 1061 */ 1060 1062 static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock) 1061 1063 { 1062 - if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) 1064 + if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) 1063 1065 return ext4_ext_calc_metadata_amount(inode, lblock); 1064 1066 1065 1067 return ext4_indirect_calc_metadata_amount(inode, lblock); ··· 1074 1076 { 1075 1077 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1076 1078 struct ext4_inode_info *ei = 
EXT4_I(inode); 1077 - int mdb_free = 0, allocated_meta_blocks = 0; 1078 1079 1079 1080 spin_lock(&ei->i_block_reservation_lock); 1080 1081 trace_ext4_da_update_reserve_space(inode, used); ··· 1088 1091 1089 1092 /* Update per-inode reservations */ 1090 1093 ei->i_reserved_data_blocks -= used; 1091 - used += ei->i_allocated_meta_blocks; 1092 1094 ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks; 1093 - allocated_meta_blocks = ei->i_allocated_meta_blocks; 1095 + percpu_counter_sub(&sbi->s_dirtyblocks_counter, 1096 + used + ei->i_allocated_meta_blocks); 1094 1097 ei->i_allocated_meta_blocks = 0; 1095 - percpu_counter_sub(&sbi->s_dirtyblocks_counter, used); 1096 1098 1097 1099 if (ei->i_reserved_data_blocks == 0) { 1098 1100 /* ··· 1099 1103 * only when we have written all of the delayed 1100 1104 * allocation blocks. 1101 1105 */ 1102 - mdb_free = ei->i_reserved_meta_blocks; 1106 + percpu_counter_sub(&sbi->s_dirtyblocks_counter, 1107 + ei->i_reserved_meta_blocks); 1103 1108 ei->i_reserved_meta_blocks = 0; 1104 1109 ei->i_da_metadata_calc_len = 0; 1105 - percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free); 1106 1110 } 1107 1111 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1108 1112 1109 - /* Update quota subsystem */ 1110 - if (quota_claim) { 1113 + /* Update quota subsystem for data blocks */ 1114 + if (quota_claim) 1111 1115 dquot_claim_block(inode, used); 1112 - if (mdb_free) 1113 - dquot_release_reservation_block(inode, mdb_free); 1114 - } else { 1116 + else { 1115 1117 /* 1116 1118 * We did fallocate with an offset that is already delayed 1117 1119 * allocated. So on delayed allocated writeback we should 1118 - * not update the quota for allocated blocks. But then 1119 - * converting an fallocate region to initialized region would 1120 - * have caused a metadata allocation. So claim quota for 1121 - * that 1120 + * not re-claim the quota for fallocated blocks. 
1122 1121 */ 1123 - if (allocated_meta_blocks) 1124 - dquot_claim_block(inode, allocated_meta_blocks); 1125 - dquot_release_reservation_block(inode, mdb_free + used); 1122 + dquot_release_reservation_block(inode, used); 1126 1123 } 1127 1124 1128 1125 /* ··· 1128 1139 ext4_discard_preallocations(inode); 1129 1140 } 1130 1141 1131 - static int check_block_validity(struct inode *inode, const char *msg, 1132 - sector_t logical, sector_t phys, int len) 1142 + static int check_block_validity(struct inode *inode, const char *func, 1143 + struct ext4_map_blocks *map) 1133 1144 { 1134 - if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) { 1135 - __ext4_error(inode->i_sb, msg, 1136 - "inode #%lu logical block %llu mapped to %llu " 1137 - "(size %d)", inode->i_ino, 1138 - (unsigned long long) logical, 1139 - (unsigned long long) phys, len); 1145 + if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk, 1146 + map->m_len)) { 1147 + ext4_error_inode(func, inode, 1148 + "lblock %lu mapped to illegal pblock %llu " 1149 + "(length %d)", (unsigned long) map->m_lblk, 1150 + map->m_pblk, map->m_len); 1140 1151 return -EIO; 1141 1152 } 1142 1153 return 0; ··· 1201 1212 } 1202 1213 1203 1214 /* 1204 - * The ext4_get_blocks() function tries to look up the requested blocks, 1215 + * The ext4_map_blocks() function tries to look up the requested blocks, 1205 1216 * and returns if the blocks are already mapped. 1206 1217 * 1207 1218 * Otherwise it takes the write lock of the i_data_sem and allocate blocks 1208 1219 * and store the allocated blocks in the result buffer head and mark it 1209 1220 * mapped. 
1210 1221 * 1211 - * If file type is extents based, it will call ext4_ext_get_blocks(), 1212 - * Otherwise, call with ext4_ind_get_blocks() to handle indirect mapping 1222 + * If file type is extents based, it will call ext4_ext_map_blocks(), 1223 + * Otherwise, call with ext4_ind_map_blocks() to handle indirect mapping 1213 1224 * based files 1214 1225 * 1215 1226 * On success, it returns the number of blocks being mapped or allocate. ··· 1222 1233 * 1223 1234 * It returns the error in case of allocation failure. 1224 1235 */ 1225 - int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, 1226 - unsigned int max_blocks, struct buffer_head *bh, 1227 - int flags) 1236 + int ext4_map_blocks(handle_t *handle, struct inode *inode, 1237 + struct ext4_map_blocks *map, int flags) 1228 1238 { 1229 1239 int retval; 1230 1240 1231 - clear_buffer_mapped(bh); 1232 - clear_buffer_unwritten(bh); 1233 - 1234 - ext_debug("ext4_get_blocks(): inode %lu, flag %d, max_blocks %u," 1235 - "logical block %lu\n", inode->i_ino, flags, max_blocks, 1236 - (unsigned long)block); 1241 + map->m_flags = 0; 1242 + ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u," 1243 + "logical block %lu\n", inode->i_ino, flags, map->m_len, 1244 + (unsigned long) map->m_lblk); 1237 1245 /* 1238 1246 * Try to see if we can get the block without requesting a new 1239 1247 * file system block. 
1240 1248 */ 1241 1249 down_read((&EXT4_I(inode)->i_data_sem)); 1242 - if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { 1243 - retval = ext4_ext_get_blocks(handle, inode, block, max_blocks, 1244 - bh, 0); 1250 + if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { 1251 + retval = ext4_ext_map_blocks(handle, inode, map, 0); 1245 1252 } else { 1246 - retval = ext4_ind_get_blocks(handle, inode, block, max_blocks, 1247 - bh, 0); 1253 + retval = ext4_ind_map_blocks(handle, inode, map, 0); 1248 1254 } 1249 1255 up_read((&EXT4_I(inode)->i_data_sem)); 1250 1256 1251 - if (retval > 0 && buffer_mapped(bh)) { 1252 - int ret = check_block_validity(inode, "file system corruption", 1253 - block, bh->b_blocknr, retval); 1257 + if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { 1258 + int ret = check_block_validity(inode, __func__, map); 1254 1259 if (ret != 0) 1255 1260 return ret; 1256 1261 } ··· 1260 1277 * ext4_ext_get_block() returns th create = 0 1261 1278 * with buffer head unmapped. 1262 1279 */ 1263 - if (retval > 0 && buffer_mapped(bh)) 1280 + if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) 1264 1281 return retval; 1265 1282 1266 1283 /* ··· 1273 1290 * of BH_Unwritten and BH_Mapped flags being simultaneously 1274 1291 * set on the buffer_head. 
1275 1292 */ 1276 - clear_buffer_unwritten(bh); 1293 + map->m_flags &= ~EXT4_MAP_UNWRITTEN; 1277 1294 1278 1295 /* 1279 1296 * New blocks allocate and/or writing to uninitialized extent ··· 1295 1312 * We need to check for EXT4 here because migrate 1296 1313 * could have changed the inode type in between 1297 1314 */ 1298 - if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { 1299 - retval = ext4_ext_get_blocks(handle, inode, block, max_blocks, 1300 - bh, flags); 1315 + if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { 1316 + retval = ext4_ext_map_blocks(handle, inode, map, flags); 1301 1317 } else { 1302 - retval = ext4_ind_get_blocks(handle, inode, block, 1303 - max_blocks, bh, flags); 1318 + retval = ext4_ind_map_blocks(handle, inode, map, flags); 1304 1319 1305 - if (retval > 0 && buffer_new(bh)) { 1320 + if (retval > 0 && map->m_flags & EXT4_MAP_NEW) { 1306 1321 /* 1307 1322 * We allocated new blocks which will result in 1308 1323 * i_data's format changing. Force the migrate ··· 1323 1342 EXT4_I(inode)->i_delalloc_reserved_flag = 0; 1324 1343 1325 1344 up_write((&EXT4_I(inode)->i_data_sem)); 1326 - if (retval > 0 && buffer_mapped(bh)) { 1327 - int ret = check_block_validity(inode, "file system " 1328 - "corruption after allocation", 1329 - block, bh->b_blocknr, retval); 1345 + if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { 1346 + int ret = check_block_validity(inode, 1347 + "ext4_map_blocks_after_alloc", 1348 + map); 1330 1349 if (ret != 0) 1331 1350 return ret; 1332 1351 } ··· 1336 1355 /* Maximum number of blocks we map for direct IO at once. 
*/ 1337 1356 #define DIO_MAX_BLOCKS 4096 1338 1357 1339 - int ext4_get_block(struct inode *inode, sector_t iblock, 1340 - struct buffer_head *bh_result, int create) 1358 + static int _ext4_get_block(struct inode *inode, sector_t iblock, 1359 + struct buffer_head *bh, int flags) 1341 1360 { 1342 1361 handle_t *handle = ext4_journal_current_handle(); 1362 + struct ext4_map_blocks map; 1343 1363 int ret = 0, started = 0; 1344 - unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; 1345 1364 int dio_credits; 1346 1365 1347 - if (create && !handle) { 1366 + map.m_lblk = iblock; 1367 + map.m_len = bh->b_size >> inode->i_blkbits; 1368 + 1369 + if (flags && !handle) { 1348 1370 /* Direct IO write... */ 1349 - if (max_blocks > DIO_MAX_BLOCKS) 1350 - max_blocks = DIO_MAX_BLOCKS; 1351 - dio_credits = ext4_chunk_trans_blocks(inode, max_blocks); 1371 + if (map.m_len > DIO_MAX_BLOCKS) 1372 + map.m_len = DIO_MAX_BLOCKS; 1373 + dio_credits = ext4_chunk_trans_blocks(inode, map.m_len); 1352 1374 handle = ext4_journal_start(inode, dio_credits); 1353 1375 if (IS_ERR(handle)) { 1354 1376 ret = PTR_ERR(handle); 1355 - goto out; 1377 + return ret; 1356 1378 } 1357 1379 started = 1; 1358 1380 } 1359 1381 1360 - ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result, 1361 - create ? EXT4_GET_BLOCKS_CREATE : 0); 1382 + ret = ext4_map_blocks(handle, inode, &map, flags); 1362 1383 if (ret > 0) { 1363 - bh_result->b_size = (ret << inode->i_blkbits); 1384 + map_bh(bh, inode->i_sb, map.m_pblk); 1385 + bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; 1386 + bh->b_size = inode->i_sb->s_blocksize * map.m_len; 1364 1387 ret = 0; 1365 1388 } 1366 1389 if (started) 1367 1390 ext4_journal_stop(handle); 1368 - out: 1369 1391 return ret; 1392 + } 1393 + 1394 + int ext4_get_block(struct inode *inode, sector_t iblock, 1395 + struct buffer_head *bh, int create) 1396 + { 1397 + return _ext4_get_block(inode, iblock, bh, 1398 + create ? 
EXT4_GET_BLOCKS_CREATE : 0); 1370 1399 } 1371 1400 1372 1401 /* ··· 1385 1394 struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, 1386 1395 ext4_lblk_t block, int create, int *errp) 1387 1396 { 1388 - struct buffer_head dummy; 1397 + struct ext4_map_blocks map; 1398 + struct buffer_head *bh; 1389 1399 int fatal = 0, err; 1390 - int flags = 0; 1391 1400 1392 1401 J_ASSERT(handle != NULL || create == 0); 1393 1402 1394 - dummy.b_state = 0; 1395 - dummy.b_blocknr = -1000; 1396 - buffer_trace_init(&dummy.b_history); 1397 - if (create) 1398 - flags |= EXT4_GET_BLOCKS_CREATE; 1399 - err = ext4_get_blocks(handle, inode, block, 1, &dummy, flags); 1400 - /* 1401 - * ext4_get_blocks() returns number of blocks mapped. 0 in 1402 - * case of a HOLE. 1403 - */ 1404 - if (err > 0) { 1405 - if (err > 1) 1406 - WARN_ON(1); 1407 - err = 0; 1408 - } 1409 - *errp = err; 1410 - if (!err && buffer_mapped(&dummy)) { 1411 - struct buffer_head *bh; 1412 - bh = sb_getblk(inode->i_sb, dummy.b_blocknr); 1413 - if (!bh) { 1414 - *errp = -EIO; 1415 - goto err; 1416 - } 1417 - if (buffer_new(&dummy)) { 1418 - J_ASSERT(create != 0); 1419 - J_ASSERT(handle != NULL); 1403 + map.m_lblk = block; 1404 + map.m_len = 1; 1405 + err = ext4_map_blocks(handle, inode, &map, 1406 + create ? EXT4_GET_BLOCKS_CREATE : 0); 1420 1407 1421 - /* 1422 - * Now that we do not always journal data, we should 1423 - * keep in mind whether this should always journal the 1424 - * new buffer as metadata. For now, regular file 1425 - * writes use ext4_get_block instead, so it's not a 1426 - * problem. 
1427 - */ 1428 - lock_buffer(bh); 1429 - BUFFER_TRACE(bh, "call get_create_access"); 1430 - fatal = ext4_journal_get_create_access(handle, bh); 1431 - if (!fatal && !buffer_uptodate(bh)) { 1432 - memset(bh->b_data, 0, inode->i_sb->s_blocksize); 1433 - set_buffer_uptodate(bh); 1434 - } 1435 - unlock_buffer(bh); 1436 - BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); 1437 - err = ext4_handle_dirty_metadata(handle, inode, bh); 1438 - if (!fatal) 1439 - fatal = err; 1440 - } else { 1441 - BUFFER_TRACE(bh, "not a new buffer"); 1442 - } 1443 - if (fatal) { 1444 - *errp = fatal; 1445 - brelse(bh); 1446 - bh = NULL; 1447 - } 1448 - return bh; 1408 + if (err < 0) 1409 + *errp = err; 1410 + if (err <= 0) 1411 + return NULL; 1412 + *errp = 0; 1413 + 1414 + bh = sb_getblk(inode->i_sb, map.m_pblk); 1415 + if (!bh) { 1416 + *errp = -EIO; 1417 + return NULL; 1449 1418 } 1450 - err: 1451 - return NULL; 1419 + if (map.m_flags & EXT4_MAP_NEW) { 1420 + J_ASSERT(create != 0); 1421 + J_ASSERT(handle != NULL); 1422 + 1423 + /* 1424 + * Now that we do not always journal data, we should 1425 + * keep in mind whether this should always journal the 1426 + * new buffer as metadata. For now, regular file 1427 + * writes use ext4_get_block instead, so it's not a 1428 + * problem. 
1429 + */ 1430 + lock_buffer(bh); 1431 + BUFFER_TRACE(bh, "call get_create_access"); 1432 + fatal = ext4_journal_get_create_access(handle, bh); 1433 + if (!fatal && !buffer_uptodate(bh)) { 1434 + memset(bh->b_data, 0, inode->i_sb->s_blocksize); 1435 + set_buffer_uptodate(bh); 1436 + } 1437 + unlock_buffer(bh); 1438 + BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); 1439 + err = ext4_handle_dirty_metadata(handle, inode, bh); 1440 + if (!fatal) 1441 + fatal = err; 1442 + } else { 1443 + BUFFER_TRACE(bh, "not a new buffer"); 1444 + } 1445 + if (fatal) { 1446 + *errp = fatal; 1447 + brelse(bh); 1448 + bh = NULL; 1449 + } 1450 + return bh; 1452 1451 } 1453 1452 1454 1453 struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, ··· 1841 1860 int retries = 0; 1842 1861 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1843 1862 struct ext4_inode_info *ei = EXT4_I(inode); 1844 - unsigned long md_needed, md_reserved; 1863 + unsigned long md_needed; 1845 1864 int ret; 1846 1865 1847 1866 /* ··· 1851 1870 */ 1852 1871 repeat: 1853 1872 spin_lock(&ei->i_block_reservation_lock); 1854 - md_reserved = ei->i_reserved_meta_blocks; 1855 1873 md_needed = ext4_calc_metadata_amount(inode, lblock); 1856 1874 trace_ext4_da_reserve_space(inode, md_needed); 1857 1875 spin_unlock(&ei->i_block_reservation_lock); 1858 1876 1859 1877 /* 1860 - * Make quota reservation here to prevent quota overflow 1861 - * later. Real quota accounting is done at pages writeout 1862 - * time. 1878 + * We will charge metadata quota at writeout time; this saves 1879 + * us from metadata over-estimation, though we may go over by 1880 + * a small amount in the end. Here we just reserve for data. 1863 1881 */ 1864 - ret = dquot_reserve_block(inode, md_needed + 1); 1882 + ret = dquot_reserve_block(inode, 1); 1865 1883 if (ret) 1866 1884 return ret; 1867 - 1885 + /* 1886 + * We do still charge estimated metadata to the sb though; 1887 + * we cannot afford to run out of free blocks. 
1888 + */ 1868 1889 if (ext4_claim_free_blocks(sbi, md_needed + 1)) { 1869 - dquot_release_reservation_block(inode, md_needed + 1); 1890 + dquot_release_reservation_block(inode, 1); 1870 1891 if (ext4_should_retry_alloc(inode->i_sb, &retries)) { 1871 1892 yield(); 1872 1893 goto repeat; ··· 1893 1910 1894 1911 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 1895 1912 1913 + trace_ext4_da_release_space(inode, to_free); 1896 1914 if (unlikely(to_free > ei->i_reserved_data_blocks)) { 1897 1915 /* 1898 1916 * if there aren't enough reserved blocks, then the ··· 1916 1932 * only when we have written all of the delayed 1917 1933 * allocation blocks. 1918 1934 */ 1919 - to_free += ei->i_reserved_meta_blocks; 1935 + percpu_counter_sub(&sbi->s_dirtyblocks_counter, 1936 + ei->i_reserved_meta_blocks); 1920 1937 ei->i_reserved_meta_blocks = 0; 1921 1938 ei->i_da_metadata_calc_len = 0; 1922 1939 } 1923 1940 1924 - /* update fs dirty blocks counter */ 1941 + /* update fs dirty data blocks counter */ 1925 1942 percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free); 1926 1943 1927 1944 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); ··· 2027 2042 /* 2028 2043 * mpage_put_bnr_to_bhs - walk blocks and assign them actual numbers 2029 2044 * 2030 - * @mpd->inode - inode to walk through 2031 - * @exbh->b_blocknr - first block on a disk 2032 - * @exbh->b_size - amount of space in bytes 2033 - * @logical - first logical block to start assignment with 2034 - * 2035 2045 * the function goes through all passed space and put actual disk 2036 2046 * block numbers into buffer heads, dropping BH_Delay and BH_Unwritten 2037 2047 */ 2038 - static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, 2039 - struct buffer_head *exbh) 2048 + static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, 2049 + struct ext4_map_blocks *map) 2040 2050 { 2041 2051 struct inode *inode = mpd->inode; 2042 2052 struct address_space *mapping = inode->i_mapping; 2043 - int blocks = 
exbh->b_size >> inode->i_blkbits; 2044 - sector_t pblock = exbh->b_blocknr, cur_logical; 2053 + int blocks = map->m_len; 2054 + sector_t pblock = map->m_pblk, cur_logical; 2045 2055 struct buffer_head *head, *bh; 2046 2056 pgoff_t index, end; 2047 2057 struct pagevec pvec; 2048 2058 int nr_pages, i; 2049 2059 2050 - index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits); 2051 - end = (logical + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits); 2060 + index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits); 2061 + end = (map->m_lblk + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits); 2052 2062 cur_logical = index << (PAGE_CACHE_SHIFT - inode->i_blkbits); 2053 2063 2054 2064 pagevec_init(&pvec, 0); ··· 2070 2090 2071 2091 /* skip blocks out of the range */ 2072 2092 do { 2073 - if (cur_logical >= logical) 2093 + if (cur_logical >= map->m_lblk) 2074 2094 break; 2075 2095 cur_logical++; 2076 2096 } while ((bh = bh->b_this_page) != head); 2077 2097 2078 2098 do { 2079 - if (cur_logical >= logical + blocks) 2099 + if (cur_logical >= map->m_lblk + blocks) 2080 2100 break; 2081 2101 2082 - if (buffer_delay(bh) || 2083 - buffer_unwritten(bh)) { 2102 + if (buffer_delay(bh) || buffer_unwritten(bh)) { 2084 2103 2085 2104 BUG_ON(bh->b_bdev != inode->i_sb->s_bdev); 2086 2105 ··· 2098 2119 } else if (buffer_mapped(bh)) 2099 2120 BUG_ON(bh->b_blocknr != pblock); 2100 2121 2101 - if (buffer_uninit(exbh)) 2122 + if (map->m_flags & EXT4_MAP_UNINIT) 2102 2123 set_buffer_uninit(bh); 2103 2124 cur_logical++; 2104 2125 pblock++; ··· 2108 2129 } 2109 2130 } 2110 2131 2111 - 2112 - /* 2113 - * __unmap_underlying_blocks - just a helper function to unmap 2114 - * set of blocks described by @bh 2115 - */ 2116 - static inline void __unmap_underlying_blocks(struct inode *inode, 2117 - struct buffer_head *bh) 2118 - { 2119 - struct block_device *bdev = inode->i_sb->s_bdev; 2120 - int blocks, i; 2121 - 2122 - blocks = bh->b_size >> inode->i_blkbits; 2123 - for (i = 0; i < blocks; 
i++) 2124 - unmap_underlying_metadata(bdev, bh->b_blocknr + i); 2125 - } 2126 2132 2127 2133 static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd, 2128 2134 sector_t logical, long blk_cnt) ··· 2170 2206 static int mpage_da_map_blocks(struct mpage_da_data *mpd) 2171 2207 { 2172 2208 int err, blks, get_blocks_flags; 2173 - struct buffer_head new; 2209 + struct ext4_map_blocks map; 2174 2210 sector_t next = mpd->b_blocknr; 2175 2211 unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits; 2176 2212 loff_t disksize = EXT4_I(mpd->inode)->i_disksize; ··· 2211 2247 * EXT4_GET_BLOCKS_DELALLOC_RESERVE so the delalloc accounting 2212 2248 * variables are updated after the blocks have been allocated. 2213 2249 */ 2214 - new.b_state = 0; 2250 + map.m_lblk = next; 2251 + map.m_len = max_blocks; 2215 2252 get_blocks_flags = EXT4_GET_BLOCKS_CREATE; 2216 2253 if (ext4_should_dioread_nolock(mpd->inode)) 2217 2254 get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT; 2218 2255 if (mpd->b_state & (1 << BH_Delay)) 2219 2256 get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE; 2220 2257 2221 - blks = ext4_get_blocks(handle, mpd->inode, next, max_blocks, 2222 - &new, get_blocks_flags); 2258 + blks = ext4_map_blocks(handle, mpd->inode, &map, get_blocks_flags); 2223 2259 if (blks < 0) { 2224 2260 err = blks; 2225 2261 /* ··· 2246 2282 ext4_msg(mpd->inode->i_sb, KERN_CRIT, 2247 2283 "delayed block allocation failed for inode %lu at " 2248 2284 "logical offset %llu with max blocks %zd with " 2249 - "error %d\n", mpd->inode->i_ino, 2285 + "error %d", mpd->inode->i_ino, 2250 2286 (unsigned long long) next, 2251 2287 mpd->b_size >> mpd->inode->i_blkbits, err); 2252 2288 printk(KERN_CRIT "This should not happen!! 
" ··· 2261 2297 } 2262 2298 BUG_ON(blks == 0); 2263 2299 2264 - new.b_size = (blks << mpd->inode->i_blkbits); 2300 + if (map.m_flags & EXT4_MAP_NEW) { 2301 + struct block_device *bdev = mpd->inode->i_sb->s_bdev; 2302 + int i; 2265 2303 2266 - if (buffer_new(&new)) 2267 - __unmap_underlying_blocks(mpd->inode, &new); 2304 + for (i = 0; i < map.m_len; i++) 2305 + unmap_underlying_metadata(bdev, map.m_pblk + i); 2306 + } 2268 2307 2269 2308 /* 2270 2309 * If blocks are delayed marked, we need to ··· 2275 2308 */ 2276 2309 if ((mpd->b_state & (1 << BH_Delay)) || 2277 2310 (mpd->b_state & (1 << BH_Unwritten))) 2278 - mpage_put_bnr_to_bhs(mpd, next, &new); 2311 + mpage_put_bnr_to_bhs(mpd, &map); 2279 2312 2280 2313 if (ext4_should_order_data(mpd->inode)) { 2281 2314 err = ext4_jbd2_file_inode(handle, mpd->inode); ··· 2316 2349 sector_t next; 2317 2350 int nrblocks = mpd->b_size >> mpd->inode->i_blkbits; 2318 2351 2352 + /* 2353 + * XXX Don't go larger than mballoc is willing to allocate 2354 + * This is a stopgap solution. We eventually need to fold 2355 + * mpage_da_submit_io() into this function and then call 2356 + * ext4_get_blocks() multiple times in a loop 2357 + */ 2358 + if (nrblocks >= 8*1024*1024/mpd->inode->i_sb->s_blocksize) 2359 + goto flush_it; 2360 + 2319 2361 /* check if thereserved journal credits might overflow */ 2320 - if (!(EXT4_I(mpd->inode)->i_flags & EXT4_EXTENTS_FL)) { 2362 + if (!(ext4_test_inode_flag(mpd->inode, EXT4_INODE_EXTENTS))) { 2321 2363 if (nrblocks >= EXT4_MAX_TRANS_DATA) { 2322 2364 /* 2323 2365 * With non-extent format we are limited by the journal ··· 2399 2423 struct buffer_head *bh, *head; 2400 2424 sector_t logical; 2401 2425 2402 - if (mpd->io_done) { 2403 - /* 2404 - * Rest of the page in the page_vec 2405 - * redirty then and skip then. 
We will 2406 - * try to write them again after 2407 - * starting a new transaction 2408 - */ 2409 - redirty_page_for_writepage(wbc, page); 2410 - unlock_page(page); 2411 - return MPAGE_DA_EXTENT_TAIL; 2412 - } 2413 2426 /* 2414 2427 * Can we merge this page to current extent? 2415 2428 */ ··· 2493 2528 * initialized properly. 2494 2529 */ 2495 2530 static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, 2496 - struct buffer_head *bh_result, int create) 2531 + struct buffer_head *bh, int create) 2497 2532 { 2533 + struct ext4_map_blocks map; 2498 2534 int ret = 0; 2499 2535 sector_t invalid_block = ~((sector_t) 0xffff); 2500 2536 ··· 2503 2537 invalid_block = ~0; 2504 2538 2505 2539 BUG_ON(create == 0); 2506 - BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize); 2540 + BUG_ON(bh->b_size != inode->i_sb->s_blocksize); 2541 + 2542 + map.m_lblk = iblock; 2543 + map.m_len = 1; 2507 2544 2508 2545 /* 2509 2546 * first, we need to know whether the block is allocated already 2510 2547 * preallocated blocks are unmapped but should treated 2511 2548 * the same as allocated blocks. 2512 2549 */ 2513 - ret = ext4_get_blocks(NULL, inode, iblock, 1, bh_result, 0); 2514 - if ((ret == 0) && !buffer_delay(bh_result)) { 2515 - /* the block isn't (pre)allocated yet, let's reserve space */ 2550 + ret = ext4_map_blocks(NULL, inode, &map, 0); 2551 + if (ret < 0) 2552 + return ret; 2553 + if (ret == 0) { 2554 + if (buffer_delay(bh)) 2555 + return 0; /* Not sure this could or should happen */ 2516 2556 /* 2517 2557 * XXX: __block_prepare_write() unmaps passed block, 2518 2558 * is it OK? 
··· 2528 2556 /* not enough space to reserve */ 2529 2557 return ret; 2530 2558 2531 - map_bh(bh_result, inode->i_sb, invalid_block); 2532 - set_buffer_new(bh_result); 2533 - set_buffer_delay(bh_result); 2534 - } else if (ret > 0) { 2535 - bh_result->b_size = (ret << inode->i_blkbits); 2536 - if (buffer_unwritten(bh_result)) { 2537 - /* A delayed write to unwritten bh should 2538 - * be marked new and mapped. Mapped ensures 2539 - * that we don't do get_block multiple times 2540 - * when we write to the same offset and new 2541 - * ensures that we do proper zero out for 2542 - * partial write. 2543 - */ 2544 - set_buffer_new(bh_result); 2545 - set_buffer_mapped(bh_result); 2546 - } 2547 - ret = 0; 2559 + map_bh(bh, inode->i_sb, invalid_block); 2560 + set_buffer_new(bh); 2561 + set_buffer_delay(bh); 2562 + return 0; 2548 2563 } 2549 2564 2550 - return ret; 2565 + map_bh(bh, inode->i_sb, map.m_pblk); 2566 + bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; 2567 + 2568 + if (buffer_unwritten(bh)) { 2569 + /* A delayed write to unwritten bh should be marked 2570 + * new and mapped. Mapped ensures that we don't do 2571 + * get_block multiple times when we write to the same 2572 + * offset and new ensures that we do proper zero out 2573 + * for partial write. 
2574 + */ 2575 + set_buffer_new(bh); 2576 + set_buffer_mapped(bh); 2577 + } 2578 + return 0; 2551 2579 } 2552 2580 2553 2581 /* ··· 2569 2597 static int noalloc_get_block_write(struct inode *inode, sector_t iblock, 2570 2598 struct buffer_head *bh_result, int create) 2571 2599 { 2572 - int ret = 0; 2573 - unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; 2574 - 2575 2600 BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize); 2576 - 2577 - /* 2578 - * we don't want to do block allocation in writepage 2579 - * so call get_block_wrap with create = 0 2580 - */ 2581 - ret = ext4_get_blocks(NULL, inode, iblock, max_blocks, bh_result, 0); 2582 - if (ret > 0) { 2583 - bh_result->b_size = (ret << inode->i_blkbits); 2584 - ret = 0; 2585 - } 2586 - return ret; 2601 + return _ext4_get_block(inode, iblock, bh_result, 0); 2587 2602 } 2588 2603 2589 2604 static int bget_one(handle_t *handle, struct buffer_head *bh) ··· 2780 2821 * number of contiguous block. So we will limit 2781 2822 * number of contiguous block to a sane value 2782 2823 */ 2783 - if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) && 2824 + if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) && 2784 2825 (max_blocks > EXT4_MAX_TRANS_DATA)) 2785 2826 max_blocks = EXT4_MAX_TRANS_DATA; 2786 2827 2787 2828 return ext4_chunk_trans_blocks(inode, max_blocks); 2788 2829 } 2830 + 2831 + /* 2832 + * write_cache_pages_da - walk the list of dirty pages of the given 2833 + * address space and call the callback function (which usually writes 2834 + * the pages). 2835 + * 2836 + * This is a forked version of write_cache_pages(). Differences: 2837 + * Range cyclic is ignored. 
2838 + * no_nrwrite_index_update is always presumed true 2839 + */ 2840 + static int write_cache_pages_da(struct address_space *mapping, 2841 + struct writeback_control *wbc, 2842 + struct mpage_da_data *mpd) 2843 + { 2844 + int ret = 0; 2845 + int done = 0; 2846 + struct pagevec pvec; 2847 + int nr_pages; 2848 + pgoff_t index; 2849 + pgoff_t end; /* Inclusive */ 2850 + long nr_to_write = wbc->nr_to_write; 2851 + 2852 + pagevec_init(&pvec, 0); 2853 + index = wbc->range_start >> PAGE_CACHE_SHIFT; 2854 + end = wbc->range_end >> PAGE_CACHE_SHIFT; 2855 + 2856 + while (!done && (index <= end)) { 2857 + int i; 2858 + 2859 + nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, 2860 + PAGECACHE_TAG_DIRTY, 2861 + min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); 2862 + if (nr_pages == 0) 2863 + break; 2864 + 2865 + for (i = 0; i < nr_pages; i++) { 2866 + struct page *page = pvec.pages[i]; 2867 + 2868 + /* 2869 + * At this point, the page may be truncated or 2870 + * invalidated (changing page->mapping to NULL), or 2871 + * even swizzled back from swapper_space to tmpfs file 2872 + * mapping. However, page->index will not change 2873 + * because we have a reference on the page. 2874 + */ 2875 + if (page->index > end) { 2876 + done = 1; 2877 + break; 2878 + } 2879 + 2880 + lock_page(page); 2881 + 2882 + /* 2883 + * Page truncated or invalidated. We can freely skip it 2884 + * then, even for data integrity operations: the page 2885 + * has disappeared concurrently, so there could be no 2886 + * real expectation of this data interity operation 2887 + * even if there is now a new, dirty page at the same 2888 + * pagecache address. 
2889 + */ 2890 + if (unlikely(page->mapping != mapping)) { 2891 + continue_unlock: 2892 + unlock_page(page); 2893 + continue; 2894 + } 2895 + 2896 + if (!PageDirty(page)) { 2897 + /* someone wrote it for us */ 2898 + goto continue_unlock; 2899 + } 2900 + 2901 + if (PageWriteback(page)) { 2902 + if (wbc->sync_mode != WB_SYNC_NONE) 2903 + wait_on_page_writeback(page); 2904 + else 2905 + goto continue_unlock; 2906 + } 2907 + 2908 + BUG_ON(PageWriteback(page)); 2909 + if (!clear_page_dirty_for_io(page)) 2910 + goto continue_unlock; 2911 + 2912 + ret = __mpage_da_writepage(page, wbc, mpd); 2913 + if (unlikely(ret)) { 2914 + if (ret == AOP_WRITEPAGE_ACTIVATE) { 2915 + unlock_page(page); 2916 + ret = 0; 2917 + } else { 2918 + done = 1; 2919 + break; 2920 + } 2921 + } 2922 + 2923 + if (nr_to_write > 0) { 2924 + nr_to_write--; 2925 + if (nr_to_write == 0 && 2926 + wbc->sync_mode == WB_SYNC_NONE) { 2927 + /* 2928 + * We stop writing back only if we are 2929 + * not doing integrity sync. In case of 2930 + * integrity sync we have to keep going 2931 + * because someone may be concurrently 2932 + * dirtying pages, and we might have 2933 + * synced a lot of newly appeared dirty 2934 + * pages, but have not synced all of the 2935 + * old dirty pages. 
2936 + */ 2937 + done = 1; 2938 + break; 2939 + } 2940 + } 2941 + } 2942 + pagevec_release(&pvec); 2943 + cond_resched(); 2944 + } 2945 + return ret; 2946 + } 2947 + 2789 2948 2790 2949 static int ext4_da_writepages(struct address_space *mapping, 2791 2950 struct writeback_control *wbc) ··· 2913 2836 handle_t *handle = NULL; 2914 2837 struct mpage_da_data mpd; 2915 2838 struct inode *inode = mapping->host; 2916 - int no_nrwrite_index_update; 2917 2839 int pages_written = 0; 2918 2840 long pages_skipped; 2919 2841 unsigned int max_pages; ··· 2992 2916 mpd.wbc = wbc; 2993 2917 mpd.inode = mapping->host; 2994 2918 2995 - /* 2996 - * we don't want write_cache_pages to update 2997 - * nr_to_write and writeback_index 2998 - */ 2999 - no_nrwrite_index_update = wbc->no_nrwrite_index_update; 3000 - wbc->no_nrwrite_index_update = 1; 3001 2919 pages_skipped = wbc->pages_skipped; 3002 2920 3003 2921 retry: ··· 3011 2941 if (IS_ERR(handle)) { 3012 2942 ret = PTR_ERR(handle); 3013 2943 ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: " 3014 - "%ld pages, ino %lu; err %d\n", __func__, 2944 + "%ld pages, ino %lu; err %d", __func__, 3015 2945 wbc->nr_to_write, inode->i_ino, ret); 3016 2946 goto out_writepages; 3017 2947 } ··· 3033 2963 mpd.io_done = 0; 3034 2964 mpd.pages_written = 0; 3035 2965 mpd.retval = 0; 3036 - ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, 3037 - &mpd); 2966 + ret = write_cache_pages_da(mapping, wbc, &mpd); 3038 2967 /* 3039 2968 * If we have a contiguous extent of pages and we 3040 2969 * haven't done the I/O yet, map the blocks and submit ··· 3085 3016 if (pages_skipped != wbc->pages_skipped) 3086 3017 ext4_msg(inode->i_sb, KERN_CRIT, 3087 3018 "This should not happen leaving %s " 3088 - "with nr_to_write = %ld ret = %d\n", 3019 + "with nr_to_write = %ld ret = %d", 3089 3020 __func__, wbc->nr_to_write, ret); 3090 3021 3091 3022 /* Update index */ ··· 3099 3030 mapping->writeback_index = index; 3100 3031 3101 3032 out_writepages: 3102 - if 
(!no_nrwrite_index_update) 3103 - wbc->no_nrwrite_index_update = 0; 3104 3033 wbc->nr_to_write -= nr_to_writebump; 3105 3034 wbc->range_start = range_start; 3106 3035 trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); ··· 3143 3076 loff_t pos, unsigned len, unsigned flags, 3144 3077 struct page **pagep, void **fsdata) 3145 3078 { 3146 - int ret, retries = 0, quota_retries = 0; 3079 + int ret, retries = 0; 3147 3080 struct page *page; 3148 3081 pgoff_t index; 3149 3082 unsigned from, to; ··· 3202 3135 3203 3136 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 3204 3137 goto retry; 3205 - 3206 - if ((ret == -EDQUOT) && 3207 - EXT4_I(inode)->i_reserved_meta_blocks && 3208 - (quota_retries++ < 3)) { 3209 - /* 3210 - * Since we often over-estimate the number of meta 3211 - * data blocks required, we may sometimes get a 3212 - * spurios out of quota error even though there would 3213 - * be enough space once we write the data blocks and 3214 - * find out how many meta data blocks were _really_ 3215 - * required. So try forcing the inode write to see if 3216 - * that helps. 3217 - */ 3218 - write_inode_now(inode, (quota_retries == 3)); 3219 - goto retry; 3220 - } 3221 3138 out: 3222 3139 return ret; 3223 3140 } ··· 3597 3546 return ret; 3598 3547 } 3599 3548 3549 + /* 3550 + * ext4_get_block used when preparing for a DIO write or buffer write. 3551 + * We allocate an uinitialized extent if blocks haven't been allocated. 3552 + * The extent will be converted to initialized after the IO is complete. 
3553 + */ 3600 3554 static int ext4_get_block_write(struct inode *inode, sector_t iblock, 3601 3555 struct buffer_head *bh_result, int create) 3602 3556 { 3603 - handle_t *handle = ext4_journal_current_handle(); 3604 - int ret = 0; 3605 - unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; 3606 - int dio_credits; 3607 - int started = 0; 3608 - 3609 3557 ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n", 3610 3558 inode->i_ino, create); 3611 - /* 3612 - * ext4_get_block in prepare for a DIO write or buffer write. 3613 - * We allocate an uinitialized extent if blocks haven't been allocated. 3614 - * The extent will be converted to initialized after IO complete. 3615 - */ 3616 - create = EXT4_GET_BLOCKS_IO_CREATE_EXT; 3617 - 3618 - if (!handle) { 3619 - if (max_blocks > DIO_MAX_BLOCKS) 3620 - max_blocks = DIO_MAX_BLOCKS; 3621 - dio_credits = ext4_chunk_trans_blocks(inode, max_blocks); 3622 - handle = ext4_journal_start(inode, dio_credits); 3623 - if (IS_ERR(handle)) { 3624 - ret = PTR_ERR(handle); 3625 - goto out; 3626 - } 3627 - started = 1; 3628 - } 3629 - 3630 - ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result, 3631 - create); 3632 - if (ret > 0) { 3633 - bh_result->b_size = (ret << inode->i_blkbits); 3634 - ret = 0; 3635 - } 3636 - if (started) 3637 - ext4_journal_stop(handle); 3638 - out: 3639 - return ret; 3559 + return _ext4_get_block(inode, iblock, bh_result, 3560 + EXT4_GET_BLOCKS_IO_CREATE_EXT); 3640 3561 } 3641 3562 3642 3563 static void dump_completed_IO(struct inode * inode) ··· 3996 3973 struct file *file = iocb->ki_filp; 3997 3974 struct inode *inode = file->f_mapping->host; 3998 3975 3999 - if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) 3976 + if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) 4000 3977 return ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs); 4001 3978 4002 3979 return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs); ··· 4325 4302 4326 4303 if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), 
block_to_free, 4327 4304 count)) { 4328 - ext4_error(inode->i_sb, "inode #%lu: " 4329 - "attempt to clear blocks %llu len %lu, invalid", 4330 - inode->i_ino, (unsigned long long) block_to_free, 4331 - count); 4305 + EXT4_ERROR_INODE(inode, "attempt to clear invalid " 4306 + "blocks %llu len %lu", 4307 + (unsigned long long) block_to_free, count); 4332 4308 return 1; 4333 4309 } 4334 4310 ··· 4432 4410 if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh)) 4433 4411 ext4_handle_dirty_metadata(handle, inode, this_bh); 4434 4412 else 4435 - ext4_error(inode->i_sb, 4436 - "circular indirect block detected, " 4437 - "inode=%lu, block=%llu", 4438 - inode->i_ino, 4439 - (unsigned long long) this_bh->b_blocknr); 4413 + EXT4_ERROR_INODE(inode, 4414 + "circular indirect block detected at " 4415 + "block %llu", 4416 + (unsigned long long) this_bh->b_blocknr); 4440 4417 } 4441 4418 } 4442 4419 ··· 4473 4452 4474 4453 if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), 4475 4454 nr, 1)) { 4476 - ext4_error(inode->i_sb, 4477 - "indirect mapped block in inode " 4478 - "#%lu invalid (level %d, blk #%lu)", 4479 - inode->i_ino, depth, 4480 - (unsigned long) nr); 4455 + EXT4_ERROR_INODE(inode, 4456 + "invalid indirect mapped " 4457 + "block %lu (level %d)", 4458 + (unsigned long) nr, depth); 4481 4459 break; 4482 4460 } 4483 4461 ··· 4488 4468 * (should be rare). 
4489 4469 */ 4490 4470 if (!bh) { 4491 - ext4_error(inode->i_sb, 4492 - "Read failure, inode=%lu, block=%llu", 4493 - inode->i_ino, nr); 4471 + EXT4_ERROR_INODE(inode, 4472 + "Read failure block=%llu", 4473 + (unsigned long long) nr); 4494 4474 continue; 4495 4475 } 4496 4476 ··· 4632 4612 if (!ext4_can_truncate(inode)) 4633 4613 return; 4634 4614 4635 - EXT4_I(inode)->i_flags &= ~EXT4_EOFBLOCKS_FL; 4615 + ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS); 4636 4616 4637 4617 if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) 4638 4618 ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE); 4639 4619 4640 - if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { 4620 + if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { 4641 4621 ext4_ext_truncate(inode); 4642 4622 return; 4643 4623 } ··· 4805 4785 4806 4786 bh = sb_getblk(sb, block); 4807 4787 if (!bh) { 4808 - ext4_error(sb, "unable to read inode block - " 4809 - "inode=%lu, block=%llu", inode->i_ino, block); 4788 + EXT4_ERROR_INODE(inode, "unable to read inode block - " 4789 + "block %llu", block); 4810 4790 return -EIO; 4811 4791 } 4812 4792 if (!buffer_uptodate(bh)) { ··· 4904 4884 submit_bh(READ_META, bh); 4905 4885 wait_on_buffer(bh); 4906 4886 if (!buffer_uptodate(bh)) { 4907 - ext4_error(sb, "unable to read inode block - inode=%lu," 4908 - " block=%llu", inode->i_ino, block); 4887 + EXT4_ERROR_INODE(inode, "unable to read inode " 4888 + "block %llu", block); 4909 4889 brelse(bh); 4910 4890 return -EIO; 4911 4891 } ··· 5116 5096 ret = 0; 5117 5097 if (ei->i_file_acl && 5118 5098 !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) { 5119 - ext4_error(sb, "bad extended attribute block %llu inode #%lu", 5120 - ei->i_file_acl, inode->i_ino); 5099 + EXT4_ERROR_INODE(inode, "bad extended attribute block %llu", 5100 + ei->i_file_acl); 5121 5101 ret = -EIO; 5122 5102 goto bad_inode; 5123 5103 } else if (ei->i_flags & EXT4_EXTENTS_FL) { ··· 5162 5142 
new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); 5163 5143 } else { 5164 5144 ret = -EIO; 5165 - ext4_error(inode->i_sb, "bogus i_mode (%o) for inode=%lu", 5166 - inode->i_mode, inode->i_ino); 5145 + EXT4_ERROR_INODE(inode, "bogus i_mode (%o)", inode->i_mode); 5167 5146 goto bad_inode; 5168 5147 } 5169 5148 brelse(iloc.bh); ··· 5400 5381 if (wbc->sync_mode == WB_SYNC_ALL) 5401 5382 sync_dirty_buffer(iloc.bh); 5402 5383 if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { 5403 - ext4_error(inode->i_sb, "IO error syncing inode, " 5404 - "inode=%lu, block=%llu", inode->i_ino, 5405 - (unsigned long long)iloc.bh->b_blocknr); 5384 + EXT4_ERROR_INODE(inode, 5385 + "IO error syncing inode (block=%llu)", 5386 + (unsigned long long) iloc.bh->b_blocknr); 5406 5387 err = -EIO; 5407 5388 } 5408 5389 brelse(iloc.bh); ··· 5474 5455 } 5475 5456 5476 5457 if (attr->ia_valid & ATTR_SIZE) { 5477 - if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) { 5458 + if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { 5478 5459 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 5479 5460 5480 5461 if (attr->ia_size > sbi->s_bitmap_maxbytes) { ··· 5487 5468 if (S_ISREG(inode->i_mode) && 5488 5469 attr->ia_valid & ATTR_SIZE && 5489 5470 (attr->ia_size < inode->i_size || 5490 - (EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL))) { 5471 + (ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)))) { 5491 5472 handle_t *handle; 5492 5473 5493 5474 handle = ext4_journal_start(inode, 3); ··· 5519 5500 } 5520 5501 } 5521 5502 /* ext4_truncate will clear the flag */ 5522 - if ((EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL)) 5503 + if ((ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))) 5523 5504 ext4_truncate(inode); 5524 5505 } 5525 5506 ··· 5595 5576 5596 5577 static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) 5597 5578 { 5598 - if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) 5579 + if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) 5599 5580 return 
ext4_indirect_trans_blocks(inode, nrblocks, chunk); 5600 5581 return ext4_ext_index_trans_blocks(inode, nrblocks, chunk); 5601 5582 } ··· 5930 5911 */ 5931 5912 5932 5913 if (val) 5933 - EXT4_I(inode)->i_flags |= EXT4_JOURNAL_DATA_FL; 5914 + ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); 5934 5915 else 5935 - EXT4_I(inode)->i_flags &= ~EXT4_JOURNAL_DATA_FL; 5916 + ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); 5936 5917 ext4_set_aops(inode); 5937 5918 5938 5919 jbd2_journal_unlock_updates(journal);

+25 -2

fs/ext4/ioctl.c

··· 258 258 if (me.moved_len > 0) 259 259 file_remove_suid(donor_filp); 260 260 261 - if (copy_to_user((struct move_extent __user *)arg, 261 + if (copy_to_user((struct move_extent __user *)arg, 262 262 &me, sizeof(me))) 263 263 err = -EFAULT; 264 264 mext_out: ··· 373 373 case EXT4_IOC32_SETRSVSZ: 374 374 cmd = EXT4_IOC_SETRSVSZ; 375 375 break; 376 - case EXT4_IOC_GROUP_ADD: 376 + case EXT4_IOC32_GROUP_ADD: { 377 + struct compat_ext4_new_group_input __user *uinput; 378 + struct ext4_new_group_input input; 379 + mm_segment_t old_fs; 380 + int err; 381 + 382 + uinput = compat_ptr(arg); 383 + err = get_user(input.group, &uinput->group); 384 + err |= get_user(input.block_bitmap, &uinput->block_bitmap); 385 + err |= get_user(input.inode_bitmap, &uinput->inode_bitmap); 386 + err |= get_user(input.inode_table, &uinput->inode_table); 387 + err |= get_user(input.blocks_count, &uinput->blocks_count); 388 + err |= get_user(input.reserved_blocks, 389 + &uinput->reserved_blocks); 390 + if (err) 391 + return -EFAULT; 392 + old_fs = get_fs(); 393 + set_fs(KERNEL_DS); 394 + err = ext4_ioctl(file, EXT4_IOC_GROUP_ADD, 395 + (unsigned long) &input); 396 + set_fs(old_fs); 397 + return err; 398 + } 399 + case EXT4_IOC_MOVE_EXT: 377 400 break; 378 401 default: 379 402 return -ENOIOCTLCMD;

+85 -35

fs/ext4/mballoc.c

··· 658 658 } 659 659 } 660 660 661 + /* 662 + * Cache the order of the largest free extent we have available in this block 663 + * group. 664 + */ 665 + static void 666 + mb_set_largest_free_order(struct super_block *sb, struct ext4_group_info *grp) 667 + { 668 + int i; 669 + int bits; 670 + 671 + grp->bb_largest_free_order = -1; /* uninit */ 672 + 673 + bits = sb->s_blocksize_bits + 1; 674 + for (i = bits; i >= 0; i--) { 675 + if (grp->bb_counters[i] > 0) { 676 + grp->bb_largest_free_order = i; 677 + break; 678 + } 679 + } 680 + } 681 + 661 682 static noinline_for_stack 662 683 void ext4_mb_generate_buddy(struct super_block *sb, 663 684 void *buddy, void *bitmap, ext4_group_t group) ··· 721 700 */ 722 701 grp->bb_free = free; 723 702 } 703 + mb_set_largest_free_order(sb, grp); 724 704 725 705 clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state)); 726 706 ··· 747 725 * contain blocks_per_page (PAGE_CACHE_SIZE / blocksize) blocks. 748 726 * So it can have information regarding groups_per_page which 749 727 * is blocks_per_page/2 728 + * 729 + * Locking note: This routine takes the block group lock of all groups 730 + * for this page; do not hold this lock when calling this routine! 
750 731 */ 751 732 752 733 static int ext4_mb_init_cache(struct page *page, char *incore) ··· 890 865 BUG_ON(incore == NULL); 891 866 mb_debug(1, "put buddy for group %u in page %lu/%x\n", 892 867 group, page->index, i * blocksize); 868 + trace_ext4_mb_buddy_bitmap_load(sb, group); 893 869 grinfo = ext4_get_group_info(sb, group); 894 870 grinfo->bb_fragments = 0; 895 871 memset(grinfo->bb_counters, 0, ··· 908 882 BUG_ON(incore != NULL); 909 883 mb_debug(1, "put bitmap for group %u in page %lu/%x\n", 910 884 group, page->index, i * blocksize); 885 + trace_ext4_mb_bitmap_load(sb, group); 911 886 912 887 /* see comments in ext4_mb_put_pa() */ 913 888 ext4_lock_group(sb, group); ··· 937 910 return err; 938 911 } 939 912 913 + /* 914 + * Locking note: This routine calls ext4_mb_init_cache(), which takes the 915 + * block group lock of all groups for this page; do not hold the BG lock when 916 + * calling this routine! 917 + */ 940 918 static noinline_for_stack 941 919 int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) 942 920 { ··· 1036 1004 return ret; 1037 1005 } 1038 1006 1007 + /* 1008 + * Locking note: This routine calls ext4_mb_init_cache(), which takes the 1009 + * block group lock of all groups for this page; do not hold the BG lock when 1010 + * calling this routine! 
1011 + */ 1039 1012 static noinline_for_stack int 1040 1013 ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, 1041 1014 struct ext4_buddy *e4b) ··· 1187 1150 return ret; 1188 1151 } 1189 1152 1190 - static void ext4_mb_release_desc(struct ext4_buddy *e4b) 1153 + static void ext4_mb_unload_buddy(struct ext4_buddy *e4b) 1191 1154 { 1192 1155 if (e4b->bd_bitmap_page) 1193 1156 page_cache_release(e4b->bd_bitmap_page); ··· 1336 1299 buddy = buddy2; 1337 1300 } while (1); 1338 1301 } 1302 + mb_set_largest_free_order(sb, e4b->bd_info); 1339 1303 mb_check_buddy(e4b); 1340 1304 } 1341 1305 ··· 1465 1427 e4b->bd_info->bb_counters[ord]++; 1466 1428 e4b->bd_info->bb_counters[ord]++; 1467 1429 } 1430 + mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info); 1468 1431 1469 1432 mb_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0); 1470 1433 mb_check_buddy(e4b); ··· 1656 1617 } 1657 1618 1658 1619 ext4_unlock_group(ac->ac_sb, group); 1659 - ext4_mb_release_desc(e4b); 1620 + ext4_mb_unload_buddy(e4b); 1660 1621 1661 1622 return 0; 1662 1623 } ··· 1711 1672 ext4_mb_use_best_found(ac, e4b); 1712 1673 } 1713 1674 ext4_unlock_group(ac->ac_sb, group); 1714 - ext4_mb_release_desc(e4b); 1675 + ext4_mb_unload_buddy(e4b); 1715 1676 1716 1677 return 0; 1717 1678 } ··· 1860 1821 } 1861 1822 } 1862 1823 1824 + /* This is now called BEFORE we load the buddy bitmap. 
*/ 1863 1825 static int ext4_mb_good_group(struct ext4_allocation_context *ac, 1864 1826 ext4_group_t group, int cr) 1865 1827 { 1866 1828 unsigned free, fragments; 1867 - unsigned i, bits; 1868 1829 int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb)); 1869 1830 struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group); 1870 1831 1871 1832 BUG_ON(cr < 0 || cr >= 4); 1872 - BUG_ON(EXT4_MB_GRP_NEED_INIT(grp)); 1833 + 1834 + /* We only do this if the grp has never been initialized */ 1835 + if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { 1836 + int ret = ext4_mb_init_group(ac->ac_sb, group); 1837 + if (ret) 1838 + return 0; 1839 + } 1873 1840 1874 1841 free = grp->bb_free; 1875 1842 fragments = grp->bb_fragments; ··· 1888 1843 case 0: 1889 1844 BUG_ON(ac->ac_2order == 0); 1890 1845 1846 + if (grp->bb_largest_free_order < ac->ac_2order) 1847 + return 0; 1848 + 1891 1849 /* Avoid using the first bg of a flexgroup for data files */ 1892 1850 if ((ac->ac_flags & EXT4_MB_HINT_DATA) && 1893 1851 (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) && 1894 1852 ((group % flex_size) == 0)) 1895 1853 return 0; 1896 1854 1897 - bits = ac->ac_sb->s_blocksize_bits + 1; 1898 - for (i = ac->ac_2order; i <= bits; i++) 1899 - if (grp->bb_counters[i] > 0) 1900 - return 1; 1901 - break; 1855 + return 1; 1902 1856 case 1: 1903 1857 if ((free / fragments) >= ac->ac_g_ex.fe_len) 1904 1858 return 1; ··· 2008 1964 sbi = EXT4_SB(sb); 2009 1965 ngroups = ext4_get_groups_count(sb); 2010 1966 /* non-extent files are limited to low blocks/groups */ 2011 - if (!(EXT4_I(ac->ac_inode)->i_flags & EXT4_EXTENTS_FL)) 1967 + if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS))) 2012 1968 ngroups = sbi->s_blockfile_groups; 2013 1969 2014 1970 BUG_ON(ac->ac_status == AC_STATUS_FOUND); ··· 2068 2024 group = ac->ac_g_ex.fe_group; 2069 2025 2070 2026 for (i = 0; i < ngroups; group++, i++) { 2071 - struct ext4_group_info *grp; 2072 - struct ext4_group_desc *desc; 2073 - 2074 2027 if (group == 
ngroups) 2075 2028 group = 0; 2076 2029 2077 - /* quick check to skip empty groups */ 2078 - grp = ext4_get_group_info(sb, group); 2079 - if (grp->bb_free == 0) 2030 + /* This now checks without needing the buddy page */ 2031 + if (!ext4_mb_good_group(ac, group, cr)) 2080 2032 continue; 2081 2033 2082 2034 err = ext4_mb_load_buddy(sb, group, &e4b); ··· 2080 2040 goto out; 2081 2041 2082 2042 ext4_lock_group(sb, group); 2043 + 2044 + /* 2045 + * We need to check again after locking the 2046 + * block group 2047 + */ 2083 2048 if (!ext4_mb_good_group(ac, group, cr)) { 2084 - /* someone did allocation from this group */ 2085 2049 ext4_unlock_group(sb, group); 2086 - ext4_mb_release_desc(&e4b); 2050 + ext4_mb_unload_buddy(&e4b); 2087 2051 continue; 2088 2052 } 2089 2053 2090 2054 ac->ac_groups_scanned++; 2091 - desc = ext4_get_group_desc(sb, group, NULL); 2092 2055 if (cr == 0) 2093 2056 ext4_mb_simple_scan_group(ac, &e4b); 2094 2057 else if (cr == 1 && ··· 2101 2058 ext4_mb_complex_scan_group(ac, &e4b); 2102 2059 2103 2060 ext4_unlock_group(sb, group); 2104 - ext4_mb_release_desc(&e4b); 2061 + ext4_mb_unload_buddy(&e4b); 2105 2062 2106 2063 if (ac->ac_status != AC_STATUS_CONTINUE) 2107 2064 break; ··· 2191 2148 ext4_lock_group(sb, group); 2192 2149 memcpy(&sg, ext4_get_group_info(sb, group), i); 2193 2150 ext4_unlock_group(sb, group); 2194 - ext4_mb_release_desc(&e4b); 2151 + ext4_mb_unload_buddy(&e4b); 2195 2152 2196 2153 seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free, 2197 2154 sg.info.bb_fragments, sg.info.bb_first_free); ··· 2298 2255 INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); 2299 2256 init_rwsem(&meta_group_info[i]->alloc_sem); 2300 2257 meta_group_info[i]->bb_free_root = RB_ROOT; 2258 + meta_group_info[i]->bb_largest_free_order = -1; /* uninit */ 2301 2259 2302 2260 #ifdef DOUBLE_CHECK 2303 2261 { ··· 2580 2536 entry->count, entry->group, entry); 2581 2537 2582 2538 if (test_opt(sb, DISCARD)) { 2539 + int ret; 2583 2540 
ext4_fsblk_t discard_block; 2584 2541 2585 2542 discard_block = entry->start_blk + ··· 2588 2543 trace_ext4_discard_blocks(sb, 2589 2544 (unsigned long long)discard_block, 2590 2545 entry->count); 2591 - sb_issue_discard(sb, discard_block, entry->count); 2546 + ret = sb_issue_discard(sb, discard_block, entry->count); 2547 + if (ret == EOPNOTSUPP) { 2548 + ext4_warning(sb, 2549 + "discard not supported, disabling"); 2550 + clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD); 2551 + } 2592 2552 } 2593 2553 2594 2554 err = ext4_mb_load_buddy(sb, entry->group, &e4b); ··· 2618 2568 } 2619 2569 ext4_unlock_group(sb, entry->group); 2620 2570 kmem_cache_free(ext4_free_ext_cachep, entry); 2621 - ext4_mb_release_desc(&e4b); 2571 + ext4_mb_unload_buddy(&e4b); 2622 2572 } 2623 2573 2624 2574 mb_debug(1, "freed %u blocks in %u structures\n", count, count2); ··· 2691 2641 2692 2642 void exit_ext4_mballoc(void) 2693 2643 { 2694 - /* 2644 + /* 2695 2645 * Wait for completion of call_rcu()'s on ext4_pspace_cachep 2696 2646 * before destroying the slab cache. 
2697 2647 */ ··· 3031 2981 if (sbi->s_mb_stats && ac->ac_g_ex.fe_len > 1) { 3032 2982 atomic_inc(&sbi->s_bal_reqs); 3033 2983 atomic_add(ac->ac_b_ex.fe_len, &sbi->s_bal_allocated); 3034 - if (ac->ac_o_ex.fe_len >= ac->ac_g_ex.fe_len) 2984 + if (ac->ac_b_ex.fe_len >= ac->ac_o_ex.fe_len) 3035 2985 atomic_inc(&sbi->s_bal_success); 3036 2986 atomic_add(ac->ac_found, &sbi->s_bal_ex_scanned); 3037 2987 if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start && ··· 3173 3123 continue; 3174 3124 3175 3125 /* non-extent files can't have physical blocks past 2^32 */ 3176 - if (!(EXT4_I(ac->ac_inode)->i_flags & EXT4_EXTENTS_FL) && 3126 + if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) && 3177 3127 pa->pa_pstart + pa->pa_len > EXT4_MAX_BLOCK_FILE_PHYS) 3178 3128 continue; 3179 3129 ··· 3330 3280 spin_unlock(&pa->pa_lock); 3331 3281 3332 3282 grp_blk = pa->pa_pstart; 3333 - /* 3283 + /* 3334 3284 * If doing group-based preallocation, pa_pstart may be in the 3335 3285 * next group when pa is used up 3336 3286 */ ··· 3747 3697 ext4_unlock_group(sb, group); 3748 3698 if (ac) 3749 3699 kmem_cache_free(ext4_ac_cachep, ac); 3750 - ext4_mb_release_desc(&e4b); 3700 + ext4_mb_unload_buddy(&e4b); 3751 3701 put_bh(bitmap_bh); 3752 3702 return free; 3753 3703 } ··· 3851 3801 if (bitmap_bh == NULL) { 3852 3802 ext4_error(sb, "Error reading block bitmap for %u", 3853 3803 group); 3854 - ext4_mb_release_desc(&e4b); 3804 + ext4_mb_unload_buddy(&e4b); 3855 3805 continue; 3856 3806 } 3857 3807 ··· 3860 3810 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac); 3861 3811 ext4_unlock_group(sb, group); 3862 3812 3863 - ext4_mb_release_desc(&e4b); 3813 + ext4_mb_unload_buddy(&e4b); 3864 3814 put_bh(bitmap_bh); 3865 3815 3866 3816 list_del(&pa->u.pa_tmp_list); ··· 4124 4074 ext4_mb_release_group_pa(&e4b, pa, ac); 4125 4075 ext4_unlock_group(sb, group); 4126 4076 4127 - ext4_mb_release_desc(&e4b); 4077 + ext4_mb_unload_buddy(&e4b); 4128 4078 list_del(&pa->u.pa_tmp_list); 4129 4079 
call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); 4130 4080 } ··· 4534 4484 if (!bh) 4535 4485 tbh = sb_find_get_block(inode->i_sb, 4536 4486 block + i); 4537 - ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, 4487 + ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, 4538 4488 inode, tbh, block + i); 4539 4489 } 4540 4490 } 4541 4491 4542 - /* 4492 + /* 4543 4493 * We need to make sure we don't reuse the freed block until 4544 4494 * after the transaction is committed, which we can do by 4545 4495 * treating the block as metadata, below. We make an ··· 4660 4610 atomic_add(count, &sbi->s_flex_groups[flex_group].free_blocks); 4661 4611 } 4662 4612 4663 - ext4_mb_release_desc(&e4b); 4613 + ext4_mb_unload_buddy(&e4b); 4664 4614 4665 4615 freed += count; 4666 4616

+1 -1

fs/ext4/migrate.c

··· 475 475 */ 476 476 if (!EXT4_HAS_INCOMPAT_FEATURE(inode->i_sb, 477 477 EXT4_FEATURE_INCOMPAT_EXTENTS) || 478 - (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) 478 + (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) 479 479 return -EINVAL; 480 480 481 481 if (S_ISLNK(inode->i_mode) && inode->i_blocks == 0)

+7 -6

fs/ext4/move_extent.c

··· 482 482 int depth = ext_depth(orig_inode); 483 483 int ret; 484 484 485 + start_ext.ee_block = end_ext.ee_block = 0; 485 486 o_start = o_end = oext = orig_path[depth].p_ext; 486 487 oext_alen = ext4_ext_get_actual_len(oext); 487 488 start_ext.ee_len = end_ext.ee_len = 0; ··· 530 529 * new_ext |-------| 531 530 */ 532 531 if (le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end) { 533 - ext4_error(orig_inode->i_sb, 532 + EXT4_ERROR_INODE(orig_inode, 534 533 "new_ext_end(%u) should be less than or equal to " 535 534 "oext->ee_block(%u) + oext_alen(%d) - 1", 536 535 new_ext_end, le32_to_cpu(oext->ee_block), ··· 693 692 while (1) { 694 693 /* The extent for donor must be found. */ 695 694 if (!dext) { 696 - ext4_error(donor_inode->i_sb, 695 + EXT4_ERROR_INODE(donor_inode, 697 696 "The extent for donor must be found"); 698 697 *err = -EIO; 699 698 goto out; 700 699 } else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) { 701 - ext4_error(donor_inode->i_sb, 700 + EXT4_ERROR_INODE(donor_inode, 702 701 "Donor offset(%u) and the first block of donor " 703 702 "extent(%u) should be equal", 704 703 donor_off, ··· 977 976 } 978 977 979 978 /* Ext4 move extent supports only extent based file */ 980 - if (!(EXT4_I(orig_inode)->i_flags & EXT4_EXTENTS_FL)) { 979 + if (!(ext4_test_inode_flag(orig_inode, EXT4_INODE_EXTENTS))) { 981 980 ext4_debug("ext4 move extent: orig file is not extents " 982 981 "based file [ino:orig %lu]\n", orig_inode->i_ino); 983 982 return -EOPNOTSUPP; 984 - } else if (!(EXT4_I(donor_inode)->i_flags & EXT4_EXTENTS_FL)) { 983 + } else if (!(ext4_test_inode_flag(donor_inode, EXT4_INODE_EXTENTS))) { 985 984 ext4_debug("ext4 move extent: donor file is not extents " 986 985 "based file [ino:donor %lu]\n", donor_inode->i_ino); 987 986 return -EOPNOTSUPP; ··· 1355 1354 if (ret1 < 0) 1356 1355 break; 1357 1356 if (*moved_len > len) { 1358 - ext4_error(orig_inode->i_sb, 1357 + EXT4_ERROR_INODE(orig_inode, 1359 1358 "We replaced blocks too much! 
" 1360 1359 "sum of replaced: %llu requested: %llu", 1361 1360 *moved_len, len);

+30 -31

fs/ext4/namei.c

··· 187 187 return blocksize; 188 188 return (len & 65532) | ((len & 3) << 16); 189 189 } 190 - 190 + 191 191 __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize) 192 192 { 193 193 if ((len > blocksize) || (blocksize > (1 << 18)) || (len & 3)) ··· 197 197 if (len == blocksize) { 198 198 if (blocksize == 65536) 199 199 return cpu_to_le16(EXT4_MAX_REC_LEN); 200 - else 200 + else 201 201 return cpu_to_le16(0); 202 202 } 203 203 return cpu_to_le16((len & 65532) | ((len >> 16) & 3)); ··· 349 349 brelse(bh); 350 350 } 351 351 if (bcount) 352 - printk(KERN_DEBUG "%snames %u, fullness %u (%u%%)\n", 352 + printk(KERN_DEBUG "%snames %u, fullness %u (%u%%)\n", 353 353 levels ? "" : " ", names, space/bcount, 354 354 (space/bcount)*100/blocksize); 355 355 return (struct stats) { names, space, bcount}; ··· 653 653 int ret, err; 654 654 __u32 hashval; 655 655 656 - dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n", 656 + dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n", 657 657 start_hash, start_minor_hash)); 658 658 dir = dir_file->f_path.dentry->d_inode; 659 - if (!(EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) { 659 + if (!(ext4_test_inode_flag(dir, EXT4_INODE_INDEX))) { 660 660 hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; 661 661 if (hinfo.hash_version <= DX_HASH_TEA) 662 662 hinfo.hash_version += ··· 801 801 { 802 802 if (!EXT4_HAS_COMPAT_FEATURE(inode->i_sb, 803 803 EXT4_FEATURE_COMPAT_DIR_INDEX)) 804 - EXT4_I(inode)->i_flags &= ~EXT4_INDEX_FL; 804 + ext4_clear_inode_flag(inode, EXT4_INODE_INDEX); 805 805 } 806 806 807 807 /* ··· 943 943 wait_on_buffer(bh); 944 944 if (!buffer_uptodate(bh)) { 945 945 /* read error, skip block & hope for the best */ 946 - ext4_error(sb, "reading directory #%lu offset %lu", 947 - dir->i_ino, (unsigned long)block); 946 + EXT4_ERROR_INODE(dir, "reading directory lblock %lu", 947 + (unsigned long) block); 948 948 brelse(bh); 949 949 goto next; 950 950 } ··· 1066 1066 __u32 ino = 
le32_to_cpu(de->inode); 1067 1067 brelse(bh); 1068 1068 if (!ext4_valid_inum(dir->i_sb, ino)) { 1069 - ext4_error(dir->i_sb, "bad inode number: %u", ino); 1069 + EXT4_ERROR_INODE(dir, "bad inode number: %u", ino); 1070 1070 return ERR_PTR(-EIO); 1071 1071 } 1072 1072 inode = ext4_iget(dir->i_sb, ino); 1073 1073 if (unlikely(IS_ERR(inode))) { 1074 1074 if (PTR_ERR(inode) == -ESTALE) { 1075 - ext4_error(dir->i_sb, 1076 - "deleted inode referenced: %u", 1077 - ino); 1075 + EXT4_ERROR_INODE(dir, 1076 + "deleted inode referenced: %u", 1077 + ino); 1078 1078 return ERR_PTR(-EIO); 1079 1079 } else { 1080 1080 return ERR_CAST(inode); ··· 1104 1104 brelse(bh); 1105 1105 1106 1106 if (!ext4_valid_inum(child->d_inode->i_sb, ino)) { 1107 - ext4_error(child->d_inode->i_sb, 1108 - "bad inode number: %u", ino); 1107 + EXT4_ERROR_INODE(child->d_inode, 1108 + "bad parent inode number: %u", ino); 1109 1109 return ERR_PTR(-EIO); 1110 1110 } 1111 1111 ··· 1141 1141 unsigned rec_len = 0; 1142 1142 1143 1143 while (count--) { 1144 - struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *) 1144 + struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *) 1145 1145 (from + (map->offs<<2)); 1146 1146 rec_len = EXT4_DIR_REC_LEN(de->name_len); 1147 1147 memcpy (to, de, rec_len); ··· 1404 1404 de = (struct ext4_dir_entry_2 *)((char *)fde + 1405 1405 ext4_rec_len_from_disk(fde->rec_len, blocksize)); 1406 1406 if ((char *) de >= (((char *) root) + blocksize)) { 1407 - ext4_error(dir->i_sb, 1408 - "invalid rec_len for '..' 
in inode %lu", 1409 - dir->i_ino); 1407 + EXT4_ERROR_INODE(dir, "invalid rec_len for '..'"); 1410 1408 brelse(bh); 1411 1409 return -EIO; 1412 1410 } ··· 1416 1418 brelse(bh); 1417 1419 return retval; 1418 1420 } 1419 - EXT4_I(dir)->i_flags |= EXT4_INDEX_FL; 1421 + ext4_set_inode_flag(dir, EXT4_INODE_INDEX); 1420 1422 data1 = bh2->b_data; 1421 1423 1422 1424 memcpy (data1, de, len); ··· 1489 1491 retval = ext4_dx_add_entry(handle, dentry, inode); 1490 1492 if (!retval || (retval != ERR_BAD_DX_DIR)) 1491 1493 return retval; 1492 - EXT4_I(dir)->i_flags &= ~EXT4_INDEX_FL; 1494 + ext4_clear_inode_flag(dir, EXT4_INODE_INDEX); 1493 1495 dx_fallback++; 1494 1496 ext4_mark_inode_dirty(handle, dir); 1495 1497 } ··· 1517 1519 de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize); 1518 1520 retval = add_dirent_to_buf(handle, dentry, inode, de, bh); 1519 1521 brelse(bh); 1522 + if (retval == 0) 1523 + ext4_set_inode_state(inode, EXT4_STATE_NEWENTRY); 1520 1524 return retval; 1521 1525 } 1522 1526 ··· 1915 1915 if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) || 1916 1916 !(bh = ext4_bread(NULL, inode, 0, 0, &err))) { 1917 1917 if (err) 1918 - ext4_error(inode->i_sb, 1919 - "error %d reading directory #%lu offset 0", 1920 - err, inode->i_ino); 1918 + EXT4_ERROR_INODE(inode, 1919 + "error %d reading directory lblock 0", err); 1921 1920 else 1922 1921 ext4_warning(inode->i_sb, 1923 1922 "bad directory (dir #%lu) - no data block", ··· 1940 1941 de = ext4_next_entry(de1, sb->s_blocksize); 1941 1942 while (offset < inode->i_size) { 1942 1943 if (!bh || 1943 - (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { 1944 + (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { 1945 + unsigned int lblock; 1944 1946 err = 0; 1945 1947 brelse(bh); 1946 - bh = ext4_bread(NULL, inode, 1947 - offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err); 1948 + lblock = offset >> EXT4_BLOCK_SIZE_BITS(sb); 1949 + bh = ext4_bread(NULL, inode, lblock, 0, &err); 1948 1950 if (!bh) { 1949 1951 
if (err) 1950 - ext4_error(sb, 1951 - "error %d reading directory" 1952 - " #%lu offset %u", 1953 - err, inode->i_ino, offset); 1952 + EXT4_ERROR_INODE(inode, 1953 + "error %d reading directory " 1954 + "lblock %u", err, lblock); 1954 1955 offset += sb->s_blocksize; 1955 1956 continue; 1956 1957 } ··· 2296 2297 } 2297 2298 } else { 2298 2299 /* clear the extent format for fast symlink */ 2299 - EXT4_I(inode)->i_flags &= ~EXT4_EXTENTS_FL; 2300 + ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS); 2300 2301 inode->i_op = &ext4_fast_symlink_inode_operations; 2301 2302 memcpy((char *)&EXT4_I(inode)->i_data, symname, l); 2302 2303 inode->i_size = l-1;

+2 -1

fs/ext4/resize.c

··· 911 911 percpu_counter_add(&sbi->s_freeinodes_counter, 912 912 EXT4_INODES_PER_GROUP(sb)); 913 913 914 - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) { 914 + if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG) && 915 + sbi->s_log_groups_per_flex) { 915 916 ext4_group_t flex_group; 916 917 flex_group = ext4_flex_group(sbi, input->group); 917 918 atomic_add(input->free_blocks_count,

+44 -36

fs/ext4/super.c

··· 241 241 if (sb->s_flags & MS_RDONLY) 242 242 return ERR_PTR(-EROFS); 243 243 244 + vfs_check_frozen(sb, SB_FREEZE_WRITE); 244 245 /* Special case here: if the journal has aborted behind our 245 246 * backs (eg. EIO in the commit thread), then we still need to 246 247 * take the FS itself readonly cleanly. */ ··· 942 941 seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0"); 943 942 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) 944 943 seq_puts(seq, ",journal_async_commit"); 944 + else if (test_opt(sb, JOURNAL_CHECKSUM)) 945 + seq_puts(seq, ",journal_checksum"); 945 946 if (test_opt(sb, NOBH)) 946 947 seq_puts(seq, ",nobh"); 947 948 if (test_opt(sb, I_VERSION)) ··· 2216 2213 struct ext4_attr { 2217 2214 struct attribute attr; 2218 2215 ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *); 2219 - ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *, 2216 + ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *, 2220 2217 const char *, size_t); 2221 2218 int offset; 2222 2219 }; ··· 2433 2430 __releases(kernel_lock) 2434 2431 __acquires(kernel_lock) 2435 2432 { 2433 + char *orig_data = kstrdup(data, GFP_KERNEL); 2436 2434 struct buffer_head *bh; 2437 2435 struct ext4_super_block *es = NULL; 2438 2436 struct ext4_sb_info *sbi; ··· 2797 2793 get_random_bytes(&sbi->s_next_generation, sizeof(u32)); 2798 2794 spin_lock_init(&sbi->s_next_gen_lock); 2799 2795 2800 - err = percpu_counter_init(&sbi->s_freeblocks_counter, 2801 - ext4_count_free_blocks(sb)); 2802 - if (!err) { 2803 - err = percpu_counter_init(&sbi->s_freeinodes_counter, 2804 - ext4_count_free_inodes(sb)); 2805 - } 2806 - if (!err) { 2807 - err = percpu_counter_init(&sbi->s_dirs_counter, 2808 - ext4_count_dirs(sb)); 2809 - } 2810 - if (!err) { 2811 - err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0); 2812 - } 2813 - if (err) { 2814 - ext4_msg(sb, KERN_ERR, "insufficient memory"); 2815 - goto failed_mount3; 2816 - } 2817 - 2818 2796 sbi->s_stripe = ext4_get_stripe_size(sbi); 2819 2797 
sbi->s_max_writeback_mb_bump = 128; 2820 2798 ··· 2896 2910 set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); 2897 2911 2898 2912 no_journal: 2913 + err = percpu_counter_init(&sbi->s_freeblocks_counter, 2914 + ext4_count_free_blocks(sb)); 2915 + if (!err) 2916 + err = percpu_counter_init(&sbi->s_freeinodes_counter, 2917 + ext4_count_free_inodes(sb)); 2918 + if (!err) 2919 + err = percpu_counter_init(&sbi->s_dirs_counter, 2920 + ext4_count_dirs(sb)); 2921 + if (!err) 2922 + err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0); 2923 + if (err) { 2924 + ext4_msg(sb, KERN_ERR, "insufficient memory"); 2925 + goto failed_mount_wq; 2926 + } 2899 2927 if (test_opt(sb, NOBH)) { 2900 2928 if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) { 2901 2929 ext4_msg(sb, KERN_WARNING, "Ignoring nobh option - " ··· 3001 3001 err = ext4_setup_system_zone(sb); 3002 3002 if (err) { 3003 3003 ext4_msg(sb, KERN_ERR, "failed to initialize system " 3004 - "zone (%d)\n", err); 3004 + "zone (%d)", err); 3005 3005 goto failed_mount4; 3006 3006 } 3007 3007 ··· 3040 3040 } else 3041 3041 descr = "out journal"; 3042 3042 3043 - ext4_msg(sb, KERN_INFO, "mounted filesystem with%s", descr); 3043 + ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. 
" 3044 + "Opts: %s", descr, orig_data); 3044 3045 3045 3046 lock_kernel(); 3047 + kfree(orig_data); 3046 3048 return 0; 3047 3049 3048 3050 cantfind_ext4: ··· 3061 3059 jbd2_journal_destroy(sbi->s_journal); 3062 3060 sbi->s_journal = NULL; 3063 3061 } 3062 + percpu_counter_destroy(&sbi->s_freeblocks_counter); 3063 + percpu_counter_destroy(&sbi->s_freeinodes_counter); 3064 + percpu_counter_destroy(&sbi->s_dirs_counter); 3065 + percpu_counter_destroy(&sbi->s_dirtyblocks_counter); 3064 3066 failed_mount3: 3065 3067 if (sbi->s_flex_groups) { 3066 3068 if (is_vmalloc_addr(sbi->s_flex_groups)) ··· 3072 3066 else 3073 3067 kfree(sbi->s_flex_groups); 3074 3068 } 3075 - percpu_counter_destroy(&sbi->s_freeblocks_counter); 3076 - percpu_counter_destroy(&sbi->s_freeinodes_counter); 3077 - percpu_counter_destroy(&sbi->s_dirs_counter); 3078 - percpu_counter_destroy(&sbi->s_dirtyblocks_counter); 3079 3069 failed_mount2: 3080 3070 for (i = 0; i < db_count; i++) 3081 3071 brelse(sbi->s_group_desc[i]); ··· 3091 3089 kfree(sbi->s_blockgroup_lock); 3092 3090 kfree(sbi); 3093 3091 lock_kernel(); 3092 + kfree(orig_data); 3094 3093 return ret; 3095 3094 } 3096 3095 ··· 3383 3380 if (!(sb->s_flags & MS_RDONLY)) 3384 3381 es->s_wtime = cpu_to_le32(get_seconds()); 3385 3382 es->s_kbytes_written = 3386 - cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + 3383 + cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + 3387 3384 ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - 3388 3385 EXT4_SB(sb)->s_sectors_written_start) >> 1)); 3389 3386 ext4_free_blocks_count_set(es, percpu_counter_sum_positive( ··· 3488 3485 return 0; 3489 3486 3490 3487 journal = EXT4_SB(sb)->s_journal; 3491 - if (journal) 3488 + if (journal) { 3489 + vfs_check_frozen(sb, SB_FREEZE_WRITE); 3492 3490 ret = ext4_journal_force_commit(journal); 3491 + } 3493 3492 3494 3493 return ret; 3495 3494 } ··· 3540 3535 * the journal. 
3541 3536 */ 3542 3537 error = jbd2_journal_flush(journal); 3543 - if (error < 0) { 3544 - out: 3545 - jbd2_journal_unlock_updates(journal); 3546 - return error; 3547 - } 3538 + if (error < 0) 3539 + goto out; 3548 3540 3549 3541 /* Journal blocked and flushed, clear needs_recovery flag. */ 3550 3542 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 3551 3543 error = ext4_commit_super(sb, 1); 3552 - if (error) 3553 - goto out; 3554 - return 0; 3544 + out: 3545 + /* we rely on s_frozen to stop further updates */ 3546 + jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 3547 + return error; 3555 3548 } 3556 3549 3557 3550 /* ··· 3566 3563 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 3567 3564 ext4_commit_super(sb, 1); 3568 3565 unlock_super(sb); 3569 - jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 3570 3566 return 0; 3571 3567 } 3572 3568 ··· 3582 3580 #ifdef CONFIG_QUOTA 3583 3581 int i; 3584 3582 #endif 3583 + char *orig_data = kstrdup(data, GFP_KERNEL); 3585 3584 3586 3585 lock_kernel(); 3587 3586 ··· 3716 3713 #endif 3717 3714 unlock_super(sb); 3718 3715 unlock_kernel(); 3716 + 3717 + ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data); 3718 + kfree(orig_data); 3719 3719 return 0; 3720 3720 3721 3721 restore_opts: ··· 3740 3734 #endif 3741 3735 unlock_super(sb); 3742 3736 unlock_kernel(); 3737 + kfree(orig_data); 3743 3738 return err; 3744 3739 } 3745 3740 ··· 4148 4141 { 4149 4142 int err; 4150 4143 4144 + ext4_check_flag_values(); 4151 4145 err = init_ext4_system_zone(); 4152 4146 if (err) 4153 4147 return err;

+2

fs/ext4/symlink.c

··· 34 34 .readlink = generic_readlink, 35 35 .follow_link = page_follow_link_light, 36 36 .put_link = page_put_link, 37 + .setattr = ext4_setattr, 37 38 #ifdef CONFIG_EXT4_FS_XATTR 38 39 .setxattr = generic_setxattr, 39 40 .getxattr = generic_getxattr, ··· 46 45 const struct inode_operations ext4_fast_symlink_inode_operations = { 47 46 .readlink = generic_readlink, 48 47 .follow_link = ext4_follow_link, 48 + .setattr = ext4_setattr, 49 49 #ifdef CONFIG_EXT4_FS_XATTR 50 50 .setxattr = generic_setxattr, 51 51 .getxattr = generic_getxattr,

+18 -21

fs/ext4/xattr.c

··· 228 228 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); 229 229 if (ext4_xattr_check_block(bh)) { 230 230 bad_block: 231 - ext4_error(inode->i_sb, 232 - "inode %lu: bad block %llu", inode->i_ino, 233 - EXT4_I(inode)->i_file_acl); 231 + EXT4_ERROR_INODE(inode, "bad block %llu", 232 + EXT4_I(inode)->i_file_acl); 234 233 error = -EIO; 235 234 goto cleanup; 236 235 } ··· 371 372 ea_bdebug(bh, "b_count=%d, refcount=%d", 372 373 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); 373 374 if (ext4_xattr_check_block(bh)) { 374 - ext4_error(inode->i_sb, 375 - "inode %lu: bad block %llu", inode->i_ino, 376 - EXT4_I(inode)->i_file_acl); 375 + EXT4_ERROR_INODE(inode, "bad block %llu", 376 + EXT4_I(inode)->i_file_acl); 377 377 error = -EIO; 378 378 goto cleanup; 379 379 } ··· 664 666 atomic_read(&(bs->bh->b_count)), 665 667 le32_to_cpu(BHDR(bs->bh)->h_refcount)); 666 668 if (ext4_xattr_check_block(bs->bh)) { 667 - ext4_error(sb, "inode %lu: bad block %llu", 668 - inode->i_ino, EXT4_I(inode)->i_file_acl); 669 + EXT4_ERROR_INODE(inode, "bad block %llu", 670 + EXT4_I(inode)->i_file_acl); 669 671 error = -EIO; 670 672 goto cleanup; 671 673 } ··· 818 820 EXT4_I(inode)->i_block_group); 819 821 820 822 /* non-extent files can't have physical blocks past 2^32 */ 821 - if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) 823 + if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) 822 824 goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; 823 825 824 826 block = ext4_new_meta_blocks(handle, inode, ··· 826 828 if (error) 827 829 goto cleanup; 828 830 829 - if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) 831 + if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) 830 832 BUG_ON(block > EXT4_MAX_BLOCK_FILE_PHYS); 831 833 832 834 ea_idebug(inode, "creating block %d", block); ··· 878 880 goto cleanup; 879 881 880 882 bad_block: 881 - ext4_error(inode->i_sb, "inode %lu: bad block %llu", 882 - inode->i_ino, EXT4_I(inode)->i_file_acl); 883 + EXT4_ERROR_INODE(inode, "bad 
block %llu", 884 + EXT4_I(inode)->i_file_acl); 883 885 goto cleanup; 884 886 885 887 #undef header ··· 1192 1194 if (!bh) 1193 1195 goto cleanup; 1194 1196 if (ext4_xattr_check_block(bh)) { 1195 - ext4_error(inode->i_sb, "inode %lu: bad block %llu", 1196 - inode->i_ino, EXT4_I(inode)->i_file_acl); 1197 + EXT4_ERROR_INODE(inode, "bad block %llu", 1198 + EXT4_I(inode)->i_file_acl); 1197 1199 error = -EIO; 1198 1200 goto cleanup; 1199 1201 } ··· 1370 1372 goto cleanup; 1371 1373 bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); 1372 1374 if (!bh) { 1373 - ext4_error(inode->i_sb, "inode %lu: block %llu read error", 1374 - inode->i_ino, EXT4_I(inode)->i_file_acl); 1375 + EXT4_ERROR_INODE(inode, "block %llu read error", 1376 + EXT4_I(inode)->i_file_acl); 1375 1377 goto cleanup; 1376 1378 } 1377 1379 if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || 1378 1380 BHDR(bh)->h_blocks != cpu_to_le32(1)) { 1379 - ext4_error(inode->i_sb, "inode %lu: bad block %llu", 1380 - inode->i_ino, EXT4_I(inode)->i_file_acl); 1381 + EXT4_ERROR_INODE(inode, "bad block %llu", 1382 + EXT4_I(inode)->i_file_acl); 1381 1383 goto cleanup; 1382 1384 } 1383 1385 ext4_xattr_release_block(handle, inode, bh); ··· 1502 1504 } 1503 1505 bh = sb_bread(inode->i_sb, ce->e_block); 1504 1506 if (!bh) { 1505 - ext4_error(inode->i_sb, 1506 - "inode %lu: block %lu read error", 1507 - inode->i_ino, (unsigned long) ce->e_block); 1507 + EXT4_ERROR_INODE(inode, "block %lu read error", 1508 + (unsigned long) ce->e_block); 1508 1509 } else if (le32_to_cpu(BHDR(bh)->h_refcount) >= 1509 1510 EXT4_XATTR_REFCOUNT_MAX) { 1510 1511 ea_idebug(inode, "block %lu refcount %d>=%d",

+1 -4

fs/jbd2/transaction.c

··· 1311 1311 if (handle->h_sync) 1312 1312 transaction->t_synchronous_commit = 1; 1313 1313 current->journal_info = NULL; 1314 - spin_lock(&journal->j_state_lock); 1315 1314 spin_lock(&transaction->t_handle_lock); 1316 1315 transaction->t_outstanding_credits -= handle->h_buffer_credits; 1317 1316 transaction->t_updates--; ··· 1339 1340 jbd_debug(2, "transaction too old, requesting commit for " 1340 1341 "handle %p\n", handle); 1341 1342 /* This is non-blocking */ 1342 - __jbd2_log_start_commit(journal, transaction->t_tid); 1343 - spin_unlock(&journal->j_state_lock); 1343 + jbd2_log_start_commit(journal, transaction->t_tid); 1344 1344 1345 1345 /* 1346 1346 * Special case: JBD2_SYNC synchronous updates require us ··· 1349 1351 err = jbd2_log_wait_commit(journal, tid); 1350 1352 } else { 1351 1353 spin_unlock(&transaction->t_handle_lock); 1352 - spin_unlock(&journal->j_state_lock); 1353 1354 } 1354 1355 1355 1356 lock_map_release(&handle->h_lockdep_map);

+7 -4

fs/quota/dquot.c

··· 1514 1514 /* 1515 1515 * This operation can block, but only after everything is updated 1516 1516 */ 1517 - int __dquot_alloc_space(struct inode *inode, qsize_t number, 1518 - int warn, int reserve) 1517 + int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) 1519 1518 { 1520 1519 int cnt, ret = 0; 1521 1520 char warntype[MAXQUOTAS]; 1521 + int warn = flags & DQUOT_SPACE_WARN; 1522 + int reserve = flags & DQUOT_SPACE_RESERVE; 1523 + int nofail = flags & DQUOT_SPACE_NOFAIL; 1522 1524 1523 1525 /* 1524 1526 * First test before acquiring mutex - solves deadlocks when we ··· 1541 1539 continue; 1542 1540 ret = check_bdq(inode->i_dquot[cnt], number, !warn, 1543 1541 warntype+cnt); 1544 - if (ret) { 1542 + if (ret && !nofail) { 1545 1543 spin_unlock(&dq_data_lock); 1546 1544 goto out_flush_warn; 1547 1545 } ··· 1640 1638 /* 1641 1639 * This operation can block, but only after everything is updated 1642 1640 */ 1643 - void __dquot_free_space(struct inode *inode, qsize_t number, int reserve) 1641 + void __dquot_free_space(struct inode *inode, qsize_t number, int flags) 1644 1642 { 1645 1643 unsigned int cnt; 1646 1644 char warntype[MAXQUOTAS]; 1645 + int reserve = flags & DQUOT_SPACE_RESERVE; 1647 1646 1648 1647 /* First test before acquiring mutex - solves deadlocks when we 1649 1648 * re-enter the quota code and are already holding the mutex */

+26 -11

include/linux/quotaops.h

··· 9 9 10 10 #include <linux/fs.h> 11 11 12 + #define DQUOT_SPACE_WARN 0x1 13 + #define DQUOT_SPACE_RESERVE 0x2 14 + #define DQUOT_SPACE_NOFAIL 0x4 15 + 12 16 static inline struct quota_info *sb_dqopt(struct super_block *sb) 13 17 { 14 18 return &sb->s_dquot; ··· 45 41 struct dquot *dquot_alloc(struct super_block *sb, int type); 46 42 void dquot_destroy(struct dquot *dquot); 47 43 48 - int __dquot_alloc_space(struct inode *inode, qsize_t number, 49 - int warn, int reserve); 50 - void __dquot_free_space(struct inode *inode, qsize_t number, int reserve); 44 + int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags); 45 + void __dquot_free_space(struct inode *inode, qsize_t number, int flags); 51 46 52 47 int dquot_alloc_inode(const struct inode *inode); 53 48 ··· 245 242 } 246 243 247 244 static inline int __dquot_alloc_space(struct inode *inode, qsize_t number, 248 - int warn, int reserve) 245 + int flags) 249 246 { 250 - if (!reserve) 247 + if (!(flags & DQUOT_SPACE_RESERVE)) 251 248 inode_add_bytes(inode, number); 252 249 return 0; 253 250 } 254 251 255 252 static inline void __dquot_free_space(struct inode *inode, qsize_t number, 256 - int reserve) 253 + int flags) 257 254 { 258 - if (!reserve) 255 + if (!(flags & DQUOT_SPACE_RESERVE)) 259 256 inode_sub_bytes(inode, number); 260 257 } 261 258 ··· 271 268 272 269 static inline int dquot_alloc_space_nodirty(struct inode *inode, qsize_t nr) 273 270 { 274 - return __dquot_alloc_space(inode, nr, 1, 0); 271 + return __dquot_alloc_space(inode, nr, DQUOT_SPACE_WARN); 272 + } 273 + 274 + static inline void dquot_alloc_space_nofail(struct inode *inode, qsize_t nr) 275 + { 276 + __dquot_alloc_space(inode, nr, DQUOT_SPACE_WARN|DQUOT_SPACE_NOFAIL); 277 + mark_inode_dirty(inode); 275 278 } 276 279 277 280 static inline int dquot_alloc_space(struct inode *inode, qsize_t nr) ··· 295 286 return dquot_alloc_space_nodirty(inode, nr << inode->i_blkbits); 296 287 } 297 288 289 + static inline void 
dquot_alloc_block_nofail(struct inode *inode, qsize_t nr) 290 + { 291 + dquot_alloc_space_nofail(inode, nr << inode->i_blkbits); 292 + } 293 + 298 294 static inline int dquot_alloc_block(struct inode *inode, qsize_t nr) 299 295 { 300 296 return dquot_alloc_space(inode, nr << inode->i_blkbits); ··· 307 293 308 294 static inline int dquot_prealloc_block_nodirty(struct inode *inode, qsize_t nr) 309 295 { 310 - return __dquot_alloc_space(inode, nr << inode->i_blkbits, 0, 0); 296 + return __dquot_alloc_space(inode, nr << inode->i_blkbits, 0); 311 297 } 312 298 313 299 static inline int dquot_prealloc_block(struct inode *inode, qsize_t nr) ··· 322 308 323 309 static inline int dquot_reserve_block(struct inode *inode, qsize_t nr) 324 310 { 325 - return __dquot_alloc_space(inode, nr << inode->i_blkbits, 1, 1); 311 + return __dquot_alloc_space(inode, nr << inode->i_blkbits, 312 + DQUOT_SPACE_WARN|DQUOT_SPACE_RESERVE); 326 313 } 327 314 328 315 static inline int dquot_claim_block(struct inode *inode, qsize_t nr) ··· 360 345 static inline void dquot_release_reservation_block(struct inode *inode, 361 346 qsize_t nr) 362 347 { 363 - __dquot_free_space(inode, nr << inode->i_blkbits, 1); 348 + __dquot_free_space(inode, nr << inode->i_blkbits, DQUOT_SPACE_RESERVE); 364 349 } 365 350 366 351 #endif /* _LINUX_QUOTAOPS_ */

+50 -44

include/trace/events/ext4.h

··· 353 353 jbd2_dev_to_name(__entry->dev), __entry->blk, __entry->count) 354 354 ); 355 355 356 - TRACE_EVENT(ext4_mb_new_inode_pa, 356 + DECLARE_EVENT_CLASS(ext4__mb_new_pa, 357 357 TP_PROTO(struct ext4_allocation_context *ac, 358 358 struct ext4_prealloc_space *pa), 359 359 ··· 381 381 __entry->pa_pstart, __entry->pa_len, __entry->pa_lstart) 382 382 ); 383 383 384 - TRACE_EVENT(ext4_mb_new_group_pa, 384 + DEFINE_EVENT(ext4__mb_new_pa, ext4_mb_new_inode_pa, 385 + 385 386 TP_PROTO(struct ext4_allocation_context *ac, 386 387 struct ext4_prealloc_space *pa), 387 388 388 - TP_ARGS(ac, pa), 389 + TP_ARGS(ac, pa) 390 + ); 389 391 390 - TP_STRUCT__entry( 391 - __field( dev_t, dev ) 392 - __field( ino_t, ino ) 393 - __field( __u64, pa_pstart ) 394 - __field( __u32, pa_len ) 395 - __field( __u64, pa_lstart ) 392 + DEFINE_EVENT(ext4__mb_new_pa, ext4_mb_new_group_pa, 396 393 397 - ), 394 + TP_PROTO(struct ext4_allocation_context *ac, 395 + struct ext4_prealloc_space *pa), 398 396 399 - TP_fast_assign( 400 - __entry->dev = ac->ac_sb->s_dev; 401 - __entry->ino = ac->ac_inode->i_ino; 402 - __entry->pa_pstart = pa->pa_pstart; 403 - __entry->pa_len = pa->pa_len; 404 - __entry->pa_lstart = pa->pa_lstart; 405 - ), 406 - 407 - TP_printk("dev %s ino %lu pstart %llu len %u lstart %llu", 408 - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, 409 - __entry->pa_pstart, __entry->pa_len, __entry->pa_lstart) 397 + TP_ARGS(ac, pa) 410 398 ); 411 399 412 400 TRACE_EVENT(ext4_mb_release_inode_pa, ··· 778 790 __entry->result_len, __entry->result_logical) 779 791 ); 780 792 781 - TRACE_EVENT(ext4_mballoc_discard, 793 + DECLARE_EVENT_CLASS(ext4__mballoc, 782 794 TP_PROTO(struct ext4_allocation_context *ac), 783 795 784 796 TP_ARGS(ac), ··· 807 819 __entry->result_len, __entry->result_logical) 808 820 ); 809 821 810 - TRACE_EVENT(ext4_mballoc_free, 822 + DEFINE_EVENT(ext4__mballoc, ext4_mballoc_discard, 823 + 811 824 TP_PROTO(struct ext4_allocation_context *ac), 812 825 813 - 
TP_ARGS(ac), 826 + TP_ARGS(ac) 827 + ); 814 828 815 - TP_STRUCT__entry( 816 - __field( dev_t, dev ) 817 - __field( ino_t, ino ) 818 - __field( __u32, result_logical ) 819 - __field( int, result_start ) 820 - __field( __u32, result_group ) 821 - __field( int, result_len ) 822 - ), 829 + DEFINE_EVENT(ext4__mballoc, ext4_mballoc_free, 823 830 824 - TP_fast_assign( 825 - __entry->dev = ac->ac_inode->i_sb->s_dev; 826 - __entry->ino = ac->ac_inode->i_ino; 827 - __entry->result_logical = ac->ac_b_ex.fe_logical; 828 - __entry->result_start = ac->ac_b_ex.fe_start; 829 - __entry->result_group = ac->ac_b_ex.fe_group; 830 - __entry->result_len = ac->ac_b_ex.fe_len; 831 - ), 831 + TP_PROTO(struct ext4_allocation_context *ac), 832 832 833 - TP_printk("dev %s inode %lu extent %u/%d/%u@%u ", 834 - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, 835 - __entry->result_group, __entry->result_start, 836 - __entry->result_len, __entry->result_logical) 833 + TP_ARGS(ac) 837 834 ); 838 835 839 836 TRACE_EVENT(ext4_forget, ··· 947 974 __entry->reserved_meta_blocks, __entry->allocated_meta_blocks) 948 975 ); 949 976 977 + DECLARE_EVENT_CLASS(ext4__bitmap_load, 978 + TP_PROTO(struct super_block *sb, unsigned long group), 979 + 980 + TP_ARGS(sb, group), 981 + 982 + TP_STRUCT__entry( 983 + __field( dev_t, dev ) 984 + __field( __u32, group ) 985 + 986 + ), 987 + 988 + TP_fast_assign( 989 + __entry->dev = sb->s_dev; 990 + __entry->group = group; 991 + ), 992 + 993 + TP_printk("dev %s group %u", 994 + jbd2_dev_to_name(__entry->dev), __entry->group) 995 + ); 996 + 997 + DEFINE_EVENT(ext4__bitmap_load, ext4_mb_bitmap_load, 998 + 999 + TP_PROTO(struct super_block *sb, unsigned long group), 1000 + 1001 + TP_ARGS(sb, group) 1002 + ); 1003 + 1004 + DEFINE_EVENT(ext4__bitmap_load, ext4_mb_buddy_bitmap_load, 1005 + 1006 + TP_PROTO(struct super_block *sb, unsigned long group), 1007 + 1008 + TP_ARGS(sb, group) 1009 + ); 950 1010 951 1011 #endif /* _TRACE_EXT4_H */ 952 1012