Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:
"Only bug fixes and cleanups for ext4 this merge window.

Of note are fixes for the combination of the inline_data and
fast_commit features, and more accurately calculating when to schedule
additional lazy inode table init, especially when CONFIG_HZ is 100 Hz"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
ext4: fix error code saved on super block during file system abort
ext4: inline data inode fast commit replay fixes
ext4: commit inline data during fast commit
ext4: scope ret locally in ext4_try_to_trim_range()
ext4: remove an unused variable warning with CONFIG_QUOTA=n
ext4: fix boolreturn.cocci warnings in fs/ext4/namei.c
ext4: prevent getting empty inode buffer
ext4: move ext4_fill_raw_inode() related functions
ext4: factor out ext4_fill_raw_inode()
ext4: prevent partial update of the extent blocks
ext4: check for inconsistent extents between index and leaf block
ext4: check for out-of-order index extents in ext4_valid_extent_entries()
ext4: convert from atomic_t to refcount_t on ext4_io_end->count
ext4: refresh the ext4_ext_path struct after dropping i_data_sem.
ext4: ensure enough credits in ext4_ext_shift_path_extents
ext4: correct the left/middle/right debug message for binsearch
ext4: fix lazy initialization next schedule time computation in more granular unit
Revert "ext4: enforce buffer head state assertion in ext4_da_map_blocks"

+300 -250
+2 -1
fs/ext4/ext4.h
··· 17 17 #ifndef _EXT4_H 18 18 #define _EXT4_H 19 19 20 + #include <linux/refcount.h> 20 21 #include <linux/types.h> 21 22 #include <linux/blkdev.h> 22 23 #include <linux/magic.h> ··· 242 241 struct bio *bio; /* Linked list of completed 243 242 * bios covering the extent */ 244 243 unsigned int flag; /* unwritten or not */ 245 - atomic_t count; /* reference counter */ 244 + refcount_t count; /* reference counter */ 246 245 struct list_head list_vec; /* list of ext4_io_end_vec */ 247 246 } ext4_io_end_t; 248 247
+103 -72
fs/ext4/extents.c
··· 136 136 static int ext4_ext_get_access(handle_t *handle, struct inode *inode, 137 137 struct ext4_ext_path *path) 138 138 { 139 + int err = 0; 140 + 139 141 if (path->p_bh) { 140 142 /* path points to block */ 141 143 BUFFER_TRACE(path->p_bh, "get_write_access"); 142 - return ext4_journal_get_write_access(handle, inode->i_sb, 143 - path->p_bh, EXT4_JTR_NONE); 144 + err = ext4_journal_get_write_access(handle, inode->i_sb, 145 + path->p_bh, EXT4_JTR_NONE); 146 + /* 147 + * The extent buffer's verified bit will be set again in 148 + * __ext4_ext_dirty(). We could leave an inconsistent 149 + * buffer if the extents updating procudure break off du 150 + * to some error happens, force to check it again. 151 + */ 152 + if (!err) 153 + clear_buffer_verified(path->p_bh); 144 154 } 145 155 /* path points to leaf/index in inode body */ 146 156 /* we use in-core data, no need to protect them */ 147 - return 0; 157 + return err; 148 158 } 149 159 150 160 /* ··· 175 165 /* path points to block */ 176 166 err = __ext4_handle_dirty_metadata(where, line, handle, 177 167 inode, path->p_bh); 168 + /* Extents updating done, re-set verified flag */ 169 + if (!err) 170 + set_buffer_verified(path->p_bh); 178 171 } else { 179 172 /* path points to leaf/index in inode body */ 180 173 err = ext4_mark_inode_dirty(handle, inode); ··· 367 354 368 355 static int ext4_valid_extent_entries(struct inode *inode, 369 356 struct ext4_extent_header *eh, 370 - ext4_fsblk_t *pblk, int depth) 357 + ext4_lblk_t lblk, ext4_fsblk_t *pblk, 358 + int depth) 371 359 { 372 360 unsigned short entries; 361 + ext4_lblk_t lblock = 0; 362 + ext4_lblk_t prev = 0; 363 + 373 364 if (eh->eh_entries == 0) 374 365 return 1; 375 366 ··· 382 365 if (depth == 0) { 383 366 /* leaf entries */ 384 367 struct ext4_extent *ext = EXT_FIRST_EXTENT(eh); 385 - ext4_lblk_t lblock = 0; 386 - ext4_lblk_t prev = 0; 387 - int len = 0; 368 + 369 + /* 370 + * The logical block in the first entry should equal to 371 + * the number in the 
index block. 372 + */ 373 + if (depth != ext_depth(inode) && 374 + lblk != le32_to_cpu(ext->ee_block)) 375 + return 0; 388 376 while (entries) { 389 377 if (!ext4_valid_extent(inode, ext)) 390 378 return 0; 391 379 392 380 /* Check for overlapping extents */ 393 381 lblock = le32_to_cpu(ext->ee_block); 394 - len = ext4_ext_get_actual_len(ext); 395 382 if ((lblock <= prev) && prev) { 396 383 *pblk = ext4_ext_pblock(ext); 397 384 return 0; 398 385 } 386 + prev = lblock + ext4_ext_get_actual_len(ext) - 1; 399 387 ext++; 400 388 entries--; 401 - prev = lblock + len - 1; 402 389 } 403 390 } else { 404 391 struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh); 392 + 393 + /* 394 + * The logical block in the first entry should equal to 395 + * the number in the parent index block. 396 + */ 397 + if (depth != ext_depth(inode) && 398 + lblk != le32_to_cpu(ext_idx->ei_block)) 399 + return 0; 405 400 while (entries) { 406 401 if (!ext4_valid_extent_idx(inode, ext_idx)) 407 402 return 0; 403 + 404 + /* Check for overlapping index extents */ 405 + lblock = le32_to_cpu(ext_idx->ei_block); 406 + if ((lblock <= prev) && prev) { 407 + *pblk = ext4_idx_pblock(ext_idx); 408 + return 0; 409 + } 408 410 ext_idx++; 409 411 entries--; 412 + prev = lblock; 410 413 } 411 414 } 412 415 return 1; ··· 434 397 435 398 static int __ext4_ext_check(const char *function, unsigned int line, 436 399 struct inode *inode, struct ext4_extent_header *eh, 437 - int depth, ext4_fsblk_t pblk) 400 + int depth, ext4_fsblk_t pblk, ext4_lblk_t lblk) 438 401 { 439 402 const char *error_msg; 440 403 int max = 0, err = -EFSCORRUPTED; ··· 460 423 error_msg = "invalid eh_entries"; 461 424 goto corrupted; 462 425 } 463 - if (!ext4_valid_extent_entries(inode, eh, &pblk, depth)) { 426 + if (!ext4_valid_extent_entries(inode, eh, lblk, &pblk, depth)) { 464 427 error_msg = "invalid extent entries"; 465 428 goto corrupted; 466 429 } ··· 490 453 } 491 454 492 455 #define ext4_ext_check(inode, eh, depth, pblk) \ 493 - 
__ext4_ext_check(__func__, __LINE__, (inode), (eh), (depth), (pblk)) 456 + __ext4_ext_check(__func__, __LINE__, (inode), (eh), (depth), (pblk), 0) 494 457 495 458 int ext4_ext_check_inode(struct inode *inode) 496 459 { ··· 523 486 524 487 static struct buffer_head * 525 488 __read_extent_tree_block(const char *function, unsigned int line, 526 - struct inode *inode, ext4_fsblk_t pblk, int depth, 527 - int flags) 489 + struct inode *inode, struct ext4_extent_idx *idx, 490 + int depth, int flags) 528 491 { 529 492 struct buffer_head *bh; 530 493 int err; 531 494 gfp_t gfp_flags = __GFP_MOVABLE | GFP_NOFS; 495 + ext4_fsblk_t pblk; 532 496 533 497 if (flags & EXT4_EX_NOFAIL) 534 498 gfp_flags |= __GFP_NOFAIL; 535 499 500 + pblk = ext4_idx_pblock(idx); 536 501 bh = sb_getblk_gfp(inode->i_sb, pblk, gfp_flags); 537 502 if (unlikely(!bh)) 538 503 return ERR_PTR(-ENOMEM); ··· 547 508 } 548 509 if (buffer_verified(bh) && !(flags & EXT4_EX_FORCE_CACHE)) 549 510 return bh; 550 - err = __ext4_ext_check(function, line, inode, 551 - ext_block_hdr(bh), depth, pblk); 511 + err = __ext4_ext_check(function, line, inode, ext_block_hdr(bh), 512 + depth, pblk, le32_to_cpu(idx->ei_block)); 552 513 if (err) 553 514 goto errout; 554 515 set_buffer_verified(bh); ··· 566 527 567 528 } 568 529 569 - #define read_extent_tree_block(inode, pblk, depth, flags) \ 570 - __read_extent_tree_block(__func__, __LINE__, (inode), (pblk), \ 530 + #define read_extent_tree_block(inode, idx, depth, flags) \ 531 + __read_extent_tree_block(__func__, __LINE__, (inode), (idx), \ 571 532 (depth), (flags)) 572 533 573 534 /* ··· 617 578 i--; 618 579 continue; 619 580 } 620 - bh = read_extent_tree_block(inode, 621 - ext4_idx_pblock(path[i].p_idx++), 581 + bh = read_extent_tree_block(inode, path[i].p_idx++, 622 582 depth - i - 1, 623 583 EXT4_EX_FORCE_CACHE); 624 584 if (IS_ERR(bh)) { ··· 752 714 r = EXT_LAST_INDEX(eh); 753 715 while (l <= r) { 754 716 m = l + (r - l) / 2; 717 + ext_debug(inode, "%p(%u):%p(%u):%p(%u) 
", l, 718 + le32_to_cpu(l->ei_block), m, le32_to_cpu(m->ei_block), 719 + r, le32_to_cpu(r->ei_block)); 720 + 755 721 if (block < le32_to_cpu(m->ei_block)) 756 722 r = m - 1; 757 723 else 758 724 l = m + 1; 759 - ext_debug(inode, "%p(%u):%p(%u):%p(%u) ", l, 760 - le32_to_cpu(l->ei_block), m, le32_to_cpu(m->ei_block), 761 - r, le32_to_cpu(r->ei_block)); 762 725 } 763 726 764 727 path->p_idx = l - 1; ··· 821 782 822 783 while (l <= r) { 823 784 m = l + (r - l) / 2; 785 + ext_debug(inode, "%p(%u):%p(%u):%p(%u) ", l, 786 + le32_to_cpu(l->ee_block), m, le32_to_cpu(m->ee_block), 787 + r, le32_to_cpu(r->ee_block)); 788 + 824 789 if (block < le32_to_cpu(m->ee_block)) 825 790 r = m - 1; 826 791 else 827 792 l = m + 1; 828 - ext_debug(inode, "%p(%u):%p(%u):%p(%u) ", l, 829 - le32_to_cpu(l->ee_block), m, le32_to_cpu(m->ee_block), 830 - r, le32_to_cpu(r->ee_block)); 831 793 } 832 794 833 795 path->p_ext = l - 1; ··· 924 884 path[ppos].p_depth = i; 925 885 path[ppos].p_ext = NULL; 926 886 927 - bh = read_extent_tree_block(inode, path[ppos].p_block, --i, 928 - flags); 887 + bh = read_extent_tree_block(inode, path[ppos].p_idx, --i, flags); 929 888 if (IS_ERR(bh)) { 930 889 ret = PTR_ERR(bh); 931 890 goto err; ··· 1533 1494 struct ext4_extent_header *eh; 1534 1495 struct ext4_extent_idx *ix; 1535 1496 struct ext4_extent *ex; 1536 - ext4_fsblk_t block; 1537 1497 int depth; /* Note, NOT eh_depth; depth from top of tree */ 1538 1498 int ee_len; 1539 1499 ··· 1599 1561 * follow it and find the closest allocated 1600 1562 * block to the right */ 1601 1563 ix++; 1602 - block = ext4_idx_pblock(ix); 1603 1564 while (++depth < path->p_depth) { 1604 1565 /* subtract from p_depth to get proper eh_depth */ 1605 - bh = read_extent_tree_block(inode, block, 1606 - path->p_depth - depth, 0); 1566 + bh = read_extent_tree_block(inode, ix, path->p_depth - depth, 0); 1607 1567 if (IS_ERR(bh)) 1608 1568 return PTR_ERR(bh); 1609 1569 eh = ext_block_hdr(bh); 1610 1570 ix = EXT_FIRST_INDEX(eh); 1611 - 
block = ext4_idx_pblock(ix); 1612 1571 put_bh(bh); 1613 1572 } 1614 1573 1615 - bh = read_extent_tree_block(inode, block, path->p_depth - depth, 0); 1574 + bh = read_extent_tree_block(inode, ix, path->p_depth - depth, 0); 1616 1575 if (IS_ERR(bh)) 1617 1576 return PTR_ERR(bh); 1618 1577 eh = ext_block_hdr(bh); ··· 2988 2953 ext_debug(inode, "move to level %d (block %llu)\n", 2989 2954 i + 1, ext4_idx_pblock(path[i].p_idx)); 2990 2955 memset(path + i + 1, 0, sizeof(*path)); 2991 - bh = read_extent_tree_block(inode, 2992 - ext4_idx_pblock(path[i].p_idx), depth - i - 1, 2993 - EXT4_EX_NOCACHE); 2956 + bh = read_extent_tree_block(inode, path[i].p_idx, 2957 + depth - i - 1, 2958 + EXT4_EX_NOCACHE); 2994 2959 if (IS_ERR(bh)) { 2995 2960 /* should we reset i_size? */ 2996 2961 err = PTR_ERR(bh); ··· 5013 4978 } 5014 4979 5015 4980 /* 5016 - * ext4_access_path: 5017 - * Function to access the path buffer for marking it dirty. 5018 - * It also checks if there are sufficient credits left in the journal handle 5019 - * to update path. 5020 - */ 5021 - static int 5022 - ext4_access_path(handle_t *handle, struct inode *inode, 5023 - struct ext4_ext_path *path) 5024 - { 5025 - int credits, err; 5026 - 5027 - if (!ext4_handle_valid(handle)) 5028 - return 0; 5029 - 5030 - /* 5031 - * Check if need to extend journal credits 5032 - * 3 for leaf, sb, and inode plus 2 (bmap and group 5033 - * descriptor) for each block group; assume two block 5034 - * groups 5035 - */ 5036 - credits = ext4_writepage_trans_blocks(inode); 5037 - err = ext4_datasem_ensure_credits(handle, inode, 7, credits, 0); 5038 - if (err < 0) 5039 - return err; 5040 - 5041 - err = ext4_ext_get_access(handle, inode, path); 5042 - return err; 5043 - } 5044 - 5045 - /* 5046 4981 * ext4_ext_shift_path_extents: 5047 4982 * Shift the extents of a path structure lying between path[depth].p_ext 5048 4983 * and EXT_LAST_EXTENT(path[depth].p_hdr), by @shift blocks. 
@SHIFT tells ··· 5026 5021 int depth, err = 0; 5027 5022 struct ext4_extent *ex_start, *ex_last; 5028 5023 bool update = false; 5024 + int credits, restart_credits; 5029 5025 depth = path->p_depth; 5030 5026 5031 5027 while (depth >= 0) { ··· 5036 5030 return -EFSCORRUPTED; 5037 5031 5038 5032 ex_last = EXT_LAST_EXTENT(path[depth].p_hdr); 5033 + /* leaf + sb + inode */ 5034 + credits = 3; 5035 + if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr)) { 5036 + update = true; 5037 + /* extent tree + sb + inode */ 5038 + credits = depth + 2; 5039 + } 5039 5040 5040 - err = ext4_access_path(handle, inode, path + depth); 5041 + restart_credits = ext4_writepage_trans_blocks(inode); 5042 + err = ext4_datasem_ensure_credits(handle, inode, credits, 5043 + restart_credits, 0); 5044 + if (err) { 5045 + if (err > 0) 5046 + err = -EAGAIN; 5047 + goto out; 5048 + } 5049 + 5050 + err = ext4_ext_get_access(handle, inode, path + depth); 5041 5051 if (err) 5042 5052 goto out; 5043 - 5044 - if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr)) 5045 - update = true; 5046 5053 5047 5054 while (ex_start <= ex_last) { 5048 5055 if (SHIFT == SHIFT_LEFT) { ··· 5086 5067 } 5087 5068 5088 5069 /* Update index too */ 5089 - err = ext4_access_path(handle, inode, path + depth); 5070 + err = ext4_ext_get_access(handle, inode, path + depth); 5090 5071 if (err) 5091 5072 goto out; 5092 5073 ··· 5125 5106 int ret = 0, depth; 5126 5107 struct ext4_extent *extent; 5127 5108 ext4_lblk_t stop, *iterator, ex_start, ex_end; 5109 + ext4_lblk_t tmp = EXT_MAX_BLOCKS; 5128 5110 5129 5111 /* Let path point to the last extent */ 5130 5112 path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, ··· 5179 5159 * till we reach stop. In case of right shift, iterator points to stop 5180 5160 * and it is decreased till we reach start. 
5181 5161 */ 5162 + again: 5182 5163 if (SHIFT == SHIFT_LEFT) 5183 5164 iterator = &start; 5184 5165 else 5185 5166 iterator = &stop; 5167 + 5168 + if (tmp != EXT_MAX_BLOCKS) 5169 + *iterator = tmp; 5186 5170 5187 5171 /* 5188 5172 * Its safe to start updating extents. Start and stop are unsigned, so ··· 5216 5192 } 5217 5193 } 5218 5194 5195 + tmp = *iterator; 5219 5196 if (SHIFT == SHIFT_LEFT) { 5220 5197 extent = EXT_LAST_EXTENT(path[depth].p_hdr); 5221 5198 *iterator = le32_to_cpu(extent->ee_block) + ··· 5235 5210 } 5236 5211 ret = ext4_ext_shift_path_extents(path, shift, inode, 5237 5212 handle, SHIFT); 5213 + /* iterator can be NULL which means we should break */ 5214 + if (ret == -EAGAIN) 5215 + goto again; 5238 5216 if (ret) 5239 5217 break; 5240 5218 } ··· 6070 6042 ext4_lblk_t cur = 0, end; 6071 6043 int j, ret = 0; 6072 6044 struct ext4_map_blocks map; 6045 + 6046 + if (ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA)) 6047 + return 0; 6073 6048 6074 6049 /* Determin the size of the file first */ 6075 6050 path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL,
+9 -2
fs/ext4/fast_commit.c
··· 819 819 if (ret) 820 820 return ret; 821 821 822 - if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) 822 + if (ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA)) 823 + inode_len = EXT4_INODE_SIZE(inode->i_sb); 824 + else if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) 823 825 inode_len += ei->i_extra_isize; 824 826 825 827 fc_inode.fc_ino = cpu_to_le32(inode->i_ino); ··· 1526 1524 * crashing. This should be fixed but until then, we calculate 1527 1525 * the number of blocks the inode. 1528 1526 */ 1529 - ext4_ext_replay_set_iblocks(inode); 1527 + if (!ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA)) 1528 + ext4_ext_replay_set_iblocks(inode); 1530 1529 1531 1530 inode->i_generation = le32_to_cpu(ext4_raw_inode(&iloc)->i_generation); 1532 1531 ext4_reset_inode_seed(inode); ··· 1845 1842 } 1846 1843 cur = 0; 1847 1844 end = EXT_MAX_BLOCKS; 1845 + if (ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA)) { 1846 + iput(inode); 1847 + continue; 1848 + } 1848 1849 while (cur < end) { 1849 1850 map.m_lblk = cur; 1850 1851 map.m_len = end - cur;
+171 -160
fs/ext4/inode.c
··· 1711 1711 } 1712 1712 1713 1713 /* 1714 - * the buffer head associated with a delayed and not unwritten 1715 - * block found in the extent status cache must contain an 1716 - * invalid block number and have its BH_New and BH_Delay bits 1717 - * set, reflecting the state assigned when the block was 1718 - * initially delayed allocated 1714 + * Delayed extent could be allocated by fallocate. 1715 + * So we need to check it. 1719 1716 */ 1720 - if (ext4_es_is_delonly(&es)) { 1721 - BUG_ON(bh->b_blocknr != invalid_block); 1722 - BUG_ON(!buffer_new(bh)); 1723 - BUG_ON(!buffer_delay(bh)); 1717 + if (ext4_es_is_delayed(&es) && !ext4_es_is_unwritten(&es)) { 1718 + map_bh(bh, inode->i_sb, invalid_block); 1719 + set_buffer_new(bh); 1720 + set_buffer_delay(bh); 1724 1721 return 0; 1725 1722 } 1726 1723 ··· 4231 4234 return err; 4232 4235 } 4233 4236 4237 + static inline u64 ext4_inode_peek_iversion(const struct inode *inode) 4238 + { 4239 + if (unlikely(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) 4240 + return inode_peek_iversion_raw(inode); 4241 + else 4242 + return inode_peek_iversion(inode); 4243 + } 4244 + 4245 + static int ext4_inode_blocks_set(struct ext4_inode *raw_inode, 4246 + struct ext4_inode_info *ei) 4247 + { 4248 + struct inode *inode = &(ei->vfs_inode); 4249 + u64 i_blocks = READ_ONCE(inode->i_blocks); 4250 + struct super_block *sb = inode->i_sb; 4251 + 4252 + if (i_blocks <= ~0U) { 4253 + /* 4254 + * i_blocks can be represented in a 32 bit variable 4255 + * as multiple of 512 bytes 4256 + */ 4257 + raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); 4258 + raw_inode->i_blocks_high = 0; 4259 + ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE); 4260 + return 0; 4261 + } 4262 + 4263 + /* 4264 + * This should never happen since sb->s_maxbytes should not have 4265 + * allowed this, sb->s_maxbytes was set according to the huge_file 4266 + * feature in ext4_fill_super(). 
4267 + */ 4268 + if (!ext4_has_feature_huge_file(sb)) 4269 + return -EFSCORRUPTED; 4270 + 4271 + if (i_blocks <= 0xffffffffffffULL) { 4272 + /* 4273 + * i_blocks can be represented in a 48 bit variable 4274 + * as multiple of 512 bytes 4275 + */ 4276 + raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); 4277 + raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32); 4278 + ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE); 4279 + } else { 4280 + ext4_set_inode_flag(inode, EXT4_INODE_HUGE_FILE); 4281 + /* i_block is stored in file system block size */ 4282 + i_blocks = i_blocks >> (inode->i_blkbits - 9); 4283 + raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); 4284 + raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32); 4285 + } 4286 + return 0; 4287 + } 4288 + 4289 + static int ext4_fill_raw_inode(struct inode *inode, struct ext4_inode *raw_inode) 4290 + { 4291 + struct ext4_inode_info *ei = EXT4_I(inode); 4292 + uid_t i_uid; 4293 + gid_t i_gid; 4294 + projid_t i_projid; 4295 + int block; 4296 + int err; 4297 + 4298 + err = ext4_inode_blocks_set(raw_inode, ei); 4299 + 4300 + raw_inode->i_mode = cpu_to_le16(inode->i_mode); 4301 + i_uid = i_uid_read(inode); 4302 + i_gid = i_gid_read(inode); 4303 + i_projid = from_kprojid(&init_user_ns, ei->i_projid); 4304 + if (!(test_opt(inode->i_sb, NO_UID32))) { 4305 + raw_inode->i_uid_low = cpu_to_le16(low_16_bits(i_uid)); 4306 + raw_inode->i_gid_low = cpu_to_le16(low_16_bits(i_gid)); 4307 + /* 4308 + * Fix up interoperability with old kernels. Otherwise, 4309 + * old inodes get re-used with the upper 16 bits of the 4310 + * uid/gid intact. 
4311 + */ 4312 + if (ei->i_dtime && list_empty(&ei->i_orphan)) { 4313 + raw_inode->i_uid_high = 0; 4314 + raw_inode->i_gid_high = 0; 4315 + } else { 4316 + raw_inode->i_uid_high = 4317 + cpu_to_le16(high_16_bits(i_uid)); 4318 + raw_inode->i_gid_high = 4319 + cpu_to_le16(high_16_bits(i_gid)); 4320 + } 4321 + } else { 4322 + raw_inode->i_uid_low = cpu_to_le16(fs_high2lowuid(i_uid)); 4323 + raw_inode->i_gid_low = cpu_to_le16(fs_high2lowgid(i_gid)); 4324 + raw_inode->i_uid_high = 0; 4325 + raw_inode->i_gid_high = 0; 4326 + } 4327 + raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); 4328 + 4329 + EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode); 4330 + EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode); 4331 + EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode); 4332 + EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode); 4333 + 4334 + raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); 4335 + raw_inode->i_flags = cpu_to_le32(ei->i_flags & 0xFFFFFFFF); 4336 + if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) 4337 + raw_inode->i_file_acl_high = 4338 + cpu_to_le16(ei->i_file_acl >> 32); 4339 + raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl); 4340 + ext4_isize_set(raw_inode, ei->i_disksize); 4341 + 4342 + raw_inode->i_generation = cpu_to_le32(inode->i_generation); 4343 + if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { 4344 + if (old_valid_dev(inode->i_rdev)) { 4345 + raw_inode->i_block[0] = 4346 + cpu_to_le32(old_encode_dev(inode->i_rdev)); 4347 + raw_inode->i_block[1] = 0; 4348 + } else { 4349 + raw_inode->i_block[0] = 0; 4350 + raw_inode->i_block[1] = 4351 + cpu_to_le32(new_encode_dev(inode->i_rdev)); 4352 + raw_inode->i_block[2] = 0; 4353 + } 4354 + } else if (!ext4_has_inline_data(inode)) { 4355 + for (block = 0; block < EXT4_N_BLOCKS; block++) 4356 + raw_inode->i_block[block] = ei->i_data[block]; 4357 + } 4358 + 4359 + if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) { 4360 + u64 ivers = ext4_inode_peek_iversion(inode); 4361 + 4362 + raw_inode->i_disk_version = 
cpu_to_le32(ivers); 4363 + if (ei->i_extra_isize) { 4364 + if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi)) 4365 + raw_inode->i_version_hi = 4366 + cpu_to_le32(ivers >> 32); 4367 + raw_inode->i_extra_isize = 4368 + cpu_to_le16(ei->i_extra_isize); 4369 + } 4370 + } 4371 + 4372 + if (i_projid != EXT4_DEF_PROJID && 4373 + !ext4_has_feature_project(inode->i_sb)) 4374 + err = err ?: -EFSCORRUPTED; 4375 + 4376 + if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE && 4377 + EXT4_FITS_IN_INODE(raw_inode, ei, i_projid)) 4378 + raw_inode->i_projid = cpu_to_le32(i_projid); 4379 + 4380 + ext4_inode_csum_set(inode, raw_inode, ei); 4381 + return err; 4382 + } 4383 + 4234 4384 /* 4235 4385 * ext4_get_inode_loc returns with an extra refcount against the inode's 4236 - * underlying buffer_head on success. If 'in_mem' is true, we have all 4237 - * data in memory that is needed to recreate the on-disk version of this 4238 - * inode. 4386 + * underlying buffer_head on success. If we pass 'inode' and it does not 4387 + * have in-inode xattr, we have all inode data in memory that is needed 4388 + * to recreate the on-disk version of this inode. 4239 4389 */ 4240 4390 static int __ext4_get_inode_loc(struct super_block *sb, unsigned long ino, 4241 - struct ext4_iloc *iloc, int in_mem, 4391 + struct inode *inode, struct ext4_iloc *iloc, 4242 4392 ext4_fsblk_t *ret_block) 4243 4393 { 4244 4394 struct ext4_group_desc *gdp; ··· 4431 4287 * is the only valid inode in the block, we need not read the 4432 4288 * block. 
4433 4289 */ 4434 - if (in_mem) { 4290 + if (inode && !ext4_test_inode_state(inode, EXT4_STATE_XATTR)) { 4435 4291 struct buffer_head *bitmap_bh; 4436 4292 int i, start; 4437 4293 ··· 4459 4315 } 4460 4316 brelse(bitmap_bh); 4461 4317 if (i == start + inodes_per_block) { 4318 + struct ext4_inode *raw_inode = 4319 + (struct ext4_inode *) (bh->b_data + iloc->offset); 4320 + 4462 4321 /* all other inodes are free, so skip I/O */ 4463 4322 memset(bh->b_data, 0, bh->b_size); 4323 + if (!ext4_test_inode_state(inode, EXT4_STATE_NEW)) 4324 + ext4_fill_raw_inode(inode, raw_inode); 4464 4325 set_buffer_uptodate(bh); 4465 4326 unlock_buffer(bh); 4466 4327 goto has_buffer; ··· 4526 4377 ext4_fsblk_t err_blk; 4527 4378 int ret; 4528 4379 4529 - ret = __ext4_get_inode_loc(inode->i_sb, inode->i_ino, iloc, 0, 4380 + ret = __ext4_get_inode_loc(inode->i_sb, inode->i_ino, NULL, iloc, 4530 4381 &err_blk); 4531 4382 4532 4383 if (ret == -EIO) ··· 4541 4392 ext4_fsblk_t err_blk; 4542 4393 int ret; 4543 4394 4544 - /* We have all inode data except xattrs in memory here. 
*/ 4545 - ret = __ext4_get_inode_loc(inode->i_sb, inode->i_ino, iloc, 4546 - !ext4_test_inode_state(inode, EXT4_STATE_XATTR), &err_blk); 4395 + ret = __ext4_get_inode_loc(inode->i_sb, inode->i_ino, inode, iloc, 4396 + &err_blk); 4547 4397 4548 4398 if (ret == -EIO) 4549 4399 ext4_error_inode_block(inode, err_blk, EIO, ··· 4555 4407 int ext4_get_fc_inode_loc(struct super_block *sb, unsigned long ino, 4556 4408 struct ext4_iloc *iloc) 4557 4409 { 4558 - return __ext4_get_inode_loc(sb, ino, iloc, 0, NULL); 4410 + return __ext4_get_inode_loc(sb, ino, NULL, iloc, NULL); 4559 4411 } 4560 4412 4561 4413 static bool ext4_should_enable_dax(struct inode *inode) ··· 4675 4527 inode_set_iversion_raw(inode, val); 4676 4528 else 4677 4529 inode_set_iversion_queried(inode, val); 4678 - } 4679 - static inline u64 ext4_inode_peek_iversion(const struct inode *inode) 4680 - { 4681 - if (unlikely(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) 4682 - return inode_peek_iversion_raw(inode); 4683 - else 4684 - return inode_peek_iversion(inode); 4685 4530 } 4686 4531 4687 4532 struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, ··· 4996 4855 return ERR_PTR(ret); 4997 4856 } 4998 4857 4999 - static int ext4_inode_blocks_set(handle_t *handle, 5000 - struct ext4_inode *raw_inode, 5001 - struct ext4_inode_info *ei) 5002 - { 5003 - struct inode *inode = &(ei->vfs_inode); 5004 - u64 i_blocks = READ_ONCE(inode->i_blocks); 5005 - struct super_block *sb = inode->i_sb; 5006 - 5007 - if (i_blocks <= ~0U) { 5008 - /* 5009 - * i_blocks can be represented in a 32 bit variable 5010 - * as multiple of 512 bytes 5011 - */ 5012 - raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); 5013 - raw_inode->i_blocks_high = 0; 5014 - ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE); 5015 - return 0; 5016 - } 5017 - 5018 - /* 5019 - * This should never happen since sb->s_maxbytes should not have 5020 - * allowed this, sb->s_maxbytes was set according to the huge_file 5021 - * feature in ext4_fill_super(). 
5022 - */ 5023 - if (!ext4_has_feature_huge_file(sb)) 5024 - return -EFSCORRUPTED; 5025 - 5026 - if (i_blocks <= 0xffffffffffffULL) { 5027 - /* 5028 - * i_blocks can be represented in a 48 bit variable 5029 - * as multiple of 512 bytes 5030 - */ 5031 - raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); 5032 - raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32); 5033 - ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE); 5034 - } else { 5035 - ext4_set_inode_flag(inode, EXT4_INODE_HUGE_FILE); 5036 - /* i_block is stored in file system block size */ 5037 - i_blocks = i_blocks >> (inode->i_blkbits - 9); 5038 - raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); 5039 - raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32); 5040 - } 5041 - return 0; 5042 - } 5043 - 5044 4858 static void __ext4_update_other_inode_time(struct super_block *sb, 5045 4859 unsigned long orig_ino, 5046 4860 unsigned long ino, ··· 5071 4975 struct ext4_inode_info *ei = EXT4_I(inode); 5072 4976 struct buffer_head *bh = iloc->bh; 5073 4977 struct super_block *sb = inode->i_sb; 5074 - int err = 0, block; 4978 + int err; 5075 4979 int need_datasync = 0, set_large_file = 0; 5076 - uid_t i_uid; 5077 - gid_t i_gid; 5078 - projid_t i_projid; 5079 4980 5080 4981 spin_lock(&ei->i_raw_lock); 5081 4982 ··· 5083 4990 if (ext4_test_inode_state(inode, EXT4_STATE_NEW)) 5084 4991 memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); 5085 4992 5086 - err = ext4_inode_blocks_set(handle, raw_inode, ei); 5087 - 5088 - raw_inode->i_mode = cpu_to_le16(inode->i_mode); 5089 - i_uid = i_uid_read(inode); 5090 - i_gid = i_gid_read(inode); 5091 - i_projid = from_kprojid(&init_user_ns, ei->i_projid); 5092 - if (!(test_opt(inode->i_sb, NO_UID32))) { 5093 - raw_inode->i_uid_low = cpu_to_le16(low_16_bits(i_uid)); 5094 - raw_inode->i_gid_low = cpu_to_le16(low_16_bits(i_gid)); 5095 - /* 5096 - * Fix up interoperability with old kernels. 
Otherwise, 5097 - * old inodes get re-used with the upper 16 bits of the 5098 - * uid/gid intact. 5099 - */ 5100 - if (ei->i_dtime && list_empty(&ei->i_orphan)) { 5101 - raw_inode->i_uid_high = 0; 5102 - raw_inode->i_gid_high = 0; 5103 - } else { 5104 - raw_inode->i_uid_high = 5105 - cpu_to_le16(high_16_bits(i_uid)); 5106 - raw_inode->i_gid_high = 5107 - cpu_to_le16(high_16_bits(i_gid)); 5108 - } 5109 - } else { 5110 - raw_inode->i_uid_low = cpu_to_le16(fs_high2lowuid(i_uid)); 5111 - raw_inode->i_gid_low = cpu_to_le16(fs_high2lowgid(i_gid)); 5112 - raw_inode->i_uid_high = 0; 5113 - raw_inode->i_gid_high = 0; 5114 - } 5115 - raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); 5116 - 5117 - EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode); 5118 - EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode); 5119 - EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode); 5120 - EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode); 5121 - 5122 - raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); 5123 - raw_inode->i_flags = cpu_to_le32(ei->i_flags & 0xFFFFFFFF); 5124 - if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) 5125 - raw_inode->i_file_acl_high = 5126 - cpu_to_le16(ei->i_file_acl >> 32); 5127 - raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl); 5128 - if (READ_ONCE(ei->i_disksize) != ext4_isize(inode->i_sb, raw_inode)) { 5129 - ext4_isize_set(raw_inode, ei->i_disksize); 4993 + if (READ_ONCE(ei->i_disksize) != ext4_isize(inode->i_sb, raw_inode)) 5130 4994 need_datasync = 1; 5131 - } 5132 4995 if (ei->i_disksize > 0x7fffffffULL) { 5133 4996 if (!ext4_has_feature_large_file(sb) || 5134 - EXT4_SB(sb)->s_es->s_rev_level == 5135 - cpu_to_le32(EXT4_GOOD_OLD_REV)) 4997 + EXT4_SB(sb)->s_es->s_rev_level == cpu_to_le32(EXT4_GOOD_OLD_REV)) 5136 4998 set_large_file = 1; 5137 4999 } 5138 - raw_inode->i_generation = cpu_to_le32(inode->i_generation); 5139 - if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { 5140 - if (old_valid_dev(inode->i_rdev)) { 5141 - raw_inode->i_block[0] = 5142 - 
cpu_to_le32(old_encode_dev(inode->i_rdev)); 5143 - raw_inode->i_block[1] = 0; 5144 - } else { 5145 - raw_inode->i_block[0] = 0; 5146 - raw_inode->i_block[1] = 5147 - cpu_to_le32(new_encode_dev(inode->i_rdev)); 5148 - raw_inode->i_block[2] = 0; 5149 - } 5150 - } else if (!ext4_has_inline_data(inode)) { 5151 - for (block = 0; block < EXT4_N_BLOCKS; block++) 5152 - raw_inode->i_block[block] = ei->i_data[block]; 5153 - } 5154 5000 5155 - if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) { 5156 - u64 ivers = ext4_inode_peek_iversion(inode); 5157 - 5158 - raw_inode->i_disk_version = cpu_to_le32(ivers); 5159 - if (ei->i_extra_isize) { 5160 - if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi)) 5161 - raw_inode->i_version_hi = 5162 - cpu_to_le32(ivers >> 32); 5163 - raw_inode->i_extra_isize = 5164 - cpu_to_le16(ei->i_extra_isize); 5165 - } 5166 - } 5167 - 5168 - if (i_projid != EXT4_DEF_PROJID && 5169 - !ext4_has_feature_project(inode->i_sb)) 5170 - err = err ?: -EFSCORRUPTED; 5171 - 5172 - if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE && 5173 - EXT4_FITS_IN_INODE(raw_inode, ei, i_projid)) 5174 - raw_inode->i_projid = cpu_to_le32(i_projid); 5175 - 5176 - ext4_inode_csum_set(inode, raw_inode, ei); 5001 + err = ext4_fill_raw_inode(inode, raw_inode); 5177 5002 spin_unlock(&ei->i_raw_lock); 5178 5003 if (err) { 5179 5004 EXT4_ERROR_INODE(inode, "corrupted inode contents");
+2 -3
fs/ext4/mballoc.c
··· 6299 6299 { 6300 6300 ext4_grpblk_t next, count, free_count; 6301 6301 void *bitmap; 6302 - int ret = 0; 6303 6302 6304 6303 bitmap = e4b->bd_bitmap; 6305 6304 start = (e4b->bd_info->bb_first_free > start) ? ··· 6313 6314 next = mb_find_next_bit(bitmap, max + 1, start); 6314 6315 6315 6316 if ((next - start) >= minblocks) { 6316 - ret = ext4_trim_extent(sb, start, next - start, e4b); 6317 + int ret = ext4_trim_extent(sb, start, next - start, e4b); 6318 + 6317 6319 if (ret && ret != -EOPNOTSUPP) 6318 6320 break; 6319 - ret = 0; 6320 6321 count += next - start; 6321 6322 } 6322 6323 free_count += next - start;
+1 -1
fs/ext4/namei.c
··· 1439 1439 fname->hinfo.minor_hash != 1440 1440 EXT4_DIRENT_MINOR_HASH(de)) { 1441 1441 1442 - return 0; 1442 + return false; 1443 1443 } 1444 1444 } 1445 1445 return !ext4_ci_compare(parent, &cf, de->name,
+4 -4
fs/ext4/page-io.c
··· 279 279 io_end->inode = inode; 280 280 INIT_LIST_HEAD(&io_end->list); 281 281 INIT_LIST_HEAD(&io_end->list_vec); 282 - atomic_set(&io_end->count, 1); 282 + refcount_set(&io_end->count, 1); 283 283 } 284 284 return io_end; 285 285 } 286 286 287 287 void ext4_put_io_end_defer(ext4_io_end_t *io_end) 288 288 { 289 - if (atomic_dec_and_test(&io_end->count)) { 289 + if (refcount_dec_and_test(&io_end->count)) { 290 290 if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) || 291 291 list_empty(&io_end->list_vec)) { 292 292 ext4_release_io_end(io_end); ··· 300 300 { 301 301 int err = 0; 302 302 303 - if (atomic_dec_and_test(&io_end->count)) { 303 + if (refcount_dec_and_test(&io_end->count)) { 304 304 if (io_end->flag & EXT4_IO_END_UNWRITTEN) { 305 305 err = ext4_convert_unwritten_io_end_vec(io_end->handle, 306 306 io_end); ··· 314 314 315 315 ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end) 316 316 { 317 - atomic_inc(&io_end->count); 317 + refcount_inc(&io_end->count); 318 318 return io_end; 319 319 } 320 320
+8 -7
fs/ext4/super.c
··· 3270 3270 struct super_block *sb = elr->lr_super; 3271 3271 ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; 3272 3272 ext4_group_t group = elr->lr_next_group; 3273 - unsigned long timeout = 0; 3274 3273 unsigned int prefetch_ios = 0; 3275 3274 int ret = 0; 3275 + u64 start_time; 3276 3276 3277 3277 if (elr->lr_mode == EXT4_LI_MODE_PREFETCH_BBITMAP) { 3278 3278 elr->lr_next_group = ext4_mb_prefetch(sb, group, ··· 3309 3309 ret = 1; 3310 3310 3311 3311 if (!ret) { 3312 - timeout = jiffies; 3312 + start_time = ktime_get_real_ns(); 3313 3313 ret = ext4_init_inode_table(sb, group, 3314 3314 elr->lr_timeout ? 0 : 1); 3315 3315 trace_ext4_lazy_itable_init(sb, group); 3316 3316 if (elr->lr_timeout == 0) { 3317 - timeout = (jiffies - timeout) * 3318 - EXT4_SB(elr->lr_super)->s_li_wait_mult; 3319 - elr->lr_timeout = timeout; 3317 + elr->lr_timeout = nsecs_to_jiffies((ktime_get_real_ns() - start_time) * 3318 + EXT4_SB(elr->lr_super)->s_li_wait_mult); 3320 3319 } 3321 3320 elr->lr_next_sched = jiffies + elr->lr_timeout; 3322 3321 elr->lr_next_group = group + 1; ··· 5733 5734 struct ext4_sb_info *sbi = EXT4_SB(sb); 5734 5735 unsigned long old_sb_flags, vfs_flags; 5735 5736 struct ext4_mount_options old_opts; 5736 - int enable_quota = 0; 5737 5737 ext4_group_t g; 5738 5738 int err = 0; 5739 5739 #ifdef CONFIG_QUOTA 5740 + int enable_quota = 0; 5740 5741 int i, j; 5741 5742 char *to_free[EXT4_MAXQUOTAS]; 5742 5743 #endif ··· 5827 5828 } 5828 5829 5829 5830 if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED)) 5830 - ext4_abort(sb, EXT4_ERR_ESHUTDOWN, "Abort forced by user"); 5831 + ext4_abort(sb, ESHUTDOWN, "Abort forced by user"); 5831 5832 5832 5833 sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | 5833 5834 (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0); ··· 5941 5942 err = -EROFS; 5942 5943 goto restore_opts; 5943 5944 } 5945 + #ifdef CONFIG_QUOTA 5944 5946 enable_quota = 1; 5947 + #endif 5945 5948 } 5946 5949 } 5947 5950