Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ext4: cleanup transaction restarts during inode deletion

During inode deletion, the number of journal credits that will be
needed is hard to determine. For that reason we have journal
extend/restart calls in several places. Whenever a transaction is
restarted, filesystem must be in a consistent state because there is
no atomicity guarantee beyond a restart call.

Add ext4_xattr_ensure_credits() helper function which takes care of
journal extend/restart logic. It also handles getting jbd2 write
access and dirty metadata calls. This function is called at every
iteration of handling an ea_inode reference.

Signed-off-by: Tahsin Erdogan <tahsin@google.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>

authored by

Tahsin Erdogan and committed by
Theodore Ts'o
30a7eb97 02749a4c

+184 -143
+17 -49
fs/ext4/inode.c
··· 239 239 */ 240 240 sb_start_intwrite(inode->i_sb); 241 241 242 - handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, extra_credits); 242 + if (!IS_NOQUOTA(inode)) 243 + extra_credits += EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb); 244 + 245 + handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, 246 + ext4_blocks_for_truncate(inode)+extra_credits); 243 247 if (IS_ERR(handle)) { 244 248 ext4_std_error(inode->i_sb, PTR_ERR(handle)); 245 249 /* ··· 255 251 sb_end_intwrite(inode->i_sb); 256 252 goto no_delete; 257 253 } 254 + 258 255 if (IS_SYNC(inode)) 259 256 ext4_handle_sync(handle); 260 - 261 - /* 262 - * Delete xattr inode before deleting the main inode. 263 - */ 264 - err = ext4_xattr_delete_inode(handle, inode, &ea_inode_array); 265 - if (err) { 266 - ext4_warning(inode->i_sb, 267 - "couldn't delete inode's xattr (err %d)", err); 268 - goto stop_handle; 269 - } 270 - 271 - if (!IS_NOQUOTA(inode)) 272 - extra_credits += 2 * EXT4_QUOTA_DEL_BLOCKS(inode->i_sb); 273 - 274 - if (!ext4_handle_has_enough_credits(handle, 275 - ext4_blocks_for_truncate(inode) + extra_credits)) { 276 - err = ext4_journal_extend(handle, 277 - ext4_blocks_for_truncate(inode) + extra_credits); 278 - if (err > 0) 279 - err = ext4_journal_restart(handle, 280 - ext4_blocks_for_truncate(inode) + extra_credits); 281 - if (err != 0) { 282 - ext4_warning(inode->i_sb, 283 - "couldn't extend journal (err %d)", err); 284 - goto stop_handle; 285 - } 286 - } 287 - 288 257 inode->i_size = 0; 289 258 err = ext4_mark_inode_dirty(handle, inode); 290 259 if (err) { ··· 275 298 } 276 299 } 277 300 278 - /* 279 - * ext4_ext_truncate() doesn't reserve any slop when it 280 - * restarts journal transactions; therefore there may not be 281 - * enough credits left in the handle to remove the inode from 282 - * the orphan list and set the dtime field. 283 - */ 284 - if (!ext4_handle_has_enough_credits(handle, extra_credits)) { 285 - err = ext4_journal_extend(handle, extra_credits); 286 - if (err > 0) 287 - err = ext4_journal_restart(handle, extra_credits); 288 - if (err != 0) { 289 - ext4_warning(inode->i_sb, 290 - "couldn't extend journal (err %d)", err); 291 - stop_handle: 292 - ext4_journal_stop(handle); 293 - ext4_orphan_del(NULL, inode); 294 - sb_end_intwrite(inode->i_sb); 295 - goto no_delete; 296 - } 301 + /* Remove xattr references. */ 302 + err = ext4_xattr_delete_inode(handle, inode, &ea_inode_array, 303 + extra_credits); 304 + if (err) { 305 + ext4_warning(inode->i_sb, "xattr delete (err %d)", err); 306 + stop_handle: 307 + ext4_journal_stop(handle); 308 + ext4_orphan_del(NULL, inode); 309 + sb_end_intwrite(inode->i_sb); 310 + ext4_xattr_inode_array_free(ea_inode_array); 311 + goto no_delete; 297 312 } 298 313 299 314 /* ··· 311 342 ext4_clear_inode(inode); 312 343 else 313 344 ext4_free_inode(handle, inode); 314 - 315 345 ext4_journal_stop(handle); 316 346 sb_end_intwrite(inode->i_sb); 317 347 ext4_xattr_inode_array_free(ea_inode_array);
+165 -93
fs/ext4/xattr.c
··· 108 108 #define EA_BLOCK_CACHE(inode) (((struct ext4_sb_info *) \ 109 109 inode->i_sb->s_fs_info)->s_ea_block_cache) 110 110 111 + static int 112 + ext4_expand_inode_array(struct ext4_xattr_inode_array **ea_inode_array, 113 + struct inode *inode); 114 + 111 115 #ifdef CONFIG_LOCKDEP 112 116 void ext4_xattr_inode_set_class(struct inode *ea_inode) 113 117 { ··· 653 649 if (ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh) == 0) { 654 650 ext4_set_feature_xattr(sb); 655 651 ext4_handle_dirty_super(handle, sb); 652 + } 653 + } 654 + 655 + static int ext4_xattr_ensure_credits(handle_t *handle, struct inode *inode, 656 + int credits, struct buffer_head *bh, 657 + bool dirty, bool block_csum) 658 + { 659 + int error; 660 + 661 + if (!ext4_handle_valid(handle)) 662 + return 0; 663 + 664 + if (handle->h_buffer_credits >= credits) 665 + return 0; 666 + 667 + error = ext4_journal_extend(handle, credits - handle->h_buffer_credits); 668 + if (!error) 669 + return 0; 670 + if (error < 0) { 671 + ext4_warning(inode->i_sb, "Extend journal (error %d)", error); 672 + return error; 673 + } 674 + 675 + if (bh && dirty) { 676 + if (block_csum) 677 + ext4_xattr_block_csum_set(inode, bh); 678 + error = ext4_handle_dirty_metadata(handle, NULL, bh); 679 + if (error) { 680 + ext4_warning(inode->i_sb, "Handle metadata (error %d)", 681 + error); 682 + return error; 683 + } 684 + } 685 + 686 + error = ext4_journal_restart(handle, credits); 687 + if (error) { 688 + ext4_warning(inode->i_sb, "Restart journal (error %d)", error); 689 + return error; 690 + } 691 + 692 + if (bh) { 693 + error = ext4_journal_get_write_access(handle, bh); 694 + if (error) { 695 + ext4_warning(inode->i_sb, 696 + "Get write access failed (error %d)", 697 + error); 698 + return error; 699 + } 700 + } 701 + return 0; 702 + } 703 + 704 + static void 705 + ext4_xattr_inode_remove_all(handle_t *handle, struct inode *parent, 706 + struct buffer_head *bh, 707 + struct ext4_xattr_entry *first, bool block_csum, 708 + struct ext4_xattr_inode_array **ea_inode_array, 709 + int extra_credits) 710 + { 711 + struct inode *ea_inode; 712 + struct ext4_xattr_entry *entry; 713 + bool dirty = false; 714 + unsigned int ea_ino; 715 + int err; 716 + int credits; 717 + 718 + /* One credit for dec ref on ea_inode, one for orphan list addition, */ 719 + credits = 2 + extra_credits; 720 + 721 + for (entry = first; !IS_LAST_ENTRY(entry); 722 + entry = EXT4_XATTR_NEXT(entry)) { 723 + if (!entry->e_value_inum) 724 + continue; 725 + ea_ino = le32_to_cpu(entry->e_value_inum); 726 + err = ext4_xattr_inode_iget(parent, ea_ino, &ea_inode); 727 + if (err) 728 + continue; 729 + 730 + err = ext4_expand_inode_array(ea_inode_array, ea_inode); 731 + if (err) { 732 + ext4_warning_inode(ea_inode, 733 + "Expand inode array err=%d", err); 734 + iput(ea_inode); 735 + continue; 736 + } 737 + 738 + err = ext4_xattr_ensure_credits(handle, parent, credits, bh, 739 + dirty, block_csum); 740 + if (err) { 741 + ext4_warning_inode(ea_inode, "Ensure credits err=%d", 742 + err); 743 + continue; 744 + } 745 + 746 + inode_lock(ea_inode); 747 + clear_nlink(ea_inode); 748 + ext4_orphan_add(handle, ea_inode); 749 + inode_unlock(ea_inode); 750 + 751 + /* 752 + * Forget about ea_inode within the same transaction that 753 + * decrements the ref count. This avoids duplicate decrements in 754 + * case the rest of the work spills over to subsequent 755 + * transactions. 756 + */ 757 + entry->e_value_inum = 0; 758 + entry->e_value_size = 0; 759 + 760 + dirty = true; 761 + } 762 + 763 + if (dirty) { 764 + /* 765 + * Note that we are deliberately skipping csum calculation for 766 + * the final update because we do not expect any journal 767 + * restarts until xattr block is freed. 768 + */ 769 + 770 + err = ext4_handle_dirty_metadata(handle, NULL, bh); 771 + if (err) 772 + ext4_warning_inode(parent, 773 + "handle dirty metadata err=%d", err); 656 774 } 657 775 } 658 776 ··· 2108 1982 return 0; 2109 1983 } 2110 1984 2111 - /** 2112 - * Add xattr inode to orphan list 2113 - */ 2114 - static int 2115 - ext4_xattr_inode_orphan_add(handle_t *handle, struct inode *inode, int credits, 2116 - struct ext4_xattr_inode_array *ea_inode_array) 2117 - { 2118 - int idx = 0, error = 0; 2119 - struct inode *ea_inode; 2120 - 2121 - if (ea_inode_array == NULL) 2122 - return 0; 2123 - 2124 - for (; idx < ea_inode_array->count; ++idx) { 2125 - if (!ext4_handle_has_enough_credits(handle, credits)) { 2126 - error = ext4_journal_extend(handle, credits); 2127 - if (error > 0) 2128 - error = ext4_journal_restart(handle, credits); 2129 - 2130 - if (error != 0) { 2131 - ext4_warning(inode->i_sb, 2132 - "couldn't extend journal " 2133 - "(err %d)", error); 2134 - return error; 2135 - } 2136 - } 2137 - ea_inode = ea_inode_array->inodes[idx]; 2138 - inode_lock(ea_inode); 2139 - ext4_orphan_add(handle, ea_inode); 2140 - inode_unlock(ea_inode); 2141 - /* the inode's i_count will be released by caller */ 2142 - } 2143 - 2144 - return 0; 2145 - } 2146 - 2147 1985 /* 2148 1986 * ext4_xattr_delete_inode() 2149 1987 * ··· 2120 2030 */ 2121 2031 int 2122 2032 ext4_xattr_delete_inode(handle_t *handle, struct inode *inode, 2123 - struct ext4_xattr_inode_array **ea_inode_array) 2033 + struct ext4_xattr_inode_array **ea_inode_array, 2034 + int extra_credits) 2124 2035 { 2125 2036 struct buffer_head *bh = NULL; 2126 2037 struct ext4_xattr_ibody_header *header; 2127 2038 struct ext4_inode *raw_inode; 2128 - struct ext4_iloc iloc; 2129 - struct ext4_xattr_entry *entry; 2130 - struct inode *ea_inode; 2131 - unsigned int ea_ino; 2132 - int credits = 3, error = 0; 2039 + struct ext4_iloc iloc = { .bh = NULL }; 2040 + int error; 2041 + 2042 + error = ext4_xattr_ensure_credits(handle, inode, extra_credits, 2043 + NULL /* bh */, 2044 + false /* dirty */, 2045 + false /* block_csum */); 2046 + if (error) { 2047 + EXT4_ERROR_INODE(inode, "ensure credits (error %d)", error); 2048 + goto cleanup; 2049 + } 2133 2050 2134 2051 if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR)) 2135 2052 goto delete_external_ea; ··· 2144 2047 error = ext4_get_inode_loc(inode, &iloc); 2145 2048 if (error) 2146 2049 goto cleanup; 2050 + 2051 + error = ext4_journal_get_write_access(handle, iloc.bh); 2052 + if (error) 2053 + goto cleanup; 2054 + 2147 2055 raw_inode = ext4_raw_inode(&iloc); 2148 2056 header = IHDR(inode, raw_inode); 2149 - for (entry = IFIRST(header); !IS_LAST_ENTRY(entry); 2150 - entry = EXT4_XATTR_NEXT(entry)) { 2151 - if (!entry->e_value_inum) 2152 - continue; 2153 - ea_ino = le32_to_cpu(entry->e_value_inum); 2154 - error = ext4_xattr_inode_iget(inode, ea_ino, &ea_inode); 2155 - if (error) 2156 - continue; 2157 - error = ext4_expand_inode_array(ea_inode_array, ea_inode); 2158 - if (error) { 2159 - iput(ea_inode); 2160 - brelse(iloc.bh); 2161 - goto cleanup; 2162 - } 2163 - entry->e_value_inum = 0; 2164 - } 2165 - brelse(iloc.bh); 2057 + ext4_xattr_inode_remove_all(handle, inode, iloc.bh, IFIRST(header), 2058 + false /* block_csum */, ea_inode_array, 2059 + extra_credits); 2166 2060 2167 2061 delete_external_ea: 2168 2062 if (!EXT4_I(inode)->i_file_acl) { 2169 - /* add xattr inode to orphan list */ 2170 - error = ext4_xattr_inode_orphan_add(handle, inode, credits, 2171 - *ea_inode_array); 2063 + error = 0; 2172 2064 goto cleanup; 2173 2065 } 2174 2066 bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); ··· 2175 2089 goto cleanup; 2176 2090 } 2177 2091 2178 - for (entry = BFIRST(bh); !IS_LAST_ENTRY(entry); 2179 - entry = EXT4_XATTR_NEXT(entry)) { 2180 - if (!entry->e_value_inum) 2181 - continue; 2182 - ea_ino = le32_to_cpu(entry->e_value_inum); 2183 - error = ext4_xattr_inode_iget(inode, ea_ino, &ea_inode); 2184 - if (error) 2185 - continue; 2186 - error = ext4_expand_inode_array(ea_inode_array, ea_inode); 2187 - if (error) 2188 - goto cleanup; 2189 - entry->e_value_inum = 0; 2190 - } 2191 - 2192 - /* add xattr inode to orphan list */ 2193 - error = ext4_xattr_inode_orphan_add(handle, inode, credits, 2194 - *ea_inode_array); 2195 - if (error) 2196 - goto cleanup; 2197 - 2198 - if (!IS_NOQUOTA(inode)) 2199 - credits += 2 * EXT4_QUOTA_DEL_BLOCKS(inode->i_sb); 2200 - 2201 - if (!ext4_handle_has_enough_credits(handle, credits)) { 2202 - error = ext4_journal_extend(handle, credits); 2203 - if (error > 0) 2204 - error = ext4_journal_restart(handle, credits); 2092 + if (ext4_has_feature_ea_inode(inode->i_sb)) { 2093 + error = ext4_journal_get_write_access(handle, bh); 2205 2094 if (error) { 2206 - ext4_warning(inode->i_sb, 2207 - "couldn't extend journal (err %d)", error); 2095 + EXT4_ERROR_INODE(inode, "write access %llu", 2096 + EXT4_I(inode)->i_file_acl); 2208 2097 goto cleanup; 2209 2098 } 2099 + ext4_xattr_inode_remove_all(handle, inode, bh, 2100 + BFIRST(bh), 2101 + true /* block_csum */, 2102 + ea_inode_array, 2103 + extra_credits); 2210 2104 } 2211 2105 2212 2106 ext4_xattr_release_block(handle, inode, bh); 2107 + /* Update i_file_acl within the same transaction that releases block. */ 2213 2108 EXT4_I(inode)->i_file_acl = 0; 2214 - 2109 + error = ext4_mark_inode_dirty(handle, inode); 2110 + if (error) { 2111 + EXT4_ERROR_INODE(inode, "mark inode dirty (error %d)", 2112 + error); 2113 + goto cleanup; 2114 + } 2215 2115 cleanup: 2116 + brelse(iloc.bh); 2216 2117 brelse(bh); 2217 - 2218 2118 return error; 2219 2119 } 2220 2120
+2 -1
fs/ext4/xattr.h
··· 169 169 170 170 extern int ext4_xattr_inode_unlink(struct inode *inode, unsigned long ea_ino); 171 171 extern int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode, 172 - struct ext4_xattr_inode_array **array); 172 + struct ext4_xattr_inode_array **array, 173 + int extra_credits); 173 174 extern void ext4_xattr_inode_array_free(struct ext4_xattr_inode_array *array); 174 175 175 176 extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,