Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:
"Fix some bugs in converting ext4 to use the new mount API, as well as
more bug fixes and clean ups in the ext4 fast_commit feature (most
notably, in the tracepoints).

In the jbd2 layer, the t_handle_lock spinlock has been removed, with
the last place where it was actually needed replaced with an atomic
cmpxchg"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (35 commits)
ext4: fix kernel doc warnings
ext4: fix remaining two trace events to use same printk convention
ext4: add commit tid info in ext4_fc_commit_start/stop trace events
ext4: add commit_tid info in jbd debug log
ext4: add transaction tid info in fc_track events
ext4: add new trace event in ext4_fc_cleanup
ext4: return early for non-eligible fast_commit track events
ext4: do not call FC trace event in ext4_fc_commit() if FS does not support FC
ext4: convert ext4_fc_track_dentry type events to use event class
ext4: fix ext4_fc_stats trace point
ext4: remove unused enum EXT4_FC_COMMIT_FAILED
ext4: warn when dirtying page w/o buffers in data=journal mode
doc: fixed a typo in ext4 documentation
ext4: make mb_optimize_scan performance mount option work with extents
ext4: make mb_optimize_scan option work with set/unset mount cmd
ext4: don't BUG if someone dirty pages without asking ext4 first
ext4: remove redundant assignment to variable split_flag1
ext4: fix underflow in ext4_max_bitmap_size()
ext4: fix ext4_mb_clear_bb() kernel-doc comment
ext4: fix fs corruption when trying to remove a non-empty directory with IO error
...

+693 -410
+1 -1
Documentation/filesystems/ext4/blocks.rst
··· 39 39 - 4TiB 40 40 - 8TiB 41 41 - 16TiB 42 - - 256PiB 42 + - 256TiB 43 43 * - Blocks Per Block Group 44 44 - 8,192 45 45 - 16,384
+1
fs/ext4/balloc.c
··· 411 411 * ext4_read_block_bitmap_nowait() 412 412 * @sb: super block 413 413 * @block_group: given block group 414 + * @ignore_locked: ignore locked buffers 414 415 * 415 416 * Read the bitmap for a given block_group,and validate the 416 417 * bits for block/inode/inode tables are set in the bitmaps
+17 -9
fs/ext4/block_validity.c
··· 292 292 call_rcu(&system_blks->rcu, ext4_destroy_system_zone); 293 293 } 294 294 295 - /* 296 - * Returns 1 if the passed-in block region (start_blk, 297 - * start_blk+count) is valid; 0 if some part of the block region 298 - * overlaps with some other filesystem metadata blocks. 299 - */ 300 - int ext4_inode_block_valid(struct inode *inode, ext4_fsblk_t start_blk, 301 - unsigned int count) 295 + int ext4_sb_block_valid(struct super_block *sb, struct inode *inode, 296 + ext4_fsblk_t start_blk, unsigned int count) 302 297 { 303 - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 298 + struct ext4_sb_info *sbi = EXT4_SB(sb); 304 299 struct ext4_system_blocks *system_blks; 305 300 struct ext4_system_zone *entry; 306 301 struct rb_node *n; ··· 324 329 else if (start_blk >= (entry->start_blk + entry->count)) 325 330 n = n->rb_right; 326 331 else { 327 - ret = (entry->ino == inode->i_ino); 332 + ret = 0; 333 + if (inode) 334 + ret = (entry->ino == inode->i_ino); 328 335 break; 329 336 } 330 337 } 331 338 out_rcu: 332 339 rcu_read_unlock(); 333 340 return ret; 341 + } 342 + 343 + /* 344 + * Returns 1 if the passed-in block region (start_blk, 345 + * start_blk+count) is valid; 0 if some part of the block region 346 + * overlaps with some other filesystem metadata blocks. 347 + */ 348 + int ext4_inode_block_valid(struct inode *inode, ext4_fsblk_t start_blk, 349 + unsigned int count) 350 + { 351 + return ext4_sb_block_valid(inode->i_sb, inode, start_blk, count); 334 352 } 335 353 336 354 int ext4_check_blockref(const char *function, unsigned int line,
+6 -1
fs/ext4/ext4.h
··· 1046 1046 1047 1047 /* Fast commit related info */ 1048 1048 1049 + /* For tracking dentry create updates */ 1050 + struct list_head i_fc_dilist; 1049 1051 struct list_head i_fc_list; /* 1050 1052 * inodes that need fast commit 1051 1053 * protected by sbi->s_fc_lock. ··· 1281 1279 #define ext4_find_next_zero_bit find_next_zero_bit_le 1282 1280 #define ext4_find_next_bit find_next_bit_le 1283 1281 1284 - extern void ext4_set_bits(void *bm, int cur, int len); 1282 + extern void mb_set_bits(void *bm, int cur, int len); 1285 1283 1286 1284 /* 1287 1285 * Maximal mount counts between two filesystem checks ··· 3709 3707 unsigned int count); 3710 3708 extern int ext4_check_blockref(const char *, unsigned int, 3711 3709 struct inode *, __le32 *, unsigned int); 3710 + extern int ext4_sb_block_valid(struct super_block *sb, struct inode *inode, 3711 + ext4_fsblk_t start_blk, unsigned int count); 3712 + 3712 3713 3713 3714 /* extents.c */ 3714 3715 struct ext4_ext_path;
-1
fs/ext4/extents.c
··· 3368 3368 return -EFSCORRUPTED; 3369 3369 } 3370 3370 unwritten = ext4_ext_is_unwritten(ex); 3371 - split_flag1 = 0; 3372 3371 3373 3372 if (map->m_lblk >= ee_block) { 3374 3373 split_flag1 = split_flag & EXT4_EXT_DATA_VALID2;
+127 -46
fs/ext4/fast_commit.c
··· 199 199 ext4_fc_reset_inode(inode); 200 200 ext4_clear_inode_state(inode, EXT4_STATE_FC_COMMITTING); 201 201 INIT_LIST_HEAD(&ei->i_fc_list); 202 + INIT_LIST_HEAD(&ei->i_fc_dilist); 202 203 init_waitqueue_head(&ei->i_fc_wait); 203 204 atomic_set(&ei->i_fc_updates, 0); 204 205 } ··· 280 279 void ext4_fc_del(struct inode *inode) 281 280 { 282 281 struct ext4_inode_info *ei = EXT4_I(inode); 282 + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 283 + struct ext4_fc_dentry_update *fc_dentry; 283 284 284 285 if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || 285 286 (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)) ··· 289 286 290 287 restart: 291 288 spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock); 292 - if (list_empty(&ei->i_fc_list)) { 289 + if (list_empty(&ei->i_fc_list) && list_empty(&ei->i_fc_dilist)) { 293 290 spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock); 294 291 return; 295 292 } ··· 298 295 ext4_fc_wait_committing_inode(inode); 299 296 goto restart; 300 297 } 301 - list_del_init(&ei->i_fc_list); 302 - spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock); 298 + 299 + if (!list_empty(&ei->i_fc_list)) 300 + list_del_init(&ei->i_fc_list); 301 + 302 + /* 303 + * Since this inode is getting removed, let's also remove all FC 304 + * dentry create references, since it is not needed to log it anyways. 
305 + */ 306 + if (list_empty(&ei->i_fc_dilist)) { 307 + spin_unlock(&sbi->s_fc_lock); 308 + return; 309 + } 310 + 311 + fc_dentry = list_first_entry(&ei->i_fc_dilist, struct ext4_fc_dentry_update, fcd_dilist); 312 + WARN_ON(fc_dentry->fcd_op != EXT4_FC_TAG_CREAT); 313 + list_del_init(&fc_dentry->fcd_list); 314 + list_del_init(&fc_dentry->fcd_dilist); 315 + 316 + WARN_ON(!list_empty(&ei->i_fc_dilist)); 317 + spin_unlock(&sbi->s_fc_lock); 318 + 319 + if (fc_dentry->fcd_name.name && 320 + fc_dentry->fcd_name.len > DNAME_INLINE_LEN) 321 + kfree(fc_dentry->fcd_name.name); 322 + kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry); 323 + 324 + return; 303 325 } 304 326 305 327 /* ··· 378 350 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 379 351 tid_t tid = 0; 380 352 int ret; 381 - 382 - if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || 383 - (sbi->s_mount_state & EXT4_FC_REPLAY)) 384 - return -EOPNOTSUPP; 385 - 386 - if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) 387 - return -EINVAL; 388 353 389 354 tid = handle->h_transaction->t_tid; 390 355 mutex_lock(&ei->i_fc_lock); ··· 448 427 node->fcd_name.name = node->fcd_iname; 449 428 } 450 429 node->fcd_name.len = dentry->d_name.len; 451 - 430 + INIT_LIST_HEAD(&node->fcd_dilist); 452 431 spin_lock(&sbi->s_fc_lock); 453 432 if (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING || 454 433 sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING) ··· 456 435 &sbi->s_fc_dentry_q[FC_Q_STAGING]); 457 436 else 458 437 list_add_tail(&node->fcd_list, &sbi->s_fc_dentry_q[FC_Q_MAIN]); 438 + 439 + /* 440 + * This helps us keep a track of all fc_dentry updates which is part of 441 + * this ext4 inode. So in case the inode is getting unlinked, before 442 + * even we get a chance to fsync, we could remove all fc_dentry 443 + * references while evicting the inode in ext4_fc_del(). 
444 + * Also with this, we don't need to loop over all the inodes in 445 + * sbi->s_fc_q to get the corresponding inode in 446 + * ext4_fc_commit_dentry_updates(). 447 + */ 448 + if (dentry_update->op == EXT4_FC_TAG_CREAT) { 449 + WARN_ON(!list_empty(&ei->i_fc_dilist)); 450 + list_add_tail(&node->fcd_dilist, &ei->i_fc_dilist); 451 + } 459 452 spin_unlock(&sbi->s_fc_lock); 460 453 mutex_lock(&ei->i_fc_lock); 461 454 ··· 487 452 488 453 ret = ext4_fc_track_template(handle, inode, __track_dentry_update, 489 454 (void *)&args, 0); 490 - trace_ext4_fc_track_unlink(inode, dentry, ret); 455 + trace_ext4_fc_track_unlink(handle, inode, dentry, ret); 491 456 } 492 457 493 458 void ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry) 494 459 { 495 - __ext4_fc_track_unlink(handle, d_inode(dentry), dentry); 460 + struct inode *inode = d_inode(dentry); 461 + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 462 + 463 + if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || 464 + (sbi->s_mount_state & EXT4_FC_REPLAY)) 465 + return; 466 + 467 + if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) 468 + return; 469 + 470 + __ext4_fc_track_unlink(handle, inode, dentry); 496 471 } 497 472 498 473 void __ext4_fc_track_link(handle_t *handle, ··· 516 471 517 472 ret = ext4_fc_track_template(handle, inode, __track_dentry_update, 518 473 (void *)&args, 0); 519 - trace_ext4_fc_track_link(inode, dentry, ret); 474 + trace_ext4_fc_track_link(handle, inode, dentry, ret); 520 475 } 521 476 522 477 void ext4_fc_track_link(handle_t *handle, struct dentry *dentry) 523 478 { 524 - __ext4_fc_track_link(handle, d_inode(dentry), dentry); 479 + struct inode *inode = d_inode(dentry); 480 + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 481 + 482 + if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || 483 + (sbi->s_mount_state & EXT4_FC_REPLAY)) 484 + return; 485 + 486 + if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) 487 + return; 488 + 489 + __ext4_fc_track_link(handle, 
inode, dentry); 525 490 } 526 491 527 492 void __ext4_fc_track_create(handle_t *handle, struct inode *inode, ··· 545 490 546 491 ret = ext4_fc_track_template(handle, inode, __track_dentry_update, 547 492 (void *)&args, 0); 548 - trace_ext4_fc_track_create(inode, dentry, ret); 493 + trace_ext4_fc_track_create(handle, inode, dentry, ret); 549 494 } 550 495 551 496 void ext4_fc_track_create(handle_t *handle, struct dentry *dentry) 552 497 { 553 - __ext4_fc_track_create(handle, d_inode(dentry), dentry); 498 + struct inode *inode = d_inode(dentry); 499 + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 500 + 501 + if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || 502 + (sbi->s_mount_state & EXT4_FC_REPLAY)) 503 + return; 504 + 505 + if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) 506 + return; 507 + 508 + __ext4_fc_track_create(handle, inode, dentry); 554 509 } 555 510 556 511 /* __track_fn for inode tracking */ ··· 576 511 577 512 void ext4_fc_track_inode(handle_t *handle, struct inode *inode) 578 513 { 514 + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 579 515 int ret; 580 516 581 517 if (S_ISDIR(inode->i_mode)) ··· 588 522 return; 589 523 } 590 524 525 + if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || 526 + (sbi->s_mount_state & EXT4_FC_REPLAY)) 527 + return; 528 + 529 + if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) 530 + return; 531 + 591 532 ret = ext4_fc_track_template(handle, inode, __track_inode, NULL, 1); 592 - trace_ext4_fc_track_inode(inode, ret); 533 + trace_ext4_fc_track_inode(handle, inode, ret); 593 534 } 594 535 595 536 struct __track_range_args { ··· 634 561 void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t start, 635 562 ext4_lblk_t end) 636 563 { 564 + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 637 565 struct __track_range_args args; 638 566 int ret; 639 567 640 568 if (S_ISDIR(inode->i_mode)) 569 + return; 570 + 571 + if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || 572 + 
(sbi->s_mount_state & EXT4_FC_REPLAY)) 573 + return; 574 + 575 + if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) 641 576 return; 642 577 643 578 args.start = start; ··· 653 572 654 573 ret = ext4_fc_track_template(handle, inode, __track_range, &args, 1); 655 574 656 - trace_ext4_fc_track_range(inode, start, end, ret); 575 + trace_ext4_fc_track_range(handle, inode, start, end, ret); 657 576 } 658 577 659 578 static void ext4_fc_submit_bh(struct super_block *sb, bool is_tail) ··· 1035 954 struct ext4_sb_info *sbi = EXT4_SB(sb); 1036 955 struct ext4_fc_dentry_update *fc_dentry, *fc_dentry_n; 1037 956 struct inode *inode; 1038 - struct ext4_inode_info *ei, *ei_n; 957 + struct ext4_inode_info *ei; 1039 958 int ret; 1040 959 1041 960 if (list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN])) ··· 1051 970 spin_lock(&sbi->s_fc_lock); 1052 971 continue; 1053 972 } 1054 - 1055 - inode = NULL; 1056 - list_for_each_entry_safe(ei, ei_n, &sbi->s_fc_q[FC_Q_MAIN], 1057 - i_fc_list) { 1058 - if (ei->vfs_inode.i_ino == fc_dentry->fcd_ino) { 1059 - inode = &ei->vfs_inode; 1060 - break; 1061 - } 1062 - } 1063 973 /* 1064 - * If we don't find inode in our list, then it was deleted, 1065 - * in which case, we don't need to record it's create tag. 
974 + * With fcd_dilist we need not loop in sbi->s_fc_q to get the 975 + * corresponding inode pointer 1066 976 */ 1067 - if (!inode) 1068 - continue; 977 + WARN_ON(list_empty(&fc_dentry->fcd_dilist)); 978 + ei = list_first_entry(&fc_dentry->fcd_dilist, 979 + struct ext4_inode_info, i_fc_dilist); 980 + inode = &ei->vfs_inode; 981 + WARN_ON(inode->i_ino != fc_dentry->fcd_ino); 982 + 1069 983 spin_unlock(&sbi->s_fc_lock); 1070 984 1071 985 /* ··· 1164 1088 } 1165 1089 1166 1090 static void ext4_fc_update_stats(struct super_block *sb, int status, 1167 - u64 commit_time, int nblks) 1091 + u64 commit_time, int nblks, tid_t commit_tid) 1168 1092 { 1169 1093 struct ext4_fc_stats *stats = &EXT4_SB(sb)->s_fc_stats; 1170 1094 1171 - jbd_debug(1, "Fast commit ended with status = %d", status); 1095 + jbd_debug(1, "Fast commit ended with status = %d for tid %u", 1096 + status, commit_tid); 1172 1097 if (status == EXT4_FC_STATUS_OK) { 1173 1098 stats->fc_num_commits++; 1174 1099 stats->fc_numblks += nblks; ··· 1187 1110 } else { 1188 1111 stats->fc_skipped_commits++; 1189 1112 } 1190 - trace_ext4_fc_commit_stop(sb, nblks, status); 1113 + trace_ext4_fc_commit_stop(sb, nblks, status, commit_tid); 1191 1114 } 1192 1115 1193 1116 /* ··· 1205 1128 int status = EXT4_FC_STATUS_OK, fc_bufs_before = 0; 1206 1129 ktime_t start_time, commit_time; 1207 1130 1208 - trace_ext4_fc_commit_start(sb); 1209 - 1210 - start_time = ktime_get(); 1211 - 1212 1131 if (!test_opt2(sb, JOURNAL_FAST_COMMIT)) 1213 1132 return jbd2_complete_transaction(journal, commit_tid); 1133 + 1134 + trace_ext4_fc_commit_start(sb, commit_tid); 1135 + 1136 + start_time = ktime_get(); 1214 1137 1215 1138 restart_fc: 1216 1139 ret = jbd2_fc_begin_commit(journal, commit_tid); ··· 1219 1142 if (atomic_read(&sbi->s_fc_subtid) <= subtid && 1220 1143 commit_tid > journal->j_commit_sequence) 1221 1144 goto restart_fc; 1222 - ext4_fc_update_stats(sb, EXT4_FC_STATUS_SKIPPED, 0, 0); 1145 + ext4_fc_update_stats(sb, 
EXT4_FC_STATUS_SKIPPED, 0, 0, 1146 + commit_tid); 1223 1147 return 0; 1224 1148 } else if (ret) { 1225 1149 /* 1226 1150 * Commit couldn't start. Just update stats and perform a 1227 1151 * full commit. 1228 1152 */ 1229 - ext4_fc_update_stats(sb, EXT4_FC_STATUS_FAILED, 0, 0); 1153 + ext4_fc_update_stats(sb, EXT4_FC_STATUS_FAILED, 0, 0, 1154 + commit_tid); 1230 1155 return jbd2_complete_transaction(journal, commit_tid); 1231 1156 } 1232 1157 ··· 1260 1181 * don't react too strongly to vast changes in the commit time 1261 1182 */ 1262 1183 commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time)); 1263 - ext4_fc_update_stats(sb, status, commit_time, nblks); 1184 + ext4_fc_update_stats(sb, status, commit_time, nblks, commit_tid); 1264 1185 return ret; 1265 1186 1266 1187 fallback: 1267 1188 ret = jbd2_fc_end_commit_fallback(journal); 1268 - ext4_fc_update_stats(sb, status, 0, 0); 1189 + ext4_fc_update_stats(sb, status, 0, 0, commit_tid); 1269 1190 return ret; 1270 1191 } 1271 1192 ··· 1283 1204 if (full && sbi->s_fc_bh) 1284 1205 sbi->s_fc_bh = NULL; 1285 1206 1207 + trace_ext4_fc_cleanup(journal, full, tid); 1286 1208 jbd2_fc_release_bufs(journal); 1287 1209 1288 1210 spin_lock(&sbi->s_fc_lock); ··· 1308 1228 struct ext4_fc_dentry_update, 1309 1229 fcd_list); 1310 1230 list_del_init(&fc_dentry->fcd_list); 1231 + list_del_init(&fc_dentry->fcd_dilist); 1311 1232 spin_unlock(&sbi->s_fc_lock); 1312 1233 1313 1234 if (fc_dentry->fcd_name.name && ··· 1956 1875 if (state->fc_regions[i].ino == 0 || 1957 1876 state->fc_regions[i].len == 0) 1958 1877 continue; 1959 - if (blk >= state->fc_regions[i].pblk && 1960 - blk < state->fc_regions[i].pblk + state->fc_regions[i].len) 1878 + if (in_range(blk, state->fc_regions[i].pblk, 1879 + state->fc_regions[i].len)) 1961 1880 return true; 1962 1881 } 1963 1882 return false;
+1 -1
fs/ext4/fast_commit.h
··· 93 93 EXT4_FC_REASON_RENAME_DIR, 94 94 EXT4_FC_REASON_FALLOC_RANGE, 95 95 EXT4_FC_REASON_INODE_JOURNAL_DATA, 96 - EXT4_FC_COMMIT_FAILED, 97 96 EXT4_FC_REASON_MAX 98 97 }; 99 98 ··· 108 109 struct qstr fcd_name; /* Dirent name */ 109 110 unsigned char fcd_iname[DNAME_INLINE_LEN]; /* Dirent name string */ 110 111 struct list_head fcd_list; 112 + struct list_head fcd_dilist; 111 113 }; 112 114 113 115 struct ext4_fc_stats {
+4 -5
fs/ext4/inline.c
··· 1783 1783 void *inline_pos; 1784 1784 unsigned int offset; 1785 1785 struct ext4_dir_entry_2 *de; 1786 - bool ret = true; 1786 + bool ret = false; 1787 1787 1788 1788 err = ext4_get_inode_loc(dir, &iloc); 1789 1789 if (err) { 1790 1790 EXT4_ERROR_INODE_ERR(dir, -err, 1791 1791 "error %d getting inode %lu block", 1792 1792 err, dir->i_ino); 1793 - return true; 1793 + return false; 1794 1794 } 1795 1795 1796 1796 down_read(&EXT4_I(dir)->xattr_sem); 1797 1797 if (!ext4_has_inline_data(dir)) { 1798 1798 *has_inline_data = 0; 1799 + ret = true; 1799 1800 goto out; 1800 1801 } 1801 1802 ··· 1805 1804 ext4_warning(dir->i_sb, 1806 1805 "bad inline directory (dir #%lu) - no `..'", 1807 1806 dir->i_ino); 1808 - ret = true; 1809 1807 goto out; 1810 1808 } 1811 1809 ··· 1823 1823 dir->i_ino, le32_to_cpu(de->inode), 1824 1824 le16_to_cpu(de->rec_len), de->name_len, 1825 1825 inline_size); 1826 - ret = true; 1827 1826 goto out; 1828 1827 } 1829 1828 if (le32_to_cpu(de->inode)) { 1830 - ret = false; 1831 1829 goto out; 1832 1830 } 1833 1831 offset += ext4_rec_len_from_disk(de->rec_len, inline_size); 1834 1832 } 1835 1833 1834 + ret = true; 1836 1835 out: 1837 1836 up_read(&EXT4_I(dir)->xattr_sem); 1838 1837 brelse(iloc.bh);
+31 -4
fs/ext4/inode.c
··· 1993 1993 else 1994 1994 len = PAGE_SIZE; 1995 1995 1996 + /* Should never happen but for bugs in other kernel subsystems */ 1997 + if (!page_has_buffers(page)) { 1998 + ext4_warning_inode(inode, 1999 + "page %lu does not have buffers attached", page->index); 2000 + ClearPageDirty(page); 2001 + unlock_page(page); 2002 + return 0; 2003 + } 2004 + 1996 2005 page_bufs = page_buffers(page); 1997 2006 /* 1998 2007 * We cannot do block allocation or other extent handling in this ··· 2602 2593 2603 2594 wait_on_page_writeback(page); 2604 2595 BUG_ON(PageWriteback(page)); 2596 + 2597 + /* 2598 + * Should never happen but for buggy code in 2599 + * other subsystems that call 2600 + * set_page_dirty() without properly warning 2601 + * the file system first. See [1] for more 2602 + * information. 2603 + * 2604 + * [1] https://lore.kernel.org/linux-mm/20180103100430.GE4911@quack2.suse.cz 2605 + */ 2606 + if (!page_has_buffers(page)) { 2607 + ext4_warning_inode(mpd->inode, "page %lu does not have buffers attached", page->index); 2608 + ClearPageDirty(page); 2609 + unlock_page(page); 2610 + continue; 2611 + } 2605 2612 2606 2613 if (mpd->map.m_len == 0) 2607 2614 mpd->first_page = page->index; ··· 3573 3548 }; 3574 3549 3575 3550 /* 3576 - * Pages can be marked dirty completely asynchronously from ext4's journalling 3577 - * activity. By filemap_sync_pte(), try_to_unmap_one(), etc. We cannot do 3578 - * much here because ->set_page_dirty is called under VFS locks. The page is 3579 - * not necessarily locked. 3551 + * Whenever the page is being dirtied, corresponding buffers should already be 3552 + * attached to the transaction (we take care of this in ext4_page_mkwrite() and 3553 + * ext4_write_begin()). However we cannot move buffers to dirty transaction 3554 + * lists here because ->set_page_dirty is called under VFS locks and the page 3555 + * is not necessarily locked. 
3580 3556 * 3581 3557 * We cannot just dirty the page and leave attached buffers clean, because the 3582 3558 * buffers' dirty state is "definitive". We cannot just set the buffers dirty ··· 3588 3562 */ 3589 3563 static int ext4_journalled_set_page_dirty(struct page *page) 3590 3564 { 3565 + WARN_ON_ONCE(!page_has_buffers(page)); 3591 3566 SetPageChecked(page); 3592 3567 return __set_page_dirty_nobuffers(page); 3593 3568 }
+3 -3
fs/ext4/ioctl.c
··· 269 269 return err ? err : 0; 270 270 } 271 271 272 - /** 272 + /* 273 273 * Swap memory between @a and @b for @len bytes. 274 274 * 275 275 * @a: pointer to first memory area ··· 290 290 } 291 291 } 292 292 293 - /** 293 + /* 294 294 * Swap i_data and associated attributes between @inode1 and @inode2. 295 295 * This function is used for the primary swap between inode1 and inode2 296 296 * and also to revert this primary swap in case of errors. ··· 344 344 ei->i_csum_seed = ext4_chksum(sbi, csum, (__u8 *)&gen, sizeof(gen)); 345 345 } 346 346 347 - /** 347 + /* 348 348 * Swap the information from the given @inode and the inode 349 349 * EXT4_BOOT_LOADER_INO. It will basically swap i_data and all other 350 350 * important fields of the inodes.
+203 -156
fs/ext4/mballoc.c
··· 1000 1000 return 0; 1001 1001 if (ac->ac_criteria >= 2) 1002 1002 return 0; 1003 - if (ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) 1003 + if (!ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) 1004 1004 return 0; 1005 1005 return 1; 1006 1006 } ··· 1689 1689 return zero_bit; 1690 1690 } 1691 1691 1692 - void ext4_set_bits(void *bm, int cur, int len) 1692 + void mb_set_bits(void *bm, int cur, int len) 1693 1693 { 1694 1694 __u32 *addr; 1695 1695 ··· 1996 1996 mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info); 1997 1997 1998 1998 mb_update_avg_fragment_size(e4b->bd_sb, e4b->bd_info); 1999 - ext4_set_bits(e4b->bd_bitmap, ex->fe_start, len0); 1999 + mb_set_bits(e4b->bd_bitmap, ex->fe_start, len0); 2000 2000 mb_check_buddy(e4b); 2001 2001 2002 2002 return ret; ··· 3825 3825 * We leak some of the blocks here. 3826 3826 */ 3827 3827 ext4_lock_group(sb, ac->ac_b_ex.fe_group); 3828 - ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, 3828 + mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, 3829 3829 ac->ac_b_ex.fe_len); 3830 3830 ext4_unlock_group(sb, ac->ac_b_ex.fe_group); 3831 3831 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); ··· 3844 3844 } 3845 3845 } 3846 3846 #endif 3847 - ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, 3847 + mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, 3848 3848 ac->ac_b_ex.fe_len); 3849 3849 if (ext4_has_group_desc_csum(sb) && 3850 3850 (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) { ··· 3899 3899 struct ext4_sb_info *sbi = EXT4_SB(sb); 3900 3900 ext4_group_t group; 3901 3901 ext4_grpblk_t blkoff; 3902 - int i, clen, err; 3902 + int i, err; 3903 3903 int already; 3904 + unsigned int clen, clen_changed, thisgrp_len; 3904 3905 3905 - clen = EXT4_B2C(sbi, len); 3906 + while (len > 0) { 3907 + ext4_get_group_no_and_offset(sb, block, &group, &blkoff); 3906 3908 3907 - ext4_get_group_no_and_offset(sb, block, &group, &blkoff); 3908 - bitmap_bh = ext4_read_block_bitmap(sb, group); 3909 - if 
(IS_ERR(bitmap_bh)) { 3910 - err = PTR_ERR(bitmap_bh); 3911 - bitmap_bh = NULL; 3912 - goto out_err; 3909 + /* 3910 + * Check to see if we are freeing blocks across a group 3911 + * boundary. 3912 + * In case of flex_bg, this can happen that (block, len) may 3913 + * span across more than one group. In that case we need to 3914 + * get the corresponding group metadata to work with. 3915 + * For this we have goto again loop. 3916 + */ 3917 + thisgrp_len = min_t(unsigned int, (unsigned int)len, 3918 + EXT4_BLOCKS_PER_GROUP(sb) - EXT4_C2B(sbi, blkoff)); 3919 + clen = EXT4_NUM_B2C(sbi, thisgrp_len); 3920 + 3921 + if (!ext4_sb_block_valid(sb, NULL, block, thisgrp_len)) { 3922 + ext4_error(sb, "Marking blocks in system zone - " 3923 + "Block = %llu, len = %u", 3924 + block, thisgrp_len); 3925 + bitmap_bh = NULL; 3926 + break; 3927 + } 3928 + 3929 + bitmap_bh = ext4_read_block_bitmap(sb, group); 3930 + if (IS_ERR(bitmap_bh)) { 3931 + err = PTR_ERR(bitmap_bh); 3932 + bitmap_bh = NULL; 3933 + break; 3934 + } 3935 + 3936 + err = -EIO; 3937 + gdp = ext4_get_group_desc(sb, group, &gdp_bh); 3938 + if (!gdp) 3939 + break; 3940 + 3941 + ext4_lock_group(sb, group); 3942 + already = 0; 3943 + for (i = 0; i < clen; i++) 3944 + if (!mb_test_bit(blkoff + i, bitmap_bh->b_data) == 3945 + !state) 3946 + already++; 3947 + 3948 + clen_changed = clen - already; 3949 + if (state) 3950 + mb_set_bits(bitmap_bh->b_data, blkoff, clen); 3951 + else 3952 + mb_clear_bits(bitmap_bh->b_data, blkoff, clen); 3953 + if (ext4_has_group_desc_csum(sb) && 3954 + (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) { 3955 + gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); 3956 + ext4_free_group_clusters_set(sb, gdp, 3957 + ext4_free_clusters_after_init(sb, group, gdp)); 3958 + } 3959 + if (state) 3960 + clen = ext4_free_group_clusters(sb, gdp) - clen_changed; 3961 + else 3962 + clen = ext4_free_group_clusters(sb, gdp) + clen_changed; 3963 + 3964 + ext4_free_group_clusters_set(sb, gdp, clen); 3965 + 
ext4_block_bitmap_csum_set(sb, group, gdp, bitmap_bh); 3966 + ext4_group_desc_csum_set(sb, group, gdp); 3967 + 3968 + ext4_unlock_group(sb, group); 3969 + 3970 + if (sbi->s_log_groups_per_flex) { 3971 + ext4_group_t flex_group = ext4_flex_group(sbi, group); 3972 + struct flex_groups *fg = sbi_array_rcu_deref(sbi, 3973 + s_flex_groups, flex_group); 3974 + 3975 + if (state) 3976 + atomic64_sub(clen_changed, &fg->free_clusters); 3977 + else 3978 + atomic64_add(clen_changed, &fg->free_clusters); 3979 + 3980 + } 3981 + 3982 + err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh); 3983 + if (err) 3984 + break; 3985 + sync_dirty_buffer(bitmap_bh); 3986 + err = ext4_handle_dirty_metadata(NULL, NULL, gdp_bh); 3987 + sync_dirty_buffer(gdp_bh); 3988 + if (err) 3989 + break; 3990 + 3991 + block += thisgrp_len; 3992 + len -= thisgrp_len; 3993 + brelse(bitmap_bh); 3994 + BUG_ON(len < 0); 3913 3995 } 3914 3996 3915 - err = -EIO; 3916 - gdp = ext4_get_group_desc(sb, group, &gdp_bh); 3917 - if (!gdp) 3918 - goto out_err; 3919 - 3920 - ext4_lock_group(sb, group); 3921 - already = 0; 3922 - for (i = 0; i < clen; i++) 3923 - if (!mb_test_bit(blkoff + i, bitmap_bh->b_data) == !state) 3924 - already++; 3925 - 3926 - if (state) 3927 - ext4_set_bits(bitmap_bh->b_data, blkoff, clen); 3928 - else 3929 - mb_test_and_clear_bits(bitmap_bh->b_data, blkoff, clen); 3930 - if (ext4_has_group_desc_csum(sb) && 3931 - (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) { 3932 - gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); 3933 - ext4_free_group_clusters_set(sb, gdp, 3934 - ext4_free_clusters_after_init(sb, 3935 - group, gdp)); 3936 - } 3937 - if (state) 3938 - clen = ext4_free_group_clusters(sb, gdp) - clen + already; 3939 - else 3940 - clen = ext4_free_group_clusters(sb, gdp) + clen - already; 3941 - 3942 - ext4_free_group_clusters_set(sb, gdp, clen); 3943 - ext4_block_bitmap_csum_set(sb, group, gdp, bitmap_bh); 3944 - ext4_group_desc_csum_set(sb, group, gdp); 3945 - 3946 - 
ext4_unlock_group(sb, group); 3947 - 3948 - if (sbi->s_log_groups_per_flex) { 3949 - ext4_group_t flex_group = ext4_flex_group(sbi, group); 3950 - 3951 - atomic64_sub(len, 3952 - &sbi_array_rcu_deref(sbi, s_flex_groups, 3953 - flex_group)->free_clusters); 3954 - } 3955 - 3956 - err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh); 3957 3997 if (err) 3958 - goto out_err; 3959 - sync_dirty_buffer(bitmap_bh); 3960 - err = ext4_handle_dirty_metadata(NULL, NULL, gdp_bh); 3961 - sync_dirty_buffer(gdp_bh); 3962 - 3963 - out_err: 3964 - brelse(bitmap_bh); 3998 + brelse(bitmap_bh); 3965 3999 } 3966 4000 3967 4001 /* ··· 4467 4433 4468 4434 while (n) { 4469 4435 entry = rb_entry(n, struct ext4_free_data, efd_node); 4470 - ext4_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count); 4436 + mb_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count); 4471 4437 n = rb_next(n); 4472 4438 } 4473 4439 return; ··· 4508 4474 if (unlikely(len == 0)) 4509 4475 continue; 4510 4476 BUG_ON(groupnr != group); 4511 - ext4_set_bits(bitmap, start, len); 4477 + mb_set_bits(bitmap, start, len); 4512 4478 preallocated += len; 4513 4479 } 4514 4480 mb_debug(sb, "preallocated %d for group %u\n", preallocated, group); ··· 5880 5846 } 5881 5847 5882 5848 /** 5883 - * ext4_free_blocks() -- Free given blocks and update quota 5849 + * ext4_mb_clear_bb() -- helper function for freeing blocks. 
5850 + * Used by ext4_free_blocks() 5884 5851 * @handle: handle for this transaction 5885 5852 * @inode: inode 5886 - * @bh: optional buffer of the block to be freed 5887 5853 * @block: starting physical block to be freed 5888 5854 * @count: number of blocks to be freed 5889 5855 * @flags: flags used by ext4_free_blocks 5890 5856 */ 5891 - void ext4_free_blocks(handle_t *handle, struct inode *inode, 5892 - struct buffer_head *bh, ext4_fsblk_t block, 5893 - unsigned long count, int flags) 5857 + static void ext4_mb_clear_bb(handle_t *handle, struct inode *inode, 5858 + ext4_fsblk_t block, unsigned long count, 5859 + int flags) 5894 5860 { 5895 5861 struct buffer_head *bitmap_bh = NULL; 5896 5862 struct super_block *sb = inode->i_sb; ··· 5906 5872 int ret; 5907 5873 5908 5874 sbi = EXT4_SB(sb); 5909 - 5910 - if (sbi->s_mount_state & EXT4_FC_REPLAY) { 5911 - ext4_free_blocks_simple(inode, block, count); 5912 - return; 5913 - } 5914 - 5915 - might_sleep(); 5916 - if (bh) { 5917 - if (block) 5918 - BUG_ON(block != bh->b_blocknr); 5919 - else 5920 - block = bh->b_blocknr; 5921 - } 5922 - 5923 - if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) && 5924 - !ext4_inode_block_valid(inode, block, count)) { 5925 - ext4_error(sb, "Freeing blocks not in datazone - " 5926 - "block = %llu, count = %lu", block, count); 5927 - goto error_return; 5928 - } 5929 - 5930 - ext4_debug("freeing block %llu\n", block); 5931 - trace_ext4_free_blocks(inode, block, count, flags); 5932 - 5933 - if (bh && (flags & EXT4_FREE_BLOCKS_FORGET)) { 5934 - BUG_ON(count > 1); 5935 - 5936 - ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, 5937 - inode, bh, block); 5938 - } 5939 - 5940 - /* 5941 - * If the extent to be freed does not begin on a cluster 5942 - * boundary, we need to deal with partial clusters at the 5943 - * beginning and end of the extent. Normally we will free 5944 - * blocks at the beginning or the end unless we are explicitly 5945 - * requested to avoid doing so. 
5946 - */ 5947 - overflow = EXT4_PBLK_COFF(sbi, block); 5948 - if (overflow) { 5949 - if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) { 5950 - overflow = sbi->s_cluster_ratio - overflow; 5951 - block += overflow; 5952 - if (count > overflow) 5953 - count -= overflow; 5954 - else 5955 - return; 5956 - } else { 5957 - block -= overflow; 5958 - count += overflow; 5959 - } 5960 - } 5961 - overflow = EXT4_LBLK_COFF(sbi, count); 5962 - if (overflow) { 5963 - if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) { 5964 - if (count > overflow) 5965 - count -= overflow; 5966 - else 5967 - return; 5968 - } else 5969 - count += sbi->s_cluster_ratio - overflow; 5970 - } 5971 - 5972 - if (!bh && (flags & EXT4_FREE_BLOCKS_FORGET)) { 5973 - int i; 5974 - int is_metadata = flags & EXT4_FREE_BLOCKS_METADATA; 5975 - 5976 - for (i = 0; i < count; i++) { 5977 - cond_resched(); 5978 - if (is_metadata) 5979 - bh = sb_find_get_block(inode->i_sb, block + i); 5980 - ext4_forget(handle, is_metadata, inode, bh, block + i); 5981 - } 5982 - } 5983 5875 5984 5876 do_more: 5985 5877 overflow = 0; ··· 5937 5977 goto error_return; 5938 5978 } 5939 5979 5940 - if (in_range(ext4_block_bitmap(sb, gdp), block, count) || 5941 - in_range(ext4_inode_bitmap(sb, gdp), block, count) || 5942 - in_range(block, ext4_inode_table(sb, gdp), 5943 - sbi->s_itb_per_group) || 5944 - in_range(block + count - 1, ext4_inode_table(sb, gdp), 5945 - sbi->s_itb_per_group)) { 5946 - 5980 + if (!ext4_inode_block_valid(inode, block, count)) { 5947 5981 ext4_error(sb, "Freeing blocks in system zone - " 5948 5982 "Block = %llu, count = %lu", block, count); 5949 5983 /* err = 0. 
ext4_std_error should be a no op */ ··· 6008 6054 NULL); 6009 6055 if (err && err != -EOPNOTSUPP) 6010 6056 ext4_msg(sb, KERN_WARNING, "discard request in" 6011 - " group:%d block:%d count:%lu failed" 6057 + " group:%u block:%d count:%lu failed" 6012 6058 " with %d", block_group, bit, count, 6013 6059 err); 6014 6060 } else ··· 6069 6115 } 6070 6116 6071 6117 /** 6118 + * ext4_free_blocks() -- Free given blocks and update quota 6119 + * @handle: handle for this transaction 6120 + * @inode: inode 6121 + * @bh: optional buffer of the block to be freed 6122 + * @block: starting physical block to be freed 6123 + * @count: number of blocks to be freed 6124 + * @flags: flags used by ext4_free_blocks 6125 + */ 6126 + void ext4_free_blocks(handle_t *handle, struct inode *inode, 6127 + struct buffer_head *bh, ext4_fsblk_t block, 6128 + unsigned long count, int flags) 6129 + { 6130 + struct super_block *sb = inode->i_sb; 6131 + unsigned int overflow; 6132 + struct ext4_sb_info *sbi; 6133 + 6134 + sbi = EXT4_SB(sb); 6135 + 6136 + if (sbi->s_mount_state & EXT4_FC_REPLAY) { 6137 + ext4_free_blocks_simple(inode, block, count); 6138 + return; 6139 + } 6140 + 6141 + might_sleep(); 6142 + if (bh) { 6143 + if (block) 6144 + BUG_ON(block != bh->b_blocknr); 6145 + else 6146 + block = bh->b_blocknr; 6147 + } 6148 + 6149 + if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) && 6150 + !ext4_inode_block_valid(inode, block, count)) { 6151 + ext4_error(sb, "Freeing blocks not in datazone - " 6152 + "block = %llu, count = %lu", block, count); 6153 + return; 6154 + } 6155 + 6156 + ext4_debug("freeing block %llu\n", block); 6157 + trace_ext4_free_blocks(inode, block, count, flags); 6158 + 6159 + if (bh && (flags & EXT4_FREE_BLOCKS_FORGET)) { 6160 + BUG_ON(count > 1); 6161 + 6162 + ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, 6163 + inode, bh, block); 6164 + } 6165 + 6166 + /* 6167 + * If the extent to be freed does not begin on a cluster 6168 + * boundary, we need to deal with partial clusters 
at the 6169 + * beginning and end of the extent. Normally we will free 6170 + * blocks at the beginning or the end unless we are explicitly 6171 + * requested to avoid doing so. 6172 + */ 6173 + overflow = EXT4_PBLK_COFF(sbi, block); 6174 + if (overflow) { 6175 + if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) { 6176 + overflow = sbi->s_cluster_ratio - overflow; 6177 + block += overflow; 6178 + if (count > overflow) 6179 + count -= overflow; 6180 + else 6181 + return; 6182 + } else { 6183 + block -= overflow; 6184 + count += overflow; 6185 + } 6186 + } 6187 + overflow = EXT4_LBLK_COFF(sbi, count); 6188 + if (overflow) { 6189 + if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) { 6190 + if (count > overflow) 6191 + count -= overflow; 6192 + else 6193 + return; 6194 + } else 6195 + count += sbi->s_cluster_ratio - overflow; 6196 + } 6197 + 6198 + if (!bh && (flags & EXT4_FREE_BLOCKS_FORGET)) { 6199 + int i; 6200 + int is_metadata = flags & EXT4_FREE_BLOCKS_METADATA; 6201 + 6202 + for (i = 0; i < count; i++) { 6203 + cond_resched(); 6204 + if (is_metadata) 6205 + bh = sb_find_get_block(inode->i_sb, block + i); 6206 + ext4_forget(handle, is_metadata, inode, bh, block + i); 6207 + } 6208 + } 6209 + 6210 + ext4_mb_clear_bb(handle, inode, block, count, flags); 6211 + return; 6212 + } 6213 + 6214 + /** 6072 6215 * ext4_group_add_blocks() -- Add given blocks to an existing group 6073 6216 * @handle: handle to this transaction 6074 6217 * @sb: super block ··· 6221 6170 goto error_return; 6222 6171 } 6223 6172 6224 - if (in_range(ext4_block_bitmap(sb, desc), block, count) || 6225 - in_range(ext4_inode_bitmap(sb, desc), block, count) || 6226 - in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) || 6227 - in_range(block + count - 1, ext4_inode_table(sb, desc), 6228 - sbi->s_itb_per_group)) { 6173 + if (!ext4_sb_block_valid(sb, NULL, block, count)) { 6229 6174 ext4_error(sb, "Adding blocks in system zones - " 6230 6175 "Block = %llu, count = %lu", 6231 6176 block, 
count);
+16 -9
fs/ext4/namei.c
··· 2997 2997 if (inode->i_size < ext4_dir_rec_len(1, NULL) + 2998 2998 ext4_dir_rec_len(2, NULL)) { 2999 2999 EXT4_ERROR_INODE(inode, "invalid size"); 3000 - return true; 3000 + return false; 3001 3001 } 3002 3002 /* The first directory block must not be a hole, 3003 3003 * so treat it as DIRENT_HTREE 3004 3004 */ 3005 3005 bh = ext4_read_dirblock(inode, 0, DIRENT_HTREE); 3006 3006 if (IS_ERR(bh)) 3007 - return true; 3007 + return false; 3008 3008 3009 3009 de = (struct ext4_dir_entry_2 *) bh->b_data; 3010 3010 if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, bh->b_size, ··· 3012 3012 le32_to_cpu(de->inode) != inode->i_ino || strcmp(".", de->name)) { 3013 3013 ext4_warning_inode(inode, "directory missing '.'"); 3014 3014 brelse(bh); 3015 - return true; 3015 + return false; 3016 3016 } 3017 3017 offset = ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize); 3018 3018 de = ext4_next_entry(de, sb->s_blocksize); ··· 3021 3021 le32_to_cpu(de->inode) == 0 || strcmp("..", de->name)) { 3022 3022 ext4_warning_inode(inode, "directory missing '..'"); 3023 3023 brelse(bh); 3024 - return true; 3024 + return false; 3025 3025 } 3026 3026 offset += ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize); 3027 3027 while (offset < inode->i_size) { ··· 3035 3035 continue; 3036 3036 } 3037 3037 if (IS_ERR(bh)) 3038 - return true; 3038 + return false; 3039 3039 } 3040 3040 de = (struct ext4_dir_entry_2 *) (bh->b_data + 3041 3041 (offset & (sb->s_blocksize - 1))); ··· 3891 3891 ext4_fc_mark_ineligible(old.inode->i_sb, 3892 3892 EXT4_FC_REASON_RENAME_DIR, handle); 3893 3893 } else { 3894 + struct super_block *sb = old.inode->i_sb; 3895 + 3894 3896 if (new.inode) 3895 3897 ext4_fc_track_unlink(handle, new.dentry); 3896 - __ext4_fc_track_link(handle, old.inode, new.dentry); 3897 - __ext4_fc_track_unlink(handle, old.inode, old.dentry); 3898 - if (whiteout) 3899 - __ext4_fc_track_create(handle, whiteout, old.dentry); 3898 + if (test_opt2(sb, JOURNAL_FAST_COMMIT) && 3899 + 
!(EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY) && 3900 + !(ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE))) { 3901 + __ext4_fc_track_link(handle, old.inode, new.dentry); 3902 + __ext4_fc_track_unlink(handle, old.inode, old.dentry); 3903 + if (whiteout) 3904 + __ext4_fc_track_create(handle, whiteout, 3905 + old.dentry); 3906 + } 3900 3907 } 3901 3908 3902 3909 if (new.inode) {
+4 -3
fs/ext4/resize.c
··· 14 14 15 15 #include <linux/errno.h> 16 16 #include <linux/slab.h> 17 + #include <linux/jiffies.h> 17 18 18 19 #include "ext4_jbd2.h" 19 20 ··· 484 483 } 485 484 ext4_debug("mark block bitmap %#04llx (+%llu/%u)\n", 486 485 first_cluster, first_cluster - start, count2); 487 - ext4_set_bits(bh->b_data, first_cluster - start, count2); 486 + mb_set_bits(bh->b_data, first_cluster - start, count2); 488 487 489 488 err = ext4_handle_dirty_metadata(handle, NULL, bh); 490 489 brelse(bh); ··· 633 632 if (overhead != 0) { 634 633 ext4_debug("mark backup superblock %#04llx (+0)\n", 635 634 start); 636 - ext4_set_bits(bh->b_data, 0, 635 + mb_set_bits(bh->b_data, 0, 637 636 EXT4_NUM_B2C(sbi, overhead)); 638 637 } 639 638 ext4_mark_bitmap_end(EXT4_B2C(sbi, group_data[i].blocks_count), ··· 2101 2100 */ 2102 2101 while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count, 2103 2102 flexbg_size)) { 2104 - if (jiffies - last_update_time > HZ * 10) { 2103 + if (time_is_before_jiffies(last_update_time + HZ * 10)) { 2105 2104 if (last_update_time) 2106 2105 ext4_msg(sb, KERN_INFO, 2107 2106 "resized to %llu blocks",
+66 -33
fs/ext4/super.c
··· 2021 2021 #define EXT4_SPEC_s_commit_interval (1 << 16) 2022 2022 #define EXT4_SPEC_s_fc_debug_max_replay (1 << 17) 2023 2023 #define EXT4_SPEC_s_sb_block (1 << 18) 2024 + #define EXT4_SPEC_mb_optimize_scan (1 << 19) 2024 2025 2025 2026 struct ext4_fs_context { 2026 2027 char *s_qf_names[EXT4_MAXQUOTAS]; 2027 2028 char *test_dummy_enc_arg; 2028 2029 int s_jquota_fmt; /* Format of quota to use */ 2029 - int mb_optimize_scan; 2030 2030 #ifdef CONFIG_EXT4_DEBUG 2031 2031 int s_fc_debug_max_replay; 2032 2032 #endif ··· 2045 2045 unsigned int mask_s_mount_opt; 2046 2046 unsigned int vals_s_mount_opt2; 2047 2047 unsigned int mask_s_mount_opt2; 2048 - unsigned int vals_s_mount_flags; 2049 - unsigned int mask_s_mount_flags; 2048 + unsigned long vals_s_mount_flags; 2049 + unsigned long mask_s_mount_flags; 2050 2050 unsigned int opt_flags; /* MOPT flags */ 2051 2051 unsigned int spec; 2052 2052 u32 s_max_batch_time; ··· 2149 2149 { \ 2150 2150 ctx->mask_s_##name |= flag; \ 2151 2151 ctx->vals_s_##name |= flag; \ 2152 - } \ 2152 + } 2153 + 2154 + #define EXT4_CLEAR_CTX(name) \ 2153 2155 static inline void ctx_clear_##name(struct ext4_fs_context *ctx, \ 2154 2156 unsigned long flag) \ 2155 2157 { \ 2156 2158 ctx->mask_s_##name |= flag; \ 2157 2159 ctx->vals_s_##name &= ~flag; \ 2158 - } \ 2160 + } 2161 + 2162 + #define EXT4_TEST_CTX(name) \ 2159 2163 static inline unsigned long \ 2160 2164 ctx_test_##name(struct ext4_fs_context *ctx, unsigned long flag) \ 2161 2165 { \ 2162 2166 return (ctx->vals_s_##name & flag); \ 2163 - } \ 2167 + } 2164 2168 2165 - EXT4_SET_CTX(flags); 2169 + EXT4_SET_CTX(flags); /* set only */ 2166 2170 EXT4_SET_CTX(mount_opt); 2171 + EXT4_CLEAR_CTX(mount_opt); 2172 + EXT4_TEST_CTX(mount_opt); 2167 2173 EXT4_SET_CTX(mount_opt2); 2168 - EXT4_SET_CTX(mount_flags); 2174 + EXT4_CLEAR_CTX(mount_opt2); 2175 + EXT4_TEST_CTX(mount_opt2); 2176 + 2177 + static inline void ctx_set_mount_flag(struct ext4_fs_context *ctx, int bit) 2178 + { 2179 + set_bit(bit, 
&ctx->mask_s_mount_flags); 2180 + set_bit(bit, &ctx->vals_s_mount_flags); 2181 + } 2169 2182 2170 2183 static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param) 2171 2184 { ··· 2248 2235 param->key); 2249 2236 return 0; 2250 2237 case Opt_abort: 2251 - ctx_set_mount_flags(ctx, EXT4_MF_FS_ABORTED); 2238 + ctx_set_mount_flag(ctx, EXT4_MF_FS_ABORTED); 2252 2239 return 0; 2253 2240 case Opt_i_version: 2254 2241 ext4_msg(NULL, KERN_WARNING, deprecated_msg, param->key, "5.20"); ··· 2464 2451 ctx_clear_mount_opt(ctx, m->mount_opt); 2465 2452 return 0; 2466 2453 case Opt_mb_optimize_scan: 2467 - if (result.int_32 != 0 && result.int_32 != 1) { 2454 + if (result.int_32 == 1) { 2455 + ctx_set_mount_opt2(ctx, EXT4_MOUNT2_MB_OPTIMIZE_SCAN); 2456 + ctx->spec |= EXT4_SPEC_mb_optimize_scan; 2457 + } else if (result.int_32 == 0) { 2458 + ctx_clear_mount_opt2(ctx, EXT4_MOUNT2_MB_OPTIMIZE_SCAN); 2459 + ctx->spec |= EXT4_SPEC_mb_optimize_scan; 2460 + } else { 2468 2461 ext4_msg(NULL, KERN_WARNING, 2469 2462 "mb_optimize_scan should be set to 0 or 1."); 2470 2463 return -EINVAL; 2471 2464 } 2472 - ctx->mb_optimize_scan = result.int_32; 2473 2465 return 0; 2474 2466 } 2475 2467 ··· 3486 3468 */ 3487 3469 static loff_t ext4_max_bitmap_size(int bits, int has_huge_files) 3488 3470 { 3489 - unsigned long long upper_limit, res = EXT4_NDIR_BLOCKS; 3471 + loff_t upper_limit, res = EXT4_NDIR_BLOCKS; 3490 3472 int meta_blocks; 3473 + unsigned int ppb = 1 << (bits - 2); 3491 3474 3492 3475 /* 3493 3476 * This is calculated to be the largest file size for a dense, block ··· 3520 3501 3521 3502 } 3522 3503 3504 + /* Compute how many blocks we can address by block tree */ 3505 + res += ppb; 3506 + res += ppb * ppb; 3507 + res += ((loff_t)ppb) * ppb * ppb; 3508 + /* Compute how many metadata blocks are needed */ 3509 + meta_blocks = 1; 3510 + meta_blocks += 1 + ppb; 3511 + meta_blocks += 1 + ppb + ppb * ppb; 3512 + /* Does block tree limit file size? 
*/ 3513 + if (res + meta_blocks <= upper_limit) 3514 + goto check_lfs; 3515 + 3516 + res = upper_limit; 3517 + /* How many metadata blocks are needed for addressing upper_limit? */ 3518 + upper_limit -= EXT4_NDIR_BLOCKS; 3523 3519 /* indirect blocks */ 3524 3520 meta_blocks = 1; 3521 + upper_limit -= ppb; 3525 3522 /* double indirect blocks */ 3526 - meta_blocks += 1 + (1LL << (bits-2)); 3527 - /* tripple indirect blocks */ 3528 - meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2))); 3529 - 3530 - upper_limit -= meta_blocks; 3531 - upper_limit <<= bits; 3532 - 3533 - res += 1LL << (bits-2); 3534 - res += 1LL << (2*(bits-2)); 3535 - res += 1LL << (3*(bits-2)); 3523 + if (upper_limit < ppb * ppb) { 3524 + meta_blocks += 1 + DIV_ROUND_UP_ULL(upper_limit, ppb); 3525 + res -= meta_blocks; 3526 + goto check_lfs; 3527 + } 3528 + meta_blocks += 1 + ppb; 3529 + upper_limit -= ppb * ppb; 3530 + /* tripple indirect blocks for the rest */ 3531 + meta_blocks += 1 + DIV_ROUND_UP_ULL(upper_limit, ppb) + 3532 + DIV_ROUND_UP_ULL(upper_limit, ppb*ppb); 3533 + res -= meta_blocks; 3534 + check_lfs: 3536 3535 res <<= bits; 3537 - if (res > upper_limit) 3538 - res = upper_limit; 3539 - 3540 3536 if (res > MAX_LFS_FILESIZE) 3541 3537 res = MAX_LFS_FILESIZE; 3542 3538 3543 - return (loff_t)res; 3539 + return res; 3544 3540 } 3545 3541 3546 3542 static ext4_fsblk_t descriptor_loc(struct super_block *sb, ··· 4403 4369 4404 4370 /* Set defaults for the variables that will be set during parsing */ 4405 4371 ctx->journal_ioprio = DEFAULT_JOURNAL_IOPRIO; 4406 - ctx->mb_optimize_scan = DEFAULT_MB_OPTIMIZE_SCAN; 4407 4372 4408 4373 sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; 4409 4374 sbi->s_sectors_written_start = ··· 5353 5320 * turned off by passing "mb_optimize_scan=0". This can also be 5354 5321 * turned on forcefully by passing "mb_optimize_scan=1". 
5355 5322 */ 5356 - if (ctx->mb_optimize_scan == 1) 5357 - set_opt2(sb, MB_OPTIMIZE_SCAN); 5358 - else if (ctx->mb_optimize_scan == 0) 5359 - clear_opt2(sb, MB_OPTIMIZE_SCAN); 5360 - else if (sbi->s_groups_count >= MB_DEFAULT_LINEAR_SCAN_THRESHOLD) 5361 - set_opt2(sb, MB_OPTIMIZE_SCAN); 5323 + if (!(ctx->spec & EXT4_SPEC_mb_optimize_scan)) { 5324 + if (sbi->s_groups_count >= MB_DEFAULT_LINEAR_SCAN_THRESHOLD) 5325 + set_opt2(sb, MB_OPTIMIZE_SCAN); 5326 + else 5327 + clear_opt2(sb, MB_OPTIMIZE_SCAN); 5328 + } 5362 5329 5363 5330 err = ext4_mb_init(sb); 5364 5331 if (err) {
+34 -36
fs/jbd2/transaction.c
··· 107 107 transaction->t_start_time = ktime_get(); 108 108 transaction->t_tid = journal->j_transaction_sequence++; 109 109 transaction->t_expires = jiffies + journal->j_commit_interval; 110 - spin_lock_init(&transaction->t_handle_lock); 111 110 atomic_set(&transaction->t_updates, 0); 112 111 atomic_set(&transaction->t_outstanding_credits, 113 112 jbd2_descriptor_blocks_per_trans(journal) + ··· 138 139 /* 139 140 * Update transaction's maximum wait time, if debugging is enabled. 140 141 * 141 - * In order for t_max_wait to be reliable, it must be protected by a 142 - * lock. But doing so will mean that start_this_handle() can not be 143 - * run in parallel on SMP systems, which limits our scalability. So 144 - * unless debugging is enabled, we no longer update t_max_wait, which 145 - * means that maximum wait time reported by the jbd2_run_stats 146 - * tracepoint will always be zero. 142 + * t_max_wait is carefully updated here with use of atomic compare exchange. 143 + * Note that there could be multiplre threads trying to do this simultaneously 144 + * hence using cmpxchg to avoid any use of locks in this case. 145 + * With this t_max_wait can be updated w/o enabling jbd2_journal_enable_debug. 
147 146 */ 148 147 static inline void update_t_max_wait(transaction_t *transaction, 149 148 unsigned long ts) 150 149 { 151 - #ifdef CONFIG_JBD2_DEBUG 152 - if (jbd2_journal_enable_debug && 153 - time_after(transaction->t_start, ts)) { 154 - ts = jbd2_time_diff(ts, transaction->t_start); 155 - spin_lock(&transaction->t_handle_lock); 156 - if (ts > transaction->t_max_wait) 157 - transaction->t_max_wait = ts; 158 - spin_unlock(&transaction->t_handle_lock); 150 + unsigned long oldts, newts; 151 + 152 + if (time_after(transaction->t_start, ts)) { 153 + newts = jbd2_time_diff(ts, transaction->t_start); 154 + oldts = READ_ONCE(transaction->t_max_wait); 155 + while (oldts < newts) 156 + oldts = cmpxchg(&transaction->t_max_wait, oldts, newts); 159 157 } 160 - #endif 161 158 } 162 159 163 160 /* ··· 685 690 DIV_ROUND_UP( 686 691 handle->h_revoke_credits_requested, 687 692 journal->j_revoke_records_per_block); 688 - spin_lock(&transaction->t_handle_lock); 689 693 wanted = atomic_add_return(nblocks, 690 694 &transaction->t_outstanding_credits); 691 695 ··· 692 698 jbd_debug(3, "denied handle %p %d blocks: " 693 699 "transaction too large\n", handle, nblocks); 694 700 atomic_sub(nblocks, &transaction->t_outstanding_credits); 695 - goto unlock; 701 + goto error_out; 696 702 } 697 703 698 704 trace_jbd2_handle_extend(journal->j_fs_dev->bd_dev, ··· 708 714 result = 0; 709 715 710 716 jbd_debug(3, "extended handle %p by %d\n", handle, nblocks); 711 - unlock: 712 - spin_unlock(&transaction->t_handle_lock); 713 717 error_out: 714 718 read_unlock(&journal->j_state_lock); 715 719 return result; ··· 834 842 */ 835 843 void jbd2_journal_wait_updates(journal_t *journal) 836 844 { 837 - transaction_t *commit_transaction = journal->j_running_transaction; 845 + DEFINE_WAIT(wait); 838 846 839 - if (!commit_transaction) 840 - return; 847 + while (1) { 848 + /* 849 + * Note that the running transaction can get freed under us if 850 + * this transaction is getting committed in 851 + * 
jbd2_journal_commit_transaction() -> 852 + * jbd2_journal_free_transaction(). This can only happen when we 853 + * release j_state_lock -> schedule() -> acquire j_state_lock. 854 + * Hence we should everytime retrieve new j_running_transaction 855 + * value (after j_state_lock release acquire cycle), else it may 856 + * lead to use-after-free of old freed transaction. 857 + */ 858 + transaction_t *transaction = journal->j_running_transaction; 841 859 842 - spin_lock(&commit_transaction->t_handle_lock); 843 - while (atomic_read(&commit_transaction->t_updates)) { 844 - DEFINE_WAIT(wait); 860 + if (!transaction) 861 + break; 845 862 846 863 prepare_to_wait(&journal->j_wait_updates, &wait, 847 - TASK_UNINTERRUPTIBLE); 848 - if (atomic_read(&commit_transaction->t_updates)) { 849 - spin_unlock(&commit_transaction->t_handle_lock); 850 - write_unlock(&journal->j_state_lock); 851 - schedule(); 852 - write_lock(&journal->j_state_lock); 853 - spin_lock(&commit_transaction->t_handle_lock); 864 + TASK_UNINTERRUPTIBLE); 865 + if (!atomic_read(&transaction->t_updates)) { 866 + finish_wait(&journal->j_wait_updates, &wait); 867 + break; 854 868 } 869 + write_unlock(&journal->j_state_lock); 870 + schedule(); 855 871 finish_wait(&journal->j_wait_updates, &wait); 872 + write_lock(&journal->j_state_lock); 856 873 } 857 - spin_unlock(&commit_transaction->t_handle_lock); 858 874 } 859 875 860 876 /** ··· 877 877 */ 878 878 void jbd2_journal_lock_updates(journal_t *journal) 879 879 { 880 - DEFINE_WAIT(wait); 881 - 882 880 jbd2_might_wait_for_commit(journal); 883 881 884 882 write_lock(&journal->j_state_lock);
-3
include/linux/jbd2.h
··· 554 554 * ->j_list_lock 555 555 * 556 556 * j_state_lock 557 - * ->t_handle_lock 558 - * 559 - * j_state_lock 560 557 * ->j_list_lock (journal_unmap_buffer) 561 558 * 562 559 */
+179 -99
include/trace/events/ext4.h
··· 95 95 { FALLOC_FL_COLLAPSE_RANGE, "COLLAPSE_RANGE"}, \ 96 96 { FALLOC_FL_ZERO_RANGE, "ZERO_RANGE"}) 97 97 98 + TRACE_DEFINE_ENUM(EXT4_FC_REASON_XATTR); 99 + TRACE_DEFINE_ENUM(EXT4_FC_REASON_CROSS_RENAME); 100 + TRACE_DEFINE_ENUM(EXT4_FC_REASON_JOURNAL_FLAG_CHANGE); 101 + TRACE_DEFINE_ENUM(EXT4_FC_REASON_NOMEM); 102 + TRACE_DEFINE_ENUM(EXT4_FC_REASON_SWAP_BOOT); 103 + TRACE_DEFINE_ENUM(EXT4_FC_REASON_RESIZE); 104 + TRACE_DEFINE_ENUM(EXT4_FC_REASON_RENAME_DIR); 105 + TRACE_DEFINE_ENUM(EXT4_FC_REASON_FALLOC_RANGE); 106 + TRACE_DEFINE_ENUM(EXT4_FC_REASON_INODE_JOURNAL_DATA); 107 + TRACE_DEFINE_ENUM(EXT4_FC_REASON_MAX); 108 + 98 109 #define show_fc_reason(reason) \ 99 110 __print_symbolic(reason, \ 100 111 { EXT4_FC_REASON_XATTR, "XATTR"}, \ ··· 2654 2643 __entry->off = off; 2655 2644 ), 2656 2645 2657 - TP_printk("FC scan pass on dev %d,%d: error %d, off %d", 2646 + TP_printk("dev %d,%d error %d, off %d", 2658 2647 MAJOR(__entry->dev), MINOR(__entry->dev), 2659 2648 __entry->error, __entry->off) 2660 2649 ); ··· 2680 2669 __entry->priv2 = priv2; 2681 2670 ), 2682 2671 2683 - TP_printk("FC Replay %d,%d: tag %d, ino %d, data1 %d, data2 %d", 2672 + TP_printk("dev %d,%d: tag %d, ino %d, data1 %d, data2 %d", 2684 2673 MAJOR(__entry->dev), MINOR(__entry->dev), 2685 2674 __entry->tag, __entry->ino, __entry->priv1, __entry->priv2) 2686 2675 ); 2687 2676 2688 2677 TRACE_EVENT(ext4_fc_commit_start, 2689 - TP_PROTO(struct super_block *sb), 2678 + TP_PROTO(struct super_block *sb, tid_t commit_tid), 2690 2679 2691 - TP_ARGS(sb), 2680 + TP_ARGS(sb, commit_tid), 2692 2681 2693 2682 TP_STRUCT__entry( 2694 2683 __field(dev_t, dev) 2684 + __field(tid_t, tid) 2695 2685 ), 2696 2686 2697 2687 TP_fast_assign( 2698 2688 __entry->dev = sb->s_dev; 2689 + __entry->tid = commit_tid; 2699 2690 ), 2700 2691 2701 - TP_printk("fast_commit started on dev %d,%d", 2702 - MAJOR(__entry->dev), MINOR(__entry->dev)) 2692 + TP_printk("dev %d,%d tid %u", MAJOR(__entry->dev), MINOR(__entry->dev), 2693 + 
__entry->tid) 2703 2694 ); 2704 2695 2705 2696 TRACE_EVENT(ext4_fc_commit_stop, 2706 - TP_PROTO(struct super_block *sb, int nblks, int reason), 2697 + TP_PROTO(struct super_block *sb, int nblks, int reason, 2698 + tid_t commit_tid), 2707 2699 2708 - TP_ARGS(sb, nblks, reason), 2700 + TP_ARGS(sb, nblks, reason, commit_tid), 2709 2701 2710 2702 TP_STRUCT__entry( 2711 2703 __field(dev_t, dev) ··· 2717 2703 __field(int, num_fc) 2718 2704 __field(int, num_fc_ineligible) 2719 2705 __field(int, nblks_agg) 2706 + __field(tid_t, tid) 2720 2707 ), 2721 2708 2722 2709 TP_fast_assign( ··· 2728 2713 __entry->num_fc_ineligible = 2729 2714 EXT4_SB(sb)->s_fc_stats.fc_ineligible_commits; 2730 2715 __entry->nblks_agg = EXT4_SB(sb)->s_fc_stats.fc_numblks; 2716 + __entry->tid = commit_tid; 2731 2717 ), 2732 2718 2733 - TP_printk("fc on [%d,%d] nblks %d, reason %d, fc = %d, ineligible = %d, agg_nblks %d", 2719 + TP_printk("dev %d,%d nblks %d, reason %d, fc = %d, ineligible = %d, agg_nblks %d, tid %u", 2734 2720 MAJOR(__entry->dev), MINOR(__entry->dev), 2735 2721 __entry->nblks, __entry->reason, __entry->num_fc, 2736 - __entry->num_fc_ineligible, __entry->nblks_agg) 2722 + __entry->num_fc_ineligible, __entry->nblks_agg, __entry->tid) 2737 2723 ); 2738 2724 2739 2725 #define FC_REASON_NAME_STAT(reason) \ 2740 2726 show_fc_reason(reason), \ 2741 - __entry->sbi->s_fc_stats.fc_ineligible_reason_count[reason] 2727 + __entry->fc_ineligible_rc[reason] 2742 2728 2743 2729 TRACE_EVENT(ext4_fc_stats, 2744 - TP_PROTO(struct super_block *sb), 2730 + TP_PROTO(struct super_block *sb), 2745 2731 2746 - TP_ARGS(sb), 2732 + TP_ARGS(sb), 2747 2733 2748 - TP_STRUCT__entry( 2749 - __field(dev_t, dev) 2750 - __field(struct ext4_sb_info *, sbi) 2751 - __field(int, count) 2752 - ), 2734 + TP_STRUCT__entry( 2735 + __field(dev_t, dev) 2736 + __array(unsigned int, fc_ineligible_rc, EXT4_FC_REASON_MAX) 2737 + __field(unsigned long, fc_commits) 2738 + __field(unsigned long, fc_ineligible_commits) 2739 + 
__field(unsigned long, fc_numblks) 2740 + ), 2753 2741 2754 - TP_fast_assign( 2755 - __entry->dev = sb->s_dev; 2756 - __entry->sbi = EXT4_SB(sb); 2757 - ), 2742 + TP_fast_assign( 2743 + int i; 2758 2744 2759 - TP_printk("dev %d:%d fc ineligible reasons:\n" 2760 - "%s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d; " 2761 - "num_commits:%ld, ineligible: %ld, numblks: %ld", 2762 - MAJOR(__entry->dev), MINOR(__entry->dev), 2763 - FC_REASON_NAME_STAT(EXT4_FC_REASON_XATTR), 2764 - FC_REASON_NAME_STAT(EXT4_FC_REASON_CROSS_RENAME), 2765 - FC_REASON_NAME_STAT(EXT4_FC_REASON_JOURNAL_FLAG_CHANGE), 2766 - FC_REASON_NAME_STAT(EXT4_FC_REASON_NOMEM), 2767 - FC_REASON_NAME_STAT(EXT4_FC_REASON_SWAP_BOOT), 2768 - FC_REASON_NAME_STAT(EXT4_FC_REASON_RESIZE), 2769 - FC_REASON_NAME_STAT(EXT4_FC_REASON_RENAME_DIR), 2770 - FC_REASON_NAME_STAT(EXT4_FC_REASON_FALLOC_RANGE), 2771 - FC_REASON_NAME_STAT(EXT4_FC_REASON_INODE_JOURNAL_DATA), 2772 - __entry->sbi->s_fc_stats.fc_num_commits, 2773 - __entry->sbi->s_fc_stats.fc_ineligible_commits, 2774 - __entry->sbi->s_fc_stats.fc_numblks) 2745 + __entry->dev = sb->s_dev; 2746 + for (i = 0; i < EXT4_FC_REASON_MAX; i++) { 2747 + __entry->fc_ineligible_rc[i] = 2748 + EXT4_SB(sb)->s_fc_stats.fc_ineligible_reason_count[i]; 2749 + } 2750 + __entry->fc_commits = EXT4_SB(sb)->s_fc_stats.fc_num_commits; 2751 + __entry->fc_ineligible_commits = 2752 + EXT4_SB(sb)->s_fc_stats.fc_ineligible_commits; 2753 + __entry->fc_numblks = EXT4_SB(sb)->s_fc_stats.fc_numblks; 2754 + ), 2775 2755 2756 + TP_printk("dev %d,%d fc ineligible reasons:\n" 2757 + "%s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u " 2758 + "num_commits:%lu, ineligible: %lu, numblks: %lu", 2759 + MAJOR(__entry->dev), MINOR(__entry->dev), 2760 + FC_REASON_NAME_STAT(EXT4_FC_REASON_XATTR), 2761 + FC_REASON_NAME_STAT(EXT4_FC_REASON_CROSS_RENAME), 2762 + FC_REASON_NAME_STAT(EXT4_FC_REASON_JOURNAL_FLAG_CHANGE), 2763 + FC_REASON_NAME_STAT(EXT4_FC_REASON_NOMEM), 2764 + 
FC_REASON_NAME_STAT(EXT4_FC_REASON_SWAP_BOOT), 2765 + FC_REASON_NAME_STAT(EXT4_FC_REASON_RESIZE), 2766 + FC_REASON_NAME_STAT(EXT4_FC_REASON_RENAME_DIR), 2767 + FC_REASON_NAME_STAT(EXT4_FC_REASON_FALLOC_RANGE), 2768 + FC_REASON_NAME_STAT(EXT4_FC_REASON_INODE_JOURNAL_DATA), 2769 + __entry->fc_commits, __entry->fc_ineligible_commits, 2770 + __entry->fc_numblks) 2776 2771 ); 2777 2772 2778 - #define DEFINE_TRACE_DENTRY_EVENT(__type) \ 2779 - TRACE_EVENT(ext4_fc_track_##__type, \ 2780 - TP_PROTO(struct inode *inode, struct dentry *dentry, int ret), \ 2781 - \ 2782 - TP_ARGS(inode, dentry, ret), \ 2783 - \ 2784 - TP_STRUCT__entry( \ 2785 - __field(dev_t, dev) \ 2786 - __field(int, ino) \ 2787 - __field(int, error) \ 2788 - ), \ 2789 - \ 2790 - TP_fast_assign( \ 2791 - __entry->dev = inode->i_sb->s_dev; \ 2792 - __entry->ino = inode->i_ino; \ 2793 - __entry->error = ret; \ 2794 - ), \ 2795 - \ 2796 - TP_printk("dev %d:%d, inode %d, error %d, fc_%s", \ 2797 - MAJOR(__entry->dev), MINOR(__entry->dev), \ 2798 - __entry->ino, __entry->error, \ 2799 - #__type) \ 2800 - ) 2773 + DECLARE_EVENT_CLASS(ext4_fc_track_dentry, 2801 2774 2802 - DEFINE_TRACE_DENTRY_EVENT(create); 2803 - DEFINE_TRACE_DENTRY_EVENT(link); 2804 - DEFINE_TRACE_DENTRY_EVENT(unlink); 2775 + TP_PROTO(handle_t *handle, struct inode *inode, 2776 + struct dentry *dentry, int ret), 2777 + 2778 + TP_ARGS(handle, inode, dentry, ret), 2779 + 2780 + TP_STRUCT__entry( 2781 + __field(dev_t, dev) 2782 + __field(tid_t, t_tid) 2783 + __field(ino_t, i_ino) 2784 + __field(tid_t, i_sync_tid) 2785 + __field(int, error) 2786 + ), 2787 + 2788 + TP_fast_assign( 2789 + struct ext4_inode_info *ei = EXT4_I(inode); 2790 + 2791 + __entry->dev = inode->i_sb->s_dev; 2792 + __entry->t_tid = handle->h_transaction->t_tid; 2793 + __entry->i_ino = inode->i_ino; 2794 + __entry->i_sync_tid = ei->i_sync_tid; 2795 + __entry->error = ret; 2796 + ), 2797 + 2798 + TP_printk("dev %d,%d, t_tid %u, ino %lu, i_sync_tid %u, error %d", 2799 + 
MAJOR(__entry->dev), MINOR(__entry->dev), 2800 + __entry->t_tid, __entry->i_ino, __entry->i_sync_tid, 2801 + __entry->error 2802 + ) 2803 + ); 2804 + 2805 + #define DEFINE_EVENT_CLASS_DENTRY(__type) \ 2806 + DEFINE_EVENT(ext4_fc_track_dentry, ext4_fc_track_##__type, \ 2807 + TP_PROTO(handle_t *handle, struct inode *inode, \ 2808 + struct dentry *dentry, int ret), \ 2809 + TP_ARGS(handle, inode, dentry, ret) \ 2810 + ) 2811 + 2812 + DEFINE_EVENT_CLASS_DENTRY(create); 2813 + DEFINE_EVENT_CLASS_DENTRY(link); 2814 + DEFINE_EVENT_CLASS_DENTRY(unlink); 2805 2815 2806 2816 TRACE_EVENT(ext4_fc_track_inode, 2807 - TP_PROTO(struct inode *inode, int ret), 2817 + TP_PROTO(handle_t *handle, struct inode *inode, int ret), 2808 2818 2809 - TP_ARGS(inode, ret), 2819 + TP_ARGS(handle, inode, ret), 2810 2820 2811 - TP_STRUCT__entry( 2812 - __field(dev_t, dev) 2813 - __field(int, ino) 2814 - __field(int, error) 2815 - ), 2821 + TP_STRUCT__entry( 2822 + __field(dev_t, dev) 2823 + __field(tid_t, t_tid) 2824 + __field(ino_t, i_ino) 2825 + __field(tid_t, i_sync_tid) 2826 + __field(int, error) 2827 + ), 2816 2828 2817 - TP_fast_assign( 2818 - __entry->dev = inode->i_sb->s_dev; 2819 - __entry->ino = inode->i_ino; 2820 - __entry->error = ret; 2821 - ), 2829 + TP_fast_assign( 2830 + struct ext4_inode_info *ei = EXT4_I(inode); 2822 2831 2823 - TP_printk("dev %d:%d, inode %d, error %d", 2824 - MAJOR(__entry->dev), MINOR(__entry->dev), 2825 - __entry->ino, __entry->error) 2832 + __entry->dev = inode->i_sb->s_dev; 2833 + __entry->t_tid = handle->h_transaction->t_tid; 2834 + __entry->i_ino = inode->i_ino; 2835 + __entry->i_sync_tid = ei->i_sync_tid; 2836 + __entry->error = ret; 2837 + ), 2838 + 2839 + TP_printk("dev %d:%d, t_tid %u, inode %lu, i_sync_tid %u, error %d", 2840 + MAJOR(__entry->dev), MINOR(__entry->dev), 2841 + __entry->t_tid, __entry->i_ino, __entry->i_sync_tid, 2842 + __entry->error) 2826 2843 ); 2827 2844 2828 2845 TRACE_EVENT(ext4_fc_track_range, 2829 - TP_PROTO(struct inode 
*inode, long start, long end, int ret), 2846 + TP_PROTO(handle_t *handle, struct inode *inode, 2847 + long start, long end, int ret), 2830 2848 2831 - TP_ARGS(inode, start, end, ret), 2849 + TP_ARGS(handle, inode, start, end, ret), 2832 2850 2833 - TP_STRUCT__entry( 2834 - __field(dev_t, dev) 2835 - __field(int, ino) 2836 - __field(long, start) 2837 - __field(long, end) 2838 - __field(int, error) 2839 - ), 2851 + TP_STRUCT__entry( 2852 + __field(dev_t, dev) 2853 + __field(tid_t, t_tid) 2854 + __field(ino_t, i_ino) 2855 + __field(tid_t, i_sync_tid) 2856 + __field(long, start) 2857 + __field(long, end) 2858 + __field(int, error) 2859 + ), 2840 2860 2841 - TP_fast_assign( 2842 - __entry->dev = inode->i_sb->s_dev; 2843 - __entry->ino = inode->i_ino; 2844 - __entry->start = start; 2845 - __entry->end = end; 2846 - __entry->error = ret; 2847 - ), 2861 + TP_fast_assign( 2862 + struct ext4_inode_info *ei = EXT4_I(inode); 2848 2863 2849 - TP_printk("dev %d:%d, inode %d, error %d, start %ld, end %ld", 2850 - MAJOR(__entry->dev), MINOR(__entry->dev), 2851 - __entry->ino, __entry->error, __entry->start, 2852 - __entry->end) 2864 + __entry->dev = inode->i_sb->s_dev; 2865 + __entry->t_tid = handle->h_transaction->t_tid; 2866 + __entry->i_ino = inode->i_ino; 2867 + __entry->i_sync_tid = ei->i_sync_tid; 2868 + __entry->start = start; 2869 + __entry->end = end; 2870 + __entry->error = ret; 2871 + ), 2872 + 2873 + TP_printk("dev %d:%d, t_tid %u, inode %lu, i_sync_tid %u, error %d, start %ld, end %ld", 2874 + MAJOR(__entry->dev), MINOR(__entry->dev), 2875 + __entry->t_tid, __entry->i_ino, __entry->i_sync_tid, 2876 + __entry->error, __entry->start, __entry->end) 2877 + ); 2878 + 2879 + TRACE_EVENT(ext4_fc_cleanup, 2880 + TP_PROTO(journal_t *journal, int full, tid_t tid), 2881 + 2882 + TP_ARGS(journal, full, tid), 2883 + 2884 + TP_STRUCT__entry( 2885 + __field(dev_t, dev) 2886 + __field(int, j_fc_off) 2887 + __field(int, full) 2888 + __field(tid_t, tid) 2889 + ), 2890 + 2891 + 
TP_fast_assign( 2892 + struct super_block *sb = journal->j_private; 2893 + 2894 + __entry->dev = sb->s_dev; 2895 + __entry->j_fc_off = journal->j_fc_off; 2896 + __entry->full = full; 2897 + __entry->tid = tid; 2898 + ), 2899 + 2900 + TP_printk("dev %d,%d, j_fc_off %d, full %d, tid %u", 2901 + MAJOR(__entry->dev), MINOR(__entry->dev), 2902 + __entry->j_fc_off, __entry->full, __entry->tid) 2853 2903 ); 2854 2904 2855 2905 TRACE_EVENT(ext4_update_sb,