Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ext4: Replace hackish ext4_mb_poll_new_transaction with commit callback

The multiblock allocator needs to be able to release blocks (and issue
a blkdev discard request) when the transaction which freed those
blocks is committed. Previously this was done via a polling mechanism
when blocks are allocated or freed. A much better way of doing things
is to create a jbd2 callback function and attach the list of blocks
to be freed directly to the transaction structure.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

+29 -75
-3
fs/ext4/ext4_sb.h
··· 99 99 struct inode *s_buddy_cache; 100 100 long s_blocks_reserved; 101 101 spinlock_t s_reserve_lock; 102 - struct list_head s_active_transaction; 103 - struct list_head s_closed_transaction; 104 - struct list_head s_committed_transaction; 105 102 spinlock_t s_md_lock; 106 103 tid_t s_last_transaction; 107 104 unsigned short *s_mb_offsets, *s_mb_maxs;
+15 -70
fs/ext4/mballoc.c
··· 2523 2523 } 2524 2524 2525 2525 spin_lock_init(&sbi->s_md_lock); 2526 - INIT_LIST_HEAD(&sbi->s_active_transaction); 2527 - INIT_LIST_HEAD(&sbi->s_closed_transaction); 2528 - INIT_LIST_HEAD(&sbi->s_committed_transaction); 2529 2526 spin_lock_init(&sbi->s_bal_lock); 2530 2527 2531 2528 sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN; ··· 2550 2553 2551 2554 ext4_mb_init_per_dev_proc(sb); 2552 2555 ext4_mb_history_init(sb); 2556 + 2557 + sbi->s_journal->j_commit_callback = release_blocks_on_commit; 2553 2558 2554 2559 printk(KERN_INFO "EXT4-fs: mballoc enabled\n"); 2555 2560 return 0; ··· 2581 2582 int num_meta_group_infos; 2582 2583 struct ext4_group_info *grinfo; 2583 2584 struct ext4_sb_info *sbi = EXT4_SB(sb); 2584 - 2585 - /* release freed, non-committed blocks */ 2586 - spin_lock(&sbi->s_md_lock); 2587 - list_splice_init(&sbi->s_closed_transaction, 2588 - &sbi->s_committed_transaction); 2589 - list_splice_init(&sbi->s_active_transaction, 2590 - &sbi->s_committed_transaction); 2591 - spin_unlock(&sbi->s_md_lock); 2592 - ext4_mb_free_committed_blocks(sb); 2593 2585 2594 2586 if (sbi->s_group_info) { 2595 2587 for (i = 0; i < sbi->s_groups_count; i++) { ··· 2635 2645 return 0; 2636 2646 } 2637 2647 2638 - static noinline_for_stack void 2639 - ext4_mb_free_committed_blocks(struct super_block *sb) 2648 + /* 2649 + * This function is called by the jbd2 layer once the commit has finished, 2650 + * so we know we can free the blocks that were released with that commit. 
2651 + */ 2652 + static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) 2640 2653 { 2654 + struct super_block *sb = journal->j_private; 2641 2655 struct ext4_buddy e4b; 2642 2656 struct ext4_group_info *db; 2643 - struct ext4_sb_info *sbi = EXT4_SB(sb); 2644 2657 int err, count = 0, count2 = 0; 2645 2658 struct ext4_free_data *entry; 2646 2659 ext4_fsblk_t discard_block; 2660 + struct list_head *l, *ltmp; 2647 2661 2648 - if (list_empty(&sbi->s_committed_transaction)) 2649 - return; 2650 - 2651 - /* there is committed blocks to be freed yet */ 2652 - do { 2653 - /* get next array of blocks */ 2654 - entry = NULL; 2655 - spin_lock(&sbi->s_md_lock); 2656 - if (!list_empty(&sbi->s_committed_transaction)) { 2657 - entry = list_entry(sbi->s_committed_transaction.next, 2658 - struct ext4_free_data, list); 2659 - list_del(&entry->list); 2660 - } 2661 - spin_unlock(&sbi->s_md_lock); 2662 - 2663 - if (entry == NULL) 2664 - break; 2662 + list_for_each_safe(l, ltmp, &txn->t_private_list) { 2663 + entry = list_entry(l, struct ext4_free_data, list); 2665 2664 2666 2665 mb_debug("gonna free %u blocks in group %lu (0x%p):", 2667 - entry->count, entry->group, entry); 2666 + entry->count, entry->group, entry); 2668 2667 2669 2668 err = ext4_mb_load_buddy(sb, entry->group, &e4b); 2670 2669 /* we expect to find existing buddy because it's pinned */ ··· 2685 2706 2686 2707 kmem_cache_free(ext4_free_ext_cachep, entry); 2687 2708 ext4_mb_release_desc(&e4b); 2688 - } while (1); 2709 + } 2689 2710 2690 2711 mb_debug("freed %u blocks in %u structures\n", count, count2); 2691 2712 } ··· 4327 4348 goto out1; 4328 4349 } 4329 4350 4330 - ext4_mb_poll_new_transaction(sb, handle); 4331 - 4332 4351 *errp = ext4_mb_initialize_context(ac, ar); 4333 4352 if (*errp) { 4334 4353 ar->len = 0; ··· 4384 4407 DQUOT_FREE_BLOCK(ar->inode, inquota - ar->len); 4385 4408 4386 4409 return block; 4387 - } 4388 - static void ext4_mb_poll_new_transaction(struct super_block *sb, 4389 - 
handle_t *handle) 4390 - { 4391 - struct ext4_sb_info *sbi = EXT4_SB(sb); 4392 - 4393 - if (sbi->s_last_transaction == handle->h_transaction->t_tid) 4394 - return; 4395 - 4396 - /* new transaction! time to close last one and free blocks for 4397 - * committed transaction. we know that only transaction can be 4398 - * active, so previos transaction can be being logged and we 4399 - * know that transaction before previous is known to be already 4400 - * logged. this means that now we may free blocks freed in all 4401 - * transactions before previous one. hope I'm clear enough ... */ 4402 - 4403 - spin_lock(&sbi->s_md_lock); 4404 - if (sbi->s_last_transaction != handle->h_transaction->t_tid) { 4405 - mb_debug("new transaction %lu, old %lu\n", 4406 - (unsigned long) handle->h_transaction->t_tid, 4407 - (unsigned long) sbi->s_last_transaction); 4408 - list_splice_init(&sbi->s_closed_transaction, 4409 - &sbi->s_committed_transaction); 4410 - list_splice_init(&sbi->s_active_transaction, 4411 - &sbi->s_closed_transaction); 4412 - sbi->s_last_transaction = handle->h_transaction->t_tid; 4413 - } 4414 - spin_unlock(&sbi->s_md_lock); 4415 - 4416 - ext4_mb_free_committed_blocks(sb); 4417 4410 } 4418 4411 4419 4412 /* ··· 4478 4531 kmem_cache_free(ext4_free_ext_cachep, entry); 4479 4532 } 4480 4533 } 4481 - /* Add the extent to active_transaction list */ 4534 + /* Add the extent to transaction's private list */ 4482 4535 spin_lock(&sbi->s_md_lock); 4483 - list_add(&new_entry->list, &sbi->s_active_transaction); 4536 + list_add(&new_entry->list, &handle->h_transaction->t_private_list); 4484 4537 spin_unlock(&sbi->s_md_lock); 4485 4538 ext4_unlock_group(sb, group); 4486 4539 return 0; ··· 4508 4561 int ret; 4509 4562 4510 4563 *freed = 0; 4511 - 4512 - ext4_mb_poll_new_transaction(sb, handle); 4513 4564 4514 4565 sbi = EXT4_SB(sb); 4515 4566 es = EXT4_SB(sb)->s_es;
+1 -2
fs/ext4/mballoc.h
··· 269 269 270 270 static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, 271 271 ext4_group_t group); 272 - static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *); 273 - static void ext4_mb_free_committed_blocks(struct super_block *); 274 272 static void ext4_mb_return_to_preallocation(struct inode *inode, 275 273 struct ext4_buddy *e4b, sector_t block, 276 274 int count); ··· 276 278 struct super_block *, struct ext4_prealloc_space *pa); 277 279 static int ext4_mb_init_per_dev_proc(struct super_block *sb); 278 280 static int ext4_mb_destroy_per_dev_proc(struct super_block *sb); 281 + static void release_blocks_on_commit(journal_t *journal, transaction_t *txn); 279 282 280 283 281 284 static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
+3
fs/jbd2/commit.c
··· 995 995 } 996 996 spin_unlock(&journal->j_list_lock); 997 997 998 + if (journal->j_commit_callback) 999 + journal->j_commit_callback(journal, commit_transaction); 1000 + 998 1001 trace_mark(jbd2_end_commit, "dev %s transaction %d head %d", 999 1002 journal->j_devname, commit_transaction->t_tid, 1000 1003 journal->j_tail_sequence);
+1
fs/jbd2/transaction.c
··· 52 52 transaction->t_expires = jiffies + journal->j_commit_interval; 53 53 spin_lock_init(&transaction->t_handle_lock); 54 54 INIT_LIST_HEAD(&transaction->t_inode_list); 55 + INIT_LIST_HEAD(&transaction->t_private_list); 55 56 56 57 /* Set up the commit timer for the new transaction. */ 57 58 journal->j_commit_timer.expires = round_jiffies(transaction->t_expires);
+9
include/linux/jbd2.h
··· 641 641 */ 642 642 int t_handle_count; 643 643 644 + /* 645 + * For use by the filesystem to store fs-specific data 646 + * structures associated with the transaction 647 + */ 648 + struct list_head t_private_list; 644 649 }; 645 650 646 651 struct transaction_run_stats_s { ··· 939 934 int j_wbufsize; 940 935 941 936 pid_t j_last_sync_writer; 937 + 938 + /* This function is called when a transaction is closed */ 939 + void (*j_commit_callback)(journal_t *, 940 + transaction_t *); 942 941 943 942 /* 944 943 * Journal statistics