Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ext4: do not ask jbd2 to write data for delalloc buffers

Currently, we ask jbd2 to write all dirty allocated buffers before
committing a transaction when doing writeback of delayed-allocation
(delalloc) blocks. However, this is unnecessary, since we move all pages
to writeback state before dropping the transaction handle and then submit
all the necessary IO. To avoid data exposure issues, the transaction
commit still needs to wait for all the outstanding writeback before it
flushes disk caches. Use the new jbd2 capability and ask it to
only wait for outstanding writeback during transaction commit when
writing back data in ext4_writepages().

Tested-by: "HUANG Weller (CM/ESW12-CN)" <Weller.Huang@cn.bosch.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>

authored by

Jan Kara and committed by
Theodore Ts'o
ee0876bc 41617e1a

+22 -5
+3
fs/ext4/ext4.h
··· 581 581 #define EXT4_GET_BLOCKS_ZERO 0x0200 582 582 #define EXT4_GET_BLOCKS_CREATE_ZERO (EXT4_GET_BLOCKS_CREATE |\ 583 583 EXT4_GET_BLOCKS_ZERO) 584 + /* Caller will submit data before dropping transaction handle. This 585 + * allows jbd2 to avoid submitting data before commit. */ 586 + #define EXT4_GET_BLOCKS_IO_SUBMIT 0x0400 584 587 585 588 /* 586 589 * The bit position of these flags must not overlap with any of the
+11 -1
fs/ext4/ext4_jbd2.h
··· 359 359 return 0; 360 360 } 361 361 362 - static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode) 362 + static inline int ext4_jbd2_inode_add_write(handle_t *handle, 363 + struct inode *inode) 363 364 { 364 365 if (ext4_handle_valid(handle)) 365 366 return jbd2_journal_inode_add_write(handle, 366 367 EXT4_I(inode)->jinode); 368 + return 0; 369 + } 370 + 371 + static inline int ext4_jbd2_inode_add_wait(handle_t *handle, 372 + struct inode *inode) 373 + { 374 + if (ext4_handle_valid(handle)) 375 + return jbd2_journal_inode_add_wait(handle, 376 + EXT4_I(inode)->jinode); 367 377 return 0; 368 378 } 369 379
+7 -3
fs/ext4/inode.c
··· 695 695 !(flags & EXT4_GET_BLOCKS_ZERO) && 696 696 !IS_NOQUOTA(inode) && 697 697 ext4_should_order_data(inode)) { 698 - ret = ext4_jbd2_file_inode(handle, inode); 698 + if (flags & EXT4_GET_BLOCKS_IO_SUBMIT) 699 + ret = ext4_jbd2_inode_add_wait(handle, inode); 700 + else 701 + ret = ext4_jbd2_inode_add_write(handle, inode); 699 702 if (ret) 700 703 return ret; 701 704 } ··· 2322 2319 * the data was copied into the page cache. 2323 2320 */ 2324 2321 get_blocks_flags = EXT4_GET_BLOCKS_CREATE | 2325 - EXT4_GET_BLOCKS_METADATA_NOFAIL; 2322 + EXT4_GET_BLOCKS_METADATA_NOFAIL | 2323 + EXT4_GET_BLOCKS_IO_SUBMIT; 2326 2324 dioread_nolock = ext4_should_dioread_nolock(inode); 2327 2325 if (dioread_nolock) 2328 2326 get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT; ··· 3638 3634 err = 0; 3639 3635 mark_buffer_dirty(bh); 3640 3636 if (ext4_should_order_data(inode)) 3641 - err = ext4_jbd2_file_inode(handle, inode); 3637 + err = ext4_jbd2_inode_add_write(handle, inode); 3642 3638 } 3643 3639 3644 3640 unlock:
+1 -1
fs/ext4/move_extent.c
··· 400 400 401 401 /* Even in case of data=writeback it is reasonable to pin 402 402 * inode to transaction, to prevent unexpected data loss */ 403 - *err = ext4_jbd2_file_inode(handle, orig_inode); 403 + *err = ext4_jbd2_inode_add_write(handle, orig_inode); 404 404 405 405 unlock_pages: 406 406 unlock_page(pagep[0]);