Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ext4: fix unwritten counter leakage

ext4_set_io_unwritten_flag() increments the i_unwritten counter, so
once we mark end_io with EXT4_IO_END_UNWRITTEN we have to revert it
on the error path.

- add missing error checks to prevent counter leakage
- ext4_end_io_nolock() will clear the EXT4_IO_END_UNWRITTEN flag to signal
that conversion has finished.
- add BUG_ON to ext4_free_end_io() to prevent similar leakage in the future.

The visible effect of this bug is that unaligned aio_stress may deadlock.

Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

authored by

Dmitry Monakhov and committed by
Theodore Ts'o
82e54229 e27f41e1

+19 -8
+14 -7
fs/ext4/extents.c
··· 3633 3633 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { 3634 3634 ret = ext4_split_unwritten_extents(handle, inode, map, 3635 3635 path, flags); 3636 + if (ret <= 0) 3637 + goto out; 3636 3638 /* 3637 3639 * Flag the inode(non aio case) or end_io struct (aio case) 3638 3640 * that this IO needs to conversion to written when IO is ··· 3880 3878 struct ext4_allocation_request ar; 3881 3879 ext4_io_end_t *io = ext4_inode_aio(inode); 3882 3880 ext4_lblk_t cluster_offset; 3881 + int set_unwritten = 0; 3883 3882 3884 3883 ext_debug("blocks %u/%u requested for inode %lu\n", 3885 3884 map->m_lblk, map->m_len, inode->i_ino); ··· 4103 4100 * For non asycn direct IO case, flag the inode state 4104 4101 * that we need to perform conversion when IO is done. 4105 4102 */ 4106 - if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { 4107 - if (io) 4108 - ext4_set_io_unwritten_flag(inode, io); 4109 - else 4110 - ext4_set_inode_state(inode, 4111 - EXT4_STATE_DIO_UNWRITTEN); 4112 - } 4103 + if ((flags & EXT4_GET_BLOCKS_PRE_IO)) 4104 + set_unwritten = 1; 4113 4105 if (ext4_should_dioread_nolock(inode)) 4114 4106 map->m_flags |= EXT4_MAP_UNINIT; 4115 4107 } ··· 4116 4118 if (!err) 4117 4119 err = ext4_ext_insert_extent(handle, inode, path, 4118 4120 &newex, flags); 4121 + 4122 + if (!err && set_unwritten) { 4123 + if (io) 4124 + ext4_set_io_unwritten_flag(inode, io); 4125 + else 4126 + ext4_set_inode_state(inode, 4127 + EXT4_STATE_DIO_UNWRITTEN); 4128 + } 4129 + 4119 4130 if (err && free_on_err) { 4120 4131 int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ? 4121 4132 EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0;
+5 -1
fs/ext4/page-io.c
··· 71 71 int i; 72 72 73 73 BUG_ON(!io); 74 + BUG_ON(io->flag & EXT4_IO_END_UNWRITTEN); 75 + 74 76 if (io->page) 75 77 put_page(io->page); 76 78 for (i = 0; i < io->num_io_pages; i++) ··· 96 94 ssize_t size = io->size; 97 95 int ret = 0; 98 96 97 + BUG_ON(!(io->flag & EXT4_IO_END_UNWRITTEN)); 98 + 99 99 ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," 100 100 "list->prev 0x%p\n", 101 101 io, inode->i_ino, io->list.next, io->list.prev); ··· 110 106 "(inode %lu, offset %llu, size %zd, error %d)", 111 107 inode->i_ino, offset, size, ret); 112 108 } 113 - 109 + io->flag &= ~EXT4_IO_END_UNWRITTEN; 114 110 if (io->iocb) 115 111 aio_complete(io->iocb, io->result, 0); 116 112