Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ceph: fix potential race condition on operations with CEPH_I_ODIRECT flag

The Coverity Scan service has detected potential
race conditions in ceph_block_o_direct(), ceph_start_io_read(),
ceph_block_buffered(), and ceph_start_io_direct() [1 - 4].

The CID 1590942, 1590665, 1589664, 1590377 contain explanation:
"The value of the shared data will be determined by
the interleaving of thread execution. Thread shared data is accessed
without holding an appropriate lock, possibly causing
a race condition (CWE-366)".

This patch reworks the pattern of accessing/modification of
CEPH_I_ODIRECT flag by means of adding smp_mb__before_atomic()
before reading the status of CEPH_I_ODIRECT flag and
smp_mb__after_atomic() after clearing set/clear this flag.
Also, it was reworked the pattern of using of ci->i_ceph_lock
in ceph_block_o_direct(), ceph_start_io_read(),
ceph_block_buffered(), and ceph_start_io_direct() methods.

[1] https://scan5.scan.coverity.com/#/project-view/64304/10063?selectedIssue=1590942
[2] https://scan5.scan.coverity.com/#/project-view/64304/10063?selectedIssue=1590665
[3] https://scan5.scan.coverity.com/#/project-view/64304/10063?selectedIssue=1589664
[4] https://scan5.scan.coverity.com/#/project-view/64304/10063?selectedIssue=1590377

Signed-off-by: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com>
Reviewed-by: Alex Markuze <amarkuze@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>

authored by

Viacheslav Dubeyko and committed by
Ilya Dryomov
fbeafe78 53db6f25

+44 -12
+42 -11
fs/ceph/io.c
··· 21 21 /* Call with exclusively locked inode->i_rwsem */ 22 22 static void ceph_block_o_direct(struct ceph_inode_info *ci, struct inode *inode) 23 23 { 24 + bool is_odirect; 25 + 24 26 lockdep_assert_held_write(&inode->i_rwsem); 25 27 26 - if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT) { 27 - spin_lock(&ci->i_ceph_lock); 28 - ci->i_ceph_flags &= ~CEPH_I_ODIRECT; 29 - spin_unlock(&ci->i_ceph_lock); 30 - inode_dio_wait(inode); 28 + spin_lock(&ci->i_ceph_lock); 29 + /* ensure that bit state is consistent */ 30 + smp_mb__before_atomic(); 31 + is_odirect = READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT; 32 + if (is_odirect) { 33 + clear_bit(CEPH_I_ODIRECT_BIT, &ci->i_ceph_flags); 34 + /* ensure modified bit is visible */ 35 + smp_mb__after_atomic(); 31 36 } 37 + spin_unlock(&ci->i_ceph_lock); 38 + 39 + if (is_odirect) 40 + inode_dio_wait(inode); 32 41 } 33 42 34 43 /** ··· 59 50 int ceph_start_io_read(struct inode *inode) 60 51 { 61 52 struct ceph_inode_info *ci = ceph_inode(inode); 53 + bool is_odirect; 62 54 int err; 63 55 64 56 /* Be an optimist! */ ··· 67 57 if (err) 68 58 return err; 69 59 70 - if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT)) 60 + spin_lock(&ci->i_ceph_lock); 61 + /* ensure that bit state is consistent */ 62 + smp_mb__before_atomic(); 63 + is_odirect = READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT; 64 + spin_unlock(&ci->i_ceph_lock); 65 + if (!is_odirect) 71 66 return 0; 72 67 up_read(&inode->i_rwsem); 73 68 ··· 131 116 /* Call with exclusively locked inode->i_rwsem */ 132 117 static void ceph_block_buffered(struct ceph_inode_info *ci, struct inode *inode) 133 118 { 119 + bool is_odirect; 120 + 134 121 lockdep_assert_held_write(&inode->i_rwsem); 135 122 136 - if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT)) { 137 - spin_lock(&ci->i_ceph_lock); 138 - ci->i_ceph_flags |= CEPH_I_ODIRECT; 139 - spin_unlock(&ci->i_ceph_lock); 123 + spin_lock(&ci->i_ceph_lock); 124 + /* ensure that bit state is consistent */ 125 + smp_mb__before_atomic(); 126 + is_odirect = READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT; 127 + if (!is_odirect) { 128 + set_bit(CEPH_I_ODIRECT_BIT, &ci->i_ceph_flags); 129 + /* ensure modified bit is visible */ 130 + smp_mb__after_atomic(); 131 + } 132 + spin_unlock(&ci->i_ceph_lock); 133 + 134 + if (!is_odirect) { 140 135 /* FIXME: unmap_mapping_range? */ 141 136 filemap_write_and_wait(inode->i_mapping); 142 137 } ··· 171 146 int ceph_start_io_direct(struct inode *inode) 172 147 { 173 148 struct ceph_inode_info *ci = ceph_inode(inode); 149 + bool is_odirect; 174 150 int err; 175 151 176 152 /* Be an optimist! */ ··· 179 153 if (err) 180 154 return err; 181 155 182 - if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT) 156 + spin_lock(&ci->i_ceph_lock); 157 + /* ensure that bit state is consistent */ 158 + smp_mb__before_atomic(); 159 + is_odirect = READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT; 160 + spin_unlock(&ci->i_ceph_lock); 161 + if (is_odirect) 183 162 return 0; 184 163 up_read(&inode->i_rwsem); 185 164
+2 -1
fs/ceph/super.h
··· 638 638 #define CEPH_I_FLUSH_SNAPS (1 << 8) /* need flush snapss */ 639 639 #define CEPH_I_ERROR_WRITE (1 << 9) /* have seen write errors */ 640 640 #define CEPH_I_ERROR_FILELOCK (1 << 10) /* have seen file lock errors */ 641 - #define CEPH_I_ODIRECT (1 << 11) /* inode in direct I/O mode */ 641 + #define CEPH_I_ODIRECT_BIT (11) /* inode in direct I/O mode */ 642 + #define CEPH_I_ODIRECT (1 << CEPH_I_ODIRECT_BIT) 642 643 #define CEPH_ASYNC_CREATE_BIT (12) /* async create in flight for this */ 643 644 #define CEPH_I_ASYNC_CREATE (1 << CEPH_ASYNC_CREATE_BIT) 644 645 #define CEPH_I_SHUTDOWN (1 << 13) /* inode is no longer usable */