Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Btrfs: fix deadlocks with trylock on tree nodes

The Btrfs tree trylock function is poorly named. It always takes
the spinlock and backs off if the blocking lock is held. This
can lead to surprising lockups because people expect it to really be a
trylock.

This commit makes it a pure trylock, both for the spinlock and the
blocking lock. It also reworks the nested lock handling slightly to
avoid taking the read lock while a spinning write lock might be held.

Signed-off-by: Chris Mason <clm@fb.com>

+46 -34
fs/btrfs/locking.c
··· 33 33 */ 34 34 void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw) 35 35 { 36 - if (eb->lock_nested) { 37 - read_lock(&eb->lock); 38 - if (eb->lock_nested && current->pid == eb->lock_owner) { 39 - read_unlock(&eb->lock); 40 - return; 41 - } 42 - read_unlock(&eb->lock); 43 - } 36 + /* 37 + * no lock is required. The lock owner may change if 38 + * we have a read lock, but it won't change to or away 39 + * from us. If we have the write lock, we are the owner 40 + * and it'll never change. 41 + */ 42 + if (eb->lock_nested && current->pid == eb->lock_owner) 43 + return; 44 44 if (rw == BTRFS_WRITE_LOCK) { 45 45 if (atomic_read(&eb->blocking_writers) == 0) { 46 46 WARN_ON(atomic_read(&eb->spinning_writers) != 1); ··· 65 65 */ 66 66 void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw) 67 67 { 68 - if (eb->lock_nested) { 69 - read_lock(&eb->lock); 70 - if (eb->lock_nested && current->pid == eb->lock_owner) { 71 - read_unlock(&eb->lock); 72 - return; 73 - } 74 - read_unlock(&eb->lock); 75 - } 68 + /* 69 + * no lock is required. The lock owner may change if 70 + * we have a read lock, but it won't change to or away 71 + * from us. If we have the write lock, we are the owner 72 + * and it'll never change. 
73 + */ 74 + if (eb->lock_nested && current->pid == eb->lock_owner) 75 + return; 76 + 76 77 if (rw == BTRFS_WRITE_LOCK_BLOCKING) { 77 78 BUG_ON(atomic_read(&eb->blocking_writers) != 1); 78 79 write_lock(&eb->lock); ··· 100 99 void btrfs_tree_read_lock(struct extent_buffer *eb) 101 100 { 102 101 again: 102 + BUG_ON(!atomic_read(&eb->blocking_writers) && 103 + current->pid == eb->lock_owner); 104 + 103 105 read_lock(&eb->lock); 104 106 if (atomic_read(&eb->blocking_writers) && 105 107 current->pid == eb->lock_owner) { ··· 136 132 if (atomic_read(&eb->blocking_writers)) 137 133 return 0; 138 134 139 - read_lock(&eb->lock); 135 + if (!read_trylock(&eb->lock)) 136 + return 0; 137 + 140 138 if (atomic_read(&eb->blocking_writers)) { 141 139 read_unlock(&eb->lock); 142 140 return 0; ··· 157 151 if (atomic_read(&eb->blocking_writers) || 158 152 atomic_read(&eb->blocking_readers)) 159 153 return 0; 160 - write_lock(&eb->lock); 154 + 155 + if (!write_trylock(&eb->lock)) 156 + return 0; 157 + 161 158 if (atomic_read(&eb->blocking_writers) || 162 159 atomic_read(&eb->blocking_readers)) { 163 160 write_unlock(&eb->lock); ··· 177 168 */ 178 169 void btrfs_tree_read_unlock(struct extent_buffer *eb) 179 170 { 180 - if (eb->lock_nested) { 181 - read_lock(&eb->lock); 182 - if (eb->lock_nested && current->pid == eb->lock_owner) { 183 - eb->lock_nested = 0; 184 - read_unlock(&eb->lock); 185 - return; 186 - } 187 - read_unlock(&eb->lock); 171 + /* 172 + * if we're nested, we have the write lock. No new locking 173 + * is needed as long as we are the lock owner. 174 + * The write unlock will do a barrier for us, and the lock_nested 175 + * field only matters to the lock owner. 
176 + */ 177 + if (eb->lock_nested && current->pid == eb->lock_owner) { 178 + eb->lock_nested = 0; 179 + return; 188 180 } 189 181 btrfs_assert_tree_read_locked(eb); 190 182 WARN_ON(atomic_read(&eb->spinning_readers) == 0); ··· 199 189 */ 200 190 void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb) 201 191 { 202 - if (eb->lock_nested) { 203 - read_lock(&eb->lock); 204 - if (eb->lock_nested && current->pid == eb->lock_owner) { 205 - eb->lock_nested = 0; 206 - read_unlock(&eb->lock); 207 - return; 208 - } 209 - read_unlock(&eb->lock); 192 + /* 193 + * if we're nested, we have the write lock. No new locking 194 + * is needed as long as we are the lock owner. 195 + * The write unlock will do a barrier for us, and the lock_nested 196 + * field only matters to the lock owner. 197 + */ 198 + if (eb->lock_nested && current->pid == eb->lock_owner) { 199 + eb->lock_nested = 0; 200 + return; 210 201 } 211 202 btrfs_assert_tree_read_locked(eb); 212 203 WARN_ON(atomic_read(&eb->blocking_readers) == 0); ··· 255 244 BUG_ON(blockers > 1); 256 245 257 246 btrfs_assert_tree_locked(eb); 247 + eb->lock_owner = 0; 258 248 atomic_dec(&eb->write_locks); 259 249 260 250 if (blockers) {