// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2008 Oracle.  All rights reserved.
 */

#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/page-flags.h>
#include <asm/bug.h>
#include <trace/events/btrfs.h>
#include "misc.h"
#include "ctree.h"
#include "extent_io.h"
#include "locking.h"
#include "accessors.h"

/*
 * Lockdep class keys for extent_buffer->lock's in this root.  For a given
 * eb, the lockdep key is determined by the btrfs_root it belongs to and
 * the level the eb occupies in the tree.
 *
 * Different roots are used for different purposes and may nest inside each
 * other, and they require separate keysets.  As lockdep keys should be
 * static, assign keysets according to the purpose of the root as indicated
 * by btrfs_root->root_key.objectid.  This ensures that all special purpose
 * roots have separate keysets.
 *
 * Lock-nesting across peer nodes is always done with the immediate parent
 * node locked, thus preventing deadlock.  As lockdep doesn't know this, use
 * subclass to avoid triggering lockdep warnings in such cases.
 *
 * The key is set by the readpage_end_io_hook after the buffer has passed
 * csum validation but before the pages are unlocked.  It is also set by
 * btrfs_init_new_buffer on freshly allocated blocks.
 *
 * We also add a check to make sure the highest level of the tree is the
 * same as our lockdep setup here.  If BTRFS_MAX_LEVEL changes, this code
 * needs update as well.
 */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
#if BTRFS_MAX_LEVEL != 8
#error
#endif

#define DEFINE_LEVEL(stem, level)                                       \
        .names[level] = "btrfs-" stem "-0" #level,

#define DEFINE_NAME(stem)                                               \
        DEFINE_LEVEL(stem, 0)                                           \
        DEFINE_LEVEL(stem, 1)                                           \
        DEFINE_LEVEL(stem, 2)                                           \
        DEFINE_LEVEL(stem, 3)                                           \
        DEFINE_LEVEL(stem, 4)                                           \
        DEFINE_LEVEL(stem, 5)                                           \
        DEFINE_LEVEL(stem, 6)                                           \
        DEFINE_LEVEL(stem, 7)

static struct btrfs_lockdep_keyset {
        u64 id;                         /* root objectid */
        /* Longest entry: btrfs-block-group-00 */
        char names[BTRFS_MAX_LEVEL][24];
        struct lock_class_key keys[BTRFS_MAX_LEVEL];
} btrfs_lockdep_keysets[] = {
        { .id = BTRFS_ROOT_TREE_OBJECTID,        DEFINE_NAME("root")        },
        { .id = BTRFS_EXTENT_TREE_OBJECTID,      DEFINE_NAME("extent")      },
        { .id = BTRFS_CHUNK_TREE_OBJECTID,       DEFINE_NAME("chunk")       },
        { .id = BTRFS_DEV_TREE_OBJECTID,         DEFINE_NAME("dev")         },
        { .id = BTRFS_CSUM_TREE_OBJECTID,        DEFINE_NAME("csum")        },
        { .id = BTRFS_QUOTA_TREE_OBJECTID,       DEFINE_NAME("quota")       },
        { .id = BTRFS_TREE_LOG_OBJECTID,         DEFINE_NAME("log")         },
        { .id = BTRFS_TREE_RELOC_OBJECTID,       DEFINE_NAME("treloc")      },
        { .id = BTRFS_DATA_RELOC_TREE_OBJECTID,  DEFINE_NAME("dreloc")      },
        { .id = BTRFS_UUID_TREE_OBJECTID,        DEFINE_NAME("uuid")        },
        { .id = BTRFS_FREE_SPACE_TREE_OBJECTID,  DEFINE_NAME("free-space")  },
        { .id = BTRFS_BLOCK_GROUP_TREE_OBJECTID, DEFINE_NAME("block-group") },
        { .id = BTRFS_RAID_STRIPE_TREE_OBJECTID, DEFINE_NAME("raid-stripe") },
        { .id = 0,                               DEFINE_NAME("tree")        },
};

#undef DEFINE_LEVEL
#undef DEFINE_NAME

void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb, int level)
{
        struct btrfs_lockdep_keyset *ks;

        BUG_ON(level >= ARRAY_SIZE(ks->keys));

        /* Find the matching keyset, id 0 is the default entry */
        for (ks = btrfs_lockdep_keysets; ks->id; ks++)
                if (ks->id == objectid)
                        break;

        lockdep_set_class_and_name(&eb->lock, &ks->keys[level], ks->names[level]);
}

void btrfs_maybe_reset_lockdep_class(struct btrfs_root *root, struct extent_buffer *eb)
{
        if (test_bit(BTRFS_ROOT_RESET_LOCKDEP_CLASS, &root->state))
                btrfs_set_buffer_lockdep_class(root->root_key.objectid,
                                               eb, btrfs_header_level(eb));
}

#endif

#ifdef CONFIG_BTRFS_DEBUG
static void btrfs_set_eb_lock_owner(struct extent_buffer *eb, pid_t owner)
{
        eb->lock_owner = owner;
}
#else
static void btrfs_set_eb_lock_owner(struct extent_buffer *eb, pid_t owner) { }
#endif

/*
 * Extent buffer locking
 * =====================
 *
 * We use a rw_semaphore for tree locking, and the semantics are exactly the
 * same:
 *
 * - reader/writer exclusion
 * - writer/writer exclusion
 * - reader/reader sharing
 * - try-lock semantics for readers and writers
 *
 * The rwsem implementation does opportunistic spinning, which reduces the
 * number of times the locking task needs to sleep.
 */

/*
 * __btrfs_tree_read_lock - lock extent buffer for read
 * @eb:         the eb to be locked
 * @nest:       the nesting level to be used for lockdep
 *
 * This takes the read lock on the extent buffer, using the specified nesting
 * level for lockdep purposes.
 */
void __btrfs_tree_read_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest)
{
        u64 start_ns = 0;

        if (trace_btrfs_tree_read_lock_enabled())
                start_ns = ktime_get_ns();

        down_read_nested(&eb->lock, nest);
        trace_btrfs_tree_read_lock(eb, start_ns);
}

void btrfs_tree_read_lock(struct extent_buffer *eb)
{
        __btrfs_tree_read_lock(eb, BTRFS_NESTING_NORMAL);
}

/*
 * Try-lock for read.
 *
 * Return 1 if the lock has been taken, 0 otherwise.
 */
int btrfs_try_tree_read_lock(struct extent_buffer *eb)
{
        if (down_read_trylock(&eb->lock)) {
                trace_btrfs_try_tree_read_lock(eb);
                return 1;
        }
        return 0;
}

/*
 * Try-lock for write.
 *
 * Return 1 if the lock has been taken, 0 otherwise.
 */
int btrfs_try_tree_write_lock(struct extent_buffer *eb)
{
        if (down_write_trylock(&eb->lock)) {
                btrfs_set_eb_lock_owner(eb, current->pid);
                trace_btrfs_try_tree_write_lock(eb);
                return 1;
        }
        return 0;
}

/*
 * Release read lock.
 */
void btrfs_tree_read_unlock(struct extent_buffer *eb)
{
        trace_btrfs_tree_read_unlock(eb);
        up_read(&eb->lock);
}

/*
 * Lock eb for write.
 *
 * @eb:         the eb to lock
 * @nest:       the nesting to use for the lock
 *
 * Returns with the eb->lock write locked.
 */
void __btrfs_tree_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest)
        __acquires(&eb->lock)
{
        u64 start_ns = 0;

        if (trace_btrfs_tree_lock_enabled())
                start_ns = ktime_get_ns();

        down_write_nested(&eb->lock, nest);
        btrfs_set_eb_lock_owner(eb, current->pid);
        trace_btrfs_tree_lock(eb, start_ns);
}

void btrfs_tree_lock(struct extent_buffer *eb)
{
        __btrfs_tree_lock(eb, BTRFS_NESTING_NORMAL);
}

/*
 * Release the write lock.
 */
void btrfs_tree_unlock(struct extent_buffer *eb)
{
        trace_btrfs_tree_unlock(eb);
        btrfs_set_eb_lock_owner(eb, 0);
        up_write(&eb->lock);
}
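/*
 * Illustrative sketch, not part of the original file: how the nesting
 * levels passed to __btrfs_tree_lock() are meant to be used when a second
 * buffer of the same lockdep class has to be locked while another one is
 * already held (for example a freshly COWed copy while the original stays
 * locked), as described in the lockdep comment at the top of this file.
 * The helper name is hypothetical.
 */
static void __maybe_unused demo_lock_cow_copy(struct extent_buffer *buf,
                                              struct extent_buffer *cow)
{
        /* The caller is assumed to already hold the write lock on @buf. */
        lockdep_assert_held_write(&buf->lock);

        /*
         * A non-default nesting level tells lockdep that taking a second
         * lock of the same class is safe here, because the established
         * original/copy (or parent/child) order prevents deadlocks.
         */
        __btrfs_tree_lock(cow, BTRFS_NESTING_COW);

        /* ... copy items from @buf into @cow ... */

        btrfs_tree_unlock(cow);
}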
/*
 * This releases any locks held in the path starting at level and going all the
 * way up to the root.
 *
 * btrfs_search_slot will keep the lock held on higher nodes in a few corner
 * cases, such as COW of the block at slot zero in the node.  This ignores
 * those rules, and it should only be called when there are no more updates to
 * be done higher up in the tree.
 */
void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
{
        int i;

        if (path->keep_locks)
                return;

        for (i = level; i < BTRFS_MAX_LEVEL; i++) {
                if (!path->nodes[i])
                        continue;
                if (!path->locks[i])
                        continue;
                btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
                path->locks[i] = 0;
        }
}

/*
 * Loop around taking references on and locking the root node of the tree until
 * we end up with a lock on the root node.
 *
 * Return: root extent buffer with write lock held
 */
struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
{
        struct extent_buffer *eb;

        while (1) {
                eb = btrfs_root_node(root);

                btrfs_maybe_reset_lockdep_class(root, eb);
                btrfs_tree_lock(eb);
                if (eb == root->node)
                        break;
                btrfs_tree_unlock(eb);
                free_extent_buffer(eb);
        }
        return eb;
}

/*
 * Loop around taking references on and locking the root node of the tree until
 * we end up with a lock on the root node.
 *
 * Return: root extent buffer with read lock held
 */
struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
{
        struct extent_buffer *eb;

        while (1) {
                eb = btrfs_root_node(root);

                btrfs_maybe_reset_lockdep_class(root, eb);
                btrfs_tree_read_lock(eb);
                if (eb == root->node)
                        break;
                btrfs_tree_read_unlock(eb);
                free_extent_buffer(eb);
        }
        return eb;
}

/*
 * Loop around taking references on and locking the root node of the tree in
 * nowait mode until we end up with a lock on the root node or we return to
 * avoid blocking.
 *
 * Return: root extent buffer with read lock held, or ERR_PTR(-EAGAIN)
 */
struct extent_buffer *btrfs_try_read_lock_root_node(struct btrfs_root *root)
{
        struct extent_buffer *eb;

        while (1) {
                eb = btrfs_root_node(root);
                if (!btrfs_try_tree_read_lock(eb)) {
                        free_extent_buffer(eb);
                        return ERR_PTR(-EAGAIN);
                }
                if (eb == root->node)
                        break;
                btrfs_tree_read_unlock(eb);
                free_extent_buffer(eb);
        }
        return eb;
}
/*
 * DREW locks
 * ==========
 *
 * DREW stands for double-reader-writer-exclusion lock.  It's used in
 * situations where you want to provide A-B exclusion but not AA or BB.
 *
 * The current implementation gives more priority to readers.  If a reader
 * and a writer both race to acquire their respective sides of the lock, the
 * writer yields its lock as soon as it detects a concurrent reader.
 * Additionally, if there are pending readers, no new writers are allowed to
 * come in and acquire the lock.
 */

void btrfs_drew_lock_init(struct btrfs_drew_lock *lock)
{
        atomic_set(&lock->readers, 0);
        atomic_set(&lock->writers, 0);
        init_waitqueue_head(&lock->pending_readers);
        init_waitqueue_head(&lock->pending_writers);
}

/* Return true if acquisition is successful, false otherwise */
bool btrfs_drew_try_write_lock(struct btrfs_drew_lock *lock)
{
        if (atomic_read(&lock->readers))
                return false;

        atomic_inc(&lock->writers);

        /* Ensure writers count is updated before we check for pending readers */
        smp_mb__after_atomic();
        if (atomic_read(&lock->readers)) {
                btrfs_drew_write_unlock(lock);
                return false;
        }

        return true;
}

void btrfs_drew_write_lock(struct btrfs_drew_lock *lock)
{
        while (true) {
                if (btrfs_drew_try_write_lock(lock))
                        return;
                wait_event(lock->pending_writers, !atomic_read(&lock->readers));
        }
}

void btrfs_drew_write_unlock(struct btrfs_drew_lock *lock)
{
        atomic_dec(&lock->writers);
        cond_wake_up(&lock->pending_readers);
}
void btrfs_drew_read_lock(struct btrfs_drew_lock *lock)
{
        atomic_inc(&lock->readers);

        /*
         * Ensure the pending reader count is perceived BEFORE this reader
         * goes to sleep in case of active writers.  This guarantees new
         * writers won't be allowed and that the current reader will be
         * woken up when the last active writer finishes its job.
         */
        smp_mb__after_atomic();

        wait_event(lock->pending_readers, atomic_read(&lock->writers) == 0);
}

void btrfs_drew_read_unlock(struct btrfs_drew_lock *lock)
{
        /*
         * atomic_dec_and_test() implies a full barrier, so woken up writers
         * are guaranteed to see the decrement.
         */
        if (atomic_dec_and_test(&lock->readers))
                wake_up(&lock->pending_writers);
}
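/*
 * Illustrative sketch, not part of the original file: how the DREW lock
 * primitives above compose to give A-B exclusion.  Type-A operations take
 * the write side and type-B operations take the read side; several A's (or
 * several B's) may run concurrently, but never an A together with a B.  In
 * btrfs this pattern is used, for example, to exclude snapshot creation
 * from NOCOW writers.  The structure and function names below are
 * hypothetical, and the lock is assumed to have been set up with
 * btrfs_drew_lock_init().
 */
struct demo_shared_state {
        struct btrfs_drew_lock exclusion;
};

static void __maybe_unused demo_type_a_operation(struct demo_shared_state *s)
{
        /* Waits as long as any type-B operation holds the read side. */
        btrfs_drew_write_lock(&s->exclusion);
        /* ... type-A work, possibly concurrent with other type-A users ... */
        btrfs_drew_write_unlock(&s->exclusion);
}

static void __maybe_unused demo_type_b_operation(struct demo_shared_state *s)
{
        /* Waits as long as any type-A operation holds the write side. */
        btrfs_drew_read_lock(&s->exclusion);
        /* ... type-B work, possibly concurrent with other type-B users ... */
        btrfs_drew_read_unlock(&s->exclusion);
}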