Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bcachefs: Improved nocow locking

This improves the nocow lock table so that hash table entries have
multiple locks, and locks specify which bucket they're for - i.e. we can
now resolve hash collisions.

This is important because the allocator has to skip buckets that are
locked in the nocow lock table, and previously hash collisions would
cause it to spuriously skip unlocked buckets.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>

+184 -67
+10
fs/bcachefs/alloc_background.h
··· 23 23 pos.offset < ca->mi.nbuckets; 24 24 } 25 25 26 + static inline u64 bucket_to_u64(struct bpos bucket) 27 + { 28 + return (bucket.inode << 48) | bucket.offset; 29 + } 30 + 31 + static inline struct bpos u64_to_bucket(u64 bucket) 32 + { 33 + return POS(bucket >> 48, bucket & ~(~0ULL << 48)); 34 + } 35 + 26 36 static inline u8 alloc_gc_gen(struct bch_alloc_v4 a) 27 37 { 28 38 return a.gen - a.oldest_gen;
+1
fs/bcachefs/alloc_foreground.c
··· 28 28 #include "io.h" 29 29 #include "journal.h" 30 30 #include "movinggc.h" 31 + #include "nocow_locking.h" 31 32 #include "trace.h" 32 33 33 34 #include <linux/math64.h>
+1 -1
fs/bcachefs/bcachefs.h
··· 206 206 #include "bcachefs_format.h" 207 207 #include "errcode.h" 208 208 #include "fifo.h" 209 - #include "nocow_locking.h" 209 + #include "nocow_locking_types.h" 210 210 #include "opts.h" 211 211 #include "util.h" 212 212
+1
fs/bcachefs/data_update.c
··· 11 11 #include "io.h" 12 12 #include "keylist.h" 13 13 #include "move.h" 14 + #include "nocow_locking.h" 14 15 #include "subvolume.h" 15 16 #include "trace.h" 16 17
+8 -10
fs/bcachefs/io.c
··· 27 27 #include "journal.h" 28 28 #include "keylist.h" 29 29 #include "move.h" 30 + #include "nocow_locking.h" 30 31 #include "rebalance.h" 31 32 #include "subvolume.h" 32 33 #include "super.h" ··· 1470 1469 struct { 1471 1470 struct bpos b; 1472 1471 unsigned gen; 1473 - two_state_lock_t *l; 1472 + struct nocow_lock_bucket *l; 1474 1473 } buckets[BCH_REPLICAS_MAX]; 1475 1474 unsigned nr_buckets = 0; 1476 1475 u32 snapshot; ··· 1517 1516 buckets[nr_buckets].b = PTR_BUCKET_POS(c, ptr); 1518 1517 buckets[nr_buckets].gen = ptr->gen; 1519 1518 buckets[nr_buckets].l = 1520 - bucket_nocow_lock(&c->nocow_locks, buckets[nr_buckets].b); 1519 + bucket_nocow_lock(&c->nocow_locks, 1520 + bucket_to_u64(buckets[nr_buckets].b)); 1521 1521 1522 1522 prefetch(buckets[nr_buckets].l); 1523 1523 nr_buckets++; ··· 1540 1538 1541 1539 for (i = 0; i < nr_buckets; i++) { 1542 1540 struct bch_dev *ca = bch_dev_bkey_exists(c, buckets[i].b.inode); 1543 - two_state_lock_t *l = buckets[i].l; 1541 + struct nocow_lock_bucket *l = buckets[i].l; 1544 1542 bool stale; 1545 1543 1546 - if (!bch2_two_state_trylock(l, BUCKET_NOCOW_LOCK_UPDATE)) 1547 - __bch2_bucket_nocow_lock(&c->nocow_locks, l, BUCKET_NOCOW_LOCK_UPDATE); 1544 + __bch2_bucket_nocow_lock(&c->nocow_locks, l, 1545 + bucket_to_u64(buckets[i].b), 1546 + BUCKET_NOCOW_LOCK_UPDATE); 1548 1547 1549 1548 rcu_read_lock(); 1550 1549 stale = gen_after(*bucket_gen(ca, buckets[i].b.offset), buckets[i].gen); ··· 2987 2984 2988 2985 int bch2_fs_io_init(struct bch_fs *c) 2989 2986 { 2990 - unsigned i; 2991 - 2992 - for (i = 0; i < ARRAY_SIZE(c->nocow_locks.l); i++) 2993 - two_state_lock_init(&c->nocow_locks.l[i]); 2994 - 2995 2987 if (bioset_init(&c->bio_read, 1, offsetof(struct bch_read_bio, bio), 2996 2988 BIOSET_NEED_BVECS) || 2997 2989 bioset_init(&c->bio_read_split, 1, offsetof(struct bch_read_bio, bio),
+111 -7
fs/bcachefs/nocow_locking.c
··· 4 4 #include "nocow_locking.h" 5 5 #include "util.h" 6 6 7 - void __bch2_bucket_nocow_lock(struct bucket_nocow_lock_table *t, 8 - two_state_lock_t *l, int flags) 9 - { 10 - struct bch_fs *c = container_of(t, struct bch_fs, nocow_locks); 11 - u64 start_time = local_clock(); 7 + #include <linux/closure.h> 12 8 13 - __bch2_two_state_lock(l, flags & BUCKET_NOCOW_LOCK_UPDATE); 14 - bch2_time_stats_update(&c->times[BCH_TIME_nocow_lock_contended], start_time); 9 + bool bch2_bucket_nocow_is_locked(struct bucket_nocow_lock_table *t, struct bpos bucket) 10 + { 11 + u64 dev_bucket = bucket_to_u64(bucket); 12 + struct nocow_lock_bucket *l = bucket_nocow_lock(t, dev_bucket); 13 + unsigned i; 14 + 15 + for (i = 0; i < ARRAY_SIZE(l->b); i++) 16 + if (l->b[i] == dev_bucket && atomic_read(&l->l[i])) 17 + return true; 18 + return false; 19 + } 20 + 21 + void bch2_bucket_nocow_unlock(struct bucket_nocow_lock_table *t, struct bpos bucket, int flags) 22 + { 23 + u64 dev_bucket = bucket_to_u64(bucket); 24 + struct nocow_lock_bucket *l = bucket_nocow_lock(t, dev_bucket); 25 + int lock_val = flags ? 1 : -1; 26 + unsigned i; 27 + 28 + for (i = 0; i < ARRAY_SIZE(l->b); i++) 29 + if (l->b[i] == dev_bucket) { 30 + if (!atomic_sub_return(lock_val, &l->l[i])) 31 + closure_wake_up(&l->wait); 32 + return; 33 + } 34 + 35 + BUG(); 36 + } 37 + 38 + bool __bch2_bucket_nocow_trylock(struct nocow_lock_bucket *l, 39 + u64 dev_bucket, int flags) 40 + { 41 + int v, lock_val = flags ? 1 : -1; 42 + unsigned i; 43 + 44 + spin_lock(&l->lock); 45 + 46 + for (i = 0; i < ARRAY_SIZE(l->b); i++) 47 + if (l->b[i] == dev_bucket) 48 + goto got_entry; 49 + 50 + for (i = 0; i < ARRAY_SIZE(l->b); i++) 51 + if (!atomic_read(&l->l[i])) { 52 + l->b[i] = dev_bucket; 53 + goto take_lock; 54 + } 55 + fail: 56 + spin_unlock(&l->lock); 57 + return false; 58 + got_entry: 59 + v = atomic_read(&l->l[i]); 60 + if (lock_val > 0 ? v < 0 : v > 0) 61 + goto fail; 62 + take_lock: 63 + atomic_add(lock_val, &l->l[i]); 64 + spin_unlock(&l->lock); 65 + return true; 66 + } 67 + 68 + void __bch2_bucket_nocow_lock(struct bucket_nocow_lock_table *t, 69 + struct nocow_lock_bucket *l, 70 + u64 dev_bucket, int flags) 71 + { 72 + if (!__bch2_bucket_nocow_trylock(l, dev_bucket, flags)) { 73 + struct bch_fs *c = container_of(t, struct bch_fs, nocow_locks); 74 + u64 start_time = local_clock(); 75 + 76 + __closure_wait_event(&l->wait, __bch2_bucket_nocow_trylock(l, dev_bucket, flags)); 77 + bch2_time_stats_update(&c->times[BCH_TIME_nocow_lock_contended], start_time); 78 + } 79 + } 80 + 81 + void bch2_nocow_locks_to_text(struct printbuf *out, struct bucket_nocow_lock_table *t) 82 + { 83 + unsigned i, nr_zero = 0; 84 + struct nocow_lock_bucket *l; 85 + 86 + for (l = t->l; l < t->l + ARRAY_SIZE(t->l); l++) { 87 + unsigned v = 0; 88 + 89 + for (i = 0; i < ARRAY_SIZE(l->l); i++) 90 + v |= atomic_read(&l->l[i]); 91 + 92 + if (!v) { 93 + nr_zero++; 94 + continue; 95 + } 96 + 97 + if (nr_zero) 98 + prt_printf(out, "(%u empty entries)\n", nr_zero); 99 + nr_zero = 0; 100 + 101 + for (i = 0; i < ARRAY_SIZE(l->l); i++) 102 + if (atomic_read(&l->l[i])) 103 + prt_printf(out, "%llu: %i ", l->b[i], atomic_read(&l->l[i])); 104 + prt_newline(out); 105 + } 106 + 107 + if (nr_zero) 108 + prt_printf(out, "(%u empty entries)\n", nr_zero); 109 + } 110 + 111 + int bch2_fs_nocow_locking_init(struct bch_fs *c) 112 + { 113 + unsigned i; 114 + 115 + for (i = 0; i < ARRAY_SIZE(c->nocow_locks.l); i++) 116 + spin_lock_init(&c->nocow_locks.l[i].lock); 117 + 118 + return 0; 15 119 }
+27 -33
fs/bcachefs/nocow_locking.h
··· 2 2 #ifndef _BCACHEFS_NOCOW_LOCKING_H 3 3 #define _BCACHEFS_NOCOW_LOCKING_H 4 4 5 - #include "bcachefs_format.h" 6 - #include "two_state_shared_lock.h" 5 + #include "bcachefs.h" 6 + #include "alloc_background.h" 7 + #include "nocow_locking_types.h" 7 8 8 9 #include <linux/hash.h> 9 10 10 - #define BUCKET_NOCOW_LOCKS_BITS 10 11 - #define BUCKET_NOCOW_LOCKS (1U << BUCKET_NOCOW_LOCKS_BITS) 12 - 13 - struct bucket_nocow_lock_table { 14 - two_state_lock_t l[BUCKET_NOCOW_LOCKS]; 15 - }; 16 - 17 - #define BUCKET_NOCOW_LOCK_UPDATE (1 << 0) 18 - 19 - static inline two_state_lock_t *bucket_nocow_lock(struct bucket_nocow_lock_table *t, 20 - struct bpos bucket) 11 + static inline struct nocow_lock_bucket *bucket_nocow_lock(struct bucket_nocow_lock_table *t, 12 + u64 dev_bucket) 21 13 { 22 - u64 dev_bucket = bucket.inode << 56 | bucket.offset; 23 14 unsigned h = hash_64(dev_bucket, BUCKET_NOCOW_LOCKS_BITS); 24 15 25 16 return t->l + (h & (BUCKET_NOCOW_LOCKS - 1)); 26 17 } 27 18 28 - static inline bool bch2_bucket_nocow_is_locked(struct bucket_nocow_lock_table *t, 29 - struct bpos bucket) 30 - { 31 - two_state_lock_t *l = bucket_nocow_lock(t, bucket); 19 + #define BUCKET_NOCOW_LOCK_UPDATE (1 << 0) 32 20 33 - return atomic_long_read(&l->v) != 0; 34 - } 35 - 36 - static inline void bch2_bucket_nocow_unlock(struct bucket_nocow_lock_table *t, 37 - struct bpos bucket, int flags) 38 - { 39 - two_state_lock_t *l = bucket_nocow_lock(t, bucket); 40 - 41 - bch2_two_state_unlock(l, flags & BUCKET_NOCOW_LOCK_UPDATE); 42 - } 43 - 44 - void __bch2_bucket_nocow_lock(struct bucket_nocow_lock_table *, two_state_lock_t *, int); 21 + bool bch2_bucket_nocow_is_locked(struct bucket_nocow_lock_table *, struct bpos); 22 + void bch2_bucket_nocow_unlock(struct bucket_nocow_lock_table *, struct bpos, int); 23 + bool __bch2_bucket_nocow_trylock(struct nocow_lock_bucket *, u64, int); 24 + void __bch2_bucket_nocow_lock(struct bucket_nocow_lock_table *, 25 + struct nocow_lock_bucket *, u64, int); 45 26 46 27 static inline void bch2_bucket_nocow_lock(struct bucket_nocow_lock_table *t, 47 28 struct bpos bucket, int flags) 48 29 { 49 - two_state_lock_t *l = bucket_nocow_lock(t, bucket); 30 + u64 dev_bucket = bucket_to_u64(bucket); 31 + struct nocow_lock_bucket *l = bucket_nocow_lock(t, dev_bucket); 50 32 51 - if (!bch2_two_state_trylock(l, flags & BUCKET_NOCOW_LOCK_UPDATE)) 52 - __bch2_bucket_nocow_lock(t, l, flags); 33 + __bch2_bucket_nocow_lock(t, l, dev_bucket, flags); 53 34 } 35 + 36 + static inline bool bch2_bucket_nocow_trylock(struct bucket_nocow_lock_table *t, 37 + struct bpos bucket, int flags) 38 + { 39 + u64 dev_bucket = bucket_to_u64(bucket); 40 + struct nocow_lock_bucket *l = bucket_nocow_lock(t, dev_bucket); 41 + 42 + return __bch2_bucket_nocow_trylock(l, dev_bucket, flags); 43 + } 44 + 45 + void bch2_nocow_locks_to_text(struct printbuf *, struct bucket_nocow_lock_table *); 46 + 47 + int bch2_fs_nocow_locking_init(struct bch_fs *); 54 48 55 49 #endif /* _BCACHEFS_NOCOW_LOCKING_H */
+20
fs/bcachefs/nocow_locking_types.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef _BCACHEFS_NOCOW_LOCKING_TYPES_H 3 + #define _BCACHEFS_NOCOW_LOCKING_TYPES_H 4 + 5 + #define BUCKET_NOCOW_LOCKS_BITS 10 6 + #define BUCKET_NOCOW_LOCKS (1U << BUCKET_NOCOW_LOCKS_BITS) 7 + 8 + struct nocow_lock_bucket { 9 + struct closure_waitlist wait; 10 + spinlock_t lock; 11 + u64 b[4]; 12 + atomic_t l[4]; 13 + } __aligned(SMP_CACHE_BYTES); 14 + 15 + struct bucket_nocow_lock_table { 16 + struct nocow_lock_bucket l[BUCKET_NOCOW_LOCKS]; 17 + }; 18 + 19 + #endif /* _BCACHEFS_NOCOW_LOCKING_TYPES_H */ 20 +
+2
fs/bcachefs/super.c
··· 39 39 #include "move.h" 40 40 #include "migrate.h" 41 41 #include "movinggc.h" 42 + #include "nocow_locking.h" 42 43 #include "quota.h" 43 44 #include "rebalance.h" 44 45 #include "recovery.h" ··· 822 821 bch2_fs_btree_write_buffer_init(c) ?: 823 822 bch2_fs_subvolumes_init(c) ?: 824 823 bch2_fs_io_init(c) ?: 824 + bch2_fs_nocow_locking_init(c) ?: 825 825 bch2_fs_encryption_init(c) ?: 826 826 bch2_fs_compress_init(c) ?: 827 827 bch2_fs_ec_init(c) ?:
+3 -16
fs/bcachefs/sysfs.c
··· 27 27 #include "journal.h" 28 28 #include "keylist.h" 29 29 #include "move.h" 30 + #include "nocow_locking.h" 30 31 #include "opts.h" 31 32 #include "rebalance.h" 32 33 #include "replicas.h" ··· 478 477 bch2_write_refs_to_text(out, c); 479 478 #endif 480 479 481 - if (attr == &sysfs_nocow_lock_table) { 482 - int i, count = 1; 483 - long last, curr = 0; 484 - 485 - last = atomic_long_read(&c->nocow_locks.l[0].v); 486 - for (i = 1; i < BUCKET_NOCOW_LOCKS; i++) { 487 - curr = atomic_long_read(&c->nocow_locks.l[i].v); 488 - if (last != curr) { 489 - prt_printf(out, "%li: %d\n", last, count); 490 - count = 1; 491 - last = curr; 492 - } else 493 - count++; 494 - } 495 - prt_printf(out, "%li: %d\n", last, count); 496 - } 480 + if (attr == &sysfs_nocow_lock_table) 481 + bch2_nocow_locks_to_text(out, &c->nocow_locks); 497 482 498 483 return 0; 499 484 }