Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bcachefs: seqmutex; fix a lockdep splat

We can't be holding btree_trans_lock while copying to user space, which
might incur a page fault. To fix this, convert it to a seqmutex so we
can unlock/relock.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>

+96 -23
+2 -1
fs/bcachefs/bcachefs.h
··· 208 208 #include "fifo.h" 209 209 #include "nocow_locking_types.h" 210 210 #include "opts.h" 211 + #include "seqmutex.h" 211 212 #include "util.h" 212 213 213 214 #ifdef CONFIG_BCACHEFS_DEBUG ··· 780 779 } btree_write_stats[BTREE_WRITE_TYPE_NR]; 781 780 782 781 /* btree_iter.c: */ 783 - struct mutex btree_trans_lock; 782 + struct seqmutex btree_trans_lock; 784 783 struct list_head btree_trans_list; 785 784 mempool_t btree_paths_pool; 786 785 mempool_t btree_trans_mem_pool;
+9 -9
fs/bcachefs/btree_iter.c
··· 2991 2991 if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG_TRANSACTIONS)) { 2992 2992 struct btree_trans *pos; 2993 2993 2994 - mutex_lock(&c->btree_trans_lock); 2994 + seqmutex_lock(&c->btree_trans_lock); 2995 2995 list_for_each_entry(pos, &c->btree_trans_list, list) { 2996 2996 /* 2997 2997 * We'd much prefer to be stricter here and completely ··· 3009 3009 } 3010 3010 list_add_tail(&trans->list, &c->btree_trans_list); 3011 3011 list_add_done: 3012 - mutex_unlock(&c->btree_trans_lock); 3012 + seqmutex_unlock(&c->btree_trans_lock); 3013 3013 } 3014 3014 } 3015 3015 ··· 3044 3044 3045 3045 bch2_trans_unlock(trans); 3046 3046 3047 + if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG_TRANSACTIONS)) { 3048 + seqmutex_lock(&c->btree_trans_lock); 3049 + list_del(&trans->list); 3050 + seqmutex_unlock(&c->btree_trans_lock); 3051 + } 3052 + 3047 3053 closure_sync(&trans->ref); 3048 3054 3049 3055 if (s) ··· 3060 3054 trans->nr_updates = 0; 3061 3055 3062 3056 check_btree_paths_leaked(trans); 3063 - 3064 - if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG_TRANSACTIONS)) { 3065 - mutex_lock(&c->btree_trans_lock); 3066 - list_del(&trans->list); 3067 - mutex_unlock(&c->btree_trans_lock); 3068 - } 3069 3057 3070 3058 srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx); 3071 3059 ··· 3198 3198 } 3199 3199 3200 3200 INIT_LIST_HEAD(&c->btree_trans_list); 3201 - mutex_init(&c->btree_trans_lock); 3201 + seqmutex_init(&c->btree_trans_lock); 3202 3202 3203 3203 ret = mempool_init_kmalloc_pool(&c->btree_paths_pool, 1, 3204 3204 sizeof(struct btree_path) * nr +
+35 -11
fs/bcachefs/debug.c
··· 627 627 struct bch_fs *c = i->c; 628 628 struct btree_trans *trans; 629 629 ssize_t ret = 0; 630 + u32 seq; 630 631 631 632 i->ubuf = buf; 632 633 i->size = size; 633 634 i->ret = 0; 634 - 635 - mutex_lock(&c->btree_trans_lock); 635 + restart: 636 + seqmutex_lock(&c->btree_trans_lock); 636 637 list_for_each_entry(trans, &c->btree_trans_list, list) { 637 638 if (trans->locking_wait.task->pid <= i->iter) 638 639 continue; 639 640 641 + closure_get(&trans->ref); 642 + seq = seqmutex_seq(&c->btree_trans_lock); 643 + seqmutex_unlock(&c->btree_trans_lock); 644 + 640 645 ret = flush_buf(i); 641 - if (ret) 642 - break; 646 + if (ret) { 647 + closure_put(&trans->ref); 648 + goto unlocked; 649 + } 643 650 644 651 bch2_btree_trans_to_text(&i->buf, trans); 645 652 ··· 658 651 prt_newline(&i->buf); 659 652 660 653 i->iter = trans->locking_wait.task->pid; 661 - } 662 - mutex_unlock(&c->btree_trans_lock); 663 654 655 + closure_put(&trans->ref); 656 + 657 + if (!seqmutex_relock(&c->btree_trans_lock, seq)) 658 + goto restart; 659 + } 660 + seqmutex_unlock(&c->btree_trans_lock); 661 + unlocked: 664 662 if (i->buf.allocation_failure) 665 663 ret = -ENOMEM; 666 664 ··· 827 815 struct bch_fs *c = i->c; 828 816 struct btree_trans *trans; 829 817 ssize_t ret = 0; 818 + u32 seq; 830 819 831 820 i->ubuf = buf; 832 821 i->size = size; ··· 835 822 836 823 if (i->iter) 837 824 goto out; 838 - 839 - mutex_lock(&c->btree_trans_lock); 825 + restart: 826 + seqmutex_lock(&c->btree_trans_lock); 840 827 list_for_each_entry(trans, &c->btree_trans_list, list) { 841 828 if (trans->locking_wait.task->pid <= i->iter) 842 829 continue; 843 830 831 + closure_get(&trans->ref); 832 + seq = seqmutex_seq(&c->btree_trans_lock); 833 + seqmutex_unlock(&c->btree_trans_lock); 834 + 844 835 ret = flush_buf(i); 845 - if (ret) 846 - break; 836 + if (ret) { 837 + closure_put(&trans->ref); 838 + goto out; 839 + } 847 840 848 841 bch2_check_for_deadlock(trans, &i->buf); 849 842 850 843 i->iter = trans->locking_wait.task->pid; 844 + 845 + closure_put(&trans->ref); 846 + 847 + if (!seqmutex_relock(&c->btree_trans_lock, seq)) 848 + goto restart; 851 849 } 852 - mutex_unlock(&c->btree_trans_lock); 850 + seqmutex_unlock(&c->btree_trans_lock); 853 851 out: 854 852 if (i->buf.allocation_failure) 855 853 ret = -ENOMEM;
+48
fs/bcachefs/seqmutex.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef _BCACHEFS_SEQMUTEX_H 3 + #define _BCACHEFS_SEQMUTEX_H 4 + 5 + #include <linux/mutex.h> 6 + 7 + struct seqmutex { 8 + struct mutex lock; 9 + u32 seq; 10 + }; 11 + 12 + #define seqmutex_init(_lock) mutex_init(&(_lock)->lock) 13 + 14 + static inline bool seqmutex_trylock(struct seqmutex *lock) 15 + { 16 + return mutex_trylock(&lock->lock); 17 + } 18 + 19 + static inline void seqmutex_lock(struct seqmutex *lock) 20 + { 21 + mutex_lock(&lock->lock); 22 + } 23 + 24 + static inline void seqmutex_unlock(struct seqmutex *lock) 25 + { 26 + lock->seq++; 27 + mutex_unlock(&lock->lock); 28 + } 29 + 30 + static inline u32 seqmutex_seq(struct seqmutex *lock) 31 + { 32 + return lock->seq; 33 + } 34 + 35 + static inline bool seqmutex_relock(struct seqmutex *lock, u32 seq) 36 + { 37 + if (lock->seq != seq || !mutex_trylock(&lock->lock)) 38 + return false; 39 + 40 + if (lock->seq != seq) { 41 + mutex_unlock(&lock->lock); 42 + return false; 43 + } 44 + 45 + return true; 46 + } 47 + 48 + #endif /* _BCACHEFS_SEQMUTEX_H */
+2 -2
fs/bcachefs/sysfs.c
··· 379 379 { 380 380 struct btree_trans *trans; 381 381 382 - mutex_lock(&c->btree_trans_lock); 382 + seqmutex_lock(&c->btree_trans_lock); 383 383 list_for_each_entry(trans, &c->btree_trans_list, list) { 384 384 struct btree_bkey_cached_common *b = READ_ONCE(trans->locking); 385 385 ··· 387 387 six_lock_wakeup_all(&b->lock); 388 388 389 389 } 390 - mutex_unlock(&c->btree_trans_lock); 390 + seqmutex_unlock(&c->btree_trans_lock); 391 391 } 392 392 393 393 SHOW(bch2_fs)