Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bcachefs: Ensure srcu lock is not held too long

The SRCU read lock that btree_trans takes exists to make it safe for
bch2_trans_relock() to deref pointers to btree nodes/key cache items we
don't have locked, but as a side effect it blocks reclaim from freeing
those items.

Thus, it's important to not hold it for too long: we need to
differentiate between bch2_trans_unlock() calls that will be only for a
short duration, and ones that will be for an unbounded duration.

This introduces bch2_trans_unlock_long(), to be used mainly by the data
move paths.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>

+40 -13
+29 -13
fs/bcachefs/btree_iter.c
@@ -1109,6 +1109,9 @@
 	if (unlikely(ret))
 		goto out;
 
+	if (unlikely(!trans->srcu_held))
+		bch2_trans_srcu_lock(trans);
+
 	/*
 	 * Ensure we obey path->should_be_locked: if it's set, we can't unlock
 	 * and re-traverse the path without a transaction restart:
@@ -2833,18 +2830,28 @@
 	return p;
 }
 
-static noinline void bch2_trans_reset_srcu_lock(struct btree_trans *trans)
+void bch2_trans_srcu_unlock(struct btree_trans *trans)
 {
-	struct bch_fs *c = trans->c;
-	struct btree_path *path;
+	if (trans->srcu_held) {
+		struct bch_fs *c = trans->c;
+		struct btree_path *path;
 
-	trans_for_each_path(trans, path)
-		if (path->cached && !btree_node_locked(path, 0))
-			path->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_srcu_reset);
+		trans_for_each_path(trans, path)
+			if (path->cached && !btree_node_locked(path, 0))
+				path->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_srcu_reset);
 
-	srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
-	trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
-	trans->srcu_lock_time = jiffies;
+		srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
+		trans->srcu_held = false;
+	}
+}
+
+void bch2_trans_srcu_lock(struct btree_trans *trans)
+{
+	if (!trans->srcu_held) {
+		trans->srcu_idx = srcu_read_lock(&trans->c->btree_trans_barrier);
+		trans->srcu_lock_time = jiffies;
+		trans->srcu_held = true;
+	}
 }
 
 /**
@@ -2908,8 +2895,9 @@
 	}
 	trans->last_begin_time = now;
 
-	if (unlikely(time_after(jiffies, trans->srcu_lock_time + msecs_to_jiffies(10))))
-		bch2_trans_reset_srcu_lock(trans);
+	if (unlikely(trans->srcu_held &&
+		     time_after(jiffies, trans->srcu_lock_time + msecs_to_jiffies(10))))
+		bch2_trans_srcu_unlock(trans);
 
 	trans->last_begin_ip = _RET_IP_;
 	if (trans->restarted) {
@@ -2995,8 +2981,9 @@
 		trans->wb_updates_size = s->wb_updates_size;
 	}
 
-	trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
+	trans->srcu_idx		= srcu_read_lock(&c->btree_trans_barrier);
 	trans->srcu_lock_time	= jiffies;
+	trans->srcu_held = true;
 
 	if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG_TRANSACTIONS)) {
 		struct btree_trans *pos;
@@ -3074,7 +3059,8 @@
 
 	check_btree_paths_leaked(trans);
 
-	srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
+	if (trans->srcu_held)
+		srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
 
 	bch2_journal_preres_put(&c->journal, &trans->journal_preres);
 
+4
fs/bcachefs/btree_iter.h
@@ -274,6 +274,7 @@
 int bch2_trans_relock(struct btree_trans *);
 int bch2_trans_relock_notrace(struct btree_trans *);
 void bch2_trans_unlock(struct btree_trans *);
+void bch2_trans_unlock_long(struct btree_trans *);
 bool bch2_trans_locked(struct btree_trans *);
 
 static inline int trans_was_restarted(struct btree_trans *trans, u32 restart_count)
@@ -579,6 +578,9 @@
 #define bch2_bkey_get_val_typed(_trans, _btree_id, _pos, _flags, _type, _val)\
 	__bch2_bkey_get_val_typed(_trans, _btree_id, _pos, _flags,	\
 				  KEY_TYPE_##_type, sizeof(*_val), _val)
+
+void bch2_trans_srcu_unlock(struct btree_trans *);
+void bch2_trans_srcu_lock(struct btree_trans *);
 
 u32 bch2_trans_begin(struct btree_trans *);
 
+6
fs/bcachefs/btree_locking.c
@@ -753,6 +753,12 @@
 		__bch2_btree_path_unlock(trans, path);
 }
 
+void bch2_trans_unlock_long(struct btree_trans *trans)
+{
+	bch2_trans_unlock(trans);
+	bch2_trans_srcu_unlock(trans);
+}
+
 bool bch2_trans_locked(struct btree_trans *trans)
 {
 	struct btree_path *path;
+1
fs/bcachefs/btree_types.h
@@ -426,6 +426,7 @@
 	u8 nr_updates;
 	u8 nr_wb_updates;
 	u8 wb_updates_size;
+	bool srcu_held:1;
 	bool used_mempool:1;
 	bool in_traverse_all:1;
 	bool paths_sorted:1;