/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_BTREE_UPDATE_H
#define _BCACHEFS_BTREE_UPDATE_H

#include "btree_iter.h"
#include "journal.h"
#include "snapshot.h"

struct bch_fs;
struct btree;

void bch2_btree_node_prep_for_write(struct btree_trans *,
				    struct btree_path *, struct btree *);
bool bch2_btree_bset_insert_key(struct btree_trans *, struct btree_path *,
				struct btree *, struct btree_node_iter *,
				struct bkey_i *);

int bch2_btree_node_flush0(struct journal *, struct journal_entry_pin *, u64);
int bch2_btree_node_flush1(struct journal *, struct journal_entry_pin *, u64);
void bch2_btree_add_journal_pin(struct bch_fs *, struct btree *, u64);

void bch2_btree_insert_key_leaf(struct btree_trans *, struct btree_path *,
				struct bkey_i *, u64);

#define BCH_TRANS_COMMIT_FLAGS()						\
	x(no_enospc,		"don't check for enospc")			\
	x(no_check_rw,		"don't attempt to take a ref on c->writes")	\
	x(no_journal_res,	"don't take a journal reservation, instead "	\
				"pin journal entry referred to by trans->journal_res.seq") \
	x(journal_reclaim,	"operation required for journal reclaim; may return error " \
				"instead of deadlocking if BCH_WATERMARK_reclaim not specified") \
	x(skip_accounting_apply, "we're in journal replay - accounting updates have already been applied")

enum __bch_trans_commit_flags {
	/* First bits for bch_watermark: */
	__BCH_TRANS_COMMIT_FLAGS_START = BCH_WATERMARK_BITS,
#define x(n, ...)	__BCH_TRANS_COMMIT_##n,
	BCH_TRANS_COMMIT_FLAGS()
#undef x
};

enum bch_trans_commit_flags {
#define x(n, ...)	BCH_TRANS_COMMIT_##n = BIT(__BCH_TRANS_COMMIT_##n),
	BCH_TRANS_COMMIT_FLAGS()
#undef x
};
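/*
 * Illustrative expansion of the x-macro pattern above (documentation only,
 * not part of this header): for the no_enospc entry, the two enums generate
 *
 *	__BCH_TRANS_COMMIT_no_enospc,
 *	BCH_TRANS_COMMIT_no_enospc = BIT(__BCH_TRANS_COMMIT_no_enospc),
 *
 * so the commit flags are allocated in the bits above those reserved for the
 * allocation watermark (BCH_WATERMARK_BITS), and both can be packed into the
 * single flags word passed to bch2_trans_commit().
 */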

void bch2_trans_commit_flags_to_text(struct printbuf *, enum bch_trans_commit_flags);

int bch2_btree_delete_at(struct btree_trans *, struct btree_iter *, unsigned);
int bch2_btree_delete(struct btree_trans *, enum btree_id, struct bpos, unsigned);

int bch2_btree_insert_nonextent(struct btree_trans *, enum btree_id,
				struct bkey_i *, enum btree_iter_update_trigger_flags);

int bch2_btree_insert_trans(struct btree_trans *, enum btree_id, struct bkey_i *,
			    enum btree_iter_update_trigger_flags);
int bch2_btree_insert(struct bch_fs *, enum btree_id, struct bkey_i *,
		      struct disk_reservation *, int flags,
		      enum btree_iter_update_trigger_flags iter_flags);

int bch2_btree_delete_range_trans(struct btree_trans *, enum btree_id,
				  struct bpos, struct bpos, unsigned, u64 *);
int bch2_btree_delete_range(struct bch_fs *, enum btree_id,
			    struct bpos, struct bpos, unsigned, u64 *);

int bch2_btree_bit_mod_iter(struct btree_trans *, struct btree_iter *, bool);
int bch2_btree_bit_mod(struct btree_trans *, enum btree_id, struct bpos, bool);
int bch2_btree_bit_mod_buffered(struct btree_trans *, enum btree_id, struct bpos, bool);

static inline int bch2_btree_delete_at_buffered(struct btree_trans *trans,
						enum btree_id btree, struct bpos pos)
{
	return bch2_btree_bit_mod_buffered(trans, btree, pos, false);
}

int __bch2_insert_snapshot_whiteouts(struct btree_trans *, enum btree_id,
				     struct bpos, snapshot_id_list *);
/*
 * For use when splitting extents in existing snapshots:
 *
 * If @old_pos is an interior snapshot node, iterate over descendant snapshot
 * nodes: for every descendant snapshot in which @old_pos is overwritten and
 * not visible, emit a whiteout at @new_pos.
 */
static inline int bch2_insert_snapshot_whiteouts(struct btree_trans *trans,
						 enum btree_id btree,
						 struct bpos old_pos,
						 struct bpos new_pos)
{
	BUG_ON(old_pos.snapshot != new_pos.snapshot);

	if (!btree_type_has_snapshots(btree) ||
	    bkey_eq(old_pos, new_pos))
		return 0;

	snapshot_id_list s;
	int ret = bch2_get_snapshot_overwrites(trans, btree, old_pos, &s);
	if (ret)
		return ret;

	return s.nr
		? __bch2_insert_snapshot_whiteouts(trans, btree, new_pos, &s)
		: 0;
}
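/*
 * Example (an illustrative sketch, not part of this interface): when an
 * extent update splits an existing extent, the caller can emit whiteouts at
 * the split point for every snapshot in which the old extent is already
 * overwritten; @old_extent_pos and @split_pos are placeholder names:
 *
 *	ret = bch2_insert_snapshot_whiteouts(trans, BTREE_ID_extents,
 *					     old_extent_pos, split_pos);
 */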

int bch2_trans_update_extent_overwrite(struct btree_trans *, struct btree_iter *,
				       enum btree_iter_update_trigger_flags,
				       struct bkey_s_c, struct bkey_s_c);

int bch2_bkey_get_empty_slot(struct btree_trans *, struct btree_iter *,
			     enum btree_id, struct bpos);

int __must_check bch2_trans_update_ip(struct btree_trans *, struct btree_iter *,
				      struct bkey_i *, enum btree_iter_update_trigger_flags,
				      unsigned long);

static inline int __must_check
bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
		  struct bkey_i *k, enum btree_iter_update_trigger_flags flags)
{
	return bch2_trans_update_ip(trans, iter, k, flags, _THIS_IP_);
}

static inline void *btree_trans_subbuf_base(struct btree_trans *trans,
					    struct btree_trans_subbuf *buf)
{
	return (u64 *) trans->mem + buf->base;
}

static inline void *btree_trans_subbuf_top(struct btree_trans *trans,
					   struct btree_trans_subbuf *buf)
{
	return (u64 *) trans->mem + buf->base + buf->u64s;
}

void *__bch2_trans_subbuf_alloc(struct btree_trans *,
				struct btree_trans_subbuf *,
				unsigned);

static inline void *
bch2_trans_subbuf_alloc(struct btree_trans *trans,
			struct btree_trans_subbuf *buf,
			unsigned u64s)
{
	if (buf->u64s + u64s > buf->size)
		return __bch2_trans_subbuf_alloc(trans, buf, u64s);

	void *p = btree_trans_subbuf_top(trans, buf);
	buf->u64s += u64s;
	return p;
}

static inline struct jset_entry *btree_trans_journal_entries_start(struct btree_trans *trans)
{
	return btree_trans_subbuf_base(trans, &trans->journal_entries);
}

static inline struct jset_entry *btree_trans_journal_entries_top(struct btree_trans *trans)
{
	return btree_trans_subbuf_top(trans, &trans->journal_entries);
}

static inline struct jset_entry *
bch2_trans_jset_entry_alloc(struct btree_trans *trans, unsigned u64s)
{
	return bch2_trans_subbuf_alloc(trans, &trans->journal_entries, u64s);
}

int bch2_btree_insert_clone_trans(struct btree_trans *, enum btree_id, struct bkey_i *);

int bch2_btree_write_buffer_insert_err(struct btree_trans *,
				       enum btree_id, struct bkey_i *);

static inline int __must_check bch2_trans_update_buffered(struct btree_trans *trans,
							  enum btree_id btree,
							  struct bkey_i *k)
{
	kmsan_check_memory(k, bkey_bytes(&k->k));

	EBUG_ON(k->k.u64s > BTREE_WRITE_BUFERED_U64s_MAX);

	if (unlikely(!btree_type_uses_write_buffer(btree))) {
		int ret = bch2_btree_write_buffer_insert_err(trans, btree, k);
		dump_stack();
		return ret;
	}
	/*
	 * Most updates skip the btree write buffer until journal replay is
	 * finished because synchronization with journal replay relies on having
	 * a btree node locked - if we're overwriting a key in the journal that
	 * journal replay hasn't yet replayed, we have to mark it as
	 * overwritten.
	 *
	 * But accounting updates don't overwrite, they're deltas, and they have
	 * to be flushed to the btree strictly in order for journal replay to be
	 * able to tell which updates need to be applied:
	 */
	if (k->k.type != KEY_TYPE_accounting &&
	    unlikely(trans->journal_replay_not_finished))
		return bch2_btree_insert_clone_trans(trans, btree, k);

	struct jset_entry *e = bch2_trans_jset_entry_alloc(trans, jset_u64s(k->k.u64s));
	int ret = PTR_ERR_OR_ZERO(e);
	if (ret)
		return ret;

	journal_entry_init(e, BCH_JSET_ENTRY_write_buffer_keys, btree, 0, k->k.u64s);
	bkey_copy(e->start, k);
	return 0;
}
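/*
 * Example (an illustrative sketch; @bp is a placeholder struct
 * bkey_i_backpointer assumed to have been initialized by the caller -
 * backpointers are one of the btrees that go through the write buffer):
 *
 *	ret = bch2_trans_update_buffered(trans, BTREE_ID_backpointers, &bp.k_i);
 */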

void bch2_trans_commit_hook(struct btree_trans *,
			    struct btree_trans_commit_hook *);
int __bch2_trans_commit(struct btree_trans *, unsigned);

int bch2_trans_log_str(struct btree_trans *, const char *);
int bch2_trans_log_msg(struct btree_trans *, struct printbuf *);
int bch2_trans_log_bkey(struct btree_trans *, enum btree_id, unsigned, struct bkey_i *);

__printf(2, 3) int bch2_fs_log_msg(struct bch_fs *, const char *, ...);
__printf(2, 3) int bch2_journal_log_msg(struct bch_fs *, const char *, ...);

/**
 * bch2_trans_commit - insert keys at given iterator positions
 *
 * This is the main entry point for btree updates.
 *
 * Return values:
 * -EROFS: filesystem read only
 * -EIO: journal or btree node IO error
 */
static inline int bch2_trans_commit(struct btree_trans *trans,
				    struct disk_reservation *disk_res,
				    u64 *journal_seq,
				    unsigned flags)
{
	trans->disk_res		= disk_res;
	trans->journal_seq	= journal_seq;

	return __bch2_trans_commit(trans, flags);
}
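/*
 * Typical usage (an illustrative sketch): updates are queued up on @trans,
 * then committed, and the whole sequence is retried if the commit returns a
 * transaction restart error; @pos is a placeholder:
 *
 *	do {
 *		bch2_trans_begin(trans);
 *
 *		ret =   bch2_btree_delete(trans, BTREE_ID_xattrs, pos, 0) ?:
 *			bch2_trans_commit(trans, NULL, NULL, 0);
 *	} while (bch2_err_matches(ret, BCH_ERR_transaction_restart));
 */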

#define commit_do(_trans, _disk_res, _journal_seq, _flags, _do)		\
	lockrestart_do(_trans, _do ?: bch2_trans_commit(_trans, (_disk_res),	\
					(_journal_seq), (_flags)))

#define nested_commit_do(_trans, _disk_res, _journal_seq, _flags, _do)		\
	nested_lockrestart_do(_trans, _do ?: bch2_trans_commit(_trans, (_disk_res),\
					(_journal_seq), (_flags)))

#define bch2_trans_commit_do(_c, _disk_res, _journal_seq, _flags, _do)		\
	bch2_trans_run(_c, commit_do(trans, _disk_res, _journal_seq, _flags, _do))
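/*
 * The same retry pattern wrapped up by the macros above (an illustrative
 * sketch; @c and @pos are placeholders): bch2_trans_commit_do() gets a
 * transaction, runs the update expression under lockrestart_do(), commits,
 * and puts the transaction:
 *
 *	ret = bch2_trans_commit_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
 *			bch2_btree_delete(trans, BTREE_ID_xattrs, pos, 0));
 */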

#define trans_for_each_update(_trans, _i)				\
	for (struct btree_insert_entry *_i = (_trans)->updates;	\
	     (_i) < (_trans)->updates + (_trans)->nr_updates;		\
	     (_i)++)

static inline void bch2_trans_reset_updates(struct btree_trans *trans)
{
	trans_for_each_update(trans, i)
		bch2_path_put(trans, i->path, true);

	trans->nr_updates		= 0;
	trans->journal_entries.u64s	= 0;
	trans->journal_entries.size	= 0;
	trans->accounting.u64s		= 0;
	trans->accounting.size		= 0;
	trans->hooks			= NULL;
	trans->extra_disk_res		= 0;
}

static __always_inline struct bkey_i *__bch2_bkey_make_mut_noupdate(struct btree_trans *trans, struct bkey_s_c k,
								     unsigned type, unsigned min_bytes)
{
	unsigned bytes = max_t(unsigned, min_bytes, bkey_bytes(k.k));
	struct bkey_i *mut;

	if (type && k.k->type != type)
		return ERR_PTR(-ENOENT);

	/* extra padding for varint_decode_fast... */
	mut = bch2_trans_kmalloc_nomemzero(trans, bytes + 8);
	if (!IS_ERR(mut)) {
		bkey_reassemble(mut, k);

		if (unlikely(bytes > bkey_bytes(k.k))) {
			memset((void *) mut + bkey_bytes(k.k), 0,
			       bytes - bkey_bytes(k.k));
			mut->k.u64s = DIV_ROUND_UP(bytes, sizeof(u64));
		}
	}
	return mut;
}

static __always_inline struct bkey_i *bch2_bkey_make_mut_noupdate(struct btree_trans *trans, struct bkey_s_c k)
{
	return __bch2_bkey_make_mut_noupdate(trans, k, 0, 0);
}

#define bch2_bkey_make_mut_noupdate_typed(_trans, _k, _type)		\
	bkey_i_to_##_type(__bch2_bkey_make_mut_noupdate(_trans, _k,	\
				KEY_TYPE_##_type, sizeof(struct bkey_i_##_type)))

static inline struct bkey_i *__bch2_bkey_make_mut(struct btree_trans *trans, struct btree_iter *iter,
						  struct bkey_s_c *k,
						  enum btree_iter_update_trigger_flags flags,
						  unsigned type, unsigned min_bytes)
{
	struct bkey_i *mut = __bch2_bkey_make_mut_noupdate(trans, *k, type, min_bytes);
	int ret;

	if (IS_ERR(mut))
		return mut;

	ret = bch2_trans_update(trans, iter, mut, flags);
	if (ret)
		return ERR_PTR(ret);

	*k = bkey_i_to_s_c(mut);
	return mut;
}

static inline struct bkey_i *bch2_bkey_make_mut(struct btree_trans *trans,
						struct btree_iter *iter, struct bkey_s_c *k,
						enum btree_iter_update_trigger_flags flags)
{
	return __bch2_bkey_make_mut(trans, iter, k, flags, 0, 0);
}

#define bch2_bkey_make_mut_typed(_trans, _iter, _k, _flags, _type)	\
	bkey_i_to_##_type(__bch2_bkey_make_mut(_trans, _iter, _k, _flags,\
				KEY_TYPE_##_type, sizeof(struct bkey_i_##_type)))

static inline struct bkey_i *__bch2_bkey_get_mut_noupdate(struct btree_trans *trans,
							   struct btree_iter *iter,
							   unsigned btree_id, struct bpos pos,
							   enum btree_iter_update_trigger_flags flags,
							   unsigned type, unsigned min_bytes)
{
	struct bkey_s_c k = __bch2_bkey_get_iter(trans, iter,
				btree_id, pos, flags|BTREE_ITER_intent, type);
	struct bkey_i *ret = IS_ERR(k.k)
		? ERR_CAST(k.k)
		: __bch2_bkey_make_mut_noupdate(trans, k, 0, min_bytes);
	if (IS_ERR(ret))
		bch2_trans_iter_exit(trans, iter);
	return ret;
}

static inline struct bkey_i *bch2_bkey_get_mut_noupdate(struct btree_trans *trans,
							struct btree_iter *iter,
							unsigned btree_id, struct bpos pos,
							enum btree_iter_update_trigger_flags flags)
{
	return __bch2_bkey_get_mut_noupdate(trans, iter, btree_id, pos, flags, 0, 0);
}

static inline struct bkey_i *__bch2_bkey_get_mut(struct btree_trans *trans,
						 struct btree_iter *iter,
						 unsigned btree_id, struct bpos pos,
						 enum btree_iter_update_trigger_flags flags,
						 unsigned type, unsigned min_bytes)
{
	struct bkey_i *mut = __bch2_bkey_get_mut_noupdate(trans, iter,
			btree_id, pos, flags|BTREE_ITER_intent, type, min_bytes);
	int ret;

	if (IS_ERR(mut))
		return mut;

	ret = bch2_trans_update(trans, iter, mut, flags);
	if (ret) {
		bch2_trans_iter_exit(trans, iter);
		return ERR_PTR(ret);
	}

	return mut;
}

static inline struct bkey_i *bch2_bkey_get_mut_minsize(struct btree_trans *trans,
						       struct btree_iter *iter,
						       unsigned btree_id, struct bpos pos,
						       enum btree_iter_update_trigger_flags flags,
						       unsigned min_bytes)
{
	return __bch2_bkey_get_mut(trans, iter, btree_id, pos, flags, 0, min_bytes);
}

static inline struct bkey_i *bch2_bkey_get_mut(struct btree_trans *trans,
					       struct btree_iter *iter,
					       unsigned btree_id, struct bpos pos,
					       enum btree_iter_update_trigger_flags flags)
{
	return __bch2_bkey_get_mut(trans, iter, btree_id, pos, flags, 0, 0);
}

#define bch2_bkey_get_mut_typed(_trans, _iter, _btree_id, _pos, _flags, _type)	\
	bkey_i_to_##_type(__bch2_bkey_get_mut(_trans, _iter,		\
			_btree_id, _pos, _flags,			\
			KEY_TYPE_##_type, sizeof(struct bkey_i_##_type)))
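/*
 * Example (an illustrative sketch; @dev and @bucket are placeholders): look
 * up an alloc key as a mutable, typed copy that is already queued as an
 * update on @trans, then modify it in place before committing:
 *
 *	struct bkey_i_alloc_v4 *a =
 *		bch2_bkey_get_mut_typed(trans, &iter, BTREE_ID_alloc,
 *					POS(dev, bucket), 0, alloc_v4);
 *	ret = PTR_ERR_OR_ZERO(a);
 */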

static inline struct bkey_i *__bch2_bkey_alloc(struct btree_trans *trans, struct btree_iter *iter,
					       enum btree_iter_update_trigger_flags flags,
					       unsigned type, unsigned val_size)
{
	struct bkey_i *k = bch2_trans_kmalloc(trans, sizeof(*k) + val_size);
	int ret;

	if (IS_ERR(k))
		return k;

	bkey_init(&k->k);
	k->k.p = iter->pos;
	k->k.type = type;
	set_bkey_val_bytes(&k->k, val_size);

	ret = bch2_trans_update(trans, iter, k, flags);
	if (unlikely(ret))
		return ERR_PTR(ret);
	return k;
}

#define bch2_bkey_alloc(_trans, _iter, _flags, _type)			\
	bkey_i_to_##_type(__bch2_bkey_alloc(_trans, _iter, _flags,	\
				KEY_TYPE_##_type, sizeof(struct bch_##_type)))

#endif /* _BCACHEFS_BTREE_UPDATE_H */