Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: GPL-2.0
2#include "bcachefs.h"
3#include "bbpos.h"
4#include "alloc_background.h"
5#include "backpointers.h"
6#include "bkey_buf.h"
7#include "btree_cache.h"
8#include "btree_update.h"
9#include "btree_update_interior.h"
10#include "btree_write_buffer.h"
11#include "checksum.h"
12#include "error.h"
13
14#include <linux/mm.h>
15
16static bool extent_matches_bp(struct bch_fs *c,
17 enum btree_id btree_id, unsigned level,
18 struct bkey_s_c k,
19 struct bpos bucket,
20 struct bch_backpointer bp)
21{
22 struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
23 const union bch_extent_entry *entry;
24 struct extent_ptr_decoded p;
25
26 bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
27 struct bpos bucket2;
28 struct bch_backpointer bp2;
29
30 if (p.ptr.cached)
31 continue;
32
33 bch2_extent_ptr_to_bp(c, btree_id, level, k, p, entry, &bucket2, &bp2);
34 if (bpos_eq(bucket, bucket2) &&
35 !memcmp(&bp, &bp2, sizeof(bp)))
36 return true;
37 }
38
39 return false;
40}
41
/*
 * Key-invalid check for KEY_TYPE_backpointer: verify that the key's position
 * round-trips through bucket_pos_to_bp() for the backpointer's bucket_offset.
 *
 * Returns 0 if valid; errors are reported via bkey_fsck_err_on(), which sets
 * @ret and jumps to fsck_err.
 */
int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k,
			     enum bkey_invalid_flags flags,
			     struct printbuf *err)
{
	struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);

	/* these will be caught by fsck */
	if (!bch2_dev_exists2(c, bp.k->p.inode))
		return 0;

	struct bpos bucket = bp_pos_to_bucket(c, bp.k->p);
	int ret = 0;

	/* position must encode exactly the bucket + bucket_offset pair: */
	bkey_fsck_err_on(!bpos_eq(bp.k->p, bucket_pos_to_bp(c, bucket, bp.v->bucket_offset)),
			 c, err,
			 backpointer_pos_wrong,
			 "backpointer at wrong pos");
fsck_err:
	return ret;
}
62
63void bch2_backpointer_to_text(struct printbuf *out, const struct bch_backpointer *bp)
64{
65 prt_printf(out, "btree=%s l=%u offset=%llu:%u len=%u pos=",
66 bch2_btree_id_str(bp->btree_id),
67 bp->level,
68 (u64) (bp->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT),
69 (u32) bp->bucket_offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT),
70 bp->bucket_len);
71 bch2_bpos_to_text(out, bp->pos);
72}
73
74void bch2_backpointer_k_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
75{
76 if (bch2_dev_exists2(c, k.k->p.inode)) {
77 prt_str(out, "bucket=");
78 bch2_bpos_to_text(out, bp_pos_to_bucket(c, k.k->p));
79 prt_str(out, " ");
80 }
81
82 bch2_backpointer_to_text(out, bkey_s_c_to_backpointer(k).v);
83}
84
85void bch2_backpointer_swab(struct bkey_s k)
86{
87 struct bkey_s_backpointer bp = bkey_s_to_backpointer(k);
88
89 bp.v->bucket_offset = swab40(bp.v->bucket_offset);
90 bp.v->bucket_len = swab32(bp.v->bucket_len);
91 bch2_bpos_swab(&bp.v->pos);
92}
93
/*
 * Report an inconsistency hit while modifying a backpointer:
 * - on insert, a backpointer already exists where we were about to insert;
 * - on delete, the backpointer we expected to delete wasn't there.
 *
 * Only an error (possibly triggering emergency read-only) once the
 * check_extents_to_backpointers recovery pass has run - before that, fsck is
 * expected to repair these, so we return 0.
 */
static noinline int backpointer_mod_err(struct btree_trans *trans,
					struct bch_backpointer bp,
					struct bkey_s_c bp_k,
					struct bkey_s_c orig_k,
					bool insert)
{
	struct bch_fs *c = trans->c;
	struct printbuf buf = PRINTBUF;

	if (insert) {
		/* collision on insert: always log it */
		prt_printf(&buf, "existing backpointer found when inserting ");
		bch2_backpointer_to_text(&buf, &bp);
		prt_newline(&buf);
		printbuf_indent_add(&buf, 2);

		prt_printf(&buf, "found ");
		bch2_bkey_val_to_text(&buf, c, bp_k);
		prt_newline(&buf);

		prt_printf(&buf, "for ");
		bch2_bkey_val_to_text(&buf, c, orig_k);

		bch_err(c, "%s", buf.buf);
	} else if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) {
		/* missing on delete: only log after fsck has verified backpointers */
		prt_printf(&buf, "backpointer not found when deleting");
		prt_newline(&buf);
		printbuf_indent_add(&buf, 2);

		prt_printf(&buf, "searching for ");
		bch2_backpointer_to_text(&buf, &bp);
		prt_newline(&buf);

		prt_printf(&buf, "got ");
		bch2_bkey_val_to_text(&buf, c, bp_k);
		prt_newline(&buf);

		prt_printf(&buf, "for ");
		bch2_bkey_val_to_text(&buf, c, orig_k);

		bch_err(c, "%s", buf.buf);
	}

	printbuf_exit(&buf);

	if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) {
		return bch2_inconsistent_error(c) ? BCH_ERR_erofs_unfixed_errors : 0;
	} else {
		return 0;
	}
}
144
/*
 * Insert or delete a backpointer directly in the backpointers btree,
 * bypassing the btree write buffer.
 *
 * @bucket:	bucket the data lives in
 * @bp:		backpointer value to insert/delete
 * @orig_k:	the extent/btree ptr key the backpointer refers to (only used
 *		for error reporting)
 * @insert:	true to insert, false to delete
 *
 * Before updating, checks that the existing key matches what we expect
 * (empty slot on insert, matching backpointer on delete) and reports
 * mismatches via backpointer_mod_err().
 */
int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans,
				struct bpos bucket,
				struct bch_backpointer bp,
				struct bkey_s_c orig_k,
				bool insert)
{
	struct btree_iter bp_iter;
	struct bkey_s_c k;
	struct bkey_i_backpointer *bp_k;
	int ret;

	bp_k = bch2_trans_kmalloc_nomemzero(trans, sizeof(struct bkey_i_backpointer));
	ret = PTR_ERR_OR_ZERO(bp_k);
	if (ret)
		return ret;

	bkey_backpointer_init(&bp_k->k_i);
	bp_k->k.p = bucket_pos_to_bp(trans->c, bucket, bp.bucket_offset);
	bp_k->v = bp;

	/* deletion is an update to a whiteout: */
	if (!insert) {
		bp_k->k.type = KEY_TYPE_deleted;
		set_bkey_val_u64s(&bp_k->k, 0);
	}

	k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers,
			       bp_k->k.p,
			       BTREE_ITER_INTENT|
			       BTREE_ITER_SLOTS|
			       BTREE_ITER_WITH_UPDATES);
	ret = bkey_err(k);
	if (ret)
		goto err;

	/*
	 * Inserting: slot must be empty.
	 * Deleting: slot must hold exactly the backpointer we're removing.
	 */
	if (insert
	    ? k.k->type
	    : (k.k->type != KEY_TYPE_backpointer ||
	       memcmp(bkey_s_c_to_backpointer(k).v, &bp, sizeof(bp)))) {
		ret = backpointer_mod_err(trans, bp, k, orig_k, insert);
		if (ret)
			goto err;
	}

	ret = bch2_trans_update(trans, &bp_iter, &bp_k->k_i, 0);
err:
	bch2_trans_iter_exit(trans, &bp_iter);
	return ret;
}
193
194/*
195 * Find the next backpointer >= *bp_offset:
196 */
/*
 * Advance *bp_pos to the next backpointer for @bucket, filling in *bp.
 *
 * @gen:	if >= 0, the bucket's expected generation number; if the
 *		bucket's alloc key doesn't match, the bucket is treated as
 *		having no more backpointers
 *
 * Sets *bp_pos to SPOS_MAX when there are no more backpointers for this
 * bucket. Returns 0 or a transaction error from iteration.
 */
int bch2_get_next_backpointer(struct btree_trans *trans,
			      struct bpos bucket, int gen,
			      struct bpos *bp_pos,
			      struct bch_backpointer *bp,
			      unsigned iter_flags)
{
	struct bch_fs *c = trans->c;
	/* first backpointer position belonging to the *next* bucket: */
	struct bpos bp_end_pos = bucket_pos_to_bp(c, bpos_nosnap_successor(bucket), 0);
	struct btree_iter alloc_iter = { NULL }, bp_iter = { NULL };
	struct bkey_s_c k;
	int ret = 0;

	if (bpos_ge(*bp_pos, bp_end_pos))
		goto done;

	if (gen >= 0) {
		/* verify the bucket hasn't been reused (gen still matches): */
		k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc,
				       bucket, BTREE_ITER_CACHED|iter_flags);
		ret = bkey_err(k);
		if (ret)
			goto out;

		if (k.k->type != KEY_TYPE_alloc_v4 ||
		    bkey_s_c_to_alloc_v4(k).v->gen != gen)
			goto done;
	}

	/* clamp search start to the beginning of this bucket's range: */
	*bp_pos = bpos_max(*bp_pos, bucket_pos_to_bp(c, bucket, 0));

	for_each_btree_key_norestart(trans, bp_iter, BTREE_ID_backpointers,
				     *bp_pos, iter_flags, k, ret) {
		if (bpos_ge(k.k->p, bp_end_pos))
			break;

		*bp_pos = k.k->p;
		*bp = *bkey_s_c_to_backpointer(k).v;
		goto out;
	}
done:
	*bp_pos = SPOS_MAX;
out:
	bch2_trans_iter_exit(trans, &bp_iter);
	bch2_trans_iter_exit(trans, &alloc_iter);
	return ret;
}
242
/*
 * Report a backpointer whose target extent/btree node could not be found.
 *
 * Only reports when the write buffer is disabled for backpointers: with the
 * write buffer in use, a stale backpointer may legitimately still be visible
 * after its deletion was buffered, so a lookup miss is not an error.
 */
static void backpointer_not_found(struct btree_trans *trans,
				  struct bpos bp_pos,
				  struct bch_backpointer bp,
				  struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	struct printbuf buf = PRINTBUF;
	struct bpos bucket = bp_pos_to_bucket(c, bp_pos);

	/*
	 * If we're using the btree write buffer, the backpointer we were
	 * looking at may have already been deleted - failure to find what it
	 * pointed to is not an error:
	 */
	if (likely(!bch2_backpointers_no_use_write_buffer))
		return;

	prt_printf(&buf, "backpointer doesn't match %s it points to:\n  ",
		   bp.level ? "btree node" : "extent");
	prt_printf(&buf, "bucket: ");
	bch2_bpos_to_text(&buf, bucket);
	prt_printf(&buf, "\n  ");

	prt_printf(&buf, "backpointer pos: ");
	bch2_bpos_to_text(&buf, bp_pos);
	prt_printf(&buf, "\n  ");

	bch2_backpointer_to_text(&buf, &bp);
	prt_printf(&buf, "\n  ");
	bch2_bkey_val_to_text(&buf, c, k);
	/* during/after fsck this is only worth a ratelimited complaint: */
	if (c->curr_recovery_pass >= BCH_RECOVERY_PASS_check_extents_to_backpointers)
		bch_err_ratelimited(c, "%s", buf.buf);
	else
		bch2_trans_inconsistent(trans, "%s", buf.buf);

	printbuf_exit(&buf);
}
280
/*
 * Look up the extent (or btree node key, for level > 0) that a backpointer
 * points to.
 *
 * On success, returns the key and leaves @iter pointing at it - the caller
 * must exit the iterator. On error or when the target is missing, the
 * iterator is already exited; a missing target returns bkey_s_c_null after
 * backpointer_not_found() has had a chance to complain.
 */
struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
					 struct btree_iter *iter,
					 struct bpos bp_pos,
					 struct bch_backpointer bp,
					 unsigned iter_flags)
{
	if (likely(!bp.level)) {
		/* leaf level: look the extent up directly */
		struct bch_fs *c = trans->c;
		struct bpos bucket = bp_pos_to_bucket(c, bp_pos);
		struct bkey_s_c k;

		bch2_trans_node_iter_init(trans, iter,
					  bp.btree_id,
					  bp.pos,
					  0, 0,
					  iter_flags);
		k = bch2_btree_iter_peek_slot(iter);
		if (bkey_err(k)) {
			bch2_trans_iter_exit(trans, iter);
			return k;
		}

		if (k.k && extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp))
			return k;

		bch2_trans_iter_exit(trans, iter);
		backpointer_not_found(trans, bp_pos, bp, k);
		return bkey_s_c_null;
	} else {
		/* interior level: return the btree node's key */
		struct btree *b = bch2_backpointer_get_node(trans, iter, bp_pos, bp);

		if (IS_ERR_OR_NULL(b)) {
			bch2_trans_iter_exit(trans, iter);
			return IS_ERR(b) ? bkey_s_c_err(PTR_ERR(b)) : bkey_s_c_null;
		}
		return bkey_i_to_s_c(&b->key);
	}
}
319
/*
 * Look up the btree node a backpointer (level > 0) points to.
 *
 * Returns the node with @iter held on success; otherwise exits the iterator
 * and returns NULL (target missing),
 * ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node) if the node is
 * being rewritten, or another error pointer.
 */
struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
					struct btree_iter *iter,
					struct bpos bp_pos,
					struct bch_backpointer bp)
{
	struct bch_fs *c = trans->c;
	struct bpos bucket = bp_pos_to_bucket(c, bp_pos);
	struct btree *b;

	BUG_ON(!bp.level);

	/* iterate at bp.level - 1 so peek_node() returns the node itself: */
	bch2_trans_node_iter_init(trans, iter,
				  bp.btree_id,
				  bp.pos,
				  0,
				  bp.level - 1,
				  0);
	b = bch2_btree_iter_peek_node(iter);
	if (IS_ERR_OR_NULL(b))
		goto err;

	BUG_ON(b->c.level != bp.level - 1);

	if (extent_matches_bp(c, bp.btree_id, bp.level,
			      bkey_i_to_s_c(&b->key),
			      bucket, bp))
		return b;

	if (btree_node_will_make_reachable(b)) {
		/* node is being rewritten; backpointer will be updated */
		b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node);
	} else {
		backpointer_not_found(trans, bp_pos, bp, bkey_i_to_s_c(&b->key));
		b = NULL;
	}
err:
	bch2_trans_iter_exit(trans, iter);
	return b;
}
358
/*
 * fsck: check a single backpointer key - delete it if it refers to a device
 * that no longer exists, or to a bucket with no alloc_v4 key.
 */
static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_iter *bp_iter,
					struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	struct btree_iter alloc_iter = { NULL };
	struct bkey_s_c alloc_k;
	struct printbuf buf = PRINTBUF;
	int ret = 0;

	if (fsck_err_on(!bch2_dev_exists2(c, k.k->p.inode), c,
			backpointer_to_missing_device,
			"backpointer for missing device:\n%s",
			(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
		ret = bch2_btree_delete_at(trans, bp_iter, 0);
		goto out;
	}

	alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc,
				     bp_pos_to_bucket(c, k.k->p), 0);
	ret = bkey_err(alloc_k);
	if (ret)
		goto out;

	if (fsck_err_on(alloc_k.k->type != KEY_TYPE_alloc_v4, c,
			backpointer_to_missing_alloc,
			"backpointer for nonexistent alloc key: %llu:%llu:0\n%s",
			alloc_iter.pos.inode, alloc_iter.pos.offset,
			(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
		ret = bch2_btree_delete_at(trans, bp_iter, 0);
		goto out;
	}
out:
fsck_err:
	bch2_trans_iter_exit(trans, &alloc_iter);
	printbuf_exit(&buf);
	return ret;
}
396
/* verify that every backpointer has a corresponding alloc key */
int bch2_check_btree_backpointers(struct bch_fs *c)
{
	/* walk the whole backpointers btree, committing repairs as we go: */
	int ret = bch2_trans_run(c,
		for_each_btree_key_commit(trans, iter,
			BTREE_ID_backpointers, POS_MIN, 0, k,
			NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
		  bch2_check_btree_backpointer(trans, &iter, k)));
	bch_err_fn(c, ret);
	return ret;
}
408
409static inline bool bkey_and_val_eq(struct bkey_s_c l, struct bkey_s_c r)
410{
411 return bpos_eq(l.k->p, r.k->p) &&
412 bkey_bytes(l.k) == bkey_bytes(r.k) &&
413 !memcmp(l.v, r.v, bkey_val_bytes(l.k));
414}
415
/*
 * State for one pass of check_extents_to_backpointers: only backpointers in
 * [bucket_start, bucket_end] are checked in a given pass, so the relevant
 * part of the backpointers btree can stay pinned in memory.
 */
struct extents_to_bp_state {
	struct bpos bucket_start;
	struct bpos bucket_end;
	struct bkey_buf last_flushed;	/* last key we flushed the write buffer for */
};
421
422static int drop_dev_and_update(struct btree_trans *trans, enum btree_id btree,
423 struct bkey_s_c extent, unsigned dev)
424{
425 struct bkey_i *n = bch2_bkey_make_mut_noupdate(trans, extent);
426 int ret = PTR_ERR_OR_ZERO(n);
427 if (ret)
428 return ret;
429
430 bch2_bkey_drop_device(bkey_i_to_s(n), dev);
431 return bch2_btree_insert_trans(trans, btree, n, 0);
432}
433
/*
 * Two extents claim the same disk space: read @extent's data for its pointer
 * on @dev and verify the checksum.
 *
 * Returns: > 0 if the checksum was bad and the pointer was dropped from
 * @extent, 0 if the checksum matched or couldn't be checked (btree ptr, no
 * csum type, device not online), negative error code on failure.
 */
static int check_extent_checksum(struct btree_trans *trans,
				 enum btree_id btree, struct bkey_s_c extent,
				 enum btree_id o_btree, struct bkey_s_c extent2, unsigned dev)
{
	struct bch_fs *c = trans->c;
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(extent);
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;
	struct printbuf buf = PRINTBUF;
	void *data_buf = NULL;
	struct bio *bio = NULL;
	size_t bytes;
	int ret = 0;

	if (bkey_is_btree_ptr(extent.k))
		return false;

	/* find the pointer on @dev - caller guarantees it exists: */
	bkey_for_each_ptr_decode(extent.k, ptrs, p, entry)
		if (p.ptr.dev == dev)
			goto found;
	BUG();
found:
	if (!p.crc.csum_type)
		return false;

	bytes = p.crc.compressed_size << 9;

	struct bch_dev *ca = bch_dev_bkey_exists(c, dev);
	if (!bch2_dev_get_ioref(ca, READ))
		return false;

	data_buf = kvmalloc(bytes, GFP_KERNEL);
	if (!data_buf) {
		ret = -ENOMEM;
		goto err;
	}

	/* synchronous read of the raw (compressed) data: */
	bio = bio_alloc(ca->disk_sb.bdev, 1, REQ_OP_READ, GFP_KERNEL);
	bio->bi_iter.bi_sector = p.ptr.offset;
	bch2_bio_map(bio, data_buf, bytes);
	ret = submit_bio_wait(bio);
	if (ret)
		goto err;

	/* build the error message up front; only emitted if csum is bad: */
	prt_str(&buf, "extents pointing to same space, but first extent checksum bad:");
	prt_printf(&buf, "\n  %s ", bch2_btree_id_str(btree));
	bch2_bkey_val_to_text(&buf, c, extent);
	prt_printf(&buf, "\n  %s ", bch2_btree_id_str(o_btree));
	bch2_bkey_val_to_text(&buf, c, extent2);

	struct nonce nonce = extent_nonce(extent.k->version, p.crc);
	struct bch_csum csum = bch2_checksum(c, p.crc.csum_type, nonce, data_buf, bytes);
	if (fsck_err_on(bch2_crc_cmp(csum, p.crc.csum),
			c, dup_backpointer_to_bad_csum_extent,
			"%s", buf.buf))
		ret = drop_dev_and_update(trans, btree, extent, dev) ?: 1;
fsck_err:
err:
	if (bio)
		bio_put(bio);
	kvfree(data_buf);
	percpu_ref_put(&ca->io_ref);
	printbuf_exit(&buf);
	return ret;
}
499
500static int check_bp_exists(struct btree_trans *trans,
501 struct extents_to_bp_state *s,
502 struct bpos bucket,
503 struct bch_backpointer bp,
504 struct bkey_s_c orig_k)
505{
506 struct bch_fs *c = trans->c;
507 struct btree_iter bp_iter = {};
508 struct btree_iter other_extent_iter = {};
509 struct printbuf buf = PRINTBUF;
510 struct bkey_s_c bp_k;
511 struct bkey_buf tmp;
512 int ret;
513
514 bch2_bkey_buf_init(&tmp);
515
516 if (!bch2_dev_bucket_exists(c, bucket)) {
517 prt_str(&buf, "extent for nonexistent device:bucket ");
518 bch2_bpos_to_text(&buf, bucket);
519 prt_str(&buf, "\n ");
520 bch2_bkey_val_to_text(&buf, c, orig_k);
521 bch_err(c, "%s", buf.buf);
522 return -BCH_ERR_fsck_repair_unimplemented;
523 }
524
525 if (bpos_lt(bucket, s->bucket_start) ||
526 bpos_gt(bucket, s->bucket_end))
527 return 0;
528
529 bp_k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers,
530 bucket_pos_to_bp(c, bucket, bp.bucket_offset),
531 0);
532 ret = bkey_err(bp_k);
533 if (ret)
534 goto err;
535
536 if (bp_k.k->type != KEY_TYPE_backpointer ||
537 memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) {
538 bch2_bkey_buf_reassemble(&tmp, c, orig_k);
539
540 if (!bkey_and_val_eq(orig_k, bkey_i_to_s_c(s->last_flushed.k))) {
541 if (bp.level) {
542 bch2_trans_unlock(trans);
543 bch2_btree_interior_updates_flush(c);
544 }
545
546 ret = bch2_btree_write_buffer_flush_sync(trans);
547 if (ret)
548 goto err;
549
550 bch2_bkey_buf_copy(&s->last_flushed, c, tmp.k);
551 ret = -BCH_ERR_transaction_restart_write_buffer_flush;
552 goto out;
553 }
554
555 goto check_existing_bp;
556 }
557out:
558err:
559fsck_err:
560 bch2_trans_iter_exit(trans, &other_extent_iter);
561 bch2_trans_iter_exit(trans, &bp_iter);
562 bch2_bkey_buf_exit(&tmp, c);
563 printbuf_exit(&buf);
564 return ret;
565check_existing_bp:
566 /* Do we have a backpointer for a different extent? */
567 if (bp_k.k->type != KEY_TYPE_backpointer)
568 goto missing;
569
570 struct bch_backpointer other_bp = *bkey_s_c_to_backpointer(bp_k).v;
571
572 struct bkey_s_c other_extent =
573 bch2_backpointer_get_key(trans, &other_extent_iter, bp_k.k->p, other_bp, 0);
574 ret = bkey_err(other_extent);
575 if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
576 ret = 0;
577 if (ret)
578 goto err;
579
580 if (!other_extent.k)
581 goto missing;
582
583 if (bch2_extents_match(orig_k, other_extent)) {
584 printbuf_reset(&buf);
585 prt_printf(&buf, "duplicate versions of same extent, deleting smaller\n ");
586 bch2_bkey_val_to_text(&buf, c, orig_k);
587 prt_str(&buf, "\n ");
588 bch2_bkey_val_to_text(&buf, c, other_extent);
589 bch_err(c, "%s", buf.buf);
590
591 if (other_extent.k->size <= orig_k.k->size) {
592 ret = drop_dev_and_update(trans, other_bp.btree_id, other_extent, bucket.inode);
593 if (ret)
594 goto err;
595 goto out;
596 } else {
597 ret = drop_dev_and_update(trans, bp.btree_id, orig_k, bucket.inode);
598 if (ret)
599 goto err;
600 goto missing;
601 }
602 }
603
604 ret = check_extent_checksum(trans, other_bp.btree_id, other_extent, bp.btree_id, orig_k, bucket.inode);
605 if (ret < 0)
606 goto err;
607 if (ret) {
608 ret = 0;
609 goto missing;
610 }
611
612 ret = check_extent_checksum(trans, bp.btree_id, orig_k, other_bp.btree_id, other_extent, bucket.inode);
613 if (ret < 0)
614 goto err;
615 if (ret) {
616 ret = 0;
617 goto out;
618 }
619
620 printbuf_reset(&buf);
621 prt_printf(&buf, "duplicate extents pointing to same space on dev %llu\n ", bucket.inode);
622 bch2_bkey_val_to_text(&buf, c, orig_k);
623 prt_str(&buf, "\n ");
624 bch2_bkey_val_to_text(&buf, c, other_extent);
625 bch_err(c, "%s", buf.buf);
626 ret = -BCH_ERR_fsck_repair_unimplemented;
627 goto err;
628missing:
629 printbuf_reset(&buf);
630 prt_printf(&buf, "missing backpointer for btree=%s l=%u ",
631 bch2_btree_id_str(bp.btree_id), bp.level);
632 bch2_bkey_val_to_text(&buf, c, orig_k);
633 prt_printf(&buf, "\n got: ");
634 bch2_bkey_val_to_text(&buf, c, bp_k);
635
636 struct bkey_i_backpointer n_bp_k;
637 bkey_backpointer_init(&n_bp_k.k_i);
638 n_bp_k.k.p = bucket_pos_to_bp(trans->c, bucket, bp.bucket_offset);
639 n_bp_k.v = bp;
640 prt_printf(&buf, "\n want: ");
641 bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&n_bp_k.k_i));
642
643 if (fsck_err(c, ptr_to_missing_backpointer, "%s", buf.buf))
644 ret = bch2_bucket_backpointer_mod(trans, bucket, bp, orig_k, true);
645
646 goto out;
647}
648
649static int check_extent_to_backpointers(struct btree_trans *trans,
650 struct extents_to_bp_state *s,
651 enum btree_id btree, unsigned level,
652 struct bkey_s_c k)
653{
654 struct bch_fs *c = trans->c;
655 struct bkey_ptrs_c ptrs;
656 const union bch_extent_entry *entry;
657 struct extent_ptr_decoded p;
658 int ret;
659
660 ptrs = bch2_bkey_ptrs_c(k);
661 bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
662 struct bpos bucket_pos;
663 struct bch_backpointer bp;
664
665 if (p.ptr.cached)
666 continue;
667
668 bch2_extent_ptr_to_bp(c, btree, level, k, p, entry, &bucket_pos, &bp);
669
670 ret = check_bp_exists(trans, s, bucket_pos, bp, k);
671 if (ret)
672 return ret;
673 }
674
675 return 0;
676}
677
/*
 * Check the backpointer for a btree root's key, and report the root's level
 * in *level so the caller can walk the rest of the btree top-down.
 *
 * Retries if the node we read turns out not to be the current root (the root
 * can change underneath us between looking up its level and locking it).
 */
static int check_btree_root_to_backpointers(struct btree_trans *trans,
					    struct extents_to_bp_state *s,
					    enum btree_id btree_id,
					    int *level)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct btree *b;
	struct bkey_s_c k;
	int ret;
retry:
	bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN,
				  0, bch2_btree_id_root(c, btree_id)->b->c.level, 0);
	b = bch2_btree_iter_peek_node(&iter);
	ret = PTR_ERR_OR_ZERO(b);
	if (ret)
		goto err;

	/* raced with a root split/rewrite - try again: */
	if (b != btree_node_root(c, b)) {
		bch2_trans_iter_exit(trans, &iter);
		goto retry;
	}

	*level = b->c.level;

	/* the root's key lives at level + 1 (it has no parent node): */
	k = bkey_i_to_s_c(&b->key);
	ret = check_extent_to_backpointers(trans, s, btree_id, b->c.level + 1, k);
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}
709
710static inline struct bbpos bp_to_bbpos(struct bch_backpointer bp)
711{
712 return (struct bbpos) {
713 .btree = bp.btree_id,
714 .pos = bp.pos,
715 };
716}
717
718static u64 mem_may_pin_bytes(struct bch_fs *c)
719{
720 struct sysinfo i;
721 si_meminfo(&i);
722
723 u64 mem_bytes = i.totalram * i.mem_unit;
724 return div_u64(mem_bytes * c->opts.fsck_memory_usage_percent, 100);
725}
726
727static size_t btree_nodes_fit_in_ram(struct bch_fs *c)
728{
729 return div_u64(mem_may_pin_bytes(c), c->opts.btree_node_size);
730}
731
732static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
733 u64 btree_leaf_mask,
734 u64 btree_interior_mask,
735 struct bbpos start, struct bbpos *end)
736{
737 struct bch_fs *c = trans->c;
738 s64 mem_may_pin = mem_may_pin_bytes(c);
739 int ret = 0;
740
741 btree_interior_mask |= btree_leaf_mask;
742
743 c->btree_cache.pinned_nodes_leaf_mask = btree_leaf_mask;
744 c->btree_cache.pinned_nodes_interior_mask = btree_interior_mask;
745 c->btree_cache.pinned_nodes_start = start;
746 c->btree_cache.pinned_nodes_end = *end = BBPOS_MAX;
747
748 for (enum btree_id btree = start.btree;
749 btree < BTREE_ID_NR && !ret;
750 btree++) {
751 unsigned depth = ((1U << btree) & btree_leaf_mask) ? 0 : 1;
752 struct btree_iter iter;
753 struct btree *b;
754
755 if (!((1U << btree) & btree_leaf_mask) &&
756 !((1U << btree) & btree_interior_mask))
757 continue;
758
759 __for_each_btree_node(trans, iter, btree,
760 btree == start.btree ? start.pos : POS_MIN,
761 0, depth, BTREE_ITER_PREFETCH, b, ret) {
762 mem_may_pin -= btree_buf_bytes(b);
763 if (mem_may_pin <= 0) {
764 c->btree_cache.pinned_nodes_end = *end =
765 BBPOS(btree, b->key.k.p);
766 bch2_trans_iter_exit(trans, &iter);
767 return 0;
768 }
769 }
770 bch2_trans_iter_exit(trans, &iter);
771 }
772
773 return ret;
774}
775
/*
 * One pass of extents -> backpointers checking, restricted to the bucket
 * range in @s: for every btree, check the root's key, then walk every level
 * from the root's level down to the leaves (or down to level 1 for btrees
 * without data pointers), checking each key's backpointers.
 */
static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
						   struct extents_to_bp_state *s)
{
	struct bch_fs *c = trans->c;
	int ret = 0;

	for (enum btree_id btree_id = 0;
	     btree_id < btree_id_nr_alive(c);
	     btree_id++) {
		/* btrees without data ptrs only need interior nodes checked: */
		int level, depth = btree_type_has_ptrs(btree_id) ? 0 : 1;

		ret = commit_do(trans, NULL, NULL,
				BCH_TRANS_COMMIT_no_enospc,
				check_btree_root_to_backpointers(trans, s, btree_id, &level));
		if (ret)
			return ret;

		while (level >= depth) {
			struct btree_iter iter;
			bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0,
						  level,
						  BTREE_ITER_PREFETCH);
			while (1) {
				bch2_trans_begin(trans);

				struct bkey_s_c k = bch2_btree_iter_peek(&iter);
				if (!k.k)
					break;
				ret = bkey_err(k) ?:
					check_extent_to_backpointers(trans, s, btree_id, level, k) ?:
					bch2_trans_commit(trans, NULL, NULL,
							  BCH_TRANS_COMMIT_no_enospc);
				/* on restart, retry the same key: */
				if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
					ret = 0;
					continue;
				}
				if (ret)
					break;
				if (bpos_eq(iter.pos, SPOS_MAX))
					break;
				bch2_btree_iter_advance(&iter);
			}
			bch2_trans_iter_exit(trans, &iter);

			if (ret)
				return ret;

			--level;
		}
	}

	return 0;
}
829
/*
 * fsck: verify that every extent/btree ptr has a backpointer.
 *
 * Runs in one or more passes: each pass pins as much of the backpointers
 * btree in memory as the fsck memory budget allows, and only checks
 * backpointers in that bucket range; the next pass resumes where the
 * previous one stopped.
 */
int bch2_check_extents_to_backpointers(struct bch_fs *c)
{
	struct btree_trans *trans = bch2_trans_get(c);
	struct extents_to_bp_state s = { .bucket_start = POS_MIN };
	int ret;

	bch2_bkey_buf_init(&s.last_flushed);
	bkey_init(&s.last_flushed.k->k);

	while (1) {
		struct bbpos end;
		/* pin as much of the backpointers btree as fits in ram: */
		ret = bch2_get_btree_in_memory_pos(trans,
						   BIT_ULL(BTREE_ID_backpointers),
						   BIT_ULL(BTREE_ID_backpointers),
						   BBPOS(BTREE_ID_backpointers, s.bucket_start), &end);
		if (ret)
			break;

		s.bucket_end = end.pos;

		/* note (only on the first pass) that we'll need multiple passes: */
		if ( bpos_eq(s.bucket_start, POS_MIN) &&
		    !bpos_eq(s.bucket_end, SPOS_MAX))
			bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass",
				    __func__, btree_nodes_fit_in_ram(c));

		if (!bpos_eq(s.bucket_start, POS_MIN) ||
		    !bpos_eq(s.bucket_end, SPOS_MAX)) {
			struct printbuf buf = PRINTBUF;

			prt_str(&buf, "check_extents_to_backpointers(): ");
			bch2_bpos_to_text(&buf, s.bucket_start);
			prt_str(&buf, "-");
			bch2_bpos_to_text(&buf, s.bucket_end);

			bch_verbose(c, "%s", buf.buf);
			printbuf_exit(&buf);
		}

		ret = bch2_check_extents_to_backpointers_pass(trans, &s);
		if (ret || bpos_eq(s.bucket_end, SPOS_MAX))
			break;

		s.bucket_start = bpos_successor(s.bucket_end);
	}
	bch2_trans_put(trans);
	bch2_bkey_buf_exit(&s.last_flushed, c);

	/* unpin everything we pinned: */
	c->btree_cache.pinned_nodes_leaf_mask = 0;
	c->btree_cache.pinned_nodes_interior_mask = 0;

	bch_err_fn(c, ret);
	return ret;
}
883
/*
 * fsck: check that a single backpointer's target extent/btree node exists;
 * delete the backpointer if it doesn't. Targets outside [start, end] are
 * skipped (they belong to a different pass).
 *
 * Before declaring a target missing, flushes the btree write buffer once per
 * backpointer position (tracked via *last_flushed_pos) and restarts, so
 * buffered updates can't cause false positives.
 */
static int check_one_backpointer(struct btree_trans *trans,
				 struct bbpos start,
				 struct bbpos end,
				 struct bkey_s_c_backpointer bp,
				 struct bpos *last_flushed_pos)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bbpos pos = bp_to_bbpos(*bp.v);
	struct bkey_s_c k;
	struct printbuf buf = PRINTBUF;
	int ret;

	if (bbpos_cmp(pos, start) < 0 ||
	    bbpos_cmp(pos, end) > 0)
		return 0;

	/* on error/not-found, get_key has already exited the iter: */
	k = bch2_backpointer_get_key(trans, &iter, bp.k->p, *bp.v, 0);
	ret = bkey_err(k);
	if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
		return 0;
	if (ret)
		return ret;

	if (!k.k && !bpos_eq(*last_flushed_pos, bp.k->p)) {
		*last_flushed_pos = bp.k->p;
		ret = bch2_btree_write_buffer_flush_sync(trans) ?:
			-BCH_ERR_transaction_restart_write_buffer_flush;
		goto out;
	}

	if (fsck_err_on(!k.k, c,
			backpointer_to_missing_ptr,
			"backpointer for missing %s\n  %s",
			bp.v->level ? "btree node" : "extent",
			(bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) {
		ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p);
		goto out;
	}
out:
fsck_err:
	bch2_trans_iter_exit(trans, &iter);
	printbuf_exit(&buf);
	return ret;
}
929
/*
 * One pass of backpointers -> extents checking: walk every backpointer,
 * checking (and committing repairs for) those whose targets fall in
 * [start, end].
 */
static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
						   struct bbpos start,
						   struct bbpos end)
{
	/* SPOS_MAX == "nothing flushed yet" (no backpointer lives there): */
	struct bpos last_flushed_pos = SPOS_MAX;

	return for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers,
					 POS_MIN, BTREE_ITER_PREFETCH, k,
					 NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
		check_one_backpointer(trans, start, end,
				      bkey_s_c_to_backpointer(k),
				      &last_flushed_pos));
}
943
944int bch2_check_backpointers_to_extents(struct bch_fs *c)
945{
946 struct btree_trans *trans = bch2_trans_get(c);
947 struct bbpos start = (struct bbpos) { .btree = 0, .pos = POS_MIN, }, end;
948 int ret;
949
950 while (1) {
951 ret = bch2_get_btree_in_memory_pos(trans,
952 (1U << BTREE_ID_extents)|
953 (1U << BTREE_ID_reflink),
954 ~0,
955 start, &end);
956 if (ret)
957 break;
958
959 if (!bbpos_cmp(start, BBPOS_MIN) &&
960 bbpos_cmp(end, BBPOS_MAX))
961 bch_verbose(c, "%s(): extents do not fit in ram, running in multiple passes with %zu nodes per pass",
962 __func__, btree_nodes_fit_in_ram(c));
963
964 if (bbpos_cmp(start, BBPOS_MIN) ||
965 bbpos_cmp(end, BBPOS_MAX)) {
966 struct printbuf buf = PRINTBUF;
967
968 prt_str(&buf, "check_backpointers_to_extents(): ");
969 bch2_bbpos_to_text(&buf, start);
970 prt_str(&buf, "-");
971 bch2_bbpos_to_text(&buf, end);
972
973 bch_verbose(c, "%s", buf.buf);
974 printbuf_exit(&buf);
975 }
976
977 ret = bch2_check_backpointers_to_extents_pass(trans, start, end);
978 if (ret || !bbpos_cmp(end, BBPOS_MAX))
979 break;
980
981 start = bbpos_successor(end);
982 }
983 bch2_trans_put(trans);
984
985 c->btree_cache.pinned_nodes_leaf_mask = 0;
986 c->btree_cache.pinned_nodes_interior_mask = 0;
987
988 bch_err_fn(c, ret);
989 return ret;
990}