Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: GPL-2.0
2
3#include "bcachefs.h"
4#include "alloc_background.h"
5#include "alloc_foreground.h"
6#include "backpointers.h"
7#include "bkey_buf.h"
8#include "btree_gc.h"
9#include "btree_io.h"
10#include "btree_update.h"
11#include "btree_update_interior.h"
12#include "btree_write_buffer.h"
13#include "compress.h"
14#include "disk_groups.h"
15#include "ec.h"
16#include "errcode.h"
17#include "error.h"
18#include "inode.h"
19#include "io_read.h"
20#include "io_write.h"
21#include "journal_reclaim.h"
22#include "keylist.h"
23#include "move.h"
24#include "rebalance.h"
25#include "reflink.h"
26#include "replicas.h"
27#include "snapshot.h"
28#include "super-io.h"
29#include "trace.h"
30
31#include <linux/ioprio.h>
32#include <linux/kthread.h>
33
/* Human-readable names for BCH_DATA_OPS() entries, indexed by op number; NULL-terminated */
const char * const bch2_data_ops_strs[] = {
#define x(t, n, ...) [n] = #t,
	BCH_DATA_OPS()
#undef x
	NULL
};
40
41static void trace_io_move2(struct bch_fs *c, struct bkey_s_c k,
42 struct bch_io_opts *io_opts,
43 struct data_update_opts *data_opts)
44{
45 if (trace_io_move_enabled()) {
46 struct printbuf buf = PRINTBUF;
47
48 bch2_bkey_val_to_text(&buf, c, k);
49 prt_newline(&buf);
50 bch2_data_update_opts_to_text(&buf, c, io_opts, data_opts);
51 trace_io_move(c, buf.buf);
52 printbuf_exit(&buf);
53 }
54}
55
56static void trace_io_move_read2(struct bch_fs *c, struct bkey_s_c k)
57{
58 if (trace_io_move_read_enabled()) {
59 struct printbuf buf = PRINTBUF;
60
61 bch2_bkey_val_to_text(&buf, c, k);
62 trace_io_move_read(c, buf.buf);
63 printbuf_exit(&buf);
64 }
65}
66
/*
 * State for one in-flight data move: the read of the existing copy plus the
 * data update (write) that replaces it.
 */
struct moving_io {
	struct list_head		read_list;	/* on moving_context.reads */
	struct list_head		io_list;	/* on moving_context.ios */
	struct move_bucket_in_flight	*b;		/* bucket being evacuated, if any */
	struct closure			cl;
	bool				read_completed;	/* set by move_read_endio() */

	unsigned			read_sectors;	/* for moving_context accounting */
	unsigned			write_sectors;

	struct data_update		write;
};
79
/*
 * Tear down and free a moving_io: drop the in-flight bucket count, unlink it
 * from the context's io list (waking anyone waiting on the context), and
 * release the data update's resources.
 */
static void move_free(struct moving_io *io)
{
	struct moving_context *ctxt = io->write.ctxt;

	if (io->b)
		atomic_dec(&io->b->count);

	mutex_lock(&ctxt->lock);
	list_del(&io->io_list);
	wake_up(&ctxt->wait);
	mutex_unlock(&ctxt->lock);

	if (!io->write.data_opts.scrub) {
		bch2_data_update_exit(&io->write);
	} else {
		/*
		 * Scrub path only did bch2_data_update_bios_init(), so only
		 * the bio pages and bvecs need freeing here:
		 */
		bch2_bio_free_pages_pool(io->write.op.c, &io->write.op.wbio.bio);
		kfree(io->write.bvecs);
	}
	kfree(io);
}
100
/*
 * Write completion for a move: record/trace any write error, drop the
 * context's write accounting, free the io, and release the closure ref taken
 * in move_write().
 */
static void move_write_done(struct bch_write_op *op)
{
	struct moving_io *io = container_of(op, struct moving_io, write.op);
	struct bch_fs *c = op->c;
	struct moving_context *ctxt = io->write.ctxt;

	if (op->error) {
		if (trace_io_move_write_fail_enabled()) {
			struct printbuf buf = PRINTBUF;

			bch2_write_op_to_text(&buf, op);
			prt_printf(&buf, "ret\t%s\n", bch2_err_str(op->error));
			trace_io_move_write_fail(c, buf.buf);
			printbuf_exit(&buf);
		}
		this_cpu_inc(c->counters[BCH_COUNTER_io_move_write_fail]);

		ctxt->write_error = true;
	}

	atomic_sub(io->write_sectors, &ctxt->write_sectors);
	atomic_dec(&ctxt->write_ios);
	move_free(io);
	closure_put(&ctxt->cl);
}
126
/*
 * Start the write half of a move once its read has completed.  If the read
 * failed, or this was a scrub (no write to do), the io is freed instead of
 * being submitted.
 */
static void move_write(struct moving_io *io)
{
	struct moving_context *ctxt = io->write.ctxt;

	/* Account read errors (corrected vs. uncorrected) against the stats: */
	if (ctxt->stats) {
		if (io->write.rbio.bio.bi_status)
			atomic64_add(io->write.rbio.bvec_iter.bi_size >> 9,
				     &ctxt->stats->sectors_error_uncorrected);
		else if (io->write.rbio.saw_error)
			atomic64_add(io->write.rbio.bvec_iter.bi_size >> 9,
				     &ctxt->stats->sectors_error_corrected);
	}

	if (unlikely(io->write.rbio.ret ||
		     io->write.rbio.bio.bi_status ||
		     io->write.data_opts.scrub)) {
		move_free(io);
		return;
	}

	if (trace_io_move_write_enabled()) {
		struct bch_fs *c = io->write.op.c;
		struct printbuf buf = PRINTBUF;

		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(io->write.k.k));
		trace_io_move_write(c, buf.buf);
		printbuf_exit(&buf);
	}

	/* Ref and accounting are dropped by move_write_done(): */
	closure_get(&io->write.ctxt->cl);
	atomic_add(io->write_sectors, &io->write.ctxt->write_sectors);
	atomic_inc(&io->write.ctxt->write_ios);

	bch2_data_update_read_done(&io->write);
}
162
163struct moving_io *bch2_moving_ctxt_next_pending_write(struct moving_context *ctxt)
164{
165 struct moving_io *io =
166 list_first_entry_or_null(&ctxt->reads, struct moving_io, read_list);
167
168 return io && io->read_completed ? io : NULL;
169}
170
/*
 * Read completion for a move: drop read accounting, mark the io ready for its
 * write, and wake the context.  Drops the closure ref taken in
 * bch2_move_extent().
 */
static void move_read_endio(struct bio *bio)
{
	struct moving_io *io = container_of(bio, struct moving_io, write.rbio.bio);
	struct moving_context *ctxt = io->write.ctxt;

	atomic_sub(io->read_sectors, &ctxt->read_sectors);
	atomic_dec(&ctxt->read_ios);
	io->read_completed = true;

	wake_up(&ctxt->wait);
	closure_put(&ctxt->cl);
}
183
/*
 * Submit writes, in read-submission order, for every queued read that has
 * completed.  Transaction locks are dropped first since move_write() may
 * block.
 */
void bch2_moving_ctxt_do_pending_writes(struct moving_context *ctxt)
{
	struct moving_io *io;

	while ((io = bch2_moving_ctxt_next_pending_write(ctxt))) {
		bch2_trans_unlock_long(ctxt->trans);
		list_del(&io->read_list);
		move_write(io);
	}
}
194
/*
 * Block until some write IO completes: waits for the in-flight write sector
 * count to either hit zero or change from the value sampled on entry.
 */
void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt)
{
	unsigned sectors_pending = atomic_read(&ctxt->write_sectors);

	move_ctxt_wait_event(ctxt,
		!atomic_read(&ctxt->write_sectors) ||
		atomic_read(&ctxt->write_sectors) != sectors_pending);
}
203
/*
 * Wait for all outstanding moves on @ctxt to finish: first drain the read
 * queue (issuing pending writes as reads complete), then sync the closure so
 * all writes have completed too.
 */
void bch2_moving_ctxt_flush_all(struct moving_context *ctxt)
{
	move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads));
	bch2_trans_unlock_long(ctxt->trans);
	closure_sync(&ctxt->cl);
}
210
/*
 * Tear down a moving_context initialized with bch2_moving_ctxt_init(): flush
 * all outstanding IO, remove it from the filesystem's context list, and
 * release its btree transaction.
 */
void bch2_moving_ctxt_exit(struct moving_context *ctxt)
{
	struct bch_fs *c = ctxt->trans->c;

	bch2_moving_ctxt_flush_all(ctxt);

	/* After the flush, no IO may be outstanding: */
	EBUG_ON(atomic_read(&ctxt->write_sectors));
	EBUG_ON(atomic_read(&ctxt->write_ios));
	EBUG_ON(atomic_read(&ctxt->read_sectors));
	EBUG_ON(atomic_read(&ctxt->read_ios));

	mutex_lock(&c->moving_context_lock);
	list_del(&ctxt->list);
	mutex_unlock(&c->moving_context_lock);

	/*
	 * Generally, releasing a transaction within a transaction restart means
	 * an unhandled transaction restart: but this can happen legitimately
	 * within the move code, e.g. when bch2_move_ratelimit() tells us to
	 * exit before we've retried
	 */
	bch2_trans_begin(ctxt->trans);
	bch2_trans_put(ctxt->trans);
	memset(ctxt, 0, sizeof(*ctxt));
}
236
/*
 * Initialize a moving_context and register it on the filesystem's list of
 * active contexts.  Must be paired with bch2_moving_ctxt_exit().
 *
 * @rate may be NULL (no rate limiting); @stats may be NULL (no progress
 * accounting).
 */
void bch2_moving_ctxt_init(struct moving_context *ctxt,
			   struct bch_fs *c,
			   struct bch_ratelimit *rate,
			   struct bch_move_stats *stats,
			   struct write_point_specifier wp,
			   bool wait_on_copygc)
{
	memset(ctxt, 0, sizeof(*ctxt));

	ctxt->trans	= bch2_trans_get(c);
	ctxt->fn	= (void *) _RET_IP_;	/* caller, for debugging/reporting */
	ctxt->rate	= rate;
	ctxt->stats	= stats;
	ctxt->wp	= wp;
	ctxt->wait_on_copygc = wait_on_copygc;

	closure_init_stack(&ctxt->cl);

	mutex_init(&ctxt->lock);
	INIT_LIST_HEAD(&ctxt->reads);
	INIT_LIST_HEAD(&ctxt->ios);
	init_waitqueue_head(&ctxt->wait);

	mutex_lock(&c->moving_context_lock);
	list_add(&ctxt->list, &c->moving_context_list);
	mutex_unlock(&c->moving_context_lock);
}
264
/* Finish a stats collection: emits the move_data tracepoint with final stats */
void bch2_move_stats_exit(struct bch_move_stats *stats, struct bch_fs *c)
{
	trace_move_data(c, stats);
}
269
/* Zero @stats and label it with @name (truncated to fit) for reporting */
void bch2_move_stats_init(struct bch_move_stats *stats, const char *name)
{
	memset(stats, 0, sizeof(*stats));
	stats->data_type = BCH_DATA_user;
	scnprintf(stats->name, sizeof(stats->name), "%s", name);
}
276
/*
 * Queue a move (or scrub) of extent @k: allocate a moving_io, set up the data
 * update, and kick off the read; the write is issued from move_write() once
 * the read completes.
 *
 * Returns 0 on success - including when there was nothing to do, or when the
 * failure is one the caller shouldn't see (BCH_ERR_data_update_done) - or a
 * negative error code.
 */
int bch2_move_extent(struct moving_context *ctxt,
		     struct move_bucket_in_flight *bucket_in_flight,
		     struct btree_iter *iter,
		     struct bkey_s_c k,
		     struct bch_io_opts io_opts,
		     struct data_update_opts data_opts)
{
	struct btree_trans *trans = ctxt->trans;
	struct bch_fs *c = trans->c;
	int ret = -ENOMEM;

	trace_io_move2(c, k, &io_opts, &data_opts);
	this_cpu_add(c->counters[BCH_COUNTER_io_move], k.k->size);

	if (ctxt->stats)
		ctxt->stats->pos = BBPOS(iter->btree_id, iter->pos);

	bch2_data_update_opts_normalize(k, &data_opts);

	/* Nothing to rewrite? Then there's at most pointers to drop: */
	if (!data_opts.rewrite_ptrs &&
	    !data_opts.extra_replicas &&
	    !data_opts.scrub) {
		if (data_opts.kill_ptrs)
			return bch2_extent_drop_ptrs(trans, iter, k, &io_opts, &data_opts);
		return 0;
	}

	/*
	 * Before memory allocations & taking nocow locks in
	 * bch2_data_update_init():
	 */
	bch2_trans_unlock(trans);

	struct moving_io *io = kzalloc(sizeof(struct moving_io), GFP_KERNEL);
	if (!io)
		goto err;

	INIT_LIST_HEAD(&io->io_list);
	io->write.ctxt = ctxt;
	io->read_sectors = k.k->size;
	io->write_sectors = k.k->size;

	if (!data_opts.scrub) {
		ret = bch2_data_update_init(trans, iter, ctxt, &io->write, ctxt->wp,
					    &io_opts, data_opts, iter->btree_id, k);
		if (ret)
			goto err_free;

		io->write.op.end_io = move_write_done;
	} else {
		/*
		 * Scrub only reads; set up just enough of the data_update for
		 * the read side (see matching teardown in move_free()):
		 */
		bch2_bkey_buf_init(&io->write.k);
		bch2_bkey_buf_reassemble(&io->write.k, c, k);

		io->write.op.c = c;
		io->write.data_opts = data_opts;

		ret = bch2_data_update_bios_init(&io->write, c, &io_opts);
		if (ret)
			goto err_free;
	}

	io->write.rbio.bio.bi_end_io = move_read_endio;
	io->write.rbio.bio.bi_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0);

	if (ctxt->rate)
		bch2_ratelimit_increment(ctxt->rate, k.k->size);

	if (ctxt->stats) {
		atomic64_inc(&ctxt->stats->keys_moved);
		atomic64_add(k.k->size, &ctxt->stats->sectors_moved);
	}

	if (bucket_in_flight) {
		io->b = bucket_in_flight;
		atomic_inc(&io->b->count);	/* dropped in move_free() */
	}

	trace_io_move_read2(c, k);

	mutex_lock(&ctxt->lock);
	atomic_add(io->read_sectors, &ctxt->read_sectors);
	atomic_inc(&ctxt->read_ios);

	list_add_tail(&io->read_list, &ctxt->reads);
	list_add_tail(&io->io_list, &ctxt->ios);
	mutex_unlock(&ctxt->lock);

	/*
	 * dropped by move_read_endio() - guards against use after free of
	 * ctxt when doing wakeup
	 */
	closure_get(&ctxt->cl);
	__bch2_read_extent(trans, &io->write.rbio,
			   io->write.rbio.bio.bi_iter,
			   bkey_start_pos(k.k),
			   iter->btree_id, k, 0,
			   NULL,
			   BCH_READ_last_fragment,
			   data_opts.scrub ? data_opts.read_dev : -1);
	return 0;
err_free:
	kfree(io);
err:
	if (bch2_err_matches(ret, BCH_ERR_data_update_done))
		return 0;

	if (bch2_err_matches(ret, EROFS) ||
	    bch2_err_matches(ret, BCH_ERR_transaction_restart))
		return ret;

	count_event(c, io_move_start_fail);

	if (trace_io_move_start_fail_enabled()) {
		struct printbuf buf = PRINTBUF;

		bch2_bkey_val_to_text(&buf, c, k);
		prt_str(&buf, ": ");
		prt_str(&buf, bch2_err_str(ret));
		trace_io_move_start_fail(c, buf.buf);
		printbuf_exit(&buf);
	}
	return ret;
}
400
/*
 * Look up the io options that apply to @extent_k, caching per-snapshot inode
 * options in @io_opts for the current inode number.  Indirect extents
 * (reflink_v) get the filesystem-wide defaults.  In all cases the rebalance
 * options on the extent are checked/updated before returning.
 *
 * Returns a pointer into @io_opts (or its fs_io_opts), or an ERR_PTR.
 */
static struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans,
			  struct per_snapshot_io_opts *io_opts,
			  struct bpos extent_pos, /* extent_iter, extent_k may be in reflink btree */
			  struct btree_iter *extent_iter,
			  struct bkey_s_c extent_k)
{
	struct bch_fs *c = trans->c;
	u32 restart_count = trans->restart_count;
	struct bch_io_opts *opts_ret = &io_opts->fs_io_opts;
	int ret = 0;

	if (extent_k.k->type == KEY_TYPE_reflink_v)
		goto out;

	/* Cache miss: rebuild the per-snapshot options list for this inode: */
	if (io_opts->cur_inum != extent_pos.inode) {
		io_opts->d.nr = 0;

		ret = for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, extent_pos.inode),
					 BTREE_ITER_all_snapshots, k, ({
			if (k.k->p.offset != extent_pos.inode)
				break;

			if (!bkey_is_inode(k.k))
				continue;

			struct bch_inode_unpacked inode;
			_ret3 = bch2_inode_unpack(k, &inode);
			if (_ret3)
				break;

			struct snapshot_io_opts_entry e = { .snapshot = k.k->p.snapshot };
			bch2_inode_opts_get(&e.io_opts, trans->c, &inode);

			darray_push(&io_opts->d, e);
		}));
		io_opts->cur_inum = extent_pos.inode;
	}

	ret = ret ?: trans_was_restarted(trans, restart_count);
	if (ret)
		return ERR_PTR(ret);

	/* Pick the entry for the snapshot @extent_k is visible in: */
	if (extent_k.k->p.snapshot)
		darray_for_each(io_opts->d, i)
			if (bch2_snapshot_is_ancestor(c, extent_k.k->p.snapshot, i->snapshot)) {
				opts_ret = &i->io_opts;
				break;
			}
out:
	ret = bch2_get_update_rebalance_opts(trans, opts_ret, extent_iter, extent_k);
	if (ret)
		return ERR_PTR(ret);
	return opts_ret;
}
455
/*
 * Fill @io_opts for a single extent: start from the filesystem defaults, then
 * overlay the owning inode's options when the inode can be found.  Extents in
 * the reflink btree (inode 0) just get the defaults.
 *
 * Returns 0 or a negative error (transaction restarts are propagated).
 */
int bch2_move_get_io_opts_one(struct btree_trans *trans,
			      struct bch_io_opts *io_opts,
			      struct btree_iter *extent_iter,
			      struct bkey_s_c extent_k)
{
	struct bch_fs *c = trans->c;

	*io_opts = bch2_opts_to_inode_opts(c->opts);

	/* reflink btree? */
	if (!extent_k.k->p.inode)
		goto out;

	struct btree_iter inode_iter;
	struct bkey_s_c inode_k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes,
			       SPOS(0, extent_k.k->p.inode, extent_k.k->p.snapshot),
			       BTREE_ITER_cached);
	int ret = bkey_err(inode_k);
	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
		return ret;

	/* Other lookup errors fall back to the default options: */
	if (!ret && bkey_is_inode(inode_k.k)) {
		struct bch_inode_unpacked inode;
		bch2_inode_unpack(inode_k, &inode);
		bch2_inode_opts_get(io_opts, c, &inode);
	}
	bch2_trans_iter_exit(trans, &inode_iter);
out:
	return bch2_get_update_rebalance_opts(trans, io_opts, extent_iter, extent_k);
}
486
/*
 * Throttle the move path: optionally wait for copygc, apply the configured
 * rate limit, and wait until in-flight IO drops below the configured
 * sector/io limits.
 *
 * Returns 1 if the calling kthread should stop, 0 otherwise.
 */
int bch2_move_ratelimit(struct moving_context *ctxt)
{
	struct bch_fs *c = ctxt->trans->c;
	bool is_kthread = current->flags & PF_KTHREAD;
	u64 delay;

	if (ctxt->wait_on_copygc && c->copygc_running) {
		bch2_moving_ctxt_flush_all(ctxt);
		wait_event_killable(c->copygc_running_wq,
				    !c->copygc_running ||
				    (is_kthread && kthread_should_stop()));
	}

	do {
		delay = ctxt->rate ? bch2_ratelimit_delay(ctxt->rate) : 0;

		if (is_kthread && kthread_should_stop())
			return 1;

		if (delay)
			move_ctxt_wait_event_timeout(ctxt,
						     freezing(current) ||
						     (is_kthread && kthread_should_stop()),
						     delay);

		if (unlikely(freezing(current))) {
			/* Quiesce all IO before freezing: */
			bch2_moving_ctxt_flush_all(ctxt);
			try_to_freeze();
		}
	} while (delay);

	/*
	 * XXX: these limits really ought to be per device, SSDs and hard drives
	 * will want different limits
	 */
	move_ctxt_wait_event(ctxt,
		atomic_read(&ctxt->write_sectors) < c->opts.move_bytes_in_flight >> 9 &&
		atomic_read(&ctxt->read_sectors) < c->opts.move_bytes_in_flight >> 9 &&
		atomic_read(&ctxt->write_ios) < c->opts.move_ios_in_flight &&
		atomic_read(&ctxt->read_ios) < c->opts.move_ios_in_flight);

	return 0;
}
530
531/*
532 * Move requires non extents iterators, and there's also no need for it to
533 * signal indirect_extent_missing_error:
534 */
/*
 * Look up the indirect extent that reflink pointer @p points at, initializing
 * @iter to point at it.  Returns bkey_s_c_null (with @iter exited) when the
 * pointer is errored or no indirect extent covers the position; on error the
 * iterator is also exited.  On success the caller owns @iter.
 */
static struct bkey_s_c bch2_lookup_indirect_extent_for_move(struct btree_trans *trans,
						  struct btree_iter *iter,
						  struct bkey_s_c_reflink_p p)
{
	if (unlikely(REFLINK_P_ERROR(p.v)))
		return bkey_s_c_null;

	struct bpos reflink_pos = POS(0, REFLINK_P_IDX(p.v));

	bch2_trans_iter_init(trans, iter,
			     BTREE_ID_reflink, reflink_pos,
			     BTREE_ITER_not_extents);

	struct bkey_s_c k = bch2_btree_iter_peek(trans, iter);
	if (!k.k || bkey_err(k)) {
		bch2_trans_iter_exit(trans, iter);
		return k;
	}

	/* Peeked key starts past the target position - no covering extent: */
	if (bkey_lt(reflink_pos, bkey_start_pos(k.k))) {
		bch2_trans_iter_exit(trans, iter);
		return bkey_s_c_null;
	}

	return k;
}
561
/*
 * Walk keys in @btree_id from @start to @end, moving every extent that @pred
 * selects.  When moving a single file's range, indirect extents reachable via
 * reflink pointers are processed as well.
 *
 * Returns 0 on success or a negative error; per-extent move failures other
 * than restarts/ENOMEM are currently skipped (see XXX below).
 */
static int bch2_move_data_btree(struct moving_context *ctxt,
				struct bpos start,
				struct bpos end,
				move_pred_fn pred, void *arg,
				enum btree_id btree_id)
{
	struct btree_trans *trans = ctxt->trans;
	struct bch_fs *c = trans->c;
	struct per_snapshot_io_opts snapshot_io_opts;
	struct bch_io_opts *io_opts;
	struct bkey_buf sk;
	struct btree_iter iter, reflink_iter = {};
	struct bkey_s_c k;
	struct data_update_opts data_opts;
	/*
	 * If we're moving a single file, also process reflinked data it points
	 * to (this includes propagating changed io_opts from the inode to the
	 * extent):
	 */
	bool walk_indirect = start.inode == end.inode;
	int ret = 0, ret2;

	per_snapshot_io_opts_init(&snapshot_io_opts, c);
	bch2_bkey_buf_init(&sk);

	if (ctxt->stats) {
		ctxt->stats->data_type = BCH_DATA_user;
		ctxt->stats->pos = BBPOS(btree_id, start);
	}

	bch2_trans_begin(trans);
	bch2_trans_iter_init(trans, &iter, btree_id, start,
			     BTREE_ITER_prefetch|
			     BTREE_ITER_not_extents|
			     BTREE_ITER_all_snapshots);

	if (ctxt->rate)
		bch2_ratelimit_reset(ctxt->rate);

	while (!bch2_move_ratelimit(ctxt)) {
		struct btree_iter *extent_iter = &iter;

		bch2_trans_begin(trans);

		k = bch2_btree_iter_peek(trans, &iter);
		if (!k.k)
			break;

		ret = bkey_err(k);
		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			continue;
		if (ret)
			break;

		if (bkey_ge(bkey_start_pos(k.k), end))
			break;

		if (ctxt->stats)
			ctxt->stats->pos = BBPOS(iter.btree_id, iter.pos);

		/* Follow reflink pointers to the indirect extent, if allowed: */
		if (walk_indirect &&
		    k.k->type == KEY_TYPE_reflink_p &&
		    REFLINK_P_MAY_UPDATE_OPTIONS(bkey_s_c_to_reflink_p(k).v)) {
			struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);

			bch2_trans_iter_exit(trans, &reflink_iter);
			k = bch2_lookup_indirect_extent_for_move(trans, &reflink_iter, p);
			ret = bkey_err(k);
			if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
				continue;
			if (ret)
				break;

			if (!k.k)
				goto next_nondata;

			/*
			 * XXX: reflink pointers may point to multiple indirect
			 * extents, so don't advance past the entire reflink
			 * pointer - need to fixup iter->k
			 */
			extent_iter = &reflink_iter;
		}

		if (!bkey_extent_is_direct_data(k.k))
			goto next_nondata;

		io_opts = bch2_move_get_io_opts(trans, &snapshot_io_opts,
						iter.pos, extent_iter, k);
		ret = PTR_ERR_OR_ZERO(io_opts);
		if (ret)
			continue;

		memset(&data_opts, 0, sizeof(data_opts));
		if (!pred(c, arg, k, io_opts, &data_opts))
			goto next;

		/*
		 * The iterator gets unlocked by __bch2_read_extent - need to
		 * save a copy of @k elsewhere:
		 */
		bch2_bkey_buf_reassemble(&sk, c, k);
		k = bkey_i_to_s_c(sk.k);

		ret2 = bch2_move_extent(ctxt, NULL, extent_iter, k, *io_opts, data_opts);
		if (ret2) {
			if (bch2_err_matches(ret2, BCH_ERR_transaction_restart))
				continue;

			if (bch2_err_matches(ret2, ENOMEM)) {
				/* memory allocation failure, wait for some IO to finish */
				bch2_move_ctxt_wait_for_io(ctxt);
				continue;
			}

			/* XXX signal failure */
			goto next;
		}
next:
		if (ctxt->stats)
			atomic64_add(k.k->size, &ctxt->stats->sectors_seen);
next_nondata:
		bch2_btree_iter_advance(trans, &iter);
	}

	bch2_trans_iter_exit(trans, &reflink_iter);
	bch2_trans_iter_exit(trans, &iter);
	bch2_bkey_buf_exit(&sk, c);
	per_snapshot_io_opts_exit(&snapshot_io_opts);

	return ret;
}
694
695int __bch2_move_data(struct moving_context *ctxt,
696 struct bbpos start,
697 struct bbpos end,
698 move_pred_fn pred, void *arg)
699{
700 struct bch_fs *c = ctxt->trans->c;
701 enum btree_id id;
702 int ret = 0;
703
704 for (id = start.btree;
705 id <= min_t(unsigned, end.btree, btree_id_nr_alive(c) - 1);
706 id++) {
707 ctxt->stats->pos = BBPOS(id, POS_MIN);
708
709 if (!btree_type_has_ptrs(id) ||
710 !bch2_btree_id_root(c, id)->b)
711 continue;
712
713 ret = bch2_move_data_btree(ctxt,
714 id == start.btree ? start.pos : POS_MIN,
715 id == end.btree ? end.pos : POS_MAX,
716 pred, arg, id);
717 if (ret)
718 break;
719 }
720
721 return ret;
722}
723
/*
 * Convenience wrapper around __bch2_move_data() that sets up and tears down a
 * moving_context around the walk.
 */
int bch2_move_data(struct bch_fs *c,
		   struct bbpos start,
		   struct bbpos end,
		   struct bch_ratelimit *rate,
		   struct bch_move_stats *stats,
		   struct write_point_specifier wp,
		   bool wait_on_copygc,
		   move_pred_fn pred, void *arg)
{
	struct moving_context ctxt;

	bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc);
	int ret = __bch2_move_data(&ctxt, start, end, pred, arg);
	bch2_moving_ctxt_exit(&ctxt);

	return ret;
}
741
/*
 * Move data by physical location: walk backpointers for buckets
 * [bucket_start, bucket_end) on device @dev, and for each backpointer of a
 * type in @data_types hand the pointed-to extent or btree node to @pred.
 * Used for bucket evacuation and scrub.
 *
 * Returns 0 on success or a negative error.
 */
static int __bch2_move_data_phys(struct moving_context *ctxt,
			struct move_bucket_in_flight *bucket_in_flight,
			unsigned dev,
			u64 bucket_start,
			u64 bucket_end,
			unsigned data_types,
			move_pred_fn pred, void *arg)
{
	struct btree_trans *trans = ctxt->trans;
	struct bch_fs *c = trans->c;
	bool is_kthread = current->flags & PF_KTHREAD;
	struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
	struct btree_iter iter = {}, bp_iter = {};
	struct bkey_buf sk;
	struct bkey_s_c k;
	struct bkey_buf last_flushed;
	int ret = 0;

	struct bch_dev *ca = bch2_dev_tryget(c, dev);
	if (!ca)
		return 0;

	bucket_end = min(bucket_end, ca->mi.nbuckets);

	struct bpos bp_start	= bucket_pos_to_bp_start(ca, POS(dev, bucket_start));
	struct bpos bp_end	= bucket_pos_to_bp_end(ca, POS(dev, bucket_end));
	bch2_dev_put(ca);
	ca = NULL;

	bch2_bkey_buf_init(&last_flushed);
	bkey_init(&last_flushed.k->k);
	bch2_bkey_buf_init(&sk);

	/*
	 * We're not run in a context that handles transaction restarts:
	 */
	bch2_trans_begin(trans);

	bch2_trans_iter_init(trans, &bp_iter, BTREE_ID_backpointers, bp_start, 0);

	/*
	 * NOTE(review): ret is still 0 here, so this check never fires - it
	 * looks like a leftover from a removed alloc-key lookup; confirm and
	 * drop if so.
	 */
	bch_err_msg(c, ret, "looking up alloc key");
	if (ret)
		goto err;

	ret = bch2_btree_write_buffer_tryflush(trans);
	bch_err_msg(c, ret, "flushing btree write buffer");
	if (ret)
		goto err;

	while (!(ret = bch2_move_ratelimit(ctxt))) {
		if (is_kthread && kthread_should_stop())
			break;

		bch2_trans_begin(trans);

		k = bch2_btree_iter_peek(trans, &bp_iter);
		ret = bkey_err(k);
		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			continue;
		if (ret)
			goto err;

		if (!k.k || bkey_gt(k.k->p, bp_end))
			break;

		if (k.k->type != KEY_TYPE_backpointer)
			goto next;

		struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);

		if (ctxt->stats)
			ctxt->stats->offset = bp.k->p.offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT;

		if (!(data_types & BIT(bp.v->data_type)))
			goto next;

		if (!bp.v->level && bp.v->btree_id == BTREE_ID_stripes)
			goto next;

		/* Resolve the backpointer to the key it points at: */
		k = bch2_backpointer_get_key(trans, bp, &iter, 0, &last_flushed);
		ret = bkey_err(k);
		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			continue;
		if (ret)
			goto err;
		if (!k.k)
			goto next;

		/* Leaf data gets per-inode io options; btree nodes don't: */
		if (!bp.v->level) {
			ret = bch2_move_get_io_opts_one(trans, &io_opts, &iter, k);
			if (ret) {
				bch2_trans_iter_exit(trans, &iter);
				continue;
			}
		}

		struct data_update_opts data_opts = {};
		if (!pred(c, arg, k, &io_opts, &data_opts)) {
			bch2_trans_iter_exit(trans, &iter);
			goto next;
		}

		if (data_opts.scrub &&
		    !bch2_dev_idx_is_online(c, data_opts.read_dev)) {
			bch2_trans_iter_exit(trans, &iter);
			ret = -BCH_ERR_device_offline;
			break;
		}

		bch2_bkey_buf_reassemble(&sk, c, k);
		k = bkey_i_to_s_c(sk.k);

		/* move_extent will drop locks */
		unsigned sectors = bp.v->bucket_len;

		if (!bp.v->level)
			ret = bch2_move_extent(ctxt, bucket_in_flight, &iter, k, io_opts, data_opts);
		else if (!data_opts.scrub)
			ret = bch2_btree_node_rewrite_pos(trans, bp.v->btree_id, bp.v->level, k.k->p, 0);
		else
			ret = bch2_btree_node_scrub(trans, bp.v->btree_id, bp.v->level, k, data_opts.read_dev);

		bch2_trans_iter_exit(trans, &iter);

		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			continue;
		if (ret == -ENOMEM) {
			/* memory allocation failure, wait for some IO to finish */
			bch2_move_ctxt_wait_for_io(ctxt);
			continue;
		}
		if (ret)
			goto err;

		if (ctxt->stats)
			atomic64_add(sectors, &ctxt->stats->sectors_seen);
next:
		bch2_btree_iter_advance(trans, &bp_iter);
	}
err:
	bch2_trans_iter_exit(trans, &bp_iter);
	bch2_bkey_buf_exit(&sk, c);
	bch2_bkey_buf_exit(&last_flushed, c);
	return ret;
}
887
/*
 * Wrapper around __bch2_move_data_phys() that flushes the btree write buffer,
 * then sets up/tears down a moving_context for the walk.  @stats must be
 * non-NULL (it is dereferenced unconditionally below).
 */
static int bch2_move_data_phys(struct bch_fs *c,
			       unsigned dev,
			       u64 start,
			       u64 end,
			       unsigned data_types,
			       struct bch_ratelimit *rate,
			       struct bch_move_stats *stats,
			       struct write_point_specifier wp,
			       bool wait_on_copygc,
			       move_pred_fn pred, void *arg)
{
	struct moving_context ctxt;

	bch2_trans_run(c, bch2_btree_write_buffer_flush_sync(trans));

	bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc);
	ctxt.stats->phys = true;
	ctxt.stats->data_type = (int) DATA_PROGRESS_DATA_TYPE_phys;

	int ret = __bch2_move_data_phys(&ctxt, NULL, dev, start, end, data_types, pred, arg);
	bch2_moving_ctxt_exit(&ctxt);

	return ret;
}
912
/* Arguments threaded through to evacuate_bucket_pred() */
struct evacuate_bucket_arg {
	struct bpos		bucket;		/* .inode = device index, .offset = bucket */
	int			gen;		/* bucket gen to match, or < 0 for any */
	struct data_update_opts	data_opts;
};
918
919static bool evacuate_bucket_pred(struct bch_fs *c, void *_arg, struct bkey_s_c k,
920 struct bch_io_opts *io_opts,
921 struct data_update_opts *data_opts)
922{
923 struct evacuate_bucket_arg *arg = _arg;
924
925 *data_opts = arg->data_opts;
926
927 unsigned i = 0;
928 bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) {
929 if (ptr->dev == arg->bucket.inode &&
930 (arg->gen < 0 || arg->gen == ptr->gen) &&
931 !ptr->cached)
932 data_opts->rewrite_ptrs |= BIT(i);
933 i++;
934 }
935
936 return data_opts->rewrite_ptrs != 0;
937}
938
/*
 * Move all data out of @bucket (on device bucket.inode), optionally matching
 * only pointers with generation @gen (pass gen < 0 to match any), applying
 * @data_opts to each move.
 */
int bch2_evacuate_bucket(struct moving_context *ctxt,
			 struct move_bucket_in_flight *bucket_in_flight,
			 struct bpos bucket, int gen,
			 struct data_update_opts data_opts)
{
	struct evacuate_bucket_arg arg = { bucket, gen, data_opts, };

	return __bch2_move_data_phys(ctxt, bucket_in_flight,
				     bucket.inode,
				     bucket.offset,
				     bucket.offset + 1,
				     ~0,	/* all data types */
				     evacuate_bucket_pred, &arg);
}
953
/* Like move_pred_fn, but decides the fate of whole btree nodes: */
typedef bool (*move_btree_pred)(struct bch_fs *, void *,
				struct btree *, struct bch_io_opts *,
				struct data_update_opts *);
957
/*
 * Walk btree nodes in [start, end] and rewrite those selected by @pred.
 * Flushes interior updates before returning so rewrites are persisted.
 *
 * Returns 0 on success or a negative error.
 */
static int bch2_move_btree(struct bch_fs *c,
			   struct bbpos start,
			   struct bbpos end,
			   move_btree_pred pred, void *arg,
			   struct bch_move_stats *stats)
{
	bool kthread = (current->flags & PF_KTHREAD) != 0;
	struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
	struct moving_context ctxt;
	struct btree_trans *trans;
	struct btree_iter iter;
	struct btree *b;
	enum btree_id btree;
	struct data_update_opts data_opts;
	int ret = 0;

	bch2_moving_ctxt_init(&ctxt, c, NULL, stats,
			      writepoint_ptr(&c->btree_write_point),
			      true);
	trans = ctxt.trans;

	stats->data_type = BCH_DATA_btree;

	for (btree = start.btree;
	     btree <= min_t(unsigned, end.btree, btree_id_nr_alive(c) - 1);
	     btree ++) {
		stats->pos = BBPOS(btree, POS_MIN);

		if (!bch2_btree_id_root(c, btree)->b)
			continue;

		bch2_trans_node_iter_init(trans, &iter, btree, POS_MIN, 0, 0,
					  BTREE_ITER_prefetch);
retry:
		ret = 0;
		while (bch2_trans_begin(trans),
		       (b = bch2_btree_iter_peek_node(trans, &iter)) &&
		       !(ret = PTR_ERR_OR_ZERO(b))) {
			if (kthread && kthread_should_stop())
				break;

			/* Past the end position in the last btree? Done: */
			if ((cmp_int(btree, end.btree) ?:
			     bpos_cmp(b->key.k.p, end.pos)) > 0)
				break;

			stats->pos = BBPOS(iter.btree_id, iter.pos);

			if (!pred(c, arg, b, &io_opts, &data_opts))
				goto next;

			ret = bch2_btree_node_rewrite(trans, &iter, b, 0) ?: ret;
			if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
				continue;
			if (ret)
				break;
next:
			bch2_btree_iter_next_node(trans, &iter);
		}
		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			goto retry;

		bch2_trans_iter_exit(trans, &iter);

		if (kthread && kthread_should_stop())
			break;
	}

	bch_err_fn(c, ret);
	bch2_moving_ctxt_exit(&ctxt);
	bch2_btree_interior_updates_flush(c);

	return ret;
}
1031
/*
 * Rereplicate predicate: kill pointers to missing/zero-durability devices,
 * and request extra replicas when the key's durability is below the
 * configured replica count.
 */
static bool rereplicate_pred(struct bch_fs *c, void *arg,
			     struct bkey_s_c k,
			     struct bch_io_opts *io_opts,
			     struct data_update_opts *data_opts)
{
	unsigned nr_good = bch2_bkey_durability(c, k);
	unsigned replicas = bkey_is_btree_ptr(k.k)
		? c->opts.metadata_replicas
		: io_opts->data_replicas;

	/* RCU protects the device lookup by index: */
	rcu_read_lock();
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	unsigned i = 0;
	bkey_for_each_ptr(ptrs, ptr) {
		struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev);
		if (!ptr->cached &&
		    (!ca || !ca->mi.durability))
			data_opts->kill_ptrs |= BIT(i);
		i++;
	}
	rcu_read_unlock();

	/* nr_good == 0 is skipped: nothing left to replicate from */
	if (!data_opts->kill_ptrs &&
	    (!nr_good || nr_good >= replicas))
		return false;

	data_opts->target		= 0;
	data_opts->extra_replicas	= replicas - nr_good;
	data_opts->btree_insert_flags	= 0;
	return true;
}
1063
1064static bool migrate_pred(struct bch_fs *c, void *arg,
1065 struct bkey_s_c k,
1066 struct bch_io_opts *io_opts,
1067 struct data_update_opts *data_opts)
1068{
1069 struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
1070 struct bch_ioctl_data *op = arg;
1071 unsigned i = 0;
1072
1073 data_opts->rewrite_ptrs = 0;
1074 data_opts->target = 0;
1075 data_opts->extra_replicas = 0;
1076 data_opts->btree_insert_flags = 0;
1077
1078 bkey_for_each_ptr(ptrs, ptr) {
1079 if (ptr->dev == op->migrate.dev)
1080 data_opts->rewrite_ptrs |= 1U << i;
1081 i++;
1082 }
1083
1084 return data_opts->rewrite_ptrs != 0;
1085}
1086
/* Btree-node flavor of rereplicate_pred(): applies it to the node's key */
static bool rereplicate_btree_pred(struct bch_fs *c, void *arg,
				   struct btree *b,
				   struct bch_io_opts *io_opts,
				   struct data_update_opts *data_opts)
{
	return rereplicate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts);
}
1094
1095/*
1096 * Ancient versions of bcachefs produced packed formats which could represent
1097 * keys that the in memory format cannot represent; this checks for those
1098 * formats so we can get rid of them.
1099 */
1100static bool bformat_needs_redo(struct bkey_format *f)
1101{
1102 for (unsigned i = 0; i < f->nr_fields; i++)
1103 if (bch2_bkey_format_field_overflows(f, i))
1104 return true;
1105
1106 return false;
1107}
1108
1109static bool rewrite_old_nodes_pred(struct bch_fs *c, void *arg,
1110 struct btree *b,
1111 struct bch_io_opts *io_opts,
1112 struct data_update_opts *data_opts)
1113{
1114 if (b->version_ondisk != c->sb.version ||
1115 btree_node_need_rewrite(b) ||
1116 bformat_needs_redo(&b->format)) {
1117 data_opts->target = 0;
1118 data_opts->extra_replicas = 0;
1119 data_opts->btree_insert_flags = 0;
1120 return true;
1121 }
1122
1123 return false;
1124}
1125
/*
 * Rewrite all btree nodes in old/overflowing formats; on success, record in
 * the superblock that the scan is done so it isn't repeated.
 */
int bch2_scan_old_btree_nodes(struct bch_fs *c, struct bch_move_stats *stats)
{
	int ret;

	ret = bch2_move_btree(c,
			      BBPOS_MIN,
			      BBPOS_MAX,
			      rewrite_old_nodes_pred, c, stats);
	if (!ret) {
		mutex_lock(&c->sb_lock);
		c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done);
		c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done);
		c->disk_sb.sb->version_min = c->disk_sb.sb->version;
		bch2_write_super(c);
		mutex_unlock(&c->sb_lock);
	}

	bch_err_fn(c, ret);
	return ret;
}
1146
/*
 * Drop-extra-replicas predicate: kill pointers, in order, for as long as the
 * key's remaining durability stays at or above the configured replica count.
 */
static bool drop_extra_replicas_pred(struct bch_fs *c, void *arg,
				     struct bkey_s_c k,
				     struct bch_io_opts *io_opts,
				     struct data_update_opts *data_opts)
{
	unsigned durability = bch2_bkey_durability(c, k);
	unsigned replicas = bkey_is_btree_ptr(k.k)
		? c->opts.metadata_replicas
		: io_opts->data_replicas;
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;
	unsigned i = 0;

	rcu_read_lock();
	bkey_for_each_ptr_decode(k.k, bch2_bkey_ptrs_c(k), p, entry) {
		unsigned d = bch2_extent_ptr_durability(c, &p);

		/* Only drop if what remains still meets the replica target: */
		if (d && durability - d >= replicas) {
			data_opts->kill_ptrs |= BIT(i);
			durability -= d;
		}

		i++;
	}
	rcu_read_unlock();

	return data_opts->kill_ptrs != 0;
}
1175
1176static bool drop_extra_replicas_btree_pred(struct bch_fs *c, void *arg,
1177 struct btree *b,
1178 struct bch_io_opts *io_opts,
1179 struct data_update_opts *data_opts)
1180{
1181 return drop_extra_replicas_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts);
1182}
1183
1184static bool scrub_pred(struct bch_fs *c, void *_arg,
1185 struct bkey_s_c k,
1186 struct bch_io_opts *io_opts,
1187 struct data_update_opts *data_opts)
1188{
1189 struct bch_ioctl_data *arg = _arg;
1190
1191 if (k.k->type != KEY_TYPE_btree_ptr_v2) {
1192 struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
1193 const union bch_extent_entry *entry;
1194 struct extent_ptr_decoded p;
1195 bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
1196 if (p.ptr.dev == arg->migrate.dev) {
1197 if (!p.crc.csum_type)
1198 return false;
1199 break;
1200 }
1201 }
1202
1203 data_opts->scrub = true;
1204 data_opts->read_dev = arg->migrate.dev;
1205 return true;
1206}
1207
/*
 * bch2_data_job - run one data operation requested via ioctl
 *
 * Dispatches on op.op: scrub, rereplicate, migrate off a device, rewrite
 * old-format btree nodes, or drop extra replicas.  Statistics are
 * accumulated in @stats (initialized here, torn down before returning).
 *
 * Each step's error is folded in with "err ?: ret" so the first failure
 * is reported while later cleanup steps still run.
 *
 * Returns 0 on success or a negative error code.
 */
int bch2_data_job(struct bch_fs *c,
		  struct bch_move_stats *stats,
		  struct bch_ioctl_data op)
{
	struct bbpos start	= BBPOS(op.start_btree, op.start_pos);
	struct bbpos end	= BBPOS(op.end_btree, op.end_pos);
	int ret = 0;

	if (op.op >= BCH_DATA_OP_NR)
		return -EINVAL;

	bch2_move_stats_init(stats, bch2_data_ops_strs[op.op]);

	switch (op.op) {
	case BCH_DATA_OP_scrub:
		/*
		 * prevent tests from spuriously failing, make sure we see all
		 * btree nodes that need to be repaired
		 */
		bch2_btree_interior_updates_flush(c);

		ret = bch2_move_data_phys(c, op.scrub.dev, 0, U64_MAX,
					  op.scrub.data_types,
					  NULL,
					  stats,
					  writepoint_hashed((unsigned long) current),
					  false,
					  scrub_pred, &op) ?: ret;
		break;

	case BCH_DATA_OP_rereplicate:
		/* Flush journal pins for all devices (-1) before moving: */
		stats->data_type = BCH_DATA_journal;
		ret = bch2_journal_flush_device_pins(&c->journal, -1);
		ret = bch2_move_btree(c, start, end,
				      rereplicate_btree_pred, c, stats) ?: ret;
		ret = bch2_move_data(c, start, end,
				     NULL,
				     stats,
				     writepoint_hashed((unsigned long) current),
				     true,
				     rereplicate_pred, c) ?: ret;
		ret = bch2_replicas_gc2(c) ?: ret;
		break;
	case BCH_DATA_OP_migrate:
		/*
		 * NOTE(review): this early return skips bch2_move_stats_exit()
		 * after bch2_move_stats_init() above — confirm stats_init has
		 * nothing that needs unwinding on this path.
		 */
		if (op.migrate.dev >= c->sb.nr_devices)
			return -EINVAL;

		stats->data_type = BCH_DATA_journal;
		ret = bch2_journal_flush_device_pins(&c->journal, op.migrate.dev);
		/* ~0 data type mask: migrate everything off the device */
		ret = bch2_move_data_phys(c, op.migrate.dev, 0, U64_MAX,
					  ~0,
					  NULL,
					  stats,
					  writepoint_hashed((unsigned long) current),
					  true,
					  migrate_pred, &op) ?: ret;
		bch2_btree_interior_updates_flush(c);
		ret = bch2_replicas_gc2(c) ?: ret;
		break;
	case BCH_DATA_OP_rewrite_old_nodes:
		ret = bch2_scan_old_btree_nodes(c, stats);
		break;
	case BCH_DATA_OP_drop_extra_replicas:
		ret = bch2_move_btree(c, start, end,
				      drop_extra_replicas_btree_pred, c, stats) ?: ret;
		ret = bch2_move_data(c, start, end, NULL, stats,
				     writepoint_hashed((unsigned long) current),
				     true,
				     drop_extra_replicas_pred, c) ?: ret;
		ret = bch2_replicas_gc2(c) ?: ret;
		break;
	default:
		ret = -EINVAL;
	}

	bch2_move_stats_exit(stats, c);
	return ret;
}
1286
1287void bch2_move_stats_to_text(struct printbuf *out, struct bch_move_stats *stats)
1288{
1289 prt_printf(out, "%s: data type==", stats->name);
1290 bch2_prt_data_type(out, stats->data_type);
1291 prt_str(out, " pos=");
1292 bch2_bbpos_to_text(out, stats->pos);
1293 prt_newline(out);
1294 printbuf_indent_add(out, 2);
1295
1296 prt_printf(out, "keys moved:\t%llu\n", atomic64_read(&stats->keys_moved));
1297 prt_printf(out, "keys raced:\t%llu\n", atomic64_read(&stats->keys_raced));
1298 prt_printf(out, "bytes seen:\t");
1299 prt_human_readable_u64(out, atomic64_read(&stats->sectors_seen) << 9);
1300 prt_newline(out);
1301
1302 prt_printf(out, "bytes moved:\t");
1303 prt_human_readable_u64(out, atomic64_read(&stats->sectors_moved) << 9);
1304 prt_newline(out);
1305
1306 prt_printf(out, "bytes raced:\t");
1307 prt_human_readable_u64(out, atomic64_read(&stats->sectors_raced) << 9);
1308 prt_newline(out);
1309
1310 printbuf_indent_sub(out, 2);
1311}
1312
/*
 * Print one moving_context: its move stats, current read/write IO counts
 * against the configured in-flight limits, and each in-flight data update.
 */
static void bch2_moving_ctxt_to_text(struct printbuf *out, struct bch_fs *c, struct moving_context *ctxt)
{
	if (!out->nr_tabstops)
		printbuf_tabstop_push(out, 32);

	bch2_move_stats_to_text(out, ctxt->stats);
	printbuf_indent_add(out, 2);

	/* in-flight limits are configured in bytes; >> 9 converts to sectors */
	prt_printf(out, "reads: ios %u/%u sectors %u/%u\n",
		   atomic_read(&ctxt->read_ios),
		   c->opts.move_ios_in_flight,
		   atomic_read(&ctxt->read_sectors),
		   c->opts.move_bytes_in_flight >> 9);

	prt_printf(out, "writes: ios %u/%u sectors %u/%u\n",
		   atomic_read(&ctxt->write_ios),
		   c->opts.move_ios_in_flight,
		   atomic_read(&ctxt->write_sectors),
		   c->opts.move_bytes_in_flight >> 9);

	printbuf_indent_add(out, 2);

	/* ctxt->lock guards the ctxt->ios list of in-flight moving_ios */
	mutex_lock(&ctxt->lock);
	struct moving_io *io;
	list_for_each_entry(io, &ctxt->ios, io_list)
		bch2_data_update_inflight_to_text(out, &io->write);
	mutex_unlock(&ctxt->lock);

	/* undo both indent_add(2) calls above */
	printbuf_indent_sub(out, 4);
}
1343
1344void bch2_fs_moving_ctxts_to_text(struct printbuf *out, struct bch_fs *c)
1345{
1346 struct moving_context *ctxt;
1347
1348 mutex_lock(&c->moving_context_lock);
1349 list_for_each_entry(ctxt, &c->moving_context_list, list)
1350 bch2_moving_ctxt_to_text(out, c, ctxt);
1351 mutex_unlock(&c->moving_context_lock);
1352}
1353
1354void bch2_fs_move_init(struct bch_fs *c)
1355{
1356 INIT_LIST_HEAD(&c->moving_context_list);
1357 mutex_init(&c->moving_context_lock);
1358}