Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bcachefs: Data update support for unwritten extents

The data update path requires special support for unwritten extents - we
still need to be able to move them, but there's no need to read or write
anything.

This patch adds a new error code to tell bch2_move_extent() that we're
short-circuiting the read, and adds bch2_update_unwritten_extent() to
create a reservation and then call __bch2_data_update_index_update().

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>

+118 -13
+106 -12
fs/bcachefs/data_update.c
··· 96 96 ptr->cached = true; 97 97 } 98 98 99 - int bch2_data_update_index_update(struct bch_write_op *op) 99 + static int __bch2_data_update_index_update(struct btree_trans *trans, 100 + struct bch_write_op *op) 100 101 { 101 102 struct bch_fs *c = op->c; 102 - struct btree_trans trans; 103 103 struct btree_iter iter; 104 104 struct data_update *m = 105 105 container_of(op, struct data_update, op); ··· 111 111 bch2_bkey_buf_init(&_insert); 112 112 bch2_bkey_buf_realloc(&_insert, c, U8_MAX); 113 113 114 - bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024); 115 - 116 - bch2_trans_iter_init(&trans, &iter, m->btree_id, 114 + bch2_trans_iter_init(trans, &iter, m->btree_id, 117 115 bkey_start_pos(&bch2_keylist_front(keys)->k), 118 116 BTREE_ITER_SLOTS|BTREE_ITER_INTENT); 119 117 ··· 128 130 s64 i_sectors_delta = 0, disk_sectors_delta = 0; 129 131 unsigned i; 130 132 131 - bch2_trans_begin(&trans); 133 + bch2_trans_begin(trans); 132 134 133 135 k = bch2_btree_iter_peek_slot(&iter); 134 136 ret = bkey_err(k); ··· 209 211 bch2_bkey_narrow_crcs(insert, (struct bch_extent_crc_unpacked) { 0 }); 210 212 bch2_extent_normalize(c, bkey_i_to_s(insert)); 211 213 212 - ret = bch2_sum_sector_overwrites(&trans, &iter, insert, 214 + ret = bch2_sum_sector_overwrites(trans, &iter, insert, 213 215 &should_check_enospc, 214 216 &i_sectors_delta, 215 217 &disk_sectors_delta); ··· 227 229 228 230 next_pos = insert->k.p; 229 231 230 - ret = insert_snapshot_whiteouts(&trans, m->btree_id, 232 + ret = insert_snapshot_whiteouts(trans, m->btree_id, 231 233 k.k->p, insert->k.p) ?: 232 - bch2_trans_update(&trans, &iter, insert, 234 + bch2_trans_update(trans, &iter, insert, 233 235 BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: 234 - bch2_trans_commit(&trans, &op->res, 236 + bch2_trans_commit(trans, &op->res, 235 237 NULL, 236 238 BTREE_INSERT_NOFAIL| 237 239 m->data_opts.btree_insert_flags); ··· 268 270 goto next; 269 271 } 270 272 out: 271 - bch2_trans_iter_exit(&trans, &iter); 272 - 
bch2_trans_exit(&trans); 273 + bch2_trans_iter_exit(trans, &iter); 273 274 bch2_bkey_buf_exit(&_insert, c); 274 275 bch2_bkey_buf_exit(&_new, c); 275 276 BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart)); 277 + return ret; 278 + } 279 + 280 + int bch2_data_update_index_update(struct bch_write_op *op) 281 + { 282 + struct bch_fs *c = op->c; 283 + struct btree_trans trans; 284 + int ret; 285 + 286 + bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024); 287 + ret = __bch2_data_update_index_update(&trans, op); 288 + bch2_trans_exit(&trans); 289 + 276 290 return ret; 277 291 } 278 292 ··· 307 297 bch2_bkey_buf_exit(&update->k, c); 308 298 bch2_disk_reservation_put(c, &update->op.res); 309 299 bch2_bio_free_pages_pool(c, &update->op.wbio.bio); 300 + } 301 + 302 + void bch2_update_unwritten_extent(struct btree_trans *trans, 303 + struct data_update *update) 304 + { 305 + struct bch_fs *c = update->op.c; 306 + struct bio *bio = &update->op.wbio.bio; 307 + struct bkey_i_extent *e; 308 + struct write_point *wp; 309 + struct bch_extent_ptr *ptr; 310 + struct closure cl; 311 + struct btree_iter iter; 312 + struct bkey_s_c k; 313 + int ret; 314 + 315 + closure_init_stack(&cl); 316 + bch2_keylist_init(&update->op.insert_keys, update->op.inline_keys); 317 + 318 + while (bio_sectors(bio)) { 319 + unsigned sectors = bio_sectors(bio); 320 + 321 + bch2_trans_iter_init(trans, &iter, update->btree_id, update->op.pos, 322 + BTREE_ITER_SLOTS); 323 + ret = lockrestart_do(trans, ({ 324 + k = bch2_btree_iter_peek_slot(&iter); 325 + bkey_err(k); 326 + })); 327 + bch2_trans_iter_exit(trans, &iter); 328 + 329 + if (ret || !bch2_extents_match(k, bkey_i_to_s_c(update->k.k))) 330 + break; 331 + 332 + e = bkey_extent_init(update->op.insert_keys.top); 333 + e->k.p = update->op.pos; 334 + 335 + ret = bch2_alloc_sectors_start_trans(trans, 336 + update->op.target, 337 + false, 338 + update->op.write_point, 339 + &update->op.devs_have, 340 + update->op.nr_replicas, 341 + update->op.nr_replicas, 
342 + update->op.alloc_reserve, 343 + 0, &cl, &wp); 344 + if (bch2_err_matches(ret, BCH_ERR_operation_blocked)) { 345 + bch2_trans_unlock(trans); 346 + closure_sync(&cl); 347 + continue; 348 + } 349 + 350 + if (ret) 351 + return; 352 + 353 + sectors = min(sectors, wp->sectors_free); 354 + 355 + bch2_key_resize(&e->k, sectors); 356 + 357 + bch2_open_bucket_get(c, wp, &update->op.open_buckets); 358 + bch2_alloc_sectors_append_ptrs(c, wp, &e->k_i, sectors, false); 359 + bch2_alloc_sectors_done(c, wp); 360 + 361 + bio_advance(bio, sectors << 9); 362 + update->op.pos.offset += sectors; 363 + 364 + extent_for_each_ptr(extent_i_to_s(e), ptr) 365 + ptr->unwritten = true; 366 + bch2_keylist_push(&update->op.insert_keys); 367 + 368 + ret = __bch2_data_update_index_update(trans, &update->op); 369 + 370 + bch2_open_buckets_put(c, &update->op.open_buckets); 371 + 372 + if (ret) 373 + break; 374 + } 375 + 376 + if ((atomic_read(&cl.remaining) & CLOSURE_REMAINING_MASK) != 1) { 377 + bch2_trans_unlock(trans); 378 + closure_sync(&cl); 379 + } 310 380 } 311 381 312 382 int bch2_data_update_init(struct bch_fs *c, struct data_update *m, ··· 466 376 hweight32(m->data_opts.rewrite_ptrs) + m->data_opts.extra_replicas; 467 377 468 378 BUG_ON(!m->op.nr_replicas); 379 + 380 + /* Special handling required: */ 381 + if (bkey_extent_is_unwritten(k)) 382 + return -BCH_ERR_unwritten_extent_update; 469 383 return 0; 470 384 } 471 385
+1
fs/bcachefs/data_update.h
··· 32 32 struct bch_extent_crc_unpacked); 33 33 34 34 void bch2_data_update_exit(struct data_update *); 35 + void bch2_update_unwritten_extent(struct btree_trans *, struct data_update *); 35 36 int bch2_data_update_init(struct bch_fs *, struct data_update *, 36 37 struct write_point_specifier, 37 38 struct bch_io_opts, struct data_update_opts,
+1
fs/bcachefs/errcode.h
··· 71 71 x(BCH_ERR_fsck, fsck_repair_impossible) \ 72 72 x(0, need_snapshot_cleanup) \ 73 73 x(0, need_topology_repair) \ 74 + x(0, unwritten_extent_update) \ 74 75 x(EINVAL, device_state_not_allowed) \ 75 76 x(EINVAL, member_info_missing) \ 76 77 x(EINVAL, mismatched_block_size) \
+10 -1
fs/bcachefs/move.c
··· 295 295 296 296 ret = bch2_data_update_init(c, &io->write, ctxt->wp, io_opts, 297 297 data_opts, btree_id, k); 298 - if (ret) 298 + if (ret && ret != -BCH_ERR_unwritten_extent_update) 299 299 goto err_free_pages; 300 300 301 301 io->write.ctxt = ctxt; ··· 303 303 304 304 atomic64_inc(&ctxt->stats->keys_moved); 305 305 atomic64_add(k.k->size, &ctxt->stats->sectors_moved); 306 + 307 + if (ret == -BCH_ERR_unwritten_extent_update) { 308 + bch2_update_unwritten_extent(trans, &io->write); 309 + move_free(io); 310 + return 0; 311 + } 312 + 313 + BUG_ON(ret); 314 + 306 315 this_cpu_add(c->counters[BCH_COUNTER_io_move], k.k->size); 307 316 this_cpu_add(c->counters[BCH_COUNTER_move_extent_read], k.k->size); 308 317 trace_move_extent_read(k.k);