/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_BTREE_TYPES_H
#define _BCACHEFS_BTREE_TYPES_H

#include <linux/list.h>
#include <linux/rhashtable.h>

#include "bbpos_types.h"
#include "btree_key_cache_types.h"
#include "buckets_types.h"
#include "darray.h"
#include "errcode.h"
#include "journal_types.h"
#include "replicas_types.h"
#include "six.h"

struct open_bucket;
struct btree_update;
struct btree_trans;

#define MAX_BSETS		3U

struct btree_nr_keys {

	/*
	 * Amount of live metadata (i.e. size of node after a compaction) in
	 * units of u64s
	 */
	u16			live_u64s;
	u16			bset_u64s[MAX_BSETS];

	/* live keys only: */
	u16			packed_keys;
	u16			unpacked_keys;
};

struct bset_tree {
	/*
	 * We construct a binary tree in an array as if the array
	 * started at 1, so that things line up on the same cachelines
	 * better: see comments in bset.c at cacheline_to_bkey() for
	 * details
	 */

	/* size of the binary tree and prev array */
	u16			size;

	/* function of size - precalculated for to_inorder() */
	u16			extra;

	u16			data_offset;
	u16			aux_data_offset;
	u16			end_offset;
};
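
/*
 * Illustrative sketch, not part of the original header: with the root at
 * index 1, navigating the auxiliary search tree is pure index arithmetic,
 * which is what keeps lookups cacheline friendly (these helper names are
 * hypothetical):
 */
static inline unsigned __aux_tree_parent_example(unsigned j)
{
	return j >> 1;			/* parent of node j */
}

static inline unsigned __aux_tree_child_example(unsigned j, unsigned right)
{
	return (j << 1) | right;	/* left child at 2j, right at 2j + 1 */
}
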
struct btree_write {
	struct journal_entry_pin	journal;
};

struct btree_alloc {
	struct open_buckets	ob;
	__BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX);
};

struct btree_bkey_cached_common {
	struct six_lock		lock;
	u8			level;
	u8			btree_id;
	bool			cached;
};

struct btree {
	struct btree_bkey_cached_common c;

	struct rhash_head	hash;
	u64			hash_val;

	unsigned long		flags;
	u16			written;
	u8			nsets;
	u8			nr_key_bits;
	u16			version_ondisk;

	struct bkey_format	format;

	struct btree_node	*data;
	void			*aux_data;

	/*
	 * Sets of sorted keys - the real btree node - plus a binary search tree
	 *
	 * set[0] is special; set[0]->tree, set[0]->prev and set[0]->data point
	 * to the memory we have allocated for this btree node. Additionally,
	 * set[0]->data points to the entire btree node as it exists on disk.
	 */
	struct bset_tree	set[MAX_BSETS];

	struct btree_nr_keys	nr;
	u16			sib_u64s[2];
	u16			whiteout_u64s;
	u8			byte_order;
	u8			unpack_fn_len;

	struct btree_write	writes[2];

	/* Key/pointer for this btree node */
	__BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX);

	/*
	 * XXX: add a delete sequence number, so when bch2_btree_node_relock()
	 * fails because the lock sequence number has changed - i.e. the
	 * contents were modified - we can still relock the node if it's still
	 * the one we want, without redoing the traversal
	 */

	/*
	 * For asynchronous splits/interior node updates:
	 * When we do a split, we allocate new child nodes and update the parent
	 * node to point to them: we update the parent in memory immediately,
	 * but then we must wait until the children have been written out before
	 * the update to the parent can be written - this is a list of the
	 * btree_updates that are blocking this node from being
	 * written:
	 */
	struct list_head	write_blocked;

	/*
	 * Also for asynchronous splits/interior node updates:
	 * If a btree node isn't reachable yet, we don't want to kick off
	 * another write - because that write also won't yet be reachable and
	 * marking it as completed before it's reachable would be incorrect:
	 */
	unsigned long		will_make_reachable;

	struct open_buckets	ob;

	/* lru list */
	struct list_head	list;
};
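
/*
 * Sketch (an assumption mirroring what btree_cache.c does; the variable
 * name here is hypothetical): hash and hash_val above let btree nodes
 * live in an rhashtable keyed by hash_val:
 */
static const struct rhashtable_params bch_btree_cache_params_example __maybe_unused = {
	.head_offset	= offsetof(struct btree, hash),
	.key_offset	= offsetof(struct btree, hash_val),
	.key_len	= sizeof(u64),
};
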
#define BCH_BTREE_CACHE_NOT_FREED_REASONS()	\
	x(cache_reserve)			\
	x(lock_intent)				\
	x(lock_write)				\
	x(dirty)				\
	x(read_in_flight)			\
	x(write_in_flight)			\
	x(noevict)				\
	x(write_blocked)			\
	x(will_make_reachable)			\
	x(access_bit)

enum bch_btree_cache_not_freed_reasons {
#define x(n) BCH_BTREE_CACHE_NOT_FREED_##n,
	BCH_BTREE_CACHE_NOT_FREED_REASONS()
#undef x
	BCH_BTREE_CACHE_NOT_FREED_REASONS_NR,
};
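
/*
 * Sketch, not in the original header: the same x-macro can also generate
 * a matching name table, keeping the strings in sync with the enum
 * automatically (the array name here is hypothetical):
 */
static const char * const btree_cache_not_freed_strs_example[] __maybe_unused = {
#define x(n) #n,
	BCH_BTREE_CACHE_NOT_FREED_REASONS()
#undef x
};
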
struct btree_cache_list {
	unsigned		idx;
	struct shrinker		*shrink;
	struct list_head	list;
	size_t			nr;
};

struct btree_cache {
	struct rhashtable	table;
	bool			table_init_done;
	/*
	 * We never free a struct btree, except on shutdown - we just put it on
	 * the btree_cache_freed list and reuse it later. This simplifies the
	 * code, and it doesn't cost us much memory as the memory usage is
	 * dominated by buffers that hold the actual btree node data and those
	 * can be freed - and the number of struct btrees allocated is
	 * effectively bounded.
	 *
	 * btree_cache_freeable effectively is a small cache - we use it because
	 * high order page allocations can be rather expensive, and it's quite
	 * common to delete and allocate btree nodes in quick succession. It
	 * should never grow past ~2-3 nodes in practice.
	 */
	struct mutex		lock;
	struct list_head	freeable;
	struct list_head	freed_pcpu;
	struct list_head	freed_nonpcpu;
	struct btree_cache_list	live[2];

	size_t			nr_freeable;
	size_t			nr_reserve;
	size_t			nr_by_btree[BTREE_ID_NR];
	atomic_long_t		nr_dirty;

	/* shrinker stats */
	size_t			nr_freed;
	u64			not_freed[BCH_BTREE_CACHE_NOT_FREED_REASONS_NR];

	/*
	 * If we need to allocate memory for a new btree node and that
	 * allocation fails, we can cannibalize another node in the btree cache
	 * to satisfy the allocation - lock to guarantee only one thread does
	 * this at a time:
	 */
	struct task_struct	*alloc_lock;
	struct closure_waitlist	alloc_wait;

	struct bbpos		pinned_nodes_start;
	struct bbpos		pinned_nodes_end;
	/* btree id mask: 0 for leaves, 1 for interior */
	u64			pinned_nodes_mask[2];
};

struct btree_node_iter {
	struct btree_node_iter_set {
		u16	k, end;
	} data[MAX_BSETS];
};

#define BTREE_ITER_FLAGS()		\
	x(slots)			\
	x(intent)			\
	x(prefetch)			\
	x(is_extents)			\
	x(not_extents)			\
	x(cached)			\
	x(with_key_cache)		\
	x(with_updates)			\
	x(with_journal)			\
	x(snapshot_field)		\
	x(all_snapshots)		\
	x(filter_snapshots)		\
	x(nopreserve)			\
	x(cached_nofill)		\
	x(key_cache_fill)		\

#define STR_HASH_FLAGS()		\
	x(must_create)			\
	x(must_replace)

#define BTREE_UPDATE_FLAGS()		\
	x(internal_snapshot_node)	\
	x(nojournal)			\
	x(key_cache_reclaim)

/*
 * BTREE_TRIGGER_norun - don't run triggers at all
 *
 * BTREE_TRIGGER_transactional - we're running transactional triggers as part of
 * a transaction commit: triggers may generate new updates
 *
 * BTREE_TRIGGER_atomic - we're running atomic triggers during a transaction
 * commit: we have our journal reservation, we're holding btree node write
 * locks, and we know the transaction is going to commit (returning an error
 * here is a fatal error, causing us to go emergency read-only)
 *
 * BTREE_TRIGGER_gc - we're in gc/fsck: running triggers to recalculate e.g.
 * disk usage
 *
 * BTREE_TRIGGER_insert - @new is entering the btree
 * BTREE_TRIGGER_overwrite - @old is leaving the btree
 */
#define BTREE_TRIGGER_FLAGS()		\
	x(norun)			\
	x(transactional)		\
	x(atomic)			\
	x(check_repair)			\
	x(gc)				\
	x(insert)			\
	x(overwrite)			\
	x(is_root)

enum {
#define x(n) BTREE_ITER_FLAG_BIT_##n,
	BTREE_ITER_FLAGS()
	STR_HASH_FLAGS()
	BTREE_UPDATE_FLAGS()
	BTREE_TRIGGER_FLAGS()
#undef x
};

/* iter flags must fit in a u16: */
//BUILD_BUG_ON(BTREE_ITER_FLAG_BIT_key_cache_fill > 15);

enum btree_iter_update_trigger_flags {
#define x(n) BTREE_ITER_##n	= 1U << BTREE_ITER_FLAG_BIT_##n,
	BTREE_ITER_FLAGS()
#undef x
#define x(n) STR_HASH_##n	= 1U << BTREE_ITER_FLAG_BIT_##n,
	STR_HASH_FLAGS()
#undef x
#define x(n) BTREE_UPDATE_##n	= 1U << BTREE_ITER_FLAG_BIT_##n,
	BTREE_UPDATE_FLAGS()
#undef x
#define x(n) BTREE_TRIGGER_##n	= 1U << BTREE_ITER_FLAG_BIT_##n,
	BTREE_TRIGGER_FLAGS()
#undef x
};
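
/*
 * Sketch (not in the original header): the commented-out check above
 * could be enforced with static_assert, since key_cache_fill is the last
 * (highest) iter flag bit:
 *
 *	static_assert(BTREE_ITER_FLAG_BIT_key_cache_fill < 16);
 *
 * Because all four groups share one bit space, a single u16 flags word
 * (as in struct btree_iter below) can combine them, e.g.
 * BTREE_ITER_intent|BTREE_ITER_cached.
 */
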
enum btree_path_uptodate {
	BTREE_ITER_UPTODATE		= 0,
	BTREE_ITER_NEED_RELOCK		= 1,
	BTREE_ITER_NEED_TRAVERSE	= 2,
};

#if defined(CONFIG_BCACHEFS_LOCK_TIME_STATS) || defined(CONFIG_BCACHEFS_DEBUG)
#define TRACK_PATH_ALLOCATED
#endif

typedef u16 btree_path_idx_t;

struct btree_path {
	btree_path_idx_t	sorted_idx;
	u8			ref;
	u8			intent_ref;

	/* btree_iter_copy starts here: */
	struct bpos		pos;

	enum btree_id		btree_id:5;
	bool			cached:1;
	bool			preserve:1;
	enum btree_path_uptodate uptodate:2;
	/*
	 * When true, failing to relock this path will cause the transaction to
	 * restart:
	 */
	bool			should_be_locked:1;
	unsigned		level:3,
				locks_want:3;
	u8			nodes_locked;

	struct btree_path_level {
		struct btree	*b;
		struct btree_node_iter iter;
		u32		lock_seq;
#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS
		u64		lock_taken_time;
#endif
	}			l[BTREE_MAX_DEPTH];
#ifdef TRACK_PATH_ALLOCATED
	unsigned long		ip_allocated;
#endif
};

static inline struct btree_path_level *path_l(struct btree_path *path)
{
	return path->l + path->level;
}

static inline unsigned long btree_path_ip_allocated(struct btree_path *path)
{
#ifdef TRACK_PATH_ALLOCATED
	return path->ip_allocated;
#else
	return _THIS_IP_;
#endif
}

/*
 * @pos			- iterator's current position
 * @level		- current btree depth
 * @locks_want		- btree level below which we start taking intent locks
 * @nodes_locked	- bitmask indicating which nodes in @nodes are locked
 * @nodes_intent_locked	- bitmask indicating which locks are intent locks
 */
struct btree_iter {
	btree_path_idx_t	path;
	btree_path_idx_t	update_path;
	btree_path_idx_t	key_cache_path;

	enum btree_id		btree_id:8;
	u8			min_depth;

	/* btree_iter_copy starts here: */
	u16			flags;

	/* When we're filtering by snapshot, the snapshot ID we're looking for: */
	unsigned		snapshot;

	struct bpos		pos;
	/*
	 * Current unpacked key - so that bch2_btree_iter_next()/
	 * bch2_btree_iter_next_slot() can correctly advance pos.
	 */
	struct bkey		k;

	/* BTREE_ITER_with_journal: */
	size_t			journal_idx;
#ifdef TRACK_PATH_ALLOCATED
	unsigned long		ip_allocated;
#endif
};

#define BKEY_CACHED_ACCESSED		0
#define BKEY_CACHED_DIRTY		1

struct bkey_cached {
	struct btree_bkey_cached_common c;

	unsigned long		flags;
	u16			u64s;
	struct bkey_cached_key	key;

	struct rhash_head	hash;

	struct journal_entry_pin journal;
	u64			seq;

	struct bkey_i		*k;
	struct rcu_head		rcu;
};

static inline struct bpos btree_node_pos(struct btree_bkey_cached_common *b)
{
	return !b->cached
		? container_of(b, struct btree, c)->key.k.p
		: container_of(b, struct bkey_cached, c)->key.pos;
}

struct btree_insert_entry {
	unsigned		flags;
	u8			sort_order;
	u8			bkey_type;
	enum btree_id		btree_id:8;
	u8			level:4;
	bool			cached:1;
	bool			insert_trigger_run:1;
	bool			overwrite_trigger_run:1;
	bool			key_cache_already_flushed:1;
	/*
	 * @old_k may be a key from the journal; @old_btree_u64s always refers
	 * to the size of the key being overwritten in the btree:
	 */
	u8			old_btree_u64s;
	btree_path_idx_t	path;
	struct bkey_i		*k;
	/* key being overwritten: */
	struct bkey		old_k;
	const struct bch_val	*old_v;
	unsigned long		ip_allocated;
};

/* Number of btree paths we preallocate, usually enough */
#define BTREE_ITER_INITIAL		64
/*
 * Limit for btree_trans_too_many_iters(); this is enough that almost all code
 * paths should run inside this limit, and if they don't it usually indicates a
 * bug (leaking/duplicated btree paths).
 *
 * exception: some fsck paths
 *
 * bugs with excessive path usage seem to have possibly been eliminated now, so
 * we might consider eliminating this (and btree_trans_too_many_iters()) at some
 * point.
 */
#define BTREE_ITER_NORMAL_LIMIT		256
/* never exceed limit */
#define BTREE_ITER_MAX			(1U << 10)

struct btree_trans_commit_hook;
typedef int (btree_trans_commit_hook_fn)(struct btree_trans *, struct btree_trans_commit_hook *);

struct btree_trans_commit_hook {
	btree_trans_commit_hook_fn	*fn;
	struct btree_trans_commit_hook	*next;
};
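
/*
 * Sketch of intended usage, inferred from the types above (the names here
 * are hypothetical, not part of this header): a caller embeds the hook in
 * its own state, and the commit path walks the ->next chain invoking ->fn:
 */
struct commit_hook_state_example {
	struct btree_trans_commit_hook	hook;
	u64				inum;
};

static inline int commit_hook_fn_example(struct btree_trans *trans,
					 struct btree_trans_commit_hook *h)
{
	/* the embedding state is recovered with container_of(): */
	struct commit_hook_state_example *s =
		container_of(h, struct commit_hook_state_example, hook);

	/* runs in the commit path, with the transaction's state available */
	pr_debug("commit hook ran for inode %llu\n", s->inum);
	return 0;
}
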
#define BTREE_TRANS_MEM_MAX		(1U << 16)

#define BTREE_TRANS_MAX_LOCK_HOLD_TIME_NS	10000

struct btree_trans_paths {
	unsigned long		nr_paths;
	struct btree_path	paths[];
};

struct trans_kmalloc_trace {
	unsigned long		ip;
	size_t			bytes;
};
typedef DARRAY(struct trans_kmalloc_trace) darray_trans_kmalloc_trace;

struct btree_trans_subbuf {
	u16			base;
	u16			u64s;
	u16			size;
};

struct btree_trans {
	struct bch_fs		*c;

	unsigned long		*paths_allocated;
	struct btree_path	*paths;
	btree_path_idx_t	*sorted;
	struct btree_insert_entry *updates;

	void			*mem;
	unsigned		mem_top;
	unsigned		mem_bytes;
	unsigned		realloc_bytes_required;
#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
	darray_trans_kmalloc_trace trans_kmalloc_trace;
#endif

	btree_path_idx_t	nr_sorted;
	btree_path_idx_t	nr_paths;
	btree_path_idx_t	nr_paths_max;
	btree_path_idx_t	nr_updates;
	u8			fn_idx;
	u8			lock_must_abort;
	bool			lock_may_not_fail:1;
	bool			srcu_held:1;
	bool			locked:1;
	bool			pf_memalloc_nofs:1;
	bool			write_locked:1;
	bool			used_mempool:1;
	bool			in_traverse_all:1;
	bool			paths_sorted:1;
	bool			memory_allocation_failure:1;
	bool			journal_transaction_names:1;
	bool			journal_replay_not_finished:1;
	bool			notrace_relock_fail:1;
	enum bch_errcode	restarted:16;
	u32			restart_count;
#ifdef CONFIG_BCACHEFS_INJECT_TRANSACTION_RESTARTS
	u32			restart_count_this_trans;
#endif

	u64			last_begin_time;
	unsigned long		last_begin_ip;
	unsigned long		last_restarted_ip;
#ifdef CONFIG_BCACHEFS_DEBUG
	bch_stacktrace		last_restarted_trace;
#endif
	unsigned long		last_unlock_ip;
	unsigned long		srcu_lock_time;

	const char		*fn;
	struct btree_bkey_cached_common *locking;
	struct six_lock_waiter	locking_wait;
	int			srcu_idx;

	/* update path: */
	struct btree_trans_subbuf journal_entries;
	struct btree_trans_subbuf accounting;

	struct btree_trans_commit_hook *hooks;
	struct journal_entry_pin *journal_pin;

	struct journal_res	journal_res;
	u64			*journal_seq;
	struct disk_reservation *disk_res;

	struct bch_fs_usage_base fs_usage_delta;

	unsigned		journal_u64s;
	unsigned		extra_disk_res; /* XXX kill */

	__BKEY_PADDED(btree_path_down, BKEY_BTREE_PTR_VAL_U64s_MAX);

#ifdef CONFIG_DEBUG_LOCK_ALLOC
	struct lockdep_map	dep_map;
#endif
	/* Entries before this are zeroed out on every bch2_trans_get() call */

	struct list_head	list;
	struct closure		ref;

	unsigned long		_paths_allocated[BITS_TO_LONGS(BTREE_ITER_INITIAL)];
	struct btree_trans_paths trans_paths;
	struct btree_path	_paths[BTREE_ITER_INITIAL];
	btree_path_idx_t	_sorted[BTREE_ITER_INITIAL + 4];
	struct btree_insert_entry _updates[BTREE_ITER_INITIAL];
};

static inline struct btree_path *btree_iter_path(struct btree_trans *trans, struct btree_iter *iter)
{
	return trans->paths + iter->path;
}

static inline struct btree_path *btree_iter_key_cache_path(struct btree_trans *trans, struct btree_iter *iter)
{
	return iter->key_cache_path
		? trans->paths + iter->key_cache_path
		: NULL;
}

#define BCH_BTREE_WRITE_TYPES()		\
	x(initial,		0)	\
	x(init_next_bset,	1)	\
	x(cache_reclaim,	2)	\
	x(journal_reclaim,	3)	\
	x(interior,		4)

enum btree_write_type {
#define x(t, n) BTREE_WRITE_##t,
	BCH_BTREE_WRITE_TYPES()
#undef x
	BTREE_WRITE_TYPE_NR,
};

#define BTREE_WRITE_TYPE_MASK	(roundup_pow_of_two(BTREE_WRITE_TYPE_NR) - 1)
#define BTREE_WRITE_TYPE_BITS	ilog2(roundup_pow_of_two(BTREE_WRITE_TYPE_NR))

#define BTREE_FLAGS()			\
	x(read_in_flight)		\
	x(read_error)			\
	x(dirty)			\
	x(need_write)			\
	x(write_blocked)		\
	x(will_make_reachable)		\
	x(noevict)			\
	x(write_idx)			\
	x(accessed)			\
	x(write_in_flight)		\
	x(write_in_flight_inner)	\
	x(just_written)			\
	x(dying)			\
	x(fake)				\
	x(need_rewrite)			\
	x(need_rewrite_error)		\
	x(need_rewrite_degraded)	\
	x(need_rewrite_ptr_written_zero) \
	x(never_write)			\
	x(pinned)

enum btree_flags {
	/* First bits for btree node write type */
	BTREE_NODE_FLAGS_START = BTREE_WRITE_TYPE_BITS - 1,
#define x(flag)	BTREE_NODE_##flag,
	BTREE_FLAGS()
#undef x
};

#define x(flag)								\
static inline bool btree_node_ ## flag(struct btree *b)		\
{	return test_bit(BTREE_NODE_ ## flag, &b->flags); }		\
									\
static inline void set_btree_node_ ## flag(struct btree *b)		\
{	set_bit(BTREE_NODE_ ## flag, &b->flags); }			\
									\
static inline void clear_btree_node_ ## flag(struct btree *b)		\
{	clear_bit(BTREE_NODE_ ## flag, &b->flags); }

BTREE_FLAGS()
#undef x
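
/*
 * For example, x(dirty) in BTREE_FLAGS() expands the block above into:
 *
 *	static inline bool btree_node_dirty(struct btree *b)
 *	{ return test_bit(BTREE_NODE_dirty, &b->flags); }
 *
 * plus the matching set_btree_node_dirty() and clear_btree_node_dirty().
 */
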
#define BTREE_NODE_REWRITE_REASON()	\
	x(none)				\
	x(unknown)			\
	x(error)			\
	x(degraded)			\
	x(ptr_written_zero)

enum btree_node_rewrite_reason {
#define x(n)	BTREE_NODE_REWRITE_##n,
	BTREE_NODE_REWRITE_REASON()
#undef x
};

static inline enum btree_node_rewrite_reason btree_node_rewrite_reason(struct btree *b)
{
	if (btree_node_need_rewrite_ptr_written_zero(b))
		return BTREE_NODE_REWRITE_ptr_written_zero;
	if (btree_node_need_rewrite_degraded(b))
		return BTREE_NODE_REWRITE_degraded;
	if (btree_node_need_rewrite_error(b))
		return BTREE_NODE_REWRITE_error;
	if (btree_node_need_rewrite(b))
		return BTREE_NODE_REWRITE_unknown;
	return BTREE_NODE_REWRITE_none;
}

static inline struct btree_write *btree_current_write(struct btree *b)
{
	return b->writes + btree_node_write_idx(b);
}

static inline struct btree_write *btree_prev_write(struct btree *b)
{
	return b->writes + (btree_node_write_idx(b) ^ 1);
}

static inline struct bset_tree *bset_tree_last(struct btree *b)
{
	EBUG_ON(!b->nsets);
	return b->set + b->nsets - 1;
}

static inline void *
__btree_node_offset_to_ptr(const struct btree *b, u16 offset)
{
	return (void *) ((u64 *) b->data + offset);
}

static inline u16
__btree_node_ptr_to_offset(const struct btree *b, const void *p)
{
	u16 ret = (u64 *) p - (u64 *) b->data;

	EBUG_ON(__btree_node_offset_to_ptr(b, ret) != p);
	return ret;
}

static inline struct bset *bset(const struct btree *b,
				const struct bset_tree *t)
{
	return __btree_node_offset_to_ptr(b, t->data_offset);
}

static inline void set_btree_bset_end(struct btree *b, struct bset_tree *t)
{
	t->end_offset =
		__btree_node_ptr_to_offset(b, vstruct_last(bset(b, t)));
}

static inline void set_btree_bset(struct btree *b, struct bset_tree *t,
				  const struct bset *i)
{
	t->data_offset = __btree_node_ptr_to_offset(b, i);
	set_btree_bset_end(b, t);
}

static inline struct bset *btree_bset_first(struct btree *b)
{
	return bset(b, b->set);
}

static inline struct bset *btree_bset_last(struct btree *b)
{
	return bset(b, bset_tree_last(b));
}

static inline u16
__btree_node_key_to_offset(const struct btree *b, const struct bkey_packed *k)
{
	return __btree_node_ptr_to_offset(b, k);
}

static inline struct bkey_packed *
__btree_node_offset_to_key(const struct btree *b, u16 k)
{
	return __btree_node_offset_to_ptr(b, k);
}

static inline unsigned btree_bkey_first_offset(const struct bset_tree *t)
{
	return t->data_offset + offsetof(struct bset, _data) / sizeof(u64);
}

#define btree_bkey_first(_b, _t)					\
({									\
	EBUG_ON(bset(_b, _t)->start !=					\
		__btree_node_offset_to_key(_b, btree_bkey_first_offset(_t)));\
									\
	bset(_b, _t)->start;						\
})

#define btree_bkey_last(_b, _t)						\
({									\
	EBUG_ON(__btree_node_offset_to_key(_b, (_t)->end_offset) !=	\
		vstruct_last(bset(_b, _t)));				\
									\
	__btree_node_offset_to_key(_b, (_t)->end_offset);		\
})

static inline unsigned bset_u64s(struct bset_tree *t)
{
	return t->end_offset - t->data_offset -
		sizeof(struct bset) / sizeof(u64);
}

static inline unsigned bset_dead_u64s(struct btree *b, struct bset_tree *t)
{
	return bset_u64s(t) - b->nr.bset_u64s[t - b->set];
}

static inline unsigned bset_byte_offset(struct btree *b, void *i)
{
	return i - (void *) b->data;
}

enum btree_node_type {
	BKEY_TYPE_btree,
#define x(kwd, val, ...) BKEY_TYPE_##kwd = val + 1,
	BCH_BTREE_IDS()
#undef x
	BKEY_TYPE_NR
};

/* Type of a key in btree @id at level @level: */
static inline enum btree_node_type __btree_node_type(unsigned level, enum btree_id id)
{
	return level ? BKEY_TYPE_btree : (unsigned) id + 1;
}

/* Type of keys @b contains: */
static inline enum btree_node_type btree_node_type(struct btree *b)
{
	return __btree_node_type(b->c.level, b->c.btree_id);
}
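
/*
 * Example: in the extents btree, leaf (level 0) keys have type
 * BKEY_TYPE_extents == BTREE_ID_extents + 1, while keys in any interior
 * node are btree node pointers:
 *
 *	__btree_node_type(0, BTREE_ID_extents) == BKEY_TYPE_extents
 *	__btree_node_type(1, BTREE_ID_extents) == BKEY_TYPE_btree
 */
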
const char *bch2_btree_node_type_str(enum btree_node_type);

#define BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS	\
	(BIT_ULL(BKEY_TYPE_extents)|		\
	 BIT_ULL(BKEY_TYPE_alloc)|		\
	 BIT_ULL(BKEY_TYPE_inodes)|		\
	 BIT_ULL(BKEY_TYPE_stripes)|		\
	 BIT_ULL(BKEY_TYPE_reflink)|		\
	 BIT_ULL(BKEY_TYPE_subvolumes)|		\
	 BIT_ULL(BKEY_TYPE_btree))

#define BTREE_NODE_TYPE_HAS_ATOMIC_TRIGGERS	\
	(BIT_ULL(BKEY_TYPE_alloc)|		\
	 BIT_ULL(BKEY_TYPE_inodes)|		\
	 BIT_ULL(BKEY_TYPE_stripes)|		\
	 BIT_ULL(BKEY_TYPE_snapshots))

#define BTREE_NODE_TYPE_HAS_TRIGGERS		\
	(BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS|	\
	 BTREE_NODE_TYPE_HAS_ATOMIC_TRIGGERS)

static inline bool btree_node_type_has_trans_triggers(enum btree_node_type type)
{
	return BIT_ULL(type) & BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS;
}

static inline bool btree_node_type_has_atomic_triggers(enum btree_node_type type)
{
	return BIT_ULL(type) & BTREE_NODE_TYPE_HAS_ATOMIC_TRIGGERS;
}

static inline bool btree_node_type_has_triggers(enum btree_node_type type)
{
	return BIT_ULL(type) & BTREE_NODE_TYPE_HAS_TRIGGERS;
}

static inline bool btree_id_is_extents(enum btree_id btree)
{
	const u64 mask = 0
#define x(name, nr, flags, ...)	|((!!((flags) & BTREE_IS_extents)) << nr)
	BCH_BTREE_IDS()
#undef x
	;

	return BIT_ULL(btree) & mask;
}
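
/*
 * Note: the x-macro above folds BCH_BTREE_IDS() into a compile-time
 * constant - each btree id contributes bit nr iff its flags include
 * BTREE_IS_extents - so the check is a single AND at runtime. The helpers
 * below build their masks the same way.
 */
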
static inline bool btree_node_type_is_extents(enum btree_node_type type)
{
	return type != BKEY_TYPE_btree && btree_id_is_extents(type - 1);
}

static inline bool btree_type_has_snapshots(enum btree_id btree)
{
	const u64 mask = 0
#define x(name, nr, flags, ...)	|((!!((flags) & BTREE_IS_snapshots)) << nr)
	BCH_BTREE_IDS()
#undef x
	;

	return BIT_ULL(btree) & mask;
}

static inline bool btree_type_has_snapshot_field(enum btree_id btree)
{
	const u64 mask = 0
#define x(name, nr, flags, ...)	|((!!((flags) & (BTREE_IS_snapshot_field|BTREE_IS_snapshots))) << nr)
	BCH_BTREE_IDS()
#undef x
	;

	return BIT_ULL(btree) & mask;
}

static inline bool btree_type_has_ptrs(enum btree_id btree)
{
	const u64 mask = 0
#define x(name, nr, flags, ...)	|((!!((flags) & BTREE_IS_data)) << nr)
	BCH_BTREE_IDS()
#undef x
	;

	return BIT_ULL(btree) & mask;
}

static inline bool btree_type_uses_write_buffer(enum btree_id btree)
{
	const u64 mask = 0
#define x(name, nr, flags, ...)	|((!!((flags) & BTREE_IS_write_buffer)) << nr)
	BCH_BTREE_IDS()
#undef x
	;

	return BIT_ULL(btree) & mask;
}

static inline u8 btree_trigger_order(enum btree_id btree)
{
	switch (btree) {
	case BTREE_ID_alloc:
		return U8_MAX;
	case BTREE_ID_stripes:
		return U8_MAX - 1;
	default:
		return btree;
	}
}

struct btree_root {
	struct btree		*b;

	/* On disk root - see async splits: */
	__BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX);
	u8			level;
	u8			alive;
	s16			error;
};

enum btree_gc_coalesce_fail_reason {
	BTREE_GC_COALESCE_FAIL_RESERVE_GET,
	BTREE_GC_COALESCE_FAIL_KEYLIST_REALLOC,
	BTREE_GC_COALESCE_FAIL_FORMAT_FITS,
};

enum btree_node_sibling {
	btree_prev_sib,
	btree_next_sib,
};

struct get_locks_fail {
	unsigned	l;
	struct btree	*b;
};

#endif /* _BCACHEFS_BTREE_TYPES_H */