Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bcachefs: growable btree_paths

XXX: we're allocating memory with btree locks held - bad

We need to plumb through an error path so we can use
allocate_dropping_locks() — but we're merging this now because it fixes
a transaction path overflow caused by indirect extent fragmentation, and
the resize path is rare.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>

+72 -22
+63 -14
fs/bcachefs/btree_iter.c
··· 1209 1209 bool intent) 1210 1210 { 1211 1211 btree_path_idx_t new = btree_path_alloc(trans, src); 1212 - 1213 1212 btree_path_copy(trans, trans->paths + new, trans->paths + src); 1214 1213 __btree_path_get(trans->paths + new, intent); 1215 1214 return new; ··· 1514 1515 static noinline void btree_path_overflow(struct btree_trans *trans) 1515 1516 { 1516 1517 bch2_dump_trans_paths_updates(trans); 1517 - panic("trans path overflow\n"); 1518 + bch_err(trans->c, "trans path overflow"); 1519 + } 1520 + 1521 + static noinline void btree_paths_realloc(struct btree_trans *trans) 1522 + { 1523 + unsigned nr = trans->nr_paths * 2; 1524 + 1525 + void *p = kzalloc(BITS_TO_LONGS(nr) * sizeof(unsigned long) + 1526 + sizeof(struct btree_trans_paths) + 1527 + nr * sizeof(struct btree_path) + 1528 + nr * sizeof(btree_path_idx_t) + 8 + 1529 + nr * sizeof(struct btree_insert_entry), GFP_KERNEL|__GFP_NOFAIL); 1530 + 1531 + unsigned long *paths_allocated = p; 1532 + memcpy(paths_allocated, trans->paths_allocated, BITS_TO_LONGS(trans->nr_paths) * sizeof(unsigned long)); 1533 + p += BITS_TO_LONGS(nr) * sizeof(unsigned long); 1534 + 1535 + p += sizeof(struct btree_trans_paths); 1536 + struct btree_path *paths = p; 1537 + *trans_paths_nr(paths) = nr; 1538 + memcpy(paths, trans->paths, trans->nr_paths * sizeof(struct btree_path)); 1539 + p += nr * sizeof(struct btree_path); 1540 + 1541 + btree_path_idx_t *sorted = p; 1542 + memcpy(sorted, trans->sorted, trans->nr_sorted * sizeof(btree_path_idx_t)); 1543 + p += nr * sizeof(btree_path_idx_t) + 8; 1544 + 1545 + struct btree_insert_entry *updates = p; 1546 + memcpy(updates, trans->updates, trans->nr_paths * sizeof(struct btree_insert_entry)); 1547 + 1548 + unsigned long *old = trans->paths_allocated; 1549 + 1550 + rcu_assign_pointer(trans->paths_allocated, paths_allocated); 1551 + rcu_assign_pointer(trans->paths, paths); 1552 + rcu_assign_pointer(trans->sorted, sorted); 1553 + rcu_assign_pointer(trans->updates, updates); 1554 + 1555 + 
trans->nr_paths = nr; 1556 + 1557 + if (old != trans->_paths_allocated) 1558 + kfree_rcu_mightsleep(old); 1518 1559 } 1519 1560 1520 1561 static inline btree_path_idx_t btree_path_alloc(struct btree_trans *trans, ··· 1562 1523 { 1563 1524 btree_path_idx_t idx = find_first_zero_bit(trans->paths_allocated, trans->nr_paths); 1564 1525 1565 - if (unlikely(idx == trans->nr_paths)) 1566 - btree_path_overflow(trans); 1526 + if (unlikely(idx == trans->nr_paths)) { 1527 + if (trans->nr_paths == BTREE_ITER_MAX) { 1528 + btree_path_overflow(trans); 1529 + return 0; 1530 + } 1531 + 1532 + btree_paths_realloc(trans); 1533 + } 1567 1534 1568 1535 /* 1569 1536 * Do this before marking the new path as allocated, since it won't be ··· 2652 2607 static inline void btree_path_list_remove(struct btree_trans *trans, 2653 2608 struct btree_path *path) 2654 2609 { 2655 - unsigned i; 2656 - 2657 2610 EBUG_ON(path->sorted_idx >= trans->nr_sorted); 2658 2611 #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS 2659 2612 trans->nr_sorted--; 2660 2613 memmove_u64s_down_small(trans->sorted + path->sorted_idx, 2661 2614 trans->sorted + path->sorted_idx + 1, 2662 - DIV_ROUND_UP(trans->nr_sorted - path->sorted_idx, 8)); 2615 + DIV_ROUND_UP(trans->nr_sorted - path->sorted_idx, 2616 + sizeof(u64) / sizeof(btree_path_idx_t))); 2663 2617 #else 2664 2618 array_remove_item(trans->sorted, trans->nr_sorted, path->sorted_idx); 2665 2619 #endif 2666 - for (i = path->sorted_idx; i < trans->nr_sorted; i++) 2620 + for (unsigned i = path->sorted_idx; i < trans->nr_sorted; i++) 2667 2621 trans->paths[trans->sorted[i]].sorted_idx = i; 2668 - 2669 - path->sorted_idx = U8_MAX; 2670 2622 } 2671 2623 2672 2624 static inline void btree_path_list_add(struct btree_trans *trans, ··· 2671 2629 btree_path_idx_t path_idx) 2672 2630 { 2673 2631 struct btree_path *path = trans->paths + path_idx; 2674 - unsigned i; 2675 2632 2676 2633 path->sorted_idx = pos ? 
trans->paths[pos].sorted_idx + 1 : trans->nr_sorted; 2677 2634 2678 2635 #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS 2679 2636 memmove_u64s_up_small(trans->sorted + path->sorted_idx + 1, 2680 2637 trans->sorted + path->sorted_idx, 2681 - DIV_ROUND_UP(trans->nr_sorted - path->sorted_idx, 8)); 2638 + DIV_ROUND_UP(trans->nr_sorted - path->sorted_idx, 2639 + sizeof(u64) / sizeof(btree_path_idx_t))); 2682 2640 trans->nr_sorted++; 2683 2641 trans->sorted[path->sorted_idx] = path_idx; 2684 2642 #else 2685 2643 array_insert_item(trans->sorted, trans->nr_sorted, path->sorted_idx, path_idx); 2686 2644 #endif 2687 2645 2688 - for (i = path->sorted_idx; i < trans->nr_sorted; i++) 2646 + for (unsigned i = path->sorted_idx; i < trans->nr_sorted; i++) 2689 2647 trans->paths[trans->sorted[i]].sorted_idx = i; 2690 2648 2691 2649 btree_trans_verify_sorted_refs(trans); ··· 2981 2939 trans->paths = trans->_paths; 2982 2940 trans->updates = trans->_updates; 2983 2941 2984 - *trans_paths_nr(trans->paths) = BTREE_ITER_MAX; 2942 + *trans_paths_nr(trans->paths) = BTREE_ITER_INITIAL; 2985 2943 2986 2944 trans->paths_allocated[0] = 1; 2987 2945 ··· 3061 3019 3062 3020 if (unlikely(trans->journal_replay_not_finished)) 3063 3021 bch2_journal_keys_put(c); 3022 + 3023 + unsigned long *paths_allocated = trans->paths_allocated; 3024 + trans->paths_allocated = NULL; 3025 + trans->paths = NULL; 3026 + 3027 + if (paths_allocated != trans->_paths_allocated) 3028 + kfree_rcu_mightsleep(paths_allocated); 3064 3029 3065 3030 if (trans->mem_bytes == BTREE_TRANS_MEM_MAX) 3066 3031 mempool_free(trans->mem, &c->btree_trans_mem_pool);
+1 -1
fs/bcachefs/btree_iter.h
··· 642 642 643 643 static inline int btree_trans_too_many_iters(struct btree_trans *trans) 644 644 { 645 - if (bitmap_weight(trans->paths_allocated, trans->nr_paths) > BTREE_ITER_MAX - 8) 645 + if (bitmap_weight(trans->paths_allocated, trans->nr_paths) > BTREE_ITER_INITIAL - 8) 646 646 return __bch2_btree_trans_too_many_iters(trans); 647 647 648 648 return 0;
+7 -6
fs/bcachefs/btree_types.h
··· 358 358 unsigned long ip_allocated; 359 359 }; 360 360 361 - #define BTREE_ITER_MAX 64 361 + #define BTREE_ITER_INITIAL 64 362 + #define BTREE_ITER_MAX (1U << 10) 362 363 363 364 struct btree_trans_commit_hook; 364 365 typedef int (btree_trans_commit_hook_fn)(struct btree_trans *, struct btree_trans_commit_hook *); ··· 383 382 384 383 unsigned long *paths_allocated; 385 384 struct btree_path *paths; 386 - u8 *sorted; 385 + btree_path_idx_t *sorted; 387 386 struct btree_insert_entry *updates; 388 387 389 388 void *mem; ··· 439 438 struct list_head list; 440 439 struct closure ref; 441 440 442 - unsigned long _paths_allocated[BITS_TO_LONGS(BTREE_ITER_MAX)]; 441 + unsigned long _paths_allocated[BITS_TO_LONGS(BTREE_ITER_INITIAL)]; 443 442 struct btree_trans_paths trans_paths; 444 - struct btree_path _paths[BTREE_ITER_MAX]; 445 - u8 _sorted[BTREE_ITER_MAX + 8]; 446 - struct btree_insert_entry _updates[BTREE_ITER_MAX]; 443 + struct btree_path _paths[BTREE_ITER_INITIAL]; 444 + btree_path_idx_t _sorted[BTREE_ITER_INITIAL + 4]; 445 + struct btree_insert_entry _updates[BTREE_ITER_INITIAL]; 447 446 }; 448 447 449 448 static inline struct btree_path *btree_iter_path(struct btree_trans *trans, struct btree_iter *iter)
+1 -1
fs/bcachefs/extent_update.c
··· 100 100 return ret2 ?: ret; 101 101 } 102 102 103 - #define EXTENT_ITERS_MAX (BTREE_ITER_MAX / 3) 103 + #define EXTENT_ITERS_MAX (BTREE_ITER_INITIAL / 3) 104 104 105 105 int bch2_extent_atomic_end(struct btree_trans *trans, 106 106 struct btree_iter *iter,