Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

maple_tree: Drop bulk insert support

Bulk insert mode was added to make forking faster, but forking now
uses __mt_dup() to duplicate the tree.

The addition of sheaves has made the bulk allocations difficult to
maintain, since the expected entries would be preallocated into the
maple state. A big part of the maple state node allocation was the
ability to push nodes back onto the state for later use, which was
essential to the bulk insert algorithm.

Remove mas_expected_entries() and mas_destroy_rebalance() functions as
well as the MA_STATE_BULK and MA_STATE_REBALANCE maple state flags since
there are no users anymore. Drop the associated testing as well.

Signed-off-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Reviewed-by: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>

authored by

Liam R. Howlett and committed by
Vlastimil Babka
e3852a12 da577f1f

+4 -439
+4 -266
lib/maple_tree.c
··· 83 83 84 84 /* 85 85 * Maple state flags 86 - * * MA_STATE_BULK - Bulk insert mode 87 - * * MA_STATE_REBALANCE - Indicate a rebalance during bulk insert 88 86 * * MA_STATE_PREALLOC - Preallocated nodes, WARN_ON allocation 89 87 */ 90 - #define MA_STATE_BULK 1 91 - #define MA_STATE_REBALANCE 2 92 - #define MA_STATE_PREALLOC 4 88 + #define MA_STATE_PREALLOC 1 93 89 94 90 #define ma_parent_ptr(x) ((struct maple_pnode *)(x)) 95 91 #define mas_tree_parent(x) ((unsigned long)(x->tree) | MA_ROOT_PARENT) ··· 1028 1032 } 1029 1033 1030 1034 /* 1031 - * mte_set_gap() - Set a maple node gap. 1032 - * @mn: The encoded maple node 1033 - * @gap: The offset of the gap to set 1034 - * @val: The gap value 1035 - */ 1036 - static inline void mte_set_gap(const struct maple_enode *mn, 1037 - unsigned char gap, unsigned long val) 1038 - { 1039 - switch (mte_node_type(mn)) { 1040 - default: 1041 - break; 1042 - case maple_arange_64: 1043 - mte_to_node(mn)->ma64.gap[gap] = val; 1044 - break; 1045 - } 1046 - } 1047 - 1048 - /* 1049 1035 * mas_ascend() - Walk up a level of the tree. 1050 1036 * @mas: The maple state 1051 1037 * ··· 1856 1878 * end on a NULL entry, with the exception of the left-most leaf. The 1857 1879 * limitation means that the split of a node must be checked for this condition 1858 1880 * and be able to put more data in one direction or the other. 1859 - */ 1860 - if (unlikely((mas->mas_flags & MA_STATE_BULK))) { 1861 - *mid_split = 0; 1862 - split = b_end - mt_min_slots[bn->type]; 1863 - 1864 - if (!ma_is_leaf(bn->type)) 1865 - return split; 1866 - 1867 - mas->mas_flags |= MA_STATE_REBALANCE; 1868 - if (!bn->slot[split]) 1869 - split--; 1870 - return split; 1871 - } 1872 - 1873 - /* 1881 + * 1874 1882 * Although extremely rare, it is possible to enter what is known as the 3-way 1875 1883 * split scenario. The 3-way split comes about by means of a store of a range 1876 1884 * that overwrites the end and beginning of two full nodes. 
The result is a set ··· 2004 2040 } 2005 2041 2006 2042 /* 2007 - * mas_bulk_rebalance() - Rebalance the end of a tree after a bulk insert. 2008 - * @mas: The maple state 2009 - * @end: The maple node end 2010 - * @mt: The maple node type 2011 - */ 2012 - static inline void mas_bulk_rebalance(struct ma_state *mas, unsigned char end, 2013 - enum maple_type mt) 2014 - { 2015 - if (!(mas->mas_flags & MA_STATE_BULK)) 2016 - return; 2017 - 2018 - if (mte_is_root(mas->node)) 2019 - return; 2020 - 2021 - if (end > mt_min_slots[mt]) { 2022 - mas->mas_flags &= ~MA_STATE_REBALANCE; 2023 - return; 2024 - } 2025 - } 2026 - 2027 - /* 2028 2043 * mas_store_b_node() - Store an @entry into the b_node while also copying the 2029 2044 * data from a maple encoded node. 2030 2045 * @wr_mas: the maple write state ··· 2052 2109 /* Handle new range ending before old range ends */ 2053 2110 piv = mas_safe_pivot(mas, wr_mas->pivots, offset_end, wr_mas->type); 2054 2111 if (piv > mas->last) { 2055 - if (piv == ULONG_MAX) 2056 - mas_bulk_rebalance(mas, b_node->b_end, wr_mas->type); 2057 - 2058 2112 if (offset_end != slot) 2059 2113 wr_mas->content = mas_slot_locked(mas, wr_mas->slots, 2060 2114 offset_end); ··· 2952 3012 } 2953 3013 2954 3014 /* 2955 - * mas_destroy_rebalance() - Rebalance left-most node while destroying the maple 2956 - * state. 2957 - * @mas: The maple state 2958 - * @end: The end of the left-most node. 2959 - * 2960 - * During a mass-insert event (such as forking), it may be necessary to 2961 - * rebalance the left-most node when it is not sufficient. 
2962 - */ 2963 - static inline void mas_destroy_rebalance(struct ma_state *mas, unsigned char end) 2964 - { 2965 - enum maple_type mt = mte_node_type(mas->node); 2966 - struct maple_node reuse, *newnode, *parent, *new_left, *left, *node; 2967 - struct maple_enode *eparent, *old_eparent; 2968 - unsigned char offset, tmp, split = mt_slots[mt] / 2; 2969 - void __rcu **l_slots, **slots; 2970 - unsigned long *l_pivs, *pivs, gap; 2971 - bool in_rcu = mt_in_rcu(mas->tree); 2972 - unsigned char new_height = mas_mt_height(mas); 2973 - 2974 - MA_STATE(l_mas, mas->tree, mas->index, mas->last); 2975 - 2976 - l_mas = *mas; 2977 - mas_prev_sibling(&l_mas); 2978 - 2979 - /* set up node. */ 2980 - if (in_rcu) { 2981 - newnode = mas_pop_node(mas); 2982 - } else { 2983 - newnode = &reuse; 2984 - } 2985 - 2986 - node = mas_mn(mas); 2987 - newnode->parent = node->parent; 2988 - slots = ma_slots(newnode, mt); 2989 - pivs = ma_pivots(newnode, mt); 2990 - left = mas_mn(&l_mas); 2991 - l_slots = ma_slots(left, mt); 2992 - l_pivs = ma_pivots(left, mt); 2993 - if (!l_slots[split]) 2994 - split++; 2995 - tmp = mas_data_end(&l_mas) - split; 2996 - 2997 - memcpy(slots, l_slots + split + 1, sizeof(void *) * tmp); 2998 - memcpy(pivs, l_pivs + split + 1, sizeof(unsigned long) * tmp); 2999 - pivs[tmp] = l_mas.max; 3000 - memcpy(slots + tmp, ma_slots(node, mt), sizeof(void *) * end); 3001 - memcpy(pivs + tmp, ma_pivots(node, mt), sizeof(unsigned long) * end); 3002 - 3003 - l_mas.max = l_pivs[split]; 3004 - mas->min = l_mas.max + 1; 3005 - old_eparent = mt_mk_node(mte_parent(l_mas.node), 3006 - mas_parent_type(&l_mas, l_mas.node)); 3007 - tmp += end; 3008 - if (!in_rcu) { 3009 - unsigned char max_p = mt_pivots[mt]; 3010 - unsigned char max_s = mt_slots[mt]; 3011 - 3012 - if (tmp < max_p) 3013 - memset(pivs + tmp, 0, 3014 - sizeof(unsigned long) * (max_p - tmp)); 3015 - 3016 - if (tmp < mt_slots[mt]) 3017 - memset(slots + tmp, 0, sizeof(void *) * (max_s - tmp)); 3018 - 3019 - memcpy(node, newnode, 
sizeof(struct maple_node)); 3020 - ma_set_meta(node, mt, 0, tmp - 1); 3021 - mte_set_pivot(old_eparent, mte_parent_slot(l_mas.node), 3022 - l_pivs[split]); 3023 - 3024 - /* Remove data from l_pivs. */ 3025 - tmp = split + 1; 3026 - memset(l_pivs + tmp, 0, sizeof(unsigned long) * (max_p - tmp)); 3027 - memset(l_slots + tmp, 0, sizeof(void *) * (max_s - tmp)); 3028 - ma_set_meta(left, mt, 0, split); 3029 - eparent = old_eparent; 3030 - 3031 - goto done; 3032 - } 3033 - 3034 - /* RCU requires replacing both l_mas, mas, and parent. */ 3035 - mas->node = mt_mk_node(newnode, mt); 3036 - ma_set_meta(newnode, mt, 0, tmp); 3037 - 3038 - new_left = mas_pop_node(mas); 3039 - new_left->parent = left->parent; 3040 - mt = mte_node_type(l_mas.node); 3041 - slots = ma_slots(new_left, mt); 3042 - pivs = ma_pivots(new_left, mt); 3043 - memcpy(slots, l_slots, sizeof(void *) * split); 3044 - memcpy(pivs, l_pivs, sizeof(unsigned long) * split); 3045 - ma_set_meta(new_left, mt, 0, split); 3046 - l_mas.node = mt_mk_node(new_left, mt); 3047 - 3048 - /* replace parent. 
*/ 3049 - offset = mte_parent_slot(mas->node); 3050 - mt = mas_parent_type(&l_mas, l_mas.node); 3051 - parent = mas_pop_node(mas); 3052 - slots = ma_slots(parent, mt); 3053 - pivs = ma_pivots(parent, mt); 3054 - memcpy(parent, mte_to_node(old_eparent), sizeof(struct maple_node)); 3055 - rcu_assign_pointer(slots[offset], mas->node); 3056 - rcu_assign_pointer(slots[offset - 1], l_mas.node); 3057 - pivs[offset - 1] = l_mas.max; 3058 - eparent = mt_mk_node(parent, mt); 3059 - done: 3060 - gap = mas_leaf_max_gap(mas); 3061 - mte_set_gap(eparent, mte_parent_slot(mas->node), gap); 3062 - gap = mas_leaf_max_gap(&l_mas); 3063 - mte_set_gap(eparent, mte_parent_slot(l_mas.node), gap); 3064 - mas_ascend(mas); 3065 - 3066 - if (in_rcu) { 3067 - mas_replace_node(mas, old_eparent, new_height); 3068 - mas_adopt_children(mas, mas->node); 3069 - } 3070 - 3071 - mas_update_gap(mas); 3072 - } 3073 - 3074 - /* 3075 3015 * mas_split_final_node() - Split the final node in a subtree operation. 3076 3016 * @mast: the maple subtree state 3077 3017 * @mas: The maple state ··· 3657 3837 3658 3838 if (mas->last == wr_mas->end_piv) 3659 3839 offset_end++; /* don't copy this offset */ 3660 - else if (unlikely(wr_mas->r_max == ULONG_MAX)) 3661 - mas_bulk_rebalance(mas, mas->end, wr_mas->type); 3662 3840 3663 3841 /* set up node. */ 3664 3842 if (in_rcu) { ··· 4073 4255 new_end = mas_wr_new_end(wr_mas); 4074 4256 /* Potential spanning rebalance collapsing a node */ 4075 4257 if (new_end < mt_min_slots[wr_mas->type]) { 4076 - if (!mte_is_root(mas->node) && !(mas->mas_flags & MA_STATE_BULK)) 4258 + if (!mte_is_root(mas->node)) 4077 4259 return wr_rebalance; 4078 4260 return wr_node_store; 4079 4261 } ··· 5380 5562 struct maple_alloc *node; 5381 5563 unsigned long total; 5382 5564 5383 - /* 5384 - * When using mas_for_each() to insert an expected number of elements, 5385 - * it is possible that the number inserted is less than the expected 5386 - * number. 
To fix an invalid final node, a check is performed here to 5387 - * rebalance the previous node with the final node. 5388 - */ 5389 - if (mas->mas_flags & MA_STATE_REBALANCE) { 5390 - unsigned char end; 5391 - if (mas_is_err(mas)) 5392 - mas_reset(mas); 5393 - mas_start(mas); 5394 - mtree_range_walk(mas); 5395 - end = mas->end + 1; 5396 - if (end < mt_min_slot_count(mas->node) - 1) 5397 - mas_destroy_rebalance(mas, end); 5398 - 5399 - mas->mas_flags &= ~MA_STATE_REBALANCE; 5400 - } 5401 - mas->mas_flags &= ~(MA_STATE_BULK|MA_STATE_PREALLOC); 5565 + mas->mas_flags &= ~MA_STATE_PREALLOC; 5402 5566 5403 5567 total = mas_allocated(mas); 5404 5568 while (total) { ··· 5399 5599 mas->alloc = NULL; 5400 5600 } 5401 5601 EXPORT_SYMBOL_GPL(mas_destroy); 5402 - 5403 - /* 5404 - * mas_expected_entries() - Set the expected number of entries that will be inserted. 5405 - * @mas: The maple state 5406 - * @nr_entries: The number of expected entries. 5407 - * 5408 - * This will attempt to pre-allocate enough nodes to store the expected number 5409 - * of entries. The allocations will occur using the bulk allocator interface 5410 - * for speed. Please call mas_destroy() on the @mas after inserting the entries 5411 - * to ensure any unused nodes are freed. 5412 - * 5413 - * Return: 0 on success, -ENOMEM if memory could not be allocated. 5414 - */ 5415 - int mas_expected_entries(struct ma_state *mas, unsigned long nr_entries) 5416 - { 5417 - int nonleaf_cap = MAPLE_ARANGE64_SLOTS - 2; 5418 - struct maple_enode *enode = mas->node; 5419 - int nr_nodes; 5420 - int ret; 5421 - 5422 - /* 5423 - * Sometimes it is necessary to duplicate a tree to a new tree, such as 5424 - * forking a process and duplicating the VMAs from one tree to a new 5425 - * tree. When such a situation arises, it is known that the new tree is 5426 - * not going to be used until the entire tree is populated. For 5427 - * performance reasons, it is best to use a bulk load with RCU disabled. 
5428 - * This allows for optimistic splitting that favours the left and reuse 5429 - * of nodes during the operation. 5430 - */ 5431 - 5432 - /* Optimize splitting for bulk insert in-order */ 5433 - mas->mas_flags |= MA_STATE_BULK; 5434 - 5435 - /* 5436 - * Avoid overflow, assume a gap between each entry and a trailing null. 5437 - * If this is wrong, it just means allocation can happen during 5438 - * insertion of entries. 5439 - */ 5440 - nr_nodes = max(nr_entries, nr_entries * 2 + 1); 5441 - if (!mt_is_alloc(mas->tree)) 5442 - nonleaf_cap = MAPLE_RANGE64_SLOTS - 2; 5443 - 5444 - /* Leaves; reduce slots to keep space for expansion */ 5445 - nr_nodes = DIV_ROUND_UP(nr_nodes, MAPLE_RANGE64_SLOTS - 2); 5446 - /* Internal nodes */ 5447 - nr_nodes += DIV_ROUND_UP(nr_nodes, nonleaf_cap); 5448 - /* Add working room for split (2 nodes) + new parents */ 5449 - mas_node_count_gfp(mas, nr_nodes + 3, GFP_KERNEL); 5450 - 5451 - /* Detect if allocations run out */ 5452 - mas->mas_flags |= MA_STATE_PREALLOC; 5453 - 5454 - if (!mas_is_err(mas)) 5455 - return 0; 5456 - 5457 - ret = xa_err(mas->node); 5458 - mas->node = enode; 5459 - mas_destroy(mas); 5460 - return ret; 5461 - 5462 - } 5463 - EXPORT_SYMBOL_GPL(mas_expected_entries); 5464 5602 5465 5603 static void mas_may_activate(struct ma_state *mas) 5466 5604 {
-137
lib/test_maple_tree.c
··· 2746 2746 mtree_test_erase(mt, ULONG_MAX - 10); 2747 2747 } 2748 2748 2749 - /* duplicate the tree with a specific gap */ 2750 - static noinline void __init check_dup_gaps(struct maple_tree *mt, 2751 - unsigned long nr_entries, bool zero_start, 2752 - unsigned long gap) 2753 - { 2754 - unsigned long i = 0; 2755 - struct maple_tree newmt; 2756 - int ret; 2757 - void *tmp; 2758 - MA_STATE(mas, mt, 0, 0); 2759 - MA_STATE(newmas, &newmt, 0, 0); 2760 - struct rw_semaphore newmt_lock; 2761 - 2762 - init_rwsem(&newmt_lock); 2763 - mt_set_external_lock(&newmt, &newmt_lock); 2764 - 2765 - if (!zero_start) 2766 - i = 1; 2767 - 2768 - mt_zero_nr_tallocated(); 2769 - for (; i <= nr_entries; i++) 2770 - mtree_store_range(mt, i*10, (i+1)*10 - gap, 2771 - xa_mk_value(i), GFP_KERNEL); 2772 - 2773 - mt_init_flags(&newmt, MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN); 2774 - mt_set_non_kernel(99999); 2775 - down_write(&newmt_lock); 2776 - ret = mas_expected_entries(&newmas, nr_entries); 2777 - mt_set_non_kernel(0); 2778 - MT_BUG_ON(mt, ret != 0); 2779 - 2780 - rcu_read_lock(); 2781 - mas_for_each(&mas, tmp, ULONG_MAX) { 2782 - newmas.index = mas.index; 2783 - newmas.last = mas.last; 2784 - mas_store(&newmas, tmp); 2785 - } 2786 - rcu_read_unlock(); 2787 - mas_destroy(&newmas); 2788 - 2789 - __mt_destroy(&newmt); 2790 - up_write(&newmt_lock); 2791 - } 2792 - 2793 - /* Duplicate many sizes of trees. 
Mainly to test expected entry values */ 2794 - static noinline void __init check_dup(struct maple_tree *mt) 2795 - { 2796 - int i; 2797 - int big_start = 100010; 2798 - 2799 - /* Check with a value at zero */ 2800 - for (i = 10; i < 1000; i++) { 2801 - mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); 2802 - check_dup_gaps(mt, i, true, 5); 2803 - mtree_destroy(mt); 2804 - rcu_barrier(); 2805 - } 2806 - 2807 - cond_resched(); 2808 - mt_cache_shrink(); 2809 - /* Check with a value at zero, no gap */ 2810 - for (i = 1000; i < 2000; i++) { 2811 - mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); 2812 - check_dup_gaps(mt, i, true, 0); 2813 - mtree_destroy(mt); 2814 - rcu_barrier(); 2815 - } 2816 - 2817 - cond_resched(); 2818 - mt_cache_shrink(); 2819 - /* Check with a value at zero and unreasonably large */ 2820 - for (i = big_start; i < big_start + 10; i++) { 2821 - mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); 2822 - check_dup_gaps(mt, i, true, 5); 2823 - mtree_destroy(mt); 2824 - rcu_barrier(); 2825 - } 2826 - 2827 - cond_resched(); 2828 - mt_cache_shrink(); 2829 - /* Small to medium size not starting at zero*/ 2830 - for (i = 200; i < 1000; i++) { 2831 - mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); 2832 - check_dup_gaps(mt, i, false, 5); 2833 - mtree_destroy(mt); 2834 - rcu_barrier(); 2835 - } 2836 - 2837 - cond_resched(); 2838 - mt_cache_shrink(); 2839 - /* Unreasonably large not starting at zero*/ 2840 - for (i = big_start; i < big_start + 10; i++) { 2841 - mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); 2842 - check_dup_gaps(mt, i, false, 5); 2843 - mtree_destroy(mt); 2844 - rcu_barrier(); 2845 - cond_resched(); 2846 - mt_cache_shrink(); 2847 - } 2848 - 2849 - /* Check non-allocation tree not starting at zero */ 2850 - for (i = 1500; i < 3000; i++) { 2851 - mt_init_flags(mt, 0); 2852 - check_dup_gaps(mt, i, false, 5); 2853 - mtree_destroy(mt); 2854 - rcu_barrier(); 2855 - cond_resched(); 2856 - if (i % 2 == 0) 2857 - mt_cache_shrink(); 2858 - } 2859 - 2860 - mt_cache_shrink(); 2861 - /* Check 
non-allocation tree starting at zero */ 2862 - for (i = 200; i < 1000; i++) { 2863 - mt_init_flags(mt, 0); 2864 - check_dup_gaps(mt, i, true, 5); 2865 - mtree_destroy(mt); 2866 - rcu_barrier(); 2867 - cond_resched(); 2868 - } 2869 - 2870 - mt_cache_shrink(); 2871 - /* Unreasonably large */ 2872 - for (i = big_start + 5; i < big_start + 10; i++) { 2873 - mt_init_flags(mt, 0); 2874 - check_dup_gaps(mt, i, true, 5); 2875 - mtree_destroy(mt); 2876 - rcu_barrier(); 2877 - mt_cache_shrink(); 2878 - cond_resched(); 2879 - } 2880 - } 2881 - 2882 2749 static noinline void __init check_bnode_min_spanning(struct maple_tree *mt) 2883 2750 { 2884 2751 int i = 50; ··· 3942 4075 3943 4076 mt_init_flags(&tree, 0); 3944 4077 check_fuzzer(&tree); 3945 - mtree_destroy(&tree); 3946 - 3947 - mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); 3948 - check_dup(&tree); 3949 4078 mtree_destroy(&tree); 3950 4079 3951 4080 mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
-36
tools/testing/radix-tree/maple.c
··· 35455 35455 MT_BUG_ON(mt, count != e); 35456 35456 mtree_destroy(mt); 35457 35457 35458 - mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); 35459 - mas_reset(&mas); 35460 - mt_zero_nr_tallocated(); 35461 - mt_set_non_kernel(200); 35462 - mas_expected_entries(&mas, max); 35463 - for (count = 0; count <= max; count++) { 35464 - mas.index = mas.last = count; 35465 - mas_store(&mas, xa_mk_value(count)); 35466 - MT_BUG_ON(mt, mas_is_err(&mas)); 35467 - } 35468 - mas_destroy(&mas); 35469 35458 rcu_barrier(); 35470 35459 /* 35471 35460 * pr_info(" ->seq test of 0-%lu %luK in %d active (%d total)\n", ··· 36443 36454 return 0; 36444 36455 } 36445 36456 36446 - /* 36447 - * test to check that bulk stores do not use wr_rebalance as the store 36448 - * type. 36449 - */ 36450 - static inline void check_bulk_rebalance(struct maple_tree *mt) 36451 - { 36452 - MA_STATE(mas, mt, ULONG_MAX, ULONG_MAX); 36453 - int max = 10; 36454 - 36455 - build_full_tree(mt, 0, 2); 36456 - 36457 - /* erase every entry in the tree */ 36458 - do { 36459 - /* set up bulk store mode */ 36460 - mas_expected_entries(&mas, max); 36461 - mas_erase(&mas); 36462 - MT_BUG_ON(mt, mas.store_type == wr_rebalance); 36463 - } while (mas_prev(&mas, 0) != NULL); 36464 - 36465 - mas_destroy(&mas); 36466 - } 36467 36457 36468 36458 void farmer_tests(void) 36469 36459 { ··· 36453 36485 36454 36486 mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN | MT_FLAGS_USE_RCU); 36455 36487 check_vma_modification(&tree); 36456 - mtree_destroy(&tree); 36457 - 36458 - mt_init(&tree); 36459 - check_bulk_rebalance(&tree); 36460 36488 mtree_destroy(&tree); 36461 36489 36462 36490 tree.ma_root = xa_mk_value(0);