Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

radix-tree: add gang_lookup_slot, gang_lookup_tag_slot

Introduce the radix_tree_gang_lookup_slot() and
radix_tree_gang_lookup_tag_slot() functions, which are used by the
lockless pagecache.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: "Paul E. McKenney" <paulmck@us.ibm.com>
Reviewed-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Nick Piggin and committed by
Linus Torvalds
47feff2c 30002ed2

+166 -24
+11 -1
include/linux/radix-tree.h
··· 99 99 * 100 100 * The notable exceptions to this rule are the following functions: 101 101 * radix_tree_lookup 102 + * radix_tree_lookup_slot 102 103 * radix_tree_tag_get 103 104 * radix_tree_gang_lookup 105 + * radix_tree_gang_lookup_slot 104 106 * radix_tree_gang_lookup_tag 107 + * radix_tree_gang_lookup_tag_slot 105 108 * radix_tree_tagged 106 109 * 107 - * The first 4 functions are able to be called locklessly, using RCU. The 110 + * The first 7 functions are able to be called locklessly, using RCU. The 108 111 * caller must ensure calls to these functions are made within rcu_read_lock() 109 112 * regions. Other readers (lock-free or otherwise) and modifications may be 110 113 * running concurrently. ··· 162 159 unsigned int 163 160 radix_tree_gang_lookup(struct radix_tree_root *root, void **results, 164 161 unsigned long first_index, unsigned int max_items); 162 + unsigned int 163 + radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, 164 + unsigned long first_index, unsigned int max_items); 165 165 unsigned long radix_tree_next_hole(struct radix_tree_root *root, 166 166 unsigned long index, unsigned long max_scan); 167 167 int radix_tree_preload(gfp_t gfp_mask); ··· 177 171 unsigned long index, unsigned int tag); 178 172 unsigned int 179 173 radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, 174 + unsigned long first_index, unsigned int max_items, 175 + unsigned int tag); 176 + unsigned int 177 + radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results, 180 178 unsigned long first_index, unsigned int max_items, 181 179 unsigned int tag); 182 180 int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag);
+155 -23
lib/radix-tree.c
··· 359 359 * Returns: the slot corresponding to the position @index in the 360 360 * radix tree @root. This is useful for update-if-exists operations. 361 361 * 362 - * This function cannot be called under rcu_read_lock, it must be 363 - * excluded from writers, as must the returned slot for subsequent 364 - * use by radix_tree_deref_slot() and radix_tree_replace slot. 365 - * Caller must hold tree write locked across slot lookup and 366 - * replace. 362 + * This function can be called under rcu_read_lock iff the slot is not 363 + * modified by radix_tree_replace_slot, otherwise it must be called 364 + * exclusive from other writers. Any dereference of the slot must be done 365 + * using radix_tree_deref_slot. 367 366 */ 368 367 void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index) 369 368 { 370 369 unsigned int height, shift; 371 370 struct radix_tree_node *node, **slot; 372 371 373 - node = root->rnode; 372 + node = rcu_dereference(root->rnode); 374 373 if (node == NULL) 375 374 return NULL; 376 375 ··· 389 390 do { 390 391 slot = (struct radix_tree_node **) 391 392 (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK)); 392 - node = *slot; 393 + node = rcu_dereference(*slot); 393 394 if (node == NULL) 394 395 return NULL; 395 396 ··· 666 667 EXPORT_SYMBOL(radix_tree_next_hole); 667 668 668 669 static unsigned int 669 - __lookup(struct radix_tree_node *slot, void **results, unsigned long index, 670 + __lookup(struct radix_tree_node *slot, void ***results, unsigned long index, 670 671 unsigned int max_items, unsigned long *next_index) 671 672 { 672 673 unsigned int nr_found = 0; ··· 700 701 701 702 /* Bottom level: grab some items */ 702 703 for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) { 703 - struct radix_tree_node *node; 704 704 index++; 705 - node = slot->slots[i]; 706 - if (node) { 707 - results[nr_found++] = rcu_dereference(node); 705 + if (slot->slots[i]) { 706 + results[nr_found++] = &(slot->slots[i]); 708 
707 if (nr_found == max_items) 709 708 goto out; 710 709 } ··· 756 759 757 760 ret = 0; 758 761 while (ret < max_items) { 759 - unsigned int nr_found; 762 + unsigned int nr_found, slots_found, i; 760 763 unsigned long next_index; /* Index of next search */ 761 764 762 765 if (cur_index > max_index) 763 766 break; 764 - nr_found = __lookup(node, results + ret, cur_index, 767 + slots_found = __lookup(node, (void ***)results + ret, cur_index, 765 768 max_items - ret, &next_index); 769 + nr_found = 0; 770 + for (i = 0; i < slots_found; i++) { 771 + struct radix_tree_node *slot; 772 + slot = *(((void ***)results)[ret + i]); 773 + if (!slot) 774 + continue; 775 + results[ret + nr_found] = rcu_dereference(slot); 776 + nr_found++; 777 + } 766 778 ret += nr_found; 767 779 if (next_index == 0) 768 780 break; ··· 782 776 } 783 777 EXPORT_SYMBOL(radix_tree_gang_lookup); 784 778 779 + /** 780 + * radix_tree_gang_lookup_slot - perform multiple slot lookup on radix tree 781 + * @root: radix tree root 782 + * @results: where the results of the lookup are placed 783 + * @first_index: start the lookup from this key 784 + * @max_items: place up to this many items at *results 785 + * 786 + * Performs an index-ascending scan of the tree for present items. Places 787 + * their slots at *@results and returns the number of items which were 788 + * placed at *@results. 789 + * 790 + * The implementation is naive. 791 + * 792 + * Like radix_tree_gang_lookup as far as RCU and locking goes. Slots must 793 + * be dereferenced with radix_tree_deref_slot, and if using only RCU 794 + * protection, radix_tree_deref_slot may fail requiring a retry. 
795 + */ 796 + unsigned int 797 + radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, 798 + unsigned long first_index, unsigned int max_items) 799 + { 800 + unsigned long max_index; 801 + struct radix_tree_node *node; 802 + unsigned long cur_index = first_index; 803 + unsigned int ret; 804 + 805 + node = rcu_dereference(root->rnode); 806 + if (!node) 807 + return 0; 808 + 809 + if (!radix_tree_is_indirect_ptr(node)) { 810 + if (first_index > 0) 811 + return 0; 812 + results[0] = (void **)&root->rnode; 813 + return 1; 814 + } 815 + node = radix_tree_indirect_to_ptr(node); 816 + 817 + max_index = radix_tree_maxindex(node->height); 818 + 819 + ret = 0; 820 + while (ret < max_items) { 821 + unsigned int slots_found; 822 + unsigned long next_index; /* Index of next search */ 823 + 824 + if (cur_index > max_index) 825 + break; 826 + slots_found = __lookup(node, results + ret, cur_index, 827 + max_items - ret, &next_index); 828 + ret += slots_found; 829 + if (next_index == 0) 830 + break; 831 + cur_index = next_index; 832 + } 833 + 834 + return ret; 835 + } 836 + EXPORT_SYMBOL(radix_tree_gang_lookup_slot); 837 + 785 838 /* 786 839 * FIXME: the two tag_get()s here should use find_next_bit() instead of 787 840 * open-coding the search. 
788 841 */ 789 842 static unsigned int 790 - __lookup_tag(struct radix_tree_node *slot, void **results, unsigned long index, 843 + __lookup_tag(struct radix_tree_node *slot, void ***results, unsigned long index, 791 844 unsigned int max_items, unsigned long *next_index, unsigned int tag) 792 845 { 793 846 unsigned int nr_found = 0; ··· 876 811 unsigned long j = index & RADIX_TREE_MAP_MASK; 877 812 878 813 for ( ; j < RADIX_TREE_MAP_SIZE; j++) { 879 - struct radix_tree_node *node; 880 814 index++; 881 815 if (!tag_get(slot, tag, j)) 882 816 continue; 883 - node = slot->slots[j]; 884 817 /* 885 818 * Even though the tag was found set, we need to 886 819 * recheck that we have a non-NULL node, because ··· 889 826 * lookup ->slots[x] without a lock (ie. can't 890 827 * rely on its value remaining the same). 891 828 */ 892 - if (node) { 893 - node = rcu_dereference(node); 894 - results[nr_found++] = node; 829 + if (slot->slots[j]) { 830 + results[nr_found++] = &(slot->slots[j]); 895 831 if (nr_found == max_items) 896 832 goto out; 897 833 } ··· 949 887 950 888 ret = 0; 951 889 while (ret < max_items) { 952 - unsigned int nr_found; 890 + unsigned int nr_found, slots_found, i; 953 891 unsigned long next_index; /* Index of next search */ 954 892 955 893 if (cur_index > max_index) 956 894 break; 957 - nr_found = __lookup_tag(node, results + ret, cur_index, 958 - max_items - ret, &next_index, tag); 895 + slots_found = __lookup_tag(node, (void ***)results + ret, 896 + cur_index, max_items - ret, &next_index, tag); 897 + nr_found = 0; 898 + for (i = 0; i < slots_found; i++) { 899 + struct radix_tree_node *slot; 900 + slot = *(((void ***)results)[ret + i]); 901 + if (!slot) 902 + continue; 903 + results[ret + nr_found] = rcu_dereference(slot); 904 + nr_found++; 905 + } 959 906 ret += nr_found; 960 907 if (next_index == 0) 961 908 break; ··· 974 903 return ret; 975 904 } 976 905 EXPORT_SYMBOL(radix_tree_gang_lookup_tag); 906 + 907 + /** 908 + * radix_tree_gang_lookup_tag_slot - 
perform multiple slot lookup on a 909 + * radix tree based on a tag 910 + * @root: radix tree root 911 + * @results: where the results of the lookup are placed 912 + * @first_index: start the lookup from this key 913 + * @max_items: place up to this many items at *results 914 + * @tag: the tag index (< RADIX_TREE_MAX_TAGS) 915 + * 916 + * Performs an index-ascending scan of the tree for present items which 917 + * have the tag indexed by @tag set. Places the slots at *@results and 918 + * returns the number of slots which were placed at *@results. 919 + */ 920 + unsigned int 921 + radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results, 922 + unsigned long first_index, unsigned int max_items, 923 + unsigned int tag) 924 + { 925 + struct radix_tree_node *node; 926 + unsigned long max_index; 927 + unsigned long cur_index = first_index; 928 + unsigned int ret; 929 + 930 + /* check the root's tag bit */ 931 + if (!root_tag_get(root, tag)) 932 + return 0; 933 + 934 + node = rcu_dereference(root->rnode); 935 + if (!node) 936 + return 0; 937 + 938 + if (!radix_tree_is_indirect_ptr(node)) { 939 + if (first_index > 0) 940 + return 0; 941 + results[0] = (void **)&root->rnode; 942 + return 1; 943 + } 944 + node = radix_tree_indirect_to_ptr(node); 945 + 946 + max_index = radix_tree_maxindex(node->height); 947 + 948 + ret = 0; 949 + while (ret < max_items) { 950 + unsigned int slots_found; 951 + unsigned long next_index; /* Index of next search */ 952 + 953 + if (cur_index > max_index) 954 + break; 955 + slots_found = __lookup_tag(node, results + ret, 956 + cur_index, max_items - ret, &next_index, tag); 957 + ret += slots_found; 958 + if (next_index == 0) 959 + break; 960 + cur_index = next_index; 961 + } 962 + 963 + return ret; 964 + } 965 + EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot); 966 + 977 967 978 968 /** 979 969 * radix_tree_shrink - shrink height of a radix tree to minimal