Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

xarray: Change definition of sibling entries

Instead of storing a pointer to the slot containing the canonical entry,
store the offset of the slot. This produces slightly more efficient code
(~300 bytes) and simplifies the implementation.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
Reviewed-by: Josef Bacik <jbacik@fb.com>

+121 -50
+1 -4
include/linux/radix-tree.h
··· 59 59 60 60 #define RADIX_TREE_MAX_TAGS 3 61 61 62 - #ifndef RADIX_TREE_MAP_SHIFT 63 - #define RADIX_TREE_MAP_SHIFT (CONFIG_BASE_SMALL ? 4 : 6) 64 - #endif 65 - 62 + #define RADIX_TREE_MAP_SHIFT XA_CHUNK_SHIFT 66 63 #define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT) 67 64 #define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE-1) 68 65
+92
include/linux/xarray.h
··· 22 22 * x1: Value entry or tagged pointer 23 23 * 24 24 * Attempting to store internal entries in the XArray is a bug. 25 + * 26 + * Most internal entries are pointers to the next node in the tree. 27 + * The following internal entries have a special meaning: 28 + * 29 + * 0-62: Sibling entries 30 + * 256: Retry entry 25 31 */ 26 32 27 33 #define BITS_PER_XA_VALUE (BITS_PER_LONG - 1) ··· 117 111 return (unsigned long)entry & 3UL; 118 112 } 119 113 114 + /* 115 + * xa_mk_internal() - Create an internal entry. 116 + * @v: Value to turn into an internal entry. 117 + * 118 + * Context: Any context. 119 + * Return: An XArray internal entry corresponding to this value. 120 + */ 121 + static inline void *xa_mk_internal(unsigned long v) 122 + { 123 + return (void *)((v << 2) | 2); 124 + } 125 + 126 + /* 127 + * xa_to_internal() - Extract the value from an internal entry. 128 + * @entry: XArray entry. 129 + * 130 + * Context: Any context. 131 + * Return: The value which was stored in the internal entry. 132 + */ 133 + static inline unsigned long xa_to_internal(const void *entry) 134 + { 135 + return (unsigned long)entry >> 2; 136 + } 137 + 138 + /* 139 + * xa_is_internal() - Is the entry an internal entry? 140 + * @entry: XArray entry. 141 + * 142 + * Context: Any context. 143 + * Return: %true if the entry is an internal entry. 144 + */ 145 + static inline bool xa_is_internal(const void *entry) 146 + { 147 + return ((unsigned long)entry & 3) == 2; 148 + } 149 + 120 150 #define xa_trylock(xa) spin_trylock(&(xa)->xa_lock) 121 151 #define xa_lock(xa) spin_lock(&(xa)->xa_lock) 122 152 #define xa_unlock(xa) spin_unlock(&(xa)->xa_lock) ··· 164 122 spin_lock_irqsave(&(xa)->xa_lock, flags) 165 123 #define xa_unlock_irqrestore(xa, flags) \ 166 124 spin_unlock_irqrestore(&(xa)->xa_lock, flags) 125 + 126 + /* Everything below here is the Advanced API. Proceed with caution. */ 127 + 128 + /* 129 + * The xarray is constructed out of a set of 'chunks' of pointers. 
Choosing 130 + * the best chunk size requires some tradeoffs. A power of two recommends 131 + * itself so that we can walk the tree based purely on shifts and masks. 132 + * Generally, the larger the better; as the number of slots per level of the 133 + * tree increases, the less tall the tree needs to be. But that needs to be 134 + * balanced against the memory consumption of each node. On a 64-bit system, 135 + * xa_node is currently 576 bytes, and we get 7 of them per 4kB page. If we 136 + * doubled the number of slots per node, we'd get only 3 nodes per 4kB page. 137 + */ 138 + #ifndef XA_CHUNK_SHIFT 139 + #define XA_CHUNK_SHIFT (CONFIG_BASE_SMALL ? 4 : 6) 140 + #endif 141 + #define XA_CHUNK_SIZE (1UL << XA_CHUNK_SHIFT) 142 + #define XA_CHUNK_MASK (XA_CHUNK_SIZE - 1) 143 + 144 + /* Private */ 145 + static inline bool xa_is_node(const void *entry) 146 + { 147 + return xa_is_internal(entry) && (unsigned long)entry > 4096; 148 + } 149 + 150 + /* Private */ 151 + static inline void *xa_mk_sibling(unsigned int offset) 152 + { 153 + return xa_mk_internal(offset); 154 + } 155 + 156 + /* Private */ 157 + static inline unsigned long xa_to_sibling(const void *entry) 158 + { 159 + return xa_to_internal(entry); 160 + } 161 + 162 + /** 163 + * xa_is_sibling() - Is the entry a sibling entry? 164 + * @entry: Entry retrieved from the XArray 165 + * 166 + * Return: %true if the entry is a sibling entry. 167 + */ 168 + static inline bool xa_is_sibling(const void *entry) 169 + { 170 + return IS_ENABLED(CONFIG_XARRAY_MULTI) && xa_is_internal(entry) && 171 + (entry < xa_mk_sibling(XA_CHUNK_SIZE - 1)); 172 + } 173 + 174 + #define XA_RETRY_ENTRY xa_mk_internal(256) 167 175 168 176 #endif /* _LINUX_XARRAY_H */
+7
lib/Kconfig
··· 399 399 400 400 for more information. 401 401 402 + config XARRAY_MULTI 403 + bool 404 + help 405 + Support entries which occupy multiple consecutive indices in the 406 + XArray. 407 + 402 408 config RADIX_TREE_MULTIORDER 403 409 bool 410 + select XARRAY_MULTI 404 411 405 412 config ASSOCIATIVE_ARRAY 406 413 bool
+19 -45
lib/radix-tree.c
··· 38 38 #include <linux/rcupdate.h> 39 39 #include <linux/slab.h> 40 40 #include <linux/string.h> 41 + #include <linux/xarray.h> 41 42 42 43 43 44 /* Number of nodes in fully populated tree of given height */ ··· 99 98 return (void *)((unsigned long)ptr | RADIX_TREE_INTERNAL_NODE); 100 99 } 101 100 102 - #define RADIX_TREE_RETRY node_to_entry(NULL) 103 - 104 - #ifdef CONFIG_RADIX_TREE_MULTIORDER 105 - /* Sibling slots point directly to another slot in the same node */ 106 - static inline 107 - bool is_sibling_entry(const struct radix_tree_node *parent, void *node) 108 - { 109 - void __rcu **ptr = node; 110 - return (parent->slots <= ptr) && 111 - (ptr < parent->slots + RADIX_TREE_MAP_SIZE); 112 - } 113 - #else 114 - static inline 115 - bool is_sibling_entry(const struct radix_tree_node *parent, void *node) 116 - { 117 - return false; 118 - } 119 - #endif 101 + #define RADIX_TREE_RETRY XA_RETRY_ENTRY 120 102 121 103 static inline unsigned long 122 104 get_slot_offset(const struct radix_tree_node *parent, void __rcu **slot) ··· 113 129 unsigned int offset = (index >> parent->shift) & RADIX_TREE_MAP_MASK; 114 130 void __rcu **entry = rcu_dereference_raw(parent->slots[offset]); 115 131 116 - #ifdef CONFIG_RADIX_TREE_MULTIORDER 117 - if (radix_tree_is_internal_node(entry)) { 118 - if (is_sibling_entry(parent, entry)) { 119 - void __rcu **sibentry; 120 - sibentry = (void __rcu **) entry_to_node(entry); 121 - offset = get_slot_offset(parent, sibentry); 122 - entry = rcu_dereference_raw(*sibentry); 123 - } 132 + if (xa_is_sibling(entry)) { 133 + offset = xa_to_sibling(entry); 134 + entry = rcu_dereference_raw(parent->slots[offset]); 124 135 } 125 - #endif 126 136 127 137 *nodep = (void *)entry; 128 138 return offset; ··· 278 300 } else if (!radix_tree_is_internal_node(entry)) { 279 301 pr_debug("radix entry %p offset %ld indices %lu-%lu parent %p\n", 280 302 entry, i, first, last, node); 281 - } else if (is_sibling_entry(node, entry)) { 303 + } else if 
(xa_is_sibling(entry)) { 282 304 pr_debug("radix sblng %p offset %ld indices %lu-%lu parent %p val %p\n", 283 305 entry, i, first, last, node, 284 - *(void **)entry_to_node(entry)); 306 + node->slots[xa_to_sibling(entry)]); 285 307 } else { 286 308 dump_node(entry_to_node(entry), first); 287 309 } ··· 859 881 860 882 for (;;) { 861 883 void *entry = rcu_dereference_raw(child->slots[offset]); 862 - if (radix_tree_is_internal_node(entry) && child->shift && 863 - !is_sibling_entry(child, entry)) { 884 + if (xa_is_node(entry) && child->shift) { 864 885 child = entry_to_node(entry); 865 886 offset = 0; 866 887 continue; ··· 881 904 static inline int insert_entries(struct radix_tree_node *node, 882 905 void __rcu **slot, void *item, unsigned order, bool replace) 883 906 { 884 - struct radix_tree_node *child; 907 + void *sibling; 885 908 unsigned i, n, tag, offset, tags = 0; 886 909 887 910 if (node) { ··· 899 922 offset = offset & ~(n - 1); 900 923 slot = &node->slots[offset]; 901 924 } 902 - child = node_to_entry(slot); 925 + sibling = xa_mk_sibling(offset); 903 926 904 927 for (i = 0; i < n; i++) { 905 928 if (slot[i]) { ··· 916 939 for (i = 0; i < n; i++) { 917 940 struct radix_tree_node *old = rcu_dereference_raw(slot[i]); 918 941 if (i) { 919 - rcu_assign_pointer(slot[i], child); 942 + rcu_assign_pointer(slot[i], sibling); 920 943 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) 921 944 if (tags & (1 << tag)) 922 945 tag_clear(node, tag, offset + i); ··· 926 949 if (tags & (1 << tag)) 927 950 tag_set(node, tag, offset); 928 951 } 929 - if (radix_tree_is_internal_node(old) && 930 - !is_sibling_entry(node, old) && 931 - (old != RADIX_TREE_RETRY)) 952 + if (xa_is_node(old)) 932 953 radix_tree_free_nodes(old); 933 954 if (xa_is_value(old)) 934 955 node->exceptional--; ··· 1087 1112 void __rcu **slot, int count, int exceptional) 1088 1113 { 1089 1114 #ifdef CONFIG_RADIX_TREE_MULTIORDER 1090 - void *ptr = node_to_entry(slot); 1091 - unsigned offset = get_slot_offset(node, 
slot) + 1; 1115 + unsigned offset = get_slot_offset(node, slot); 1116 + void *ptr = xa_mk_sibling(offset); 1092 1117 1093 - while (offset < RADIX_TREE_MAP_SIZE) { 1118 + while (++offset < RADIX_TREE_MAP_SIZE) { 1094 1119 if (rcu_dereference_raw(node->slots[offset]) != ptr) 1095 1120 break; 1096 1121 if (count < 0) { ··· 1098 1123 node->count--; 1099 1124 } 1100 1125 node->exceptional += exceptional; 1101 - offset++; 1102 1126 } 1103 1127 #endif 1104 1128 } ··· 1293 1319 tags |= 1 << tag; 1294 1320 1295 1321 for (end = offset + 1; end < RADIX_TREE_MAP_SIZE; end++) { 1296 - if (!is_sibling_entry(parent, 1297 - rcu_dereference_raw(parent->slots[end]))) 1322 + if (!xa_is_sibling(rcu_dereference_raw(parent->slots[end]))) 1298 1323 break; 1299 1324 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) 1300 1325 if (tags & (1 << tag)) ··· 1591 1618 { 1592 1619 while (iter->index < iter->next_index) { 1593 1620 *nodep = rcu_dereference_raw(*slot); 1594 - if (*nodep && !is_sibling_entry(iter->node, *nodep)) 1621 + if (*nodep && !xa_is_sibling(*nodep)) 1595 1622 return slot; 1596 1623 slot++; 1597 1624 iter->index = __radix_tree_iter_add(iter, 1); ··· 1742 1769 while (++offset < RADIX_TREE_MAP_SIZE) { 1743 1770 void *slot = rcu_dereference_raw( 1744 1771 node->slots[offset]); 1745 - if (is_sibling_entry(node, slot)) 1772 + if (xa_is_sibling(slot)) 1746 1773 continue; 1747 1774 if (slot) 1748 1775 break; ··· 2256 2283 2257 2284 BUILD_BUG_ON(RADIX_TREE_MAX_TAGS + __GFP_BITS_SHIFT > 32); 2258 2285 BUILD_BUG_ON(ROOT_IS_IDR & ~GFP_ZONEMASK); 2286 + BUILD_BUG_ON(XA_CHUNK_SIZE > 255); 2259 2287 radix_tree_node_cachep = kmem_cache_create("radix_tree_node", 2260 2288 sizeof(struct radix_tree_node), 0, 2261 2289 SLAB_PANIC | SLAB_RECLAIM_ACCOUNT,
+1 -1
tools/testing/radix-tree/Makefile
··· 46 46 47 47 generated/map-shift.h: 48 48 @if ! grep -qws $(SHIFT) generated/map-shift.h; then \ 49 - echo "#define RADIX_TREE_MAP_SHIFT $(SHIFT)" > \ 49 + echo "#define XA_CHUNK_SHIFT $(SHIFT)" > \ 50 50 generated/map-shift.h; \ 51 51 fi
+1
tools/testing/radix-tree/generated/autoconf.h
··· 1 1 #define CONFIG_RADIX_TREE_MULTIORDER 1 2 + #define CONFIG_XARRAY_MULTI 1