Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

XArray: Do not return sibling entries from xa_load()

It is possible for xa_load() to observe a sibling entry pointing to
another sibling entry. An example:

Thread A:                          Thread B:
                                   xa_store_range(xa, entry, 188, 191, gfp);
xa_load(xa, 191);
entry = xa_entry(xa, node, 63);
[entry is a sibling of 188]
                                   xa_store_range(xa, entry, 184, 191, gfp);
if (xa_is_sibling(entry))
    offset = xa_to_sibling(entry);
entry = xa_entry(xas->xa, node, offset);
[entry is now a sibling of 184]

It is sufficient to go around this loop until we hit a non-sibling entry.
Sibling entries always point earlier in the node, so we are guaranteed
to terminate this search.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Fixes: 6b24ca4a1a8d ("mm: Use multi-index entries in the page cache")
Cc: stable@vger.kernel.org

+67 -3
+1 -1
lib/xarray.c
··· 206 206 void *entry = xa_entry(xas->xa, node, offset); 207 207 208 208 xas->xa_node = node; 209 - if (xa_is_sibling(entry)) { 209 + while (xa_is_sibling(entry)) { 210 210 offset = xa_to_sibling(entry); 211 211 entry = xa_entry(xas->xa, node, offset); 212 212 if (node->shift && xa_is_node(entry))
+66 -2
tools/testing/radix-tree/multiorder.c
··· 159 159 item_kill_tree(xa); 160 160 } 161 161 162 - bool stop_iteration = false; 162 + bool stop_iteration; 163 163 164 164 static void *creator_func(void *ptr) 165 165 { ··· 201 201 pthread_t worker_thread[num_threads]; 202 202 int i; 203 203 204 + stop_iteration = false; 204 205 pthread_create(&worker_thread[0], NULL, &creator_func, xa); 205 206 for (i = 1; i < num_threads; i++) 206 207 pthread_create(&worker_thread[i], NULL, &iterator_func, xa); 208 + 209 + for (i = 0; i < num_threads; i++) 210 + pthread_join(worker_thread[i], NULL); 211 + 212 + item_kill_tree(xa); 213 + } 214 + 215 + static void *load_creator(void *ptr) 216 + { 217 + /* 'order' is set up to ensure we have sibling entries */ 218 + unsigned int order; 219 + struct radix_tree_root *tree = ptr; 220 + int i; 221 + 222 + rcu_register_thread(); 223 + item_insert_order(tree, 3 << RADIX_TREE_MAP_SHIFT, 0); 224 + item_insert_order(tree, 2 << RADIX_TREE_MAP_SHIFT, 0); 225 + for (i = 0; i < 10000; i++) { 226 + for (order = 1; order < RADIX_TREE_MAP_SHIFT; order++) { 227 + unsigned long index = (3 << RADIX_TREE_MAP_SHIFT) - 228 + (1 << order); 229 + item_insert_order(tree, index, order); 230 + item_delete_rcu(tree, index); 231 + } 232 + } 233 + rcu_unregister_thread(); 234 + 235 + stop_iteration = true; 236 + return NULL; 237 + } 238 + 239 + static void *load_worker(void *ptr) 240 + { 241 + unsigned long index = (3 << RADIX_TREE_MAP_SHIFT) - 1; 242 + 243 + rcu_register_thread(); 244 + while (!stop_iteration) { 245 + struct item *item = xa_load(ptr, index); 246 + assert(!xa_is_internal(item)); 247 + } 248 + rcu_unregister_thread(); 249 + 250 + return NULL; 251 + } 252 + 253 + static void load_race(struct xarray *xa) 254 + { 255 + const int num_threads = sysconf(_SC_NPROCESSORS_ONLN) * 4; 256 + pthread_t worker_thread[num_threads]; 257 + int i; 258 + 259 + stop_iteration = false; 260 + pthread_create(&worker_thread[0], NULL, &load_creator, xa); 261 + for (i = 1; i < num_threads; i++) 262 + 
pthread_create(&worker_thread[i], NULL, &load_worker, xa); 207 263 208 264 for (i = 0; i < num_threads; i++) 209 265 pthread_join(worker_thread[i], NULL); ··· 274 218 multiorder_iteration(&array); 275 219 multiorder_tagged_iteration(&array); 276 220 multiorder_iteration_race(&array); 221 + load_race(&array); 277 222 278 223 radix_tree_cpu_dead(0); 279 224 } 280 225 281 - int __weak main(void) 226 + int __weak main(int argc, char **argv) 282 227 { 228 + int opt; 229 + 230 + while ((opt = getopt(argc, argv, "ls:v")) != -1) { 231 + if (opt == 'v') 232 + test_verbose++; 233 + } 234 + 283 235 rcu_register_thread(); 284 236 radix_tree_init(); 285 237 multiorder_checks();