Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

xarray: Fix early termination of xas_for_each_marked

xas_for_each_marked() is using entry == NULL as a termination condition
of the iteration. When xas_for_each_marked() is used protected only by
RCU, this can however race with xas_store(xas, NULL) in the following
way:

TASK1                                   TASK2
page_cache_delete()                     find_get_pages_range_tag()
                                          xas_for_each_marked()
                                            xas_find_marked()
                                              off = xas_find_chunk()

  xas_store(&xas, NULL)
    xas_init_marks(&xas);
    ...
    rcu_assign_pointer(*slot, NULL);
                                              entry = xa_entry(off);

And thus xas_for_each_marked() terminates prematurely possibly leading
to missed entries in the iteration (translating to missing writeback of
some pages or a similar problem).

If we find a NULL entry that has been marked, skip it (unless we're trying
to allocate an entry).

Reported-by: Jan Kara <jack@suse.cz>
CC: stable@vger.kernel.org
Fixes: ef8e5717db01 ("page cache: Convert delete_batch to XArray")
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>

+98 -3
+5 -1
include/linux/xarray.h
··· 1648 1648 xa_mark_t mark) 1649 1649 { 1650 1650 struct xa_node *node = xas->xa_node; 1651 + void *entry; 1651 1652 unsigned int offset; 1652 1653 1653 1654 if (unlikely(xas_not_node(node) || node->shift)) ··· 1660 1659 return NULL; 1661 1660 if (offset == XA_CHUNK_SIZE) 1662 1661 return xas_find_marked(xas, max, mark); 1663 - return xa_entry(xas->xa, node, offset); 1662 + entry = xa_entry(xas->xa, node, offset); 1663 + if (!entry) 1664 + return xas_find_marked(xas, max, mark); 1665 + return entry; 1664 1666 } 1665 1667 1666 1668 /*
+2
lib/xarray.c
··· 1208 1208 } 1209 1209 1210 1210 entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset); 1211 + if (!entry && !(xa_track_free(xas->xa) && mark == XA_FREE_MARK)) 1212 + continue; 1211 1213 if (!xa_is_node(entry)) 1212 1214 return entry; 1213 1215 xas->xa_node = xa_to_node(entry);
+2 -2
tools/testing/radix-tree/Makefile
··· 7 7 TARGETS = main idr-test multiorder xarray 8 8 CORE_OFILES := xarray.o radix-tree.o idr.o linux.o test.o find_bit.o bitmap.o 9 9 OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \ 10 - regression4.o \ 11 - tag_check.o multiorder.o idr-test.o iteration_check.o benchmark.o 10 + regression4.o tag_check.o multiorder.o idr-test.o iteration_check.o \ 11 + iteration_check_2.o benchmark.o 12 12 13 13 ifndef SHIFT 14 14 SHIFT=3
+87
tools/testing/radix-tree/iteration_check_2.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-or-later 2 + /* 3 + * iteration_check_2.c: Check that deleting a tagged entry doesn't cause 4 + * an RCU walker to finish early. 5 + * Copyright (c) 2020 Oracle 6 + * Author: Matthew Wilcox <willy@infradead.org> 7 + */ 8 + #include <pthread.h> 9 + #include "test.h" 10 + 11 + static volatile bool test_complete; 12 + 13 + static void *iterator(void *arg) 14 + { 15 + XA_STATE(xas, arg, 0); 16 + void *entry; 17 + 18 + rcu_register_thread(); 19 + 20 + while (!test_complete) { 21 + xas_set(&xas, 0); 22 + rcu_read_lock(); 23 + xas_for_each_marked(&xas, entry, ULONG_MAX, XA_MARK_0) 24 + ; 25 + rcu_read_unlock(); 26 + assert(xas.xa_index >= 100); 27 + } 28 + 29 + rcu_unregister_thread(); 30 + return NULL; 31 + } 32 + 33 + static void *throbber(void *arg) 34 + { 35 + struct xarray *xa = arg; 36 + 37 + rcu_register_thread(); 38 + 39 + while (!test_complete) { 40 + int i; 41 + 42 + for (i = 0; i < 100; i++) { 43 + xa_store(xa, i, xa_mk_value(i), GFP_KERNEL); 44 + xa_set_mark(xa, i, XA_MARK_0); 45 + } 46 + for (i = 0; i < 100; i++) 47 + xa_erase(xa, i); 48 + } 49 + 50 + rcu_unregister_thread(); 51 + return NULL; 52 + } 53 + 54 + void iteration_test2(unsigned test_duration) 55 + { 56 + pthread_t threads[2]; 57 + DEFINE_XARRAY(array); 58 + int i; 59 + 60 + printv(1, "Running iteration test 2 for %d seconds\n", test_duration); 61 + 62 + test_complete = false; 63 + 64 + xa_store(&array, 100, xa_mk_value(100), GFP_KERNEL); 65 + xa_set_mark(&array, 100, XA_MARK_0); 66 + 67 + if (pthread_create(&threads[0], NULL, iterator, &array)) { 68 + perror("create iterator thread"); 69 + exit(1); 70 + } 71 + if (pthread_create(&threads[1], NULL, throbber, &array)) { 72 + perror("create throbber thread"); 73 + exit(1); 74 + } 75 + 76 + sleep(test_duration); 77 + test_complete = true; 78 + 79 + for (i = 0; i < 2; i++) { 80 + if (pthread_join(threads[i], NULL)) { 81 + perror("pthread_join"); 82 + exit(1); 83 + } 84 + } 85 + 86 + xa_destroy(&array); 87 + }
+1
tools/testing/radix-tree/main.c
··· 311 311 regression4_test(); 312 312 iteration_test(0, 10 + 90 * long_run); 313 313 iteration_test(7, 10 + 90 * long_run); 314 + iteration_test2(10 + 90 * long_run); 314 315 single_thread_tests(long_run); 315 316 316 317 /* Free any remaining preallocated nodes */
+1
tools/testing/radix-tree/test.h
··· 34 34 void tag_check(void); 35 35 void multiorder_checks(void); 36 36 void iteration_test(unsigned order, unsigned duration); 37 + void iteration_test2(unsigned duration); 37 38 void benchmark(void); 38 39 void idr_checks(void); 39 40 void ida_tests(void);