Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

XArray: add xas_split

In order to use multi-index entries for huge pages in the page cache, we
need to be able to split a multi-index entry (e.g. if a file is truncated in
the middle of a huge page entry). This version does not support splitting
more than one level of the tree at a time. This is an acceptable
limitation for the page cache as we do not expect to support order-12
pages in the near future.

[akpm@linux-foundation.org: export xas_split_alloc() to modules]
[willy@infradead.org: fix xarray split]
Link: https://lkml.kernel.org/r/20200910175450.GV6583@casper.infradead.org
[willy@infradead.org: fix xarray]
Link: https://lkml.kernel.org/r/20201001233943.GW20115@casper.infradead.org

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Cc: "Kirill A . Shutemov" <kirill@shutemov.name>
Cc: Qian Cai <cai@lca.pw>
Cc: Song Liu <songliubraving@fb.com>
Link: https://lkml.kernel.org/r/20200903183029.14930-3-willy@infradead.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Matthew Wilcox (Oracle) and committed by
Linus Torvalds
8fc75643 57417ceb

+224 -15
+8 -6
Documentation/core-api/xarray.rst
··· 475 475 Each entry will only be returned once, no matter how many indices it 476 476 occupies. 477 477 478 - Using xas_next() or xas_prev() with a multi-index xa_state 479 - is not supported. Using either of these functions on a multi-index entry 480 - will reveal sibling entries; these should be skipped over by the caller. 478 + Using xas_next() or xas_prev() with a multi-index xa_state is not 479 + supported. Using either of these functions on a multi-index entry will 480 + reveal sibling entries; these should be skipped over by the caller. 481 481 482 - Storing ``NULL`` into any index of a multi-index entry will set the entry 483 - at every index to ``NULL`` and dissolve the tie. Splitting a multi-index 484 - entry into entries occupying smaller ranges is not yet supported. 482 + Storing ``NULL`` into any index of a multi-index entry will set the 483 + entry at every index to ``NULL`` and dissolve the tie. A multi-index 484 + entry can be split into entries occupying smaller ranges by calling 485 + xas_split_alloc() without the xa_lock held, followed by taking the lock 486 + and calling xas_split(). 485 487 486 488 Functions and structures 487 489 ========================
+13
include/linux/xarray.h
··· 1507 1507 1508 1508 #ifdef CONFIG_XARRAY_MULTI 1509 1509 int xa_get_order(struct xarray *, unsigned long index); 1510 + void xas_split(struct xa_state *, void *entry, unsigned int order); 1511 + void xas_split_alloc(struct xa_state *, void *entry, unsigned int order, gfp_t); 1510 1512 #else 1511 1513 static inline int xa_get_order(struct xarray *xa, unsigned long index) 1512 1514 { 1513 1515 return 0; 1516 + } 1517 + 1518 + static inline void xas_split(struct xa_state *xas, void *entry, 1519 + unsigned int order) 1520 + { 1521 + xas_store(xas, entry); 1522 + } 1523 + 1524 + static inline void xas_split_alloc(struct xa_state *xas, void *entry, 1525 + unsigned int order, gfp_t gfp) 1526 + { 1514 1527 } 1515 1528 #endif 1516 1529
+44
lib/test_xarray.c
··· 1503 1503 } 1504 1504 } 1505 1505 1506 + #ifdef CONFIG_XARRAY_MULTI 1507 + static void check_split_1(struct xarray *xa, unsigned long index, 1508 + unsigned int order) 1509 + { 1510 + XA_STATE(xas, xa, index); 1511 + void *entry; 1512 + unsigned int i = 0; 1513 + 1514 + xa_store_order(xa, index, order, xa, GFP_KERNEL); 1515 + 1516 + xas_split_alloc(&xas, xa, order, GFP_KERNEL); 1517 + xas_lock(&xas); 1518 + xas_split(&xas, xa, order); 1519 + xas_unlock(&xas); 1520 + 1521 + xa_for_each(xa, index, entry) { 1522 + XA_BUG_ON(xa, entry != xa); 1523 + i++; 1524 + } 1525 + XA_BUG_ON(xa, i != 1 << order); 1526 + 1527 + xa_set_mark(xa, index, XA_MARK_0); 1528 + XA_BUG_ON(xa, !xa_get_mark(xa, index, XA_MARK_0)); 1529 + 1530 + xa_destroy(xa); 1531 + } 1532 + 1533 + static noinline void check_split(struct xarray *xa) 1534 + { 1535 + unsigned int order; 1536 + 1537 + XA_BUG_ON(xa, !xa_empty(xa)); 1538 + 1539 + for (order = 1; order < 2 * XA_CHUNK_SHIFT; order++) { 1540 + check_split_1(xa, 0, order); 1541 + check_split_1(xa, 1UL << order, order); 1542 + check_split_1(xa, 3UL << order, order); 1543 + } 1544 + } 1545 + #else 1546 + static void check_split(struct xarray *xa) { } 1547 + #endif 1548 + 1506 1549 static void check_align_1(struct xarray *xa, char *name) 1507 1550 { 1508 1551 int i; ··· 1772 1729 check_store_range(&array); 1773 1730 check_store_iter(&array); 1774 1731 check_align(&xa0); 1732 + check_split(&array); 1775 1733 1776 1734 check_workingset(&array, 0); 1777 1735 check_workingset(&array, 64);
+159 -9
lib/xarray.c
··· 266 266 */ 267 267 static void xas_destroy(struct xa_state *xas) 268 268 { 269 - struct xa_node *node = xas->xa_alloc; 269 + struct xa_node *next, *node = xas->xa_alloc; 270 270 271 - if (!node) 272 - return; 273 - XA_NODE_BUG_ON(node, !list_empty(&node->private_list)); 274 - kmem_cache_free(radix_tree_node_cachep, node); 275 - xas->xa_alloc = NULL; 271 + while (node) { 272 + XA_NODE_BUG_ON(node, !list_empty(&node->private_list)); 273 + next = rcu_dereference_raw(node->parent); 274 + radix_tree_node_rcu_free(&node->rcu_head); 275 + xas->xa_alloc = node = next; 276 + } 276 277 } 277 278 278 279 /** ··· 305 304 xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp); 306 305 if (!xas->xa_alloc) 307 306 return false; 307 + xas->xa_alloc->parent = NULL; 308 308 XA_NODE_BUG_ON(xas->xa_alloc, !list_empty(&xas->xa_alloc->private_list)); 309 309 xas->xa_node = XAS_RESTART; 310 310 return true; ··· 341 339 } 342 340 if (!xas->xa_alloc) 343 341 return false; 342 + xas->xa_alloc->parent = NULL; 344 343 XA_NODE_BUG_ON(xas->xa_alloc, !list_empty(&xas->xa_alloc->private_list)); 345 344 xas->xa_node = XAS_RESTART; 346 345 return true; ··· 406 403 /* 407 404 * Use this to calculate the maximum index that will need to be created 408 405 * in order to add the entry described by @xas. Because we cannot store a 409 - * multiple-index entry at index 0, the calculation is a little more complex 406 + * multi-index entry at index 0, the calculation is a little more complex 410 407 * than you might expect. 
411 408 */ 412 409 static unsigned long xas_max(struct xa_state *xas) ··· 949 946 } 950 947 EXPORT_SYMBOL_GPL(xas_init_marks); 951 948 949 + #ifdef CONFIG_XARRAY_MULTI 950 + static unsigned int node_get_marks(struct xa_node *node, unsigned int offset) 951 + { 952 + unsigned int marks = 0; 953 + xa_mark_t mark = XA_MARK_0; 954 + 955 + for (;;) { 956 + if (node_get_mark(node, offset, mark)) 957 + marks |= 1 << (__force unsigned int)mark; 958 + if (mark == XA_MARK_MAX) 959 + break; 960 + mark_inc(mark); 961 + } 962 + 963 + return marks; 964 + } 965 + 966 + static void node_set_marks(struct xa_node *node, unsigned int offset, 967 + struct xa_node *child, unsigned int marks) 968 + { 969 + xa_mark_t mark = XA_MARK_0; 970 + 971 + for (;;) { 972 + if (marks & (1 << (__force unsigned int)mark)) { 973 + node_set_mark(node, offset, mark); 974 + if (child) 975 + node_mark_all(child, mark); 976 + } 977 + if (mark == XA_MARK_MAX) 978 + break; 979 + mark_inc(mark); 980 + } 981 + } 982 + 983 + /** 984 + * xas_split_alloc() - Allocate memory for splitting an entry. 985 + * @xas: XArray operation state. 986 + * @entry: New entry which will be stored in the array. 987 + * @order: New entry order. 988 + * @gfp: Memory allocation flags. 989 + * 990 + * This function should be called before calling xas_split(). 991 + * If necessary, it will allocate new nodes (and fill them with @entry) 992 + * to prepare for the upcoming split of an entry of @order size into 993 + * entries of the order stored in the @xas. 994 + * 995 + * Context: May sleep if @gfp flags permit. 
996 + */ 997 + void xas_split_alloc(struct xa_state *xas, void *entry, unsigned int order, 998 + gfp_t gfp) 999 + { 1000 + unsigned int sibs = (1 << (order % XA_CHUNK_SHIFT)) - 1; 1001 + unsigned int mask = xas->xa_sibs; 1002 + 1003 + /* XXX: no support for splitting really large entries yet */ 1004 + if (WARN_ON(xas->xa_shift + 2 * XA_CHUNK_SHIFT < order)) 1005 + goto nomem; 1006 + if (xas->xa_shift + XA_CHUNK_SHIFT > order) 1007 + return; 1008 + 1009 + do { 1010 + unsigned int i; 1011 + void *sibling; 1012 + struct xa_node *node; 1013 + 1014 + node = kmem_cache_alloc(radix_tree_node_cachep, gfp); 1015 + if (!node) 1016 + goto nomem; 1017 + node->array = xas->xa; 1018 + for (i = 0; i < XA_CHUNK_SIZE; i++) { 1019 + if ((i & mask) == 0) { 1020 + RCU_INIT_POINTER(node->slots[i], entry); 1021 + sibling = xa_mk_sibling(0); 1022 + } else { 1023 + RCU_INIT_POINTER(node->slots[i], sibling); 1024 + } 1025 + } 1026 + RCU_INIT_POINTER(node->parent, xas->xa_alloc); 1027 + xas->xa_alloc = node; 1028 + } while (sibs-- > 0); 1029 + 1030 + return; 1031 + nomem: 1032 + xas_destroy(xas); 1033 + xas_set_err(xas, -ENOMEM); 1034 + } 1035 + EXPORT_SYMBOL_GPL(xas_split_alloc); 1036 + 1037 + /** 1038 + * xas_split() - Split a multi-index entry into smaller entries. 1039 + * @xas: XArray operation state. 1040 + * @entry: New entry to store in the array. 1041 + * @order: New entry order. 1042 + * 1043 + * The value in the entry is copied to all the replacement entries. 1044 + * 1045 + * Context: Any context. The caller should hold the xa_lock. 
1046 + */ 1047 + void xas_split(struct xa_state *xas, void *entry, unsigned int order) 1048 + { 1049 + unsigned int sibs = (1 << (order % XA_CHUNK_SHIFT)) - 1; 1050 + unsigned int offset, marks; 1051 + struct xa_node *node; 1052 + void *curr = xas_load(xas); 1053 + int values = 0; 1054 + 1055 + node = xas->xa_node; 1056 + if (xas_top(node)) 1057 + return; 1058 + 1059 + marks = node_get_marks(node, xas->xa_offset); 1060 + 1061 + offset = xas->xa_offset + sibs; 1062 + do { 1063 + if (xas->xa_shift < node->shift) { 1064 + struct xa_node *child = xas->xa_alloc; 1065 + 1066 + xas->xa_alloc = rcu_dereference_raw(child->parent); 1067 + child->shift = node->shift - XA_CHUNK_SHIFT; 1068 + child->offset = offset; 1069 + child->count = XA_CHUNK_SIZE; 1070 + child->nr_values = xa_is_value(entry) ? 1071 + XA_CHUNK_SIZE : 0; 1072 + RCU_INIT_POINTER(child->parent, node); 1073 + node_set_marks(node, offset, child, marks); 1074 + rcu_assign_pointer(node->slots[offset], 1075 + xa_mk_node(child)); 1076 + if (xa_is_value(curr)) 1077 + values--; 1078 + } else { 1079 + unsigned int canon = offset - xas->xa_sibs; 1080 + 1081 + node_set_marks(node, canon, NULL, marks); 1082 + rcu_assign_pointer(node->slots[canon], entry); 1083 + while (offset > canon) 1084 + rcu_assign_pointer(node->slots[offset--], 1085 + xa_mk_sibling(canon)); 1086 + values += (xa_is_value(entry) - xa_is_value(curr)) * 1087 + (xas->xa_sibs + 1); 1088 + } 1089 + } while (offset-- > xas->xa_offset); 1090 + 1091 + node->nr_values += values; 1092 + } 1093 + EXPORT_SYMBOL_GPL(xas_split); 1094 + #endif 1095 + 952 1096 /** 953 1097 * xas_pause() - Pause a walk to drop a lock. 954 1098 * @xas: XArray operation state. ··· 1557 1407 * @gfp: Memory allocation flags. 1558 1408 * 1559 1409 * After this function returns, loads from this index will return @entry. 1560 - * Storing into an existing multislot entry updates the entry of every index. 1410 + * Storing into an existing multi-index entry updates the entry of every index. 
1561 1411 * The marks associated with @index are unaffected unless @entry is %NULL. 1562 1412 * 1563 1413 * Context: Any context. Takes and releases the xa_lock. ··· 1699 1549 * 1700 1550 * After this function returns, loads from any index between @first and @last, 1701 1551 * inclusive will return @entry. 1702 - * Storing into an existing multislot entry updates the entry of every index. 1552 + * Storing into an existing multi-index entry updates the entry of every index. 1703 1553 * The marks associated with @index are unaffected unless @entry is %NULL. 1704 1554 * 1705 1555 * Context: Process context. Takes and releases the xa_lock. May sleep