Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

lib: radix_tree: tree node interface

Make struct radix_tree_node part of the public interface and provide API
functions to create, look up, and delete whole nodes. Refactor the
existing insert, lookup, and delete functions on top of these new node
primitives.

This will allow the VM to track and garbage collect page cache radix
tree nodes.

[sasha.levin@oracle.com: return correct error code on insertion failure]
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Bob Liu <bob.liu@oracle.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jan Kara <jack@suse.cz>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Luigi Semenzato <semenzato@google.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Metin Doslu <metin@citusdata.com>
Cc: Michel Lespinasse <walken@google.com>
Cc: Ozgun Erdogan <ozgun@citusdata.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Roman Gushchin <klamm@yandex-team.ru>
Cc: Ryan Mallon <rmallon@gmail.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Johannes Weiner and committed by
Linus Torvalds
139e5616 a528910e

+182 -115
+34
include/linux/radix-tree.h
··· 60 60 61 61 #define RADIX_TREE_MAX_TAGS 3 62 62 63 + #ifdef __KERNEL__ 64 + #define RADIX_TREE_MAP_SHIFT (CONFIG_BASE_SMALL ? 4 : 6) 65 + #else 66 + #define RADIX_TREE_MAP_SHIFT 3 /* For more stressful testing */ 67 + #endif 68 + 69 + #define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT) 70 + #define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE-1) 71 + 72 + #define RADIX_TREE_TAG_LONGS \ 73 + ((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG) 74 + 75 + struct radix_tree_node { 76 + unsigned int height; /* Height from the bottom */ 77 + unsigned int count; 78 + union { 79 + struct radix_tree_node *parent; /* Used when ascending tree */ 80 + struct rcu_head rcu_head; /* Used when freeing node */ 81 + }; 82 + void __rcu *slots[RADIX_TREE_MAP_SIZE]; 83 + unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; 84 + }; 85 + 86 + #define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long)) 87 + #define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \ 88 + RADIX_TREE_MAP_SHIFT)) 89 + 63 90 /* root tags are stored in gfp_mask, shifted by __GFP_BITS_SHIFT */ 64 91 struct radix_tree_root { 65 92 unsigned int height; ··· 128 101 * concurrently with other readers. 
129 102 * 130 103 * The notable exceptions to this rule are the following functions: 104 + * __radix_tree_lookup 131 105 * radix_tree_lookup 132 106 * radix_tree_lookup_slot 133 107 * radix_tree_tag_get ··· 244 216 rcu_assign_pointer(*pslot, item); 245 217 } 246 218 219 + int __radix_tree_create(struct radix_tree_root *root, unsigned long index, 220 + struct radix_tree_node **nodep, void ***slotp); 247 221 int radix_tree_insert(struct radix_tree_root *, unsigned long, void *); 222 + void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index, 223 + struct radix_tree_node **nodep, void ***slotp); 248 224 void *radix_tree_lookup(struct radix_tree_root *, unsigned long); 249 225 void **radix_tree_lookup_slot(struct radix_tree_root *, unsigned long); 226 + bool __radix_tree_delete_node(struct radix_tree_root *root, unsigned long index, 227 + struct radix_tree_node *node); 250 228 void *radix_tree_delete_item(struct radix_tree_root *, unsigned long, void *); 251 229 void *radix_tree_delete(struct radix_tree_root *, unsigned long); 252 230 unsigned int
+148 -115
lib/radix-tree.c
··· 35 35 #include <linux/hardirq.h> /* in_interrupt() */ 36 36 37 37 38 - #ifdef __KERNEL__ 39 - #define RADIX_TREE_MAP_SHIFT (CONFIG_BASE_SMALL ? 4 : 6) 40 - #else 41 - #define RADIX_TREE_MAP_SHIFT 3 /* For more stressful testing */ 42 - #endif 43 - 44 - #define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT) 45 - #define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE-1) 46 - 47 - #define RADIX_TREE_TAG_LONGS \ 48 - ((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG) 49 - 50 - struct radix_tree_node { 51 - unsigned int height; /* Height from the bottom */ 52 - unsigned int count; 53 - union { 54 - struct radix_tree_node *parent; /* Used when ascending tree */ 55 - struct rcu_head rcu_head; /* Used when freeing node */ 56 - }; 57 - void __rcu *slots[RADIX_TREE_MAP_SIZE]; 58 - unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; 59 - }; 60 - 61 - #define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long)) 62 - #define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \ 63 - RADIX_TREE_MAP_SHIFT)) 64 - 65 38 /* 66 39 * The height_to_maxindex array needs to be one deeper than the maximum 67 40 * path as height 0 holds only 1 entry. ··· 360 387 } 361 388 362 389 /** 363 - * radix_tree_insert - insert into a radix tree 390 + * __radix_tree_create - create a slot in a radix tree 364 391 * @root: radix tree root 365 392 * @index: index key 366 - * @item: item to insert 393 + * @nodep: returns node 394 + * @slotp: returns slot 367 395 * 368 - * Insert an item into the radix tree at position @index. 396 + * Create, if necessary, and return the node and slot for an item 397 + * at position @index in the radix tree @root. 398 + * 399 + * Until there is more than one item in the tree, no nodes are 400 + * allocated and @root->rnode is used as a direct slot instead of 401 + * pointing to a node, in which case *@nodep will be NULL. 402 + * 403 + * Returns -ENOMEM, or 0 for success. 
369 404 */ 370 - int radix_tree_insert(struct radix_tree_root *root, 371 - unsigned long index, void *item) 405 + int __radix_tree_create(struct radix_tree_root *root, unsigned long index, 406 + struct radix_tree_node **nodep, void ***slotp) 372 407 { 373 408 struct radix_tree_node *node = NULL, *slot; 374 - unsigned int height, shift; 375 - int offset; 409 + unsigned int height, shift, offset; 376 410 int error; 377 - 378 - BUG_ON(radix_tree_is_indirect_ptr(item)); 379 411 380 412 /* Make sure the tree is high enough. */ 381 413 if (index > radix_tree_maxindex(root->height)) { ··· 417 439 height--; 418 440 } 419 441 420 - if (slot != NULL) 442 + if (nodep) 443 + *nodep = node; 444 + if (slotp) 445 + *slotp = node ? node->slots + offset : (void **)&root->rnode; 446 + return 0; 447 + } 448 + 449 + /** 450 + * radix_tree_insert - insert into a radix tree 451 + * @root: radix tree root 452 + * @index: index key 453 + * @item: item to insert 454 + * 455 + * Insert an item into the radix tree at position @index. 
456 + */ 457 + int radix_tree_insert(struct radix_tree_root *root, 458 + unsigned long index, void *item) 459 + { 460 + struct radix_tree_node *node; 461 + void **slot; 462 + int error; 463 + 464 + BUG_ON(radix_tree_is_indirect_ptr(item)); 465 + 466 + error = __radix_tree_create(root, index, &node, &slot); 467 + if (error) 468 + return error; 469 + if (*slot != NULL) 421 470 return -EEXIST; 471 + rcu_assign_pointer(*slot, item); 422 472 423 473 if (node) { 424 474 node->count++; 425 - rcu_assign_pointer(node->slots[offset], item); 426 - BUG_ON(tag_get(node, 0, offset)); 427 - BUG_ON(tag_get(node, 1, offset)); 475 + BUG_ON(tag_get(node, 0, index & RADIX_TREE_MAP_MASK)); 476 + BUG_ON(tag_get(node, 1, index & RADIX_TREE_MAP_MASK)); 428 477 } else { 429 - rcu_assign_pointer(root->rnode, item); 430 478 BUG_ON(root_tag_get(root, 0)); 431 479 BUG_ON(root_tag_get(root, 1)); 432 480 } ··· 461 457 } 462 458 EXPORT_SYMBOL(radix_tree_insert); 463 459 464 - /* 465 - * is_slot == 1 : search for the slot. 466 - * is_slot == 0 : search for the node. 460 + /** 461 + * __radix_tree_lookup - lookup an item in a radix tree 462 + * @root: radix tree root 463 + * @index: index key 464 + * @nodep: returns node 465 + * @slotp: returns slot 466 + * 467 + * Lookup and return the item at position @index in the radix 468 + * tree @root. 469 + * 470 + * Until there is more than one item in the tree, no nodes are 471 + * allocated and @root->rnode is used as a direct slot instead of 472 + * pointing to a node, in which case *@nodep will be NULL. 
467 473 */ 468 - static void *radix_tree_lookup_element(struct radix_tree_root *root, 469 - unsigned long index, int is_slot) 474 + void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index, 475 + struct radix_tree_node **nodep, void ***slotp) 470 476 { 477 + struct radix_tree_node *node, *parent; 471 478 unsigned int height, shift; 472 - struct radix_tree_node *node, **slot; 479 + void **slot; 473 480 474 481 node = rcu_dereference_raw(root->rnode); 475 482 if (node == NULL) ··· 489 474 if (!radix_tree_is_indirect_ptr(node)) { 490 475 if (index > 0) 491 476 return NULL; 492 - return is_slot ? (void *)&root->rnode : node; 477 + 478 + if (nodep) 479 + *nodep = NULL; 480 + if (slotp) 481 + *slotp = (void **)&root->rnode; 482 + return node; 493 483 } 494 484 node = indirect_to_ptr(node); 495 485 ··· 505 485 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 506 486 507 487 do { 508 - slot = (struct radix_tree_node **) 509 - (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK)); 488 + parent = node; 489 + slot = node->slots + ((index >> shift) & RADIX_TREE_MAP_MASK); 510 490 node = rcu_dereference_raw(*slot); 511 491 if (node == NULL) 512 492 return NULL; ··· 515 495 height--; 516 496 } while (height > 0); 517 497 518 - return is_slot ? 
(void *)slot : indirect_to_ptr(node); 498 + if (nodep) 499 + *nodep = parent; 500 + if (slotp) 501 + *slotp = slot; 502 + return node; 519 503 } 520 504 521 505 /** ··· 537 513 */ 538 514 void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index) 539 515 { 540 - return (void **)radix_tree_lookup_element(root, index, 1); 516 + void **slot; 517 + 518 + if (!__radix_tree_lookup(root, index, NULL, &slot)) 519 + return NULL; 520 + return slot; 541 521 } 542 522 EXPORT_SYMBOL(radix_tree_lookup_slot); 543 523 ··· 559 531 */ 560 532 void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index) 561 533 { 562 - return radix_tree_lookup_element(root, index, 0); 534 + return __radix_tree_lookup(root, index, NULL, NULL); 563 535 } 564 536 EXPORT_SYMBOL(radix_tree_lookup); 565 537 ··· 1290 1262 } 1291 1263 1292 1264 /** 1265 + * __radix_tree_delete_node - try to free node after clearing a slot 1266 + * @root: radix tree root 1267 + * @index: index key 1268 + * @node: node containing @index 1269 + * 1270 + * After clearing the slot at @index in @node from radix tree 1271 + * rooted at @root, call this function to attempt freeing the 1272 + * node and shrinking the tree. 1273 + * 1274 + * Returns %true if @node was freed, %false otherwise. 
1275 + */ 1276 + bool __radix_tree_delete_node(struct radix_tree_root *root, unsigned long index, 1277 + struct radix_tree_node *node) 1278 + { 1279 + bool deleted = false; 1280 + 1281 + do { 1282 + struct radix_tree_node *parent; 1283 + 1284 + if (node->count) { 1285 + if (node == indirect_to_ptr(root->rnode)) { 1286 + radix_tree_shrink(root); 1287 + if (root->height == 0) 1288 + deleted = true; 1289 + } 1290 + return deleted; 1291 + } 1292 + 1293 + parent = node->parent; 1294 + if (parent) { 1295 + index >>= RADIX_TREE_MAP_SHIFT; 1296 + 1297 + parent->slots[index & RADIX_TREE_MAP_MASK] = NULL; 1298 + parent->count--; 1299 + } else { 1300 + root_tag_clear_all(root); 1301 + root->height = 0; 1302 + root->rnode = NULL; 1303 + } 1304 + 1305 + radix_tree_node_free(node); 1306 + deleted = true; 1307 + 1308 + node = parent; 1309 + } while (node); 1310 + 1311 + return deleted; 1312 + } 1313 + 1314 + /** 1293 1315 * radix_tree_delete_item - delete an item from a radix tree 1294 1316 * @root: radix tree root 1295 1317 * @index: index key ··· 1353 1275 void *radix_tree_delete_item(struct radix_tree_root *root, 1354 1276 unsigned long index, void *item) 1355 1277 { 1356 - struct radix_tree_node *node = NULL; 1357 - struct radix_tree_node *slot = NULL; 1358 - struct radix_tree_node *to_free; 1359 - unsigned int height, shift; 1278 + struct radix_tree_node *node; 1279 + unsigned int offset; 1280 + void **slot; 1281 + void *entry; 1360 1282 int tag; 1361 - int uninitialized_var(offset); 1362 1283 1363 - height = root->height; 1364 - if (index > radix_tree_maxindex(height)) 1365 - goto out; 1284 + entry = __radix_tree_lookup(root, index, &node, &slot); 1285 + if (!entry) 1286 + return NULL; 1366 1287 1367 - slot = root->rnode; 1368 - if (height == 0) { 1288 + if (item && entry != item) 1289 + return NULL; 1290 + 1291 + if (!node) { 1369 1292 root_tag_clear_all(root); 1370 1293 root->rnode = NULL; 1371 - goto out; 1294 + return entry; 1372 1295 } 1373 - slot = 
indirect_to_ptr(slot); 1374 - shift = height * RADIX_TREE_MAP_SHIFT; 1375 1296 1376 - do { 1377 - if (slot == NULL) 1378 - goto out; 1379 - 1380 - shift -= RADIX_TREE_MAP_SHIFT; 1381 - offset = (index >> shift) & RADIX_TREE_MAP_MASK; 1382 - node = slot; 1383 - slot = slot->slots[offset]; 1384 - } while (shift); 1385 - 1386 - if (slot == NULL) 1387 - goto out; 1388 - 1389 - if (item && slot != item) { 1390 - slot = NULL; 1391 - goto out; 1392 - } 1297 + offset = index & RADIX_TREE_MAP_MASK; 1393 1298 1394 1299 /* 1395 1300 * Clear all tags associated with the item to be deleted. ··· 1383 1322 radix_tree_tag_clear(root, index, tag); 1384 1323 } 1385 1324 1386 - to_free = NULL; 1387 - /* Now free the nodes we do not need anymore */ 1388 - while (node) { 1389 - node->slots[offset] = NULL; 1390 - node->count--; 1391 - /* 1392 - * Queue the node for deferred freeing after the 1393 - * last reference to it disappears (set NULL, above). 1394 - */ 1395 - if (to_free) 1396 - radix_tree_node_free(to_free); 1325 + node->slots[offset] = NULL; 1326 + node->count--; 1397 1327 1398 - if (node->count) { 1399 - if (node == indirect_to_ptr(root->rnode)) 1400 - radix_tree_shrink(root); 1401 - goto out; 1402 - } 1328 + __radix_tree_delete_node(root, index, node); 1403 1329 1404 - /* Node with zero slots in use so free it */ 1405 - to_free = node; 1406 - 1407 - index >>= RADIX_TREE_MAP_SHIFT; 1408 - offset = index & RADIX_TREE_MAP_MASK; 1409 - node = node->parent; 1410 - } 1411 - 1412 - root_tag_clear_all(root); 1413 - root->height = 0; 1414 - root->rnode = NULL; 1415 - if (to_free) 1416 - radix_tree_node_free(to_free); 1417 - 1418 - out: 1419 - return slot; 1330 + return entry; 1420 1331 } 1421 1332 EXPORT_SYMBOL(radix_tree_delete_item); 1422 1333