Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

dlm: convert rsb list to rb_tree

Change the linked lists to rb_trees in the rsb
hash table to speed up searches. Slow rsb searches
were having a large impact on gfs2 performance due
to the large number of dlm locks gfs2 uses.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>

Authored by Bob Peterson and committed by David Teigland
9beb3bf5 c3b92c87

+112 -54
+15 -13
fs/dlm/debug_fs.c
··· 393 393 394 394 static void *table_seq_start(struct seq_file *seq, loff_t *pos) 395 395 { 396 + struct rb_node *node; 396 397 struct dlm_ls *ls = seq->private; 397 398 struct rsbtbl_iter *ri; 398 399 struct dlm_rsb *r; ··· 419 418 ri->format = 3; 420 419 421 420 spin_lock(&ls->ls_rsbtbl[bucket].lock); 422 - if (!list_empty(&ls->ls_rsbtbl[bucket].list)) { 423 - list_for_each_entry(r, &ls->ls_rsbtbl[bucket].list, 424 - res_hashchain) { 421 + if (!RB_EMPTY_ROOT(&ls->ls_rsbtbl[bucket].keep)) { 422 + for (node = rb_first(&ls->ls_rsbtbl[bucket].keep); node; 423 + node = rb_next(node)) { 424 + r = rb_entry(node, struct dlm_rsb, res_hashnode); 425 425 if (!entry--) { 426 426 dlm_hold_rsb(r); 427 427 ri->rsb = r; ··· 451 449 } 452 450 453 451 spin_lock(&ls->ls_rsbtbl[bucket].lock); 454 - if (!list_empty(&ls->ls_rsbtbl[bucket].list)) { 455 - r = list_first_entry(&ls->ls_rsbtbl[bucket].list, 456 - struct dlm_rsb, res_hashchain); 452 + if (!RB_EMPTY_ROOT(&ls->ls_rsbtbl[bucket].keep)) { 453 + node = rb_first(&ls->ls_rsbtbl[bucket].keep); 454 + r = rb_entry(node, struct dlm_rsb, res_hashnode); 457 455 dlm_hold_rsb(r); 458 456 ri->rsb = r; 459 457 ri->bucket = bucket; ··· 469 467 { 470 468 struct dlm_ls *ls = seq->private; 471 469 struct rsbtbl_iter *ri = iter_ptr; 472 - struct list_head *next; 470 + struct rb_node *next; 473 471 struct dlm_rsb *r, *rp; 474 472 loff_t n = *pos; 475 473 unsigned bucket; ··· 482 480 483 481 spin_lock(&ls->ls_rsbtbl[bucket].lock); 484 482 rp = ri->rsb; 485 - next = rp->res_hashchain.next; 483 + next = rb_next(&rp->res_hashnode); 486 484 487 - if (next != &ls->ls_rsbtbl[bucket].list) { 488 - r = list_entry(next, struct dlm_rsb, res_hashchain); 485 + if (next) { 486 + r = rb_entry(next, struct dlm_rsb, res_hashnode); 489 487 dlm_hold_rsb(r); 490 488 ri->rsb = r; 491 489 spin_unlock(&ls->ls_rsbtbl[bucket].lock); ··· 513 511 } 514 512 515 513 spin_lock(&ls->ls_rsbtbl[bucket].lock); 516 - if (!list_empty(&ls->ls_rsbtbl[bucket].list)) { 517 - r = 
list_first_entry(&ls->ls_rsbtbl[bucket].list, 518 - struct dlm_rsb, res_hashchain); 514 + if (!RB_EMPTY_ROOT(&ls->ls_rsbtbl[bucket].keep)) { 515 + next = rb_first(&ls->ls_rsbtbl[bucket].keep); 516 + r = rb_entry(next, struct dlm_rsb, res_hashnode); 519 517 dlm_hold_rsb(r); 520 518 ri->rsb = r; 521 519 ri->bucket = bucket;
+6 -3
fs/dlm/dlm_internal.h
··· 103 103 }; 104 104 105 105 struct dlm_rsbtable { 106 - struct list_head list; 107 - struct list_head toss; 106 + struct rb_root keep; 107 + struct rb_root toss; 108 108 spinlock_t lock; 109 109 }; 110 110 ··· 285 285 unsigned long res_toss_time; 286 286 uint32_t res_first_lkid; 287 287 struct list_head res_lookup; /* lkbs waiting on first */ 288 - struct list_head res_hashchain; /* rsbtbl */ 288 + union { 289 + struct list_head res_hashchain; 290 + struct rb_node res_hashnode; /* rsbtbl */ 291 + }; 289 292 struct list_head res_grantqueue; 290 293 struct list_head res_convertqueue; 291 294 struct list_head res_waitqueue;
+69 -16
fs/dlm/lock.c
··· 56 56 L: receive_xxxx_reply() <- R: send_xxxx_reply() 57 57 */ 58 58 #include <linux/types.h> 59 + #include <linux/rbtree.h> 59 60 #include <linux/slab.h> 60 61 #include "dlm_internal.h" 61 62 #include <linux/dlm_device.h> ··· 381 380 382 381 r = list_first_entry(&ls->ls_new_rsb, struct dlm_rsb, res_hashchain); 383 382 list_del(&r->res_hashchain); 383 + /* Convert the empty list_head to a NULL rb_node for tree usage: */ 384 + memset(&r->res_hashnode, 0, sizeof(struct rb_node)); 384 385 ls->ls_new_rsb_count--; 385 386 spin_unlock(&ls->ls_new_rsb_spin); 386 387 ··· 391 388 memcpy(r->res_name, name, len); 392 389 mutex_init(&r->res_mutex); 393 390 394 - INIT_LIST_HEAD(&r->res_hashchain); 395 391 INIT_LIST_HEAD(&r->res_lookup); 396 392 INIT_LIST_HEAD(&r->res_grantqueue); 397 393 INIT_LIST_HEAD(&r->res_convertqueue); ··· 402 400 return 0; 403 401 } 404 402 405 - static int search_rsb_list(struct list_head *head, char *name, int len, 403 + static int rsb_cmp(struct dlm_rsb *r, const char *name, int nlen) 404 + { 405 + char maxname[DLM_RESNAME_MAXLEN]; 406 + 407 + memset(maxname, 0, DLM_RESNAME_MAXLEN); 408 + memcpy(maxname, name, nlen); 409 + return memcmp(r->res_name, maxname, DLM_RESNAME_MAXLEN); 410 + } 411 + 412 + static int search_rsb_tree(struct rb_root *tree, char *name, int len, 406 413 unsigned int flags, struct dlm_rsb **r_ret) 407 414 { 415 + struct rb_node *node = tree->rb_node; 408 416 struct dlm_rsb *r; 409 417 int error = 0; 418 + int rc; 410 419 411 - list_for_each_entry(r, head, res_hashchain) { 412 - if (len == r->res_length && !memcmp(name, r->res_name, len)) 420 + while (node) { 421 + r = rb_entry(node, struct dlm_rsb, res_hashnode); 422 + rc = rsb_cmp(r, name, len); 423 + if (rc < 0) 424 + node = node->rb_left; 425 + else if (rc > 0) 426 + node = node->rb_right; 427 + else 413 428 goto found; 414 429 } 415 430 *r_ret = NULL; ··· 439 420 return error; 440 421 } 441 422 423 + static int rsb_insert(struct dlm_rsb *rsb, struct rb_root *tree) 424 + { 
425 + struct rb_node **newn = &tree->rb_node; 426 + struct rb_node *parent = NULL; 427 + int rc; 428 + 429 + while (*newn) { 430 + struct dlm_rsb *cur = rb_entry(*newn, struct dlm_rsb, 431 + res_hashnode); 432 + 433 + parent = *newn; 434 + rc = rsb_cmp(cur, rsb->res_name, rsb->res_length); 435 + if (rc < 0) 436 + newn = &parent->rb_left; 437 + else if (rc > 0) 438 + newn = &parent->rb_right; 439 + else { 440 + log_print("rsb_insert match"); 441 + dlm_dump_rsb(rsb); 442 + dlm_dump_rsb(cur); 443 + return -EEXIST; 444 + } 445 + } 446 + 447 + rb_link_node(&rsb->res_hashnode, parent, newn); 448 + rb_insert_color(&rsb->res_hashnode, tree); 449 + return 0; 450 + } 451 + 442 452 static int _search_rsb(struct dlm_ls *ls, char *name, int len, int b, 443 453 unsigned int flags, struct dlm_rsb **r_ret) 444 454 { 445 455 struct dlm_rsb *r; 446 456 int error; 447 457 448 - error = search_rsb_list(&ls->ls_rsbtbl[b].list, name, len, flags, &r); 458 + error = search_rsb_tree(&ls->ls_rsbtbl[b].keep, name, len, flags, &r); 449 459 if (!error) { 450 460 kref_get(&r->res_ref); 451 461 goto out; 452 462 } 453 - error = search_rsb_list(&ls->ls_rsbtbl[b].toss, name, len, flags, &r); 463 + error = search_rsb_tree(&ls->ls_rsbtbl[b].toss, name, len, flags, &r); 454 464 if (error) 455 465 goto out; 456 466 457 - list_move(&r->res_hashchain, &ls->ls_rsbtbl[b].list); 467 + rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[b].toss); 468 + error = rsb_insert(r, &ls->ls_rsbtbl[b].keep); 469 + if (error) 470 + return error; 458 471 459 472 if (dlm_no_directory(ls)) 460 473 goto out; ··· 578 527 nodeid = 0; 579 528 r->res_nodeid = nodeid; 580 529 } 581 - list_add(&r->res_hashchain, &ls->ls_rsbtbl[bucket].list); 582 - error = 0; 530 + error = rsb_insert(r, &ls->ls_rsbtbl[bucket].keep); 583 531 out_unlock: 584 532 spin_unlock(&ls->ls_rsbtbl[bucket].lock); 585 533 out: ··· 606 556 607 557 DLM_ASSERT(list_empty(&r->res_root_list), dlm_print_rsb(r);); 608 558 kref_init(&r->res_ref); 609 - 
list_move(&r->res_hashchain, &ls->ls_rsbtbl[r->res_bucket].toss); 559 + rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[r->res_bucket].keep); 560 + rsb_insert(r, &ls->ls_rsbtbl[r->res_bucket].toss); 610 561 r->res_toss_time = jiffies; 611 562 if (r->res_lvbptr) { 612 563 dlm_free_lvb(r->res_lvbptr); ··· 1133 1082 r->res_name, r->res_length); 1134 1083 } 1135 1084 1136 - /* FIXME: shouldn't this be able to exit as soon as one non-due rsb is 1137 - found since they are in order of newest to oldest? */ 1085 + /* FIXME: make this more efficient */ 1138 1086 1139 1087 static int shrink_bucket(struct dlm_ls *ls, int b) 1140 1088 { 1089 + struct rb_node *n; 1141 1090 struct dlm_rsb *r; 1142 1091 int count = 0, found; 1143 1092 1144 1093 for (;;) { 1145 1094 found = 0; 1146 1095 spin_lock(&ls->ls_rsbtbl[b].lock); 1147 - list_for_each_entry_reverse(r, &ls->ls_rsbtbl[b].toss, 1148 - res_hashchain) { 1096 + for (n = rb_first(&ls->ls_rsbtbl[b].toss); n; n = rb_next(n)) { 1097 + r = rb_entry(n, struct dlm_rsb, res_hashnode); 1149 1098 if (!time_after_eq(jiffies, r->res_toss_time + 1150 1099 dlm_config.ci_toss_secs * HZ)) 1151 1100 continue; ··· 1159 1108 } 1160 1109 1161 1110 if (kref_put(&r->res_ref, kill_rsb)) { 1162 - list_del(&r->res_hashchain); 1111 + rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[b].toss); 1163 1112 spin_unlock(&ls->ls_rsbtbl[b].lock); 1164 1113 1165 1114 if (is_master(r)) ··· 4492 4441 4493 4442 static struct dlm_rsb *find_purged_rsb(struct dlm_ls *ls, int bucket) 4494 4443 { 4444 + struct rb_node *n; 4495 4445 struct dlm_rsb *r, *r_ret = NULL; 4496 4446 4497 4447 spin_lock(&ls->ls_rsbtbl[bucket].lock); 4498 - list_for_each_entry(r, &ls->ls_rsbtbl[bucket].list, res_hashchain) { 4448 + for (n = rb_first(&ls->ls_rsbtbl[bucket].keep); n; n = rb_next(n)) { 4449 + r = rb_entry(n, struct dlm_rsb, res_hashnode); 4499 4450 if (!rsb_flag(r, RSB_LOCKS_PURGED)) 4500 4451 continue; 4501 4452 hold_rsb(r);
+9 -14
fs/dlm/lockspace.c
··· 457 457 if (!ls->ls_rsbtbl) 458 458 goto out_lsfree; 459 459 for (i = 0; i < size; i++) { 460 - INIT_LIST_HEAD(&ls->ls_rsbtbl[i].list); 461 - INIT_LIST_HEAD(&ls->ls_rsbtbl[i].toss); 460 + ls->ls_rsbtbl[i].keep.rb_node = NULL; 461 + ls->ls_rsbtbl[i].toss.rb_node = NULL; 462 462 spin_lock_init(&ls->ls_rsbtbl[i].lock); 463 463 } 464 464 ··· 685 685 static int release_lockspace(struct dlm_ls *ls, int force) 686 686 { 687 687 struct dlm_rsb *rsb; 688 - struct list_head *head; 688 + struct rb_node *n; 689 689 int i, busy, rv; 690 690 691 691 busy = lockspace_busy(ls, force); ··· 746 746 */ 747 747 748 748 for (i = 0; i < ls->ls_rsbtbl_size; i++) { 749 - head = &ls->ls_rsbtbl[i].list; 750 - while (!list_empty(head)) { 751 - rsb = list_entry(head->next, struct dlm_rsb, 752 - res_hashchain); 753 - 754 - list_del(&rsb->res_hashchain); 749 + while ((n = rb_first(&ls->ls_rsbtbl[i].keep))) { 750 + rsb = rb_entry(n, struct dlm_rsb, res_hashnode); 751 + rb_erase(n, &ls->ls_rsbtbl[i].keep); 755 752 dlm_free_rsb(rsb); 756 753 } 757 754 758 - head = &ls->ls_rsbtbl[i].toss; 759 - while (!list_empty(head)) { 760 - rsb = list_entry(head->next, struct dlm_rsb, 761 - res_hashchain); 762 - list_del(&rsb->res_hashchain); 755 + while ((n = rb_first(&ls->ls_rsbtbl[i].toss))) { 756 + rsb = rb_entry(n, struct dlm_rsb, res_hashnode); 757 + rb_erase(n, &ls->ls_rsbtbl[i].toss); 763 758 dlm_free_rsb(rsb); 764 759 } 765 760 }
+13 -8
fs/dlm/recover.c
··· 715 715 716 716 int dlm_create_root_list(struct dlm_ls *ls) 717 717 { 718 + struct rb_node *n; 718 719 struct dlm_rsb *r; 719 720 int i, error = 0; 720 721 ··· 728 727 729 728 for (i = 0; i < ls->ls_rsbtbl_size; i++) { 730 729 spin_lock(&ls->ls_rsbtbl[i].lock); 731 - list_for_each_entry(r, &ls->ls_rsbtbl[i].list, res_hashchain) { 730 + for (n = rb_first(&ls->ls_rsbtbl[i].keep); n; n = rb_next(n)) { 731 + r = rb_entry(n, struct dlm_rsb, res_hashnode); 732 732 list_add(&r->res_root_list, &ls->ls_root_list); 733 733 dlm_hold_rsb(r); 734 734 } ··· 743 741 continue; 744 742 } 745 743 746 - list_for_each_entry(r, &ls->ls_rsbtbl[i].toss, res_hashchain) { 744 + for (n = rb_first(&ls->ls_rsbtbl[i].toss); n; n = rb_next(n)) { 745 + r = rb_entry(n, struct dlm_rsb, res_hashnode); 747 746 list_add(&r->res_root_list, &ls->ls_root_list); 748 747 dlm_hold_rsb(r); 749 748 } ··· 774 771 775 772 void dlm_clear_toss_list(struct dlm_ls *ls) 776 773 { 777 - struct dlm_rsb *r, *safe; 774 + struct rb_node *n, *next; 775 + struct dlm_rsb *rsb; 778 776 int i; 779 777 780 778 for (i = 0; i < ls->ls_rsbtbl_size; i++) { 781 779 spin_lock(&ls->ls_rsbtbl[i].lock); 782 - list_for_each_entry_safe(r, safe, &ls->ls_rsbtbl[i].toss, 783 - res_hashchain) { 784 - if (dlm_no_directory(ls) || !is_master(r)) { 785 - list_del(&r->res_hashchain); 786 - dlm_free_rsb(r); 780 + for (n = rb_first(&ls->ls_rsbtbl[i].toss); n; n = next) { 781 + next = rb_next(n);; 782 + rsb = rb_entry(n, struct dlm_rsb, res_hashnode); 783 + if (dlm_no_directory(ls) || !is_master(rsb)) { 784 + rb_erase(n, &ls->ls_rsbtbl[i].toss); 785 + dlm_free_rsb(rsb); 787 786 } 788 787 } 789 788 spin_unlock(&ls->ls_rsbtbl[i].lock);