Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/teigland/linux-dlm

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/teigland/linux-dlm:
dlm: add recovery callbacks
dlm: add node slots and generation
dlm: move recovery barrier calls
dlm: convert rsb list to rb_tree

+945 -274
+74 -58
fs/dlm/config.c
··· 2 2 ******************************************************************************* 3 3 ** 4 4 ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 5 - ** Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. 5 + ** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. 6 6 ** 7 7 ** This copyrighted material is made available to anyone wishing to use, 8 8 ** modify, copy, or redistribute it subject to the terms and conditions ··· 17 17 #include <linux/slab.h> 18 18 #include <linux/in.h> 19 19 #include <linux/in6.h> 20 + #include <linux/dlmconstants.h> 20 21 #include <net/ipv6.h> 21 22 #include <net/sock.h> 22 23 ··· 37 36 static struct config_group *space_list; 38 37 static struct config_group *comm_list; 39 38 static struct dlm_comm *local_comm; 39 + static uint32_t dlm_comm_count; 40 40 41 41 struct dlm_clusters; 42 42 struct dlm_cluster; ··· 105 103 unsigned int cl_timewarn_cs; 106 104 unsigned int cl_waitwarn_us; 107 105 unsigned int cl_new_rsb_count; 106 + unsigned int cl_recover_callbacks; 107 + char cl_cluster_name[DLM_LOCKSPACE_LEN]; 108 108 }; 109 109 110 110 enum { ··· 122 118 CLUSTER_ATTR_TIMEWARN_CS, 123 119 CLUSTER_ATTR_WAITWARN_US, 124 120 CLUSTER_ATTR_NEW_RSB_COUNT, 121 + CLUSTER_ATTR_RECOVER_CALLBACKS, 122 + CLUSTER_ATTR_CLUSTER_NAME, 125 123 }; 126 124 127 125 struct cluster_attribute { 128 126 struct configfs_attribute attr; 129 127 ssize_t (*show)(struct dlm_cluster *, char *); 130 128 ssize_t (*store)(struct dlm_cluster *, const char *, size_t); 129 + }; 130 + 131 + static ssize_t cluster_cluster_name_read(struct dlm_cluster *cl, char *buf) 132 + { 133 + return sprintf(buf, "%s\n", cl->cl_cluster_name); 134 + } 135 + 136 + static ssize_t cluster_cluster_name_write(struct dlm_cluster *cl, 137 + const char *buf, size_t len) 138 + { 139 + strncpy(dlm_config.ci_cluster_name, buf, DLM_LOCKSPACE_LEN); 140 + strncpy(cl->cl_cluster_name, buf, DLM_LOCKSPACE_LEN); 141 + return len; 142 + } 143 + 144 + static struct cluster_attribute cluster_attr_cluster_name = { 145 + .attr = { .ca_owner = THIS_MODULE, 146 + .ca_name = "cluster_name", 147 + .ca_mode = S_IRUGO | S_IWUSR }, 148 + .show = cluster_cluster_name_read, 149 + .store = cluster_cluster_name_write, 131 150 }; 132 151 133 152 static ssize_t cluster_set(struct dlm_cluster *cl, unsigned int *cl_field, ··· 198 171 CLUSTER_ATTR(timewarn_cs, 1); 199 172 CLUSTER_ATTR(waitwarn_us, 0); 200 173 CLUSTER_ATTR(new_rsb_count, 0); 174 + CLUSTER_ATTR(recover_callbacks, 0); 201 175 202 176 static struct configfs_attribute *cluster_attrs[] = { 203 177 [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr, ··· 213 185 [CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr, 214 186 [CLUSTER_ATTR_WAITWARN_US] = &cluster_attr_waitwarn_us.attr, 215 187 [CLUSTER_ATTR_NEW_RSB_COUNT] = &cluster_attr_new_rsb_count.attr, 188 + [CLUSTER_ATTR_RECOVER_CALLBACKS] = &cluster_attr_recover_callbacks.attr, 189 + [CLUSTER_ATTR_CLUSTER_NAME] = &cluster_attr_cluster_name.attr, 216 190 NULL, 217 191 }; 218 192 ··· 323 293 324 294 struct dlm_comm { 325 295 struct config_item item; 296 + int seq; 326 297 int nodeid; 327 298 int local; 328 299 int addr_count; ··· 340 309 int nodeid; 341 310 int weight; 342 311 int new; 312 + int comm_seq; /* copy of cm->seq when nd->nodeid is set */ 343 313 }; 344 314 345 315 static struct configfs_group_operations clusters_ops = { ··· 487 455 cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs; 488 456 cl->cl_waitwarn_us = dlm_config.ci_waitwarn_us; 489 457 cl->cl_new_rsb_count = dlm_config.ci_new_rsb_count; 458 + cl->cl_recover_callbacks = dlm_config.ci_recover_callbacks; 459 + memcpy(cl->cl_cluster_name, dlm_config.ci_cluster_name, 460 + DLM_LOCKSPACE_LEN); 490 461 491 462 space_list = &sps->ss_group; 492 463 comm_list = &cms->cs_group; ··· 593 558 return ERR_PTR(-ENOMEM); 594 559 595 560 config_item_init_type_name(&cm->item, name, &comm_type); 561 + 562 + cm->seq = dlm_comm_count++; 563 + if (!cm->seq) 564 + cm->seq = dlm_comm_count++; 565 + 596 566 cm->nodeid = -1; 597 567 cm->local = 0; 598 568 cm->addr_count = 0; ··· 841 801 static ssize_t node_nodeid_write(struct dlm_node *nd, const char *buf, 842 802 size_t len) 843 803 { 804 + uint32_t seq = 0; 844 805 nd->nodeid = simple_strtol(buf, NULL, 0); 806 + dlm_comm_seq(nd->nodeid, &seq); 807 + nd->comm_seq = seq; 845 808 return len; 846 809 } 847 810 ··· 951 908 } 952 909 953 910 /* caller must free mem */ 954 - int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out, 955 - int **new_out, int *new_count_out) 911 + int dlm_config_nodes(char *lsname, struct dlm_config_node **nodes_out, 912 + int *count_out) 956 913 { 957 914 struct dlm_space *sp; 958 915 struct dlm_node *nd; 959 - int i = 0, rv = 0, ids_count = 0, new_count = 0; 960 - int *ids, *new; 916 + struct dlm_config_node *nodes, *node; 917 + int rv, count; 961 918 962 919 sp = get_space(lsname); 963 920 if (!sp) ··· 970 927 goto out; 971 928 } 972 929 973 - ids_count = sp->members_count; 930 + count = sp->members_count; 974 931 975 - ids = kcalloc(ids_count, sizeof(int), GFP_NOFS); 976 - if (!ids) { 932 + nodes = kcalloc(count, sizeof(struct dlm_config_node), GFP_NOFS); 933 + if (!nodes) { 977 934 rv = -ENOMEM; 978 935 goto out; 979 936 } 980 937 938 + node = nodes; 981 939 list_for_each_entry(nd, &sp->members, list) { 982 - ids[i++] = nd->nodeid; 983 - if (nd->new) 984 - new_count++; 940 + node->nodeid = nd->nodeid; 941 + node->weight = nd->weight; 942 + node->new = nd->new; 943 + node->comm_seq = nd->comm_seq; 944 + node++; 945 + 946 + nd->new = 0; 985 947 } 986 948 987 - if (ids_count != i) 988 - printk(KERN_ERR "dlm: bad nodeid count %d %d\n", ids_count, i); 989 - 990 - if (!new_count) 991 - goto out_ids; 992 - 993 - new = kcalloc(new_count, sizeof(int), GFP_NOFS); 994 - if (!new) { 995 - kfree(ids); 996 - rv = -ENOMEM; 997 - goto out; 998 - } 999 - 1000 - i = 0; 1001 - list_for_each_entry(nd, &sp->members, list) { 1002 - if (nd->new) { 1003 - new[i++] = nd->nodeid; 1004 - nd->new = 0; 1005 - } 1006 - } 1007 - *new_count_out = new_count; 1008 - *new_out = new; 1009 - 1010 - out_ids: 1011 - *ids_count_out = ids_count; 1012 - *ids_out = ids; 949 + *count_out = count; 950 + *nodes_out = nodes; 951 + rv = 0; 1013 952 out: 1014 953 mutex_unlock(&sp->members_lock); 1015 954 put_space(sp); 1016 955 return rv; 1017 956 } 1018 957 1019 - int dlm_node_weight(char *lsname, int nodeid) 958 + int dlm_comm_seq(int nodeid, uint32_t *seq) 1020 959 { 1021 - struct dlm_space *sp; 1022 - struct dlm_node *nd; 1023 - int w = -EEXIST; 1024 - 1025 - sp = get_space(lsname); 1026 - if (!sp) 1027 - goto out; 1028 - 1029 - mutex_lock(&sp->members_lock); 1030 - list_for_each_entry(nd, &sp->members, list) { 1031 - if (nd->nodeid != nodeid) 1032 - continue; 1033 - w = nd->weight; 1034 - break; 1035 - } 1036 - mutex_unlock(&sp->members_lock); 1037 - put_space(sp); 1038 - out: 1039 - return w; 960 + struct dlm_comm *cm = get_comm(nodeid, NULL); 961 + if (!cm) 962 + return -EEXIST; 963 + *seq = cm->seq; 964 + put_comm(cm); 965 + return 0; 1040 966 } 1041 967 1042 968 int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr) ··· 1059 1047 #define DEFAULT_TIMEWARN_CS 500 /* 5 sec = 500 centiseconds */ 1060 1048 #define DEFAULT_WAITWARN_US 0 1061 1049 #define DEFAULT_NEW_RSB_COUNT 128 1050 + #define DEFAULT_RECOVER_CALLBACKS 0 1051 + #define DEFAULT_CLUSTER_NAME "" 1062 1052 1063 1053 struct dlm_config_info dlm_config = { 1064 1054 .ci_tcp_port = DEFAULT_TCP_PORT, ··· 1074 1060 .ci_protocol = DEFAULT_PROTOCOL, 1075 1061 .ci_timewarn_cs = DEFAULT_TIMEWARN_CS, 1076 1062 .ci_waitwarn_us = DEFAULT_WAITWARN_US, 1077 - .ci_new_rsb_count = DEFAULT_NEW_RSB_COUNT 1063 + .ci_new_rsb_count = DEFAULT_NEW_RSB_COUNT, 1064 + .ci_recover_callbacks = DEFAULT_RECOVER_CALLBACKS, 1065 + .ci_cluster_name = DEFAULT_CLUSTER_NAME 1078 1066 }; 1079 1067
+13 -4
fs/dlm/config.h
··· 2 2 ******************************************************************************* 3 3 ** 4 4 ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 5 - ** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. 5 + ** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. 6 6 ** 7 7 ** This copyrighted material is made available to anyone wishing to use, 8 8 ** modify, copy, or redistribute it subject to the terms and conditions ··· 13 13 14 14 #ifndef __CONFIG_DOT_H__ 15 15 #define __CONFIG_DOT_H__ 16 + 17 + struct dlm_config_node { 18 + int nodeid; 19 + int weight; 20 + int new; 21 + uint32_t comm_seq; 22 + }; 16 23 17 24 #define DLM_MAX_ADDR_COUNT 3 18 25 ··· 36 29 int ci_timewarn_cs; 37 30 int ci_waitwarn_us; 38 31 int ci_new_rsb_count; 32 + int ci_recover_callbacks; 33 + char ci_cluster_name[DLM_LOCKSPACE_LEN]; 39 34 }; 40 35 41 36 extern struct dlm_config_info dlm_config; 42 37 43 38 int dlm_config_init(void); 44 39 void dlm_config_exit(void); 45 - int dlm_node_weight(char *lsname, int nodeid); 46 - int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out, 47 - int **new_out, int *new_count_out); 40 + int dlm_config_nodes(char *lsname, struct dlm_config_node **nodes_out, 41 + int *count_out); 42 + int dlm_comm_seq(int nodeid, uint32_t *seq); 48 43 int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr); 49 44 int dlm_addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid); 50 45 int dlm_our_nodeid(void);
+15 -13
fs/dlm/debug_fs.c
··· 393 393 394 394 static void *table_seq_start(struct seq_file *seq, loff_t *pos) 395 395 { 396 + struct rb_node *node; 396 397 struct dlm_ls *ls = seq->private; 397 398 struct rsbtbl_iter *ri; 398 399 struct dlm_rsb *r; ··· 419 418 ri->format = 3; 420 419 421 420 spin_lock(&ls->ls_rsbtbl[bucket].lock); 422 - if (!list_empty(&ls->ls_rsbtbl[bucket].list)) { 423 - list_for_each_entry(r, &ls->ls_rsbtbl[bucket].list, 424 - res_hashchain) { 421 + if (!RB_EMPTY_ROOT(&ls->ls_rsbtbl[bucket].keep)) { 422 + for (node = rb_first(&ls->ls_rsbtbl[bucket].keep); node; 423 + node = rb_next(node)) { 424 + r = rb_entry(node, struct dlm_rsb, res_hashnode); 425 425 if (!entry--) { 426 426 dlm_hold_rsb(r); 427 427 ri->rsb = r; ··· 451 449 } 452 450 453 451 spin_lock(&ls->ls_rsbtbl[bucket].lock); 454 - if (!list_empty(&ls->ls_rsbtbl[bucket].list)) { 455 - r = list_first_entry(&ls->ls_rsbtbl[bucket].list, 456 - struct dlm_rsb, res_hashchain); 452 + if (!RB_EMPTY_ROOT(&ls->ls_rsbtbl[bucket].keep)) { 453 + node = rb_first(&ls->ls_rsbtbl[bucket].keep); 454 + r = rb_entry(node, struct dlm_rsb, res_hashnode); 457 455 dlm_hold_rsb(r); 458 456 ri->rsb = r; 459 457 ri->bucket = bucket; ··· 469 467 { 470 468 struct dlm_ls *ls = seq->private; 471 469 struct rsbtbl_iter *ri = iter_ptr; 472 - struct list_head *next; 470 + struct rb_node *next; 473 471 struct dlm_rsb *r, *rp; 474 472 loff_t n = *pos; 475 473 unsigned bucket; ··· 482 480 483 481 spin_lock(&ls->ls_rsbtbl[bucket].lock); 484 482 rp = ri->rsb; 485 - next = rp->res_hashchain.next; 483 + next = rb_next(&rp->res_hashnode); 486 484 487 - if (next != &ls->ls_rsbtbl[bucket].list) { 488 - r = list_entry(next, struct dlm_rsb, res_hashchain); 485 + if (next) { 486 + r = rb_entry(next, struct dlm_rsb, res_hashnode); 489 487 dlm_hold_rsb(r); 490 488 ri->rsb = r; 491 489 spin_unlock(&ls->ls_rsbtbl[bucket].lock); ··· 513 511 } 514 512 515 513 spin_lock(&ls->ls_rsbtbl[bucket].lock); 516 - if (!list_empty(&ls->ls_rsbtbl[bucket].list)) { 517 - r = list_first_entry(&ls->ls_rsbtbl[bucket].list, 518 - struct dlm_rsb, res_hashchain); 514 + if (!RB_EMPTY_ROOT(&ls->ls_rsbtbl[bucket].keep)) { 515 + next = rb_first(&ls->ls_rsbtbl[bucket].keep); 516 + r = rb_entry(next, struct dlm_rsb, res_hashnode); 519 517 dlm_hold_rsb(r); 520 518 ri->rsb = r; 521 519 ri->bucket = bucket;
-1
fs/dlm/dir.c
··· 290 290 291 291 out_status: 292 292 error = 0; 293 - dlm_set_recover_status(ls, DLM_RS_DIR); 294 293 log_debug(ls, "dlm_recover_directory %d entries", count); 295 294 out_free: 296 295 kfree(last_name);
+50 -10
fs/dlm/dlm_internal.h
··· 2 2 ******************************************************************************* 3 3 ** 4 4 ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 5 - ** Copyright (C) 2004-2010 Red Hat, Inc. All rights reserved. 5 + ** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. 6 6 ** 7 7 ** This copyrighted material is made available to anyone wishing to use, 8 8 ** modify, copy, or redistribute it subject to the terms and conditions ··· 103 103 }; 104 104 105 105 struct dlm_rsbtable { 106 - struct list_head list; 107 - struct list_head toss; 106 + struct rb_root keep; 107 + struct rb_root toss; 108 108 spinlock_t lock; 109 109 }; 110 110 ··· 117 117 struct list_head list; 118 118 int nodeid; 119 119 int weight; 120 + int slot; 121 + int slot_prev; 122 + int comm_seq; 123 + uint32_t generation; 120 124 }; 121 125 122 126 /* ··· 129 125 130 126 struct dlm_recover { 131 127 struct list_head list; 132 - int *nodeids; /* nodeids of all members */ 133 - int node_count; 134 - int *new; /* nodeids of new members */ 135 - int new_count; 128 + struct dlm_config_node *nodes; 129 + int nodes_count; 136 130 uint64_t seq; 137 131 }; 138 132 ··· 287 285 unsigned long res_toss_time; 288 286 uint32_t res_first_lkid; 289 287 struct list_head res_lookup; /* lkbs waiting on first */ 290 - struct list_head res_hashchain; /* rsbtbl */ 288 + union { 289 + struct list_head res_hashchain; 290 + struct rb_node res_hashnode; /* rsbtbl */ 291 + }; 291 292 struct list_head res_grantqueue; 292 293 struct list_head res_convertqueue; 293 294 struct list_head res_waitqueue; ··· 339 334 /* dlm_header is first element of all structs sent between nodes */ 340 335 341 336 #define DLM_HEADER_MAJOR 0x00030000 342 - #define DLM_HEADER_MINOR 0x00000000 337 + #define DLM_HEADER_MINOR 0x00000001 338 + 339 + #define DLM_HEADER_SLOTS 0x00000001 343 340 344 341 #define DLM_MSG 1 345 342 #define DLM_RCOM 2 ··· 429 422 struct dlm_rcom rcom; 430 423 }; 431 424 425 + #define DLM_RSF_NEED_SLOTS 0x00000001 426 + 427 + /* RCOM_STATUS data */ 428 + struct rcom_status { 429 + __le32 rs_flags; 430 + __le32 rs_unused1; 431 + __le64 rs_unused2; 432 + }; 433 + 434 + /* RCOM_STATUS_REPLY data */ 432 435 struct rcom_config { 433 436 __le32 rf_lvblen; 434 437 __le32 rf_lsflags; 435 - __le64 rf_unused; 438 + 439 + /* DLM_HEADER_SLOTS adds: */ 440 + __le32 rf_flags; 441 + __le16 rf_our_slot; 442 + __le16 rf_num_slots; 443 + __le32 rf_generation; 444 + __le32 rf_unused1; 445 + __le64 rf_unused2; 446 + }; 447 + 448 + struct rcom_slot { 449 + __le32 ro_nodeid; 450 + __le16 ro_slot; 451 + __le16 ro_unused1; 452 + __le64 ro_unused2; 436 453 }; 437 454 438 455 struct rcom_lock { ··· 483 452 struct list_head ls_list; /* list of lockspaces */ 484 453 dlm_lockspace_t *ls_local_handle; 485 454 uint32_t ls_global_id; /* global unique lockspace ID */ 455 + uint32_t ls_generation; 486 456 uint32_t ls_exflags; 487 457 int ls_lvblen; 488 458 int ls_count; /* refcount of processes in ··· 521 489 int ls_low_nodeid; 522 490 int ls_total_weight; 523 491 int *ls_node_array; 492 + 493 + int ls_slot; 494 + int ls_num_slots; 495 + int ls_slots_size; 496 + struct dlm_slot *ls_slots; 524 497 525 498 struct dlm_rsb ls_stub_rsb; /* for returning errors */ 526 499 struct dlm_lkb ls_stub_lkb; /* for returning errors */ ··· 573 536 574 537 struct list_head ls_root_list; /* root resources */ 575 538 struct rw_semaphore ls_root_sem; /* protect root_list */ 539 + 540 + const struct dlm_lockspace_ops *ls_ops; 541 + void *ls_ops_arg; 576 542 577 543 int ls_namelen; 578 544 char ls_name[1];
+69 -16
fs/dlm/lock.c
··· 56 56 L: receive_xxxx_reply() <- R: send_xxxx_reply() 57 57 */ 58 58 #include <linux/types.h> 59 + #include <linux/rbtree.h> 59 60 #include <linux/slab.h> 60 61 #include "dlm_internal.h" 61 62 #include <linux/dlm_device.h> ··· 381 380 382 381 r = list_first_entry(&ls->ls_new_rsb, struct dlm_rsb, res_hashchain); 383 382 list_del(&r->res_hashchain); 383 + /* Convert the empty list_head to a NULL rb_node for tree usage: */ 384 + memset(&r->res_hashnode, 0, sizeof(struct rb_node)); 384 385 ls->ls_new_rsb_count--; 385 386 spin_unlock(&ls->ls_new_rsb_spin); 386 387 ··· 391 388 memcpy(r->res_name, name, len); 392 389 mutex_init(&r->res_mutex); 393 390 394 - INIT_LIST_HEAD(&r->res_hashchain); 395 391 INIT_LIST_HEAD(&r->res_lookup); 396 392 INIT_LIST_HEAD(&r->res_grantqueue); 397 393 INIT_LIST_HEAD(&r->res_convertqueue); ··· 402 400 return 0; 403 401 } 404 402 405 - static int search_rsb_list(struct list_head *head, char *name, int len, 403 + static int rsb_cmp(struct dlm_rsb *r, const char *name, int nlen) 404 + { 405 + char maxname[DLM_RESNAME_MAXLEN]; 406 + 407 + memset(maxname, 0, DLM_RESNAME_MAXLEN); 408 + memcpy(maxname, name, nlen); 409 + return memcmp(r->res_name, maxname, DLM_RESNAME_MAXLEN); 410 + } 411 + 412 + static int search_rsb_tree(struct rb_root *tree, char *name, int len, 406 413 unsigned int flags, struct dlm_rsb **r_ret) 407 414 { 415 + struct rb_node *node = tree->rb_node; 408 416 struct dlm_rsb *r; 409 417 int error = 0; 418 + int rc; 410 419 411 - list_for_each_entry(r, head, res_hashchain) { 412 - if (len == r->res_length && !memcmp(name, r->res_name, len)) 420 + while (node) { 421 + r = rb_entry(node, struct dlm_rsb, res_hashnode); 422 + rc = rsb_cmp(r, name, len); 423 + if (rc < 0) 424 + node = node->rb_left; 425 + else if (rc > 0) 426 + node = node->rb_right; 427 + else 413 428 goto found; 414 429 } 415 430 *r_ret = NULL; ··· 439 420 return error; 440 421 } 441 422 423 + static int rsb_insert(struct dlm_rsb *rsb, struct rb_root *tree) 424 + { 425 + struct rb_node **newn = &tree->rb_node; 426 + struct rb_node *parent = NULL; 427 + int rc; 428 + 429 + while (*newn) { 430 + struct dlm_rsb *cur = rb_entry(*newn, struct dlm_rsb, 431 + res_hashnode); 432 + 433 + parent = *newn; 434 + rc = rsb_cmp(cur, rsb->res_name, rsb->res_length); 435 + if (rc < 0) 436 + newn = &parent->rb_left; 437 + else if (rc > 0) 438 + newn = &parent->rb_right; 439 + else { 440 + log_print("rsb_insert match"); 441 + dlm_dump_rsb(rsb); 442 + dlm_dump_rsb(cur); 443 + return -EEXIST; 444 + } 445 + } 446 + 447 + rb_link_node(&rsb->res_hashnode, parent, newn); 448 + rb_insert_color(&rsb->res_hashnode, tree); 449 + return 0; 450 + } 451 + 442 452 static int _search_rsb(struct dlm_ls *ls, char *name, int len, int b, 443 453 unsigned int flags, struct dlm_rsb **r_ret) 444 454 { 445 455 struct dlm_rsb *r; 446 456 int error; 447 457 448 - error = search_rsb_list(&ls->ls_rsbtbl[b].list, name, len, flags, &r); 458 + error = search_rsb_tree(&ls->ls_rsbtbl[b].keep, name, len, flags, &r); 449 459 if (!error) { 450 460 kref_get(&r->res_ref); 451 461 goto out; 452 462 } 453 - error = search_rsb_list(&ls->ls_rsbtbl[b].toss, name, len, flags, &r); 463 + error = search_rsb_tree(&ls->ls_rsbtbl[b].toss, name, len, flags, &r); 454 464 if (error) 455 465 goto out; 456 466 457 - list_move(&r->res_hashchain, &ls->ls_rsbtbl[b].list); 467 + rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[b].toss); 468 + error = rsb_insert(r, &ls->ls_rsbtbl[b].keep); 469 + if (error) 470 + return error; 458 471 459 472 if (dlm_no_directory(ls)) 460 473 goto out; ··· 578 527 nodeid = 0; 579 528 r->res_nodeid = nodeid; 580 529 } 581 - list_add(&r->res_hashchain, &ls->ls_rsbtbl[bucket].list); 582 - error = 0; 530 + error = rsb_insert(r, &ls->ls_rsbtbl[bucket].keep); 583 531 out_unlock: 584 532 spin_unlock(&ls->ls_rsbtbl[bucket].lock); 585 533 out: ··· 606 556 607 557 DLM_ASSERT(list_empty(&r->res_root_list), dlm_print_rsb(r);); 608 558 kref_init(&r->res_ref); 609 - list_move(&r->res_hashchain, &ls->ls_rsbtbl[r->res_bucket].toss); 559 + rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[r->res_bucket].keep); 560 + rsb_insert(r, &ls->ls_rsbtbl[r->res_bucket].toss); 610 561 r->res_toss_time = jiffies; 611 562 if (r->res_lvbptr) { 612 563 dlm_free_lvb(r->res_lvbptr); ··· 1133 1082 r->res_name, r->res_length); 1134 1083 } 1135 1084 1136 - /* FIXME: shouldn't this be able to exit as soon as one non-due rsb is 1137 - found since they are in order of newest to oldest? */ 1085 + /* FIXME: make this more efficient */ 1138 1086 1139 1087 static int shrink_bucket(struct dlm_ls *ls, int b) 1140 1088 { 1089 + struct rb_node *n; 1141 1090 struct dlm_rsb *r; 1142 1091 int count = 0, found; 1143 1092 1144 1093 for (;;) { 1145 1094 found = 0; 1146 1095 spin_lock(&ls->ls_rsbtbl[b].lock); 1147 - list_for_each_entry_reverse(r, &ls->ls_rsbtbl[b].toss, 1148 - res_hashchain) { 1096 + for (n = rb_first(&ls->ls_rsbtbl[b].toss); n; n = rb_next(n)) { 1097 + r = rb_entry(n, struct dlm_rsb, res_hashnode); 1149 1098 if (!time_after_eq(jiffies, r->res_toss_time + 1150 1099 dlm_config.ci_toss_secs * HZ)) 1151 1100 continue; ··· 1159 1108 } 1160 1109 1161 1110 if (kref_put(&r->res_ref, kill_rsb)) { 1162 - list_del(&r->res_hashchain); 1111 + rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[b].toss); 1163 1112 spin_unlock(&ls->ls_rsbtbl[b].lock); 1164 1113 1165 1114 if (is_master(r)) ··· 4492 4441 4493 4442 static struct dlm_rsb *find_purged_rsb(struct dlm_ls *ls, int bucket) 4494 4443 { 4444 + struct rb_node *n; 4495 4445 struct dlm_rsb *r, *r_ret = NULL; 4496 4446 4497 4447 spin_lock(&ls->ls_rsbtbl[bucket].lock); 4498 - list_for_each_entry(r, &ls->ls_rsbtbl[bucket].list, res_hashchain) { 4448 + for (n = rb_first(&ls->ls_rsbtbl[bucket].keep); n; n = rb_next(n)) { 4449 + r = rb_entry(n, struct dlm_rsb, res_hashnode); 4499 4450 if (!rsb_flag(r, RSB_LOCKS_PURGED)) 4500 4451 continue; 4501 4452 hold_rsb(r);
+49 -22
fs/dlm/lockspace.c
··· 2 2 ******************************************************************************* 3 3 ** 4 4 ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 5 - ** Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. 5 + ** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. 6 6 ** 7 7 ** This copyrighted material is made available to anyone wishing to use, 8 8 ** modify, copy, or redistribute it subject to the terms and conditions ··· 386 386 dlm_lowcomms_stop(); 387 387 } 388 388 389 - static int new_lockspace(const char *name, int namelen, void **lockspace, 390 - uint32_t flags, int lvblen) 389 + static int new_lockspace(const char *name, const char *cluster, 390 + uint32_t flags, int lvblen, 391 + const struct dlm_lockspace_ops *ops, void *ops_arg, 392 + int *ops_result, dlm_lockspace_t **lockspace) 391 393 { 392 394 struct dlm_ls *ls; 393 395 int i, size, error; 394 396 int do_unreg = 0; 397 + int namelen = strlen(name); 395 398 396 399 if (namelen > DLM_LOCKSPACE_LEN) 397 400 return -EINVAL; ··· 406 403 return -EINVAL; 407 404 408 405 if (!dlm_user_daemon_available()) { 409 - module_put(THIS_MODULE); 410 - return -EUNATCH; 406 + log_print("dlm user daemon not available"); 407 + error = -EUNATCH; 408 + goto out; 409 + } 410 + 411 + if (ops && ops_result) { 412 + if (!dlm_config.ci_recover_callbacks) 413 + *ops_result = -EOPNOTSUPP; 414 + else 415 + *ops_result = 0; 416 + } 417 + 418 + if (dlm_config.ci_recover_callbacks && cluster && 419 + strncmp(cluster, dlm_config.ci_cluster_name, DLM_LOCKSPACE_LEN)) { 420 + log_print("dlm cluster name %s mismatch %s", 421 + dlm_config.ci_cluster_name, cluster); 422 + error = -EBADR; 423 + goto out; 411 424 } 412 425 413 426 error = 0; ··· 461 442 ls->ls_flags = 0; 462 443 ls->ls_scan_time = jiffies; 463 444 445 + if (ops && dlm_config.ci_recover_callbacks) { 446 + ls->ls_ops = ops; 447 + ls->ls_ops_arg = ops_arg; 448 + } 449 + 464 450 if (flags & DLM_LSFL_TIMEWARN) 465 451 set_bit(LSFL_TIMEWARN, &ls->ls_flags); 466 452 ··· 481 457 if (!ls->ls_rsbtbl) 482 458 goto out_lsfree; 483 459 for (i = 0; i < size; i++) { 484 - INIT_LIST_HEAD(&ls->ls_rsbtbl[i].list); 485 - INIT_LIST_HEAD(&ls->ls_rsbtbl[i].toss); 460 + ls->ls_rsbtbl[i].keep.rb_node = NULL; 461 + ls->ls_rsbtbl[i].toss.rb_node = NULL; 486 462 spin_lock_init(&ls->ls_rsbtbl[i].lock); 487 463 } 488 464 ··· 548 524 ls->ls_recover_buf = kmalloc(dlm_config.ci_buffer_size, GFP_NOFS); 549 525 if (!ls->ls_recover_buf) 550 526 goto out_dirfree; 527 + 528 + ls->ls_slot = 0; 529 + ls->ls_num_slots = 0; 530 + ls->ls_slots_size = 0; 531 + ls->ls_slots = NULL; 551 532 552 533 INIT_LIST_HEAD(&ls->ls_recover_list); 553 534 spin_lock_init(&ls->ls_recover_list_lock); ··· 643 614 return error; 644 615 } 645 616 646 - int dlm_new_lockspace(const char *name, int namelen, void **lockspace, 647 - uint32_t flags, int lvblen) 617 + int dlm_new_lockspace(const char *name, const char *cluster, 618 + uint32_t flags, int lvblen, 619 + const struct dlm_lockspace_ops *ops, void *ops_arg, 620 + int *ops_result, dlm_lockspace_t **lockspace) 648 621 { 649 622 int error = 0; 650 623 ··· 656 625 if (error) 657 626 goto out; 658 627 659 - error = new_lockspace(name, namelen, lockspace, flags, lvblen); 628 + error = new_lockspace(name, cluster, flags, lvblen, ops, ops_arg, 629 + ops_result, lockspace); 660 630 if (!error) 661 631 ls_count++; 662 632 if (error > 0) ··· 717 685 static int release_lockspace(struct dlm_ls *ls, int force) 718 686 { 719 687 struct dlm_rsb *rsb; 720 - struct list_head *head; 688 + struct rb_node *n; 721 689 int i, busy, rv; 722 690 723 691 busy = lockspace_busy(ls, force); ··· 778 746 */ 779 747 780 748 for (i = 0; i < ls->ls_rsbtbl_size; i++) { 781 - head = &ls->ls_rsbtbl[i].list; 782 - while (!list_empty(head)) { 783 - rsb = list_entry(head->next, struct dlm_rsb, 784 - res_hashchain); 785 - 786 - list_del(&rsb->res_hashchain); 749 + while ((n = rb_first(&ls->ls_rsbtbl[i].keep))) { 750 + rsb = rb_entry(n, struct dlm_rsb, res_hashnode); 751 + rb_erase(n, &ls->ls_rsbtbl[i].keep); 787 752 dlm_free_rsb(rsb); 788 753 } 789 754 790 - head = &ls->ls_rsbtbl[i].toss; 791 - while (!list_empty(head)) { 792 - rsb = list_entry(head->next, struct dlm_rsb, 793 - res_hashchain); 794 - list_del(&rsb->res_hashchain); 755 + while ((n = rb_first(&ls->ls_rsbtbl[i].toss))) { 756 + rsb = rb_entry(n, struct dlm_rsb, res_hashnode); 757 + rb_erase(n, &ls->ls_rsbtbl[i].toss); 795 758 dlm_free_rsb(rsb); 796 759 } 797 760 }
+410 -80
fs/dlm/member.c
··· 1 1 /****************************************************************************** 2 2 ******************************************************************************* 3 3 ** 4 - ** Copyright (C) 2005-2009 Red Hat, Inc. All rights reserved. 4 + ** Copyright (C) 2005-2011 Red Hat, Inc. All rights reserved. 5 5 ** 6 6 ** This copyrighted material is made available to anyone wishing to use, 7 7 ** modify, copy, or redistribute it subject to the terms and conditions ··· 18 18 #include "rcom.h" 19 19 #include "config.h" 20 20 #include "lowcomms.h" 21 + 22 + int dlm_slots_version(struct dlm_header *h) 23 + { 24 + if ((h->h_version & 0x0000FFFF) < DLM_HEADER_SLOTS) 25 + return 0; 26 + return 1; 27 + } 28 + 29 + void dlm_slot_save(struct dlm_ls *ls, struct dlm_rcom *rc, 30 + struct dlm_member *memb) 31 + { 32 + struct rcom_config *rf = (struct rcom_config *)rc->rc_buf; 33 + 34 + if (!dlm_slots_version(&rc->rc_header)) 35 + return; 36 + 37 + memb->slot = le16_to_cpu(rf->rf_our_slot); 38 + memb->generation = le32_to_cpu(rf->rf_generation); 39 + } 40 + 41 + void dlm_slots_copy_out(struct dlm_ls *ls, struct dlm_rcom *rc) 42 + { 43 + struct dlm_slot *slot; 44 + struct rcom_slot *ro; 45 + int i; 46 + 47 + ro = (struct rcom_slot *)(rc->rc_buf + sizeof(struct rcom_config)); 48 + 49 + /* ls_slots array is sparse, but not rcom_slots */ 50 + 51 + for (i = 0; i < ls->ls_slots_size; i++) { 52 + slot = &ls->ls_slots[i]; 53 + if (!slot->nodeid) 54 + continue; 55 + ro->ro_nodeid = cpu_to_le32(slot->nodeid); 56 + ro->ro_slot = cpu_to_le16(slot->slot); 57 + ro++; 58 + } 59 + } 60 + 61 + #define SLOT_DEBUG_LINE 128 62 + 63 + static void log_debug_slots(struct dlm_ls *ls, uint32_t gen, int num_slots, 64 + struct rcom_slot *ro0, struct dlm_slot *array, 65 + int array_size) 66 + { 67 + char line[SLOT_DEBUG_LINE]; 68 + int len = SLOT_DEBUG_LINE - 1; 69 + int pos = 0; 70 + int ret, i; 71 + 72 + if (!dlm_config.ci_log_debug) 73 + return; 74 + 75 + memset(line, 0, sizeof(line)); 76 + 77 + if (array) { 78 + for (i = 0; i < array_size; i++) { 79 + if (!array[i].nodeid) 80 + continue; 81 + 82 + ret = snprintf(line + pos, len - pos, " %d:%d", 83 + array[i].slot, array[i].nodeid); 84 + if (ret >= len - pos) 85 + break; 86 + pos += ret; 87 + } 88 + } else if (ro0) { 89 + for (i = 0; i < num_slots; i++) { 90 + ret = snprintf(line + pos, len - pos, " %d:%d", 91 + ro0[i].ro_slot, ro0[i].ro_nodeid); 92 + if (ret >= len - pos) 93 + break; 94 + pos += ret; 95 + } 96 + } 97 + 98 + log_debug(ls, "generation %u slots %d%s", gen, num_slots, line); 99 + } 100 + 101 + int dlm_slots_copy_in(struct dlm_ls *ls) 102 + { 103 + struct dlm_member *memb; 104 + struct dlm_rcom *rc = ls->ls_recover_buf; 105 + struct rcom_config *rf = (struct rcom_config *)rc->rc_buf; 106 + struct rcom_slot *ro0, *ro; 107 + int our_nodeid = dlm_our_nodeid(); 108 + int i, num_slots; 109 + uint32_t gen; 110 + 111 + if (!dlm_slots_version(&rc->rc_header)) 112 + return -1; 113 + 114 + gen = le32_to_cpu(rf->rf_generation); 115 + if (gen <= ls->ls_generation) { 116 + log_error(ls, "dlm_slots_copy_in gen %u old %u", 117 + gen, ls->ls_generation); 118 + } 119 + ls->ls_generation = gen; 120 + 121 + num_slots = le16_to_cpu(rf->rf_num_slots); 122 + if (!num_slots) 123 + return -1; 124 + 125 + ro0 = (struct rcom_slot *)(rc->rc_buf + sizeof(struct rcom_config)); 126 + 127 + for (i = 0, ro = ro0; i < num_slots; i++, ro++) { 128 + ro->ro_nodeid = le32_to_cpu(ro->ro_nodeid); 129 + ro->ro_slot = le16_to_cpu(ro->ro_slot); 130 + } 131 + 132 + log_debug_slots(ls, gen, num_slots, ro0, NULL, 0); 133 + 134 + list_for_each_entry(memb, &ls->ls_nodes, list) { 135 + for (i = 0, ro = ro0; i < num_slots; i++, ro++) { 136 + if (ro->ro_nodeid != memb->nodeid) 137 + continue; 138 + memb->slot = ro->ro_slot; 139 + memb->slot_prev = memb->slot; 140 + break; 141 + } 142 + 143 + if (memb->nodeid == our_nodeid) { 144 + if (ls->ls_slot && ls->ls_slot != memb->slot) { 145 + log_error(ls, "dlm_slots_copy_in our slot " 146 + "changed %d %d", ls->ls_slot, 147 + memb->slot); 148 + return -1; 149 + } 150 + 151 + if (!ls->ls_slot) 152 + ls->ls_slot = memb->slot; 153 + } 154 + 155 + if (!memb->slot) { 156 + log_error(ls, "dlm_slots_copy_in nodeid %d no slot", 157 + memb->nodeid); 158 + return -1; 159 + } 160 + } 161 + 162 + return 0; 163 + } 164 + 165 + /* for any nodes that do not support slots, we will not have set memb->slot 166 + in wait_status_all(), so memb->slot will remain -1, and we will not 167 + assign slots or set ls_num_slots here */ 168 + 169 + int dlm_slots_assign(struct dlm_ls *ls, int *num_slots, int *slots_size, 170 + struct dlm_slot **slots_out, uint32_t *gen_out) 171 + { 172 + struct dlm_member *memb; 173 + struct dlm_slot *array; 174 + int our_nodeid = dlm_our_nodeid(); 175 + int array_size, max_slots, i; 176 + int need = 0; 177 + int max = 0; 178 + int num = 0; 179 + uint32_t gen = 0; 180 + 181 + /* our own memb struct will have slot -1 gen 0 */ 182 + 183 + list_for_each_entry(memb, &ls->ls_nodes, list) { 184 + if (memb->nodeid == our_nodeid) { 185 + memb->slot = ls->ls_slot; 186 + memb->generation = ls->ls_generation; 187 + break; 188 + } 189 + } 190 + 191 + list_for_each_entry(memb, &ls->ls_nodes, list) { 192 + if (memb->generation > gen) 193 + gen = memb->generation; 194 + 195 + /* node doesn't support slots */ 196 + 197 + if (memb->slot == -1) 198 + return -1; 199 + 200 + /* node needs a slot assigned */ 201 + 202 + if (!memb->slot) 203 + need++; 204 + 205 + /* node has a slot assigned */ 206 + 207 + num++; 208 + 209 + if (!max || max < memb->slot) 210 + max = memb->slot; 211 + 212 + /* sanity check, once slot is assigned it shouldn't change */ 213 + 214 + if (memb->slot_prev && memb->slot && memb->slot_prev != memb->slot) { 215 + log_error(ls, "nodeid %d slot changed %d %d", 216 + memb->nodeid, memb->slot_prev, memb->slot); 217 + return -1; 218 + } 219 + memb->slot_prev = memb->slot; 220 + } 221 + 222 + array_size = max + need; 223 + 224 + array = kzalloc(array_size * sizeof(struct dlm_slot), GFP_NOFS); 225 + if (!array) 226 + return -ENOMEM; 227 + 228 + num = 0; 229 + 230 + /* fill in slots (offsets) that are used */ 231 + 232 + list_for_each_entry(memb, &ls->ls_nodes, list) { 233 + if (!memb->slot) 234 + continue; 235 + 236 + if (memb->slot > array_size) { 237 + log_error(ls, "invalid slot number %d", memb->slot); 238 + kfree(array); 239 + return -1; 240 + } 241 + 242 + array[memb->slot - 1].nodeid = memb->nodeid; 243 + array[memb->slot - 1].slot = memb->slot; 244 + num++; 245 + } 246 + 247 + /* assign new slots from unused offsets */ 248 + 249 + list_for_each_entry(memb, &ls->ls_nodes, list) { 250 + if (memb->slot) 251 + continue; 252 + 253 + for (i = 0; i < array_size; i++) { 254 + if (array[i].nodeid) 255 + continue; 256 + 257 + memb->slot = i + 1; 258 + memb->slot_prev = memb->slot; 259 + array[i].nodeid = memb->nodeid; 260 + array[i].slot = memb->slot; 261 + num++; 262 + 263 + if (!ls->ls_slot && memb->nodeid == our_nodeid) 264 + ls->ls_slot = memb->slot; 265 + break; 266 + } 267 + 268 + if (!memb->slot) { 269 + log_error(ls, "no free slot found"); 270 + kfree(array); 271 + return -1; 272 + } 273 + } 274 + 275 + gen++; 276 + 277 + log_debug_slots(ls, gen, num, NULL, array, array_size); 278 + 279 + max_slots = (dlm_config.ci_buffer_size - sizeof(struct dlm_rcom) - 280 + sizeof(struct rcom_config)) / sizeof(struct rcom_slot); 281 + 282 + if (num > max_slots) { 283 + log_error(ls, "num_slots %d exceeds max_slots %d", 284 + num, max_slots); 285 + kfree(array); 286 + return -1; 287 + } 288 + 289 + *gen_out = gen; 290 + *slots_out = array; 291 + *slots_size = array_size; 292 + *num_slots = num; 293 + return 0; 294 + } 21 295 22 296 static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new) 23 297 { ··· 317 43 } 318 44 } 319 45 320 - static int dlm_add_member(struct dlm_ls *ls, int nodeid) 46 + static int dlm_add_member(struct dlm_ls *ls, struct dlm_config_node *node) 321 47 { 322 48 struct dlm_member *memb; 323 - int w, error; 49 + int error; 324 50 325 51 memb = kzalloc(sizeof(struct dlm_member), GFP_NOFS); 326 52 if (!memb) 327 53 return -ENOMEM; 328 54 329 - w = dlm_node_weight(ls->ls_name, nodeid); 330 - if (w < 0) { 331 - kfree(memb); 332 - return w; 333 - } 334 - 335 - error = dlm_lowcomms_connect_node(nodeid); 55 + error = dlm_lowcomms_connect_node(node->nodeid); 336 56 if (error < 0) { 337 57 kfree(memb); 338 58 return error; 339 59 } 340 60 341 - memb->nodeid = nodeid; 342 - memb->weight = w; 61 + memb->nodeid = node->nodeid; 62 + memb->weight = node->weight; 63 + memb->comm_seq = node->comm_seq; 343 64 add_ordered_member(ls, memb); 344 65 ls->ls_num_nodes++; 345 66 return 0; 346 67 } 347 68 348 - static void dlm_remove_member(struct dlm_ls *ls, struct dlm_member *memb) 69 + static struct dlm_member *find_memb(struct list_head *head, int nodeid) 349 70 { 350 - list_move(&memb->list, &ls->ls_nodes_gone); 351 - ls->ls_num_nodes--; 71 + struct dlm_member *memb; 72 + 73 + list_for_each_entry(memb, head, list) { 74 + if (memb->nodeid == nodeid) 75 + return memb; 76 + } 77 + return NULL; 352 78 } 353 79 354 80 int dlm_is_member(struct dlm_ls *ls, int nodeid) 355 81 { 356 - struct dlm_member *memb; 357 - 358 - list_for_each_entry(memb, &ls->ls_nodes, list) { 359 - if (memb->nodeid == nodeid) 360 - return 1; 361 - } 82 + if (find_memb(&ls->ls_nodes, nodeid)) 83 + return 1; 362 84 return 0; 363 85 } 364 86 365 87 int dlm_is_removed(struct dlm_ls *ls, int nodeid) 366 88 { 367 - struct dlm_member *memb; 368 - 369 - list_for_each_entry(memb, &ls->ls_nodes_gone, list) { 370 - if (memb->nodeid == nodeid) 371 - return 1; 372 - } 89 + if (find_memb(&ls->ls_nodes_gone, nodeid)) 90 + return 1; 373 91 return 0; 374 92 } 375 93 ··· 442 176 error = dlm_recovery_stopped(ls); 443 177 if (error) 444 178 break; 445 - error = dlm_rcom_status(ls, memb->nodeid); 179 + error = dlm_rcom_status(ls, memb->nodeid, 0); 446 180 if (error) 447 181 break; 448 182 } ··· 452 186 return error; 453 187 } 454 188 189 + static void dlm_lsop_recover_prep(struct dlm_ls *ls) 190 + { 191 + if (!ls->ls_ops || !ls->ls_ops->recover_prep) 192 + return; 193 + ls->ls_ops->recover_prep(ls->ls_ops_arg); 194 + } 195 + 196 + static void dlm_lsop_recover_slot(struct dlm_ls *ls, struct dlm_member *memb) 197 + { 198 + struct dlm_slot slot; 199 + uint32_t seq; 200 + int error; 201 + 202 + if (!ls->ls_ops || !ls->ls_ops->recover_slot) 203 + return; 204 + 205 + /* if there is no comms connection with this node 206 + or the present comms connection is newer 207 + than the one when this member was added, then 208 + we consider the node to have failed (versus 209 + being removed due to dlm_release_lockspace) */ 210 + 211 + error = dlm_comm_seq(memb->nodeid, &seq); 212 + 213 + if (!error && seq == memb->comm_seq) 214 + return; 215 + 216 + slot.nodeid = memb->nodeid; 217 + slot.slot = memb->slot; 218 + 219 + ls->ls_ops->recover_slot(ls->ls_ops_arg, &slot); 220 + } 221 + 222 + void dlm_lsop_recover_done(struct dlm_ls *ls) 223 + { 224 + struct dlm_member *memb; 225 + struct dlm_slot *slots; 226 + int i, num; 227 + 228 + if (!ls->ls_ops || !ls->ls_ops->recover_done) 229 + return; 230 + 231 + num = ls->ls_num_nodes; 232 + 233 + slots = kzalloc(num * sizeof(struct dlm_slot), GFP_KERNEL); 234 + if (!slots) 235 + return; 236 + 237 + i = 0; 238 + list_for_each_entry(memb, &ls->ls_nodes, list) { 239 + if (i == num) { 240 + log_error(ls, "dlm_lsop_recover_done bad num %d", num); 241 + goto out; 242 + } 243 + slots[i].nodeid = memb->nodeid; 244 + slots[i].slot = memb->slot; 245 + i++; 246 + } 247 + 248 + ls->ls_ops->recover_done(ls->ls_ops_arg, slots, num, 249 + ls->ls_slot, ls->ls_generation); 250 + out: 251 + kfree(slots); 252 + } 253 + 254 + static struct dlm_config_node *find_config_node(struct dlm_recover *rv, 255 + int nodeid) 256 + { 257 + int i; 258 + 259 + for (i = 0; i < rv->nodes_count; i++) { 260 + if (rv->nodes[i].nodeid == nodeid) 261 + return &rv->nodes[i]; 262 + } 263 + return NULL; 264 + } 265 + 455 266 int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) 456 267 { 457 268 struct dlm_member *memb, *safe; 458 - int i, error, found, pos = 0, neg = 0, low = -1; 269 + struct dlm_config_node *node; 270 + int i, error, neg = 0, low = -1; 459 271 460 272 /* previously removed members that we've not finished removing need to 461 273 count as a negative change so the "neg" recovery steps will happen */ ··· 546 202 /* move departed members from ls_nodes to ls_nodes_gone */ 547 203 548 204 list_for_each_entry_safe(memb, safe, &ls->ls_nodes, list) { 549 - found = 0; 550 - for (i = 0; i < rv->node_count; i++) { 551 - if (memb->nodeid == rv->nodeids[i]) { 552 - found = 1; 553 - break; 554 - } 555 - } 556 - 557 - if (!found) { 558 - neg++; 559 - dlm_remove_member(ls, memb); 560 - log_debug(ls, "remove member %d", memb->nodeid); 561 - } 562 - } 563 - 564 - /* Add an entry to ls_nodes_gone for members that were removed and 565 - then added again, so that previous state for these nodes will be 566 - cleared during recovery. */ 567 - 568 - for (i = 0; i < rv->new_count; i++) { 569 - if (!dlm_is_member(ls, rv->new[i])) 205 + node = find_config_node(rv, memb->nodeid); 206 + if (node && !node->new) 570 207 continue; 571 - log_debug(ls, "new nodeid %d is a re-added member", rv->new[i]); 572 208 573 - memb = kzalloc(sizeof(struct dlm_member), GFP_NOFS); 574 - if (!memb) 575 - return -ENOMEM; 576 - memb->nodeid = rv->new[i]; 577 - list_add_tail(&memb->list, &ls->ls_nodes_gone); 209 + if (!node) { 210 + log_debug(ls, "remove member %d", memb->nodeid); 211 + } else { 212 + /* removed and re-added */ 213 + log_debug(ls, "remove member %d comm_seq %u %u", 214 + memb->nodeid, memb->comm_seq, node->comm_seq); 215 + } 216 + 578 217 neg++; 218 + list_move(&memb->list, &ls->ls_nodes_gone); 219 + ls->ls_num_nodes--; 220 + dlm_lsop_recover_slot(ls, memb); 579 221 } 580 222 581 223 /* add new members to ls_nodes */ 582 224 583 - for (i = 0; i < rv->node_count; i++) { 584 - if (dlm_is_member(ls, rv->nodeids[i])) 225 + for (i = 0; i < rv->nodes_count; i++) { 226 + node = &rv->nodes[i]; 227 + if (dlm_is_member(ls, node->nodeid)) 585 228 continue; 586 - dlm_add_member(ls, rv->nodeids[i]); 587 - pos++; 588 - log_debug(ls, "add member %d", rv->nodeids[i]); 229 + dlm_add_member(ls, node); 230 + log_debug(ls, "add member %d", node->nodeid); 589 231 } 590 232 591 233 list_for_each_entry(memb, &ls->ls_nodes, list) { ··· 581 251 ls->ls_low_nodeid = low; 582 252 583 253 make_member_array(ls); 584 - dlm_set_recover_status(ls, DLM_RS_NODES); 585 254 *neg_out = neg; 586 255 587 256 error = ping_members(ls); ··· 590 261 ls->ls_members_result = error; 591 262 complete(&ls->ls_members_done); 592 263 } 593 - if (error) 594 - goto out; 595 264 596 - error = dlm_recover_members_wait(ls); 597 - out: 598 - log_debug(ls, "total members %d error %d", ls->ls_num_nodes, error); 265 + log_debug(ls, "dlm_recover_members %d nodes", ls->ls_num_nodes); 599 266 return error; 600 267 } 601 268 ··· 652 327 */ 653 328 654 329 dlm_recoverd_suspend(ls); 330 + 331 + spin_lock(&ls->ls_recover_lock); 332 + kfree(ls->ls_slots); 333 + ls->ls_slots = NULL; 334 + ls->ls_num_slots = 0; 335 + ls->ls_slots_size = 0; 655 336 ls->ls_recover_status = 0; 337 + spin_unlock(&ls->ls_recover_lock); 338 + 656 339 dlm_recoverd_resume(ls); 657 340 658 341 if (!ls->ls_recover_begin) 659 342 ls->ls_recover_begin = jiffies; 343 + 344 + dlm_lsop_recover_prep(ls); 660 345 return 0; 661 346 } 662 347 663 348 int dlm_ls_start(struct dlm_ls *ls) 664 349 { 665 350 struct dlm_recover *rv = NULL, *rv_old; 666 - int *ids = NULL, *new = NULL; 667 - int error, ids_count = 0, new_count = 0; 351 + struct dlm_config_node *nodes; 352 + int error, count; 668 353 669 354 rv = kzalloc(sizeof(struct dlm_recover), GFP_NOFS); 670 355 if (!rv) 671 356 return -ENOMEM; 672 357 673 - error = dlm_nodeid_list(ls->ls_name, &ids, &ids_count, 674 - &new, &new_count); 358 + error = dlm_config_nodes(ls->ls_name, &nodes, &count); 675 359 if (error < 0) 676 360 goto fail; 677 361 ··· 695 361 goto fail; 696 362 } 697 363 698 - rv->nodeids = ids; 699 - rv->node_count = ids_count; 700 - rv->new = new; 701 - rv->new_count = new_count; 364 + rv->nodes = nodes; 365 + rv->nodes_count = count; 702 366 rv->seq = ++ls->ls_recover_seq; 703 367 rv_old = ls->ls_recover_args; 704 368 ls->ls_recover_args = rv; ··· 704 372 705 373 if (rv_old) { 706 374 log_error(ls, "unused recovery %llx %d", 707 - (unsigned long long)rv_old->seq, rv_old->node_count); 708 - kfree(rv_old->nodeids); 709 - kfree(rv_old->new); 375 + (unsigned long long)rv_old->seq, rv_old->nodes_count); 376 + kfree(rv_old->nodes); 710 377 kfree(rv_old); 711 378 } 712 379 ··· 714 383 715 384 fail: 716 385 kfree(rv); 717 - kfree(ids); 718 - kfree(new); 386 + kfree(nodes); 719 387 return error; 720 388 } 721 389
+9 -1
fs/dlm/member.h
··· 1 1 /****************************************************************************** 2 2 ******************************************************************************* 3 3 ** 4 - ** Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved. 4 + ** Copyright (C) 2005-2011 Red Hat, Inc. All rights reserved. 5 5 ** 6 6 ** This copyrighted material is made available to anyone wishing to use, 7 7 ** modify, copy, or redistribute it subject to the terms and conditions ··· 20 20 int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv,int *neg_out); 21 21 int dlm_is_removed(struct dlm_ls *ls, int nodeid); 22 22 int dlm_is_member(struct dlm_ls *ls, int nodeid); 23 + int dlm_slots_version(struct dlm_header *h); 24 + void dlm_slot_save(struct dlm_ls *ls, struct dlm_rcom *rc, 25 + struct dlm_member *memb); 26 + void dlm_slots_copy_out(struct dlm_ls *ls, struct dlm_rcom *rc); 27 + int dlm_slots_copy_in(struct dlm_ls *ls); 28 + int dlm_slots_assign(struct dlm_ls *ls, int *num_slots, int *slots_size, 29 + struct dlm_slot **slots_out, uint32_t *gen_out); 30 + void dlm_lsop_recover_done(struct dlm_ls *ls); 23 31 24 32 #endif /* __MEMBER_DOT_H__ */ 25 33
+82 -17
fs/dlm/rcom.c
··· 23 23 #include "memory.h" 24 24 #include "lock.h" 25 25 #include "util.h" 26 + #include "member.h" 26 27 27 28 28 29 static int rcom_response(struct dlm_ls *ls) ··· 73 72 dlm_lowcomms_commit_buffer(mh); 74 73 } 75 74 75 + static void set_rcom_status(struct dlm_ls *ls, struct rcom_status *rs, 76 + uint32_t flags) 77 + { 78 + rs->rs_flags = cpu_to_le32(flags); 79 + } 80 + 76 81 /* When replying to a status request, a node also sends back its 77 82 configuration values. The requesting node then checks that the remote 78 83 node is configured the same way as itself. */ 79 84 80 - static void make_config(struct dlm_ls *ls, struct rcom_config *rf) 85 + static void set_rcom_config(struct dlm_ls *ls, struct rcom_config *rf, 86 + uint32_t num_slots) 81 87 { 82 88 rf->rf_lvblen = cpu_to_le32(ls->ls_lvblen); 83 89 rf->rf_lsflags = cpu_to_le32(ls->ls_exflags); 90 + 91 + rf->rf_our_slot = cpu_to_le16(ls->ls_slot); 92 + rf->rf_num_slots = cpu_to_le16(num_slots); 93 + rf->rf_generation = cpu_to_le32(ls->ls_generation); 84 94 } 85 95 86 - static int check_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) 96 + static int check_rcom_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) 87 97 { 88 98 struct rcom_config *rf = (struct rcom_config *) rc->rc_buf; 89 - size_t conf_size = sizeof(struct dlm_rcom) + sizeof(struct rcom_config); 90 99 91 100 if ((rc->rc_header.h_version & 0xFFFF0000) != DLM_HEADER_MAJOR) { 92 101 log_error(ls, "version mismatch: %x nodeid %d: %x", 93 102 DLM_HEADER_MAJOR | DLM_HEADER_MINOR, nodeid, 94 103 rc->rc_header.h_version); 95 - return -EPROTO; 96 - } 97 - 98 - if (rc->rc_header.h_length < conf_size) { 99 - log_error(ls, "config too short: %d nodeid %d", 100 - rc->rc_header.h_length, nodeid); 101 104 return -EPROTO; 102 105 } 103 106 ··· 132 127 spin_unlock(&ls->ls_rcom_spin); 133 128 } 134 129 135 - int dlm_rcom_status(struct dlm_ls *ls, int nodeid) 130 + /* 131 + * low nodeid gathers one slot value at a time from each node. 132 + * it sets need_slots=0, and saves rf_our_slot returned from each 133 + * rcom_config. 134 + * 135 + * other nodes gather all slot values at once from the low nodeid. 136 + * they set need_slots=1, and ignore the rf_our_slot returned from each 137 + * rcom_config. they use the rf_num_slots returned from the low 138 + * node's rcom_config. 139 + */ 140 + 141 + int dlm_rcom_status(struct dlm_ls *ls, int nodeid, uint32_t status_flags) 136 142 { 137 143 struct dlm_rcom *rc; 138 144 struct dlm_mhandle *mh; ··· 157 141 goto out; 158 142 } 159 143 160 - error = create_rcom(ls, nodeid, DLM_RCOM_STATUS, 0, &rc, &mh); 144 + error = create_rcom(ls, nodeid, DLM_RCOM_STATUS, 145 + sizeof(struct rcom_status), &rc, &mh); 161 146 if (error) 162 147 goto out; 148 + 149 + set_rcom_status(ls, (struct rcom_status *)rc->rc_buf, status_flags); 163 150 164 151 allow_sync_reply(ls, &rc->rc_id); 165 152 memset(ls->ls_recover_buf, 0, dlm_config.ci_buffer_size); ··· 180 161 /* we pretend the remote lockspace exists with 0 status */ 181 162 log_debug(ls, "remote node %d not ready", nodeid); 182 163 rc->rc_result = 0; 183 - } else 184 - error = check_config(ls, rc, nodeid); 164 + error = 0; 165 + } else { 166 + error = check_rcom_config(ls, rc, nodeid); 167 + } 168 + 185 169 /* the caller looks at rc_result for the remote recovery status */ 186 170 out: 187 171 return error; ··· 194 172 { 195 173 struct dlm_rcom *rc; 196 174 struct dlm_mhandle *mh; 197 - int error, nodeid = rc_in->rc_header.h_nodeid; 175 + struct rcom_status *rs; 176 + uint32_t status; 177 + int nodeid = rc_in->rc_header.h_nodeid; 178 + int len = sizeof(struct rcom_config); 179 + int num_slots = 0; 180 + int error; 198 181 182 + if (!dlm_slots_version(&rc_in->rc_header)) { 183 + status = dlm_recover_status(ls); 184 + goto do_create; 185 + } 186 + 187 + rs = (struct rcom_status *)rc_in->rc_buf; 188 + 189 + if (!(rs->rs_flags & DLM_RSF_NEED_SLOTS)) { 190 + status = dlm_recover_status(ls); 191 + goto do_create; 192 + } 193 + 194 + spin_lock(&ls->ls_recover_lock); 195 + status = ls->ls_recover_status; 196 + num_slots = ls->ls_num_slots; 197 + spin_unlock(&ls->ls_recover_lock); 198 + len += num_slots * sizeof(struct rcom_slot); 199 + 200 + do_create: 199 201 error = create_rcom(ls, nodeid, DLM_RCOM_STATUS_REPLY, 200 - sizeof(struct rcom_config), &rc, &mh); 202 + len, &rc, &mh); 201 203 if (error) 202 204 return; 205 + 203 206 rc->rc_id = rc_in->rc_id; 204 207 rc->rc_seq_reply = rc_in->rc_seq; 205 - rc->rc_result = dlm_recover_status(ls); 206 - make_config(ls, (struct rcom_config *) rc->rc_buf); 208 + rc->rc_result = status; 207 209 210 + set_rcom_config(ls, (struct rcom_config *)rc->rc_buf, num_slots); 211 + 212 + if (!num_slots) 213 + goto do_send; 214 + 215 + spin_lock(&ls->ls_recover_lock); 216 + if (ls->ls_num_slots != num_slots) { 217 + spin_unlock(&ls->ls_recover_lock); 218 + log_debug(ls, "receive_rcom_status num_slots %d to %d", 219 + num_slots, ls->ls_num_slots); 220 + rc->rc_result = 0; 221 + set_rcom_config(ls, (struct rcom_config *)rc->rc_buf, 0); 222 + goto do_send; 223 + } 224 + 225 + dlm_slots_copy_out(ls, rc); 226 + spin_unlock(&ls->ls_recover_lock); 227 + 228 + do_send: 208 229 send_rcom(ls, mh, rc); 209 230 } 210 231
+1 -1
fs/dlm/rcom.h
··· 14 14 #ifndef __RCOM_DOT_H__ 15 15 #define __RCOM_DOT_H__ 16 16 17 - int dlm_rcom_status(struct dlm_ls *ls, int nodeid); 17 + int dlm_rcom_status(struct dlm_ls *ls, int nodeid, uint32_t status_flags); 18 18 int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name,int last_len); 19 19 int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid); 20 20 int dlm_send_rcom_lock(struct dlm_rsb *r, struct dlm_lkb *lkb);
+69 -18
fs/dlm/recover.c
··· 85 85 return status; 86 86 } 87 87 88 + static void _set_recover_status(struct dlm_ls *ls, uint32_t status) 89 + { 90 + ls->ls_recover_status |= status; 91 + } 92 + 88 93 void dlm_set_recover_status(struct dlm_ls *ls, uint32_t status) 89 94 { 90 95 spin_lock(&ls->ls_recover_lock); 91 - ls->ls_recover_status |= status; 96 + _set_recover_status(ls, status); 92 97 spin_unlock(&ls->ls_recover_lock); 93 98 } 94 99 95 - static int wait_status_all(struct dlm_ls *ls, uint32_t wait_status) 100 + static int wait_status_all(struct dlm_ls *ls, uint32_t wait_status, 101 + int save_slots) 96 102 { 97 103 struct dlm_rcom *rc = ls->ls_recover_buf; 98 104 struct dlm_member *memb; ··· 112 106 goto out; 113 107 } 114 108 115 - error = dlm_rcom_status(ls, memb->nodeid); 109 + error = dlm_rcom_status(ls, memb->nodeid, 0); 116 110 if (error) 117 111 goto out; 112 + 113 + if (save_slots) 114 + dlm_slot_save(ls, rc, memb); 118 115 119 116 if (rc->rc_result & wait_status) 120 117 break; ··· 130 121 return error; 131 122 } 132 123 133 - static int wait_status_low(struct dlm_ls *ls, uint32_t wait_status) 124 + static int wait_status_low(struct dlm_ls *ls, uint32_t wait_status, 125 + uint32_t status_flags) 134 126 { 135 127 struct dlm_rcom *rc = ls->ls_recover_buf; 136 128 int error = 0, delay = 0, nodeid = ls->ls_low_nodeid; ··· 142 132 goto out; 143 133 } 144 134 145 - error = dlm_rcom_status(ls, nodeid); 135 + error = dlm_rcom_status(ls, nodeid, status_flags); 146 136 if (error) 147 137 break; 148 138 ··· 162 152 int error; 163 153 164 154 if (ls->ls_low_nodeid == dlm_our_nodeid()) { 165 - error = wait_status_all(ls, status); 155 + error = wait_status_all(ls, status, 0); 166 156 if (!error) 167 157 dlm_set_recover_status(ls, status_all); 168 158 } else 169 - error = wait_status_low(ls, status_all); 159 + error = wait_status_low(ls, status_all, 0); 170 160 171 161 return error; 172 162 } 173 163 174 164 int dlm_recover_members_wait(struct dlm_ls *ls) 175 165 { 176 - return wait_status(ls, DLM_RS_NODES); 166 + struct dlm_member *memb; 167 + struct dlm_slot *slots; 168 + int num_slots, slots_size; 169 + int error, rv; 170 + uint32_t gen; 171 + 172 + list_for_each_entry(memb, &ls->ls_nodes, list) { 173 + memb->slot = -1; 174 + memb->generation = 0; 175 + } 176 + 177 + if (ls->ls_low_nodeid == dlm_our_nodeid()) { 178 + error = wait_status_all(ls, DLM_RS_NODES, 1); 179 + if (error) 180 + goto out; 181 + 182 + /* slots array is sparse, slots_size may be > num_slots */ 183 + 184 + rv = dlm_slots_assign(ls, &num_slots, &slots_size, &slots, &gen); 185 + if (!rv) { 186 + spin_lock(&ls->ls_recover_lock); 187 + _set_recover_status(ls, DLM_RS_NODES_ALL); 188 + ls->ls_num_slots = num_slots; 189 + ls->ls_slots_size = slots_size; 190 + ls->ls_slots = slots; 191 + ls->ls_generation = gen; 192 + spin_unlock(&ls->ls_recover_lock); 193 + } else { 194 + dlm_set_recover_status(ls, DLM_RS_NODES_ALL); 195 + } 196 + } else { 197 + error = wait_status_low(ls, DLM_RS_NODES_ALL, DLM_RSF_NEED_SLOTS); 198 + if (error) 199 + goto out; 200 + 201 + dlm_slots_copy_in(ls); 202 + } 203 + out: 204 + return error; 177 205 } 178 206 179 207 int dlm_recover_directory_wait(struct dlm_ls *ls) ··· 590 542 out: 591 543 if (error) 592 544 recover_list_clear(ls); 593 - else 594 - dlm_set_recover_status(ls, DLM_RS_LOCKS); 595 545 return error; 596 546 } 597 547 ··· 761 715 762 716 int dlm_create_root_list(struct dlm_ls *ls) 763 717 { 718 + struct rb_node *n; 764 719 struct dlm_rsb *r; 765 720 int i, error = 0; 766 721 ··· 774 727 775 728 for (i = 0; i < ls->ls_rsbtbl_size; i++) { 776 729 spin_lock(&ls->ls_rsbtbl[i].lock); 777 - list_for_each_entry(r, &ls->ls_rsbtbl[i].list, res_hashchain) { 730 + for (n = rb_first(&ls->ls_rsbtbl[i].keep); n; n = rb_next(n)) { 731 + r = rb_entry(n, struct dlm_rsb, res_hashnode); 778 732 list_add(&r->res_root_list, &ls->ls_root_list); 779 733 dlm_hold_rsb(r); 780 734 } ··· 789 741 continue; 790 742 } 791 743 792 - list_for_each_entry(r, &ls->ls_rsbtbl[i].toss, res_hashchain) { 744 + for (n = rb_first(&ls->ls_rsbtbl[i].toss); n; n = rb_next(n)) { 745 + r = rb_entry(n, struct dlm_rsb, res_hashnode); 793 746 list_add(&r->res_root_list, &ls->ls_root_list); 794 747 dlm_hold_rsb(r); 795 748 } ··· 820 771 821 772 void dlm_clear_toss_list(struct dlm_ls *ls) 822 773 { 823 - struct dlm_rsb *r, *safe; 774 + struct rb_node *n, *next; 775 + struct dlm_rsb *rsb; 824 776 int i; 825 777 826 778 for (i = 0; i < ls->ls_rsbtbl_size; i++) { 827 779 spin_lock(&ls->ls_rsbtbl[i].lock); 828 - list_for_each_entry_safe(r, safe, &ls->ls_rsbtbl[i].toss, 829 - res_hashchain) { 830 - if (dlm_no_directory(ls) || !is_master(r)) { 831 - list_del(&r->res_hashchain); 832 - dlm_free_rsb(r); 780 + for (n = rb_first(&ls->ls_rsbtbl[i].toss); n; n = next) { 781 + next = rb_next(n);; 782 + rsb = rb_entry(n, struct dlm_rsb, res_hashnode); 783 + if (dlm_no_directory(ls) || !is_master(rsb)) { 784 + rb_erase(n, &ls->ls_rsbtbl[i].toss); 785 + dlm_free_rsb(rsb); 833 786 } 834 787 } 835 788 spin_unlock(&ls->ls_rsbtbl[i].lock);
+31 -22
fs/dlm/recoverd.c
··· 2 2 ******************************************************************************* 3 3 ** 4 4 ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 5 - ** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. 5 + ** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. 6 6 ** 7 7 ** This copyrighted material is made available to anyone wishing to use, 8 8 ** modify, copy, or redistribute it subject to the terms and conditions ··· 54 54 unsigned long start; 55 55 int error, neg = 0; 56 56 57 - log_debug(ls, "recover %llx", (unsigned long long)rv->seq); 57 + log_debug(ls, "dlm_recover %llx", (unsigned long long)rv->seq); 58 58 59 59 mutex_lock(&ls->ls_recoverd_active); 60 60 ··· 76 76 77 77 /* 78 78 * Add or remove nodes from the lockspace's ls_nodes list. 79 - * Also waits for all nodes to complete dlm_recover_members. 80 79 */ 81 80 82 81 error = dlm_recover_members(ls, rv, &neg); 83 82 if (error) { 84 - log_debug(ls, "recover_members failed %d", error); 83 + log_debug(ls, "dlm_recover_members error %d", error); 85 84 goto fail; 86 85 } 86 + 87 + dlm_set_recover_status(ls, DLM_RS_NODES); 88 + 89 + error = dlm_recover_members_wait(ls); 90 + if (error) { 91 + log_debug(ls, "dlm_recover_members_wait error %d", error); 92 + goto fail; 93 + } 94 + 87 95 start = jiffies; 88 96 89 97 /* ··· 101 93 102 94 error = dlm_recover_directory(ls); 103 95 if (error) { 104 - log_debug(ls, "recover_directory failed %d", error); 96 + log_debug(ls, "dlm_recover_directory error %d", error); 105 97 goto fail; 106 98 } 107 99 108 - /* 109 - * Wait for all nodes to complete directory rebuild. 110 - */ 100 + dlm_set_recover_status(ls, DLM_RS_DIR); 111 101 112 102 error = dlm_recover_directory_wait(ls); 113 103 if (error) { 114 - log_debug(ls, "recover_directory_wait failed %d", error); 104 + log_debug(ls, "dlm_recover_directory_wait error %d", error); 115 105 goto fail; 116 106 } 117 107 ··· 139 133 140 134 error = dlm_recover_masters(ls); 141 135 if (error) { 142 - log_debug(ls, "recover_masters failed %d", error); 136 + log_debug(ls, "dlm_recover_masters error %d", error); 143 137 goto fail; 144 138 } 145 139 ··· 149 143 150 144 error = dlm_recover_locks(ls); 151 145 if (error) { 152 - log_debug(ls, "recover_locks failed %d", error); 146 + log_debug(ls, "dlm_recover_locks error %d", error); 153 147 goto fail; 154 148 } 155 149 150 + dlm_set_recover_status(ls, DLM_RS_LOCKS); 151 + 156 152 error = dlm_recover_locks_wait(ls); 157 153 if (error) { 158 - log_debug(ls, "recover_locks_wait failed %d", error); 154 + log_debug(ls, "dlm_recover_locks_wait error %d", error); 159 155 goto fail; 160 156 } 161 157 ··· 178 170 179 171 error = dlm_recover_locks_wait(ls); 180 172 if (error) { 181 - log_debug(ls, "recover_locks_wait failed %d", error); 173 + log_debug(ls, "dlm_recover_locks_wait error %d", error); 182 174 goto fail; 183 175 } 184 176 } ··· 194 186 dlm_purge_requestqueue(ls); 195 187 196 188 dlm_set_recover_status(ls, DLM_RS_DONE); 189 + 197 190 error = dlm_recover_done_wait(ls); 198 191 if (error) { 199 - log_debug(ls, "recover_done_wait failed %d", error); 192 + log_debug(ls, "dlm_recover_done_wait error %d", error); 200 193 goto fail; 201 194 } 202 195 ··· 209 200 210 201 error = enable_locking(ls, rv->seq); 211 202 if (error) { 212 - log_debug(ls, "enable_locking failed %d", error); 203 + log_debug(ls, "enable_locking error %d", error); 213 204 goto fail; 214 205 } 215 206 216 207 error = dlm_process_requestqueue(ls); 217 208 if (error) { 218 - log_debug(ls, "process_requestqueue failed %d", error); 209 + log_debug(ls, "dlm_process_requestqueue error %d", error); 219 210 goto fail; 220 211 } 221 212 222 213 error = dlm_recover_waiters_post(ls); 223 214 if (error) { 224 - log_debug(ls, "recover_waiters_post failed %d", error); 215 + log_debug(ls, "dlm_recover_waiters_post error %d", error); 225 216 goto fail; 226 217 } 227 218 228 219 dlm_grant_after_purge(ls); 229 220 230 - log_debug(ls, "recover %llx done: %u ms", 231 - (unsigned long long)rv->seq, 221 + log_debug(ls, "dlm_recover %llx generation %u done: %u ms", 222 + (unsigned long long)rv->seq, ls->ls_generation, 232 223 jiffies_to_msecs(jiffies - start)); 233 224 mutex_unlock(&ls->ls_recoverd_active); 234 225 226 + dlm_lsop_recover_done(ls); 235 227 return 0; 236 228 237 229 fail: 238 230 dlm_release_root_list(ls); 239 - log_debug(ls, "recover %llx error %d", 231 + log_debug(ls, "dlm_recover %llx error %d", 240 232 (unsigned long long)rv->seq, error); 241 233 mutex_unlock(&ls->ls_recoverd_active); 242 234 return error; ··· 260 250 261 251 if (rv) { 262 252 ls_recover(ls, rv); 263 - kfree(rv->nodeids); 264 - kfree(rv->new); 253 + kfree(rv->nodes); 265 254 kfree(rv); 266 255 } 267 256 }
+3 -2
fs/dlm/user.c
··· 392 392 if (!capable(CAP_SYS_ADMIN)) 393 393 return -EPERM; 394 394 395 - error = dlm_new_lockspace(params->name, strlen(params->name), 396 - &lockspace, params->flags, DLM_USER_LVB_LEN); 395 + error = dlm_new_lockspace(params->name, NULL, params->flags, 396 + DLM_USER_LVB_LEN, NULL, NULL, NULL, 397 + &lockspace); 397 398 if (error) 398 399 return error; 399 400
+2 -2
fs/gfs2/lock_dlm.c
··· 195 195 return -EINVAL; 196 196 } 197 197 198 - error = dlm_new_lockspace(fsname, strlen(fsname), &ls->ls_dlm, 198 + error = dlm_new_lockspace(fsname, NULL, 199 199 DLM_LSFL_FS | DLM_LSFL_NEWEXCL | 200 200 (ls->ls_nodir ? DLM_LSFL_NODIR : 0), 201 - GDLM_LVB_SIZE); 201 + GDLM_LVB_SIZE, NULL, NULL, NULL, &ls->ls_dlm); 202 202 if (error) 203 203 printk(KERN_ERR "dlm_new_lockspace error %d", error); 204 204
+2 -2
fs/ocfs2/stack_user.c
··· 827 827 goto out; 828 828 } 829 829 830 - rc = dlm_new_lockspace(conn->cc_name, strlen(conn->cc_name), 831 - &fsdlm, DLM_LSFL_FS, DLM_LVB_LEN); 830 + rc = dlm_new_lockspace(conn->cc_name, NULL, DLM_LSFL_FS, DLM_LVB_LEN, 831 + NULL, NULL, NULL, &fsdlm); 832 832 if (rc) { 833 833 ocfs2_live_connection_drop(control); 834 834 goto out;
+66 -5
include/linux/dlm.h
··· 2 2 ******************************************************************************* 3 3 ** 4 4 ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 5 - ** Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. 5 + ** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. 6 6 ** 7 7 ** This copyrighted material is made available to anyone wishing to use, 8 8 ** modify, copy, or redistribute it subject to the terms and conditions ··· 74 74 75 75 #ifdef __KERNEL__ 76 76 77 + struct dlm_slot { 78 + int nodeid; /* 1 to MAX_INT */ 79 + int slot; /* 1 to MAX_INT */ 80 + }; 81 + 82 + /* 83 + * recover_prep: called before the dlm begins lock recovery. 84 + * Notfies lockspace user that locks from failed members will be granted. 85 + * recover_slot: called after recover_prep and before recover_done. 86 + * Identifies a failed lockspace member. 87 + * recover_done: called after the dlm completes lock recovery. 88 + * Identifies lockspace members and lockspace generation number. 89 + */ 90 + 91 + struct dlm_lockspace_ops { 92 + void (*recover_prep) (void *ops_arg); 93 + void (*recover_slot) (void *ops_arg, struct dlm_slot *slot); 94 + void (*recover_done) (void *ops_arg, struct dlm_slot *slots, 95 + int num_slots, int our_slot, uint32_t generation); 96 + }; 97 + 77 98 /* 78 99 * dlm_new_lockspace 79 100 * 80 - * Starts a lockspace with the given name. If the named lockspace exists in 81 - * the cluster, the calling node joins it. 101 + * Create/join a lockspace. 102 + * 103 + * name: lockspace name, null terminated, up to DLM_LOCKSPACE_LEN (not 104 + * including terminating null). 105 + * 106 + * cluster: cluster name, null terminated, up to DLM_LOCKSPACE_LEN (not 107 + * including terminating null). Optional. When cluster is null, it 108 + * is not used. When set, dlm_new_lockspace() returns -EBADR if cluster 109 + * is not equal to the dlm cluster name. 110 + * 111 + * flags: 112 + * DLM_LSFL_NODIR 113 + * The dlm should not use a resource directory, but statically assign 114 + * resource mastery to nodes based on the name hash that is otherwise 115 + * used to select the directory node. Must be the same on all nodes. 116 + * DLM_LSFL_TIMEWARN 117 + * The dlm should emit netlink messages if locks have been waiting 118 + * for a configurable amount of time. (Unused.) 119 + * DLM_LSFL_FS 120 + * The lockspace user is in the kernel (i.e. filesystem). Enables 121 + * direct bast/cast callbacks. 122 + * DLM_LSFL_NEWEXCL 123 + * dlm_new_lockspace() should return -EEXIST if the lockspace exists. 124 + * 125 + * lvblen: length of lvb in bytes. Must be multiple of 8. 126 + * dlm_new_lockspace() returns an error if this does not match 127 + * what other nodes are using. 128 + * 129 + * ops: callbacks that indicate lockspace recovery points so the 130 + * caller can coordinate its recovery and know lockspace members. 131 + * This is only used by the initial dlm_new_lockspace() call. 132 + * Optional. 133 + * 134 + * ops_arg: arg for ops callbacks. 135 + * 136 + * ops_result: tells caller if the ops callbacks (if provided) will 137 + * be used or not. 0: will be used, -EXXX will not be used. 138 + * -EOPNOTSUPP: the dlm does not have recovery_callbacks enabled. 139 + * 140 + * lockspace: handle for dlm functions 82 141 */ 83 142 84 - int dlm_new_lockspace(const char *name, int namelen, 85 - dlm_lockspace_t **lockspace, uint32_t flags, int lvblen); 143 + int dlm_new_lockspace(const char *name, const char *cluster, 144 + uint32_t flags, int lvblen, 145 + const struct dlm_lockspace_ops *ops, void *ops_arg, 146 + int *ops_result, dlm_lockspace_t **lockspace); 86 147 87 148 /* 88 149 * dlm_release_lockspace