commit b5dd80304da482d77b2320e1a01a189e656b9770 · tjh.dev/kernel

+161

fs/ocfs2/cluster/nodemanager.c

··· 532 } 533 #endif 534 535 static struct config_item *o2nm_node_group_make_item(struct config_group *group, 536 const char *name) 537 { ··· 768 769 static struct configfs_item_operations o2nm_cluster_item_ops = { 770 .release = o2nm_cluster_release, 771 }; 772 773 static struct config_item_type o2nm_cluster_type = { 774 .ct_item_ops = &o2nm_cluster_item_ops, 775 .ct_owner = THIS_MODULE, 776 }; 777 ··· 825 cluster->cl_group.default_groups[2] = NULL; 826 rwlock_init(&cluster->cl_nodes_lock); 827 cluster->cl_node_ip_tree = RB_ROOT; 828 829 ret = &cluster->cl_group; 830 o2nm_single_cluster = cluster;

··· 532 } 533 #endif 534 535 + struct o2nm_cluster_attribute { 536 + struct configfs_attribute attr; 537 + ssize_t (*show)(struct o2nm_cluster *, char *); 538 + ssize_t (*store)(struct o2nm_cluster *, const char *, size_t); 539 + }; 540 + 541 + static ssize_t o2nm_cluster_attr_write(const char *page, ssize_t count, 542 + unsigned int *val) 543 + { 544 + unsigned long tmp; 545 + char *p = (char *)page; 546 + 547 + tmp = simple_strtoul(p, &p, 0); 548 + if (!p || (*p && (*p != '\n'))) 549 + return -EINVAL; 550 + 551 + if (tmp == 0) 552 + return -EINVAL; 553 + if (tmp >= (u32)-1) 554 + return -ERANGE; 555 + 556 + *val = tmp; 557 + 558 + return count; 559 + } 560 + 561 + static ssize_t o2nm_cluster_attr_idle_timeout_ms_read( 562 + struct o2nm_cluster *cluster, char *page) 563 + { 564 + return sprintf(page, "%u\n", cluster->cl_idle_timeout_ms); 565 + } 566 + 567 + static ssize_t o2nm_cluster_attr_idle_timeout_ms_write( 568 + struct o2nm_cluster *cluster, const char *page, size_t count) 569 + { 570 + ssize_t ret; 571 + unsigned int val; 572 + 573 + ret = o2nm_cluster_attr_write(page, count, &val); 574 + 575 + if (ret > 0) { 576 + if (val <= cluster->cl_keepalive_delay_ms) { 577 + mlog(ML_NOTICE, "o2net: idle timeout must be larger " 578 + "than keepalive delay\n"); 579 + return -EINVAL; 580 + } 581 + cluster->cl_idle_timeout_ms = val; 582 + } 583 + 584 + return ret; 585 + } 586 + 587 + static ssize_t o2nm_cluster_attr_keepalive_delay_ms_read( 588 + struct o2nm_cluster *cluster, char *page) 589 + { 590 + return sprintf(page, "%u\n", cluster->cl_keepalive_delay_ms); 591 + } 592 + 593 + static ssize_t o2nm_cluster_attr_keepalive_delay_ms_write( 594 + struct o2nm_cluster *cluster, const char *page, size_t count) 595 + { 596 + ssize_t ret; 597 + unsigned int val; 598 + 599 + ret = o2nm_cluster_attr_write(page, count, &val); 600 + 601 + if (ret > 0) { 602 + if (val >= cluster->cl_idle_timeout_ms) { 603 + mlog(ML_NOTICE, "o2net: keepalive delay must be " 604 + "smaller than idle 
timeout\n"); 605 + return -EINVAL; 606 + } 607 + cluster->cl_keepalive_delay_ms = val; 608 + } 609 + 610 + return ret; 611 + } 612 + 613 + static ssize_t o2nm_cluster_attr_reconnect_delay_ms_read( 614 + struct o2nm_cluster *cluster, char *page) 615 + { 616 + return sprintf(page, "%u\n", cluster->cl_reconnect_delay_ms); 617 + } 618 + 619 + static ssize_t o2nm_cluster_attr_reconnect_delay_ms_write( 620 + struct o2nm_cluster *cluster, const char *page, size_t count) 621 + { 622 + return o2nm_cluster_attr_write(page, count, 623 + &cluster->cl_reconnect_delay_ms); 624 + } 625 + static struct o2nm_cluster_attribute o2nm_cluster_attr_idle_timeout_ms = { 626 + .attr = { .ca_owner = THIS_MODULE, 627 + .ca_name = "idle_timeout_ms", 628 + .ca_mode = S_IRUGO | S_IWUSR }, 629 + .show = o2nm_cluster_attr_idle_timeout_ms_read, 630 + .store = o2nm_cluster_attr_idle_timeout_ms_write, 631 + }; 632 + 633 + static struct o2nm_cluster_attribute o2nm_cluster_attr_keepalive_delay_ms = { 634 + .attr = { .ca_owner = THIS_MODULE, 635 + .ca_name = "keepalive_delay_ms", 636 + .ca_mode = S_IRUGO | S_IWUSR }, 637 + .show = o2nm_cluster_attr_keepalive_delay_ms_read, 638 + .store = o2nm_cluster_attr_keepalive_delay_ms_write, 639 + }; 640 + 641 + static struct o2nm_cluster_attribute o2nm_cluster_attr_reconnect_delay_ms = { 642 + .attr = { .ca_owner = THIS_MODULE, 643 + .ca_name = "reconnect_delay_ms", 644 + .ca_mode = S_IRUGO | S_IWUSR }, 645 + .show = o2nm_cluster_attr_reconnect_delay_ms_read, 646 + .store = o2nm_cluster_attr_reconnect_delay_ms_write, 647 + }; 648 + 649 + static struct configfs_attribute *o2nm_cluster_attrs[] = { 650 + &o2nm_cluster_attr_idle_timeout_ms.attr, 651 + &o2nm_cluster_attr_keepalive_delay_ms.attr, 652 + &o2nm_cluster_attr_reconnect_delay_ms.attr, 653 + NULL, 654 + }; 655 + static ssize_t o2nm_cluster_show(struct config_item *item, 656 + struct configfs_attribute *attr, 657 + char *page) 658 + { 659 + struct o2nm_cluster *cluster = to_o2nm_cluster(item); 660 + struct 
o2nm_cluster_attribute *o2nm_cluster_attr = 661 + container_of(attr, struct o2nm_cluster_attribute, attr); 662 + ssize_t ret = 0; 663 + 664 + if (o2nm_cluster_attr->show) 665 + ret = o2nm_cluster_attr->show(cluster, page); 666 + return ret; 667 + } 668 + 669 + static ssize_t o2nm_cluster_store(struct config_item *item, 670 + struct configfs_attribute *attr, 671 + const char *page, size_t count) 672 + { 673 + struct o2nm_cluster *cluster = to_o2nm_cluster(item); 674 + struct o2nm_cluster_attribute *o2nm_cluster_attr = 675 + container_of(attr, struct o2nm_cluster_attribute, attr); 676 + ssize_t ret; 677 + 678 + if (o2nm_cluster_attr->store == NULL) { 679 + ret = -EINVAL; 680 + goto out; 681 + } 682 + 683 + ret = o2nm_cluster_attr->store(cluster, page, count); 684 + if (ret < count) 685 + goto out; 686 + out: 687 + return ret; 688 + } 689 + 690 static struct config_item *o2nm_node_group_make_item(struct config_group *group, 691 const char *name) 692 { ··· 613 614 static struct configfs_item_operations o2nm_cluster_item_ops = { 615 .release = o2nm_cluster_release, 616 + .show_attribute = o2nm_cluster_show, 617 + .store_attribute = o2nm_cluster_store, 618 }; 619 620 static struct config_item_type o2nm_cluster_type = { 621 .ct_item_ops = &o2nm_cluster_item_ops, 622 + .ct_attrs = o2nm_cluster_attrs, 623 .ct_owner = THIS_MODULE, 624 }; 625 ··· 667 cluster->cl_group.default_groups[2] = NULL; 668 rwlock_init(&cluster->cl_nodes_lock); 669 cluster->cl_node_ip_tree = RB_ROOT; 670 + cluster->cl_reconnect_delay_ms = O2NET_RECONNECT_DELAY_MS_DEFAULT; 671 + cluster->cl_idle_timeout_ms = O2NET_IDLE_TIMEOUT_MS_DEFAULT; 672 + cluster->cl_keepalive_delay_ms = O2NET_KEEPALIVE_DELAY_MS_DEFAULT; 673 674 ret = &cluster->cl_group; 675 o2nm_single_cluster = cluster;

+3

fs/ocfs2/cluster/nodemanager.h

··· 60 rwlock_t cl_nodes_lock; 61 struct o2nm_node *cl_nodes[O2NM_MAX_NODES]; 62 struct rb_root cl_node_ip_tree; 63 64 /* this bitmap is part of a hack for disk bitmap.. will go eventually. - zab */ 65 unsigned long cl_nodes_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];

··· 60 rwlock_t cl_nodes_lock; 61 struct o2nm_node *cl_nodes[O2NM_MAX_NODES]; 62 struct rb_root cl_node_ip_tree; 63 + unsigned int cl_idle_timeout_ms; 64 + unsigned int cl_keepalive_delay_ms; 65 + unsigned int cl_reconnect_delay_ms; 66 67 /* this bitmap is part of a hack for disk bitmap.. will go eventually. - zab */ 68 unsigned long cl_nodes_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];

+48 -12

fs/ocfs2/cluster/tcp.c

··· 147 static void o2net_sc_send_keep_req(struct work_struct *work); 148 static void o2net_idle_timer(unsigned long data); 149 static void o2net_sc_postpone_idle(struct o2net_sock_container *sc); 150 151 static inline int o2net_sys_err_to_errno(enum o2net_system_error err) 152 { ··· 293 { 294 struct o2net_sock_container *sc = container_of(kref, 295 struct o2net_sock_container, sc_kref); 296 sclog(sc, "releasing\n"); 297 298 if (sc->sc_sock) { ··· 448 /* delay if we're withing a RECONNECT_DELAY of the 449 * last attempt */ 450 delay = (nn->nn_last_connect_attempt + 451 - msecs_to_jiffies(O2NET_RECONNECT_DELAY_MS)) 452 - jiffies; 453 - if (delay > msecs_to_jiffies(O2NET_RECONNECT_DELAY_MS)) 454 delay = 0; 455 mlog(ML_CONN, "queueing conn attempt in %lu jiffies\n", delay); 456 queue_delayed_work(o2net_wq, &nn->nn_connect_work, delay); ··· 1129 /* set valid and queue the idle timers only if it hasn't been 1130 * shut down already */ 1131 if (nn->nn_sc == sc) { 1132 - o2net_sc_postpone_idle(sc); 1133 o2net_set_nn_state(nn, sc, 1, 0); 1134 } 1135 spin_unlock(&nn->nn_lock); ··· 1311 1312 do_gettimeofday(&now); 1313 1314 - printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT " has been idle for 10 " 1315 - "seconds, shutting it down.\n", SC_NODEF_ARGS(sc)); 1316 mlog(ML_NOTICE, "here are some times that might help debug the " 1317 "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv " 1318 "%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n", ··· 1332 o2net_sc_queue_work(sc, &sc->sc_shutdown_work); 1333 } 1334 1335 - static void o2net_sc_postpone_idle(struct o2net_sock_container *sc) 1336 { 1337 o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work); 1338 o2net_sc_queue_delayed_work(sc, &sc->sc_keepalive_work, 1339 - O2NET_KEEPALIVE_DELAY_SECS * HZ); 1340 do_gettimeofday(&sc->sc_tv_timer); 1341 mod_timer(&sc->sc_idle_timeout, 1342 - jiffies + (O2NET_IDLE_TIMEOUT_SECS * HZ)); 1343 } 1344 1345 /* this work func is kicked whenever a path sets the nn state which doesn't ··· 
1468 1469 spin_lock(&nn->nn_lock); 1470 if (!nn->nn_sc_valid) { 1471 mlog(ML_ERROR, "no connection established with node %u after " 1472 - "%u seconds, giving up and returning errors.\n", 1473 - o2net_num_from_nn(nn), O2NET_IDLE_TIMEOUT_SECS); 1474 1475 o2net_set_nn_state(nn, NULL, 0, -ENOTCONN); 1476 } ··· 1525 1526 /* ensure an immediate connect attempt */ 1527 nn->nn_last_connect_attempt = jiffies - 1528 - (msecs_to_jiffies(O2NET_RECONNECT_DELAY_MS) + 1); 1529 1530 if (node_num != o2nm_this_node()) { 1531 /* heartbeat doesn't work unless a local node number is 1532 * configured and doing so brings up the o2net_wq, so we can 1533 * use it.. */ 1534 queue_delayed_work(o2net_wq, &nn->nn_connect_expired, 1535 - O2NET_IDLE_TIMEOUT_SECS * HZ); 1536 1537 /* believe it or not, accept and node hearbeating testing 1538 * can succeed for this node before we got here.. so

··· 147 static void o2net_sc_send_keep_req(struct work_struct *work); 148 static void o2net_idle_timer(unsigned long data); 149 static void o2net_sc_postpone_idle(struct o2net_sock_container *sc); 150 + static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc); 151 + 152 + /* 153 + * FIXME: These should use to_o2nm_cluster_from_node(), but we end up 154 + * losing our parent link to the cluster during shutdown. This can be 155 + * solved by adding a pre-removal callback to configfs, or passing 156 + * around the cluster with the node. -jeffm 157 + */ 158 + static inline int o2net_reconnect_delay(struct o2nm_node *node) 159 + { 160 + return o2nm_single_cluster->cl_reconnect_delay_ms; 161 + } 162 + 163 + static inline int o2net_keepalive_delay(struct o2nm_node *node) 164 + { 165 + return o2nm_single_cluster->cl_keepalive_delay_ms; 166 + } 167 + 168 + static inline int o2net_idle_timeout(struct o2nm_node *node) 169 + { 170 + return o2nm_single_cluster->cl_idle_timeout_ms; 171 + } 172 173 static inline int o2net_sys_err_to_errno(enum o2net_system_error err) 174 { ··· 271 { 272 struct o2net_sock_container *sc = container_of(kref, 273 struct o2net_sock_container, sc_kref); 274 + BUG_ON(timer_pending(&sc->sc_idle_timeout)); 275 + 276 sclog(sc, "releasing\n"); 277 278 if (sc->sc_sock) { ··· 424 /* delay if we're withing a RECONNECT_DELAY of the 425 * last attempt */ 426 delay = (nn->nn_last_connect_attempt + 427 + msecs_to_jiffies(o2net_reconnect_delay(sc->sc_node))) 428 - jiffies; 429 + if (delay > msecs_to_jiffies(o2net_reconnect_delay(sc->sc_node))) 430 delay = 0; 431 mlog(ML_CONN, "queueing conn attempt in %lu jiffies\n", delay); 432 queue_delayed_work(o2net_wq, &nn->nn_connect_work, delay); ··· 1105 /* set valid and queue the idle timers only if it hasn't been 1106 * shut down already */ 1107 if (nn->nn_sc == sc) { 1108 + o2net_sc_reset_idle_timer(sc); 1109 o2net_set_nn_state(nn, sc, 1, 0); 1110 } 1111 spin_unlock(&nn->nn_lock); ··· 1287 1288 
do_gettimeofday(&now); 1289 1290 + printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u " 1291 + "seconds, shutting it down.\n", SC_NODEF_ARGS(sc), 1292 + o2net_idle_timeout(sc->sc_node) / 1000, 1293 + o2net_idle_timeout(sc->sc_node) % 1000); 1294 mlog(ML_NOTICE, "here are some times that might help debug the " 1295 "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv " 1296 "%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n", ··· 1306 o2net_sc_queue_work(sc, &sc->sc_shutdown_work); 1307 } 1308 1309 + static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc) 1310 { 1311 o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work); 1312 o2net_sc_queue_delayed_work(sc, &sc->sc_keepalive_work, 1313 + msecs_to_jiffies(o2net_keepalive_delay(sc->sc_node))); 1314 do_gettimeofday(&sc->sc_tv_timer); 1315 mod_timer(&sc->sc_idle_timeout, 1316 + jiffies + msecs_to_jiffies(o2net_idle_timeout(sc->sc_node))); 1317 + } 1318 + 1319 + static void o2net_sc_postpone_idle(struct o2net_sock_container *sc) 1320 + { 1321 + /* Only push out an existing timer */ 1322 + if (timer_pending(&sc->sc_idle_timeout)) 1323 + o2net_sc_reset_idle_timer(sc); 1324 } 1325 1326 /* this work func is kicked whenever a path sets the nn state which doesn't ··· 1435 1436 spin_lock(&nn->nn_lock); 1437 if (!nn->nn_sc_valid) { 1438 + struct o2nm_node *node = nn->nn_sc->sc_node; 1439 mlog(ML_ERROR, "no connection established with node %u after " 1440 + "%u.%u seconds, giving up and returning errors.\n", 1441 + o2net_num_from_nn(nn), 1442 + o2net_idle_timeout(node) / 1000, 1443 + o2net_idle_timeout(node) % 1000); 1444 1445 o2net_set_nn_state(nn, NULL, 0, -ENOTCONN); 1446 } ··· 1489 1490 /* ensure an immediate connect attempt */ 1491 nn->nn_last_connect_attempt = jiffies - 1492 + (msecs_to_jiffies(o2net_reconnect_delay(node)) + 1); 1493 1494 if (node_num != o2nm_this_node()) { 1495 /* heartbeat doesn't work unless a local node number is 1496 * configured and doing so brings up the 
o2net_wq, so we can 1497 * use it.. */ 1498 queue_delayed_work(o2net_wq, &nn->nn_connect_expired, 1499 + msecs_to_jiffies(o2net_idle_timeout(node))); 1500 1501 /* believe it or not, accept and node hearbeating testing 1502 * can succeed for this node before we got here.. so

+7

fs/ocfs2/cluster/tcp.h

··· 54 55 #define O2NET_MAX_PAYLOAD_BYTES (4096 - sizeof(struct o2net_msg)) 56 57 /* TODO: figure this out.... */ 58 static inline int o2net_link_down(int err, struct socket *sock) 59 {

··· 54 55 #define O2NET_MAX_PAYLOAD_BYTES (4096 - sizeof(struct o2net_msg)) 56 57 + /* Default reconnect delay in ms, used as the initial cl_reconnect_delay_ms of a new cluster. Same as hb delay, we're waiting for another node to recognize our hb. NOTE(review): this used to be defined as O2HB_REGION_TIMEOUT_MS - confirm the literal still matches it. */ 58 + #define O2NET_RECONNECT_DELAY_MS_DEFAULT 2000 59 + /* Default keepalive delay and idle timeout in ms, used as the initial cl_keepalive_delay_ms and cl_idle_timeout_ms of a new cluster. The configfs store handlers only accept updates that keep the idle timeout strictly larger than the keepalive delay. */ 60 + #define O2NET_KEEPALIVE_DELAY_MS_DEFAULT 5000 61 + #define O2NET_IDLE_TIMEOUT_MS_DEFAULT 10000 62 + 63 + 64 /* TODO: figure this out.... */ 65 static inline int o2net_link_down(int err, struct socket *sock) 66 {

-6

fs/ocfs2/cluster/tcp_internal.h

··· 27 #define O2NET_MSG_KEEP_REQ_MAGIC ((u16)0xfa57) 28 #define O2NET_MSG_KEEP_RESP_MAGIC ((u16)0xfa58) 29 30 - /* same as hb delay, we're waiting for another node to recognize our hb */ 31 - #define O2NET_RECONNECT_DELAY_MS O2HB_REGION_TIMEOUT_MS 32 - 33 /* we're delaying our quorum decision so that heartbeat will have timed 34 * out truly dead nodes by the time we come around to making decisions 35 * on their number */ 36 #define O2NET_QUORUM_DELAY_MS ((o2hb_dead_threshold + 2) * O2HB_REGION_TIMEOUT_MS) 37 - 38 - #define O2NET_KEEPALIVE_DELAY_SECS 5 39 - #define O2NET_IDLE_TIMEOUT_SECS 10 40 41 /* 42 * This version number represents quite a lot, unfortunately. It not

··· 27 #define O2NET_MSG_KEEP_REQ_MAGIC ((u16)0xfa57) 28 #define O2NET_MSG_KEEP_RESP_MAGIC ((u16)0xfa58) 29 30 /* we're delaying our quorum decision so that heartbeat will have timed 31 * out truly dead nodes by the time we come around to making decisions 32 * on their number */ 33 #define O2NET_QUORUM_DELAY_MS ((o2hb_dead_threshold + 2) * O2HB_REGION_TIMEOUT_MS) 34 35 /* 36 * This version number represents quite a lot, unfortunately. It not