commit 741441ab7800f1eb031e74fd720f4f8f361678ed · tjh.dev/kernel

+3

Documentation/filesystems/ocfs2.txt

··· 54 54 intr (*) Allow signals to interrupt cluster operations. 55 55 nointr Do not allow signals to interrupt cluster 56 56 operations. 57 + atime_quantum=60(*) OCFS2 will not update atime unless this number 58 + of seconds has passed since the last update. 59 + Set to zero to always update atime.

+180 -12

fs/ocfs2/cluster/nodemanager.c

··· 35 35 /* for now we operate under the assertion that there can be only one 36 36 * cluster active at a time. Changing this will require trickling 37 37 * cluster references throughout where nodes are looked up */ 38 - static struct o2nm_cluster *o2nm_single_cluster = NULL; 38 + struct o2nm_cluster *o2nm_single_cluster = NULL; 39 39 40 40 #define OCFS2_MAX_HB_CTL_PATH 256 41 41 static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl"; ··· 96 96 return ocfs2_hb_ctl_path; 97 97 } 98 98 EXPORT_SYMBOL_GPL(o2nm_get_hb_ctl_path); 99 - 100 - struct o2nm_cluster { 101 - struct config_group cl_group; 102 - unsigned cl_has_local:1; 103 - u8 cl_local_node; 104 - rwlock_t cl_nodes_lock; 105 - struct o2nm_node *cl_nodes[O2NM_MAX_NODES]; 106 - struct rb_root cl_node_ip_tree; 107 - /* this bitmap is part of a hack for disk bitmap.. will go eventually. - zab */ 108 - unsigned long cl_nodes_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; 109 - }; 110 99 111 100 struct o2nm_node *o2nm_get_node_by_num(u8 node_num) 112 101 { ··· 532 543 } 533 544 #endif 534 545 546 + /* Cluster-level configfs attribute: wraps the generic configfs_attribute together with typed show/store callbacks that receive the owning o2nm_cluster. */ struct o2nm_cluster_attribute { 547 + struct configfs_attribute attr; 548 + ssize_t (*show)(struct o2nm_cluster *, char *); 549 + ssize_t (*store)(struct o2nm_cluster *, const char *, size_t); 550 + }; 551 + 552 + /* Parse an unsigned number from a configfs page buffer into *val (base auto-detected by simple_strtoul). A single trailing newline is accepted; any other trailing character or a value of zero yields -EINVAL, and values >= (u32)-1 yield -ERANGE. Returns count on success. */ static ssize_t o2nm_cluster_attr_write(const char *page, ssize_t count, 553 + unsigned int *val) 554 + { 555 + unsigned long tmp; 556 + char *p = (char *)page; 557 + 558 + tmp = simple_strtoul(p, &p, 0); 559 + if (!p || (*p && (*p != '\n'))) 560 + return -EINVAL; 561 + 562 + if (tmp == 0) 563 + return -EINVAL; 564 + if (tmp >= (u32)-1) 565 + return -ERANGE; 566 + 567 + *val = tmp; 568 + 569 + return count; 570 + } 571 + 572 + /* show handler: prints cl_idle_timeout_ms followed by a newline */ static ssize_t o2nm_cluster_attr_idle_timeout_ms_read( 573 + struct o2nm_cluster *cluster, char *page) 574 + { 575 + return sprintf(page, "%u\n", cluster->cl_idle_timeout_ms); 576 + } 577 + 578 + /* store handler for idle_timeout_ms: a changed value is refused with -EINVAL while o2net_num_connected_peers() is non-zero, and the value must be strictly larger than cl_keepalive_delay_ms */ static ssize_t o2nm_cluster_attr_idle_timeout_ms_write( 579 + struct o2nm_cluster
*cluster, const char *page, size_t count) 580 + { 581 + ssize_t ret; 582 + unsigned int val; 583 + 584 + ret = o2nm_cluster_attr_write(page, count, &val); 585 + 586 + if (ret > 0) { 587 + if (cluster->cl_idle_timeout_ms != val 588 + && o2net_num_connected_peers()) { 589 + mlog(ML_NOTICE, 590 + "o2net: cannot change idle timeout after " 591 + "the first peer has agreed to it." 592 + " %d connected peers\n", 593 + o2net_num_connected_peers()); 594 + ret = -EINVAL; 595 + } else if (val <= cluster->cl_keepalive_delay_ms) { 596 + mlog(ML_NOTICE, "o2net: idle timeout must be larger " 597 + "than keepalive delay\n"); 598 + ret = -EINVAL; 599 + } else { 600 + cluster->cl_idle_timeout_ms = val; 601 + } 602 + } 603 + 604 + return ret; 605 + } 606 + 607 + /* show handler: prints cl_keepalive_delay_ms followed by a newline */ static ssize_t o2nm_cluster_attr_keepalive_delay_ms_read( 608 + struct o2nm_cluster *cluster, char *page) 609 + { 610 + return sprintf(page, "%u\n", cluster->cl_keepalive_delay_ms); 611 + } 612 + 613 + /* store handler for keepalive_delay_ms: a changed value is refused with -EINVAL while o2net_num_connected_peers() is non-zero, and the value must be strictly smaller than cl_idle_timeout_ms */ static ssize_t o2nm_cluster_attr_keepalive_delay_ms_write( 614 + struct o2nm_cluster *cluster, const char *page, size_t count) 615 + { 616 + ssize_t ret; 617 + unsigned int val; 618 + 619 + ret = o2nm_cluster_attr_write(page, count, &val); 620 + 621 + if (ret > 0) { 622 + if (cluster->cl_keepalive_delay_ms != val 623 + && o2net_num_connected_peers()) { 624 + mlog(ML_NOTICE, 625 + "o2net: cannot change keepalive delay after" 626 + " the first peer has agreed to it."
627 + " %d connected peers\n", 628 + o2net_num_connected_peers()); 629 + ret = -EINVAL; 630 + } else if (val >= cluster->cl_idle_timeout_ms) { 631 + mlog(ML_NOTICE, "o2net: keepalive delay must be " 632 + "smaller than idle timeout\n"); 633 + ret = -EINVAL; 634 + } else { 635 + cluster->cl_keepalive_delay_ms = val; 636 + } 637 + } 638 + 639 + return ret; 640 + } 641 + 642 + /* show handler: prints cl_reconnect_delay_ms followed by a newline */ static ssize_t o2nm_cluster_attr_reconnect_delay_ms_read( 643 + struct o2nm_cluster *cluster, char *page) 644 + { 645 + return sprintf(page, "%u\n", cluster->cl_reconnect_delay_ms); 646 + } 647 + 648 + /* store handler for reconnect_delay_ms: the parsed value is written straight into the cluster field; no connected-peer or ordering check is applied here */ static ssize_t o2nm_cluster_attr_reconnect_delay_ms_write( 649 + struct o2nm_cluster *cluster, const char *page, size_t count) 650 + { 651 + return o2nm_cluster_attr_write(page, count, 652 + &cluster->cl_reconnect_delay_ms); 653 + } 654 + /* attribute definitions: readable by everyone, writable by the owner (S_IRUGO | S_IWUSR) */ static struct o2nm_cluster_attribute o2nm_cluster_attr_idle_timeout_ms = { 655 + .attr = { .ca_owner = THIS_MODULE, 656 + .ca_name = "idle_timeout_ms", 657 + .ca_mode = S_IRUGO | S_IWUSR }, 658 + .show = o2nm_cluster_attr_idle_timeout_ms_read, 659 + .store = o2nm_cluster_attr_idle_timeout_ms_write, 660 + }; 661 + 662 + static struct o2nm_cluster_attribute o2nm_cluster_attr_keepalive_delay_ms = { 663 + .attr = { .ca_owner = THIS_MODULE, 664 + .ca_name = "keepalive_delay_ms", 665 + .ca_mode = S_IRUGO | S_IWUSR }, 666 + .show = o2nm_cluster_attr_keepalive_delay_ms_read, 667 + .store = o2nm_cluster_attr_keepalive_delay_ms_write, 668 + }; 669 + 670 + static struct o2nm_cluster_attribute o2nm_cluster_attr_reconnect_delay_ms = { 671 + .attr = { .ca_owner = THIS_MODULE, 672 + .ca_name = "reconnect_delay_ms", 673 + .ca_mode = S_IRUGO | S_IWUSR }, 674 + .show = o2nm_cluster_attr_reconnect_delay_ms_read, 675 + .store = o2nm_cluster_attr_reconnect_delay_ms_write, 676 + }; 677 + 678 + /* NULL-terminated attribute list, installed through ct_attrs of o2nm_cluster_type */ static struct configfs_attribute *o2nm_cluster_attrs[] = { 679 + &o2nm_cluster_attr_idle_timeout_ms.attr, 680 + &o2nm_cluster_attr_keepalive_delay_ms.attr, 681 +
&o2nm_cluster_attr_reconnect_delay_ms.attr, 682 + NULL, 683 + }; 684 + /* show_attribute hook: recovers the typed attribute with container_of and calls its show callback; returns 0 when no callback is set */ static ssize_t o2nm_cluster_show(struct config_item *item, 685 + struct configfs_attribute *attr, 686 + char *page) 687 + { 688 + struct o2nm_cluster *cluster = to_o2nm_cluster(item); 689 + struct o2nm_cluster_attribute *o2nm_cluster_attr = 690 + container_of(attr, struct o2nm_cluster_attribute, attr); 691 + ssize_t ret = 0; 692 + 693 + if (o2nm_cluster_attr->show) 694 + ret = o2nm_cluster_attr->show(cluster, page); 695 + return ret; 696 + } 697 + 698 + /* store_attribute hook: -EINVAL when the attribute has no store callback, otherwise the return value of that callback. NOTE(review): the ret < count test compares ssize_t with size_t and both outcomes reach the out label, so it has no effect. */ static ssize_t o2nm_cluster_store(struct config_item *item, 699 + struct configfs_attribute *attr, 700 + const char *page, size_t count) 701 + { 702 + struct o2nm_cluster *cluster = to_o2nm_cluster(item); 703 + struct o2nm_cluster_attribute *o2nm_cluster_attr = 704 + container_of(attr, struct o2nm_cluster_attribute, attr); 705 + ssize_t ret; 706 + 707 + if (o2nm_cluster_attr->store == NULL) { 708 + ret = -EINVAL; 709 + goto out; 710 + } 711 + 712 + ret = o2nm_cluster_attr->store(cluster, page, count); 713 + if (ret < count) 714 + goto out; 715 + out: 716 + return ret; 717 + } 718 + 535 719 static struct config_item *o2nm_node_group_make_item(struct config_group *group, 536 720 const char *name) 537 721 { ··· 786 624 787 625 static struct configfs_item_operations o2nm_cluster_item_ops = { 788 626 .release = o2nm_cluster_release, 627 + .show_attribute = o2nm_cluster_show, 628 + .store_attribute = o2nm_cluster_store, 789 629 }; 790 630 791 631 static struct config_item_type o2nm_cluster_type = { 792 632 .ct_item_ops = &o2nm_cluster_item_ops, 633 + .ct_attrs = o2nm_cluster_attrs, 793 634 .ct_owner = THIS_MODULE, 794 635 }; 795 636 ··· 843 678 cluster->cl_group.default_groups[2] = NULL; 844 679 rwlock_init(&cluster->cl_nodes_lock); 845 680 cluster->cl_node_ip_tree = RB_ROOT; 681 + /* a new cluster starts with the compile-time o2net default timeouts */ cluster->cl_reconnect_delay_ms = O2NET_RECONNECT_DELAY_MS_DEFAULT; 682 + cluster->cl_idle_timeout_ms = O2NET_IDLE_TIMEOUT_MS_DEFAULT; 683 + cluster->cl_keepalive_delay_ms =
O2NET_KEEPALIVE_DELAY_MS_DEFAULT; 846 684 847 685 ret = &cluster->cl_group; 848 686 o2nm_single_cluster = cluster;

+17

fs/ocfs2/cluster/nodemanager.h

··· 53 53 unsigned long nd_set_attributes; 54 54 }; 55 55 56 + /* Cluster-wide node manager state. The three cl_*_ms members hold the o2net timeouts for the cluster. */ struct o2nm_cluster { 57 + struct config_group cl_group; 58 + unsigned cl_has_local:1; 59 + u8 cl_local_node; 60 + rwlock_t cl_nodes_lock; 61 + struct o2nm_node *cl_nodes[O2NM_MAX_NODES]; 62 + struct rb_root cl_node_ip_tree; 63 + /* o2net timeouts, in milliseconds */ unsigned int cl_idle_timeout_ms; 64 + unsigned int cl_keepalive_delay_ms; 65 + unsigned int cl_reconnect_delay_ms; 66 + 67 + /* this bitmap is part of a hack for disk bitmap.. will go eventually. - zab */ 68 + unsigned long cl_nodes_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; 69 + }; 70 + 71 + /* the single active cluster; NOTE(review): presumably defined in nodemanager.c, confirm */ extern struct o2nm_cluster *o2nm_single_cluster; 72 + 56 73 u8 o2nm_this_node(void); 57 74 58 75 int o2nm_configured_node_map(unsigned long *map, unsigned bytes);

+131 -21

fs/ocfs2/cluster/tcp.c

··· 147 147 static void o2net_sc_send_keep_req(struct work_struct *work); 148 148 static void o2net_idle_timer(unsigned long data); 149 149 static void o2net_sc_postpone_idle(struct o2net_sock_container *sc); 150 + static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc); 151 + 152 + /* 153 + * FIXME: These should use to_o2nm_cluster_from_node(), but we end up 154 + * losing our parent link to the cluster during shutdown. This can be 155 + * solved by adding a pre-removal callback to configfs, or passing 156 + * around the cluster with the node. -jeffm 157 + */ 158 + /* Timeout accessors, values in milliseconds. The node argument is not used; all three read o2nm_single_cluster. NOTE(review): they return int although the cluster fields are unsigned int and callers print the result with %u. */ static inline int o2net_reconnect_delay(struct o2nm_node *node) 159 + { 160 + return o2nm_single_cluster->cl_reconnect_delay_ms; 161 + } 162 + 163 + static inline int o2net_keepalive_delay(struct o2nm_node *node) 164 + { 165 + return o2nm_single_cluster->cl_keepalive_delay_ms; 166 + } 167 + 168 + static inline int o2net_idle_timeout(struct o2nm_node *node) 169 + { 170 + return o2nm_single_cluster->cl_idle_timeout_ms; 171 + } 150 172 151 173 static inline int o2net_sys_err_to_errno(enum o2net_system_error err) 152 174 { ··· 293 271 { 294 272 struct o2net_sock_container *sc = container_of(kref, 295 273 struct o2net_sock_container, sc_kref); 274 + /* the idle timer must no longer be pending once the sc is being released */ BUG_ON(timer_pending(&sc->sc_idle_timeout)); 275 + 296 276 sclog(sc, "releasing\n"); 297 277 298 278 if (sc->sc_sock) { ··· 380 356 sc_put(sc); 381 357 } 382 358 359 + /* Connected-peer counter: o2net_set_nn_state() increments it when a node goes from no sc to an sc and decrements it on the opposite transition. */ static atomic_t o2net_connected_peers = ATOMIC_INIT(0); 360 + 361 + int o2net_num_connected_peers(void) 362 + { 363 + return atomic_read(&o2net_connected_peers); 364 + } 365 + 383 366 static void o2net_set_nn_state(struct o2net_node *nn, 384 367 struct o2net_sock_container *sc, 385 368 unsigned valid, int err) ··· 396 365 struct o2net_sock_container *old_sc = nn->nn_sc; 397 366 398 367 assert_spin_locked(&nn->nn_lock); 368 + 369 + if (old_sc && !sc) 370 + atomic_dec(&o2net_connected_peers); 371 + else if (!old_sc && sc) 372 + atomic_inc(&o2net_connected_peers); 399 373 400 374 /* the
node num comparison and single connect/accept path should stop 401 375 * a non-null sc from being overwritten with another */ ··· 460 424 /* delay if we're within a RECONNECT_DELAY of the 461 425 * last attempt */ /* NOTE(review): this function is also called with sc == NULL (the connect-expired path passes NULL); confirm sc cannot be NULL here before sc->sc_node is dereferenced */ 462 426 delay = (nn->nn_last_connect_attempt + 463 - msecs_to_jiffies(O2NET_RECONNECT_DELAY_MS)) 427 + msecs_to_jiffies(o2net_reconnect_delay(sc->sc_node))) 464 428 - jiffies; 465 - if (delay > msecs_to_jiffies(O2NET_RECONNECT_DELAY_MS)) 429 + if (delay > msecs_to_jiffies(o2net_reconnect_delay(sc->sc_node))) 466 430 delay = 0; 467 431 mlog(ML_CONN, "queueing conn attempt in %lu jiffies\n", delay); 468 432 queue_delayed_work(o2net_wq, &nn->nn_connect_work, delay); ··· 1135 1099 return -1; 1136 1100 } 1137 1101 1102 + /* 1103 + * Ensure timeouts are consistent with other nodes, otherwise 1104 + * we can end up with one node thinking that the other must be down, 1105 + * but isn't. This can ultimately cause corruption. 1106 + */ 1107 + if (be32_to_cpu(hand->o2net_idle_timeout_ms) != 1108 + o2net_idle_timeout(sc->sc_node)) { 1109 + mlog(ML_NOTICE, SC_NODEF_FMT " uses a network idle timeout of " 1110 + "%u ms, but we use %u ms locally. disconnecting\n", 1111 + SC_NODEF_ARGS(sc), 1112 + be32_to_cpu(hand->o2net_idle_timeout_ms), 1113 + o2net_idle_timeout(sc->sc_node)); 1114 + o2net_ensure_shutdown(nn, sc, -ENOTCONN); 1115 + return -1; 1116 + } 1117 + 1118 + if (be32_to_cpu(hand->o2net_keepalive_delay_ms) != 1119 + o2net_keepalive_delay(sc->sc_node)) { 1120 + mlog(ML_NOTICE, SC_NODEF_FMT " uses a keepalive delay of " 1121 + "%u ms, but we use %u ms locally. 
disconnecting\n", 1122 + SC_NODEF_ARGS(sc), 1123 + be32_to_cpu(hand->o2net_keepalive_delay_ms), 1124 + o2net_keepalive_delay(sc->sc_node)); 1125 + o2net_ensure_shutdown(nn, sc, -ENOTCONN); 1126 + return -1; 1127 + } 1128 + 1129 + if (be32_to_cpu(hand->o2hb_heartbeat_timeout_ms) != 1130 + O2HB_MAX_WRITE_TIMEOUT_MS) { 1131 + mlog(ML_NOTICE, SC_NODEF_FMT " uses a heartbeat timeout of " 1132 + "%u ms, but we use %u ms locally. disconnecting\n", 1133 + SC_NODEF_ARGS(sc), 1134 + be32_to_cpu(hand->o2hb_heartbeat_timeout_ms), 1135 + O2HB_MAX_WRITE_TIMEOUT_MS); 1136 + o2net_ensure_shutdown(nn, sc, -ENOTCONN); 1137 + return -1; 1138 + } 1139 + 1138 1140 sc->sc_handshake_ok = 1; 1139 1141 1140 1142 spin_lock(&nn->nn_lock); 1141 1143 /* set valid and queue the idle timers only if it hasn't been 1142 1144 * shut down already */ 1143 1145 if (nn->nn_sc == sc) { 1144 - o2net_sc_postpone_idle(sc); 1146 + o2net_sc_reset_idle_timer(sc); 1145 1147 o2net_set_nn_state(nn, sc, 1, 0); 1146 1148 } 1147 1149 spin_unlock(&nn->nn_lock); ··· 1205 1131 sclog(sc, "receiving\n"); 1206 1132 do_gettimeofday(&sc->sc_tv_advance_start); 1207 1133 1134 + if (unlikely(sc->sc_handshake_ok == 0)) { 1135 + if(sc->sc_page_off < sizeof(struct o2net_handshake)) { 1136 + data = page_address(sc->sc_page) + sc->sc_page_off; 1137 + datalen = sizeof(struct o2net_handshake) - sc->sc_page_off; 1138 + ret = o2net_recv_tcp_msg(sc->sc_sock, data, datalen); 1139 + if (ret > 0) 1140 + sc->sc_page_off += ret; 1141 + } 1142 + 1143 + if (sc->sc_page_off == sizeof(struct o2net_handshake)) { 1144 + o2net_check_handshake(sc); 1145 + if (unlikely(sc->sc_handshake_ok == 0)) 1146 + ret = -EPROTO; 1147 + } 1148 + goto out; 1149 + } 1150 + 1208 1151 /* do we need more header? 
*/ 1209 1152 if (sc->sc_page_off < sizeof(struct o2net_msg)) { 1210 1153 data = page_address(sc->sc_page) + sc->sc_page_off; ··· 1229 1138 ret = o2net_recv_tcp_msg(sc->sc_sock, data, datalen); 1230 1139 if (ret > 0) { 1231 1140 sc->sc_page_off += ret; 1232 - 1233 - /* this working relies on the handshake being 1234 - * smaller than the normal message header */ 1235 - if (sc->sc_page_off >= sizeof(struct o2net_handshake)&& 1236 - !sc->sc_handshake_ok && o2net_check_handshake(sc)) { 1237 - ret = -EPROTO; 1238 - goto out; 1239 - } 1240 - 1241 1141 /* only swab incoming here.. we can 1242 1142 * only get here once as we cross from 1243 1143 * being under to over */ ··· 1330 1248 return ret; 1331 1249 } 1332 1250 1251 + /* Fill the timeout fields of the shared o2net_hand handshake with the current settings, converted to big-endian. NULL is passed because the accessors do not use their node argument. */ static void o2net_initialize_handshake(void) 1252 + { 1253 + o2net_hand->o2hb_heartbeat_timeout_ms = cpu_to_be32( 1254 + O2HB_MAX_WRITE_TIMEOUT_MS); 1255 + o2net_hand->o2net_idle_timeout_ms = cpu_to_be32( 1256 + o2net_idle_timeout(NULL)); 1257 + o2net_hand->o2net_keepalive_delay_ms = cpu_to_be32( 1258 + o2net_keepalive_delay(NULL)); 1259 + o2net_hand->o2net_reconnect_delay_ms = cpu_to_be32( 1260 + o2net_reconnect_delay(NULL)); 1261 + } 1262 + 1333 1263 /* ------------------------------------------------------------ */ 1334 1264 1335 1265 /* called when a connect completes and after a sock is accepted. 
the ··· 1356 1262 (unsigned long long)O2NET_PROTOCOL_VERSION, 1357 1263 (unsigned long long)be64_to_cpu(o2net_hand->connector_id)); 1358 1264 1265 + o2net_initialize_handshake(); 1359 1266 o2net_sendpage(sc, o2net_hand, sizeof(*o2net_hand)); 1360 1267 sc_put(sc); 1361 1268 } ··· 1382 1287 1383 1288 do_gettimeofday(&now); 1384 1289 1385 - printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT " has been idle for 10 " 1386 - "seconds, shutting it down.\n", SC_NODEF_ARGS(sc)); 1290 + printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u " 1291 + "seconds, shutting it down.\n", SC_NODEF_ARGS(sc), 1292 + o2net_idle_timeout(sc->sc_node) / 1000, 1293 + o2net_idle_timeout(sc->sc_node) % 1000); 1387 1294 mlog(ML_NOTICE, "here are some times that might help debug the " 1388 1295 "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv " 1389 1296 "%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n", ··· 1403 1306 o2net_sc_queue_work(sc, &sc->sc_shutdown_work); 1404 1307 } 1405 1308 1406 - static void o2net_sc_postpone_idle(struct o2net_sock_container *sc) 1309 + static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc) 1407 1310 { 1408 1311 o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work); 1409 1312 o2net_sc_queue_delayed_work(sc, &sc->sc_keepalive_work, 1410 - O2NET_KEEPALIVE_DELAY_SECS * HZ); 1313 + msecs_to_jiffies(o2net_keepalive_delay(sc->sc_node))); 1411 1314 do_gettimeofday(&sc->sc_tv_timer); 1412 1315 mod_timer(&sc->sc_idle_timeout, 1413 - jiffies + (O2NET_IDLE_TIMEOUT_SECS * HZ)); 1316 + jiffies + msecs_to_jiffies(o2net_idle_timeout(sc->sc_node))); 1317 + } 1318 + 1319 + static void o2net_sc_postpone_idle(struct o2net_sock_container *sc) 1320 + { 1321 + /* Only push out an existing timer; a timer that is not pending is left alone and is armed by o2net_sc_reset_idle_timer() instead */ 1322 + if (timer_pending(&sc->sc_idle_timeout)) 1323 + o2net_sc_reset_idle_timer(sc); 1414 1324 } 1415 1325 1416 1326 /* this work func is kicked whenever a path sets the nn state which doesn't ··· 1539 1435 1540 1436
spin_lock(&nn->nn_lock); 1541 1437 if (!nn->nn_sc_valid) { 1438 + struct o2nm_node *node = nn->nn_sc->sc_node; 1542 1439 mlog(ML_ERROR, "no connection established with node %u after " 1543 - "%u seconds, giving up and returning errors.\n", 1544 - o2net_num_from_nn(nn), O2NET_IDLE_TIMEOUT_SECS); 1440 + "%u.%u seconds, giving up and returning errors.\n", 1441 + o2net_num_from_nn(nn), 1442 + o2net_idle_timeout(node) / 1000, 1443 + o2net_idle_timeout(node) % 1000); 1545 1444 1546 1445 o2net_set_nn_state(nn, NULL, 0, -ENOTCONN); 1547 1446 } ··· 1585 1478 1586 1479 if (node_num != o2nm_this_node()) 1587 1480 o2net_disconnect_node(node); 1481 + 1482 + BUG_ON(atomic_read(&o2net_connected_peers) < 0); 1588 1483 } 1589 1484 1590 1485 static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num, ··· 1598 1489 1599 1490 /* ensure an immediate connect attempt */ 1600 1491 nn->nn_last_connect_attempt = jiffies - 1601 - (msecs_to_jiffies(O2NET_RECONNECT_DELAY_MS) + 1); 1492 + (msecs_to_jiffies(o2net_reconnect_delay(node)) + 1); 1602 1493 1603 1494 if (node_num != o2nm_this_node()) { 1604 1495 /* heartbeat doesn't work unless a local node number is 1605 1496 * configured and doing so brings up the o2net_wq, so we can 1606 1497 * use it.. */ 1607 1498 queue_delayed_work(o2net_wq, &nn->nn_connect_expired, 1608 - O2NET_IDLE_TIMEOUT_SECS * HZ); 1499 + msecs_to_jiffies(o2net_idle_timeout(node))); 1609 1500 1610 1501 /* believe it or not, accept and node heartbeating testing 1611 1502 * can succeed for this node before we got here.. so ··· 1750 1641 o2net_register_callbacks(sc->sc_sock->sk, sc); 1751 1642 o2net_sc_queue_work(sc, &sc->sc_rx_work); 1752 1643 1644 + o2net_initialize_handshake(); 1753 1645 o2net_sendpage(sc, o2net_hand, sizeof(*o2net_hand)); 1754 1646 1755 1647 out:

+8

fs/ocfs2/cluster/tcp.h

··· 54 54 55 55 #define O2NET_MAX_PAYLOAD_BYTES (4096 - sizeof(struct o2net_msg)) 56 56 57 + /* same as hb delay, we're waiting for another node to recognize our hb */ /* NOTE(review): the value is a literal 2000 here, not derived from a heartbeat constant; confirm it still matches the heartbeat delay */ 58 + #define O2NET_RECONNECT_DELAY_MS_DEFAULT 2000 59 + 60 + /* default keepalive delay and idle timeout, in milliseconds */ #define O2NET_KEEPALIVE_DELAY_MS_DEFAULT 5000 61 + #define O2NET_IDLE_TIMEOUT_MS_DEFAULT 10000 62 + 63 + 57 64 /* TODO: figure this out.... */ 58 65 static inline int o2net_link_down(int err, struct socket *sock) 59 66 { ··· 108 101 int o2net_start_listening(struct o2nm_node *node); 109 102 void o2net_stop_listening(struct o2nm_node *node); 110 103 void o2net_disconnect_node(struct o2nm_node *node); 104 + /* NOTE(review): presumably the number of peers o2net currently counts as connected; confirm against the definition */ int o2net_num_connected_peers(void); 111 105 112 106 int o2net_init(void); 113 107 void o2net_exit(void);

+8 -7

fs/ocfs2/cluster/tcp_internal.h

··· 27 27 #define O2NET_MSG_KEEP_REQ_MAGIC ((u16)0xfa57) 28 28 #define O2NET_MSG_KEEP_RESP_MAGIC ((u16)0xfa58) 29 29 30 - /* same as hb delay, we're waiting for another node to recognize our hb */ 31 - #define O2NET_RECONNECT_DELAY_MS O2HB_REGION_TIMEOUT_MS 32 - 33 30 /* we're delaying our quorum decision so that heartbeat will have timed 34 31 * out truly dead nodes by the time we come around to making decisions 35 32 * on their number */ 36 33 #define O2NET_QUORUM_DELAY_MS ((o2hb_dead_threshold + 2) * O2HB_REGION_TIMEOUT_MS) 37 - 38 - #define O2NET_KEEPALIVE_DELAY_SECS 5 39 - #define O2NET_IDLE_TIMEOUT_SECS 10 40 34 41 35 /* 42 36 * This version number represents quite a lot, unfortunately. It not 43 37 * only represents the raw network message protocol on the wire but also 44 38 * locking semantics of the file system using the protocol. It should 45 39 * be somewhere else, I'm sure, but right now it isn't. 40 + * 41 + * New in version 5: 42 + * - Network timeout checking protocol 46 43 * 47 44 * New in version 4: 48 45 * - Remove i_generation from lock names for better stat performance. ··· 51 54 * - full 64 bit i_size in the metadata lock lvbs 52 55 * - introduction of "rw" lock and pushing meta/data locking down 53 56 */ 54 - #define O2NET_PROTOCOL_VERSION 4ULL 57 + #define O2NET_PROTOCOL_VERSION 5ULL 55 58 struct o2net_handshake { 56 59 __be64 protocol_version; 57 60 __be64 connector_id; 61 + /* timeout settings of the sending node in milliseconds, stored big-endian; these four fields arrive with protocol version 5 */ __be32 o2hb_heartbeat_timeout_ms; 62 + __be32 o2net_idle_timeout_ms; 63 + __be32 o2net_keepalive_delay_ms; 64 + __be32 o2net_reconnect_delay_ms; 58 65 }; 59 66 60 67 struct o2net_node {

+59 -20

fs/ocfs2/dlmglue.c

··· 770 770 int dlm_flags) 771 771 { 772 772 int ret = 0; 773 - enum dlm_status status; 773 + enum dlm_status status = DLM_NORMAL; 774 774 unsigned long flags; 775 775 776 776 mlog_entry_void(); ··· 1138 1138 { 1139 1139 int status, level; 1140 1140 struct ocfs2_lock_res *lockres; 1141 + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1141 1142 1142 1143 BUG_ON(!inode); 1143 1144 ··· 1147 1146 mlog(0, "inode %llu take %s RW lock\n", 1148 1147 (unsigned long long)OCFS2_I(inode)->ip_blkno, 1149 1148 write ? "EXMODE" : "PRMODE"); 1149 + 1150 + if (ocfs2_mount_local(osb)) 1151 + return 0; 1150 1152 1151 1153 lockres = &OCFS2_I(inode)->ip_rw_lockres; 1152 1154 ··· 1168 1164 { 1169 1165 int level = write ? LKM_EXMODE : LKM_PRMODE; 1170 1166 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_rw_lockres; 1167 + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1171 1168 1172 1169 mlog_entry_void(); 1173 1170 ··· 1176 1171 (unsigned long long)OCFS2_I(inode)->ip_blkno, 1177 1172 write ? "EXMODE" : "PRMODE"); 1178 1173 1179 - ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); 1174 + if (!ocfs2_mount_local(osb)) 1175 + ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); 1180 1176 1181 1177 mlog_exit_void(); 1182 1178 } ··· 1188 1182 { 1189 1183 int status = 0, level; 1190 1184 struct ocfs2_lock_res *lockres; 1185 + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1191 1186 1192 1187 BUG_ON(!inode); 1193 1188 ··· 1207 1200 } 1208 1201 goto out; 1209 1202 } 1203 + 1204 + if (ocfs2_mount_local(osb)) 1205 + goto out; 1210 1206 1211 1207 lockres = &OCFS2_I(inode)->ip_data_lockres; 1212 1208 ··· 1279 1269 { 1280 1270 int level = write ? LKM_EXMODE : LKM_PRMODE; 1281 1271 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_data_lockres; 1272 + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1282 1273 1283 1274 mlog_entry_void(); 1284 1275 ··· 1287 1276 (unsigned long long)OCFS2_I(inode)->ip_blkno, 1288 1277 write ? 
"EXMODE" : "PRMODE"); 1289 1278 1290 - if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb))) 1279 + if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) && 1280 + !ocfs2_mount_local(osb)) 1291 1281 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); 1292 1282 1293 1283 mlog_exit_void(); ··· 1479 1467 { 1480 1468 int status = 0; 1481 1469 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1482 - struct ocfs2_lock_res *lockres; 1470 + struct ocfs2_lock_res *lockres = NULL; 1483 1471 struct ocfs2_dinode *fe; 1472 + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1484 1473 1485 1474 mlog_entry_void(); 1486 1475 ··· 1496 1483 } 1497 1484 spin_unlock(&oi->ip_lock); 1498 1485 1499 - lockres = &oi->ip_meta_lockres; 1486 + if (!ocfs2_mount_local(osb)) { 1487 + lockres = &oi->ip_meta_lockres; 1500 1488 1501 - if (!ocfs2_should_refresh_lock_res(lockres)) 1502 - goto bail; 1489 + if (!ocfs2_should_refresh_lock_res(lockres)) 1490 + goto bail; 1491 + } 1503 1492 1504 1493 /* This will discard any caching information we might have had 1505 1494 * for the inode metadata. 
*/ ··· 1511 1496 * map (directories, bitmap files, etc) */ 1512 1497 ocfs2_extent_map_trunc(inode, 0); 1513 1498 1514 - if (ocfs2_meta_lvb_is_trustable(inode, lockres)) { 1499 + if (lockres && ocfs2_meta_lvb_is_trustable(inode, lockres)) { 1515 1500 mlog(0, "Trusting LVB on inode %llu\n", 1516 1501 (unsigned long long)oi->ip_blkno); 1517 1502 ocfs2_refresh_inode_from_lvb(inode); ··· 1558 1543 1559 1544 status = 0; 1560 1545 bail_refresh: 1561 - ocfs2_complete_lock_res_refresh(lockres, status); 1546 + if (lockres) 1547 + ocfs2_complete_lock_res_refresh(lockres, status); 1562 1548 bail: 1563 1549 mlog_exit(status); 1564 1550 return status; ··· 1601 1585 int arg_flags) 1602 1586 { 1603 1587 int status, level, dlm_flags, acquired; 1604 - struct ocfs2_lock_res *lockres; 1588 + struct ocfs2_lock_res *lockres = NULL; 1605 1589 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1606 1590 struct buffer_head *local_bh = NULL; 1607 1591 ··· 1622 1606 status = -EROFS; 1623 1607 goto bail; 1624 1608 } 1609 + 1610 + if (ocfs2_mount_local(osb)) 1611 + goto local; 1625 1612 1626 1613 if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) 1627 1614 wait_event(osb->recovery_event, ··· 1655 1636 wait_event(osb->recovery_event, 1656 1637 ocfs2_node_map_is_empty(osb, &osb->recovery_map)); 1657 1638 1639 + local: 1658 1640 /* 1659 1641 * We only see this flag if we're being called from 1660 1642 * ocfs2_read_locked_inode(). It means we're locking an inode ··· 1664 1644 */ 1665 1645 if (inode->i_state & I_NEW) { 1666 1646 status = 0; 1667 - ocfs2_complete_lock_res_refresh(lockres, 0); 1647 + if (lockres) 1648 + ocfs2_complete_lock_res_refresh(lockres, 0); 1668 1649 goto bail; 1669 1650 } 1670 1651 ··· 1788 1767 { 1789 1768 int level = ex ? 
LKM_EXMODE : LKM_PRMODE; 1790 1769 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_meta_lockres; 1770 + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1791 1771 1792 1772 mlog_entry_void(); 1793 1773 ··· 1796 1774 (unsigned long long)OCFS2_I(inode)->ip_blkno, 1797 1775 ex ? "EXMODE" : "PRMODE"); 1798 1776 1799 - if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb))) 1777 + if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) && 1778 + !ocfs2_mount_local(osb)) 1800 1779 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); 1801 1780 1802 1781 mlog_exit_void(); ··· 1806 1783 int ocfs2_super_lock(struct ocfs2_super *osb, 1807 1784 int ex) 1808 1785 { 1809 - int status; 1786 + int status = 0; 1810 1787 int level = ex ? LKM_EXMODE : LKM_PRMODE; 1811 1788 struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; 1812 1789 struct buffer_head *bh; ··· 1816 1793 1817 1794 if (ocfs2_is_hard_readonly(osb)) 1818 1795 return -EROFS; 1796 + 1797 + if (ocfs2_mount_local(osb)) 1798 + goto bail; 1819 1799 1820 1800 status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); 1821 1801 if (status < 0) { ··· 1858 1832 int level = ex ? 
LKM_EXMODE : LKM_PRMODE; 1859 1833 struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; 1860 1834 1861 - ocfs2_cluster_unlock(osb, lockres, level); 1835 + if (!ocfs2_mount_local(osb)) 1836 + ocfs2_cluster_unlock(osb, lockres, level); 1862 1837 } 1863 1838 1864 1839 int ocfs2_rename_lock(struct ocfs2_super *osb) ··· 1869 1842 1870 1843 if (ocfs2_is_hard_readonly(osb)) 1871 1844 return -EROFS; 1845 + 1846 + if (ocfs2_mount_local(osb)) 1847 + return 0; 1872 1848 1873 1849 status = ocfs2_cluster_lock(osb, lockres, LKM_EXMODE, 0, 0); 1874 1850 if (status < 0) ··· 1884 1854 { 1885 1855 struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres; 1886 1856 1887 - ocfs2_cluster_unlock(osb, lockres, LKM_EXMODE); 1857 + if (!ocfs2_mount_local(osb)) 1858 + ocfs2_cluster_unlock(osb, lockres, LKM_EXMODE); 1888 1859 } 1889 1860 1890 1861 int ocfs2_dentry_lock(struct dentry *dentry, int ex) ··· 1900 1869 if (ocfs2_is_hard_readonly(osb)) 1901 1870 return -EROFS; 1902 1871 1872 + if (ocfs2_mount_local(osb)) 1873 + return 0; 1874 + 1903 1875 ret = ocfs2_cluster_lock(osb, &dl->dl_lockres, level, 0, 0); 1904 1876 if (ret < 0) 1905 1877 mlog_errno(ret); ··· 1916 1882 struct ocfs2_dentry_lock *dl = dentry->d_fsdata; 1917 1883 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); 1918 1884 1919 - ocfs2_cluster_unlock(osb, &dl->dl_lockres, level); 1885 + if (!ocfs2_mount_local(osb)) 1886 + ocfs2_cluster_unlock(osb, &dl->dl_lockres, level); 1920 1887 } 1921 1888 1922 1889 /* Reference counting of the dlm debug structure. 
We want this because ··· 2180 2145 2181 2146 int ocfs2_dlm_init(struct ocfs2_super *osb) 2182 2147 { 2183 - int status; 2148 + int status = 0; 2184 2149 u32 dlm_key; 2185 - struct dlm_ctxt *dlm; 2150 + struct dlm_ctxt *dlm = NULL; 2186 2151 2187 2152 mlog_entry_void(); 2153 + 2154 + if (ocfs2_mount_local(osb)) 2155 + goto local; 2188 2156 2189 2157 status = ocfs2_dlm_init_debug(osb); 2190 2158 if (status < 0) { ··· 2216 2178 goto bail; 2217 2179 } 2218 2180 2181 + dlm_register_eviction_cb(dlm, &osb->osb_eviction_cb); 2182 + 2183 + local: 2219 2184 ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb); 2220 2185 ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb); 2221 - 2222 - dlm_register_eviction_cb(dlm, &osb->osb_eviction_cb); 2223 2186 2224 2187 osb->dlm = dlm; 2225 2188

+9

fs/ocfs2/heartbeat.c

··· 154 154 { 155 155 int status; 156 156 157 + if (ocfs2_mount_local(osb)) 158 + return 0; 159 + 157 160 status = o2hb_register_callback(&osb->osb_hb_down); 158 161 if (status < 0) { 159 162 mlog_errno(status); ··· 175 172 { 176 173 int status; 177 174 175 + if (ocfs2_mount_local(osb)) 176 + return; 177 + 178 178 status = o2hb_unregister_callback(&osb->osb_hb_down); 179 179 if (status < 0) 180 180 mlog_errno(status); ··· 191 185 { 192 186 int ret; 193 187 char *argv[5], *envp[3]; 188 + 189 + if (ocfs2_mount_local(osb)) 190 + return; 194 191 195 192 if (!osb->uuid_str) { 196 193 /* This can happen if we don't get far enough in mount... */

+2 -1

fs/ocfs2/inode.c

··· 423 423 * cluster lock before trusting anything anyway. 424 424 */ 425 425 can_lock = !(args->fi_flags & OCFS2_FI_FLAG_SYSFILE) 426 - && !(args->fi_flags & OCFS2_FI_FLAG_NOLOCK); 426 + && !(args->fi_flags & OCFS2_FI_FLAG_NOLOCK) 427 + && !ocfs2_mount_local(osb); 427 428 428 429 /* 429 430 * To maintain backwards compatibility with older versions of

+33 -13

fs/ocfs2/journal.c

··· 144 144 ocfs2_abort(osb->sb, "Detected aborted journal"); 145 145 handle = ERR_PTR(-EROFS); 146 146 } 147 - } else 148 - atomic_inc(&(osb->journal->j_num_trans)); 147 + } else { 148 + if (!ocfs2_mount_local(osb)) 149 + atomic_inc(&(osb->journal->j_num_trans)); 150 + } 149 151 150 152 return handle; 151 153 } ··· 509 507 510 508 BUG_ON(atomic_read(&(osb->journal->j_num_trans)) != 0); 511 509 512 - status = ocfs2_journal_toggle_dirty(osb, 0); 513 - if (status < 0) 514 - mlog_errno(status); 510 + if (ocfs2_mount_local(osb)) { 511 + journal_lock_updates(journal->j_journal); 512 + status = journal_flush(journal->j_journal); 513 + journal_unlock_updates(journal->j_journal); 514 + if (status < 0) 515 + mlog_errno(status); 516 + } 517 + 518 + if (status == 0) { 519 + /* 520 + * Do not toggle if flush was unsuccessful otherwise 521 + * will leave dirty metadata in a "clean" journal 522 + */ 523 + status = ocfs2_journal_toggle_dirty(osb, 0); 524 + if (status < 0) 525 + mlog_errno(status); 526 + } 515 527 516 528 /* Shutdown the kernel journal system */ 517 529 journal_destroy(journal->j_journal); ··· 565 549 } 566 550 } 567 551 568 - int ocfs2_journal_load(struct ocfs2_journal *journal) 552 + int ocfs2_journal_load(struct ocfs2_journal *journal, int local) 569 553 { 570 554 int status = 0; 571 555 struct ocfs2_super *osb; ··· 592 576 } 593 577 594 578 /* Launch the commit thread */ 595 - osb->commit_task = kthread_run(ocfs2_commit_thread, osb, "ocfs2cmt"); 596 - if (IS_ERR(osb->commit_task)) { 597 - status = PTR_ERR(osb->commit_task); 579 + if (!local) { 580 + osb->commit_task = kthread_run(ocfs2_commit_thread, osb, 581 + "ocfs2cmt"); 582 + if (IS_ERR(osb->commit_task)) { 583 + status = PTR_ERR(osb->commit_task); 584 + osb->commit_task = NULL; 585 + mlog(ML_ERROR, "unable to launch ocfs2commit thread, " 586 + "error=%d", status); 587 + goto done; 588 + } 589 + } else 598 590 osb->commit_task = NULL; 599 - mlog(ML_ERROR, "unable to launch ocfs2commit thread, error=%d", 
600 - status); 601 - goto done; 602 - } 603 591 604 592 done: 605 593 mlog_exit(status);

+4 -1

fs/ocfs2/journal.h

··· 157 157 void ocfs2_journal_shutdown(struct ocfs2_super *osb); 158 158 int ocfs2_journal_wipe(struct ocfs2_journal *journal, 159 159 int full); 160 - int ocfs2_journal_load(struct ocfs2_journal *journal); 160 + int ocfs2_journal_load(struct ocfs2_journal *journal, int local); 161 161 int ocfs2_check_journals_nolocks(struct ocfs2_super *osb); 162 162 void ocfs2_recovery_thread(struct ocfs2_super *osb, 163 163 int node_num); ··· 173 173 static inline void ocfs2_checkpoint_inode(struct inode *inode) 174 174 { 175 175 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 176 + 177 + if (ocfs2_mount_local(osb)) 178 + return; 176 179 177 180 if (!ocfs2_inode_fully_checkpointed(inode)) { 178 181 /* WARNING: This only kicks off a single

+4 -2

fs/ocfs2/mmap.c

··· 83 83 int ocfs2_mmap(struct file *file, struct vm_area_struct *vma) 84 84 { 85 85 int ret = 0, lock_level = 0; 86 + struct ocfs2_super *osb = OCFS2_SB(file->f_dentry->d_inode->i_sb); 86 87 87 88 /* We don't want to support shared writable mappings yet. */ 88 - if (((vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_MAYSHARE)) 89 - && ((vma->vm_flags & VM_WRITE) || (vma->vm_flags & VM_MAYWRITE))) { 89 + if (!ocfs2_mount_local(osb) && 90 + ((vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_MAYSHARE)) && 91 + ((vma->vm_flags & VM_WRITE) || (vma->vm_flags & VM_MAYWRITE))) { 90 92 mlog(0, "disallow shared writable mmaps %lx\n", vma->vm_flags); 91 93 /* This is -EINVAL because generic_file_readonly_mmap 92 94 * returns it in a similar situation. */

+5 -3

fs/ocfs2/namei.c

··· 587 587 } 588 588 589 589 ocfs2_inode_set_new(osb, inode); 590 - status = ocfs2_create_new_inode_locks(inode); 591 - if (status < 0) 592 - mlog_errno(status); 590 + if (!ocfs2_mount_local(osb)) { 591 + status = ocfs2_create_new_inode_locks(inode); 592 + if (status < 0) 593 + mlog_errno(status); 594 + } 593 595 594 596 status = 0; /* error in ocfs2_create_new_inode_locks is not 595 597 * critical */

+5

fs/ocfs2/ocfs2.h

··· 349 349 return ret; 350 350 } 351 351 352 + static inline int ocfs2_mount_local(struct ocfs2_super *osb) 353 + { 354 + return (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT); 355 + } 356 + 352 357 #define OCFS2_IS_VALID_DINODE(ptr) \ 353 358 (!strcmp((ptr)->i_signature, OCFS2_INODE_SIGNATURE)) 354 359

+13 -1

fs/ocfs2/ocfs2_fs.h

··· 86 86 OCFS2_SB(sb)->s_feature_incompat &= ~(mask) 87 87 88 88 #define OCFS2_FEATURE_COMPAT_SUPP 0 89 - #define OCFS2_FEATURE_INCOMPAT_SUPP 0 89 + #define OCFS2_FEATURE_INCOMPAT_SUPP OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT 90 90 #define OCFS2_FEATURE_RO_COMPAT_SUPP 0 91 91 92 92 /* ··· 96 96 */ 97 97 #define OCFS2_FEATURE_INCOMPAT_HEARTBEAT_DEV 0x0002 98 98 99 + /* 100 + * tunefs sets this incompat flag before starting the resize and clears it 101 + * at the end. This flag protects users from inadvertently mounting the fs 102 + * after an aborted run without fsck-ing. 103 + */ 104 + #define OCFS2_FEATURE_INCOMPAT_RESIZE_INPROG 0x0004 105 + 106 + /* Used to denote a non-clustered volume */ 107 + #define OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT 0x0008 108 + 109 + /* Support for sparse allocation in b-trees */ 110 + #define OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC 0x0010 99 111 100 112 /* 101 113 * Flags on ocfs2_dinode.i_flags

+65 -25

fs/ocfs2/super.c

··· 508 508 return status; 509 509 } 510 510 511 + static int ocfs2_verify_heartbeat(struct ocfs2_super *osb) 512 + { 513 + if (ocfs2_mount_local(osb)) { 514 + if (osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) { 515 + mlog(ML_ERROR, "Cannot heartbeat on a locally " 516 + "mounted device.\n"); 517 + return -EINVAL; 518 + } 519 + } 520 + 521 + if (!(osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL)) { 522 + if (!ocfs2_mount_local(osb) && !ocfs2_is_hard_readonly(osb)) { 523 + mlog(ML_ERROR, "Heartbeat has to be started to mount " 524 + "a read-write clustered device.\n"); 525 + return -EINVAL; 526 + } 527 + } 528 + 529 + return 0; 530 + } 531 + 511 532 static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) 512 533 { 513 534 struct dentry *root; ··· 537 516 struct inode *inode = NULL; 538 517 struct ocfs2_super *osb = NULL; 539 518 struct buffer_head *bh = NULL; 519 + char nodestr[8]; 540 520 541 521 mlog_entry("%p, %p, %i", sb, data, silent); 542 522 543 - /* for now we only have one cluster/node, make sure we see it 544 - * in the heartbeat universe */ 545 - if (!o2hb_check_local_node_heartbeating()) { 523 + if (!ocfs2_parse_options(sb, data, &parsed_opt, 0)) { 546 524 status = -EINVAL; 547 525 goto read_super_error; 526 + } 527 + 528 + /* for now we only have one cluster/node, make sure we see it 529 + * in the heartbeat universe */ 530 + if (parsed_opt & OCFS2_MOUNT_HB_LOCAL) { 531 + if (!o2hb_check_local_node_heartbeating()) { 532 + status = -EINVAL; 533 + goto read_super_error; 534 + } 548 535 } 549 536 550 537 /* probe for superblock */ ··· 570 541 } 571 542 brelse(bh); 572 543 bh = NULL; 573 - 574 - if (!ocfs2_parse_options(sb, data, &parsed_opt, 0)) { 575 - status = -EINVAL; 576 - goto read_super_error; 577 - } 578 544 osb->s_mount_opt = parsed_opt; 579 545 580 546 sb->s_magic = OCFS2_SUPER_MAGIC; ··· 612 588 } 613 589 614 590 if (!ocfs2_is_hard_readonly(osb)) { 615 - /* If this isn't a hard readonly mount, then we need 616 - * to make sure that 
heartbeat is in a valid state, 617 - * and that we mark ourselves soft readonly is -oro 618 - * was specified. */ 619 - if (!(osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL)) { 620 - mlog(ML_ERROR, "No heartbeat for device (%s)\n", 621 - sb->s_id); 622 - status = -EINVAL; 623 - goto read_super_error; 624 - } 625 - 626 591 if (sb->s_flags & MS_RDONLY) 627 592 ocfs2_set_ro_flag(osb, 0); 593 + } 594 + 595 + status = ocfs2_verify_heartbeat(osb); 596 + if (status < 0) { 597 + mlog_errno(status); 598 + goto read_super_error; 628 599 } 629 600 630 601 osb->osb_debug_root = debugfs_create_dir(osb->uuid_str, ··· 654 635 655 636 ocfs2_complete_mount_recovery(osb); 656 637 657 - printk(KERN_INFO "ocfs2: Mounting device (%s) on (node %d, slot %d) " 638 + if (ocfs2_mount_local(osb)) 639 + snprintf(nodestr, sizeof(nodestr), "local"); 640 + else 641 + snprintf(nodestr, sizeof(nodestr), "%d", osb->node_num); 642 + 643 + printk(KERN_INFO "ocfs2: Mounting device (%s) on (node %s, slot %d) " 658 644 "with %s data mode.\n", 659 - osb->dev_str, osb->node_num, osb->slot_num, 645 + osb->dev_str, nodestr, osb->slot_num, 660 646 osb->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK ? "writeback" : 661 647 "ordered"); 662 648 ··· 1023 999 1024 1000 /* XXX hold a ref on the node while mounte? easy enough, if 1025 1001 * desirable. */ 1026 - osb->node_num = o2nm_this_node(); 1002 + if (ocfs2_mount_local(osb)) 1003 + osb->node_num = 0; 1004 + else 1005 + osb->node_num = o2nm_this_node(); 1006 + 1027 1007 if (osb->node_num == O2NM_MAX_NODES) { 1028 1008 mlog(ML_ERROR, "could not find this host's node number\n"); 1029 1009 status = -ENOENT; ··· 1112 1084 goto leave; 1113 1085 } 1114 1086 1087 + if (ocfs2_mount_local(osb)) 1088 + goto leave; 1089 + 1115 1090 /* This should be sent *after* we recovered our journal as it 1116 1091 * will cause other nodes to unmark us as needing 1117 1092 * recovery. 
However, we need to send it *before* dropping the ··· 1145 1114 { 1146 1115 int tmp; 1147 1116 struct ocfs2_super *osb = NULL; 1117 + char nodestr[8]; 1148 1118 1149 1119 mlog_entry("(0x%p)\n", sb); 1150 1120 ··· 1209 1177 1210 1178 atomic_set(&osb->vol_state, VOLUME_DISMOUNTED); 1211 1179 1212 - printk(KERN_INFO "ocfs2: Unmounting device (%s) on (node %d)\n", 1213 - osb->dev_str, osb->node_num); 1180 + if (ocfs2_mount_local(osb)) 1181 + snprintf(nodestr, sizeof(nodestr), "local"); 1182 + else 1183 + snprintf(nodestr, sizeof(nodestr), "%d", osb->node_num); 1184 + 1185 + printk(KERN_INFO "ocfs2: Unmounting device (%s) on (node %s)\n", 1186 + osb->dev_str, nodestr); 1214 1187 1215 1188 ocfs2_delete_osb(osb); 1216 1189 kfree(osb); ··· 1573 1536 { 1574 1537 int status = 0; 1575 1538 int dirty; 1539 + int local; 1576 1540 struct ocfs2_dinode *local_alloc = NULL; /* only used if we 1577 1541 * recover 1578 1542 * ourselves. */ ··· 1601 1563 "recovering volume.\n"); 1602 1564 } 1603 1565 1566 + local = ocfs2_mount_local(osb); 1567 + 1604 1568 /* will play back anything left in the journal. */ 1605 - ocfs2_journal_load(osb->journal); 1569 + ocfs2_journal_load(osb->journal, local); 1606 1570 1607 1571 if (dirty) { 1608 1572 /* recover my local alloc if we didn't unmount cleanly. */

+3

fs/ocfs2/vote.c

··· 1000 1000 { 1001 1001 int status = 0; 1002 1002 1003 + if (ocfs2_mount_local(osb)) 1004 + return 0; 1005 + 1003 1006 status = o2net_register_handler(OCFS2_MESSAGE_TYPE_RESPONSE, 1004 1007 osb->net_key, 1005 1008 sizeof(struct ocfs2_response_msg),