···
 #define DLM_LOCK_RES_IN_PROGRESS          0x00000010
 #define DLM_LOCK_RES_MIGRATING            0x00000020

+/* max milliseconds to wait to sync up a network failure with a node death */
+#define DLM_NODE_DEATH_WAIT_MAX (5 * 1000)
+
 #define DLM_PURGE_INTERVAL_MS (8 * 1000)

 struct dlm_lock_resource
···
 void dlm_complete_recovery_thread(struct dlm_ctxt *dlm);
 void dlm_wait_for_recovery(struct dlm_ctxt *dlm);
 int dlm_is_node_dead(struct dlm_ctxt *dlm, u8 node);
+int dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout);

 void dlm_put(struct dlm_ctxt *dlm);
 struct dlm_ctxt *dlm_grab(struct dlm_ctxt *dlm);
fs/ocfs2/dlm/dlmconvert.c (+9, -3)
···
 	} else {
 		mlog_errno(tmpret);
 		if (dlm_is_host_down(tmpret)) {
+			/* instead of logging the same network error over
+			 * and over, sleep here and wait for the heartbeat
+			 * to notice the node is dead. times out after 5s. */
+			dlm_wait_for_node_death(dlm, res->owner,
+						DLM_NODE_DEATH_WAIT_MAX);
 			ret = DLM_RECOVERING;
 			mlog(0, "node %u died so returning DLM_RECOVERING "
 			     "from convert message!\n", res->owner);
···
 	struct dlm_lockstatus *lksb;
 	enum dlm_status status = DLM_NORMAL;
 	u32 flags;
-	int call_ast = 0, kick_thread = 0;
+	int call_ast = 0, kick_thread = 0, ast_reserved = 0;

 	if (!dlm_grab(dlm)) {
 		dlm_error(DLM_REJECTED);
···
 	status = __dlm_lockres_state_to_status(res);
 	if (status == DLM_NORMAL) {
 		__dlm_lockres_reserve_ast(res);
+		ast_reserved = 1;
 		res->state |= DLM_LOCK_RES_IN_PROGRESS;
 		status = __dlmconvert_master(dlm, res, lock, flags,
 					     cnv->requested_type,
···
 	else
 		dlm_lock_put(lock);

-	/* either queue the ast or release it */
+	/* either queue the ast or release it, if reserved */
 	if (call_ast)
 		dlm_queue_ast(dlm, lock);
-	else
+	else if (ast_reserved)
 		dlm_lockres_release_ast(dlm, res);

 	if (kick_thread)
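The new ast_reserved flag repairs an unbalanced pair: previously the exit path called dlm_lockres_release_ast() even on paths where __dlm_lockres_reserve_ast() had never run. A minimal userspace sketch of the invariant the flag enforces; the names (reserve_ast, handle_convert, state_ok) are illustrative, not the kernel API:

#include <assert.h>

struct resource {
	int ast_reservations;		/* outstanding AST reservations */
};

static void reserve_ast(struct resource *res)
{
	res->ast_reservations++;
}

static void release_ast(struct resource *res)
{
	/* releasing with no matching reserve is the bug being fixed */
	assert(res->ast_reservations > 0);
	res->ast_reservations--;
}

static void handle_convert(struct resource *res, int state_ok)
{
	int ast_reserved = 0;

	if (state_ok) {
		reserve_ast(res);
		ast_reserved = 1;
		/* ... try the convert; it may queue the AST itself ... */
	}

	/* exit path: release only if this function reserved */
	if (ast_reserved)
		release_ast(res);
}

int main(void)
{
	struct resource res = { 0 };

	handle_convert(&res, 1);	/* balanced: reserve then release */
	handle_convert(&res, 0);	/* no reserve, so no release */
	return 0;
}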
fs/ocfs2/dlm/dlmlock.c
···
 		dlm_error(status);
 		dlm_revert_pending_lock(res, lock);
 		dlm_lock_put(lock);
+	} else if (dlm_is_recovery_lock(res->lockname.name,
+					res->lockname.len)) {
+		/* special case for the $RECOVERY lock.
+		 * there will never be an AST delivered to put
+		 * this lock on the proper secondary queue
+		 * (granted), so do it manually. */
+		mlog(0, "%s: $RECOVERY lock for this node (%u) is "
+		     "mastered by %u; got lock, manually granting (no ast)\n",
+		     dlm->name, dlm->node_num, res->owner);
+		list_del_init(&lock->list);
+		list_add_tail(&lock->list, &res->granted);
 	}
 	spin_unlock(&res->spinlock);
···
 			mlog(0, "retrying lock with migration/"
 			     "recovery/in progress\n");
 			msleep(100);
+			/* no waiting for dlm_reco_thread */
+			if (recovery) {
+				if (status == DLM_RECOVERING) {
+					mlog(0, "%s: got RECOVERING "
+					     "for $RECOVERY lock, master "
+					     "was %u\n", dlm->name,
+					     res->owner);
+					dlm_wait_for_node_death(dlm,
+							res->owner,
+							DLM_NODE_DEATH_WAIT_MAX);
+				}
+			} else {
+				dlm_wait_for_recovery(dlm);
+			}
 			goto retry_lock;
 		}
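The manual grant above is a plain queue move: list_del_init() unhooks the lock from its current list and list_add_tail() appends it to res->granted, which is exactly what the kernel's list_move_tail() helper combines into one call. A self-contained sketch of that operation, loosely modeled on the kernel's list.h:

#include <stdio.h>

/* minimal circular doubly-linked list, modeled on the kernel's list.h */
struct list_head {
	struct list_head *next, *prev;
};

#define LIST_HEAD_INIT(name) { &(name), &(name) }

static void list_add_tail(struct list_head *new, struct list_head *head)
{
	new->prev = head->prev;
	new->next = head;
	head->prev->next = new;
	head->prev = new;
}

static void list_del(struct list_head *entry)
{
	entry->prev->next = entry->next;
	entry->next->prev = entry->prev;
}

/* the two-call sequence from the hunk, folded into one helper */
static void list_move_tail(struct list_head *entry, struct list_head *head)
{
	list_del(entry);
	list_add_tail(entry, head);
}

int main(void)
{
	struct list_head blocked = LIST_HEAD_INIT(blocked);
	struct list_head granted = LIST_HEAD_INIT(granted);
	struct list_head lock    = LIST_HEAD_INIT(lock);

	list_add_tail(&lock, &blocked);   /* lock waits on the blocked queue */
	list_move_tail(&lock, &granted);  /* manual grant, as in the hunk */
	printf("lock on granted queue: %s\n",
	       granted.next == &lock ? "yes" : "no");
	return 0;
}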
fs/ocfs2/dlm/dlmmaster.c (+6, -1)
···
 			atomic_set(&mle->woken, 1);
 			spin_unlock(&mle->spinlock);
 			wake_up(&mle->wq);
-			/* final put will take care of list removal */
+			/* do not need events any longer, so detach
+			 * from heartbeat */
+			__dlm_mle_detach_hb_events(dlm, mle);
 			__dlm_put_mle(mle);
 		}
 		continue;
···
 		dlm_move_lockres_to_recovery_list(dlm, res);
 		spin_unlock(&res->spinlock);
 		dlm_lockres_put(res);
+
+		/* about to get rid of mle, detach from heartbeat */
+		__dlm_mle_detach_hb_events(dlm, mle);

 		/* dump the mle */
 		spin_lock(&dlm->master_lock);
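Both hunks apply the same ordering rule: detach the mle from heartbeat events before dropping a reference that may be the last one, so a late heartbeat callback can never reach freed memory. A minimal sketch of that unregister-before-put ordering, with illustrative names rather than the kernel API:

#include <stdlib.h>

struct watcher {
	int refcount;
	int on_event_list;	/* registered for heartbeat callbacks? */
};

/* unlink from the callback list; after this no event can find us */
static void detach_events(struct watcher *w)
{
	w->on_event_list = 0;
}

static void put_watcher(struct watcher *w)
{
	if (--w->refcount == 0)
		free(w);	/* last put frees the object */
}

static void drop_watcher(struct watcher *w)
{
	detach_events(w);	/* first: unreachable from callbacks */
	put_watcher(w);		/* then: drop the possibly-final ref */
}

int main(void)
{
	struct watcher *w = calloc(1, sizeof(*w));

	w->refcount = 1;
	w->on_event_list = 1;
	drop_watcher(w);	/* safe: detached before the final put */
	return 0;
}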
fs/ocfs2/dlm/dlmrecovery.c (+42)
···
 	return dead;
 }

+int dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout)
+{
+	if (timeout) {
+		mlog(ML_NOTICE, "%s: waiting %dms for notification of "
+		     "death of node %u\n", dlm->name, timeout, node);
+		wait_event_timeout(dlm->dlm_reco_thread_wq,
+				   dlm_is_node_dead(dlm, node),
+				   msecs_to_jiffies(timeout));
+	} else {
+		mlog(ML_NOTICE, "%s: waiting indefinitely for notification "
+		     "of death of node %u\n", dlm->name, node);
+		wait_event(dlm->dlm_reco_thread_wq,
+			   dlm_is_node_dead(dlm, node));
+	}
+	/* for now, return 0 */
+	return 0;
+}
+
 /* callers of the top-level api calls (dlmlock/dlmunlock) should
  * block on the dlm->reco.event when recovery is in progress.
  * the dlm recovery thread will set this state when it begins
···
 			dlm->reco.new_master);
 		status = -EEXIST;
 	} else {
+		status = 0;
+
+		/* see if recovery was already finished elsewhere */
+		spin_lock(&dlm->spinlock);
+		if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) {
+			status = -EINVAL;
+			mlog(0, "%s: got reco EX lock, but "
+			     "node got recovered already\n", dlm->name);
+			if (dlm->reco.new_master != O2NM_INVALID_NODE_NUM) {
+				mlog(ML_ERROR, "%s: new master is %u "
+				     "but no dead node!\n",
+				     dlm->name, dlm->reco.new_master);
+				BUG();
+			}
+		}
+		spin_unlock(&dlm->spinlock);
+	}
+
+	/* if this node has actually become the recovery master,
+	 * set the master and send the messages to begin recovery */
+	if (!status) {
+		mlog(0, "%s: dead=%u, this=%u, sending "
+		     "begin_reco now\n", dlm->name,
+		     dlm->reco.dead_node, dlm->node_num);
 		status = dlm_send_begin_reco_message(dlm,
 					dlm->reco.dead_node);
 		/* this always succeeds */
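dlm_wait_for_node_death() is a thin wrapper over wait_event_timeout()/wait_event(): sleep on the recovery thread's wait queue until dlm_is_node_dead() reports true, giving up after the timeout if one was given. A runnable userspace analogue of the same bounded-wait pattern, built on a condition variable; all names are illustrative (compile with -lpthread):

#include <pthread.h>
#include <stdio.h>
#include <time.h>

static pthread_mutex_t reco_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  reco_cond = PTHREAD_COND_INITIALIZER;
static int node_dead;			/* the condition being waited on */

/* wait up to timeout_ms for node_dead; timeout_ms == 0 waits forever */
static int wait_for_node_death(int timeout_ms)
{
	int rc = 0;

	pthread_mutex_lock(&reco_lock);
	if (timeout_ms) {
		struct timespec deadline;

		clock_gettime(CLOCK_REALTIME, &deadline);
		deadline.tv_sec  += timeout_ms / 1000;
		deadline.tv_nsec += (long)(timeout_ms % 1000) * 1000000L;
		if (deadline.tv_nsec >= 1000000000L) {
			deadline.tv_sec++;
			deadline.tv_nsec -= 1000000000L;
		}
		/* stop on either the condition or the deadline */
		while (!node_dead && rc == 0)
			rc = pthread_cond_timedwait(&reco_cond, &reco_lock,
						    &deadline);
	} else {
		while (!node_dead)
			pthread_cond_wait(&reco_cond, &reco_lock);
	}
	pthread_mutex_unlock(&reco_lock);
	return 0;		/* mirror the kernel helper: always 0 */
}

/* the "heartbeat" side: record the death and wake every waiter */
static void *hb_thread(void *arg)
{
	struct timespec ts = { .tv_sec = 1, .tv_nsec = 0 };

	(void)arg;
	nanosleep(&ts, NULL);	/* simulate heartbeat detection latency */
	pthread_mutex_lock(&reco_lock);
	node_dead = 1;
	pthread_cond_broadcast(&reco_cond);
	pthread_mutex_unlock(&reco_lock);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, hb_thread, NULL);
	wait_for_node_death(5 * 1000);	/* DLM_NODE_DEATH_WAIT_MAX analogue */
	printf("node death observed (or wait timed out)\n");
	pthread_join(t, NULL);
	return 0;
}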
fs/ocfs2/journal.c (+3, -4)
···
 	while (!(kthread_should_stop() &&
 		 atomic_read(&journal->j_num_trans) == 0)) {

-		wait_event_interruptible_timeout(osb->checkpoint_event,
-						 atomic_read(&journal->j_num_trans)
-						 || kthread_should_stop(),
-						 OCFS2_CHECKPOINT_INTERVAL);
+		wait_event_interruptible(osb->checkpoint_event,
+					 atomic_read(&journal->j_num_trans)
+					 || kthread_should_stop());

 		status = ocfs2_commit_cache(osb);
 		if (status < 0)
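With the timeout dropped, the commit thread no longer wakes every OCFS2_CHECKPOINT_INTERVAL only to find nothing queued; it sleeps until it is explicitly woken on osb->checkpoint_event or interrupted by a signal, and the wake condition still includes kthread_should_stop() so shutdown cannot hang. A runnable userspace analogue of that wait-for-work loop, with illustrative names (compile with -lpthread):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  c = PTHREAD_COND_INITIALIZER;
static int num_trans;			/* j_num_trans analogue */
static int should_stop;			/* kthread_should_stop() analogue */

static void *commit_thread(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&m);
	while (!(should_stop && num_trans == 0)) {
		/* no timeout: sleep until new work or a stop request */
		while (!num_trans && !should_stop)
			pthread_cond_wait(&c, &m);
		num_trans = 0;		/* "commit" everything queued */
	}
	pthread_mutex_unlock(&m);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, commit_thread, NULL);

	pthread_mutex_lock(&m);
	num_trans = 1;			/* queue a transaction ... */
	pthread_cond_signal(&c);	/* ... and wake the thread */
	pthread_mutex_unlock(&m);

	pthread_mutex_lock(&m);
	should_stop = 1;		/* ask the thread to exit */
	pthread_cond_signal(&c);
	pthread_mutex_unlock(&m);

	pthread_join(t, NULL);
	printf("commit thread exited cleanly\n");
	return 0;
}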