[PATCH] ocfs2: add dlm_wait_for_node_death

* add a dlm_wait_for_node_death function to be used after receiving a network
  error. it waits up to the given timeout for the heartbeat callbacks to
  update the domain map. without this, some paths may spin and consume enough
  cpu that the heartbeat gets starved and the domain map never updates.

Signed-off-by: Kurt Hackel <kurt.hackel@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>

authored by Kurt Hackel and committed by Mark Fasheh 44465a7d e2b5e450
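
For orientation, the new helper is built on the standard kernel waitqueue
pattern: sleep until a predicate becomes true or a timeout elapses, and rely
on whoever changes the state (here, the heartbeat callbacks updating the
domain map) to wake the queue. Below is a minimal, self-contained sketch of
that pattern; the names (node_waitq, node_is_dead, the example_* functions)
are hypothetical and only illustrate the mechanism the hunks below use, they
are not ocfs2 code.

#include <linux/wait.h>
#include <linux/jiffies.h>

static DECLARE_WAIT_QUEUE_HEAD(node_waitq);	/* hypothetical wait queue */
static int node_is_dead;			/* hypothetical condition; a real
						 * user would guard this with a lock */

/* waiter side: sleep until the condition is set or timeout_ms elapse */
static void example_wait_for_death(int timeout_ms)
{
	wait_event_timeout(node_waitq, node_is_dead,
			   msecs_to_jiffies(timeout_ms));
}

/* notifier side: whoever notices the death sets the condition and wakes
 * the queue, which lets the sleeper above return before the timeout */
static void example_node_down(void)
{
	node_is_dead = 1;
	wake_up(&node_waitq);
}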

+40 -1
+4
fs/ocfs2/dlm/dlmcommon.h
@@ -208,6 +208,9 @@
 #define DLM_LOCK_RES_IN_PROGRESS          0x00000010
 #define DLM_LOCK_RES_MIGRATING            0x00000020
 
+/* max milliseconds to wait to sync up a network failure with a node death */
+#define DLM_NODE_DEATH_WAIT_MAX  (5 * 1000)
+
 #define DLM_PURGE_INTERVAL_MS   (8 * 1000)
 
 struct dlm_lock_resource
@@ -658,6 +661,7 @@
 void dlm_complete_recovery_thread(struct dlm_ctxt *dlm);
 void dlm_wait_for_recovery(struct dlm_ctxt *dlm);
 int dlm_is_node_dead(struct dlm_ctxt *dlm, u8 node);
+int dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout);
 
 void dlm_put(struct dlm_ctxt *dlm);
 struct dlm_ctxt *dlm_grab(struct dlm_ctxt *dlm);
+5
fs/ocfs2/dlm/dlmconvert.c
@@ -392,6 +392,11 @@
 	} else {
 		mlog_errno(tmpret);
 		if (dlm_is_host_down(tmpret)) {
+			/* instead of logging the same network error over
+			 * and over, sleep here and wait for the heartbeat
+			 * to notice the node is dead. times out after 5s. */
+			dlm_wait_for_node_death(dlm, res->owner,
+						DLM_NODE_DEATH_WAIT_MAX);
 			ret = DLM_RECOVERING;
 			mlog(0, "node %u died so returning DLM_RECOVERING "
 			     "from convert message!\n", res->owner);
+13 -1
fs/ocfs2/dlm/dlmlock.c
@@ -646,7 +646,19 @@
 			mlog(0, "retrying lock with migration/"
 			     "recovery/in progress\n");
 			msleep(100);
-			dlm_wait_for_recovery(dlm);
+			/* no waiting for dlm_reco_thread */
+			if (recovery) {
+				if (status == DLM_RECOVERING) {
+					mlog(0, "%s: got RECOVERING "
+					     "for $RECOVERY lock, master "
+					     "was %u\n", dlm->name,
+					     res->owner);
+					dlm_wait_for_node_death(dlm, res->owner,
+							DLM_NODE_DEATH_WAIT_MAX);
+				}
+			} else {
+				dlm_wait_for_recovery(dlm);
+			}
 			goto retry_lock;
 		}
+18
fs/ocfs2/dlm/dlmrecovery.c
@@ -278,6 +278,24 @@
 	return dead;
 }
 
+int dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout)
+{
+	if (timeout) {
+		mlog(ML_NOTICE, "%s: waiting %dms for notification of "
+		     "death of node %u\n", dlm->name, timeout, node);
+		wait_event_timeout(dlm->dlm_reco_thread_wq,
+				   dlm_is_node_dead(dlm, node),
+				   msecs_to_jiffies(timeout));
+	} else {
+		mlog(ML_NOTICE, "%s: waiting indefinitely for notification "
+		     "of death of node %u\n", dlm->name, node);
+		wait_event(dlm->dlm_reco_thread_wq,
+			   dlm_is_node_dead(dlm, node));
+	}
+	/* for now, return 0 */
+	return 0;
+}
+
 /* callers of the top-level api calls (dlmlock/dlmunlock) should
  * block on the dlm->reco.event when recovery is in progress.
  * the dlm recovery thread will set this state when it begins
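
Note that this patch only adds the waiting side. The wakeup comes from the
existing heartbeat node-down path, which clears the dead node's bit in the
domain map (the state dlm_is_node_dead() tests) and wakes dlm_reco_thread_wq.
The sketch below is a simplified illustration of that pre-existing counterpart
as understood here, not code from this patch; the real callback does
considerably more recovery bookkeeping.

/* assumes the definitions in dlmcommon.h; simplified illustration only */
static void example_hb_node_down(struct dlm_ctxt *dlm, int idx)
{
	spin_lock(&dlm->spinlock);
	clear_bit(idx, dlm->domain_map);	/* dlm_is_node_dead(dlm, idx) now true */
	spin_unlock(&dlm->spinlock);

	/* releases waiters in dlm_wait_for_node_death() above */
	wake_up(&dlm->dlm_reco_thread_wq);
}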