Merge branch 'upstream-linus' of git://oss.oracle.com/home/sourcebo/git/ocfs2

+88 -11
+4
fs/ocfs2/dlm/dlmcommon.h
···
  #define DLM_LOCK_RES_IN_PROGRESS   0x00000010
  #define DLM_LOCK_RES_MIGRATING     0x00000020

+ /* max milliseconds to wait to sync up a network failure with a node death */
+ #define DLM_NODE_DEATH_WAIT_MAX    (5 * 1000)
+
  #define DLM_PURGE_INTERVAL_MS      (8 * 1000)

  struct dlm_lock_resource
···
  void dlm_complete_recovery_thread(struct dlm_ctxt *dlm);
  void dlm_wait_for_recovery(struct dlm_ctxt *dlm);
  int dlm_is_node_dead(struct dlm_ctxt *dlm, u8 node);
+ int dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout);

  void dlm_put(struct dlm_ctxt *dlm);
  struct dlm_ctxt *dlm_grab(struct dlm_ctxt *dlm);
+9 -3
fs/ocfs2/dlm/dlmconvert.c
···
      } else {
          mlog_errno(tmpret);
          if (dlm_is_host_down(tmpret)) {
+             /* instead of logging the same network error over
+              * and over, sleep here and wait for the heartbeat
+              * to notice the node is dead. times out after 5s. */
+             dlm_wait_for_node_death(dlm, res->owner,
+                                     DLM_NODE_DEATH_WAIT_MAX);
              ret = DLM_RECOVERING;
              mlog(0, "node %u died so returning DLM_RECOVERING "
                   "from convert message!\n", res->owner);
···
      struct dlm_lockstatus *lksb;
      enum dlm_status status = DLM_NORMAL;
      u32 flags;
-     int call_ast = 0, kick_thread = 0;
+     int call_ast = 0, kick_thread = 0, ast_reserved = 0;

      if (!dlm_grab(dlm)) {
          dlm_error(DLM_REJECTED);
···
      status = __dlm_lockres_state_to_status(res);
      if (status == DLM_NORMAL) {
          __dlm_lockres_reserve_ast(res);
+         ast_reserved = 1;
          res->state |= DLM_LOCK_RES_IN_PROGRESS;
          status = __dlmconvert_master(dlm, res, lock, flags,
                                       cnv->requested_type,
···
      else
          dlm_lock_put(lock);

-     /* either queue the ast or release it */
+     /* either queue the ast or release it, if reserved */
      if (call_ast)
          dlm_queue_ast(dlm, lock);
-     else
+     else if (ast_reserved)
          dlm_lockres_release_ast(dlm, res);

      if (kick_thread)
+24 -1
fs/ocfs2/dlm/dlmlock.c
···
          dlm_error(status);
          dlm_revert_pending_lock(res, lock);
          dlm_lock_put(lock);
+     } else if (dlm_is_recovery_lock(res->lockname.name,
+                                     res->lockname.len)) {
+         /* special case for the $RECOVERY lock.
+          * there will never be an AST delivered to put
+          * this lock on the proper secondary queue
+          * (granted), so do it manually. */
+         mlog(0, "%s: $RECOVERY lock for this node (%u) is "
+              "mastered by %u; got lock, manually granting (no ast)\n",
+              dlm->name, dlm->node_num, res->owner);
+         list_del_init(&lock->list);
+         list_add_tail(&lock->list, &res->granted);
      }
      spin_unlock(&res->spinlock);
···
              mlog(0, "retrying lock with migration/"
                   "recovery/in progress\n");
              msleep(100);
-             dlm_wait_for_recovery(dlm);
+             /* no waiting for dlm_reco_thread */
+             if (recovery) {
+                 if (status == DLM_RECOVERING) {
+                     mlog(0, "%s: got RECOVERING "
+                          "for $RECOVERY lock, master "
+                          "was %u\n", dlm->name,
+                          res->owner);
+                     dlm_wait_for_node_death(dlm, res->owner,
+                                             DLM_NODE_DEATH_WAIT_MAX);
+                 }
+             } else {
+                 dlm_wait_for_recovery(dlm);
+             }
              goto retry_lock;
          }
+6 -1
fs/ocfs2/dlm/dlmmaster.c
···
              atomic_set(&mle->woken, 1);
              spin_unlock(&mle->spinlock);
              wake_up(&mle->wq);
-             /* final put will take care of list removal */
+             /* do not need events any longer, so detach
+              * from heartbeat */
+             __dlm_mle_detach_hb_events(dlm, mle);
              __dlm_put_mle(mle);
          }
          continue;
···
          dlm_move_lockres_to_recovery_list(dlm, res);
          spin_unlock(&res->spinlock);
          dlm_lockres_put(res);
+
+         /* about to get rid of mle, detach from heartbeat */
+         __dlm_mle_detach_hb_events(dlm, mle);

          /* dump the mle */
          spin_lock(&dlm->master_lock);
+42
fs/ocfs2/dlm/dlmrecovery.c
···
      return dead;
  }

+ int dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout)
+ {
+     if (timeout) {
+         mlog(ML_NOTICE, "%s: waiting %dms for notification of "
+              "death of node %u\n", dlm->name, timeout, node);
+         wait_event_timeout(dlm->dlm_reco_thread_wq,
+                            dlm_is_node_dead(dlm, node),
+                            msecs_to_jiffies(timeout));
+     } else {
+         mlog(ML_NOTICE, "%s: waiting indefinitely for notification "
+              "of death of node %u\n", dlm->name, node);
+         wait_event(dlm->dlm_reco_thread_wq,
+                    dlm_is_node_dead(dlm, node));
+     }
+     /* for now, return 0 */
+     return 0;
+ }
+
  /* callers of the top-level api calls (dlmlock/dlmunlock) should
   * block on the dlm->reco.event when recovery is in progress.
   * the dlm recovery thread will set this state when it begins
···
               dlm->reco.new_master);
          status = -EEXIST;
      } else {
+         status = 0;
+
+         /* see if recovery was already finished elsewhere */
+         spin_lock(&dlm->spinlock);
+         if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) {
+             status = -EINVAL;
+             mlog(0, "%s: got reco EX lock, but "
+                  "node got recovered already\n", dlm->name);
+             if (dlm->reco.new_master != O2NM_INVALID_NODE_NUM) {
+                 mlog(ML_ERROR, "%s: new master is %u "
+                      "but no dead node!\n",
+                      dlm->name, dlm->reco.new_master);
+                 BUG();
+             }
+         }
+         spin_unlock(&dlm->spinlock);
+     }
+
+     /* if this node has actually become the recovery master,
+      * set the master and send the messages to begin recovery */
+     if (!status) {
+         mlog(0, "%s: dead=%u, this=%u, sending "
+              "begin_reco now\n", dlm->name,
+              dlm->reco.dead_node, dlm->node_num);
          status = dlm_send_begin_reco_message(dlm,
                        dlm->reco.dead_node);
          /* this always succeeds */
+3 -4
fs/ocfs2/journal.c
···
      while (!(kthread_should_stop() &&
               atomic_read(&journal->j_num_trans) == 0)) {

-         wait_event_interruptible_timeout(osb->checkpoint_event,
-                                          atomic_read(&journal->j_num_trans)
-                                          || kthread_should_stop(),
-                                          OCFS2_CHECKPOINT_INTERVAL);
+         wait_event_interruptible(osb->checkpoint_event,
+                                  atomic_read(&journal->j_num_trans)
+                                  || kthread_should_stop());

          status = ocfs2_commit_cache(osb);
          if (status < 0)
-2
fs/ocfs2/journal.h
···
  #include <linux/fs.h>
  #include <linux/jbd.h>

- #define OCFS2_CHECKPOINT_INTERVAL    (8 * HZ)
-
  enum ocfs2_journal_state {
      OCFS2_JOURNAL_FREE = 0,
      OCFS2_JOURNAL_LOADED,