Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

dlm: fix unlock balance warnings

The in_recovery rw_semaphore has always been acquired and
released by different threads by design. To work around
the "BUG: bad unlock balance detected!" messages, rework
the locking so that the dlm_recoverd thread always performs
both the down_write and the up_write itself.

Signed-off-by: David Teigland <teigland@redhat.com>

+78 -30
+36 -10
fs/dlm/dlm_internal.h
··· 604 604 struct idr ls_recover_idr; 605 605 spinlock_t ls_recover_idr_lock; 606 606 wait_queue_head_t ls_wait_general; 607 + wait_queue_head_t ls_recover_lock_wait; 607 608 struct mutex ls_clear_proc_locks; 608 609 609 610 struct list_head ls_root_list; /* root resources */ ··· 617 616 char ls_name[1]; 618 617 }; 619 618 620 - #define LSFL_WORK 0 621 - #define LSFL_RUNNING 1 622 - #define LSFL_RECOVERY_STOP 2 623 - #define LSFL_RCOM_READY 3 624 - #define LSFL_RCOM_WAIT 4 625 - #define LSFL_UEVENT_WAIT 5 626 - #define LSFL_TIMEWARN 6 627 - #define LSFL_CB_DELAY 7 628 - #define LSFL_NODIR 8 619 + /* 620 + * LSFL_RECOVER_STOP - dlm_ls_stop() sets this to tell dlm recovery routines 621 + * that they should abort what they're doing so new recovery can be started. 622 + * 623 + * LSFL_RECOVER_DOWN - dlm_ls_stop() sets this to tell dlm_recoverd that it 624 + * should do down_write() on the in_recovery rw_semaphore. (doing down_write 625 + * within dlm_ls_stop causes complaints about the lock acquired/released 626 + * in different contexts.) 627 + * 628 + * LSFL_RECOVER_LOCK - dlm_recoverd holds the in_recovery rw_semaphore. 629 + * It sets this after it is done with down_write() on the in_recovery 630 + * rw_semaphore and clears it after it has released the rw_semaphore. 631 + * 632 + * LSFL_RECOVER_WORK - dlm_ls_start() sets this to tell dlm_recoverd that it 633 + * should begin recovery of the lockspace. 634 + * 635 + * LSFL_RUNNING - set when normal locking activity is enabled. 636 + * dlm_ls_stop() clears this to tell dlm locking routines that they should 637 + * quit what they are doing so recovery can run. dlm_recoverd sets 638 + * this after recovery is finished. 
639 + */ 640 + 641 + #define LSFL_RECOVER_STOP 0 642 + #define LSFL_RECOVER_DOWN 1 643 + #define LSFL_RECOVER_LOCK 2 644 + #define LSFL_RECOVER_WORK 3 645 + #define LSFL_RUNNING 4 646 + 647 + #define LSFL_RCOM_READY 5 648 + #define LSFL_RCOM_WAIT 6 649 + #define LSFL_UEVENT_WAIT 7 650 + #define LSFL_TIMEWARN 8 651 + #define LSFL_CB_DELAY 9 652 + #define LSFL_NODIR 10 629 653 630 654 /* much of this is just saving user space pointers associated with the 631 655 lock that we pass back to the user lib with an ast */ ··· 693 667 694 668 static inline int dlm_recovery_stopped(struct dlm_ls *ls) 695 669 { 696 - return test_bit(LSFL_RECOVERY_STOP, &ls->ls_flags); 670 + return test_bit(LSFL_RECOVER_STOP, &ls->ls_flags); 697 671 } 698 672 699 673 static inline int dlm_no_directory(struct dlm_ls *ls)
+12 -3
fs/dlm/lockspace.c
··· 582 582 INIT_LIST_HEAD(&ls->ls_root_list); 583 583 init_rwsem(&ls->ls_root_sem); 584 584 585 - down_write(&ls->ls_in_recovery); 586 - 587 585 spin_lock(&lslist_lock); 588 586 ls->ls_create_count = 1; 589 587 list_add(&ls->ls_list, &lslist); ··· 595 597 } 596 598 } 597 599 598 - /* needs to find ls in lslist */ 600 + init_waitqueue_head(&ls->ls_recover_lock_wait); 601 + 602 + /* 603 + * Once started, dlm_recoverd first looks for ls in lslist, then 604 + * initializes ls_in_recovery as locked in "down" mode. We need 605 + * to wait for the wakeup from dlm_recoverd because in_recovery 606 + * has to start out in down mode. 607 + */ 608 + 599 609 error = dlm_recoverd_start(ls); 600 610 if (error) { 601 611 log_error(ls, "can't start dlm_recoverd %d", error); 602 612 goto out_callback; 603 613 } 614 + 615 + wait_event(ls->ls_recover_lock_wait, 616 + test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags)); 604 617 605 618 ls->ls_kobj.kset = dlm_kset; 606 619 error = kobject_init_and_add(&ls->ls_kobj, &dlm_ktype, NULL,
+11 -6
fs/dlm/member.c
··· 616 616 down_write(&ls->ls_recv_active); 617 617 618 618 /* 619 - * Abort any recovery that's in progress (see RECOVERY_STOP, 619 + * Abort any recovery that's in progress (see RECOVER_STOP, 620 620 * dlm_recovery_stopped()) and tell any other threads running in the 621 621 * dlm to quit any processing (see RUNNING, dlm_locking_stopped()). 622 622 */ 623 623 624 624 spin_lock(&ls->ls_recover_lock); 625 - set_bit(LSFL_RECOVERY_STOP, &ls->ls_flags); 625 + set_bit(LSFL_RECOVER_STOP, &ls->ls_flags); 626 626 new = test_and_clear_bit(LSFL_RUNNING, &ls->ls_flags); 627 627 ls->ls_recover_seq++; 628 628 spin_unlock(&ls->ls_recover_lock); ··· 642 642 * when recovery is complete. 643 643 */ 644 644 645 - if (new) 646 - down_write(&ls->ls_in_recovery); 645 + if (new) { 646 + set_bit(LSFL_RECOVER_DOWN, &ls->ls_flags); 647 + wake_up_process(ls->ls_recoverd_task); 648 + wait_event(ls->ls_recover_lock_wait, 649 + test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags)); 650 + } 647 651 648 652 /* 649 653 * The recoverd suspend/resume makes sure that dlm_recoverd (if 650 - * running) has noticed RECOVERY_STOP above and quit processing the 654 + * running) has noticed RECOVER_STOP above and quit processing the 651 655 * previous recovery. 652 656 */ 653 657 ··· 713 709 kfree(rv_old); 714 710 } 715 711 716 - dlm_recoverd_kick(ls); 712 + set_bit(LSFL_RECOVER_WORK, &ls->ls_flags); 713 + wake_up_process(ls->ls_recoverd_task); 717 714 return 0; 718 715 719 716 fail:
+1 -1
fs/dlm/rcom.c
··· 581 581 582 582 spin_lock(&ls->ls_recover_lock); 583 583 status = ls->ls_recover_status; 584 - stop = test_bit(LSFL_RECOVERY_STOP, &ls->ls_flags); 584 + stop = test_bit(LSFL_RECOVER_STOP, &ls->ls_flags); 585 585 seq = ls->ls_recover_seq; 586 586 spin_unlock(&ls->ls_recover_lock); 587 587
+18 -9
fs/dlm/recoverd.c
··· 41 41 set_bit(LSFL_RUNNING, &ls->ls_flags); 42 42 /* unblocks processes waiting to enter the dlm */ 43 43 up_write(&ls->ls_in_recovery); 44 + clear_bit(LSFL_RECOVER_LOCK, &ls->ls_flags); 44 45 error = 0; 45 46 } 46 47 spin_unlock(&ls->ls_recover_lock); ··· 263 262 rv = ls->ls_recover_args; 264 263 ls->ls_recover_args = NULL; 265 264 if (rv && ls->ls_recover_seq == rv->seq) 266 - clear_bit(LSFL_RECOVERY_STOP, &ls->ls_flags); 265 + clear_bit(LSFL_RECOVER_STOP, &ls->ls_flags); 267 266 spin_unlock(&ls->ls_recover_lock); 268 267 269 268 if (rv) { ··· 283 282 return -1; 284 283 } 285 284 285 + down_write(&ls->ls_in_recovery); 286 + set_bit(LSFL_RECOVER_LOCK, &ls->ls_flags); 287 + wake_up(&ls->ls_recover_lock_wait); 288 + 286 289 while (!kthread_should_stop()) { 287 290 set_current_state(TASK_INTERRUPTIBLE); 288 - if (!test_bit(LSFL_WORK, &ls->ls_flags)) 291 + if (!test_bit(LSFL_RECOVER_WORK, &ls->ls_flags) && 292 + !test_bit(LSFL_RECOVER_DOWN, &ls->ls_flags)) 289 293 schedule(); 290 294 set_current_state(TASK_RUNNING); 291 295 292 - if (test_and_clear_bit(LSFL_WORK, &ls->ls_flags)) 296 + if (test_and_clear_bit(LSFL_RECOVER_DOWN, &ls->ls_flags)) { 297 + down_write(&ls->ls_in_recovery); 298 + set_bit(LSFL_RECOVER_LOCK, &ls->ls_flags); 299 + wake_up(&ls->ls_recover_lock_wait); 300 + } 301 + 302 + if (test_and_clear_bit(LSFL_RECOVER_WORK, &ls->ls_flags)) 293 303 do_ls_recovery(ls); 294 304 } 295 305 306 + if (test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags)) 307 + up_write(&ls->ls_in_recovery); 308 + 296 309 dlm_put_lockspace(ls); 297 310 return 0; 298 - } 299 - 300 - void dlm_recoverd_kick(struct dlm_ls *ls) 301 - { 302 - set_bit(LSFL_WORK, &ls->ls_flags); 303 - wake_up_process(ls->ls_recoverd_task); 304 311 } 305 312 306 313 int dlm_recoverd_start(struct dlm_ls *ls)
-1
fs/dlm/recoverd.h
··· 14 14 #ifndef __RECOVERD_DOT_H__ 15 15 #define __RECOVERD_DOT_H__ 16 16 17 - void dlm_recoverd_kick(struct dlm_ls *ls); 18 17 void dlm_recoverd_stop(struct dlm_ls *ls); 19 18 int dlm_recoverd_start(struct dlm_ls *ls); 20 19 void dlm_recoverd_suspend(struct dlm_ls *ls);