Merge git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-2.6-nmw

+1

fs/dlm/dlm_internal.h

··· 491 491 uint64_t ls_recover_seq; 492 492 struct dlm_recover *ls_recover_args; 493 493 struct rw_semaphore ls_in_recovery; /* block local requests */ 494 + struct rw_semaphore ls_recv_active; /* block dlm_recv */ 494 495 struct list_head ls_requestqueue;/* queue remote requests */ 495 496 struct mutex ls_requestqueue_mutex; 496 497 char *ls_recover_buf;

+85 -57

fs/dlm/lock.c

··· 3638 3638 dlm_put_lkb(lkb); 3639 3639 } 3640 3640 3641 - int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery) 3641 + static void _receive_message(struct dlm_ls *ls, struct dlm_message *ms) 3642 3642 { 3643 - struct dlm_message *ms = (struct dlm_message *) hd; 3644 - struct dlm_ls *ls; 3645 - int error = 0; 3646 - 3647 - if (!recovery) 3648 - dlm_message_in(ms); 3649 - 3650 - ls = dlm_find_lockspace_global(hd->h_lockspace); 3651 - if (!ls) { 3652 - log_print("drop message %d from %d for unknown lockspace %d", 3653 - ms->m_type, nodeid, hd->h_lockspace); 3654 - return -EINVAL; 3655 - } 3656 - 3657 - /* recovery may have just ended leaving a bunch of backed-up requests 3658 - in the requestqueue; wait while dlm_recoverd clears them */ 3659 - 3660 - if (!recovery) 3661 - dlm_wait_requestqueue(ls); 3662 - 3663 - /* recovery may have just started while there were a bunch of 3664 - in-flight requests -- save them in requestqueue to be processed 3665 - after recovery. we can't let dlm_recvd block on the recovery 3666 - lock. if dlm_recoverd is calling this function to clear the 3667 - requestqueue, it needs to be interrupted (-EINTR) if another 3668 - recovery operation is starting. */ 3669 - 3670 - while (1) { 3671 - if (dlm_locking_stopped(ls)) { 3672 - if (recovery) { 3673 - error = -EINTR; 3674 - goto out; 3675 - } 3676 - error = dlm_add_requestqueue(ls, nodeid, hd); 3677 - if (error == -EAGAIN) 3678 - continue; 3679 - else { 3680 - error = -EINTR; 3681 - goto out; 3682 - } 3683 - } 3684 - 3685 - if (dlm_lock_recovery_try(ls)) 3686 - break; 3687 - schedule(); 3688 - } 3689 - 3690 3643 switch (ms->m_type) { 3691 3644 3692 3645 /* messages sent to a master node */ ··· 3714 3761 log_error(ls, "unknown message type %d", ms->m_type); 3715 3762 } 3716 3763 3717 - dlm_unlock_recovery(ls); 3718 - out: 3719 - dlm_put_lockspace(ls); 3720 3764 dlm_astd_wake(); 3721 - return error; 3722 3765 } 3723 3766 3767 + /* If the lockspace is in recovery mode (locking stopped), then normal 3768 + messages are saved on the requestqueue for processing after recovery is 3769 + done. When not in recovery mode, we wait for dlm_recoverd to drain saved 3770 + messages off the requestqueue before we process new ones. This occurs right 3771 + after recovery completes when we transition from saving all messages on 3772 + requestqueue, to processing all the saved messages, to processing new 3773 + messages as they arrive. */ 3724 3774 3725 - /* 3726 - * Recovery related 3727 - */ 3775 + static void dlm_receive_message(struct dlm_ls *ls, struct dlm_message *ms, 3776 + int nodeid) 3777 + { 3778 + if (dlm_locking_stopped(ls)) { 3779 + dlm_add_requestqueue(ls, nodeid, (struct dlm_header *) ms); 3780 + } else { 3781 + dlm_wait_requestqueue(ls); 3782 + _receive_message(ls, ms); 3783 + } 3784 + } 3785 + 3786 + /* This is called by dlm_recoverd to process messages that were saved on 3787 + the requestqueue. */ 3788 + 3789 + void dlm_receive_message_saved(struct dlm_ls *ls, struct dlm_message *ms) 3790 + { 3791 + _receive_message(ls, ms); 3792 + } 3793 + 3794 + /* This is called by the midcomms layer when something is received for 3795 + the lockspace. It could be either a MSG (normal message sent as part of 3796 + standard locking activity) or an RCOM (recovery message sent as part of 3797 + lockspace recovery). */ 3798 + 3799 + void dlm_receive_buffer(struct dlm_header *hd, int nodeid) 3800 + { 3801 + struct dlm_message *ms = (struct dlm_message *) hd; 3802 + struct dlm_rcom *rc = (struct dlm_rcom *) hd; 3803 + struct dlm_ls *ls; 3804 + int type = 0; 3805 + 3806 + switch (hd->h_cmd) { 3807 + case DLM_MSG: 3808 + dlm_message_in(ms); 3809 + type = ms->m_type; 3810 + break; 3811 + case DLM_RCOM: 3812 + dlm_rcom_in(rc); 3813 + type = rc->rc_type; 3814 + break; 3815 + default: 3816 + log_print("invalid h_cmd %d from %u", hd->h_cmd, nodeid); 3817 + return; 3818 + } 3819 + 3820 + if (hd->h_nodeid != nodeid) { 3821 + log_print("invalid h_nodeid %d from %d lockspace %x", 3822 + hd->h_nodeid, nodeid, hd->h_lockspace); 3823 + return; 3824 + } 3825 + 3826 + ls = dlm_find_lockspace_global(hd->h_lockspace); 3827 + if (!ls) { 3828 + log_print("invalid h_lockspace %x from %d cmd %d type %d", 3829 + hd->h_lockspace, nodeid, hd->h_cmd, type); 3830 + 3831 + if (hd->h_cmd == DLM_RCOM && type == DLM_RCOM_STATUS) 3832 + dlm_send_ls_not_ready(nodeid, rc); 3833 + return; 3834 + } 3835 + 3836 + /* this rwsem allows dlm_ls_stop() to wait for all dlm_recv threads to 3837 + be inactive (in this ls) before transitioning to recovery mode */ 3838 + 3839 + down_read(&ls->ls_recv_active); 3840 + if (hd->h_cmd == DLM_MSG) 3841 + dlm_receive_message(ls, ms, nodeid); 3842 + else 3843 + dlm_receive_rcom(ls, rc, nodeid); 3844 + up_read(&ls->ls_recv_active); 3845 + 3846 + dlm_put_lockspace(ls); 3847 + } 3728 3848 3729 3849 static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb) 3730 3850 { ··· 4455 4429 4456 4430 if (lvb_in && ua->lksb.sb_lvbptr) 4457 4431 memcpy(ua->lksb.sb_lvbptr, lvb_in, DLM_USER_LVB_LEN); 4458 - ua->castparam = ua_tmp->castparam; 4432 + if (ua_tmp->castparam) 4433 + ua->castparam = ua_tmp->castparam; 4459 4434 ua->user_lksb = ua_tmp->user_lksb; 4460 4435 4461 4436 error = set_unlock_args(flags, ua, &args); ··· 4501 4474 goto out; 4502 4475 4503 4476 ua = (struct dlm_user_args *)lkb->lkb_astparam; 4504 - ua->castparam = ua_tmp->castparam; 4477 + if (ua_tmp->castparam) 4478 + ua->castparam = ua_tmp->castparam; 4505 4479 ua->user_lksb = ua_tmp->user_lksb; 4506 4480 4507 4481 error = set_unlock_args(flags, ua, &args);

+2 -1

fs/dlm/lock.h

··· 16 16 void dlm_print_rsb(struct dlm_rsb *r); 17 17 void dlm_dump_rsb(struct dlm_rsb *r); 18 18 void dlm_print_lkb(struct dlm_lkb *lkb); 19 - int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery); 19 + void dlm_receive_message_saved(struct dlm_ls *ls, struct dlm_message *ms); 20 + void dlm_receive_buffer(struct dlm_header *hd, int nodeid); 20 21 int dlm_modes_compat(int mode1, int mode2); 21 22 int dlm_find_rsb(struct dlm_ls *ls, char *name, int namelen, 22 23 unsigned int flags, struct dlm_rsb **r_ret);

+1

fs/dlm/lockspace.c

··· 519 519 ls->ls_recover_seq = 0; 520 520 ls->ls_recover_args = NULL; 521 521 init_rwsem(&ls->ls_in_recovery); 522 + init_rwsem(&ls->ls_recv_active); 522 523 INIT_LIST_HEAD(&ls->ls_requestqueue); 523 524 mutex_init(&ls->ls_requestqueue_mutex); 524 525 mutex_init(&ls->ls_clear_proc_locks);

+8 -15

fs/dlm/lowcomms.c

··· 334 334 con->rx_page = NULL; 335 335 } 336 336 337 - /* If we are an 'othercon' then NULL the pointer to us 338 - from the parent and tidy ourself up */ 339 - if (test_bit(CF_IS_OTHERCON, &con->flags)) { 340 - struct connection *parent = __nodeid2con(con->nodeid, 0); 341 - parent->othercon = NULL; 342 - kmem_cache_free(con_cache, con); 343 - } 344 - else { 345 - /* Parent connections get reused */ 346 - con->retries = 0; 347 - mutex_unlock(&con->sock_mutex); 348 - } 337 + con->retries = 0; 338 + mutex_unlock(&con->sock_mutex); 349 339 } 350 340 351 341 /* We only send shutdown messages to nodes that are not part of the cluster */ ··· 721 731 INIT_WORK(&othercon->swork, process_send_sockets); 722 732 INIT_WORK(&othercon->rwork, process_recv_sockets); 723 733 set_bit(CF_IS_OTHERCON, &othercon->flags); 734 + } 735 + if (!othercon->sock) { 724 736 newcon->othercon = othercon; 725 737 othercon->sock = newsock; 726 738 newsock->sk->sk_user_data = othercon; ··· 1264 1272 if (len) { 1265 1273 ret = sendpage(con->sock, e->page, offset, len, 1266 1274 msg_flags); 1267 - if (ret == -EAGAIN || ret == 0) 1275 + if (ret == -EAGAIN || ret == 0) { 1276 + cond_resched(); 1268 1277 goto out; 1278 + } 1269 1279 if (ret <= 0) 1270 1280 goto send_error; 1271 - } else { 1281 + } 1272 1282 /* Don't starve people filling buffers */ 1273 1283 cond_resched(); 1274 - } 1275 1284 1276 1285 spin_lock(&con->writequeue_lock); 1277 1286 e->offset += ret;

+27 -14

fs/dlm/member.c

··· 18 18 #include "rcom.h" 19 19 #include "config.h" 20 20 21 - /* 22 - * Following called by dlm_recoverd thread 23 - */ 24 - 25 21 static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new) 26 22 { 27 23 struct dlm_member *memb = NULL; ··· 246 250 return error; 247 251 } 248 252 249 - /* 250 - * Following called from lockspace.c 251 - */ 253 + /* Userspace guarantees that dlm_ls_stop() has completed on all nodes before 254 + dlm_ls_start() is called on any of them to start the new recovery. */ 252 255 253 256 int dlm_ls_stop(struct dlm_ls *ls) 254 257 { 255 258 int new; 256 259 257 260 /* 258 - * A stop cancels any recovery that's in progress (see RECOVERY_STOP, 259 - * dlm_recovery_stopped()) and prevents any new locks from being 260 - * processed (see RUNNING, dlm_locking_stopped()). 261 + * Prevent dlm_recv from being in the middle of something when we do 262 + * the stop. This includes ensuring dlm_recv isn't processing a 263 + * recovery message (rcom), while dlm_recoverd is aborting and 264 + * resetting things from an in-progress recovery. i.e. we want 265 + * dlm_recoverd to abort its recovery without worrying about dlm_recv 266 + * processing an rcom at the same time. Stopping dlm_recv also makes 267 + * it easy for dlm_receive_message() to check locking stopped and add a 268 + * message to the requestqueue without races. 269 + */ 270 + 271 + down_write(&ls->ls_recv_active); 272 + 273 + /* 274 + * Abort any recovery that's in progress (see RECOVERY_STOP, 275 + * dlm_recovery_stopped()) and tell any other threads running in the 276 + * dlm to quit any processing (see RUNNING, dlm_locking_stopped()). 261 277 */ 262 278 263 279 spin_lock(&ls->ls_recover_lock); ··· 279 271 spin_unlock(&ls->ls_recover_lock); 280 272 281 273 /* 274 + * Let dlm_recv run again, now any normal messages will be saved on the 275 + * requestqueue for later. 276 + */ 277 + 278 + up_write(&ls->ls_recv_active); 279 + 280 + /* 282 281 * This in_recovery lock does two things: 283 - * 284 282 * 1) Keeps this function from returning until all threads are out 285 283 * of locking routines and locking is truely stopped. 286 284 * 2) Keeps any new requests from being processed until it's unlocked ··· 298 284 299 285 /* 300 286 * The recoverd suspend/resume makes sure that dlm_recoverd (if 301 - * running) has noticed the clearing of RUNNING above and quit 302 - * processing the previous recovery. This will be true for all nodes 303 - * before any nodes start the new recovery. 287 + * running) has noticed RECOVERY_STOP above and quit processing the 288 + * previous recovery. 304 289 */ 305 290 306 291 dlm_recoverd_suspend(ls);

+2 -15

fs/dlm/midcomms.c

··· 2 2 ******************************************************************************* 3 3 ** 4 4 ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 5 - ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. 5 + ** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. 6 6 ** 7 7 ** This copyrighted material is made available to anyone wishing to use, 8 8 ** modify, copy, or redistribute it subject to the terms and conditions ··· 27 27 #include "dlm_internal.h" 28 28 #include "lowcomms.h" 29 29 #include "config.h" 30 - #include "rcom.h" 31 30 #include "lock.h" 32 31 #include "midcomms.h" 33 32 ··· 116 117 offset &= (limit - 1); 117 118 len -= msglen; 118 119 119 - switch (msg->h_cmd) { 120 - case DLM_MSG: 121 - dlm_receive_message(msg, nodeid, 0); 122 - break; 123 - 124 - case DLM_RCOM: 125 - dlm_receive_rcom(msg, nodeid); 126 - break; 127 - 128 - default: 129 - log_print("unknown msg type %x from %u: %u %u %u %u", 130 - msg->h_cmd, nodeid, msglen, len, offset, ret); 131 - } 120 + dlm_receive_buffer(msg, nodeid); 132 121 } 133 122 134 123 if (msg != (struct dlm_header *) __tmp)

+8 -28

fs/dlm/rcom.c

··· 2 2 ******************************************************************************* 3 3 ** 4 4 ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 5 - ** Copyright (C) 2005 Red Hat, Inc. All rights reserved. 5 + ** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved. 6 6 ** 7 7 ** This copyrighted material is made available to anyone wishing to use, 8 8 ** modify, copy, or redistribute it subject to the terms and conditions ··· 386 386 dlm_recover_process_copy(ls, rc_in); 387 387 } 388 388 389 - static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in) 389 + /* If the lockspace doesn't exist then still send a status message 390 + back; it's possible that it just doesn't have its global_id yet. */ 391 + 392 + int dlm_send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in) 390 393 { 391 394 struct dlm_rcom *rc; 392 395 struct rcom_config *rf; ··· 449 446 return rv; 450 447 } 451 448 452 - /* Called by dlm_recvd; corresponds to dlm_receive_message() but special 449 + /* Called by dlm_recv; corresponds to dlm_receive_message() but special 453 450 recovery-only comms are sent through here. */ 454 451 455 - void dlm_receive_rcom(struct dlm_header *hd, int nodeid) 452 + void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) 456 453 { 457 - struct dlm_rcom *rc = (struct dlm_rcom *) hd; 458 - struct dlm_ls *ls; 459 - 460 - dlm_rcom_in(rc); 461 - 462 - /* If the lockspace doesn't exist then still send a status message 463 - back; it's possible that it just doesn't have its global_id yet. */ 464 - 465 - ls = dlm_find_lockspace_global(hd->h_lockspace); 466 - if (!ls) { 467 - log_print("lockspace %x from %d type %x not found", 468 - hd->h_lockspace, nodeid, rc->rc_type); 469 - if (rc->rc_type == DLM_RCOM_STATUS) 470 - send_ls_not_ready(nodeid, rc); 471 - return; 472 - } 473 - 474 454 if (dlm_recovery_stopped(ls) && (rc->rc_type != DLM_RCOM_STATUS)) { 475 455 log_debug(ls, "ignoring recovery message %x from %d", 476 456 rc->rc_type, nodeid); ··· 462 476 463 477 if (is_old_reply(ls, rc)) 464 478 goto out; 465 - 466 - if (nodeid != rc->rc_header.h_nodeid) { 467 - log_error(ls, "bad rcom nodeid %d from %d", 468 - rc->rc_header.h_nodeid, nodeid); 469 - goto out; 470 - } 471 479 472 480 switch (rc->rc_type) { 473 481 case DLM_RCOM_STATUS: ··· 500 520 DLM_ASSERT(0, printk("rc_type=%x\n", rc->rc_type);); 501 521 } 502 522 out: 503 - dlm_put_lockspace(ls); 523 + return; 504 524 } 505 525

+3 -2

fs/dlm/rcom.h

··· 2 2 ******************************************************************************* 3 3 ** 4 4 ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 5 - ** Copyright (C) 2005 Red Hat, Inc. All rights reserved. 5 + ** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved. 6 6 ** 7 7 ** This copyrighted material is made available to anyone wishing to use, 8 8 ** modify, copy, or redistribute it subject to the terms and conditions ··· 18 18 int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name,int last_len); 19 19 int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid); 20 20 int dlm_send_rcom_lock(struct dlm_rsb *r, struct dlm_lkb *lkb); 21 - void dlm_receive_rcom(struct dlm_header *hd, int nodeid); 21 + void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid); 22 + int dlm_send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in); 22 23 23 24 #endif 24 25

+10 -1

fs/dlm/recoverd.c

··· 24 24 25 25 26 26 /* If the start for which we're re-enabling locking (seq) has been superseded 27 - by a newer stop (ls_recover_seq), we need to leave locking disabled. */ 27 + by a newer stop (ls_recover_seq), we need to leave locking disabled. 28 + 29 + We suspend dlm_recv threads here to avoid the race where dlm_recv a) sees 30 + locking stopped and b) adds a message to the requestqueue, but dlm_recoverd 31 + enables locking and clears the requestqueue between a and b. */ 28 32 29 33 static int enable_locking(struct dlm_ls *ls, uint64_t seq) 30 34 { 31 35 int error = -EINTR; 32 36 37 + down_write(&ls->ls_recv_active); 38 + 33 39 spin_lock(&ls->ls_recover_lock); 34 40 if (ls->ls_recover_seq == seq) { 35 41 set_bit(LSFL_RUNNING, &ls->ls_flags); 42 + /* unblocks processes waiting to enter the dlm */ 36 43 up_write(&ls->ls_in_recovery); 37 44 error = 0; 38 45 } 39 46 spin_unlock(&ls->ls_recover_lock); 47 + 48 + up_write(&ls->ls_recv_active); 40 49 return error; 41 50 } 42 51

+24 -34

fs/dlm/requestqueue.c

··· 1 1 /****************************************************************************** 2 2 ******************************************************************************* 3 3 ** 4 - ** Copyright (C) 2005 Red Hat, Inc. All rights reserved. 4 + ** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved. 5 5 ** 6 6 ** This copyrighted material is made available to anyone wishing to use, 7 7 ** modify, copy, or redistribute it subject to the terms and conditions ··· 20 20 struct rq_entry { 21 21 struct list_head list; 22 22 int nodeid; 23 - char request[1]; 23 + char request[0]; 24 24 }; 25 25 26 26 /* ··· 30 30 * lockspace is enabled on some while still suspended on others. 31 31 */ 32 32 33 - int dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd) 33 + void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd) 34 34 { 35 35 struct rq_entry *e; 36 36 int length = hd->h_length; 37 - int rv = 0; 38 37 39 38 e = kmalloc(sizeof(struct rq_entry) + length, GFP_KERNEL); 40 39 if (!e) { 41 - log_print("dlm_add_requestqueue: out of memory\n"); 42 - return 0; 40 + log_print("dlm_add_requestqueue: out of memory len %d", length); 41 + return; 43 42 } 44 43 45 44 e->nodeid = nodeid; 46 45 memcpy(e->request, hd, length); 47 46 48 - /* We need to check dlm_locking_stopped() after taking the mutex to 49 - avoid a race where dlm_recoverd enables locking and runs 50 - process_requestqueue between our earlier dlm_locking_stopped check 51 - and this addition to the requestqueue. */ 52 - 53 47 mutex_lock(&ls->ls_requestqueue_mutex); 54 - if (dlm_locking_stopped(ls)) 55 - list_add_tail(&e->list, &ls->ls_requestqueue); 56 - else { 57 - log_debug(ls, "dlm_add_requestqueue skip from %d", nodeid); 58 - kfree(e); 59 - rv = -EAGAIN; 60 - } 48 + list_add_tail(&e->list, &ls->ls_requestqueue); 61 49 mutex_unlock(&ls->ls_requestqueue_mutex); 62 - return rv; 63 50 } 51 + 52 + /* 53 + * Called by dlm_recoverd to process normal messages saved while recovery was 54 + * happening. Normal locking has been enabled before this is called. dlm_recv 55 + * upon receiving a message, will wait for all saved messages to be drained 56 + * here before processing the message it got. If a new dlm_ls_stop() arrives 57 + * while we're processing these saved messages, it may block trying to suspend 58 + * dlm_recv if dlm_recv is waiting for us in dlm_wait_requestqueue. In that 59 + * case, we don't abort since locking_stopped is still 0. If dlm_recv is not 60 + * waiting for us, then this processing may be aborted due to locking_stopped. 61 + */ 64 62 65 63 int dlm_process_requestqueue(struct dlm_ls *ls) 66 64 { 67 65 struct rq_entry *e; 68 - struct dlm_header *hd; 69 66 int error = 0; 70 67 71 68 mutex_lock(&ls->ls_requestqueue_mutex); ··· 76 79 e = list_entry(ls->ls_requestqueue.next, struct rq_entry, list); 77 80 mutex_unlock(&ls->ls_requestqueue_mutex); 78 81 79 - hd = (struct dlm_header *) e->request; 80 - error = dlm_receive_message(hd, e->nodeid, 1); 81 - 82 - if (error == -EINTR) { 83 - /* entry is left on requestqueue */ 84 - log_debug(ls, "process_requestqueue abort eintr"); 85 - break; 86 - } 82 + dlm_receive_message_saved(ls, (struct dlm_message *)e->request); 87 83 88 84 mutex_lock(&ls->ls_requestqueue_mutex); 89 85 list_del(&e->list); ··· 96 106 97 107 /* 98 108 * After recovery is done, locking is resumed and dlm_recoverd takes all the 99 - * saved requests and processes them as they would have been by dlm_recvd. At 100 - * the same time, dlm_recvd will start receiving new requests from remote 101 - * nodes. We want to delay dlm_recvd processing new requests until 102 - * dlm_recoverd has finished processing the old saved requests. 109 + * saved requests and processes them as they would have been by dlm_recv. At 110 + * the same time, dlm_recv will start receiving new requests from remote nodes. 111 + * We want to delay dlm_recv processing new requests until dlm_recoverd has 112 + * finished processing the old saved requests. We don't check for locking 113 + * stopped here because dlm_ls_stop won't stop locking until it's suspended us 114 + * (dlm_recv). 103 115 */ 104 116 105 117 void dlm_wait_requestqueue(struct dlm_ls *ls) ··· 109 117 for (;;) { 110 118 mutex_lock(&ls->ls_requestqueue_mutex); 111 119 if (list_empty(&ls->ls_requestqueue)) 112 - break; 113 - if (dlm_locking_stopped(ls)) 114 120 break; 115 121 mutex_unlock(&ls->ls_requestqueue_mutex); 116 122 schedule();

+2 -2

fs/dlm/requestqueue.h

··· 1 1 /****************************************************************************** 2 2 ******************************************************************************* 3 3 ** 4 - ** Copyright (C) 2005 Red Hat, Inc. All rights reserved. 4 + ** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved. 5 5 ** 6 6 ** This copyrighted material is made available to anyone wishing to use, 7 7 ** modify, copy, or redistribute it subject to the terms and conditions ··· 13 13 #ifndef __REQUESTQUEUE_DOT_H__ 14 14 #define __REQUESTQUEUE_DOT_H__ 15 15 16 - int dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd); 16 + void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd); 17 17 int dlm_process_requestqueue(struct dlm_ls *ls); 18 18 void dlm_wait_requestqueue(struct dlm_ls *ls); 19 19 void dlm_purge_requestqueue(struct dlm_ls *ls);

+33 -2

fs/gfs2/bmap.c

··· 93 93 map_bh(bh, inode->i_sb, block); 94 94 95 95 set_buffer_uptodate(bh); 96 + if (!gfs2_is_jdata(ip)) 97 + mark_buffer_dirty(bh); 96 98 if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) 97 99 gfs2_trans_add_bh(ip->i_gl, bh, 0); 98 - mark_buffer_dirty(bh); 99 100 100 101 if (release) { 101 102 unlock_page(page); ··· 1086 1085 return error; 1087 1086 } 1088 1087 1088 + static int do_touch(struct gfs2_inode *ip, u64 size) 1089 + { 1090 + struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1091 + struct buffer_head *dibh; 1092 + int error; 1093 + 1094 + error = gfs2_trans_begin(sdp, RES_DINODE, 0); 1095 + if (error) 1096 + return error; 1097 + 1098 + down_write(&ip->i_rw_mutex); 1099 + 1100 + error = gfs2_meta_inode_buffer(ip, &dibh); 1101 + if (error) 1102 + goto do_touch_out; 1103 + 1104 + ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; 1105 + gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1106 + gfs2_dinode_out(ip, dibh->b_data); 1107 + brelse(dibh); 1108 + 1109 + do_touch_out: 1110 + up_write(&ip->i_rw_mutex); 1111 + gfs2_trans_end(sdp); 1112 + return error; 1113 + } 1114 + 1089 1115 /** 1090 1116 * gfs2_truncatei - make a file a given size 1091 1117 * @ip: the inode ··· 1133 1105 1134 1106 if (size > ip->i_di.di_size) 1135 1107 error = do_grow(ip, size); 1136 - else 1108 + else if (size < ip->i_di.di_size) 1137 1109 error = do_shrink(ip, size); 1110 + else 1111 + /* update time stamps */ 1112 + error = do_touch(ip, size); 1138 1113 1139 1114 return error; 1140 1115 }

-24

fs/gfs2/daemon.c

··· 35 35 The kthread functions used to start these daemons block and flush signals. */ 36 36 37 37 /** 38 - * gfs2_scand - Look for cached glocks and inodes to toss from memory 39 - * @sdp: Pointer to GFS2 superblock 40 - * 41 - * One of these daemons runs, finding candidates to add to sd_reclaim_list. 42 - * See gfs2_glockd() 43 - */ 44 - 45 - int gfs2_scand(void *data) 46 - { 47 - struct gfs2_sbd *sdp = data; 48 - unsigned long t; 49 - 50 - while (!kthread_should_stop()) { 51 - gfs2_scand_internal(sdp); 52 - t = gfs2_tune_get(sdp, gt_scand_secs) * HZ; 53 - if (freezing(current)) 54 - refrigerator(); 55 - schedule_timeout_interruptible(t); 56 - } 57 - 58 - return 0; 59 - } 60 - 61 - /** 62 38 * gfs2_glockd - Reclaim unused glock structures 63 39 * @sdp: Pointer to GFS2 superblock 64 40 *

-1

fs/gfs2/daemon.h

··· 10 10 #ifndef __DAEMON_DOT_H__ 11 11 #define __DAEMON_DOT_H__ 12 12 13 - int gfs2_scand(void *data); 14 13 int gfs2_glockd(void *data); 15 14 int gfs2_recoverd(void *data); 16 15 int gfs2_logd(void *data);

+2 -1

fs/gfs2/dir.c

··· 1043 1043 1044 1044 error = gfs2_meta_inode_buffer(dip, &dibh); 1045 1045 if (!gfs2_assert_withdraw(GFS2_SB(&dip->i_inode), !error)) { 1046 + gfs2_trans_add_bh(dip->i_gl, dibh, 1); 1046 1047 dip->i_di.di_blocks++; 1047 1048 gfs2_set_inode_blocks(&dip->i_inode); 1048 1049 gfs2_dinode_out(dip, dibh->b_data); ··· 1502 1501 inode = gfs2_inode_lookup(dir->i_sb, 1503 1502 be16_to_cpu(dent->de_type), 1504 1503 be64_to_cpu(dent->de_inum.no_addr), 1505 - be64_to_cpu(dent->de_inum.no_formal_ino)); 1504 + be64_to_cpu(dent->de_inum.no_formal_ino), 0); 1506 1505 brelse(bh); 1507 1506 return inode; 1508 1507 }

+4 -4

fs/gfs2/eaops.c

··· 200 200 return gfs2_ea_remove_i(ip, er); 201 201 } 202 202 203 - static struct gfs2_eattr_operations gfs2_user_eaops = { 203 + static const struct gfs2_eattr_operations gfs2_user_eaops = { 204 204 .eo_get = user_eo_get, 205 205 .eo_set = user_eo_set, 206 206 .eo_remove = user_eo_remove, 207 207 .eo_name = "user", 208 208 }; 209 209 210 - struct gfs2_eattr_operations gfs2_system_eaops = { 210 + const struct gfs2_eattr_operations gfs2_system_eaops = { 211 211 .eo_get = system_eo_get, 212 212 .eo_set = system_eo_set, 213 213 .eo_remove = system_eo_remove, 214 214 .eo_name = "system", 215 215 }; 216 216 217 - static struct gfs2_eattr_operations gfs2_security_eaops = { 217 + static const struct gfs2_eattr_operations gfs2_security_eaops = { 218 218 .eo_get = security_eo_get, 219 219 .eo_set = security_eo_set, 220 220 .eo_remove = security_eo_remove, 221 221 .eo_name = "security", 222 222 }; 223 223 224 - struct gfs2_eattr_operations *gfs2_ea_ops[] = { 224 + const struct gfs2_eattr_operations *gfs2_ea_ops[] = { 225 225 NULL, 226 226 &gfs2_user_eaops, 227 227 &gfs2_system_eaops,

+2 -2

fs/gfs2/eaops.h

··· 22 22 23 23 unsigned int gfs2_ea_name2type(const char *name, const char **truncated_name); 24 24 25 - extern struct gfs2_eattr_operations gfs2_system_eaops; 25 + extern const struct gfs2_eattr_operations gfs2_system_eaops; 26 26 27 - extern struct gfs2_eattr_operations *gfs2_ea_ops[]; 27 + extern const struct gfs2_eattr_operations *gfs2_ea_ops[]; 28 28 29 29 #endif /* __EAOPS_DOT_H__ */ 30 30

+190 -103

fs/gfs2/glock.c

··· 25 25 #include <asm/uaccess.h> 26 26 #include <linux/seq_file.h> 27 27 #include <linux/debugfs.h> 28 - #include <linux/module.h> 29 - #include <linux/kallsyms.h> 28 + #include <linux/kthread.h> 29 + #include <linux/freezer.h> 30 + #include <linux/workqueue.h> 31 + #include <linux/jiffies.h> 30 32 31 33 #include "gfs2.h" 32 34 #include "incore.h" ··· 50 48 int hash; /* hash bucket index */ 51 49 struct gfs2_sbd *sdp; /* incore superblock */ 52 50 struct gfs2_glock *gl; /* current glock struct */ 53 - struct hlist_head *hb_list; /* current hash bucket ptr */ 54 51 struct seq_file *seq; /* sequence file for debugfs */ 55 52 char string[512]; /* scratch space */ 56 53 }; ··· 60 59 static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl); 61 60 static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh); 62 61 static void gfs2_glock_drop_th(struct gfs2_glock *gl); 62 + static void run_queue(struct gfs2_glock *gl); 63 + 63 64 static DECLARE_RWSEM(gfs2_umount_flush_sem); 64 65 static struct dentry *gfs2_root; 66 + static struct task_struct *scand_process; 67 + static unsigned int scand_secs = 5; 68 + static struct workqueue_struct *glock_workqueue; 65 69 66 70 #define GFS2_GL_HASH_SHIFT 15 67 71 #define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT) ··· 282 276 return gl; 283 277 } 284 278 279 + static void glock_work_func(struct work_struct *work) 280 + { 281 + struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work); 282 + 283 + spin_lock(&gl->gl_spin); 284 + if (test_and_clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags)) 285 + set_bit(GLF_DEMOTE, &gl->gl_flags); 286 + run_queue(gl); 287 + spin_unlock(&gl->gl_spin); 288 + gfs2_glock_put(gl); 289 + } 290 + 285 291 /** 286 292 * gfs2_glock_get() - Get a glock, or create one if one doesn't exist 287 293 * @sdp: The GFS2 superblock ··· 333 315 gl->gl_name = name; 334 316 atomic_set(&gl->gl_ref, 1); 335 317 gl->gl_state = LM_ST_UNLOCKED; 318 + gl->gl_demote_state = LM_ST_EXCLUSIVE; 336 319 gl->gl_hash = hash; 337 320 gl->gl_owner_pid = 0; 338 321 gl->gl_ip = 0; ··· 342 323 gl->gl_req_bh = NULL; 343 324 gl->gl_vn = 0; 344 325 gl->gl_stamp = jiffies; 326 + gl->gl_tchange = jiffies; 345 327 gl->gl_object = NULL; 346 328 gl->gl_sbd = sdp; 347 329 gl->gl_aspace = NULL; 348 330 lops_init_le(&gl->gl_le, &gfs2_glock_lops); 331 + INIT_DELAYED_WORK(&gl->gl_work, glock_work_func); 349 332 350 333 /* If this glock protects actual on-disk data or metadata blocks, 351 334 create a VFS inode to manage the pages/buffers holding them. */ ··· 461 440 462 441 static void gfs2_demote_wake(struct gfs2_glock *gl) 463 442 { 443 + BUG_ON(!spin_is_locked(&gl->gl_spin)); 444 + gl->gl_demote_state = LM_ST_EXCLUSIVE; 464 445 clear_bit(GLF_DEMOTE, &gl->gl_flags); 465 446 smp_mb__after_clear_bit(); 466 447 wake_up_bit(&gl->gl_flags, GLF_DEMOTE); ··· 568 545 return 0; 569 546 } 570 547 set_bit(GLF_LOCK, &gl->gl_flags); 571 - spin_unlock(&gl->gl_spin); 572 548 if (gl->gl_demote_state == LM_ST_UNLOCKED || 573 - gl->gl_state != LM_ST_EXCLUSIVE) 549 + gl->gl_state != LM_ST_EXCLUSIVE) { 550 + spin_unlock(&gl->gl_spin); 574 551 gfs2_glock_drop_th(gl); 575 - else 552 + } else { 553 + spin_unlock(&gl->gl_spin); 576 554 gfs2_glock_xmote_th(gl, NULL); 555 + } 577 556 spin_lock(&gl->gl_spin); 578 557 579 558 return 0; ··· 704 679 * practise: LM_ST_SHARED and LM_ST_UNLOCKED 705 680 */ 706 681 707 - static void handle_callback(struct gfs2_glock *gl, unsigned int state, int remote) 682 + static void handle_callback(struct gfs2_glock *gl, unsigned int state, 683 + int remote, unsigned long delay) 708 684 { 685 + int bit = delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE; 686 + 709 687 spin_lock(&gl->gl_spin); 710 - if (test_and_set_bit(GLF_DEMOTE, &gl->gl_flags) == 0) { 688 + set_bit(bit, &gl->gl_flags); 689 + if (gl->gl_demote_state == LM_ST_EXCLUSIVE) { 711 690 gl->gl_demote_state = state; 712 691 gl->gl_demote_time = jiffies; 713 692 if (remote && gl->gl_ops->go_type == LM_TYPE_IOPEN && 714 693 gl->gl_object) { 715 - struct inode *inode = igrab(gl->gl_object); 694 + gfs2_glock_schedule_for_reclaim(gl); 716 695 spin_unlock(&gl->gl_spin); 717 - if (inode) { 718 - d_prune_aliases(inode); 719 - iput(inode); 720 - } 721 696 return; 722 697 } 723 - } else if (gl->gl_demote_state != LM_ST_UNLOCKED) { 724 - gl->gl_demote_state = state; 698 + } else if (gl->gl_demote_state != LM_ST_UNLOCKED && 699 + gl->gl_demote_state != state) { 700 + gl->gl_demote_state = LM_ST_UNLOCKED; 725 701 } 726 702 spin_unlock(&gl->gl_spin); 727 703 } ··· 749 723 } 750 724 751 725 gl->gl_state = new_state; 726 + gl->gl_tchange = jiffies; 752 727 } 753 728 754 729 /** ··· 787 760 788 761 if (!gh) { 789 762 gl->gl_stamp = jiffies; 790 - if (ret & LM_OUT_CANCELED) 763 + if (ret & LM_OUT_CANCELED) { 791 764 op_done = 0; 792 - else 765 + } else { 766 + spin_lock(&gl->gl_spin); 767 + if (gl->gl_state != gl->gl_demote_state) { 768 + gl->gl_req_bh = NULL; 769 + spin_unlock(&gl->gl_spin); 770 + gfs2_glock_drop_th(gl); 771 + gfs2_glock_put(gl); 772 + return; 773 + } 793 774 gfs2_demote_wake(gl); 775 + spin_unlock(&gl->gl_spin); 776 + } 794 777 } else { 795 778 spin_lock(&gl->gl_spin); 796 779 list_del_init(&gh->gh_list); ··· 836 799 gl->gl_req_gh = NULL; 837 800 gl->gl_req_bh = NULL; 838 801 clear_bit(GLF_LOCK, &gl->gl_flags); 839 - run_queue(gl); 840 802 spin_unlock(&gl->gl_spin); 841 803 } 842 804 ··· 853 817 * 854 818 */ 855 819 856 - void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh) 820 + static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh) 857 821 { 858 822 struct gfs2_sbd *sdp = gl->gl_sbd; 859 823 int flags = gh ? gh->gh_flags : 0; ··· 907 871 gfs2_assert_warn(sdp, !ret); 908 872 909 873 state_change(gl, LM_ST_UNLOCKED); 910 - gfs2_demote_wake(gl); 911 874 912 875 if (glops->go_inval) 913 876 glops->go_inval(gl, DIO_METADATA); ··· 919 884 } 920 885 921 886 spin_lock(&gl->gl_spin); 887 + gfs2_demote_wake(gl); 922 888 gl->gl_req_gh = NULL; 923 889 gl->gl_req_bh = NULL; 924 890 clear_bit(GLF_LOCK, &gl->gl_flags); 925 - run_queue(gl); 926 891 spin_unlock(&gl->gl_spin); 927 892 928 893 gfs2_glock_put(gl); ··· 1102 1067 if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags)) 1103 1068 BUG(); 1104 1069 1105 - existing = find_holder_by_owner(&gl->gl_holders, gh->gh_owner_pid); 1106 - if (existing) { 1107 - print_symbol(KERN_WARNING "original: %s\n", existing->gh_ip); 1108 - printk(KERN_INFO "pid : %d\n", existing->gh_owner_pid); 1109 - printk(KERN_INFO "lock type : %d lock state : %d\n", 1110 - existing->gh_gl->gl_name.ln_type, existing->gh_gl->gl_state); 1111 - print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip); 1112 - printk(KERN_INFO "pid : %d\n", gh->gh_owner_pid); 1113 - printk(KERN_INFO "lock type : %d lock state : %d\n", 1114 - gl->gl_name.ln_type, gl->gl_state); 1115 - BUG(); 1116 - } 1117 - 1118 - existing = find_holder_by_owner(&gl->gl_waiters3, gh->gh_owner_pid); 1119 - if (existing) { 1120 - print_symbol(KERN_WARNING "original: %s\n", existing->gh_ip); 1121 - print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip); 1122 - BUG(); 1070 + if (!(gh->gh_flags & GL_FLOCK)) { 1071 + existing = find_holder_by_owner(&gl->gl_holders, 1072 + gh->gh_owner_pid); 1073 + if (existing) { 1074 + print_symbol(KERN_WARNING "original: %s\n", 1075 + existing->gh_ip); 1076 + printk(KERN_INFO "pid : %d\n", existing->gh_owner_pid); 1077 + printk(KERN_INFO "lock type : %d lock state : %d\n", 1078 + existing->gh_gl->gl_name.ln_type, 1079 + existing->gh_gl->gl_state); 1080 + print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip); 1081 + printk(KERN_INFO "pid : %d\n", gh->gh_owner_pid); 1082 + printk(KERN_INFO "lock type : %d lock state : %d\n", 1083 + gl->gl_name.ln_type, gl->gl_state); 1084 + BUG(); 1085 + } 1086 + 1087 + existing = find_holder_by_owner(&gl->gl_waiters3, 1088 + gh->gh_owner_pid); 1089 + if (existing) { 1090 + print_symbol(KERN_WARNING "original: %s\n", 1091 + existing->gh_ip); 1092 + print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip); 1093 + BUG(); 1094 + } 1123 1095 } 1124 1096 1125 1097 if (gh->gh_flags & LM_FLAG_PRIORITY) ··· 1237 1195 { 1238 1196 struct gfs2_glock *gl = gh->gh_gl; 1239 1197 const struct gfs2_glock_operations *glops = gl->gl_ops; 1198 + unsigned delay = 0; 1240 1199 1241 1200 if (gh->gh_flags & GL_NOCACHE) 1242 - handle_callback(gl, LM_ST_UNLOCKED, 0); 1201 + handle_callback(gl, LM_ST_UNLOCKED, 0, 0); 1243 1202 1244 1203 gfs2_glmutex_lock(gl); 1245 1204 ··· 1258 1215 } 1259 1216 1260 1217 clear_bit(GLF_LOCK, &gl->gl_flags); 1261 - run_queue(gl); 1262 1218 spin_unlock(&gl->gl_spin); 1219 + 1220 + gfs2_glock_hold(gl); 1221 + if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && 1222 + !test_bit(GLF_DEMOTE, &gl->gl_flags)) 1223 + delay = gl->gl_ops->go_min_hold_time; 1224 + if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) 1225 + gfs2_glock_put(gl); 1263 1226 } 1264 1227 1265 1228 void gfs2_glock_dq_wait(struct gfs2_holder *gh) ··· 1492 1443 unsigned int state) 1493 1444 { 1494 1445 struct gfs2_glock *gl; 1446 + unsigned long delay = 0; 1447 + unsigned long holdtime; 1448 + unsigned long now = jiffies; 1495 1449 1496 1450 gl = gfs2_glock_find(sdp, name); 1497 1451 if (!gl) 1498 1452 return; 1499 1453 1500 - handle_callback(gl, state, 1); 1454 + holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; 1455 + if (time_before(now, holdtime)) 1456 + delay = holdtime - now; 1501 1457 1502 - spin_lock(&gl->gl_spin); 1503 - run_queue(gl); 1504 - spin_unlock(&gl->gl_spin); 1505 - 1506 - gfs2_glock_put(gl); 1458 + handle_callback(gl, state, 1, delay); 1459 + if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) 1460 + gfs2_glock_put(gl); 1507 1461 } 1508 1462 1509 1463 /** ··· 1547 1495 return; 1548 1496 if (!gfs2_assert_warn(sdp, gl->gl_req_bh)) 1549 1497 gl->gl_req_bh(gl, async->lc_ret); 1550 - gfs2_glock_put(gl); 1498 + if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) 1499 + gfs2_glock_put(gl); 1551 1500 up_read(&gfs2_umount_flush_sem); 1552 1501 return; 1553 1502 } ··· 1641 1588 if (gfs2_glmutex_trylock(gl)) { 1642 1589 if (list_empty(&gl->gl_holders) && 1643 1590 gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl)) 1644 - handle_callback(gl, LM_ST_UNLOCKED, 0); 1591 + handle_callback(gl, LM_ST_UNLOCKED, 0, 0); 1645 1592 gfs2_glmutex_unlock(gl); 1646 1593 } 1647 1594 ··· 1670 1617 goto out; 1671 1618 gl = list_entry(head->first, struct gfs2_glock, gl_list); 1672 1619 while(1) { 1673 - if (gl->gl_sbd == sdp) { 1620 + if (!sdp || gl->gl_sbd == sdp) { 1674 1621 gfs2_glock_hold(gl); 1675 1622 read_unlock(gl_lock_addr(hash)); 1676 1623 if (prev) ··· 1688 1635 read_unlock(gl_lock_addr(hash)); 1689 1636 if (prev) 1690 1637 gfs2_glock_put(prev); 1638 + cond_resched(); 1691 1639 return has_entries; 1692 1640 } 1693 1641 ··· 1717 1663 } 1718 1664 1719 1665 /** 1720 - * gfs2_scand_internal - Look for glocks and inodes to toss from memory 1721 - * @sdp: the filesystem 1722 - * 1723 - */ 1724 - 1725 - void gfs2_scand_internal(struct gfs2_sbd *sdp) 1726 - { 1727 - unsigned int x; 1728 - 1729 - for (x = 0; x < GFS2_GL_HASH_SIZE; x++) 1730 - examine_bucket(scan_glock, sdp, x); 1731 - } 1732 - 1733 - /** 1734 1666 * clear_glock - look at a glock and see if we can free it from glock cache 1735 1667 * @gl: the glock to look at 1736 1668 * ··· 1741 1701 if (gfs2_glmutex_trylock(gl)) { 1742 1702 if (list_empty(&gl->gl_holders) && 1743 1703 gl->gl_state != LM_ST_UNLOCKED) 1744 - handle_callback(gl, LM_ST_UNLOCKED, 0); 1704 + handle_callback(gl, LM_ST_UNLOCKED, 0, 0); 1745 1705 gfs2_glmutex_unlock(gl); 1746 1706 } 1747 1707 } ··· 1883 1843 1884 1844 spin_lock(&gl->gl_spin); 1885 1845 1886 - print_dbg(gi, "Glock 0x%p (%u, %llu)\n", gl, gl->gl_name.ln_type, 1846 + print_dbg(gi, "Glock 0x%p (%u, 0x%llx)\n", gl, gl->gl_name.ln_type, 1887 1847 (unsigned long long)gl->gl_name.ln_number); 1888 1848 print_dbg(gi, " gl_flags ="); 1889 1849 for (x = 0; x < 32; x++) { ··· 2003 1963 return error; 2004 1964 } 2005 1965 1966 + /** 1967 + * gfs2_scand - Look for cached glocks and inodes to toss from memory 1968 + * @sdp: Pointer to GFS2 superblock 1969 + * 1970 + * One of these daemons runs, finding candidates to add to sd_reclaim_list. 1971 + * See gfs2_glockd() 1972 + */ 1973 + 1974 + static int gfs2_scand(void *data) 1975 + { 1976 + unsigned x; 1977 + unsigned delay; 1978 + 1979 + while (!kthread_should_stop()) { 1980 + for (x = 0; x < GFS2_GL_HASH_SIZE; x++) 1981 + examine_bucket(scan_glock, NULL, x); 1982 + if (freezing(current)) 1983 + refrigerator(); 1984 + delay = scand_secs; 1985 + if (delay < 1) 1986 + delay = 1; 1987 + schedule_timeout_interruptible(delay * HZ); 1988 + } 1989 + 1990 + return 0; 1991 + } 1992 + 1993 + 1994 + 2006 1995 int __init gfs2_glock_init(void) 2007 1996 { 2008 1997 unsigned i; ··· 2043 1974 rwlock_init(&gl_hash_locks[i]); 2044 1975 } 2045 1976 #endif 1977 + 1978 + scand_process = kthread_run(gfs2_scand, NULL, "gfs2_scand"); 1979 + if (IS_ERR(scand_process)) 1980 + return PTR_ERR(scand_process); 1981 + 1982 + glock_workqueue = create_workqueue("glock_workqueue"); 1983 + if (IS_ERR(glock_workqueue)) { 1984 + kthread_stop(scand_process); 1985 + return PTR_ERR(glock_workqueue); 1986 + } 1987 + 2046 1988 return 0; 2047 1989 } 2048 1990 1991 + void gfs2_glock_exit(void) 1992 + { 1993 + destroy_workqueue(glock_workqueue); 1994 + kthread_stop(scand_process); 1995 + } 1996 + 1997 + module_param(scand_secs, uint, S_IRUGO|S_IWUSR); 1998 + MODULE_PARM_DESC(scand_secs, "The number of seconds between scand runs"); 1999 + 2049 2000 static int gfs2_glock_iter_next(struct glock_iter *gi) 2050 2001 { 2002 + struct gfs2_glock *gl; 2003 + 2004 + restart: 2051 2005 read_lock(gl_lock_addr(gi->hash)); 2052 - while (1) { 2053 - if (!gi->hb_list) { /* If we don't have a hash bucket yet */ 2054 - gi->hb_list = &gl_hash_table[gi->hash].hb_list; 2055 - if (hlist_empty(gi->hb_list)) { 2056 - read_unlock(gl_lock_addr(gi->hash)); 2057 - gi->hash++; 2058 - read_lock(gl_lock_addr(gi->hash)); 2059 - gi->hb_list = NULL; 2060 - if (gi->hash >= GFS2_GL_HASH_SIZE) { 2061 - read_unlock(gl_lock_addr(gi->hash)); 2062 - return 1; 2063 - } 2064 - else 2065 - continue; 2066 - } 2067 - if (!hlist_empty(gi->hb_list)) { 2068 - gi->gl = list_entry(gi->hb_list->first, 2069 - struct gfs2_glock, 2070 - gl_list); 2071 - } 2072 - } else { 2073 - if (gi->gl->gl_list.next == NULL) { 2074 - read_unlock(gl_lock_addr(gi->hash)); 2075 - gi->hash++; 2076 - read_lock(gl_lock_addr(gi->hash)); 2077 - gi->hb_list = NULL; 2078 - continue; 2079 - } 2080 - gi->gl = list_entry(gi->gl->gl_list.next, 2081 - struct gfs2_glock, gl_list); 2082 - } 2006 + gl = gi->gl; 2007 + if (gl) { 2008 + gi->gl = hlist_entry(gl->gl_list.next, 2009 + struct gfs2_glock, gl_list); 2083 2010 if (gi->gl) 2084 - break; 2011 + gfs2_glock_hold(gi->gl); 2085 2012 } 2086 2013 read_unlock(gl_lock_addr(gi->hash)); 2014 + if (gl) 2015 + gfs2_glock_put(gl); 2016 + if (gl && gi->gl == NULL) 2017 + gi->hash++; 2018 + while(gi->gl == NULL) { 2019 + if (gi->hash >= GFS2_GL_HASH_SIZE) 2020 + return 1; 2021 + read_lock(gl_lock_addr(gi->hash)); 2022 + gi->gl = hlist_entry(gl_hash_table[gi->hash].hb_list.first, 2023 + struct gfs2_glock, gl_list); 2024 + if (gi->gl) 2025 + gfs2_glock_hold(gi->gl); 2026 + read_unlock(gl_lock_addr(gi->hash)); 2027 + gi->hash++; 2028 + } 2029 + 2030 + if (gi->sdp != gi->gl->gl_sbd) 2031 + goto restart; 2032 + 2087 2033 return 0; 2088 2034 } 2089 2035 2090 2036 static void gfs2_glock_iter_free(struct glock_iter *gi) 2091 2037 { 2038 + if (gi->gl) 2039 + gfs2_glock_put(gi->gl); 2092 2040 kfree(gi); 2093 2041 } 2094 2042 ··· 2119 2033 2120 2034 gi->sdp = sdp; 2121 2035 gi->hash = 0; 2122 - gi->gl = NULL; 2123 - gi->hb_list = NULL; 2124 2036 gi->seq = NULL; 2037 + gi->gl = NULL; 2125 2038 memset(gi->string, 0, sizeof(gi->string)); 2126 2039 2127 2040 if (gfs2_glock_iter_next(gi)) { ··· 2140 2055 if (!gi) 2141 2056 return NULL; 2142 2057 2143 - while (n--) { 2058 + while(n--) { 2144 2059 if (gfs2_glock_iter_next(gi)) { 2145 2060 gfs2_glock_iter_free(gi); 2146 2061 return NULL; ··· 2167 2082 2168 2083 static void gfs2_glock_seq_stop(struct seq_file *file, void *iter_ptr) 2169 2084 { 2170 - /* nothing for now */ 2085 + struct glock_iter *gi = iter_ptr; 2086 + if (gi) 2087 + gfs2_glock_iter_free(gi); 2171 2088 } 2172 2089 2173 2090 static int gfs2_glock_seq_show(struct seq_file *file, void *iter_ptr) ··· 2182 2095 return 0; 2183 2096 } 2184 2097 2185 - static struct seq_operations gfs2_glock_seq_ops = { 2098 + static const struct seq_operations gfs2_glock_seq_ops = { 2186 2099 .start = gfs2_glock_seq_start, 2187 2100 .next = gfs2_glock_seq_next, 2188 2101 .stop = gfs2_glock_seq_stop,

+3 -2

fs/gfs2/glock.h

··· 26 26 #define GL_SKIP 0x00000100 27 27 #define GL_ATIME 0x00000200 28 28 #define GL_NOCACHE 0x00000400 29 + #define GL_FLOCK 0x00000800 29 30 #define GL_NOCANCEL 0x00001000 30 31 31 32 #define GLR_TRYFAILED 13 ··· 133 132 134 133 void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl); 135 134 void gfs2_reclaim_glock(struct gfs2_sbd *sdp); 136 - 137 - void gfs2_scand_internal(struct gfs2_sbd *sdp); 138 135 void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait); 139 136 140 137 int __init gfs2_glock_init(void); 138 + void gfs2_glock_exit(void); 139 + 141 140 int gfs2_create_debugfs_file(struct gfs2_sbd *sdp); 142 141 void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp); 143 142 int gfs2_register_debugfs(void);

+10 -14

fs/gfs2/glops.c

··· 41 41 struct list_head *head = &gl->gl_ail_list; 42 42 struct gfs2_bufdata *bd; 43 43 struct buffer_head *bh; 44 - u64 blkno; 45 44 int error; 46 45 47 46 blocks = atomic_read(&gl->gl_ail_count); ··· 56 57 bd = list_entry(head->next, struct gfs2_bufdata, 57 58 bd_ail_gl_list); 58 59 bh = bd->bd_bh; 59 - blkno = bh->b_blocknr; 60 + gfs2_remove_from_ail(NULL, bd); 61 + bd->bd_bh = NULL; 62 + bh->b_private = NULL; 63 + bd->bd_blkno = bh->b_blocknr; 60 64 gfs2_assert_withdraw(sdp, !buffer_busy(bh)); 61 - 62 - bd->bd_ail = NULL; 63 - list_del(&bd->bd_ail_st_list); 64 - list_del(&bd->bd_ail_gl_list); 65 - atomic_dec(&gl->gl_ail_count); 66 - brelse(bh); 67 - gfs2_log_unlock(sdp); 68 - 69 - gfs2_trans_add_revoke(sdp, blkno); 70 - 71 - gfs2_log_lock(sdp); 65 + gfs2_trans_add_revoke(sdp, bd); 72 66 } 73 67 gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count)); 74 68 gfs2_log_unlock(sdp); ··· 148 156 ip = NULL; 149 157 150 158 if (test_bit(GLF_DIRTY, &gl->gl_flags)) { 151 - if (ip) 159 + if (ip && !gfs2_is_jdata(ip)) 152 160 filemap_fdatawrite(ip->i_inode.i_mapping); 153 161 gfs2_log_flush(gl->gl_sbd, gl); 162 + if (ip && gfs2_is_jdata(ip)) 163 + filemap_fdatawrite(ip->i_inode.i_mapping); 154 164 gfs2_meta_sync(gl); 155 165 if (ip) { 156 166 struct address_space *mapping = ip->i_inode.i_mapping; ··· 446 452 .go_lock = inode_go_lock, 447 453 .go_unlock = inode_go_unlock, 448 454 .go_type = LM_TYPE_INODE, 455 + .go_min_hold_time = HZ / 10, 449 456 }; 450 457 451 458 const struct gfs2_glock_operations gfs2_rgrp_glops = { ··· 457 462 .go_lock = rgrp_go_lock, 458 463 .go_unlock = rgrp_go_unlock, 459 464 .go_type = LM_TYPE_RGRP, 465 + .go_min_hold_time = HZ / 10, 460 466 }; 461 467 462 468 const struct gfs2_glock_operations gfs2_trans_glops = {

+15 -16

fs/gfs2/incore.h

··· 11 11 #define __INCORE_DOT_H__ 12 12 13 13 #include <linux/fs.h> 14 + #include <linux/workqueue.h> 14 15 15 16 #define DIO_WAIT 0x00000010 16 17 #define DIO_METADATA 0x00000020 ··· 114 113 struct buffer_head *bd_bh; 115 114 struct gfs2_glock *bd_gl; 116 115 117 - struct list_head bd_list_tr; 116 + union { 117 + struct list_head list_tr; 118 + u64 blkno; 119 + } u; 120 + #define bd_list_tr u.list_tr 121 + #define bd_blkno u.blkno 122 + 118 123 struct gfs2_log_element bd_le; 119 124 120 125 struct gfs2_ail *bd_ail; ··· 137 130 int (*go_lock) (struct gfs2_holder *gh); 138 131 void (*go_unlock) (struct gfs2_holder *gh); 139 132 const int go_type; 133 + const unsigned long go_min_hold_time; 140 134 }; 141 135 142 136 enum { ··· 169 161 GLF_LOCK = 1, 170 162 GLF_STICKY = 2, 171 163 GLF_DEMOTE = 3, 164 + GLF_PENDING_DEMOTE = 4, 172 165 GLF_DIRTY = 5, 173 166 }; 174 167 ··· 202 193 203 194 u64 gl_vn; 204 195 unsigned long gl_stamp; 196 + unsigned long gl_tchange; 205 197 void *gl_object; 206 198 207 199 struct list_head gl_reclaim; ··· 213 203 struct gfs2_log_element gl_le; 214 204 struct list_head gl_ail_list; 215 205 atomic_t gl_ail_count; 206 + struct delayed_work gl_work; 216 207 }; 217 208 218 209 struct gfs2_alloc { ··· 304 293 struct gfs2_holder f_fl_gh; 305 294 }; 306 295 307 - struct gfs2_revoke { 308 - struct gfs2_log_element rv_le; 309 - u64 rv_blkno; 310 - }; 311 - 312 296 struct gfs2_revoke_replay { 313 297 struct list_head rr_list; 314 298 u64 rr_blkno; ··· 339 333 u64 qd_sync_gen; 340 334 unsigned long qd_last_warn; 341 335 unsigned long qd_last_touched; 342 - }; 343 - 344 - struct gfs2_log_buf { 345 - struct list_head lb_list; 346 - struct buffer_head *lb_bh; 347 - struct buffer_head *lb_real; 348 336 }; 349 337 350 338 struct gfs2_trans { ··· 429 429 unsigned int gt_log_flush_secs; 430 430 unsigned int gt_jindex_refresh_secs; /* Check for new journal index */ 431 431 432 - unsigned int gt_scand_secs; 433 432 unsigned int gt_recoverd_secs; 434 433 unsigned int gt_logd_secs; 435 434 unsigned int gt_quotad_secs; ··· 573 574 574 575 /* Daemon stuff */ 575 576 576 - struct task_struct *sd_scand_process; 577 577 struct task_struct *sd_recoverd_process; 578 578 struct task_struct *sd_logd_process; 579 579 struct task_struct *sd_quotad_process; ··· 607 609 unsigned int sd_log_num_revoke; 608 610 unsigned int sd_log_num_rg; 609 611 unsigned int sd_log_num_databuf; 610 - unsigned int sd_log_num_jdata; 611 612 612 613 struct list_head sd_log_le_gl; 613 614 struct list_head sd_log_le_buf; 614 615 struct list_head sd_log_le_revoke; 615 616 struct list_head sd_log_le_rg; 616 617 struct list_head sd_log_le_databuf; 618 + struct list_head sd_log_le_ordered; 617 619 618 620 unsigned int sd_log_blks_free; 619 621 struct mutex sd_log_reserve_mutex; ··· 625 627 626 628 unsigned long sd_log_flush_time; 627 629 struct rw_semaphore sd_log_flush_lock; 628 - struct list_head sd_log_flush_list; 630 + atomic_t sd_log_in_flight; 631 + wait_queue_head_t sd_log_flush_wait; 629 632 630 633 unsigned int sd_log_flush_head; 631 634 u64 sd_log_flush_wrapped;

+69 -9

fs/gfs2/inode.c

··· 77 77 return iget5_locked(sb, hash, iget_test, iget_set, &no_addr); 78 78 } 79 79 80 + struct gfs2_skip_data { 81 + u64 no_addr; 82 + int skipped; 83 + }; 84 + 85 + static int iget_skip_test(struct inode *inode, void *opaque) 86 + { 87 + struct gfs2_inode *ip = GFS2_I(inode); 88 + struct gfs2_skip_data *data = opaque; 89 + 90 + if (ip->i_no_addr == data->no_addr && inode->i_private != NULL){ 91 + if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)){ 92 + data->skipped = 1; 93 + return 0; 94 + } 95 + return 1; 96 + } 97 + return 0; 98 + } 99 + 100 + static int iget_skip_set(struct inode *inode, void *opaque) 101 + { 102 + struct gfs2_inode *ip = GFS2_I(inode); 103 + struct gfs2_skip_data *data = opaque; 104 + 105 + if (data->skipped) 106 + return 1; 107 + inode->i_ino = (unsigned long)(data->no_addr); 108 + ip->i_no_addr = data->no_addr; 109 + return 0; 110 + } 111 + 112 + static struct inode *gfs2_iget_skip(struct super_block *sb, 113 + u64 no_addr) 114 + { 115 + struct gfs2_skip_data data; 116 + unsigned long hash = (unsigned long)no_addr; 117 + 118 + data.no_addr = no_addr; 119 + data.skipped = 0; 120 + return iget5_locked(sb, hash, iget_skip_test, iget_skip_set, &data); 121 + } 122 + 80 123 /** 81 124 * GFS2 lookup code fills in vfs inode contents based on info obtained 82 125 * from directory entry inside gfs2_inode_lookup(). This has caused issues ··· 155 112 * @sb: The super block 156 113 * @no_addr: The inode number 157 114 * @type: The type of the inode 115 + * @skip_freeing: set this not return an inode if it is currently being freed. 158 116 * 159 117 * Returns: A VFS inode, or an error 160 118 */ ··· 163 119 struct inode *gfs2_inode_lookup(struct super_block *sb, 164 120 unsigned int type, 165 121 u64 no_addr, 166 - u64 no_formal_ino) 122 + u64 no_formal_ino, int skip_freeing) 167 123 { 168 - struct inode *inode = gfs2_iget(sb, no_addr); 169 - struct gfs2_inode *ip = GFS2_I(inode); 124 + struct inode *inode; 125 + struct gfs2_inode *ip; 170 126 struct gfs2_glock *io_gl; 171 127 int error; 128 + 129 + if (skip_freeing) 130 + inode = gfs2_iget_skip(sb, no_addr); 131 + else 132 + inode = gfs2_iget(sb, no_addr); 133 + ip = GFS2_I(inode); 172 134 173 135 if (!inode) 174 136 return ERR_PTR(-ENOBUFS); ··· 292 242 293 243 di->di_eattr = be64_to_cpu(str->di_eattr); 294 244 return 0; 245 + } 246 + 247 + static void gfs2_inode_bh(struct gfs2_inode *ip, struct buffer_head *bh) 248 + { 249 + ip->i_cache[0] = bh; 295 250 } 296 251 297 252 /** ··· 743 688 static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, 744 689 const struct gfs2_inum_host *inum, unsigned int mode, 745 690 unsigned int uid, unsigned int gid, 746 - const u64 *generation, dev_t dev) 691 + const u64 *generation, dev_t dev, struct buffer_head **bhp) 747 692 { 748 693 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 749 694 struct gfs2_dinode *di; ··· 798 743 di->di_mtime_nsec = cpu_to_be32(tv.tv_nsec); 799 744 di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec); 800 745 memset(&di->di_reserved, 0, sizeof(di->di_reserved)); 746 + 747 + set_buffer_uptodate(dibh); 801 748 802 - brelse(dibh); 749 + *bhp = dibh; 803 750 } 804 751 805 752 static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, 806 753 unsigned int mode, const struct gfs2_inum_host *inum, 807 - const u64 *generation, dev_t dev) 754 + const u64 *generation, dev_t dev, struct buffer_head **bhp) 808 755 { 809 756 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 810 757 unsigned int uid, gid; ··· 827 770 if (error) 828 771 goto out_quota; 829 772 830 - init_dinode(dip, gl, inum, mode, uid, gid, generation, dev); 773 + init_dinode(dip, gl, inum, mode, uid, gid, generation, dev, bhp); 831 774 gfs2_quota_change(dip, +1, uid, gid); 832 775 gfs2_trans_end(sdp); 833 776 ··· 966 909 struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 }; 967 910 int error; 968 911 u64 generation; 912 + struct buffer_head *bh=NULL; 969 913 970 914 if (!name->len || name->len > GFS2_FNAMESIZE) 971 915 return ERR_PTR(-ENAMETOOLONG); ··· 993 935 if (error) 994 936 goto fail_gunlock; 995 937 996 - error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation, dev); 938 + error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation, dev, &bh); 997 939 if (error) 998 940 goto fail_gunlock2; 999 941 1000 942 inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode), 1001 943 inum.no_addr, 1002 - inum.no_formal_ino); 944 + inum.no_formal_ino, 0); 1003 945 if (IS_ERR(inode)) 1004 946 goto fail_gunlock2; 947 + 948 + gfs2_inode_bh(GFS2_I(inode), bh); 1005 949 1006 950 error = gfs2_inode_refresh(GFS2_I(inode)); 1007 951 if (error)

+2 -1

fs/gfs2/inode.h

··· 49 49 void gfs2_inode_attr_in(struct gfs2_inode *ip); 50 50 void gfs2_set_iop(struct inode *inode); 51 51 struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, 52 - u64 no_addr, u64 no_formal_ino); 52 + u64 no_addr, u64 no_formal_ino, 53 + int skip_freeing); 53 54 struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr); 54 55 55 56 int gfs2_inode_refresh(struct gfs2_inode *ip);

-1

fs/gfs2/locking/dlm/lock_dlm.h

··· 13 13 #include <linux/module.h> 14 14 #include <linux/slab.h> 15 15 #include <linux/spinlock.h> 16 - #include <linux/module.h> 17 16 #include <linux/types.h> 18 17 #include <linux/string.h> 19 18 #include <linux/list.h>

+6 -5

fs/gfs2/locking/dlm/plock.c

··· 346 346 347 347 static unsigned int dev_poll(struct file *file, poll_table *wait) 348 348 { 349 + unsigned int mask = 0; 350 + 349 351 poll_wait(file, &send_wq, wait); 350 352 351 353 spin_lock(&ops_lock); 352 - if (!list_empty(&send_list)) { 353 - spin_unlock(&ops_lock); 354 - return POLLIN | POLLRDNORM; 355 - } 354 + if (!list_empty(&send_list)) 355 + mask = POLLIN | POLLRDNORM; 356 356 spin_unlock(&ops_lock); 357 - return 0; 357 + 358 + return mask; 358 359 } 359 360 360 361 static const struct file_operations dev_fops = {

+13 -7

fs/gfs2/locking/dlm/thread.c

··· 268 268 return 0; 269 269 } 270 270 271 - static int gdlm_thread(void *data) 271 + static int gdlm_thread(void *data, int blist) 272 272 { 273 273 struct gdlm_ls *ls = (struct gdlm_ls *) data; 274 274 struct gdlm_lock *lp = NULL; 275 - int blist = 0; 276 275 uint8_t complete, blocking, submit, drop; 277 276 DECLARE_WAITQUEUE(wait, current); 278 277 279 278 /* Only thread1 is allowed to do blocking callbacks since gfs 280 279 may wait for a completion callback within a blocking cb. */ 281 - 282 - if (current == ls->thread1) 283 - blist = 1; 284 280 285 281 while (!kthread_should_stop()) { 286 282 set_current_state(TASK_INTERRUPTIBLE); ··· 329 333 return 0; 330 334 } 331 335 336 + static int gdlm_thread1(void *data) 337 + { 338 + return gdlm_thread(data, 1); 339 + } 340 + 341 + static int gdlm_thread2(void *data) 342 + { 343 + return gdlm_thread(data, 0); 344 + } 345 + 332 346 int gdlm_init_threads(struct gdlm_ls *ls) 333 347 { 334 348 struct task_struct *p; 335 349 int error; 336 350 337 - p = kthread_run(gdlm_thread, ls, "lock_dlm1"); 351 + p = kthread_run(gdlm_thread1, ls, "lock_dlm1"); 338 352 error = IS_ERR(p); 339 353 if (error) { 340 354 log_error("can't start lock_dlm1 thread %d", error); ··· 352 346 } 353 347 ls->thread1 = p; 354 348 355 - p = kthread_run(gdlm_thread, ls, "lock_dlm2"); 349 + p = kthread_run(gdlm_thread2, ls, "lock_dlm2"); 356 350 error = IS_ERR(p); 357 351 if (error) { 358 352 log_error("can't start lock_dlm2 thread %d", error);

-1

fs/gfs2/locking/nolock/main.c

··· 9 9 10 10 #include <linux/module.h> 11 11 #include <linux/slab.h> 12 - #include <linux/module.h> 13 12 #include <linux/init.h> 14 13 #include <linux/types.h> 15 14 #include <linux/fs.h>

+156 -74

fs/gfs2/log.c

··· 60 60 } 61 61 62 62 /** 63 + * gfs2_remove_from_ail - Remove an entry from the ail lists, updating counters 64 + * @mapping: The associated mapping (maybe NULL) 65 + * @bd: The gfs2_bufdata to remove 66 + * 67 + * The log lock _must_ be held when calling this function 68 + * 69 + */ 70 + 71 + void gfs2_remove_from_ail(struct address_space *mapping, struct gfs2_bufdata *bd) 72 + { 73 + bd->bd_ail = NULL; 74 + list_del_init(&bd->bd_ail_st_list); 75 + list_del_init(&bd->bd_ail_gl_list); 76 + atomic_dec(&bd->bd_gl->gl_ail_count); 77 + if (mapping) 78 + gfs2_meta_cache_flush(GFS2_I(mapping->host)); 79 + brelse(bd->bd_bh); 80 + } 81 + 82 + /** 63 83 * gfs2_ail1_start_one - Start I/O on a part of the AIL 64 84 * @sdp: the filesystem 65 85 * @tr: the part of the AIL ··· 103 83 104 84 gfs2_assert(sdp, bd->bd_ail == ai); 105 85 106 - if (!bh){ 107 - list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list); 108 - continue; 109 - } 110 - 111 86 if (!buffer_busy(bh)) { 112 - if (!buffer_uptodate(bh)) { 113 - gfs2_log_unlock(sdp); 87 + if (!buffer_uptodate(bh)) 114 88 gfs2_io_error_bh(sdp, bh); 115 - gfs2_log_lock(sdp); 116 - } 117 89 list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list); 118 90 continue; 119 91 } ··· 115 103 116 104 list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list); 117 105 106 + get_bh(bh); 118 107 gfs2_log_unlock(sdp); 119 - wait_on_buffer(bh); 120 - ll_rw_block(WRITE, 1, &bh); 108 + lock_buffer(bh); 109 + if (test_clear_buffer_dirty(bh)) { 110 + bh->b_end_io = end_buffer_write_sync; 111 + submit_bh(WRITE, bh); 112 + } else { 113 + unlock_buffer(bh); 114 + brelse(bh); 115 + } 121 116 gfs2_log_lock(sdp); 122 117 123 118 retry = 1; ··· 149 130 bd_ail_st_list) { 150 131 bh = bd->bd_bh; 151 132 152 - if (!bh){ 153 - list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list); 154 - continue; 155 - } 156 - 157 133 gfs2_assert(sdp, bd->bd_ail == ai); 158 134 159 135 if (buffer_busy(bh)) { ··· 169 155 170 156 static void gfs2_ail1_start(struct gfs2_sbd *sdp, int flags) 171 157 { 172 - struct list_head *head = &sdp->sd_ail1_list; 158 + struct list_head *head; 173 159 u64 sync_gen; 174 160 struct list_head *first; 175 161 struct gfs2_ail *first_ai, *ai, *tmp; 176 162 int done = 0; 177 163 178 164 gfs2_log_lock(sdp); 165 + head = &sdp->sd_ail1_list; 179 166 if (list_empty(head)) { 180 167 gfs2_log_unlock(sdp); 181 168 return; ··· 248 233 bd = list_entry(head->prev, struct gfs2_bufdata, 249 234 bd_ail_st_list); 250 235 gfs2_assert(sdp, bd->bd_ail == ai); 251 - bd->bd_ail = NULL; 252 - list_del(&bd->bd_ail_st_list); 253 - list_del(&bd->bd_ail_gl_list); 254 - atomic_dec(&bd->bd_gl->gl_ail_count); 255 - brelse(bd->bd_bh); 236 + gfs2_remove_from_ail(bd->bd_bh->b_page->mapping, bd); 256 237 } 257 238 } 258 239 ··· 450 439 return tail; 451 440 } 452 441 453 - static inline void log_incr_head(struct gfs2_sbd *sdp) 442 + void gfs2_log_incr_head(struct gfs2_sbd *sdp) 454 443 { 455 444 if (sdp->sd_log_flush_head == sdp->sd_log_tail) 456 - gfs2_assert_withdraw(sdp, sdp->sd_log_flush_head == sdp->sd_log_head); 445 + BUG_ON(sdp->sd_log_flush_head != sdp->sd_log_head); 457 446 458 447 if (++sdp->sd_log_flush_head == sdp->sd_jdesc->jd_blocks) { 459 448 sdp->sd_log_flush_head = 0; 460 449 sdp->sd_log_flush_wrapped = 1; 461 450 } 451 + } 452 + 453 + /** 454 + * gfs2_log_write_endio - End of I/O for a log buffer 455 + * @bh: The buffer head 456 + * @uptodate: I/O Status 457 + * 458 + */ 459 + 460 + static void gfs2_log_write_endio(struct buffer_head *bh, int uptodate) 461 + { 462 + struct gfs2_sbd *sdp = bh->b_private; 463 + bh->b_private = NULL; 464 + 465 + end_buffer_write_sync(bh, uptodate); 466 + if (atomic_dec_and_test(&sdp->sd_log_in_flight)) 467 + wake_up(&sdp->sd_log_flush_wait); 462 468 } 463 469 464 470 /** ··· 488 460 struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp) 489 461 { 490 462 u64 blkno = log_bmap(sdp, sdp->sd_log_flush_head); 491 - struct gfs2_log_buf *lb; 492 463 struct buffer_head *bh; 493 464 494 - lb = kzalloc(sizeof(struct gfs2_log_buf), GFP_NOFS | __GFP_NOFAIL); 495 - list_add(&lb->lb_list, &sdp->sd_log_flush_list); 496 - 497 - bh = lb->lb_bh = sb_getblk(sdp->sd_vfs, blkno); 465 + bh = sb_getblk(sdp->sd_vfs, blkno); 498 466 lock_buffer(bh); 499 467 memset(bh->b_data, 0, bh->b_size); 500 468 set_buffer_uptodate(bh); 501 469 clear_buffer_dirty(bh); 502 - unlock_buffer(bh); 503 - 504 - log_incr_head(sdp); 470 + gfs2_log_incr_head(sdp); 471 + atomic_inc(&sdp->sd_log_in_flight); 472 + bh->b_private = sdp; 473 + bh->b_end_io = gfs2_log_write_endio; 505 474 506 475 return bh; 476 + } 477 + 478 + /** 479 + * gfs2_fake_write_endio - 480 + * @bh: The buffer head 481 + * @uptodate: The I/O Status 482 + * 483 + */ 484 + 485 + static void gfs2_fake_write_endio(struct buffer_head *bh, int uptodate) 486 + { 487 + struct buffer_head *real_bh = bh->b_private; 488 + struct gfs2_bufdata *bd = real_bh->b_private; 489 + struct gfs2_sbd *sdp = bd->bd_gl->gl_sbd; 490 + 491 + end_buffer_write_sync(bh, uptodate); 492 + free_buffer_head(bh); 493 + unlock_buffer(real_bh); 494 + brelse(real_bh); 495 + if (atomic_dec_and_test(&sdp->sd_log_in_flight)) 496 + wake_up(&sdp->sd_log_flush_wait); 507 497 } 508 498 509 499 /** ··· 536 490 struct buffer_head *real) 537 491 { 538 492 u64 blkno = log_bmap(sdp, sdp->sd_log_flush_head); 539 - struct gfs2_log_buf *lb; 540 493 struct buffer_head *bh; 541 494 542 - lb = kzalloc(sizeof(struct gfs2_log_buf), GFP_NOFS | __GFP_NOFAIL); 543 - list_add(&lb->lb_list, &sdp->sd_log_flush_list); 544 - lb->lb_real = real; 545 - 546 - bh = lb->lb_bh = alloc_buffer_head(GFP_NOFS | __GFP_NOFAIL); 495 + bh = alloc_buffer_head(GFP_NOFS | __GFP_NOFAIL); 547 496 atomic_set(&bh->b_count, 1); 548 - bh->b_state = (1 << BH_Mapped) | (1 << BH_Uptodate); 497 + bh->b_state = (1 << BH_Mapped) | (1 << BH_Uptodate) | (1 << BH_Lock); 549 498 set_bh_page(bh, real->b_page, bh_offset(real)); 550 499 bh->b_blocknr = blkno; 551 500 bh->b_size = sdp->sd_sb.sb_bsize; 552 501 bh->b_bdev = sdp->sd_vfs->s_bdev; 502 + bh->b_private = real; 503 + bh->b_end_io = gfs2_fake_write_endio; 553 504 554 - log_incr_head(sdp); 505 + gfs2_log_incr_head(sdp); 506 + atomic_inc(&sdp->sd_log_in_flight); 555 507 556 508 return bh; 557 509 } ··· 616 572 gfs2_assert_withdraw(sdp, !pull); 617 573 618 574 sdp->sd_log_idle = (tail == sdp->sd_log_flush_head); 619 - log_incr_head(sdp); 575 + gfs2_log_incr_head(sdp); 620 576 } 621 577 622 578 static void log_flush_commit(struct gfs2_sbd *sdp) 623 579 { 624 - struct list_head *head = &sdp->sd_log_flush_list; 625 - struct gfs2_log_buf *lb; 580 + DEFINE_WAIT(wait); 581 + 582 + if (atomic_read(&sdp->sd_log_in_flight)) { 583 + do { 584 + prepare_to_wait(&sdp->sd_log_flush_wait, &wait, 585 + TASK_UNINTERRUPTIBLE); 586 + if (atomic_read(&sdp->sd_log_in_flight)) 587 + io_schedule(); 588 + } while(atomic_read(&sdp->sd_log_in_flight)); 589 + finish_wait(&sdp->sd_log_flush_wait, &wait); 590 + } 591 + 592 + log_write_header(sdp, 0, 0); 593 + } 594 + 595 + static void gfs2_ordered_write(struct gfs2_sbd *sdp) 596 + { 597 + struct gfs2_bufdata *bd; 626 598 struct buffer_head *bh; 627 - int flushcount = 0; 599 + LIST_HEAD(written); 628 600 629 - while (!list_empty(head)) { 630 - lb = list_entry(head->next, struct gfs2_log_buf, lb_list); 631 - list_del(&lb->lb_list); 632 - bh = lb->lb_bh; 633 - 634 - wait_on_buffer(bh); 635 - if (!buffer_uptodate(bh)) 636 - gfs2_io_error_bh(sdp, bh); 637 - if (lb->lb_real) { 638 - while (atomic_read(&bh->b_count) != 1) /* Grrrr... */ 639 - schedule(); 640 - free_buffer_head(bh); 641 - } else 601 + gfs2_log_lock(sdp); 602 + while (!list_empty(&sdp->sd_log_le_ordered)) { 603 + bd = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_bufdata, bd_le.le_list); 604 + list_move(&bd->bd_le.le_list, &written); 605 + bh = bd->bd_bh; 606 + if (!buffer_dirty(bh)) 607 + continue; 608 + get_bh(bh); 609 + gfs2_log_unlock(sdp); 610 + lock_buffer(bh); 611 + if (test_clear_buffer_dirty(bh)) { 612 + bh->b_end_io = end_buffer_write_sync; 613 + submit_bh(WRITE, bh); 614 + } else { 615 + unlock_buffer(bh); 642 616 brelse(bh); 643 - kfree(lb); 644 - flushcount++; 617 + } 618 + gfs2_log_lock(sdp); 645 619 } 620 + list_splice(&written, &sdp->sd_log_le_ordered); 621 + gfs2_log_unlock(sdp); 622 + } 646 623 647 - /* If nothing was journaled, the header is unplanned and unwanted. */ 648 - if (flushcount) { 649 - log_write_header(sdp, 0, 0); 650 - } else { 651 - unsigned int tail; 652 - tail = current_tail(sdp); 624 + static void gfs2_ordered_wait(struct gfs2_sbd *sdp) 625 + { 626 + struct gfs2_bufdata *bd; 627 + struct buffer_head *bh; 653 628 654 - gfs2_ail1_empty(sdp, 0); 655 - if (sdp->sd_log_tail != tail) 656 - log_pull_tail(sdp, tail); 629 + gfs2_log_lock(sdp); 630 + while (!list_empty(&sdp->sd_log_le_ordered)) { 631 + bd = list_entry(sdp->sd_log_le_ordered.prev, struct gfs2_bufdata, bd_le.le_list); 632 + bh = bd->bd_bh; 633 + if (buffer_locked(bh)) { 634 + get_bh(bh); 635 + gfs2_log_unlock(sdp); 636 + wait_on_buffer(bh); 637 + brelse(bh); 638 + gfs2_log_lock(sdp); 639 + continue; 640 + } 641 + list_del_init(&bd->bd_le.le_list); 657 642 } 643 + gfs2_log_unlock(sdp); 658 644 } 659 645 660 646 /** ··· 714 640 INIT_LIST_HEAD(&ai->ai_ail1_list); 715 641 INIT_LIST_HEAD(&ai->ai_ail2_list); 716 642 717 - gfs2_assert_withdraw(sdp, 718 - sdp->sd_log_num_buf + sdp->sd_log_num_jdata == 719 - sdp->sd_log_commited_buf + 720 - sdp->sd_log_commited_databuf); 643 + if (sdp->sd_log_num_buf != sdp->sd_log_commited_buf) { 644 + printk(KERN_INFO "GFS2: log buf %u %u\n", sdp->sd_log_num_buf, 645 + sdp->sd_log_commited_buf); 646 + gfs2_assert_withdraw(sdp, 0); 647 + } 648 + if (sdp->sd_log_num_databuf != sdp->sd_log_commited_databuf) { 649 + printk(KERN_INFO "GFS2: log databuf %u %u\n", 650 + sdp->sd_log_num_databuf, sdp->sd_log_commited_databuf); 651 + gfs2_assert_withdraw(sdp, 0); 652 + } 721 653 gfs2_assert_withdraw(sdp, 722 654 sdp->sd_log_num_revoke == sdp->sd_log_commited_revoke); 723 655 ··· 731 651 sdp->sd_log_flush_wrapped = 0; 732 652 ai->ai_first = sdp->sd_log_flush_head; 733 653 654 + gfs2_ordered_write(sdp); 734 655 lops_before_commit(sdp); 735 - if (!list_empty(&sdp->sd_log_flush_list)) 656 + gfs2_ordered_wait(sdp); 657 + 658 + if (sdp->sd_log_head != sdp->sd_log_flush_head) 736 659 log_flush_commit(sdp); 737 660 else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){ 738 661 gfs2_log_lock(sdp); ··· 827 744 gfs2_assert_withdraw(sdp, !sdp->sd_log_blks_reserved); 828 745 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_gl); 829 746 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_buf); 830 - gfs2_assert_withdraw(sdp, !sdp->sd_log_num_jdata); 831 747 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); 832 748 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg); 833 749 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_databuf);

+2

fs/gfs2/log.h

··· 52 52 53 53 int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks); 54 54 void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks); 55 + void gfs2_log_incr_head(struct gfs2_sbd *sdp); 55 56 56 57 struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp); 57 58 struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp, 58 59 struct buffer_head *real); 59 60 void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl); 60 61 void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans); 62 + void gfs2_remove_from_ail(struct address_space *mapping, struct gfs2_bufdata *bd); 61 63 62 64 void gfs2_log_shutdown(struct gfs2_sbd *sdp); 63 65 void gfs2_meta_syncfs(struct gfs2_sbd *sdp);

+236 -236

fs/gfs2/lops.c

··· 27 27 #include "trans.h" 28 28 #include "util.h" 29 29 30 - static void glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) 30 + /** 31 + * gfs2_pin - Pin a buffer in memory 32 + * @sdp: The superblock 33 + * @bh: The buffer to be pinned 34 + * 35 + * The log lock must be held when calling this function 36 + */ 37 + static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh) 38 + { 39 + struct gfs2_bufdata *bd; 40 + 41 + gfs2_assert_withdraw(sdp, test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)); 42 + 43 + clear_buffer_dirty(bh); 44 + if (test_set_buffer_pinned(bh)) 45 + gfs2_assert_withdraw(sdp, 0); 46 + if (!buffer_uptodate(bh)) 47 + gfs2_io_error_bh(sdp, bh); 48 + bd = bh->b_private; 49 + /* If this buffer is in the AIL and it has already been written 50 + * to in-place disk block, remove it from the AIL. 51 + */ 52 + if (bd->bd_ail) 53 + list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list); 54 + get_bh(bh); 55 + } 56 + 57 + /** 58 + * gfs2_unpin - Unpin a buffer 59 + * @sdp: the filesystem the buffer belongs to 60 + * @bh: The buffer to unpin 61 + * @ai: 62 + * 63 + */ 64 + 65 + static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh, 66 + struct gfs2_ail *ai) 67 + { 68 + struct gfs2_bufdata *bd = bh->b_private; 69 + 70 + gfs2_assert_withdraw(sdp, buffer_uptodate(bh)); 71 + 72 + if (!buffer_pinned(bh)) 73 + gfs2_assert_withdraw(sdp, 0); 74 + 75 + lock_buffer(bh); 76 + mark_buffer_dirty(bh); 77 + clear_buffer_pinned(bh); 78 + 79 + gfs2_log_lock(sdp); 80 + if (bd->bd_ail) { 81 + list_del(&bd->bd_ail_st_list); 82 + brelse(bh); 83 + } else { 84 + struct gfs2_glock *gl = bd->bd_gl; 85 + list_add(&bd->bd_ail_gl_list, &gl->gl_ail_list); 86 + atomic_inc(&gl->gl_ail_count); 87 + } 88 + bd->bd_ail = ai; 89 + list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list); 90 + gfs2_log_unlock(sdp); 91 + unlock_buffer(bh); 92 + } 93 + 94 + 95 + static inline struct gfs2_log_descriptor *bh_log_desc(struct buffer_head *bh) 96 + { 97 + return (struct gfs2_log_descriptor *)bh->b_data; 98 + } 99 + 100 + static inline __be64 *bh_log_ptr(struct buffer_head *bh) 101 + { 102 + struct gfs2_log_descriptor *ld = bh_log_desc(bh); 103 + return (__force __be64 *)(ld + 1); 104 + } 105 + 106 + static inline __be64 *bh_ptr_end(struct buffer_head *bh) 107 + { 108 + return (__force __be64 *)(bh->b_data + bh->b_size); 109 + } 110 + 111 + 112 + static struct buffer_head *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type) 113 + { 114 + struct buffer_head *bh = gfs2_log_get_buf(sdp); 115 + struct gfs2_log_descriptor *ld = bh_log_desc(bh); 116 + ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC); 117 + ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD); 118 + ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD); 119 + ld->ld_type = cpu_to_be32(ld_type); 120 + ld->ld_length = 0; 121 + ld->ld_data1 = 0; 122 + ld->ld_data2 = 0; 123 + memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved)); 124 + return bh; 125 + } 126 + 127 + static void __glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) 31 128 { 32 129 struct gfs2_glock *gl; 33 130 struct gfs2_trans *tr = current->journal_info; ··· 135 38 if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl))) 136 39 return; 137 40 138 - gfs2_log_lock(sdp); 139 - if (!list_empty(&le->le_list)){ 140 - gfs2_log_unlock(sdp); 41 + if (!list_empty(&le->le_list)) 141 42 return; 142 - } 43 + 143 44 gfs2_glock_hold(gl); 144 45 set_bit(GLF_DIRTY, &gl->gl_flags); 145 46 sdp->sd_log_num_gl++; 146 47 list_add(&le->le_list, &sdp->sd_log_le_gl); 48 + } 49 + 50 + static void glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) 51 + { 52 + gfs2_log_lock(sdp); 53 + __glock_lo_add(sdp, le); 147 54 gfs2_log_unlock(sdp); 148 55 } 149 56 ··· 172 71 struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le); 173 72 struct gfs2_trans *tr; 174 73 74 + lock_buffer(bd->bd_bh); 175 75 gfs2_log_lock(sdp); 176 - if (!list_empty(&bd->bd_list_tr)) { 177 - gfs2_log_unlock(sdp); 178 - return; 179 - } 76 + if (!list_empty(&bd->bd_list_tr)) 77 + goto out; 180 78 tr = current->journal_info; 181 79 tr->tr_touched = 1; 182 80 tr->tr_num_buf++; 183 81 list_add(&bd->bd_list_tr, &tr->tr_list_buf); 184 - gfs2_log_unlock(sdp); 185 - 186 82 if (!list_empty(&le->le_list)) 187 - return; 188 - 189 - gfs2_trans_add_gl(bd->bd_gl); 190 - 83 + goto out; 84 + __glock_lo_add(sdp, &bd->bd_gl->gl_le); 191 85 gfs2_meta_check(sdp, bd->bd_bh); 192 86 gfs2_pin(sdp, bd->bd_bh); 193 - gfs2_log_lock(sdp); 194 87 sdp->sd_log_num_buf++; 195 88 list_add(&le->le_list, &sdp->sd_log_le_buf); 196 - gfs2_log_unlock(sdp); 197 - 198 89 tr->tr_num_buf_new++; 90 + out: 91 + gfs2_log_unlock(sdp); 92 + unlock_buffer(bd->bd_bh); 199 93 } 200 94 201 95 static void buf_lo_incore_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) ··· 213 117 struct buffer_head *bh; 214 118 struct gfs2_log_descriptor *ld; 215 119 struct gfs2_bufdata *bd1 = NULL, *bd2; 216 - unsigned int total = sdp->sd_log_num_buf; 217 - unsigned int offset = BUF_OFFSET; 120 + unsigned int total; 218 121 unsigned int limit; 219 122 unsigned int num; 220 123 unsigned n; ··· 222 127 limit = buf_limit(sdp); 223 128 /* for 4k blocks, limit = 503 */ 224 129 130 + gfs2_log_lock(sdp); 131 + total = sdp->sd_log_num_buf; 225 132 bd1 = bd2 = list_prepare_entry(bd1, &sdp->sd_log_le_buf, bd_le.le_list); 226 133 while(total) { 227 134 num = total; 228 135 if (total > limit) 229 136 num = limit; 230 - bh = gfs2_log_get_buf(sdp); 231 - ld = (struct gfs2_log_descriptor *)bh->b_data; 232 - ptr = (__be64 *)(bh->b_data + offset); 233 - ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC); 234 - ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD); 235 - ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD); 236 - ld->ld_type = cpu_to_be32(GFS2_LOG_DESC_METADATA); 137 + gfs2_log_unlock(sdp); 138 + bh = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_METADATA); 139 + gfs2_log_lock(sdp); 140 + ld = bh_log_desc(bh); 141 + ptr = bh_log_ptr(bh); 237 142 ld->ld_length = cpu_to_be32(num + 1); 238 143 ld->ld_data1 = cpu_to_be32(num); 239 - ld->ld_data2 = cpu_to_be32(0); 240 - memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved)); 241 144 242 145 n = 0; 243 146 list_for_each_entry_continue(bd1, &sdp->sd_log_le_buf, ··· 245 152 break; 246 153 } 247 154 248 - set_buffer_dirty(bh); 249 - ll_rw_block(WRITE, 1, &bh); 155 + gfs2_log_unlock(sdp); 156 + submit_bh(WRITE, bh); 157 + gfs2_log_lock(sdp); 250 158 251 159 n = 0; 252 160 list_for_each_entry_continue(bd2, &sdp->sd_log_le_buf, 253 161 bd_le.le_list) { 162 + get_bh(bd2->bd_bh); 163 + gfs2_log_unlock(sdp); 164 + lock_buffer(bd2->bd_bh); 254 165 bh = gfs2_log_fake_buf(sdp, bd2->bd_bh); 255 - set_buffer_dirty(bh); 256 - ll_rw_block(WRITE, 1, &bh); 166 + submit_bh(WRITE, bh); 167 + gfs2_log_lock(sdp); 257 168 if (++n >= num) 258 169 break; 259 170 } 260 171 172 + BUG_ON(total < num); 261 173 total -= num; 262 174 } 175 + gfs2_log_unlock(sdp); 263 176 } 264 177 265 178 static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) ··· 369 270 tr = current->journal_info; 370 271 tr->tr_touched = 1; 371 272 tr->tr_num_revoke++; 372 - 373 - gfs2_log_lock(sdp); 374 273 sdp->sd_log_num_revoke++; 375 274 list_add(&le->le_list, &sdp->sd_log_le_revoke); 376 - gfs2_log_unlock(sdp); 377 275 } 378 276 379 277 static void revoke_lo_before_commit(struct gfs2_sbd *sdp) ··· 380 284 struct buffer_head *bh; 381 285 unsigned int offset; 382 286 struct list_head *head = &sdp->sd_log_le_revoke; 383 - struct gfs2_revoke *rv; 287 + struct gfs2_bufdata *bd; 384 288 385 289 if (!sdp->sd_log_num_revoke) 386 290 return; 387 291 388 - bh = gfs2_log_get_buf(sdp); 389 - ld = (struct gfs2_log_descriptor *)bh->b_data; 390 - ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC); 391 - ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD); 392 - ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD); 393 - ld->ld_type = cpu_to_be32(GFS2_LOG_DESC_REVOKE); 292 + bh = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_REVOKE); 293 + ld = bh_log_desc(bh); 394 294 ld->ld_length = cpu_to_be32(gfs2_struct2blk(sdp, sdp->sd_log_num_revoke, 395 295 sizeof(u64))); 396 296 ld->ld_data1 = cpu_to_be32(sdp->sd_log_num_revoke); 397 - ld->ld_data2 = cpu_to_be32(0); 398 - memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved)); 399 297 offset = sizeof(struct gfs2_log_descriptor); 400 298 401 299 while (!list_empty(head)) { 402 - rv = list_entry(head->next, struct gfs2_revoke, rv_le.le_list); 403 - list_del_init(&rv->rv_le.le_list); 300 + bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list); 301 + list_del_init(&bd->bd_le.le_list); 404 302 sdp->sd_log_num_revoke--; 405 303 406 304 if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) { 407 - set_buffer_dirty(bh); 408 - ll_rw_block(WRITE, 1, &bh); 305 + submit_bh(WRITE, bh); 409 306 410 307 bh = gfs2_log_get_buf(sdp); 411 308 mh = (struct gfs2_meta_header *)bh->b_data; ··· 408 319 offset = sizeof(struct gfs2_meta_header); 409 320 } 410 321 411 - *(__be64 *)(bh->b_data + offset) = cpu_to_be64(rv->rv_blkno); 412 - kfree(rv); 322 + *(__be64 *)(bh->b_data + offset) = cpu_to_be64(bd->bd_blkno); 323 + kmem_cache_free(gfs2_bufdata_cachep, bd); 413 324 414 325 offset += sizeof(u64); 415 326 } 416 327 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); 417 328 418 - set_buffer_dirty(bh); 419 - ll_rw_block(WRITE, 1, &bh); 329 + submit_bh(WRITE, bh); 420 330 } 421 331 422 332 static void revoke_lo_before_scan(struct gfs2_jdesc *jd, ··· 554 466 struct address_space *mapping = bd->bd_bh->b_page->mapping; 555 467 struct gfs2_inode *ip = GFS2_I(mapping->host); 556 468 469 + lock_buffer(bd->bd_bh); 557 470 gfs2_log_lock(sdp); 558 - if (!list_empty(&bd->bd_list_tr)) { 559 - gfs2_log_unlock(sdp); 560 - return; 561 - } 471 + if (!list_empty(&bd->bd_list_tr)) 472 + goto out; 562 473 tr->tr_touched = 1; 563 474 if (gfs2_is_jdata(ip)) { 564 475 tr->tr_num_buf++; 565 476 list_add(&bd->bd_list_tr, &tr->tr_list_buf); 566 477 } 567 - gfs2_log_unlock(sdp); 568 478 if (!list_empty(&le->le_list)) 569 - return; 479 + goto out; 570 480 571 - gfs2_trans_add_gl(bd->bd_gl); 481 + __glock_lo_add(sdp, &bd->bd_gl->gl_le); 572 482 if (gfs2_is_jdata(ip)) { 573 - sdp->sd_log_num_jdata++; 574 483 gfs2_pin(sdp, bd->bd_bh); 575 484 tr->tr_num_databuf_new++; 485 + sdp->sd_log_num_databuf++; 486 + list_add(&le->le_list, &sdp->sd_log_le_databuf); 487 + } else { 488 + list_add(&le->le_list, &sdp->sd_log_le_ordered); 576 489 } 577 - gfs2_log_lock(sdp); 578 - sdp->sd_log_num_databuf++; 579 - list_add(&le->le_list, &sdp->sd_log_le_databuf); 490 + out: 580 491 gfs2_log_unlock(sdp); 492 + unlock_buffer(bd->bd_bh); 581 493 } 582 494 583 - static int gfs2_check_magic(struct buffer_head *bh) 495 + static void gfs2_check_magic(struct buffer_head *bh) 584 496 { 585 - struct page *page = bh->b_page; 586 497 void *kaddr; 587 498 __be32 *ptr; 588 - int rv = 0; 589 499 590 - kaddr = kmap_atomic(page, KM_USER0); 500 + clear_buffer_escaped(bh); 501 + kaddr = kmap_atomic(bh->b_page, KM_USER0); 591 502 ptr = kaddr + bh_offset(bh); 592 503 if (*ptr == cpu_to_be32(GFS2_MAGIC)) 593 - rv = 1; 504 + set_buffer_escaped(bh); 594 505 kunmap_atomic(kaddr, KM_USER0); 506 + } 595 507 596 - return rv; 508 + static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh, 509 + struct list_head *list, struct list_head *done, 510 + unsigned int n) 511 + { 512 + struct buffer_head *bh1; 513 + struct gfs2_log_descriptor *ld; 514 + struct gfs2_bufdata *bd; 515 + __be64 *ptr; 516 + 517 + if (!bh) 518 + return; 519 + 520 + ld = bh_log_desc(bh); 521 + ld->ld_length = cpu_to_be32(n + 1); 522 + ld->ld_data1 = cpu_to_be32(n); 523 + 524 + ptr = bh_log_ptr(bh); 525 + 526 + get_bh(bh); 527 + submit_bh(WRITE, bh); 528 + gfs2_log_lock(sdp); 529 + while(!list_empty(list)) { 530 + bd = list_entry(list->next, struct gfs2_bufdata, bd_le.le_list); 531 + list_move_tail(&bd->bd_le.le_list, done); 532 + get_bh(bd->bd_bh); 533 + while (be64_to_cpu(*ptr) != bd->bd_bh->b_blocknr) { 534 + gfs2_log_incr_head(sdp); 535 + ptr += 2; 536 + } 537 + gfs2_log_unlock(sdp); 538 + lock_buffer(bd->bd_bh); 539 + if (buffer_escaped(bd->bd_bh)) { 540 + void *kaddr; 541 + bh1 = gfs2_log_get_buf(sdp); 542 + kaddr = kmap_atomic(bd->bd_bh->b_page, KM_USER0); 543 + memcpy(bh1->b_data, kaddr + bh_offset(bd->bd_bh), 544 + bh1->b_size); 545 + kunmap_atomic(kaddr, KM_USER0); 546 + *(__be32 *)bh1->b_data = 0; 547 + clear_buffer_escaped(bd->bd_bh); 548 + unlock_buffer(bd->bd_bh); 549 + brelse(bd->bd_bh); 550 + } else { 551 + bh1 = gfs2_log_fake_buf(sdp, bd->bd_bh); 552 + } 553 + submit_bh(WRITE, bh1); 554 + gfs2_log_lock(sdp); 555 + ptr += 2; 556 + } 557 + gfs2_log_unlock(sdp); 558 + brelse(bh); 597 559 } 598 560 599 561 /** 600 562 * databuf_lo_before_commit - Scan the data buffers, writing as we go 601 563 * 602 - * Here we scan through the lists of buffers and make the assumption 603 - * that any buffer thats been pinned is being journaled, and that 604 - * any unpinned buffer is an ordered write data buffer and therefore 605 - * will be written back rather than journaled. 606 564 */ 565 + 607 566 static void databuf_lo_before_commit(struct gfs2_sbd *sdp) 608 567 { 609 - LIST_HEAD(started); 610 - struct gfs2_bufdata *bd1 = NULL, *bd2, *bdt; 611 - struct buffer_head *bh = NULL,*bh1 = NULL; 612 - struct gfs2_log_descriptor *ld; 613 - unsigned int limit; 614 - unsigned int total_dbuf; 615 - unsigned int total_jdata = sdp->sd_log_num_jdata; 616 - unsigned int num, n; 617 - __be64 *ptr = NULL; 568 + struct gfs2_bufdata *bd = NULL; 569 + struct buffer_head *bh = NULL; 570 + unsigned int n = 0; 571 + __be64 *ptr = NULL, *end = NULL; 572 + LIST_HEAD(processed); 573 + LIST_HEAD(in_progress); 618 574 619 - limit = databuf_limit(sdp); 620 - 621 - /* 622 - * Start writing ordered buffers, write journaled buffers 623 - * into the log along with a header 624 - */ 625 575 gfs2_log_lock(sdp); 626 - total_dbuf = sdp->sd_log_num_databuf; 627 - bd2 = bd1 = list_prepare_entry(bd1, &sdp->sd_log_le_databuf, 628 - bd_le.le_list); 629 - while(total_dbuf) { 630 - num = total_jdata; 631 - if (num > limit) 632 - num = limit; 633 - n = 0; 634 - list_for_each_entry_safe_continue(bd1, bdt, 635 - &sdp->sd_log_le_databuf, 636 - bd_le.le_list) { 637 - /* store off the buffer head in a local ptr since 638 - * gfs2_bufdata might change when we drop the log lock 639 - */ 640 - bh1 = bd1->bd_bh; 641 - 642 - /* An ordered write buffer */ 643 - if (bh1 && !buffer_pinned(bh1)) { 644 - list_move(&bd1->bd_le.le_list, &started); 645 - if (bd1 == bd2) { 646 - bd2 = NULL; 647 - bd2 = list_prepare_entry(bd2, 648 - &sdp->sd_log_le_databuf, 649 - bd_le.le_list); 650 - } 651 - total_dbuf--; 652 - if (bh1) { 653 - if (buffer_dirty(bh1)) { 654 - get_bh(bh1); 655 - 656 - gfs2_log_unlock(sdp); 657 - 658 - ll_rw_block(SWRITE, 1, &bh1); 659 - brelse(bh1); 660 - 661 - gfs2_log_lock(sdp); 662 - } 663 - continue; 664 - } 665 - continue; 666 - } else if (bh1) { /* A journaled buffer */ 667 - int magic; 668 - gfs2_log_unlock(sdp); 669 - if (!bh) { 670 - bh = gfs2_log_get_buf(sdp); 671 - ld = (struct gfs2_log_descriptor *) 672 - bh->b_data; 673 - ptr = (__be64 *)(bh->b_data + 674 - DATABUF_OFFSET); 675 - ld->ld_header.mh_magic = 676 - cpu_to_be32(GFS2_MAGIC); 677 - ld->ld_header.mh_type = 678 - cpu_to_be32(GFS2_METATYPE_LD); 679 - ld->ld_header.mh_format = 680 - cpu_to_be32(GFS2_FORMAT_LD); 681 - ld->ld_type = 682 - cpu_to_be32(GFS2_LOG_DESC_JDATA); 683 - ld->ld_length = cpu_to_be32(num + 1); 684 - ld->ld_data1 = cpu_to_be32(num); 685 - ld->ld_data2 = cpu_to_be32(0); 686 - memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved)); 687 - } 688 - magic = gfs2_check_magic(bh1); 689 - *ptr++ = cpu_to_be64(bh1->b_blocknr); 690 - *ptr++ = cpu_to_be64((__u64)magic); 691 - clear_buffer_escaped(bh1); 692 - if (unlikely(magic != 0)) 693 - set_buffer_escaped(bh1); 694 - gfs2_log_lock(sdp); 695 - if (++n >= num) 696 - break; 697 - } else if (!bh1) { 698 - total_dbuf--; 699 - sdp->sd_log_num_databuf--; 700 - list_del_init(&bd1->bd_le.le_list); 701 - if (bd1 == bd2) { 702 - bd2 = NULL; 703 - bd2 = list_prepare_entry(bd2, 704 - &sdp->sd_log_le_databuf, 705 - bd_le.le_list); 706 - } 707 - kmem_cache_free(gfs2_bufdata_cachep, bd1); 708 - } 709 - } 710 - gfs2_log_unlock(sdp); 711 - if (bh) { 712 - set_buffer_mapped(bh); 713 - set_buffer_dirty(bh); 714 - ll_rw_block(WRITE, 1, &bh); 715 - bh = NULL; 716 - } 717 - n = 0; 718 - gfs2_log_lock(sdp); 719 - list_for_each_entry_continue(bd2, &sdp->sd_log_le_databuf, 720 - bd_le.le_list) { 721 - if (!bd2->bd_bh) 722 - continue; 723 - /* copy buffer if it needs escaping */ 576 + while (!list_empty(&sdp->sd_log_le_databuf)) { 577 + if (ptr == end) { 724 578 gfs2_log_unlock(sdp); 725 - if (unlikely(buffer_escaped(bd2->bd_bh))) { 726 - void *kaddr; 727 - struct page *page = bd2->bd_bh->b_page; 728 - bh = gfs2_log_get_buf(sdp); 729 - kaddr = kmap_atomic(page, KM_USER0); 730 - memcpy(bh->b_data, 731 - kaddr + bh_offset(bd2->bd_bh), 732 - sdp->sd_sb.sb_bsize); 733 - kunmap_atomic(kaddr, KM_USER0); 734 - *(__be32 *)bh->b_data = 0; 735 - } else { 736 - bh = gfs2_log_fake_buf(sdp, bd2->bd_bh); 737 - } 738 - set_buffer_dirty(bh); 739 - ll_rw_block(WRITE, 1, &bh); 579 + gfs2_write_blocks(sdp, bh, &in_progress, &processed, n); 580 + n = 0; 581 + bh = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_JDATA); 582 + ptr = bh_log_ptr(bh); 583 + end = bh_ptr_end(bh) - 1; 740 584 gfs2_log_lock(sdp); 741 - if (++n >= num) 742 - break; 585 + continue; 743 586 } 744 - bh = NULL; 745 - BUG_ON(total_dbuf < num); 746 - total_dbuf -= num; 747 - total_jdata -= num; 587 + bd = list_entry(sdp->sd_log_le_databuf.next, struct gfs2_bufdata, bd_le.le_list); 588 + list_move_tail(&bd->bd_le.le_list, &in_progress); 589 + gfs2_check_magic(bd->bd_bh); 590 + *ptr++ = cpu_to_be64(bd->bd_bh->b_blocknr); 591 + *ptr++ = cpu_to_be64(buffer_escaped(bh) ? 1 : 0); 592 + n++; 748 593 } 749 594 gfs2_log_unlock(sdp); 750 - 751 - /* Wait on all ordered buffers */ 752 - while (!list_empty(&started)) { 753 - gfs2_log_lock(sdp); 754 - bd1 = list_entry(started.next, struct gfs2_bufdata, 755 - bd_le.le_list); 756 - list_del_init(&bd1->bd_le.le_list); 757 - sdp->sd_log_num_databuf--; 758 - bh = bd1->bd_bh; 759 - if (bh) { 760 - bh->b_private = NULL; 761 - get_bh(bh); 762 - gfs2_log_unlock(sdp); 763 - wait_on_buffer(bh); 764 - brelse(bh); 765 - } else 766 - gfs2_log_unlock(sdp); 767 - 768 - kmem_cache_free(gfs2_bufdata_cachep, bd1); 769 - } 770 - 771 - /* We've removed all the ordered write bufs here, so only jdata left */ 772 - gfs2_assert_warn(sdp, sdp->sd_log_num_databuf == sdp->sd_log_num_jdata); 595 + gfs2_write_blocks(sdp, bh, &in_progress, &processed, n); 596 + gfs2_log_lock(sdp); 597 + list_splice(&processed, &sdp->sd_log_le_databuf); 598 + gfs2_log_unlock(sdp); 773 599 } 774 600 775 601 static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, ··· 767 765 bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list); 768 766 list_del_init(&bd->bd_le.le_list); 769 767 sdp->sd_log_num_databuf--; 770 - sdp->sd_log_num_jdata--; 771 768 gfs2_unpin(sdp, bd->bd_bh, ai); 772 769 } 773 770 gfs2_assert_warn(sdp, !sdp->sd_log_num_databuf); 774 - gfs2_assert_warn(sdp, !sdp->sd_log_num_jdata); 775 771 } 776 772 777 773 ··· 817 817 818 818 const struct gfs2_log_operations *gfs2_log_ops[] = { 819 819 &gfs2_glock_lops, 820 - &gfs2_buf_lops, 821 - &gfs2_revoke_lops, 822 - &gfs2_rg_lops, 823 820 &gfs2_databuf_lops, 821 + &gfs2_buf_lops, 822 + &gfs2_rg_lops, 823 + &gfs2_revoke_lops, 824 824 NULL, 825 825 }; 826 826

+3

fs/gfs2/main.c

··· 107 107 fail_unregister: 108 108 unregister_filesystem(&gfs2_fs_type); 109 109 fail: 110 + gfs2_glock_exit(); 111 + 110 112 if (gfs2_bufdata_cachep) 111 113 kmem_cache_destroy(gfs2_bufdata_cachep); 112 114 ··· 129 127 130 128 static void __exit exit_gfs2_fs(void) 131 129 { 130 + gfs2_glock_exit(); 132 131 gfs2_unregister_debugfs(); 133 132 unregister_filesystem(&gfs2_fs_type); 134 133 unregister_filesystem(&gfs2meta_fs_type);

+32 -104

fs/gfs2/meta_io.c

··· 297 297 unlock_page(bh->b_page); 298 298 } 299 299 300 - /** 301 - * gfs2_pin - Pin a buffer in memory 302 - * @sdp: the filesystem the buffer belongs to 303 - * @bh: The buffer to be pinned 304 - * 305 - */ 306 - 307 - void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh) 300 + void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int meta) 308 301 { 302 + struct gfs2_sbd *sdp = GFS2_SB(bh->b_page->mapping->host); 309 303 struct gfs2_bufdata *bd = bh->b_private; 310 - 311 - gfs2_assert_withdraw(sdp, test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)); 312 - 313 - if (test_set_buffer_pinned(bh)) 314 - gfs2_assert_withdraw(sdp, 0); 315 - 316 - wait_on_buffer(bh); 317 - 318 - /* If this buffer is in the AIL and it has already been written 319 - to in-place disk block, remove it from the AIL. */ 320 - 321 - gfs2_log_lock(sdp); 322 - if (bd->bd_ail && !buffer_in_io(bh)) 323 - list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list); 324 - gfs2_log_unlock(sdp); 325 - 326 - clear_buffer_dirty(bh); 327 - wait_on_buffer(bh); 328 - 329 - if (!buffer_uptodate(bh)) 330 - gfs2_io_error_bh(sdp, bh); 331 - 332 - get_bh(bh); 333 - } 334 - 335 - /** 336 - * gfs2_unpin - Unpin a buffer 337 - * @sdp: the filesystem the buffer belongs to 338 - * @bh: The buffer to unpin 339 - * @ai: 340 - * 341 - */ 342 - 343 - void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh, 344 - struct gfs2_ail *ai) 345 - { 346 - struct gfs2_bufdata *bd = bh->b_private; 347 - 348 - gfs2_assert_withdraw(sdp, buffer_uptodate(bh)); 349 - 350 - if (!buffer_pinned(bh)) 351 - gfs2_assert_withdraw(sdp, 0); 352 - 353 - mark_buffer_dirty(bh); 354 - clear_buffer_pinned(bh); 355 - 356 - gfs2_log_lock(sdp); 357 - if (bd->bd_ail) { 358 - list_del(&bd->bd_ail_st_list); 304 + if (test_clear_buffer_pinned(bh)) { 305 + list_del_init(&bd->bd_le.le_list); 306 + if (meta) { 307 + gfs2_assert_warn(sdp, sdp->sd_log_num_buf); 308 + sdp->sd_log_num_buf--; 309 + tr->tr_num_buf_rm++; 310 + } else { 311 + gfs2_assert_warn(sdp, sdp->sd_log_num_databuf); 312 + sdp->sd_log_num_databuf--; 313 + tr->tr_num_databuf_rm++; 314 + } 315 + tr->tr_touched = 1; 359 316 brelse(bh); 360 - } else { 361 - struct gfs2_glock *gl = bd->bd_gl; 362 - list_add(&bd->bd_ail_gl_list, &gl->gl_ail_list); 363 - atomic_inc(&gl->gl_ail_count); 364 317 } 365 - bd->bd_ail = ai; 366 - list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list); 367 - gfs2_log_unlock(sdp); 318 + if (bd) { 319 + if (bd->bd_ail) { 320 + gfs2_remove_from_ail(NULL, bd); 321 + bh->b_private = NULL; 322 + bd->bd_bh = NULL; 323 + bd->bd_blkno = bh->b_blocknr; 324 + gfs2_trans_add_revoke(sdp, bd); 325 + } 326 + } 327 + clear_buffer_dirty(bh); 328 + clear_buffer_uptodate(bh); 368 329 } 369 330 370 331 /** ··· 344 383 while (blen) { 345 384 bh = getbuf(ip->i_gl, bstart, NO_CREATE); 346 385 if (bh) { 347 - struct gfs2_bufdata *bd = bh->b_private; 348 - 349 - if (test_clear_buffer_pinned(bh)) { 350 - struct gfs2_trans *tr = current->journal_info; 351 - struct gfs2_inode *bh_ip = 352 - GFS2_I(bh->b_page->mapping->host); 353 - 354 - gfs2_log_lock(sdp); 355 - list_del_init(&bd->bd_le.le_list); 356 - gfs2_assert_warn(sdp, sdp->sd_log_num_buf); 357 - sdp->sd_log_num_buf--; 358 - gfs2_log_unlock(sdp); 359 - if (bh_ip->i_inode.i_private != NULL) 360 - tr->tr_num_databuf_rm++; 361 - else 362 - tr->tr_num_buf_rm++; 363 - brelse(bh); 364 - } 365 - if (bd) { 366 - gfs2_log_lock(sdp); 367 - if (bd->bd_ail) { 368 - u64 blkno = bh->b_blocknr; 369 - bd->bd_ail = NULL; 370 - list_del(&bd->bd_ail_st_list); 371 - list_del(&bd->bd_ail_gl_list); 372 - atomic_dec(&bd->bd_gl->gl_ail_count); 373 - brelse(bh); 374 - gfs2_log_unlock(sdp); 375 - gfs2_trans_add_revoke(sdp, blkno); 376 - } else 377 - gfs2_log_unlock(sdp); 378 - } 379 - 380 386 lock_buffer(bh); 381 - clear_buffer_dirty(bh); 382 - clear_buffer_uptodate(bh); 387 + gfs2_log_lock(sdp); 388 + gfs2_remove_from_journal(bh, current->journal_info, 1); 389 + gfs2_log_unlock(sdp); 383 390 unlock_buffer(bh); 384 - 385 391 brelse(bh); 386 392 } 387 393 ··· 374 446 375 447 for (x = 0; x < GFS2_MAX_META_HEIGHT; x++) { 376 448 bh_slot = &ip->i_cache[x]; 377 - if (!*bh_slot) 378 - break; 379 - brelse(*bh_slot); 380 - *bh_slot = NULL; 449 + if (*bh_slot) { 450 + brelse(*bh_slot); 451 + *bh_slot = NULL; 452 + } 381 453 } 382 454 383 455 spin_unlock(&ip->i_spin);

+3 -3

fs/gfs2/meta_io.h

··· 50 50 51 51 void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh, 52 52 int meta); 53 - void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh); 54 - void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh, 55 - struct gfs2_ail *ai); 53 + 54 + void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, 55 + int meta); 56 56 57 57 void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen); 58 58

+4 -1

fs/gfs2/mount.c

··· 42 42 Opt_nosuiddir, 43 43 Opt_data_writeback, 44 44 Opt_data_ordered, 45 + Opt_err, 45 46 }; 46 47 47 48 static match_table_t tokens = { ··· 65 64 {Opt_suiddir, "suiddir"}, 66 65 {Opt_nosuiddir, "nosuiddir"}, 67 66 {Opt_data_writeback, "data=writeback"}, 68 - {Opt_data_ordered, "data=ordered"} 67 + {Opt_data_ordered, "data=ordered"}, 68 + {Opt_err, NULL} 69 69 }; 70 70 71 71 /** ··· 239 237 case Opt_data_ordered: 240 238 args->ar_data = GFS2_DATA_ORDERED; 241 239 break; 240 + case Opt_err: 242 241 default: 243 242 fs_info(sdp, "unknown option: %s\n", o); 244 243 error = -EINVAL;

+46 -106

fs/gfs2/ops_address.c

··· 90 90 error = gfs2_block_map(inode, lblock, 0, bh_result); 91 91 if (error) 92 92 return error; 93 - if (bh_result->b_blocknr == 0) 93 + if (!buffer_mapped(bh_result)) 94 94 return -EIO; 95 95 return 0; 96 96 } ··· 414 414 if (ind_blocks || data_blocks) 415 415 rblocks += RES_STATFS + RES_QUOTA; 416 416 417 - error = gfs2_trans_begin(sdp, rblocks, 0); 417 + error = gfs2_trans_begin(sdp, rblocks, 418 + PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize); 418 419 if (error) 419 420 goto out_trans_fail; 420 421 ··· 617 616 return dblock; 618 617 } 619 618 620 - static void discard_buffer(struct gfs2_sbd *sdp, struct buffer_head *bh) 619 + static void gfs2_discard(struct gfs2_sbd *sdp, struct buffer_head *bh) 621 620 { 622 621 struct gfs2_bufdata *bd; 623 622 623 + lock_buffer(bh); 624 624 gfs2_log_lock(sdp); 625 + clear_buffer_dirty(bh); 625 626 bd = bh->b_private; 626 627 if (bd) { 627 - bd->bd_bh = NULL; 628 - bh->b_private = NULL; 629 - if (!bd->bd_ail && list_empty(&bd->bd_le.le_list)) 630 - kmem_cache_free(gfs2_bufdata_cachep, bd); 628 + if (!list_empty(&bd->bd_le.le_list) && !buffer_pinned(bh)) 629 + list_del_init(&bd->bd_le.le_list); 630 + else 631 + gfs2_remove_from_journal(bh, current->journal_info, 0); 631 632 } 632 - gfs2_log_unlock(sdp); 633 - 634 - lock_buffer(bh); 635 - clear_buffer_dirty(bh); 636 633 bh->b_bdev = NULL; 637 634 clear_buffer_mapped(bh); 638 635 clear_buffer_req(bh); 639 636 clear_buffer_new(bh); 640 - clear_buffer_delay(bh); 637 + gfs2_log_unlock(sdp); 641 638 unlock_buffer(bh); 642 639 } 643 640 644 641 static void gfs2_invalidatepage(struct page *page, unsigned long offset) 645 642 { 646 643 struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host); 647 - struct buffer_head *head, *bh, *next; 648 - unsigned int curr_off = 0; 644 + struct buffer_head *bh, *head; 645 + unsigned long pos = 0; 649 646 650 647 BUG_ON(!PageLocked(page)); 651 648 if (offset == 0) 652 649 ClearPageChecked(page); 653 650 if (!page_has_buffers(page)) 654 - return; 651 + goto out; 655 652 656 653 bh = head = page_buffers(page); 657 654 do { 658 - unsigned int next_off = curr_off + bh->b_size; 659 - next = bh->b_this_page; 660 - 661 - if (offset <= curr_off) 662 - discard_buffer(sdp, bh); 663 - 664 - curr_off = next_off; 665 - bh = next; 655 + if (offset <= pos) 656 + gfs2_discard(sdp, bh); 657 + pos += bh->b_size; 658 + bh = bh->b_this_page; 666 659 } while (bh != head); 667 - 668 - if (!offset) 660 + out: 661 + if (offset == 0) 669 662 try_to_release_page(page, 0); 670 - 671 - return; 672 663 } 673 664 674 665 /** ··· 729 736 } 730 737 731 738 /** 732 - * stuck_releasepage - We're stuck in gfs2_releasepage(). Print stuff out. 733 - * @bh: the buffer we're stuck on 734 - * 735 - */ 736 - 737 - static void stuck_releasepage(struct buffer_head *bh) 738 - { 739 - struct inode *inode = bh->b_page->mapping->host; 740 - struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; 741 - struct gfs2_bufdata *bd = bh->b_private; 742 - struct gfs2_glock *gl; 743 - static unsigned limit = 0; 744 - 745 - if (limit > 3) 746 - return; 747 - limit++; 748 - 749 - fs_warn(sdp, "stuck in gfs2_releasepage() %p\n", inode); 750 - fs_warn(sdp, "blkno = %llu, bh->b_count = %d\n", 751 - (unsigned long long)bh->b_blocknr, atomic_read(&bh->b_count)); 752 - fs_warn(sdp, "pinned = %u\n", buffer_pinned(bh)); 753 - fs_warn(sdp, "bh->b_private = %s\n", (bd) ? "!NULL" : "NULL"); 754 - 755 - if (!bd) 756 - return; 757 - 758 - gl = bd->bd_gl; 759 - 760 - fs_warn(sdp, "gl = (%u, %llu)\n", 761 - gl->gl_name.ln_type, (unsigned long long)gl->gl_name.ln_number); 762 - 763 - fs_warn(sdp, "bd_list_tr = %s, bd_le.le_list = %s\n", 764 - (list_empty(&bd->bd_list_tr)) ? "no" : "yes", 765 - (list_empty(&bd->bd_le.le_list)) ? "no" : "yes"); 766 - 767 - if (gl->gl_ops == &gfs2_inode_glops) { 768 - struct gfs2_inode *ip = gl->gl_object; 769 - unsigned int x; 770 - 771 - if (!ip) 772 - return; 773 - 774 - fs_warn(sdp, "ip = %llu %llu\n", 775 - (unsigned long long)ip->i_no_formal_ino, 776 - (unsigned long long)ip->i_no_addr); 777 - 778 - for (x = 0; x < GFS2_MAX_META_HEIGHT; x++) 779 - fs_warn(sdp, "ip->i_cache[%u] = %s\n", 780 - x, (ip->i_cache[x]) ? "!NULL" : "NULL"); 781 - } 782 - } 783 - 784 - /** 785 739 * gfs2_releasepage - free the metadata associated with a page 786 740 * @page: the page that's being released 787 741 * @gfp_mask: passed from Linux VFS, ignored by us ··· 745 805 struct gfs2_sbd *sdp = aspace->i_sb->s_fs_info; 746 806 struct buffer_head *bh, *head; 747 807 struct gfs2_bufdata *bd; 748 - unsigned long t = jiffies + gfs2_tune_get(sdp, gt_stall_secs) * HZ; 749 808 750 809 if (!page_has_buffers(page)) 751 - goto out; 810 + return 0; 811 + 812 + gfs2_log_lock(sdp); 813 + head = bh = page_buffers(page); 814 + do { 815 + if (atomic_read(&bh->b_count)) 816 + goto cannot_release; 817 + bd = bh->b_private; 818 + if (bd && bd->bd_ail) 819 + goto cannot_release; 820 + gfs2_assert_warn(sdp, !buffer_pinned(bh)); 821 + gfs2_assert_warn(sdp, !buffer_dirty(bh)); 822 + bh = bh->b_this_page; 823 + } while(bh != head); 824 + gfs2_log_unlock(sdp); 752 825 753 826 head = bh = page_buffers(page); 754 827 do { 755 - while (atomic_read(&bh->b_count)) { 756 - if (!atomic_read(&aspace->i_writecount)) 757 - return 0; 758 - 759 - if (!(gfp_mask & __GFP_WAIT)) 760 - return 0; 761 - 762 - if (time_after_eq(jiffies, t)) { 763 - stuck_releasepage(bh); 764 - /* should we withdraw here? */ 765 - return 0; 766 - } 767 - 768 - yield(); 769 - } 770 - 771 - gfs2_assert_warn(sdp, !buffer_pinned(bh)); 772 - gfs2_assert_warn(sdp, !buffer_dirty(bh)); 773 - 774 828 gfs2_log_lock(sdp); 775 829 bd = bh->b_private; 776 830 if (bd) { 777 831 gfs2_assert_warn(sdp, bd->bd_bh == bh); 778 832 gfs2_assert_warn(sdp, list_empty(&bd->bd_list_tr)); 779 - gfs2_assert_warn(sdp, !bd->bd_ail); 780 - bd->bd_bh = NULL; 781 - if (!list_empty(&bd->bd_le.le_list)) 782 - bd = NULL; 833 + if (!list_empty(&bd->bd_le.le_list)) { 834 + if (!buffer_pinned(bh)) 835 + list_del_init(&bd->bd_le.le_list); 836 + else 837 + bd = NULL; 838 + } 839 + if (bd) 840 + bd->bd_bh = NULL; 783 841 bh->b_private = NULL; 784 842 } 785 843 gfs2_log_unlock(sdp); ··· 787 849 bh = bh->b_this_page; 788 850 } while (bh != head); 789 851 790 - out: 791 852 return try_to_free_buffers(page); 853 + cannot_release: 854 + gfs2_log_unlock(sdp); 855 + return 0; 792 856 } 793 857 794 858 const struct address_space_operations gfs2_file_aops = {

+1 -1

fs/gfs2/ops_export.c

··· 237 237 238 238 inode = gfs2_inode_lookup(sb, DT_UNKNOWN, 239 239 inum->no_addr, 240 - 0); 240 + 0, 0); 241 241 if (!inode) 242 242 goto fail; 243 243 if (IS_ERR(inode)) {

+6 -7

fs/gfs2/ops_file.c

··· 571 571 int error = 0; 572 572 573 573 state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED; 574 - flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE; 574 + flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE 575 + | GL_FLOCK; 575 576 576 577 mutex_lock(&fp->f_fl_mutex); 577 578 ··· 580 579 if (gl) { 581 580 if (fl_gh->gh_state == state) 582 581 goto out; 583 - gfs2_glock_hold(gl); 584 582 flock_lock_file_wait(file, 585 583 &(struct file_lock){.fl_type = F_UNLCK}); 586 - gfs2_glock_dq_uninit(fl_gh); 584 + gfs2_glock_dq_wait(fl_gh); 585 + gfs2_holder_reinit(state, flags, fl_gh); 587 586 } else { 588 587 error = gfs2_glock_get(GFS2_SB(&ip->i_inode), 589 588 ip->i_no_addr, &gfs2_flock_glops, 590 589 CREATE, &gl); 591 590 if (error) 592 591 goto out; 592 + gfs2_holder_init(gl, state, flags, fl_gh); 593 + gfs2_glock_put(gl); 593 594 } 594 - 595 - gfs2_holder_init(gl, state, flags, fl_gh); 596 - gfs2_glock_put(gl); 597 - 598 595 error = gfs2_glock_nq(fl_gh); 599 596 if (error) { 600 597 gfs2_holder_uninit(fl_gh);

+18 -22

fs/gfs2/ops_fstype.c

··· 28 28 #include "lm.h" 29 29 #include "mount.h" 30 30 #include "ops_fstype.h" 31 + #include "ops_dentry.h" 31 32 #include "ops_super.h" 32 33 #include "recovery.h" 33 34 #include "rgrp.h" 34 35 #include "super.h" 35 36 #include "sys.h" 36 37 #include "util.h" 38 + #include "log.h" 37 39 38 40 #define DO 0 39 41 #define UNDO 1 40 - 41 - extern struct dentry_operations gfs2_dops; 42 42 43 43 static struct gfs2_sbd *init_sbd(struct super_block *sb) 44 44 { ··· 82 82 INIT_LIST_HEAD(&sdp->sd_log_le_revoke); 83 83 INIT_LIST_HEAD(&sdp->sd_log_le_rg); 84 84 INIT_LIST_HEAD(&sdp->sd_log_le_databuf); 85 + INIT_LIST_HEAD(&sdp->sd_log_le_ordered); 85 86 86 87 mutex_init(&sdp->sd_log_reserve_mutex); 87 88 INIT_LIST_HEAD(&sdp->sd_ail1_list); 88 89 INIT_LIST_HEAD(&sdp->sd_ail2_list); 89 90 90 91 init_rwsem(&sdp->sd_log_flush_lock); 91 - INIT_LIST_HEAD(&sdp->sd_log_flush_list); 92 + atomic_set(&sdp->sd_log_in_flight, 0); 93 + init_waitqueue_head(&sdp->sd_log_flush_wait); 92 94 93 95 INIT_LIST_HEAD(&sdp->sd_revoke_list); 94 96 ··· 147 145 snprintf(sdp->sd_proto_name, GFS2_FSNAME_LEN, "%s", proto); 148 146 snprintf(sdp->sd_table_name, GFS2_FSNAME_LEN, "%s", table); 149 147 150 - while ((table = strchr(sdp->sd_table_name, '/'))) 148 + table = sdp->sd_table_name; 149 + while ((table = strchr(table, '/'))) 151 150 *table = '_'; 152 151 153 152 out: ··· 163 160 164 161 if (undo) 165 162 goto fail_trans; 166 - 167 - p = kthread_run(gfs2_scand, sdp, "gfs2_scand"); 168 - error = IS_ERR(p); 169 - if (error) { 170 - fs_err(sdp, "can't start scand thread: %d\n", error); 171 - return error; 172 - } 173 - sdp->sd_scand_process = p; 174 163 175 164 for (sdp->sd_glockd_num = 0; 176 165 sdp->sd_glockd_num < sdp->sd_args.ar_num_glockd; ··· 224 229 while (sdp->sd_glockd_num--) 225 230 kthread_stop(sdp->sd_glockd_process[sdp->sd_glockd_num]); 226 231 227 - kthread_stop(sdp->sd_scand_process); 228 232 return error; 229 233 } 230 234 231 235 static inline struct inode *gfs2_lookup_root(struct super_block *sb, 232 236 u64 no_addr) 233 237 { 234 - return gfs2_inode_lookup(sb, DT_DIR, no_addr, 0); 238 + return gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0); 235 239 } 236 240 237 241 static int init_sb(struct gfs2_sbd *sdp, int silent, int undo) ··· 295 301 fs_err(sdp, "can't get root dentry\n"); 296 302 error = -ENOMEM; 297 303 iput(inode); 298 - } 299 - sb->s_root->d_op = &gfs2_dops; 304 + } else 305 + sb->s_root->d_op = &gfs2_dops; 306 + 300 307 out: 301 308 gfs2_glock_dq_uninit(&sb_gh); 302 309 return error; ··· 363 368 364 369 ip = GFS2_I(sdp->sd_jdesc->jd_inode); 365 370 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 366 - LM_FLAG_NOEXP | GL_EXACT, 371 + LM_FLAG_NOEXP | GL_EXACT | GL_NOCACHE, 367 372 &sdp->sd_jinode_gh); 368 373 if (error) { 369 374 fs_err(sdp, "can't acquire journal inode glock: %d\n", ··· 813 818 struct nameidata nd; 814 819 struct file_system_type *fstype; 815 820 struct super_block *sb = NULL, *s; 816 - struct list_head *l; 817 821 int error; 818 822 819 823 error = path_lookup(dev_name, LOOKUP_FOLLOW, &nd); ··· 824 830 error = vfs_getattr(nd.mnt, nd.dentry, &stat); 825 831 826 832 fstype = get_fs_type("gfs2"); 827 - list_for_each(l, &fstype->fs_supers) { 828 - s = list_entry(l, struct super_block, s_instances); 833 + list_for_each_entry(s, &fstype->fs_supers, s_instances) { 829 834 if ((S_ISBLK(stat.mode) && s->s_dev == stat.rdev) || 830 835 (S_ISDIR(stat.mode) && s == nd.dentry->d_inode->i_sb)) { 831 836 sb = s; ··· 854 861 error = -ENOENT; 855 862 goto error; 856 863 } 857 - sdp = (struct gfs2_sbd*) sb->s_fs_info; 864 + sdp = sb->s_fs_info; 858 865 if (sdp->sd_vfs_meta) { 859 866 printk(KERN_WARNING "GFS2: gfs2meta mount already exists\n"); 860 867 error = -EBUSY; ··· 889 896 890 897 static void gfs2_kill_sb(struct super_block *sb) 891 898 { 892 - gfs2_delete_debugfs_file(sb->s_fs_info); 899 + if (sb->s_fs_info) { 900 + gfs2_delete_debugfs_file(sb->s_fs_info); 901 + gfs2_meta_syncfs(sb->s_fs_info); 902 + } 893 903 kill_block_super(sb); 894 904 } 895 905

+27 -11

fs/gfs2/ops_inode.c

··· 69 69 mark_inode_dirty(inode); 70 70 break; 71 71 } else if (PTR_ERR(inode) != -EEXIST || 72 - (nd->intent.open.flags & O_EXCL)) { 72 + (nd && (nd->intent.open.flags & O_EXCL))) { 73 73 gfs2_holder_uninit(ghs); 74 74 return PTR_ERR(inode); 75 75 } ··· 278 278 gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2); 279 279 280 280 281 - error = gfs2_glock_nq_m(3, ghs); 281 + error = gfs2_glock_nq(ghs); /* parent */ 282 282 if (error) 283 - goto out; 283 + goto out_parent; 284 + 285 + error = gfs2_glock_nq(ghs + 1); /* child */ 286 + if (error) 287 + goto out_child; 288 + 289 + error = gfs2_glock_nq(ghs + 2); /* rgrp */ 290 + if (error) 291 + goto out_rgrp; 284 292 285 293 error = gfs2_unlink_ok(dip, &dentry->d_name, ip); 286 294 if (error) 287 - goto out_gunlock; 295 + goto out_rgrp; 288 296 289 297 error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0); 290 298 if (error) 291 - goto out_gunlock; 299 + goto out_rgrp; 292 300 293 301 error = gfs2_dir_del(dip, &dentry->d_name); 294 302 if (error) ··· 306 298 307 299 out_end_trans: 308 300 gfs2_trans_end(sdp); 309 - out_gunlock: 310 - gfs2_glock_dq_m(3, ghs); 311 - out: 312 - gfs2_holder_uninit(ghs); 313 - gfs2_holder_uninit(ghs + 1); 301 + gfs2_glock_dq(ghs + 2); 302 + out_rgrp: 314 303 gfs2_holder_uninit(ghs + 2); 304 + gfs2_glock_dq(ghs + 1); 305 + out_child: 306 + gfs2_holder_uninit(ghs + 1); 307 + gfs2_glock_dq(ghs); 308 + out_parent: 309 + gfs2_holder_uninit(ghs); 315 310 gfs2_glock_dq_uninit(&ri_gh); 316 311 return error; 317 312 } ··· 905 894 static int setattr_size(struct inode *inode, struct iattr *attr) 906 895 { 907 896 struct gfs2_inode *ip = GFS2_I(inode); 897 + struct gfs2_sbd *sdp = GFS2_SB(inode); 908 898 int error; 909 899 910 900 if (attr->ia_size != ip->i_di.di_size) { 911 - error = vmtruncate(inode, attr->ia_size); 901 + error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks); 912 902 if (error) 903 + return error; 904 + error = vmtruncate(inode, attr->ia_size); 905 + gfs2_trans_end(sdp); 906 + if (error) 913 907 return error; 914 908 } 915 909

+8 -6

fs/gfs2/ops_super.c

··· 92 92 kthread_stop(sdp->sd_recoverd_process); 93 93 while (sdp->sd_glockd_num--) 94 94 kthread_stop(sdp->sd_glockd_process[sdp->sd_glockd_num]); 95 - kthread_stop(sdp->sd_scand_process); 96 95 97 96 if (!(sb->s_flags & MS_RDONLY)) { 98 97 error = gfs2_make_fs_ro(sdp); ··· 455 456 } 456 457 457 458 error = gfs2_dinode_dealloc(ip); 458 - /* 459 - * Must do this before unlock to avoid trying to write back 460 - * potentially dirty data now that inode no longer exists 461 - * on disk. 462 - */ 459 + if (error) 460 + goto out_unlock; 461 + 462 + error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks); 463 + if (error) 464 + goto out_unlock; 465 + /* Needs to be done before glock release & also in a transaction */ 463 466 truncate_inode_pages(&inode->i_data, 0); 467 + gfs2_trans_end(sdp); 464 468 465 469 out_unlock: 466 470 gfs2_glock_dq(&ip->i_iopen_gh);

+13

fs/gfs2/quota.c

··· 70 70 u64 qu_limit; 71 71 u64 qu_warn; 72 72 s64 qu_value; 73 + u32 qu_ll_next; 73 74 }; 74 75 75 76 struct gfs2_quota_change_host { ··· 581 580 qu->qu_limit = be64_to_cpu(str->qu_limit); 582 581 qu->qu_warn = be64_to_cpu(str->qu_warn); 583 582 qu->qu_value = be64_to_cpu(str->qu_value); 583 + qu->qu_ll_next = be32_to_cpu(str->qu_ll_next); 584 584 } 585 585 586 586 static void gfs2_quota_out(const struct gfs2_quota_host *qu, void *buf) ··· 591 589 str->qu_limit = cpu_to_be64(qu->qu_limit); 592 590 str->qu_warn = cpu_to_be64(qu->qu_warn); 593 591 str->qu_value = cpu_to_be64(qu->qu_value); 592 + str->qu_ll_next = cpu_to_be32(qu->qu_ll_next); 594 593 memset(&str->qu_reserved, 0, sizeof(str->qu_reserved)); 595 594 } 596 595 ··· 617 614 s64 value; 618 615 int err = -EIO; 619 616 617 + if (gfs2_is_stuffed(ip)) { 618 + struct gfs2_alloc *al = NULL; 619 + al = gfs2_alloc_get(ip); 620 + /* just request 1 blk */ 621 + al->al_requested = 1; 622 + gfs2_inplace_reserve(ip); 623 + gfs2_unstuff_dinode(ip, NULL); 624 + gfs2_inplace_release(ip); 625 + gfs2_alloc_put(ip); 626 + } 620 627 page = grab_cache_page(mapping, index); 621 628 if (!page) 622 629 return -ENOMEM;

+1 -1

fs/gfs2/recovery.c

··· 469 469 }; 470 470 471 471 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 472 - LM_FLAG_NOEXP, &ji_gh); 472 + LM_FLAG_NOEXP | GL_NOCACHE, &ji_gh); 473 473 if (error) 474 474 goto fail_gunlock_j; 475 475 } else {

+24 -15

fs/gfs2/rgrp.c

··· 31 31 #include "inode.h" 32 32 33 33 #define BFITNOENT ((u32)~0) 34 + #define NO_BLOCK ((u64)~0) 34 35 35 36 /* 36 37 * These routines are used by the resource group routines (rgrp.c) ··· 117 116 * @buffer: the buffer that holds the bitmaps 118 117 * @buflen: the length (in bytes) of the buffer 119 118 * @goal: start search at this block's bit-pair (within @buffer) 120 - * @old_state: GFS2_BLKST_XXX the state of the block we're looking for; 121 - * bit 0 = alloc(1)/free(0), bit 1 = meta(1)/data(0) 119 + * @old_state: GFS2_BLKST_XXX the state of the block we're looking for. 122 120 * 123 121 * Scope of @goal and returned block number is only within this bitmap buffer, 124 122 * not entire rgrp or filesystem. @buffer will be offset from the actual ··· 137 137 byte = buffer + (goal / GFS2_NBBY); 138 138 bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE; 139 139 end = buffer + buflen; 140 - alloc = (old_state & 1) ? 0 : 0x55; 140 + alloc = (old_state == GFS2_BLKST_FREE) ? 0x55 : 0; 141 141 142 142 while (byte < end) { 143 + /* If we're looking for a free block we can eliminate all 144 + bitmap settings with 0x55, which represents four data 145 + blocks in a row. If we're looking for a data block, we can 146 + eliminate 0x00 which corresponds to four free blocks. */ 143 147 if ((*byte & 0x55) == alloc) { 144 148 blk += (8 - bit) >> 1; 145 149 ··· 863 859 static struct inode *try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked) 864 860 { 865 861 struct inode *inode; 866 - u32 goal = 0; 862 + u32 goal = 0, block; 867 863 u64 no_addr; 864 + struct gfs2_sbd *sdp = rgd->rd_sbd; 868 865 869 866 for(;;) { 870 867 if (goal >= rgd->rd_data) 871 868 break; 872 - goal = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED, 873 - GFS2_BLKST_UNLINKED); 874 - if (goal == BFITNOENT) 869 + down_write(&sdp->sd_log_flush_lock); 870 + block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED, 871 + GFS2_BLKST_UNLINKED); 872 + up_write(&sdp->sd_log_flush_lock); 873 + if (block == BFITNOENT) 875 874 break; 876 - no_addr = goal + rgd->rd_data0; 875 + /* rgblk_search can return a block < goal, so we need to 876 + keep it marching forward. */ 877 + no_addr = block + rgd->rd_data0; 877 878 goal++; 878 - if (no_addr < *last_unlinked) 879 + if (*last_unlinked != NO_BLOCK && no_addr <= *last_unlinked) 879 880 continue; 880 881 *last_unlinked = no_addr; 881 882 inode = gfs2_inode_lookup(rgd->rd_sbd->sd_vfs, DT_UNKNOWN, 882 - no_addr, -1); 883 + no_addr, -1, 1); 883 884 if (!IS_ERR(inode)) 884 885 return inode; 885 886 } ··· 1161 1152 struct gfs2_alloc *al = &ip->i_alloc; 1162 1153 struct inode *inode; 1163 1154 int error = 0; 1164 - u64 last_unlinked = 0; 1155 + u64 last_unlinked = NO_BLOCK; 1165 1156 1166 1157 if (gfs2_assert_warn(sdp, al->al_requested)) 1167 1158 return -EINVAL; ··· 1298 1289 allocatable block anywhere else, we want to be able wrap around and 1299 1290 search in the first part of our first-searched bit block. */ 1300 1291 for (x = 0; x <= length; x++) { 1301 - if (bi->bi_clone) 1292 + /* The GFS2_BLKST_UNLINKED state doesn't apply to the clone 1293 + bitmaps, so we must search the originals for that. */ 1294 + if (old_state != GFS2_BLKST_UNLINKED && bi->bi_clone) 1302 1295 blk = gfs2_bitfit(rgd, bi->bi_clone + bi->bi_offset, 1303 1296 bi->bi_len, goal, old_state); 1304 1297 else ··· 1316 1305 goal = 0; 1317 1306 } 1318 1307 1319 - if (old_state != new_state) { 1320 - gfs2_assert_withdraw(rgd->rd_sbd, blk != BFITNOENT); 1321 - 1308 + if (blk != BFITNOENT && old_state != new_state) { 1322 1309 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); 1323 1310 gfs2_setbit(rgd, bi->bi_bh->b_data + bi->bi_offset, 1324 1311 bi->bi_len, blk, new_state);

-1

fs/gfs2/super.c

··· 58 58 gt->gt_incore_log_blocks = 1024; 59 59 gt->gt_log_flush_secs = 60; 60 60 gt->gt_jindex_refresh_secs = 60; 61 - gt->gt_scand_secs = 15; 62 61 gt->gt_recoverd_secs = 60; 63 62 gt->gt_logd_secs = 1; 64 63 gt->gt_quotad_secs = 5;

-2

fs/gfs2/sys.c

··· 442 442 TUNE_ATTR(quota_cache_secs, 1); 443 443 TUNE_ATTR(stall_secs, 1); 444 444 TUNE_ATTR(statfs_quantum, 1); 445 - TUNE_ATTR_DAEMON(scand_secs, scand_process); 446 445 TUNE_ATTR_DAEMON(recoverd_secs, recoverd_process); 447 446 TUNE_ATTR_DAEMON(logd_secs, logd_process); 448 447 TUNE_ATTR_DAEMON(quotad_secs, quotad_process); ··· 463 464 &tune_attr_quota_cache_secs.attr, 464 465 &tune_attr_stall_secs.attr, 465 466 &tune_attr_statfs_quantum.attr, 466 - &tune_attr_scand_secs.attr, 467 467 &tune_attr_recoverd_secs.attr, 468 468 &tune_attr_logd_secs.attr, 469 469 &tune_attr_quotad_secs.attr,

+11 -11

fs/gfs2/trans.c

··· 142 142 lops_add(sdp, &bd->bd_le); 143 143 } 144 144 145 - void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, u64 blkno) 145 + void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) 146 146 { 147 - struct gfs2_revoke *rv = kmalloc(sizeof(struct gfs2_revoke), 148 - GFP_NOFS | __GFP_NOFAIL); 149 - lops_init_le(&rv->rv_le, &gfs2_revoke_lops); 150 - rv->rv_blkno = blkno; 151 - lops_add(sdp, &rv->rv_le); 147 + BUG_ON(!list_empty(&bd->bd_le.le_list)); 148 + BUG_ON(!list_empty(&bd->bd_ail_st_list)); 149 + BUG_ON(!list_empty(&bd->bd_ail_gl_list)); 150 + lops_init_le(&bd->bd_le, &gfs2_revoke_lops); 151 + lops_add(sdp, &bd->bd_le); 152 152 } 153 153 154 154 void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno) 155 155 { 156 - struct gfs2_revoke *rv; 156 + struct gfs2_bufdata *bd; 157 157 int found = 0; 158 158 159 159 gfs2_log_lock(sdp); 160 160 161 - list_for_each_entry(rv, &sdp->sd_log_le_revoke, rv_le.le_list) { 162 - if (rv->rv_blkno == blkno) { 163 - list_del(&rv->rv_le.le_list); 161 + list_for_each_entry(bd, &sdp->sd_log_le_revoke, bd_le.le_list) { 162 + if (bd->bd_blkno == blkno) { 163 + list_del_init(&bd->bd_le.le_list); 164 164 gfs2_assert_withdraw(sdp, sdp->sd_log_num_revoke); 165 165 sdp->sd_log_num_revoke--; 166 166 found = 1; ··· 172 172 173 173 if (found) { 174 174 struct gfs2_trans *tr = current->journal_info; 175 - kfree(rv); 175 + kmem_cache_free(gfs2_bufdata_cachep, bd); 176 176 tr->tr_num_revoke_rm++; 177 177 } 178 178 }

+1 -1

fs/gfs2/trans.h

··· 32 32 33 33 void gfs2_trans_add_gl(struct gfs2_glock *gl); 34 34 void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta); 35 - void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, u64 blkno); 35 + void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); 36 36 void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno); 37 37 void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd); 38 38

+29 -1

include/linux/gfs2_ondisk.h

··· 170 170 }; 171 171 172 172 /* 173 + * quota linked list: user quotas and group quotas form two separate 174 + * singly linked lists. ll_next stores uids or gids of next quotas in the 175 + * linked list. 176 + 177 + Given the uid/gid, how to calculate the quota file offsets for the corresponding 178 + gfs2_quota structures on disk: 179 + 180 + for user quotas, given uid, 181 + offset = uid * sizeof(struct gfs2_quota); 182 + 183 + for group quotas, given gid, 184 + offset = (gid * sizeof(struct gfs2_quota)) + sizeof(struct gfs2_quota); 185 + 186 + 187 + uid:0 gid:0 uid:12 gid:12 uid:17 gid:17 uid:5142 gid:5142 188 + +-------+-------+ +-------+-------+ +-------+- - - -+ +- - - -+-------+ 189 + | valid | valid | :: | valid | valid | :: | valid | inval | :: | inval | valid | 190 + +-------+-------+ +-------+-------+ +-------+- - - -+ +- - - -+-------+ 191 + next:12 next:12 next:17 next:5142 next:NULL next:NULL 192 + | | | | |<-- user quota list | 193 + \______|___________/ \______|___________/ group quota list -->| 194 + | | | 195 + \__________________/ \_______________________________________/ 196 + 197 + */ 198 + 199 + /* 173 200 * quota structure 174 201 */ 175 202 ··· 204 177 __be64 qu_limit; 205 178 __be64 qu_warn; 206 179 __be64 qu_value; 207 - __u8 qu_reserved[64]; 180 + __be32 qu_ll_next; /* location of next quota in list */ 181 + __u8 qu_reserved[60]; 208 182 }; 209 183 210 184 /*