Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[PATCH] NLM: fix a client-side race on blocking locks.

If the lock blocks, the server may send us a GRANTED message that
races with the reply to our LOCK request. Make sure that we catch
the GRANTED by queueing up our request on the nlm_blocked list
before we send off the first LOCK RPC call.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

+96 -50
+58 -41
fs/lockd/clntlock.c
··· 42 42 static LIST_HEAD(nlm_blocked); 43 43 44 44 /* 45 + * Queue up a lock for blocking so that the GRANTED request can see it 46 + */ 47 + int nlmclnt_prepare_block(struct nlm_rqst *req, struct nlm_host *host, struct file_lock *fl) 48 + { 49 + struct nlm_wait *block; 50 + 51 + BUG_ON(req->a_block != NULL); 52 + block = kmalloc(sizeof(*block), GFP_KERNEL); 53 + if (block == NULL) 54 + return -ENOMEM; 55 + block->b_host = host; 56 + block->b_lock = fl; 57 + init_waitqueue_head(&block->b_wait); 58 + block->b_status = NLM_LCK_BLOCKED; 59 + 60 + list_add(&block->b_list, &nlm_blocked); 61 + req->a_block = block; 62 + 63 + return 0; 64 + } 65 + 66 + void nlmclnt_finish_block(struct nlm_rqst *req) 67 + { 68 + struct nlm_wait *block = req->a_block; 69 + 70 + if (block == NULL) 71 + return; 72 + req->a_block = NULL; 73 + list_del(&block->b_list); 74 + kfree(block); 75 + } 76 + 77 + /* 45 78 * Block on a lock 46 79 */ 47 - int 48 - nlmclnt_block(struct nlm_host *host, struct file_lock *fl, u32 *statp) 80 + long nlmclnt_block(struct nlm_rqst *req, long timeout) 49 81 { 50 - struct nlm_wait block, **head; 51 - int err; 52 - u32 pstate; 82 + struct nlm_wait *block = req->a_block; 83 + long ret; 53 84 54 - block.b_host = host; 55 - block.b_lock = fl; 56 - init_waitqueue_head(&block.b_wait); 57 - block.b_status = NLM_LCK_BLOCKED; 58 - list_add(&block.b_list, &nlm_blocked); 59 - 60 - /* Remember pseudo nsm state */ 61 - pstate = host->h_state; 85 + /* A borken server might ask us to block even if we didn't 86 + * request it. Just say no! 87 + */ 88 + if (!req->a_args.block) 89 + return -EAGAIN; 62 90 63 91 /* Go to sleep waiting for GRANT callback. Some servers seem 64 92 * to lose callbacks, however, so we're going to poll from ··· 96 68 * a 1 minute timeout would do. See the comment before 97 69 * nlmclnt_lock for an explanation. 
98 70 */ 99 - sleep_on_timeout(&block.b_wait, 30*HZ); 71 + ret = wait_event_interruptible_timeout(block->b_wait, 72 + block->b_status != NLM_LCK_BLOCKED, 73 + timeout); 100 74 101 - list_del(&block.b_list); 102 - 103 - if (!signalled()) { 104 - *statp = block.b_status; 105 - return 0; 75 + if (block->b_status != NLM_LCK_BLOCKED) { 76 + req->a_res.status = block->b_status; 77 + block->b_status = NLM_LCK_BLOCKED; 106 78 } 107 79 108 - /* Okay, we were interrupted. Cancel the pending request 109 - * unless the server has rebooted. 110 - */ 111 - if (pstate == host->h_state && (err = nlmclnt_cancel(host, fl)) < 0) 112 - printk(KERN_NOTICE 113 - "lockd: CANCEL call failed (errno %d)\n", -err); 114 - 115 - return -ERESTARTSYS; 80 + return ret; 116 81 } 117 82 118 83 /* ··· 115 94 nlmclnt_grant(struct nlm_lock *lock) 116 95 { 117 96 struct nlm_wait *block; 97 + u32 res = nlm_lck_denied; 118 98 119 99 /* 120 100 * Look up blocked request based on arguments. 121 101 * Warning: must not use cookie to match it! 122 102 */ 123 103 list_for_each_entry(block, &nlm_blocked, b_list) { 124 - if (nlm_compare_locks(block->b_lock, &lock->fl)) 125 - break; 104 + if (nlm_compare_locks(block->b_lock, &lock->fl)) { 105 + /* Alright, we found a lock. Set the return status 106 + * and wake up the caller 107 + */ 108 + block->b_status = NLM_LCK_GRANTED; 109 + wake_up(&block->b_wait); 110 + res = nlm_granted; 111 + } 126 112 } 127 - 128 - /* Ooops, no blocked request found. */ 129 - if (block == NULL) 130 - return nlm_lck_denied; 131 - 132 - /* Alright, we found the lock. Set the return status and 133 - * wake up the caller. 134 - */ 135 - block->b_status = NLM_LCK_GRANTED; 136 - wake_up(&block->b_wait); 137 - 138 - return nlm_granted; 113 + return res; 139 114 } 140 115 141 116 /*
+32 -8
fs/lockd/clntproc.c
··· 21 21 22 22 #define NLMDBG_FACILITY NLMDBG_CLIENT 23 23 #define NLMCLNT_GRACE_WAIT (5*HZ) 24 + #define NLMCLNT_POLL_TIMEOUT (30*HZ) 24 25 25 26 static int nlmclnt_test(struct nlm_rqst *, struct file_lock *); 26 27 static int nlmclnt_lock(struct nlm_rqst *, struct file_lock *); ··· 554 553 { 555 554 struct nlm_host *host = req->a_host; 556 555 struct nlm_res *resp = &req->a_res; 557 - int status; 556 + long timeout; 557 + int status; 558 558 559 559 if (!host->h_monitored && nsm_monitor(host) < 0) { 560 560 printk(KERN_NOTICE "lockd: failed to monitor %s\n", ··· 564 562 goto out; 565 563 } 566 564 567 - do { 568 - if ((status = nlmclnt_call(req, NLMPROC_LOCK)) >= 0) { 569 - if (resp->status != NLM_LCK_BLOCKED) 570 - break; 571 - status = nlmclnt_block(host, fl, &resp->status); 572 - } 565 + if (req->a_args.block) { 566 + status = nlmclnt_prepare_block(req, host, fl); 573 567 if (status < 0) 574 568 goto out; 575 - } while (resp->status == NLM_LCK_BLOCKED && req->a_args.block); 569 + } 570 + for(;;) { 571 + status = nlmclnt_call(req, NLMPROC_LOCK); 572 + if (status < 0) 573 + goto out_unblock; 574 + if (resp->status != NLM_LCK_BLOCKED) 575 + break; 576 + /* Wait on an NLM blocking lock */ 577 + timeout = nlmclnt_block(req, NLMCLNT_POLL_TIMEOUT); 578 + /* Did a reclaimer thread notify us of a server reboot? */ 579 + if (resp->status == NLM_LCK_DENIED_GRACE_PERIOD) 580 + continue; 581 + if (resp->status != NLM_LCK_BLOCKED) 582 + break; 583 + if (timeout >= 0) 584 + continue; 585 + /* We were interrupted. 
Send a CANCEL request to the server 586 + * and exit 587 + */ 588 + status = (int)timeout; 589 + goto out_unblock; 590 + } 576 591 577 592 if (resp->status == NLM_LCK_GRANTED) { 578 593 fl->fl_u.nfs_fl.state = host->h_state; ··· 598 579 do_vfs_lock(fl); 599 580 } 600 581 status = nlm_stat_to_errno(resp->status); 582 + out_unblock: 583 + nlmclnt_finish_block(req); 584 + /* Cancel the blocked request if it is still pending */ 585 + if (resp->status == NLM_LCK_BLOCKED) 586 + nlmclnt_cancel(host, fl); 601 587 out: 602 588 nlmclnt_release_lockargs(req); 603 589 return status;
+6 -1
include/linux/lockd/lockd.h
··· 72 72 uint32_t pid; 73 73 }; 74 74 75 + struct nlm_wait; 76 + 75 77 /* 76 78 * Memory chunk for NLM client RPC request. 77 79 */ ··· 83 81 struct nlm_host * a_host; /* host handle */ 84 82 struct nlm_args a_args; /* arguments */ 85 83 struct nlm_res a_res; /* result */ 84 + struct nlm_wait * a_block; 86 85 char a_owner[NLMCLNT_OHSIZE]; 87 86 }; 88 87 ··· 145 142 * Lockd client functions 146 143 */ 147 144 struct nlm_rqst * nlmclnt_alloc_call(void); 148 - int nlmclnt_block(struct nlm_host *, struct file_lock *, u32 *); 145 + int nlmclnt_prepare_block(struct nlm_rqst *req, struct nlm_host *host, struct file_lock *fl); 146 + void nlmclnt_finish_block(struct nlm_rqst *req); 147 + long nlmclnt_block(struct nlm_rqst *req, long timeout); 149 148 int nlmclnt_cancel(struct nlm_host *, struct file_lock *); 150 149 u32 nlmclnt_grant(struct nlm_lock *); 151 150 void nlmclnt_recovery(struct nlm_host *, u32);