Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

NFSD: Enable write delegation support

This patch grants write delegations for OPEN with NFS4_SHARE_ACCESS_WRITE
if there is no conflict with other OPENs.

Write delegation conflicts with another OPEN, REMOVE, RENAME or SETATTR
are handled the same way as read delegation conflicts, using
notify_change and try_break_deleg.

The NFSv4.0 protocol does not enable a server to determine that a
conflicting GETATTR originated from the client holding the
delegation versus coming from some other client. With NFSv4.1 and
later, the SEQUENCE operation that begins each COMPOUND contains a
client ID, so delegation recall can be safely squelched in this case.

With NFSv4.0, however, the server must recall or send a CB_GETATTR
(per RFC 7530 Section 16.7.5) even when the GETATTR originates from
the client holding that delegation.

An NFSv4.0 client can trigger a pathological situation if it always
sends a DELEGRETURN preceded by a conflicting GETATTR in the same
COMPOUND. COMPOUND execution will always stop at the GETATTR and the
DELEGRETURN will never get executed. The server eventually revokes
the delegation, which can result in loss of open or lock state.
To avoid this, the server does not offer write delegations to
NFSv4.0 clients.

A tracepoint is added to track whether a read or a write delegation is granted.

Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>

Authored by Dai Ngo and committed by Chuck Lever
1d3dd1d5 50bce06f

+78 -20
+77 -20
fs/nfsd/nfs4state.c
··· 649 649 return ret; 650 650 } 651 651 652 + static struct nfsd_file * 653 + find_rw_file(struct nfs4_file *f) 654 + { 655 + struct nfsd_file *ret; 656 + 657 + spin_lock(&f->fi_lock); 658 + ret = nfsd_file_get(f->fi_fds[O_RDWR]); 659 + spin_unlock(&f->fi_lock); 660 + 661 + return ret; 662 + } 663 + 652 664 struct nfsd_file * 653 665 find_any_file(struct nfs4_file *f) 654 666 { ··· 1156 1144 1157 1145 static struct nfs4_delegation * 1158 1146 alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp, 1159 - struct nfs4_clnt_odstate *odstate) 1147 + struct nfs4_clnt_odstate *odstate, u32 dl_type) 1160 1148 { 1161 1149 struct nfs4_delegation *dp; 1162 1150 long n; ··· 1182 1170 INIT_LIST_HEAD(&dp->dl_recall_lru); 1183 1171 dp->dl_clnt_odstate = odstate; 1184 1172 get_clnt_odstate(odstate); 1185 - dp->dl_type = NFS4_OPEN_DELEGATE_READ; 1173 + dp->dl_type = dl_type; 1186 1174 dp->dl_retries = 1; 1187 1175 dp->dl_recalled = false; 1188 1176 nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client, ··· 5461 5449 struct nfs4_file *fp = stp->st_stid.sc_file; 5462 5450 struct nfs4_clnt_odstate *odstate = stp->st_clnt_odstate; 5463 5451 struct nfs4_delegation *dp; 5464 - struct nfsd_file *nf; 5452 + struct nfsd_file *nf = NULL; 5465 5453 struct file_lock *fl; 5454 + u32 dl_type; 5466 5455 5467 5456 /* 5468 5457 * The fi_had_conflict and nfs_get_existing_delegation checks ··· 5473 5460 if (fp->fi_had_conflict) 5474 5461 return ERR_PTR(-EAGAIN); 5475 5462 5476 - nf = find_readable_file(fp); 5477 - if (!nf) { 5478 - /* 5479 - * We probably could attempt another open and get a read 5480 - * delegation, but for now, don't bother until the 5481 - * client actually sends us one. 5482 - */ 5483 - return ERR_PTR(-EAGAIN); 5463 + /* 5464 + * Try for a write delegation first. RFC8881 section 10.4 says: 5465 + * 5466 + * "An OPEN_DELEGATE_WRITE delegation allows the client to handle, 5467 + * on its own, all opens." 
5468 + * 5469 + * Furthermore the client can use a write delegation for most READ 5470 + * operations as well, so we require a O_RDWR file here. 5471 + * 5472 + * Offer a write delegation in the case of a BOTH open, and ensure 5473 + * we get the O_RDWR descriptor. 5474 + */ 5475 + if ((open->op_share_access & NFS4_SHARE_ACCESS_BOTH) == NFS4_SHARE_ACCESS_BOTH) { 5476 + nf = find_rw_file(fp); 5477 + dl_type = NFS4_OPEN_DELEGATE_WRITE; 5484 5478 } 5479 + 5480 + /* 5481 + * If the file is being opened O_RDONLY or we couldn't get a O_RDWR 5482 + * file for some reason, then try for a read delegation instead. 5483 + */ 5484 + if (!nf && (open->op_share_access & NFS4_SHARE_ACCESS_READ)) { 5485 + nf = find_readable_file(fp); 5486 + dl_type = NFS4_OPEN_DELEGATE_READ; 5487 + } 5488 + 5489 + if (!nf) 5490 + return ERR_PTR(-EAGAIN); 5491 + 5485 5492 spin_lock(&state_lock); 5486 5493 spin_lock(&fp->fi_lock); 5487 5494 if (nfs4_delegation_exists(clp, fp)) ··· 5524 5491 return ERR_PTR(status); 5525 5492 5526 5493 status = -ENOMEM; 5527 - dp = alloc_init_deleg(clp, fp, odstate); 5494 + dp = alloc_init_deleg(clp, fp, odstate, dl_type); 5528 5495 if (!dp) 5529 5496 goto out_delegees; 5530 5497 5531 - fl = nfs4_alloc_init_lease(dp, NFS4_OPEN_DELEGATE_READ); 5498 + fl = nfs4_alloc_init_lease(dp, dl_type); 5532 5499 if (!fl) 5533 5500 goto out_clnt_odstate; 5534 5501 ··· 5601 5568 } 5602 5569 5603 5570 /* 5604 - * Attempt to hand out a delegation. 5571 + * The Linux NFS server does not offer write delegations to NFSv4.0 5572 + * clients in order to avoid conflicts between write delegations and 5573 + * GETATTRs requesting CHANGE or SIZE attributes. 5605 5574 * 5606 - * Note we don't support write delegations, and won't until the vfs has 5607 - * proper support for them. 5575 + * With NFSv4.1 and later minorversions, the SEQUENCE operation that 5576 + * begins each COMPOUND contains a client ID. 
Delegation recall can 5577 + * be avoided when the server recognizes the client sending a 5578 + * GETATTR also holds write delegation it conflicts with. 5579 + * 5580 + * However, the NFSv4.0 protocol does not enable a server to 5581 + * determine that a GETATTR originated from the client holding the 5582 + * conflicting delegation versus coming from some other client. Per 5583 + * RFC 7530 Section 16.7.5, the server must recall or send a 5584 + * CB_GETATTR even when the GETATTR originates from the client that 5585 + * holds the conflicting delegation. 5586 + * 5587 + * An NFSv4.0 client can trigger a pathological situation if it 5588 + * always sends a DELEGRETURN preceded by a conflicting GETATTR in 5589 + * the same COMPOUND. COMPOUND execution will always stop at the 5590 + * GETATTR and the DELEGRETURN will never get executed. The server 5591 + * eventually revokes the delegation, which can result in loss of 5592 + * open or lock state. 5608 5593 */ 5609 5594 static void 5610 5595 nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, ··· 5641 5590 case NFS4_OPEN_CLAIM_PREVIOUS: 5642 5591 if (!cb_up) 5643 5592 open->op_recall = 1; 5644 - if (open->op_delegate_type != NFS4_OPEN_DELEGATE_READ) 5645 - goto out_no_deleg; 5646 5593 break; 5647 5594 case NFS4_OPEN_CLAIM_NULL: 5648 5595 parent = currentfh; ··· 5655 5606 goto out_no_deleg; 5656 5607 if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED)) 5657 5608 goto out_no_deleg; 5609 + if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE && 5610 + !clp->cl_minorversion) 5611 + goto out_no_deleg; 5658 5612 break; 5659 5613 default: 5660 5614 goto out_no_deleg; ··· 5668 5616 5669 5617 memcpy(&open->op_delegate_stateid, &dp->dl_stid.sc_stateid, sizeof(dp->dl_stid.sc_stateid)); 5670 5618 5671 - trace_nfsd_deleg_read(&dp->dl_stid.sc_stateid); 5672 - open->op_delegate_type = NFS4_OPEN_DELEGATE_READ; 5619 + if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) { 5620 + open->op_delegate_type = 
NFS4_OPEN_DELEGATE_WRITE; 5621 + trace_nfsd_deleg_write(&dp->dl_stid.sc_stateid); 5622 + } else { 5623 + open->op_delegate_type = NFS4_OPEN_DELEGATE_READ; 5624 + trace_nfsd_deleg_read(&dp->dl_stid.sc_stateid); 5625 + } 5673 5626 nfs4_put_stid(&dp->dl_stid); 5674 5627 return; 5675 5628 out_no_deleg:
+1
fs/nfsd/trace.h
··· 607 607 608 608 DEFINE_STATEID_EVENT(open); 609 609 DEFINE_STATEID_EVENT(deleg_read); 610 + DEFINE_STATEID_EVENT(deleg_write); 610 611 DEFINE_STATEID_EVENT(deleg_return); 611 612 DEFINE_STATEID_EVENT(deleg_recall); 612 613