Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'nfs-for-6.6-2' of git://git.linux-nfs.org/projects/anna/linux-nfs

Pull NFS client fixes from Anna Schumaker:
"Various O_DIRECT-related fixes from Trond:
- Error handling
- Locking issues
- Use the correct commit info for joining page groups
- Fixes for rescheduling IO

SUNRPC bad verifier fixes:
- Report EINVAL errors from connect()
- Revalidate creds that the server has rejected
- Revert "SUNRPC: Fail faster on bad verifier"

Misc:
- Fix pNFS session trunking when MDS=DS
- Fix zero-value filehandles for post-open getattr operations
- Fix compiler warning about tautological comparisons
- Revert 'SUNRPC: clean up integer overflow check' before Trond's fix"

* tag 'nfs-for-6.6-2' of git://git.linux-nfs.org/projects/anna/linux-nfs:
SUNRPC: Silence compiler complaints about tautological comparisons
Revert "SUNRPC: clean up integer overflow check"
NFSv4.1: fix zero value filehandle in post open getattr
NFSv4.1: fix pnfs MDS=DS session trunking
Revert "SUNRPC: Fail faster on bad verifier"
SUNRPC: Mark the cred for revalidation if the server rejects it
NFS/pNFS: Report EINVAL errors from connect() to the server
NFS: More fixes for nfs_direct_write_reschedule_io()
NFS: Use the correct commit info in nfs_join_page_group()
NFS: More O_DIRECT accounting fixes for error paths
NFS: Fix O_DIRECT locking issues
NFS: Fix error handling for O_DIRECT write scheduling

+132 -61
+93 -41
fs/nfs/direct.c
··· 93 93 dreq->max_count = dreq_len; 94 94 if (dreq->count > dreq_len) 95 95 dreq->count = dreq_len; 96 - 97 - if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) 98 - dreq->error = hdr->error; 99 - else /* Clear outstanding error if this is EOF */ 100 - dreq->error = 0; 101 96 } 97 + 98 + if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && !dreq->error) 99 + dreq->error = hdr->error; 102 100 } 103 101 104 102 static void ··· 116 118 117 119 if (dreq->count < dreq_len) 118 120 dreq->count = dreq_len; 121 + } 122 + 123 + static void nfs_direct_truncate_request(struct nfs_direct_req *dreq, 124 + struct nfs_page *req) 125 + { 126 + loff_t offs = req_offset(req); 127 + size_t req_start = (size_t)(offs - dreq->io_start); 128 + 129 + if (req_start < dreq->max_count) 130 + dreq->max_count = req_start; 131 + if (req_start < dreq->count) 132 + dreq->count = req_start; 119 133 } 120 134 121 135 /** ··· 498 488 kref_get(&head->wb_kref); 499 489 } 500 490 501 - static void nfs_direct_join_group(struct list_head *list, struct inode *inode) 491 + static void nfs_direct_join_group(struct list_head *list, 492 + struct nfs_commit_info *cinfo, 493 + struct inode *inode) 502 494 { 503 495 struct nfs_page *req, *subreq; 504 496 ··· 522 510 nfs_release_request(subreq); 523 511 } 524 512 } while ((subreq = subreq->wb_this_page) != req); 525 - nfs_join_page_group(req, inode); 513 + nfs_join_page_group(req, cinfo, inode); 526 514 } 527 515 } 528 516 ··· 540 528 static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) 541 529 { 542 530 struct nfs_pageio_descriptor desc; 543 - struct nfs_page *req, *tmp; 531 + struct nfs_page *req; 544 532 LIST_HEAD(reqs); 545 533 struct nfs_commit_info cinfo; 546 - LIST_HEAD(failed); 547 534 548 535 nfs_init_cinfo_from_dreq(&cinfo, dreq); 549 536 nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo); 550 537 551 - nfs_direct_join_group(&reqs, dreq->inode); 538 + nfs_direct_join_group(&reqs, &cinfo, dreq->inode); 552 539 553 - dreq->count = 0; 554 - 
dreq->max_count = 0; 555 - list_for_each_entry(req, &reqs, wb_list) 556 - dreq->max_count += req->wb_bytes; 557 540 nfs_clear_pnfs_ds_commit_verifiers(&dreq->ds_cinfo); 558 541 get_dreq(dreq); 559 542 ··· 556 549 &nfs_direct_write_completion_ops); 557 550 desc.pg_dreq = dreq; 558 551 559 - list_for_each_entry_safe(req, tmp, &reqs, wb_list) { 552 + while (!list_empty(&reqs)) { 553 + req = nfs_list_entry(reqs.next); 560 554 /* Bump the transmission count */ 561 555 req->wb_nio++; 562 556 if (!nfs_pageio_add_request(&desc, req)) { 563 - nfs_list_move_request(req, &failed); 564 - spin_lock(&cinfo.inode->i_lock); 565 - dreq->flags = 0; 566 - if (desc.pg_error < 0) 557 + spin_lock(&dreq->lock); 558 + if (dreq->error < 0) { 559 + desc.pg_error = dreq->error; 560 + } else if (desc.pg_error != -EAGAIN) { 561 + dreq->flags = 0; 562 + if (!desc.pg_error) 563 + desc.pg_error = -EIO; 567 564 dreq->error = desc.pg_error; 568 - else 569 - dreq->error = -EIO; 570 - spin_unlock(&cinfo.inode->i_lock); 565 + } else 566 + dreq->flags = NFS_ODIRECT_RESCHED_WRITES; 567 + spin_unlock(&dreq->lock); 568 + break; 571 569 } 572 570 nfs_release_request(req); 573 571 } 574 572 nfs_pageio_complete(&desc); 575 573 576 - while (!list_empty(&failed)) { 577 - req = nfs_list_entry(failed.next); 574 + while (!list_empty(&reqs)) { 575 + req = nfs_list_entry(reqs.next); 578 576 nfs_list_remove_request(req); 579 577 nfs_unlock_and_release_request(req); 578 + if (desc.pg_error == -EAGAIN) { 579 + nfs_mark_request_commit(req, NULL, &cinfo, 0); 580 + } else { 581 + spin_lock(&dreq->lock); 582 + nfs_direct_truncate_request(dreq, req); 583 + spin_unlock(&dreq->lock); 584 + nfs_release_request(req); 585 + } 580 586 } 581 587 582 588 if (put_dreq(dreq)) ··· 609 589 if (status < 0) { 610 590 /* Errors in commit are fatal */ 611 591 dreq->error = status; 612 - dreq->max_count = 0; 613 - dreq->count = 0; 614 592 dreq->flags = NFS_ODIRECT_DONE; 615 593 } else { 616 594 status = dreq->error; ··· 619 601 while 
(!list_empty(&data->pages)) { 620 602 req = nfs_list_entry(data->pages.next); 621 603 nfs_list_remove_request(req); 622 - if (status >= 0 && !nfs_write_match_verf(verf, req)) { 604 + if (status < 0) { 605 + spin_lock(&dreq->lock); 606 + nfs_direct_truncate_request(dreq, req); 607 + spin_unlock(&dreq->lock); 608 + nfs_release_request(req); 609 + } else if (!nfs_write_match_verf(verf, req)) { 623 610 dreq->flags = NFS_ODIRECT_RESCHED_WRITES; 624 611 /* 625 612 * Despite the reboot, the write was successful, ··· 632 609 */ 633 610 req->wb_nio = 0; 634 611 nfs_mark_request_commit(req, NULL, &cinfo, 0); 635 - } else /* Error or match */ 612 + } else 636 613 nfs_release_request(req); 637 614 nfs_unlock_and_release_request(req); 638 615 } ··· 685 662 while (!list_empty(&reqs)) { 686 663 req = nfs_list_entry(reqs.next); 687 664 nfs_list_remove_request(req); 665 + nfs_direct_truncate_request(dreq, req); 688 666 nfs_release_request(req); 689 667 nfs_unlock_and_release_request(req); 690 668 } ··· 735 711 } 736 712 737 713 nfs_direct_count_bytes(dreq, hdr); 738 - if (test_bit(NFS_IOHDR_UNSTABLE_WRITES, &hdr->flags)) { 714 + if (test_bit(NFS_IOHDR_UNSTABLE_WRITES, &hdr->flags) && 715 + !test_bit(NFS_IOHDR_ERROR, &hdr->flags)) { 739 716 if (!dreq->flags) 740 717 dreq->flags = NFS_ODIRECT_DO_COMMIT; 741 718 flags = dreq->flags; ··· 780 755 static void nfs_direct_write_reschedule_io(struct nfs_pgio_header *hdr) 781 756 { 782 757 struct nfs_direct_req *dreq = hdr->dreq; 758 + struct nfs_page *req; 759 + struct nfs_commit_info cinfo; 783 760 784 761 trace_nfs_direct_write_reschedule_io(dreq); 785 762 763 + nfs_init_cinfo_from_dreq(&cinfo, dreq); 786 764 spin_lock(&dreq->lock); 787 - if (dreq->error == 0) { 765 + if (dreq->error == 0) 788 766 dreq->flags = NFS_ODIRECT_RESCHED_WRITES; 789 - /* fake unstable write to let common nfs resend pages */ 790 - hdr->verf.committed = NFS_UNSTABLE; 791 - hdr->good_bytes = hdr->args.offset + hdr->args.count - 792 - hdr->io_start; 793 - } 767 + 
set_bit(NFS_IOHDR_REDO, &hdr->flags); 794 768 spin_unlock(&dreq->lock); 769 + while (!list_empty(&hdr->pages)) { 770 + req = nfs_list_entry(hdr->pages.next); 771 + nfs_list_remove_request(req); 772 + nfs_unlock_request(req); 773 + nfs_mark_request_commit(req, NULL, &cinfo, 0); 774 + } 795 775 } 796 776 797 777 static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = { ··· 824 794 { 825 795 struct nfs_pageio_descriptor desc; 826 796 struct inode *inode = dreq->inode; 797 + struct nfs_commit_info cinfo; 827 798 ssize_t result = 0; 828 799 size_t requested_bytes = 0; 829 800 size_t wsize = max_t(size_t, NFS_SERVER(inode)->wsize, PAGE_SIZE); 801 + bool defer = false; 830 802 831 803 trace_nfs_direct_write_schedule_iovec(dreq); 832 804 ··· 869 837 break; 870 838 } 871 839 872 - nfs_lock_request(req); 873 - if (!nfs_pageio_add_request(&desc, req)) { 874 - result = desc.pg_error; 875 - nfs_unlock_and_release_request(req); 876 - break; 877 - } 878 840 pgbase = 0; 879 841 bytes -= req_len; 880 842 requested_bytes += req_len; 881 843 pos += req_len; 882 844 dreq->bytes_left -= req_len; 845 + 846 + if (defer) { 847 + nfs_mark_request_commit(req, NULL, &cinfo, 0); 848 + continue; 849 + } 850 + 851 + nfs_lock_request(req); 852 + if (nfs_pageio_add_request(&desc, req)) 853 + continue; 854 + 855 + /* Exit on hard errors */ 856 + if (desc.pg_error < 0 && desc.pg_error != -EAGAIN) { 857 + result = desc.pg_error; 858 + nfs_unlock_and_release_request(req); 859 + break; 860 + } 861 + 862 + /* If the error is soft, defer remaining requests */ 863 + nfs_init_cinfo_from_dreq(&cinfo, dreq); 864 + spin_lock(&dreq->lock); 865 + dreq->flags = NFS_ODIRECT_RESCHED_WRITES; 866 + spin_unlock(&dreq->lock); 867 + nfs_unlock_request(req); 868 + nfs_mark_request_commit(req, NULL, &cinfo, 0); 869 + desc.pg_error = 0; 870 + defer = true; 883 871 } 884 872 nfs_direct_release_pages(pagevec, npages); 885 873 kvfree(pagevec);
+1
fs/nfs/flexfilelayout/flexfilelayout.c
··· 1235 1235 case -EPFNOSUPPORT: 1236 1236 case -EPROTONOSUPPORT: 1237 1237 case -EOPNOTSUPP: 1238 + case -EINVAL: 1238 1239 case -ECONNREFUSED: 1239 1240 case -ECONNRESET: 1240 1241 case -EHOSTDOWN:
+5 -1
fs/nfs/nfs4client.c
··· 417 417 .net = old->cl_net, 418 418 .servername = old->cl_hostname, 419 419 }; 420 + int max_connect = test_bit(NFS_CS_PNFS, &clp->cl_flags) ? 421 + clp->cl_max_connect : old->cl_max_connect; 420 422 421 423 if (clp->cl_proto != old->cl_proto) 422 424 return; ··· 432 430 xprt_args.addrlen = clp_salen; 433 431 434 432 rpc_clnt_add_xprt(old->cl_rpcclient, &xprt_args, 435 - rpc_clnt_test_and_add_xprt, NULL); 433 + rpc_clnt_test_and_add_xprt, &max_connect); 434 436 } 437 435 438 436 /** ··· 1012 1010 __set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); 1013 1011 1014 1012 __set_bit(NFS_CS_DS, &cl_init.init_flags); 1013 + __set_bit(NFS_CS_PNFS, &cl_init.init_flags); 1014 + cl_init.max_connect = NFS_MAX_TRANSPORTS; 1015 1015 /* 1016 1016 * Set an authflavor equal to the MDS value. Use the MDS nfs_client 1017 1017 * cl_ipaddr so as to use the same EXCHANGE_ID co_ownerid as the MDS
+5 -1
fs/nfs/nfs4proc.c
··· 2703 2703 return status; 2704 2704 } 2705 2705 if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) { 2706 + struct nfs_fh *fh = &o_res->fh; 2707 + 2706 2708 nfs4_sequence_free_slot(&o_res->seq_res); 2707 - nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr, NULL); 2709 + if (o_arg->claim == NFS4_OPEN_CLAIM_FH) 2710 + fh = NFS_FH(d_inode(data->dentry)); 2711 + nfs4_proc_getattr(server, fh, o_res->f_attr, NULL); 2708 2712 } 2709 2713 return 0; 2710 2714 }
+12 -11
fs/nfs/write.c
··· 59 59 static const struct nfs_commit_completion_ops nfs_commit_completion_ops; 60 60 static const struct nfs_rw_ops nfs_rw_write_ops; 61 61 static void nfs_inode_remove_request(struct nfs_page *req); 62 - static void nfs_clear_request_commit(struct nfs_page *req); 62 + static void nfs_clear_request_commit(struct nfs_commit_info *cinfo, 63 + struct nfs_page *req); 63 64 static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo, 64 65 struct inode *inode); 65 66 static struct nfs_page * ··· 503 502 * the (former) group. All subrequests are removed from any write or commit 504 503 * lists, unlinked from the group and destroyed. 505 504 */ 506 - void 507 - nfs_join_page_group(struct nfs_page *head, struct inode *inode) 505 + void nfs_join_page_group(struct nfs_page *head, struct nfs_commit_info *cinfo, 506 + struct inode *inode) 508 507 { 509 508 struct nfs_page *subreq; 510 509 struct nfs_page *destroy_list = NULL; ··· 534 533 * Commit list removal accounting is done after locks are dropped */ 535 534 subreq = head; 536 535 do { 537 - nfs_clear_request_commit(subreq); 536 + nfs_clear_request_commit(cinfo, subreq); 538 537 subreq = subreq->wb_this_page; 539 538 } while (subreq != head); 540 539 ··· 567 566 { 568 567 struct inode *inode = folio_file_mapping(folio)->host; 569 568 struct nfs_page *head; 569 + struct nfs_commit_info cinfo; 570 570 int ret; 571 571 572 + nfs_init_cinfo_from_inode(&cinfo, inode); 572 573 /* 573 574 * A reference is taken only on the head request which acts as a 574 575 * reference to the whole page group - the group will not be destroyed ··· 587 584 return ERR_PTR(ret); 588 585 } 589 586 590 - nfs_join_page_group(head, inode); 587 + nfs_join_page_group(head, &cinfo, inode); 591 588 592 589 return head; 593 590 } ··· 958 955 } 959 956 960 957 /* Called holding the request lock on @req */ 961 - static void 962 - nfs_clear_request_commit(struct nfs_page *req) 958 + static void nfs_clear_request_commit(struct nfs_commit_info 
*cinfo, 959 + struct nfs_page *req) 963 960 { 964 961 if (test_bit(PG_CLEAN, &req->wb_flags)) { 965 962 struct nfs_open_context *ctx = nfs_req_openctx(req); 966 963 struct inode *inode = d_inode(ctx->dentry); 967 - struct nfs_commit_info cinfo; 968 964 969 - nfs_init_cinfo_from_inode(&cinfo, inode); 970 965 mutex_lock(&NFS_I(inode)->commit_mutex); 971 - if (!pnfs_clear_request_commit(req, &cinfo)) { 972 - nfs_request_remove_commit_list(req, &cinfo); 966 + if (!pnfs_clear_request_commit(req, cinfo)) { 967 + nfs_request_remove_commit_list(req, cinfo); 973 968 } 974 969 mutex_unlock(&NFS_I(inode)->commit_mutex); 975 970 nfs_folio_clear_commit(nfs_page_to_folio(req));
+1
include/linux/nfs_fs_sb.h
··· 48 48 #define NFS_CS_NOPING 6 /* - don't ping on connect */ 49 49 #define NFS_CS_DS 7 /* - Server is a DS */ 50 50 #define NFS_CS_REUSEPORT 8 /* - reuse src port on reconnect */ 51 + #define NFS_CS_PNFS 9 /* - Server used for pnfs */ 51 52 struct sockaddr_storage cl_addr; /* server identifier */ 52 53 size_t cl_addrlen; 53 54 char * cl_hostname; /* hostname of server */
+3 -1
include/linux/nfs_page.h
··· 157 157 extern void nfs_unlock_and_release_request(struct nfs_page *); 158 158 extern struct nfs_page *nfs_page_group_lock_head(struct nfs_page *req); 159 159 extern int nfs_page_group_lock_subrequests(struct nfs_page *head); 160 - extern void nfs_join_page_group(struct nfs_page *head, struct inode *inode); 160 + extern void nfs_join_page_group(struct nfs_page *head, 161 + struct nfs_commit_info *cinfo, 162 + struct inode *inode); 161 163 extern int nfs_page_group_lock(struct nfs_page *); 162 164 extern void nfs_page_group_unlock(struct nfs_page *); 163 165 extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int);
+3 -1
include/linux/sunrpc/xdr.h
··· 779 779 780 780 if (unlikely(xdr_stream_decode_u32(xdr, &len) < 0)) 781 781 return -EBADMSG; 782 - p = xdr_inline_decode(xdr, size_mul(len, sizeof(*p))); 782 + if (U32_MAX >= SIZE_MAX / sizeof(*p) && len > SIZE_MAX / sizeof(*p)) 783 + return -EBADMSG; 784 + p = xdr_inline_decode(xdr, len * sizeof(*p)); 783 785 if (unlikely(!p)) 784 786 return -EBADMSG; 785 787 if (array == NULL)
+9 -5
net/sunrpc/clnt.c
··· 2725 2725 2726 2726 out_verifier: 2727 2727 trace_rpc_bad_verifier(task); 2728 - goto out_err; 2728 + goto out_garbage; 2729 2729 2730 2730 out_msg_denied: 2731 2731 error = -EACCES; ··· 2751 2751 case rpc_autherr_rejectedverf: 2752 2752 case rpcsec_gsserr_credproblem: 2753 2753 case rpcsec_gsserr_ctxproblem: 2754 + rpcauth_invalcred(task); 2754 2755 if (!task->tk_cred_retry) 2755 2756 break; 2756 2757 task->tk_cred_retry--; ··· 2908 2907 * @clnt: pointer to struct rpc_clnt 2909 2908 * @xps: pointer to struct rpc_xprt_switch, 2910 2909 * @xprt: pointer struct rpc_xprt 2911 - * @dummy: unused 2910 + * @in_max_connect: pointer to the max_connect value for the passed in xprt transport 2912 2911 */ 2913 2912 int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt, 2914 2913 struct rpc_xprt_switch *xps, struct rpc_xprt *xprt, 2915 - void *dummy) 2914 + void *in_max_connect) 2916 2915 { 2917 2916 struct rpc_cb_add_xprt_calldata *data; 2918 2917 struct rpc_task *task; 2918 + int max_connect = clnt->cl_max_connect; 2919 2919 2920 - if (xps->xps_nunique_destaddr_xprts + 1 > clnt->cl_max_connect) { 2920 + if (in_max_connect) 2921 + max_connect = *(int *)in_max_connect; 2922 + if (xps->xps_nunique_destaddr_xprts + 1 > max_connect) { 2921 2923 rcu_read_lock(); 2922 2924 pr_warn("SUNRPC: reached max allowed number (%d) did not add " 2923 - "transport to server: %s\n", clnt->cl_max_connect, 2925 + "transport to server: %s\n", max_connect, 2924 2926 rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR)); 2925 2927 rcu_read_unlock(); 2926 2928 return -EINVAL;