NFS: nfs_lock_and_join_requests and nfs_scan_commit_list can deadlock

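Since the commit list is not ordered, it is possible for
nfs_scan_commit_list() to hold a request that nfs_lock_and_join_requests()
is waiting for, while at the same time trying to grab a request that
nfs_lock_and_join_requests() already holds.

To avoid this, when a request on the commit list cannot be locked and the
destination list already holds at least one request, skip that request
instead of waiting for it. Only when the destination list is empty do we
drop the commit_mutex, wait for the request, and then restart the scan, so
that forward progress is still guaranteed. The same change is applied to
pnfs_generic_transfer_commit_list().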

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>

+22 -9
+11 -5
fs/nfs/pnfs_nfs.c
··· 91 pnfs_generic_transfer_commit_list(struct list_head *src, struct list_head *dst, 92 struct nfs_commit_info *cinfo, int max) 93 { 94 - struct nfs_page *req; 95 int ret = 0; 96 97 - while(!list_empty(src)) { 98 - req = list_first_entry(src, struct nfs_page, wb_list); 99 - 100 kref_get(&req->wb_kref); 101 if (!nfs_lock_request(req)) { 102 int status; 103 mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); 104 status = nfs_wait_on_request(req); 105 nfs_release_request(req); 106 mutex_lock(&NFS_I(cinfo->inode)->commit_mutex); 107 if (status < 0) 108 break; 109 - continue; 110 } 111 nfs_request_remove_commit_list(req, cinfo); 112 clear_bit(PG_COMMIT_TO_DS, &req->wb_flags);
··· 91 pnfs_generic_transfer_commit_list(struct list_head *src, struct list_head *dst, 92 struct nfs_commit_info *cinfo, int max) 93 { 94 + struct nfs_page *req, *tmp; 95 int ret = 0; 96 97 + restart: 98 + list_for_each_entry_safe(req, tmp, src, wb_list) { 99 kref_get(&req->wb_kref); 100 if (!nfs_lock_request(req)) { 101 int status; 102 + 103 + /* Prevent deadlock with nfs_lock_and_join_requests */ 104 + if (!list_empty(dst)) { 105 + nfs_release_request(req); 106 + continue; 107 + } 108 + /* Ensure we make progress to prevent livelock */ 109 mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); 110 status = nfs_wait_on_request(req); 111 nfs_release_request(req); 112 mutex_lock(&NFS_I(cinfo->inode)->commit_mutex); 113 if (status < 0) 114 break; 115 + goto restart; 116 } 117 nfs_request_remove_commit_list(req, cinfo); 118 clear_bit(PG_COMMIT_TO_DS, &req->wb_flags);
+11 -4
fs/nfs/write.c
··· 1028 nfs_scan_commit_list(struct list_head *src, struct list_head *dst, 1029 struct nfs_commit_info *cinfo, int max) 1030 { 1031 - struct nfs_page *req; 1032 int ret = 0; 1033 1034 - while(!list_empty(src)) { 1035 - req = list_first_entry(src, struct nfs_page, wb_list); 1036 kref_get(&req->wb_kref); 1037 if (!nfs_lock_request(req)) { 1038 int status; 1039 mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); 1040 status = nfs_wait_on_request(req); 1041 nfs_release_request(req); 1042 mutex_lock(&NFS_I(cinfo->inode)->commit_mutex); 1043 if (status < 0) 1044 break; 1045 - continue; 1046 } 1047 nfs_request_remove_commit_list(req, cinfo); 1048 nfs_list_add_request(req, dst);
··· 1028 nfs_scan_commit_list(struct list_head *src, struct list_head *dst, 1029 struct nfs_commit_info *cinfo, int max) 1030 { 1031 + struct nfs_page *req, *tmp; 1032 int ret = 0; 1033 1034 + restart: 1035 + list_for_each_entry_safe(req, tmp, src, wb_list) { 1036 kref_get(&req->wb_kref); 1037 if (!nfs_lock_request(req)) { 1038 int status; 1039 + 1040 + /* Prevent deadlock with nfs_lock_and_join_requests */ 1041 + if (!list_empty(dst)) { 1042 + nfs_release_request(req); 1043 + continue; 1044 + } 1045 + /* Ensure we make progress to prevent livelock */ 1046 mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); 1047 status = nfs_wait_on_request(req); 1048 nfs_release_request(req); 1049 mutex_lock(&NFS_I(cinfo->inode)->commit_mutex); 1050 if (status < 0) 1051 break; 1052 + goto restart; 1053 } 1054 nfs_request_remove_commit_list(req, cinfo); 1055 nfs_list_add_request(req, dst);