Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

NFS: Fix size read races in truncate, fallocate and copy offload

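If the pre-operation file size is read before locking the inode and
quiescing O_DIRECT writes, then the size may change in the meantime
(e.g. because a concurrent write extends the file), leaving the saved
value stale. nfs_truncate_last_folio() might then end up overwriting
valid file data. Move the size reads so that they happen only after
the inode has been locked and O_DIRECT writes have been quiesced.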

Fixes: b1817b18ff20 ("NFS: Protect against 'eof page pollution'")
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>

+27 -14
+6 -4
fs/nfs/inode.c
··· 716 716 { 717 717 struct inode *inode = d_inode(dentry); 718 718 struct nfs_fattr *fattr; 719 - loff_t oldsize = i_size_read(inode); 719 + loff_t oldsize; 720 720 int error = 0; 721 721 kuid_t task_uid = current_fsuid(); 722 722 kuid_t owner_uid = inode->i_uid; ··· 727 727 if (attr->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) 728 728 attr->ia_valid &= ~ATTR_MODE; 729 729 730 + if (S_ISREG(inode->i_mode)) 731 + nfs_file_block_o_direct(NFS_I(inode)); 732 + 733 + oldsize = i_size_read(inode); 730 734 if (attr->ia_valid & ATTR_SIZE) { 731 735 BUG_ON(!S_ISREG(inode->i_mode)); 732 736 ··· 778 774 trace_nfs_setattr_enter(inode); 779 775 780 776 /* Write all dirty data */ 781 - if (S_ISREG(inode->i_mode)) { 782 - nfs_file_block_o_direct(NFS_I(inode)); 777 + if (S_ISREG(inode->i_mode)) 783 778 nfs_sync_inode(inode); 784 - } 785 779 786 780 fattr = nfs_alloc_fattr_with_label(NFS_SERVER(inode)); 787 781 if (fattr == NULL) {
+2
fs/nfs/io.c
··· 84 84 nfs_file_block_o_direct(NFS_I(inode)); 85 85 return err; 86 86 } 87 + EXPORT_SYMBOL_GPL(nfs_start_io_write); 87 88 88 89 /** 89 90 * nfs_end_io_write - declare that the buffered write operation is done ··· 98 97 { 99 98 up_write(&inode->i_rwsem); 100 99 } 100 + EXPORT_SYMBOL_GPL(nfs_end_io_write); 101 101 102 102 /* Call with exclusively locked inode->i_rwsem */ 103 103 static void nfs_block_buffered(struct nfs_inode *nfsi, struct inode *inode)
+19 -10
fs/nfs/nfs42proc.c
··· 114 114 exception.inode = inode; 115 115 exception.state = lock->open_context->state; 116 116 117 - nfs_file_block_o_direct(NFS_I(inode)); 118 117 err = nfs_sync_inode(inode); 119 118 if (err) 120 119 goto out; ··· 137 138 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ALLOCATE], 138 139 }; 139 140 struct inode *inode = file_inode(filep); 140 - loff_t oldsize = i_size_read(inode); 141 + loff_t oldsize; 141 142 int err; 142 143 143 144 if (!nfs_server_capable(inode, NFS_CAP_ALLOCATE)) 144 145 return -EOPNOTSUPP; 145 146 146 - inode_lock(inode); 147 + err = nfs_start_io_write(inode); 148 + if (err) 149 + return err; 150 + 151 + oldsize = i_size_read(inode); 147 152 148 153 err = nfs42_proc_fallocate(&msg, filep, offset, len); 149 154 ··· 158 155 NFS_SERVER(inode)->caps &= ~(NFS_CAP_ALLOCATE | 159 156 NFS_CAP_ZERO_RANGE); 160 157 161 - inode_unlock(inode); 158 + nfs_end_io_write(inode); 162 159 return err; 163 160 } 164 161 ··· 173 170 if (!nfs_server_capable(inode, NFS_CAP_DEALLOCATE)) 174 171 return -EOPNOTSUPP; 175 172 176 - inode_lock(inode); 173 + err = nfs_start_io_write(inode); 174 + if (err) 175 + return err; 177 176 178 177 err = nfs42_proc_fallocate(&msg, filep, offset, len); 179 178 if (err == 0) ··· 184 179 NFS_SERVER(inode)->caps &= ~(NFS_CAP_DEALLOCATE | 185 180 NFS_CAP_ZERO_RANGE); 186 181 187 - inode_unlock(inode); 182 + nfs_end_io_write(inode); 188 183 return err; 189 184 } 190 185 ··· 194 189 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ZERO_RANGE], 195 190 }; 196 191 struct inode *inode = file_inode(filep); 197 - loff_t oldsize = i_size_read(inode); 192 + loff_t oldsize; 198 193 int err; 199 194 200 195 if (!nfs_server_capable(inode, NFS_CAP_ZERO_RANGE)) 201 196 return -EOPNOTSUPP; 202 197 203 - inode_lock(inode); 198 + err = nfs_start_io_write(inode); 199 + if (err) 200 + return err; 204 201 202 + oldsize = i_size_read(inode); 205 203 err = nfs42_proc_fallocate(&msg, filep, offset, len); 206 204 if (err == 0) { 207 205 
nfs_truncate_last_folio(inode->i_mapping, oldsize, ··· 213 205 } else if (err == -EOPNOTSUPP) 214 206 NFS_SERVER(inode)->caps &= ~NFS_CAP_ZERO_RANGE; 215 207 216 - inode_unlock(inode); 208 + nfs_end_io_write(inode); 217 209 return err; 218 210 } 219 211 ··· 424 416 struct nfs_server *src_server = NFS_SERVER(src_inode); 425 417 loff_t pos_src = args->src_pos; 426 418 loff_t pos_dst = args->dst_pos; 427 - loff_t oldsize_dst = i_size_read(dst_inode); 419 + loff_t oldsize_dst; 428 420 size_t count = args->count; 429 421 ssize_t status; 430 422 ··· 469 461 &src_lock->open_context->state->flags); 470 462 set_bit(NFS_CLNT_DST_SSC_COPY_STATE, 471 463 &dst_lock->open_context->state->flags); 464 + oldsize_dst = i_size_read(dst_inode); 472 465 473 466 status = nfs4_call_sync(dst_server->client, dst_server, &msg, 474 467 &args->seq_args, &res->seq_res, 0);