/*
 * fs/cifs/file.c
 *
 * vfs operations that deal with files
 *
 * Copyright (C) International Business Machines Corp., 2002,2010
 * Author(s): Steve French (sfrench@us.ibm.com)
 *            Jeremy Allison (jra@samba.org)
 *
 * This library is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation; either version 2.1 of the License, or
 * (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 * the GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/mm.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
#include "smbdirect.h"

static inline int cifs_convert_flags(unsigned int flags)
{
	if ((flags & O_ACCMODE) == O_RDONLY)
		return GENERIC_READ;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		return GENERIC_WRITE;
	else if ((flags & O_ACCMODE) == O_RDWR) {
		/* GENERIC_ALL is too much permission to request; it can
		   cause an unnecessary access-denied error on create */
		/* return GENERIC_ALL; */
		return (GENERIC_READ | GENERIC_WRITE);
	}

	return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
		FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
		FILE_READ_DATA);
}
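
/*
 * Worked example (editorial, not from the original source): an open with
 * flags O_RDWR | O_CREAT has (flags & O_ACCMODE) == O_RDWR, so
 * cifs_convert_flags() returns GENERIC_READ | GENERIC_WRITE.  O_CREAT does
 * not change the desired access; it only affects the create disposition
 * (see cifs_get_disposition() below).
 */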

static u32 cifs_posix_convert_flags(unsigned int flags)
{
	u32 posix_flags = 0;

	if ((flags & O_ACCMODE) == O_RDONLY)
		posix_flags = SMB_O_RDONLY;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		posix_flags = SMB_O_WRONLY;
	else if ((flags & O_ACCMODE) == O_RDWR)
		posix_flags = SMB_O_RDWR;

	if (flags & O_CREAT) {
		posix_flags |= SMB_O_CREAT;
		if (flags & O_EXCL)
			posix_flags |= SMB_O_EXCL;
	} else if (flags & O_EXCL)
		cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
			 current->comm, current->tgid);

	if (flags & O_TRUNC)
		posix_flags |= SMB_O_TRUNC;
	/* be safe and imply O_SYNC for O_DSYNC */
	if (flags & O_DSYNC)
		posix_flags |= SMB_O_SYNC;
	if (flags & O_DIRECTORY)
		posix_flags |= SMB_O_DIRECTORY;
	if (flags & O_NOFOLLOW)
		posix_flags |= SMB_O_NOFOLLOW;
	if (flags & O_DIRECT)
		posix_flags |= SMB_O_DIRECT;

	return posix_flags;
}
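
/*
 * Illustrative example (editorial, not from the original source): an open
 * with O_WRONLY | O_CREAT | O_EXCL maps to SMB_O_WRONLY | SMB_O_CREAT |
 * SMB_O_EXCL, and an O_DSYNC open is conservatively widened to SMB_O_SYNC
 * per the comment above.
 */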

static inline int cifs_get_disposition(unsigned int flags)
{
	if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
		return FILE_CREATE;
	else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
		return FILE_OVERWRITE_IF;
	else if ((flags & O_CREAT) == O_CREAT)
		return FILE_OPEN_IF;
	else if ((flags & O_TRUNC) == O_TRUNC)
		return FILE_OVERWRITE;
	else
		return FILE_OPEN;
}
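
/*
 * Editorial note: the checks above run from most to least specific, so
 * e.g. O_CREAT | O_TRUNC maps to FILE_OVERWRITE_IF before the bare
 * O_CREAT test could map it to FILE_OPEN_IF.
 */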

int cifs_posix_open(char *full_path, struct inode **pinode,
		    struct super_block *sb, int mode, unsigned int f_flags,
		    __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
	int rc;
	FILE_UNIX_BASIC_INFO *presp_data;
	__u32 posix_flags = 0;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fattr fattr;
	struct tcon_link *tlink;
	struct cifs_tcon *tcon;

	cifs_dbg(FYI, "posix open %s\n", full_path);

	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
	if (presp_data == NULL)
		return -ENOMEM;

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		rc = PTR_ERR(tlink);
		goto posix_open_ret;
	}

	tcon = tlink_tcon(tlink);
	mode &= ~current_umask();

	posix_flags = cifs_posix_convert_flags(f_flags);
	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
			     poplock, full_path, cifs_sb->local_nls,
			     cifs_remap(cifs_sb));
	cifs_put_tlink(tlink);

	if (rc)
		goto posix_open_ret;

	if (presp_data->Type == cpu_to_le32(-1))
		goto posix_open_ret; /* open ok, caller does qpathinfo */

	if (!pinode)
		goto posix_open_ret; /* caller does not need info */

	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

	/* get new inode and set it up */
	if (*pinode == NULL) {
		cifs_fill_uniqueid(sb, &fattr);
		*pinode = cifs_iget(sb, &fattr);
		if (!*pinode) {
			rc = -ENOMEM;
			goto posix_open_ret;
		}
	} else {
		cifs_fattr_to_inode(*pinode, &fattr);
	}

posix_open_ret:
	kfree(presp_data);
	return rc;
}

static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
	     struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
	     struct cifs_fid *fid, unsigned int xid)
{
	int rc;
	int desired_access;
	int disposition;
	int create_options = CREATE_NOT_DIR;
	FILE_ALL_INFO *buf;
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifs_open_parms oparms;

	if (!server->ops->open)
		return -ENOSYS;

	desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 * open flag mapping table:
 *
 *	POSIX Flag		CIFS Disposition
 *	----------		----------------
 *	O_CREAT			FILE_OPEN_IF
 *	O_CREAT | O_EXCL	FILE_CREATE
 *	O_CREAT | O_TRUNC	FILE_OVERWRITE_IF
 *	O_TRUNC			FILE_OVERWRITE
 *	none of the above	FILE_OPEN
 *
 * Note that there is no direct POSIX match for the disposition
 * FILE_SUPERSEDE (i.e. create whether or not the file exists).
 * O_CREAT | O_TRUNC is similar, but it truncates an existing file
 * rather than replacing it with a new file (using the attributes /
 * metadata passed in on the open call) as FILE_SUPERSEDE does.
 *
 * O_SYNC is a reasonable match to the CIFS writethrough flag, and the
 * read/write flags match reasonably.  O_LARGEFILE is irrelevant because
 * largefile support is always used by this client.  Flags O_APPEND,
 * O_DIRECT, O_DIRECTORY, O_FASYNC, O_NOFOLLOW and O_NONBLOCK need
 * further investigation.
 *********************************************************************/

	disposition = cifs_get_disposition(f_flags);

	/* BB pass O_SYNC flag through on file attributes .. BB */

	buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	if (backup_cred(cifs_sb))
		create_options |= CREATE_OPEN_BACKUP_INTENT;

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = create_options;
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = fid;
	oparms.reconnect = false;

	rc = server->ops->open(xid, &oparms, oplock, buf);

	if (rc)
		goto out;

	if (tcon->unix_ext)
		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
					      xid);
	else
		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
					 xid, fid);

out:
	kfree(buf);
	return rc;
}

static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
	struct cifs_fid_locks *cur;
	bool has_locks = false;

	down_read(&cinode->lock_sem);
	list_for_each_entry(cur, &cinode->llist, llist) {
		if (!list_empty(&cur->locks)) {
			has_locks = true;
			break;
		}
	}
	up_read(&cinode->lock_sem);
	return has_locks;
}

struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
		  struct tcon_link *tlink, __u32 oplock)
{
	struct dentry *dentry = file_dentry(file);
	struct inode *inode = d_inode(dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifsFileInfo *cfile;
	struct cifs_fid_locks *fdlocks;
	struct cifs_tcon *tcon = tlink_tcon(tlink);
	struct TCP_Server_Info *server = tcon->ses->server;

	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
	if (cfile == NULL)
		return cfile;

	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
	if (!fdlocks) {
		kfree(cfile);
		return NULL;
	}

	INIT_LIST_HEAD(&fdlocks->locks);
	fdlocks->cfile = cfile;
	cfile->llist = fdlocks;
	down_write(&cinode->lock_sem);
	list_add(&fdlocks->llist, &cinode->llist);
	up_write(&cinode->lock_sem);

	cfile->count = 1;
	cfile->pid = current->tgid;
	cfile->uid = current_fsuid();
	cfile->dentry = dget(dentry);
	cfile->f_flags = file->f_flags;
	cfile->invalidHandle = false;
	cfile->tlink = cifs_get_tlink(tlink);
	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
	mutex_init(&cfile->fh_mutex);
	spin_lock_init(&cfile->file_info_lock);

	cifs_sb_active(inode->i_sb);

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	spin_lock(&tcon->open_file_lock);
	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
		oplock = fid->pending_open->oplock;
	list_del(&fid->pending_open->olist);

	fid->purge_cache = false;
	server->ops->set_fid(cfile, fid, oplock);

	list_add(&cfile->tlist, &tcon->openFileList);
	atomic_inc(&tcon->num_local_opens);

	/* if this is a readable file instance, put it first in the list */
	if (file->f_mode & FMODE_READ)
		list_add(&cfile->flist, &cinode->openFileList);
	else
		list_add_tail(&cfile->flist, &cinode->openFileList);
	spin_unlock(&tcon->open_file_lock);

	if (fid->purge_cache)
		cifs_zap_mapping(inode);

	file->private_data = cfile;
	return cfile;
}
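
/*
 * Editorial note: readable handles are kept at the head of the per-inode
 * openFileList (write-only ones at the tail), which is what lets
 * find_readable_file() below stop at the first write-only entry it meets.
 * The returned cifsFileInfo starts with count == 1; see cifsFileInfo_get()
 * and cifsFileInfo_put() for the reference-counting rules.
 */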

struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
	spin_lock(&cifs_file->file_info_lock);
	cifsFileInfo_get_locked(cifs_file);
	spin_unlock(&cifs_file->file_info_lock);
	return cifs_file;
}

/**
 * cifsFileInfo_put - release a reference to file private data
 *
 * Always potentially waits for the oplock handler. See _cifsFileInfo_put().
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
	_cifsFileInfo_put(cifs_file, true);
}

/**
 * _cifsFileInfo_put - release a reference to file private data
 *
 * This may involve closing the filehandle @cifs_file out on the
 * server. Must be called without holding tcon->open_file_lock and
 * cifs_file->file_info_lock.
 *
 * If @wait_oplock_handler is true and we are releasing the last
 * reference, wait for any running oplock break handler of the file
 * and cancel any pending one. When calling this function from the
 * oplock break handler itself, you need to pass false.
 */
void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_handler)
{
	struct inode *inode = d_inode(cifs_file->dentry);
	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct super_block *sb = inode->i_sb;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifsLockInfo *li, *tmp;
	struct cifs_fid fid;
	struct cifs_pending_open open;
	bool oplock_break_cancelled;

	spin_lock(&tcon->open_file_lock);

	spin_lock(&cifs_file->file_info_lock);
	if (--cifs_file->count > 0) {
		spin_unlock(&cifs_file->file_info_lock);
		spin_unlock(&tcon->open_file_lock);
		return;
	}
	spin_unlock(&cifs_file->file_info_lock);

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	/* store open in pending opens to make sure we don't miss lease break */
	cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

	/* remove it from the lists */
	list_del(&cifs_file->flist);
	list_del(&cifs_file->tlist);
	atomic_dec(&tcon->num_local_opens);

	if (list_empty(&cifsi->openFileList)) {
		cifs_dbg(FYI, "closing last open instance for inode %p\n",
			 d_inode(cifs_file->dentry));
		/*
		 * In strict cache mode we need to invalidate the mapping on
		 * the last close because it may cause an error when we open
		 * this file again and get at least a level II oplock.
		 */
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
			set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
		cifs_set_oplock_level(cifsi, 0);
	}

	spin_unlock(&tcon->open_file_lock);

	oplock_break_cancelled = wait_oplock_handler ?
		cancel_work_sync(&cifs_file->oplock_break) : false;

	if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
		struct TCP_Server_Info *server = tcon->ses->server;
		unsigned int xid;

		xid = get_xid();
		if (server->ops->close)
			server->ops->close(xid, tcon, &cifs_file->fid);
		_free_xid(xid);
	}

	if (oplock_break_cancelled)
		cifs_done_oplock_break(cifsi);

	cifs_del_pending_open(&open);

	/*
	 * Delete any outstanding lock records. We'll lose them when the file
	 * is closed anyway.
	 */
	down_write(&cifsi->lock_sem);
	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
		list_del(&li->llist);
		cifs_del_lock_waiters(li);
		kfree(li);
	}
	list_del(&cifs_file->llist->llist);
	kfree(cifs_file->llist);
	up_write(&cifsi->lock_sem);

	cifs_put_tlink(cifs_file->tlink);
	dput(cifs_file->dentry);
	cifs_sb_deactive(sb);
	kfree(cifs_file);
}

int cifs_open(struct inode *inode, struct file *file)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct TCP_Server_Info *server;
	struct cifs_tcon *tcon;
	struct tcon_link *tlink;
	struct cifsFileInfo *cfile = NULL;
	char *full_path = NULL;
	bool posix_open_ok = false;
	struct cifs_fid fid;
	struct cifs_pending_open open;

	xid = get_xid();

	cifs_sb = CIFS_SB(inode->i_sb);
	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		free_xid(xid);
		return PTR_ERR(tlink);
	}
	tcon = tlink_tcon(tlink);
	server = tcon->ses->server;

	full_path = build_path_from_dentry(file_dentry(file));
	if (full_path == NULL) {
		rc = -ENOMEM;
		goto out;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
		 inode, file->f_flags, full_path);

	if (file->f_flags & O_DIRECT &&
	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
			file->f_op = &cifs_file_direct_nobrl_ops;
		else
			file->f_op = &cifs_file_direct_ops;
	}

	if (server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (!tcon->broken_posix_open && tcon->unix_ext &&
	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/* can not refresh inode info since size could be stale */
		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
				     cifs_sb->mnt_file_mode /* ignored */,
				     file->f_flags, &oplock, &fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix open succeeded\n");
			posix_open_ok = true;
		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
			if (tcon->ses->serverNOS)
				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
					 tcon->ses->serverName,
					 tcon->ses->serverNOS);
			tcon->broken_posix_open = true;
		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
			   (rc != -EOPNOTSUPP)) /* path not found or net err */
			goto out;
		/*
		 * Else fall through to retry the open the old way on network
		 * i/o or DFS errors.
		 */
	}

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	cifs_add_pending_open(&fid, tlink, &open);

	if (!posix_open_ok) {
		if (server->ops->get_lease_key)
			server->ops->get_lease_key(inode, &fid);

		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
				  file->f_flags, &oplock, &fid, xid);
		if (rc) {
			cifs_del_pending_open(&open);
			goto out;
		}
	}

	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
	if (cfile == NULL) {
		if (server->ops->close)
			server->ops->close(xid, tcon, &fid);
		cifs_del_pending_open(&open);
		rc = -ENOMEM;
		goto out;
	}

	cifs_fscache_set_inode_cookie(inode, file);

	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
		/*
		 * Time to set mode which we can not set earlier due to
		 * problems creating new read-only files.
		 */
		struct cifs_unix_set_info_args args = {
			.mode	= inode->i_mode,
			.uid	= INVALID_UID, /* no change */
			.gid	= INVALID_GID, /* no change */
			.ctime	= NO_CHANGE_64,
			.atime	= NO_CHANGE_64,
			.mtime	= NO_CHANGE_64,
			.device	= 0,
		};
		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
				       cfile->pid);
	}

out:
	kfree(full_path);
	free_xid(xid);
	cifs_put_tlink(tlink);
	return rc;
}

static int cifs_push_posix_locks(struct cifsFileInfo *cfile);

/*
 * Try to reacquire byte range locks that were released when the session
 * to the server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
	if (cinode->can_cache_brlcks) {
		/* can cache locks - no need to relock */
		up_read(&cinode->lock_sem);
		return rc;
	}

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	up_read(&cinode->lock_sem);
	return rc;
}

static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsInodeInfo *cinode;
	struct inode *inode;
	char *full_path = NULL;
	int desired_access;
	int disposition = FILE_OPEN;
	int create_options = CREATE_NOT_DIR;
	struct cifs_open_parms oparms;

	xid = get_xid();
	mutex_lock(&cfile->fh_mutex);
	if (!cfile->invalidHandle) {
		mutex_unlock(&cfile->fh_mutex);
		rc = 0;
		free_xid(xid);
		return rc;
	}

	inode = d_inode(cfile->dentry);
	cifs_sb = CIFS_SB(inode->i_sb);
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	/*
	 * Can not grab the rename sem here because various ops, including
	 * those that already have the rename sem, can end up causing
	 * writepage to get called, and if the server was down that means
	 * we end up here, and we can never tell if the caller already has
	 * the rename_sem.
	 */
	full_path = build_path_from_dentry(cfile->dentry);
	if (full_path == NULL) {
		rc = -ENOMEM;
		mutex_unlock(&cfile->fh_mutex);
		free_xid(xid);
		return rc;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
		 inode, cfile->f_flags, full_path);

	if (tcon->ses->server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (tcon->unix_ext && cap_unix(tcon->ses) &&
	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
	     le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/*
		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
		 * original open. Must mask them off for a reopen.
		 */
		unsigned int oflags = cfile->f_flags &
				      ~(O_CREAT | O_EXCL | O_TRUNC);

		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
				     cifs_sb->mnt_file_mode /* ignored */,
				     oflags, &oplock, &cfile->fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix reopen succeeded\n");
			oparms.reconnect = true;
			goto reopen_success;
		}
		/*
		 * Fall through to retry the open the old way on errors;
		 * especially in the reconnect path it is important to
		 * retry hard.
		 */
	}

	desired_access = cifs_convert_flags(cfile->f_flags);

	if (backup_cred(cifs_sb))
		create_options |= CREATE_OPEN_BACKUP_INTENT;

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &cfile->fid);

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = create_options;
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = &cfile->fid;
	oparms.reconnect = true;

	/*
	 * Can not refresh inode by passing in file_info buf to be returned by
	 * ops->open and then calling get_inode_info with returned buf since
	 * file might have write behind data that needs to be flushed and server
	 * version of file size can be stale. If we knew for sure that inode was
	 * not dirty locally we could do this.
	 */
	rc = server->ops->open(xid, &oparms, &oplock, NULL);
	if (rc == -ENOENT && oparms.reconnect == false) {
		/* durable handle timeout is expired - open the file again */
		rc = server->ops->open(xid, &oparms, &oplock, NULL);
		/* indicate that we need to relock the file */
		oparms.reconnect = true;
	}

	if (rc) {
		mutex_unlock(&cfile->fh_mutex);
		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
		cifs_dbg(FYI, "oplock: %d\n", oplock);
		goto reopen_error_exit;
	}

reopen_success:
	cfile->invalidHandle = false;
	mutex_unlock(&cfile->fh_mutex);
	cinode = CIFS_I(inode);

	if (can_flush) {
		rc = filemap_write_and_wait(inode->i_mapping);
		if (!is_interrupt_error(rc))
			mapping_set_error(inode->i_mapping, rc);

		if (tcon->unix_ext)
			rc = cifs_get_inode_info_unix(&inode, full_path,
						      inode->i_sb, xid);
		else
			rc = cifs_get_inode_info(&inode, full_path, NULL,
						 inode->i_sb, xid, NULL);
	}
	/*
	 * Else we are writing out data to server already and could deadlock if
	 * we tried to flush data, and since we do not know if we have data that
	 * would invalidate the current end of file on the server we can not go
	 * to the server to get the new inode info.
	 */

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	server->ops->set_fid(cfile, &cfile->fid, oplock);
	if (oparms.reconnect)
		cifs_relock_file(cfile);

reopen_error_exit:
	kfree(full_path);
	free_xid(xid);
	return rc;
}

int cifs_close(struct inode *inode, struct file *file)
{
	if (file->private_data != NULL) {
		cifsFileInfo_put(file->private_data);
		file->private_data = NULL;
	}

	/* return code from the ->release op is always ignored */
	return 0;
}

void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
	struct cifsFileInfo *open_file;
	struct list_head *tmp;
	struct list_head *tmp1;
	struct list_head tmp_list;

	if (!tcon->use_persistent || !tcon->need_reopen_files)
		return;

	tcon->need_reopen_files = false;

	cifs_dbg(FYI, "Reopen persistent handles\n");
	INIT_LIST_HEAD(&tmp_list);

	/* list all files open on tree connection, reopen resilient handles */
	spin_lock(&tcon->open_file_lock);
	list_for_each(tmp, &tcon->openFileList) {
		open_file = list_entry(tmp, struct cifsFileInfo, tlist);
		if (!open_file->invalidHandle)
			continue;
		cifsFileInfo_get(open_file);
		list_add_tail(&open_file->rlist, &tmp_list);
	}
	spin_unlock(&tcon->open_file_lock);

	list_for_each_safe(tmp, tmp1, &tmp_list) {
		open_file = list_entry(tmp, struct cifsFileInfo, rlist);
		if (cifs_reopen_file(open_file, false /* do not flush */))
			tcon->need_reopen_files = true;
		list_del_init(&open_file->rlist);
		cifsFileInfo_put(open_file);
	}
}

int cifs_closedir(struct inode *inode, struct file *file)
{
	int rc = 0;
	unsigned int xid;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	char *buf;

	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

	if (cfile == NULL)
		return rc;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	cifs_dbg(FYI, "Freeing private data in close dir\n");
	spin_lock(&cfile->file_info_lock);
	if (server->ops->dir_needs_close(cfile)) {
		cfile->invalidHandle = true;
		spin_unlock(&cfile->file_info_lock);
		if (server->ops->close_dir)
			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
		else
			rc = -ENOSYS;
		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
		/* not much we can do if it fails anyway, ignore rc */
		rc = 0;
	} else
		spin_unlock(&cfile->file_info_lock);

	buf = cfile->srch_inf.ntwrk_buf_start;
	if (buf) {
		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
		cfile->srch_inf.ntwrk_buf_start = NULL;
		if (cfile->srch_inf.smallBuf)
			cifs_small_buf_release(buf);
		else
			cifs_buf_release(buf);
	}

	cifs_put_tlink(cfile->tlink);
	kfree(file->private_data);
	file->private_data = NULL;
	/* BB can we lock the filestruct while this is going on? */
	free_xid(xid);
	return rc;
}

static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
{
	struct cifsLockInfo *lock =
		kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
	if (!lock)
		return lock;
	lock->offset = offset;
	lock->length = length;
	lock->type = type;
	lock->pid = current->tgid;
	lock->flags = flags;
	INIT_LIST_HEAD(&lock->blist);
	init_waitqueue_head(&lock->block_q);
	return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
		list_del_init(&li->blist);
		wake_up(&li->block_q);
	}
}

#define CIFS_LOCK_OP	0
#define CIFS_READ_OP	1
#define CIFS_WRITE_OP	2

/* @rw_check : 0 - no op, 1 - read, 2 - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
			    __u64 length, __u8 type, __u16 flags,
			    struct cifsFileInfo *cfile,
			    struct cifsLockInfo **conf_lock, int rw_check)
{
	struct cifsLockInfo *li;
	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

	list_for_each_entry(li, &fdlocks->locks, llist) {
		if (offset + length <= li->offset ||
		    offset >= li->offset + li->length)
			continue;
		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
		    server->ops->compare_fids(cfile, cur_cfile)) {
			/* shared lock prevents write op through the same fid */
			if (!(li->type & server->vals->shared_lock_type) ||
			    rw_check != CIFS_WRITE_OP)
				continue;
		}
		if ((type & server->vals->shared_lock_type) &&
		    ((server->ops->compare_fids(cfile, cur_cfile) &&
		     current->tgid == li->pid) || type == li->type))
			continue;
		if (rw_check == CIFS_LOCK_OP &&
		    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
		    server->ops->compare_fids(cfile, cur_cfile))
			continue;
		if (conf_lock)
			*conf_lock = li;
		return true;
	}
	return false;
}
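
/*
 * Worked example (editorial, not from the original source): with an
 * existing lock at li->offset = 100, li->length = 50 (bytes 100-149), a
 * request for offset = 140, length = 20 (bytes 140-159) overlaps, since
 * neither 140 + 20 <= 100 nor 140 >= 100 + 50 holds, so the loop above
 * goes on to check whether the two locks are actually compatible.
 */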

bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
			__u8 type, __u16 flags,
			struct cifsLockInfo **conf_lock, int rw_check)
{
	bool rc = false;
	struct cifs_fid_locks *cur;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

	list_for_each_entry(cur, &cinode->llist, llist) {
		rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
						 flags, cfile, conf_lock,
						 rw_check);
		if (rc)
			break;
	}

	return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure
 * with its properties. Otherwise, set the flock type to F_UNLCK if we
 * can cache brlocks, or leave it the same if we can't. Returns 0 if we
 * don't need to request to the server, or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
	       __u8 type, struct file_lock *flock)
{
	int rc = 0;
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
	bool exist;

	down_read(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, offset, length, type,
					flock->fl_flags, &conf_lock,
					CIFS_LOCK_OP);
	if (exist) {
		flock->fl_start = conf_lock->offset;
		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
		flock->fl_pid = conf_lock->pid;
		if (conf_lock->type & server->vals->shared_lock_type)
			flock->fl_type = F_RDLCK;
		else
			flock->fl_type = F_WRLCK;
	} else if (!cinode->can_cache_brlcks)
		rc = 1;
	else
		flock->fl_type = F_UNLCK;

	up_read(&cinode->lock_sem);
	return rc;
}
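
/*
 * Editorial note: this is the local half of an F_GETLK-style query.  If a
 * cached lock already conflicts, the flock structure is filled in from it
 * and no round trip is needed; only when nothing conflicts locally and
 * brlocks cannot be cached (rc == 1) does cifs_getlk() below have to probe
 * the server.
 */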

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	down_write(&cinode->lock_sem);
	list_add_tail(&lock->llist, &cfile->llist->locks);
	up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
		 bool wait)
{
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	bool exist;
	int rc = 0;

try_again:
	exist = false;
	down_write(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
					lock->type, lock->flags, &conf_lock,
					CIFS_LOCK_OP);
	if (!exist && cinode->can_cache_brlcks) {
		list_add_tail(&lock->llist, &cfile->llist->locks);
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (!exist)
		rc = 1;
	else if (!wait)
		rc = -EACCES;
	else {
		list_add_tail(&lock->blist, &conf_lock->blist);
		up_write(&cinode->lock_sem);
		rc = wait_event_interruptible(lock->block_q,
					(lock->blist.prev == &lock->blist) &&
					(lock->blist.next == &lock->blist));
		if (!rc)
			goto try_again;
		down_write(&cinode->lock_sem);
		list_del_init(&lock->blist);
	}

	up_write(&cinode->lock_sem);
	return rc;
}
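
/*
 * Editorial note: when a conflicting lock exists and @wait is true, the
 * request parks itself on the conflicting lock's blist and sleeps until
 * cifs_del_lock_waiters() removes it (the wait condition simply tests
 * that lock->blist is self-linked again), then retries the conflict
 * search from scratch.
 */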

/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can
 * cache brlocks, or leave it the same if we can't. Returns 0 if we don't
 * need to request to the server, or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
	int rc = 0;
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	unsigned char saved_type = flock->fl_type;

	if ((flock->fl_flags & FL_POSIX) == 0)
		return 1;

	down_read(&cinode->lock_sem);
	posix_test_lock(file, flock);

	if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
		flock->fl_type = saved_type;
		rc = 1;
	}

	up_read(&cinode->lock_sem);
	return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if we need to request to the server;
 * 3) <0, if an error occurs while setting the lock.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	int rc = 1;

	if ((flock->fl_flags & FL_POSIX) == 0)
		return rc;

try_again:
	down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}

	rc = posix_lock_file(file, flock, NULL);
	up_write(&cinode->lock_sem);
	if (rc == FILE_LOCK_DEFERRED) {
		rc = wait_event_interruptible(flock->fl_wait, !flock->fl_blocker);
		if (!rc)
			goto try_again;
		locks_delete_block(flock);
	}
	return rc;
}
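
/*
 * Editorial note (my reading of the generic VFS lock code of this era):
 * posix_lock_file() returns FILE_LOCK_DEFERRED when the caller asked to
 * wait (FL_SLEEP) and a conflicting lock is held, having already queued
 * the waiter; the loop above then sleeps until fl_blocker is cleared and
 * retries, or tears the block down with locks_delete_block() if the
 * sleep was interrupted.
 */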

int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
	unsigned int xid;
	int rc = 0, stored_rc;
	struct cifsLockInfo *li, *tmp;
	struct cifs_tcon *tcon;
	unsigned int num, max_num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	static const int types[] = {
		LOCKING_ANDX_LARGE_FILES,
		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
	};
	int i;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
		free_xid(xid);
		return -EINVAL;
	}

	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
		     PAGE_SIZE);
	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
			PAGE_SIZE);
	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf) {
		free_xid(xid);
		return -ENOMEM;
	}

	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (li->type != types[i])
				continue;
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			if (++num == max_num) {
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       (__u8)li->type, 0, num,
						       buf);
				if (stored_rc)
					rc = stored_rc;
				cur = buf;
				num = 0;
			} else
				cur++;
		}

		if (num) {
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       (__u8)types[i], 0, num, buf);
			if (stored_rc)
				rc = stored_rc;
		}
	}

	kfree(buf);
	free_xid(xid);
	return rc;
}

static __u32
hash_lockowner(fl_owner_t owner)
{
	return cifs_lock_secret ^ hash32_ptr((const void *)owner);
}
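
/*
 * Editorial note: the lock owner is a kernel pointer, so it is hashed
 * with hash32_ptr() and XORed with cifs_lock_secret (a random value
 * generated when the module loads, as I understand cifsfs init) before
 * being used as an on-the-wire lock "pid"; this gives each owner a
 * stable identifier without leaking kernel addresses to the server.
 */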

struct lock_to_push {
	struct list_head llist;
	__u64 offset;
	__u64 length;
	__u32 pid;
	__u16 netfid;
	__u8 type;
};

static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
	struct inode *inode = d_inode(cfile->dentry);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct file_lock *flock;
	struct file_lock_context *flctx = inode->i_flctx;
	unsigned int count = 0, i;
	int rc = 0, xid, type;
	struct list_head locks_to_send, *el;
	struct lock_to_push *lck, *tmp;
	__u64 length;

	xid = get_xid();

	if (!flctx)
		goto out;

	spin_lock(&flctx->flc_lock);
	list_for_each(el, &flctx->flc_posix) {
		count++;
	}
	spin_unlock(&flctx->flc_lock);

	INIT_LIST_HEAD(&locks_to_send);

	/*
	 * Allocating count locks is enough because no FL_POSIX locks can be
	 * added to the list while we are holding cinode->lock_sem that
	 * protects locking operations of this inode.
	 */
	for (i = 0; i < count; i++) {
		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
		if (!lck) {
			rc = -ENOMEM;
			goto err_out;
		}
		list_add_tail(&lck->llist, &locks_to_send);
	}

	el = locks_to_send.next;
	spin_lock(&flctx->flc_lock);
	list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
		if (el == &locks_to_send) {
			/*
			 * The list ended. We don't have enough allocated
			 * structures - something is really wrong.
			 */
			cifs_dbg(VFS, "Can't push all brlocks!\n");
			break;
		}
		length = 1 + flock->fl_end - flock->fl_start;
		if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
			type = CIFS_RDLCK;
		else
			type = CIFS_WRLCK;
		lck = list_entry(el, struct lock_to_push, llist);
		lck->pid = hash_lockowner(flock->fl_owner);
		lck->netfid = cfile->fid.netfid;
		lck->length = length;
		lck->type = type;
1289 }
1290 spin_unlock(&flctx->flc_lock);
1291
1292 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1293 int stored_rc;
1294
1295 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1296 lck->offset, lck->length, NULL,
1297 lck->type, 0);
1298 if (stored_rc)
1299 rc = stored_rc;
1300 list_del(&lck->llist);
1301 kfree(lck);
1302 }
1303
1304out:
1305 free_xid(xid);
1306 return rc;
1307err_out:
1308 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1309 list_del(&lck->llist);
1310 kfree(lck);
1311 }
1312 goto out;
1313}

static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	/* we are going to update can_cache_brlcks here - need a write access */
	down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	cinode->can_cache_brlcks = false;
	up_write(&cinode->lock_sem);
	return rc;
}

static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
		bool *wait_flag, struct TCP_Server_Info *server)
{
	if (flock->fl_flags & FL_POSIX)
		cifs_dbg(FYI, "Posix\n");
	if (flock->fl_flags & FL_FLOCK)
		cifs_dbg(FYI, "Flock\n");
	if (flock->fl_flags & FL_SLEEP) {
		cifs_dbg(FYI, "Blocking lock\n");
		*wait_flag = true;
	}
	if (flock->fl_flags & FL_ACCESS)
		cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
	if (flock->fl_flags & FL_LEASE)
		cifs_dbg(FYI, "Lease on file - not implemented yet\n");
	if (flock->fl_flags &
	    (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
	       FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
		cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);

	*type = server->vals->large_lock_type;
	if (flock->fl_type == F_WRLCK) {
		cifs_dbg(FYI, "F_WRLCK\n");
		*type |= server->vals->exclusive_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_UNLCK) {
		cifs_dbg(FYI, "F_UNLCK\n");
		*type |= server->vals->unlock_lock_type;
		*unlock = 1;
		/* Check if unlock includes more than one lock range */
	} else if (flock->fl_type == F_RDLCK) {
		cifs_dbg(FYI, "F_RDLCK\n");
		*type |= server->vals->shared_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_EXLCK) {
		cifs_dbg(FYI, "F_EXLCK\n");
		*type |= server->vals->exclusive_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_SHLCK) {
		cifs_dbg(FYI, "F_SHLCK\n");
		*type |= server->vals->shared_lock_type;
		*lock = 1;
	} else
		cifs_dbg(FYI, "Unknown type of lock\n");
}

static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	__u16 netfid = cfile->fid.netfid;

	if (posix_lck) {
		int posix_lock_type;

		rc = cifs_posix_lock_test(file, flock);
		if (!rc)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;
		rc = CIFSSMBPosixLock(xid, tcon, netfid,
				      hash_lockowner(flock->fl_owner),
				      flock->fl_start, length, flock,
				      posix_lock_type, wait_flag);
		return rc;
	}

	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
	if (!rc)
		return rc;

	/* BB we could chain these into one lock request BB */
	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 0, 1, false);
		flock->fl_type = F_UNLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
		return 0;
	}

	if (type & server->vals->shared_lock_type) {
		flock->fl_type = F_WRLCK;
		return 0;
	}

	type &= ~server->vals->exclusive_lock_type;

	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
				    type | server->vals->shared_lock_type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type | server->vals->shared_lock_type,
					    0, 1, false);
		flock->fl_type = F_RDLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
	} else
		flock->fl_type = F_WRLCK;

	return 0;
}
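
/*
 * Editorial note on the probes above: SMB has no native "test lock"
 * operation, so cifs_getlk() probes by actually taking a lock and, on
 * success, immediately unlocking it again (and reporting F_UNLCK).  When
 * an exclusive probe fails, a second, shared probe disambiguates: if it
 * succeeds the conflict must be a shared lock (report F_RDLCK), otherwise
 * an exclusive lock is held (report F_WRLCK).
 */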

void
cifs_move_llist(struct list_head *source, struct list_head *dest)
{
	struct list_head *li, *tmp;
	list_for_each_safe(li, tmp, source)
		list_move(li, dest);
}

void
cifs_free_llist(struct list_head *llist)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, llist, llist) {
		cifs_del_lock_waiters(li);
		list_del(&li->llist);
		kfree(li);
	}
}

int
cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
		  unsigned int xid)
{
	int rc = 0, stored_rc;
	static const int types[] = {
		LOCKING_ANDX_LARGE_FILES,
		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
	};
	unsigned int i;
	unsigned int max_num, num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifsLockInfo *li, *tmp;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct list_head tmp_llist;

	INIT_LIST_HEAD(&tmp_llist);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
		return -EINVAL;

	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
		     PAGE_SIZE);
	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
			PAGE_SIZE);
	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	down_write(&cinode->lock_sem);
	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (flock->fl_start > li->offset ||
			    (flock->fl_start + length) <
			    (li->offset + li->length))
				continue;
			if (current->tgid != li->pid)
				continue;
			if (types[i] != li->type)
				continue;
			if (cinode->can_cache_brlcks) {
				/*
				 * We can cache brlock requests - simply remove
				 * a lock from the file's list.
				 */
				list_del(&li->llist);
				cifs_del_lock_waiters(li);
				kfree(li);
				continue;
			}
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			/*
			 * We need to save a lock here to let us add it again to
			 * the file's list if the unlock range request fails on
			 * the server.
			 */
			list_move(&li->llist, &tmp_llist);
			if (++num == max_num) {
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       li->type, num, 0, buf);
				if (stored_rc) {
					/*
					 * We failed on the unlock range
					 * request - add all locks from the tmp
					 * list to the head of the file's list.
					 */
					cifs_move_llist(&tmp_llist,
							&cfile->llist->locks);
					rc = stored_rc;
				} else
					/*
					 * The unlock range request succeeded -
					 * free the tmp list.
					 */
					cifs_free_llist(&tmp_llist);
				cur = buf;
				num = 0;
			} else
				cur++;
		}
		if (num) {
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       types[i], num, 0, buf);
			if (stored_rc) {
				cifs_move_llist(&tmp_llist,
						&cfile->llist->locks);
				rc = stored_rc;
			} else
				cifs_free_llist(&tmp_llist);
		}
	}

	up_write(&cinode->lock_sem);
	kfree(buf);
	return rc;
}

static int
cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, int lock, int unlock,
	   unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct inode *inode = d_inode(cfile->dentry);

	if (posix_lck) {
		int posix_lock_type;

		rc = cifs_posix_lock_set(file, flock);
		if (!rc || rc < 0)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;

		if (unlock == 1)
			posix_lock_type = CIFS_UNLCK;

		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
				      hash_lockowner(flock->fl_owner),
				      flock->fl_start, length,
				      NULL, posix_lock_type, wait_flag);
		goto out;
	}

	if (lock) {
		struct cifsLockInfo *lock;

		lock = cifs_lock_init(flock->fl_start, length, type,
				      flock->fl_flags);
		if (!lock)
			return -ENOMEM;

		rc = cifs_lock_add_if(cfile, lock, wait_flag);
		if (rc < 0) {
			kfree(lock);
			return rc;
		}
		if (!rc)
			goto out;

		/*
		 * Windows 7 servers can delay breaking a lease from read to
		 * None if we set a byte-range lock on a file - break it
		 * explicitly before sending the lock to the server, to be
		 * sure the next read won't conflict with non-overlapping
		 * locks due to page reading.
		 */
		if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
		    CIFS_CACHE_READ(CIFS_I(inode))) {
			cifs_zap_mapping(inode);
			cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
				 inode);
			CIFS_I(inode)->oplock = 0;
		}

		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 1, 0, wait_flag);
		if (rc) {
			kfree(lock);
			return rc;
		}

		cifs_lock_add(cfile, lock);
	} else if (unlock)
		rc = server->ops->mand_unlock_range(cfile, flock, xid);

out:
	if (flock->fl_flags & FL_POSIX) {
		/*
		 * If this is a request to remove all locks because we
		 * are closing the file, it doesn't matter if the
		 * unlocking failed as both cifs.ko and the SMB server
		 * remove the lock on file close
		 */
		if (rc) {
			cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
			if (!(flock->fl_flags & FL_CLOSE))
				return rc;
		}
		rc = locks_lock_file_wait(file, flock);
	}
	return rc;
}

int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
{
	int rc, xid;
	int lock = 0, unlock = 0;
	bool wait_flag = false;
	bool posix_lck = false;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct cifsInodeInfo *cinode;
	struct cifsFileInfo *cfile;
	__u16 netfid;
	__u32 type;

	rc = -EACCES;
	xid = get_xid();

	cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
		 cmd, flock->fl_flags, flock->fl_type,
		 flock->fl_start, flock->fl_end);

	cfile = (struct cifsFileInfo *)file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
			tcon->ses->server);
	cifs_sb = CIFS_FILE_SB(file);
	netfid = cfile->fid.netfid;
	cinode = CIFS_I(file_inode(file));

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		posix_lck = true;
	/*
	 * BB add code here to normalize offset and length to account for
	 * negative length which we can not accept over the wire.
	 */
	if (IS_GETLK(cmd)) {
		rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
		free_xid(xid);
		return rc;
	}

	if (!lock && !unlock) {
		/*
		 * if no lock or unlock then nothing to do since we do not
		 * know what it is
		 */
		free_xid(xid);
		return -EOPNOTSUPP;
	}

	rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
			xid);
	free_xid(xid);
	return rc;
}

/*
 * update the file size (if needed) after a write. Should be called with
 * the inode->i_lock held
 */
void
cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
		unsigned int bytes_written)
{
	loff_t end_of_write = offset + bytes_written;

	if (end_of_write > cifsi->server_eof)
		cifsi->server_eof = end_of_write;
}
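
/*
 * Illustrative example (editorial, not from the original source): after a
 * write of bytes_written = 4096 at offset = 8192, end_of_write is 12288;
 * if server_eof was 10000 it is advanced to 12288, while a later write
 * that ends below server_eof leaves it untouched.
 */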

static ssize_t
cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
	   size_t write_size, loff_t *offset)
{
	int rc = 0;
	unsigned int bytes_written = 0;
	unsigned int total_written;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	unsigned int xid;
	struct dentry *dentry = open_file->dentry;
	struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
	struct cifs_io_parms io_parms;

	cifs_sb = CIFS_SB(dentry->d_sb);

	cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
		 write_size, *offset, dentry);

	tcon = tlink_tcon(open_file->tlink);
	server = tcon->ses->server;

	if (!server->ops->sync_write)
		return -ENOSYS;

	xid = get_xid();

	for (total_written = 0; write_size > total_written;
	     total_written += bytes_written) {
		rc = -EAGAIN;
		while (rc == -EAGAIN) {
			struct kvec iov[2];
			unsigned int len;

			if (open_file->invalidHandle) {
				/* we could deadlock if we called
				   filemap_fdatawait from here so tell
				   reopen_file not to flush data to
				   server now */
				rc = cifs_reopen_file(open_file, false);
				if (rc != 0)
					break;
			}

			len = min(server->ops->wp_retry_size(d_inode(dentry)),
				  (unsigned int)write_size - total_written);
			/* iov[0] is reserved for smb header */
			iov[1].iov_base = (char *)write_data + total_written;
			iov[1].iov_len = len;
			io_parms.pid = pid;
			io_parms.tcon = tcon;
			io_parms.offset = *offset;
			io_parms.length = len;
			rc = server->ops->sync_write(xid, &open_file->fid,
						     &io_parms, &bytes_written,
						     iov, 1);
		}
		if (rc || (bytes_written == 0)) {
			if (total_written)
				break;
			else {
				free_xid(xid);
				return rc;
			}
		} else {
			spin_lock(&d_inode(dentry)->i_lock);
			cifs_update_eof(cifsi, *offset, bytes_written);
			spin_unlock(&d_inode(dentry)->i_lock);
			*offset += bytes_written;
		}
	}

	cifs_stats_bytes_written(tcon, total_written);

	if (total_written > 0) {
		spin_lock(&d_inode(dentry)->i_lock);
		if (*offset > d_inode(dentry)->i_size)
			i_size_write(d_inode(dentry), *offset);
		spin_unlock(&d_inode(dentry)->i_lock);
	}
	mark_inode_dirty_sync(d_inode(dentry));
	free_xid(xid);
	return total_written;
}
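
/*
 * Editorial note: the nested loops above implement the retry policy for
 * synchronous writes - the inner loop retries a single chunk for as long
 * as the server returns -EAGAIN (reopening an invalidated handle first),
 * while the outer loop advances through the buffer at most
 * wp_retry_size() bytes at a time until write_size bytes have gone out
 * or an error sticks.
 */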

struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
					bool fsuid_only)
{
	struct cifsFileInfo *open_file = NULL;
	struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
	struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&tcon->open_file_lock);
	/* we could simply get the first_list_entry since write-only entries
	   are always at the end of the list but since the first entry might
	   have a close pending, we go through the whole list */
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
			if (!open_file->invalidHandle) {
				/* found a good file */
				/* lock it so it will not be closed on us */
				cifsFileInfo_get(open_file);
				spin_unlock(&tcon->open_file_lock);
				return open_file;
			} /* else might as well continue, and look for
			     another, or simply have the caller reopen it
			     again rather than trying to fix this handle */
		} else /* write only file */
			break; /* write only files are last so must be done */
	}
	spin_unlock(&tcon->open_file_lock);
	return NULL;
}

/* Return -EBADF if no handle is found and general rc otherwise */
int
cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only,
		       struct cifsFileInfo **ret_file)
{
	struct cifsFileInfo *open_file, *inv_file = NULL;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	bool any_available = false;
	int rc = -EBADF;
	unsigned int refind = 0;

	*ret_file = NULL;

	/*
	 * Having a null inode here (because mapping->host was set to zero by
	 * the VFS or MM) should not happen but we had reports of an oops
	 * (due to it being zero) during stress test cases so we need to
	 * check for it
	 */

	if (cifs_inode == NULL) {
		cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
		dump_stack();
		return rc;
	}

	cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
	tcon = cifs_sb_master_tcon(cifs_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&tcon->open_file_lock);
refind_writable:
	if (refind > MAX_REOPEN_ATT) {
		spin_unlock(&tcon->open_file_lock);
		return rc;
	}
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (!any_available && open_file->pid != current->tgid)
			continue;
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
			if (!open_file->invalidHandle) {
				/* found a good writable file */
				cifsFileInfo_get(open_file);
				spin_unlock(&tcon->open_file_lock);
				*ret_file = open_file;
				return 0;
			} else {
				if (!inv_file)
					inv_file = open_file;
			}
		}
	}
1933 /* couldn't find usable FH with same pid, try any available */
1934 if (!any_available) {
1935 any_available = true;
1936 goto refind_writable;
1937 }
1938
1939 if (inv_file) {
1940 any_available = false;
1941 cifsFileInfo_get(inv_file);
1942 }
1943
1944 spin_unlock(&tcon->open_file_lock);
1945
1946 if (inv_file) {
1947 rc = cifs_reopen_file(inv_file, false);
1948 if (!rc) {
1949 *ret_file = inv_file;
1950 return 0;
1951 }
1952
1953 spin_lock(&tcon->open_file_lock);
1954 list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
1955 spin_unlock(&tcon->open_file_lock);
1956 cifsFileInfo_put(inv_file);
1957 ++refind;
1958 inv_file = NULL;
1959 spin_lock(&tcon->open_file_lock);
1960 goto refind_writable;
1961 }
1962
1963 return rc;
1964}
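/*
 * Illustrative sketch (not part of this file) of the return convention
 * documented above: 0 with *ret_file set on success, -EBADF when no
 * handle exists, or another rc (e.g. from a failed reopen attempt).
 * The demo_* name is hypothetical.
 */
#if 0	/* example only, never compiled */
static int demo_with_writable_handle(struct cifsInodeInfo *cinode)
{
	struct cifsFileInfo *wfile;
	int rc;

	rc = cifs_get_writable_file(cinode, false, &wfile);
	if (rc)
		return rc;	/* -EBADF or a reopen failure */

	/* ... write through wfile->fid here ... */

	cifsFileInfo_put(wfile);
	return 0;
}
#endif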
1965
1966struct cifsFileInfo *
1967find_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only)
1968{
1969 struct cifsFileInfo *cfile;
1970 int rc;
1971
1972 rc = cifs_get_writable_file(cifs_inode, fsuid_only, &cfile);
1973 if (rc)
1974 cifs_dbg(FYI, "couldn't find writable handle rc=%d\n", rc);
1975
1976 return cfile;
1977}
1978
1979static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1980{
1981 struct address_space *mapping = page->mapping;
1982 loff_t offset = (loff_t)page->index << PAGE_SHIFT;
1983 char *write_data;
1984 int rc = -EFAULT;
1985 int bytes_written = 0;
1986 struct inode *inode;
1987 struct cifsFileInfo *open_file;
1988
1989 if (!mapping || !mapping->host)
1990 return -EFAULT;
1991
1992 inode = page->mapping->host;
1993
1994 offset += (loff_t)from;
1995 write_data = kmap(page);
1996 write_data += from;
1997
1998 if ((to > PAGE_SIZE) || (from > to)) {
1999 kunmap(page);
2000 return -EIO;
2001 }
2002
2003 /* racing with truncate? */
2004 if (offset > mapping->host->i_size) {
2005 kunmap(page);
2006 return 0; /* don't care */
2007 }
2008
2009 /* check to make sure that we are not extending the file */
2010 if (mapping->host->i_size - offset < (loff_t)to)
2011 to = (unsigned)(mapping->host->i_size - offset);
2012
2013 rc = cifs_get_writable_file(CIFS_I(mapping->host), false, &open_file);
2014 if (!rc) {
2015 bytes_written = cifs_write(open_file, open_file->pid,
2016 write_data, to - from, &offset);
2017 cifsFileInfo_put(open_file);
2018 /* Does mm or vfs already set times? */
2019 inode->i_atime = inode->i_mtime = current_time(inode);
2020 if ((bytes_written > 0) && (offset))
2021 rc = 0;
2022 else if (bytes_written < 0)
2023 rc = bytes_written;
2024 else
2025 rc = -EFAULT;
2026 } else {
2027 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2028 if (!is_retryable_error(rc))
2029 rc = -EIO;
2030 }
2031
2032 kunmap(page);
2033 return rc;
2034}
2035
2036static struct cifs_writedata *
2037wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2038 pgoff_t end, pgoff_t *index,
2039 unsigned int *found_pages)
2040{
2041 struct cifs_writedata *wdata;
2042
2043 wdata = cifs_writedata_alloc((unsigned int)tofind,
2044 cifs_writev_complete);
2045 if (!wdata)
2046 return NULL;
2047
2048 *found_pages = find_get_pages_range_tag(mapping, index, end,
2049 PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
2050 return wdata;
2051}
2052
2053static unsigned int
2054wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
2055 struct address_space *mapping,
2056 struct writeback_control *wbc,
2057 pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
2058{
2059 unsigned int nr_pages = 0, i;
2060 struct page *page;
2061
2062 for (i = 0; i < found_pages; i++) {
2063 page = wdata->pages[i];
2064 /*
2065 * At this point we hold neither the i_pages lock nor the
2066 * page lock: the page may be truncated or invalidated
2067 * (changing page->mapping to NULL), or even swizzled
2068 * back from swapper_space to tmpfs file mapping
2069 */
2070
2071 if (nr_pages == 0)
2072 lock_page(page);
2073 else if (!trylock_page(page))
2074 break;
2075
2076 if (unlikely(page->mapping != mapping)) {
2077 unlock_page(page);
2078 break;
2079 }
2080
2081 if (!wbc->range_cyclic && page->index > end) {
2082 *done = true;
2083 unlock_page(page);
2084 break;
2085 }
2086
2087 if (*next && (page->index != *next)) {
2088 /* Not next consecutive page */
2089 unlock_page(page);
2090 break;
2091 }
2092
2093 if (wbc->sync_mode != WB_SYNC_NONE)
2094 wait_on_page_writeback(page);
2095
2096 if (PageWriteback(page) ||
2097 !clear_page_dirty_for_io(page)) {
2098 unlock_page(page);
2099 break;
2100 }
2101
2102 /*
2103 * This actually clears the dirty bit in the radix tree.
2104 * See cifs_writepage() for more commentary.
2105 */
2106 set_page_writeback(page);
2107 if (page_offset(page) >= i_size_read(mapping->host)) {
2108 *done = true;
2109 unlock_page(page);
2110 end_page_writeback(page);
2111 break;
2112 }
2113
2114 wdata->pages[i] = page;
2115 *next = page->index + 1;
2116 ++nr_pages;
2117 }
2118
2119 /* reset index to refind any pages skipped */
2120 if (nr_pages == 0)
2121 *index = wdata->pages[0]->index + 1;
2122
2123 /* put any pages we aren't going to use */
2124 for (i = nr_pages; i < found_pages; i++) {
2125 put_page(wdata->pages[i]);
2126 wdata->pages[i] = NULL;
2127 }
2128
2129 return nr_pages;
2130}
2131
2132static int
2133wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2134 struct address_space *mapping, struct writeback_control *wbc)
2135{
2136 int rc;
2137 struct TCP_Server_Info *server =
2138 tlink_tcon(wdata->cfile->tlink)->ses->server;
2139
2140 wdata->sync_mode = wbc->sync_mode;
2141 wdata->nr_pages = nr_pages;
2142 wdata->offset = page_offset(wdata->pages[0]);
2143 wdata->pagesz = PAGE_SIZE;
2144 wdata->tailsz = min(i_size_read(mapping->host) -
2145 page_offset(wdata->pages[nr_pages - 1]),
2146 (loff_t)PAGE_SIZE);
2147 wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2148 wdata->pid = wdata->cfile->pid;
2149
2150 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2151 if (rc)
2152 return rc;
2153
2154 if (wdata->cfile->invalidHandle)
2155 rc = -EAGAIN;
2156 else
2157 rc = server->ops->async_writev(wdata, cifs_writedata_release);
2158
2159 return rc;
2160}
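/*
 * Worked example (hypothetical numbers) for the bytes/tailsz math above:
 * with nr_pages == 3, 4K pages, pages[0] at file offset 0 and
 * i_size_read() == 10240, page_offset(pages[2]) == 8192, so
 * tailsz = min(10240 - 8192, 4096) = 2048 and
 * bytes = (3 - 1) * 4096 + 2048 = 10240, i.e. exactly up to EOF.
 */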
2161
2162static int cifs_writepages(struct address_space *mapping,
2163 struct writeback_control *wbc)
2164{
2165 struct inode *inode = mapping->host;
2166 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2167 struct TCP_Server_Info *server;
2168 bool done = false, scanned = false, range_whole = false;
2169 pgoff_t end, index;
2170 struct cifs_writedata *wdata;
2171 struct cifsFileInfo *cfile = NULL;
2172 int rc = 0;
2173 int saved_rc = 0;
2174 unsigned int xid;
2175
2176 /*
2177 * If wsize is smaller than the page size, default to writing
2178 * one page at a time via cifs_writepage()
2179 */
2180 if (cifs_sb->wsize < PAGE_SIZE)
2181 return generic_writepages(mapping, wbc);
2182
2183 xid = get_xid();
2184 if (wbc->range_cyclic) {
2185 index = mapping->writeback_index; /* Start from prev offset */
2186 end = -1;
2187 } else {
2188 index = wbc->range_start >> PAGE_SHIFT;
2189 end = wbc->range_end >> PAGE_SHIFT;
2190 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2191 range_whole = true;
2192 scanned = true;
2193 }
2194 server = cifs_sb_master_tcon(cifs_sb)->ses->server;
2195retry:
2196 while (!done && index <= end) {
2197 unsigned int i, nr_pages, found_pages, wsize;
2198 pgoff_t next = 0, tofind, saved_index = index;
2199 struct cifs_credits credits_on_stack;
2200 struct cifs_credits *credits = &credits_on_stack;
2201 int get_file_rc = 0;
2202
2203 if (cfile)
2204 cifsFileInfo_put(cfile);
2205
2206 rc = cifs_get_writable_file(CIFS_I(inode), false, &cfile);
2207
2208 /* in case of an error, store it to return later */
2209 if (rc)
2210 get_file_rc = rc;
2211
2212 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2213 &wsize, credits);
2214 if (rc != 0) {
2215 done = true;
2216 break;
2217 }
2218
2219 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2220
2221 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2222 &found_pages);
2223 if (!wdata) {
2224 rc = -ENOMEM;
2225 done = true;
2226 add_credits_and_wake_if(server, credits, 0);
2227 break;
2228 }
2229
2230 if (found_pages == 0) {
2231 kref_put(&wdata->refcount, cifs_writedata_release);
2232 add_credits_and_wake_if(server, credits, 0);
2233 break;
2234 }
2235
2236 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2237 end, &index, &next, &done);
2238
2239 /* nothing to write? */
2240 if (nr_pages == 0) {
2241 kref_put(&wdata->refcount, cifs_writedata_release);
2242 add_credits_and_wake_if(server, credits, 0);
2243 continue;
2244 }
2245
2246 wdata->credits = credits_on_stack;
2247 wdata->cfile = cfile;
2248 cfile = NULL;
2249
2250 if (!wdata->cfile) {
2251 cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
2252 get_file_rc);
2253 if (is_retryable_error(get_file_rc))
2254 rc = get_file_rc;
2255 else
2256 rc = -EBADF;
2257 } else
2258 rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2259
2260 for (i = 0; i < nr_pages; ++i)
2261 unlock_page(wdata->pages[i]);
2262
2263 /* send failure -- clean up the mess */
2264 if (rc != 0) {
2265 add_credits_and_wake_if(server, &wdata->credits, 0);
2266 for (i = 0; i < nr_pages; ++i) {
2267 if (is_retryable_error(rc))
2268 redirty_page_for_writepage(wbc,
2269 wdata->pages[i]);
2270 else
2271 SetPageError(wdata->pages[i]);
2272 end_page_writeback(wdata->pages[i]);
2273 put_page(wdata->pages[i]);
2274 }
2275 if (!is_retryable_error(rc))
2276 mapping_set_error(mapping, rc);
2277 }
2278 kref_put(&wdata->refcount, cifs_writedata_release);
2279
2280 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2281 index = saved_index;
2282 continue;
2283 }
2284
2285 /* Return immediately if we received a signal during writing */
2286 if (is_interrupt_error(rc)) {
2287 done = true;
2288 break;
2289 }
2290
2291 if (rc != 0 && saved_rc == 0)
2292 saved_rc = rc;
2293
2294 wbc->nr_to_write -= nr_pages;
2295 if (wbc->nr_to_write <= 0)
2296 done = true;
2297
2298 index = next;
2299 }
2300
2301 if (!scanned && !done) {
2302 /*
2303 * We hit the last page and there is more work to be done: wrap
2304 * back to the start of the file
2305 */
2306 scanned = true;
2307 index = 0;
2308 goto retry;
2309 }
2310
2311 if (saved_rc != 0)
2312 rc = saved_rc;
2313
2314 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2315 mapping->writeback_index = index;
2316
2317 if (cfile)
2318 cifsFileInfo_put(cfile);
2319 free_xid(xid);
2320 return rc;
2321}
2322
2323static int
2324cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2325{
2326 int rc;
2327 unsigned int xid;
2328
2329 xid = get_xid();
2330/* BB add check for wbc flags */
2331 get_page(page);
2332 if (!PageUptodate(page))
2333 cifs_dbg(FYI, "ppw - page not up to date\n");
2334
2335 /*
2336 * Set the "writeback" flag, and clear "dirty" in the radix tree.
2337 *
2338 * A writepage() implementation always needs to do either this,
2339 * or re-dirty the page with "redirty_page_for_writepage()" in
2340 * the case of a failure.
2341 *
2342 * Just unlocking the page will cause the radix tree tag-bits
2343 * to fail to update with the state of the page correctly.
2344 */
2345 set_page_writeback(page);
2346retry_write:
2347 rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2348 if (is_retryable_error(rc)) {
2349 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2350 goto retry_write;
2351 redirty_page_for_writepage(wbc, page);
2352 } else if (rc != 0) {
2353 SetPageError(page);
2354 mapping_set_error(page->mapping, rc);
2355 } else {
2356 SetPageUptodate(page);
2357 }
2358 end_page_writeback(page);
2359 put_page(page);
2360 free_xid(xid);
2361 return rc;
2362}
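/*
 * Illustrative skeleton (not part of this file) of the writepage protocol
 * described in the comment above: every set_page_writeback() is paired
 * with end_page_writeback(), and a retryable failure re-dirties the page
 * instead of dropping the data.  demo_issue_write() is a hypothetical
 * helper.
 */
#if 0	/* example only, never compiled */
static int demo_writepage(struct page *page, struct writeback_control *wbc)
{
	int rc;

	set_page_writeback(page);	/* clears the dirty tag */
	rc = demo_issue_write(page);	/* hypothetical I/O helper */
	if (rc)
		redirty_page_for_writepage(wbc, page);	/* retry later */
	end_page_writeback(page);	/* always pairs with the set above */
	return rc;
}
#endif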
2363
2364static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2365{
2366 int rc = cifs_writepage_locked(page, wbc);
2367 unlock_page(page);
2368 return rc;
2369}
2370
2371static int cifs_write_end(struct file *file, struct address_space *mapping,
2372 loff_t pos, unsigned len, unsigned copied,
2373 struct page *page, void *fsdata)
2374{
2375 int rc;
2376 struct inode *inode = mapping->host;
2377 struct cifsFileInfo *cfile = file->private_data;
2378 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2379 __u32 pid;
2380
2381 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2382 pid = cfile->pid;
2383 else
2384 pid = current->tgid;
2385
2386 cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2387 page, pos, copied);
2388
2389 if (PageChecked(page)) {
2390 if (copied == len)
2391 SetPageUptodate(page);
2392 ClearPageChecked(page);
2393 } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2394 SetPageUptodate(page);
2395
2396 if (!PageUptodate(page)) {
2397 char *page_data;
2398 unsigned offset = pos & (PAGE_SIZE - 1);
2399 unsigned int xid;
2400
2401 xid = get_xid();
2402 /* this is probably better than directly calling
2403 cifs_partialpagewrite() since in this function the file
2404 handle is already known, which we might as well leverage */
2405 /* BB check if anything else is missing out of ppw,
2406 such as updating the last write time */
2407 page_data = kmap(page);
2408 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2409 /* if (rc < 0) should we set writebehind rc? */
2410 kunmap(page);
2411
2412 free_xid(xid);
2413 } else {
2414 rc = copied;
2415 pos += copied;
2416 set_page_dirty(page);
2417 }
2418
2419 if (rc > 0) {
2420 spin_lock(&inode->i_lock);
2421 if (pos > inode->i_size)
2422 i_size_write(inode, pos);
2423 spin_unlock(&inode->i_lock);
2424 }
2425
2426 unlock_page(page);
2427 put_page(page);
2428
2429 return rc;
2430}
2431
2432int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2433 int datasync)
2434{
2435 unsigned int xid;
2436 int rc = 0;
2437 struct cifs_tcon *tcon;
2438 struct TCP_Server_Info *server;
2439 struct cifsFileInfo *smbfile = file->private_data;
2440 struct inode *inode = file_inode(file);
2441 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2442
2443 rc = file_write_and_wait_range(file, start, end);
2444 if (rc)
2445 return rc;
2446 inode_lock(inode);
2447
2448 xid = get_xid();
2449
2450 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2451 file, datasync);
2452
2453 if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2454 rc = cifs_zap_mapping(inode);
2455 if (rc) {
2456 cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2457 rc = 0; /* don't care about it in fsync */
2458 }
2459 }
2460
2461 tcon = tlink_tcon(smbfile->tlink);
2462 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2463 server = tcon->ses->server;
2464 if (server->ops->flush)
2465 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2466 else
2467 rc = -ENOSYS;
2468 }
2469
2470 free_xid(xid);
2471 inode_unlock(inode);
2472 return rc;
2473}
2474
2475int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2476{
2477 unsigned int xid;
2478 int rc = 0;
2479 struct cifs_tcon *tcon;
2480 struct TCP_Server_Info *server;
2481 struct cifsFileInfo *smbfile = file->private_data;
2482 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2483 struct inode *inode = file->f_mapping->host;
2484
2485 rc = file_write_and_wait_range(file, start, end);
2486 if (rc)
2487 return rc;
2488 inode_lock(inode);
2489
2490 xid = get_xid();
2491
2492 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2493 file, datasync);
2494
2495 tcon = tlink_tcon(smbfile->tlink);
2496 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2497 server = tcon->ses->server;
2498 if (server->ops->flush)
2499 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2500 else
2501 rc = -ENOSYS;
2502 }
2503
2504 free_xid(xid);
2505 inode_unlock(inode);
2506 return rc;
2507}
2508
2509/*
2510 * As the file closes, flush all cached write data for this inode,
2511 * checking for write-behind errors.
2512 */
2513int cifs_flush(struct file *file, fl_owner_t id)
2514{
2515 struct inode *inode = file_inode(file);
2516 int rc = 0;
2517
2518 if (file->f_mode & FMODE_WRITE)
2519 rc = filemap_write_and_wait(inode->i_mapping);
2520
2521 cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2522
2523 return rc;
2524}
2525
2526static int
2527cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2528{
2529 int rc = 0;
2530 unsigned long i;
2531
2532 for (i = 0; i < num_pages; i++) {
2533 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2534 if (!pages[i]) {
2535 /*
2536 * save number of pages we have already allocated and
2537 * return with ENOMEM error
2538 */
2539 num_pages = i;
2540 rc = -ENOMEM;
2541 break;
2542 }
2543 }
2544
2545 if (rc) {
2546 for (i = 0; i < num_pages; i++)
2547 put_page(pages[i]);
2548 }
2549 return rc;
2550}
2551
2552static inline
2553size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2554{
2555 size_t num_pages;
2556 size_t clen;
2557
2558 clen = min_t(const size_t, len, wsize);
2559 num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2560
2561 if (cur_len)
2562 *cur_len = clen;
2563
2564 return num_pages;
2565}
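/*
 * Worked example (hypothetical numbers): with wsize == 65536 and
 * len == 200000, clen = min(200000, 65536) = 65536, so *cur_len is set
 * to 65536 and the function returns DIV_ROUND_UP(65536, 4096) = 16 pages.
 */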
2566
2567static void
2568cifs_uncached_writedata_release(struct kref *refcount)
2569{
2570 int i;
2571 struct cifs_writedata *wdata = container_of(refcount,
2572 struct cifs_writedata, refcount);
2573
2574 kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2575 for (i = 0; i < wdata->nr_pages; i++)
2576 put_page(wdata->pages[i]);
2577 cifs_writedata_release(refcount);
2578}
2579
2580static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2581
2582static void
2583cifs_uncached_writev_complete(struct work_struct *work)
2584{
2585 struct cifs_writedata *wdata = container_of(work,
2586 struct cifs_writedata, work);
2587 struct inode *inode = d_inode(wdata->cfile->dentry);
2588 struct cifsInodeInfo *cifsi = CIFS_I(inode);
2589
2590 spin_lock(&inode->i_lock);
2591 cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2592 if (cifsi->server_eof > inode->i_size)
2593 i_size_write(inode, cifsi->server_eof);
2594 spin_unlock(&inode->i_lock);
2595
2596 complete(&wdata->done);
2597 collect_uncached_write_data(wdata->ctx);
2598 /* the call below may free the last ref to the aio ctx */
2599 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2600}
2601
2602static int
2603wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2604 size_t *len, unsigned long *num_pages)
2605{
2606 size_t save_len, copied, bytes, cur_len = *len;
2607 unsigned long i, nr_pages = *num_pages;
2608
2609 save_len = cur_len;
2610 for (i = 0; i < nr_pages; i++) {
2611 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2612 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2613 cur_len -= copied;
2614 /*
2615 * If we didn't copy as much as we expected, then that
2616 * may mean we trod into an unmapped area. Stop copying
2617 * at that point. On the next pass through the big
2618 * loop, we'll likely end up getting a zero-length
2619 * write and bailing out of it.
2620 */
2621 if (copied < bytes)
2622 break;
2623 }
2624 cur_len = save_len - cur_len;
2625 *len = cur_len;
2626
2627 /*
2628 * If we have no data to send, then that probably means that
2629 * the copy above failed altogether. That's most likely because
2630 * the address in the iovec was bogus. Return -EFAULT and let
2631 * the caller free anything we allocated and bail out.
2632 */
2633 if (!cur_len)
2634 return -EFAULT;
2635
2636 /*
2637 * i + 1 now represents the number of pages we actually used in
2638 * the copy phase above.
2639 */
2640 *num_pages = i + 1;
2641 return 0;
2642}
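/*
 * Worked example (hypothetical numbers) of the short-copy handling above:
 * with nr_pages == 4 and an iovec that faults after 6000 bytes, the loop
 * copies 4096 bytes into page 0, then only 1904 into page 1 and breaks
 * (copied < bytes), so *len becomes 6000 and *num_pages becomes 2; the
 * caller then releases the two pages that went unused.
 */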
2643
2644static int
2645cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
2646 struct cifs_aio_ctx *ctx)
2647{
2648 unsigned int wsize;
2649 struct cifs_credits credits;
2650 int rc;
2651 struct TCP_Server_Info *server =
2652 tlink_tcon(wdata->cfile->tlink)->ses->server;
2653
2654 do {
2655 if (wdata->cfile->invalidHandle) {
2656 rc = cifs_reopen_file(wdata->cfile, false);
2657 if (rc == -EAGAIN)
2658 continue;
2659 else if (rc)
2660 break;
2661 }
2662
2663
2664 /*
2665 * Wait for credits to resend this wdata.
2666 * Note: we attempt to resend the whole wdata in one piece,
2667 * not in segments
2668 */
2669 do {
2670 rc = server->ops->wait_mtu_credits(server, wdata->bytes,
2671 &wsize, &credits);
2672 if (rc)
2673 goto fail;
2674
2675 if (wsize < wdata->bytes) {
2676 add_credits_and_wake_if(server, &credits, 0);
2677 msleep(1000);
2678 }
2679 } while (wsize < wdata->bytes);
2680 wdata->credits = credits;
2681
2682 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2683
2684 if (!rc) {
2685 if (wdata->cfile->invalidHandle)
2686 rc = -EAGAIN;
2687 else
2688 rc = server->ops->async_writev(wdata,
2689 cifs_uncached_writedata_release);
2690 }
2691
2692 /* If the write was successfully sent, we are done */
2693 if (!rc) {
2694 list_add_tail(&wdata->list, wdata_list);
2695 return 0;
2696 }
2697
2698 /* Roll back credits and retry if needed */
2699 add_credits_and_wake_if(server, &wdata->credits, 0);
2700 } while (rc == -EAGAIN);
2701
2702fail:
2703 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2704 return rc;
2705}
2706
2707static int
2708cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2709 struct cifsFileInfo *open_file,
2710 struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
2711 struct cifs_aio_ctx *ctx)
2712{
2713 int rc = 0;
2714 size_t cur_len;
2715 unsigned long nr_pages, num_pages, i;
2716 struct cifs_writedata *wdata;
2717 struct iov_iter saved_from = *from;
2718 loff_t saved_offset = offset;
2719 pid_t pid;
2720 struct TCP_Server_Info *server;
2721 struct page **pagevec;
2722 size_t start;
2723 unsigned int xid;
2724
2725 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2726 pid = open_file->pid;
2727 else
2728 pid = current->tgid;
2729
2730 server = tlink_tcon(open_file->tlink)->ses->server;
2731 xid = get_xid();
2732
2733 do {
2734 unsigned int wsize;
2735 struct cifs_credits credits_on_stack;
2736 struct cifs_credits *credits = &credits_on_stack;
2737
2738 if (open_file->invalidHandle) {
2739 rc = cifs_reopen_file(open_file, false);
2740 if (rc == -EAGAIN)
2741 continue;
2742 else if (rc)
2743 break;
2744 }
2745
2746 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2747 &wsize, credits);
2748 if (rc)
2749 break;
2750
2751 cur_len = min_t(const size_t, len, wsize);
2752
2753 if (ctx->direct_io) {
2754 ssize_t result;
2755
2756 result = iov_iter_get_pages_alloc(
2757 from, &pagevec, cur_len, &start);
2758 if (result < 0) {
2759 cifs_dbg(VFS,
2760 "direct_writev couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
2761 result,
2762 from->type,
2763 from->iov_offset,
2764 from->count);
2765 dump_stack();
2766
2767 rc = result;
2768 add_credits_and_wake_if(server, credits, 0);
2769 break;
2770 }
2771 cur_len = (size_t)result;
2772 iov_iter_advance(from, cur_len);
2773
2774 nr_pages =
2775 (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
2776
2777 wdata = cifs_writedata_direct_alloc(pagevec,
2778 cifs_uncached_writev_complete);
2779 if (!wdata) {
2780 rc = -ENOMEM;
2781 add_credits_and_wake_if(server, credits, 0);
2782 break;
2783 }
2784
2785
2786 wdata->page_offset = start;
2787 wdata->tailsz =
2788 nr_pages > 1 ?
2789 cur_len - (PAGE_SIZE - start) -
2790 (nr_pages - 2) * PAGE_SIZE :
2791 cur_len;
2792 } else {
2793 nr_pages = get_numpages(wsize, len, &cur_len);
2794 wdata = cifs_writedata_alloc(nr_pages,
2795 cifs_uncached_writev_complete);
2796 if (!wdata) {
2797 rc = -ENOMEM;
2798 add_credits_and_wake_if(server, credits, 0);
2799 break;
2800 }
2801
2802 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2803 if (rc) {
2804 kvfree(wdata->pages);
2805 kfree(wdata);
2806 add_credits_and_wake_if(server, credits, 0);
2807 break;
2808 }
2809
2810 num_pages = nr_pages;
2811 rc = wdata_fill_from_iovec(
2812 wdata, from, &cur_len, &num_pages);
2813 if (rc) {
2814 for (i = 0; i < nr_pages; i++)
2815 put_page(wdata->pages[i]);
2816 kvfree(wdata->pages);
2817 kfree(wdata);
2818 add_credits_and_wake_if(server, credits, 0);
2819 break;
2820 }
2821
2822 /*
2823 * Bring nr_pages down to the number of pages we
2824 * actually used, and free any pages that we didn't use.
2825 */
2826 for ( ; nr_pages > num_pages; nr_pages--)
2827 put_page(wdata->pages[nr_pages - 1]);
2828
2829 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2830 }
2831
2832 wdata->sync_mode = WB_SYNC_ALL;
2833 wdata->nr_pages = nr_pages;
2834 wdata->offset = (__u64)offset;
2835 wdata->cfile = cifsFileInfo_get(open_file);
2836 wdata->pid = pid;
2837 wdata->bytes = cur_len;
2838 wdata->pagesz = PAGE_SIZE;
2839 wdata->credits = credits_on_stack;
2840 wdata->ctx = ctx;
2841 kref_get(&ctx->refcount);
2842
2843 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2844
2845 if (!rc) {
2846 if (wdata->cfile->invalidHandle)
2847 rc = -EAGAIN;
2848 else
2849 rc = server->ops->async_writev(wdata,
2850 cifs_uncached_writedata_release);
2851 }
2852
2853 if (rc) {
2854 add_credits_and_wake_if(server, &wdata->credits, 0);
2855 kref_put(&wdata->refcount,
2856 cifs_uncached_writedata_release);
2857 if (rc == -EAGAIN) {
2858 *from = saved_from;
2859 iov_iter_advance(from, offset - saved_offset);
2860 continue;
2861 }
2862 break;
2863 }
2864
2865 list_add_tail(&wdata->list, wdata_list);
2866 offset += cur_len;
2867 len -= cur_len;
2868 } while (len > 0);
2869
2870 free_xid(xid);
2871 return rc;
2872}
2873
2874static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
2875{
2876 struct cifs_writedata *wdata, *tmp;
2877 struct cifs_tcon *tcon;
2878 struct cifs_sb_info *cifs_sb;
2879 struct dentry *dentry = ctx->cfile->dentry;
2880 int rc;
2881
2882 tcon = tlink_tcon(ctx->cfile->tlink);
2883 cifs_sb = CIFS_SB(dentry->d_sb);
2884
2885 mutex_lock(&ctx->aio_mutex);
2886
2887 if (list_empty(&ctx->list)) {
2888 mutex_unlock(&ctx->aio_mutex);
2889 return;
2890 }
2891
2892 rc = ctx->rc;
2893 /*
2894 * Wait for and collect replies for any successful sends in order of
2895 * increasing offset. Once an error is hit, return without waiting
2896 * for any more replies.
2897 */
2898restart_loop:
2899 list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
2900 if (!rc) {
2901 if (!try_wait_for_completion(&wdata->done)) {
2902 mutex_unlock(&ctx->aio_mutex);
2903 return;
2904 }
2905
2906 if (wdata->result)
2907 rc = wdata->result;
2908 else
2909 ctx->total_len += wdata->bytes;
2910
2911 /* resend call if it's a retryable error */
2912 if (rc == -EAGAIN) {
2913 struct list_head tmp_list;
2914 struct iov_iter tmp_from = ctx->iter;
2915
2916 INIT_LIST_HEAD(&tmp_list);
2917 list_del_init(&wdata->list);
2918
2919 if (ctx->direct_io)
2920 rc = cifs_resend_wdata(
2921 wdata, &tmp_list, ctx);
2922 else {
2923 iov_iter_advance(&tmp_from,
2924 wdata->offset - ctx->pos);
2925
2926 rc = cifs_write_from_iter(wdata->offset,
2927 wdata->bytes, &tmp_from,
2928 ctx->cfile, cifs_sb, &tmp_list,
2929 ctx);
2930
2931 kref_put(&wdata->refcount,
2932 cifs_uncached_writedata_release);
2933 }
2934
2935 list_splice(&tmp_list, &ctx->list);
2936 goto restart_loop;
2937 }
2938 }
2939 list_del_init(&wdata->list);
2940 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2941 }
2942
2943 cifs_stats_bytes_written(tcon, ctx->total_len);
2944 set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
2945
2946 ctx->rc = (rc == 0) ? ctx->total_len : rc;
2947
2948 mutex_unlock(&ctx->aio_mutex);
2949
2950 if (ctx->iocb && ctx->iocb->ki_complete)
2951 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
2952 else
2953 complete(&ctx->done);
2954}
2955
2956static ssize_t __cifs_writev(
2957 struct kiocb *iocb, struct iov_iter *from, bool direct)
2958{
2959 struct file *file = iocb->ki_filp;
2960 ssize_t total_written = 0;
2961 struct cifsFileInfo *cfile;
2962 struct cifs_tcon *tcon;
2963 struct cifs_sb_info *cifs_sb;
2964 struct cifs_aio_ctx *ctx;
2965 struct iov_iter saved_from = *from;
2966 size_t len = iov_iter_count(from);
2967 int rc;
2968
2969 /*
2970 * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC.
2971 * In this case, fall back to the non-direct write function.
2972 * This could be improved by getting pages directly in ITER_KVEC.
2973 */
2974 if (direct && from->type & ITER_KVEC) {
2975 cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
2976 direct = false;
2977 }
2978
2979 rc = generic_write_checks(iocb, from);
2980 if (rc <= 0)
2981 return rc;
2982
2983 cifs_sb = CIFS_FILE_SB(file);
2984 cfile = file->private_data;
2985 tcon = tlink_tcon(cfile->tlink);
2986
2987 if (!tcon->ses->server->ops->async_writev)
2988 return -ENOSYS;
2989
2990 ctx = cifs_aio_ctx_alloc();
2991 if (!ctx)
2992 return -ENOMEM;
2993
2994 ctx->cfile = cifsFileInfo_get(cfile);
2995
2996 if (!is_sync_kiocb(iocb))
2997 ctx->iocb = iocb;
2998
2999 ctx->pos = iocb->ki_pos;
3000
3001 if (direct) {
3002 ctx->direct_io = true;
3003 ctx->iter = *from;
3004 ctx->len = len;
3005 } else {
3006 rc = setup_aio_ctx_iter(ctx, from, WRITE);
3007 if (rc) {
3008 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3009 return rc;
3010 }
3011 }
3012
3013 /* grab a lock here because the write response handlers can access ctx */
3014 mutex_lock(&ctx->aio_mutex);
3015
3016 rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
3017 cfile, cifs_sb, &ctx->list, ctx);
3018
3019 /*
3020 * If at least one write was successfully sent, then discard any rc
3021 * value from the later writes. If the other writes succeed, we'll
3022 * end up returning whatever was written. If they fail, we'll get
3023 * a new rc value from that.
3024 */
3025 if (!list_empty(&ctx->list))
3026 rc = 0;
3027
3028 mutex_unlock(&ctx->aio_mutex);
3029
3030 if (rc) {
3031 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3032 return rc;
3033 }
3034
3035 if (!is_sync_kiocb(iocb)) {
3036 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3037 return -EIOCBQUEUED;
3038 }
3039
3040 rc = wait_for_completion_killable(&ctx->done);
3041 if (rc) {
3042 mutex_lock(&ctx->aio_mutex);
3043 ctx->rc = rc = -EINTR;
3044 total_written = ctx->total_len;
3045 mutex_unlock(&ctx->aio_mutex);
3046 } else {
3047 rc = ctx->rc;
3048 total_written = ctx->total_len;
3049 }
3050
3051 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3052
3053 if (unlikely(!total_written))
3054 return rc;
3055
3056 iocb->ki_pos += total_written;
3057 return total_written;
3058}
3059
3060ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3061{
3062 return __cifs_writev(iocb, from, true);
3063}
3064
3065ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3066{
3067 return __cifs_writev(iocb, from, false);
3068}
3069
3070static ssize_t
3071cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3072{
3073 struct file *file = iocb->ki_filp;
3074 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3075 struct inode *inode = file->f_mapping->host;
3076 struct cifsInodeInfo *cinode = CIFS_I(inode);
3077 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3078 ssize_t rc;
3079
3080 inode_lock(inode);
3081 /*
3082 * We need to hold the sem to be sure nobody modifies the lock list
3083 * with a brlock that prevents writing.
3084 */
3085 down_read(&cinode->lock_sem);
3086
3087 rc = generic_write_checks(iocb, from);
3088 if (rc <= 0)
3089 goto out;
3090
3091 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3092 server->vals->exclusive_lock_type, 0,
3093 NULL, CIFS_WRITE_OP))
3094 rc = __generic_file_write_iter(iocb, from);
3095 else
3096 rc = -EACCES;
3097out:
3098 up_read(&cinode->lock_sem);
3099 inode_unlock(inode);
3100
3101 if (rc > 0)
3102 rc = generic_write_sync(iocb, rc);
3103 return rc;
3104}
3105
3106ssize_t
3107cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3108{
3109 struct inode *inode = file_inode(iocb->ki_filp);
3110 struct cifsInodeInfo *cinode = CIFS_I(inode);
3111 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3112 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3113 iocb->ki_filp->private_data;
3114 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3115 ssize_t written;
3116
3117 written = cifs_get_writer(cinode);
3118 if (written)
3119 return written;
3120
3121 if (CIFS_CACHE_WRITE(cinode)) {
3122 if (cap_unix(tcon->ses) &&
3123 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3124 && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3125 written = generic_file_write_iter(iocb, from);
3126 goto out;
3127 }
3128 written = cifs_writev(iocb, from);
3129 goto out;
3130 }
3131 /*
3132 * For non-oplocked files in strict cache mode we need to write the data
3133 * to the server exactly from pos to pos+len-1 rather than flush all
3134 * affected pages, because doing so may cause an error with mandatory
3135 * locks on these pages but not on the region from pos to pos+len-1.
3136 */
3137 written = cifs_user_writev(iocb, from);
3138 if (CIFS_CACHE_READ(cinode)) {
3139 /*
3140 * We have read level caching and we have just sent a write
3141 * request to the server thus making data in the cache stale.
3142 * Zap the cache and set oplock/lease level to NONE to avoid
3143 * reading stale data from the cache. All subsequent read
3144 * operations will read new data from the server.
3145 */
3146 cifs_zap_mapping(inode);
3147 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3148 inode);
3149 cinode->oplock = 0;
3150 }
3151out:
3152 cifs_put_writer(cinode);
3153 return written;
3154}
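/*
 * Summary of the dispatch above (descriptive only): with a write-caching
 * oplock and POSIX byte-range lock support, writes go through
 * generic_file_write_iter(); with a write-caching oplock but mandatory
 * brlocks, through cifs_writev(); without a write-caching oplock, data
 * goes straight to the server via cifs_user_writev(), and any read cache
 * is zapped afterwards.
 */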
3155
3156static struct cifs_readdata *
3157cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3158{
3159 struct cifs_readdata *rdata;
3160
3161 rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3162 if (rdata != NULL) {
3163 rdata->pages = pages;
3164 kref_init(&rdata->refcount);
3165 INIT_LIST_HEAD(&rdata->list);
3166 init_completion(&rdata->done);
3167 INIT_WORK(&rdata->work, complete);
3168 }
3169
3170 return rdata;
3171}
3172
3173static struct cifs_readdata *
3174cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3175{
3176 struct page **pages =
3177 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3178 struct cifs_readdata *ret = NULL;
3179
3180 if (pages) {
3181 ret = cifs_readdata_direct_alloc(pages, complete);
3182 if (!ret)
3183 kfree(pages);
3184 }
3185
3186 return ret;
3187}
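/*
 * Illustrative sketch (not part of this file) of the readdata lifetime:
 * the structure is refcounted, so it is released with kref_put() against
 * cifs_readdata_release() rather than freed directly.  The demo_* name
 * and the completion routine chosen here are for illustration only.
 */
#if 0	/* example only, never compiled */
static void demo_readdata_lifetime(void)
{
	struct cifs_readdata *rdata;

	rdata = cifs_readdata_alloc(4, cifs_readv_complete);
	if (!rdata)
		return;

	/* ... populate rdata->pages[], submit and wait for the read ... */

	kref_put(&rdata->refcount, cifs_readdata_release);
}
#endif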
3188
3189void
3190cifs_readdata_release(struct kref *refcount)
3191{
3192 struct cifs_readdata *rdata = container_of(refcount,
3193 struct cifs_readdata, refcount);
3194#ifdef CONFIG_CIFS_SMB_DIRECT
3195 if (rdata->mr) {
3196 smbd_deregister_mr(rdata->mr);
3197 rdata->mr = NULL;
3198 }
3199#endif
3200 if (rdata->cfile)
3201 cifsFileInfo_put(rdata->cfile);
3202
3203 kvfree(rdata->pages);
3204 kfree(rdata);
3205}
3206
3207static int
3208cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3209{
3210 int rc = 0;
3211 struct page *page;
3212 unsigned int i;
3213
3214 for (i = 0; i < nr_pages; i++) {
3215 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3216 if (!page) {
3217 rc = -ENOMEM;
3218 break;
3219 }
3220 rdata->pages[i] = page;
3221 }
3222
3223 if (rc) {
3224 for (i = 0; i < nr_pages; i++) {
3225 put_page(rdata->pages[i]);
3226 rdata->pages[i] = NULL;
3227 }
3228 }
3229 return rc;
3230}
3231
3232static void
3233cifs_uncached_readdata_release(struct kref *refcount)
3234{
3235 struct cifs_readdata *rdata = container_of(refcount,
3236 struct cifs_readdata, refcount);
3237 unsigned int i;
3238
3239 kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3240 for (i = 0; i < rdata->nr_pages; i++) {
3241 put_page(rdata->pages[i]);
3242 }
3243 cifs_readdata_release(refcount);
3244}
3245
3246/**
3247 * cifs_readdata_to_iov - copy data from pages in response to an iovec
3248 * @rdata: the readdata response with list of pages holding data
3249 * @iter: destination for our data
3250 *
3251 * This function copies data from a list of pages in a readdata response into
3252 * an array of iovecs. It will first calculate where the data should go
3253 * based on the info in the readdata and then copy the data into that spot.
3254 */
3255static int
3256cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3257{
3258 size_t remaining = rdata->got_bytes;
3259 unsigned int i;
3260
3261 for (i = 0; i < rdata->nr_pages; i++) {
3262 struct page *page = rdata->pages[i];
3263 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3264 size_t written;
3265
3266 if (unlikely(iov_iter_is_pipe(iter))) {
3267 void *addr = kmap_atomic(page);
3268
3269 written = copy_to_iter(addr, copy, iter);
3270 kunmap_atomic(addr);
3271 } else
3272 written = copy_page_to_iter(page, 0, copy, iter);
3273 remaining -= written;
3274 if (written < copy && iov_iter_count(iter) > 0)
3275 break;
3276 }
3277 return remaining ? -EFAULT : 0;
3278}
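/*
 * Worked example (hypothetical numbers): with got_bytes == 5000 and 4K
 * pages, the loop above copies 4096 bytes from page 0 and 904 from page 1,
 * leaving remaining == 0, so the function returns 0.  If the destination
 * iovec is too small, remaining stays non-zero and -EFAULT is returned.
 */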
3279
3280static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3281
3282static void
3283cifs_uncached_readv_complete(struct work_struct *work)
3284{
3285 struct cifs_readdata *rdata = container_of(work,
3286 struct cifs_readdata, work);
3287
3288 complete(&rdata->done);
3289 collect_uncached_read_data(rdata->ctx);
3290 /* the call below may free the last ref to the aio ctx */
3291 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3292}
3293
3294static int
3295uncached_fill_pages(struct TCP_Server_Info *server,
3296 struct cifs_readdata *rdata, struct iov_iter *iter,
3297 unsigned int len)
3298{
3299 int result = 0;
3300 unsigned int i;
3301 unsigned int nr_pages = rdata->nr_pages;
3302 unsigned int page_offset = rdata->page_offset;
3303
3304 rdata->got_bytes = 0;
3305 rdata->tailsz = PAGE_SIZE;
3306 for (i = 0; i < nr_pages; i++) {
3307 struct page *page = rdata->pages[i];
3308 size_t n;
3309 unsigned int segment_size = rdata->pagesz;
3310
3311 if (i == 0)
3312 segment_size -= page_offset;
3313 else
3314 page_offset = 0;
3315
3316
3317 if (len <= 0) {
3318 /* no need to hold page hostage */
3319 rdata->pages[i] = NULL;
3320 rdata->nr_pages--;
3321 put_page(page);
3322 continue;
3323 }
3324
3325 n = len;
3326 if (len >= segment_size)
3327 /* enough data to fill the page */
3328 n = segment_size;
3329 else
3330 rdata->tailsz = len;
3331 len -= n;
3332
3333 if (iter)
3334 result = copy_page_from_iter(
3335 page, page_offset, n, iter);
3336#ifdef CONFIG_CIFS_SMB_DIRECT
3337 else if (rdata->mr)
3338 result = n;
3339#endif
3340 else
3341 result = cifs_read_page_from_socket(
3342 server, page, page_offset, n);
3343 if (result < 0)
3344 break;
3345
3346 rdata->got_bytes += result;
3347 }
3348
3349 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3350 rdata->got_bytes : result;
3351}
3352
3353static int
3354cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3355 struct cifs_readdata *rdata, unsigned int len)
3356{
3357 return uncached_fill_pages(server, rdata, NULL, len);
3358}
3359
3360static int
3361cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3362 struct cifs_readdata *rdata,
3363 struct iov_iter *iter)
3364{
3365 return uncached_fill_pages(server, rdata, iter, iter->count);
3366}
3367
3368static int cifs_resend_rdata(struct cifs_readdata *rdata,
3369 struct list_head *rdata_list,
3370 struct cifs_aio_ctx *ctx)
3371{
3372 unsigned int rsize;
3373 struct cifs_credits credits;
3374 int rc;
3375 struct TCP_Server_Info *server =
3376 tlink_tcon(rdata->cfile->tlink)->ses->server;
3377
3378 do {
3379 if (rdata->cfile->invalidHandle) {
3380 rc = cifs_reopen_file(rdata->cfile, true);
3381 if (rc == -EAGAIN)
3382 continue;
3383 else if (rc)
3384 break;
3385 }
3386
3387 /*
3388 * Wait for credits to resend this rdata.
3389 * Note: we attempt to resend the whole rdata in one piece,
3390 * not in segments
3391 */
3392 do {
3393 rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3394 &rsize, &credits);
3395
3396 if (rc)
3397 goto fail;
3398
3399 if (rsize < rdata->bytes) {
3400 add_credits_and_wake_if(server, &credits, 0);
3401 msleep(1000);
3402 }
3403 } while (rsize < rdata->bytes);
3404 rdata->credits = credits;
3405
3406 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3407 if (!rc) {
3408 if (rdata->cfile->invalidHandle)
3409 rc = -EAGAIN;
3410 else
3411 rc = server->ops->async_readv(rdata);
3412 }
3413
3414 /* If the read was successfully sent, we are done */
3415 if (!rc) {
3416 /* Add to aio pending list */
3417 list_add_tail(&rdata->list, rdata_list);
3418 return 0;
3419 }
3420
3421 /* Roll back credits and retry if needed */
3422 add_credits_and_wake_if(server, &rdata->credits, 0);
3423 } while (rc == -EAGAIN);
3424
3425fail:
3426 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3427 return rc;
3428}
3429
3430static int
3431cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3432 struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3433 struct cifs_aio_ctx *ctx)
3434{
3435 struct cifs_readdata *rdata;
3436 unsigned int npages, rsize;
3437 struct cifs_credits credits_on_stack;
3438 struct cifs_credits *credits = &credits_on_stack;
3439 size_t cur_len;
3440 int rc;
3441 pid_t pid;
3442 struct TCP_Server_Info *server;
3443 struct page **pagevec;
3444 size_t start;
3445 struct iov_iter direct_iov = ctx->iter;
3446
3447 server = tlink_tcon(open_file->tlink)->ses->server;
3448
3449 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3450 pid = open_file->pid;
3451 else
3452 pid = current->tgid;
3453
3454 if (ctx->direct_io)
3455 iov_iter_advance(&direct_iov, offset - ctx->pos);
3456
3457 do {
3458 if (open_file->invalidHandle) {
3459 rc = cifs_reopen_file(open_file, true);
3460 if (rc == -EAGAIN)
3461 continue;
3462 else if (rc)
3463 break;
3464 }
3465
3466 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3467 &rsize, credits);
3468 if (rc)
3469 break;
3470
3471 cur_len = min_t(const size_t, len, rsize);
3472
3473 if (ctx->direct_io) {
3474 ssize_t result;
3475
3476 result = iov_iter_get_pages_alloc(
3477 &direct_iov, &pagevec,
3478 cur_len, &start);
3479 if (result < 0) {
3480 cifs_dbg(VFS,
3481 "couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
3482 result,
3483 direct_iov.type,
3484 direct_iov.iov_offset,
3485 direct_iov.count);
3486
3487 dump_stack();
3488
3489 rc = result;
3490 add_credits_and_wake_if(server, credits, 0);
3491 break;
3492 }
3493 cur_len = (size_t)result;
3494 iov_iter_advance(&direct_iov, cur_len);
3495
3496 rdata = cifs_readdata_direct_alloc(
3497 pagevec, cifs_uncached_readv_complete);
3498 if (!rdata) {
3499 add_credits_and_wake_if(server, credits, 0);
3500 rc = -ENOMEM;
3501 break;
3502 }
3503
3504 npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
3505 rdata->page_offset = start;
3506 rdata->tailsz = npages > 1 ?
3507 cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
3508 cur_len;
3509
3510 } else {
3511
3512 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
3513 /* allocate a readdata struct */
3514 rdata = cifs_readdata_alloc(npages,
3515 cifs_uncached_readv_complete);
3516 if (!rdata) {
3517 add_credits_and_wake_if(server, credits, 0);
3518 rc = -ENOMEM;
3519 break;
3520 }
3521
3522 rc = cifs_read_allocate_pages(rdata, npages);
3523 if (rc) {
3524 kvfree(rdata->pages);
3525 kfree(rdata);
3526 add_credits_and_wake_if(server, credits, 0);
3527 break;
3528 }
3529
3530 rdata->tailsz = PAGE_SIZE;
3531 }
3532
3533 rdata->cfile = cifsFileInfo_get(open_file);
3534 rdata->nr_pages = npages;
3535 rdata->offset = offset;
3536 rdata->bytes = cur_len;
3537 rdata->pid = pid;
3538 rdata->pagesz = PAGE_SIZE;
3539 rdata->read_into_pages = cifs_uncached_read_into_pages;
3540 rdata->copy_into_pages = cifs_uncached_copy_into_pages;
3541 rdata->credits = credits_on_stack;
3542 rdata->ctx = ctx;
3543 kref_get(&ctx->refcount);
3544
3545 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3546
3547 if (!rc) {
3548 if (rdata->cfile->invalidHandle)
3549 rc = -EAGAIN;
3550 else
3551 rc = server->ops->async_readv(rdata);
3552 }
3553
3554 if (rc) {
3555 add_credits_and_wake_if(server, &rdata->credits, 0);
3556 kref_put(&rdata->refcount,
3557 cifs_uncached_readdata_release);
3558 if (rc == -EAGAIN) {
3559 iov_iter_revert(&direct_iov, cur_len);
3560 continue;
3561 }
3562 break;
3563 }
3564
3565 list_add_tail(&rdata->list, rdata_list);
3566 offset += cur_len;
3567 len -= cur_len;
3568 } while (len > 0);
3569
3570 return rc;
3571}
3572
3573static void
3574collect_uncached_read_data(struct cifs_aio_ctx *ctx)
3575{
3576 struct cifs_readdata *rdata, *tmp;
3577 struct iov_iter *to = &ctx->iter;
3578 struct cifs_sb_info *cifs_sb;
3579 struct cifs_tcon *tcon;
3580 int rc;
3581
3582 tcon = tlink_tcon(ctx->cfile->tlink);
3583 cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
3584
3585 mutex_lock(&ctx->aio_mutex);
3586
3587 if (list_empty(&ctx->list)) {
3588 mutex_unlock(&ctx->aio_mutex);
3589 return;
3590 }
3591
3592 rc = ctx->rc;
3593 /* the loop below should proceed in the order of increasing offsets */
3594again:
3595 list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
3596 if (!rc) {
3597 if (!try_wait_for_completion(&rdata->done)) {
3598 mutex_unlock(&ctx->aio_mutex);
3599 return;
3600 }
3601
3602 if (rdata->result == -EAGAIN) {
3603 /* resend call if it's a retryable error */
3604 struct list_head tmp_list;
3605 unsigned int got_bytes = rdata->got_bytes;
3606
3607 list_del_init(&rdata->list);
3608 INIT_LIST_HEAD(&tmp_list);
3609
3610 /*
3611 * We got part of the data and then a reconnect
3612 * happened -- fill the buffer and continue
3613 * reading.
3614 */
3615 if (got_bytes && got_bytes < rdata->bytes) {
3616 rc = 0;
3617 if (!ctx->direct_io)
3618 rc = cifs_readdata_to_iov(rdata, to);
3619 if (rc) {
3620 kref_put(&rdata->refcount,
3621 cifs_uncached_readdata_release);
3622 continue;
3623 }
3624 }
3625
3626 if (ctx->direct_io) {
3627 /*
3628 * Reuse rdata as this is
3629 * direct I/O
3630 */
3631 rc = cifs_resend_rdata(
3632 rdata,
3633 &tmp_list, ctx);
3634 } else {
3635 rc = cifs_send_async_read(
3636 rdata->offset + got_bytes,
3637 rdata->bytes - got_bytes,
3638 rdata->cfile, cifs_sb,
3639 &tmp_list, ctx);
3640
3641 kref_put(&rdata->refcount,
3642 cifs_uncached_readdata_release);
3643 }
3644
3645 list_splice(&tmp_list, &ctx->list);
3646
3647 goto again;
3648 } else if (rdata->result)
3649 rc = rdata->result;
3650 else if (!ctx->direct_io)
3651 rc = cifs_readdata_to_iov(rdata, to);
3652
3653 /* if there was a short read -- discard anything left */
3654 if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3655 rc = -ENODATA;
3656
3657 ctx->total_len += rdata->got_bytes;
3658 }
3659 list_del_init(&rdata->list);
3660 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3661 }
3662
3663 if (!ctx->direct_io)
3664 ctx->total_len = ctx->len - iov_iter_count(to);
3665
3666 /* mask nodata case */
3667 if (rc == -ENODATA)
3668 rc = 0;
3669
3670 ctx->rc = (rc == 0) ? ctx->total_len : rc;
3671
3672 mutex_unlock(&ctx->aio_mutex);
3673
3674 if (ctx->iocb && ctx->iocb->ki_complete)
3675 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3676 else
3677 complete(&ctx->done);
3678}
3679
3680static ssize_t __cifs_readv(
3681 struct kiocb *iocb, struct iov_iter *to, bool direct)
3682{
3683 size_t len;
3684 struct file *file = iocb->ki_filp;
3685 struct cifs_sb_info *cifs_sb;
3686 struct cifsFileInfo *cfile;
3687 struct cifs_tcon *tcon;
3688 ssize_t rc, total_read = 0;
3689 loff_t offset = iocb->ki_pos;
3690 struct cifs_aio_ctx *ctx;
3691
3692 /*
3693 * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC, so
3694 * fall back to the data copy read path.
3695 * This could be improved by getting pages directly in ITER_KVEC.
3696 */
3697 if (direct && to->type & ITER_KVEC) {
3698 cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
3699 direct = false;
3700 }
3701
3702 len = iov_iter_count(to);
3703 if (!len)
3704 return 0;
3705
3706 cifs_sb = CIFS_FILE_SB(file);
3707 cfile = file->private_data;
3708 tcon = tlink_tcon(cfile->tlink);
3709
3710 if (!tcon->ses->server->ops->async_readv)
3711 return -ENOSYS;
3712
3713 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3714 cifs_dbg(FYI, "attempting read on write only file instance\n");
3715
3716 ctx = cifs_aio_ctx_alloc();
3717 if (!ctx)
3718 return -ENOMEM;
3719
3720 ctx->cfile = cifsFileInfo_get(cfile);
3721
3722 if (!is_sync_kiocb(iocb))
3723 ctx->iocb = iocb;
3724
3725 if (iter_is_iovec(to))
3726 ctx->should_dirty = true;
3727
3728 if (direct) {
3729 ctx->pos = offset;
3730 ctx->direct_io = true;
3731 ctx->iter = *to;
3732 ctx->len = len;
3733 } else {
3734 rc = setup_aio_ctx_iter(ctx, to, READ);
3735 if (rc) {
3736 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3737 return rc;
3738 }
3739 len = ctx->len;
3740 }
3741
3742 /* grab a lock here because the read response handlers can access ctx */
3743 mutex_lock(&ctx->aio_mutex);
3744
3745 rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
3746
3747 /* if at least one read request was sent successfully, reset rc */
3748 if (!list_empty(&ctx->list))
3749 rc = 0;
3750
3751 mutex_unlock(&ctx->aio_mutex);
3752
3753 if (rc) {
3754 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3755 return rc;
3756 }
3757
3758 if (!is_sync_kiocb(iocb)) {
3759 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3760 return -EIOCBQUEUED;
3761 }
3762
3763 rc = wait_for_completion_killable(&ctx->done);
3764 if (rc) {
3765 mutex_lock(&ctx->aio_mutex);
3766 ctx->rc = rc = -EINTR;
3767 total_read = ctx->total_len;
3768 mutex_unlock(&ctx->aio_mutex);
3769 } else {
3770 rc = ctx->rc;
3771 total_read = ctx->total_len;
3772 }
3773
3774 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3775
3776 if (total_read) {
3777 iocb->ki_pos += total_read;
3778 return total_read;
3779 }
3780 return rc;
3781}
3782
3783ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
3784{
3785 return __cifs_readv(iocb, to, true);
3786}
3787
3788ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
3789{
3790 return __cifs_readv(iocb, to, false);
3791}
3792
3793ssize_t
3794cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3795{
3796 struct inode *inode = file_inode(iocb->ki_filp);
3797 struct cifsInodeInfo *cinode = CIFS_I(inode);
3798 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3799 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3800 iocb->ki_filp->private_data;
3801 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3802 int rc = -EACCES;
3803
3804 /*
3805 * In strict cache mode we need to read from the server every time
3806 * if we don't have a level II oplock, because the server can delay
3807 * mtime changes - so we can't decide whether to invalidate the inode.
3808 * We can also fail reading pages if there are mandatory locks on
3809 * pages affected by this read but not on the region from pos to
3810 * pos+len-1.
3811 */
3812 if (!CIFS_CACHE_READ(cinode))
3813 return cifs_user_readv(iocb, to);
3814
3815 if (cap_unix(tcon->ses) &&
3816 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3817 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
3818 return generic_file_read_iter(iocb, to);
3819
3820 /*
3821 * We need to hold the sem to be sure nobody modifies the lock list
3822 * with a brlock that prevents reading.
3823 */
3824 down_read(&cinode->lock_sem);
3825 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
3826 tcon->ses->server->vals->shared_lock_type,
3827 0, NULL, CIFS_READ_OP))
3828 rc = generic_file_read_iter(iocb, to);
3829 up_read(&cinode->lock_sem);
3830 return rc;
3831}
3832
3833static ssize_t
3834cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
3835{
3836 int rc = -EACCES;
3837 unsigned int bytes_read = 0;
3838 unsigned int total_read;
3839 unsigned int current_read_size;
3840 unsigned int rsize;
3841 struct cifs_sb_info *cifs_sb;
3842 struct cifs_tcon *tcon;
3843 struct TCP_Server_Info *server;
3844 unsigned int xid;
3845 char *cur_offset;
3846 struct cifsFileInfo *open_file;
3847 struct cifs_io_parms io_parms;
3848 int buf_type = CIFS_NO_BUFFER;
3849 __u32 pid;
3850
3851 xid = get_xid();
3852 cifs_sb = CIFS_FILE_SB(file);
3853
3854 /* FIXME: set up handlers for larger reads and/or convert to async */
3855 rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
3856
3857 if (file->private_data == NULL) {
3858 rc = -EBADF;
3859 free_xid(xid);
3860 return rc;
3861 }
3862 open_file = file->private_data;
3863 tcon = tlink_tcon(open_file->tlink);
3864 server = tcon->ses->server;
3865
3866 if (!server->ops->sync_read) {
3867 free_xid(xid);
3868 return -ENOSYS;
3869 }
3870
3871 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3872 pid = open_file->pid;
3873 else
3874 pid = current->tgid;
3875
3876 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3877 cifs_dbg(FYI, "attempting read on write only file instance\n");
3878
3879 for (total_read = 0, cur_offset = read_data; read_size > total_read;
3880 total_read += bytes_read, cur_offset += bytes_read) {
3881 do {
3882 current_read_size = min_t(uint, read_size - total_read,
3883 rsize);
3884 /*
3885 * For Windows ME and 9x we do not want to request more
3886 * than was negotiated, since the server will refuse
3887 * the read otherwise.
3888 */
3889 if ((tcon->ses) && !(tcon->ses->capabilities &
3890 tcon->ses->server->vals->cap_large_files)) {
3891 current_read_size = min_t(uint,
3892 current_read_size, CIFSMaxBufSize);
3893 }
3894 if (open_file->invalidHandle) {
3895 rc = cifs_reopen_file(open_file, true);
3896 if (rc != 0)
3897 break;
3898 }
3899 io_parms.pid = pid;
3900 io_parms.tcon = tcon;
3901 io_parms.offset = *offset;
3902 io_parms.length = current_read_size;
3903 rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
3904 &bytes_read, &cur_offset,
3905 &buf_type);
3906 } while (rc == -EAGAIN);
3907
3908 if (rc || (bytes_read == 0)) {
3909 if (total_read) {
3910 break;
3911 } else {
3912 free_xid(xid);
3913 return rc;
3914 }
3915 } else {
3916 cifs_stats_bytes_read(tcon, total_read);
3917 *offset += bytes_read;
3918 }
3919 }
3920 free_xid(xid);
3921 return total_read;
3922}
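/*
 * Illustrative sketch (not part of this file): cifs_read() is a plain
 * synchronous, positional read into a kernel buffer; it advances *offset
 * by the number of bytes read and returns the total read (or an rc if
 * nothing could be read).  The demo_* name is hypothetical.
 */
#if 0	/* example only, never compiled */
static ssize_t demo_sync_read(struct file *filp, char *kbuf, size_t count)
{
	loff_t pos = 0;

	return cifs_read(filp, kbuf, count, &pos);
}
#endif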
3923
3924/*
3925 * If the page is mmap'ed into a process' page tables, then we need to make
3926 * sure that it doesn't change while being written back.
3927 */
3928static vm_fault_t
3929cifs_page_mkwrite(struct vm_fault *vmf)
3930{
3931 struct page *page = vmf->page;
3932
3933 lock_page(page);
3934 return VM_FAULT_LOCKED;
3935}
3936
3937static const struct vm_operations_struct cifs_file_vm_ops = {
3938 .fault = filemap_fault,
3939 .map_pages = filemap_map_pages,
3940 .page_mkwrite = cifs_page_mkwrite,
3941};
3942
3943int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3944{
3945 int xid, rc = 0;
3946 struct inode *inode = file_inode(file);
3947
3948 xid = get_xid();
3949
3950 if (!CIFS_CACHE_READ(CIFS_I(inode)))
3951 rc = cifs_zap_mapping(inode);
3952 if (!rc)
3953 rc = generic_file_mmap(file, vma);
3954 if (!rc)
3955 vma->vm_ops = &cifs_file_vm_ops;
3956
3957 free_xid(xid);
3958 return rc;
3959}
3960
3961int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3962{
3963 int rc, xid;
3964
3965 xid = get_xid();
3966
3967 rc = cifs_revalidate_file(file);
3968 if (rc)
3969 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
3970 rc);
3971 if (!rc)
3972 rc = generic_file_mmap(file, vma);
3973 if (!rc)
3974 vma->vm_ops = &cifs_file_vm_ops;
3975
3976 free_xid(xid);
3977 return rc;
3978}
3979
3980static void
3981cifs_readv_complete(struct work_struct *work)
3982{
3983 unsigned int i, got_bytes;
3984 struct cifs_readdata *rdata = container_of(work,
3985 struct cifs_readdata, work);
3986
3987 got_bytes = rdata->got_bytes;
3988 for (i = 0; i < rdata->nr_pages; i++) {
3989 struct page *page = rdata->pages[i];
3990
3991 lru_cache_add_file(page);
3992
3993 if (rdata->result == 0 ||
3994 (rdata->result == -EAGAIN && got_bytes)) {
3995 flush_dcache_page(page);
3996 SetPageUptodate(page);
3997 }
3998
3999 unlock_page(page);
4000
4001 if (rdata->result == 0 ||
4002 (rdata->result == -EAGAIN && got_bytes))
4003 cifs_readpage_to_fscache(rdata->mapping->host, page);
4004
4005 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
4006
4007 put_page(page);
4008 rdata->pages[i] = NULL;
4009 }
4010 kref_put(&rdata->refcount, cifs_readdata_release);
4011}
4012
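/*
 * Fill the pages attached to @rdata with up to @len bytes of reply
 * data, either copied from @iter (when the payload has already been
 * received and, e.g., decrypted) or read straight from the server
 * socket. A short reply leaves the partial tail page zero-filled;
 * pages past both the reply and the server's presumed EOF are zeroed
 * and marked up to date, and any other unneeded pages are released.
 * With SMB Direct the data was already placed by RDMA, so the pages
 * are only accounted for.
 */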
4013static int
4014readpages_fill_pages(struct TCP_Server_Info *server,
4015 struct cifs_readdata *rdata, struct iov_iter *iter,
4016 unsigned int len)
4017{
4018 int result = 0;
4019 unsigned int i;
4020 u64 eof;
4021 pgoff_t eof_index;
4022 unsigned int nr_pages = rdata->nr_pages;
4023 unsigned int page_offset = rdata->page_offset;
4024
4025 /* determine the eof that the server (probably) has */
4026 eof = CIFS_I(rdata->mapping->host)->server_eof;
4027 eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
4028 cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
4029
4030 rdata->got_bytes = 0;
4031 rdata->tailsz = PAGE_SIZE;
4032 for (i = 0; i < nr_pages; i++) {
4033 struct page *page = rdata->pages[i];
4034 unsigned int to_read = rdata->pagesz;
4035 size_t n;
4036
4037 if (i == 0)
4038 to_read -= page_offset;
4039 else
4040 page_offset = 0;
4041
4042 n = to_read;
4043
4044 if (len >= to_read) {
4045 len -= to_read;
4046 } else if (len > 0) {
4047 /* enough for partial page, fill and zero the rest */
4048 zero_user(page, len + page_offset, to_read - len);
4049 n = rdata->tailsz = len;
4050 len = 0;
4051 } else if (page->index > eof_index) {
4052 /*
4053 * The VFS will not try to do readahead past the
4054 * i_size, but it's possible that we have outstanding
4055 * writes with gaps in the middle and the i_size hasn't
4056 * caught up yet. Populate those with zeroed out pages
4057 * to prevent the VFS from repeatedly attempting to
4058 * fill them until the writes are flushed.
4059 */
4060 zero_user(page, 0, PAGE_SIZE);
4061 lru_cache_add_file(page);
4062 flush_dcache_page(page);
4063 SetPageUptodate(page);
4064 unlock_page(page);
4065 put_page(page);
4066 rdata->pages[i] = NULL;
4067 rdata->nr_pages--;
4068 continue;
4069 } else {
4070 /* no need to hold page hostage */
4071 lru_cache_add_file(page);
4072 unlock_page(page);
4073 put_page(page);
4074 rdata->pages[i] = NULL;
4075 rdata->nr_pages--;
4076 continue;
4077 }
4078
4079 if (iter)
4080 result = copy_page_from_iter(
4081 page, page_offset, n, iter);
4082#ifdef CONFIG_CIFS_SMB_DIRECT
4083 else if (rdata->mr)
4084 result = n;
4085#endif
4086 else
4087 result = cifs_read_page_from_socket(
4088 server, page, page_offset, n);
4089 if (result < 0)
4090 break;
4091
4092 rdata->got_bytes += result;
4093 }
4094
4095 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
4096 rdata->got_bytes : result;
4097}
4098
4099static int
4100cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
4101 struct cifs_readdata *rdata, unsigned int len)
4102{
4103 return readpages_fill_pages(server, rdata, NULL, len);
4104}
4105
4106static int
4107cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
4108 struct cifs_readdata *rdata,
4109 struct iov_iter *iter)
4110{
4111 return readpages_fill_pages(server, rdata, iter, iter->count);
4112}
4113
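/*
 * Peel pages off the head of @page_list and insert them into the page
 * cache, stopping at the first index discontinuity, once @rsize bytes
 * are covered, or when an insertion fails. On success the locked
 * pages sit on @tmplist and @offset, @bytes and @nr_pages describe
 * the contiguous read they form.
 */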
4114static int
4115readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
4116 unsigned int rsize, struct list_head *tmplist,
4117 unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
4118{
4119 struct page *page, *tpage;
4120 unsigned int expected_index;
4121 int rc;
4122 gfp_t gfp = readahead_gfp_mask(mapping);
4123
4124 INIT_LIST_HEAD(tmplist);
4125
4126 page = lru_to_page(page_list);
4127
4128 /*
4129 * Lock the page and put it in the cache. Since no one else
4130 * should have access to this page, we're safe to simply set
4131 * PG_locked without checking it first.
4132 */
4133 __SetPageLocked(page);
4134 rc = add_to_page_cache_locked(page, mapping,
4135 page->index, gfp);
4136
4137 /* give up if we can't stick it in the cache */
4138 if (rc) {
4139 __ClearPageLocked(page);
4140 return rc;
4141 }
4142
4143 /* move first page to the tmplist */
4144 *offset = (loff_t)page->index << PAGE_SHIFT;
4145 *bytes = PAGE_SIZE;
4146 *nr_pages = 1;
4147 list_move_tail(&page->lru, tmplist);
4148
4149 /* now try and add more pages onto the request */
4150 expected_index = page->index + 1;
4151 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
4152 /* discontinuity ? */
4153 if (page->index != expected_index)
4154 break;
4155
4156 /* would this page push the read over the rsize? */
4157 if (*bytes + PAGE_SIZE > rsize)
4158 break;
4159
4160 __SetPageLocked(page);
4161 if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
4162 __ClearPageLocked(page);
4163 break;
4164 }
4165 list_move_tail(&page->lru, tmplist);
4166 (*bytes) += PAGE_SIZE;
4167 expected_index++;
4168 (*nr_pages)++;
4169 }
4170 return rc;
4171}
4172
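/*
 * ->readpages() for cifs: satisfy whatever we can from fscache, then
 * carve the remaining pages into rsize-bounded, index-contiguous
 * batches, reserving credits for each batch before issuing it as an
 * async read.
 */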
4173static int cifs_readpages(struct file *file, struct address_space *mapping,
4174 struct list_head *page_list, unsigned num_pages)
4175{
4176 int rc;
4177 struct list_head tmplist;
4178 struct cifsFileInfo *open_file = file->private_data;
4179 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
4180 struct TCP_Server_Info *server;
4181 pid_t pid;
4182 unsigned int xid;
4183
4184 xid = get_xid();
4185	/*
4186	 * Read as many pages as possible from fscache, which returns -ENOBUFS
4187	 * immediately if the cookie is negative.
4188	 *
4189	 * After this point, every page in the list might have PG_fscache set,
4190	 * so we will need to clear that bit from every page we don't use.
4191	 */
4192 rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
4193 &num_pages);
4194 if (rc == 0) {
4195 free_xid(xid);
4196 return rc;
4197 }
4198
4199 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4200 pid = open_file->pid;
4201 else
4202 pid = current->tgid;
4203
4204 rc = 0;
4205 server = tlink_tcon(open_file->tlink)->ses->server;
4206
4207 cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4208 __func__, file, mapping, num_pages);
4209
4210 /*
4211 * Start with the page at end of list and move it to private
4212 * list. Do the same with any following pages until we hit
4213 * the rsize limit, hit an index discontinuity, or run out of
4214 * pages. Issue the async read and then start the loop again
4215 * until the list is empty.
4216 *
4217 * Note that list order is important. The page_list is in
4218 * the order of declining indexes. When we put the pages in
4219 * the rdata->pages, then we want them in increasing order.
4220 */
4221 while (!list_empty(page_list)) {
4222 unsigned int i, nr_pages, bytes, rsize;
4223 loff_t offset;
4224 struct page *page, *tpage;
4225 struct cifs_readdata *rdata;
4226 struct cifs_credits credits_on_stack;
4227 struct cifs_credits *credits = &credits_on_stack;
4228
4229 if (open_file->invalidHandle) {
4230 rc = cifs_reopen_file(open_file, true);
4231 if (rc == -EAGAIN)
4232 continue;
4233 else if (rc)
4234 break;
4235 }
4236
4237 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
4238 &rsize, credits);
4239 if (rc)
4240 break;
4241
4242 /*
4243 * Give up immediately if rsize is too small to read an entire
4244 * page. The VFS will fall back to readpage. We should never
4245 * reach this point however since we set ra_pages to 0 when the
4246 * rsize is smaller than a cache page.
4247 */
4248 if (unlikely(rsize < PAGE_SIZE)) {
4249 add_credits_and_wake_if(server, credits, 0);
4250 free_xid(xid);
4251 return 0;
4252 }
4253
4254 rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
4255 &nr_pages, &offset, &bytes);
4256 if (rc) {
4257 add_credits_and_wake_if(server, credits, 0);
4258 break;
4259 }
4260
4261 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
4262 if (!rdata) {
4263 /* best to give up if we're out of mem */
4264 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4265 list_del(&page->lru);
4266 lru_cache_add_file(page);
4267 unlock_page(page);
4268 put_page(page);
4269 }
4270 rc = -ENOMEM;
4271 add_credits_and_wake_if(server, credits, 0);
4272 break;
4273 }
4274
4275 rdata->cfile = cifsFileInfo_get(open_file);
4276 rdata->mapping = mapping;
4277 rdata->offset = offset;
4278 rdata->bytes = bytes;
4279 rdata->pid = pid;
4280 rdata->pagesz = PAGE_SIZE;
4281 rdata->tailsz = PAGE_SIZE;
4282 rdata->read_into_pages = cifs_readpages_read_into_pages;
4283 rdata->copy_into_pages = cifs_readpages_copy_into_pages;
4284 rdata->credits = credits_on_stack;
4285
4286 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4287 list_del(&page->lru);
4288 rdata->pages[rdata->nr_pages++] = page;
4289 }
4290
4291 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4292
4293 if (!rc) {
4294 if (rdata->cfile->invalidHandle)
4295 rc = -EAGAIN;
4296 else
4297 rc = server->ops->async_readv(rdata);
4298 }
4299
4300 if (rc) {
4301 add_credits_and_wake_if(server, &rdata->credits, 0);
4302 for (i = 0; i < rdata->nr_pages; i++) {
4303 page = rdata->pages[i];
4304 lru_cache_add_file(page);
4305 unlock_page(page);
4306 put_page(page);
4307 }
4308 /* Fallback to the readpage in error/reconnect cases */
4309 kref_put(&rdata->refcount, cifs_readdata_release);
4310 break;
4311 }
4312
4313 kref_put(&rdata->refcount, cifs_readdata_release);
4314 }
4315
4316 /* Any pages that have been shown to fscache but didn't get added to
4317 * the pagecache must be uncached before they get returned to the
4318 * allocator.
4319 */
4320 cifs_fscache_readpages_cancel(mapping->host, page_list);
4321 free_xid(xid);
4322 return rc;
4323}
4324
4325/*
4326 * cifs_readpage_worker must be called with the page pinned
4327 */
4328static int cifs_readpage_worker(struct file *file, struct page *page,
4329 loff_t *poffset)
4330{
4331 char *read_data;
4332 int rc;
4333
4334 /* Is the page cached? */
4335 rc = cifs_readpage_from_fscache(file_inode(file), page);
4336 if (rc == 0)
4337 goto read_complete;
4338
4339 read_data = kmap(page);
4340	/* for reads over a certain size we could initiate async readahead */
4341
4342 rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4343
4344 if (rc < 0)
4345 goto io_error;
4346 else
4347 cifs_dbg(FYI, "Bytes read %d\n", rc);
4348
4349	/* we do not want atime to be less than mtime, it broke some apps */
4350	file_inode(file)->i_atime = current_time(file_inode(file));
4351	/* timespec64_compare() returns < 0 only when atime lags mtime */
4352	if (timespec64_compare(&(file_inode(file)->i_atime),
4353			       &(file_inode(file)->i_mtime)) < 0)
4354		file_inode(file)->i_atime = file_inode(file)->i_mtime;
4355
4356 if (PAGE_SIZE > rc)
4357 memset(read_data + rc, 0, PAGE_SIZE - rc);
4358
4359 flush_dcache_page(page);
4360 SetPageUptodate(page);
4361
4362 /* send this page to the cache */
4363 cifs_readpage_to_fscache(file_inode(file), page);
4364
4365 rc = 0;
4366
4367io_error:
4368 kunmap(page);
4369 unlock_page(page);
4370
4371read_complete:
4372 return rc;
4373}
4374
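/*
 * ->readpage(): synchronously bring a single page up to date, trying
 * fscache first and falling back to a wire read via
 * cifs_readpage_worker().
 */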
4375static int cifs_readpage(struct file *file, struct page *page)
4376{
4377 loff_t offset = (loff_t)page->index << PAGE_SHIFT;
4378 int rc = -EACCES;
4379 unsigned int xid;
4380
4381 xid = get_xid();
4382
4383 if (file->private_data == NULL) {
4384 rc = -EBADF;
4385 free_xid(xid);
4386 return rc;
4387 }
4388
4389 cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
4390 page, (int)offset, (int)offset);
4391
4392 rc = cifs_readpage_worker(file, page, &offset);
4393
4394 free_xid(xid);
4395 return rc;
4396}
4397
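/*
 * Return 1 if any open file instance on this inode has write access,
 * 0 otherwise. Used below to decide whether a file size reported by
 * the server may safely replace the locally cached one.
 */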
4398static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4399{
4400 struct cifsFileInfo *open_file;
4401 struct cifs_tcon *tcon =
4402 cifs_sb_master_tcon(CIFS_SB(cifs_inode->vfs_inode.i_sb));
4403
4404 spin_lock(&tcon->open_file_lock);
4405 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4406 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4407 spin_unlock(&tcon->open_file_lock);
4408 return 1;
4409 }
4410 }
4411 spin_unlock(&tcon->open_file_lock);
4412 return 0;
4413}
4414
4415/* We do not want to update the file size from the server for inodes
4416 * open for write, to avoid races with writepage extending the file.
4417 * In the future we could consider refreshing the inode only on
4418 * increases in the file size, but this is tricky to do without racing
4419 * with writebehind page caching in the current Linux kernel design.
4420 */
4421bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4422{
4423 if (!cifsInode)
4424 return true;
4425
4426 if (is_inode_writable(cifsInode)) {
4427 /* This inode is open for write at least once */
4428 struct cifs_sb_info *cifs_sb;
4429
4430 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
4431 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4432			/* since there is no page cache to corrupt on
4433			 * directio, we can change the size safely */
4434 return true;
4435 }
4436
4437 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
4438 return true;
4439
4440 return false;
4441 } else
4442 return true;
4443}
4444
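/*
 * ->write_begin(): pin the target page and decide whether its current
 * contents must be read in before the copy-in. Pages already up to
 * date, full-page writes, and (with a read oplock) writes at or past
 * EOF all skip the read. Otherwise the page is read once; per the
 * comments below, a failed read is not fatal, since write_end will
 * notice the page is not up to date and fall back to a sync write.
 */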
4445static int cifs_write_begin(struct file *file, struct address_space *mapping,
4446 loff_t pos, unsigned len, unsigned flags,
4447 struct page **pagep, void **fsdata)
4448{
4449 int oncethru = 0;
4450 pgoff_t index = pos >> PAGE_SHIFT;
4451 loff_t offset = pos & (PAGE_SIZE - 1);
4452 loff_t page_start = pos & PAGE_MASK;
4453 loff_t i_size;
4454 struct page *page;
4455 int rc = 0;
4456
4457 cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4458
4459start:
4460 page = grab_cache_page_write_begin(mapping, index, flags);
4461 if (!page) {
4462 rc = -ENOMEM;
4463 goto out;
4464 }
4465
4466 if (PageUptodate(page))
4467 goto out;
4468
4469 /*
4470 * If we write a full page it will be up to date, no need to read from
4471 * the server. If the write is short, we'll end up doing a sync write
4472 * instead.
4473 */
4474 if (len == PAGE_SIZE)
4475 goto out;
4476
4477 /*
4478 * optimize away the read when we have an oplock, and we're not
4479 * expecting to use any of the data we'd be reading in. That
4480 * is, when the page lies beyond the EOF, or straddles the EOF
4481 * and the write will cover all of the existing data.
4482 */
4483 if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4484 i_size = i_size_read(mapping->host);
4485 if (page_start >= i_size ||
4486 (offset == 0 && (pos + len) >= i_size)) {
4487 zero_user_segments(page, 0, offset,
4488 offset + len,
4489 PAGE_SIZE);
4490 /*
4491 * PageChecked means that the parts of the page
4492 * to which we're not writing are considered up
4493 * to date. Once the data is copied to the
4494 * page, it can be set uptodate.
4495 */
4496 SetPageChecked(page);
4497 goto out;
4498 }
4499 }
4500
4501 if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4502 /*
4503 * might as well read a page, it is fast enough. If we get
4504 * an error, we don't need to return it. cifs_write_end will
4505 * do a sync write instead since PG_uptodate isn't set.
4506 */
4507 cifs_readpage_worker(file, page, &page_start);
4508 put_page(page);
4509 oncethru = 1;
4510 goto start;
4511 } else {
4512		/* we could try using another file handle if there is one,
4513		 * but how would we lock it to prevent a close of that handle
4514		 * racing with this read? In any case, this page will be
4515		 * written out by write_end, so it is fine */
4516 }
4517out:
4518 *pagep = page;
4519 return rc;
4520}
4521
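/*
 * ->releasepage(): refuse to release a page that still carries
 * private data; otherwise let fscache decide whether its reference
 * to the page can be dropped.
 */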
4522static int cifs_release_page(struct page *page, gfp_t gfp)
4523{
4524 if (PagePrivate(page))
4525 return 0;
4526
4527 return cifs_fscache_release_page(page, gfp);
4528}
4529
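/*
 * ->invalidatepage(): only a whole-page invalidation needs to be
 * mirrored into fscache; a partial one leaves the cached copy alone.
 */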
4530static void cifs_invalidate_page(struct page *page, unsigned int offset,
4531 unsigned int length)
4532{
4533 struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
4534
4535 if (offset == 0 && length == PAGE_SIZE)
4536 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
4537}
4538
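/*
 * ->launder_page(): synchronously write back a dirty page (WB_SYNC_ALL)
 * before it is torn out of the mapping, then invalidate any fscache
 * copy of it.
 */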
4539static int cifs_launder_page(struct page *page)
4540{
4541 int rc = 0;
4542 loff_t range_start = page_offset(page);
4543 loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
4544 struct writeback_control wbc = {
4545 .sync_mode = WB_SYNC_ALL,
4546 .nr_to_write = 0,
4547 .range_start = range_start,
4548 .range_end = range_end,
4549 };
4550
4551 cifs_dbg(FYI, "Launder page: %p\n", page);
4552
4553 if (clear_page_dirty_for_io(page))
4554 rc = cifs_writepage_locked(page, &wbc);
4555
4556 cifs_fscache_invalidate_page(page, page->mapping->host);
4557 return rc;
4558}
4559
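/*
 * Work item run when the server breaks our oplock or lease: wait for
 * pending writers, downgrade the cached oplock state, flush dirty
 * pages (and, if read caching was lost, wait and invalidate the page
 * cache), re-push byte-range locks, and acknowledge the break to the
 * server unless the break was cancelled by a reconnect.
 */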
4560void cifs_oplock_break(struct work_struct *work)
4561{
4562 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
4563 oplock_break);
4564 struct inode *inode = d_inode(cfile->dentry);
4565 struct cifsInodeInfo *cinode = CIFS_I(inode);
4566 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4567 struct TCP_Server_Info *server = tcon->ses->server;
4568 int rc = 0;
4569
4570 wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
4571 TASK_UNINTERRUPTIBLE);
4572
4573 server->ops->downgrade_oplock(server, cinode,
4574 test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));
4575
4576 if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
4577 cifs_has_mand_locks(cinode)) {
4578 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
4579 inode);
4580 cinode->oplock = 0;
4581 }
4582
4583 if (inode && S_ISREG(inode->i_mode)) {
4584 if (CIFS_CACHE_READ(cinode))
4585 break_lease(inode, O_RDONLY);
4586 else
4587 break_lease(inode, O_WRONLY);
4588 rc = filemap_fdatawrite(inode->i_mapping);
4589 if (!CIFS_CACHE_READ(cinode)) {
4590 rc = filemap_fdatawait(inode->i_mapping);
4591 mapping_set_error(inode->i_mapping, rc);
4592 cifs_zap_mapping(inode);
4593 }
4594 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
4595 }
4596
4597 rc = cifs_push_locks(cfile);
4598 if (rc)
4599 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
4600
4601	/*
4602	 * Releasing a stale oplock after a recent reconnect of the SMB session,
4603	 * using a now incorrect file handle, is not a data integrity issue; but
4604	 * do not bother sending an oplock release if the session to the server
4605	 * is still disconnected, since the server already released the oplock.
4606	 */
4607 if (!cfile->oplock_break_cancelled) {
4608 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
4609 cinode);
4610 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
4611 }
4612 _cifsFileInfo_put(cfile, false /* do not wait for ourself */);
4613 cifs_done_oplock_break(cinode);
4614}
4615
4616/*
4617 * The presence of cifs_direct_io() in the address space ops vector
4618 * allows open() with O_DIRECT, which would have failed otherwise.
4619 *
4620 * In the non-cached mode (mount with cache=none), direct read and write
4621 * requests are shunted off earlier, so this method should never be called.
4622 *
4623 * Direct IO is not yet supported in the cached mode.
4624 */
4625static ssize_t
4626cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
4627{
4628 /*
4629 * FIXME
4630 * Eventually need to support direct IO for non forcedirectio mounts
4631 */
4632 return -EINVAL;
4633}
4634
4635
4636const struct address_space_operations cifs_addr_ops = {
4637 .readpage = cifs_readpage,
4638 .readpages = cifs_readpages,
4639 .writepage = cifs_writepage,
4640 .writepages = cifs_writepages,
4641 .write_begin = cifs_write_begin,
4642 .write_end = cifs_write_end,
4643 .set_page_dirty = __set_page_dirty_nobuffers,
4644 .releasepage = cifs_release_page,
4645 .direct_IO = cifs_direct_io,
4646 .invalidatepage = cifs_invalidate_page,
4647 .launder_page = cifs_launder_page,
4648};
4649
4650/*
4651 * cifs_readpages requires the server to support a buffer large enough to
4652 * contain the header plus one complete page of data. Otherwise, we need
4653 * to leave cifs_readpages out of the address space operations.
4654 */
4655const struct address_space_operations cifs_addr_ops_smallbuf = {
4656 .readpage = cifs_readpage,
4657 .writepage = cifs_writepage,
4658 .writepages = cifs_writepages,
4659 .write_begin = cifs_write_begin,
4660 .write_end = cifs_write_end,
4661 .set_page_dirty = __set_page_dirty_nobuffers,
4662 .releasepage = cifs_release_page,
4663 .invalidatepage = cifs_invalidate_page,
4664 .launder_page = cifs_launder_page,
4665};