/*
 *   fs/cifs/file.c
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published
 *   by the Free Software Foundation; either version 2.1 of the License, or
 *   (at your option) any later version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"

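/*
 * Map POSIX open(2) flags to the SMB access bits requested from the server.
 * Note that O_RDWR deliberately asks for GENERIC_READ | GENERIC_WRITE
 * rather than GENERIC_ALL, since requesting more permission than needed
 * can cause an unnecessary access-denied error on create.
 */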
static inline int cifs_convert_flags(unsigned int flags)
{
	if ((flags & O_ACCMODE) == O_RDONLY)
		return GENERIC_READ;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		return GENERIC_WRITE;
	else if ((flags & O_ACCMODE) == O_RDWR) {
		/* GENERIC_ALL is too much permission to request and
		   can cause an unnecessary access-denied error on create */
		/* return GENERIC_ALL; */
		return (GENERIC_READ | GENERIC_WRITE);
	}

	return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
		FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
		FILE_READ_DATA);
}

static u32 cifs_posix_convert_flags(unsigned int flags)
{
	u32 posix_flags = 0;

	if ((flags & O_ACCMODE) == O_RDONLY)
		posix_flags = SMB_O_RDONLY;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		posix_flags = SMB_O_WRONLY;
	else if ((flags & O_ACCMODE) == O_RDWR)
		posix_flags = SMB_O_RDWR;

	if (flags & O_CREAT) {
		posix_flags |= SMB_O_CREAT;
		if (flags & O_EXCL)
			posix_flags |= SMB_O_EXCL;
	} else if (flags & O_EXCL)
		cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
			 current->comm, current->tgid);

	if (flags & O_TRUNC)
		posix_flags |= SMB_O_TRUNC;
	/* be safe and imply O_SYNC for O_DSYNC */
	if (flags & O_DSYNC)
		posix_flags |= SMB_O_SYNC;
	if (flags & O_DIRECTORY)
		posix_flags |= SMB_O_DIRECTORY;
	if (flags & O_NOFOLLOW)
		posix_flags |= SMB_O_NOFOLLOW;
	if (flags & O_DIRECT)
		posix_flags |= SMB_O_DIRECT;

	return posix_flags;
}

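/*
 * Choose the SMB create disposition for the given open(2) flags; see the
 * flag mapping table in cifs_nt_open() below.
 */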
static inline int cifs_get_disposition(unsigned int flags)
{
	if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
		return FILE_CREATE;
	else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
		return FILE_OVERWRITE_IF;
	else if ((flags & O_CREAT) == O_CREAT)
		return FILE_OPEN_IF;
	else if ((flags & O_TRUNC) == O_TRUNC)
		return FILE_OVERWRITE;
	else
		return FILE_OPEN;
}

int cifs_posix_open(char *full_path, struct inode **pinode,
		    struct super_block *sb, int mode, unsigned int f_flags,
		    __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
	int rc;
	FILE_UNIX_BASIC_INFO *presp_data;
	__u32 posix_flags = 0;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fattr fattr;
	struct tcon_link *tlink;
	struct cifs_tcon *tcon;

	cifs_dbg(FYI, "posix open %s\n", full_path);

	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
	if (presp_data == NULL)
		return -ENOMEM;

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		rc = PTR_ERR(tlink);
		goto posix_open_ret;
	}

	tcon = tlink_tcon(tlink);
	mode &= ~current_umask();

	posix_flags = cifs_posix_convert_flags(f_flags);
	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
			     poplock, full_path, cifs_sb->local_nls,
			     cifs_sb->mnt_cifs_flags &
				CIFS_MOUNT_MAP_SPECIAL_CHR);
	cifs_put_tlink(tlink);

	if (rc)
		goto posix_open_ret;

	if (presp_data->Type == cpu_to_le32(-1))
		goto posix_open_ret; /* open ok, caller does qpathinfo */

	if (!pinode)
		goto posix_open_ret; /* caller does not need info */

	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

	/* get new inode and set it up */
	if (*pinode == NULL) {
		cifs_fill_uniqueid(sb, &fattr);
		*pinode = cifs_iget(sb, &fattr);
		if (!*pinode) {
			rc = -ENOMEM;
			goto posix_open_ret;
		}
	} else {
		cifs_fattr_to_inode(*pinode, &fattr);
	}

posix_open_ret:
	kfree(presp_data);
	return rc;
}

static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
	     struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
	     struct cifs_fid *fid, unsigned int xid)
{
	int rc;
	int desired_access;
	int disposition;
	int create_options = CREATE_NOT_DIR;
	FILE_ALL_INFO *buf;
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifs_open_parms oparms;

	if (!server->ops->open)
		return -ENOSYS;

	desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *	POSIX Flag            CIFS Disposition
 *	----------            ----------------
 *	O_CREAT               FILE_OPEN_IF
 *	O_CREAT | O_EXCL      FILE_CREATE
 *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *	O_TRUNC               FILE_OVERWRITE
 *	none of the above     FILE_OPEN
 *
 *	Note that there is no direct match for disposition
 *	FILE_SUPERSEDE (ie create whether or not the file exists).
 *	O_CREAT | O_TRUNC is similar, but truncates the existing
 *	file rather than creating a new one as FILE_SUPERSEDE does
 *	(FILE_SUPERSEDE uses the attributes / metadata passed in on
 *	the open call).
 *
 *	O_SYNC is a reasonable match to the CIFS writethrough flag
 *	and the read/write flags match reasonably.  O_LARGEFILE is
 *	irrelevant because largefile support is always used by this
 *	client.  Flags O_APPEND, O_DIRECT, O_DIRECTORY, O_FASYNC,
 *	O_NOFOLLOW and O_NONBLOCK need further investigation.
 *********************************************************************/

	disposition = cifs_get_disposition(f_flags);

	/* BB pass O_SYNC flag through on file attributes .. BB */

	buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	if (backup_cred(cifs_sb))
		create_options |= CREATE_OPEN_BACKUP_INTENT;

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = create_options;
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = fid;
	oparms.reconnect = false;

	rc = server->ops->open(xid, &oparms, oplock, buf);

	if (rc)
		goto out;

	if (tcon->unix_ext)
		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
					      xid);
	else
		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
					 xid, fid);

out:
	kfree(buf);
	return rc;
}

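/*
 * Return true if any open instance of this inode currently holds cached
 * byte-range (mandatory style) locks; used below to decide whether a read
 * oplock can safely be kept.
 */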
static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
	struct cifs_fid_locks *cur;
	bool has_locks = false;

	down_read(&cinode->lock_sem);
	list_for_each_entry(cur, &cinode->llist, llist) {
		if (!list_empty(&cur->locks)) {
			has_locks = true;
			break;
		}
	}
	up_read(&cinode->lock_sem);
	return has_locks;
}

struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
		  struct tcon_link *tlink, __u32 oplock)
{
	struct dentry *dentry = file->f_path.dentry;
	struct inode *inode = dentry->d_inode;
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifsFileInfo *cfile;
	struct cifs_fid_locks *fdlocks;
	struct cifs_tcon *tcon = tlink_tcon(tlink);
	struct TCP_Server_Info *server = tcon->ses->server;

	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
	if (cfile == NULL)
		return cfile;

	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
	if (!fdlocks) {
		kfree(cfile);
		return NULL;
	}

	INIT_LIST_HEAD(&fdlocks->locks);
	fdlocks->cfile = cfile;
	cfile->llist = fdlocks;
	down_write(&cinode->lock_sem);
	list_add(&fdlocks->llist, &cinode->llist);
	up_write(&cinode->lock_sem);

	cfile->count = 1;
	cfile->pid = current->tgid;
	cfile->uid = current_fsuid();
	cfile->dentry = dget(dentry);
	cfile->f_flags = file->f_flags;
	cfile->invalidHandle = false;
	cfile->tlink = cifs_get_tlink(tlink);
	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
	mutex_init(&cfile->fh_mutex);

	cifs_sb_active(inode->i_sb);

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	spin_lock(&cifs_file_list_lock);
	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
		oplock = fid->pending_open->oplock;
	list_del(&fid->pending_open->olist);

	fid->purge_cache = false;
	server->ops->set_fid(cfile, fid, oplock);

	list_add(&cfile->tlist, &tcon->openFileList);
	/* if readable file instance, put it first in the list */
	if (file->f_mode & FMODE_READ)
		list_add(&cfile->flist, &cinode->openFileList);
	else
		list_add_tail(&cfile->flist, &cinode->openFileList);
	spin_unlock(&cifs_file_list_lock);

	if (fid->purge_cache)
		cifs_zap_mapping(inode);

	file->private_data = cfile;
	return cfile;
}

struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
	spin_lock(&cifs_file_list_lock);
	cifsFileInfo_get_locked(cifs_file);
	spin_unlock(&cifs_file_list_lock);
	return cifs_file;
}

/*
 * Release a reference on the file private data. This may involve closing
 * the filehandle out on the server. Must be called without holding
 * cifs_file_list_lock.
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
	struct inode *inode = cifs_file->dentry->d_inode;
	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct super_block *sb = inode->i_sb;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifsLockInfo *li, *tmp;
	struct cifs_fid fid;
	struct cifs_pending_open open;
	bool oplock_break_cancelled;

	spin_lock(&cifs_file_list_lock);
	if (--cifs_file->count > 0) {
		spin_unlock(&cifs_file_list_lock);
		return;
	}

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	/* store open in pending opens to make sure we don't miss lease break */
	cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

	/* remove it from the lists */
	list_del(&cifs_file->flist);
	list_del(&cifs_file->tlist);

	if (list_empty(&cifsi->openFileList)) {
		cifs_dbg(FYI, "closing last open instance for inode %p\n",
			 cifs_file->dentry->d_inode);
		/*
		 * In strict cache mode we need to invalidate the mapping on
		 * the last close because it may cause an error when we open
		 * this file again and get at least a level II oplock.
		 */
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
			set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
		cifs_set_oplock_level(cifsi, 0);
	}
	spin_unlock(&cifs_file_list_lock);

	oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break);

	if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
		struct TCP_Server_Info *server = tcon->ses->server;
		unsigned int xid;

		xid = get_xid();
		if (server->ops->close)
			server->ops->close(xid, tcon, &cifs_file->fid);
		_free_xid(xid);
	}

	if (oplock_break_cancelled)
		cifs_done_oplock_break(cifsi);

	cifs_del_pending_open(&open);

	/*
	 * Delete any outstanding lock records. We'll lose them when the file
	 * is closed anyway.
	 */
	down_write(&cifsi->lock_sem);
	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
		list_del(&li->llist);
		cifs_del_lock_waiters(li);
		kfree(li);
	}
	list_del(&cifs_file->llist->llist);
	kfree(cifs_file->llist);
	up_write(&cifsi->lock_sem);

	cifs_put_tlink(cifs_file->tlink);
	dput(cifs_file->dentry);
	cifs_sb_deactive(sb);
	kfree(cifs_file);
}

int cifs_open(struct inode *inode, struct file *file)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct TCP_Server_Info *server;
	struct cifs_tcon *tcon;
	struct tcon_link *tlink;
	struct cifsFileInfo *cfile = NULL;
	char *full_path = NULL;
	bool posix_open_ok = false;
	struct cifs_fid fid;
	struct cifs_pending_open open;

	xid = get_xid();

	cifs_sb = CIFS_SB(inode->i_sb);
	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		free_xid(xid);
		return PTR_ERR(tlink);
	}
	tcon = tlink_tcon(tlink);
	server = tcon->ses->server;

	full_path = build_path_from_dentry(file->f_path.dentry);
	if (full_path == NULL) {
		rc = -ENOMEM;
		goto out;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
		 inode, file->f_flags, full_path);

	if (file->f_flags & O_DIRECT &&
	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
			file->f_op = &cifs_file_direct_nobrl_ops;
		else
			file->f_op = &cifs_file_direct_ops;
	}

	if (server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (!tcon->broken_posix_open && tcon->unix_ext &&
	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/* can not refresh inode info since size could be stale */
		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
				     cifs_sb->mnt_file_mode /* ignored */,
				     file->f_flags, &oplock, &fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix open succeeded\n");
			posix_open_ok = true;
		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
			if (tcon->ses->serverNOS)
				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
					 tcon->ses->serverName,
					 tcon->ses->serverNOS);
			tcon->broken_posix_open = true;
		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
			 (rc != -EOPNOTSUPP)) /* path not found or net err */
			goto out;
		/*
		 * Else fallthrough to retry open the old way on network i/o
		 * or DFS errors.
		 */
	}

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	cifs_add_pending_open(&fid, tlink, &open);

	if (!posix_open_ok) {
		if (server->ops->get_lease_key)
			server->ops->get_lease_key(inode, &fid);

		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
				  file->f_flags, &oplock, &fid, xid);
		if (rc) {
			cifs_del_pending_open(&open);
			goto out;
		}
	}

	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
	if (cfile == NULL) {
		if (server->ops->close)
			server->ops->close(xid, tcon, &fid);
		cifs_del_pending_open(&open);
		rc = -ENOMEM;
		goto out;
	}

	cifs_fscache_set_inode_cookie(inode, file);

	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
		/*
		 * Time to set mode which we can not set earlier due to
		 * problems creating new read-only files.
		 */
		struct cifs_unix_set_info_args args = {
			.mode	= inode->i_mode,
			.uid	= INVALID_UID, /* no change */
			.gid	= INVALID_GID, /* no change */
			.ctime	= NO_CHANGE_64,
			.atime	= NO_CHANGE_64,
			.mtime	= NO_CHANGE_64,
			.device	= 0,
		};
		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
				       cfile->pid);
	}

out:
	kfree(full_path);
	free_xid(xid);
	cifs_put_tlink(tlink);
	return rc;
}

static int cifs_push_posix_locks(struct cifsFileInfo *cfile);

/*
 * Try to reacquire byte range locks that were released when session
 * to server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	down_read(&cinode->lock_sem);
	if (cinode->can_cache_brlcks) {
		/* can cache locks - no need to relock */
		up_read(&cinode->lock_sem);
		return rc;
	}

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	up_read(&cinode->lock_sem);
	return rc;
}

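/*
 * Reopen a file whose handle has been invalidated, typically after a
 * reconnect. When @can_flush is true it is safe to flush dirty pages and
 * refresh the inode from the server; otherwise that is skipped to avoid
 * deadlocking with writeback (see the comments below).
 */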
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsInodeInfo *cinode;
	struct inode *inode;
	char *full_path = NULL;
	int desired_access;
	int disposition = FILE_OPEN;
	int create_options = CREATE_NOT_DIR;
	struct cifs_open_parms oparms;

	xid = get_xid();
	mutex_lock(&cfile->fh_mutex);
	if (!cfile->invalidHandle) {
		mutex_unlock(&cfile->fh_mutex);
		rc = 0;
		free_xid(xid);
		return rc;
	}

	inode = cfile->dentry->d_inode;
	cifs_sb = CIFS_SB(inode->i_sb);
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	/*
	 * Can not grab the rename sem here because various ops, including
	 * those that already have the rename sem, can end up causing
	 * writepage to get called, and if the server was down that means we
	 * end up here, and we can never tell if the caller already has the
	 * rename_sem.
	 */
	full_path = build_path_from_dentry(cfile->dentry);
	if (full_path == NULL) {
		rc = -ENOMEM;
		mutex_unlock(&cfile->fh_mutex);
		free_xid(xid);
		return rc;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
		 inode, cfile->f_flags, full_path);

	if (tcon->ses->server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (tcon->unix_ext && cap_unix(tcon->ses) &&
	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/*
		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
		 * original open. Must mask them off for a reopen.
		 */
		unsigned int oflags = cfile->f_flags &
						~(O_CREAT | O_EXCL | O_TRUNC);

		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
				     cifs_sb->mnt_file_mode /* ignored */,
				     oflags, &oplock, &cfile->fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix reopen succeeded\n");
			oparms.reconnect = true;
			goto reopen_success;
		}
		/*
		 * fallthrough to retry open the old way on errors, especially
		 * in the reconnect path it is important to retry hard
		 */
	}

	desired_access = cifs_convert_flags(cfile->f_flags);

	if (backup_cred(cifs_sb))
		create_options |= CREATE_OPEN_BACKUP_INTENT;

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &cfile->fid);

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = create_options;
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = &cfile->fid;
	oparms.reconnect = true;

	/*
	 * Can not refresh inode by passing in file_info buf to be returned by
	 * ops->open and then calling get_inode_info with returned buf since
	 * file might have write behind data that needs to be flushed and server
	 * version of file size can be stale. If we knew for sure that inode was
	 * not dirty locally we could do this.
	 */
	rc = server->ops->open(xid, &oparms, &oplock, NULL);
	if (rc == -ENOENT && oparms.reconnect == false) {
		/* durable handle timeout has expired - open the file again */
		rc = server->ops->open(xid, &oparms, &oplock, NULL);
		/* indicate that we need to relock the file */
		oparms.reconnect = true;
	}

	if (rc) {
		mutex_unlock(&cfile->fh_mutex);
		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
		cifs_dbg(FYI, "oplock: %d\n", oplock);
		goto reopen_error_exit;
	}

reopen_success:
	cfile->invalidHandle = false;
	mutex_unlock(&cfile->fh_mutex);
	cinode = CIFS_I(inode);

	if (can_flush) {
		rc = filemap_write_and_wait(inode->i_mapping);
		mapping_set_error(inode->i_mapping, rc);

		if (tcon->unix_ext)
			rc = cifs_get_inode_info_unix(&inode, full_path,
						      inode->i_sb, xid);
		else
			rc = cifs_get_inode_info(&inode, full_path, NULL,
						 inode->i_sb, xid, NULL);
	}
	/*
	 * Else we are writing out data to server already and could deadlock if
	 * we tried to flush data, and since we do not know if we have data that
	 * would invalidate the current end of file on the server we can not go
	 * to the server to get the new inode info.
	 */

	server->ops->set_fid(cfile, &cfile->fid, oplock);
	if (oparms.reconnect)
		cifs_relock_file(cfile);

reopen_error_exit:
	kfree(full_path);
	free_xid(xid);
	return rc;
}

int cifs_close(struct inode *inode, struct file *file)
{
	if (file->private_data != NULL) {
		cifsFileInfo_put(file->private_data);
		file->private_data = NULL;
	}

	/* return code from the ->release op is always ignored */
	return 0;
}

int cifs_closedir(struct inode *inode, struct file *file)
{
	int rc = 0;
	unsigned int xid;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	char *buf;

	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

	if (cfile == NULL)
		return rc;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	cifs_dbg(FYI, "Freeing private data in close dir\n");
	spin_lock(&cifs_file_list_lock);
	if (server->ops->dir_needs_close(cfile)) {
		cfile->invalidHandle = true;
		spin_unlock(&cifs_file_list_lock);
		if (server->ops->close_dir)
			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
		else
			rc = -ENOSYS;
		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
		/* not much we can do if it fails anyway, ignore rc */
		rc = 0;
	} else
		spin_unlock(&cifs_file_list_lock);

	buf = cfile->srch_inf.ntwrk_buf_start;
	if (buf) {
		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
		cfile->srch_inf.ntwrk_buf_start = NULL;
		if (cfile->srch_inf.smallBuf)
			cifs_small_buf_release(buf);
		else
			cifs_buf_release(buf);
	}

	cifs_put_tlink(cfile->tlink);
	kfree(file->private_data);
	file->private_data = NULL;
	/* BB can we lock the filestruct while this is going on? */
	free_xid(xid);
	return rc;
}

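/*
 * Allocate and initialize a lock record for the given byte range; the
 * caller is responsible for linking it into the per-fid lock list.
 */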
static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type)
{
	struct cifsLockInfo *lock =
		kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
	if (!lock)
		return lock;
	lock->offset = offset;
	lock->length = length;
	lock->type = type;
	lock->pid = current->tgid;
	INIT_LIST_HEAD(&lock->blist);
	init_waitqueue_head(&lock->block_q);
	return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
		list_del_init(&li->blist);
		wake_up(&li->block_q);
	}
}

#define CIFS_LOCK_OP	0
#define CIFS_READ_OP	1
#define CIFS_WRITE_OP	2

/* @rw_check : 0 - no op, 1 - read, 2 - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
			    __u64 length, __u8 type, struct cifsFileInfo *cfile,
			    struct cifsLockInfo **conf_lock, int rw_check)
{
	struct cifsLockInfo *li;
	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

	list_for_each_entry(li, &fdlocks->locks, llist) {
		if (offset + length <= li->offset ||
		    offset >= li->offset + li->length)
			continue;
		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
		    server->ops->compare_fids(cfile, cur_cfile)) {
			/* shared lock prevents write op through the same fid */
			if (!(li->type & server->vals->shared_lock_type) ||
			    rw_check != CIFS_WRITE_OP)
				continue;
		}
		if ((type & server->vals->shared_lock_type) &&
		    ((server->ops->compare_fids(cfile, cur_cfile) &&
		     current->tgid == li->pid) || type == li->type))
			continue;
		if (conf_lock)
			*conf_lock = li;
		return true;
	}
	return false;
}

bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
			__u8 type, struct cifsLockInfo **conf_lock,
			int rw_check)
{
	bool rc = false;
	struct cifs_fid_locks *cur;
	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);

	list_for_each_entry(cur, &cinode->llist, llist) {
		rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
						 cfile, conf_lock, rw_check);
		if (rc)
			break;
	}

	return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * send a request to the server, or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
	       __u8 type, struct file_lock *flock)
{
	int rc = 0;
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
	bool exist;

	down_read(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, offset, length, type,
					&conf_lock, CIFS_LOCK_OP);
	if (exist) {
		flock->fl_start = conf_lock->offset;
		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
		flock->fl_pid = conf_lock->pid;
		if (conf_lock->type & server->vals->shared_lock_type)
			flock->fl_type = F_RDLCK;
		else
			flock->fl_type = F_WRLCK;
	} else if (!cinode->can_cache_brlcks)
		rc = 1;
	else
		flock->fl_type = F_UNLCK;

	up_read(&cinode->lock_sem);
	return rc;
}

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
	down_write(&cinode->lock_sem);
	list_add_tail(&lock->llist, &cfile->llist->locks);
	up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to send a request to the server;
 * 2) 1, if no locks prevent us but we need to send a request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
		 bool wait)
{
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
	bool exist;
	int rc = 0;

try_again:
	exist = false;
	down_write(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
					lock->type, &conf_lock, CIFS_LOCK_OP);
	if (!exist && cinode->can_cache_brlcks) {
		list_add_tail(&lock->llist, &cfile->llist->locks);
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (!exist)
		rc = 1;
	else if (!wait)
		rc = -EACCES;
	else {
		list_add_tail(&lock->blist, &conf_lock->blist);
		up_write(&cinode->lock_sem);
		rc = wait_event_interruptible(lock->block_q,
					(lock->blist.prev == &lock->blist) &&
					(lock->blist.next == &lock->blist));
		if (!rc)
			goto try_again;
		down_write(&cinode->lock_sem);
		list_del_init(&lock->blist);
	}

	up_write(&cinode->lock_sem);
	return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * send a request to the server, or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
	int rc = 0;
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	unsigned char saved_type = flock->fl_type;

	if ((flock->fl_flags & FL_POSIX) == 0)
		return 1;

	down_read(&cinode->lock_sem);
	posix_test_lock(file, flock);

	if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
		flock->fl_type = saved_type;
		rc = 1;
	}

	up_read(&cinode->lock_sem);
	return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) 0, if we set the lock and don't need to send a request to the server;
 * 2) 1, if we need to send a request to the server;
 * 3) <0, if an error occurs while setting the lock.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	int rc = 1;

	if ((flock->fl_flags & FL_POSIX) == 0)
		return rc;

try_again:
	down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}

	rc = posix_lock_file(file, flock, NULL);
	up_write(&cinode->lock_sem);
	if (rc == FILE_LOCK_DEFERRED) {
		rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
		if (!rc)
			goto try_again;
		posix_unblock_lock(flock);
	}
	return rc;
}

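/*
 * Push all cached mandatory byte-range locks for this open file out to the
 * server, packing as many LOCKING_ANDX ranges per request as the server's
 * maxBuf allows; exclusive and shared locks are sent in separate passes.
 */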
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
	unsigned int xid;
	int rc = 0, stored_rc;
	struct cifsLockInfo *li, *tmp;
	struct cifs_tcon *tcon;
	unsigned int num, max_num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	int types[] = {LOCKING_ANDX_LARGE_FILES,
		       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
	int i;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it for zero before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (!max_buf) {
		free_xid(xid);
		return -EINVAL;
	}

	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf) {
		free_xid(xid);
		return -ENOMEM;
	}

	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (li->type != types[i])
				continue;
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			if (++num == max_num) {
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       (__u8)li->type, 0, num,
						       buf);
				if (stored_rc)
					rc = stored_rc;
				cur = buf;
				num = 0;
			} else
				cur++;
		}

		if (num) {
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       (__u8)types[i], 0, num, buf);
			if (stored_rc)
				rc = stored_rc;
		}
	}

	kfree(buf);
	free_xid(xid);
	return rc;
}

/* copied from fs/locks.c with a name change */
#define cifs_for_each_lock(inode, lockp) \
	for (lockp = &inode->i_flock; *lockp != NULL; \
	     lockp = &(*lockp)->fl_next)

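/*
 * Snapshot of a POSIX lock, copied under i_lock so that the actual lock
 * requests can be sent to the server after the spinlock is dropped.
 */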
struct lock_to_push {
	struct list_head llist;
	__u64 offset;
	__u64 length;
	__u32 pid;
	__u16 netfid;
	__u8 type;
};

static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
	struct inode *inode = cfile->dentry->d_inode;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct file_lock *flock, **before;
	unsigned int count = 0, i = 0;
	int rc = 0, xid, type;
	struct list_head locks_to_send, *el;
	struct lock_to_push *lck, *tmp;
	__u64 length;

	xid = get_xid();

	spin_lock(&inode->i_lock);
	cifs_for_each_lock(inode, before) {
		if ((*before)->fl_flags & FL_POSIX)
			count++;
	}
	spin_unlock(&inode->i_lock);

	INIT_LIST_HEAD(&locks_to_send);

	/*
	 * Allocating count locks is enough because no FL_POSIX locks can be
	 * added to the list while we are holding cinode->lock_sem that
	 * protects locking operations of this inode.
	 */
	for (; i < count; i++) {
		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
		if (!lck) {
			rc = -ENOMEM;
			goto err_out;
		}
		list_add_tail(&lck->llist, &locks_to_send);
	}

	el = locks_to_send.next;
	spin_lock(&inode->i_lock);
	cifs_for_each_lock(inode, before) {
		flock = *before;
		if ((flock->fl_flags & FL_POSIX) == 0)
			continue;
		if (el == &locks_to_send) {
			/*
			 * The list ended. We don't have enough allocated
			 * structures - something is really wrong.
			 */
			cifs_dbg(VFS, "Can't push all brlocks!\n");
			break;
		}
		length = 1 + flock->fl_end - flock->fl_start;
		if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
			type = CIFS_RDLCK;
		else
			type = CIFS_WRLCK;
		lck = list_entry(el, struct lock_to_push, llist);
		lck->pid = flock->fl_pid;
		lck->netfid = cfile->fid.netfid;
		lck->length = length;
		lck->type = type;
		lck->offset = flock->fl_start;
		el = el->next;
	}
	spin_unlock(&inode->i_lock);

	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		int stored_rc;

		stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
					     lck->offset, lck->length, NULL,
					     lck->type, 0);
		if (stored_rc)
			rc = stored_rc;
		list_del(&lck->llist);
		kfree(lck);
	}

out:
	free_xid(xid);
	return rc;
err_out:
	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		list_del(&lck->llist);
		kfree(lck);
	}
	goto out;
}

static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	/* we are going to update can_cache_brlcks here - need a write access */
	down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	cinode->can_cache_brlcks = false;
	up_write(&cinode->lock_sem);
	return rc;
}

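/*
 * Decode a VFS file_lock into the protocol lock @type and whether this is
 * a @lock or an @unlock request; *@wait_flag is set for blocking (FL_SLEEP)
 * requests.
 */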
static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
		bool *wait_flag, struct TCP_Server_Info *server)
{
	if (flock->fl_flags & FL_POSIX)
		cifs_dbg(FYI, "Posix\n");
	if (flock->fl_flags & FL_FLOCK)
		cifs_dbg(FYI, "Flock\n");
	if (flock->fl_flags & FL_SLEEP) {
		cifs_dbg(FYI, "Blocking lock\n");
		*wait_flag = true;
	}
	if (flock->fl_flags & FL_ACCESS)
		cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
	if (flock->fl_flags & FL_LEASE)
		cifs_dbg(FYI, "Lease on file - not implemented yet\n");
	if (flock->fl_flags &
	    (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
	       FL_ACCESS | FL_LEASE | FL_CLOSE)))
		cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);

	*type = server->vals->large_lock_type;
	if (flock->fl_type == F_WRLCK) {
		cifs_dbg(FYI, "F_WRLCK\n");
		*type |= server->vals->exclusive_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_UNLCK) {
		cifs_dbg(FYI, "F_UNLCK\n");
		*type |= server->vals->unlock_lock_type;
		*unlock = 1;
		/* Check if unlock includes more than one lock range */
	} else if (flock->fl_type == F_RDLCK) {
		cifs_dbg(FYI, "F_RDLCK\n");
		*type |= server->vals->shared_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_EXLCK) {
		cifs_dbg(FYI, "F_EXLCK\n");
		*type |= server->vals->exclusive_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_SHLCK) {
		cifs_dbg(FYI, "F_SHLCK\n");
		*type |= server->vals->shared_lock_type;
		*lock = 1;
	} else
		cifs_dbg(FYI, "Unknown type of lock\n");
}

static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	__u16 netfid = cfile->fid.netfid;

	if (posix_lck) {
		int posix_lock_type;

		rc = cifs_posix_lock_test(file, flock);
		if (!rc)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;
		rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
				      flock->fl_start, length, flock,
				      posix_lock_type, wait_flag);
		return rc;
	}

	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
	if (!rc)
		return rc;

	/* BB we could chain these into one lock request BB */
	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 0, 1, false);
		flock->fl_type = F_UNLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
		return 0;
	}

	if (type & server->vals->shared_lock_type) {
		flock->fl_type = F_WRLCK;
		return 0;
	}

	type &= ~server->vals->exclusive_lock_type;

	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
				    type | server->vals->shared_lock_type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
			type | server->vals->shared_lock_type, 0, 1, false);
		flock->fl_type = F_RDLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
	} else
		flock->fl_type = F_WRLCK;

	return 0;
}

void
cifs_move_llist(struct list_head *source, struct list_head *dest)
{
	struct list_head *li, *tmp;
	list_for_each_safe(li, tmp, source)
		list_move(li, dest);
}

void
cifs_free_llist(struct list_head *llist)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, llist, llist) {
		cifs_del_lock_waiters(li);
		list_del(&li->llist);
		kfree(li);
	}
}

int
cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
		  unsigned int xid)
{
	int rc = 0, stored_rc;
	int types[] = {LOCKING_ANDX_LARGE_FILES,
		       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
	unsigned int i;
	unsigned int max_num, num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
	struct cifsLockInfo *li, *tmp;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct list_head tmp_llist;

	INIT_LIST_HEAD(&tmp_llist);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it for zero before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (!max_buf)
		return -EINVAL;

	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	down_write(&cinode->lock_sem);
	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (flock->fl_start > li->offset ||
			    (flock->fl_start + length) <
			    (li->offset + li->length))
				continue;
			if (current->tgid != li->pid)
				continue;
			if (types[i] != li->type)
				continue;
			if (cinode->can_cache_brlcks) {
				/*
				 * We can cache brlock requests - simply remove
				 * a lock from the file's list.
				 */
				list_del(&li->llist);
				cifs_del_lock_waiters(li);
				kfree(li);
				continue;
			}
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			/*
			 * We need to save a lock here to let us add it again to
			 * the file's list if the unlock range request fails on
			 * the server.
			 */
			list_move(&li->llist, &tmp_llist);
			if (++num == max_num) {
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       li->type, num, 0, buf);
				if (stored_rc) {
					/*
					 * We failed on the unlock range
					 * request - add all locks from the tmp
					 * list to the head of the file's list.
					 */
					cifs_move_llist(&tmp_llist,
							&cfile->llist->locks);
					rc = stored_rc;
				} else
					/*
					 * The unlock range request succeeded -
					 * free the tmp list.
					 */
					cifs_free_llist(&tmp_llist);
				cur = buf;
				num = 0;
			} else
				cur++;
		}
		if (num) {
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       types[i], num, 0, buf);
			if (stored_rc) {
				cifs_move_llist(&tmp_llist,
						&cfile->llist->locks);
				rc = stored_rc;
			} else
				cifs_free_llist(&tmp_llist);
		}
	}

	up_write(&cinode->lock_sem);
	kfree(buf);
	return rc;
}

static int
cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, int lock, int unlock,
	   unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct inode *inode = cfile->dentry->d_inode;

	if (posix_lck) {
		int posix_lock_type;

		rc = cifs_posix_lock_set(file, flock);
		if (!rc || rc < 0)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;

		if (unlock == 1)
			posix_lock_type = CIFS_UNLCK;

		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
				      current->tgid, flock->fl_start, length,
				      NULL, posix_lock_type, wait_flag);
		goto out;
	}

	if (lock) {
		struct cifsLockInfo *lock;

		lock = cifs_lock_init(flock->fl_start, length, type);
		if (!lock)
			return -ENOMEM;

		rc = cifs_lock_add_if(cfile, lock, wait_flag);
		if (rc < 0) {
			kfree(lock);
			return rc;
		}
		if (!rc)
			goto out;

		/*
		 * Windows 7 server can delay breaking lease from read to None
		 * if we set a byte-range lock on a file - break it explicitly
		 * before sending the lock to the server to be sure the next
		 * read won't conflict with non-overlapping locks due to
		 * page reading.
		 */
		if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
					CIFS_CACHE_READ(CIFS_I(inode))) {
			cifs_zap_mapping(inode);
			cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
				 inode);
			CIFS_I(inode)->oplock = 0;
		}

		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 1, 0, wait_flag);
		if (rc) {
			kfree(lock);
			return rc;
		}

		cifs_lock_add(cfile, lock);
	} else if (unlock)
		rc = server->ops->mand_unlock_range(cfile, flock, xid);

out:
	if (flock->fl_flags & FL_POSIX)
		posix_lock_file_wait(file, flock);
	return rc;
}

int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
{
	int rc, xid;
	int lock = 0, unlock = 0;
	bool wait_flag = false;
	bool posix_lck = false;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct cifsInodeInfo *cinode;
	struct cifsFileInfo *cfile;
	__u16 netfid;
	__u32 type;

	rc = -EACCES;
	xid = get_xid();

	cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
		 cmd, flock->fl_flags, flock->fl_type,
		 flock->fl_start, flock->fl_end);

	cfile = (struct cifsFileInfo *)file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
			tcon->ses->server);

	cifs_sb = CIFS_FILE_SB(file);
	netfid = cfile->fid.netfid;
	cinode = CIFS_I(file_inode(file));

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		posix_lck = true;
	/*
	 * BB add code here to normalize offset and length to account for
	 * negative length which we can not accept over the wire.
	 */
	if (IS_GETLK(cmd)) {
		rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
		free_xid(xid);
		return rc;
	}

	if (!lock && !unlock) {
		/*
		 * if this is neither a lock nor an unlock request, there is
		 * nothing to do since we do not know what it is
		 */
		free_xid(xid);
		return -EOPNOTSUPP;
	}

	rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
			xid);
	free_xid(xid);
	return rc;
}

/*
 * update the file size (if needed) after a write. Should be called with
 * the inode->i_lock held
 */
void
cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
		unsigned int bytes_written)
{
	loff_t end_of_write = offset + bytes_written;

	if (end_of_write > cifsi->server_eof)
		cifsi->server_eof = end_of_write;
}

static ssize_t
cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
	   size_t write_size, loff_t *offset)
{
	int rc = 0;
	unsigned int bytes_written = 0;
	unsigned int total_written;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	unsigned int xid;
	struct dentry *dentry = open_file->dentry;
	struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
	struct cifs_io_parms io_parms;

	cifs_sb = CIFS_SB(dentry->d_sb);

	cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
		 write_size, *offset, dentry);

	tcon = tlink_tcon(open_file->tlink);
	server = tcon->ses->server;

	if (!server->ops->sync_write)
		return -ENOSYS;

	xid = get_xid();

	for (total_written = 0; write_size > total_written;
	     total_written += bytes_written) {
		rc = -EAGAIN;
		while (rc == -EAGAIN) {
			struct kvec iov[2];
			unsigned int len;

			if (open_file->invalidHandle) {
				/* we could deadlock if we called
				   filemap_fdatawait from here so tell
				   reopen_file not to flush data to
				   server now */
				rc = cifs_reopen_file(open_file, false);
				if (rc != 0)
					break;
			}

			len = min(server->ops->wp_retry_size(dentry->d_inode),
				  (unsigned int)write_size - total_written);
			/* iov[0] is reserved for smb header */
			iov[1].iov_base = (char *)write_data + total_written;
			iov[1].iov_len = len;
			io_parms.pid = pid;
			io_parms.tcon = tcon;
			io_parms.offset = *offset;
			io_parms.length = len;
			rc = server->ops->sync_write(xid, &open_file->fid,
						     &io_parms, &bytes_written,
						     iov, 1);
		}
		if (rc || (bytes_written == 0)) {
			if (total_written)
				break;
			else {
				free_xid(xid);
				return rc;
			}
		} else {
			spin_lock(&dentry->d_inode->i_lock);
			cifs_update_eof(cifsi, *offset, bytes_written);
			spin_unlock(&dentry->d_inode->i_lock);
			*offset += bytes_written;
		}
	}

	cifs_stats_bytes_written(tcon, total_written);

	if (total_written > 0) {
		spin_lock(&dentry->d_inode->i_lock);
		if (*offset > dentry->d_inode->i_size)
			i_size_write(dentry->d_inode, *offset);
		spin_unlock(&dentry->d_inode->i_lock);
	}
	mark_inode_dirty_sync(dentry->d_inode);
	free_xid(xid);
	return total_written;
}

struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
					bool fsuid_only)
{
	struct cifsFileInfo *open_file = NULL;
	struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&cifs_file_list_lock);
	/* we could simply get the first_list_entry since write-only entries
	   are always at the end of the list but since the first entry might
	   have a close pending, we go through the whole list */
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
			if (!open_file->invalidHandle) {
				/* found a good file */
				/* lock it so it will not be closed on us */
				cifsFileInfo_get_locked(open_file);
				spin_unlock(&cifs_file_list_lock);
				return open_file;
			} /* else might as well continue, and look for
			     another, or simply have the caller reopen it
			     again rather than trying to fix this handle */
		} else /* write only file */
			break; /* write only files are last so must be done */
	}
	spin_unlock(&cifs_file_list_lock);
	return NULL;
}

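/*
 * Find a writable open handle for this inode, preferring one owned by the
 * current task. Invalid handles are retried via cifs_reopen_file(), at
 * most MAX_REOPEN_ATT times.
 */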
struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
					bool fsuid_only)
{
	struct cifsFileInfo *open_file, *inv_file = NULL;
	struct cifs_sb_info *cifs_sb;
	bool any_available = false;
	int rc;
	unsigned int refind = 0;

	/* Having a null inode here (because mapping->host was set to zero by
	   the VFS or MM) should not happen, but we had reports of an oops
	   (due to it being zero) during stress testcases, so we need to
	   check for it */

	if (cifs_inode == NULL) {
		cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
		dump_stack();
		return NULL;
	}

	cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&cifs_file_list_lock);
refind_writable:
	if (refind > MAX_REOPEN_ATT) {
		spin_unlock(&cifs_file_list_lock);
		return NULL;
	}
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (!any_available && open_file->pid != current->tgid)
			continue;
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
			if (!open_file->invalidHandle) {
				/* found a good writable file */
				cifsFileInfo_get_locked(open_file);
				spin_unlock(&cifs_file_list_lock);
				return open_file;
			} else {
				if (!inv_file)
					inv_file = open_file;
			}
		}
	}
	/* couldn't find a usable FH with the same pid, try any available */
	if (!any_available) {
		any_available = true;
		goto refind_writable;
	}

	if (inv_file) {
		any_available = false;
		cifsFileInfo_get_locked(inv_file);
	}

	spin_unlock(&cifs_file_list_lock);

	if (inv_file) {
		rc = cifs_reopen_file(inv_file, false);
		if (!rc)
			return inv_file;
		else {
			spin_lock(&cifs_file_list_lock);
			list_move_tail(&inv_file->flist,
				       &cifs_inode->openFileList);
			spin_unlock(&cifs_file_list_lock);
			cifsFileInfo_put(inv_file);
			spin_lock(&cifs_file_list_lock);
			++refind;
			goto refind_writable;
		}
	}

	return NULL;
}

static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
{
	struct address_space *mapping = page->mapping;
	loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
	char *write_data;
	int rc = -EFAULT;
	int bytes_written = 0;
	struct inode *inode;
	struct cifsFileInfo *open_file;

	if (!mapping || !mapping->host)
		return -EFAULT;

	inode = page->mapping->host;

	offset += (loff_t)from;
	write_data = kmap(page);
	write_data += from;

	if ((to > PAGE_CACHE_SIZE) || (from > to)) {
		kunmap(page);
		return -EIO;
	}

	/* racing with truncate? */
	if (offset > mapping->host->i_size) {
		kunmap(page);
		return 0; /* don't care */
	}

	/* check to make sure that we are not extending the file */
	if (mapping->host->i_size - offset < (loff_t)to)
		to = (unsigned)(mapping->host->i_size - offset);

	open_file = find_writable_file(CIFS_I(mapping->host), false);
	if (open_file) {
		bytes_written = cifs_write(open_file, open_file->pid,
					   write_data, to - from, &offset);
		cifsFileInfo_put(open_file);
		/* Does mm or vfs already set times? */
		inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
		if ((bytes_written > 0) && (offset))
			rc = 0;
		else if (bytes_written < 0)
			rc = bytes_written;
	} else {
		cifs_dbg(FYI, "No writeable filehandles for inode\n");
		rc = -EIO;
	}

	kunmap(page);
	return rc;
}

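/*
 * Allocate a cifs_writedata for up to @tofind pages and gather that many
 * dirty pages from @mapping starting at *@index; *@found_pages returns how
 * many were actually collected.
 */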
1893static struct cifs_writedata *
1894wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
1895 pgoff_t end, pgoff_t *index,
1896 unsigned int *found_pages)
1897{
1898 unsigned int nr_pages;
1899 struct page **pages;
1900 struct cifs_writedata *wdata;
1901
1902 wdata = cifs_writedata_alloc((unsigned int)tofind,
1903 cifs_writev_complete);
1904 if (!wdata)
1905 return NULL;
1906
1907 /*
1908 * find_get_pages_tag seems to return a max of 256 on each
1909 * iteration, so we must call it several times in order to
1910 * fill the array or the wsize is effectively limited to
1911 * 256 * PAGE_CACHE_SIZE.
1912 */
1913 *found_pages = 0;
1914 pages = wdata->pages;
1915 do {
1916 nr_pages = find_get_pages_tag(mapping, index,
1917 PAGECACHE_TAG_DIRTY, tofind,
1918 pages);
1919 *found_pages += nr_pages;
1920 tofind -= nr_pages;
1921 pages += nr_pages;
1922 } while (nr_pages && tofind && *index <= end);
1923
1924 return wdata;
1925}
1926
1927static unsigned int
1928wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
1929 struct address_space *mapping,
1930 struct writeback_control *wbc,
1931 pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
1932{
1933 unsigned int nr_pages = 0, i;
1934 struct page *page;
1935
1936 for (i = 0; i < found_pages; i++) {
1937 page = wdata->pages[i];
1938 /*
1939 * At this point we hold neither mapping->tree_lock nor
1940 * lock on the page itself: the page may be truncated or
1941 * invalidated (changing page->mapping to NULL), or even
1942 * swizzled back from swapper_space to tmpfs file
1943 * mapping
1944 */
1945
1946 if (nr_pages == 0)
1947 lock_page(page);
1948 else if (!trylock_page(page))
1949 break;
1950
1951 if (unlikely(page->mapping != mapping)) {
1952 unlock_page(page);
1953 break;
1954 }
1955
1956 if (!wbc->range_cyclic && page->index > end) {
1957 *done = true;
1958 unlock_page(page);
1959 break;
1960 }
1961
1962 if (*next && (page->index != *next)) {
1963 /* Not next consecutive page */
1964 unlock_page(page);
1965 break;
1966 }
1967
1968 if (wbc->sync_mode != WB_SYNC_NONE)
1969 wait_on_page_writeback(page);
1970
1971 if (PageWriteback(page) ||
1972 !clear_page_dirty_for_io(page)) {
1973 unlock_page(page);
1974 break;
1975 }
1976
1977 /*
1978 * This actually clears the dirty bit in the radix tree.
1979 * See cifs_writepage() for more commentary.
1980 */
1981 set_page_writeback(page);
1982 if (page_offset(page) >= i_size_read(mapping->host)) {
1983 *done = true;
1984 unlock_page(page);
1985 end_page_writeback(page);
1986 break;
1987 }
1988
1989 wdata->pages[i] = page;
1990 *next = page->index + 1;
1991 ++nr_pages;
1992 }
1993
1994 /* reset index to refind any pages skipped */
1995 if (nr_pages == 0)
1996 *index = wdata->pages[0]->index + 1;
1997
1998 /* put any pages we aren't going to use */
1999 for (i = nr_pages; i < found_pages; i++) {
2000 page_cache_release(wdata->pages[i]);
2001 wdata->pages[i] = NULL;
2002 }
2003
2004 return nr_pages;
2005}
2006
2007static int
2008wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2009 struct address_space *mapping, struct writeback_control *wbc)
2010{
2011 int rc = 0;
2012 struct TCP_Server_Info *server;
2013 unsigned int i;
2014
2015 wdata->sync_mode = wbc->sync_mode;
2016 wdata->nr_pages = nr_pages;
2017 wdata->offset = page_offset(wdata->pages[0]);
2018 wdata->pagesz = PAGE_CACHE_SIZE;
2019 wdata->tailsz = min(i_size_read(mapping->host) -
2020 page_offset(wdata->pages[nr_pages - 1]),
2021 (loff_t)PAGE_CACHE_SIZE);
2022 wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) + wdata->tailsz;
2023
2024 if (wdata->cfile != NULL)
2025 cifsFileInfo_put(wdata->cfile);
2026 wdata->cfile = find_writable_file(CIFS_I(mapping->host), false);
2027 if (!wdata->cfile) {
2028 cifs_dbg(VFS, "No writable handles for inode\n");
2029 rc = -EBADF;
2030 } else {
2031 wdata->pid = wdata->cfile->pid;
2032 server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2033 rc = server->ops->async_writev(wdata, cifs_writedata_release);
2034 }
2035
2036 for (i = 0; i < nr_pages; ++i)
2037 unlock_page(wdata->pages[i]);
2038
2039 return rc;
2040}
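
/*
 * Worked example of the tail-size arithmetic in wdata_send_pages()
 * (illustrative numbers, assuming 4K pages): nr_pages = 3 with
 * i_size = 9000 gives page_offset(pages[2]) = 8192, so
 * tailsz = min(9000 - 8192, 4096) = 808 and
 * bytes = 2 * 4096 + 808 = 9000 -- exactly the data up to EOF.
 */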
2041
2042static int cifs_writepages(struct address_space *mapping,
2043 struct writeback_control *wbc)
2044{
2045 struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
2046 struct TCP_Server_Info *server;
2047 bool done = false, scanned = false, range_whole = false;
2048 pgoff_t end, index;
2049 struct cifs_writedata *wdata;
2050 int rc = 0;
2051
2052 /*
2053 * If wsize is smaller than the page cache size, default to writing
2054 * one page at a time via cifs_writepage
2055 */
2056 if (cifs_sb->wsize < PAGE_CACHE_SIZE)
2057 return generic_writepages(mapping, wbc);
2058
2059 if (wbc->range_cyclic) {
2060 index = mapping->writeback_index; /* Start from prev offset */
2061 end = -1;
2062 } else {
2063 index = wbc->range_start >> PAGE_CACHE_SHIFT;
2064 end = wbc->range_end >> PAGE_CACHE_SHIFT;
2065 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2066 range_whole = true;
2067 scanned = true;
2068 }
2069 server = cifs_sb_master_tcon(cifs_sb)->ses->server;
2070retry:
2071 while (!done && index <= end) {
2072 unsigned int i, nr_pages, found_pages, wsize, credits;
2073 pgoff_t next = 0, tofind, saved_index = index;
2074
2075 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2076 &wsize, &credits);
2077 if (rc)
2078 break;
2079
2080 tofind = min((wsize / PAGE_CACHE_SIZE) - 1, end - index) + 1;
2081
2082 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2083 &found_pages);
2084 if (!wdata) {
2085 rc = -ENOMEM;
2086 add_credits_and_wake_if(server, credits, 0);
2087 break;
2088 }
2089
2090 if (found_pages == 0) {
2091 kref_put(&wdata->refcount, cifs_writedata_release);
2092 add_credits_and_wake_if(server, credits, 0);
2093 break;
2094 }
2095
2096 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2097 end, &index, &next, &done);
2098
2099 /* nothing to write? */
2100 if (nr_pages == 0) {
2101 kref_put(&wdata->refcount, cifs_writedata_release);
2102 add_credits_and_wake_if(server, credits, 0);
2103 continue;
2104 }
2105
2106 wdata->credits = credits;
2107
2108 rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2109
2110 /* send failure -- clean up the mess */
2111 if (rc != 0) {
2112 add_credits_and_wake_if(server, wdata->credits, 0);
2113 for (i = 0; i < nr_pages; ++i) {
2114 if (rc == -EAGAIN)
2115 redirty_page_for_writepage(wbc,
2116 wdata->pages[i]);
2117 else
2118 SetPageError(wdata->pages[i]);
2119 end_page_writeback(wdata->pages[i]);
2120 page_cache_release(wdata->pages[i]);
2121 }
2122 if (rc != -EAGAIN)
2123 mapping_set_error(mapping, rc);
2124 }
2125 kref_put(&wdata->refcount, cifs_writedata_release);
2126
2127 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2128 index = saved_index;
2129 continue;
2130 }
2131
2132 wbc->nr_to_write -= nr_pages;
2133 if (wbc->nr_to_write <= 0)
2134 done = true;
2135
2136 index = next;
2137 }
2138
2139 if (!scanned && !done) {
2140 /*
2141 * We hit the last page and there is more work to be done: wrap
2142 * back to the start of the file
2143 */
2144 scanned = true;
2145 index = 0;
2146 goto retry;
2147 }
2148
2149 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2150 mapping->writeback_index = index;
2151
2152 return rc;
2153}
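
/*
 * The credit handling in cifs_writepages() above follows an
 * acquire/consume-or-return protocol: every wait_mtu_credits() success
 * must be matched either by handing the credits to an async request or
 * by returning them with add_credits_and_wake_if(). A minimal sketch of
 * the pattern (nothing_to_send() is a hypothetical placeholder):
 *
 *	rc = server->ops->wait_mtu_credits(server, want, &got, &credits);
 *	if (rc)
 *		break;
 *	if (nothing_to_send())
 *		add_credits_and_wake_if(server, credits, 0);
 *	else
 *		wdata->credits = credits;
 */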
2154
2155static int
2156cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2157{
2158 int rc;
2159 unsigned int xid;
2160
2161 xid = get_xid();
2162/* BB add check for wbc flags */
2163 page_cache_get(page);
2164 if (!PageUptodate(page))
2165 cifs_dbg(FYI, "ppw - page not up to date\n");
2166
2167 /*
2168 * Set the "writeback" flag, and clear "dirty" in the radix tree.
2169 *
2170 * A writepage() implementation always needs to do either this,
2171 * or re-dirty the page with "redirty_page_for_writepage()" in
2172 * the case of a failure.
2173 *
2174 * Just unlocking the page will cause the radix tree tag-bits
2175 * to fail to update with the state of the page correctly.
2176 */
2177 set_page_writeback(page);
2178retry_write:
2179 rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
2180 if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
2181 goto retry_write;
2182 else if (rc == -EAGAIN)
2183 redirty_page_for_writepage(wbc, page);
2184 else if (rc != 0)
2185 SetPageError(page);
2186 else
2187 SetPageUptodate(page);
2188 end_page_writeback(page);
2189 page_cache_release(page);
2190 free_xid(xid);
2191 return rc;
2192}
2193
2194static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2195{
2196 int rc = cifs_writepage_locked(page, wbc);
2197 unlock_page(page);
2198 return rc;
2199}
2200
2201static int cifs_write_end(struct file *file, struct address_space *mapping,
2202 loff_t pos, unsigned len, unsigned copied,
2203 struct page *page, void *fsdata)
2204{
2205 int rc;
2206 struct inode *inode = mapping->host;
2207 struct cifsFileInfo *cfile = file->private_data;
2208 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2209 __u32 pid;
2210
2211 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2212 pid = cfile->pid;
2213 else
2214 pid = current->tgid;
2215
2216 cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2217 page, pos, copied);
2218
2219 if (PageChecked(page)) {
2220 if (copied == len)
2221 SetPageUptodate(page);
2222 ClearPageChecked(page);
2223 } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
2224 SetPageUptodate(page);
2225
2226 if (!PageUptodate(page)) {
2227 char *page_data;
2228 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
2229 unsigned int xid;
2230
2231 xid = get_xid();
2232 /* this is probably better than directly calling
2233 cifs_partialpagewrite since here the file handle is
2234 already known, which we might as well leverage */
2235 /* BB check if anything else is missing from ppw,
2236 such as updating the last write time */
2237 page_data = kmap(page);
2238 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2239 /* if (rc < 0) should we set writebehind rc? */
2240 kunmap(page);
2241
2242 free_xid(xid);
2243 } else {
2244 rc = copied;
2245 pos += copied;
2246 set_page_dirty(page);
2247 }
2248
2249 if (rc > 0) {
2250 spin_lock(&inode->i_lock);
2251 if (pos > inode->i_size)
2252 i_size_write(inode, pos);
2253 spin_unlock(&inode->i_lock);
2254 }
2255
2256 unlock_page(page);
2257 page_cache_release(page);
2258
2259 return rc;
2260}
2261
2262int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2263 int datasync)
2264{
2265 unsigned int xid;
2266 int rc = 0;
2267 struct cifs_tcon *tcon;
2268 struct TCP_Server_Info *server;
2269 struct cifsFileInfo *smbfile = file->private_data;
2270 struct inode *inode = file_inode(file);
2271 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2272
2273 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2274 if (rc)
2275 return rc;
2276 mutex_lock(&inode->i_mutex);
2277
2278 xid = get_xid();
2279
2280 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2281 file, datasync);
2282
2283 if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2284 rc = cifs_zap_mapping(inode);
2285 if (rc) {
2286 cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2287 rc = 0; /* don't care about it in fsync */
2288 }
2289 }
2290
2291 tcon = tlink_tcon(smbfile->tlink);
2292 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2293 server = tcon->ses->server;
2294 if (server->ops->flush)
2295 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2296 else
2297 rc = -ENOSYS;
2298 }
2299
2300 free_xid(xid);
2301 mutex_unlock(&inode->i_mutex);
2302 return rc;
2303}
2304
2305int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2306{
2307 unsigned int xid;
2308 int rc = 0;
2309 struct cifs_tcon *tcon;
2310 struct TCP_Server_Info *server;
2311 struct cifsFileInfo *smbfile = file->private_data;
2312 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2313 struct inode *inode = file->f_mapping->host;
2314
2315 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2316 if (rc)
2317 return rc;
2318 mutex_lock(&inode->i_mutex);
2319
2320 xid = get_xid();
2321
2322 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2323 file, datasync);
2324
2325 tcon = tlink_tcon(smbfile->tlink);
2326 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2327 server = tcon->ses->server;
2328 if (server->ops->flush)
2329 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2330 else
2331 rc = -ENOSYS;
2332 }
2333
2334 free_xid(xid);
2335 mutex_unlock(&inode->i_mutex);
2336 return rc;
2337}
2338
2339/*
2340 * As the file closes, flush all cached write data for this inode,
2341 * checking for write-behind errors.
2342 */
2343int cifs_flush(struct file *file, fl_owner_t id)
2344{
2345 struct inode *inode = file_inode(file);
2346 int rc = 0;
2347
2348 if (file->f_mode & FMODE_WRITE)
2349 rc = filemap_write_and_wait(inode->i_mapping);
2350
2351 cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2352
2353 return rc;
2354}
2355
2356static int
2357cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2358{
2359 int rc = 0;
2360 unsigned long i;
2361
2362 for (i = 0; i < num_pages; i++) {
2363 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2364 if (!pages[i]) {
2365 /*
2366 * save number of pages we have already allocated and
2367 * return with ENOMEM error
2368 */
2369 num_pages = i;
2370 rc = -ENOMEM;
2371 break;
2372 }
2373 }
2374
2375 if (rc) {
2376 for (i = 0; i < num_pages; i++)
2377 put_page(pages[i]);
2378 }
2379 return rc;
2380}
2381
2382static inline
2383size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2384{
2385 size_t num_pages;
2386 size_t clen;
2387
2388 clen = min_t(const size_t, len, wsize);
2389 num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2390
2391 if (cur_len)
2392 *cur_len = clen;
2393
2394 return num_pages;
2395}
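
/*
 * Example of get_numpages() (illustrative numbers, assuming 4K pages):
 * wsize = 65536 and len = 70000 give clen = 65536 and num_pages =
 * DIV_ROUND_UP(65536, 4096) = 16; len = 3000 gives clen = 3000 and
 * num_pages = 1.
 */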
2396
2397static void
2398cifs_uncached_writedata_release(struct kref *refcount)
2399{
2400 int i;
2401 struct cifs_writedata *wdata = container_of(refcount,
2402 struct cifs_writedata, refcount);
2403
2404 for (i = 0; i < wdata->nr_pages; i++)
2405 put_page(wdata->pages[i]);
2406 cifs_writedata_release(refcount);
2407}
2408
2409static void
2410cifs_uncached_writev_complete(struct work_struct *work)
2411{
2412 struct cifs_writedata *wdata = container_of(work,
2413 struct cifs_writedata, work);
2414 struct inode *inode = wdata->cfile->dentry->d_inode;
2415 struct cifsInodeInfo *cifsi = CIFS_I(inode);
2416
2417 spin_lock(&inode->i_lock);
2418 cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2419 if (cifsi->server_eof > inode->i_size)
2420 i_size_write(inode, cifsi->server_eof);
2421 spin_unlock(&inode->i_lock);
2422
2423 complete(&wdata->done);
2424
2425 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2426}
2427
2428static int
2429wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2430 size_t *len, unsigned long *num_pages)
2431{
2432 size_t save_len, copied, bytes, cur_len = *len;
2433 unsigned long i, nr_pages = *num_pages;
2434
2435 save_len = cur_len;
2436 for (i = 0; i < nr_pages; i++) {
2437 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2438 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2439 cur_len -= copied;
2440 /*
2441 * If we didn't copy as much as we expected, then that
2442 * may mean we trod into an unmapped area. Stop copying
2443 * at that point. On the next pass through the big
2444 * loop, we'll likely end up getting a zero-length
2445 * write and bailing out of it.
2446 */
2447 if (copied < bytes)
2448 break;
2449 }
2450 cur_len = save_len - cur_len;
2451 *len = cur_len;
2452
2453 /*
2454 * If we have no data to send, then that probably means that
2455 * the copy above failed altogether. That's most likely because
2456 * the address in the iovec was bogus. Return -EFAULT and let
2457 * the caller free anything we allocated and bail out.
2458 */
2459 if (!cur_len)
2460 return -EFAULT;
2461
2462 /*
2463 * i + 1 now represents the number of pages we actually used in
2464 * the copy phase above.
2465 */
2466 *num_pages = i + 1;
2467 return 0;
2468}
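
/*
 * Worked example of the short-copy handling above (illustrative
 * numbers, assuming 4K pages): with *len = 6000, the first
 * copy_page_from_iter() asks for 4096 bytes. If only 1000 arrive
 * because the iovec ran into an unmapped area, the loop breaks with
 * i == 0, so cur_len ends up as 6000 - 5000 = 1000, *len = 1000 and
 * *num_pages = 1: only the bytes actually copied get sent.
 */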
2469
2470static int
2471cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2472 struct cifsFileInfo *open_file,
2473 struct cifs_sb_info *cifs_sb, struct list_head *wdata_list)
2474{
2475 int rc = 0;
2476 size_t cur_len;
2477 unsigned long nr_pages, num_pages, i;
2478 struct cifs_writedata *wdata;
2479 struct iov_iter saved_from;
2480 loff_t saved_offset = offset;
2481 pid_t pid;
2482 struct TCP_Server_Info *server;
2483
2484 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2485 pid = open_file->pid;
2486 else
2487 pid = current->tgid;
2488
2489 server = tlink_tcon(open_file->tlink)->ses->server;
2490 memcpy(&saved_from, from, sizeof(struct iov_iter));
2491
2492 do {
2493 unsigned int wsize, credits;
2494
2495 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2496 &wsize, &credits);
2497 if (rc)
2498 break;
2499
2500 nr_pages = get_numpages(wsize, len, &cur_len);
2501 wdata = cifs_writedata_alloc(nr_pages,
2502 cifs_uncached_writev_complete);
2503 if (!wdata) {
2504 rc = -ENOMEM;
2505 add_credits_and_wake_if(server, credits, 0);
2506 break;
2507 }
2508
2509 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2510 if (rc) {
2511 kfree(wdata);
2512 add_credits_and_wake_if(server, credits, 0);
2513 break;
2514 }
2515
2516 num_pages = nr_pages;
2517 rc = wdata_fill_from_iovec(wdata, from, &cur_len, &num_pages);
2518 if (rc) {
2519 for (i = 0; i < nr_pages; i++)
2520 put_page(wdata->pages[i]);
2521 kfree(wdata);
2522 add_credits_and_wake_if(server, credits, 0);
2523 break;
2524 }
2525
2526 /*
2527 * Bring nr_pages down to the number of pages we actually used,
2528 * and free any pages that we didn't use.
2529 */
2530 for ( ; nr_pages > num_pages; nr_pages--)
2531 put_page(wdata->pages[nr_pages - 1]);
2532
2533 wdata->sync_mode = WB_SYNC_ALL;
2534 wdata->nr_pages = nr_pages;
2535 wdata->offset = (__u64)offset;
2536 wdata->cfile = cifsFileInfo_get(open_file);
2537 wdata->pid = pid;
2538 wdata->bytes = cur_len;
2539 wdata->pagesz = PAGE_SIZE;
2540 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2541 wdata->credits = credits;
2542
2543 if (!wdata->cfile->invalidHandle ||
2544 !cifs_reopen_file(wdata->cfile, false))
2545 rc = server->ops->async_writev(wdata,
2546 cifs_uncached_writedata_release);
2547 if (rc) {
2548 add_credits_and_wake_if(server, wdata->credits, 0);
2549 kref_put(&wdata->refcount,
2550 cifs_uncached_writedata_release);
2551 if (rc == -EAGAIN) {
2552 memcpy(from, &saved_from,
2553 sizeof(struct iov_iter));
2554 iov_iter_advance(from, offset - saved_offset);
2555 continue;
2556 }
2557 break;
2558 }
2559
2560 list_add_tail(&wdata->list, wdata_list);
2561 offset += cur_len;
2562 len -= cur_len;
2563 } while (len > 0);
2564
2565 return rc;
2566}
2567
2568static ssize_t
2569cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset)
2570{
2571 size_t len;
2572 ssize_t total_written = 0;
2573 struct cifsFileInfo *open_file;
2574 struct cifs_tcon *tcon;
2575 struct cifs_sb_info *cifs_sb;
2576 struct cifs_writedata *wdata, *tmp;
2577 struct list_head wdata_list;
2578 struct iov_iter saved_from;
2579 int rc;
2580
2581 len = iov_iter_count(from);
2582 rc = generic_write_checks(file, poffset, &len, 0);
2583 if (rc)
2584 return rc;
2585
2586 if (!len)
2587 return 0;
2588
2589 iov_iter_truncate(from, len);
2590
2591 INIT_LIST_HEAD(&wdata_list);
2592 cifs_sb = CIFS_FILE_SB(file);
2593 open_file = file->private_data;
2594 tcon = tlink_tcon(open_file->tlink);
2595
2596 if (!tcon->ses->server->ops->async_writev)
2597 return -ENOSYS;
2598
2599 memcpy(&saved_from, from, sizeof(struct iov_iter));
2600
2601 rc = cifs_write_from_iter(*poffset, len, from, open_file, cifs_sb,
2602 &wdata_list);
2603
2604 /*
2605 * If at least one write was successfully sent, then discard any rc
2606 * value from the later writes. If the remaining writes succeed, then
2607 * we'll end up returning whatever was written. If one fails, then
2608 * we'll get a new rc value from that.
2609 */
2610 if (!list_empty(&wdata_list))
2611 rc = 0;
2612
2613 /*
2614 * Wait for and collect replies for any successful sends in order of
2615 * increasing offset. Once an error is hit or we get a fatal signal
2616 * while waiting, then return without waiting for any more replies.
2617 */
2618restart_loop:
2619 list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
2620 if (!rc) {
2621 /* FIXME: freezable too? */
2622 rc = wait_for_completion_killable(&wdata->done);
2623 if (rc)
2624 rc = -EINTR;
2625 else if (wdata->result)
2626 rc = wdata->result;
2627 else
2628 total_written += wdata->bytes;
2629
2630 /* resend call if it's a retryable error */
2631 if (rc == -EAGAIN) {
2632 struct list_head tmp_list;
2633 struct iov_iter tmp_from;
2634
2635 INIT_LIST_HEAD(&tmp_list);
2636 list_del_init(&wdata->list);
2637
2638 memcpy(&tmp_from, &saved_from,
2639 sizeof(struct iov_iter));
2640 iov_iter_advance(&tmp_from,
2641 wdata->offset - *poffset);
2642
2643 rc = cifs_write_from_iter(wdata->offset,
2644 wdata->bytes, &tmp_from,
2645 open_file, cifs_sb, &tmp_list);
2646
2647 list_splice(&tmp_list, &wdata_list);
2648
2649 kref_put(&wdata->refcount,
2650 cifs_uncached_writedata_release);
2651 goto restart_loop;
2652 }
2653 }
2654 list_del_init(&wdata->list);
2655 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2656 }
2657
2658 if (total_written > 0)
2659 *poffset += total_written;
2660
2661 cifs_stats_bytes_written(tcon, total_written);
2662 return total_written ? total_written : (ssize_t)rc;
2663}
2664
2665ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
2666{
2667 ssize_t written;
2668 struct inode *inode;
2669 loff_t pos = iocb->ki_pos;
2670
2671 inode = file_inode(iocb->ki_filp);
2672
2673 /*
2674 * BB - optimize this path for the case when signing is disabled. We can
2675 * drop the extra memory-to-memory copying and use iovec buffers for
2676 * constructing the write request.
2677 */
2678
2679 written = cifs_iovec_write(iocb->ki_filp, from, &pos);
2680 if (written > 0) {
2681 set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(inode)->flags);
2682 iocb->ki_pos = pos;
2683 }
2684
2685 return written;
2686}
2687
2688static ssize_t
2689cifs_writev(struct kiocb *iocb, struct iov_iter *from)
2690{
2691 struct file *file = iocb->ki_filp;
2692 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2693 struct inode *inode = file->f_mapping->host;
2694 struct cifsInodeInfo *cinode = CIFS_I(inode);
2695 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2696 ssize_t rc = -EACCES;
2697 loff_t lock_pos = iocb->ki_pos;
2698
2699 /*
2700 * We need to hold the sem to be sure nobody modifies lock list
2701 * with a brlock that prevents writing.
2702 */
2703 down_read(&cinode->lock_sem);
2704 mutex_lock(&inode->i_mutex);
2705 if (file->f_flags & O_APPEND)
2706 lock_pos = i_size_read(inode);
2707 if (!cifs_find_lock_conflict(cfile, lock_pos, iov_iter_count(from),
2708 server->vals->exclusive_lock_type, NULL,
2709 CIFS_WRITE_OP)) {
2710 rc = __generic_file_write_iter(iocb, from);
2711 mutex_unlock(&inode->i_mutex);
2712
2713 if (rc > 0) {
2714 ssize_t err;
2715
2716 err = generic_write_sync(file, iocb->ki_pos - rc, rc);
2717 if (err < 0)
2718 rc = err;
2719 }
2720 } else {
2721 mutex_unlock(&inode->i_mutex);
2722 }
2723 up_read(&cinode->lock_sem);
2724 return rc;
2725}
2726
2727ssize_t
2728cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
2729{
2730 struct inode *inode = file_inode(iocb->ki_filp);
2731 struct cifsInodeInfo *cinode = CIFS_I(inode);
2732 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2733 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2734 iocb->ki_filp->private_data;
2735 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2736 ssize_t written;
2737
2738 written = cifs_get_writer(cinode);
2739 if (written)
2740 return written;
2741
2742 if (CIFS_CACHE_WRITE(cinode)) {
2743 if (cap_unix(tcon->ses) &&
2744 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
2745 && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
2746 written = generic_file_write_iter(iocb, from);
2747 goto out;
2748 }
2749 written = cifs_writev(iocb, from);
2750 goto out;
2751 }
2752 /*
2753 * For non-oplocked files in strict cache mode we need to write the data
2754 * to the server exactly from the pos to pos+len-1 rather than flush all
2755 * affected pages because it may cause an error with mandatory locks on
2756 * these pages but not on the region from pos to pos+len-1.
2757 */
2758 written = cifs_user_writev(iocb, from);
2759 if (written > 0 && CIFS_CACHE_READ(cinode)) {
2760 /*
2761 * A Windows 7 server can delay breaking a level2 oplock when a write
2762 * request comes in - break it on the client to prevent reading
2763 * stale data.
2764 */
2765 cifs_zap_mapping(inode);
2766 cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
2767 inode);
2768 cinode->oplock = 0;
2769 }
2770out:
2771 cifs_put_writer(cinode);
2772 return written;
2773}
2774
2775static struct cifs_readdata *
2776cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2777{
2778 struct cifs_readdata *rdata;
2779
2780 rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2781 GFP_KERNEL);
2782 if (rdata != NULL) {
2783 kref_init(&rdata->refcount);
2784 INIT_LIST_HEAD(&rdata->list);
2785 init_completion(&rdata->done);
2786 INIT_WORK(&rdata->work, complete);
2787 }
2788
2789 return rdata;
2790}
2791
2792void
2793cifs_readdata_release(struct kref *refcount)
2794{
2795 struct cifs_readdata *rdata = container_of(refcount,
2796 struct cifs_readdata, refcount);
2797
2798 if (rdata->cfile)
2799 cifsFileInfo_put(rdata->cfile);
2800
2801 kfree(rdata);
2802}
2803
2804static int
2805cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2806{
2807 int rc = 0;
2808 struct page *page;
2809 unsigned int i;
2810
2811 for (i = 0; i < nr_pages; i++) {
2812 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2813 if (!page) {
2814 rc = -ENOMEM;
2815 break;
2816 }
2817 rdata->pages[i] = page;
2818 }
2819
2820 if (rc) {
2821 for (i = 0; i < nr_pages; i++) {
2822 put_page(rdata->pages[i]);
2823 rdata->pages[i] = NULL;
2824 }
2825 }
2826 return rc;
2827}
2828
2829static void
2830cifs_uncached_readdata_release(struct kref *refcount)
2831{
2832 struct cifs_readdata *rdata = container_of(refcount,
2833 struct cifs_readdata, refcount);
2834 unsigned int i;
2835
2836 for (i = 0; i < rdata->nr_pages; i++) {
2837 put_page(rdata->pages[i]);
2838 rdata->pages[i] = NULL;
2839 }
2840 cifs_readdata_release(refcount);
2841}
2842
2843/**
2844 * cifs_readdata_to_iov - copy data from pages in response to an iovec
2845 * @rdata: the readdata response with list of pages holding data
2846 * @iter: destination for our data
2847 *
2848 * This function copies data from a list of pages in a readdata response into
2849 * an array of iovecs. It will first calculate where the data should go
2850 * based on the info in the readdata and then copy the data into that spot.
2851 */
2852static int
2853cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
2854{
2855 size_t remaining = rdata->got_bytes;
2856 unsigned int i;
2857
2858 for (i = 0; i < rdata->nr_pages; i++) {
2859 struct page *page = rdata->pages[i];
2860 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
2861 size_t written = copy_page_to_iter(page, 0, copy, iter);
2862 remaining -= written;
2863 if (written < copy && iov_iter_count(iter) > 0)
2864 break;
2865 }
2866 return remaining ? -EFAULT : 0;
2867}
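
/*
 * Example of the copy loop above (illustrative numbers, assuming 4K
 * pages): got_bytes = 5000 copies 4096 bytes from the first page and
 * 904 from the second; remaining then reaches 0 and the function
 * returns 0. A short copy_page_to_iter() with iovec space left over
 * means a fault, so any remaining bytes produce -EFAULT instead.
 */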
2868
2869static void
2870cifs_uncached_readv_complete(struct work_struct *work)
2871{
2872 struct cifs_readdata *rdata = container_of(work,
2873 struct cifs_readdata, work);
2874
2875 complete(&rdata->done);
2876 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2877}
2878
2879static int
2880cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
2881 struct cifs_readdata *rdata, unsigned int len)
2882{
2883 int result = 0;
2884 unsigned int i;
2885 unsigned int nr_pages = rdata->nr_pages;
2886 struct kvec iov;
2887
2888 rdata->got_bytes = 0;
2889 rdata->tailsz = PAGE_SIZE;
2890 for (i = 0; i < nr_pages; i++) {
2891 struct page *page = rdata->pages[i];
2892
2893 if (len >= PAGE_SIZE) {
2894 /* enough data to fill the page */
2895 iov.iov_base = kmap(page);
2896 iov.iov_len = PAGE_SIZE;
2897 cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
2898 i, iov.iov_base, iov.iov_len);
2899 len -= PAGE_SIZE;
2900 } else if (len > 0) {
2901 /* enough for partial page, fill and zero the rest */
2902 iov.iov_base = kmap(page);
2903 iov.iov_len = len;
2904 cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
2905 i, iov.iov_base, iov.iov_len);
2906 memset(iov.iov_base + len, '\0', PAGE_SIZE - len);
2907 rdata->tailsz = len;
2908 len = 0;
2909 } else {
2910 /* no need to hold page hostage */
2911 rdata->pages[i] = NULL;
2912 rdata->nr_pages--;
2913 put_page(page);
2914 continue;
2915 }
2916
2917 result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
2918 kunmap(page);
2919 if (result < 0)
2920 break;
2921
2922 rdata->got_bytes += result;
2923 }
2924
2925 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
2926 rdata->got_bytes : result;
2927}
2928
2929static int
2930cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
2931 struct cifs_sb_info *cifs_sb, struct list_head *rdata_list)
2932{
2933 struct cifs_readdata *rdata;
2934 unsigned int npages, rsize, credits;
2935 size_t cur_len;
2936 int rc;
2937 pid_t pid;
2938 struct TCP_Server_Info *server;
2939
2940 server = tlink_tcon(open_file->tlink)->ses->server;
2941
2942 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2943 pid = open_file->pid;
2944 else
2945 pid = current->tgid;
2946
2947 do {
2948 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
2949 &rsize, &credits);
2950 if (rc)
2951 break;
2952
2953 cur_len = min_t(const size_t, len, rsize);
2954 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2955
2956 /* allocate a readdata struct */
2957 rdata = cifs_readdata_alloc(npages,
2958 cifs_uncached_readv_complete);
2959 if (!rdata) {
2960 add_credits_and_wake_if(server, credits, 0);
2961 rc = -ENOMEM;
2962 break;
2963 }
2964
2965 rc = cifs_read_allocate_pages(rdata, npages);
2966 if (rc)
2967 goto error;
2968
2969 rdata->cfile = cifsFileInfo_get(open_file);
2970 rdata->nr_pages = npages;
2971 rdata->offset = offset;
2972 rdata->bytes = cur_len;
2973 rdata->pid = pid;
2974 rdata->pagesz = PAGE_SIZE;
2975 rdata->read_into_pages = cifs_uncached_read_into_pages;
2976 rdata->credits = credits;
2977
2978 if (!rdata->cfile->invalidHandle ||
2979 !cifs_reopen_file(rdata->cfile, true))
2980 rc = server->ops->async_readv(rdata);
2981error:
2982 if (rc) {
2983 add_credits_and_wake_if(server, rdata->credits, 0);
2984 kref_put(&rdata->refcount,
2985 cifs_uncached_readdata_release);
2986 if (rc == -EAGAIN)
2987 continue;
2988 break;
2989 }
2990
2991 list_add_tail(&rdata->list, rdata_list);
2992 offset += cur_len;
2993 len -= cur_len;
2994 } while (len > 0);
2995
2996 return rc;
2997}
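
/*
 * Example of the request splitting above (illustrative numbers,
 * assuming 4K pages): len = 100000 with rsize = 61440 is issued as two
 * async reads, cur_len = 61440 (npages = 15) followed by
 * cur_len = 38560 (npages = DIV_ROUND_UP(38560, 4096) = 10).
 */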
2998
2999ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
3000{
3001 struct file *file = iocb->ki_filp;
3002 ssize_t rc;
3003 size_t len;
3004 ssize_t total_read = 0;
3005 loff_t offset = iocb->ki_pos;
3006 struct cifs_sb_info *cifs_sb;
3007 struct cifs_tcon *tcon;
3008 struct cifsFileInfo *open_file;
3009 struct cifs_readdata *rdata, *tmp;
3010 struct list_head rdata_list;
3011
3012 len = iov_iter_count(to);
3013 if (!len)
3014 return 0;
3015
3016 INIT_LIST_HEAD(&rdata_list);
3017 cifs_sb = CIFS_FILE_SB(file);
3018 open_file = file->private_data;
3019 tcon = tlink_tcon(open_file->tlink);
3020
3021 if (!tcon->ses->server->ops->async_readv)
3022 return -ENOSYS;
3023
3024 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3025 cifs_dbg(FYI, "attempting read on write only file instance\n");
3026
3027 rc = cifs_send_async_read(offset, len, open_file, cifs_sb, &rdata_list);
3028
3029 /* if at least one read request send succeeded, then reset rc */
3030 if (!list_empty(&rdata_list))
3031 rc = 0;
3032
3033 len = iov_iter_count(to);
3034 /* the loop below should proceed in the order of increasing offsets */
3035again:
3036 list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
3037 if (!rc) {
3038 /* FIXME: freezable sleep too? */
3039 rc = wait_for_completion_killable(&rdata->done);
3040 if (rc)
3041 rc = -EINTR;
3042 else if (rdata->result == -EAGAIN) {
3043 /* resend call if it's a retryable error */
3044 struct list_head tmp_list;
3045 unsigned int got_bytes = rdata->got_bytes;
3046
3047 list_del_init(&rdata->list);
3048 INIT_LIST_HEAD(&tmp_list);
3049
3050 /*
3051 * Got part of the data and then a reconnect
3052 * happened -- fill the buffer and continue
3053 * reading.
3054 */
3055 if (got_bytes && got_bytes < rdata->bytes) {
3056 rc = cifs_readdata_to_iov(rdata, to);
3057 if (rc) {
3058 kref_put(&rdata->refcount,
3059 cifs_uncached_readdata_release);
3060 continue;
3061 }
3062 }
3063
3064 rc = cifs_send_async_read(
3065 rdata->offset + got_bytes,
3066 rdata->bytes - got_bytes,
3067 rdata->cfile, cifs_sb,
3068 &tmp_list);
3069
3070 list_splice(&tmp_list, &rdata_list);
3071
3072 kref_put(&rdata->refcount,
3073 cifs_uncached_readdata_release);
3074 goto again;
3075 } else if (rdata->result)
3076 rc = rdata->result;
3077 else
3078 rc = cifs_readdata_to_iov(rdata, to);
3079
3080 /* if there was a short read -- discard anything left */
3081 if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3082 rc = -ENODATA;
3083 }
3084 list_del_init(&rdata->list);
3085 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3086 }
3087
3088 total_read = len - iov_iter_count(to);
3089
3090 cifs_stats_bytes_read(tcon, total_read);
3091
3092 /* mask nodata case */
3093 if (rc == -ENODATA)
3094 rc = 0;
3095
3096 if (total_read) {
3097 iocb->ki_pos += total_read;
3098 return total_read;
3099 }
3100 return rc;
3101}
3102
3103ssize_t
3104cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3105{
3106 struct inode *inode = file_inode(iocb->ki_filp);
3107 struct cifsInodeInfo *cinode = CIFS_I(inode);
3108 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3109 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3110 iocb->ki_filp->private_data;
3111 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3112 int rc = -EACCES;
3113
3114 /*
3115 * In strict cache mode we need to read from the server every time if
3116 * we don't have a level II oplock, because the server can delay mtime
3117 * changes and so we can't decide whether the inode needs invalidating.
3118 * Reading from the page cache can also fail if there are mandatory locks
3119 * on pages affected by this read but not on the region from pos to
3120 * pos+len-1.
3121 */
3122 if (!CIFS_CACHE_READ(cinode))
3123 return cifs_user_readv(iocb, to);
3124
3125 if (cap_unix(tcon->ses) &&
3126 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3127 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
3128 return generic_file_read_iter(iocb, to);
3129
3130 /*
3131 * We need to hold the sem to be sure nobody modifies lock list
3132 * with a brlock that prevents reading.
3133 */
3134 down_read(&cinode->lock_sem);
3135 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
3136 tcon->ses->server->vals->shared_lock_type,
3137 NULL, CIFS_READ_OP))
3138 rc = generic_file_read_iter(iocb, to);
3139 up_read(&cinode->lock_sem);
3140 return rc;
3141}
3142
3143static ssize_t
3144cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
3145{
3146 int rc = -EACCES;
3147 unsigned int bytes_read = 0;
3148 unsigned int total_read;
3149 unsigned int current_read_size;
3150 unsigned int rsize;
3151 struct cifs_sb_info *cifs_sb;
3152 struct cifs_tcon *tcon;
3153 struct TCP_Server_Info *server;
3154 unsigned int xid;
3155 char *cur_offset;
3156 struct cifsFileInfo *open_file;
3157 struct cifs_io_parms io_parms;
3158 int buf_type = CIFS_NO_BUFFER;
3159 __u32 pid;
3160
3161 xid = get_xid();
3162 cifs_sb = CIFS_FILE_SB(file);
3163
3164 /* FIXME: set up handlers for larger reads and/or convert to async */
3165 rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
3166
3167 if (file->private_data == NULL) {
3168 rc = -EBADF;
3169 free_xid(xid);
3170 return rc;
3171 }
3172 open_file = file->private_data;
3173 tcon = tlink_tcon(open_file->tlink);
3174 server = tcon->ses->server;
3175
3176 if (!server->ops->sync_read) {
3177 free_xid(xid);
3178 return -ENOSYS;
3179 }
3180
3181 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3182 pid = open_file->pid;
3183 else
3184 pid = current->tgid;
3185
3186 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3187 cifs_dbg(FYI, "attempting read on write only file instance\n");
3188
3189 for (total_read = 0, cur_offset = read_data; read_size > total_read;
3190 total_read += bytes_read, cur_offset += bytes_read) {
3191 do {
3192 current_read_size = min_t(uint, read_size - total_read,
3193 rsize);
3194 /*
3195 * For Windows ME and 9x we do not want to request more
3196 * than the server negotiated since it will then refuse
3197 * the read.
3198 */
3199 if ((tcon->ses) && !(tcon->ses->capabilities &
3200 tcon->ses->server->vals->cap_large_files)) {
3201 current_read_size = min_t(uint,
3202 current_read_size, CIFSMaxBufSize);
3203 }
3204 if (open_file->invalidHandle) {
3205 rc = cifs_reopen_file(open_file, true);
3206 if (rc != 0)
3207 break;
3208 }
3209 io_parms.pid = pid;
3210 io_parms.tcon = tcon;
3211 io_parms.offset = *offset;
3212 io_parms.length = current_read_size;
3213 rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
3214 &bytes_read, &cur_offset,
3215 &buf_type);
3216 } while (rc == -EAGAIN);
3217
3218 if (rc || (bytes_read == 0)) {
3219 if (total_read) {
3220 break;
3221 } else {
3222 free_xid(xid);
3223 return rc;
3224 }
3225 } else {
3226 cifs_stats_bytes_read(tcon, total_read);
3227 *offset += bytes_read;
3228 }
3229 }
3230 free_xid(xid);
3231 return total_read;
3232}
3233
3234/*
3235 * If the page is mmap'ed into a process' page tables, then we need to make
3236 * sure that it doesn't change while being written back.
3237 */
3238static int
3239cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
3240{
3241 struct page *page = vmf->page;
3242
3243 lock_page(page);
3244 return VM_FAULT_LOCKED;
3245}
3246
3247static struct vm_operations_struct cifs_file_vm_ops = {
3248 .fault = filemap_fault,
3249 .map_pages = filemap_map_pages,
3250 .page_mkwrite = cifs_page_mkwrite,
3251 .remap_pages = generic_file_remap_pages,
3252};
3253
3254int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3255{
3256 int rc, xid;
3257 struct inode *inode = file_inode(file);
3258
3259 xid = get_xid();
3260
3261 if (!CIFS_CACHE_READ(CIFS_I(inode))) {
3262 rc = cifs_zap_mapping(inode);
3263 if (rc)
3264 goto out; /* don't leak the xid */
3265 }
3266 rc = generic_file_mmap(file, vma);
3267 if (rc == 0)
3268 vma->vm_ops = &cifs_file_vm_ops;
3269out:
3270 free_xid(xid);
3271 return rc;
3272}
3273
3274int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3275{
3276 int rc, xid;
3277
3278 xid = get_xid();
3279 rc = cifs_revalidate_file(file);
3280 if (rc) {
3281 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
3282 rc);
3283 free_xid(xid);
3284 return rc;
3285 }
3286 rc = generic_file_mmap(file, vma);
3287 if (rc == 0)
3288 vma->vm_ops = &cifs_file_vm_ops;
3289 free_xid(xid);
3290 return rc;
3291}
3292
3293static void
3294cifs_readv_complete(struct work_struct *work)
3295{
3296 unsigned int i, got_bytes;
3297 struct cifs_readdata *rdata = container_of(work,
3298 struct cifs_readdata, work);
3299
3300 got_bytes = rdata->got_bytes;
3301 for (i = 0; i < rdata->nr_pages; i++) {
3302 struct page *page = rdata->pages[i];
3303
3304 lru_cache_add_file(page);
3305
3306 if (rdata->result == 0 ||
3307 (rdata->result == -EAGAIN && got_bytes)) {
3308 flush_dcache_page(page);
3309 SetPageUptodate(page);
3310 }
3311
3312 unlock_page(page);
3313
3314 if (rdata->result == 0 ||
3315 (rdata->result == -EAGAIN && got_bytes))
3316 cifs_readpage_to_fscache(rdata->mapping->host, page);
3317
3318 got_bytes -= min_t(unsigned int, PAGE_CACHE_SIZE, got_bytes);
3319
3320 page_cache_release(page);
3321 rdata->pages[i] = NULL;
3322 }
3323 kref_put(&rdata->refcount, cifs_readdata_release);
3324}
3325
3326static int
3327cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3328 struct cifs_readdata *rdata, unsigned int len)
3329{
3330 int result = 0;
3331 unsigned int i;
3332 u64 eof;
3333 pgoff_t eof_index;
3334 unsigned int nr_pages = rdata->nr_pages;
3335 struct kvec iov;
3336
3337 /* determine the eof that the server (probably) has */
3338 eof = CIFS_I(rdata->mapping->host)->server_eof;
3339 eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
3340 cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
3341
3342 rdata->got_bytes = 0;
3343 rdata->tailsz = PAGE_CACHE_SIZE;
3344 for (i = 0; i < nr_pages; i++) {
3345 struct page *page = rdata->pages[i];
3346
3347 if (len >= PAGE_CACHE_SIZE) {
3348 /* enough data to fill the page */
3349 iov.iov_base = kmap(page);
3350 iov.iov_len = PAGE_CACHE_SIZE;
3351 cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
3352 i, page->index, iov.iov_base, iov.iov_len);
3353 len -= PAGE_CACHE_SIZE;
3354 } else if (len > 0) {
3355 /* enough for partial page, fill and zero the rest */
3356 iov.iov_base = kmap(page);
3357 iov.iov_len = len;
3358 cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
3359 i, page->index, iov.iov_base, iov.iov_len);
3360 memset(iov.iov_base + len,
3361 '\0', PAGE_CACHE_SIZE - len);
3362 rdata->tailsz = len;
3363 len = 0;
3364 } else if (page->index > eof_index) {
3365 /*
3366 * The VFS will not try to do readahead past the
3367 * i_size, but it's possible that we have outstanding
3368 * writes with gaps in the middle and the i_size hasn't
3369 * caught up yet. Populate those with zeroed out pages
3370 * to prevent the VFS from repeatedly attempting to
3371 * fill them until the writes are flushed.
3372 */
3373 zero_user(page, 0, PAGE_CACHE_SIZE);
3374 lru_cache_add_file(page);
3375 flush_dcache_page(page);
3376 SetPageUptodate(page);
3377 unlock_page(page);
3378 page_cache_release(page);
3379 rdata->pages[i] = NULL;
3380 rdata->nr_pages--;
3381 continue;
3382 } else {
3383 /* no need to hold page hostage */
3384 lru_cache_add_file(page);
3385 unlock_page(page);
3386 page_cache_release(page);
3387 rdata->pages[i] = NULL;
3388 rdata->nr_pages--;
3389 continue;
3390 }
3391
3392 result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
3393 kunmap(page);
3394 if (result < 0)
3395 break;
3396
3397 rdata->got_bytes += result;
3398 }
3399
3400 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3401 rdata->got_bytes : result;
3402}
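
/*
 * Example of the eof_index calculation above (illustrative numbers,
 * assuming 4K pages): server_eof = 8193 gives
 * eof_index = (8193 - 1) >> 12 = 2, i.e. the last valid byte lives in
 * the page with index 2; any page with a larger index is past the
 * server's EOF and is zero-filled rather than read.
 */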
3403
3404static int
3405readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
3406 unsigned int rsize, struct list_head *tmplist,
3407 unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
3408{
3409 struct page *page, *tpage;
3410 unsigned int expected_index;
3411 int rc;
3412
3413 INIT_LIST_HEAD(tmplist);
3414
3415 page = list_entry(page_list->prev, struct page, lru);
3416
3417 /*
3418 * Lock the page and put it in the cache. Since no one else
3419 * should have access to this page, we're safe to simply set
3420 * PG_locked without checking it first.
3421 */
3422 __set_page_locked(page);
3423 rc = add_to_page_cache_locked(page, mapping,
3424 page->index, GFP_KERNEL);
3425
3426 /* give up if we can't stick it in the cache */
3427 if (rc) {
3428 __clear_page_locked(page);
3429 return rc;
3430 }
3431
3432 /* move first page to the tmplist */
3433 *offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3434 *bytes = PAGE_CACHE_SIZE;
3435 *nr_pages = 1;
3436 list_move_tail(&page->lru, tmplist);
3437
3438 /* now try and add more pages onto the request */
3439 expected_index = page->index + 1;
3440 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3441 /* discontinuity? */
3442 if (page->index != expected_index)
3443 break;
3444
3445 /* would this page push the read over the rsize? */
3446 if (*bytes + PAGE_CACHE_SIZE > rsize)
3447 break;
3448
3449 __set_page_locked(page);
3450 if (add_to_page_cache_locked(page, mapping, page->index,
3451 GFP_KERNEL)) {
3452 __clear_page_locked(page);
3453 break;
3454 }
3455 list_move_tail(&page->lru, tmplist);
3456 (*bytes) += PAGE_CACHE_SIZE;
3457 expected_index++;
3458 (*nr_pages)++;
3459 }
3460 return rc;
3461}
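
/*
 * Illustrative walk of the batching above (assuming 4K pages and
 * rsize = 16384): starting at page index 7, contiguous pages 7, 8, 9
 * and 10 from the readahead list are batched (4 * 4096 == rsize);
 * page 11 would push bytes over rsize and so starts a new request, as
 * would a gap in the indexes.
 */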
3462
3463static int cifs_readpages(struct file *file, struct address_space *mapping,
3464 struct list_head *page_list, unsigned num_pages)
3465{
3466 int rc;
3467 struct list_head tmplist;
3468 struct cifsFileInfo *open_file = file->private_data;
3469 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
3470 struct TCP_Server_Info *server;
3471 pid_t pid;
3472
3473 /*
3474 * Reads as many pages as possible from fscache. Returns -ENOBUFS
3475 * immediately if the cookie is negative
3476 *
3477 * After this point, every page in the list might have PG_fscache set,
3478 * so we will need to clean that up off of every page we don't use.
3479 */
3480 rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
3481 &num_pages);
3482 if (rc == 0)
3483 return rc;
3484
3485 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3486 pid = open_file->pid;
3487 else
3488 pid = current->tgid;
3489
3490 rc = 0;
3491 server = tlink_tcon(open_file->tlink)->ses->server;
3492
3493 cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
3494 __func__, file, mapping, num_pages);
3495
3496 /*
3497 * Start with the page at end of list and move it to private
3498 * list. Do the same with any following pages until we hit
3499 * the rsize limit, hit an index discontinuity, or run out of
3500 * pages. Issue the async read and then start the loop again
3501 * until the list is empty.
3502 *
3503 * Note that list order is important. The page_list is in
3504 * the order of declining indexes. When we put the pages in
3505 * the rdata->pages, then we want them in increasing order.
3506 */
3507 while (!list_empty(page_list)) {
3508 unsigned int i, nr_pages, bytes, rsize;
3509 loff_t offset;
3510 struct page *page, *tpage;
3511 struct cifs_readdata *rdata;
3512 unsigned credits;
3513
3514 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3515 &rsize, &credits);
3516 if (rc)
3517 break;
3518
3519 /*
3520 * Give up immediately if rsize is too small to read an entire
3521 * page. The VFS will fall back to readpage. We should never
3522 * reach this point however since we set ra_pages to 0 when the
3523 * rsize is smaller than a cache page.
3524 */
3525 if (unlikely(rsize < PAGE_CACHE_SIZE)) {
3526 add_credits_and_wake_if(server, credits, 0);
3527 return 0;
3528 }
3529
3530 rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
3531 &nr_pages, &offset, &bytes);
3532 if (rc) {
3533 add_credits_and_wake_if(server, credits, 0);
3534 break;
3535 }
3536
3537 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3538 if (!rdata) {
3539 /* best to give up if we're out of mem */
3540 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3541 list_del(&page->lru);
3542 lru_cache_add_file(page);
3543 unlock_page(page);
3544 page_cache_release(page);
3545 }
3546 rc = -ENOMEM;
3547 add_credits_and_wake_if(server, credits, 0);
3548 break;
3549 }
3550
3551 rdata->cfile = cifsFileInfo_get(open_file);
3552 rdata->mapping = mapping;
3553 rdata->offset = offset;
3554 rdata->bytes = bytes;
3555 rdata->pid = pid;
3556 rdata->pagesz = PAGE_CACHE_SIZE;
3557 rdata->read_into_pages = cifs_readpages_read_into_pages;
3558 rdata->credits = credits;
3559
3560 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3561 list_del(&page->lru);
3562 rdata->pages[rdata->nr_pages++] = page;
3563 }
3564
3565 if (!rdata->cfile->invalidHandle ||
3566 !cifs_reopen_file(rdata->cfile, true))
3567 rc = server->ops->async_readv(rdata);
3568 if (rc) {
3569 add_credits_and_wake_if(server, rdata->credits, 0);
3570 for (i = 0; i < rdata->nr_pages; i++) {
3571 page = rdata->pages[i];
3572 lru_cache_add_file(page);
3573 unlock_page(page);
3574 page_cache_release(page);
3575 }
3576 /* Fallback to the readpage in error/reconnect cases */
3577 kref_put(&rdata->refcount, cifs_readdata_release);
3578 break;
3579 }
3580
3581 kref_put(&rdata->refcount, cifs_readdata_release);
3582 }
3583
3584 /* Any pages that have been shown to fscache but didn't get added to
3585 * the pagecache must be uncached before they get returned to the
3586 * allocator.
3587 */
3588 cifs_fscache_readpages_cancel(mapping->host, page_list);
3589 return rc;
3590}
3591
3592/*
3593 * cifs_readpage_worker must be called with the page pinned
3594 */
3595static int cifs_readpage_worker(struct file *file, struct page *page,
3596 loff_t *poffset)
3597{
3598 char *read_data;
3599 int rc;
3600
3601 /* Is the page cached? */
3602 rc = cifs_readpage_from_fscache(file_inode(file), page);
3603 if (rc == 0)
3604 goto read_complete;
3605
3606 read_data = kmap(page);
3607 /* for reads over a certain size we could initiate async read-ahead */
3608
3609 rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
3610
3611 if (rc < 0)
3612 goto io_error;
3613 else
3614 cifs_dbg(FYI, "Bytes read %d\n", rc);
3615
3616 file_inode(file)->i_atime =
3617 current_fs_time(file_inode(file)->i_sb);
3618
3619 if (PAGE_CACHE_SIZE > rc)
3620 memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
3621
3622 flush_dcache_page(page);
3623 SetPageUptodate(page);
3624
3625 /* send this page to the cache */
3626 cifs_readpage_to_fscache(file_inode(file), page);
3627
3628 rc = 0;
3629
3630io_error:
3631 kunmap(page);
3632 unlock_page(page);
3633
3634read_complete:
3635 return rc;
3636}
3637
3638static int cifs_readpage(struct file *file, struct page *page)
3639{
3640 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3641 int rc = -EACCES;
3642 unsigned int xid;
3643
3644 xid = get_xid();
3645
3646 if (file->private_data == NULL) {
3647 rc = -EBADF;
3648 free_xid(xid);
3649 return rc;
3650 }
3651
3652 cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
3653 page, (int)offset, (int)offset);
3654
3655 rc = cifs_readpage_worker(file, page, &offset);
3656
3657 free_xid(xid);
3658 return rc;
3659}
3660
3661static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3662{
3663 struct cifsFileInfo *open_file;
3664
3665 spin_lock(&cifs_file_list_lock);
3666 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3667 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3668 spin_unlock(&cifs_file_list_lock);
3669 return 1;
3670 }
3671 }
3672 spin_unlock(&cifs_file_list_lock);
3673 return 0;
3674}
3675
3676/* We do not want to update the file size from the server for inodes
3677 open for write, to avoid races with writepage extending the file.
3678 In the future we could consider allowing the inode to be refreshed
3679 only on increases in the file size, but this is tricky to do
3680 without racing with writebehind page caching in the current
3681 Linux kernel design. */
3682bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3683{
3684 if (!cifsInode)
3685 return true;
3686
3687 if (is_inode_writable(cifsInode)) {
3688 /* This inode is open for write at least once */
3689 struct cifs_sb_info *cifs_sb;
3690
3691 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3692 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
3693 /* since no page cache to corrupt on directio
3694 we can change size safely */
3695 return true;
3696 }
3697
3698 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
3699 return true;
3700
3701 return false;
3702 } else
3703 return true;
3704}
3705
3706static int cifs_write_begin(struct file *file, struct address_space *mapping,
3707 loff_t pos, unsigned len, unsigned flags,
3708 struct page **pagep, void **fsdata)
3709{
3710 int oncethru = 0;
3711 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
3712 loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
3713 loff_t page_start = pos & PAGE_MASK;
3714 loff_t i_size;
3715 struct page *page;
3716 int rc = 0;
3717
3718 cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
3719
3720start:
3721 page = grab_cache_page_write_begin(mapping, index, flags);
3722 if (!page) {
3723 rc = -ENOMEM;
3724 goto out;
3725 }
3726
3727 if (PageUptodate(page))
3728 goto out;
3729
3730 /*
3731 * If we write a full page it will be up to date, no need to read from
3732 * the server. If the write is short, we'll end up doing a sync write
3733 * instead.
3734 */
3735 if (len == PAGE_CACHE_SIZE)
3736 goto out;
3737
3738 /*
3739 * optimize away the read when we have an oplock, and we're not
3740 * expecting to use any of the data we'd be reading in. That
3741 * is, when the page lies beyond the EOF, or straddles the EOF
3742 * and the write will cover all of the existing data.
3743 */
3744 if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
3745 i_size = i_size_read(mapping->host);
3746 if (page_start >= i_size ||
3747 (offset == 0 && (pos + len) >= i_size)) {
3748 zero_user_segments(page, 0, offset,
3749 offset + len,
3750 PAGE_CACHE_SIZE);
3751 /*
3752 * PageChecked means that the parts of the page
3753 * to which we're not writing are considered up
3754 * to date. Once the data is copied to the
3755 * page, it can be set uptodate.
3756 */
3757 SetPageChecked(page);
3758 goto out;
3759 }
3760 }
3761
3762 if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
3763 /*
3764 * might as well read a page, it is fast enough. If we get
3765 * an error, we don't need to return it. cifs_write_end will
3766 * do a sync write instead since PG_uptodate isn't set.
3767 */
3768 cifs_readpage_worker(file, page, &page_start);
3769 page_cache_release(page);
3770 oncethru = 1;
3771 goto start;
3772 } else {
3773 /* we could try using another file handle if there is one,
3774 but how would we lock it to prevent a close of that handle
3775 racing with this read? In any case this will be written
3776 out by write_end, so it is fine */
3777 }
3778out:
3779 *pagep = page;
3780 return rc;
3781}
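
/*
 * Worked example of the offset arithmetic in cifs_write_begin() above
 * (illustrative numbers, assuming 4K pages): pos = 10000 and len = 100
 * give index = 2, offset = 1808 and page_start = 8192. The read is
 * optimized away only when the page starts at or beyond i_size, or
 * when offset == 0 and pos + len reaches i_size; otherwise, with no
 * oplock, the page is read in first.
 */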
3782
3783static int cifs_release_page(struct page *page, gfp_t gfp)
3784{
3785 if (PagePrivate(page))
3786 return 0;
3787
3788 return cifs_fscache_release_page(page, gfp);
3789}
3790
3791static void cifs_invalidate_page(struct page *page, unsigned int offset,
3792 unsigned int length)
3793{
3794 struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
3795
3796 if (offset == 0 && length == PAGE_CACHE_SIZE)
3797 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
3798}
3799
3800static int cifs_launder_page(struct page *page)
3801{
3802 int rc = 0;
3803 loff_t range_start = page_offset(page);
3804 loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
3805 struct writeback_control wbc = {
3806 .sync_mode = WB_SYNC_ALL,
3807 .nr_to_write = 0,
3808 .range_start = range_start,
3809 .range_end = range_end,
3810 };
3811
3812 cifs_dbg(FYI, "Launder page: %p\n", page);
3813
3814 if (clear_page_dirty_for_io(page))
3815 rc = cifs_writepage_locked(page, &wbc);
3816
3817 cifs_fscache_invalidate_page(page, page->mapping->host);
3818 return rc;
3819}
3820
3821void cifs_oplock_break(struct work_struct *work)
3822{
3823 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
3824 oplock_break);
3825 struct inode *inode = cfile->dentry->d_inode;
3826 struct cifsInodeInfo *cinode = CIFS_I(inode);
3827 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3828 struct TCP_Server_Info *server = tcon->ses->server;
3829 int rc = 0;
3830
3831 wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
3832 TASK_UNINTERRUPTIBLE);
3833
3834 server->ops->downgrade_oplock(server, cinode,
3835 test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));
3836
3837 if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
3838 cifs_has_mand_locks(cinode)) {
3839 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
3840 inode);
3841 cinode->oplock = 0;
3842 }
3843
3844 if (inode && S_ISREG(inode->i_mode)) {
3845 if (CIFS_CACHE_READ(cinode))
3846 break_lease(inode, O_RDONLY);
3847 else
3848 break_lease(inode, O_WRONLY);
3849 rc = filemap_fdatawrite(inode->i_mapping);
3850 if (!CIFS_CACHE_READ(cinode)) {
3851 rc = filemap_fdatawait(inode->i_mapping);
3852 mapping_set_error(inode->i_mapping, rc);
3853 cifs_zap_mapping(inode);
3854 }
3855 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
3856 }
3857
3858 rc = cifs_push_locks(cfile);
3859 if (rc)
3860 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
3861
3862 /*
3863 * Releasing a stale oplock after a recent reconnect of the smb session,
3864 * using a now incorrect file handle, is not a data integrity issue, but
3865 * do not bother sending an oplock release if the session is still
3866 * disconnected, since the oplock was already released by the server.
3867 */
3868 if (!cfile->oplock_break_cancelled) {
3869 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
3870 cinode);
3871 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
3872 }
3873 cifs_done_oplock_break(cinode);
3874}
3875
3876/*
3877 * The presence of cifs_direct_io() in the address space ops vector
3878 * allowes open() O_DIRECT flags which would have failed otherwise.
3879 *
3880 * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
3881 * so this method should never be called.
3882 *
3883 * Direct IO is not yet supported in the cached mode.
3884 */
3885static ssize_t
3886cifs_direct_io(int rw, struct kiocb *iocb, struct iov_iter *iter,
3887 loff_t pos)
3888{
3889 /*
3890 * FIXME
3891 * Eventually need to support direct IO for non forcedirectio mounts
3892 */
3893 return -EINVAL;
3894}
3895
3896
3897const struct address_space_operations cifs_addr_ops = {
3898 .readpage = cifs_readpage,
3899 .readpages = cifs_readpages,
3900 .writepage = cifs_writepage,
3901 .writepages = cifs_writepages,
3902 .write_begin = cifs_write_begin,
3903 .write_end = cifs_write_end,
3904 .set_page_dirty = __set_page_dirty_nobuffers,
3905 .releasepage = cifs_release_page,
3906 .direct_IO = cifs_direct_io,
3907 .invalidatepage = cifs_invalidate_page,
3908 .launder_page = cifs_launder_page,
3909};
3910
3911/*
3912 * cifs_readpages requires the server to support a buffer large enough to
3913 * contain the header plus one complete page of data. Otherwise, we need
3914 * to leave cifs_readpages out of the address space operations.
3915 */
3916const struct address_space_operations cifs_addr_ops_smallbuf = {
3917 .readpage = cifs_readpage,
3918 .writepage = cifs_writepage,
3919 .writepages = cifs_writepages,
3920 .write_begin = cifs_write_begin,
3921 .write_end = cifs_write_end,
3922 .set_page_dirty = __set_page_dirty_nobuffers,
3923 .releasepage = cifs_release_page,
3924 .invalidatepage = cifs_invalidate_page,
3925 .launder_page = cifs_launder_page,
3926};