// SPDX-License-Identifier: LGPL-2.1
/*
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/mm.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "smb2proto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
#include "smbdirect.h"
#include "fs_context.h"
#include "cifs_ioctl.h"
#include "cached_dir.h"

/*
 * Mark all open files on the tree connections as invalid, since they were
 * closed when the session to the server was lost.
 */
void
cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
{
	struct cifsFileInfo *open_file = NULL;
	struct list_head *tmp;
	struct list_head *tmp1;

	/* only send once per connect */
	spin_lock(&tcon->ses->ses_lock);
	if ((tcon->ses->ses_status != SES_GOOD) || (tcon->status != TID_NEED_RECON)) {
		spin_unlock(&tcon->ses->ses_lock);
		return;
	}
	tcon->status = TID_IN_FILES_INVALIDATE;
	spin_unlock(&tcon->ses->ses_lock);

	/* list all files open on tree connection and mark them invalid */
	spin_lock(&tcon->open_file_lock);
	list_for_each_safe(tmp, tmp1, &tcon->openFileList) {
		open_file = list_entry(tmp, struct cifsFileInfo, tlist);
		open_file->invalidHandle = true;
		open_file->oplock_break_cancelled = true;
	}
	spin_unlock(&tcon->open_file_lock);

	invalidate_all_cached_dirs(tcon);
	spin_lock(&tcon->tc_lock);
	if (tcon->status == TID_IN_FILES_INVALIDATE)
		tcon->status = TID_NEED_TCON;
	spin_unlock(&tcon->tc_lock);

	/*
	 * BB Add call to invalidate_inodes(sb) for all superblocks mounted
	 * to this tcon.
	 */
}

static inline int cifs_convert_flags(unsigned int flags)
{
	if ((flags & O_ACCMODE) == O_RDONLY)
		return GENERIC_READ;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		return GENERIC_WRITE;
	else if ((flags & O_ACCMODE) == O_RDWR) {
		/*
		 * GENERIC_ALL is too much permission to request; it can
		 * cause an unnecessary access-denied error on create.
		 */
		/* return GENERIC_ALL; */
		return (GENERIC_READ | GENERIC_WRITE);
	}

	return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
		FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
		FILE_READ_DATA);
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
static u32 cifs_posix_convert_flags(unsigned int flags)
{
	u32 posix_flags = 0;

	if ((flags & O_ACCMODE) == O_RDONLY)
		posix_flags = SMB_O_RDONLY;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		posix_flags = SMB_O_WRONLY;
	else if ((flags & O_ACCMODE) == O_RDWR)
		posix_flags = SMB_O_RDWR;

	if (flags & O_CREAT) {
		posix_flags |= SMB_O_CREAT;
		if (flags & O_EXCL)
			posix_flags |= SMB_O_EXCL;
	} else if (flags & O_EXCL)
		cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
			 current->comm, current->tgid);

	if (flags & O_TRUNC)
		posix_flags |= SMB_O_TRUNC;
	/* be safe and imply O_SYNC for O_DSYNC */
	if (flags & O_DSYNC)
		posix_flags |= SMB_O_SYNC;
	if (flags & O_DIRECTORY)
		posix_flags |= SMB_O_DIRECTORY;
	if (flags & O_NOFOLLOW)
		posix_flags |= SMB_O_NOFOLLOW;
	if (flags & O_DIRECT)
		posix_flags |= SMB_O_DIRECT;

	return posix_flags;
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

static inline int cifs_get_disposition(unsigned int flags)
{
	if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
		return FILE_CREATE;
	else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
		return FILE_OVERWRITE_IF;
	else if ((flags & O_CREAT) == O_CREAT)
		return FILE_OPEN_IF;
	else if ((flags & O_TRUNC) == O_TRUNC)
		return FILE_OVERWRITE;
	else
		return FILE_OPEN;
}

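/*
 * Taken together: for example, an open(2) with O_RDWR | O_CREAT maps to a
 * desired access of GENERIC_READ | GENERIC_WRITE (cifs_convert_flags) and a
 * disposition of FILE_OPEN_IF (cifs_get_disposition); see also the mapping
 * table in cifs_nt_open() below.
 */
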
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
int cifs_posix_open(const char *full_path, struct inode **pinode,
		    struct super_block *sb, int mode, unsigned int f_flags,
		    __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
	int rc;
	FILE_UNIX_BASIC_INFO *presp_data;
	__u32 posix_flags = 0;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fattr fattr;
	struct tcon_link *tlink;
	struct cifs_tcon *tcon;

	cifs_dbg(FYI, "posix open %s\n", full_path);

	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
	if (presp_data == NULL)
		return -ENOMEM;

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		rc = PTR_ERR(tlink);
		goto posix_open_ret;
	}

	tcon = tlink_tcon(tlink);
	mode &= ~current_umask();

	posix_flags = cifs_posix_convert_flags(f_flags);
	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
			     poplock, full_path, cifs_sb->local_nls,
			     cifs_remap(cifs_sb));
	cifs_put_tlink(tlink);

	if (rc)
		goto posix_open_ret;

	if (presp_data->Type == cpu_to_le32(-1))
		goto posix_open_ret; /* open ok, caller does qpathinfo */

	if (!pinode)
		goto posix_open_ret; /* caller does not need info */

	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

	/* get new inode and set it up */
	if (*pinode == NULL) {
		cifs_fill_uniqueid(sb, &fattr);
		*pinode = cifs_iget(sb, &fattr);
		if (!*pinode) {
			rc = -ENOMEM;
			goto posix_open_ret;
		}
	} else {
		cifs_revalidate_mapping(*pinode);
		rc = cifs_fattr_to_inode(*pinode, &fattr);
	}

posix_open_ret:
	kfree(presp_data);
	return rc;
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

static int
cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
	     struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
	     struct cifs_fid *fid, unsigned int xid)
{
	int rc;
	int desired_access;
	int disposition;
	int create_options = CREATE_NOT_DIR;
	FILE_ALL_INFO *buf;
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifs_open_parms oparms;

	if (!server->ops->open)
		return -ENOSYS;

	desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *	POSIX Flag		CIFS Disposition
 *	----------		----------------
 *	O_CREAT			FILE_OPEN_IF
 *	O_CREAT | O_EXCL	FILE_CREATE
 *	O_CREAT | O_TRUNC	FILE_OVERWRITE_IF
 *	O_TRUNC			FILE_OVERWRITE
 *	none of the above	FILE_OPEN
 *
 *	Note that there is no direct match for the disposition
 *	FILE_SUPERSEDE (ie create whether or not the file exists).
 *	O_CREAT | O_TRUNC is similar, but it truncates an existing
 *	file rather than creating a new file as FILE_SUPERSEDE does
 *	(which uses the attributes / metadata passed in on the open call).
 *
 *	O_SYNC is a reasonable match to the CIFS writethrough flag,
 *	and the read/write flags match reasonably. O_LARGEFILE
 *	is irrelevant because largefile support is always used
 *	by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *	O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation.
 *********************************************************************/

	disposition = cifs_get_disposition(f_flags);

	/* BB pass O_SYNC flag through on file attributes .. BB */

	buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = cifs_create_options(cifs_sb, create_options);
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = fid;
	oparms.reconnect = false;

	rc = server->ops->open(xid, &oparms, oplock, buf);

	if (rc)
		goto out;

	/* TODO: Add support for calling posix query info by passing in the fid */
	if (tcon->unix_ext)
		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
					      xid);
	else
		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
					 xid, fid);

	if (rc) {
		server->ops->close(xid, tcon, fid);
		if (rc == -ESTALE)
			rc = -EOPENSTALE;
	}

out:
	kfree(buf);
	return rc;
}

static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
	struct cifs_fid_locks *cur;
	bool has_locks = false;

	down_read(&cinode->lock_sem);
	list_for_each_entry(cur, &cinode->llist, llist) {
		if (!list_empty(&cur->locks)) {
			has_locks = true;
			break;
		}
	}
	up_read(&cinode->lock_sem);
	return has_locks;
}

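/*
 * Take @sem for writing by polling: unlike a plain down_write(), the caller
 * never sleeps on the rwsem wait queue; it simply retries the trylock every
 * 10ms until it succeeds.
 */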
void
cifs_down_write(struct rw_semaphore *sem)
{
	while (!down_write_trylock(sem))
		msleep(10);
}

static void cifsFileInfo_put_work(struct work_struct *work);

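/*
 * Allocate and initialize the per-open bookkeeping for a just-opened file:
 * take references on the dentry and tlink, hook the handle into the tcon
 * and inode open-file lists, apply the effective oplock level via
 * server->ops->set_fid(), and stash the result in file->private_data.
 * Returns NULL on allocation failure.
 */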
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
		  struct tcon_link *tlink, __u32 oplock)
{
	struct dentry *dentry = file_dentry(file);
	struct inode *inode = d_inode(dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifsFileInfo *cfile;
	struct cifs_fid_locks *fdlocks;
	struct cifs_tcon *tcon = tlink_tcon(tlink);
	struct TCP_Server_Info *server = tcon->ses->server;

	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
	if (cfile == NULL)
		return cfile;

	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
	if (!fdlocks) {
		kfree(cfile);
		return NULL;
	}

	INIT_LIST_HEAD(&fdlocks->locks);
	fdlocks->cfile = cfile;
	cfile->llist = fdlocks;

	cfile->count = 1;
	cfile->pid = current->tgid;
	cfile->uid = current_fsuid();
	cfile->dentry = dget(dentry);
	cfile->f_flags = file->f_flags;
	cfile->invalidHandle = false;
	cfile->deferred_close_scheduled = false;
	cfile->tlink = cifs_get_tlink(tlink);
	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
	INIT_WORK(&cfile->put, cifsFileInfo_put_work);
	INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
	mutex_init(&cfile->fh_mutex);
	spin_lock_init(&cfile->file_info_lock);

	cifs_sb_active(inode->i_sb);

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	cifs_down_write(&cinode->lock_sem);
	list_add(&fdlocks->llist, &cinode->llist);
	up_write(&cinode->lock_sem);

	spin_lock(&tcon->open_file_lock);
	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
		oplock = fid->pending_open->oplock;
	list_del(&fid->pending_open->olist);

	fid->purge_cache = false;
	server->ops->set_fid(cfile, fid, oplock);

	list_add(&cfile->tlist, &tcon->openFileList);
	atomic_inc(&tcon->num_local_opens);

	/* if this is a readable file instance, put it first in the list */
	spin_lock(&cinode->open_file_lock);
	if (file->f_mode & FMODE_READ)
		list_add(&cfile->flist, &cinode->openFileList);
	else
		list_add_tail(&cfile->flist, &cinode->openFileList);
	spin_unlock(&cinode->open_file_lock);
	spin_unlock(&tcon->open_file_lock);

	if (fid->purge_cache)
		cifs_zap_mapping(inode);

	file->private_data = cfile;
	return cfile;
}

struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
	spin_lock(&cifs_file->file_info_lock);
	cifsFileInfo_get_locked(cifs_file);
	spin_unlock(&cifs_file->file_info_lock);
	return cifs_file;
}

static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
{
	struct inode *inode = d_inode(cifs_file->dentry);
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct cifsLockInfo *li, *tmp;
	struct super_block *sb = inode->i_sb;

	/*
	 * Delete any outstanding lock records. We'll lose them when the file
	 * is closed anyway.
	 */
	cifs_down_write(&cifsi->lock_sem);
	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
		list_del(&li->llist);
		cifs_del_lock_waiters(li);
		kfree(li);
	}
	list_del(&cifs_file->llist->llist);
	kfree(cifs_file->llist);
	up_write(&cifsi->lock_sem);

	cifs_put_tlink(cifs_file->tlink);
	dput(cifs_file->dentry);
	cifs_sb_deactive(sb);
	kfree(cifs_file);
}

static void cifsFileInfo_put_work(struct work_struct *work)
{
	struct cifsFileInfo *cifs_file = container_of(work,
			struct cifsFileInfo, put);

	cifsFileInfo_put_final(cifs_file);
}

/**
 * cifsFileInfo_put - release a reference to file private data
 * @cifs_file: cifs/smb3 specific info (eg refcounts) for an open file
 *
 * Always potentially waits for the oplock handler. See _cifsFileInfo_put().
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
	_cifsFileInfo_put(cifs_file, true, true);
}

/**
 * _cifsFileInfo_put - release a reference to file private data
 * @cifs_file: cifs/smb3 specific info (eg refcounts) for an open file
 * @wait_oplock_handler: must be false if called from the oplock break handler
 * @offload: if true, queue the final release to a worker thread instead of
 *	running it in the caller's context
 *
 * This may involve closing the filehandle @cifs_file out on the
 * server. Must be called without holding tcon->open_file_lock,
 * cinode->open_file_lock and cifs_file->file_info_lock.
 *
 * If @wait_oplock_handler is true and we are releasing the last
 * reference, wait for any running oplock break handler of the file
 * and cancel any pending one.
 */
void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
		       bool wait_oplock_handler, bool offload)
{
	struct inode *inode = d_inode(cifs_file->dentry);
	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct super_block *sb = inode->i_sb;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fid fid;
	struct cifs_pending_open open;
	bool oplock_break_cancelled;

	spin_lock(&tcon->open_file_lock);
	spin_lock(&cifsi->open_file_lock);
	spin_lock(&cifs_file->file_info_lock);
	if (--cifs_file->count > 0) {
		spin_unlock(&cifs_file->file_info_lock);
		spin_unlock(&cifsi->open_file_lock);
		spin_unlock(&tcon->open_file_lock);
		return;
	}
	spin_unlock(&cifs_file->file_info_lock);

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	/* store open in pending opens to make sure we don't miss lease break */
	cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

	/* remove it from the lists */
	list_del(&cifs_file->flist);
	list_del(&cifs_file->tlist);
	atomic_dec(&tcon->num_local_opens);

	if (list_empty(&cifsi->openFileList)) {
		cifs_dbg(FYI, "closing last open instance for inode %p\n",
			 d_inode(cifs_file->dentry));
		/*
		 * In strict cache mode we need to invalidate the mapping on
		 * the last close because it may cause an error when we open
		 * this file again and get at least a level II oplock.
		 */
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
			set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
		cifs_set_oplock_level(cifsi, 0);
	}

	spin_unlock(&cifsi->open_file_lock);
	spin_unlock(&tcon->open_file_lock);

	oplock_break_cancelled = wait_oplock_handler ?
		cancel_work_sync(&cifs_file->oplock_break) : false;

	if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
		struct TCP_Server_Info *server = tcon->ses->server;
		unsigned int xid;

		xid = get_xid();
		if (server->ops->close_getattr)
			server->ops->close_getattr(xid, tcon, cifs_file);
		else if (server->ops->close)
			server->ops->close(xid, tcon, &cifs_file->fid);
		_free_xid(xid);
	}

	if (oplock_break_cancelled)
		cifs_done_oplock_break(cifsi);

	cifs_del_pending_open(&open);

	if (offload)
		queue_work(fileinfo_put_wq, &cifs_file->put);
	else
		cifsFileInfo_put_final(cifs_file);
}

int cifs_open(struct inode *inode, struct file *file)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct TCP_Server_Info *server;
	struct cifs_tcon *tcon;
	struct tcon_link *tlink;
	struct cifsFileInfo *cfile = NULL;
	void *page;
	const char *full_path;
	bool posix_open_ok = false;
	struct cifs_fid fid;
	struct cifs_pending_open open;

	xid = get_xid();

	cifs_sb = CIFS_SB(inode->i_sb);
	if (unlikely(cifs_forced_shutdown(cifs_sb))) {
		free_xid(xid);
		return -EIO;
	}

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		free_xid(xid);
		return PTR_ERR(tlink);
	}
	tcon = tlink_tcon(tlink);
	server = tcon->ses->server;

	page = alloc_dentry_path();
	full_path = build_path_from_dentry(file_dentry(file), page);
	if (IS_ERR(full_path)) {
		rc = PTR_ERR(full_path);
		goto out;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
		 inode, file->f_flags, full_path);

	if (file->f_flags & O_DIRECT &&
	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
			file->f_op = &cifs_file_direct_nobrl_ops;
		else
			file->f_op = &cifs_file_direct_ops;
	}

	/* Get the cached handle as SMB2 close is deferred */
	rc = cifs_get_readable_path(tcon, full_path, &cfile);
	if (rc == 0) {
		if (file->f_flags == cfile->f_flags) {
			file->private_data = cfile;
			spin_lock(&CIFS_I(inode)->deferred_lock);
			cifs_del_deferred_close(cfile);
			spin_unlock(&CIFS_I(inode)->deferred_lock);
			goto use_cache;
		} else {
			_cifsFileInfo_put(cfile, true, false);
		}
	}

	if (server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	if (!tcon->broken_posix_open && tcon->unix_ext &&
	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				    le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/* can not refresh inode info since size could be stale */
		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
				     cifs_sb->ctx->file_mode /* ignored */,
				     file->f_flags, &oplock, &fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix open succeeded\n");
			posix_open_ok = true;
		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
			if (tcon->ses->serverNOS)
				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
					 tcon->ses->ip_addr,
					 tcon->ses->serverNOS);
			tcon->broken_posix_open = true;
		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
			   (rc != -EOPNOTSUPP)) /* path not found or net err */
			goto out;
		/*
		 * Else fall through to retry the open the old way on network
		 * i/o or DFS errors.
		 */
	}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	cifs_add_pending_open(&fid, tlink, &open);

	if (!posix_open_ok) {
		if (server->ops->get_lease_key)
			server->ops->get_lease_key(inode, &fid);

		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
				  file->f_flags, &oplock, &fid, xid);
		if (rc) {
			cifs_del_pending_open(&open);
			goto out;
		}
	}

	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
	if (cfile == NULL) {
		if (server->ops->close)
			server->ops->close(xid, tcon, &fid);
		cifs_del_pending_open(&open);
		rc = -ENOMEM;
		goto out;
	}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
		/*
		 * Time to set mode which we can not set earlier due to
		 * problems creating new read-only files.
		 */
		struct cifs_unix_set_info_args args = {
			.mode	= inode->i_mode,
			.uid	= INVALID_UID, /* no change */
			.gid	= INVALID_GID, /* no change */
			.ctime	= NO_CHANGE_64,
			.atime	= NO_CHANGE_64,
			.mtime	= NO_CHANGE_64,
			.device	= 0,
		};
		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
				       cfile->pid);
	}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

use_cache:
	fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
			   file->f_mode & FMODE_WRITE);
	if (file->f_flags & O_DIRECT &&
	    (!((file->f_flags & O_ACCMODE) != O_RDONLY) ||
	     file->f_flags & O_APPEND))
		cifs_invalidate_cache(file_inode(file),
				      FSCACHE_INVAL_DIO_WRITE);

out:
	free_dentry_path(page);
	free_xid(xid);
	cifs_put_tlink(tlink);
	return rc;
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

/*
 * Try to reacquire byte range locks that were released when session
 * to server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

	down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
	if (cinode->can_cache_brlcks) {
		/* can cache locks - no need to relock */
		up_read(&cinode->lock_sem);
		return rc;
	}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	up_read(&cinode->lock_sem);
	return rc;
}

static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsInodeInfo *cinode;
	struct inode *inode;
	void *page;
	const char *full_path;
	int desired_access;
	int disposition = FILE_OPEN;
	int create_options = CREATE_NOT_DIR;
	struct cifs_open_parms oparms;

	xid = get_xid();
	mutex_lock(&cfile->fh_mutex);
	if (!cfile->invalidHandle) {
		mutex_unlock(&cfile->fh_mutex);
		free_xid(xid);
		return 0;
	}

	inode = d_inode(cfile->dentry);
	cifs_sb = CIFS_SB(inode->i_sb);
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	/*
	 * Can not grab the rename sem here, because various ops, including
	 * those that already have the rename sem, can end up causing
	 * writepage to get called, and if the server was down that means we
	 * end up here, and we can never tell if the caller already has the
	 * rename_sem.
	 */
	page = alloc_dentry_path();
	full_path = build_path_from_dentry(cfile->dentry, page);
	if (IS_ERR(full_path)) {
		mutex_unlock(&cfile->fh_mutex);
		free_dentry_path(page);
		free_xid(xid);
		return PTR_ERR(full_path);
	}

	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
		 inode, cfile->f_flags, full_path);

	if (tcon->ses->server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	if (tcon->unix_ext && cap_unix(tcon->ses) &&
	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
	     le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/*
		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
		 * original open. Must mask them off for a reopen.
		 */
		unsigned int oflags = cfile->f_flags &
			~(O_CREAT | O_EXCL | O_TRUNC);

		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
				     cifs_sb->ctx->file_mode /* ignored */,
				     oflags, &oplock, &cfile->fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix reopen succeeded\n");
			oparms.reconnect = true;
			goto reopen_success;
		}
		/*
		 * Fall through to retry the open the old way on errors;
		 * especially in the reconnect path it is important to retry
		 * hard.
		 */
	}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

	desired_access = cifs_convert_flags(cfile->f_flags);

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (cfile->f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (cfile->f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &cfile->fid);

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = cifs_create_options(cifs_sb, create_options);
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = &cfile->fid;
	oparms.reconnect = true;

	/*
	 * Can not refresh inode by passing in file_info buf to be returned by
	 * ops->open and then calling get_inode_info with returned buf since
	 * file might have write behind data that needs to be flushed and server
	 * version of file size can be stale. If we knew for sure that inode was
	 * not dirty locally we could do this.
	 */
	rc = server->ops->open(xid, &oparms, &oplock, NULL);
	if (rc == -ENOENT && oparms.reconnect == false) {
		/* durable handle timeout has expired - open the file again */
		rc = server->ops->open(xid, &oparms, &oplock, NULL);
		/* indicate that we need to relock the file */
		oparms.reconnect = true;
	}

	if (rc) {
		mutex_unlock(&cfile->fh_mutex);
		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
		cifs_dbg(FYI, "oplock: %d\n", oplock);
		goto reopen_error_exit;
	}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
reopen_success:
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
	cfile->invalidHandle = false;
	mutex_unlock(&cfile->fh_mutex);
	cinode = CIFS_I(inode);

	if (can_flush) {
		rc = filemap_write_and_wait(inode->i_mapping);
		if (!is_interrupt_error(rc))
			mapping_set_error(inode->i_mapping, rc);

		if (tcon->posix_extensions)
			rc = smb311_posix_get_inode_info(&inode, full_path, inode->i_sb, xid);
		else if (tcon->unix_ext)
			rc = cifs_get_inode_info_unix(&inode, full_path,
						      inode->i_sb, xid);
		else
			rc = cifs_get_inode_info(&inode, full_path, NULL,
						 inode->i_sb, xid, NULL);
	}
	/*
	 * Else we are writing out data to the server already and could
	 * deadlock if we tried to flush data, and since we do not know if we
	 * have data that would invalidate the current end of file on the
	 * server we can not go to the server to get the new inode info.
	 */

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	server->ops->set_fid(cfile, &cfile->fid, oplock);
	if (oparms.reconnect)
		cifs_relock_file(cfile);

reopen_error_exit:
	free_dentry_path(page);
	free_xid(xid);
	return rc;
}

void smb2_deferred_work_close(struct work_struct *work)
{
	struct cifsFileInfo *cfile = container_of(work,
			struct cifsFileInfo, deferred.work);

	spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
	cifs_del_deferred_close(cfile);
	cfile->deferred_close_scheduled = false;
	spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
	_cifsFileInfo_put(cfile, true, false);
}

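/*
 * Last reference from the VFS is dropped here. If we hold an RHW lease on
 * the inode, the actual handle close is deferred (see
 * cifs_add_deferred_close() and smb2_deferred_work_close() above) for up to
 * the closetimeo interval, so that a quick reopen of the same path can reuse
 * the cached handle via cifs_get_readable_path() in cifs_open().
 */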
int cifs_close(struct inode *inode, struct file *file)
{
	struct cifsFileInfo *cfile;
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifs_deferred_close *dclose;

	cifs_fscache_unuse_inode_cookie(inode, file->f_mode & FMODE_WRITE);

	if (file->private_data != NULL) {
		cfile = file->private_data;
		file->private_data = NULL;
		dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
		if ((cinode->oplock == CIFS_CACHE_RHW_FLG) &&
		    cinode->lease_granted &&
		    !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) &&
		    dclose) {
			if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
				inode->i_ctime = inode->i_mtime = current_time(inode);
			}
			spin_lock(&cinode->deferred_lock);
			cifs_add_deferred_close(cfile, dclose);
			if (cfile->deferred_close_scheduled &&
			    delayed_work_pending(&cfile->deferred)) {
				/*
				 * If there is no pending work, mod_delayed_work
				 * queues new work, so increase the ref count to
				 * avoid a use-after-free.
				 */
				if (!mod_delayed_work(deferredclose_wq,
						      &cfile->deferred, cifs_sb->ctx->closetimeo))
					cifsFileInfo_get(cfile);
			} else {
				/* Deferred close for files */
				queue_delayed_work(deferredclose_wq,
						   &cfile->deferred, cifs_sb->ctx->closetimeo);
				cfile->deferred_close_scheduled = true;
				spin_unlock(&cinode->deferred_lock);
				return 0;
			}
			spin_unlock(&cinode->deferred_lock);
			_cifsFileInfo_put(cfile, true, false);
		} else {
			_cifsFileInfo_put(cfile, true, false);
			kfree(dclose);
		}
	}

	/* return code from the ->release op is always ignored */
	return 0;
}

void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
	struct cifsFileInfo *open_file, *tmp;
	struct list_head tmp_list;

	if (!tcon->use_persistent || !tcon->need_reopen_files)
		return;

	tcon->need_reopen_files = false;

	cifs_dbg(FYI, "Reopen persistent handles\n");
	INIT_LIST_HEAD(&tmp_list);

	/* list all files open on tree connection, reopen resilient handles */
	spin_lock(&tcon->open_file_lock);
	list_for_each_entry(open_file, &tcon->openFileList, tlist) {
		if (!open_file->invalidHandle)
			continue;
		cifsFileInfo_get(open_file);
		list_add_tail(&open_file->rlist, &tmp_list);
	}
	spin_unlock(&tcon->open_file_lock);

	list_for_each_entry_safe(open_file, tmp, &tmp_list, rlist) {
		if (cifs_reopen_file(open_file, false /* do not flush */))
			tcon->need_reopen_files = true;
		list_del_init(&open_file->rlist);
		cifsFileInfo_put(open_file);
	}
}

int cifs_closedir(struct inode *inode, struct file *file)
{
	int rc = 0;
	unsigned int xid;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	char *buf;

	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

	if (cfile == NULL)
		return rc;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	cifs_dbg(FYI, "Freeing private data in close dir\n");
	spin_lock(&cfile->file_info_lock);
	if (server->ops->dir_needs_close(cfile)) {
		cfile->invalidHandle = true;
		spin_unlock(&cfile->file_info_lock);
		if (server->ops->close_dir)
			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
		else
			rc = -ENOSYS;
		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
		/* not much we can do if it fails anyway, ignore rc */
		rc = 0;
	} else
		spin_unlock(&cfile->file_info_lock);

	buf = cfile->srch_inf.ntwrk_buf_start;
	if (buf) {
		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
		cfile->srch_inf.ntwrk_buf_start = NULL;
		if (cfile->srch_inf.smallBuf)
			cifs_small_buf_release(buf);
		else
			cifs_buf_release(buf);
	}

	cifs_put_tlink(cfile->tlink);
	kfree(file->private_data);
	file->private_data = NULL;
	/* BB can we lock the filestruct while this is going on? */
	free_xid(xid);
	return rc;
}

static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
{
	struct cifsLockInfo *lock =
		kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
	if (!lock)
		return lock;
	lock->offset = offset;
	lock->length = length;
	lock->type = type;
	lock->pid = current->tgid;
	lock->flags = flags;
	INIT_LIST_HEAD(&lock->blist);
	init_waitqueue_head(&lock->block_q);
	return lock;
}

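/*
 * Wake up every lock request blocked on @lock; each waiter re-checks for
 * conflicts and retries (see the wait loop in cifs_lock_add_if() below).
 */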
void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
		list_del_init(&li->blist);
		wake_up(&li->block_q);
	}
}

#define CIFS_LOCK_OP	0
#define CIFS_READ_OP	1
#define CIFS_WRITE_OP	2

/* @rw_check: one of CIFS_LOCK_OP, CIFS_READ_OP or CIFS_WRITE_OP */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
			    __u64 length, __u8 type, __u16 flags,
			    struct cifsFileInfo *cfile,
			    struct cifsLockInfo **conf_lock, int rw_check)
{
	struct cifsLockInfo *li;
	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

	list_for_each_entry(li, &fdlocks->locks, llist) {
		if (offset + length <= li->offset ||
		    offset >= li->offset + li->length)
			continue;
		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
		    server->ops->compare_fids(cfile, cur_cfile)) {
			/* shared lock prevents write op through the same fid */
			if (!(li->type & server->vals->shared_lock_type) ||
			    rw_check != CIFS_WRITE_OP)
				continue;
		}
		if ((type & server->vals->shared_lock_type) &&
		    ((server->ops->compare_fids(cfile, cur_cfile) &&
		      current->tgid == li->pid) || type == li->type))
			continue;
		if (rw_check == CIFS_LOCK_OP &&
		    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
		    server->ops->compare_fids(cfile, cur_cfile))
			continue;
		if (conf_lock)
			*conf_lock = li;
		return true;
	}
	return false;
}

bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
			__u8 type, __u16 flags,
			struct cifsLockInfo **conf_lock, int rw_check)
{
	bool rc = false;
	struct cifs_fid_locks *cur;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

	list_for_each_entry(cur, &cinode->llist, llist) {
		rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
						 flags, cfile, conf_lock,
						 rw_check);
		if (rc)
			break;
	}

	return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
	       __u8 type, struct file_lock *flock)
{
	int rc = 0;
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
	bool exist;

	down_read(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, offset, length, type,
					flock->fl_flags, &conf_lock,
					CIFS_LOCK_OP);
	if (exist) {
		flock->fl_start = conf_lock->offset;
		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
		flock->fl_pid = conf_lock->pid;
		if (conf_lock->type & server->vals->shared_lock_type)
			flock->fl_type = F_RDLCK;
		else
			flock->fl_type = F_WRLCK;
	} else if (!cinode->can_cache_brlcks)
		rc = 1;
	else
		flock->fl_type = F_UNLCK;

	up_read(&cinode->lock_sem);
	return rc;
}

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	cifs_down_write(&cinode->lock_sem);
	list_add_tail(&lock->llist, &cfile->llist->locks);
	up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
		 bool wait)
{
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	bool exist;
	int rc = 0;

try_again:
	exist = false;
	cifs_down_write(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
					lock->type, lock->flags, &conf_lock,
					CIFS_LOCK_OP);
	if (!exist && cinode->can_cache_brlcks) {
		list_add_tail(&lock->llist, &cfile->llist->locks);
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (!exist)
		rc = 1;
	else if (!wait)
		rc = -EACCES;
	else {
		list_add_tail(&lock->blist, &conf_lock->blist);
		up_write(&cinode->lock_sem);
		rc = wait_event_interruptible(lock->block_q,
					      (lock->blist.prev == &lock->blist) &&
					      (lock->blist.next == &lock->blist));
		if (!rc)
			goto try_again;
		cifs_down_write(&cinode->lock_sem);
		list_del_init(&lock->blist);
	}

	up_write(&cinode->lock_sem);
	return rc;
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
	int rc = 0;
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	unsigned char saved_type = flock->fl_type;

	if ((flock->fl_flags & FL_POSIX) == 0)
		return 1;

	down_read(&cinode->lock_sem);
	posix_test_lock(file, flock);

	if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
		flock->fl_type = saved_type;
		rc = 1;
	}

	up_read(&cinode->lock_sem);
	return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) <0, if an error occurs while setting the lock;
 * 2) 0, if we set the lock and don't need to request to the server;
 * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
 * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	int rc = FILE_LOCK_DEFERRED + 1;

	if ((flock->fl_flags & FL_POSIX) == 0)
		return rc;

	cifs_down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}

	rc = posix_lock_file(file, flock, NULL);
	up_write(&cinode->lock_sem);
	return rc;
}

int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
	unsigned int xid;
	int rc = 0, stored_rc;
	struct cifsLockInfo *li, *tmp;
	struct cifs_tcon *tcon;
	unsigned int num, max_num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	static const int types[] = {
		LOCKING_ANDX_LARGE_FILES,
		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
	};
	int i;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
		free_xid(xid);
		return -EINVAL;
	}

	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
		     PAGE_SIZE);
	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
			PAGE_SIZE);
	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
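	/*
	 * i.e. as many lock ranges as fit in a single LOCKING_ANDX request,
	 * whose buffer was capped to one page just above.
	 */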
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf) {
		free_xid(xid);
		return -ENOMEM;
	}

	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (li->type != types[i])
				continue;
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			if (++num == max_num) {
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       (__u8)li->type, 0, num,
						       buf);
				if (stored_rc)
					rc = stored_rc;
				cur = buf;
				num = 0;
			} else
				cur++;
		}

		if (num) {
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       (__u8)types[i], 0, num, buf);
			if (stored_rc)
				rc = stored_rc;
		}
	}

	kfree(buf);
	free_xid(xid);
	return rc;
}

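/*
 * fl_owner_t is a kernel pointer, so hash it (mixed with the global
 * cifs_lock_secret) rather than using raw kernel pointer values directly
 * as on-the-wire lock owner ids.
 */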
static __u32
hash_lockowner(fl_owner_t owner)
{
	return cifs_lock_secret ^ hash32_ptr((const void *)owner);
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

struct lock_to_push {
	struct list_head llist;
	__u64 offset;
	__u64 length;
	__u32 pid;
	__u16 netfid;
	__u8 type;
};

1406#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1407static int
1408cifs_push_posix_locks(struct cifsFileInfo *cfile)
1409{
1410 struct inode *inode = d_inode(cfile->dentry);
1411 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1412 struct file_lock *flock;
1413 struct file_lock_context *flctx = inode->i_flctx;
1414 unsigned int count = 0, i;
1415 int rc = 0, xid, type;
1416 struct list_head locks_to_send, *el;
1417 struct lock_to_push *lck, *tmp;
1418 __u64 length;
1419
1420 xid = get_xid();
1421
1422 if (!flctx)
1423 goto out;
1424
1425 spin_lock(&flctx->flc_lock);
1426 list_for_each(el, &flctx->flc_posix) {
1427 count++;
1428 }
1429 spin_unlock(&flctx->flc_lock);
1430
1431 INIT_LIST_HEAD(&locks_to_send);
1432
1433 /*
1434 * Allocating count locks is enough because no FL_POSIX locks can be
1435 * added to the list while we are holding cinode->lock_sem that
1436 * protects locking operations of this inode.
1437 */
1438 for (i = 0; i < count; i++) {
1439 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1440 if (!lck) {
1441 rc = -ENOMEM;
1442 goto err_out;
1443 }
1444 list_add_tail(&lck->llist, &locks_to_send);
1445 }
1446
1447 el = locks_to_send.next;
1448 spin_lock(&flctx->flc_lock);
1449 list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1450 if (el == &locks_to_send) {
1451 /*
1452 * The list ended. We don't have enough allocated
1453 * structures - something is really wrong.
1454 */
1455 cifs_dbg(VFS, "Can't push all brlocks!\n");
1456 break;
1457 }
1458 length = cifs_flock_len(flock);
1459 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1460 type = CIFS_RDLCK;
1461 else
1462 type = CIFS_WRLCK;
1463 lck = list_entry(el, struct lock_to_push, llist);
1464 lck->pid = hash_lockowner(flock->fl_owner);
1465 lck->netfid = cfile->fid.netfid;
1466 lck->length = length;
1467 lck->type = type;
1468 lck->offset = flock->fl_start;
1469 }
1470 spin_unlock(&flctx->flc_lock);
1471
1472 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1473 int stored_rc;
1474
1475 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1476 lck->offset, lck->length, NULL,
1477 lck->type, 0);
1478 if (stored_rc)
1479 rc = stored_rc;
1480 list_del(&lck->llist);
1481 kfree(lck);
1482 }
1483
1484out:
1485 free_xid(xid);
1486 return rc;
1487err_out:
1488 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1489 list_del(&lck->llist);
1490 kfree(lck);
1491 }
1492 goto out;
1493}
1494#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1495
static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

	/* we are going to update can_cache_brlcks here - need a write access */
	cifs_down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	cinode->can_cache_brlcks = false;
	up_write(&cinode->lock_sem);
	return rc;
}

static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
		bool *wait_flag, struct TCP_Server_Info *server)
{
	if (flock->fl_flags & FL_POSIX)
		cifs_dbg(FYI, "Posix\n");
	if (flock->fl_flags & FL_FLOCK)
		cifs_dbg(FYI, "Flock\n");
	if (flock->fl_flags & FL_SLEEP) {
		cifs_dbg(FYI, "Blocking lock\n");
		*wait_flag = true;
	}
	if (flock->fl_flags & FL_ACCESS)
		cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
	if (flock->fl_flags & FL_LEASE)
		cifs_dbg(FYI, "Lease on file - not implemented yet\n");
	if (flock->fl_flags &
	    (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
	       FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
		cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);

	*type = server->vals->large_lock_type;
	if (flock->fl_type == F_WRLCK) {
		cifs_dbg(FYI, "F_WRLCK\n");
		*type |= server->vals->exclusive_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_UNLCK) {
		cifs_dbg(FYI, "F_UNLCK\n");
		*type |= server->vals->unlock_lock_type;
		*unlock = 1;
		/* Check if unlock includes more than one lock range */
	} else if (flock->fl_type == F_RDLCK) {
		cifs_dbg(FYI, "F_RDLCK\n");
		*type |= server->vals->shared_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_EXLCK) {
		cifs_dbg(FYI, "F_EXLCK\n");
		*type |= server->vals->exclusive_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_SHLCK) {
		cifs_dbg(FYI, "F_SHLCK\n");
		*type |= server->vals->shared_lock_type;
		*lock = 1;
	} else
		cifs_dbg(FYI, "Unknown type of lock\n");
}

static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, unsigned int xid)
{
	int rc = 0;
	__u64 length = cifs_flock_len(flock);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	__u16 netfid = cfile->fid.netfid;

	if (posix_lck) {
		int posix_lock_type;

		rc = cifs_posix_lock_test(file, flock);
		if (!rc)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;
		rc = CIFSSMBPosixLock(xid, tcon, netfid,
				      hash_lockowner(flock->fl_owner),
				      flock->fl_start, length, flock,
				      posix_lock_type, wait_flag);
		return rc;
	}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
	if (!rc)
		return rc;

	/* BB we could chain these into one lock request BB */
	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 0, 1, false);
		flock->fl_type = F_UNLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
		return 0;
	}

	if (type & server->vals->shared_lock_type) {
		flock->fl_type = F_WRLCK;
		return 0;
	}

	type &= ~server->vals->exclusive_lock_type;

	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
				    type | server->vals->shared_lock_type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type | server->vals->shared_lock_type, 0, 1, false);
		flock->fl_type = F_RDLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
	} else
		flock->fl_type = F_WRLCK;

	return 0;
}

void
cifs_move_llist(struct list_head *source, struct list_head *dest)
{
	struct list_head *li, *tmp;
	list_for_each_safe(li, tmp, source)
		list_move(li, dest);
}

void
cifs_free_llist(struct list_head *llist)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, llist, llist) {
		cifs_del_lock_waiters(li);
		list_del(&li->llist);
		kfree(li);
	}
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
int
cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
		  unsigned int xid)
{
	int rc = 0, stored_rc;
	static const int types[] = {
		LOCKING_ANDX_LARGE_FILES,
		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
	};
	unsigned int i;
	unsigned int max_num, num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifsLockInfo *li, *tmp;
	__u64 length = cifs_flock_len(flock);
	struct list_head tmp_llist;

	INIT_LIST_HEAD(&tmp_llist);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
		return -EINVAL;

	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
		     PAGE_SIZE);
	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
			PAGE_SIZE);
	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	cifs_down_write(&cinode->lock_sem);
	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (flock->fl_start > li->offset ||
			    (flock->fl_start + length) <
			    (li->offset + li->length))
				continue;
			if (current->tgid != li->pid)
				continue;
			if (types[i] != li->type)
				continue;
			if (cinode->can_cache_brlcks) {
				/*
				 * We can cache brlock requests - simply remove
				 * a lock from the file's list.
				 */
				list_del(&li->llist);
				cifs_del_lock_waiters(li);
				kfree(li);
				continue;
			}
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			/*
			 * We need to save a lock here to let us add it again to
			 * the file's list if the unlock range request fails on
			 * the server.
			 */
			list_move(&li->llist, &tmp_llist);
			if (++num == max_num) {
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       li->type, num, 0, buf);
				if (stored_rc) {
					/*
					 * We failed on the unlock range
					 * request - add all locks from the tmp
					 * list to the head of the file's list.
					 */
					cifs_move_llist(&tmp_llist,
							&cfile->llist->locks);
					rc = stored_rc;
				} else
					/*
					 * The unlock range request succeeded -
					 * free the tmp list.
					 */
					cifs_free_llist(&tmp_llist);
				cur = buf;
				num = 0;
			} else
				cur++;
		}
		if (num) {
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       types[i], num, 0, buf);
			if (stored_rc) {
				cifs_move_llist(&tmp_llist,
						&cfile->llist->locks);
				rc = stored_rc;
			} else
				cifs_free_llist(&tmp_llist);
		}
	}

	up_write(&cinode->lock_sem);
	kfree(buf);
	return rc;
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

1779static int
1780cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1781 bool wait_flag, bool posix_lck, int lock, int unlock,
1782 unsigned int xid)
1783{
1784 int rc = 0;
1785 __u64 length = cifs_flock_len(flock);
1786 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1787 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1788 struct TCP_Server_Info *server = tcon->ses->server;
1789 struct inode *inode = d_inode(cfile->dentry);
1790
1791#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1792 if (posix_lck) {
1793 int posix_lock_type;
1794
1795 rc = cifs_posix_lock_set(file, flock);
1796 if (rc <= FILE_LOCK_DEFERRED)
1797 return rc;
1798
1799 if (type & server->vals->shared_lock_type)
1800 posix_lock_type = CIFS_RDLCK;
1801 else
1802 posix_lock_type = CIFS_WRLCK;
1803
1804 if (unlock == 1)
1805 posix_lock_type = CIFS_UNLCK;
1806
1807 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1808 hash_lockowner(flock->fl_owner),
1809 flock->fl_start, length,
1810 NULL, posix_lock_type, wait_flag);
1811 goto out;
1812 }
1813#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1814 if (lock) {
1815 struct cifsLockInfo *lock;
1816
1817 lock = cifs_lock_init(flock->fl_start, length, type,
1818 flock->fl_flags);
1819 if (!lock)
1820 return -ENOMEM;
1821
1822 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1823 if (rc < 0) {
1824 kfree(lock);
1825 return rc;
1826 }
1827 if (!rc)
1828 goto out;
1829
1830 /*
1831 * Windows 7 server can delay breaking lease from read to None
1832 * if we set a byte-range lock on a file - break it explicitly
1833 * before sending the lock to the server to be sure the next
1834 * read won't conflict with non-overlapted locks due to
1835 * pagereading.
1836 */
1837 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1838 CIFS_CACHE_READ(CIFS_I(inode))) {
1839 cifs_zap_mapping(inode);
1840 cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1841 inode);
1842 CIFS_I(inode)->oplock = 0;
1843 }
1844
1845 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1846 type, 1, 0, wait_flag);
1847 if (rc) {
1848 kfree(lock);
1849 return rc;
1850 }
1851
1852 cifs_lock_add(cfile, lock);
1853 } else if (unlock)
1854 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1855
1856out:
1857 if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
1858 /*
1859 * If this is a request to remove all locks because we
1860 * are closing the file, it doesn't matter if the
1861 * unlocking failed as both cifs.ko and the SMB server
1862 * remove the lock on file close
1863 */
1864 if (rc) {
1865 cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
1866 if (!(flock->fl_flags & FL_CLOSE))
1867 return rc;
1868 }
1869 rc = locks_lock_file_wait(file, flock);
1870 }
1871 return rc;
1872}
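/*
 * Illustration (sketch, not part of the driver; the mount point and
 * file name below are assumptions): cifs_setlk() is reached through
 * the VFS ->lock hook when an application takes a byte-range lock:
 *
 *	#include <fcntl.h>
 *
 *	int fd = open("/mnt/cifs/file", O_RDWR);
 *	struct flock fl = {
 *		.l_type   = F_WRLCK,	// exclusive lock
 *		.l_whence = SEEK_SET,
 *		.l_start  = 0,
 *		.l_len    = 4096,	// first 4 KiB of the file
 *	};
 *	fcntl(fd, F_SETLKW, &fl);	// blocking => wait_flag == true
 *
 * With the POSIX extensions the request is sent via CIFSSMBPosixLock()
 * above; otherwise it becomes a mandatory SMB byte-range lock through
 * server->ops->mand_lock().
 */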
1873
1874int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
1875{
1876 int rc, xid;
1877 int lock = 0, unlock = 0;
1878 bool wait_flag = false;
1879 bool posix_lck = false;
1880 struct cifs_sb_info *cifs_sb;
1881 struct cifs_tcon *tcon;
1882 struct cifsFileInfo *cfile;
1883 __u32 type;
1884
1885 rc = -EACCES;
1886 xid = get_xid();
1887
1888	if (!(fl->fl_flags & FL_FLOCK)) {
		free_xid(xid);
1889		return -ENOLCK;
	}
1890
1891 cfile = (struct cifsFileInfo *)file->private_data;
1892 tcon = tlink_tcon(cfile->tlink);
1893
1894 cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
1895 tcon->ses->server);
1896 cifs_sb = CIFS_FILE_SB(file);
1897
1898 if (cap_unix(tcon->ses) &&
1899 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1900 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1901 posix_lck = true;
1902
1903 if (!lock && !unlock) {
1904 /*
1905	 * if this is neither a lock nor an unlock request, we don't
1906	 * know what to do with it
1907 */
1908 free_xid(xid);
1909 return -EOPNOTSUPP;
1910 }
1911
1912 rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
1913 xid);
1914 free_xid(xid);
1915 return rc;
1918}
1919
1920int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1921{
1922 int rc, xid;
1923 int lock = 0, unlock = 0;
1924 bool wait_flag = false;
1925 bool posix_lck = false;
1926 struct cifs_sb_info *cifs_sb;
1927 struct cifs_tcon *tcon;
1928 struct cifsFileInfo *cfile;
1929 __u32 type;
1930
1931 rc = -EACCES;
1932 xid = get_xid();
1933
1934	cifs_dbg(FYI, "%s: %pD2 cmd=0x%x type=0x%x flags=0x%x r=%lld:%lld\n", __func__, file, cmd,
1935		flock->fl_type, flock->fl_flags, (long long)flock->fl_start,
1936 (long long)flock->fl_end);
1937
1938 cfile = (struct cifsFileInfo *)file->private_data;
1939 tcon = tlink_tcon(cfile->tlink);
1940
1941 cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1942 tcon->ses->server);
1943 cifs_sb = CIFS_FILE_SB(file);
1944 set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags);
1945
1946 if (cap_unix(tcon->ses) &&
1947 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1948 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1949 posix_lck = true;
1950 /*
1951 * BB add code here to normalize offset and length to account for
1952 * negative length which we can not accept over the wire.
1953 */
1954 if (IS_GETLK(cmd)) {
1955 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1956 free_xid(xid);
1957 return rc;
1958 }
1959
1960 if (!lock && !unlock) {
1961 /*
1962		 * if this is neither a lock nor an unlock request, we don't
1963		 * know what to do with it
1964 */
1965 free_xid(xid);
1966 return -EOPNOTSUPP;
1967 }
1968
1969 rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1970 xid);
1971 free_xid(xid);
1972 return rc;
1973}
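/*
 * Note: cifs_flock() and cifs_lock() are the ->flock and ->lock
 * file_operations entry points. flock(2) requests carry FL_FLOCK and
 * are whole-file, so cifs_flock() has no F_GETLK case to service;
 * fcntl(2) byte-range requests arrive in cifs_lock(), which also
 * answers F_GETLK queries via cifs_getlk(). Both paths converge on
 * cifs_setlk() for the actual lock and unlock work.
 */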
1974
1975/*
1976 * update the file size (if needed) after a write. Should be called with
1977 * the inode->i_lock held
1978 */
1979void
1980cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1981 unsigned int bytes_written)
1982{
1983 loff_t end_of_write = offset + bytes_written;
1984
1985 if (end_of_write > cifsi->server_eof)
1986 cifsi->server_eof = end_of_write;
1987}
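/*
 * Usage sketch: callers are expected to hold inode->i_lock around this
 * helper, as the writers below do, e.g. in cifs_write():
 *
 *	spin_lock(&d_inode(dentry)->i_lock);
 *	cifs_update_eof(cifsi, *offset, bytes_written);
 *	spin_unlock(&d_inode(dentry)->i_lock);
 */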
1988
1989static ssize_t
1990cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1991 size_t write_size, loff_t *offset)
1992{
1993 int rc = 0;
1994 unsigned int bytes_written = 0;
1995 unsigned int total_written;
1996 struct cifs_tcon *tcon;
1997 struct TCP_Server_Info *server;
1998 unsigned int xid;
1999 struct dentry *dentry = open_file->dentry;
2000 struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
2001 struct cifs_io_parms io_parms = {0};
2002
2003 cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
2004 write_size, *offset, dentry);
2005
2006 tcon = tlink_tcon(open_file->tlink);
2007 server = tcon->ses->server;
2008
2009 if (!server->ops->sync_write)
2010 return -ENOSYS;
2011
2012 xid = get_xid();
2013
2014 for (total_written = 0; write_size > total_written;
2015 total_written += bytes_written) {
2016 rc = -EAGAIN;
2017 while (rc == -EAGAIN) {
2018 struct kvec iov[2];
2019 unsigned int len;
2020
2021 if (open_file->invalidHandle) {
2022 /* we could deadlock if we called
2023 filemap_fdatawait from here so tell
2024 reopen_file not to flush data to
2025 server now */
2026 rc = cifs_reopen_file(open_file, false);
2027 if (rc != 0)
2028 break;
2029 }
2030
2031 len = min(server->ops->wp_retry_size(d_inode(dentry)),
2032 (unsigned int)write_size - total_written);
2033 /* iov[0] is reserved for smb header */
2034 iov[1].iov_base = (char *)write_data + total_written;
2035 iov[1].iov_len = len;
2036 io_parms.pid = pid;
2037 io_parms.tcon = tcon;
2038 io_parms.offset = *offset;
2039 io_parms.length = len;
2040 rc = server->ops->sync_write(xid, &open_file->fid,
2041 &io_parms, &bytes_written, iov, 1);
2042 }
2043 if (rc || (bytes_written == 0)) {
2044 if (total_written)
2045 break;
2046 else {
2047 free_xid(xid);
2048 return rc;
2049 }
2050 } else {
2051 spin_lock(&d_inode(dentry)->i_lock);
2052 cifs_update_eof(cifsi, *offset, bytes_written);
2053 spin_unlock(&d_inode(dentry)->i_lock);
2054 *offset += bytes_written;
2055 }
2056 }
2057
2058 cifs_stats_bytes_written(tcon, total_written);
2059
2060 if (total_written > 0) {
2061 spin_lock(&d_inode(dentry)->i_lock);
2062 if (*offset > d_inode(dentry)->i_size) {
2063 i_size_write(d_inode(dentry), *offset);
2064 d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
2065 }
2066 spin_unlock(&d_inode(dentry)->i_lock);
2067 }
2068 mark_inode_dirty_sync(d_inode(dentry));
2069 free_xid(xid);
2070 return total_written;
2071}
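/*
 * Worked example: the i_blocks update above rounds the new size up to
 * 512-byte sectors via (512 - 1 + *offset) >> 9; an offset of 1000
 * bytes gives (511 + 1000) >> 9 = 2 blocks.
 */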
2072
2073struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
2074 bool fsuid_only)
2075{
2076 struct cifsFileInfo *open_file = NULL;
2077 struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2078
2079 /* only filter by fsuid on multiuser mounts */
2080 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2081 fsuid_only = false;
2082
2083 spin_lock(&cifs_inode->open_file_lock);
2084	/* we could simply get the first list entry since write-only entries
2085 are always at the end of the list but since the first entry might
2086 have a close pending, we go through the whole list */
2087 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2088 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2089 continue;
2090 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
2091			if (!open_file->invalidHandle) {
2092 /* found a good file */
2093 /* lock it so it will not be closed on us */
2094 cifsFileInfo_get(open_file);
2095 spin_unlock(&cifs_inode->open_file_lock);
2096 return open_file;
2097 } /* else might as well continue, and look for
2098 another, or simply have the caller reopen it
2099 again rather than trying to fix this handle */
2100 } else /* write only file */
2101 break; /* write only files are last so must be done */
2102 }
2103 spin_unlock(&cifs_inode->open_file_lock);
2104 return NULL;
2105}
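/*
 * Usage sketch: a non-NULL return carries a reference taken with
 * cifsFileInfo_get(), so callers must balance it:
 *
 *	open_file = find_readable_file(cifs_inode, false);
 *	if (open_file) {
 *		// ... issue the read using open_file->fid ...
 *		cifsFileInfo_put(open_file);
 *	}
 */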
2106
2107/* Return -EBADF if no handle is found and general rc otherwise */
2108int
2109cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
2110 struct cifsFileInfo **ret_file)
2111{
2112 struct cifsFileInfo *open_file, *inv_file = NULL;
2113 struct cifs_sb_info *cifs_sb;
2114 bool any_available = false;
2115 int rc = -EBADF;
2116 unsigned int refind = 0;
2117 bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
2118 bool with_delete = flags & FIND_WR_WITH_DELETE;
2119 *ret_file = NULL;
2120
2121 /*
2122 * Having a null inode here (because mapping->host was set to zero by
2123	 * the VFS or MM) should not happen but we had reports of an oops (due
2124	 * to it being zero) during stress testcases so we need to check for it.
2125 */
2126
2127 if (cifs_inode == NULL) {
2128		cifs_dbg(VFS, "Null inode passed to cifs_get_writable_file\n");
2129 dump_stack();
2130 return rc;
2131 }
2132
2133 cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2134
2135 /* only filter by fsuid on multiuser mounts */
2136 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2137 fsuid_only = false;
2138
2139 spin_lock(&cifs_inode->open_file_lock);
2140refind_writable:
2141 if (refind > MAX_REOPEN_ATT) {
2142 spin_unlock(&cifs_inode->open_file_lock);
2143 return rc;
2144 }
2145 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2146 if (!any_available && open_file->pid != current->tgid)
2147 continue;
2148 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2149 continue;
2150 if (with_delete && !(open_file->fid.access & DELETE))
2151 continue;
2152 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2153 if (!open_file->invalidHandle) {
2154 /* found a good writable file */
2155 cifsFileInfo_get(open_file);
2156 spin_unlock(&cifs_inode->open_file_lock);
2157 *ret_file = open_file;
2158 return 0;
2159 } else {
2160 if (!inv_file)
2161 inv_file = open_file;
2162 }
2163 }
2164 }
2165	/* couldn't find usable FH with same pid, try any available */
2166 if (!any_available) {
2167 any_available = true;
2168 goto refind_writable;
2169 }
2170
2171 if (inv_file) {
2172 any_available = false;
2173 cifsFileInfo_get(inv_file);
2174 }
2175
2176 spin_unlock(&cifs_inode->open_file_lock);
2177
2178 if (inv_file) {
2179 rc = cifs_reopen_file(inv_file, false);
2180 if (!rc) {
2181 *ret_file = inv_file;
2182 return 0;
2183 }
2184
2185 spin_lock(&cifs_inode->open_file_lock);
2186 list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2187 spin_unlock(&cifs_inode->open_file_lock);
2188 cifsFileInfo_put(inv_file);
2189 ++refind;
2190 inv_file = NULL;
2191 spin_lock(&cifs_inode->open_file_lock);
2192 goto refind_writable;
2193 }
2194
2195 return rc;
2196}
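/*
 * Note: the search above makes up to three kinds of passes under
 * open_file_lock: first only handles owned by the caller
 * (open_file->pid == current->tgid), then any matching handle, and
 * finally - if only an invalidated handle was found - it drops the
 * lock, tries cifs_reopen_file(), and loops back, bounded by
 * MAX_REOPEN_ATT attempts.
 */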
2197
2198struct cifsFileInfo *
2199find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2200{
2201 struct cifsFileInfo *cfile;
2202 int rc;
2203
2204 rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2205 if (rc)
2206 cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2207
2208 return cfile;
2209}
2210
2211int
2212cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2213 int flags,
2214 struct cifsFileInfo **ret_file)
2215{
2216 struct cifsFileInfo *cfile;
2217 void *page = alloc_dentry_path();
2218
2219 *ret_file = NULL;
2220
2221 spin_lock(&tcon->open_file_lock);
2222 list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2223 struct cifsInodeInfo *cinode;
2224 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2225 if (IS_ERR(full_path)) {
2226 spin_unlock(&tcon->open_file_lock);
2227 free_dentry_path(page);
2228 return PTR_ERR(full_path);
2229 }
2230 if (strcmp(full_path, name))
2231 continue;
2232
2233 cinode = CIFS_I(d_inode(cfile->dentry));
2234 spin_unlock(&tcon->open_file_lock);
2235 free_dentry_path(page);
2236 return cifs_get_writable_file(cinode, flags, ret_file);
2237 }
2238
2239 spin_unlock(&tcon->open_file_lock);
2240 free_dentry_path(page);
2241 return -ENOENT;
2242}
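/*
 * Usage sketch (the file name is hypothetical): this maps a
 * tcon-relative name back to an open handle:
 *
 *	struct cifsFileInfo *cfile;
 *	int rc = cifs_get_writable_path(tcon, "file.txt",
 *					FIND_WR_ANY, &cfile);
 *	if (!rc) {
 *		// ... use cfile ...
 *		cifsFileInfo_put(cfile);
 *	}
 */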
2243
2244int
2245cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2246 struct cifsFileInfo **ret_file)
2247{
2248 struct cifsFileInfo *cfile;
2249 void *page = alloc_dentry_path();
2250
2251 *ret_file = NULL;
2252
2253 spin_lock(&tcon->open_file_lock);
2254 list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2255 struct cifsInodeInfo *cinode;
2256 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2257 if (IS_ERR(full_path)) {
2258 spin_unlock(&tcon->open_file_lock);
2259 free_dentry_path(page);
2260 return PTR_ERR(full_path);
2261 }
2262 if (strcmp(full_path, name))
2263 continue;
2264
2265 cinode = CIFS_I(d_inode(cfile->dentry));
2266 spin_unlock(&tcon->open_file_lock);
2267 free_dentry_path(page);
2268		*ret_file = find_readable_file(cinode, false);
2269 return *ret_file ? 0 : -ENOENT;
2270 }
2271
2272 spin_unlock(&tcon->open_file_lock);
2273 free_dentry_path(page);
2274 return -ENOENT;
2275}
2276
2277void
2278cifs_writedata_release(struct kref *refcount)
2279{
2280 struct cifs_writedata *wdata = container_of(refcount,
2281 struct cifs_writedata, refcount);
2282#ifdef CONFIG_CIFS_SMB_DIRECT
2283 if (wdata->mr) {
2284 smbd_deregister_mr(wdata->mr);
2285 wdata->mr = NULL;
2286 }
2287#endif
2288
2289 if (wdata->cfile)
2290 cifsFileInfo_put(wdata->cfile);
2291
2292 kvfree(wdata->pages);
2293 kfree(wdata);
2294}
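/*
 * Note: wdata lifetime is kref-managed, so this is only ever invoked
 * through the final
 *
 *	kref_put(&wdata->refcount, cifs_writedata_release);
 *
 * The uncached path stacks cifs_uncached_writedata_release() (below)
 * on top to also drop the page references and the aio context.
 */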
2295
2296/*
2297 * Write failed with a retryable error. Resend the write request. It's also
2298 * possible that the page was redirtied so re-clean the page.
2299 */
2300static void
2301cifs_writev_requeue(struct cifs_writedata *wdata)
2302{
2303 int i, rc = 0;
2304 struct inode *inode = d_inode(wdata->cfile->dentry);
2305 struct TCP_Server_Info *server;
2306 unsigned int rest_len;
2307
2308 server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2309 i = 0;
2310 rest_len = wdata->bytes;
2311 do {
2312 struct cifs_writedata *wdata2;
2313 unsigned int j, nr_pages, wsize, tailsz, cur_len;
2314
2315 wsize = server->ops->wp_retry_size(inode);
2316 if (wsize < rest_len) {
2317 nr_pages = wsize / PAGE_SIZE;
2318 if (!nr_pages) {
2319 rc = -EOPNOTSUPP;
2320 break;
2321 }
2322 cur_len = nr_pages * PAGE_SIZE;
2323 tailsz = PAGE_SIZE;
2324 } else {
2325 nr_pages = DIV_ROUND_UP(rest_len, PAGE_SIZE);
2326 cur_len = rest_len;
2327 tailsz = rest_len - (nr_pages - 1) * PAGE_SIZE;
2328 }
2329
2330 wdata2 = cifs_writedata_alloc(nr_pages, cifs_writev_complete);
2331 if (!wdata2) {
2332 rc = -ENOMEM;
2333 break;
2334 }
2335
2336 for (j = 0; j < nr_pages; j++) {
2337 wdata2->pages[j] = wdata->pages[i + j];
2338 lock_page(wdata2->pages[j]);
2339 clear_page_dirty_for_io(wdata2->pages[j]);
2340 }
2341
2342 wdata2->sync_mode = wdata->sync_mode;
2343 wdata2->nr_pages = nr_pages;
2344 wdata2->offset = page_offset(wdata2->pages[0]);
2345 wdata2->pagesz = PAGE_SIZE;
2346 wdata2->tailsz = tailsz;
2347 wdata2->bytes = cur_len;
2348
2349 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY,
2350 &wdata2->cfile);
2351 if (!wdata2->cfile) {
2352 cifs_dbg(VFS, "No writable handle to retry writepages rc=%d\n",
2353 rc);
2354 if (!is_retryable_error(rc))
2355 rc = -EBADF;
2356 } else {
2357 wdata2->pid = wdata2->cfile->pid;
2358 rc = server->ops->async_writev(wdata2,
2359 cifs_writedata_release);
2360 }
2361
2362 for (j = 0; j < nr_pages; j++) {
2363 unlock_page(wdata2->pages[j]);
2364 if (rc != 0 && !is_retryable_error(rc)) {
2365 SetPageError(wdata2->pages[j]);
2366 end_page_writeback(wdata2->pages[j]);
2367 put_page(wdata2->pages[j]);
2368 }
2369 }
2370
2371 kref_put(&wdata2->refcount, cifs_writedata_release);
2372 if (rc) {
2373 if (is_retryable_error(rc))
2374 continue;
2375 i += nr_pages;
2376 break;
2377 }
2378
2379 rest_len -= cur_len;
2380 i += nr_pages;
2381 } while (i < wdata->nr_pages);
2382
2383 /* cleanup remaining pages from the original wdata */
2384 for (; i < wdata->nr_pages; i++) {
2385 SetPageError(wdata->pages[i]);
2386 end_page_writeback(wdata->pages[i]);
2387 put_page(wdata->pages[i]);
2388 }
2389
2390 if (rc != 0 && !is_retryable_error(rc))
2391 mapping_set_error(inode->i_mapping, rc);
2392 kref_put(&wdata->refcount, cifs_writedata_release);
2393}
2394
2395void
2396cifs_writev_complete(struct work_struct *work)
2397{
2398 struct cifs_writedata *wdata = container_of(work,
2399 struct cifs_writedata, work);
2400 struct inode *inode = d_inode(wdata->cfile->dentry);
2401 int i = 0;
2402
2403 if (wdata->result == 0) {
2404 spin_lock(&inode->i_lock);
2405 cifs_update_eof(CIFS_I(inode), wdata->offset, wdata->bytes);
2406 spin_unlock(&inode->i_lock);
2407 cifs_stats_bytes_written(tlink_tcon(wdata->cfile->tlink),
2408 wdata->bytes);
2409 } else if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN)
2410 return cifs_writev_requeue(wdata);
2411
2412 for (i = 0; i < wdata->nr_pages; i++) {
2413 struct page *page = wdata->pages[i];
2414
2415 if (wdata->result == -EAGAIN)
2416 __set_page_dirty_nobuffers(page);
2417 else if (wdata->result < 0)
2418 SetPageError(page);
2419 end_page_writeback(page);
2420 cifs_readpage_to_fscache(inode, page);
2421 put_page(page);
2422 }
2423 if (wdata->result != -EAGAIN)
2424 mapping_set_error(inode->i_mapping, wdata->result);
2425 kref_put(&wdata->refcount, cifs_writedata_release);
2426}
2427
2428struct cifs_writedata *
2429cifs_writedata_alloc(unsigned int nr_pages, work_func_t complete)
2430{
2431 struct page **pages =
2432 kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
2433 if (pages)
2434 return cifs_writedata_direct_alloc(pages, complete);
2435
2436 return NULL;
2437}
2438
2439struct cifs_writedata *
2440cifs_writedata_direct_alloc(struct page **pages, work_func_t complete)
2441{
2442 struct cifs_writedata *wdata;
2443
2444 wdata = kzalloc(sizeof(*wdata), GFP_NOFS);
2445 if (wdata != NULL) {
2446 wdata->pages = pages;
2447 kref_init(&wdata->refcount);
2448 INIT_LIST_HEAD(&wdata->list);
2449 init_completion(&wdata->done);
2450 INIT_WORK(&wdata->work, complete);
2451 }
2452 return wdata;
2453}
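/*
 * Note: cifs_writedata_alloc() owns the page-pointer array it
 * allocates, while the _direct_ variant adopts a caller-supplied array
 * (the direct-I/O path hands in pages pinned from user memory). Both
 * are torn down through kref_put() with the matching release callback.
 */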
2454
2456static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2457{
2458 struct address_space *mapping = page->mapping;
2459 loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2460 char *write_data;
2461 int rc = -EFAULT;
2462 int bytes_written = 0;
2463 struct inode *inode;
2464 struct cifsFileInfo *open_file;
2465
2466 if (!mapping || !mapping->host)
2467 return -EFAULT;
2468
2469 inode = page->mapping->host;
2470
2471 offset += (loff_t)from;
2472 write_data = kmap(page);
2473 write_data += from;
2474
2475 if ((to > PAGE_SIZE) || (from > to)) {
2476 kunmap(page);
2477 return -EIO;
2478 }
2479
2480 /* racing with truncate? */
2481 if (offset > mapping->host->i_size) {
2482 kunmap(page);
2483 return 0; /* don't care */
2484 }
2485
2486 /* check to make sure that we are not extending the file */
2487 if (mapping->host->i_size - offset < (loff_t)to)
2488 to = (unsigned)(mapping->host->i_size - offset);
2489
2490 rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2491 &open_file);
2492 if (!rc) {
2493 bytes_written = cifs_write(open_file, open_file->pid,
2494 write_data, to - from, &offset);
2495 cifsFileInfo_put(open_file);
2496 /* Does mm or vfs already set times? */
2497 inode->i_atime = inode->i_mtime = current_time(inode);
2498 if ((bytes_written > 0) && (offset))
2499 rc = 0;
2500 else if (bytes_written < 0)
2501 rc = bytes_written;
2502 else
2503 rc = -EFAULT;
2504 } else {
2505 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2506 if (!is_retryable_error(rc))
2507 rc = -EIO;
2508 }
2509
2510 kunmap(page);
2511 return rc;
2512}
2513
2514static struct cifs_writedata *
2515wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2516 pgoff_t end, pgoff_t *index,
2517 unsigned int *found_pages)
2518{
2519 struct cifs_writedata *wdata;
2520
2521 wdata = cifs_writedata_alloc((unsigned int)tofind,
2522 cifs_writev_complete);
2523 if (!wdata)
2524 return NULL;
2525
2526 *found_pages = find_get_pages_range_tag(mapping, index, end,
2527 PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
2528 return wdata;
2529}
2530
2531static unsigned int
2532wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
2533 struct address_space *mapping,
2534 struct writeback_control *wbc,
2535 pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
2536{
2537 unsigned int nr_pages = 0, i;
2538 struct page *page;
2539
2540 for (i = 0; i < found_pages; i++) {
2541 page = wdata->pages[i];
2542 /*
2543 * At this point we hold neither the i_pages lock nor the
2544 * page lock: the page may be truncated or invalidated
2545 * (changing page->mapping to NULL), or even swizzled
2546 * back from swapper_space to tmpfs file mapping
2547 */
2548
2549 if (nr_pages == 0)
2550 lock_page(page);
2551 else if (!trylock_page(page))
2552 break;
2553
2554 if (unlikely(page->mapping != mapping)) {
2555 unlock_page(page);
2556 break;
2557 }
2558
2559 if (!wbc->range_cyclic && page->index > end) {
2560 *done = true;
2561 unlock_page(page);
2562 break;
2563 }
2564
2565 if (*next && (page->index != *next)) {
2566 /* Not next consecutive page */
2567 unlock_page(page);
2568 break;
2569 }
2570
2571 if (wbc->sync_mode != WB_SYNC_NONE)
2572 wait_on_page_writeback(page);
2573
2574 if (PageWriteback(page) ||
2575 !clear_page_dirty_for_io(page)) {
2576 unlock_page(page);
2577 break;
2578 }
2579
2580 /*
2581 * This actually clears the dirty bit in the radix tree.
2582 * See cifs_writepage() for more commentary.
2583 */
2584 set_page_writeback(page);
2585 if (page_offset(page) >= i_size_read(mapping->host)) {
2586 *done = true;
2587 unlock_page(page);
2588 end_page_writeback(page);
2589 break;
2590 }
2591
2592 wdata->pages[i] = page;
2593 *next = page->index + 1;
2594 ++nr_pages;
2595 }
2596
2597 /* reset index to refind any pages skipped */
2598 if (nr_pages == 0)
2599 *index = wdata->pages[0]->index + 1;
2600
2601 /* put any pages we aren't going to use */
2602 for (i = nr_pages; i < found_pages; i++) {
2603 put_page(wdata->pages[i]);
2604 wdata->pages[i] = NULL;
2605 }
2606
2607 return nr_pages;
2608}
2609
2610static int
2611wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2612 struct address_space *mapping, struct writeback_control *wbc)
2613{
2614 int rc;
2615
2616 wdata->sync_mode = wbc->sync_mode;
2617 wdata->nr_pages = nr_pages;
2618 wdata->offset = page_offset(wdata->pages[0]);
2619 wdata->pagesz = PAGE_SIZE;
2620 wdata->tailsz = min(i_size_read(mapping->host) -
2621 page_offset(wdata->pages[nr_pages - 1]),
2622 (loff_t)PAGE_SIZE);
2623 wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2624 wdata->pid = wdata->cfile->pid;
2625
2626 rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2627 if (rc)
2628 return rc;
2629
2630 if (wdata->cfile->invalidHandle)
2631 rc = -EAGAIN;
2632 else
2633 rc = wdata->server->ops->async_writev(wdata,
2634 cifs_writedata_release);
2635
2636 return rc;
2637}
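/*
 * Note: wdata_send_pages() trims the credits reserved earlier to the
 * actual request size with adjust_credits(); when it fails, or returns
 * -EAGAIN for an invalidated handle, the caller in cifs_writepages()
 * hands the unused credits back via add_credits_and_wake_if().
 */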
2638
2639static int cifs_writepages(struct address_space *mapping,
2640 struct writeback_control *wbc)
2641{
2642 struct inode *inode = mapping->host;
2643 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2644 struct TCP_Server_Info *server;
2645 bool done = false, scanned = false, range_whole = false;
2646 pgoff_t end, index;
2647 struct cifs_writedata *wdata;
2648 struct cifsFileInfo *cfile = NULL;
2649 int rc = 0;
2650 int saved_rc = 0;
2651 unsigned int xid;
2652
2653 /*
2654 * If wsize is smaller than the page cache size, default to writing
2655 * one page at a time via cifs_writepage
2656 */
2657 if (cifs_sb->ctx->wsize < PAGE_SIZE)
2658 return generic_writepages(mapping, wbc);
2659
2660 xid = get_xid();
2661 if (wbc->range_cyclic) {
2662 index = mapping->writeback_index; /* Start from prev offset */
2663 end = -1;
2664 } else {
2665 index = wbc->range_start >> PAGE_SHIFT;
2666 end = wbc->range_end >> PAGE_SHIFT;
2667 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2668 range_whole = true;
2669 scanned = true;
2670 }
2671 server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2672
2673retry:
2674 while (!done && index <= end) {
2675 unsigned int i, nr_pages, found_pages, wsize;
2676 pgoff_t next = 0, tofind, saved_index = index;
2677 struct cifs_credits credits_on_stack;
2678 struct cifs_credits *credits = &credits_on_stack;
2679 int get_file_rc = 0;
2680
2681 if (cfile)
2682 cifsFileInfo_put(cfile);
2683
2684 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2685
2686 /* in case of an error store it to return later */
2687 if (rc)
2688 get_file_rc = rc;
2689
2690 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2691 &wsize, credits);
2692 if (rc != 0) {
2693 done = true;
2694 break;
2695 }
2696
2697 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2698
2699 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2700 &found_pages);
2701 if (!wdata) {
2702 rc = -ENOMEM;
2703 done = true;
2704 add_credits_and_wake_if(server, credits, 0);
2705 break;
2706 }
2707
2708 if (found_pages == 0) {
2709 kref_put(&wdata->refcount, cifs_writedata_release);
2710 add_credits_and_wake_if(server, credits, 0);
2711 break;
2712 }
2713
2714 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2715 end, &index, &next, &done);
2716
2717 /* nothing to write? */
2718 if (nr_pages == 0) {
2719 kref_put(&wdata->refcount, cifs_writedata_release);
2720 add_credits_and_wake_if(server, credits, 0);
2721 continue;
2722 }
2723
2724 wdata->credits = credits_on_stack;
2725 wdata->cfile = cfile;
2726 wdata->server = server;
2727 cfile = NULL;
2728
2729 if (!wdata->cfile) {
2730 cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
2731 get_file_rc);
2732 if (is_retryable_error(get_file_rc))
2733 rc = get_file_rc;
2734 else
2735 rc = -EBADF;
2736 } else
2737 rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2738
2739 for (i = 0; i < nr_pages; ++i)
2740 unlock_page(wdata->pages[i]);
2741
2742 /* send failure -- clean up the mess */
2743 if (rc != 0) {
2744 add_credits_and_wake_if(server, &wdata->credits, 0);
2745 for (i = 0; i < nr_pages; ++i) {
2746 if (is_retryable_error(rc))
2747 redirty_page_for_writepage(wbc,
2748 wdata->pages[i]);
2749 else
2750 SetPageError(wdata->pages[i]);
2751 end_page_writeback(wdata->pages[i]);
2752 put_page(wdata->pages[i]);
2753 }
2754 if (!is_retryable_error(rc))
2755 mapping_set_error(mapping, rc);
2756 }
2757 kref_put(&wdata->refcount, cifs_writedata_release);
2758
2759 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2760 index = saved_index;
2761 continue;
2762 }
2763
2764 /* Return immediately if we received a signal during writing */
2765 if (is_interrupt_error(rc)) {
2766 done = true;
2767 break;
2768 }
2769
2770 if (rc != 0 && saved_rc == 0)
2771 saved_rc = rc;
2772
2773 wbc->nr_to_write -= nr_pages;
2774 if (wbc->nr_to_write <= 0)
2775 done = true;
2776
2777 index = next;
2778 }
2779
2780 if (!scanned && !done) {
2781 /*
2782 * We hit the last page and there is more work to be done: wrap
2783 * back to the start of the file
2784 */
2785 scanned = true;
2786 index = 0;
2787 goto retry;
2788 }
2789
2790 if (saved_rc != 0)
2791 rc = saved_rc;
2792
2793 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2794 mapping->writeback_index = index;
2795
2796 if (cfile)
2797 cifsFileInfo_put(cfile);
2798 free_xid(xid);
2799 /* Indication to update ctime and mtime as close is deferred */
2800 set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2801 return rc;
2802}
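/*
 * Note: each iteration of the writepages loop above is a four-stage
 * pipeline: (1) reserve send credits with wait_mtu_credits(),
 * (2) gather dirty pages (wdata_alloc_and_fillpages), (3) lock and tag
 * a contiguous run for writeback (wdata_prepare_pages), and (4) issue
 * the asynchronous write (wdata_send_pages); any failure unwinds the
 * page state and returns the credits.
 */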
2803
2804static int
2805cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2806{
2807 int rc;
2808 unsigned int xid;
2809
2810 xid = get_xid();
2811/* BB add check for wbc flags */
2812 get_page(page);
2813 if (!PageUptodate(page))
2814 cifs_dbg(FYI, "ppw - page not up to date\n");
2815
2816 /*
2817 * Set the "writeback" flag, and clear "dirty" in the radix tree.
2818 *
2819 * A writepage() implementation always needs to do either this,
2820 * or re-dirty the page with "redirty_page_for_writepage()" in
2821 * the case of a failure.
2822 *
2823 * Just unlocking the page will cause the radix tree tag-bits
2824 * to fail to update with the state of the page correctly.
2825 */
2826 set_page_writeback(page);
2827retry_write:
2828 rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2829 if (is_retryable_error(rc)) {
2830 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2831 goto retry_write;
2832 redirty_page_for_writepage(wbc, page);
2833 } else if (rc != 0) {
2834 SetPageError(page);
2835 mapping_set_error(page->mapping, rc);
2836 } else {
2837 SetPageUptodate(page);
2838 }
2839 end_page_writeback(page);
2840 put_page(page);
2841 free_xid(xid);
2842 return rc;
2843}
2844
2845static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2846{
2847 int rc = cifs_writepage_locked(page, wbc);
2848 unlock_page(page);
2849 return rc;
2850}
2851
2852static int cifs_write_end(struct file *file, struct address_space *mapping,
2853 loff_t pos, unsigned len, unsigned copied,
2854 struct page *page, void *fsdata)
2855{
2856 int rc;
2857 struct inode *inode = mapping->host;
2858 struct cifsFileInfo *cfile = file->private_data;
2859 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2860 __u32 pid;
2861
2862 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2863 pid = cfile->pid;
2864 else
2865 pid = current->tgid;
2866
2867 cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2868 page, pos, copied);
2869
2870 if (PageChecked(page)) {
2871 if (copied == len)
2872 SetPageUptodate(page);
2873 ClearPageChecked(page);
2874 } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2875 SetPageUptodate(page);
2876
2877 if (!PageUptodate(page)) {
2878 char *page_data;
2879 unsigned offset = pos & (PAGE_SIZE - 1);
2880 unsigned int xid;
2881
2882 xid = get_xid();
2883		/* this is probably better than directly calling
2884		   cifs_partialpagewrite since here the file handle is
2885		   known, which we might as well leverage */
2886 /* BB check if anything else missing out of ppw
2887 such as updating last write time */
2888 page_data = kmap(page);
2889 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2890 /* if (rc < 0) should we set writebehind rc? */
2891 kunmap(page);
2892
2893 free_xid(xid);
2894 } else {
2895 rc = copied;
2896 pos += copied;
2897 set_page_dirty(page);
2898 }
2899
2900 if (rc > 0) {
2901 spin_lock(&inode->i_lock);
2902 if (pos > inode->i_size) {
2903 i_size_write(inode, pos);
2904 inode->i_blocks = (512 - 1 + pos) >> 9;
2905 }
2906 spin_unlock(&inode->i_lock);
2907 }
2908
2909 unlock_page(page);
2910 put_page(page);
2911 /* Indication to update ctime and mtime as close is deferred */
2912 set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2913
2914 return rc;
2915}
2916
2917int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2918 int datasync)
2919{
2920 unsigned int xid;
2921 int rc = 0;
2922 struct cifs_tcon *tcon;
2923 struct TCP_Server_Info *server;
2924 struct cifsFileInfo *smbfile = file->private_data;
2925 struct inode *inode = file_inode(file);
2926 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2927
2928 rc = file_write_and_wait_range(file, start, end);
2929 if (rc) {
2930 trace_cifs_fsync_err(inode->i_ino, rc);
2931 return rc;
2932 }
2933
2934 xid = get_xid();
2935
2936 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2937 file, datasync);
2938
2939 if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2940 rc = cifs_zap_mapping(inode);
2941 if (rc) {
2942 cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2943 rc = 0; /* don't care about it in fsync */
2944 }
2945 }
2946
2947 tcon = tlink_tcon(smbfile->tlink);
2948 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2949 server = tcon->ses->server;
2950 if (server->ops->flush == NULL) {
2951 rc = -ENOSYS;
2952 goto strict_fsync_exit;
2953 }
2954
2955 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
2956 smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
2957 if (smbfile) {
2958 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2959 cifsFileInfo_put(smbfile);
2960 } else
2961 cifs_dbg(FYI, "ignore fsync for file not open for write\n");
2962 } else
2963 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2964 }
2965
2966strict_fsync_exit:
2967 free_xid(xid);
2968 return rc;
2969}
2970
2971int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2972{
2973 unsigned int xid;
2974 int rc = 0;
2975 struct cifs_tcon *tcon;
2976 struct TCP_Server_Info *server;
2977 struct cifsFileInfo *smbfile = file->private_data;
2978 struct inode *inode = file_inode(file);
2979 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2980
2981 rc = file_write_and_wait_range(file, start, end);
2982 if (rc) {
2983 trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
2984 return rc;
2985 }
2986
2987 xid = get_xid();
2988
2989 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2990 file, datasync);
2991
2992 tcon = tlink_tcon(smbfile->tlink);
2993 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2994 server = tcon->ses->server;
2995 if (server->ops->flush == NULL) {
2996 rc = -ENOSYS;
2997 goto fsync_exit;
2998 }
2999
3000 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3001 smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3002 if (smbfile) {
3003 rc = server->ops->flush(xid, tcon, &smbfile->fid);
3004 cifsFileInfo_put(smbfile);
3005 } else
3006 cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3007 } else
3008 rc = server->ops->flush(xid, tcon, &smbfile->fid);
3009 }
3010
3011fsync_exit:
3012 free_xid(xid);
3013 return rc;
3014}
3015
3016/*
3017 * As file closes, flush all cached write data for this inode checking
3018 * for write behind errors.
3019 */
3020int cifs_flush(struct file *file, fl_owner_t id)
3021{
3022 struct inode *inode = file_inode(file);
3023 int rc = 0;
3024
3025 if (file->f_mode & FMODE_WRITE)
3026 rc = filemap_write_and_wait(inode->i_mapping);
3027
3028 cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
3029 if (rc) {
3030 /* get more nuanced writeback errors */
3031 rc = filemap_check_wb_err(file->f_mapping, 0);
3032 trace_cifs_flush_err(inode->i_ino, rc);
3033 }
3034 return rc;
3035}
3036
3037static int
3038cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
3039{
3040 int rc = 0;
3041 unsigned long i;
3042
3043 for (i = 0; i < num_pages; i++) {
3044 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3045 if (!pages[i]) {
3046 /*
3047 * save number of pages we have already allocated and
3048 * return with ENOMEM error
3049 */
3050 num_pages = i;
3051 rc = -ENOMEM;
3052 break;
3053 }
3054 }
3055
3056 if (rc) {
3057 for (i = 0; i < num_pages; i++)
3058 put_page(pages[i]);
3059 }
3060 return rc;
3061}
3062
3063static inline
3064size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
3065{
3066 size_t num_pages;
3067 size_t clen;
3068
3069 clen = min_t(const size_t, len, wsize);
3070 num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
3071
3072 if (cur_len)
3073 *cur_len = clen;
3074
3075 return num_pages;
3076}
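/*
 * Worked example (assuming PAGE_SIZE == 4096): with wsize = 65536 and
 * len = 200000, clen is capped at 65536 and num_pages = 16; a final
 * chunk of len = 5000 gives clen = 5000 and num_pages = 2.
 */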
3077
3078static void
3079cifs_uncached_writedata_release(struct kref *refcount)
3080{
3081 int i;
3082 struct cifs_writedata *wdata = container_of(refcount,
3083 struct cifs_writedata, refcount);
3084
3085 kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
3086 for (i = 0; i < wdata->nr_pages; i++)
3087 put_page(wdata->pages[i]);
3088 cifs_writedata_release(refcount);
3089}
3090
3091static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
3092
3093static void
3094cifs_uncached_writev_complete(struct work_struct *work)
3095{
3096 struct cifs_writedata *wdata = container_of(work,
3097 struct cifs_writedata, work);
3098 struct inode *inode = d_inode(wdata->cfile->dentry);
3099 struct cifsInodeInfo *cifsi = CIFS_I(inode);
3100
3101 spin_lock(&inode->i_lock);
3102 cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
3103 if (cifsi->server_eof > inode->i_size)
3104 i_size_write(inode, cifsi->server_eof);
3105 spin_unlock(&inode->i_lock);
3106
3107 complete(&wdata->done);
3108 collect_uncached_write_data(wdata->ctx);
3109 /* the below call can possibly free the last ref to aio ctx */
3110 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3111}
3112
3113static int
3114wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
3115 size_t *len, unsigned long *num_pages)
3116{
3117 size_t save_len, copied, bytes, cur_len = *len;
3118 unsigned long i, nr_pages = *num_pages;
3119
3120 save_len = cur_len;
3121 for (i = 0; i < nr_pages; i++) {
3122 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
3123 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
3124 cur_len -= copied;
3125 /*
3126 * If we didn't copy as much as we expected, then that
3127 * may mean we trod into an unmapped area. Stop copying
3128 * at that point. On the next pass through the big
3129 * loop, we'll likely end up getting a zero-length
3130 * write and bailing out of it.
3131 */
3132 if (copied < bytes)
3133 break;
3134 }
3135 cur_len = save_len - cur_len;
3136 *len = cur_len;
3137
3138 /*
3139 * If we have no data to send, then that probably means that
3140 * the copy above failed altogether. That's most likely because
3141 * the address in the iovec was bogus. Return -EFAULT and let
3142 * the caller free anything we allocated and bail out.
3143 */
3144 if (!cur_len)
3145 return -EFAULT;
3146
3147 /*
3148 * i + 1 now represents the number of pages we actually used in
3149 * the copy phase above.
3150 */
3151 *num_pages = i + 1;
3152 return 0;
3153}
3154
3155static int
3156cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
3157 struct cifs_aio_ctx *ctx)
3158{
3159 unsigned int wsize;
3160 struct cifs_credits credits;
3161 int rc;
3162 struct TCP_Server_Info *server = wdata->server;
3163
3164 do {
3165 if (wdata->cfile->invalidHandle) {
3166 rc = cifs_reopen_file(wdata->cfile, false);
3167 if (rc == -EAGAIN)
3168 continue;
3169 else if (rc)
3170 break;
3171 }
3172
3174 /*
3175 * Wait for credits to resend this wdata.
3176 * Note: we are attempting to resend the whole wdata not in
3177 * segments
3178 */
3179 do {
3180 rc = server->ops->wait_mtu_credits(server, wdata->bytes,
3181 &wsize, &credits);
3182 if (rc)
3183 goto fail;
3184
3185 if (wsize < wdata->bytes) {
3186 add_credits_and_wake_if(server, &credits, 0);
3187 msleep(1000);
3188 }
3189 } while (wsize < wdata->bytes);
3190 wdata->credits = credits;
3191
3192 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3193
3194 if (!rc) {
3195 if (wdata->cfile->invalidHandle)
3196 rc = -EAGAIN;
3197 else {
3198#ifdef CONFIG_CIFS_SMB_DIRECT
3199 if (wdata->mr) {
3200 wdata->mr->need_invalidate = true;
3201 smbd_deregister_mr(wdata->mr);
3202 wdata->mr = NULL;
3203 }
3204#endif
3205 rc = server->ops->async_writev(wdata,
3206 cifs_uncached_writedata_release);
3207 }
3208 }
3209
3210 /* If the write was successfully sent, we are done */
3211 if (!rc) {
3212 list_add_tail(&wdata->list, wdata_list);
3213 return 0;
3214 }
3215
3216 /* Roll back credits and retry if needed */
3217 add_credits_and_wake_if(server, &wdata->credits, 0);
3218 } while (rc == -EAGAIN);
3219
3220fail:
3221 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3222 return rc;
3223}
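/*
 * Note: unlike the initial send, the resend path insists on credits
 * for the whole wdata in a single request: if the server grants less
 * than wdata->bytes, the partial grant is returned and the wait is
 * retried after a one second sleep instead of splitting the write.
 */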
3224
3225static int
3226cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
3227 struct cifsFileInfo *open_file,
3228 struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
3229 struct cifs_aio_ctx *ctx)
3230{
3231 int rc = 0;
3232 size_t cur_len;
3233 unsigned long nr_pages, num_pages, i;
3234 struct cifs_writedata *wdata;
3235 struct iov_iter saved_from = *from;
3236 loff_t saved_offset = offset;
3237 pid_t pid;
3238 struct TCP_Server_Info *server;
3239 struct page **pagevec;
3240 size_t start;
3241 unsigned int xid;
3242
3243 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3244 pid = open_file->pid;
3245 else
3246 pid = current->tgid;
3247
3248 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3249 xid = get_xid();
3250
3251 do {
3252 unsigned int wsize;
3253 struct cifs_credits credits_on_stack;
3254 struct cifs_credits *credits = &credits_on_stack;
3255
3256 if (open_file->invalidHandle) {
3257 rc = cifs_reopen_file(open_file, false);
3258 if (rc == -EAGAIN)
3259 continue;
3260 else if (rc)
3261 break;
3262 }
3263
3264 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
3265 &wsize, credits);
3266 if (rc)
3267 break;
3268
3269 cur_len = min_t(const size_t, len, wsize);
3270
3271 if (ctx->direct_io) {
3272 ssize_t result;
3273
3274 result = iov_iter_get_pages_alloc2(
3275 from, &pagevec, cur_len, &start);
3276 if (result < 0) {
3277 cifs_dbg(VFS,
3278 "direct_writev couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
3279 result, iov_iter_type(from),
3280 from->iov_offset, from->count);
3281 dump_stack();
3282
3283 rc = result;
3284 add_credits_and_wake_if(server, credits, 0);
3285 break;
3286 }
3287 cur_len = (size_t)result;
3288
3289 nr_pages =
3290 (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
3291
3292 wdata = cifs_writedata_direct_alloc(pagevec,
3293 cifs_uncached_writev_complete);
3294 if (!wdata) {
3295 rc = -ENOMEM;
3296 add_credits_and_wake_if(server, credits, 0);
3297 break;
3298 }
3299
3301 wdata->page_offset = start;
3302 wdata->tailsz =
3303 nr_pages > 1 ?
3304 cur_len - (PAGE_SIZE - start) -
3305 (nr_pages - 2) * PAGE_SIZE :
3306 cur_len;
3307 } else {
3308 nr_pages = get_numpages(wsize, len, &cur_len);
3309 wdata = cifs_writedata_alloc(nr_pages,
3310 cifs_uncached_writev_complete);
3311 if (!wdata) {
3312 rc = -ENOMEM;
3313 add_credits_and_wake_if(server, credits, 0);
3314 break;
3315 }
3316
3317 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
3318 if (rc) {
3319 kvfree(wdata->pages);
3320 kfree(wdata);
3321 add_credits_and_wake_if(server, credits, 0);
3322 break;
3323 }
3324
3325 num_pages = nr_pages;
3326 rc = wdata_fill_from_iovec(
3327 wdata, from, &cur_len, &num_pages);
3328 if (rc) {
3329 for (i = 0; i < nr_pages; i++)
3330 put_page(wdata->pages[i]);
3331 kvfree(wdata->pages);
3332 kfree(wdata);
3333 add_credits_and_wake_if(server, credits, 0);
3334 break;
3335 }
3336
3337 /*
3338 * Bring nr_pages down to the number of pages we
3339 * actually used, and free any pages that we didn't use.
3340 */
3341 for ( ; nr_pages > num_pages; nr_pages--)
3342 put_page(wdata->pages[nr_pages - 1]);
3343
3344 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
3345 }
3346
3347 wdata->sync_mode = WB_SYNC_ALL;
3348 wdata->nr_pages = nr_pages;
3349 wdata->offset = (__u64)offset;
3350 wdata->cfile = cifsFileInfo_get(open_file);
3351 wdata->server = server;
3352 wdata->pid = pid;
3353 wdata->bytes = cur_len;
3354 wdata->pagesz = PAGE_SIZE;
3355 wdata->credits = credits_on_stack;
3356 wdata->ctx = ctx;
3357 kref_get(&ctx->refcount);
3358
3359 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3360
3361 if (!rc) {
3362 if (wdata->cfile->invalidHandle)
3363 rc = -EAGAIN;
3364 else
3365 rc = server->ops->async_writev(wdata,
3366 cifs_uncached_writedata_release);
3367 }
3368
3369 if (rc) {
3370 add_credits_and_wake_if(server, &wdata->credits, 0);
3371 kref_put(&wdata->refcount,
3372 cifs_uncached_writedata_release);
3373 if (rc == -EAGAIN) {
3374 *from = saved_from;
3375 iov_iter_advance(from, offset - saved_offset);
3376 continue;
3377 }
3378 break;
3379 }
3380
3381 list_add_tail(&wdata->list, wdata_list);
3382 offset += cur_len;
3383 len -= cur_len;
3384 } while (len > 0);
3385
3386 free_xid(xid);
3387 return rc;
3388}
3389
3390static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3391{
3392 struct cifs_writedata *wdata, *tmp;
3393 struct cifs_tcon *tcon;
3394 struct cifs_sb_info *cifs_sb;
3395 struct dentry *dentry = ctx->cfile->dentry;
3396 ssize_t rc;
3397
3398 tcon = tlink_tcon(ctx->cfile->tlink);
3399 cifs_sb = CIFS_SB(dentry->d_sb);
3400
3401 mutex_lock(&ctx->aio_mutex);
3402
3403 if (list_empty(&ctx->list)) {
3404 mutex_unlock(&ctx->aio_mutex);
3405 return;
3406 }
3407
3408 rc = ctx->rc;
3409 /*
3410 * Wait for and collect replies for any successful sends in order of
3411 * increasing offset. Once an error is hit, then return without waiting
3412 * for any more replies.
3413 */
3414restart_loop:
3415 list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3416 if (!rc) {
3417 if (!try_wait_for_completion(&wdata->done)) {
3418 mutex_unlock(&ctx->aio_mutex);
3419 return;
3420 }
3421
3422 if (wdata->result)
3423 rc = wdata->result;
3424 else
3425 ctx->total_len += wdata->bytes;
3426
3427 /* resend call if it's a retryable error */
3428 if (rc == -EAGAIN) {
3429 struct list_head tmp_list;
3430 struct iov_iter tmp_from = ctx->iter;
3431
3432 INIT_LIST_HEAD(&tmp_list);
3433 list_del_init(&wdata->list);
3434
3435 if (ctx->direct_io)
3436 rc = cifs_resend_wdata(
3437 wdata, &tmp_list, ctx);
3438 else {
3439 iov_iter_advance(&tmp_from,
3440 wdata->offset - ctx->pos);
3441
3442 rc = cifs_write_from_iter(wdata->offset,
3443 wdata->bytes, &tmp_from,
3444 ctx->cfile, cifs_sb, &tmp_list,
3445 ctx);
3446
3447 kref_put(&wdata->refcount,
3448 cifs_uncached_writedata_release);
3449 }
3450
3451 list_splice(&tmp_list, &ctx->list);
3452 goto restart_loop;
3453 }
3454 }
3455 list_del_init(&wdata->list);
3456 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3457 }
3458
3459 cifs_stats_bytes_written(tcon, ctx->total_len);
3460	set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(d_inode(dentry))->flags);
3461
3462 ctx->rc = (rc == 0) ? ctx->total_len : rc;
3463
3464 mutex_unlock(&ctx->aio_mutex);
3465
3466 if (ctx->iocb && ctx->iocb->ki_complete)
3467 ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
3468 else
3469 complete(&ctx->done);
3470}
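/*
 * Note: ctx->rc follows the usual aio convention - bytes written on
 * success, a negative errno otherwise - so it can be passed straight
 * to ki_complete() for asynchronous callers or picked up by the
 * synchronous waiter in __cifs_writev() below.
 */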
3471
3472static ssize_t __cifs_writev(
3473 struct kiocb *iocb, struct iov_iter *from, bool direct)
3474{
3475 struct file *file = iocb->ki_filp;
3476 ssize_t total_written = 0;
3477 struct cifsFileInfo *cfile;
3478 struct cifs_tcon *tcon;
3479 struct cifs_sb_info *cifs_sb;
3480 struct cifs_aio_ctx *ctx;
3481 struct iov_iter saved_from = *from;
3482 size_t len = iov_iter_count(from);
3483 int rc;
3484
3485 /*
3486 * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
3487 * In this case, fall back to non-direct write function.
3488	 * This could be improved by getting pages directly in ITER_KVEC.
3489 */
3490 if (direct && iov_iter_is_kvec(from)) {
3491 cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
3492 direct = false;
3493 }
3494
3495 rc = generic_write_checks(iocb, from);
3496 if (rc <= 0)
3497 return rc;
3498
3499 cifs_sb = CIFS_FILE_SB(file);
3500 cfile = file->private_data;
3501 tcon = tlink_tcon(cfile->tlink);
3502
3503 if (!tcon->ses->server->ops->async_writev)
3504 return -ENOSYS;
3505
3506 ctx = cifs_aio_ctx_alloc();
3507 if (!ctx)
3508 return -ENOMEM;
3509
3510 ctx->cfile = cifsFileInfo_get(cfile);
3511
3512 if (!is_sync_kiocb(iocb))
3513 ctx->iocb = iocb;
3514
3515 ctx->pos = iocb->ki_pos;
3516
3517 if (direct) {
3518 ctx->direct_io = true;
3519 ctx->iter = *from;
3520 ctx->len = len;
3521 } else {
3522 rc = setup_aio_ctx_iter(ctx, from, WRITE);
3523 if (rc) {
3524 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3525 return rc;
3526 }
3527 }
3528
3529	/* grab a lock here because the response handlers can access ctx */
3530 mutex_lock(&ctx->aio_mutex);
3531
3532 rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
3533 cfile, cifs_sb, &ctx->list, ctx);
3534
3535 /*
3536 * If at least one write was successfully sent, then discard any rc
3537	 * value from the later writes. If a later write succeeds, then
3538 * we'll end up returning whatever was written. If it fails, then
3539 * we'll get a new rc value from that.
3540 */
3541 if (!list_empty(&ctx->list))
3542 rc = 0;
3543
3544 mutex_unlock(&ctx->aio_mutex);
3545
3546 if (rc) {
3547 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3548 return rc;
3549 }
3550
3551 if (!is_sync_kiocb(iocb)) {
3552 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3553 return -EIOCBQUEUED;
3554 }
3555
3556 rc = wait_for_completion_killable(&ctx->done);
3557 if (rc) {
3558 mutex_lock(&ctx->aio_mutex);
3559 ctx->rc = rc = -EINTR;
3560 total_written = ctx->total_len;
3561 mutex_unlock(&ctx->aio_mutex);
3562 } else {
3563 rc = ctx->rc;
3564 total_written = ctx->total_len;
3565 }
3566
3567 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3568
3569 if (unlikely(!total_written))
3570 return rc;
3571
3572 iocb->ki_pos += total_written;
3573 return total_written;
3574}
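/*
 * Note: __cifs_writev() backs both entry points below -
 * cifs_direct_writev() writes straight from pinned user pages, while
 * cifs_user_writev() copies through kernel pages first - the only
 * difference being the 'direct' flag.
 */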
3575
3576ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3577{
3578 struct file *file = iocb->ki_filp;
3579
3580 cifs_revalidate_mapping(file->f_inode);
3581 return __cifs_writev(iocb, from, true);
3582}
3583
3584ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3585{
3586 return __cifs_writev(iocb, from, false);
3587}
3588
3589static ssize_t
3590cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3591{
3592 struct file *file = iocb->ki_filp;
3593 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3594 struct inode *inode = file->f_mapping->host;
3595 struct cifsInodeInfo *cinode = CIFS_I(inode);
3596 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3597 ssize_t rc;
3598
3599 inode_lock(inode);
3600 /*
3601 * We need to hold the sem to be sure nobody modifies lock list
3602 * with a brlock that prevents writing.
3603 */
3604 down_read(&cinode->lock_sem);
3605
3606 rc = generic_write_checks(iocb, from);
3607 if (rc <= 0)
3608 goto out;
3609
3610 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3611 server->vals->exclusive_lock_type, 0,
3612 NULL, CIFS_WRITE_OP))
3613 rc = __generic_file_write_iter(iocb, from);
3614 else
3615 rc = -EACCES;
3616out:
3617 up_read(&cinode->lock_sem);
3618 inode_unlock(inode);
3619
3620 if (rc > 0)
3621 rc = generic_write_sync(iocb, rc);
3622 return rc;
3623}
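/*
 * Note: the lock ordering here is inode_lock -> cinode->lock_sem
 * (read). Holding lock_sem across the write ensures no byte-range
 * lock conflicting with [ki_pos, ki_pos + count) can appear while
 * __generic_file_write_iter() runs.
 */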
3624
3625ssize_t
3626cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3627{
3628 struct inode *inode = file_inode(iocb->ki_filp);
3629 struct cifsInodeInfo *cinode = CIFS_I(inode);
3630 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3631 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3632 iocb->ki_filp->private_data;
3633 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3634 ssize_t written;
3635
3636 written = cifs_get_writer(cinode);
3637 if (written)
3638 return written;
3639
3640 if (CIFS_CACHE_WRITE(cinode)) {
3641 if (cap_unix(tcon->ses) &&
3642 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3643 && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3644 written = generic_file_write_iter(iocb, from);
3645 goto out;
3646 }
3647 written = cifs_writev(iocb, from);
3648 goto out;
3649 }
3650 /*
3651 * For non-oplocked files in strict cache mode we need to write the data
3652 * to the server exactly from the pos to pos+len-1 rather than flush all
3653	 * affected pages because it may cause an error with mandatory locks on
3654	 * these pages but not on the region from pos to pos+len-1.
3655 */
3656 written = cifs_user_writev(iocb, from);
3657 if (CIFS_CACHE_READ(cinode)) {
3658 /*
3659 * We have read level caching and we have just sent a write
3660 * request to the server thus making data in the cache stale.
3661 * Zap the cache and set oplock/lease level to NONE to avoid
3662 * reading stale data from the cache. All subsequent read
3663 * operations will read new data from the server.
3664 */
3665 cifs_zap_mapping(inode);
3666 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3667 inode);
3668 cinode->oplock = 0;
3669 }
3670out:
3671 cifs_put_writer(cinode);
3672 return written;
3673}
3674
3675static struct cifs_readdata *
3676cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3677{
3678 struct cifs_readdata *rdata;
3679
3680 rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3681 if (rdata != NULL) {
3682 rdata->pages = pages;
3683 kref_init(&rdata->refcount);
3684 INIT_LIST_HEAD(&rdata->list);
3685 init_completion(&rdata->done);
3686 INIT_WORK(&rdata->work, complete);
3687 }
3688
3689 return rdata;
3690}
3691
3692static struct cifs_readdata *
3693cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3694{
3695 struct page **pages =
3696 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3697 struct cifs_readdata *ret = NULL;
3698
3699 if (pages) {
3700 ret = cifs_readdata_direct_alloc(pages, complete);
3701 if (!ret)
3702 kfree(pages);
3703 }
3704
3705 return ret;
3706}
3707
3708void
3709cifs_readdata_release(struct kref *refcount)
3710{
3711 struct cifs_readdata *rdata = container_of(refcount,
3712 struct cifs_readdata, refcount);
3713#ifdef CONFIG_CIFS_SMB_DIRECT
3714 if (rdata->mr) {
3715 smbd_deregister_mr(rdata->mr);
3716 rdata->mr = NULL;
3717 }
3718#endif
3719 if (rdata->cfile)
3720 cifsFileInfo_put(rdata->cfile);
3721
3722 kvfree(rdata->pages);
3723 kfree(rdata);
3724}
3725
3726static int
3727cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3728{
3729 int rc = 0;
3730 struct page *page;
3731 unsigned int i;
3732
3733 for (i = 0; i < nr_pages; i++) {
3734 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3735 if (!page) {
3736 rc = -ENOMEM;
3737 break;
3738 }
3739 rdata->pages[i] = page;
3740 }
3741
3742 if (rc) {
3743 unsigned int nr_page_failed = i;
3744
3745 for (i = 0; i < nr_page_failed; i++) {
3746 put_page(rdata->pages[i]);
3747 rdata->pages[i] = NULL;
3748 }
3749 }
3750 return rc;
3751}
3752
3753static void
3754cifs_uncached_readdata_release(struct kref *refcount)
3755{
3756 struct cifs_readdata *rdata = container_of(refcount,
3757 struct cifs_readdata, refcount);
3758 unsigned int i;
3759
3760 kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3761 for (i = 0; i < rdata->nr_pages; i++) {
3762 put_page(rdata->pages[i]);
3763 }
3764 cifs_readdata_release(refcount);
3765}
3766
3767/**
3768 * cifs_readdata_to_iov - copy data from pages in response to an iovec
3769 * @rdata: the readdata response with list of pages holding data
3770 * @iter: destination for our data
3771 *
3772 * This function copies data from a list of pages in a readdata response into
3773 * an array of iovecs. It will first calculate where the data should go
3774 * based on the info in the readdata and then copy the data into that spot.
3775 */
3776static int
3777cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3778{
3779 size_t remaining = rdata->got_bytes;
3780 unsigned int i;
3781
3782 for (i = 0; i < rdata->nr_pages; i++) {
3783 struct page *page = rdata->pages[i];
3784 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3785 size_t written;
3786
3787 if (unlikely(iov_iter_is_pipe(iter))) {
3788 void *addr = kmap_atomic(page);
3789
3790 written = copy_to_iter(addr, copy, iter);
3791 kunmap_atomic(addr);
3792 } else
3793 written = copy_page_to_iter(page, 0, copy, iter);
3794 remaining -= written;
3795 if (written < copy && iov_iter_count(iter) > 0)
3796 break;
3797 }
3798 return remaining ? -EFAULT : 0;
3799}
3800
3801static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3802
3803static void
3804cifs_uncached_readv_complete(struct work_struct *work)
3805{
3806 struct cifs_readdata *rdata = container_of(work,
3807 struct cifs_readdata, work);
3808
3809 complete(&rdata->done);
3810 collect_uncached_read_data(rdata->ctx);
3811 /* the below call can possibly free the last ref to aio ctx */
3812 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3813}
3814
3815static int
3816uncached_fill_pages(struct TCP_Server_Info *server,
3817 struct cifs_readdata *rdata, struct iov_iter *iter,
3818 unsigned int len)
3819{
3820 int result = 0;
3821 unsigned int i;
3822 unsigned int nr_pages = rdata->nr_pages;
3823 unsigned int page_offset = rdata->page_offset;
3824
3825 rdata->got_bytes = 0;
3826 rdata->tailsz = PAGE_SIZE;
3827 for (i = 0; i < nr_pages; i++) {
3828 struct page *page = rdata->pages[i];
3829 size_t n;
3830 unsigned int segment_size = rdata->pagesz;
3831
3832 if (i == 0)
3833 segment_size -= page_offset;
3834 else
3835 page_offset = 0;
3836
3838		if (len == 0) {
3839 /* no need to hold page hostage */
3840 rdata->pages[i] = NULL;
3841 rdata->nr_pages--;
3842 put_page(page);
3843 continue;
3844 }
3845
3846 n = len;
3847 if (len >= segment_size)
3848 /* enough data to fill the page */
3849 n = segment_size;
3850 else
3851 rdata->tailsz = len;
3852 len -= n;
3853
3854 if (iter)
3855 result = copy_page_from_iter(
3856 page, page_offset, n, iter);
3857#ifdef CONFIG_CIFS_SMB_DIRECT
3858 else if (rdata->mr)
3859 result = n;
3860#endif
3861 else
3862 result = cifs_read_page_from_socket(
3863 server, page, page_offset, n);
3864 if (result < 0)
3865 break;
3866
3867 rdata->got_bytes += result;
3868 }
3869
3870 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3871 rdata->got_bytes : result;
3872}
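/*
 * Worked example (assuming PAGE_SIZE == 4096 and page_offset == 0):
 * for len = 10000 the loop consumes two full pages (n = 4096 twice)
 * and then sets rdata->tailsz = 1808 for the third, recording how much
 * of the final page holds valid data.
 */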
3873
3874static int
3875cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3876 struct cifs_readdata *rdata, unsigned int len)
3877{
3878 return uncached_fill_pages(server, rdata, NULL, len);
3879}
3880
3881static int
3882cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3883 struct cifs_readdata *rdata,
3884 struct iov_iter *iter)
3885{
3886 return uncached_fill_pages(server, rdata, iter, iter->count);
3887}
3888
3889static int cifs_resend_rdata(struct cifs_readdata *rdata,
3890 struct list_head *rdata_list,
3891 struct cifs_aio_ctx *ctx)
3892{
3893 unsigned int rsize;
3894 struct cifs_credits credits;
3895 int rc;
3896 struct TCP_Server_Info *server;
3897
3898 /* XXX: should we pick a new channel here? */
3899 server = rdata->server;
3900
3901 do {
3902 if (rdata->cfile->invalidHandle) {
3903 rc = cifs_reopen_file(rdata->cfile, true);
3904 if (rc == -EAGAIN)
3905 continue;
3906 else if (rc)
3907 break;
3908 }
3909
3910 /*
3911  * Wait for credits to resend this rdata.
3912  * Note: we attempt to resend the whole rdata rather
3913  * than splitting it into segments.
3914  */
3915 do {
3916 rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3917 &rsize, &credits);
3918
3919 if (rc)
3920 goto fail;
3921
3922 if (rsize < rdata->bytes) {
3923 add_credits_and_wake_if(server, &credits, 0);
3924 msleep(1000);
3925 }
3926 } while (rsize < rdata->bytes);
3927 rdata->credits = credits;
3928
3929 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3930 if (!rc) {
3931 if (rdata->cfile->invalidHandle)
3932 rc = -EAGAIN;
3933 else {
3934#ifdef CONFIG_CIFS_SMB_DIRECT
3935 if (rdata->mr) {
3936 rdata->mr->need_invalidate = true;
3937 smbd_deregister_mr(rdata->mr);
3938 rdata->mr = NULL;
3939 }
3940#endif
3941 rc = server->ops->async_readv(rdata);
3942 }
3943 }
3944
3945 /* If the read was successfully sent, we are done */
3946 if (!rc) {
3947 /* Add to aio pending list */
3948 list_add_tail(&rdata->list, rdata_list);
3949 return 0;
3950 }
3951
3952 /* Roll back credits and retry if needed */
3953 add_credits_and_wake_if(server, &rdata->credits, 0);
3954 } while (rc == -EAGAIN);
3955
3956fail:
3957 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3958 return rc;
3959}
3960
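/*
 * Split an uncached read of [offset, offset + len) into rsize-sized
 * chunks, allocate an rdata for each chunk (pinning the user pages in
 * the direct I/O case), and issue them as async reads. Each rdata that
 * is successfully sent is appended to rdata_list for collection.
 */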
3961static int
3962cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3963 struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3964 struct cifs_aio_ctx *ctx)
3965{
3966 struct cifs_readdata *rdata;
3967 unsigned int npages, rsize;
3968 struct cifs_credits credits_on_stack;
3969 struct cifs_credits *credits = &credits_on_stack;
3970 size_t cur_len;
3971 int rc;
3972 pid_t pid;
3973 struct TCP_Server_Info *server;
3974 struct page **pagevec;
3975 size_t start;
3976 struct iov_iter direct_iov = ctx->iter;
3977
3978 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3979
3980 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3981 pid = open_file->pid;
3982 else
3983 pid = current->tgid;
3984
3985 if (ctx->direct_io)
3986 iov_iter_advance(&direct_iov, offset - ctx->pos);
3987
3988 do {
3989 if (open_file->invalidHandle) {
3990 rc = cifs_reopen_file(open_file, true);
3991 if (rc == -EAGAIN)
3992 continue;
3993 else if (rc)
3994 break;
3995 }
3996
3997 if (cifs_sb->ctx->rsize == 0)
3998 cifs_sb->ctx->rsize =
3999 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4000 cifs_sb->ctx);
4001
4002 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4003 &rsize, credits);
4004 if (rc)
4005 break;
4006
4007 cur_len = min_t(const size_t, len, rsize);
4008
4009 if (ctx->direct_io) {
4010 ssize_t result;
4011
4012 result = iov_iter_get_pages_alloc2(
4013 &direct_iov, &pagevec,
4014 cur_len, &start);
4015 if (result < 0) {
4016 cifs_dbg(VFS,
4017 "Couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
4018 result, iov_iter_type(&direct_iov),
4019 direct_iov.iov_offset,
4020 direct_iov.count);
4021 dump_stack();
4022
4023 rc = result;
4024 add_credits_and_wake_if(server, credits, 0);
4025 break;
4026 }
4027 cur_len = (size_t)result;
4028
4029 rdata = cifs_readdata_direct_alloc(
4030 pagevec, cifs_uncached_readv_complete);
4031 if (!rdata) {
4032 add_credits_and_wake_if(server, credits, 0);
4033 rc = -ENOMEM;
4034 break;
4035 }
4036
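			/*
			 * Worked example (illustrative, assuming PAGE_SIZE
			 * is 4096): with start == 1024 and cur_len == 6144,
			 * npages == (6144 + 1024 + 4095) / 4096 == 2, and
			 * the tail page holds 6144 - (4096 - 1024) == 3072
			 * bytes, which is what tailsz below works out to.
			 */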
4037 npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
4038 rdata->page_offset = start;
4039 rdata->tailsz = npages > 1 ?
4040 cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
4041 cur_len;
4042
4043 } else {
4044
4045 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
4046 /* allocate a readdata struct */
4047 rdata = cifs_readdata_alloc(npages,
4048 cifs_uncached_readv_complete);
4049 if (!rdata) {
4050 add_credits_and_wake_if(server, credits, 0);
4051 rc = -ENOMEM;
4052 break;
4053 }
4054
4055 rc = cifs_read_allocate_pages(rdata, npages);
4056 if (rc) {
4057 kvfree(rdata->pages);
4058 kfree(rdata);
4059 add_credits_and_wake_if(server, credits, 0);
4060 break;
4061 }
4062
4063 rdata->tailsz = PAGE_SIZE;
4064 }
4065
4066 rdata->server = server;
4067 rdata->cfile = cifsFileInfo_get(open_file);
4068 rdata->nr_pages = npages;
4069 rdata->offset = offset;
4070 rdata->bytes = cur_len;
4071 rdata->pid = pid;
4072 rdata->pagesz = PAGE_SIZE;
4073 rdata->read_into_pages = cifs_uncached_read_into_pages;
4074 rdata->copy_into_pages = cifs_uncached_copy_into_pages;
4075 rdata->credits = credits_on_stack;
4076 rdata->ctx = ctx;
4077 kref_get(&ctx->refcount);
4078
4079 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4080
4081 if (!rc) {
4082 if (rdata->cfile->invalidHandle)
4083 rc = -EAGAIN;
4084 else
4085 rc = server->ops->async_readv(rdata);
4086 }
4087
4088 if (rc) {
4089 add_credits_and_wake_if(server, &rdata->credits, 0);
4090 kref_put(&rdata->refcount,
4091 cifs_uncached_readdata_release);
4092 if (rc == -EAGAIN) {
4093 iov_iter_revert(&direct_iov, cur_len);
4094 continue;
4095 }
4096 break;
4097 }
4098
4099 list_add_tail(&rdata->list, rdata_list);
4100 offset += cur_len;
4101 len -= cur_len;
4102 } while (len > 0);
4103
4104 return rc;
4105}
4106
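/*
 * Collect the results of all outstanding uncached reads on an aio
 * context: re-issue requests that returned -EAGAIN, copy completed data
 * into the destination iterator for the non-direct case, and complete
 * the iocb (or wake the synchronous waiter) once the list drains.
 */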
4107static void
4108collect_uncached_read_data(struct cifs_aio_ctx *ctx)
4109{
4110 struct cifs_readdata *rdata, *tmp;
4111 struct iov_iter *to = &ctx->iter;
4112 struct cifs_sb_info *cifs_sb;
4113 int rc;
4114
4115 cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
4116
4117 mutex_lock(&ctx->aio_mutex);
4118
4119 if (list_empty(&ctx->list)) {
4120 mutex_unlock(&ctx->aio_mutex);
4121 return;
4122 }
4123
4124 rc = ctx->rc;
4125 /* the loop below should proceed in the order of increasing offsets */
4126again:
4127 list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
4128 if (!rc) {
4129 if (!try_wait_for_completion(&rdata->done)) {
4130 mutex_unlock(&ctx->aio_mutex);
4131 return;
4132 }
4133
4134 if (rdata->result == -EAGAIN) {
4135 /* resend call if it's a retryable error */
4136 struct list_head tmp_list;
4137 unsigned int got_bytes = rdata->got_bytes;
4138
4139 list_del_init(&rdata->list);
4140 INIT_LIST_HEAD(&tmp_list);
4141
4142 /*
4143  * We got part of the data and then a reconnect
4144  * happened -- fill the buffer and continue
4145  * reading.
4146  */
4147 if (got_bytes && got_bytes < rdata->bytes) {
4148 rc = 0;
4149 if (!ctx->direct_io)
4150 rc = cifs_readdata_to_iov(rdata, to);
4151 if (rc) {
4152 kref_put(&rdata->refcount,
4153 cifs_uncached_readdata_release);
4154 continue;
4155 }
4156 }
4157
4158 if (ctx->direct_io) {
4159 /*
4160 * Re-use rdata as this is a
4161 * direct I/O
4162 */
4163 rc = cifs_resend_rdata(
4164 rdata,
4165 &tmp_list, ctx);
4166 } else {
4167 rc = cifs_send_async_read(
4168 rdata->offset + got_bytes,
4169 rdata->bytes - got_bytes,
4170 rdata->cfile, cifs_sb,
4171 &tmp_list, ctx);
4172
4173 kref_put(&rdata->refcount,
4174 cifs_uncached_readdata_release);
4175 }
4176
4177 list_splice(&tmp_list, &ctx->list);
4178
4179 goto again;
4180 } else if (rdata->result)
4181 rc = rdata->result;
4182 else if (!ctx->direct_io)
4183 rc = cifs_readdata_to_iov(rdata, to);
4184
4185 /* if there was a short read -- discard anything left */
4186 if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
4187 rc = -ENODATA;
4188
4189 ctx->total_len += rdata->got_bytes;
4190 }
4191 list_del_init(&rdata->list);
4192 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
4193 }
4194
4195 if (!ctx->direct_io)
4196 ctx->total_len = ctx->len - iov_iter_count(to);
4197
4198 /* mask nodata case */
4199 if (rc == -ENODATA)
4200 rc = 0;
4201
4202 ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
4203
4204 mutex_unlock(&ctx->aio_mutex);
4205
4206 if (ctx->iocb && ctx->iocb->ki_complete)
4207 ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
4208 else
4209 complete(&ctx->done);
4210}
4211
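/*
 * Common back end for cifs_user_readv() and cifs_direct_readv(): set up
 * a cifs_aio_ctx, fan the request out via cifs_send_async_read(), then
 * either return -EIOCBQUEUED for an async iocb or wait for completion
 * and return the number of bytes read.
 */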
4212static ssize_t __cifs_readv(
4213 struct kiocb *iocb, struct iov_iter *to, bool direct)
4214{
4215 size_t len;
4216 struct file *file = iocb->ki_filp;
4217 struct cifs_sb_info *cifs_sb;
4218 struct cifsFileInfo *cfile;
4219 struct cifs_tcon *tcon;
4220 ssize_t rc, total_read = 0;
4221 loff_t offset = iocb->ki_pos;
4222 struct cifs_aio_ctx *ctx;
4223
4224 /*
4225  * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC, so
4226  * fall back to the data copy read path. This could be improved
4227  * by getting pages directly for ITER_KVEC.
4228  */
4229 if (direct && iov_iter_is_kvec(to)) {
4230 cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
4231 direct = false;
4232 }
4233
4234 len = iov_iter_count(to);
4235 if (!len)
4236 return 0;
4237
4238 cifs_sb = CIFS_FILE_SB(file);
4239 cfile = file->private_data;
4240 tcon = tlink_tcon(cfile->tlink);
4241
4242 if (!tcon->ses->server->ops->async_readv)
4243 return -ENOSYS;
4244
4245 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4246 cifs_dbg(FYI, "attempting read on write only file instance\n");
4247
4248 ctx = cifs_aio_ctx_alloc();
4249 if (!ctx)
4250 return -ENOMEM;
4251
4252 ctx->cfile = cifsFileInfo_get(cfile);
4253
4254 if (!is_sync_kiocb(iocb))
4255 ctx->iocb = iocb;
4256
4257 if (user_backed_iter(to))
4258 ctx->should_dirty = true;
4259
4260 if (direct) {
4261 ctx->pos = offset;
4262 ctx->direct_io = true;
4263 ctx->iter = *to;
4264 ctx->len = len;
4265 } else {
4266 rc = setup_aio_ctx_iter(ctx, to, READ);
4267 if (rc) {
4268 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4269 return rc;
4270 }
4271 len = ctx->len;
4272 }
4273
4274 /* grab the lock here since read response handlers can access ctx */
4275 mutex_lock(&ctx->aio_mutex);
4276
4277 rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
4278
4279 /* if at least one read request was successfully sent, reset rc */
4280 if (!list_empty(&ctx->list))
4281 rc = 0;
4282
4283 mutex_unlock(&ctx->aio_mutex);
4284
4285 if (rc) {
4286 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4287 return rc;
4288 }
4289
4290 if (!is_sync_kiocb(iocb)) {
4291 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4292 return -EIOCBQUEUED;
4293 }
4294
4295 rc = wait_for_completion_killable(&ctx->done);
4296 if (rc) {
4297 mutex_lock(&ctx->aio_mutex);
4298 ctx->rc = rc = -EINTR;
4299 total_read = ctx->total_len;
4300 mutex_unlock(&ctx->aio_mutex);
4301 } else {
4302 rc = ctx->rc;
4303 total_read = ctx->total_len;
4304 }
4305
4306 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4307
4308 if (total_read) {
4309 iocb->ki_pos += total_read;
4310 return total_read;
4311 }
4312 return rc;
4313}
4314
4315ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
4316{
4317 return __cifs_readv(iocb, to, true);
4318}
4319
4320ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
4321{
4322 return __cifs_readv(iocb, to, false);
4323}
4324
4325ssize_t
4326cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
4327{
4328 struct inode *inode = file_inode(iocb->ki_filp);
4329 struct cifsInodeInfo *cinode = CIFS_I(inode);
4330 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4331 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
4332 iocb->ki_filp->private_data;
4333 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4334 int rc = -EACCES;
4335
4336 /*
4337  * In strict cache mode we need to read from the server all the time
4338  * if we don't have a level II oplock, because the server can delay the
4339  * mtime change - so we can't make a decision about invalidating the
4340  * inode. Page reading can also fail if there are mandatory locks on
4341  * pages affected by this read but not on the region from pos to
4342  * pos+len-1.
4343  */
4344 if (!CIFS_CACHE_READ(cinode))
4345 return cifs_user_readv(iocb, to);
4346
4347 if (cap_unix(tcon->ses) &&
4348 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
4349 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
4350 return generic_file_read_iter(iocb, to);
4351
4352 /*
4353 * We need to hold the sem to be sure nobody modifies lock list
4354 * with a brlock that prevents reading.
4355 */
4356 down_read(&cinode->lock_sem);
4357 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4358 tcon->ses->server->vals->shared_lock_type,
4359 0, NULL, CIFS_READ_OP))
4360 rc = generic_file_read_iter(iocb, to);
4361 up_read(&cinode->lock_sem);
4362 return rc;
4363}
4364
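/*
 * Synchronous read helper used by cifs_readpage_worker(): issue
 * rsize-bounded sync_read calls until read_size bytes have arrived (or
 * EOF/error), advancing *offset by the amount actually read.
 */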
4365static ssize_t
4366cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4367{
4368 int rc = -EACCES;
4369 unsigned int bytes_read = 0;
4370 unsigned int total_read;
4371 unsigned int current_read_size;
4372 unsigned int rsize;
4373 struct cifs_sb_info *cifs_sb;
4374 struct cifs_tcon *tcon;
4375 struct TCP_Server_Info *server;
4376 unsigned int xid;
4377 char *cur_offset;
4378 struct cifsFileInfo *open_file;
4379 struct cifs_io_parms io_parms = {0};
4380 int buf_type = CIFS_NO_BUFFER;
4381 __u32 pid;
4382
4383 xid = get_xid();
4384 cifs_sb = CIFS_FILE_SB(file);
4385
4386 /* FIXME: set up handlers for larger reads and/or convert to async */
4387 rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
4388
4389 if (file->private_data == NULL) {
4390 rc = -EBADF;
4391 free_xid(xid);
4392 return rc;
4393 }
4394 open_file = file->private_data;
4395 tcon = tlink_tcon(open_file->tlink);
4396 server = cifs_pick_channel(tcon->ses);
4397
4398 if (!server->ops->sync_read) {
4399 free_xid(xid);
4400 return -ENOSYS;
4401 }
4402
4403 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4404 pid = open_file->pid;
4405 else
4406 pid = current->tgid;
4407
4408 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4409 cifs_dbg(FYI, "attempting read on write only file instance\n");
4410
4411 for (total_read = 0, cur_offset = read_data; read_size > total_read;
4412 total_read += bytes_read, cur_offset += bytes_read) {
4413 do {
4414 current_read_size = min_t(uint, read_size - total_read,
4415 rsize);
4416 /*
4417  * For Windows ME and 9x we do not want to request more
4418  * than the server negotiated, since it will otherwise
4419  * refuse the read.
4420  */
4421 if (!(tcon->ses->capabilities &
4422 tcon->ses->server->vals->cap_large_files)) {
4423 current_read_size = min_t(uint,
4424 current_read_size, CIFSMaxBufSize);
4425 }
4426 if (open_file->invalidHandle) {
4427 rc = cifs_reopen_file(open_file, true);
4428 if (rc != 0)
4429 break;
4430 }
4431 io_parms.pid = pid;
4432 io_parms.tcon = tcon;
4433 io_parms.offset = *offset;
4434 io_parms.length = current_read_size;
4435 io_parms.server = server;
4436 rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4437 &bytes_read, &cur_offset,
4438 &buf_type);
4439 } while (rc == -EAGAIN);
4440
4441 if (rc || (bytes_read == 0)) {
4442 if (total_read) {
4443 break;
4444 } else {
4445 free_xid(xid);
4446 return rc;
4447 }
4448 } else {
4449 cifs_stats_bytes_read(tcon, total_read);
4450 *offset += bytes_read;
4451 }
4452 }
4453 free_xid(xid);
4454 return total_read;
4455}
4456
4457/*
4458 * If the page is mmap'ed into a process' page tables, then we need to make
4459 * sure that it doesn't change while being written back.
4460 */
4461static vm_fault_t
4462cifs_page_mkwrite(struct vm_fault *vmf)
4463{
4464 struct page *page = vmf->page;
4465
4466 /* Wait for the page to be written to the cache before we allow it to
4467 * be modified. We then assume the entire page will need writing back.
4468 */
4469#ifdef CONFIG_CIFS_FSCACHE
4470 if (PageFsCache(page) &&
4471 wait_on_page_fscache_killable(page) < 0)
4472 return VM_FAULT_RETRY;
4473#endif
4474
4475 wait_on_page_writeback(page);
4476
4477 if (lock_page_killable(page) < 0)
4478 return VM_FAULT_RETRY;
4479 return VM_FAULT_LOCKED;
4480}
4481
4482static const struct vm_operations_struct cifs_file_vm_ops = {
4483 .fault = filemap_fault,
4484 .map_pages = filemap_map_pages,
4485 .page_mkwrite = cifs_page_mkwrite,
4486};
4487
4488int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4489{
4490 int xid, rc = 0;
4491 struct inode *inode = file_inode(file);
4492
4493 xid = get_xid();
4494
4495 if (!CIFS_CACHE_READ(CIFS_I(inode)))
4496 rc = cifs_zap_mapping(inode);
4497 if (!rc)
4498 rc = generic_file_mmap(file, vma);
4499 if (!rc)
4500 vma->vm_ops = &cifs_file_vm_ops;
4501
4502 free_xid(xid);
4503 return rc;
4504}
4505
4506int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4507{
4508 int rc, xid;
4509
4510 xid = get_xid();
4511
4512 rc = cifs_revalidate_file(file);
4513 if (rc)
4514 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4515 rc);
4516 if (!rc)
4517 rc = generic_file_mmap(file, vma);
4518 if (!rc)
4519 vma->vm_ops = &cifs_file_vm_ops;
4520
4521 free_xid(xid);
4522 return rc;
4523}
4524
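/*
 * Completion work for readahead I/O: mark each page up to date (or in
 * error), hand successfully read pages to fscache, then unlock and
 * release them.
 */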
4525static void
4526cifs_readv_complete(struct work_struct *work)
4527{
4528 unsigned int i, got_bytes;
4529 struct cifs_readdata *rdata = container_of(work,
4530 struct cifs_readdata, work);
4531
4532 got_bytes = rdata->got_bytes;
4533 for (i = 0; i < rdata->nr_pages; i++) {
4534 struct page *page = rdata->pages[i];
4535
4536 if (rdata->result == 0 ||
4537 (rdata->result == -EAGAIN && got_bytes)) {
4538 flush_dcache_page(page);
4539 SetPageUptodate(page);
4540 } else
4541 SetPageError(page);
4542
4543 if (rdata->result == 0 ||
4544 (rdata->result == -EAGAIN && got_bytes))
4545 cifs_readpage_to_fscache(rdata->mapping->host, page);
4546
4547 unlock_page(page);
4548
4549 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
4550
4551 put_page(page);
4552 rdata->pages[i] = NULL;
4553 }
4554 kref_put(&rdata->refcount, cifs_readdata_release);
4555}
4556
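/*
 * Like uncached_fill_pages(), but for the readahead path: pages past
 * the server's likely EOF are zero-filled and marked up to date, while
 * pages that simply received no data are released for the VFS to retry
 * via readpage.
 */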
4557static int
4558readpages_fill_pages(struct TCP_Server_Info *server,
4559 struct cifs_readdata *rdata, struct iov_iter *iter,
4560 unsigned int len)
4561{
4562 int result = 0;
4563 unsigned int i;
4564 u64 eof;
4565 pgoff_t eof_index;
4566 unsigned int nr_pages = rdata->nr_pages;
4567 unsigned int page_offset = rdata->page_offset;
4568
4569 /* determine the eof that the server (probably) has */
4570 eof = CIFS_I(rdata->mapping->host)->server_eof;
4571 eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
4572 cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
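	/*
	 * Example (illustrative, assuming PAGE_SIZE is 4096): eof == 8192
	 * gives eof_index == 8191 >> PAGE_SHIFT == 1, i.e. index 1 is the
	 * last page the server can have data for.
	 */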
4573
4574 rdata->got_bytes = 0;
4575 rdata->tailsz = PAGE_SIZE;
4576 for (i = 0; i < nr_pages; i++) {
4577 struct page *page = rdata->pages[i];
4578 unsigned int to_read = rdata->pagesz;
4579 size_t n;
4580
4581 if (i == 0)
4582 to_read -= page_offset;
4583 else
4584 page_offset = 0;
4585
4586 n = to_read;
4587
4588 if (len >= to_read) {
4589 len -= to_read;
4590 } else if (len > 0) {
4591 /* enough for partial page, fill and zero the rest */
4592 zero_user(page, len + page_offset, to_read - len);
4593 n = rdata->tailsz = len;
4594 len = 0;
4595 } else if (page->index > eof_index) {
4596 /*
4597 * The VFS will not try to do readahead past the
4598 * i_size, but it's possible that we have outstanding
4599 * writes with gaps in the middle and the i_size hasn't
4600 * caught up yet. Populate those with zeroed out pages
4601 * to prevent the VFS from repeatedly attempting to
4602 * fill them until the writes are flushed.
4603 */
4604 zero_user(page, 0, PAGE_SIZE);
4605 flush_dcache_page(page);
4606 SetPageUptodate(page);
4607 unlock_page(page);
4608 put_page(page);
4609 rdata->pages[i] = NULL;
4610 rdata->nr_pages--;
4611 continue;
4612 } else {
4613 /* no need to hold page hostage */
4614 unlock_page(page);
4615 put_page(page);
4616 rdata->pages[i] = NULL;
4617 rdata->nr_pages--;
4618 continue;
4619 }
4620
4621 if (iter)
4622 result = copy_page_from_iter(
4623 page, page_offset, n, iter);
4624#ifdef CONFIG_CIFS_SMB_DIRECT
4625 else if (rdata->mr)
4626 result = n;
4627#endif
4628 else
4629 result = cifs_read_page_from_socket(
4630 server, page, page_offset, n);
4631 if (result < 0)
4632 break;
4633
4634 rdata->got_bytes += result;
4635 }
4636
4637 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
4638 rdata->got_bytes : result;
4639}
4640
4641static int
4642cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
4643 struct cifs_readdata *rdata, unsigned int len)
4644{
4645 return readpages_fill_pages(server, rdata, NULL, len);
4646}
4647
4648static int
4649cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
4650 struct cifs_readdata *rdata,
4651 struct iov_iter *iter)
4652{
4653 return readpages_fill_pages(server, rdata, iter, iter->count);
4654}
4655
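/*
 * ->readahead for cifs: satisfy what we can of the window from fscache,
 * then chop the remainder into rsize-sized batches and issue an async
 * read for each batch.
 */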
4656static void cifs_readahead(struct readahead_control *ractl)
4657{
4658 int rc;
4659 struct cifsFileInfo *open_file = ractl->file->private_data;
4660 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file);
4661 struct TCP_Server_Info *server;
4662 pid_t pid;
4663 unsigned int xid, nr_pages, last_batch_size = 0, cache_nr_pages = 0;
4664 pgoff_t next_cached = ULONG_MAX;
4665 bool caching = fscache_cookie_enabled(cifs_inode_cookie(ractl->mapping->host)) &&
4666 cifs_inode_cookie(ractl->mapping->host)->cache_priv;
4667 bool check_cache = caching;
4668
4669 xid = get_xid();
4670
4671 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4672 pid = open_file->pid;
4673 else
4674 pid = current->tgid;
4675
4676 rc = 0;
4677 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4678
4679 cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4680 __func__, ractl->file, ractl->mapping, readahead_count(ractl));
4681
4682 /*
4683 * Chop the readahead request up into rsize-sized read requests.
4684 */
4685 while ((nr_pages = readahead_count(ractl) - last_batch_size)) {
4686 unsigned int i, got, rsize;
4687 struct page *page;
4688 struct cifs_readdata *rdata;
4689 struct cifs_credits credits_on_stack;
4690 struct cifs_credits *credits = &credits_on_stack;
4691 pgoff_t index = readahead_index(ractl) + last_batch_size;
4692
4693 /*
4694 * Find out if we have anything cached in the range of
4695 * interest, and if so, where the next chunk of cached data is.
4696 */
4697 if (caching) {
4698 if (check_cache) {
4699 rc = cifs_fscache_query_occupancy(
4700 ractl->mapping->host, index, nr_pages,
4701 &next_cached, &cache_nr_pages);
4702 if (rc < 0)
4703 caching = false;
4704 check_cache = false;
4705 }
4706
4707 if (index == next_cached) {
4708 /*
4709 * TODO: Send a whole batch of pages to be read
4710 * by the cache.
4711 */
4712 struct folio *folio = readahead_folio(ractl);
4713
4714 last_batch_size = folio_nr_pages(folio);
4715 if (cifs_readpage_from_fscache(ractl->mapping->host,
4716 &folio->page) < 0) {
4717 /*
4718 * TODO: Deal with cache read failure
4719 * here, but for the moment, delegate
4720 * that to readpage.
4721 */
4722 caching = false;
4723 }
4724 folio_unlock(folio);
4725 next_cached++;
4726 cache_nr_pages--;
4727 if (cache_nr_pages == 0)
4728 check_cache = true;
4729 continue;
4730 }
4731 }
4732
4733 if (open_file->invalidHandle) {
4734 rc = cifs_reopen_file(open_file, true);
4735 if (rc) {
4736 if (rc == -EAGAIN)
4737 continue;
4738 break;
4739 }
4740 }
4741
4742 if (cifs_sb->ctx->rsize == 0)
4743 cifs_sb->ctx->rsize =
4744 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4745 cifs_sb->ctx);
4746
4747 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4748 &rsize, credits);
4749 if (rc)
4750 break;
4751 nr_pages = min_t(size_t, rsize / PAGE_SIZE, readahead_count(ractl));
4752 nr_pages = min_t(size_t, nr_pages, next_cached - index);
4753
4754 /*
4755 * Give up immediately if rsize is too small to read an entire
4756 * page. The VFS will fall back to readpage. We should never
4757 * reach this point however since we set ra_pages to 0 when the
4758 * rsize is smaller than a cache page.
4759 */
4760 if (unlikely(!nr_pages)) {
4761 add_credits_and_wake_if(server, credits, 0);
4762 break;
4763 }
4764
4765 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
4766 if (!rdata) {
4767 /* best to give up if we're out of mem */
4768 add_credits_and_wake_if(server, credits, 0);
4769 break;
4770 }
4771
4772 got = __readahead_batch(ractl, rdata->pages, nr_pages);
4773 if (got != nr_pages) {
4774 pr_warn("__readahead_batch() returned %u/%u\n",
4775 got, nr_pages);
4776 nr_pages = got;
4777 }
4778
4779 rdata->nr_pages = nr_pages;
4780 rdata->bytes = readahead_batch_length(ractl);
4781 rdata->cfile = cifsFileInfo_get(open_file);
4782 rdata->server = server;
4783 rdata->mapping = ractl->mapping;
4784 rdata->offset = readahead_pos(ractl);
4785 rdata->pid = pid;
4786 rdata->pagesz = PAGE_SIZE;
4787 rdata->tailsz = PAGE_SIZE;
4788 rdata->read_into_pages = cifs_readpages_read_into_pages;
4789 rdata->copy_into_pages = cifs_readpages_copy_into_pages;
4790 rdata->credits = credits_on_stack;
4791
4792 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4793 if (!rc) {
4794 if (rdata->cfile->invalidHandle)
4795 rc = -EAGAIN;
4796 else
4797 rc = server->ops->async_readv(rdata);
4798 }
4799
4800 if (rc) {
4801 add_credits_and_wake_if(server, &rdata->credits, 0);
4802 for (i = 0; i < rdata->nr_pages; i++) {
4803 page = rdata->pages[i];
4804 unlock_page(page);
4805 put_page(page);
4806 }
4807 /* Fall back to readpage in error/reconnect cases */
4808 kref_put(&rdata->refcount, cifs_readdata_release);
4809 break;
4810 }
4811
4812 kref_put(&rdata->refcount, cifs_readdata_release);
4813 last_batch_size = nr_pages;
4814 }
4815
4816 free_xid(xid);
4817}
4818
4819/*
4820 * cifs_readpage_worker must be called with the page pinned
4821 */
4822static int cifs_readpage_worker(struct file *file, struct page *page,
4823 loff_t *poffset)
4824{
4825 char *read_data;
4826 int rc;
4827
4828 /* Is the page cached? */
4829 rc = cifs_readpage_from_fscache(file_inode(file), page);
4830 if (rc == 0)
4831 goto read_complete;
4832
4833 read_data = kmap(page);
4834 /* for reads over a certain size we could initiate async read-ahead */
4835
4836 rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4837
4838 if (rc < 0)
4839 goto io_error;
4840 else
4841 cifs_dbg(FYI, "Bytes read %d\n", rc);
4842
4843 /* we do not want atime to be less than mtime; that broke some apps */
4844 file_inode(file)->i_atime = current_time(file_inode(file));
4845 if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)))
4846 file_inode(file)->i_atime = file_inode(file)->i_mtime;
4847 else
4848 file_inode(file)->i_atime = current_time(file_inode(file));
4849
4850 if (PAGE_SIZE > rc)
4851 memset(read_data + rc, 0, PAGE_SIZE - rc);
4852
4853 flush_dcache_page(page);
4854 SetPageUptodate(page);
4855
4856 /* send this page to the cache */
4857 cifs_readpage_to_fscache(file_inode(file), page);
4858
4859 rc = 0;
4860
4861io_error:
4862 kunmap(page);
4863 unlock_page(page);
4864
4865read_complete:
4866 return rc;
4867}
4868
4869static int cifs_read_folio(struct file *file, struct folio *folio)
4870{
4871 struct page *page = &folio->page;
4872 loff_t offset = page_file_offset(page);
4873 int rc = -EACCES;
4874 unsigned int xid;
4875
4876 xid = get_xid();
4877
4878 if (file->private_data == NULL) {
4879 rc = -EBADF;
4880 free_xid(xid);
4881 return rc;
4882 }
4883
4884 cifs_dbg(FYI, "read_folio %p at offset %d 0x%x\n",
4885 page, (int)offset, (int)offset);
4886
4887 rc = cifs_readpage_worker(file, page, &offset);
4888
4889 free_xid(xid);
4890 return rc;
4891}
4892
4893static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4894{
4895 struct cifsFileInfo *open_file;
4896
4897 spin_lock(&cifs_inode->open_file_lock);
4898 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4899 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4900 spin_unlock(&cifs_inode->open_file_lock);
4901 return 1;
4902 }
4903 }
4904 spin_unlock(&cifs_inode->open_file_lock);
4905 return 0;
4906}
4907
4908/* We do not want to update the file size from the server for inodes
4909 open for write, to avoid races with writepage extending the file.
4910 In the future we could consider allowing a refresh of the inode
4911 only on increases in the file size, but this is tricky to do
4912 without racing with writebehind page caching in the current
4913 Linux kernel design */
4914bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4915{
4916 if (!cifsInode)
4917 return true;
4918
4919 if (is_inode_writable(cifsInode)) {
4920 /* This inode is open for write at least once */
4921 struct cifs_sb_info *cifs_sb;
4922
4923 cifs_sb = CIFS_SB(cifsInode->netfs.inode.i_sb);
4924 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4925 /* since there is no page cache to corrupt on
4926 direct I/O, we can change the size safely */
4927 return true;
4928 }
4929
4930 if (i_size_read(&cifsInode->netfs.inode) < end_of_file)
4931 return true;
4932
4933 return false;
4934 } else
4935 return true;
4936}
4937
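/*
 * ->write_begin for cifs: grab the page the coming write will modify
 * and, when the write is partial, either read or zero the rest of it.
 * A page left !Uptodate here makes cifs_write_end() fall back to a
 * synchronous write of the copied range.
 */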
4938static int cifs_write_begin(struct file *file, struct address_space *mapping,
4939 loff_t pos, unsigned len,
4940 struct page **pagep, void **fsdata)
4941{
4942 int oncethru = 0;
4943 pgoff_t index = pos >> PAGE_SHIFT;
4944 loff_t offset = pos & (PAGE_SIZE - 1);
4945 loff_t page_start = pos & PAGE_MASK;
4946 loff_t i_size;
4947 struct page *page;
4948 int rc = 0;
4949
4950 cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4951
4952start:
4953 page = grab_cache_page_write_begin(mapping, index);
4954 if (!page) {
4955 rc = -ENOMEM;
4956 goto out;
4957 }
4958
4959 if (PageUptodate(page))
4960 goto out;
4961
4962 /*
4963 * If we write a full page it will be up to date, no need to read from
4964 * the server. If the write is short, we'll end up doing a sync write
4965 * instead.
4966 */
4967 if (len == PAGE_SIZE)
4968 goto out;
4969
4970 /*
4971 * optimize away the read when we have an oplock, and we're not
4972 * expecting to use any of the data we'd be reading in. That
4973 * is, when the page lies beyond the EOF, or straddles the EOF
4974 * and the write will cover all of the existing data.
4975 */
4976 if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4977 i_size = i_size_read(mapping->host);
4978 if (page_start >= i_size ||
4979 (offset == 0 && (pos + len) >= i_size)) {
4980 zero_user_segments(page, 0, offset,
4981 offset + len,
4982 PAGE_SIZE);
4983 /*
4984 * PageChecked means that the parts of the page
4985 * to which we're not writing are considered up
4986 * to date. Once the data is copied to the
4987 * page, it can be set uptodate.
4988 */
4989 SetPageChecked(page);
4990 goto out;
4991 }
4992 }
4993
4994 if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4995 /*
4996 * might as well read a page, it is fast enough. If we get
4997 * an error, we don't need to return it. cifs_write_end will
4998 * do a sync write instead since PG_uptodate isn't set.
4999 */
5000 cifs_readpage_worker(file, page, &page_start);
5001 put_page(page);
5002 oncethru = 1;
5003 goto start;
5004 } else {
5005 /* we could try using another file handle if there is one -
5006 but how would we lock it to prevent a close of that handle
5007 racing with this read? In any case this will be written
5008 out by write_end so it is fine */
5009 }
5010out:
5011 *pagep = page;
5012 return rc;
5013}
5014
5015static bool cifs_release_folio(struct folio *folio, gfp_t gfp)
5016{
5017 if (folio_test_private(folio))
5018 return 0;
5019 if (folio_test_fscache(folio)) {
5020 if (current_is_kswapd() || !(gfp & __GFP_FS))
5021 return false;
5022 folio_wait_fscache(folio);
5023 }
5024 fscache_note_page_release(cifs_inode_cookie(folio->mapping->host));
5025 return true;
5026}
5027
5028static void cifs_invalidate_folio(struct folio *folio, size_t offset,
5029 size_t length)
5030{
5031 folio_wait_fscache(folio);
5032}
5033
5034static int cifs_launder_folio(struct folio *folio)
5035{
5036 int rc = 0;
5037 loff_t range_start = folio_pos(folio);
5038 loff_t range_end = range_start + folio_size(folio);
5039 struct writeback_control wbc = {
5040 .sync_mode = WB_SYNC_ALL,
5041 .nr_to_write = 0,
5042 .range_start = range_start,
5043 .range_end = range_end,
5044 };
5045
5046 cifs_dbg(FYI, "Launder page: %lu\n", folio->index);
5047
5048 if (folio_clear_dirty_for_io(folio))
5049 rc = cifs_writepage_locked(&folio->page, &wbc);
5050
5051 folio_wait_fscache(folio);
5052 return rc;
5053}
5054
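/*
 * Work item run when the server breaks our oplock or lease: downgrade
 * the cached state, flush (and possibly invalidate) the page cache,
 * push byte-range locks back to the server, and acknowledge the break
 * unless it was cancelled by a reconnect.
 */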
5055void cifs_oplock_break(struct work_struct *work)
5056{
5057 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
5058 oplock_break);
5059 struct inode *inode = d_inode(cfile->dentry);
5060 struct cifsInodeInfo *cinode = CIFS_I(inode);
5061 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
5062 struct TCP_Server_Info *server = tcon->ses->server;
5063 int rc = 0;
5064 bool purge_cache = false;
5065
5066 wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
5067 TASK_UNINTERRUPTIBLE);
5068
5069 server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
5070 cfile->oplock_epoch, &purge_cache);
5071
5072 if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
5073 cifs_has_mand_locks(cinode)) {
5074 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
5075 inode);
5076 cinode->oplock = 0;
5077 }
5078
5079 if (inode && S_ISREG(inode->i_mode)) {
5080 if (CIFS_CACHE_READ(cinode))
5081 break_lease(inode, O_RDONLY);
5082 else
5083 break_lease(inode, O_WRONLY);
5084 rc = filemap_fdatawrite(inode->i_mapping);
5085 if (!CIFS_CACHE_READ(cinode) || purge_cache) {
5086 rc = filemap_fdatawait(inode->i_mapping);
5087 mapping_set_error(inode->i_mapping, rc);
5088 cifs_zap_mapping(inode);
5089 }
5090 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
5091 if (CIFS_CACHE_WRITE(cinode))
5092 goto oplock_break_ack;
5093 }
5094
5095 rc = cifs_push_locks(cfile);
5096 if (rc)
5097 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
5098
5099oplock_break_ack:
5100 /*
5101  * Releasing a stale oplock after a recent reconnect of the SMB session
5102  * using a now-incorrect file handle is not a data integrity issue.
5103  * However, don't bother sending an oplock release if the session is
5104  * still disconnected, since the server has already released the oplock.
5105  */
5106 if (!cfile->oplock_break_cancelled) {
5107 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
5108 cinode);
5109 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
5110 }
5111
5112 _cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
5113 cifs_done_oplock_break(cinode);
5114}
5115
5116/*
5117 * The presence of cifs_direct_io() in the address space ops vector
5118 * allows open() O_DIRECT flags which would have failed otherwise.
5119 *
5120 * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
5121 * so this method should never be called.
5122 *
5123 * Direct IO is not yet supported in the cached mode.
5124 */
5125static ssize_t
5126cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
5127{
5128 /*
5129 * FIXME
5130 * Eventually need to support direct IO for non-forcedirectio mounts
5131 */
5132 return -EINVAL;
5133}
5134
5135static int cifs_swap_activate(struct swap_info_struct *sis,
5136 struct file *swap_file, sector_t *span)
5137{
5138 struct cifsFileInfo *cfile = swap_file->private_data;
5139 struct inode *inode = swap_file->f_mapping->host;
5140 unsigned long blocks;
5141 long long isize;
5142
5143 cifs_dbg(FYI, "swap activate\n");
5144
5145 if (!swap_file->f_mapping->a_ops->swap_rw)
5146 /* Cannot support swap */
5147 return -EINVAL;
5148
5149 spin_lock(&inode->i_lock);
5150 blocks = inode->i_blocks;
5151 isize = inode->i_size;
5152 spin_unlock(&inode->i_lock);
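	/*
	 * i_blocks counts 512-byte sectors, so blocks * 512 < isize
	 * implies the file has holes, which we cannot swap over.
	 */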
5153 if (blocks*512 < isize) {
5154 pr_warn("swap activate: swapfile has holes\n");
5155 return -EINVAL;
5156 }
5157 *span = sis->pages;
5158
5159 pr_warn_once("Swap support over SMB3 is experimental\n");
5160
5161 /*
5162 * TODO: consider adding ACL (or documenting how) to prevent other
5163 * users (on this or other systems) from reading it
5164 */
5165
5166
5167 /* TODO: add sk_set_memalloc(inet) or similar */
5168
5169 if (cfile)
5170 cfile->swapfile = true;
5171 /*
5172 * TODO: Since file already open, we can't open with DENY_ALL here
5173 * but we could add call to grab a byte range lock to prevent others
5174 * from reading or writing the file
5175 */
5176
5177 sis->flags |= SWP_FS_OPS;
5178 return add_swap_extent(sis, 0, sis->max, 0);
5179}
5180
5181static void cifs_swap_deactivate(struct file *file)
5182{
5183 struct cifsFileInfo *cfile = file->private_data;
5184
5185 cifs_dbg(FYI, "swap deactivate\n");
5186
5187 /* TODO: undo sk_set_memalloc(inet) will eventually be needed */
5188
5189 if (cfile)
5190 cfile->swapfile = false;
5191
5192 /* do we need to unpin (or unlock) the file */
5193}
5194
5195/*
5196 * Mark a page as having been made dirty and thus needing writeback. We also
5197 * need to pin the cache object to write back to.
5198 */
5199#ifdef CONFIG_CIFS_FSCACHE
5200static bool cifs_dirty_folio(struct address_space *mapping, struct folio *folio)
5201{
5202 return fscache_dirty_folio(mapping, folio,
5203 cifs_inode_cookie(mapping->host));
5204}
5205#else
5206#define cifs_dirty_folio filemap_dirty_folio
5207#endif
5208
5209const struct address_space_operations cifs_addr_ops = {
5210 .read_folio = cifs_read_folio,
5211 .readahead = cifs_readahead,
5212 .writepage = cifs_writepage,
5213 .writepages = cifs_writepages,
5214 .write_begin = cifs_write_begin,
5215 .write_end = cifs_write_end,
5216 .dirty_folio = cifs_dirty_folio,
5217 .release_folio = cifs_release_folio,
5218 .direct_IO = cifs_direct_io,
5219 .invalidate_folio = cifs_invalidate_folio,
5220 .launder_folio = cifs_launder_folio,
5221 /*
5222  * TODO: investigate and if useful we could add a cifs_migratePage
5223  * helper (under CONFIG_MIGRATION) in the future, and also
5224  * investigate and add an is_dirty_writeback helper if needed
5225  */
5226 .swap_activate = cifs_swap_activate,
5227 .swap_deactivate = cifs_swap_deactivate,
5228};
5229
5230/*
5231 * cifs_readahead requires the server to support a buffer large enough to
5232 * contain the header plus one complete page of data. Otherwise, we need
5233 * to leave cifs_readahead out of the address space operations.
5234 */
5235const struct address_space_operations cifs_addr_ops_smallbuf = {
5236 .read_folio = cifs_read_folio,
5237 .writepage = cifs_writepage,
5238 .writepages = cifs_writepages,
5239 .write_begin = cifs_write_begin,
5240 .write_end = cifs_write_end,
5241 .dirty_folio = cifs_dirty_folio,
5242 .release_folio = cifs_release_folio,
5243 .invalidate_folio = cifs_invalidate_folio,
5244 .launder_folio = cifs_launder_folio,
5245};