/* Web code-viewer banner, kept as a comment: "at v4.5 37 kB view raw" (presumably the tag this copy was taken from). */
1/* 2 * linux/fs/read_write.c 3 * 4 * Copyright (C) 1991, 1992 Linus Torvalds 5 */ 6 7#include <linux/slab.h> 8#include <linux/stat.h> 9#include <linux/fcntl.h> 10#include <linux/file.h> 11#include <linux/uio.h> 12#include <linux/fsnotify.h> 13#include <linux/security.h> 14#include <linux/export.h> 15#include <linux/syscalls.h> 16#include <linux/pagemap.h> 17#include <linux/splice.h> 18#include <linux/compat.h> 19#include <linux/mount.h> 20#include <linux/fs.h> 21#include "internal.h" 22 23#include <asm/uaccess.h> 24#include <asm/unistd.h> 25 26typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *); 27typedef ssize_t (*iter_fn_t)(struct kiocb *, struct iov_iter *); 28 29const struct file_operations generic_ro_fops = { 30 .llseek = generic_file_llseek, 31 .read_iter = generic_file_read_iter, 32 .mmap = generic_file_readonly_mmap, 33 .splice_read = generic_file_splice_read, 34}; 35 36EXPORT_SYMBOL(generic_ro_fops); 37 38static inline int unsigned_offsets(struct file *file) 39{ 40 return file->f_mode & FMODE_UNSIGNED_OFFSET; 41} 42 43/** 44 * vfs_setpos - update the file offset for lseek 45 * @file: file structure in question 46 * @offset: file offset to seek to 47 * @maxsize: maximum file size 48 * 49 * This is a low-level filesystem helper for updating the file offset to 50 * the value specified by @offset if the given offset is valid and it is 51 * not equal to the current file offset. 52 * 53 * Return the specified offset on success and -EINVAL on invalid offset. 
54 */ 55loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize) 56{ 57 if (offset < 0 && !unsigned_offsets(file)) 58 return -EINVAL; 59 if (offset > maxsize) 60 return -EINVAL; 61 62 if (offset != file->f_pos) { 63 file->f_pos = offset; 64 file->f_version = 0; 65 } 66 return offset; 67} 68EXPORT_SYMBOL(vfs_setpos); 69 70/** 71 * generic_file_llseek_size - generic llseek implementation for regular files 72 * @file: file structure to seek on 73 * @offset: file offset to seek to 74 * @whence: type of seek 75 * @size: max size of this file in file system 76 * @eof: offset used for SEEK_END position 77 * 78 * This is a variant of generic_file_llseek that allows passing in a custom 79 * maximum file size and a custom EOF position, for e.g. hashed directories 80 * 81 * Synchronization: 82 * SEEK_SET and SEEK_END are unsynchronized (but atomic on 64bit platforms) 83 * SEEK_CUR is synchronized against other SEEK_CURs, but not read/writes. 84 * read/writes behave like SEEK_SET against seeks. 85 */ 86loff_t 87generic_file_llseek_size(struct file *file, loff_t offset, int whence, 88 loff_t maxsize, loff_t eof) 89{ 90 switch (whence) { 91 case SEEK_END: 92 offset += eof; 93 break; 94 case SEEK_CUR: 95 /* 96 * Here we special-case the lseek(fd, 0, SEEK_CUR) 97 * position-querying operation. Avoid rewriting the "same" 98 * f_pos value back to the file because a concurrent read(), 99 * write() or lseek() might have altered it 100 */ 101 if (offset == 0) 102 return file->f_pos; 103 /* 104 * f_lock protects against read/modify/write race with other 105 * SEEK_CURs. Note that parallel writes and reads behave 106 * like SEEK_SET. 107 */ 108 spin_lock(&file->f_lock); 109 offset = vfs_setpos(file, file->f_pos + offset, maxsize); 110 spin_unlock(&file->f_lock); 111 return offset; 112 case SEEK_DATA: 113 /* 114 * In the generic case the entire file is data, so as long as 115 * offset isn't at the end of the file then the offset is data. 
116 */ 117 if (offset >= eof) 118 return -ENXIO; 119 break; 120 case SEEK_HOLE: 121 /* 122 * There is a virtual hole at the end of the file, so as long as 123 * offset isn't i_size or larger, return i_size. 124 */ 125 if (offset >= eof) 126 return -ENXIO; 127 offset = eof; 128 break; 129 } 130 131 return vfs_setpos(file, offset, maxsize); 132} 133EXPORT_SYMBOL(generic_file_llseek_size); 134 135/** 136 * generic_file_llseek - generic llseek implementation for regular files 137 * @file: file structure to seek on 138 * @offset: file offset to seek to 139 * @whence: type of seek 140 * 141 * This is a generic implemenation of ->llseek useable for all normal local 142 * filesystems. It just updates the file offset to the value specified by 143 * @offset and @whence. 144 */ 145loff_t generic_file_llseek(struct file *file, loff_t offset, int whence) 146{ 147 struct inode *inode = file->f_mapping->host; 148 149 return generic_file_llseek_size(file, offset, whence, 150 inode->i_sb->s_maxbytes, 151 i_size_read(inode)); 152} 153EXPORT_SYMBOL(generic_file_llseek); 154 155/** 156 * fixed_size_llseek - llseek implementation for fixed-sized devices 157 * @file: file structure to seek on 158 * @offset: file offset to seek to 159 * @whence: type of seek 160 * @size: size of the file 161 * 162 */ 163loff_t fixed_size_llseek(struct file *file, loff_t offset, int whence, loff_t size) 164{ 165 switch (whence) { 166 case SEEK_SET: case SEEK_CUR: case SEEK_END: 167 return generic_file_llseek_size(file, offset, whence, 168 size, size); 169 default: 170 return -EINVAL; 171 } 172} 173EXPORT_SYMBOL(fixed_size_llseek); 174 175/** 176 * no_seek_end_llseek - llseek implementation for fixed-sized devices 177 * @file: file structure to seek on 178 * @offset: file offset to seek to 179 * @whence: type of seek 180 * 181 */ 182loff_t no_seek_end_llseek(struct file *file, loff_t offset, int whence) 183{ 184 switch (whence) { 185 case SEEK_SET: case SEEK_CUR: 186 return generic_file_llseek_size(file, 
offset, whence, 187 OFFSET_MAX, 0); 188 default: 189 return -EINVAL; 190 } 191} 192EXPORT_SYMBOL(no_seek_end_llseek); 193 194/** 195 * no_seek_end_llseek_size - llseek implementation for fixed-sized devices 196 * @file: file structure to seek on 197 * @offset: file offset to seek to 198 * @whence: type of seek 199 * @size: maximal offset allowed 200 * 201 */ 202loff_t no_seek_end_llseek_size(struct file *file, loff_t offset, int whence, loff_t size) 203{ 204 switch (whence) { 205 case SEEK_SET: case SEEK_CUR: 206 return generic_file_llseek_size(file, offset, whence, 207 size, 0); 208 default: 209 return -EINVAL; 210 } 211} 212EXPORT_SYMBOL(no_seek_end_llseek_size); 213 214/** 215 * noop_llseek - No Operation Performed llseek implementation 216 * @file: file structure to seek on 217 * @offset: file offset to seek to 218 * @whence: type of seek 219 * 220 * This is an implementation of ->llseek useable for the rare special case when 221 * userspace expects the seek to succeed but the (device) file is actually not 222 * able to perform the seek. In this case you use noop_llseek() instead of 223 * falling back to the default implementation of ->llseek. 224 */ 225loff_t noop_llseek(struct file *file, loff_t offset, int whence) 226{ 227 return file->f_pos; 228} 229EXPORT_SYMBOL(noop_llseek); 230 231loff_t no_llseek(struct file *file, loff_t offset, int whence) 232{ 233 return -ESPIPE; 234} 235EXPORT_SYMBOL(no_llseek); 236 237loff_t default_llseek(struct file *file, loff_t offset, int whence) 238{ 239 struct inode *inode = file_inode(file); 240 loff_t retval; 241 242 inode_lock(inode); 243 switch (whence) { 244 case SEEK_END: 245 offset += i_size_read(inode); 246 break; 247 case SEEK_CUR: 248 if (offset == 0) { 249 retval = file->f_pos; 250 goto out; 251 } 252 offset += file->f_pos; 253 break; 254 case SEEK_DATA: 255 /* 256 * In the generic case the entire file is data, so as 257 * long as offset isn't at the end of the file then the 258 * offset is data. 
259 */ 260 if (offset >= inode->i_size) { 261 retval = -ENXIO; 262 goto out; 263 } 264 break; 265 case SEEK_HOLE: 266 /* 267 * There is a virtual hole at the end of the file, so 268 * as long as offset isn't i_size or larger, return 269 * i_size. 270 */ 271 if (offset >= inode->i_size) { 272 retval = -ENXIO; 273 goto out; 274 } 275 offset = inode->i_size; 276 break; 277 } 278 retval = -EINVAL; 279 if (offset >= 0 || unsigned_offsets(file)) { 280 if (offset != file->f_pos) { 281 file->f_pos = offset; 282 file->f_version = 0; 283 } 284 retval = offset; 285 } 286out: 287 inode_unlock(inode); 288 return retval; 289} 290EXPORT_SYMBOL(default_llseek); 291 292loff_t vfs_llseek(struct file *file, loff_t offset, int whence) 293{ 294 loff_t (*fn)(struct file *, loff_t, int); 295 296 fn = no_llseek; 297 if (file->f_mode & FMODE_LSEEK) { 298 if (file->f_op->llseek) 299 fn = file->f_op->llseek; 300 } 301 return fn(file, offset, whence); 302} 303EXPORT_SYMBOL(vfs_llseek); 304 305static inline struct fd fdget_pos(int fd) 306{ 307 return __to_fd(__fdget_pos(fd)); 308} 309 310static inline void fdput_pos(struct fd f) 311{ 312 if (f.flags & FDPUT_POS_UNLOCK) 313 mutex_unlock(&f.file->f_pos_lock); 314 fdput(f); 315} 316 317SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence) 318{ 319 off_t retval; 320 struct fd f = fdget_pos(fd); 321 if (!f.file) 322 return -EBADF; 323 324 retval = -EINVAL; 325 if (whence <= SEEK_MAX) { 326 loff_t res = vfs_llseek(f.file, offset, whence); 327 retval = res; 328 if (res != (loff_t)retval) 329 retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */ 330 } 331 fdput_pos(f); 332 return retval; 333} 334 335#ifdef CONFIG_COMPAT 336COMPAT_SYSCALL_DEFINE3(lseek, unsigned int, fd, compat_off_t, offset, unsigned int, whence) 337{ 338 return sys_lseek(fd, offset, whence); 339} 340#endif 341 342#ifdef __ARCH_WANT_SYS_LLSEEK 343SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high, 344 unsigned long, 
offset_low, loff_t __user *, result, 345 unsigned int, whence) 346{ 347 int retval; 348 struct fd f = fdget_pos(fd); 349 loff_t offset; 350 351 if (!f.file) 352 return -EBADF; 353 354 retval = -EINVAL; 355 if (whence > SEEK_MAX) 356 goto out_putf; 357 358 offset = vfs_llseek(f.file, ((loff_t) offset_high << 32) | offset_low, 359 whence); 360 361 retval = (int)offset; 362 if (offset >= 0) { 363 retval = -EFAULT; 364 if (!copy_to_user(result, &offset, sizeof(offset))) 365 retval = 0; 366 } 367out_putf: 368 fdput_pos(f); 369 return retval; 370} 371#endif 372 373ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos) 374{ 375 struct kiocb kiocb; 376 ssize_t ret; 377 378 if (!file->f_op->read_iter) 379 return -EINVAL; 380 381 init_sync_kiocb(&kiocb, file); 382 kiocb.ki_pos = *ppos; 383 384 iter->type |= READ; 385 ret = file->f_op->read_iter(&kiocb, iter); 386 BUG_ON(ret == -EIOCBQUEUED); 387 if (ret > 0) 388 *ppos = kiocb.ki_pos; 389 return ret; 390} 391EXPORT_SYMBOL(vfs_iter_read); 392 393ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos) 394{ 395 struct kiocb kiocb; 396 ssize_t ret; 397 398 if (!file->f_op->write_iter) 399 return -EINVAL; 400 401 init_sync_kiocb(&kiocb, file); 402 kiocb.ki_pos = *ppos; 403 404 iter->type |= WRITE; 405 ret = file->f_op->write_iter(&kiocb, iter); 406 BUG_ON(ret == -EIOCBQUEUED); 407 if (ret > 0) 408 *ppos = kiocb.ki_pos; 409 return ret; 410} 411EXPORT_SYMBOL(vfs_iter_write); 412 413/* 414 * rw_verify_area doesn't like huge counts. We limit 415 * them to something that fits in "int" so that others 416 * won't have to do range checks all the time. 
417 */ 418int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t count) 419{ 420 struct inode *inode; 421 loff_t pos; 422 int retval = -EINVAL; 423 424 inode = file_inode(file); 425 if (unlikely((ssize_t) count < 0)) 426 return retval; 427 pos = *ppos; 428 if (unlikely(pos < 0)) { 429 if (!unsigned_offsets(file)) 430 return retval; 431 if (count >= -pos) /* both values are in 0..LLONG_MAX */ 432 return -EOVERFLOW; 433 } else if (unlikely((loff_t) (pos + count) < 0)) { 434 if (!unsigned_offsets(file)) 435 return retval; 436 } 437 438 if (unlikely(inode->i_flctx && mandatory_lock(inode))) { 439 retval = locks_mandatory_area(inode, file, pos, pos + count - 1, 440 read_write == READ ? F_RDLCK : F_WRLCK); 441 if (retval < 0) 442 return retval; 443 } 444 retval = security_file_permission(file, 445 read_write == READ ? MAY_READ : MAY_WRITE); 446 if (retval) 447 return retval; 448 return count > MAX_RW_COUNT ? MAX_RW_COUNT : count; 449} 450 451static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) 452{ 453 struct iovec iov = { .iov_base = buf, .iov_len = len }; 454 struct kiocb kiocb; 455 struct iov_iter iter; 456 ssize_t ret; 457 458 init_sync_kiocb(&kiocb, filp); 459 kiocb.ki_pos = *ppos; 460 iov_iter_init(&iter, READ, &iov, 1, len); 461 462 ret = filp->f_op->read_iter(&kiocb, &iter); 463 BUG_ON(ret == -EIOCBQUEUED); 464 *ppos = kiocb.ki_pos; 465 return ret; 466} 467 468ssize_t __vfs_read(struct file *file, char __user *buf, size_t count, 469 loff_t *pos) 470{ 471 if (file->f_op->read) 472 return file->f_op->read(file, buf, count, pos); 473 else if (file->f_op->read_iter) 474 return new_sync_read(file, buf, count, pos); 475 else 476 return -EINVAL; 477} 478EXPORT_SYMBOL(__vfs_read); 479 480ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) 481{ 482 ssize_t ret; 483 484 if (!(file->f_mode & FMODE_READ)) 485 return -EBADF; 486 if (!(file->f_mode & FMODE_CAN_READ)) 487 return 
-EINVAL; 488 if (unlikely(!access_ok(VERIFY_WRITE, buf, count))) 489 return -EFAULT; 490 491 ret = rw_verify_area(READ, file, pos, count); 492 if (ret >= 0) { 493 count = ret; 494 ret = __vfs_read(file, buf, count, pos); 495 if (ret > 0) { 496 fsnotify_access(file); 497 add_rchar(current, ret); 498 } 499 inc_syscr(current); 500 } 501 502 return ret; 503} 504 505EXPORT_SYMBOL(vfs_read); 506 507static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) 508{ 509 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len }; 510 struct kiocb kiocb; 511 struct iov_iter iter; 512 ssize_t ret; 513 514 init_sync_kiocb(&kiocb, filp); 515 kiocb.ki_pos = *ppos; 516 iov_iter_init(&iter, WRITE, &iov, 1, len); 517 518 ret = filp->f_op->write_iter(&kiocb, &iter); 519 BUG_ON(ret == -EIOCBQUEUED); 520 if (ret > 0) 521 *ppos = kiocb.ki_pos; 522 return ret; 523} 524 525ssize_t __vfs_write(struct file *file, const char __user *p, size_t count, 526 loff_t *pos) 527{ 528 if (file->f_op->write) 529 return file->f_op->write(file, p, count, pos); 530 else if (file->f_op->write_iter) 531 return new_sync_write(file, p, count, pos); 532 else 533 return -EINVAL; 534} 535EXPORT_SYMBOL(__vfs_write); 536 537ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos) 538{ 539 mm_segment_t old_fs; 540 const char __user *p; 541 ssize_t ret; 542 543 if (!(file->f_mode & FMODE_CAN_WRITE)) 544 return -EINVAL; 545 546 old_fs = get_fs(); 547 set_fs(get_ds()); 548 p = (__force const char __user *)buf; 549 if (count > MAX_RW_COUNT) 550 count = MAX_RW_COUNT; 551 ret = __vfs_write(file, p, count, pos); 552 set_fs(old_fs); 553 if (ret > 0) { 554 fsnotify_modify(file); 555 add_wchar(current, ret); 556 } 557 inc_syscw(current); 558 return ret; 559} 560 561EXPORT_SYMBOL(__kernel_write); 562 563ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) 564{ 565 ssize_t ret; 566 567 if (!(file->f_mode & 
FMODE_WRITE)) 568 return -EBADF; 569 if (!(file->f_mode & FMODE_CAN_WRITE)) 570 return -EINVAL; 571 if (unlikely(!access_ok(VERIFY_READ, buf, count))) 572 return -EFAULT; 573 574 ret = rw_verify_area(WRITE, file, pos, count); 575 if (ret >= 0) { 576 count = ret; 577 file_start_write(file); 578 ret = __vfs_write(file, buf, count, pos); 579 if (ret > 0) { 580 fsnotify_modify(file); 581 add_wchar(current, ret); 582 } 583 inc_syscw(current); 584 file_end_write(file); 585 } 586 587 return ret; 588} 589 590EXPORT_SYMBOL(vfs_write); 591 592static inline loff_t file_pos_read(struct file *file) 593{ 594 return file->f_pos; 595} 596 597static inline void file_pos_write(struct file *file, loff_t pos) 598{ 599 file->f_pos = pos; 600} 601 602SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) 603{ 604 struct fd f = fdget_pos(fd); 605 ssize_t ret = -EBADF; 606 607 if (f.file) { 608 loff_t pos = file_pos_read(f.file); 609 ret = vfs_read(f.file, buf, count, &pos); 610 if (ret >= 0) 611 file_pos_write(f.file, pos); 612 fdput_pos(f); 613 } 614 return ret; 615} 616 617SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, 618 size_t, count) 619{ 620 struct fd f = fdget_pos(fd); 621 ssize_t ret = -EBADF; 622 623 if (f.file) { 624 loff_t pos = file_pos_read(f.file); 625 ret = vfs_write(f.file, buf, count, &pos); 626 if (ret >= 0) 627 file_pos_write(f.file, pos); 628 fdput_pos(f); 629 } 630 631 return ret; 632} 633 634SYSCALL_DEFINE4(pread64, unsigned int, fd, char __user *, buf, 635 size_t, count, loff_t, pos) 636{ 637 struct fd f; 638 ssize_t ret = -EBADF; 639 640 if (pos < 0) 641 return -EINVAL; 642 643 f = fdget(fd); 644 if (f.file) { 645 ret = -ESPIPE; 646 if (f.file->f_mode & FMODE_PREAD) 647 ret = vfs_read(f.file, buf, count, &pos); 648 fdput(f); 649 } 650 651 return ret; 652} 653 654SYSCALL_DEFINE4(pwrite64, unsigned int, fd, const char __user *, buf, 655 size_t, count, loff_t, pos) 656{ 657 struct fd f; 658 ssize_t ret = -EBADF; 659 660 if 
(pos < 0) 661 return -EINVAL; 662 663 f = fdget(fd); 664 if (f.file) { 665 ret = -ESPIPE; 666 if (f.file->f_mode & FMODE_PWRITE) 667 ret = vfs_write(f.file, buf, count, &pos); 668 fdput(f); 669 } 670 671 return ret; 672} 673 674/* 675 * Reduce an iovec's length in-place. Return the resulting number of segments 676 */ 677unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to) 678{ 679 unsigned long seg = 0; 680 size_t len = 0; 681 682 while (seg < nr_segs) { 683 seg++; 684 if (len + iov->iov_len >= to) { 685 iov->iov_len = to - len; 686 break; 687 } 688 len += iov->iov_len; 689 iov++; 690 } 691 return seg; 692} 693EXPORT_SYMBOL(iov_shorten); 694 695static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter, 696 loff_t *ppos, iter_fn_t fn) 697{ 698 struct kiocb kiocb; 699 ssize_t ret; 700 701 init_sync_kiocb(&kiocb, filp); 702 kiocb.ki_pos = *ppos; 703 704 ret = fn(&kiocb, iter); 705 BUG_ON(ret == -EIOCBQUEUED); 706 *ppos = kiocb.ki_pos; 707 return ret; 708} 709 710/* Do it by hand, with file-ops */ 711static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter, 712 loff_t *ppos, io_fn_t fn) 713{ 714 ssize_t ret = 0; 715 716 while (iov_iter_count(iter)) { 717 struct iovec iovec = iov_iter_iovec(iter); 718 ssize_t nr; 719 720 nr = fn(filp, iovec.iov_base, iovec.iov_len, ppos); 721 722 if (nr < 0) { 723 if (!ret) 724 ret = nr; 725 break; 726 } 727 ret += nr; 728 if (nr != iovec.iov_len) 729 break; 730 iov_iter_advance(iter, nr); 731 } 732 733 return ret; 734} 735 736/* A write operation does a read from user space and vice versa */ 737#define vrfy_dir(type) ((type) == READ ? 
VERIFY_WRITE : VERIFY_READ) 738 739ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, 740 unsigned long nr_segs, unsigned long fast_segs, 741 struct iovec *fast_pointer, 742 struct iovec **ret_pointer) 743{ 744 unsigned long seg; 745 ssize_t ret; 746 struct iovec *iov = fast_pointer; 747 748 /* 749 * SuS says "The readv() function *may* fail if the iovcnt argument 750 * was less than or equal to 0, or greater than {IOV_MAX}. Linux has 751 * traditionally returned zero for zero segments, so... 752 */ 753 if (nr_segs == 0) { 754 ret = 0; 755 goto out; 756 } 757 758 /* 759 * First get the "struct iovec" from user memory and 760 * verify all the pointers 761 */ 762 if (nr_segs > UIO_MAXIOV) { 763 ret = -EINVAL; 764 goto out; 765 } 766 if (nr_segs > fast_segs) { 767 iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL); 768 if (iov == NULL) { 769 ret = -ENOMEM; 770 goto out; 771 } 772 } 773 if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) { 774 ret = -EFAULT; 775 goto out; 776 } 777 778 /* 779 * According to the Single Unix Specification we should return EINVAL 780 * if an element length is < 0 when cast to ssize_t or if the 781 * total length would overflow the ssize_t return value of the 782 * system call. 783 * 784 * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the 785 * overflow case. 
786 */ 787 ret = 0; 788 for (seg = 0; seg < nr_segs; seg++) { 789 void __user *buf = iov[seg].iov_base; 790 ssize_t len = (ssize_t)iov[seg].iov_len; 791 792 /* see if we we're about to use an invalid len or if 793 * it's about to overflow ssize_t */ 794 if (len < 0) { 795 ret = -EINVAL; 796 goto out; 797 } 798 if (type >= 0 799 && unlikely(!access_ok(vrfy_dir(type), buf, len))) { 800 ret = -EFAULT; 801 goto out; 802 } 803 if (len > MAX_RW_COUNT - ret) { 804 len = MAX_RW_COUNT - ret; 805 iov[seg].iov_len = len; 806 } 807 ret += len; 808 } 809out: 810 *ret_pointer = iov; 811 return ret; 812} 813 814static ssize_t do_readv_writev(int type, struct file *file, 815 const struct iovec __user * uvector, 816 unsigned long nr_segs, loff_t *pos) 817{ 818 size_t tot_len; 819 struct iovec iovstack[UIO_FASTIOV]; 820 struct iovec *iov = iovstack; 821 struct iov_iter iter; 822 ssize_t ret; 823 io_fn_t fn; 824 iter_fn_t iter_fn; 825 826 ret = import_iovec(type, uvector, nr_segs, 827 ARRAY_SIZE(iovstack), &iov, &iter); 828 if (ret < 0) 829 return ret; 830 831 tot_len = iov_iter_count(&iter); 832 if (!tot_len) 833 goto out; 834 ret = rw_verify_area(type, file, pos, tot_len); 835 if (ret < 0) 836 goto out; 837 838 if (type == READ) { 839 fn = file->f_op->read; 840 iter_fn = file->f_op->read_iter; 841 } else { 842 fn = (io_fn_t)file->f_op->write; 843 iter_fn = file->f_op->write_iter; 844 file_start_write(file); 845 } 846 847 if (iter_fn) 848 ret = do_iter_readv_writev(file, &iter, pos, iter_fn); 849 else 850 ret = do_loop_readv_writev(file, &iter, pos, fn); 851 852 if (type != READ) 853 file_end_write(file); 854 855out: 856 kfree(iov); 857 if ((ret + (type == READ)) > 0) { 858 if (type == READ) 859 fsnotify_access(file); 860 else 861 fsnotify_modify(file); 862 } 863 return ret; 864} 865 866ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, 867 unsigned long vlen, loff_t *pos) 868{ 869 if (!(file->f_mode & FMODE_READ)) 870 return -EBADF; 871 if (!(file->f_mode & 
FMODE_CAN_READ)) 872 return -EINVAL; 873 874 return do_readv_writev(READ, file, vec, vlen, pos); 875} 876 877EXPORT_SYMBOL(vfs_readv); 878 879ssize_t vfs_writev(struct file *file, const struct iovec __user *vec, 880 unsigned long vlen, loff_t *pos) 881{ 882 if (!(file->f_mode & FMODE_WRITE)) 883 return -EBADF; 884 if (!(file->f_mode & FMODE_CAN_WRITE)) 885 return -EINVAL; 886 887 return do_readv_writev(WRITE, file, vec, vlen, pos); 888} 889 890EXPORT_SYMBOL(vfs_writev); 891 892SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, 893 unsigned long, vlen) 894{ 895 struct fd f = fdget_pos(fd); 896 ssize_t ret = -EBADF; 897 898 if (f.file) { 899 loff_t pos = file_pos_read(f.file); 900 ret = vfs_readv(f.file, vec, vlen, &pos); 901 if (ret >= 0) 902 file_pos_write(f.file, pos); 903 fdput_pos(f); 904 } 905 906 if (ret > 0) 907 add_rchar(current, ret); 908 inc_syscr(current); 909 return ret; 910} 911 912SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, 913 unsigned long, vlen) 914{ 915 struct fd f = fdget_pos(fd); 916 ssize_t ret = -EBADF; 917 918 if (f.file) { 919 loff_t pos = file_pos_read(f.file); 920 ret = vfs_writev(f.file, vec, vlen, &pos); 921 if (ret >= 0) 922 file_pos_write(f.file, pos); 923 fdput_pos(f); 924 } 925 926 if (ret > 0) 927 add_wchar(current, ret); 928 inc_syscw(current); 929 return ret; 930} 931 932static inline loff_t pos_from_hilo(unsigned long high, unsigned long low) 933{ 934#define HALF_LONG_BITS (BITS_PER_LONG / 2) 935 return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low; 936} 937 938SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec, 939 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) 940{ 941 loff_t pos = pos_from_hilo(pos_h, pos_l); 942 struct fd f; 943 ssize_t ret = -EBADF; 944 945 if (pos < 0) 946 return -EINVAL; 947 948 f = fdget(fd); 949 if (f.file) { 950 ret = -ESPIPE; 951 if (f.file->f_mode & FMODE_PREAD) 952 ret = vfs_readv(f.file, 
vec, vlen, &pos); 953 fdput(f); 954 } 955 956 if (ret > 0) 957 add_rchar(current, ret); 958 inc_syscr(current); 959 return ret; 960} 961 962SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec, 963 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) 964{ 965 loff_t pos = pos_from_hilo(pos_h, pos_l); 966 struct fd f; 967 ssize_t ret = -EBADF; 968 969 if (pos < 0) 970 return -EINVAL; 971 972 f = fdget(fd); 973 if (f.file) { 974 ret = -ESPIPE; 975 if (f.file->f_mode & FMODE_PWRITE) 976 ret = vfs_writev(f.file, vec, vlen, &pos); 977 fdput(f); 978 } 979 980 if (ret > 0) 981 add_wchar(current, ret); 982 inc_syscw(current); 983 return ret; 984} 985 986#ifdef CONFIG_COMPAT 987 988static ssize_t compat_do_readv_writev(int type, struct file *file, 989 const struct compat_iovec __user *uvector, 990 unsigned long nr_segs, loff_t *pos) 991{ 992 compat_ssize_t tot_len; 993 struct iovec iovstack[UIO_FASTIOV]; 994 struct iovec *iov = iovstack; 995 struct iov_iter iter; 996 ssize_t ret; 997 io_fn_t fn; 998 iter_fn_t iter_fn; 999 1000 ret = compat_import_iovec(type, uvector, nr_segs, 1001 UIO_FASTIOV, &iov, &iter); 1002 if (ret < 0) 1003 return ret; 1004 1005 tot_len = iov_iter_count(&iter); 1006 if (!tot_len) 1007 goto out; 1008 ret = rw_verify_area(type, file, pos, tot_len); 1009 if (ret < 0) 1010 goto out; 1011 1012 if (type == READ) { 1013 fn = file->f_op->read; 1014 iter_fn = file->f_op->read_iter; 1015 } else { 1016 fn = (io_fn_t)file->f_op->write; 1017 iter_fn = file->f_op->write_iter; 1018 file_start_write(file); 1019 } 1020 1021 if (iter_fn) 1022 ret = do_iter_readv_writev(file, &iter, pos, iter_fn); 1023 else 1024 ret = do_loop_readv_writev(file, &iter, pos, fn); 1025 1026 if (type != READ) 1027 file_end_write(file); 1028 1029out: 1030 kfree(iov); 1031 if ((ret + (type == READ)) > 0) { 1032 if (type == READ) 1033 fsnotify_access(file); 1034 else 1035 fsnotify_modify(file); 1036 } 1037 return ret; 1038} 1039 1040static size_t 
compat_readv(struct file *file, 1041 const struct compat_iovec __user *vec, 1042 unsigned long vlen, loff_t *pos) 1043{ 1044 ssize_t ret = -EBADF; 1045 1046 if (!(file->f_mode & FMODE_READ)) 1047 goto out; 1048 1049 ret = -EINVAL; 1050 if (!(file->f_mode & FMODE_CAN_READ)) 1051 goto out; 1052 1053 ret = compat_do_readv_writev(READ, file, vec, vlen, pos); 1054 1055out: 1056 if (ret > 0) 1057 add_rchar(current, ret); 1058 inc_syscr(current); 1059 return ret; 1060} 1061 1062COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd, 1063 const struct compat_iovec __user *,vec, 1064 compat_ulong_t, vlen) 1065{ 1066 struct fd f = fdget_pos(fd); 1067 ssize_t ret; 1068 loff_t pos; 1069 1070 if (!f.file) 1071 return -EBADF; 1072 pos = f.file->f_pos; 1073 ret = compat_readv(f.file, vec, vlen, &pos); 1074 if (ret >= 0) 1075 f.file->f_pos = pos; 1076 fdput_pos(f); 1077 return ret; 1078} 1079 1080static long __compat_sys_preadv64(unsigned long fd, 1081 const struct compat_iovec __user *vec, 1082 unsigned long vlen, loff_t pos) 1083{ 1084 struct fd f; 1085 ssize_t ret; 1086 1087 if (pos < 0) 1088 return -EINVAL; 1089 f = fdget(fd); 1090 if (!f.file) 1091 return -EBADF; 1092 ret = -ESPIPE; 1093 if (f.file->f_mode & FMODE_PREAD) 1094 ret = compat_readv(f.file, vec, vlen, &pos); 1095 fdput(f); 1096 return ret; 1097} 1098 1099#ifdef __ARCH_WANT_COMPAT_SYS_PREADV64 1100COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd, 1101 const struct compat_iovec __user *,vec, 1102 unsigned long, vlen, loff_t, pos) 1103{ 1104 return __compat_sys_preadv64(fd, vec, vlen, pos); 1105} 1106#endif 1107 1108COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd, 1109 const struct compat_iovec __user *,vec, 1110 compat_ulong_t, vlen, u32, pos_low, u32, pos_high) 1111{ 1112 loff_t pos = ((loff_t)pos_high << 32) | pos_low; 1113 1114 return __compat_sys_preadv64(fd, vec, vlen, pos); 1115} 1116 1117static size_t compat_writev(struct file *file, 1118 const struct compat_iovec __user *vec, 1119 unsigned long vlen, loff_t 
*pos) 1120{ 1121 ssize_t ret = -EBADF; 1122 1123 if (!(file->f_mode & FMODE_WRITE)) 1124 goto out; 1125 1126 ret = -EINVAL; 1127 if (!(file->f_mode & FMODE_CAN_WRITE)) 1128 goto out; 1129 1130 ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos); 1131 1132out: 1133 if (ret > 0) 1134 add_wchar(current, ret); 1135 inc_syscw(current); 1136 return ret; 1137} 1138 1139COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd, 1140 const struct compat_iovec __user *, vec, 1141 compat_ulong_t, vlen) 1142{ 1143 struct fd f = fdget_pos(fd); 1144 ssize_t ret; 1145 loff_t pos; 1146 1147 if (!f.file) 1148 return -EBADF; 1149 pos = f.file->f_pos; 1150 ret = compat_writev(f.file, vec, vlen, &pos); 1151 if (ret >= 0) 1152 f.file->f_pos = pos; 1153 fdput_pos(f); 1154 return ret; 1155} 1156 1157static long __compat_sys_pwritev64(unsigned long fd, 1158 const struct compat_iovec __user *vec, 1159 unsigned long vlen, loff_t pos) 1160{ 1161 struct fd f; 1162 ssize_t ret; 1163 1164 if (pos < 0) 1165 return -EINVAL; 1166 f = fdget(fd); 1167 if (!f.file) 1168 return -EBADF; 1169 ret = -ESPIPE; 1170 if (f.file->f_mode & FMODE_PWRITE) 1171 ret = compat_writev(f.file, vec, vlen, &pos); 1172 fdput(f); 1173 return ret; 1174} 1175 1176#ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64 1177COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd, 1178 const struct compat_iovec __user *,vec, 1179 unsigned long, vlen, loff_t, pos) 1180{ 1181 return __compat_sys_pwritev64(fd, vec, vlen, pos); 1182} 1183#endif 1184 1185COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd, 1186 const struct compat_iovec __user *,vec, 1187 compat_ulong_t, vlen, u32, pos_low, u32, pos_high) 1188{ 1189 loff_t pos = ((loff_t)pos_high << 32) | pos_low; 1190 1191 return __compat_sys_pwritev64(fd, vec, vlen, pos); 1192} 1193#endif 1194 1195static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, 1196 size_t count, loff_t max) 1197{ 1198 struct fd in, out; 1199 struct inode *in_inode, *out_inode; 1200 loff_t pos; 1201 loff_t out_pos; 1202 
ssize_t retval; 1203 int fl; 1204 1205 /* 1206 * Get input file, and verify that it is ok.. 1207 */ 1208 retval = -EBADF; 1209 in = fdget(in_fd); 1210 if (!in.file) 1211 goto out; 1212 if (!(in.file->f_mode & FMODE_READ)) 1213 goto fput_in; 1214 retval = -ESPIPE; 1215 if (!ppos) { 1216 pos = in.file->f_pos; 1217 } else { 1218 pos = *ppos; 1219 if (!(in.file->f_mode & FMODE_PREAD)) 1220 goto fput_in; 1221 } 1222 retval = rw_verify_area(READ, in.file, &pos, count); 1223 if (retval < 0) 1224 goto fput_in; 1225 count = retval; 1226 1227 /* 1228 * Get output file, and verify that it is ok.. 1229 */ 1230 retval = -EBADF; 1231 out = fdget(out_fd); 1232 if (!out.file) 1233 goto fput_in; 1234 if (!(out.file->f_mode & FMODE_WRITE)) 1235 goto fput_out; 1236 retval = -EINVAL; 1237 in_inode = file_inode(in.file); 1238 out_inode = file_inode(out.file); 1239 out_pos = out.file->f_pos; 1240 retval = rw_verify_area(WRITE, out.file, &out_pos, count); 1241 if (retval < 0) 1242 goto fput_out; 1243 count = retval; 1244 1245 if (!max) 1246 max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes); 1247 1248 if (unlikely(pos + count > max)) { 1249 retval = -EOVERFLOW; 1250 if (pos >= max) 1251 goto fput_out; 1252 count = max - pos; 1253 } 1254 1255 fl = 0; 1256#if 0 1257 /* 1258 * We need to debate whether we can enable this or not. The 1259 * man page documents EAGAIN return for the output at least, 1260 * and the application is arguably buggy if it doesn't expect 1261 * EAGAIN on a non-blocking file descriptor. 
1262 */ 1263 if (in.file->f_flags & O_NONBLOCK) 1264 fl = SPLICE_F_NONBLOCK; 1265#endif 1266 file_start_write(out.file); 1267 retval = do_splice_direct(in.file, &pos, out.file, &out_pos, count, fl); 1268 file_end_write(out.file); 1269 1270 if (retval > 0) { 1271 add_rchar(current, retval); 1272 add_wchar(current, retval); 1273 fsnotify_access(in.file); 1274 fsnotify_modify(out.file); 1275 out.file->f_pos = out_pos; 1276 if (ppos) 1277 *ppos = pos; 1278 else 1279 in.file->f_pos = pos; 1280 } 1281 1282 inc_syscr(current); 1283 inc_syscw(current); 1284 if (pos > max) 1285 retval = -EOVERFLOW; 1286 1287fput_out: 1288 fdput(out); 1289fput_in: 1290 fdput(in); 1291out: 1292 return retval; 1293} 1294 1295SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count) 1296{ 1297 loff_t pos; 1298 off_t off; 1299 ssize_t ret; 1300 1301 if (offset) { 1302 if (unlikely(get_user(off, offset))) 1303 return -EFAULT; 1304 pos = off; 1305 ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS); 1306 if (unlikely(put_user(pos, offset))) 1307 return -EFAULT; 1308 return ret; 1309 } 1310 1311 return do_sendfile(out_fd, in_fd, NULL, count, 0); 1312} 1313 1314SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count) 1315{ 1316 loff_t pos; 1317 ssize_t ret; 1318 1319 if (offset) { 1320 if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t)))) 1321 return -EFAULT; 1322 ret = do_sendfile(out_fd, in_fd, &pos, count, 0); 1323 if (unlikely(put_user(pos, offset))) 1324 return -EFAULT; 1325 return ret; 1326 } 1327 1328 return do_sendfile(out_fd, in_fd, NULL, count, 0); 1329} 1330 1331#ifdef CONFIG_COMPAT 1332COMPAT_SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, 1333 compat_off_t __user *, offset, compat_size_t, count) 1334{ 1335 loff_t pos; 1336 off_t off; 1337 ssize_t ret; 1338 1339 if (offset) { 1340 if (unlikely(get_user(off, offset))) 1341 return -EFAULT; 1342 pos = off; 1343 ret = do_sendfile(out_fd, in_fd, &pos, count, 
MAX_NON_LFS); 1344 if (unlikely(put_user(pos, offset))) 1345 return -EFAULT; 1346 return ret; 1347 } 1348 1349 return do_sendfile(out_fd, in_fd, NULL, count, 0); 1350} 1351 1352COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, 1353 compat_loff_t __user *, offset, compat_size_t, count) 1354{ 1355 loff_t pos; 1356 ssize_t ret; 1357 1358 if (offset) { 1359 if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t)))) 1360 return -EFAULT; 1361 ret = do_sendfile(out_fd, in_fd, &pos, count, 0); 1362 if (unlikely(put_user(pos, offset))) 1363 return -EFAULT; 1364 return ret; 1365 } 1366 1367 return do_sendfile(out_fd, in_fd, NULL, count, 0); 1368} 1369#endif 1370 1371/* 1372 * copy_file_range() differs from regular file read and write in that it 1373 * specifically allows return partial success. When it does so is up to 1374 * the copy_file_range method. 1375 */ 1376ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, 1377 struct file *file_out, loff_t pos_out, 1378 size_t len, unsigned int flags) 1379{ 1380 struct inode *inode_in = file_inode(file_in); 1381 struct inode *inode_out = file_inode(file_out); 1382 ssize_t ret; 1383 1384 if (flags != 0) 1385 return -EINVAL; 1386 1387 /* copy_file_range allows full ssize_t len, ignoring MAX_RW_COUNT */ 1388 ret = rw_verify_area(READ, file_in, &pos_in, len); 1389 if (ret >= 0) 1390 ret = rw_verify_area(WRITE, file_out, &pos_out, len); 1391 if (ret < 0) 1392 return ret; 1393 1394 if (!(file_in->f_mode & FMODE_READ) || 1395 !(file_out->f_mode & FMODE_WRITE) || 1396 (file_out->f_flags & O_APPEND)) 1397 return -EBADF; 1398 1399 /* this could be relaxed once a method supports cross-fs copies */ 1400 if (inode_in->i_sb != inode_out->i_sb) 1401 return -EXDEV; 1402 1403 if (len == 0) 1404 return 0; 1405 1406 ret = mnt_want_write_file(file_out); 1407 if (ret) 1408 return ret; 1409 1410 ret = -EOPNOTSUPP; 1411 if (file_out->f_op->copy_file_range) 1412 ret = file_out->f_op->copy_file_range(file_in, pos_in, file_out, 1413 
pos_out, len, flags); 1414 if (ret == -EOPNOTSUPP) 1415 ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out, 1416 len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0); 1417 1418 if (ret > 0) { 1419 fsnotify_access(file_in); 1420 add_rchar(current, ret); 1421 fsnotify_modify(file_out); 1422 add_wchar(current, ret); 1423 } 1424 inc_syscr(current); 1425 inc_syscw(current); 1426 1427 mnt_drop_write_file(file_out); 1428 1429 return ret; 1430} 1431EXPORT_SYMBOL(vfs_copy_file_range); 1432 1433SYSCALL_DEFINE6(copy_file_range, int, fd_in, loff_t __user *, off_in, 1434 int, fd_out, loff_t __user *, off_out, 1435 size_t, len, unsigned int, flags) 1436{ 1437 loff_t pos_in; 1438 loff_t pos_out; 1439 struct fd f_in; 1440 struct fd f_out; 1441 ssize_t ret = -EBADF; 1442 1443 f_in = fdget(fd_in); 1444 if (!f_in.file) 1445 goto out2; 1446 1447 f_out = fdget(fd_out); 1448 if (!f_out.file) 1449 goto out1; 1450 1451 ret = -EFAULT; 1452 if (off_in) { 1453 if (copy_from_user(&pos_in, off_in, sizeof(loff_t))) 1454 goto out; 1455 } else { 1456 pos_in = f_in.file->f_pos; 1457 } 1458 1459 if (off_out) { 1460 if (copy_from_user(&pos_out, off_out, sizeof(loff_t))) 1461 goto out; 1462 } else { 1463 pos_out = f_out.file->f_pos; 1464 } 1465 1466 ret = vfs_copy_file_range(f_in.file, pos_in, f_out.file, pos_out, len, 1467 flags); 1468 if (ret > 0) { 1469 pos_in += ret; 1470 pos_out += ret; 1471 1472 if (off_in) { 1473 if (copy_to_user(off_in, &pos_in, sizeof(loff_t))) 1474 ret = -EFAULT; 1475 } else { 1476 f_in.file->f_pos = pos_in; 1477 } 1478 1479 if (off_out) { 1480 if (copy_to_user(off_out, &pos_out, sizeof(loff_t))) 1481 ret = -EFAULT; 1482 } else { 1483 f_out.file->f_pos = pos_out; 1484 } 1485 } 1486 1487out: 1488 fdput(f_out); 1489out1: 1490 fdput(f_in); 1491out2: 1492 return ret; 1493} 1494 1495static int clone_verify_area(struct file *file, loff_t pos, u64 len, bool write) 1496{ 1497 struct inode *inode = file_inode(file); 1498 1499 if (unlikely(pos < 0)) 1500 return -EINVAL; 1501 1502 if 
(unlikely((loff_t) (pos + len) < 0)) 1503 return -EINVAL; 1504 1505 if (unlikely(inode->i_flctx && mandatory_lock(inode))) { 1506 loff_t end = len ? pos + len - 1 : OFFSET_MAX; 1507 int retval; 1508 1509 retval = locks_mandatory_area(inode, file, pos, end, 1510 write ? F_WRLCK : F_RDLCK); 1511 if (retval < 0) 1512 return retval; 1513 } 1514 1515 return security_file_permission(file, write ? MAY_WRITE : MAY_READ); 1516} 1517 1518int vfs_clone_file_range(struct file *file_in, loff_t pos_in, 1519 struct file *file_out, loff_t pos_out, u64 len) 1520{ 1521 struct inode *inode_in = file_inode(file_in); 1522 struct inode *inode_out = file_inode(file_out); 1523 int ret; 1524 1525 if (inode_in->i_sb != inode_out->i_sb || 1526 file_in->f_path.mnt != file_out->f_path.mnt) 1527 return -EXDEV; 1528 1529 if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) 1530 return -EISDIR; 1531 if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) 1532 return -EINVAL; 1533 1534 if (!(file_in->f_mode & FMODE_READ) || 1535 !(file_out->f_mode & FMODE_WRITE) || 1536 (file_out->f_flags & O_APPEND)) 1537 return -EBADF; 1538 1539 if (!file_in->f_op->clone_file_range) 1540 return -EOPNOTSUPP; 1541 1542 ret = clone_verify_area(file_in, pos_in, len, false); 1543 if (ret) 1544 return ret; 1545 1546 ret = clone_verify_area(file_out, pos_out, len, true); 1547 if (ret) 1548 return ret; 1549 1550 if (pos_in + len > i_size_read(inode_in)) 1551 return -EINVAL; 1552 1553 ret = mnt_want_write_file(file_out); 1554 if (ret) 1555 return ret; 1556 1557 ret = file_in->f_op->clone_file_range(file_in, pos_in, 1558 file_out, pos_out, len); 1559 if (!ret) { 1560 fsnotify_access(file_in); 1561 fsnotify_modify(file_out); 1562 } 1563 1564 mnt_drop_write_file(file_out); 1565 return ret; 1566} 1567EXPORT_SYMBOL(vfs_clone_file_range); 1568 1569int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same) 1570{ 1571 struct file_dedupe_range_info *info; 1572 struct inode *src = file_inode(file); 
1573 u64 off; 1574 u64 len; 1575 int i; 1576 int ret; 1577 bool is_admin = capable(CAP_SYS_ADMIN); 1578 u16 count = same->dest_count; 1579 struct file *dst_file; 1580 loff_t dst_off; 1581 ssize_t deduped; 1582 1583 if (!(file->f_mode & FMODE_READ)) 1584 return -EINVAL; 1585 1586 if (same->reserved1 || same->reserved2) 1587 return -EINVAL; 1588 1589 off = same->src_offset; 1590 len = same->src_length; 1591 1592 ret = -EISDIR; 1593 if (S_ISDIR(src->i_mode)) 1594 goto out; 1595 1596 ret = -EINVAL; 1597 if (!S_ISREG(src->i_mode)) 1598 goto out; 1599 1600 ret = clone_verify_area(file, off, len, false); 1601 if (ret < 0) 1602 goto out; 1603 ret = 0; 1604 1605 /* pre-format output fields to sane values */ 1606 for (i = 0; i < count; i++) { 1607 same->info[i].bytes_deduped = 0ULL; 1608 same->info[i].status = FILE_DEDUPE_RANGE_SAME; 1609 } 1610 1611 for (i = 0, info = same->info; i < count; i++, info++) { 1612 struct inode *dst; 1613 struct fd dst_fd = fdget(info->dest_fd); 1614 1615 dst_file = dst_fd.file; 1616 if (!dst_file) { 1617 info->status = -EBADF; 1618 goto next_loop; 1619 } 1620 dst = file_inode(dst_file); 1621 1622 ret = mnt_want_write_file(dst_file); 1623 if (ret) { 1624 info->status = ret; 1625 goto next_loop; 1626 } 1627 1628 dst_off = info->dest_offset; 1629 ret = clone_verify_area(dst_file, dst_off, len, true); 1630 if (ret < 0) { 1631 info->status = ret; 1632 goto next_file; 1633 } 1634 ret = 0; 1635 1636 if (info->reserved) { 1637 info->status = -EINVAL; 1638 } else if (!(is_admin || (dst_file->f_mode & FMODE_WRITE))) { 1639 info->status = -EINVAL; 1640 } else if (file->f_path.mnt != dst_file->f_path.mnt) { 1641 info->status = -EXDEV; 1642 } else if (S_ISDIR(dst->i_mode)) { 1643 info->status = -EISDIR; 1644 } else if (dst_file->f_op->dedupe_file_range == NULL) { 1645 info->status = -EINVAL; 1646 } else { 1647 deduped = dst_file->f_op->dedupe_file_range(file, off, 1648 len, dst_file, 1649 info->dest_offset); 1650 if (deduped == -EBADE) 1651 info->status = 
FILE_DEDUPE_RANGE_DIFFERS; 1652 else if (deduped < 0) 1653 info->status = deduped; 1654 else 1655 info->bytes_deduped += deduped; 1656 } 1657 1658next_file: 1659 mnt_drop_write_file(dst_file); 1660next_loop: 1661 fdput(dst_fd); 1662 1663 if (fatal_signal_pending(current)) 1664 goto out; 1665 } 1666 1667out: 1668 return ret; 1669} 1670EXPORT_SYMBOL(vfs_dedupe_file_range);