/* Snapshot at v2.6.18-rc2; originally 768 lines, 17 kB. */
1/* 2 * linux/fs/read_write.c 3 * 4 * Copyright (C) 1991, 1992 Linus Torvalds 5 */ 6 7#include <linux/slab.h> 8#include <linux/stat.h> 9#include <linux/fcntl.h> 10#include <linux/file.h> 11#include <linux/uio.h> 12#include <linux/smp_lock.h> 13#include <linux/fsnotify.h> 14#include <linux/security.h> 15#include <linux/module.h> 16#include <linux/syscalls.h> 17#include <linux/pagemap.h> 18 19#include <asm/uaccess.h> 20#include <asm/unistd.h> 21 22const struct file_operations generic_ro_fops = { 23 .llseek = generic_file_llseek, 24 .read = generic_file_read, 25 .mmap = generic_file_readonly_mmap, 26 .sendfile = generic_file_sendfile, 27}; 28 29EXPORT_SYMBOL(generic_ro_fops); 30 31loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) 32{ 33 long long retval; 34 struct inode *inode = file->f_mapping->host; 35 36 mutex_lock(&inode->i_mutex); 37 switch (origin) { 38 case 2: 39 offset += inode->i_size; 40 break; 41 case 1: 42 offset += file->f_pos; 43 } 44 retval = -EINVAL; 45 if (offset>=0 && offset<=inode->i_sb->s_maxbytes) { 46 if (offset != file->f_pos) { 47 file->f_pos = offset; 48 file->f_version = 0; 49 } 50 retval = offset; 51 } 52 mutex_unlock(&inode->i_mutex); 53 return retval; 54} 55 56EXPORT_SYMBOL(generic_file_llseek); 57 58loff_t remote_llseek(struct file *file, loff_t offset, int origin) 59{ 60 long long retval; 61 62 lock_kernel(); 63 switch (origin) { 64 case 2: 65 offset += i_size_read(file->f_dentry->d_inode); 66 break; 67 case 1: 68 offset += file->f_pos; 69 } 70 retval = -EINVAL; 71 if (offset>=0 && offset<=file->f_dentry->d_inode->i_sb->s_maxbytes) { 72 if (offset != file->f_pos) { 73 file->f_pos = offset; 74 file->f_version = 0; 75 } 76 retval = offset; 77 } 78 unlock_kernel(); 79 return retval; 80} 81EXPORT_SYMBOL(remote_llseek); 82 83loff_t no_llseek(struct file *file, loff_t offset, int origin) 84{ 85 return -ESPIPE; 86} 87EXPORT_SYMBOL(no_llseek); 88 89loff_t default_llseek(struct file *file, loff_t offset, int origin) 90{ 91 
long long retval; 92 93 lock_kernel(); 94 switch (origin) { 95 case 2: 96 offset += i_size_read(file->f_dentry->d_inode); 97 break; 98 case 1: 99 offset += file->f_pos; 100 } 101 retval = -EINVAL; 102 if (offset >= 0) { 103 if (offset != file->f_pos) { 104 file->f_pos = offset; 105 file->f_version = 0; 106 } 107 retval = offset; 108 } 109 unlock_kernel(); 110 return retval; 111} 112EXPORT_SYMBOL(default_llseek); 113 114loff_t vfs_llseek(struct file *file, loff_t offset, int origin) 115{ 116 loff_t (*fn)(struct file *, loff_t, int); 117 118 fn = no_llseek; 119 if (file->f_mode & FMODE_LSEEK) { 120 fn = default_llseek; 121 if (file->f_op && file->f_op->llseek) 122 fn = file->f_op->llseek; 123 } 124 return fn(file, offset, origin); 125} 126EXPORT_SYMBOL(vfs_llseek); 127 128asmlinkage off_t sys_lseek(unsigned int fd, off_t offset, unsigned int origin) 129{ 130 off_t retval; 131 struct file * file; 132 int fput_needed; 133 134 retval = -EBADF; 135 file = fget_light(fd, &fput_needed); 136 if (!file) 137 goto bad; 138 139 retval = -EINVAL; 140 if (origin <= 2) { 141 loff_t res = vfs_llseek(file, offset, origin); 142 retval = res; 143 if (res != (loff_t)retval) 144 retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */ 145 } 146 fput_light(file, fput_needed); 147bad: 148 return retval; 149} 150 151#ifdef __ARCH_WANT_SYS_LLSEEK 152asmlinkage long sys_llseek(unsigned int fd, unsigned long offset_high, 153 unsigned long offset_low, loff_t __user * result, 154 unsigned int origin) 155{ 156 int retval; 157 struct file * file; 158 loff_t offset; 159 int fput_needed; 160 161 retval = -EBADF; 162 file = fget_light(fd, &fput_needed); 163 if (!file) 164 goto bad; 165 166 retval = -EINVAL; 167 if (origin > 2) 168 goto out_putf; 169 170 offset = vfs_llseek(file, ((loff_t) offset_high << 32) | offset_low, 171 origin); 172 173 retval = (int)offset; 174 if (offset >= 0) { 175 retval = -EFAULT; 176 if (!copy_to_user(result, &offset, sizeof(offset))) 177 retval = 0; 178 } 
179out_putf: 180 fput_light(file, fput_needed); 181bad: 182 return retval; 183} 184#endif 185 186/* 187 * rw_verify_area doesn't like huge counts. We limit 188 * them to something that fits in "int" so that others 189 * won't have to do range checks all the time. 190 */ 191#define MAX_RW_COUNT (INT_MAX & PAGE_CACHE_MASK) 192 193int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count) 194{ 195 struct inode *inode; 196 loff_t pos; 197 198 if (unlikely((ssize_t) count < 0)) 199 goto Einval; 200 pos = *ppos; 201 if (unlikely((pos < 0) || (loff_t) (pos + count) < 0)) 202 goto Einval; 203 204 inode = file->f_dentry->d_inode; 205 if (unlikely(inode->i_flock && MANDATORY_LOCK(inode))) { 206 int retval = locks_mandatory_area( 207 read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE, 208 inode, file, pos, count); 209 if (retval < 0) 210 return retval; 211 } 212 return count > MAX_RW_COUNT ? MAX_RW_COUNT : count; 213 214Einval: 215 return -EINVAL; 216} 217 218static void wait_on_retry_sync_kiocb(struct kiocb *iocb) 219{ 220 set_current_state(TASK_UNINTERRUPTIBLE); 221 if (!kiocbIsKicked(iocb)) 222 schedule(); 223 else 224 kiocbClearKicked(iocb); 225 __set_current_state(TASK_RUNNING); 226} 227 228ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) 229{ 230 struct kiocb kiocb; 231 ssize_t ret; 232 233 init_sync_kiocb(&kiocb, filp); 234 kiocb.ki_pos = *ppos; 235 while (-EIOCBRETRY == 236 (ret = filp->f_op->aio_read(&kiocb, buf, len, kiocb.ki_pos))) 237 wait_on_retry_sync_kiocb(&kiocb); 238 239 if (-EIOCBQUEUED == ret) 240 ret = wait_on_sync_kiocb(&kiocb); 241 *ppos = kiocb.ki_pos; 242 return ret; 243} 244 245EXPORT_SYMBOL(do_sync_read); 246 247ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) 248{ 249 ssize_t ret; 250 251 if (!(file->f_mode & FMODE_READ)) 252 return -EBADF; 253 if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read)) 254 return -EINVAL; 255 if 
(unlikely(!access_ok(VERIFY_WRITE, buf, count))) 256 return -EFAULT; 257 258 ret = rw_verify_area(READ, file, pos, count); 259 if (ret >= 0) { 260 count = ret; 261 ret = security_file_permission (file, MAY_READ); 262 if (!ret) { 263 if (file->f_op->read) 264 ret = file->f_op->read(file, buf, count, pos); 265 else 266 ret = do_sync_read(file, buf, count, pos); 267 if (ret > 0) { 268 fsnotify_access(file->f_dentry); 269 current->rchar += ret; 270 } 271 current->syscr++; 272 } 273 } 274 275 return ret; 276} 277 278EXPORT_SYMBOL(vfs_read); 279 280ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) 281{ 282 struct kiocb kiocb; 283 ssize_t ret; 284 285 init_sync_kiocb(&kiocb, filp); 286 kiocb.ki_pos = *ppos; 287 while (-EIOCBRETRY == 288 (ret = filp->f_op->aio_write(&kiocb, buf, len, kiocb.ki_pos))) 289 wait_on_retry_sync_kiocb(&kiocb); 290 291 if (-EIOCBQUEUED == ret) 292 ret = wait_on_sync_kiocb(&kiocb); 293 *ppos = kiocb.ki_pos; 294 return ret; 295} 296 297EXPORT_SYMBOL(do_sync_write); 298 299ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) 300{ 301 ssize_t ret; 302 303 if (!(file->f_mode & FMODE_WRITE)) 304 return -EBADF; 305 if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write)) 306 return -EINVAL; 307 if (unlikely(!access_ok(VERIFY_READ, buf, count))) 308 return -EFAULT; 309 310 ret = rw_verify_area(WRITE, file, pos, count); 311 if (ret >= 0) { 312 count = ret; 313 ret = security_file_permission (file, MAY_WRITE); 314 if (!ret) { 315 if (file->f_op->write) 316 ret = file->f_op->write(file, buf, count, pos); 317 else 318 ret = do_sync_write(file, buf, count, pos); 319 if (ret > 0) { 320 fsnotify_modify(file->f_dentry); 321 current->wchar += ret; 322 } 323 current->syscw++; 324 } 325 } 326 327 return ret; 328} 329 330EXPORT_SYMBOL(vfs_write); 331 332static inline loff_t file_pos_read(struct file *file) 333{ 334 return file->f_pos; 335} 336 337static inline void 
file_pos_write(struct file *file, loff_t pos) 338{ 339 file->f_pos = pos; 340} 341 342asmlinkage ssize_t sys_read(unsigned int fd, char __user * buf, size_t count) 343{ 344 struct file *file; 345 ssize_t ret = -EBADF; 346 int fput_needed; 347 348 file = fget_light(fd, &fput_needed); 349 if (file) { 350 loff_t pos = file_pos_read(file); 351 ret = vfs_read(file, buf, count, &pos); 352 file_pos_write(file, pos); 353 fput_light(file, fput_needed); 354 } 355 356 return ret; 357} 358EXPORT_SYMBOL_GPL(sys_read); 359 360asmlinkage ssize_t sys_write(unsigned int fd, const char __user * buf, size_t count) 361{ 362 struct file *file; 363 ssize_t ret = -EBADF; 364 int fput_needed; 365 366 file = fget_light(fd, &fput_needed); 367 if (file) { 368 loff_t pos = file_pos_read(file); 369 ret = vfs_write(file, buf, count, &pos); 370 file_pos_write(file, pos); 371 fput_light(file, fput_needed); 372 } 373 374 return ret; 375} 376 377asmlinkage ssize_t sys_pread64(unsigned int fd, char __user *buf, 378 size_t count, loff_t pos) 379{ 380 struct file *file; 381 ssize_t ret = -EBADF; 382 int fput_needed; 383 384 if (pos < 0) 385 return -EINVAL; 386 387 file = fget_light(fd, &fput_needed); 388 if (file) { 389 ret = -ESPIPE; 390 if (file->f_mode & FMODE_PREAD) 391 ret = vfs_read(file, buf, count, &pos); 392 fput_light(file, fput_needed); 393 } 394 395 return ret; 396} 397 398asmlinkage ssize_t sys_pwrite64(unsigned int fd, const char __user *buf, 399 size_t count, loff_t pos) 400{ 401 struct file *file; 402 ssize_t ret = -EBADF; 403 int fput_needed; 404 405 if (pos < 0) 406 return -EINVAL; 407 408 file = fget_light(fd, &fput_needed); 409 if (file) { 410 ret = -ESPIPE; 411 if (file->f_mode & FMODE_PWRITE) 412 ret = vfs_write(file, buf, count, &pos); 413 fput_light(file, fput_needed); 414 } 415 416 return ret; 417} 418 419/* 420 * Reduce an iovec's length in-place. 
Return the resulting number of segments 421 */ 422unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to) 423{ 424 unsigned long seg = 0; 425 size_t len = 0; 426 427 while (seg < nr_segs) { 428 seg++; 429 if (len + iov->iov_len >= to) { 430 iov->iov_len = to - len; 431 break; 432 } 433 len += iov->iov_len; 434 iov++; 435 } 436 return seg; 437} 438 439EXPORT_UNUSED_SYMBOL(iov_shorten); /* June 2006 */ 440 441/* A write operation does a read from user space and vice versa */ 442#define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ) 443 444static ssize_t do_readv_writev(int type, struct file *file, 445 const struct iovec __user * uvector, 446 unsigned long nr_segs, loff_t *pos) 447{ 448 typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *); 449 typedef ssize_t (*iov_fn_t)(struct file *, const struct iovec *, unsigned long, loff_t *); 450 451 size_t tot_len; 452 struct iovec iovstack[UIO_FASTIOV]; 453 struct iovec *iov=iovstack, *vector; 454 ssize_t ret; 455 int seg; 456 io_fn_t fn; 457 iov_fn_t fnv; 458 459 /* 460 * SuS says "The readv() function *may* fail if the iovcnt argument 461 * was less than or equal to 0, or greater than {IOV_MAX}. Linux has 462 * traditionally returned zero for zero segments, so... 463 */ 464 ret = 0; 465 if (nr_segs == 0) 466 goto out; 467 468 /* 469 * First get the "struct iovec" from user memory and 470 * verify all the pointers 471 */ 472 ret = -EINVAL; 473 if (nr_segs > UIO_MAXIOV) 474 goto out; 475 if (!file->f_op) 476 goto out; 477 if (nr_segs > UIO_FASTIOV) { 478 ret = -ENOMEM; 479 iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL); 480 if (!iov) 481 goto out; 482 } 483 ret = -EFAULT; 484 if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) 485 goto out; 486 487 /* 488 * Single unix specification: 489 * We should -EINVAL if an element length is not >= 0 and fitting an 490 * ssize_t. 
The total length is fitting an ssize_t 491 * 492 * Be careful here because iov_len is a size_t not an ssize_t 493 */ 494 tot_len = 0; 495 ret = -EINVAL; 496 for (seg = 0; seg < nr_segs; seg++) { 497 void __user *buf = iov[seg].iov_base; 498 ssize_t len = (ssize_t)iov[seg].iov_len; 499 500 if (len < 0) /* size_t not fitting an ssize_t .. */ 501 goto out; 502 if (unlikely(!access_ok(vrfy_dir(type), buf, len))) 503 goto Efault; 504 tot_len += len; 505 if ((ssize_t)tot_len < 0) /* maths overflow on the ssize_t */ 506 goto out; 507 } 508 if (tot_len == 0) { 509 ret = 0; 510 goto out; 511 } 512 513 ret = rw_verify_area(type, file, pos, tot_len); 514 if (ret < 0) 515 goto out; 516 ret = security_file_permission(file, type == READ ? MAY_READ : MAY_WRITE); 517 if (ret) 518 goto out; 519 520 fnv = NULL; 521 if (type == READ) { 522 fn = file->f_op->read; 523 fnv = file->f_op->readv; 524 } else { 525 fn = (io_fn_t)file->f_op->write; 526 fnv = file->f_op->writev; 527 } 528 if (fnv) { 529 ret = fnv(file, iov, nr_segs, pos); 530 goto out; 531 } 532 533 /* Do it by hand, with file-ops */ 534 ret = 0; 535 vector = iov; 536 while (nr_segs > 0) { 537 void __user * base; 538 size_t len; 539 ssize_t nr; 540 541 base = vector->iov_base; 542 len = vector->iov_len; 543 vector++; 544 nr_segs--; 545 546 nr = fn(file, base, len, pos); 547 548 if (nr < 0) { 549 if (!ret) ret = nr; 550 break; 551 } 552 ret += nr; 553 if (nr != len) 554 break; 555 } 556out: 557 if (iov != iovstack) 558 kfree(iov); 559 if ((ret + (type == READ)) > 0) { 560 if (type == READ) 561 fsnotify_access(file->f_dentry); 562 else 563 fsnotify_modify(file->f_dentry); 564 } 565 return ret; 566Efault: 567 ret = -EFAULT; 568 goto out; 569} 570 571ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, 572 unsigned long vlen, loff_t *pos) 573{ 574 if (!(file->f_mode & FMODE_READ)) 575 return -EBADF; 576 if (!file->f_op || (!file->f_op->readv && !file->f_op->read)) 577 return -EINVAL; 578 579 return 
do_readv_writev(READ, file, vec, vlen, pos); 580} 581 582EXPORT_SYMBOL(vfs_readv); 583 584ssize_t vfs_writev(struct file *file, const struct iovec __user *vec, 585 unsigned long vlen, loff_t *pos) 586{ 587 if (!(file->f_mode & FMODE_WRITE)) 588 return -EBADF; 589 if (!file->f_op || (!file->f_op->writev && !file->f_op->write)) 590 return -EINVAL; 591 592 return do_readv_writev(WRITE, file, vec, vlen, pos); 593} 594 595EXPORT_SYMBOL(vfs_writev); 596 597asmlinkage ssize_t 598sys_readv(unsigned long fd, const struct iovec __user *vec, unsigned long vlen) 599{ 600 struct file *file; 601 ssize_t ret = -EBADF; 602 int fput_needed; 603 604 file = fget_light(fd, &fput_needed); 605 if (file) { 606 loff_t pos = file_pos_read(file); 607 ret = vfs_readv(file, vec, vlen, &pos); 608 file_pos_write(file, pos); 609 fput_light(file, fput_needed); 610 } 611 612 if (ret > 0) 613 current->rchar += ret; 614 current->syscr++; 615 return ret; 616} 617 618asmlinkage ssize_t 619sys_writev(unsigned long fd, const struct iovec __user *vec, unsigned long vlen) 620{ 621 struct file *file; 622 ssize_t ret = -EBADF; 623 int fput_needed; 624 625 file = fget_light(fd, &fput_needed); 626 if (file) { 627 loff_t pos = file_pos_read(file); 628 ret = vfs_writev(file, vec, vlen, &pos); 629 file_pos_write(file, pos); 630 fput_light(file, fput_needed); 631 } 632 633 if (ret > 0) 634 current->wchar += ret; 635 current->syscw++; 636 return ret; 637} 638 639static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, 640 size_t count, loff_t max) 641{ 642 struct file * in_file, * out_file; 643 struct inode * in_inode, * out_inode; 644 loff_t pos; 645 ssize_t retval; 646 int fput_needed_in, fput_needed_out; 647 648 /* 649 * Get input file, and verify that it is ok.. 
650 */ 651 retval = -EBADF; 652 in_file = fget_light(in_fd, &fput_needed_in); 653 if (!in_file) 654 goto out; 655 if (!(in_file->f_mode & FMODE_READ)) 656 goto fput_in; 657 retval = -EINVAL; 658 in_inode = in_file->f_dentry->d_inode; 659 if (!in_inode) 660 goto fput_in; 661 if (!in_file->f_op || !in_file->f_op->sendfile) 662 goto fput_in; 663 retval = -ESPIPE; 664 if (!ppos) 665 ppos = &in_file->f_pos; 666 else 667 if (!(in_file->f_mode & FMODE_PREAD)) 668 goto fput_in; 669 retval = rw_verify_area(READ, in_file, ppos, count); 670 if (retval < 0) 671 goto fput_in; 672 count = retval; 673 674 retval = security_file_permission (in_file, MAY_READ); 675 if (retval) 676 goto fput_in; 677 678 /* 679 * Get output file, and verify that it is ok.. 680 */ 681 retval = -EBADF; 682 out_file = fget_light(out_fd, &fput_needed_out); 683 if (!out_file) 684 goto fput_in; 685 if (!(out_file->f_mode & FMODE_WRITE)) 686 goto fput_out; 687 retval = -EINVAL; 688 if (!out_file->f_op || !out_file->f_op->sendpage) 689 goto fput_out; 690 out_inode = out_file->f_dentry->d_inode; 691 retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count); 692 if (retval < 0) 693 goto fput_out; 694 count = retval; 695 696 retval = security_file_permission (out_file, MAY_WRITE); 697 if (retval) 698 goto fput_out; 699 700 if (!max) 701 max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes); 702 703 pos = *ppos; 704 retval = -EINVAL; 705 if (unlikely(pos < 0)) 706 goto fput_out; 707 if (unlikely(pos + count > max)) { 708 retval = -EOVERFLOW; 709 if (pos >= max) 710 goto fput_out; 711 count = max - pos; 712 } 713 714 retval = in_file->f_op->sendfile(in_file, ppos, count, file_send_actor, out_file); 715 716 if (retval > 0) { 717 current->rchar += retval; 718 current->wchar += retval; 719 } 720 current->syscr++; 721 current->syscw++; 722 723 if (*ppos > max) 724 retval = -EOVERFLOW; 725 726fput_out: 727 fput_light(out_file, fput_needed_out); 728fput_in: 729 fput_light(in_file, fput_needed_in); 
730out: 731 return retval; 732} 733 734asmlinkage ssize_t sys_sendfile(int out_fd, int in_fd, off_t __user *offset, size_t count) 735{ 736 loff_t pos; 737 off_t off; 738 ssize_t ret; 739 740 if (offset) { 741 if (unlikely(get_user(off, offset))) 742 return -EFAULT; 743 pos = off; 744 ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS); 745 if (unlikely(put_user(pos, offset))) 746 return -EFAULT; 747 return ret; 748 } 749 750 return do_sendfile(out_fd, in_fd, NULL, count, 0); 751} 752 753asmlinkage ssize_t sys_sendfile64(int out_fd, int in_fd, loff_t __user *offset, size_t count) 754{ 755 loff_t pos; 756 ssize_t ret; 757 758 if (offset) { 759 if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t)))) 760 return -EFAULT; 761 ret = do_sendfile(out_fd, in_fd, &pos, count, 0); 762 if (unlikely(put_user(pos, offset))) 763 return -EFAULT; 764 return ret; 765 } 766 767 return do_sendfile(out_fd, in_fd, NULL, count, 0); 768}