Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

new methods: ->read_iter() and ->write_iter()

This begins introducing them. Only the callers are added for now, and
the result is clumsier than it will eventually be; once we finish
converting the ->aio_read() and ->aio_write() instances, things will
get nicer.

For now, these methods exist in parallel with ->aio_read() and
->aio_write(); they take an iocb and an iov_iter, with everything in the
iov_iter already validated. The file offset is passed in iocb->ki_pos;
iov and nr_segs are passed in the iov_iter.

Main concerns in that series are stack footprint and ability to
split the damn thing cleanly.

[fix from Peter Ujfalusi <peter.ujfalusi@ti.com> folded]

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

Al Viro 293bc982 7f7f25e8

+121 -13
+2
Documentation/filesystems/Locking
··· 430 430 ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); 431 431 ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t); 432 432 ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t); 433 + ssize_t (*read_iter) (struct kiocb *, struct iov_iter *); 434 + ssize_t (*write_iter) (struct kiocb *, struct iov_iter *); 433 435 int (*iterate) (struct file *, struct dir_context *); 434 436 unsigned int (*poll) (struct file *, struct poll_table_struct *); 435 437 long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
+8 -2
Documentation/filesystems/vfs.txt
··· 806 806 ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); 807 807 ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t); 808 808 ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t); 809 + ssize_t (*read_iter) (struct kiocb *, struct iov_iter *); 810 + ssize_t (*write_iter) (struct kiocb *, struct iov_iter *); 809 811 int (*iterate) (struct file *, struct dir_context *); 810 812 unsigned int (*poll) (struct file *, struct poll_table_struct *); 811 813 long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); ··· 838 836 839 837 read: called by read(2) and related system calls 840 838 841 - aio_read: called by io_submit(2) and other asynchronous I/O operations 839 + aio_read: vectored, possibly asynchronous read 840 + 841 + read_iter: possibly asynchronous read with iov_iter as destination 842 842 843 843 write: called by write(2) and related system calls 844 844 845 - aio_write: called by io_submit(2) and other asynchronous I/O operations 845 + aio_write: vectored, possibly asynchronous write 846 + 847 + write_iter: possibly asynchronous write with iov_iter as source 846 848 847 849 iterate: called when the VFS needs to read the directory contents 848 850
+12 -2
fs/aio.c
··· 1241 1241 1242 1242 typedef ssize_t (aio_rw_op)(struct kiocb *, const struct iovec *, 1243 1243 unsigned long, loff_t); 1244 + typedef ssize_t (rw_iter_op)(struct kiocb *, struct iov_iter *); 1244 1245 1245 1246 static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb, 1246 1247 int rw, char __user *buf, ··· 1299 1298 int rw; 1300 1299 fmode_t mode; 1301 1300 aio_rw_op *rw_op; 1301 + rw_iter_op *iter_op; 1302 1302 struct iovec inline_vec, *iovec = &inline_vec; 1303 + struct iov_iter iter; 1303 1304 1304 1305 switch (opcode) { 1305 1306 case IOCB_CMD_PREAD: ··· 1309 1306 mode = FMODE_READ; 1310 1307 rw = READ; 1311 1308 rw_op = file->f_op->aio_read; 1309 + iter_op = file->f_op->read_iter; 1312 1310 goto rw_common; 1313 1311 1314 1312 case IOCB_CMD_PWRITE: ··· 1317 1313 mode = FMODE_WRITE; 1318 1314 rw = WRITE; 1319 1315 rw_op = file->f_op->aio_write; 1316 + iter_op = file->f_op->write_iter; 1320 1317 goto rw_common; 1321 1318 rw_common: 1322 1319 if (unlikely(!(file->f_mode & mode))) 1323 1320 return -EBADF; 1324 1321 1325 - if (!rw_op) 1322 + if (!rw_op && !iter_op) 1326 1323 return -EINVAL; 1327 1324 1328 1325 ret = (opcode == IOCB_CMD_PREADV || ··· 1352 1347 if (rw == WRITE) 1353 1348 file_start_write(file); 1354 1349 1355 - ret = rw_op(req, iovec, nr_segs, req->ki_pos); 1350 + if (iter_op) { 1351 + iov_iter_init(&iter, rw, iovec, nr_segs, req->ki_nbytes); 1352 + ret = iter_op(req, &iter); 1353 + } else { 1354 + ret = rw_op(req, iovec, nr_segs, req->ki_pos); 1355 + } 1356 1356 1357 1357 if (rw == WRITE) 1358 1358 file_end_write(file);
+4 -2
fs/file_table.c
··· 175 175 file->f_path = *path; 176 176 file->f_inode = path->dentry->d_inode; 177 177 file->f_mapping = path->dentry->d_inode->i_mapping; 178 - if ((mode & FMODE_READ) && likely(fop->read || fop->aio_read)) 178 + if ((mode & FMODE_READ) && 179 + likely(fop->read || fop->aio_read || fop->read_iter)) 179 180 mode |= FMODE_CAN_READ; 180 - if ((mode & FMODE_WRITE) && likely(fop->write || fop->aio_write)) 181 + if ((mode & FMODE_WRITE) && 182 + likely(fop->write || fop->aio_write || fop->write_iter)) 181 183 mode |= FMODE_CAN_WRITE; 182 184 file->f_mode = mode; 183 185 file->f_op = fop;
+4 -2
fs/open.c
··· 725 725 } 726 726 if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) 727 727 i_readcount_inc(inode); 728 - if ((f->f_mode & FMODE_READ) && likely(f->f_op->read || f->f_op->aio_read)) 728 + if ((f->f_mode & FMODE_READ) && 729 + likely(f->f_op->read || f->f_op->aio_read || f->f_op->read_iter)) 729 730 f->f_mode |= FMODE_CAN_READ; 730 - if ((f->f_mode & FMODE_WRITE) && likely(f->f_op->write || f->f_op->aio_write)) 731 + if ((f->f_mode & FMODE_WRITE) && 732 + likely(f->f_op->write || f->f_op->aio_write || f->f_op->write_iter)) 731 733 f->f_mode |= FMODE_CAN_WRITE; 732 734 733 735 f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
+85 -5
fs/read_write.c
··· 25 25 typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *); 26 26 typedef ssize_t (*iov_fn_t)(struct kiocb *, const struct iovec *, 27 27 unsigned long, loff_t); 28 + typedef ssize_t (*iter_fn_t)(struct kiocb *, struct iov_iter *); 28 29 29 30 const struct file_operations generic_ro_fops = { 30 31 .llseek = generic_file_llseek, ··· 391 390 392 391 EXPORT_SYMBOL(do_sync_read); 393 392 393 + ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) 394 + { 395 + struct iovec iov = { .iov_base = buf, .iov_len = len }; 396 + struct kiocb kiocb; 397 + struct iov_iter iter; 398 + ssize_t ret; 399 + 400 + init_sync_kiocb(&kiocb, filp); 401 + kiocb.ki_pos = *ppos; 402 + kiocb.ki_nbytes = len; 403 + iov_iter_init(&iter, READ, &iov, 1, len); 404 + 405 + ret = filp->f_op->read_iter(&kiocb, &iter); 406 + if (-EIOCBQUEUED == ret) 407 + ret = wait_on_sync_kiocb(&kiocb); 408 + *ppos = kiocb.ki_pos; 409 + return ret; 410 + } 411 + 412 + EXPORT_SYMBOL(new_sync_read); 413 + 394 414 ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) 395 415 { 396 416 ssize_t ret; ··· 428 406 count = ret; 429 407 if (file->f_op->read) 430 408 ret = file->f_op->read(file, buf, count, pos); 431 - else 409 + else if (file->f_op->aio_read) 432 410 ret = do_sync_read(file, buf, count, pos); 411 + else 412 + ret = new_sync_read(file, buf, count, pos); 433 413 if (ret > 0) { 434 414 fsnotify_access(file); 435 415 add_rchar(current, ret); ··· 463 439 464 440 EXPORT_SYMBOL(do_sync_write); 465 441 442 + ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) 443 + { 444 + struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len }; 445 + struct kiocb kiocb; 446 + struct iov_iter iter; 447 + ssize_t ret; 448 + 449 + init_sync_kiocb(&kiocb, filp); 450 + kiocb.ki_pos = *ppos; 451 + kiocb.ki_nbytes = len; 452 + iov_iter_init(&iter, WRITE, &iov, 1, len); 453 + 454 + ret = 
filp->f_op->write_iter(&kiocb, &iter); 455 + if (-EIOCBQUEUED == ret) 456 + ret = wait_on_sync_kiocb(&kiocb); 457 + *ppos = kiocb.ki_pos; 458 + return ret; 459 + } 460 + 461 + EXPORT_SYMBOL(new_sync_write); 462 + 466 463 ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos) 467 464 { 468 465 mm_segment_t old_fs; ··· 500 455 count = MAX_RW_COUNT; 501 456 if (file->f_op->write) 502 457 ret = file->f_op->write(file, p, count, pos); 503 - else 458 + else if (file->f_op->aio_write) 504 459 ret = do_sync_write(file, p, count, pos); 460 + else 461 + ret = new_sync_write(file, p, count, pos); 505 462 set_fs(old_fs); 506 463 if (ret > 0) { 507 464 fsnotify_modify(file); ··· 530 483 file_start_write(file); 531 484 if (file->f_op->write) 532 485 ret = file->f_op->write(file, buf, count, pos); 533 - else 486 + else if (file->f_op->aio_write) 534 487 ret = do_sync_write(file, buf, count, pos); 488 + else 489 + ret = new_sync_write(file, buf, count, pos); 535 490 if (ret > 0) { 536 491 fsnotify_modify(file); 537 492 add_wchar(current, ret); ··· 649 600 return seg; 650 601 } 651 602 EXPORT_SYMBOL(iov_shorten); 603 + 604 + static ssize_t do_iter_readv_writev(struct file *filp, int rw, const struct iovec *iov, 605 + unsigned long nr_segs, size_t len, loff_t *ppos, iter_fn_t fn) 606 + { 607 + struct kiocb kiocb; 608 + struct iov_iter iter; 609 + ssize_t ret; 610 + 611 + init_sync_kiocb(&kiocb, filp); 612 + kiocb.ki_pos = *ppos; 613 + kiocb.ki_nbytes = len; 614 + 615 + iov_iter_init(&iter, rw, iov, nr_segs, len); 616 + ret = fn(&kiocb, &iter); 617 + if (ret == -EIOCBQUEUED) 618 + ret = wait_on_sync_kiocb(&kiocb); 619 + *ppos = kiocb.ki_pos; 620 + return ret; 621 + } 652 622 653 623 static ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, 654 624 unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn) ··· 806 738 ssize_t ret; 807 739 io_fn_t fn; 808 740 iov_fn_t fnv; 741 + iter_fn_t iter_fn; 809 742 810 743 ret = 
rw_copy_check_uvector(type, uvector, nr_segs, 811 744 ARRAY_SIZE(iovstack), iovstack, &iov); ··· 822 753 if (type == READ) { 823 754 fn = file->f_op->read; 824 755 fnv = file->f_op->aio_read; 756 + iter_fn = file->f_op->read_iter; 825 757 } else { 826 758 fn = (io_fn_t)file->f_op->write; 827 759 fnv = file->f_op->aio_write; 760 + iter_fn = file->f_op->write_iter; 828 761 file_start_write(file); 829 762 } 830 763 831 - if (fnv) 764 + if (iter_fn) 765 + ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len, 766 + pos, iter_fn); 767 + else if (fnv) 832 768 ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, 833 769 pos, fnv); 834 770 else ··· 986 912 ssize_t ret; 987 913 io_fn_t fn; 988 914 iov_fn_t fnv; 915 + iter_fn_t iter_fn; 989 916 990 917 ret = compat_rw_copy_check_uvector(type, uvector, nr_segs, 991 918 UIO_FASTIOV, iovstack, &iov); ··· 1002 927 if (type == READ) { 1003 928 fn = file->f_op->read; 1004 929 fnv = file->f_op->aio_read; 930 + iter_fn = file->f_op->read_iter; 1005 931 } else { 1006 932 fn = (io_fn_t)file->f_op->write; 1007 933 fnv = file->f_op->aio_write; 934 + iter_fn = file->f_op->write_iter; 1008 935 file_start_write(file); 1009 936 } 1010 937 1011 - if (fnv) 938 + if (iter_fn) 939 + ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len, 940 + pos, iter_fn); 941 + else if (fnv) 1012 942 ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, 1013 943 pos, fnv); 1014 944 else
+6
include/linux/fs.h
··· 1451 1451 #define HAVE_COMPAT_IOCTL 1 1452 1452 #define HAVE_UNLOCKED_IOCTL 1 1453 1453 1454 + struct iov_iter; 1455 + 1454 1456 struct file_operations { 1455 1457 struct module *owner; 1456 1458 loff_t (*llseek) (struct file *, loff_t, int); ··· 1460 1458 ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); 1461 1459 ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t); 1462 1460 ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t); 1461 + ssize_t (*read_iter) (struct kiocb *, struct iov_iter *); 1462 + ssize_t (*write_iter) (struct kiocb *, struct iov_iter *); 1463 1463 int (*iterate) (struct file *, struct dir_context *); 1464 1464 unsigned int (*poll) (struct file *, struct poll_table_struct *); 1465 1465 long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); ··· 2419 2415 extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t); 2420 2416 extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos); 2421 2417 extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos); 2418 + extern ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos); 2419 + extern ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos); 2422 2420 2423 2421 /* fs/block_dev.c */ 2424 2422 extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,