Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull vfs updates from Al Viro:
"The first vfs pile, with deep apologies for being very late in this
window.

Assorted cleanups and fixes, plus a large preparatory part of iov_iter
work. There's a lot more of that, but it'll probably go into the next
merge window - it *does* shape up nicely, removes a lot of
boilerplate, gets rid of locking inconsistencies between aio_write and
splice_write and I hope to get Kent's direct-io rewrite merged into
the same queue, but some of the stuff after this point is having
(mostly trivial) conflicts with the things already merged into
mainline, and with some of it I want more testing.

This one passes LTP and xfstests without regressions, in addition to the
usual beating. BTW, readahead02 in the LTP syscalls test suite has started
giving failures since "mm/readahead.c: fix readahead failure for
memoryless NUMA nodes and limit readahead pages" - might be a false
positive, might be a real regression..."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (63 commits)
missing bits of "splice: fix racy pipe->buffers uses"
cifs: fix the race in cifs_writev()
ceph_sync_{,direct_}write: fix an oops on ceph_osdc_new_request() failure
kill generic_file_buffered_write()
ocfs2_file_aio_write(): switch to generic_perform_write()
ceph_aio_write(): switch to generic_perform_write()
xfs_file_buffered_aio_write(): switch to generic_perform_write()
export generic_perform_write(), start getting rid of generic_file_buffered_write()
generic_file_direct_write(): get rid of ppos argument
btrfs_file_aio_write(): get rid of ppos
kill the 5th argument of generic_file_buffered_write()
kill the 4th argument of __generic_file_aio_write()
lustre: don't open-code kernel_recvmsg()
ocfs2: don't open-code kernel_recvmsg()
drbd: don't open-code kernel_recvmsg()
constify blk_rq_map_user_iov() and friends
lustre: switch to kernel_sendmsg()
ocfs2: don't open-code kernel_sendmsg()
take iov_iter stuff to mm/iov_iter.c
process_vm_access: tidy up a bit
...
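
Much of this pile converts ->aio_write() paths (ceph, ocfs2, xfs in the list
above) from generic_file_buffered_write() to an iov_iter fed into the newly
exported generic_perform_write(). A minimal sketch of that pattern, based on
the ceph/ocfs2 hunks below (the function name is illustrative, and the usual
generic_write_checks()/i_mutex handling done by the real callers is omitted):

    /*
     * Sketch only: build an iov_iter over the caller's iovec and hand it
     * to generic_perform_write() instead of generic_file_buffered_write().
     * Uses the old 5-argument iov_iter_init() as in this series.
     */
    static ssize_t example_buffered_aio_write(struct kiocb *iocb,
                                              const struct iovec *iov,
                                              unsigned long nr_segs, loff_t pos)
    {
            struct file *file = iocb->ki_filp;
            struct iov_iter from;
            size_t count = iov_length(iov, nr_segs);
            ssize_t written;

            iov_iter_init(&from, iov, nr_segs, count, 0);
            written = generic_perform_write(file, &from, pos);
            if (likely(written >= 0))
                    iocb->ki_pos = pos + written;
            return written;
    }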

+911 -1537
+1 -1
Documentation/filesystems/Locking
··· 202 202 unsigned long *); 203 203 int (*migratepage)(struct address_space *, struct page *, struct page *); 204 204 int (*launder_page)(struct page *); 205 - int (*is_partially_uptodate)(struct page *, read_descriptor_t *, unsigned long); 205 + int (*is_partially_uptodate)(struct page *, unsigned long, unsigned long); 206 206 int (*error_remove_page)(struct address_space *, struct page *); 207 207 int (*swap_activate)(struct file *); 208 208 int (*swap_deactivate)(struct file *);
+1 -1
Documentation/filesystems/vfs.txt
··· 596 596 /* migrate the contents of a page to the specified target */ 597 597 int (*migratepage) (struct page *, struct page *); 598 598 int (*launder_page) (struct page *); 599 - int (*is_partially_uptodate) (struct page *, read_descriptor_t *, 599 + int (*is_partially_uptodate) (struct page *, unsigned long, 600 600 unsigned long); 601 601 void (*is_dirty_writeback) (struct page *, bool *, bool *); 602 602 int (*error_remove_page) (struct mapping *mapping, struct page *page);
+2 -2
arch/mn10300/include/asm/highmem.h
··· 70 70 * be used in IRQ contexts, so in some (very limited) cases we need 71 71 * it. 72 72 */ 73 - static inline unsigned long kmap_atomic(struct page *page) 73 + static inline void *kmap_atomic(struct page *page) 74 74 { 75 75 unsigned long vaddr; 76 76 int idx, type; ··· 89 89 set_pte(kmap_pte - idx, mk_pte(page, kmap_prot)); 90 90 local_flush_tlb_one(vaddr); 91 91 92 - return vaddr; 92 + return (void *)vaddr; 93 93 } 94 94 95 95 static inline void __kunmap_atomic(unsigned long vaddr)
-1
arch/powerpc/configs/ppc6xx_defconfig
··· 1244 1244 CONFIG_DEBUG_HIGHMEM=y 1245 1245 CONFIG_DEBUG_INFO=y 1246 1246 CONFIG_DEBUG_VM=y 1247 - CONFIG_DEBUG_WRITECOUNT=y 1248 1247 CONFIG_DEBUG_LIST=y 1249 1248 CONFIG_DEBUG_SG=y 1250 1249 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
-1
arch/powerpc/configs/ps3_defconfig
··· 174 174 CONFIG_PROVE_LOCKING=y 175 175 CONFIG_DEBUG_LOCKDEP=y 176 176 CONFIG_DEBUG_INFO=y 177 - CONFIG_DEBUG_WRITECOUNT=y 178 177 CONFIG_DEBUG_MEMORY_INIT=y 179 178 CONFIG_DEBUG_LIST=y 180 179 CONFIG_RCU_CPU_STALL_TIMEOUT=60
-1
arch/s390/configs/default_defconfig
··· 581 581 CONFIG_DEBUG_LOCKDEP=y 582 582 CONFIG_DEBUG_ATOMIC_SLEEP=y 583 583 CONFIG_DEBUG_LOCKING_API_SELFTESTS=y 584 - CONFIG_DEBUG_WRITECOUNT=y 585 584 CONFIG_DEBUG_LIST=y 586 585 CONFIG_DEBUG_SG=y 587 586 CONFIG_DEBUG_NOTIFIERS=y
-1
arch/sh/configs/rsk7203_defconfig
··· 128 128 CONFIG_DEBUG_SPINLOCK_SLEEP=y 129 129 CONFIG_DEBUG_INFO=y 130 130 CONFIG_DEBUG_VM=y 131 - CONFIG_DEBUG_WRITECOUNT=y 132 131 CONFIG_DEBUG_LIST=y 133 132 CONFIG_DEBUG_SG=y 134 133 CONFIG_FRAME_POINTER=y
-1
arch/xtensa/configs/iss_defconfig
··· 627 627 # CONFIG_DEBUG_KOBJECT is not set 628 628 # CONFIG_DEBUG_INFO is not set 629 629 # CONFIG_DEBUG_VM is not set 630 - # CONFIG_DEBUG_WRITECOUNT is not set 631 630 # CONFIG_DEBUG_MEMORY_INIT is not set 632 631 # CONFIG_DEBUG_LIST is not set 633 632 # CONFIG_DEBUG_SG is not set
-1
arch/xtensa/configs/s6105_defconfig
··· 569 569 # CONFIG_DEBUG_INFO is not set 570 570 # CONFIG_DEBUG_VM is not set 571 571 CONFIG_DEBUG_NOMMU_REGIONS=y 572 - # CONFIG_DEBUG_WRITECOUNT is not set 573 572 # CONFIG_DEBUG_MEMORY_INIT is not set 574 573 # CONFIG_DEBUG_LIST is not set 575 574 # CONFIG_DEBUG_SG is not set
+1 -1
block/blk-map.c
··· 188 188 * unmapping. 189 189 */ 190 190 int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, 191 - struct rq_map_data *map_data, struct sg_iovec *iov, 191 + struct rq_map_data *map_data, const struct sg_iovec *iov, 192 192 int iov_count, unsigned int len, gfp_t gfp_mask) 193 193 { 194 194 struct bio *bio;
+1 -11
drivers/block/drbd/drbd_receiver.c
··· 469 469 470 470 static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags) 471 471 { 472 - mm_segment_t oldfs; 473 472 struct kvec iov = { 474 473 .iov_base = buf, 475 474 .iov_len = size, 476 475 }; 477 476 struct msghdr msg = { 478 - .msg_iovlen = 1, 479 - .msg_iov = (struct iovec *)&iov, 480 477 .msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL) 481 478 }; 482 - int rv; 483 - 484 - oldfs = get_fs(); 485 - set_fs(KERNEL_DS); 486 - rv = sock_recvmsg(sock, &msg, size, msg.msg_flags); 487 - set_fs(oldfs); 488 - 489 - return rv; 479 + return kernel_recvmsg(sock, &msg, &iov, 1, size, msg.msg_flags); 490 480 } 491 481 492 482 static int drbd_recv(struct drbd_connection *connection, void *buf, size_t size)
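
The drbd hunk above is representative of the "don't open-code kernel_recvmsg()"
commits: the set_fs(KERNEL_DS)/sock_recvmsg()/set_fs() dance disappears because
kernel_recvmsg() takes a struct kvec and does the address-space switch
internally. Roughly (the helper name is illustrative):

    static int recv_short(struct socket *sock, void *buf, size_t size, int flags)
    {
            struct kvec iov = {
                    .iov_base = buf,
                    .iov_len  = size,
            };
            struct msghdr msg = {
                    .msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
            };

            /* kernel_recvmsg() handles KERNEL_DS internally */
            return kernel_recvmsg(sock, &msg, &iov, 1, size, msg.msg_flags);
    }
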
+19 -29
drivers/block/nbd.c
··· 630 630 } 631 631 632 632 case NBD_CLEAR_SOCK: { 633 - struct file *file; 634 - 633 + struct socket *sock = nbd->sock; 635 634 nbd->sock = NULL; 636 - file = nbd->file; 637 - nbd->file = NULL; 638 635 nbd_clear_que(nbd); 639 636 BUG_ON(!list_empty(&nbd->queue_head)); 640 637 BUG_ON(!list_empty(&nbd->waiting_queue)); 641 638 kill_bdev(bdev); 642 - if (file) 643 - fput(file); 639 + if (sock) 640 + sockfd_put(sock); 644 641 return 0; 645 642 } 646 643 647 644 case NBD_SET_SOCK: { 648 - struct file *file; 649 - if (nbd->file) 645 + struct socket *sock; 646 + int err; 647 + if (nbd->sock) 650 648 return -EBUSY; 651 - file = fget(arg); 652 - if (file) { 653 - struct inode *inode = file_inode(file); 654 - if (S_ISSOCK(inode->i_mode)) { 655 - nbd->file = file; 656 - nbd->sock = SOCKET_I(inode); 657 - if (max_part > 0) 658 - bdev->bd_invalidated = 1; 659 - nbd->disconnect = 0; /* we're connected now */ 660 - return 0; 661 - } else { 662 - fput(file); 663 - } 649 + sock = sockfd_lookup(arg, &err); 650 + if (sock) { 651 + nbd->sock = sock; 652 + if (max_part > 0) 653 + bdev->bd_invalidated = 1; 654 + nbd->disconnect = 0; /* we're connected now */ 655 + return 0; 664 656 } 665 657 return -EINVAL; 666 658 } ··· 689 697 690 698 case NBD_DO_IT: { 691 699 struct task_struct *thread; 692 - struct file *file; 700 + struct socket *sock; 693 701 int error; 694 702 695 703 if (nbd->pid) 696 704 return -EBUSY; 697 - if (!nbd->file) 705 + if (!nbd->sock) 698 706 return -EINVAL; 699 707 700 708 mutex_unlock(&nbd->tx_lock); ··· 723 731 if (error) 724 732 return error; 725 733 sock_shutdown(nbd, 0); 726 - file = nbd->file; 727 - nbd->file = NULL; 734 + sock = nbd->sock; 735 + nbd->sock = NULL; 728 736 nbd_clear_que(nbd); 729 737 dev_warn(disk_to_dev(nbd->disk), "queue cleared\n"); 730 738 kill_bdev(bdev); 731 739 queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue); 732 740 set_device_ro(bdev, false); 733 - if (file) 734 - fput(file); 741 + if (sock) 742 + sockfd_put(sock); 735 743 nbd->flags = 0; 736 744 nbd->bytesize = 0; 737 745 bdev->bd_inode->i_size = 0; ··· 867 875 868 876 for (i = 0; i < nbds_max; i++) { 869 877 struct gendisk *disk = nbd_dev[i].disk; 870 - nbd_dev[i].file = NULL; 871 878 nbd_dev[i].magic = NBD_MAGIC; 872 - nbd_dev[i].flags = 0; 873 879 INIT_LIST_HEAD(&nbd_dev[i].waiting_queue); 874 880 spin_lock_init(&nbd_dev[i].queue_lock); 875 881 INIT_LIST_HEAD(&nbd_dev[i].queue_head);
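
nbd, usbip, vhost-net and ncpfs all switch from holding a struct file for the
socket (fget()/S_ISSOCK()/SOCKET_I()/fput()) to sockfd_lookup()/sockfd_put().
A minimal sketch of the pattern, with a placeholder struct my_dev standing in
for the driver's own state:

    #include <linux/net.h>          /* sockfd_lookup(), sockfd_put() */

    struct my_dev {                 /* placeholder for the driver's state */
            struct socket *sock;
    };

    static int my_dev_set_sock(struct my_dev *dev, int fd)
    {
            int err;
            struct socket *sock = sockfd_lookup(fd, &err); /* takes a reference */

            if (!sock)
                    return err;     /* -EBADF or -ENOTSOCK */
            dev->sock = sock;
            return 0;
    }

    static void my_dev_clear_sock(struct my_dev *dev)
    {
            if (dev->sock) {
                    sockfd_put(dev->sock);  /* pairs with sockfd_lookup() */
                    dev->sock = NULL;
            }
    }
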
+2 -2
drivers/char/virtio_console.c
··· 901 901 if (len + offset > PAGE_SIZE) 902 902 len = PAGE_SIZE - offset; 903 903 904 - src = buf->ops->map(pipe, buf, 1); 904 + src = kmap_atomic(buf->page); 905 905 memcpy(page_address(page) + offset, src + buf->offset, len); 906 - buf->ops->unmap(pipe, buf, src); 906 + kunmap_atomic(src); 907 907 908 908 sg_set_page(&(sgl->sg[sgl->n]), page, len, offset); 909 909 }
+11 -49
drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib-linux.c
··· 99 99 struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov; 100 100 unsigned int niov = tx->tx_niov; 101 101 #endif 102 - struct msghdr msg = { 103 - .msg_name = NULL, 104 - .msg_namelen = 0, 105 - .msg_iov = scratchiov, 106 - .msg_iovlen = niov, 107 - .msg_control = NULL, 108 - .msg_controllen = 0, 109 - .msg_flags = MSG_DONTWAIT 110 - }; 111 - mm_segment_t oldmm = get_fs(); 102 + struct msghdr msg = {.msg_flags = MSG_DONTWAIT}; 112 103 int i; 113 104 114 105 for (nob = i = 0; i < niov; i++) { ··· 111 120 nob < tx->tx_resid) 112 121 msg.msg_flags |= MSG_MORE; 113 122 114 - set_fs (KERNEL_DS); 115 - rc = sock_sendmsg(sock, &msg, nob); 116 - set_fs (oldmm); 123 + rc = kernel_sendmsg(sock, &msg, (struct kvec *)scratchiov, niov, nob); 117 124 } 118 125 return rc; 119 126 } ··· 163 174 struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov; 164 175 unsigned int niov = tx->tx_nkiov; 165 176 #endif 166 - struct msghdr msg = { 167 - .msg_name = NULL, 168 - .msg_namelen = 0, 169 - .msg_iov = scratchiov, 170 - .msg_iovlen = niov, 171 - .msg_control = NULL, 172 - .msg_controllen = 0, 173 - .msg_flags = MSG_DONTWAIT 174 - }; 175 - mm_segment_t oldmm = get_fs(); 177 + struct msghdr msg = {.msg_flags = MSG_DONTWAIT}; 176 178 int i; 177 179 178 180 for (nob = i = 0; i < niov; i++) { ··· 176 196 nob < tx->tx_resid) 177 197 msg.msg_flags |= MSG_MORE; 178 198 179 - set_fs (KERNEL_DS); 180 - rc = sock_sendmsg(sock, &msg, nob); 181 - set_fs (oldmm); 199 + rc = kernel_sendmsg(sock, &msg, (struct kvec *)scratchiov, niov, nob); 182 200 183 201 for (i = 0; i < niov; i++) 184 202 kunmap(kiov[i].kiov_page); ··· 215 237 #endif 216 238 struct iovec *iov = conn->ksnc_rx_iov; 217 239 struct msghdr msg = { 218 - .msg_name = NULL, 219 - .msg_namelen = 0, 220 - .msg_iov = scratchiov, 221 - .msg_iovlen = niov, 222 - .msg_control = NULL, 223 - .msg_controllen = 0, 224 240 .msg_flags = 0 225 241 }; 226 - mm_segment_t oldmm = get_fs(); 227 242 int nob; 228 243 int i; 229 244 int rc; ··· 234 263 } 235 264 LASSERT (nob <= conn->ksnc_rx_nob_wanted); 236 265 237 - set_fs (KERNEL_DS); 238 - rc = sock_recvmsg (conn->ksnc_sock, &msg, nob, MSG_DONTWAIT); 239 - /* NB this is just a boolean..........................^ */ 240 - set_fs (oldmm); 266 + rc = kernel_recvmsg(conn->ksnc_sock, &msg, 267 + (struct kvec *)scratchiov, niov, nob, MSG_DONTWAIT); 241 268 242 269 saved_csum = 0; 243 270 if (conn->ksnc_proto == &ksocknal_protocol_v2x) { ··· 324 355 #endif 325 356 lnet_kiov_t *kiov = conn->ksnc_rx_kiov; 326 357 struct msghdr msg = { 327 - .msg_name = NULL, 328 - .msg_namelen = 0, 329 - .msg_iov = scratchiov, 330 - .msg_control = NULL, 331 - .msg_controllen = 0, 332 358 .msg_flags = 0 333 359 }; 334 - mm_segment_t oldmm = get_fs(); 335 360 int nob; 336 361 int i; 337 362 int rc; ··· 333 370 void *addr; 334 371 int sum; 335 372 int fragnob; 373 + int n; 336 374 337 375 /* NB we can't trust socket ops to either consume our iovs 338 376 * or leave them alone. 
*/ 339 377 addr = ksocknal_lib_kiov_vmap(kiov, niov, scratchiov, pages); 340 378 if (addr != NULL) { 341 379 nob = scratchiov[0].iov_len; 342 - msg.msg_iovlen = 1; 380 + n = 1; 343 381 344 382 } else { 345 383 for (nob = i = 0; i < niov; i++) { ··· 348 384 scratchiov[i].iov_base = kmap(kiov[i].kiov_page) + 349 385 kiov[i].kiov_offset; 350 386 } 351 - msg.msg_iovlen = niov; 387 + n = niov; 352 388 } 353 389 354 390 LASSERT (nob <= conn->ksnc_rx_nob_wanted); 355 391 356 - set_fs (KERNEL_DS); 357 - rc = sock_recvmsg (conn->ksnc_sock, &msg, nob, MSG_DONTWAIT); 358 - /* NB this is just a boolean.......................^ */ 359 - set_fs (oldmm); 392 + rc = kernel_recvmsg(conn->ksnc_sock, &msg, 393 + (struct kvec *)scratchiov, n, nob, MSG_DONTWAIT); 360 394 361 395 if (conn->ksnc_msg.ksm_csum != 0) { 362 396 for (i = 0, sum = rc; sum > 0; i++, sum -= fragnob) {
+4 -20
drivers/staging/lustre/lustre/libcfs/linux/linux-tcpip.c
··· 265 265 * empty enough to take the whole message immediately */ 266 266 267 267 for (;;) { 268 - struct iovec iov = { 268 + struct kvec iov = { 269 269 .iov_base = buffer, 270 270 .iov_len = nob 271 271 }; 272 272 struct msghdr msg = { 273 - .msg_name = NULL, 274 - .msg_namelen = 0, 275 - .msg_iov = &iov, 276 - .msg_iovlen = 1, 277 - .msg_control = NULL, 278 - .msg_controllen = 0, 279 273 .msg_flags = (timeout == 0) ? MSG_DONTWAIT : 0 280 274 }; 281 275 ··· 291 297 } 292 298 } 293 299 294 - set_fs (KERNEL_DS); 295 300 then = jiffies; 296 - rc = sock_sendmsg (sock, &msg, iov.iov_len); 301 + rc = kernel_sendmsg(sock, &msg, &iov, 1, nob); 297 302 ticks -= jiffies - then; 298 - set_fs (oldmm); 299 303 300 304 if (rc == nob) 301 305 return 0; ··· 330 338 LASSERT (ticks > 0); 331 339 332 340 for (;;) { 333 - struct iovec iov = { 341 + struct kvec iov = { 334 342 .iov_base = buffer, 335 343 .iov_len = nob 336 344 }; 337 345 struct msghdr msg = { 338 - .msg_name = NULL, 339 - .msg_namelen = 0, 340 - .msg_iov = &iov, 341 - .msg_iovlen = 1, 342 - .msg_control = NULL, 343 - .msg_controllen = 0, 344 346 .msg_flags = 0 345 347 }; 346 348 ··· 353 367 return rc; 354 368 } 355 369 356 - set_fs(KERNEL_DS); 357 370 then = jiffies; 358 - rc = sock_recvmsg(sock, &msg, iov.iov_len, 0); 371 + rc = kernel_recvmsg(sock, &msg, &iov, 1, nob, 0); 359 372 ticks -= jiffies - then; 360 - set_fs(oldmm); 361 373 362 374 if (rc < 0) 363 375 return rc;
+1 -22
drivers/staging/lustre/lustre/llite/symlink.c
··· 115 115 return rc; 116 116 } 117 117 118 - static int ll_readlink(struct dentry *dentry, char *buffer, int buflen) 119 - { 120 - struct inode *inode = dentry->d_inode; 121 - struct ptlrpc_request *request; 122 - char *symname; 123 - int rc; 124 - 125 - CDEBUG(D_VFSTRACE, "VFS Op\n"); 126 - 127 - ll_inode_size_lock(inode); 128 - rc = ll_readlink_internal(inode, &request, &symname); 129 - if (rc) 130 - GOTO(out, rc); 131 - 132 - rc = vfs_readlink(dentry, buffer, buflen, symname); 133 - out: 134 - ptlrpc_req_finished(request); 135 - ll_inode_size_unlock(inode); 136 - return rc; 137 - } 138 - 139 118 static void *ll_follow_link(struct dentry *dentry, struct nameidata *nd) 140 119 { 141 120 struct inode *inode = dentry->d_inode; ··· 154 175 } 155 176 156 177 struct inode_operations ll_fast_symlink_inode_operations = { 157 - .readlink = ll_readlink, 178 + .readlink = generic_readlink, 158 179 .setattr = ll_setattr, 159 180 .follow_link = ll_follow_link, 160 181 .put_link = ll_put_link,
+4 -4
drivers/staging/usbip/stub_dev.c
··· 86 86 struct stub_device *sdev = dev_get_drvdata(dev); 87 87 int sockfd = 0; 88 88 struct socket *socket; 89 - ssize_t err = -EINVAL; 90 89 int rv; 91 90 92 91 if (!sdev) { ··· 98 99 return -EINVAL; 99 100 100 101 if (sockfd != -1) { 102 + int err; 101 103 dev_info(dev, "stub up\n"); 102 104 103 105 spin_lock_irq(&sdev->ud.lock); ··· 108 108 goto err; 109 109 } 110 110 111 - socket = sockfd_to_socket(sockfd); 111 + socket = sockfd_lookup(sockfd, &err); 112 112 if (!socket) 113 113 goto err; 114 114 ··· 141 141 142 142 err: 143 143 spin_unlock_irq(&sdev->ud.lock); 144 - return err; 144 + return -EINVAL; 145 145 } 146 146 static DEVICE_ATTR(usbip_sockfd, S_IWUSR, NULL, store_sockfd); 147 147 ··· 211 211 * not touch NULL socket. 212 212 */ 213 213 if (ud->tcp_socket) { 214 - fput(ud->tcp_socket->file); 214 + sockfd_put(ud->tcp_socket); 215 215 ud->tcp_socket = NULL; 216 216 } 217 217
-25
drivers/staging/usbip/usbip_common.c
··· 382 382 } 383 383 EXPORT_SYMBOL_GPL(usbip_recv); 384 384 385 - struct socket *sockfd_to_socket(unsigned int sockfd) 386 - { 387 - struct socket *socket; 388 - struct file *file; 389 - struct inode *inode; 390 - 391 - file = fget(sockfd); 392 - if (!file) { 393 - pr_err("invalid sockfd\n"); 394 - return NULL; 395 - } 396 - 397 - inode = file_inode(file); 398 - 399 - if (!inode || !S_ISSOCK(inode->i_mode)) { 400 - fput(file); 401 - return NULL; 402 - } 403 - 404 - socket = SOCKET_I(inode); 405 - 406 - return socket; 407 - } 408 - EXPORT_SYMBOL_GPL(sockfd_to_socket); 409 - 410 385 /* there may be more cases to tweak the flags. */ 411 386 static unsigned int tweak_transfer_flags(unsigned int flags) 412 387 {
-1
drivers/staging/usbip/usbip_common.h
··· 299 299 void usbip_dump_header(struct usbip_header *pdu); 300 300 301 301 int usbip_recv(struct socket *sock, void *buf, int size); 302 - struct socket *sockfd_to_socket(unsigned int sockfd); 303 302 304 303 void usbip_pack_pdu(struct usbip_header *pdu, struct urb *urb, int cmd, 305 304 int pack);
+2 -2
drivers/staging/usbip/vhci_hcd.c
··· 788 788 789 789 /* active connection is closed */ 790 790 if (vdev->ud.tcp_socket) { 791 - fput(vdev->ud.tcp_socket->file); 791 + sockfd_put(vdev->ud.tcp_socket); 792 792 vdev->ud.tcp_socket = NULL; 793 793 } 794 794 pr_info("release socket\n"); ··· 835 835 vdev->udev = NULL; 836 836 837 837 if (ud->tcp_socket) { 838 - fput(ud->tcp_socket->file); 838 + sockfd_put(ud->tcp_socket); 839 839 ud->tcp_socket = NULL; 840 840 } 841 841 ud->status = VDEV_ST_NULL;
+3 -3
drivers/staging/usbip/vhci_sysfs.c
··· 176 176 struct socket *socket; 177 177 int sockfd = 0; 178 178 __u32 rhport = 0, devid = 0, speed = 0; 179 + int err; 179 180 180 181 /* 181 182 * @rhport: port number of vhci_hcd ··· 195 194 return -EINVAL; 196 195 197 196 /* Extract socket from fd. */ 198 - /* The correct way to clean this up is to fput(socket->file). */ 199 - socket = sockfd_to_socket(sockfd); 197 + socket = sockfd_lookup(sockfd, &err); 200 198 if (!socket) 201 199 return -EINVAL; 202 200 ··· 211 211 spin_unlock(&vdev->ud.lock); 212 212 spin_unlock(&the_controller->lock); 213 213 214 - fput(socket->file); 214 + sockfd_put(socket); 215 215 216 216 dev_err(dev, "port %d already used\n", rhport); 217 217 return -EINVAL;
+7 -7
drivers/vhost/net.c
··· 818 818 vhost_dev_cleanup(&n->dev, false); 819 819 vhost_net_vq_reset(n); 820 820 if (tx_sock) 821 - fput(tx_sock->file); 821 + sockfd_put(tx_sock); 822 822 if (rx_sock) 823 - fput(rx_sock->file); 823 + sockfd_put(rx_sock); 824 824 /* Make sure no callbacks are outstanding */ 825 825 synchronize_rcu_bh(); 826 826 /* We do an extra flush before freeing memory, ··· 860 860 } 861 861 return sock; 862 862 err: 863 - fput(sock->file); 863 + sockfd_put(sock); 864 864 return ERR_PTR(r); 865 865 } 866 866 ··· 966 966 967 967 if (oldsock) { 968 968 vhost_net_flush_vq(n, index); 969 - fput(oldsock->file); 969 + sockfd_put(oldsock); 970 970 } 971 971 972 972 mutex_unlock(&n->dev.mutex); ··· 978 978 if (ubufs) 979 979 vhost_net_ubuf_put_wait_and_free(ubufs); 980 980 err_ubufs: 981 - fput(sock->file); 981 + sockfd_put(sock); 982 982 err_vq: 983 983 mutex_unlock(&vq->mutex); 984 984 err: ··· 1009 1009 done: 1010 1010 mutex_unlock(&n->dev.mutex); 1011 1011 if (tx_sock) 1012 - fput(tx_sock->file); 1012 + sockfd_put(tx_sock); 1013 1013 if (rx_sock) 1014 - fput(rx_sock->file); 1014 + sockfd_put(rx_sock); 1015 1015 return err; 1016 1016 } 1017 1017
+5 -5
fs/bio.c
··· 1002 1002 }; 1003 1003 1004 1004 static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio, 1005 - struct sg_iovec *iov, int iov_count, 1005 + const struct sg_iovec *iov, int iov_count, 1006 1006 int is_our_pages) 1007 1007 { 1008 1008 memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count); ··· 1022 1022 sizeof(struct sg_iovec) * iov_count, gfp_mask); 1023 1023 } 1024 1024 1025 - static int __bio_copy_iov(struct bio *bio, struct sg_iovec *iov, int iov_count, 1025 + static int __bio_copy_iov(struct bio *bio, const struct sg_iovec *iov, int iov_count, 1026 1026 int to_user, int from_user, int do_free_page) 1027 1027 { 1028 1028 int ret = 0, i; ··· 1120 1120 */ 1121 1121 struct bio *bio_copy_user_iov(struct request_queue *q, 1122 1122 struct rq_map_data *map_data, 1123 - struct sg_iovec *iov, int iov_count, 1123 + const struct sg_iovec *iov, int iov_count, 1124 1124 int write_to_vm, gfp_t gfp_mask) 1125 1125 { 1126 1126 struct bio_map_data *bmd; ··· 1259 1259 1260 1260 static struct bio *__bio_map_user_iov(struct request_queue *q, 1261 1261 struct block_device *bdev, 1262 - struct sg_iovec *iov, int iov_count, 1262 + const struct sg_iovec *iov, int iov_count, 1263 1263 int write_to_vm, gfp_t gfp_mask) 1264 1264 { 1265 1265 int i, j; ··· 1407 1407 * device. Returns an error pointer in case of error. 1408 1408 */ 1409 1409 struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev, 1410 - struct sg_iovec *iov, int iov_count, 1410 + const struct sg_iovec *iov, int iov_count, 1411 1411 int write_to_vm, gfp_t gfp_mask) 1412 1412 { 1413 1413 struct bio *bio;
+1 -1
fs/block_dev.c
··· 1518 1518 BUG_ON(iocb->ki_pos != pos); 1519 1519 1520 1520 blk_start_plug(&plug); 1521 - ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); 1521 + ret = __generic_file_aio_write(iocb, iov, nr_segs); 1522 1522 if (ret > 0) { 1523 1523 ssize_t err; 1524 1524
+5 -11
fs/btrfs/file.c
··· 425 425 struct page *page = prepared_pages[pg]; 426 426 /* 427 427 * Copy data from userspace to the current page 428 - * 429 - * Disable pagefault to avoid recursive lock since 430 - * the pages are already locked 431 428 */ 432 - pagefault_disable(); 433 429 copied = iov_iter_copy_from_user_atomic(page, i, offset, count); 434 - pagefault_enable(); 435 430 436 431 /* Flush processor's dcache for this page */ 437 432 flush_dcache_page(page); ··· 1660 1665 static ssize_t __btrfs_direct_write(struct kiocb *iocb, 1661 1666 const struct iovec *iov, 1662 1667 unsigned long nr_segs, loff_t pos, 1663 - loff_t *ppos, size_t count, size_t ocount) 1668 + size_t count, size_t ocount) 1664 1669 { 1665 1670 struct file *file = iocb->ki_filp; 1666 1671 struct iov_iter i; ··· 1669 1674 loff_t endbyte; 1670 1675 int err; 1671 1676 1672 - written = generic_file_direct_write(iocb, iov, &nr_segs, pos, ppos, 1677 + written = generic_file_direct_write(iocb, iov, &nr_segs, pos, 1673 1678 count, ocount); 1674 1679 1675 1680 if (written < 0 || written == count) ··· 1688 1693 if (err) 1689 1694 goto out; 1690 1695 written += written_buffered; 1691 - *ppos = pos + written_buffered; 1696 + iocb->ki_pos = pos + written_buffered; 1692 1697 invalidate_mapping_pages(file->f_mapping, pos >> PAGE_CACHE_SHIFT, 1693 1698 endbyte >> PAGE_CACHE_SHIFT); 1694 1699 out: ··· 1720 1725 struct file *file = iocb->ki_filp; 1721 1726 struct inode *inode = file_inode(file); 1722 1727 struct btrfs_root *root = BTRFS_I(inode)->root; 1723 - loff_t *ppos = &iocb->ki_pos; 1724 1728 u64 start_pos; 1725 1729 u64 end_pos; 1726 1730 ssize_t num_written = 0; ··· 1790 1796 1791 1797 if (unlikely(file->f_flags & O_DIRECT)) { 1792 1798 num_written = __btrfs_direct_write(iocb, iov, nr_segs, 1793 - pos, ppos, count, ocount); 1799 + pos, count, ocount); 1794 1800 } else { 1795 1801 struct iov_iter i; 1796 1802 ··· 1798 1804 1799 1805 num_written = __btrfs_buffered_write(file, &i, pos); 1800 1806 if (num_written > 0) 1801 - *ppos = pos + num_written; 1807 + iocb->ki_pos = pos + num_written; 1802 1808 } 1803 1809 1804 1810 mutex_unlock(&inode->i_mutex);
+3 -3
fs/buffer.c
··· 2114 2114 * Returns true if all buffers which correspond to a file portion 2115 2115 * we want to read are uptodate. 2116 2116 */ 2117 - int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc, 2118 - unsigned long from) 2117 + int block_is_partially_uptodate(struct page *page, unsigned long from, 2118 + unsigned long count) 2119 2119 { 2120 2120 unsigned block_start, block_end, blocksize; 2121 2121 unsigned to; ··· 2127 2127 2128 2128 head = page_buffers(page); 2129 2129 blocksize = head->b_size; 2130 - to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count); 2130 + to = min_t(unsigned, PAGE_CACHE_SIZE - from, count); 2131 2131 to = from + to; 2132 2132 if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize) 2133 2133 return 0;
-1
fs/cachefiles/bind.c
··· 124 124 /* check parameters */ 125 125 ret = -EOPNOTSUPP; 126 126 if (!root->d_inode || 127 - !root->d_inode->i_op || 128 127 !root->d_inode->i_op->lookup || 129 128 !root->d_inode->i_op->mkdir || 130 129 !root->d_inode->i_op->setxattr ||
+1 -2
fs/cachefiles/namei.c
··· 779 779 } 780 780 781 781 ret = -EPERM; 782 - if (!subdir->d_inode->i_op || 783 - !subdir->d_inode->i_op->setxattr || 782 + if (!subdir->d_inode->i_op->setxattr || 784 783 !subdir->d_inode->i_op->getxattr || 785 784 !subdir->d_inode->i_op->lookup || 786 785 !subdir->d_inode->i_op->mkdir ||
+7 -5
fs/ceph/file.c
··· 601 601 false); 602 602 if (IS_ERR(req)) { 603 603 ret = PTR_ERR(req); 604 - goto out; 604 + break; 605 605 } 606 606 607 607 num_pages = calc_pages_for(page_align, len); ··· 719 719 false); 720 720 if (IS_ERR(req)) { 721 721 ret = PTR_ERR(req); 722 - goto out; 722 + break; 723 723 } 724 724 725 725 /* ··· 972 972 } 973 973 } else { 974 974 loff_t old_size = inode->i_size; 975 + struct iov_iter from; 975 976 /* 976 977 * No need to acquire the i_truncate_mutex. Because 977 978 * the MDS revokes Fwb caps before sending truncate ··· 980 979 * are pending vmtruncate. So write and vmtruncate 981 980 * can not run at the same time 982 981 */ 983 - written = generic_file_buffered_write(iocb, iov, nr_segs, 984 - pos, &iocb->ki_pos, 985 - count, 0); 982 + iov_iter_init(&from, iov, nr_segs, count, 0); 983 + written = generic_perform_write(file, &from, pos); 984 + if (likely(written >= 0)) 985 + iocb->ki_pos = pos + written; 986 986 if (inode->i_size > old_size) 987 987 ceph_fscache_update_objectsize(inode); 988 988 mutex_unlock(&inode->i_mutex);
-1
fs/cifs/cifsfs.c
··· 850 850 /* revalidate:cifs_revalidate, */ 851 851 .setattr = cifs_setattr, 852 852 .getattr = cifs_getattr, /* do we need this anymore? */ 853 - .rename = cifs_rename, 854 853 .permission = cifs_permission, 855 854 #ifdef CONFIG_CIFS_XATTR 856 855 .setxattr = cifs_setxattr,
+52 -76
fs/cifs/file.c
··· 2579 2579 struct cifsInodeInfo *cinode = CIFS_I(inode); 2580 2580 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; 2581 2581 ssize_t rc = -EACCES; 2582 - loff_t lock_pos = pos; 2582 + loff_t lock_pos = iocb->ki_pos; 2583 2583 2584 - if (file->f_flags & O_APPEND) 2585 - lock_pos = i_size_read(inode); 2586 2584 /* 2587 2585 * We need to hold the sem to be sure nobody modifies lock list 2588 2586 * with a brlock that prevents writing. 2589 2587 */ 2590 2588 down_read(&cinode->lock_sem); 2589 + mutex_lock(&inode->i_mutex); 2590 + if (file->f_flags & O_APPEND) 2591 + lock_pos = i_size_read(inode); 2591 2592 if (!cifs_find_lock_conflict(cfile, lock_pos, iov_length(iov, nr_segs), 2592 2593 server->vals->exclusive_lock_type, NULL, 2593 - CIFS_WRITE_OP)) 2594 - rc = generic_file_aio_write(iocb, iov, nr_segs, pos); 2594 + CIFS_WRITE_OP)) { 2595 + rc = __generic_file_aio_write(iocb, iov, nr_segs); 2596 + mutex_unlock(&inode->i_mutex); 2597 + 2598 + if (rc > 0) { 2599 + ssize_t err; 2600 + 2601 + err = generic_write_sync(file, iocb->ki_pos - rc, rc); 2602 + if (rc < 0) 2603 + rc = err; 2604 + } 2605 + } else { 2606 + mutex_unlock(&inode->i_mutex); 2607 + } 2595 2608 up_read(&cinode->lock_sem); 2596 2609 return rc; 2597 2610 } ··· 2740 2727 /** 2741 2728 * cifs_readdata_to_iov - copy data from pages in response to an iovec 2742 2729 * @rdata: the readdata response with list of pages holding data 2743 - * @iov: vector in which we should copy the data 2744 - * @nr_segs: number of segments in vector 2745 - * @offset: offset into file of the first iovec 2746 - * @copied: used to return the amount of data copied to the iov 2730 + * @iter: destination for our data 2747 2731 * 2748 2732 * This function copies data from a list of pages in a readdata response into 2749 2733 * an array of iovecs. It will first calculate where the data should go 2750 2734 * based on the info in the readdata and then copy the data into that spot. 
2751 2735 */ 2752 - static ssize_t 2753 - cifs_readdata_to_iov(struct cifs_readdata *rdata, const struct iovec *iov, 2754 - unsigned long nr_segs, loff_t offset, ssize_t *copied) 2736 + static int 2737 + cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter) 2755 2738 { 2756 - int rc = 0; 2757 - struct iov_iter ii; 2758 - size_t pos = rdata->offset - offset; 2759 - ssize_t remaining = rdata->bytes; 2760 - unsigned char *pdata; 2739 + size_t remaining = rdata->bytes; 2761 2740 unsigned int i; 2762 2741 2763 - /* set up iov_iter and advance to the correct offset */ 2764 - iov_iter_init(&ii, iov, nr_segs, iov_length(iov, nr_segs), 0); 2765 - iov_iter_advance(&ii, pos); 2766 - 2767 - *copied = 0; 2768 2742 for (i = 0; i < rdata->nr_pages; i++) { 2769 - ssize_t copy; 2770 2743 struct page *page = rdata->pages[i]; 2771 - 2772 - /* copy a whole page or whatever's left */ 2773 - copy = min_t(ssize_t, remaining, PAGE_SIZE); 2774 - 2775 - /* ...but limit it to whatever space is left in the iov */ 2776 - copy = min_t(ssize_t, copy, iov_iter_count(&ii)); 2777 - 2778 - /* go while there's data to be copied and no errors */ 2779 - if (copy && !rc) { 2780 - pdata = kmap(page); 2781 - rc = memcpy_toiovecend(ii.iov, pdata, ii.iov_offset, 2782 - (int)copy); 2783 - kunmap(page); 2784 - if (!rc) { 2785 - *copied += copy; 2786 - remaining -= copy; 2787 - iov_iter_advance(&ii, copy); 2788 - } 2789 - } 2744 + size_t copy = min(remaining, PAGE_SIZE); 2745 + size_t written = copy_page_to_iter(page, 0, copy, iter); 2746 + remaining -= written; 2747 + if (written < copy && iov_iter_count(iter) > 0) 2748 + break; 2790 2749 } 2791 - 2792 - return rc; 2750 + return remaining ? -EFAULT : 0; 2793 2751 } 2794 2752 2795 2753 static void ··· 2821 2837 return total_read > 0 ? total_read : result; 2822 2838 } 2823 2839 2824 - static ssize_t 2825 - cifs_iovec_read(struct file *file, const struct iovec *iov, 2826 - unsigned long nr_segs, loff_t *poffset) 2840 + ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov, 2841 + unsigned long nr_segs, loff_t pos) 2827 2842 { 2843 + struct file *file = iocb->ki_filp; 2828 2844 ssize_t rc; 2829 2845 size_t len, cur_len; 2830 2846 ssize_t total_read = 0; 2831 - loff_t offset = *poffset; 2847 + loff_t offset = pos; 2832 2848 unsigned int npages; 2833 2849 struct cifs_sb_info *cifs_sb; 2834 2850 struct cifs_tcon *tcon; 2835 2851 struct cifsFileInfo *open_file; 2836 2852 struct cifs_readdata *rdata, *tmp; 2837 2853 struct list_head rdata_list; 2854 + struct iov_iter to; 2838 2855 pid_t pid; 2839 2856 2840 2857 if (!nr_segs) ··· 2844 2859 len = iov_length(iov, nr_segs); 2845 2860 if (!len) 2846 2861 return 0; 2862 + 2863 + iov_iter_init(&to, iov, nr_segs, len, 0); 2847 2864 2848 2865 INIT_LIST_HEAD(&rdata_list); 2849 2866 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); ··· 2904 2917 if (!list_empty(&rdata_list)) 2905 2918 rc = 0; 2906 2919 2920 + len = iov_iter_count(&to); 2907 2921 /* the loop below should proceed in the order of increasing offsets */ 2908 - restart_loop: 2909 2922 list_for_each_entry_safe(rdata, tmp, &rdata_list, list) { 2923 + again: 2910 2924 if (!rc) { 2911 - ssize_t copied; 2912 - 2913 2925 /* FIXME: freezable sleep too? 
*/ 2914 2926 rc = wait_for_completion_killable(&rdata->done); 2915 2927 if (rc) 2916 2928 rc = -EINTR; 2917 - else if (rdata->result) 2929 + else if (rdata->result) { 2918 2930 rc = rdata->result; 2919 - else { 2920 - rc = cifs_readdata_to_iov(rdata, iov, 2921 - nr_segs, *poffset, 2922 - &copied); 2923 - total_read += copied; 2931 + /* resend call if it's a retryable error */ 2932 + if (rc == -EAGAIN) { 2933 + rc = cifs_retry_async_readv(rdata); 2934 + goto again; 2935 + } 2936 + } else { 2937 + rc = cifs_readdata_to_iov(rdata, &to); 2924 2938 } 2925 2939 2926 - /* resend call if it's a retryable error */ 2927 - if (rc == -EAGAIN) { 2928 - rc = cifs_retry_async_readv(rdata); 2929 - goto restart_loop; 2930 - } 2931 2940 } 2932 2941 list_del_init(&rdata->list); 2933 2942 kref_put(&rdata->refcount, cifs_uncached_readdata_release); 2934 2943 } 2935 2944 2945 + total_read = len - iov_iter_count(&to); 2946 + 2936 2947 cifs_stats_bytes_read(tcon, total_read); 2937 - *poffset += total_read; 2938 2948 2939 2949 /* mask nodata case */ 2940 2950 if (rc == -ENODATA) 2941 2951 rc = 0; 2942 2952 2943 - return total_read ? total_read : rc; 2944 - } 2945 - 2946 - ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov, 2947 - unsigned long nr_segs, loff_t pos) 2948 - { 2949 - ssize_t read; 2950 - 2951 - read = cifs_iovec_read(iocb->ki_filp, iov, nr_segs, &pos); 2952 - if (read > 0) 2953 - iocb->ki_pos = pos; 2954 - 2955 - return read; 2953 + if (total_read) { 2954 + iocb->ki_pos = pos + total_read; 2955 + return total_read; 2956 + } 2957 + return rc; 2956 2958 } 2957 2959 2958 2960 ssize_t
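
The cifs read path above replaces the hand-rolled kmap()/memcpy_toiovecend()
loop with copy_page_to_iter(), which copies into the iov_iter and advances it,
handling partial copies itself. Restated readably (the helper name is
illustrative; in the hunk it is cifs_readdata_to_iov()):

    static int pages_to_iter(struct page **pages, unsigned int nr_pages,
                             size_t bytes, struct iov_iter *iter)
    {
            size_t remaining = bytes;
            unsigned int i;

            for (i = 0; i < nr_pages; i++) {
                    size_t copy = min_t(size_t, remaining, PAGE_SIZE);
                    size_t written = copy_page_to_iter(pages[i], 0, copy, iter);

                    remaining -= written;
                    /* short copy with space left in the iter => user fault */
                    if (written < copy && iov_iter_count(iter) > 0)
                            break;
            }
            return remaining ? -EFAULT : 0;
    }
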
+1 -1
fs/exec.c
··· 813 813 814 814 ssize_t read_code(struct file *file, unsigned long addr, loff_t pos, size_t len) 815 815 { 816 - ssize_t res = file->f_op->read(file, (void __user *)addr, len, &pos); 816 + ssize_t res = vfs_read(file, (void __user *)addr, len, &pos); 817 817 if (res > 0) 818 818 flush_icache_range(addr, addr + len); 819 819 return res;
+1 -1
fs/ext4/file.c
··· 146 146 overwrite = 1; 147 147 } 148 148 149 - ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); 149 + ret = __generic_file_aio_write(iocb, iov, nr_segs); 150 150 mutex_unlock(&inode->i_mutex); 151 151 152 152 if (ret > 0) {
+4 -7
fs/file.c
··· 25 25 26 26 int sysctl_nr_open __read_mostly = 1024*1024; 27 27 int sysctl_nr_open_min = BITS_PER_LONG; 28 - int sysctl_nr_open_max = 1024 * 1024; /* raised later */ 28 + /* our max() is unusable in constant expressions ;-/ */ 29 + #define __const_max(x, y) ((x) < (y) ? (x) : (y)) 30 + int sysctl_nr_open_max = __const_max(INT_MAX, ~(size_t)0/sizeof(void *)) & 31 + -BITS_PER_LONG; 29 32 30 33 static void *alloc_fdmem(size_t size) 31 34 { ··· 430 427 task_unlock(tsk); 431 428 put_files_struct(files); 432 429 } 433 - } 434 - 435 - void __init files_defer_init(void) 436 - { 437 - sysctl_nr_open_max = min((size_t)INT_MAX, ~(size_t)0/sizeof(void *)) & 438 - -BITS_PER_LONG; 439 430 } 440 431 441 432 struct files_struct init_files = {
+4 -39
fs/file_table.c
··· 52 52 static inline void file_free(struct file *f) 53 53 { 54 54 percpu_counter_dec(&nr_files); 55 - file_check_state(f); 56 55 call_rcu(&f->f_u.fu_rcuhead, file_free_rcu); 57 56 } 58 57 ··· 177 178 file->f_mapping = path->dentry->d_inode->i_mapping; 178 179 file->f_mode = mode; 179 180 file->f_op = fop; 180 - 181 - /* 182 - * These mounts don't really matter in practice 183 - * for r/o bind mounts. They aren't userspace- 184 - * visible. We do this for consistency, and so 185 - * that we can do debugging checks at __fput() 186 - */ 187 - if ((mode & FMODE_WRITE) && !special_file(path->dentry->d_inode->i_mode)) { 188 - file_take_write(file); 189 - WARN_ON(mnt_clone_write(path->mnt)); 190 - } 191 181 if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) 192 182 i_readcount_inc(path->dentry->d_inode); 193 183 return file; 194 184 } 195 185 EXPORT_SYMBOL(alloc_file); 196 - 197 - /** 198 - * drop_file_write_access - give up ability to write to a file 199 - * @file: the file to which we will stop writing 200 - * 201 - * This is a central place which will give up the ability 202 - * to write to @file, along with access to write through 203 - * its vfsmount. 204 - */ 205 - static void drop_file_write_access(struct file *file) 206 - { 207 - struct vfsmount *mnt = file->f_path.mnt; 208 - struct dentry *dentry = file->f_path.dentry; 209 - struct inode *inode = dentry->d_inode; 210 - 211 - put_write_access(inode); 212 - 213 - if (special_file(inode->i_mode)) 214 - return; 215 - if (file_check_writeable(file) != 0) 216 - return; 217 - __mnt_drop_write(mnt); 218 - file_release_write(file); 219 - } 220 186 221 187 /* the real guts of fput() - releasing the last reference to file 222 188 */ ··· 217 253 put_pid(file->f_owner.pid); 218 254 if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) 219 255 i_readcount_dec(inode); 220 - if (file->f_mode & FMODE_WRITE) 221 - drop_file_write_access(file); 256 + if (file->f_mode & FMODE_WRITER) { 257 + put_write_access(inode); 258 + __mnt_drop_write(mnt); 259 + } 222 260 file->f_path.dentry = NULL; 223 261 file->f_path.mnt = NULL; 224 262 file->f_inode = NULL; ··· 325 359 326 360 n = (mempages * (PAGE_SIZE / 1024)) / 10; 327 361 files_stat.max_files = max_t(unsigned long, n, NR_FILE); 328 - files_defer_init(); 329 362 percpu_counter_init(&nr_files, 0); 330 363 }
+7 -7
fs/fuse/dev.c
··· 667 667 struct pipe_buffer *buf = cs->currbuf; 668 668 669 669 if (!cs->write) { 670 - buf->ops->unmap(cs->pipe, buf, cs->mapaddr); 670 + kunmap_atomic(cs->mapaddr); 671 671 } else { 672 - kunmap(buf->page); 672 + kunmap_atomic(cs->mapaddr); 673 673 buf->len = PAGE_SIZE - cs->len; 674 674 } 675 675 cs->currbuf = NULL; 676 676 cs->mapaddr = NULL; 677 677 } else if (cs->mapaddr) { 678 - kunmap(cs->pg); 678 + kunmap_atomic(cs->mapaddr); 679 679 if (cs->write) { 680 680 flush_dcache_page(cs->pg); 681 681 set_page_dirty_lock(cs->pg); ··· 706 706 707 707 BUG_ON(!cs->nr_segs); 708 708 cs->currbuf = buf; 709 - cs->mapaddr = buf->ops->map(cs->pipe, buf, 0); 709 + cs->mapaddr = kmap_atomic(buf->page); 710 710 cs->len = buf->len; 711 711 cs->buf = cs->mapaddr + buf->offset; 712 712 cs->pipebufs++; ··· 726 726 buf->len = 0; 727 727 728 728 cs->currbuf = buf; 729 - cs->mapaddr = kmap(page); 729 + cs->mapaddr = kmap_atomic(page); 730 730 cs->buf = cs->mapaddr; 731 731 cs->len = PAGE_SIZE; 732 732 cs->pipebufs++; ··· 745 745 return err; 746 746 BUG_ON(err != 1); 747 747 offset = cs->addr % PAGE_SIZE; 748 - cs->mapaddr = kmap(cs->pg); 748 + cs->mapaddr = kmap_atomic(cs->pg); 749 749 cs->buf = cs->mapaddr + offset; 750 750 cs->len = min(PAGE_SIZE - offset, cs->seglen); 751 751 cs->seglen -= cs->len; ··· 874 874 out_fallback_unlock: 875 875 unlock_page(newpage); 876 876 out_fallback: 877 - cs->mapaddr = buf->ops->map(cs->pipe, buf, 1); 877 + cs->mapaddr = kmap_atomic(buf->page); 878 878 cs->buf = cs->mapaddr + buf->offset; 879 879 880 880 err = lock_request(cs->fc, cs->req);
+1 -4
fs/fuse/file.c
··· 1086 1086 if (mapping_writably_mapped(mapping)) 1087 1087 flush_dcache_page(page); 1088 1088 1089 - pagefault_disable(); 1090 1089 tmp = iov_iter_copy_from_user_atomic(page, ii, offset, bytes); 1091 - pagefault_enable(); 1092 1090 flush_dcache_page(page); 1093 1091 1094 1092 mark_page_accessed(page); ··· 1235 1237 goto out; 1236 1238 1237 1239 if (file->f_flags & O_DIRECT) { 1238 - written = generic_file_direct_write(iocb, iov, &nr_segs, 1239 - pos, &iocb->ki_pos, 1240 + written = generic_file_direct_write(iocb, iov, &nr_segs, pos, 1240 1241 count, ocount); 1241 1242 if (written < 0 || written == count) 1242 1243 goto out;
+4 -1
fs/mount.h
··· 10 10 struct user_namespace *user_ns; 11 11 u64 seq; /* Sequence number to prevent loops */ 12 12 wait_queue_head_t poll; 13 - int event; 13 + u64 event; 14 14 }; 15 15 16 16 struct mnt_pcp { ··· 104 104 struct mnt_namespace *ns; 105 105 struct path root; 106 106 int (*show)(struct seq_file *, struct vfsmount *); 107 + void *cached_mount; 108 + u64 cached_event; 109 + loff_t cached_index; 107 110 }; 108 111 109 112 #define proc_mounts(p) (container_of((p), struct proc_mounts, m))
+31 -36
fs/namei.c
··· 358 358 359 359 return -EACCES; 360 360 } 361 + EXPORT_SYMBOL(generic_permission); 361 362 362 363 /* 363 364 * We _really_ want to just do "generic_permission()" without ··· 456 455 return retval; 457 456 return __inode_permission(inode, mask); 458 457 } 458 + EXPORT_SYMBOL(inode_permission); 459 459 460 460 /** 461 461 * path_get - get a reference to a path ··· 926 924 path->mnt = &parent->mnt; 927 925 return 1; 928 926 } 927 + EXPORT_SYMBOL(follow_up); 929 928 930 929 /* 931 930 * Perform an automount ··· 1088 1085 } 1089 1086 return 0; 1090 1087 } 1088 + EXPORT_SYMBOL(follow_down_one); 1091 1089 1092 1090 static inline bool managed_dentry_might_block(struct dentry *dentry) 1093 1091 { ··· 1227 1223 } 1228 1224 return 0; 1229 1225 } 1226 + EXPORT_SYMBOL(follow_down); 1230 1227 1231 1228 /* 1232 1229 * Skip to top of mountpoint pile in refwalk mode for follow_dotdot() ··· 2030 2025 *path = nd.path; 2031 2026 return res; 2032 2027 } 2028 + EXPORT_SYMBOL(kern_path); 2033 2029 2034 2030 /** 2035 2031 * vfs_path_lookup - lookup a file path relative to a dentry-vfsmount pair ··· 2055 2049 *path = nd.path; 2056 2050 return err; 2057 2051 } 2052 + EXPORT_SYMBOL(vfs_path_lookup); 2058 2053 2059 2054 /* 2060 2055 * Restricted form of lookup. Doesn't follow links, single-component only, ··· 2118 2111 2119 2112 return __lookup_hash(&this, base, 0); 2120 2113 } 2114 + EXPORT_SYMBOL(lookup_one_len); 2121 2115 2122 2116 int user_path_at_empty(int dfd, const char __user *name, unsigned flags, 2123 2117 struct path *path, int *empty) ··· 2143 2135 { 2144 2136 return user_path_at_empty(dfd, name, flags, path, NULL); 2145 2137 } 2138 + EXPORT_SYMBOL(user_path_at); 2146 2139 2147 2140 /* 2148 2141 * NB: most callers don't do anything directly with the reference to the ··· 2486 2477 mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD); 2487 2478 return NULL; 2488 2479 } 2480 + EXPORT_SYMBOL(lock_rename); 2489 2481 2490 2482 void unlock_rename(struct dentry *p1, struct dentry *p2) 2491 2483 { ··· 2496 2486 mutex_unlock(&p1->d_inode->i_sb->s_vfs_rename_mutex); 2497 2487 } 2498 2488 } 2489 + EXPORT_SYMBOL(unlock_rename); 2499 2490 2500 2491 int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, 2501 2492 bool want_excl) ··· 2517 2506 fsnotify_create(dir, dentry); 2518 2507 return error; 2519 2508 } 2509 + EXPORT_SYMBOL(vfs_create); 2520 2510 2521 2511 static int may_open(struct path *path, int acc_mode, int flag) 2522 2512 { ··· 3387 3375 fsnotify_create(dir, dentry); 3388 3376 return error; 3389 3377 } 3378 + EXPORT_SYMBOL(vfs_mknod); 3390 3379 3391 3380 static int may_mknod(umode_t mode) 3392 3381 { ··· 3477 3464 fsnotify_mkdir(dir, dentry); 3478 3465 return error; 3479 3466 } 3467 + EXPORT_SYMBOL(vfs_mkdir); 3480 3468 3481 3469 SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode) 3482 3470 { ··· 3532 3518 __d_drop(dentry); 3533 3519 spin_unlock(&dentry->d_lock); 3534 3520 } 3521 + EXPORT_SYMBOL(dentry_unhash); 3535 3522 3536 3523 int vfs_rmdir(struct inode *dir, struct dentry *dentry) 3537 3524 { ··· 3570 3555 d_delete(dentry); 3571 3556 return error; 3572 3557 } 3558 + EXPORT_SYMBOL(vfs_rmdir); 3573 3559 3574 3560 static long do_rmdir(int dfd, const char __user *pathname) 3575 3561 { ··· 3688 3672 3689 3673 return error; 3690 3674 } 3675 + EXPORT_SYMBOL(vfs_unlink); 3691 3676 3692 3677 /* 3693 3678 * Make sure that the actual truncation of the file will occur outside its ··· 3802 3785 fsnotify_create(dir, dentry); 3803 3786 return error; 3804 3787 } 3788 + 
EXPORT_SYMBOL(vfs_symlink); 3805 3789 3806 3790 SYSCALL_DEFINE3(symlinkat, const char __user *, oldname, 3807 3791 int, newdfd, const char __user *, newname) ··· 3911 3893 fsnotify_link(dir, inode, new_dentry); 3912 3894 return error; 3913 3895 } 3896 + EXPORT_SYMBOL(vfs_link); 3914 3897 3915 3898 /* 3916 3899 * Hardlinks are often used in delicate situations. We avoid ··· 4171 4152 4172 4153 return error; 4173 4154 } 4155 + EXPORT_SYMBOL(vfs_rename); 4174 4156 4175 4157 SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname, 4176 4158 int, newdfd, const char __user *, newname, unsigned int, flags) ··· 4324 4304 return sys_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0); 4325 4305 } 4326 4306 4327 - int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen, const char *link) 4307 + int readlink_copy(char __user *buffer, int buflen, const char *link) 4328 4308 { 4329 - int len; 4330 - 4331 - len = PTR_ERR(link); 4309 + int len = PTR_ERR(link); 4332 4310 if (IS_ERR(link)) 4333 4311 goto out; 4334 4312 ··· 4338 4320 out: 4339 4321 return len; 4340 4322 } 4323 + EXPORT_SYMBOL(readlink_copy); 4341 4324 4342 4325 /* 4343 4326 * A helper for ->readlink(). This should be used *ONLY* for symlinks that ··· 4356 4337 if (IS_ERR(cookie)) 4357 4338 return PTR_ERR(cookie); 4358 4339 4359 - res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd)); 4340 + res = readlink_copy(buffer, buflen, nd_get_link(&nd)); 4360 4341 if (dentry->d_inode->i_op->put_link) 4361 4342 dentry->d_inode->i_op->put_link(dentry, &nd, cookie); 4362 4343 return res; 4363 4344 } 4345 + EXPORT_SYMBOL(generic_readlink); 4364 4346 4365 4347 /* get the link contents into pagecache */ 4366 4348 static char *page_getlink(struct dentry * dentry, struct page **ppage) ··· 4381 4361 int page_readlink(struct dentry *dentry, char __user *buffer, int buflen) 4382 4362 { 4383 4363 struct page *page = NULL; 4384 - char *s = page_getlink(dentry, &page); 4385 - int res = vfs_readlink(dentry,buffer,buflen,s); 4364 + int res = readlink_copy(buffer, buflen, page_getlink(dentry, &page)); 4386 4365 if (page) { 4387 4366 kunmap(page); 4388 4367 page_cache_release(page); 4389 4368 } 4390 4369 return res; 4391 4370 } 4371 + EXPORT_SYMBOL(page_readlink); 4392 4372 4393 4373 void *page_follow_link_light(struct dentry *dentry, struct nameidata *nd) 4394 4374 { ··· 4396 4376 nd_set_link(nd, page_getlink(dentry, &page)); 4397 4377 return page; 4398 4378 } 4379 + EXPORT_SYMBOL(page_follow_link_light); 4399 4380 4400 4381 void page_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie) 4401 4382 { ··· 4407 4386 page_cache_release(page); 4408 4387 } 4409 4388 } 4389 + EXPORT_SYMBOL(page_put_link); 4410 4390 4411 4391 /* 4412 4392 * The nofs argument instructs pagecache_write_begin to pass AOP_FLAG_NOFS ··· 4445 4423 fail: 4446 4424 return err; 4447 4425 } 4426 + EXPORT_SYMBOL(__page_symlink); 4448 4427 4449 4428 int page_symlink(struct inode *inode, const char *symname, int len) 4450 4429 { 4451 4430 return __page_symlink(inode, symname, len, 4452 4431 !(mapping_gfp_mask(inode->i_mapping) & __GFP_FS)); 4453 4432 } 4433 + EXPORT_SYMBOL(page_symlink); 4454 4434 4455 4435 const struct inode_operations page_symlink_inode_operations = { 4456 4436 .readlink = generic_readlink, 4457 4437 .follow_link = page_follow_link_light, 4458 4438 .put_link = page_put_link, 4459 4439 }; 4460 - 4461 - EXPORT_SYMBOL(user_path_at); 4462 - EXPORT_SYMBOL(follow_down_one); 4463 - EXPORT_SYMBOL(follow_down); 4464 - EXPORT_SYMBOL(follow_up); 
4465 - EXPORT_SYMBOL(get_write_access); /* nfsd */ 4466 - EXPORT_SYMBOL(lock_rename); 4467 - EXPORT_SYMBOL(lookup_one_len); 4468 - EXPORT_SYMBOL(page_follow_link_light); 4469 - EXPORT_SYMBOL(page_put_link); 4470 - EXPORT_SYMBOL(page_readlink); 4471 - EXPORT_SYMBOL(__page_symlink); 4472 - EXPORT_SYMBOL(page_symlink); 4473 4440 EXPORT_SYMBOL(page_symlink_inode_operations); 4474 - EXPORT_SYMBOL(kern_path); 4475 - EXPORT_SYMBOL(vfs_path_lookup); 4476 - EXPORT_SYMBOL(inode_permission); 4477 - EXPORT_SYMBOL(unlock_rename); 4478 - EXPORT_SYMBOL(vfs_create); 4479 - EXPORT_SYMBOL(vfs_link); 4480 - EXPORT_SYMBOL(vfs_mkdir); 4481 - EXPORT_SYMBOL(vfs_mknod); 4482 - EXPORT_SYMBOL(generic_permission); 4483 - EXPORT_SYMBOL(vfs_readlink); 4484 - EXPORT_SYMBOL(vfs_rename); 4485 - EXPORT_SYMBOL(vfs_rmdir); 4486 - EXPORT_SYMBOL(vfs_symlink); 4487 - EXPORT_SYMBOL(vfs_unlink); 4488 - EXPORT_SYMBOL(dentry_unhash); 4489 - EXPORT_SYMBOL(generic_readlink);
+35 -21
fs/namespace.c
··· 52 52 } 53 53 __setup("mphash_entries=", set_mphash_entries); 54 54 55 - static int event; 55 + static u64 event; 56 56 static DEFINE_IDA(mnt_id_ida); 57 57 static DEFINE_IDA(mnt_group_ida); 58 58 static DEFINE_SPINLOCK(mnt_id_lock); ··· 414 414 */ 415 415 int __mnt_want_write_file(struct file *file) 416 416 { 417 - struct inode *inode = file_inode(file); 418 - 419 - if (!(file->f_mode & FMODE_WRITE) || special_file(inode->i_mode)) 417 + if (!(file->f_mode & FMODE_WRITER)) 420 418 return __mnt_want_write(file->f_path.mnt); 421 419 else 422 420 return mnt_clone_write(file->f_path.mnt); ··· 568 570 static void free_vfsmnt(struct mount *mnt) 569 571 { 570 572 kfree(mnt->mnt_devname); 571 - mnt_free_id(mnt); 572 573 #ifdef CONFIG_SMP 573 574 free_percpu(mnt->mnt_pcp); 574 575 #endif 575 576 kmem_cache_free(mnt_cache, mnt); 577 + } 578 + 579 + static void delayed_free_vfsmnt(struct rcu_head *head) 580 + { 581 + free_vfsmnt(container_of(head, struct mount, mnt_rcu)); 576 582 } 577 583 578 584 /* call under rcu_read_lock */ ··· 850 848 851 849 root = mount_fs(type, flags, name, data); 852 850 if (IS_ERR(root)) { 851 + mnt_free_id(mnt); 853 852 free_vfsmnt(mnt); 854 853 return ERR_CAST(root); 855 854 } ··· 888 885 goto out_free; 889 886 } 890 887 891 - mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD; 888 + mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~(MNT_WRITE_HOLD|MNT_MARKED); 892 889 /* Don't allow unprivileged users to change mount flags */ 893 890 if ((flag & CL_UNPRIVILEGED) && (mnt->mnt.mnt_flags & MNT_READONLY)) 894 891 mnt->mnt.mnt_flags |= MNT_LOCK_READONLY; ··· 931 928 return mnt; 932 929 933 930 out_free: 931 + mnt_free_id(mnt); 934 932 free_vfsmnt(mnt); 935 933 return ERR_PTR(err); 936 - } 937 - 938 - static void delayed_free(struct rcu_head *head) 939 - { 940 - struct mount *mnt = container_of(head, struct mount, mnt_rcu); 941 - kfree(mnt->mnt_devname); 942 - #ifdef CONFIG_SMP 943 - free_percpu(mnt->mnt_pcp); 944 - #endif 945 - kmem_cache_free(mnt_cache, mnt); 946 934 } 947 935 948 936 static void mntput_no_expire(struct mount *mnt) ··· 985 991 dput(mnt->mnt.mnt_root); 986 992 deactivate_super(mnt->mnt.mnt_sb); 987 993 mnt_free_id(mnt); 988 - call_rcu(&mnt->mnt_rcu, delayed_free); 994 + call_rcu(&mnt->mnt_rcu, delayed_free_vfsmnt); 989 995 } 990 996 991 997 void mntput(struct vfsmount *mnt) ··· 1094 1100 struct proc_mounts *p = proc_mounts(m); 1095 1101 1096 1102 down_read(&namespace_sem); 1097 - return seq_list_start(&p->ns->list, *pos); 1103 + if (p->cached_event == p->ns->event) { 1104 + void *v = p->cached_mount; 1105 + if (*pos == p->cached_index) 1106 + return v; 1107 + if (*pos == p->cached_index + 1) { 1108 + v = seq_list_next(v, &p->ns->list, &p->cached_index); 1109 + return p->cached_mount = v; 1110 + } 1111 + } 1112 + 1113 + p->cached_event = p->ns->event; 1114 + p->cached_mount = seq_list_start(&p->ns->list, *pos); 1115 + p->cached_index = *pos; 1116 + return p->cached_mount; 1098 1117 } 1099 1118 1100 1119 static void *m_next(struct seq_file *m, void *v, loff_t *pos) 1101 1120 { 1102 1121 struct proc_mounts *p = proc_mounts(m); 1103 1122 1104 - return seq_list_next(v, &p->ns->list, pos); 1123 + p->cached_mount = seq_list_next(v, &p->ns->list, pos); 1124 + p->cached_index = *pos; 1125 + return p->cached_mount; 1105 1126 } 1106 1127 1107 1128 static void m_stop(struct seq_file *m, void *v) ··· 1670 1661 if (err) 1671 1662 goto out; 1672 1663 err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list); 1664 + lock_mount_hash(); 1673 1665 if (err) 1674 1666 
goto out_cleanup_ids; 1675 - lock_mount_hash(); 1676 1667 for (p = source_mnt; p; p = next_mnt(p, source_mnt)) 1677 1668 set_mnt_shared(p); 1678 1669 } else { ··· 1699 1690 return 0; 1700 1691 1701 1692 out_cleanup_ids: 1693 + while (!hlist_empty(&tree_list)) { 1694 + child = hlist_entry(tree_list.first, struct mount, mnt_hash); 1695 + umount_tree(child, 0); 1696 + } 1697 + unlock_mount_hash(); 1702 1698 cleanup_group_ids(source_mnt, NULL); 1703 1699 out: 1704 1700 return err; ··· 2058 2044 struct mount *parent; 2059 2045 int err; 2060 2046 2061 - mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | MNT_DOOMED | MNT_SYNC_UMOUNT); 2047 + mnt_flags &= ~MNT_INTERNAL_FLAGS; 2062 2048 2063 2049 mp = lock_mount(path); 2064 2050 if (IS_ERR(mp))
+12 -38
fs/ncpfs/inode.c
··· 470 470 { 471 471 struct ncp_mount_data_kernel data; 472 472 struct ncp_server *server; 473 - struct file *ncp_filp; 474 473 struct inode *root_inode; 475 - struct inode *sock_inode; 476 474 struct socket *sock; 477 475 int error; 478 476 int default_bufsize; ··· 539 541 if (!uid_valid(data.mounted_uid) || !uid_valid(data.uid) || 540 542 !gid_valid(data.gid)) 541 543 goto out; 542 - error = -EBADF; 543 - ncp_filp = fget(data.ncp_fd); 544 - if (!ncp_filp) 545 - goto out; 546 - error = -ENOTSOCK; 547 - sock_inode = file_inode(ncp_filp); 548 - if (!S_ISSOCK(sock_inode->i_mode)) 549 - goto out_fput; 550 - sock = SOCKET_I(sock_inode); 544 + sock = sockfd_lookup(data.ncp_fd, &error); 551 545 if (!sock) 552 - goto out_fput; 553 - 546 + goto out; 547 + 554 548 if (sock->type == SOCK_STREAM) 555 549 default_bufsize = 0xF000; 556 550 else ··· 564 574 if (error) 565 575 goto out_fput; 566 576 567 - server->ncp_filp = ncp_filp; 568 577 server->ncp_sock = sock; 569 578 570 579 if (data.info_fd != -1) { 571 - struct socket *info_sock; 572 - 573 - error = -EBADF; 574 - server->info_filp = fget(data.info_fd); 575 - if (!server->info_filp) 576 - goto out_bdi; 577 - error = -ENOTSOCK; 578 - sock_inode = file_inode(server->info_filp); 579 - if (!S_ISSOCK(sock_inode->i_mode)) 580 - goto out_fput2; 581 - info_sock = SOCKET_I(sock_inode); 580 + struct socket *info_sock = sockfd_lookup(data.info_fd, &error); 582 581 if (!info_sock) 583 - goto out_fput2; 582 + goto out_bdi; 583 + server->info_sock = info_sock; 584 584 error = -EBADFD; 585 585 if (info_sock->type != SOCK_STREAM) 586 586 goto out_fput2; 587 - server->info_sock = info_sock; 588 587 } 589 588 590 589 /* server->lock = 0; */ ··· 745 766 mutex_destroy(&server->root_setup_lock); 746 767 mutex_destroy(&server->mutex); 747 768 out_fput2: 748 - if (server->info_filp) 749 - fput(server->info_filp); 769 + if (server->info_sock) 770 + sockfd_put(server->info_sock); 750 771 out_bdi: 751 772 bdi_destroy(&server->bdi); 752 773 out_fput: 753 - /* 23/12/1998 Marcin Dalecki <dalecki@cs.net.pl>: 754 - * 755 - * The previously used put_filp(ncp_filp); was bogus, since 756 - * it doesn't perform proper unlocking. 757 - */ 758 - fput(ncp_filp); 774 + sockfd_put(sock); 759 775 out: 760 776 put_pid(data.wdog_pid); 761 777 sb->s_fs_info = NULL; ··· 783 809 mutex_destroy(&server->root_setup_lock); 784 810 mutex_destroy(&server->mutex); 785 811 786 - if (server->info_filp) 787 - fput(server->info_filp); 788 - fput(server->ncp_filp); 812 + if (server->info_sock) 813 + sockfd_put(server->info_sock); 814 + sockfd_put(server->ncp_sock); 789 815 kill_pid(server->m.wdog_pid, SIGTERM, 1); 790 816 put_pid(server->m.wdog_pid); 791 817
-2
fs/ncpfs/ncp_fs_sb.h
··· 45 45 46 46 __u8 name_space[NCP_NUMBER_OF_VOLUMES + 2]; 47 47 48 - struct file *ncp_filp; /* File pointer to ncp socket */ 49 48 struct socket *ncp_sock;/* ncp socket */ 50 - struct file *info_filp; 51 49 struct socket *info_sock; 52 50 53 51 u8 sequence;
-2
fs/ntfs/inode.c
··· 1704 1704 iput(bvi); 1705 1705 skip_large_index_stuff: 1706 1706 /* Setup the operations for this index inode. */ 1707 - vi->i_op = NULL; 1708 - vi->i_fop = NULL; 1709 1707 vi->i_mapping->a_ops = &ntfs_mst_aops; 1710 1708 vi->i_blocks = ni->allocated_size >> 9; 1711 1709 /*
+11 -38
fs/ocfs2/cluster/tcp.c
··· 916 916 917 917 static int o2net_recv_tcp_msg(struct socket *sock, void *data, size_t len) 918 918 { 919 - int ret; 920 - mm_segment_t oldfs; 921 - struct kvec vec = { 922 - .iov_len = len, 923 - .iov_base = data, 924 - }; 925 - struct msghdr msg = { 926 - .msg_iovlen = 1, 927 - .msg_iov = (struct iovec *)&vec, 928 - .msg_flags = MSG_DONTWAIT, 929 - }; 930 - 931 - oldfs = get_fs(); 932 - set_fs(get_ds()); 933 - ret = sock_recvmsg(sock, &msg, len, msg.msg_flags); 934 - set_fs(oldfs); 935 - 936 - return ret; 919 + struct kvec vec = { .iov_len = len, .iov_base = data, }; 920 + struct msghdr msg = { .msg_flags = MSG_DONTWAIT, }; 921 + return kernel_recvmsg(sock, &msg, &vec, 1, len, msg.msg_flags); 937 922 } 938 923 939 924 static int o2net_send_tcp_msg(struct socket *sock, struct kvec *vec, 940 925 size_t veclen, size_t total) 941 926 { 942 927 int ret; 943 - mm_segment_t oldfs; 944 - struct msghdr msg = { 945 - .msg_iov = (struct iovec *)vec, 946 - .msg_iovlen = veclen, 947 - }; 928 + struct msghdr msg; 948 929 949 930 if (sock == NULL) { 950 931 ret = -EINVAL; 951 932 goto out; 952 933 } 953 934 954 - oldfs = get_fs(); 955 - set_fs(get_ds()); 956 - ret = sock_sendmsg(sock, &msg, total); 957 - set_fs(oldfs); 958 - if (ret != total) { 959 - mlog(ML_ERROR, "sendmsg returned %d instead of %zu\n", ret, 960 - total); 961 - if (ret >= 0) 962 - ret = -EPIPE; /* should be smarter, I bet */ 963 - goto out; 964 - } 965 - 966 - ret = 0; 935 + ret = kernel_sendmsg(sock, &msg, vec, veclen, total); 936 + if (likely(ret == total)) 937 + return 0; 938 + mlog(ML_ERROR, "sendmsg returned %d instead of %zu\n", ret, total); 939 + if (ret >= 0) 940 + ret = -EPIPE; /* should be smarter, I bet */ 967 941 out: 968 - if (ret < 0) 969 - mlog(0, "returning error: %d\n", ret); 942 + mlog(0, "returning error: %d\n", ret); 970 943 return ret; 971 944 } 972 945
+6 -3
fs/ocfs2/file.c
··· 2367 2367 2368 2368 if (direct_io) { 2369 2369 written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos, 2370 - ppos, count, ocount); 2370 + count, ocount); 2371 2371 if (written < 0) { 2372 2372 ret = written; 2373 2373 goto out_dio; 2374 2374 } 2375 2375 } else { 2376 + struct iov_iter from; 2377 + iov_iter_init(&from, iov, nr_segs, count, 0); 2376 2378 current->backing_dev_info = file->f_mapping->backing_dev_info; 2377 - written = generic_file_buffered_write(iocb, iov, nr_segs, *ppos, 2378 - ppos, count, 0); 2379 + written = generic_perform_write(file, &from, *ppos); 2380 + if (likely(written >= 0)) 2381 + iocb->ki_pos = *ppos + written; 2379 2382 current->backing_dev_info = NULL; 2380 2383 } 2381 2384
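This is the calling convention shared by all of the generic_file_buffered_write() conversions in this pile (ocfs2 above, xfs and ceph elsewhere): build an iov_iter, call the now-exported generic_perform_write(), and let the caller update iocb->ki_pos. A condensed sketch of that tail with made-up names, assuming i_mutex is already held:

#include <linux/fs.h>
#include <linux/aio.h>
#include <linux/uio.h>
#include <linux/sched.h>

static ssize_t demo_buffered_write(struct kiocb *iocb, const struct iovec *iov,
                                   unsigned long nr_segs, size_t count, loff_t pos)
{
        struct file *file = iocb->ki_filp;
        struct iov_iter from;
        ssize_t written;

        iov_iter_init(&from, iov, nr_segs, count, 0);
        current->backing_dev_info = file->f_mapping->backing_dev_info;
        written = generic_perform_write(file, &from, pos);
        if (likely(written >= 0))
                iocb->ki_pos = pos + written;   /* the ppos plumbing is gone */
        current->backing_dev_info = NULL;
        return written;
}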
+16 -52
fs/open.c
··· 655 655 return error; 656 656 } 657 657 658 - /* 659 - * You have to be very careful that these write 660 - * counts get cleaned up in error cases and 661 - * upon __fput(). This should probably never 662 - * be called outside of __dentry_open(). 663 - */ 664 - static inline int __get_file_write_access(struct inode *inode, 665 - struct vfsmount *mnt) 666 - { 667 - int error; 668 - error = get_write_access(inode); 669 - if (error) 670 - return error; 671 - /* 672 - * Do not take mount writer counts on 673 - * special files since no writes to 674 - * the mount itself will occur. 675 - */ 676 - if (!special_file(inode->i_mode)) { 677 - /* 678 - * Balanced in __fput() 679 - */ 680 - error = __mnt_want_write(mnt); 681 - if (error) 682 - put_write_access(inode); 683 - } 684 - return error; 685 - } 686 - 687 658 int open_check_o_direct(struct file *f) 688 659 { 689 660 /* NB: we're sure to have correct a_ops only after f_op->open */ ··· 679 708 f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK | 680 709 FMODE_PREAD | FMODE_PWRITE; 681 710 682 - if (unlikely(f->f_flags & O_PATH)) 683 - f->f_mode = FMODE_PATH; 684 - 685 711 path_get(&f->f_path); 686 712 inode = f->f_inode = f->f_path.dentry->d_inode; 687 - if (f->f_mode & FMODE_WRITE) { 688 - error = __get_file_write_access(inode, f->f_path.mnt); 689 - if (error) 690 - goto cleanup_file; 691 - if (!special_file(inode->i_mode)) 692 - file_take_write(f); 693 - } 694 - 695 713 f->f_mapping = inode->i_mapping; 696 714 697 - if (unlikely(f->f_mode & FMODE_PATH)) { 715 + if (unlikely(f->f_flags & O_PATH)) { 716 + f->f_mode = FMODE_PATH; 698 717 f->f_op = &empty_fops; 699 718 return 0; 719 + } 720 + 721 + if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) { 722 + error = get_write_access(inode); 723 + if (unlikely(error)) 724 + goto cleanup_file; 725 + error = __mnt_want_write(f->f_path.mnt); 726 + if (unlikely(error)) { 727 + put_write_access(inode); 728 + goto cleanup_file; 729 + } 730 + f->f_mode |= FMODE_WRITER; 700 731 } 701 732 702 733 /* POSIX.1-2008/SUSv4 Section XSI 2.9.7 */ ··· 737 764 738 765 cleanup_all: 739 766 fops_put(f->f_op); 740 - if (f->f_mode & FMODE_WRITE) { 767 + if (f->f_mode & FMODE_WRITER) { 741 768 put_write_access(inode); 742 - if (!special_file(inode->i_mode)) { 743 - /* 744 - * We don't consider this a real 745 - * mnt_want/drop_write() pair 746 - * because it all happenend right 747 - * here, so just reset the state. 748 - */ 749 - file_reset_write(f); 750 - __mnt_drop_write(f->f_path.mnt); 751 - } 769 + __mnt_drop_write(f->f_path.mnt); 752 770 } 753 771 cleanup_file: 754 772 path_put(&f->f_path);
+16 -117
fs/pipe.c
··· 142 142 return 0; 143 143 } 144 144 145 - static int 146 - pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len, 147 - int atomic) 148 - { 149 - unsigned long copy; 150 - 151 - while (len > 0) { 152 - while (!iov->iov_len) 153 - iov++; 154 - copy = min_t(unsigned long, len, iov->iov_len); 155 - 156 - if (atomic) { 157 - if (__copy_to_user_inatomic(iov->iov_base, from, copy)) 158 - return -EFAULT; 159 - } else { 160 - if (copy_to_user(iov->iov_base, from, copy)) 161 - return -EFAULT; 162 - } 163 - from += copy; 164 - len -= copy; 165 - iov->iov_base += copy; 166 - iov->iov_len -= copy; 167 - } 168 - return 0; 169 - } 170 - 171 - /* 172 - * Attempt to pre-fault in the user memory, so we can use atomic copies. 173 - * Returns the number of bytes not faulted in. 174 - */ 175 - static int iov_fault_in_pages_write(struct iovec *iov, unsigned long len) 176 - { 177 - while (!iov->iov_len) 178 - iov++; 179 - 180 - while (len > 0) { 181 - unsigned long this_len; 182 - 183 - this_len = min_t(unsigned long, len, iov->iov_len); 184 - if (fault_in_pages_writeable(iov->iov_base, this_len)) 185 - break; 186 - 187 - len -= this_len; 188 - iov++; 189 - } 190 - 191 - return len; 192 - } 193 - 194 145 /* 195 146 * Pre-fault in the user memory, so we can use atomic copies. 196 147 */ ··· 175 224 else 176 225 page_cache_release(page); 177 226 } 178 - 179 - /** 180 - * generic_pipe_buf_map - virtually map a pipe buffer 181 - * @pipe: the pipe that the buffer belongs to 182 - * @buf: the buffer that should be mapped 183 - * @atomic: whether to use an atomic map 184 - * 185 - * Description: 186 - * This function returns a kernel virtual address mapping for the 187 - * pipe_buffer passed in @buf. If @atomic is set, an atomic map is provided 188 - * and the caller has to be careful not to fault before calling 189 - * the unmap function. 190 - * 191 - * Note that this function calls kmap_atomic() if @atomic != 0. 192 - */ 193 - void *generic_pipe_buf_map(struct pipe_inode_info *pipe, 194 - struct pipe_buffer *buf, int atomic) 195 - { 196 - if (atomic) { 197 - buf->flags |= PIPE_BUF_FLAG_ATOMIC; 198 - return kmap_atomic(buf->page); 199 - } 200 - 201 - return kmap(buf->page); 202 - } 203 - EXPORT_SYMBOL(generic_pipe_buf_map); 204 - 205 - /** 206 - * generic_pipe_buf_unmap - unmap a previously mapped pipe buffer 207 - * @pipe: the pipe that the buffer belongs to 208 - * @buf: the buffer that should be unmapped 209 - * @map_data: the data that the mapping function returned 210 - * 211 - * Description: 212 - * This function undoes the mapping that ->map() provided. 
213 - */ 214 - void generic_pipe_buf_unmap(struct pipe_inode_info *pipe, 215 - struct pipe_buffer *buf, void *map_data) 216 - { 217 - if (buf->flags & PIPE_BUF_FLAG_ATOMIC) { 218 - buf->flags &= ~PIPE_BUF_FLAG_ATOMIC; 219 - kunmap_atomic(map_data); 220 - } else 221 - kunmap(buf->page); 222 - } 223 - EXPORT_SYMBOL(generic_pipe_buf_unmap); 224 227 225 228 /** 226 229 * generic_pipe_buf_steal - attempt to take ownership of a &pipe_buffer ··· 256 351 257 352 static const struct pipe_buf_operations anon_pipe_buf_ops = { 258 353 .can_merge = 1, 259 - .map = generic_pipe_buf_map, 260 - .unmap = generic_pipe_buf_unmap, 261 354 .confirm = generic_pipe_buf_confirm, 262 355 .release = anon_pipe_buf_release, 263 356 .steal = generic_pipe_buf_steal, ··· 264 361 265 362 static const struct pipe_buf_operations packet_pipe_buf_ops = { 266 363 .can_merge = 0, 267 - .map = generic_pipe_buf_map, 268 - .unmap = generic_pipe_buf_unmap, 269 364 .confirm = generic_pipe_buf_confirm, 270 365 .release = anon_pipe_buf_release, 271 366 .steal = generic_pipe_buf_steal, ··· 280 379 ssize_t ret; 281 380 struct iovec *iov = (struct iovec *)_iov; 282 381 size_t total_len; 382 + struct iov_iter iter; 283 383 284 384 total_len = iov_length(iov, nr_segs); 285 385 /* Null read succeeds. */ 286 386 if (unlikely(total_len == 0)) 287 387 return 0; 388 + 389 + iov_iter_init(&iter, iov, nr_segs, total_len, 0); 288 390 289 391 do_wakeup = 0; 290 392 ret = 0; ··· 298 394 int curbuf = pipe->curbuf; 299 395 struct pipe_buffer *buf = pipe->bufs + curbuf; 300 396 const struct pipe_buf_operations *ops = buf->ops; 301 - void *addr; 302 397 size_t chars = buf->len; 303 - int error, atomic; 398 + size_t written; 399 + int error; 304 400 305 401 if (chars > total_len) 306 402 chars = total_len; ··· 312 408 break; 313 409 } 314 410 315 - atomic = !iov_fault_in_pages_write(iov, chars); 316 - redo: 317 - addr = ops->map(pipe, buf, atomic); 318 - error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars, atomic); 319 - ops->unmap(pipe, buf, addr); 320 - if (unlikely(error)) { 321 - /* 322 - * Just retry with the slow path if we failed. 323 - */ 324 - if (atomic) { 325 - atomic = 0; 326 - goto redo; 327 - } 411 + written = copy_page_to_iter(buf->page, buf->offset, chars, &iter); 412 + if (unlikely(written < chars)) { 328 413 if (!ret) 329 - ret = error; 414 + ret = -EFAULT; 330 415 break; 331 416 } 332 417 ret += chars; ··· 431 538 432 539 iov_fault_in_pages_read(iov, chars); 433 540 redo1: 434 - addr = ops->map(pipe, buf, atomic); 541 + if (atomic) 542 + addr = kmap_atomic(buf->page); 543 + else 544 + addr = kmap(buf->page); 435 545 error = pipe_iov_copy_from_user(offset + addr, iov, 436 546 chars, atomic); 437 - ops->unmap(pipe, buf, addr); 547 + if (atomic) 548 + kunmap_atomic(addr); 549 + else 550 + kunmap(buf->page); 438 551 ret = error; 439 552 do_wakeup = 1; 440 553 if (error) {
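The read side of pipe.c becomes an early user of copy_page_to_iter(): the destination iovecs are described once by an iov_iter, so the prefault/atomic-retry machinery (pipe_iov_copy_to_user(), iov_fault_in_pages_write(), the ->map()/->unmap() hooks) is no longer needed; the iterator helper does its own atomic-then-slow fallback and advances itself. A small sketch of how one buffer is drained under that model, as a hypothetical helper with error handling trimmed:

#include <linux/pipe_fs_i.h>
#include <linux/uio.h>
#include <linux/errno.h>

/* Hypothetical: drain up to 'chars' bytes of one confirmed pipe_buffer. */
static ssize_t demo_drain_buf(struct pipe_buffer *buf, size_t chars,
                              struct iov_iter *iter)
{
        size_t written = copy_page_to_iter(buf->page, buf->offset, chars, iter);

        /* the iterator has already been advanced by 'written' bytes;
         * a short copy means userspace faulted */
        return written < chars ? -EFAULT : written;
}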
+119 -79
fs/pnode.c
··· 164 164 } 165 165 } 166 166 167 - /* 168 - * return the source mount to be used for cloning 169 - * 170 - * @dest the current destination mount 171 - * @last_dest the last seen destination mount 172 - * @last_src the last seen source mount 173 - * @type return CL_SLAVE if the new mount has to be 174 - * cloned as a slave. 175 - */ 176 - static struct mount *get_source(struct mount *dest, 177 - struct mount *last_dest, 178 - struct mount *last_src, 179 - int *type) 167 + static struct mount *next_group(struct mount *m, struct mount *origin) 180 168 { 181 - struct mount *p_last_src = NULL; 182 - struct mount *p_last_dest = NULL; 183 - 184 - while (last_dest != dest->mnt_master) { 185 - p_last_dest = last_dest; 186 - p_last_src = last_src; 187 - last_dest = last_dest->mnt_master; 188 - last_src = last_src->mnt_master; 189 - } 190 - 191 - if (p_last_dest) { 192 - do { 193 - p_last_dest = next_peer(p_last_dest); 194 - } while (IS_MNT_NEW(p_last_dest)); 195 - /* is that a peer of the earlier? */ 196 - if (dest == p_last_dest) { 197 - *type = CL_MAKE_SHARED; 198 - return p_last_src; 169 + while (1) { 170 + while (1) { 171 + struct mount *next; 172 + if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list)) 173 + return first_slave(m); 174 + next = next_peer(m); 175 + if (m->mnt_group_id == origin->mnt_group_id) { 176 + if (next == origin) 177 + return NULL; 178 + } else if (m->mnt_slave.next != &next->mnt_slave) 179 + break; 180 + m = next; 199 181 } 182 + /* m is the last peer */ 183 + while (1) { 184 + struct mount *master = m->mnt_master; 185 + if (m->mnt_slave.next != &master->mnt_slave_list) 186 + return next_slave(m); 187 + m = next_peer(master); 188 + if (master->mnt_group_id == origin->mnt_group_id) 189 + break; 190 + if (master->mnt_slave.next == &m->mnt_slave) 191 + break; 192 + m = master; 193 + } 194 + if (m == origin) 195 + return NULL; 200 196 } 201 - /* slave of the earlier, then */ 202 - *type = CL_SLAVE; 203 - /* beginning of peer group among the slaves? */ 204 - if (IS_MNT_SHARED(dest)) 205 - *type |= CL_MAKE_SHARED; 206 - return last_src; 197 + } 198 + 199 + /* all accesses are serialized by namespace_sem */ 200 + static struct user_namespace *user_ns; 201 + static struct mount *last_dest, *last_source, *dest_master; 202 + static struct mountpoint *mp; 203 + static struct hlist_head *list; 204 + 205 + static int propagate_one(struct mount *m) 206 + { 207 + struct mount *child; 208 + int type; 209 + /* skip ones added by this propagate_mnt() */ 210 + if (IS_MNT_NEW(m)) 211 + return 0; 212 + /* skip if mountpoint isn't covered by it */ 213 + if (!is_subdir(mp->m_dentry, m->mnt.mnt_root)) 214 + return 0; 215 + if (m->mnt_group_id == last_dest->mnt_group_id) { 216 + type = CL_MAKE_SHARED; 217 + } else { 218 + struct mount *n, *p; 219 + for (n = m; ; n = p) { 220 + p = n->mnt_master; 221 + if (p == dest_master || IS_MNT_MARKED(p)) { 222 + while (last_dest->mnt_master != p) { 223 + last_source = last_source->mnt_master; 224 + last_dest = last_source->mnt_parent; 225 + } 226 + if (n->mnt_group_id != last_dest->mnt_group_id) { 227 + last_source = last_source->mnt_master; 228 + last_dest = last_source->mnt_parent; 229 + } 230 + break; 231 + } 232 + } 233 + type = CL_SLAVE; 234 + /* beginning of peer group among the slaves? 
*/ 235 + if (IS_MNT_SHARED(m)) 236 + type |= CL_MAKE_SHARED; 237 + } 238 + 239 + /* Notice when we are propagating across user namespaces */ 240 + if (m->mnt_ns->user_ns != user_ns) 241 + type |= CL_UNPRIVILEGED; 242 + child = copy_tree(last_source, last_source->mnt.mnt_root, type); 243 + if (IS_ERR(child)) 244 + return PTR_ERR(child); 245 + mnt_set_mountpoint(m, mp, child); 246 + last_dest = m; 247 + last_source = child; 248 + if (m->mnt_master != dest_master) { 249 + read_seqlock_excl(&mount_lock); 250 + SET_MNT_MARK(m->mnt_master); 251 + read_sequnlock_excl(&mount_lock); 252 + } 253 + hlist_add_head(&child->mnt_hash, list); 254 + return 0; 207 255 } 208 256 209 257 /* ··· 270 222 int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp, 271 223 struct mount *source_mnt, struct hlist_head *tree_list) 272 224 { 273 - struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns; 274 - struct mount *m, *child; 225 + struct mount *m, *n; 275 226 int ret = 0; 276 - struct mount *prev_dest_mnt = dest_mnt; 277 - struct mount *prev_src_mnt = source_mnt; 278 - HLIST_HEAD(tmp_list); 279 227 280 - for (m = propagation_next(dest_mnt, dest_mnt); m; 281 - m = propagation_next(m, dest_mnt)) { 282 - int type; 283 - struct mount *source; 228 + /* 229 + * we don't want to bother passing tons of arguments to 230 + * propagate_one(); everything is serialized by namespace_sem, 231 + * so globals will do just fine. 232 + */ 233 + user_ns = current->nsproxy->mnt_ns->user_ns; 234 + last_dest = dest_mnt; 235 + last_source = source_mnt; 236 + mp = dest_mp; 237 + list = tree_list; 238 + dest_master = dest_mnt->mnt_master; 284 239 285 - if (IS_MNT_NEW(m)) 286 - continue; 287 - 288 - source = get_source(m, prev_dest_mnt, prev_src_mnt, &type); 289 - 290 - /* Notice when we are propagating across user namespaces */ 291 - if (m->mnt_ns->user_ns != user_ns) 292 - type |= CL_UNPRIVILEGED; 293 - 294 - child = copy_tree(source, source->mnt.mnt_root, type); 295 - if (IS_ERR(child)) { 296 - ret = PTR_ERR(child); 297 - tmp_list = *tree_list; 298 - tmp_list.first->pprev = &tmp_list.first; 299 - INIT_HLIST_HEAD(tree_list); 240 + /* all peers of dest_mnt, except dest_mnt itself */ 241 + for (n = next_peer(dest_mnt); n != dest_mnt; n = next_peer(n)) { 242 + ret = propagate_one(n); 243 + if (ret) 300 244 goto out; 301 - } 245 + } 302 246 303 - if (is_subdir(dest_mp->m_dentry, m->mnt.mnt_root)) { 304 - mnt_set_mountpoint(m, dest_mp, child); 305 - hlist_add_head(&child->mnt_hash, tree_list); 306 - } else { 307 - /* 308 - * This can happen if the parent mount was bind mounted 309 - * on some subdirectory of a shared/slave mount. 
310 - */ 311 - hlist_add_head(&child->mnt_hash, &tmp_list); 312 - } 313 - prev_dest_mnt = m; 314 - prev_src_mnt = child; 247 + /* all slave groups */ 248 + for (m = next_group(dest_mnt, dest_mnt); m; 249 + m = next_group(m, dest_mnt)) { 250 + /* everything in that slave group */ 251 + n = m; 252 + do { 253 + ret = propagate_one(n); 254 + if (ret) 255 + goto out; 256 + n = next_peer(n); 257 + } while (n != m); 315 258 } 316 259 out: 317 - lock_mount_hash(); 318 - while (!hlist_empty(&tmp_list)) { 319 - child = hlist_entry(tmp_list.first, struct mount, mnt_hash); 320 - umount_tree(child, 0); 260 + read_seqlock_excl(&mount_lock); 261 + hlist_for_each_entry(n, tree_list, mnt_hash) { 262 + m = n->mnt_parent; 263 + if (m->mnt_master != dest_mnt->mnt_master) 264 + CLEAR_MNT_MARK(m->mnt_master); 321 265 } 322 - unlock_mount_hash(); 266 + read_sequnlock_excl(&mount_lock); 323 267 return ret; 324 268 } 325 269
+3
fs/pnode.h
··· 16 16 #define IS_MNT_NEW(m) (!(m)->mnt_ns) 17 17 #define CLEAR_MNT_SHARED(m) ((m)->mnt.mnt_flags &= ~MNT_SHARED) 18 18 #define IS_MNT_UNBINDABLE(m) ((m)->mnt.mnt_flags & MNT_UNBINDABLE) 19 + #define IS_MNT_MARKED(m) ((m)->mnt.mnt_flags & MNT_MARKED) 20 + #define SET_MNT_MARK(m) ((m)->mnt.mnt_flags |= MNT_MARKED) 21 + #define CLEAR_MNT_MARK(m) ((m)->mnt.mnt_flags &= ~MNT_MARKED) 19 22 20 23 #define CL_EXPIRE 0x01 21 24 #define CL_SLAVE 0x02
+4 -10
fs/proc/namespaces.c
··· 146 146 struct task_struct *task; 147 147 void *ns; 148 148 char name[50]; 149 - int len = -EACCES; 149 + int res = -EACCES; 150 150 151 151 task = get_proc_task(inode); 152 152 if (!task) ··· 155 155 if (!ptrace_may_access(task, PTRACE_MODE_READ)) 156 156 goto out_put_task; 157 157 158 - len = -ENOENT; 158 + res = -ENOENT; 159 159 ns = ns_ops->get(task); 160 160 if (!ns) 161 161 goto out_put_task; 162 162 163 163 snprintf(name, sizeof(name), "%s:[%u]", ns_ops->name, ns_ops->inum(ns)); 164 - len = strlen(name); 165 - 166 - if (len > buflen) 167 - len = buflen; 168 - if (copy_to_user(buffer, name, len)) 169 - len = -EFAULT; 170 - 164 + res = readlink_copy(buffer, buflen, name); 171 165 ns_ops->put(ns); 172 166 out_put_task: 173 167 put_task_struct(task); 174 168 out: 175 - return len; 169 + return res; 176 170 } 177 171 178 172 static const struct inode_operations proc_ns_link_inode_operations = {
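readlink_copy() used here is the renamed vfs_readlink() minus its unused dentry argument (see the fs/proc/self.c and include/linux/fs.h hunks below); it copies a NUL-terminated target into the user buffer, truncating to buflen, and returns roughly the copied length or -EFAULT. A minimal ->readlink() built on it, with hypothetical names:

#include <linux/fs.h>

/* Hypothetical readlink method for a synthetic symlink. */
static int demo_readlink(struct dentry *dentry, char __user *buffer, int buflen)
{
        return readlink_copy(buffer, buflen, "target-of-demo-link");
}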
+1 -1
fs/proc/self.c
··· 16 16 if (!tgid) 17 17 return -ENOENT; 18 18 sprintf(tmp, "%d", tgid); 19 - return vfs_readlink(dentry,buffer,buflen,tmp); 19 + return readlink_copy(buffer, buflen, tmp); 20 20 } 21 21 22 22 static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
+1
fs/proc_namespace.c
··· 267 267 p->root = root; 268 268 p->m.poll_event = ns->event; 269 269 p->show = show; 270 + p->cached_event = ~0ULL; 270 271 271 272 return 0; 272 273
+27 -103
fs/splice.c
··· 136 136 137 137 const struct pipe_buf_operations page_cache_pipe_buf_ops = { 138 138 .can_merge = 0, 139 - .map = generic_pipe_buf_map, 140 - .unmap = generic_pipe_buf_unmap, 141 139 .confirm = page_cache_pipe_buf_confirm, 142 140 .release = page_cache_pipe_buf_release, 143 141 .steal = page_cache_pipe_buf_steal, ··· 154 156 155 157 static const struct pipe_buf_operations user_page_pipe_buf_ops = { 156 158 .can_merge = 0, 157 - .map = generic_pipe_buf_map, 158 - .unmap = generic_pipe_buf_unmap, 159 159 .confirm = generic_pipe_buf_confirm, 160 160 .release = page_cache_pipe_buf_release, 161 161 .steal = user_page_pipe_buf_steal, ··· 543 547 544 548 static const struct pipe_buf_operations default_pipe_buf_ops = { 545 549 .can_merge = 0, 546 - .map = generic_pipe_buf_map, 547 - .unmap = generic_pipe_buf_unmap, 548 550 .confirm = generic_pipe_buf_confirm, 549 551 .release = generic_pipe_buf_release, 550 552 .steal = generic_pipe_buf_steal, ··· 558 564 /* Pipe buffer operations for a socket and similar. */ 559 565 const struct pipe_buf_operations nosteal_pipe_buf_ops = { 560 566 .can_merge = 0, 561 - .map = generic_pipe_buf_map, 562 - .unmap = generic_pipe_buf_unmap, 563 567 .confirm = generic_pipe_buf_confirm, 564 568 .release = generic_pipe_buf_release, 565 569 .steal = generic_pipe_buf_nosteal, ··· 759 767 goto out; 760 768 761 769 if (buf->page != page) { 762 - char *src = buf->ops->map(pipe, buf, 1); 770 + char *src = kmap_atomic(buf->page); 763 771 char *dst = kmap_atomic(page); 764 772 765 773 memcpy(dst + offset, src + buf->offset, this_len); 766 774 flush_dcache_page(page); 767 775 kunmap_atomic(dst); 768 - buf->ops->unmap(pipe, buf, src); 776 + kunmap_atomic(src); 769 777 } 770 778 ret = pagecache_write_end(file, mapping, sd->pos, this_len, this_len, 771 779 page, fsdata); ··· 1059 1067 void *data; 1060 1068 loff_t tmp = sd->pos; 1061 1069 1062 - data = buf->ops->map(pipe, buf, 0); 1070 + data = kmap(buf->page); 1063 1071 ret = __kernel_write(sd->u.file, data + buf->offset, sd->len, &tmp); 1064 - buf->ops->unmap(pipe, buf, data); 1072 + kunmap(buf->page); 1065 1073 1066 1074 return ret; 1067 1075 } ··· 1520 1528 static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf, 1521 1529 struct splice_desc *sd) 1522 1530 { 1523 - char *src; 1524 - int ret; 1525 - 1526 - /* 1527 - * See if we can use the atomic maps, by prefaulting in the 1528 - * pages and doing an atomic copy 1529 - */ 1530 - if (!fault_in_pages_writeable(sd->u.userptr, sd->len)) { 1531 - src = buf->ops->map(pipe, buf, 1); 1532 - ret = __copy_to_user_inatomic(sd->u.userptr, src + buf->offset, 1533 - sd->len); 1534 - buf->ops->unmap(pipe, buf, src); 1535 - if (!ret) { 1536 - ret = sd->len; 1537 - goto out; 1538 - } 1539 - } 1540 - 1541 - /* 1542 - * No dice, use slow non-atomic map and copy 1543 - */ 1544 - src = buf->ops->map(pipe, buf, 0); 1545 - 1546 - ret = sd->len; 1547 - if (copy_to_user(sd->u.userptr, src + buf->offset, sd->len)) 1548 - ret = -EFAULT; 1549 - 1550 - buf->ops->unmap(pipe, buf, src); 1551 - out: 1552 - if (ret > 0) 1553 - sd->u.userptr += ret; 1554 - return ret; 1531 + int n = copy_page_to_iter(buf->page, buf->offset, sd->len, sd->u.data); 1532 + return n == sd->len ? n : -EFAULT; 1555 1533 } 1556 1534 1557 1535 /* 1558 1536 * For lack of a better implementation, implement vmsplice() to userspace 1559 1537 * as a simple copy of the pipes pages to the user iov. 
1560 1538 */ 1561 - static long vmsplice_to_user(struct file *file, const struct iovec __user *iov, 1539 + static long vmsplice_to_user(struct file *file, const struct iovec __user *uiov, 1562 1540 unsigned long nr_segs, unsigned int flags) 1563 1541 { 1564 1542 struct pipe_inode_info *pipe; 1565 1543 struct splice_desc sd; 1566 - ssize_t size; 1567 - int error; 1568 1544 long ret; 1545 + struct iovec iovstack[UIO_FASTIOV]; 1546 + struct iovec *iov = iovstack; 1547 + struct iov_iter iter; 1548 + ssize_t count = 0; 1569 1549 1570 1550 pipe = get_pipe_info(file); 1571 1551 if (!pipe) 1572 1552 return -EBADF; 1573 1553 1554 + ret = rw_copy_check_uvector(READ, uiov, nr_segs, 1555 + ARRAY_SIZE(iovstack), iovstack, &iov); 1556 + if (ret <= 0) 1557 + return ret; 1558 + 1559 + iov_iter_init(&iter, iov, nr_segs, count, 0); 1560 + 1561 + sd.len = 0; 1562 + sd.total_len = count; 1563 + sd.flags = flags; 1564 + sd.u.data = &iter; 1565 + sd.pos = 0; 1566 + 1574 1567 pipe_lock(pipe); 1575 - 1576 - error = ret = 0; 1577 - while (nr_segs) { 1578 - void __user *base; 1579 - size_t len; 1580 - 1581 - /* 1582 - * Get user address base and length for this iovec. 1583 - */ 1584 - error = get_user(base, &iov->iov_base); 1585 - if (unlikely(error)) 1586 - break; 1587 - error = get_user(len, &iov->iov_len); 1588 - if (unlikely(error)) 1589 - break; 1590 - 1591 - /* 1592 - * Sanity check this iovec. 0 read succeeds. 1593 - */ 1594 - if (unlikely(!len)) 1595 - break; 1596 - if (unlikely(!base)) { 1597 - error = -EFAULT; 1598 - break; 1599 - } 1600 - 1601 - if (unlikely(!access_ok(VERIFY_WRITE, base, len))) { 1602 - error = -EFAULT; 1603 - break; 1604 - } 1605 - 1606 - sd.len = 0; 1607 - sd.total_len = len; 1608 - sd.flags = flags; 1609 - sd.u.userptr = base; 1610 - sd.pos = 0; 1611 - 1612 - size = __splice_from_pipe(pipe, &sd, pipe_to_user); 1613 - if (size < 0) { 1614 - if (!ret) 1615 - ret = size; 1616 - 1617 - break; 1618 - } 1619 - 1620 - ret += size; 1621 - 1622 - if (size < len) 1623 - break; 1624 - 1625 - nr_segs--; 1626 - iov++; 1627 - } 1628 - 1568 + ret = __splice_from_pipe(pipe, &sd, pipe_to_user); 1629 1569 pipe_unlock(pipe); 1630 1570 1631 - if (!ret) 1632 - ret = error; 1571 + if (iov != iovstack) 1572 + kfree(iov); 1633 1573 1634 1574 return ret; 1635 1575 }
+1 -1
fs/udf/file.c
··· 171 171 } else 172 172 up_write(&iinfo->i_data_sem); 173 173 174 - retval = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); 174 + retval = __generic_file_aio_write(iocb, iov, nr_segs); 175 175 mutex_unlock(&inode->i_mutex); 176 176 177 177 if (retval > 0) {
+7 -6
fs/xfs/xfs_file.c
··· 699 699 700 700 trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0); 701 701 ret = generic_file_direct_write(iocb, iovp, 702 - &nr_segs, pos, &iocb->ki_pos, count, ocount); 702 + &nr_segs, pos, count, ocount); 703 703 704 704 out: 705 705 xfs_rw_iunlock(ip, iolock); ··· 715 715 const struct iovec *iovp, 716 716 unsigned long nr_segs, 717 717 loff_t pos, 718 - size_t ocount) 718 + size_t count) 719 719 { 720 720 struct file *file = iocb->ki_filp; 721 721 struct address_space *mapping = file->f_mapping; ··· 724 724 ssize_t ret; 725 725 int enospc = 0; 726 726 int iolock = XFS_IOLOCK_EXCL; 727 - size_t count = ocount; 727 + struct iov_iter from; 728 728 729 729 xfs_rw_ilock(ip, iolock); 730 730 ··· 732 732 if (ret) 733 733 goto out; 734 734 735 + iov_iter_init(&from, iovp, nr_segs, count, 0); 735 736 /* We can write back this queue in page reclaim */ 736 737 current->backing_dev_info = mapping->backing_dev_info; 737 738 738 739 write_retry: 739 740 trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0); 740 - ret = generic_file_buffered_write(iocb, iovp, nr_segs, 741 - pos, &iocb->ki_pos, count, 0); 742 - 741 + ret = generic_perform_write(file, &from, pos); 742 + if (likely(ret >= 0)) 743 + iocb->ki_pos = pos + ret; 743 744 /* 744 745 * If we just got an ENOSPC, try to write back all dirty inodes to 745 746 * convert delalloc space to free up some of the excess reserved
+1 -27
fs/xfs/xfs_ioctl.c
··· 271 271 return error; 272 272 } 273 273 274 - /* 275 - * This is a copy from fs/namei.c:vfs_readlink(), except for removing it's 276 - * unused first argument. 277 - */ 278 - STATIC int 279 - do_readlink( 280 - char __user *buffer, 281 - int buflen, 282 - const char *link) 283 - { 284 - int len; 285 - 286 - len = PTR_ERR(link); 287 - if (IS_ERR(link)) 288 - goto out; 289 - 290 - len = strlen(link); 291 - if (len > (unsigned) buflen) 292 - len = buflen; 293 - if (copy_to_user(buffer, link, len)) 294 - len = -EFAULT; 295 - out: 296 - return len; 297 - } 298 - 299 - 300 274 int 301 275 xfs_readlink_by_handle( 302 276 struct file *parfilp, ··· 308 334 error = -xfs_readlink(XFS_I(dentry->d_inode), link); 309 335 if (error) 310 336 goto out_kfree; 311 - error = do_readlink(hreq->ohandle, olen, link); 337 + error = readlink_copy(hreq->ohandle, olen, link); 312 338 if (error) 313 339 goto out_kfree; 314 340
+3 -2
include/linux/bio.h
··· 388 388 struct rq_map_data; 389 389 extern struct bio *bio_map_user_iov(struct request_queue *, 390 390 struct block_device *, 391 - struct sg_iovec *, int, int, gfp_t); 391 + const struct sg_iovec *, int, int, gfp_t); 392 392 extern void bio_unmap_user(struct bio *); 393 393 extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int, 394 394 gfp_t); ··· 414 414 extern struct bio *bio_copy_user(struct request_queue *, struct rq_map_data *, 415 415 unsigned long, unsigned int, int, gfp_t); 416 416 extern struct bio *bio_copy_user_iov(struct request_queue *, 417 - struct rq_map_data *, struct sg_iovec *, 417 + struct rq_map_data *, 418 + const struct sg_iovec *, 418 419 int, int, gfp_t); 419 420 extern int bio_uncopy_user(struct bio *); 420 421 void zero_fill_bio(struct bio *bio);
+2 -2
include/linux/blkdev.h
··· 835 835 extern int blk_rq_unmap_user(struct bio *); 836 836 extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t); 837 837 extern int blk_rq_map_user_iov(struct request_queue *, struct request *, 838 - struct rq_map_data *, struct sg_iovec *, int, 839 - unsigned int, gfp_t); 838 + struct rq_map_data *, const struct sg_iovec *, 839 + int, unsigned int, gfp_t); 840 840 extern int blk_execute_rq(struct request_queue *, struct gendisk *, 841 841 struct request *, int); 842 842 extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
+2 -2
include/linux/buffer_head.h
··· 210 210 int block_write_full_page_endio(struct page *page, get_block_t *get_block, 211 211 struct writeback_control *wbc, bh_end_io_t *handler); 212 212 int block_read_full_page(struct page*, get_block_t*); 213 - int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc, 214 - unsigned long from); 213 + int block_is_partially_uptodate(struct page *page, unsigned long from, 214 + unsigned long count); 215 215 int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len, 216 216 unsigned flags, struct page **pagep, get_block_t *get_block); 217 217 int __block_write_begin(struct page *page, loff_t pos, unsigned len,
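->is_partially_uptodate() now receives the offset into the page and the number of bytes the reader wants instead of a read_descriptor_t; do_generic_file_read() below calls it as ->is_partially_uptodate(page, offset, iter->count). block_is_partially_uptodate() already matches the new prototype, so block-based filesystems keep pointing at it. An illustrative aops fragment under those assumptions (hypothetical, other methods omitted):

#include <linux/fs.h>
#include <linux/buffer_head.h>

static const struct address_space_operations demo_aops = {
        /* ...readpage/writepage and friends elided... */
        .is_partially_uptodate  = block_is_partially_uptodate,
};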
-2
include/linux/fdtable.h
··· 63 63 struct vfsmount; 64 64 struct dentry; 65 65 66 - extern void __init files_defer_init(void); 67 - 68 66 #define rcu_dereference_check_fdtable(files, fdtfd) \ 69 67 rcu_dereference_check((fdtfd), lockdep_is_held(&(files)->file_lock)) 70 68
+8 -89
include/linux/fs.h
··· 48 48 struct swap_info_struct; 49 49 struct seq_file; 50 50 struct workqueue_struct; 51 + struct iov_iter; 51 52 52 53 extern void __init inode_init(void); 53 54 extern void __init inode_init_early(void); ··· 126 125 127 126 /* File needs atomic accesses to f_pos */ 128 127 #define FMODE_ATOMIC_POS ((__force fmode_t)0x8000) 128 + /* Write access to underlying fs */ 129 + #define FMODE_WRITER ((__force fmode_t)0x10000) 129 130 130 131 /* File was opened by fanotify and shouldn't generate fanotify events */ 131 132 #define FMODE_NONOTIFY ((__force fmode_t)0x1000000) ··· 296 293 struct address_space; 297 294 struct writeback_control; 298 295 299 - struct iov_iter { 300 - const struct iovec *iov; 301 - unsigned long nr_segs; 302 - size_t iov_offset; 303 - size_t count; 304 - }; 305 - 306 - size_t iov_iter_copy_from_user_atomic(struct page *page, 307 - struct iov_iter *i, unsigned long offset, size_t bytes); 308 - size_t iov_iter_copy_from_user(struct page *page, 309 - struct iov_iter *i, unsigned long offset, size_t bytes); 310 - void iov_iter_advance(struct iov_iter *i, size_t bytes); 311 - int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes); 312 - size_t iov_iter_single_seg_count(const struct iov_iter *i); 313 - 314 - static inline void iov_iter_init(struct iov_iter *i, 315 - const struct iovec *iov, unsigned long nr_segs, 316 - size_t count, size_t written) 317 - { 318 - i->iov = iov; 319 - i->nr_segs = nr_segs; 320 - i->iov_offset = 0; 321 - i->count = count + written; 322 - 323 - iov_iter_advance(i, written); 324 - } 325 - 326 - static inline size_t iov_iter_count(struct iov_iter *i) 327 - { 328 - return i->count; 329 - } 330 - 331 296 /* 332 297 * "descriptor" for what we're up to with a read. 333 298 * This allows us to use the same read code yet ··· 354 383 int (*migratepage) (struct address_space *, 355 384 struct page *, struct page *, enum migrate_mode); 356 385 int (*launder_page) (struct page *); 357 - int (*is_partially_uptodate) (struct page *, read_descriptor_t *, 386 + int (*is_partially_uptodate) (struct page *, unsigned long, 358 387 unsigned long); 359 388 void (*is_dirty_writeback) (struct page *, bool *, bool *); 360 389 int (*error_remove_page)(struct address_space *, struct page *); ··· 741 770 index < ra->start + ra->size); 742 771 } 743 772 744 - #define FILE_MNT_WRITE_TAKEN 1 745 - #define FILE_MNT_WRITE_RELEASED 2 746 - 747 773 struct file { 748 774 union { 749 775 struct llist_node fu_llist; ··· 778 810 struct list_head f_tfile_llink; 779 811 #endif /* #ifdef CONFIG_EPOLL */ 780 812 struct address_space *f_mapping; 781 - #ifdef CONFIG_DEBUG_WRITECOUNT 782 - unsigned long f_mnt_write_state; 783 - #endif 784 813 } __attribute__((aligned(4))); /* lest something weird decides that 2 is OK */ 785 814 786 815 struct file_handle { ··· 794 829 } 795 830 #define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1) 796 831 #define file_count(x) atomic_long_read(&(x)->f_count) 797 - 798 - #ifdef CONFIG_DEBUG_WRITECOUNT 799 - static inline void file_take_write(struct file *f) 800 - { 801 - WARN_ON(f->f_mnt_write_state != 0); 802 - f->f_mnt_write_state = FILE_MNT_WRITE_TAKEN; 803 - } 804 - static inline void file_release_write(struct file *f) 805 - { 806 - f->f_mnt_write_state |= FILE_MNT_WRITE_RELEASED; 807 - } 808 - static inline void file_reset_write(struct file *f) 809 - { 810 - f->f_mnt_write_state = 0; 811 - } 812 - static inline void file_check_state(struct file *f) 813 - { 814 - /* 815 - * At this point, either both or neither of these bits 816 - 
* should be set. 817 - */ 818 - WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN); 819 - WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_RELEASED); 820 - } 821 - static inline int file_check_writeable(struct file *f) 822 - { 823 - if (f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN) 824 - return 0; 825 - printk(KERN_WARNING "writeable file with no " 826 - "mnt_want_write()\n"); 827 - WARN_ON(1); 828 - return -EINVAL; 829 - } 830 - #else /* !CONFIG_DEBUG_WRITECOUNT */ 831 - static inline void file_take_write(struct file *filp) {} 832 - static inline void file_release_write(struct file *filp) {} 833 - static inline void file_reset_write(struct file *filp) {} 834 - static inline void file_check_state(struct file *filp) {} 835 - static inline int file_check_writeable(struct file *filp) 836 - { 837 - return 0; 838 - } 839 - #endif /* CONFIG_DEBUG_WRITECOUNT */ 840 832 841 833 #define MAX_NON_LFS ((1UL<<31) - 1) 842 834 ··· 2403 2481 extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); 2404 2482 extern int generic_file_remap_pages(struct vm_area_struct *, unsigned long addr, 2405 2483 unsigned long size, pgoff_t pgoff); 2406 - extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size); 2407 2484 int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk); 2408 2485 extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); 2409 - extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, 2410 - loff_t *); 2486 + extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long); 2411 2487 extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); 2412 2488 extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *, 2413 - unsigned long *, loff_t, loff_t *, size_t, size_t); 2414 - extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *, 2415 - unsigned long, loff_t, loff_t *, size_t, ssize_t); 2489 + unsigned long *, loff_t, size_t, size_t); 2490 + extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t); 2416 2491 extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos); 2417 2492 extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos); 2418 2493 extern int generic_segment_checks(const struct iovec *iov, ··· 2501 2582 2502 2583 #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) 2503 2584 2504 - extern int vfs_readlink(struct dentry *, char __user *, int, const char *); 2585 + extern int readlink_copy(char __user *, int, const char *); 2505 2586 extern int page_readlink(struct dentry *, char __user *, int); 2506 2587 extern void *page_follow_link_light(struct dentry *, struct nameidata *); 2507 2588 extern void page_put_link(struct dentry *, struct nameidata *, void *);
+3
include/linux/mount.h
··· 44 44 #define MNT_SHARED_MASK (MNT_UNBINDABLE) 45 45 #define MNT_PROPAGATION_MASK (MNT_SHARED | MNT_UNBINDABLE) 46 46 47 + #define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \ 48 + MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED) 47 49 48 50 #define MNT_INTERNAL 0x4000 49 51 ··· 53 51 #define MNT_LOCKED 0x800000 54 52 #define MNT_DOOMED 0x1000000 55 53 #define MNT_SYNC_UMOUNT 0x2000000 54 + #define MNT_MARKED 0x4000000 56 55 57 56 struct vfsmount { 58 57 struct dentry *mnt_root; /* root of the mounted tree */
+1 -2
include/linux/nbd.h
··· 24 24 struct nbd_device { 25 25 int flags; 26 26 int harderror; /* Code of hard error */ 27 - struct socket * sock; 28 - struct file * file; /* If == NULL, device is not ready, yet */ 27 + struct socket * sock; /* If == NULL, device is not ready, yet */ 29 28 int magic; 30 29 31 30 spinlock_t queue_lock;
-19
include/linux/pipe_fs_i.h
··· 83 83 int can_merge; 84 84 85 85 /* 86 - * ->map() returns a virtual address mapping of the pipe buffer. 87 - * The last integer flag reflects whether this should be an atomic 88 - * mapping or not. The atomic map is faster, however you can't take 89 - * page faults before calling ->unmap() again. So if you need to eg 90 - * access user data through copy_to/from_user(), then you must get 91 - * a non-atomic map. ->map() uses the kmap_atomic slot for 92 - * atomic maps, you have to be careful if mapping another page as 93 - * source or destination for a copy. 94 - */ 95 - void * (*map)(struct pipe_inode_info *, struct pipe_buffer *, int); 96 - 97 - /* 98 - * Undoes ->map(), finishes the virtual mapping of the pipe buffer. 99 - */ 100 - void (*unmap)(struct pipe_inode_info *, struct pipe_buffer *, void *); 101 - 102 - /* 103 86 * ->confirm() verifies that the data in the pipe buffer is there 104 87 * and that the contents are good. If the pages in the pipe belong 105 88 * to a file system, we may need to wait for IO completion in this ··· 133 150 void free_pipe_info(struct pipe_inode_info *); 134 151 135 152 /* Generic pipe buffer ops functions */ 136 - void *generic_pipe_buf_map(struct pipe_inode_info *, struct pipe_buffer *, int); 137 - void generic_pipe_buf_unmap(struct pipe_inode_info *, struct pipe_buffer *, void *); 138 153 void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *); 139 154 int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *); 140 155 int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *);
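With ->map()/->unmap() removed from pipe_buf_operations, code that needs the buffer contents maps the page itself with kmap()/kmap_atomic(), as the fs/pipe.c and fs/splice.c hunks above already do. A minimal sketch of copying a confirmed buffer into kernel memory under that scheme, using a hypothetical helper:

#include <linux/pipe_fs_i.h>
#include <linux/highmem.h>
#include <linux/kernel.h>
#include <linux/string.h>

/* Hypothetical: copy one confirmed pipe_buffer into a kernel destination. */
static void demo_copy_buf(struct pipe_buffer *buf, void *dst, size_t len)
{
        char *src = kmap_atomic(buf->page);     /* was buf->ops->map(pipe, buf, 1) */

        memcpy(dst, src + buf->offset, min_t(size_t, len, buf->len));
        kunmap_atomic(src);                     /* was buf->ops->unmap(pipe, buf, src) */
}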
+52
include/linux/uio.h
··· 9 9 #ifndef __LINUX_UIO_H 10 10 #define __LINUX_UIO_H 11 11 12 + #include <linux/kernel.h> 12 13 #include <uapi/linux/uio.h> 13 14 15 + struct page; 14 16 15 17 struct kvec { 16 18 void *iov_base; /* and that should *never* hold a userland pointer */ 17 19 size_t iov_len; 20 + }; 21 + 22 + struct iov_iter { 23 + const struct iovec *iov; 24 + unsigned long nr_segs; 25 + size_t iov_offset; 26 + size_t count; 18 27 }; 19 28 20 29 /* ··· 43 34 return ret; 44 35 } 45 36 37 + static inline struct iovec iov_iter_iovec(const struct iov_iter *iter) 38 + { 39 + return (struct iovec) { 40 + .iov_base = iter->iov->iov_base + iter->iov_offset, 41 + .iov_len = min(iter->count, 42 + iter->iov->iov_len - iter->iov_offset), 43 + }; 44 + } 45 + 46 + #define iov_for_each(iov, iter, start) \ 47 + for (iter = (start); \ 48 + (iter).count && \ 49 + ((iov = iov_iter_iovec(&(iter))), 1); \ 50 + iov_iter_advance(&(iter), (iov).iov_len)) 51 + 46 52 unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to); 53 + 54 + size_t iov_iter_copy_from_user_atomic(struct page *page, 55 + struct iov_iter *i, unsigned long offset, size_t bytes); 56 + size_t iov_iter_copy_from_user(struct page *page, 57 + struct iov_iter *i, unsigned long offset, size_t bytes); 58 + void iov_iter_advance(struct iov_iter *i, size_t bytes); 59 + int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes); 60 + size_t iov_iter_single_seg_count(const struct iov_iter *i); 61 + size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, 62 + struct iov_iter *i); 63 + 64 + static inline void iov_iter_init(struct iov_iter *i, 65 + const struct iovec *iov, unsigned long nr_segs, 66 + size_t count, size_t written) 67 + { 68 + i->iov = iov; 69 + i->nr_segs = nr_segs; 70 + i->iov_offset = 0; 71 + i->count = count + written; 72 + 73 + iov_iter_advance(i, written); 74 + } 75 + 76 + static inline size_t iov_iter_count(struct iov_iter *i) 77 + { 78 + return i->count; 79 + } 47 80 48 81 int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len); 49 82 int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len); 83 + 50 84 #endif
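With struct iov_iter and its helpers now declared in <linux/uio.h> (and implemented in mm/iov_iter.c below), any kernel code can hand data to a user iovec array without open-coding the segment walk. A small sketch under those assumptions; the function is hypothetical, and note that this series' iov_iter_init() still takes the extra 'written' argument:

#include <linux/uio.h>
#include <linux/errno.h>

/* Hypothetical: push one page's worth of data out through a user iovec array. */
static ssize_t demo_send_page(struct page *page, size_t offset, size_t len,
                              const struct iovec *iov, unsigned long nr_segs)
{
        struct iov_iter i;
        size_t copied;

        iov_iter_init(&i, iov, nr_segs, len, 0);        /* count = len, nothing consumed yet */
        copied = copy_page_to_iter(page, offset, len, &i);
        /* the iterator is advanced by 'copied'; a short copy means a fault */
        return copied ? copied : -EFAULT;
}

For callers that want to visit each struct iovec segment (clamped to the remaining count) rather than copy page data, the new iov_for_each() macro provides the same walk.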
+1 -3
kernel/relay.c
··· 1195 1195 1196 1196 static const struct pipe_buf_operations relay_pipe_buf_ops = { 1197 1197 .can_merge = 0, 1198 - .map = generic_pipe_buf_map, 1199 - .unmap = generic_pipe_buf_unmap, 1200 1198 .confirm = generic_pipe_buf_confirm, 1201 1199 .release = relay_pipe_buf_release, 1202 1200 .steal = generic_pipe_buf_steal, ··· 1251 1253 subbuf_pages = rbuf->chan->alloc_size >> PAGE_SHIFT; 1252 1254 pidx = (read_start / PAGE_SIZE) % subbuf_pages; 1253 1255 poff = read_start & ~PAGE_MASK; 1254 - nr_pages = min_t(unsigned int, subbuf_pages, pipe->buffers); 1256 + nr_pages = min_t(unsigned int, subbuf_pages, spd.nr_pages_max); 1255 1257 1256 1258 for (total_len = 0; spd.nr_pages < nr_pages; spd.nr_pages++) { 1257 1259 unsigned int this_len, this_end, private;
+2 -6
kernel/trace/trace.c
··· 4392 4392 4393 4393 static const struct pipe_buf_operations tracing_pipe_buf_ops = { 4394 4394 .can_merge = 0, 4395 - .map = generic_pipe_buf_map, 4396 - .unmap = generic_pipe_buf_unmap, 4397 4395 .confirm = generic_pipe_buf_confirm, 4398 4396 .release = generic_pipe_buf_release, 4399 4397 .steal = generic_pipe_buf_steal, ··· 4486 4488 trace_access_lock(iter->cpu_file); 4487 4489 4488 4490 /* Fill as many pages as possible. */ 4489 - for (i = 0, rem = len; i < pipe->buffers && rem; i++) { 4491 + for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) { 4490 4492 spd.pages[i] = alloc_page(GFP_KERNEL); 4491 4493 if (!spd.pages[i]) 4492 4494 break; ··· 5279 5281 /* Pipe buffer operations for a buffer. */ 5280 5282 static const struct pipe_buf_operations buffer_pipe_buf_ops = { 5281 5283 .can_merge = 0, 5282 - .map = generic_pipe_buf_map, 5283 - .unmap = generic_pipe_buf_unmap, 5284 5284 .confirm = generic_pipe_buf_confirm, 5285 5285 .release = buffer_pipe_buf_release, 5286 5286 .steal = generic_pipe_buf_steal, ··· 5354 5358 trace_access_lock(iter->cpu_file); 5355 5359 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file); 5356 5360 5357 - for (i = 0; i < pipe->buffers && len && entries; i++, len -= PAGE_SIZE) { 5361 + for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) { 5358 5362 struct page *page; 5359 5363 int r; 5360 5364
-10
lib/Kconfig.debug
··· 1045 1045 of the BUG call as well as the EIP and oops trace. This aids 1046 1046 debugging but costs about 70-100K of memory. 1047 1047 1048 - config DEBUG_WRITECOUNT 1049 - bool "Debug filesystem writers count" 1050 - depends on DEBUG_KERNEL 1051 - help 1052 - Enable this to catch wrong use of the writers count in struct 1053 - vfsmount. This will increase the size of each file struct by 1054 - 32 bits. 1055 - 1056 - If unsure, say N. 1057 - 1058 1048 config DEBUG_LIST 1059 1049 bool "Debug linked list manipulation" 1060 1050 depends on DEBUG_KERNEL
+2 -1
mm/Makefile
··· 17 17 util.o mmzone.o vmstat.o backing-dev.o \ 18 18 mm_init.o mmu_context.o percpu.o slab_common.o \ 19 19 compaction.o balloon_compaction.o vmacache.o \ 20 - interval_tree.o list_lru.o workingset.o $(mmu-y) 20 + interval_tree.o list_lru.o workingset.o \ 21 + iov_iter.o $(mmu-y) 21 22 22 23 obj-y += init-mm.o 23 24
+59 -285
mm/filemap.c
··· 77 77 * ->mmap_sem 78 78 * ->lock_page (access_process_vm) 79 79 * 80 - * ->i_mutex (generic_file_buffered_write) 80 + * ->i_mutex (generic_perform_write) 81 81 * ->mmap_sem (fault_in_pages_readable->do_page_fault) 82 82 * 83 83 * bdi->wb.list_lock ··· 1428 1428 * do_generic_file_read - generic file read routine 1429 1429 * @filp: the file to read 1430 1430 * @ppos: current file position 1431 - * @desc: read_descriptor 1431 + * @iter: data destination 1432 + * @written: already copied 1432 1433 * 1433 1434 * This is a generic file read routine, and uses the 1434 1435 * mapping->a_ops->readpage() function for the actual low-level stuff. ··· 1437 1436 * This is really ugly. But the goto's actually try to clarify some 1438 1437 * of the logic when it comes to error handling etc. 1439 1438 */ 1440 - static void do_generic_file_read(struct file *filp, loff_t *ppos, 1441 - read_descriptor_t *desc) 1439 + static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos, 1440 + struct iov_iter *iter, ssize_t written) 1442 1441 { 1443 1442 struct address_space *mapping = filp->f_mapping; 1444 1443 struct inode *inode = mapping->host; ··· 1448 1447 pgoff_t prev_index; 1449 1448 unsigned long offset; /* offset into pagecache page */ 1450 1449 unsigned int prev_offset; 1451 - int error; 1450 + int error = 0; 1452 1451 1453 1452 index = *ppos >> PAGE_CACHE_SHIFT; 1454 1453 prev_index = ra->prev_pos >> PAGE_CACHE_SHIFT; 1455 1454 prev_offset = ra->prev_pos & (PAGE_CACHE_SIZE-1); 1456 - last_index = (*ppos + desc->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT; 1455 + last_index = (*ppos + iter->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT; 1457 1456 offset = *ppos & ~PAGE_CACHE_MASK; 1458 1457 1459 1458 for (;;) { ··· 1488 1487 if (!page->mapping) 1489 1488 goto page_not_up_to_date_locked; 1490 1489 if (!mapping->a_ops->is_partially_uptodate(page, 1491 - desc, offset)) 1490 + offset, iter->count)) 1492 1491 goto page_not_up_to_date_locked; 1493 1492 unlock_page(page); 1494 1493 } ··· 1538 1537 /* 1539 1538 * Ok, we have the page, and it's up-to-date, so 1540 1539 * now we can copy it to user space... 1541 - * 1542 - * The file_read_actor routine returns how many bytes were 1543 - * actually used.. 1544 - * NOTE! This may not be the same as how much of a user buffer 1545 - * we filled up (we may be padding etc), so we can only update 1546 - * "pos" here (the actor routine has to update the user buffer 1547 - * pointers and the remaining count). 1548 1540 */ 1549 - ret = file_read_actor(desc, page, offset, nr); 1541 + 1542 + ret = copy_page_to_iter(page, offset, nr, iter); 1550 1543 offset += ret; 1551 1544 index += offset >> PAGE_CACHE_SHIFT; 1552 1545 offset &= ~PAGE_CACHE_MASK; 1553 1546 prev_offset = offset; 1554 1547 1555 1548 page_cache_release(page); 1556 - if (ret == nr && desc->count) 1557 - continue; 1558 - goto out; 1549 + written += ret; 1550 + if (!iov_iter_count(iter)) 1551 + goto out; 1552 + if (ret < nr) { 1553 + error = -EFAULT; 1554 + goto out; 1555 + } 1556 + continue; 1559 1557 1560 1558 page_not_up_to_date: 1561 1559 /* Get exclusive access to the page ... */ ··· 1589 1589 if (unlikely(error)) { 1590 1590 if (error == AOP_TRUNCATED_PAGE) { 1591 1591 page_cache_release(page); 1592 + error = 0; 1592 1593 goto find_page; 1593 1594 } 1594 1595 goto readpage_error; ··· 1620 1619 1621 1620 readpage_error: 1622 1621 /* UHHUH! A synchronous read error occurred. 
Report it */ 1623 - desc->error = error; 1624 1622 page_cache_release(page); 1625 1623 goto out; 1626 1624 ··· 1630 1630 */ 1631 1631 page = page_cache_alloc_cold(mapping); 1632 1632 if (!page) { 1633 - desc->error = -ENOMEM; 1633 + error = -ENOMEM; 1634 1634 goto out; 1635 1635 } 1636 1636 error = add_to_page_cache_lru(page, mapping, 1637 1637 index, GFP_KERNEL); 1638 1638 if (error) { 1639 1639 page_cache_release(page); 1640 - if (error == -EEXIST) 1640 + if (error == -EEXIST) { 1641 + error = 0; 1641 1642 goto find_page; 1642 - desc->error = error; 1643 + } 1643 1644 goto out; 1644 1645 } 1645 1646 goto readpage; ··· 1653 1652 1654 1653 *ppos = ((loff_t)index << PAGE_CACHE_SHIFT) + offset; 1655 1654 file_accessed(filp); 1656 - } 1657 - 1658 - int file_read_actor(read_descriptor_t *desc, struct page *page, 1659 - unsigned long offset, unsigned long size) 1660 - { 1661 - char *kaddr; 1662 - unsigned long left, count = desc->count; 1663 - 1664 - if (size > count) 1665 - size = count; 1666 - 1667 - /* 1668 - * Faults on the destination of a read are common, so do it before 1669 - * taking the kmap. 1670 - */ 1671 - if (!fault_in_pages_writeable(desc->arg.buf, size)) { 1672 - kaddr = kmap_atomic(page); 1673 - left = __copy_to_user_inatomic(desc->arg.buf, 1674 - kaddr + offset, size); 1675 - kunmap_atomic(kaddr); 1676 - if (left == 0) 1677 - goto success; 1678 - } 1679 - 1680 - /* Do it the slow way */ 1681 - kaddr = kmap(page); 1682 - left = __copy_to_user(desc->arg.buf, kaddr + offset, size); 1683 - kunmap(page); 1684 - 1685 - if (left) { 1686 - size -= left; 1687 - desc->error = -EFAULT; 1688 - } 1689 - success: 1690 - desc->count = count - size; 1691 - desc->written += size; 1692 - desc->arg.buf += size; 1693 - return size; 1655 + return written ? written : error; 1694 1656 } 1695 1657 1696 1658 /* ··· 1711 1747 { 1712 1748 struct file *filp = iocb->ki_filp; 1713 1749 ssize_t retval; 1714 - unsigned long seg = 0; 1715 1750 size_t count; 1716 1751 loff_t *ppos = &iocb->ki_pos; 1752 + struct iov_iter i; 1717 1753 1718 1754 count = 0; 1719 1755 retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE); 1720 1756 if (retval) 1721 1757 return retval; 1758 + iov_iter_init(&i, iov, nr_segs, count, 0); 1722 1759 1723 1760 /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ 1724 1761 if (filp->f_flags & O_DIRECT) { ··· 1741 1776 if (retval > 0) { 1742 1777 *ppos = pos + retval; 1743 1778 count -= retval; 1779 + /* 1780 + * If we did a short DIO read we need to skip the 1781 + * section of the iov that we've already read data into. 1782 + */ 1783 + iov_iter_advance(&i, retval); 1744 1784 } 1745 1785 1746 1786 /* ··· 1762 1792 } 1763 1793 } 1764 1794 1765 - count = retval; 1766 - for (seg = 0; seg < nr_segs; seg++) { 1767 - read_descriptor_t desc; 1768 - loff_t offset = 0; 1769 - 1770 - /* 1771 - * If we did a short DIO read we need to skip the section of the 1772 - * iov that we've already read data into. 
1773 - */ 1774 - if (count) { 1775 - if (count > iov[seg].iov_len) { 1776 - count -= iov[seg].iov_len; 1777 - continue; 1778 - } 1779 - offset = count; 1780 - count = 0; 1781 - } 1782 - 1783 - desc.written = 0; 1784 - desc.arg.buf = iov[seg].iov_base + offset; 1785 - desc.count = iov[seg].iov_len - offset; 1786 - if (desc.count == 0) 1787 - continue; 1788 - desc.error = 0; 1789 - do_generic_file_read(filp, ppos, &desc); 1790 - retval += desc.written; 1791 - if (desc.error) { 1792 - retval = retval ?: desc.error; 1793 - break; 1794 - } 1795 - if (desc.count > 0) 1796 - break; 1797 - } 1795 + retval = do_generic_file_read(filp, ppos, &i, retval); 1798 1796 out: 1799 1797 return retval; 1800 1798 } ··· 2273 2335 } 2274 2336 EXPORT_SYMBOL(read_cache_page_gfp); 2275 2337 2276 - static size_t __iovec_copy_from_user_inatomic(char *vaddr, 2277 - const struct iovec *iov, size_t base, size_t bytes) 2278 - { 2279 - size_t copied = 0, left = 0; 2280 - 2281 - while (bytes) { 2282 - char __user *buf = iov->iov_base + base; 2283 - int copy = min(bytes, iov->iov_len - base); 2284 - 2285 - base = 0; 2286 - left = __copy_from_user_inatomic(vaddr, buf, copy); 2287 - copied += copy; 2288 - bytes -= copy; 2289 - vaddr += copy; 2290 - iov++; 2291 - 2292 - if (unlikely(left)) 2293 - break; 2294 - } 2295 - return copied - left; 2296 - } 2297 - 2298 - /* 2299 - * Copy as much as we can into the page and return the number of bytes which 2300 - * were successfully copied. If a fault is encountered then return the number of 2301 - * bytes which were copied. 2302 - */ 2303 - size_t iov_iter_copy_from_user_atomic(struct page *page, 2304 - struct iov_iter *i, unsigned long offset, size_t bytes) 2305 - { 2306 - char *kaddr; 2307 - size_t copied; 2308 - 2309 - BUG_ON(!in_atomic()); 2310 - kaddr = kmap_atomic(page); 2311 - if (likely(i->nr_segs == 1)) { 2312 - int left; 2313 - char __user *buf = i->iov->iov_base + i->iov_offset; 2314 - left = __copy_from_user_inatomic(kaddr + offset, buf, bytes); 2315 - copied = bytes - left; 2316 - } else { 2317 - copied = __iovec_copy_from_user_inatomic(kaddr + offset, 2318 - i->iov, i->iov_offset, bytes); 2319 - } 2320 - kunmap_atomic(kaddr); 2321 - 2322 - return copied; 2323 - } 2324 - EXPORT_SYMBOL(iov_iter_copy_from_user_atomic); 2325 - 2326 - /* 2327 - * This has the same sideeffects and return value as 2328 - * iov_iter_copy_from_user_atomic(). 2329 - * The difference is that it attempts to resolve faults. 2330 - * Page must not be locked. 
2331 - */ 2332 - size_t iov_iter_copy_from_user(struct page *page, 2333 - struct iov_iter *i, unsigned long offset, size_t bytes) 2334 - { 2335 - char *kaddr; 2336 - size_t copied; 2337 - 2338 - kaddr = kmap(page); 2339 - if (likely(i->nr_segs == 1)) { 2340 - int left; 2341 - char __user *buf = i->iov->iov_base + i->iov_offset; 2342 - left = __copy_from_user(kaddr + offset, buf, bytes); 2343 - copied = bytes - left; 2344 - } else { 2345 - copied = __iovec_copy_from_user_inatomic(kaddr + offset, 2346 - i->iov, i->iov_offset, bytes); 2347 - } 2348 - kunmap(page); 2349 - return copied; 2350 - } 2351 - EXPORT_SYMBOL(iov_iter_copy_from_user); 2352 - 2353 - void iov_iter_advance(struct iov_iter *i, size_t bytes) 2354 - { 2355 - BUG_ON(i->count < bytes); 2356 - 2357 - if (likely(i->nr_segs == 1)) { 2358 - i->iov_offset += bytes; 2359 - i->count -= bytes; 2360 - } else { 2361 - const struct iovec *iov = i->iov; 2362 - size_t base = i->iov_offset; 2363 - unsigned long nr_segs = i->nr_segs; 2364 - 2365 - /* 2366 - * The !iov->iov_len check ensures we skip over unlikely 2367 - * zero-length segments (without overruning the iovec). 2368 - */ 2369 - while (bytes || unlikely(i->count && !iov->iov_len)) { 2370 - int copy; 2371 - 2372 - copy = min(bytes, iov->iov_len - base); 2373 - BUG_ON(!i->count || i->count < copy); 2374 - i->count -= copy; 2375 - bytes -= copy; 2376 - base += copy; 2377 - if (iov->iov_len == base) { 2378 - iov++; 2379 - nr_segs--; 2380 - base = 0; 2381 - } 2382 - } 2383 - i->iov = iov; 2384 - i->iov_offset = base; 2385 - i->nr_segs = nr_segs; 2386 - } 2387 - } 2388 - EXPORT_SYMBOL(iov_iter_advance); 2389 - 2390 - /* 2391 - * Fault in the first iovec of the given iov_iter, to a maximum length 2392 - * of bytes. Returns 0 on success, or non-zero if the memory could not be 2393 - * accessed (ie. because it is an invalid address). 2394 - * 2395 - * writev-intensive code may want this to prefault several iovecs -- that 2396 - * would be possible (callers must not rely on the fact that _only_ the 2397 - * first iovec will be faulted with the current implementation). 2398 - */ 2399 - int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes) 2400 - { 2401 - char __user *buf = i->iov->iov_base + i->iov_offset; 2402 - bytes = min(bytes, i->iov->iov_len - i->iov_offset); 2403 - return fault_in_pages_readable(buf, bytes); 2404 - } 2405 - EXPORT_SYMBOL(iov_iter_fault_in_readable); 2406 - 2407 - /* 2408 - * Return the count of just the current iov_iter segment. 
2409 - */ 2410 - size_t iov_iter_single_seg_count(const struct iov_iter *i) 2411 - { 2412 - const struct iovec *iov = i->iov; 2413 - if (i->nr_segs == 1) 2414 - return i->count; 2415 - else 2416 - return min(i->count, iov->iov_len - i->iov_offset); 2417 - } 2418 - EXPORT_SYMBOL(iov_iter_single_seg_count); 2419 - 2420 2338 /* 2421 2339 * Performs necessary checks before doing a write 2422 2340 * ··· 2379 2585 2380 2586 ssize_t 2381 2587 generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov, 2382 - unsigned long *nr_segs, loff_t pos, loff_t *ppos, 2588 + unsigned long *nr_segs, loff_t pos, 2383 2589 size_t count, size_t ocount) 2384 2590 { 2385 2591 struct file *file = iocb->ki_filp; ··· 2440 2646 i_size_write(inode, pos); 2441 2647 mark_inode_dirty(inode); 2442 2648 } 2443 - *ppos = pos; 2649 + iocb->ki_pos = pos; 2444 2650 } 2445 2651 out: 2446 2652 return written; ··· 2486 2692 } 2487 2693 EXPORT_SYMBOL(grab_cache_page_write_begin); 2488 2694 2489 - static ssize_t generic_perform_write(struct file *file, 2695 + ssize_t generic_perform_write(struct file *file, 2490 2696 struct iov_iter *i, loff_t pos) 2491 2697 { 2492 2698 struct address_space *mapping = file->f_mapping; ··· 2536 2742 if (mapping_writably_mapped(mapping)) 2537 2743 flush_dcache_page(page); 2538 2744 2539 - pagefault_disable(); 2540 2745 copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes); 2541 - pagefault_enable(); 2542 2746 flush_dcache_page(page); 2543 2747 2544 2748 mark_page_accessed(page); ··· 2574 2782 2575 2783 return written ? written : status; 2576 2784 } 2577 - 2578 - ssize_t 2579 - generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, 2580 - unsigned long nr_segs, loff_t pos, loff_t *ppos, 2581 - size_t count, ssize_t written) 2582 - { 2583 - struct file *file = iocb->ki_filp; 2584 - ssize_t status; 2585 - struct iov_iter i; 2586 - 2587 - iov_iter_init(&i, iov, nr_segs, count, written); 2588 - status = generic_perform_write(file, &i, pos); 2589 - 2590 - if (likely(status >= 0)) { 2591 - written += status; 2592 - *ppos = pos + status; 2593 - } 2594 - 2595 - return written ? written : status; 2596 - } 2597 - EXPORT_SYMBOL(generic_file_buffered_write); 2785 + EXPORT_SYMBOL(generic_perform_write); 2598 2786 2599 2787 /** 2600 2788 * __generic_file_aio_write - write data to a file ··· 2596 2824 * avoid syncing under i_mutex. 
2597 2825 */ 2598 2826 ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, 2599 - unsigned long nr_segs, loff_t *ppos) 2827 + unsigned long nr_segs) 2600 2828 { 2601 2829 struct file *file = iocb->ki_filp; 2602 2830 struct address_space * mapping = file->f_mapping; 2603 2831 size_t ocount; /* original count */ 2604 2832 size_t count; /* after file limit checks */ 2605 2833 struct inode *inode = mapping->host; 2606 - loff_t pos; 2607 - ssize_t written; 2834 + loff_t pos = iocb->ki_pos; 2835 + ssize_t written = 0; 2608 2836 ssize_t err; 2837 + ssize_t status; 2838 + struct iov_iter from; 2609 2839 2610 2840 ocount = 0; 2611 2841 err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); ··· 2615 2841 return err; 2616 2842 2617 2843 count = ocount; 2618 - pos = *ppos; 2619 2844 2620 2845 /* We can write back this queue in page reclaim */ 2621 2846 current->backing_dev_info = mapping->backing_dev_info; 2622 - written = 0; 2623 - 2624 2847 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); 2625 2848 if (err) 2626 2849 goto out; ··· 2633 2862 if (err) 2634 2863 goto out; 2635 2864 2865 + iov_iter_init(&from, iov, nr_segs, count, 0); 2866 + 2636 2867 /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ 2637 2868 if (unlikely(file->f_flags & O_DIRECT)) { 2638 2869 loff_t endbyte; 2639 - ssize_t written_buffered; 2640 2870 2641 - written = generic_file_direct_write(iocb, iov, &nr_segs, pos, 2642 - ppos, count, ocount); 2871 + written = generic_file_direct_write(iocb, iov, &from.nr_segs, pos, 2872 + count, ocount); 2643 2873 if (written < 0 || written == count) 2644 2874 goto out; 2875 + iov_iter_advance(&from, written); 2876 + 2645 2877 /* 2646 2878 * direct-io write to a hole: fall through to buffered I/O 2647 2879 * for completing the rest of the request. 2648 2880 */ 2649 2881 pos += written; 2650 2882 count -= written; 2651 - written_buffered = generic_file_buffered_write(iocb, iov, 2652 - nr_segs, pos, ppos, count, 2653 - written); 2883 + 2884 + status = generic_perform_write(file, &from, pos); 2654 2885 /* 2655 - * If generic_file_buffered_write() retuned a synchronous error 2886 + * If generic_perform_write() returned a synchronous error 2656 2887 * then we want to return the number of bytes which were 2657 2888 * direct-written, or the error code if that was zero. Note 2658 2889 * that this differs from normal direct-io semantics, which 2659 2890 * will return -EFOO even if some bytes were written. 2660 2891 */ 2661 - if (written_buffered < 0) { 2662 - err = written_buffered; 2892 + if (unlikely(status < 0) && !written) { 2893 + err = status; 2663 2894 goto out; 2664 2895 } 2665 - 2896 + iocb->ki_pos = pos + status; 2666 2897 /* 2667 2898 * We need to ensure that the page cache pages are written to 2668 2899 * disk and invalidated to preserve the expected O_DIRECT 2669 2900 * semantics. 
2670 2901 */ 2671 - endbyte = pos + written_buffered - written - 1; 2902 + endbyte = pos + status - 1; 2672 2903 err = filemap_write_and_wait_range(file->f_mapping, pos, endbyte); 2673 2904 if (err == 0) { 2674 - written = written_buffered; 2905 + written += status; 2675 2906 invalidate_mapping_pages(mapping, 2676 2907 pos >> PAGE_CACHE_SHIFT, 2677 2908 endbyte >> PAGE_CACHE_SHIFT); ··· 2684 2911 */ 2685 2912 } 2686 2913 } else { 2687 - written = generic_file_buffered_write(iocb, iov, nr_segs, 2688 - pos, ppos, count, written); 2914 + written = generic_perform_write(file, &from, pos); 2915 + if (likely(written >= 0)) 2916 + iocb->ki_pos = pos + written; 2689 2917 } 2690 2918 out: 2691 2919 current->backing_dev_info = NULL; ··· 2715 2941 BUG_ON(iocb->ki_pos != pos); 2716 2942 2717 2943 mutex_lock(&inode->i_mutex); 2718 - ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); 2944 + ret = __generic_file_aio_write(iocb, iov, nr_segs); 2719 2945 mutex_unlock(&inode->i_mutex); 2720 2946 2721 2947 if (ret > 0) {
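The filemap.c changes above drop the ppos arguments and remove generic_file_buffered_write(): callers now wrap the user iovecs in an iov_iter, call the newly exported generic_perform_write(), and maintain iocb->ki_pos themselves. A minimal sketch of that calling convention, mirroring the __generic_file_aio_write() pattern in this diff, might look as follows; the foofs_* name is hypothetical and the includes are the assumed headers, so treat it as an illustration rather than part of the patch.

#include <linux/fs.h>
#include <linux/uio.h>

/*
 * Hypothetical example (not from this series): a filesystem's buffered
 * write path after switching from generic_file_buffered_write() to
 * generic_perform_write().
 */
static ssize_t foofs_buffered_write(struct kiocb *iocb, const struct iovec *iov,
				    unsigned long nr_segs, loff_t pos, size_t count)
{
	struct file *file = iocb->ki_filp;
	struct iov_iter from;
	ssize_t written;

	/* wrap the user iovecs: nr_segs segments, count bytes, starting offset 0 */
	iov_iter_init(&from, iov, nr_segs, count, 0);

	/* copy into the page cache; returns bytes written or a negative error */
	written = generic_perform_write(file, &from, pos);

	/* no ppos argument any more -- the caller updates the file position */
	if (likely(written >= 0))
		iocb->ki_pos = pos + written;

	return written;
}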
+224
mm/iov_iter.c
··· 1 + #include <linux/export.h> 2 + #include <linux/uio.h> 3 + #include <linux/pagemap.h> 4 + 5 + size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, 6 + struct iov_iter *i) 7 + { 8 + size_t skip, copy, left, wanted; 9 + const struct iovec *iov; 10 + char __user *buf; 11 + void *kaddr, *from; 12 + 13 + if (unlikely(bytes > i->count)) 14 + bytes = i->count; 15 + 16 + if (unlikely(!bytes)) 17 + return 0; 18 + 19 + wanted = bytes; 20 + iov = i->iov; 21 + skip = i->iov_offset; 22 + buf = iov->iov_base + skip; 23 + copy = min(bytes, iov->iov_len - skip); 24 + 25 + if (!fault_in_pages_writeable(buf, copy)) { 26 + kaddr = kmap_atomic(page); 27 + from = kaddr + offset; 28 + 29 + /* first chunk, usually the only one */ 30 + left = __copy_to_user_inatomic(buf, from, copy); 31 + copy -= left; 32 + skip += copy; 33 + from += copy; 34 + bytes -= copy; 35 + 36 + while (unlikely(!left && bytes)) { 37 + iov++; 38 + buf = iov->iov_base; 39 + copy = min(bytes, iov->iov_len); 40 + left = __copy_to_user_inatomic(buf, from, copy); 41 + copy -= left; 42 + skip = copy; 43 + from += copy; 44 + bytes -= copy; 45 + } 46 + if (likely(!bytes)) { 47 + kunmap_atomic(kaddr); 48 + goto done; 49 + } 50 + offset = from - kaddr; 51 + buf += copy; 52 + kunmap_atomic(kaddr); 53 + copy = min(bytes, iov->iov_len - skip); 54 + } 55 + /* Too bad - revert to non-atomic kmap */ 56 + kaddr = kmap(page); 57 + from = kaddr + offset; 58 + left = __copy_to_user(buf, from, copy); 59 + copy -= left; 60 + skip += copy; 61 + from += copy; 62 + bytes -= copy; 63 + while (unlikely(!left && bytes)) { 64 + iov++; 65 + buf = iov->iov_base; 66 + copy = min(bytes, iov->iov_len); 67 + left = __copy_to_user(buf, from, copy); 68 + copy -= left; 69 + skip = copy; 70 + from += copy; 71 + bytes -= copy; 72 + } 73 + kunmap(page); 74 + done: 75 + i->count -= wanted - bytes; 76 + i->nr_segs -= iov - i->iov; 77 + i->iov = iov; 78 + i->iov_offset = skip; 79 + return wanted - bytes; 80 + } 81 + EXPORT_SYMBOL(copy_page_to_iter); 82 + 83 + static size_t __iovec_copy_from_user_inatomic(char *vaddr, 84 + const struct iovec *iov, size_t base, size_t bytes) 85 + { 86 + size_t copied = 0, left = 0; 87 + 88 + while (bytes) { 89 + char __user *buf = iov->iov_base + base; 90 + int copy = min(bytes, iov->iov_len - base); 91 + 92 + base = 0; 93 + left = __copy_from_user_inatomic(vaddr, buf, copy); 94 + copied += copy; 95 + bytes -= copy; 96 + vaddr += copy; 97 + iov++; 98 + 99 + if (unlikely(left)) 100 + break; 101 + } 102 + return copied - left; 103 + } 104 + 105 + /* 106 + * Copy as much as we can into the page and return the number of bytes which 107 + * were successfully copied. If a fault is encountered then return the number of 108 + * bytes which were copied. 109 + */ 110 + size_t iov_iter_copy_from_user_atomic(struct page *page, 111 + struct iov_iter *i, unsigned long offset, size_t bytes) 112 + { 113 + char *kaddr; 114 + size_t copied; 115 + 116 + kaddr = kmap_atomic(page); 117 + if (likely(i->nr_segs == 1)) { 118 + int left; 119 + char __user *buf = i->iov->iov_base + i->iov_offset; 120 + left = __copy_from_user_inatomic(kaddr + offset, buf, bytes); 121 + copied = bytes - left; 122 + } else { 123 + copied = __iovec_copy_from_user_inatomic(kaddr + offset, 124 + i->iov, i->iov_offset, bytes); 125 + } 126 + kunmap_atomic(kaddr); 127 + 128 + return copied; 129 + } 130 + EXPORT_SYMBOL(iov_iter_copy_from_user_atomic); 131 + 132 + /* 133 + * This has the same sideeffects and return value as 134 + * iov_iter_copy_from_user_atomic(). 
135 + * The difference is that it attempts to resolve faults. 136 + * Page must not be locked. 137 + */ 138 + size_t iov_iter_copy_from_user(struct page *page, 139 + struct iov_iter *i, unsigned long offset, size_t bytes) 140 + { 141 + char *kaddr; 142 + size_t copied; 143 + 144 + kaddr = kmap(page); 145 + if (likely(i->nr_segs == 1)) { 146 + int left; 147 + char __user *buf = i->iov->iov_base + i->iov_offset; 148 + left = __copy_from_user(kaddr + offset, buf, bytes); 149 + copied = bytes - left; 150 + } else { 151 + copied = __iovec_copy_from_user_inatomic(kaddr + offset, 152 + i->iov, i->iov_offset, bytes); 153 + } 154 + kunmap(page); 155 + return copied; 156 + } 157 + EXPORT_SYMBOL(iov_iter_copy_from_user); 158 + 159 + void iov_iter_advance(struct iov_iter *i, size_t bytes) 160 + { 161 + BUG_ON(i->count < bytes); 162 + 163 + if (likely(i->nr_segs == 1)) { 164 + i->iov_offset += bytes; 165 + i->count -= bytes; 166 + } else { 167 + const struct iovec *iov = i->iov; 168 + size_t base = i->iov_offset; 169 + unsigned long nr_segs = i->nr_segs; 170 + 171 + /* 172 + * The !iov->iov_len check ensures we skip over unlikely 173 + * zero-length segments (without overruning the iovec). 174 + */ 175 + while (bytes || unlikely(i->count && !iov->iov_len)) { 176 + int copy; 177 + 178 + copy = min(bytes, iov->iov_len - base); 179 + BUG_ON(!i->count || i->count < copy); 180 + i->count -= copy; 181 + bytes -= copy; 182 + base += copy; 183 + if (iov->iov_len == base) { 184 + iov++; 185 + nr_segs--; 186 + base = 0; 187 + } 188 + } 189 + i->iov = iov; 190 + i->iov_offset = base; 191 + i->nr_segs = nr_segs; 192 + } 193 + } 194 + EXPORT_SYMBOL(iov_iter_advance); 195 + 196 + /* 197 + * Fault in the first iovec of the given iov_iter, to a maximum length 198 + * of bytes. Returns 0 on success, or non-zero if the memory could not be 199 + * accessed (ie. because it is an invalid address). 200 + * 201 + * writev-intensive code may want this to prefault several iovecs -- that 202 + * would be possible (callers must not rely on the fact that _only_ the 203 + * first iovec will be faulted with the current implementation). 204 + */ 205 + int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes) 206 + { 207 + char __user *buf = i->iov->iov_base + i->iov_offset; 208 + bytes = min(bytes, i->iov->iov_len - i->iov_offset); 209 + return fault_in_pages_readable(buf, bytes); 210 + } 211 + EXPORT_SYMBOL(iov_iter_fault_in_readable); 212 + 213 + /* 214 + * Return the count of just the current iov_iter segment. 215 + */ 216 + size_t iov_iter_single_seg_count(const struct iov_iter *i) 217 + { 218 + const struct iovec *iov = i->iov; 219 + if (i->nr_segs == 1) 220 + return i->count; 221 + else 222 + return min(i->count, iov->iov_len - i->iov_offset); 223 + } 224 + EXPORT_SYMBOL(iov_iter_single_seg_count);
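mm/iov_iter.c is a new file, and the subtlest helper in it is iov_iter_advance(), which consumes bytes across multiple segments while stepping over zero-length iovecs. The following stripped-down userspace model of that walk is an illustration only: the toy_* names are invented, the kernel's single-segment fast path and BUG_ON checks are omitted, and plain <sys/uio.h> iovecs stand in for the kernel structures. It compiles and runs, so the cursor movement can be observed directly.

#include <stdio.h>
#include <stddef.h>
#include <sys/uio.h>

struct toy_iter {
	const struct iovec *iov;	/* current segment */
	unsigned long nr_segs;		/* segments remaining */
	size_t iov_offset;		/* offset into the current segment */
	size_t count;			/* bytes remaining overall */
};

/* Advance the cursor by 'bytes'; caller must not advance past i->count
 * (the kernel version BUG()s in that case). */
static void toy_advance(struct toy_iter *i, size_t bytes)
{
	const struct iovec *iov = i->iov;
	size_t base = i->iov_offset;
	unsigned long nr_segs = i->nr_segs;

	/* consume 'bytes', skipping zero-length segments along the way */
	while (bytes || (i->count && !iov->iov_len)) {
		size_t copy = iov->iov_len - base;

		if (copy > bytes)
			copy = bytes;
		i->count -= copy;
		bytes -= copy;
		base += copy;
		if (base == iov->iov_len) {
			iov++;
			nr_segs--;
			base = 0;
		}
	}
	i->iov = iov;
	i->iov_offset = base;
	i->nr_segs = nr_segs;
}

int main(void)
{
	char a[8], b[4], c[16];
	struct iovec v[3] = {
		{ .iov_base = a, .iov_len = sizeof(a) },
		{ .iov_base = b, .iov_len = sizeof(b) },
		{ .iov_base = c, .iov_len = sizeof(c) },
	};
	struct toy_iter it = { .iov = v, .nr_segs = 3, .iov_offset = 0,
			       .count = sizeof(a) + sizeof(b) + sizeof(c) };

	toy_advance(&it, 10);	/* crosses from segment 0 into segment 1 */
	printf("segs left %lu, offset %zu, count %zu\n",
	       it.nr_segs, it.iov_offset, it.count);
	return 0;
}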
+74 -172
mm/process_vm_access.c
··· 23 23 24 24 /** 25 25 * process_vm_rw_pages - read/write pages from task specified 26 - * @task: task to read/write from 27 - * @mm: mm for task 28 - * @process_pages: struct pages area that can store at least 29 - * nr_pages_to_copy struct page pointers 30 - * @pa: address of page in task to start copying from/to 26 + * @pages: array of pointers to pages we want to copy 31 27 * @start_offset: offset in page to start copying from/to 32 28 * @len: number of bytes to copy 33 - * @lvec: iovec array specifying where to copy to/from 34 - * @lvec_cnt: number of elements in iovec array 35 - * @lvec_current: index in iovec array we are up to 36 - * @lvec_offset: offset in bytes from current iovec iov_base we are up to 29 + * @iter: where to copy to/from locally 37 30 * @vm_write: 0 means copy from, 1 means copy to 38 - * @nr_pages_to_copy: number of pages to copy 39 - * @bytes_copied: returns number of bytes successfully copied 40 31 * Returns 0 on success, error code otherwise 41 32 */ 42 - static int process_vm_rw_pages(struct task_struct *task, 43 - struct mm_struct *mm, 44 - struct page **process_pages, 45 - unsigned long pa, 46 - unsigned long start_offset, 47 - unsigned long len, 48 - const struct iovec *lvec, 49 - unsigned long lvec_cnt, 50 - unsigned long *lvec_current, 51 - size_t *lvec_offset, 52 - int vm_write, 53 - unsigned int nr_pages_to_copy, 54 - ssize_t *bytes_copied) 33 + static int process_vm_rw_pages(struct page **pages, 34 + unsigned offset, 35 + size_t len, 36 + struct iov_iter *iter, 37 + int vm_write) 55 38 { 56 - int pages_pinned; 57 - void *target_kaddr; 58 - int pgs_copied = 0; 59 - int j; 60 - int ret; 61 - ssize_t bytes_to_copy; 62 - ssize_t rc = 0; 63 - 64 - *bytes_copied = 0; 65 - 66 - /* Get the pages we're interested in */ 67 - down_read(&mm->mmap_sem); 68 - pages_pinned = get_user_pages(task, mm, pa, 69 - nr_pages_to_copy, 70 - vm_write, 0, process_pages, NULL); 71 - up_read(&mm->mmap_sem); 72 - 73 - if (pages_pinned != nr_pages_to_copy) { 74 - rc = -EFAULT; 75 - goto end; 76 - } 77 - 78 39 /* Do the copy for each page */ 79 - for (pgs_copied = 0; 80 - (pgs_copied < nr_pages_to_copy) && (*lvec_current < lvec_cnt); 81 - pgs_copied++) { 82 - /* Make sure we have a non zero length iovec */ 83 - while (*lvec_current < lvec_cnt 84 - && lvec[*lvec_current].iov_len == 0) 85 - (*lvec_current)++; 86 - if (*lvec_current == lvec_cnt) 87 - break; 40 + while (len && iov_iter_count(iter)) { 41 + struct page *page = *pages++; 42 + size_t copy = PAGE_SIZE - offset; 43 + size_t copied; 88 44 89 - /* 90 - * Will copy smallest of: 91 - * - bytes remaining in page 92 - * - bytes remaining in destination iovec 93 - */ 94 - bytes_to_copy = min_t(ssize_t, PAGE_SIZE - start_offset, 95 - len - *bytes_copied); 96 - bytes_to_copy = min_t(ssize_t, bytes_to_copy, 97 - lvec[*lvec_current].iov_len 98 - - *lvec_offset); 45 + if (copy > len) 46 + copy = len; 99 47 100 - target_kaddr = kmap(process_pages[pgs_copied]) + start_offset; 101 - 102 - if (vm_write) 103 - ret = copy_from_user(target_kaddr, 104 - lvec[*lvec_current].iov_base 105 - + *lvec_offset, 106 - bytes_to_copy); 107 - else 108 - ret = copy_to_user(lvec[*lvec_current].iov_base 109 - + *lvec_offset, 110 - target_kaddr, bytes_to_copy); 111 - kunmap(process_pages[pgs_copied]); 112 - if (ret) { 113 - *bytes_copied += bytes_to_copy - ret; 114 - pgs_copied++; 115 - rc = -EFAULT; 116 - goto end; 117 - } 118 - *bytes_copied += bytes_to_copy; 119 - *lvec_offset += bytes_to_copy; 120 - if (*lvec_offset == lvec[*lvec_current].iov_len) { 121 
- /* 122 - * Need to copy remaining part of page into the 123 - * next iovec if there are any bytes left in page 124 - */ 125 - (*lvec_current)++; 126 - *lvec_offset = 0; 127 - start_offset = (start_offset + bytes_to_copy) 128 - % PAGE_SIZE; 129 - if (start_offset) 130 - pgs_copied--; 48 + if (vm_write) { 49 + if (copy > iov_iter_count(iter)) 50 + copy = iov_iter_count(iter); 51 + copied = iov_iter_copy_from_user(page, iter, 52 + offset, copy); 53 + iov_iter_advance(iter, copied); 54 + set_page_dirty_lock(page); 131 55 } else { 132 - start_offset = 0; 56 + copied = copy_page_to_iter(page, offset, copy, iter); 133 57 } 58 + len -= copied; 59 + if (copied < copy && iov_iter_count(iter)) 60 + return -EFAULT; 61 + offset = 0; 134 62 } 135 - 136 - end: 137 - if (vm_write) { 138 - for (j = 0; j < pages_pinned; j++) { 139 - if (j < pgs_copied) 140 - set_page_dirty_lock(process_pages[j]); 141 - put_page(process_pages[j]); 142 - } 143 - } else { 144 - for (j = 0; j < pages_pinned; j++) 145 - put_page(process_pages[j]); 146 - } 147 - 148 - return rc; 63 + return 0; 149 64 } 150 65 151 66 /* Maximum number of pages kmalloc'd to hold struct page's during copy */ ··· 70 155 * process_vm_rw_single_vec - read/write pages from task specified 71 156 * @addr: start memory address of target process 72 157 * @len: size of area to copy to/from 73 - * @lvec: iovec array specifying where to copy to/from locally 74 - * @lvec_cnt: number of elements in iovec array 75 - * @lvec_current: index in iovec array we are up to 76 - * @lvec_offset: offset in bytes from current iovec iov_base we are up to 158 + * @iter: where to copy to/from locally 77 159 * @process_pages: struct pages area that can store at least 78 160 * nr_pages_to_copy struct page pointers 79 161 * @mm: mm for task 80 162 * @task: task to read/write from 81 163 * @vm_write: 0 means copy from, 1 means copy to 82 - * @bytes_copied: returns number of bytes successfully copied 83 164 * Returns 0 on success or on failure error code 84 165 */ 85 166 static int process_vm_rw_single_vec(unsigned long addr, 86 167 unsigned long len, 87 - const struct iovec *lvec, 88 - unsigned long lvec_cnt, 89 - unsigned long *lvec_current, 90 - size_t *lvec_offset, 168 + struct iov_iter *iter, 91 169 struct page **process_pages, 92 170 struct mm_struct *mm, 93 171 struct task_struct *task, 94 - int vm_write, 95 - ssize_t *bytes_copied) 172 + int vm_write) 96 173 { 97 174 unsigned long pa = addr & PAGE_MASK; 98 175 unsigned long start_offset = addr - pa; 99 176 unsigned long nr_pages; 100 - ssize_t bytes_copied_loop; 101 177 ssize_t rc = 0; 102 - unsigned long nr_pages_copied = 0; 103 - unsigned long nr_pages_to_copy; 104 178 unsigned long max_pages_per_loop = PVM_MAX_KMALLOC_PAGES 105 179 / sizeof(struct pages *); 106 - 107 - *bytes_copied = 0; 108 180 109 181 /* Work out address and page range required */ 110 182 if (len == 0) 111 183 return 0; 112 184 nr_pages = (addr + len - 1) / PAGE_SIZE - addr / PAGE_SIZE + 1; 113 185 114 - while ((nr_pages_copied < nr_pages) && (*lvec_current < lvec_cnt)) { 115 - nr_pages_to_copy = min(nr_pages - nr_pages_copied, 116 - max_pages_per_loop); 186 + while (!rc && nr_pages && iov_iter_count(iter)) { 187 + int pages = min(nr_pages, max_pages_per_loop); 188 + size_t bytes; 117 189 118 - rc = process_vm_rw_pages(task, mm, process_pages, pa, 119 - start_offset, len, 120 - lvec, lvec_cnt, 121 - lvec_current, lvec_offset, 122 - vm_write, nr_pages_to_copy, 123 - &bytes_copied_loop); 190 + /* Get the pages we're interested in */ 191 + 
down_read(&mm->mmap_sem); 192 + pages = get_user_pages(task, mm, pa, pages, 193 + vm_write, 0, process_pages, NULL); 194 + up_read(&mm->mmap_sem); 195 + 196 + if (pages <= 0) 197 + return -EFAULT; 198 + 199 + bytes = pages * PAGE_SIZE - start_offset; 200 + if (bytes > len) 201 + bytes = len; 202 + 203 + rc = process_vm_rw_pages(process_pages, 204 + start_offset, bytes, iter, 205 + vm_write); 206 + len -= bytes; 124 207 start_offset = 0; 125 - *bytes_copied += bytes_copied_loop; 126 - 127 - if (rc < 0) { 128 - return rc; 129 - } else { 130 - len -= bytes_copied_loop; 131 - nr_pages_copied += nr_pages_to_copy; 132 - pa += nr_pages_to_copy * PAGE_SIZE; 133 - } 208 + nr_pages -= pages; 209 + pa += pages * PAGE_SIZE; 210 + while (pages) 211 + put_page(process_pages[--pages]); 134 212 } 135 213 136 214 return rc; ··· 136 228 /** 137 229 * process_vm_rw_core - core of reading/writing pages from task specified 138 230 * @pid: PID of process to read/write from/to 139 - * @lvec: iovec array specifying where to copy to/from locally 140 - * @liovcnt: size of lvec array 231 + * @iter: where to copy to/from locally 141 232 * @rvec: iovec array specifying where to copy to/from in the other process 142 233 * @riovcnt: size of rvec array 143 234 * @flags: currently unused ··· 145 238 * return less bytes than expected if an error occurs during the copying 146 239 * process. 147 240 */ 148 - static ssize_t process_vm_rw_core(pid_t pid, const struct iovec *lvec, 149 - unsigned long liovcnt, 241 + static ssize_t process_vm_rw_core(pid_t pid, struct iov_iter *iter, 150 242 const struct iovec *rvec, 151 243 unsigned long riovcnt, 152 244 unsigned long flags, int vm_write) ··· 156 250 struct mm_struct *mm; 157 251 unsigned long i; 158 252 ssize_t rc = 0; 159 - ssize_t bytes_copied_loop; 160 - ssize_t bytes_copied = 0; 161 253 unsigned long nr_pages = 0; 162 254 unsigned long nr_pages_iov; 163 - unsigned long iov_l_curr_idx = 0; 164 - size_t iov_l_curr_offset = 0; 165 255 ssize_t iov_len; 256 + size_t total_len = iov_iter_count(iter); 166 257 167 258 /* 168 259 * Work out how many pages of struct pages we're going to need ··· 213 310 goto put_task_struct; 214 311 } 215 312 216 - for (i = 0; i < riovcnt && iov_l_curr_idx < liovcnt; i++) { 313 + for (i = 0; i < riovcnt && iov_iter_count(iter) && !rc; i++) 217 314 rc = process_vm_rw_single_vec( 218 315 (unsigned long)rvec[i].iov_base, rvec[i].iov_len, 219 - lvec, liovcnt, &iov_l_curr_idx, &iov_l_curr_offset, 220 - process_pages, mm, task, vm_write, &bytes_copied_loop); 221 - bytes_copied += bytes_copied_loop; 222 - if (rc != 0) { 223 - /* If we have managed to copy any data at all then 224 - we return the number of bytes copied. Otherwise 225 - we return the error code */ 226 - if (bytes_copied) 227 - rc = bytes_copied; 228 - goto put_mm; 229 - } 230 - } 316 + iter, process_pages, mm, task, vm_write); 231 317 232 - rc = bytes_copied; 233 - put_mm: 318 + /* copied = space before - space after */ 319 + total_len -= iov_iter_count(iter); 320 + 321 + /* If we have managed to copy any data at all then 322 + we return the number of bytes copied. 
Otherwise 323 + we return the error code */ 324 + if (total_len) 325 + rc = total_len; 326 + 234 327 mmput(mm); 235 328 236 329 put_task_struct: ··· 262 363 struct iovec iovstack_r[UIO_FASTIOV]; 263 364 struct iovec *iov_l = iovstack_l; 264 365 struct iovec *iov_r = iovstack_r; 366 + struct iov_iter iter; 265 367 ssize_t rc; 266 368 267 369 if (flags != 0) ··· 278 378 if (rc <= 0) 279 379 goto free_iovecs; 280 380 381 + iov_iter_init(&iter, iov_l, liovcnt, rc, 0); 382 + 281 383 rc = rw_copy_check_uvector(CHECK_IOVEC_ONLY, rvec, riovcnt, UIO_FASTIOV, 282 384 iovstack_r, &iov_r); 283 385 if (rc <= 0) 284 386 goto free_iovecs; 285 387 286 - rc = process_vm_rw_core(pid, iov_l, liovcnt, iov_r, riovcnt, flags, 287 - vm_write); 388 + rc = process_vm_rw_core(pid, &iter, iov_r, riovcnt, flags, vm_write); 288 389 289 390 free_iovecs: 290 391 if (iov_r != iovstack_r) ··· 325 424 struct iovec iovstack_r[UIO_FASTIOV]; 326 425 struct iovec *iov_l = iovstack_l; 327 426 struct iovec *iov_r = iovstack_r; 427 + struct iov_iter iter; 328 428 ssize_t rc = -EFAULT; 329 429 330 430 if (flags != 0) ··· 341 439 &iov_l); 342 440 if (rc <= 0) 343 441 goto free_iovecs; 442 + iov_iter_init(&iter, iov_l, liovcnt, rc, 0); 344 443 rc = compat_rw_copy_check_uvector(CHECK_IOVEC_ONLY, rvec, riovcnt, 345 444 UIO_FASTIOV, iovstack_r, 346 445 &iov_r); 347 446 if (rc <= 0) 348 447 goto free_iovecs; 349 448 350 - rc = process_vm_rw_core(pid, iov_l, liovcnt, iov_r, riovcnt, flags, 351 - vm_write); 449 + rc = process_vm_rw_core(pid, &iter, iov_r, riovcnt, flags, vm_write); 352 450 353 451 free_iovecs: 354 452 if (iov_r != iovstack_r)
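process_vm_access.c implements the process_vm_readv()/process_vm_writev() syscalls, and the rework above only changes how the local iovecs are consumed (one iov_iter instead of hand-rolled index/offset bookkeeping); the userspace ABI is unchanged. For reference, a minimal self-contained caller looks like the program below. It reads from its own pid so no second process is needed, and it assumes the glibc wrapper (glibc >= 2.15) exposed via <sys/uio.h> with _GNU_SOURCE.

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/uio.h>

int main(void)
{
	char src[] = "hello from the 'remote' address space";
	char dst[64] = "";
	struct iovec local = { .iov_base = dst, .iov_len = sizeof(dst) };
	struct iovec remote = { .iov_base = src, .iov_len = strlen(src) + 1 };
	ssize_t n;

	/* one local and one remote iovec; the flags argument must be 0 */
	n = process_vm_readv(getpid(), &local, 1, &remote, 1, 0);
	if (n < 0) {
		perror("process_vm_readv");
		return 1;
	}
	printf("copied %zd bytes: %s\n", n, dst);
	return 0;
}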
+28 -51
mm/shmem.c
··· 1402 1402 return copied; 1403 1403 } 1404 1404 1405 - static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_t *desc, read_actor_t actor) 1405 + static ssize_t shmem_file_aio_read(struct kiocb *iocb, 1406 + const struct iovec *iov, unsigned long nr_segs, loff_t pos) 1406 1407 { 1407 - struct inode *inode = file_inode(filp); 1408 + struct file *file = iocb->ki_filp; 1409 + struct inode *inode = file_inode(file); 1408 1410 struct address_space *mapping = inode->i_mapping; 1409 1411 pgoff_t index; 1410 1412 unsigned long offset; 1411 1413 enum sgp_type sgp = SGP_READ; 1414 + int error; 1415 + ssize_t retval; 1416 + size_t count; 1417 + loff_t *ppos = &iocb->ki_pos; 1418 + struct iov_iter iter; 1419 + 1420 + retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE); 1421 + if (retval) 1422 + return retval; 1423 + iov_iter_init(&iter, iov, nr_segs, count, 0); 1412 1424 1413 1425 /* 1414 1426 * Might this read be for a stacking filesystem? Then when reading ··· 1448 1436 break; 1449 1437 } 1450 1438 1451 - desc->error = shmem_getpage(inode, index, &page, sgp, NULL); 1452 - if (desc->error) { 1453 - if (desc->error == -EINVAL) 1454 - desc->error = 0; 1439 + error = shmem_getpage(inode, index, &page, sgp, NULL); 1440 + if (error) { 1441 + if (error == -EINVAL) 1442 + error = 0; 1455 1443 break; 1456 1444 } 1457 1445 if (page) ··· 1495 1483 /* 1496 1484 * Ok, we have the page, and it's up-to-date, so 1497 1485 * now we can copy it to user space... 1498 - * 1499 - * The actor routine returns how many bytes were actually used.. 1500 - * NOTE! This may not be the same as how much of a user buffer 1501 - * we filled up (we may be padding etc), so we can only update 1502 - * "pos" here (the actor routine has to update the user buffer 1503 - * pointers and the remaining count). 1504 1486 */ 1505 - ret = actor(desc, page, offset, nr); 1487 + ret = copy_page_to_iter(page, offset, nr, &iter); 1488 + retval += ret; 1506 1489 offset += ret; 1507 1490 index += offset >> PAGE_CACHE_SHIFT; 1508 1491 offset &= ~PAGE_CACHE_MASK; 1509 1492 1510 1493 page_cache_release(page); 1511 - if (ret != nr || !desc->count) 1494 + if (!iov_iter_count(&iter)) 1512 1495 break; 1513 - 1496 + if (ret < nr) { 1497 + error = -EFAULT; 1498 + break; 1499 + } 1514 1500 cond_resched(); 1515 1501 } 1516 1502 1517 1503 *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset; 1518 - file_accessed(filp); 1519 - } 1520 - 1521 - static ssize_t shmem_file_aio_read(struct kiocb *iocb, 1522 - const struct iovec *iov, unsigned long nr_segs, loff_t pos) 1523 - { 1524 - struct file *filp = iocb->ki_filp; 1525 - ssize_t retval; 1526 - unsigned long seg; 1527 - size_t count; 1528 - loff_t *ppos = &iocb->ki_pos; 1529 - 1530 - retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE); 1531 - if (retval) 1532 - return retval; 1533 - 1534 - for (seg = 0; seg < nr_segs; seg++) { 1535 - read_descriptor_t desc; 1536 - 1537 - desc.written = 0; 1538 - desc.arg.buf = iov[seg].iov_base; 1539 - desc.count = iov[seg].iov_len; 1540 - if (desc.count == 0) 1541 - continue; 1542 - desc.error = 0; 1543 - do_shmem_file_read(filp, ppos, &desc, file_read_actor); 1544 - retval += desc.written; 1545 - if (desc.error) { 1546 - retval = retval ?: desc.error; 1547 - break; 1548 - } 1549 - if (desc.count > 0) 1550 - break; 1551 - } 1552 - return retval; 1504 + file_accessed(file); 1505 + return retval ? 
retval : error; 1553 1506 } 1554 1507 1555 1508 static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos, ··· 1553 1576 index = *ppos >> PAGE_CACHE_SHIFT; 1554 1577 loff = *ppos & ~PAGE_CACHE_MASK; 1555 1578 req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 1556 - nr_pages = min(req_pages, pipe->buffers); 1579 + nr_pages = min(req_pages, spd.nr_pages_max); 1557 1580 1558 1581 spd.nr_pages = find_get_pages_contig(mapping, index, 1559 1582 nr_pages, spd.pages);
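The shmem conversion above is the read-side counterpart of the write-path changes: instead of driving a read_descriptor_t through an actor callback, the read loop copies each up-to-date page straight into the caller's iov_iter with copy_page_to_iter(), which advances the iter itself and reports a short copy when the user buffer faults. A hedged sketch of that loop shape is given below; foofs_copy_pages() is a hypothetical helper, the includes are the assumed headers, and only functions visible in this series are used.

#include <linux/pagemap.h>
#include <linux/uio.h>

/* Hypothetical example (not from this series): copy a run of page cache
 * pages into the caller's iov_iter. */
static ssize_t foofs_copy_pages(struct page **pages, unsigned int nr_pages,
				unsigned long offset, struct iov_iter *iter)
{
	ssize_t copied = 0;
	unsigned int p;

	for (p = 0; p < nr_pages && iov_iter_count(iter); p++) {
		size_t want = PAGE_CACHE_SIZE - offset;
		size_t ret;

		if (want > iov_iter_count(iter))
			want = iov_iter_count(iter);
		/* copy_page_to_iter() advances the iter by what it copied */
		ret = copy_page_to_iter(pages[p], offset, want, iter);
		copied += ret;
		offset = 0;
		if (ret < want)		/* short copy: user buffer faulted */
			return copied ? copied : -EFAULT;
	}
	return copied;
}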
+1 -1
security/integrity/evm/evm_crypto.c
··· 139 139 int error; 140 140 int size; 141 141 142 - if (!inode->i_op || !inode->i_op->getxattr) 142 + if (!inode->i_op->getxattr) 143 143 return -EOPNOTSUPP; 144 144 desc = init_desc(type); 145 145 if (IS_ERR(desc))
+1 -1
security/integrity/evm/evm_main.c
··· 64 64 int error; 65 65 int count = 0; 66 66 67 - if (!inode->i_op || !inode->i_op->getxattr) 67 + if (!inode->i_op->getxattr) 68 68 return -EOPNOTSUPP; 69 69 70 70 for (xattr = evm_config_xattrnames; *xattr != NULL; xattr++) {
+2 -2
security/tomoyo/realpath.c
··· 173 173 * Use filesystem name if filesystem does not support rename() 174 174 * operation. 175 175 */ 176 - if (inode->i_op && !inode->i_op->rename) 176 + if (!inode->i_op->rename) 177 177 goto prepend_filesystem_name; 178 178 } 179 179 /* Prepend device name. */ ··· 282 282 * Get local name for filesystems without rename() operation 283 283 * or dentry without vfsmount. 284 284 */ 285 - if (!path->mnt || (inode->i_op && !inode->i_op->rename)) 285 + if (!path->mnt || !inode->i_op->rename) 286 286 pos = tomoyo_get_local_path(path->dentry, buf, 287 287 buf_len - 1); 288 288 /* Get absolute name for the rest. */