Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'pull-work.iov_iter-rebased' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull more iov_iter updates from Al Viro:

- more new_sync_{read,write}() speedups, via the introduction of ITER_UBUF

- ITER_PIPE cleanups

- unification of iov_iter_get_pages/iov_iter_get_pages_alloc and
switching them to advancing semantics

- making ITER_PIPE take high-order pages without splitting them

- handling copy_page_from_iter() for high-order pages properly

* tag 'pull-work.iov_iter-rebased' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (32 commits)
fix copy_page_from_iter() for compound destinations
hugetlbfs: copy_page_to_iter() can deal with compound pages
copy_page_to_iter(): don't split high-order page in case of ITER_PIPE
expand those iov_iter_advance()...
pipe_get_pages(): switch to append_pipe()
get rid of non-advancing variants
ceph: switch the last caller of iov_iter_get_pages_alloc()
9p: convert to advancing variant of iov_iter_get_pages_alloc()
af_alg_make_sg(): switch to advancing variant of iov_iter_get_pages()
iter_to_pipe(): switch to advancing variant of iov_iter_get_pages()
block: convert to advancing variants of iov_iter_get_pages{,_alloc}()
iov_iter: advancing variants of iov_iter_get_pages{,_alloc}()
iov_iter: saner helper for page array allocation
fold __pipe_get_pages() into pipe_get_pages()
ITER_XARRAY: don't open-code DIV_ROUND_UP()
unify the rest of iov_iter_get_pages()/iov_iter_get_pages_alloc() guts
unify xarray_get_pages() and xarray_get_pages_alloc()
unify pipe_get_pages() and pipe_get_pages_alloc()
iov_iter_get_pages(): sanity-check arguments
iov_iter_get_pages_alloc(): lift freeing pages array on failure exits into wrapper
...

+512 -625
+13 -10
block/bio.c
··· 1200 1200 struct page **pages = (struct page **)bv; 1201 1201 ssize_t size, left; 1202 1202 unsigned len, i = 0; 1203 - size_t offset; 1203 + size_t offset, trim; 1204 1204 int ret = 0; 1205 1205 1206 1206 /* ··· 1218 1218 * result to ensure the bio's total size is correct. The remainder of 1219 1219 * the iov data will be picked up in the next bio iteration. 1220 1220 */ 1221 - size = iov_iter_get_pages(iter, pages, UINT_MAX - bio->bi_iter.bi_size, 1221 + size = iov_iter_get_pages2(iter, pages, UINT_MAX - bio->bi_iter.bi_size, 1222 1222 nr_pages, &offset); 1223 - if (size > 0) { 1224 - nr_pages = DIV_ROUND_UP(offset + size, PAGE_SIZE); 1225 - size = ALIGN_DOWN(size, bdev_logical_block_size(bio->bi_bdev)); 1226 - } else 1227 - nr_pages = 0; 1223 + if (unlikely(size <= 0)) 1224 + return size ? size : -EFAULT; 1228 1225 1229 - if (unlikely(size <= 0)) { 1230 - ret = size ? size : -EFAULT; 1226 + nr_pages = DIV_ROUND_UP(offset + size, PAGE_SIZE); 1227 + 1228 + trim = size & (bdev_logical_block_size(bio->bi_bdev) - 1); 1229 + iov_iter_revert(iter, trim); 1230 + 1231 + size -= trim; 1232 + if (unlikely(!size)) { 1233 + ret = -EFAULT; 1231 1234 goto out; 1232 1235 } 1233 1236 ··· 1249 1246 offset = 0; 1250 1247 } 1251 1248 1252 - iov_iter_advance(iter, size - left); 1249 + iov_iter_revert(iter, left); 1253 1250 out: 1254 1251 while (i < nr_pages) 1255 1252 put_page(pages[i++]);
+4 -3
block/blk-map.c
··· 254 254 size_t offs, added = 0; 255 255 int npages; 256 256 257 - bytes = iov_iter_get_pages_alloc(iter, &pages, LONG_MAX, &offs); 257 + bytes = iov_iter_get_pages_alloc2(iter, &pages, LONG_MAX, &offs); 258 258 if (unlikely(bytes <= 0)) { 259 259 ret = bytes ? bytes : -EFAULT; 260 260 goto out_unmap; ··· 284 284 bytes -= n; 285 285 offs = 0; 286 286 } 287 - iov_iter_advance(iter, added); 288 287 } 289 288 /* 290 289 * release the pages we didn't map into the bio, if any ··· 292 293 put_page(pages[j++]); 293 294 kvfree(pages); 294 295 /* couldn't stuff something into bio? */ 295 - if (bytes) 296 + if (bytes) { 297 + iov_iter_revert(iter, bytes); 296 298 break; 299 + } 297 300 } 298 301 299 302 ret = blk_rq_append_bio(rq, bio);
+3 -3
block/fops.c
··· 75 75 76 76 if (iov_iter_rw(iter) == READ) { 77 77 bio_init(&bio, bdev, vecs, nr_pages, REQ_OP_READ); 78 - if (iter_is_iovec(iter)) 78 + if (user_backed_iter(iter)) 79 79 should_dirty = true; 80 80 } else { 81 81 bio_init(&bio, bdev, vecs, nr_pages, dio_bio_write_op(iocb)); ··· 204 204 } 205 205 206 206 dio->size = 0; 207 - if (is_read && iter_is_iovec(iter)) 207 + if (is_read && user_backed_iter(iter)) 208 208 dio->flags |= DIO_SHOULD_DIRTY; 209 209 210 210 blk_start_plug(&plug); ··· 335 335 dio->size = bio->bi_iter.bi_size; 336 336 337 337 if (is_read) { 338 - if (iter_is_iovec(iter)) { 338 + if (user_backed_iter(iter)) { 339 339 dio->flags |= DIO_SHOULD_DIRTY; 340 340 bio_set_pages_dirty(bio); 341 341 }
+1 -2
crypto/af_alg.c
··· 404 404 ssize_t n; 405 405 int npages, i; 406 406 407 - n = iov_iter_get_pages(iter, sgl->pages, len, ALG_MAX_PAGES, &off); 407 + n = iov_iter_get_pages2(iter, sgl->pages, len, ALG_MAX_PAGES, &off); 408 408 if (n < 0) 409 409 return n; 410 410 ··· 1191 1191 len += err; 1192 1192 atomic_add(err, &ctx->rcvused); 1193 1193 rsgl->sg_num_bytes = err; 1194 - iov_iter_advance(&msg->msg_iter, err); 1195 1194 } 1196 1195 1197 1196 *outlen = len;
+3 -2
crypto/algif_hash.c
··· 102 102 err = crypto_wait_req(crypto_ahash_update(&ctx->req), 103 103 &ctx->wait); 104 104 af_alg_free_sg(&ctx->sgl); 105 - if (err) 105 + if (err) { 106 + iov_iter_revert(&msg->msg_iter, len); 106 107 goto unlock; 108 + } 107 109 108 110 copied += len; 109 - iov_iter_advance(&msg->msg_iter, len); 110 111 } 111 112 112 113 err = 0;
+1 -3
drivers/vhost/scsi.c
··· 643 643 size_t offset; 644 644 unsigned int npages = 0; 645 645 646 - bytes = iov_iter_get_pages(iter, pages, LONG_MAX, 646 + bytes = iov_iter_get_pages2(iter, pages, LONG_MAX, 647 647 VHOST_SCSI_PREALLOC_UPAGES, &offset); 648 648 /* No pages were pinned */ 649 649 if (bytes <= 0) 650 650 return bytes < 0 ? bytes : -EFAULT; 651 - 652 - iov_iter_advance(iter, bytes); 653 651 654 652 while (bytes) { 655 653 unsigned n = min_t(unsigned, PAGE_SIZE - offset, bytes);
+1 -1
fs/ceph/addr.c
··· 329 329 330 330 dout("%s: pos=%llu orig_len=%zu len=%llu\n", __func__, subreq->start, subreq->len, len); 331 331 iov_iter_xarray(&iter, READ, &rreq->mapping->i_pages, subreq->start, len); 332 - err = iov_iter_get_pages_alloc(&iter, &pages, len, &page_off); 332 + err = iov_iter_get_pages_alloc2(&iter, &pages, len, &page_off); 333 333 if (err < 0) { 334 334 dout("%s: iov_ter_get_pages_alloc returned %d\n", __func__, err); 335 335 goto out;
+2 -3
fs/ceph/file.c
··· 95 95 size_t start; 96 96 int idx = 0; 97 97 98 - bytes = iov_iter_get_pages(iter, pages, maxsize - size, 98 + bytes = iov_iter_get_pages2(iter, pages, maxsize - size, 99 99 ITER_GET_BVECS_PAGES, &start); 100 100 if (bytes < 0) 101 101 return size ?: bytes; 102 102 103 - iov_iter_advance(iter, bytes); 104 103 size += bytes; 105 104 106 105 for ( ; bytes; idx++, bvec_idx++) { ··· 1261 1262 size_t count = iov_iter_count(iter); 1262 1263 loff_t pos = iocb->ki_pos; 1263 1264 bool write = iov_iter_rw(iter) == WRITE; 1264 - bool should_dirty = !write && iter_is_iovec(iter); 1265 + bool should_dirty = !write && user_backed_iter(iter); 1265 1266 1266 1267 if (write && ceph_snap(file_inode(file)) != CEPH_NOSNAP) 1267 1268 return -EROFS;
+3 -5
fs/cifs/file.c
··· 3276 3276 if (ctx->direct_io) { 3277 3277 ssize_t result; 3278 3278 3279 - result = iov_iter_get_pages_alloc( 3279 + result = iov_iter_get_pages_alloc2( 3280 3280 from, &pagevec, cur_len, &start); 3281 3281 if (result < 0) { 3282 3282 cifs_dbg(VFS, ··· 3290 3290 break; 3291 3291 } 3292 3292 cur_len = (size_t)result; 3293 - iov_iter_advance(from, cur_len); 3294 3293 3295 3294 nr_pages = 3296 3295 (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE; ··· 4011 4012 if (ctx->direct_io) { 4012 4013 ssize_t result; 4013 4014 4014 - result = iov_iter_get_pages_alloc( 4015 + result = iov_iter_get_pages_alloc2( 4015 4016 &direct_iov, &pagevec, 4016 4017 cur_len, &start); 4017 4018 if (result < 0) { ··· 4027 4028 break; 4028 4029 } 4029 4030 cur_len = (size_t)result; 4030 - iov_iter_advance(&direct_iov, cur_len); 4031 4031 4032 4032 rdata = cifs_readdata_direct_alloc( 4033 4033 pagevec, cifs_uncached_readv_complete); ··· 4256 4258 if (!is_sync_kiocb(iocb)) 4257 4259 ctx->iocb = iocb; 4258 4260 4259 - if (iter_is_iovec(to)) 4261 + if (user_backed_iter(to)) 4260 4262 ctx->should_dirty = true; 4261 4263 4262 4264 if (direct) {
+1 -2
fs/cifs/misc.c
··· 1022 1022 saved_len = count; 1023 1023 1024 1024 while (count && npages < max_pages) { 1025 - rc = iov_iter_get_pages(iter, pages, count, max_pages, &start); 1025 + rc = iov_iter_get_pages2(iter, pages, count, max_pages, &start); 1026 1026 if (rc < 0) { 1027 1027 cifs_dbg(VFS, "Couldn't get user pages (rc=%zd)\n", rc); 1028 1028 break; ··· 1034 1034 break; 1035 1035 } 1036 1036 1037 - iov_iter_advance(iter, rc); 1038 1037 count -= rc; 1039 1038 rc += start; 1040 1039 cur_npages = DIV_ROUND_UP(rc, PAGE_SIZE);
+2 -3
fs/direct-io.c
··· 169 169 const enum req_op dio_op = dio->opf & REQ_OP_MASK; 170 170 ssize_t ret; 171 171 172 - ret = iov_iter_get_pages(sdio->iter, dio->pages, LONG_MAX, DIO_PAGES, 172 + ret = iov_iter_get_pages2(sdio->iter, dio->pages, LONG_MAX, DIO_PAGES, 173 173 &sdio->from); 174 174 175 175 if (ret < 0 && sdio->blocks_available && dio_op == REQ_OP_WRITE) { ··· 191 191 } 192 192 193 193 if (ret >= 0) { 194 - iov_iter_advance(sdio->iter, ret); 195 194 ret += sdio->from; 196 195 sdio->head = 0; 197 196 sdio->tail = (ret + PAGE_SIZE - 1) / PAGE_SIZE; ··· 1250 1251 spin_lock_init(&dio->bio_lock); 1251 1252 dio->refcount = 1; 1252 1253 1253 - dio->should_dirty = iter_is_iovec(iter) && iov_iter_rw(iter) == READ; 1254 + dio->should_dirty = user_backed_iter(iter) && iov_iter_rw(iter) == READ; 1254 1255 sdio.iter = iter; 1255 1256 sdio.final_block_in_request = end >> blkbits; 1256 1257
+3 -4
fs/fuse/dev.c
··· 730 730 } 731 731 } else { 732 732 size_t off; 733 - err = iov_iter_get_pages(cs->iter, &page, PAGE_SIZE, 1, &off); 733 + err = iov_iter_get_pages2(cs->iter, &page, PAGE_SIZE, 1, &off); 734 734 if (err < 0) 735 735 return err; 736 736 BUG_ON(!err); 737 737 cs->len = err; 738 738 cs->offset = off; 739 739 cs->pg = page; 740 - iov_iter_advance(cs->iter, err); 741 740 } 742 741 743 742 return lock_request(cs->req); ··· 1355 1356 if (!fud) 1356 1357 return -EPERM; 1357 1358 1358 - if (!iter_is_iovec(to)) 1359 + if (!user_backed_iter(to)) 1359 1360 return -EINVAL; 1360 1361 1361 1362 fuse_copy_init(&cs, 1, to); ··· 1948 1949 if (!fud) 1949 1950 return -EPERM; 1950 1951 1951 - if (!iter_is_iovec(from)) 1952 + if (!user_backed_iter(from)) 1952 1953 return -EINVAL; 1953 1954 1954 1955 fuse_copy_init(&cs, 0, from);
+2 -3
fs/fuse/file.c
··· 1414 1414 while (nbytes < *nbytesp && ap->num_pages < max_pages) { 1415 1415 unsigned npages; 1416 1416 size_t start; 1417 - ret = iov_iter_get_pages(ii, &ap->pages[ap->num_pages], 1417 + ret = iov_iter_get_pages2(ii, &ap->pages[ap->num_pages], 1418 1418 *nbytesp - nbytes, 1419 1419 max_pages - ap->num_pages, 1420 1420 &start); 1421 1421 if (ret < 0) 1422 1422 break; 1423 1423 1424 - iov_iter_advance(ii, ret); 1425 1424 nbytes += ret; 1426 1425 1427 1426 ret += start; ··· 1477 1478 inode_unlock(inode); 1478 1479 } 1479 1480 1480 - io->should_dirty = !write && iter_is_iovec(iter); 1481 + io->should_dirty = !write && user_backed_iter(iter); 1481 1482 while (count) { 1482 1483 ssize_t nres; 1483 1484 fl_owner_t owner = current->files;
+1 -1
fs/gfs2/file.c
··· 780 780 781 781 if (!count) 782 782 return false; 783 - if (!iter_is_iovec(i)) 783 + if (!user_backed_iter(i)) 784 784 return false; 785 785 786 786 size = PAGE_SIZE;
+1 -30
fs/hugetlbfs/inode.c
··· 282 282 } 283 283 #endif 284 284 285 - static size_t 286 - hugetlbfs_read_actor(struct page *page, unsigned long offset, 287 - struct iov_iter *to, unsigned long size) 288 - { 289 - size_t copied = 0; 290 - int i, chunksize; 291 - 292 - /* Find which 4k chunk and offset with in that chunk */ 293 - i = offset >> PAGE_SHIFT; 294 - offset = offset & ~PAGE_MASK; 295 - 296 - while (size) { 297 - size_t n; 298 - chunksize = PAGE_SIZE; 299 - if (offset) 300 - chunksize -= offset; 301 - if (chunksize > size) 302 - chunksize = size; 303 - n = copy_page_to_iter(&page[i], offset, chunksize, to); 304 - copied += n; 305 - if (n != chunksize) 306 - return copied; 307 - offset = 0; 308 - size -= chunksize; 309 - i++; 310 - } 311 - return copied; 312 - } 313 - 314 285 /* 315 286 * Support for read() - Find the page attached to f_mapping and copy out the 316 287 * data. This provides functionality similar to filemap_read(). ··· 331 360 /* 332 361 * We have the page, copy it to user space buffer. 333 362 */ 334 - copied = hugetlbfs_read_actor(page, offset, to, nr); 363 + copied = copy_page_to_iter(page, offset, nr, to); 335 364 put_page(page); 336 365 } 337 366 offset += copied;
+1 -1
fs/iomap/direct-io.c
··· 533 533 iomi.flags |= IOMAP_NOWAIT; 534 534 } 535 535 536 - if (iter_is_iovec(iter)) 536 + if (user_backed_iter(iter)) 537 537 dio->flags |= IOMAP_DIO_DIRTY; 538 538 } else { 539 539 iomi.flags |= IOMAP_WRITE;
+3 -5
fs/nfs/direct.c
··· 364 364 size_t pgbase; 365 365 unsigned npages, i; 366 366 367 - result = iov_iter_get_pages_alloc(iter, &pagevec, 367 + result = iov_iter_get_pages_alloc2(iter, &pagevec, 368 368 rsize, &pgbase); 369 369 if (result < 0) 370 370 break; 371 371 372 372 bytes = result; 373 - iov_iter_advance(iter, bytes); 374 373 npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE; 375 374 for (i = 0; i < npages; i++) { 376 375 struct nfs_page *req; ··· 477 478 if (!is_sync_kiocb(iocb)) 478 479 dreq->iocb = iocb; 479 480 480 - if (iter_is_iovec(iter)) 481 + if (user_backed_iter(iter)) 481 482 dreq->flags = NFS_ODIRECT_SHOULD_DIRTY; 482 483 483 484 if (!swap) ··· 811 812 size_t pgbase; 812 813 unsigned npages, i; 813 814 814 - result = iov_iter_get_pages_alloc(iter, &pagevec, 815 + result = iov_iter_get_pages_alloc2(iter, &pagevec, 815 816 wsize, &pgbase); 816 817 if (result < 0) 817 818 break; 818 819 819 820 bytes = result; 820 - iov_iter_advance(iter, bytes); 821 821 npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE; 822 822 for (i = 0; i < npages; i++) { 823 823 struct nfs_page *req;
+2 -4
fs/read_write.c
··· 378 378 379 379 static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) 380 380 { 381 - struct iovec iov = { .iov_base = buf, .iov_len = len }; 382 381 struct kiocb kiocb; 383 382 struct iov_iter iter; 384 383 ssize_t ret; 385 384 386 385 init_sync_kiocb(&kiocb, filp); 387 386 kiocb.ki_pos = (ppos ? *ppos : 0); 388 - iov_iter_init(&iter, READ, &iov, 1, len); 387 + iov_iter_ubuf(&iter, READ, buf, len); 389 388 390 389 ret = call_read_iter(filp, &kiocb, &iter); 391 390 BUG_ON(ret == -EIOCBQUEUED); ··· 480 481 481 482 static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) 482 483 { 483 - struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len }; 484 484 struct kiocb kiocb; 485 485 struct iov_iter iter; 486 486 ssize_t ret; 487 487 488 488 init_sync_kiocb(&kiocb, filp); 489 489 kiocb.ki_pos = (ppos ? *ppos : 0); 490 - iov_iter_init(&iter, WRITE, &iov, 1, len); 490 + iov_iter_ubuf(&iter, WRITE, (void __user *)buf, len); 491 491 492 492 ret = call_write_iter(filp, &kiocb, &iter); 493 493 BUG_ON(ret == -EIOCBQUEUED);
+26 -28
fs/splice.c
··· 301 301 { 302 302 struct iov_iter to; 303 303 struct kiocb kiocb; 304 - unsigned int i_head; 305 304 int ret; 306 305 307 306 iov_iter_pipe(&to, READ, pipe, len); 308 - i_head = to.head; 309 307 init_sync_kiocb(&kiocb, in); 310 308 kiocb.ki_pos = *ppos; 311 309 ret = call_read_iter(in, &kiocb, &to); ··· 311 313 *ppos = kiocb.ki_pos; 312 314 file_accessed(in); 313 315 } else if (ret < 0) { 314 - to.head = i_head; 315 - to.iov_offset = 0; 316 - iov_iter_advance(&to, 0); /* to free what was emitted */ 316 + /* free what was emitted */ 317 + pipe_discard_from(pipe, to.start_head); 317 318 /* 318 319 * callers of ->splice_read() expect -EAGAIN on 319 320 * "can't put anything in there", rather than -EFAULT. ··· 1158 1161 }; 1159 1162 size_t total = 0; 1160 1163 int ret = 0; 1161 - bool failed = false; 1162 1164 1163 - while (iov_iter_count(from) && !failed) { 1165 + while (iov_iter_count(from)) { 1164 1166 struct page *pages[16]; 1165 - ssize_t copied; 1167 + ssize_t left; 1166 1168 size_t start; 1167 - int n; 1169 + int i, n; 1168 1170 1169 - copied = iov_iter_get_pages(from, pages, ~0UL, 16, &start); 1170 - if (copied <= 0) { 1171 - ret = copied; 1171 + left = iov_iter_get_pages2(from, pages, ~0UL, 16, &start); 1172 + if (left <= 0) { 1173 + ret = left; 1172 1174 break; 1173 1175 } 1174 1176 1175 - for (n = 0; copied; n++, start = 0) { 1176 - int size = min_t(int, copied, PAGE_SIZE - start); 1177 - if (!failed) { 1178 - buf.page = pages[n]; 1179 - buf.offset = start; 1180 - buf.len = size; 1181 - ret = add_to_pipe(pipe, &buf); 1182 - if (unlikely(ret < 0)) { 1183 - failed = true; 1184 - } else { 1185 - iov_iter_advance(from, ret); 1186 - total += ret; 1187 - } 1188 - } else { 1189 - put_page(pages[n]); 1177 + n = DIV_ROUND_UP(left + start, PAGE_SIZE); 1178 + for (i = 0; i < n; i++) { 1179 + int size = min_t(int, left, PAGE_SIZE - start); 1180 + 1181 + buf.page = pages[i]; 1182 + buf.offset = start; 1183 + buf.len = size; 1184 + ret = add_to_pipe(pipe, &buf); 1185 
+ if (unlikely(ret < 0)) { 1186 + iov_iter_revert(from, left); 1187 + // this one got dropped by add_to_pipe() 1188 + while (++i < n) 1189 + put_page(pages[i]); 1190 + goto out; 1190 1191 } 1191 - copied -= size; 1192 + total += ret; 1193 + left -= size; 1194 + start = 0; 1192 1195 } 1193 1196 } 1197 + out: 1194 1198 return total ? total : ret; 1195 1199 } 1196 1200
-20
include/linux/pipe_fs_i.h
··· 157 157 } 158 158 159 159 /** 160 - * pipe_space_for_user - Return number of slots available to userspace 161 - * @head: The pipe ring head pointer 162 - * @tail: The pipe ring tail pointer 163 - * @pipe: The pipe info structure 164 - */ 165 - static inline unsigned int pipe_space_for_user(unsigned int head, unsigned int tail, 166 - struct pipe_inode_info *pipe) 167 - { 168 - unsigned int p_occupancy, p_space; 169 - 170 - p_occupancy = pipe_occupancy(head, tail); 171 - if (p_occupancy >= pipe->max_usage) 172 - return 0; 173 - p_space = pipe->ring_size - p_occupancy; 174 - if (p_space > pipe->max_usage) 175 - p_space = pipe->max_usage; 176 - return p_space; 177 - } 178 - 179 - /** 180 160 * pipe_buf_get - get a reference to a pipe_buffer 181 161 * @pipe: the pipe that the buffer belongs to 182 162 * @buf: the buffer to get a reference to
+32 -3
include/linux/uio.h
··· 26 26 ITER_PIPE, 27 27 ITER_XARRAY, 28 28 ITER_DISCARD, 29 + ITER_UBUF, 29 30 }; 30 31 31 32 struct iov_iter_state { ··· 39 38 u8 iter_type; 40 39 bool nofault; 41 40 bool data_source; 42 - size_t iov_offset; 41 + bool user_backed; 42 + union { 43 + size_t iov_offset; 44 + int last_offset; 45 + }; 43 46 size_t count; 44 47 union { 45 48 const struct iovec *iov; ··· 51 46 const struct bio_vec *bvec; 52 47 struct xarray *xarray; 53 48 struct pipe_inode_info *pipe; 49 + void __user *ubuf; 54 50 }; 55 51 union { 56 52 unsigned long nr_segs; ··· 74 68 state->iov_offset = iter->iov_offset; 75 69 state->count = iter->count; 76 70 state->nr_segs = iter->nr_segs; 71 + } 72 + 73 + static inline bool iter_is_ubuf(const struct iov_iter *i) 74 + { 75 + return iov_iter_type(i) == ITER_UBUF; 77 76 } 78 77 79 78 static inline bool iter_is_iovec(const struct iov_iter *i) ··· 114 103 static inline unsigned char iov_iter_rw(const struct iov_iter *i) 115 104 { 116 105 return i->data_source ? WRITE : READ; 106 + } 107 + 108 + static inline bool user_backed_iter(const struct iov_iter *i) 109 + { 110 + return i->user_backed; 117 111 } 118 112 119 113 /* ··· 247 231 void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count); 248 232 void iov_iter_xarray(struct iov_iter *i, unsigned int direction, struct xarray *xarray, 249 233 loff_t start, size_t count); 250 - ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages, 234 + ssize_t iov_iter_get_pages2(struct iov_iter *i, struct page **pages, 251 235 size_t maxsize, unsigned maxpages, size_t *start); 252 - ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages, 236 + ssize_t iov_iter_get_pages_alloc2(struct iov_iter *i, struct page ***pages, 253 237 size_t maxsize, size_t *start); 254 238 int iov_iter_npages(const struct iov_iter *i, int maxpages); 255 239 void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state); ··· 337 321 struct iov_iter *i, bool compat); 338 322 int 
import_single_range(int type, void __user *buf, size_t len, 339 323 struct iovec *iov, struct iov_iter *i); 324 + 325 + static inline void iov_iter_ubuf(struct iov_iter *i, unsigned int direction, 326 + void __user *buf, size_t count) 327 + { 328 + WARN_ON(direction & ~(READ | WRITE)); 329 + *i = (struct iov_iter) { 330 + .iter_type = ITER_UBUF, 331 + .user_backed = true, 332 + .data_source = direction, 333 + .ubuf = buf, 334 + .count = count 335 + }; 336 + } 340 337 341 338 #endif
+376 -460
lib/iov_iter.c
··· 16 16 17 17 #define PIPE_PARANOIA /* for now */ 18 18 19 + /* covers ubuf and kbuf alike */ 20 + #define iterate_buf(i, n, base, len, off, __p, STEP) { \ 21 + size_t __maybe_unused off = 0; \ 22 + len = n; \ 23 + base = __p + i->iov_offset; \ 24 + len -= (STEP); \ 25 + i->iov_offset += len; \ 26 + n = len; \ 27 + } 28 + 19 29 /* covers iovec and kvec alike */ 20 30 #define iterate_iovec(i, n, base, len, off, __p, STEP) { \ 21 31 size_t off = 0; \ ··· 120 110 if (unlikely(i->count < n)) \ 121 111 n = i->count; \ 122 112 if (likely(n)) { \ 123 - if (likely(iter_is_iovec(i))) { \ 113 + if (likely(iter_is_ubuf(i))) { \ 114 + void __user *base; \ 115 + size_t len; \ 116 + iterate_buf(i, n, base, len, off, \ 117 + i->ubuf, (I)) \ 118 + } else if (likely(iter_is_iovec(i))) { \ 124 119 const struct iovec *iov = i->iov; \ 125 120 void __user *base; \ 126 121 size_t len; \ ··· 183 168 return n; 184 169 } 185 170 171 + static inline struct pipe_buffer *pipe_buf(const struct pipe_inode_info *pipe, 172 + unsigned int slot) 173 + { 174 + return &pipe->bufs[slot & (pipe->ring_size - 1)]; 175 + } 176 + 186 177 #ifdef PIPE_PARANOIA 187 178 static bool sanity(const struct iov_iter *i) 188 179 { 189 180 struct pipe_inode_info *pipe = i->pipe; 190 181 unsigned int p_head = pipe->head; 191 182 unsigned int p_tail = pipe->tail; 192 - unsigned int p_mask = pipe->ring_size - 1; 193 183 unsigned int p_occupancy = pipe_occupancy(p_head, p_tail); 194 184 unsigned int i_head = i->head; 195 185 unsigned int idx; 196 186 197 - if (i->iov_offset) { 187 + if (i->last_offset) { 198 188 struct pipe_buffer *p; 199 189 if (unlikely(p_occupancy == 0)) 200 190 goto Bad; // pipe must be non-empty 201 191 if (unlikely(i_head != p_head - 1)) 202 192 goto Bad; // must be at the last buffer... 
203 193 204 - p = &pipe->bufs[i_head & p_mask]; 205 - if (unlikely(p->offset + p->len != i->iov_offset)) 194 + p = pipe_buf(pipe, i_head); 195 + if (unlikely(p->offset + p->len != abs(i->last_offset))) 206 196 goto Bad; // ... at the end of segment 207 197 } else { 208 198 if (i_head != p_head) ··· 215 195 } 216 196 return true; 217 197 Bad: 218 - printk(KERN_ERR "idx = %d, offset = %zd\n", i_head, i->iov_offset); 198 + printk(KERN_ERR "idx = %d, offset = %d\n", i_head, i->last_offset); 219 199 printk(KERN_ERR "head = %d, tail = %d, buffers = %d\n", 220 200 p_head, p_tail, pipe->ring_size); 221 201 for (idx = 0; idx < pipe->ring_size; idx++) ··· 231 211 #define sanity(i) true 232 212 #endif 233 213 214 + static struct page *push_anon(struct pipe_inode_info *pipe, unsigned size) 215 + { 216 + struct page *page = alloc_page(GFP_USER); 217 + if (page) { 218 + struct pipe_buffer *buf = pipe_buf(pipe, pipe->head++); 219 + *buf = (struct pipe_buffer) { 220 + .ops = &default_pipe_buf_ops, 221 + .page = page, 222 + .offset = 0, 223 + .len = size 224 + }; 225 + } 226 + return page; 227 + } 228 + 229 + static void push_page(struct pipe_inode_info *pipe, struct page *page, 230 + unsigned int offset, unsigned int size) 231 + { 232 + struct pipe_buffer *buf = pipe_buf(pipe, pipe->head++); 233 + *buf = (struct pipe_buffer) { 234 + .ops = &page_cache_pipe_buf_ops, 235 + .page = page, 236 + .offset = offset, 237 + .len = size 238 + }; 239 + get_page(page); 240 + } 241 + 242 + static inline int last_offset(const struct pipe_buffer *buf) 243 + { 244 + if (buf->ops == &default_pipe_buf_ops) 245 + return buf->len; // buf->offset is 0 for those 246 + else 247 + return -(buf->offset + buf->len); 248 + } 249 + 250 + static struct page *append_pipe(struct iov_iter *i, size_t size, 251 + unsigned int *off) 252 + { 253 + struct pipe_inode_info *pipe = i->pipe; 254 + int offset = i->last_offset; 255 + struct pipe_buffer *buf; 256 + struct page *page; 257 + 258 + if (offset > 0 && offset < 
PAGE_SIZE) { 259 + // some space in the last buffer; add to it 260 + buf = pipe_buf(pipe, pipe->head - 1); 261 + size = min_t(size_t, size, PAGE_SIZE - offset); 262 + buf->len += size; 263 + i->last_offset += size; 264 + i->count -= size; 265 + *off = offset; 266 + return buf->page; 267 + } 268 + // OK, we need a new buffer 269 + *off = 0; 270 + size = min_t(size_t, size, PAGE_SIZE); 271 + if (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) 272 + return NULL; 273 + page = push_anon(pipe, size); 274 + if (!page) 275 + return NULL; 276 + i->head = pipe->head - 1; 277 + i->last_offset = size; 278 + i->count -= size; 279 + return page; 280 + } 281 + 234 282 static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes, 235 283 struct iov_iter *i) 236 284 { 237 285 struct pipe_inode_info *pipe = i->pipe; 238 - struct pipe_buffer *buf; 239 - unsigned int p_tail = pipe->tail; 240 - unsigned int p_mask = pipe->ring_size - 1; 241 - unsigned int i_head = i->head; 242 - size_t off; 286 + unsigned int head = pipe->head; 243 287 244 288 if (unlikely(bytes > i->count)) 245 289 bytes = i->count; ··· 314 230 if (!sanity(i)) 315 231 return 0; 316 232 317 - off = i->iov_offset; 318 - buf = &pipe->bufs[i_head & p_mask]; 319 - if (off) { 320 - if (offset == off && buf->page == page) { 321 - /* merge with the last one */ 233 + if (offset && i->last_offset == -offset) { // could we merge it? 
234 + struct pipe_buffer *buf = pipe_buf(pipe, head - 1); 235 + if (buf->page == page) { 322 236 buf->len += bytes; 323 - i->iov_offset += bytes; 324 - goto out; 237 + i->last_offset -= bytes; 238 + i->count -= bytes; 239 + return bytes; 325 240 } 326 - i_head++; 327 - buf = &pipe->bufs[i_head & p_mask]; 328 241 } 329 - if (pipe_full(i_head, p_tail, pipe->max_usage)) 242 + if (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) 330 243 return 0; 331 244 332 - buf->ops = &page_cache_pipe_buf_ops; 333 - buf->flags = 0; 334 - get_page(page); 335 - buf->page = page; 336 - buf->offset = offset; 337 - buf->len = bytes; 338 - 339 - pipe->head = i_head + 1; 340 - i->iov_offset = offset + bytes; 341 - i->head = i_head; 342 - out: 245 + push_page(pipe, page, offset, bytes); 246 + i->last_offset = -(offset + bytes); 247 + i->head = head; 343 248 i->count -= bytes; 344 249 return bytes; 345 250 } ··· 348 275 */ 349 276 size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t size) 350 277 { 351 - if (iter_is_iovec(i)) { 278 + if (iter_is_ubuf(i)) { 279 + size_t n = min(size, iov_iter_count(i)); 280 + n -= fault_in_readable(i->ubuf + i->iov_offset, n); 281 + return size - n; 282 + } else if (iter_is_iovec(i)) { 352 283 size_t count = min(size, iov_iter_count(i)); 353 284 const struct iovec *p; 354 285 size_t skip; ··· 391 314 */ 392 315 size_t fault_in_iov_iter_writeable(const struct iov_iter *i, size_t size) 393 316 { 394 - if (iter_is_iovec(i)) { 317 + if (iter_is_ubuf(i)) { 318 + size_t n = min(size, iov_iter_count(i)); 319 + n -= fault_in_safe_writeable(i->ubuf + i->iov_offset, n); 320 + return size - n; 321 + } else if (iter_is_iovec(i)) { 395 322 size_t count = min(size, iov_iter_count(i)); 396 323 const struct iovec *p; 397 324 size_t skip; ··· 426 345 *i = (struct iov_iter) { 427 346 .iter_type = ITER_IOVEC, 428 347 .nofault = false, 348 + .user_backed = true, 429 349 .data_source = direction, 430 350 .iov = iov, 431 351 .nr_segs = nr_segs, ··· 436 354 } 437 
355 EXPORT_SYMBOL(iov_iter_init); 438 356 439 - static inline bool allocated(struct pipe_buffer *buf) 440 - { 441 - return buf->ops == &default_pipe_buf_ops; 442 - } 443 - 444 - static inline void data_start(const struct iov_iter *i, 445 - unsigned int *iter_headp, size_t *offp) 446 - { 447 - unsigned int p_mask = i->pipe->ring_size - 1; 448 - unsigned int iter_head = i->head; 449 - size_t off = i->iov_offset; 450 - 451 - if (off && (!allocated(&i->pipe->bufs[iter_head & p_mask]) || 452 - off == PAGE_SIZE)) { 453 - iter_head++; 454 - off = 0; 455 - } 456 - *iter_headp = iter_head; 457 - *offp = off; 458 - } 459 - 460 - static size_t push_pipe(struct iov_iter *i, size_t size, 461 - int *iter_headp, size_t *offp) 357 + // returns the offset in partial buffer (if any) 358 + static inline unsigned int pipe_npages(const struct iov_iter *i, int *npages) 462 359 { 463 360 struct pipe_inode_info *pipe = i->pipe; 464 - unsigned int p_tail = pipe->tail; 465 - unsigned int p_mask = pipe->ring_size - 1; 466 - unsigned int iter_head; 467 - size_t off; 468 - ssize_t left; 361 + int used = pipe->head - pipe->tail; 362 + int off = i->last_offset; 469 363 470 - if (unlikely(size > i->count)) 471 - size = i->count; 472 - if (unlikely(!size)) 473 - return 0; 364 + *npages = max((int)pipe->max_usage - used, 0); 474 365 475 - left = size; 476 - data_start(i, &iter_head, &off); 477 - *iter_headp = iter_head; 478 - *offp = off; 479 - if (off) { 480 - left -= PAGE_SIZE - off; 481 - if (left <= 0) { 482 - pipe->bufs[iter_head & p_mask].len += size; 483 - return size; 484 - } 485 - pipe->bufs[iter_head & p_mask].len = PAGE_SIZE; 486 - iter_head++; 366 + if (off > 0 && off < PAGE_SIZE) { // anon and not full 367 + (*npages)++; 368 + return off; 487 369 } 488 - while (!pipe_full(iter_head, p_tail, pipe->max_usage)) { 489 - struct pipe_buffer *buf = &pipe->bufs[iter_head & p_mask]; 490 - struct page *page = alloc_page(GFP_USER); 491 - if (!page) 492 - break; 493 - 494 - buf->ops = 
&default_pipe_buf_ops; 495 - buf->flags = 0; 496 - buf->page = page; 497 - buf->offset = 0; 498 - buf->len = min_t(ssize_t, left, PAGE_SIZE); 499 - left -= buf->len; 500 - iter_head++; 501 - pipe->head = iter_head; 502 - 503 - if (left == 0) 504 - return size; 505 - } 506 - return size - left; 370 + return 0; 507 371 } 508 372 509 373 static size_t copy_pipe_to_iter(const void *addr, size_t bytes, 510 374 struct iov_iter *i) 511 375 { 512 - struct pipe_inode_info *pipe = i->pipe; 513 - unsigned int p_mask = pipe->ring_size - 1; 514 - unsigned int i_head; 515 - size_t n, off; 376 + unsigned int off, chunk; 377 + 378 + if (unlikely(bytes > i->count)) 379 + bytes = i->count; 380 + if (unlikely(!bytes)) 381 + return 0; 516 382 517 383 if (!sanity(i)) 518 384 return 0; 519 385 520 - bytes = n = push_pipe(i, bytes, &i_head, &off); 521 - if (unlikely(!n)) 522 - return 0; 523 - do { 524 - size_t chunk = min_t(size_t, n, PAGE_SIZE - off); 525 - memcpy_to_page(pipe->bufs[i_head & p_mask].page, off, addr, chunk); 526 - i->head = i_head; 527 - i->iov_offset = off + chunk; 528 - n -= chunk; 386 + for (size_t n = bytes; n; n -= chunk) { 387 + struct page *page = append_pipe(i, n, &off); 388 + chunk = min_t(size_t, n, PAGE_SIZE - off); 389 + if (!page) 390 + return bytes - n; 391 + memcpy_to_page(page, off, addr, chunk); 529 392 addr += chunk; 530 - off = 0; 531 - i_head++; 532 - } while (n); 533 - i->count -= bytes; 393 + } 534 394 return bytes; 535 395 } 536 396 ··· 486 462 static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes, 487 463 struct iov_iter *i, __wsum *sump) 488 464 { 489 - struct pipe_inode_info *pipe = i->pipe; 490 - unsigned int p_mask = pipe->ring_size - 1; 491 465 __wsum sum = *sump; 492 466 size_t off = 0; 493 - unsigned int i_head; 494 - size_t r; 467 + unsigned int chunk, r; 468 + 469 + if (unlikely(bytes > i->count)) 470 + bytes = i->count; 471 + if (unlikely(!bytes)) 472 + return 0; 495 473 496 474 if (!sanity(i)) 497 475 return 0; 498 476 
499 - bytes = push_pipe(i, bytes, &i_head, &r); 500 477 while (bytes) { 501 - size_t chunk = min_t(size_t, bytes, PAGE_SIZE - r); 502 - char *p = kmap_local_page(pipe->bufs[i_head & p_mask].page); 478 + struct page *page = append_pipe(i, bytes, &r); 479 + char *p; 480 + 481 + if (!page) 482 + break; 483 + chunk = min_t(size_t, bytes, PAGE_SIZE - r); 484 + p = kmap_local_page(page); 503 485 sum = csum_and_memcpy(p + r, addr + off, chunk, sum, off); 504 486 kunmap_local(p); 505 - i->head = i_head; 506 - i->iov_offset = r + chunk; 507 - bytes -= chunk; 508 487 off += chunk; 509 - r = 0; 510 - i_head++; 488 + bytes -= chunk; 511 489 } 512 490 *sump = sum; 513 - i->count -= off; 514 491 return off; 515 492 } 516 493 ··· 519 494 { 520 495 if (unlikely(iov_iter_is_pipe(i))) 521 496 return copy_pipe_to_iter(addr, bytes, i); 522 - if (iter_is_iovec(i)) 497 + if (user_backed_iter(i)) 523 498 might_fault(); 524 499 iterate_and_advance(i, bytes, base, len, off, 525 500 copyout(base, addr + off, len), ··· 543 518 static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes, 544 519 struct iov_iter *i) 545 520 { 546 - struct pipe_inode_info *pipe = i->pipe; 547 - unsigned int p_mask = pipe->ring_size - 1; 548 - unsigned int i_head; 549 - unsigned int valid = pipe->head; 550 - size_t n, off, xfer = 0; 521 + size_t xfer = 0; 522 + unsigned int off, chunk; 523 + 524 + if (unlikely(bytes > i->count)) 525 + bytes = i->count; 526 + if (unlikely(!bytes)) 527 + return 0; 551 528 552 529 if (!sanity(i)) 553 530 return 0; 554 531 555 - n = push_pipe(i, bytes, &i_head, &off); 556 - while (n) { 557 - size_t chunk = min_t(size_t, n, PAGE_SIZE - off); 558 - char *p = kmap_local_page(pipe->bufs[i_head & p_mask].page); 532 + while (bytes) { 533 + struct page *page = append_pipe(i, bytes, &off); 559 534 unsigned long rem; 535 + char *p; 536 + 537 + if (!page) 538 + break; 539 + chunk = min_t(size_t, bytes, PAGE_SIZE - off); 540 + p = kmap_local_page(page); 560 541 rem = copy_mc_to_kernel(p 
+ off, addr + xfer, chunk); 561 542 chunk -= rem; 562 543 kunmap_local(p); 563 - if (chunk) { 564 - i->head = i_head; 565 - i->iov_offset = off + chunk; 566 - xfer += chunk; 567 - valid = i_head + 1; 568 - } 544 + xfer += chunk; 545 + bytes -= chunk; 569 546 if (rem) { 570 - pipe->bufs[i_head & p_mask].len -= rem; 571 - pipe_discard_from(pipe, valid); 547 + iov_iter_revert(i, rem); 572 548 break; 573 549 } 574 - n -= chunk; 575 - off = 0; 576 - i_head++; 577 550 } 578 - i->count -= xfer; 579 551 return xfer; 580 552 } 581 553 ··· 605 583 { 606 584 if (unlikely(iov_iter_is_pipe(i))) 607 585 return copy_mc_pipe_to_iter(addr, bytes, i); 608 - if (iter_is_iovec(i)) 586 + if (user_backed_iter(i)) 609 587 might_fault(); 610 588 __iterate_and_advance(i, bytes, base, len, off, 611 589 copyout_mc(base, addr + off, len), ··· 623 601 WARN_ON(1); 624 602 return 0; 625 603 } 626 - if (iter_is_iovec(i)) 604 + if (user_backed_iter(i)) 627 605 might_fault(); 628 606 iterate_and_advance(i, bytes, base, len, off, 629 607 copyin(addr + off, base, len), ··· 706 684 return false; 707 685 } 708 686 709 - static size_t __copy_page_to_iter(struct page *page, size_t offset, size_t bytes, 710 - struct iov_iter *i) 711 - { 712 - if (unlikely(iov_iter_is_pipe(i))) { 713 - return copy_page_to_iter_pipe(page, offset, bytes, i); 714 - } else { 715 - void *kaddr = kmap_local_page(page); 716 - size_t wanted = _copy_to_iter(kaddr + offset, bytes, i); 717 - kunmap_local(kaddr); 718 - return wanted; 719 - } 720 - } 721 - 722 687 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, 723 688 struct iov_iter *i) 724 689 { 725 690 size_t res = 0; 726 691 if (unlikely(!page_copy_sane(page, offset, bytes))) 727 692 return 0; 693 + if (unlikely(iov_iter_is_pipe(i))) 694 + return copy_page_to_iter_pipe(page, offset, bytes, i); 728 695 page += offset / PAGE_SIZE; // first subpage 729 696 offset %= PAGE_SIZE; 730 697 while (1) { 731 - size_t n = __copy_page_to_iter(page, offset, 732 - 
min(bytes, (size_t)PAGE_SIZE - offset), i); 698 + void *kaddr = kmap_local_page(page); 699 + size_t n = min(bytes, (size_t)PAGE_SIZE - offset); 700 + n = _copy_to_iter(kaddr + offset, n, i); 701 + kunmap_local(kaddr); 733 702 res += n; 734 703 bytes -= n; 735 704 if (!bytes || !n) ··· 738 725 size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, 739 726 struct iov_iter *i) 740 727 { 741 - if (page_copy_sane(page, offset, bytes)) { 728 + size_t res = 0; 729 + if (!page_copy_sane(page, offset, bytes)) 730 + return 0; 731 + page += offset / PAGE_SIZE; // first subpage 732 + offset %= PAGE_SIZE; 733 + while (1) { 742 734 void *kaddr = kmap_local_page(page); 743 - size_t wanted = _copy_from_iter(kaddr + offset, bytes, i); 735 + size_t n = min(bytes, (size_t)PAGE_SIZE - offset); 736 + n = _copy_from_iter(kaddr + offset, n, i); 744 737 kunmap_local(kaddr); 745 - return wanted; 738 + res += n; 739 + bytes -= n; 740 + if (!bytes || !n) 741 + break; 742 + offset += n; 743 + if (offset == PAGE_SIZE) { 744 + page++; 745 + offset = 0; 746 + } 746 747 } 747 - return 0; 748 + return res; 748 749 } 749 750 EXPORT_SYMBOL(copy_page_from_iter); 750 751 751 752 static size_t pipe_zero(size_t bytes, struct iov_iter *i) 752 753 { 753 - struct pipe_inode_info *pipe = i->pipe; 754 - unsigned int p_mask = pipe->ring_size - 1; 755 - unsigned int i_head; 756 - size_t n, off; 754 + unsigned int chunk, off; 755 + 756 + if (unlikely(bytes > i->count)) 757 + bytes = i->count; 758 + if (unlikely(!bytes)) 759 + return 0; 757 760 758 761 if (!sanity(i)) 759 762 return 0; 760 763 761 - bytes = n = push_pipe(i, bytes, &i_head, &off); 762 - if (unlikely(!n)) 763 - return 0; 764 + for (size_t n = bytes; n; n -= chunk) { 765 + struct page *page = append_pipe(i, n, &off); 766 + char *p; 764 767 765 - do { 766 - size_t chunk = min_t(size_t, n, PAGE_SIZE - off); 767 - char *p = kmap_local_page(pipe->bufs[i_head & p_mask].page); 768 + if (!page) 769 + return bytes - n; 770 + chunk = 
min_t(size_t, n, PAGE_SIZE - off); 771 + p = kmap_local_page(page); 768 772 memset(p + off, 0, chunk); 769 773 kunmap_local(p); 770 - i->head = i_head; 771 - i->iov_offset = off + chunk; 772 - n -= chunk; 773 - off = 0; 774 - i_head++; 775 - } while (n); 776 - i->count -= bytes; 774 + } 777 775 return bytes; 778 776 } 779 777 ··· 823 799 } 824 800 EXPORT_SYMBOL(copy_page_from_iter_atomic); 825 801 826 - static inline void pipe_truncate(struct iov_iter *i) 827 - { 828 - struct pipe_inode_info *pipe = i->pipe; 829 - unsigned int p_tail = pipe->tail; 830 - unsigned int p_head = pipe->head; 831 - unsigned int p_mask = pipe->ring_size - 1; 832 - 833 - if (!pipe_empty(p_head, p_tail)) { 834 - struct pipe_buffer *buf; 835 - unsigned int i_head = i->head; 836 - size_t off = i->iov_offset; 837 - 838 - if (off) { 839 - buf = &pipe->bufs[i_head & p_mask]; 840 - buf->len = off - buf->offset; 841 - i_head++; 842 - } 843 - while (p_head != i_head) { 844 - p_head--; 845 - pipe_buf_release(pipe, &pipe->bufs[p_head & p_mask]); 846 - } 847 - 848 - pipe->head = p_head; 849 - } 850 - } 851 - 852 802 static void pipe_advance(struct iov_iter *i, size_t size) 853 803 { 854 804 struct pipe_inode_info *pipe = i->pipe; 855 - if (size) { 856 - struct pipe_buffer *buf; 857 - unsigned int p_mask = pipe->ring_size - 1; 858 - unsigned int i_head = i->head; 859 - size_t off = i->iov_offset, left = size; 805 + int off = i->last_offset; 860 806 861 - if (off) /* make it relative to the beginning of buffer */ 862 - left += off - pipe->bufs[i_head & p_mask].offset; 863 - while (1) { 864 - buf = &pipe->bufs[i_head & p_mask]; 865 - if (left <= buf->len) 866 - break; 867 - left -= buf->len; 868 - i_head++; 869 - } 870 - i->head = i_head; 871 - i->iov_offset = buf->offset + left; 807 + if (!off && !size) { 808 + pipe_discard_from(pipe, i->start_head); // discard everything 809 + return; 872 810 } 873 811 i->count -= size; 874 - /* ... 
and discard everything past that point */ 875 - pipe_truncate(i); 812 + while (1) { 813 + struct pipe_buffer *buf = pipe_buf(pipe, i->head); 814 + if (off) /* make it relative to the beginning of buffer */ 815 + size += abs(off) - buf->offset; 816 + if (size <= buf->len) { 817 + buf->len = size; 818 + i->last_offset = last_offset(buf); 819 + break; 820 + } 821 + size -= buf->len; 822 + i->head++; 823 + off = 0; 824 + } 825 + pipe_discard_from(pipe, i->head + 1); // discard everything past this one 876 826 } 877 827 878 828 static void iov_iter_bvec_advance(struct iov_iter *i, size_t size) ··· 892 894 { 893 895 if (unlikely(i->count < size)) 894 896 size = i->count; 895 - if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) { 897 + if (likely(iter_is_ubuf(i)) || unlikely(iov_iter_is_xarray(i))) { 898 + i->iov_offset += size; 899 + i->count -= size; 900 + } else if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) { 896 901 /* iovec and kvec have identical layouts */ 897 902 iov_iter_iovec_advance(i, size); 898 903 } else if (iov_iter_is_bvec(i)) { 899 904 iov_iter_bvec_advance(i, size); 900 905 } else if (iov_iter_is_pipe(i)) { 901 906 pipe_advance(i, size); 902 - } else if (unlikely(iov_iter_is_xarray(i))) { 903 - i->iov_offset += size; 904 - i->count -= size; 905 907 } else if (iov_iter_is_discard(i)) { 906 908 i->count -= size; 907 909 } ··· 917 919 i->count += unroll; 918 920 if (unlikely(iov_iter_is_pipe(i))) { 919 921 struct pipe_inode_info *pipe = i->pipe; 920 - unsigned int p_mask = pipe->ring_size - 1; 921 - unsigned int i_head = i->head; 922 - size_t off = i->iov_offset; 923 - while (1) { 924 - struct pipe_buffer *b = &pipe->bufs[i_head & p_mask]; 925 - size_t n = off - b->offset; 926 - if (unroll < n) { 927 - off -= unroll; 928 - break; 922 + unsigned int head = pipe->head; 923 + 924 + while (head > i->start_head) { 925 + struct pipe_buffer *b = pipe_buf(pipe, --head); 926 + if (unroll < b->len) { 927 + b->len -= unroll; 928 + i->last_offset = 
last_offset(b); 929 + i->head = head; 930 + return; 929 931 } 930 - unroll -= n; 931 - if (!unroll && i_head == i->start_head) { 932 - off = 0; 933 - break; 934 - } 935 - i_head--; 936 - b = &pipe->bufs[i_head & p_mask]; 937 - off = b->offset + b->len; 932 + unroll -= b->len; 933 + pipe_buf_release(pipe, b); 934 + pipe->head--; 938 935 } 939 - i->iov_offset = off; 940 - i->head = i_head; 941 - pipe_truncate(i); 936 + i->last_offset = 0; 937 + i->head = head; 942 938 return; 943 939 } 944 940 if (unlikely(iov_iter_is_discard(i))) ··· 942 950 return; 943 951 } 944 952 unroll -= i->iov_offset; 945 - if (iov_iter_is_xarray(i)) { 953 + if (iov_iter_is_xarray(i) || iter_is_ubuf(i)) { 946 954 BUG(); /* We should never go beyond the start of the specified 947 955 * range since we might then be straying into pages that 948 956 * aren't pinned. ··· 1034 1042 .pipe = pipe, 1035 1043 .head = pipe->head, 1036 1044 .start_head = pipe->head, 1037 - .iov_offset = 0, 1045 + .last_offset = 0, 1038 1046 .count = count 1039 1047 }; 1040 1048 } ··· 1150 1158 bool iov_iter_is_aligned(const struct iov_iter *i, unsigned addr_mask, 1151 1159 unsigned len_mask) 1152 1160 { 1161 + if (likely(iter_is_ubuf(i))) { 1162 + if (i->count & len_mask) 1163 + return false; 1164 + if ((unsigned long)(i->ubuf + i->iov_offset) & addr_mask) 1165 + return false; 1166 + return true; 1167 + } 1168 + 1153 1169 if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) 1154 1170 return iov_iter_aligned_iovec(i, addr_mask, len_mask); 1155 1171 ··· 1165 1165 return iov_iter_aligned_bvec(i, addr_mask, len_mask); 1166 1166 1167 1167 if (iov_iter_is_pipe(i)) { 1168 - unsigned int p_mask = i->pipe->ring_size - 1; 1169 1168 size_t size = i->count; 1170 1169 1171 1170 if (size & len_mask) 1172 1171 return false; 1173 - if (size && allocated(&i->pipe->bufs[i->head & p_mask])) { 1174 - if (i->iov_offset & addr_mask) 1172 + if (size && i->last_offset > 0) { 1173 + if (i->last_offset & addr_mask) 1175 1174 return false; 1176 
1175 } 1177 1176 ··· 1232 1233 1233 1234 unsigned long iov_iter_alignment(const struct iov_iter *i) 1234 1235 { 1236 + if (likely(iter_is_ubuf(i))) { 1237 + size_t size = i->count; 1238 + if (size) 1239 + return ((unsigned long)i->ubuf + i->iov_offset) | size; 1240 + return 0; 1241 + } 1242 + 1235 1243 /* iovec and kvec have identical layouts */ 1236 1244 if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) 1237 1245 return iov_iter_alignment_iovec(i); ··· 1247 1241 return iov_iter_alignment_bvec(i); 1248 1242 1249 1243 if (iov_iter_is_pipe(i)) { 1250 - unsigned int p_mask = i->pipe->ring_size - 1; 1251 1244 size_t size = i->count; 1252 1245 1253 - if (size && i->iov_offset && allocated(&i->pipe->bufs[i->head & p_mask])) 1254 - return size | i->iov_offset; 1246 + if (size && i->last_offset > 0) 1247 + return size | i->last_offset; 1255 1248 return size; 1256 1249 } 1257 1250 ··· 1267 1262 unsigned long v = 0; 1268 1263 size_t size = i->count; 1269 1264 unsigned k; 1265 + 1266 + if (iter_is_ubuf(i)) 1267 + return 0; 1270 1268 1271 1269 if (WARN_ON(!iter_is_iovec(i))) 1272 1270 return ~0U; ··· 1289 1281 } 1290 1282 EXPORT_SYMBOL(iov_iter_gap_alignment); 1291 1283 1292 - static inline ssize_t __pipe_get_pages(struct iov_iter *i, 1293 - size_t maxsize, 1294 - struct page **pages, 1295 - int iter_head, 1296 - size_t *start) 1284 + static int want_pages_array(struct page ***res, size_t size, 1285 + size_t start, unsigned int maxpages) 1297 1286 { 1298 - struct pipe_inode_info *pipe = i->pipe; 1299 - unsigned int p_mask = pipe->ring_size - 1; 1300 - ssize_t n = push_pipe(i, maxsize, &iter_head, start); 1301 - if (!n) 1302 - return -EFAULT; 1287 + unsigned int count = DIV_ROUND_UP(size + start, PAGE_SIZE); 1303 1288 1304 - maxsize = n; 1305 - n += *start; 1306 - while (n > 0) { 1307 - get_page(*pages++ = pipe->bufs[iter_head & p_mask].page); 1308 - iter_head++; 1309 - n -= PAGE_SIZE; 1289 + if (count > maxpages) 1290 + count = maxpages; 1291 + WARN_ON(!count); // caller 
should've prevented that 1292 + if (!*res) { 1293 + *res = kvmalloc_array(count, sizeof(struct page *), GFP_KERNEL); 1294 + if (!*res) 1295 + return 0; 1310 1296 } 1311 - 1312 - return maxsize; 1297 + return count; 1313 1298 } 1314 1299 1315 1300 static ssize_t pipe_get_pages(struct iov_iter *i, 1316 - struct page **pages, size_t maxsize, unsigned maxpages, 1301 + struct page ***pages, size_t maxsize, unsigned maxpages, 1317 1302 size_t *start) 1318 1303 { 1319 - unsigned int iter_head, npages; 1320 - size_t capacity; 1304 + unsigned int npages, count, off, chunk; 1305 + struct page **p; 1306 + size_t left; 1321 1307 1322 1308 if (!sanity(i)) 1323 1309 return -EFAULT; 1324 1310 1325 - data_start(i, &iter_head, start); 1326 - /* Amount of free space: some of this one + all after this one */ 1327 - npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe); 1328 - capacity = min(npages, maxpages) * PAGE_SIZE - *start; 1329 - 1330 - return __pipe_get_pages(i, min(maxsize, capacity), pages, iter_head, start); 1311 + *start = off = pipe_npages(i, &npages); 1312 + if (!npages) 1313 + return -EFAULT; 1314 + count = want_pages_array(pages, maxsize, off, min(npages, maxpages)); 1315 + if (!count) 1316 + return -ENOMEM; 1317 + p = *pages; 1318 + for (npages = 0, left = maxsize ; npages < count; npages++, left -= chunk) { 1319 + struct page *page = append_pipe(i, left, &off); 1320 + if (!page) 1321 + break; 1322 + chunk = min_t(size_t, left, PAGE_SIZE - off); 1323 + get_page(*p++ = page); 1324 + } 1325 + if (!npages) 1326 + return -EFAULT; 1327 + return maxsize - left; 1331 1328 } 1332 1329 1333 1330 static ssize_t iter_xarray_populate_pages(struct page **pages, struct xarray *xa, ··· 1363 1350 } 1364 1351 1365 1352 static ssize_t iter_xarray_get_pages(struct iov_iter *i, 1366 - struct page **pages, size_t maxsize, 1353 + struct page ***pages, size_t maxsize, 1367 1354 unsigned maxpages, size_t *_start_offset) 1368 1355 { 1369 - unsigned nr, offset; 1370 - pgoff_t index, 
count; 1371 - size_t size = maxsize; 1356 + unsigned nr, offset, count; 1357 + pgoff_t index; 1372 1358 loff_t pos; 1373 - 1374 - if (!size || !maxpages) 1375 - return 0; 1376 1359 1377 1360 pos = i->xarray_start + i->iov_offset; 1378 1361 index = pos >> PAGE_SHIFT; 1379 1362 offset = pos & ~PAGE_MASK; 1380 1363 *_start_offset = offset; 1381 1364 1382 - count = 1; 1383 - if (size > PAGE_SIZE - offset) { 1384 - size -= PAGE_SIZE - offset; 1385 - count += size >> PAGE_SHIFT; 1386 - size &= ~PAGE_MASK; 1387 - if (size) 1388 - count++; 1389 - } 1390 - 1391 - if (count > maxpages) 1392 - count = maxpages; 1393 - 1394 - nr = iter_xarray_populate_pages(pages, i->xarray, index, count); 1365 + count = want_pages_array(pages, maxsize, offset, maxpages); 1366 + if (!count) 1367 + return -ENOMEM; 1368 + nr = iter_xarray_populate_pages(*pages, i->xarray, index, count); 1395 1369 if (nr == 0) 1396 1370 return 0; 1397 1371 1398 - return min_t(size_t, nr * PAGE_SIZE - offset, maxsize); 1372 + maxsize = min_t(size_t, nr * PAGE_SIZE - offset, maxsize); 1373 + i->iov_offset += maxsize; 1374 + i->count -= maxsize; 1375 + return maxsize; 1399 1376 } 1400 1377 1401 - /* must be done on non-empty ITER_IOVEC one */ 1378 + /* must be done on non-empty ITER_UBUF or ITER_IOVEC one */ 1402 1379 static unsigned long first_iovec_segment(const struct iov_iter *i, size_t *size) 1403 1380 { 1404 1381 size_t skip; 1405 1382 long k; 1383 + 1384 + if (iter_is_ubuf(i)) 1385 + return (unsigned long)i->ubuf + i->iov_offset; 1406 1386 1407 1387 for (k = 0, skip = i->iov_offset; k < i->nr_segs; k++, skip = 0) { 1408 1388 size_t len = i->iov[k].iov_len - skip; ··· 1425 1419 return page; 1426 1420 } 1427 1421 1428 - ssize_t iov_iter_get_pages(struct iov_iter *i, 1429 - struct page **pages, size_t maxsize, unsigned maxpages, 1430 - size_t *start) 1422 + static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i, 1423 + struct page ***pages, size_t maxsize, 1424 + unsigned int maxpages, size_t *start) 1431 
1425 { 1432 - int n, res; 1426 + unsigned int n; 1433 1427 1434 1428 if (maxsize > i->count) 1435 1429 maxsize = i->count; ··· 1438 1432 if (maxsize > MAX_RW_COUNT) 1439 1433 maxsize = MAX_RW_COUNT; 1440 1434 1441 - if (likely(iter_is_iovec(i))) { 1435 + if (likely(user_backed_iter(i))) { 1442 1436 unsigned int gup_flags = 0; 1443 1437 unsigned long addr; 1438 + int res; 1444 1439 1445 1440 if (iov_iter_rw(i) != WRITE) 1446 1441 gup_flags |= FOLL_WRITE; ··· 1451 1444 addr = first_iovec_segment(i, &maxsize); 1452 1445 *start = addr % PAGE_SIZE; 1453 1446 addr &= PAGE_MASK; 1454 - n = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE); 1455 - if (n > maxpages) 1456 - n = maxpages; 1457 - res = get_user_pages_fast(addr, n, gup_flags, pages); 1447 + n = want_pages_array(pages, maxsize, *start, maxpages); 1448 + if (!n) 1449 + return -ENOMEM; 1450 + res = get_user_pages_fast(addr, n, gup_flags, *pages); 1458 1451 if (unlikely(res <= 0)) 1459 1452 return res; 1460 - return min_t(size_t, maxsize, res * PAGE_SIZE - *start); 1453 + maxsize = min_t(size_t, maxsize, res * PAGE_SIZE - *start); 1454 + iov_iter_advance(i, maxsize); 1455 + return maxsize; 1461 1456 } 1462 1457 if (iov_iter_is_bvec(i)) { 1458 + struct page **p; 1463 1459 struct page *page; 1464 1460 1465 1461 page = first_bvec_segment(i, &maxsize, start); 1466 - n = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE); 1467 - if (n > maxpages) 1468 - n = maxpages; 1462 + n = want_pages_array(pages, maxsize, *start, maxpages); 1463 + if (!n) 1464 + return -ENOMEM; 1465 + p = *pages; 1469 1466 for (int k = 0; k < n; k++) 1470 - get_page(*pages++ = page++); 1471 - return min_t(size_t, maxsize, n * PAGE_SIZE - *start); 1467 + get_page(p[k] = page + k); 1468 + maxsize = min_t(size_t, maxsize, n * PAGE_SIZE - *start); 1469 + i->count -= maxsize; 1470 + i->iov_offset += maxsize; 1471 + if (i->iov_offset == i->bvec->bv_len) { 1472 + i->iov_offset = 0; 1473 + i->bvec++; 1474 + i->nr_segs--; 1475 + } 1476 + return maxsize; 1472 1477 } 1473 
1478 if (iov_iter_is_pipe(i)) 1474 1479 return pipe_get_pages(i, pages, maxsize, maxpages, start); ··· 1488 1469 return iter_xarray_get_pages(i, pages, maxsize, maxpages, start); 1489 1470 return -EFAULT; 1490 1471 } 1491 - EXPORT_SYMBOL(iov_iter_get_pages); 1492 1472 1493 - static struct page **get_pages_array(size_t n) 1473 + ssize_t iov_iter_get_pages2(struct iov_iter *i, 1474 + struct page **pages, size_t maxsize, unsigned maxpages, 1475 + size_t *start) 1494 1476 { 1495 - return kvmalloc_array(n, sizeof(struct page *), GFP_KERNEL); 1496 - } 1477 + if (!maxpages) 1478 + return 0; 1479 + BUG_ON(!pages); 1497 1480 1498 - static ssize_t pipe_get_pages_alloc(struct iov_iter *i, 1481 + return __iov_iter_get_pages_alloc(i, &pages, maxsize, maxpages, start); 1482 + } 1483 + EXPORT_SYMBOL(iov_iter_get_pages2); 1484 + 1485 + ssize_t iov_iter_get_pages_alloc2(struct iov_iter *i, 1499 1486 struct page ***pages, size_t maxsize, 1500 1487 size_t *start) 1501 1488 { 1502 - struct page **p; 1503 - unsigned int iter_head, npages; 1504 - ssize_t n; 1489 + ssize_t len; 1505 1490 1506 - if (!sanity(i)) 1507 - return -EFAULT; 1491 + *pages = NULL; 1508 1492 1509 - data_start(i, &iter_head, start); 1510 - /* Amount of free space: some of this one + all after this one */ 1511 - npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe); 1512 - n = npages * PAGE_SIZE - *start; 1513 - if (maxsize > n) 1514 - maxsize = n; 1515 - else 1516 - npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE); 1517 - p = get_pages_array(npages); 1518 - if (!p) 1519 - return -ENOMEM; 1520 - n = __pipe_get_pages(i, maxsize, p, iter_head, start); 1521 - if (n > 0) 1522 - *pages = p; 1523 - else 1524 - kvfree(p); 1525 - return n; 1526 - } 1527 - 1528 - static ssize_t iter_xarray_get_pages_alloc(struct iov_iter *i, 1529 - struct page ***pages, size_t maxsize, 1530 - size_t *_start_offset) 1531 - { 1532 - struct page **p; 1533 - unsigned nr, offset; 1534 - pgoff_t index, count; 1535 - size_t size = 
maxsize; 1536 - loff_t pos; 1537 - 1538 - if (!size) 1539 - return 0; 1540 - 1541 - pos = i->xarray_start + i->iov_offset; 1542 - index = pos >> PAGE_SHIFT; 1543 - offset = pos & ~PAGE_MASK; 1544 - *_start_offset = offset; 1545 - 1546 - count = 1; 1547 - if (size > PAGE_SIZE - offset) { 1548 - size -= PAGE_SIZE - offset; 1549 - count += size >> PAGE_SHIFT; 1550 - size &= ~PAGE_MASK; 1551 - if (size) 1552 - count++; 1493 + len = __iov_iter_get_pages_alloc(i, pages, maxsize, ~0U, start); 1494 + if (len <= 0) { 1495 + kvfree(*pages); 1496 + *pages = NULL; 1553 1497 } 1554 - 1555 - p = get_pages_array(count); 1556 - if (!p) 1557 - return -ENOMEM; 1558 - *pages = p; 1559 - 1560 - nr = iter_xarray_populate_pages(p, i->xarray, index, count); 1561 - if (nr == 0) 1562 - return 0; 1563 - 1564 - return min_t(size_t, nr * PAGE_SIZE - offset, maxsize); 1498 + return len; 1565 1499 } 1566 - 1567 - ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, 1568 - struct page ***pages, size_t maxsize, 1569 - size_t *start) 1570 - { 1571 - struct page **p; 1572 - int n, res; 1573 - 1574 - if (maxsize > i->count) 1575 - maxsize = i->count; 1576 - if (!maxsize) 1577 - return 0; 1578 - if (maxsize > MAX_RW_COUNT) 1579 - maxsize = MAX_RW_COUNT; 1580 - 1581 - if (likely(iter_is_iovec(i))) { 1582 - unsigned int gup_flags = 0; 1583 - unsigned long addr; 1584 - 1585 - if (iov_iter_rw(i) != WRITE) 1586 - gup_flags |= FOLL_WRITE; 1587 - if (i->nofault) 1588 - gup_flags |= FOLL_NOFAULT; 1589 - 1590 - addr = first_iovec_segment(i, &maxsize); 1591 - *start = addr % PAGE_SIZE; 1592 - addr &= PAGE_MASK; 1593 - n = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE); 1594 - p = get_pages_array(n); 1595 - if (!p) 1596 - return -ENOMEM; 1597 - res = get_user_pages_fast(addr, n, gup_flags, p); 1598 - if (unlikely(res <= 0)) { 1599 - kvfree(p); 1600 - *pages = NULL; 1601 - return res; 1602 - } 1603 - *pages = p; 1604 - return min_t(size_t, maxsize, res * PAGE_SIZE - *start); 1605 - } 1606 - if (iov_iter_is_bvec(i)) { 
1607 - struct page *page; 1608 - 1609 - page = first_bvec_segment(i, &maxsize, start); 1610 - n = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE); 1611 - *pages = p = get_pages_array(n); 1612 - if (!p) 1613 - return -ENOMEM; 1614 - for (int k = 0; k < n; k++) 1615 - get_page(*p++ = page++); 1616 - return min_t(size_t, maxsize, n * PAGE_SIZE - *start); 1617 - } 1618 - if (iov_iter_is_pipe(i)) 1619 - return pipe_get_pages_alloc(i, pages, maxsize, start); 1620 - if (iov_iter_is_xarray(i)) 1621 - return iter_xarray_get_pages_alloc(i, pages, maxsize, start); 1622 - return -EFAULT; 1623 - } 1624 - EXPORT_SYMBOL(iov_iter_get_pages_alloc); 1500 + EXPORT_SYMBOL(iov_iter_get_pages_alloc2); 1625 1501 1626 1502 size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, 1627 1503 struct iov_iter *i) ··· 1629 1715 { 1630 1716 if (unlikely(!i->count)) 1631 1717 return 0; 1718 + if (likely(iter_is_ubuf(i))) { 1719 + unsigned offs = offset_in_page(i->ubuf + i->iov_offset); 1720 + int npages = DIV_ROUND_UP(offs + i->count, PAGE_SIZE); 1721 + return min(npages, maxpages); 1722 + } 1632 1723 /* iovec and kvec have identical layouts */ 1633 1724 if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) 1634 1725 return iov_npages(i, maxpages); 1635 1726 if (iov_iter_is_bvec(i)) 1636 1727 return bvec_npages(i, maxpages); 1637 1728 if (iov_iter_is_pipe(i)) { 1638 - unsigned int iter_head; 1639 1729 int npages; 1640 - size_t off; 1641 1730 1642 1731 if (!sanity(i)) 1643 1732 return 0; 1644 1733 1645 - data_start(i, &iter_head, &off); 1646 - /* some of this one + all after this one */ 1647 - npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe); 1734 + pipe_npages(i, &npages); 1648 1735 return min(npages, maxpages); 1649 1736 } 1650 1737 if (iov_iter_is_xarray(i)) { ··· 1664 1749 WARN_ON(1); 1665 1750 return NULL; 1666 1751 } 1667 - if (unlikely(iov_iter_is_discard(new) || iov_iter_is_xarray(new))) 1668 - return NULL; 1669 1752 if (iov_iter_is_bvec(new)) 1670 1753 return 
new->bvec = kmemdup(new->bvec, 1671 1754 new->nr_segs * sizeof(struct bio_vec), 1672 1755 flags); 1673 - else 1756 + else if (iov_iter_is_kvec(new) || iter_is_iovec(new)) 1674 1757 /* iovec and kvec have identical layout */ 1675 1758 return new->iov = kmemdup(new->iov, 1676 1759 new->nr_segs * sizeof(struct iovec), 1677 1760 flags); 1761 + return NULL; 1678 1762 } 1679 1763 EXPORT_SYMBOL(dup_iter); 1680 1764 ··· 1867 1953 void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state) 1868 1954 { 1869 1955 if (WARN_ON_ONCE(!iov_iter_is_bvec(i) && !iter_is_iovec(i)) && 1870 - !iov_iter_is_kvec(i)) 1956 + !iov_iter_is_kvec(i) && !iter_is_ubuf(i)) 1871 1957 return; 1872 1958 i->iov_offset = state->iov_offset; 1873 1959 i->count = state->count; 1960 + if (iter_is_ubuf(i)) 1961 + return; 1874 1962 /* 1875 1963 * For the *vec iters, nr_segs + iov is constant - if we increment 1876 1964 * the vec, then we also decrement the nr_segs count. Hence we don't
+1 -1
mm/shmem.c
··· 2626 2626 ret = copy_page_to_iter(page, offset, nr, to); 2627 2627 put_page(page); 2628 2628 2629 - } else if (iter_is_iovec(to)) { 2629 + } else if (user_backed_iter(to)) { 2630 2630 /* 2631 2631 * Copy to user tends to be so well optimized, but 2632 2632 * clear_user() not so much, that it is noticeably
+23 -16
net/9p/client.c
··· 1495 1495 struct p9_client *clnt = fid->clnt; 1496 1496 struct p9_req_t *req; 1497 1497 int count = iov_iter_count(to); 1498 - int rsize, non_zc = 0; 1498 + int rsize, received, non_zc = 0; 1499 1499 char *dataptr; 1500 1500 1501 1501 *err = 0; ··· 1524 1524 } 1525 1525 if (IS_ERR(req)) { 1526 1526 *err = PTR_ERR(req); 1527 + if (!non_zc) 1528 + iov_iter_revert(to, count - iov_iter_count(to)); 1527 1529 return 0; 1528 1530 } 1529 1531 1530 1532 *err = p9pdu_readf(&req->rc, clnt->proto_version, 1531 - "D", &count, &dataptr); 1533 + "D", &received, &dataptr); 1532 1534 if (*err) { 1535 + if (!non_zc) 1536 + iov_iter_revert(to, count - iov_iter_count(to)); 1533 1537 trace_9p_protocol_dump(clnt, &req->rc); 1534 1538 p9_req_put(clnt, req); 1535 1539 return 0; 1536 1540 } 1537 - if (rsize < count) { 1538 - pr_err("bogus RREAD count (%d > %d)\n", count, rsize); 1539 - count = rsize; 1541 + if (rsize < received) { 1542 + pr_err("bogus RREAD count (%d > %d)\n", received, rsize); 1543 + received = rsize; 1540 1544 } 1541 1545 1542 1546 p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", count); 1543 1547 1544 1548 if (non_zc) { 1545 - int n = copy_to_iter(dataptr, count, to); 1549 + int n = copy_to_iter(dataptr, received, to); 1546 1550 1547 - if (n != count) { 1551 + if (n != received) { 1548 1552 *err = -EFAULT; 1549 1553 p9_req_put(clnt, req); 1550 1554 return n; 1551 1555 } 1552 1556 } else { 1553 - iov_iter_advance(to, count); 1557 + iov_iter_revert(to, count - received - iov_iter_count(to)); 1554 1558 } 1555 1559 p9_req_put(clnt, req); 1556 - return count; 1560 + return received; 1557 1561 } 1558 1562 EXPORT_SYMBOL(p9_client_read_once); 1559 1563 ··· 1575 1571 while (iov_iter_count(from)) { 1576 1572 int count = iov_iter_count(from); 1577 1573 int rsize = fid->iounit; 1574 + int written; 1578 1575 1579 1576 if (!rsize || rsize > clnt->msize - P9_IOHDRSZ) 1580 1577 rsize = clnt->msize - P9_IOHDRSZ; ··· 1593 1588 offset, rsize, from); 1594 1589 } 1595 1590 if (IS_ERR(req)) 
{ 1591 + iov_iter_revert(from, count - iov_iter_count(from)); 1596 1592 *err = PTR_ERR(req); 1597 1593 break; 1598 1594 } 1599 1595 1600 - *err = p9pdu_readf(&req->rc, clnt->proto_version, "d", &count); 1596 + *err = p9pdu_readf(&req->rc, clnt->proto_version, "d", &written); 1601 1597 if (*err) { 1598 + iov_iter_revert(from, count - iov_iter_count(from)); 1602 1599 trace_9p_protocol_dump(clnt, &req->rc); 1603 1600 p9_req_put(clnt, req); 1604 1601 break; 1605 1602 } 1606 - if (rsize < count) { 1607 - pr_err("bogus RWRITE count (%d > %d)\n", count, rsize); 1608 - count = rsize; 1603 + if (rsize < written) { 1604 + pr_err("bogus RWRITE count (%d > %d)\n", written, rsize); 1605 + written = rsize; 1609 1606 } 1610 1607 1611 1608 p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", count); 1612 1609 1613 1610 p9_req_put(clnt, req); 1614 - iov_iter_advance(from, count); 1615 - total += count; 1616 - offset += count; 1611 + iov_iter_revert(from, count - written - iov_iter_count(from)); 1612 + total += written; 1613 + offset += written; 1617 1614 } 1618 1615 return total; 1619 1616 }
+1 -2
net/9p/protocol.c
··· 63 63 pdu_write_u(struct p9_fcall *pdu, struct iov_iter *from, size_t size) 64 64 { 65 65 size_t len = min(pdu->capacity - pdu->size, size); 66 - struct iov_iter i = *from; 67 66 68 - if (!copy_from_iter_full(&pdu->sdata[pdu->size], len, &i)) 67 + if (!copy_from_iter_full(&pdu->sdata[pdu->size], len, from)) 69 68 len = 0; 70 69 71 70 pdu->size += len;
+2 -1
net/9p/trans_virtio.c
··· 331 331 if (err == -ERESTARTSYS) 332 332 return err; 333 333 } 334 - n = iov_iter_get_pages_alloc(data, pages, count, offs); 334 + n = iov_iter_get_pages_alloc2(data, pages, count, offs); 335 335 if (n < 0) 336 336 return n; 337 337 *need_drop = 1; ··· 373 373 (*pages)[index] = kmap_to_page(p); 374 374 p += PAGE_SIZE; 375 375 } 376 + iov_iter_advance(data, len); 376 377 return len; 377 378 } 378 379 }
+1 -2
net/core/datagram.c
··· 632 632 if (frag == MAX_SKB_FRAGS) 633 633 return -EMSGSIZE; 634 634 635 - copied = iov_iter_get_pages(from, pages, length, 635 + copied = iov_iter_get_pages2(from, pages, length, 636 636 MAX_SKB_FRAGS - frag, &start); 637 637 if (copied < 0) 638 638 return -EFAULT; 639 639 640 - iov_iter_advance(from, copied); 641 640 length -= copied; 642 641 643 642 truesize = PAGE_ALIGN(copied + start);
+1 -2
net/core/skmsg.c
··· 324 324 goto out; 325 325 } 326 326 327 - copied = iov_iter_get_pages(from, pages, bytes, maxpages, 327 + copied = iov_iter_get_pages2(from, pages, bytes, maxpages, 328 328 &offset); 329 329 if (copied <= 0) { 330 330 ret = -EFAULT; 331 331 goto out; 332 332 } 333 333 334 - iov_iter_advance(from, copied); 335 334 bytes -= copied; 336 335 msg->sg.size += copied; 337 336
+1 -2
net/rds/message.c
··· 391 391 size_t start; 392 392 ssize_t copied; 393 393 394 - copied = iov_iter_get_pages(from, &pages, PAGE_SIZE, 394 + copied = iov_iter_get_pages2(from, &pages, PAGE_SIZE, 395 395 1, &start); 396 396 if (copied < 0) { 397 397 struct mmpin *mmp; ··· 405 405 goto err; 406 406 } 407 407 total_copied += copied; 408 - iov_iter_advance(from, copied); 409 408 length -= copied; 410 409 sg_set_page(sg, pages, copied, start); 411 410 rm->data.op_nents++;
+1 -3
net/tls/tls_sw.c
··· 1352 1352 rc = -EFAULT; 1353 1353 goto out; 1354 1354 } 1355 - copied = iov_iter_get_pages(from, pages, 1355 + copied = iov_iter_get_pages2(from, pages, 1356 1356 length, 1357 1357 maxpages, &offset); 1358 1358 if (copied <= 0) { 1359 1359 rc = -EFAULT; 1360 1360 goto out; 1361 1361 } 1362 - 1363 - iov_iter_advance(from, copied); 1364 1362 1365 1363 length -= copied; 1366 1364 size += copied;