Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

vmsplice: add vmsplice-to-user support

This is a bit of a cheat: it actually just copies the data to userspace.
But it makes the interface nice and symmetric and lets people build on
splice, with room for future performance improvements.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

+158 -30
+1 -1
fs/ocfs2/file.c
··· 1640 1640 .total_len = len, 1641 1641 .flags = flags, 1642 1642 .pos = *ppos, 1643 - .file = out, 1643 + .u.file = out, 1644 1644 }; 1645 1645 1646 1646 ret = __splice_from_pipe(pipe, &sd, ocfs2_splice_write_actor);
+150 -28
fs/splice.c
··· 528 528 static int pipe_to_sendpage(struct pipe_inode_info *pipe, 529 529 struct pipe_buffer *buf, struct splice_desc *sd) 530 530 { 531 - struct file *file = sd->file; 531 + struct file *file = sd->u.file; 532 532 loff_t pos = sd->pos; 533 533 int ret, more; 534 534 ··· 566 566 static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, 567 567 struct splice_desc *sd) 568 568 { 569 - struct file *file = sd->file; 569 + struct file *file = sd->u.file; 570 570 struct address_space *mapping = file->f_mapping; 571 571 unsigned int offset, this_len; 572 572 struct page *page; ··· 769 769 .total_len = len, 770 770 .flags = flags, 771 771 .pos = *ppos, 772 - .file = out, 772 + .u.file = out, 773 773 }; 774 774 775 775 /* ··· 807 807 .total_len = len, 808 808 .flags = flags, 809 809 .pos = *ppos, 810 - .file = out, 810 + .u.file = out, 811 811 }; 812 812 ssize_t ret; 813 813 int err; ··· 1087 1087 static int direct_splice_actor(struct pipe_inode_info *pipe, 1088 1088 struct splice_desc *sd) 1089 1089 { 1090 - struct file *file = sd->file; 1090 + struct file *file = sd->u.file; 1091 1091 1092 1092 return do_splice_from(pipe, file, &sd->pos, sd->total_len, sd->flags); 1093 1093 } ··· 1100 1100 .total_len = len, 1101 1101 .flags = flags, 1102 1102 .pos = *ppos, 1103 - .file = out, 1103 + .u.file = out, 1104 1104 }; 1105 1105 size_t ret; 1106 1106 ··· 1289 1289 return error; 1290 1290 } 1291 1291 1292 + static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf, 1293 + struct splice_desc *sd) 1294 + { 1295 + char *src; 1296 + int ret; 1297 + 1298 + ret = buf->ops->pin(pipe, buf); 1299 + if (unlikely(ret)) 1300 + return ret; 1301 + 1302 + /* 1303 + * See if we can use the atomic maps, by prefaulting in the 1304 + * pages and doing an atomic copy 1305 + */ 1306 + if (!fault_in_pages_writeable(sd->u.userptr, sd->len)) { 1307 + src = buf->ops->map(pipe, buf, 1); 1308 + ret = __copy_to_user_inatomic(sd->u.userptr, src + buf->offset, 1309 + 
sd->len); 1310 + buf->ops->unmap(pipe, buf, src); 1311 + if (!ret) { 1312 + ret = sd->len; 1313 + goto out; 1314 + } 1315 + } 1316 + 1317 + /* 1318 + * No dice, use slow non-atomic map and copy 1319 + */ 1320 + src = buf->ops->map(pipe, buf, 0); 1321 + 1322 + ret = sd->len; 1323 + if (copy_to_user(sd->u.userptr, src + buf->offset, sd->len)) 1324 + ret = -EFAULT; 1325 + 1326 + out: 1327 + if (ret > 0) 1328 + sd->u.userptr += ret; 1329 + buf->ops->unmap(pipe, buf, src); 1330 + return ret; 1331 + } 1332 + 1333 + /* 1334 + * For lack of a better implementation, implement vmsplice() to userspace 1335 + * as a simple copy of the pipes pages to the user iov. 1336 + */ 1337 + static long vmsplice_to_user(struct file *file, const struct iovec __user *iov, 1338 + unsigned long nr_segs, unsigned int flags) 1339 + { 1340 + struct pipe_inode_info *pipe; 1341 + struct splice_desc sd; 1342 + ssize_t size; 1343 + int error; 1344 + long ret; 1345 + 1346 + pipe = pipe_info(file->f_path.dentry->d_inode); 1347 + if (!pipe) 1348 + return -EBADF; 1349 + 1350 + if (pipe->inode) 1351 + mutex_lock(&pipe->inode->i_mutex); 1352 + 1353 + error = ret = 0; 1354 + while (nr_segs) { 1355 + void __user *base; 1356 + size_t len; 1357 + 1358 + /* 1359 + * Get user address base and length for this iovec. 1360 + */ 1361 + error = get_user(base, &iov->iov_base); 1362 + if (unlikely(error)) 1363 + break; 1364 + error = get_user(len, &iov->iov_len); 1365 + if (unlikely(error)) 1366 + break; 1367 + 1368 + /* 1369 + * Sanity check this iovec. 0 read succeeds. 
1370 + */ 1371 + if (unlikely(!len)) 1372 + break; 1373 + if (unlikely(!base)) { 1374 + error = -EFAULT; 1375 + break; 1376 + } 1377 + 1378 + sd.len = 0; 1379 + sd.total_len = len; 1380 + sd.flags = flags; 1381 + sd.u.userptr = base; 1382 + sd.pos = 0; 1383 + 1384 + size = __splice_from_pipe(pipe, &sd, pipe_to_user); 1385 + if (size < 0) { 1386 + if (!ret) 1387 + ret = size; 1388 + 1389 + break; 1390 + } 1391 + 1392 + ret += size; 1393 + 1394 + if (size < len) 1395 + break; 1396 + 1397 + nr_segs--; 1398 + iov++; 1399 + } 1400 + 1401 + if (pipe->inode) 1402 + mutex_unlock(&pipe->inode->i_mutex); 1403 + 1404 + if (!ret) 1405 + ret = error; 1406 + 1407 + return ret; 1408 + } 1409 + 1292 1410 /* 1293 1411 * vmsplice splices a user address range into a pipe. It can be thought of 1294 1412 * as splice-from-memory, where the regular splice is splice-from-file (or 1295 1413 * to file). In both cases the output is a pipe, naturally. 1296 - * 1297 - * Note that vmsplice only supports splicing _from_ user memory to a pipe, 1298 - * not the other way around. Splicing from user memory is a simple operation 1299 - * that can be supported without any funky alignment restrictions or nasty 1300 - * vm tricks. We simply map in the user memory and fill them into a pipe. 1301 - * The reverse isn't quite as easy, though. There are two possible solutions 1302 - * for that: 1303 - * 1304 - * - memcpy() the data internally, at which point we might as well just 1305 - * do a regular read() on the buffer anyway. 1306 - * - Lots of nasty vm tricks, that are neither fast nor flexible (it 1307 - * has restriction limitations on both ends of the pipe). 1308 - * 1309 - * Alas, it isn't here. 
1310 - * 1311 1414 */ 1312 - static long do_vmsplice(struct file *file, const struct iovec __user *iov, 1313 - unsigned long nr_segs, unsigned int flags) 1415 + static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov, 1416 + unsigned long nr_segs, unsigned int flags) 1314 1417 { 1315 1418 struct pipe_inode_info *pipe; 1316 1419 struct page *pages[PIPE_BUFFERS]; ··· 1428 1325 pipe = pipe_info(file->f_path.dentry->d_inode); 1429 1326 if (!pipe) 1430 1327 return -EBADF; 1431 - if (unlikely(nr_segs > UIO_MAXIOV)) 1432 - return -EINVAL; 1433 - else if (unlikely(!nr_segs)) 1434 - return 0; 1435 1328 1436 1329 spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial, 1437 1330 flags & SPLICE_F_GIFT); ··· 1437 1338 return splice_to_pipe(pipe, &spd); 1438 1339 } 1439 1340 1341 + /* 1342 + * Note that vmsplice only really supports true splicing _from_ user memory 1343 + * to a pipe, not the other way around. Splicing from user memory is a simple 1344 + * operation that can be supported without any funky alignment restrictions 1345 + * or nasty vm tricks. We simply map in the user memory and fill them into 1346 + * a pipe. The reverse isn't quite as easy, though. There are two possible 1347 + * solutions for that: 1348 + * 1349 + * - memcpy() the data internally, at which point we might as well just 1350 + * do a regular read() on the buffer anyway. 1351 + * - Lots of nasty vm tricks, that are neither fast nor flexible (it 1352 + * has restriction limitations on both ends of the pipe). 1353 + * 1354 + * Currently we punt and implement it as a normal copy, see pipe_to_user(). 
1355 + * 1356 + */ 1440 1357 asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov, 1441 1358 unsigned long nr_segs, unsigned int flags) 1442 1359 { ··· 1460 1345 long error; 1461 1346 int fput; 1462 1347 1348 + if (unlikely(nr_segs > UIO_MAXIOV)) 1349 + return -EINVAL; 1350 + else if (unlikely(!nr_segs)) 1351 + return 0; 1352 + 1463 1353 error = -EBADF; 1464 1354 file = fget_light(fd, &fput); 1465 1355 if (file) { 1466 1356 if (file->f_mode & FMODE_WRITE) 1467 - error = do_vmsplice(file, iov, nr_segs, flags); 1357 + error = vmsplice_to_pipe(file, iov, nr_segs, flags); 1358 + else if (file->f_mode & FMODE_READ) 1359 + error = vmsplice_to_user(file, iov, nr_segs, flags); 1468 1360 1469 1361 fput_light(file, fput); 1470 1362 }
+7 -1
include/linux/pipe_fs_i.h
··· 88 88 struct splice_desc { 89 89 unsigned int len, total_len; /* current and remaining length */ 90 90 unsigned int flags; /* splice flags */ 91 - struct file *file; /* file to read/write */ 91 + /* 92 + * actor() private data 93 + */ 94 + union { 95 + void __user *userptr; /* memory to write to */ 96 + struct file *file; /* file to read/write */ 97 + } u; 92 98 loff_t pos; /* file position */ 93 99 }; 94 100