[PATCH] Introduce sys_splice() system call

This adds support for the sys_splice system call. Using a pipe as a
transport, it can connect to files or sockets (the latter as output only).

From the splice.c comments:

"splice": joining two ropes together by interweaving their strands.

This is the "extended pipe" functionality, where a pipe is used as
an arbitrary in-memory buffer. Think of a pipe as a small kernel
buffer that you can use to transfer data from one end to the other.

The traditional unix read/write is extended with a "splice()" operation
that transfers data buffers to or from a pipe buffer.

Named by Larry McVoy, original implementation from Linus, extended by
Jens to support splicing to files and fixing the initial implementation
bugs.

Signed-off-by: Jens Axboe <axboe@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

authored by

Jens Axboe and committed by
Linus Torvalds
5274f052 5d4fe2c1

+669 -11
+1
arch/i386/kernel/syscall_table.S
··· 312 312 .long sys_unshare /* 310 */ 313 313 .long sys_set_robust_list 314 314 .long sys_get_robust_list 315 + .long sys_splice
+1
arch/ia64/kernel/entry.S
··· 1605 1605 data8 sys_ni_syscall // reserved for pselect 1606 1606 data8 sys_ni_syscall // 1295 reserved for ppoll 1607 1607 data8 sys_unshare 1608 + data8 sys_splice 1608 1609 1609 1610 .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls
+1 -1
fs/Makefile
··· 10 10 ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \ 11 11 attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \ 12 12 seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \ 13 - ioprio.o pnode.o drop_caches.o 13 + ioprio.o pnode.o drop_caches.o splice.o 14 14 15 15 obj-$(CONFIG_INOTIFY) += inotify.o 16 16 obj-$(CONFIG_EPOLL) += eventpoll.o
+2
fs/ext2/file.c
··· 53 53 .readv = generic_file_readv, 54 54 .writev = generic_file_writev, 55 55 .sendfile = generic_file_sendfile, 56 + .splice_read = generic_file_splice_read, 57 + .splice_write = generic_file_splice_write, 56 58 }; 57 59 58 60 #ifdef CONFIG_EXT2_FS_XIP
+2
fs/ext3/file.c
··· 119 119 .release = ext3_release_file, 120 120 .fsync = ext3_sync_file, 121 121 .sendfile = generic_file_sendfile, 122 + .splice_read = generic_file_splice_read, 123 + .splice_write = generic_file_splice_write, 122 124 }; 123 125 124 126 struct inode_operations ext3_file_inode_operations = {
+28 -5
fs/pipe.c
··· 15 15 #include <linux/pipe_fs_i.h> 16 16 #include <linux/uio.h> 17 17 #include <linux/highmem.h> 18 + #include <linux/pagemap.h> 18 19 19 20 #include <asm/uaccess.h> 20 21 #include <asm/ioctls.h> ··· 95 94 { 96 95 struct page *page = buf->page; 97 96 98 - if (info->tmp_page) { 99 - __free_page(page); 97 + /* 98 + * If nobody else uses this page, and we don't already have a 99 + * temporary page, let's keep track of it as a one-deep 100 + * allocation cache 101 + */ 102 + if (page_count(page) == 1 && !info->tmp_page) { 103 + info->tmp_page = page; 100 104 return; 101 105 } 102 - info->tmp_page = page; 106 + 107 + /* 108 + * Otherwise just release our reference to it 109 + */ 110 + page_cache_release(page); 103 111 } 104 112 105 113 static void *anon_pipe_buf_map(struct file *file, struct pipe_inode_info *info, struct pipe_buffer *buf) ··· 162 152 chars = total_len; 163 153 164 154 addr = ops->map(filp, info, buf); 155 + if (IS_ERR(addr)) { 156 + if (!ret) 157 + ret = PTR_ERR(addr); 158 + break; 159 + } 165 160 error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars); 166 161 ops->unmap(info, buf); 167 162 if (unlikely(error)) { ··· 269 254 struct pipe_buf_operations *ops = buf->ops; 270 255 int offset = buf->offset + buf->len; 271 256 if (ops->can_merge && offset + chars <= PAGE_SIZE) { 272 - void *addr = ops->map(filp, info, buf); 273 - int error = pipe_iov_copy_from_user(offset + addr, iov, chars); 257 + void *addr; 258 + int error; 259 + 260 + addr = ops->map(filp, info, buf); 261 + if (IS_ERR(addr)) { 262 + error = PTR_ERR(addr); 263 + goto out; 264 + } 265 + error = pipe_iov_copy_from_user(offset + addr, iov, 266 + chars); 274 267 ops->unmap(info, buf); 275 268 ret = error; 276 269 do_wakeup = 1;
+2
fs/reiserfs/file.c
··· 1576 1576 .sendfile = generic_file_sendfile, 1577 1577 .aio_read = generic_file_aio_read, 1578 1578 .aio_write = reiserfs_aio_write, 1579 + .splice_read = generic_file_splice_read, 1580 + .splice_write = generic_file_splice_write, 1579 1581 }; 1580 1582 1581 1583 struct inode_operations reiserfs_file_inode_operations = {
+612
fs/splice.c
··· 1 + /* 2 + * "splice": joining two ropes together by interweaving their strands. 3 + * 4 + * This is the "extended pipe" functionality, where a pipe is used as 5 + * an arbitrary in-memory buffer. Think of a pipe as a small kernel 6 + * buffer that you can use to transfer data from one end to the other. 7 + * 8 + * The traditional unix read/write is extended with a "splice()" operation 9 + * that transfers data buffers to or from a pipe buffer. 10 + * 11 + * Named by Larry McVoy, original implementation from Linus, extended by 12 + * Jens to support splicing to files and fixing the initial implementation 13 + * bugs. 14 + * 15 + * Copyright (C) 2005 Jens Axboe <axboe@suse.de> 16 + * Copyright (C) 2005 Linus Torvalds <torvalds@osdl.org> 17 + * 18 + */ 19 + #include <linux/fs.h> 20 + #include <linux/file.h> 21 + #include <linux/pagemap.h> 22 + #include <linux/pipe_fs_i.h> 23 + #include <linux/mm_inline.h> 24 + 25 + /* 26 + * Passed to the actors 27 + */ 28 + struct splice_desc { 29 + unsigned int len, total_len; /* current and remaining length */ 30 + unsigned int flags; /* splice flags */ 31 + struct file *file; /* file to read/write */ 32 + loff_t pos; /* file position */ 33 + }; 34 + 35 + static void page_cache_pipe_buf_release(struct pipe_inode_info *info, 36 + struct pipe_buffer *buf) 37 + { 38 + page_cache_release(buf->page); 39 + buf->page = NULL; 40 + } 41 + 42 + static void *page_cache_pipe_buf_map(struct file *file, 43 + struct pipe_inode_info *info, 44 + struct pipe_buffer *buf) 45 + { 46 + struct page *page = buf->page; 47 + 48 + lock_page(page); 49 + 50 + if (!PageUptodate(page)) { 51 + unlock_page(page); 52 + return ERR_PTR(-EIO); 53 + } 54 + 55 + if (!page->mapping) { 56 + unlock_page(page); 57 + return ERR_PTR(-ENODATA); 58 + } 59 + 60 + return kmap(buf->page); 61 + } 62 + 63 + static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info, 64 + struct pipe_buffer *buf) 65 + { 66 + unlock_page(buf->page); 67 + kunmap(buf->page); 68 + } 69 + 70 
+ static struct pipe_buf_operations page_cache_pipe_buf_ops = { 71 + .can_merge = 0, 72 + .map = page_cache_pipe_buf_map, 73 + .unmap = page_cache_pipe_buf_unmap, 74 + .release = page_cache_pipe_buf_release, 75 + }; 76 + 77 + static ssize_t move_to_pipe(struct inode *inode, struct page **pages, 78 + int nr_pages, unsigned long offset, 79 + unsigned long len) 80 + { 81 + struct pipe_inode_info *info; 82 + int ret, do_wakeup, i; 83 + 84 + ret = 0; 85 + do_wakeup = 0; 86 + i = 0; 87 + 88 + mutex_lock(PIPE_MUTEX(*inode)); 89 + 90 + info = inode->i_pipe; 91 + for (;;) { 92 + int bufs; 93 + 94 + if (!PIPE_READERS(*inode)) { 95 + send_sig(SIGPIPE, current, 0); 96 + if (!ret) 97 + ret = -EPIPE; 98 + break; 99 + } 100 + 101 + bufs = info->nrbufs; 102 + if (bufs < PIPE_BUFFERS) { 103 + int newbuf = (info->curbuf + bufs) & (PIPE_BUFFERS - 1); 104 + struct pipe_buffer *buf = info->bufs + newbuf; 105 + struct page *page = pages[i++]; 106 + unsigned long this_len; 107 + 108 + this_len = PAGE_CACHE_SIZE - offset; 109 + if (this_len > len) 110 + this_len = len; 111 + 112 + buf->page = page; 113 + buf->offset = offset; 114 + buf->len = this_len; 115 + buf->ops = &page_cache_pipe_buf_ops; 116 + info->nrbufs = ++bufs; 117 + do_wakeup = 1; 118 + 119 + ret += this_len; 120 + len -= this_len; 121 + offset = 0; 122 + if (!--nr_pages) 123 + break; 124 + if (!len) 125 + break; 126 + if (bufs < PIPE_BUFFERS) 127 + continue; 128 + 129 + break; 130 + } 131 + 132 + if (signal_pending(current)) { 133 + if (!ret) 134 + ret = -ERESTARTSYS; 135 + break; 136 + } 137 + 138 + if (do_wakeup) { 139 + wake_up_interruptible_sync(PIPE_WAIT(*inode)); 140 + kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, 141 + POLL_IN); 142 + do_wakeup = 0; 143 + } 144 + 145 + PIPE_WAITING_WRITERS(*inode)++; 146 + pipe_wait(inode); 147 + PIPE_WAITING_WRITERS(*inode)--; 148 + } 149 + 150 + mutex_unlock(PIPE_MUTEX(*inode)); 151 + 152 + if (do_wakeup) { 153 + wake_up_interruptible(PIPE_WAIT(*inode)); 154 + 
kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN); 155 + } 156 + 157 + while (i < nr_pages) 158 + page_cache_release(pages[i++]); 159 + 160 + return ret; 161 + } 162 + 163 + static int __generic_file_splice_read(struct file *in, struct inode *pipe, 164 + size_t len) 165 + { 166 + struct address_space *mapping = in->f_mapping; 167 + unsigned int offset, nr_pages; 168 + struct page *pages[PIPE_BUFFERS], *shadow[PIPE_BUFFERS]; 169 + struct page *page; 170 + pgoff_t index, pidx; 171 + int i, j; 172 + 173 + index = in->f_pos >> PAGE_CACHE_SHIFT; 174 + offset = in->f_pos & ~PAGE_CACHE_MASK; 175 + nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 176 + 177 + if (nr_pages > PIPE_BUFFERS) 178 + nr_pages = PIPE_BUFFERS; 179 + 180 + /* 181 + * initiate read-ahead on this page range 182 + */ 183 + do_page_cache_readahead(mapping, in, index, nr_pages); 184 + 185 + /* 186 + * Get as many pages from the page cache as possible.. 187 + * Start IO on the page cache entries we create (we 188 + * can assume that any pre-existing ones we find have 189 + * already had IO started on them). 
190 + */ 191 + i = find_get_pages(mapping, index, nr_pages, pages); 192 + 193 + /* 194 + * common case - we found all pages and they are contiguous, 195 + * kick them off 196 + */ 197 + if (i && (pages[i - 1]->index == index + i - 1)) 198 + goto splice_them; 199 + 200 + /* 201 + * fill shadow[] with pages at the right locations, so we only 202 + * have to fill holes 203 + */ 204 + memset(shadow, 0, i * sizeof(struct page *)); 205 + for (j = 0, pidx = index; j < i; pidx++, j++) 206 + shadow[pages[j]->index - pidx] = pages[j]; 207 + 208 + /* 209 + * now fill in the holes 210 + */ 211 + for (i = 0, pidx = index; i < nr_pages; pidx++, i++) { 212 + int error; 213 + 214 + if (shadow[i]) 215 + continue; 216 + 217 + /* 218 + * no page there, look one up / create it 219 + */ 220 + page = find_or_create_page(mapping, pidx, 221 + mapping_gfp_mask(mapping)); 222 + if (!page) 223 + break; 224 + 225 + if (PageUptodate(page)) 226 + unlock_page(page); 227 + else { 228 + error = mapping->a_ops->readpage(in, page); 229 + 230 + if (unlikely(error)) { 231 + page_cache_release(page); 232 + break; 233 + } 234 + } 235 + shadow[i] = page; 236 + } 237 + 238 + if (!i) { 239 + for (i = 0; i < nr_pages; i++) { 240 + if (shadow[i]) 241 + page_cache_release(shadow[i]); 242 + } 243 + return 0; 244 + } 245 + 246 + memcpy(pages, shadow, i * sizeof(struct page *)); 247 + 248 + /* 249 + * Now we splice them into the pipe.. 
250 + */ 251 + splice_them: 252 + return move_to_pipe(pipe, pages, i, offset, len); 253 + } 254 + 255 + ssize_t generic_file_splice_read(struct file *in, struct inode *pipe, 256 + size_t len, unsigned int flags) 257 + { 258 + ssize_t spliced; 259 + int ret; 260 + 261 + ret = 0; 262 + spliced = 0; 263 + while (len) { 264 + ret = __generic_file_splice_read(in, pipe, len); 265 + 266 + if (ret <= 0) 267 + break; 268 + 269 + in->f_pos += ret; 270 + len -= ret; 271 + spliced += ret; 272 + } 273 + 274 + if (spliced) 275 + return spliced; 276 + 277 + return ret; 278 + } 279 + 280 + /* 281 + * Send 'len' bytes to socket from 'file' at position 'pos' using sendpage(). 282 + */ 283 + static int pipe_to_sendpage(struct pipe_inode_info *info, 284 + struct pipe_buffer *buf, struct splice_desc *sd) 285 + { 286 + struct file *file = sd->file; 287 + loff_t pos = sd->pos; 288 + unsigned int offset; 289 + ssize_t ret; 290 + void *ptr; 291 + 292 + /* 293 + * sub-optimal, but we are limited by the pipe ->map. we don't 294 + * need a kmap'ed buffer here, we just want to make sure we 295 + * have the page pinned if the pipe page originates from the 296 + * page cache 297 + */ 298 + ptr = buf->ops->map(file, info, buf); 299 + if (IS_ERR(ptr)) 300 + return PTR_ERR(ptr); 301 + 302 + offset = pos & ~PAGE_CACHE_MASK; 303 + 304 + ret = file->f_op->sendpage(file, buf->page, offset, sd->len, &pos, 305 + sd->len < sd->total_len); 306 + 307 + buf->ops->unmap(info, buf); 308 + if (ret == sd->len) 309 + return 0; 310 + 311 + return -EIO; 312 + } 313 + 314 + /* 315 + * This is a little more tricky than the file -> pipe splicing. There are 316 + * basically three cases: 317 + * 318 + * - Destination page already exists in the address space and there 319 + * are users of it. For that case we have no other option that 320 + * copying the data. Tough luck. 321 + * - Destination page already exists in the address space, but there 322 + * are no users of it. Make sure it's uptodate, then drop it. 
Fall 323 + * through to last case. 324 + * - Destination page does not exist, we can add the pipe page to 325 + * the page cache and avoid the copy. 326 + * 327 + * For now we just do the slower thing and always copy pages over, it's 328 + * easier than migrating pages from the pipe to the target file. For the 329 + * case of doing file | file splicing, the migrate approach had some LRU 330 + * nastiness... 331 + */ 332 + static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, 333 + struct splice_desc *sd) 334 + { 335 + struct file *file = sd->file; 336 + struct address_space *mapping = file->f_mapping; 337 + unsigned int offset; 338 + struct page *page; 339 + char *src, *dst; 340 + pgoff_t index; 341 + int ret; 342 + 343 + /* 344 + * after this, page will be locked and unmapped 345 + */ 346 + src = buf->ops->map(file, info, buf); 347 + if (IS_ERR(src)) 348 + return PTR_ERR(src); 349 + 350 + index = sd->pos >> PAGE_CACHE_SHIFT; 351 + offset = sd->pos & ~PAGE_CACHE_MASK; 352 + 353 + find_page: 354 + ret = -ENOMEM; 355 + page = find_or_create_page(mapping, index, mapping_gfp_mask(mapping)); 356 + if (!page) 357 + goto out; 358 + 359 + /* 360 + * If the page is uptodate, it is also locked. If it isn't 361 + * uptodate, we can mark it uptodate if we are filling the 362 + * full page. Otherwise we need to read it in first... 
363 + */ 364 + if (!PageUptodate(page)) { 365 + if (sd->len < PAGE_CACHE_SIZE) { 366 + ret = mapping->a_ops->readpage(file, page); 367 + if (unlikely(ret)) 368 + goto out; 369 + 370 + lock_page(page); 371 + 372 + if (!PageUptodate(page)) { 373 + /* 374 + * page got invalidated, repeat 375 + */ 376 + if (!page->mapping) { 377 + unlock_page(page); 378 + page_cache_release(page); 379 + goto find_page; 380 + } 381 + ret = -EIO; 382 + goto out; 383 + } 384 + } else { 385 + WARN_ON(!PageLocked(page)); 386 + SetPageUptodate(page); 387 + } 388 + } 389 + 390 + ret = mapping->a_ops->prepare_write(file, page, 0, sd->len); 391 + if (ret) 392 + goto out; 393 + 394 + dst = kmap_atomic(page, KM_USER0); 395 + memcpy(dst + offset, src + buf->offset, sd->len); 396 + flush_dcache_page(page); 397 + kunmap_atomic(dst, KM_USER0); 398 + 399 + ret = mapping->a_ops->commit_write(file, page, 0, sd->len); 400 + if (ret < 0) 401 + goto out; 402 + 403 + set_page_dirty(page); 404 + ret = write_one_page(page, 0); 405 + out: 406 + if (ret < 0) 407 + unlock_page(page); 408 + page_cache_release(page); 409 + buf->ops->unmap(info, buf); 410 + return ret; 411 + } 412 + 413 + typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *, 414 + struct splice_desc *); 415 + 416 + static ssize_t move_from_pipe(struct inode *inode, struct file *out, 417 + size_t len, unsigned int flags, 418 + splice_actor *actor) 419 + { 420 + struct pipe_inode_info *info; 421 + int ret, do_wakeup, err; 422 + struct splice_desc sd; 423 + 424 + ret = 0; 425 + do_wakeup = 0; 426 + 427 + sd.total_len = len; 428 + sd.flags = flags; 429 + sd.file = out; 430 + sd.pos = out->f_pos; 431 + 432 + mutex_lock(PIPE_MUTEX(*inode)); 433 + 434 + info = inode->i_pipe; 435 + for (;;) { 436 + int bufs = info->nrbufs; 437 + 438 + if (bufs) { 439 + int curbuf = info->curbuf; 440 + struct pipe_buffer *buf = info->bufs + curbuf; 441 + struct pipe_buf_operations *ops = buf->ops; 442 + 443 + sd.len = buf->len; 444 + if (sd.len > 
sd.total_len) 445 + sd.len = sd.total_len; 446 + 447 + err = actor(info, buf, &sd); 448 + if (err) { 449 + if (!ret && err != -ENODATA) 450 + ret = err; 451 + 452 + break; 453 + } 454 + 455 + ret += sd.len; 456 + buf->offset += sd.len; 457 + buf->len -= sd.len; 458 + if (!buf->len) { 459 + buf->ops = NULL; 460 + ops->release(info, buf); 461 + curbuf = (curbuf + 1) & (PIPE_BUFFERS - 1); 462 + info->curbuf = curbuf; 463 + info->nrbufs = --bufs; 464 + do_wakeup = 1; 465 + } 466 + 467 + sd.pos += sd.len; 468 + sd.total_len -= sd.len; 469 + if (!sd.total_len) 470 + break; 471 + } 472 + 473 + if (bufs) 474 + continue; 475 + if (!PIPE_WRITERS(*inode)) 476 + break; 477 + if (!PIPE_WAITING_WRITERS(*inode)) { 478 + if (ret) 479 + break; 480 + } 481 + 482 + if (signal_pending(current)) { 483 + if (!ret) 484 + ret = -ERESTARTSYS; 485 + break; 486 + } 487 + 488 + if (do_wakeup) { 489 + wake_up_interruptible_sync(PIPE_WAIT(*inode)); 490 + kill_fasync(PIPE_FASYNC_WRITERS(*inode),SIGIO,POLL_OUT); 491 + do_wakeup = 0; 492 + } 493 + 494 + pipe_wait(inode); 495 + } 496 + 497 + mutex_unlock(PIPE_MUTEX(*inode)); 498 + 499 + if (do_wakeup) { 500 + wake_up_interruptible(PIPE_WAIT(*inode)); 501 + kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT); 502 + } 503 + 504 + mutex_lock(&out->f_mapping->host->i_mutex); 505 + out->f_pos = sd.pos; 506 + mutex_unlock(&out->f_mapping->host->i_mutex); 507 + return ret; 508 + 509 + } 510 + 511 + ssize_t generic_file_splice_write(struct inode *inode, struct file *out, 512 + size_t len, unsigned int flags) 513 + { 514 + return move_from_pipe(inode, out, len, flags, pipe_to_file); 515 + } 516 + 517 + ssize_t generic_splice_sendpage(struct inode *inode, struct file *out, 518 + size_t len, unsigned int flags) 519 + { 520 + return move_from_pipe(inode, out, len, flags, pipe_to_sendpage); 521 + } 522 + 523 + static long do_splice_from(struct inode *pipe, struct file *out, size_t len, 524 + unsigned int flags) 525 + { 526 + loff_t pos; 527 + int ret; 528 
+ 529 + if (!out->f_op || !out->f_op->splice_write) 530 + return -EINVAL; 531 + 532 + if (!(out->f_mode & FMODE_WRITE)) 533 + return -EBADF; 534 + 535 + pos = out->f_pos; 536 + ret = rw_verify_area(WRITE, out, &pos, len); 537 + if (unlikely(ret < 0)) 538 + return ret; 539 + 540 + return out->f_op->splice_write(pipe, out, len, flags); 541 + } 542 + 543 + static long do_splice_to(struct file *in, struct inode *pipe, size_t len, 544 + unsigned int flags) 545 + { 546 + loff_t pos, isize, left; 547 + int ret; 548 + 549 + if (!in->f_op || !in->f_op->splice_read) 550 + return -EINVAL; 551 + 552 + if (!(in->f_mode & FMODE_READ)) 553 + return -EBADF; 554 + 555 + pos = in->f_pos; 556 + ret = rw_verify_area(READ, in, &pos, len); 557 + if (unlikely(ret < 0)) 558 + return ret; 559 + 560 + isize = i_size_read(in->f_mapping->host); 561 + if (unlikely(in->f_pos >= isize)) 562 + return 0; 563 + 564 + left = isize - in->f_pos; 565 + if (left < len) 566 + len = left; 567 + 568 + return in->f_op->splice_read(in, pipe, len, flags); 569 + } 570 + 571 + static long do_splice(struct file *in, struct file *out, size_t len, 572 + unsigned int flags) 573 + { 574 + struct inode *pipe; 575 + 576 + pipe = in->f_dentry->d_inode; 577 + if (pipe->i_pipe) 578 + return do_splice_from(pipe, out, len, flags); 579 + 580 + pipe = out->f_dentry->d_inode; 581 + if (pipe->i_pipe) 582 + return do_splice_to(in, pipe, len, flags); 583 + 584 + return -EINVAL; 585 + } 586 + 587 + asmlinkage long sys_splice(int fdin, int fdout, size_t len, unsigned int flags) 588 + { 589 + long error; 590 + struct file *in, *out; 591 + int fput_in, fput_out; 592 + 593 + if (unlikely(!len)) 594 + return 0; 595 + 596 + error = -EBADF; 597 + in = fget_light(fdin, &fput_in); 598 + if (in) { 599 + if (in->f_mode & FMODE_READ) { 600 + out = fget_light(fdout, &fput_out); 601 + if (out) { 602 + if (out->f_mode & FMODE_WRITE) 603 + error = do_splice(in, out, len, flags); 604 + fput_light(out, fput_out); 605 + } 606 + } 607 + 608 + 
fput_light(in, fput_in); 609 + } 610 + 611 + return error; 612 + }
+2 -1
include/asm-i386/unistd.h
··· 318 318 #define __NR_unshare 310 319 319 #define __NR_set_robust_list 311 320 320 #define __NR_get_robust_list 312 321 + #define __NR_sys_splice 313 321 322 322 - #define NR_syscalls 313 323 + #define NR_syscalls 314 323 324 324 325 /* 325 326 * user-visible error numbers are in the range -1 - -128: see
+2 -1
include/asm-ia64/unistd.h
··· 285 285 #define __NR_faccessat 1293 286 286 /* 1294, 1295 reserved for pselect/ppoll */ 287 287 #define __NR_unshare 1296 288 + #define __NR_splice 1297 288 289 289 290 #ifdef __KERNEL__ 290 291 291 292 #include <linux/config.h> 292 293 293 - #define NR_syscalls 273 /* length of syscall table */ 294 + #define NR_syscalls 274 /* length of syscall table */ 294 295 295 296 #define __ARCH_WANT_SYS_RT_SIGACTION 296 297
+2 -1
include/asm-powerpc/unistd.h
··· 301 301 #define __NR_pselect6 280 302 302 #define __NR_ppoll 281 303 303 #define __NR_unshare 282 304 + #define __NR_splice 283 304 305 305 - #define __NR_syscalls 283 306 + #define __NR_syscalls 284 306 307 307 308 #ifdef __KERNEL__ 308 309 #define __NR__exit __NR_exit
+3 -1
include/asm-x86_64/unistd.h
··· 609 609 __SYSCALL(__NR_set_robust_list, sys_set_robust_list) 610 610 #define __NR_get_robust_list 274 611 611 __SYSCALL(__NR_get_robust_list, sys_get_robust_list) 612 + #define __NR_splice 275 613 + __SYSCALL(__NR_splice, sys_splice) 612 614 613 - #define __NR_syscall_max __NR_get_robust_list 615 + #define __NR_syscall_max __NR_splice 614 616 615 617 #ifndef __NO_STUBS 616 618
+4
include/linux/fs.h
··· 1032 1032 int (*check_flags)(int); 1033 1033 int (*dir_notify)(struct file *filp, unsigned long arg); 1034 1034 int (*flock) (struct file *, int, struct file_lock *); 1035 + ssize_t (*splice_write)(struct inode *, struct file *, size_t, unsigned int); 1036 + ssize_t (*splice_read)(struct file *, struct inode *, size_t, unsigned int); 1035 1037 }; 1036 1038 1037 1039 struct inode_operations { ··· 1611 1609 extern void do_generic_mapping_read(struct address_space *mapping, 1612 1610 struct file_ra_state *, struct file *, 1613 1611 loff_t *, read_descriptor_t *, read_actor_t); 1612 + extern ssize_t generic_file_splice_read(struct file *, struct inode *, size_t, unsigned int); 1613 + extern ssize_t generic_file_splice_write(struct inode *, struct file *, size_t, unsigned int); 1614 1614 extern void 1615 1615 file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); 1616 1616 extern ssize_t generic_file_readv(struct file *filp, const struct iovec *iov,
+2
include/linux/syscalls.h
··· 569 569 asmlinkage long compat_sys_openat(unsigned int dfd, const char __user *filename, 570 570 int flags, int mode); 571 571 asmlinkage long sys_unshare(unsigned long unshare_flags); 572 + asmlinkage long sys_splice(int fdin, int fdout, size_t len, 573 + unsigned int flags); 572 574 573 575 #endif
+5 -1
net/socket.c
··· 119 119 static ssize_t sock_sendpage(struct file *file, struct page *page, 120 120 int offset, size_t size, loff_t *ppos, int more); 121 121 122 + extern ssize_t generic_splice_sendpage(struct inode *inode, struct file *out, 123 + size_t len, unsigned int flags); 124 + 122 125 123 126 /* 124 127 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear ··· 144 141 .fasync = sock_fasync, 145 142 .readv = sock_readv, 146 143 .writev = sock_writev, 147 - .sendpage = sock_sendpage 144 + .sendpage = sock_sendpage, 145 + .splice_write = generic_splice_sendpage, 148 146 }; 149 147 150 148 /*