Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

pipe: add support for shrinking and growing pipes

This patch adds F_GETPIPE_SZ and F_SETPIPE_SZ fcntl() actions for
growing and shrinking the size of a pipe, and adjusts pipe.c, splice.c,
and the relay and network splice code to work with these larger (or
smaller) pipes.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
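For context, a minimal userspace sketch of the new interface (an illustrative test program, not part of the patch). In this version of the patch the F_SETPIPE_SZ argument is a count of pipe buffers (pages) and must be a power of two, and F_GETPIPE_SZ returns that count; the fallback defines assume F_LINUX_SPECIFIC_BASE == 1024, as in include/linux/fcntl.h. Later kernels changed the interface to a byte count, so treat this purely as an illustration of the patch as posted.

    /* hypothetical test program; sizes here are buffer (page) counts, not bytes */
    #include <stdio.h>
    #include <unistd.h>
    #include <fcntl.h>

    #ifndef F_SETPIPE_SZ
    #define F_SETPIPE_SZ (1024 + 7) /* F_LINUX_SPECIFIC_BASE + 7 */
    #define F_GETPIPE_SZ (1024 + 8) /* F_LINUX_SPECIFIC_BASE + 8 */
    #endif

    int main(void)
    {
            int fds[2];

            if (pipe(fds))
                    return 1;

            /* default is PIPE_DEF_BUFFERS == 16 */
            printf("default: %d buffers\n", fcntl(fds[0], F_GETPIPE_SZ));

            /* grow to 64 buffers; non-power-of-two sizes fail with EINVAL */
            if (fcntl(fds[0], F_SETPIPE_SZ, 64) < 0)
                    perror("F_SETPIPE_SZ");

            printf("now: %d buffers\n", fcntl(fds[0], F_GETPIPE_SZ));
            return 0;
    }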

+293 -109
+5
fs/fcntl.c
···
 #include <linux/dnotify.h>
 #include <linux/slab.h>
 #include <linux/module.h>
+#include <linux/pipe_fs_i.h>
 #include <linux/security.h>
 #include <linux/ptrace.h>
 #include <linux/signal.h>
···
 		break;
 	case F_NOTIFY:
 		err = fcntl_dirnotify(fd, filp, arg);
+		break;
+	case F_SETPIPE_SZ:
+	case F_GETPIPE_SZ:
+		err = pipe_fcntl(filp, cmd, arg);
 		break;
 	default:
 		break;
+95 -12
fs/pipe.c
···
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/fs.h>
+#include <linux/log2.h>
 #include <linux/mount.h>
 #include <linux/pipe_fs_i.h>
 #include <linux/uio.h>
···
 		if (!buf->len) {
 			buf->ops = NULL;
 			ops->release(pipe, buf);
-			curbuf = (curbuf + 1) & (PIPE_BUFFERS-1);
+			curbuf = (curbuf + 1) & (pipe->buffers - 1);
 			pipe->curbuf = curbuf;
 			pipe->nrbufs = --bufs;
 			do_wakeup = 1;
···
 	chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */
 	if (pipe->nrbufs && chars != 0) {
 		int lastbuf = (pipe->curbuf + pipe->nrbufs - 1) &
-							(PIPE_BUFFERS-1);
+							(pipe->buffers - 1);
 		struct pipe_buffer *buf = pipe->bufs + lastbuf;
 		const struct pipe_buf_operations *ops = buf->ops;
 		int offset = buf->offset + buf->len;
···
 			break;
 		}
 		bufs = pipe->nrbufs;
-		if (bufs < PIPE_BUFFERS) {
-			int newbuf = (pipe->curbuf + bufs) & (PIPE_BUFFERS-1);
+		if (bufs < pipe->buffers) {
+			int newbuf = (pipe->curbuf + bufs) & (pipe->buffers - 1);
 			struct pipe_buffer *buf = pipe->bufs + newbuf;
 			struct page *page = pipe->tmp_page;
 			char *src;
···
 			if (!total_len)
 				break;
 		}
-		if (bufs < PIPE_BUFFERS)
+		if (bufs < pipe->buffers)
 			continue;
 		if (filp->f_flags & O_NONBLOCK) {
 			if (!ret)
···
 		nrbufs = pipe->nrbufs;
 		while (--nrbufs >= 0) {
 			count += pipe->bufs[buf].len;
-			buf = (buf+1) & (PIPE_BUFFERS-1);
+			buf = (buf+1) & (pipe->buffers - 1);
 		}
 		mutex_unlock(&inode->i_mutex);
···
 	}

 	if (filp->f_mode & FMODE_WRITE) {
-		mask |= (nrbufs < PIPE_BUFFERS) ? POLLOUT | POLLWRNORM : 0;
+		mask |= (nrbufs < pipe->buffers) ? POLLOUT | POLLWRNORM : 0;
 		/*
 		 * Most Unices do not set POLLERR for FIFOs but on Linux they
 		 * behave exactly like pipes for poll().
···

 	pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
 	if (pipe) {
-		init_waitqueue_head(&pipe->wait);
-		pipe->r_counter = pipe->w_counter = 1;
-		pipe->inode = inode;
+		pipe->bufs = kzalloc(sizeof(struct pipe_buffer) * PIPE_DEF_BUFFERS, GFP_KERNEL);
+		if (pipe->bufs) {
+			init_waitqueue_head(&pipe->wait);
+			pipe->r_counter = pipe->w_counter = 1;
+			pipe->inode = inode;
+			pipe->buffers = PIPE_DEF_BUFFERS;
+			return pipe;
+		}
+		kfree(pipe);
 	}

-	return pipe;
+	return NULL;
 }

 void __free_pipe_info(struct pipe_inode_info *pipe)
 {
 	int i;

-	for (i = 0; i < PIPE_BUFFERS; i++) {
+	for (i = 0; i < pipe->buffers; i++) {
 		struct pipe_buffer *buf = pipe->bufs + i;
 		if (buf->ops)
 			buf->ops->release(pipe, buf);
 	}
 	if (pipe->tmp_page)
 		__free_page(pipe->tmp_page);
+	kfree(pipe->bufs);
 	kfree(pipe);
 }
···
 SYSCALL_DEFINE1(pipe, int __user *, fildes)
 {
 	return sys_pipe2(fildes, 0);
+}
+
+/*
+ * Allocate a new array of pipe buffers and copy the info over. Returns the
+ * pipe size if successful, or -ERROR on error.
+ */
+static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
+{
+	struct pipe_buffer *bufs;
+
+	/*
+	 * Must be a power-of-2 currently
+	 */
+	if (!is_power_of_2(arg))
+		return -EINVAL;
+
+	/*
+	 * We can shrink the pipe if arg >= pipe->nrbufs. Since we don't
+	 * expect a lot of shrink+grow operations, just free and allocate
+	 * again like we would do for growing. If the pipe currently
+	 * contains more buffers than arg, then return busy.
+	 */
+	if (arg < pipe->nrbufs)
+		return -EBUSY;
+
+	bufs = kcalloc(arg, sizeof(struct pipe_buffer), GFP_KERNEL);
+	if (unlikely(!bufs))
+		return -ENOMEM;
+
+	/*
+	 * The pipe array wraps around, so just start the new one at zero
+	 * and adjust the indexes.
+	 */
+	if (pipe->nrbufs) {
+		const unsigned int tail = pipe->nrbufs & (pipe->buffers - 1);
+		const unsigned int head = pipe->nrbufs - tail;
+
+		if (head)
+			memcpy(bufs, pipe->bufs + pipe->curbuf, head * sizeof(struct pipe_buffer));
+		if (tail)
+			memcpy(bufs + head, pipe->bufs, tail * sizeof(struct pipe_buffer));
+	}
+
+	pipe->curbuf = 0;
+	kfree(pipe->bufs);
+	pipe->bufs = bufs;
+	pipe->buffers = arg;
+	return arg;
+}
+
+long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct pipe_inode_info *pipe;
+	long ret;
+
+	pipe = file->f_path.dentry->d_inode->i_pipe;
+	if (!pipe)
+		return -EBADF;
+
+	mutex_lock(&pipe->inode->i_mutex);
+
+	switch (cmd) {
+	case F_SETPIPE_SZ:
+		ret = pipe_set_size(pipe, arg);
+		break;
+	case F_GETPIPE_SZ:
+		ret = pipe->buffers;
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	mutex_unlock(&pipe->inode->i_mutex);
+	return ret;
 }

 /*
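The subtle step in pipe_set_size() above is linearizing the old circular buffer array into the fresh allocation. Here is a standalone sketch of that wraparound copy in plain C (ring_resize, slot, nslots and friends are made-up illustrative names, not kernel code). It derives the straight/wrapped split from cur + used, which is correct for any starting offset, whether or not the in-use region actually wraps:

    #include <stdlib.h>
    #include <string.h>

    struct slot { int payload; };

    /*
     * Resize a power-of-two ring: the occupied region [cur, cur + used)
     * may wrap past the end of the old array. Copy the straight part
     * first, then the wrapped part, so the new ring starts at index 0.
     */
    static struct slot *ring_resize(struct slot *old, unsigned int nslots,
                                    unsigned int cur, unsigned int used,
                                    unsigned int newslots)
    {
            struct slot *fresh;
            unsigned int straight, wrapped;

            fresh = calloc(newslots, sizeof(*fresh));
            if (!fresh)
                    return NULL;

            /* how much of the used region sits before the end of the old array */
            straight = used;
            if (cur + used > nslots)
                    straight = nslots - cur;
            wrapped = used - straight;      /* remainder starts at old[0] */

            memcpy(fresh, old + cur, straight * sizeof(*fresh));
            memcpy(fresh + straight, old, wrapped * sizeof(*fresh));

            free(old);
            return fresh;   /* the caller resets cur to 0 */
    }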
+106 -47
fs/splice.c
···
 		break;
 	}

-	if (pipe->nrbufs < PIPE_BUFFERS) {
-		int newbuf = (pipe->curbuf + pipe->nrbufs) & (PIPE_BUFFERS - 1);
+	if (pipe->nrbufs < pipe->buffers) {
+		int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
 		struct pipe_buffer *buf = pipe->bufs + newbuf;

 		buf->page = spd->pages[page_nr];
···

 		if (!--spd->nr_pages)
 			break;
-		if (pipe->nrbufs < PIPE_BUFFERS)
+		if (pipe->nrbufs < pipe->buffers)
 			continue;

 		break;
···
 		page_cache_release(spd->pages[i]);
 }

+/*
+ * Check if we need to grow the arrays holding pages and partial page
+ * descriptions.
+ */
+int splice_grow_spd(struct pipe_inode_info *pipe, struct splice_pipe_desc *spd)
+{
+	if (pipe->buffers <= PIPE_DEF_BUFFERS)
+		return 0;
+
+	spd->pages = kmalloc(pipe->buffers * sizeof(struct page *), GFP_KERNEL);
+	spd->partial = kmalloc(pipe->buffers * sizeof(struct partial_page), GFP_KERNEL);
+
+	if (spd->pages && spd->partial)
+		return 0;
+
+	kfree(spd->pages);
+	kfree(spd->partial);
+	return -ENOMEM;
+}
+
+void splice_shrink_spd(struct pipe_inode_info *pipe,
+		       struct splice_pipe_desc *spd)
+{
+	if (pipe->buffers <= PIPE_DEF_BUFFERS)
+		return;
+
+	kfree(spd->pages);
+	kfree(spd->partial);
+}
+
 static int
 __generic_file_splice_read(struct file *in, loff_t *ppos,
 			   struct pipe_inode_info *pipe, size_t len,
···
 {
 	struct address_space *mapping = in->f_mapping;
 	unsigned int loff, nr_pages, req_pages;
-	struct page *pages[PIPE_BUFFERS];
-	struct partial_page partial[PIPE_BUFFERS];
+	struct page *pages[PIPE_DEF_BUFFERS];
+	struct partial_page partial[PIPE_DEF_BUFFERS];
 	struct page *page;
 	pgoff_t index, end_index;
 	loff_t isize;
···
 		.spd_release = spd_release_page,
 	};

+	if (splice_grow_spd(pipe, &spd))
+		return -ENOMEM;
+
 	index = *ppos >> PAGE_CACHE_SHIFT;
 	loff = *ppos & ~PAGE_CACHE_MASK;
 	req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-	nr_pages = min(req_pages, (unsigned)PIPE_BUFFERS);
+	nr_pages = min(req_pages, pipe->buffers);

 	/*
 	 * Lookup the (hopefully) full range of pages we need.
 	 */
-	spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, pages);
+	spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, spd.pages);
 	index += spd.nr_pages;

 	/*
···
 			unlock_page(page);
 		}

-		pages[spd.nr_pages++] = page;
+		spd.pages[spd.nr_pages++] = page;
 		index++;
 	}
···
 		 * this_len is the max we'll use from this page
 		 */
 		this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff);
-		page = pages[page_nr];
+		page = spd.pages[page_nr];

 		if (PageReadahead(page))
 			page_cache_async_readahead(mapping, &in->f_ra, in,
···
 				error = -ENOMEM;
 				break;
 			}
-			page_cache_release(pages[page_nr]);
-			pages[page_nr] = page;
+			page_cache_release(spd.pages[page_nr]);
+			spd.pages[page_nr] = page;
 		}
 		/*
 		 * page was already under io and is now done, great
···
 			len = this_len;
 		}

-		partial[page_nr].offset = loff;
-		partial[page_nr].len = this_len;
+		spd.partial[page_nr].offset = loff;
+		spd.partial[page_nr].len = this_len;
 		len -= this_len;
 		loff = 0;
 		spd.nr_pages++;
···
 	 * we got, 'nr_pages' is how many pages are in the map.
 	 */
 	while (page_nr < nr_pages)
-		page_cache_release(pages[page_nr++]);
+		page_cache_release(spd.pages[page_nr++]);
 	in->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;

 	if (spd.nr_pages)
-		return splice_to_pipe(pipe, &spd);
+		error = splice_to_pipe(pipe, &spd);

+	splice_shrink_spd(pipe, &spd);
 	return error;
 }
···
 	unsigned int nr_pages;
 	unsigned int nr_freed;
 	size_t offset;
-	struct page *pages[PIPE_BUFFERS];
-	struct partial_page partial[PIPE_BUFFERS];
-	struct iovec vec[PIPE_BUFFERS];
+	struct page *pages[PIPE_DEF_BUFFERS];
+	struct partial_page partial[PIPE_DEF_BUFFERS];
+	struct iovec *vec, __vec[PIPE_DEF_BUFFERS];
 	pgoff_t index;
 	ssize_t res;
 	size_t this_len;
···
 		.spd_release = spd_release_page,
 	};

+	if (splice_grow_spd(pipe, &spd))
+		return -ENOMEM;
+
+	res = -ENOMEM;
+	vec = __vec;
+	if (pipe->buffers > PIPE_DEF_BUFFERS) {
+		vec = kmalloc(pipe->buffers * sizeof(struct iovec), GFP_KERNEL);
+		if (!vec)
+			goto shrink_ret;
+	}
+
 	index = *ppos >> PAGE_CACHE_SHIFT;
 	offset = *ppos & ~PAGE_CACHE_MASK;
 	nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;

-	for (i = 0; i < nr_pages && i < PIPE_BUFFERS && len; i++) {
+	for (i = 0; i < nr_pages && i < pipe->buffers && len; i++) {
 		struct page *page;

 		page = alloc_page(GFP_USER);
···
 		this_len = min_t(size_t, len, PAGE_CACHE_SIZE - offset);
 		vec[i].iov_base = (void __user *) page_address(page);
 		vec[i].iov_len = this_len;
-		pages[i] = page;
+		spd.pages[i] = page;
 		spd.nr_pages++;
 		len -= this_len;
 		offset = 0;
···
 	nr_freed = 0;
 	for (i = 0; i < spd.nr_pages; i++) {
 		this_len = min_t(size_t, vec[i].iov_len, res);
-		partial[i].offset = 0;
-		partial[i].len = this_len;
+		spd.partial[i].offset = 0;
+		spd.partial[i].len = this_len;
 		if (!this_len) {
-			__free_page(pages[i]);
-			pages[i] = NULL;
+			__free_page(spd.pages[i]);
+			spd.pages[i] = NULL;
 			nr_freed++;
 		}
 		res -= this_len;
···
 	if (res > 0)
 		*ppos += res;

+shrink_ret:
+	if (vec != __vec)
+		kfree(vec);
+	splice_shrink_spd(pipe, &spd);
 	return res;

 err:
 	for (i = 0; i < spd.nr_pages; i++)
-		__free_page(pages[i]);
+		__free_page(spd.pages[i]);

-	return error;
+	res = error;
+	goto shrink_ret;
 }
 EXPORT_SYMBOL(default_file_splice_read);
···
 		if (!buf->len) {
 			buf->ops = NULL;
 			ops->release(pipe, buf);
-			pipe->curbuf = (pipe->curbuf + 1) & (PIPE_BUFFERS - 1);
+			pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
 			pipe->nrbufs--;
 			if (pipe->inode)
 				sd->need_wakeup = true;
···
 	 * If we did an incomplete transfer we must release
 	 * the pipe buffers in question:
 	 */
-	for (i = 0; i < PIPE_BUFFERS; i++) {
+	for (i = 0; i < pipe->buffers; i++) {
 		struct pipe_buffer *buf = pipe->bufs + i;

 		if (buf->ops) {
···
  */
 static int get_iovec_page_array(const struct iovec __user *iov,
 				unsigned int nr_vecs, struct page **pages,
-				struct partial_page *partial, int aligned)
+				struct partial_page *partial, int aligned,
+				unsigned int pipe_buffers)
 {
 	int buffers = 0, error = 0;
···
 			break;

 		npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
-		if (npages > PIPE_BUFFERS - buffers)
-			npages = PIPE_BUFFERS - buffers;
+		if (npages > pipe_buffers - buffers)
+			npages = pipe_buffers - buffers;

 		error = get_user_pages_fast((unsigned long)base, npages,
 					0, &pages[buffers]);
···
 		 * or if we mapped the max number of pages that we have
 		 * room for.
 		 */
-		if (error < npages || buffers == PIPE_BUFFERS)
+		if (error < npages || buffers == pipe_buffers)
 			break;

 		nr_vecs--;
···
 			      unsigned long nr_segs, unsigned int flags)
 {
 	struct pipe_inode_info *pipe;
-	struct page *pages[PIPE_BUFFERS];
-	struct partial_page partial[PIPE_BUFFERS];
+	struct page *pages[PIPE_DEF_BUFFERS];
+	struct partial_page partial[PIPE_DEF_BUFFERS];
 	struct splice_pipe_desc spd = {
 		.pages = pages,
 		.partial = partial,
···
 		.ops = &user_page_pipe_buf_ops,
 		.spd_release = spd_release_page,
 	};
+	long ret;

 	pipe = pipe_info(file->f_path.dentry->d_inode);
 	if (!pipe)
 		return -EBADF;

-	spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial,
-					    flags & SPLICE_F_GIFT);
-	if (spd.nr_pages <= 0)
-		return spd.nr_pages;
+	if (splice_grow_spd(pipe, &spd))
+		return -ENOMEM;

-	return splice_to_pipe(pipe, &spd);
+	spd.nr_pages = get_iovec_page_array(iov, nr_segs, spd.pages,
+					    spd.partial, flags & SPLICE_F_GIFT,
+					    pipe->buffers);
+	if (spd.nr_pages <= 0)
+		ret = spd.nr_pages;
+	else
+		ret = splice_to_pipe(pipe, &spd);
+
+	splice_shrink_spd(pipe, &spd);
+	return ret;
 }
···
 	 * Check ->nrbufs without the inode lock first. This function
 	 * is speculative anyways, so missing one is ok.
 	 */
-	if (pipe->nrbufs < PIPE_BUFFERS)
+	if (pipe->nrbufs < pipe->buffers)
 		return 0;

 	ret = 0;
 	pipe_lock(pipe);

-	while (pipe->nrbufs >= PIPE_BUFFERS) {
+	while (pipe->nrbufs >= pipe->buffers) {
 		if (!pipe->readers) {
 			send_sig(SIGPIPE, current, 0);
 			ret = -EPIPE;
···
 		 * Cannot make any progress, because either the input
 		 * pipe is empty or the output pipe is full.
 		 */
-		if (!ipipe->nrbufs || opipe->nrbufs >= PIPE_BUFFERS) {
+		if (!ipipe->nrbufs || opipe->nrbufs >= opipe->buffers) {
 			/* Already processed some buffers, break */
 			if (ret)
 				break;
···
 		}

 		ibuf = ipipe->bufs + ipipe->curbuf;
-		nbuf = (opipe->curbuf + opipe->nrbufs) % PIPE_BUFFERS;
+		nbuf = (opipe->curbuf + opipe->nrbufs) & (opipe->buffers - 1);
 		obuf = opipe->bufs + nbuf;

 		if (len >= ibuf->len) {
···
 			*obuf = *ibuf;
 			ibuf->ops = NULL;
 			opipe->nrbufs++;
-			ipipe->curbuf = (ipipe->curbuf + 1) % PIPE_BUFFERS;
+			ipipe->curbuf = (ipipe->curbuf + 1) & (ipipe->buffers - 1);
 			ipipe->nrbufs--;
 			input_wakeup = true;
 		} else {
···
 		 * If we have iterated all input buffers or ran out of
 		 * output room, break.
 		 */
-		if (i >= ipipe->nrbufs || opipe->nrbufs >= PIPE_BUFFERS)
+		if (i >= ipipe->nrbufs || opipe->nrbufs >= opipe->buffers)
 			break;

-		ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (PIPE_BUFFERS - 1));
-		nbuf = (opipe->curbuf + opipe->nrbufs) & (PIPE_BUFFERS - 1);
+		ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (ipipe->buffers - 1));
+		nbuf = (opipe->curbuf + opipe->nrbufs) & (opipe->buffers - 1);

 		/*
 		 * Get a reference to this pipe buffer,
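Every splice_read path converted above repeats the same convention: small on-stack arrays cover the default pipe size, heap allocations take over only once the pipe has been grown, and every exit path funnels through the shrink call. A condensed sketch of the pattern (illustrative names in plain C, not the kernel API; the real splice_grow_spd/splice_shrink_spd also handle the partial-page array, elided here):

    #include <stdlib.h>

    #define DEF_BUFFERS 16

    struct desc {
            void **pages;   /* points at stack storage by default */
    };

    static int grow_desc(unsigned int pipe_buffers, struct desc *d)
    {
            if (pipe_buffers <= DEF_BUFFERS)
                    return 0;       /* the stack array is big enough */

            d->pages = malloc(pipe_buffers * sizeof(void *));
            return d->pages ? 0 : -1;
    }

    static void shrink_desc(unsigned int pipe_buffers, struct desc *d)
    {
            if (pipe_buffers > DEF_BUFFERS)
                    free(d->pages); /* stack storage needs no cleanup */
    }

    static long do_splice_read(unsigned int pipe_buffers)
    {
            void *pages_def[DEF_BUFFERS];
            struct desc d = { .pages = pages_def };
            long ret = 0;

            if (grow_desc(pipe_buffers, &d))
                    return -1;

            /* ... fill d.pages[0 .. pipe_buffers - 1] and splice ... */

            shrink_desc(pipe_buffers, &d);  /* every exit path must pass here */
            return ret;
    }

This keeps the common case (default 16-buffer pipes) free of extra allocations, and it is why the converted functions gain a shared exit label in place of their old early returns.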
+6
include/linux/fcntl.h
···
 #define F_NOTIFY	(F_LINUX_SPECIFIC_BASE+2)

 /*
+ * Set and get the size of the pipe page array
+ */
+#define F_SETPIPE_SZ	(F_LINUX_SPECIFIC_BASE + 7)
+#define F_GETPIPE_SZ	(F_LINUX_SPECIFIC_BASE + 8)
+
+/*
  * Types of directory notifications that may be requested.
  */
 #define DN_ACCESS	0x00000001	/* File accessed */
+7 -4
include/linux/pipe_fs_i.h
···

 #define PIPEFS_MAGIC 0x50495045

-#define PIPE_BUFFERS (16)
+#define PIPE_DEF_BUFFERS	16

 #define PIPE_BUF_FLAG_LRU	0x01	/* page is on the LRU */
 #define PIPE_BUF_FLAG_ATOMIC	0x02	/* was atomically mapped */
···
 **/
 struct pipe_inode_info {
 	wait_queue_head_t wait;
-	unsigned int nrbufs, curbuf;
-	struct page *tmp_page;
+	unsigned int nrbufs, curbuf, buffers;
 	unsigned int readers;
 	unsigned int writers;
 	unsigned int waiting_writers;
 	unsigned int r_counter;
 	unsigned int w_counter;
+	struct page *tmp_page;
 	struct fasync_struct *fasync_readers;
 	struct fasync_struct *fasync_writers;
 	struct inode *inode;
-	struct pipe_buffer bufs[PIPE_BUFFERS];
+	struct pipe_buffer *bufs;
 };

 /*
···
 int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *);
 int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *);
 void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *);
+
+/* for F_SETPIPE_SZ and F_GETPIPE_SZ */
+long pipe_fcntl(struct file *, unsigned int, unsigned long arg);

 #endif
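The reason pipe_set_size() accepts only powers of two is visible in this header's users: every ring index in the patch is computed with & (buffers - 1), which matches a modulo only when buffers is a power of two. A tiny self-check (plain C, illustrative):

    #include <assert.h>

    int main(void)
    {
            unsigned int buffers = 16;      /* a power of two, like PIPE_DEF_BUFFERS */
            unsigned int curbuf = 14, nrbufs = 5;

            /* (14 + 5) & 15 == 3 == (14 + 5) % 16 */
            assert(((curbuf + nrbufs) & (buffers - 1)) ==
                   (curbuf + nrbufs) % buffers);
            return 0;
    }

This is also why the old % PIPE_BUFFERS expressions in splice_pipe_to_pipe() become mask operations on the runtime size: the mask form stays correct for a resized ring and avoids a division on a value the compiler can no longer fold at build time.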
+7
include/linux/splice.h
···
 extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,
 				      splice_direct_actor *);

+/*
+ * for dynamic pipe sizing
+ */
+extern int splice_grow_spd(struct pipe_inode_info *, struct splice_pipe_desc *);
+extern void splice_shrink_spd(struct pipe_inode_info *,
+			      struct splice_pipe_desc *);
+
 #endif
+10 -5
kernel/relay.c
···
 	size_t read_subbuf = read_start / subbuf_size;
 	size_t padding = rbuf->padding[read_subbuf];
 	size_t nonpad_end = read_subbuf * subbuf_size + subbuf_size - padding;
-	struct page *pages[PIPE_BUFFERS];
-	struct partial_page partial[PIPE_BUFFERS];
+	struct page *pages[PIPE_DEF_BUFFERS];
+	struct partial_page partial[PIPE_DEF_BUFFERS];
 	struct splice_pipe_desc spd = {
 		.pages = pages,
 		.nr_pages = 0,
···

 	if (rbuf->subbufs_produced == rbuf->subbufs_consumed)
 		return 0;
+	if (splice_grow_spd(pipe, &spd))
+		return -ENOMEM;

 	/*
 	 * Adjust read len, if longer than what is available
···
 	subbuf_pages = rbuf->chan->alloc_size >> PAGE_SHIFT;
 	pidx = (read_start / PAGE_SIZE) % subbuf_pages;
 	poff = read_start & ~PAGE_MASK;
-	nr_pages = min_t(unsigned int, subbuf_pages, PIPE_BUFFERS);
+	nr_pages = min_t(unsigned int, subbuf_pages, pipe->buffers);

 	for (total_len = 0; spd.nr_pages < nr_pages; spd.nr_pages++) {
 		unsigned int this_len, this_end, private;
···
 		}
 	}

+	ret = 0;
 	if (!spd.nr_pages)
-		return 0;
+		goto out;

 	ret = *nonpad_ret = splice_to_pipe(pipe, &spd);
 	if (ret < 0 || ret < total_len)
-		return ret;
+		goto out;

 	if (read_start + ret == nonpad_end)
 		ret += padding;

+out:
+	splice_shrink_spd(pipe, &spd);
 	return ret;
 }
+36 -24
kernel/trace/trace.c
···
 					size_t len,
 					unsigned int flags)
 {
-	struct page *pages[PIPE_BUFFERS];
-	struct partial_page partial[PIPE_BUFFERS];
+	struct page *pages_def[PIPE_DEF_BUFFERS];
+	struct partial_page partial_def[PIPE_DEF_BUFFERS];
 	struct trace_iterator *iter = filp->private_data;
 	struct splice_pipe_desc spd = {
-		.pages = pages,
-		.partial = partial,
+		.pages = pages_def,
+		.partial = partial_def,
 		.nr_pages = 0, /* This gets updated below. */
 		.flags = flags,
 		.ops = &tracing_pipe_buf_ops,
···
 	ssize_t ret;
 	size_t rem;
 	unsigned int i;
+
+	if (splice_grow_spd(pipe, &spd))
+		return -ENOMEM;

 	/* copy the tracer to avoid using a global lock all around */
 	mutex_lock(&trace_types_lock);
···
 	trace_access_lock(iter->cpu_file);

 	/* Fill as many pages as possible. */
-	for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) {
-		pages[i] = alloc_page(GFP_KERNEL);
-		if (!pages[i])
+	for (i = 0, rem = len; i < pipe->buffers && rem; i++) {
+		spd.pages[i] = alloc_page(GFP_KERNEL);
+		if (!spd.pages[i])
 			break;

 		rem = tracing_fill_pipe_page(rem, iter);

 		/* Copy the data into the page, so we can start over. */
 		ret = trace_seq_to_buffer(&iter->seq,
-					  page_address(pages[i]),
+					  page_address(spd.pages[i]),
 					  iter->seq.len);
 		if (ret < 0) {
-			__free_page(pages[i]);
+			__free_page(spd.pages[i]);
 			break;
 		}
-		partial[i].offset = 0;
-		partial[i].len = iter->seq.len;
+		spd.partial[i].offset = 0;
+		spd.partial[i].len = iter->seq.len;

 		trace_seq_init(&iter->seq);
 	}
···

 	spd.nr_pages = i;

-	return splice_to_pipe(pipe, &spd);
+	ret = splice_to_pipe(pipe, &spd);
+out:
+	splice_shrink_spd(pipe, &spd);
+	return ret;

 out_err:
 	mutex_unlock(&iter->mutex);
-
-	return ret;
+	goto out;
 }

 static ssize_t
···
 				   unsigned int flags)
 {
 	struct ftrace_buffer_info *info = file->private_data;
-	struct partial_page partial[PIPE_BUFFERS];
-	struct page *pages[PIPE_BUFFERS];
+	struct partial_page partial_def[PIPE_DEF_BUFFERS];
+	struct page *pages_def[PIPE_DEF_BUFFERS];
 	struct splice_pipe_desc spd = {
-		.pages = pages,
-		.partial = partial,
+		.pages = pages_def,
+		.partial = partial_def,
 		.flags = flags,
 		.ops = &buffer_pipe_buf_ops,
 		.spd_release = buffer_spd_release,
···
 	int entries, size, i;
 	size_t ret;

+	if (splice_grow_spd(pipe, &spd))
+		return -ENOMEM;
+
 	if (*ppos & (PAGE_SIZE - 1)) {
 		WARN_ONCE(1, "Ftrace: previous read must page-align\n");
-		return -EINVAL;
+		ret = -EINVAL;
+		goto out;
 	}

 	if (len & (PAGE_SIZE - 1)) {
 		WARN_ONCE(1, "Ftrace: splice_read should page-align\n");
-		if (len < PAGE_SIZE)
-			return -EINVAL;
+		if (len < PAGE_SIZE) {
+			ret = -EINVAL;
+			goto out;
+		}
 		len &= PAGE_MASK;
 	}

 	trace_access_lock(info->cpu);
 	entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);

-	for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) {
+	for (i = 0; i < pipe->buffers && len && entries; i++, len -= PAGE_SIZE) {
 		struct page *page;
 		int r;
···
 		else
 			ret = 0;
 		/* TODO: block */
-		return ret;
+		goto out;
 	}

 	ret = splice_to_pipe(pipe, &spd);
-
+	splice_shrink_spd(pipe, &spd);
+out:
 	return ret;
 }
+21 -17
net/core/skbuff.c
···
 /*
  * Fill page/offset/length into spd, if it can hold more pages.
  */
-static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page,
+static inline int spd_fill_page(struct splice_pipe_desc *spd,
+				struct pipe_inode_info *pipe, struct page *page,
 				unsigned int *len, unsigned int offset,
 				struct sk_buff *skb, int linear,
 				struct sock *sk)
 {
-	if (unlikely(spd->nr_pages == PIPE_BUFFERS))
+	if (unlikely(spd->nr_pages == pipe->buffers))
 		return 1;

 	if (linear) {
···
 				   unsigned int plen, unsigned int *off,
 				   unsigned int *len, struct sk_buff *skb,
 				   struct splice_pipe_desc *spd, int linear,
-				   struct sock *sk)
+				   struct sock *sk,
+				   struct pipe_inode_info *pipe)
 {
 	if (!*len)
 		return 1;
···
 		/* the linear region may spread across several pages */
 		flen = min_t(unsigned int, flen, PAGE_SIZE - poff);

-		if (spd_fill_page(spd, page, &flen, poff, skb, linear, sk))
+		if (spd_fill_page(spd, pipe, page, &flen, poff, skb, linear, sk))
 			return 1;

 		__segment_seek(&page, &poff, &plen, flen);
···
  * Map linear and fragment data from the skb to spd. It reports failure if the
  * pipe is full or if we already spliced the requested length.
  */
-static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
-			     unsigned int *len, struct splice_pipe_desc *spd,
-			     struct sock *sk)
+static int __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
+			     unsigned int *offset, unsigned int *len,
+			     struct splice_pipe_desc *spd, struct sock *sk)
 {
 	int seg;
···
 	if (__splice_segment(virt_to_page(skb->data),
 			     (unsigned long) skb->data & (PAGE_SIZE - 1),
 			     skb_headlen(skb),
-			     offset, len, skb, spd, 1, sk))
+			     offset, len, skb, spd, 1, sk, pipe))
 		return 1;

 	/*
···
 		const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];

 		if (__splice_segment(f->page, f->page_offset, f->size,
-				     offset, len, skb, spd, 0, sk))
+				     offset, len, skb, spd, 0, sk, pipe))
 			return 1;
 	}
···
 		      struct pipe_inode_info *pipe, unsigned int tlen,
 		      unsigned int flags)
 {
-	struct partial_page partial[PIPE_BUFFERS];
-	struct page *pages[PIPE_BUFFERS];
+	struct partial_page partial[PIPE_DEF_BUFFERS];
+	struct page *pages[PIPE_DEF_BUFFERS];
 	struct splice_pipe_desc spd = {
 		.pages = pages,
 		.partial = partial,
···
 	};
 	struct sk_buff *frag_iter;
 	struct sock *sk = skb->sk;
+	int ret = 0;
+
+	if (splice_grow_spd(pipe, &spd))
+		return -ENOMEM;

 	/*
 	 * __skb_splice_bits() only fails if the output has no room left,
 	 * so no point in going over the frag_list for the error case.
 	 */
-	if (__skb_splice_bits(skb, &offset, &tlen, &spd, sk))
+	if (__skb_splice_bits(skb, pipe, &offset, &tlen, &spd, sk))
 		goto done;
 	else if (!tlen)
 		goto done;
···
 	skb_walk_frags(skb, frag_iter) {
 		if (!tlen)
 			break;
-		if (__skb_splice_bits(frag_iter, &offset, &tlen, &spd, sk))
+		if (__skb_splice_bits(frag_iter, pipe, &offset, &tlen, &spd, sk))
 			break;
 	}

 done:
 	if (spd.nr_pages) {
-		int ret;
-
 		/*
 		 * Drop the socket lock, otherwise we have reverse
 		 * locking dependencies between sk_lock and i_mutex
···
 		release_sock(sk);
 		ret = splice_to_pipe(pipe, &spd);
 		lock_sock(sk);
-		return ret;
 	}

-	return 0;
+	splice_shrink_spd(pipe, &spd);
+	return ret;
 }

 /**