Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
fs/pipe.c at 17431928194b36a0f88082df875e2e036da7fddf (1234 lines, 27 kB)

/*
 *  linux/fs/pipe.c
 *
 *  Copyright (C) 1991, 1992, 1999  Linus Torvalds
 */

#include <linux/mm.h>
#include <linux/file.h>
#include <linux/poll.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/log2.h>
#include <linux/mount.h>
#include <linux/pipe_fs_i.h>
#include <linux/uio.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/audit.h>
#include <linux/syscalls.h>
#include <linux/fcntl.h>

#include <asm/uaccess.h>
#include <asm/ioctls.h>

/*
 * The max size that a non-root user is allowed to grow the pipe. Can
 * be set by root in /proc/sys/fs/pipe-max-pages
 */
unsigned int pipe_max_pages = PIPE_DEF_BUFFERS * 16;

/*
 * We use a start+len construction, which provides full use of the
 * allocated memory.
 * -- Florian Coosmann (FGC)
 *
 * Reads with count = 0 should always return 0.
 * -- Julian Bradfield 1999-06-07.
 *
 * FIFOs and Pipes now generate SIGIO for both readers and writers.
 * -- Jeremy Elson <jelson@circlemud.org> 2001-08-16
 *
 * pipe_read & write cleanup
 * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
 */

static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass)
{
	if (pipe->inode)
		mutex_lock_nested(&pipe->inode->i_mutex, subclass);
}

void pipe_lock(struct pipe_inode_info *pipe)
{
	/*
	 * pipe_lock() nests non-pipe inode locks (for writing to a file)
	 */
	pipe_lock_nested(pipe, I_MUTEX_PARENT);
}
EXPORT_SYMBOL(pipe_lock);

void pipe_unlock(struct pipe_inode_info *pipe)
{
	if (pipe->inode)
		mutex_unlock(&pipe->inode->i_mutex);
}
EXPORT_SYMBOL(pipe_unlock);

void pipe_double_lock(struct pipe_inode_info *pipe1,
		      struct pipe_inode_info *pipe2)
{
	BUG_ON(pipe1 == pipe2);

	if (pipe1 < pipe2) {
		pipe_lock_nested(pipe1, I_MUTEX_PARENT);
		pipe_lock_nested(pipe2, I_MUTEX_CHILD);
	} else {
		pipe_lock_nested(pipe2, I_MUTEX_PARENT);
		pipe_lock_nested(pipe1, I_MUTEX_CHILD);
	}
}

/* Drop the inode semaphore and wait for a pipe event, atomically */
void pipe_wait(struct pipe_inode_info *pipe)
{
	DEFINE_WAIT(wait);

	/*
	 * Pipes are system-local resources, so sleeping on them
	 * is considered a noninteractive wait:
	 */
	prepare_to_wait(&pipe->wait, &wait, TASK_INTERRUPTIBLE);
	pipe_unlock(pipe);
	schedule();
	finish_wait(&pipe->wait, &wait);
	pipe_lock(pipe);
}

static int
pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len,
			int atomic)
{
	unsigned long copy;

	while (len > 0) {
		while (!iov->iov_len)
			iov++;
		copy = min_t(unsigned long, len, iov->iov_len);

		if (atomic) {
			if (__copy_from_user_inatomic(to, iov->iov_base, copy))
				return -EFAULT;
		} else {
			if (copy_from_user(to, iov->iov_base, copy))
				return -EFAULT;
		}
		to += copy;
		len -= copy;
		iov->iov_base += copy;
		iov->iov_len -= copy;
	}
	return 0;
}

static int
pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len,
		      int atomic)
{
	unsigned long copy;

	while (len > 0) {
		while (!iov->iov_len)
			iov++;
		copy = min_t(unsigned long, len, iov->iov_len);

		if (atomic) {
			if (__copy_to_user_inatomic(iov->iov_base, from, copy))
				return -EFAULT;
		} else {
			if (copy_to_user(iov->iov_base, from, copy))
				return -EFAULT;
		}
		from += copy;
		len -= copy;
		iov->iov_base += copy;
		iov->iov_len -= copy;
	}
	return 0;
}

/*
 * Attempt to pre-fault in the user memory, so we can use atomic copies.
 * Returns the number of bytes not faulted in.
 */
static int iov_fault_in_pages_write(struct iovec *iov, unsigned long len)
{
	while (!iov->iov_len)
		iov++;

	while (len > 0) {
		unsigned long this_len;

		this_len = min_t(unsigned long, len, iov->iov_len);
		if (fault_in_pages_writeable(iov->iov_base, this_len))
			break;

		len -= this_len;
		iov++;
	}

	return len;
}

/*
 * Pre-fault in the user memory, so we can use atomic copies.
 */
static void iov_fault_in_pages_read(struct iovec *iov, unsigned long len)
{
	while (!iov->iov_len)
		iov++;

	while (len > 0) {
		unsigned long this_len;

		this_len = min_t(unsigned long, len, iov->iov_len);
		fault_in_pages_readable(iov->iov_base, this_len);
		len -= this_len;
		iov++;
	}
}

static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
				  struct pipe_buffer *buf)
{
	struct page *page = buf->page;

	/*
	 * If nobody else uses this page, and we don't already have a
	 * temporary page, let's keep track of it as a one-deep
	 * allocation cache. (Otherwise just release our reference to it)
	 */
	if (page_count(page) == 1 && !pipe->tmp_page)
		pipe->tmp_page = page;
	else
		page_cache_release(page);
}

/**
 * generic_pipe_buf_map - virtually map a pipe buffer
 * @pipe:	the pipe that the buffer belongs to
 * @buf:	the buffer that should be mapped
 * @atomic:	whether to use an atomic map
 *
 * Description:
 *	This function returns a kernel virtual address mapping for the
 *	pipe_buffer passed in @buf. If @atomic is set, an atomic map is provided
 *	and the caller has to be careful not to fault before calling
 *	the unmap function.
 *
 *	Note that this function occupies KM_USER0 if @atomic != 0.
 */
void *generic_pipe_buf_map(struct pipe_inode_info *pipe,
			   struct pipe_buffer *buf, int atomic)
{
	if (atomic) {
		buf->flags |= PIPE_BUF_FLAG_ATOMIC;
		return kmap_atomic(buf->page, KM_USER0);
	}

	return kmap(buf->page);
}

/**
 * generic_pipe_buf_unmap - unmap a previously mapped pipe buffer
 * @pipe:	the pipe that the buffer belongs to
 * @buf:	the buffer that should be unmapped
 * @map_data:	the data that the mapping function returned
 *
 * Description:
 *	This function undoes the mapping that ->map() provided.
 */
void generic_pipe_buf_unmap(struct pipe_inode_info *pipe,
			    struct pipe_buffer *buf, void *map_data)
{
	if (buf->flags & PIPE_BUF_FLAG_ATOMIC) {
		buf->flags &= ~PIPE_BUF_FLAG_ATOMIC;
		kunmap_atomic(map_data, KM_USER0);
	} else
		kunmap(buf->page);
}

/**
 * generic_pipe_buf_steal - attempt to take ownership of a &pipe_buffer
 * @pipe:	the pipe that the buffer belongs to
 * @buf:	the buffer to attempt to steal
 *
 * Description:
 *	This function attempts to steal the &struct page attached to
 *	@buf. If successful, this function returns 0 and returns with
 *	the page locked. The caller may then reuse the page for whatever
 *	he wishes; the typical use is insertion into a different file
 *	page cache.
 */
int generic_pipe_buf_steal(struct pipe_inode_info *pipe,
			   struct pipe_buffer *buf)
{
	struct page *page = buf->page;

	/*
	 * A reference of one is golden, that means that the owner of this
	 * page is the only one holding a reference to it. lock the page
	 * and return OK.
	 */
	if (page_count(page) == 1) {
		lock_page(page);
		return 0;
	}

	return 1;
}

/**
 * generic_pipe_buf_get - get a reference to a &struct pipe_buffer
 * @pipe:	the pipe that the buffer belongs to
 * @buf:	the buffer to get a reference to
 *
 * Description:
 *	This function grabs an extra reference to @buf. It's used in
 *	the tee() system call, when we duplicate the buffers in one
 *	pipe into another.
 */
void generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
{
	page_cache_get(buf->page);
}

/**
 * generic_pipe_buf_confirm - verify contents of the pipe buffer
 * @info:	the pipe that the buffer belongs to
 * @buf:	the buffer to confirm
 *
 * Description:
 *	This function does nothing, because the generic pipe code uses
 *	pages that are always good when inserted into the pipe.
 */
int generic_pipe_buf_confirm(struct pipe_inode_info *info,
			     struct pipe_buffer *buf)
{
	return 0;
}

/**
 * generic_pipe_buf_release - put a reference to a &struct pipe_buffer
 * @pipe:	the pipe that the buffer belongs to
 * @buf:	the buffer to put a reference to
 *
 * Description:
 *	This function releases a reference to @buf.
 */
void generic_pipe_buf_release(struct pipe_inode_info *pipe,
			      struct pipe_buffer *buf)
{
	page_cache_release(buf->page);
}

static const struct pipe_buf_operations anon_pipe_buf_ops = {
	.can_merge = 1,
	.map = generic_pipe_buf_map,
	.unmap = generic_pipe_buf_unmap,
	.confirm = generic_pipe_buf_confirm,
	.release = anon_pipe_buf_release,
	.steal = generic_pipe_buf_steal,
	.get = generic_pipe_buf_get,
};

static ssize_t
pipe_read(struct kiocb *iocb, const struct iovec *_iov,
	  unsigned long nr_segs, loff_t pos)
{
	struct file *filp = iocb->ki_filp;
	struct inode *inode = filp->f_path.dentry->d_inode;
	struct pipe_inode_info *pipe;
	int do_wakeup;
	ssize_t ret;
	struct iovec *iov = (struct iovec *)_iov;
	size_t total_len;

	total_len = iov_length(iov, nr_segs);
	/* Null read succeeds. */
	if (unlikely(total_len == 0))
		return 0;

	do_wakeup = 0;
	ret = 0;
	mutex_lock(&inode->i_mutex);
	pipe = inode->i_pipe;
	for (;;) {
		int bufs = pipe->nrbufs;
		if (bufs) {
			int curbuf = pipe->curbuf;
			struct pipe_buffer *buf = pipe->bufs + curbuf;
			const struct pipe_buf_operations *ops = buf->ops;
			void *addr;
			size_t chars = buf->len;
			int error, atomic;

			if (chars > total_len)
				chars = total_len;

			error = ops->confirm(pipe, buf);
			if (error) {
				if (!ret)
					ret = error;
				break;
			}

			atomic = !iov_fault_in_pages_write(iov, chars);
redo:
			addr = ops->map(pipe, buf, atomic);
			error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars, atomic);
			ops->unmap(pipe, buf, addr);
			if (unlikely(error)) {
				/*
				 * Just retry with the slow path if we failed.
				 */
				if (atomic) {
					atomic = 0;
					goto redo;
				}
				if (!ret)
					ret = error;
				break;
			}
			ret += chars;
			buf->offset += chars;
			buf->len -= chars;
			if (!buf->len) {
				buf->ops = NULL;
				ops->release(pipe, buf);
				curbuf = (curbuf + 1) & (pipe->buffers - 1);
				pipe->curbuf = curbuf;
				pipe->nrbufs = --bufs;
				do_wakeup = 1;
			}
			total_len -= chars;
			if (!total_len)
				break;	/* common path: read succeeded */
		}
		if (bufs)	/* More to do? */
			continue;
		if (!pipe->writers)
			break;
		if (!pipe->waiting_writers) {
			/* syscall merging: Usually we must not sleep
			 * if O_NONBLOCK is set, or if we got some data.
			 * But if a writer sleeps in kernel space, then
			 * we can wait for that data without violating POSIX.
			 */
			if (ret)
				break;
			if (filp->f_flags & O_NONBLOCK) {
				ret = -EAGAIN;
				break;
			}
		}
		if (signal_pending(current)) {
			if (!ret)
				ret = -ERESTARTSYS;
			break;
		}
		if (do_wakeup) {
			wake_up_interruptible_sync(&pipe->wait);
			kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
		}
		pipe_wait(pipe);
	}
	mutex_unlock(&inode->i_mutex);

	/* Signal writers asynchronously that there is more room. */
	if (do_wakeup) {
		wake_up_interruptible_sync(&pipe->wait);
		kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
	}
	if (ret > 0)
		file_accessed(filp);
	return ret;
}

static ssize_t
pipe_write(struct kiocb *iocb, const struct iovec *_iov,
	   unsigned long nr_segs, loff_t ppos)
{
	struct file *filp = iocb->ki_filp;
	struct inode *inode = filp->f_path.dentry->d_inode;
	struct pipe_inode_info *pipe;
	ssize_t ret;
	int do_wakeup;
	struct iovec *iov = (struct iovec *)_iov;
	size_t total_len;
	ssize_t chars;

	total_len = iov_length(iov, nr_segs);
	/* Null write succeeds. */
	if (unlikely(total_len == 0))
		return 0;

	do_wakeup = 0;
	ret = 0;
	mutex_lock(&inode->i_mutex);
	pipe = inode->i_pipe;

	if (!pipe->readers) {
		send_sig(SIGPIPE, current, 0);
		ret = -EPIPE;
		goto out;
	}

	/* We try to merge small writes */
	chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */
	if (pipe->nrbufs && chars != 0) {
		int lastbuf = (pipe->curbuf + pipe->nrbufs - 1) &
							(pipe->buffers - 1);
		struct pipe_buffer *buf = pipe->bufs + lastbuf;
		const struct pipe_buf_operations *ops = buf->ops;
		int offset = buf->offset + buf->len;

		if (ops->can_merge && offset + chars <= PAGE_SIZE) {
			int error, atomic = 1;
			void *addr;

			error = ops->confirm(pipe, buf);
			if (error)
				goto out;

			iov_fault_in_pages_read(iov, chars);
redo1:
			addr = ops->map(pipe, buf, atomic);
			error = pipe_iov_copy_from_user(offset + addr, iov,
							chars, atomic);
			ops->unmap(pipe, buf, addr);
			ret = error;
			do_wakeup = 1;
			if (error) {
				if (atomic) {
					atomic = 0;
					goto redo1;
				}
				goto out;
			}
			buf->len += chars;
			total_len -= chars;
			ret = chars;
			if (!total_len)
				goto out;
		}
	}

	for (;;) {
		int bufs;

		if (!pipe->readers) {
			send_sig(SIGPIPE, current, 0);
			if (!ret)
				ret = -EPIPE;
			break;
		}
		bufs = pipe->nrbufs;
		if (bufs < pipe->buffers) {
			int newbuf = (pipe->curbuf + bufs) & (pipe->buffers-1);
			struct pipe_buffer *buf = pipe->bufs + newbuf;
			struct page *page = pipe->tmp_page;
			char *src;
			int error, atomic = 1;

			if (!page) {
				page = alloc_page(GFP_HIGHUSER);
				if (unlikely(!page)) {
					ret = ret ? : -ENOMEM;
					break;
				}
				pipe->tmp_page = page;
			}
			/* Always wake up, even if the copy fails. Otherwise
			 * we lock up (O_NONBLOCK-)readers that sleep due to
			 * syscall merging.
			 * FIXME! Is this really true?
			 */
			do_wakeup = 1;
			chars = PAGE_SIZE;
			if (chars > total_len)
				chars = total_len;

			iov_fault_in_pages_read(iov, chars);
redo2:
			if (atomic)
				src = kmap_atomic(page, KM_USER0);
			else
				src = kmap(page);

			error = pipe_iov_copy_from_user(src, iov, chars,
							atomic);
			if (atomic)
				kunmap_atomic(src, KM_USER0);
			else
				kunmap(page);

			if (unlikely(error)) {
				if (atomic) {
					atomic = 0;
					goto redo2;
				}
				if (!ret)
					ret = error;
				break;
			}
			ret += chars;

			/* Insert it into the buffer array */
			buf->page = page;
			buf->ops = &anon_pipe_buf_ops;
			buf->offset = 0;
			buf->len = chars;
			pipe->nrbufs = ++bufs;
			pipe->tmp_page = NULL;

			total_len -= chars;
			if (!total_len)
				break;
		}
		if (bufs < pipe->buffers)
			continue;
		if (filp->f_flags & O_NONBLOCK) {
			if (!ret)
				ret = -EAGAIN;
			break;
		}
		if (signal_pending(current)) {
			if (!ret)
				ret = -ERESTARTSYS;
			break;
		}
		if (do_wakeup) {
			wake_up_interruptible_sync(&pipe->wait);
			kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
			do_wakeup = 0;
		}
		pipe->waiting_writers++;
		pipe_wait(pipe);
		pipe->waiting_writers--;
	}
out:
	mutex_unlock(&inode->i_mutex);
	if (do_wakeup) {
		wake_up_interruptible_sync(&pipe->wait);
		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
	}
	if (ret > 0)
		file_update_time(filp);
	return ret;
}

static ssize_t
bad_pipe_r(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
{
	return -EBADF;
}

static ssize_t
bad_pipe_w(struct file *filp, const char __user *buf, size_t count,
	   loff_t *ppos)
{
	return -EBADF;
}

static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	struct inode *inode = filp->f_path.dentry->d_inode;
	struct pipe_inode_info *pipe;
	int count, buf, nrbufs;

	switch (cmd) {
	case FIONREAD:
		mutex_lock(&inode->i_mutex);
		pipe = inode->i_pipe;
		count = 0;
		buf = pipe->curbuf;
		nrbufs = pipe->nrbufs;
		while (--nrbufs >= 0) {
			count += pipe->bufs[buf].len;
			buf = (buf+1) & (pipe->buffers - 1);
		}
		mutex_unlock(&inode->i_mutex);

		return put_user(count, (int __user *)arg);
	default:
		return -EINVAL;
	}
}

/* No kernel lock held - fine */
static unsigned int
pipe_poll(struct file *filp, poll_table *wait)
{
	unsigned int mask;
	struct inode *inode = filp->f_path.dentry->d_inode;
	struct pipe_inode_info *pipe = inode->i_pipe;
	int nrbufs;

	poll_wait(filp, &pipe->wait, wait);

	/* Reading only -- no need for acquiring the semaphore. */
	nrbufs = pipe->nrbufs;
	mask = 0;
	if (filp->f_mode & FMODE_READ) {
		mask = (nrbufs > 0) ? POLLIN | POLLRDNORM : 0;
		if (!pipe->writers && filp->f_version != pipe->w_counter)
			mask |= POLLHUP;
	}

	if (filp->f_mode & FMODE_WRITE) {
		mask |= (nrbufs < pipe->buffers) ? POLLOUT | POLLWRNORM : 0;
		/*
		 * Most Unices do not set POLLERR for FIFOs but on Linux they
		 * behave exactly like pipes for poll.
		 */
		if (!pipe->readers)
			mask |= POLLERR;
	}

	return mask;
}

static int
pipe_release(struct inode *inode, int decr, int decw)
{
	struct pipe_inode_info *pipe;

	mutex_lock(&inode->i_mutex);
	pipe = inode->i_pipe;
	pipe->readers -= decr;
	pipe->writers -= decw;

	if (!pipe->readers && !pipe->writers) {
		free_pipe_info(inode);
	} else {
		wake_up_interruptible_sync(&pipe->wait);
		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
		kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
	}
	mutex_unlock(&inode->i_mutex);

	return 0;
}

static int
pipe_read_fasync(int fd, struct file *filp, int on)
{
	struct inode *inode = filp->f_path.dentry->d_inode;
	int retval;

	mutex_lock(&inode->i_mutex);
	retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_readers);
	mutex_unlock(&inode->i_mutex);

	return retval;
}


static int
pipe_write_fasync(int fd, struct file *filp, int on)
{
	struct inode *inode = filp->f_path.dentry->d_inode;
	int retval;

	mutex_lock(&inode->i_mutex);
	retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_writers);
	mutex_unlock(&inode->i_mutex);

	return retval;
}


static int
pipe_rdwr_fasync(int fd, struct file *filp, int on)
{
	struct inode *inode = filp->f_path.dentry->d_inode;
	struct pipe_inode_info *pipe = inode->i_pipe;
	int retval;

	mutex_lock(&inode->i_mutex);
	retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
	if (retval >= 0) {
		retval = fasync_helper(fd, filp, on, &pipe->fasync_writers);
		if (retval < 0) /* this can happen only if on == T */
			fasync_helper(-1, filp, 0, &pipe->fasync_readers);
	}
	mutex_unlock(&inode->i_mutex);
	return retval;
}


static int
pipe_read_release(struct inode *inode, struct file *filp)
{
	return pipe_release(inode, 1, 0);
}

static int
pipe_write_release(struct inode *inode, struct file *filp)
{
	return pipe_release(inode, 0, 1);
}

static int
pipe_rdwr_release(struct inode *inode, struct file *filp)
{
	int decr, decw;

	decr = (filp->f_mode & FMODE_READ) != 0;
	decw = (filp->f_mode & FMODE_WRITE) != 0;
	return pipe_release(inode, decr, decw);
}

static int
pipe_read_open(struct inode *inode, struct file *filp)
{
	int ret = -ENOENT;

	mutex_lock(&inode->i_mutex);

	if (inode->i_pipe) {
		ret = 0;
		inode->i_pipe->readers++;
	}

	mutex_unlock(&inode->i_mutex);

	return ret;
}

static int
pipe_write_open(struct inode *inode, struct file *filp)
{
	int ret = -ENOENT;

	mutex_lock(&inode->i_mutex);

	if (inode->i_pipe) {
		ret = 0;
		inode->i_pipe->writers++;
	}

	mutex_unlock(&inode->i_mutex);

	return ret;
}

static int
pipe_rdwr_open(struct inode *inode, struct file *filp)
{
	int ret = -ENOENT;

	mutex_lock(&inode->i_mutex);

	if (inode->i_pipe) {
		ret = 0;
		if (filp->f_mode & FMODE_READ)
			inode->i_pipe->readers++;
		if (filp->f_mode & FMODE_WRITE)
			inode->i_pipe->writers++;
	}

	mutex_unlock(&inode->i_mutex);

	return ret;
}

/*
 * The file_operations structs are not static because they
 * are also used in linux/fs/fifo.c to do operations on FIFOs.
 *
 * Pipes reuse fifos' file_operations structs.
 */
const struct file_operations read_pipefifo_fops = {
	.llseek		= no_llseek,
	.read		= do_sync_read,
	.aio_read	= pipe_read,
	.write		= bad_pipe_w,
	.poll		= pipe_poll,
	.unlocked_ioctl	= pipe_ioctl,
	.open		= pipe_read_open,
	.release	= pipe_read_release,
	.fasync		= pipe_read_fasync,
};

const struct file_operations write_pipefifo_fops = {
	.llseek		= no_llseek,
	.read		= bad_pipe_r,
	.write		= do_sync_write,
	.aio_write	= pipe_write,
	.poll		= pipe_poll,
	.unlocked_ioctl	= pipe_ioctl,
	.open		= pipe_write_open,
	.release	= pipe_write_release,
	.fasync		= pipe_write_fasync,
};

const struct file_operations rdwr_pipefifo_fops = {
	.llseek		= no_llseek,
	.read		= do_sync_read,
	.aio_read	= pipe_read,
	.write		= do_sync_write,
	.aio_write	= pipe_write,
	.poll		= pipe_poll,
	.unlocked_ioctl	= pipe_ioctl,
	.open		= pipe_rdwr_open,
	.release	= pipe_rdwr_release,
	.fasync		= pipe_rdwr_fasync,
};

struct pipe_inode_info * alloc_pipe_info(struct inode *inode)
{
	struct pipe_inode_info *pipe;

	pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
	if (pipe) {
		pipe->bufs = kzalloc(sizeof(struct pipe_buffer) * PIPE_DEF_BUFFERS, GFP_KERNEL);
		if (pipe->bufs) {
			init_waitqueue_head(&pipe->wait);
			pipe->r_counter = pipe->w_counter = 1;
			pipe->inode = inode;
			pipe->buffers = PIPE_DEF_BUFFERS;
			return pipe;
		}
		kfree(pipe);
	}

	return NULL;
}

void __free_pipe_info(struct pipe_inode_info *pipe)
{
	int i;

	for (i = 0; i < pipe->buffers; i++) {
		struct pipe_buffer *buf = pipe->bufs + i;
		if (buf->ops)
			buf->ops->release(pipe, buf);
	}
	if (pipe->tmp_page)
		__free_page(pipe->tmp_page);
	kfree(pipe->bufs);
	kfree(pipe);
}

void free_pipe_info(struct inode *inode)
{
	__free_pipe_info(inode->i_pipe);
	inode->i_pipe = NULL;
}

static struct vfsmount *pipe_mnt __read_mostly;

/*
 * pipefs_dname() is called from d_path().
 */
static char *pipefs_dname(struct dentry *dentry, char *buffer, int buflen)
{
	return dynamic_dname(dentry, buffer, buflen, "pipe:[%lu]",
				dentry->d_inode->i_ino);
}

static const struct dentry_operations pipefs_dentry_operations = {
	.d_dname	= pipefs_dname,
};

static struct inode * get_pipe_inode(void)
{
	struct inode *inode = new_inode(pipe_mnt->mnt_sb);
	struct pipe_inode_info *pipe;

	if (!inode)
		goto fail_inode;

	pipe = alloc_pipe_info(inode);
	if (!pipe)
		goto fail_iput;
	inode->i_pipe = pipe;

	pipe->readers = pipe->writers = 1;
	inode->i_fop = &rdwr_pipefifo_fops;

	/*
	 * Mark the inode dirty from the very beginning,
	 * that way it will never be moved to the dirty
	 * list because "mark_inode_dirty()" will think
	 * that it already _is_ on the dirty list.
	 */
	inode->i_state = I_DIRTY;
	inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
	inode->i_uid = current_fsuid();
	inode->i_gid = current_fsgid();
	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;

	return inode;

fail_iput:
	iput(inode);

fail_inode:
	return NULL;
}

struct file *create_write_pipe(int flags)
{
	int err;
	struct inode *inode;
	struct file *f;
	struct path path;
	struct qstr name = { .name = "" };

	err = -ENFILE;
	inode = get_pipe_inode();
	if (!inode)
		goto err;

	err = -ENOMEM;
	path.dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &name);
	if (!path.dentry)
		goto err_inode;
	path.mnt = mntget(pipe_mnt);

	path.dentry->d_op = &pipefs_dentry_operations;
	d_instantiate(path.dentry, inode);

	err = -ENFILE;
	f = alloc_file(&path, FMODE_WRITE, &write_pipefifo_fops);
	if (!f)
		goto err_dentry;
	f->f_mapping = inode->i_mapping;

	f->f_flags = O_WRONLY | (flags & O_NONBLOCK);
	f->f_version = 0;

	return f;

 err_dentry:
	free_pipe_info(inode);
	path_put(&path);
	return ERR_PTR(err);

 err_inode:
	free_pipe_info(inode);
	iput(inode);
 err:
	return ERR_PTR(err);
}

void free_write_pipe(struct file *f)
{
	free_pipe_info(f->f_dentry->d_inode);
	path_put(&f->f_path);
	put_filp(f);
}

struct file *create_read_pipe(struct file *wrf, int flags)
{
	/* Grab pipe from the writer */
	struct file *f = alloc_file(&wrf->f_path, FMODE_READ,
				    &read_pipefifo_fops);
	if (!f)
		return ERR_PTR(-ENFILE);

	path_get(&wrf->f_path);
	f->f_flags = O_RDONLY | (flags & O_NONBLOCK);

	return f;
}

int do_pipe_flags(int *fd, int flags)
{
	struct file *fw, *fr;
	int error;
	int fdw, fdr;

	if (flags & ~(O_CLOEXEC | O_NONBLOCK))
		return -EINVAL;

	fw = create_write_pipe(flags);
	if (IS_ERR(fw))
		return PTR_ERR(fw);
	fr = create_read_pipe(fw, flags);
	error = PTR_ERR(fr);
	if (IS_ERR(fr))
		goto err_write_pipe;

	error = get_unused_fd_flags(flags);
	if (error < 0)
		goto err_read_pipe;
	fdr = error;

	error = get_unused_fd_flags(flags);
	if (error < 0)
		goto err_fdr;
	fdw = error;

	audit_fd_pair(fdr, fdw);
	fd_install(fdr, fr);
	fd_install(fdw, fw);
	fd[0] = fdr;
	fd[1] = fdw;

	return 0;

 err_fdr:
	put_unused_fd(fdr);
 err_read_pipe:
	path_put(&fr->f_path);
	put_filp(fr);
 err_write_pipe:
	free_write_pipe(fw);
	return error;
}

/*
 * sys_pipe() is the normal C calling standard for creating
 * a pipe. It's not the way Unix traditionally does this, though.
 */
SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags)
{
	int fd[2];
	int error;

	error = do_pipe_flags(fd, flags);
	if (!error) {
		if (copy_to_user(fildes, fd, sizeof(fd))) {
			sys_close(fd[0]);
			sys_close(fd[1]);
			error = -EFAULT;
		}
	}
	return error;
}

SYSCALL_DEFINE1(pipe, int __user *, fildes)
{
	return sys_pipe2(fildes, 0);
}

/*
 * Allocate a new array of pipe buffers and copy the info over. Returns the
 * pipe size if successful, or -ERROR on error.
 */
static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
{
	struct pipe_buffer *bufs;

	/*
	 * Must be a power-of-2 currently
	 */
	if (!is_power_of_2(arg))
		return -EINVAL;

	/*
	 * We can shrink the pipe, if arg >= pipe->nrbufs. Since we don't
	 * expect a lot of shrink+grow operations, just free and allocate
	 * again like we would do for growing. If the pipe currently
	 * contains more buffers than arg, then return busy.
	 */
	if (arg < pipe->nrbufs)
		return -EBUSY;

	bufs = kcalloc(arg, sizeof(struct pipe_buffer), GFP_KERNEL);
	if (unlikely(!bufs))
		return -ENOMEM;

	/*
	 * The pipe array wraps around, so just start the new one at zero
	 * and adjust the indexes. The wrapped-around part (if any) lives
	 * at the start of the old array, so it is copied after the head.
	 */
	if (pipe->nrbufs) {
		unsigned int tail;
		unsigned int head;

		tail = pipe->curbuf + pipe->nrbufs;
		if (tail > pipe->buffers)
			tail &= (pipe->buffers - 1);
		else
			tail = 0;

		head = pipe->nrbufs - tail;
		if (head)
			memcpy(bufs, pipe->bufs + pipe->curbuf,
					head * sizeof(struct pipe_buffer));
		if (tail)
			memcpy(bufs + head, pipe->bufs,
					tail * sizeof(struct pipe_buffer));
	}

	pipe->curbuf = 0;
	kfree(pipe->bufs);
	pipe->bufs = bufs;
	pipe->buffers = arg;
	return arg;
}

long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct pipe_inode_info *pipe;
	long ret;

	pipe = file->f_path.dentry->d_inode->i_pipe;
	if (!pipe)
		return -EBADF;

	mutex_lock(&pipe->inode->i_mutex);

	switch (cmd) {
	case F_SETPIPE_SZ:
		if (!capable(CAP_SYS_ADMIN) && arg > pipe_max_pages) {
			ret = -EINVAL;
			break;
		}
		/*
		 * The pipe needs to be at least 2 pages large to
		 * guarantee POSIX behaviour.
		 */
		if (arg < 2) {
			ret = -EINVAL;
			break;
		}
		ret = pipe_set_size(pipe, arg);
		break;
	case F_GETPIPE_SZ:
		ret = pipe->buffers;
		break;
	default:
		ret = -EINVAL;
		break;
	}

	mutex_unlock(&pipe->inode->i_mutex);
	return ret;
}

/*
 * pipefs should _never_ be mounted by userland - too much of security hassle,
 * no real gain from having the whole whorehouse mounted. So we don't need
 * any operations on the root directory. However, we need a non-trivial
 * d_name - pipe: will go nicely and kill the special-casing in procfs.
 */
static int pipefs_get_sb(struct file_system_type *fs_type,
			 int flags, const char *dev_name, void *data,
			 struct vfsmount *mnt)
{
	return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC, mnt);
}

static struct file_system_type pipe_fs_type = {
	.name		= "pipefs",
	.get_sb		= pipefs_get_sb,
	.kill_sb	= kill_anon_super,
};

static int __init init_pipe_fs(void)
{
	int err = register_filesystem(&pipe_fs_type);

	if (!err) {
		pipe_mnt = kern_mount(&pipe_fs_type);
		if (IS_ERR(pipe_mnt)) {
			err = PTR_ERR(pipe_mnt);
			unregister_filesystem(&pipe_fs_type);
		}
	}
	return err;
}

static void __exit exit_pipe_fs(void)
{
	unregister_filesystem(&pipe_fs_type);
	mntput(pipe_mnt);
}

fs_initcall(init_pipe_fs);
module_exit(exit_pipe_fs);
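
For orientation, here is a small userspace sketch (an editorial addition, not part of the kernel tree) that drives the entry points implemented above: pipe2() lands in sys_pipe2()/do_pipe_flags(), FIONREAD in pipe_ioctl(), and F_GETPIPE_SZ/F_SETPIPE_SZ in pipe_fcntl(). It assumes Linux with a glibc that exposes the F_*PIPE_SZ constants under _GNU_SOURCE; note that at this snapshot the F_SETPIPE_SZ argument counts pipe buffers (it must be a power of two, at least 2), whereas later kernels take a size in bytes. Error handling is abbreviated.

/* userspace demo - not part of fs/pipe.c */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
	int fd[2];
	int avail;
	const char msg[] = "hello, pipe";
	char buf[sizeof(msg)];

	/* Creates both ends via sys_pipe2() -> do_pipe_flags() above. */
	if (pipe2(fd, O_NONBLOCK) < 0) {
		perror("pipe2");
		return 1;
	}

	/* A write smaller than PAGE_SIZE lands in a single pipe_buffer
	 * and is a candidate for pipe_write()'s small-write merging. */
	(void)write(fd[1], msg, sizeof(msg));

	/* FIONREAD walks the buffer ring in pipe_ioctl() above. */
	ioctl(fd[0], FIONREAD, &avail);
	printf("bytes buffered: %d\n", avail);

	/* pipe_fcntl() above: capacity is reported in pipe buffers at
	 * this kernel snapshot (bytes on modern kernels). */
	printf("pipe capacity: %d\n", fcntl(fd[0], F_GETPIPE_SZ));
	(void)fcntl(fd[1], F_SETPIPE_SZ, 32);	/* power of two, >= 2 */

	(void)read(fd[0], buf, sizeof(buf));	/* pipe_read() above */
	printf("read back: %s\n", buf);

	close(fd[0]);
	close(fd[1]);
	return 0;
}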