[PATCH] splice: add support for SPLICE_F_MOVE flag

This enables the caller to migrate pages from one address space page
cache to another. In buzzword-marketing terms, you can do zero-copy file
copies!
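
For illustration only (not part of the patch): a minimal userspace sketch of
the zero-copy copy this enables, assuming the sys_splice() syscall that this
flag extends and a libc that exposes splice(2) and SPLICE_F_MOVE. It copies a
file through a pipe and passes SPLICE_F_MOVE on the pipe-to-file leg, which is
where this patch lets the kernel steal the pipe pages into the destination
page cache instead of copying the data.

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(int argc, char *argv[])
	{
		int in, out, pfd[2];
		ssize_t n;

		if (argc != 3)
			return 1;

		in = open(argv[1], O_RDONLY);
		out = open(argv[2], O_WRONLY | O_CREAT | O_TRUNC, 0644);
		if (in < 0 || out < 0 || pipe(pfd) < 0) {
			perror("setup");
			return 1;
		}

		/* file -> pipe, then pipe -> file; the flag matters on the write side */
		while ((n = splice(in, NULL, pfd[1], NULL, 65536, 0)) > 0) {
			/* sketch only: a real tool would also handle short writes */
			if (splice(pfd[0], NULL, out, NULL, n, SPLICE_F_MOVE) != n) {
				perror("splice");
				return 1;
			}
		}

		return n < 0 ? 1 : 0;
	}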

Signed-off-by: Jens Axboe <axboe@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

Authored by Jens Axboe, committed by Linus Torvalds
5abc97aa 5274f052

 fs/pipe.c                 |  +8
 fs/splice.c               | +82 -35
 include/linux/pipe_fs_i.h |  +8
 3 files changed, 98 insertions(+), 35 deletions(-)

fs/pipe.c:

···
 		kunmap(buf->page);
 }
 
+static int anon_pipe_buf_steal(struct pipe_inode_info *info,
+			       struct pipe_buffer *buf)
+{
+	buf->stolen = 1;
+	return 0;
+}
+
 static struct pipe_buf_operations anon_pipe_buf_ops = {
 	.can_merge = 1,
 	.map = anon_pipe_buf_map,
 	.unmap = anon_pipe_buf_unmap,
 	.release = anon_pipe_buf_release,
+	.steal = anon_pipe_buf_steal,
 };
 
 static ssize_t
fs/splice.c:

···
 #include <linux/pagemap.h>
 #include <linux/pipe_fs_i.h>
 #include <linux/mm_inline.h>
+#include <linux/swap.h>
 
 /*
  * Passed to the actors
···
 	loff_t pos;		/* file position */
 };
 
+static int page_cache_pipe_buf_steal(struct pipe_inode_info *info,
+				     struct pipe_buffer *buf)
+{
+	struct page *page = buf->page;
+
+	WARN_ON(!PageLocked(page));
+	WARN_ON(!PageUptodate(page));
+
+	if (!remove_mapping(page_mapping(page), page))
+		return 1;
+
+	if (PageLRU(page)) {
+		struct zone *zone = page_zone(page);
+
+		spin_lock_irq(&zone->lru_lock);
+		BUG_ON(!PageLRU(page));
+		__ClearPageLRU(page);
+		del_page_from_lru(zone, page);
+		spin_unlock_irq(&zone->lru_lock);
+	}
+
+	buf->stolen = 1;
+	return 0;
+}
+
 static void page_cache_pipe_buf_release(struct pipe_inode_info *info,
 					struct pipe_buffer *buf)
 {
 	page_cache_release(buf->page);
 	buf->page = NULL;
+	buf->stolen = 0;
 }
 
 static void *page_cache_pipe_buf_map(struct file *file,
···
 static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info,
 				      struct pipe_buffer *buf)
 {
-	unlock_page(buf->page);
+	if (!buf->stolen)
+		unlock_page(buf->page);
 	kunmap(buf->page);
 }
···
 	.map = page_cache_pipe_buf_map,
 	.unmap = page_cache_pipe_buf_unmap,
 	.release = page_cache_pipe_buf_release,
+	.steal = page_cache_pipe_buf_steal,
 };
 
 static ssize_t move_to_pipe(struct inode *inode, struct page **pages,
···
 	struct address_space *mapping = file->f_mapping;
 	unsigned int offset;
 	struct page *page;
-	char *src, *dst;
 	pgoff_t index;
+	char *src;
 	int ret;
 
 	/*
···
 	index = sd->pos >> PAGE_CACHE_SHIFT;
 	offset = sd->pos & ~PAGE_CACHE_MASK;
 
-find_page:
-	ret = -ENOMEM;
-	page = find_or_create_page(mapping, index, mapping_gfp_mask(mapping));
-	if (!page)
-		goto out;
-
 	/*
-	 * If the page is uptodate, it is also locked. If it isn't
-	 * uptodate, we can mark it uptodate if we are filling the
-	 * full page. Otherwise we need to read it in first...
+	 * reuse buf page, if SPLICE_F_MOVE is set
 	 */
-	if (!PageUptodate(page)) {
-		if (sd->len < PAGE_CACHE_SIZE) {
-			ret = mapping->a_ops->readpage(file, page);
-			if (unlikely(ret))
-				goto out;
+	if (sd->flags & SPLICE_F_MOVE) {
+		if (buf->ops->steal(info, buf))
+			goto find_page;
 
-			lock_page(page);
+		page = buf->page;
+		if (add_to_page_cache_lru(page, mapping, index,
+					  mapping_gfp_mask(mapping)))
+			goto find_page;
+	} else {
+find_page:
+		ret = -ENOMEM;
+		page = find_or_create_page(mapping, index,
+					   mapping_gfp_mask(mapping));
+		if (!page)
+			goto out;
 
-			if (!PageUptodate(page)) {
-				/*
-				 * page got invalidated, repeat
-				 */
-				if (!page->mapping) {
-					unlock_page(page);
-					page_cache_release(page);
-					goto find_page;
+		/*
+		 * If the page is uptodate, it is also locked. If it isn't
+		 * uptodate, we can mark it uptodate if we are filling the
+		 * full page. Otherwise we need to read it in first...
+		 */
+		if (!PageUptodate(page)) {
+			if (sd->len < PAGE_CACHE_SIZE) {
+				ret = mapping->a_ops->readpage(file, page);
+				if (unlikely(ret))
+					goto out;
+
+				lock_page(page);
+
+				if (!PageUptodate(page)) {
+					/*
+					 * page got invalidated, repeat
+					 */
+					if (!page->mapping) {
+						unlock_page(page);
+						page_cache_release(page);
+						goto find_page;
+					}
+					ret = -EIO;
+					goto out;
 				}
-				ret = -EIO;
-				goto out;
+			} else {
+				WARN_ON(!PageLocked(page));
+				SetPageUptodate(page);
 			}
-		} else {
-			WARN_ON(!PageLocked(page));
-			SetPageUptodate(page);
 		}
 	}
···
 	if (ret)
 		goto out;
 
-	dst = kmap_atomic(page, KM_USER0);
-	memcpy(dst + offset, src + buf->offset, sd->len);
-	flush_dcache_page(page);
-	kunmap_atomic(dst, KM_USER0);
+	if (!buf->stolen) {
+		char *dst = kmap_atomic(page, KM_USER0);
+
+		memcpy(dst + offset, src + buf->offset, sd->len);
+		flush_dcache_page(page);
+		kunmap_atomic(dst, KM_USER0);
+	}
 
 	ret = mapping->a_ops->commit_write(file, page, 0, sd->len);
 	if (ret < 0)
···
 out:
 	if (ret < 0)
 		unlock_page(page);
-	page_cache_release(page);
+	if (!buf->stolen)
+		page_cache_release(page);
 	buf->ops->unmap(info, buf);
 	return ret;
 }
include/linux/pipe_fs_i.h:

···
 	struct page *page;
 	unsigned int offset, len;
 	struct pipe_buf_operations *ops;
+	unsigned int stolen;
 };
 
 struct pipe_buf_operations {
···
 	void * (*map)(struct file *, struct pipe_inode_info *, struct pipe_buffer *);
 	void (*unmap)(struct pipe_inode_info *, struct pipe_buffer *);
 	void (*release)(struct pipe_inode_info *, struct pipe_buffer *);
+	int (*steal)(struct pipe_inode_info *, struct pipe_buffer *);
 };
 
 struct pipe_inode_info {
···
 
 struct inode* pipe_new(struct inode* inode);
 void free_pipe_info(struct inode* inode);
+
+/*
+ * splice is tied to pipes as a transport (at least for now), so we'll just
+ * add the splice flags here.
+ */
+#define SPLICE_F_MOVE	(0x01)	/* move pages instead of copying */
 
 #endif