[PATCH] splice: add support for SPLICE_F_MOVE flag

This enables the caller to migrate pages from one address space's page
cache to another. In buzzword marketing terms, you can do zero-copy file
copies!

Signed-off-by: Jens Axboe <axboe@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

Authored by Jens Axboe and committed by Linus Torvalds
5abc97aa 5274f052

+98 -35
+8
fs/pipe.c
··· 121 kunmap(buf->page); 122 } 123 124 static struct pipe_buf_operations anon_pipe_buf_ops = { 125 .can_merge = 1, 126 .map = anon_pipe_buf_map, 127 .unmap = anon_pipe_buf_unmap, 128 .release = anon_pipe_buf_release, 129 }; 130 131 static ssize_t
··· 121 kunmap(buf->page); 122 } 123 124 + static int anon_pipe_buf_steal(struct pipe_inode_info *info, 125 + struct pipe_buffer *buf) 126 + { 127 + buf->stolen = 1; 128 + return 0; 129 + } 130 + 131 static struct pipe_buf_operations anon_pipe_buf_ops = { 132 .can_merge = 1, 133 .map = anon_pipe_buf_map, 134 .unmap = anon_pipe_buf_unmap, 135 .release = anon_pipe_buf_release, 136 + .steal = anon_pipe_buf_steal, 137 }; 138 139 static ssize_t
+82 -35
fs/splice.c
··· 21 #include <linux/pagemap.h> 22 #include <linux/pipe_fs_i.h> 23 #include <linux/mm_inline.h> 24 25 /* 26 * Passed to the actors ··· 33 loff_t pos; /* file position */ 34 }; 35 36 static void page_cache_pipe_buf_release(struct pipe_inode_info *info, 37 struct pipe_buffer *buf) 38 { 39 page_cache_release(buf->page); 40 buf->page = NULL; 41 } 42 43 static void *page_cache_pipe_buf_map(struct file *file, ··· 90 static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info, 91 struct pipe_buffer *buf) 92 { 93 - unlock_page(buf->page); 94 kunmap(buf->page); 95 } 96 ··· 100 .map = page_cache_pipe_buf_map, 101 .unmap = page_cache_pipe_buf_unmap, 102 .release = page_cache_pipe_buf_release, 103 }; 104 105 static ssize_t move_to_pipe(struct inode *inode, struct page **pages, ··· 365 struct address_space *mapping = file->f_mapping; 366 unsigned int offset; 367 struct page *page; 368 - char *src, *dst; 369 pgoff_t index; 370 int ret; 371 372 /* ··· 379 index = sd->pos >> PAGE_CACHE_SHIFT; 380 offset = sd->pos & ~PAGE_CACHE_MASK; 381 382 - find_page: 383 - ret = -ENOMEM; 384 - page = find_or_create_page(mapping, index, mapping_gfp_mask(mapping)); 385 - if (!page) 386 - goto out; 387 - 388 /* 389 - * If the page is uptodate, it is also locked. If it isn't 390 - * uptodate, we can mark it uptodate if we are filling the 391 - * full page. Otherwise we need to read it in first... 
392 */ 393 - if (!PageUptodate(page)) { 394 - if (sd->len < PAGE_CACHE_SIZE) { 395 - ret = mapping->a_ops->readpage(file, page); 396 - if (unlikely(ret)) 397 - goto out; 398 399 - lock_page(page); 400 401 - if (!PageUptodate(page)) { 402 - /* 403 - * page got invalidated, repeat 404 - */ 405 - if (!page->mapping) { 406 - unlock_page(page); 407 - page_cache_release(page); 408 - goto find_page; 409 } 410 - ret = -EIO; 411 - goto out; 412 } 413 - } else { 414 - WARN_ON(!PageLocked(page)); 415 - SetPageUptodate(page); 416 } 417 } 418 ··· 434 if (ret) 435 goto out; 436 437 - dst = kmap_atomic(page, KM_USER0); 438 - memcpy(dst + offset, src + buf->offset, sd->len); 439 - flush_dcache_page(page); 440 - kunmap_atomic(dst, KM_USER0); 441 442 ret = mapping->a_ops->commit_write(file, page, 0, sd->len); 443 if (ret < 0) ··· 451 out: 452 if (ret < 0) 453 unlock_page(page); 454 - page_cache_release(page); 455 buf->ops->unmap(info, buf); 456 return ret; 457 }
··· 21 #include <linux/pagemap.h> 22 #include <linux/pipe_fs_i.h> 23 #include <linux/mm_inline.h> 24 + #include <linux/swap.h> 25 26 /* 27 * Passed to the actors ··· 32 loff_t pos; /* file position */ 33 }; 34 35 + static int page_cache_pipe_buf_steal(struct pipe_inode_info *info, 36 + struct pipe_buffer *buf) 37 + { 38 + struct page *page = buf->page; 39 + 40 + WARN_ON(!PageLocked(page)); 41 + WARN_ON(!PageUptodate(page)); 42 + 43 + if (!remove_mapping(page_mapping(page), page)) 44 + return 1; 45 + 46 + if (PageLRU(page)) { 47 + struct zone *zone = page_zone(page); 48 + 49 + spin_lock_irq(&zone->lru_lock); 50 + BUG_ON(!PageLRU(page)); 51 + __ClearPageLRU(page); 52 + del_page_from_lru(zone, page); 53 + spin_unlock_irq(&zone->lru_lock); 54 + } 55 + 56 + buf->stolen = 1; 57 + return 0; 58 + } 59 + 60 static void page_cache_pipe_buf_release(struct pipe_inode_info *info, 61 struct pipe_buffer *buf) 62 { 63 page_cache_release(buf->page); 64 buf->page = NULL; 65 + buf->stolen = 0; 66 } 67 68 static void *page_cache_pipe_buf_map(struct file *file, ··· 63 static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info, 64 struct pipe_buffer *buf) 65 { 66 + if (!buf->stolen) 67 + unlock_page(buf->page); 68 kunmap(buf->page); 69 } 70 ··· 72 .map = page_cache_pipe_buf_map, 73 .unmap = page_cache_pipe_buf_unmap, 74 .release = page_cache_pipe_buf_release, 75 + .steal = page_cache_pipe_buf_steal, 76 }; 77 78 static ssize_t move_to_pipe(struct inode *inode, struct page **pages, ··· 336 struct address_space *mapping = file->f_mapping; 337 unsigned int offset; 338 struct page *page; 339 pgoff_t index; 340 + char *src; 341 int ret; 342 343 /* ··· 350 index = sd->pos >> PAGE_CACHE_SHIFT; 351 offset = sd->pos & ~PAGE_CACHE_MASK; 352 353 /* 354 + * reuse buf page, if SPLICE_F_MOVE is set 355 */ 356 + if (sd->flags & SPLICE_F_MOVE) { 357 + if (buf->ops->steal(info, buf)) 358 + goto find_page; 359 360 + page = buf->page; 361 + if (add_to_page_cache_lru(page, mapping, index, 362 + 
mapping_gfp_mask(mapping))) 363 + goto find_page; 364 + } else { 365 + find_page: 366 + ret = -ENOMEM; 367 + page = find_or_create_page(mapping, index, 368 + mapping_gfp_mask(mapping)); 369 + if (!page) 370 + goto out; 371 372 + /* 373 + * If the page is uptodate, it is also locked. If it isn't 374 + * uptodate, we can mark it uptodate if we are filling the 375 + * full page. Otherwise we need to read it in first... 376 + */ 377 + if (!PageUptodate(page)) { 378 + if (sd->len < PAGE_CACHE_SIZE) { 379 + ret = mapping->a_ops->readpage(file, page); 380 + if (unlikely(ret)) 381 + goto out; 382 + 383 + lock_page(page); 384 + 385 + if (!PageUptodate(page)) { 386 + /* 387 + * page got invalidated, repeat 388 + */ 389 + if (!page->mapping) { 390 + unlock_page(page); 391 + page_cache_release(page); 392 + goto find_page; 393 + } 394 + ret = -EIO; 395 + goto out; 396 } 397 + } else { 398 + WARN_ON(!PageLocked(page)); 399 + SetPageUptodate(page); 400 } 401 } 402 } 403 ··· 391 if (ret) 392 goto out; 393 394 + if (!buf->stolen) { 395 + char *dst = kmap_atomic(page, KM_USER0); 396 + 397 + memcpy(dst + offset, src + buf->offset, sd->len); 398 + flush_dcache_page(page); 399 + kunmap_atomic(dst, KM_USER0); 400 + } 401 402 ret = mapping->a_ops->commit_write(file, page, 0, sd->len); 403 if (ret < 0) ··· 405 out: 406 if (ret < 0) 407 unlock_page(page); 408 + if (!buf->stolen) 409 + page_cache_release(page); 410 buf->ops->unmap(info, buf); 411 return ret; 412 }
+8
include/linux/pipe_fs_i.h
··· 9 struct page *page; 10 unsigned int offset, len; 11 struct pipe_buf_operations *ops; 12 }; 13 14 struct pipe_buf_operations { ··· 17 void * (*map)(struct file *, struct pipe_inode_info *, struct pipe_buffer *); 18 void (*unmap)(struct pipe_inode_info *, struct pipe_buffer *); 19 void (*release)(struct pipe_inode_info *, struct pipe_buffer *); 20 }; 21 22 struct pipe_inode_info { ··· 54 55 struct inode* pipe_new(struct inode* inode); 56 void free_pipe_info(struct inode* inode); 57 58 #endif
··· 9 struct page *page; 10 unsigned int offset, len; 11 struct pipe_buf_operations *ops; 12 + unsigned int stolen; 13 }; 14 15 struct pipe_buf_operations { ··· 16 void * (*map)(struct file *, struct pipe_inode_info *, struct pipe_buffer *); 17 void (*unmap)(struct pipe_inode_info *, struct pipe_buffer *); 18 void (*release)(struct pipe_inode_info *, struct pipe_buffer *); 19 + int (*steal)(struct pipe_inode_info *, struct pipe_buffer *); 20 }; 21 22 struct pipe_inode_info { ··· 52 53 struct inode* pipe_new(struct inode* inode); 54 void free_pipe_info(struct inode* inode); 55 + 56 + /* 57 + * splice is tied to pipes as a transport (at least for now), so we'll just 58 + * add the splice flags here. 59 + */ 60 + #define SPLICE_F_MOVE (0x01) /* move pages instead of copying */ 61 62 #endif