Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

9P: introduction of a new cache=mmap model.

- Add cache=mmap option
- Make mmap read-write while keeping it as synchronous as possible
- Build writeback fid on mmap creation if it is writable

Signed-off-by: Dominique Martinet <dominique.martinet@cea.fr>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>

Authored by Dominique Martinet and committed by Eric Van Hensbergen
fb89b45c f94741fd

+179 -19
+8 -1
fs/9p/v9fs.c
··· 56 56 /* Options that take no arguments */ 57 57 Opt_nodevmap, 58 58 /* Cache options */ 59 - Opt_cache_loose, Opt_fscache, 59 + Opt_cache_loose, Opt_fscache, Opt_mmap, 60 60 /* Access options */ 61 61 Opt_access, Opt_posixacl, 62 62 /* Error token */ ··· 74 74 {Opt_cache, "cache=%s"}, 75 75 {Opt_cache_loose, "loose"}, 76 76 {Opt_fscache, "fscache"}, 77 + {Opt_mmap, "mmap"}, 77 78 {Opt_cachetag, "cachetag=%s"}, 78 79 {Opt_access, "access=%s"}, 79 80 {Opt_posixacl, "posixacl"}, ··· 92 91 } else if (!strcmp(s, "fscache")) { 93 92 version = CACHE_FSCACHE; 94 93 p9_debug(P9_DEBUG_9P, "Cache mode: fscache\n"); 94 + } else if (!strcmp(s, "mmap")) { 95 + version = CACHE_MMAP; 96 + p9_debug(P9_DEBUG_9P, "Cache mode: mmap\n"); 95 97 } else if (!strcmp(s, "none")) { 96 98 version = CACHE_NONE; 97 99 p9_debug(P9_DEBUG_9P, "Cache mode: none\n"); ··· 223 219 break; 224 220 case Opt_fscache: 225 221 v9ses->cache = CACHE_FSCACHE; 222 + break; 223 + case Opt_mmap: 224 + v9ses->cache = CACHE_MMAP; 226 225 break; 227 226 case Opt_cachetag: 228 227 #ifdef CONFIG_9P_FSCACHE
+1
fs/9p/v9fs.h
··· 64 64 65 65 enum p9_cache_modes { 66 66 CACHE_NONE, 67 + CACHE_MMAP, 67 68 CACHE_LOOSE, 68 69 CACHE_FSCACHE, 69 70 };
+2
fs/9p/v9fs_vfs.h
··· 50 50 extern const struct dentry_operations v9fs_cached_dentry_operations; 51 51 extern const struct file_operations v9fs_cached_file_operations; 52 52 extern const struct file_operations v9fs_cached_file_operations_dotl; 53 + extern const struct file_operations v9fs_mmap_file_operations; 54 + extern const struct file_operations v9fs_mmap_file_operations_dotl; 53 55 extern struct kmem_cache *v9fs_inode_cache; 54 56 55 57 struct inode *v9fs_alloc_inode(struct super_block *sb);
+7
fs/9p/vfs_addr.c
··· 202 202 { 203 203 int retval; 204 204 205 + p9_debug(P9_DEBUG_VFS, "page %p\n", page); 206 + 205 207 retval = v9fs_vfs_writepage_locked(page); 206 208 if (retval < 0) { 207 209 if (retval == -EAGAIN) { ··· 284 282 pgoff_t index = pos >> PAGE_CACHE_SHIFT; 285 283 struct inode *inode = mapping->host; 286 284 285 + 286 + p9_debug(P9_DEBUG_VFS, "filp %p, mapping %p\n", filp, mapping); 287 + 287 288 v9inode = V9FS_I(inode); 288 289 start: 289 290 page = grab_cache_page_write_begin(mapping, index, flags); ··· 316 311 { 317 312 loff_t last_pos = pos + copied; 318 313 struct inode *inode = page->mapping->host; 314 + 315 + p9_debug(P9_DEBUG_VFS, "filp %p, mapping %p\n", filp, mapping); 319 316 320 317 if (unlikely(copied < len)) { 321 318 /*
+136 -4
fs/9p/vfs_file.c
··· 45 45 #include "cache.h" 46 46 47 47 static const struct vm_operations_struct v9fs_file_vm_ops; 48 + static const struct vm_operations_struct v9fs_mmap_file_vm_ops; 48 49 49 50 /** 50 51 * v9fs_file_open - open a file (or directory) ··· 88 87 89 88 file->private_data = fid; 90 89 mutex_lock(&v9inode->v_mutex); 91 - if (v9ses->cache && !v9inode->writeback_fid && 90 + if ((v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) && 91 + !v9inode->writeback_fid && 92 92 ((file->f_flags & O_ACCMODE) != O_RDONLY)) { 93 93 /* 94 94 * clone a fid and add it to writeback_fid ··· 107 105 v9inode->writeback_fid = (void *) fid; 108 106 } 109 107 mutex_unlock(&v9inode->v_mutex); 110 - if (v9ses->cache) 108 + if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) 111 109 v9fs_cache_inode_set_cookie(inode, file); 112 110 return 0; 113 111 out_error: ··· 581 579 } 582 580 583 581 static int 584 - v9fs_file_mmap(struct file *file, struct vm_area_struct *vma) 582 + v9fs_file_mmap(struct file *filp, struct vm_area_struct *vma) 585 583 { 586 584 int retval; 587 585 588 - retval = generic_file_mmap(file, vma); 586 + 587 + retval = generic_file_mmap(filp, vma); 589 588 if (!retval) 590 589 vma->vm_ops = &v9fs_file_vm_ops; 590 + 591 + return retval; 592 + } 593 + 594 + static int 595 + v9fs_mmap_file_mmap(struct file *filp, struct vm_area_struct *vma) 596 + { 597 + int retval; 598 + struct inode *inode; 599 + struct v9fs_inode *v9inode; 600 + struct p9_fid *fid; 601 + 602 + inode = file_inode(filp); 603 + v9inode = V9FS_I(inode); 604 + mutex_lock(&v9inode->v_mutex); 605 + if (!v9inode->writeback_fid && 606 + (vma->vm_flags & VM_WRITE)) { 607 + /* 608 + * clone a fid and add it to writeback_fid 609 + * we do it during mmap instead of 610 + * page dirty time via write_begin/page_mkwrite 611 + * because we want write after unlink usecase 612 + * to work. 
613 + */ 614 + fid = v9fs_writeback_fid(filp->f_path.dentry); 615 + if (IS_ERR(fid)) { 616 + retval = PTR_ERR(fid); 617 + mutex_unlock(&v9inode->v_mutex); 618 + return retval; 619 + } 620 + v9inode->writeback_fid = (void *) fid; 621 + } 622 + mutex_unlock(&v9inode->v_mutex); 623 + 624 + retval = generic_file_mmap(filp, vma); 625 + if (!retval) 626 + vma->vm_ops = &v9fs_mmap_file_vm_ops; 591 627 592 628 return retval; 593 629 } ··· 698 658 return do_sync_read(filp, data, count, offset); 699 659 } 700 660 661 + /** 662 + * v9fs_mmap_file_read - read from a file 663 + * @filp: file pointer to read 664 + * @udata: user data buffer to read data into 665 + * @count: size of buffer 666 + * @offset: offset at which to read data 667 + * 668 + */ 669 + static ssize_t 670 + v9fs_mmap_file_read(struct file *filp, char __user *data, size_t count, 671 + loff_t *offset) 672 + { 673 + /* TODO: Check if there are dirty pages */ 674 + return v9fs_file_read(filp, data, count, offset); 675 + } 676 + 701 677 static ssize_t 702 678 v9fs_direct_write(struct file *filp, const char __user * data, 703 679 size_t count, loff_t *offsetp) ··· 784 728 return do_sync_write(filp, data, count, offset); 785 729 } 786 730 731 + 732 + /** 733 + * v9fs_mmap_file_write - write to a file 734 + * @filp: file pointer to write 735 + * @data: data buffer to write data from 736 + * @count: size of buffer 737 + * @offset: offset at which to write data 738 + * 739 + */ 740 + static ssize_t 741 + v9fs_mmap_file_write(struct file *filp, const char __user *data, 742 + size_t count, loff_t *offset) 743 + { 744 + /* 745 + * TODO: invalidate mmaps on filp's inode between 746 + * offset and offset+count 747 + */ 748 + return v9fs_file_write(filp, data, count, offset); 749 + } 750 + 751 + static void v9fs_mmap_vm_close(struct vm_area_struct *vma) 752 + { 753 + struct inode *inode; 754 + 755 + struct writeback_control wbc = { 756 + .nr_to_write = LONG_MAX, 757 + .sync_mode = WB_SYNC_ALL, 758 + .range_start = 
vma->vm_pgoff * PAGE_SIZE, 759 + /* absolute end, byte at end included */ 760 + .range_end = vma->vm_pgoff * PAGE_SIZE + 761 + (vma->vm_end - vma->vm_start - 1), 762 + }; 763 + 764 + 765 + p9_debug(P9_DEBUG_VFS, "9p VMA close, %p, flushing", vma); 766 + 767 + inode = file_inode(vma->vm_file); 768 + 769 + if (!mapping_cap_writeback_dirty(inode->i_mapping)) 770 + wbc.nr_to_write = 0; 771 + 772 + might_sleep(); 773 + sync_inode(inode, &wbc); 774 + } 775 + 776 + 787 777 static const struct vm_operations_struct v9fs_file_vm_ops = { 778 + .fault = filemap_fault, 779 + .page_mkwrite = v9fs_vm_page_mkwrite, 780 + .remap_pages = generic_file_remap_pages, 781 + }; 782 + 783 + static const struct vm_operations_struct v9fs_mmap_file_vm_ops = { 784 + .close = v9fs_mmap_vm_close, 788 785 .fault = filemap_fault, 789 786 .page_mkwrite = v9fs_vm_page_mkwrite, 790 787 .remap_pages = generic_file_remap_pages, ··· 891 782 .lock = v9fs_file_lock_dotl, 892 783 .flock = v9fs_file_flock_dotl, 893 784 .mmap = generic_file_readonly_mmap, 785 + .fsync = v9fs_file_fsync_dotl, 786 + }; 787 + 788 + const struct file_operations v9fs_mmap_file_operations = { 789 + .llseek = generic_file_llseek, 790 + .read = v9fs_mmap_file_read, 791 + .write = v9fs_mmap_file_write, 792 + .open = v9fs_file_open, 793 + .release = v9fs_dir_release, 794 + .lock = v9fs_file_lock, 795 + .mmap = v9fs_mmap_file_mmap, 796 + .fsync = v9fs_file_fsync, 797 + }; 798 + 799 + const struct file_operations v9fs_mmap_file_operations_dotl = { 800 + .llseek = generic_file_llseek, 801 + .read = v9fs_mmap_file_read, 802 + .write = v9fs_mmap_file_write, 803 + .open = v9fs_file_open, 804 + .release = v9fs_dir_release, 805 + .lock = v9fs_file_lock_dotl, 806 + .flock = v9fs_file_flock_dotl, 807 + .mmap = v9fs_mmap_file_mmap, 894 808 .fsync = v9fs_file_fsync_dotl, 895 809 };
+15 -7
fs/9p/vfs_inode.c
··· 299 299 case S_IFREG: 300 300 if (v9fs_proto_dotl(v9ses)) { 301 301 inode->i_op = &v9fs_file_inode_operations_dotl; 302 - if (v9ses->cache) 302 + if (v9ses->cache == CACHE_LOOSE || 303 + v9ses->cache == CACHE_FSCACHE) 303 304 inode->i_fop = 304 305 &v9fs_cached_file_operations_dotl; 306 + else if (v9ses->cache == CACHE_MMAP) 307 + inode->i_fop = &v9fs_mmap_file_operations_dotl; 305 308 else 306 309 inode->i_fop = &v9fs_file_operations_dotl; 307 310 } else { 308 311 inode->i_op = &v9fs_file_inode_operations; 309 - if (v9ses->cache) 310 - inode->i_fop = &v9fs_cached_file_operations; 312 + if (v9ses->cache == CACHE_LOOSE || 313 + v9ses->cache == CACHE_FSCACHE) 314 + inode->i_fop = 315 + &v9fs_cached_file_operations; 316 + else if (v9ses->cache == CACHE_MMAP) 317 + inode->i_fop = &v9fs_mmap_file_operations; 311 318 else 312 319 inode->i_fop = &v9fs_file_operations; 313 320 } ··· 817 810 * unlink. For cached mode create calls request for new 818 811 * inode. But with cache disabled, lookup should do this. 
819 812 */ 820 - if (v9ses->cache) 813 + if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) 821 814 inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb); 822 815 else 823 816 inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb); ··· 883 876 v9fs_invalidate_inode_attr(dir); 884 877 v9inode = V9FS_I(dentry->d_inode); 885 878 mutex_lock(&v9inode->v_mutex); 886 - if (v9ses->cache && !v9inode->writeback_fid && 879 + if ((v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) && 880 + !v9inode->writeback_fid && 887 881 ((flags & O_ACCMODE) != O_RDONLY)) { 888 882 /* 889 883 * clone a fid and add it to writeback_fid ··· 907 899 goto error; 908 900 909 901 file->private_data = fid; 910 - if (v9ses->cache) 902 + if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) 911 903 v9fs_cache_inode_set_cookie(dentry->d_inode, file); 912 904 913 905 *opened |= FILE_CREATED; ··· 1485 1477 */ 1486 1478 i_size = inode->i_size; 1487 1479 v9fs_stat2inode(st, inode, inode->i_sb); 1488 - if (v9ses->cache) 1480 + if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) 1489 1481 inode->i_size = i_size; 1490 1482 spin_unlock(&inode->i_lock); 1491 1483 out:
+5 -4
fs/9p/vfs_inode_dotl.c
··· 330 330 331 331 v9inode = V9FS_I(inode); 332 332 mutex_lock(&v9inode->v_mutex); 333 - if (v9ses->cache && !v9inode->writeback_fid && 333 + if ((v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) && 334 + !v9inode->writeback_fid && 334 335 ((flags & O_ACCMODE) != O_RDONLY)) { 335 336 /* 336 337 * clone a fid and add it to writeback_fid ··· 354 353 if (err) 355 354 goto err_clunk_old_fid; 356 355 file->private_data = ofid; 357 - if (v9ses->cache) 356 + if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) 358 357 v9fs_cache_inode_set_cookie(inode, file); 359 358 *opened |= FILE_CREATED; 360 359 out: ··· 711 710 } 712 711 713 712 v9fs_invalidate_inode_attr(dir); 714 - if (v9ses->cache) { 713 + if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) { 715 714 /* Now walk from the parent so we can get an unopened fid. */ 716 715 fid = p9_client_walk(dfid, 1, &name, 1); 717 716 if (IS_ERR(fid)) { ··· 966 965 */ 967 966 i_size = inode->i_size; 968 967 v9fs_stat2inode_dotl(st, inode); 969 - if (v9ses->cache) 968 + if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) 970 969 inode->i_size = i_size; 971 970 spin_unlock(&inode->i_lock); 972 971 out:
+5 -3
fs/9p/vfs_super.c
··· 144 144 } 145 145 v9fs_fill_super(sb, v9ses, flags, data); 146 146 147 - if (v9ses->cache) 147 + if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) 148 148 sb->s_d_op = &v9fs_cached_dentry_operations; 149 149 else 150 150 sb->s_d_op = &v9fs_dentry_operations; ··· 282 282 { 283 283 struct v9fs_session_info *v9ses; 284 284 v9ses = v9fs_inode2v9ses(inode); 285 - if (v9ses->cache) 285 + if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) 286 286 return generic_drop_inode(inode); 287 287 /* 288 288 * in case of non cached mode always drop the ··· 325 325 * send an fsync request to server irrespective of 326 326 * wbc->sync_mode. 327 327 */ 328 - p9_debug(P9_DEBUG_VFS, "%s: inode %p\n", __func__, inode); 329 328 v9inode = V9FS_I(inode); 329 + p9_debug(P9_DEBUG_VFS, "%s: inode %p, writeback_fid %p\n", 330 + __func__, inode, v9inode->writeback_fid); 330 331 if (!v9inode->writeback_fid) 331 332 return 0; 333 + 332 334 ret = p9_client_fsync(v9inode->writeback_fid, 0); 333 335 if (ret < 0) { 334 336 __mark_inode_dirty(inode, I_DIRTY_DATASYNC);