Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

NFSv4.1/pnfs: Separate out metadata and data consistency for pNFS

The LAYOUTCOMMIT operation means different things to different layout types.
For blocks and objects, it is both a data and metadata consistency operation.
For files and flexfiles, it is only a metadata consistency operation.

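This patch separates out the two cases by adding a ->sync() operation to
the layout driver: blocks and objects use pnfs_generic_sync(), which always
issues a LAYOUTCOMMIT, while files and flexfiles use pnfs_nfs_generic_sync(),
which skips it when only data consistency is requested. This allows the
files/flexfiles layout drivers to optimise away the data consistency calls
to layoutcommit.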

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>

+47 -8
+1
fs/nfs/blocklayout/blocklayout.c
··· 890 890 .free_deviceid_node = bl_free_deviceid_node, 891 891 .pg_read_ops = &bl_pg_read_ops, 892 892 .pg_write_ops = &bl_pg_write_ops, 893 + .sync = pnfs_generic_sync, 893 894 }; 894 895 895 896 static int __init nfs4blocklayout_init(void)
+1
fs/nfs/filelayout/filelayout.c
··· 1139 1139 .write_pagelist = filelayout_write_pagelist, 1140 1140 .alloc_deviceid_node = filelayout_alloc_deviceid_node, 1141 1141 .free_deviceid_node = filelayout_free_deviceid_node, 1142 + .sync = pnfs_nfs_generic_sync, 1142 1143 }; 1143 1144 1144 1145 static int __init nfs4filelayout_init(void)
+1
fs/nfs/flexfilelayout/flexfilelayout.c
··· 1509 1509 .write_pagelist = ff_layout_write_pagelist, 1510 1510 .alloc_deviceid_node = ff_layout_alloc_deviceid_node, 1511 1511 .encode_layoutreturn = ff_layout_encode_layoutreturn, 1512 + .sync = pnfs_nfs_generic_sync, 1512 1513 }; 1513 1514 1514 1515 static int __init nfs4flexfilelayout_init(void)
+1 -1
fs/nfs/nfs4file.c
··· 112 112 mutex_lock(&inode->i_mutex); 113 113 ret = nfs_file_fsync_commit(file, start, end, datasync); 114 114 if (!ret) 115 - ret = pnfs_layoutcommit_inode(inode, true); 115 + ret = pnfs_sync_inode(inode, !!datasync); 116 116 mutex_unlock(&inode->i_mutex); 117 117 /* 118 118 * If nfs_file_fsync_commit detected a server reboot, then
+2
fs/nfs/objlayout/objio_osd.c
··· 637 637 .pg_read_ops = &objio_pg_read_ops, 638 638 .pg_write_ops = &objio_pg_write_ops, 639 639 640 + .sync = pnfs_generic_sync, 641 + 640 642 .free_deviceid_node = objio_free_deviceid_node, 641 643 642 644 .encode_layoutcommit = objlayout_encode_layoutcommit,
+7
fs/nfs/pnfs.c
··· 2231 2231 } 2232 2232 EXPORT_SYMBOL_GPL(pnfs_layoutcommit_inode); 2233 2233 2234 + int 2235 + pnfs_generic_sync(struct inode *inode, bool datasync) 2236 + { 2237 + return pnfs_layoutcommit_inode(inode, true); 2238 + } 2239 + EXPORT_SYMBOL_GPL(pnfs_generic_sync); 2240 + 2234 2241 struct nfs4_threshold *pnfs_mdsthreshold_alloc(void) 2235 2242 { 2236 2243 struct nfs4_threshold *thp;
+18
fs/nfs/pnfs.h
··· 155 155 int how, 156 156 struct nfs_commit_info *cinfo); 157 157 158 + int (*sync)(struct inode *inode, bool datasync); 159 + 158 160 /* 159 161 * Return PNFS_ATTEMPTED to indicate the layout code has attempted 160 162 * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS ··· 269 267 void pnfs_set_layoutcommit(struct inode *, struct pnfs_layout_segment *, loff_t); 270 268 void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); 271 269 int pnfs_layoutcommit_inode(struct inode *inode, bool sync); 270 + int pnfs_generic_sync(struct inode *inode, bool datasync); 271 + int pnfs_nfs_generic_sync(struct inode *inode, bool datasync); 272 272 int _pnfs_return_layout(struct inode *); 273 273 int pnfs_commit_and_return_layout(struct inode *); 274 274 void pnfs_ld_write_done(struct nfs_pgio_header *); ··· 492 488 return NFS_SERVER(inode)->pnfs_curr_ld->flags & PNFS_READ_WHOLE_PAGE; 493 489 } 494 490 491 + static inline int 492 + pnfs_sync_inode(struct inode *inode, bool datasync) 493 + { 494 + if (!pnfs_enabled_sb(NFS_SERVER(inode))) 495 + return 0; 496 + return NFS_SERVER(inode)->pnfs_curr_ld->sync(inode, datasync); 497 + } 498 + 495 499 static inline bool 496 500 pnfs_layoutcommit_outstanding(struct inode *inode) 497 501 { ··· 580 568 pnfs_ld_read_whole_page(struct inode *inode) 581 569 { 582 570 return false; 571 + } 572 + 573 + static inline int 574 + pnfs_sync_inode(struct inode *inode, bool datasync) 575 + { 576 + return 0; 583 577 } 584 578 585 579 static inline bool
+10
fs/nfs/pnfs_nfs.c
··· 868 868 nfs_request_add_commit_list(req, list, cinfo); 869 869 } 870 870 EXPORT_SYMBOL_GPL(pnfs_layout_mark_request_commit); 871 + 872 + int 873 + pnfs_nfs_generic_sync(struct inode *inode, bool datasync) 874 + { 875 + if (datasync) 876 + return 0; 877 + return pnfs_layoutcommit_inode(inode, true); 878 + } 879 + EXPORT_SYMBOL_GPL(pnfs_nfs_generic_sync); 880 +
+6 -7
fs/nfs/write.c
··· 1840 1840 */ 1841 1841 int nfs_wb_all(struct inode *inode) 1842 1842 { 1843 - struct writeback_control wbc = { 1844 - .sync_mode = WB_SYNC_ALL, 1845 - .nr_to_write = LONG_MAX, 1846 - .range_start = 0, 1847 - .range_end = LLONG_MAX, 1848 - }; 1849 1843 int ret; 1850 1844 1851 1845 trace_nfs_writeback_inode_enter(inode); 1852 1846 1853 - ret = sync_inode(inode, &wbc); 1847 + ret = filemap_write_and_wait(inode->i_mapping); 1848 + if (!ret) { 1849 + ret = nfs_commit_inode(inode, FLUSH_SYNC); 1850 + if (!ret) 1851 + pnfs_sync_inode(inode, true); 1852 + } 1854 1853 1855 1854 trace_nfs_writeback_inode_exit(inode, ret); 1856 1855 return ret;