Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ceph: remove special ack vs commit behavior

- ask for a commit reply instead of an ack reply in
__ceph_pool_perm_get()
- don't ask for both ack and commit replies in ceph_sync_write()
- since only one reply is requested now, the i_unsafe_writes list
will always be empty -- kill ceph_sync_write_wait() and go back to
a standard ->evict_inode()

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Reviewed-by: Sage Weil <sage@redhat.com>

+3 -103
+1 -1
fs/ceph/addr.c
··· 1872 1872 goto out_unlock; 1873 1873 } 1874 1874 1875 - wr_req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ACK; 1875 + wr_req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK; 1876 1876 osd_req_op_init(wr_req, 0, CEPH_OSD_OP_CREATE, CEPH_OSD_OP_FLAG_EXCL); 1877 1877 ceph_oloc_copy(&wr_req->r_base_oloc, &rd_req->r_base_oloc); 1878 1878 ceph_oid_copy(&wr_req->r_base_oid, &rd_req->r_base_oid);
-2
fs/ceph/caps.c
··· 2091 2091 2092 2092 dout("fsync %p%s\n", inode, datasync ? " datasync" : ""); 2093 2093 2094 - ceph_sync_write_wait(inode); 2095 - 2096 2094 ret = filemap_write_and_wait_range(inode->i_mapping, start, end); 2097 2095 if (ret < 0) 2098 2096 goto out;
+1 -87
fs/ceph/file.c
··· 795 795 kfree(aio_work); 796 796 } 797 797 798 - /* 799 - * Write commit request unsafe callback, called to tell us when a 800 - * request is unsafe (that is, in flight--has been handed to the 801 - * messenger to send to its target osd). It is called again when 802 - * we've received a response message indicating the request is 803 - * "safe" (its CEPH_OSD_FLAG_ONDISK flag is set), or when a request 804 - * is completed early (and unsuccessfully) due to a timeout or 805 - * interrupt. 806 - * 807 - * This is used if we requested both an ACK and ONDISK commit reply 808 - * from the OSD. 809 - */ 810 - static void ceph_sync_write_unsafe(struct ceph_osd_request *req, bool unsafe) 811 - { 812 - struct ceph_inode_info *ci = ceph_inode(req->r_inode); 813 - 814 - dout("%s %p tid %llu %ssafe\n", __func__, req, req->r_tid, 815 - unsafe ? "un" : ""); 816 - if (unsafe) { 817 - ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR); 818 - spin_lock(&ci->i_unsafe_lock); 819 - list_add_tail(&req->r_unsafe_item, 820 - &ci->i_unsafe_writes); 821 - spin_unlock(&ci->i_unsafe_lock); 822 - 823 - complete_all(&req->r_completion); 824 - } else { 825 - spin_lock(&ci->i_unsafe_lock); 826 - list_del_init(&req->r_unsafe_item); 827 - spin_unlock(&ci->i_unsafe_lock); 828 - ceph_put_cap_refs(ci, CEPH_CAP_FILE_WR); 829 - } 830 - } 831 - 832 - /* 833 - * Wait on any unsafe replies for the given inode. First wait on the 834 - * newest request, and make that the upper bound. Then, if there are 835 - * more requests, keep waiting on the oldest as long as it is still older 836 - * than the original request. 
837 - */ 838 - void ceph_sync_write_wait(struct inode *inode) 839 - { 840 - struct ceph_inode_info *ci = ceph_inode(inode); 841 - struct list_head *head = &ci->i_unsafe_writes; 842 - struct ceph_osd_request *req; 843 - u64 last_tid; 844 - 845 - if (!S_ISREG(inode->i_mode)) 846 - return; 847 - 848 - spin_lock(&ci->i_unsafe_lock); 849 - if (list_empty(head)) 850 - goto out; 851 - 852 - /* set upper bound as _last_ entry in chain */ 853 - 854 - req = list_last_entry(head, struct ceph_osd_request, 855 - r_unsafe_item); 856 - last_tid = req->r_tid; 857 - 858 - do { 859 - ceph_osdc_get_request(req); 860 - spin_unlock(&ci->i_unsafe_lock); 861 - 862 - dout("sync_write_wait on tid %llu (until %llu)\n", 863 - req->r_tid, last_tid); 864 - wait_for_completion(&req->r_done_completion); 865 - ceph_osdc_put_request(req); 866 - 867 - spin_lock(&ci->i_unsafe_lock); 868 - /* 869 - * from here on look at first entry in chain, since we 870 - * only want to wait for anything older than last_tid 871 - */ 872 - if (list_empty(head)) 873 - break; 874 - req = list_first_entry(head, struct ceph_osd_request, 875 - r_unsafe_item); 876 - } while (req->r_tid < last_tid); 877 - out: 878 - spin_unlock(&ci->i_unsafe_lock); 879 - } 880 - 881 798 static ssize_t 882 799 ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, 883 800 struct ceph_snap_context *snapc, ··· 1036 1119 1037 1120 flags = CEPH_OSD_FLAG_ORDERSNAP | 1038 1121 CEPH_OSD_FLAG_ONDISK | 1039 - CEPH_OSD_FLAG_WRITE | 1040 - CEPH_OSD_FLAG_ACK; 1122 + CEPH_OSD_FLAG_WRITE; 1041 1123 1042 1124 while ((len = iov_iter_count(from)) > 0) { 1043 1125 size_t left; ··· 1082 1166 goto out; 1083 1167 } 1084 1168 1085 - /* get a second commit callback */ 1086 - req->r_unsafe_callback = ceph_sync_write_unsafe; 1087 1169 req->r_inode = inode; 1088 1170 1089 1171 osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0,
-9
fs/ceph/inode.c
··· 499 499 ci->i_rdcache_gen = 0; 500 500 ci->i_rdcache_revoking = 0; 501 501 502 - INIT_LIST_HEAD(&ci->i_unsafe_writes); 503 502 INIT_LIST_HEAD(&ci->i_unsafe_dirops); 504 503 INIT_LIST_HEAD(&ci->i_unsafe_iops); 505 504 spin_lock_init(&ci->i_unsafe_lock); ··· 580 581 * dropping all its aliases. 581 582 */ 582 583 return 1; 583 - } 584 - 585 - void ceph_evict_inode(struct inode *inode) 586 - { 587 - /* wait unsafe sync writes */ 588 - ceph_sync_write_wait(inode); 589 - truncate_inode_pages_final(&inode->i_data); 590 - clear_inode(inode); 591 584 } 592 585 593 586 static inline blkcnt_t calc_inode_blocks(u64 size)
-1
fs/ceph/super.c
··· 757 757 .destroy_inode = ceph_destroy_inode, 758 758 .write_inode = ceph_write_inode, 759 759 .drop_inode = ceph_drop_inode, 760 - .evict_inode = ceph_evict_inode, 761 760 .sync_fs = ceph_sync_fs, 762 761 .put_super = ceph_put_super, 763 762 .show_options = ceph_show_options,
+1 -3
fs/ceph/super.h
··· 343 343 u32 i_rdcache_gen; /* incremented each time we get FILE_CACHE. */ 344 344 u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */ 345 345 346 - struct list_head i_unsafe_writes; /* uncommitted sync writes */ 347 346 struct list_head i_unsafe_dirops; /* uncommitted mds dir ops */ 348 347 struct list_head i_unsafe_iops; /* uncommitted mds inode ops */ 349 348 spinlock_t i_unsafe_lock; ··· 752 753 extern struct inode *ceph_alloc_inode(struct super_block *sb); 753 754 extern void ceph_destroy_inode(struct inode *inode); 754 755 extern int ceph_drop_inode(struct inode *inode); 755 - extern void ceph_evict_inode(struct inode *inode); 756 756 757 757 extern struct inode *ceph_get_inode(struct super_block *sb, 758 758 struct ceph_vino vino); ··· 931 933 extern int ceph_release(struct inode *inode, struct file *filp); 932 934 extern void ceph_fill_inline_data(struct inode *inode, struct page *locked_page, 933 935 char *data, size_t len); 934 - extern void ceph_sync_write_wait(struct inode *inode); 936 + 935 937 /* dir.c */ 936 938 extern const struct file_operations ceph_dir_fops; 937 939 extern const struct file_operations ceph_snapdir_fops;