Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client:
ceph: unwind canceled flock state
ceph: fix ENOENT logic in striped_read
ceph: fix short sync reads from the OSD
ceph: fix sync vs canceled write
ceph: use ihold when we already have an inode ref

+81 -59
+1 -1
fs/ceph/addr.c
··· 453 453 int err; 454 454 struct inode *inode = page->mapping->host; 455 455 BUG_ON(!inode); 456 - igrab(inode); 456 + ihold(inode); 457 457 err = writepage_nounlock(page, wbc); 458 458 unlock_page(page); 459 459 iput(inode);
+4 -6
fs/ceph/caps.c
··· 2940 2940 while (!list_empty(&mdsc->cap_dirty)) { 2941 2941 ci = list_first_entry(&mdsc->cap_dirty, struct ceph_inode_info, 2942 2942 i_dirty_item); 2943 - inode = igrab(&ci->vfs_inode); 2943 + inode = &ci->vfs_inode; 2944 + ihold(inode); 2944 2945 dout("flush_dirty_caps %p\n", inode); 2945 2946 spin_unlock(&mdsc->cap_dirty_lock); 2946 - if (inode) { 2947 - ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH, 2948 - NULL); 2949 - iput(inode); 2950 - } 2947 + ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH, NULL); 2948 + iput(inode); 2951 2949 spin_lock(&mdsc->cap_dirty_lock); 2952 2950 } 2953 2951 spin_unlock(&mdsc->cap_dirty_lock);
+7 -4
fs/ceph/dir.c
··· 308 308 req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); 309 309 if (IS_ERR(req)) 310 310 return PTR_ERR(req); 311 - req->r_inode = igrab(inode); 311 + req->r_inode = inode; 312 + ihold(inode); 312 313 req->r_dentry = dget(filp->f_dentry); 313 314 /* hints to request -> mds selection code */ 314 315 req->r_direct_mode = USE_AUTH_MDS; ··· 788 787 req->r_dentry_drop = CEPH_CAP_FILE_SHARED; 789 788 req->r_dentry_unless = CEPH_CAP_FILE_EXCL; 790 789 err = ceph_mdsc_do_request(mdsc, dir, req); 791 - if (err) 790 + if (err) { 792 791 d_drop(dentry); 793 - else if (!req->r_reply_info.head->is_dentry) 794 - d_instantiate(dentry, igrab(old_dentry->d_inode)); 792 + } else if (!req->r_reply_info.head->is_dentry) { 793 + ihold(old_dentry->d_inode); 794 + d_instantiate(dentry, old_dentry->d_inode); 795 + } 795 796 ceph_mdsc_put_request(req); 796 797 return err; 797 798 }
+2 -2
fs/ceph/export.c
··· 109 109 err = ceph_mdsc_do_request(mdsc, NULL, req); 110 110 inode = req->r_target_inode; 111 111 if (inode) 112 - igrab(inode); 112 + ihold(inode); 113 113 ceph_mdsc_put_request(req); 114 114 if (!inode) 115 115 return ERR_PTR(-ESTALE); ··· 167 167 err = ceph_mdsc_do_request(mdsc, NULL, req); 168 168 inode = req->r_target_inode; 169 169 if (inode) 170 - igrab(inode); 170 + ihold(inode); 171 171 ceph_mdsc_put_request(req); 172 172 if (!inode) 173 173 return ERR_PTR(err ? err : -ESTALE);
+20 -17
fs/ceph/file.c
··· 191 191 err = PTR_ERR(req); 192 192 goto out; 193 193 } 194 - req->r_inode = igrab(inode); 194 + req->r_inode = inode; 195 + ihold(inode); 195 196 req->r_num_caps = 1; 196 197 err = ceph_mdsc_do_request(mdsc, parent_inode, req); 197 198 if (!err) ··· 283 282 static int striped_read(struct inode *inode, 284 283 u64 off, u64 len, 285 284 struct page **pages, int num_pages, 286 - int *checkeof, bool align_to_pages, 285 + int *checkeof, bool o_direct, 287 286 unsigned long buf_align) 288 287 { 289 288 struct ceph_fs_client *fsc = ceph_inode_to_client(inode); ··· 308 307 io_align = off & ~PAGE_MASK; 309 308 310 309 more: 311 - if (align_to_pages) 310 + if (o_direct) 312 311 page_align = (pos - io_align + buf_align) & ~PAGE_MASK; 313 312 else 314 313 page_align = pos & ~PAGE_MASK; ··· 318 317 ci->i_truncate_seq, 319 318 ci->i_truncate_size, 320 319 page_pos, pages_left, page_align); 321 - hit_stripe = this_len < left; 322 - was_short = ret >= 0 && ret < this_len; 323 320 if (ret == -ENOENT) 324 321 ret = 0; 322 + hit_stripe = this_len < left; 323 + was_short = ret >= 0 && ret < this_len; 325 324 dout("striped_read %llu~%u (read %u) got %d%s%s\n", pos, left, read, 326 325 ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : ""); ··· 346 345 } 347 346 348 347 if (was_short) { 349 - /* was original extent fully inside i_size? */ 350 - if (pos + left <= inode->i_size) { 351 - dout("zero tail\n"); 352 - ceph_zero_page_vector_range(page_off + read, len - read, 353 - pages); 354 - read = len; 355 - goto out; 356 - } 348 + /* did we bounce off eof? */ 349 + if (pos + left > inode->i_size) 350 + *checkeof = 1; 357 351 358 - /* check i_size */ 359 - *checkeof = 1; 352 + /* zero trailing bytes (inside i_size) */ 353 + if (left > 0 && pos < inode->i_size) { 354 + if (pos + left > inode->i_size) 355 + left = inode->i_size - pos; 356 + 357 + dout("zero tail %d\n", left); 358 + ceph_zero_page_vector_range(page_off + read, left, 359 + pages); 360 + read += left; 361 + } 360 362 } 361 363 362 - out: 363 364 if (ret >= 0) 364 365 ret = read; 365 366 dout("striped_read returns %d\n", ret); ··· 661 658 662 659 /* hit EOF or hole? */ 663 660 if (statret == 0 && *ppos < inode->i_size) { 664 - dout("aio_read sync_read hit hole, reading more\n"); 661 + dout("aio_read sync_read hit hole, ppos %lld < size %lld, reading more\n", *ppos, inode->i_size); 665 662 read += ret; 666 663 base += ret; 667 664 len -= ret;
+10 -8
fs/ceph/inode.c
··· 1101 1101 goto done; 1102 1102 } 1103 1103 req->r_dentry = dn; /* may have spliced */ 1104 - igrab(in); 1104 + ihold(in); 1105 1105 } else if (ceph_ino(in) == vino.ino && 1106 1106 ceph_snap(in) == vino.snap) { 1107 - igrab(in); 1107 + ihold(in); 1108 1108 } else { 1109 1109 dout(" %p links to %p %llx.%llx, not %llx.%llx\n", 1110 1110 dn, in, ceph_ino(in), ceph_snap(in), ··· 1144 1144 goto done; 1145 1145 } 1146 1146 req->r_dentry = dn; /* may have spliced */ 1147 - igrab(in); 1147 + ihold(in); 1148 1148 rinfo->head->is_dentry = 1; /* fool notrace handlers */ 1149 1149 } 1150 1150 ··· 1328 1328 if (queue_work(ceph_inode_to_client(inode)->wb_wq, 1329 1329 &ceph_inode(inode)->i_wb_work)) { 1330 1330 dout("ceph_queue_writeback %p\n", inode); 1331 - igrab(inode); 1331 + ihold(inode); 1332 1332 } else { 1333 1333 dout("ceph_queue_writeback %p failed\n", inode); 1334 1334 } ··· 1353 1353 if (queue_work(ceph_inode_to_client(inode)->pg_inv_wq, 1354 1354 &ceph_inode(inode)->i_pg_inv_work)) { 1355 1355 dout("ceph_queue_invalidate %p\n", inode); 1356 - igrab(inode); 1356 + ihold(inode); 1357 1357 } else { 1358 1358 dout("ceph_queue_invalidate %p failed\n", inode); 1359 1359 } ··· 1477 1477 if (queue_work(ceph_sb_to_client(inode->i_sb)->trunc_wq, 1478 1478 &ci->i_vmtruncate_work)) { 1479 1479 dout("ceph_queue_vmtruncate %p\n", inode); 1480 - igrab(inode); 1480 + ihold(inode); 1481 1481 } else { 1482 1482 dout("ceph_queue_vmtruncate %p failed, pending=%d\n", 1483 1483 inode, ci->i_truncate_pending); ··· 1738 1738 __mark_inode_dirty(inode, inode_dirty_flags); 1739 1739 1740 1740 if (mask) { 1741 - req->r_inode = igrab(inode); 1741 + req->r_inode = inode; 1742 + ihold(inode); 1742 1743 req->r_inode_drop = release; 1743 1744 req->r_args.setattr.mask = cpu_to_le32(mask); 1744 1745 req->r_num_caps = 1; ··· 1780 1779 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS); 1781 1780 if (IS_ERR(req)) 1782 1781 return PTR_ERR(req); 1783 - req->r_inode = igrab(inode); 1782 + req->r_inode = inode; 1783 + ihold(inode); 1784 1784 req->r_num_caps = 1; 1785 1785 req->r_args.getattr.mask = cpu_to_le32(mask); 1786 1786 err = ceph_mdsc_do_request(mdsc, NULL, req);
+4 -2
fs/ceph/ioctl.c
··· 73 73 USE_AUTH_MDS); 74 74 if (IS_ERR(req)) 75 75 return PTR_ERR(req); 76 - req->r_inode = igrab(inode); 76 + req->r_inode = inode; 77 + ihold(inode); 77 78 req->r_inode_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL; 78 79 79 80 req->r_args.setlayout.layout.fl_stripe_unit = ··· 136 135 137 136 if (IS_ERR(req)) 138 137 return PTR_ERR(req); 139 - req->r_inode = igrab(inode); 138 + req->r_inode = inode; 139 + ihold(inode); 140 140 141 141 req->r_args.setlayout.layout.fl_stripe_unit = 142 142 cpu_to_le32(l.stripe_unit);
+18 -11
fs/ceph/locks.c
··· 23 23 req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS); 24 24 if (IS_ERR(req)) 25 25 return PTR_ERR(req); 26 - req->r_inode = igrab(inode); 26 + req->r_inode = inode; 27 + ihold(inode); 27 28 28 29 /* mds requires start and length rather than start and end */ 29 30 if (LLONG_MAX == fl->fl_end) ··· 33 32 length = fl->fl_end - fl->fl_start + 1; 34 33 35 34 dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " 36 - "length: %llu, wait: %d, type`: %d", (int)lock_type, 35 + "length: %llu, wait: %d, type: %d", (int)lock_type, 37 36 (int)operation, (u64)fl->fl_pid, fl->fl_start, 38 37 length, wait, fl->fl_type); 39 - 40 38 41 39 req->r_args.filelock_change.rule = lock_type; 42 40 req->r_args.filelock_change.type = cmd; ··· 70 70 } 71 71 ceph_mdsc_put_request(req); 72 72 dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " 73 - "length: %llu, wait: %d, type`: %d, err code %d", (int)lock_type, 73 + "length: %llu, wait: %d, type: %d, err code %d", (int)lock_type, 74 74 (int)operation, (u64)fl->fl_pid, fl->fl_start, 75 75 length, wait, fl->fl_type, err); 76 76 return err; ··· 109 109 dout("mds locked, locking locally"); 110 110 err = posix_lock_file(file, fl, NULL); 111 111 if (err && (CEPH_MDS_OP_SETFILELOCK == op)) { 112 - /* undo! This should only happen if the kernel detects 113 - * local deadlock. */ 112 + /* undo! This should only happen if 113 + * the kernel detects local 114 + * deadlock. */ 114 115 ceph_lock_message(CEPH_LOCK_FCNTL, op, file, 115 116 CEPH_LOCK_UNLOCK, 0, fl); 116 - dout("got %d on posix_lock_file, undid lock", err); 117 + dout("got %d on posix_lock_file, undid lock", 118 + err); 117 119 } 118 120 } 119 121 120 - } else { 121 - dout("mds returned error code %d", err); 122 + } else if (err == -ERESTARTSYS) { 123 + dout("undoing lock\n"); 124 + ceph_lock_message(CEPH_LOCK_FCNTL, op, file, 125 + CEPH_LOCK_UNLOCK, 0, fl); 122 126 } 123 127 return err; 124 128 } ··· 159 155 file, CEPH_LOCK_UNLOCK, 0, fl); 160 156 dout("got %d on flock_lock_file_wait, undid lock", err); 161 157 } 162 - } else { 163 - dout("mds error code %d", err); 158 + } else if (err == -ERESTARTSYS) { 159 + dout("undoing lock\n"); 160 + ceph_lock_message(CEPH_LOCK_FLOCK, 161 + CEPH_MDS_OP_SETFILELOCK, 162 + file, CEPH_LOCK_UNLOCK, 0, fl); 164 163 } 165 164 return err; 166 165 }
+1 -1
fs/ceph/snap.c
··· 722 722 ci = list_first_entry(&mdsc->snap_flush_list, 723 723 struct ceph_inode_info, i_snap_flush_item); 724 724 inode = &ci->vfs_inode; 725 - igrab(inode); 725 + ihold(inode); 726 726 spin_unlock(&mdsc->snap_flush_lock); 727 727 spin_lock(&inode->i_lock); 728 728 __ceph_flush_snaps(ci, &session, 0);
+4 -2
fs/ceph/xattr.c
··· 665 665 err = PTR_ERR(req); 666 666 goto out; 667 667 } 668 - req->r_inode = igrab(inode); 668 + req->r_inode = inode; 669 + ihold(inode); 669 670 req->r_inode_drop = CEPH_CAP_XATTR_SHARED; 670 671 req->r_num_caps = 1; 671 672 req->r_args.setxattr.flags = cpu_to_le32(flags); ··· 796 795 USE_AUTH_MDS); 797 796 if (IS_ERR(req)) 798 797 return PTR_ERR(req); 799 - req->r_inode = igrab(inode); 798 + req->r_inode = inode; 799 + ihold(inode); 800 800 req->r_inode_drop = CEPH_CAP_XATTR_SHARED; 801 801 req->r_num_caps = 1; 802 802 req->r_path2 = kstrdup(name, GFP_NOFS);
+10 -5
net/ceph/osd_client.c
··· 1144 1144 round_jiffies_relative(delay)); 1145 1145 } 1146 1146 1147 + static void complete_request(struct ceph_osd_request *req) 1148 + { 1149 + if (req->r_safe_callback) 1150 + req->r_safe_callback(req, NULL); 1151 + complete_all(&req->r_safe_completion); /* fsync waiter */ 1152 + } 1153 + 1147 1154 /* 1148 1155 * handle osd op reply. either call the callback if it is specified, 1149 1156 * or do the completion to wake up the waiting thread. ··· 1233 1226 else 1234 1227 complete_all(&req->r_completion); 1235 1228 1236 - if (flags & CEPH_OSD_FLAG_ONDISK) { 1237 - if (req->r_safe_callback) 1238 - req->r_safe_callback(req, msg); 1239 - complete_all(&req->r_safe_completion); /* fsync waiter */ 1240 - } 1229 + if (flags & CEPH_OSD_FLAG_ONDISK) 1230 + complete_request(req); 1241 1231 1242 1232 done: 1243 1233 dout("req=%p req->r_linger=%d\n", req, req->r_linger); ··· 1736 1732 __cancel_request(req); 1737 1733 __unregister_request(osdc, req); 1738 1734 mutex_unlock(&osdc->request_mutex); 1735 + complete_request(req); 1739 1736 dout("wait_request tid %llu canceled/timed out\n", req->r_tid); 1740 1737 return rc; 1741 1738 }