Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'ceph-for-4.10-rc1' of git://github.com/ceph/ceph-client

Pull ceph updates from Ilya Dryomov:
"A varied set of changes:

- a large rework of cephx auth code to cope with CONFIG_VMAP_STACK
(myself). Also fixed a deadlock caused by a bogus allocation on the
writeback path and authorize reply verification.

- a fix for long stalls during fsync (Jeff Layton). The client now
has a way to force the MDS log flush, leading to ~100x speedups in
some synthetic tests.

- a new [no]require_active_mds mount option (Zheng Yan).

On mount, we will now check whether any of the MDSes are available
and bail rather than block if none are. This check can be avoided
by specifying the "no" option.

- a couple of MDS cap handling fixes and a few assorted patches
throughout"

* tag 'ceph-for-4.10-rc1' of git://github.com/ceph/ceph-client: (32 commits)
libceph: remove now unused finish_request() wrapper
libceph: always signal completion when done
ceph: avoid creating orphan object when checking pool permission
ceph: properly set issue_seq for cap release
ceph: add flags parameter to send_cap_msg
ceph: update cap message struct version to 10
ceph: define new argument structure for send_cap_msg
ceph: move xattr initialization before the encoding past the ceph_mds_caps
ceph: fix minor typo in unsafe_request_wait
ceph: record truncate size/seq for snap data writeback
ceph: check availability of mds cluster on mount
ceph: fix splice read for no Fc capability case
ceph: try getting buffer capability for readahead/fadvise
ceph: fix scheduler warning due to nested blocking
ceph: fix printing wrong return variable in ceph_direct_read_write()
crush: include mapper.h in mapper.c
rbd: silence bogus -Wmaybe-uninitialized warning
libceph: no need to drop con->mutex for ->get_authorizer()
libceph: drop len argument of *verify_authorizer_reply()
libceph: verify authorize reply on connect
...

+811 -726
+1 -1
drivers/block/rbd.c
··· 3756 3756 struct rbd_device *rbd_dev = arg; 3757 3757 void *p = data; 3758 3758 void *const end = p + data_len; 3759 - u8 struct_v; 3759 + u8 struct_v = 0; 3760 3760 u32 len; 3761 3761 u32 notify_op; 3762 3762 int ret;
+75 -23
fs/ceph/addr.c
··· 315 315 struct page **pages; 316 316 pgoff_t next_index; 317 317 int nr_pages = 0; 318 - int ret; 318 + int got = 0; 319 + int ret = 0; 320 + 321 + if (!current->journal_info) { 322 + /* caller of readpages does not hold buffer and read caps 323 + * (fadvise, madvise and readahead cases) */ 324 + int want = CEPH_CAP_FILE_CACHE; 325 + ret = ceph_try_get_caps(ci, CEPH_CAP_FILE_RD, want, &got); 326 + if (ret < 0) { 327 + dout("start_read %p, error getting cap\n", inode); 328 + } else if (!(got & want)) { 329 + dout("start_read %p, no cache cap\n", inode); 330 + ret = 0; 331 + } 332 + if (ret <= 0) { 333 + if (got) 334 + ceph_put_cap_refs(ci, got); 335 + while (!list_empty(page_list)) { 336 + page = list_entry(page_list->prev, 337 + struct page, lru); 338 + list_del(&page->lru); 339 + put_page(page); 340 + } 341 + return ret; 342 + } 343 + } 319 344 320 345 off = (u64) page_offset(page); 321 346 ··· 363 338 CEPH_OSD_FLAG_READ, NULL, 364 339 ci->i_truncate_seq, ci->i_truncate_size, 365 340 false); 366 - if (IS_ERR(req)) 367 - return PTR_ERR(req); 341 + if (IS_ERR(req)) { 342 + ret = PTR_ERR(req); 343 + goto out; 344 + } 368 345 369 346 /* build page vector */ 370 347 nr_pages = calc_pages_for(0, len); 371 348 pages = kmalloc(sizeof(*pages) * nr_pages, GFP_KERNEL); 372 - ret = -ENOMEM; 373 - if (!pages) 374 - goto out; 349 + if (!pages) { 350 + ret = -ENOMEM; 351 + goto out_put; 352 + } 375 353 for (i = 0; i < nr_pages; ++i) { 376 354 page = list_entry(page_list->prev, struct page, lru); 377 355 BUG_ON(PageLocked(page)); ··· 406 378 if (ret < 0) 407 379 goto out_pages; 408 380 ceph_osdc_put_request(req); 381 + 382 + /* After adding locked pages to page cache, the inode holds cache cap. 383 + * So we can drop our cap refs. 
*/ 384 + if (got) 385 + ceph_put_cap_refs(ci, got); 386 + 409 387 return nr_pages; 410 388 411 389 out_pages: ··· 420 386 unlock_page(pages[i]); 421 387 } 422 388 ceph_put_page_vector(pages, nr_pages, false); 423 - out: 389 + out_put: 424 390 ceph_osdc_put_request(req); 391 + out: 392 + if (got) 393 + ceph_put_cap_refs(ci, got); 425 394 return ret; 426 395 } 427 396 ··· 461 424 rc = start_read(inode, page_list, max); 462 425 if (rc < 0) 463 426 goto out; 464 - BUG_ON(rc == 0); 465 427 } 466 428 out: 467 429 ceph_fscache_readpages_cancel(inode, page_list); ··· 474 438 * only snap context we are allowed to write back. 475 439 */ 476 440 static struct ceph_snap_context *get_oldest_context(struct inode *inode, 477 - loff_t *snap_size) 441 + loff_t *snap_size, 442 + u64 *truncate_size, 443 + u32 *truncate_seq) 478 444 { 479 445 struct ceph_inode_info *ci = ceph_inode(inode); 480 446 struct ceph_snap_context *snapc = NULL; ··· 490 452 snapc = ceph_get_snap_context(capsnap->context); 491 453 if (snap_size) 492 454 *snap_size = capsnap->size; 455 + if (truncate_size) 456 + *truncate_size = capsnap->truncate_size; 457 + if (truncate_seq) 458 + *truncate_seq = capsnap->truncate_seq; 493 459 break; 494 460 } 495 461 } ··· 501 459 snapc = ceph_get_snap_context(ci->i_head_snapc); 502 460 dout(" head snapc %p has %d dirty pages\n", 503 461 snapc, ci->i_wrbuffer_ref_head); 462 + if (truncate_size) 463 + *truncate_size = capsnap->truncate_size; 464 + if (truncate_seq) 465 + *truncate_seq = capsnap->truncate_seq; 504 466 } 505 467 spin_unlock(&ci->i_ceph_lock); 506 468 return snapc; ··· 547 501 dout("writepage %p page %p not dirty?\n", inode, page); 548 502 goto out; 549 503 } 550 - oldest = get_oldest_context(inode, &snap_size); 504 + oldest = get_oldest_context(inode, &snap_size, 505 + &truncate_size, &truncate_seq); 551 506 if (snapc->seq > oldest->seq) { 552 507 dout("writepage %p page %p snapc %p not writeable - noop\n", 553 508 inode, page, snapc); ··· 559 512 } 560 513 
ceph_put_snap_context(oldest); 561 514 562 - spin_lock(&ci->i_ceph_lock); 563 - truncate_seq = ci->i_truncate_seq; 564 - truncate_size = ci->i_truncate_size; 565 515 if (snap_size == -1) 566 516 snap_size = i_size_read(inode); 567 - spin_unlock(&ci->i_ceph_lock); 568 517 569 518 /* is this a partial page at end of file? */ 570 519 if (page_off >= snap_size) { ··· 807 764 /* find oldest snap context with dirty data */ 808 765 ceph_put_snap_context(snapc); 809 766 snap_size = -1; 810 - snapc = get_oldest_context(inode, &snap_size); 767 + snapc = get_oldest_context(inode, &snap_size, 768 + &truncate_size, &truncate_seq); 811 769 if (!snapc) { 812 770 /* hmm, why does writepages get called when there 813 771 is no dirty data? */ ··· 818 774 dout(" oldest snapc is %p seq %lld (%d snaps)\n", 819 775 snapc, snapc->seq, snapc->num_snaps); 820 776 821 - spin_lock(&ci->i_ceph_lock); 822 - truncate_seq = ci->i_truncate_seq; 823 - truncate_size = ci->i_truncate_size; 824 777 i_size = i_size_read(inode); 825 - spin_unlock(&ci->i_ceph_lock); 826 778 827 779 if (last_snapc && snapc != last_snapc) { 828 780 /* if we switched to a newer snapc, restart our scan at the ··· 1164 1124 static int context_is_writeable_or_written(struct inode *inode, 1165 1125 struct ceph_snap_context *snapc) 1166 1126 { 1167 - struct ceph_snap_context *oldest = get_oldest_context(inode, NULL); 1127 + struct ceph_snap_context *oldest = get_oldest_context(inode, NULL, 1128 + NULL, NULL); 1168 1129 int ret = !oldest || snapc->seq <= oldest->seq; 1169 1130 1170 1131 ceph_put_snap_context(oldest); ··· 1210 1169 * this page is already dirty in another (older) snap 1211 1170 * context! is it writeable now? 
1212 1171 */ 1213 - oldest = get_oldest_context(inode, NULL); 1172 + oldest = get_oldest_context(inode, NULL, NULL, NULL); 1214 1173 1215 1174 if (snapc->seq > oldest->seq) { 1216 1175 ceph_put_snap_context(oldest); ··· 1412 1371 inode, off, (size_t)PAGE_SIZE, ceph_cap_string(got)); 1413 1372 1414 1373 if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) || 1415 - ci->i_inline_version == CEPH_INLINE_NONE) 1374 + ci->i_inline_version == CEPH_INLINE_NONE) { 1375 + current->journal_info = vma->vm_file; 1416 1376 ret = filemap_fault(vma, vmf); 1417 - else 1377 + current->journal_info = NULL; 1378 + } else 1418 1379 ret = -EAGAIN; 1419 1380 1420 1381 dout("filemap_fault %p %llu~%zd dropping cap refs on %s ret %d\n", ··· 1947 1904 s64 pool; 1948 1905 struct ceph_string *pool_ns; 1949 1906 int ret, flags; 1907 + 1908 + if (ci->i_vino.snap != CEPH_NOSNAP) { 1909 + /* 1910 + * Pool permission check needs to write to the first object. 1911 + * But for snapshot, head of the first object may have alread 1912 + * been deleted. Skip check to avoid creating orphan object. 1913 + */ 1914 + return 0; 1915 + } 1950 1916 1951 1917 if (ceph_test_mount_opt(ceph_inode_to_client(&ci->vfs_inode), 1952 1918 NOPOOLPERM))
+207 -120
fs/ceph/caps.c
··· 987 987 __cap_delay_cancel(mdsc, ci); 988 988 } 989 989 990 + struct cap_msg_args { 991 + struct ceph_mds_session *session; 992 + u64 ino, cid, follows; 993 + u64 flush_tid, oldest_flush_tid, size, max_size; 994 + u64 xattr_version; 995 + struct ceph_buffer *xattr_buf; 996 + struct timespec atime, mtime, ctime; 997 + int op, caps, wanted, dirty; 998 + u32 seq, issue_seq, mseq, time_warp_seq; 999 + u32 flags; 1000 + kuid_t uid; 1001 + kgid_t gid; 1002 + umode_t mode; 1003 + bool inline_data; 1004 + }; 1005 + 990 1006 /* 991 1007 * Build and send a cap message to the given MDS. 992 1008 * 993 1009 * Caller should be holding s_mutex. 994 1010 */ 995 - static int send_cap_msg(struct ceph_mds_session *session, 996 - u64 ino, u64 cid, int op, 997 - int caps, int wanted, int dirty, 998 - u32 seq, u64 flush_tid, u64 oldest_flush_tid, 999 - u32 issue_seq, u32 mseq, u64 size, u64 max_size, 1000 - struct timespec *mtime, struct timespec *atime, 1001 - struct timespec *ctime, u32 time_warp_seq, 1002 - kuid_t uid, kgid_t gid, umode_t mode, 1003 - u64 xattr_version, 1004 - struct ceph_buffer *xattrs_buf, 1005 - u64 follows, bool inline_data) 1011 + static int send_cap_msg(struct cap_msg_args *arg) 1006 1012 { 1007 1013 struct ceph_mds_caps *fc; 1008 1014 struct ceph_msg *msg; 1009 1015 void *p; 1010 1016 size_t extra_len; 1017 + struct timespec zerotime = {0}; 1011 1018 1012 1019 dout("send_cap_msg %s %llx %llx caps %s wanted %s dirty %s" 1013 1020 " seq %u/%u tid %llu/%llu mseq %u follows %lld size %llu/%llu" 1014 - " xattr_ver %llu xattr_len %d\n", ceph_cap_op_name(op), 1015 - cid, ino, ceph_cap_string(caps), ceph_cap_string(wanted), 1016 - ceph_cap_string(dirty), 1017 - seq, issue_seq, flush_tid, oldest_flush_tid, 1018 - mseq, follows, size, max_size, 1019 - xattr_version, xattrs_buf ? 
(int)xattrs_buf->vec.iov_len : 0); 1021 + " xattr_ver %llu xattr_len %d\n", ceph_cap_op_name(arg->op), 1022 + arg->cid, arg->ino, ceph_cap_string(arg->caps), 1023 + ceph_cap_string(arg->wanted), ceph_cap_string(arg->dirty), 1024 + arg->seq, arg->issue_seq, arg->flush_tid, arg->oldest_flush_tid, 1025 + arg->mseq, arg->follows, arg->size, arg->max_size, 1026 + arg->xattr_version, 1027 + arg->xattr_buf ? (int)arg->xattr_buf->vec.iov_len : 0); 1020 1028 1021 1029 /* flock buffer size + inline version + inline data size + 1022 1030 * osd_epoch_barrier + oldest_flush_tid */ 1023 - extra_len = 4 + 8 + 4 + 4 + 8; 1031 + extra_len = 4 + 8 + 4 + 4 + 8 + 4 + 4 + 4 + 8 + 8 + 4; 1024 1032 msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc) + extra_len, 1025 1033 GFP_NOFS, false); 1026 1034 if (!msg) 1027 1035 return -ENOMEM; 1028 1036 1029 - msg->hdr.version = cpu_to_le16(6); 1030 - msg->hdr.tid = cpu_to_le64(flush_tid); 1037 + msg->hdr.version = cpu_to_le16(10); 1038 + msg->hdr.tid = cpu_to_le64(arg->flush_tid); 1031 1039 1032 1040 fc = msg->front.iov_base; 1033 1041 memset(fc, 0, sizeof(*fc)); 1034 1042 1035 - fc->cap_id = cpu_to_le64(cid); 1036 - fc->op = cpu_to_le32(op); 1037 - fc->seq = cpu_to_le32(seq); 1038 - fc->issue_seq = cpu_to_le32(issue_seq); 1039 - fc->migrate_seq = cpu_to_le32(mseq); 1040 - fc->caps = cpu_to_le32(caps); 1041 - fc->wanted = cpu_to_le32(wanted); 1042 - fc->dirty = cpu_to_le32(dirty); 1043 - fc->ino = cpu_to_le64(ino); 1044 - fc->snap_follows = cpu_to_le64(follows); 1043 + fc->cap_id = cpu_to_le64(arg->cid); 1044 + fc->op = cpu_to_le32(arg->op); 1045 + fc->seq = cpu_to_le32(arg->seq); 1046 + fc->issue_seq = cpu_to_le32(arg->issue_seq); 1047 + fc->migrate_seq = cpu_to_le32(arg->mseq); 1048 + fc->caps = cpu_to_le32(arg->caps); 1049 + fc->wanted = cpu_to_le32(arg->wanted); 1050 + fc->dirty = cpu_to_le32(arg->dirty); 1051 + fc->ino = cpu_to_le64(arg->ino); 1052 + fc->snap_follows = cpu_to_le64(arg->follows); 1045 1053 1046 - fc->size = 
cpu_to_le64(size); 1047 - fc->max_size = cpu_to_le64(max_size); 1048 - if (mtime) 1049 - ceph_encode_timespec(&fc->mtime, mtime); 1050 - if (atime) 1051 - ceph_encode_timespec(&fc->atime, atime); 1052 - if (ctime) 1053 - ceph_encode_timespec(&fc->ctime, ctime); 1054 - fc->time_warp_seq = cpu_to_le32(time_warp_seq); 1054 + fc->size = cpu_to_le64(arg->size); 1055 + fc->max_size = cpu_to_le64(arg->max_size); 1056 + ceph_encode_timespec(&fc->mtime, &arg->mtime); 1057 + ceph_encode_timespec(&fc->atime, &arg->atime); 1058 + ceph_encode_timespec(&fc->ctime, &arg->ctime); 1059 + fc->time_warp_seq = cpu_to_le32(arg->time_warp_seq); 1055 1060 1056 - fc->uid = cpu_to_le32(from_kuid(&init_user_ns, uid)); 1057 - fc->gid = cpu_to_le32(from_kgid(&init_user_ns, gid)); 1058 - fc->mode = cpu_to_le32(mode); 1061 + fc->uid = cpu_to_le32(from_kuid(&init_user_ns, arg->uid)); 1062 + fc->gid = cpu_to_le32(from_kgid(&init_user_ns, arg->gid)); 1063 + fc->mode = cpu_to_le32(arg->mode); 1059 1064 1060 - p = fc + 1; 1061 - /* flock buffer size */ 1062 - ceph_encode_32(&p, 0); 1063 - /* inline version */ 1064 - ceph_encode_64(&p, inline_data ? 
0 : CEPH_INLINE_NONE); 1065 - /* inline data size */ 1066 - ceph_encode_32(&p, 0); 1067 - /* osd_epoch_barrier */ 1068 - ceph_encode_32(&p, 0); 1069 - /* oldest_flush_tid */ 1070 - ceph_encode_64(&p, oldest_flush_tid); 1071 - 1072 - fc->xattr_version = cpu_to_le64(xattr_version); 1073 - if (xattrs_buf) { 1074 - msg->middle = ceph_buffer_get(xattrs_buf); 1075 - fc->xattr_len = cpu_to_le32(xattrs_buf->vec.iov_len); 1076 - msg->hdr.middle_len = cpu_to_le32(xattrs_buf->vec.iov_len); 1065 + fc->xattr_version = cpu_to_le64(arg->xattr_version); 1066 + if (arg->xattr_buf) { 1067 + msg->middle = ceph_buffer_get(arg->xattr_buf); 1068 + fc->xattr_len = cpu_to_le32(arg->xattr_buf->vec.iov_len); 1069 + msg->hdr.middle_len = cpu_to_le32(arg->xattr_buf->vec.iov_len); 1077 1070 } 1078 1071 1079 - ceph_con_send(&session->s_con, msg); 1072 + p = fc + 1; 1073 + /* flock buffer size (version 2) */ 1074 + ceph_encode_32(&p, 0); 1075 + /* inline version (version 4) */ 1076 + ceph_encode_64(&p, arg->inline_data ? 0 : CEPH_INLINE_NONE); 1077 + /* inline data size */ 1078 + ceph_encode_32(&p, 0); 1079 + /* osd_epoch_barrier (version 5) */ 1080 + ceph_encode_32(&p, 0); 1081 + /* oldest_flush_tid (version 6) */ 1082 + ceph_encode_64(&p, arg->oldest_flush_tid); 1083 + 1084 + /* 1085 + * caller_uid/caller_gid (version 7) 1086 + * 1087 + * Currently, we don't properly track which caller dirtied the caps 1088 + * last, and force a flush of them when there is a conflict. For now, 1089 + * just set this to 0:0, to emulate how the MDS has worked up to now. 1090 + */ 1091 + ceph_encode_32(&p, 0); 1092 + ceph_encode_32(&p, 0); 1093 + 1094 + /* pool namespace (version 8) (mds always ignores this) */ 1095 + ceph_encode_32(&p, 0); 1096 + 1097 + /* 1098 + * btime and change_attr (version 9) 1099 + * 1100 + * We just zero these out for now, as the MDS ignores them unless 1101 + * the requisite feature flags are set (which we don't do yet). 
1102 + */ 1103 + ceph_encode_timespec(p, &zerotime); 1104 + p += sizeof(struct ceph_timespec); 1105 + ceph_encode_64(&p, 0); 1106 + 1107 + /* Advisory flags (version 10) */ 1108 + ceph_encode_32(&p, arg->flags); 1109 + 1110 + ceph_con_send(&arg->session->s_con, msg); 1080 1111 return 0; 1081 1112 } 1082 1113 ··· 1146 1115 * caller should hold snap_rwsem (read), s_mutex. 1147 1116 */ 1148 1117 static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, 1149 - int op, int used, int want, int retain, int flushing, 1150 - u64 flush_tid, u64 oldest_flush_tid) 1118 + int op, bool sync, int used, int want, int retain, 1119 + int flushing, u64 flush_tid, u64 oldest_flush_tid) 1151 1120 __releases(cap->ci->i_ceph_lock) 1152 1121 { 1153 1122 struct ceph_inode_info *ci = cap->ci; 1154 1123 struct inode *inode = &ci->vfs_inode; 1155 - u64 cap_id = cap->cap_id; 1156 - int held, revoking, dropping, keep; 1157 - u64 follows, size, max_size; 1158 - u32 seq, issue_seq, mseq, time_warp_seq; 1159 - struct timespec mtime, atime, ctime; 1124 + struct cap_msg_args arg; 1125 + int held, revoking, dropping; 1160 1126 int wake = 0; 1161 - umode_t mode; 1162 - kuid_t uid; 1163 - kgid_t gid; 1164 - struct ceph_mds_session *session; 1165 - u64 xattr_version = 0; 1166 - struct ceph_buffer *xattr_blob = NULL; 1167 1127 int delayed = 0; 1168 1128 int ret; 1169 - bool inline_data; 1170 1129 1171 1130 held = cap->issued | cap->implemented; 1172 1131 revoking = cap->implemented & ~cap->issued; ··· 1169 1148 ceph_cap_string(revoking)); 1170 1149 BUG_ON((retain & CEPH_CAP_PIN) == 0); 1171 1150 1172 - session = cap->session; 1151 + arg.session = cap->session; 1173 1152 1174 1153 /* don't release wanted unless we've waited a bit. */ 1175 1154 if ((ci->i_ceph_flags & CEPH_I_NODELAY) == 0 && ··· 1198 1177 cap->implemented &= cap->issued | used; 1199 1178 cap->mds_wanted = want; 1200 1179 1201 - follows = flushing ? 
ci->i_head_snapc->seq : 0; 1180 + arg.ino = ceph_vino(inode).ino; 1181 + arg.cid = cap->cap_id; 1182 + arg.follows = flushing ? ci->i_head_snapc->seq : 0; 1183 + arg.flush_tid = flush_tid; 1184 + arg.oldest_flush_tid = oldest_flush_tid; 1202 1185 1203 - keep = cap->implemented; 1204 - seq = cap->seq; 1205 - issue_seq = cap->issue_seq; 1206 - mseq = cap->mseq; 1207 - size = inode->i_size; 1208 - ci->i_reported_size = size; 1209 - max_size = ci->i_wanted_max_size; 1210 - ci->i_requested_max_size = max_size; 1211 - mtime = inode->i_mtime; 1212 - atime = inode->i_atime; 1213 - ctime = inode->i_ctime; 1214 - time_warp_seq = ci->i_time_warp_seq; 1215 - uid = inode->i_uid; 1216 - gid = inode->i_gid; 1217 - mode = inode->i_mode; 1186 + arg.size = inode->i_size; 1187 + ci->i_reported_size = arg.size; 1188 + arg.max_size = ci->i_wanted_max_size; 1189 + ci->i_requested_max_size = arg.max_size; 1218 1190 1219 1191 if (flushing & CEPH_CAP_XATTR_EXCL) { 1220 1192 __ceph_build_xattrs_blob(ci); 1221 - xattr_blob = ci->i_xattrs.blob; 1222 - xattr_version = ci->i_xattrs.version; 1193 + arg.xattr_version = ci->i_xattrs.version; 1194 + arg.xattr_buf = ci->i_xattrs.blob; 1195 + } else { 1196 + arg.xattr_buf = NULL; 1223 1197 } 1224 1198 1225 - inline_data = ci->i_inline_version != CEPH_INLINE_NONE; 1199 + arg.mtime = inode->i_mtime; 1200 + arg.atime = inode->i_atime; 1201 + arg.ctime = inode->i_ctime; 1202 + 1203 + arg.op = op; 1204 + arg.caps = cap->implemented; 1205 + arg.wanted = want; 1206 + arg.dirty = flushing; 1207 + 1208 + arg.seq = cap->seq; 1209 + arg.issue_seq = cap->issue_seq; 1210 + arg.mseq = cap->mseq; 1211 + arg.time_warp_seq = ci->i_time_warp_seq; 1212 + 1213 + arg.uid = inode->i_uid; 1214 + arg.gid = inode->i_gid; 1215 + arg.mode = inode->i_mode; 1216 + 1217 + arg.inline_data = ci->i_inline_version != CEPH_INLINE_NONE; 1218 + arg.flags = 0; 1219 + if (sync) 1220 + arg.flags |= CEPH_CLIENT_CAPS_SYNC; 1226 1221 1227 1222 spin_unlock(&ci->i_ceph_lock); 1228 1223 1229 - 
ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id, 1230 - op, keep, want, flushing, seq, 1231 - flush_tid, oldest_flush_tid, issue_seq, mseq, 1232 - size, max_size, &mtime, &atime, &ctime, time_warp_seq, 1233 - uid, gid, mode, xattr_version, xattr_blob, 1234 - follows, inline_data); 1224 + ret = send_cap_msg(&arg); 1235 1225 if (ret < 0) { 1236 1226 dout("error sending cap msg, must requeue %p\n", inode); 1237 1227 delayed = 1; ··· 1259 1227 struct ceph_cap_snap *capsnap, 1260 1228 u32 mseq, u64 oldest_flush_tid) 1261 1229 { 1262 - return send_cap_msg(session, ceph_vino(inode).ino, 0, 1263 - CEPH_CAP_OP_FLUSHSNAP, capsnap->issued, 0, 1264 - capsnap->dirty, 0, capsnap->cap_flush.tid, 1265 - oldest_flush_tid, 0, mseq, capsnap->size, 0, 1266 - &capsnap->mtime, &capsnap->atime, 1267 - &capsnap->ctime, capsnap->time_warp_seq, 1268 - capsnap->uid, capsnap->gid, capsnap->mode, 1269 - capsnap->xattr_version, capsnap->xattr_blob, 1270 - capsnap->follows, capsnap->inline_data); 1230 + struct cap_msg_args arg; 1231 + 1232 + arg.session = session; 1233 + arg.ino = ceph_vino(inode).ino; 1234 + arg.cid = 0; 1235 + arg.follows = capsnap->follows; 1236 + arg.flush_tid = capsnap->cap_flush.tid; 1237 + arg.oldest_flush_tid = oldest_flush_tid; 1238 + 1239 + arg.size = capsnap->size; 1240 + arg.max_size = 0; 1241 + arg.xattr_version = capsnap->xattr_version; 1242 + arg.xattr_buf = capsnap->xattr_blob; 1243 + 1244 + arg.atime = capsnap->atime; 1245 + arg.mtime = capsnap->mtime; 1246 + arg.ctime = capsnap->ctime; 1247 + 1248 + arg.op = CEPH_CAP_OP_FLUSHSNAP; 1249 + arg.caps = capsnap->issued; 1250 + arg.wanted = 0; 1251 + arg.dirty = capsnap->dirty; 1252 + 1253 + arg.seq = 0; 1254 + arg.issue_seq = 0; 1255 + arg.mseq = mseq; 1256 + arg.time_warp_seq = capsnap->time_warp_seq; 1257 + 1258 + arg.uid = capsnap->uid; 1259 + arg.gid = capsnap->gid; 1260 + arg.mode = capsnap->mode; 1261 + 1262 + arg.inline_data = capsnap->inline_data; 1263 + arg.flags = 0; 1264 + 1265 + return 
send_cap_msg(&arg); 1271 1266 } 1272 1267 1273 1268 /* ··· 1917 1858 sent++; 1918 1859 1919 1860 /* __send_cap drops i_ceph_lock */ 1920 - delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, cap_used, 1921 - want, retain, flushing, 1922 - flush_tid, oldest_flush_tid); 1861 + delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, false, 1862 + cap_used, want, retain, flushing, 1863 + flush_tid, oldest_flush_tid); 1923 1864 goto retry; /* retake i_ceph_lock and restart our cap scan. */ 1924 1865 } 1925 1866 ··· 1983 1924 &flush_tid, &oldest_flush_tid); 1984 1925 1985 1926 /* __send_cap drops i_ceph_lock */ 1986 - delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, used, want, 1987 - (cap->issued | cap->implemented), 1988 - flushing, flush_tid, oldest_flush_tid); 1927 + delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, true, 1928 + used, want, (cap->issued | cap->implemented), 1929 + flushing, flush_tid, oldest_flush_tid); 1989 1930 1990 1931 if (delayed) { 1991 1932 spin_lock(&ci->i_ceph_lock); ··· 2055 1996 } 2056 1997 spin_unlock(&ci->i_unsafe_lock); 2057 1998 2058 - dout("unsafe_requeset_wait %p wait on tid %llu %llu\n", 1999 + dout("unsafe_request_wait %p wait on tid %llu %llu\n", 2059 2000 inode, req1 ? req1->r_tid : 0ULL, req2 ? 
req2->r_tid : 0ULL); 2060 2001 if (req1) { 2061 2002 ret = !wait_for_completion_timeout(&req1->r_safe_completion, ··· 2178 2119 inode, cap, cf->tid, ceph_cap_string(cf->caps)); 2179 2120 ci->i_ceph_flags |= CEPH_I_NODELAY; 2180 2121 ret = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, 2181 - __ceph_caps_used(ci), 2122 + false, __ceph_caps_used(ci), 2182 2123 __ceph_caps_wanted(ci), 2183 2124 cap->issued | cap->implemented, 2184 2125 cf->caps, cf->tid, oldest_flush_tid); ··· 2538 2479 ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL); 2539 2480 } 2540 2481 2482 + int ceph_try_get_caps(struct ceph_inode_info *ci, int need, int want, int *got) 2483 + { 2484 + int ret, err = 0; 2485 + 2486 + BUG_ON(need & ~CEPH_CAP_FILE_RD); 2487 + BUG_ON(want & ~(CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)); 2488 + ret = ceph_pool_perm_check(ci, need); 2489 + if (ret < 0) 2490 + return ret; 2491 + 2492 + ret = try_get_cap_refs(ci, need, want, 0, true, got, &err); 2493 + if (ret) { 2494 + if (err == -EAGAIN) { 2495 + ret = 0; 2496 + } else if (err < 0) { 2497 + ret = err; 2498 + } 2499 + } 2500 + return ret; 2501 + } 2502 + 2541 2503 /* 2542 2504 * Wait for caps, and take cap references. 
If we can't get a WR cap 2543 2505 * due to a small max_size, make sure we check_max_size (and possibly ··· 2587 2507 if (err < 0) 2588 2508 ret = err; 2589 2509 } else { 2590 - ret = wait_event_interruptible(ci->i_cap_wq, 2591 - try_get_cap_refs(ci, need, want, endoff, 2592 - true, &_got, &err)); 2510 + DEFINE_WAIT_FUNC(wait, woken_wake_function); 2511 + add_wait_queue(&ci->i_cap_wq, &wait); 2512 + 2513 + while (!try_get_cap_refs(ci, need, want, endoff, 2514 + true, &_got, &err)) 2515 + wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); 2516 + 2517 + remove_wait_queue(&ci->i_cap_wq, &wait); 2518 + 2593 2519 if (err == -EAGAIN) 2594 2520 continue; 2595 2521 if (err < 0) ··· 3656 3570 cap->cap_id = le64_to_cpu(h->cap_id); 3657 3571 cap->mseq = mseq; 3658 3572 cap->seq = seq; 3573 + cap->issue_seq = seq; 3659 3574 spin_lock(&session->s_cap_lock); 3660 3575 list_add_tail(&cap->session_caps, 3661 3576 &session->s_cap_releases);
+69 -56
fs/ceph/file.c
··· 454 454 * only return a short read to the caller if we hit EOF. 455 455 */ 456 456 static int striped_read(struct inode *inode, 457 - u64 off, u64 len, 457 + u64 pos, u64 len, 458 458 struct page **pages, int num_pages, 459 - int *checkeof) 459 + int page_align, int *checkeof) 460 460 { 461 461 struct ceph_fs_client *fsc = ceph_inode_to_client(inode); 462 462 struct ceph_inode_info *ci = ceph_inode(inode); 463 - u64 pos, this_len, left; 463 + u64 this_len; 464 464 loff_t i_size; 465 - int page_align, pages_left; 466 - int read, ret; 467 - struct page **page_pos; 465 + int page_idx; 466 + int ret, read = 0; 468 467 bool hit_stripe, was_short; 469 468 470 469 /* 471 470 * we may need to do multiple reads. not atomic, unfortunately. 472 471 */ 473 - pos = off; 474 - left = len; 475 - page_pos = pages; 476 - pages_left = num_pages; 477 - read = 0; 478 - 479 472 more: 480 - page_align = pos & ~PAGE_MASK; 481 - this_len = left; 473 + this_len = len; 474 + page_idx = (page_align + read) >> PAGE_SHIFT; 482 475 ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode), 483 476 &ci->i_layout, pos, &this_len, 484 - ci->i_truncate_seq, 485 - ci->i_truncate_size, 486 - page_pos, pages_left, page_align); 477 + ci->i_truncate_seq, ci->i_truncate_size, 478 + pages + page_idx, num_pages - page_idx, 479 + ((page_align + read) & ~PAGE_MASK)); 487 480 if (ret == -ENOENT) 488 481 ret = 0; 489 - hit_stripe = this_len < left; 482 + hit_stripe = this_len < len; 490 483 was_short = ret >= 0 && ret < this_len; 491 - dout("striped_read %llu~%llu (read %u) got %d%s%s\n", pos, left, read, 484 + dout("striped_read %llu~%llu (read %u) got %d%s%s\n", pos, len, read, 492 485 ret, hit_stripe ? " HITSTRIPE" : "", was_short ? 
" SHORT" : ""); 493 486 494 487 i_size = i_size_read(inode); 495 488 if (ret >= 0) { 496 - int didpages; 497 489 if (was_short && (pos + ret < i_size)) { 498 490 int zlen = min(this_len - ret, i_size - pos - ret); 499 - int zoff = (off & ~PAGE_MASK) + read + ret; 491 + int zoff = page_align + read + ret; 500 492 dout(" zero gap %llu to %llu\n", 501 - pos + ret, pos + ret + zlen); 493 + pos + ret, pos + ret + zlen); 502 494 ceph_zero_page_vector_range(zoff, zlen, pages); 503 495 ret += zlen; 504 496 } 505 497 506 - didpages = (page_align + ret) >> PAGE_SHIFT; 498 + read += ret; 507 499 pos += ret; 508 - read = pos - off; 509 - left -= ret; 510 - page_pos += didpages; 511 - pages_left -= didpages; 500 + len -= ret; 512 501 513 502 /* hit stripe and need continue*/ 514 - if (left && hit_stripe && pos < i_size) 503 + if (len && hit_stripe && pos < i_size) 515 504 goto more; 516 505 } 517 506 518 507 if (read > 0) { 519 508 ret = read; 520 509 /* did we bounce off eof? */ 521 - if (pos + left > i_size) 510 + if (pos + len > i_size) 522 511 *checkeof = CHECK_EOF; 523 512 } 524 513 ··· 521 532 * 522 533 * If the read spans object boundary, just do multiple reads. 
523 534 */ 524 - static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i, 525 - int *checkeof) 535 + static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to, 536 + int *checkeof) 526 537 { 527 538 struct file *file = iocb->ki_filp; 528 539 struct inode *inode = file_inode(file); 529 540 struct page **pages; 530 541 u64 off = iocb->ki_pos; 531 - int num_pages, ret; 532 - size_t len = iov_iter_count(i); 542 + int num_pages; 543 + ssize_t ret; 544 + size_t len = iov_iter_count(to); 533 545 534 546 dout("sync_read on file %p %llu~%u %s\n", file, off, 535 547 (unsigned)len, ··· 549 559 if (ret < 0) 550 560 return ret; 551 561 552 - num_pages = calc_pages_for(off, len); 553 - pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); 554 - if (IS_ERR(pages)) 555 - return PTR_ERR(pages); 556 - ret = striped_read(inode, off, len, pages, 557 - num_pages, checkeof); 558 - if (ret > 0) { 559 - int l, k = 0; 560 - size_t left = ret; 562 + if (unlikely(to->type & ITER_PIPE)) { 563 + size_t page_off; 564 + ret = iov_iter_get_pages_alloc(to, &pages, len, 565 + &page_off); 566 + if (ret <= 0) 567 + return -ENOMEM; 568 + num_pages = DIV_ROUND_UP(ret + page_off, PAGE_SIZE); 561 569 562 - while (left) { 563 - size_t page_off = off & ~PAGE_MASK; 564 - size_t copy = min_t(size_t, left, 565 - PAGE_SIZE - page_off); 566 - l = copy_page_to_iter(pages[k++], page_off, copy, i); 567 - off += l; 568 - left -= l; 569 - if (l < copy) 570 - break; 570 + ret = striped_read(inode, off, ret, pages, num_pages, 571 + page_off, checkeof); 572 + if (ret > 0) { 573 + iov_iter_advance(to, ret); 574 + off += ret; 575 + } else { 576 + iov_iter_advance(to, 0); 571 577 } 578 + ceph_put_page_vector(pages, num_pages, false); 579 + } else { 580 + num_pages = calc_pages_for(off, len); 581 + pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); 582 + if (IS_ERR(pages)) 583 + return PTR_ERR(pages); 584 + 585 + ret = striped_read(inode, off, len, pages, num_pages, 586 + (off & ~PAGE_MASK), 
checkeof); 587 + if (ret > 0) { 588 + int l, k = 0; 589 + size_t left = ret; 590 + 591 + while (left) { 592 + size_t page_off = off & ~PAGE_MASK; 593 + size_t copy = min_t(size_t, left, 594 + PAGE_SIZE - page_off); 595 + l = copy_page_to_iter(pages[k++], page_off, 596 + copy, to); 597 + off += l; 598 + left -= l; 599 + if (l < copy) 600 + break; 601 + } 602 + } 603 + ceph_release_page_vector(pages, num_pages); 572 604 } 573 - ceph_release_page_vector(pages, num_pages); 574 605 575 606 if (off > iocb->ki_pos) { 576 607 ret = off - iocb->ki_pos; 577 608 iocb->ki_pos = off; 578 609 } 579 610 580 - dout("sync_read result %d\n", ret); 611 + dout("sync_read result %zd\n", ret); 581 612 return ret; 582 613 } 583 614 ··· 860 849 861 850 dout("sync_write_wait on tid %llu (until %llu)\n", 862 851 req->r_tid, last_tid); 863 - wait_for_completion(&req->r_safe_completion); 852 + wait_for_completion(&req->r_done_completion); 864 853 ceph_osdc_put_request(req); 865 854 866 855 spin_lock(&ci->i_unsafe_lock); ··· 913 902 pos >> PAGE_SHIFT, 914 903 (pos + count) >> PAGE_SHIFT); 915 904 if (ret2 < 0) 916 - dout("invalidate_inode_pages2_range returned %d\n", ret); 905 + dout("invalidate_inode_pages2_range returned %d\n", ret2); 917 906 918 907 flags = CEPH_OSD_FLAG_ORDERSNAP | 919 908 CEPH_OSD_FLAG_ONDISK | ··· 1256 1245 dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n", 1257 1246 inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, 1258 1247 ceph_cap_string(got)); 1259 - 1248 + current->journal_info = filp; 1260 1249 ret = generic_file_read_iter(iocb, to); 1250 + current->journal_info = NULL; 1261 1251 } 1262 1252 dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n", 1263 1253 inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret); ··· 1778 1766 .fsync = ceph_fsync, 1779 1767 .lock = ceph_lock, 1780 1768 .flock = ceph_flock, 1769 + .splice_read = generic_file_splice_read, 1781 1770 .splice_write = iter_file_splice_write, 1782 1771 .unlocked_ioctl = ceph_ioctl, 
1783 1772 .compat_ioctl = ceph_ioctl,
+16 -7
fs/ceph/mds_client.c
··· 2100 2100 err = -EIO; 2101 2101 goto finish; 2102 2102 } 2103 + if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_MOUNTING) { 2104 + if (mdsc->mdsmap_err) { 2105 + err = mdsc->mdsmap_err; 2106 + dout("do_request mdsmap err %d\n", err); 2107 + goto finish; 2108 + } 2109 + if (!(mdsc->fsc->mount_options->flags & 2110 + CEPH_MOUNT_OPT_MOUNTWAIT) && 2111 + !ceph_mdsmap_is_cluster_available(mdsc->mdsmap)) { 2112 + err = -ENOENT; 2113 + pr_info("probably no mds server is up\n"); 2114 + goto finish; 2115 + } 2116 + } 2103 2117 2104 2118 put_request_session(req); 2105 2119 2106 2120 mds = __choose_mds(mdsc, req); 2107 2121 if (mds < 0 || 2108 2122 ceph_mdsmap_get_state(mdsc->mdsmap, mds) < CEPH_MDS_STATE_ACTIVE) { 2109 - if (mdsc->mdsmap_err) { 2110 - err = mdsc->mdsmap_err; 2111 - dout("do_request mdsmap err %d\n", err); 2112 - goto finish; 2113 - } 2114 2123 dout("do_request no mds or not active, waiting for map\n"); 2115 2124 list_add(&req->r_wait, &mdsc->waiting_for_map); 2116 2125 goto out; ··· 3952 3943 } 3953 3944 3954 3945 3955 - static int verify_authorizer_reply(struct ceph_connection *con, int len) 3946 + static int verify_authorizer_reply(struct ceph_connection *con) 3956 3947 { 3957 3948 struct ceph_mds_session *s = con->private; 3958 3949 struct ceph_mds_client *mdsc = s->s_mdsc; 3959 3950 struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth; 3960 3951 3961 - return ceph_auth_verify_authorizer_reply(ac, s->s_auth.authorizer, len); 3952 + return ceph_auth_verify_authorizer_reply(ac, s->s_auth.authorizer); 3962 3953 } 3963 3954 3964 3955 static int invalidate_authorizer(struct ceph_connection *con)
+157 -6
fs/ceph/mdsmap.c
··· 42 42 return i; 43 43 } 44 44 45 + #define __decode_and_drop_type(p, end, type, bad) \ 46 + do { \ 47 + if (*p + sizeof(type) > end) \ 48 + goto bad; \ 49 + *p += sizeof(type); \ 50 + } while (0) 51 + 52 + #define __decode_and_drop_set(p, end, type, bad) \ 53 + do { \ 54 + u32 n; \ 55 + size_t need; \ 56 + ceph_decode_32_safe(p, end, n, bad); \ 57 + need = sizeof(type) * n; \ 58 + ceph_decode_need(p, end, need, bad); \ 59 + *p += need; \ 60 + } while (0) 61 + 62 + #define __decode_and_drop_map(p, end, ktype, vtype, bad) \ 63 + do { \ 64 + u32 n; \ 65 + size_t need; \ 66 + ceph_decode_32_safe(p, end, n, bad); \ 67 + need = (sizeof(ktype) + sizeof(vtype)) * n; \ 68 + ceph_decode_need(p, end, need, bad); \ 69 + *p += need; \ 70 + } while (0) 71 + 72 + 73 + static int __decode_and_drop_compat_set(void **p, void* end) 74 + { 75 + int i; 76 + /* compat, ro_compat, incompat*/ 77 + for (i = 0; i < 3; i++) { 78 + u32 n; 79 + ceph_decode_need(p, end, sizeof(u64) + sizeof(u32), bad); 80 + /* mask */ 81 + *p += sizeof(u64); 82 + /* names (map<u64, string>) */ 83 + n = ceph_decode_32(p); 84 + while (n-- > 0) { 85 + u32 len; 86 + ceph_decode_need(p, end, sizeof(u64) + sizeof(u32), 87 + bad); 88 + *p += sizeof(u64); 89 + len = ceph_decode_32(p); 90 + ceph_decode_need(p, end, len, bad); 91 + *p += len; 92 + } 93 + } 94 + return 0; 95 + bad: 96 + return -1; 97 + } 98 + 45 99 /* 46 100 * Decode an MDS map 47 101 * ··· 109 55 int i, j, n; 110 56 int err = -EINVAL; 111 57 u8 mdsmap_v, mdsmap_cv; 58 + u16 mdsmap_ev; 112 59 113 60 m = kzalloc(sizeof(*m), GFP_NOFS); 114 61 if (m == NULL) ··· 138 83 139 84 m->m_info = kcalloc(m->m_max_mds, sizeof(*m->m_info), GFP_NOFS); 140 85 if (m->m_info == NULL) 141 - goto badmem; 86 + goto nomem; 142 87 143 88 /* pick out active nodes from mds_info (state > 0) */ 144 89 n = ceph_decode_32(p); ··· 221 166 info->export_targets = kcalloc(num_export_targets, 222 167 sizeof(u32), GFP_NOFS); 223 168 if (info->export_targets == NULL) 224 - goto badmem; 
169 + goto nomem; 225 170 for (j = 0; j < num_export_targets; j++) 226 171 info->export_targets[j] = 227 172 ceph_decode_32(&pexport_targets); ··· 235 180 m->m_num_data_pg_pools = n; 236 181 m->m_data_pg_pools = kcalloc(n, sizeof(u64), GFP_NOFS); 237 182 if (!m->m_data_pg_pools) 238 - goto badmem; 183 + goto nomem; 239 184 ceph_decode_need(p, end, sizeof(u64)*(n+1), bad); 240 185 for (i = 0; i < n; i++) 241 186 m->m_data_pg_pools[i] = ceph_decode_64(p); 242 187 m->m_cas_pg_pool = ceph_decode_64(p); 188 + m->m_enabled = m->m_epoch > 1; 243 189 244 - /* ok, we don't care about the rest. */ 190 + mdsmap_ev = 1; 191 + if (mdsmap_v >= 2) { 192 + ceph_decode_16_safe(p, end, mdsmap_ev, bad_ext); 193 + } 194 + if (mdsmap_ev >= 3) { 195 + if (__decode_and_drop_compat_set(p, end) < 0) 196 + goto bad_ext; 197 + } 198 + /* metadata_pool */ 199 + if (mdsmap_ev < 5) { 200 + __decode_and_drop_type(p, end, u32, bad_ext); 201 + } else { 202 + __decode_and_drop_type(p, end, u64, bad_ext); 203 + } 204 + 205 + /* created + modified + tableserver */ 206 + __decode_and_drop_type(p, end, struct ceph_timespec, bad_ext); 207 + __decode_and_drop_type(p, end, struct ceph_timespec, bad_ext); 208 + __decode_and_drop_type(p, end, u32, bad_ext); 209 + 210 + /* in */ 211 + { 212 + int num_laggy = 0; 213 + ceph_decode_32_safe(p, end, n, bad_ext); 214 + ceph_decode_need(p, end, sizeof(u32) * n, bad_ext); 215 + 216 + for (i = 0; i < n; i++) { 217 + s32 mds = ceph_decode_32(p); 218 + if (mds >= 0 && mds < m->m_max_mds) { 219 + if (m->m_info[mds].laggy) 220 + num_laggy++; 221 + } 222 + } 223 + m->m_num_laggy = num_laggy; 224 + } 225 + 226 + /* inc */ 227 + __decode_and_drop_map(p, end, u32, u32, bad_ext); 228 + /* up */ 229 + __decode_and_drop_map(p, end, u32, u64, bad_ext); 230 + /* failed */ 231 + __decode_and_drop_set(p, end, u32, bad_ext); 232 + /* stopped */ 233 + __decode_and_drop_set(p, end, u32, bad_ext); 234 + 235 + if (mdsmap_ev >= 4) { 236 + /* last_failure_osd_epoch */ 237 + 
__decode_and_drop_type(p, end, u32, bad_ext); 238 + } 239 + if (mdsmap_ev >= 6) { 240 + /* ever_allowed_snaps */ 241 + __decode_and_drop_type(p, end, u8, bad_ext); 242 + /* explicitly_allowed_snaps */ 243 + __decode_and_drop_type(p, end, u8, bad_ext); 244 + } 245 + if (mdsmap_ev >= 7) { 246 + /* inline_data_enabled */ 247 + __decode_and_drop_type(p, end, u8, bad_ext); 248 + } 249 + if (mdsmap_ev >= 8) { 250 + u32 name_len; 251 + /* enabled */ 252 + ceph_decode_8_safe(p, end, m->m_enabled, bad_ext); 253 + ceph_decode_32_safe(p, end, name_len, bad_ext); 254 + ceph_decode_need(p, end, name_len, bad_ext); 255 + *p += name_len; 256 + } 257 + /* damaged */ 258 + if (mdsmap_ev >= 9) { 259 + size_t need; 260 + ceph_decode_32_safe(p, end, n, bad_ext); 261 + need = sizeof(u32) * n; 262 + ceph_decode_need(p, end, need, bad_ext); 263 + *p += need; 264 + m->m_damaged = n > 0; 265 + } else { 266 + m->m_damaged = false; 267 + } 268 + bad_ext: 245 269 *p = end; 246 270 dout("mdsmap_decode success epoch %u\n", m->m_epoch); 247 271 return m; 248 - 249 - badmem: 272 + nomem: 250 273 err = -ENOMEM; 274 + goto out_err; 251 275 bad: 252 276 pr_err("corrupt mdsmap\n"); 253 277 print_hex_dump(KERN_DEBUG, "mdsmap: ", 254 278 DUMP_PREFIX_OFFSET, 16, 1, 255 279 start, end - start, true); 280 + out_err: 256 281 ceph_mdsmap_destroy(m); 257 282 return ERR_PTR(err); 258 283 } ··· 346 211 kfree(m->m_info); 347 212 kfree(m->m_data_pg_pools); 348 213 kfree(m); 214 + } 215 + 216 + bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m) 217 + { 218 + int i, nr_active = 0; 219 + if (!m->m_enabled) 220 + return false; 221 + if (m->m_damaged) 222 + return false; 223 + if (m->m_num_laggy > 0) 224 + return false; 225 + for (i = 0; i < m->m_max_mds; i++) { 226 + if (m->m_info[i].state == CEPH_MDS_STATE_ACTIVE) 227 + nr_active++; 228 + } 229 + return nr_active > 0; 349 230 }
+2
fs/ceph/snap.c
··· 593 593 capsnap->atime = inode->i_atime; 594 594 capsnap->ctime = inode->i_ctime; 595 595 capsnap->time_warp_seq = ci->i_time_warp_seq; 596 + capsnap->truncate_size = ci->i_truncate_size; 597 + capsnap->truncate_seq = ci->i_truncate_seq; 596 598 if (capsnap->dirty_pages) { 597 599 dout("finish_cap_snap %p cap_snap %p snapc %p %llu %s s=%llu " 598 600 "still has %d dirty pages\n", inode, capsnap,
+10
fs/ceph/super.c
··· 137 137 Opt_nofscache, 138 138 Opt_poolperm, 139 139 Opt_nopoolperm, 140 + Opt_require_active_mds, 141 + Opt_norequire_active_mds, 140 142 #ifdef CONFIG_CEPH_FS_POSIX_ACL 141 143 Opt_acl, 142 144 #endif ··· 173 171 {Opt_nofscache, "nofsc"}, 174 172 {Opt_poolperm, "poolperm"}, 175 173 {Opt_nopoolperm, "nopoolperm"}, 174 + {Opt_require_active_mds, "require_active_mds"}, 175 + {Opt_norequire_active_mds, "norequire_active_mds"}, 176 176 #ifdef CONFIG_CEPH_FS_POSIX_ACL 177 177 {Opt_acl, "acl"}, 178 178 #endif ··· 290 286 break; 291 287 case Opt_nopoolperm: 292 288 fsopt->flags |= CEPH_MOUNT_OPT_NOPOOLPERM; 289 + break; 290 + case Opt_require_active_mds: 291 + fsopt->flags &= ~CEPH_MOUNT_OPT_MOUNTWAIT; 292 + break; 293 + case Opt_norequire_active_mds: 294 + fsopt->flags |= CEPH_MOUNT_OPT_MOUNTWAIT; 293 295 break; 294 296 #ifdef CONFIG_CEPH_FS_POSIX_ACL 295 297 case Opt_acl:
+5
fs/ceph/super.h
··· 36 36 #define CEPH_MOUNT_OPT_DCACHE (1<<9) /* use dcache for readdir etc */ 37 37 #define CEPH_MOUNT_OPT_FSCACHE (1<<10) /* use fscache */ 38 38 #define CEPH_MOUNT_OPT_NOPOOLPERM (1<<11) /* no pool permission check */ 39 + #define CEPH_MOUNT_OPT_MOUNTWAIT (1<<12) /* mount waits if no mds is up */ 39 40 40 41 #define CEPH_MOUNT_OPT_DEFAULT CEPH_MOUNT_OPT_DCACHE 41 42 ··· 181 180 u64 size; 182 181 struct timespec mtime, atime, ctime; 183 182 u64 time_warp_seq; 183 + u64 truncate_size; 184 + u32 truncate_seq; 184 185 int writing; /* a sync write is still in progress */ 185 186 int dirty_pages; /* dirty pages awaiting writeback */ 186 187 bool inline_data; ··· 908 905 909 906 extern int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, 910 907 loff_t endoff, int *got, struct page **pinned_page); 908 + extern int ceph_try_get_caps(struct ceph_inode_info *ci, 909 + int need, int want, int *got); 911 910 912 911 /* for counting open files by mode */ 913 912 extern void __ceph_get_fmode(struct ceph_inode_info *ci, int mode);
+2 -3
include/linux/ceph/auth.h
··· 64 64 int (*update_authorizer)(struct ceph_auth_client *ac, int peer_type, 65 65 struct ceph_auth_handshake *auth); 66 66 int (*verify_authorizer_reply)(struct ceph_auth_client *ac, 67 - struct ceph_authorizer *a, size_t len); 67 + struct ceph_authorizer *a); 68 68 void (*invalidate_authorizer)(struct ceph_auth_client *ac, 69 69 int peer_type); 70 70 ··· 118 118 int peer_type, 119 119 struct ceph_auth_handshake *a); 120 120 extern int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac, 121 - struct ceph_authorizer *a, 122 - size_t len); 121 + struct ceph_authorizer *a); 123 122 extern void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac, 124 123 int peer_type); 125 124
+3
include/linux/ceph/ceph_fs.h
··· 653 653 654 654 extern const char *ceph_cap_op_name(int op); 655 655 656 + /* flags field in client cap messages (version >= 10) */ 657 + #define CEPH_CLIENT_CAPS_SYNC (0x1) 658 + 656 659 /* 657 660 * caps message, used for capability callbacks, acks, requests, etc. 658 661 */
+5
include/linux/ceph/mdsmap.h
··· 31 31 int m_num_data_pg_pools; 32 32 u64 *m_data_pg_pools; 33 33 u64 m_cas_pg_pool; 34 + 35 + bool m_enabled; 36 + bool m_damaged; 37 + int m_num_laggy; 34 38 }; 35 39 36 40 static inline struct ceph_entity_addr * ··· 63 59 extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m); 64 60 extern struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end); 65 61 extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m); 62 + extern bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m); 66 63 67 64 #endif
+1 -1
include/linux/ceph/messenger.h
··· 30 30 struct ceph_auth_handshake *(*get_authorizer) ( 31 31 struct ceph_connection *con, 32 32 int *proto, int force_new); 33 - int (*verify_authorizer_reply) (struct ceph_connection *con, int len); 33 + int (*verify_authorizer_reply) (struct ceph_connection *con); 34 34 int (*invalidate_authorizer)(struct ceph_connection *con); 35 35 36 36 /* there was some error on the socket (disconnect, whatever) */
+1 -1
include/linux/ceph/osd_client.h
··· 176 176 struct kref r_kref; 177 177 bool r_mempool; 178 178 struct completion r_completion; 179 - struct completion r_safe_completion; /* fsync waiter */ 179 + struct completion r_done_completion; /* fsync waiter */ 180 180 ceph_osdc_callback_t r_callback; 181 181 ceph_osdc_unsafe_callback_t r_unsafe_callback; 182 182 struct list_head r_unsafe_item;
+2 -2
net/ceph/auth.c
··· 315 315 EXPORT_SYMBOL(ceph_auth_update_authorizer); 316 316 317 317 int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac, 318 - struct ceph_authorizer *a, size_t len) 318 + struct ceph_authorizer *a) 319 319 { 320 320 int ret = 0; 321 321 322 322 mutex_lock(&ac->mutex); 323 323 if (ac->ops && ac->ops->verify_authorizer_reply) 324 - ret = ac->ops->verify_authorizer_reply(ac, a, len); 324 + ret = ac->ops->verify_authorizer_reply(ac, a); 325 325 mutex_unlock(&ac->mutex); 326 326 return ret; 327 327 }
+101 -96
net/ceph/auth_x.c
··· 39 39 return need != 0; 40 40 } 41 41 42 + static int ceph_x_encrypt_offset(void) 43 + { 44 + return sizeof(u32) + sizeof(struct ceph_x_encrypt_header); 45 + } 46 + 42 47 static int ceph_x_encrypt_buflen(int ilen) 43 48 { 44 - return sizeof(struct ceph_x_encrypt_header) + ilen + 16 + 45 - sizeof(u32); 49 + return ceph_x_encrypt_offset() + ilen + 16; 46 50 } 47 51 48 - static int ceph_x_encrypt(struct ceph_crypto_key *secret, 49 - void *ibuf, int ilen, void *obuf, size_t olen) 52 + static int ceph_x_encrypt(struct ceph_crypto_key *secret, void *buf, 53 + int buf_len, int plaintext_len) 50 54 { 51 - struct ceph_x_encrypt_header head = { 52 - .struct_v = 1, 53 - .magic = cpu_to_le64(CEPHX_ENC_MAGIC) 54 - }; 55 - size_t len = olen - sizeof(u32); 55 + struct ceph_x_encrypt_header *hdr = buf + sizeof(u32); 56 + int ciphertext_len; 56 57 int ret; 57 58 58 - ret = ceph_encrypt2(secret, obuf + sizeof(u32), &len, 59 - &head, sizeof(head), ibuf, ilen); 59 + hdr->struct_v = 1; 60 + hdr->magic = cpu_to_le64(CEPHX_ENC_MAGIC); 61 + 62 + ret = ceph_crypt(secret, true, buf + sizeof(u32), buf_len - sizeof(u32), 63 + plaintext_len + sizeof(struct ceph_x_encrypt_header), 64 + &ciphertext_len); 60 65 if (ret) 61 66 return ret; 62 - ceph_encode_32(&obuf, len); 63 - return len + sizeof(u32); 67 + 68 + ceph_encode_32(&buf, ciphertext_len); 69 + return sizeof(u32) + ciphertext_len; 64 70 } 65 71 66 - static int ceph_x_decrypt(struct ceph_crypto_key *secret, 67 - void **p, void *end, void **obuf, size_t olen) 72 + static int ceph_x_decrypt(struct ceph_crypto_key *secret, void **p, void *end) 68 73 { 69 - struct ceph_x_encrypt_header head; 70 - size_t head_len = sizeof(head); 71 - int len, ret; 74 + struct ceph_x_encrypt_header *hdr = *p + sizeof(u32); 75 + int ciphertext_len, plaintext_len; 76 + int ret; 72 77 73 - len = ceph_decode_32(p); 74 - if (*p + len > end) 75 - return -EINVAL; 78 + ceph_decode_32_safe(p, end, ciphertext_len, e_inval); 79 + ceph_decode_need(p, end, 
ciphertext_len, e_inval); 76 80 77 - dout("ceph_x_decrypt len %d\n", len); 78 - if (*obuf == NULL) { 79 - *obuf = kmalloc(len, GFP_NOFS); 80 - if (!*obuf) 81 - return -ENOMEM; 82 - olen = len; 83 - } 84 - 85 - ret = ceph_decrypt2(secret, &head, &head_len, *obuf, &olen, *p, len); 81 + ret = ceph_crypt(secret, false, *p, end - *p, ciphertext_len, 82 + &plaintext_len); 86 83 if (ret) 87 84 return ret; 88 - if (head.struct_v != 1 || le64_to_cpu(head.magic) != CEPHX_ENC_MAGIC) 85 + 86 + if (hdr->struct_v != 1 || le64_to_cpu(hdr->magic) != CEPHX_ENC_MAGIC) 89 87 return -EPERM; 90 - *p += len; 91 - return olen; 88 + 89 + *p += ciphertext_len; 90 + return plaintext_len - sizeof(struct ceph_x_encrypt_header); 91 + 92 + e_inval: 93 + return -EINVAL; 92 94 } 93 95 94 96 /* ··· 145 143 int type; 146 144 u8 tkt_struct_v, blob_struct_v; 147 145 struct ceph_x_ticket_handler *th; 148 - void *dbuf = NULL; 149 146 void *dp, *dend; 150 147 int dlen; 151 148 char is_enc; 152 149 struct timespec validity; 153 - struct ceph_crypto_key old_key; 154 - void *ticket_buf = NULL; 155 150 void *tp, *tpend; 156 151 void **ptp; 157 152 struct ceph_crypto_key new_session_key; ··· 173 174 } 174 175 175 176 /* blob for me */ 176 - dlen = ceph_x_decrypt(secret, p, end, &dbuf, 0); 177 - if (dlen <= 0) { 178 - ret = dlen; 177 + dp = *p + ceph_x_encrypt_offset(); 178 + ret = ceph_x_decrypt(secret, p, end); 179 + if (ret < 0) 179 180 goto out; 180 - } 181 - dout(" decrypted %d bytes\n", dlen); 182 - dp = dbuf; 183 - dend = dp + dlen; 181 + dout(" decrypted %d bytes\n", ret); 182 + dend = dp + ret; 184 183 185 184 tkt_struct_v = ceph_decode_8(&dp); 186 185 if (tkt_struct_v != 1) 187 186 goto bad; 188 187 189 - memcpy(&old_key, &th->session_key, sizeof(old_key)); 190 188 ret = ceph_crypto_key_decode(&new_session_key, &dp, dend); 191 189 if (ret) 192 190 goto out; ··· 199 203 ceph_decode_8_safe(p, end, is_enc, bad); 200 204 if (is_enc) { 201 205 /* encrypted */ 202 - dout(" encrypted ticket\n"); 203 - dlen 
= ceph_x_decrypt(&old_key, p, end, &ticket_buf, 0); 204 - if (dlen < 0) { 205 - ret = dlen; 206 + tp = *p + ceph_x_encrypt_offset(); 207 + ret = ceph_x_decrypt(&th->session_key, p, end); 208 + if (ret < 0) 206 209 goto out; 207 - } 208 - tp = ticket_buf; 210 + dout(" encrypted ticket, decrypted %d bytes\n", ret); 209 211 ptp = &tp; 210 - tpend = *ptp + dlen; 212 + tpend = tp + ret; 211 213 } else { 212 214 /* unencrypted */ 213 215 ptp = p; ··· 236 242 xi->have_keys |= th->service; 237 243 238 244 out: 239 - kfree(ticket_buf); 240 - kfree(dbuf); 241 245 return ret; 242 246 243 247 bad: ··· 286 294 { 287 295 int maxlen; 288 296 struct ceph_x_authorize_a *msg_a; 289 - struct ceph_x_authorize_b msg_b; 297 + struct ceph_x_authorize_b *msg_b; 290 298 void *p, *end; 291 299 int ret; 292 300 int ticket_blob_len = ··· 300 308 if (ret) 301 309 goto out_au; 302 310 303 - maxlen = sizeof(*msg_a) + sizeof(msg_b) + 304 - ceph_x_encrypt_buflen(ticket_blob_len); 311 + maxlen = sizeof(*msg_a) + ticket_blob_len + 312 + ceph_x_encrypt_buflen(sizeof(*msg_b)); 305 313 dout(" need len %d\n", maxlen); 306 314 if (au->buf && au->buf->alloc_len < maxlen) { 307 315 ceph_buffer_put(au->buf); ··· 335 343 p += ticket_blob_len; 336 344 end = au->buf->vec.iov_base + au->buf->vec.iov_len; 337 345 346 + msg_b = p + ceph_x_encrypt_offset(); 347 + msg_b->struct_v = 1; 338 348 get_random_bytes(&au->nonce, sizeof(au->nonce)); 339 - msg_b.struct_v = 1; 340 - msg_b.nonce = cpu_to_le64(au->nonce); 341 - ret = ceph_x_encrypt(&au->session_key, &msg_b, sizeof(msg_b), 342 - p, end - p); 349 + msg_b->nonce = cpu_to_le64(au->nonce); 350 + ret = ceph_x_encrypt(&au->session_key, p, end - p, sizeof(*msg_b)); 343 351 if (ret < 0) 344 352 goto out_au; 353 + 345 354 p += ret; 355 + WARN_ON(p > end); 346 356 au->buf->vec.iov_len = p - au->buf->vec.iov_base; 347 357 dout(" built authorizer nonce %llx len %d\n", au->nonce, 348 358 (int)au->buf->vec.iov_len); 349 - BUG_ON(au->buf->vec.iov_len > maxlen); 350 359 return 
0; 351 360 352 361 out_au: ··· 445 452 if (need & CEPH_ENTITY_TYPE_AUTH) { 446 453 struct ceph_x_authenticate *auth = (void *)(head + 1); 447 454 void *p = auth + 1; 448 - struct ceph_x_challenge_blob tmp; 449 - char tmp_enc[40]; 455 + void *enc_buf = xi->auth_authorizer.enc_buf; 456 + struct ceph_x_challenge_blob *blob = enc_buf + 457 + ceph_x_encrypt_offset(); 450 458 u64 *u; 451 459 452 460 if (p > end) ··· 458 464 459 465 /* encrypt and hash */ 460 466 get_random_bytes(&auth->client_challenge, sizeof(u64)); 461 - tmp.client_challenge = auth->client_challenge; 462 - tmp.server_challenge = cpu_to_le64(xi->server_challenge); 463 - ret = ceph_x_encrypt(&xi->secret, &tmp, sizeof(tmp), 464 - tmp_enc, sizeof(tmp_enc)); 467 + blob->client_challenge = auth->client_challenge; 468 + blob->server_challenge = cpu_to_le64(xi->server_challenge); 469 + ret = ceph_x_encrypt(&xi->secret, enc_buf, CEPHX_AU_ENC_BUF_LEN, 470 + sizeof(*blob)); 465 471 if (ret < 0) 466 472 return ret; 467 473 468 474 auth->struct_v = 1; 469 475 auth->key = 0; 470 - for (u = (u64 *)tmp_enc; u + 1 <= (u64 *)(tmp_enc + ret); u++) 476 + for (u = (u64 *)enc_buf; u + 1 <= (u64 *)(enc_buf + ret); u++) 471 477 auth->key ^= *(__le64 *)u; 472 478 dout(" server_challenge %llx client_challenge %llx key %llx\n", 473 479 xi->server_challenge, le64_to_cpu(auth->client_challenge), ··· 594 600 auth->authorizer = (struct ceph_authorizer *) au; 595 601 auth->authorizer_buf = au->buf->vec.iov_base; 596 602 auth->authorizer_buf_len = au->buf->vec.iov_len; 597 - auth->authorizer_reply_buf = au->reply_buf; 598 - auth->authorizer_reply_buf_len = sizeof (au->reply_buf); 603 + auth->authorizer_reply_buf = au->enc_buf; 604 + auth->authorizer_reply_buf_len = CEPHX_AU_ENC_BUF_LEN; 599 605 auth->sign_message = ac->ops->sign_message; 600 606 auth->check_message_signature = ac->ops->check_message_signature; 601 607 ··· 623 629 } 624 630 625 631 static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac, 626 - struct 
ceph_authorizer *a, size_t len) 632 + struct ceph_authorizer *a) 627 633 { 628 634 struct ceph_x_authorizer *au = (void *)a; 629 - int ret = 0; 630 - struct ceph_x_authorize_reply reply; 631 - void *preply = &reply; 632 - void *p = au->reply_buf; 633 - void *end = p + sizeof(au->reply_buf); 635 + void *p = au->enc_buf; 636 + struct ceph_x_authorize_reply *reply = p + ceph_x_encrypt_offset(); 637 + int ret; 634 638 635 - ret = ceph_x_decrypt(&au->session_key, &p, end, &preply, sizeof(reply)); 639 + ret = ceph_x_decrypt(&au->session_key, &p, p + CEPHX_AU_ENC_BUF_LEN); 636 640 if (ret < 0) 637 641 return ret; 638 - if (ret != sizeof(reply)) 642 + if (ret != sizeof(*reply)) 639 643 return -EPERM; 640 644 641 - if (au->nonce + 1 != le64_to_cpu(reply.nonce_plus_one)) 645 + if (au->nonce + 1 != le64_to_cpu(reply->nonce_plus_one)) 642 646 ret = -EPERM; 643 647 else 644 648 ret = 0; 645 649 dout("verify_authorizer_reply nonce %llx got %llx ret %d\n", 646 - au->nonce, le64_to_cpu(reply.nonce_plus_one), ret); 650 + au->nonce, le64_to_cpu(reply->nonce_plus_one), ret); 647 651 return ret; 648 652 } 649 653 ··· 696 704 invalidate_ticket(ac, CEPH_ENTITY_TYPE_AUTH); 697 705 } 698 706 699 - static int calcu_signature(struct ceph_x_authorizer *au, 700 - struct ceph_msg *msg, __le64 *sig) 707 + static int calc_signature(struct ceph_x_authorizer *au, struct ceph_msg *msg, 708 + __le64 *psig) 701 709 { 710 + void *enc_buf = au->enc_buf; 711 + struct { 712 + __le32 len; 713 + __le32 header_crc; 714 + __le32 front_crc; 715 + __le32 middle_crc; 716 + __le32 data_crc; 717 + } __packed *sigblock = enc_buf + ceph_x_encrypt_offset(); 702 718 int ret; 703 - char tmp_enc[40]; 704 - __le32 tmp[5] = { 705 - cpu_to_le32(16), msg->hdr.crc, msg->footer.front_crc, 706 - msg->footer.middle_crc, msg->footer.data_crc, 707 - }; 708 - ret = ceph_x_encrypt(&au->session_key, &tmp, sizeof(tmp), 709 - tmp_enc, sizeof(tmp_enc)); 719 + 720 + sigblock->len = cpu_to_le32(4*sizeof(u32)); 721 + sigblock->header_crc 
= msg->hdr.crc; 722 + sigblock->front_crc = msg->footer.front_crc; 723 + sigblock->middle_crc = msg->footer.middle_crc; 724 + sigblock->data_crc = msg->footer.data_crc; 725 + ret = ceph_x_encrypt(&au->session_key, enc_buf, CEPHX_AU_ENC_BUF_LEN, 726 + sizeof(*sigblock)); 710 727 if (ret < 0) 711 728 return ret; 712 - *sig = *(__le64*)(tmp_enc + 4); 729 + 730 + *psig = *(__le64 *)(enc_buf + sizeof(u32)); 713 731 return 0; 714 732 } 715 733 716 734 static int ceph_x_sign_message(struct ceph_auth_handshake *auth, 717 735 struct ceph_msg *msg) 718 736 { 737 + __le64 sig; 719 738 int ret; 720 739 721 740 if (ceph_test_opt(from_msgr(msg->con->msgr), NOMSGSIGN)) 722 741 return 0; 723 742 724 - ret = calcu_signature((struct ceph_x_authorizer *)auth->authorizer, 725 - msg, &msg->footer.sig); 726 - if (ret < 0) 743 + ret = calc_signature((struct ceph_x_authorizer *)auth->authorizer, 744 + msg, &sig); 745 + if (ret) 727 746 return ret; 747 + 748 + msg->footer.sig = sig; 728 749 msg->footer.flags |= CEPH_MSG_FOOTER_SIGNED; 729 750 return 0; 730 751 } ··· 751 746 if (ceph_test_opt(from_msgr(msg->con->msgr), NOMSGSIGN)) 752 747 return 0; 753 748 754 - ret = calcu_signature((struct ceph_x_authorizer *)auth->authorizer, 755 - msg, &sig_check); 756 - if (ret < 0) 749 + ret = calc_signature((struct ceph_x_authorizer *)auth->authorizer, 750 + msg, &sig_check); 751 + if (ret) 757 752 return ret; 758 753 if (sig_check == msg->footer.sig) 759 754 return 0;
+2 -1
net/ceph/auth_x.h
··· 24 24 unsigned long renew_after, expires; 25 25 }; 26 26 27 + #define CEPHX_AU_ENC_BUF_LEN 128 /* big enough for encrypted blob */ 27 28 28 29 struct ceph_x_authorizer { 29 30 struct ceph_authorizer base; ··· 33 32 unsigned int service; 34 33 u64 nonce; 35 34 u64 secret_id; 36 - char reply_buf[128]; /* big enough for encrypted blob */ 35 + char enc_buf[CEPHX_AU_ENC_BUF_LEN] __aligned(8); 37 36 }; 38 37 39 38 struct ceph_x_info {
+2
net/ceph/crush/mapper.c
··· 17 17 # include <linux/kernel.h> 18 18 # include <linux/crush/crush.h> 19 19 # include <linux/crush/hash.h> 20 + # include <linux/crush/mapper.h> 20 21 #else 21 22 # include "crush_compat.h" 22 23 # include "crush.h" 23 24 # include "hash.h" 25 + # include "mapper.h" 24 26 #endif 25 27 #include "crush_ln_table.h" 26 28
+112 -351
net/ceph/crypto.c
··· 13 13 #include <linux/ceph/decode.h> 14 14 #include "crypto.h" 15 15 16 + /* 17 + * Set ->key and ->tfm. The rest of the key should be filled in before 18 + * this function is called. 19 + */ 20 + static int set_secret(struct ceph_crypto_key *key, void *buf) 21 + { 22 + unsigned int noio_flag; 23 + int ret; 24 + 25 + key->key = NULL; 26 + key->tfm = NULL; 27 + 28 + switch (key->type) { 29 + case CEPH_CRYPTO_NONE: 30 + return 0; /* nothing to do */ 31 + case CEPH_CRYPTO_AES: 32 + break; 33 + default: 34 + return -ENOTSUPP; 35 + } 36 + 37 + WARN_ON(!key->len); 38 + key->key = kmemdup(buf, key->len, GFP_NOIO); 39 + if (!key->key) { 40 + ret = -ENOMEM; 41 + goto fail; 42 + } 43 + 44 + /* crypto_alloc_skcipher() allocates with GFP_KERNEL */ 45 + noio_flag = memalloc_noio_save(); 46 + key->tfm = crypto_alloc_skcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC); 47 + memalloc_noio_restore(noio_flag); 48 + if (IS_ERR(key->tfm)) { 49 + ret = PTR_ERR(key->tfm); 50 + key->tfm = NULL; 51 + goto fail; 52 + } 53 + 54 + ret = crypto_skcipher_setkey(key->tfm, key->key, key->len); 55 + if (ret) 56 + goto fail; 57 + 58 + return 0; 59 + 60 + fail: 61 + ceph_crypto_key_destroy(key); 62 + return ret; 63 + } 64 + 16 65 int ceph_crypto_key_clone(struct ceph_crypto_key *dst, 17 66 const struct ceph_crypto_key *src) 18 67 { 19 68 memcpy(dst, src, sizeof(struct ceph_crypto_key)); 20 - dst->key = kmemdup(src->key, src->len, GFP_NOFS); 21 - if (!dst->key) 22 - return -ENOMEM; 23 - return 0; 69 + return set_secret(dst, src->key); 24 70 } 25 71 26 72 int ceph_crypto_key_encode(struct ceph_crypto_key *key, void **p, void *end) ··· 83 37 84 38 int ceph_crypto_key_decode(struct ceph_crypto_key *key, void **p, void *end) 85 39 { 40 + int ret; 41 + 86 42 ceph_decode_need(p, end, 2*sizeof(u16) + sizeof(key->created), bad); 87 43 key->type = ceph_decode_16(p); 88 44 ceph_decode_copy(p, &key->created, sizeof(key->created)); 89 45 key->len = ceph_decode_16(p); 90 46 ceph_decode_need(p, end, key->len, bad); 91 
- key->key = kmalloc(key->len, GFP_NOFS); 92 - if (!key->key) 93 - return -ENOMEM; 94 - ceph_decode_copy(p, key->key, key->len); 95 - return 0; 47 + ret = set_secret(key, *p); 48 + *p += key->len; 49 + return ret; 96 50 97 51 bad: 98 52 dout("failed to decode crypto key\n"); ··· 126 80 return 0; 127 81 } 128 82 129 - static struct crypto_skcipher *ceph_crypto_alloc_cipher(void) 83 + void ceph_crypto_key_destroy(struct ceph_crypto_key *key) 130 84 { 131 - return crypto_alloc_skcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC); 85 + if (key) { 86 + kfree(key->key); 87 + key->key = NULL; 88 + crypto_free_skcipher(key->tfm); 89 + key->tfm = NULL; 90 + } 132 91 } 133 92 134 93 static const u8 *aes_iv = (u8 *)CEPH_AES_IV; ··· 208 157 sg_free_table(sgt); 209 158 } 210 159 211 - static int ceph_aes_encrypt(const void *key, int key_len, 212 - void *dst, size_t *dst_len, 213 - const void *src, size_t src_len) 160 + static int ceph_aes_crypt(const struct ceph_crypto_key *key, bool encrypt, 161 + void *buf, int buf_len, int in_len, int *pout_len) 214 162 { 215 - struct scatterlist sg_in[2], prealloc_sg; 216 - struct sg_table sg_out; 217 - struct crypto_skcipher *tfm = ceph_crypto_alloc_cipher(); 218 - SKCIPHER_REQUEST_ON_STACK(req, tfm); 219 - int ret; 163 + SKCIPHER_REQUEST_ON_STACK(req, key->tfm); 164 + struct sg_table sgt; 165 + struct scatterlist prealloc_sg; 220 166 char iv[AES_BLOCK_SIZE]; 221 - size_t zero_padding = (0x10 - (src_len & 0x0f)); 222 - char pad[16]; 223 - 224 - if (IS_ERR(tfm)) 225 - return PTR_ERR(tfm); 226 - 227 - memset(pad, zero_padding, zero_padding); 228 - 229 - *dst_len = src_len + zero_padding; 230 - 231 - sg_init_table(sg_in, 2); 232 - sg_set_buf(&sg_in[0], src, src_len); 233 - sg_set_buf(&sg_in[1], pad, zero_padding); 234 - ret = setup_sgtable(&sg_out, &prealloc_sg, dst, *dst_len); 235 - if (ret) 236 - goto out_tfm; 237 - 238 - crypto_skcipher_setkey((void *)tfm, key, key_len); 239 - memcpy(iv, aes_iv, AES_BLOCK_SIZE); 240 - 241 - 
skcipher_request_set_tfm(req, tfm); 242 - skcipher_request_set_callback(req, 0, NULL, NULL); 243 - skcipher_request_set_crypt(req, sg_in, sg_out.sgl, 244 - src_len + zero_padding, iv); 245 - 246 - /* 247 - print_hex_dump(KERN_ERR, "enc key: ", DUMP_PREFIX_NONE, 16, 1, 248 - key, key_len, 1); 249 - print_hex_dump(KERN_ERR, "enc src: ", DUMP_PREFIX_NONE, 16, 1, 250 - src, src_len, 1); 251 - print_hex_dump(KERN_ERR, "enc pad: ", DUMP_PREFIX_NONE, 16, 1, 252 - pad, zero_padding, 1); 253 - */ 254 - ret = crypto_skcipher_encrypt(req); 255 - skcipher_request_zero(req); 256 - if (ret < 0) { 257 - pr_err("ceph_aes_crypt failed %d\n", ret); 258 - goto out_sg; 259 - } 260 - /* 261 - print_hex_dump(KERN_ERR, "enc out: ", DUMP_PREFIX_NONE, 16, 1, 262 - dst, *dst_len, 1); 263 - */ 264 - 265 - out_sg: 266 - teardown_sgtable(&sg_out); 267 - out_tfm: 268 - crypto_free_skcipher(tfm); 269 - return ret; 270 - } 271 - 272 - static int ceph_aes_encrypt2(const void *key, int key_len, void *dst, 273 - size_t *dst_len, 274 - const void *src1, size_t src1_len, 275 - const void *src2, size_t src2_len) 276 - { 277 - struct scatterlist sg_in[3], prealloc_sg; 278 - struct sg_table sg_out; 279 - struct crypto_skcipher *tfm = ceph_crypto_alloc_cipher(); 280 - SKCIPHER_REQUEST_ON_STACK(req, tfm); 167 + int pad_byte = AES_BLOCK_SIZE - (in_len & (AES_BLOCK_SIZE - 1)); 168 + int crypt_len = encrypt ? 
in_len + pad_byte : in_len; 281 169 int ret; 282 - char iv[AES_BLOCK_SIZE]; 283 - size_t zero_padding = (0x10 - ((src1_len + src2_len) & 0x0f)); 284 - char pad[16]; 285 170 286 - if (IS_ERR(tfm)) 287 - return PTR_ERR(tfm); 288 - 289 - memset(pad, zero_padding, zero_padding); 290 - 291 - *dst_len = src1_len + src2_len + zero_padding; 292 - 293 - sg_init_table(sg_in, 3); 294 - sg_set_buf(&sg_in[0], src1, src1_len); 295 - sg_set_buf(&sg_in[1], src2, src2_len); 296 - sg_set_buf(&sg_in[2], pad, zero_padding); 297 - ret = setup_sgtable(&sg_out, &prealloc_sg, dst, *dst_len); 171 + WARN_ON(crypt_len > buf_len); 172 + if (encrypt) 173 + memset(buf + in_len, pad_byte, pad_byte); 174 + ret = setup_sgtable(&sgt, &prealloc_sg, buf, crypt_len); 298 175 if (ret) 299 - goto out_tfm; 176 + return ret; 300 177 301 - crypto_skcipher_setkey((void *)tfm, key, key_len); 302 178 memcpy(iv, aes_iv, AES_BLOCK_SIZE); 303 - 304 - skcipher_request_set_tfm(req, tfm); 179 + skcipher_request_set_tfm(req, key->tfm); 305 180 skcipher_request_set_callback(req, 0, NULL, NULL); 306 - skcipher_request_set_crypt(req, sg_in, sg_out.sgl, 307 - src1_len + src2_len + zero_padding, iv); 181 + skcipher_request_set_crypt(req, sgt.sgl, sgt.sgl, crypt_len, iv); 308 182 309 183 /* 310 - print_hex_dump(KERN_ERR, "enc key: ", DUMP_PREFIX_NONE, 16, 1, 311 - key, key_len, 1); 312 - print_hex_dump(KERN_ERR, "enc src1: ", DUMP_PREFIX_NONE, 16, 1, 313 - src1, src1_len, 1); 314 - print_hex_dump(KERN_ERR, "enc src2: ", DUMP_PREFIX_NONE, 16, 1, 315 - src2, src2_len, 1); 316 - print_hex_dump(KERN_ERR, "enc pad: ", DUMP_PREFIX_NONE, 16, 1, 317 - pad, zero_padding, 1); 184 + print_hex_dump(KERN_ERR, "key: ", DUMP_PREFIX_NONE, 16, 1, 185 + key->key, key->len, 1); 186 + print_hex_dump(KERN_ERR, " in: ", DUMP_PREFIX_NONE, 16, 1, 187 + buf, crypt_len, 1); 318 188 */ 319 - ret = crypto_skcipher_encrypt(req); 320 - skcipher_request_zero(req); 321 - if (ret < 0) { 322 - pr_err("ceph_aes_crypt2 failed %d\n", ret); 323 - goto out_sg; 
324 - } 325 - /* 326 - print_hex_dump(KERN_ERR, "enc out: ", DUMP_PREFIX_NONE, 16, 1, 327 - dst, *dst_len, 1); 328 - */ 329 - 330 - out_sg: 331 - teardown_sgtable(&sg_out); 332 - out_tfm: 333 - crypto_free_skcipher(tfm); 334 - return ret; 335 - } 336 - 337 - static int ceph_aes_decrypt(const void *key, int key_len, 338 - void *dst, size_t *dst_len, 339 - const void *src, size_t src_len) 340 - { 341 - struct sg_table sg_in; 342 - struct scatterlist sg_out[2], prealloc_sg; 343 - struct crypto_skcipher *tfm = ceph_crypto_alloc_cipher(); 344 - SKCIPHER_REQUEST_ON_STACK(req, tfm); 345 - char pad[16]; 346 - char iv[AES_BLOCK_SIZE]; 347 - int ret; 348 - int last_byte; 349 - 350 - if (IS_ERR(tfm)) 351 - return PTR_ERR(tfm); 352 - 353 - sg_init_table(sg_out, 2); 354 - sg_set_buf(&sg_out[0], dst, *dst_len); 355 - sg_set_buf(&sg_out[1], pad, sizeof(pad)); 356 - ret = setup_sgtable(&sg_in, &prealloc_sg, src, src_len); 357 - if (ret) 358 - goto out_tfm; 359 - 360 - crypto_skcipher_setkey((void *)tfm, key, key_len); 361 - memcpy(iv, aes_iv, AES_BLOCK_SIZE); 362 - 363 - skcipher_request_set_tfm(req, tfm); 364 - skcipher_request_set_callback(req, 0, NULL, NULL); 365 - skcipher_request_set_crypt(req, sg_in.sgl, sg_out, 366 - src_len, iv); 367 - 368 - /* 369 - print_hex_dump(KERN_ERR, "dec key: ", DUMP_PREFIX_NONE, 16, 1, 370 - key, key_len, 1); 371 - print_hex_dump(KERN_ERR, "dec in: ", DUMP_PREFIX_NONE, 16, 1, 372 - src, src_len, 1); 373 - */ 374 - ret = crypto_skcipher_decrypt(req); 375 - skcipher_request_zero(req); 376 - if (ret < 0) { 377 - pr_err("ceph_aes_decrypt failed %d\n", ret); 378 - goto out_sg; 379 - } 380 - 381 - if (src_len <= *dst_len) 382 - last_byte = ((char *)dst)[src_len - 1]; 189 + if (encrypt) 190 + ret = crypto_skcipher_encrypt(req); 383 191 else 384 - last_byte = pad[src_len - *dst_len - 1]; 385 - if (last_byte <= 16 && src_len >= last_byte) { 386 - *dst_len = src_len - last_byte; 387 - } else { 388 - pr_err("ceph_aes_decrypt got bad padding %d on src len 
%d\n", 389 - last_byte, (int)src_len); 390 - return -EPERM; /* bad padding */ 391 - } 392 - /* 393 - print_hex_dump(KERN_ERR, "dec out: ", DUMP_PREFIX_NONE, 16, 1, 394 - dst, *dst_len, 1); 395 - */ 396 - 397 - out_sg: 398 - teardown_sgtable(&sg_in); 399 - out_tfm: 400 - crypto_free_skcipher(tfm); 401 - return ret; 402 - } 403 - 404 - static int ceph_aes_decrypt2(const void *key, int key_len, 405 - void *dst1, size_t *dst1_len, 406 - void *dst2, size_t *dst2_len, 407 - const void *src, size_t src_len) 408 - { 409 - struct sg_table sg_in; 410 - struct scatterlist sg_out[3], prealloc_sg; 411 - struct crypto_skcipher *tfm = ceph_crypto_alloc_cipher(); 412 - SKCIPHER_REQUEST_ON_STACK(req, tfm); 413 - char pad[16]; 414 - char iv[AES_BLOCK_SIZE]; 415 - int ret; 416 - int last_byte; 417 - 418 - if (IS_ERR(tfm)) 419 - return PTR_ERR(tfm); 420 - 421 - sg_init_table(sg_out, 3); 422 - sg_set_buf(&sg_out[0], dst1, *dst1_len); 423 - sg_set_buf(&sg_out[1], dst2, *dst2_len); 424 - sg_set_buf(&sg_out[2], pad, sizeof(pad)); 425 - ret = setup_sgtable(&sg_in, &prealloc_sg, src, src_len); 426 - if (ret) 427 - goto out_tfm; 428 - 429 - crypto_skcipher_setkey((void *)tfm, key, key_len); 430 - memcpy(iv, aes_iv, AES_BLOCK_SIZE); 431 - 432 - skcipher_request_set_tfm(req, tfm); 433 - skcipher_request_set_callback(req, 0, NULL, NULL); 434 - skcipher_request_set_crypt(req, sg_in.sgl, sg_out, 435 - src_len, iv); 436 - 437 - /* 438 - print_hex_dump(KERN_ERR, "dec key: ", DUMP_PREFIX_NONE, 16, 1, 439 - key, key_len, 1); 440 - print_hex_dump(KERN_ERR, "dec in: ", DUMP_PREFIX_NONE, 16, 1, 441 - src, src_len, 1); 442 - */ 443 - ret = crypto_skcipher_decrypt(req); 192 + ret = crypto_skcipher_decrypt(req); 444 193 skcipher_request_zero(req); 445 - if (ret < 0) { 446 - pr_err("ceph_aes_decrypt failed %d\n", ret); 447 - goto out_sg; 448 - } 449 - 450 - if (src_len <= *dst1_len) 451 - last_byte = ((char *)dst1)[src_len - 1]; 452 - else if (src_len <= *dst1_len + *dst2_len) 453 - last_byte = ((char 
*)dst2)[src_len - *dst1_len - 1]; 454 - else 455 - last_byte = pad[src_len - *dst1_len - *dst2_len - 1]; 456 - if (last_byte <= 16 && src_len >= last_byte) { 457 - src_len -= last_byte; 458 - } else { 459 - pr_err("ceph_aes_decrypt got bad padding %d on src len %d\n", 460 - last_byte, (int)src_len); 461 - return -EPERM; /* bad padding */ 462 - } 463 - 464 - if (src_len < *dst1_len) { 465 - *dst1_len = src_len; 466 - *dst2_len = 0; 467 - } else { 468 - *dst2_len = src_len - *dst1_len; 194 + if (ret) { 195 + pr_err("%s %scrypt failed: %d\n", __func__, 196 + encrypt ? "en" : "de", ret); 197 + goto out_sgt; 469 198 } 470 199 /* 471 - print_hex_dump(KERN_ERR, "dec out1: ", DUMP_PREFIX_NONE, 16, 1, 472 - dst1, *dst1_len, 1); 473 - print_hex_dump(KERN_ERR, "dec out2: ", DUMP_PREFIX_NONE, 16, 1, 474 - dst2, *dst2_len, 1); 200 + print_hex_dump(KERN_ERR, "out: ", DUMP_PREFIX_NONE, 16, 1, 201 + buf, crypt_len, 1); 475 202 */ 476 203 477 - out_sg: 478 - teardown_sgtable(&sg_in); 479 - out_tfm: 480 - crypto_free_skcipher(tfm); 481 - return ret; 482 - } 483 - 484 - 485 - int ceph_decrypt(struct ceph_crypto_key *secret, void *dst, size_t *dst_len, 486 - const void *src, size_t src_len) 487 - { 488 - switch (secret->type) { 489 - case CEPH_CRYPTO_NONE: 490 - if (*dst_len < src_len) 491 - return -ERANGE; 492 - memcpy(dst, src, src_len); 493 - *dst_len = src_len; 494 - return 0; 495 - 496 - case CEPH_CRYPTO_AES: 497 - return ceph_aes_decrypt(secret->key, secret->len, dst, 498 - dst_len, src, src_len); 499 - 500 - default: 501 - return -EINVAL; 502 - } 503 - } 504 - 505 - int ceph_decrypt2(struct ceph_crypto_key *secret, 506 - void *dst1, size_t *dst1_len, 507 - void *dst2, size_t *dst2_len, 508 - const void *src, size_t src_len) 509 - { 510 - size_t t; 511 - 512 - switch (secret->type) { 513 - case CEPH_CRYPTO_NONE: 514 - if (*dst1_len + *dst2_len < src_len) 515 - return -ERANGE; 516 - t = min(*dst1_len, src_len); 517 - memcpy(dst1, src, t); 518 - *dst1_len = t; 519 - src += t; 520 
- src_len -= t; 521 - if (src_len) { 522 - t = min(*dst2_len, src_len); 523 - memcpy(dst2, src, t); 524 - *dst2_len = t; 204 + if (encrypt) { 205 + *pout_len = crypt_len; 206 + } else { 207 + pad_byte = *(char *)(buf + in_len - 1); 208 + if (pad_byte > 0 && pad_byte <= AES_BLOCK_SIZE && 209 + in_len >= pad_byte) { 210 + *pout_len = in_len - pad_byte; 211 + } else { 212 + pr_err("%s got bad padding %d on in_len %d\n", 213 + __func__, pad_byte, in_len); 214 + ret = -EPERM; 215 + goto out_sgt; 525 216 } 526 - return 0; 527 - 528 - case CEPH_CRYPTO_AES: 529 - return ceph_aes_decrypt2(secret->key, secret->len, 530 - dst1, dst1_len, dst2, dst2_len, 531 - src, src_len); 532 - 533 - default: 534 - return -EINVAL; 535 217 } 218 + 219 + out_sgt: 220 + teardown_sgtable(&sgt); 221 + return ret; 536 222 } 537 223 538 - int ceph_encrypt(struct ceph_crypto_key *secret, void *dst, size_t *dst_len, 539 - const void *src, size_t src_len) 224 + int ceph_crypt(const struct ceph_crypto_key *key, bool encrypt, 225 + void *buf, int buf_len, int in_len, int *pout_len) 540 226 { 541 - switch (secret->type) { 227 + switch (key->type) { 542 228 case CEPH_CRYPTO_NONE: 543 - if (*dst_len < src_len) 544 - return -ERANGE; 545 - memcpy(dst, src, src_len); 546 - *dst_len = src_len; 229 + *pout_len = in_len; 547 230 return 0; 548 - 549 231 case CEPH_CRYPTO_AES: 550 - return ceph_aes_encrypt(secret->key, secret->len, dst, 551 - dst_len, src, src_len); 552 - 232 + return ceph_aes_crypt(key, encrypt, buf, buf_len, in_len, 233 + pout_len); 553 234 default: 554 - return -EINVAL; 555 - } 556 - } 557 - 558 - int ceph_encrypt2(struct ceph_crypto_key *secret, void *dst, size_t *dst_len, 559 - const void *src1, size_t src1_len, 560 - const void *src2, size_t src2_len) 561 - { 562 - switch (secret->type) { 563 - case CEPH_CRYPTO_NONE: 564 - if (*dst_len < src1_len + src2_len) 565 - return -ERANGE; 566 - memcpy(dst, src1, src1_len); 567 - memcpy(dst + src1_len, src2, src2_len); 568 - *dst_len = src1_len + 
src2_len; 569 - return 0; 570 - 571 - case CEPH_CRYPTO_AES: 572 - return ceph_aes_encrypt2(secret->key, secret->len, dst, dst_len, 573 - src1, src1_len, src2, src2_len); 574 - 575 - default: 576 - return -EINVAL; 235 + return -ENOTSUPP; 577 236 } 578 237 } 579 238
+4 -22
net/ceph/crypto.h
··· 12 12 struct ceph_timespec created; 13 13 int len; 14 14 void *key; 15 + struct crypto_skcipher *tfm; 15 16 }; 16 - 17 - static inline void ceph_crypto_key_destroy(struct ceph_crypto_key *key) 18 - { 19 - if (key) { 20 - kfree(key->key); 21 - key->key = NULL; 22 - } 23 - } 24 17 25 18 int ceph_crypto_key_clone(struct ceph_crypto_key *dst, 26 19 const struct ceph_crypto_key *src); 27 20 int ceph_crypto_key_encode(struct ceph_crypto_key *key, void **p, void *end); 28 21 int ceph_crypto_key_decode(struct ceph_crypto_key *key, void **p, void *end); 29 22 int ceph_crypto_key_unarmor(struct ceph_crypto_key *key, const char *in); 23 + void ceph_crypto_key_destroy(struct ceph_crypto_key *key); 30 24 31 25 /* crypto.c */ 32 - int ceph_decrypt(struct ceph_crypto_key *secret, 33 - void *dst, size_t *dst_len, 34 - const void *src, size_t src_len); 35 - int ceph_encrypt(struct ceph_crypto_key *secret, 36 - void *dst, size_t *dst_len, 37 - const void *src, size_t src_len); 38 - int ceph_decrypt2(struct ceph_crypto_key *secret, 39 - void *dst1, size_t *dst1_len, 40 - void *dst2, size_t *dst2_len, 41 - const void *src, size_t src_len); 42 - int ceph_encrypt2(struct ceph_crypto_key *secret, 43 - void *dst, size_t *dst_len, 44 - const void *src1, size_t src1_len, 45 - const void *src2, size_t src2_len); 26 + int ceph_crypt(const struct ceph_crypto_key *key, bool encrypt, 27 + void *buf, int buf_len, int in_len, int *pout_len); 46 28 int ceph_crypto_init(void); 47 29 void ceph_crypto_shutdown(void); 48 30
+13 -6
net/ceph/messenger.c
··· 1393 1393 return NULL; 1394 1394 } 1395 1395 1396 - /* Can't hold the mutex while getting authorizer */ 1397 - mutex_unlock(&con->mutex); 1398 1396 auth = con->ops->get_authorizer(con, auth_proto, con->auth_retry); 1399 - mutex_lock(&con->mutex); 1400 - 1401 1397 if (IS_ERR(auth)) 1402 1398 return auth; 1403 - if (con->state != CON_STATE_NEGOTIATING) 1404 - return ERR_PTR(-EAGAIN); 1405 1399 1406 1400 con->auth_reply_buf = auth->authorizer_reply_buf; 1407 1401 con->auth_reply_buf_len = auth->authorizer_reply_buf_len; ··· 2020 2026 int ret; 2021 2027 2022 2028 dout("process_connect on %p tag %d\n", con, (int)con->in_tag); 2029 + 2030 + if (con->auth_reply_buf) { 2031 + /* 2032 + * Any connection that defines ->get_authorizer() 2033 + * should also define ->verify_authorizer_reply(). 2034 + * See get_connect_authorizer(). 2035 + */ 2036 + ret = con->ops->verify_authorizer_reply(con); 2037 + if (ret < 0) { 2038 + con->error_msg = "bad authorize reply"; 2039 + return ret; 2040 + } 2041 + } 2023 2042 2024 2043 switch (con->in_reply.tag) { 2025 2044 case CEPH_MSGR_TAG_FEATURES:
+6 -6
net/ceph/mon_client.c
··· 1028 1028 err = -ENOMEM; 1029 1029 monc->m_subscribe_ack = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE_ACK, 1030 1030 sizeof(struct ceph_mon_subscribe_ack), 1031 - GFP_NOFS, true); 1031 + GFP_KERNEL, true); 1032 1032 if (!monc->m_subscribe_ack) 1033 1033 goto out_auth; 1034 1034 1035 - monc->m_subscribe = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE, 128, GFP_NOFS, 1036 - true); 1035 + monc->m_subscribe = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE, 128, 1036 + GFP_KERNEL, true); 1037 1037 if (!monc->m_subscribe) 1038 1038 goto out_subscribe_ack; 1039 1039 1040 - monc->m_auth_reply = ceph_msg_new(CEPH_MSG_AUTH_REPLY, 4096, GFP_NOFS, 1041 - true); 1040 + monc->m_auth_reply = ceph_msg_new(CEPH_MSG_AUTH_REPLY, 4096, 1041 + GFP_KERNEL, true); 1042 1042 if (!monc->m_auth_reply) 1043 1043 goto out_subscribe; 1044 1044 1045 - monc->m_auth = ceph_msg_new(CEPH_MSG_AUTH, 4096, GFP_NOFS, true); 1045 + monc->m_auth = ceph_msg_new(CEPH_MSG_AUTH, 4096, GFP_KERNEL, true); 1046 1046 monc->pending_auth = 0; 1047 1047 if (!monc->m_auth) 1048 1048 goto out_auth_reply;
+15 -24
net/ceph/osd_client.c
··· 460 460 461 461 kref_init(&req->r_kref); 462 462 init_completion(&req->r_completion); 463 - init_completion(&req->r_safe_completion); 463 + init_completion(&req->r_done_completion); 464 464 RB_CLEAR_NODE(&req->r_node); 465 465 RB_CLEAR_NODE(&req->r_mc_node); 466 466 INIT_LIST_HEAD(&req->r_unsafe_item); ··· 1725 1725 __submit_request(req, wrlocked); 1726 1726 } 1727 1727 1728 - static void __finish_request(struct ceph_osd_request *req) 1728 + static void finish_request(struct ceph_osd_request *req) 1729 1729 { 1730 1730 struct ceph_osd_client *osdc = req->r_osdc; 1731 1731 struct ceph_osd *osd = req->r_osd; ··· 1747 1747 ceph_msg_revoke_incoming(req->r_reply); 1748 1748 } 1749 1749 1750 - static void finish_request(struct ceph_osd_request *req) 1751 - { 1752 - __finish_request(req); 1753 - ceph_osdc_put_request(req); 1754 - } 1755 - 1756 1750 static void __complete_request(struct ceph_osd_request *req) 1757 1751 { 1758 1752 if (req->r_callback) ··· 1764 1770 dout("%s req %p tid %llu err %d\n", __func__, req, req->r_tid, err); 1765 1771 1766 1772 req->r_result = err; 1767 - __finish_request(req); 1773 + finish_request(req); 1768 1774 __complete_request(req); 1769 - complete_all(&req->r_safe_completion); 1775 + complete_all(&req->r_done_completion); 1770 1776 ceph_osdc_put_request(req); 1771 1777 } 1772 1778 ··· 1792 1798 1793 1799 cancel_map_check(req); 1794 1800 finish_request(req); 1801 + complete_all(&req->r_done_completion); 1802 + ceph_osdc_put_request(req); 1795 1803 } 1796 1804 1797 1805 static void check_pool_dne(struct ceph_osd_request *req) ··· 2804 2808 * ->r_unsafe_callback is set? 
yes no 2805 2809 * 2806 2810 * first reply is OK (needed r_cb/r_completion, r_cb/r_completion, 2807 - * any or needed/got safe) r_safe_completion r_safe_completion 2811 + * any or needed/got safe) r_done_completion r_done_completion 2808 2812 * 2809 2813 * first reply is unsafe r_unsafe_cb(true) (nothing) 2810 2814 * 2811 2815 * when we get the safe reply r_unsafe_cb(false), r_cb/r_completion, 2812 - * r_safe_completion r_safe_completion 2816 + * r_done_completion r_done_completion 2813 2817 */ 2814 2818 static void handle_reply(struct ceph_osd *osd, struct ceph_msg *msg) 2815 2819 { ··· 2911 2915 } 2912 2916 2913 2917 if (done_request(req, &m)) { 2914 - __finish_request(req); 2918 + finish_request(req); 2915 2919 if (req->r_linger) { 2916 2920 WARN_ON(req->r_unsafe_callback); 2917 2921 dout("req %p tid %llu cb (locked)\n", req, req->r_tid); ··· 2930 2934 dout("req %p tid %llu cb\n", req, req->r_tid); 2931 2935 __complete_request(req); 2932 2936 } 2933 - if (m.flags & CEPH_OSD_FLAG_ONDISK) 2934 - complete_all(&req->r_safe_completion); 2937 + complete_all(&req->r_done_completion); 2935 2938 ceph_osdc_put_request(req); 2936 2939 } else { 2937 2940 if (req->r_unsafe_callback) { ··· 3466 3471 EXPORT_SYMBOL(ceph_osdc_start_request); 3467 3472 3468 3473 /* 3469 - * Unregister a registered request. The request is not completed (i.e. 3470 - * no callbacks or wakeups) - higher layers are supposed to know what 3471 - * they are canceling. 3474 + * Unregister a registered request. The request is not completed: 3475 + * ->r_result isn't set and __complete_request() isn't called. 
3472 3476 */ 3473 3477 void ceph_osdc_cancel_request(struct ceph_osd_request *req) 3474 3478 { ··· 3494 3500 if (left <= 0) { 3495 3501 left = left ?: -ETIMEDOUT; 3496 3502 ceph_osdc_cancel_request(req); 3497 - 3498 - /* kludge - need to to wake ceph_osdc_sync() */ 3499 - complete_all(&req->r_safe_completion); 3500 3503 } else { 3501 3504 left = req->r_result; /* completed */ 3502 3505 } ··· 3540 3549 up_read(&osdc->lock); 3541 3550 dout("%s waiting on req %p tid %llu last_tid %llu\n", 3542 3551 __func__, req, req->r_tid, last_tid); 3543 - wait_for_completion(&req->r_safe_completion); 3552 + wait_for_completion(&req->r_done_completion); 3544 3553 ceph_osdc_put_request(req); 3545 3554 goto again; 3546 3555 } ··· 4469 4478 } 4470 4479 4471 4480 4472 - static int verify_authorizer_reply(struct ceph_connection *con, int len) 4481 + static int verify_authorizer_reply(struct ceph_connection *con) 4473 4482 { 4474 4483 struct ceph_osd *o = con->private; 4475 4484 struct ceph_osd_client *osdc = o->o_osdc; 4476 4485 struct ceph_auth_client *ac = osdc->client->monc.auth; 4477 4486 4478 - return ceph_auth_verify_authorizer_reply(ac, o->o_auth.authorizer, len); 4487 + return ceph_auth_verify_authorizer_reply(ac, o->o_auth.authorizer); 4479 4488 } 4480 4489 4481 4490 static int invalidate_authorizer(struct ceph_connection *con)