Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'ceph-for-5.9-rc1' of git://github.com/ceph/ceph-client

Pull ceph updates from Ilya Dryomov:
"Xiubo has completed his work on filesystem client metrics, they are
sent to all available MDSes once per second now.

Other than that, we have a lot of fixes and cleanups all around the
filesystem, including a tweak to cut down on MDS request resends in
multi-MDS setups from Yanhu and fixups for SELinux symlink labeling
and MClientSession message decoding from Jeff"

* tag 'ceph-for-5.9-rc1' of git://github.com/ceph/ceph-client: (22 commits)
ceph: handle zero-length feature mask in session messages
ceph: use frag's MDS in either mode
ceph: move sb->wb_pagevec_pool to be a global mempool
ceph: set sec_context xattr on symlink creation
ceph: remove redundant initialization of variable mds
ceph: fix use-after-free for fsc->mdsc
ceph: remove unused variables in ceph_mdsmap_decode()
ceph: delete repeated words in fs/ceph/
ceph: send client provided metric flags in client metadata
ceph: periodically send perf metrics to MDSes
ceph: check the session state and return false in case it is closed
libceph: replace HTTP links with HTTPS ones
ceph: remove unnecessary cast in kfree()
libceph: just have osd_req_op_init() return a pointer
ceph: do not access the kiocb after aio requests
ceph: clean up and optimize ceph_check_delayed_caps()
ceph: fix potential mdsc use-after-free crash
ceph: switch to WARN_ON_ONCE in encode_supported_features()
ceph: add global total_caps to count the mdsc's total caps number
ceph: add check_session_state() helper and make it global
...

+511 -136
+1 -1
fs/ceph/Kconfig
··· 13 13 scalable file system designed to provide high performance, 14 14 reliable access to petabytes of storage. 15 15 16 - More information at http://ceph.newdream.net/. 16 + More information at https://ceph.io/. 17 17 18 18 If unsure, say N. 19 19
+11 -12
fs/ceph/addr.c
··· 862 862 863 863 osd_data = osd_req_op_extent_osd_data(req, 0); 864 864 if (osd_data->pages_from_pool) 865 - mempool_free(osd_data->pages, 866 - ceph_sb_to_client(inode->i_sb)->wb_pagevec_pool); 865 + mempool_free(osd_data->pages, ceph_wb_pagevec_pool); 867 866 else 868 867 kfree(osd_data->pages); 869 868 ceph_osdc_put_request(req); ··· 954 955 int num_ops = 0, op_idx; 955 956 unsigned i, pvec_pages, max_pages, locked_pages = 0; 956 957 struct page **pages = NULL, **data_pages; 957 - mempool_t *pool = NULL; /* Becomes non-null if mempool used */ 958 958 struct page *page; 959 959 pgoff_t strip_unit_end = 0; 960 960 u64 offset = 0, len = 0; 961 + bool from_pool = false; 961 962 962 963 max_pages = wsize >> PAGE_SHIFT; 963 964 ··· 1056 1057 sizeof(*pages), 1057 1058 GFP_NOFS); 1058 1059 if (!pages) { 1059 - pool = fsc->wb_pagevec_pool; 1060 - pages = mempool_alloc(pool, GFP_NOFS); 1060 + from_pool = true; 1061 + pages = mempool_alloc(ceph_wb_pagevec_pool, GFP_NOFS); 1061 1062 BUG_ON(!pages); 1062 1063 } 1063 1064 1064 1065 len = 0; 1065 1066 } else if (page->index != 1066 1067 (offset + len) >> PAGE_SHIFT) { 1067 - if (num_ops >= (pool ? CEPH_OSD_SLAB_OPS : 1068 - CEPH_OSD_MAX_OPS)) { 1068 + if (num_ops >= (from_pool ? 
CEPH_OSD_SLAB_OPS : 1069 + CEPH_OSD_MAX_OPS)) { 1069 1070 redirty_page_for_writepage(wbc, page); 1070 1071 unlock_page(page); 1071 1072 break; ··· 1160 1161 offset, len); 1161 1162 osd_req_op_extent_osd_data_pages(req, op_idx, 1162 1163 data_pages, len, 0, 1163 - !!pool, false); 1164 + from_pool, false); 1164 1165 osd_req_op_extent_update(req, op_idx, len); 1165 1166 1166 1167 len = 0; ··· 1187 1188 dout("writepages got pages at %llu~%llu\n", offset, len); 1188 1189 1189 1190 osd_req_op_extent_osd_data_pages(req, op_idx, data_pages, len, 1190 - 0, !!pool, false); 1191 + 0, from_pool, false); 1191 1192 osd_req_op_extent_update(req, op_idx, len); 1192 1193 1193 1194 BUG_ON(op_idx + 1 != req->r_num_ops); 1194 1195 1195 - pool = NULL; 1196 + from_pool = false; 1196 1197 if (i < locked_pages) { 1197 1198 BUG_ON(num_ops <= req->r_num_ops); 1198 1199 num_ops -= req->r_num_ops; ··· 1203 1204 pages = kmalloc_array(locked_pages, sizeof(*pages), 1204 1205 GFP_NOFS); 1205 1206 if (!pages) { 1206 - pool = fsc->wb_pagevec_pool; 1207 - pages = mempool_alloc(pool, GFP_NOFS); 1207 + from_pool = true; 1208 + pages = mempool_alloc(ceph_wb_pagevec_pool, GFP_NOFS); 1208 1209 BUG_ON(!pages); 1209 1210 } 1210 1211 memcpy(pages, data_pages + i,
+6 -6
fs/ceph/caps.c
··· 668 668 spin_lock(&session->s_cap_lock); 669 669 list_add_tail(&cap->session_caps, &session->s_caps); 670 670 session->s_nr_caps++; 671 + atomic64_inc(&mdsc->metric.total_caps); 671 672 spin_unlock(&session->s_cap_lock); 672 673 } else { 673 674 spin_lock(&session->s_cap_lock); ··· 1162 1161 } else { 1163 1162 list_del_init(&cap->session_caps); 1164 1163 session->s_nr_caps--; 1164 + atomic64_dec(&mdsc->metric.total_caps); 1165 1165 cap->session = NULL; 1166 1166 removed = 1; 1167 1167 } ··· 4189 4187 struct ceph_inode_info *ci; 4190 4188 4191 4189 dout("check_delayed_caps\n"); 4192 - while (1) { 4193 - spin_lock(&mdsc->cap_delay_lock); 4194 - if (list_empty(&mdsc->cap_delay_list)) 4195 - break; 4190 + spin_lock(&mdsc->cap_delay_lock); 4191 + while (!list_empty(&mdsc->cap_delay_list)) { 4196 4192 ci = list_first_entry(&mdsc->cap_delay_list, 4197 4193 struct ceph_inode_info, 4198 4194 i_cap_delay_list); ··· 4200 4200 list_del_init(&ci->i_cap_delay_list); 4201 4201 4202 4202 inode = igrab(&ci->vfs_inode); 4203 - spin_unlock(&mdsc->cap_delay_lock); 4204 - 4205 4203 if (inode) { 4204 + spin_unlock(&mdsc->cap_delay_lock); 4206 4205 dout("check_delayed_caps on %p\n", inode); 4207 4206 ceph_check_caps(ci, 0, NULL); 4208 4207 /* avoid calling iput_final() in tick thread */ 4209 4208 ceph_async_iput(inode); 4209 + spin_lock(&mdsc->cap_delay_lock); 4210 4210 } 4211 4211 } 4212 4212 spin_unlock(&mdsc->cap_delay_lock);
+3 -13
fs/ceph/debugfs.c
··· 145 145 struct ceph_fs_client *fsc = s->private; 146 146 struct ceph_mds_client *mdsc = fsc->mdsc; 147 147 struct ceph_client_metric *m = &mdsc->metric; 148 - int i, nr_caps = 0; 148 + int nr_caps = 0; 149 149 s64 total, sum, avg, min, max, sq; 150 150 151 151 seq_printf(s, "item total avg_lat(us) min_lat(us) max_lat(us) stdev(us)\n"); ··· 190 190 percpu_counter_sum(&m->d_lease_mis), 191 191 percpu_counter_sum(&m->d_lease_hit)); 192 192 193 - mutex_lock(&mdsc->mutex); 194 - for (i = 0; i < mdsc->max_sessions; i++) { 195 - struct ceph_mds_session *s; 196 - 197 - s = __ceph_lookup_mds_session(mdsc, i); 198 - if (!s) 199 - continue; 200 - nr_caps += s->s_nr_caps; 201 - ceph_put_mds_session(s); 202 - } 203 - mutex_unlock(&mdsc->mutex); 193 + nr_caps = atomic64_read(&m->total_caps); 204 194 seq_printf(s, "%-14s%-16d%-16lld%lld\n", "caps", nr_caps, 205 195 percpu_counter_sum(&m->i_caps_mis), 206 196 percpu_counter_sum(&m->i_caps_hit)); ··· 262 272 struct ceph_mds_client *mdsc = fsc->mdsc; 263 273 struct ceph_auth_client *ac = fsc->client->monc.auth; 264 274 struct ceph_options *opt = fsc->client->options; 265 - int mds = -1; 275 + int mds; 266 276 267 277 mutex_lock(&mdsc->mutex); 268 278
+4
fs/ceph/dir.c
··· 930 930 req->r_num_caps = 2; 931 931 req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL; 932 932 req->r_dentry_unless = CEPH_CAP_FILE_EXCL; 933 + if (as_ctx.pagelist) { 934 + req->r_pagelist = as_ctx.pagelist; 935 + as_ctx.pagelist = NULL; 936 + } 933 937 err = ceph_mdsc_do_request(mdsc, dir, req); 934 938 if (!err && !req->r_reply_info.head->is_dentry) 935 939 err = ceph_handle_notrace_create(dir, dentry);
+3 -2
fs/ceph/file.c
··· 1538 1538 struct inode *inode = file_inode(filp); 1539 1539 struct ceph_inode_info *ci = ceph_inode(inode); 1540 1540 struct page *pinned_page = NULL; 1541 + bool direct_lock = iocb->ki_flags & IOCB_DIRECT; 1541 1542 ssize_t ret; 1542 1543 int want, got = 0; 1543 1544 int retry_op = 0, read = 0; ··· 1547 1546 dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n", 1548 1547 inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, inode); 1549 1548 1550 - if (iocb->ki_flags & IOCB_DIRECT) 1549 + if (direct_lock) 1551 1550 ceph_start_io_direct(inode); 1552 1551 else 1553 1552 ceph_start_io_read(inode); ··· 1604 1603 } 1605 1604 ceph_put_cap_refs(ci, got); 1606 1605 1607 - if (iocb->ki_flags & IOCB_DIRECT) 1606 + if (direct_lock) 1608 1607 ceph_end_io_direct(inode); 1609 1608 else 1610 1609 ceph_end_io_read(inode);
+145 -39
fs/ceph/mds_client.c
··· 1103 1103 frag.frag, mds); 1104 1104 if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >= 1105 1105 CEPH_MDS_STATE_ACTIVE) { 1106 - if (mode == USE_ANY_MDS && 1107 - !ceph_mdsmap_is_laggy(mdsc->mdsmap, 1106 + if (!ceph_mdsmap_is_laggy(mdsc->mdsmap, 1108 1107 mds)) 1109 1108 goto out; 1110 1109 } ··· 1167 1168 1168 1169 static const unsigned char feature_bits[] = CEPHFS_FEATURES_CLIENT_SUPPORTED; 1169 1170 #define FEATURE_BYTES(c) (DIV_ROUND_UP((size_t)feature_bits[c - 1] + 1, 64) * 8) 1170 - static void encode_supported_features(void **p, void *end) 1171 + static int encode_supported_features(void **p, void *end) 1171 1172 { 1172 1173 static const size_t count = ARRAY_SIZE(feature_bits); 1173 1174 ··· 1175 1176 size_t i; 1176 1177 size_t size = FEATURE_BYTES(count); 1177 1178 1178 - BUG_ON(*p + 4 + size > end); 1179 + if (WARN_ON_ONCE(*p + 4 + size > end)) 1180 + return -ERANGE; 1181 + 1179 1182 ceph_encode_32(p, size); 1180 1183 memset(*p, 0, size); 1181 1184 for (i = 0; i < count; i++) 1182 1185 ((unsigned char*)(*p))[i / 8] |= BIT(feature_bits[i] % 8); 1183 1186 *p += size; 1184 1187 } else { 1185 - BUG_ON(*p + 4 > end); 1188 + if (WARN_ON_ONCE(*p + 4 > end)) 1189 + return -ERANGE; 1190 + 1186 1191 ceph_encode_32(p, 0); 1187 1192 } 1193 + 1194 + return 0; 1195 + } 1196 + 1197 + static const unsigned char metric_bits[] = CEPHFS_METRIC_SPEC_CLIENT_SUPPORTED; 1198 + #define METRIC_BYTES(cnt) (DIV_ROUND_UP((size_t)metric_bits[cnt - 1] + 1, 64) * 8) 1199 + static int encode_metric_spec(void **p, void *end) 1200 + { 1201 + static const size_t count = ARRAY_SIZE(metric_bits); 1202 + 1203 + /* header */ 1204 + if (WARN_ON_ONCE(*p + 2 > end)) 1205 + return -ERANGE; 1206 + 1207 + ceph_encode_8(p, 1); /* version */ 1208 + ceph_encode_8(p, 1); /* compat */ 1209 + 1210 + if (count > 0) { 1211 + size_t i; 1212 + size_t size = METRIC_BYTES(count); 1213 + 1214 + if (WARN_ON_ONCE(*p + 4 + 4 + size > end)) 1215 + return -ERANGE; 1216 + 1217 + /* metric spec info length */ 1218 + 
ceph_encode_32(p, 4 + size); 1219 + 1220 + /* metric spec */ 1221 + ceph_encode_32(p, size); 1222 + memset(*p, 0, size); 1223 + for (i = 0; i < count; i++) 1224 + ((unsigned char *)(*p))[i / 8] |= BIT(metric_bits[i] % 8); 1225 + *p += size; 1226 + } else { 1227 + if (WARN_ON_ONCE(*p + 4 + 4 > end)) 1228 + return -ERANGE; 1229 + 1230 + /* metric spec info length */ 1231 + ceph_encode_32(p, 4); 1232 + /* metric spec */ 1233 + ceph_encode_32(p, 0); 1234 + } 1235 + 1236 + return 0; 1188 1237 } 1189 1238 1190 1239 /* ··· 1250 1203 struct ceph_mount_options *fsopt = mdsc->fsc->mount_options; 1251 1204 size_t size, count; 1252 1205 void *p, *end; 1206 + int ret; 1253 1207 1254 1208 const char* metadata[][2] = { 1255 1209 {"hostname", mdsc->nodename}, ··· 1275 1227 size = FEATURE_BYTES(count); 1276 1228 extra_bytes += 4 + size; 1277 1229 1230 + /* metric spec */ 1231 + size = 0; 1232 + count = ARRAY_SIZE(metric_bits); 1233 + if (count > 0) 1234 + size = METRIC_BYTES(count); 1235 + extra_bytes += 2 + 4 + 4 + size; 1236 + 1278 1237 /* Allocate the message */ 1279 1238 msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h) + extra_bytes, 1280 1239 GFP_NOFS, false); 1281 1240 if (!msg) { 1282 1241 pr_err("create_session_msg ENOMEM creating msg\n"); 1283 - return NULL; 1242 + return ERR_PTR(-ENOMEM); 1284 1243 } 1285 1244 p = msg->front.iov_base; 1286 1245 end = p + msg->front.iov_len; ··· 1300 1245 * Serialize client metadata into waiting buffer space, using 1301 1246 * the format that userspace expects for map<string, string> 1302 1247 * 1303 - * ClientSession messages with metadata are v3 1248 + * ClientSession messages with metadata are v4 1304 1249 */ 1305 - msg->hdr.version = cpu_to_le16(3); 1250 + msg->hdr.version = cpu_to_le16(4); 1306 1251 msg->hdr.compat_version = cpu_to_le16(1); 1307 1252 1308 1253 /* The write pointer, following the session_head structure */ ··· 1324 1269 p += val_len; 1325 1270 } 1326 1271 1327 - encode_supported_features(&p, end); 1272 + ret = 
encode_supported_features(&p, end); 1273 + if (ret) { 1274 + pr_err("encode_supported_features failed!\n"); 1275 + ceph_msg_put(msg); 1276 + return ERR_PTR(ret); 1277 + } 1278 + 1279 + ret = encode_metric_spec(&p, end); 1280 + if (ret) { 1281 + pr_err("encode_metric_spec failed!\n"); 1282 + ceph_msg_put(msg); 1283 + return ERR_PTR(ret); 1284 + } 1285 + 1328 1286 msg->front.iov_len = p - msg->front.iov_base; 1329 1287 msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); 1330 1288 ··· 1365 1297 1366 1298 /* send connect message */ 1367 1299 msg = create_session_open_msg(mdsc, session->s_seq); 1368 - if (!msg) 1369 - return -ENOMEM; 1300 + if (IS_ERR(msg)) 1301 + return PTR_ERR(msg); 1370 1302 ceph_con_send(&session->s_con, msg); 1371 1303 return 0; 1372 1304 } ··· 1380 1312 __open_export_target_session(struct ceph_mds_client *mdsc, int target) 1381 1313 { 1382 1314 struct ceph_mds_session *session; 1315 + int ret; 1383 1316 1384 1317 session = __ceph_lookup_mds_session(mdsc, target); 1385 1318 if (!session) { ··· 1389 1320 return session; 1390 1321 } 1391 1322 if (session->s_state == CEPH_MDS_SESSION_NEW || 1392 - session->s_state == CEPH_MDS_SESSION_CLOSING) 1393 - __open_session(mdsc, session); 1323 + session->s_state == CEPH_MDS_SESSION_CLOSING) { 1324 + ret = __open_session(mdsc, session); 1325 + if (ret) 1326 + return ERR_PTR(ret); 1327 + } 1394 1328 1395 1329 return session; 1396 1330 } ··· 1557 1485 cap->session = NULL; 1558 1486 list_del_init(&cap->session_caps); 1559 1487 session->s_nr_caps--; 1488 + atomic64_dec(&session->s_mdsc->metric.total_caps); 1560 1489 if (cap->queue_release) 1561 1490 __ceph_queue_cap_release(session, cap); 1562 1491 else ··· 1858 1785 /* 1859 1786 * send a session close request 1860 1787 */ 1861 - static int request_close_session(struct ceph_mds_client *mdsc, 1862 - struct ceph_mds_session *session) 1788 + static int request_close_session(struct ceph_mds_session *session) 1863 1789 { 1864 1790 struct ceph_msg *msg; 1865 1791 ··· 
1881 1809 if (session->s_state >= CEPH_MDS_SESSION_CLOSING) 1882 1810 return 0; 1883 1811 session->s_state = CEPH_MDS_SESSION_CLOSING; 1884 - return request_close_session(mdsc, session); 1812 + return request_close_session(session); 1885 1813 } 1886 1814 1887 1815 static bool drop_negative_children(struct dentry *dentry) ··· 2592 2520 ceph_encode_copy(&p, &ts, sizeof(ts)); 2593 2521 } 2594 2522 2595 - BUG_ON(p > end); 2523 + if (WARN_ON_ONCE(p > end)) { 2524 + ceph_msg_put(msg); 2525 + msg = ERR_PTR(-ERANGE); 2526 + goto out_free2; 2527 + } 2528 + 2596 2529 msg->front.iov_len = p - msg->front.iov_base; 2597 2530 msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); 2598 2531 ··· 2833 2756 } 2834 2757 if (session->s_state == CEPH_MDS_SESSION_NEW || 2835 2758 session->s_state == CEPH_MDS_SESSION_CLOSING) { 2836 - __open_session(mdsc, session); 2759 + err = __open_session(mdsc, session); 2760 + if (err) 2761 + goto out_session; 2837 2762 /* retry the same mds later */ 2838 2763 if (random) 2839 2764 req->r_resend_mds = mds; ··· 3358 3279 goto bad; 3359 3280 /* version >= 3, feature bits */ 3360 3281 ceph_decode_32_safe(&p, end, len, bad); 3361 - ceph_decode_64_safe(&p, end, features, bad); 3362 - p += len - sizeof(features); 3282 + if (len) { 3283 + ceph_decode_64_safe(&p, end, features, bad); 3284 + p += len - sizeof(features); 3285 + } 3363 3286 } 3364 3287 3365 3288 mutex_lock(&mdsc->mutex); ··· 3391 3310 session->s_state = CEPH_MDS_SESSION_OPEN; 3392 3311 session->s_features = features; 3393 3312 renewed_caps(mdsc, session, 0); 3313 + if (test_bit(CEPHFS_FEATURE_METRIC_COLLECT, &session->s_features)) 3314 + metric_schedule_delayed(&mdsc->metric); 3394 3315 wake = 1; 3395 3316 if (mdsc->stopping) 3396 3317 __close_session(mdsc, session); ··· 4346 4263 ceph_force_reconnect(fsc->sb); 4347 4264 } 4348 4265 4266 + bool check_session_state(struct ceph_mds_session *s) 4267 + { 4268 + if (s->s_state == CEPH_MDS_SESSION_CLOSING) { 4269 + dout("resending session close 
request for mds%d\n", 4270 + s->s_mds); 4271 + request_close_session(s); 4272 + return false; 4273 + } 4274 + if (s->s_ttl && time_after(jiffies, s->s_ttl)) { 4275 + if (s->s_state == CEPH_MDS_SESSION_OPEN) { 4276 + s->s_state = CEPH_MDS_SESSION_HUNG; 4277 + pr_info("mds%d hung\n", s->s_mds); 4278 + } 4279 + } 4280 + if (s->s_state == CEPH_MDS_SESSION_NEW || 4281 + s->s_state == CEPH_MDS_SESSION_RESTARTING || 4282 + s->s_state == CEPH_MDS_SESSION_CLOSED || 4283 + s->s_state == CEPH_MDS_SESSION_REJECTED) 4284 + /* this mds is failed or recovering, just wait */ 4285 + return false; 4286 + 4287 + return true; 4288 + } 4289 + 4349 4290 /* 4350 4291 * delayed work -- periodically trim expired leases, renew caps with mds 4351 4292 */ ··· 4390 4283 4391 4284 dout("mdsc delayed_work\n"); 4392 4285 4286 + if (mdsc->stopping) 4287 + return; 4288 + 4393 4289 mutex_lock(&mdsc->mutex); 4394 4290 renew_interval = mdsc->mdsmap->m_session_timeout >> 2; 4395 4291 renew_caps = time_after_eq(jiffies, HZ*renew_interval + ··· 4404 4294 struct ceph_mds_session *s = __ceph_lookup_mds_session(mdsc, i); 4405 4295 if (!s) 4406 4296 continue; 4407 - if (s->s_state == CEPH_MDS_SESSION_CLOSING) { 4408 - dout("resending session close request for mds%d\n", 4409 - s->s_mds); 4410 - request_close_session(mdsc, s); 4411 - ceph_put_mds_session(s); 4412 - continue; 4413 - } 4414 - if (s->s_ttl && time_after(jiffies, s->s_ttl)) { 4415 - if (s->s_state == CEPH_MDS_SESSION_OPEN) { 4416 - s->s_state = CEPH_MDS_SESSION_HUNG; 4417 - pr_info("mds%d hung\n", s->s_mds); 4418 - } 4419 - } 4420 - if (s->s_state == CEPH_MDS_SESSION_NEW || 4421 - s->s_state == CEPH_MDS_SESSION_RESTARTING || 4422 - s->s_state == CEPH_MDS_SESSION_REJECTED) { 4423 - /* this mds is failed or recovering, just wait */ 4297 + 4298 + if (!check_session_state(s)) { 4424 4299 ceph_put_mds_session(s); 4425 4300 continue; 4426 4301 } ··· 4454 4359 goto err_mdsc; 4455 4360 } 4456 4361 4457 - fsc->mdsc = mdsc; 4458 4362 
init_completion(&mdsc->safe_umount_waiters); 4459 4363 init_waitqueue_head(&mdsc->session_close_wq); 4460 4364 INIT_LIST_HEAD(&mdsc->waiting_for_map); ··· 4508 4414 4509 4415 strscpy(mdsc->nodename, utsname()->nodename, 4510 4416 sizeof(mdsc->nodename)); 4417 + 4418 + fsc->mdsc = mdsc; 4511 4419 return 0; 4512 4420 4513 4421 err_mdsmap: ··· 4753 4657 static void ceph_mdsc_stop(struct ceph_mds_client *mdsc) 4754 4658 { 4755 4659 dout("stop\n"); 4756 - cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */ 4660 + /* 4661 + * Make sure the delayed work stopped before releasing 4662 + * the resources. 4663 + * 4664 + * Because the cancel_delayed_work_sync() will only 4665 + * guarantee that the work finishes executing. But the 4666 + * delayed work will re-arm itself again after that. 4667 + */ 4668 + flush_delayed_work(&mdsc->delayed_work); 4669 + 4757 4670 if (mdsc->mdsmap) 4758 4671 ceph_mdsmap_destroy(mdsc->mdsmap); 4759 4672 kfree(mdsc->sessions); ··· 4785 4680 4786 4681 ceph_metric_destroy(&mdsc->metric); 4787 4682 4683 + flush_delayed_work(&mdsc->metric.delayed_work); 4788 4684 fsc->mdsc = NULL; 4789 4685 kfree(mdsc); 4790 4686 dout("mdsc_destroy %p done\n", mdsc);
+6 -1
fs/ceph/mds_client.h
··· 18 18 #include <linux/ceph/auth.h> 19 19 20 20 #include "metric.h" 21 + #include "super.h" 21 22 22 23 /* The first 8 bits are reserved for old ceph releases */ 23 24 enum ceph_feature_type { ··· 28 27 CEPHFS_FEATURE_LAZY_CAP_WANTED, 29 28 CEPHFS_FEATURE_MULTI_RECONNECT, 30 29 CEPHFS_FEATURE_DELEG_INO, 30 + CEPHFS_FEATURE_METRIC_COLLECT, 31 31 32 - CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_DELEG_INO, 32 + CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_METRIC_COLLECT, 33 33 }; 34 34 35 35 /* ··· 44 42 CEPHFS_FEATURE_LAZY_CAP_WANTED, \ 45 43 CEPHFS_FEATURE_MULTI_RECONNECT, \ 46 44 CEPHFS_FEATURE_DELEG_INO, \ 45 + CEPHFS_FEATURE_METRIC_COLLECT, \ 47 46 \ 48 47 CEPHFS_FEATURE_MAX, \ 49 48 } ··· 478 475 }; 479 476 480 477 extern const char *ceph_mds_op_name(int op); 478 + 479 + extern bool check_session_state(struct ceph_mds_session *s); 481 480 482 481 extern struct ceph_mds_session * 483 482 __ceph_lookup_mds_session(struct ceph_mds_client *, int mds);
+4 -6
fs/ceph/mdsmap.c
··· 120 120 const void *start = *p; 121 121 int i, j, n; 122 122 int err; 123 - u8 mdsmap_v, mdsmap_cv; 123 + u8 mdsmap_v; 124 124 u16 mdsmap_ev; 125 125 126 126 m = kzalloc(sizeof(*m), GFP_NOFS); ··· 129 129 130 130 ceph_decode_need(p, end, 1 + 1, bad); 131 131 mdsmap_v = ceph_decode_8(p); 132 - mdsmap_cv = ceph_decode_8(p); 132 + *p += sizeof(u8); /* mdsmap_cv */ 133 133 if (mdsmap_v >= 4) { 134 134 u32 mdsmap_len; 135 135 ceph_decode_32_safe(p, end, mdsmap_len, bad); ··· 174 174 u64 global_id; 175 175 u32 namelen; 176 176 s32 mds, inc, state; 177 - u64 state_seq; 178 177 u8 info_v; 179 178 void *info_end = NULL; 180 179 struct ceph_entity_addr addr; ··· 188 189 info_v= ceph_decode_8(p); 189 190 if (info_v >= 4) { 190 191 u32 info_len; 191 - u8 info_cv; 192 192 ceph_decode_need(p, end, 1 + sizeof(u32), bad); 193 - info_cv = ceph_decode_8(p); 193 + *p += sizeof(u8); /* info_cv */ 194 194 info_len = ceph_decode_32(p); 195 195 info_end = *p + info_len; 196 196 if (info_end > end) ··· 208 210 mds = ceph_decode_32(p); 209 211 inc = ceph_decode_32(p); 210 212 state = ceph_decode_32(p); 211 - state_seq = ceph_decode_64(p); 213 + *p += sizeof(u64); /* state_seq */ 212 214 err = ceph_decode_entity_addr(p, end, &addr); 213 215 if (err) 214 216 goto corrupt;
+149
fs/ceph/metric.c
··· 1 1 /* SPDX-License-Identifier: GPL-2.0 */ 2 + #include <linux/ceph/ceph_debug.h> 2 3 3 4 #include <linux/types.h> 4 5 #include <linux/percpu_counter.h> 5 6 #include <linux/math64.h> 6 7 7 8 #include "metric.h" 9 + #include "mds_client.h" 10 + 11 + static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc, 12 + struct ceph_mds_session *s) 13 + { 14 + struct ceph_metric_head *head; 15 + struct ceph_metric_cap *cap; 16 + struct ceph_metric_read_latency *read; 17 + struct ceph_metric_write_latency *write; 18 + struct ceph_metric_metadata_latency *meta; 19 + struct ceph_client_metric *m = &mdsc->metric; 20 + u64 nr_caps = atomic64_read(&m->total_caps); 21 + struct ceph_msg *msg; 22 + struct timespec64 ts; 23 + s64 sum; 24 + s32 items = 0; 25 + s32 len; 26 + 27 + len = sizeof(*head) + sizeof(*cap) + sizeof(*read) + sizeof(*write) 28 + + sizeof(*meta); 29 + 30 + msg = ceph_msg_new(CEPH_MSG_CLIENT_METRICS, len, GFP_NOFS, true); 31 + if (!msg) { 32 + pr_err("send metrics to mds%d, failed to allocate message\n", 33 + s->s_mds); 34 + return false; 35 + } 36 + 37 + head = msg->front.iov_base; 38 + 39 + /* encode the cap metric */ 40 + cap = (struct ceph_metric_cap *)(head + 1); 41 + cap->type = cpu_to_le32(CLIENT_METRIC_TYPE_CAP_INFO); 42 + cap->ver = 1; 43 + cap->compat = 1; 44 + cap->data_len = cpu_to_le32(sizeof(*cap) - 10); 45 + cap->hit = cpu_to_le64(percpu_counter_sum(&mdsc->metric.i_caps_hit)); 46 + cap->mis = cpu_to_le64(percpu_counter_sum(&mdsc->metric.i_caps_mis)); 47 + cap->total = cpu_to_le64(nr_caps); 48 + items++; 49 + 50 + /* encode the read latency metric */ 51 + read = (struct ceph_metric_read_latency *)(cap + 1); 52 + read->type = cpu_to_le32(CLIENT_METRIC_TYPE_READ_LATENCY); 53 + read->ver = 1; 54 + read->compat = 1; 55 + read->data_len = cpu_to_le32(sizeof(*read) - 10); 56 + sum = m->read_latency_sum; 57 + jiffies_to_timespec64(sum, &ts); 58 + read->sec = cpu_to_le32(ts.tv_sec); 59 + read->nsec = cpu_to_le32(ts.tv_nsec); 60 + items++; 61 + 62 + 
/* encode the write latency metric */ 63 + write = (struct ceph_metric_write_latency *)(read + 1); 64 + write->type = cpu_to_le32(CLIENT_METRIC_TYPE_WRITE_LATENCY); 65 + write->ver = 1; 66 + write->compat = 1; 67 + write->data_len = cpu_to_le32(sizeof(*write) - 10); 68 + sum = m->write_latency_sum; 69 + jiffies_to_timespec64(sum, &ts); 70 + write->sec = cpu_to_le32(ts.tv_sec); 71 + write->nsec = cpu_to_le32(ts.tv_nsec); 72 + items++; 73 + 74 + /* encode the metadata latency metric */ 75 + meta = (struct ceph_metric_metadata_latency *)(write + 1); 76 + meta->type = cpu_to_le32(CLIENT_METRIC_TYPE_METADATA_LATENCY); 77 + meta->ver = 1; 78 + meta->compat = 1; 79 + meta->data_len = cpu_to_le32(sizeof(*meta) - 10); 80 + sum = m->metadata_latency_sum; 81 + jiffies_to_timespec64(sum, &ts); 82 + meta->sec = cpu_to_le32(ts.tv_sec); 83 + meta->nsec = cpu_to_le32(ts.tv_nsec); 84 + items++; 85 + 86 + put_unaligned_le32(items, &head->num); 87 + msg->front.iov_len = len; 88 + msg->hdr.version = cpu_to_le16(1); 89 + msg->hdr.compat_version = cpu_to_le16(1); 90 + msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); 91 + dout("client%llu send metrics to mds%d\n", 92 + ceph_client_gid(mdsc->fsc->client), s->s_mds); 93 + ceph_con_send(&s->s_con, msg); 94 + 95 + return true; 96 + } 97 + 98 + 99 + static void metric_get_session(struct ceph_mds_client *mdsc) 100 + { 101 + struct ceph_mds_session *s; 102 + int i; 103 + 104 + mutex_lock(&mdsc->mutex); 105 + for (i = 0; i < mdsc->max_sessions; i++) { 106 + s = __ceph_lookup_mds_session(mdsc, i); 107 + if (!s) 108 + continue; 109 + 110 + /* 111 + * Skip it if MDS doesn't support the metric collection, 112 + * or the MDS will close the session's socket connection 113 + * directly when it get this message. 
114 + */ 115 + if (check_session_state(s) && 116 + test_bit(CEPHFS_FEATURE_METRIC_COLLECT, &s->s_features)) { 117 + mdsc->metric.session = s; 118 + break; 119 + } 120 + 121 + ceph_put_mds_session(s); 122 + } 123 + mutex_unlock(&mdsc->mutex); 124 + } 125 + 126 + static void metric_delayed_work(struct work_struct *work) 127 + { 128 + struct ceph_client_metric *m = 129 + container_of(work, struct ceph_client_metric, delayed_work.work); 130 + struct ceph_mds_client *mdsc = 131 + container_of(m, struct ceph_mds_client, metric); 132 + 133 + if (mdsc->stopping) 134 + return; 135 + 136 + if (!m->session || !check_session_state(m->session)) { 137 + if (m->session) { 138 + ceph_put_mds_session(m->session); 139 + m->session = NULL; 140 + } 141 + metric_get_session(mdsc); 142 + } 143 + if (m->session) { 144 + ceph_mdsc_send_metrics(mdsc, m->session); 145 + metric_schedule_delayed(m); 146 + } 147 + } 8 148 9 149 int ceph_metric_init(struct ceph_client_metric *m) 10 150 { ··· 162 22 if (ret) 163 23 goto err_d_lease_mis; 164 24 25 + atomic64_set(&m->total_caps, 0); 165 26 ret = percpu_counter_init(&m->i_caps_hit, 0, GFP_KERNEL); 166 27 if (ret) 167 28 goto err_i_caps_hit; ··· 192 51 m->total_metadatas = 0; 193 52 m->metadata_latency_sum = 0; 194 53 54 + m->session = NULL; 55 + INIT_DELAYED_WORK(&m->delayed_work, metric_delayed_work); 56 + 195 57 return 0; 196 58 197 59 err_i_caps_mis: ··· 216 72 percpu_counter_destroy(&m->i_caps_hit); 217 73 percpu_counter_destroy(&m->d_lease_mis); 218 74 percpu_counter_destroy(&m->d_lease_hit); 75 + 76 + cancel_delayed_work_sync(&m->delayed_work); 77 + 78 + if (m->session) 79 + ceph_put_mds_session(m->session); 219 80 } 220 81 221 82 static inline void __update_latency(ktime_t *totalp, ktime_t *lsump,
+91
fs/ceph/metric.h
··· 6 6 #include <linux/percpu_counter.h> 7 7 #include <linux/ktime.h> 8 8 9 + extern bool disable_send_metrics; 10 + 11 + enum ceph_metric_type { 12 + CLIENT_METRIC_TYPE_CAP_INFO, 13 + CLIENT_METRIC_TYPE_READ_LATENCY, 14 + CLIENT_METRIC_TYPE_WRITE_LATENCY, 15 + CLIENT_METRIC_TYPE_METADATA_LATENCY, 16 + CLIENT_METRIC_TYPE_DENTRY_LEASE, 17 + 18 + CLIENT_METRIC_TYPE_MAX = CLIENT_METRIC_TYPE_DENTRY_LEASE, 19 + }; 20 + 21 + /* 22 + * This will always have the highest metric bit value 23 + * as the last element of the array. 24 + */ 25 + #define CEPHFS_METRIC_SPEC_CLIENT_SUPPORTED { \ 26 + CLIENT_METRIC_TYPE_CAP_INFO, \ 27 + CLIENT_METRIC_TYPE_READ_LATENCY, \ 28 + CLIENT_METRIC_TYPE_WRITE_LATENCY, \ 29 + CLIENT_METRIC_TYPE_METADATA_LATENCY, \ 30 + \ 31 + CLIENT_METRIC_TYPE_MAX, \ 32 + } 33 + 34 + /* metric caps header */ 35 + struct ceph_metric_cap { 36 + __le32 type; /* ceph metric type */ 37 + 38 + __u8 ver; 39 + __u8 compat; 40 + 41 + __le32 data_len; /* length of sizeof(hit + mis + total) */ 42 + __le64 hit; 43 + __le64 mis; 44 + __le64 total; 45 + } __packed; 46 + 47 + /* metric read latency header */ 48 + struct ceph_metric_read_latency { 49 + __le32 type; /* ceph metric type */ 50 + 51 + __u8 ver; 52 + __u8 compat; 53 + 54 + __le32 data_len; /* length of sizeof(sec + nsec) */ 55 + __le32 sec; 56 + __le32 nsec; 57 + } __packed; 58 + 59 + /* metric write latency header */ 60 + struct ceph_metric_write_latency { 61 + __le32 type; /* ceph metric type */ 62 + 63 + __u8 ver; 64 + __u8 compat; 65 + 66 + __le32 data_len; /* length of sizeof(sec + nsec) */ 67 + __le32 sec; 68 + __le32 nsec; 69 + } __packed; 70 + 71 + /* metric metadata latency header */ 72 + struct ceph_metric_metadata_latency { 73 + __le32 type; /* ceph metric type */ 74 + 75 + __u8 ver; 76 + __u8 compat; 77 + 78 + __le32 data_len; /* length of sizeof(sec + nsec) */ 79 + __le32 sec; 80 + __le32 nsec; 81 + } __packed; 82 + 83 + struct ceph_metric_head { 84 + __le32 num; /* the number of metrics that will 
be sent */ 85 + } __packed; 86 + 9 87 /* This is the global metrics */ 10 88 struct ceph_client_metric { 11 89 atomic64_t total_dentries; 12 90 struct percpu_counter d_lease_hit; 13 91 struct percpu_counter d_lease_mis; 14 92 93 + atomic64_t total_caps; 15 94 struct percpu_counter i_caps_hit; 16 95 struct percpu_counter i_caps_mis; 17 96 ··· 114 35 ktime_t metadata_latency_sq_sum; 115 36 ktime_t metadata_latency_min; 116 37 ktime_t metadata_latency_max; 38 + 39 + struct ceph_mds_session *session; 40 + struct delayed_work delayed_work; /* delayed work */ 117 41 }; 42 + 43 + static inline void metric_schedule_delayed(struct ceph_client_metric *m) 44 + { 45 + if (disable_send_metrics) 46 + return; 47 + 48 + /* per second */ 49 + schedule_delayed_work(&m->delayed_work, round_jiffies_relative(HZ)); 50 + } 118 51 119 52 extern int ceph_metric_init(struct ceph_client_metric *m); 120 53 extern void ceph_metric_destroy(struct ceph_client_metric *m);
+50 -14
fs/ceph/super.c
··· 27 27 #include <linux/ceph/auth.h> 28 28 #include <linux/ceph/debugfs.h> 29 29 30 + static DEFINE_SPINLOCK(ceph_fsc_lock); 31 + static LIST_HEAD(ceph_fsc_list); 32 + 30 33 /* 31 34 * Ceph superblock operations 32 35 * ··· 637 634 struct ceph_options *opt) 638 635 { 639 636 struct ceph_fs_client *fsc; 640 - int page_count; 641 - size_t size; 642 637 int err; 643 638 644 639 fsc = kzalloc(sizeof(*fsc), GFP_KERNEL); ··· 684 683 if (!fsc->cap_wq) 685 684 goto fail_inode_wq; 686 685 687 - /* set up mempools */ 688 - err = -ENOMEM; 689 - page_count = fsc->mount_options->wsize >> PAGE_SHIFT; 690 - size = sizeof (struct page *) * (page_count ? page_count : 1); 691 - fsc->wb_pagevec_pool = mempool_create_kmalloc_pool(10, size); 692 - if (!fsc->wb_pagevec_pool) 693 - goto fail_cap_wq; 686 + spin_lock(&ceph_fsc_lock); 687 + list_add_tail(&fsc->metric_wakeup, &ceph_fsc_list); 688 + spin_unlock(&ceph_fsc_lock); 694 689 695 690 return fsc; 696 691 697 - fail_cap_wq: 698 - destroy_workqueue(fsc->cap_wq); 699 692 fail_inode_wq: 700 693 destroy_workqueue(fsc->inode_wq); 701 694 fail_client: ··· 712 717 { 713 718 dout("destroy_fs_client %p\n", fsc); 714 719 720 + spin_lock(&ceph_fsc_lock); 721 + list_del(&fsc->metric_wakeup); 722 + spin_unlock(&ceph_fsc_lock); 723 + 715 724 ceph_mdsc_destroy(fsc); 716 725 destroy_workqueue(fsc->inode_wq); 717 726 destroy_workqueue(fsc->cap_wq); 718 - 719 - mempool_destroy(fsc->wb_pagevec_pool); 720 727 721 728 destroy_mount_options(fsc->mount_options); 722 729 ··· 738 741 struct kmem_cache *ceph_file_cachep; 739 742 struct kmem_cache *ceph_dir_file_cachep; 740 743 struct kmem_cache *ceph_mds_request_cachep; 744 + mempool_t *ceph_wb_pagevec_pool; 741 745 742 746 static void ceph_inode_init_once(void *foo) 743 747 { ··· 783 785 if (!ceph_mds_request_cachep) 784 786 goto bad_mds_req; 785 787 788 + ceph_wb_pagevec_pool = mempool_create_kmalloc_pool(10, CEPH_MAX_WRITE_SIZE >> PAGE_SHIFT); 789 + if (!ceph_wb_pagevec_pool) 790 + goto bad_pagevec_pool; 
791 + 786 792 error = ceph_fscache_register(); 787 793 if (error) 788 794 goto bad_fscache; ··· 795 793 796 794 bad_fscache: 797 795 kmem_cache_destroy(ceph_mds_request_cachep); 796 + bad_pagevec_pool: 797 + mempool_destroy(ceph_wb_pagevec_pool); 798 798 bad_mds_req: 799 799 kmem_cache_destroy(ceph_dir_file_cachep); 800 800 bad_dir_file: ··· 827 823 kmem_cache_destroy(ceph_file_cachep); 828 824 kmem_cache_destroy(ceph_dir_file_cachep); 829 825 kmem_cache_destroy(ceph_mds_request_cachep); 826 + mempool_destroy(ceph_wb_pagevec_pool); 830 827 831 828 ceph_fscache_unregister(); 832 829 } 833 830 834 831 /* 835 - * ceph_umount_begin - initiate forced umount. Tear down down the 832 + * ceph_umount_begin - initiate forced umount. Tear down the 836 833 * mount, skipping steps that may hang while waiting for server(s). 837 834 */ 838 835 static void ceph_umount_begin(struct super_block *sb) ··· 1286 1281 unregister_filesystem(&ceph_fs_type); 1287 1282 destroy_caches(); 1288 1283 } 1284 + 1285 + static int param_set_metrics(const char *val, const struct kernel_param *kp) 1286 + { 1287 + struct ceph_fs_client *fsc; 1288 + int ret; 1289 + 1290 + ret = param_set_bool(val, kp); 1291 + if (ret) { 1292 + pr_err("Failed to parse sending metrics switch value '%s'\n", 1293 + val); 1294 + return ret; 1295 + } else if (!disable_send_metrics) { 1296 + // wake up all the mds clients 1297 + spin_lock(&ceph_fsc_lock); 1298 + list_for_each_entry(fsc, &ceph_fsc_list, metric_wakeup) { 1299 + metric_schedule_delayed(&fsc->mdsc->metric); 1300 + } 1301 + spin_unlock(&ceph_fsc_lock); 1302 + } 1303 + 1304 + return 0; 1305 + } 1306 + 1307 + static const struct kernel_param_ops param_ops_metrics = { 1308 + .set = param_set_metrics, 1309 + .get = param_get_bool, 1310 + }; 1311 + 1312 + bool disable_send_metrics = false; 1313 + module_param_cb(disable_send_metrics, &param_ops_metrics, &disable_send_metrics, 0644); 1314 + MODULE_PARM_DESC(disable_send_metrics, "Enable sending perf metrics to ceph cluster (default: on)"); 1289 1315 1290 1316 module_init(init_ceph); 1291 1317 module_exit(exit_ceph);
+3 -3
fs/ceph/super.h
··· 101 101 struct ceph_fs_client { 102 102 struct super_block *sb; 103 103 104 + struct list_head metric_wakeup; 105 + 104 106 struct ceph_mount_options *mount_options; 105 107 struct ceph_client *client; 106 108 ··· 118 116 119 117 struct ceph_mds_client *mdsc; 120 118 121 - /* writeback */ 122 - mempool_t *wb_pagevec_pool; 123 119 atomic_long_t writeback_count; 124 120 125 121 struct workqueue_struct *inode_wq; ··· 353 353 unsigned i_dirty_caps, i_flushing_caps; /* mask of dirtied fields */ 354 354 355 355 /* 356 - * Link to the the auth cap's session's s_cap_dirty list. s_cap_dirty 356 + * Link to the auth cap's session's s_cap_dirty list. s_cap_dirty 357 357 * is protected by the mdsc->cap_dirty_lock, but each individual item 358 358 * is also protected by the inode's i_ceph_lock. Walking s_cap_dirty 359 359 * requires the mdsc->cap_dirty_lock. List presence for an item can
+6 -6
fs/ceph/xattr.c
··· 497 497 kfree(*newxattr); 498 498 *newxattr = NULL; 499 499 if (xattr->should_free_val) 500 - kfree((void *)xattr->val); 500 + kfree(xattr->val); 501 501 502 502 if (update_xattr) { 503 - kfree((void *)name); 503 + kfree(name); 504 504 name = xattr->name; 505 505 } 506 506 ci->i_xattrs.names_size -= xattr->name_len; ··· 566 566 BUG_ON(!xattr); 567 567 568 568 if (xattr->should_free_name) 569 - kfree((void *)xattr->name); 569 + kfree(xattr->name); 570 570 if (xattr->should_free_val) 571 - kfree((void *)xattr->val); 571 + kfree(xattr->val); 572 572 573 573 kfree(xattr); 574 574 } ··· 582 582 rb_erase(&xattr->node, &ci->i_xattrs.index); 583 583 584 584 if (xattr->should_free_name) 585 - kfree((void *)xattr->name); 585 + kfree(xattr->name); 586 586 if (xattr->should_free_val) 587 - kfree((void *)xattr->val); 587 + kfree(xattr->val); 588 588 589 589 ci->i_xattrs.names_size -= xattr->name_len; 590 590 ci->i_xattrs.vals_size -= xattr->val_len;
+1 -1
include/linux/ceph/ceph_features.h
··· 58 58 * because 10.2.z (jewel) did not care if its peers advertised this 59 59 * feature bit. 60 60 * 61 - * - In the second phase we stop advertising the the bit and call it 61 + * - In the second phase we stop advertising the bit and call it 62 62 * RETIRED. This can normally be done in the *next* major release 63 63 * following the one in which we marked the feature DEPRECATED. In 64 64 * the above example, for 12.0.z (luminous) we can say:
+1
include/linux/ceph/ceph_fs.h
··· 130 130 #define CEPH_MSG_CLIENT_REQUEST 24 131 131 #define CEPH_MSG_CLIENT_REQUEST_FORWARD 25 132 132 #define CEPH_MSG_CLIENT_REPLY 26 133 + #define CEPH_MSG_CLIENT_METRICS 29 133 134 #define CEPH_MSG_CLIENT_CAPS 0x310 134 135 #define CEPH_MSG_CLIENT_LEASE 0x311 135 136 #define CEPH_MSG_CLIENT_SNAP 0x312
+1
include/linux/ceph/libceph.h
··· 282 282 extern struct kmem_cache *ceph_file_cachep; 283 283 extern struct kmem_cache *ceph_dir_file_cachep; 284 284 extern struct kmem_cache *ceph_mds_request_cachep; 285 + extern mempool_t *ceph_wb_pagevec_pool; 285 286 286 287 /* ceph_common.c */ 287 288 extern bool libceph_compatible(void *data);
+1 -1
include/linux/ceph/osd_client.h
··· 404 404 &__oreq->r_ops[__whch].typ.fld; \ 405 405 }) 406 406 407 - extern void osd_req_op_init(struct ceph_osd_request *osd_req, 407 + struct ceph_osd_req_op *osd_req_op_init(struct ceph_osd_request *osd_req, 408 408 unsigned int which, u16 opcode, u32 flags); 409 409 410 410 extern void osd_req_op_raw_data_in_pages(struct ceph_osd_request *,
+1 -1
include/linux/crush/crush.h
··· 17 17 * The algorithm was originally described in detail in this paper 18 18 * (although the algorithm has evolved somewhat since then): 19 19 * 20 - * http://www.ssrc.ucsc.edu/Papers/weil-sc06.pdf 20 + * https://www.ssrc.ucsc.edu/Papers/weil-sc06.pdf 21 21 * 22 22 * LGPL2 23 23 */
+1 -1
net/ceph/Kconfig
··· 13 13 common functionality to both the Ceph filesystem and 14 14 to the rados block device (rbd). 15 15 16 - More information at http://ceph.newdream.net/. 16 + More information at https://ceph.io/. 17 17 18 18 If unsure, say N. 19 19
+1 -1
net/ceph/ceph_hash.c
··· 4 4 5 5 /* 6 6 * Robert Jenkin's hash function. 7 - * http://burtleburtle.net/bob/hash/evahash.html 7 + * https://burtleburtle.net/bob/hash/evahash.html 8 8 * This is in the public domain. 9 9 */ 10 10 #define mix(a, b, c) \
+1 -1
net/ceph/crush/hash.c
··· 7 7 8 8 /* 9 9 * Robert Jenkins' function for mixing 32-bit values 10 - * http://burtleburtle.net/bob/hash/evahash.html 10 + * https://burtleburtle.net/bob/hash/evahash.html 11 11 * a, b = random bits, c = input and output 12 12 */ 13 13 #define crush_hashmix(a, b, c) do { \
+1 -1
net/ceph/crush/mapper.c
··· 298 298 * 299 299 * for reference, see: 300 300 * 301 - * http://en.wikipedia.org/wiki/Exponential_distribution#Distribution_of_the_minimum_of_exponential_random_variables 301 + * https://en.wikipedia.org/wiki/Exponential_distribution#Distribution_of_the_minimum_of_exponential_random_variables 302 302 * 303 303 */ 304 304
+3
net/ceph/debugfs.c
··· 223 223 if (op->op == CEPH_OSD_OP_WATCH) 224 224 seq_printf(s, "-%s", 225 225 ceph_osd_watch_op_name(op->watch.op)); 226 + else if (op->op == CEPH_OSD_OP_CALL) 227 + seq_printf(s, "-%s/%s", op->cls.class_name, 228 + op->cls.method_name); 226 229 } 227 230 228 231 seq_putc(s, '\n');
+17 -26
net/ceph/osd_client.c
··· 525 525 526 526 static void request_init(struct ceph_osd_request *req) 527 527 { 528 - /* req only, each op is zeroed in _osd_req_op_init() */ 528 + /* req only, each op is zeroed in osd_req_op_init() */ 529 529 memset(req, 0, sizeof(*req)); 530 530 531 531 kref_init(&req->r_kref); ··· 746 746 * other information associated with them. It also serves as a 747 747 * common init routine for all the other init functions, below. 748 748 */ 749 - static struct ceph_osd_req_op * 750 - _osd_req_op_init(struct ceph_osd_request *osd_req, unsigned int which, 749 + struct ceph_osd_req_op * 750 + osd_req_op_init(struct ceph_osd_request *osd_req, unsigned int which, 751 751 u16 opcode, u32 flags) 752 752 { 753 753 struct ceph_osd_req_op *op; ··· 762 762 763 763 return op; 764 764 } 765 - 766 - void osd_req_op_init(struct ceph_osd_request *osd_req, 767 - unsigned int which, u16 opcode, u32 flags) 768 - { 769 - (void)_osd_req_op_init(osd_req, which, opcode, flags); 770 - } 771 765 EXPORT_SYMBOL(osd_req_op_init); 772 766 773 767 void osd_req_op_extent_init(struct ceph_osd_request *osd_req, ··· 769 775 u64 offset, u64 length, 770 776 u64 truncate_size, u32 truncate_seq) 771 777 { 772 - struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, 773 - opcode, 0); 778 + struct ceph_osd_req_op *op = osd_req_op_init(osd_req, which, 779 + opcode, 0); 774 780 size_t payload_len = 0; 775 781 776 782 BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE && ··· 816 822 BUG_ON(which + 1 >= osd_req->r_num_ops); 817 823 818 824 prev_op = &osd_req->r_ops[which]; 819 - op = _osd_req_op_init(osd_req, which + 1, prev_op->op, prev_op->flags); 825 + op = osd_req_op_init(osd_req, which + 1, prev_op->op, prev_op->flags); 820 826 /* dup previous one */ 821 827 op->indata_len = prev_op->indata_len; 822 828 op->outdata_len = prev_op->outdata_len; ··· 839 845 size_t size; 840 846 int ret; 841 847 842 - op = _osd_req_op_init(osd_req, which, CEPH_OSD_OP_CALL, 0); 848 + op = osd_req_op_init(osd_req, which, CEPH_OSD_OP_CALL, 0); 843 849 844 850 pagelist = ceph_pagelist_alloc(GFP_NOFS); 845 851 if (!pagelist) ··· 877 883 u16 opcode, const char *name, const void *value, 878 884 size_t size, u8 cmp_op, u8 cmp_mode) 879 885 { 880 - struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, 881 - opcode, 0); 886 + struct ceph_osd_req_op *op = osd_req_op_init(osd_req, which, 887 + opcode, 0); 882 888 struct ceph_pagelist *pagelist; 883 889 size_t payload_len; 884 890 int ret; ··· 922 928 { 923 929 struct ceph_osd_req_op *op; 924 930 925 - op = _osd_req_op_init(req, which, CEPH_OSD_OP_WATCH, 0); 931 + op = osd_req_op_init(req, which, CEPH_OSD_OP_WATCH, 0); 926 932 op->watch.cookie = cookie; 927 933 op->watch.op = watch_opcode; 928 934 op->watch.gen = 0; ··· 937 943 u64 expected_write_size, 938 944 u32 flags) 939 945 { 940 - struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, 941 - CEPH_OSD_OP_SETALLOCHINT, 942 - 0); 946 + struct ceph_osd_req_op *op; 943 947 948 + op = osd_req_op_init(osd_req, which, CEPH_OSD_OP_SETALLOCHINT, 0); 944 949 op->alloc_hint.expected_object_size = expected_object_size; 945 950 op->alloc_hint.expected_write_size = expected_write_size; 946 951 op->alloc_hint.flags = flags; ··· 3069 3076 cancel_linger_request(req); 3070 3077 3071 3078 request_reinit(req); 3072 - ceph_oid_copy(&req->r_base_oid, &lreq->t.base_oid); 3073 - ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc); 3074 - req->r_flags = lreq->t.flags; 3079 + target_copy(&req->r_t, &lreq->t); 3075 3080 req->r_mtime = lreq->mtime; 3076 3081 3077 3082 mutex_lock(&lreq->lock); ··· 4792 4801 struct ceph_pagelist *pl; 4793 4802 int ret; 4794 4803 4795 - op = _osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY_ACK, 0); 4804 + op = osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY_ACK, 0); 4796 4805 4797 4806 pl = ceph_pagelist_alloc(GFP_NOIO); 4798 4807 if (!pl) ··· 4861 4870 struct ceph_pagelist *pl; 4862 4871 int ret; 4863 4872 4864 - op = _osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY, 0); 4873 + op = osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY, 0); 4865 4874 op->notify.cookie = cookie; 4866 4875 4867 4876 pl = ceph_pagelist_alloc(GFP_NOIO); ··· 5325 5334 if (IS_ERR(pages)) 5326 5335 return PTR_ERR(pages); 5327 5336 5328 - op = _osd_req_op_init(req, 0, CEPH_OSD_OP_COPY_FROM2, 5329 - dst_fadvise_flags); 5337 + op = osd_req_op_init(req, 0, CEPH_OSD_OP_COPY_FROM2, 5338 + dst_fadvise_flags); 5330 5339 op->copy_from.snapid = src_snapid; 5331 5340 op->copy_from.src_version = src_version; 5332 5341 op->copy_from.flags = copy_from_flags;