Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client

Pull Ceph update from Sage Weil:
"There are a few fixes for snapshot behavior with CephFS and support
for the new keepalive protocol from Zheng, a libceph fix that affects
both RBD and CephFS, a few bug fixes and cleanups for RBD from Ilya,
and several small fixes and cleanups from Jianpeng and others"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client:
ceph: improve readahead for file holes
ceph: get inode size for each append write
libceph: check data_len in ->alloc_msg()
libceph: use keepalive2 to verify the mon session is alive
rbd: plug rbd_dev->header.object_prefix memory leak
rbd: fix double free on rbd_dev->header_name
libceph: set 'exists' flag for newly up osd
ceph: cleanup use of ceph_msg_get
ceph: no need to get parent inode in ceph_open
ceph: remove the useless judgement
ceph: remove redundant test of head->safe and silence static analysis warnings
ceph: fix queuing inode to mdsdir's snaprealm
libceph: rename con_work() to ceph_con_workfn()
libceph: Avoid holding the zero page on ceph_msgr_slab_init errors
libceph: remove the unused macro AES_KEY_SIZE
ceph: invalidate dirty pages after forced umount
ceph: EIO all operations after forced umount

+191 -98
+4 -2
drivers/block/rbd.c
··· 4673 4673 } 4674 4674 4675 4675 ret = rbd_dev_v2_snap_context(rbd_dev); 4676 - dout("rbd_dev_v2_snap_context returned %d\n", ret); 4676 + if (ret && first_time) { 4677 + kfree(rbd_dev->header.object_prefix); 4678 + rbd_dev->header.object_prefix = NULL; 4679 + } 4677 4680 4678 4681 return ret; 4679 4682 } ··· 5157 5154 out_err: 5158 5155 if (parent) { 5159 5156 rbd_dev_unparent(rbd_dev); 5160 - kfree(rbd_dev->header_name); 5161 5157 rbd_dev_destroy(parent); 5162 5158 } else { 5163 5159 rbd_put_client(rbdc);
+4 -2
fs/ceph/addr.c
··· 276 276 for (i = 0; i < num_pages; i++) { 277 277 struct page *page = osd_data->pages[i]; 278 278 279 - if (rc < 0) 279 + if (rc < 0 && rc != -ENOENT) 280 280 goto unlock; 281 281 if (bytes < (int)PAGE_CACHE_SIZE) { 282 282 /* zero (remainder of) page */ ··· 717 717 wbc->sync_mode == WB_SYNC_NONE ? "NONE" : 718 718 (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD")); 719 719 720 - if (fsc->mount_state == CEPH_MOUNT_SHUTDOWN) { 720 + if (ACCESS_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { 721 721 pr_warn("writepage_start %p on forced umount\n", inode); 722 + truncate_pagecache(inode, 0); 723 + mapping_set_error(mapping, -EIO); 722 724 return -EIO; /* we're in a forced umount, don't write! */ 723 725 } 724 726 if (fsc->mount_options->wsize && fsc->mount_options->wsize < wsize)
+8
fs/ceph/caps.c
··· 2413 2413 goto out_unlock; 2414 2414 } 2415 2415 2416 + if (!__ceph_is_any_caps(ci) && 2417 + ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { 2418 + dout("get_cap_refs %p forced umount\n", inode); 2419 + *err = -EIO; 2420 + ret = 1; 2421 + goto out_unlock; 2422 + } 2423 + 2416 2424 dout("get_cap_refs %p have %s needed %s\n", inode, 2417 2425 ceph_cap_string(have), ceph_cap_string(need)); 2418 2426 }
+8 -6
fs/ceph/file.c
··· 136 136 struct ceph_mds_client *mdsc = fsc->mdsc; 137 137 struct ceph_mds_request *req; 138 138 struct ceph_file_info *cf = file->private_data; 139 - struct inode *parent_inode = NULL; 140 139 int err; 141 140 int flags, fmode, wanted; 142 141 ··· 209 210 ihold(inode); 210 211 211 212 req->r_num_caps = 1; 212 - if (flags & O_CREAT) 213 - parent_inode = ceph_get_dentry_parent_inode(file->f_path.dentry); 214 - err = ceph_mdsc_do_request(mdsc, parent_inode, req); 215 - iput(parent_inode); 213 + err = ceph_mdsc_do_request(mdsc, NULL, req); 216 214 if (!err) 217 215 err = ceph_init_file(inode, file, req->r_fmode); 218 216 ceph_mdsc_put_request(req); ··· 275 279 if (err) 276 280 goto out_req; 277 281 278 - if (err == 0 && (flags & O_CREAT) && !req->r_reply_info.head->is_dentry) 282 + if ((flags & O_CREAT) && !req->r_reply_info.head->is_dentry) 279 283 err = ceph_handle_notrace_create(dir, dentry); 280 284 281 285 if (d_unhashed(dentry)) { ··· 951 955 952 956 /* We can write back this queue in page reclaim */ 953 957 current->backing_dev_info = inode_to_bdi(inode); 958 + 959 + if (iocb->ki_flags & IOCB_APPEND) { 960 + err = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE, false); 961 + if (err < 0) 962 + goto out; 963 + } 954 964 955 965 err = generic_write_checks(iocb, from); 956 966 if (err <= 0)
+45 -14
fs/ceph/mds_client.c
··· 2107 2107 msg = create_request_message(mdsc, req, mds, drop_cap_releases); 2108 2108 if (IS_ERR(msg)) { 2109 2109 req->r_err = PTR_ERR(msg); 2110 - complete_request(mdsc, req); 2111 2110 return PTR_ERR(msg); 2112 2111 } 2113 2112 req->r_request = msg; ··· 2134 2135 { 2135 2136 struct ceph_mds_session *session = NULL; 2136 2137 int mds = -1; 2137 - int err = -EAGAIN; 2138 + int err = 0; 2138 2139 2139 2140 if (req->r_err || req->r_got_result) { 2140 2141 if (req->r_aborted) ··· 2145 2146 if (req->r_timeout && 2146 2147 time_after_eq(jiffies, req->r_started + req->r_timeout)) { 2147 2148 dout("do_request timed out\n"); 2149 + err = -EIO; 2150 + goto finish; 2151 + } 2152 + if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { 2153 + dout("do_request forced umount\n"); 2148 2154 err = -EIO; 2149 2155 goto finish; 2150 2156 } ··· 2200 2196 2201 2197 out_session: 2202 2198 ceph_put_mds_session(session); 2199 + finish: 2200 + if (err) { 2201 + dout("__do_request early error %d\n", err); 2202 + req->r_err = err; 2203 + complete_request(mdsc, req); 2204 + __unregister_request(mdsc, req); 2205 + } 2203 2206 out: 2204 2207 return err; 2205 - 2206 - finish: 2207 - req->r_err = err; 2208 - complete_request(mdsc, req); 2209 - goto out; 2210 2208 } 2211 2209 2212 2210 /* ··· 2295 2289 2296 2290 if (req->r_err) { 2297 2291 err = req->r_err; 2298 - __unregister_request(mdsc, req); 2299 - dout("do_request early error %d\n", err); 2300 2292 goto out; 2301 2293 } 2302 2294 ··· 2415 2411 mutex_unlock(&mdsc->mutex); 2416 2412 goto out; 2417 2413 } 2418 - if (req->r_got_safe && !head->safe) { 2414 + if (req->r_got_safe) { 2419 2415 pr_warn("got unsafe after safe on %llu from mds%d\n", 2420 2416 tid, mds); 2421 2417 mutex_unlock(&mdsc->mutex); ··· 2524 2520 if (err) { 2525 2521 req->r_err = err; 2526 2522 } else { 2527 - req->r_reply = msg; 2528 - ceph_msg_get(msg); 2523 + req->r_reply = ceph_msg_get(msg); 2529 2524 req->r_got_result = true; 2530 2525 } 2531 2526 } else { 
··· 3558 3555 { 3559 3556 u64 want_tid, want_flush, want_snap; 3560 3557 3561 - if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN) 3558 + if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) 3562 3559 return; 3563 3560 3564 3561 dout("sync\n"); ··· 3587 3584 */ 3588 3585 static bool done_closing_sessions(struct ceph_mds_client *mdsc) 3589 3586 { 3590 - if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN) 3587 + if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) 3591 3588 return true; 3592 3589 return atomic_read(&mdsc->num_sessions) == 0; 3593 3590 } ··· 3644 3641 cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */ 3645 3642 3646 3643 dout("stopped\n"); 3644 + } 3645 + 3646 + void ceph_mdsc_force_umount(struct ceph_mds_client *mdsc) 3647 + { 3648 + struct ceph_mds_session *session; 3649 + int mds; 3650 + 3651 + dout("force umount\n"); 3652 + 3653 + mutex_lock(&mdsc->mutex); 3654 + for (mds = 0; mds < mdsc->max_sessions; mds++) { 3655 + session = __ceph_lookup_mds_session(mdsc, mds); 3656 + if (!session) 3657 + continue; 3658 + mutex_unlock(&mdsc->mutex); 3659 + mutex_lock(&session->s_mutex); 3660 + __close_session(mdsc, session); 3661 + if (session->s_state == CEPH_MDS_SESSION_CLOSING) { 3662 + cleanup_session_requests(mdsc, session); 3663 + remove_session_caps(session); 3664 + } 3665 + mutex_unlock(&session->s_mutex); 3666 + ceph_put_mds_session(session); 3667 + mutex_lock(&mdsc->mutex); 3668 + kick_requests(mdsc, mds); 3669 + } 3670 + __wake_requests(mdsc, &mdsc->waiting_for_map); 3671 + mutex_unlock(&mdsc->mutex); 3647 3672 } 3648 3673 3649 3674 static void ceph_mdsc_stop(struct ceph_mds_client *mdsc)
+1
fs/ceph/mds_client.h
··· 366 366 367 367 extern int ceph_mdsc_init(struct ceph_fs_client *fsc); 368 368 extern void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc); 369 + extern void ceph_mdsc_force_umount(struct ceph_mds_client *mdsc); 369 370 extern void ceph_mdsc_destroy(struct ceph_fs_client *fsc); 370 371 371 372 extern void ceph_mdsc_sync(struct ceph_mds_client *mdsc);
-7
fs/ceph/snap.c
··· 338 338 return 0; 339 339 } 340 340 341 - if (num == 0 && realm->seq == ceph_empty_snapc->seq) { 342 - ceph_get_snap_context(ceph_empty_snapc); 343 - snapc = ceph_empty_snapc; 344 - goto done; 345 - } 346 - 347 341 /* alloc new snap context */ 348 342 err = -ENOMEM; 349 343 if (num > (SIZE_MAX - sizeof(*snapc)) / sizeof(u64)) ··· 375 381 realm->ino, realm, snapc, snapc->seq, 376 382 (unsigned int) snapc->num_snaps); 377 383 378 - done: 379 384 ceph_put_snap_context(realm->cached_context); 380 385 realm->cached_context = snapc; 381 386 return 0;
+1
fs/ceph/super.c
··· 708 708 if (!fsc) 709 709 return; 710 710 fsc->mount_state = CEPH_MOUNT_SHUTDOWN; 711 + ceph_mdsc_force_umount(fsc->mdsc); 711 712 return; 712 713 } 713 714
+2
include/linux/ceph/libceph.h
··· 46 46 unsigned long mount_timeout; /* jiffies */ 47 47 unsigned long osd_idle_ttl; /* jiffies */ 48 48 unsigned long osd_keepalive_timeout; /* jiffies */ 49 + unsigned long monc_ping_timeout; /* jiffies */ 49 50 50 51 /* 51 52 * any type that can't be simply compared or doesn't need need ··· 67 66 #define CEPH_MOUNT_TIMEOUT_DEFAULT msecs_to_jiffies(60 * 1000) 68 67 #define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000) 69 68 #define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000) 69 + #define CEPH_MONC_PING_TIMEOUT_DEFAULT msecs_to_jiffies(30 * 1000) 70 70 71 71 #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) 72 72 #define CEPH_MSG_MAX_MIDDLE_LEN (16*1024*1024)
+4
include/linux/ceph/messenger.h
··· 248 248 int in_base_pos; /* bytes read */ 249 249 __le64 in_temp_ack; /* for reading an ack */ 250 250 251 + struct timespec last_keepalive_ack; 252 + 251 253 struct delayed_work work; /* send|recv work */ 252 254 unsigned long delay; /* current delay interval */ 253 255 }; ··· 287 285 extern void ceph_msg_revoke_incoming(struct ceph_msg *msg); 288 286 289 287 extern void ceph_con_keepalive(struct ceph_connection *con); 288 + extern bool ceph_con_keepalive_expired(struct ceph_connection *con, 289 + unsigned long interval); 290 290 291 291 extern void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages, 292 292 size_t length, size_t alignment);
+3 -1
include/linux/ceph/msgr.h
··· 84 84 #define CEPH_MSGR_TAG_MSG 7 /* message */ 85 85 #define CEPH_MSGR_TAG_ACK 8 /* message ack */ 86 86 #define CEPH_MSGR_TAG_KEEPALIVE 9 /* just a keepalive byte! */ 87 - #define CEPH_MSGR_TAG_BADPROTOVER 10 /* bad protocol version */ 87 + #define CEPH_MSGR_TAG_BADPROTOVER 10 /* bad protocol version */ 88 88 #define CEPH_MSGR_TAG_BADAUTHORIZER 11 /* bad authorizer */ 89 89 #define CEPH_MSGR_TAG_FEATURES 12 /* insufficient features */ 90 90 #define CEPH_MSGR_TAG_SEQ 13 /* 64-bit int follows with seen seq number */ 91 + #define CEPH_MSGR_TAG_KEEPALIVE2 14 /* keepalive2 byte + ceph_timespec */ 92 + #define CEPH_MSGR_TAG_KEEPALIVE2_ACK 15 /* keepalive2 reply */ 91 93 92 94 93 95 /*
+1
net/ceph/ceph_common.c
··· 357 357 opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; 358 358 opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; 359 359 opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; 360 + opt->monc_ping_timeout = CEPH_MONC_PING_TIMEOUT_DEFAULT; 360 361 361 362 /* get mon ip(s) */ 362 363 /* ip1[:port1][,ip2[:port2]...] */
-4
net/ceph/crypto.c
··· 79 79 return 0; 80 80 } 81 81 82 - 83 - 84 - #define AES_KEY_SIZE 16 85 - 86 82 static struct crypto_blkcipher *ceph_crypto_alloc_cipher(void) 87 83 { 88 84 return crypto_alloc_blkcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC);
+62 -20
net/ceph/messenger.c
··· 163 163 static char tag_msg = CEPH_MSGR_TAG_MSG; 164 164 static char tag_ack = CEPH_MSGR_TAG_ACK; 165 165 static char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE; 166 + static char tag_keepalive2 = CEPH_MSGR_TAG_KEEPALIVE2; 166 167 167 168 #ifdef CONFIG_LOCKDEP 168 169 static struct lock_class_key socket_class; ··· 177 176 178 177 static void queue_con(struct ceph_connection *con); 179 178 static void cancel_con(struct ceph_connection *con); 180 - static void con_work(struct work_struct *); 179 + static void ceph_con_workfn(struct work_struct *); 181 180 static void con_fault(struct ceph_connection *con); 182 181 183 182 /* ··· 277 276 ceph_msgr_wq = NULL; 278 277 } 279 278 280 - ceph_msgr_slab_exit(); 281 - 282 279 BUG_ON(zero_page == NULL); 283 280 page_cache_release(zero_page); 284 281 zero_page = NULL; 282 + 283 + ceph_msgr_slab_exit(); 285 284 } 286 285 287 286 int ceph_msgr_init(void) 288 287 { 288 + if (ceph_msgr_slab_init()) 289 + return -ENOMEM; 290 + 289 291 BUG_ON(zero_page != NULL); 290 292 zero_page = ZERO_PAGE(0); 291 293 page_cache_get(zero_page); 292 - 293 - if (ceph_msgr_slab_init()) 294 - return -ENOMEM; 295 294 296 295 /* 297 296 * The number of active work items is limited by the number of ··· 750 749 mutex_init(&con->mutex); 751 750 INIT_LIST_HEAD(&con->out_queue); 752 751 INIT_LIST_HEAD(&con->out_sent); 753 - INIT_DELAYED_WORK(&con->work, con_work); 752 + INIT_DELAYED_WORK(&con->work, ceph_con_workfn); 754 753 755 754 con->state = CON_STATE_CLOSED; 756 755 } ··· 1352 1351 { 1353 1352 dout("prepare_write_keepalive %p\n", con); 1354 1353 con_out_kvec_reset(con); 1355 - con_out_kvec_add(con, sizeof (tag_keepalive), &tag_keepalive); 1354 + if (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2) { 1355 + struct timespec ts = CURRENT_TIME; 1356 + struct ceph_timespec ceph_ts; 1357 + ceph_encode_timespec(&ceph_ts, &ts); 1358 + con_out_kvec_add(con, sizeof(tag_keepalive2), &tag_keepalive2); 1359 + con_out_kvec_add(con, sizeof(ceph_ts), &ceph_ts); 1360 
+ } else { 1361 + con_out_kvec_add(con, sizeof(tag_keepalive), &tag_keepalive); 1362 + } 1356 1363 con_flag_set(con, CON_FLAG_WRITE_PENDING); 1357 1364 } 1358 1365 ··· 1632 1623 dout("prepare_read_tag %p\n", con); 1633 1624 con->in_base_pos = 0; 1634 1625 con->in_tag = CEPH_MSGR_TAG_READY; 1626 + } 1627 + 1628 + static void prepare_read_keepalive_ack(struct ceph_connection *con) 1629 + { 1630 + dout("prepare_read_keepalive_ack %p\n", con); 1631 + con->in_base_pos = 0; 1635 1632 } 1636 1633 1637 1634 /* ··· 2337 2322 return ret; 2338 2323 2339 2324 BUG_ON(!con->in_msg ^ skip); 2340 - if (con->in_msg && data_len > con->in_msg->data_length) { 2341 - pr_warn("%s skipping long message (%u > %zd)\n", 2342 - __func__, data_len, con->in_msg->data_length); 2343 - ceph_msg_put(con->in_msg); 2344 - con->in_msg = NULL; 2345 - skip = 1; 2346 - } 2347 2325 if (skip) { 2348 2326 /* skip this message */ 2349 2327 dout("alloc_msg said skip message\n"); ··· 2465 2457 mutex_lock(&con->mutex); 2466 2458 } 2467 2459 2460 + static int read_keepalive_ack(struct ceph_connection *con) 2461 + { 2462 + struct ceph_timespec ceph_ts; 2463 + size_t size = sizeof(ceph_ts); 2464 + int ret = read_partial(con, size, size, &ceph_ts); 2465 + if (ret <= 0) 2466 + return ret; 2467 + ceph_decode_timespec(&con->last_keepalive_ack, &ceph_ts); 2468 + prepare_read_tag(con); 2469 + return 1; 2470 + } 2468 2471 2469 2472 /* 2470 2473 * Write something to the socket. Called in a worker thread when the ··· 2545 2526 2546 2527 do_next: 2547 2528 if (con->state == CON_STATE_OPEN) { 2529 + if (con_flag_test_and_clear(con, CON_FLAG_KEEPALIVE_PENDING)) { 2530 + prepare_write_keepalive(con); 2531 + goto more; 2532 + } 2548 2533 /* is anything else pending? 
*/ 2549 2534 if (!list_empty(&con->out_queue)) { 2550 2535 prepare_write_message(con); ··· 2556 2533 } 2557 2534 if (con->in_seq > con->in_seq_acked) { 2558 2535 prepare_write_ack(con); 2559 - goto more; 2560 - } 2561 - if (con_flag_test_and_clear(con, CON_FLAG_KEEPALIVE_PENDING)) { 2562 - prepare_write_keepalive(con); 2563 2536 goto more; 2564 2537 } 2565 2538 } ··· 2660 2641 case CEPH_MSGR_TAG_ACK: 2661 2642 prepare_read_ack(con); 2662 2643 break; 2644 + case CEPH_MSGR_TAG_KEEPALIVE2_ACK: 2645 + prepare_read_keepalive_ack(con); 2646 + break; 2663 2647 case CEPH_MSGR_TAG_CLOSE: 2664 2648 con_close_socket(con); 2665 2649 con->state = CON_STATE_CLOSED; ··· 2704 2682 if (ret <= 0) 2705 2683 goto out; 2706 2684 process_ack(con); 2685 + goto more; 2686 + } 2687 + if (con->in_tag == CEPH_MSGR_TAG_KEEPALIVE2_ACK) { 2688 + ret = read_keepalive_ack(con); 2689 + if (ret <= 0) 2690 + goto out; 2707 2691 goto more; 2708 2692 } 2709 2693 ··· 2827 2799 /* 2828 2800 * Do some work on a connection. Drop a connection ref when we're done. 2829 2801 */ 2830 - static void con_work(struct work_struct *work) 2802 + static void ceph_con_workfn(struct work_struct *work) 2831 2803 { 2832 2804 struct ceph_connection *con = container_of(work, struct ceph_connection, 2833 2805 work.work); ··· 3128 3100 queue_con(con); 3129 3101 } 3130 3102 EXPORT_SYMBOL(ceph_con_keepalive); 3103 + 3104 + bool ceph_con_keepalive_expired(struct ceph_connection *con, 3105 + unsigned long interval) 3106 + { 3107 + if (interval > 0 && 3108 + (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2)) { 3109 + struct timespec now = CURRENT_TIME; 3110 + struct timespec ts; 3111 + jiffies_to_timespec(interval, &ts); 3112 + ts = timespec_add(con->last_keepalive_ack, ts); 3113 + return timespec_compare(&now, &ts) >= 0; 3114 + } 3115 + return false; 3116 + } 3131 3117 3132 3118 static struct ceph_msg_data *ceph_msg_data_create(enum ceph_msg_data_type type) 3133 3119 {
+29 -8
net/ceph/mon_client.c
··· 149 149 CEPH_ENTITY_TYPE_MON, monc->cur_mon, 150 150 &monc->monmap->mon_inst[monc->cur_mon].addr); 151 151 152 + /* send an initial keepalive to ensure our timestamp is 153 + * valid by the time we are in an OPENED state */ 154 + ceph_con_keepalive(&monc->con); 155 + 152 156 /* initiatiate authentication handshake */ 153 157 ret = ceph_auth_build_hello(monc->auth, 154 158 monc->m_auth->front.iov_base, ··· 174 170 */ 175 171 static void __schedule_delayed(struct ceph_mon_client *monc) 176 172 { 177 - unsigned int delay; 173 + struct ceph_options *opt = monc->client->options; 174 + unsigned long delay; 178 175 179 - if (monc->cur_mon < 0 || __sub_expired(monc)) 176 + if (monc->cur_mon < 0 || __sub_expired(monc)) { 180 177 delay = 10 * HZ; 181 - else 178 + } else { 182 179 delay = 20 * HZ; 183 - dout("__schedule_delayed after %u\n", delay); 184 - schedule_delayed_work(&monc->delayed_work, delay); 180 + if (opt->monc_ping_timeout > 0) 181 + delay = min(delay, opt->monc_ping_timeout / 3); 182 + } 183 + dout("__schedule_delayed after %lu\n", delay); 184 + schedule_delayed_work(&monc->delayed_work, 185 + round_jiffies_relative(delay)); 185 186 } 186 187 187 188 /* ··· 752 743 __close_session(monc); 753 744 __open_session(monc); /* continue hunting */ 754 745 } else { 755 - ceph_con_keepalive(&monc->con); 746 + struct ceph_options *opt = monc->client->options; 747 + int is_auth = ceph_auth_is_authenticated(monc->auth); 748 + if (ceph_con_keepalive_expired(&monc->con, 749 + opt->monc_ping_timeout)) { 750 + dout("monc keepalive timeout\n"); 751 + is_auth = 0; 752 + __close_session(monc); 753 + monc->hunting = true; 754 + __open_session(monc); 755 + } 756 756 757 - __validate_auth(monc); 757 + if (!monc->hunting) { 758 + ceph_con_keepalive(&monc->con); 759 + __validate_auth(monc); 760 + } 758 761 759 - if (ceph_auth_is_authenticated(monc->auth)) 762 + if (is_auth) 760 763 __send_subscribe(monc); 761 764 } 762 765 __schedule_delayed(monc);
+18 -33
net/ceph/osd_client.c
··· 2817 2817 } 2818 2818 2819 2819 /* 2820 - * lookup and return message for incoming reply. set up reply message 2821 - * pages. 2820 + * Lookup and return message for incoming reply. Don't try to do 2821 + * anything about a larger than preallocated data portion of the 2822 + * message at the moment - for now, just skip the message. 2822 2823 */ 2823 2824 static struct ceph_msg *get_reply(struct ceph_connection *con, 2824 2825 struct ceph_msg_header *hdr, ··· 2837 2836 mutex_lock(&osdc->request_mutex); 2838 2837 req = __lookup_request(osdc, tid); 2839 2838 if (!req) { 2840 - *skip = 1; 2839 + pr_warn("%s osd%d tid %llu unknown, skipping\n", 2840 + __func__, osd->o_osd, tid); 2841 2841 m = NULL; 2842 - dout("get_reply unknown tid %llu from osd%d\n", tid, 2843 - osd->o_osd); 2842 + *skip = 1; 2844 2843 goto out; 2845 2844 } 2846 2845 ··· 2850 2849 ceph_msg_revoke_incoming(req->r_reply); 2851 2850 2852 2851 if (front_len > req->r_reply->front_alloc_len) { 2853 - pr_warn("get_reply front %d > preallocated %d (%u#%llu)\n", 2854 - front_len, req->r_reply->front_alloc_len, 2855 - (unsigned int)con->peer_name.type, 2856 - le64_to_cpu(con->peer_name.num)); 2852 + pr_warn("%s osd%d tid %llu front %d > preallocated %d\n", 2853 + __func__, osd->o_osd, req->r_tid, front_len, 2854 + req->r_reply->front_alloc_len); 2857 2855 m = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, front_len, GFP_NOFS, 2858 2856 false); 2859 2857 if (!m) ··· 2860 2860 ceph_msg_put(req->r_reply); 2861 2861 req->r_reply = m; 2862 2862 } 2863 - m = ceph_msg_get(req->r_reply); 2864 2863 2865 - if (data_len > 0) { 2866 - struct ceph_osd_data *osd_data; 2867 - 2868 - /* 2869 - * XXX This is assuming there is only one op containing 2870 - * XXX page data. Probably OK for reads, but this 2871 - * XXX ought to be done more generally. 
2872 - */ 2873 - osd_data = osd_req_op_extent_osd_data(req, 0); 2874 - if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES) { 2875 - if (osd_data->pages && 2876 - unlikely(osd_data->length < data_len)) { 2877 - 2878 - pr_warn("tid %lld reply has %d bytes we had only %llu bytes ready\n", 2879 - tid, data_len, osd_data->length); 2880 - *skip = 1; 2881 - ceph_msg_put(m); 2882 - m = NULL; 2883 - goto out; 2884 - } 2885 - } 2864 + if (data_len > req->r_reply->data_length) { 2865 + pr_warn("%s osd%d tid %llu data %d > preallocated %zu, skipping\n", 2866 + __func__, osd->o_osd, req->r_tid, data_len, 2867 + req->r_reply->data_length); 2868 + m = NULL; 2869 + *skip = 1; 2870 + goto out; 2886 2871 } 2887 - *skip = 0; 2872 + 2873 + m = ceph_msg_get(req->r_reply); 2888 2874 dout("get_reply tid %lld %p\n", tid, m); 2889 2875 2890 2876 out: 2891 2877 mutex_unlock(&osdc->request_mutex); 2892 2878 return m; 2893 - 2894 2879 } 2895 2880 2896 2881 static struct ceph_msg *alloc_msg(struct ceph_connection *con,
+1 -1
net/ceph/osdmap.c
··· 1300 1300 ceph_decode_addr(&addr); 1301 1301 pr_info("osd%d up\n", osd); 1302 1302 BUG_ON(osd >= map->max_osd); 1303 - map->osd_state[osd] |= CEPH_OSD_UP; 1303 + map->osd_state[osd] |= CEPH_OSD_UP | CEPH_OSD_EXISTS; 1304 1304 map->osd_addr[osd] = addr; 1305 1305 } 1306 1306