Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client:
ceph: no .snap inside of snapped namespace
libceph: fix msgr standby handling
libceph: fix msgr keepalive flag
libceph: fix msgr backoff
libceph: retry after authorization failure
libceph: fix handling of short returns from get_user_pages
ceph: do not clear I_COMPLETE from d_release
ceph: do not set I_COMPLETE
Revert "ceph: keep reference to parent inode on ceph_dentry"

+72 -50
+3 -25
fs/ceph/dir.c
··· 60 60 } 61 61 di->dentry = dentry; 62 62 di->lease_session = NULL; 63 - di->parent_inode = igrab(dentry->d_parent->d_inode); 64 63 dentry->d_fsdata = di; 65 64 dentry->d_time = jiffies; 66 65 ceph_dentry_lru_add(dentry); ··· 409 410 spin_lock(&inode->i_lock); 410 411 if (ci->i_release_count == fi->dir_release_count) { 411 412 dout(" marking %p complete\n", inode); 412 - ci->i_ceph_flags |= CEPH_I_COMPLETE; 413 + /* ci->i_ceph_flags |= CEPH_I_COMPLETE; */ 413 414 ci->i_max_offset = filp->f_pos; 414 415 } 415 416 spin_unlock(&inode->i_lock); ··· 496 497 497 498 /* .snap dir? */ 498 499 if (err == -ENOENT && 500 + ceph_snap(parent) == CEPH_NOSNAP && 499 501 strcmp(dentry->d_name.name, 500 502 fsc->mount_options->snapdir_name) == 0) { 501 503 struct inode *inode = ceph_get_snapdir(parent); ··· 1030 1030 static void ceph_dentry_release(struct dentry *dentry) 1031 1031 { 1032 1032 struct ceph_dentry_info *di = ceph_dentry(dentry); 1033 - struct inode *parent_inode = NULL; 1034 - u64 snapid = CEPH_NOSNAP; 1035 1033 1036 - if (!IS_ROOT(dentry)) { 1037 - parent_inode = di->parent_inode; 1038 - if (parent_inode) 1039 - snapid = ceph_snap(parent_inode); 1040 - } 1041 - dout("dentry_release %p parent %p\n", dentry, parent_inode); 1042 - if (parent_inode && snapid != CEPH_SNAPDIR) { 1043 - struct ceph_inode_info *ci = ceph_inode(parent_inode); 1044 - 1045 - spin_lock(&parent_inode->i_lock); 1046 - if (ci->i_shared_gen == di->lease_shared_gen || 1047 - snapid <= CEPH_MAXSNAP) { 1048 - dout(" clearing %p complete (d_release)\n", 1049 - parent_inode); 1050 - ci->i_ceph_flags &= ~CEPH_I_COMPLETE; 1051 - ci->i_release_count++; 1052 - } 1053 - spin_unlock(&parent_inode->i_lock); 1054 - } 1034 + dout("dentry_release %p\n", dentry); 1055 1035 if (di) { 1056 1036 ceph_dentry_lru_del(dentry); 1057 1037 if (di->lease_session) ··· 1039 1059 kmem_cache_free(ceph_dentry_cachep, di); 1040 1060 dentry->d_fsdata = NULL; 1041 1061 } 1042 - if (parent_inode) 1043 - iput(parent_inode); 1044 1062 } 1045 1063 1046 1064 static int ceph_snapdir_d_revalidate(struct dentry *dentry,
+1 -1
fs/ceph/inode.c
··· 707 707 (issued & CEPH_CAP_FILE_EXCL) == 0 && 708 708 (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) { 709 709 dout(" marking %p complete (empty)\n", inode); 710 - ci->i_ceph_flags |= CEPH_I_COMPLETE; 710 + /* ci->i_ceph_flags |= CEPH_I_COMPLETE; */ 711 711 ci->i_max_offset = 2; 712 712 } 713 713 break;
-1
fs/ceph/super.h
··· 207 207 struct dentry *dentry; 208 208 u64 time; 209 209 u64 offset; 210 - struct inode *parent_inode; 211 210 }; 212 211 213 212 struct ceph_inode_xattrs_info {
+1 -1
include/linux/ceph/messenger.h
··· 123 123 #define SOCK_CLOSED 11 /* socket state changed to closed */ 124 124 #define OPENING 13 /* open connection w/ (possibly new) peer */ 125 125 #define DEAD 14 /* dead, about to kfree */ 126 + #define BACKOFF 15 126 127 127 128 /* 128 129 * A single connection with another host. ··· 161 160 struct list_head out_queue; 162 161 struct list_head out_sent; /* sending or sent but unacked */ 163 162 u64 out_seq; /* last message queued for send */ 164 - bool out_keepalive_pending; 165 163 166 164 u64 in_seq, in_seq_acked; /* last message received, acked */ 167 165
+54 -17
net/ceph/messenger.c
··· 336 336 ceph_msg_put(con->out_msg); 337 337 con->out_msg = NULL; 338 338 } 339 - con->out_keepalive_pending = false; 340 339 con->in_seq = 0; 341 340 con->in_seq_acked = 0; 342 341 } ··· 1247 1248 con->auth_retry); 1248 1249 if (con->auth_retry == 2) { 1249 1250 con->error_msg = "connect authorization failure"; 1250 - reset_connection(con); 1251 - set_bit(CLOSED, &con->state); 1252 1251 return -1; 1253 1252 } 1254 1253 con->auth_retry = 1; ··· 1712 1715 1713 1716 /* open the socket first? */ 1714 1717 if (con->sock == NULL) { 1715 - /* 1716 - * if we were STANDBY and are reconnecting _this_ 1717 - * connection, bump connect_seq now. Always bump 1718 - * global_seq. 1719 - */ 1720 - if (test_and_clear_bit(STANDBY, &con->state)) 1721 - con->connect_seq++; 1722 - 1723 1718 prepare_write_banner(msgr, con); 1724 1719 prepare_write_connect(msgr, con, 1); 1725 1720 prepare_read_banner(con); ··· 1940 1951 work.work); 1941 1952 1942 1953 mutex_lock(&con->mutex); 1954 + if (test_and_clear_bit(BACKOFF, &con->state)) { 1955 + dout("con_work %p backing off\n", con); 1956 + if (queue_delayed_work(ceph_msgr_wq, &con->work, 1957 + round_jiffies_relative(con->delay))) { 1958 + dout("con_work %p backoff %lu\n", con, con->delay); 1959 + mutex_unlock(&con->mutex); 1960 + return; 1961 + } else { 1962 + con->ops->put(con); 1963 + dout("con_work %p FAILED to back off %lu\n", con, 1964 + con->delay); 1965 + } 1966 + } 1943 1967 1968 + if (test_bit(STANDBY, &con->state)) { 1969 + dout("con_work %p STANDBY\n", con); 1970 + goto done; 1971 + } 1944 1972 if (test_bit(CLOSED, &con->state)) { /* e.g. if we are replaced */ 1945 1973 dout("con_work CLOSED\n"); 1946 1974 con_close_socket(con); ··· 2014 2008 /* Requeue anything that hasn't been acked */ 2015 2009 list_splice_init(&con->out_sent, &con->out_queue); 2016 2010 2017 - /* If there are no messages in the queue, place the connection 2018 - * in a STANDBY state (i.e., don't try to reconnect just yet). */ 2019 - if (list_empty(&con->out_queue) && !con->out_keepalive_pending) { 2020 - dout("fault setting STANDBY\n"); 2011 + /* If there are no messages queued or keepalive pending, place 2012 + * the connection in a STANDBY state */ 2013 + if (list_empty(&con->out_queue) && 2014 + !test_bit(KEEPALIVE_PENDING, &con->state)) { 2015 + dout("fault %p setting STANDBY clearing WRITE_PENDING\n", con); 2016 + clear_bit(WRITE_PENDING, &con->state); 2021 2017 set_bit(STANDBY, &con->state); 2022 2018 } else { 2023 2019 /* retry after a delay. */ ··· 2027 2019 con->delay = BASE_DELAY_INTERVAL; 2028 2020 else if (con->delay < MAX_DELAY_INTERVAL) 2029 2021 con->delay *= 2; 2030 - dout("fault queueing %p delay %lu\n", con, con->delay); 2031 2022 con->ops->get(con); 2032 2023 if (queue_delayed_work(ceph_msgr_wq, &con->work, 2033 - round_jiffies_relative(con->delay)) == 0) 2024 + round_jiffies_relative(con->delay))) { 2025 + dout("fault queued %p delay %lu\n", con, con->delay); 2026 + } else { 2034 2027 con->ops->put(con); 2028 + dout("fault failed to queue %p delay %lu, backoff\n", 2029 + con, con->delay); 2030 + /* 2031 + * In many cases we see a socket state change 2032 + * while con_work is running and end up 2033 + * queuing (non-delayed) work, such that we 2034 + * can't backoff with a delay. Set a flag so 2035 + * that when con_work restarts we schedule the 2036 + * delay then. 2037 + */ 2038 + set_bit(BACKOFF, &con->state); 2039 + } 2035 2040 } 2036 2041 2037 2042 out_unlock: ··· 2115 2094 } 2116 2095 EXPORT_SYMBOL(ceph_messenger_destroy); 2117 2096 2097 + static void clear_standby(struct ceph_connection *con) 2098 + { 2099 + /* come back from STANDBY? */ 2100 + if (test_and_clear_bit(STANDBY, &con->state)) { 2101 + mutex_lock(&con->mutex); 2102 + dout("clear_standby %p and ++connect_seq\n", con); 2103 + con->connect_seq++; 2104 + WARN_ON(test_bit(WRITE_PENDING, &con->state)); 2105 + WARN_ON(test_bit(KEEPALIVE_PENDING, &con->state)); 2106 + mutex_unlock(&con->mutex); 2107 + } 2108 + } 2109 + 2118 2110 /* 2119 2111 * Queue up an outgoing message on the given connection. 2120 2112 */ ··· 2160 2126 2161 2127 /* if there wasn't anything waiting to send before, queue 2162 2128 * new work */ 2129 + clear_standby(con); 2163 2130 if (test_and_set_bit(WRITE_PENDING, &con->state) == 0) 2164 2131 queue_con(con); 2165 2132 } ··· 2226 2191 */ 2227 2192 void ceph_con_keepalive(struct ceph_connection *con) 2228 2193 { 2194 + dout("con_keepalive %p\n", con); 2195 + clear_standby(con); 2229 2196 if (test_and_set_bit(KEEPALIVE_PENDING, &con->state) == 0 && 2230 2197 test_and_set_bit(WRITE_PENDING, &con->state) == 0) 2231 2198 queue_con(con);
+13 -5
net/ceph/pagevec.c
··· 16 16 int num_pages, bool write_page) 17 17 { 18 18 struct page **pages; 19 - int rc; 19 + int got = 0; 20 + int rc = 0; 20 21 21 22 pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS); 22 23 if (!pages) 23 24 return ERR_PTR(-ENOMEM); 24 25 25 26 down_read(&current->mm->mmap_sem); 26 - rc = get_user_pages(current, current->mm, (unsigned long)data, 27 - num_pages, write_page, 0, pages, NULL); 27 + while (got < num_pages) { 28 + rc = get_user_pages(current, current->mm, 29 + (unsigned long)data + ((unsigned long)got * PAGE_SIZE), 30 + num_pages - got, write_page, 0, pages + got, NULL); 31 + if (rc < 0) 32 + break; 33 + BUG_ON(rc == 0); 34 + got += rc; 35 + } 28 36 up_read(&current->mm->mmap_sem); 29 - if (rc < num_pages) 37 + if (rc < 0) 30 38 goto fail; 31 39 return pages; 32 40 33 41 fail: 34 - ceph_put_page_vector(pages, rc > 0 ? rc : 0, false); 42 + ceph_put_page_vector(pages, got, false); 35 43 return ERR_PTR(rc); 36 44 } 37 45 EXPORT_SYMBOL(ceph_get_direct_page_vector);