Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client:
  ceph: no .snap inside of snapped namespace
  libceph: fix msgr standby handling
  libceph: fix msgr keepalive flag
  libceph: fix msgr backoff
  libceph: retry after authorization failure
  libceph: fix handling of short returns from get_user_pages
  ceph: do not clear I_COMPLETE from d_release
  ceph: do not set I_COMPLETE
  Revert "ceph: keep reference to parent inode on ceph_dentry"

+72 -50
+3 -25
fs/ceph/dir.c
··· 60 } 61 di->dentry = dentry; 62 di->lease_session = NULL; 63 - di->parent_inode = igrab(dentry->d_parent->d_inode); 64 dentry->d_fsdata = di; 65 dentry->d_time = jiffies; 66 ceph_dentry_lru_add(dentry); ··· 409 spin_lock(&inode->i_lock); 410 if (ci->i_release_count == fi->dir_release_count) { 411 dout(" marking %p complete\n", inode); 412 - ci->i_ceph_flags |= CEPH_I_COMPLETE; 413 ci->i_max_offset = filp->f_pos; 414 } 415 spin_unlock(&inode->i_lock); ··· 496 497 /* .snap dir? */ 498 if (err == -ENOENT && 499 strcmp(dentry->d_name.name, 500 fsc->mount_options->snapdir_name) == 0) { 501 struct inode *inode = ceph_get_snapdir(parent); ··· 1030 static void ceph_dentry_release(struct dentry *dentry) 1031 { 1032 struct ceph_dentry_info *di = ceph_dentry(dentry); 1033 - struct inode *parent_inode = NULL; 1034 - u64 snapid = CEPH_NOSNAP; 1035 1036 - if (!IS_ROOT(dentry)) { 1037 - parent_inode = di->parent_inode; 1038 - if (parent_inode) 1039 - snapid = ceph_snap(parent_inode); 1040 - } 1041 - dout("dentry_release %p parent %p\n", dentry, parent_inode); 1042 - if (parent_inode && snapid != CEPH_SNAPDIR) { 1043 - struct ceph_inode_info *ci = ceph_inode(parent_inode); 1044 - 1045 - spin_lock(&parent_inode->i_lock); 1046 - if (ci->i_shared_gen == di->lease_shared_gen || 1047 - snapid <= CEPH_MAXSNAP) { 1048 - dout(" clearing %p complete (d_release)\n", 1049 - parent_inode); 1050 - ci->i_ceph_flags &= ~CEPH_I_COMPLETE; 1051 - ci->i_release_count++; 1052 - } 1053 - spin_unlock(&parent_inode->i_lock); 1054 - } 1055 if (di) { 1056 ceph_dentry_lru_del(dentry); 1057 if (di->lease_session) ··· 1039 kmem_cache_free(ceph_dentry_cachep, di); 1040 dentry->d_fsdata = NULL; 1041 } 1042 - if (parent_inode) 1043 - iput(parent_inode); 1044 } 1045 1046 static int ceph_snapdir_d_revalidate(struct dentry *dentry,
··· 60 } 61 di->dentry = dentry; 62 di->lease_session = NULL; 63 dentry->d_fsdata = di; 64 dentry->d_time = jiffies; 65 ceph_dentry_lru_add(dentry); ··· 410 spin_lock(&inode->i_lock); 411 if (ci->i_release_count == fi->dir_release_count) { 412 dout(" marking %p complete\n", inode); 413 + /* ci->i_ceph_flags |= CEPH_I_COMPLETE; */ 414 ci->i_max_offset = filp->f_pos; 415 } 416 spin_unlock(&inode->i_lock); ··· 497 498 /* .snap dir? */ 499 if (err == -ENOENT && 500 + ceph_snap(parent) == CEPH_NOSNAP && 501 strcmp(dentry->d_name.name, 502 fsc->mount_options->snapdir_name) == 0) { 503 struct inode *inode = ceph_get_snapdir(parent); ··· 1030 static void ceph_dentry_release(struct dentry *dentry) 1031 { 1032 struct ceph_dentry_info *di = ceph_dentry(dentry); 1033 1034 + dout("dentry_release %p\n", dentry); 1035 if (di) { 1036 ceph_dentry_lru_del(dentry); 1037 if (di->lease_session) ··· 1059 kmem_cache_free(ceph_dentry_cachep, di); 1060 dentry->d_fsdata = NULL; 1061 } 1062 } 1063 1064 static int ceph_snapdir_d_revalidate(struct dentry *dentry,
+1 -1
fs/ceph/inode.c
··· 707 (issued & CEPH_CAP_FILE_EXCL) == 0 && 708 (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) { 709 dout(" marking %p complete (empty)\n", inode); 710 - ci->i_ceph_flags |= CEPH_I_COMPLETE; 711 ci->i_max_offset = 2; 712 } 713 break;
··· 707 (issued & CEPH_CAP_FILE_EXCL) == 0 && 708 (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) { 709 dout(" marking %p complete (empty)\n", inode); 710 + /* ci->i_ceph_flags |= CEPH_I_COMPLETE; */ 711 ci->i_max_offset = 2; 712 } 713 break;
-1
fs/ceph/super.h
··· 207 struct dentry *dentry; 208 u64 time; 209 u64 offset; 210 - struct inode *parent_inode; 211 }; 212 213 struct ceph_inode_xattrs_info {
··· 207 struct dentry *dentry; 208 u64 time; 209 u64 offset; 210 }; 211 212 struct ceph_inode_xattrs_info {
+1 -1
include/linux/ceph/messenger.h
··· 123 #define SOCK_CLOSED 11 /* socket state changed to closed */ 124 #define OPENING 13 /* open connection w/ (possibly new) peer */ 125 #define DEAD 14 /* dead, about to kfree */ 126 127 /* 128 * A single connection with another host. ··· 161 struct list_head out_queue; 162 struct list_head out_sent; /* sending or sent but unacked */ 163 u64 out_seq; /* last message queued for send */ 164 - bool out_keepalive_pending; 165 166 u64 in_seq, in_seq_acked; /* last message received, acked */ 167
··· 123 #define SOCK_CLOSED 11 /* socket state changed to closed */ 124 #define OPENING 13 /* open connection w/ (possibly new) peer */ 125 #define DEAD 14 /* dead, about to kfree */ 126 + #define BACKOFF 15 127 128 /* 129 * A single connection with another host. ··· 160 struct list_head out_queue; 161 struct list_head out_sent; /* sending or sent but unacked */ 162 u64 out_seq; /* last message queued for send */ 163 164 u64 in_seq, in_seq_acked; /* last message received, acked */ 165
+54 -17
net/ceph/messenger.c
··· 336 ceph_msg_put(con->out_msg); 337 con->out_msg = NULL; 338 } 339 - con->out_keepalive_pending = false; 340 con->in_seq = 0; 341 con->in_seq_acked = 0; 342 } ··· 1247 con->auth_retry); 1248 if (con->auth_retry == 2) { 1249 con->error_msg = "connect authorization failure"; 1250 - reset_connection(con); 1251 - set_bit(CLOSED, &con->state); 1252 return -1; 1253 } 1254 con->auth_retry = 1; ··· 1712 1713 /* open the socket first? */ 1714 if (con->sock == NULL) { 1715 - /* 1716 - * if we were STANDBY and are reconnecting _this_ 1717 - * connection, bump connect_seq now. Always bump 1718 - * global_seq. 1719 - */ 1720 - if (test_and_clear_bit(STANDBY, &con->state)) 1721 - con->connect_seq++; 1722 - 1723 prepare_write_banner(msgr, con); 1724 prepare_write_connect(msgr, con, 1); 1725 prepare_read_banner(con); ··· 1940 work.work); 1941 1942 mutex_lock(&con->mutex); 1943 1944 if (test_bit(CLOSED, &con->state)) { /* e.g. if we are replaced */ 1945 dout("con_work CLOSED\n"); 1946 con_close_socket(con); ··· 2014 /* Requeue anything that hasn't been acked */ 2015 list_splice_init(&con->out_sent, &con->out_queue); 2016 2017 - /* If there are no messages in the queue, place the connection 2018 - * in a STANDBY state (i.e., don't try to reconnect just yet). */ 2019 - if (list_empty(&con->out_queue) && !con->out_keepalive_pending) { 2020 - dout("fault setting STANDBY\n"); 2021 set_bit(STANDBY, &con->state); 2022 } else { 2023 /* retry after a delay. */ ··· 2027 con->delay = BASE_DELAY_INTERVAL; 2028 else if (con->delay < MAX_DELAY_INTERVAL) 2029 con->delay *= 2; 2030 - dout("fault queueing %p delay %lu\n", con, con->delay); 2031 con->ops->get(con); 2032 if (queue_delayed_work(ceph_msgr_wq, &con->work, 2033 - round_jiffies_relative(con->delay)) == 0) 2034 con->ops->put(con); 2035 } 2036 2037 out_unlock: ··· 2115 } 2116 EXPORT_SYMBOL(ceph_messenger_destroy); 2117 2118 /* 2119 * Queue up an outgoing message on the given connection. 
2120 */ ··· 2160 2161 /* if there wasn't anything waiting to send before, queue 2162 * new work */ 2163 if (test_and_set_bit(WRITE_PENDING, &con->state) == 0) 2164 queue_con(con); 2165 } ··· 2226 */ 2227 void ceph_con_keepalive(struct ceph_connection *con) 2228 { 2229 if (test_and_set_bit(KEEPALIVE_PENDING, &con->state) == 0 && 2230 test_and_set_bit(WRITE_PENDING, &con->state) == 0) 2231 queue_con(con);
··· 336 ceph_msg_put(con->out_msg); 337 con->out_msg = NULL; 338 } 339 con->in_seq = 0; 340 con->in_seq_acked = 0; 341 } ··· 1248 con->auth_retry); 1249 if (con->auth_retry == 2) { 1250 con->error_msg = "connect authorization failure"; 1251 return -1; 1252 } 1253 con->auth_retry = 1; ··· 1715 1716 /* open the socket first? */ 1717 if (con->sock == NULL) { 1718 prepare_write_banner(msgr, con); 1719 prepare_write_connect(msgr, con, 1); 1720 prepare_read_banner(con); ··· 1951 work.work); 1952 1953 mutex_lock(&con->mutex); 1954 + if (test_and_clear_bit(BACKOFF, &con->state)) { 1955 + dout("con_work %p backing off\n", con); 1956 + if (queue_delayed_work(ceph_msgr_wq, &con->work, 1957 + round_jiffies_relative(con->delay))) { 1958 + dout("con_work %p backoff %lu\n", con, con->delay); 1959 + mutex_unlock(&con->mutex); 1960 + return; 1961 + } else { 1962 + con->ops->put(con); 1963 + dout("con_work %p FAILED to back off %lu\n", con, 1964 + con->delay); 1965 + } 1966 + } 1967 1968 + if (test_bit(STANDBY, &con->state)) { 1969 + dout("con_work %p STANDBY\n", con); 1970 + goto done; 1971 + } 1972 if (test_bit(CLOSED, &con->state)) { /* e.g. if we are replaced */ 1973 dout("con_work CLOSED\n"); 1974 con_close_socket(con); ··· 2008 /* Requeue anything that hasn't been acked */ 2009 list_splice_init(&con->out_sent, &con->out_queue); 2010 2011 + /* If there are no messages queued or keepalive pending, place 2012 + * the connection in a STANDBY state */ 2013 + if (list_empty(&con->out_queue) && 2014 + !test_bit(KEEPALIVE_PENDING, &con->state)) { 2015 + dout("fault %p setting STANDBY clearing WRITE_PENDING\n", con); 2016 + clear_bit(WRITE_PENDING, &con->state); 2017 set_bit(STANDBY, &con->state); 2018 } else { 2019 /* retry after a delay. 
*/ ··· 2019 con->delay = BASE_DELAY_INTERVAL; 2020 else if (con->delay < MAX_DELAY_INTERVAL) 2021 con->delay *= 2; 2022 con->ops->get(con); 2023 if (queue_delayed_work(ceph_msgr_wq, &con->work, 2024 + round_jiffies_relative(con->delay))) { 2025 + dout("fault queued %p delay %lu\n", con, con->delay); 2026 + } else { 2027 con->ops->put(con); 2028 + dout("fault failed to queue %p delay %lu, backoff\n", 2029 + con, con->delay); 2030 + /* 2031 + * In many cases we see a socket state change 2032 + * while con_work is running and end up 2033 + * queuing (non-delayed) work, such that we 2034 + * can't backoff with a delay. Set a flag so 2035 + * that when con_work restarts we schedule the 2036 + * delay then. 2037 + */ 2038 + set_bit(BACKOFF, &con->state); 2039 + } 2040 } 2041 2042 out_unlock: ··· 2094 } 2095 EXPORT_SYMBOL(ceph_messenger_destroy); 2096 2097 + static void clear_standby(struct ceph_connection *con) 2098 + { 2099 + /* come back from STANDBY? */ 2100 + if (test_and_clear_bit(STANDBY, &con->state)) { 2101 + mutex_lock(&con->mutex); 2102 + dout("clear_standby %p and ++connect_seq\n", con); 2103 + con->connect_seq++; 2104 + WARN_ON(test_bit(WRITE_PENDING, &con->state)); 2105 + WARN_ON(test_bit(KEEPALIVE_PENDING, &con->state)); 2106 + mutex_unlock(&con->mutex); 2107 + } 2108 + } 2109 + 2110 /* 2111 * Queue up an outgoing message on the given connection. 2112 */ ··· 2126 2127 /* if there wasn't anything waiting to send before, queue 2128 * new work */ 2129 + clear_standby(con); 2130 if (test_and_set_bit(WRITE_PENDING, &con->state) == 0) 2131 queue_con(con); 2132 } ··· 2191 */ 2192 void ceph_con_keepalive(struct ceph_connection *con) 2193 { 2194 + dout("con_keepalive %p\n", con); 2195 + clear_standby(con); 2196 if (test_and_set_bit(KEEPALIVE_PENDING, &con->state) == 0 && 2197 test_and_set_bit(WRITE_PENDING, &con->state) == 0) 2198 queue_con(con);
+13 -5
net/ceph/pagevec.c
··· 16 int num_pages, bool write_page) 17 { 18 struct page **pages; 19 - int rc; 20 21 pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS); 22 if (!pages) 23 return ERR_PTR(-ENOMEM); 24 25 down_read(&current->mm->mmap_sem); 26 - rc = get_user_pages(current, current->mm, (unsigned long)data, 27 - num_pages, write_page, 0, pages, NULL); 28 up_read(&current->mm->mmap_sem); 29 - if (rc < num_pages) 30 goto fail; 31 return pages; 32 33 fail: 34 - ceph_put_page_vector(pages, rc > 0 ? rc : 0, false); 35 return ERR_PTR(rc); 36 } 37 EXPORT_SYMBOL(ceph_get_direct_page_vector);
··· 16 int num_pages, bool write_page) 17 { 18 struct page **pages; 19 + int got = 0; 20 + int rc = 0; 21 22 pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS); 23 if (!pages) 24 return ERR_PTR(-ENOMEM); 25 26 down_read(&current->mm->mmap_sem); 27 + while (got < num_pages) { 28 + rc = get_user_pages(current, current->mm, 29 + (unsigned long)data + ((unsigned long)got * PAGE_SIZE), 30 + num_pages - got, write_page, 0, pages + got, NULL); 31 + if (rc < 0) 32 + break; 33 + BUG_ON(rc == 0); 34 + got += rc; 35 + } 36 up_read(&current->mm->mmap_sem); 37 + if (rc < 0) 38 goto fail; 39 return pages; 40 41 fail: 42 + ceph_put_page_vector(pages, got, false); 43 return ERR_PTR(rc); 44 } 45 EXPORT_SYMBOL(ceph_get_direct_page_vector);