libceph: fix msgr standby handling

The standby logic used to be pretty dependent on the work requeueing
behavior that changed when we switched to WQ_NON_REENTRANT. It was also
very fragile.

Restructure things so that:
- We clear WRITE_PENDING when we set STANDBY. This ensures we will
  requeue work when we wake up later.
- con_work backs off if STANDBY is set. There is nothing to do if we are
  in standby.
- A clear_standby() helper is called by both con_send() and
  con_keepalive(), the two actions that can wake us up again. Move the
  connect_seq++ logic here.

Signed-off-by: Sage Weil <sage@newdream.net>

Sage Weil e00de341 e76661d0

+22 -8
+22 -8
net/ceph/messenger.c
··· 1712 1712 1713 1713 /* open the socket first? */ 1714 1714 if (con->sock == NULL) { 1715 - /* 1716 - * if we were STANDBY and are reconnecting _this_ 1717 - * connection, bump connect_seq now. Always bump 1718 - * global_seq. 1719 - */ 1720 - if (test_and_clear_bit(STANDBY, &con->state)) 1721 - con->connect_seq++; 1722 - 1723 1715 prepare_write_banner(msgr, con); 1724 1716 prepare_write_connect(msgr, con, 1); 1725 1717 prepare_read_banner(con); ··· 1954 1962 } 1955 1963 } 1956 1964 1965 + if (test_bit(STANDBY, &con->state)) { 1966 + dout("con_work %p STANDBY\n", con); 1967 + goto done; 1968 + } 1957 1969 if (test_bit(CLOSED, &con->state)) { /* e.g. if we are replaced */ 1958 1970 dout("con_work CLOSED\n"); 1959 1971 con_close_socket(con); ··· 2018 2022 * the connection in a STANDBY state */ 2019 2023 if (list_empty(&con->out_queue) && 2020 2024 !test_bit(KEEPALIVE_PENDING, &con->state)) { 2025 + dout("fault %p setting STANDBY clearing WRITE_PENDING\n", con); 2026 + clear_bit(WRITE_PENDING, &con->state); 2021 2027 set_bit(STANDBY, &con->state); 2022 2028 } else { 2023 2029 /* retry after a delay. */ ··· 2115 2117 } 2116 2118 EXPORT_SYMBOL(ceph_messenger_destroy); 2117 2119 2120 + static void clear_standby(struct ceph_connection *con) 2121 + { 2122 + /* come back from STANDBY? */ 2123 + if (test_and_clear_bit(STANDBY, &con->state)) { 2124 + mutex_lock(&con->mutex); 2125 + dout("clear_standby %p and ++connect_seq\n", con); 2126 + con->connect_seq++; 2127 + WARN_ON(test_bit(WRITE_PENDING, &con->state)); 2128 + WARN_ON(test_bit(KEEPALIVE_PENDING, &con->state)); 2129 + mutex_unlock(&con->mutex); 2130 + } 2131 + } 2132 + 2118 2133 /* 2119 2134 * Queue up an outgoing message on the given connection. 
2120 2135 */ ··· 2160 2149 2161 2150 /* if there wasn't anything waiting to send before, queue 2162 2151 * new work */ 2152 + clear_standby(con); 2163 2153 if (test_and_set_bit(WRITE_PENDING, &con->state) == 0) 2164 2154 queue_con(con); 2165 2155 } ··· 2226 2214 */ 2227 2215 void ceph_con_keepalive(struct ceph_connection *con) 2228 2216 { 2217 + dout("con_keepalive %p\n", con); 2218 + clear_standby(con); 2229 2219 if (test_and_set_bit(KEEPALIVE_PENDING, &con->state) == 0 && 2230 2220 test_and_set_bit(WRITE_PENDING, &con->state) == 0) 2231 2221 queue_con(con);