Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

xen: optimize xenbus driver for multiple concurrent xenstore accesses

Handling of multiple concurrent Xenstore accesses through the xenbus
driver, either from the kernel or from user land, is rather lame today:
xenbus is capable of having only one access active at any point in time.

Rewrite xenbus to handle multiple requests concurrently by making use
of the request id of the Xenstore protocol. This requires the following:

- Instead of blocking inside xb_read() when trying to read data from
the xenstore ring buffer do so only in the main loop of
xenbus_thread().

- Instead of doing writes to the xenstore ring buffer in the context of
the caller just queue the request and do the write in the dedicated
xenbus thread.

- Instead of just forwarding the request id specified by the caller of
xenbus to xenstore use a xenbus internal unique request id. This will
allow multiple outstanding requests.

- Modify the locking scheme in order to allow multiple requests being
active in parallel.

- Instead of waiting for the reply of a user's xenstore request after
writing the request to the xenstore ring buffer return directly to
the caller and do the waiting in the read path.

Additionally signal handling was optimized by avoiding waking up the
xenbus thread or sending an event to Xenstore in case the addressed
entity is known to be running already.

As a result communication with Xenstore is sped up by a factor of up
to 5: depending on the request type (read or write) and the amount of
data transferred the gain was at least 20% (small reads) and went up to
a factor of 5 for large writes.

In the end some more rough edges of xenbus have been smoothed:

- Handling of memory shortage when reading from xenstore ring buffer in
the xenbus driver was not optimal: it was busy looping and issuing a
warning in each loop.

- In case of xenstore not running in dom0 but in a stubdom we ended up
with two xenbus threads running, as the initialization of xenbus in
dom0, which expects a local xenstored, is redone later when connecting
to the xenstore domain. Up to now this was no problem as locking
would prevent the two xenbus threads from interfering with each other,
but it was a waste of kernel resources.

- An out of memory situation while writing to or reading from the
xenstore ring buffer no longer will lead to a possible loss of
synchronization with xenstore.

- The user read and write parts are now interruptible by signals.

Signed-off-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>

authored by

Juergen Gross and committed by
Boris Ostrovsky
fd8aa909 5584ea25

+679 -398
+43 -5
drivers/xen/xenbus/xenbus.h
··· 32 32 #ifndef _XENBUS_XENBUS_H 33 33 #define _XENBUS_XENBUS_H 34 34 35 + #include <linux/mutex.h> 36 + #include <linux/uio.h> 37 + #include <xen/xenbus.h> 38 + 35 39 #define XEN_BUS_ID_SIZE 20 36 40 37 41 struct xen_bus_type { ··· 56 52 XS_LOCAL, 57 53 }; 58 54 55 + struct xs_watch_event { 56 + struct list_head list; 57 + unsigned int len; 58 + struct xenbus_watch *handle; 59 + const char *path; 60 + const char *token; 61 + char body[]; 62 + }; 63 + 64 + enum xb_req_state { 65 + xb_req_state_queued, 66 + xb_req_state_wait_reply, 67 + xb_req_state_got_reply, 68 + xb_req_state_aborted 69 + }; 70 + 71 + struct xb_req_data { 72 + struct list_head list; 73 + wait_queue_head_t wq; 74 + struct xsd_sockmsg msg; 75 + enum xsd_sockmsg_type type; 76 + char *body; 77 + const struct kvec *vec; 78 + int num_vecs; 79 + int err; 80 + enum xb_req_state state; 81 + void (*cb)(struct xb_req_data *); 82 + void *par; 83 + }; 84 + 59 85 extern enum xenstore_init xen_store_domain_type; 60 86 extern const struct attribute_group *xenbus_dev_groups[]; 87 + extern struct mutex xs_response_mutex; 88 + extern struct list_head xs_reply_list; 89 + extern struct list_head xb_write_list; 90 + extern wait_queue_head_t xb_waitq; 91 + extern struct mutex xb_write_mutex; 61 92 62 93 int xs_init(void); 63 94 int xb_init_comms(void); 64 95 void xb_deinit_comms(void); 65 - int xb_write(const void *data, unsigned int len); 66 - int xb_read(void *data, unsigned int len); 67 - int xb_data_to_read(void); 68 - int xb_wait_for_data_to_read(void); 96 + int xs_watch_msg(struct xs_watch_event *event); 97 + void xs_request_exit(struct xb_req_data *req); 69 98 70 99 int xenbus_match(struct device *_dev, struct device_driver *_drv); 71 100 int xenbus_dev_probe(struct device *_dev); ··· 129 92 130 93 void xenbus_ring_ops_init(void); 131 94 132 - void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg); 95 + int xenbus_dev_request_and_reply(struct xsd_sockmsg *msg, void *par); 96 + void 
xenbus_dev_queue_reply(struct xb_req_data *req); 133 97 134 98 #endif
+276 -31
drivers/xen/xenbus/xenbus_comms.c
··· 34 34 35 35 #include <linux/wait.h> 36 36 #include <linux/interrupt.h> 37 + #include <linux/kthread.h> 37 38 #include <linux/sched.h> 38 39 #include <linux/err.h> 39 40 #include <xen/xenbus.h> ··· 43 42 #include <xen/page.h> 44 43 #include "xenbus.h" 45 44 45 + /* A list of replies. Currently only one will ever be outstanding. */ 46 + LIST_HEAD(xs_reply_list); 47 + 48 + /* A list of write requests. */ 49 + LIST_HEAD(xb_write_list); 50 + DECLARE_WAIT_QUEUE_HEAD(xb_waitq); 51 + DEFINE_MUTEX(xb_write_mutex); 52 + 53 + /* Protect xenbus reader thread against save/restore. */ 54 + DEFINE_MUTEX(xs_response_mutex); 55 + 46 56 static int xenbus_irq; 57 + static struct task_struct *xenbus_task; 47 58 48 59 static DECLARE_WORK(probe_work, xenbus_probe); 49 60 50 - static DECLARE_WAIT_QUEUE_HEAD(xb_waitq); 51 61 52 62 static irqreturn_t wake_waiting(int irq, void *unused) 53 63 { ··· 96 84 return buf + MASK_XENSTORE_IDX(cons); 97 85 } 98 86 87 + static int xb_data_to_write(void) 88 + { 89 + struct xenstore_domain_interface *intf = xen_store_interface; 90 + 91 + return (intf->req_prod - intf->req_cons) != XENSTORE_RING_SIZE && 92 + !list_empty(&xb_write_list); 93 + } 94 + 99 95 /** 100 96 * xb_write - low level write 101 97 * @data: buffer to send 102 98 * @len: length of buffer 103 99 * 104 - * Returns 0 on success, error otherwise. 100 + * Returns number of bytes written or -err. 105 101 */ 106 - int xb_write(const void *data, unsigned len) 102 + static int xb_write(const void *data, unsigned int len) 107 103 { 108 104 struct xenstore_domain_interface *intf = xen_store_interface; 109 105 XENSTORE_RING_IDX cons, prod; 110 - int rc; 106 + unsigned int bytes = 0; 111 107 112 108 while (len != 0) { 113 109 void *dst; 114 110 unsigned int avail; 115 - 116 - rc = wait_event_interruptible( 117 - xb_waitq, 118 - (intf->req_prod - intf->req_cons) != 119 - XENSTORE_RING_SIZE); 120 - if (rc < 0) 121 - return rc; 122 111 123 112 /* Read indexes, then verify. 
*/ 124 113 cons = intf->req_cons; ··· 128 115 intf->req_cons = intf->req_prod = 0; 129 116 return -EIO; 130 117 } 118 + if (!xb_data_to_write()) 119 + return bytes; 120 + 121 + /* Must write data /after/ reading the consumer index. */ 122 + virt_mb(); 131 123 132 124 dst = get_output_chunk(cons, prod, intf->req, &avail); 133 125 if (avail == 0) ··· 140 122 if (avail > len) 141 123 avail = len; 142 124 143 - /* Must write data /after/ reading the consumer index. */ 144 - virt_mb(); 145 - 146 125 memcpy(dst, data, avail); 147 126 data += avail; 148 127 len -= avail; 128 + bytes += avail; 149 129 150 130 /* Other side must not see new producer until data is there. */ 151 131 virt_wmb(); 152 132 intf->req_prod += avail; 153 133 154 134 /* Implies mb(): other side will see the updated producer. */ 155 - notify_remote_via_evtchn(xen_store_evtchn); 135 + if (prod <= intf->req_cons) 136 + notify_remote_via_evtchn(xen_store_evtchn); 156 137 } 157 138 158 - return 0; 139 + return bytes; 159 140 } 160 141 161 - int xb_data_to_read(void) 142 + static int xb_data_to_read(void) 162 143 { 163 144 struct xenstore_domain_interface *intf = xen_store_interface; 164 145 return (intf->rsp_cons != intf->rsp_prod); 165 146 } 166 147 167 - int xb_wait_for_data_to_read(void) 168 - { 169 - return wait_event_interruptible(xb_waitq, xb_data_to_read()); 170 - } 171 - 172 - int xb_read(void *data, unsigned len) 148 + static int xb_read(void *data, unsigned int len) 173 149 { 174 150 struct xenstore_domain_interface *intf = xen_store_interface; 175 151 XENSTORE_RING_IDX cons, prod; 176 - int rc; 152 + unsigned int bytes = 0; 177 153 178 154 while (len != 0) { 179 155 unsigned int avail; 180 156 const char *src; 181 157 182 - rc = xb_wait_for_data_to_read(); 183 - if (rc < 0) 184 - return rc; 185 - 186 158 /* Read indexes, then verify. 
*/ 187 159 cons = intf->rsp_cons; 188 160 prod = intf->rsp_prod; 161 + if (cons == prod) 162 + return bytes; 163 + 189 164 if (!check_indexes(cons, prod)) { 190 165 intf->rsp_cons = intf->rsp_prod = 0; 191 166 return -EIO; ··· 196 185 memcpy(data, src, avail); 197 186 data += avail; 198 187 len -= avail; 188 + bytes += avail; 199 189 200 190 /* Other side must not see free space until we've copied out */ 201 191 virt_mb(); 202 192 intf->rsp_cons += avail; 203 193 204 - pr_debug("Finished read of %i bytes (%i to go)\n", avail, len); 205 - 206 194 /* Implies mb(): other side will see the updated consumer. */ 207 - notify_remote_via_evtchn(xen_store_evtchn); 195 + if (intf->rsp_prod - cons >= XENSTORE_RING_SIZE) 196 + notify_remote_via_evtchn(xen_store_evtchn); 208 197 } 209 198 199 + return bytes; 200 + } 201 + 202 + static int process_msg(void) 203 + { 204 + static struct { 205 + struct xsd_sockmsg msg; 206 + char *body; 207 + union { 208 + void *alloc; 209 + struct xs_watch_event *watch; 210 + }; 211 + bool in_msg; 212 + bool in_hdr; 213 + unsigned int read; 214 + } state; 215 + struct xb_req_data *req; 216 + int err; 217 + unsigned int len; 218 + 219 + if (!state.in_msg) { 220 + state.in_msg = true; 221 + state.in_hdr = true; 222 + state.read = 0; 223 + 224 + /* 225 + * We must disallow save/restore while reading a message. 226 + * A partial read across s/r leaves us out of sync with 227 + * xenstored. 228 + * xs_response_mutex is locked as long as we are processing one 229 + * message. state.in_msg will be true as long as we are holding 230 + * the lock here. 231 + */ 232 + mutex_lock(&xs_response_mutex); 233 + 234 + if (!xb_data_to_read()) { 235 + /* We raced with save/restore: pending data 'gone'. 
*/ 236 + mutex_unlock(&xs_response_mutex); 237 + state.in_msg = false; 238 + return 0; 239 + } 240 + } 241 + 242 + if (state.in_hdr) { 243 + if (state.read != sizeof(state.msg)) { 244 + err = xb_read((void *)&state.msg + state.read, 245 + sizeof(state.msg) - state.read); 246 + if (err < 0) 247 + goto out; 248 + state.read += err; 249 + if (state.read != sizeof(state.msg)) 250 + return 0; 251 + if (state.msg.len > XENSTORE_PAYLOAD_MAX) { 252 + err = -EINVAL; 253 + goto out; 254 + } 255 + } 256 + 257 + len = state.msg.len + 1; 258 + if (state.msg.type == XS_WATCH_EVENT) 259 + len += sizeof(*state.watch); 260 + 261 + state.alloc = kmalloc(len, GFP_NOIO | __GFP_HIGH); 262 + if (!state.alloc) 263 + return -ENOMEM; 264 + 265 + if (state.msg.type == XS_WATCH_EVENT) 266 + state.body = state.watch->body; 267 + else 268 + state.body = state.alloc; 269 + state.in_hdr = false; 270 + state.read = 0; 271 + } 272 + 273 + err = xb_read(state.body + state.read, state.msg.len - state.read); 274 + if (err < 0) 275 + goto out; 276 + 277 + state.read += err; 278 + if (state.read != state.msg.len) 279 + return 0; 280 + 281 + state.body[state.msg.len] = '\0'; 282 + 283 + if (state.msg.type == XS_WATCH_EVENT) { 284 + state.watch->len = state.msg.len; 285 + err = xs_watch_msg(state.watch); 286 + } else { 287 + err = -ENOENT; 288 + mutex_lock(&xb_write_mutex); 289 + list_for_each_entry(req, &xs_reply_list, list) { 290 + if (req->msg.req_id == state.msg.req_id) { 291 + if (req->state == xb_req_state_wait_reply) { 292 + req->msg.type = state.msg.type; 293 + req->msg.len = state.msg.len; 294 + req->body = state.body; 295 + req->state = xb_req_state_got_reply; 296 + list_del(&req->list); 297 + req->cb(req); 298 + } else { 299 + list_del(&req->list); 300 + kfree(req); 301 + } 302 + err = 0; 303 + break; 304 + } 305 + } 306 + mutex_unlock(&xb_write_mutex); 307 + if (err) 308 + goto out; 309 + } 310 + 311 + mutex_unlock(&xs_response_mutex); 312 + 313 + state.in_msg = false; 314 + state.alloc = 
NULL; 315 + return err; 316 + 317 + out: 318 + mutex_unlock(&xs_response_mutex); 319 + state.in_msg = false; 320 + kfree(state.alloc); 321 + state.alloc = NULL; 322 + return err; 323 + } 324 + 325 + static int process_writes(void) 326 + { 327 + static struct { 328 + struct xb_req_data *req; 329 + int idx; 330 + unsigned int written; 331 + } state; 332 + void *base; 333 + unsigned int len; 334 + int err = 0; 335 + 336 + if (!xb_data_to_write()) 337 + return 0; 338 + 339 + mutex_lock(&xb_write_mutex); 340 + 341 + if (!state.req) { 342 + state.req = list_first_entry(&xb_write_list, 343 + struct xb_req_data, list); 344 + state.idx = -1; 345 + state.written = 0; 346 + } 347 + 348 + if (state.req->state == xb_req_state_aborted) 349 + goto out_err; 350 + 351 + while (state.idx < state.req->num_vecs) { 352 + if (state.idx < 0) { 353 + base = &state.req->msg; 354 + len = sizeof(state.req->msg); 355 + } else { 356 + base = state.req->vec[state.idx].iov_base; 357 + len = state.req->vec[state.idx].iov_len; 358 + } 359 + err = xb_write(base + state.written, len - state.written); 360 + if (err < 0) 361 + goto out_err; 362 + state.written += err; 363 + if (state.written != len) 364 + goto out; 365 + 366 + state.idx++; 367 + state.written = 0; 368 + } 369 + 370 + list_del(&state.req->list); 371 + state.req->state = xb_req_state_wait_reply; 372 + list_add_tail(&state.req->list, &xs_reply_list); 373 + state.req = NULL; 374 + 375 + out: 376 + mutex_unlock(&xb_write_mutex); 377 + 378 + return 0; 379 + 380 + out_err: 381 + state.req->msg.type = XS_ERROR; 382 + state.req->err = err; 383 + list_del(&state.req->list); 384 + if (state.req->state == xb_req_state_aborted) 385 + kfree(state.req); 386 + else { 387 + state.req->state = xb_req_state_got_reply; 388 + wake_up(&state.req->wq); 389 + } 390 + 391 + mutex_unlock(&xb_write_mutex); 392 + 393 + state.req = NULL; 394 + 395 + return err; 396 + } 397 + 398 + static int xb_thread_work(void) 399 + { 400 + return xb_data_to_read() || 
xb_data_to_write(); 401 + } 402 + 403 + static int xenbus_thread(void *unused) 404 + { 405 + int err; 406 + 407 + while (!kthread_should_stop()) { 408 + if (wait_event_interruptible(xb_waitq, xb_thread_work())) 409 + continue; 410 + 411 + err = process_msg(); 412 + if (err == -ENOMEM) 413 + schedule(); 414 + else if (err) 415 + pr_warn_ratelimited("error %d while reading message\n", 416 + err); 417 + 418 + err = process_writes(); 419 + if (err) 420 + pr_warn_ratelimited("error %d while writing message\n", 421 + err); 422 + } 423 + 424 + xenbus_task = NULL; 210 425 return 0; 211 426 } 212 427 ··· 460 223 rebind_evtchn_irq(xen_store_evtchn, xenbus_irq); 461 224 } else { 462 225 int err; 226 + 463 227 err = bind_evtchn_to_irqhandler(xen_store_evtchn, wake_waiting, 464 228 0, "xenbus", &xb_waitq); 465 229 if (err < 0) { ··· 469 231 } 470 232 471 233 xenbus_irq = err; 234 + 235 + if (!xenbus_task) { 236 + xenbus_task = kthread_run(xenbus_thread, NULL, 237 + "xenbus"); 238 + if (IS_ERR(xenbus_task)) 239 + return PTR_ERR(xenbus_task); 240 + } 472 241 } 473 242 474 243 return 0;
+120 -68
drivers/xen/xenbus/xenbus_dev_frontend.c
··· 113 113 struct list_head read_buffers; 114 114 wait_queue_head_t read_waitq; 115 115 116 + struct kref kref; 116 117 }; 117 118 118 119 /* Read out any raw xenbus messages queued up. */ ··· 298 297 mutex_unlock(&adap->dev_data->reply_mutex); 299 298 } 300 299 300 + static void xenbus_file_free(struct kref *kref) 301 + { 302 + struct xenbus_file_priv *u; 303 + struct xenbus_transaction_holder *trans, *tmp; 304 + struct watch_adapter *watch, *tmp_watch; 305 + struct read_buffer *rb, *tmp_rb; 306 + 307 + u = container_of(kref, struct xenbus_file_priv, kref); 308 + 309 + /* 310 + * No need for locking here because there are no other users, 311 + * by definition. 312 + */ 313 + 314 + list_for_each_entry_safe(trans, tmp, &u->transactions, list) { 315 + xenbus_transaction_end(trans->handle, 1); 316 + list_del(&trans->list); 317 + kfree(trans); 318 + } 319 + 320 + list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) { 321 + unregister_xenbus_watch(&watch->watch); 322 + list_del(&watch->list); 323 + free_watch_adapter(watch); 324 + } 325 + 326 + list_for_each_entry_safe(rb, tmp_rb, &u->read_buffers, list) { 327 + list_del(&rb->list); 328 + kfree(rb); 329 + } 330 + kfree(u); 331 + } 332 + 333 + static struct xenbus_transaction_holder *xenbus_get_transaction( 334 + struct xenbus_file_priv *u, uint32_t tx_id) 335 + { 336 + struct xenbus_transaction_holder *trans; 337 + 338 + list_for_each_entry(trans, &u->transactions, list) 339 + if (trans->handle.id == tx_id) 340 + return trans; 341 + 342 + return NULL; 343 + } 344 + 345 + void xenbus_dev_queue_reply(struct xb_req_data *req) 346 + { 347 + struct xenbus_file_priv *u = req->par; 348 + struct xenbus_transaction_holder *trans = NULL; 349 + int rc; 350 + LIST_HEAD(staging_q); 351 + 352 + xs_request_exit(req); 353 + 354 + mutex_lock(&u->msgbuffer_mutex); 355 + 356 + if (req->type == XS_TRANSACTION_START) { 357 + trans = xenbus_get_transaction(u, 0); 358 + if (WARN_ON(!trans)) 359 + goto out; 360 + if (req->msg.type == 
XS_ERROR) { 361 + list_del(&trans->list); 362 + kfree(trans); 363 + } else { 364 + rc = kstrtou32(req->body, 10, &trans->handle.id); 365 + if (WARN_ON(rc)) 366 + goto out; 367 + } 368 + } else if (req->msg.type == XS_TRANSACTION_END) { 369 + trans = xenbus_get_transaction(u, req->msg.tx_id); 370 + if (WARN_ON(!trans)) 371 + goto out; 372 + list_del(&trans->list); 373 + kfree(trans); 374 + } 375 + 376 + mutex_unlock(&u->msgbuffer_mutex); 377 + 378 + mutex_lock(&u->reply_mutex); 379 + rc = queue_reply(&staging_q, &req->msg, sizeof(req->msg)); 380 + if (!rc) 381 + rc = queue_reply(&staging_q, req->body, req->msg.len); 382 + if (!rc) { 383 + list_splice_tail(&staging_q, &u->read_buffers); 384 + wake_up(&u->read_waitq); 385 + } else { 386 + queue_cleanup(&staging_q); 387 + } 388 + mutex_unlock(&u->reply_mutex); 389 + 390 + kfree(req->body); 391 + kfree(req); 392 + 393 + kref_put(&u->kref, xenbus_file_free); 394 + 395 + return; 396 + 397 + out: 398 + mutex_unlock(&u->msgbuffer_mutex); 399 + } 400 + 301 401 static int xenbus_command_reply(struct xenbus_file_priv *u, 302 402 unsigned int msg_type, const char *reply) 303 403 { ··· 419 317 wake_up(&u->read_waitq); 420 318 mutex_unlock(&u->reply_mutex); 421 319 320 + if (!rc) 321 + kref_put(&u->kref, xenbus_file_free); 322 + 422 323 return rc; 423 324 } 424 325 ··· 429 324 struct xenbus_file_priv *u) 430 325 { 431 326 int rc; 432 - void *reply; 433 327 struct xenbus_transaction_holder *trans = NULL; 434 - LIST_HEAD(staging_q); 435 328 436 329 if (msg_type == XS_TRANSACTION_START) { 437 - trans = kmalloc(sizeof(*trans), GFP_KERNEL); 330 + trans = kzalloc(sizeof(*trans), GFP_KERNEL); 438 331 if (!trans) { 439 332 rc = -ENOMEM; 440 333 goto out; 441 334 } 442 - } else if (u->u.msg.tx_id != 0) { 443 - list_for_each_entry(trans, &u->transactions, list) 444 - if (trans->handle.id == u->u.msg.tx_id) 445 - break; 446 - if (&trans->list == &u->transactions) 447 - return xenbus_command_reply(u, XS_ERROR, "ENOENT"); 448 - } 335 + 
list_add(&trans->list, &u->transactions); 336 + } else if (u->u.msg.tx_id != 0 && 337 + !xenbus_get_transaction(u, u->u.msg.tx_id)) 338 + return xenbus_command_reply(u, XS_ERROR, "ENOENT"); 449 339 450 - reply = xenbus_dev_request_and_reply(&u->u.msg); 451 - if (IS_ERR(reply)) { 452 - if (msg_type == XS_TRANSACTION_START) 453 - kfree(trans); 454 - rc = PTR_ERR(reply); 455 - goto out; 456 - } 457 - 458 - if (msg_type == XS_TRANSACTION_START) { 459 - if (u->u.msg.type == XS_ERROR) 460 - kfree(trans); 461 - else { 462 - trans->handle.id = simple_strtoul(reply, NULL, 0); 463 - list_add(&trans->list, &u->transactions); 464 - } 465 - } else if (u->u.msg.type == XS_TRANSACTION_END) { 466 - list_del(&trans->list); 340 + rc = xenbus_dev_request_and_reply(&u->u.msg, u); 341 + if (rc) 467 342 kfree(trans); 468 - } 469 - 470 - mutex_lock(&u->reply_mutex); 471 - rc = queue_reply(&staging_q, &u->u.msg, sizeof(u->u.msg)); 472 - if (!rc) 473 - rc = queue_reply(&staging_q, reply, u->u.msg.len); 474 - if (!rc) { 475 - list_splice_tail(&staging_q, &u->read_buffers); 476 - wake_up(&u->read_waitq); 477 - } else { 478 - queue_cleanup(&staging_q); 479 - } 480 - mutex_unlock(&u->reply_mutex); 481 - 482 - kfree(reply); 483 343 484 344 out: 485 345 return rc; ··· 576 506 * OK, now we have a complete message. Do something with it. 
577 507 */ 578 508 509 + kref_get(&u->kref); 510 + 579 511 msg_type = u->u.msg.type; 580 512 581 513 switch (msg_type) { ··· 592 520 ret = xenbus_write_transaction(msg_type, u); 593 521 break; 594 522 } 595 - if (ret != 0) 523 + if (ret != 0) { 596 524 rc = ret; 525 + kref_put(&u->kref, xenbus_file_free); 526 + } 597 527 598 528 /* Buffered message consumed */ 599 529 u->len = 0; ··· 620 546 if (u == NULL) 621 547 return -ENOMEM; 622 548 549 + kref_init(&u->kref); 550 + 623 551 INIT_LIST_HEAD(&u->transactions); 624 552 INIT_LIST_HEAD(&u->watches); 625 553 INIT_LIST_HEAD(&u->read_buffers); ··· 638 562 static int xenbus_file_release(struct inode *inode, struct file *filp) 639 563 { 640 564 struct xenbus_file_priv *u = filp->private_data; 641 - struct xenbus_transaction_holder *trans, *tmp; 642 - struct watch_adapter *watch, *tmp_watch; 643 - struct read_buffer *rb, *tmp_rb; 644 565 645 - /* 646 - * No need for locking here because there are no other users, 647 - * by definition. 648 - */ 649 - 650 - list_for_each_entry_safe(trans, tmp, &u->transactions, list) { 651 - xenbus_transaction_end(trans->handle, 1); 652 - list_del(&trans->list); 653 - kfree(trans); 654 - } 655 - 656 - list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) { 657 - unregister_xenbus_watch(&watch->watch); 658 - list_del(&watch->list); 659 - free_watch_adapter(watch); 660 - } 661 - 662 - list_for_each_entry_safe(rb, tmp_rb, &u->read_buffers, list) { 663 - list_del(&rb->list); 664 - kfree(rb); 665 - } 666 - kfree(u); 566 + kref_put(&u->kref, xenbus_file_free); 667 567 668 568 return 0; 669 569 }
+240 -294
drivers/xen/xenbus/xenbus_xs.c
··· 43 43 #include <linux/slab.h> 44 44 #include <linux/fcntl.h> 45 45 #include <linux/kthread.h> 46 + #include <linux/reboot.h> 46 47 #include <linux/rwsem.h> 47 48 #include <linux/mutex.h> 48 49 #include <asm/xen/hypervisor.h> ··· 51 50 #include <xen/xen.h> 52 51 #include "xenbus.h" 53 52 54 - struct xs_stored_msg { 55 - struct list_head list; 53 + /* 54 + * Framework to protect suspend/resume handling against normal Xenstore 55 + * message handling: 56 + * During suspend/resume there must be no open transaction and no pending 57 + * Xenstore request. 58 + * New watch events happening in this time can be ignored by firing all watches 59 + * after resume. 60 + */ 56 61 57 - struct xsd_sockmsg hdr; 62 + /* Lock protecting enter/exit critical region. */ 63 + static DEFINE_SPINLOCK(xs_state_lock); 64 + /* Number of users in critical region (protected by xs_state_lock). */ 65 + static unsigned int xs_state_users; 66 + /* Suspend handler waiting or already active (protected by xs_state_lock)? */ 67 + static int xs_suspend_active; 68 + /* Unique Xenstore request id (protected by xs_state_lock). */ 69 + static uint32_t xs_request_id; 58 70 59 - union { 60 - /* Queued replies. */ 61 - struct { 62 - char *body; 63 - } reply; 64 - 65 - /* Queued watch events. */ 66 - struct { 67 - struct xenbus_watch *handle; 68 - const char *path; 69 - const char *token; 70 - } watch; 71 - } u; 72 - }; 73 - 74 - struct xs_handle { 75 - /* A list of replies. Currently only one will ever be outstanding. */ 76 - struct list_head reply_list; 77 - spinlock_t reply_lock; 78 - wait_queue_head_t reply_waitq; 79 - 80 - /* 81 - * Mutex ordering: transaction_mutex -> watch_mutex -> request_mutex. 82 - * response_mutex is never taken simultaneously with the other three. 83 - * 84 - * transaction_mutex must be held before incrementing 85 - * transaction_count. The mutex is held when a suspend is in 86 - * progress to prevent new transactions starting. 
87 - * 88 - * When decrementing transaction_count to zero the wait queue 89 - * should be woken up, the suspend code waits for count to 90 - * reach zero. 91 - */ 92 - 93 - /* One request at a time. */ 94 - struct mutex request_mutex; 95 - 96 - /* Protect xenbus reader thread against save/restore. */ 97 - struct mutex response_mutex; 98 - 99 - /* Protect transactions against save/restore. */ 100 - struct mutex transaction_mutex; 101 - atomic_t transaction_count; 102 - wait_queue_head_t transaction_wq; 103 - 104 - /* Protect watch (de)register against save/restore. */ 105 - struct rw_semaphore watch_mutex; 106 - }; 107 - 108 - static struct xs_handle xs_state; 71 + /* Wait queue for all callers waiting for critical region to become usable. */ 72 + static DECLARE_WAIT_QUEUE_HEAD(xs_state_enter_wq); 73 + /* Wait queue for suspend handling waiting for critical region being empty. */ 74 + static DECLARE_WAIT_QUEUE_HEAD(xs_state_exit_wq); 109 75 110 76 /* List of registered watches, and a lock to protect it. */ 111 77 static LIST_HEAD(watches); ··· 81 113 /* List of pending watch callback events, and a lock to protect it. */ 82 114 static LIST_HEAD(watch_events); 83 115 static DEFINE_SPINLOCK(watch_events_lock); 116 + 117 + /* Protect watch (de)register against save/restore. */ 118 + static DECLARE_RWSEM(xs_watch_rwsem); 84 119 85 120 /* 86 121 * Details of the xenwatch callback kernel thread. 
The thread waits on the ··· 94 123 static pid_t xenwatch_pid; 95 124 static DEFINE_MUTEX(xenwatch_mutex); 96 125 static DECLARE_WAIT_QUEUE_HEAD(watch_events_waitq); 126 + 127 + static void xs_suspend_enter(void) 128 + { 129 + spin_lock(&xs_state_lock); 130 + xs_suspend_active++; 131 + spin_unlock(&xs_state_lock); 132 + wait_event(xs_state_exit_wq, xs_state_users == 0); 133 + } 134 + 135 + static void xs_suspend_exit(void) 136 + { 137 + spin_lock(&xs_state_lock); 138 + xs_suspend_active--; 139 + spin_unlock(&xs_state_lock); 140 + wake_up_all(&xs_state_enter_wq); 141 + } 142 + 143 + static uint32_t xs_request_enter(struct xb_req_data *req) 144 + { 145 + uint32_t rq_id; 146 + 147 + req->type = req->msg.type; 148 + 149 + spin_lock(&xs_state_lock); 150 + 151 + while (!xs_state_users && xs_suspend_active) { 152 + spin_unlock(&xs_state_lock); 153 + wait_event(xs_state_enter_wq, xs_suspend_active == 0); 154 + spin_lock(&xs_state_lock); 155 + } 156 + 157 + if (req->type == XS_TRANSACTION_START) 158 + xs_state_users++; 159 + xs_state_users++; 160 + rq_id = xs_request_id++; 161 + 162 + spin_unlock(&xs_state_lock); 163 + 164 + return rq_id; 165 + } 166 + 167 + void xs_request_exit(struct xb_req_data *req) 168 + { 169 + spin_lock(&xs_state_lock); 170 + xs_state_users--; 171 + if ((req->type == XS_TRANSACTION_START && req->msg.type == XS_ERROR) || 172 + req->type == XS_TRANSACTION_END) 173 + xs_state_users--; 174 + spin_unlock(&xs_state_lock); 175 + 176 + if (xs_suspend_active && !xs_state_users) 177 + wake_up(&xs_state_exit_wq); 178 + } 97 179 98 180 static int get_error(const char *errorstring) 99 181 { ··· 185 161 } 186 162 return false; 187 163 } 188 - static void *read_reply(enum xsd_sockmsg_type *type, unsigned int *len) 164 + 165 + static bool test_reply(struct xb_req_data *req) 189 166 { 190 - struct xs_stored_msg *msg; 191 - char *body; 167 + if (req->state == xb_req_state_got_reply || !xenbus_ok()) 168 + return true; 192 169 193 - spin_lock(&xs_state.reply_lock); 170 + 
/* Make sure to reread req->state each time. */ 171 + barrier(); 194 172 195 - while (list_empty(&xs_state.reply_list)) { 196 - spin_unlock(&xs_state.reply_lock); 197 - if (xenbus_ok()) 198 - /* XXX FIXME: Avoid synchronous wait for response here. */ 199 - wait_event_timeout(xs_state.reply_waitq, 200 - !list_empty(&xs_state.reply_list), 201 - msecs_to_jiffies(500)); 202 - else { 173 + return false; 174 + } 175 + 176 + static void *read_reply(struct xb_req_data *req) 177 + { 178 + while (req->state != xb_req_state_got_reply) { 179 + wait_event(req->wq, test_reply(req)); 180 + 181 + if (!xenbus_ok()) 203 182 /* 204 183 * If we are in the process of being shut-down there is 205 184 * no point of trying to contact XenBus - it is either ··· 210 183 * has been killed or is unreachable. 211 184 */ 212 185 return ERR_PTR(-EIO); 213 - } 214 - spin_lock(&xs_state.reply_lock); 186 + if (req->err) 187 + return ERR_PTR(req->err); 188 + 215 189 } 216 190 217 - msg = list_entry(xs_state.reply_list.next, 218 - struct xs_stored_msg, list); 219 - list_del(&msg->list); 220 - 221 - spin_unlock(&xs_state.reply_lock); 222 - 223 - *type = msg->hdr.type; 224 - if (len) 225 - *len = msg->hdr.len; 226 - body = msg->u.reply.body; 227 - 228 - kfree(msg); 229 - 230 - return body; 191 + return req->body; 231 192 } 232 193 233 - static void transaction_start(void) 194 + static void xs_send(struct xb_req_data *req, struct xsd_sockmsg *msg) 234 195 { 235 - mutex_lock(&xs_state.transaction_mutex); 236 - atomic_inc(&xs_state.transaction_count); 237 - mutex_unlock(&xs_state.transaction_mutex); 196 + bool notify; 197 + 198 + req->msg = *msg; 199 + req->err = 0; 200 + req->state = xb_req_state_queued; 201 + init_waitqueue_head(&req->wq); 202 + 203 + req->msg.req_id = xs_request_enter(req); 204 + 205 + mutex_lock(&xb_write_mutex); 206 + list_add_tail(&req->list, &xb_write_list); 207 + notify = list_is_singular(&xb_write_list); 208 + mutex_unlock(&xb_write_mutex); 209 + 210 + if (notify) 211 + 
wake_up(&xb_waitq); 238 212 } 239 213 240 - static void transaction_end(void) 241 - { 242 - if (atomic_dec_and_test(&xs_state.transaction_count)) 243 - wake_up(&xs_state.transaction_wq); 244 - } 245 - 246 - static void transaction_suspend(void) 247 - { 248 - mutex_lock(&xs_state.transaction_mutex); 249 - wait_event(xs_state.transaction_wq, 250 - atomic_read(&xs_state.transaction_count) == 0); 251 - } 252 - 253 - static void transaction_resume(void) 254 - { 255 - mutex_unlock(&xs_state.transaction_mutex); 256 - } 257 - 258 - void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg) 214 + static void *xs_wait_for_reply(struct xb_req_data *req, struct xsd_sockmsg *msg) 259 215 { 260 216 void *ret; 261 - enum xsd_sockmsg_type type = msg->type; 262 - int err; 263 217 264 - if (type == XS_TRANSACTION_START) 265 - transaction_start(); 218 + ret = read_reply(req); 266 219 267 - mutex_lock(&xs_state.request_mutex); 220 + xs_request_exit(req); 268 221 269 - err = xb_write(msg, sizeof(*msg) + msg->len); 270 - if (err) { 271 - msg->type = XS_ERROR; 272 - ret = ERR_PTR(err); 273 - } else 274 - ret = read_reply(&msg->type, &msg->len); 222 + msg->type = req->msg.type; 223 + msg->len = req->msg.len; 275 224 276 - mutex_unlock(&xs_state.request_mutex); 277 - 278 - if ((msg->type == XS_TRANSACTION_END) || 279 - ((type == XS_TRANSACTION_START) && (msg->type == XS_ERROR))) 280 - transaction_end(); 225 + mutex_lock(&xb_write_mutex); 226 + if (req->state == xb_req_state_queued || 227 + req->state == xb_req_state_wait_reply) 228 + req->state = xb_req_state_aborted; 229 + else 230 + kfree(req); 231 + mutex_unlock(&xb_write_mutex); 281 232 282 233 return ret; 234 + } 235 + 236 + static void xs_wake_up(struct xb_req_data *req) 237 + { 238 + wake_up(&req->wq); 239 + } 240 + 241 + int xenbus_dev_request_and_reply(struct xsd_sockmsg *msg, void *par) 242 + { 243 + struct xb_req_data *req; 244 + struct kvec *vec; 245 + 246 + req = kmalloc(sizeof(*req) + sizeof(*vec), GFP_KERNEL); 247 + if 
(!req) 248 + return -ENOMEM; 249 + 250 + vec = (struct kvec *)(req + 1); 251 + vec->iov_len = msg->len; 252 + vec->iov_base = msg + 1; 253 + 254 + req->vec = vec; 255 + req->num_vecs = 1; 256 + req->cb = xenbus_dev_queue_reply; 257 + req->par = par; 258 + 259 + xs_send(req, msg); 260 + 261 + return 0; 283 262 } 284 263 EXPORT_SYMBOL(xenbus_dev_request_and_reply); 285 264 ··· 296 263 unsigned int num_vecs, 297 264 unsigned int *len) 298 265 { 266 + struct xb_req_data *req; 299 267 struct xsd_sockmsg msg; 300 268 void *ret = NULL; 301 269 unsigned int i; 302 270 int err; 303 271 272 + req = kmalloc(sizeof(*req), GFP_NOIO | __GFP_HIGH); 273 + if (!req) 274 + return ERR_PTR(-ENOMEM); 275 + 276 + req->vec = iovec; 277 + req->num_vecs = num_vecs; 278 + req->cb = xs_wake_up; 279 + 304 280 msg.tx_id = t.id; 305 - msg.req_id = 0; 306 281 msg.type = type; 307 282 msg.len = 0; 308 283 for (i = 0; i < num_vecs; i++) 309 284 msg.len += iovec[i].iov_len; 310 285 311 - mutex_lock(&xs_state.request_mutex); 286 + xs_send(req, &msg); 312 287 313 - err = xb_write(&msg, sizeof(msg)); 314 - if (err) { 315 - mutex_unlock(&xs_state.request_mutex); 316 - return ERR_PTR(err); 317 - } 318 - 319 - for (i = 0; i < num_vecs; i++) { 320 - err = xb_write(iovec[i].iov_base, iovec[i].iov_len); 321 - if (err) { 322 - mutex_unlock(&xs_state.request_mutex); 323 - return ERR_PTR(err); 324 - } 325 - } 326 - 327 - ret = read_reply(&msg.type, len); 328 - 329 - mutex_unlock(&xs_state.request_mutex); 288 + ret = xs_wait_for_reply(req, &msg); 289 + if (len) 290 + *len = msg.len; 330 291 331 292 if (IS_ERR(ret)) 332 293 return ret; ··· 527 500 { 528 501 char *id_str; 529 502 530 - transaction_start(); 531 - 532 503 id_str = xs_single(XBT_NIL, XS_TRANSACTION_START, "", NULL); 533 - if (IS_ERR(id_str)) { 534 - transaction_end(); 504 + if (IS_ERR(id_str)) 535 505 return PTR_ERR(id_str); 536 - } 537 506 538 507 t->id = simple_strtoul(id_str, NULL, 0); 539 508 kfree(id_str); ··· 543 520 int 
xenbus_transaction_end(struct xenbus_transaction t, int abort) 544 521 { 545 522 char abortstr[2]; 546 - int err; 547 523 548 524 if (abort) 549 525 strcpy(abortstr, "F"); 550 526 else 551 527 strcpy(abortstr, "T"); 552 528 553 - err = xs_error(xs_single(t, XS_TRANSACTION_END, abortstr, NULL)); 554 - 555 - transaction_end(); 556 - 557 - return err; 529 + return xs_error(xs_single(t, XS_TRANSACTION_END, abortstr, NULL)); 558 530 } 559 531 EXPORT_SYMBOL_GPL(xenbus_transaction_end); 560 532 ··· 682 664 683 665 return NULL; 684 666 } 667 + 668 + int xs_watch_msg(struct xs_watch_event *event) 669 + { 670 + if (count_strings(event->body, event->len) != 2) { 671 + kfree(event); 672 + return -EINVAL; 673 + } 674 + event->path = (const char *)event->body; 675 + event->token = (const char *)strchr(event->body, '\0') + 1; 676 + 677 + spin_lock(&watches_lock); 678 + event->handle = find_watch(event->token); 679 + if (event->handle != NULL) { 680 + spin_lock(&watch_events_lock); 681 + list_add_tail(&event->list, &watch_events); 682 + wake_up(&watch_events_waitq); 683 + spin_unlock(&watch_events_lock); 684 + } else 685 + kfree(event); 686 + spin_unlock(&watches_lock); 687 + 688 + return 0; 689 + } 690 + 685 691 /* 686 692 * Certain older XenBus toolstack cannot handle reading values that are 687 693 * not populated. 
Some Xen 3.4 installation are incapable of doing this ··· 754 712 755 713 sprintf(token, "%lX", (long)watch); 756 714 757 - down_read(&xs_state.watch_mutex); 715 + down_read(&xs_watch_rwsem); 758 716 759 717 spin_lock(&watches_lock); 760 718 BUG_ON(find_watch(token)); ··· 769 727 spin_unlock(&watches_lock); 770 728 } 771 729 772 - up_read(&xs_state.watch_mutex); 730 + up_read(&xs_watch_rwsem); 773 731 774 732 return err; 775 733 } ··· 777 735 778 736 void unregister_xenbus_watch(struct xenbus_watch *watch) 779 737 { 780 - struct xs_stored_msg *msg, *tmp; 738 + struct xs_watch_event *event, *tmp; 781 739 char token[sizeof(watch) * 2 + 1]; 782 740 int err; 783 741 784 742 sprintf(token, "%lX", (long)watch); 785 743 786 - down_read(&xs_state.watch_mutex); 744 + down_read(&xs_watch_rwsem); 787 745 788 746 spin_lock(&watches_lock); 789 747 BUG_ON(!find_watch(token)); ··· 794 752 if (err) 795 753 pr_warn("Failed to release watch %s: %i\n", watch->node, err); 796 754 797 - up_read(&xs_state.watch_mutex); 755 + up_read(&xs_watch_rwsem); 798 756 799 757 /* Make sure there are no callbacks running currently (unless 800 758 its us) */ ··· 803 761 804 762 /* Cancel pending watch events. 
*/ 805 763 spin_lock(&watch_events_lock); 806 - list_for_each_entry_safe(msg, tmp, &watch_events, list) { 807 - if (msg->u.watch.handle != watch) 764 + list_for_each_entry_safe(event, tmp, &watch_events, list) { 765 + if (event->handle != watch) 808 766 continue; 809 - list_del(&msg->list); 810 - kfree(msg->u.watch.path); 811 - kfree(msg); 767 + list_del(&event->list); 768 + kfree(event); 812 769 } 813 770 spin_unlock(&watch_events_lock); 814 771 ··· 818 777 819 778 void xs_suspend(void) 820 779 { 821 - transaction_suspend(); 822 - down_write(&xs_state.watch_mutex); 823 - mutex_lock(&xs_state.request_mutex); 824 - mutex_lock(&xs_state.response_mutex); 780 + xs_suspend_enter(); 781 + 782 + down_write(&xs_watch_rwsem); 783 + mutex_lock(&xs_response_mutex); 825 784 } 826 785 827 786 void xs_resume(void) ··· 831 790 832 791 xb_init_comms(); 833 792 834 - mutex_unlock(&xs_state.response_mutex); 835 - mutex_unlock(&xs_state.request_mutex); 836 - transaction_resume(); 793 + mutex_unlock(&xs_response_mutex); 837 794 838 - /* No need for watches_lock: the watch_mutex is sufficient. */ 795 + xs_suspend_exit(); 796 + 797 + /* No need for watches_lock: the xs_watch_rwsem is sufficient. 
*/ 839 798 list_for_each_entry(watch, &watches, list) { 840 799 sprintf(token, "%lX", (long)watch); 841 800 xs_watch(watch->node, token); 842 801 } 843 802 844 - up_write(&xs_state.watch_mutex); 803 + up_write(&xs_watch_rwsem); 845 804 } 846 805 847 806 void xs_suspend_cancel(void) 848 807 { 849 - mutex_unlock(&xs_state.response_mutex); 850 - mutex_unlock(&xs_state.request_mutex); 851 - up_write(&xs_state.watch_mutex); 852 - mutex_unlock(&xs_state.transaction_mutex); 808 + mutex_unlock(&xs_response_mutex); 809 + up_write(&xs_watch_rwsem); 810 + 811 + xs_suspend_exit(); 853 812 } 854 813 855 814 static int xenwatch_thread(void *unused) 856 815 { 857 816 struct list_head *ent; 858 - struct xs_stored_msg *msg; 817 + struct xs_watch_event *event; 859 818 860 819 for (;;) { 861 820 wait_event_interruptible(watch_events_waitq, ··· 873 832 spin_unlock(&watch_events_lock); 874 833 875 834 if (ent != &watch_events) { 876 - msg = list_entry(ent, struct xs_stored_msg, list); 877 - msg->u.watch.handle->callback(msg->u.watch.handle, 878 - msg->u.watch.path, 879 - msg->u.watch.token); 880 - kfree(msg->u.watch.path); 881 - kfree(msg); 835 + event = list_entry(ent, struct xs_watch_event, list); 836 + event->handle->callback(event->handle, event->path, 837 + event->token); 838 + kfree(event); 882 839 } 883 840 884 841 mutex_unlock(&xenwatch_mutex); ··· 885 846 return 0; 886 847 } 887 848 888 - static int process_msg(void) 849 + /* 850 + * Wake up all threads waiting for a xenstore reply. In case of shutdown all 851 + * pending replies will be marked as "aborted" in order to let the waiters 852 + * return in spite of xenstore possibly no longer being able to reply. This 853 + * will avoid blocking shutdown by a thread waiting for xenstore but being 854 + * necessary for shutdown processing to proceed. 
855 + */ 856 + static int xs_reboot_notify(struct notifier_block *nb, 857 + unsigned long code, void *unused) 889 858 { 890 - struct xs_stored_msg *msg; 891 - char *body; 892 - int err; 859 + struct xb_req_data *req; 893 860 894 - /* 895 - * We must disallow save/restore while reading a xenstore message. 896 - * A partial read across s/r leaves us out of sync with xenstored. 897 - */ 898 - for (;;) { 899 - err = xb_wait_for_data_to_read(); 900 - if (err) 901 - return err; 902 - mutex_lock(&xs_state.response_mutex); 903 - if (xb_data_to_read()) 904 - break; 905 - /* We raced with save/restore: pending data 'disappeared'. */ 906 - mutex_unlock(&xs_state.response_mutex); 907 - } 908 - 909 - 910 - msg = kmalloc(sizeof(*msg), GFP_NOIO | __GFP_HIGH); 911 - if (msg == NULL) { 912 - err = -ENOMEM; 913 - goto out; 914 - } 915 - 916 - err = xb_read(&msg->hdr, sizeof(msg->hdr)); 917 - if (err) { 918 - kfree(msg); 919 - goto out; 920 - } 921 - 922 - if (msg->hdr.len > XENSTORE_PAYLOAD_MAX) { 923 - kfree(msg); 924 - err = -EINVAL; 925 - goto out; 926 - } 927 - 928 - body = kmalloc(msg->hdr.len + 1, GFP_NOIO | __GFP_HIGH); 929 - if (body == NULL) { 930 - kfree(msg); 931 - err = -ENOMEM; 932 - goto out; 933 - } 934 - 935 - err = xb_read(body, msg->hdr.len); 936 - if (err) { 937 - kfree(body); 938 - kfree(msg); 939 - goto out; 940 - } 941 - body[msg->hdr.len] = '\0'; 942 - 943 - if (msg->hdr.type == XS_WATCH_EVENT) { 944 - if (count_strings(body, msg->hdr.len) != 2) { 945 - err = -EINVAL; 946 - kfree(msg); 947 - kfree(body); 948 - goto out; 949 - } 950 - msg->u.watch.path = (const char *)body; 951 - msg->u.watch.token = (const char *)strchr(body, '\0') + 1; 952 - 953 - spin_lock(&watches_lock); 954 - msg->u.watch.handle = find_watch(msg->u.watch.token); 955 - if (msg->u.watch.handle != NULL) { 956 - spin_lock(&watch_events_lock); 957 - list_add_tail(&msg->list, &watch_events); 958 - wake_up(&watch_events_waitq); 959 - spin_unlock(&watch_events_lock); 960 - } else { 961 - 
kfree(body); 962 - kfree(msg); 963 - } 964 - spin_unlock(&watches_lock); 965 - } else { 966 - msg->u.reply.body = body; 967 - spin_lock(&xs_state.reply_lock); 968 - list_add_tail(&msg->list, &xs_state.reply_list); 969 - spin_unlock(&xs_state.reply_lock); 970 - wake_up(&xs_state.reply_waitq); 971 - } 972 - 973 - out: 974 - mutex_unlock(&xs_state.response_mutex); 975 - return err; 861 + mutex_lock(&xb_write_mutex); 862 + list_for_each_entry(req, &xs_reply_list, list) 863 + wake_up(&req->wq); 864 + list_for_each_entry(req, &xb_write_list, list) 865 + wake_up(&req->wq); 866 + mutex_unlock(&xb_write_mutex); 867 + return NOTIFY_DONE; 976 868 } 977 869 978 - static int xenbus_thread(void *unused) 979 - { 980 - int err; 981 - 982 - for (;;) { 983 - err = process_msg(); 984 - if (err) 985 - pr_warn("error %d while reading message\n", err); 986 - if (kthread_should_stop()) 987 - break; 988 - } 989 - 990 - return 0; 991 - } 870 + static struct notifier_block xs_reboot_nb = { 871 + .notifier_call = xs_reboot_notify, 872 + }; 992 873 993 874 int xs_init(void) 994 875 { 995 876 int err; 996 877 struct task_struct *task; 997 878 998 - INIT_LIST_HEAD(&xs_state.reply_list); 999 - spin_lock_init(&xs_state.reply_lock); 1000 - init_waitqueue_head(&xs_state.reply_waitq); 1001 - 1002 - mutex_init(&xs_state.request_mutex); 1003 - mutex_init(&xs_state.response_mutex); 1004 - mutex_init(&xs_state.transaction_mutex); 1005 - init_rwsem(&xs_state.watch_mutex); 1006 - atomic_set(&xs_state.transaction_count, 0); 1007 - init_waitqueue_head(&xs_state.transaction_wq); 879 + register_reboot_notifier(&xs_reboot_nb); 1008 880 1009 881 /* Initialize the shared memory rings to talk to xenstored */ 1010 882 err = xb_init_comms(); ··· 926 976 if (IS_ERR(task)) 927 977 return PTR_ERR(task); 928 978 xenwatch_pid = task->pid; 929 - 930 - task = kthread_run(xenbus_thread, NULL, "xenbus"); 931 - if (IS_ERR(task)) 932 - return PTR_ERR(task); 933 979 934 980 /* shutdown watches for kexec boot */ 935 981 
xs_reset_watches();