Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

um: time-travel: rework interrupt handling in ext mode

In external time-travel mode, where time is controlled via the
controller application socket, interrupt handling is a little
tricky. For example on virtio, the following happens:
* we receive a message (that requires an ACK) on the vhost-user socket
* we add a time-travel event to handle the interrupt
(this causes communication on the time socket)
* we ACK the original vhost-user message
* we then handle the interrupt once the event is triggered

This protocol ensures that the sender of the interrupt only continues
to run in the simulation when the time-travel event has been added.

So far, this was only done in the virtio driver, but it was actually
wrong, because only virtqueue interrupts were handled this way, and
config change interrupts were handled immediately. Additionally, the
messages were actually handled in the real Linux interrupt handler,
but Linux interrupt handlers are part of the simulation and shouldn't
run while there's no time event.

To really do this properly and only handle all kinds of interrupts in
the time-travel event when we are scheduled to run in the simulation,
rework this to plug in to the lower interrupt layers in UML directly:

Add a um_request_irq_tt() function that lets a time-travel-aware
driver request an interrupt with an additional timetravel_handler()
that is called outside of the context of the simulation, to handle
the message only. It then adds an event to the time-travel calendar
if necessary, and no "real" Linux code runs outside of the time
simulation.

This also hooks in with suspend/resume properly now, since this new
timetravel_handler() can run while Linux is suspended and interrupts
are disabled, and decide to wake up (or not) the system based on the
message it received. Importantly in this case, it ACKs the message
before the system even resumes and interrupts are re-enabled, thus
allowing the simulation to progress properly.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Richard Weinberger <richard@nod.at>

authored by

Johannes Berg and committed by
Richard Weinberger
c8177aba 9b84512c

+267 -69
+48 -44
arch/um/drivers/virtio_uml.c
··· 55 55 u64 protocol_features; 56 56 u8 status; 57 57 u8 registered:1; 58 + 59 + u8 config_changed_irq:1; 60 + uint64_t vq_irq_vq_map; 58 61 }; 59 62 60 63 struct virtio_uml_vq_info { 61 64 int kick_fd, call_fd; 62 65 char name[32]; 63 - #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT 64 - struct virtqueue *vq; 65 - vq_callback_t *callback; 66 - struct time_travel_event defer; 67 - #endif 68 66 bool suspended; 69 67 }; 70 68 ··· 349 351 rc, size); 350 352 } 351 353 352 - static irqreturn_t vu_req_interrupt(int irq, void *data) 354 + static irqreturn_t vu_req_read_message(struct virtio_uml_device *vu_dev, 355 + struct time_travel_event *ev) 353 356 { 354 - struct virtio_uml_device *vu_dev = data; 355 357 struct virtqueue *vq; 356 358 int response = 1; 357 359 struct { ··· 369 371 370 372 switch (msg.msg.header.request) { 371 373 case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG: 372 - virtio_config_changed(&vu_dev->vdev); 374 + vu_dev->config_changed_irq = true; 373 375 response = 0; 374 376 break; 375 377 case VHOST_USER_SLAVE_VRING_CALL: 376 378 virtio_device_for_each_vq((&vu_dev->vdev), vq) { 377 379 if (vq->index == msg.msg.payload.vring_state.index) { 378 380 response = 0; 379 - vring_interrupt(0 /* ignored */, vq); 381 + vu_dev->vq_irq_vq_map |= BIT_ULL(vq->index); 380 382 break; 381 383 } 382 384 } ··· 390 392 msg.msg.header.request); 391 393 } 392 394 395 + if (ev) 396 + time_travel_add_irq_event(ev); 397 + 393 398 if (msg.msg.header.flags & VHOST_USER_FLAG_NEED_REPLY) 394 399 vhost_user_reply(vu_dev, &msg.msg, response); 395 400 396 401 return IRQ_HANDLED; 402 + } 403 + 404 + static irqreturn_t vu_req_interrupt(int irq, void *data) 405 + { 406 + struct virtio_uml_device *vu_dev = data; 407 + irqreturn_t ret = IRQ_HANDLED; 408 + 409 + if (!um_irq_timetravel_handler_used()) 410 + ret = vu_req_read_message(vu_dev, NULL); 411 + 412 + if (vu_dev->vq_irq_vq_map) { 413 + struct virtqueue *vq; 414 + 415 + virtio_device_for_each_vq((&vu_dev->vdev), vq) { 416 + if 
(vu_dev->vq_irq_vq_map & BIT_ULL(vq->index)) 417 + vring_interrupt(0 /* ignored */, vq); 418 + } 419 + vu_dev->vq_irq_vq_map = 0; 420 + } else if (vu_dev->config_changed_irq) { 421 + virtio_config_changed(&vu_dev->vdev); 422 + vu_dev->config_changed_irq = false; 423 + } 424 + 425 + return ret; 426 + } 427 + 428 + static void vu_req_interrupt_comm_handler(int irq, int fd, void *data, 429 + struct time_travel_event *ev) 430 + { 431 + vu_req_read_message(data, ev); 397 432 } 398 433 399 434 static int vhost_user_init_slave_req(struct virtio_uml_device *vu_dev) ··· 439 408 return rc; 440 409 vu_dev->req_fd = req_fds[0]; 441 410 442 - rc = um_request_irq(UM_IRQ_ALLOC, vu_dev->req_fd, IRQ_READ, 443 - vu_req_interrupt, IRQF_SHARED, 444 - vu_dev->pdev->name, vu_dev); 411 + rc = um_request_irq_tt(UM_IRQ_ALLOC, vu_dev->req_fd, IRQ_READ, 412 + vu_req_interrupt, IRQF_SHARED, 413 + vu_dev->pdev->name, vu_dev, 414 + vu_req_interrupt_comm_handler); 445 415 if (rc < 0) 446 416 goto err_close; 447 417 ··· 914 882 return rc; 915 883 } 916 884 917 - #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT 918 - static void vu_defer_irq_handle(struct time_travel_event *d) 919 - { 920 - struct virtio_uml_vq_info *info; 921 - 922 - info = container_of(d, struct virtio_uml_vq_info, defer); 923 - info->callback(info->vq); 924 - } 925 - 926 - static void vu_defer_irq_callback(struct virtqueue *vq) 927 - { 928 - struct virtio_uml_vq_info *info = vq->priv; 929 - 930 - time_travel_add_irq_event(&info->defer); 931 - } 932 - #endif 933 - 934 885 static struct virtqueue *vu_setup_vq(struct virtio_device *vdev, 935 886 unsigned index, vq_callback_t *callback, 936 887 const char *name, bool ctx) ··· 933 918 snprintf(info->name, sizeof(info->name), "%s.%d-%s", pdev->name, 934 919 pdev->id, name); 935 920 936 - #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT 937 - /* 938 - * When we get an interrupt, we must bounce it through the simulation 939 - * calendar (the time-travel=ext:... socket). 
940 - */ 941 - if (time_travel_mode == TT_MODE_EXTERNAL && callback) { 942 - info->callback = callback; 943 - callback = vu_defer_irq_callback; 944 - time_travel_set_event_fn(&info->defer, vu_defer_irq_handle); 945 - } 946 - #endif 947 - 948 921 vq = vring_create_virtqueue(index, num, PAGE_SIZE, vdev, true, true, 949 922 ctx, vu_notify, callback, info->name); 950 923 if (!vq) { ··· 941 938 } 942 939 vq->priv = info; 943 940 num = virtqueue_get_vring_size(vq); 944 - #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT 945 - info->vq = vq; 946 - #endif 947 941 948 942 if (vu_dev->protocol_features & 949 943 BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS)) { ··· 998 998 struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); 999 999 int i, queue_idx = 0, rc; 1000 1000 struct virtqueue *vq; 1001 + 1002 + /* not supported for now */ 1003 + if (WARN_ON(nvqs > 64)) 1004 + return -EINVAL; 1001 1005 1002 1006 rc = vhost_user_set_mem_table(vu_dev); 1003 1007 if (rc)
+6
arch/um/include/linux/time-internal.h
··· 7 7 #ifndef __TIMER_INTERNAL_H__ 8 8 #define __TIMER_INTERNAL_H__ 9 9 #include <linux/list.h> 10 + #include <asm/bug.h> 10 11 11 12 #define TIMER_MULTIPLIER 256 12 13 #define TIMER_MIN_DELTA 500 ··· 74 73 75 74 static inline void time_travel_wait_readable(int fd) 76 75 { 76 + } 77 + 78 + static inline void time_travel_add_irq_event(struct time_travel_event *e) 79 + { 80 + WARN_ON(1); 77 81 } 78 82 #endif /* CONFIG_UML_TIME_TRAVEL_SUPPORT */ 79 83
+60
arch/um/include/shared/irq_kern.h
··· 7 7 #define __IRQ_KERN_H__ 8 8 9 9 #include <linux/interrupt.h> 10 + #include <linux/time-internal.h> 10 11 #include <asm/ptrace.h> 11 12 #include "irq_user.h" 12 13 ··· 16 15 int um_request_irq(int irq, int fd, enum um_irq_type type, 17 16 irq_handler_t handler, unsigned long irqflags, 18 17 const char *devname, void *dev_id); 18 + 19 + #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT 20 + /** 21 + * um_request_irq_tt - request an IRQ with timetravel handler 22 + * 23 + * @irq: the IRQ number, or %UM_IRQ_ALLOC 24 + * @fd: The file descriptor to request an IRQ for 25 + * @type: read or write 26 + * @handler: the (generic style) IRQ handler 27 + * @irqflags: Linux IRQ flags 28 + * @devname: name for this to show 29 + * @dev_id: data pointer to pass to the IRQ handler 30 + * @timetravel_handler: the timetravel interrupt handler, invoked with the IRQ 31 + * number, fd, dev_id and time-travel event pointer. 32 + * 33 + * Returns: The interrupt number assigned or a negative error. 34 + * 35 + * Note that the timetravel handler is invoked only if the time_travel_mode is 36 + * %TT_MODE_EXTERNAL, and then it is invoked even while the system is suspended! 37 + * This function must call time_travel_add_irq_event() for the event passed with 38 + * an appropriate delay, before sending an ACK on the socket it was invoked for. 39 + * 40 + * If this was called while the system is suspended, then adding the event will 41 + * cause the system to resume. 42 + * 43 + * Since this function will almost certainly have to handle the FD's condition, 44 + * a read will consume the message, and after that it is up to the code using 45 + * it to pass such a message to the @handler in whichever way it can. 46 + * 47 + * If time_travel_mode is not %TT_MODE_EXTERNAL the @timetravel_handler will 48 + * not be invoked at all and the @handler must handle the FD becoming 49 + * readable (or writable) instead. Use um_irq_timetravel_handler_used() to 50 + * distinguish these cases. 
51 + * 52 + * See virtio_uml.c for an example. 53 + */ 54 + int um_request_irq_tt(int irq, int fd, enum um_irq_type type, 55 + irq_handler_t handler, unsigned long irqflags, 56 + const char *devname, void *dev_id, 57 + void (*timetravel_handler)(int, int, void *, 58 + struct time_travel_event *)); 59 + #else 60 + static inline 61 + int um_request_irq_tt(int irq, int fd, enum um_irq_type type, 62 + irq_handler_t handler, unsigned long irqflags, 63 + const char *devname, void *dev_id, 64 + void (*timetravel_handler)(int, int, void *, 65 + struct time_travel_event *)) 66 + { 67 + return um_request_irq(irq, fd, type, handler, irqflags, 68 + devname, dev_id); 69 + } 70 + #endif 71 + 72 + static inline bool um_irq_timetravel_handler_used(void) 73 + { 74 + return time_travel_mode == TT_MODE_EXTERNAL; 75 + } 76 + 19 77 void um_free_irq(int irq, void *dev_id); 20 78 #endif
+146 -25
arch/um/kernel/irq.c
··· 20 20 #include <os.h> 21 21 #include <irq_user.h> 22 22 #include <irq_kern.h> 23 - #include <as-layout.h> 23 + #include <linux/time-internal.h> 24 24 25 25 26 26 extern void free_irqs(void); ··· 38 38 bool active; 39 39 bool pending; 40 40 bool wakeup; 41 + #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT 42 + bool pending_on_resume; 43 + void (*timetravel_handler)(int, int, void *, 44 + struct time_travel_event *); 45 + struct time_travel_event event; 46 + #endif 41 47 }; 42 48 43 49 struct irq_entry { ··· 57 51 static DEFINE_SPINLOCK(irq_lock); 58 52 static LIST_HEAD(active_fds); 59 53 static DECLARE_BITMAP(irqs_allocated, NR_IRQS); 54 + static bool irqs_suspended; 60 55 61 56 static void irq_io_loop(struct irq_reg *irq, struct uml_pt_regs *regs) 62 57 { ··· 81 74 } 82 75 } 83 76 84 - void sigio_handler_suspend(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) 77 + #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT 78 + static void irq_event_handler(struct time_travel_event *ev) 85 79 { 86 - /* nothing */ 80 + struct irq_reg *reg = container_of(ev, struct irq_reg, event); 81 + 82 + /* do nothing if suspended - just to cause a wakeup */ 83 + if (irqs_suspended) 84 + return; 85 + 86 + generic_handle_irq(reg->irq); 87 + } 88 + 89 + static bool irq_do_timetravel_handler(struct irq_entry *entry, 90 + enum um_irq_type t) 91 + { 92 + struct irq_reg *reg = &entry->reg[t]; 93 + 94 + if (!reg->timetravel_handler) 95 + return false; 96 + 97 + /* prevent nesting - we'll get it again later when we SIGIO ourselves */ 98 + if (reg->pending_on_resume) 99 + return true; 100 + 101 + reg->timetravel_handler(reg->irq, entry->fd, reg->id, &reg->event); 102 + 103 + if (!reg->event.pending) 104 + return false; 105 + 106 + if (irqs_suspended) 107 + reg->pending_on_resume = true; 108 + return true; 109 + } 110 + #else 111 + static bool irq_do_timetravel_handler(struct irq_entry *entry, 112 + enum um_irq_type t) 113 + { 114 + return false; 115 + } 116 + #endif 117 + 118 + static void 
sigio_reg_handler(int idx, struct irq_entry *entry, enum um_irq_type t, 119 + struct uml_pt_regs *regs) 120 + { 121 + struct irq_reg *reg = &entry->reg[t]; 122 + 123 + if (!reg->events) 124 + return; 125 + 126 + if (os_epoll_triggered(idx, reg->events) <= 0) 127 + return; 128 + 129 + if (irq_do_timetravel_handler(entry, t)) 130 + return; 131 + 132 + if (irqs_suspended) 133 + return; 134 + 135 + irq_io_loop(reg, regs); 87 136 } 88 137 89 138 void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) 90 139 { 91 140 struct irq_entry *irq_entry; 92 141 int n, i; 142 + 143 + if (irqs_suspended && !um_irq_timetravel_handler_used()) 144 + return; 93 145 94 146 while (1) { 95 147 /* This is now lockless - epoll keeps back-referencesto the irqs ··· 171 105 172 106 irq_entry = os_epoll_get_data_pointer(i); 173 107 174 - for (t = 0; t < NUM_IRQ_TYPES; t++) { 175 - int events = irq_entry->reg[t].events; 176 - 177 - if (!events) 178 - continue; 179 - 180 - if (os_epoll_triggered(i, events) > 0) 181 - irq_io_loop(&irq_entry->reg[t], regs); 182 - } 108 + for (t = 0; t < NUM_IRQ_TYPES; t++) 109 + sigio_reg_handler(i, irq_entry, t, regs); 183 110 } 184 111 } 185 112 186 - free_irqs(); 113 + if (!irqs_suspended) 114 + free_irqs(); 187 115 } 188 116 189 117 static struct irq_entry *get_irq_entry_by_fd(int fd) ··· 229 169 free_irq_entry(entry, false); 230 170 } 231 171 232 - static int activate_fd(int irq, int fd, enum um_irq_type type, void *dev_id) 172 + static int activate_fd(int irq, int fd, enum um_irq_type type, void *dev_id, 173 + void (*timetravel_handler)(int, int, void *, 174 + struct time_travel_event *)) 233 175 { 234 176 struct irq_entry *irq_entry; 235 177 int err, events = os_event_mask(type); ··· 267 205 irq_entry->reg[type].irq = irq; 268 206 irq_entry->reg[type].active = true; 269 207 irq_entry->reg[type].events = events; 208 + 209 + #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT 210 + if (um_irq_timetravel_handler_used()) { 211 + 
irq_entry->reg[type].timetravel_handler = timetravel_handler; 212 + irq_entry->reg[type].event.fn = irq_event_handler; 213 + } 214 + #endif 270 215 271 216 WARN_ON(!update_irq_entry(irq_entry)); 272 217 spin_unlock_irqrestore(&irq_lock, flags); ··· 408 339 } 409 340 EXPORT_SYMBOL(um_free_irq); 410 341 411 - int um_request_irq(int irq, int fd, enum um_irq_type type, 412 - irq_handler_t handler, unsigned long irqflags, 413 - const char *devname, void *dev_id) 342 + static int 343 + _um_request_irq(int irq, int fd, enum um_irq_type type, 344 + irq_handler_t handler, unsigned long irqflags, 345 + const char *devname, void *dev_id, 346 + void (*timetravel_handler)(int, int, void *, 347 + struct time_travel_event *)) 414 348 { 415 349 int err; 416 350 ··· 432 360 return -ENOSPC; 433 361 434 362 if (fd != -1) { 435 - err = activate_fd(irq, fd, type, dev_id); 363 + err = activate_fd(irq, fd, type, dev_id, timetravel_handler); 436 364 if (err) 437 365 goto error; 438 366 } ··· 446 374 clear_bit(irq, irqs_allocated); 447 375 return err; 448 376 } 377 + 378 + int um_request_irq(int irq, int fd, enum um_irq_type type, 379 + irq_handler_t handler, unsigned long irqflags, 380 + const char *devname, void *dev_id) 381 + { 382 + return _um_request_irq(irq, fd, type, handler, irqflags, 383 + devname, dev_id, NULL); 384 + } 449 385 EXPORT_SYMBOL(um_request_irq); 386 + 387 + #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT 388 + int um_request_irq_tt(int irq, int fd, enum um_irq_type type, 389 + irq_handler_t handler, unsigned long irqflags, 390 + const char *devname, void *dev_id, 391 + void (*timetravel_handler)(int, int, void *, 392 + struct time_travel_event *)) 393 + { 394 + return _um_request_irq(irq, fd, type, handler, irqflags, 395 + devname, dev_id, timetravel_handler); 396 + } 397 + EXPORT_SYMBOL(um_request_irq_tt); 398 + #endif 450 399 451 400 #ifdef CONFIG_PM_SLEEP 452 401 void um_irqs_suspend(void) ··· 475 382 struct irq_entry *entry; 476 383 unsigned long flags; 477 384 478 - 
sig_info[SIGIO] = sigio_handler_suspend; 385 + irqs_suspended = true; 479 386 480 387 spin_lock_irqsave(&irq_lock, flags); 481 388 list_for_each_entry(entry, &active_fds, list) { 482 389 enum um_irq_type t; 483 - bool wake = false; 390 + bool clear = true; 484 391 485 392 for (t = 0; t < NUM_IRQ_TYPES; t++) { 486 393 if (!entry->reg[t].events) ··· 493 400 * any FDs that should be suspended. 494 401 */ 495 402 if (entry->reg[t].wakeup || 496 - entry->reg[t].irq == SIGIO_WRITE_IRQ) { 497 - wake = true; 403 + entry->reg[t].irq == SIGIO_WRITE_IRQ 404 + #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT 405 + || entry->reg[t].timetravel_handler 406 + #endif 407 + ) { 408 + clear = false; 498 409 break; 499 410 } 500 411 } 501 412 502 - if (!wake) { 413 + if (clear) { 503 414 entry->suspended = true; 504 415 os_clear_fd_async(entry->fd); 505 416 entry->sigio_workaround = ··· 518 421 struct irq_entry *entry; 519 422 unsigned long flags; 520 423 521 - spin_lock_irqsave(&irq_lock, flags); 424 + 425 + local_irq_save(flags); 426 + #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT 427 + /* 428 + * We don't need to lock anything here since we're in resume 429 + * and nothing else is running, but have disabled IRQs so we 430 + * don't try anything else with the interrupt list from there. 431 + */ 432 + list_for_each_entry(entry, &active_fds, list) { 433 + enum um_irq_type t; 434 + 435 + for (t = 0; t < NUM_IRQ_TYPES; t++) { 436 + struct irq_reg *reg = &entry->reg[t]; 437 + 438 + if (reg->pending_on_resume) { 439 + irq_enter(); 440 + generic_handle_irq(reg->irq); 441 + irq_exit(); 442 + reg->pending_on_resume = false; 443 + } 444 + } 445 + } 446 + #endif 447 + 448 + spin_lock(&irq_lock); 522 449 list_for_each_entry(entry, &active_fds, list) { 523 450 if (entry->suspended) { 524 451 int err = os_set_fd_async(entry->fd); ··· 558 437 } 559 438 spin_unlock_irqrestore(&irq_lock, flags); 560 439 561 - sig_info[SIGIO] = sigio_handler; 440 + irqs_suspended = false; 562 441 send_sigio_to_self(); 563 442 } 564 443
+7
arch/um/kernel/time.c
··· 278 278 { 279 279 struct time_travel_event *tmp; 280 280 bool inserted = false; 281 + unsigned long flags; 281 282 282 283 if (e->pending) 283 284 return; ··· 286 285 e->pending = true; 287 286 e->time = time; 288 287 288 + local_irq_save(flags); 289 289 list_for_each_entry(tmp, &time_travel_events, list) { 290 290 /* 291 291 * Add the new entry before one with higher time, ··· 309 307 tmp = time_travel_first_event(); 310 308 time_travel_ext_update_request(tmp->time); 311 309 time_travel_next_event = tmp->time; 310 + local_irq_restore(flags); 312 311 } 313 312 314 313 static void time_travel_add_event(struct time_travel_event *e, ··· 386 383 387 384 static bool time_travel_del_event(struct time_travel_event *e) 388 385 { 386 + unsigned long flags; 387 + 389 388 if (!e->pending) 390 389 return false; 390 + local_irq_save(flags); 391 391 list_del(&e->list); 392 392 e->pending = false; 393 + local_irq_restore(flags); 393 394 return true; 394 395 } 395 396