Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

tracing/ring-buffer: Move poll wake ups into ring buffer code

Move the logic to wake up on ring buffer data into the ring buffer
code itself. This simplifies the tracing code a lot and also has the
added benefit that waiters on one of the instance buffers can be woken
only when data is added to that instance, rather than whenever data is
added to any instance.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

+164 -71
+6
include/linux/ring_buffer.h
··· 4 4 #include <linux/kmemcheck.h> 5 5 #include <linux/mm.h> 6 6 #include <linux/seq_file.h> 7 + #include <linux/poll.h> 7 8 8 9 struct ring_buffer; 9 10 struct ring_buffer_iter; ··· 96 95 static struct lock_class_key __key; \ 97 96 __ring_buffer_alloc((size), (flags), &__key); \ 98 97 }) 98 + 99 + void ring_buffer_wait(struct ring_buffer *buffer, int cpu); 100 + int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu, 101 + struct file *filp, poll_table *poll_table); 102 + 99 103 100 104 #define RING_BUFFER_ALL_CPUS -1 101 105
+146
kernel/trace/ring_buffer.c
··· 8 8 #include <linux/trace_clock.h> 9 9 #include <linux/trace_seq.h> 10 10 #include <linux/spinlock.h> 11 + #include <linux/irq_work.h> 11 12 #include <linux/debugfs.h> 12 13 #include <linux/uaccess.h> 13 14 #include <linux/hardirq.h> ··· 443 442 return ret; 444 443 } 445 444 445 + struct rb_irq_work { 446 + struct irq_work work; 447 + wait_queue_head_t waiters; 448 + bool waiters_pending; 449 + }; 450 + 446 451 /* 447 452 * head_page == tail_page && head == tail then buffer is empty. 448 453 */ ··· 483 476 struct list_head new_pages; /* new pages to add */ 484 477 struct work_struct update_pages_work; 485 478 struct completion update_done; 479 + 480 + struct rb_irq_work irq_work; 486 481 }; 487 482 488 483 struct ring_buffer { ··· 504 495 struct notifier_block cpu_notify; 505 496 #endif 506 497 u64 (*clock)(void); 498 + 499 + struct rb_irq_work irq_work; 507 500 }; 508 501 509 502 struct ring_buffer_iter { ··· 516 505 unsigned long cache_read; 517 506 u64 read_stamp; 518 507 }; 508 + 509 + /* 510 + * rb_wake_up_waiters - wake up tasks waiting for ring buffer input 511 + * 512 + * Schedules a delayed work to wake up any task that is blocked on the 513 + * ring buffer waiters queue. 514 + */ 515 + static void rb_wake_up_waiters(struct irq_work *work) 516 + { 517 + struct rb_irq_work *rbwork = container_of(work, struct rb_irq_work, work); 518 + 519 + wake_up_all(&rbwork->waiters); 520 + } 521 + 522 + /** 523 + * ring_buffer_wait - wait for input to the ring buffer 524 + * @buffer: buffer to wait on 525 + * @cpu: the cpu buffer to wait on 526 + * 527 + * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon 528 + * as data is added to any of the @buffer's cpu buffers. Otherwise 529 + * it will wait for data to be added to a specific cpu buffer. 
530 + */ 531 + void ring_buffer_wait(struct ring_buffer *buffer, int cpu) 532 + { 533 + struct ring_buffer_per_cpu *cpu_buffer; 534 + DEFINE_WAIT(wait); 535 + struct rb_irq_work *work; 536 + 537 + /* 538 + * Depending on what the caller is waiting for, either any 539 + * data in any cpu buffer, or a specific buffer, put the 540 + * caller on the appropriate wait queue. 541 + */ 542 + if (cpu == RING_BUFFER_ALL_CPUS) 543 + work = &buffer->irq_work; 544 + else { 545 + cpu_buffer = buffer->buffers[cpu]; 546 + work = &cpu_buffer->irq_work; 547 + } 548 + 549 + 550 + prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE); 551 + 552 + /* 553 + * The events can happen in critical sections where 554 + * checking a work queue can cause deadlocks. 555 + * After adding a task to the queue, this flag is set 556 + * only to notify events to try to wake up the queue 557 + * using irq_work. 558 + * 559 + * We don't clear it even if the buffer is no longer 560 + * empty. The flag only causes the next event to run 561 + * irq_work to do the work queue wake up. The worse 562 + * that can happen if we race with !trace_empty() is that 563 + * an event will cause an irq_work to try to wake up 564 + * an empty queue. 565 + * 566 + * There's no reason to protect this flag either, as 567 + * the work queue and irq_work logic will do the necessary 568 + * synchronization for the wake ups. The only thing 569 + * that is necessary is that the wake up happens after 570 + * a task has been queued. It's OK for spurious wake ups. 
571 + */ 572 + work->waiters_pending = true; 573 + 574 + if ((cpu == RING_BUFFER_ALL_CPUS && ring_buffer_empty(buffer)) || 575 + (cpu != RING_BUFFER_ALL_CPUS && ring_buffer_empty_cpu(buffer, cpu))) 576 + schedule(); 577 + 578 + finish_wait(&work->waiters, &wait); 579 + } 580 + 581 + /** 582 + * ring_buffer_poll_wait - poll on buffer input 583 + * @buffer: buffer to wait on 584 + * @cpu: the cpu buffer to wait on 585 + * @filp: the file descriptor 586 + * @poll_table: The poll descriptor 587 + * 588 + * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon 589 + * as data is added to any of the @buffer's cpu buffers. Otherwise 590 + * it will wait for data to be added to a specific cpu buffer. 591 + * 592 + * Returns POLLIN | POLLRDNORM if data exists in the buffers, 593 + * zero otherwise. 594 + */ 595 + int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu, 596 + struct file *filp, poll_table *poll_table) 597 + { 598 + struct ring_buffer_per_cpu *cpu_buffer; 599 + struct rb_irq_work *work; 600 + 601 + if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) || 602 + (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu))) 603 + return POLLIN | POLLRDNORM; 604 + 605 + if (cpu == RING_BUFFER_ALL_CPUS) 606 + work = &buffer->irq_work; 607 + else { 608 + cpu_buffer = buffer->buffers[cpu]; 609 + work = &cpu_buffer->irq_work; 610 + } 611 + 612 + work->waiters_pending = true; 613 + poll_wait(filp, &work->waiters, poll_table); 614 + 615 + if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) || 616 + (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu))) 617 + return POLLIN | POLLRDNORM; 618 + return 0; 619 + } 519 620 520 621 /* buffer may be either ring_buffer or ring_buffer_per_cpu */ 521 622 #define RB_WARN_ON(b, cond) \ ··· 1184 1061 cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; 1185 1062 INIT_WORK(&cpu_buffer->update_pages_work, update_pages_handler); 1186 1063 
init_completion(&cpu_buffer->update_done); 1064 + init_irq_work(&cpu_buffer->irq_work.work, rb_wake_up_waiters); 1187 1065 1188 1066 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), 1189 1067 GFP_KERNEL, cpu_to_node(cpu)); ··· 1279 1155 buffer->flags = flags; 1280 1156 buffer->clock = trace_clock_local; 1281 1157 buffer->reader_lock_key = key; 1158 + 1159 + init_irq_work(&buffer->irq_work.work, rb_wake_up_waiters); 1282 1160 1283 1161 /* need at least two pages */ 1284 1162 if (nr_pages < 2) ··· 2736 2610 rb_end_commit(cpu_buffer); 2737 2611 } 2738 2612 2613 + static __always_inline void 2614 + rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) 2615 + { 2616 + if (buffer->irq_work.waiters_pending) { 2617 + buffer->irq_work.waiters_pending = false; 2618 + /* irq_work_queue() supplies it's own memory barriers */ 2619 + irq_work_queue(&buffer->irq_work.work); 2620 + } 2621 + 2622 + if (cpu_buffer->irq_work.waiters_pending) { 2623 + cpu_buffer->irq_work.waiters_pending = false; 2624 + /* irq_work_queue() supplies it's own memory barriers */ 2625 + irq_work_queue(&cpu_buffer->irq_work.work); 2626 + } 2627 + } 2628 + 2739 2629 /** 2740 2630 * ring_buffer_unlock_commit - commit a reserved 2741 2631 * @buffer: The buffer to commit to ··· 2770 2628 cpu_buffer = buffer->buffers[cpu]; 2771 2629 2772 2630 rb_commit(cpu_buffer, event); 2631 + 2632 + rb_wakeups(buffer, cpu_buffer); 2773 2633 2774 2634 trace_recursive_unlock(); 2775 2635 ··· 2944 2800 memcpy(body, data, length); 2945 2801 2946 2802 rb_commit(cpu_buffer, event); 2803 + 2804 + rb_wakeups(buffer, cpu_buffer); 2947 2805 2948 2806 ret = 0; 2949 2807 out:
+12 -71
kernel/trace/trace.c
··· 19 19 #include <linux/seq_file.h> 20 20 #include <linux/notifier.h> 21 21 #include <linux/irqflags.h> 22 - #include <linux/irq_work.h> 23 22 #include <linux/debugfs.h> 24 23 #include <linux/pagemap.h> 25 24 #include <linux/hardirq.h> ··· 84 85 * occurred. 85 86 */ 86 87 static DEFINE_PER_CPU(bool, trace_cmdline_save); 87 - 88 - /* 89 - * When a reader is waiting for data, then this variable is 90 - * set to true. 91 - */ 92 - static bool trace_wakeup_needed; 93 - 94 - static struct irq_work trace_work_wakeup; 95 88 96 89 /* 97 90 * Kill all tracing for good (never come back). ··· 325 334 326 335 #endif 327 336 328 - /* trace_wait is a waitqueue for tasks blocked on trace_poll */ 329 - static DECLARE_WAIT_QUEUE_HEAD(trace_wait); 330 - 331 337 /* trace_flags holds trace_options default values */ 332 338 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | 333 339 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME | 334 340 TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | 335 341 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS; 336 - 337 - /** 338 - * trace_wake_up - wake up tasks waiting for trace input 339 - * 340 - * Schedules a delayed work to wake up any task that is blocked on the 341 - * trace_wait queue. These is used with trace_poll for tasks polling the 342 - * trace. 343 - */ 344 - static void trace_wake_up(struct irq_work *work) 345 - { 346 - wake_up_all(&trace_wait); 347 - 348 - } 349 342 350 343 /** 351 344 * tracing_on - enable tracing buffers ··· 738 763 739 764 static void default_wait_pipe(struct trace_iterator *iter) 740 765 { 741 - DEFINE_WAIT(wait); 766 + /* Iterators are static, they should be filled or empty */ 767 + if (trace_buffer_iter(iter, iter->cpu_file)) 768 + return; 742 769 743 - prepare_to_wait(&trace_wait, &wait, TASK_INTERRUPTIBLE); 744 - 745 - /* 746 - * The events can happen in critical sections where 747 - * checking a work queue can cause deadlocks. 
748 - * After adding a task to the queue, this flag is set 749 - * only to notify events to try to wake up the queue 750 - * using irq_work. 751 - * 752 - * We don't clear it even if the buffer is no longer 753 - * empty. The flag only causes the next event to run 754 - * irq_work to do the work queue wake up. The worse 755 - * that can happen if we race with !trace_empty() is that 756 - * an event will cause an irq_work to try to wake up 757 - * an empty queue. 758 - * 759 - * There's no reason to protect this flag either, as 760 - * the work queue and irq_work logic will do the necessary 761 - * synchronization for the wake ups. The only thing 762 - * that is necessary is that the wake up happens after 763 - * a task has been queued. It's OK for spurious wake ups. 764 - */ 765 - trace_wakeup_needed = true; 766 - 767 - if (trace_empty(iter)) 768 - schedule(); 769 - 770 - finish_wait(&trace_wait, &wait); 770 + ring_buffer_wait(iter->tr->buffer, iter->cpu_file); 771 771 } 772 772 773 773 /** ··· 1212 1262 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event) 1213 1263 { 1214 1264 __this_cpu_write(trace_cmdline_save, true); 1215 - if (trace_wakeup_needed) { 1216 - trace_wakeup_needed = false; 1217 - /* irq_work_queue() supplies it's own memory barriers */ 1218 - irq_work_queue(&trace_work_wakeup); 1219 - } 1220 1265 ring_buffer_unlock_commit(buffer, event); 1221 1266 } 1222 1267 ··· 3502 3557 static unsigned int 3503 3558 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table) 3504 3559 { 3505 - if (trace_flags & TRACE_ITER_BLOCK) { 3560 + /* Iterators are static, they should be filled or empty */ 3561 + if (trace_buffer_iter(iter, iter->cpu_file)) 3562 + return POLLIN | POLLRDNORM; 3563 + 3564 + if (trace_flags & TRACE_ITER_BLOCK) 3506 3565 /* 3507 3566 * Always select as readable when in blocking mode 3508 3567 */ 3509 3568 return POLLIN | POLLRDNORM; 3510 - } else { 3511 - if (!trace_empty(iter)) 3512 - 
return POLLIN | POLLRDNORM; 3513 - trace_wakeup_needed = true; 3514 - poll_wait(filp, &trace_wait, poll_table); 3515 - if (!trace_empty(iter)) 3516 - return POLLIN | POLLRDNORM; 3517 - 3518 - return 0; 3519 - } 3569 + else 3570 + return ring_buffer_poll_wait(iter->tr->buffer, iter->cpu_file, 3571 + filp, poll_table); 3520 3572 } 3521 3573 3522 3574 static unsigned int ··· 5643 5701 #endif 5644 5702 5645 5703 trace_init_cmdlines(); 5646 - init_irq_work(&trace_work_wakeup, trace_wake_up); 5647 5704 5648 5705 register_tracer(&nop_trace); 5649 5706