Merge tag 'trace-ring-buffer-v6.8-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace

Pull tracing fixes from Steven Rostedt:

- Do not allow large strings (> 4096) as a single write to trace_marker

The size of a string written into trace_marker was determined by the
size of the sub-buffer in the ring buffer. That size is dependent on
the PAGE_SIZE of the architecture as it can be mapped into user
space. But on PowerPC, where PAGE_SIZE is 64K, that pushed the limit
for a single trace_marker write up to 64K.

One of the selftests looks at the size of the ring buffer sub-buffers
and writes that plus more into the trace_marker. The write will take
what it can and report back what it consumed so that the user space
application (like echo) will write the rest of the string. The string
is stored in the ring buffer and can be read via the "trace" or
"trace_pipe" files.

The reading of the ring buffer uses vsnprintf(), which uses a
precision "%.*s" to make sure it only reads what is stored in the
buffer, as a bug could leave the string without a terminating nul byte.
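(As a plain user space illustration, not kernel code: a "%.*s"
precision caps how many bytes are read even when the buffer lacks a
terminating nul.)

  #include <stdio.h>

  int main(void)
  {
      char buf[4] = { 'a', 'b', 'c', 'd' };  /* deliberately not nul terminated */

      /* The precision limits the read to sizeof(buf) bytes */
      printf("%.*s\n", (int)sizeof(buf), buf);
      return 0;
  }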

With the combination of the precision change and the PAGE_SIZE of 64K
allowing huge strings to be added into the ring buffer, plus the test
that would actually stress that limit, a bug was reported where the
precision passed to "%.*s" was too big: the string was close to 64K
in size, while the maximum precision vsnprintf() accepts is 32K.

Linus suggested dropping that precision, as it could hide a bug if
the string were again stored without a terminating nul byte.

Another issue that was brought up is that the trace_seq buffer is
also based on PAGE_SIZE even though it is not tied to the
architecture limit like the ring buffer sub-buffer is. Having it be
64K * 2 is simply too big and wastes memory on systems with 64K
page sizes. It is now hardcoded to 8K, which is what architectures
with a 4K PAGE_SIZE already had (PAGE_SIZE * 2).

Finally, the write to trace_marker is now limited to 4K as there is
no reason to write larger strings into trace_marker.

- ring_buffer_wait() should not loop.

ring_buffer_wait() does not (yet) have the full context on whether it
should loop or not. Just return as soon as it is woken up and let the
callers decide whether to loop (they already do, so the internal loop
is redundant).
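(A rough sketch of that caller-side pattern, illustrative only and
not a verbatim copy of the tracing code:)

  /* Illustrative sketch: the caller, not ring_buffer_wait(),
   * re-checks its own condition and decides whether to wait again.
   */
  while (ring_buffer_empty_cpu(buffer, cpu)) {
      int ret = ring_buffer_wait(buffer, cpu, 0);

      if (ret)    /* e.g. -EINTR when a signal is pending */
          return ret;
      /* woken up: loop back and re-check before reading */
  }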

- Fix shortest_full field to be the smallest amount in the ring buffer
that a waiter is waiting for. The "shortest_full" field is updated
when a new waiter comes in and wants to wait for a smaller amount of
data in the ring buffer than other waiters. But after all waiters are
woken up, it's not reset, so if another waiter comes in wanting to
wait for more data, it will be woken up as soon as the ring buffer
reaches the smaller amount that the previous waiters were waiting for.

- The wake-up of all waiters on close is incorrectly called from
.release() and not from .flush(), so it will never wake up any waiters
as the .release() will not get called until all .read() calls are
finished. And the wakeup is for the waiters in those .read() calls.

* tag 'trace-ring-buffer-v6.8-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace:
tracing: Use .flush() call to wake up readers
ring-buffer: Fix resetting of shortest_full
ring-buffer: Fix waking up ring buffer readers
tracing: Limit trace_marker writes to just 4K
tracing: Limit trace_seq size to just 8K and not depend on architecture PAGE_SIZE
tracing: Remove precision vsnprintf() check from print event

Changed files (+120 -94):

include/linux/trace_seq.h (+7 -1):

···
 /*
  * Trace sequences are used to allow a function to call several other functions
  * to create a string of data to use.
+ *
+ * Have the trace seq to be 8K which is typically PAGE_SIZE * 2 on
+ * most architectures. The TRACE_SEQ_BUFFER_SIZE (which is
+ * TRACE_SEQ_SIZE minus the other fields of trace_seq), is the
+ * max size the output of a trace event may be.
  */
 
-#define TRACE_SEQ_BUFFER_SIZE   (PAGE_SIZE * 2 - \
+#define TRACE_SEQ_SIZE          8192
+#define TRACE_SEQ_BUFFER_SIZE   (TRACE_SEQ_SIZE - \
     (sizeof(struct seq_buf) + sizeof(size_t) + sizeof(int)))
 
 struct trace_seq {

kernel/trace/ring_buffer.c (+91 -78):

···
     struct irq_work         work;
     wait_queue_head_t       waiters;
     wait_queue_head_t       full_waiters;
-    long                    wait_index;
     bool                    waiters_pending;
     bool                    full_waiters_pending;
     bool                    wakeup_full;
···
     wake_up_all(&rbwork->waiters);
     if (rbwork->full_waiters_pending || rbwork->wakeup_full) {
+        /* Only cpu_buffer sets the above flags */
+        struct ring_buffer_per_cpu *cpu_buffer =
+            container_of(rbwork, struct ring_buffer_per_cpu, irq_work);
+
+        /* Called from interrupt context */
+        raw_spin_lock(&cpu_buffer->reader_lock);
         rbwork->wakeup_full = false;
         rbwork->full_waiters_pending = false;
+
+        /* Waking up all waiters, they will reset the shortest full */
+        cpu_buffer->shortest_full = 0;
+        raw_spin_unlock(&cpu_buffer->reader_lock);
+
         wake_up_all(&rbwork->full_waiters);
     }
 }
···
         rbwork = &cpu_buffer->irq_work;
     }
 
-    rbwork->wait_index++;
-    /* make sure the waiters see the new index */
-    smp_wmb();
-
     /* This can be called in any context */
     irq_work_queue(&rbwork->work);
+}
+
+static bool rb_watermark_hit(struct trace_buffer *buffer, int cpu, int full)
+{
+    struct ring_buffer_per_cpu *cpu_buffer;
+    bool ret = false;
+
+    /* Reads of all CPUs always waits for any data */
+    if (cpu == RING_BUFFER_ALL_CPUS)
+        return !ring_buffer_empty(buffer);
+
+    cpu_buffer = buffer->buffers[cpu];
+
+    if (!ring_buffer_empty_cpu(buffer, cpu)) {
+        unsigned long flags;
+        bool pagebusy;
+
+        if (!full)
+            return true;
+
+        raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+        pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
+        ret = !pagebusy && full_hit(buffer, cpu, full);
+
+        if (!cpu_buffer->shortest_full ||
+            cpu_buffer->shortest_full > full)
+            cpu_buffer->shortest_full = full;
+        raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+    }
+    return ret;
 }
 
 /**
···
     struct ring_buffer_per_cpu *cpu_buffer;
     DEFINE_WAIT(wait);
     struct rb_irq_work *work;
-    long wait_index;
     int ret = 0;
 
     /*
···
         work = &cpu_buffer->irq_work;
     }
 
-    wait_index = READ_ONCE(work->wait_index);
+    if (full)
+        prepare_to_wait(&work->full_waiters, &wait, TASK_INTERRUPTIBLE);
+    else
+        prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE);
 
-    while (true) {
-        if (full)
-            prepare_to_wait(&work->full_waiters, &wait, TASK_INTERRUPTIBLE);
-        else
-            prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE);
+    /*
+     * The events can happen in critical sections where
+     * checking a work queue can cause deadlocks.
+     * After adding a task to the queue, this flag is set
+     * only to notify events to try to wake up the queue
+     * using irq_work.
+     *
+     * We don't clear it even if the buffer is no longer
+     * empty. The flag only causes the next event to run
+     * irq_work to do the work queue wake up. The worse
+     * that can happen if we race with !trace_empty() is that
+     * an event will cause an irq_work to try to wake up
+     * an empty queue.
+     *
+     * There's no reason to protect this flag either, as
+     * the work queue and irq_work logic will do the necessary
+     * synchronization for the wake ups. The only thing
+     * that is necessary is that the wake up happens after
+     * a task has been queued. It's OK for spurious wake ups.
+     */
+    if (full)
+        work->full_waiters_pending = true;
+    else
+        work->waiters_pending = true;
 
-        /*
-         * The events can happen in critical sections where
-         * checking a work queue can cause deadlocks.
-         * After adding a task to the queue, this flag is set
-         * only to notify events to try to wake up the queue
-         * using irq_work.
-         *
-         * We don't clear it even if the buffer is no longer
-         * empty. The flag only causes the next event to run
-         * irq_work to do the work queue wake up. The worse
-         * that can happen if we race with !trace_empty() is that
-         * an event will cause an irq_work to try to wake up
-         * an empty queue.
-         *
-         * There's no reason to protect this flag either, as
-         * the work queue and irq_work logic will do the necessary
-         * synchronization for the wake ups. The only thing
-         * that is necessary is that the wake up happens after
-         * a task has been queued. It's OK for spurious wake ups.
-         */
-        if (full)
-            work->full_waiters_pending = true;
-        else
-            work->waiters_pending = true;
+    if (rb_watermark_hit(buffer, cpu, full))
+        goto out;
 
-        if (signal_pending(current)) {
-            ret = -EINTR;
-            break;
-        }
-
-        if (cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer))
-            break;
-
-        if (cpu != RING_BUFFER_ALL_CPUS &&
-            !ring_buffer_empty_cpu(buffer, cpu)) {
-            unsigned long flags;
-            bool pagebusy;
-            bool done;
-
-            if (!full)
-                break;
-
-            raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
-            pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
-            done = !pagebusy && full_hit(buffer, cpu, full);
-
-            if (!cpu_buffer->shortest_full ||
-                cpu_buffer->shortest_full > full)
-                cpu_buffer->shortest_full = full;
-            raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
-            if (done)
-                break;
-        }
-
-        schedule();
-
-        /* Make sure to see the new wait index */
-        smp_rmb();
-        if (wait_index != work->wait_index)
-            break;
+    if (signal_pending(current)) {
+        ret = -EINTR;
+        goto out;
     }
 
+    schedule();
+ out:
     if (full)
         finish_wait(&work->full_waiters, &wait);
     else
         finish_wait(&work->waiters, &wait);
+
+    if (!ret && !rb_watermark_hit(buffer, cpu, full) && signal_pending(current))
+        ret = -EINTR;
 
     return ret;
 }
···
                              struct file *filp, poll_table *poll_table, int full)
 {
     struct ring_buffer_per_cpu *cpu_buffer;
-    struct rb_irq_work *work;
+    struct rb_irq_work *rbwork;
 
     if (cpu == RING_BUFFER_ALL_CPUS) {
-        work = &buffer->irq_work;
+        rbwork = &buffer->irq_work;
         full = 0;
     } else {
         if (!cpumask_test_cpu(cpu, buffer->cpumask))
             return EPOLLERR;
 
         cpu_buffer = buffer->buffers[cpu];
-        work = &cpu_buffer->irq_work;
+        rbwork = &cpu_buffer->irq_work;
     }
 
     if (full) {
-        poll_wait(filp, &work->full_waiters, poll_table);
-        work->full_waiters_pending = true;
+        unsigned long flags;
+
+        poll_wait(filp, &rbwork->full_waiters, poll_table);
+
+        raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+        rbwork->full_waiters_pending = true;
         if (!cpu_buffer->shortest_full ||
             cpu_buffer->shortest_full > full)
             cpu_buffer->shortest_full = full;
+        raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
     } else {
-        poll_wait(filp, &work->waiters, poll_table);
-        work->waiters_pending = true;
+        poll_wait(filp, &rbwork->waiters, poll_table);
+        rbwork->waiters_pending = true;
     }
 
     /*

kernel/trace/trace.c (+20 -11):

···
     return 0;
 }
 
+#define TRACE_MARKER_MAX_SIZE 4096
+
 static ssize_t
 tracing_mark_write(struct file *filp, const char __user *ubuf,
                    size_t cnt, loff_t *fpos)
···
     if ((ssize_t)cnt < 0)
         return -EINVAL;
 
+    if (cnt > TRACE_MARKER_MAX_SIZE)
+        cnt = TRACE_MARKER_MAX_SIZE;
+
     meta_size = sizeof(*entry) + 2;  /* add '\0' and possible '\n' */
  again:
     size = cnt + meta_size;
···
     /* If less than "<faulted>", then make sure we can still add that */
     if (cnt < FAULTED_SIZE)
         size += FAULTED_SIZE - cnt;
-
-    if (size > TRACE_SEQ_BUFFER_SIZE) {
-        cnt -= size - TRACE_SEQ_BUFFER_SIZE;
-        goto again;
-    }
 
     buffer = tr->array_buffer.buffer;
     event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
···
     return size;
 }
 
+static int tracing_buffers_flush(struct file *file, fl_owner_t id)
+{
+    struct ftrace_buffer_info *info = file->private_data;
+    struct trace_iterator *iter = &info->iter;
+
+    iter->wait_index++;
+    /* Make sure the waiters see the new wait_index */
+    smp_wmb();
+
+    ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
+
+    return 0;
+}
+
 static int tracing_buffers_release(struct inode *inode, struct file *file)
 {
     struct ftrace_buffer_info *info = file->private_data;
···
     iter->tr->trace_ref--;
 
     __trace_array_put(iter->tr);
-
-    iter->wait_index++;
-    /* Make sure the waiters see the new wait_index */
-    smp_wmb();
-
-    ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
 
     if (info->spare)
         ring_buffer_free_read_page(iter->array_buffer->buffer,
···
     .read           = tracing_buffers_read,
     .poll           = tracing_buffers_poll,
     .release        = tracing_buffers_release,
+    .flush          = tracing_buffers_flush,
     .splice_read    = tracing_buffers_splice_read,
     .unlocked_ioctl = tracing_buffers_ioctl,
     .llseek         = no_llseek,

kernel/trace/trace_output.c (+2 -4):

···
 {
     struct print_entry *field;
     struct trace_seq *s = &iter->seq;
-    int max = iter->ent_size - offsetof(struct print_entry, buf);
 
     trace_assign_type(field, iter->ent);
 
     seq_print_ip_sym(s, field->ip, flags);
-    trace_seq_printf(s, ": %.*s", max, field->buf);
+    trace_seq_printf(s, ": %s", field->buf);
 
     return trace_handle_return(s);
 }
···
                  struct trace_event *event)
 {
     struct print_entry *field;
-    int max = iter->ent_size - offsetof(struct print_entry, buf);
 
     trace_assign_type(field, iter->ent);
 
-    trace_seq_printf(&iter->seq, "# %lx %.*s", field->ip, max, field->buf);
+    trace_seq_printf(&iter->seq, "# %lx %s", field->ip, field->buf);
 
     return trace_handle_return(&iter->seq);
 }