Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'trace-v6.3-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace

Pull tracing fixes from Steven Rostedt:

- Fix timerlat notification, as it was not triggering the notify to
users when a new max latency was hit.

- Do not trigger max latency if the tracing is off.

When tracing is off, the ring buffer is not updated, so it does not make
sense to notify when there's a new max latency detected by the
tracer, as why that latency happened is not available. The tracing
logic still runs when the ring buffer is disabled, but it should not
be triggering notifications.

- Fix race on freeing the synthetic event "last_cmd" variable by adding
a mutex around it.

- Fix race between reader and writer of the ring buffer by adding
memory barriers. When the writer is still on the reader page it must
have its content visible on the buffer before it moves the commit
index that the reader uses to know how much content is on the page.

- Make get_lock_parent_ip() always inlined, as it uses _THIS_IP_ and
_RET_IP_, which gets broken if it is not inlined.

- Make __field(int, arr[5]) in a TRACE_EVENT() macro fail to build.

The field formats of trace events are calculated by using
sizeof(type) and other means by what is passed into the structure
macros like __field(). The __field() macro is only meant for atom
types like int, long, short, pointer, etc. It is not meant for
arrays.

The code will currently compile with arrays, but then the format
produced will be inaccurate, and user space parsing tools will break.

Two bugs have already been fixed, now add code that will make the
kernel fail to build if another trace event includes this buggy field
format.

- Fix boot up snapshot code:

Boot snapshots were triggering when not even asked for on the kernel
command line. This was caused by two bugs:

1) It would trigger a snapshot on any instance if one was created
from the kernel command line.

2) The error handling would only affect the top level instance.
So the fact that a snapshot was done on an instance that didn't
allocate a buffer triggered a warning written into the top level
buffer, and worse yet, disabled the top level buffer.

- Fix memory leak that was caused when an error was logged in a trace
buffer instance, and then the buffer instance was removed.

The allocated error log messages still needed to be freed.

* tag 'trace-v6.3-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace:
tracing: Free error logs of tracing instances
tracing: Fix ftrace_boot_snapshot command line logic
tracing: Have tracing_snapshot_instance_cond() write errors to the appropriate instance
tracing: Error if a trace event has an array for a __field()
tracing/osnoise: Fix notify new tracing_max_latency
tracing/timerlat: Notify new max thread latency
ftrace: Mark get_lock_parent_ip() __always_inline
ring-buffer: Fix race while reader and writer are on the same page
tracing/synthetic: Fix races on freeing last_cmd

+64 -25
+1 -1
include/linux/ftrace.h
··· 980 980 #define CALLER_ADDR5 ((unsigned long)ftrace_return_address(5)) 981 981 #define CALLER_ADDR6 ((unsigned long)ftrace_return_address(6)) 982 982 983 - static inline unsigned long get_lock_parent_ip(void) 983 + static __always_inline unsigned long get_lock_parent_ip(void) 984 984 { 985 985 unsigned long addr = CALLER_ADDR0; 986 986
+17 -4
include/trace/stages/stage5_get_offsets.h
··· 9 9 #undef __entry 10 10 #define __entry entry 11 11 12 + /* 13 + * Fields should never declare an array: i.e. __field(int, arr[5]) 14 + * If they do, it will cause issues in parsing and possibly corrupt the 15 + * events. To prevent that from happening, test the sizeof() a fictitious 16 + * type called "struct _test_no_array_##item" which will fail if "item" 17 + * contains array elements (like "arr[5]"). 18 + * 19 + * If you hit this, use __array(int, arr, 5) instead. 20 + */ 12 21 #undef __field 13 - #define __field(type, item) 22 + #define __field(type, item) \ 23 + { (void)sizeof(struct _test_no_array_##item *); } 14 24 15 25 #undef __field_ext 16 - #define __field_ext(type, item, filter_type) 26 + #define __field_ext(type, item, filter_type) \ 27 + { (void)sizeof(struct _test_no_array_##item *); } 17 28 18 29 #undef __field_struct 19 - #define __field_struct(type, item) 30 + #define __field_struct(type, item) \ 31 + { (void)sizeof(struct _test_no_array_##item *); } 20 32 21 33 #undef __field_struct_ext 22 - #define __field_struct_ext(type, item, filter_type) 34 + #define __field_struct_ext(type, item, filter_type) \ 35 + { (void)sizeof(struct _test_no_array_##item *); } 23 36 24 37 #undef __array 25 38 #define __array(type, item, len)
+12 -1
kernel/trace/ring_buffer.c
··· 3098 3098 if (RB_WARN_ON(cpu_buffer, 3099 3099 rb_is_reader_page(cpu_buffer->tail_page))) 3100 3100 return; 3101 + /* 3102 + * No need for a memory barrier here, as the update 3103 + * of the tail_page did it for this page. 3104 + */ 3101 3105 local_set(&cpu_buffer->commit_page->page->commit, 3102 3106 rb_page_write(cpu_buffer->commit_page)); 3103 3107 rb_inc_page(&cpu_buffer->commit_page); ··· 3111 3107 while (rb_commit_index(cpu_buffer) != 3112 3108 rb_page_write(cpu_buffer->commit_page)) { 3113 3109 3110 + /* Make sure the readers see the content of what is committed. */ 3111 + smp_wmb(); 3114 3112 local_set(&cpu_buffer->commit_page->page->commit, 3115 3113 rb_page_write(cpu_buffer->commit_page)); 3116 3114 RB_WARN_ON(cpu_buffer, ··· 4690 4684 4691 4685 /* 4692 4686 * Make sure we see any padding after the write update 4693 - * (see rb_reset_tail()) 4687 + * (see rb_reset_tail()). 4688 + * 4689 + * In addition, a writer may be writing on the reader page 4690 + * if the page has not been fully filled, so the read barrier 4691 + * is also needed to make sure we see the content of what is 4692 + * committed by the writer (see rb_set_commit_to_write()). 4694 4693 */ 4695 4694 smp_rmb(); 4696 4695
+15 -13
kernel/trace/trace.c
··· 1149 1149 unsigned long flags; 1150 1150 1151 1151 if (in_nmi()) { 1152 - internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n"); 1153 - internal_trace_puts("*** snapshot is being ignored ***\n"); 1152 + trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n"); 1153 + trace_array_puts(tr, "*** snapshot is being ignored ***\n"); 1154 1154 return; 1155 1155 } 1156 1156 1157 1157 if (!tr->allocated_snapshot) { 1158 - internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n"); 1159 - internal_trace_puts("*** stopping trace here! ***\n"); 1160 - tracing_off(); 1158 + trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n"); 1159 + trace_array_puts(tr, "*** stopping trace here! ***\n"); 1160 + tracer_tracing_off(tr); 1161 1161 return; 1162 1162 } 1163 1163 1164 1164 /* Note, snapshot can not be used when the tracer uses it */ 1165 1165 if (tracer->use_max_tr) { 1166 - internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n"); 1167 - internal_trace_puts("*** Can not use snapshot (sorry) ***\n"); 1166 + trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n"); 1167 + trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n"); 1168 1168 return; 1169 1169 } 1170 1170 ··· 9516 9516 tracefs_remove(tr->dir); 9517 9517 free_percpu(tr->last_func_repeats); 9518 9518 free_trace_buffers(tr); 9519 + clear_tracing_err_log(tr); 9519 9520 9520 9521 for (i = 0; i < tr->nr_topts; i++) { 9521 9522 kfree(tr->topts[i].topts); ··· 10394 10393 10395 10394 void __init ftrace_boot_snapshot(void) 10396 10395 { 10396 + #ifdef CONFIG_TRACER_MAX_TRACE 10397 10397 struct trace_array *tr; 10398 10398 10399 - if (snapshot_at_boot) { 10400 - tracing_snapshot(); 10401 - internal_trace_puts("** Boot snapshot taken **\n"); 10402 - } 10399 + if (!snapshot_at_boot) 10400 + return; 10403 10401 10404 10402 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 10405 - if (tr == &global_trace) 10403 + if (!tr->allocated_snapshot) 10406 10404 continue; 10407 - trace_array_puts(tr, "** Boot snapshot taken **\n"); 10405 + 10408 10406 tracing_snapshot_instance(tr); 10407 trace_array_puts(tr, "** Boot snapshot taken **\n"); 10409 10408 } 10409 + #endif 10410 10410 } 10411 10411 10412 10412 void __init early_trace_init(void)
+16 -5
kernel/trace/trace_events_synth.c
··· 44 44 45 45 static const char *err_text[] = { ERRORS }; 46 46 47 + DEFINE_MUTEX(lastcmd_mutex); 47 48 static char *last_cmd; 48 49 49 50 static int errpos(const char *str) 50 51 { 51 - if (!str || !last_cmd) 52 - return 0; 52 + int ret = 0; 53 53 54 - return err_pos(last_cmd, str); 54 + mutex_lock(&lastcmd_mutex); 55 + if (!str || !last_cmd) 56 + goto out; 57 + 58 + ret = err_pos(last_cmd, str); 59 + out: 60 + mutex_unlock(&lastcmd_mutex); 61 + return ret; 55 62 } 56 63 57 64 static void last_cmd_set(const char *str) ··· 66 59 if (!str) 67 60 return; 68 61 62 + mutex_lock(&lastcmd_mutex); 69 63 kfree(last_cmd); 70 - 71 64 last_cmd = kstrdup(str, GFP_KERNEL); 65 + mutex_unlock(&lastcmd_mutex); 72 66 } 73 67 74 68 static void synth_err(u8 err_type, u16 err_pos) 75 69 { 70 + mutex_lock(&lastcmd_mutex); 76 71 if (!last_cmd) 77 - return; 72 + goto out; 78 73 79 74 tracing_log_err(NULL, "synthetic_events", last_cmd, err_text, 80 75 err_type, err_pos); 76 + out: 77 + mutex_unlock(&lastcmd_mutex); 81 78 } 82 79 83 80 static int create_synth_event(const char *raw_command);
+3 -1
kernel/trace/trace_osnoise.c
··· 1296 1296 rcu_read_lock(); 1297 1297 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 1298 1298 tr = inst->tr; 1299 - if (tr->max_latency < latency) { 1299 + if (tracer_tracing_is_on(tr) && tr->max_latency < latency) { 1300 1300 tr->max_latency = latency; 1301 1301 latency_fsnotify(tr); 1302 1302 } ··· 1737 1737 s.context = THREAD_CONTEXT; 1738 1738 1739 1739 trace_timerlat_sample(&s); 1740 + 1741 + notify_new_max_latency(diff); 1740 1742 1741 1743 timerlat_dump_stack(time_to_us(diff)); 1742 1744