Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

tracing: Save off entry when peeking at next entry

In order to have the iterator read the buffer even when it's still updating,
it requires that the ring buffer iterator saves each event in a separate
location outside the ring buffer such that its use is immutable.

There's one use case that saves off the event returned from the ring buffer
iterator and calls it again to look at the next event, before going back to
use the first event. As the ring buffer iterator will only have a single
copy, this use case will no longer be supported.

Instead, have the one use case create its own buffer to store the first
event when looking at the next event. This way, when looking at the first
event again, it won't be corrupted by the second read.

Link: http://lkml.kernel.org/r/20200317213415.722539921@goodmis.org

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>

+47 -10
+2
include/linux/trace_events.h
··· 85 85 struct mutex mutex; 86 86 struct ring_buffer_iter **buffer_iter; 87 87 unsigned long iter_flags; 88 + void *temp; /* temp holder */ 89 + unsigned int temp_size; 88 90 89 91 /* trace_seq for __print_flags() and __print_symbolic() etc. */ 90 92 struct trace_seq tmp_seq;
+39 -1
kernel/trace/trace.c
··· 3466 3466 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, 3467 3467 int *ent_cpu, u64 *ent_ts) 3468 3468 { 3469 - return __find_next_entry(iter, ent_cpu, NULL, ent_ts); 3469 + /* __find_next_entry will reset ent_size */ 3470 + int ent_size = iter->ent_size; 3471 + struct trace_entry *entry; 3472 + 3473 + /* 3474 + * The __find_next_entry() may call peek_next_entry(), which may 3475 + * call ring_buffer_peek() that may make the contents of iter->ent 3476 + * undefined. Need to copy iter->ent now. 3477 + */ 3478 + if (iter->ent && iter->ent != iter->temp) { 3479 + if (!iter->temp || iter->temp_size < iter->ent_size) { 3480 + kfree(iter->temp); 3481 + iter->temp = kmalloc(iter->ent_size, GFP_KERNEL); 3482 + if (!iter->temp) 3483 + return NULL; 3484 + } 3485 + memcpy(iter->temp, iter->ent, iter->ent_size); 3486 + iter->temp_size = iter->ent_size; 3487 + iter->ent = iter->temp; 3488 + } 3489 + entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts); 3490 + /* Put back the original ent_size */ 3491 + iter->ent_size = ent_size; 3492 + 3493 + return entry; 3470 3494 } 3471 3495 3472 3496 /* Find the next real entry, and increment the iterator to the next entry */ ··· 4222 4198 goto release; 4223 4199 4224 4200 /* 4201 + * trace_find_next_entry() may need to save off iter->ent. 4202 + * It will place it into the iter->temp buffer. As most 4203 + * events are less than 128, allocate a buffer of that size. 4204 + * If one is greater, then trace_find_next_entry() will 4205 + * allocate a new buffer to adjust for the bigger iter->ent. 4206 + * It's not critical if it fails to get allocated here. 4207 + */ 4208 + iter->temp = kmalloc(128, GFP_KERNEL); 4209 + if (iter->temp) 4210 + iter->temp_size = 128; 4211 + 4212 + /* 4225 4213 * We make a copy of the current tracer to avoid concurrent 4226 4214 * changes on it while we are reading. 
4227 4215 */ ··· 4305 4269 fail: 4306 4270 mutex_unlock(&trace_types_lock); 4307 4271 kfree(iter->trace); 4272 + kfree(iter->temp); 4308 4273 kfree(iter->buffer_iter); 4309 4274 release: 4310 4275 seq_release_private(inode, file); ··· 4381 4344 4382 4345 mutex_destroy(&iter->mutex); 4383 4346 free_cpumask_var(iter->started); 4347 + kfree(iter->temp); 4384 4348 kfree(iter->trace); 4385 4349 kfree(iter->buffer_iter); 4386 4350 seq_release_private(inode, file);
+6 -9
kernel/trace/trace_output.c
··· 617 617 618 618 int trace_print_lat_context(struct trace_iterator *iter) 619 619 { 620 + struct trace_entry *entry, *next_entry; 620 621 struct trace_array *tr = iter->tr; 621 - /* trace_find_next_entry will reset ent_size */ 622 - int ent_size = iter->ent_size; 623 622 struct trace_seq *s = &iter->seq; 624 - u64 next_ts; 625 - struct trace_entry *entry = iter->ent, 626 - *next_entry = trace_find_next_entry(iter, NULL, 627 - &next_ts); 628 623 unsigned long verbose = (tr->trace_flags & TRACE_ITER_VERBOSE); 624 + u64 next_ts; 629 625 630 - /* Restore the original ent_size */ 631 - iter->ent_size = ent_size; 632 - 626 + next_entry = trace_find_next_entry(iter, NULL, &next_ts); 633 627 if (!next_entry) 634 628 next_ts = iter->ts; 629 + 630 + /* trace_find_next_entry() may change iter->ent */ 631 + entry = iter->ent; 635 632 636 633 if (verbose) { 637 634 char comm[TASK_COMM_LEN];