Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

tracing: Update subbuffer with kilobytes not page order

Using the page order to decide the size of the ring buffer sub-buffers
exposes a bit too much of the implementation. Although the sub-buffers
are only allocated in orders of pages, allow the user to specify the
minimum size of each sub-buffer in kilobytes, as they can with the
buffer size itself.

If the user specifies 3 via:

echo 3 > buffer_subbuf_size_kb

Then the sub-buffer size will round up to 4kb (on a 4kb page size system).

If they specify:

echo 6 > buffer_subbuf_size_kb

The sub-buffer size will become 8kb.

and so on.

Link: https://lore.kernel.org/linux-trace-kernel/20231219185631.809766769@goodmis.org

Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Tzvetomir Stoyanov <tz.stoyanov@gmail.com>
Cc: Vincent Donnefort <vdonnefort@google.com>
Cc: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>

+50 -44
+16 -22
Documentation/trace/ftrace.rst
··· 203 203 204 204 This displays the total combined size of all the trace buffers. 205 205 206 - buffer_subbuf_order: 206 + buffer_subbuf_size_kb: 207 207 208 - This sets or displays the sub buffer page size order. The ring buffer 209 - is broken up into several same size "sub buffers". An event can not be 210 - bigger than the size of the sub buffer. Normally, the sub buffer is 211 - the size of the architecture's page (4K on x86). The sub buffer also 212 - contains meta data at the start which also limits the size of an event. 213 - That means when the sub buffer is a page size, no event can be larger 214 - than the page size minus the sub buffer meta data. 208 + This sets or displays the sub buffer size. The ring buffer is broken up 209 + into several same size "sub buffers". An event can not be bigger than 210 + the size of the sub buffer. Normally, the sub buffer is the size of the 211 + architecture's page (4K on x86). The sub buffer also contains meta data 212 + at the start which also limits the size of an event. That means when 213 + the sub buffer is a page size, no event can be larger than the page 214 + size minus the sub buffer meta data. 215 215 216 - The buffer_subbuf_order allows the user to change the size of the sub 217 - buffer. As the sub buffer is a set of pages by the power of 2, thus 218 - the sub buffer total size is defined by the order: 216 + Note, the buffer_subbuf_size_kb is a way for the user to specify the 217 + minimum size of the subbuffer. The kernel may make it bigger due to the 218 + implementation details, or simply fail the operation if the kernel can 219 + not handle the request. 219 220 220 - order size 221 - ---- ---- 222 - 0 PAGE_SIZE 223 - 1 PAGE_SIZE * 2 224 - 2 PAGE_SIZE * 4 225 - 3 PAGE_SIZE * 8 221 + Changing the sub buffer size allows for events to be larger than the 222 + page size. 226 223 227 - Changing the order will change the sub buffer size allowing for events 228 - to be larger than the page size. 
229 - 230 - Note: When changing the order, tracing is stopped and any data in the 231 - ring buffer and the snapshot buffer will be discarded. 224 + Note: When changing the sub-buffer size, tracing is stopped and any 225 + data in the ring buffer and the snapshot buffer will be discarded. 232 226 233 227 free_buffer: 234 228
+25 -13
kernel/trace/trace.c
··· 9384 9384 }; 9385 9385 9386 9386 static ssize_t 9387 - buffer_order_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) 9387 + buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) 9388 9388 { 9389 9389 struct trace_array *tr = filp->private_data; 9390 + size_t size; 9390 9391 char buf[64]; 9392 + int order; 9391 9393 int r; 9392 9394 9393 - r = sprintf(buf, "%d\n", ring_buffer_subbuf_order_get(tr->array_buffer.buffer)); 9395 + order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer); 9396 + size = (PAGE_SIZE << order) / 1024; 9397 + 9398 + r = sprintf(buf, "%zd\n", size); 9394 9399 9395 9400 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 9396 9401 } 9397 9402 9398 9403 static ssize_t 9399 - buffer_order_write(struct file *filp, const char __user *ubuf, 9400 - size_t cnt, loff_t *ppos) 9404 + buffer_subbuf_size_write(struct file *filp, const char __user *ubuf, 9405 + size_t cnt, loff_t *ppos) 9401 9406 { 9402 9407 struct trace_array *tr = filp->private_data; 9403 9408 unsigned long val; 9404 9409 int old_order; 9410 + int order; 9411 + int pages; 9405 9412 int ret; 9406 9413 9407 9414 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 9408 9415 if (ret) 9409 9416 return ret; 9410 9417 9418 + val *= 1024; /* value passed in is in KB */ 9419 + 9420 + pages = DIV_ROUND_UP(val, PAGE_SIZE); 9421 + order = fls(pages - 1); 9422 + 9411 9423 /* limit between 1 and 128 system pages */ 9412 - if (val < 0 || val > 7) 9424 + if (order < 0 || order > 7) 9413 9425 return -EINVAL; 9414 9426 9415 9427 /* Do not allow tracing while changing the order of the ring buffer */ 9416 9428 tracing_stop_tr(tr); 9417 9429 9418 9430 old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer); 9419 - if (old_order == val) 9431 + if (old_order == order) 9420 9432 goto out; 9421 9433 9422 - ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, val); 9434 + ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, 
order); 9423 9435 if (ret) 9424 9436 goto out; 9425 9437 ··· 9440 9428 if (!tr->allocated_snapshot) 9441 9429 goto out_max; 9442 9430 9443 - ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, val); 9431 + ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order); 9444 9432 if (ret) { 9445 9433 /* Put back the old order */ 9446 9434 cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order); ··· 9472 9460 return cnt; 9473 9461 } 9474 9462 9475 - static const struct file_operations buffer_order_fops = { 9463 + static const struct file_operations buffer_subbuf_size_fops = { 9476 9464 .open = tracing_open_generic_tr, 9477 - .read = buffer_order_read, 9478 - .write = buffer_order_write, 9465 + .read = buffer_subbuf_size_read, 9466 + .write = buffer_subbuf_size_write, 9479 9467 .release = tracing_release_generic_tr, 9480 9468 .llseek = default_llseek, 9481 9469 }; ··· 9946 9934 trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer, 9947 9935 tr, &buffer_percent_fops); 9948 9936 9949 - trace_create_file("buffer_subbuf_order", TRACE_MODE_WRITE, d_tracer, 9950 - tr, &buffer_order_fops); 9937 + trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer, 9938 + tr, &buffer_subbuf_size_fops); 9951 9939 9952 9940 create_trace_options_dir(tr); 9953 9941
+9 -9
tools/testing/selftests/ftrace/test.d/00basic/ringbuffer_order.tc tools/testing/selftests/ftrace/test.d/00basic/ringbuffer_subbuf_size.tc
··· 1 1 #!/bin/sh 2 2 # SPDX-License-Identifier: GPL-2.0 3 - # description: Change the ringbuffer sub-buffer order 4 - # requires: buffer_subbuf_order 3 + # description: Change the ringbuffer sub-buffer size 4 + # requires: buffer_subbuf_size_kb 5 5 # flags: instance 6 6 7 7 get_buffer_data_size() { ··· 52 52 } 53 53 54 54 test_buffer() { 55 - orde=$1 56 - page_size=$((4096<<order)) 55 + size_kb=$1 56 + page_size=$((size_kb*1024)) 57 57 58 58 size=`get_buffer_data_size` 59 59 ··· 82 82 fi 83 83 } 84 84 85 - ORIG=`cat buffer_subbuf_order` 85 + ORIG=`cat buffer_subbuf_size_kb` 86 86 87 - # Could test bigger orders than 3, but then creating the string 87 + # Could test bigger sizes than 32K, but then creating the string 88 88 # to write into the ring buffer takes too long 89 - for a in 0 1 2 3 ; do 90 - echo $a > buffer_subbuf_order 89 + for a in 4 8 16 32 ; do 90 + echo $a > buffer_subbuf_size_kb 91 91 test_buffer $a 92 92 done 93 93 94 - echo $ORIG > buffer_subbuf_order 94 + echo $ORIG > buffer_subbuf_size_kb 95 95