tracing: Update subbuffer with kilobytes not page order

Using page order for deciding what the size of the ring buffer sub buffers
are is exposing a bit too much of the implementation. Although the sub
buffers are only allocated in orders of pages, allow the user to specify
the minimum size of each sub-buffer via kilobytes like they can with the
buffer size itself.

If the user specifies 3 via:

  echo 3 > buffer_subbuf_size_kb

Then the sub-buffer size will round up to 4kb (on a 4kb page size system).

If they specify:

  echo 6 > buffer_subbuf_size_kb

The sub-buffer size will become 8kb.

and so on.

Link: https://lore.kernel.org/linux-trace-kernel/20231219185631.809766769@goodmis.org

Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Tzvetomir Stoyanov <tz.stoyanov@gmail.com>
Cc: Vincent Donnefort <vdonnefort@google.com>
Cc: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
This commit is contained in:
Steven Rostedt (Google) 2023-12-19 13:54:29 -05:00
parent 1acce70374
commit 2f84b39f48
3 changed files with 50 additions and 44 deletions

View File

@ -203,32 +203,26 @@ of ftrace. Here is a list of some of the key files:
This displays the total combined size of all the trace buffers.
buffer_subbuf_order:
buffer_subbuf_size_kb:
This sets or displays the sub buffer page size order. The ring buffer
is broken up into several same size "sub buffers". An event can not be
bigger than the size of the sub buffer. Normally, the sub buffer is
the size of the architecture's page (4K on x86). The sub buffer also
contains meta data at the start which also limits the size of an event.
That means when the sub buffer is a page size, no event can be larger
than the page size minus the sub buffer meta data.
This sets or displays the sub buffer size. The ring buffer is broken up
into several same size "sub buffers". An event can not be bigger than
the size of the sub buffer. Normally, the sub buffer is the size of the
architecture's page (4K on x86). The sub buffer also contains meta data
at the start which also limits the size of an event. That means when
the sub buffer is a page size, no event can be larger than the page
size minus the sub buffer meta data.
The buffer_subbuf_order allows the user to change the size of the sub
buffer. As the sub buffer is a set of pages by the power of 2, thus
the sub buffer total size is defined by the order:
Note, the buffer_subbuf_size_kb is a way for the user to specify the
minimum size of the subbuffer. The kernel may make it bigger due to the
implementation details, or simply fail the operation if the kernel can
not handle the request.
order size
---- ----
0 PAGE_SIZE
1 PAGE_SIZE * 2
2 PAGE_SIZE * 4
3 PAGE_SIZE * 8
Changing the sub buffer size allows for events to be larger than the
page size.
Changing the order will change the sub buffer size allowing for events
to be larger than the page size.
Note: When changing the order, tracing is stopped and any data in the
ring buffer and the snapshot buffer will be discarded.
Note: When changing the sub-buffer size, tracing is stopped and any
data in the ring buffer and the snapshot buffer will be discarded.
free_buffer:

View File

@ -9384,42 +9384,54 @@ static const struct file_operations buffer_percent_fops = {
};
static ssize_t
buffer_order_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
{
struct trace_array *tr = filp->private_data;
size_t size;
char buf[64];
int order;
int r;
r = sprintf(buf, "%d\n", ring_buffer_subbuf_order_get(tr->array_buffer.buffer));
order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
size = (PAGE_SIZE << order) / 1024;
r = sprintf(buf, "%zd\n", size);
return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}
static ssize_t
buffer_order_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *ppos)
buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
struct trace_array *tr = filp->private_data;
unsigned long val;
int old_order;
int order;
int pages;
int ret;
ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
if (ret)
return ret;
val *= 1024; /* value passed in is in KB */
pages = DIV_ROUND_UP(val, PAGE_SIZE);
order = fls(pages - 1);
/* limit between 1 and 128 system pages */
if (val < 0 || val > 7)
if (order < 0 || order > 7)
return -EINVAL;
/* Do not allow tracing while changing the order of the ring buffer */
tracing_stop_tr(tr);
old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
if (old_order == val)
if (old_order == order)
goto out;
ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, val);
ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
if (ret)
goto out;
@ -9428,7 +9440,7 @@ buffer_order_write(struct file *filp, const char __user *ubuf,
if (!tr->allocated_snapshot)
goto out_max;
ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, val);
ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
if (ret) {
/* Put back the old order */
cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
@ -9460,10 +9472,10 @@ buffer_order_write(struct file *filp, const char __user *ubuf,
return cnt;
}
static const struct file_operations buffer_order_fops = {
static const struct file_operations buffer_subbuf_size_fops = {
.open = tracing_open_generic_tr,
.read = buffer_order_read,
.write = buffer_order_write,
.read = buffer_subbuf_size_read,
.write = buffer_subbuf_size_write,
.release = tracing_release_generic_tr,
.llseek = default_llseek,
};
@ -9934,8 +9946,8 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
tr, &buffer_percent_fops);
trace_create_file("buffer_subbuf_order", TRACE_MODE_WRITE, d_tracer,
tr, &buffer_order_fops);
trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
tr, &buffer_subbuf_size_fops);
create_trace_options_dir(tr);

View File

@ -1,7 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: Change the ringbuffer sub-buffer order
# requires: buffer_subbuf_order
# description: Change the ringbuffer sub-buffer size
# requires: buffer_subbuf_size_kb
# flags: instance
get_buffer_data_size() {
@ -52,8 +52,8 @@ write_buffer() {
}
test_buffer() {
orde=$1
page_size=$((4096<<order))
size_kb=$1
page_size=$((size_kb*1024))
size=`get_buffer_data_size`
@ -82,14 +82,14 @@ test_buffer() {
fi
}
ORIG=`cat buffer_subbuf_order`
ORIG=`cat buffer_subbuf_size_kb`
# Could test bigger orders than 3, but then creating the string
# Could test bigger sizes than 32K, but then creating the string
# to write into the ring buffer takes too long
for a in 0 1 2 3 ; do
echo $a > buffer_subbuf_order
for a in 4 8 16 32 ; do
echo $a > buffer_subbuf_size_kb
test_buffer $a
done
echo $ORIG > buffer_subbuf_order
echo $ORIG > buffer_subbuf_size_kb