mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-01 10:45:49 +00:00
trace ring-buffer updates for v6.13
- Limit time interrupts are disabled in rb_check_pages(): The rb_check_pages() function is called after the ring buffer size is updated to make sure that the ring buffer has not been corrupted. Commit c2274b908d
("ring-buffer: Fix a race between readers and resize checks") fixed a race with the check pages and simultaneous resizes to the ring buffer by adding a raw_spin_lock_irqsave() around the check operation. Although this was a simple fix, it would hold interrupts disabled for a non-deterministic amount of time. This could harm PREEMPT_RT operations. Instead, modify the logic by adding a counter when the buffer is modified and to release the raw_spin_lock() at each iteration. It checks the counter under the lock to see if a modification happened during the loop, and if it did, it would restart the loop up to 3 times. After 3 times, it will simply exit the check, as it is unlikely that would ever happen as buffer resizes are rare occurrences. - Replace some open coded str_low_high() with the helper - Fix some documentation/comments -----BEGIN PGP SIGNATURE----- iIoEABYIADIWIQRRSw7ePDh/lE+zeZMp5XQQmuv6qgUCZz5KNxQccm9zdGVkdEBn b29kbWlzLm9yZwAKCRAp5XQQmuv6qiANAP4/6cSGOhQgIkaN8UsKmWTfBqU89JK2 a4tqAZWKsQormgEAkDLPD0Lda0drmu/Dwnr/klS21yyLcQBzyX1CYw9G4gY= =jkLz -----END PGP SIGNATURE----- Merge tag 'trace-ring-buffer-v6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace Pull trace ring-buffer updates from Steven Rostedt: - Limit time interrupts are disabled in rb_check_pages(): rb_check_pages() is called after the ring buffer size is updated to make sure that the ring buffer has not been corrupted. Commit c2274b908d
("ring-buffer: Fix a race between readers and resize checks") fixed a race with the check pages and simultaneous resizes to the ring buffer by adding a raw_spin_lock_irqsave() around the check operation. Although this was a simple fix, it would hold interrupts disabled for a non-deterministic amount of time. This could harm PREEMPT_RT operations. Instead, modify the logic by adding a counter when the buffer is modified and to release the raw_spin_lock() at each iteration. It checks the counter under the lock to see if a modification happened during the loop, and if it did, it would restart the loop up to 3 times. After 3 times, it will simply exit the check, as it is unlikely that would ever happen as buffer resizes are rare occurrences. - Replace some open coded str_low_high() with the helper - Fix some documentation/comments * tag 'trace-ring-buffer-v6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace: ring-buffer: Correct a grammatical error in a comment ring-buffer: Use str_low_high() helper in ring_buffer_producer() ring-buffer: Reorganize kerneldoc parameter names ring-buffer: Limit time with disabled interrupts in rb_check_pages()
This commit is contained in:
commit
f1db825805
@ -482,6 +482,8 @@ struct ring_buffer_per_cpu {
|
|||||||
unsigned long nr_pages;
|
unsigned long nr_pages;
|
||||||
unsigned int current_context;
|
unsigned int current_context;
|
||||||
struct list_head *pages;
|
struct list_head *pages;
|
||||||
|
/* pages generation counter, incremented when the list changes */
|
||||||
|
unsigned long cnt;
|
||||||
struct buffer_page *head_page; /* read from head */
|
struct buffer_page *head_page; /* read from head */
|
||||||
struct buffer_page *tail_page; /* write to tail */
|
struct buffer_page *tail_page; /* write to tail */
|
||||||
struct buffer_page *commit_page; /* committed pages */
|
struct buffer_page *commit_page; /* committed pages */
|
||||||
@ -1475,40 +1477,87 @@ static void rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer,
|
|||||||
RB_WARN_ON(cpu_buffer, val & RB_FLAG_MASK);
|
RB_WARN_ON(cpu_buffer, val & RB_FLAG_MASK);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool rb_check_links(struct ring_buffer_per_cpu *cpu_buffer,
|
||||||
|
struct list_head *list)
|
||||||
|
{
|
||||||
|
if (RB_WARN_ON(cpu_buffer,
|
||||||
|
rb_list_head(rb_list_head(list->next)->prev) != list))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (RB_WARN_ON(cpu_buffer,
|
||||||
|
rb_list_head(rb_list_head(list->prev)->next) != list))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* rb_check_pages - integrity check of buffer pages
|
* rb_check_pages - integrity check of buffer pages
|
||||||
* @cpu_buffer: CPU buffer with pages to test
|
* @cpu_buffer: CPU buffer with pages to test
|
||||||
*
|
*
|
||||||
* As a safety measure we check to make sure the data pages have not
|
* As a safety measure we check to make sure the data pages have not
|
||||||
* been corrupted.
|
* been corrupted.
|
||||||
*
|
|
||||||
* Callers of this function need to guarantee that the list of pages doesn't get
|
|
||||||
* modified during the check. In particular, if it's possible that the function
|
|
||||||
* is invoked with concurrent readers which can swap in a new reader page then
|
|
||||||
* the caller should take cpu_buffer->reader_lock.
|
|
||||||
*/
|
*/
|
||||||
static void rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
|
static void rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
|
||||||
{
|
{
|
||||||
struct list_head *head = rb_list_head(cpu_buffer->pages);
|
struct list_head *head, *tmp;
|
||||||
struct list_head *tmp;
|
unsigned long buffer_cnt;
|
||||||
|
unsigned long flags;
|
||||||
|
int nr_loops = 0;
|
||||||
|
|
||||||
if (RB_WARN_ON(cpu_buffer,
|
/*
|
||||||
rb_list_head(rb_list_head(head->next)->prev) != head))
|
* Walk the linked list underpinning the ring buffer and validate all
|
||||||
|
* its next and prev links.
|
||||||
|
*
|
||||||
|
* The check acquires the reader_lock to avoid concurrent processing
|
||||||
|
* with code that could be modifying the list. However, the lock cannot
|
||||||
|
* be held for the entire duration of the walk, as this would make the
|
||||||
|
* time when interrupts are disabled non-deterministic, dependent on the
|
||||||
|
* ring buffer size. Therefore, the code releases and re-acquires the
|
||||||
|
* lock after checking each page. The ring_buffer_per_cpu.cnt variable
|
||||||
|
* is then used to detect if the list was modified while the lock was
|
||||||
|
* not held, in which case the check needs to be restarted.
|
||||||
|
*
|
||||||
|
* The code attempts to perform the check at most three times before
|
||||||
|
* giving up. This is acceptable because this is only a self-validation
|
||||||
|
* to detect problems early on. In practice, the list modification
|
||||||
|
* operations are fairly spaced, and so this check typically succeeds at
|
||||||
|
* most on the second try.
|
||||||
|
*/
|
||||||
|
again:
|
||||||
|
if (++nr_loops > 3)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
if (RB_WARN_ON(cpu_buffer,
|
raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
|
||||||
rb_list_head(rb_list_head(head->prev)->next) != head))
|
head = rb_list_head(cpu_buffer->pages);
|
||||||
return;
|
if (!rb_check_links(cpu_buffer, head))
|
||||||
|
goto out_locked;
|
||||||
|
buffer_cnt = cpu_buffer->cnt;
|
||||||
|
tmp = head;
|
||||||
|
raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
|
||||||
|
|
||||||
for (tmp = rb_list_head(head->next); tmp != head; tmp = rb_list_head(tmp->next)) {
|
while (true) {
|
||||||
if (RB_WARN_ON(cpu_buffer,
|
raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
|
||||||
rb_list_head(rb_list_head(tmp->next)->prev) != tmp))
|
|
||||||
return;
|
|
||||||
|
|
||||||
if (RB_WARN_ON(cpu_buffer,
|
if (buffer_cnt != cpu_buffer->cnt) {
|
||||||
rb_list_head(rb_list_head(tmp->prev)->next) != tmp))
|
/* The list was updated, try again. */
|
||||||
return;
|
raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
|
||||||
|
goto again;
|
||||||
|
}
|
||||||
|
|
||||||
|
tmp = rb_list_head(tmp->next);
|
||||||
|
if (tmp == head)
|
||||||
|
/* The iteration circled back, all is done. */
|
||||||
|
goto out_locked;
|
||||||
|
|
||||||
|
if (!rb_check_links(cpu_buffer, tmp))
|
||||||
|
goto out_locked;
|
||||||
|
|
||||||
|
raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
out_locked:
|
||||||
|
raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -2384,9 +2433,9 @@ EXPORT_SYMBOL_GPL(__ring_buffer_alloc);
|
|||||||
* __ring_buffer_alloc_range - allocate a new ring_buffer from existing memory
|
* __ring_buffer_alloc_range - allocate a new ring_buffer from existing memory
|
||||||
* @size: the size in bytes per cpu that is needed.
|
* @size: the size in bytes per cpu that is needed.
|
||||||
* @flags: attributes to set for the ring buffer.
|
* @flags: attributes to set for the ring buffer.
|
||||||
|
* @order: sub-buffer order
|
||||||
* @start: start of allocated range
|
* @start: start of allocated range
|
||||||
* @range_size: size of allocated range
|
* @range_size: size of allocated range
|
||||||
* @order: sub-buffer order
|
|
||||||
* @key: ring buffer reader_lock_key.
|
* @key: ring buffer reader_lock_key.
|
||||||
*
|
*
|
||||||
* Currently the only flag that is available is the RB_FL_OVERWRITE
|
* Currently the only flag that is available is the RB_FL_OVERWRITE
|
||||||
@ -2532,6 +2581,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages)
|
|||||||
|
|
||||||
/* make sure pages points to a valid page in the ring buffer */
|
/* make sure pages points to a valid page in the ring buffer */
|
||||||
cpu_buffer->pages = next_page;
|
cpu_buffer->pages = next_page;
|
||||||
|
cpu_buffer->cnt++;
|
||||||
|
|
||||||
/* update head page */
|
/* update head page */
|
||||||
if (head_bit)
|
if (head_bit)
|
||||||
@ -2638,6 +2688,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer)
|
|||||||
* pointer to point to end of list
|
* pointer to point to end of list
|
||||||
*/
|
*/
|
||||||
head_page->prev = last_page;
|
head_page->prev = last_page;
|
||||||
|
cpu_buffer->cnt++;
|
||||||
success = true;
|
success = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -2873,12 +2924,8 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
|
|||||||
*/
|
*/
|
||||||
synchronize_rcu();
|
synchronize_rcu();
|
||||||
for_each_buffer_cpu(buffer, cpu) {
|
for_each_buffer_cpu(buffer, cpu) {
|
||||||
unsigned long flags;
|
|
||||||
|
|
||||||
cpu_buffer = buffer->buffers[cpu];
|
cpu_buffer = buffer->buffers[cpu];
|
||||||
raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
|
|
||||||
rb_check_pages(cpu_buffer);
|
rb_check_pages(cpu_buffer);
|
||||||
raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
|
|
||||||
}
|
}
|
||||||
atomic_dec(&buffer->record_disabled);
|
atomic_dec(&buffer->record_disabled);
|
||||||
}
|
}
|
||||||
@ -4010,7 +4057,7 @@ static const char *show_irq_str(int bits)
|
|||||||
return type[bits];
|
return type[bits];
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Assume this is an trace event */
|
/* Assume this is a trace event */
|
||||||
static const char *show_flags(struct ring_buffer_event *event)
|
static const char *show_flags(struct ring_buffer_event *event)
|
||||||
{
|
{
|
||||||
struct trace_entry *entry;
|
struct trace_entry *entry;
|
||||||
@ -5296,6 +5343,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
|
|||||||
rb_list_head(reader->list.next)->prev = &cpu_buffer->reader_page->list;
|
rb_list_head(reader->list.next)->prev = &cpu_buffer->reader_page->list;
|
||||||
rb_inc_page(&cpu_buffer->head_page);
|
rb_inc_page(&cpu_buffer->head_page);
|
||||||
|
|
||||||
|
cpu_buffer->cnt++;
|
||||||
local_inc(&cpu_buffer->pages_read);
|
local_inc(&cpu_buffer->pages_read);
|
||||||
|
|
||||||
/* Finally update the reader page to the new head */
|
/* Finally update the reader page to the new head */
|
||||||
@ -5835,12 +5883,9 @@ void
|
|||||||
ring_buffer_read_finish(struct ring_buffer_iter *iter)
|
ring_buffer_read_finish(struct ring_buffer_iter *iter)
|
||||||
{
|
{
|
||||||
struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
|
struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
|
||||||
unsigned long flags;
|
|
||||||
|
|
||||||
/* Use this opportunity to check the integrity of the ring buffer. */
|
/* Use this opportunity to check the integrity of the ring buffer. */
|
||||||
raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
|
|
||||||
rb_check_pages(cpu_buffer);
|
rb_check_pages(cpu_buffer);
|
||||||
raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
|
|
||||||
|
|
||||||
atomic_dec(&cpu_buffer->resize_disabled);
|
atomic_dec(&cpu_buffer->resize_disabled);
|
||||||
kfree(iter->event);
|
kfree(iter->event);
|
||||||
@ -6757,6 +6802,7 @@ int ring_buffer_subbuf_order_set(struct trace_buffer *buffer, int order)
|
|||||||
/* Install the new pages, remove the head from the list */
|
/* Install the new pages, remove the head from the list */
|
||||||
cpu_buffer->pages = cpu_buffer->new_pages.next;
|
cpu_buffer->pages = cpu_buffer->new_pages.next;
|
||||||
list_del_init(&cpu_buffer->new_pages);
|
list_del_init(&cpu_buffer->new_pages);
|
||||||
|
cpu_buffer->cnt++;
|
||||||
|
|
||||||
cpu_buffer->head_page
|
cpu_buffer->head_page
|
||||||
= list_entry(cpu_buffer->pages, struct buffer_page, list);
|
= list_entry(cpu_buffer->pages, struct buffer_page, list);
|
||||||
|
@ -307,14 +307,14 @@ static void ring_buffer_producer(void)
|
|||||||
if (!disable_reader) {
|
if (!disable_reader) {
|
||||||
if (consumer_fifo)
|
if (consumer_fifo)
|
||||||
trace_printk("Running Consumer at SCHED_FIFO %s\n",
|
trace_printk("Running Consumer at SCHED_FIFO %s\n",
|
||||||
consumer_fifo == 1 ? "low" : "high");
|
str_low_high(consumer_fifo == 1));
|
||||||
else
|
else
|
||||||
trace_printk("Running Consumer at nice: %d\n",
|
trace_printk("Running Consumer at nice: %d\n",
|
||||||
consumer_nice);
|
consumer_nice);
|
||||||
}
|
}
|
||||||
if (producer_fifo)
|
if (producer_fifo)
|
||||||
trace_printk("Running Producer at SCHED_FIFO %s\n",
|
trace_printk("Running Producer at SCHED_FIFO %s\n",
|
||||||
producer_fifo == 1 ? "low" : "high");
|
str_low_high(producer_fifo == 1));
|
||||||
else
|
else
|
||||||
trace_printk("Running Producer at nice: %d\n",
|
trace_printk("Running Producer at nice: %d\n",
|
||||||
producer_nice);
|
producer_nice);
|
||||||
|
Loading…
Reference in New Issue
Block a user