2011-05-19 19:55:04 +02:00
|
|
|
#ifndef _KERNEL_EVENTS_INTERNAL_H
|
|
|
|
#define _KERNEL_EVENTS_INTERNAL_H
|
|
|
|
|
2011-10-16 17:15:04 +02:00
|
|
|
#include <linux/hardirq.h>
|
2012-08-07 15:20:38 +02:00
|
|
|
#include <linux/uaccess.h>
|
2011-10-16 17:15:04 +02:00
|
|
|
|
|
|
|
/* Buffer handling */
|
|
|
|
|
2011-05-19 19:55:04 +02:00
|
|
|
#define RING_BUFFER_WRITABLE 0x01
|
|
|
|
|
|
|
|
struct ring_buffer {
|
|
|
|
atomic_t refcount;
|
|
|
|
struct rcu_head rcu_head;
|
2015-06-18 12:32:49 +02:00
|
|
|
struct irq_work irq_work;
|
2011-05-19 19:55:04 +02:00
|
|
|
#ifdef CONFIG_PERF_USE_VMALLOC
|
|
|
|
struct work_struct work;
|
|
|
|
int page_order; /* allocation order */
|
|
|
|
#endif
|
|
|
|
int nr_pages; /* nr of data pages */
|
2013-03-18 14:33:28 +01:00
|
|
|
int overwrite; /* can overwrite itself */
|
2011-05-19 19:55:04 +02:00
|
|
|
|
|
|
|
atomic_t poll; /* POLL_ for wakeups */
|
|
|
|
|
|
|
|
local_t head; /* write position */
|
|
|
|
local_t nest; /* nested writers */
|
|
|
|
local_t events; /* event limit */
|
|
|
|
local_t wakeup; /* wakeup stamp */
|
|
|
|
local_t lost; /* nr records lost */
|
|
|
|
|
|
|
|
long watermark; /* wakeup watermark */
|
2015-01-14 14:18:18 +02:00
|
|
|
long aux_watermark;
|
perf: Fix loss of notification with multi-event
When you do:
$ perf record -e cycles,cycles,cycles noploop 10
You expect about 10,000 samples for each event, i.e., 10s at
1000samples/sec. However, this is not what's happening. You
get much fewer samples, maybe 3700 samples/event:
$ perf report -D | tail -15
Aggregated stats:
TOTAL events: 10998
MMAP events: 66
COMM events: 2
SAMPLE events: 10930
cycles stats:
TOTAL events: 3644
SAMPLE events: 3644
cycles stats:
TOTAL events: 3642
SAMPLE events: 3642
cycles stats:
TOTAL events: 3644
SAMPLE events: 3644
On a Intel Nehalem or even AMD64, there are 4 counters capable
of measuring cycles, so there is plenty of space to measure those
events without multiplexing (even with the NMI watchdog active).
And even with multiplexing, we'd expect roughly the same number
of samples per event.
The root of the problem was that when the event that caused the buffer
to become full was not the first event passed on the cmdline, the user
notification would get lost. The notification was sent to the file
descriptor of the overflowed event but the perf tool was not polling
on it. The perf tool aggregates all samples into a single buffer,
i.e., the buffer of the first event. Consequently, it assumes
notifications for any event will come via that descriptor.
The seemingly straight forward solution of moving the waitq into the
ringbuffer object doesn't work because of life-time issues. One could
perf_event_set_output() on a fd that you're also blocking on and cause
the old rb object to be freed while its waitq would still be
referenced by the blocked thread -> FAIL.
Therefore link all events to the ringbuffer and broadcast the wakeup
from the ringbuffer object to all possible events that could be waited
upon. This is rather ugly, and we're open to better solutions but it
works for now.
Reported-by: Stephane Eranian <eranian@google.com>
Finished-by: Stephane Eranian <eranian@google.com>
Reviewed-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20111126014731.GA7030@quad
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-11-26 02:47:31 +01:00
|
|
|
/* poll crap */
|
|
|
|
spinlock_t event_lock;
|
|
|
|
struct list_head event_list;
|
2011-05-19 19:55:04 +02:00
|
|
|
|
2013-06-04 10:44:21 +02:00
|
|
|
atomic_t mmap_count;
|
|
|
|
unsigned long mmap_locked;
|
2013-05-28 10:55:48 +02:00
|
|
|
struct user_struct *mmap_user;
|
|
|
|
|
perf: Add AUX area to ring buffer for raw data streams
This patch introduces "AUX space" in the perf mmap buffer, intended for
exporting high bandwidth data streams to userspace, such as instruction
flow traces.
AUX space is a ring buffer, defined by aux_{offset,size} fields in the
user_page structure, and read/write pointers aux_{head,tail}, which abide
by the same rules as data_* counterparts of the main perf buffer.
In order to allocate/mmap AUX, userspace needs to set up aux_offset to
such an offset that will be greater than data_offset+data_size and
aux_size to be the desired buffer size. Both need to be page aligned.
Then, same aux_offset and aux_size should be passed to mmap() call and
if everything adds up, you should have an AUX buffer as a result.
Pages that are mapped into this buffer also come out of user's mlock
rlimit plus perf_event_mlock_kb allowance.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kaixu Xia <kaixu.xia@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Robert Richter <rric@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@infradead.org
Cc: adrian.hunter@intel.com
Cc: kan.liang@intel.com
Cc: markus.t.metzger@intel.com
Cc: mathieu.poirier@linaro.org
Link: http://lkml.kernel.org/r/1421237903-181015-3-git-send-email-alexander.shishkin@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2015-01-14 14:18:11 +02:00
|
|
|
/* AUX area */
|
perf: Add API for PMUs to write to the AUX area
For pmus that wish to write data to ring buffer's AUX area, provide
perf_aux_output_{begin,end}() calls to initiate/commit data writes,
similarly to perf_output_{begin,end}. These also use the same output
handle structure. Also, similarly to software counterparts, these
will direct inherited events' output to parents' ring buffers.
After the perf_aux_output_begin() returns successfully, handle->size
is set to the maximum amount of data that can be written wrt aux_tail
pointer, so that no data that the user hasn't seen will be overwritten,
therefore this should always be called before hardware writing is
enabled. On success, this will return the pointer to pmu driver's
private structure allocated for this aux area by pmu::setup_aux. Same
pointer can also be retrieved using perf_get_aux() while hardware
writing is enabled.
PMU driver should pass the actual amount of data written as a parameter
to perf_aux_output_end(). All hardware writes should be completed and
visible before this one is called.
Additionally, perf_aux_output_skip() will adjust output handle and
aux_head in case some part of the buffer has to be skipped over to
maintain hardware's alignment constraints.
Nested writers are forbidden and guards are in place to catch such
attempts.
Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kaixu Xia <kaixu.xia@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Robert Richter <rric@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@infradead.org
Cc: adrian.hunter@intel.com
Cc: kan.liang@intel.com
Cc: markus.t.metzger@intel.com
Cc: mathieu.poirier@linaro.org
Link: http://lkml.kernel.org/r/1421237903-181015-8-git-send-email-alexander.shishkin@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2015-01-14 14:18:16 +02:00
|
|
|
local_t aux_head;
|
|
|
|
local_t aux_nest;
|
2015-01-14 14:18:18 +02:00
|
|
|
local_t aux_wakeup;
|
perf: Add AUX area to ring buffer for raw data streams
This patch introduces "AUX space" in the perf mmap buffer, intended for
exporting high bandwidth data streams to userspace, such as instruction
flow traces.
AUX space is a ring buffer, defined by aux_{offset,size} fields in the
user_page structure, and read/write pointers aux_{head,tail}, which abide
by the same rules as data_* counterparts of the main perf buffer.
In order to allocate/mmap AUX, userspace needs to set up aux_offset to
such an offset that will be greater than data_offset+data_size and
aux_size to be the desired buffer size. Both need to be page aligned.
Then, same aux_offset and aux_size should be passed to mmap() call and
if everything adds up, you should have an AUX buffer as a result.
Pages that are mapped into this buffer also come out of user's mlock
rlimit plus perf_event_mlock_kb allowance.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kaixu Xia <kaixu.xia@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Robert Richter <rric@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@infradead.org
Cc: adrian.hunter@intel.com
Cc: kan.liang@intel.com
Cc: markus.t.metzger@intel.com
Cc: mathieu.poirier@linaro.org
Link: http://lkml.kernel.org/r/1421237903-181015-3-git-send-email-alexander.shishkin@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2015-01-14 14:18:11 +02:00
|
|
|
unsigned long aux_pgoff;
|
|
|
|
int aux_nr_pages;
|
2015-01-14 14:18:17 +02:00
|
|
|
int aux_overwrite;
|
perf: Add AUX area to ring buffer for raw data streams
This patch introduces "AUX space" in the perf mmap buffer, intended for
exporting high bandwidth data streams to userspace, such as instruction
flow traces.
AUX space is a ring buffer, defined by aux_{offset,size} fields in the
user_page structure, and read/write pointers aux_{head,tail}, which abide
by the same rules as data_* counterparts of the main perf buffer.
In order to allocate/mmap AUX, userspace needs to set up aux_offset to
such an offset that will be greater than data_offset+data_size and
aux_size to be the desired buffer size. Both need to be page aligned.
Then, same aux_offset and aux_size should be passed to mmap() call and
if everything adds up, you should have an AUX buffer as a result.
Pages that are mapped into this buffer also come out of user's mlock
rlimit plus perf_event_mlock_kb allowance.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kaixu Xia <kaixu.xia@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Robert Richter <rric@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@infradead.org
Cc: adrian.hunter@intel.com
Cc: kan.liang@intel.com
Cc: markus.t.metzger@intel.com
Cc: mathieu.poirier@linaro.org
Link: http://lkml.kernel.org/r/1421237903-181015-3-git-send-email-alexander.shishkin@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2015-01-14 14:18:11 +02:00
|
|
|
atomic_t aux_mmap_count;
|
|
|
|
unsigned long aux_mmap_locked;
|
|
|
|
void (*free_aux)(void *);
|
|
|
|
atomic_t aux_refcount;
|
|
|
|
void **aux_pages;
|
|
|
|
void *aux_priv;
|
|
|
|
|
2011-05-19 19:55:04 +02:00
|
|
|
struct perf_event_mmap_page *user_page;
|
|
|
|
void *data_pages[0];
|
|
|
|
};
|
|
|
|
|
|
|
|
extern void rb_free(struct ring_buffer *rb);
|
2015-06-18 12:32:49 +02:00
|
|
|
|
|
|
|
static inline void rb_free_rcu(struct rcu_head *rcu_head)
|
|
|
|
{
|
|
|
|
struct ring_buffer *rb;
|
|
|
|
|
|
|
|
rb = container_of(rcu_head, struct ring_buffer, rcu_head);
|
|
|
|
rb_free(rb);
|
|
|
|
}
|
|
|
|
|
2011-05-19 19:55:04 +02:00
|
|
|
extern struct ring_buffer *
|
|
|
|
rb_alloc(int nr_pages, long watermark, int cpu, int flags);
|
|
|
|
extern void perf_event_wakeup(struct perf_event *event);
|
perf: Add AUX area to ring buffer for raw data streams
This patch introduces "AUX space" in the perf mmap buffer, intended for
exporting high bandwidth data streams to userspace, such as instruction
flow traces.
AUX space is a ring buffer, defined by aux_{offset,size} fields in the
user_page structure, and read/write pointers aux_{head,tail}, which abide
by the same rules as data_* counterparts of the main perf buffer.
In order to allocate/mmap AUX, userspace needs to set aux_offset to an
offset greater than data_offset+data_size and aux_size to the desired
buffer size. Both need to be page aligned. Then, the same aux_offset and
aux_size should be passed to the mmap() call and, if everything adds up,
you should have an AUX buffer as a result.
Pages that are mapped into this buffer also come out of user's mlock
rlimit plus perf_event_mlock_kb allowance.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kaixu Xia <kaixu.xia@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Robert Richter <rric@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@infradead.org
Cc: adrian.hunter@intel.com
Cc: kan.liang@intel.com
Cc: markus.t.metzger@intel.com
Cc: mathieu.poirier@linaro.org
Link: http://lkml.kernel.org/r/1421237903-181015-3-git-send-email-alexander.shishkin@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2015-01-14 14:18:11 +02:00
|
|
|
extern int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event,
|
2015-01-14 14:18:18 +02:00
|
|
|
pgoff_t pgoff, int nr_pages, long watermark, int flags);
|
perf: Add AUX area to ring buffer for raw data streams
This patch introduces "AUX space" in the perf mmap buffer, intended for
exporting high bandwidth data streams to userspace, such as instruction
flow traces.
AUX space is a ring buffer, defined by aux_{offset,size} fields in the
user_page structure, and read/write pointers aux_{head,tail}, which abide
by the same rules as data_* counterparts of the main perf buffer.
In order to allocate/mmap AUX, userspace needs to set aux_offset to an
offset greater than data_offset+data_size and aux_size to the desired
buffer size. Both need to be page aligned. Then, the same aux_offset and
aux_size should be passed to the mmap() call and, if everything adds up,
you should have an AUX buffer as a result.
Pages that are mapped into this buffer also come out of user's mlock
rlimit plus perf_event_mlock_kb allowance.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kaixu Xia <kaixu.xia@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Robert Richter <rric@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@infradead.org
Cc: adrian.hunter@intel.com
Cc: kan.liang@intel.com
Cc: markus.t.metzger@intel.com
Cc: mathieu.poirier@linaro.org
Link: http://lkml.kernel.org/r/1421237903-181015-3-git-send-email-alexander.shishkin@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2015-01-14 14:18:11 +02:00
|
|
|
extern void rb_free_aux(struct ring_buffer *rb);
|
perf: Add API for PMUs to write to the AUX area
For PMUs that wish to write data to the ring buffer's AUX area, provide
perf_aux_output_{begin,end}() calls to initiate/commit data writes,
similarly to perf_output_{begin,end}(). These also use the same output
handle structure. Also, like their software counterparts, these
will direct inherited events' output to the parents' ring buffers.
After the perf_aux_output_begin() returns successfully, handle->size
is set to the maximum amount of data that can be written wrt aux_tail
pointer, so that no data that the user hasn't seen will be overwritten,
therefore this should always be called before hardware writing is
enabled. On success, this will return the pointer to pmu driver's
private structure allocated for this aux area by pmu::setup_aux. Same
pointer can also be retrieved using perf_get_aux() while hardware
writing is enabled.
PMU driver should pass the actual amount of data written as a parameter
to perf_aux_output_end(). All hardware writes should be completed and
visible before this one is called.
Additionally, perf_aux_output_skip() will adjust output handle and
aux_head in case some part of the buffer has to be skipped over to
maintain hardware's alignment constraints.
Nested writers are forbidden and guards are in place to catch such
attempts.
Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kaixu Xia <kaixu.xia@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Robert Richter <rric@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@infradead.org
Cc: adrian.hunter@intel.com
Cc: kan.liang@intel.com
Cc: markus.t.metzger@intel.com
Cc: mathieu.poirier@linaro.org
Link: http://lkml.kernel.org/r/1421237903-181015-8-git-send-email-alexander.shishkin@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2015-01-14 14:18:16 +02:00
|
|
|
extern struct ring_buffer *ring_buffer_get(struct perf_event *event);
|
|
|
|
extern void ring_buffer_put(struct ring_buffer *rb);
|
perf: Add AUX area to ring buffer for raw data streams
This patch introduces "AUX space" in the perf mmap buffer, intended for
exporting high bandwidth data streams to userspace, such as instruction
flow traces.
AUX space is a ring buffer, defined by aux_{offset,size} fields in the
user_page structure, and read/write pointers aux_{head,tail}, which abide
by the same rules as data_* counterparts of the main perf buffer.
In order to allocate/mmap AUX, userspace needs to set aux_offset to an
offset greater than data_offset+data_size and aux_size to the desired
buffer size. Both need to be page aligned. Then, the same aux_offset and
aux_size should be passed to the mmap() call and, if everything adds up,
you should have an AUX buffer as a result.
Pages that are mapped into this buffer also come out of user's mlock
rlimit plus perf_event_mlock_kb allowance.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kaixu Xia <kaixu.xia@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Robert Richter <rric@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@infradead.org
Cc: adrian.hunter@intel.com
Cc: kan.liang@intel.com
Cc: markus.t.metzger@intel.com
Cc: mathieu.poirier@linaro.org
Link: http://lkml.kernel.org/r/1421237903-181015-3-git-send-email-alexander.shishkin@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2015-01-14 14:18:11 +02:00
|
|
|
|
|
|
|
static inline bool rb_has_aux(struct ring_buffer *rb)
|
|
|
|
{
|
|
|
|
return !!rb->aux_nr_pages;
|
|
|
|
}
|
2011-05-19 19:55:04 +02:00
|
|
|
|
2015-01-14 14:18:15 +02:00
|
|
|
void perf_event_aux_event(struct perf_event *event, unsigned long head,
|
|
|
|
unsigned long size, u64 flags);
|
|
|
|
|
2011-05-19 19:55:04 +02:00
|
|
|
extern struct page *
|
|
|
|
perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff);
|
|
|
|
|
|
|
|
/*
 * Allocation order of the data pages of @rb.
 *
 * With CONFIG_PERF_USE_VMALLOC, perf_mmap() is backed with vmalloc memory
 * (required for architectures that have d-cache aliasing issues) and the
 * order is stored in the buffer. Otherwise the order is always 0.
 */
static inline int page_order(struct ring_buffer *rb)
{
#ifdef CONFIG_PERF_USE_VMALLOC
	return rb->page_order;
#else
	return 0;
#endif
}
|
|
|
|
|
2011-10-16 17:15:04 +02:00
|
|
|
static inline unsigned long perf_data_size(struct ring_buffer *rb)
|
2011-05-19 19:55:04 +02:00
|
|
|
{
|
|
|
|
return rb->nr_pages << (PAGE_SHIFT + page_order(rb));
|
|
|
|
}
|
|
|
|
|
perf: Add AUX area to ring buffer for raw data streams
This patch introduces "AUX space" in the perf mmap buffer, intended for
exporting high bandwidth data streams to userspace, such as instruction
flow traces.
AUX space is a ring buffer, defined by aux_{offset,size} fields in the
user_page structure, and read/write pointers aux_{head,tail}, which abide
by the same rules as data_* counterparts of the main perf buffer.
In order to allocate/mmap AUX, userspace needs to set aux_offset to an
offset greater than data_offset+data_size and aux_size to the desired
buffer size. Both need to be page aligned. Then, the same aux_offset and
aux_size should be passed to the mmap() call and, if everything adds up,
you should have an AUX buffer as a result.
Pages that are mapped into this buffer also come out of user's mlock
rlimit plus perf_event_mlock_kb allowance.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kaixu Xia <kaixu.xia@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Robert Richter <rric@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@infradead.org
Cc: adrian.hunter@intel.com
Cc: kan.liang@intel.com
Cc: markus.t.metzger@intel.com
Cc: mathieu.poirier@linaro.org
Link: http://lkml.kernel.org/r/1421237903-181015-3-git-send-email-alexander.shishkin@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2015-01-14 14:18:11 +02:00
|
|
|
static inline unsigned long perf_aux_size(struct ring_buffer *rb)
|
|
|
|
{
|
|
|
|
return rb->aux_nr_pages << PAGE_SHIFT;
|
|
|
|
}
|
|
|
|
|
2012-08-07 15:20:38 +02:00
|
|
|
/*
 * DEFINE_OUTPUT_COPY(func_name, memcpy_func)
 *
 * Generates func_name(handle, buf, len), which pushes @len bytes from @buf
 * through @handle, moving on to the next data page of handle->rb whenever
 * the current one is used up.
 *
 * memcpy_func(dst, src, n) must return the number of bytes it did NOT
 * copy. The loop stops as soon as a chunk is only partially copied or
 * @len is exhausted. The generated function returns the number of bytes
 * left over (0 when everything was consumed).
 *
 * The page index wraps with a mask, so rb->nr_pages is expected to be a
 * power of two.
 */
#define DEFINE_OUTPUT_COPY(func_name, memcpy_func)			\
static inline unsigned long						\
func_name(struct perf_output_handle *handle,				\
	  const void *buf, unsigned long len)				\
{									\
	unsigned long chunk, copied;					\
									\
	do {								\
		chunk  = min(handle->size, len);			\
		copied = chunk - memcpy_func(handle->addr, buf, chunk);	\
									\
		len          -= copied;					\
		buf          += copied;					\
		handle->addr += copied;					\
		handle->size -= copied;					\
									\
		if (handle->size == 0) {				\
			struct ring_buffer *rb = handle->rb;		\
									\
			handle->page = (handle->page + 1) &		\
				       (rb->nr_pages - 1);		\
			handle->addr = rb->data_pages[handle->page];	\
			handle->size = PAGE_SIZE << page_order(rb);	\
		}							\
	} while (len && copied == chunk);				\
									\
	return len;							\
}
|
|
|
|
|
2013-10-30 21:16:22 +01:00
|
|
|
/*
 * Copy primitive for DEFINE_OUTPUT_COPY backed by plain memcpy().
 *
 * Copies @n bytes from @src to @dst and always reports 0 bytes left
 * uncopied.
 */
static inline unsigned long
memcpy_common(void *dst, const void *src, unsigned long n)
{
	const unsigned long not_copied = 0;

	memcpy(dst, src, n);

	return not_copied;
}
|
|
|
|
|
2012-08-07 15:20:38 +02:00
|
|
|
/* Instantiate __output_copy(): DEFINE_OUTPUT_COPY with memcpy_common as the copy primitive. */
DEFINE_OUTPUT_COPY(__output_copy, memcpy_common)
|
|
|
|
|
2013-10-30 21:16:22 +01:00
|
|
|
/*
 * Copy primitive for DEFINE_OUTPUT_COPY that touches no memory.
 *
 * All arguments are ignored; it reports 0 bytes left uncopied, so the
 * caller accounts for @n bytes without any data being written.
 */
static inline unsigned long
memcpy_skip(void *dst, const void *src, unsigned long n)
{
	const unsigned long not_copied = 0;

	return not_copied;
}
|
2012-08-07 15:20:39 +02:00
|
|
|
|
2013-10-30 21:16:22 +01:00
|
|
|
/* Instantiate __output_skip(): DEFINE_OUTPUT_COPY with memcpy_skip, which copies no data. */
DEFINE_OUTPUT_COPY(__output_skip, memcpy_skip)
|
2012-08-07 15:20:39 +02:00
|
|
|
|
2012-08-07 15:20:38 +02:00
|
|
|
#ifndef arch_perf_out_copy_user
#define arch_perf_out_copy_user arch_perf_out_copy_user

/*
 * Fallback copy primitive, used when nothing else has already defined
 * arch_perf_out_copy_user.
 *
 * Runs __copy_from_user_inatomic() with page faults disabled and passes
 * its result through unchanged; DEFINE_OUTPUT_COPY interprets that value
 * as the number of bytes that were not copied.
 */
static inline unsigned long
arch_perf_out_copy_user(void *dst, const void *src, unsigned long n)
{
	unsigned long left;

	pagefault_disable();
	left = __copy_from_user_inatomic(dst, src, n);
	pagefault_enable();

	return left;
}
#endif /* arch_perf_out_copy_user */
|
|
|
|
|
|
|
|
/* Instantiate __output_copy_user(): DEFINE_OUTPUT_COPY with arch_perf_out_copy_user as the copy primitive. */
DEFINE_OUTPUT_COPY(__output_copy_user, arch_perf_out_copy_user)
|
|
|
|
|
2011-10-16 17:15:04 +02:00
|
|
|
/* Callchain handling */
|
2012-07-11 18:14:58 +04:00
|
|
|
extern struct perf_callchain_entry *
|
|
|
|
perf_callchain(struct perf_event *event, struct pt_regs *regs);
|
2011-10-16 17:15:04 +02:00
|
|
|
extern int get_callchain_buffers(void);
|
|
|
|
extern void put_callchain_buffers(void);
|
|
|
|
|
|
|
|
/*
 * Claim the recursion slot that matches the current execution context.
 *
 * @recursion: array of counters indexed by context:
 *             0 = task, 1 = softirq, 2 = hardirq, 3 = NMI.
 *
 * Returns the slot index on success; the caller hands it back to
 * put_recursion_context() when done. Returns -1 if the slot for the
 * current context is already claimed, i.e. on recursion.
 */
static inline int get_recursion_context(int *recursion)
{
	int rctx;

	if (in_nmi())
		rctx = 3;
	else if (in_irq())
		rctx = 2;
	/*
	 * in_softirq() is also true in task context while bottom halves are
	 * merely disabled; only in_serving_softirq() identifies code that is
	 * actually running a softirq handler.
	 */
	else if (in_serving_softirq())
		rctx = 1;
	else
		rctx = 0;

	if (recursion[rctx])
		return -1;

	recursion[rctx]++;
	/* Keep the compiler from moving later accesses above the claim. */
	barrier();

	return rctx;
}
|
|
|
|
|
|
|
|
/*
 * Release the recursion slot @rctx in @recursion, as previously returned
 * by get_recursion_context().
 *
 * A compiler barrier is issued before the counter is decremented.
 */
static inline void put_recursion_context(int *recursion, int rctx)
{
	int *slot = &recursion[rctx];

	barrier();
	--*slot;
}
|
|
|
|
|
2012-08-07 15:20:40 +02:00
|
|
|
/*
 * Reports whether CONFIG_HAVE_PERF_USER_STACK_DUMP is set for this build.
 */
static inline bool arch_perf_have_user_stack_dump(void)
{
#ifdef CONFIG_HAVE_PERF_USER_STACK_DUMP
	return true;
#else
	return false;
#endif
}

/*
 * perf_user_stack_pointer(regs) expands to user_stack_pointer(regs) when
 * user stack dumps are supported, and to 0 otherwise.
 */
#ifdef CONFIG_HAVE_PERF_USER_STACK_DUMP
#define perf_user_stack_pointer(regs) user_stack_pointer(regs)
#else
#define perf_user_stack_pointer(regs) 0
#endif /* CONFIG_HAVE_PERF_USER_STACK_DUMP */
|
|
|
|
|
2011-05-19 19:55:04 +02:00
|
|
|
#endif /* _KERNEL_EVENTS_INTERNAL_H */
|