mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-11 00:08:50 +00:00
d9a50b0256
The aux_watermark member of struct ring_buffer represents the period (in terms of bytes) at which wakeup events should be generated when data is written to the aux buffer in non-snapshot mode. On hardware that cannot generate an interrupt when the aux_head reaches an arbitrary wakeup index (such as ARM SPE), the aux_head sampled from handle->head in perf_aux_output_{skip,end} may in fact be past the wakeup index. This can lead to wakeup slowly falling behind the head. For example, consider the case where hardware can only generate an interrupt on a page-boundary and the aux buffer is initialised as follows: // Buffer size is 2 * PAGE_SIZE rb->aux_head = rb->aux_wakeup = 0 rb->aux_watermark = PAGE_SIZE / 2 following the first perf_aux_output_begin call, the handle is initialised with: handle->head = 0 handle->size = 2 * PAGE_SIZE handle->wakeup = PAGE_SIZE / 2 and the hardware will be programmed to generate an interrupt at PAGE_SIZE. When the interrupt is raised, the hardware head will be at PAGE_SIZE, so calling perf_aux_output_end(handle, PAGE_SIZE) puts the ring buffer into the following state: rb->aux_head = PAGE_SIZE rb->aux_wakeup = PAGE_SIZE / 2 rb->aux_watermark = PAGE_SIZE / 2 and then the next call to perf_aux_output_begin will result in: handle->head = handle->wakeup = PAGE_SIZE for which the semantics are unclear and, for a smaller aux_watermark (e.g. PAGE_SIZE / 4), then the wakeup would in fact be behind head at this point. This patch fixes the problem by rounding down the aux_head (as sampled from the handle) to the nearest aux_watermark boundary when updating rb->aux_wakeup, therefore taking into account any overruns by the hardware. Reported-by: Mark Rutland <mark.rutland@arm.com> Signed-off-by: Will Deacon <will.deacon@arm.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Acked-by: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1502900297-21839-2-git-send-email-will.deacon@arm.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
252 lines
5.8 KiB
C
252 lines
5.8 KiB
C
#ifndef _KERNEL_EVENTS_INTERNAL_H
|
|
#define _KERNEL_EVENTS_INTERNAL_H
|
|
|
|
#include <linux/hardirq.h>
|
|
#include <linux/uaccess.h>
|
|
|
|
/* Buffer handling */
|
|
|
|
#define RING_BUFFER_WRITABLE 0x01
|
|
|
|
struct ring_buffer {
|
|
atomic_t refcount;
|
|
struct rcu_head rcu_head;
|
|
#ifdef CONFIG_PERF_USE_VMALLOC
|
|
struct work_struct work;
|
|
int page_order; /* allocation order */
|
|
#endif
|
|
int nr_pages; /* nr of data pages */
|
|
int overwrite; /* can overwrite itself */
|
|
int paused; /* can write into ring buffer */
|
|
|
|
atomic_t poll; /* POLL_ for wakeups */
|
|
|
|
local_t head; /* write position */
|
|
local_t nest; /* nested writers */
|
|
local_t events; /* event limit */
|
|
local_t wakeup; /* wakeup stamp */
|
|
local_t lost; /* nr records lost */
|
|
|
|
long watermark; /* wakeup watermark */
|
|
long aux_watermark;
|
|
/* poll crap */
|
|
spinlock_t event_lock;
|
|
struct list_head event_list;
|
|
|
|
atomic_t mmap_count;
|
|
unsigned long mmap_locked;
|
|
struct user_struct *mmap_user;
|
|
|
|
/* AUX area */
|
|
long aux_head;
|
|
local_t aux_nest;
|
|
long aux_wakeup; /* last aux_watermark boundary crossed by aux_head */
|
|
unsigned long aux_pgoff;
|
|
int aux_nr_pages;
|
|
int aux_overwrite;
|
|
atomic_t aux_mmap_count;
|
|
unsigned long aux_mmap_locked;
|
|
void (*free_aux)(void *);
|
|
atomic_t aux_refcount;
|
|
void **aux_pages;
|
|
void *aux_priv;
|
|
|
|
struct perf_event_mmap_page *user_page;
|
|
void *data_pages[0];
|
|
};
|
|
|
|
extern void rb_free(struct ring_buffer *rb);
|
|
|
|
static inline void rb_free_rcu(struct rcu_head *rcu_head)
|
|
{
|
|
struct ring_buffer *rb;
|
|
|
|
rb = container_of(rcu_head, struct ring_buffer, rcu_head);
|
|
rb_free(rb);
|
|
}
|
|
|
|
static inline void rb_toggle_paused(struct ring_buffer *rb, bool pause)
|
|
{
|
|
if (!pause && rb->nr_pages)
|
|
rb->paused = 0;
|
|
else
|
|
rb->paused = 1;
|
|
}
|
|
|
|
extern struct ring_buffer *
|
|
rb_alloc(int nr_pages, long watermark, int cpu, int flags);
|
|
extern void perf_event_wakeup(struct perf_event *event);
|
|
extern int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event,
|
|
pgoff_t pgoff, int nr_pages, long watermark, int flags);
|
|
extern void rb_free_aux(struct ring_buffer *rb);
|
|
extern struct ring_buffer *ring_buffer_get(struct perf_event *event);
|
|
extern void ring_buffer_put(struct ring_buffer *rb);
|
|
|
|
static inline bool rb_has_aux(struct ring_buffer *rb)
|
|
{
|
|
return !!rb->aux_nr_pages;
|
|
}
|
|
|
|
void perf_event_aux_event(struct perf_event *event, unsigned long head,
|
|
unsigned long size, u64 flags);
|
|
|
|
extern struct page *
|
|
perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff);
|
|
|
|
#ifdef CONFIG_PERF_USE_VMALLOC
|
|
/*
|
|
* Back perf_mmap() with vmalloc memory.
|
|
*
|
|
* Required for architectures that have d-cache aliasing issues.
|
|
*/
|
|
|
|
static inline int page_order(struct ring_buffer *rb)
|
|
{
|
|
return rb->page_order;
|
|
}
|
|
|
|
#else
|
|
|
|
static inline int page_order(struct ring_buffer *rb)
|
|
{
|
|
return 0;
|
|
}
|
|
#endif
|
|
|
|
static inline unsigned long perf_data_size(struct ring_buffer *rb)
|
|
{
|
|
return rb->nr_pages << (PAGE_SHIFT + page_order(rb));
|
|
}
|
|
|
|
static inline unsigned long perf_aux_size(struct ring_buffer *rb)
|
|
{
|
|
return rb->aux_nr_pages << PAGE_SHIFT;
|
|
}
|
|
|
|
#define __DEFINE_OUTPUT_COPY_BODY(advance_buf, memcpy_func, ...) \
|
|
{ \
|
|
unsigned long size, written; \
|
|
\
|
|
do { \
|
|
size = min(handle->size, len); \
|
|
written = memcpy_func(__VA_ARGS__); \
|
|
written = size - written; \
|
|
\
|
|
len -= written; \
|
|
handle->addr += written; \
|
|
if (advance_buf) \
|
|
buf += written; \
|
|
handle->size -= written; \
|
|
if (!handle->size) { \
|
|
struct ring_buffer *rb = handle->rb; \
|
|
\
|
|
handle->page++; \
|
|
handle->page &= rb->nr_pages - 1; \
|
|
handle->addr = rb->data_pages[handle->page]; \
|
|
handle->size = PAGE_SIZE << page_order(rb); \
|
|
} \
|
|
} while (len && written == size); \
|
|
\
|
|
return len; \
|
|
}
|
|
|
|
#define DEFINE_OUTPUT_COPY(func_name, memcpy_func) \
|
|
static inline unsigned long \
|
|
func_name(struct perf_output_handle *handle, \
|
|
const void *buf, unsigned long len) \
|
|
__DEFINE_OUTPUT_COPY_BODY(true, memcpy_func, handle->addr, buf, size)
|
|
|
|
static inline unsigned long
|
|
__output_custom(struct perf_output_handle *handle, perf_copy_f copy_func,
|
|
const void *buf, unsigned long len)
|
|
{
|
|
unsigned long orig_len = len;
|
|
__DEFINE_OUTPUT_COPY_BODY(false, copy_func, handle->addr, buf,
|
|
orig_len - len, size)
|
|
}
|
|
|
|
static inline unsigned long
|
|
memcpy_common(void *dst, const void *src, unsigned long n)
|
|
{
|
|
memcpy(dst, src, n);
|
|
return 0;
|
|
}
|
|
|
|
DEFINE_OUTPUT_COPY(__output_copy, memcpy_common)
|
|
|
|
static inline unsigned long
|
|
memcpy_skip(void *dst, const void *src, unsigned long n)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
DEFINE_OUTPUT_COPY(__output_skip, memcpy_skip)
|
|
|
|
#ifndef arch_perf_out_copy_user
|
|
#define arch_perf_out_copy_user arch_perf_out_copy_user
|
|
|
|
static inline unsigned long
|
|
arch_perf_out_copy_user(void *dst, const void *src, unsigned long n)
|
|
{
|
|
unsigned long ret;
|
|
|
|
pagefault_disable();
|
|
ret = __copy_from_user_inatomic(dst, src, n);
|
|
pagefault_enable();
|
|
|
|
return ret;
|
|
}
|
|
#endif
|
|
|
|
DEFINE_OUTPUT_COPY(__output_copy_user, arch_perf_out_copy_user)
|
|
|
|
/* Callchain handling */
|
|
extern struct perf_callchain_entry *
|
|
perf_callchain(struct perf_event *event, struct pt_regs *regs);
|
|
|
|
static inline int get_recursion_context(int *recursion)
|
|
{
|
|
int rctx;
|
|
|
|
if (in_nmi())
|
|
rctx = 3;
|
|
else if (in_irq())
|
|
rctx = 2;
|
|
else if (in_softirq())
|
|
rctx = 1;
|
|
else
|
|
rctx = 0;
|
|
|
|
if (recursion[rctx])
|
|
return -1;
|
|
|
|
recursion[rctx]++;
|
|
barrier();
|
|
|
|
return rctx;
|
|
}
|
|
|
|
static inline void put_recursion_context(int *recursion, int rctx)
|
|
{
|
|
barrier();
|
|
recursion[rctx]--;
|
|
}
|
|
|
|
#ifdef CONFIG_HAVE_PERF_USER_STACK_DUMP
|
|
static inline bool arch_perf_have_user_stack_dump(void)
|
|
{
|
|
return true;
|
|
}
|
|
|
|
#define perf_user_stack_pointer(regs) user_stack_pointer(regs)
|
|
#else
|
|
static inline bool arch_perf_have_user_stack_dump(void)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
#define perf_user_stack_pointer(regs) 0
|
|
#endif /* CONFIG_HAVE_PERF_USER_STACK_DUMP */
|
|
|
|
#endif /* _KERNEL_EVENTS_INTERNAL_H */
|