mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-08 14:13:53 +00:00
Tracing updates for 6.1:
Major changes: - Changed location of tracing repo from personal git repo to: git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace.git - Added Masami Hiramatsu as co-maintainer - Updated MAINTAINERS file to separate out FTRACE as it is more than just TRACING. Minor changes: - Added Mark Rutland as FTRACE reviewer - Updated user_events to make it on its way to remove the BROKEN tag. The changes should now be acceptable but will run it through a cycle and hopefully we can remove the BROKEN tag next release. - Added filtering to eprobes - Added a delta time to the benchmark trace event - Have the histogram and filter callbacks called via a switch statement instead of indirect functions. This speeds it up to avoid retpolines. - Add a way to wake up ring buffer waiters waiting for the ring buffer to fill up to its watermark. - New ioctl() on the trace_pipe_raw file to wake up ring buffer waiters. - Wake up waiters when the ring buffer is disabled. A reader may block when the ring buffer is disabled, but if it was blocked when the ring buffer is disabled it should then wake up. Fixes: - Allow splice to read partially read ring buffer pages Fixes splice never moving forward. - Fix inverted compare that made the "shortest" ring buffer wait queue actually the longest. - Fix a race in the ring buffer between resetting a page when a writer goes to another page, and the reader. - Fix ftrace accounting bug when function hooks are added at boot up before the weak functions are set to "disabled". - Fix bug that freed a user allocated snapshot buffer when enabling a tracer. 
- Fix possible recursive locks in osnoise tracer - Fix recursive locking direct functions - And other minor clean ups and fixes -----BEGIN PGP SIGNATURE----- iIoEABYIADIWIQRRSw7ePDh/lE+zeZMp5XQQmuv6qgUCYz70cxQccm9zdGVkdEBn b29kbWlzLm9yZwAKCRAp5XQQmuv6qpLKAP4+yOje7ZY/b3R4tTx0EIWiKdhqPx6t Nvam2+WR2PN3QQEAqiK2A+oIbh3Zjp1MyhQWuulssWKtSTXhIQkbs7ioYAc= =MsQw -----END PGP SIGNATURE----- Merge tag 'trace-v6.1' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace Pull tracing updates from Steven Rostedt: "Major changes: - Changed location of tracing repo from personal git repo to: git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace.git - Added Masami Hiramatsu as co-maintainer - Updated MAINTAINERS file to separate out FTRACE as it is more than just TRACING. Minor changes: - Added Mark Rutland as FTRACE reviewer - Updated user_events to make it on its way to remove the BROKEN tag. The changes should now be acceptable but will run it through a cycle and hopefully we can remove the BROKEN tag next release. - Added filtering to eprobes - Added a delta time to the benchmark trace event - Have the histogram and filter callbacks called via a switch statement instead of indirect functions. This speeds it up to avoid retpolines. - Add a way to wake up ring buffer waiters waiting for the ring buffer to fill up to its watermark. - New ioctl() on the trace_pipe_raw file to wake up ring buffer waiters. - Wake up waiters when the ring buffer is disabled. A reader may block when the ring buffer is disabled, but if it was blocked when the ring buffer is disabled it should then wake up. Fixes: - Allow splice to read partially read ring buffer pages. This fixes splice never moving forward. - Fix inverted compare that made the "shortest" ring buffer wait queue actually the longest. - Fix a race in the ring buffer between resetting a page when a writer goes to another page, and the reader. 
- Fix ftrace accounting bug when function hooks are added at boot up before the weak functions are set to "disabled". - Fix bug that freed a user allocated snapshot buffer when enabling a tracer. - Fix possible recursive locks in osnoise tracer - Fix recursive locking direct functions - Other minor clean ups and fixes" * tag 'trace-v6.1' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace: (44 commits) ftrace: Create separate entry in MAINTAINERS for function hooks tracing: Update MAINTAINERS to reflect new tracing git repo tracing: Do not free snapshot if tracer is on cmdline ftrace: Still disable enabled records marked as disabled tracing/user_events: Move pages/locks into groups to prepare for namespaces tracing: Add Masami Hiramatsu as co-maintainer tracing: Remove unused variable 'dups' MAINTAINERS: add myself as a tracing reviewer ring-buffer: Fix race between reset page and reading page tracing/user_events: Update ABI documentation to align to bits vs bytes tracing/user_events: Use bits vs bytes for enabled status page data tracing/user_events: Use refcount instead of atomic for ref tracking tracing/user_events: Ensure user provided strings are safely formatted tracing/user_events: Use WRITE instead of READ for io vector import tracing/user_events: Use NULL for strstr checks tracing: Fix spelling mistake "preapre" -> "prepare" tracing: Wake up waiters when tracing is disabled tracing: Add ioctl() to force ring buffer waiters to wake up tracing: Wake up ring buffer waiters on closing of the file ring-buffer: Add ring_buffer_wake_waiters() ...
This commit is contained in:
commit
cdf072acb5
@ -20,14 +20,14 @@ dynamic_events is the same as the ioctl with the u: prefix applied.
|
||||
|
||||
Typically programs will register a set of events that they wish to expose to
|
||||
tools that can read trace_events (such as ftrace and perf). The registration
|
||||
process gives back two ints to the program for each event. The first int is the
|
||||
status index. This index describes which byte in the
|
||||
process gives back two ints to the program for each event. The first int is
|
||||
the status bit. This describes which bit in little-endian format in the
|
||||
/sys/kernel/debug/tracing/user_events_status file represents this event. The
|
||||
second int is the write index. This index describes the data when a write() or
|
||||
second int is the write index which describes the data when a write() or
|
||||
writev() is called on the /sys/kernel/debug/tracing/user_events_data file.
|
||||
|
||||
The structures referenced in this document are contained with the
|
||||
/include/uap/linux/user_events.h file in the source tree.
|
||||
The structures referenced in this document are contained within the
|
||||
/include/uapi/linux/user_events.h file in the source tree.
|
||||
|
||||
**NOTE:** *Both user_events_status and user_events_data are under the tracefs
|
||||
filesystem and may be mounted at different paths than above.*
|
||||
@ -38,18 +38,18 @@ Registering within a user process is done via ioctl() out to the
|
||||
/sys/kernel/debug/tracing/user_events_data file. The command to issue is
|
||||
DIAG_IOCSREG.
|
||||
|
||||
This command takes a struct user_reg as an argument::
|
||||
This command takes a packed struct user_reg as an argument::
|
||||
|
||||
struct user_reg {
|
||||
u32 size;
|
||||
u64 name_args;
|
||||
u32 status_index;
|
||||
u32 status_bit;
|
||||
u32 write_index;
|
||||
};
|
||||
|
||||
The struct user_reg requires two inputs, the first is the size of the structure
|
||||
to ensure forward and backward compatibility. The second is the command string
|
||||
to issue for registering. Upon success two outputs are set, the status index
|
||||
to issue for registering. Upon success two outputs are set, the status bit
|
||||
and the write index.
|
||||
|
||||
User based events show up under tracefs like any other event under the
|
||||
@ -111,15 +111,56 @@ in realtime. This allows user programs to only incur the cost of the write() or
|
||||
writev() calls when something is actively attached to the event.
|
||||
|
||||
User programs call mmap() on /sys/kernel/debug/tracing/user_events_status to
|
||||
check the status for each event that is registered. The byte to check in the
|
||||
file is given back after the register ioctl() via user_reg.status_index.
|
||||
check the status for each event that is registered. The bit to check in the
|
||||
file is given back after the register ioctl() via user_reg.status_bit. The bit
|
||||
is always in little-endian format. Programs can check if the bit is set either
|
||||
using a byte-wise index with a mask or a long-wise index with a little-endian
|
||||
mask.
|
||||
|
||||
Currently the size of user_events_status is a single page, however, custom
|
||||
kernel configurations can change this size to allow more user based events. In
|
||||
all cases the size of the file is a multiple of a page size.
|
||||
|
||||
For example, if the register ioctl() gives back a status_index of 3 you would
|
||||
check byte 3 of the returned mmap data to see if anything is attached to that
|
||||
event.
|
||||
For example, if the register ioctl() gives back a status_bit of 3 you would
|
||||
check byte 0 (3 / 8) of the returned mmap data and then AND the result with 8
|
||||
(1 << (3 % 8)) to see if anything is attached to that event.
|
||||
|
||||
A byte-wise index check is performed as follows::
|
||||
|
||||
int index, mask;
|
||||
char *status_page;
|
||||
|
||||
index = status_bit / 8;
|
||||
mask = 1 << (status_bit % 8);
|
||||
|
||||
...
|
||||
|
||||
if (status_page[index] & mask) {
|
||||
/* Enabled */
|
||||
}
|
||||
|
||||
A long-wise index check is performed as follows::
|
||||
|
||||
#include <asm/bitsperlong.h>
|
||||
#include <endian.h>
|
||||
|
||||
#if __BITS_PER_LONG == 64
|
||||
#define endian_swap(x) htole64(x)
|
||||
#else
|
||||
#define endian_swap(x) htole32(x)
|
||||
#endif
|
||||
|
||||
long index, mask, *status_page;
|
||||
|
||||
index = status_bit / __BITS_PER_LONG;
|
||||
mask = 1L << (status_bit % __BITS_PER_LONG);
|
||||
mask = endian_swap(mask);
|
||||
|
||||
...
|
||||
|
||||
if (status_page[index] & mask) {
|
||||
/* Enabled */
|
||||
}
|
||||
|
||||
Administrators can easily check the status of all registered events by reading
|
||||
the user_events_status file directly via a terminal. The output is as follows::
|
||||
@ -137,7 +178,7 @@ For example, on a system that has a single event the output looks like this::
|
||||
|
||||
Active: 1
|
||||
Busy: 0
|
||||
Max: 4096
|
||||
Max: 32768
|
||||
|
||||
If a user enables the user event via ftrace, the output would change to this::
|
||||
|
||||
@ -145,21 +186,10 @@ If a user enables the user event via ftrace, the output would change to this::
|
||||
|
||||
Active: 1
|
||||
Busy: 1
|
||||
Max: 4096
|
||||
Max: 32768
|
||||
|
||||
**NOTE:** *A status index of 0 will never be returned. This allows user
|
||||
programs to have an index that can be used on error cases.*
|
||||
|
||||
Status Bits
|
||||
^^^^^^^^^^^
|
||||
The byte being checked will be non-zero if anything is attached. Programs can
|
||||
check specific bits in the byte to see what mechanism has been attached.
|
||||
|
||||
The following values are defined to aid in checking what has been attached:
|
||||
|
||||
**EVENT_STATUS_FTRACE** - Bit set if ftrace has been attached (Bit 0).
|
||||
|
||||
**EVENT_STATUS_PERF** - Bit set if perf has been attached (Bit 1).
|
||||
**NOTE:** *A status bit of 0 will never be returned. This allows user programs
|
||||
to have a bit that can be used on error cases.*
|
||||
|
||||
Writing Data
|
||||
------------
|
||||
|
26
MAINTAINERS
26
MAINTAINERS
@ -8433,6 +8433,19 @@ L: platform-driver-x86@vger.kernel.org
|
||||
S: Maintained
|
||||
F: drivers/platform/x86/fujitsu-tablet.c
|
||||
|
||||
FUNCTION HOOKS (FTRACE)
|
||||
M: Steven Rostedt <rostedt@goodmis.org>
|
||||
M: Masami Hiramatsu <mhiramat@kernel.org>
|
||||
R: Mark Rutland <mark.rutland@arm.com>
|
||||
S: Maintained
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace.git
|
||||
F: Documentation/trace/ftrace*
|
||||
F: kernel/trace/ftrace*
|
||||
F: kernel/trace/fgraph.c
|
||||
F: arch/*/*/*/*ftrace*
|
||||
F: arch/*/*/*ftrace*
|
||||
F: include/*/ftrace.h
|
||||
|
||||
FUNGIBLE ETHERNET DRIVERS
|
||||
M: Dimitris Michailidis <dmichail@fungible.com>
|
||||
L: netdev@vger.kernel.org
|
||||
@ -11422,7 +11435,7 @@ M: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
|
||||
M: "David S. Miller" <davem@davemloft.net>
|
||||
M: Masami Hiramatsu <mhiramat@kernel.org>
|
||||
S: Maintained
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace.git
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace.git
|
||||
F: Documentation/trace/kprobes.rst
|
||||
F: include/asm-generic/kprobes.h
|
||||
F: include/linux/kprobes.h
|
||||
@ -20771,14 +20784,11 @@ F: drivers/hwmon/pmbus/tps546d24.c
|
||||
|
||||
TRACING
|
||||
M: Steven Rostedt <rostedt@goodmis.org>
|
||||
M: Ingo Molnar <mingo@redhat.com>
|
||||
M: Masami Hiramatsu <mhiramat@kernel.org>
|
||||
S: Maintained
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace.git
|
||||
F: Documentation/trace/ftrace.rst
|
||||
F: arch/*/*/*/*ftrace*
|
||||
F: arch/*/*/*ftrace*
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace.git
|
||||
F: Documentation/trace/*
|
||||
F: fs/tracefs/
|
||||
F: include/*/ftrace.h
|
||||
F: include/linux/trace*.h
|
||||
F: include/trace/
|
||||
F: kernel/trace/
|
||||
@ -20787,7 +20797,7 @@ F: tools/testing/selftests/ftrace/
|
||||
|
||||
TRACING MMIO ACCESSES (MMIOTRACE)
|
||||
M: Steven Rostedt <rostedt@goodmis.org>
|
||||
M: Ingo Molnar <mingo@kernel.org>
|
||||
M: Masami Hiramatsu <mhiramat@kernel.org>
|
||||
R: Karol Herbst <karolherbst@gmail.com>
|
||||
R: Pekka Paalanen <ppaalanen@gmail.com>
|
||||
L: linux-kernel@vger.kernel.org
|
||||
|
@ -23,7 +23,6 @@
|
||||
#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
extern atomic_t modifying_ftrace_code;
|
||||
extern void __fentry__(void);
|
||||
|
||||
static inline unsigned long ftrace_call_adjust(unsigned long addr)
|
||||
|
@ -50,8 +50,6 @@ extern const int kretprobe_blacklist_size;
|
||||
|
||||
void arch_remove_kprobe(struct kprobe *p);
|
||||
|
||||
extern void arch_kprobe_override_function(struct pt_regs *regs);
|
||||
|
||||
/* Architecture specific copy of original instruction*/
|
||||
struct arch_specific_insn {
|
||||
/* copy of the original instruction */
|
||||
|
@ -59,8 +59,6 @@
|
||||
DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
|
||||
DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
|
||||
|
||||
#define stack_addr(regs) ((unsigned long *)regs->sp)
|
||||
|
||||
#define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\
|
||||
(((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \
|
||||
(b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) | \
|
||||
|
@ -1122,47 +1122,6 @@ static inline void unpause_graph_tracing(void) { }
|
||||
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
|
||||
|
||||
#ifdef CONFIG_TRACING
|
||||
|
||||
/* flags for current->trace */
|
||||
enum {
|
||||
TSK_TRACE_FL_TRACE_BIT = 0,
|
||||
TSK_TRACE_FL_GRAPH_BIT = 1,
|
||||
};
|
||||
enum {
|
||||
TSK_TRACE_FL_TRACE = 1 << TSK_TRACE_FL_TRACE_BIT,
|
||||
TSK_TRACE_FL_GRAPH = 1 << TSK_TRACE_FL_GRAPH_BIT,
|
||||
};
|
||||
|
||||
static inline void set_tsk_trace_trace(struct task_struct *tsk)
|
||||
{
|
||||
set_bit(TSK_TRACE_FL_TRACE_BIT, &tsk->trace);
|
||||
}
|
||||
|
||||
static inline void clear_tsk_trace_trace(struct task_struct *tsk)
|
||||
{
|
||||
clear_bit(TSK_TRACE_FL_TRACE_BIT, &tsk->trace);
|
||||
}
|
||||
|
||||
static inline int test_tsk_trace_trace(struct task_struct *tsk)
|
||||
{
|
||||
return tsk->trace & TSK_TRACE_FL_TRACE;
|
||||
}
|
||||
|
||||
static inline void set_tsk_trace_graph(struct task_struct *tsk)
|
||||
{
|
||||
set_bit(TSK_TRACE_FL_GRAPH_BIT, &tsk->trace);
|
||||
}
|
||||
|
||||
static inline void clear_tsk_trace_graph(struct task_struct *tsk)
|
||||
{
|
||||
clear_bit(TSK_TRACE_FL_GRAPH_BIT, &tsk->trace);
|
||||
}
|
||||
|
||||
static inline int test_tsk_trace_graph(struct task_struct *tsk)
|
||||
{
|
||||
return tsk->trace & TSK_TRACE_FL_GRAPH;
|
||||
}
|
||||
|
||||
enum ftrace_dump_mode;
|
||||
|
||||
extern enum ftrace_dump_mode ftrace_dump_on_oops;
|
||||
|
@ -101,7 +101,7 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k
|
||||
int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full);
|
||||
__poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu,
|
||||
struct file *filp, poll_table *poll_table);
|
||||
|
||||
void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu);
|
||||
|
||||
#define RING_BUFFER_ALL_CPUS -1
|
||||
|
||||
|
@ -1390,9 +1390,6 @@ struct task_struct {
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_TRACING
|
||||
/* State flags for use by tracers: */
|
||||
unsigned long trace;
|
||||
|
||||
/* Bitmask and counter of trace recursion: */
|
||||
unsigned long trace_recursion;
|
||||
#endif /* CONFIG_TRACING */
|
||||
|
@ -92,6 +92,7 @@ struct trace_iterator {
|
||||
unsigned int temp_size;
|
||||
char *fmt; /* modified format holder */
|
||||
unsigned int fmt_size;
|
||||
long wait_index;
|
||||
|
||||
/* trace_seq for __print_flags() and __print_symbolic() etc. */
|
||||
struct trace_seq tmp_seq;
|
||||
|
@ -20,15 +20,6 @@
|
||||
#define USER_EVENTS_SYSTEM "user_events"
|
||||
#define USER_EVENTS_PREFIX "u:"
|
||||
|
||||
/* Bits 0-6 are for known probe types, Bit 7 is for unknown probes */
|
||||
#define EVENT_BIT_FTRACE 0
|
||||
#define EVENT_BIT_PERF 1
|
||||
#define EVENT_BIT_OTHER 7
|
||||
|
||||
#define EVENT_STATUS_FTRACE (1 << EVENT_BIT_FTRACE)
|
||||
#define EVENT_STATUS_PERF (1 << EVENT_BIT_PERF)
|
||||
#define EVENT_STATUS_OTHER (1 << EVENT_BIT_OTHER)
|
||||
|
||||
/* Create dynamic location entry within a 32-bit value */
|
||||
#define DYN_LOC(offset, size) ((size) << 16 | (offset))
|
||||
|
||||
@ -45,12 +36,12 @@ struct user_reg {
|
||||
/* Input: Pointer to string with event name, description and flags */
|
||||
__u64 name_args;
|
||||
|
||||
/* Output: Byte index of the event within the status page */
|
||||
__u32 status_index;
|
||||
/* Output: Bitwise index of the event within the status page */
|
||||
__u32 status_bit;
|
||||
|
||||
/* Output: Index of the event to use when writing data */
|
||||
__u32 write_index;
|
||||
};
|
||||
} __attribute__((__packed__));
|
||||
|
||||
#define DIAG_IOC_MAGIC '*'
|
||||
|
||||
|
@ -1644,6 +1644,18 @@ ftrace_find_tramp_ops_any_other(struct dyn_ftrace *rec, struct ftrace_ops *op_ex
|
||||
static struct ftrace_ops *
|
||||
ftrace_find_tramp_ops_next(struct dyn_ftrace *rec, struct ftrace_ops *ops);
|
||||
|
||||
static bool skip_record(struct dyn_ftrace *rec)
|
||||
{
|
||||
/*
|
||||
* At boot up, weak functions are set to disabled. Function tracing
|
||||
* can be enabled before they are, and they still need to be disabled now.
|
||||
* If the record is disabled, still continue if it is marked as already
|
||||
* enabled (this is needed to keep the accounting working).
|
||||
*/
|
||||
return rec->flags & FTRACE_FL_DISABLED &&
|
||||
!(rec->flags & FTRACE_FL_ENABLED);
|
||||
}
|
||||
|
||||
static bool __ftrace_hash_rec_update(struct ftrace_ops *ops,
|
||||
int filter_hash,
|
||||
bool inc)
|
||||
@ -1693,7 +1705,7 @@ static bool __ftrace_hash_rec_update(struct ftrace_ops *ops,
|
||||
int in_hash = 0;
|
||||
int match = 0;
|
||||
|
||||
if (rec->flags & FTRACE_FL_DISABLED)
|
||||
if (skip_record(rec))
|
||||
continue;
|
||||
|
||||
if (all) {
|
||||
@ -2126,7 +2138,7 @@ static int ftrace_check_record(struct dyn_ftrace *rec, bool enable, bool update)
|
||||
|
||||
ftrace_bug_type = FTRACE_BUG_UNKNOWN;
|
||||
|
||||
if (rec->flags & FTRACE_FL_DISABLED)
|
||||
if (skip_record(rec))
|
||||
return FTRACE_UPDATE_IGNORE;
|
||||
|
||||
/*
|
||||
@ -2241,7 +2253,7 @@ static int ftrace_check_record(struct dyn_ftrace *rec, bool enable, bool update)
|
||||
if (update) {
|
||||
/* If there's no more users, clear all flags */
|
||||
if (!ftrace_rec_count(rec))
|
||||
rec->flags = 0;
|
||||
rec->flags &= FTRACE_FL_DISABLED;
|
||||
else
|
||||
/*
|
||||
* Just disable the record, but keep the ops TRAMP
|
||||
@ -2634,7 +2646,7 @@ void __weak ftrace_replace_code(int mod_flags)
|
||||
|
||||
do_for_each_ftrace_rec(pg, rec) {
|
||||
|
||||
if (rec->flags & FTRACE_FL_DISABLED)
|
||||
if (skip_record(rec))
|
||||
continue;
|
||||
|
||||
failed = __ftrace_replace_code(rec, enable);
|
||||
@ -5427,6 +5439,8 @@ static struct ftrace_ops stub_ops = {
|
||||
* it is safe to modify the ftrace record, where it should be
|
||||
* currently calling @old_addr directly, to call @new_addr.
|
||||
*
|
||||
* This is called with direct_mutex locked.
|
||||
*
|
||||
* Safety checks should be made to make sure that the code at
|
||||
* @rec->ip is currently calling @old_addr. And this must
|
||||
* also update entry->direct to @new_addr.
|
||||
@ -5439,6 +5453,8 @@ int __weak ftrace_modify_direct_caller(struct ftrace_func_entry *entry,
|
||||
unsigned long ip = rec->ip;
|
||||
int ret;
|
||||
|
||||
lockdep_assert_held(&direct_mutex);
|
||||
|
||||
/*
|
||||
* The ftrace_lock was used to determine if the record
|
||||
* had more than one registered user to it. If it did,
|
||||
@ -5461,7 +5477,7 @@ int __weak ftrace_modify_direct_caller(struct ftrace_func_entry *entry,
|
||||
if (ret)
|
||||
goto out_lock;
|
||||
|
||||
ret = register_ftrace_function(&stub_ops);
|
||||
ret = register_ftrace_function_nolock(&stub_ops);
|
||||
if (ret) {
|
||||
ftrace_set_filter_ip(&stub_ops, ip, 1, 0);
|
||||
goto out_lock;
|
||||
@ -6081,8 +6097,12 @@ int ftrace_regex_release(struct inode *inode, struct file *file)
|
||||
|
||||
if (filter_hash) {
|
||||
orig_hash = &iter->ops->func_hash->filter_hash;
|
||||
if (iter->tr && !list_empty(&iter->tr->mod_trace))
|
||||
iter->hash->flags |= FTRACE_HASH_FL_MOD;
|
||||
if (iter->tr) {
|
||||
if (list_empty(&iter->tr->mod_trace))
|
||||
iter->hash->flags &= ~FTRACE_HASH_FL_MOD;
|
||||
else
|
||||
iter->hash->flags |= FTRACE_HASH_FL_MOD;
|
||||
}
|
||||
} else
|
||||
orig_hash = &iter->ops->func_hash->notrace_hash;
|
||||
|
||||
|
@ -35,6 +35,45 @@
|
||||
static struct trace_event_file *gen_kprobe_test;
|
||||
static struct trace_event_file *gen_kretprobe_test;
|
||||
|
||||
#define KPROBE_GEN_TEST_FUNC "do_sys_open"
|
||||
|
||||
/* X86 */
|
||||
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_32)
|
||||
#define KPROBE_GEN_TEST_ARG0 "dfd=%ax"
|
||||
#define KPROBE_GEN_TEST_ARG1 "filename=%dx"
|
||||
#define KPROBE_GEN_TEST_ARG2 "flags=%cx"
|
||||
#define KPROBE_GEN_TEST_ARG3 "mode=+4($stack)"
|
||||
|
||||
/* ARM64 */
|
||||
#elif defined(CONFIG_ARM64)
|
||||
#define KPROBE_GEN_TEST_ARG0 "dfd=%x0"
|
||||
#define KPROBE_GEN_TEST_ARG1 "filename=%x1"
|
||||
#define KPROBE_GEN_TEST_ARG2 "flags=%x2"
|
||||
#define KPROBE_GEN_TEST_ARG3 "mode=%x3"
|
||||
|
||||
/* ARM */
|
||||
#elif defined(CONFIG_ARM)
|
||||
#define KPROBE_GEN_TEST_ARG0 "dfd=%r0"
|
||||
#define KPROBE_GEN_TEST_ARG1 "filename=%r1"
|
||||
#define KPROBE_GEN_TEST_ARG2 "flags=%r2"
|
||||
#define KPROBE_GEN_TEST_ARG3 "mode=%r3"
|
||||
|
||||
/* RISCV */
|
||||
#elif defined(CONFIG_RISCV)
|
||||
#define KPROBE_GEN_TEST_ARG0 "dfd=%a0"
|
||||
#define KPROBE_GEN_TEST_ARG1 "filename=%a1"
|
||||
#define KPROBE_GEN_TEST_ARG2 "flags=%a2"
|
||||
#define KPROBE_GEN_TEST_ARG3 "mode=%a3"
|
||||
|
||||
/* others */
|
||||
#else
|
||||
#define KPROBE_GEN_TEST_ARG0 NULL
|
||||
#define KPROBE_GEN_TEST_ARG1 NULL
|
||||
#define KPROBE_GEN_TEST_ARG2 NULL
|
||||
#define KPROBE_GEN_TEST_ARG3 NULL
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
* Test to make sure we can create a kprobe event, then add more
|
||||
* fields.
|
||||
@ -58,14 +97,14 @@ static int __init test_gen_kprobe_cmd(void)
|
||||
* fields.
|
||||
*/
|
||||
ret = kprobe_event_gen_cmd_start(&cmd, "gen_kprobe_test",
|
||||
"do_sys_open",
|
||||
"dfd=%ax", "filename=%dx");
|
||||
KPROBE_GEN_TEST_FUNC,
|
||||
KPROBE_GEN_TEST_ARG0, KPROBE_GEN_TEST_ARG1);
|
||||
if (ret)
|
||||
goto free;
|
||||
|
||||
/* Use kprobe_event_add_fields to add the rest of the fields */
|
||||
|
||||
ret = kprobe_event_add_fields(&cmd, "flags=%cx", "mode=+4($stack)");
|
||||
ret = kprobe_event_add_fields(&cmd, KPROBE_GEN_TEST_ARG2, KPROBE_GEN_TEST_ARG3);
|
||||
if (ret)
|
||||
goto free;
|
||||
|
||||
@ -128,7 +167,7 @@ static int __init test_gen_kretprobe_cmd(void)
|
||||
* Define the kretprobe event.
|
||||
*/
|
||||
ret = kretprobe_event_gen_cmd_start(&cmd, "gen_kretprobe_test",
|
||||
"do_sys_open",
|
||||
KPROBE_GEN_TEST_FUNC,
|
||||
"$retval");
|
||||
if (ret)
|
||||
goto free;
|
||||
@ -206,7 +245,7 @@ static void __exit kprobe_event_gen_test_exit(void)
|
||||
WARN_ON(kprobe_event_delete("gen_kprobe_test"));
|
||||
|
||||
/* Disable the event or you can't remove it */
|
||||
WARN_ON(trace_array_set_clr_event(gen_kprobe_test->tr,
|
||||
WARN_ON(trace_array_set_clr_event(gen_kretprobe_test->tr,
|
||||
"kprobes",
|
||||
"gen_kretprobe_test", false));
|
||||
|
||||
|
@ -413,6 +413,7 @@ struct rb_irq_work {
|
||||
struct irq_work work;
|
||||
wait_queue_head_t waiters;
|
||||
wait_queue_head_t full_waiters;
|
||||
long wait_index;
|
||||
bool waiters_pending;
|
||||
bool full_waiters_pending;
|
||||
bool wakeup_full;
|
||||
@ -917,12 +918,44 @@ static void rb_wake_up_waiters(struct irq_work *work)
|
||||
struct rb_irq_work *rbwork = container_of(work, struct rb_irq_work, work);
|
||||
|
||||
wake_up_all(&rbwork->waiters);
|
||||
if (rbwork->wakeup_full) {
|
||||
if (rbwork->full_waiters_pending || rbwork->wakeup_full) {
|
||||
rbwork->wakeup_full = false;
|
||||
rbwork->full_waiters_pending = false;
|
||||
wake_up_all(&rbwork->full_waiters);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* ring_buffer_wake_waiters - wake up any waiters on this ring buffer
|
||||
* @buffer: The ring buffer to wake waiters on
|
||||
*
|
||||
* When a file that represents a ring buffer is being closed,
|
||||
* it is prudent to wake up any waiters that are blocked on it.
|
||||
*/
|
||||
void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu)
|
||||
{
|
||||
struct ring_buffer_per_cpu *cpu_buffer;
|
||||
struct rb_irq_work *rbwork;
|
||||
|
||||
if (cpu == RING_BUFFER_ALL_CPUS) {
|
||||
|
||||
/* Wake up individual ones too. One level recursion */
|
||||
for_each_buffer_cpu(buffer, cpu)
|
||||
ring_buffer_wake_waiters(buffer, cpu);
|
||||
|
||||
rbwork = &buffer->irq_work;
|
||||
} else {
|
||||
cpu_buffer = buffer->buffers[cpu];
|
||||
rbwork = &cpu_buffer->irq_work;
|
||||
}
|
||||
|
||||
rbwork->wait_index++;
|
||||
/* make sure the waiters see the new index */
|
||||
smp_wmb();
|
||||
|
||||
rb_wake_up_waiters(&rbwork->work);
|
||||
}
|
||||
|
||||
/**
|
||||
* ring_buffer_wait - wait for input to the ring buffer
|
||||
* @buffer: buffer to wait on
|
||||
@ -938,6 +971,7 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
|
||||
struct ring_buffer_per_cpu *cpu_buffer;
|
||||
DEFINE_WAIT(wait);
|
||||
struct rb_irq_work *work;
|
||||
long wait_index;
|
||||
int ret = 0;
|
||||
|
||||
/*
|
||||
@ -956,6 +990,7 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
|
||||
work = &cpu_buffer->irq_work;
|
||||
}
|
||||
|
||||
wait_index = READ_ONCE(work->wait_index);
|
||||
|
||||
while (true) {
|
||||
if (full)
|
||||
@ -1011,7 +1046,7 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
|
||||
nr_pages = cpu_buffer->nr_pages;
|
||||
dirty = ring_buffer_nr_dirty_pages(buffer, cpu);
|
||||
if (!cpu_buffer->shortest_full ||
|
||||
cpu_buffer->shortest_full < full)
|
||||
cpu_buffer->shortest_full > full)
|
||||
cpu_buffer->shortest_full = full;
|
||||
raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
|
||||
if (!pagebusy &&
|
||||
@ -1020,6 +1055,11 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
|
||||
}
|
||||
|
||||
schedule();
|
||||
|
||||
/* Make sure to see the new wait index */
|
||||
smp_rmb();
|
||||
if (wait_index != work->wait_index)
|
||||
break;
|
||||
}
|
||||
|
||||
if (full)
|
||||
@ -2608,6 +2648,9 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
|
||||
/* Mark the rest of the page with padding */
|
||||
rb_event_set_padding(event);
|
||||
|
||||
/* Make sure the padding is visible before the write update */
|
||||
smp_wmb();
|
||||
|
||||
/* Set the write back to the previous setting */
|
||||
local_sub(length, &tail_page->write);
|
||||
return;
|
||||
@ -2619,6 +2662,9 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
|
||||
/* time delta must be non zero */
|
||||
event->time_delta = 1;
|
||||
|
||||
/* Make sure the padding is visible before the tail_page->write update */
|
||||
smp_wmb();
|
||||
|
||||
/* Set write to end of buffer */
|
||||
length = (tail + length) - BUF_PAGE_SIZE;
|
||||
local_sub(length, &tail_page->write);
|
||||
@ -4587,6 +4633,33 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
|
||||
arch_spin_unlock(&cpu_buffer->lock);
|
||||
local_irq_restore(flags);
|
||||
|
||||
/*
|
||||
* The writer has preemption disabled; wait for it, but not forever.
|
||||
* Although, 1 second is pretty much "forever"
|
||||
*/
|
||||
#define USECS_WAIT 1000000
|
||||
for (nr_loops = 0; nr_loops < USECS_WAIT; nr_loops++) {
|
||||
/* If the write is past the end of page, a writer is still updating it */
|
||||
if (likely(!reader || rb_page_write(reader) <= BUF_PAGE_SIZE))
|
||||
break;
|
||||
|
||||
udelay(1);
|
||||
|
||||
/* Get the latest version of the reader write value */
|
||||
smp_rmb();
|
||||
}
|
||||
|
||||
/* The writer is not moving forward? Something is wrong */
|
||||
if (RB_WARN_ON(cpu_buffer, nr_loops == USECS_WAIT))
|
||||
reader = NULL;
|
||||
|
||||
/*
|
||||
* Make sure we see any padding after the write update
|
||||
* (see rb_reset_tail())
|
||||
*/
|
||||
smp_rmb();
|
||||
|
||||
|
||||
return reader;
|
||||
}
|
||||
|
||||
@ -5616,7 +5689,15 @@ int ring_buffer_read_page(struct trace_buffer *buffer,
|
||||
unsigned int pos = 0;
|
||||
unsigned int size;
|
||||
|
||||
if (full)
|
||||
/*
|
||||
* If a full page is expected, this can still be returned
|
||||
* if there's been a previous partial read and the
|
||||
* rest of the page can be read and the commit page is off
|
||||
* the reader page.
|
||||
*/
|
||||
if (full &&
|
||||
(!read || (len < (commit - read)) ||
|
||||
cpu_buffer->reader_page == cpu_buffer->commit_page))
|
||||
goto out_unlock;
|
||||
|
||||
if (len > (commit - read))
|
||||
|
@ -16,7 +16,7 @@
|
||||
|
||||
#include "wip.h"
|
||||
|
||||
struct rv_monitor rv_wip;
|
||||
static struct rv_monitor rv_wip;
|
||||
DECLARE_DA_MON_PER_CPU(wip, unsigned char);
|
||||
|
||||
static void handle_preempt_disable(void *data, unsigned long ip, unsigned long parent_ip)
|
||||
@ -60,7 +60,7 @@ static void disable_wip(void)
|
||||
da_monitor_destroy_wip();
|
||||
}
|
||||
|
||||
struct rv_monitor rv_wip = {
|
||||
static struct rv_monitor rv_wip = {
|
||||
.name = "wip",
|
||||
.description = "wakeup in preemptive per-cpu testing monitor.",
|
||||
.enable = enable_wip,
|
||||
@ -69,13 +69,13 @@ struct rv_monitor rv_wip = {
|
||||
.enabled = 0,
|
||||
};
|
||||
|
||||
static int register_wip(void)
|
||||
static int __init register_wip(void)
|
||||
{
|
||||
rv_register_monitor(&rv_wip);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void unregister_wip(void)
|
||||
static void __exit unregister_wip(void)
|
||||
{
|
||||
rv_unregister_monitor(&rv_wip);
|
||||
}
|
||||
|
@ -15,7 +15,7 @@
|
||||
|
||||
#include "wwnr.h"
|
||||
|
||||
struct rv_monitor rv_wwnr;
|
||||
static struct rv_monitor rv_wwnr;
|
||||
DECLARE_DA_MON_PER_TASK(wwnr, unsigned char);
|
||||
|
||||
static void handle_switch(void *data, bool preempt, struct task_struct *p,
|
||||
@ -59,7 +59,7 @@ static void disable_wwnr(void)
|
||||
da_monitor_destroy_wwnr();
|
||||
}
|
||||
|
||||
struct rv_monitor rv_wwnr = {
|
||||
static struct rv_monitor rv_wwnr = {
|
||||
.name = "wwnr",
|
||||
.description = "wakeup while not running per-task testing model.",
|
||||
.enable = enable_wwnr,
|
||||
@ -68,13 +68,13 @@ struct rv_monitor rv_wwnr = {
|
||||
.enabled = 0,
|
||||
};
|
||||
|
||||
static int register_wwnr(void)
|
||||
static int __init register_wwnr(void)
|
||||
{
|
||||
rv_register_monitor(&rv_wwnr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void unregister_wwnr(void)
|
||||
static void __exit unregister_wwnr(void)
|
||||
{
|
||||
rv_unregister_monitor(&rv_wwnr);
|
||||
}
|
||||
|
@ -1193,12 +1193,14 @@ void *tracing_cond_snapshot_data(struct trace_array *tr)
|
||||
{
|
||||
void *cond_data = NULL;
|
||||
|
||||
local_irq_disable();
|
||||
arch_spin_lock(&tr->max_lock);
|
||||
|
||||
if (tr->cond_snapshot)
|
||||
cond_data = tr->cond_snapshot->cond_data;
|
||||
|
||||
arch_spin_unlock(&tr->max_lock);
|
||||
local_irq_enable();
|
||||
|
||||
return cond_data;
|
||||
}
|
||||
@ -1334,9 +1336,11 @@ int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
|
||||
goto fail_unlock;
|
||||
}
|
||||
|
||||
local_irq_disable();
|
||||
arch_spin_lock(&tr->max_lock);
|
||||
tr->cond_snapshot = cond_snapshot;
|
||||
arch_spin_unlock(&tr->max_lock);
|
||||
local_irq_enable();
|
||||
|
||||
mutex_unlock(&trace_types_lock);
|
||||
|
||||
@ -1363,6 +1367,7 @@ int tracing_snapshot_cond_disable(struct trace_array *tr)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
local_irq_disable();
|
||||
arch_spin_lock(&tr->max_lock);
|
||||
|
||||
if (!tr->cond_snapshot)
|
||||
@ -1373,6 +1378,7 @@ int tracing_snapshot_cond_disable(struct trace_array *tr)
|
||||
}
|
||||
|
||||
arch_spin_unlock(&tr->max_lock);
|
||||
local_irq_enable();
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -2200,6 +2206,11 @@ static size_t tgid_map_max;
|
||||
|
||||
#define SAVED_CMDLINES_DEFAULT 128
|
||||
#define NO_CMDLINE_MAP UINT_MAX
|
||||
/*
|
||||
* Preemption must be disabled before acquiring trace_cmdline_lock.
|
||||
* The various trace_arrays' max_lock must be acquired in a context
|
||||
* where interrupts are disabled.
|
||||
*/
|
||||
static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
|
||||
struct saved_cmdlines_buffer {
|
||||
unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
|
||||
@ -2412,7 +2423,11 @@ static int trace_save_cmdline(struct task_struct *tsk)
|
||||
* the lock, but we also don't want to spin
|
||||
* nor do we want to disable interrupts,
|
||||
* so if we miss here, then better luck next time.
|
||||
*
|
||||
* This is called within the scheduler and wake up, so interrupts
|
||||
* had better be disabled and the run queue lock held.
|
||||
*/
|
||||
lockdep_assert_preemption_disabled();
|
||||
if (!arch_spin_trylock(&trace_cmdline_lock))
|
||||
return 0;
|
||||
|
||||
@ -5890,9 +5905,11 @@ tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
|
||||
char buf[64];
|
||||
int r;
|
||||
|
||||
preempt_disable();
|
||||
arch_spin_lock(&trace_cmdline_lock);
|
||||
r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
|
||||
arch_spin_unlock(&trace_cmdline_lock);
|
||||
preempt_enable();
|
||||
|
||||
return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
|
||||
}
|
||||
@ -5917,10 +5934,12 @@ static int tracing_resize_saved_cmdlines(unsigned int val)
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
preempt_disable();
|
||||
arch_spin_lock(&trace_cmdline_lock);
|
||||
savedcmd_temp = savedcmd;
|
||||
savedcmd = s;
|
||||
arch_spin_unlock(&trace_cmdline_lock);
|
||||
preempt_enable();
|
||||
free_saved_cmdlines_buffer(savedcmd_temp);
|
||||
|
||||
return 0;
|
||||
@ -6373,10 +6392,12 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf)
|
||||
|
||||
#ifdef CONFIG_TRACER_SNAPSHOT
|
||||
if (t->use_max_tr) {
|
||||
local_irq_disable();
|
||||
arch_spin_lock(&tr->max_lock);
|
||||
if (tr->cond_snapshot)
|
||||
ret = -EBUSY;
|
||||
arch_spin_unlock(&tr->max_lock);
|
||||
local_irq_enable();
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
@ -6407,12 +6428,12 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf)
|
||||
if (tr->current_trace->reset)
|
||||
tr->current_trace->reset(tr);
|
||||
|
||||
#ifdef CONFIG_TRACER_MAX_TRACE
|
||||
had_max_tr = tr->current_trace->use_max_tr;
|
||||
|
||||
/* Current trace needs to be nop_trace before synchronize_rcu */
|
||||
tr->current_trace = &nop_trace;
|
||||
|
||||
#ifdef CONFIG_TRACER_MAX_TRACE
|
||||
had_max_tr = tr->allocated_snapshot;
|
||||
|
||||
if (had_max_tr && !t->use_max_tr) {
|
||||
/*
|
||||
* We need to make sure that the update_max_tr sees that
|
||||
@ -6425,11 +6446,13 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf)
|
||||
free_snapshot(tr);
|
||||
}
|
||||
|
||||
if (t->use_max_tr && !had_max_tr) {
|
||||
if (t->use_max_tr && !tr->allocated_snapshot) {
|
||||
ret = tracing_alloc_snapshot_instance(tr);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
}
|
||||
#else
|
||||
tr->current_trace = &nop_trace;
|
||||
#endif
|
||||
|
||||
if (t->init) {
|
||||
@ -7436,10 +7459,12 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
|
||||
goto out;
|
||||
}
|
||||
|
||||
local_irq_disable();
|
||||
arch_spin_lock(&tr->max_lock);
|
||||
if (tr->cond_snapshot)
|
||||
ret = -EBUSY;
|
||||
arch_spin_unlock(&tr->max_lock);
|
||||
local_irq_enable();
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
@ -8137,6 +8162,12 @@ static int tracing_buffers_release(struct inode *inode, struct file *file)
|
||||
|
||||
__trace_array_put(iter->tr);
|
||||
|
||||
iter->wait_index++;
|
||||
/* Make sure the waiters see the new wait_index */
|
||||
smp_wmb();
|
||||
|
||||
ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
|
||||
|
||||
if (info->spare)
|
||||
ring_buffer_free_read_page(iter->array_buffer->buffer,
|
||||
info->spare_cpu, info->spare);
|
||||
@ -8290,6 +8321,8 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
|
||||
|
||||
/* did we read anything? */
|
||||
if (!spd.nr_pages) {
|
||||
long wait_index;
|
||||
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
@ -8297,10 +8330,21 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
|
||||
if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
|
||||
goto out;
|
||||
|
||||
wait_index = READ_ONCE(iter->wait_index);
|
||||
|
||||
ret = wait_on_pipe(iter, iter->tr->buffer_percent);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
/* No need to wait after waking up when tracing is off */
|
||||
if (!tracer_tracing_is_on(iter->tr))
|
||||
goto out;
|
||||
|
||||
/* Make sure we see the new wait_index */
|
||||
smp_rmb();
|
||||
if (wait_index != iter->wait_index)
|
||||
goto out;
|
||||
|
||||
goto again;
|
||||
}
|
||||
|
||||
@ -8311,12 +8355,34 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * An ioctl call with cmd 0 to the ring buffer file will wake up all waiters.
 *
 * Any non-zero @cmd is rejected with -ENOIOCTLCMD; @arg is not used.
 * Returns 0 once the wake-up has been issued.
 */
|
||||
static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
|
||||
{
|
||||
struct ftrace_buffer_info *info = file->private_data;
|
||||
struct trace_iterator *iter = &info->iter;
|
||||
|
||||
/* Command 0 is the only one implemented here. */
if (cmd)
|
||||
return -ENOIOCTLCMD;
|
||||
|
||||
mutex_lock(&trace_types_lock);
|
||||
|
||||
/*
 * tracing_buffers_splice_read() samples wait_index before it sleeps and
 * stops retrying when the value differs after it wakes, so the counter
 * must change before the waiters are woken.
 */
iter->wait_index++;
|
||||
/* Make sure the waiters see the new wait_index */
|
||||
smp_wmb();
|
||||
|
||||
ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
|
||||
|
||||
mutex_unlock(&trace_types_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * File operations built from the tracing_buffers_* handlers. The
 * unlocked_ioctl entry points at tracing_buffers_ioctl(), whose command 0
 * wakes up waiters on the ring buffer.
 */
static const struct file_operations tracing_buffers_fops = {
|
||||
.open = tracing_buffers_open,
|
||||
.read = tracing_buffers_read,
|
||||
.poll = tracing_buffers_poll,
|
||||
.release = tracing_buffers_release,
|
||||
.splice_read = tracing_buffers_splice_read,
|
||||
.unlocked_ioctl = tracing_buffers_ioctl,
|
||||
.llseek = no_llseek,
|
||||
};
|
||||
|
||||
@ -9005,6 +9071,8 @@ rb_simple_write(struct file *filp, const char __user *ubuf,
|
||||
tracer_tracing_off(tr);
|
||||
if (tr->current_trace->stop)
|
||||
tr->current_trace->stop(tr);
|
||||
/* Wake up any waiters */
|
||||
ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
|
||||
}
|
||||
mutex_unlock(&trace_types_lock);
|
||||
}
|
||||
@ -10091,7 +10159,7 @@ __init static int tracer_alloc_buffers(void)
|
||||
* buffer. The memory will be removed once the "instance" is removed.
|
||||
*/
|
||||
ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
|
||||
"trace/RB:preapre", trace_rb_cpu_prepare,
|
||||
"trace/RB:prepare", trace_rb_cpu_prepare,
|
||||
NULL);
|
||||
if (ret < 0)
|
||||
goto out_free_cpumask;
|
||||
|
@ -1435,8 +1435,6 @@ event_trigger_unlock_commit(struct trace_event_file *file,
|
||||
struct filter_pred;
|
||||
struct regex;
|
||||
|
||||
typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event);
|
||||
|
||||
typedef int (*regex_match_func)(char *str, struct regex *r, int len);
|
||||
|
||||
enum regex_type {
|
||||
@ -1455,17 +1453,6 @@ struct regex {
|
||||
regex_match_func match;
|
||||
};
|
||||
|
||||
struct filter_pred {
|
||||
filter_pred_fn_t fn;
|
||||
u64 val;
|
||||
struct regex regex;
|
||||
unsigned short *ops;
|
||||
struct ftrace_event_field *field;
|
||||
int offset;
|
||||
int not;
|
||||
int op;
|
||||
};
|
||||
|
||||
static inline bool is_string_field(struct ftrace_event_field *field)
|
||||
{
|
||||
return field->filter_type == FILTER_DYN_STRING ||
|
||||
|
@ -51,7 +51,7 @@ static void trace_do_benchmark(void)
|
||||
|
||||
local_irq_disable();
|
||||
start = trace_clock_local();
|
||||
trace_benchmark_event(bm_str);
|
||||
trace_benchmark_event(bm_str, bm_last);
|
||||
stop = trace_clock_local();
|
||||
local_irq_enable();
|
||||
|
||||
|
@ -14,19 +14,21 @@ extern void trace_benchmark_unreg(void);
|
||||
|
||||
TRACE_EVENT_FN(benchmark_event,
|
||||
|
||||
TP_PROTO(const char *str),
|
||||
TP_PROTO(const char *str, u64 delta),
|
||||
|
||||
TP_ARGS(str),
|
||||
TP_ARGS(str, delta),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__array( char, str, BENCHMARK_EVENT_STRLEN )
|
||||
__field( u64, delta)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
memcpy(__entry->str, str, BENCHMARK_EVENT_STRLEN);
|
||||
__entry->delta = delta;
|
||||
),
|
||||
|
||||
TP_printk("%s", __entry->str),
|
||||
TP_printk("%s delta=%llu", __entry->str, __entry->delta),
|
||||
|
||||
trace_benchmark_reg, trace_benchmark_unreg
|
||||
);
|
||||
|
@ -26,6 +26,9 @@ struct trace_eprobe {
|
||||
/* tracepoint event */
|
||||
const char *event_name;
|
||||
|
||||
/* filter string for the tracepoint */
|
||||
char *filter_str;
|
||||
|
||||
struct trace_event_call *event;
|
||||
|
||||
struct dyn_event devent;
|
||||
@ -664,14 +667,15 @@ static struct event_trigger_data *
|
||||
new_eprobe_trigger(struct trace_eprobe *ep, struct trace_event_file *file)
|
||||
{
|
||||
struct event_trigger_data *trigger;
|
||||
struct event_filter *filter = NULL;
|
||||
struct eprobe_data *edata;
|
||||
int ret;
|
||||
|
||||
edata = kzalloc(sizeof(*edata), GFP_KERNEL);
|
||||
trigger = kzalloc(sizeof(*trigger), GFP_KERNEL);
|
||||
if (!trigger || !edata) {
|
||||
kfree(edata);
|
||||
kfree(trigger);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
ret = -ENOMEM;
|
||||
goto error;
|
||||
}
|
||||
|
||||
trigger->flags = EVENT_TRIGGER_FL_PROBE;
|
||||
@ -686,13 +690,25 @@ new_eprobe_trigger(struct trace_eprobe *ep, struct trace_event_file *file)
|
||||
trigger->cmd_ops = &event_trigger_cmd;
|
||||
|
||||
INIT_LIST_HEAD(&trigger->list);
|
||||
RCU_INIT_POINTER(trigger->filter, NULL);
|
||||
|
||||
if (ep->filter_str) {
|
||||
ret = create_event_filter(file->tr, file->event_call,
|
||||
ep->filter_str, false, &filter);
|
||||
if (ret)
|
||||
goto error;
|
||||
}
|
||||
RCU_INIT_POINTER(trigger->filter, filter);
|
||||
|
||||
edata->file = file;
|
||||
edata->ep = ep;
|
||||
trigger->private_data = edata;
|
||||
|
||||
return trigger;
|
||||
error:
|
||||
free_event_filter(filter);
|
||||
kfree(edata);
|
||||
kfree(trigger);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
static int enable_eprobe(struct trace_eprobe *ep,
|
||||
@ -726,6 +742,7 @@ static int disable_eprobe(struct trace_eprobe *ep,
|
||||
{
|
||||
struct event_trigger_data *trigger = NULL, *iter;
|
||||
struct trace_event_file *file;
|
||||
struct event_filter *filter;
|
||||
struct eprobe_data *edata;
|
||||
|
||||
file = find_event_file(tr, ep->event_system, ep->event_name);
|
||||
@ -752,6 +769,10 @@ static int disable_eprobe(struct trace_eprobe *ep,
|
||||
/* Make sure nothing is using the edata or trigger */
|
||||
tracepoint_synchronize_unregister();
|
||||
|
||||
filter = rcu_access_pointer(trigger->filter);
|
||||
|
||||
if (filter)
|
||||
free_event_filter(filter);
|
||||
kfree(edata);
|
||||
kfree(trigger);
|
||||
|
||||
@ -927,12 +948,62 @@ static int trace_eprobe_tp_update_arg(struct trace_eprobe *ep, const char *argv[
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int trace_eprobe_parse_filter(struct trace_eprobe *ep, int argc, const char *argv[])
|
||||
{
|
||||
struct event_filter *dummy;
|
||||
int i, ret, len = 0;
|
||||
char *p;
|
||||
|
||||
if (argc == 0) {
|
||||
trace_probe_log_err(0, NO_EP_FILTER);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Recover the filter string */
|
||||
for (i = 0; i < argc; i++)
|
||||
len += strlen(argv[i]) + 1;
|
||||
|
||||
ep->filter_str = kzalloc(len, GFP_KERNEL);
|
||||
if (!ep->filter_str)
|
||||
return -ENOMEM;
|
||||
|
||||
p = ep->filter_str;
|
||||
for (i = 0; i < argc; i++) {
|
||||
ret = snprintf(p, len, "%s ", argv[i]);
|
||||
if (ret < 0)
|
||||
goto error;
|
||||
if (ret > len) {
|
||||
ret = -E2BIG;
|
||||
goto error;
|
||||
}
|
||||
p += ret;
|
||||
len -= ret;
|
||||
}
|
||||
p[-1] = '\0';
|
||||
|
||||
/*
|
||||
* Ensure the filter string can be parsed correctly. Note, this
|
||||
* filter string is for the original event, not for the eprobe.
|
||||
*/
|
||||
ret = create_event_filter(top_trace_array(), ep->event, ep->filter_str,
|
||||
true, &dummy);
|
||||
free_event_filter(dummy);
|
||||
if (ret)
|
||||
goto error;
|
||||
|
||||
return 0;
|
||||
error:
|
||||
kfree(ep->filter_str);
|
||||
ep->filter_str = NULL;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int __trace_eprobe_create(int argc, const char *argv[])
|
||||
{
|
||||
/*
|
||||
* Argument syntax:
|
||||
* e[:[GRP/][ENAME]] SYSTEM.EVENT [FETCHARGS]
|
||||
* Fetch args:
|
||||
* e[:[GRP/][ENAME]] SYSTEM.EVENT [FETCHARGS] [if FILTER]
|
||||
* Fetch args (no space):
|
||||
* <name>=$<field>[:TYPE]
|
||||
*/
|
||||
const char *event = NULL, *group = EPROBE_EVENT_SYSTEM;
|
||||
@ -942,8 +1013,8 @@ static int __trace_eprobe_create(int argc, const char *argv[])
|
||||
char buf1[MAX_EVENT_NAME_LEN];
|
||||
char buf2[MAX_EVENT_NAME_LEN];
|
||||
char gbuf[MAX_EVENT_NAME_LEN];
|
||||
int ret = 0;
|
||||
int i;
|
||||
int ret = 0, filter_idx = 0;
|
||||
int i, filter_cnt;
|
||||
|
||||
if (argc < 2 || argv[0][0] != 'e')
|
||||
return -ECANCELED;
|
||||
@ -968,11 +1039,19 @@ static int __trace_eprobe_create(int argc, const char *argv[])
|
||||
}
|
||||
|
||||
if (!event) {
|
||||
strscpy(buf1, argv[1], MAX_EVENT_NAME_LEN);
|
||||
sanitize_event_name(buf1);
|
||||
strscpy(buf1, sys_event, MAX_EVENT_NAME_LEN);
|
||||
event = buf1;
|
||||
}
|
||||
|
||||
for (i = 2; i < argc; i++) {
|
||||
if (!strcmp(argv[i], "if")) {
|
||||
filter_idx = i + 1;
|
||||
filter_cnt = argc - filter_idx;
|
||||
argc = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
mutex_lock(&event_mutex);
|
||||
event_call = find_and_get_event(sys_name, sys_event);
|
||||
ep = alloc_event_probe(group, event, event_call, argc - 2);
|
||||
@ -988,6 +1067,14 @@ static int __trace_eprobe_create(int argc, const char *argv[])
|
||||
goto error;
|
||||
}
|
||||
|
||||
if (filter_idx) {
|
||||
trace_probe_log_set_index(filter_idx);
|
||||
ret = trace_eprobe_parse_filter(ep, filter_cnt, argv + filter_idx);
|
||||
if (ret)
|
||||
goto parse_error;
|
||||
} else
|
||||
ep->filter_str = NULL;
|
||||
|
||||
argc -= 2; argv += 2;
|
||||
/* parse arguments */
|
||||
for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
|
||||
|
@ -43,6 +43,42 @@ enum filter_op_ids { OPS };
|
||||
|
||||
static const char * ops[] = { OPS };
|
||||
|
||||
/*
 * Selects the predicate routine a filter_pred runs. The value is kept in
 * filter_pred.fn_num and turned into a direct call by the switch in
 * filter_pred_fn_call(); values that have no case there evaluate to 0.
 */
enum filter_pred_fn {
|
||||
FILTER_PRED_FN_NOP,
|
||||
FILTER_PRED_FN_64,
|
||||
FILTER_PRED_FN_S64,
|
||||
FILTER_PRED_FN_U64,
|
||||
FILTER_PRED_FN_32,
|
||||
FILTER_PRED_FN_S32,
|
||||
FILTER_PRED_FN_U32,
|
||||
FILTER_PRED_FN_16,
|
||||
FILTER_PRED_FN_S16,
|
||||
FILTER_PRED_FN_U16,
|
||||
FILTER_PRED_FN_8,
|
||||
FILTER_PRED_FN_S8,
|
||||
FILTER_PRED_FN_U8,
|
||||
FILTER_PRED_FN_COMM,
|
||||
FILTER_PRED_FN_STRING,
|
||||
FILTER_PRED_FN_STRLOC,
|
||||
FILTER_PRED_FN_STRRELLOC,
|
||||
FILTER_PRED_FN_PCHAR_USER,
|
||||
FILTER_PRED_FN_PCHAR,
|
||||
FILTER_PRED_FN_CPU,
|
||||
/* NOTE(review): filter_pred_fn_call() has no case for this value - confirm it is intentional */
FILTER_PRED_FN_,
|
||||
FILTER_PRED_TEST_VISITED,
|
||||
};
|
||||
|
||||
/* One leaf predicate of an event filter. */
struct filter_pred {
|
||||
/* which routine filter_pred_fn_call() invokes for this predicate */
enum filter_pred_fn fn_num;
|
||||
/* operand of numeric comparisons; cast to the field's type before use */
u64 val;
|
||||
/* pattern state for string predicates, prepared by filter_build_regex() */
struct regex regex;
|
||||
unsigned short *ops;
|
||||
/* event field the predicate applies to */
struct ftrace_event_field *field;
|
||||
/* added to the event record pointer to locate the field's data */
int offset;
|
||||
/* set to 1 for OP_NE; filter_pred_comm() XORs it into its result */
int not;
|
||||
/* operator id (OP_*); the comparison predicates switch on it */
int op;
|
||||
};
|
||||
|
||||
/*
|
||||
* pred functions are OP_LE, OP_LT, OP_GE, OP_GT, and OP_BAND
|
||||
* pred_funcs_##type below must match the order of them above.
|
||||
@ -590,45 +626,49 @@ predicate_parse(const char *str, int nr_parens, int nr_preds,
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
#define DEFINE_COMPARISON_PRED(type) \
|
||||
static int filter_pred_LT_##type(struct filter_pred *pred, void *event) \
|
||||
{ \
|
||||
type *addr = (type *)(event + pred->offset); \
|
||||
type val = (type)pred->val; \
|
||||
return *addr < val; \
|
||||
} \
|
||||
static int filter_pred_LE_##type(struct filter_pred *pred, void *event) \
|
||||
{ \
|
||||
type *addr = (type *)(event + pred->offset); \
|
||||
type val = (type)pred->val; \
|
||||
return *addr <= val; \
|
||||
} \
|
||||
static int filter_pred_GT_##type(struct filter_pred *pred, void *event) \
|
||||
{ \
|
||||
type *addr = (type *)(event + pred->offset); \
|
||||
type val = (type)pred->val; \
|
||||
return *addr > val; \
|
||||
} \
|
||||
static int filter_pred_GE_##type(struct filter_pred *pred, void *event) \
|
||||
{ \
|
||||
type *addr = (type *)(event + pred->offset); \
|
||||
type val = (type)pred->val; \
|
||||
return *addr >= val; \
|
||||
} \
|
||||
static int filter_pred_BAND_##type(struct filter_pred *pred, void *event) \
|
||||
{ \
|
||||
type *addr = (type *)(event + pred->offset); \
|
||||
type val = (type)pred->val; \
|
||||
return !!(*addr & val); \
|
||||
} \
|
||||
static const filter_pred_fn_t pred_funcs_##type[] = { \
|
||||
filter_pred_LE_##type, \
|
||||
filter_pred_LT_##type, \
|
||||
filter_pred_GE_##type, \
|
||||
filter_pred_GT_##type, \
|
||||
filter_pred_BAND_##type, \
|
||||
/*
 * Comparison kinds.
 * NOTE(review): the comparison predicate macro in this file switches on the
 * OP_* values rather than on these - confirm where this enum is consumed.
 */
enum pred_cmp_types {
|
||||
PRED_CMP_TYPE_NOP,
|
||||
PRED_CMP_TYPE_LT,
|
||||
PRED_CMP_TYPE_LE,
|
||||
PRED_CMP_TYPE_GT,
|
||||
PRED_CMP_TYPE_GE,
|
||||
PRED_CMP_TYPE_BAND,
|
||||
};
|
||||
|
||||
/*
 * DEFINE_COMPARISON_PRED(type) - generate filter_pred_<type>()
 *
 * The generated predicate looks at the <type> value located at
 * event + pred->offset and relates it to pred->val (cast to <type>) using
 * the operator held in pred->op. OP_LT, OP_LE, OP_GT, OP_GE and OP_BAND are
 * handled; any other operator yields 0.
 */
#define DEFINE_COMPARISON_PRED(type)					\
static int filter_pred_##type(struct filter_pred *pred, void *event)	\
{									\
	type *field = (type *)(event + pred->offset);			\
	type want = (type)pred->val;					\
									\
	switch (pred->op) {						\
	case OP_LT:							\
		return *field < want;					\
	case OP_LE:							\
		return *field <= want;					\
	case OP_GT:							\
		return *field > want;					\
	case OP_GE:							\
		return *field >= want;					\
	case OP_BAND:							\
		return !!(*field & want);				\
	default:							\
		return 0;						\
	}								\
}
|
||||
|
||||
#define DEFINE_EQUALITY_PRED(size) \
|
||||
static int filter_pred_##size(struct filter_pred *pred, void *event) \
|
||||
{ \
|
||||
@ -836,11 +876,6 @@ static int filter_pred_comm(struct filter_pred *pred, void *event)
|
||||
return cmp ^ pred->not;
|
||||
}
|
||||
|
||||
static int filter_pred_none(struct filter_pred *pred, void *event)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* regex_match_foo - Basic regex callbacks
|
||||
*
|
||||
@ -986,6 +1021,19 @@ static void filter_build_regex(struct filter_pred *pred)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
 * filter_pred_fn_call() refers to test_pred_visited_fn() unconditionally.
 * With CONFIG_FTRACE_STARTUP_TEST only a forward declaration is needed here
 * (the definition is presumably supplied with the startup test code);
 * without it, a stub that always returns 0 stands in.
 */
#ifdef CONFIG_FTRACE_STARTUP_TEST
|
||||
static int test_pred_visited_fn(struct filter_pred *pred, void *event);
|
||||
#else
|
||||
static int test_pred_visited_fn(struct filter_pred *pred, void *event)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
static int filter_pred_fn_call(struct filter_pred *pred, void *event);
|
||||
|
||||
/* return 1 if event matches, 0 otherwise (discard) */
|
||||
int filter_match_preds(struct event_filter *filter, void *rec)
|
||||
{
|
||||
@ -1003,7 +1051,7 @@ int filter_match_preds(struct event_filter *filter, void *rec)
|
||||
|
||||
for (i = 0; prog[i].pred; i++) {
|
||||
struct filter_pred *pred = prog[i].pred;
|
||||
int match = pred->fn(pred, rec);
|
||||
int match = filter_pred_fn_call(pred, rec);
|
||||
if (match == prog[i].when_to_branch)
|
||||
i = prog[i].target;
|
||||
}
|
||||
@ -1189,10 +1237,10 @@ int filter_assign_type(const char *type)
|
||||
return FILTER_OTHER;
|
||||
}
|
||||
|
||||
static filter_pred_fn_t select_comparison_fn(enum filter_op_ids op,
|
||||
int field_size, int field_is_signed)
|
||||
static enum filter_pred_fn select_comparison_fn(enum filter_op_ids op,
|
||||
int field_size, int field_is_signed)
|
||||
{
|
||||
filter_pred_fn_t fn = NULL;
|
||||
enum filter_pred_fn fn = FILTER_PRED_FN_NOP;
|
||||
int pred_func_index = -1;
|
||||
|
||||
switch (op) {
|
||||
@ -1201,50 +1249,99 @@ static filter_pred_fn_t select_comparison_fn(enum filter_op_ids op,
|
||||
break;
|
||||
default:
|
||||
if (WARN_ON_ONCE(op < PRED_FUNC_START))
|
||||
return NULL;
|
||||
return fn;
|
||||
pred_func_index = op - PRED_FUNC_START;
|
||||
if (WARN_ON_ONCE(pred_func_index > PRED_FUNC_MAX))
|
||||
return NULL;
|
||||
return fn;
|
||||
}
|
||||
|
||||
switch (field_size) {
|
||||
case 8:
|
||||
if (pred_func_index < 0)
|
||||
fn = filter_pred_64;
|
||||
fn = FILTER_PRED_FN_64;
|
||||
else if (field_is_signed)
|
||||
fn = pred_funcs_s64[pred_func_index];
|
||||
fn = FILTER_PRED_FN_S64;
|
||||
else
|
||||
fn = pred_funcs_u64[pred_func_index];
|
||||
fn = FILTER_PRED_FN_U64;
|
||||
break;
|
||||
case 4:
|
||||
if (pred_func_index < 0)
|
||||
fn = filter_pred_32;
|
||||
fn = FILTER_PRED_FN_32;
|
||||
else if (field_is_signed)
|
||||
fn = pred_funcs_s32[pred_func_index];
|
||||
fn = FILTER_PRED_FN_S32;
|
||||
else
|
||||
fn = pred_funcs_u32[pred_func_index];
|
||||
fn = FILTER_PRED_FN_U32;
|
||||
break;
|
||||
case 2:
|
||||
if (pred_func_index < 0)
|
||||
fn = filter_pred_16;
|
||||
fn = FILTER_PRED_FN_16;
|
||||
else if (field_is_signed)
|
||||
fn = pred_funcs_s16[pred_func_index];
|
||||
fn = FILTER_PRED_FN_S16;
|
||||
else
|
||||
fn = pred_funcs_u16[pred_func_index];
|
||||
fn = FILTER_PRED_FN_U16;
|
||||
break;
|
||||
case 1:
|
||||
if (pred_func_index < 0)
|
||||
fn = filter_pred_8;
|
||||
fn = FILTER_PRED_FN_8;
|
||||
else if (field_is_signed)
|
||||
fn = pred_funcs_s8[pred_func_index];
|
||||
fn = FILTER_PRED_FN_S8;
|
||||
else
|
||||
fn = pred_funcs_u8[pred_func_index];
|
||||
fn = FILTER_PRED_FN_U8;
|
||||
break;
|
||||
}
|
||||
|
||||
return fn;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Run the predicate routine selected by pred->fn_num on @event and return
 * its result.
 *
 * The dispatch is written as a switch over direct calls rather than as a
 * call through a function pointer stored in the predicate. Any fn_num
 * without a case below (FILTER_PRED_FN_NOP for instance) returns 0.
 */
static int filter_pred_fn_call(struct filter_pred *pred, void *event)
|
||||
{
|
||||
switch (pred->fn_num) {
|
||||
case FILTER_PRED_FN_64:
|
||||
return filter_pred_64(pred, event);
|
||||
case FILTER_PRED_FN_S64:
|
||||
return filter_pred_s64(pred, event);
|
||||
case FILTER_PRED_FN_U64:
|
||||
return filter_pred_u64(pred, event);
|
||||
case FILTER_PRED_FN_32:
|
||||
return filter_pred_32(pred, event);
|
||||
case FILTER_PRED_FN_S32:
|
||||
return filter_pred_s32(pred, event);
|
||||
case FILTER_PRED_FN_U32:
|
||||
return filter_pred_u32(pred, event);
|
||||
case FILTER_PRED_FN_16:
|
||||
return filter_pred_16(pred, event);
|
||||
case FILTER_PRED_FN_S16:
|
||||
return filter_pred_s16(pred, event);
|
||||
case FILTER_PRED_FN_U16:
|
||||
return filter_pred_u16(pred, event);
|
||||
case FILTER_PRED_FN_8:
|
||||
return filter_pred_8(pred, event);
|
||||
case FILTER_PRED_FN_S8:
|
||||
return filter_pred_s8(pred, event);
|
||||
case FILTER_PRED_FN_U8:
|
||||
return filter_pred_u8(pred, event);
|
||||
case FILTER_PRED_FN_COMM:
|
||||
return filter_pred_comm(pred, event);
|
||||
case FILTER_PRED_FN_STRING:
|
||||
return filter_pred_string(pred, event);
|
||||
case FILTER_PRED_FN_STRLOC:
|
||||
return filter_pred_strloc(pred, event);
|
||||
case FILTER_PRED_FN_STRRELLOC:
|
||||
return filter_pred_strrelloc(pred, event);
|
||||
case FILTER_PRED_FN_PCHAR_USER:
|
||||
return filter_pred_pchar_user(pred, event);
|
||||
case FILTER_PRED_FN_PCHAR:
|
||||
return filter_pred_pchar(pred, event);
|
||||
case FILTER_PRED_FN_CPU:
|
||||
return filter_pred_cpu(pred, event);
|
||||
case FILTER_PRED_TEST_VISITED:
|
||||
return test_pred_visited_fn(pred, event);
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Called when a predicate is encountered by predicate_parse() */
|
||||
static int parse_pred(const char *str, void *data,
|
||||
int pos, struct filter_parse_error *pe,
|
||||
@ -1338,7 +1435,7 @@ static int parse_pred(const char *str, void *data,
|
||||
parse_error(pe, FILT_ERR_IP_FIELD_ONLY, pos + i);
|
||||
goto err_free;
|
||||
}
|
||||
pred->fn = filter_pred_none;
|
||||
pred->fn_num = FILTER_PRED_FN_NOP;
|
||||
|
||||
/*
|
||||
* Quotes are not required, but if they exist then we need
|
||||
@ -1416,16 +1513,16 @@ static int parse_pred(const char *str, void *data,
|
||||
filter_build_regex(pred);
|
||||
|
||||
if (field->filter_type == FILTER_COMM) {
|
||||
pred->fn = filter_pred_comm;
|
||||
pred->fn_num = FILTER_PRED_FN_COMM;
|
||||
|
||||
} else if (field->filter_type == FILTER_STATIC_STRING) {
|
||||
pred->fn = filter_pred_string;
|
||||
pred->fn_num = FILTER_PRED_FN_STRING;
|
||||
pred->regex.field_len = field->size;
|
||||
|
||||
} else if (field->filter_type == FILTER_DYN_STRING) {
|
||||
pred->fn = filter_pred_strloc;
|
||||
pred->fn_num = FILTER_PRED_FN_STRLOC;
|
||||
} else if (field->filter_type == FILTER_RDYN_STRING)
|
||||
pred->fn = filter_pred_strrelloc;
|
||||
pred->fn_num = FILTER_PRED_FN_STRRELLOC;
|
||||
else {
|
||||
|
||||
if (!ustring_per_cpu) {
|
||||
@ -1436,9 +1533,9 @@ static int parse_pred(const char *str, void *data,
|
||||
}
|
||||
|
||||
if (ustring)
|
||||
pred->fn = filter_pred_pchar_user;
|
||||
pred->fn_num = FILTER_PRED_FN_PCHAR_USER;
|
||||
else
|
||||
pred->fn = filter_pred_pchar;
|
||||
pred->fn_num = FILTER_PRED_FN_PCHAR;
|
||||
}
|
||||
/* go past the last quote */
|
||||
i++;
|
||||
@ -1486,10 +1583,10 @@ static int parse_pred(const char *str, void *data,
|
||||
pred->val = val;
|
||||
|
||||
if (field->filter_type == FILTER_CPU)
|
||||
pred->fn = filter_pred_cpu;
|
||||
pred->fn_num = FILTER_PRED_FN_CPU;
|
||||
else {
|
||||
pred->fn = select_comparison_fn(pred->op, field->size,
|
||||
field->is_signed);
|
||||
pred->fn_num = select_comparison_fn(pred->op, field->size,
|
||||
field->is_signed);
|
||||
if (pred->op == OP_NE)
|
||||
pred->not = 1;
|
||||
}
|
||||
@ -2296,7 +2393,7 @@ static void update_pred_fn(struct event_filter *filter, char *fields)
|
||||
struct filter_pred *pred = prog[i].pred;
|
||||
struct ftrace_event_field *field = pred->field;
|
||||
|
||||
WARN_ON_ONCE(!pred->fn);
|
||||
WARN_ON_ONCE(pred->fn_num == FILTER_PRED_FN_NOP);
|
||||
|
||||
if (!field) {
|
||||
WARN_ONCE(1, "all leafs should have field defined %d", i);
|
||||
@ -2306,7 +2403,7 @@ static void update_pred_fn(struct event_filter *filter, char *fields)
|
||||
if (!strchr(fields, *field->name))
|
||||
continue;
|
||||
|
||||
pred->fn = test_pred_visited_fn;
|
||||
pred->fn_num = FILTER_PRED_TEST_VISITED;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -104,6 +104,38 @@ enum field_op_id {
|
||||
FIELD_OP_MULT,
|
||||
};
|
||||
|
||||
/*
 * Selector for the routine that produces a hist_field's value; stored in
 * hist_field.fn_num. Presumably consumed by hist_fn_call(), whose definition
 * is not shown here - confirm against it when adding a value.
 */
enum hist_field_fn {
|
||||
HIST_FIELD_FN_NOP,
|
||||
HIST_FIELD_FN_VAR_REF,
|
||||
HIST_FIELD_FN_COUNTER,
|
||||
HIST_FIELD_FN_CONST,
|
||||
HIST_FIELD_FN_LOG2,
|
||||
HIST_FIELD_FN_BUCKET,
|
||||
HIST_FIELD_FN_TIMESTAMP,
|
||||
HIST_FIELD_FN_CPU,
|
||||
HIST_FIELD_FN_STRING,
|
||||
HIST_FIELD_FN_DYNSTRING,
|
||||
HIST_FIELD_FN_RELDYNSTRING,
|
||||
HIST_FIELD_FN_PSTRING,
|
||||
HIST_FIELD_FN_S64,
|
||||
HIST_FIELD_FN_U64,
|
||||
HIST_FIELD_FN_S32,
|
||||
HIST_FIELD_FN_U32,
|
||||
HIST_FIELD_FN_S16,
|
||||
HIST_FIELD_FN_U16,
|
||||
HIST_FIELD_FN_S8,
|
||||
HIST_FIELD_FN_U8,
|
||||
HIST_FIELD_FN_UMINUS,
|
||||
HIST_FIELD_FN_MINUS,
|
||||
HIST_FIELD_FN_PLUS,
|
||||
HIST_FIELD_FN_DIV,
|
||||
HIST_FIELD_FN_MULT,
|
||||
HIST_FIELD_FN_DIV_POWER2,
|
||||
HIST_FIELD_FN_DIV_NOT_POWER2,
|
||||
HIST_FIELD_FN_DIV_MULT_SHIFT,
|
||||
HIST_FIELD_FN_EXECNAME,
|
||||
};
|
||||
|
||||
/*
|
||||
* A hist_var (histogram variable) contains variable information for
|
||||
* hist_fields having the HIST_FIELD_FL_VAR or HIST_FIELD_FL_VAR_REF
|
||||
@ -123,15 +155,15 @@ struct hist_var {
|
||||
struct hist_field {
|
||||
struct ftrace_event_field *field;
|
||||
unsigned long flags;
|
||||
hist_field_fn_t fn;
|
||||
unsigned int ref;
|
||||
unsigned int size;
|
||||
unsigned int offset;
|
||||
unsigned int is_signed;
|
||||
unsigned long buckets;
|
||||
const char *type;
|
||||
struct hist_field *operands[HIST_FIELD_OPERANDS_MAX];
|
||||
struct hist_trigger_data *hist_data;
|
||||
enum hist_field_fn fn_num;
|
||||
unsigned int ref;
|
||||
unsigned int size;
|
||||
unsigned int offset;
|
||||
unsigned int is_signed;
|
||||
|
||||
/*
|
||||
* Variable fields contain variable-specific info in var.
|
||||
@ -166,14 +198,11 @@ struct hist_field {
|
||||
u64 div_multiplier;
|
||||
};
|
||||
|
||||
static u64 hist_field_none(struct hist_field *field,
|
||||
struct tracing_map_elt *elt,
|
||||
struct trace_buffer *buffer,
|
||||
struct ring_buffer_event *rbe,
|
||||
void *event)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
static u64 hist_fn_call(struct hist_field *hist_field,
|
||||
struct tracing_map_elt *elt,
|
||||
struct trace_buffer *buffer,
|
||||
struct ring_buffer_event *rbe,
|
||||
void *event);
|
||||
|
||||
static u64 hist_field_const(struct hist_field *field,
|
||||
struct tracing_map_elt *elt,
|
||||
@ -250,7 +279,7 @@ static u64 hist_field_log2(struct hist_field *hist_field,
|
||||
{
|
||||
struct hist_field *operand = hist_field->operands[0];
|
||||
|
||||
u64 val = operand->fn(operand, elt, buffer, rbe, event);
|
||||
u64 val = hist_fn_call(operand, elt, buffer, rbe, event);
|
||||
|
||||
return (u64) ilog2(roundup_pow_of_two(val));
|
||||
}
|
||||
@ -264,7 +293,7 @@ static u64 hist_field_bucket(struct hist_field *hist_field,
|
||||
struct hist_field *operand = hist_field->operands[0];
|
||||
unsigned long buckets = hist_field->buckets;
|
||||
|
||||
u64 val = operand->fn(operand, elt, buffer, rbe, event);
|
||||
u64 val = hist_fn_call(operand, elt, buffer, rbe, event);
|
||||
|
||||
if (WARN_ON_ONCE(!buckets))
|
||||
return val;
|
||||
@ -285,8 +314,8 @@ static u64 hist_field_plus(struct hist_field *hist_field,
|
||||
struct hist_field *operand1 = hist_field->operands[0];
|
||||
struct hist_field *operand2 = hist_field->operands[1];
|
||||
|
||||
u64 val1 = operand1->fn(operand1, elt, buffer, rbe, event);
|
||||
u64 val2 = operand2->fn(operand2, elt, buffer, rbe, event);
|
||||
u64 val1 = hist_fn_call(operand1, elt, buffer, rbe, event);
|
||||
u64 val2 = hist_fn_call(operand2, elt, buffer, rbe, event);
|
||||
|
||||
return val1 + val2;
|
||||
}
|
||||
@ -300,8 +329,8 @@ static u64 hist_field_minus(struct hist_field *hist_field,
|
||||
struct hist_field *operand1 = hist_field->operands[0];
|
||||
struct hist_field *operand2 = hist_field->operands[1];
|
||||
|
||||
u64 val1 = operand1->fn(operand1, elt, buffer, rbe, event);
|
||||
u64 val2 = operand2->fn(operand2, elt, buffer, rbe, event);
|
||||
u64 val1 = hist_fn_call(operand1, elt, buffer, rbe, event);
|
||||
u64 val2 = hist_fn_call(operand2, elt, buffer, rbe, event);
|
||||
|
||||
return val1 - val2;
|
||||
}
|
||||
@ -315,8 +344,8 @@ static u64 hist_field_div(struct hist_field *hist_field,
|
||||
struct hist_field *operand1 = hist_field->operands[0];
|
||||
struct hist_field *operand2 = hist_field->operands[1];
|
||||
|
||||
u64 val1 = operand1->fn(operand1, elt, buffer, rbe, event);
|
||||
u64 val2 = operand2->fn(operand2, elt, buffer, rbe, event);
|
||||
u64 val1 = hist_fn_call(operand1, elt, buffer, rbe, event);
|
||||
u64 val2 = hist_fn_call(operand2, elt, buffer, rbe, event);
|
||||
|
||||
/* Return -1 for the undefined case */
|
||||
if (!val2)
|
||||
@ -338,7 +367,7 @@ static u64 div_by_power_of_two(struct hist_field *hist_field,
|
||||
struct hist_field *operand1 = hist_field->operands[0];
|
||||
struct hist_field *operand2 = hist_field->operands[1];
|
||||
|
||||
u64 val1 = operand1->fn(operand1, elt, buffer, rbe, event);
|
||||
u64 val1 = hist_fn_call(operand1, elt, buffer, rbe, event);
|
||||
|
||||
return val1 >> __ffs64(operand2->constant);
|
||||
}
|
||||
@ -352,7 +381,7 @@ static u64 div_by_not_power_of_two(struct hist_field *hist_field,
|
||||
struct hist_field *operand1 = hist_field->operands[0];
|
||||
struct hist_field *operand2 = hist_field->operands[1];
|
||||
|
||||
u64 val1 = operand1->fn(operand1, elt, buffer, rbe, event);
|
||||
u64 val1 = hist_fn_call(operand1, elt, buffer, rbe, event);
|
||||
|
||||
return div64_u64(val1, operand2->constant);
|
||||
}
|
||||
@ -366,7 +395,7 @@ static u64 div_by_mult_and_shift(struct hist_field *hist_field,
|
||||
struct hist_field *operand1 = hist_field->operands[0];
|
||||
struct hist_field *operand2 = hist_field->operands[1];
|
||||
|
||||
u64 val1 = operand1->fn(operand1, elt, buffer, rbe, event);
|
||||
u64 val1 = hist_fn_call(operand1, elt, buffer, rbe, event);
|
||||
|
||||
/*
|
||||
* If the divisor is a constant, do a multiplication and shift instead.
|
||||
@ -400,8 +429,8 @@ static u64 hist_field_mult(struct hist_field *hist_field,
|
||||
struct hist_field *operand1 = hist_field->operands[0];
|
||||
struct hist_field *operand2 = hist_field->operands[1];
|
||||
|
||||
u64 val1 = operand1->fn(operand1, elt, buffer, rbe, event);
|
||||
u64 val2 = operand2->fn(operand2, elt, buffer, rbe, event);
|
||||
u64 val1 = hist_fn_call(operand1, elt, buffer, rbe, event);
|
||||
u64 val2 = hist_fn_call(operand2, elt, buffer, rbe, event);
|
||||
|
||||
return val1 * val2;
|
||||
}
|
||||
@ -414,7 +443,7 @@ static u64 hist_field_unary_minus(struct hist_field *hist_field,
|
||||
{
|
||||
struct hist_field *operand = hist_field->operands[0];
|
||||
|
||||
s64 sval = (s64)operand->fn(operand, elt, buffer, rbe, event);
|
||||
s64 sval = (s64)hist_fn_call(operand, elt, buffer, rbe, event);
|
||||
u64 val = (u64)-sval;
|
||||
|
||||
return val;
|
||||
@ -657,19 +686,19 @@ struct snapshot_context {
|
||||
* Returns the specific division function to use if the divisor
|
||||
* is constant. This avoids extra branches when the trigger is hit.
|
||||
*/
|
||||
static hist_field_fn_t hist_field_get_div_fn(struct hist_field *divisor)
|
||||
static enum hist_field_fn hist_field_get_div_fn(struct hist_field *divisor)
|
||||
{
|
||||
u64 div = divisor->constant;
|
||||
|
||||
if (!(div & (div - 1)))
|
||||
return div_by_power_of_two;
|
||||
return HIST_FIELD_FN_DIV_POWER2;
|
||||
|
||||
/* If the divisor is too large, do a regular division */
|
||||
if (div > (1 << HIST_DIV_SHIFT))
|
||||
return div_by_not_power_of_two;
|
||||
return HIST_FIELD_FN_DIV_NOT_POWER2;
|
||||
|
||||
divisor->div_multiplier = div64_u64((u64)(1 << HIST_DIV_SHIFT), div);
|
||||
return div_by_mult_and_shift;
|
||||
return HIST_FIELD_FN_DIV_MULT_SHIFT;
|
||||
}
|
||||
|
||||
static void track_data_free(struct track_data *track_data)
|
||||
@ -1334,38 +1363,32 @@ static const char *hist_field_name(struct hist_field *field,
|
||||
return field_name;
|
||||
}
|
||||
|
||||
static hist_field_fn_t select_value_fn(int field_size, int field_is_signed)
|
||||
static enum hist_field_fn select_value_fn(int field_size, int field_is_signed)
|
||||
{
|
||||
hist_field_fn_t fn = NULL;
|
||||
|
||||
switch (field_size) {
|
||||
case 8:
|
||||
if (field_is_signed)
|
||||
fn = hist_field_s64;
|
||||
return HIST_FIELD_FN_S64;
|
||||
else
|
||||
fn = hist_field_u64;
|
||||
break;
|
||||
return HIST_FIELD_FN_U64;
|
||||
case 4:
|
||||
if (field_is_signed)
|
||||
fn = hist_field_s32;
|
||||
return HIST_FIELD_FN_S32;
|
||||
else
|
||||
fn = hist_field_u32;
|
||||
break;
|
||||
return HIST_FIELD_FN_U32;
|
||||
case 2:
|
||||
if (field_is_signed)
|
||||
fn = hist_field_s16;
|
||||
return HIST_FIELD_FN_S16;
|
||||
else
|
||||
fn = hist_field_u16;
|
||||
break;
|
||||
return HIST_FIELD_FN_U16;
|
||||
case 1:
|
||||
if (field_is_signed)
|
||||
fn = hist_field_s8;
|
||||
return HIST_FIELD_FN_S8;
|
||||
else
|
||||
fn = hist_field_u8;
|
||||
break;
|
||||
return HIST_FIELD_FN_U8;
|
||||
}
|
||||
|
||||
return fn;
|
||||
return HIST_FIELD_FN_NOP;
|
||||
}
|
||||
|
||||
static int parse_map_size(char *str)
|
||||
@ -1922,19 +1945,19 @@ static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data,
|
||||
goto out; /* caller will populate */
|
||||
|
||||
if (flags & HIST_FIELD_FL_VAR_REF) {
|
||||
hist_field->fn = hist_field_var_ref;
|
||||
hist_field->fn_num = HIST_FIELD_FN_VAR_REF;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (flags & HIST_FIELD_FL_HITCOUNT) {
|
||||
hist_field->fn = hist_field_counter;
|
||||
hist_field->fn_num = HIST_FIELD_FN_COUNTER;
|
||||
hist_field->size = sizeof(u64);
|
||||
hist_field->type = "u64";
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (flags & HIST_FIELD_FL_CONST) {
|
||||
hist_field->fn = hist_field_const;
|
||||
hist_field->fn_num = HIST_FIELD_FN_CONST;
|
||||
hist_field->size = sizeof(u64);
|
||||
hist_field->type = kstrdup("u64", GFP_KERNEL);
|
||||
if (!hist_field->type)
|
||||
@ -1943,14 +1966,14 @@ static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data,
|
||||
}
|
||||
|
||||
if (flags & HIST_FIELD_FL_STACKTRACE) {
|
||||
hist_field->fn = hist_field_none;
|
||||
hist_field->fn_num = HIST_FIELD_FN_NOP;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (flags & (HIST_FIELD_FL_LOG2 | HIST_FIELD_FL_BUCKET)) {
|
||||
unsigned long fl = flags & ~(HIST_FIELD_FL_LOG2 | HIST_FIELD_FL_BUCKET);
|
||||
hist_field->fn = flags & HIST_FIELD_FL_LOG2 ? hist_field_log2 :
|
||||
hist_field_bucket;
|
||||
hist_field->fn_num = flags & HIST_FIELD_FL_LOG2 ? HIST_FIELD_FN_LOG2 :
|
||||
HIST_FIELD_FN_BUCKET;
|
||||
hist_field->operands[0] = create_hist_field(hist_data, field, fl, NULL);
|
||||
hist_field->size = hist_field->operands[0]->size;
|
||||
hist_field->type = kstrdup_const(hist_field->operands[0]->type, GFP_KERNEL);
|
||||
@ -1960,14 +1983,14 @@ static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data,
|
||||
}
|
||||
|
||||
if (flags & HIST_FIELD_FL_TIMESTAMP) {
|
||||
hist_field->fn = hist_field_timestamp;
|
||||
hist_field->fn_num = HIST_FIELD_FN_TIMESTAMP;
|
||||
hist_field->size = sizeof(u64);
|
||||
hist_field->type = "u64";
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (flags & HIST_FIELD_FL_CPU) {
|
||||
hist_field->fn = hist_field_cpu;
|
||||
hist_field->fn_num = HIST_FIELD_FN_CPU;
|
||||
hist_field->size = sizeof(int);
|
||||
hist_field->type = "unsigned int";
|
||||
goto out;
|
||||
@ -1987,14 +2010,14 @@ static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data,
|
||||
goto free;
|
||||
|
||||
if (field->filter_type == FILTER_STATIC_STRING) {
|
||||
hist_field->fn = hist_field_string;
|
||||
hist_field->fn_num = HIST_FIELD_FN_STRING;
|
||||
hist_field->size = field->size;
|
||||
} else if (field->filter_type == FILTER_DYN_STRING) {
|
||||
hist_field->fn = hist_field_dynstring;
|
||||
hist_field->fn_num = HIST_FIELD_FN_DYNSTRING;
|
||||
} else if (field->filter_type == FILTER_RDYN_STRING)
|
||||
hist_field->fn = hist_field_reldynstring;
|
||||
hist_field->fn_num = HIST_FIELD_FN_RELDYNSTRING;
|
||||
else
|
||||
hist_field->fn = hist_field_pstring;
|
||||
hist_field->fn_num = HIST_FIELD_FN_PSTRING;
|
||||
} else {
|
||||
hist_field->size = field->size;
|
||||
hist_field->is_signed = field->is_signed;
|
||||
@ -2002,9 +2025,9 @@ static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data,
|
||||
if (!hist_field->type)
|
||||
goto free;
|
||||
|
||||
hist_field->fn = select_value_fn(field->size,
|
||||
field->is_signed);
|
||||
if (!hist_field->fn) {
|
||||
hist_field->fn_num = select_value_fn(field->size,
|
||||
field->is_signed);
|
||||
if (hist_field->fn_num == HIST_FIELD_FN_NOP) {
|
||||
destroy_hist_field(hist_field, 0);
|
||||
return NULL;
|
||||
}
|
||||
@ -2340,7 +2363,7 @@ static struct hist_field *create_alias(struct hist_trigger_data *hist_data,
|
||||
if (!alias)
|
||||
return NULL;
|
||||
|
||||
alias->fn = var_ref->fn;
|
||||
alias->fn_num = var_ref->fn_num;
|
||||
alias->operands[0] = var_ref;
|
||||
|
||||
if (init_var_ref(alias, var_ref, var_ref->system, var_ref->event_name)) {
|
||||
@ -2523,7 +2546,7 @@ static struct hist_field *parse_unary(struct hist_trigger_data *hist_data,
|
||||
|
||||
expr->flags |= operand1->flags &
|
||||
(HIST_FIELD_FL_TIMESTAMP | HIST_FIELD_FL_TIMESTAMP_USECS);
|
||||
expr->fn = hist_field_unary_minus;
|
||||
expr->fn_num = HIST_FIELD_FN_UMINUS;
|
||||
expr->operands[0] = operand1;
|
||||
expr->size = operand1->size;
|
||||
expr->is_signed = operand1->is_signed;
|
||||
@ -2595,7 +2618,7 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data,
|
||||
unsigned long operand_flags, operand2_flags;
|
||||
int field_op, ret = -EINVAL;
|
||||
char *sep, *operand1_str;
|
||||
hist_field_fn_t op_fn;
|
||||
enum hist_field_fn op_fn;
|
||||
bool combine_consts;
|
||||
|
||||
if (*n_subexprs > 3) {
|
||||
@ -2654,16 +2677,16 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data,
|
||||
|
||||
switch (field_op) {
|
||||
case FIELD_OP_MINUS:
|
||||
op_fn = hist_field_minus;
|
||||
op_fn = HIST_FIELD_FN_MINUS;
|
||||
break;
|
||||
case FIELD_OP_PLUS:
|
||||
op_fn = hist_field_plus;
|
||||
op_fn = HIST_FIELD_FN_PLUS;
|
||||
break;
|
||||
case FIELD_OP_DIV:
|
||||
op_fn = hist_field_div;
|
||||
op_fn = HIST_FIELD_FN_DIV;
|
||||
break;
|
||||
case FIELD_OP_MULT:
|
||||
op_fn = hist_field_mult;
|
||||
op_fn = HIST_FIELD_FN_MULT;
|
||||
break;
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
@ -2719,13 +2742,16 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data,
|
||||
op_fn = hist_field_get_div_fn(operand2);
|
||||
}
|
||||
|
||||
expr->fn_num = op_fn;
|
||||
|
||||
if (combine_consts) {
|
||||
if (var1)
|
||||
expr->operands[0] = var1;
|
||||
if (var2)
|
||||
expr->operands[1] = var2;
|
||||
|
||||
expr->constant = op_fn(expr, NULL, NULL, NULL, NULL);
|
||||
expr->constant = hist_fn_call(expr, NULL, NULL, NULL, NULL);
|
||||
expr->fn_num = HIST_FIELD_FN_CONST;
|
||||
|
||||
expr->operands[0] = NULL;
|
||||
expr->operands[1] = NULL;
|
||||
@ -2739,8 +2765,6 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data,
|
||||
|
||||
expr->name = expr_str(expr, 0);
|
||||
} else {
|
||||
expr->fn = op_fn;
|
||||
|
||||
/* The operand sizes should be the same, so just pick one */
|
||||
expr->size = operand1->size;
|
||||
expr->is_signed = operand1->is_signed;
|
||||
@ -3065,7 +3089,7 @@ static inline void __update_field_vars(struct tracing_map_elt *elt,
|
||||
struct hist_field *var = field_var->var;
|
||||
struct hist_field *val = field_var->val;
|
||||
|
||||
var_val = val->fn(val, elt, buffer, rbe, rec);
|
||||
var_val = hist_fn_call(val, elt, buffer, rbe, rec);
|
||||
var_idx = var->var.idx;
|
||||
|
||||
if (val->flags & HIST_FIELD_FL_STRING) {
|
||||
@ -4186,6 +4210,74 @@ static u64 hist_field_execname(struct hist_field *hist_field,
|
||||
return (u64)(unsigned long)(elt_data->comm);
|
||||
}
|
||||
|
||||
static u64 hist_fn_call(struct hist_field *hist_field,
|
||||
struct tracing_map_elt *elt,
|
||||
struct trace_buffer *buffer,
|
||||
struct ring_buffer_event *rbe,
|
||||
void *event)
|
||||
{
|
||||
switch (hist_field->fn_num) {
|
||||
case HIST_FIELD_FN_VAR_REF:
|
||||
return hist_field_var_ref(hist_field, elt, buffer, rbe, event);
|
||||
case HIST_FIELD_FN_COUNTER:
|
||||
return hist_field_counter(hist_field, elt, buffer, rbe, event);
|
||||
case HIST_FIELD_FN_CONST:
|
||||
return hist_field_const(hist_field, elt, buffer, rbe, event);
|
||||
case HIST_FIELD_FN_LOG2:
|
||||
return hist_field_log2(hist_field, elt, buffer, rbe, event);
|
||||
case HIST_FIELD_FN_BUCKET:
|
||||
return hist_field_bucket(hist_field, elt, buffer, rbe, event);
|
||||
case HIST_FIELD_FN_TIMESTAMP:
|
||||
return hist_field_timestamp(hist_field, elt, buffer, rbe, event);
|
||||
case HIST_FIELD_FN_CPU:
|
||||
return hist_field_cpu(hist_field, elt, buffer, rbe, event);
|
||||
case HIST_FIELD_FN_STRING:
|
||||
return hist_field_string(hist_field, elt, buffer, rbe, event);
|
||||
case HIST_FIELD_FN_DYNSTRING:
|
||||
return hist_field_dynstring(hist_field, elt, buffer, rbe, event);
|
||||
case HIST_FIELD_FN_RELDYNSTRING:
|
||||
return hist_field_reldynstring(hist_field, elt, buffer, rbe, event);
|
||||
case HIST_FIELD_FN_PSTRING:
|
||||
return hist_field_pstring(hist_field, elt, buffer, rbe, event);
|
||||
case HIST_FIELD_FN_S64:
|
||||
return hist_field_s64(hist_field, elt, buffer, rbe, event);
|
||||
case HIST_FIELD_FN_U64:
|
||||
return hist_field_u64(hist_field, elt, buffer, rbe, event);
|
||||
case HIST_FIELD_FN_S32:
|
||||
return hist_field_s32(hist_field, elt, buffer, rbe, event);
|
||||
case HIST_FIELD_FN_U32:
|
||||
return hist_field_u32(hist_field, elt, buffer, rbe, event);
|
||||
case HIST_FIELD_FN_S16:
|
||||
return hist_field_s16(hist_field, elt, buffer, rbe, event);
|
||||
case HIST_FIELD_FN_U16:
|
||||
return hist_field_u16(hist_field, elt, buffer, rbe, event);
|
||||
case HIST_FIELD_FN_S8:
|
||||
return hist_field_s8(hist_field, elt, buffer, rbe, event);
|
||||
case HIST_FIELD_FN_U8:
|
||||
return hist_field_u8(hist_field, elt, buffer, rbe, event);
|
||||
case HIST_FIELD_FN_UMINUS:
|
||||
return hist_field_unary_minus(hist_field, elt, buffer, rbe, event);
|
||||
case HIST_FIELD_FN_MINUS:
|
||||
return hist_field_minus(hist_field, elt, buffer, rbe, event);
|
||||
case HIST_FIELD_FN_PLUS:
|
||||
return hist_field_plus(hist_field, elt, buffer, rbe, event);
|
||||
case HIST_FIELD_FN_DIV:
|
||||
return hist_field_div(hist_field, elt, buffer, rbe, event);
|
||||
case HIST_FIELD_FN_MULT:
|
||||
return hist_field_mult(hist_field, elt, buffer, rbe, event);
|
||||
case HIST_FIELD_FN_DIV_POWER2:
|
||||
return div_by_power_of_two(hist_field, elt, buffer, rbe, event);
|
||||
case HIST_FIELD_FN_DIV_NOT_POWER2:
|
||||
return div_by_not_power_of_two(hist_field, elt, buffer, rbe, event);
|
||||
case HIST_FIELD_FN_DIV_MULT_SHIFT:
|
||||
return div_by_mult_and_shift(hist_field, elt, buffer, rbe, event);
|
||||
case HIST_FIELD_FN_EXECNAME:
|
||||
return hist_field_execname(hist_field, elt, buffer, rbe, event);
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Convert a var that points to common_pid.execname to a string */
|
||||
static void update_var_execname(struct hist_field *hist_field)
|
||||
{
|
||||
@ -4197,7 +4289,7 @@ static void update_var_execname(struct hist_field *hist_field)
|
||||
kfree_const(hist_field->type);
|
||||
hist_field->type = "char[]";
|
||||
|
||||
hist_field->fn = hist_field_execname;
|
||||
hist_field->fn_num = HIST_FIELD_FN_EXECNAME;
|
||||
}
|
||||
|
||||
static int create_var_field(struct hist_trigger_data *hist_data,
|
||||
@ -4956,7 +5048,7 @@ static void hist_trigger_elt_update(struct hist_trigger_data *hist_data,
|
||||
|
||||
for_each_hist_val_field(i, hist_data) {
|
||||
hist_field = hist_data->fields[i];
|
||||
hist_val = hist_field->fn(hist_field, elt, buffer, rbe, rec);
|
||||
hist_val = hist_fn_call(hist_field, elt, buffer, rbe, rec);
|
||||
if (hist_field->flags & HIST_FIELD_FL_VAR) {
|
||||
var_idx = hist_field->var.idx;
|
||||
|
||||
@ -4987,7 +5079,7 @@ static void hist_trigger_elt_update(struct hist_trigger_data *hist_data,
|
||||
for_each_hist_key_field(i, hist_data) {
|
||||
hist_field = hist_data->fields[i];
|
||||
if (hist_field->flags & HIST_FIELD_FL_VAR) {
|
||||
hist_val = hist_field->fn(hist_field, elt, buffer, rbe, rec);
|
||||
hist_val = hist_fn_call(hist_field, elt, buffer, rbe, rec);
|
||||
var_idx = hist_field->var.idx;
|
||||
tracing_map_set_var(elt, var_idx, hist_val);
|
||||
}
|
||||
@ -5062,7 +5154,7 @@ static void event_hist_trigger(struct event_trigger_data *data,
|
||||
HIST_STACKTRACE_SKIP);
|
||||
key = entries;
|
||||
} else {
|
||||
field_contents = key_field->fn(key_field, elt, buffer, rbe, rec);
|
||||
field_contents = hist_fn_call(key_field, elt, buffer, rbe, rec);
|
||||
if (key_field->flags & HIST_FIELD_FL_STRING) {
|
||||
key = (void *)(unsigned long)field_contents;
|
||||
use_compound_key = true;
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include <linux/uio.h>
|
||||
#include <linux/ioctl.h>
|
||||
#include <linux/jhash.h>
|
||||
#include <linux/refcount.h>
|
||||
#include <linux/trace_events.h>
|
||||
#include <linux/tracefs.h>
|
||||
#include <linux/types.h>
|
||||
@ -39,28 +40,69 @@
|
||||
*/
|
||||
#define MAX_PAGE_ORDER 0
|
||||
#define MAX_PAGES (1 << MAX_PAGE_ORDER)
|
||||
#define MAX_EVENTS (MAX_PAGES * PAGE_SIZE)
|
||||
#define MAX_BYTES (MAX_PAGES * PAGE_SIZE)
|
||||
#define MAX_EVENTS (MAX_BYTES * 8)
|
||||
|
||||
/* Limit how long of an event name plus args within the subsystem. */
|
||||
#define MAX_EVENT_DESC 512
|
||||
#define EVENT_NAME(user_event) ((user_event)->tracepoint.name)
|
||||
#define MAX_FIELD_ARRAY_SIZE 1024
|
||||
#define MAX_FIELD_ARG_NAME 256
|
||||
|
||||
static char *register_page_data;
|
||||
/*
|
||||
* The MAP_STATUS_* macros are used for taking a index and determining the
|
||||
* appropriate byte and the bit in the byte to set/reset for an event.
|
||||
*
|
||||
* The lower 3 bits of the index decide which bit to set.
|
||||
* The remaining upper bits of the index decide which byte to use for the bit.
|
||||
*
|
||||
* This is used when an event has a probe attached/removed to reflect live
|
||||
* status of the event wanting tracing or not to user-programs via shared
|
||||
* memory maps.
|
||||
*/
|
||||
#define MAP_STATUS_BYTE(index) ((index) >> 3)
|
||||
#define MAP_STATUS_MASK(index) BIT((index) & 7)
|
||||
|
||||
static DEFINE_MUTEX(reg_mutex);
|
||||
static DEFINE_HASHTABLE(register_table, 4);
|
||||
static DECLARE_BITMAP(page_bitmap, MAX_EVENTS);
|
||||
/*
|
||||
* Internal bits (kernel side only) to keep track of connected probes:
|
||||
* These are used when status is requested in text form about an event. These
|
||||
* bits are compared against an internal byte on the event to determine which
|
||||
* probes to print out to the user.
|
||||
*
|
||||
* These do not reflect the mapped bytes between the user and kernel space.
|
||||
*/
|
||||
#define EVENT_STATUS_FTRACE BIT(0)
|
||||
#define EVENT_STATUS_PERF BIT(1)
|
||||
#define EVENT_STATUS_OTHER BIT(7)
|
||||
|
||||
/*
|
||||
* Stores the pages, tables, and locks for a group of events.
|
||||
* Each logical grouping of events has its own group, with a
|
||||
* matching page for status checks within user programs. This
|
||||
* allows for isolation of events to user programs by various
|
||||
* means.
|
||||
*/
|
||||
struct user_event_group {
|
||||
struct page *pages;
|
||||
char *register_page_data;
|
||||
char *system_name;
|
||||
struct hlist_node node;
|
||||
struct mutex reg_mutex;
|
||||
DECLARE_HASHTABLE(register_table, 8);
|
||||
DECLARE_BITMAP(page_bitmap, MAX_EVENTS);
|
||||
};
|
||||
|
||||
/* Group for init_user_ns mapping, top-most group */
|
||||
static struct user_event_group *init_group;
|
||||
|
||||
/*
|
||||
* Stores per-event properties, as users register events
|
||||
* within a file a user_event might be created if it does not
|
||||
* already exist. These are globally used and their lifetime
|
||||
* is tied to the refcnt member. These cannot go away until the
|
||||
* refcnt reaches zero.
|
||||
* refcnt reaches one.
|
||||
*/
|
||||
struct user_event {
|
||||
struct user_event_group *group;
|
||||
struct tracepoint tracepoint;
|
||||
struct trace_event_call call;
|
||||
struct trace_event_class class;
|
||||
@ -68,10 +110,11 @@ struct user_event {
|
||||
struct hlist_node node;
|
||||
struct list_head fields;
|
||||
struct list_head validators;
|
||||
atomic_t refcnt;
|
||||
refcount_t refcnt;
|
||||
int index;
|
||||
int flags;
|
||||
int min_size;
|
||||
char status;
|
||||
};
|
||||
|
||||
/*
|
||||
@ -86,6 +129,11 @@ struct user_event_refs {
|
||||
struct user_event *events[];
|
||||
};
|
||||
|
||||
struct user_event_file_info {
|
||||
struct user_event_group *group;
|
||||
struct user_event_refs *refs;
|
||||
};
|
||||
|
||||
#define VALIDATOR_ENSURE_NULL (1 << 0)
|
||||
#define VALIDATOR_REL (1 << 1)
|
||||
|
||||
@ -98,7 +146,8 @@ struct user_event_validator {
|
||||
typedef void (*user_event_func_t) (struct user_event *user, struct iov_iter *i,
|
||||
void *tpdata, bool *faulted);
|
||||
|
||||
static int user_event_parse(char *name, char *args, char *flags,
|
||||
static int user_event_parse(struct user_event_group *group, char *name,
|
||||
char *args, char *flags,
|
||||
struct user_event **newuser);
|
||||
|
||||
static u32 user_event_key(char *name)
|
||||
@ -106,6 +155,144 @@ static u32 user_event_key(char *name)
|
||||
return jhash(name, strlen(name), 0);
|
||||
}
|
||||
|
||||
/*
 * Mark (@set == true) or unmark (@set == false) as reserved each of the
 * MAX_PAGES pages that start at the kernel virtual address @pages.
 */
static void set_page_reservations(char *pages, bool set)
{
	int idx = 0;

	while (idx < MAX_PAGES) {
		void *addr = pages + (PAGE_SIZE * idx);

		if (!set)
			ClearPageReserved(virt_to_page(addr));
		else
			SetPageReserved(virt_to_page(addr));

		++idx;
	}
}
|
||||
|
||||
static void user_event_group_destroy(struct user_event_group *group)
|
||||
{
|
||||
if (group->register_page_data)
|
||||
set_page_reservations(group->register_page_data, false);
|
||||
|
||||
if (group->pages)
|
||||
__free_pages(group->pages, MAX_PAGE_ORDER);
|
||||
|
||||
kfree(group->system_name);
|
||||
kfree(group);
|
||||
}
|
||||
|
||||
static char *user_event_group_system_name(struct user_namespace *user_ns)
|
||||
{
|
||||
char *system_name;
|
||||
int len = sizeof(USER_EVENTS_SYSTEM) + 1;
|
||||
|
||||
if (user_ns != &init_user_ns) {
|
||||
/*
|
||||
* Unexpected at this point:
|
||||
* We only currently support init_user_ns.
|
||||
* When we enable more, this will trigger a failure so log.
|
||||
*/
|
||||
pr_warn("user_events: Namespace other than init_user_ns!\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
system_name = kmalloc(len, GFP_KERNEL);
|
||||
|
||||
if (!system_name)
|
||||
return NULL;
|
||||
|
||||
snprintf(system_name, len, "%s", USER_EVENTS_SYSTEM);
|
||||
|
||||
return system_name;
|
||||
}
|
||||
|
||||
static inline struct user_event_group
|
||||
*user_event_group_from_user_ns(struct user_namespace *user_ns)
|
||||
{
|
||||
if (user_ns == &init_user_ns)
|
||||
return init_group;
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static struct user_event_group *current_user_event_group(void)
|
||||
{
|
||||
struct user_namespace *user_ns = current_user_ns();
|
||||
struct user_event_group *group = NULL;
|
||||
|
||||
while (user_ns) {
|
||||
group = user_event_group_from_user_ns(user_ns);
|
||||
|
||||
if (group)
|
||||
break;
|
||||
|
||||
user_ns = user_ns->parent;
|
||||
}
|
||||
|
||||
return group;
|
||||
}
|
||||
|
||||
static struct user_event_group
|
||||
*user_event_group_create(struct user_namespace *user_ns)
|
||||
{
|
||||
struct user_event_group *group;
|
||||
|
||||
group = kzalloc(sizeof(*group), GFP_KERNEL);
|
||||
|
||||
if (!group)
|
||||
return NULL;
|
||||
|
||||
group->system_name = user_event_group_system_name(user_ns);
|
||||
|
||||
if (!group->system_name)
|
||||
goto error;
|
||||
|
||||
group->pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, MAX_PAGE_ORDER);
|
||||
|
||||
if (!group->pages)
|
||||
goto error;
|
||||
|
||||
group->register_page_data = page_address(group->pages);
|
||||
|
||||
set_page_reservations(group->register_page_data, true);
|
||||
|
||||
/* Zero all bits beside 0 (which is reserved for failures) */
|
||||
bitmap_zero(group->page_bitmap, MAX_EVENTS);
|
||||
set_bit(0, group->page_bitmap);
|
||||
|
||||
mutex_init(&group->reg_mutex);
|
||||
hash_init(group->register_table);
|
||||
|
||||
return group;
|
||||
error:
|
||||
if (group)
|
||||
user_event_group_destroy(group);
|
||||
|
||||
return NULL;
|
||||
};
|
||||
|
||||
static __always_inline
|
||||
void user_event_register_set(struct user_event *user)
|
||||
{
|
||||
int i = user->index;
|
||||
|
||||
user->group->register_page_data[MAP_STATUS_BYTE(i)] |= MAP_STATUS_MASK(i);
|
||||
}
|
||||
|
||||
static __always_inline
|
||||
void user_event_register_clear(struct user_event *user)
|
||||
{
|
||||
int i = user->index;
|
||||
|
||||
user->group->register_page_data[MAP_STATUS_BYTE(i)] &= ~MAP_STATUS_MASK(i);
|
||||
}
|
||||
|
||||
static __always_inline __must_check
|
||||
bool user_event_last_ref(struct user_event *user)
|
||||
{
|
||||
return refcount_read(&user->refcnt) == 1;
|
||||
}
|
||||
|
||||
static __always_inline __must_check
|
||||
size_t copy_nofault(void *addr, size_t bytes, struct iov_iter *i)
|
||||
{
|
||||
@ -141,7 +328,8 @@ static struct list_head *user_event_get_fields(struct trace_event_call *call)
|
||||
*
|
||||
* Upon success user_event has its ref count increased by 1.
|
||||
*/
|
||||
static int user_event_parse_cmd(char *raw_command, struct user_event **newuser)
|
||||
static int user_event_parse_cmd(struct user_event_group *group,
|
||||
char *raw_command, struct user_event **newuser)
|
||||
{
|
||||
char *name = raw_command;
|
||||
char *args = strpbrk(name, " ");
|
||||
@ -155,7 +343,7 @@ static int user_event_parse_cmd(char *raw_command, struct user_event **newuser)
|
||||
if (flags)
|
||||
*flags++ = '\0';
|
||||
|
||||
return user_event_parse(name, args, flags, newuser);
|
||||
return user_event_parse(group, name, args, flags, newuser);
|
||||
}
|
||||
|
||||
static int user_field_array_size(const char *type)
|
||||
@ -277,7 +465,7 @@ static int user_event_add_field(struct user_event *user, const char *type,
|
||||
goto add_field;
|
||||
|
||||
add_validator:
|
||||
if (strstr(type, "char") != 0)
|
||||
if (strstr(type, "char") != NULL)
|
||||
validator_flags |= VALIDATOR_ENSURE_NULL;
|
||||
|
||||
validator = kmalloc(sizeof(*validator), GFP_KERNEL);
|
||||
@ -458,7 +646,7 @@ static const char *user_field_format(const char *type)
|
||||
return "%d";
|
||||
if (strcmp(type, "unsigned char") == 0)
|
||||
return "%u";
|
||||
if (strstr(type, "char[") != 0)
|
||||
if (strstr(type, "char[") != NULL)
|
||||
return "%s";
|
||||
|
||||
/* Unknown, likely struct, allowed treat as 64-bit */
|
||||
@ -479,10 +667,52 @@ static bool user_field_is_dyn_string(const char *type, const char **str_func)
|
||||
|
||||
return false;
|
||||
check:
|
||||
return strstr(type, "char") != 0;
|
||||
return strstr(type, "char") != NULL;
|
||||
}
|
||||
|
||||
#define LEN_OR_ZERO (len ? len - pos : 0)
/*
 * Join argv[*iout..] with single spaces into @buf, stopping after the first
 * argument that contains a ';' (or at the end of argv).
 *
 * With @len == 0 nothing is stored and *iout is left untouched, so the call
 * only measures the space needed. With a non-zero @len the text is written
 * and *iout is advanced past the consumed arguments.
 *
 * @colon is set to true when a ';' ended the field, false otherwise.
 *
 * Returns the number of bytes required, including the terminating NUL.
 */
static int user_dyn_field_set_string(int argc, const char **argv, int *iout,
				     char *buf, int len, bool *colon)
{
	int pos = 0;
	int i = *iout;

	*colon = false;

	while (i < argc) {
		const char *arg = argv[i];

		/* Separator before every argument except the first */
		if (i != *iout)
			pos += snprintf(buf + pos, LEN_OR_ZERO, " ");

		pos += snprintf(buf + pos, LEN_OR_ZERO, "%s", arg);

		/* This argument is consumed whether or not it ends the field */
		++i;

		if (strchr(arg, ';')) {
			*colon = true;
			break;
		}
	}

	/* Only a real write (not a sizing pass) moves the caller's index */
	if (len != 0)
		*iout = i;

	return pos + 1;
}
|
||||
|
||||
/*
 * Format @field as "<type> <name>" into @buf, followed by ';' when @colon
 * is true. With @len == 0 nothing is stored and the call only measures.
 *
 * Returns the number of bytes required, including the terminating NUL.
 */
static int user_field_set_string(struct ftrace_event_field *field,
				 char *buf, int len, bool colon)
{
	int pos = 0;

	/* Type, separator, name */
	pos += snprintf(buf + pos, LEN_OR_ZERO, "%s", field->type);
	pos += snprintf(buf + pos, LEN_OR_ZERO, " ");
	pos += snprintf(buf + pos, LEN_OR_ZERO, "%s", field->name);

	if (!colon)
		return pos + 1;

	pos += snprintf(buf + pos, LEN_OR_ZERO, ";");

	return pos + 1;
}
|
||||
|
||||
static int user_event_set_print_fmt(struct user_event *user, char *buf, int len)
|
||||
{
|
||||
struct ftrace_event_field *field, *next;
|
||||
@ -600,8 +830,8 @@ static int destroy_user_event(struct user_event *user)
|
||||
|
||||
dyn_event_remove(&user->devent);
|
||||
|
||||
register_page_data[user->index] = 0;
|
||||
clear_bit(user->index, page_bitmap);
|
||||
user_event_register_clear(user);
|
||||
clear_bit(user->index, user->group->page_bitmap);
|
||||
hash_del(&user->node);
|
||||
|
||||
user_event_destroy_validators(user);
|
||||
@ -612,16 +842,17 @@ static int destroy_user_event(struct user_event *user)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct user_event *find_user_event(char *name, u32 *outkey)
|
||||
static struct user_event *find_user_event(struct user_event_group *group,
|
||||
char *name, u32 *outkey)
|
||||
{
|
||||
struct user_event *user;
|
||||
u32 key = user_event_key(name);
|
||||
|
||||
*outkey = key;
|
||||
|
||||
hash_for_each_possible(register_table, user, node, key)
|
||||
hash_for_each_possible(group->register_table, user, node, key)
|
||||
if (!strcmp(EVENT_NAME(user), name)) {
|
||||
atomic_inc(&user->refcnt);
|
||||
refcount_inc(&user->refcnt);
|
||||
return user;
|
||||
}
|
||||
|
||||
@ -779,7 +1010,12 @@ static void update_reg_page_for(struct user_event *user)
|
||||
rcu_read_unlock_sched();
|
||||
}
|
||||
|
||||
register_page_data[user->index] = status;
|
||||
if (status)
|
||||
user_event_register_set(user);
|
||||
else
|
||||
user_event_register_clear(user);
|
||||
|
||||
user->status = status;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -835,17 +1071,18 @@ static int user_event_reg(struct trace_event_call *call,
|
||||
|
||||
return ret;
|
||||
inc:
|
||||
atomic_inc(&user->refcnt);
|
||||
refcount_inc(&user->refcnt);
|
||||
update_reg_page_for(user);
|
||||
return 0;
|
||||
dec:
|
||||
update_reg_page_for(user);
|
||||
atomic_dec(&user->refcnt);
|
||||
refcount_dec(&user->refcnt);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int user_event_create(const char *raw_command)
|
||||
{
|
||||
struct user_event_group *group;
|
||||
struct user_event *user;
|
||||
char *name;
|
||||
int ret;
|
||||
@ -861,14 +1098,19 @@ static int user_event_create(const char *raw_command)
|
||||
if (!name)
|
||||
return -ENOMEM;
|
||||
|
||||
mutex_lock(&reg_mutex);
|
||||
group = current_user_event_group();
|
||||
|
||||
ret = user_event_parse_cmd(name, &user);
|
||||
if (!group)
|
||||
return -ENOENT;
|
||||
|
||||
mutex_lock(&group->reg_mutex);
|
||||
|
||||
ret = user_event_parse_cmd(group, name, &user);
|
||||
|
||||
if (!ret)
|
||||
atomic_dec(&user->refcnt);
|
||||
refcount_dec(&user->refcnt);
|
||||
|
||||
mutex_unlock(&reg_mutex);
|
||||
mutex_unlock(&group->reg_mutex);
|
||||
|
||||
if (ret)
|
||||
kfree(name);
|
||||
@ -910,14 +1152,14 @@ static bool user_event_is_busy(struct dyn_event *ev)
|
||||
{
|
||||
struct user_event *user = container_of(ev, struct user_event, devent);
|
||||
|
||||
return atomic_read(&user->refcnt) != 0;
|
||||
return !user_event_last_ref(user);
|
||||
}
|
||||
|
||||
static int user_event_free(struct dyn_event *ev)
|
||||
{
|
||||
struct user_event *user = container_of(ev, struct user_event, devent);
|
||||
|
||||
if (atomic_read(&user->refcnt) != 0)
|
||||
if (!user_event_last_ref(user))
|
||||
return -EBUSY;
|
||||
|
||||
return destroy_user_event(user);
|
||||
@ -926,49 +1168,35 @@ static int user_event_free(struct dyn_event *ev)
|
||||
static bool user_field_match(struct ftrace_event_field *field, int argc,
|
||||
const char **argv, int *iout)
|
||||
{
|
||||
char *field_name, *arg_name;
|
||||
int len, pos, i = *iout;
|
||||
char *field_name = NULL, *dyn_field_name = NULL;
|
||||
bool colon = false, match = false;
|
||||
int dyn_len, len;
|
||||
|
||||
if (i >= argc)
|
||||
if (*iout >= argc)
|
||||
return false;
|
||||
|
||||
len = MAX_FIELD_ARG_NAME;
|
||||
field_name = kmalloc(len, GFP_KERNEL);
|
||||
arg_name = kmalloc(len, GFP_KERNEL);
|
||||
dyn_len = user_dyn_field_set_string(argc, argv, iout, dyn_field_name,
|
||||
0, &colon);
|
||||
|
||||
if (!arg_name || !field_name)
|
||||
len = user_field_set_string(field, field_name, 0, colon);
|
||||
|
||||
if (dyn_len != len)
|
||||
return false;
|
||||
|
||||
dyn_field_name = kmalloc(dyn_len, GFP_KERNEL);
|
||||
field_name = kmalloc(len, GFP_KERNEL);
|
||||
|
||||
if (!dyn_field_name || !field_name)
|
||||
goto out;
|
||||
|
||||
pos = 0;
|
||||
user_dyn_field_set_string(argc, argv, iout, dyn_field_name,
|
||||
dyn_len, &colon);
|
||||
|
||||
for (; i < argc; ++i) {
|
||||
if (i != *iout)
|
||||
pos += snprintf(arg_name + pos, len - pos, " ");
|
||||
user_field_set_string(field, field_name, len, colon);
|
||||
|
||||
pos += snprintf(arg_name + pos, len - pos, argv[i]);
|
||||
|
||||
if (strchr(argv[i], ';')) {
|
||||
++i;
|
||||
colon = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
pos = 0;
|
||||
|
||||
pos += snprintf(field_name + pos, len - pos, field->type);
|
||||
pos += snprintf(field_name + pos, len - pos, " ");
|
||||
pos += snprintf(field_name + pos, len - pos, field->name);
|
||||
|
||||
if (colon)
|
||||
pos += snprintf(field_name + pos, len - pos, ";");
|
||||
|
||||
*iout = i;
|
||||
|
||||
match = strcmp(arg_name, field_name) == 0;
|
||||
match = strcmp(dyn_field_name, field_name) == 0;
|
||||
out:
|
||||
kfree(arg_name);
|
||||
kfree(dyn_field_name);
|
||||
kfree(field_name);
|
||||
|
||||
return match;
|
||||
@ -1036,7 +1264,8 @@ static int user_event_trace_register(struct user_event *user)
|
||||
* The name buffer lifetime is owned by this method for success cases only.
|
||||
* Upon success the returned user_event has its ref count increased by 1.
|
||||
*/
|
||||
static int user_event_parse(char *name, char *args, char *flags,
|
||||
static int user_event_parse(struct user_event_group *group, char *name,
|
||||
char *args, char *flags,
|
||||
struct user_event **newuser)
|
||||
{
|
||||
int ret;
|
||||
@ -1046,7 +1275,7 @@ static int user_event_parse(char *name, char *args, char *flags,
|
||||
|
||||
/* Prevent dyn_event from racing */
|
||||
mutex_lock(&event_mutex);
|
||||
user = find_user_event(name, &key);
|
||||
user = find_user_event(group, name, &key);
|
||||
mutex_unlock(&event_mutex);
|
||||
|
||||
if (user) {
|
||||
@ -1059,7 +1288,7 @@ static int user_event_parse(char *name, char *args, char *flags,
|
||||
return 0;
|
||||
}
|
||||
|
||||
index = find_first_zero_bit(page_bitmap, MAX_EVENTS);
|
||||
index = find_first_zero_bit(group->page_bitmap, MAX_EVENTS);
|
||||
|
||||
if (index == MAX_EVENTS)
|
||||
return -EMFILE;
|
||||
@ -1073,6 +1302,7 @@ static int user_event_parse(char *name, char *args, char *flags,
|
||||
INIT_LIST_HEAD(&user->fields);
|
||||
INIT_LIST_HEAD(&user->validators);
|
||||
|
||||
user->group = group;
|
||||
user->tracepoint.name = name;
|
||||
|
||||
ret = user_event_parse_fields(user, args);
|
||||
@ -1091,8 +1321,8 @@ static int user_event_parse(char *name, char *args, char *flags,
|
||||
user->call.flags = TRACE_EVENT_FL_TRACEPOINT;
|
||||
user->call.tp = &user->tracepoint;
|
||||
user->call.event.funcs = &user_event_funcs;
|
||||
user->class.system = group->system_name;
|
||||
|
||||
user->class.system = USER_EVENTS_SYSTEM;
|
||||
user->class.fields_array = user_event_fields_array;
|
||||
user->class.get_fields = user_event_get_fields;
|
||||
user->class.reg = user_event_reg;
|
||||
@ -1110,13 +1340,13 @@ static int user_event_parse(char *name, char *args, char *flags,
|
||||
|
||||
user->index = index;
|
||||
|
||||
/* Ensure we track ref */
|
||||
atomic_inc(&user->refcnt);
|
||||
/* Ensure we track self ref and caller ref (2) */
|
||||
refcount_set(&user->refcnt, 2);
|
||||
|
||||
dyn_event_init(&user->devent, &user_event_dops);
|
||||
dyn_event_add(&user->devent, &user->call);
|
||||
set_bit(user->index, page_bitmap);
|
||||
hash_add(register_table, &user->node, key);
|
||||
set_bit(user->index, group->page_bitmap);
|
||||
hash_add(group->register_table, &user->node, key);
|
||||
|
||||
mutex_unlock(&event_mutex);
|
||||
|
||||
@ -1134,32 +1364,20 @@ static int user_event_parse(char *name, char *args, char *flags,
|
||||
/*
|
||||
* Deletes a previously created event if it is no longer being used.
|
||||
*/
|
||||
static int delete_user_event(char *name)
|
||||
static int delete_user_event(struct user_event_group *group, char *name)
|
||||
{
|
||||
u32 key;
|
||||
int ret;
|
||||
struct user_event *user = find_user_event(name, &key);
|
||||
struct user_event *user = find_user_event(group, name, &key);
|
||||
|
||||
if (!user)
|
||||
return -ENOENT;
|
||||
|
||||
/* Ensure we are the last ref */
|
||||
if (atomic_read(&user->refcnt) != 1) {
|
||||
ret = -EBUSY;
|
||||
goto put_ref;
|
||||
}
|
||||
refcount_dec(&user->refcnt);
|
||||
|
||||
ret = destroy_user_event(user);
|
||||
if (!user_event_last_ref(user))
|
||||
return -EBUSY;
|
||||
|
||||
if (ret)
|
||||
goto put_ref;
|
||||
|
||||
return ret;
|
||||
put_ref:
|
||||
/* No longer have this ref */
|
||||
atomic_dec(&user->refcnt);
|
||||
|
||||
return ret;
|
||||
return destroy_user_event(user);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1167,6 +1385,7 @@ static int delete_user_event(char *name)
|
||||
*/
|
||||
static ssize_t user_events_write_core(struct file *file, struct iov_iter *i)
|
||||
{
|
||||
struct user_event_file_info *info = file->private_data;
|
||||
struct user_event_refs *refs;
|
||||
struct user_event *user = NULL;
|
||||
struct tracepoint *tp;
|
||||
@ -1178,7 +1397,7 @@ static ssize_t user_events_write_core(struct file *file, struct iov_iter *i)
|
||||
|
||||
rcu_read_lock_sched();
|
||||
|
||||
refs = rcu_dereference_sched(file->private_data);
|
||||
refs = rcu_dereference_sched(info->refs);
|
||||
|
||||
/*
|
||||
* The refs->events array is protected by RCU, and new items may be
|
||||
@ -1236,6 +1455,28 @@ static ssize_t user_events_write_core(struct file *file, struct iov_iter *i)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int user_events_open(struct inode *node, struct file *file)
|
||||
{
|
||||
struct user_event_group *group;
|
||||
struct user_event_file_info *info;
|
||||
|
||||
group = current_user_event_group();
|
||||
|
||||
if (!group)
|
||||
return -ENOENT;
|
||||
|
||||
info = kzalloc(sizeof(*info), GFP_KERNEL);
|
||||
|
||||
if (!info)
|
||||
return -ENOMEM;
|
||||
|
||||
info->group = group;
|
||||
|
||||
file->private_data = info;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static ssize_t user_events_write(struct file *file, const char __user *ubuf,
|
||||
size_t count, loff_t *ppos)
|
||||
{
|
||||
@ -1245,7 +1486,8 @@ static ssize_t user_events_write(struct file *file, const char __user *ubuf,
|
||||
if (unlikely(*ppos != 0))
|
||||
return -EFAULT;
|
||||
|
||||
if (unlikely(import_single_range(READ, (char *)ubuf, count, &iov, &i)))
|
||||
if (unlikely(import_single_range(WRITE, (char __user *)ubuf,
|
||||
count, &iov, &i)))
|
||||
return -EFAULT;
|
||||
|
||||
return user_events_write_core(file, &i);
|
||||
@ -1256,13 +1498,15 @@ static ssize_t user_events_write_iter(struct kiocb *kp, struct iov_iter *i)
|
||||
return user_events_write_core(kp->ki_filp, i);
|
||||
}
|
||||
|
||||
static int user_events_ref_add(struct file *file, struct user_event *user)
|
||||
static int user_events_ref_add(struct user_event_file_info *info,
|
||||
struct user_event *user)
|
||||
{
|
||||
struct user_event_group *group = info->group;
|
||||
struct user_event_refs *refs, *new_refs;
|
||||
int i, size, count = 0;
|
||||
|
||||
refs = rcu_dereference_protected(file->private_data,
|
||||
lockdep_is_held(&reg_mutex));
|
||||
refs = rcu_dereference_protected(info->refs,
|
||||
lockdep_is_held(&group->reg_mutex));
|
||||
|
||||
if (refs) {
|
||||
count = refs->count;
|
||||
@ -1286,9 +1530,9 @@ static int user_events_ref_add(struct file *file, struct user_event *user)
|
||||
|
||||
new_refs->events[i] = user;
|
||||
|
||||
atomic_inc(&user->refcnt);
|
||||
refcount_inc(&user->refcnt);
|
||||
|
||||
rcu_assign_pointer(file->private_data, new_refs);
|
||||
rcu_assign_pointer(info->refs, new_refs);
|
||||
|
||||
if (refs)
|
||||
kfree_rcu(refs, rcu);
|
||||
@ -1309,13 +1553,24 @@ static long user_reg_get(struct user_reg __user *ureg, struct user_reg *kreg)
|
||||
if (size > PAGE_SIZE)
|
||||
return -E2BIG;
|
||||
|
||||
return copy_struct_from_user(kreg, sizeof(*kreg), ureg, size);
|
||||
if (size < offsetofend(struct user_reg, write_index))
|
||||
return -EINVAL;
|
||||
|
||||
ret = copy_struct_from_user(kreg, sizeof(*kreg), ureg, size);
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
kreg->size = size;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Registers a user_event on behalf of a user process.
|
||||
*/
|
||||
static long user_events_ioctl_reg(struct file *file, unsigned long uarg)
|
||||
static long user_events_ioctl_reg(struct user_event_file_info *info,
|
||||
unsigned long uarg)
|
||||
{
|
||||
struct user_reg __user *ureg = (struct user_reg __user *)uarg;
|
||||
struct user_reg reg;
|
||||
@ -1336,24 +1591,24 @@ static long user_events_ioctl_reg(struct file *file, unsigned long uarg)
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = user_event_parse_cmd(name, &user);
|
||||
ret = user_event_parse_cmd(info->group, name, &user);
|
||||
|
||||
if (ret) {
|
||||
kfree(name);
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = user_events_ref_add(file, user);
|
||||
ret = user_events_ref_add(info, user);
|
||||
|
||||
/* No longer need parse ref, ref_add either worked or not */
|
||||
atomic_dec(&user->refcnt);
|
||||
refcount_dec(&user->refcnt);
|
||||
|
||||
/* Positive number is index and valid */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
put_user((u32)ret, &ureg->write_index);
|
||||
put_user(user->index, &ureg->status_index);
|
||||
put_user(user->index, &ureg->status_bit);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -1361,7 +1616,8 @@ static long user_events_ioctl_reg(struct file *file, unsigned long uarg)
|
||||
/*
|
||||
* Deletes a user_event on behalf of a user process.
|
||||
*/
|
||||
static long user_events_ioctl_del(struct file *file, unsigned long uarg)
|
||||
static long user_events_ioctl_del(struct user_event_file_info *info,
|
||||
unsigned long uarg)
|
||||
{
|
||||
void __user *ubuf = (void __user *)uarg;
|
||||
char *name;
|
||||
@ -1374,7 +1630,7 @@ static long user_events_ioctl_del(struct file *file, unsigned long uarg)
|
||||
|
||||
/* event_mutex prevents dyn_event from racing */
|
||||
mutex_lock(&event_mutex);
|
||||
ret = delete_user_event(name);
|
||||
ret = delete_user_event(info->group, name);
|
||||
mutex_unlock(&event_mutex);
|
||||
|
||||
kfree(name);
|
||||
@ -1388,19 +1644,21 @@ static long user_events_ioctl_del(struct file *file, unsigned long uarg)
|
||||
static long user_events_ioctl(struct file *file, unsigned int cmd,
|
||||
unsigned long uarg)
|
||||
{
|
||||
struct user_event_file_info *info = file->private_data;
|
||||
struct user_event_group *group = info->group;
|
||||
long ret = -ENOTTY;
|
||||
|
||||
switch (cmd) {
|
||||
case DIAG_IOCSREG:
|
||||
mutex_lock(&reg_mutex);
|
||||
ret = user_events_ioctl_reg(file, uarg);
|
||||
mutex_unlock(&reg_mutex);
|
||||
mutex_lock(&group->reg_mutex);
|
||||
ret = user_events_ioctl_reg(info, uarg);
|
||||
mutex_unlock(&group->reg_mutex);
|
||||
break;
|
||||
|
||||
case DIAG_IOCSDEL:
|
||||
mutex_lock(&reg_mutex);
|
||||
ret = user_events_ioctl_del(file, uarg);
|
||||
mutex_unlock(&reg_mutex);
|
||||
mutex_lock(&group->reg_mutex);
|
||||
ret = user_events_ioctl_del(info, uarg);
|
||||
mutex_unlock(&group->reg_mutex);
|
||||
break;
|
||||
}
|
||||
|
||||
@ -1412,17 +1670,24 @@ static long user_events_ioctl(struct file *file, unsigned int cmd,
|
||||
*/
|
||||
static int user_events_release(struct inode *node, struct file *file)
|
||||
{
|
||||
struct user_event_file_info *info = file->private_data;
|
||||
struct user_event_group *group;
|
||||
struct user_event_refs *refs;
|
||||
struct user_event *user;
|
||||
int i;
|
||||
|
||||
if (!info)
|
||||
return -EINVAL;
|
||||
|
||||
group = info->group;
|
||||
|
||||
/*
|
||||
* Ensure refs cannot change under any situation by taking the
|
||||
* register mutex during the final freeing of the references.
|
||||
*/
|
||||
mutex_lock(&reg_mutex);
|
||||
mutex_lock(&group->reg_mutex);
|
||||
|
||||
refs = file->private_data;
|
||||
refs = info->refs;
|
||||
|
||||
if (!refs)
|
||||
goto out;
|
||||
@ -1436,37 +1701,56 @@ static int user_events_release(struct inode *node, struct file *file)
|
||||
user = refs->events[i];
|
||||
|
||||
if (user)
|
||||
atomic_dec(&user->refcnt);
|
||||
refcount_dec(&user->refcnt);
|
||||
}
|
||||
out:
|
||||
file->private_data = NULL;
|
||||
|
||||
mutex_unlock(&reg_mutex);
|
||||
mutex_unlock(&group->reg_mutex);
|
||||
|
||||
kfree(refs);
|
||||
kfree(info);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct file_operations user_data_fops = {
|
||||
.open = user_events_open,
|
||||
.write = user_events_write,
|
||||
.write_iter = user_events_write_iter,
|
||||
.unlocked_ioctl = user_events_ioctl,
|
||||
.release = user_events_release,
|
||||
};
|
||||
|
||||
static struct user_event_group *user_status_group(struct file *file)
|
||||
{
|
||||
struct seq_file *m = file->private_data;
|
||||
|
||||
if (!m)
|
||||
return NULL;
|
||||
|
||||
return m->private;
|
||||
}
|
||||
|
||||
/*
|
||||
* Maps the shared page into the user process for checking if event is enabled.
|
||||
*/
|
||||
static int user_status_mmap(struct file *file, struct vm_area_struct *vma)
|
||||
{
|
||||
char *pages;
|
||||
struct user_event_group *group = user_status_group(file);
|
||||
unsigned long size = vma->vm_end - vma->vm_start;
|
||||
|
||||
if (size != MAX_EVENTS)
|
||||
if (size != MAX_BYTES)
|
||||
return -EINVAL;
|
||||
|
||||
if (!group)
|
||||
return -EINVAL;
|
||||
|
||||
pages = group->register_page_data;
|
||||
|
||||
return remap_pfn_range(vma, vma->vm_start,
|
||||
virt_to_phys(register_page_data) >> PAGE_SHIFT,
|
||||
virt_to_phys(pages) >> PAGE_SHIFT,
|
||||
size, vm_get_page_prot(VM_READ));
|
||||
}
|
||||
|
||||
@ -1490,14 +1774,18 @@ static void user_seq_stop(struct seq_file *m, void *p)
|
||||
|
||||
static int user_seq_show(struct seq_file *m, void *p)
|
||||
{
|
||||
struct user_event_group *group = m->private;
|
||||
struct user_event *user;
|
||||
char status;
|
||||
int i, active = 0, busy = 0, flags;
|
||||
|
||||
mutex_lock(&reg_mutex);
|
||||
if (!group)
|
||||
return -EINVAL;
|
||||
|
||||
hash_for_each(register_table, i, user, node) {
|
||||
status = register_page_data[user->index];
|
||||
mutex_lock(&group->reg_mutex);
|
||||
|
||||
hash_for_each(group->register_table, i, user, node) {
|
||||
status = user->status;
|
||||
flags = user->flags;
|
||||
|
||||
seq_printf(m, "%d:%s", user->index, EVENT_NAME(user));
|
||||
@ -1520,7 +1808,7 @@ static int user_seq_show(struct seq_file *m, void *p)
|
||||
active++;
|
||||
}
|
||||
|
||||
mutex_unlock(&reg_mutex);
|
||||
mutex_unlock(&group->reg_mutex);
|
||||
|
||||
seq_puts(m, "\n");
|
||||
seq_printf(m, "Active: %d\n", active);
|
||||
@ -1539,7 +1827,24 @@ static const struct seq_operations user_seq_ops = {
|
||||
|
||||
static int user_status_open(struct inode *node, struct file *file)
|
||||
{
|
||||
return seq_open(file, &user_seq_ops);
|
||||
struct user_event_group *group;
|
||||
int ret;
|
||||
|
||||
group = current_user_event_group();
|
||||
|
||||
if (!group)
|
||||
return -ENOENT;
|
||||
|
||||
ret = seq_open(file, &user_seq_ops);
|
||||
|
||||
if (!ret) {
|
||||
/* Chain group to seq_file */
|
||||
struct seq_file *m = file->private_data;
|
||||
|
||||
m->private = group;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const struct file_operations user_status_fops = {
|
||||
@ -1580,42 +1885,21 @@ static int create_user_tracefs(void)
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
static void set_page_reservations(bool set)
|
||||
{
|
||||
int page;
|
||||
|
||||
for (page = 0; page < MAX_PAGES; ++page) {
|
||||
void *addr = register_page_data + (PAGE_SIZE * page);
|
||||
|
||||
if (set)
|
||||
SetPageReserved(virt_to_page(addr));
|
||||
else
|
||||
ClearPageReserved(virt_to_page(addr));
|
||||
}
|
||||
}
|
||||
|
||||
static int __init trace_events_user_init(void)
|
||||
{
|
||||
struct page *pages;
|
||||
int ret;
|
||||
|
||||
/* Zero all bits beside 0 (which is reserved for failures) */
|
||||
bitmap_zero(page_bitmap, MAX_EVENTS);
|
||||
set_bit(0, page_bitmap);
|
||||
init_group = user_event_group_create(&init_user_ns);
|
||||
|
||||
pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, MAX_PAGE_ORDER);
|
||||
if (!pages)
|
||||
if (!init_group)
|
||||
return -ENOMEM;
|
||||
register_page_data = page_address(pages);
|
||||
|
||||
set_page_reservations(true);
|
||||
|
||||
ret = create_user_tracefs();
|
||||
|
||||
if (ret) {
|
||||
pr_warn("user_events could not register with tracefs\n");
|
||||
set_page_reservations(false);
|
||||
__free_pages(pages, MAX_PAGE_ORDER);
|
||||
user_event_group_destroy(init_group);
|
||||
init_group = NULL;
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -1786,8 +1786,9 @@ static int start_per_cpu_kthreads(void)
|
||||
for_each_cpu(cpu, current_mask) {
|
||||
retval = start_kthread(cpu);
|
||||
if (retval) {
|
||||
cpus_read_unlock();
|
||||
stop_per_cpu_kthreads();
|
||||
break;
|
||||
return retval;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -445,7 +445,8 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call,
|
||||
C(SAME_PROBE, "There is already the exact same probe event"),\
|
||||
C(NO_EVENT_INFO, "This requires both group and event name to attach"),\
|
||||
C(BAD_ATTACH_EVENT, "Attached event does not exist"),\
|
||||
C(BAD_ATTACH_ARG, "Attached event does not have this field"),
|
||||
C(BAD_ATTACH_ARG, "Attached event does not have this field"),\
|
||||
C(NO_EP_FILTER, "No filter rule after 'if'"),
|
||||
|
||||
#undef C
|
||||
#define C(a, b) TP_ERR_##a
|
||||
|
@ -961,7 +961,7 @@ create_sort_entry(void *key, struct tracing_map_elt *elt)
|
||||
static void detect_dups(struct tracing_map_sort_entry **sort_entries,
|
||||
int n_entries, unsigned int key_size)
|
||||
{
|
||||
unsigned int dups = 0, total_dups = 0;
|
||||
unsigned int total_dups = 0;
|
||||
int i;
|
||||
void *key;
|
||||
|
||||
@ -974,11 +974,10 @@ static void detect_dups(struct tracing_map_sort_entry **sort_entries,
|
||||
key = sort_entries[0]->key;
|
||||
for (i = 1; i < n_entries; i++) {
|
||||
if (!memcmp(sort_entries[i]->key, key, key_size)) {
|
||||
dups++; total_dups++;
|
||||
total_dups++;
|
||||
continue;
|
||||
}
|
||||
key = sort_entries[i]->key;
|
||||
dups = 0;
|
||||
}
|
||||
|
||||
WARN_ONCE(total_dups > 0,
|
||||
|
@ -640,7 +640,6 @@ static void tp_module_going_check_quiescent(struct tracepoint *tp, void *priv)
|
||||
static int tracepoint_module_coming(struct module *mod)
|
||||
{
|
||||
struct tp_module *tp_mod;
|
||||
int ret = 0;
|
||||
|
||||
if (!mod->num_tracepoints)
|
||||
return 0;
|
||||
@ -652,19 +651,18 @@ static int tracepoint_module_coming(struct module *mod)
|
||||
*/
|
||||
if (trace_module_has_bad_taint(mod))
|
||||
return 0;
|
||||
mutex_lock(&tracepoint_module_list_mutex);
|
||||
|
||||
tp_mod = kmalloc(sizeof(struct tp_module), GFP_KERNEL);
|
||||
if (!tp_mod) {
|
||||
ret = -ENOMEM;
|
||||
goto end;
|
||||
}
|
||||
if (!tp_mod)
|
||||
return -ENOMEM;
|
||||
tp_mod->mod = mod;
|
||||
|
||||
mutex_lock(&tracepoint_module_list_mutex);
|
||||
list_add_tail(&tp_mod->list, &tracepoint_module_list);
|
||||
blocking_notifier_call_chain(&tracepoint_notify_list,
|
||||
MODULE_STATE_COMING, tp_mod);
|
||||
end:
|
||||
mutex_unlock(&tracepoint_module_list_mutex);
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void tracepoint_module_going(struct module *mod)
|
||||
|
@ -12,13 +12,21 @@
|
||||
#include <fcntl.h>
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <asm/bitsperlong.h>
|
||||
#include <endian.h>
|
||||
#include <linux/user_events.h>
|
||||
|
||||
#if __BITS_PER_LONG == 64
|
||||
#define endian_swap(x) htole64(x)
|
||||
#else
|
||||
#define endian_swap(x) htole32(x)
|
||||
#endif
|
||||
|
||||
/* Assumes debugfs is mounted */
|
||||
const char *data_file = "/sys/kernel/debug/tracing/user_events_data";
|
||||
const char *status_file = "/sys/kernel/debug/tracing/user_events_status";
|
||||
|
||||
static int event_status(char **status)
|
||||
static int event_status(long **status)
|
||||
{
|
||||
int fd = open(status_file, O_RDONLY);
|
||||
|
||||
@ -33,7 +41,8 @@ static int event_status(char **status)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int event_reg(int fd, const char *command, int *status, int *write)
|
||||
static int event_reg(int fd, const char *command, long *index, long *mask,
|
||||
int *write)
|
||||
{
|
||||
struct user_reg reg = {0};
|
||||
|
||||
@ -43,7 +52,8 @@ static int event_reg(int fd, const char *command, int *status, int *write)
|
||||
if (ioctl(fd, DIAG_IOCSREG, &reg) == -1)
|
||||
return -1;
|
||||
|
||||
*status = reg.status_index;
|
||||
*index = reg.status_bit / __BITS_PER_LONG;
|
||||
*mask = endian_swap(1L << (reg.status_bit % __BITS_PER_LONG));
|
||||
*write = reg.write_index;
|
||||
|
||||
return 0;
|
||||
@ -51,8 +61,9 @@ static int event_reg(int fd, const char *command, int *status, int *write)
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int data_fd, status, write;
|
||||
char *status_page;
|
||||
int data_fd, write;
|
||||
long index, mask;
|
||||
long *status_page;
|
||||
struct iovec io[2];
|
||||
__u32 count = 0;
|
||||
|
||||
@ -61,7 +72,7 @@ int main(int argc, char **argv)
|
||||
|
||||
data_fd = open(data_file, O_RDWR);
|
||||
|
||||
if (event_reg(data_fd, "test u32 count", &status, &write) == -1)
|
||||
if (event_reg(data_fd, "test u32 count", &index, &mask, &write) == -1)
|
||||
return errno;
|
||||
|
||||
/* Setup iovec */
|
||||
@ -75,7 +86,7 @@ int main(int argc, char **argv)
|
||||
getchar();
|
||||
|
||||
/* Check if anyone is listening */
|
||||
if (status_page[status]) {
|
||||
if (status_page[index] & mask) {
|
||||
/* Yep, trace out our data */
|
||||
writev(data_fd, (const struct iovec *)io, 2);
|
||||
|
||||
|
@ -0,0 +1,27 @@
|
||||
#!/bin/sh
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
# description: Event probe event parser error log check
|
||||
# requires: dynamic_events events/syscalls/sys_enter_openat "<attached-group>.<attached-event> [<args>]":README error_log
|
||||
|
||||
check_error() { # command-with-error-pos-by-^
|
||||
ftrace_errlog_check 'event_probe' "$1" 'dynamic_events'
|
||||
}
|
||||
|
||||
check_error 'e ^a.' # NO_EVENT_INFO
|
||||
check_error 'e ^.b' # NO_EVENT_INFO
|
||||
check_error 'e ^a.b' # BAD_ATTACH_EVENT
|
||||
check_error 'e syscalls/sys_enter_openat ^foo' # BAD_ATTACH_ARG
|
||||
check_error 'e:^/bar syscalls/sys_enter_openat' # NO_GROUP_NAME
|
||||
check_error 'e:^12345678901234567890123456789012345678901234567890123456789012345/bar syscalls/sys_enter_openat' # GROUP_TOO_LONG
|
||||
|
||||
check_error 'e:^foo.1/bar syscalls/sys_enter_openat' # BAD_GROUP_NAME
|
||||
check_error 'e:^ syscalls/sys_enter_openat' # NO_EVENT_NAME
|
||||
check_error 'e:foo/^12345678901234567890123456789012345678901234567890123456789012345 syscalls/sys_enter_openat' # EVENT_TOO_LONG
|
||||
check_error 'e:foo/^bar.1 syscalls/sys_enter_openat' # BAD_EVENT_NAME
|
||||
|
||||
check_error 'e:foo/bar syscalls/sys_enter_openat arg=^dfd' # BAD_FETCH_ARG
|
||||
check_error 'e:foo/bar syscalls/sys_enter_openat ^arg=$foo' # BAD_ATTACH_ARG
|
||||
|
||||
check_error 'e:foo/bar syscalls/sys_enter_openat if ^' # NO_EP_FILTER
|
||||
|
||||
exit 0
|
@ -22,6 +22,11 @@ const char *enable_file = "/sys/kernel/debug/tracing/events/user_events/__test_e
|
||||
const char *trace_file = "/sys/kernel/debug/tracing/trace";
|
||||
const char *fmt_file = "/sys/kernel/debug/tracing/events/user_events/__test_event/format";
|
||||
|
||||
static inline int status_check(char *status_page, int status_bit)
|
||||
{
|
||||
return status_page[status_bit >> 3] & (1 << (status_bit & 7));
|
||||
}
|
||||
|
||||
static int trace_bytes(void)
|
||||
{
|
||||
int fd = open(trace_file, O_RDONLY);
|
||||
@ -197,12 +202,12 @@ TEST_F(user, register_events) {
|
||||
/* Register should work */
|
||||
ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, &reg));
|
||||
ASSERT_EQ(0, reg.write_index);
|
||||
ASSERT_NE(0, reg.status_index);
|
||||
ASSERT_NE(0, reg.status_bit);
|
||||
|
||||
/* Multiple registers should result in same index */
|
||||
ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, &reg));
|
||||
ASSERT_EQ(0, reg.write_index);
|
||||
ASSERT_NE(0, reg.status_index);
|
||||
ASSERT_NE(0, reg.status_bit);
|
||||
|
||||
/* Ensure disabled */
|
||||
self->enable_fd = open(enable_file, O_RDWR);
|
||||
@ -212,15 +217,15 @@ TEST_F(user, register_events) {
|
||||
/* MMAP should work and be zero'd */
|
||||
ASSERT_NE(MAP_FAILED, status_page);
|
||||
ASSERT_NE(NULL, status_page);
|
||||
ASSERT_EQ(0, status_page[reg.status_index]);
|
||||
ASSERT_EQ(0, status_check(status_page, reg.status_bit));
|
||||
|
||||
/* Enable event and ensure bits updated in status */
|
||||
ASSERT_NE(-1, write(self->enable_fd, "1", sizeof("1")))
|
||||
ASSERT_EQ(EVENT_STATUS_FTRACE, status_page[reg.status_index]);
|
||||
ASSERT_NE(0, status_check(status_page, reg.status_bit));
|
||||
|
||||
/* Disable event and ensure bits updated in status */
|
||||
ASSERT_NE(-1, write(self->enable_fd, "0", sizeof("0")))
|
||||
ASSERT_EQ(0, status_page[reg.status_index]);
|
||||
ASSERT_EQ(0, status_check(status_page, reg.status_bit));
|
||||
|
||||
/* File still open should return -EBUSY for delete */
|
||||
ASSERT_EQ(-1, ioctl(self->data_fd, DIAG_IOCSDEL, "__test_event"));
|
||||
@ -240,6 +245,8 @@ TEST_F(user, write_events) {
|
||||
struct iovec io[3];
|
||||
__u32 field1, field2;
|
||||
int before = 0, after = 0;
|
||||
int page_size = sysconf(_SC_PAGESIZE);
|
||||
char *status_page;
|
||||
|
||||
reg.size = sizeof(reg);
|
||||
reg.name_args = (__u64)"__test_event u32 field1; u32 field2";
|
||||
@ -254,10 +261,18 @@ TEST_F(user, write_events) {
|
||||
io[2].iov_base = &field2;
|
||||
io[2].iov_len = sizeof(field2);
|
||||
|
||||
status_page = mmap(NULL, page_size, PROT_READ, MAP_SHARED,
|
||||
self->status_fd, 0);
|
||||
|
||||
/* Register should work */
|
||||
ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, &reg));
|
||||
ASSERT_EQ(0, reg.write_index);
|
||||
ASSERT_NE(0, reg.status_index);
|
||||
ASSERT_NE(0, reg.status_bit);
|
||||
|
||||
/* MMAP should work and be zero'd */
|
||||
ASSERT_NE(MAP_FAILED, status_page);
|
||||
ASSERT_NE(NULL, status_page);
|
||||
ASSERT_EQ(0, status_check(status_page, reg.status_bit));
|
||||
|
||||
/* Write should fail on invalid slot with ENOENT */
|
||||
io[0].iov_base = &field2;
|
||||
@ -271,6 +286,9 @@ TEST_F(user, write_events) {
|
||||
self->enable_fd = open(enable_file, O_RDWR);
|
||||
ASSERT_NE(-1, write(self->enable_fd, "1", sizeof("1")))
|
||||
|
||||
/* Event should now be enabled */
|
||||
ASSERT_NE(0, status_check(status_page, reg.status_bit));
|
||||
|
||||
/* Write should make it out to ftrace buffers */
|
||||
before = trace_bytes();
|
||||
ASSERT_NE(-1, writev(self->data_fd, (const struct iovec *)io, 3));
|
||||
@ -298,7 +316,7 @@ TEST_F(user, write_fault) {
|
||||
/* Register should work */
|
||||
ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, &reg));
|
||||
ASSERT_EQ(0, reg.write_index);
|
||||
ASSERT_NE(0, reg.status_index);
|
||||
ASSERT_NE(0, reg.status_bit);
|
||||
|
||||
/* Write should work normally */
|
||||
ASSERT_NE(-1, writev(self->data_fd, (const struct iovec *)io, 2));
|
||||
@ -315,6 +333,11 @@ TEST_F(user, write_validator) {
|
||||
int loc, bytes;
|
||||
char data[8];
|
||||
int before = 0, after = 0;
|
||||
int page_size = sysconf(_SC_PAGESIZE);
|
||||
char *status_page;
|
||||
|
||||
status_page = mmap(NULL, page_size, PROT_READ, MAP_SHARED,
|
||||
self->status_fd, 0);
|
||||
|
||||
reg.size = sizeof(reg);
|
||||
reg.name_args = (__u64)"__test_event __rel_loc char[] data";
|
||||
@ -322,7 +345,12 @@ TEST_F(user, write_validator) {
|
||||
/* Register should work */
|
||||
ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, &reg));
|
||||
ASSERT_EQ(0, reg.write_index);
|
||||
ASSERT_NE(0, reg.status_index);
|
||||
ASSERT_NE(0, reg.status_bit);
|
||||
|
||||
/* MMAP should work and be zero'd */
|
||||
ASSERT_NE(MAP_FAILED, status_page);
|
||||
ASSERT_NE(NULL, status_page);
|
||||
ASSERT_EQ(0, status_check(status_page, reg.status_bit));
|
||||
|
||||
io[0].iov_base = &reg.write_index;
|
||||
io[0].iov_len = sizeof(reg.write_index);
|
||||
@ -340,6 +368,9 @@ TEST_F(user, write_validator) {
|
||||
self->enable_fd = open(enable_file, O_RDWR);
|
||||
ASSERT_NE(-1, write(self->enable_fd, "1", sizeof("1")))
|
||||
|
||||
/* Event should now be enabled */
|
||||
ASSERT_NE(0, status_check(status_page, reg.status_bit));
|
||||
|
||||
/* Full in-bounds write should work */
|
||||
before = trace_bytes();
|
||||
loc = DYN_LOC(0, bytes);
|
||||
|
@ -35,6 +35,11 @@ static long perf_event_open(struct perf_event_attr *pe, pid_t pid,
|
||||
return syscall(__NR_perf_event_open, pe, pid, cpu, group_fd, flags);
|
||||
}
|
||||
|
||||
static inline int status_check(char *status_page, int status_bit)
|
||||
{
|
||||
return status_page[status_bit >> 3] & (1 << (status_bit & 7));
|
||||
}
|
||||
|
||||
static int get_id(void)
|
||||
{
|
||||
FILE *fp = fopen(id_file, "r");
|
||||
@ -120,8 +125,8 @@ TEST_F(user, perf_write) {
|
||||
/* Register should work */
|
||||
ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, &reg));
|
||||
ASSERT_EQ(0, reg.write_index);
|
||||
ASSERT_NE(0, reg.status_index);
|
||||
ASSERT_EQ(0, status_page[reg.status_index]);
|
||||
ASSERT_NE(0, reg.status_bit);
|
||||
ASSERT_EQ(0, status_check(status_page, reg.status_bit));
|
||||
|
||||
/* Id should be there */
|
||||
id = get_id();
|
||||
@ -144,7 +149,7 @@ TEST_F(user, perf_write) {
|
||||
ASSERT_NE(MAP_FAILED, perf_page);
|
||||
|
||||
/* Status should be updated */
|
||||
ASSERT_EQ(EVENT_STATUS_PERF, status_page[reg.status_index]);
|
||||
ASSERT_NE(0, status_check(status_page, reg.status_bit));
|
||||
|
||||
event.index = reg.write_index;
|
||||
event.field1 = 0xc001;
|
||||
|
@ -27,7 +27,7 @@
|
||||
*
|
||||
* The rv monitor reference is needed for the monitor declaration.
|
||||
*/
|
||||
struct rv_monitor rv_MODEL_NAME;
|
||||
static struct rv_monitor rv_MODEL_NAME;
|
||||
DECLARE_DA_MON_GLOBAL(MODEL_NAME, MIN_TYPE);
|
||||
|
||||
/*
|
||||
@ -63,7 +63,7 @@ TRACEPOINT_DETACH
|
||||
/*
|
||||
* This is the monitor register section.
|
||||
*/
|
||||
struct rv_monitor rv_MODEL_NAME = {
|
||||
static struct rv_monitor rv_MODEL_NAME = {
|
||||
.name = "MODEL_NAME",
|
||||
.description = "auto-generated MODEL_NAME",
|
||||
.enable = enable_MODEL_NAME,
|
||||
@ -72,13 +72,13 @@ struct rv_monitor rv_MODEL_NAME = {
|
||||
.enabled = 0,
|
||||
};
|
||||
|
||||
static int register_MODEL_NAME(void)
|
||||
static int __init register_MODEL_NAME(void)
|
||||
{
|
||||
rv_register_monitor(&rv_MODEL_NAME);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void unregister_MODEL_NAME(void)
|
||||
static void __exit unregister_MODEL_NAME(void)
|
||||
{
|
||||
rv_unregister_monitor(&rv_MODEL_NAME);
|
||||
}
|
||||
|
@ -27,7 +27,7 @@
|
||||
*
|
||||
* The rv monitor reference is needed for the monitor declaration.
|
||||
*/
|
||||
struct rv_monitor rv_MODEL_NAME;
|
||||
static struct rv_monitor rv_MODEL_NAME;
|
||||
DECLARE_DA_MON_PER_CPU(MODEL_NAME, MIN_TYPE);
|
||||
|
||||
/*
|
||||
@ -63,7 +63,7 @@ TRACEPOINT_DETACH
|
||||
/*
|
||||
* This is the monitor register section.
|
||||
*/
|
||||
struct rv_monitor rv_MODEL_NAME = {
|
||||
static struct rv_monitor rv_MODEL_NAME = {
|
||||
.name = "MODEL_NAME",
|
||||
.description = "auto-generated MODEL_NAME",
|
||||
.enable = enable_MODEL_NAME,
|
||||
@ -72,13 +72,13 @@ struct rv_monitor rv_MODEL_NAME = {
|
||||
.enabled = 0,
|
||||
};
|
||||
|
||||
static int register_MODEL_NAME(void)
|
||||
static int __init register_MODEL_NAME(void)
|
||||
{
|
||||
rv_register_monitor(&rv_MODEL_NAME);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void unregister_MODEL_NAME(void)
|
||||
static void __exit unregister_MODEL_NAME(void)
|
||||
{
|
||||
rv_unregister_monitor(&rv_MODEL_NAME);
|
||||
}
|
||||
|
@ -27,7 +27,7 @@
|
||||
*
|
||||
* The rv monitor reference is needed for the monitor declaration.
|
||||
*/
|
||||
struct rv_monitor rv_MODEL_NAME;
|
||||
static struct rv_monitor rv_MODEL_NAME;
|
||||
DECLARE_DA_MON_PER_TASK(MODEL_NAME, MIN_TYPE);
|
||||
|
||||
/*
|
||||
@ -63,7 +63,7 @@ TRACEPOINT_DETACH
|
||||
/*
|
||||
* This is the monitor register section.
|
||||
*/
|
||||
struct rv_monitor rv_MODEL_NAME = {
|
||||
static struct rv_monitor rv_MODEL_NAME = {
|
||||
.name = "MODEL_NAME",
|
||||
.description = "auto-generated MODEL_NAME",
|
||||
.enable = enable_MODEL_NAME,
|
||||
@ -72,13 +72,13 @@ struct rv_monitor rv_MODEL_NAME = {
|
||||
.enabled = 0,
|
||||
};
|
||||
|
||||
static int register_MODEL_NAME(void)
|
||||
static int __init register_MODEL_NAME(void)
|
||||
{
|
||||
rv_register_monitor(&rv_MODEL_NAME);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void unregister_MODEL_NAME(void)
|
||||
static void __exit unregister_MODEL_NAME(void)
|
||||
{
|
||||
rv_unregister_monitor(&rv_MODEL_NAME);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user