mirror of https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2024-12-28 16:52:18 +00:00
commit 8622e45b5d
Instead of constantly allocating and freeing very short-lived
struct return_instance, reuse it as much as possible within current
task. For that, store a linked list of reusable return_instances
within current->utask.

The only complication is that ri_timer() might still be processing such
a return_instance. And so while the main uretprobe processing logic
might be already done with a return_instance and would be OK to
immediately reuse it for the next uretprobe instance, it's not correct
to unconditionally reuse it just like that.

Instead we make sure that ri_timer() can't possibly be processing it by
using a seqcount_t, with ri_timer() being "a writer", while
free_ret_instance() being "a reader". If, after we unlink the return
instance from the utask->return_instances list, we know that ri_timer()
hasn't gotten to processing utask->return_instances yet, then we can be
sure that immediate return_instance reuse is OK, and so we put it onto
utask->ri_pool for future (potentially, almost immediate) reuse.

This change shows improvements both in single-CPU performance (by
avoiding the relatively expensive kmalloc/free combo) and in terms of
multi-CPU scalability, where you can see that per-CPU throughput doesn't
decline as steeply with an increased number of CPUs (which was
previously attributed to kmalloc()/free() through profiling):

BASELINE (latest perf/core)
===========================
uretprobe-nop         ( 1 cpus):    1.898 ± 0.002M/s  (  1.898M/s/cpu)
uretprobe-nop         ( 2 cpus):    3.574 ± 0.011M/s  (  1.787M/s/cpu)
uretprobe-nop         ( 3 cpus):    5.279 ± 0.066M/s  (  1.760M/s/cpu)
uretprobe-nop         ( 4 cpus):    6.824 ± 0.047M/s  (  1.706M/s/cpu)
uretprobe-nop         ( 5 cpus):    8.339 ± 0.060M/s  (  1.668M/s/cpu)
uretprobe-nop         ( 6 cpus):    9.812 ± 0.047M/s  (  1.635M/s/cpu)
uretprobe-nop         ( 7 cpus):   11.030 ± 0.048M/s  (  1.576M/s/cpu)
uretprobe-nop         ( 8 cpus):   12.453 ± 0.126M/s  (  1.557M/s/cpu)
uretprobe-nop         (10 cpus):   14.838 ± 0.044M/s  (  1.484M/s/cpu)
uretprobe-nop         (12 cpus):   17.092 ± 0.115M/s  (  1.424M/s/cpu)
uretprobe-nop         (14 cpus):   19.576 ± 0.022M/s  (  1.398M/s/cpu)
uretprobe-nop         (16 cpus):   22.264 ± 0.015M/s  (  1.391M/s/cpu)
uretprobe-nop         (24 cpus):   33.534 ± 0.078M/s  (  1.397M/s/cpu)
uretprobe-nop         (32 cpus):   43.262 ± 0.127M/s  (  1.352M/s/cpu)
uretprobe-nop         (40 cpus):   53.252 ± 0.080M/s  (  1.331M/s/cpu)
uretprobe-nop         (48 cpus):   55.778 ± 0.045M/s  (  1.162M/s/cpu)
uretprobe-nop         (56 cpus):   56.850 ± 0.227M/s  (  1.015M/s/cpu)
uretprobe-nop         (64 cpus):   62.005 ± 0.077M/s  (  0.969M/s/cpu)
uretprobe-nop         (72 cpus):   66.445 ± 0.236M/s  (  0.923M/s/cpu)
uretprobe-nop         (80 cpus):   68.353 ± 0.180M/s  (  0.854M/s/cpu)

THIS PATCHSET (on top of latest perf/core)
==========================================
uretprobe-nop         ( 1 cpus):    2.253 ± 0.004M/s  (  2.253M/s/cpu)
uretprobe-nop         ( 2 cpus):    4.281 ± 0.003M/s  (  2.140M/s/cpu)
uretprobe-nop         ( 3 cpus):    6.389 ± 0.027M/s  (  2.130M/s/cpu)
uretprobe-nop         ( 4 cpus):    8.328 ± 0.005M/s  (  2.082M/s/cpu)
uretprobe-nop         ( 5 cpus):   10.353 ± 0.001M/s  (  2.071M/s/cpu)
uretprobe-nop         ( 6 cpus):   12.513 ± 0.010M/s  (  2.086M/s/cpu)
uretprobe-nop         ( 7 cpus):   14.525 ± 0.017M/s  (  2.075M/s/cpu)
uretprobe-nop         ( 8 cpus):   15.633 ± 0.013M/s  (  1.954M/s/cpu)
uretprobe-nop         (10 cpus):   19.532 ± 0.011M/s  (  1.953M/s/cpu)
uretprobe-nop         (12 cpus):   21.405 ± 0.009M/s  (  1.784M/s/cpu)
uretprobe-nop         (14 cpus):   24.857 ± 0.020M/s  (  1.776M/s/cpu)
uretprobe-nop         (16 cpus):   26.466 ± 0.018M/s  (  1.654M/s/cpu)
uretprobe-nop         (24 cpus):   40.513 ± 0.222M/s  (  1.688M/s/cpu)
uretprobe-nop         (32 cpus):   54.180 ± 0.074M/s  (  1.693M/s/cpu)
uretprobe-nop         (40 cpus):   66.100 ± 0.082M/s  (  1.652M/s/cpu)
uretprobe-nop         (48 cpus):   70.544 ± 0.068M/s  (  1.470M/s/cpu)
uretprobe-nop         (56 cpus):   74.494 ± 0.055M/s  (  1.330M/s/cpu)
uretprobe-nop         (64 cpus):   79.317 ± 0.029M/s  (  1.239M/s/cpu)
uretprobe-nop         (72 cpus):   84.875 ± 0.020M/s  (  1.179M/s/cpu)
uretprobe-nop         (80 cpus):   92.318 ± 0.224M/s  (  1.154M/s/cpu)

For reference, with uprobe-nop we hit the following throughput:

uprobe-nop            (80 cpus):  143.485 ± 0.035M/s  (  1.794M/s/cpu)

So now uretprobe stays a bit closer to that performance.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Link: https://lore.kernel.org/r/20241206002417.3295533-5-andrii@kernel.org
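To make the reuse handshake above concrete, here is a minimal sketch of the
reader side in C. This is a sketch only: the field names follow the uprobes.h
listing below, but the function body is illustrative and simplified, not the
actual kernel implementation.

	static void free_ret_instance(struct uprobe_task *utask,
				      struct return_instance *ri)
	{
		unsigned int seq;

		/* caller has already unlinked ri from utask->return_instances */

		/*
		 * ri_timer() brackets its scan of the return_instances list
		 * with write_seqcount_begin()/write_seqcount_end(), so an
		 * even sequence value read *after* the unlink means no scan
		 * is in flight and no future scan can reach the unlinked ri;
		 * immediate reuse is then safe.
		 */
		seq = raw_read_seqcount(&utask->ri_seqcount);
		if (!(seq & 1)) {
			ri->next = utask->ri_pool;	/* push onto the per-task pool */
			utask->ri_pool = ri;
		} else {
			kfree_rcu(ri, rcu);	/* timer may still be looking at ri */
		}
	}

Falling back to kfree_rcu() on the racy path keeps the scheme simple: the
timer only ever observes instances that are either still linked or whose
freeing is deferred past its RCU read section.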
289 lines
9.4 KiB
C
/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _LINUX_UPROBES_H
#define _LINUX_UPROBES_H
/*
 * User-space Probes (UProbes)
 *
 * Copyright (C) IBM Corporation, 2008-2012
 * Authors:
 *	Srikar Dronamraju
 *	Jim Keniston
 *
 * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra
 */

#include <linux/errno.h>
#include <linux/rbtree.h>
#include <linux/types.h>
#include <linux/wait.h>
#include <linux/timer.h>
#include <linux/seqlock.h>

struct uprobe;
struct vm_area_struct;
struct mm_struct;
struct inode;
struct notifier_block;
struct page;

/*
 * Allowed return values from uprobe consumer's handler callback
 * with the following meaning:
 *
 * UPROBE_HANDLER_REMOVE
 * - Remove the uprobe breakpoint from current->mm.
 * UPROBE_HANDLER_IGNORE
 * - Ignore ret_handler callback for this consumer.
 */
#define UPROBE_HANDLER_REMOVE		1
#define UPROBE_HANDLER_IGNORE		2

#define MAX_URETPROBE_DEPTH		64

struct uprobe_consumer {
	/*
	 * handler() can return UPROBE_HANDLER_REMOVE to signal the need to
	 * unregister uprobe for current process. If UPROBE_HANDLER_REMOVE is
	 * returned, filter() callback has to be implemented as well and it
	 * should return false to "confirm" the decision to uninstall uprobe
	 * for the current process. If filter() is omitted or returns true,
	 * UPROBE_HANDLER_REMOVE is effectively ignored.
	 */
	int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs, __u64 *data);
	int (*ret_handler)(struct uprobe_consumer *self,
				unsigned long func,
				struct pt_regs *regs, __u64 *data);
	bool (*filter)(struct uprobe_consumer *self, struct mm_struct *mm);

	struct list_head cons_node;

	__u64 id;	/* set when uprobe_consumer is registered */
};
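
/*
 * Illustrative example (not part of this header): a consumer that wants
 * UPROBE_HANDLER_REMOVE to be honored must pair handler() with a filter()
 * that returns false, per the rules documented above.
 *
 *	static int my_handler(struct uprobe_consumer *self,
 *			      struct pt_regs *regs, __u64 *data)
 *	{
 *		return UPROBE_HANDLER_REMOVE;
 *	}
 *
 *	static bool my_filter(struct uprobe_consumer *self, struct mm_struct *mm)
 *	{
 *		return false;	(confirms uninstall for the current process)
 *	}
 *
 *	static struct uprobe_consumer my_consumer = {
 *		.handler	= my_handler,
 *		.filter		= my_filter,
 *	};
 */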

#ifdef CONFIG_UPROBES
#include <asm/uprobes.h>

enum uprobe_task_state {
	UTASK_RUNNING,
	UTASK_SSTEP,
	UTASK_SSTEP_ACK,
	UTASK_SSTEP_TRAPPED,
};

/* The state of hybrid-lifetime uprobe inside struct return_instance */
enum hprobe_state {
	HPROBE_LEASED,		/* uretprobes_srcu-protected uprobe */
	HPROBE_STABLE,		/* refcounted uprobe */
	HPROBE_GONE,		/* NULL uprobe, SRCU expired, refcount failed */
	HPROBE_CONSUMED,	/* uprobe "consumed" by uretprobe handler */
};

/*
 * Hybrid lifetime uprobe. Represents a uprobe instance that could be either
 * SRCU protected (with SRCU protection eventually potentially timing out),
 * refcounted using uprobe->ref, or there could be no valid uprobe (NULL).
 *
 * hprobe's internal state is set up such that the background timer thread
 * can atomically "downgrade" a temporarily RCU-protected uprobe into a
 * refcounted one (or into no uprobe, if refcounting failed).
 *
 * The *stable* pointer always points to the uprobe (or could be NULL if
 * there was no valid underlying uprobe to begin with).
 *
 * The *leased* pointer is the key to achieving race-free atomic lifetime
 * state transition and can have three possible states:
 *   - either the same non-NULL value as *stable*, in which case uprobe is
 *     SRCU-protected;
 *   - NULL, in which case uprobe (if there is any) is refcounted;
 *   - special __UPROBE_DEAD value, which represents an uprobe that was SRCU
 *     protected initially, but the SRCU period timed out and we attempted to
 *     convert it to refcounted, but refcount_inc_not_zero() failed, because
 *     uprobe effectively went away (the last consumer unsubscribed). In this
 *     case it's important to know that the *stable* pointer (which still has
 *     a non-NULL uprobe pointer) shouldn't be used, because the lifetime of
 *     the underlying uprobe is not guaranteed anymore. __UPROBE_DEAD is just
 *     an internal marker and is handled transparently by the hprobe_fetch()
 *     helper.
 *
 * When uprobe is SRCU-protected, we also record the srcu_idx value, necessary
 * for SRCU unlocking.
 *
 * See hprobe_expire() and hprobe_fetch() for the details of race-free uprobe
 * state transitions. It all hinges on an atomic xchg() over the *leased*
 * pointer. The *stable* pointer, once initially set, is not modified
 * concurrently.
 */
struct hprobe {
	enum hprobe_state state;
	int srcu_idx;
	struct uprobe *uprobe;
};
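
/*
 * Conceptual sketch (illustrative, not the actual implementation; see
 * hprobe_expire() for the real thing) of the timer-side "downgrade" in
 * the *stable*/*leased* terminology of the comment above:
 *
 *	struct uprobe *uprobe = xchg(&leased, NULL);
 *
 *	if (uprobe) {
 *		if (!refcount_inc_not_zero(&uprobe->ref))
 *			xchg(&leased, __UPROBE_DEAD);
 *		srcu_read_unlock(&uretprobes_srcu, hprobe->srcu_idx);
 *	}
 *
 * Whoever wins the xchg() race is responsible for the SRCU unlock;
 * afterwards *stable*, unless marked dead, refers to a refcounted uprobe
 * that no longer depends on the SRCU grace period.
 */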

/*
 * uprobe_task: Metadata of a task while it singlesteps.
 */
struct uprobe_task {
	enum uprobe_task_state		state;

	unsigned int			depth;
	struct return_instance		*return_instances;

	struct return_instance		*ri_pool;	/* freelist of reusable return_instances */
	struct timer_list		ri_timer;	/* fires to downgrade SRCU-leased hprobes */
	seqcount_t			ri_seqcount;	/* ri_timer() writes, the free path reads */

	union {
		struct {
			struct arch_uprobe_task	autask;
			unsigned long		vaddr;
		};

		struct {
			struct callback_head	dup_xol_work;
			unsigned long		dup_xol_addr;
		};
	};

	struct uprobe			*active_uprobe;
	unsigned long			xol_vaddr;

	struct arch_uprobe		*auprobe;
};
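
/*
 * Illustrative allocation-side counterpart to the ri_pool freelist above
 * (a sketch with a hypothetical helper name, not the actual kernel code):
 *
 *	static struct return_instance *alloc_ret_instance(struct uprobe_task *utask)
 *	{
 *		struct return_instance *ri = utask->ri_pool;
 *
 *		if (ri) {
 *			utask->ri_pool = ri->next;	(pop a reusable instance)
 *			return ri;
 *		}
 *		return kzalloc(sizeof(*ri), GFP_KERNEL);
 *	}
 */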

struct return_consumer {
	__u64	cookie;
	__u64	id;
};

struct return_instance {
	struct hprobe		hprobe;
	unsigned long		func;
	unsigned long		stack;		/* stack pointer */
	unsigned long		orig_ret_vaddr;	/* original return address */
	bool			chained;	/* true, if instance is nested */
	int			cons_cnt;	/* total number of session consumers */

	struct return_instance	*next;		/* keep as stack */
	struct rcu_head		rcu;

	/* singular pre-allocated return_consumer instance for common case */
	struct return_consumer	consumer;
	/*
	 * extra return_consumer instances for rare cases of multiple session
	 * consumers, contains (cons_cnt - 1) elements
	 */
	struct return_consumer	*extra_consumers;
} ____cacheline_aligned;
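
/*
 * Illustrative accessor (hypothetical helper, not part of this header):
 * given the layout above, the i-th session consumer, for 0 <= i < cons_cnt,
 * lives either in the embedded slot or in extra_consumers[i - 1]:
 *
 *	static struct return_consumer *
 *	ri_consumer(struct return_instance *ri, int i)
 *	{
 *		return i == 0 ? &ri->consumer : &ri->extra_consumers[i - 1];
 *	}
 */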

enum rp_check {
	RP_CHECK_CALL,
	RP_CHECK_CHAIN_CALL,
	RP_CHECK_RET,
};

struct xol_area;

struct uprobes_state {
	struct xol_area	*xol_area;
};

extern void __init uprobes_init(void);
extern int set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr);
extern int set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr);
extern bool is_swbp_insn(uprobe_opcode_t *insn);
extern bool is_trap_insn(uprobe_opcode_t *insn);
extern unsigned long uprobe_get_swbp_addr(struct pt_regs *regs);
extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs);
extern int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t);
extern struct uprobe *uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc);
extern int uprobe_apply(struct uprobe *uprobe, struct uprobe_consumer *uc, bool);
extern void uprobe_unregister_nosync(struct uprobe *uprobe, struct uprobe_consumer *uc);
extern void uprobe_unregister_sync(void);
extern int uprobe_mmap(struct vm_area_struct *vma);
extern void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end);
extern void uprobe_start_dup_mmap(void);
extern void uprobe_end_dup_mmap(void);
extern void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm);
extern void uprobe_free_utask(struct task_struct *t);
extern void uprobe_copy_process(struct task_struct *t, unsigned long flags);
extern int uprobe_post_sstep_notifier(struct pt_regs *regs);
extern int uprobe_pre_sstep_notifier(struct pt_regs *regs);
extern void uprobe_notify_resume(struct pt_regs *regs);
extern bool uprobe_deny_signal(void);
extern bool arch_uprobe_skip_sstep(struct arch_uprobe *aup, struct pt_regs *regs);
extern void uprobe_clear_state(struct mm_struct *mm);
extern int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr);
extern int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs);
extern int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs);
extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk);
extern int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data);
extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs);
extern unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs);
extern bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx, struct pt_regs *regs);
extern bool arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs);
extern void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
				  void *src, unsigned long len);
extern void uprobe_handle_trampoline(struct pt_regs *regs);
extern void *arch_uprobe_trampoline(unsigned long *psize);
extern unsigned long uprobe_get_trampoline_vaddr(void);
#else /* !CONFIG_UPROBES */
struct uprobes_state {
};

static inline void uprobes_init(void)
{
}

#define uprobe_get_trap_addr(regs)	instruction_pointer(regs)

static inline struct uprobe *
uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc)
{
	return ERR_PTR(-ENOSYS);
}
static inline int
uprobe_apply(struct uprobe *uprobe, struct uprobe_consumer *uc, bool add)
{
	return -ENOSYS;
}
static inline void
uprobe_unregister_nosync(struct uprobe *uprobe, struct uprobe_consumer *uc)
{
}
static inline void uprobe_unregister_sync(void)
{
}
static inline int uprobe_mmap(struct vm_area_struct *vma)
{
	return 0;
}
static inline void
uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end)
{
}
static inline void uprobe_start_dup_mmap(void)
{
}
static inline void uprobe_end_dup_mmap(void)
{
}
static inline void
uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm)
{
}
static inline void uprobe_notify_resume(struct pt_regs *regs)
{
}
static inline bool uprobe_deny_signal(void)
{
	return false;
}
static inline void uprobe_free_utask(struct task_struct *t)
{
}
static inline void uprobe_copy_process(struct task_struct *t, unsigned long flags)
{
}
static inline void uprobe_clear_state(struct mm_struct *mm)
{
}
#endif /* !CONFIG_UPROBES */
#endif /* _LINUX_UPROBES_H */