linux-next/include/linux/uprobes.h
Andrii Nakryiko 8622e45b5d uprobes: Reuse return_instances between multiple uretprobes within task
Instead of constantly allocating and freeing the very short-lived
struct return_instance, reuse it as much as possible within the current
task. For that, store a linked list of reusable return_instances within
current->utask.

The only complication is that ri_timer() might still be processing such
a return_instance. So while the main uretprobe processing logic might
already be done with a return_instance and would be OK to immediately
reuse it for the next uretprobe instance, it's not correct to
unconditionally reuse it just like that.

Instead, we make sure that ri_timer() can't possibly be processing it by
using a seqcount_t, with ri_timer() being "the writer" and
free_ret_instance() being "a reader". If, after we unlink a return
instance from the utask->return_instances list, we know that ri_timer()
hasn't gotten to processing utask->return_instances yet, then we can be
sure that immediate return_instance reuse is OK, and so we put it
onto utask->ri_pool for future (potentially almost immediate) reuse.
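
The scheme in a nutshell (a simplified sketch, not the literal kernel
code; memory ordering and hprobe details are elided):

	/* "writer": background timer walking utask->return_instances */
	static void ri_timer(struct timer_list *timer)
	{
		struct uprobe_task *utask = from_timer(utask, timer, ri_timer);

		write_seqcount_begin(&utask->ri_seqcount);
		/* ... walk return_instances, refresh uprobe protection ... */
		write_seqcount_end(&utask->ri_seqcount);
	}

	/* "reader": uretprobe exit path, ri already unlinked from utask */
	static void free_ret_instance(struct uprobe_task *utask,
				      struct return_instance *ri)
	{
		/* an odd count means a writer section is in flight */
		if (!(raw_read_seqcount(&utask->ri_seqcount) & 1)) {
			/* ri_timer() isn't mid-walk, immediate reuse is OK */
			ri->next = utask->ri_pool;
			utask->ri_pool = ri;
		} else {
			/* we might be racing with ri_timer(), play it safe */
			kfree_rcu(ri, rcu);
		}
	}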

This change shows improvements both in single-CPU performance (by
avoiding the relatively expensive kmalloc/free combo) and in terms of
multi-CPU scalability, where per-CPU throughput doesn't decline as
steeply with an increasing number of CPUs (a decline that profiling
previously attributed to kmalloc()/free()):

	BASELINE (latest perf/core)
	===========================
	uretprobe-nop         ( 1 cpus):    1.898 ± 0.002M/s  (  1.898M/s/cpu)
	uretprobe-nop         ( 2 cpus):    3.574 ± 0.011M/s  (  1.787M/s/cpu)
	uretprobe-nop         ( 3 cpus):    5.279 ± 0.066M/s  (  1.760M/s/cpu)
	uretprobe-nop         ( 4 cpus):    6.824 ± 0.047M/s  (  1.706M/s/cpu)
	uretprobe-nop         ( 5 cpus):    8.339 ± 0.060M/s  (  1.668M/s/cpu)
	uretprobe-nop         ( 6 cpus):    9.812 ± 0.047M/s  (  1.635M/s/cpu)
	uretprobe-nop         ( 7 cpus):   11.030 ± 0.048M/s  (  1.576M/s/cpu)
	uretprobe-nop         ( 8 cpus):   12.453 ± 0.126M/s  (  1.557M/s/cpu)
	uretprobe-nop         (10 cpus):   14.838 ± 0.044M/s  (  1.484M/s/cpu)
	uretprobe-nop         (12 cpus):   17.092 ± 0.115M/s  (  1.424M/s/cpu)
	uretprobe-nop         (14 cpus):   19.576 ± 0.022M/s  (  1.398M/s/cpu)
	uretprobe-nop         (16 cpus):   22.264 ± 0.015M/s  (  1.391M/s/cpu)
	uretprobe-nop         (24 cpus):   33.534 ± 0.078M/s  (  1.397M/s/cpu)
	uretprobe-nop         (32 cpus):   43.262 ± 0.127M/s  (  1.352M/s/cpu)
	uretprobe-nop         (40 cpus):   53.252 ± 0.080M/s  (  1.331M/s/cpu)
	uretprobe-nop         (48 cpus):   55.778 ± 0.045M/s  (  1.162M/s/cpu)
	uretprobe-nop         (56 cpus):   56.850 ± 0.227M/s  (  1.015M/s/cpu)
	uretprobe-nop         (64 cpus):   62.005 ± 0.077M/s  (  0.969M/s/cpu)
	uretprobe-nop         (72 cpus):   66.445 ± 0.236M/s  (  0.923M/s/cpu)
	uretprobe-nop         (80 cpus):   68.353 ± 0.180M/s  (  0.854M/s/cpu)

	THIS PATCHSET (on top of latest perf/core)
	==========================================
	uretprobe-nop         ( 1 cpus):    2.253 ± 0.004M/s  (  2.253M/s/cpu)
	uretprobe-nop         ( 2 cpus):    4.281 ± 0.003M/s  (  2.140M/s/cpu)
	uretprobe-nop         ( 3 cpus):    6.389 ± 0.027M/s  (  2.130M/s/cpu)
	uretprobe-nop         ( 4 cpus):    8.328 ± 0.005M/s  (  2.082M/s/cpu)
	uretprobe-nop         ( 5 cpus):   10.353 ± 0.001M/s  (  2.071M/s/cpu)
	uretprobe-nop         ( 6 cpus):   12.513 ± 0.010M/s  (  2.086M/s/cpu)
	uretprobe-nop         ( 7 cpus):   14.525 ± 0.017M/s  (  2.075M/s/cpu)
	uretprobe-nop         ( 8 cpus):   15.633 ± 0.013M/s  (  1.954M/s/cpu)
	uretprobe-nop         (10 cpus):   19.532 ± 0.011M/s  (  1.953M/s/cpu)
	uretprobe-nop         (12 cpus):   21.405 ± 0.009M/s  (  1.784M/s/cpu)
	uretprobe-nop         (14 cpus):   24.857 ± 0.020M/s  (  1.776M/s/cpu)
	uretprobe-nop         (16 cpus):   26.466 ± 0.018M/s  (  1.654M/s/cpu)
	uretprobe-nop         (24 cpus):   40.513 ± 0.222M/s  (  1.688M/s/cpu)
	uretprobe-nop         (32 cpus):   54.180 ± 0.074M/s  (  1.693M/s/cpu)
	uretprobe-nop         (40 cpus):   66.100 ± 0.082M/s  (  1.652M/s/cpu)
	uretprobe-nop         (48 cpus):   70.544 ± 0.068M/s  (  1.470M/s/cpu)
	uretprobe-nop         (56 cpus):   74.494 ± 0.055M/s  (  1.330M/s/cpu)
	uretprobe-nop         (64 cpus):   79.317 ± 0.029M/s  (  1.239M/s/cpu)
	uretprobe-nop         (72 cpus):   84.875 ± 0.020M/s  (  1.179M/s/cpu)
	uretprobe-nop         (80 cpus):   92.318 ± 0.224M/s  (  1.154M/s/cpu)

For reference, with uprobe-nop we hit the following throughput:

	uprobe-nop            (80 cpus):  143.485 ± 0.035M/s  (  1.794M/s/cpu)

So uretprobe now stays a bit closer to that performance.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Link: https://lore.kernel.org/r/20241206002417.3295533-5-andrii@kernel.org
2024-12-09 15:50:30 +01:00

/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _LINUX_UPROBES_H
#define _LINUX_UPROBES_H
/*
 * User-space Probes (UProbes)
 *
 * Copyright (C) IBM Corporation, 2008-2012
 * Authors:
 *	Srikar Dronamraju
 *	Jim Keniston
 * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra
 */
#include <linux/errno.h>
#include <linux/rbtree.h>
#include <linux/types.h>
#include <linux/wait.h>
#include <linux/timer.h>
#include <linux/seqlock.h>
struct uprobe;
struct vm_area_struct;
struct mm_struct;
struct inode;
struct notifier_block;
struct page;
/*
 * Allowed return values from uprobe consumer's handler callback
 * with the following meaning:
 *
 * UPROBE_HANDLER_REMOVE
 * - Remove the uprobe breakpoint from current->mm.
 * UPROBE_HANDLER_IGNORE
 * - Ignore ret_handler callback for this consumer.
 */
#define UPROBE_HANDLER_REMOVE		1
#define UPROBE_HANDLER_IGNORE		2

#define MAX_URETPROBE_DEPTH		64
struct uprobe_consumer {
	/*
	 * handler() can return UPROBE_HANDLER_REMOVE to signal the need to
	 * unregister uprobe for current process. If UPROBE_HANDLER_REMOVE is
	 * returned, filter() callback has to be implemented as well and it
	 * should return false to "confirm" the decision to uninstall uprobe
	 * for the current process. If filter() is omitted or returns true,
	 * UPROBE_HANDLER_REMOVE is effectively ignored. (See the
	 * illustrative sketch after the struct definition below.)
	 */
	int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs, __u64 *data);
	int (*ret_handler)(struct uprobe_consumer *self,
			   unsigned long func,
			   struct pt_regs *regs, __u64 *data);
	bool (*filter)(struct uprobe_consumer *self, struct mm_struct *mm);

	struct list_head cons_node;

	__u64 id;	/* set when uprobe_consumer is registered */
};
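/*
 * Illustrative sketch of the UPROBE_HANDLER_REMOVE/filter() contract
 * described above (hypothetical consumer, not part of this header;
 * task_of_interest() is a made-up predicate):
 *
 *	static bool my_filter(struct uprobe_consumer *self, struct mm_struct *mm)
 *	{
 *		return task_of_interest(mm);
 *	}
 *
 *	static int my_handler(struct uprobe_consumer *self, struct pt_regs *regs,
 *			      __u64 *data)
 *	{
 *		if (!task_of_interest(current->mm))
 *			return UPROBE_HANDLER_REMOVE; // my_filter() returns
 *						      // false, confirming it
 *		return 0;
 *	}
 */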
#ifdef CONFIG_UPROBES
#include <asm/uprobes.h>
enum uprobe_task_state {
	UTASK_RUNNING,
	UTASK_SSTEP,
	UTASK_SSTEP_ACK,
	UTASK_SSTEP_TRAPPED,
};
/* The state of hybrid-lifetime uprobe inside struct return_instance */
enum hprobe_state {
	HPROBE_LEASED,		/* uretprobes_srcu-protected uprobe */
	HPROBE_STABLE,		/* refcounted uprobe */
	HPROBE_GONE,		/* NULL uprobe, SRCU expired, refcount failed */
	HPROBE_CONSUMED,	/* uprobe "consumed" by uretprobe handler */
};
/*
 * Hybrid lifetime uprobe. Represents a uprobe instance that can be either
 * SRCU-protected (with SRCU protection potentially timing out eventually),
 * refcounted using uprobe->ref, or there can be no valid uprobe at all
 * (NULL).
 *
 * hprobe's internal state is set up such that the background timer thread
 * can atomically "downgrade" a temporarily RCU-protected uprobe into a
 * refcounted one (or into no uprobe, if refcounting failed).
 *
 * The *stable* pointer always points to the uprobe (or can be NULL if
 * there was no valid underlying uprobe to begin with).
 *
 * The *leased* pointer is the key to achieving race-free atomic lifetime
 * state transitions and can be in one of three possible states:
 *   - the same non-NULL value as *stable*, in which case uprobe is
 *     SRCU-protected;
 *   - NULL, in which case uprobe (if there is any) is refcounted;
 *   - the special __UPROBE_DEAD value, which represents an uprobe that was
 *     SRCU-protected initially, but whose SRCU period timed out and whose
 *     conversion to refcounted failed, because refcount_inc_not_zero()
 *     failed (the uprobe effectively went away when the last consumer
 *     unsubscribed). In this case it's important to know that the *stable*
 *     pointer (which still holds a non-NULL uprobe pointer) shouldn't be
 *     used, because the lifetime of the underlying uprobe is not
 *     guaranteed anymore. __UPROBE_DEAD is just an internal marker and is
 *     handled transparently by the hprobe_fetch() helper.
 *
 * When uprobe is SRCU-protected, we also record the srcu_idx value,
 * necessary for SRCU unlocking.
 *
 * See hprobe_expire() and hprobe_fetch() for the details of race-free
 * uprobe state transitions. It all hinges on an atomic xchg() over the
 * *leased* pointer. The *stable* pointer, once initially set, is not
 * modified concurrently. (A simplified sketch of the downgrade step
 * follows the struct definition below.)
 */
struct hprobe {
	enum hprobe_state state;
	int srcu_idx;
	struct uprobe *uprobe;
};
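/*
 * Simplified sketch of the timer-side "downgrade" described above
 * (illustrative only; the real transition logic lives in hprobe_expire()
 * in kernel/events/uprobes.c):
 *
 *	uprobe = xchg(&hprobe->leased, NULL);
 *	if (!uprobe)
 *		return;					// already downgraded
 *	if (!refcount_inc_not_zero(&uprobe->ref))
 *		xchg(&hprobe->leased, __UPROBE_DEAD);	// uprobe went away
 *	srcu_read_unlock(&uretprobes_srcu, hprobe->srcu_idx);
 */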
/*
 * uprobe_task: Metadata of a task while it singlesteps.
 */
struct uprobe_task {
	enum uprobe_task_state		state;

	unsigned int			depth;
	struct return_instance		*return_instances;

	struct return_instance		*ri_pool;
	struct timer_list		ri_timer;
	seqcount_t			ri_seqcount;

	union {
		struct {
			struct arch_uprobe_task	autask;
			unsigned long		vaddr;
		};

		struct {
			struct callback_head	dup_xol_work;
			unsigned long		dup_xol_addr;
		};
	};

	struct uprobe			*active_uprobe;
	unsigned long			xol_vaddr;

	struct arch_uprobe		*auprobe;
};
struct return_consumer {
	__u64	cookie;
	__u64	id;
};
struct return_instance {
	struct hprobe		hprobe;
	unsigned long		func;
	unsigned long		stack;		/* stack pointer */
	unsigned long		orig_ret_vaddr; /* original return address */
	bool			chained;	/* true, if instance is nested */
	int			cons_cnt;	/* total number of session consumers */

	struct return_instance	*next;		/* keep as stack */
	struct rcu_head		rcu;

	/* singular pre-allocated return_consumer instance for common case */
	struct return_consumer	consumer;
	/*
	 * extra return_consumer instances for rare cases of multiple session
	 * consumers, contains (cons_cnt - 1) elements (see the illustrative
	 * accessor sketch below)
	 */
	struct return_consumer	*extra_consumers;
} ____cacheline_aligned;
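/*
 * Illustrative accessor (hypothetical helper, not part of this header):
 * with the layout above, session consumer i lives in the inline slot for
 * i == 0 and in extra_consumers[i - 1] otherwise:
 *
 *	static struct return_consumer *
 *	ri_consumer(struct return_instance *ri, int i)
 *	{
 *		return i == 0 ? &ri->consumer : &ri->extra_consumers[i - 1];
 *	}
 */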
enum rp_check {
	RP_CHECK_CALL,
	RP_CHECK_CHAIN_CALL,
	RP_CHECK_RET,
};
struct xol_area;
struct uprobes_state {
	struct xol_area		*xol_area;
};
extern void __init uprobes_init(void);
extern int set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr);
extern int set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr);
extern bool is_swbp_insn(uprobe_opcode_t *insn);
extern bool is_trap_insn(uprobe_opcode_t *insn);
extern unsigned long uprobe_get_swbp_addr(struct pt_regs *regs);
extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs);
extern int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t);
extern struct uprobe *uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc);
extern int uprobe_apply(struct uprobe *uprobe, struct uprobe_consumer *uc, bool);
extern void uprobe_unregister_nosync(struct uprobe *uprobe, struct uprobe_consumer *uc);
extern void uprobe_unregister_sync(void);
extern int uprobe_mmap(struct vm_area_struct *vma);
extern void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end);
extern void uprobe_start_dup_mmap(void);
extern void uprobe_end_dup_mmap(void);
extern void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm);
extern void uprobe_free_utask(struct task_struct *t);
extern void uprobe_copy_process(struct task_struct *t, unsigned long flags);
extern int uprobe_post_sstep_notifier(struct pt_regs *regs);
extern int uprobe_pre_sstep_notifier(struct pt_regs *regs);
extern void uprobe_notify_resume(struct pt_regs *regs);
extern bool uprobe_deny_signal(void);
extern bool arch_uprobe_skip_sstep(struct arch_uprobe *aup, struct pt_regs *regs);
extern void uprobe_clear_state(struct mm_struct *mm);
extern int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr);
extern int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs);
extern int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs);
extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk);
extern int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data);
extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs);
extern unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs);
extern bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx, struct pt_regs *regs);
extern bool arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs);
extern void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
				  void *src, unsigned long len);
extern void uprobe_handle_trampoline(struct pt_regs *regs);
extern void *arch_uprobe_trampoline(unsigned long *psize);
extern unsigned long uprobe_get_trampoline_vaddr(void);
#else /* !CONFIG_UPROBES */
struct uprobes_state {
};
static inline void uprobes_init(void)
{
}
#define uprobe_get_trap_addr(regs) instruction_pointer(regs)
static inline struct uprobe *
uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc)
{
	return ERR_PTR(-ENOSYS);
}
static inline int
uprobe_apply(struct uprobe *uprobe, struct uprobe_consumer *uc, bool add)
{
	return -ENOSYS;
}
static inline void
uprobe_unregister_nosync(struct uprobe *uprobe, struct uprobe_consumer *uc)
{
}
static inline void uprobe_unregister_sync(void)
{
}
static inline int uprobe_mmap(struct vm_area_struct *vma)
{
	return 0;
}
static inline void
uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end)
{
}
static inline void uprobe_start_dup_mmap(void)
{
}
static inline void uprobe_end_dup_mmap(void)
{
}
static inline void
uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm)
{
}
static inline void uprobe_notify_resume(struct pt_regs *regs)
{
}
static inline bool uprobe_deny_signal(void)
{
	return false;
}
static inline void uprobe_free_utask(struct task_struct *t)
{
}
static inline void uprobe_copy_process(struct task_struct *t, unsigned long flags)
{
}
static inline void uprobe_clear_state(struct mm_struct *mm)
{
}
#endif /* !CONFIG_UPROBES */
#endif /* _LINUX_UPROBES_H */