Stable tag for bpf-next's uprobe work.

-----BEGIN PGP SIGNATURE-----
 
 iQJJBAABCgAzFiEEv3OU3/byMaA0LqWJdkfhpEvA5LoFAmcrTRsVHHBldGVyekBp
 bmZyYWRlYWQub3JnAAoJEHZH4aRLwOS6PLoP/jL4pUgW/ZrQFwpZh71BxeDt2Ka/
 Eb6AsHe0PcKAMJYaJDfin6FRU87hp3tHIefSGdexvSttWwbnwKl8cVb+Y7gVnytu
 b2PkMfiOFShKEhu6YAJmxWIOi6MDxonjIMQgjvsVGrZmHiPgGTrh+nnmHYQ+qxFq
 wCaZXO3E65drtZKbi1HddHDYR+e1mHQU0uC+mLO44sP3lzJVxPnYGKGjaS62Z/Da
 XF+3tz6jc6jpu08FJy8ltrqLvcHPmTuDkR6f8mG3Hc8Hw0mndY/4yk0bGbbHo7Vx
 y42Aq4UUgcpvb8OUIicMRLzp3hRjsSTn8UJjsinEaCexdw6ZZiZVU/YR9Mf5ivrJ
 dlplFJvP8b6psnHrRf5xJ1SUv7+dap075A3/28MEvGErZOINoULAGa/hJIndHfuL
 NeWaZj0+of2eAX1SDePia87jX1P9xuU6AEw944i2rhI4P1J5I6XYfcaDDICBYitv
 yREafY/i6wb/Q8GhpjWmSE7p4wUIi5o3CpZsncj7B4Me9JBdHWrcnyUY55Tz05mo
 zoKnNgYC3d9DAIwXvq7x6tM2Tw183YXul/aHJSr3/rFKuuGQx0XACt6BO+yI35q3
 6max4kMyr+kUqr9YYZtb9fuBw3TPhwY/zXG0ydSxNNh7oX+boxh4/bxXljLWXmRQ
 eHgsXuuF1YgCg1R9
 =Wiky
 -----END PGP SIGNATURE-----

Merge tag 'perf-core-for-bpf-next' from tip tree

Stable tag for bpf-next's uprobe work.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
This commit is contained in:
Andrii Nakryiko 2024-11-06 08:13:03 -08:00
commit 5f67329cb2
14 changed files with 760 additions and 304 deletions

View File

@ -135,6 +135,7 @@ config KPROBES_ON_FTRACE
config UPROBES config UPROBES
def_bool n def_bool n
depends on ARCH_SUPPORTS_UPROBES depends on ARCH_SUPPORTS_UPROBES
select TASKS_TRACE_RCU
help help
Uprobes is the user-space counterpart to kprobes: they Uprobes is the user-space counterpart to kprobes: they
enable instrumentation applications (such as 'perf probe') enable instrumentation applications (such as 'perf probe')

View File

@ -943,11 +943,12 @@ static int amd_pmu_v2_snapshot_branch_stack(struct perf_branch_entry *entries, u
static int amd_pmu_v2_handle_irq(struct pt_regs *regs) static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
{ {
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
static atomic64_t status_warned = ATOMIC64_INIT(0);
u64 reserved, status, mask, new_bits, prev_bits;
struct perf_sample_data data; struct perf_sample_data data;
struct hw_perf_event *hwc; struct hw_perf_event *hwc;
struct perf_event *event; struct perf_event *event;
int handled = 0, idx; int handled = 0, idx;
u64 reserved, status, mask;
bool pmu_enabled; bool pmu_enabled;
/* /*
@ -1012,7 +1013,12 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
* the corresponding PMCs are expected to be inactive according to the * the corresponding PMCs are expected to be inactive according to the
* active_mask * active_mask
*/ */
WARN_ON(status > 0); if (status > 0) {
prev_bits = atomic64_fetch_or(status, &status_warned);
// A new bit was set for the very first time.
new_bits = status & ~prev_bits;
WARN(new_bits, "New overflows for inactive PMCs: %llx\n", new_bits);
}
/* Clear overflow and freeze bits */ /* Clear overflow and freeze bits */
amd_pmu_ack_global_status(~status); amd_pmu_ack_global_status(~status);

View File

@ -4599,6 +4599,28 @@ static inline bool erratum_hsw11(struct perf_event *event)
X86_CONFIG(.event=0xc0, .umask=0x01); X86_CONFIG(.event=0xc0, .umask=0x01);
} }
/*
 * Event-constraint lookup for ARL-H: a PMU whose type is hybrid_tiny is
 * handled by cmt_get_event_constraints(); every other PMU type is handled
 * by mtl_get_event_constraints().
 */
static struct event_constraint *
arl_h_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
			    struct perf_event *event)
{
	struct x86_hybrid_pmu *hpmu = hybrid_pmu(event->pmu);

	if (hpmu->pmu_type != hybrid_tiny)
		return mtl_get_event_constraints(cpuc, idx, event);

	return cmt_get_event_constraints(cpuc, idx, event);
}
/*
 * hw_config callback for ARL-H: events on a hybrid_tiny PMU go through
 * intel_pmu_hw_config(), all others through adl_hw_config().
 */
static int arl_h_hw_config(struct perf_event *event)
{
	struct x86_hybrid_pmu *hpmu = hybrid_pmu(event->pmu);

	return hpmu->pmu_type == hybrid_tiny ? intel_pmu_hw_config(event)
					     : adl_hw_config(event);
}
/* /*
* The HSW11 requires a period larger than 100 which is the same as the BDM11. * The HSW11 requires a period larger than 100 which is the same as the BDM11.
* A minimum period of 128 is enforced as well for the INST_RETIRED.ALL. * A minimum period of 128 is enforced as well for the INST_RETIRED.ALL.
@ -4924,17 +4946,26 @@ static struct x86_hybrid_pmu *find_hybrid_pmu_for_cpu(void)
/* /*
* This essentially just maps between the 'hybrid_cpu_type' * This essentially just maps between the 'hybrid_cpu_type'
* and 'hybrid_pmu_type' enums: * and 'hybrid_pmu_type' enums except for ARL-H processor
* which needs to compare atom uarch native id since ARL-H
* contains two different atom uarchs.
*/ */
for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) { for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) {
enum hybrid_pmu_type pmu_type = x86_pmu.hybrid_pmu[i].pmu_type; enum hybrid_pmu_type pmu_type = x86_pmu.hybrid_pmu[i].pmu_type;
u32 native_id;
if (cpu_type == HYBRID_INTEL_CORE && if (cpu_type == HYBRID_INTEL_CORE && pmu_type == hybrid_big)
pmu_type == hybrid_big)
return &x86_pmu.hybrid_pmu[i];
if (cpu_type == HYBRID_INTEL_ATOM &&
pmu_type == hybrid_small)
return &x86_pmu.hybrid_pmu[i]; return &x86_pmu.hybrid_pmu[i];
if (cpu_type == HYBRID_INTEL_ATOM) {
if (x86_pmu.num_hybrid_pmus == 2 && pmu_type == hybrid_small)
return &x86_pmu.hybrid_pmu[i];
native_id = get_this_hybrid_cpu_native_id();
if (native_id == skt_native_id && pmu_type == hybrid_small)
return &x86_pmu.hybrid_pmu[i];
if (native_id == cmt_native_id && pmu_type == hybrid_tiny)
return &x86_pmu.hybrid_pmu[i];
}
} }
return NULL; return NULL;
@ -5965,6 +5996,37 @@ static struct attribute *lnl_hybrid_events_attrs[] = {
NULL NULL
}; };
/* The event string must be in PMU IDX order. */
EVENT_ATTR_STR_HYBRID(topdown-retiring,
td_retiring_arl_h,
"event=0xc2,umask=0x02;event=0x00,umask=0x80;event=0xc2,umask=0x0",
hybrid_big_small_tiny);
EVENT_ATTR_STR_HYBRID(topdown-bad-spec,
td_bad_spec_arl_h,
"event=0x73,umask=0x0;event=0x00,umask=0x81;event=0x73,umask=0x0",
hybrid_big_small_tiny);
EVENT_ATTR_STR_HYBRID(topdown-fe-bound,
td_fe_bound_arl_h,
"event=0x9c,umask=0x01;event=0x00,umask=0x82;event=0x71,umask=0x0",
hybrid_big_small_tiny);
EVENT_ATTR_STR_HYBRID(topdown-be-bound,
td_be_bound_arl_h,
"event=0xa4,umask=0x02;event=0x00,umask=0x83;event=0x74,umask=0x0",
hybrid_big_small_tiny);
/*
 * NULL-terminated list of topdown event attributes for ArrowLake-H
 * (assigned to td_attr in the INTEL_ARROWLAKE_H case of intel_pmu_init()).
 * The four *_arl_h entries are the three-PMU variants defined just above;
 * the remaining entries reuse the existing *_adl attributes.
 */
static struct attribute *arl_h_hybrid_events_attrs[] = {
EVENT_PTR(slots_adl),
EVENT_PTR(td_retiring_arl_h),
EVENT_PTR(td_bad_spec_arl_h),
EVENT_PTR(td_fe_bound_arl_h),
EVENT_PTR(td_be_bound_arl_h),
EVENT_PTR(td_heavy_ops_adl),
EVENT_PTR(td_br_mis_adl),
EVENT_PTR(td_fetch_lat_adl),
EVENT_PTR(td_mem_bound_adl),
NULL,
};
/* Must be in IDX order */ /* Must be in IDX order */
EVENT_ATTR_STR_HYBRID(mem-loads, mem_ld_adl, "event=0xd0,umask=0x5,ldlat=3;event=0xcd,umask=0x1,ldlat=3", hybrid_big_small); EVENT_ATTR_STR_HYBRID(mem-loads, mem_ld_adl, "event=0xd0,umask=0x5,ldlat=3;event=0xcd,umask=0x1,ldlat=3", hybrid_big_small);
EVENT_ATTR_STR_HYBRID(mem-stores, mem_st_adl, "event=0xd0,umask=0x6;event=0xcd,umask=0x2", hybrid_big_small); EVENT_ATTR_STR_HYBRID(mem-stores, mem_st_adl, "event=0xd0,umask=0x6;event=0xcd,umask=0x2", hybrid_big_small);
@ -5983,6 +6045,21 @@ static struct attribute *mtl_hybrid_mem_attrs[] = {
NULL NULL
}; };
EVENT_ATTR_STR_HYBRID(mem-loads,
mem_ld_arl_h,
"event=0xd0,umask=0x5,ldlat=3;event=0xcd,umask=0x1,ldlat=3;event=0xd0,umask=0x5,ldlat=3",
hybrid_big_small_tiny);
EVENT_ATTR_STR_HYBRID(mem-stores,
mem_st_arl_h,
"event=0xd0,umask=0x6;event=0xcd,umask=0x2;event=0xd0,umask=0x6",
hybrid_big_small_tiny);
/*
 * NULL-terminated list of the mem-loads / mem-stores event attributes for
 * ArrowLake-H (assigned to mem_attr in the INTEL_ARROWLAKE_H case of
 * intel_pmu_init()).
 */
static struct attribute *arl_h_hybrid_mem_attrs[] = {
EVENT_PTR(mem_ld_arl_h),
EVENT_PTR(mem_st_arl_h),
NULL,
};
EVENT_ATTR_STR_HYBRID(tx-start, tx_start_adl, "event=0xc9,umask=0x1", hybrid_big); EVENT_ATTR_STR_HYBRID(tx-start, tx_start_adl, "event=0xc9,umask=0x1", hybrid_big);
EVENT_ATTR_STR_HYBRID(tx-commit, tx_commit_adl, "event=0xc9,umask=0x2", hybrid_big); EVENT_ATTR_STR_HYBRID(tx-commit, tx_commit_adl, "event=0xc9,umask=0x2", hybrid_big);
EVENT_ATTR_STR_HYBRID(tx-abort, tx_abort_adl, "event=0xc9,umask=0x4", hybrid_big); EVENT_ATTR_STR_HYBRID(tx-abort, tx_abort_adl, "event=0xc9,umask=0x4", hybrid_big);
@ -6006,8 +6083,8 @@ static struct attribute *adl_hybrid_tsx_attrs[] = {
FORMAT_ATTR_HYBRID(in_tx, hybrid_big); FORMAT_ATTR_HYBRID(in_tx, hybrid_big);
FORMAT_ATTR_HYBRID(in_tx_cp, hybrid_big); FORMAT_ATTR_HYBRID(in_tx_cp, hybrid_big);
FORMAT_ATTR_HYBRID(offcore_rsp, hybrid_big_small); FORMAT_ATTR_HYBRID(offcore_rsp, hybrid_big_small_tiny);
FORMAT_ATTR_HYBRID(ldlat, hybrid_big_small); FORMAT_ATTR_HYBRID(ldlat, hybrid_big_small_tiny);
FORMAT_ATTR_HYBRID(frontend, hybrid_big); FORMAT_ATTR_HYBRID(frontend, hybrid_big);
#define ADL_HYBRID_RTM_FORMAT_ATTR \ #define ADL_HYBRID_RTM_FORMAT_ATTR \
@ -6030,7 +6107,7 @@ static struct attribute *adl_hybrid_extra_attr[] = {
NULL NULL
}; };
FORMAT_ATTR_HYBRID(snoop_rsp, hybrid_small); FORMAT_ATTR_HYBRID(snoop_rsp, hybrid_small_tiny);
static struct attribute *mtl_hybrid_extra_attr_rtm[] = { static struct attribute *mtl_hybrid_extra_attr_rtm[] = {
ADL_HYBRID_RTM_FORMAT_ATTR, ADL_HYBRID_RTM_FORMAT_ATTR,
@ -6238,8 +6315,9 @@ static inline int intel_pmu_v6_addr_offset(int index, bool eventsel)
} }
static const struct { enum hybrid_pmu_type id; char *name; } intel_hybrid_pmu_type_map[] __initconst = { static const struct { enum hybrid_pmu_type id; char *name; } intel_hybrid_pmu_type_map[] __initconst = {
{ hybrid_small, "cpu_atom" }, { hybrid_small, "cpu_atom" },
{ hybrid_big, "cpu_core" }, { hybrid_big, "cpu_core" },
{ hybrid_tiny, "cpu_lowpower" },
}; };
static __always_inline int intel_pmu_init_hybrid(enum hybrid_pmu_type pmus) static __always_inline int intel_pmu_init_hybrid(enum hybrid_pmu_type pmus)
@ -6272,7 +6350,7 @@ static __always_inline int intel_pmu_init_hybrid(enum hybrid_pmu_type pmus)
0, x86_pmu_num_counters(&pmu->pmu), 0, 0); 0, x86_pmu_num_counters(&pmu->pmu), 0, 0);
pmu->intel_cap.capabilities = x86_pmu.intel_cap.capabilities; pmu->intel_cap.capabilities = x86_pmu.intel_cap.capabilities;
if (pmu->pmu_type & hybrid_small) { if (pmu->pmu_type & hybrid_small_tiny) {
pmu->intel_cap.perf_metrics = 0; pmu->intel_cap.perf_metrics = 0;
pmu->intel_cap.pebs_output_pt_available = 1; pmu->intel_cap.pebs_output_pt_available = 1;
pmu->mid_ack = true; pmu->mid_ack = true;
@ -7111,6 +7189,37 @@ __init int intel_pmu_init(void)
name = "lunarlake_hybrid"; name = "lunarlake_hybrid";
break; break;
case INTEL_ARROWLAKE_H:
intel_pmu_init_hybrid(hybrid_big_small_tiny);
x86_pmu.pebs_latency_data = arl_h_latency_data;
x86_pmu.get_event_constraints = arl_h_get_event_constraints;
x86_pmu.hw_config = arl_h_hw_config;
td_attr = arl_h_hybrid_events_attrs;
mem_attr = arl_h_hybrid_mem_attrs;
tsx_attr = adl_hybrid_tsx_attrs;
extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
mtl_hybrid_extra_attr_rtm : mtl_hybrid_extra_attr;
/* Initialize big core specific PerfMon capabilities. */
pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX];
intel_pmu_init_lnc(&pmu->pmu);
/* Initialize Atom core specific PerfMon capabilities. */
pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX];
intel_pmu_init_skt(&pmu->pmu);
/* Initialize Lower Power Atom specific PerfMon capabilities. */
pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_TINY_IDX];
intel_pmu_init_grt(&pmu->pmu);
pmu->extra_regs = intel_cmt_extra_regs;
intel_pmu_pebs_data_source_arl_h();
pr_cont("ArrowLake-H Hybrid events, ");
name = "arrowlake_h_hybrid";
break;
default: default:
switch (x86_pmu.version) { switch (x86_pmu.version) {
case 1: case 1:

View File

@ -177,6 +177,17 @@ void __init intel_pmu_pebs_data_source_mtl(void)
__intel_pmu_pebs_data_source_cmt(data_source); __intel_pmu_pebs_data_source_cmt(data_source);
} }
/*
 * Set up the PEBS data-source tables for ArrowLake-H.
 *
 * The LNL setup runs first. The table belonging to the PMU at
 * X86_HYBRID_PMU_TINY_IDX is then seeded from the file-level
 * pebs_data_source table and passed to __intel_pmu_pebs_data_source_cmt().
 */
void __init intel_pmu_pebs_data_source_arl_h(void)
{
	u64 *tiny_src;

	intel_pmu_pebs_data_source_lnl();

	tiny_src = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_TINY_IDX].pebs_data_source;
	memcpy(tiny_src, pebs_data_source, sizeof(pebs_data_source));
	__intel_pmu_pebs_data_source_cmt(tiny_src);
}
void __init intel_pmu_pebs_data_source_cmt(void) void __init intel_pmu_pebs_data_source_cmt(void)
{ {
__intel_pmu_pebs_data_source_cmt(pebs_data_source); __intel_pmu_pebs_data_source_cmt(pebs_data_source);
@ -388,6 +399,16 @@ u64 lnl_latency_data(struct perf_event *event, u64 status)
return lnc_latency_data(event, status); return lnc_latency_data(event, status);
} }
/*
 * Decode PEBS latency data on ARL-H: hybrid_tiny PMUs use
 * cmt_latency_data(), every other PMU type uses lnl_latency_data().
 */
u64 arl_h_latency_data(struct perf_event *event, u64 status)
{
	struct x86_hybrid_pmu *hpmu = hybrid_pmu(event->pmu);

	if (hpmu->pmu_type != hybrid_tiny)
		return lnl_latency_data(event, status);

	return cmt_latency_data(event, status);
}
static u64 load_latency_data(struct perf_event *event, u64 status) static u64 load_latency_data(struct perf_event *event, u64 status)
{ {
union intel_x86_pebs_dse dse; union intel_x86_pebs_dse dse;

View File

@ -668,24 +668,38 @@ enum {
#define PERF_PEBS_DATA_SOURCE_GRT_MAX 0x10 #define PERF_PEBS_DATA_SOURCE_GRT_MAX 0x10
#define PERF_PEBS_DATA_SOURCE_GRT_MASK (PERF_PEBS_DATA_SOURCE_GRT_MAX - 1) #define PERF_PEBS_DATA_SOURCE_GRT_MASK (PERF_PEBS_DATA_SOURCE_GRT_MAX - 1)
/*
* CPUID.1AH.EAX[31:0] uniquely identifies the microarchitecture
* of the core. Bits [31:24] indicate its core type (Core or Atom)
* and bits [23:0] indicate the native model ID of the core.
* Core type and native model ID are defined in the enumerations below.
*/
enum hybrid_cpu_type { enum hybrid_cpu_type {
HYBRID_INTEL_NONE, HYBRID_INTEL_NONE,
HYBRID_INTEL_ATOM = 0x20, HYBRID_INTEL_ATOM = 0x20,
HYBRID_INTEL_CORE = 0x40, HYBRID_INTEL_CORE = 0x40,
}; };
enum hybrid_pmu_type {
not_hybrid,
hybrid_small = BIT(0),
hybrid_big = BIT(1),
hybrid_big_small = hybrid_big | hybrid_small, /* only used for matching */
};
#define X86_HYBRID_PMU_ATOM_IDX 0 #define X86_HYBRID_PMU_ATOM_IDX 0
#define X86_HYBRID_PMU_CORE_IDX 1 #define X86_HYBRID_PMU_CORE_IDX 1
#define X86_HYBRID_PMU_TINY_IDX 2
#define X86_HYBRID_NUM_PMUS 2 enum hybrid_pmu_type {
not_hybrid,
hybrid_small = BIT(X86_HYBRID_PMU_ATOM_IDX),
hybrid_big = BIT(X86_HYBRID_PMU_CORE_IDX),
hybrid_tiny = BIT(X86_HYBRID_PMU_TINY_IDX),
/* The values below are only used for matching */
hybrid_big_small = hybrid_big | hybrid_small,
hybrid_small_tiny = hybrid_small | hybrid_tiny,
hybrid_big_small_tiny = hybrid_big | hybrid_small_tiny,
};
enum atom_native_id {
cmt_native_id = 0x2, /* Crestmont */
skt_native_id = 0x3, /* Skymont */
};
struct x86_hybrid_pmu { struct x86_hybrid_pmu {
struct pmu pmu; struct pmu pmu;
@ -1578,6 +1592,8 @@ u64 cmt_latency_data(struct perf_event *event, u64 status);
u64 lnl_latency_data(struct perf_event *event, u64 status); u64 lnl_latency_data(struct perf_event *event, u64 status);
u64 arl_h_latency_data(struct perf_event *event, u64 status);
extern struct event_constraint intel_core2_pebs_event_constraints[]; extern struct event_constraint intel_core2_pebs_event_constraints[];
extern struct event_constraint intel_atom_pebs_event_constraints[]; extern struct event_constraint intel_atom_pebs_event_constraints[];
@ -1697,6 +1713,8 @@ void intel_pmu_pebs_data_source_grt(void);
void intel_pmu_pebs_data_source_mtl(void); void intel_pmu_pebs_data_source_mtl(void);
void intel_pmu_pebs_data_source_arl_h(void);
void intel_pmu_pebs_data_source_cmt(void); void intel_pmu_pebs_data_source_cmt(void);
void intel_pmu_pebs_data_source_lnl(void); void intel_pmu_pebs_data_source_lnl(void);

View File

@ -148,7 +148,6 @@ struct rapl_model {
/* 1/2^hw_unit Joule */ /* 1/2^hw_unit Joule */
static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly; static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly;
static struct rapl_pmus *rapl_pmus; static struct rapl_pmus *rapl_pmus;
static cpumask_t rapl_cpu_mask;
static unsigned int rapl_cntr_mask; static unsigned int rapl_cntr_mask;
static u64 rapl_timer_ms; static u64 rapl_timer_ms;
static struct perf_msr *rapl_msrs; static struct perf_msr *rapl_msrs;
@ -369,8 +368,6 @@ static int rapl_pmu_event_init(struct perf_event *event)
if (event->cpu < 0) if (event->cpu < 0)
return -EINVAL; return -EINVAL;
event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;
if (!cfg || cfg >= NR_RAPL_DOMAINS + 1) if (!cfg || cfg >= NR_RAPL_DOMAINS + 1)
return -EINVAL; return -EINVAL;
@ -389,7 +386,6 @@ static int rapl_pmu_event_init(struct perf_event *event)
pmu = cpu_to_rapl_pmu(event->cpu); pmu = cpu_to_rapl_pmu(event->cpu);
if (!pmu) if (!pmu)
return -EINVAL; return -EINVAL;
event->cpu = pmu->cpu;
event->pmu_private = pmu; event->pmu_private = pmu;
event->hw.event_base = rapl_msrs[bit].msr; event->hw.event_base = rapl_msrs[bit].msr;
event->hw.config = cfg; event->hw.config = cfg;
@ -403,23 +399,6 @@ static void rapl_pmu_event_read(struct perf_event *event)
rapl_event_update(event); rapl_event_update(event);
} }
static ssize_t rapl_get_attr_cpumask(struct device *dev,
struct device_attribute *attr, char *buf)
{
return cpumap_print_to_pagebuf(true, buf, &rapl_cpu_mask);
}
static DEVICE_ATTR(cpumask, S_IRUGO, rapl_get_attr_cpumask, NULL);
static struct attribute *rapl_pmu_attrs[] = {
&dev_attr_cpumask.attr,
NULL,
};
static struct attribute_group rapl_pmu_attr_group = {
.attrs = rapl_pmu_attrs,
};
RAPL_EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01"); RAPL_EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01");
RAPL_EVENT_ATTR_STR(energy-pkg , rapl_pkg, "event=0x02"); RAPL_EVENT_ATTR_STR(energy-pkg , rapl_pkg, "event=0x02");
RAPL_EVENT_ATTR_STR(energy-ram , rapl_ram, "event=0x03"); RAPL_EVENT_ATTR_STR(energy-ram , rapl_ram, "event=0x03");
@ -467,7 +446,6 @@ static struct attribute_group rapl_pmu_format_group = {
}; };
static const struct attribute_group *rapl_attr_groups[] = { static const struct attribute_group *rapl_attr_groups[] = {
&rapl_pmu_attr_group,
&rapl_pmu_format_group, &rapl_pmu_format_group,
&rapl_pmu_events_group, &rapl_pmu_events_group,
NULL, NULL,
@ -570,65 +548,6 @@ static struct perf_msr amd_rapl_msrs[] = {
[PERF_RAPL_PSYS] = { 0, &rapl_events_psys_group, NULL, false, 0 }, [PERF_RAPL_PSYS] = { 0, &rapl_events_psys_group, NULL, false, 0 },
}; };
static int rapl_cpu_offline(unsigned int cpu)
{
struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
int target;
/* Check if exiting cpu is used for collecting rapl events */
if (!cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask))
return 0;
pmu->cpu = -1;
/* Find a new cpu to collect rapl events */
target = cpumask_any_but(get_rapl_pmu_cpumask(cpu), cpu);
/* Migrate rapl events to the new target */
if (target < nr_cpu_ids) {
cpumask_set_cpu(target, &rapl_cpu_mask);
pmu->cpu = target;
perf_pmu_migrate_context(pmu->pmu, cpu, target);
}
return 0;
}
static int rapl_cpu_online(unsigned int cpu)
{
s32 rapl_pmu_idx = get_rapl_pmu_idx(cpu);
if (rapl_pmu_idx < 0) {
pr_err("topology_logical_(package/die)_id() returned a negative value");
return -EINVAL;
}
struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
int target;
if (!pmu) {
pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
if (!pmu)
return -ENOMEM;
raw_spin_lock_init(&pmu->lock);
INIT_LIST_HEAD(&pmu->active_list);
pmu->pmu = &rapl_pmus->pmu;
pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
rapl_hrtimer_init(pmu);
rapl_pmus->pmus[rapl_pmu_idx] = pmu;
}
/*
* Check if there is an online cpu in the package which collects rapl
* events already.
*/
target = cpumask_any_and(&rapl_cpu_mask, get_rapl_pmu_cpumask(cpu));
if (target < nr_cpu_ids)
return 0;
cpumask_set_cpu(cpu, &rapl_cpu_mask);
pmu->cpu = cpu;
return 0;
}
static int rapl_check_hw_unit(struct rapl_model *rm) static int rapl_check_hw_unit(struct rapl_model *rm)
{ {
u64 msr_rapl_power_unit_bits; u64 msr_rapl_power_unit_bits;
@ -707,12 +626,41 @@ static const struct attribute_group *rapl_attr_update[] = {
NULL, NULL,
}; };
/*
 * Allocate and initialize one struct rapl_pmu for each of the
 * rapl_pmus->nr_rapl_pmu slots of the global rapl_pmus table.
 *
 * Returns 0 on success or -ENOMEM if an allocation fails. On failure every
 * instance allocated so far is freed and its slot is reset to NULL, so the
 * table never keeps a pointer to freed memory (a later walk of the table
 * that kfree()s each slot would otherwise double-free).
 */
static int __init init_rapl_pmu(void)
{
	struct rapl_pmu *pmu;
	int idx;

	for (idx = 0; idx < rapl_pmus->nr_rapl_pmu; idx++) {
		pmu = kzalloc(sizeof(*pmu), GFP_KERNEL);
		if (!pmu)
			goto free;

		raw_spin_lock_init(&pmu->lock);
		INIT_LIST_HEAD(&pmu->active_list);
		pmu->pmu = &rapl_pmus->pmu;
		pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
		rapl_hrtimer_init(pmu);

		rapl_pmus->pmus[idx] = pmu;
	}

	return 0;

free:
	/* Unwind: slots [0, idx) were populated before the failure. */
	for (; idx > 0; idx--) {
		kfree(rapl_pmus->pmus[idx - 1]);
		rapl_pmus->pmus[idx - 1] = NULL;
	}
	return -ENOMEM;
}
static int __init init_rapl_pmus(void) static int __init init_rapl_pmus(void)
{ {
int nr_rapl_pmu = topology_max_packages(); int nr_rapl_pmu = topology_max_packages();
int rapl_pmu_scope = PERF_PMU_SCOPE_PKG;
if (!rapl_pmu_is_pkg_scope()) if (!rapl_pmu_is_pkg_scope()) {
nr_rapl_pmu *= topology_max_dies_per_package(); nr_rapl_pmu *= topology_max_dies_per_package();
rapl_pmu_scope = PERF_PMU_SCOPE_DIE;
}
rapl_pmus = kzalloc(struct_size(rapl_pmus, pmus, nr_rapl_pmu), GFP_KERNEL); rapl_pmus = kzalloc(struct_size(rapl_pmus, pmus, nr_rapl_pmu), GFP_KERNEL);
if (!rapl_pmus) if (!rapl_pmus)
@ -728,9 +676,11 @@ static int __init init_rapl_pmus(void)
rapl_pmus->pmu.start = rapl_pmu_event_start; rapl_pmus->pmu.start = rapl_pmu_event_start;
rapl_pmus->pmu.stop = rapl_pmu_event_stop; rapl_pmus->pmu.stop = rapl_pmu_event_stop;
rapl_pmus->pmu.read = rapl_pmu_event_read; rapl_pmus->pmu.read = rapl_pmu_event_read;
rapl_pmus->pmu.scope = rapl_pmu_scope;
rapl_pmus->pmu.module = THIS_MODULE; rapl_pmus->pmu.module = THIS_MODULE;
rapl_pmus->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE; rapl_pmus->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE;
return 0;
return init_rapl_pmu();
} }
static struct rapl_model model_snb = { static struct rapl_model model_snb = {
@ -876,24 +826,13 @@ static int __init rapl_pmu_init(void)
if (ret) if (ret)
return ret; return ret;
/*
* Install callbacks. Core will call them for each online cpu.
*/
ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_RAPL_ONLINE,
"perf/x86/rapl:online",
rapl_cpu_online, rapl_cpu_offline);
if (ret)
goto out;
ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1); ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1);
if (ret) if (ret)
goto out1; goto out;
rapl_advertise(); rapl_advertise();
return 0; return 0;
out1:
cpuhp_remove_state(CPUHP_AP_PERF_X86_RAPL_ONLINE);
out: out:
pr_warn("Initialization failed (%d), disabled\n", ret); pr_warn("Initialization failed (%d), disabled\n", ret);
cleanup_rapl_pmus(); cleanup_rapl_pmus();
@ -903,7 +842,6 @@ module_init(rapl_pmu_init);
static void __exit intel_rapl_exit(void) static void __exit intel_rapl_exit(void)
{ {
cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_RAPL_ONLINE);
perf_pmu_unregister(&rapl_pmus->pmu); perf_pmu_unregister(&rapl_pmus->pmu);
cleanup_rapl_pmus(); cleanup_rapl_pmus();
} }

View File

@ -32,6 +32,7 @@ extern bool handle_user_split_lock(struct pt_regs *regs, long error_code);
extern bool handle_guest_split_lock(unsigned long ip); extern bool handle_guest_split_lock(unsigned long ip);
extern void handle_bus_lock(struct pt_regs *regs); extern void handle_bus_lock(struct pt_regs *regs);
u8 get_this_hybrid_cpu_type(void); u8 get_this_hybrid_cpu_type(void);
u32 get_this_hybrid_cpu_native_id(void);
#else #else
static inline void __init sld_setup(struct cpuinfo_x86 *c) {} static inline void __init sld_setup(struct cpuinfo_x86 *c) {}
static inline bool handle_user_split_lock(struct pt_regs *regs, long error_code) static inline bool handle_user_split_lock(struct pt_regs *regs, long error_code)
@ -50,6 +51,11 @@ static inline u8 get_this_hybrid_cpu_type(void)
{ {
return 0; return 0;
} }
/* Fallback stub (#else branch): always reports a native ID of 0. */
static inline u32 get_this_hybrid_cpu_native_id(void)
{
return 0;
}
#endif #endif
#ifdef CONFIG_IA32_FEAT_CTL #ifdef CONFIG_IA32_FEAT_CTL
void init_ia32_feat_ctl(struct cpuinfo_x86 *c); void init_ia32_feat_ctl(struct cpuinfo_x86 *c);

View File

@ -1299,3 +1299,18 @@ u8 get_this_hybrid_cpu_type(void)
return cpuid_eax(0x0000001a) >> X86_HYBRID_CPU_TYPE_ID_SHIFT; return cpuid_eax(0x0000001a) >> X86_HYBRID_CPU_TYPE_ID_SHIFT;
} }
/**
* get_this_hybrid_cpu_native_id() - Get the native id of this hybrid CPU
*
* Returns the uarch native ID [23:0] of a CPU in a hybrid processor.
* If the processor is not hybrid, returns 0.
*/
u32 get_this_hybrid_cpu_native_id(void)
{
if (!cpu_feature_enabled(X86_FEATURE_HYBRID_CPU))
return 0;
return cpuid_eax(0x0000001a) &
(BIT_ULL(X86_HYBRID_CPU_TYPE_ID_SHIFT) - 1);
}

View File

@ -208,7 +208,6 @@ enum cpuhp_state {
CPUHP_AP_PERF_X86_UNCORE_ONLINE, CPUHP_AP_PERF_X86_UNCORE_ONLINE,
CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE, CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE,
CPUHP_AP_PERF_X86_AMD_POWER_ONLINE, CPUHP_AP_PERF_X86_AMD_POWER_ONLINE,
CPUHP_AP_PERF_X86_RAPL_ONLINE,
CPUHP_AP_PERF_S390_CF_ONLINE, CPUHP_AP_PERF_S390_CF_ONLINE,
CPUHP_AP_PERF_S390_SF_ONLINE, CPUHP_AP_PERF_S390_SF_ONLINE,
CPUHP_AP_PERF_ARM_CCI_ONLINE, CPUHP_AP_PERF_ARM_CCI_ONLINE,

View File

@ -15,6 +15,7 @@
#include <linux/rbtree.h> #include <linux/rbtree.h>
#include <linux/types.h> #include <linux/types.h>
#include <linux/wait.h> #include <linux/wait.h>
#include <linux/timer.h>
struct uprobe; struct uprobe;
struct vm_area_struct; struct vm_area_struct;
@ -23,8 +24,17 @@ struct inode;
struct notifier_block; struct notifier_block;
struct page; struct page;
/*
* Allowed return values from uprobe consumer's handler callback
* with following meaning:
*
* UPROBE_HANDLER_REMOVE
* - Remove the uprobe breakpoint from current->mm.
* UPROBE_HANDLER_IGNORE
* - Ignore ret_handler callback for this consumer.
*/
#define UPROBE_HANDLER_REMOVE 1 #define UPROBE_HANDLER_REMOVE 1
#define UPROBE_HANDLER_MASK 1 #define UPROBE_HANDLER_IGNORE 2
#define MAX_URETPROBE_DEPTH 64 #define MAX_URETPROBE_DEPTH 64
@ -37,13 +47,15 @@ struct uprobe_consumer {
* for the current process. If filter() is omitted or returns true, * for the current process. If filter() is omitted or returns true,
* UPROBE_HANDLER_REMOVE is effectively ignored. * UPROBE_HANDLER_REMOVE is effectively ignored.
*/ */
int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs); int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs, __u64 *data);
int (*ret_handler)(struct uprobe_consumer *self, int (*ret_handler)(struct uprobe_consumer *self,
unsigned long func, unsigned long func,
struct pt_regs *regs); struct pt_regs *regs, __u64 *data);
bool (*filter)(struct uprobe_consumer *self, struct mm_struct *mm); bool (*filter)(struct uprobe_consumer *self, struct mm_struct *mm);
struct list_head cons_node; struct list_head cons_node;
__u64 id; /* set when uprobe_consumer is registered */
}; };
#ifdef CONFIG_UPROBES #ifdef CONFIG_UPROBES
@ -56,6 +68,53 @@ enum uprobe_task_state {
UTASK_SSTEP_TRAPPED, UTASK_SSTEP_TRAPPED,
}; };
/* The state of hybrid-lifetime uprobe inside struct return_instance */
enum hprobe_state {
HPROBE_LEASED, /* uretprobes_srcu-protected uprobe */
HPROBE_STABLE, /* refcounted uprobe */
HPROBE_GONE, /* NULL uprobe, SRCU expired, refcount failed */
HPROBE_CONSUMED, /* uprobe "consumed" by uretprobe handler */
};
/*
* Hybrid lifetime uprobe. Represents a uprobe instance that could be either
* SRCU protected (with SRCU protection eventually potentially timing out),
* refcounted using uprobe->ref, or there could be no valid uprobe (NULL).
*
* hprobe's internal state is set up such that a background timer thread can
* atomically "downgrade" a temporarily RCU-protected uprobe into a refcounted
* one (or no uprobe, if refcounting failed).
*
* *stable* pointer always points to the uprobe (or could be NULL if there
* was no valid underlying uprobe to begin with).
*
* *leased* pointer is the key to achieving race-free atomic lifetime state
* transition and can have three possible states:
* - either the same non-NULL value as *stable*, in which case uprobe is
* SRCU-protected;
* - NULL, in which case uprobe (if there is any) is refcounted;
* - special __UPROBE_DEAD value, which represents an uprobe that was SRCU
* protected initially, but SRCU period timed out and we attempted to
* convert it to refcounted, but refcount_inc_not_zero() failed, because
* uprobe effectively went away (the last consumer unsubscribed). In this
* case it's important to know that *stable* pointer (which still has
* non-NULL uprobe pointer) shouldn't be used, because lifetime of
* underlying uprobe is not guaranteed anymore. __UPROBE_DEAD is just an
* internal marker and is handled transparently by hprobe_fetch() helper.
*
* When uprobe is SRCU-protected, we also record srcu_idx value, necessary for
* SRCU unlocking.
*
* See hprobe_expire() and hprobe_fetch() for details of race-free uprobe
* state transitioning. It all hinges on atomic xchg() over *leased*
* pointer. *stable* pointer, once initially set, is not modified concurrently.
*/
struct hprobe {
/* Current lifetime state; one of enum hprobe_state. */
enum hprobe_state state;
/*
 * SRCU index recorded by hprobe_init_leased(); hprobe_init_stable()
 * stores -1 here.
 */
int srcu_idx;
/* Underlying uprobe; may be NULL (hprobe_init_stable() accepts NULL). */
struct uprobe *uprobe;
};
/* /*
* uprobe_task: Metadata of a task while it singlesteps. * uprobe_task: Metadata of a task while it singlesteps.
*/ */
@ -75,6 +134,7 @@ struct uprobe_task {
}; };
struct uprobe *active_uprobe; struct uprobe *active_uprobe;
struct timer_list ri_timer;
unsigned long xol_vaddr; unsigned long xol_vaddr;
struct arch_uprobe *auprobe; struct arch_uprobe *auprobe;
@ -83,15 +143,24 @@ struct uprobe_task {
unsigned int depth; unsigned int depth;
}; };
struct return_consumer {
__u64 cookie;
__u64 id;
};
struct return_instance { struct return_instance {
struct uprobe *uprobe; struct hprobe hprobe;
unsigned long func; unsigned long func;
unsigned long stack; /* stack pointer */ unsigned long stack; /* stack pointer */
unsigned long orig_ret_vaddr; /* original return address */ unsigned long orig_ret_vaddr; /* original return address */
bool chained; /* true, if instance is nested */ bool chained; /* true, if instance is nested */
int consumers_cnt;
struct return_instance *next; /* keep as stack */ struct return_instance *next; /* keep as stack */
}; struct rcu_head rcu;
struct return_consumer consumers[] __counted_by(consumers_cnt);
} ____cacheline_aligned;
enum rp_check { enum rp_check {
RP_CHECK_CALL, RP_CHECK_CALL,

View File

@ -26,6 +26,9 @@
#include <linux/task_work.h> #include <linux/task_work.h>
#include <linux/shmem_fs.h> #include <linux/shmem_fs.h>
#include <linux/khugepaged.h> #include <linux/khugepaged.h>
#include <linux/rcupdate_trace.h>
#include <linux/workqueue.h>
#include <linux/srcu.h>
#include <linux/uprobes.h> #include <linux/uprobes.h>
@ -42,8 +45,6 @@ static struct rb_root uprobes_tree = RB_ROOT;
static DEFINE_RWLOCK(uprobes_treelock); /* serialize rbtree access */ static DEFINE_RWLOCK(uprobes_treelock); /* serialize rbtree access */
static seqcount_rwlock_t uprobes_seqcount = SEQCNT_RWLOCK_ZERO(uprobes_seqcount, &uprobes_treelock); static seqcount_rwlock_t uprobes_seqcount = SEQCNT_RWLOCK_ZERO(uprobes_seqcount, &uprobes_treelock);
DEFINE_STATIC_SRCU(uprobes_srcu);
#define UPROBES_HASH_SZ 13 #define UPROBES_HASH_SZ 13
/* serialize uprobe->pending_list */ /* serialize uprobe->pending_list */
static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ]; static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ];
@ -51,6 +52,9 @@ static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ];
DEFINE_STATIC_PERCPU_RWSEM(dup_mmap_sem); DEFINE_STATIC_PERCPU_RWSEM(dup_mmap_sem);
/* Covers return_instance's uprobe lifetime. */
DEFINE_STATIC_SRCU(uretprobes_srcu);
/* Have a copy of original instruction */ /* Have a copy of original instruction */
#define UPROBE_COPY_INSN 0 #define UPROBE_COPY_INSN 0
@ -62,10 +66,13 @@ struct uprobe {
struct list_head pending_list; struct list_head pending_list;
struct list_head consumers; struct list_head consumers;
struct inode *inode; /* Also hold a ref to inode */ struct inode *inode; /* Also hold a ref to inode */
struct rcu_head rcu; union {
struct rcu_head rcu;
struct work_struct work;
};
loff_t offset; loff_t offset;
loff_t ref_ctr_offset; loff_t ref_ctr_offset;
unsigned long flags; unsigned long flags; /* "unsigned long" so bitops work */
/* /*
* The generic code assumes that it has two members of unknown type * The generic code assumes that it has two members of unknown type
@ -100,7 +107,6 @@ static LIST_HEAD(delayed_uprobe_list);
*/ */
struct xol_area { struct xol_area {
wait_queue_head_t wq; /* if all slots are busy */ wait_queue_head_t wq; /* if all slots are busy */
atomic_t slot_count; /* number of in-use slots */
unsigned long *bitmap; /* 0 = free slot */ unsigned long *bitmap; /* 0 = free slot */
struct page *page; struct page *page;
@ -620,17 +626,23 @@ static inline bool uprobe_is_active(struct uprobe *uprobe)
return !RB_EMPTY_NODE(&uprobe->rb_node); return !RB_EMPTY_NODE(&uprobe->rb_node);
} }
static void uprobe_free_rcu(struct rcu_head *rcu) static void uprobe_free_rcu_tasks_trace(struct rcu_head *rcu)
{ {
struct uprobe *uprobe = container_of(rcu, struct uprobe, rcu); struct uprobe *uprobe = container_of(rcu, struct uprobe, rcu);
kfree(uprobe); kfree(uprobe);
} }
static void put_uprobe(struct uprobe *uprobe) static void uprobe_free_srcu(struct rcu_head *rcu)
{ {
if (!refcount_dec_and_test(&uprobe->ref)) struct uprobe *uprobe = container_of(rcu, struct uprobe, rcu);
return;
call_rcu_tasks_trace(&uprobe->rcu, uprobe_free_rcu_tasks_trace);
}
static void uprobe_free_deferred(struct work_struct *work)
{
struct uprobe *uprobe = container_of(work, struct uprobe, work);
write_lock(&uprobes_treelock); write_lock(&uprobes_treelock);
@ -651,7 +663,162 @@ static void put_uprobe(struct uprobe *uprobe)
delayed_uprobe_remove(uprobe, NULL); delayed_uprobe_remove(uprobe, NULL);
mutex_unlock(&delayed_uprobe_lock); mutex_unlock(&delayed_uprobe_lock);
call_srcu(&uprobes_srcu, &uprobe->rcu, uprobe_free_rcu); /* start srcu -> rcu_tasks_trace -> kfree chain */
call_srcu(&uretprobes_srcu, &uprobe->rcu, uprobe_free_srcu);
}
/*
 * Drop one reference to @uprobe.
 *
 * When this was the last reference, teardown is not done inline: it is
 * handed to uprobe_free_deferred() through schedule_work().
 * uprobe_free_deferred() takes delayed_uprobe_lock (a mutex), while
 * put_uprobe() is reachable from the ri_timer() timer callback via
 * hprobe_expire() -- presumably the reason the teardown is deferred.
 *
 * Note that uprobe->work shares a union with uprobe->rcu.
 */
static void put_uprobe(struct uprobe *uprobe)
{
	if (refcount_dec_and_test(&uprobe->ref)) {
		/* last reference gone: queue the actual teardown */
		INIT_WORK(&uprobe->work, uprobe_free_deferred);
		schedule_work(&uprobe->work);
	}
}
/*
 * Set up @hprobe as a "leased" handle: @uprobe is recorded together with
 * the SRCU read-side index @srcu_idx, and the state becomes HPROBE_LEASED.
 * A NULL @uprobe triggers a warning but is still stored.
 */
static void hprobe_init_leased(struct hprobe *hprobe, struct uprobe *uprobe, int srcu_idx)
{
	WARN_ON(!uprobe);

	hprobe->uprobe = uprobe;
	hprobe->srcu_idx = srcu_idx;
	hprobe->state = HPROBE_LEASED;
}
/*
 * Set up @hprobe as a refcounted ("stable") handle.
 *
 * @uprobe may be NULL, in which case the handle is marked HPROBE_GONE
 * instead of HPROBE_STABLE. No SRCU index is associated (srcu_idx = -1).
 */
static void hprobe_init_stable(struct hprobe *hprobe, struct uprobe *uprobe)
{
	if (uprobe)
		hprobe->state = HPROBE_STABLE;
	else
		hprobe->state = HPROBE_GONE;

	hprobe->srcu_idx = -1;
	hprobe->uprobe = uprobe;
}
/*
* hprobe_consume() fetches hprobe's underlying uprobe and detects whether
* uprobe is SRCU protected or is refcounted. hprobe_consume() can be
* used only once for a given hprobe.
*
* Caller has to call hprobe_finalize() and pass previous hprobe_state, so
* that hprobe_finalize() can perform SRCU unlock or put uprobe, whichever
* is appropriate.
*/
static inline struct uprobe *hprobe_consume(struct hprobe *hprobe, enum hprobe_state *hstate)
{
*hstate = xchg(&hprobe->state, HPROBE_CONSUMED);
switch (*hstate) {
case HPROBE_LEASED:
case HPROBE_STABLE:
return hprobe->uprobe;
case HPROBE_GONE: /* uprobe is NULL, no SRCU */
case HPROBE_CONSUMED: /* uprobe was finalized already, do nothing */
return NULL;
default:
WARN(1, "hprobe invalid state %d", *hstate);
return NULL;
}
}
/*
 * Release whatever @hprobe was holding according to @hstate, the previous
 * state reported by hprobe_consume():
 *   - HPROBE_LEASED:   drop the SRCU read lock recorded in hprobe->srcu_idx;
 *   - HPROBE_STABLE:   drop the uprobe reference;
 *   - HPROBE_GONE / HPROBE_CONSUMED: nothing to release.
 *
 * hprobe_finalize() can only be used from current context after the
 * hprobe_consume() call that determined the uprobe and @hstate.
 */
static void hprobe_finalize(struct hprobe *hprobe, enum hprobe_state hstate)
{
	if (hstate == HPROBE_LEASED) {
		__srcu_read_unlock(&uretprobes_srcu, hprobe->srcu_idx);
		return;
	}

	if (hstate == HPROBE_STABLE) {
		put_uprobe(hprobe->uprobe);
		return;
	}

	if (hstate == HPROBE_GONE || hstate == HPROBE_CONSUMED)
		return;

	WARN(1, "hprobe invalid state %d", hstate);
}
/*
 * Attempt to switch (atomically) uprobe from being SRCU protected (LEASED)
 * to refcounted (STABLE) state. Competes with hprobe_consume(); only one of
 * them can win the race to perform SRCU unlocking. Whoever wins must perform
 * SRCU unlock.
 *
 * Must be called with both the RCU read lock and the uretprobes_srcu read
 * lock held (asserted via lockdep).
 *
 * Returns underlying valid uprobe or NULL, if there was no underlying uprobe
 * to begin with or we failed to bump its refcount and it's going away.
 *
 * Returned non-NULL uprobe can be still safely used within an ongoing SRCU
 * locked region. If `get` is true, it's guaranteed that non-NULL uprobe has
 * an extra refcount for caller to assume and use. Otherwise, it's not
 * guaranteed that returned uprobe has a positive refcount, so caller has to
 * attempt try_get_uprobe(), if it needs to preserve uprobe beyond current
 * SRCU lock region. See dup_utask().
 */
static struct uprobe *hprobe_expire(struct hprobe *hprobe, bool get)
{
	enum hprobe_state hstate;

	/*
	 * return_instance's hprobe is protected by RCU.
	 * Underlying uprobe is itself protected from reuse by SRCU.
	 */
	lockdep_assert(rcu_read_lock_held() && srcu_read_lock_held(&uretprobes_srcu));

	hstate = READ_ONCE(hprobe->state);
	switch (hstate) {
	case HPROBE_STABLE:
		/* uprobe has positive refcount, bump refcount, if necessary */
		return get ? get_uprobe(hprobe->uprobe) : hprobe->uprobe;
	case HPROBE_GONE:
		/*
		 * SRCU was unlocked earlier and we didn't manage to take
		 * uprobe refcnt, so it's effectively NULL
		 */
		return NULL;
	case HPROBE_CONSUMED:
		/*
		 * uprobe was consumed, so it's effectively NULL as far as
		 * uretprobe processing logic is concerned
		 */
		return NULL;
	case HPROBE_LEASED: {
		struct uprobe *uprobe = try_get_uprobe(hprobe->uprobe);

		/*
		 * Try to switch hprobe state, guarding against
		 * hprobe_consume() or another hprobe_expire() racing with us.
		 * Note, if we failed to get uprobe refcount, we use special
		 * HPROBE_GONE state to signal that hprobe->uprobe shouldn't
		 * be used as it will be freed after SRCU is unlocked.
		 */
		if (try_cmpxchg(&hprobe->state, &hstate, uprobe ? HPROBE_STABLE : HPROBE_GONE)) {
			/* We won the race, we are the ones to unlock SRCU */
			__srcu_read_unlock(&uretprobes_srcu, hprobe->srcu_idx);
			/*
			 * The reference taken above now belongs to the hprobe
			 * (STABLE); hand the caller its own extra one if asked.
			 * Only do so when we actually hold a uprobe: in the
			 * HPROBE_GONE case uprobe is NULL and must not be
			 * passed to get_uprobe().
			 */
			return (get && uprobe) ? get_uprobe(uprobe) : uprobe;
		}

		/*
		 * We lost the race, undo refcount bump (if it ever happened),
		 * unless caller would like an extra refcount anyways.
		 */
		if (uprobe && !get)
			put_uprobe(uprobe);
		/*
		 * Even if hprobe_consume() or another hprobe_expire() wins
		 * the state update race and unlocks SRCU from under us, we
		 * still have a guarantee that underlying uprobe won't be
		 * freed due to ongoing caller's SRCU lock region, so we can
		 * return it regardless. Also, if `get` was true, we also have
		 * an extra ref for the caller to own. This is used in dup_utask().
		 */
		return uprobe;
	}
	default:
		WARN(1, "unknown hprobe state %d", hstate);
		return NULL;
	}
}
static __always_inline static __always_inline
@ -706,7 +873,7 @@ static struct uprobe *find_uprobe_rcu(struct inode *inode, loff_t offset)
struct rb_node *node; struct rb_node *node;
unsigned int seq; unsigned int seq;
lockdep_assert(srcu_read_lock_held(&uprobes_srcu)); lockdep_assert(rcu_read_lock_trace_held());
do { do {
seq = read_seqcount_begin(&uprobes_seqcount); seq = read_seqcount_begin(&uprobes_seqcount);
@ -825,8 +992,11 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset,
static void consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc) static void consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc)
{ {
static atomic64_t id;
down_write(&uprobe->consumer_rwsem); down_write(&uprobe->consumer_rwsem);
list_add_rcu(&uc->cons_node, &uprobe->consumers); list_add_rcu(&uc->cons_node, &uprobe->consumers);
uc->id = (__u64) atomic64_inc_return(&id);
up_write(&uprobe->consumer_rwsem); up_write(&uprobe->consumer_rwsem);
} }
@ -934,8 +1104,7 @@ static bool filter_chain(struct uprobe *uprobe, struct mm_struct *mm)
bool ret = false; bool ret = false;
down_read(&uprobe->consumer_rwsem); down_read(&uprobe->consumer_rwsem);
list_for_each_entry_srcu(uc, &uprobe->consumers, cons_node, list_for_each_entry_rcu(uc, &uprobe->consumers, cons_node, rcu_read_lock_trace_held()) {
srcu_read_lock_held(&uprobes_srcu)) {
ret = consumer_filter(uc, mm); ret = consumer_filter(uc, mm);
if (ret) if (ret)
break; break;
@ -1156,7 +1325,8 @@ void uprobe_unregister_sync(void)
* unlucky enough caller can free consumer's memory and cause * unlucky enough caller can free consumer's memory and cause
* handler_chain() or handle_uretprobe_chain() to do an use-after-free. * handler_chain() or handle_uretprobe_chain() to do an use-after-free.
*/ */
synchronize_srcu(&uprobes_srcu); synchronize_rcu_tasks_trace();
synchronize_srcu(&uretprobes_srcu);
} }
EXPORT_SYMBOL_GPL(uprobe_unregister_sync); EXPORT_SYMBOL_GPL(uprobe_unregister_sync);
@ -1240,19 +1410,18 @@ EXPORT_SYMBOL_GPL(uprobe_register);
int uprobe_apply(struct uprobe *uprobe, struct uprobe_consumer *uc, bool add) int uprobe_apply(struct uprobe *uprobe, struct uprobe_consumer *uc, bool add)
{ {
struct uprobe_consumer *con; struct uprobe_consumer *con;
int ret = -ENOENT, srcu_idx; int ret = -ENOENT;
down_write(&uprobe->register_rwsem); down_write(&uprobe->register_rwsem);
srcu_idx = srcu_read_lock(&uprobes_srcu); rcu_read_lock_trace();
list_for_each_entry_srcu(con, &uprobe->consumers, cons_node, list_for_each_entry_rcu(con, &uprobe->consumers, cons_node, rcu_read_lock_trace_held()) {
srcu_read_lock_held(&uprobes_srcu)) {
if (con == uc) { if (con == uc) {
ret = register_for_each_vma(uprobe, add ? uc : NULL); ret = register_for_each_vma(uprobe, add ? uc : NULL);
break; break;
} }
} }
srcu_read_unlock(&uprobes_srcu, srcu_idx); rcu_read_unlock_trace();
up_write(&uprobe->register_rwsem); up_write(&uprobe->register_rwsem);
@ -1475,9 +1644,15 @@ static vm_fault_t xol_fault(const struct vm_special_mapping *sm,
return 0; return 0;
} }
/*
 * .mremap hook of xol_mapping (the "[uprobes]" special mapping).
 * Unconditionally fails with -EPERM; both arguments are ignored.
 * NOTE(review): presumably this keeps the XOL area from being moved by
 * mremap() -- confirm against the vm_special_mapping contract.
 */
static int xol_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma)
{
return -EPERM;
}
static const struct vm_special_mapping xol_mapping = { static const struct vm_special_mapping xol_mapping = {
.name = "[uprobes]", .name = "[uprobes]",
.fault = xol_fault, .fault = xol_fault,
.mremap = xol_mremap,
}; };
/* Slot allocation for XOL */ /* Slot allocation for XOL */
@ -1553,7 +1728,6 @@ static struct xol_area *__create_xol_area(unsigned long vaddr)
init_waitqueue_head(&area->wq); init_waitqueue_head(&area->wq);
/* Reserve the 1st slot for get_trampoline_vaddr() */ /* Reserve the 1st slot for get_trampoline_vaddr() */
set_bit(0, area->bitmap); set_bit(0, area->bitmap);
atomic_set(&area->slot_count, 1);
insns = arch_uprobe_trampoline(&insns_size); insns = arch_uprobe_trampoline(&insns_size);
arch_uprobe_copy_ixol(area->page, 0, insns, insns_size); arch_uprobe_copy_ixol(area->page, 0, insns, insns_size);
@ -1626,92 +1800,57 @@ void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm)
} }
} }
/* static unsigned long xol_get_slot_nr(struct xol_area *area)
* - search for a free slot.
*/
static unsigned long xol_take_insn_slot(struct xol_area *area)
{ {
unsigned long slot_addr; unsigned long slot_nr;
int slot_nr;
do { slot_nr = find_first_zero_bit(area->bitmap, UINSNS_PER_PAGE);
slot_nr = find_first_zero_bit(area->bitmap, UINSNS_PER_PAGE); if (slot_nr < UINSNS_PER_PAGE) {
if (slot_nr < UINSNS_PER_PAGE) { if (!test_and_set_bit(slot_nr, area->bitmap))
if (!test_and_set_bit(slot_nr, area->bitmap)) return slot_nr;
break; }
slot_nr = UINSNS_PER_PAGE; return UINSNS_PER_PAGE;
continue;
}
wait_event(area->wq, (atomic_read(&area->slot_count) < UINSNS_PER_PAGE));
} while (slot_nr >= UINSNS_PER_PAGE);
slot_addr = area->vaddr + (slot_nr * UPROBE_XOL_SLOT_BYTES);
atomic_inc(&area->slot_count);
return slot_addr;
} }
/* /*
* xol_get_insn_slot - allocate a slot for xol. * xol_get_insn_slot - allocate a slot for xol.
* Returns the allocated slot address or 0.
*/ */
static unsigned long xol_get_insn_slot(struct uprobe *uprobe) static bool xol_get_insn_slot(struct uprobe *uprobe, struct uprobe_task *utask)
{ {
struct xol_area *area; struct xol_area *area = get_xol_area();
unsigned long xol_vaddr; unsigned long slot_nr;
area = get_xol_area();
if (!area) if (!area)
return 0; return false;
xol_vaddr = xol_take_insn_slot(area); wait_event(area->wq, (slot_nr = xol_get_slot_nr(area)) < UINSNS_PER_PAGE);
if (unlikely(!xol_vaddr))
return 0;
arch_uprobe_copy_ixol(area->page, xol_vaddr, utask->xol_vaddr = area->vaddr + slot_nr * UPROBE_XOL_SLOT_BYTES;
arch_uprobe_copy_ixol(area->page, utask->xol_vaddr,
&uprobe->arch.ixol, sizeof(uprobe->arch.ixol)); &uprobe->arch.ixol, sizeof(uprobe->arch.ixol));
return true;
return xol_vaddr;
} }
/* /*
* xol_free_insn_slot - If slot was earlier allocated by * xol_free_insn_slot - free the slot allocated by xol_get_insn_slot()
* @xol_get_insn_slot(), make the slot available for
* subsequent requests.
*/ */
static void xol_free_insn_slot(struct task_struct *tsk) static void xol_free_insn_slot(struct uprobe_task *utask)
{ {
struct xol_area *area; struct xol_area *area = current->mm->uprobes_state.xol_area;
unsigned long vma_end; unsigned long offset = utask->xol_vaddr - area->vaddr;
unsigned long slot_addr; unsigned int slot_nr;
if (!tsk->mm || !tsk->mm->uprobes_state.xol_area || !tsk->utask) utask->xol_vaddr = 0;
/* xol_vaddr must fit into [area->vaddr, area->vaddr + PAGE_SIZE) */
if (WARN_ON_ONCE(offset >= PAGE_SIZE))
return; return;
slot_addr = tsk->utask->xol_vaddr; slot_nr = offset / UPROBE_XOL_SLOT_BYTES;
if (unlikely(!slot_addr)) clear_bit(slot_nr, area->bitmap);
return; smp_mb__after_atomic(); /* pairs with prepare_to_wait() */
if (waitqueue_active(&area->wq))
area = tsk->mm->uprobes_state.xol_area; wake_up(&area->wq);
vma_end = area->vaddr + PAGE_SIZE;
if (area->vaddr <= slot_addr && slot_addr < vma_end) {
unsigned long offset;
int slot_nr;
offset = slot_addr - area->vaddr;
slot_nr = offset / UPROBE_XOL_SLOT_BYTES;
if (slot_nr >= UINSNS_PER_PAGE)
return;
clear_bit(slot_nr, area->bitmap);
atomic_dec(&area->slot_count);
smp_mb__after_atomic(); /* pairs with prepare_to_wait() */
if (waitqueue_active(&area->wq))
wake_up(&area->wq);
tsk->utask->xol_vaddr = 0;
}
} }
void __weak arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, void __weak arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
@ -1750,11 +1889,18 @@ unsigned long uprobe_get_trap_addr(struct pt_regs *regs)
return instruction_pointer(regs); return instruction_pointer(regs);
} }
static struct return_instance *free_ret_instance(struct return_instance *ri) static struct return_instance *free_ret_instance(struct return_instance *ri, bool cleanup_hprobe)
{ {
struct return_instance *next = ri->next; struct return_instance *next = ri->next;
put_uprobe(ri->uprobe);
kfree(ri); if (cleanup_hprobe) {
enum hprobe_state hstate;
(void)hprobe_consume(&ri->hprobe, &hstate);
hprobe_finalize(&ri->hprobe, hstate);
}
kfree_rcu(ri, rcu);
return next; return next;
} }
@ -1770,18 +1916,50 @@ void uprobe_free_utask(struct task_struct *t)
if (!utask) if (!utask)
return; return;
if (utask->active_uprobe) WARN_ON_ONCE(utask->active_uprobe || utask->xol_vaddr);
put_uprobe(utask->active_uprobe);
timer_delete_sync(&utask->ri_timer);
ri = utask->return_instances; ri = utask->return_instances;
while (ri) while (ri)
ri = free_ret_instance(ri); ri = free_ret_instance(ri, true /* cleanup_hprobe */);
xol_free_insn_slot(t);
kfree(utask); kfree(utask);
t->utask = NULL; t->utask = NULL;
} }
/* Delay, in jiffies, used when (re)arming utask->ri_timer via mod_timer(). */
#define RI_TIMER_PERIOD (HZ / 10) /* 100 ms */
/*
 * Walk a return_instance list starting at @head, following ->next, with
 * every pointer load going through rcu_dereference_raw(). @pos is the
 * cursor and is NULL once the loop terminates.
 */
#define for_each_ret_instance_rcu(pos, head) \
for (pos = rcu_dereference_raw(head); pos; pos = rcu_dereference_raw(pos->next))
static void ri_timer(struct timer_list *timer)
{
struct uprobe_task *utask = container_of(timer, struct uprobe_task, ri_timer);
struct return_instance *ri;
/* SRCU protects uprobe from reuse for the cmpxchg() inside hprobe_expire(). */
guard(srcu)(&uretprobes_srcu);
/* RCU protects return_instance from freeing. */
guard(rcu)();
for_each_ret_instance_rcu(ri, utask->return_instances)
hprobe_expire(&ri->hprobe, false);
}
/*
 * Allocate a zeroed uprobe_task and initialise its ri_timer with
 * ri_timer() as the callback.
 *
 * Returns the new object, or NULL if the allocation failed.
 */
static struct uprobe_task *alloc_utask(void)
{
	struct uprobe_task *utask = kzalloc(sizeof(*utask), GFP_KERNEL);

	if (utask)
		timer_setup(&utask->ri_timer, ri_timer, 0);

	return utask;
}
/* /*
* Allocate a uprobe_task object for the task if necessary. * Allocate a uprobe_task object for the task if necessary.
* Called when the thread hits a breakpoint. * Called when the thread hits a breakpoint.
@ -1793,38 +1971,73 @@ void uprobe_free_utask(struct task_struct *t)
static struct uprobe_task *get_utask(void) static struct uprobe_task *get_utask(void)
{ {
if (!current->utask) if (!current->utask)
current->utask = kzalloc(sizeof(struct uprobe_task), GFP_KERNEL); current->utask = alloc_utask();
return current->utask; return current->utask;
} }
/*
 * Size in bytes of a return_instance that carries @consumers_cnt entries
 * in its trailing consumers[] array.
 */
static size_t ri_size(int consumers_cnt)
{
	struct return_instance *ri;	/* only used as a sizeof() operand */
	size_t header = sizeof(*ri);
	size_t tail = sizeof(ri->consumers[0]) * consumers_cnt;

	return header + tail;
}
/* Number of consumer slots allocated up front, and the growth step. */
#define DEF_CNT 4

/*
 * Allocate a zeroed return_instance with room for DEF_CNT consumers.
 *
 * On allocation failure this returns ZERO_SIZE_PTR rather than NULL:
 * handler_chain() uses NULL to mean "not allocated yet", so a distinct
 * sentinel is needed to mean "allocation was attempted and failed".
 */
static struct return_instance *alloc_return_instance(void)
{
	struct return_instance *ri = kzalloc(ri_size(DEF_CNT), GFP_KERNEL);

	if (ri) {
		ri->consumers_cnt = DEF_CNT;
		return ri;
	}

	return ZERO_SIZE_PTR;
}
/*
 * Byte-for-byte copy of @old, including its old->consumers_cnt trailing
 * consumer entries. Returns the kmemdup() result as is.
 */
static struct return_instance *dup_return_instance(struct return_instance *old)
{
	return kmemdup(old, ri_size(old->consumers_cnt), GFP_KERNEL);
}
static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask) static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask)
{ {
struct uprobe_task *n_utask; struct uprobe_task *n_utask;
struct return_instance **p, *o, *n; struct return_instance **p, *o, *n;
struct uprobe *uprobe;
n_utask = kzalloc(sizeof(struct uprobe_task), GFP_KERNEL); n_utask = alloc_utask();
if (!n_utask) if (!n_utask)
return -ENOMEM; return -ENOMEM;
t->utask = n_utask; t->utask = n_utask;
/* protect uprobes from freeing, we'll need try_get_uprobe() them */
guard(srcu)(&uretprobes_srcu);
p = &n_utask->return_instances; p = &n_utask->return_instances;
for (o = o_utask->return_instances; o; o = o->next) { for (o = o_utask->return_instances; o; o = o->next) {
n = kmalloc(sizeof(struct return_instance), GFP_KERNEL); n = dup_return_instance(o);
if (!n) if (!n)
return -ENOMEM; return -ENOMEM;
*n = *o; /* if uprobe is non-NULL, we'll have an extra refcount for uprobe */
/* uprobe = hprobe_expire(&o->hprobe, true);
* uprobe's refcnt has to be positive at this point, kept by
* utask->return_instances items; return_instances can't be
* removed right now, as task is blocked due to duping; so
* get_uprobe() is safe to use here.
*/
get_uprobe(n->uprobe);
n->next = NULL;
*p = n; /*
* New utask will have stable properly refcounted uprobe or
* NULL. Even if we failed to get refcounted uprobe, we still
* need to preserve full set of return_instances for proper
* uretprobe handling and nesting in forked task.
*/
hprobe_init_stable(&n->hprobe, uprobe);
n->next = NULL;
rcu_assign_pointer(*p, n);
p = &n->next; p = &n->next;
n_utask->depth++; n_utask->depth++;
} }
@ -1900,45 +2113,34 @@ static void cleanup_return_instances(struct uprobe_task *utask, bool chained,
enum rp_check ctx = chained ? RP_CHECK_CHAIN_CALL : RP_CHECK_CALL; enum rp_check ctx = chained ? RP_CHECK_CHAIN_CALL : RP_CHECK_CALL;
while (ri && !arch_uretprobe_is_alive(ri, ctx, regs)) { while (ri && !arch_uretprobe_is_alive(ri, ctx, regs)) {
ri = free_ret_instance(ri); ri = free_ret_instance(ri, true /* cleanup_hprobe */);
utask->depth--; utask->depth--;
} }
utask->return_instances = ri; rcu_assign_pointer(utask->return_instances, ri);
} }
static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs) static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs,
struct return_instance *ri)
{ {
struct return_instance *ri; struct uprobe_task *utask = current->utask;
struct uprobe_task *utask;
unsigned long orig_ret_vaddr, trampoline_vaddr; unsigned long orig_ret_vaddr, trampoline_vaddr;
bool chained; bool chained;
int srcu_idx;
if (!get_xol_area()) if (!get_xol_area())
return; goto free;
utask = get_utask();
if (!utask)
return;
if (utask->depth >= MAX_URETPROBE_DEPTH) { if (utask->depth >= MAX_URETPROBE_DEPTH) {
printk_ratelimited(KERN_INFO "uprobe: omit uretprobe due to" printk_ratelimited(KERN_INFO "uprobe: omit uretprobe due to"
" nestedness limit pid/tgid=%d/%d\n", " nestedness limit pid/tgid=%d/%d\n",
current->pid, current->tgid); current->pid, current->tgid);
return; goto free;
} }
/* we need to bump refcount to store uprobe in utask */
if (!try_get_uprobe(uprobe))
return;
ri = kmalloc(sizeof(struct return_instance), GFP_KERNEL);
if (!ri)
goto fail;
trampoline_vaddr = uprobe_get_trampoline_vaddr(); trampoline_vaddr = uprobe_get_trampoline_vaddr();
orig_ret_vaddr = arch_uretprobe_hijack_return_addr(trampoline_vaddr, regs); orig_ret_vaddr = arch_uretprobe_hijack_return_addr(trampoline_vaddr, regs);
if (orig_ret_vaddr == -1) if (orig_ret_vaddr == -1)
goto fail; goto free;
/* drop the entries invalidated by longjmp() */ /* drop the entries invalidated by longjmp() */
chained = (orig_ret_vaddr == trampoline_vaddr); chained = (orig_ret_vaddr == trampoline_vaddr);
@ -1956,53 +2158,51 @@ static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
* attack from user-space. * attack from user-space.
*/ */
uprobe_warn(current, "handle tail call"); uprobe_warn(current, "handle tail call");
goto fail; goto free;
} }
orig_ret_vaddr = utask->return_instances->orig_ret_vaddr; orig_ret_vaddr = utask->return_instances->orig_ret_vaddr;
} }
ri->uprobe = uprobe;
/* __srcu_read_lock() because SRCU lock survives switch to user space */
srcu_idx = __srcu_read_lock(&uretprobes_srcu);
ri->func = instruction_pointer(regs); ri->func = instruction_pointer(regs);
ri->stack = user_stack_pointer(regs); ri->stack = user_stack_pointer(regs);
ri->orig_ret_vaddr = orig_ret_vaddr; ri->orig_ret_vaddr = orig_ret_vaddr;
ri->chained = chained; ri->chained = chained;
utask->depth++; utask->depth++;
hprobe_init_leased(&ri->hprobe, uprobe, srcu_idx);
ri->next = utask->return_instances; ri->next = utask->return_instances;
utask->return_instances = ri; rcu_assign_pointer(utask->return_instances, ri);
mod_timer(&utask->ri_timer, jiffies + RI_TIMER_PERIOD);
return; return;
fail: free:
kfree(ri); kfree(ri);
put_uprobe(uprobe);
} }
/* Prepare to single-step probed instruction out of line. */ /* Prepare to single-step probed instruction out of line. */
static int static int
pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long bp_vaddr) pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long bp_vaddr)
{ {
struct uprobe_task *utask; struct uprobe_task *utask = current->utask;
unsigned long xol_vaddr;
int err; int err;
utask = get_utask();
if (!utask)
return -ENOMEM;
if (!try_get_uprobe(uprobe)) if (!try_get_uprobe(uprobe))
return -EINVAL; return -EINVAL;
xol_vaddr = xol_get_insn_slot(uprobe); if (!xol_get_insn_slot(uprobe, utask)) {
if (!xol_vaddr) {
err = -ENOMEM; err = -ENOMEM;
goto err_out; goto err_out;
} }
utask->xol_vaddr = xol_vaddr;
utask->vaddr = bp_vaddr; utask->vaddr = bp_vaddr;
err = arch_uprobe_pre_xol(&uprobe->arch, regs); err = arch_uprobe_pre_xol(&uprobe->arch, regs);
if (unlikely(err)) { if (unlikely(err)) {
xol_free_insn_slot(current); xol_free_insn_slot(utask);
goto err_out; goto err_out;
} }
@ -2125,35 +2325,90 @@ static struct uprobe *find_active_uprobe_rcu(unsigned long bp_vaddr, int *is_swb
return uprobe; return uprobe;
} }
/*
 * Record (@id, @cookie) in slot @idx of @ri's consumers[] array, growing
 * the array by DEF_CNT entries when @idx is past the allocated count.
 *
 * Returns the (possibly reallocated) return_instance. If @ri already is
 * ZERO_SIZE_PTR it is returned untouched; if growing fails, the old
 * instance is freed and ZERO_SIZE_PTR is returned.
 */
static struct return_instance*
push_consumer(struct return_instance *ri, int idx, __u64 id, __u64 cookie)
{
	struct return_instance *grown;

	if (unlikely(ri == ZERO_SIZE_PTR))
		return ZERO_SIZE_PTR;

	if (unlikely(idx >= ri->consumers_cnt)) {
		/* bump the count first so the resized copy carries it */
		ri->consumers_cnt += DEF_CNT;
		grown = krealloc(ri, ri_size(ri->consumers_cnt), GFP_KERNEL);
		if (!grown) {
			kfree(ri);
			return ZERO_SIZE_PTR;
		}
		ri = grown;
	}

	ri->consumers[idx].cookie = cookie;
	ri->consumers[idx].id = id;
	return ri;
}
static struct return_consumer *
return_consumer_find(struct return_instance *ri, int *iter, int id)
{
struct return_consumer *ric;
int idx = *iter;
for (ric = &ri->consumers[idx]; idx < ri->consumers_cnt; idx++, ric++) {
if (ric->id == id) {
*iter = idx + 1;
return ric;
}
}
return NULL;
}
static bool ignore_ret_handler(int rc)
{
return rc == UPROBE_HANDLER_REMOVE || rc == UPROBE_HANDLER_IGNORE;
}
static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs) static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
{ {
struct uprobe_consumer *uc; struct uprobe_consumer *uc;
int remove = UPROBE_HANDLER_REMOVE; bool has_consumers = false, remove = true;
bool need_prep = false; /* prepare return uprobe, when needed */ struct return_instance *ri = NULL;
bool has_consumers = false; int push_idx = 0;
current->utask->auprobe = &uprobe->arch; current->utask->auprobe = &uprobe->arch;
list_for_each_entry_srcu(uc, &uprobe->consumers, cons_node, list_for_each_entry_rcu(uc, &uprobe->consumers, cons_node, rcu_read_lock_trace_held()) {
srcu_read_lock_held(&uprobes_srcu)) { bool session = uc->handler && uc->ret_handler;
__u64 cookie = 0;
int rc = 0; int rc = 0;
if (uc->handler) { if (uc->handler) {
rc = uc->handler(uc, regs); rc = uc->handler(uc, regs, &cookie);
WARN(rc & ~UPROBE_HANDLER_MASK, WARN(rc < 0 || rc > 2,
"bad rc=0x%x from %ps()\n", rc, uc->handler); "bad rc=0x%x from %ps()\n", rc, uc->handler);
} }
if (uc->ret_handler) remove &= rc == UPROBE_HANDLER_REMOVE;
need_prep = true;
remove &= rc;
has_consumers = true; has_consumers = true;
if (!uc->ret_handler || ignore_ret_handler(rc))
continue;
if (!ri)
ri = alloc_return_instance();
if (session)
ri = push_consumer(ri, push_idx++, uc->id, cookie);
} }
current->utask->auprobe = NULL; current->utask->auprobe = NULL;
if (need_prep && !remove) if (!ZERO_OR_NULL_PTR(ri)) {
prepare_uretprobe(uprobe, regs); /* put bp at return */ /*
* The push_idx value has the final number of return consumers,
* and ri->consumers_cnt has number of allocated consumers.
*/
ri->consumers_cnt = push_idx;
prepare_uretprobe(uprobe, regs, ri);
}
if (remove && has_consumers) { if (remove && has_consumers) {
down_read(&uprobe->register_rwsem); down_read(&uprobe->register_rwsem);
@ -2169,19 +2424,27 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
} }
static void static void
handle_uretprobe_chain(struct return_instance *ri, struct pt_regs *regs) handle_uretprobe_chain(struct return_instance *ri, struct uprobe *uprobe, struct pt_regs *regs)
{ {
struct uprobe *uprobe = ri->uprobe; struct return_consumer *ric;
struct uprobe_consumer *uc; struct uprobe_consumer *uc;
int srcu_idx; int ric_idx = 0;
srcu_idx = srcu_read_lock(&uprobes_srcu); /* all consumers unsubscribed meanwhile */
list_for_each_entry_srcu(uc, &uprobe->consumers, cons_node, if (unlikely(!uprobe))
srcu_read_lock_held(&uprobes_srcu)) { return;
if (uc->ret_handler)
uc->ret_handler(uc, ri->func, regs); rcu_read_lock_trace();
list_for_each_entry_rcu(uc, &uprobe->consumers, cons_node, rcu_read_lock_trace_held()) {
bool session = uc->handler && uc->ret_handler;
if (uc->ret_handler) {
ric = return_consumer_find(ri, &ric_idx, uc->id);
if (!session || ric)
uc->ret_handler(uc, ri->func, regs, ric ? &ric->cookie : NULL);
}
} }
srcu_read_unlock(&uprobes_srcu, srcu_idx); rcu_read_unlock_trace();
} }
static struct return_instance *find_next_ret_chain(struct return_instance *ri) static struct return_instance *find_next_ret_chain(struct return_instance *ri)
@ -2200,6 +2463,8 @@ void uprobe_handle_trampoline(struct pt_regs *regs)
{ {
struct uprobe_task *utask; struct uprobe_task *utask;
struct return_instance *ri, *next; struct return_instance *ri, *next;
struct uprobe *uprobe;
enum hprobe_state hstate;
bool valid; bool valid;
utask = current->utask; utask = current->utask;
@ -2230,21 +2495,24 @@ void uprobe_handle_trampoline(struct pt_regs *regs)
* trampoline addresses on the stack are replaced with correct * trampoline addresses on the stack are replaced with correct
* original return addresses * original return addresses
*/ */
utask->return_instances = ri->next; rcu_assign_pointer(utask->return_instances, ri->next);
uprobe = hprobe_consume(&ri->hprobe, &hstate);
if (valid) if (valid)
handle_uretprobe_chain(ri, regs); handle_uretprobe_chain(ri, uprobe, regs);
ri = free_ret_instance(ri); hprobe_finalize(&ri->hprobe, hstate);
/* We already took care of hprobe, no need to waste more time on that. */
ri = free_ret_instance(ri, false /* !cleanup_hprobe */);
utask->depth--; utask->depth--;
} while (ri != next); } while (ri != next);
} while (!valid); } while (!valid);
utask->return_instances = ri;
return; return;
sigill: sigill:
uprobe_warn(current, "handle uretprobe, sending SIGILL."); uprobe_warn(current, "handle uretprobe, sending SIGILL.");
force_sig(SIGILL); force_sig(SIGILL);
} }
bool __weak arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs) bool __weak arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs)
@ -2266,13 +2534,13 @@ static void handle_swbp(struct pt_regs *regs)
{ {
struct uprobe *uprobe; struct uprobe *uprobe;
unsigned long bp_vaddr; unsigned long bp_vaddr;
int is_swbp, srcu_idx; int is_swbp;
bp_vaddr = uprobe_get_swbp_addr(regs); bp_vaddr = uprobe_get_swbp_addr(regs);
if (bp_vaddr == uprobe_get_trampoline_vaddr()) if (bp_vaddr == uprobe_get_trampoline_vaddr())
return uprobe_handle_trampoline(regs); return uprobe_handle_trampoline(regs);
srcu_idx = srcu_read_lock(&uprobes_srcu); rcu_read_lock_trace();
uprobe = find_active_uprobe_rcu(bp_vaddr, &is_swbp); uprobe = find_active_uprobe_rcu(bp_vaddr, &is_swbp);
if (!uprobe) { if (!uprobe) {
@ -2330,7 +2598,7 @@ static void handle_swbp(struct pt_regs *regs)
out: out:
/* arch_uprobe_skip_sstep() succeeded, or restart if can't singlestep */ /* arch_uprobe_skip_sstep() succeeded, or restart if can't singlestep */
srcu_read_unlock(&uprobes_srcu, srcu_idx); rcu_read_unlock_trace();
} }
/* /*
@ -2353,7 +2621,7 @@ static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)
put_uprobe(uprobe); put_uprobe(uprobe);
utask->active_uprobe = NULL; utask->active_uprobe = NULL;
utask->state = UTASK_RUNNING; utask->state = UTASK_RUNNING;
xol_free_insn_slot(current); xol_free_insn_slot(utask);
spin_lock_irq(&current->sighand->siglock); spin_lock_irq(&current->sighand->siglock);
recalc_sigpending(); /* see uprobe_deny_signal() */ recalc_sigpending(); /* see uprobe_deny_signal() */

View File

@ -3264,7 +3264,8 @@ uprobe_multi_link_filter(struct uprobe_consumer *con, struct mm_struct *mm)
} }
static int static int
uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs *regs) uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs *regs,
__u64 *data)
{ {
struct bpf_uprobe *uprobe; struct bpf_uprobe *uprobe;
@ -3273,7 +3274,8 @@ uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs *regs)
} }
static int static int
uprobe_multi_link_ret_handler(struct uprobe_consumer *con, unsigned long func, struct pt_regs *regs) uprobe_multi_link_ret_handler(struct uprobe_consumer *con, unsigned long func, struct pt_regs *regs,
__u64 *data)
{ {
struct bpf_uprobe *uprobe; struct bpf_uprobe *uprobe;

View File

@ -89,9 +89,11 @@ static struct trace_uprobe *to_trace_uprobe(struct dyn_event *ev)
static int register_uprobe_event(struct trace_uprobe *tu); static int register_uprobe_event(struct trace_uprobe *tu);
static int unregister_uprobe_event(struct trace_uprobe *tu); static int unregister_uprobe_event(struct trace_uprobe *tu);
static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs); static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs,
__u64 *data);
static int uretprobe_dispatcher(struct uprobe_consumer *con, static int uretprobe_dispatcher(struct uprobe_consumer *con,
unsigned long func, struct pt_regs *regs); unsigned long func, struct pt_regs *regs,
__u64 *data);
#ifdef CONFIG_STACK_GROWSUP #ifdef CONFIG_STACK_GROWSUP
static unsigned long adjust_stack_addr(unsigned long addr, unsigned int n) static unsigned long adjust_stack_addr(unsigned long addr, unsigned int n)
@ -1522,7 +1524,8 @@ trace_uprobe_register(struct trace_event_call *event, enum trace_reg type,
} }
} }
static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs) static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs,
__u64 *data)
{ {
struct trace_uprobe *tu; struct trace_uprobe *tu;
struct uprobe_dispatch_data udd; struct uprobe_dispatch_data udd;
@ -1553,7 +1556,8 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
} }
static int uretprobe_dispatcher(struct uprobe_consumer *con, static int uretprobe_dispatcher(struct uprobe_consumer *con,
unsigned long func, struct pt_regs *regs) unsigned long func, struct pt_regs *regs,
__u64 *data)
{ {
struct trace_uprobe *tu; struct trace_uprobe *tu;
struct uprobe_dispatch_data udd; struct uprobe_dispatch_data udd;

View File

@ -463,7 +463,7 @@ static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = {
static int static int
uprobe_ret_handler(struct uprobe_consumer *self, unsigned long func, uprobe_ret_handler(struct uprobe_consumer *self, unsigned long func,
struct pt_regs *regs) struct pt_regs *regs, __u64 *data)
{ {
regs->ax = 0x12345678deadbeef; regs->ax = 0x12345678deadbeef;