mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2024-12-29 17:25:38 +00:00
Stable tag for bpf-next's uprobe work.
-----BEGIN PGP SIGNATURE----- iQJJBAABCgAzFiEEv3OU3/byMaA0LqWJdkfhpEvA5LoFAmcrTRsVHHBldGVyekBp bmZyYWRlYWQub3JnAAoJEHZH4aRLwOS6PLoP/jL4pUgW/ZrQFwpZh71BxeDt2Ka/ Eb6AsHe0PcKAMJYaJDfin6FRU87hp3tHIefSGdexvSttWwbnwKl8cVb+Y7gVnytu b2PkMfiOFShKEhu6YAJmxWIOi6MDxonjIMQgjvsVGrZmHiPgGTrh+nnmHYQ+qxFq wCaZXO3E65drtZKbi1HddHDYR+e1mHQU0uC+mLO44sP3lzJVxPnYGKGjaS62Z/Da XF+3tz6jc6jpu08FJy8ltrqLvcHPmTuDkR6f8mG3Hc8Hw0mndY/4yk0bGbbHo7Vx y42Aq4UUgcpvb8OUIicMRLzp3hRjsSTn8UJjsinEaCexdw6ZZiZVU/YR9Mf5ivrJ dlplFJvP8b6psnHrRf5xJ1SUv7+dap075A3/28MEvGErZOINoULAGa/hJIndHfuL NeWaZj0+of2eAX1SDePia87jX1P9xuU6AEw944i2rhI4P1J5I6XYfcaDDICBYitv yREafY/i6wb/Q8GhpjWmSE7p4wUIi5o3CpZsncj7B4Me9JBdHWrcnyUY55Tz05mo zoKnNgYC3d9DAIwXvq7x6tM2Tw183YXul/aHJSr3/rFKuuGQx0XACt6BO+yI35q3 6max4kMyr+kUqr9YYZtb9fuBw3TPhwY/zXG0ydSxNNh7oX+boxh4/bxXljLWXmRQ eHgsXuuF1YgCg1R9 =Wiky -----END PGP SIGNATURE----- Merge tag 'perf-core-for-bpf-next' from tip tree Stable tag for bpf-next's uprobe work. Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
This commit is contained in:
commit
5f67329cb2
@ -135,6 +135,7 @@ config KPROBES_ON_FTRACE
|
||||
config UPROBES
|
||||
def_bool n
|
||||
depends on ARCH_SUPPORTS_UPROBES
|
||||
select TASKS_TRACE_RCU
|
||||
help
|
||||
Uprobes is the user-space counterpart to kprobes: they
|
||||
enable instrumentation applications (such as 'perf probe')
|
||||
|
@ -943,11 +943,12 @@ static int amd_pmu_v2_snapshot_branch_stack(struct perf_branch_entry *entries, u
|
||||
static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
static atomic64_t status_warned = ATOMIC64_INIT(0);
|
||||
u64 reserved, status, mask, new_bits, prev_bits;
|
||||
struct perf_sample_data data;
|
||||
struct hw_perf_event *hwc;
|
||||
struct perf_event *event;
|
||||
int handled = 0, idx;
|
||||
u64 reserved, status, mask;
|
||||
bool pmu_enabled;
|
||||
|
||||
/*
|
||||
@ -1012,7 +1013,12 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
|
||||
* the corresponding PMCs are expected to be inactive according to the
|
||||
* active_mask
|
||||
*/
|
||||
WARN_ON(status > 0);
|
||||
if (status > 0) {
|
||||
prev_bits = atomic64_fetch_or(status, &status_warned);
|
||||
// A new bit was set for the very first time.
|
||||
new_bits = status & ~prev_bits;
|
||||
WARN(new_bits, "New overflows for inactive PMCs: %llx\n", new_bits);
|
||||
}
|
||||
|
||||
/* Clear overflow and freeze bits */
|
||||
amd_pmu_ack_global_status(~status);
|
||||
|
@ -4599,6 +4599,28 @@ static inline bool erratum_hsw11(struct perf_event *event)
|
||||
X86_CONFIG(.event=0xc0, .umask=0x01);
|
||||
}
|
||||
|
||||
static struct event_constraint *
|
||||
arl_h_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
|
||||
struct perf_event *event)
|
||||
{
|
||||
struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
|
||||
|
||||
if (pmu->pmu_type == hybrid_tiny)
|
||||
return cmt_get_event_constraints(cpuc, idx, event);
|
||||
|
||||
return mtl_get_event_constraints(cpuc, idx, event);
|
||||
}
|
||||
|
||||
static int arl_h_hw_config(struct perf_event *event)
|
||||
{
|
||||
struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
|
||||
|
||||
if (pmu->pmu_type == hybrid_tiny)
|
||||
return intel_pmu_hw_config(event);
|
||||
|
||||
return adl_hw_config(event);
|
||||
}
|
||||
|
||||
/*
|
||||
* The HSW11 requires a period larger than 100 which is the same as the BDM11.
|
||||
* A minimum period of 128 is enforced as well for the INST_RETIRED.ALL.
|
||||
@ -4924,17 +4946,26 @@ static struct x86_hybrid_pmu *find_hybrid_pmu_for_cpu(void)
|
||||
|
||||
/*
|
||||
* This essentially just maps between the 'hybrid_cpu_type'
|
||||
* and 'hybrid_pmu_type' enums:
|
||||
* and 'hybrid_pmu_type' enums except for ARL-H processor
|
||||
* which needs to compare atom uarch native id since ARL-H
|
||||
* contains two different atom uarchs.
|
||||
*/
|
||||
for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) {
|
||||
enum hybrid_pmu_type pmu_type = x86_pmu.hybrid_pmu[i].pmu_type;
|
||||
u32 native_id;
|
||||
|
||||
if (cpu_type == HYBRID_INTEL_CORE &&
|
||||
pmu_type == hybrid_big)
|
||||
return &x86_pmu.hybrid_pmu[i];
|
||||
if (cpu_type == HYBRID_INTEL_ATOM &&
|
||||
pmu_type == hybrid_small)
|
||||
if (cpu_type == HYBRID_INTEL_CORE && pmu_type == hybrid_big)
|
||||
return &x86_pmu.hybrid_pmu[i];
|
||||
if (cpu_type == HYBRID_INTEL_ATOM) {
|
||||
if (x86_pmu.num_hybrid_pmus == 2 && pmu_type == hybrid_small)
|
||||
return &x86_pmu.hybrid_pmu[i];
|
||||
|
||||
native_id = get_this_hybrid_cpu_native_id();
|
||||
if (native_id == skt_native_id && pmu_type == hybrid_small)
|
||||
return &x86_pmu.hybrid_pmu[i];
|
||||
if (native_id == cmt_native_id && pmu_type == hybrid_tiny)
|
||||
return &x86_pmu.hybrid_pmu[i];
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
@ -5965,6 +5996,37 @@ static struct attribute *lnl_hybrid_events_attrs[] = {
|
||||
NULL
|
||||
};
|
||||
|
||||
/* The event string must be in PMU IDX order. */
|
||||
EVENT_ATTR_STR_HYBRID(topdown-retiring,
|
||||
td_retiring_arl_h,
|
||||
"event=0xc2,umask=0x02;event=0x00,umask=0x80;event=0xc2,umask=0x0",
|
||||
hybrid_big_small_tiny);
|
||||
EVENT_ATTR_STR_HYBRID(topdown-bad-spec,
|
||||
td_bad_spec_arl_h,
|
||||
"event=0x73,umask=0x0;event=0x00,umask=0x81;event=0x73,umask=0x0",
|
||||
hybrid_big_small_tiny);
|
||||
EVENT_ATTR_STR_HYBRID(topdown-fe-bound,
|
||||
td_fe_bound_arl_h,
|
||||
"event=0x9c,umask=0x01;event=0x00,umask=0x82;event=0x71,umask=0x0",
|
||||
hybrid_big_small_tiny);
|
||||
EVENT_ATTR_STR_HYBRID(topdown-be-bound,
|
||||
td_be_bound_arl_h,
|
||||
"event=0xa4,umask=0x02;event=0x00,umask=0x83;event=0x74,umask=0x0",
|
||||
hybrid_big_small_tiny);
|
||||
|
||||
static struct attribute *arl_h_hybrid_events_attrs[] = {
|
||||
EVENT_PTR(slots_adl),
|
||||
EVENT_PTR(td_retiring_arl_h),
|
||||
EVENT_PTR(td_bad_spec_arl_h),
|
||||
EVENT_PTR(td_fe_bound_arl_h),
|
||||
EVENT_PTR(td_be_bound_arl_h),
|
||||
EVENT_PTR(td_heavy_ops_adl),
|
||||
EVENT_PTR(td_br_mis_adl),
|
||||
EVENT_PTR(td_fetch_lat_adl),
|
||||
EVENT_PTR(td_mem_bound_adl),
|
||||
NULL,
|
||||
};
|
||||
|
||||
/* Must be in IDX order */
|
||||
EVENT_ATTR_STR_HYBRID(mem-loads, mem_ld_adl, "event=0xd0,umask=0x5,ldlat=3;event=0xcd,umask=0x1,ldlat=3", hybrid_big_small);
|
||||
EVENT_ATTR_STR_HYBRID(mem-stores, mem_st_adl, "event=0xd0,umask=0x6;event=0xcd,umask=0x2", hybrid_big_small);
|
||||
@ -5983,6 +6045,21 @@ static struct attribute *mtl_hybrid_mem_attrs[] = {
|
||||
NULL
|
||||
};
|
||||
|
||||
EVENT_ATTR_STR_HYBRID(mem-loads,
|
||||
mem_ld_arl_h,
|
||||
"event=0xd0,umask=0x5,ldlat=3;event=0xcd,umask=0x1,ldlat=3;event=0xd0,umask=0x5,ldlat=3",
|
||||
hybrid_big_small_tiny);
|
||||
EVENT_ATTR_STR_HYBRID(mem-stores,
|
||||
mem_st_arl_h,
|
||||
"event=0xd0,umask=0x6;event=0xcd,umask=0x2;event=0xd0,umask=0x6",
|
||||
hybrid_big_small_tiny);
|
||||
|
||||
static struct attribute *arl_h_hybrid_mem_attrs[] = {
|
||||
EVENT_PTR(mem_ld_arl_h),
|
||||
EVENT_PTR(mem_st_arl_h),
|
||||
NULL,
|
||||
};
|
||||
|
||||
EVENT_ATTR_STR_HYBRID(tx-start, tx_start_adl, "event=0xc9,umask=0x1", hybrid_big);
|
||||
EVENT_ATTR_STR_HYBRID(tx-commit, tx_commit_adl, "event=0xc9,umask=0x2", hybrid_big);
|
||||
EVENT_ATTR_STR_HYBRID(tx-abort, tx_abort_adl, "event=0xc9,umask=0x4", hybrid_big);
|
||||
@ -6006,8 +6083,8 @@ static struct attribute *adl_hybrid_tsx_attrs[] = {
|
||||
|
||||
FORMAT_ATTR_HYBRID(in_tx, hybrid_big);
|
||||
FORMAT_ATTR_HYBRID(in_tx_cp, hybrid_big);
|
||||
FORMAT_ATTR_HYBRID(offcore_rsp, hybrid_big_small);
|
||||
FORMAT_ATTR_HYBRID(ldlat, hybrid_big_small);
|
||||
FORMAT_ATTR_HYBRID(offcore_rsp, hybrid_big_small_tiny);
|
||||
FORMAT_ATTR_HYBRID(ldlat, hybrid_big_small_tiny);
|
||||
FORMAT_ATTR_HYBRID(frontend, hybrid_big);
|
||||
|
||||
#define ADL_HYBRID_RTM_FORMAT_ATTR \
|
||||
@ -6030,7 +6107,7 @@ static struct attribute *adl_hybrid_extra_attr[] = {
|
||||
NULL
|
||||
};
|
||||
|
||||
FORMAT_ATTR_HYBRID(snoop_rsp, hybrid_small);
|
||||
FORMAT_ATTR_HYBRID(snoop_rsp, hybrid_small_tiny);
|
||||
|
||||
static struct attribute *mtl_hybrid_extra_attr_rtm[] = {
|
||||
ADL_HYBRID_RTM_FORMAT_ATTR,
|
||||
@ -6238,8 +6315,9 @@ static inline int intel_pmu_v6_addr_offset(int index, bool eventsel)
|
||||
}
|
||||
|
||||
static const struct { enum hybrid_pmu_type id; char *name; } intel_hybrid_pmu_type_map[] __initconst = {
|
||||
{ hybrid_small, "cpu_atom" },
|
||||
{ hybrid_big, "cpu_core" },
|
||||
{ hybrid_small, "cpu_atom" },
|
||||
{ hybrid_big, "cpu_core" },
|
||||
{ hybrid_tiny, "cpu_lowpower" },
|
||||
};
|
||||
|
||||
static __always_inline int intel_pmu_init_hybrid(enum hybrid_pmu_type pmus)
|
||||
@ -6272,7 +6350,7 @@ static __always_inline int intel_pmu_init_hybrid(enum hybrid_pmu_type pmus)
|
||||
0, x86_pmu_num_counters(&pmu->pmu), 0, 0);
|
||||
|
||||
pmu->intel_cap.capabilities = x86_pmu.intel_cap.capabilities;
|
||||
if (pmu->pmu_type & hybrid_small) {
|
||||
if (pmu->pmu_type & hybrid_small_tiny) {
|
||||
pmu->intel_cap.perf_metrics = 0;
|
||||
pmu->intel_cap.pebs_output_pt_available = 1;
|
||||
pmu->mid_ack = true;
|
||||
@ -7111,6 +7189,37 @@ __init int intel_pmu_init(void)
|
||||
name = "lunarlake_hybrid";
|
||||
break;
|
||||
|
||||
case INTEL_ARROWLAKE_H:
|
||||
intel_pmu_init_hybrid(hybrid_big_small_tiny);
|
||||
|
||||
x86_pmu.pebs_latency_data = arl_h_latency_data;
|
||||
x86_pmu.get_event_constraints = arl_h_get_event_constraints;
|
||||
x86_pmu.hw_config = arl_h_hw_config;
|
||||
|
||||
td_attr = arl_h_hybrid_events_attrs;
|
||||
mem_attr = arl_h_hybrid_mem_attrs;
|
||||
tsx_attr = adl_hybrid_tsx_attrs;
|
||||
extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
|
||||
mtl_hybrid_extra_attr_rtm : mtl_hybrid_extra_attr;
|
||||
|
||||
/* Initialize big core specific PerfMon capabilities. */
|
||||
pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX];
|
||||
intel_pmu_init_lnc(&pmu->pmu);
|
||||
|
||||
/* Initialize Atom core specific PerfMon capabilities. */
|
||||
pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX];
|
||||
intel_pmu_init_skt(&pmu->pmu);
|
||||
|
||||
/* Initialize Lower Power Atom specific PerfMon capabilities. */
|
||||
pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_TINY_IDX];
|
||||
intel_pmu_init_grt(&pmu->pmu);
|
||||
pmu->extra_regs = intel_cmt_extra_regs;
|
||||
|
||||
intel_pmu_pebs_data_source_arl_h();
|
||||
pr_cont("ArrowLake-H Hybrid events, ");
|
||||
name = "arrowlake_h_hybrid";
|
||||
break;
|
||||
|
||||
default:
|
||||
switch (x86_pmu.version) {
|
||||
case 1:
|
||||
|
@ -177,6 +177,17 @@ void __init intel_pmu_pebs_data_source_mtl(void)
|
||||
__intel_pmu_pebs_data_source_cmt(data_source);
|
||||
}
|
||||
|
||||
void __init intel_pmu_pebs_data_source_arl_h(void)
|
||||
{
|
||||
u64 *data_source;
|
||||
|
||||
intel_pmu_pebs_data_source_lnl();
|
||||
|
||||
data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_TINY_IDX].pebs_data_source;
|
||||
memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
|
||||
__intel_pmu_pebs_data_source_cmt(data_source);
|
||||
}
|
||||
|
||||
void __init intel_pmu_pebs_data_source_cmt(void)
|
||||
{
|
||||
__intel_pmu_pebs_data_source_cmt(pebs_data_source);
|
||||
@ -388,6 +399,16 @@ u64 lnl_latency_data(struct perf_event *event, u64 status)
|
||||
return lnc_latency_data(event, status);
|
||||
}
|
||||
|
||||
u64 arl_h_latency_data(struct perf_event *event, u64 status)
|
||||
{
|
||||
struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
|
||||
|
||||
if (pmu->pmu_type == hybrid_tiny)
|
||||
return cmt_latency_data(event, status);
|
||||
|
||||
return lnl_latency_data(event, status);
|
||||
}
|
||||
|
||||
static u64 load_latency_data(struct perf_event *event, u64 status)
|
||||
{
|
||||
union intel_x86_pebs_dse dse;
|
||||
|
@ -668,24 +668,38 @@ enum {
|
||||
#define PERF_PEBS_DATA_SOURCE_GRT_MAX 0x10
|
||||
#define PERF_PEBS_DATA_SOURCE_GRT_MASK (PERF_PEBS_DATA_SOURCE_GRT_MAX - 1)
|
||||
|
||||
/*
|
||||
* CPUID.1AH.EAX[31:0] uniquely identifies the microarchitecture
|
||||
* of the core. Bits [31:24] indicate its core type (Core or Atom)
|
||||
* and bits [23:0] indicate the native model ID of the core.
|
||||
* Core type and native model ID are defined in the enumerations below.
|
||||
*/
|
||||
enum hybrid_cpu_type {
|
||||
HYBRID_INTEL_NONE,
|
||||
HYBRID_INTEL_ATOM = 0x20,
|
||||
HYBRID_INTEL_CORE = 0x40,
|
||||
};
|
||||
|
||||
enum hybrid_pmu_type {
|
||||
not_hybrid,
|
||||
hybrid_small = BIT(0),
|
||||
hybrid_big = BIT(1),
|
||||
|
||||
hybrid_big_small = hybrid_big | hybrid_small, /* only used for matching */
|
||||
};
|
||||
|
||||
#define X86_HYBRID_PMU_ATOM_IDX 0
|
||||
#define X86_HYBRID_PMU_CORE_IDX 1
|
||||
#define X86_HYBRID_PMU_TINY_IDX 2
|
||||
|
||||
#define X86_HYBRID_NUM_PMUS 2
|
||||
enum hybrid_pmu_type {
|
||||
not_hybrid,
|
||||
hybrid_small = BIT(X86_HYBRID_PMU_ATOM_IDX),
|
||||
hybrid_big = BIT(X86_HYBRID_PMU_CORE_IDX),
|
||||
hybrid_tiny = BIT(X86_HYBRID_PMU_TINY_IDX),
|
||||
|
||||
/* The values below are only used for matching */
|
||||
hybrid_big_small = hybrid_big | hybrid_small,
|
||||
hybrid_small_tiny = hybrid_small | hybrid_tiny,
|
||||
hybrid_big_small_tiny = hybrid_big | hybrid_small_tiny,
|
||||
};
|
||||
|
||||
enum atom_native_id {
|
||||
cmt_native_id = 0x2, /* Crestmont */
|
||||
skt_native_id = 0x3, /* Skymont */
|
||||
};
|
||||
|
||||
struct x86_hybrid_pmu {
|
||||
struct pmu pmu;
|
||||
@ -1578,6 +1592,8 @@ u64 cmt_latency_data(struct perf_event *event, u64 status);
|
||||
|
||||
u64 lnl_latency_data(struct perf_event *event, u64 status);
|
||||
|
||||
u64 arl_h_latency_data(struct perf_event *event, u64 status);
|
||||
|
||||
extern struct event_constraint intel_core2_pebs_event_constraints[];
|
||||
|
||||
extern struct event_constraint intel_atom_pebs_event_constraints[];
|
||||
@ -1697,6 +1713,8 @@ void intel_pmu_pebs_data_source_grt(void);
|
||||
|
||||
void intel_pmu_pebs_data_source_mtl(void);
|
||||
|
||||
void intel_pmu_pebs_data_source_arl_h(void);
|
||||
|
||||
void intel_pmu_pebs_data_source_cmt(void);
|
||||
|
||||
void intel_pmu_pebs_data_source_lnl(void);
|
||||
|
@ -148,7 +148,6 @@ struct rapl_model {
|
||||
/* 1/2^hw_unit Joule */
|
||||
static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly;
|
||||
static struct rapl_pmus *rapl_pmus;
|
||||
static cpumask_t rapl_cpu_mask;
|
||||
static unsigned int rapl_cntr_mask;
|
||||
static u64 rapl_timer_ms;
|
||||
static struct perf_msr *rapl_msrs;
|
||||
@ -369,8 +368,6 @@ static int rapl_pmu_event_init(struct perf_event *event)
|
||||
if (event->cpu < 0)
|
||||
return -EINVAL;
|
||||
|
||||
event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;
|
||||
|
||||
if (!cfg || cfg >= NR_RAPL_DOMAINS + 1)
|
||||
return -EINVAL;
|
||||
|
||||
@ -389,7 +386,6 @@ static int rapl_pmu_event_init(struct perf_event *event)
|
||||
pmu = cpu_to_rapl_pmu(event->cpu);
|
||||
if (!pmu)
|
||||
return -EINVAL;
|
||||
event->cpu = pmu->cpu;
|
||||
event->pmu_private = pmu;
|
||||
event->hw.event_base = rapl_msrs[bit].msr;
|
||||
event->hw.config = cfg;
|
||||
@ -403,23 +399,6 @@ static void rapl_pmu_event_read(struct perf_event *event)
|
||||
rapl_event_update(event);
|
||||
}
|
||||
|
||||
static ssize_t rapl_get_attr_cpumask(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
return cpumap_print_to_pagebuf(true, buf, &rapl_cpu_mask);
|
||||
}
|
||||
|
||||
static DEVICE_ATTR(cpumask, S_IRUGO, rapl_get_attr_cpumask, NULL);
|
||||
|
||||
static struct attribute *rapl_pmu_attrs[] = {
|
||||
&dev_attr_cpumask.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute_group rapl_pmu_attr_group = {
|
||||
.attrs = rapl_pmu_attrs,
|
||||
};
|
||||
|
||||
RAPL_EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01");
|
||||
RAPL_EVENT_ATTR_STR(energy-pkg , rapl_pkg, "event=0x02");
|
||||
RAPL_EVENT_ATTR_STR(energy-ram , rapl_ram, "event=0x03");
|
||||
@ -467,7 +446,6 @@ static struct attribute_group rapl_pmu_format_group = {
|
||||
};
|
||||
|
||||
static const struct attribute_group *rapl_attr_groups[] = {
|
||||
&rapl_pmu_attr_group,
|
||||
&rapl_pmu_format_group,
|
||||
&rapl_pmu_events_group,
|
||||
NULL,
|
||||
@ -570,65 +548,6 @@ static struct perf_msr amd_rapl_msrs[] = {
|
||||
[PERF_RAPL_PSYS] = { 0, &rapl_events_psys_group, NULL, false, 0 },
|
||||
};
|
||||
|
||||
static int rapl_cpu_offline(unsigned int cpu)
|
||||
{
|
||||
struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
|
||||
int target;
|
||||
|
||||
/* Check if exiting cpu is used for collecting rapl events */
|
||||
if (!cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask))
|
||||
return 0;
|
||||
|
||||
pmu->cpu = -1;
|
||||
/* Find a new cpu to collect rapl events */
|
||||
target = cpumask_any_but(get_rapl_pmu_cpumask(cpu), cpu);
|
||||
|
||||
/* Migrate rapl events to the new target */
|
||||
if (target < nr_cpu_ids) {
|
||||
cpumask_set_cpu(target, &rapl_cpu_mask);
|
||||
pmu->cpu = target;
|
||||
perf_pmu_migrate_context(pmu->pmu, cpu, target);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int rapl_cpu_online(unsigned int cpu)
|
||||
{
|
||||
s32 rapl_pmu_idx = get_rapl_pmu_idx(cpu);
|
||||
if (rapl_pmu_idx < 0) {
|
||||
pr_err("topology_logical_(package/die)_id() returned a negative value");
|
||||
return -EINVAL;
|
||||
}
|
||||
struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
|
||||
int target;
|
||||
|
||||
if (!pmu) {
|
||||
pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
|
||||
if (!pmu)
|
||||
return -ENOMEM;
|
||||
|
||||
raw_spin_lock_init(&pmu->lock);
|
||||
INIT_LIST_HEAD(&pmu->active_list);
|
||||
pmu->pmu = &rapl_pmus->pmu;
|
||||
pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
|
||||
rapl_hrtimer_init(pmu);
|
||||
|
||||
rapl_pmus->pmus[rapl_pmu_idx] = pmu;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if there is an online cpu in the package which collects rapl
|
||||
* events already.
|
||||
*/
|
||||
target = cpumask_any_and(&rapl_cpu_mask, get_rapl_pmu_cpumask(cpu));
|
||||
if (target < nr_cpu_ids)
|
||||
return 0;
|
||||
|
||||
cpumask_set_cpu(cpu, &rapl_cpu_mask);
|
||||
pmu->cpu = cpu;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int rapl_check_hw_unit(struct rapl_model *rm)
|
||||
{
|
||||
u64 msr_rapl_power_unit_bits;
|
||||
@ -707,12 +626,41 @@ static const struct attribute_group *rapl_attr_update[] = {
|
||||
NULL,
|
||||
};
|
||||
|
||||
static int __init init_rapl_pmu(void)
|
||||
{
|
||||
struct rapl_pmu *pmu;
|
||||
int idx;
|
||||
|
||||
for (idx = 0; idx < rapl_pmus->nr_rapl_pmu; idx++) {
|
||||
pmu = kzalloc(sizeof(*pmu), GFP_KERNEL);
|
||||
if (!pmu)
|
||||
goto free;
|
||||
|
||||
raw_spin_lock_init(&pmu->lock);
|
||||
INIT_LIST_HEAD(&pmu->active_list);
|
||||
pmu->pmu = &rapl_pmus->pmu;
|
||||
pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
|
||||
rapl_hrtimer_init(pmu);
|
||||
|
||||
rapl_pmus->pmus[idx] = pmu;
|
||||
}
|
||||
|
||||
return 0;
|
||||
free:
|
||||
for (; idx > 0; idx--)
|
||||
kfree(rapl_pmus->pmus[idx - 1]);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
static int __init init_rapl_pmus(void)
|
||||
{
|
||||
int nr_rapl_pmu = topology_max_packages();
|
||||
int rapl_pmu_scope = PERF_PMU_SCOPE_PKG;
|
||||
|
||||
if (!rapl_pmu_is_pkg_scope())
|
||||
if (!rapl_pmu_is_pkg_scope()) {
|
||||
nr_rapl_pmu *= topology_max_dies_per_package();
|
||||
rapl_pmu_scope = PERF_PMU_SCOPE_DIE;
|
||||
}
|
||||
|
||||
rapl_pmus = kzalloc(struct_size(rapl_pmus, pmus, nr_rapl_pmu), GFP_KERNEL);
|
||||
if (!rapl_pmus)
|
||||
@ -728,9 +676,11 @@ static int __init init_rapl_pmus(void)
|
||||
rapl_pmus->pmu.start = rapl_pmu_event_start;
|
||||
rapl_pmus->pmu.stop = rapl_pmu_event_stop;
|
||||
rapl_pmus->pmu.read = rapl_pmu_event_read;
|
||||
rapl_pmus->pmu.scope = rapl_pmu_scope;
|
||||
rapl_pmus->pmu.module = THIS_MODULE;
|
||||
rapl_pmus->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE;
|
||||
return 0;
|
||||
|
||||
return init_rapl_pmu();
|
||||
}
|
||||
|
||||
static struct rapl_model model_snb = {
|
||||
@ -876,24 +826,13 @@ static int __init rapl_pmu_init(void)
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* Install callbacks. Core will call them for each online cpu.
|
||||
*/
|
||||
ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_RAPL_ONLINE,
|
||||
"perf/x86/rapl:online",
|
||||
rapl_cpu_online, rapl_cpu_offline);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1);
|
||||
if (ret)
|
||||
goto out1;
|
||||
goto out;
|
||||
|
||||
rapl_advertise();
|
||||
return 0;
|
||||
|
||||
out1:
|
||||
cpuhp_remove_state(CPUHP_AP_PERF_X86_RAPL_ONLINE);
|
||||
out:
|
||||
pr_warn("Initialization failed (%d), disabled\n", ret);
|
||||
cleanup_rapl_pmus();
|
||||
@ -903,7 +842,6 @@ module_init(rapl_pmu_init);
|
||||
|
||||
static void __exit intel_rapl_exit(void)
|
||||
{
|
||||
cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_RAPL_ONLINE);
|
||||
perf_pmu_unregister(&rapl_pmus->pmu);
|
||||
cleanup_rapl_pmus();
|
||||
}
|
||||
|
@ -32,6 +32,7 @@ extern bool handle_user_split_lock(struct pt_regs *regs, long error_code);
|
||||
extern bool handle_guest_split_lock(unsigned long ip);
|
||||
extern void handle_bus_lock(struct pt_regs *regs);
|
||||
u8 get_this_hybrid_cpu_type(void);
|
||||
u32 get_this_hybrid_cpu_native_id(void);
|
||||
#else
|
||||
static inline void __init sld_setup(struct cpuinfo_x86 *c) {}
|
||||
static inline bool handle_user_split_lock(struct pt_regs *regs, long error_code)
|
||||
@ -50,6 +51,11 @@ static inline u8 get_this_hybrid_cpu_type(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline u32 get_this_hybrid_cpu_native_id(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
#ifdef CONFIG_IA32_FEAT_CTL
|
||||
void init_ia32_feat_ctl(struct cpuinfo_x86 *c);
|
||||
|
@ -1299,3 +1299,18 @@ u8 get_this_hybrid_cpu_type(void)
|
||||
|
||||
return cpuid_eax(0x0000001a) >> X86_HYBRID_CPU_TYPE_ID_SHIFT;
|
||||
}
|
||||
|
||||
/**
|
||||
* get_this_hybrid_cpu_native_id() - Get the native id of this hybrid CPU
|
||||
*
|
||||
* Returns the uarch native ID [23:0] of a CPU in a hybrid processor.
|
||||
* If the processor is not hybrid, returns 0.
|
||||
*/
|
||||
u32 get_this_hybrid_cpu_native_id(void)
|
||||
{
|
||||
if (!cpu_feature_enabled(X86_FEATURE_HYBRID_CPU))
|
||||
return 0;
|
||||
|
||||
return cpuid_eax(0x0000001a) &
|
||||
(BIT_ULL(X86_HYBRID_CPU_TYPE_ID_SHIFT) - 1);
|
||||
}
|
||||
|
@ -208,7 +208,6 @@ enum cpuhp_state {
|
||||
CPUHP_AP_PERF_X86_UNCORE_ONLINE,
|
||||
CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE,
|
||||
CPUHP_AP_PERF_X86_AMD_POWER_ONLINE,
|
||||
CPUHP_AP_PERF_X86_RAPL_ONLINE,
|
||||
CPUHP_AP_PERF_S390_CF_ONLINE,
|
||||
CPUHP_AP_PERF_S390_SF_ONLINE,
|
||||
CPUHP_AP_PERF_ARM_CCI_ONLINE,
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include <linux/rbtree.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/wait.h>
|
||||
#include <linux/timer.h>
|
||||
|
||||
struct uprobe;
|
||||
struct vm_area_struct;
|
||||
@ -23,8 +24,17 @@ struct inode;
|
||||
struct notifier_block;
|
||||
struct page;
|
||||
|
||||
/*
|
||||
* Allowed return values from uprobe consumer's handler callback
|
||||
* with the following meanings:
|
||||
*
|
||||
* UPROBE_HANDLER_REMOVE
|
||||
* - Remove the uprobe breakpoint from current->mm.
|
||||
* UPROBE_HANDLER_IGNORE
|
||||
* - Ignore ret_handler callback for this consumer.
|
||||
*/
|
||||
#define UPROBE_HANDLER_REMOVE 1
|
||||
#define UPROBE_HANDLER_MASK 1
|
||||
#define UPROBE_HANDLER_IGNORE 2
|
||||
|
||||
#define MAX_URETPROBE_DEPTH 64
|
||||
|
||||
@ -37,13 +47,15 @@ struct uprobe_consumer {
|
||||
* for the current process. If filter() is omitted or returns true,
|
||||
* UPROBE_HANDLER_REMOVE is effectively ignored.
|
||||
*/
|
||||
int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs);
|
||||
int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs, __u64 *data);
|
||||
int (*ret_handler)(struct uprobe_consumer *self,
|
||||
unsigned long func,
|
||||
struct pt_regs *regs);
|
||||
struct pt_regs *regs, __u64 *data);
|
||||
bool (*filter)(struct uprobe_consumer *self, struct mm_struct *mm);
|
||||
|
||||
struct list_head cons_node;
|
||||
|
||||
__u64 id; /* set when uprobe_consumer is registered */
|
||||
};
|
||||
|
||||
#ifdef CONFIG_UPROBES
|
||||
@ -56,6 +68,53 @@ enum uprobe_task_state {
|
||||
UTASK_SSTEP_TRAPPED,
|
||||
};
|
||||
|
||||
/* The state of hybrid-lifetime uprobe inside struct return_instance */
|
||||
enum hprobe_state {
|
||||
HPROBE_LEASED, /* uretprobes_srcu-protected uprobe */
|
||||
HPROBE_STABLE, /* refcounted uprobe */
|
||||
HPROBE_GONE, /* NULL uprobe, SRCU expired, refcount failed */
|
||||
HPROBE_CONSUMED, /* uprobe "consumed" by uretprobe handler */
|
||||
};
|
||||
|
||||
/*
|
||||
* Hybrid lifetime uprobe. Represents a uprobe instance that could be either
|
||||
* SRCU protected (with SRCU protection eventually potentially timing out),
|
||||
* refcounted using uprobe->ref, or there could be no valid uprobe (NULL).
|
||||
*
|
||||
* hprobe's internal state is set up such that a background timer thread can
|
||||
* atomically "downgrade" temporarily RCU-protected uprobe into refcounted one
|
||||
* (or no uprobe, if refcounting failed).
|
||||
*
|
||||
* *stable* pointer always points to the uprobe (or could be NULL if there
|
||||
* was no valid underlying uprobe to begin with).
|
||||
*
|
||||
* *leased* pointer is the key to achieving race-free atomic lifetime state
|
||||
* transition and can have three possible states:
|
||||
* - either the same non-NULL value as *stable*, in which case uprobe is
|
||||
* SRCU-protected;
|
||||
* - NULL, in which case uprobe (if there is any) is refcounted;
|
||||
* - special __UPROBE_DEAD value, which represents an uprobe that was SRCU
|
||||
* protected initially, but SRCU period timed out and we attempted to
|
||||
* convert it to refcounted, but refcount_inc_not_zero() failed, because
|
||||
* uprobe effectively went away (the last consumer unsubscribed). In this
|
||||
* case it's important to know that *stable* pointer (which still has
|
||||
* non-NULL uprobe pointer) shouldn't be used, because lifetime of
|
||||
* underlying uprobe is not guaranteed anymore. __UPROBE_DEAD is just an
|
||||
* internal marker and is handled transparently by hprobe_fetch() helper.
|
||||
*
|
||||
* When uprobe is SRCU-protected, we also record srcu_idx value, necessary for
|
||||
* SRCU unlocking.
|
||||
*
|
||||
* See hprobe_expire() and hprobe_fetch() for the details of race-free uprobe
|
||||
* state transitioning. It all hinges on atomic xchg() over the *leased*
|
||||
* pointer. *stable* pointer, once initially set, is not modified concurrently.
|
||||
*/
|
||||
struct hprobe {
|
||||
enum hprobe_state state;
|
||||
int srcu_idx;
|
||||
struct uprobe *uprobe;
|
||||
};
|
||||
|
||||
/*
|
||||
* uprobe_task: Metadata of a task while it singlesteps.
|
||||
*/
|
||||
@ -75,6 +134,7 @@ struct uprobe_task {
|
||||
};
|
||||
|
||||
struct uprobe *active_uprobe;
|
||||
struct timer_list ri_timer;
|
||||
unsigned long xol_vaddr;
|
||||
|
||||
struct arch_uprobe *auprobe;
|
||||
@ -83,15 +143,24 @@ struct uprobe_task {
|
||||
unsigned int depth;
|
||||
};
|
||||
|
||||
struct return_consumer {
|
||||
__u64 cookie;
|
||||
__u64 id;
|
||||
};
|
||||
|
||||
struct return_instance {
|
||||
struct uprobe *uprobe;
|
||||
struct hprobe hprobe;
|
||||
unsigned long func;
|
||||
unsigned long stack; /* stack pointer */
|
||||
unsigned long orig_ret_vaddr; /* original return address */
|
||||
bool chained; /* true, if instance is nested */
|
||||
int consumers_cnt;
|
||||
|
||||
struct return_instance *next; /* keep as stack */
|
||||
};
|
||||
struct rcu_head rcu;
|
||||
|
||||
struct return_consumer consumers[] __counted_by(consumers_cnt);
|
||||
} ____cacheline_aligned;
|
||||
|
||||
enum rp_check {
|
||||
RP_CHECK_CALL,
|
||||
|
@ -26,6 +26,9 @@
|
||||
#include <linux/task_work.h>
|
||||
#include <linux/shmem_fs.h>
|
||||
#include <linux/khugepaged.h>
|
||||
#include <linux/rcupdate_trace.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/srcu.h>
|
||||
|
||||
#include <linux/uprobes.h>
|
||||
|
||||
@ -42,8 +45,6 @@ static struct rb_root uprobes_tree = RB_ROOT;
|
||||
static DEFINE_RWLOCK(uprobes_treelock); /* serialize rbtree access */
|
||||
static seqcount_rwlock_t uprobes_seqcount = SEQCNT_RWLOCK_ZERO(uprobes_seqcount, &uprobes_treelock);
|
||||
|
||||
DEFINE_STATIC_SRCU(uprobes_srcu);
|
||||
|
||||
#define UPROBES_HASH_SZ 13
|
||||
/* serialize uprobe->pending_list */
|
||||
static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ];
|
||||
@ -51,6 +52,9 @@ static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ];
|
||||
|
||||
DEFINE_STATIC_PERCPU_RWSEM(dup_mmap_sem);
|
||||
|
||||
/* Covers return_instance's uprobe lifetime. */
|
||||
DEFINE_STATIC_SRCU(uretprobes_srcu);
|
||||
|
||||
/* Have a copy of original instruction */
|
||||
#define UPROBE_COPY_INSN 0
|
||||
|
||||
@ -62,10 +66,13 @@ struct uprobe {
|
||||
struct list_head pending_list;
|
||||
struct list_head consumers;
|
||||
struct inode *inode; /* Also hold a ref to inode */
|
||||
struct rcu_head rcu;
|
||||
union {
|
||||
struct rcu_head rcu;
|
||||
struct work_struct work;
|
||||
};
|
||||
loff_t offset;
|
||||
loff_t ref_ctr_offset;
|
||||
unsigned long flags;
|
||||
unsigned long flags; /* "unsigned long" so bitops work */
|
||||
|
||||
/*
|
||||
* The generic code assumes that it has two members of unknown type
|
||||
@ -100,7 +107,6 @@ static LIST_HEAD(delayed_uprobe_list);
|
||||
*/
|
||||
struct xol_area {
|
||||
wait_queue_head_t wq; /* if all slots are busy */
|
||||
atomic_t slot_count; /* number of in-use slots */
|
||||
unsigned long *bitmap; /* 0 = free slot */
|
||||
|
||||
struct page *page;
|
||||
@ -620,17 +626,23 @@ static inline bool uprobe_is_active(struct uprobe *uprobe)
|
||||
return !RB_EMPTY_NODE(&uprobe->rb_node);
|
||||
}
|
||||
|
||||
static void uprobe_free_rcu(struct rcu_head *rcu)
|
||||
static void uprobe_free_rcu_tasks_trace(struct rcu_head *rcu)
|
||||
{
|
||||
struct uprobe *uprobe = container_of(rcu, struct uprobe, rcu);
|
||||
|
||||
kfree(uprobe);
|
||||
}
|
||||
|
||||
static void put_uprobe(struct uprobe *uprobe)
|
||||
static void uprobe_free_srcu(struct rcu_head *rcu)
|
||||
{
|
||||
if (!refcount_dec_and_test(&uprobe->ref))
|
||||
return;
|
||||
struct uprobe *uprobe = container_of(rcu, struct uprobe, rcu);
|
||||
|
||||
call_rcu_tasks_trace(&uprobe->rcu, uprobe_free_rcu_tasks_trace);
|
||||
}
|
||||
|
||||
static void uprobe_free_deferred(struct work_struct *work)
|
||||
{
|
||||
struct uprobe *uprobe = container_of(work, struct uprobe, work);
|
||||
|
||||
write_lock(&uprobes_treelock);
|
||||
|
||||
@ -651,7 +663,162 @@ static void put_uprobe(struct uprobe *uprobe)
|
||||
delayed_uprobe_remove(uprobe, NULL);
|
||||
mutex_unlock(&delayed_uprobe_lock);
|
||||
|
||||
call_srcu(&uprobes_srcu, &uprobe->rcu, uprobe_free_rcu);
|
||||
/* start srcu -> rcu_tasks_trace -> kfree chain */
|
||||
call_srcu(&uretprobes_srcu, &uprobe->rcu, uprobe_free_srcu);
|
||||
}
|
||||
|
||||
static void put_uprobe(struct uprobe *uprobe)
|
||||
{
|
||||
if (!refcount_dec_and_test(&uprobe->ref))
|
||||
return;
|
||||
|
||||
INIT_WORK(&uprobe->work, uprobe_free_deferred);
|
||||
schedule_work(&uprobe->work);
|
||||
}
|
||||
|
||||
/* Initialize hprobe as SRCU-protected "leased" uprobe */
|
||||
static void hprobe_init_leased(struct hprobe *hprobe, struct uprobe *uprobe, int srcu_idx)
|
||||
{
|
||||
WARN_ON(!uprobe);
|
||||
hprobe->state = HPROBE_LEASED;
|
||||
hprobe->uprobe = uprobe;
|
||||
hprobe->srcu_idx = srcu_idx;
|
||||
}
|
||||
|
||||
/* Initialize hprobe as refcounted ("stable") uprobe (uprobe can be NULL). */
|
||||
static void hprobe_init_stable(struct hprobe *hprobe, struct uprobe *uprobe)
|
||||
{
|
||||
hprobe->state = uprobe ? HPROBE_STABLE : HPROBE_GONE;
|
||||
hprobe->uprobe = uprobe;
|
||||
hprobe->srcu_idx = -1;
|
||||
}
|
||||
|
||||
/*
|
||||
* hprobe_consume() fetches hprobe's underlying uprobe and detects whether
|
||||
* uprobe is SRCU protected or is refcounted. hprobe_consume() can be
|
||||
* used only once for a given hprobe.
|
||||
*
|
||||
* Caller has to call hprobe_finalize() and pass previous hprobe_state, so
|
||||
* that hprobe_finalize() can perform SRCU unlock or put uprobe, whichever
|
||||
* is appropriate.
|
||||
*/
|
||||
static inline struct uprobe *hprobe_consume(struct hprobe *hprobe, enum hprobe_state *hstate)
|
||||
{
|
||||
*hstate = xchg(&hprobe->state, HPROBE_CONSUMED);
|
||||
switch (*hstate) {
|
||||
case HPROBE_LEASED:
|
||||
case HPROBE_STABLE:
|
||||
return hprobe->uprobe;
|
||||
case HPROBE_GONE: /* uprobe is NULL, no SRCU */
|
||||
case HPROBE_CONSUMED: /* uprobe was finalized already, do nothing */
|
||||
return NULL;
|
||||
default:
|
||||
WARN(1, "hprobe invalid state %d", *hstate);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Reset hprobe state and, if hprobe was LEASED, release SRCU lock.
|
||||
* hprobe_finalize() can only be used from current context after
|
||||
* hprobe_consume() call (which determines uprobe and hstate value).
|
||||
*/
|
||||
static void hprobe_finalize(struct hprobe *hprobe, enum hprobe_state hstate)
|
||||
{
|
||||
switch (hstate) {
|
||||
case HPROBE_LEASED:
|
||||
__srcu_read_unlock(&uretprobes_srcu, hprobe->srcu_idx);
|
||||
break;
|
||||
case HPROBE_STABLE:
|
||||
put_uprobe(hprobe->uprobe);
|
||||
break;
|
||||
case HPROBE_GONE:
|
||||
case HPROBE_CONSUMED:
|
||||
break;
|
||||
default:
|
||||
WARN(1, "hprobe invalid state %d", hstate);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Attempt to switch (atomically) uprobe from being SRCU protected (LEASED)
|
||||
* to refcounted (STABLE) state. Competes with hprobe_consume(); only one of
|
||||
* them can win the race to perform SRCU unlocking. Whoever wins must perform
|
||||
* SRCU unlock.
|
||||
*
|
||||
* Returns underlying valid uprobe or NULL, if there was no underlying uprobe
|
||||
* to begin with or we failed to bump its refcount and it's going away.
|
||||
*
|
||||
* Returned non-NULL uprobe can be still safely used within an ongoing SRCU
|
||||
* locked region. If `get` is true, it's guaranteed that non-NULL uprobe has
|
||||
* an extra refcount for caller to assume and use. Otherwise, it's not
|
||||
* guaranteed that returned uprobe has a positive refcount, so caller has to
|
||||
* attempt try_get_uprobe(), if it needs to preserve uprobe beyond current
|
||||
* SRCU lock region. See dup_utask().
|
||||
*/
|
||||
static struct uprobe *hprobe_expire(struct hprobe *hprobe, bool get)
|
||||
{
|
||||
enum hprobe_state hstate;
|
||||
|
||||
/*
|
||||
* return_instance's hprobe is protected by RCU.
|
||||
* Underlying uprobe is itself protected from reuse by SRCU.
|
||||
*/
|
||||
lockdep_assert(rcu_read_lock_held() && srcu_read_lock_held(&uretprobes_srcu));
|
||||
|
||||
hstate = READ_ONCE(hprobe->state);
|
||||
switch (hstate) {
|
||||
case HPROBE_STABLE:
|
||||
/* uprobe has positive refcount, bump refcount, if necessary */
|
||||
return get ? get_uprobe(hprobe->uprobe) : hprobe->uprobe;
|
||||
case HPROBE_GONE:
|
||||
/*
|
||||
* SRCU was unlocked earlier and we didn't manage to take
|
||||
* uprobe refcnt, so it's effectively NULL
|
||||
*/
|
||||
return NULL;
|
||||
case HPROBE_CONSUMED:
|
||||
/*
|
||||
* uprobe was consumed, so it's effectively NULL as far as
|
||||
* uretprobe processing logic is concerned
|
||||
*/
|
||||
return NULL;
|
||||
case HPROBE_LEASED: {
|
||||
struct uprobe *uprobe = try_get_uprobe(hprobe->uprobe);
|
||||
/*
|
||||
* Try to switch hprobe state, guarding against
|
||||
* hprobe_consume() or another hprobe_expire() racing with us.
|
||||
* Note, if we failed to get uprobe refcount, we use special
|
||||
* HPROBE_GONE state to signal that hprobe->uprobe shouldn't
|
||||
* be used as it will be freed after SRCU is unlocked.
|
||||
*/
|
||||
if (try_cmpxchg(&hprobe->state, &hstate, uprobe ? HPROBE_STABLE : HPROBE_GONE)) {
|
||||
/* We won the race, we are the ones to unlock SRCU */
|
||||
__srcu_read_unlock(&uretprobes_srcu, hprobe->srcu_idx);
|
||||
return get ? get_uprobe(uprobe) : uprobe;
|
||||
}
|
||||
|
||||
/*
|
||||
* We lost the race, undo refcount bump (if it ever happened),
|
||||
* unless caller would like an extra refcount anyways.
|
||||
*/
|
||||
if (uprobe && !get)
|
||||
put_uprobe(uprobe);
|
||||
/*
|
||||
* Even if hprobe_consume() or another hprobe_expire() wins
|
||||
* the state update race and unlocks SRCU from under us, we
|
||||
* still have a guarantee that underyling uprobe won't be
|
||||
* freed due to ongoing caller's SRCU lock region, so we can
|
||||
* return it regardless. Also, if `get` was true, we also have
|
||||
* an extra ref for the caller to own. This is used in dup_utask().
|
||||
*/
|
||||
return uprobe;
|
||||
}
|
||||
default:
|
||||
WARN(1, "unknown hprobe state %d", hstate);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static __always_inline
|
||||
@ -706,7 +873,7 @@ static struct uprobe *find_uprobe_rcu(struct inode *inode, loff_t offset)
|
||||
struct rb_node *node;
|
||||
unsigned int seq;
|
||||
|
||||
lockdep_assert(srcu_read_lock_held(&uprobes_srcu));
|
||||
lockdep_assert(rcu_read_lock_trace_held());
|
||||
|
||||
do {
|
||||
seq = read_seqcount_begin(&uprobes_seqcount);
|
||||
@ -825,8 +992,11 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset,
|
||||
|
||||
static void consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc)
|
||||
{
|
||||
static atomic64_t id;
|
||||
|
||||
down_write(&uprobe->consumer_rwsem);
|
||||
list_add_rcu(&uc->cons_node, &uprobe->consumers);
|
||||
uc->id = (__u64) atomic64_inc_return(&id);
|
||||
up_write(&uprobe->consumer_rwsem);
|
||||
}
|
||||
|
||||
@ -934,8 +1104,7 @@ static bool filter_chain(struct uprobe *uprobe, struct mm_struct *mm)
|
||||
bool ret = false;
|
||||
|
||||
down_read(&uprobe->consumer_rwsem);
|
||||
list_for_each_entry_srcu(uc, &uprobe->consumers, cons_node,
|
||||
srcu_read_lock_held(&uprobes_srcu)) {
|
||||
list_for_each_entry_rcu(uc, &uprobe->consumers, cons_node, rcu_read_lock_trace_held()) {
|
||||
ret = consumer_filter(uc, mm);
|
||||
if (ret)
|
||||
break;
|
||||
@ -1156,7 +1325,8 @@ void uprobe_unregister_sync(void)
|
||||
* unlucky enough caller can free consumer's memory and cause
|
||||
* handler_chain() or handle_uretprobe_chain() to do a use-after-free.
|
||||
*/
|
||||
synchronize_srcu(&uprobes_srcu);
|
||||
synchronize_rcu_tasks_trace();
|
||||
synchronize_srcu(&uretprobes_srcu);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(uprobe_unregister_sync);
|
||||
|
||||
@ -1240,19 +1410,18 @@ EXPORT_SYMBOL_GPL(uprobe_register);
|
||||
int uprobe_apply(struct uprobe *uprobe, struct uprobe_consumer *uc, bool add)
|
||||
{
|
||||
struct uprobe_consumer *con;
|
||||
int ret = -ENOENT, srcu_idx;
|
||||
int ret = -ENOENT;
|
||||
|
||||
down_write(&uprobe->register_rwsem);
|
||||
|
||||
srcu_idx = srcu_read_lock(&uprobes_srcu);
|
||||
list_for_each_entry_srcu(con, &uprobe->consumers, cons_node,
|
||||
srcu_read_lock_held(&uprobes_srcu)) {
|
||||
rcu_read_lock_trace();
|
||||
list_for_each_entry_rcu(con, &uprobe->consumers, cons_node, rcu_read_lock_trace_held()) {
|
||||
if (con == uc) {
|
||||
ret = register_for_each_vma(uprobe, add ? uc : NULL);
|
||||
break;
|
||||
}
|
||||
}
|
||||
srcu_read_unlock(&uprobes_srcu, srcu_idx);
|
||||
rcu_read_unlock_trace();
|
||||
|
||||
up_write(&uprobe->register_rwsem);
|
||||
|
||||
@ -1475,9 +1644,15 @@ static vm_fault_t xol_fault(const struct vm_special_mapping *sm,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * .mremap callback of the "[uprobes]" special mapping (see xol_mapping).
 * Ignores both arguments and always returns -EPERM.
 * NOTE(review): presumably this makes any attempt to mremap() the XOL area
 * fail -- confirm against the special-mapping code that invokes .mremap.
 */
static int xol_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma)
|
||||
{
|
||||
return -EPERM;
|
||||
}
|
||||
|
||||
/*
 * Special-mapping descriptor for the XOL area: it is named "[uprobes]",
 * faults are handled by xol_fault() and mremap requests by xol_mremap().
 */
static const struct vm_special_mapping xol_mapping = {
|
||||
.name = "[uprobes]",
|
||||
.fault = xol_fault,
|
||||
.mremap = xol_mremap,
|
||||
};
|
||||
|
||||
/* Slot allocation for XOL */
|
||||
@ -1553,7 +1728,6 @@ static struct xol_area *__create_xol_area(unsigned long vaddr)
|
||||
init_waitqueue_head(&area->wq);
|
||||
/* Reserve the 1st slot for get_trampoline_vaddr() */
|
||||
set_bit(0, area->bitmap);
|
||||
atomic_set(&area->slot_count, 1);
|
||||
insns = arch_uprobe_trampoline(&insns_size);
|
||||
arch_uprobe_copy_ixol(area->page, 0, insns, insns_size);
|
||||
|
||||
@ -1626,92 +1800,57 @@ void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* - search for a free slot.
|
||||
*/
|
||||
static unsigned long xol_take_insn_slot(struct xol_area *area)
|
||||
static unsigned long xol_get_slot_nr(struct xol_area *area)
|
||||
{
|
||||
unsigned long slot_addr;
|
||||
int slot_nr;
|
||||
unsigned long slot_nr;
|
||||
|
||||
do {
|
||||
slot_nr = find_first_zero_bit(area->bitmap, UINSNS_PER_PAGE);
|
||||
if (slot_nr < UINSNS_PER_PAGE) {
|
||||
if (!test_and_set_bit(slot_nr, area->bitmap))
|
||||
break;
|
||||
slot_nr = find_first_zero_bit(area->bitmap, UINSNS_PER_PAGE);
|
||||
if (slot_nr < UINSNS_PER_PAGE) {
|
||||
if (!test_and_set_bit(slot_nr, area->bitmap))
|
||||
return slot_nr;
|
||||
}
|
||||
|
||||
slot_nr = UINSNS_PER_PAGE;
|
||||
continue;
|
||||
}
|
||||
wait_event(area->wq, (atomic_read(&area->slot_count) < UINSNS_PER_PAGE));
|
||||
} while (slot_nr >= UINSNS_PER_PAGE);
|
||||
|
||||
slot_addr = area->vaddr + (slot_nr * UPROBE_XOL_SLOT_BYTES);
|
||||
atomic_inc(&area->slot_count);
|
||||
|
||||
return slot_addr;
|
||||
return UINSNS_PER_PAGE;
|
||||
}
|
||||
|
||||
/*
|
||||
* xol_get_insn_slot - allocate a slot for xol.
|
||||
* Returns the allocated slot address or 0.
|
||||
*/
|
||||
static unsigned long xol_get_insn_slot(struct uprobe *uprobe)
|
||||
static bool xol_get_insn_slot(struct uprobe *uprobe, struct uprobe_task *utask)
|
||||
{
|
||||
struct xol_area *area;
|
||||
unsigned long xol_vaddr;
|
||||
struct xol_area *area = get_xol_area();
|
||||
unsigned long slot_nr;
|
||||
|
||||
area = get_xol_area();
|
||||
if (!area)
|
||||
return 0;
|
||||
return false;
|
||||
|
||||
xol_vaddr = xol_take_insn_slot(area);
|
||||
if (unlikely(!xol_vaddr))
|
||||
return 0;
|
||||
wait_event(area->wq, (slot_nr = xol_get_slot_nr(area)) < UINSNS_PER_PAGE);
|
||||
|
||||
arch_uprobe_copy_ixol(area->page, xol_vaddr,
|
||||
utask->xol_vaddr = area->vaddr + slot_nr * UPROBE_XOL_SLOT_BYTES;
|
||||
arch_uprobe_copy_ixol(area->page, utask->xol_vaddr,
|
||||
&uprobe->arch.ixol, sizeof(uprobe->arch.ixol));
|
||||
|
||||
return xol_vaddr;
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* xol_free_insn_slot - If slot was earlier allocated by
|
||||
* @xol_get_insn_slot(), make the slot available for
|
||||
* subsequent requests.
|
||||
* xol_free_insn_slot - free the slot allocated by xol_get_insn_slot()
|
||||
*/
|
||||
static void xol_free_insn_slot(struct task_struct *tsk)
|
||||
static void xol_free_insn_slot(struct uprobe_task *utask)
|
||||
{
|
||||
struct xol_area *area;
|
||||
unsigned long vma_end;
|
||||
unsigned long slot_addr;
|
||||
struct xol_area *area = current->mm->uprobes_state.xol_area;
|
||||
unsigned long offset = utask->xol_vaddr - area->vaddr;
|
||||
unsigned int slot_nr;
|
||||
|
||||
if (!tsk->mm || !tsk->mm->uprobes_state.xol_area || !tsk->utask)
|
||||
utask->xol_vaddr = 0;
|
||||
/* xol_vaddr must fit into [area->vaddr, area->vaddr + PAGE_SIZE) */
|
||||
if (WARN_ON_ONCE(offset >= PAGE_SIZE))
|
||||
return;
|
||||
|
||||
slot_addr = tsk->utask->xol_vaddr;
|
||||
if (unlikely(!slot_addr))
|
||||
return;
|
||||
|
||||
area = tsk->mm->uprobes_state.xol_area;
|
||||
vma_end = area->vaddr + PAGE_SIZE;
|
||||
if (area->vaddr <= slot_addr && slot_addr < vma_end) {
|
||||
unsigned long offset;
|
||||
int slot_nr;
|
||||
|
||||
offset = slot_addr - area->vaddr;
|
||||
slot_nr = offset / UPROBE_XOL_SLOT_BYTES;
|
||||
if (slot_nr >= UINSNS_PER_PAGE)
|
||||
return;
|
||||
|
||||
clear_bit(slot_nr, area->bitmap);
|
||||
atomic_dec(&area->slot_count);
|
||||
smp_mb__after_atomic(); /* pairs with prepare_to_wait() */
|
||||
if (waitqueue_active(&area->wq))
|
||||
wake_up(&area->wq);
|
||||
|
||||
tsk->utask->xol_vaddr = 0;
|
||||
}
|
||||
slot_nr = offset / UPROBE_XOL_SLOT_BYTES;
|
||||
clear_bit(slot_nr, area->bitmap);
|
||||
smp_mb__after_atomic(); /* pairs with prepare_to_wait() */
|
||||
if (waitqueue_active(&area->wq))
|
||||
wake_up(&area->wq);
|
||||
}
|
||||
|
||||
void __weak arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
|
||||
@ -1750,11 +1889,18 @@ unsigned long uprobe_get_trap_addr(struct pt_regs *regs)
|
||||
return instruction_pointer(regs);
|
||||
}
|
||||
|
||||
static struct return_instance *free_ret_instance(struct return_instance *ri)
|
||||
static struct return_instance *free_ret_instance(struct return_instance *ri, bool cleanup_hprobe)
|
||||
{
|
||||
struct return_instance *next = ri->next;
|
||||
put_uprobe(ri->uprobe);
|
||||
kfree(ri);
|
||||
|
||||
if (cleanup_hprobe) {
|
||||
enum hprobe_state hstate;
|
||||
|
||||
(void)hprobe_consume(&ri->hprobe, &hstate);
|
||||
hprobe_finalize(&ri->hprobe, hstate);
|
||||
}
|
||||
|
||||
kfree_rcu(ri, rcu);
|
||||
return next;
|
||||
}
|
||||
|
||||
@ -1770,18 +1916,50 @@ void uprobe_free_utask(struct task_struct *t)
|
||||
if (!utask)
|
||||
return;
|
||||
|
||||
if (utask->active_uprobe)
|
||||
put_uprobe(utask->active_uprobe);
|
||||
WARN_ON_ONCE(utask->active_uprobe || utask->xol_vaddr);
|
||||
|
||||
timer_delete_sync(&utask->ri_timer);
|
||||
|
||||
ri = utask->return_instances;
|
||||
while (ri)
|
||||
ri = free_ret_instance(ri);
|
||||
ri = free_ret_instance(ri, true /* cleanup_hprobe */);
|
||||
|
||||
xol_free_insn_slot(t);
|
||||
kfree(utask);
|
||||
t->utask = NULL;
|
||||
}
|
||||
|
||||
/* Delay, in jiffies, used when (re)arming utask->ri_timer with mod_timer(). */
#define RI_TIMER_PERIOD (HZ / 10) /* 100 ms */
|
||||
|
||||
/*
 * Walk a singly linked return_instance list starting at @head, following
 * ->next and loading each pointer with rcu_dereference_raw(). @pos is the
 * iteration cursor; the loop ends at the first NULL pointer.
 */
#define for_each_ret_instance_rcu(pos, head) \
|
||||
for (pos = rcu_dereference_raw(head); pos; pos = rcu_dereference_raw(pos->next))
|
||||
|
||||
static void ri_timer(struct timer_list *timer)
|
||||
{
|
||||
struct uprobe_task *utask = container_of(timer, struct uprobe_task, ri_timer);
|
||||
struct return_instance *ri;
|
||||
|
||||
/* SRCU protects uprobe from reuse for the cmpxchg() inside hprobe_expire(). */
|
||||
guard(srcu)(&uretprobes_srcu);
|
||||
/* RCU protects return_instance from freeing. */
|
||||
guard(rcu)();
|
||||
|
||||
for_each_ret_instance_rcu(ri, utask->return_instances)
|
||||
hprobe_expire(&ri->hprobe, false);
|
||||
}
|
||||
|
||||
static struct uprobe_task *alloc_utask(void)
|
||||
{
|
||||
struct uprobe_task *utask;
|
||||
|
||||
utask = kzalloc(sizeof(*utask), GFP_KERNEL);
|
||||
if (!utask)
|
||||
return NULL;
|
||||
|
||||
timer_setup(&utask->ri_timer, ri_timer, 0);
|
||||
|
||||
return utask;
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate a uprobe_task object for the task if necessary.
|
||||
* Called when the thread hits a breakpoint.
|
||||
@ -1793,38 +1971,73 @@ void uprobe_free_utask(struct task_struct *t)
|
||||
static struct uprobe_task *get_utask(void)
|
||||
{
|
||||
if (!current->utask)
|
||||
current->utask = kzalloc(sizeof(struct uprobe_task), GFP_KERNEL);
|
||||
current->utask = alloc_utask();
|
||||
return current->utask;
|
||||
}
|
||||
|
||||
static size_t ri_size(int consumers_cnt)
|
||||
{
|
||||
struct return_instance *ri;
|
||||
|
||||
return sizeof(*ri) + sizeof(ri->consumers[0]) * consumers_cnt;
|
||||
}
|
||||
|
||||
#define DEF_CNT 4
|
||||
|
||||
static struct return_instance *alloc_return_instance(void)
|
||||
{
|
||||
struct return_instance *ri;
|
||||
|
||||
ri = kzalloc(ri_size(DEF_CNT), GFP_KERNEL);
|
||||
if (!ri)
|
||||
return ZERO_SIZE_PTR;
|
||||
|
||||
ri->consumers_cnt = DEF_CNT;
|
||||
return ri;
|
||||
}
|
||||
|
||||
static struct return_instance *dup_return_instance(struct return_instance *old)
|
||||
{
|
||||
size_t size = ri_size(old->consumers_cnt);
|
||||
|
||||
return kmemdup(old, size, GFP_KERNEL);
|
||||
}
|
||||
|
||||
static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask)
|
||||
{
|
||||
struct uprobe_task *n_utask;
|
||||
struct return_instance **p, *o, *n;
|
||||
struct uprobe *uprobe;
|
||||
|
||||
n_utask = kzalloc(sizeof(struct uprobe_task), GFP_KERNEL);
|
||||
n_utask = alloc_utask();
|
||||
if (!n_utask)
|
||||
return -ENOMEM;
|
||||
t->utask = n_utask;
|
||||
|
||||
/* protect uprobes from freeing, we'll need try_get_uprobe() them */
|
||||
guard(srcu)(&uretprobes_srcu);
|
||||
|
||||
p = &n_utask->return_instances;
|
||||
for (o = o_utask->return_instances; o; o = o->next) {
|
||||
n = kmalloc(sizeof(struct return_instance), GFP_KERNEL);
|
||||
n = dup_return_instance(o);
|
||||
if (!n)
|
||||
return -ENOMEM;
|
||||
|
||||
*n = *o;
|
||||
/*
|
||||
* uprobe's refcnt has to be positive at this point, kept by
|
||||
* utask->return_instances items; return_instances can't be
|
||||
* removed right now, as task is blocked due to duping; so
|
||||
* get_uprobe() is safe to use here.
|
||||
*/
|
||||
get_uprobe(n->uprobe);
|
||||
n->next = NULL;
|
||||
/* if uprobe is non-NULL, we'll have an extra refcount for uprobe */
|
||||
uprobe = hprobe_expire(&o->hprobe, true);
|
||||
|
||||
*p = n;
|
||||
/*
|
||||
* New utask will have stable properly refcounted uprobe or
|
||||
* NULL. Even if we failed to get refcounted uprobe, we still
|
||||
* need to preserve full set of return_instances for proper
|
||||
* uretprobe handling and nesting in forked task.
|
||||
*/
|
||||
hprobe_init_stable(&n->hprobe, uprobe);
|
||||
|
||||
n->next = NULL;
|
||||
rcu_assign_pointer(*p, n);
|
||||
p = &n->next;
|
||||
|
||||
n_utask->depth++;
|
||||
}
|
||||
|
||||
@ -1900,45 +2113,34 @@ static void cleanup_return_instances(struct uprobe_task *utask, bool chained,
|
||||
enum rp_check ctx = chained ? RP_CHECK_CHAIN_CALL : RP_CHECK_CALL;
|
||||
|
||||
while (ri && !arch_uretprobe_is_alive(ri, ctx, regs)) {
|
||||
ri = free_ret_instance(ri);
|
||||
ri = free_ret_instance(ri, true /* cleanup_hprobe */);
|
||||
utask->depth--;
|
||||
}
|
||||
utask->return_instances = ri;
|
||||
rcu_assign_pointer(utask->return_instances, ri);
|
||||
}
|
||||
|
||||
static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
|
||||
static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs,
|
||||
struct return_instance *ri)
|
||||
{
|
||||
struct return_instance *ri;
|
||||
struct uprobe_task *utask;
|
||||
struct uprobe_task *utask = current->utask;
|
||||
unsigned long orig_ret_vaddr, trampoline_vaddr;
|
||||
bool chained;
|
||||
int srcu_idx;
|
||||
|
||||
if (!get_xol_area())
|
||||
return;
|
||||
|
||||
utask = get_utask();
|
||||
if (!utask)
|
||||
return;
|
||||
goto free;
|
||||
|
||||
if (utask->depth >= MAX_URETPROBE_DEPTH) {
|
||||
printk_ratelimited(KERN_INFO "uprobe: omit uretprobe due to"
|
||||
" nestedness limit pid/tgid=%d/%d\n",
|
||||
current->pid, current->tgid);
|
||||
return;
|
||||
goto free;
|
||||
}
|
||||
|
||||
/* we need to bump refcount to store uprobe in utask */
|
||||
if (!try_get_uprobe(uprobe))
|
||||
return;
|
||||
|
||||
ri = kmalloc(sizeof(struct return_instance), GFP_KERNEL);
|
||||
if (!ri)
|
||||
goto fail;
|
||||
|
||||
trampoline_vaddr = uprobe_get_trampoline_vaddr();
|
||||
orig_ret_vaddr = arch_uretprobe_hijack_return_addr(trampoline_vaddr, regs);
|
||||
if (orig_ret_vaddr == -1)
|
||||
goto fail;
|
||||
goto free;
|
||||
|
||||
/* drop the entries invalidated by longjmp() */
|
||||
chained = (orig_ret_vaddr == trampoline_vaddr);
|
||||
@ -1956,53 +2158,51 @@ static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
|
||||
* attack from user-space.
|
||||
*/
|
||||
uprobe_warn(current, "handle tail call");
|
||||
goto fail;
|
||||
goto free;
|
||||
}
|
||||
orig_ret_vaddr = utask->return_instances->orig_ret_vaddr;
|
||||
}
|
||||
ri->uprobe = uprobe;
|
||||
|
||||
/* __srcu_read_lock() because SRCU lock survives switch to user space */
|
||||
srcu_idx = __srcu_read_lock(&uretprobes_srcu);
|
||||
|
||||
ri->func = instruction_pointer(regs);
|
||||
ri->stack = user_stack_pointer(regs);
|
||||
ri->orig_ret_vaddr = orig_ret_vaddr;
|
||||
ri->chained = chained;
|
||||
|
||||
utask->depth++;
|
||||
|
||||
hprobe_init_leased(&ri->hprobe, uprobe, srcu_idx);
|
||||
ri->next = utask->return_instances;
|
||||
utask->return_instances = ri;
|
||||
rcu_assign_pointer(utask->return_instances, ri);
|
||||
|
||||
mod_timer(&utask->ri_timer, jiffies + RI_TIMER_PERIOD);
|
||||
|
||||
return;
|
||||
fail:
|
||||
free:
|
||||
kfree(ri);
|
||||
put_uprobe(uprobe);
|
||||
}
|
||||
|
||||
/* Prepare to single-step probed instruction out of line. */
|
||||
static int
|
||||
pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long bp_vaddr)
|
||||
{
|
||||
struct uprobe_task *utask;
|
||||
unsigned long xol_vaddr;
|
||||
struct uprobe_task *utask = current->utask;
|
||||
int err;
|
||||
|
||||
utask = get_utask();
|
||||
if (!utask)
|
||||
return -ENOMEM;
|
||||
|
||||
if (!try_get_uprobe(uprobe))
|
||||
return -EINVAL;
|
||||
|
||||
xol_vaddr = xol_get_insn_slot(uprobe);
|
||||
if (!xol_vaddr) {
|
||||
if (!xol_get_insn_slot(uprobe, utask)) {
|
||||
err = -ENOMEM;
|
||||
goto err_out;
|
||||
}
|
||||
|
||||
utask->xol_vaddr = xol_vaddr;
|
||||
utask->vaddr = bp_vaddr;
|
||||
|
||||
err = arch_uprobe_pre_xol(&uprobe->arch, regs);
|
||||
if (unlikely(err)) {
|
||||
xol_free_insn_slot(current);
|
||||
xol_free_insn_slot(utask);
|
||||
goto err_out;
|
||||
}
|
||||
|
||||
@ -2125,35 +2325,90 @@ static struct uprobe *find_active_uprobe_rcu(unsigned long bp_vaddr, int *is_swb
|
||||
return uprobe;
|
||||
}
|
||||
|
||||
static struct return_instance*
|
||||
push_consumer(struct return_instance *ri, int idx, __u64 id, __u64 cookie)
|
||||
{
|
||||
if (unlikely(ri == ZERO_SIZE_PTR))
|
||||
return ri;
|
||||
|
||||
if (unlikely(idx >= ri->consumers_cnt)) {
|
||||
struct return_instance *old_ri = ri;
|
||||
|
||||
ri->consumers_cnt += DEF_CNT;
|
||||
ri = krealloc(old_ri, ri_size(old_ri->consumers_cnt), GFP_KERNEL);
|
||||
if (!ri) {
|
||||
kfree(old_ri);
|
||||
return ZERO_SIZE_PTR;
|
||||
}
|
||||
}
|
||||
|
||||
ri->consumers[idx].id = id;
|
||||
ri->consumers[idx].cookie = cookie;
|
||||
return ri;
|
||||
}
|
||||
|
||||
static struct return_consumer *
|
||||
return_consumer_find(struct return_instance *ri, int *iter, int id)
|
||||
{
|
||||
struct return_consumer *ric;
|
||||
int idx = *iter;
|
||||
|
||||
for (ric = &ri->consumers[idx]; idx < ri->consumers_cnt; idx++, ric++) {
|
||||
if (ric->id == id) {
|
||||
*iter = idx + 1;
|
||||
return ric;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static bool ignore_ret_handler(int rc)
|
||||
{
|
||||
return rc == UPROBE_HANDLER_REMOVE || rc == UPROBE_HANDLER_IGNORE;
|
||||
}
|
||||
|
||||
static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
|
||||
{
|
||||
struct uprobe_consumer *uc;
|
||||
int remove = UPROBE_HANDLER_REMOVE;
|
||||
bool need_prep = false; /* prepare return uprobe, when needed */
|
||||
bool has_consumers = false;
|
||||
bool has_consumers = false, remove = true;
|
||||
struct return_instance *ri = NULL;
|
||||
int push_idx = 0;
|
||||
|
||||
current->utask->auprobe = &uprobe->arch;
|
||||
|
||||
list_for_each_entry_srcu(uc, &uprobe->consumers, cons_node,
|
||||
srcu_read_lock_held(&uprobes_srcu)) {
|
||||
list_for_each_entry_rcu(uc, &uprobe->consumers, cons_node, rcu_read_lock_trace_held()) {
|
||||
bool session = uc->handler && uc->ret_handler;
|
||||
__u64 cookie = 0;
|
||||
int rc = 0;
|
||||
|
||||
if (uc->handler) {
|
||||
rc = uc->handler(uc, regs);
|
||||
WARN(rc & ~UPROBE_HANDLER_MASK,
|
||||
rc = uc->handler(uc, regs, &cookie);
|
||||
WARN(rc < 0 || rc > 2,
|
||||
"bad rc=0x%x from %ps()\n", rc, uc->handler);
|
||||
}
|
||||
|
||||
if (uc->ret_handler)
|
||||
need_prep = true;
|
||||
|
||||
remove &= rc;
|
||||
remove &= rc == UPROBE_HANDLER_REMOVE;
|
||||
has_consumers = true;
|
||||
|
||||
if (!uc->ret_handler || ignore_ret_handler(rc))
|
||||
continue;
|
||||
|
||||
if (!ri)
|
||||
ri = alloc_return_instance();
|
||||
|
||||
if (session)
|
||||
ri = push_consumer(ri, push_idx++, uc->id, cookie);
|
||||
}
|
||||
current->utask->auprobe = NULL;
|
||||
|
||||
if (need_prep && !remove)
|
||||
prepare_uretprobe(uprobe, regs); /* put bp at return */
|
||||
if (!ZERO_OR_NULL_PTR(ri)) {
|
||||
/*
|
||||
* The push_idx value has the final number of return consumers,
|
||||
* and ri->consumers_cnt has number of allocated consumers.
|
||||
*/
|
||||
ri->consumers_cnt = push_idx;
|
||||
prepare_uretprobe(uprobe, regs, ri);
|
||||
}
|
||||
|
||||
if (remove && has_consumers) {
|
||||
down_read(&uprobe->register_rwsem);
|
||||
@ -2169,19 +2424,27 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
|
||||
}
|
||||
|
||||
static void
|
||||
handle_uretprobe_chain(struct return_instance *ri, struct pt_regs *regs)
|
||||
handle_uretprobe_chain(struct return_instance *ri, struct uprobe *uprobe, struct pt_regs *regs)
|
||||
{
|
||||
struct uprobe *uprobe = ri->uprobe;
|
||||
struct return_consumer *ric;
|
||||
struct uprobe_consumer *uc;
|
||||
int srcu_idx;
|
||||
int ric_idx = 0;
|
||||
|
||||
srcu_idx = srcu_read_lock(&uprobes_srcu);
|
||||
list_for_each_entry_srcu(uc, &uprobe->consumers, cons_node,
|
||||
srcu_read_lock_held(&uprobes_srcu)) {
|
||||
if (uc->ret_handler)
|
||||
uc->ret_handler(uc, ri->func, regs);
|
||||
/* all consumers unsubscribed meanwhile */
|
||||
if (unlikely(!uprobe))
|
||||
return;
|
||||
|
||||
rcu_read_lock_trace();
|
||||
list_for_each_entry_rcu(uc, &uprobe->consumers, cons_node, rcu_read_lock_trace_held()) {
|
||||
bool session = uc->handler && uc->ret_handler;
|
||||
|
||||
if (uc->ret_handler) {
|
||||
ric = return_consumer_find(ri, &ric_idx, uc->id);
|
||||
if (!session || ric)
|
||||
uc->ret_handler(uc, ri->func, regs, ric ? &ric->cookie : NULL);
|
||||
}
|
||||
}
|
||||
srcu_read_unlock(&uprobes_srcu, srcu_idx);
|
||||
rcu_read_unlock_trace();
|
||||
}
|
||||
|
||||
static struct return_instance *find_next_ret_chain(struct return_instance *ri)
|
||||
@ -2200,6 +2463,8 @@ void uprobe_handle_trampoline(struct pt_regs *regs)
|
||||
{
|
||||
struct uprobe_task *utask;
|
||||
struct return_instance *ri, *next;
|
||||
struct uprobe *uprobe;
|
||||
enum hprobe_state hstate;
|
||||
bool valid;
|
||||
|
||||
utask = current->utask;
|
||||
@ -2230,21 +2495,24 @@ void uprobe_handle_trampoline(struct pt_regs *regs)
|
||||
* trampoline addresses on the stack are replaced with correct
|
||||
* original return addresses
|
||||
*/
|
||||
utask->return_instances = ri->next;
|
||||
rcu_assign_pointer(utask->return_instances, ri->next);
|
||||
|
||||
uprobe = hprobe_consume(&ri->hprobe, &hstate);
|
||||
if (valid)
|
||||
handle_uretprobe_chain(ri, regs);
|
||||
ri = free_ret_instance(ri);
|
||||
handle_uretprobe_chain(ri, uprobe, regs);
|
||||
hprobe_finalize(&ri->hprobe, hstate);
|
||||
|
||||
/* We already took care of hprobe, no need to waste more time on that. */
|
||||
ri = free_ret_instance(ri, false /* !cleanup_hprobe */);
|
||||
utask->depth--;
|
||||
} while (ri != next);
|
||||
} while (!valid);
|
||||
|
||||
utask->return_instances = ri;
|
||||
return;
|
||||
|
||||
sigill:
|
||||
sigill:
|
||||
uprobe_warn(current, "handle uretprobe, sending SIGILL.");
|
||||
force_sig(SIGILL);
|
||||
|
||||
}
|
||||
|
||||
bool __weak arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs)
|
||||
@ -2266,13 +2534,13 @@ static void handle_swbp(struct pt_regs *regs)
|
||||
{
|
||||
struct uprobe *uprobe;
|
||||
unsigned long bp_vaddr;
|
||||
int is_swbp, srcu_idx;
|
||||
int is_swbp;
|
||||
|
||||
bp_vaddr = uprobe_get_swbp_addr(regs);
|
||||
if (bp_vaddr == uprobe_get_trampoline_vaddr())
|
||||
return uprobe_handle_trampoline(regs);
|
||||
|
||||
srcu_idx = srcu_read_lock(&uprobes_srcu);
|
||||
rcu_read_lock_trace();
|
||||
|
||||
uprobe = find_active_uprobe_rcu(bp_vaddr, &is_swbp);
|
||||
if (!uprobe) {
|
||||
@ -2330,7 +2598,7 @@ static void handle_swbp(struct pt_regs *regs)
|
||||
|
||||
out:
|
||||
/* arch_uprobe_skip_sstep() succeeded, or restart if can't singlestep */
|
||||
srcu_read_unlock(&uprobes_srcu, srcu_idx);
|
||||
rcu_read_unlock_trace();
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2353,7 +2621,7 @@ static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)
|
||||
put_uprobe(uprobe);
|
||||
utask->active_uprobe = NULL;
|
||||
utask->state = UTASK_RUNNING;
|
||||
xol_free_insn_slot(current);
|
||||
xol_free_insn_slot(utask);
|
||||
|
||||
spin_lock_irq(&current->sighand->siglock);
|
||||
recalc_sigpending(); /* see uprobe_deny_signal() */
|
||||
|
@ -3264,7 +3264,8 @@ uprobe_multi_link_filter(struct uprobe_consumer *con, struct mm_struct *mm)
|
||||
}
|
||||
|
||||
static int
|
||||
uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs *regs)
|
||||
uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs *regs,
|
||||
__u64 *data)
|
||||
{
|
||||
struct bpf_uprobe *uprobe;
|
||||
|
||||
@ -3273,7 +3274,8 @@ uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs *regs)
|
||||
}
|
||||
|
||||
static int
|
||||
uprobe_multi_link_ret_handler(struct uprobe_consumer *con, unsigned long func, struct pt_regs *regs)
|
||||
uprobe_multi_link_ret_handler(struct uprobe_consumer *con, unsigned long func, struct pt_regs *regs,
|
||||
__u64 *data)
|
||||
{
|
||||
struct bpf_uprobe *uprobe;
|
||||
|
||||
|
@ -89,9 +89,11 @@ static struct trace_uprobe *to_trace_uprobe(struct dyn_event *ev)
|
||||
static int register_uprobe_event(struct trace_uprobe *tu);
|
||||
static int unregister_uprobe_event(struct trace_uprobe *tu);
|
||||
|
||||
static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs);
|
||||
static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs,
|
||||
__u64 *data);
|
||||
static int uretprobe_dispatcher(struct uprobe_consumer *con,
|
||||
unsigned long func, struct pt_regs *regs);
|
||||
unsigned long func, struct pt_regs *regs,
|
||||
__u64 *data);
|
||||
|
||||
#ifdef CONFIG_STACK_GROWSUP
|
||||
static unsigned long adjust_stack_addr(unsigned long addr, unsigned int n)
|
||||
@ -1522,7 +1524,8 @@ trace_uprobe_register(struct trace_event_call *event, enum trace_reg type,
|
||||
}
|
||||
}
|
||||
|
||||
static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
|
||||
static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs,
|
||||
__u64 *data)
|
||||
{
|
||||
struct trace_uprobe *tu;
|
||||
struct uprobe_dispatch_data udd;
|
||||
@ -1553,7 +1556,8 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
|
||||
}
|
||||
|
||||
static int uretprobe_dispatcher(struct uprobe_consumer *con,
|
||||
unsigned long func, struct pt_regs *regs)
|
||||
unsigned long func, struct pt_regs *regs,
|
||||
__u64 *data)
|
||||
{
|
||||
struct trace_uprobe *tu;
|
||||
struct uprobe_dispatch_data udd;
|
||||
|
@ -463,7 +463,7 @@ static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = {
|
||||
|
||||
static int
|
||||
uprobe_ret_handler(struct uprobe_consumer *self, unsigned long func,
|
||||
struct pt_regs *regs)
|
||||
struct pt_regs *regs, __u64 *data)
|
||||
|
||||
{
|
||||
regs->ax = 0x12345678deadbeef;
|
||||
|
Loading…
Reference in New Issue
Block a user