mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-14 17:53:39 +00:00
Merge branch into tip/master: 'perf/core'
# New commits in perf/core: b709eb872e19 ("perf: map pages in advance") 6d642735cdb6 ("perf/x86/intel/uncore: Support more units on Granite Rapids") 3f710be02ea6 ("perf/x86/intel/uncore: Clean up func_id") 0e45818ec189 ("perf/x86/intel: Support RDPMC metrics clear mode") 02c56362a7d3 ("uprobes: Guard against kmemdup() failing in dup_return_instance()") d29e744c7167 ("perf/x86: Relax privilege filter restriction on AMD IBS") 6057b90ecc84 ("perf/core: Export perf_exclude_event()") 8622e45b5da1 ("uprobes: Reuse return_instances between multiple uretprobes within task") 0cf981de7687 ("uprobes: Ensure return_instance is detached from the list before freeing") 636666a1c733 ("uprobes: Decouple return_instance list traversal and freeing") 2ff913ab3f47 ("uprobes: Simplify session consumer tracking") e0925f2dc4de ("uprobes: add speculative lockless VMA-to-inode-to-uprobe resolution") 83e3dc9a5d4d ("uprobes: simplify find_active_uprobe_rcu() VMA checks") 03a001b156d2 ("mm: introduce mmap_lock_speculate_{try_begin|retry}") eb449bd96954 ("mm: convert mm_lock_seq to a proper seqcount") 7528585290a1 ("mm/gup: Use raw_seqcount_try_begin()") 96450ead1652 ("seqlock: add raw_seqcount_try_begin") b4943b8bfc41 ("perf/x86/rapl: Add core energy counter support for AMD CPUs") 54d2759778c1 ("perf/x86/rapl: Move the cntr_mask to rapl_pmus struct") bdc57ec70548 ("perf/x86/rapl: Remove the global variable rapl_msrs") abf03d9bd20c ("perf/x86/rapl: Modify the generic variable names to *_pkg*") eeca4c6b2529 ("perf/x86/rapl: Add arguments to the init and cleanup functions") cd29d83a6d81 ("perf/x86/rapl: Make rapl_model struct global") 8bf1c86e5ac8 ("perf/x86/rapl: Rename rapl_pmu variables") 1d5e2f637a94 ("perf/x86/rapl: Remove the cpu_to_rapl_pmu() function") e4b444347795 ("x86/topology: Introduce topology_logical_core_id()") 2f2db347071a ("perf/x86/rapl: Remove the unused get_rapl_pmu_cpumask() function") ae55e308bde2 ("perf/x86/intel/ds: Simplify the PEBS records processing for adaptive PEBS") 
3c00ed344cef ("perf/x86/intel/ds: Factor out functions for PEBS records processing") 7087bfb0adc9 ("perf/x86/intel/ds: Clarify adaptive PEBS processing") faac6f105ef1 ("perf/core: Check sample_type in perf_sample_save_brstack") f226805bc5f6 ("perf/core: Check sample_type in perf_sample_save_callchain") b9c44b91476b ("perf/core: Save raw sample data conditionally based on sample type") Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
commit
40c78513bc
@ -135,6 +135,10 @@ Thread-related topology information in the kernel:
|
||||
The ID of the core to which a thread belongs. It is also printed in /proc/cpuinfo
|
||||
"core_id."
|
||||
|
||||
- topology_logical_core_id();
|
||||
|
||||
The logical core ID to which a thread belongs.
|
||||
|
||||
|
||||
|
||||
System topology examples
|
||||
|
@ -981,7 +981,7 @@ static int cfdiag_push_sample(struct perf_event *event,
|
||||
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
|
||||
raw.frag.size = cpuhw->usedss;
|
||||
raw.frag.data = cpuhw->stop;
|
||||
perf_sample_save_raw_data(&data, &raw);
|
||||
perf_sample_save_raw_data(&data, event, &raw);
|
||||
}
|
||||
|
||||
overflow = perf_event_overflow(event, &data, &regs);
|
||||
|
@ -981,7 +981,7 @@ static void cpumsf_pmu_disable(struct pmu *pmu)
|
||||
cpuhw->flags &= ~PMU_F_ENABLED;
|
||||
}
|
||||
|
||||
/* perf_exclude_event() - Filter event
|
||||
/* perf_event_exclude() - Filter event
|
||||
* @event: The perf event
|
||||
* @regs: pt_regs structure
|
||||
* @sde_regs: Sample-data-entry (sde) regs structure
|
||||
@ -990,7 +990,7 @@ static void cpumsf_pmu_disable(struct pmu *pmu)
|
||||
*
|
||||
* Return non-zero if the event shall be excluded.
|
||||
*/
|
||||
static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs,
|
||||
static int perf_event_exclude(struct perf_event *event, struct pt_regs *regs,
|
||||
struct perf_sf_sde_regs *sde_regs)
|
||||
{
|
||||
if (event->attr.exclude_user && user_mode(regs))
|
||||
@ -1073,7 +1073,7 @@ static int perf_push_sample(struct perf_event *event,
|
||||
data.tid_entry.pid = basic->hpp & LPP_PID_MASK;
|
||||
|
||||
overflow = 0;
|
||||
if (perf_exclude_event(event, &regs, sde_regs))
|
||||
if (perf_event_exclude(event, &regs, sde_regs))
|
||||
goto out;
|
||||
if (perf_event_overflow(event, &data, &regs)) {
|
||||
overflow = 1;
|
||||
|
@ -478,7 +478,7 @@ static int paicrypt_push_sample(size_t rawsize, struct paicrypt_map *cpump,
|
||||
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
|
||||
raw.frag.size = rawsize;
|
||||
raw.frag.data = cpump->save;
|
||||
perf_sample_save_raw_data(&data, &raw);
|
||||
perf_sample_save_raw_data(&data, event, &raw);
|
||||
}
|
||||
|
||||
overflow = perf_event_overflow(event, &data, &regs);
|
||||
|
@ -503,7 +503,7 @@ static int paiext_push_sample(size_t rawsize, struct paiext_map *cpump,
|
||||
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
|
||||
raw.frag.size = rawsize;
|
||||
raw.frag.data = cpump->save;
|
||||
perf_sample_save_raw_data(&data, &raw);
|
||||
perf_sample_save_raw_data(&data, event, &raw);
|
||||
}
|
||||
|
||||
overflow = perf_event_overflow(event, &data, &regs);
|
||||
|
@ -1001,8 +1001,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
|
||||
if (!x86_perf_event_set_period(event))
|
||||
continue;
|
||||
|
||||
if (has_branch_stack(event))
|
||||
perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);
|
||||
perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);
|
||||
|
||||
if (perf_event_overflow(event, &data, regs))
|
||||
x86_pmu_stop(event, 0);
|
||||
|
@ -31,6 +31,8 @@ static u32 ibs_caps;
|
||||
#define IBS_FETCH_CONFIG_MASK (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
|
||||
#define IBS_OP_CONFIG_MASK IBS_OP_MAX_CNT
|
||||
|
||||
/* attr.config2 */
|
||||
#define IBS_SW_FILTER_MASK 1
|
||||
|
||||
/*
|
||||
* IBS states:
|
||||
@ -290,6 +292,16 @@ static int perf_ibs_init(struct perf_event *event)
|
||||
if (has_branch_stack(event))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
/* handle exclude_{user,kernel} in the IRQ handler */
|
||||
if (event->attr.exclude_host || event->attr.exclude_guest ||
|
||||
event->attr.exclude_idle)
|
||||
return -EINVAL;
|
||||
|
||||
if (!(event->attr.config2 & IBS_SW_FILTER_MASK) &&
|
||||
(event->attr.exclude_kernel || event->attr.exclude_user ||
|
||||
event->attr.exclude_hv))
|
||||
return -EINVAL;
|
||||
|
||||
ret = validate_group(event);
|
||||
if (ret)
|
||||
return ret;
|
||||
@ -550,24 +562,14 @@ static struct attribute *attrs_empty[] = {
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute_group empty_format_group = {
|
||||
.name = "format",
|
||||
.attrs = attrs_empty,
|
||||
};
|
||||
|
||||
static struct attribute_group empty_caps_group = {
|
||||
.name = "caps",
|
||||
.attrs = attrs_empty,
|
||||
};
|
||||
|
||||
static const struct attribute_group *empty_attr_groups[] = {
|
||||
&empty_format_group,
|
||||
&empty_caps_group,
|
||||
NULL,
|
||||
};
|
||||
|
||||
PMU_FORMAT_ATTR(rand_en, "config:57");
|
||||
PMU_FORMAT_ATTR(cnt_ctl, "config:19");
|
||||
PMU_FORMAT_ATTR(swfilt, "config2:0");
|
||||
PMU_EVENT_ATTR_STRING(l3missonly, fetch_l3missonly, "config:59");
|
||||
PMU_EVENT_ATTR_STRING(l3missonly, op_l3missonly, "config:16");
|
||||
PMU_EVENT_ATTR_STRING(zen4_ibs_extensions, zen4_ibs_extensions, "1");
|
||||
@ -578,8 +580,9 @@ zen4_ibs_extensions_is_visible(struct kobject *kobj, struct attribute *attr, int
|
||||
return ibs_caps & IBS_CAPS_ZEN4 ? attr->mode : 0;
|
||||
}
|
||||
|
||||
static struct attribute *rand_en_attrs[] = {
|
||||
static struct attribute *fetch_attrs[] = {
|
||||
&format_attr_rand_en.attr,
|
||||
&format_attr_swfilt.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
@ -593,9 +596,9 @@ static struct attribute *zen4_ibs_extensions_attrs[] = {
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute_group group_rand_en = {
|
||||
static struct attribute_group group_fetch_formats = {
|
||||
.name = "format",
|
||||
.attrs = rand_en_attrs,
|
||||
.attrs = fetch_attrs,
|
||||
};
|
||||
|
||||
static struct attribute_group group_fetch_l3missonly = {
|
||||
@ -611,7 +614,7 @@ static struct attribute_group group_zen4_ibs_extensions = {
|
||||
};
|
||||
|
||||
static const struct attribute_group *fetch_attr_groups[] = {
|
||||
&group_rand_en,
|
||||
&group_fetch_formats,
|
||||
&empty_caps_group,
|
||||
NULL,
|
||||
};
|
||||
@ -628,6 +631,11 @@ cnt_ctl_is_visible(struct kobject *kobj, struct attribute *attr, int i)
|
||||
return ibs_caps & IBS_CAPS_OPCNT ? attr->mode : 0;
|
||||
}
|
||||
|
||||
static struct attribute *op_attrs[] = {
|
||||
&format_attr_swfilt.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute *cnt_ctl_attrs[] = {
|
||||
&format_attr_cnt_ctl.attr,
|
||||
NULL,
|
||||
@ -638,6 +646,11 @@ static struct attribute *op_l3missonly_attrs[] = {
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute_group group_op_formats = {
|
||||
.name = "format",
|
||||
.attrs = op_attrs,
|
||||
};
|
||||
|
||||
static struct attribute_group group_cnt_ctl = {
|
||||
.name = "format",
|
||||
.attrs = cnt_ctl_attrs,
|
||||
@ -650,6 +663,12 @@ static struct attribute_group group_op_l3missonly = {
|
||||
.is_visible = zen4_ibs_extensions_is_visible,
|
||||
};
|
||||
|
||||
static const struct attribute_group *op_attr_groups[] = {
|
||||
&group_op_formats,
|
||||
&empty_caps_group,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static const struct attribute_group *op_attr_update[] = {
|
||||
&group_cnt_ctl,
|
||||
&group_op_l3missonly,
|
||||
@ -667,7 +686,6 @@ static struct perf_ibs perf_ibs_fetch = {
|
||||
.start = perf_ibs_start,
|
||||
.stop = perf_ibs_stop,
|
||||
.read = perf_ibs_read,
|
||||
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
|
||||
},
|
||||
.msr = MSR_AMD64_IBSFETCHCTL,
|
||||
.config_mask = IBS_FETCH_CONFIG_MASK,
|
||||
@ -691,7 +709,6 @@ static struct perf_ibs perf_ibs_op = {
|
||||
.start = perf_ibs_start,
|
||||
.stop = perf_ibs_stop,
|
||||
.read = perf_ibs_read,
|
||||
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
|
||||
},
|
||||
.msr = MSR_AMD64_IBSOPCTL,
|
||||
.config_mask = IBS_OP_CONFIG_MASK,
|
||||
@ -1111,6 +1128,12 @@ fail:
|
||||
regs.flags |= PERF_EFLAGS_EXACT;
|
||||
}
|
||||
|
||||
if ((event->attr.config2 & IBS_SW_FILTER_MASK) &&
|
||||
perf_exclude_event(event, &regs)) {
|
||||
throttle = perf_event_account_interrupt(event);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
|
||||
raw = (struct perf_raw_record){
|
||||
.frag = {
|
||||
@ -1118,7 +1141,7 @@ fail:
|
||||
.data = ibs_data.data,
|
||||
},
|
||||
};
|
||||
perf_sample_save_raw_data(&data, &raw);
|
||||
perf_sample_save_raw_data(&data, event, &raw);
|
||||
}
|
||||
|
||||
if (perf_ibs == &perf_ibs_op)
|
||||
@ -1129,8 +1152,7 @@ fail:
|
||||
* recorded as part of interrupt regs. Thus we need to use rip from
|
||||
* interrupt regs while unwinding call stack.
|
||||
*/
|
||||
if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
|
||||
perf_sample_save_callchain(&data, event, iregs);
|
||||
perf_sample_save_callchain(&data, event, iregs);
|
||||
|
||||
throttle = perf_event_overflow(event, &data, &regs);
|
||||
out:
|
||||
@ -1228,7 +1250,7 @@ static __init int perf_ibs_op_init(void)
|
||||
if (ibs_caps & IBS_CAPS_ZEN4)
|
||||
perf_ibs_op.config_mask |= IBS_OP_L3MISSONLY;
|
||||
|
||||
perf_ibs_op.pmu.attr_groups = empty_attr_groups;
|
||||
perf_ibs_op.pmu.attr_groups = op_attr_groups;
|
||||
perf_ibs_op.pmu.attr_update = op_attr_update;
|
||||
|
||||
return perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
|
||||
|
@ -1707,8 +1707,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
|
||||
|
||||
perf_sample_data_init(&data, 0, event->hw.last_period);
|
||||
|
||||
if (has_branch_stack(event))
|
||||
perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);
|
||||
perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);
|
||||
|
||||
if (perf_event_overflow(event, &data, regs))
|
||||
x86_pmu_stop(event, 0);
|
||||
|
@ -2826,6 +2826,9 @@ static void intel_pmu_enable_fixed(struct perf_event *event)
|
||||
return;
|
||||
|
||||
idx = INTEL_PMC_IDX_FIXED_SLOTS;
|
||||
|
||||
if (event->attr.config1 & INTEL_TD_CFG_METRIC_CLEAR)
|
||||
bits |= INTEL_FIXED_3_METRICS_CLEAR;
|
||||
}
|
||||
|
||||
intel_set_masks(event, idx);
|
||||
@ -4081,7 +4084,12 @@ static int intel_pmu_hw_config(struct perf_event *event)
|
||||
* is used in a metrics group, it too cannot support sampling.
|
||||
*/
|
||||
if (intel_pmu_has_cap(event, PERF_CAP_METRICS_IDX) && is_topdown_event(event)) {
|
||||
if (event->attr.config1 || event->attr.config2)
|
||||
/* The metrics_clear can only be set for the slots event */
|
||||
if (event->attr.config1 &&
|
||||
(!is_slots_event(event) || (event->attr.config1 & ~INTEL_TD_CFG_METRIC_CLEAR)))
|
||||
return -EINVAL;
|
||||
|
||||
if (event->attr.config2)
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
@ -4690,6 +4698,8 @@ PMU_FORMAT_ATTR(in_tx, "config:32" );
|
||||
PMU_FORMAT_ATTR(in_tx_cp, "config:33" );
|
||||
PMU_FORMAT_ATTR(eq, "config:36" ); /* v6 + */
|
||||
|
||||
PMU_FORMAT_ATTR(metrics_clear, "config1:0"); /* PERF_CAPABILITIES.RDPMC_METRICS_CLEAR */
|
||||
|
||||
static ssize_t umask2_show(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *page)
|
||||
@ -4709,6 +4719,7 @@ static struct device_attribute format_attr_umask2 =
|
||||
static struct attribute *format_evtsel_ext_attrs[] = {
|
||||
&format_attr_umask2.attr,
|
||||
&format_attr_eq.attr,
|
||||
&format_attr_metrics_clear.attr,
|
||||
NULL
|
||||
};
|
||||
|
||||
@ -4733,6 +4744,13 @@ evtsel_ext_is_visible(struct kobject *kobj, struct attribute *attr, int i)
|
||||
if (i == 1)
|
||||
return (mask & ARCH_PERFMON_EVENTSEL_EQ) ? attr->mode : 0;
|
||||
|
||||
/* PERF_CAPABILITIES.RDPMC_METRICS_CLEAR */
|
||||
if (i == 2) {
|
||||
union perf_capabilities intel_cap = hybrid(dev_get_drvdata(dev), intel_cap);
|
||||
|
||||
return intel_cap.rdpmc_metrics_clear ? attr->mode : 0;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -1789,8 +1789,7 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
|
||||
* previous PMI context or an (I)RET happened between the record and
|
||||
* PMI.
|
||||
*/
|
||||
if (sample_type & PERF_SAMPLE_CALLCHAIN)
|
||||
perf_sample_save_callchain(data, event, iregs);
|
||||
perf_sample_save_callchain(data, event, iregs);
|
||||
|
||||
/*
|
||||
* We use the interrupt regs as a base because the PEBS record does not
|
||||
@ -1889,8 +1888,7 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
|
||||
if (x86_pmu.intel_cap.pebs_format >= 3)
|
||||
setup_pebs_time(event, data, pebs->tsc);
|
||||
|
||||
if (has_branch_stack(event))
|
||||
perf_sample_save_brstack(data, event, &cpuc->lbr_stack, NULL);
|
||||
perf_sample_save_brstack(data, event, &cpuc->lbr_stack, NULL);
|
||||
}
|
||||
|
||||
static void adaptive_pebs_save_regs(struct pt_regs *regs,
|
||||
@ -1917,8 +1915,6 @@ static void adaptive_pebs_save_regs(struct pt_regs *regs,
|
||||
}
|
||||
|
||||
#define PEBS_LATENCY_MASK 0xffff
|
||||
#define PEBS_CACHE_LATENCY_OFFSET 32
|
||||
#define PEBS_RETIRE_LATENCY_OFFSET 32
|
||||
|
||||
/*
|
||||
* With adaptive PEBS the layout depends on what fields are configured.
|
||||
@ -1932,8 +1928,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
struct pebs_basic *basic = __pebs;
|
||||
void *next_record = basic + 1;
|
||||
u64 sample_type;
|
||||
u64 format_size;
|
||||
u64 sample_type, format_group;
|
||||
struct pebs_meminfo *meminfo = NULL;
|
||||
struct pebs_gprs *gprs = NULL;
|
||||
struct x86_perf_regs *perf_regs;
|
||||
@ -1945,7 +1940,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
|
||||
perf_regs->xmm_regs = NULL;
|
||||
|
||||
sample_type = event->attr.sample_type;
|
||||
format_size = basic->format_size;
|
||||
format_group = basic->format_group;
|
||||
perf_sample_data_init(data, 0, event->hw.last_period);
|
||||
data->period = event->hw.last_period;
|
||||
|
||||
@ -1957,8 +1952,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
|
||||
* previous PMI context or an (I)RET happened between the record and
|
||||
* PMI.
|
||||
*/
|
||||
if (sample_type & PERF_SAMPLE_CALLCHAIN)
|
||||
perf_sample_save_callchain(data, event, iregs);
|
||||
perf_sample_save_callchain(data, event, iregs);
|
||||
|
||||
*regs = *iregs;
|
||||
/* The ip in basic is EventingIP */
|
||||
@ -1967,7 +1961,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
|
||||
|
||||
if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) {
|
||||
if (x86_pmu.flags & PMU_FL_RETIRE_LATENCY)
|
||||
data->weight.var3_w = format_size >> PEBS_RETIRE_LATENCY_OFFSET & PEBS_LATENCY_MASK;
|
||||
data->weight.var3_w = basic->retire_latency;
|
||||
else
|
||||
data->weight.var3_w = 0;
|
||||
}
|
||||
@ -1977,12 +1971,12 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
|
||||
* But PERF_SAMPLE_TRANSACTION needs gprs->ax.
|
||||
* Save the pointer here but process later.
|
||||
*/
|
||||
if (format_size & PEBS_DATACFG_MEMINFO) {
|
||||
if (format_group & PEBS_DATACFG_MEMINFO) {
|
||||
meminfo = next_record;
|
||||
next_record = meminfo + 1;
|
||||
}
|
||||
|
||||
if (format_size & PEBS_DATACFG_GP) {
|
||||
if (format_group & PEBS_DATACFG_GP) {
|
||||
gprs = next_record;
|
||||
next_record = gprs + 1;
|
||||
|
||||
@ -1995,14 +1989,13 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
|
||||
adaptive_pebs_save_regs(regs, gprs);
|
||||
}
|
||||
|
||||
if (format_size & PEBS_DATACFG_MEMINFO) {
|
||||
if (format_group & PEBS_DATACFG_MEMINFO) {
|
||||
if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
|
||||
u64 weight = meminfo->latency;
|
||||
u64 latency = x86_pmu.flags & PMU_FL_INSTR_LATENCY ?
|
||||
meminfo->cache_latency : meminfo->mem_latency;
|
||||
|
||||
if (x86_pmu.flags & PMU_FL_INSTR_LATENCY) {
|
||||
data->weight.var2_w = weight & PEBS_LATENCY_MASK;
|
||||
weight >>= PEBS_CACHE_LATENCY_OFFSET;
|
||||
}
|
||||
if (x86_pmu.flags & PMU_FL_INSTR_LATENCY)
|
||||
data->weight.var2_w = meminfo->instr_latency;
|
||||
|
||||
/*
|
||||
* Although meminfo::latency is defined as a u64,
|
||||
@ -2010,12 +2003,13 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
|
||||
* in practice on Ice Lake and earlier platforms.
|
||||
*/
|
||||
if (sample_type & PERF_SAMPLE_WEIGHT) {
|
||||
data->weight.full = weight ?:
|
||||
data->weight.full = latency ?:
|
||||
intel_get_tsx_weight(meminfo->tsx_tuning);
|
||||
} else {
|
||||
data->weight.var1_dw = (u32)(weight & PEBS_LATENCY_MASK) ?:
|
||||
data->weight.var1_dw = (u32)latency ?:
|
||||
intel_get_tsx_weight(meminfo->tsx_tuning);
|
||||
}
|
||||
|
||||
data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
|
||||
}
|
||||
|
||||
@ -2036,16 +2030,16 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
|
||||
}
|
||||
}
|
||||
|
||||
if (format_size & PEBS_DATACFG_XMMS) {
|
||||
if (format_group & PEBS_DATACFG_XMMS) {
|
||||
struct pebs_xmm *xmm = next_record;
|
||||
|
||||
next_record = xmm + 1;
|
||||
perf_regs->xmm_regs = xmm->xmm;
|
||||
}
|
||||
|
||||
if (format_size & PEBS_DATACFG_LBRS) {
|
||||
if (format_group & PEBS_DATACFG_LBRS) {
|
||||
struct lbr_entry *lbr = next_record;
|
||||
int num_lbr = ((format_size >> PEBS_DATACFG_LBR_SHIFT)
|
||||
int num_lbr = ((format_group >> PEBS_DATACFG_LBR_SHIFT)
|
||||
& 0xff) + 1;
|
||||
next_record = next_record + num_lbr * sizeof(struct lbr_entry);
|
||||
|
||||
@ -2055,11 +2049,11 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
|
||||
}
|
||||
}
|
||||
|
||||
WARN_ONCE(next_record != __pebs + (format_size >> 48),
|
||||
"PEBS record size %llu, expected %llu, config %llx\n",
|
||||
format_size >> 48,
|
||||
WARN_ONCE(next_record != __pebs + basic->format_size,
|
||||
"PEBS record size %u, expected %llu, config %llx\n",
|
||||
basic->format_size,
|
||||
(u64)(next_record - __pebs),
|
||||
basic->format_size);
|
||||
format_group);
|
||||
}
|
||||
|
||||
static inline void *
|
||||
@ -2170,46 +2164,33 @@ intel_pmu_save_and_restart_reload(struct perf_event *event, int count)
|
||||
return 0;
|
||||
}
|
||||
|
||||
typedef void (*setup_fn)(struct perf_event *, struct pt_regs *, void *,
|
||||
struct perf_sample_data *, struct pt_regs *);
|
||||
|
||||
static struct pt_regs dummy_iregs;
|
||||
|
||||
static __always_inline void
|
||||
__intel_pmu_pebs_event(struct perf_event *event,
|
||||
struct pt_regs *iregs,
|
||||
struct pt_regs *regs,
|
||||
struct perf_sample_data *data,
|
||||
void *base, void *top,
|
||||
int bit, int count,
|
||||
void (*setup_sample)(struct perf_event *,
|
||||
struct pt_regs *,
|
||||
void *,
|
||||
struct perf_sample_data *,
|
||||
struct pt_regs *))
|
||||
void *at,
|
||||
setup_fn setup_sample)
|
||||
{
|
||||
setup_sample(event, iregs, at, data, regs);
|
||||
perf_event_output(event, data, regs);
|
||||
}
|
||||
|
||||
static __always_inline void
|
||||
__intel_pmu_pebs_last_event(struct perf_event *event,
|
||||
struct pt_regs *iregs,
|
||||
struct pt_regs *regs,
|
||||
struct perf_sample_data *data,
|
||||
void *at,
|
||||
int count,
|
||||
setup_fn setup_sample)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
struct x86_perf_regs perf_regs;
|
||||
struct pt_regs *regs = &perf_regs.regs;
|
||||
void *at = get_next_pebs_record_by_bit(base, top, bit);
|
||||
static struct pt_regs dummy_iregs;
|
||||
|
||||
if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
|
||||
/*
|
||||
* Now, auto-reload is only enabled in fixed period mode.
|
||||
* The reload value is always hwc->sample_period.
|
||||
* May need to change it, if auto-reload is enabled in
|
||||
* freq mode later.
|
||||
*/
|
||||
intel_pmu_save_and_restart_reload(event, count);
|
||||
} else if (!intel_pmu_save_and_restart(event))
|
||||
return;
|
||||
|
||||
if (!iregs)
|
||||
iregs = &dummy_iregs;
|
||||
|
||||
while (count > 1) {
|
||||
setup_sample(event, iregs, at, data, regs);
|
||||
perf_event_output(event, data, regs);
|
||||
at += cpuc->pebs_record_size;
|
||||
at = get_next_pebs_record_by_bit(at, top, bit);
|
||||
count--;
|
||||
}
|
||||
|
||||
setup_sample(event, iregs, at, data, regs);
|
||||
if (iregs == &dummy_iregs) {
|
||||
@ -2228,6 +2209,44 @@ __intel_pmu_pebs_event(struct perf_event *event,
|
||||
if (perf_event_overflow(event, data, regs))
|
||||
x86_pmu_stop(event, 0);
|
||||
}
|
||||
|
||||
if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
|
||||
/*
|
||||
* Now, auto-reload is only enabled in fixed period mode.
|
||||
* The reload value is always hwc->sample_period.
|
||||
* May need to change it, if auto-reload is enabled in
|
||||
* freq mode later.
|
||||
*/
|
||||
intel_pmu_save_and_restart_reload(event, count);
|
||||
} else
|
||||
intel_pmu_save_and_restart(event);
|
||||
}
|
||||
|
||||
static __always_inline void
|
||||
__intel_pmu_pebs_events(struct perf_event *event,
|
||||
struct pt_regs *iregs,
|
||||
struct perf_sample_data *data,
|
||||
void *base, void *top,
|
||||
int bit, int count,
|
||||
setup_fn setup_sample)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
struct x86_perf_regs perf_regs;
|
||||
struct pt_regs *regs = &perf_regs.regs;
|
||||
void *at = get_next_pebs_record_by_bit(base, top, bit);
|
||||
int cnt = count;
|
||||
|
||||
if (!iregs)
|
||||
iregs = &dummy_iregs;
|
||||
|
||||
while (cnt > 1) {
|
||||
__intel_pmu_pebs_event(event, iregs, regs, data, at, setup_sample);
|
||||
at += cpuc->pebs_record_size;
|
||||
at = get_next_pebs_record_by_bit(at, top, bit);
|
||||
cnt--;
|
||||
}
|
||||
|
||||
__intel_pmu_pebs_last_event(event, iregs, regs, data, at, count, setup_sample);
|
||||
}
|
||||
|
||||
static void intel_pmu_drain_pebs_core(struct pt_regs *iregs, struct perf_sample_data *data)
|
||||
@ -2264,8 +2283,8 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs, struct perf_sample_
|
||||
return;
|
||||
}
|
||||
|
||||
__intel_pmu_pebs_event(event, iregs, data, at, top, 0, n,
|
||||
setup_pebs_fixed_sample_data);
|
||||
__intel_pmu_pebs_events(event, iregs, data, at, top, 0, n,
|
||||
setup_pebs_fixed_sample_data);
|
||||
}
|
||||
|
||||
static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, int size)
|
||||
@ -2396,9 +2415,9 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
|
||||
}
|
||||
|
||||
if (counts[bit]) {
|
||||
__intel_pmu_pebs_event(event, iregs, data, base,
|
||||
top, bit, counts[bit],
|
||||
setup_pebs_fixed_sample_data);
|
||||
__intel_pmu_pebs_events(event, iregs, data, base,
|
||||
top, bit, counts[bit],
|
||||
setup_pebs_fixed_sample_data);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -2406,8 +2425,12 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
|
||||
static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_data *data)
|
||||
{
|
||||
short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
|
||||
void *last[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS];
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
struct debug_store *ds = cpuc->ds;
|
||||
struct x86_perf_regs perf_regs;
|
||||
struct pt_regs *regs = &perf_regs.regs;
|
||||
struct pebs_basic *basic;
|
||||
struct perf_event *event;
|
||||
void *base, *at, *top;
|
||||
int bit;
|
||||
@ -2429,30 +2452,41 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d
|
||||
return;
|
||||
}
|
||||
|
||||
for (at = base; at < top; at += cpuc->pebs_record_size) {
|
||||
if (!iregs)
|
||||
iregs = &dummy_iregs;
|
||||
|
||||
/* Process all but the last event for each counter. */
|
||||
for (at = base; at < top; at += basic->format_size) {
|
||||
u64 pebs_status;
|
||||
|
||||
pebs_status = get_pebs_status(at) & cpuc->pebs_enabled;
|
||||
pebs_status &= mask;
|
||||
basic = at;
|
||||
if (basic->format_size != cpuc->pebs_record_size)
|
||||
continue;
|
||||
|
||||
for_each_set_bit(bit, (unsigned long *)&pebs_status, X86_PMC_IDX_MAX)
|
||||
counts[bit]++;
|
||||
pebs_status = basic->applicable_counters & cpuc->pebs_enabled & mask;
|
||||
for_each_set_bit(bit, (unsigned long *)&pebs_status, X86_PMC_IDX_MAX) {
|
||||
event = cpuc->events[bit];
|
||||
|
||||
if (WARN_ON_ONCE(!event) ||
|
||||
WARN_ON_ONCE(!event->attr.precise_ip))
|
||||
continue;
|
||||
|
||||
if (counts[bit]++) {
|
||||
__intel_pmu_pebs_event(event, iregs, regs, data, last[bit],
|
||||
setup_pebs_adaptive_sample_data);
|
||||
}
|
||||
last[bit] = at;
|
||||
}
|
||||
}
|
||||
|
||||
for_each_set_bit(bit, (unsigned long *)&mask, X86_PMC_IDX_MAX) {
|
||||
if (counts[bit] == 0)
|
||||
if (!counts[bit])
|
||||
continue;
|
||||
|
||||
event = cpuc->events[bit];
|
||||
if (WARN_ON_ONCE(!event))
|
||||
continue;
|
||||
|
||||
if (WARN_ON_ONCE(!event->attr.precise_ip))
|
||||
continue;
|
||||
|
||||
__intel_pmu_pebs_event(event, iregs, data, base,
|
||||
top, bit, counts[bit],
|
||||
setup_pebs_adaptive_sample_data);
|
||||
__intel_pmu_pebs_last_event(event, iregs, regs, data, last[bit],
|
||||
counts[bit], setup_pebs_adaptive_sample_data);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -745,7 +745,7 @@ static int uncore_pmu_event_init(struct perf_event *event)
|
||||
|
||||
pmu = uncore_event_to_pmu(event);
|
||||
/* no device found for this pmu */
|
||||
if (pmu->func_id < 0)
|
||||
if (!pmu->registered)
|
||||
return -ENOENT;
|
||||
|
||||
/* Sampling not supported yet */
|
||||
@ -992,7 +992,7 @@ static void uncore_types_exit(struct intel_uncore_type **types)
|
||||
uncore_type_exit(*types);
|
||||
}
|
||||
|
||||
static int __init uncore_type_init(struct intel_uncore_type *type, bool setid)
|
||||
static int __init uncore_type_init(struct intel_uncore_type *type)
|
||||
{
|
||||
struct intel_uncore_pmu *pmus;
|
||||
size_t size;
|
||||
@ -1005,7 +1005,6 @@ static int __init uncore_type_init(struct intel_uncore_type *type, bool setid)
|
||||
size = uncore_max_dies() * sizeof(struct intel_uncore_box *);
|
||||
|
||||
for (i = 0; i < type->num_boxes; i++) {
|
||||
pmus[i].func_id = setid ? i : -1;
|
||||
pmus[i].pmu_idx = i;
|
||||
pmus[i].type = type;
|
||||
pmus[i].boxes = kzalloc(size, GFP_KERNEL);
|
||||
@ -1055,12 +1054,12 @@ err:
|
||||
}
|
||||
|
||||
static int __init
|
||||
uncore_types_init(struct intel_uncore_type **types, bool setid)
|
||||
uncore_types_init(struct intel_uncore_type **types)
|
||||
{
|
||||
int ret;
|
||||
|
||||
for (; *types; types++) {
|
||||
ret = uncore_type_init(*types, setid);
|
||||
ret = uncore_type_init(*types);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
@ -1160,11 +1159,6 @@ static int uncore_pci_pmu_register(struct pci_dev *pdev,
|
||||
if (!box)
|
||||
return -ENOMEM;
|
||||
|
||||
if (pmu->func_id < 0)
|
||||
pmu->func_id = pdev->devfn;
|
||||
else
|
||||
WARN_ON_ONCE(pmu->func_id != pdev->devfn);
|
||||
|
||||
atomic_inc(&box->refcnt);
|
||||
box->dieid = die;
|
||||
box->pci_dev = pdev;
|
||||
@ -1410,7 +1404,7 @@ static int __init uncore_pci_init(void)
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = uncore_types_init(uncore_pci_uncores, false);
|
||||
ret = uncore_types_init(uncore_pci_uncores);
|
||||
if (ret)
|
||||
goto errtype;
|
||||
|
||||
@ -1678,7 +1672,7 @@ static int __init uncore_cpu_init(void)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = uncore_types_init(uncore_msr_uncores, true);
|
||||
ret = uncore_types_init(uncore_msr_uncores);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
@ -1697,7 +1691,7 @@ static int __init uncore_mmio_init(void)
|
||||
struct intel_uncore_type **types = uncore_mmio_uncores;
|
||||
int ret;
|
||||
|
||||
ret = uncore_types_init(types, true);
|
||||
ret = uncore_types_init(types);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
|
@ -125,7 +125,6 @@ struct intel_uncore_pmu {
|
||||
struct pmu pmu;
|
||||
char name[UNCORE_PMU_NAME_LEN];
|
||||
int pmu_idx;
|
||||
int func_id;
|
||||
bool registered;
|
||||
atomic_t activeboxes;
|
||||
cpumask_t cpu_mask;
|
||||
|
@ -910,7 +910,7 @@ static int snb_uncore_imc_event_init(struct perf_event *event)
|
||||
|
||||
pmu = uncore_event_to_pmu(event);
|
||||
/* no device found for this pmu */
|
||||
if (pmu->func_id < 0)
|
||||
if (!pmu->registered)
|
||||
return -ENOENT;
|
||||
|
||||
/* Sampling not supported yet */
|
||||
|
@ -6684,17 +6684,8 @@ void spr_uncore_mmio_init(void)
|
||||
/* GNR uncore support */
|
||||
|
||||
#define UNCORE_GNR_NUM_UNCORE_TYPES 23
|
||||
#define UNCORE_GNR_TYPE_15 15
|
||||
#define UNCORE_GNR_B2UPI 18
|
||||
#define UNCORE_GNR_TYPE_21 21
|
||||
#define UNCORE_GNR_TYPE_22 22
|
||||
|
||||
int gnr_uncore_units_ignore[] = {
|
||||
UNCORE_SPR_UPI,
|
||||
UNCORE_GNR_TYPE_15,
|
||||
UNCORE_GNR_B2UPI,
|
||||
UNCORE_GNR_TYPE_21,
|
||||
UNCORE_GNR_TYPE_22,
|
||||
UNCORE_IGNORE_END
|
||||
};
|
||||
|
||||
@ -6703,6 +6694,31 @@ static struct intel_uncore_type gnr_uncore_ubox = {
|
||||
.attr_update = uncore_alias_groups,
|
||||
};
|
||||
|
||||
static struct intel_uncore_type gnr_uncore_pciex8 = {
|
||||
SPR_UNCORE_PCI_COMMON_FORMAT(),
|
||||
.name = "pciex8",
|
||||
};
|
||||
|
||||
static struct intel_uncore_type gnr_uncore_pciex16 = {
|
||||
SPR_UNCORE_PCI_COMMON_FORMAT(),
|
||||
.name = "pciex16",
|
||||
};
|
||||
|
||||
static struct intel_uncore_type gnr_uncore_upi = {
|
||||
SPR_UNCORE_PCI_COMMON_FORMAT(),
|
||||
.name = "upi",
|
||||
};
|
||||
|
||||
static struct intel_uncore_type gnr_uncore_b2upi = {
|
||||
SPR_UNCORE_PCI_COMMON_FORMAT(),
|
||||
.name = "b2upi",
|
||||
};
|
||||
|
||||
static struct intel_uncore_type gnr_uncore_b2hot = {
|
||||
.name = "b2hot",
|
||||
.attr_update = uncore_alias_groups,
|
||||
};
|
||||
|
||||
static struct intel_uncore_type gnr_uncore_b2cmi = {
|
||||
SPR_UNCORE_PCI_COMMON_FORMAT(),
|
||||
.name = "b2cmi",
|
||||
@ -6727,21 +6743,21 @@ static struct intel_uncore_type *gnr_uncores[UNCORE_GNR_NUM_UNCORE_TYPES] = {
|
||||
&gnr_uncore_ubox,
|
||||
&spr_uncore_imc,
|
||||
NULL,
|
||||
&gnr_uncore_upi,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
&spr_uncore_cxlcm,
|
||||
&spr_uncore_cxldp,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
&gnr_uncore_b2hot,
|
||||
&gnr_uncore_b2cmi,
|
||||
&gnr_uncore_b2cxl,
|
||||
NULL,
|
||||
&gnr_uncore_b2upi,
|
||||
NULL,
|
||||
&gnr_uncore_mdf_sbo,
|
||||
NULL,
|
||||
NULL,
|
||||
&gnr_uncore_pciex16,
|
||||
&gnr_uncore_pciex8,
|
||||
};
|
||||
|
||||
static struct freerunning_counters gnr_iio_freerunning[] = {
|
||||
|
@ -624,6 +624,7 @@ union perf_capabilities {
|
||||
u64 pebs_output_pt_available:1;
|
||||
u64 pebs_timing_info:1;
|
||||
u64 anythread_deprecated:1;
|
||||
u64 rdpmc_metrics_clear:1;
|
||||
};
|
||||
u64 capabilities;
|
||||
};
|
||||
|
@ -39,6 +39,10 @@
|
||||
* event: rapl_energy_psys
|
||||
* perf code: 0x5
|
||||
*
|
||||
* core counter: consumption of a single physical core
|
||||
* event: rapl_energy_core (power_core PMU)
|
||||
* perf code: 0x1
|
||||
*
|
||||
* We manage those counters as free running (read-only). They may be
|
||||
* used simultaneously by other tools, such as turbostat.
|
||||
*
|
||||
@ -70,18 +74,22 @@ MODULE_LICENSE("GPL");
|
||||
/*
|
||||
* RAPL energy status counters
|
||||
*/
|
||||
enum perf_rapl_events {
|
||||
enum perf_rapl_pkg_events {
|
||||
PERF_RAPL_PP0 = 0, /* all cores */
|
||||
PERF_RAPL_PKG, /* entire package */
|
||||
PERF_RAPL_RAM, /* DRAM */
|
||||
PERF_RAPL_PP1, /* gpu */
|
||||
PERF_RAPL_PSYS, /* psys */
|
||||
|
||||
PERF_RAPL_MAX,
|
||||
NR_RAPL_DOMAINS = PERF_RAPL_MAX,
|
||||
PERF_RAPL_PKG_EVENTS_MAX,
|
||||
NR_RAPL_PKG_DOMAINS = PERF_RAPL_PKG_EVENTS_MAX,
|
||||
};
|
||||
|
||||
static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
|
||||
#define PERF_RAPL_CORE 0 /* single core */
|
||||
#define PERF_RAPL_CORE_EVENTS_MAX 1
|
||||
#define NR_RAPL_CORE_DOMAINS PERF_RAPL_CORE_EVENTS_MAX
|
||||
|
||||
static const char *const rapl_pkg_domain_names[NR_RAPL_PKG_DOMAINS] __initconst = {
|
||||
"pp0-core",
|
||||
"package",
|
||||
"dram",
|
||||
@ -89,6 +97,8 @@ static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
|
||||
"psys",
|
||||
};
|
||||
|
||||
static const char *const rapl_core_domain_name __initconst = "core";
|
||||
|
||||
/*
|
||||
* event code: LSB 8 bits, passed in attr->config
|
||||
* any other bit is reserved
|
||||
@ -112,7 +122,7 @@ static struct perf_pmu_events_attr event_attr_##v = { \
|
||||
* considered as either pkg-scope or die-scope, and we are considering
|
||||
* them as die-scope.
|
||||
*/
|
||||
#define rapl_pmu_is_pkg_scope() \
|
||||
#define rapl_pkg_pmu_is_pkg_scope() \
|
||||
(boot_cpu_data.x86_vendor == X86_VENDOR_AMD || \
|
||||
boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
|
||||
|
||||
@ -129,7 +139,8 @@ struct rapl_pmu {
|
||||
struct rapl_pmus {
|
||||
struct pmu pmu;
|
||||
unsigned int nr_rapl_pmu;
|
||||
struct rapl_pmu *pmus[] __counted_by(nr_rapl_pmu);
|
||||
unsigned int cntr_mask;
|
||||
struct rapl_pmu *rapl_pmu[] __counted_by(nr_rapl_pmu);
|
||||
};
|
||||
|
||||
enum rapl_unit_quirk {
|
||||
@ -139,44 +150,43 @@ enum rapl_unit_quirk {
|
||||
};
|
||||
|
||||
struct rapl_model {
|
||||
struct perf_msr *rapl_msrs;
|
||||
unsigned long events;
|
||||
struct perf_msr *rapl_pkg_msrs;
|
||||
struct perf_msr *rapl_core_msrs;
|
||||
unsigned long pkg_events;
|
||||
unsigned long core_events;
|
||||
unsigned int msr_power_unit;
|
||||
enum rapl_unit_quirk unit_quirk;
|
||||
};
|
||||
|
||||
/* 1/2^hw_unit Joule */
|
||||
static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly;
|
||||
static struct rapl_pmus *rapl_pmus;
|
||||
static unsigned int rapl_cntr_mask;
|
||||
static int rapl_pkg_hw_unit[NR_RAPL_PKG_DOMAINS] __read_mostly;
|
||||
static int rapl_core_hw_unit __read_mostly;
|
||||
static struct rapl_pmus *rapl_pmus_pkg;
|
||||
static struct rapl_pmus *rapl_pmus_core;
|
||||
static u64 rapl_timer_ms;
|
||||
static struct perf_msr *rapl_msrs;
|
||||
static struct rapl_model *rapl_model;
|
||||
|
||||
/*
|
||||
* Helper functions to get the correct topology macros according to the
|
||||
* Helper function to get the correct topology id according to the
|
||||
* RAPL PMU scope.
|
||||
*/
|
||||
static inline unsigned int get_rapl_pmu_idx(int cpu)
|
||||
static inline unsigned int get_rapl_pmu_idx(int cpu, int scope)
|
||||
{
|
||||
return rapl_pmu_is_pkg_scope() ? topology_logical_package_id(cpu) :
|
||||
topology_logical_die_id(cpu);
|
||||
}
|
||||
|
||||
static inline const struct cpumask *get_rapl_pmu_cpumask(int cpu)
|
||||
{
|
||||
return rapl_pmu_is_pkg_scope() ? topology_core_cpumask(cpu) :
|
||||
topology_die_cpumask(cpu);
|
||||
}
|
||||
|
||||
static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu)
|
||||
{
|
||||
unsigned int rapl_pmu_idx = get_rapl_pmu_idx(cpu);
|
||||
|
||||
/*
|
||||
* The unsigned check also catches the '-1' return value for non
|
||||
* existent mappings in the topology map.
|
||||
* Returns unsigned int, which converts the '-1' return value
|
||||
* (for non-existent mappings in topology map) to UINT_MAX, so
|
||||
* the error check in the caller is simplified.
|
||||
*/
|
||||
return rapl_pmu_idx < rapl_pmus->nr_rapl_pmu ? rapl_pmus->pmus[rapl_pmu_idx] : NULL;
|
||||
switch (scope) {
|
||||
case PERF_PMU_SCOPE_PKG:
|
||||
return topology_logical_package_id(cpu);
|
||||
case PERF_PMU_SCOPE_DIE:
|
||||
return topology_logical_die_id(cpu);
|
||||
case PERF_PMU_SCOPE_CORE:
|
||||
return topology_logical_core_id(cpu);
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
static inline u64 rapl_read_counter(struct perf_event *event)
|
||||
@ -186,19 +196,20 @@ static inline u64 rapl_read_counter(struct perf_event *event)
|
||||
return raw;
|
||||
}
|
||||
|
||||
/*
 * Convert a raw counter delta @v into the 2^-32 Joule unit reported for
 * @event, using the hardware energy unit that matches the event's PMU scope.
 */
static inline u64 rapl_scale(u64 v, struct perf_event *event)
{
	int unit_shift;

	/*
	 * The core-scope PMU has one shared unit; package/die domains keep
	 * one unit per domain, indexed by (config - 1).
	 */
	if (event->pmu->scope == PERF_PMU_SCOPE_CORE)
		unit_shift = rapl_core_hw_unit;
	else
		unit_shift = rapl_pkg_hw_unit[event->hw.config - 1];

	/*
	 * scale delta to smallest unit (1/2^32)
	 * users must then scale back: count * 1/(1e9*2^32) to get Joules
	 * or use ldexp(count, -32).
	 * Watts = Joules/Time delta
	 */
	return v << (32 - unit_shift);
}
|
||||
|
||||
static u64 rapl_event_update(struct perf_event *event)
|
||||
@ -225,7 +236,7 @@ static u64 rapl_event_update(struct perf_event *event)
|
||||
delta = (new_raw_count << shift) - (prev_raw_count << shift);
|
||||
delta >>= shift;
|
||||
|
||||
sdelta = rapl_scale(delta, event->hw.config);
|
||||
sdelta = rapl_scale(delta, event);
|
||||
|
||||
local64_add(sdelta, &event->count);
|
||||
|
||||
@ -240,34 +251,34 @@ static void rapl_start_hrtimer(struct rapl_pmu *pmu)
|
||||
|
||||
static enum hrtimer_restart rapl_hrtimer_handle(struct hrtimer *hrtimer)
|
||||
{
|
||||
struct rapl_pmu *pmu = container_of(hrtimer, struct rapl_pmu, hrtimer);
|
||||
struct rapl_pmu *rapl_pmu = container_of(hrtimer, struct rapl_pmu, hrtimer);
|
||||
struct perf_event *event;
|
||||
unsigned long flags;
|
||||
|
||||
if (!pmu->n_active)
|
||||
if (!rapl_pmu->n_active)
|
||||
return HRTIMER_NORESTART;
|
||||
|
||||
raw_spin_lock_irqsave(&pmu->lock, flags);
|
||||
raw_spin_lock_irqsave(&rapl_pmu->lock, flags);
|
||||
|
||||
list_for_each_entry(event, &pmu->active_list, active_entry)
|
||||
list_for_each_entry(event, &rapl_pmu->active_list, active_entry)
|
||||
rapl_event_update(event);
|
||||
|
||||
raw_spin_unlock_irqrestore(&pmu->lock, flags);
|
||||
raw_spin_unlock_irqrestore(&rapl_pmu->lock, flags);
|
||||
|
||||
hrtimer_forward_now(hrtimer, pmu->timer_interval);
|
||||
hrtimer_forward_now(hrtimer, rapl_pmu->timer_interval);
|
||||
|
||||
return HRTIMER_RESTART;
|
||||
}
|
||||
|
||||
static void rapl_hrtimer_init(struct rapl_pmu *pmu)
|
||||
static void rapl_hrtimer_init(struct rapl_pmu *rapl_pmu)
|
||||
{
|
||||
struct hrtimer *hr = &pmu->hrtimer;
|
||||
struct hrtimer *hr = &rapl_pmu->hrtimer;
|
||||
|
||||
hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
|
||||
hr->function = rapl_hrtimer_handle;
|
||||
}
|
||||
|
||||
static void __rapl_pmu_event_start(struct rapl_pmu *pmu,
|
||||
static void __rapl_pmu_event_start(struct rapl_pmu *rapl_pmu,
|
||||
struct perf_event *event)
|
||||
{
|
||||
if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
|
||||
@ -275,39 +286,39 @@ static void __rapl_pmu_event_start(struct rapl_pmu *pmu,
|
||||
|
||||
event->hw.state = 0;
|
||||
|
||||
list_add_tail(&event->active_entry, &pmu->active_list);
|
||||
list_add_tail(&event->active_entry, &rapl_pmu->active_list);
|
||||
|
||||
local64_set(&event->hw.prev_count, rapl_read_counter(event));
|
||||
|
||||
pmu->n_active++;
|
||||
if (pmu->n_active == 1)
|
||||
rapl_start_hrtimer(pmu);
|
||||
rapl_pmu->n_active++;
|
||||
if (rapl_pmu->n_active == 1)
|
||||
rapl_start_hrtimer(rapl_pmu);
|
||||
}
|
||||
|
||||
static void rapl_pmu_event_start(struct perf_event *event, int mode)
|
||||
{
|
||||
struct rapl_pmu *pmu = event->pmu_private;
|
||||
struct rapl_pmu *rapl_pmu = event->pmu_private;
|
||||
unsigned long flags;
|
||||
|
||||
raw_spin_lock_irqsave(&pmu->lock, flags);
|
||||
__rapl_pmu_event_start(pmu, event);
|
||||
raw_spin_unlock_irqrestore(&pmu->lock, flags);
|
||||
raw_spin_lock_irqsave(&rapl_pmu->lock, flags);
|
||||
__rapl_pmu_event_start(rapl_pmu, event);
|
||||
raw_spin_unlock_irqrestore(&rapl_pmu->lock, flags);
|
||||
}
|
||||
|
||||
static void rapl_pmu_event_stop(struct perf_event *event, int mode)
|
||||
{
|
||||
struct rapl_pmu *pmu = event->pmu_private;
|
||||
struct rapl_pmu *rapl_pmu = event->pmu_private;
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
unsigned long flags;
|
||||
|
||||
raw_spin_lock_irqsave(&pmu->lock, flags);
|
||||
raw_spin_lock_irqsave(&rapl_pmu->lock, flags);
|
||||
|
||||
/* mark event as deactivated and stopped */
|
||||
if (!(hwc->state & PERF_HES_STOPPED)) {
|
||||
WARN_ON_ONCE(pmu->n_active <= 0);
|
||||
pmu->n_active--;
|
||||
if (pmu->n_active == 0)
|
||||
hrtimer_cancel(&pmu->hrtimer);
|
||||
WARN_ON_ONCE(rapl_pmu->n_active <= 0);
|
||||
rapl_pmu->n_active--;
|
||||
if (rapl_pmu->n_active == 0)
|
||||
hrtimer_cancel(&rapl_pmu->hrtimer);
|
||||
|
||||
list_del(&event->active_entry);
|
||||
|
||||
@ -325,23 +336,23 @@ static void rapl_pmu_event_stop(struct perf_event *event, int mode)
|
||||
hwc->state |= PERF_HES_UPTODATE;
|
||||
}
|
||||
|
||||
raw_spin_unlock_irqrestore(&pmu->lock, flags);
|
||||
raw_spin_unlock_irqrestore(&rapl_pmu->lock, flags);
|
||||
}
|
||||
|
||||
static int rapl_pmu_event_add(struct perf_event *event, int mode)
|
||||
{
|
||||
struct rapl_pmu *pmu = event->pmu_private;
|
||||
struct rapl_pmu *rapl_pmu = event->pmu_private;
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
unsigned long flags;
|
||||
|
||||
raw_spin_lock_irqsave(&pmu->lock, flags);
|
||||
raw_spin_lock_irqsave(&rapl_pmu->lock, flags);
|
||||
|
||||
hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
|
||||
|
||||
if (mode & PERF_EF_START)
|
||||
__rapl_pmu_event_start(pmu, event);
|
||||
__rapl_pmu_event_start(rapl_pmu, event);
|
||||
|
||||
raw_spin_unlock_irqrestore(&pmu->lock, flags);
|
||||
raw_spin_unlock_irqrestore(&rapl_pmu->lock, flags);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -354,12 +365,14 @@ static void rapl_pmu_event_del(struct perf_event *event, int flags)
|
||||
static int rapl_pmu_event_init(struct perf_event *event)
|
||||
{
|
||||
u64 cfg = event->attr.config & RAPL_EVENT_MASK;
|
||||
int bit, ret = 0;
|
||||
struct rapl_pmu *pmu;
|
||||
int bit, rapl_pmus_scope, ret = 0;
|
||||
struct rapl_pmu *rapl_pmu;
|
||||
unsigned int rapl_pmu_idx;
|
||||
struct rapl_pmus *rapl_pmus;
|
||||
|
||||
/* only look at RAPL events */
|
||||
if (event->attr.type != rapl_pmus->pmu.type)
|
||||
return -ENOENT;
|
||||
/* unsupported modes and filters */
|
||||
if (event->attr.sample_period) /* no sampling */
|
||||
return -EINVAL;
|
||||
|
||||
/* check only supported bits are set */
|
||||
if (event->attr.config & ~RAPL_EVENT_MASK)
|
||||
@ -368,26 +381,49 @@ static int rapl_pmu_event_init(struct perf_event *event)
|
||||
if (event->cpu < 0)
|
||||
return -EINVAL;
|
||||
|
||||
if (!cfg || cfg >= NR_RAPL_DOMAINS + 1)
|
||||
rapl_pmus = container_of(event->pmu, struct rapl_pmus, pmu);
|
||||
if (!rapl_pmus)
|
||||
return -EINVAL;
|
||||
rapl_pmus_scope = rapl_pmus->pmu.scope;
|
||||
|
||||
cfg = array_index_nospec((long)cfg, NR_RAPL_DOMAINS + 1);
|
||||
bit = cfg - 1;
|
||||
if (rapl_pmus_scope == PERF_PMU_SCOPE_PKG || rapl_pmus_scope == PERF_PMU_SCOPE_DIE) {
|
||||
/* only look at RAPL package events */
|
||||
if (event->attr.type != rapl_pmus_pkg->pmu.type)
|
||||
return -ENOENT;
|
||||
|
||||
cfg = array_index_nospec((long)cfg, NR_RAPL_PKG_DOMAINS + 1);
|
||||
if (!cfg || cfg >= NR_RAPL_PKG_DOMAINS + 1)
|
||||
return -EINVAL;
|
||||
|
||||
bit = cfg - 1;
|
||||
event->hw.event_base = rapl_model->rapl_pkg_msrs[bit].msr;
|
||||
} else if (rapl_pmus_scope == PERF_PMU_SCOPE_CORE) {
|
||||
/* only look at RAPL core events */
|
||||
if (event->attr.type != rapl_pmus_core->pmu.type)
|
||||
return -ENOENT;
|
||||
|
||||
cfg = array_index_nospec((long)cfg, NR_RAPL_CORE_DOMAINS + 1);
|
||||
if (!cfg || cfg >= NR_RAPL_PKG_DOMAINS + 1)
|
||||
return -EINVAL;
|
||||
|
||||
bit = cfg - 1;
|
||||
event->hw.event_base = rapl_model->rapl_core_msrs[bit].msr;
|
||||
} else
|
||||
return -EINVAL;
|
||||
|
||||
/* check event supported */
|
||||
if (!(rapl_cntr_mask & (1 << bit)))
|
||||
if (!(rapl_pmus->cntr_mask & (1 << bit)))
|
||||
return -EINVAL;
|
||||
|
||||
/* unsupported modes and filters */
|
||||
if (event->attr.sample_period) /* no sampling */
|
||||
rapl_pmu_idx = get_rapl_pmu_idx(event->cpu, rapl_pmus_scope);
|
||||
if (rapl_pmu_idx >= rapl_pmus->nr_rapl_pmu)
|
||||
return -EINVAL;
|
||||
|
||||
/* must be done before validate_group */
|
||||
pmu = cpu_to_rapl_pmu(event->cpu);
|
||||
if (!pmu)
|
||||
rapl_pmu = rapl_pmus->rapl_pmu[rapl_pmu_idx];
|
||||
if (!rapl_pmu)
|
||||
return -EINVAL;
|
||||
event->pmu_private = pmu;
|
||||
event->hw.event_base = rapl_msrs[bit].msr;
|
||||
|
||||
event->pmu_private = rapl_pmu;
|
||||
event->hw.config = cfg;
|
||||
event->hw.idx = bit;
|
||||
|
||||
@ -404,12 +440,14 @@ RAPL_EVENT_ATTR_STR(energy-pkg , rapl_pkg, "event=0x02");
|
||||
RAPL_EVENT_ATTR_STR(energy-ram , rapl_ram, "event=0x03");
|
||||
RAPL_EVENT_ATTR_STR(energy-gpu , rapl_gpu, "event=0x04");
|
||||
RAPL_EVENT_ATTR_STR(energy-psys, rapl_psys, "event=0x05");
|
||||
RAPL_EVENT_ATTR_STR(energy-core, rapl_core, "event=0x01");
|
||||
|
||||
RAPL_EVENT_ATTR_STR(energy-cores.unit, rapl_cores_unit, "Joules");
|
||||
RAPL_EVENT_ATTR_STR(energy-pkg.unit , rapl_pkg_unit, "Joules");
|
||||
RAPL_EVENT_ATTR_STR(energy-ram.unit , rapl_ram_unit, "Joules");
|
||||
RAPL_EVENT_ATTR_STR(energy-gpu.unit , rapl_gpu_unit, "Joules");
|
||||
RAPL_EVENT_ATTR_STR(energy-psys.unit, rapl_psys_unit, "Joules");
|
||||
RAPL_EVENT_ATTR_STR(energy-core.unit, rapl_core_unit, "Joules");
|
||||
|
||||
/*
|
||||
* we compute in 0.23 nJ increments regardless of MSR
|
||||
@ -419,6 +457,7 @@ RAPL_EVENT_ATTR_STR(energy-pkg.scale, rapl_pkg_scale, "2.3283064365386962890
|
||||
RAPL_EVENT_ATTR_STR(energy-ram.scale, rapl_ram_scale, "2.3283064365386962890625e-10");
|
||||
RAPL_EVENT_ATTR_STR(energy-gpu.scale, rapl_gpu_scale, "2.3283064365386962890625e-10");
|
||||
RAPL_EVENT_ATTR_STR(energy-psys.scale, rapl_psys_scale, "2.3283064365386962890625e-10");
|
||||
RAPL_EVENT_ATTR_STR(energy-core.scale, rapl_core_scale, "2.3283064365386962890625e-10");
|
||||
|
||||
/*
|
||||
* There are no default events, but we need to create
|
||||
@ -451,6 +490,12 @@ static const struct attribute_group *rapl_attr_groups[] = {
|
||||
NULL,
|
||||
};
|
||||
|
||||
static const struct attribute_group *rapl_core_attr_groups[] = {
|
||||
&rapl_pmu_format_group,
|
||||
&rapl_pmu_events_group,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute *rapl_events_cores[] = {
|
||||
EVENT_PTR(rapl_cores),
|
||||
EVENT_PTR(rapl_cores_unit),
|
||||
@ -511,6 +556,18 @@ static struct attribute_group rapl_events_psys_group = {
|
||||
.attrs = rapl_events_psys,
|
||||
};
|
||||
|
||||
static struct attribute *rapl_events_core[] = {
|
||||
EVENT_PTR(rapl_core),
|
||||
EVENT_PTR(rapl_core_unit),
|
||||
EVENT_PTR(rapl_core_scale),
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute_group rapl_events_core_group = {
|
||||
.name = "events",
|
||||
.attrs = rapl_events_core,
|
||||
};
|
||||
|
||||
static bool test_msr(int idx, void *data)
|
||||
{
|
||||
return test_bit(idx, (unsigned long *) data);
|
||||
@ -536,11 +593,11 @@ static struct perf_msr intel_rapl_spr_msrs[] = {
|
||||
};
|
||||
|
||||
/*
|
||||
* Force to PERF_RAPL_MAX size due to:
|
||||
* - perf_msr_probe(PERF_RAPL_MAX)
|
||||
* Force to PERF_RAPL_PKG_EVENTS_MAX size due to:
|
||||
* - perf_msr_probe(PERF_RAPL_PKG_EVENTS_MAX)
|
||||
* - want to use same event codes across both architectures
|
||||
*/
|
||||
static struct perf_msr amd_rapl_msrs[] = {
|
||||
static struct perf_msr amd_rapl_pkg_msrs[] = {
|
||||
[PERF_RAPL_PP0] = { 0, &rapl_events_cores_group, NULL, false, 0 },
|
||||
[PERF_RAPL_PKG] = { MSR_AMD_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr, false, RAPL_MSR_MASK },
|
||||
[PERF_RAPL_RAM] = { 0, &rapl_events_ram_group, NULL, false, 0 },
|
||||
@ -548,18 +605,25 @@ static struct perf_msr amd_rapl_msrs[] = {
|
||||
[PERF_RAPL_PSYS] = { 0, &rapl_events_psys_group, NULL, false, 0 },
|
||||
};
|
||||
|
||||
static int rapl_check_hw_unit(struct rapl_model *rm)
|
||||
static struct perf_msr amd_rapl_core_msrs[] = {
|
||||
[PERF_RAPL_CORE] = { MSR_AMD_CORE_ENERGY_STATUS, &rapl_events_core_group,
|
||||
test_msr, false, RAPL_MSR_MASK },
|
||||
};
|
||||
|
||||
static int rapl_check_hw_unit(void)
|
||||
{
|
||||
u64 msr_rapl_power_unit_bits;
|
||||
int i;
|
||||
|
||||
/* protect rdmsrl() to handle virtualization */
|
||||
if (rdmsrl_safe(rm->msr_power_unit, &msr_rapl_power_unit_bits))
|
||||
if (rdmsrl_safe(rapl_model->msr_power_unit, &msr_rapl_power_unit_bits))
|
||||
return -1;
|
||||
for (i = 0; i < NR_RAPL_DOMAINS; i++)
|
||||
rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
|
||||
for (i = 0; i < NR_RAPL_PKG_DOMAINS; i++)
|
||||
rapl_pkg_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
|
||||
|
||||
switch (rm->unit_quirk) {
|
||||
rapl_core_hw_unit = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
|
||||
|
||||
switch (rapl_model->unit_quirk) {
|
||||
/*
|
||||
* DRAM domain on HSW server and KNL has fixed energy unit which can be
|
||||
* different than the unit from power unit MSR. See
|
||||
@ -567,17 +631,16 @@ static int rapl_check_hw_unit(struct rapl_model *rm)
|
||||
* of 2. Datasheet, September 2014, Reference Number: 330784-001 "
|
||||
*/
|
||||
case RAPL_UNIT_QUIRK_INTEL_HSW:
|
||||
rapl_hw_unit[PERF_RAPL_RAM] = 16;
|
||||
rapl_pkg_hw_unit[PERF_RAPL_RAM] = 16;
|
||||
break;
|
||||
/* SPR uses a fixed energy unit for Psys domain. */
|
||||
case RAPL_UNIT_QUIRK_INTEL_SPR:
|
||||
rapl_hw_unit[PERF_RAPL_PSYS] = 0;
|
||||
rapl_pkg_hw_unit[PERF_RAPL_PSYS] = 0;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Calculate the timer rate:
|
||||
* Use reference of 200W for scaling the timeout to avoid counter
|
||||
@ -586,9 +649,9 @@ static int rapl_check_hw_unit(struct rapl_model *rm)
|
||||
* if hw unit is 32, then we use 2 ms 1/200/2
|
||||
*/
|
||||
rapl_timer_ms = 2;
|
||||
if (rapl_hw_unit[0] < 32) {
|
||||
if (rapl_pkg_hw_unit[0] < 32) {
|
||||
rapl_timer_ms = (1000 / (2 * 100));
|
||||
rapl_timer_ms *= (1ULL << (32 - rapl_hw_unit[0] - 1));
|
||||
rapl_timer_ms *= (1ULL << (32 - rapl_pkg_hw_unit[0] - 1));
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@ -596,24 +659,32 @@ static int rapl_check_hw_unit(struct rapl_model *rm)
|
||||
/*
 * Log the number of detected RAPL counters, the overflow timer period and
 * the hardware energy unit of every detected domain.
 */
static void __init rapl_advertise(void)
{
	int num_counters;
	int dom;

	/* The core PMU is optional; count its counters only if it exists. */
	num_counters = hweight32(rapl_pmus_pkg->cntr_mask);
	if (rapl_pmus_core)
		num_counters += hweight32(rapl_pmus_core->cntr_mask);

	pr_info("API unit is 2^-32 Joules, %d fixed counters, %llu ms ovfl timer\n",
		num_counters, rapl_timer_ms);

	for (dom = 0; dom < NR_RAPL_PKG_DOMAINS; dom++) {
		if (!(rapl_pmus_pkg->cntr_mask & (1 << dom)))
			continue;

		pr_info("hw unit of domain %s 2^-%d Joules\n",
			rapl_pkg_domain_names[dom], rapl_pkg_hw_unit[dom]);
	}

	if (!rapl_pmus_core)
		return;

	if (rapl_pmus_core->cntr_mask & (1 << PERF_RAPL_CORE))
		pr_info("hw unit of domain %s 2^-%d Joules\n",
			rapl_core_domain_name, rapl_core_hw_unit);
}
|
||||
|
||||
static void cleanup_rapl_pmus(void)
|
||||
static void cleanup_rapl_pmus(struct rapl_pmus *rapl_pmus)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < rapl_pmus->nr_rapl_pmu; i++)
|
||||
kfree(rapl_pmus->pmus[i]);
|
||||
kfree(rapl_pmus->rapl_pmu[i]);
|
||||
kfree(rapl_pmus);
|
||||
}
|
||||
|
||||
@ -626,46 +697,60 @@ static const struct attribute_group *rapl_attr_update[] = {
|
||||
NULL,
|
||||
};
|
||||
|
||||
static int __init init_rapl_pmu(void)
|
||||
static const struct attribute_group *rapl_core_attr_update[] = {
|
||||
&rapl_events_core_group,
|
||||
NULL,
|
||||
};
|
||||
|
||||
/*
 * Allocate and initialise one rapl_pmu for each of the nr_rapl_pmu slots
 * of @rapl_pmus.
 *
 * Returns 0 on success, or -ENOMEM if an allocation fails. On failure
 * every rapl_pmu allocated so far is freed and its slot is reset to NULL,
 * so the array never holds dangling pointers; the container itself is
 * left for the caller to release.
 */
static int __init init_rapl_pmu(struct rapl_pmus *rapl_pmus)
{
	struct rapl_pmu *rapl_pmu;
	int idx;

	for (idx = 0; idx < rapl_pmus->nr_rapl_pmu; idx++) {
		rapl_pmu = kzalloc(sizeof(*rapl_pmu), GFP_KERNEL);
		if (!rapl_pmu)
			goto free;

		raw_spin_lock_init(&rapl_pmu->lock);
		INIT_LIST_HEAD(&rapl_pmu->active_list);
		rapl_pmu->pmu = &rapl_pmus->pmu;
		rapl_pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
		rapl_hrtimer_init(rapl_pmu);

		rapl_pmus->rapl_pmu[idx] = rapl_pmu;
	}

	return 0;
free:
	/*
	 * Unwind in reverse order. Clear each slot after freeing it so that
	 * a later walk over the array cannot free the same object twice.
	 */
	while (idx-- > 0) {
		kfree(rapl_pmus->rapl_pmu[idx]);
		rapl_pmus->rapl_pmu[idx] = NULL;
	}
	return -ENOMEM;
}
|
||||
|
||||
static int __init init_rapl_pmus(void)
|
||||
static int __init init_rapl_pmus(struct rapl_pmus **rapl_pmus_ptr, int rapl_pmu_scope,
|
||||
const struct attribute_group **rapl_attr_groups,
|
||||
const struct attribute_group **rapl_attr_update)
|
||||
{
|
||||
int nr_rapl_pmu = topology_max_packages();
|
||||
int rapl_pmu_scope = PERF_PMU_SCOPE_PKG;
|
||||
struct rapl_pmus *rapl_pmus;
|
||||
|
||||
if (!rapl_pmu_is_pkg_scope()) {
|
||||
nr_rapl_pmu *= topology_max_dies_per_package();
|
||||
rapl_pmu_scope = PERF_PMU_SCOPE_DIE;
|
||||
}
|
||||
/*
|
||||
* rapl_pmu_scope must be either PKG, DIE or CORE
|
||||
*/
|
||||
if (rapl_pmu_scope == PERF_PMU_SCOPE_DIE)
|
||||
nr_rapl_pmu *= topology_max_dies_per_package();
|
||||
else if (rapl_pmu_scope == PERF_PMU_SCOPE_CORE)
|
||||
nr_rapl_pmu *= topology_num_cores_per_package();
|
||||
else if (rapl_pmu_scope != PERF_PMU_SCOPE_PKG)
|
||||
return -EINVAL;
|
||||
|
||||
rapl_pmus = kzalloc(struct_size(rapl_pmus, pmus, nr_rapl_pmu), GFP_KERNEL);
|
||||
rapl_pmus = kzalloc(struct_size(rapl_pmus, rapl_pmu, nr_rapl_pmu), GFP_KERNEL);
|
||||
if (!rapl_pmus)
|
||||
return -ENOMEM;
|
||||
|
||||
*rapl_pmus_ptr = rapl_pmus;
|
||||
|
||||
rapl_pmus->nr_rapl_pmu = nr_rapl_pmu;
|
||||
rapl_pmus->pmu.attr_groups = rapl_attr_groups;
|
||||
rapl_pmus->pmu.attr_update = rapl_attr_update;
|
||||
@ -680,75 +765,77 @@ static int __init init_rapl_pmus(void)
|
||||
rapl_pmus->pmu.module = THIS_MODULE;
|
||||
rapl_pmus->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE;
|
||||
|
||||
return init_rapl_pmu();
|
||||
return init_rapl_pmu(rapl_pmus);
|
||||
}
|
||||
|
||||
static struct rapl_model model_snb = {
|
||||
.events = BIT(PERF_RAPL_PP0) |
|
||||
.pkg_events = BIT(PERF_RAPL_PP0) |
|
||||
BIT(PERF_RAPL_PKG) |
|
||||
BIT(PERF_RAPL_PP1),
|
||||
.msr_power_unit = MSR_RAPL_POWER_UNIT,
|
||||
.rapl_msrs = intel_rapl_msrs,
|
||||
.rapl_pkg_msrs = intel_rapl_msrs,
|
||||
};
|
||||
|
||||
static struct rapl_model model_snbep = {
|
||||
.events = BIT(PERF_RAPL_PP0) |
|
||||
.pkg_events = BIT(PERF_RAPL_PP0) |
|
||||
BIT(PERF_RAPL_PKG) |
|
||||
BIT(PERF_RAPL_RAM),
|
||||
.msr_power_unit = MSR_RAPL_POWER_UNIT,
|
||||
.rapl_msrs = intel_rapl_msrs,
|
||||
.rapl_pkg_msrs = intel_rapl_msrs,
|
||||
};
|
||||
|
||||
static struct rapl_model model_hsw = {
|
||||
.events = BIT(PERF_RAPL_PP0) |
|
||||
.pkg_events = BIT(PERF_RAPL_PP0) |
|
||||
BIT(PERF_RAPL_PKG) |
|
||||
BIT(PERF_RAPL_RAM) |
|
||||
BIT(PERF_RAPL_PP1),
|
||||
.msr_power_unit = MSR_RAPL_POWER_UNIT,
|
||||
.rapl_msrs = intel_rapl_msrs,
|
||||
.rapl_pkg_msrs = intel_rapl_msrs,
|
||||
};
|
||||
|
||||
static struct rapl_model model_hsx = {
|
||||
.events = BIT(PERF_RAPL_PP0) |
|
||||
.pkg_events = BIT(PERF_RAPL_PP0) |
|
||||
BIT(PERF_RAPL_PKG) |
|
||||
BIT(PERF_RAPL_RAM),
|
||||
.unit_quirk = RAPL_UNIT_QUIRK_INTEL_HSW,
|
||||
.msr_power_unit = MSR_RAPL_POWER_UNIT,
|
||||
.rapl_msrs = intel_rapl_msrs,
|
||||
.rapl_pkg_msrs = intel_rapl_msrs,
|
||||
};
|
||||
|
||||
static struct rapl_model model_knl = {
|
||||
.events = BIT(PERF_RAPL_PKG) |
|
||||
.pkg_events = BIT(PERF_RAPL_PKG) |
|
||||
BIT(PERF_RAPL_RAM),
|
||||
.unit_quirk = RAPL_UNIT_QUIRK_INTEL_HSW,
|
||||
.msr_power_unit = MSR_RAPL_POWER_UNIT,
|
||||
.rapl_msrs = intel_rapl_msrs,
|
||||
.rapl_pkg_msrs = intel_rapl_msrs,
|
||||
};
|
||||
|
||||
static struct rapl_model model_skl = {
|
||||
.events = BIT(PERF_RAPL_PP0) |
|
||||
.pkg_events = BIT(PERF_RAPL_PP0) |
|
||||
BIT(PERF_RAPL_PKG) |
|
||||
BIT(PERF_RAPL_RAM) |
|
||||
BIT(PERF_RAPL_PP1) |
|
||||
BIT(PERF_RAPL_PSYS),
|
||||
.msr_power_unit = MSR_RAPL_POWER_UNIT,
|
||||
.rapl_msrs = intel_rapl_msrs,
|
||||
.rapl_pkg_msrs = intel_rapl_msrs,
|
||||
};
|
||||
|
||||
static struct rapl_model model_spr = {
|
||||
.events = BIT(PERF_RAPL_PP0) |
|
||||
.pkg_events = BIT(PERF_RAPL_PP0) |
|
||||
BIT(PERF_RAPL_PKG) |
|
||||
BIT(PERF_RAPL_RAM) |
|
||||
BIT(PERF_RAPL_PSYS),
|
||||
.unit_quirk = RAPL_UNIT_QUIRK_INTEL_SPR,
|
||||
.msr_power_unit = MSR_RAPL_POWER_UNIT,
|
||||
.rapl_msrs = intel_rapl_spr_msrs,
|
||||
.rapl_pkg_msrs = intel_rapl_spr_msrs,
|
||||
};
|
||||
|
||||
static struct rapl_model model_amd_hygon = {
|
||||
.events = BIT(PERF_RAPL_PKG),
|
||||
.pkg_events = BIT(PERF_RAPL_PKG),
|
||||
.core_events = BIT(PERF_RAPL_CORE),
|
||||
.msr_power_unit = MSR_AMD_RAPL_POWER_UNIT,
|
||||
.rapl_msrs = amd_rapl_msrs,
|
||||
.rapl_pkg_msrs = amd_rapl_pkg_msrs,
|
||||
.rapl_core_msrs = amd_rapl_core_msrs,
|
||||
};
|
||||
|
||||
static const struct x86_cpu_id rapl_model_match[] __initconst = {
|
||||
@ -804,45 +891,73 @@ MODULE_DEVICE_TABLE(x86cpu, rapl_model_match);
|
||||
/*
 * Module init: match the CPU model, read the hardware energy units, then
 * set up and register the package/die scope "power" PMU and, when the
 * model declares core events, the core scope "power_core" PMU.
 *
 * A failure of the package PMU fails the module load. A failure of the
 * core PMU is only warned about; in that case rapl_pmus_core is left NULL
 * so that rapl_advertise() and intel_rapl_exit() skip it.
 */
static int __init rapl_pmu_init(void)
{
	const struct x86_cpu_id *id;
	int rapl_pkg_pmu_scope = PERF_PMU_SCOPE_DIE;
	int ret;

	if (rapl_pkg_pmu_is_pkg_scope())
		rapl_pkg_pmu_scope = PERF_PMU_SCOPE_PKG;

	id = x86_match_cpu(rapl_model_match);
	if (!id)
		return -ENODEV;

	rapl_model = (struct rapl_model *) id->driver_data;

	ret = rapl_check_hw_unit();
	if (ret)
		return ret;

	ret = init_rapl_pmus(&rapl_pmus_pkg, rapl_pkg_pmu_scope, rapl_attr_groups,
			     rapl_attr_update);
	if (ret) {
		/*
		 * init_rapl_pmus() publishes the container before filling it
		 * and releases only the per-unit entries on failure; drop the
		 * container here. kfree(NULL) covers the early-error paths.
		 */
		kfree(rapl_pmus_pkg);
		rapl_pmus_pkg = NULL;
		return ret;
	}

	rapl_pmus_pkg->cntr_mask = perf_msr_probe(rapl_model->rapl_pkg_msrs,
						  PERF_RAPL_PKG_EVENTS_MAX, false,
						  (void *) &rapl_model->pkg_events);

	ret = perf_pmu_register(&rapl_pmus_pkg->pmu, "power", -1);
	if (ret)
		goto out;

	if (rapl_model->core_events) {
		ret = init_rapl_pmus(&rapl_pmus_core, PERF_PMU_SCOPE_CORE,
				     rapl_core_attr_groups,
				     rapl_core_attr_update);
		if (ret) {
			pr_warn("power-core PMU initialization failed (%d)\n", ret);
			/* Same partial-failure handling as for the package PMU. */
			kfree(rapl_pmus_core);
			rapl_pmus_core = NULL;
			goto core_init_failed;
		}

		rapl_pmus_core->cntr_mask = perf_msr_probe(rapl_model->rapl_core_msrs,
						     PERF_RAPL_CORE_EVENTS_MAX, false,
						     (void *) &rapl_model->core_events);

		ret = perf_pmu_register(&rapl_pmus_core->pmu, "power_core", -1);
		if (ret) {
			pr_warn("power-core PMU registration failed (%d)\n", ret);
			cleanup_rapl_pmus(rapl_pmus_core);
			/*
			 * Forget the freed PMU: rapl_advertise() and the exit
			 * path test this pointer before using it.
			 */
			rapl_pmus_core = NULL;
		}
	}

core_init_failed:
	rapl_advertise();
	return 0;

out:
	pr_warn("Initialization failed (%d), disabled\n", ret);
	cleanup_rapl_pmus(rapl_pmus_pkg);
	return ret;
}
|
||||
module_init(rapl_pmu_init);
|
||||
|
||||
/*
 * Module exit: unregister and free the optional core PMU first, then the
 * package PMU.
 */
static void __exit intel_rapl_exit(void)
{
	struct rapl_pmus *core = rapl_pmus_core;
	struct rapl_pmus *pkg = rapl_pmus_pkg;

	/* The core PMU exists only on models that declare core events. */
	if (core) {
		perf_pmu_unregister(&core->pmu);
		cleanup_rapl_pmus(core);
	}

	perf_pmu_unregister(&pkg->pmu);
	cleanup_rapl_pmus(pkg);
}
|
||||
module_exit(intel_rapl_exit);
|
||||
|
@ -41,6 +41,7 @@
|
||||
#define INTEL_FIXED_0_USER (1ULL << 1)
|
||||
#define INTEL_FIXED_0_ANYTHREAD (1ULL << 2)
|
||||
#define INTEL_FIXED_0_ENABLE_PMI (1ULL << 3)
|
||||
#define INTEL_FIXED_3_METRICS_CLEAR (1ULL << 2)
|
||||
|
||||
#define HSW_IN_TX (1ULL << 32)
|
||||
#define HSW_IN_TX_CHECKPOINTED (1ULL << 33)
|
||||
@ -372,6 +373,9 @@ static inline bool use_fixed_pseudo_encoding(u64 code)
|
||||
#define INTEL_TD_METRIC_MAX INTEL_TD_METRIC_MEM_BOUND
|
||||
#define INTEL_TD_METRIC_NUM 8
|
||||
|
||||
#define INTEL_TD_CFG_METRIC_CLEAR_BIT 0
|
||||
#define INTEL_TD_CFG_METRIC_CLEAR BIT_ULL(INTEL_TD_CFG_METRIC_CLEAR_BIT)
|
||||
|
||||
static inline bool is_metric_idx(int idx)
|
||||
{
|
||||
return (unsigned)(idx - INTEL_PMC_IDX_METRIC_BASE) < INTEL_TD_METRIC_NUM;
|
||||
@ -422,7 +426,9 @@ static inline bool is_topdown_idx(int idx)
|
||||
*/
|
||||
|
||||
struct pebs_basic {
|
||||
u64 format_size;
|
||||
u64 format_group:32,
|
||||
retire_latency:16,
|
||||
format_size:16;
|
||||
u64 ip;
|
||||
u64 applicable_counters;
|
||||
u64 tsc;
|
||||
@ -431,7 +437,17 @@ struct pebs_basic {
|
||||
struct pebs_meminfo {
|
||||
u64 address;
|
||||
u64 aux;
|
||||
u64 latency;
|
||||
union {
|
||||
/* pre Alder Lake */
|
||||
u64 mem_latency;
|
||||
/* Alder Lake and later */
|
||||
struct {
|
||||
u64 instr_latency:16;
|
||||
u64 pad2:16;
|
||||
u64 cache_latency:16;
|
||||
u64 pad3:16;
|
||||
};
|
||||
};
|
||||
u64 tsx_tuning;
|
||||
};
|
||||
|
||||
|
@ -98,6 +98,7 @@ struct cpuinfo_topology {
|
||||
// Logical ID mappings
|
||||
u32 logical_pkg_id;
|
||||
u32 logical_die_id;
|
||||
u32 logical_core_id;
|
||||
|
||||
// AMD Node ID and Nodes per Package info
|
||||
u32 amd_node_id;
|
||||
|
@ -143,6 +143,7 @@ extern const struct cpumask *cpu_clustergroup_mask(int cpu);
|
||||
#define topology_logical_package_id(cpu) (cpu_data(cpu).topo.logical_pkg_id)
|
||||
#define topology_physical_package_id(cpu) (cpu_data(cpu).topo.pkg_id)
|
||||
#define topology_logical_die_id(cpu) (cpu_data(cpu).topo.logical_die_id)
|
||||
#define topology_logical_core_id(cpu) (cpu_data(cpu).topo.logical_core_id)
|
||||
#define topology_die_id(cpu) (cpu_data(cpu).topo.die_id)
|
||||
#define topology_core_id(cpu) (cpu_data(cpu).topo.core_id)
|
||||
#define topology_ppin(cpu) (cpu_data(cpu).ppin)
|
||||
|
@ -25,6 +25,7 @@ static int cpu_debug_show(struct seq_file *m, void *p)
|
||||
seq_printf(m, "cpu_type: %s\n", get_topology_cpu_type_name(c));
|
||||
seq_printf(m, "logical_pkg_id: %u\n", c->topo.logical_pkg_id);
|
||||
seq_printf(m, "logical_die_id: %u\n", c->topo.logical_die_id);
|
||||
seq_printf(m, "logical_core_id: %u\n", c->topo.logical_core_id);
|
||||
seq_printf(m, "llc_id: %u\n", c->topo.llc_id);
|
||||
seq_printf(m, "l2c_id: %u\n", c->topo.l2c_id);
|
||||
seq_printf(m, "amd_node_id: %u\n", c->topo.amd_node_id);
|
||||
|
@ -185,6 +185,7 @@ static void topo_set_ids(struct topo_scan *tscan, bool early)
|
||||
if (!early) {
|
||||
c->topo.logical_pkg_id = topology_get_logical_id(apicid, TOPO_PKG_DOMAIN);
|
||||
c->topo.logical_die_id = topology_get_logical_id(apicid, TOPO_DIE_DOMAIN);
|
||||
c->topo.logical_core_id = topology_get_logical_id(apicid, TOPO_CORE_DOMAIN);
|
||||
}
|
||||
|
||||
/* Package relative core ID */
|
||||
|
@ -711,7 +711,7 @@ static inline bool vma_start_read(struct vm_area_struct *vma)
|
||||
* we don't rely on for anything - the mm_lock_seq read against which we
|
||||
* need ordering is below.
|
||||
*/
|
||||
if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(vma->vm_mm->mm_lock_seq))
|
||||
if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(vma->vm_mm->mm_lock_seq.sequence))
|
||||
return false;
|
||||
|
||||
if (unlikely(down_read_trylock(&vma->vm_lock->lock) == 0))
|
||||
@ -728,7 +728,7 @@ static inline bool vma_start_read(struct vm_area_struct *vma)
|
||||
* after it has been unlocked.
|
||||
* This pairs with RELEASE semantics in vma_end_write_all().
|
||||
*/
|
||||
if (unlikely(vma->vm_lock_seq == smp_load_acquire(&vma->vm_mm->mm_lock_seq))) {
|
||||
if (unlikely(vma->vm_lock_seq == raw_read_seqcount(&vma->vm_mm->mm_lock_seq))) {
|
||||
up_read(&vma->vm_lock->lock);
|
||||
return false;
|
||||
}
|
||||
@ -743,7 +743,7 @@ static inline void vma_end_read(struct vm_area_struct *vma)
|
||||
}
|
||||
|
||||
/* WARNING! Can only be used if mmap_lock is expected to be write-locked */
|
||||
static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq)
|
||||
static bool __is_vma_write_locked(struct vm_area_struct *vma, unsigned int *mm_lock_seq)
|
||||
{
|
||||
mmap_assert_write_locked(vma->vm_mm);
|
||||
|
||||
@ -751,7 +751,7 @@ static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq)
|
||||
* current task is holding mmap_write_lock, both vma->vm_lock_seq and
|
||||
* mm->mm_lock_seq can't be concurrently modified.
|
||||
*/
|
||||
*mm_lock_seq = vma->vm_mm->mm_lock_seq;
|
||||
*mm_lock_seq = vma->vm_mm->mm_lock_seq.sequence;
|
||||
return (vma->vm_lock_seq == *mm_lock_seq);
|
||||
}
|
||||
|
||||
@ -762,7 +762,7 @@ static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq)
|
||||
*/
|
||||
static inline void vma_start_write(struct vm_area_struct *vma)
|
||||
{
|
||||
int mm_lock_seq;
|
||||
unsigned int mm_lock_seq;
|
||||
|
||||
if (__is_vma_write_locked(vma, &mm_lock_seq))
|
||||
return;
|
||||
@ -780,7 +780,7 @@ static inline void vma_start_write(struct vm_area_struct *vma)
|
||||
|
||||
static inline void vma_assert_write_locked(struct vm_area_struct *vma)
|
||||
{
|
||||
int mm_lock_seq;
|
||||
unsigned int mm_lock_seq;
|
||||
|
||||
VM_BUG_ON_VMA(!__is_vma_write_locked(vma, &mm_lock_seq), vma);
|
||||
}
|
||||
|
@ -727,7 +727,7 @@ struct vm_area_struct {
|
||||
* counter reuse can only lead to occasional unnecessary use of the
|
||||
* slowpath.
|
||||
*/
|
||||
int vm_lock_seq;
|
||||
unsigned int vm_lock_seq;
|
||||
/* Unstable RCU readers are allowed to read this. */
|
||||
struct vma_lock *vm_lock;
|
||||
#endif
|
||||
@ -921,6 +921,9 @@ struct mm_struct {
|
||||
* Roughly speaking, incrementing the sequence number is
|
||||
* equivalent to releasing locks on VMAs; reading the sequence
|
||||
* number can be part of taking a read lock on a VMA.
|
||||
* Incremented every time mmap_lock is write-locked/unlocked.
|
||||
* Initialized to 0, therefore odd values indicate mmap_lock
|
||||
* is write-locked and even values that it's released.
|
||||
*
|
||||
* Can be modified under write mmap_lock using RELEASE
|
||||
* semantics.
|
||||
@ -929,7 +932,7 @@ struct mm_struct {
|
||||
* Can be read with ACQUIRE semantics if not holding write
|
||||
* mmap_lock.
|
||||
*/
|
||||
int mm_lock_seq;
|
||||
seqcount_t mm_lock_seq;
|
||||
#endif
|
||||
|
||||
|
||||
|
@ -71,6 +71,91 @@ static inline void mmap_assert_write_locked(const struct mm_struct *mm)
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PER_VMA_LOCK
|
||||
|
||||
static inline void mm_lock_seqcount_init(struct mm_struct *mm)
|
||||
{
|
||||
seqcount_init(&mm->mm_lock_seq);
|
||||
}
|
||||
|
||||
static inline void mm_lock_seqcount_begin(struct mm_struct *mm)
|
||||
{
|
||||
do_raw_write_seqcount_begin(&mm->mm_lock_seq);
|
||||
}
|
||||
|
||||
static inline void mm_lock_seqcount_end(struct mm_struct *mm)
|
||||
{
|
||||
ASSERT_EXCLUSIVE_WRITER(mm->mm_lock_seq);
|
||||
do_raw_write_seqcount_end(&mm->mm_lock_seq);
|
||||
}
|
||||
|
||||
static inline bool mmap_lock_speculate_try_begin(struct mm_struct *mm, unsigned int *seq)
|
||||
{
|
||||
/*
|
||||
* Since mmap_lock is a sleeping lock, and waiting for it to become
|
||||
* unlocked is more or less equivalent with taking it ourselves, don't
|
||||
* bother with the speculative path if mmap_lock is already write-locked
|
||||
* and take the slow path, which takes the lock.
|
||||
*/
|
||||
return raw_seqcount_try_begin(&mm->mm_lock_seq, *seq);
|
||||
}
|
||||
|
||||
static inline bool mmap_lock_speculate_retry(struct mm_struct *mm, unsigned int seq)
|
||||
{
|
||||
return read_seqcount_retry(&mm->mm_lock_seq, seq);
|
||||
}
|
||||
|
||||
#else /* CONFIG_PER_VMA_LOCK */
|
||||
|
||||
static inline void mm_lock_seqcount_init(struct mm_struct *mm) {}
|
||||
static inline void mm_lock_seqcount_begin(struct mm_struct *mm) {}
|
||||
static inline void mm_lock_seqcount_end(struct mm_struct *mm) {}
|
||||
|
||||
static inline bool mmap_lock_speculate_try_begin(struct mm_struct *mm, unsigned int *seq)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool mmap_lock_speculate_retry(struct mm_struct *mm, unsigned int seq)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
#endif /* CONFIG_PER_VMA_LOCK */
|
||||
|
||||
static inline void mmap_init_lock(struct mm_struct *mm)
|
||||
{
|
||||
init_rwsem(&mm->mmap_lock);
|
||||
mm_lock_seqcount_init(mm);
|
||||
}
|
||||
|
||||
static inline void mmap_write_lock(struct mm_struct *mm)
|
||||
{
|
||||
__mmap_lock_trace_start_locking(mm, true);
|
||||
down_write(&mm->mmap_lock);
|
||||
mm_lock_seqcount_begin(mm);
|
||||
__mmap_lock_trace_acquire_returned(mm, true, true);
|
||||
}
|
||||
|
||||
static inline void mmap_write_lock_nested(struct mm_struct *mm, int subclass)
|
||||
{
|
||||
__mmap_lock_trace_start_locking(mm, true);
|
||||
down_write_nested(&mm->mmap_lock, subclass);
|
||||
mm_lock_seqcount_begin(mm);
|
||||
__mmap_lock_trace_acquire_returned(mm, true, true);
|
||||
}
|
||||
|
||||
static inline int mmap_write_lock_killable(struct mm_struct *mm)
|
||||
{
|
||||
int ret;
|
||||
|
||||
__mmap_lock_trace_start_locking(mm, true);
|
||||
ret = down_write_killable(&mm->mmap_lock);
|
||||
if (!ret)
|
||||
mm_lock_seqcount_begin(mm);
|
||||
__mmap_lock_trace_acquire_returned(mm, true, ret == 0);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Drop all currently-held per-VMA locks.
|
||||
* This is called from the mmap_lock implementation directly before releasing
|
||||
@ -82,46 +167,7 @@ static inline void mmap_assert_write_locked(const struct mm_struct *mm)
|
||||
static inline void vma_end_write_all(struct mm_struct *mm)
|
||||
{
|
||||
mmap_assert_write_locked(mm);
|
||||
/*
|
||||
* Nobody can concurrently modify mm->mm_lock_seq due to exclusive
|
||||
* mmap_lock being held.
|
||||
* We need RELEASE semantics here to ensure that preceding stores into
|
||||
* the VMA take effect before we unlock it with this store.
|
||||
* Pairs with ACQUIRE semantics in vma_start_read().
|
||||
*/
|
||||
smp_store_release(&mm->mm_lock_seq, mm->mm_lock_seq + 1);
|
||||
}
|
||||
#else
|
||||
static inline void vma_end_write_all(struct mm_struct *mm) {}
|
||||
#endif
|
||||
|
||||
static inline void mmap_init_lock(struct mm_struct *mm)
|
||||
{
|
||||
init_rwsem(&mm->mmap_lock);
|
||||
}
|
||||
|
||||
static inline void mmap_write_lock(struct mm_struct *mm)
|
||||
{
|
||||
__mmap_lock_trace_start_locking(mm, true);
|
||||
down_write(&mm->mmap_lock);
|
||||
__mmap_lock_trace_acquire_returned(mm, true, true);
|
||||
}
|
||||
|
||||
static inline void mmap_write_lock_nested(struct mm_struct *mm, int subclass)
|
||||
{
|
||||
__mmap_lock_trace_start_locking(mm, true);
|
||||
down_write_nested(&mm->mmap_lock, subclass);
|
||||
__mmap_lock_trace_acquire_returned(mm, true, true);
|
||||
}
|
||||
|
||||
static inline int mmap_write_lock_killable(struct mm_struct *mm)
|
||||
{
|
||||
int ret;
|
||||
|
||||
__mmap_lock_trace_start_locking(mm, true);
|
||||
ret = down_write_killable(&mm->mmap_lock);
|
||||
__mmap_lock_trace_acquire_returned(mm, true, ret == 0);
|
||||
return ret;
|
||||
mm_lock_seqcount_end(mm);
|
||||
}
|
||||
|
||||
static inline void mmap_write_unlock(struct mm_struct *mm)
|
||||
|
@ -1279,6 +1279,11 @@ static inline void perf_sample_save_callchain(struct perf_sample_data *data,
|
||||
{
|
||||
int size = 1;
|
||||
|
||||
if (!(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN))
|
||||
return;
|
||||
if (WARN_ON_ONCE(data->sample_flags & PERF_SAMPLE_CALLCHAIN))
|
||||
return;
|
||||
|
||||
data->callchain = perf_callchain(event, regs);
|
||||
size += data->callchain->nr;
|
||||
|
||||
@ -1287,12 +1292,18 @@ static inline void perf_sample_save_callchain(struct perf_sample_data *data,
|
||||
}
|
||||
|
||||
static inline void perf_sample_save_raw_data(struct perf_sample_data *data,
|
||||
struct perf_event *event,
|
||||
struct perf_raw_record *raw)
|
||||
{
|
||||
struct perf_raw_frag *frag = &raw->frag;
|
||||
u32 sum = 0;
|
||||
int size;
|
||||
|
||||
if (!(event->attr.sample_type & PERF_SAMPLE_RAW))
|
||||
return;
|
||||
if (WARN_ON_ONCE(data->sample_flags & PERF_SAMPLE_RAW))
|
||||
return;
|
||||
|
||||
do {
|
||||
sum += frag->size;
|
||||
if (perf_raw_frag_last(frag))
|
||||
@ -1309,6 +1320,11 @@ static inline void perf_sample_save_raw_data(struct perf_sample_data *data,
|
||||
data->sample_flags |= PERF_SAMPLE_RAW;
|
||||
}
|
||||
|
||||
static inline bool has_branch_stack(struct perf_event *event)
|
||||
{
|
||||
return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK;
|
||||
}
|
||||
|
||||
static inline void perf_sample_save_brstack(struct perf_sample_data *data,
|
||||
struct perf_event *event,
|
||||
struct perf_branch_stack *brs,
|
||||
@ -1316,6 +1332,11 @@ static inline void perf_sample_save_brstack(struct perf_sample_data *data,
|
||||
{
|
||||
int size = sizeof(u64); /* nr */
|
||||
|
||||
if (!has_branch_stack(event))
|
||||
return;
|
||||
if (WARN_ON_ONCE(data->sample_flags & PERF_SAMPLE_BRANCH_STACK))
|
||||
return;
|
||||
|
||||
if (branch_sample_hw_index(event))
|
||||
size += sizeof(u64);
|
||||
size += brs->nr * sizeof(struct perf_branch_entry);
|
||||
@ -1669,6 +1690,8 @@ static inline int perf_allow_tracepoint(struct perf_event_attr *attr)
|
||||
return security_perf_event_open(attr, PERF_SECURITY_TRACEPOINT);
|
||||
}
|
||||
|
||||
extern int perf_exclude_event(struct perf_event *event, struct pt_regs *regs);
|
||||
|
||||
extern void perf_event_init(void);
|
||||
extern void perf_tp_event(u16 event_type, u64 count, void *record,
|
||||
int entry_size, struct pt_regs *regs,
|
||||
@ -1705,11 +1728,6 @@ static inline unsigned long perf_arch_guest_misc_flags(struct pt_regs *regs)
|
||||
# define perf_arch_guest_misc_flags(regs) perf_arch_guest_misc_flags(regs)
|
||||
#endif
|
||||
|
||||
static inline bool has_branch_stack(struct perf_event *event)
|
||||
{
|
||||
return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK;
|
||||
}
|
||||
|
||||
static inline bool needs_branch_stack(struct perf_event *event)
|
||||
{
|
||||
return event->attr.branch_sample_type != 0;
|
||||
@ -1879,6 +1897,10 @@ static inline u64 perf_event_pause(struct perf_event *event, bool reset)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
static inline int perf_exclude_event(struct perf_event *event, struct pt_regs *regs)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
|
||||
|
@ -318,6 +318,28 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex)
|
||||
__seq; \
|
||||
})
|
||||
|
||||
/**
|
||||
* raw_seqcount_try_begin() - begin a seqcount_t read critical section
|
||||
* w/o lockdep and w/o counter stabilization
|
||||
* @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
|
||||
*
|
||||
* Similar to raw_seqcount_begin(), except it enables eliding the critical
|
||||
* section entirely if odd, instead of doing the speculation knowing it will
|
||||
* fail.
|
||||
*
|
||||
* Useful when counter stabilization is more or less equivalent to taking
|
||||
* the lock and there is a slowpath that does that.
|
||||
*
|
||||
* If true, start will be set to the (even) sequence count read.
|
||||
*
|
||||
* Return: true when a read critical section is started.
|
||||
*/
|
||||
#define raw_seqcount_try_begin(s, start) \
|
||||
({ \
|
||||
start = raw_read_seqcount(s); \
|
||||
!(start & 1); \
|
||||
})
|
||||
|
||||
/**
|
||||
* raw_seqcount_begin() - begin a seqcount_t read critical section w/o
|
||||
* lockdep and w/o counter stabilization
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include <linux/types.h>
|
||||
#include <linux/wait.h>
|
||||
#include <linux/timer.h>
|
||||
#include <linux/seqlock.h>
|
||||
|
||||
struct uprobe;
|
||||
struct vm_area_struct;
|
||||
@ -124,6 +125,10 @@ struct uprobe_task {
|
||||
unsigned int depth;
|
||||
struct return_instance *return_instances;
|
||||
|
||||
struct return_instance *ri_pool;
|
||||
struct timer_list ri_timer;
|
||||
seqcount_t ri_seqcount;
|
||||
|
||||
union {
|
||||
struct {
|
||||
struct arch_uprobe_task autask;
|
||||
@ -137,7 +142,6 @@ struct uprobe_task {
|
||||
};
|
||||
|
||||
struct uprobe *active_uprobe;
|
||||
struct timer_list ri_timer;
|
||||
unsigned long xol_vaddr;
|
||||
|
||||
struct arch_uprobe *auprobe;
|
||||
@ -154,12 +158,18 @@ struct return_instance {
|
||||
unsigned long stack; /* stack pointer */
|
||||
unsigned long orig_ret_vaddr; /* original return address */
|
||||
bool chained; /* true, if instance is nested */
|
||||
int consumers_cnt;
|
||||
int cons_cnt; /* total number of session consumers */
|
||||
|
||||
struct return_instance *next; /* keep as stack */
|
||||
struct rcu_head rcu;
|
||||
|
||||
struct return_consumer consumers[] __counted_by(consumers_cnt);
|
||||
/* singular pre-allocated return_consumer instance for common case */
|
||||
struct return_consumer consumer;
|
||||
/*
|
||||
* extra return_consumer instances for rare cases of multiple session consumers,
|
||||
* contains (cons_cnt - 1) elements
|
||||
*/
|
||||
struct return_consumer *extra_consumers;
|
||||
} ____cacheline_aligned;
|
||||
|
||||
enum rp_check {
|
||||
|
@ -6277,41 +6277,6 @@ unlock:
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(perf_event_update_userpage);
|
||||
|
||||
static vm_fault_t perf_mmap_fault(struct vm_fault *vmf)
|
||||
{
|
||||
struct perf_event *event = vmf->vma->vm_file->private_data;
|
||||
struct perf_buffer *rb;
|
||||
vm_fault_t ret = VM_FAULT_SIGBUS;
|
||||
|
||||
if (vmf->flags & FAULT_FLAG_MKWRITE) {
|
||||
if (vmf->pgoff == 0)
|
||||
ret = 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
rcu_read_lock();
|
||||
rb = rcu_dereference(event->rb);
|
||||
if (!rb)
|
||||
goto unlock;
|
||||
|
||||
if (vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE))
|
||||
goto unlock;
|
||||
|
||||
vmf->page = perf_mmap_to_page(rb, vmf->pgoff);
|
||||
if (!vmf->page)
|
||||
goto unlock;
|
||||
|
||||
get_page(vmf->page);
|
||||
vmf->page->mapping = vmf->vma->vm_file->f_mapping;
|
||||
vmf->page->index = vmf->pgoff;
|
||||
|
||||
ret = 0;
|
||||
unlock:
|
||||
rcu_read_unlock();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void ring_buffer_attach(struct perf_event *event,
|
||||
struct perf_buffer *rb)
|
||||
{
|
||||
@ -6551,13 +6516,87 @@ out_put:
|
||||
ring_buffer_put(rb); /* could be last */
|
||||
}
|
||||
|
||||
static vm_fault_t perf_mmap_pfn_mkwrite(struct vm_fault *vmf)
|
||||
{
|
||||
/* The first page is the user control page, others are read-only. */
|
||||
return vmf->pgoff == 0 ? 0 : VM_FAULT_SIGBUS;
|
||||
}
|
||||
|
||||
static const struct vm_operations_struct perf_mmap_vmops = {
|
||||
.open = perf_mmap_open,
|
||||
.close = perf_mmap_close, /* non mergeable */
|
||||
.fault = perf_mmap_fault,
|
||||
.page_mkwrite = perf_mmap_fault,
|
||||
.pfn_mkwrite = perf_mmap_pfn_mkwrite,
|
||||
};
|
||||
|
||||
static int map_range(struct perf_buffer *rb, struct vm_area_struct *vma)
|
||||
{
|
||||
unsigned long nr_pages = vma_pages(vma);
|
||||
int err = 0;
|
||||
unsigned long pagenum;
|
||||
|
||||
/*
|
||||
* We map this as a VM_PFNMAP VMA.
|
||||
*
|
||||
* This is not ideal as this is designed broadly for mappings of PFNs
|
||||
* referencing memory-mapped I/O ranges or non-system RAM i.e. for which
|
||||
* !pfn_valid(pfn).
|
||||
*
|
||||
* We are mapping kernel-allocated memory (memory we manage ourselves)
|
||||
* which would more ideally be mapped using vm_insert_page() or a
|
||||
* similar mechanism, that is as a VM_MIXEDMAP mapping.
|
||||
*
|
||||
* However this won't work here, because:
|
||||
*
|
||||
* 1. It uses vma->vm_page_prot, but this field has not been completely
|
||||
* set up at the point of the f_op->mmap() hook, so we are unable to
|
||||
* indicate that this should be mapped CoW in order that the
|
||||
* mkwrite() hook can be invoked to make the first page R/W and the
|
||||
* rest R/O as desired.
|
||||
*
|
||||
* 2. Anything other than a VM_PFNMAP of valid PFNs will result in
|
||||
* vm_normal_page() returning a struct page * pointer, which means
|
||||
* vm_ops->page_mkwrite() will be invoked rather than
|
||||
* vm_ops->pfn_mkwrite(), and this means we have to set page->mapping
|
||||
* to work around retry logic in the fault handler, however this
|
||||
* field is no longer allowed to be used within struct page.
|
||||
*
|
||||
* 3. Having a struct page * made available in the fault logic also
|
||||
* means that the page gets put on the rmap and becomes
|
||||
* inappropriately accessible and subject to map and ref counting.
|
||||
*
|
||||
* Ideally we would have a mechanism that could explicitly express our
|
||||
* desires, but this is not currently the case, so we instead use
|
||||
* VM_PFNMAP.
|
||||
*
|
||||
* We manage the lifetime of these mappings with internal refcounts (see
|
||||
* perf_mmap_open() and perf_mmap_close()) so we ensure the lifetime of
|
||||
* this mapping is maintained correctly.
|
||||
*/
|
||||
for (pagenum = 0; pagenum < nr_pages; pagenum++) {
|
||||
unsigned long va = vma->vm_start + PAGE_SIZE * pagenum;
|
||||
struct page *page = perf_mmap_to_page(rb, vma->vm_pgoff + pagenum);
|
||||
|
||||
if (page == NULL) {
|
||||
err = -EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Map readonly, perf_mmap_pfn_mkwrite() called on write fault. */
|
||||
err = remap_pfn_range(vma, va, page_to_pfn(page), PAGE_SIZE,
|
||||
vm_get_page_prot(vma->vm_flags & ~VM_SHARED));
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MMU
|
||||
/* Clear any partial mappings on error. */
|
||||
if (err)
|
||||
zap_page_range_single(vma, vma->vm_start, nr_pages * PAGE_SIZE, NULL);
|
||||
#endif
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int perf_mmap(struct file *file, struct vm_area_struct *vma)
|
||||
{
|
||||
struct perf_event *event = file->private_data;
|
||||
@ -6682,6 +6721,8 @@ again:
|
||||
goto again;
|
||||
}
|
||||
|
||||
/* We need the rb to map pages. */
|
||||
rb = event->rb;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
@ -6776,6 +6817,9 @@ aux_unlock:
|
||||
vm_flags_set(vma, VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP);
|
||||
vma->vm_ops = &perf_mmap_vmops;
|
||||
|
||||
if (!ret)
|
||||
ret = map_range(rb, vma);
|
||||
|
||||
if (event->pmu->event_mapped)
|
||||
event->pmu->event_mapped(event, vma->vm_mm);
|
||||
|
||||
@ -10039,8 +10083,7 @@ static void perf_swevent_event(struct perf_event *event, u64 nr,
|
||||
perf_swevent_overflow(event, 0, data, regs);
|
||||
}
|
||||
|
||||
static int perf_exclude_event(struct perf_event *event,
|
||||
struct pt_regs *regs)
|
||||
int perf_exclude_event(struct perf_event *event, struct pt_regs *regs)
|
||||
{
|
||||
if (event->hw.state & PERF_HES_STOPPED)
|
||||
return 1;
|
||||
@ -10425,9 +10468,9 @@ static struct pmu perf_tracepoint = {
|
||||
};
|
||||
|
||||
static int perf_tp_filter_match(struct perf_event *event,
|
||||
struct perf_sample_data *data)
|
||||
struct perf_raw_record *raw)
|
||||
{
|
||||
void *record = data->raw->frag.data;
|
||||
void *record = raw->frag.data;
|
||||
|
||||
/* only top level events have filters set */
|
||||
if (event->parent)
|
||||
@ -10439,7 +10482,7 @@ static int perf_tp_filter_match(struct perf_event *event,
|
||||
}
|
||||
|
||||
static int perf_tp_event_match(struct perf_event *event,
|
||||
struct perf_sample_data *data,
|
||||
struct perf_raw_record *raw,
|
||||
struct pt_regs *regs)
|
||||
{
|
||||
if (event->hw.state & PERF_HES_STOPPED)
|
||||
@ -10450,7 +10493,7 @@ static int perf_tp_event_match(struct perf_event *event,
|
||||
if (event->attr.exclude_kernel && !user_mode(regs))
|
||||
return 0;
|
||||
|
||||
if (!perf_tp_filter_match(event, data))
|
||||
if (!perf_tp_filter_match(event, raw))
|
||||
return 0;
|
||||
|
||||
return 1;
|
||||
@ -10476,6 +10519,7 @@ EXPORT_SYMBOL_GPL(perf_trace_run_bpf_submit);
|
||||
static void __perf_tp_event_target_task(u64 count, void *record,
|
||||
struct pt_regs *regs,
|
||||
struct perf_sample_data *data,
|
||||
struct perf_raw_record *raw,
|
||||
struct perf_event *event)
|
||||
{
|
||||
struct trace_entry *entry = record;
|
||||
@ -10485,13 +10529,17 @@ static void __perf_tp_event_target_task(u64 count, void *record,
|
||||
/* Cannot deliver synchronous signal to other task. */
|
||||
if (event->attr.sigtrap)
|
||||
return;
|
||||
if (perf_tp_event_match(event, data, regs))
|
||||
if (perf_tp_event_match(event, raw, regs)) {
|
||||
perf_sample_data_init(data, 0, 0);
|
||||
perf_sample_save_raw_data(data, event, raw);
|
||||
perf_swevent_event(event, count, data, regs);
|
||||
}
|
||||
}
|
||||
|
||||
static void perf_tp_event_target_task(u64 count, void *record,
|
||||
struct pt_regs *regs,
|
||||
struct perf_sample_data *data,
|
||||
struct perf_raw_record *raw,
|
||||
struct perf_event_context *ctx)
|
||||
{
|
||||
unsigned int cpu = smp_processor_id();
|
||||
@ -10499,15 +10547,15 @@ static void perf_tp_event_target_task(u64 count, void *record,
|
||||
struct perf_event *event, *sibling;
|
||||
|
||||
perf_event_groups_for_cpu_pmu(event, &ctx->pinned_groups, cpu, pmu) {
|
||||
__perf_tp_event_target_task(count, record, regs, data, event);
|
||||
__perf_tp_event_target_task(count, record, regs, data, raw, event);
|
||||
for_each_sibling_event(sibling, event)
|
||||
__perf_tp_event_target_task(count, record, regs, data, sibling);
|
||||
__perf_tp_event_target_task(count, record, regs, data, raw, sibling);
|
||||
}
|
||||
|
||||
perf_event_groups_for_cpu_pmu(event, &ctx->flexible_groups, cpu, pmu) {
|
||||
__perf_tp_event_target_task(count, record, regs, data, event);
|
||||
__perf_tp_event_target_task(count, record, regs, data, raw, event);
|
||||
for_each_sibling_event(sibling, event)
|
||||
__perf_tp_event_target_task(count, record, regs, data, sibling);
|
||||
__perf_tp_event_target_task(count, record, regs, data, raw, sibling);
|
||||
}
|
||||
}
|
||||
|
||||
@ -10525,15 +10573,10 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size,
|
||||
},
|
||||
};
|
||||
|
||||
perf_sample_data_init(&data, 0, 0);
|
||||
perf_sample_save_raw_data(&data, &raw);
|
||||
|
||||
perf_trace_buf_update(record, event_type);
|
||||
|
||||
hlist_for_each_entry_rcu(event, head, hlist_entry) {
|
||||
if (perf_tp_event_match(event, &data, regs)) {
|
||||
perf_swevent_event(event, count, &data, regs);
|
||||
|
||||
if (perf_tp_event_match(event, &raw, regs)) {
|
||||
/*
|
||||
* Here use the same on-stack perf_sample_data,
|
||||
* some members in data are event-specific and
|
||||
@ -10543,7 +10586,8 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size,
|
||||
* because data->sample_flags is set.
|
||||
*/
|
||||
perf_sample_data_init(&data, 0, 0);
|
||||
perf_sample_save_raw_data(&data, &raw);
|
||||
perf_sample_save_raw_data(&data, event, &raw);
|
||||
perf_swevent_event(event, count, &data, regs);
|
||||
}
|
||||
}
|
||||
|
||||
@ -10560,7 +10604,7 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size,
|
||||
goto unlock;
|
||||
|
||||
raw_spin_lock(&ctx->lock);
|
||||
perf_tp_event_target_task(count, record, regs, &data, ctx);
|
||||
perf_tp_event_target_task(count, record, regs, &data, &raw, ctx);
|
||||
raw_spin_unlock(&ctx->lock);
|
||||
unlock:
|
||||
rcu_read_unlock();
|
||||
|
@ -643,7 +643,6 @@ static void rb_free_aux_page(struct perf_buffer *rb, int idx)
|
||||
struct page *page = virt_to_page(rb->aux_pages[idx]);
|
||||
|
||||
ClearPagePrivate(page);
|
||||
page->mapping = NULL;
|
||||
__free_page(page);
|
||||
}
|
||||
|
||||
@ -819,7 +818,6 @@ static void perf_mmap_free_page(void *addr)
|
||||
{
|
||||
struct page *page = virt_to_page(addr);
|
||||
|
||||
page->mapping = NULL;
|
||||
__free_page(page);
|
||||
}
|
||||
|
||||
@ -890,28 +888,13 @@ __perf_mmap_to_page(struct perf_buffer *rb, unsigned long pgoff)
|
||||
return vmalloc_to_page((void *)rb->user_page + pgoff * PAGE_SIZE);
|
||||
}
|
||||
|
||||
static void perf_mmap_unmark_page(void *addr)
|
||||
{
|
||||
struct page *page = vmalloc_to_page(addr);
|
||||
|
||||
page->mapping = NULL;
|
||||
}
|
||||
|
||||
static void rb_free_work(struct work_struct *work)
|
||||
{
|
||||
struct perf_buffer *rb;
|
||||
void *base;
|
||||
int i, nr;
|
||||
|
||||
rb = container_of(work, struct perf_buffer, work);
|
||||
nr = data_page_nr(rb);
|
||||
|
||||
base = rb->user_page;
|
||||
/* The '<=' counts in the user page. */
|
||||
for (i = 0; i <= nr; i++)
|
||||
perf_mmap_unmark_page(base + (i * PAGE_SIZE));
|
||||
|
||||
vfree(base);
|
||||
vfree(rb->user_page);
|
||||
kfree(rb);
|
||||
}
|
||||
|
||||
|
@ -1888,9 +1888,33 @@ unsigned long uprobe_get_trap_addr(struct pt_regs *regs)
|
||||
return instruction_pointer(regs);
|
||||
}
|
||||
|
||||
static struct return_instance *free_ret_instance(struct return_instance *ri, bool cleanup_hprobe)
|
||||
static void ri_pool_push(struct uprobe_task *utask, struct return_instance *ri)
|
||||
{
|
||||
struct return_instance *next = ri->next;
|
||||
ri->cons_cnt = 0;
|
||||
ri->next = utask->ri_pool;
|
||||
utask->ri_pool = ri;
|
||||
}
|
||||
|
||||
static struct return_instance *ri_pool_pop(struct uprobe_task *utask)
|
||||
{
|
||||
struct return_instance *ri = utask->ri_pool;
|
||||
|
||||
if (likely(ri))
|
||||
utask->ri_pool = ri->next;
|
||||
|
||||
return ri;
|
||||
}
|
||||
|
||||
static void ri_free(struct return_instance *ri)
|
||||
{
|
||||
kfree(ri->extra_consumers);
|
||||
kfree_rcu(ri, rcu);
|
||||
}
|
||||
|
||||
static void free_ret_instance(struct uprobe_task *utask,
|
||||
struct return_instance *ri, bool cleanup_hprobe)
|
||||
{
|
||||
unsigned seq;
|
||||
|
||||
if (cleanup_hprobe) {
|
||||
enum hprobe_state hstate;
|
||||
@ -1899,8 +1923,22 @@ static struct return_instance *free_ret_instance(struct return_instance *ri, boo
|
||||
hprobe_finalize(&ri->hprobe, hstate);
|
||||
}
|
||||
|
||||
kfree_rcu(ri, rcu);
|
||||
return next;
|
||||
/*
|
||||
* At this point return_instance is unlinked from utask's
|
||||
* return_instances list and this has become visible to ri_timer().
|
||||
* If seqcount now indicates that ri_timer's return instance
|
||||
* processing loop isn't active, we can return ri into the pool of
|
||||
* to-be-reused return instances for future uretprobes. If ri_timer()
|
||||
* happens to be running right now, though, we fall back to safety and
|
||||
* just perform RCU-delayed freeing of ri.
|
||||
*/
|
||||
if (raw_seqcount_try_begin(&utask->ri_seqcount, seq)) {
|
||||
/* immediate reuse of ri without RCU GP is OK */
|
||||
ri_pool_push(utask, ri);
|
||||
} else {
|
||||
/* we might be racing with ri_timer(), so play it safe */
|
||||
ri_free(ri);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1910,7 +1948,7 @@ static struct return_instance *free_ret_instance(struct return_instance *ri, boo
|
||||
void uprobe_free_utask(struct task_struct *t)
|
||||
{
|
||||
struct uprobe_task *utask = t->utask;
|
||||
struct return_instance *ri;
|
||||
struct return_instance *ri, *ri_next;
|
||||
|
||||
if (!utask)
|
||||
return;
|
||||
@ -1921,8 +1959,19 @@ void uprobe_free_utask(struct task_struct *t)
|
||||
timer_delete_sync(&utask->ri_timer);
|
||||
|
||||
ri = utask->return_instances;
|
||||
while (ri)
|
||||
ri = free_ret_instance(ri, true /* cleanup_hprobe */);
|
||||
while (ri) {
|
||||
ri_next = ri->next;
|
||||
free_ret_instance(utask, ri, true /* cleanup_hprobe */);
|
||||
ri = ri_next;
|
||||
}
|
||||
|
||||
/* free_ret_instance() above might add to ri_pool, so this loop should come last */
|
||||
ri = utask->ri_pool;
|
||||
while (ri) {
|
||||
ri_next = ri->next;
|
||||
ri_free(ri);
|
||||
ri = ri_next;
|
||||
}
|
||||
|
||||
kfree(utask);
|
||||
}
|
||||
@ -1942,8 +1991,12 @@ static void ri_timer(struct timer_list *timer)
|
||||
/* RCU protects return_instance from freeing. */
|
||||
guard(rcu)();
|
||||
|
||||
write_seqcount_begin(&utask->ri_seqcount);
|
||||
|
||||
for_each_ret_instance_rcu(ri, utask->return_instances)
|
||||
hprobe_expire(&ri->hprobe, false);
|
||||
|
||||
write_seqcount_end(&utask->ri_seqcount);
|
||||
}
|
||||
|
||||
static struct uprobe_task *alloc_utask(void)
|
||||
@ -1955,6 +2008,7 @@ static struct uprobe_task *alloc_utask(void)
|
||||
return NULL;
|
||||
|
||||
timer_setup(&utask->ri_timer, ri_timer, 0);
|
||||
seqcount_init(&utask->ri_seqcount);
|
||||
|
||||
return utask;
|
||||
}
|
||||
@ -1974,32 +2028,40 @@ static struct uprobe_task *get_utask(void)
|
||||
return current->utask;
|
||||
}
|
||||
|
||||
static size_t ri_size(int consumers_cnt)
|
||||
static struct return_instance *alloc_return_instance(struct uprobe_task *utask)
|
||||
{
|
||||
struct return_instance *ri;
|
||||
|
||||
return sizeof(*ri) + sizeof(ri->consumers[0]) * consumers_cnt;
|
||||
}
|
||||
ri = ri_pool_pop(utask);
|
||||
if (ri)
|
||||
return ri;
|
||||
|
||||
#define DEF_CNT 4
|
||||
|
||||
static struct return_instance *alloc_return_instance(void)
|
||||
{
|
||||
struct return_instance *ri;
|
||||
|
||||
ri = kzalloc(ri_size(DEF_CNT), GFP_KERNEL);
|
||||
ri = kzalloc(sizeof(*ri), GFP_KERNEL);
|
||||
if (!ri)
|
||||
return ZERO_SIZE_PTR;
|
||||
|
||||
ri->consumers_cnt = DEF_CNT;
|
||||
return ri;
|
||||
}
|
||||
|
||||
static struct return_instance *dup_return_instance(struct return_instance *old)
|
||||
{
|
||||
size_t size = ri_size(old->consumers_cnt);
|
||||
struct return_instance *ri;
|
||||
|
||||
return kmemdup(old, size, GFP_KERNEL);
|
||||
ri = kmemdup(old, sizeof(*ri), GFP_KERNEL);
|
||||
if (!ri)
|
||||
return NULL;
|
||||
|
||||
if (unlikely(old->cons_cnt > 1)) {
|
||||
ri->extra_consumers = kmemdup(old->extra_consumers,
|
||||
sizeof(ri->extra_consumers[0]) * (old->cons_cnt - 1),
|
||||
GFP_KERNEL);
|
||||
if (!ri->extra_consumers) {
|
||||
kfree(ri);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
return ri;
|
||||
}
|
||||
|
||||
static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask)
|
||||
@ -2108,14 +2170,17 @@ unsigned long uprobe_get_trampoline_vaddr(void)
|
||||
static void cleanup_return_instances(struct uprobe_task *utask, bool chained,
|
||||
struct pt_regs *regs)
|
||||
{
|
||||
struct return_instance *ri = utask->return_instances;
|
||||
struct return_instance *ri = utask->return_instances, *ri_next;
|
||||
enum rp_check ctx = chained ? RP_CHECK_CHAIN_CALL : RP_CHECK_CALL;
|
||||
|
||||
while (ri && !arch_uretprobe_is_alive(ri, ctx, regs)) {
|
||||
ri = free_ret_instance(ri, true /* cleanup_hprobe */);
|
||||
ri_next = ri->next;
|
||||
rcu_assign_pointer(utask->return_instances, ri_next);
|
||||
utask->depth--;
|
||||
|
||||
free_ret_instance(utask, ri, true /* cleanup_hprobe */);
|
||||
ri = ri_next;
|
||||
}
|
||||
rcu_assign_pointer(utask->return_instances, ri);
|
||||
}
|
||||
|
||||
static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs,
|
||||
@ -2180,7 +2245,7 @@ static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs,
|
||||
|
||||
return;
|
||||
free:
|
||||
kfree(ri);
|
||||
ri_free(ri);
|
||||
}
|
||||
|
||||
/* Prepare to single-step probed instruction out of line. */
|
||||
@ -2294,6 +2359,47 @@ static int is_trap_at_addr(struct mm_struct *mm, unsigned long vaddr)
|
||||
return is_trap_insn(&opcode);
|
||||
}
|
||||
|
||||
static struct uprobe *find_active_uprobe_speculative(unsigned long bp_vaddr)
|
||||
{
|
||||
struct mm_struct *mm = current->mm;
|
||||
struct uprobe *uprobe = NULL;
|
||||
struct vm_area_struct *vma;
|
||||
struct file *vm_file;
|
||||
loff_t offset;
|
||||
unsigned int seq;
|
||||
|
||||
guard(rcu)();
|
||||
|
||||
if (!mmap_lock_speculate_try_begin(mm, &seq))
|
||||
return NULL;
|
||||
|
||||
vma = vma_lookup(mm, bp_vaddr);
|
||||
if (!vma)
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
* vm_file memory can be reused for another instance of struct file,
|
||||
* but can't be freed from under us, so it's safe to read fields from
|
||||
* it, even if the values are some garbage values; ultimately
|
||||
* find_uprobe_rcu() + mmap_lock_speculation_end() check will ensure
|
||||
* that whatever we speculatively found is correct
|
||||
*/
|
||||
vm_file = READ_ONCE(vma->vm_file);
|
||||
if (!vm_file)
|
||||
return NULL;
|
||||
|
||||
offset = (loff_t)(vma->vm_pgoff << PAGE_SHIFT) + (bp_vaddr - vma->vm_start);
|
||||
uprobe = find_uprobe_rcu(vm_file->f_inode, offset);
|
||||
if (!uprobe)
|
||||
return NULL;
|
||||
|
||||
/* now double check that nothing about MM changed */
|
||||
if (mmap_lock_speculate_retry(mm, seq))
|
||||
return NULL;
|
||||
|
||||
return uprobe;
|
||||
}
|
||||
|
||||
/* assumes being inside RCU protected region */
|
||||
static struct uprobe *find_active_uprobe_rcu(unsigned long bp_vaddr, int *is_swbp)
|
||||
{
|
||||
@ -2301,10 +2407,14 @@ static struct uprobe *find_active_uprobe_rcu(unsigned long bp_vaddr, int *is_swb
|
||||
struct uprobe *uprobe = NULL;
|
||||
struct vm_area_struct *vma;
|
||||
|
||||
uprobe = find_active_uprobe_speculative(bp_vaddr);
|
||||
if (uprobe)
|
||||
return uprobe;
|
||||
|
||||
mmap_read_lock(mm);
|
||||
vma = vma_lookup(mm, bp_vaddr);
|
||||
if (vma) {
|
||||
if (valid_vma(vma, false)) {
|
||||
if (vma->vm_file) {
|
||||
struct inode *inode = file_inode(vma->vm_file);
|
||||
loff_t offset = vaddr_to_offset(vma, bp_vaddr);
|
||||
|
||||
@ -2324,25 +2434,27 @@ static struct uprobe *find_active_uprobe_rcu(unsigned long bp_vaddr, int *is_swb
|
||||
return uprobe;
|
||||
}
|
||||
|
||||
static struct return_instance*
|
||||
push_consumer(struct return_instance *ri, int idx, __u64 id, __u64 cookie)
|
||||
static struct return_instance *push_consumer(struct return_instance *ri, __u64 id, __u64 cookie)
|
||||
{
|
||||
struct return_consumer *ric;
|
||||
|
||||
if (unlikely(ri == ZERO_SIZE_PTR))
|
||||
return ri;
|
||||
|
||||
if (unlikely(idx >= ri->consumers_cnt)) {
|
||||
struct return_instance *old_ri = ri;
|
||||
|
||||
ri->consumers_cnt += DEF_CNT;
|
||||
ri = krealloc(old_ri, ri_size(old_ri->consumers_cnt), GFP_KERNEL);
|
||||
if (!ri) {
|
||||
kfree(old_ri);
|
||||
if (unlikely(ri->cons_cnt > 0)) {
|
||||
ric = krealloc(ri->extra_consumers, sizeof(*ric) * ri->cons_cnt, GFP_KERNEL);
|
||||
if (!ric) {
|
||||
ri_free(ri);
|
||||
return ZERO_SIZE_PTR;
|
||||
}
|
||||
ri->extra_consumers = ric;
|
||||
}
|
||||
|
||||
ri->consumers[idx].id = id;
|
||||
ri->consumers[idx].cookie = cookie;
|
||||
ric = likely(ri->cons_cnt == 0) ? &ri->consumer : &ri->extra_consumers[ri->cons_cnt - 1];
|
||||
ric->id = id;
|
||||
ric->cookie = cookie;
|
||||
|
||||
ri->cons_cnt++;
|
||||
return ri;
|
||||
}
|
||||
|
||||
@ -2350,14 +2462,17 @@ static struct return_consumer *
|
||||
return_consumer_find(struct return_instance *ri, int *iter, int id)
|
||||
{
|
||||
struct return_consumer *ric;
|
||||
int idx = *iter;
|
||||
int idx;
|
||||
|
||||
for (ric = &ri->consumers[idx]; idx < ri->consumers_cnt; idx++, ric++) {
|
||||
for (idx = *iter; idx < ri->cons_cnt; idx++)
|
||||
{
|
||||
ric = likely(idx == 0) ? &ri->consumer : &ri->extra_consumers[idx - 1];
|
||||
if (ric->id == id) {
|
||||
*iter = idx + 1;
|
||||
return ric;
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -2371,9 +2486,9 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
|
||||
struct uprobe_consumer *uc;
|
||||
bool has_consumers = false, remove = true;
|
||||
struct return_instance *ri = NULL;
|
||||
int push_idx = 0;
|
||||
struct uprobe_task *utask = current->utask;
|
||||
|
||||
current->utask->auprobe = &uprobe->arch;
|
||||
utask->auprobe = &uprobe->arch;
|
||||
|
||||
list_for_each_entry_rcu(uc, &uprobe->consumers, cons_node, rcu_read_lock_trace_held()) {
|
||||
bool session = uc->handler && uc->ret_handler;
|
||||
@ -2393,21 +2508,15 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
|
||||
continue;
|
||||
|
||||
if (!ri)
|
||||
ri = alloc_return_instance();
|
||||
ri = alloc_return_instance(utask);
|
||||
|
||||
if (session)
|
||||
ri = push_consumer(ri, push_idx++, uc->id, cookie);
|
||||
ri = push_consumer(ri, uc->id, cookie);
|
||||
}
|
||||
current->utask->auprobe = NULL;
|
||||
utask->auprobe = NULL;
|
||||
|
||||
if (!ZERO_OR_NULL_PTR(ri)) {
|
||||
/*
|
||||
* The push_idx value has the final number of return consumers,
|
||||
* and ri->consumers_cnt has number of allocated consumers.
|
||||
*/
|
||||
ri->consumers_cnt = push_idx;
|
||||
if (!ZERO_OR_NULL_PTR(ri))
|
||||
prepare_uretprobe(uprobe, regs, ri);
|
||||
}
|
||||
|
||||
if (remove && has_consumers) {
|
||||
down_read(&uprobe->register_rwsem);
|
||||
@ -2461,7 +2570,7 @@ static struct return_instance *find_next_ret_chain(struct return_instance *ri)
|
||||
void uprobe_handle_trampoline(struct pt_regs *regs)
|
||||
{
|
||||
struct uprobe_task *utask;
|
||||
struct return_instance *ri, *next;
|
||||
struct return_instance *ri, *ri_next, *next_chain;
|
||||
struct uprobe *uprobe;
|
||||
enum hprobe_state hstate;
|
||||
bool valid;
|
||||
@ -2481,8 +2590,8 @@ void uprobe_handle_trampoline(struct pt_regs *regs)
|
||||
* or NULL; the latter case means that nobody but ri->func
|
||||
* could hit this trampoline on return. TODO: sigaltstack().
|
||||
*/
|
||||
next = find_next_ret_chain(ri);
|
||||
valid = !next || arch_uretprobe_is_alive(next, RP_CHECK_RET, regs);
|
||||
next_chain = find_next_ret_chain(ri);
|
||||
valid = !next_chain || arch_uretprobe_is_alive(next_chain, RP_CHECK_RET, regs);
|
||||
|
||||
instruction_pointer_set(regs, ri->orig_ret_vaddr);
|
||||
do {
|
||||
@ -2494,7 +2603,9 @@ void uprobe_handle_trampoline(struct pt_regs *regs)
|
||||
* trampoline addresses on the stack are replaced with correct
|
||||
* original return addresses
|
||||
*/
|
||||
rcu_assign_pointer(utask->return_instances, ri->next);
|
||||
ri_next = ri->next;
|
||||
rcu_assign_pointer(utask->return_instances, ri_next);
|
||||
utask->depth--;
|
||||
|
||||
uprobe = hprobe_consume(&ri->hprobe, &hstate);
|
||||
if (valid)
|
||||
@ -2502,9 +2613,9 @@ void uprobe_handle_trampoline(struct pt_regs *regs)
|
||||
hprobe_finalize(&ri->hprobe, hstate);
|
||||
|
||||
/* We already took care of hprobe, no need to waste more time on that. */
|
||||
ri = free_ret_instance(ri, false /* !cleanup_hprobe */);
|
||||
utask->depth--;
|
||||
} while (ri != next);
|
||||
free_ret_instance(utask, ri, false /* !cleanup_hprobe */);
|
||||
ri = ri_next;
|
||||
} while (ri != next_chain);
|
||||
} while (!valid);
|
||||
|
||||
return;
|
||||
|
@ -448,7 +448,7 @@ static bool vma_lock_alloc(struct vm_area_struct *vma)
|
||||
return false;
|
||||
|
||||
init_rwsem(&vma->vm_lock->lock);
|
||||
vma->vm_lock_seq = -1;
|
||||
vma->vm_lock_seq = UINT_MAX;
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -1262,9 +1262,6 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
|
||||
seqcount_init(&mm->write_protect_seq);
|
||||
mmap_init_lock(mm);
|
||||
INIT_LIST_HEAD(&mm->mmlist);
|
||||
#ifdef CONFIG_PER_VMA_LOCK
|
||||
mm->mm_lock_seq = 0;
|
||||
#endif
|
||||
mm_pgtables_bytes_init(mm);
|
||||
mm->map_count = 0;
|
||||
mm->locked_vm = 0;
|
||||
|
@ -619,7 +619,8 @@ static const struct bpf_func_proto bpf_perf_event_read_value_proto = {
|
||||
|
||||
static __always_inline u64
|
||||
__bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
|
||||
u64 flags, struct perf_sample_data *sd)
|
||||
u64 flags, struct perf_raw_record *raw,
|
||||
struct perf_sample_data *sd)
|
||||
{
|
||||
struct bpf_array *array = container_of(map, struct bpf_array, map);
|
||||
unsigned int cpu = smp_processor_id();
|
||||
@ -644,6 +645,8 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
|
||||
if (unlikely(event->oncpu != cpu))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
perf_sample_save_raw_data(sd, event, raw);
|
||||
|
||||
return perf_event_output(event, sd, regs);
|
||||
}
|
||||
|
||||
@ -687,9 +690,8 @@ BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
|
||||
}
|
||||
|
||||
perf_sample_data_init(sd, 0, 0);
|
||||
perf_sample_save_raw_data(sd, &raw);
|
||||
|
||||
err = __bpf_perf_event_output(regs, map, flags, sd);
|
||||
err = __bpf_perf_event_output(regs, map, flags, &raw, sd);
|
||||
out:
|
||||
this_cpu_dec(bpf_trace_nest_level);
|
||||
preempt_enable();
|
||||
@ -748,9 +750,8 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
|
||||
|
||||
perf_fetch_caller_regs(regs);
|
||||
perf_sample_data_init(sd, 0, 0);
|
||||
perf_sample_save_raw_data(sd, &raw);
|
||||
|
||||
ret = __bpf_perf_event_output(regs, map, flags, sd);
|
||||
ret = __bpf_perf_event_output(regs, map, flags, &raw, sd);
|
||||
out:
|
||||
this_cpu_dec(bpf_event_output_nest_level);
|
||||
preempt_enable();
|
||||
|
3
mm/gup.c
3
mm/gup.c
@ -3360,8 +3360,7 @@ static unsigned long gup_fast(unsigned long start, unsigned long end,
|
||||
return 0;
|
||||
|
||||
if (gup_flags & FOLL_PIN) {
|
||||
seq = raw_read_seqcount(&current->mm->write_protect_seq);
|
||||
if (seq & 1)
|
||||
if (!raw_seqcount_try_begin(&current->mm->write_protect_seq, seq))
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -40,7 +40,7 @@ struct mm_struct init_mm = {
|
||||
.arg_lock = __SPIN_LOCK_UNLOCKED(init_mm.arg_lock),
|
||||
.mmlist = LIST_HEAD_INIT(init_mm.mmlist),
|
||||
#ifdef CONFIG_PER_VMA_LOCK
|
||||
.mm_lock_seq = 0,
|
||||
.mm_lock_seq = SEQCNT_ZERO(init_mm.mm_lock_seq),
|
||||
#endif
|
||||
.user_ns = &init_user_ns,
|
||||
.cpu_bitmap = CPU_BITS_NONE,
|
||||
|
@ -89,7 +89,7 @@ static struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm,
|
||||
* begun. Linking to the tree will have caused this to be incremented,
|
||||
* which means we will get a false positive otherwise.
|
||||
*/
|
||||
vma->vm_lock_seq = -1;
|
||||
vma->vm_lock_seq = UINT_MAX;
|
||||
|
||||
return vma;
|
||||
}
|
||||
@ -214,7 +214,7 @@ static bool vma_write_started(struct vm_area_struct *vma)
|
||||
int seq = vma->vm_lock_seq;
|
||||
|
||||
/* We reset after each check. */
|
||||
vma->vm_lock_seq = -1;
|
||||
vma->vm_lock_seq = UINT_MAX;
|
||||
|
||||
/* The vma_start_write() stub simply increments this value. */
|
||||
return seq > -1;
|
||||
|
@ -241,7 +241,7 @@ struct vm_area_struct {
|
||||
* counter reuse can only lead to occasional unnecessary use of the
|
||||
* slowpath.
|
||||
*/
|
||||
int vm_lock_seq;
|
||||
unsigned int vm_lock_seq;
|
||||
struct vma_lock *vm_lock;
|
||||
#endif
|
||||
|
||||
@ -416,7 +416,7 @@ static inline bool vma_lock_alloc(struct vm_area_struct *vma)
|
||||
return false;
|
||||
|
||||
init_rwsem(&vma->vm_lock->lock);
|
||||
vma->vm_lock_seq = -1;
|
||||
vma->vm_lock_seq = UINT_MAX;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user