2017-11-07 17:30:07 +01:00
|
|
|
// SPDX-License-Identifier: GPL-2.0
|
2014-09-30 14:48:25 +01:00
|
|
|
/*
|
|
|
|
* cacheinfo support - processor cache information via sysfs
|
|
|
|
*
|
|
|
|
* Based on arch/x86/kernel/cpu/intel_cacheinfo.c
|
|
|
|
* Author: Sudeep Holla <sudeep.holla@arm.com>
|
|
|
|
*/
|
2016-10-28 09:45:30 +01:00
|
|
|
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
|
|
|
|
2016-10-28 09:45:29 +01:00
|
|
|
#include <linux/acpi.h>
|
2014-09-30 14:48:25 +01:00
|
|
|
#include <linux/bitops.h>
|
|
|
|
#include <linux/cacheinfo.h>
|
|
|
|
#include <linux/compiler.h>
|
|
|
|
#include <linux/cpu.h>
|
|
|
|
#include <linux/device.h>
|
|
|
|
#include <linux/init.h>
|
2023-03-29 10:52:07 -05:00
|
|
|
#include <linux/of.h>
|
2014-09-30 14:48:25 +01:00
|
|
|
#include <linux/sched.h>
|
|
|
|
#include <linux/slab.h>
|
|
|
|
#include <linux/smp.h>
|
|
|
|
#include <linux/sysfs.h>
|
|
|
|
|
|
|
|
/* pointer to per cpu cacheinfo */
|
|
|
|
static DEFINE_PER_CPU(struct cpu_cacheinfo, ci_cpu_cacheinfo);
|
|
|
|
#define ci_cacheinfo(cpu) (&per_cpu(ci_cpu_cacheinfo, cpu))
|
|
|
|
#define cache_leaves(cpu) (ci_cacheinfo(cpu)->num_leaves)
|
|
|
|
#define per_cpu_cacheinfo(cpu) (ci_cacheinfo(cpu)->info_list)
|
2022-07-04 11:15:47 +01:00
|
|
|
#define per_cpu_cacheinfo_idx(cpu, idx) \
|
|
|
|
(per_cpu_cacheinfo(cpu) + (idx))
|
2014-09-30 14:48:25 +01:00
|
|
|
|
2023-04-14 10:14:52 +02:00
|
|
|
/* Set if no cache information is found in DT/ACPI. */
|
|
|
|
static bool use_arch_info;
|
|
|
|
|
2014-09-30 14:48:25 +01:00
|
|
|
/**
 * get_cpu_cacheinfo() - look up the cache descriptor of a CPU
 * @cpu: logical CPU number
 *
 * Return: address of @cpu's per-CPU struct cpu_cacheinfo.
 */
struct cpu_cacheinfo *get_cpu_cacheinfo(unsigned int cpu)
{
	return ci_cacheinfo(cpu);
}
|
|
|
|
|
|
|
|
static inline bool cache_leaves_are_shared(struct cacheinfo *this_leaf,
|
|
|
|
struct cacheinfo *sib_leaf)
|
|
|
|
{
|
2022-07-04 11:15:48 +01:00
|
|
|
/*
|
|
|
|
* For non DT/ACPI systems, assume unique level 1 caches,
|
2023-04-14 10:14:49 +02:00
|
|
|
* system-wide shared caches for all other levels.
|
2022-07-04 11:15:48 +01:00
|
|
|
*/
|
2023-04-14 10:14:52 +02:00
|
|
|
if (!(IS_ENABLED(CONFIG_OF) || IS_ENABLED(CONFIG_ACPI)) ||
|
|
|
|
use_arch_info)
|
2023-04-14 10:14:49 +02:00
|
|
|
return (this_leaf->level != 1) && (sib_leaf->level != 1);
|
2022-07-04 11:15:48 +01:00
|
|
|
|
2022-07-04 11:15:51 +01:00
|
|
|
if ((sib_leaf->attributes & CACHE_ID) &&
|
|
|
|
(this_leaf->attributes & CACHE_ID))
|
|
|
|
return sib_leaf->id == this_leaf->id;
|
|
|
|
|
2018-05-11 18:57:58 -05:00
|
|
|
return sib_leaf->fw_token == this_leaf->fw_token;
|
2014-09-30 14:48:25 +01:00
|
|
|
}
|
2016-10-28 09:45:31 +01:00
|
|
|
|
2022-07-04 11:15:49 +01:00
|
|
|
bool last_level_cache_is_valid(unsigned int cpu)
|
|
|
|
{
|
|
|
|
struct cacheinfo *llc;
|
|
|
|
|
cacheinfo: Allocate memory during CPU hotplug if not done from the primary CPU
Commit
5944ce092b97 ("arch_topology: Build cacheinfo from primary CPU")
adds functionality that architectures can use to optionally allocate and
build cacheinfo early during boot. Commit
6539cffa9495 ("cacheinfo: Add arch specific early level initializer")
lets secondary CPUs correct (and reallocate memory) cacheinfo data if
needed.
If the early build functionality is not used and cacheinfo does not need
correction, memory for cacheinfo is never allocated. x86 does not use
the early build functionality. Consequently, during the cacheinfo CPU
hotplug callback, last_level_cache_is_valid() attempts to dereference
a NULL pointer:
BUG: kernel NULL pointer dereference, address: 0000000000000100
#PF: supervisor read access in kernel mode
#PF: error_code(0x0000) - not present page
PGD 0 P4D 0
Oops: 0000 [#1] PREEPMT SMP NOPTI
CPU: 0 PID 19 Comm: cpuhp/0 Not tainted 6.4.0-rc2 #1
RIP: 0010: last_level_cache_is_valid+0x95/0xe0a
Allocate memory for cacheinfo during the cacheinfo CPU hotplug callback
if not done earlier.
Moreover, before determining the validity of the last-level cache info,
ensure that it has been allocated. Simply checking for non-zero
cache_leaves() is not sufficient, as some architectures (e.g., Intel
processors) have non-zero cache_leaves() before allocation.
Dereferencing NULL cacheinfo can occur in update_per_cpu_data_slice_size().
This function iterates over all online CPUs. However, a CPU may have come
online recently, but its cacheinfo may not have been allocated yet.
While here, remove an unnecessary indentation in allocate_cache_info().
[ bp: Massage. ]
Fixes: 6539cffa9495 ("cacheinfo: Add arch specific early level initializer")
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Radu Rendec <rrendec@redhat.com>
Reviewed-by: Nikolay Borisov <nik.borisov@suse.com>
Reviewed-by: Andreas Herrmann <aherrmann@suse.de>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Cc: stable@vger.kernel.org # 6.3+
Link: https://lore.kernel.org/r/20241128002247.26726-2-ricardo.neri-calderon@linux.intel.com
2024-11-27 16:22:46 -08:00
|
|
|
if (!cache_leaves(cpu) || !per_cpu_cacheinfo(cpu))
|
2022-07-04 11:15:49 +01:00
|
|
|
return false;
|
|
|
|
|
|
|
|
llc = per_cpu_cacheinfo_idx(cpu, cache_leaves(cpu) - 1);
|
|
|
|
|
2022-07-04 11:15:51 +01:00
|
|
|
return (llc->attributes & CACHE_ID) || !!llc->fw_token;
|
|
|
|
|
2022-07-04 11:15:49 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
bool last_level_cache_is_shared(unsigned int cpu_x, unsigned int cpu_y)
|
|
|
|
{
|
|
|
|
struct cacheinfo *llc_x, *llc_y;
|
|
|
|
|
|
|
|
if (!last_level_cache_is_valid(cpu_x) ||
|
|
|
|
!last_level_cache_is_valid(cpu_y))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
llc_x = per_cpu_cacheinfo_idx(cpu_x, cache_leaves(cpu_x) - 1);
|
|
|
|
llc_y = per_cpu_cacheinfo_idx(cpu_y, cache_leaves(cpu_y) - 1);
|
|
|
|
|
|
|
|
return cache_leaves_are_shared(llc_x, llc_y);
|
|
|
|
}
|
|
|
|
|
2022-07-04 11:15:48 +01:00
|
|
|
#ifdef CONFIG_OF
|
2023-04-14 10:14:50 +02:00
|
|
|
|
|
|
|
/* Forward declaration: cache_setup_of_node() calls this ahead of its definition. */
static bool of_check_cache_nodes(struct device_node *np);
|
|
|
|
|
2016-10-28 09:45:31 +01:00
|
|
|
/*
 * OF properties to query for a given cache type.
 *
 * @size_prop: name of the property holding the cache size
 * @line_size_props: alternative property names for the line size, tried in
 *                   array order
 * @nr_sets_prop: name of the property holding the number of sets
 */
struct cache_type_info {
	const char *size_prop;
	const char *line_size_props[2];
	const char *nr_sets_prop;
};
|
|
|
|
|
|
|
|
/*
 * Property-name table indexed by get_cacheinfo_idx(): entry 0 holds the
 * unprefixed names, the following entries the "i-" and "d-" prefixed names.
 * NOTE(review): entries 1 and 2 presumably line up with the instruction and
 * data values of enum cache_type - confirm against its definition.
 */
static const struct cache_type_info cache_type_info[] = {
	{
		.size_prop       = "cache-size",
		.line_size_props = { "cache-line-size",
				     "cache-block-size", },
		.nr_sets_prop    = "cache-sets",
	}, {
		.size_prop       = "i-cache-size",
		.line_size_props = { "i-cache-line-size",
				     "i-cache-block-size", },
		.nr_sets_prop    = "i-cache-sets",
	}, {
		.size_prop       = "d-cache-size",
		.line_size_props = { "d-cache-line-size",
				     "d-cache-block-size", },
		.nr_sets_prop    = "d-cache-sets",
	},
};
|
|
|
|
|
|
|
|
static inline int get_cacheinfo_idx(enum cache_type type)
|
|
|
|
{
|
|
|
|
if (type == CACHE_TYPE_UNIFIED)
|
|
|
|
return 0;
|
|
|
|
return type;
|
|
|
|
}
|
|
|
|
|
2018-05-11 18:57:57 -05:00
|
|
|
static void cache_size(struct cacheinfo *this_leaf, struct device_node *np)
|
2016-10-28 09:45:31 +01:00
|
|
|
{
|
|
|
|
const char *propname;
|
|
|
|
int ct_idx;
|
|
|
|
|
|
|
|
ct_idx = get_cacheinfo_idx(this_leaf->type);
|
|
|
|
propname = cache_type_info[ct_idx].size_prop;
|
|
|
|
|
2018-12-19 16:16:03 +08:00
|
|
|
of_property_read_u32(np, propname, &this_leaf->size);
|
2016-10-28 09:45:31 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/* not cache_line_size() because that's a macro in include/linux/cache.h */
|
2018-05-11 18:57:57 -05:00
|
|
|
static void cache_get_line_size(struct cacheinfo *this_leaf,
|
|
|
|
struct device_node *np)
|
2016-10-28 09:45:31 +01:00
|
|
|
{
|
|
|
|
int i, lim, ct_idx;
|
|
|
|
|
|
|
|
ct_idx = get_cacheinfo_idx(this_leaf->type);
|
|
|
|
lim = ARRAY_SIZE(cache_type_info[ct_idx].line_size_props);
|
|
|
|
|
|
|
|
for (i = 0; i < lim; i++) {
|
2018-07-06 13:50:31 +01:00
|
|
|
int ret;
|
|
|
|
u32 line_size;
|
2016-10-28 09:45:31 +01:00
|
|
|
const char *propname;
|
|
|
|
|
|
|
|
propname = cache_type_info[ct_idx].line_size_props[i];
|
2018-07-06 13:50:31 +01:00
|
|
|
ret = of_property_read_u32(np, propname, &line_size);
|
|
|
|
if (!ret) {
|
|
|
|
this_leaf->coherency_line_size = line_size;
|
2016-10-28 09:45:31 +01:00
|
|
|
break;
|
2018-07-06 13:50:31 +01:00
|
|
|
}
|
2016-10-28 09:45:31 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-05-11 18:57:57 -05:00
|
|
|
static void cache_nr_sets(struct cacheinfo *this_leaf, struct device_node *np)
|
2016-10-28 09:45:31 +01:00
|
|
|
{
|
|
|
|
const char *propname;
|
|
|
|
int ct_idx;
|
|
|
|
|
|
|
|
ct_idx = get_cacheinfo_idx(this_leaf->type);
|
|
|
|
propname = cache_type_info[ct_idx].nr_sets_prop;
|
|
|
|
|
2018-12-19 16:16:03 +08:00
|
|
|
of_property_read_u32(np, propname, &this_leaf->number_of_sets);
|
2016-10-28 09:45:31 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static void cache_associativity(struct cacheinfo *this_leaf)
|
|
|
|
{
|
|
|
|
unsigned int line_size = this_leaf->coherency_line_size;
|
|
|
|
unsigned int nr_sets = this_leaf->number_of_sets;
|
|
|
|
unsigned int size = this_leaf->size;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If the cache is fully associative, there is no need to
|
|
|
|
* check the other properties.
|
|
|
|
*/
|
|
|
|
if (!(nr_sets == 1) && (nr_sets > 0 && size > 0 && line_size > 0))
|
|
|
|
this_leaf->ways_of_associativity = (size / nr_sets) / line_size;
|
|
|
|
}
|
|
|
|
|
2018-05-11 18:57:57 -05:00
|
|
|
/*
 * cache_node_is_unified() - test for the "cache-unified" property
 * @this_leaf: unused; kept so the signature matches the other helpers
 * @np: device tree node to inspect
 *
 * Return: true if @np carries the "cache-unified" property.
 */
static bool cache_node_is_unified(struct cacheinfo *this_leaf,
				  struct device_node *np)
{
	return of_property_read_bool(np, "cache-unified");
}
|
|
|
|
|
2018-05-11 18:57:57 -05:00
|
|
|
static void cache_of_set_props(struct cacheinfo *this_leaf,
|
|
|
|
struct device_node *np)
|
2016-10-28 09:45:31 +01:00
|
|
|
{
|
2018-05-11 18:57:57 -05:00
|
|
|
/*
|
|
|
|
* init_cache_level must setup the cache level correctly
|
|
|
|
* overriding the architecturally specified levels, so
|
|
|
|
* if type is NONE at this stage, it should be unified
|
|
|
|
*/
|
|
|
|
if (this_leaf->type == CACHE_TYPE_NOCACHE &&
|
|
|
|
cache_node_is_unified(this_leaf, np))
|
|
|
|
this_leaf->type = CACHE_TYPE_UNIFIED;
|
|
|
|
cache_size(this_leaf, np);
|
|
|
|
cache_get_line_size(this_leaf, np);
|
|
|
|
cache_nr_sets(this_leaf, np);
|
|
|
|
cache_associativity(this_leaf);
|
2016-10-28 09:45:31 +01:00
|
|
|
}
|
2018-05-11 18:57:56 -05:00
|
|
|
|
|
|
|
static int cache_setup_of_node(unsigned int cpu)
|
|
|
|
{
|
|
|
|
struct cacheinfo *this_leaf;
|
|
|
|
unsigned int index = 0;
|
|
|
|
|
2024-07-19 17:13:35 +02:00
|
|
|
struct device_node *np __free(device_node) = of_cpu_device_node_get(cpu);
|
2018-05-11 18:57:56 -05:00
|
|
|
if (!np) {
|
|
|
|
pr_err("Failed to find cpu%d device node\n", cpu);
|
|
|
|
return -ENOENT;
|
|
|
|
}
|
|
|
|
|
2023-04-14 10:14:50 +02:00
|
|
|
if (!of_check_cache_nodes(np)) {
|
|
|
|
return -ENOENT;
|
|
|
|
}
|
|
|
|
|
2018-05-11 18:57:56 -05:00
|
|
|
while (index < cache_leaves(cpu)) {
|
2022-07-04 11:15:47 +01:00
|
|
|
this_leaf = per_cpu_cacheinfo_idx(cpu, index);
|
2022-10-26 20:59:54 +02:00
|
|
|
if (this_leaf->level != 1) {
|
2024-07-19 17:13:35 +02:00
|
|
|
struct device_node *prev __free(device_node) = np;
|
2018-05-11 18:57:56 -05:00
|
|
|
np = of_find_next_cache_node(np);
|
2022-10-26 20:59:54 +02:00
|
|
|
if (!np)
|
|
|
|
break;
|
|
|
|
}
|
2018-05-11 18:57:57 -05:00
|
|
|
cache_of_set_props(this_leaf, np);
|
2018-05-11 18:57:58 -05:00
|
|
|
this_leaf->fw_token = np;
|
2018-05-11 18:57:56 -05:00
|
|
|
index++;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (index != cache_leaves(cpu)) /* not all OF nodes populated */
|
|
|
|
return -ENOENT;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
2023-01-04 19:30:24 +01:00
|
|
|
|
2023-04-14 10:14:50 +02:00
|
|
|
static bool of_check_cache_nodes(struct device_node *np)
|
|
|
|
{
|
|
|
|
if (of_property_present(np, "cache-size") ||
|
|
|
|
of_property_present(np, "i-cache-size") ||
|
|
|
|
of_property_present(np, "d-cache-size") ||
|
|
|
|
of_property_present(np, "cache-unified"))
|
|
|
|
return true;
|
|
|
|
|
2024-07-19 17:13:35 +02:00
|
|
|
struct device_node *next __free(device_node) = of_find_next_cache_node(np);
|
2023-04-14 10:14:50 +02:00
|
|
|
if (next) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2023-01-04 19:30:26 +01:00
|
|
|
/*
 * of_count_cache_leaves() - count the cache leaves described by one node
 * @np: device tree node to inspect
 *
 * One leaf is counted for each of "cache-size", "i-cache-size" and
 * "d-cache-size" that is present.
 *
 * Return: the number of size properties found; if none is present, 1 when
 * the node is marked "cache-unified" and 2 otherwise.
 */
static int of_count_cache_leaves(struct device_node *np)
{
	unsigned int leaves = 0;

	if (of_property_present(np, "cache-size"))
		++leaves;
	if (of_property_present(np, "i-cache-size"))
		++leaves;
	if (of_property_present(np, "d-cache-size"))
		++leaves;

	if (!leaves) {
		/* The '[i-|d-|]cache-size' property is required, but
		 * if absent, fallback on the 'cache-unified' property.
		 */
		if (of_property_read_bool(np, "cache-unified"))
			return 1;
		else
			return 2;
	}

	return leaves;
}
|
|
|
|
|
|
|
|
int init_of_cache_level(unsigned int cpu)
|
|
|
|
{
|
|
|
|
struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
|
2024-07-19 17:13:35 +02:00
|
|
|
struct device_node *np __free(device_node) = of_cpu_device_node_get(cpu);
|
2023-01-04 19:30:26 +01:00
|
|
|
unsigned int levels = 0, leaves, level;
|
|
|
|
|
2023-04-14 10:14:50 +02:00
|
|
|
if (!of_check_cache_nodes(np)) {
|
|
|
|
return -ENOENT;
|
|
|
|
}
|
|
|
|
|
2023-01-04 19:30:26 +01:00
|
|
|
leaves = of_count_cache_leaves(np);
|
2023-01-04 19:30:24 +01:00
|
|
|
if (leaves > 0)
|
|
|
|
levels = 1;
|
|
|
|
|
2024-07-19 17:13:35 +02:00
|
|
|
while (1) {
|
|
|
|
struct device_node *prev __free(device_node) = np;
|
|
|
|
np = of_find_next_cache_node(np);
|
|
|
|
if (!np)
|
|
|
|
break;
|
|
|
|
|
2023-01-04 19:30:24 +01:00
|
|
|
if (!of_device_is_compatible(np, "cache"))
|
2024-07-19 17:13:35 +02:00
|
|
|
return -EINVAL;
|
2023-01-04 19:30:24 +01:00
|
|
|
if (of_property_read_u32(np, "cache-level", &level))
|
2024-07-19 17:13:35 +02:00
|
|
|
return -EINVAL;
|
2023-01-04 19:30:24 +01:00
|
|
|
if (level <= levels)
|
2024-07-19 17:13:35 +02:00
|
|
|
return -EINVAL;
|
2023-01-04 19:30:26 +01:00
|
|
|
|
|
|
|
leaves += of_count_cache_leaves(np);
|
2023-01-04 19:30:24 +01:00
|
|
|
levels = level;
|
|
|
|
}
|
|
|
|
|
|
|
|
this_cpu_ci->num_levels = levels;
|
|
|
|
this_cpu_ci->num_leaves = leaves;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-09-30 14:48:25 +01:00
|
|
|
#else
|
|
|
|
/* !CONFIG_OF stub: there is no device tree to parse, so report success. */
static inline int cache_setup_of_node(unsigned int cpu) { return 0; }
|
2023-01-04 19:30:24 +01:00
|
|
|
/* !CONFIG_OF stub: returns 0 without touching the per-CPU level/leaf counts. */
int init_of_cache_level(unsigned int cpu) { return 0; }
|
2014-09-30 14:48:25 +01:00
|
|
|
#endif
|
|
|
|
|
2018-05-11 18:58:02 -05:00
|
|
|
/*
 * cache_setup_acpi() - default ACPI cache setup
 * @cpu: logical CPU number (unused here)
 *
 * Weak definition that a strong definition elsewhere can replace.
 *
 * Return: always -ENOTSUPP.
 */
int __weak cache_setup_acpi(unsigned int cpu)
{
	return -ENOTSUPP;
}
|
|
|
|
|
2019-05-28 10:16:53 +08:00
|
|
|
/* Largest coherency_line_size seen so far by cache_shared_cpu_map_setup(). */
unsigned int coherency_max_size;
|
|
|
|
|
2022-07-04 11:15:50 +01:00
|
|
|
static int cache_setup_properties(unsigned int cpu)
|
|
|
|
{
|
|
|
|
int ret = 0;
|
|
|
|
|
|
|
|
if (of_have_populated_dt())
|
|
|
|
ret = cache_setup_of_node(cpu);
|
|
|
|
else if (!acpi_disabled)
|
|
|
|
ret = cache_setup_acpi(cpu);
|
|
|
|
|
2023-04-14 10:14:52 +02:00
|
|
|
// Assume there is no cache information available in DT/ACPI from now.
|
|
|
|
if (ret && use_arch_cache_info())
|
|
|
|
use_arch_info = true;
|
|
|
|
|
2022-07-04 11:15:50 +01:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2014-09-30 14:48:25 +01:00
|
|
|
static int cache_shared_cpu_map_setup(unsigned int cpu)
|
|
|
|
{
|
|
|
|
struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
|
|
|
|
struct cacheinfo *this_leaf, *sib_leaf;
|
2023-01-17 10:51:33 +00:00
|
|
|
unsigned int index, sib_index;
|
2016-10-28 09:45:29 +01:00
|
|
|
int ret = 0;
|
2014-09-30 14:48:25 +01:00
|
|
|
|
2016-10-28 09:45:28 +01:00
|
|
|
if (this_cpu_ci->cpu_map_populated)
|
|
|
|
return 0;
|
|
|
|
|
2022-07-04 11:15:50 +01:00
|
|
|
/*
|
|
|
|
* skip setting up cache properties if LLC is valid, just need
|
|
|
|
* to update the shared cpu_map if the cache attributes were
|
|
|
|
* populated early before all the cpus are brought online
|
|
|
|
*/
|
2023-04-14 10:14:52 +02:00
|
|
|
if (!last_level_cache_is_valid(cpu) && !use_arch_info) {
|
2022-07-04 11:15:50 +01:00
|
|
|
ret = cache_setup_properties(cpu);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
}
|
2014-09-30 14:48:25 +01:00
|
|
|
|
|
|
|
for (index = 0; index < cache_leaves(cpu); index++) {
|
|
|
|
unsigned int i;
|
|
|
|
|
2022-07-04 11:15:47 +01:00
|
|
|
this_leaf = per_cpu_cacheinfo_idx(cpu, index);
|
2014-09-30 14:48:25 +01:00
|
|
|
|
|
|
|
cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
|
|
|
|
for_each_online_cpu(i) {
|
2024-10-23 08:11:18 +03:00
|
|
|
if (i == cpu || !per_cpu_cacheinfo(i))
|
2014-09-30 14:48:25 +01:00
|
|
|
continue;/* skip if itself or no cacheinfo */
|
2023-01-17 10:51:33 +00:00
|
|
|
for (sib_index = 0; sib_index < cache_leaves(i); sib_index++) {
|
|
|
|
sib_leaf = per_cpu_cacheinfo_idx(i, sib_index);
|
drivers: base: cacheinfo: Fix shared_cpu_map changes in event of CPU hotplug
While building the shared_cpu_map, check if the cache level and cache
type matches. On certain systems that build the cache topology based on
the instance ID, there are cases where the same ID may repeat across
multiple cache levels, leading inaccurate topology.
In event of CPU offlining, the cache_shared_cpu_map_remove() does not
consider if IDs at same level are being compared. As a result, when same
IDs repeat across different cache levels, the CPU going offline is not
removed from all the shared_cpu_map.
Below is the output of cache topology of CPU8 and it's SMT sibling after
CPU8 is offlined on a dual socket 3rd Generation AMD EPYC processor
(2 x 64C/128T) running kernel release v6.3:
# for i in /sys/devices/system/cpu/cpu8/cache/index*/shared_cpu_list; do echo -n "$i: "; cat $i; done
/sys/devices/system/cpu/cpu8/cache/index0/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_list: 8-15,136-143
# echo 0 > /sys/devices/system/cpu/cpu8/online
# for i in /sys/devices/system/cpu/cpu136/cache/index*/shared_cpu_list; do echo -n "$i: "; cat $i; done
/sys/devices/system/cpu/cpu136/cache/index0/shared_cpu_list: 136
/sys/devices/system/cpu/cpu136/cache/index1/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu136/cache/index2/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu136/cache/index3/shared_cpu_list: 9-15,136-143
CPU8 is removed from index0 (L1i) but remains in the shared_cpu_list of
index1 (L1d) and index2 (L2). Since L1i, L1d, and L2 are shared by the
SMT siblings, and they have the same cache instance ID, CPU 2 is only
removed from the first index with matching ID which is index1 (L1i) in
this case. With this fix, the results are as expected when performing
the same experiment on the same system:
# for i in /sys/devices/system/cpu/cpu8/cache/index*/shared_cpu_list; do echo -n "$i: "; cat $i; done
/sys/devices/system/cpu/cpu8/cache/index0/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_list: 8-15,136-143
# echo 0 > /sys/devices/system/cpu/cpu8/online
# for i in /sys/devices/system/cpu/cpu136/cache/index*/shared_cpu_list; do echo -n "$i: "; cat $i; done
/sys/devices/system/cpu/cpu136/cache/index0/shared_cpu_list: 136
/sys/devices/system/cpu/cpu136/cache/index1/shared_cpu_list: 136
/sys/devices/system/cpu/cpu136/cache/index2/shared_cpu_list: 136
/sys/devices/system/cpu/cpu136/cache/index3/shared_cpu_list: 9-15,136-143
When rebuilding topology, the same problem appears as
cache_shared_cpu_map_setup() implements a similar logic. Consider the
same 3rd Generation EPYC processor: CPUs in Core 1, that share the L1
and L2 caches, have L1 and L2 instance ID as 1. For all the CPUs on
the second chiplet, the L3 ID is also 1 leading to grouping on CPUs from
Core 1 (1, 17) and the entire second chiplet (8-15, 24-31) as CPUs
sharing one cache domain. This went undetected since x86 processors
depended on arch specific populate_cache_leaves() method to repopulate
the shared_cpus_map when CPU came back online until kernel release
v6.3-rc5.
Fixes: 198102c9103f ("cacheinfo: Fix shared_cpu_map to handle shared caches at different levels")
Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Link: https://lore.kernel.org/r/20230508084115.1157-2-kprateek.nayak@amd.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2023-05-08 14:11:14 +05:30
|
|
|
|
|
|
|
/*
|
|
|
|
* Comparing cache IDs only makes sense if the leaves
|
|
|
|
* belong to the same cache level of same type. Skip
|
|
|
|
* the check if level and type do not match.
|
|
|
|
*/
|
|
|
|
if (sib_leaf->level != this_leaf->level ||
|
|
|
|
sib_leaf->type != this_leaf->type)
|
|
|
|
continue;
|
|
|
|
|
2023-01-17 10:51:33 +00:00
|
|
|
if (cache_leaves_are_shared(this_leaf, sib_leaf)) {
|
|
|
|
cpumask_set_cpu(cpu, &sib_leaf->shared_cpu_map);
|
|
|
|
cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
|
|
|
|
break;
|
|
|
|
}
|
2014-09-30 14:48:25 +01:00
|
|
|
}
|
|
|
|
}
|
2019-05-28 10:16:53 +08:00
|
|
|
/* record the maximum cache line size */
|
|
|
|
if (this_leaf->coherency_line_size > coherency_max_size)
|
|
|
|
coherency_max_size = this_leaf->coherency_line_size;
|
2014-09-30 14:48:25 +01:00
|
|
|
}
|
|
|
|
|
drivers: base: cacheinfo: Update cpu_map_populated during CPU Hotplug
Until commit 5c2712387d48 ("cacheinfo: Fix LLC is not exported through
sysfs"), cacheinfo called populate_cache_leaves() for CPU coming online
which let the arch specific functions handle (at least on x86)
populating the shared_cpu_map. However, with the changes in the
aforementioned commit, populate_cache_leaves() is not called when a CPU
comes online as a result of hotplug since last_level_cache_is_valid()
returns true as the cacheinfo data is not discarded. The CPU coming
online is not present in shared_cpu_map, however, it will not be added
since the cpu_cacheinfo->cpu_map_populated flag is set (it is set in
populate_cache_leaves() when cacheinfo is first populated for x86)
This can lead to inconsistencies in the shared_cpu_map when an offlined
CPU comes online again. Example below depicts the inconsistency in the
shared_cpu_list in cacheinfo when CPU8 is offlined and onlined again on
a 3rd Generation EPYC processor:
# for i in /sys/devices/system/cpu/cpu8/cache/index*/shared_cpu_list; do echo -n "$i: "; cat $i; done
/sys/devices/system/cpu/cpu8/cache/index0/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_list: 8-15,136-143
# echo 0 > /sys/devices/system/cpu/cpu8/online
# echo 1 > /sys/devices/system/cpu/cpu8/online
# for i in /sys/devices/system/cpu/cpu8/cache/index*/shared_cpu_list; do echo -n "$i: "; cat $i; done
/sys/devices/system/cpu/cpu8/cache/index0/shared_cpu_list: 8
/sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_list: 8
/sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_list: 8
/sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_list: 8
# cat /sys/devices/system/cpu/cpu136/cache/index0/shared_cpu_list
136
# cat /sys/devices/system/cpu/cpu136/cache/index3/shared_cpu_list
9-15,136-143
Clear the flag when the CPU is removed from shared_cpu_map when
cache_shared_cpu_map_remove() is called during CPU hotplug. This will
allow cache_shared_cpu_map_setup() to add the CPU coming back online in
the shared_cpu_map. Set the flag again when the shared_cpu_map is setup.
Following are results of performing the same test as described above with
the changes:
# for i in /sys/devices/system/cpu/cpu8/cache/index*/shared_cpu_list; do echo -n "$i: "; cat $i; done
/sys/devices/system/cpu/cpu8/cache/index0/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_list: 8-15,136-143
# echo 0 > /sys/devices/system/cpu/cpu8/online
# echo 1 > /sys/devices/system/cpu/cpu8/online
# for i in /sys/devices/system/cpu/cpu8/cache/index*/shared_cpu_list; do echo -n "$i: "; cat $i; done
/sys/devices/system/cpu/cpu8/cache/index0/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_list: 8-15,136-143
# cat /sys/devices/system/cpu/cpu136/cache/index0/shared_cpu_list
8,136
# cat /sys/devices/system/cpu/cpu136/cache/index3/shared_cpu_list
8-15,136-143
Fixes: 5c2712387d48 ("cacheinfo: Fix LLC is not exported through sysfs")
Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
Reviewed-by: Yicong Yang <yangyicong@hisilicon.com>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Link: https://lore.kernel.org/r/20230508084115.1157-3-kprateek.nayak@amd.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2023-05-08 14:11:15 +05:30
|
|
|
/* shared_cpu_map is now populated for the cpu */
|
|
|
|
this_cpu_ci->cpu_map_populated = true;
|
2014-09-30 14:48:25 +01:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void cache_shared_cpu_map_remove(unsigned int cpu)
|
|
|
|
{
|
drivers: base: cacheinfo: Update cpu_map_populated during CPU Hotplug
Until commit 5c2712387d48 ("cacheinfo: Fix LLC is not exported through
sysfs"), cacheinfo called populate_cache_leaves() for CPU coming online
which let the arch specific functions handle (at least on x86)
populating the shared_cpu_map. However, with the changes in the
aforementioned commit, populate_cache_leaves() is not called when a CPU
comes online as a result of hotplug since last_level_cache_is_valid()
returns true as the cacheinfo data is not discarded. The CPU coming
online is not present in shared_cpu_map, however, it will not be added
since the cpu_cacheinfo->cpu_map_populated flag is set (it is set in
populate_cache_leaves() when cacheinfo is first populated for x86)
This can lead to inconsistencies in the shared_cpu_map when an offlined
CPU comes online again. Example below depicts the inconsistency in the
shared_cpu_list in cacheinfo when CPU8 is offlined and onlined again on
a 3rd Generation EPYC processor:
# for i in /sys/devices/system/cpu/cpu8/cache/index*/shared_cpu_list; do echo -n "$i: "; cat $i; done
/sys/devices/system/cpu/cpu8/cache/index0/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_list: 8-15,136-143
# echo 0 > /sys/devices/system/cpu/cpu8/online
# echo 1 > /sys/devices/system/cpu/cpu8/online
# for i in /sys/devices/system/cpu/cpu8/cache/index*/shared_cpu_list; do echo -n "$i: "; cat $i; done
/sys/devices/system/cpu/cpu8/cache/index0/shared_cpu_list: 8
/sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_list: 8
/sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_list: 8
/sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_list: 8
# cat /sys/devices/system/cpu/cpu136/cache/index0/shared_cpu_list
136
# cat /sys/devices/system/cpu/cpu136/cache/index3/shared_cpu_list
9-15,136-143
Clear the flag when the CPU is removed from shared_cpu_map when
cache_shared_cpu_map_remove() is called during CPU hotplug. This will
allow cache_shared_cpu_map_setup() to add the CPU coming back online in
the shared_cpu_map. Set the flag again when the shared_cpu_map is setup.
Following are results of performing the same test as described above with
the changes:
# for i in /sys/devices/system/cpu/cpu8/cache/index*/shared_cpu_list; do echo -n "$i: "; cat $i; done
/sys/devices/system/cpu/cpu8/cache/index0/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_list: 8-15,136-143
# echo 0 > /sys/devices/system/cpu/cpu8/online
# echo 1 > /sys/devices/system/cpu/cpu8/online
# for i in /sys/devices/system/cpu/cpu8/cache/index*/shared_cpu_list; do echo -n "$i: "; cat $i; done
/sys/devices/system/cpu/cpu8/cache/index0/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_list: 8-15,136-143
# cat /sys/devices/system/cpu/cpu136/cache/index0/shared_cpu_list
8,136
# cat /sys/devices/system/cpu/cpu136/cache/index3/shared_cpu_list
8-15,136-143
Fixes: 5c2712387d48 ("cacheinfo: Fix LLC is not exported through sysfs")
Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
Reviewed-by: Yicong Yang <yangyicong@hisilicon.com>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Link: https://lore.kernel.org/r/20230508084115.1157-3-kprateek.nayak@amd.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2023-05-08 14:11:15 +05:30
|
|
|
struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
|
2014-09-30 14:48:25 +01:00
|
|
|
struct cacheinfo *this_leaf, *sib_leaf;
|
2023-01-17 10:51:33 +00:00
|
|
|
unsigned int sibling, index, sib_index;
|
2014-09-30 14:48:25 +01:00
|
|
|
|
|
|
|
for (index = 0; index < cache_leaves(cpu); index++) {
|
2022-07-04 11:15:47 +01:00
|
|
|
this_leaf = per_cpu_cacheinfo_idx(cpu, index);
|
2014-09-30 14:48:25 +01:00
|
|
|
for_each_cpu(sibling, &this_leaf->shared_cpu_map) {
|
2024-10-23 08:11:18 +03:00
|
|
|
if (sibling == cpu || !per_cpu_cacheinfo(sibling))
|
2022-07-04 11:15:52 +01:00
|
|
|
continue;/* skip if itself or no cacheinfo */
|
2015-08-08 10:46:02 +02:00
|
|
|
|
2023-01-17 10:51:33 +00:00
|
|
|
for (sib_index = 0; sib_index < cache_leaves(sibling); sib_index++) {
|
|
|
|
sib_leaf = per_cpu_cacheinfo_idx(sibling, sib_index);
|
drivers: base: cacheinfo: Fix shared_cpu_map changes in event of CPU hotplug
While building the shared_cpu_map, check if the cache level and cache
type matches. On certain systems that build the cache topology based on
the instance ID, there are cases where the same ID may repeat across
multiple cache levels, leading to inaccurate topology.
In event of CPU offlining, the cache_shared_cpu_map_remove() does not
consider if IDs at same level are being compared. As a result, when same
IDs repeat across different cache levels, the CPU going offline is not
removed from all the shared_cpu_map.
Below is the output of cache topology of CPU8 and its SMT sibling after
CPU8 is offlined on a dual socket 3rd Generation AMD EPYC processor
(2 x 64C/128T) running kernel release v6.3:
# for i in /sys/devices/system/cpu/cpu8/cache/index*/shared_cpu_list; do echo -n "$i: "; cat $i; done
/sys/devices/system/cpu/cpu8/cache/index0/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_list: 8-15,136-143
# echo 0 > /sys/devices/system/cpu/cpu8/online
# for i in /sys/devices/system/cpu/cpu136/cache/index*/shared_cpu_list; do echo -n "$i: "; cat $i; done
/sys/devices/system/cpu/cpu136/cache/index0/shared_cpu_list: 136
/sys/devices/system/cpu/cpu136/cache/index1/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu136/cache/index2/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu136/cache/index3/shared_cpu_list: 9-15,136-143
CPU8 is removed from index0 (L1i) but remains in the shared_cpu_list of
index1 (L1d) and index2 (L2). Since L1i, L1d, and L2 are shared by the
SMT siblings, and they have the same cache instance ID, CPU8 is only
removed from the first index with matching ID, which is index0 (L1i) in
this case. With this fix, the results are as expected when performing
the same experiment on the same system:
# for i in /sys/devices/system/cpu/cpu8/cache/index*/shared_cpu_list; do echo -n "$i: "; cat $i; done
/sys/devices/system/cpu/cpu8/cache/index0/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_list: 8-15,136-143
# echo 0 > /sys/devices/system/cpu/cpu8/online
# for i in /sys/devices/system/cpu/cpu136/cache/index*/shared_cpu_list; do echo -n "$i: "; cat $i; done
/sys/devices/system/cpu/cpu136/cache/index0/shared_cpu_list: 136
/sys/devices/system/cpu/cpu136/cache/index1/shared_cpu_list: 136
/sys/devices/system/cpu/cpu136/cache/index2/shared_cpu_list: 136
/sys/devices/system/cpu/cpu136/cache/index3/shared_cpu_list: 9-15,136-143
When rebuilding topology, the same problem appears as
cache_shared_cpu_map_setup() implements a similar logic. Consider the
same 3rd Generation EPYC processor: CPUs in Core 1, that share the L1
and L2 caches, have L1 and L2 instance ID as 1. For all the CPUs on
the second chiplet, the L3 ID is also 1 leading to grouping on CPUs from
Core 1 (1, 17) and the entire second chiplet (8-15, 24-31) as CPUs
sharing one cache domain. This went undetected since x86 processors
depended on arch specific populate_cache_leaves() method to repopulate
the shared_cpu_map when a CPU came back online until kernel release
v6.3-rc5.
Fixes: 198102c9103f ("cacheinfo: Fix shared_cpu_map to handle shared caches at different levels")
Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Link: https://lore.kernel.org/r/20230508084115.1157-2-kprateek.nayak@amd.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2023-05-08 14:11:14 +05:30
|
|
|
|
|
|
|
/*
|
|
|
|
* Comparing cache IDs only makes sense if the leaves
|
|
|
|
* belong to the same cache level of same type. Skip
|
|
|
|
* the check if level and type do not match.
|
|
|
|
*/
|
|
|
|
if (sib_leaf->level != this_leaf->level ||
|
|
|
|
sib_leaf->type != this_leaf->type)
|
|
|
|
continue;
|
|
|
|
|
2023-01-17 10:51:33 +00:00
|
|
|
if (cache_leaves_are_shared(this_leaf, sib_leaf)) {
|
|
|
|
cpumask_clear_cpu(cpu, &sib_leaf->shared_cpu_map);
|
|
|
|
cpumask_clear_cpu(sibling, &this_leaf->shared_cpu_map);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2014-09-30 14:48:25 +01:00
|
|
|
}
|
|
|
|
}
|
drivers: base: cacheinfo: Update cpu_map_populated during CPU Hotplug
Until commit 5c2712387d48 ("cacheinfo: Fix LLC is not exported through
sysfs"), cacheinfo called populate_cache_leaves() for CPU coming online
which let the arch specific functions handle (at least on x86)
populating the shared_cpu_map. However, with the changes in the
aforementioned commit, populate_cache_leaves() is not called when a CPU
comes online as a result of hotplug since last_level_cache_is_valid()
returns true as the cacheinfo data is not discarded. The CPU coming
online is not present in shared_cpu_map, however, it will not be added
since the cpu_cacheinfo->cpu_map_populated flag is set (it is set in
populate_cache_leaves() when cacheinfo is first populated for x86).
This can lead to inconsistencies in the shared_cpu_map when an offlined
CPU comes online again. Example below depicts the inconsistency in the
shared_cpu_list in cacheinfo when CPU8 is offlined and onlined again on
a 3rd Generation EPYC processor:
# for i in /sys/devices/system/cpu/cpu8/cache/index*/shared_cpu_list; do echo -n "$i: "; cat $i; done
/sys/devices/system/cpu/cpu8/cache/index0/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_list: 8-15,136-143
# echo 0 > /sys/devices/system/cpu/cpu8/online
# echo 1 > /sys/devices/system/cpu/cpu8/online
# for i in /sys/devices/system/cpu/cpu8/cache/index*/shared_cpu_list; do echo -n "$i: "; cat $i; done
/sys/devices/system/cpu/cpu8/cache/index0/shared_cpu_list: 8
/sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_list: 8
/sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_list: 8
/sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_list: 8
# cat /sys/devices/system/cpu/cpu136/cache/index0/shared_cpu_list
136
# cat /sys/devices/system/cpu/cpu136/cache/index3/shared_cpu_list
9-15,136-143
Clear the flag when the CPU is removed from shared_cpu_map when
cache_shared_cpu_map_remove() is called during CPU hotplug. This will
allow cache_shared_cpu_map_setup() to add the CPU coming back online in
the shared_cpu_map. Set the flag again when the shared_cpu_map is setup.
Following are results of performing the same test as described above with
the changes:
# for i in /sys/devices/system/cpu/cpu8/cache/index*/shared_cpu_list; do echo -n "$i: "; cat $i; done
/sys/devices/system/cpu/cpu8/cache/index0/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_list: 8-15,136-143
# echo 0 > /sys/devices/system/cpu/cpu8/online
# echo 1 > /sys/devices/system/cpu/cpu8/online
# for i in /sys/devices/system/cpu/cpu8/cache/index*/shared_cpu_list; do echo -n "$i: "; cat $i; done
/sys/devices/system/cpu/cpu8/cache/index0/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_list: 8-15,136-143
# cat /sys/devices/system/cpu/cpu136/cache/index0/shared_cpu_list
8,136
# cat /sys/devices/system/cpu/cpu136/cache/index3/shared_cpu_list
8-15,136-143
Fixes: 5c2712387d48 ("cacheinfo: Fix LLC is not exported through sysfs")
Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
Reviewed-by: Yicong Yang <yangyicong@hisilicon.com>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Link: https://lore.kernel.org/r/20230508084115.1157-3-kprateek.nayak@amd.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2023-05-08 14:11:15 +05:30
|
|
|
|
|
|
|
/* cpu is no longer populated in the shared map */
|
|
|
|
this_cpu_ci->cpu_map_populated = false;
|
2014-09-30 14:48:25 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Detach @cpu from the shared_cpu_map of every cache leaf it shares with
 * other CPUs. Nothing is done when no cacheinfo array has been allocated
 * for @cpu. The per-CPU cacheinfo array itself is not released here.
 */
static void free_cache_attributes(unsigned int cpu)
{
	if (per_cpu_cacheinfo(cpu))
		cache_shared_cpu_map_remove(cpu);
}
|
|
|
|
|
cacheinfo: Add arch specific early level initializer
This patch gives architecture specific code the ability to initialize
the cache level and allocate cacheinfo memory early, when cache level
initialization runs on the primary CPU for all possible CPUs.
This is part of a patch series that attempts to further the work in
commit 5944ce092b97 ("arch_topology: Build cacheinfo from primary CPU").
Previously, in the absence of any DT/ACPI cache info, architecture
specific cache detection and info allocation for secondary CPUs would
happen in non-preemptible context during early CPU initialization and
trigger a "BUG: sleeping function called from invalid context" splat on
an RT kernel.
More specifically, this patch adds the early_cache_level() function,
which is called by fetch_cache_info() as a fallback when the number of
cache leaves cannot be extracted from DT/ACPI. In the default generic
(weak) implementation, this new function returns -ENOENT, which
preserves the original behavior for architectures that do not implement
the function.
Since early detection can get the number of cache leaves wrong in some
cases*, additional logic is added to still call init_cache_level() later
on the secondary CPU, therefore giving the architecture specific code an
opportunity to go back and fix the initial guess. Again, the original
behavior is preserved for architectures that do not implement the new
function.
* For example, on arm64, CLIDR_EL1 detection works only when it runs on
the current CPU. In other words, a CPU cannot detect the cache depth
for any other CPU than itself.
Signed-off-by: Radu Rendec <rrendec@redhat.com>
Reviewed-by: Pierre Gondois <pierre.gondois@arm.com>
Link: https://lore.kernel.org/r/20230412185759.755408-2-rrendec@redhat.com
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
2023-04-12 14:57:57 -04:00
|
|
|
/*
 * Generic (weak) fallback for early cache level detection.
 *
 * fetch_cache_info() calls this when the number of cache leaves could not
 * be obtained from DT/ACPI. This default implementation reports that no
 * information is available; an architecture may provide its own version.
 *
 * Returns -ENOENT unconditionally.
 */
int __weak early_cache_level(unsigned int cpu)
{
	return -ENOENT;
}
|
|
|
|
|
2014-09-30 14:48:25 +01:00
|
|
|
/*
 * Generic (weak) fallback for architecture specific cache level
 * initialisation of @cpu. This default implementation has nothing to
 * report; an architecture may provide its own version.
 *
 * Returns -ENOENT unconditionally.
 */
int __weak init_cache_level(unsigned int cpu)
{
	return -ENOENT;
}
|
|
|
|
|
|
|
|
/*
 * Generic (weak) fallback for architecture specific population of the
 * cache leaves of @cpu. This default implementation fills in nothing;
 * an architecture may provide its own version.
 *
 * Returns -ENOENT unconditionally.
 */
int __weak populate_cache_leaves(unsigned int cpu)
{
	return -ENOENT;
}
|
|
|
|
|
cacheinfo: Allocate memory during CPU hotplug if not done from the primary CPU
Commit
5944ce092b97 ("arch_topology: Build cacheinfo from primary CPU")
adds functionality that architectures can use to optionally allocate and
build cacheinfo early during boot. Commit
6539cffa9495 ("cacheinfo: Add arch specific early level initializer")
lets secondary CPUs correct (and reallocate memory) cacheinfo data if
needed.
If the early build functionality is not used and cacheinfo does not need
correction, memory for cacheinfo is never allocated. x86 does not use
the early build functionality. Consequently, during the cacheinfo CPU
hotplug callback, last_level_cache_is_valid() attempts to dereference
a NULL pointer:
BUG: kernel NULL pointer dereference, address: 0000000000000100
#PF: supervisor read access in kernel mode
#PF: error_code(0x0000) - not present page
PGD 0 P4D 0
Oops: 0000 [#1] PREEMPT SMP NOPTI
CPU: 0 PID 19 Comm: cpuhp/0 Not tainted 6.4.0-rc2 #1
RIP: 0010: last_level_cache_is_valid+0x95/0xe0a
Allocate memory for cacheinfo during the cacheinfo CPU hotplug callback
if not done earlier.
Moreover, before determining the validity of the last-level cache info,
ensure that it has been allocated. Simply checking for non-zero
cache_leaves() is not sufficient, as some architectures (e.g., Intel
processors) have non-zero cache_leaves() before allocation.
Dereferencing NULL cacheinfo can occur in update_per_cpu_data_slice_size().
This function iterates over all online CPUs. However, a CPU may have come
online recently, but its cacheinfo may not have been allocated yet.
While here, remove an unnecessary indentation in allocate_cache_info().
[ bp: Massage. ]
Fixes: 6539cffa9495 ("cacheinfo: Add arch specific early level initializer")
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Radu Rendec <rrendec@redhat.com>
Reviewed-by: Nikolay Borisov <nik.borisov@suse.com>
Reviewed-by: Andreas Herrmann <aherrmann@suse.de>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Cc: stable@vger.kernel.org # 6.3+
Link: https://lore.kernel.org/r/20241128002247.26726-2-ricardo.neri-calderon@linux.intel.com
2024-11-27 16:22:46 -08:00
|
|
|
static inline int allocate_cache_info(int cpu)
|
arch_topology: Build cacheinfo from primary CPU
commit 3fcbf1c77d08 ("arch_topology: Fix cache attributes detection
in the CPU hotplug path")
adds a call to detect_cache_attributes() to populate the cacheinfo
before updating the siblings mask. detect_cache_attributes() allocates
memory and can take the PPTT mutex (on ACPI platforms). On PREEMPT_RT
kernels, on secondary CPUs, this triggers a:
'BUG: sleeping function called from invalid context' [1]
as the code is executed with preemption and interrupts disabled.
The primary CPU was previously storing the cache information using
the now removed (struct cpu_topology).llc_id:
commit 5b8dc787ce4a ("arch_topology: Drop LLC identifier stash from
the CPU topology")
allocate_cache_info() tries to build the cacheinfo from the primary
CPU prior secondary CPUs boot, if the DT/ACPI description
contains cache information.
If allocate_cache_info() fails, then fallback to the current state
for the cacheinfo allocation. [1] will be triggered in such case.
When unplugging a CPU, the cacheinfo memory cannot be freed. If it
was, then the memory would be allocated early by the re-plugged
CPU and would trigger [1].
Note that populate_cache_leaves() might be called multiple times
due to populate_leaves being moved up. This is required since
detect_cache_attributes() might be called with per_cpu_cacheinfo(cpu)
being allocated but not populated.
[1]:
| BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:46
| in_atomic(): 1, irqs_disabled(): 128, non_block: 0, pid: 0, name: swapper/111
| preempt_count: 1, expected: 0
| RCU nest depth: 1, expected: 1
| 3 locks held by swapper/111/0:
| #0: (&pcp->lock){+.+.}-{3:3}, at: get_page_from_freelist+0x218/0x12c8
| #1: (rcu_read_lock){....}-{1:3}, at: rt_spin_trylock+0x48/0xf0
| #2: (&zone->lock){+.+.}-{3:3}, at: rmqueue_bulk+0x64/0xa80
| irq event stamp: 0
| hardirqs last enabled at (0): 0x0
| hardirqs last disabled at (0): copy_process+0x5dc/0x1ab8
| softirqs last enabled at (0): copy_process+0x5dc/0x1ab8
| softirqs last disabled at (0): 0x0
| Preemption disabled at:
| migrate_enable+0x30/0x130
| CPU: 111 PID: 0 Comm: swapper/111 Tainted: G W 6.0.0-rc4-rt6-[...]
| Call trace:
| __kmalloc+0xbc/0x1e8
| detect_cache_attributes+0x2d4/0x5f0
| update_siblings_masks+0x30/0x368
| store_cpu_topology+0x78/0xb8
| secondary_start_kernel+0xd0/0x198
| __secondary_switched+0xb0/0xb4
Signed-off-by: Pierre Gondois <pierre.gondois@arm.com>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Acked-by: Palmer Dabbelt <palmer@rivosinc.com>
Link: https://lore.kernel.org/r/20230104183033.755668-7-pierre.gondois@arm.com
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
2023-01-04 19:30:29 +01:00
|
|
|
{
|
cacheinfo: Allocate memory during CPU hotplug if not done from the primary CPU
Commit
5944ce092b97 ("arch_topology: Build cacheinfo from primary CPU")
adds functionality that architectures can use to optionally allocate and
build cacheinfo early during boot. Commit
6539cffa9495 ("cacheinfo: Add arch specific early level initializer")
lets secondary CPUs correct (and reallocate memory) cacheinfo data if
needed.
If the early build functionality is not used and cacheinfo does not need
correction, memory for cacheinfo is never allocated. x86 does not use
the early build functionality. Consequently, during the cacheinfo CPU
hotplug callback, last_level_cache_is_valid() attempts to dereference
a NULL pointer:
BUG: kernel NULL pointer dereference, address: 0000000000000100
#PF: supervisor read access in kernel mode
#PF: error_code(0x0000) - not present page
PGD 0 P4D 0
Oops: 0000 [#1] PREEPMT SMP NOPTI
CPU: 0 PID 19 Comm: cpuhp/0 Not tainted 6.4.0-rc2 #1
RIP: 0010: last_level_cache_is_valid+0x95/0xe0a
Allocate memory for cacheinfo during the cacheinfo CPU hotplug callback
if not done earlier.
Moreover, before determining the validity of the last-level cache info,
ensure that it has been allocated. Simply checking for non-zero
cache_leaves() is not sufficient, as some architectures (e.g., Intel
processors) have non-zero cache_leaves() before allocation.
Dereferencing NULL cacheinfo can occur in update_per_cpu_data_slice_size().
This function iterates over all online CPUs. However, a CPU may have come
online recently, but its cacheinfo may not have been allocated yet.
While here, remove an unnecessary indentation in allocate_cache_info().
[ bp: Massage. ]
Fixes: 6539cffa9495 ("cacheinfo: Add arch specific early level initializer")
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Radu Rendec <rrendec@redhat.com>
Reviewed-by: Nikolay Borisov <nik.borisov@suse.com>
Reviewed-by: Andreas Herrmann <aherrmann@suse.de>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Cc: stable@vger.kernel.org # 6.3+
Link: https://lore.kernel.org/r/20241128002247.26726-2-ricardo.neri-calderon@linux.intel.com
2024-11-27 16:22:46 -08:00
|
|
|
per_cpu_cacheinfo(cpu) = kcalloc(cache_leaves(cpu), sizeof(struct cacheinfo), GFP_ATOMIC);
|
arch_topology: Build cacheinfo from primary CPU
commit 3fcbf1c77d08 ("arch_topology: Fix cache attributes detection
in the CPU hotplug path")
adds a call to detect_cache_attributes() to populate the cacheinfo
before updating the siblings mask. detect_cache_attributes() allocates
memory and can take the PPTT mutex (on ACPI platforms). On PREEMPT_RT
kernels, on secondary CPUs, this triggers a:
'BUG: sleeping function called from invalid context' [1]
as the code is executed with preemption and interrupts disabled.
The primary CPU was previously storing the cache information using
the now removed (struct cpu_topology).llc_id:
commit 5b8dc787ce4a ("arch_topology: Drop LLC identifier stash from
the CPU topology")
allocate_cache_info() tries to build the cacheinfo from the primary
CPU prior secondary CPUs boot, if the DT/ACPI description
contains cache information.
If allocate_cache_info() fails, then fallback to the current state
for the cacheinfo allocation. [1] will be triggered in such case.
When unplugging a CPU, the cacheinfo memory cannot be freed. If it
was, then the memory would be allocated early by the re-plugged
CPU and would trigger [1].
Note that populate_cache_leaves() might be called multiple times
due to populate_leaves being moved up. This is required since
detect_cache_attributes() might be called with per_cpu_cacheinfo(cpu)
being allocated but not populated.
[1]:
| BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:46
| in_atomic(): 1, irqs_disabled(): 128, non_block: 0, pid: 0, name: swapper/111
| preempt_count: 1, expected: 0
| RCU nest depth: 1, expected: 1
| 3 locks held by swapper/111/0:
| #0: (&pcp->lock){+.+.}-{3:3}, at: get_page_from_freelist+0x218/0x12c8
| #1: (rcu_read_lock){....}-{1:3}, at: rt_spin_trylock+0x48/0xf0
| #2: (&zone->lock){+.+.}-{3:3}, at: rmqueue_bulk+0x64/0xa80
| irq event stamp: 0
| hardirqs last enabled at (0): 0x0
| hardirqs last disabled at (0): copy_process+0x5dc/0x1ab8
| softirqs last enabled at (0): copy_process+0x5dc/0x1ab8
| softirqs last disabled at (0): 0x0
| Preemption disabled at:
| migrate_enable+0x30/0x130
| CPU: 111 PID: 0 Comm: swapper/111 Tainted: G W 6.0.0-rc4-rt6-[...]
| Call trace:
| __kmalloc+0xbc/0x1e8
| detect_cache_attributes+0x2d4/0x5f0
| update_siblings_masks+0x30/0x368
| store_cpu_topology+0x78/0xb8
| secondary_start_kernel+0xd0/0x198
| __secondary_switched+0xb0/0xb4
Signed-off-by: Pierre Gondois <pierre.gondois@arm.com>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Acked-by: Palmer Dabbelt <palmer@rivosinc.com>
Link: https://lore.kernel.org/r/20230104183033.755668-7-pierre.gondois@arm.com
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
2023-01-04 19:30:29 +01:00
|
|
|
if (!per_cpu_cacheinfo(cpu)) {
|
|
|
|
cache_leaves(cpu) = 0;
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int fetch_cache_info(unsigned int cpu)
|
|
|
|
{
|
cacheinfo: Add arch specific early level initializer
This patch gives architecture specific code the ability to initialize
the cache level and allocate cacheinfo memory early, when cache level
initialization runs on the primary CPU for all possible CPUs.
This is part of a patch series that attempts to further the work in
commit 5944ce092b97 ("arch_topology: Build cacheinfo from primary CPU").
Previously, in the absence of any DT/ACPI cache info, architecture
specific cache detection and info allocation for secondary CPUs would
happen in non-preemptible context during early CPU initialization and
trigger a "BUG: sleeping function called from invalid context" splat on
an RT kernel.
More specifically, this patch adds the early_cache_level() function,
which is called by fetch_cache_info() as a fallback when the number of
cache leaves cannot be extracted from DT/ACPI. In the default generic
(weak) implementation, this new function returns -ENOENT, which
preserves the original behavior for architectures that do not implement
the function.
Since early detection can get the number of cache leaves wrong in some
cases*, additional logic is added to still call init_cache_level() later
on the secondary CPU, therefore giving the architecture specific code an
opportunity to go back and fix the initial guess. Again, the original
behavior is preserved for architectures that do not implement the new
function.
* For example, on arm64, CLIDR_EL1 detection works only when it runs on
the current CPU. In other words, a CPU cannot detect the cache depth
for any other CPU than itself.
Signed-off-by: Radu Rendec <rrendec@redhat.com>
Reviewed-by: Pierre Gondois <pierre.gondois@arm.com>
Link: https://lore.kernel.org/r/20230412185759.755408-2-rrendec@redhat.com
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
2023-04-12 14:57:57 -04:00
|
|
|
struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
|
2023-01-24 16:40:46 +01:00
|
|
|
unsigned int levels = 0, split_levels = 0;
|
arch_topology: Build cacheinfo from primary CPU
commit 3fcbf1c77d08 ("arch_topology: Fix cache attributes detection
in the CPU hotplug path")
adds a call to detect_cache_attributes() to populate the cacheinfo
before updating the siblings mask. detect_cache_attributes() allocates
memory and can take the PPTT mutex (on ACPI platforms). On PREEMPT_RT
kernels, on secondary CPUs, this triggers a:
'BUG: sleeping function called from invalid context' [1]
as the code is executed with preemption and interrupts disabled.
The primary CPU was previously storing the cache information using
the now removed (struct cpu_topology).llc_id:
commit 5b8dc787ce4a ("arch_topology: Drop LLC identifier stash from
the CPU topology")
allocate_cache_info() tries to build the cacheinfo from the primary
CPU prior to secondary CPUs booting, if the DT/ACPI description
contains cache information.
If allocate_cache_info() fails, then fallback to the current state
for the cacheinfo allocation. [1] will be triggered in such case.
When unplugging a CPU, the cacheinfo memory cannot be freed. If it
was, then the memory would be allocated early by the re-plugged
CPU and would trigger [1].
Note that populate_cache_leaves() might be called multiple times
due to populate_leaves being moved up. This is required since
detect_cache_attributes() might be called with per_cpu_cacheinfo(cpu)
being allocated but not populated.
[1]:
| BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:46
| in_atomic(): 1, irqs_disabled(): 128, non_block: 0, pid: 0, name: swapper/111
| preempt_count: 1, expected: 0
| RCU nest depth: 1, expected: 1
| 3 locks held by swapper/111/0:
| #0: (&pcp->lock){+.+.}-{3:3}, at: get_page_from_freelist+0x218/0x12c8
| #1: (rcu_read_lock){....}-{1:3}, at: rt_spin_trylock+0x48/0xf0
| #2: (&zone->lock){+.+.}-{3:3}, at: rmqueue_bulk+0x64/0xa80
| irq event stamp: 0
| hardirqs last enabled at (0): 0x0
| hardirqs last disabled at (0): copy_process+0x5dc/0x1ab8
| softirqs last enabled at (0): copy_process+0x5dc/0x1ab8
| softirqs last disabled at (0): 0x0
| Preemption disabled at:
| migrate_enable+0x30/0x130
| CPU: 111 PID: 0 Comm: swapper/111 Tainted: G W 6.0.0-rc4-rt6-[...]
| Call trace:
| __kmalloc+0xbc/0x1e8
| detect_cache_attributes+0x2d4/0x5f0
| update_siblings_masks+0x30/0x368
| store_cpu_topology+0x78/0xb8
| secondary_start_kernel+0xd0/0x198
| __secondary_switched+0xb0/0xb4
Signed-off-by: Pierre Gondois <pierre.gondois@arm.com>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Acked-by: Palmer Dabbelt <palmer@rivosinc.com>
Link: https://lore.kernel.org/r/20230104183033.755668-7-pierre.gondois@arm.com
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
2023-01-04 19:30:29 +01:00
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (acpi_disabled) {
|
|
|
|
ret = init_of_cache_level(cpu);
|
|
|
|
} else {
|
|
|
|
ret = acpi_get_cache_info(cpu, &levels, &split_levels);
|
cacheinfo: Add arch specific early level initializer
This patch gives architecture specific code the ability to initialize
the cache level and allocate cacheinfo memory early, when cache level
initialization runs on the primary CPU for all possible CPUs.
This is part of a patch series that attempts to further the work in
commit 5944ce092b97 ("arch_topology: Build cacheinfo from primary CPU").
Previously, in the absence of any DT/ACPI cache info, architecture
specific cache detection and info allocation for secondary CPUs would
happen in non-preemptible context during early CPU initialization and
trigger a "BUG: sleeping function called from invalid context" splat on
an RT kernel.
More specifically, this patch adds the early_cache_level() function,
which is called by fetch_cache_info() as a fallback when the number of
cache leaves cannot be extracted from DT/ACPI. In the default generic
(weak) implementation, this new function returns -ENOENT, which
preserves the original behavior for architectures that do not implement
the function.
Since early detection can get the number of cache leaves wrong in some
cases*, additional logic is added to still call init_cache_level() later
on the secondary CPU, therefore giving the architecture specific code an
opportunity to go back and fix the initial guess. Again, the original
behavior is preserved for architectures that do not implement the new
function.
* For example, on arm64, CLIDR_EL1 detection works only when it runs on
the current CPU. In other words, a CPU cannot detect the cache depth
for any other CPU than itself.
Signed-off-by: Radu Rendec <rrendec@redhat.com>
Reviewed-by: Pierre Gondois <pierre.gondois@arm.com>
Link: https://lore.kernel.org/r/20230412185759.755408-2-rrendec@redhat.com
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
2023-04-12 14:57:57 -04:00
|
|
|
if (!ret) {
|
|
|
|
this_cpu_ci->num_levels = levels;
|
|
|
|
/*
|
|
|
|
* This assumes that:
|
|
|
|
* - there cannot be any split caches (data/instruction)
|
|
|
|
* above a unified cache
|
|
|
|
* - data/instruction caches come by pair
|
|
|
|
*/
|
|
|
|
this_cpu_ci->num_leaves = levels + split_levels;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ret || !cache_leaves(cpu)) {
|
|
|
|
ret = early_cache_level(cpu);
|
|
|
|
if (ret)
|
arch_topology: Build cacheinfo from primary CPU
commit 3fcbf1c77d08 ("arch_topology: Fix cache attributes detection
in the CPU hotplug path")
adds a call to detect_cache_attributes() to populate the cacheinfo
before updating the siblings mask. detect_cache_attributes() allocates
memory and can take the PPTT mutex (on ACPI platforms). On PREEMPT_RT
kernels, on secondary CPUs, this triggers a:
'BUG: sleeping function called from invalid context' [1]
as the code is executed with preemption and interrupts disabled.
The primary CPU was previously storing the cache information using
the now removed (struct cpu_topology).llc_id:
commit 5b8dc787ce4a ("arch_topology: Drop LLC identifier stash from
the CPU topology")
allocate_cache_info() tries to build the cacheinfo from the primary
CPU prior to secondary CPUs booting, if the DT/ACPI description
contains cache information.
If allocate_cache_info() fails, then fallback to the current state
for the cacheinfo allocation. [1] will be triggered in such case.
When unplugging a CPU, the cacheinfo memory cannot be freed. If it
was, then the memory would be allocated early by the re-plugged
CPU and would trigger [1].
Note that populate_cache_leaves() might be called multiple times
due to populate_leaves being moved up. This is required since
detect_cache_attributes() might be called with per_cpu_cacheinfo(cpu)
being allocated but not populated.
[1]:
| BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:46
| in_atomic(): 1, irqs_disabled(): 128, non_block: 0, pid: 0, name: swapper/111
| preempt_count: 1, expected: 0
| RCU nest depth: 1, expected: 1
| 3 locks held by swapper/111/0:
| #0: (&pcp->lock){+.+.}-{3:3}, at: get_page_from_freelist+0x218/0x12c8
| #1: (rcu_read_lock){....}-{1:3}, at: rt_spin_trylock+0x48/0xf0
| #2: (&zone->lock){+.+.}-{3:3}, at: rmqueue_bulk+0x64/0xa80
| irq event stamp: 0
| hardirqs last enabled at (0): 0x0
| hardirqs last disabled at (0): copy_process+0x5dc/0x1ab8
| softirqs last enabled at (0): copy_process+0x5dc/0x1ab8
| softirqs last disabled at (0): 0x0
| Preemption disabled at:
| migrate_enable+0x30/0x130
| CPU: 111 PID: 0 Comm: swapper/111 Tainted: G W 6.0.0-rc4-rt6-[...]
| Call trace:
| __kmalloc+0xbc/0x1e8
| detect_cache_attributes+0x2d4/0x5f0
| update_siblings_masks+0x30/0x368
| store_cpu_topology+0x78/0xb8
| secondary_start_kernel+0xd0/0x198
| __secondary_switched+0xb0/0xb4
Signed-off-by: Pierre Gondois <pierre.gondois@arm.com>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Acked-by: Palmer Dabbelt <palmer@rivosinc.com>
Link: https://lore.kernel.org/r/20230104183033.755668-7-pierre.gondois@arm.com
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
2023-01-04 19:30:29 +01:00
|
|
|
return ret;
|
|
|
|
|
cacheinfo: Add arch specific early level initializer
This patch gives architecture specific code the ability to initialize
the cache level and allocate cacheinfo memory early, when cache level
initialization runs on the primary CPU for all possible CPUs.
This is part of a patch series that attempts to further the work in
commit 5944ce092b97 ("arch_topology: Build cacheinfo from primary CPU").
Previously, in the absence of any DT/ACPI cache info, architecture
specific cache detection and info allocation for secondary CPUs would
happen in non-preemptible context during early CPU initialization and
trigger a "BUG: sleeping function called from invalid context" splat on
an RT kernel.
More specifically, this patch adds the early_cache_level() function,
which is called by fetch_cache_info() as a fallback when the number of
cache leaves cannot be extracted from DT/ACPI. In the default generic
(weak) implementation, this new function returns -ENOENT, which
preserves the original behavior for architectures that do not implement
the function.
Since early detection can get the number of cache leaves wrong in some
cases*, additional logic is added to still call init_cache_level() later
on the secondary CPU, therefore giving the architecture specific code an
opportunity to go back and fix the initial guess. Again, the original
behavior is preserved for architectures that do not implement the new
function.
* For example, on arm64, CLIDR_EL1 detection works only when it runs on
the current CPU. In other words, a CPU cannot detect the cache depth
for any other CPU than itself.
Signed-off-by: Radu Rendec <rrendec@redhat.com>
Reviewed-by: Pierre Gondois <pierre.gondois@arm.com>
Link: https://lore.kernel.org/r/20230412185759.755408-2-rrendec@redhat.com
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
2023-04-12 14:57:57 -04:00
|
|
|
if (!cache_leaves(cpu))
|
|
|
|
return -ENOENT;
|
|
|
|
|
|
|
|
this_cpu_ci->early_ci_levels = true;
|
arch_topology: Build cacheinfo from primary CPU
commit 3fcbf1c77d08 ("arch_topology: Fix cache attributes detection
in the CPU hotplug path")
adds a call to detect_cache_attributes() to populate the cacheinfo
before updating the siblings mask. detect_cache_attributes() allocates
memory and can take the PPTT mutex (on ACPI platforms). On PREEMPT_RT
kernels, on secondary CPUs, this triggers a:
'BUG: sleeping function called from invalid context' [1]
as the code is executed with preemption and interrupts disabled.
The primary CPU was previously storing the cache information using
the now removed (struct cpu_topology).llc_id:
commit 5b8dc787ce4a ("arch_topology: Drop LLC identifier stash from
the CPU topology")
allocate_cache_info() tries to build the cacheinfo from the primary
CPU prior to secondary CPUs booting, if the DT/ACPI description
contains cache information.
If allocate_cache_info() fails, then fall back to the current state
for the cacheinfo allocation. [1] will be triggered in such case.
When unplugging a CPU, the cacheinfo memory cannot be freed. If it
was, then the memory would be allocated early by the re-plugged
CPU and would trigger [1].
Note that populate_cache_leaves() might be called multiple times
due to populate_leaves being moved up. This is required since
detect_cache_attributes() might be called with per_cpu_cacheinfo(cpu)
being allocated but not populated.
[1]:
| BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:46
| in_atomic(): 1, irqs_disabled(): 128, non_block: 0, pid: 0, name: swapper/111
| preempt_count: 1, expected: 0
| RCU nest depth: 1, expected: 1
| 3 locks held by swapper/111/0:
| #0: (&pcp->lock){+.+.}-{3:3}, at: get_page_from_freelist+0x218/0x12c8
| #1: (rcu_read_lock){....}-{1:3}, at: rt_spin_trylock+0x48/0xf0
| #2: (&zone->lock){+.+.}-{3:3}, at: rmqueue_bulk+0x64/0xa80
| irq event stamp: 0
| hardirqs last enabled at (0): 0x0
| hardirqs last disabled at (0): copy_process+0x5dc/0x1ab8
| softirqs last enabled at (0): copy_process+0x5dc/0x1ab8
| softirqs last disabled at (0): 0x0
| Preemption disabled at:
| migrate_enable+0x30/0x130
| CPU: 111 PID: 0 Comm: swapper/111 Tainted: G W 6.0.0-rc4-rt6-[...]
| Call trace:
| __kmalloc+0xbc/0x1e8
| detect_cache_attributes+0x2d4/0x5f0
| update_siblings_masks+0x30/0x368
| store_cpu_topology+0x78/0xb8
| secondary_start_kernel+0xd0/0x198
| __secondary_switched+0xb0/0xb4
Signed-off-by: Pierre Gondois <pierre.gondois@arm.com>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Acked-by: Palmer Dabbelt <palmer@rivosinc.com>
Link: https://lore.kernel.org/r/20230104183033.755668-7-pierre.gondois@arm.com
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
2023-01-04 19:30:29 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
return allocate_cache_info(cpu);
|
|
|
|
}
|
|
|
|
|
cacheinfo: Add arch specific early level initializer
This patch gives architecture specific code the ability to initialize
the cache level and allocate cacheinfo memory early, when cache level
initialization runs on the primary CPU for all possible CPUs.
This is part of a patch series that attempts to further the work in
commit 5944ce092b97 ("arch_topology: Build cacheinfo from primary CPU").
Previously, in the absence of any DT/ACPI cache info, architecture
specific cache detection and info allocation for secondary CPUs would
happen in non-preemptible context during early CPU initialization and
trigger a "BUG: sleeping function called from invalid context" splat on
an RT kernel.
More specifically, this patch adds the early_cache_level() function,
which is called by fetch_cache_info() as a fallback when the number of
cache leaves cannot be extracted from DT/ACPI. In the default generic
(weak) implementation, this new function returns -ENOENT, which
preserves the original behavior for architectures that do not implement
the function.
Since early detection can get the number of cache leaves wrong in some
cases*, additional logic is added to still call init_cache_level() later
on the secondary CPU, therefore giving the architecture specific code an
opportunity to go back and fix the initial guess. Again, the original
behavior is preserved for architectures that do not implement the new
function.
* For example, on arm64, CLIDR_EL1 detection works only when it runs on
the current CPU. In other words, a CPU cannot detect the cache depth
for any other CPU than itself.
Signed-off-by: Radu Rendec <rrendec@redhat.com>
Reviewed-by: Pierre Gondois <pierre.gondois@arm.com>
Link: https://lore.kernel.org/r/20230412185759.755408-2-rrendec@redhat.com
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
2023-04-12 14:57:57 -04:00
|
|
|
static inline int init_level_allocate_ci(unsigned int cpu)
|
2014-09-30 14:48:25 +01:00
|
|
|
{
|
cacheinfo: Add arch specific early level initializer
This patch gives architecture specific code the ability to initialize
the cache level and allocate cacheinfo memory early, when cache level
initialization runs on the primary CPU for all possible CPUs.
This is part of a patch series that attempts to further the work in
commit 5944ce092b97 ("arch_topology: Build cacheinfo from primary CPU").
Previously, in the absence of any DT/ACPI cache info, architecture
specific cache detection and info allocation for secondary CPUs would
happen in non-preemptible context during early CPU initialization and
trigger a "BUG: sleeping function called from invalid context" splat on
an RT kernel.
More specifically, this patch adds the early_cache_level() function,
which is called by fetch_cache_info() as a fallback when the number of
cache leaves cannot be extracted from DT/ACPI. In the default generic
(weak) implementation, this new function returns -ENOENT, which
preserves the original behavior for architectures that do not implement
the function.
Since early detection can get the number of cache leaves wrong in some
cases*, additional logic is added to still call init_cache_level() later
on the secondary CPU, therefore giving the architecture specific code an
opportunity to go back and fix the initial guess. Again, the original
behavior is preserved for architectures that do not implement the new
function.
* For example, on arm64, CLIDR_EL1 detection works only when it runs on
the current CPU. In other words, a CPU cannot detect the cache depth
for any other CPU than itself.
Signed-off-by: Radu Rendec <rrendec@redhat.com>
Reviewed-by: Pierre Gondois <pierre.gondois@arm.com>
Link: https://lore.kernel.org/r/20230412185759.755408-2-rrendec@redhat.com
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
2023-04-12 14:57:57 -04:00
|
|
|
unsigned int early_leaves = cache_leaves(cpu);
|
2014-09-30 14:48:25 +01:00
|
|
|
|
arch_topology: Build cacheinfo from primary CPU
commit 3fcbf1c77d08 ("arch_topology: Fix cache attributes detection
in the CPU hotplug path")
adds a call to detect_cache_attributes() to populate the cacheinfo
before updating the siblings mask. detect_cache_attributes() allocates
memory and can take the PPTT mutex (on ACPI platforms). On PREEMPT_RT
kernels, on secondary CPUs, this triggers a:
'BUG: sleeping function called from invalid context' [1]
as the code is executed with preemption and interrupts disabled.
The primary CPU was previously storing the cache information using
the now removed (struct cpu_topology).llc_id:
commit 5b8dc787ce4a ("arch_topology: Drop LLC identifier stash from
the CPU topology")
allocate_cache_info() tries to build the cacheinfo from the primary
CPU prior secondary CPUs boot, if the DT/ACPI description
contains cache information.
If allocate_cache_info() fails, then fallback to the current state
for the cacheinfo allocation. [1] will be triggered in such case.
When unplugging a CPU, the cacheinfo memory cannot be freed. If it
was, then the memory would be allocated early by the re-plugged
CPU and would trigger [1].
Note that populate_cache_leaves() might be called multiple times
due to populate_leaves being moved up. This is required since
detect_cache_attributes() might be called with per_cpu_cacheinfo(cpu)
being allocated but not populated.
[1]:
| BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:46
| in_atomic(): 1, irqs_disabled(): 128, non_block: 0, pid: 0, name: swapper/111
| preempt_count: 1, expected: 0
| RCU nest depth: 1, expected: 1
| 3 locks held by swapper/111/0:
| #0: (&pcp->lock){+.+.}-{3:3}, at: get_page_from_freelist+0x218/0x12c8
| #1: (rcu_read_lock){....}-{1:3}, at: rt_spin_trylock+0x48/0xf0
| #2: (&zone->lock){+.+.}-{3:3}, at: rmqueue_bulk+0x64/0xa80
| irq event stamp: 0
| hardirqs last enabled at (0): 0x0
| hardirqs last disabled at (0): copy_process+0x5dc/0x1ab8
| softirqs last enabled at (0): copy_process+0x5dc/0x1ab8
| softirqs last disabled at (0): 0x0
| Preemption disabled at:
| migrate_enable+0x30/0x130
| CPU: 111 PID: 0 Comm: swapper/111 Tainted: G W 6.0.0-rc4-rt6-[...]
| Call trace:
| __kmalloc+0xbc/0x1e8
| detect_cache_attributes+0x2d4/0x5f0
| update_siblings_masks+0x30/0x368
| store_cpu_topology+0x78/0xb8
| secondary_start_kernel+0xd0/0x198
| __secondary_switched+0xb0/0xb4
Signed-off-by: Pierre Gondois <pierre.gondois@arm.com>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Acked-by: Palmer Dabbelt <palmer@rivosinc.com>
Link: https://lore.kernel.org/r/20230104183033.755668-7-pierre.gondois@arm.com
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
2023-01-04 19:30:29 +01:00
|
|
|
/* Since early initialization/allocation of the cacheinfo is allowed
|
|
|
|
* via fetch_cache_info() and this also gets called as CPU hotplug
|
|
|
|
* callbacks via cacheinfo_cpu_online, the init/alloc can be skipped
|
|
|
|
* as it will happen only once (the cacheinfo memory is never freed).
|
cacheinfo: Add arch specific early level initializer
This patch gives architecture specific code the ability to initialize
the cache level and allocate cacheinfo memory early, when cache level
initialization runs on the primary CPU for all possible CPUs.
This is part of a patch series that attempts to further the work in
commit 5944ce092b97 ("arch_topology: Build cacheinfo from primary CPU").
Previously, in the absence of any DT/ACPI cache info, architecture
specific cache detection and info allocation for secondary CPUs would
happen in non-preemptible context during early CPU initialization and
trigger a "BUG: sleeping function called from invalid context" splat on
an RT kernel.
More specifically, this patch adds the early_cache_level() function,
which is called by fetch_cache_info() as a fallback when the number of
cache leaves cannot be extracted from DT/ACPI. In the default generic
(weak) implementation, this new function returns -ENOENT, which
preserves the original behavior for architectures that do not implement
the function.
Since early detection can get the number of cache leaves wrong in some
cases*, additional logic is added to still call init_cache_level() later
on the secondary CPU, therefore giving the architecture specific code an
opportunity to go back and fix the initial guess. Again, the original
behavior is preserved for architectures that do not implement the new
function.
* For example, on arm64, CLIDR_EL1 detection works only when it runs on
the current CPU. In other words, a CPU cannot detect the cache depth
for any other CPU than itself.
Signed-off-by: Radu Rendec <rrendec@redhat.com>
Reviewed-by: Pierre Gondois <pierre.gondois@arm.com>
Link: https://lore.kernel.org/r/20230412185759.755408-2-rrendec@redhat.com
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
2023-04-12 14:57:57 -04:00
|
|
|
* Just populate the cacheinfo. However, if the cacheinfo has been
|
|
|
|
* allocated early through the arch-specific early_cache_level() call,
|
|
|
|
* there is a chance the info is wrong (this can happen on arm64). In
|
|
|
|
* that case, call init_cache_level() anyway to give the arch-specific
|
|
|
|
* code a chance to make things right.
|
2022-07-04 11:15:50 +01:00
|
|
|
*/
|
cacheinfo: Add arch specific early level initializer
This patch gives architecture specific code the ability to initialize
the cache level and allocate cacheinfo memory early, when cache level
initialization runs on the primary CPU for all possible CPUs.
This is part of a patch series that attempts to further the work in
commit 5944ce092b97 ("arch_topology: Build cacheinfo from primary CPU").
Previously, in the absence of any DT/ACPI cache info, architecture
specific cache detection and info allocation for secondary CPUs would
happen in non-preemptible context during early CPU initialization and
trigger a "BUG: sleeping function called from invalid context" splat on
an RT kernel.
More specifically, this patch adds the early_cache_level() function,
which is called by fetch_cache_info() as a fallback when the number of
cache leaves cannot be extracted from DT/ACPI. In the default generic
(weak) implementation, this new function returns -ENOENT, which
preserves the original behavior for architectures that do not implement
the function.
Since early detection can get the number of cache leaves wrong in some
cases*, additional logic is added to still call init_cache_level() later
on the secondary CPU, therefore giving the architecture specific code an
opportunity to go back and fix the initial guess. Again, the original
behavior is preserved for architectures that do not implement the new
function.
* For example, on arm64, CLIDR_EL1 detection works only when it runs on
the current CPU. In other words, a CPU cannot detect the cache depth
for any other CPU than itself.
Signed-off-by: Radu Rendec <rrendec@redhat.com>
Reviewed-by: Pierre Gondois <pierre.gondois@arm.com>
Link: https://lore.kernel.org/r/20230412185759.755408-2-rrendec@redhat.com
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
2023-04-12 14:57:57 -04:00
|
|
|
if (per_cpu_cacheinfo(cpu) && !ci_cacheinfo(cpu)->early_ci_levels)
|
|
|
|
return 0;
|
2022-07-04 11:15:50 +01:00
|
|
|
|
2015-05-27 11:26:13 +01:00
|
|
|
if (init_cache_level(cpu) || !cache_leaves(cpu))
|
2014-09-30 14:48:25 +01:00
|
|
|
return -ENOENT;
|
|
|
|
|
cacheinfo: Add arch specific early level initializer
This patch gives architecture specific code the ability to initialize
the cache level and allocate cacheinfo memory early, when cache level
initialization runs on the primary CPU for all possible CPUs.
This is part of a patch series that attempts to further the work in
commit 5944ce092b97 ("arch_topology: Build cacheinfo from primary CPU").
Previously, in the absence of any DT/ACPI cache info, architecture
specific cache detection and info allocation for secondary CPUs would
happen in non-preemptible context during early CPU initialization and
trigger a "BUG: sleeping function called from invalid context" splat on
an RT kernel.
More specifically, this patch adds the early_cache_level() function,
which is called by fetch_cache_info() as a fallback when the number of
cache leaves cannot be extracted from DT/ACPI. In the default generic
(weak) implementation, this new function returns -ENOENT, which
preserves the original behavior for architectures that do not implement
the function.
Since early detection can get the number of cache leaves wrong in some
cases*, additional logic is added to still call init_cache_level() later
on the secondary CPU, therefore giving the architecture specific code an
opportunity to go back and fix the initial guess. Again, the original
behavior is preserved for architectures that do not implement the new
function.
* For example, on arm64, CLIDR_EL1 detection works only when it runs on
the current CPU. In other words, a CPU cannot detect the cache depth
for any other CPU than itself.
Signed-off-by: Radu Rendec <rrendec@redhat.com>
Reviewed-by: Pierre Gondois <pierre.gondois@arm.com>
Link: https://lore.kernel.org/r/20230412185759.755408-2-rrendec@redhat.com
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
2023-04-12 14:57:57 -04:00
|
|
|
/*
|
|
|
|
* Now that we have properly initialized the cache level info, make
|
|
|
|
* sure we don't try to do that again the next time we are called
|
|
|
|
* (e.g. as CPU hotplug callbacks).
|
|
|
|
*/
|
|
|
|
ci_cacheinfo(cpu)->early_ci_levels = false;
|
|
|
|
|
cacheinfo: Allocate memory during CPU hotplug if not done from the primary CPU
Commit
5944ce092b97 ("arch_topology: Build cacheinfo from primary CPU")
adds functionality that architectures can use to optionally allocate and
build cacheinfo early during boot. Commit
6539cffa9495 ("cacheinfo: Add arch specific early level initializer")
lets secondary CPUs correct (and reallocate memory) cacheinfo data if
needed.
If the early build functionality is not used and cacheinfo does not need
correction, memory for cacheinfo is never allocated. x86 does not use
the early build functionality. Consequently, during the cacheinfo CPU
hotplug callback, last_level_cache_is_valid() attempts to dereference
a NULL pointer:
BUG: kernel NULL pointer dereference, address: 0000000000000100
#PF: supervisor read access in kernel mode
#PF: error_code(0x0000) - not present page
PGD 0 P4D 0
Oops: 0000 [#1] PREEPMT SMP NOPTI
CPU: 0 PID 19 Comm: cpuhp/0 Not tainted 6.4.0-rc2 #1
RIP: 0010: last_level_cache_is_valid+0x95/0xe0a
Allocate memory for cacheinfo during the cacheinfo CPU hotplug callback
if not done earlier.
Moreover, before determining the validity of the last-level cache info,
ensure that it has been allocated. Simply checking for non-zero
cache_leaves() is not sufficient, as some architectures (e.g., Intel
processors) have non-zero cache_leaves() before allocation.
Dereferencing NULL cacheinfo can occur in update_per_cpu_data_slice_size().
This function iterates over all online CPUs. However, a CPU may have come
online recently, but its cacheinfo may not have been allocated yet.
While here, remove an unnecessary indentation in allocate_cache_info().
[ bp: Massage. ]
Fixes: 6539cffa9495 ("cacheinfo: Add arch specific early level initializer")
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Radu Rendec <rrendec@redhat.com>
Reviewed-by: Nikolay Borisov <nik.borisov@suse.com>
Reviewed-by: Andreas Herrmann <aherrmann@suse.de>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Cc: stable@vger.kernel.org # 6.3+
Link: https://lore.kernel.org/r/20241128002247.26726-2-ricardo.neri-calderon@linux.intel.com
2024-11-27 16:22:46 -08:00
|
|
|
/*
|
|
|
|
* Some architectures (e.g., x86) do not use early initialization.
|
|
|
|
* Allocate memory now in such case.
|
|
|
|
*/
|
|
|
|
if (cache_leaves(cpu) <= early_leaves && per_cpu_cacheinfo(cpu))
|
cacheinfo: Add arch specific early level initializer
This patch gives architecture specific code the ability to initialize
the cache level and allocate cacheinfo memory early, when cache level
initialization runs on the primary CPU for all possible CPUs.
This is part of a patch series that attempts to further the work in
commit 5944ce092b97 ("arch_topology: Build cacheinfo from primary CPU").
Previously, in the absence of any DT/ACPI cache info, architecture
specific cache detection and info allocation for secondary CPUs would
happen in non-preemptible context during early CPU initialization and
trigger a "BUG: sleeping function called from invalid context" splat on
an RT kernel.
More specifically, this patch adds the early_cache_level() function,
which is called by fetch_cache_info() as a fallback when the number of
cache leaves cannot be extracted from DT/ACPI. In the default generic
(weak) implementation, this new function returns -ENOENT, which
preserves the original behavior for architectures that do not implement
the function.
Since early detection can get the number of cache leaves wrong in some
cases*, additional logic is added to still call init_cache_level() later
on the secondary CPU, therefore giving the architecture specific code an
opportunity to go back and fix the initial guess. Again, the original
behavior is preserved for architectures that do not implement the new
function.
* For example, on arm64, CLIDR_EL1 detection works only when it runs on
the current CPU. In other words, a CPU cannot detect the cache depth
for any other CPU than itself.
Signed-off-by: Radu Rendec <rrendec@redhat.com>
Reviewed-by: Pierre Gondois <pierre.gondois@arm.com>
Link: https://lore.kernel.org/r/20230412185759.755408-2-rrendec@redhat.com
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
2023-04-12 14:57:57 -04:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
kfree(per_cpu_cacheinfo(cpu));
|
|
|
|
return allocate_cache_info(cpu);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * detect_cache_attributes() - initialize, populate and link the cacheinfo
 * of @cpu.
 *
 * Makes sure the cache levels are known and the cacheinfo array is allocated,
 * populates the cache leaves when the last level cache is not yet valid, and
 * finally sets up the shared CPU maps.
 *
 * Returns 0 on success. If init_level_allocate_ci() fails its error is
 * returned directly; if populating the leaves or setting up the shared CPU
 * map fails, free_cache_attributes() is called for @cpu and the error of the
 * failing step is returned.
 */
int detect_cache_attributes(unsigned int cpu)
{
	int ret;

	ret = init_level_allocate_ci(cpu);
	if (ret)
		return ret;

	/*
	 * If LLC is valid the cache leaves were already populated so just go to
	 * update the cpu map.
	 */
	if (!last_level_cache_is_valid(cpu)) {
		/*
		 * populate_cache_leaves() may completely setup the cache leaves and
		 * shared_cpu_map or it may leave it partially setup.
		 */
		ret = populate_cache_leaves(cpu);
		if (ret)
			goto free_ci;
	}

	/*
	 * For systems using DT for cache hierarchy, fw_token
	 * and shared_cpu_map will be set up here only if they are
	 * not populated already
	 */
	ret = cache_shared_cpu_map_setup(cpu);
	if (ret) {
		/* @cpu is unsigned, so print it with %u rather than %d. */
		pr_warn("Unable to detect cache hierarchy for CPU %u\n", cpu);
		goto free_ci;
	}

	return 0;

free_ci:
	free_cache_attributes(cpu);
	return ret;
}
|
|
|
|
|
|
|
|
/* Per-CPU pointer to the cpuX/cache device. */
|
|
|
|
static DEFINE_PER_CPU(struct device *, ci_cache_dev);
|
|
|
|
/* Accessor for the cpuX/cache device pointer belonging to @cpu. */
#define per_cpu_cache_dev(cpu) (per_cpu(ci_cache_dev, cpu))
|
|
|
|
|
|
|
|
/*
 * NOTE(review): presumably the mask of CPUs whose cache devices have been
 * set up; its users are outside this excerpt - confirm.
 */
static cpumask_t cache_dev_map;
|
|
|
|
|
|
|
|
/* Per-CPU pointer to the array of devices for cpuX/cache/indexY. */
|
|
|
|
static DEFINE_PER_CPU(struct device **, ci_index_dev);
|
|
|
|
/* Accessor for the index device array belonging to @cpu. */
#define per_cpu_index_dev(cpu) (per_cpu(ci_index_dev, cpu))
|
|
|
|
/* Accessor for entry @idx of the index device array belonging to @cpu. */
#define per_cache_index_dev(cpu, idx) ((per_cpu_index_dev(cpu))[idx])
|
|
|
|
|
|
|
|
#define show_one(file_name, object) \
|
|
|
|
static ssize_t file_name##_show(struct device *dev, \
|
|
|
|
struct device_attribute *attr, char *buf) \
|
|
|
|
{ \
|
|
|
|
struct cacheinfo *this_leaf = dev_get_drvdata(dev); \
|
2020-09-16 13:40:42 -07:00
|
|
|
return sysfs_emit(buf, "%u\n", this_leaf->object); \
|
2014-09-30 14:48:25 +01:00
|
|
|
}
|
|
|
|
|
2016-10-22 06:19:49 -07:00
|
|
|
show_one(id, id);
|
2014-09-30 14:48:25 +01:00
|
|
|
show_one(level, level);
|
|
|
|
show_one(coherency_line_size, coherency_line_size);
|
|
|
|
show_one(number_of_sets, number_of_sets);
|
|
|
|
show_one(physical_line_partition, physical_line_partition);
|
|
|
|
show_one(ways_of_associativity, ways_of_associativity);
|
|
|
|
|
|
|
|
static ssize_t size_show(struct device *dev,
|
|
|
|
struct device_attribute *attr, char *buf)
|
|
|
|
{
|
|
|
|
struct cacheinfo *this_leaf = dev_get_drvdata(dev);
|
|
|
|
|
drivers core: Use sysfs_emit and sysfs_emit_at for show(device *...) functions
Convert the various sprintf fmaily calls in sysfs device show functions
to sysfs_emit and sysfs_emit_at for PAGE_SIZE buffer safety.
Done with:
$ spatch -sp-file sysfs_emit_dev.cocci --in-place --max-width=80 .
And cocci script:
$ cat sysfs_emit_dev.cocci
@@
identifier d_show;
identifier dev, attr, buf;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
return
- sprintf(buf,
+ sysfs_emit(buf,
...);
...>
}
@@
identifier d_show;
identifier dev, attr, buf;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
return
- snprintf(buf, PAGE_SIZE,
+ sysfs_emit(buf,
...);
...>
}
@@
identifier d_show;
identifier dev, attr, buf;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
return
- scnprintf(buf, PAGE_SIZE,
+ sysfs_emit(buf,
...);
...>
}
@@
identifier d_show;
identifier dev, attr, buf;
expression chr;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
return
- strcpy(buf, chr);
+ sysfs_emit(buf, chr);
...>
}
@@
identifier d_show;
identifier dev, attr, buf;
identifier len;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
len =
- sprintf(buf,
+ sysfs_emit(buf,
...);
...>
return len;
}
@@
identifier d_show;
identifier dev, attr, buf;
identifier len;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
len =
- snprintf(buf, PAGE_SIZE,
+ sysfs_emit(buf,
...);
...>
return len;
}
@@
identifier d_show;
identifier dev, attr, buf;
identifier len;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
len =
- scnprintf(buf, PAGE_SIZE,
+ sysfs_emit(buf,
...);
...>
return len;
}
@@
identifier d_show;
identifier dev, attr, buf;
identifier len;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
- len += scnprintf(buf + len, PAGE_SIZE - len,
+ len += sysfs_emit_at(buf, len,
...);
...>
return len;
}
@@
identifier d_show;
identifier dev, attr, buf;
expression chr;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
...
- strcpy(buf, chr);
- return strlen(buf);
+ return sysfs_emit(buf, chr);
}
Signed-off-by: Joe Perches <joe@perches.com>
Link: https://lore.kernel.org/r/3d033c33056d88bbe34d4ddb62afd05ee166ab9a.1600285923.git.joe@perches.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2020-09-16 13:40:39 -07:00
|
|
|
return sysfs_emit(buf, "%uK\n", this_leaf->size >> 10);
|
2014-09-30 14:48:25 +01:00
|
|
|
}
|
|
|
|
|
2020-09-16 13:40:44 -07:00
|
|
|
static ssize_t shared_cpu_map_show(struct device *dev,
|
|
|
|
struct device_attribute *attr, char *buf)
|
2014-09-30 14:48:25 +01:00
|
|
|
{
|
|
|
|
struct cacheinfo *this_leaf = dev_get_drvdata(dev);
|
|
|
|
const struct cpumask *mask = &this_leaf->shared_cpu_map;
|
|
|
|
|
2020-09-16 13:40:44 -07:00
|
|
|
return sysfs_emit(buf, "%*pb\n", nr_cpu_ids, mask);
|
2014-09-30 14:48:25 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static ssize_t shared_cpu_list_show(struct device *dev,
|
|
|
|
struct device_attribute *attr, char *buf)
|
|
|
|
{
|
2020-09-16 13:40:44 -07:00
|
|
|
struct cacheinfo *this_leaf = dev_get_drvdata(dev);
|
|
|
|
const struct cpumask *mask = &this_leaf->shared_cpu_map;
|
|
|
|
|
|
|
|
return sysfs_emit(buf, "%*pbl\n", nr_cpu_ids, mask);
|
2014-09-30 14:48:25 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static ssize_t type_show(struct device *dev,
|
|
|
|
struct device_attribute *attr, char *buf)
|
|
|
|
{
|
|
|
|
struct cacheinfo *this_leaf = dev_get_drvdata(dev);
|
2020-09-16 13:40:40 -07:00
|
|
|
const char *output;
|
2014-09-30 14:48:25 +01:00
|
|
|
|
|
|
|
switch (this_leaf->type) {
|
|
|
|
case CACHE_TYPE_DATA:
|
2020-09-16 13:40:40 -07:00
|
|
|
output = "Data";
|
|
|
|
break;
|
2014-09-30 14:48:25 +01:00
|
|
|
case CACHE_TYPE_INST:
|
2020-09-16 13:40:40 -07:00
|
|
|
output = "Instruction";
|
|
|
|
break;
|
2014-09-30 14:48:25 +01:00
|
|
|
case CACHE_TYPE_UNIFIED:
|
2020-09-16 13:40:40 -07:00
|
|
|
output = "Unified";
|
|
|
|
break;
|
2014-09-30 14:48:25 +01:00
|
|
|
default:
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
2020-09-16 13:40:40 -07:00
|
|
|
|
|
|
|
return sysfs_emit(buf, "%s\n", output);
|
2014-09-30 14:48:25 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static ssize_t allocation_policy_show(struct device *dev,
|
|
|
|
struct device_attribute *attr, char *buf)
|
|
|
|
{
|
|
|
|
struct cacheinfo *this_leaf = dev_get_drvdata(dev);
|
|
|
|
unsigned int ci_attr = this_leaf->attributes;
|
2020-09-16 13:40:40 -07:00
|
|
|
const char *output;
|
2014-09-30 14:48:25 +01:00
|
|
|
|
|
|
|
if ((ci_attr & CACHE_READ_ALLOCATE) && (ci_attr & CACHE_WRITE_ALLOCATE))
|
2020-09-16 13:40:40 -07:00
|
|
|
output = "ReadWriteAllocate";
|
2014-09-30 14:48:25 +01:00
|
|
|
else if (ci_attr & CACHE_READ_ALLOCATE)
|
2020-09-16 13:40:40 -07:00
|
|
|
output = "ReadAllocate";
|
2014-09-30 14:48:25 +01:00
|
|
|
else if (ci_attr & CACHE_WRITE_ALLOCATE)
|
2020-09-16 13:40:40 -07:00
|
|
|
output = "WriteAllocate";
|
|
|
|
else
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
return sysfs_emit(buf, "%s\n", output);
|
2014-09-30 14:48:25 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static ssize_t write_policy_show(struct device *dev,
|
|
|
|
struct device_attribute *attr, char *buf)
|
|
|
|
{
|
|
|
|
struct cacheinfo *this_leaf = dev_get_drvdata(dev);
|
|
|
|
unsigned int ci_attr = this_leaf->attributes;
|
|
|
|
int n = 0;
|
|
|
|
|
|
|
|
if (ci_attr & CACHE_WRITE_THROUGH)
|
drivers core: Use sysfs_emit and sysfs_emit_at for show(device *...) functions
Convert the various sprintf fmaily calls in sysfs device show functions
to sysfs_emit and sysfs_emit_at for PAGE_SIZE buffer safety.
Done with:
$ spatch -sp-file sysfs_emit_dev.cocci --in-place --max-width=80 .
And cocci script:
$ cat sysfs_emit_dev.cocci
@@
identifier d_show;
identifier dev, attr, buf;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
return
- sprintf(buf,
+ sysfs_emit(buf,
...);
...>
}
@@
identifier d_show;
identifier dev, attr, buf;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
return
- snprintf(buf, PAGE_SIZE,
+ sysfs_emit(buf,
...);
...>
}
@@
identifier d_show;
identifier dev, attr, buf;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
return
- scnprintf(buf, PAGE_SIZE,
+ sysfs_emit(buf,
...);
...>
}
@@
identifier d_show;
identifier dev, attr, buf;
expression chr;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
return
- strcpy(buf, chr);
+ sysfs_emit(buf, chr);
...>
}
@@
identifier d_show;
identifier dev, attr, buf;
identifier len;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
len =
- sprintf(buf,
+ sysfs_emit(buf,
...);
...>
return len;
}
@@
identifier d_show;
identifier dev, attr, buf;
identifier len;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
len =
- snprintf(buf, PAGE_SIZE,
+ sysfs_emit(buf,
...);
...>
return len;
}
@@
identifier d_show;
identifier dev, attr, buf;
identifier len;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
len =
- scnprintf(buf, PAGE_SIZE,
+ sysfs_emit(buf,
...);
...>
return len;
}
@@
identifier d_show;
identifier dev, attr, buf;
identifier len;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
- len += scnprintf(buf + len, PAGE_SIZE - len,
+ len += sysfs_emit_at(buf, len,
...);
...>
return len;
}
@@
identifier d_show;
identifier dev, attr, buf;
expression chr;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
...
- strcpy(buf, chr);
- return strlen(buf);
+ return sysfs_emit(buf, chr);
}
Signed-off-by: Joe Perches <joe@perches.com>
Link: https://lore.kernel.org/r/3d033c33056d88bbe34d4ddb62afd05ee166ab9a.1600285923.git.joe@perches.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2020-09-16 13:40:39 -07:00
|
|
|
n = sysfs_emit(buf, "WriteThrough\n");
|
2014-09-30 14:48:25 +01:00
|
|
|
else if (ci_attr & CACHE_WRITE_BACK)
|
drivers core: Use sysfs_emit and sysfs_emit_at for show(device *...) functions
Convert the various sprintf fmaily calls in sysfs device show functions
to sysfs_emit and sysfs_emit_at for PAGE_SIZE buffer safety.
Done with:
$ spatch -sp-file sysfs_emit_dev.cocci --in-place --max-width=80 .
And cocci script:
$ cat sysfs_emit_dev.cocci
@@
identifier d_show;
identifier dev, attr, buf;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
return
- sprintf(buf,
+ sysfs_emit(buf,
...);
...>
}
@@
identifier d_show;
identifier dev, attr, buf;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
return
- snprintf(buf, PAGE_SIZE,
+ sysfs_emit(buf,
...);
...>
}
@@
identifier d_show;
identifier dev, attr, buf;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
return
- scnprintf(buf, PAGE_SIZE,
+ sysfs_emit(buf,
...);
...>
}
@@
identifier d_show;
identifier dev, attr, buf;
expression chr;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
return
- strcpy(buf, chr);
+ sysfs_emit(buf, chr);
...>
}
@@
identifier d_show;
identifier dev, attr, buf;
identifier len;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
len =
- sprintf(buf,
+ sysfs_emit(buf,
...);
...>
return len;
}
@@
identifier d_show;
identifier dev, attr, buf;
identifier len;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
len =
- snprintf(buf, PAGE_SIZE,
+ sysfs_emit(buf,
...);
...>
return len;
}
@@
identifier d_show;
identifier dev, attr, buf;
identifier len;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
len =
- scnprintf(buf, PAGE_SIZE,
+ sysfs_emit(buf,
...);
...>
return len;
}
@@
identifier d_show;
identifier dev, attr, buf;
identifier len;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
- len += scnprintf(buf + len, PAGE_SIZE - len,
+ len += sysfs_emit_at(buf, len,
...);
...>
return len;
}
@@
identifier d_show;
identifier dev, attr, buf;
expression chr;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
...
- strcpy(buf, chr);
- return strlen(buf);
+ return sysfs_emit(buf, chr);
}
Signed-off-by: Joe Perches <joe@perches.com>
Link: https://lore.kernel.org/r/3d033c33056d88bbe34d4ddb62afd05ee166ab9a.1600285923.git.joe@perches.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2020-09-16 13:40:39 -07:00
|
|
|
n = sysfs_emit(buf, "WriteBack\n");
|
2014-09-30 14:48:25 +01:00
|
|
|
return n;
|
|
|
|
}
|
|
|
|
|
2016-10-22 06:19:49 -07:00
|
|
|
/*
 * Device attributes for a cache leaf. Each dev_attr_<name> defined here is
 * referenced from cache_default_attrs[] and from
 * cache_default_attrs_is_visible().
 */
static DEVICE_ATTR_RO(id);
static DEVICE_ATTR_RO(level);
static DEVICE_ATTR_RO(type);
static DEVICE_ATTR_RO(coherency_line_size);
static DEVICE_ATTR_RO(ways_of_associativity);
static DEVICE_ATTR_RO(number_of_sets);
static DEVICE_ATTR_RO(size);
static DEVICE_ATTR_RO(allocation_policy);
static DEVICE_ATTR_RO(write_policy);
static DEVICE_ATTR_RO(shared_cpu_map);
static DEVICE_ATTR_RO(shared_cpu_list);
static DEVICE_ATTR_RO(physical_line_partition);
|
|
|
|
|
|
|
|
/*
 * NULL-terminated list of candidate attributes for cache_default_group.
 * Whether an entry is actually shown for a given leaf is decided by
 * cache_default_attrs_is_visible().
 */
static struct attribute *cache_default_attrs[] = {
	&dev_attr_id.attr,
	&dev_attr_type.attr,
	&dev_attr_level.attr,
	&dev_attr_shared_cpu_map.attr,
	&dev_attr_shared_cpu_list.attr,
	&dev_attr_coherency_line_size.attr,
	&dev_attr_ways_of_associativity.attr,
	&dev_attr_number_of_sets.attr,
	&dev_attr_size.attr,
	&dev_attr_allocation_policy.attr,
	&dev_attr_write_policy.attr,
	&dev_attr_physical_line_partition.attr,
	NULL
};
|
|
|
|
|
|
|
|
static umode_t
|
|
|
|
cache_default_attrs_is_visible(struct kobject *kobj,
|
|
|
|
struct attribute *attr, int unused)
|
|
|
|
{
|
|
|
|
struct device *dev = kobj_to_dev(kobj);
|
|
|
|
struct cacheinfo *this_leaf = dev_get_drvdata(dev);
|
|
|
|
const struct cpumask *mask = &this_leaf->shared_cpu_map;
|
|
|
|
umode_t mode = attr->mode;
|
|
|
|
|
2016-10-22 06:19:49 -07:00
|
|
|
if ((attr == &dev_attr_id.attr) && (this_leaf->attributes & CACHE_ID))
|
|
|
|
return mode;
|
2014-09-30 14:48:25 +01:00
|
|
|
if ((attr == &dev_attr_type.attr) && this_leaf->type)
|
|
|
|
return mode;
|
|
|
|
if ((attr == &dev_attr_level.attr) && this_leaf->level)
|
|
|
|
return mode;
|
|
|
|
if ((attr == &dev_attr_shared_cpu_map.attr) && !cpumask_empty(mask))
|
|
|
|
return mode;
|
|
|
|
if ((attr == &dev_attr_shared_cpu_list.attr) && !cpumask_empty(mask))
|
|
|
|
return mode;
|
|
|
|
if ((attr == &dev_attr_coherency_line_size.attr) &&
|
|
|
|
this_leaf->coherency_line_size)
|
|
|
|
return mode;
|
|
|
|
if ((attr == &dev_attr_ways_of_associativity.attr) &&
|
|
|
|
this_leaf->size) /* allow 0 = full associativity */
|
|
|
|
return mode;
|
|
|
|
if ((attr == &dev_attr_number_of_sets.attr) &&
|
|
|
|
this_leaf->number_of_sets)
|
|
|
|
return mode;
|
|
|
|
if ((attr == &dev_attr_size.attr) && this_leaf->size)
|
|
|
|
return mode;
|
|
|
|
if ((attr == &dev_attr_write_policy.attr) &&
|
|
|
|
(this_leaf->attributes & CACHE_WRITE_POLICY_MASK))
|
|
|
|
return mode;
|
|
|
|
if ((attr == &dev_attr_allocation_policy.attr) &&
|
|
|
|
(this_leaf->attributes & CACHE_ALLOCATE_POLICY_MASK))
|
|
|
|
return mode;
|
|
|
|
if ((attr == &dev_attr_physical_line_partition.attr) &&
|
|
|
|
this_leaf->physical_line_partition)
|
|
|
|
return mode;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Default attribute group: all candidate attributes, filtered per leaf. */
static const struct attribute_group cache_default_group = {
	.attrs = cache_default_attrs,
	.is_visible = cache_default_attrs_is_visible,
};
|
|
|
|
|
|
|
|
/*
 * NULL-terminated group list returned by cache_get_attribute_groups() when
 * cache_get_priv_group() supplies no private group.
 */
static const struct attribute_group *cache_default_groups[] = {
	&cache_default_group,
	NULL,
};
|
|
|
|
|
|
|
|
/*
 * NULL-terminated group list returned when a private group exists. Slot 1
 * starts out NULL and is filled in by cache_get_attribute_groups() with the
 * first private group it sees.
 */
static const struct attribute_group *cache_private_groups[] = {
	&cache_default_group,
	NULL, /* Placeholder for private group */
	NULL,
};
|
|
|
|
|
|
|
|
/*
 * Weak default: there is no private attribute group for @this_leaf.
 * Being __weak, this definition can be replaced by another one at link time
 * (NOTE(review): presumably by architecture code — confirm against callers
 * outside this file).
 */
const struct attribute_group *
__weak cache_get_priv_group(struct cacheinfo *this_leaf)
{
	return NULL;
}
|
|
|
|
|
|
|
|
static const struct attribute_group **
|
|
|
|
cache_get_attribute_groups(struct cacheinfo *this_leaf)
|
|
|
|
{
|
|
|
|
const struct attribute_group *priv_group =
|
|
|
|
cache_get_priv_group(this_leaf);
|
|
|
|
|
|
|
|
if (!priv_group)
|
|
|
|
return cache_default_groups;
|
|
|
|
|
|
|
|
if (!cache_private_groups[1])
|
|
|
|
cache_private_groups[1] = priv_group;
|
|
|
|
|
|
|
|
return cache_private_groups;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Add/Remove cache interface for CPU device */
|
|
|
|
static void cpu_cache_sysfs_exit(unsigned int cpu)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
struct device *ci_dev;
|
|
|
|
|
|
|
|
if (per_cpu_index_dev(cpu)) {
|
|
|
|
for (i = 0; i < cache_leaves(cpu); i++) {
|
|
|
|
ci_dev = per_cache_index_dev(cpu, i);
|
|
|
|
if (!ci_dev)
|
|
|
|
continue;
|
|
|
|
device_unregister(ci_dev);
|
|
|
|
}
|
|
|
|
kfree(per_cpu_index_dev(cpu));
|
|
|
|
per_cpu_index_dev(cpu) = NULL;
|
|
|
|
}
|
|
|
|
device_unregister(per_cpu_cache_dev(cpu));
|
|
|
|
per_cpu_cache_dev(cpu) = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int cpu_cache_sysfs_init(unsigned int cpu)
|
|
|
|
{
|
|
|
|
struct device *dev = get_cpu_device(cpu);
|
|
|
|
|
|
|
|
if (per_cpu_cacheinfo(cpu) == NULL)
|
|
|
|
return -ENOENT;
|
|
|
|
|
|
|
|
per_cpu_cache_dev(cpu) = cpu_device_create(dev, NULL, NULL, "cache");
|
|
|
|
if (IS_ERR(per_cpu_cache_dev(cpu)))
|
|
|
|
return PTR_ERR(per_cpu_cache_dev(cpu));
|
|
|
|
|
|
|
|
/* Allocate all required memory */
|
|
|
|
per_cpu_index_dev(cpu) = kcalloc(cache_leaves(cpu),
|
|
|
|
sizeof(struct device *), GFP_KERNEL);
|
|
|
|
if (unlikely(per_cpu_index_dev(cpu) == NULL))
|
|
|
|
goto err_out;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
err_out:
|
|
|
|
cpu_cache_sysfs_exit(cpu);
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int cache_add_dev(unsigned int cpu)
|
|
|
|
{
|
|
|
|
unsigned int i;
|
|
|
|
int rc;
|
|
|
|
struct device *ci_dev, *parent;
|
|
|
|
struct cacheinfo *this_leaf;
|
|
|
|
const struct attribute_group **cache_groups;
|
|
|
|
|
|
|
|
rc = cpu_cache_sysfs_init(cpu);
|
|
|
|
if (unlikely(rc < 0))
|
|
|
|
return rc;
|
|
|
|
|
|
|
|
parent = per_cpu_cache_dev(cpu);
|
|
|
|
for (i = 0; i < cache_leaves(cpu); i++) {
|
2022-07-04 11:15:47 +01:00
|
|
|
this_leaf = per_cpu_cacheinfo_idx(cpu, i);
|
2014-09-30 14:48:25 +01:00
|
|
|
if (this_leaf->disable_sysfs)
|
|
|
|
continue;
|
2018-10-04 09:20:05 -06:00
|
|
|
if (this_leaf->type == CACHE_TYPE_NOCACHE)
|
|
|
|
break;
|
2014-09-30 14:48:25 +01:00
|
|
|
cache_groups = cache_get_attribute_groups(this_leaf);
|
|
|
|
ci_dev = cpu_device_create(parent, this_leaf, cache_groups,
|
|
|
|
"index%1u", i);
|
|
|
|
if (IS_ERR(ci_dev)) {
|
|
|
|
rc = PTR_ERR(ci_dev);
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
per_cache_index_dev(cpu, i) = ci_dev;
|
|
|
|
}
|
|
|
|
cpumask_set_cpu(cpu, &cache_dev_map);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
err:
|
|
|
|
cpu_cache_sysfs_exit(cpu);
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
2024-01-26 16:19:44 +08:00
|
|
|
static unsigned int cpu_map_shared_cache(bool online, unsigned int cpu,
|
|
|
|
cpumask_t **map)
|
|
|
|
{
|
|
|
|
struct cacheinfo *llc, *sib_llc;
|
|
|
|
unsigned int sibling;
|
|
|
|
|
|
|
|
if (!last_level_cache_is_valid(cpu))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
llc = per_cpu_cacheinfo_idx(cpu, cache_leaves(cpu) - 1);
|
|
|
|
|
|
|
|
if (llc->type != CACHE_TYPE_DATA && llc->type != CACHE_TYPE_UNIFIED)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (online) {
|
|
|
|
*map = &llc->shared_cpu_map;
|
|
|
|
return cpumask_weight(*map);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* shared_cpu_map of offlined CPU will be cleared, so use sibling map */
|
|
|
|
for_each_cpu(sibling, &llc->shared_cpu_map) {
|
|
|
|
if (sibling == cpu || !last_level_cache_is_valid(sibling))
|
|
|
|
continue;
|
|
|
|
sib_llc = per_cpu_cacheinfo_idx(sibling, cache_leaves(sibling) - 1);
|
|
|
|
*map = &sib_llc->shared_cpu_map;
|
|
|
|
return cpumask_weight(*map);
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2023-10-16 13:29:55 +08:00
|
|
|
/*
|
|
|
|
* Calculate the size of the per-CPU data cache slice. This can be
|
|
|
|
* used to estimate the size of the data cache slice that can be used
|
|
|
|
* by one CPU under ideal circumstances. UNIFIED caches are counted
|
|
|
|
* in addition to DATA caches. So, please consider code cache usage
|
|
|
|
* when use the result.
|
|
|
|
*
|
|
|
|
* Because the cache inclusive/non-inclusive information isn't
|
|
|
|
* available, we just use the size of the per-CPU slice of LLC to make
|
|
|
|
* the result more predictable across architectures.
|
|
|
|
*/
|
|
|
|
static void update_per_cpu_data_slice_size_cpu(unsigned int cpu)
|
|
|
|
{
|
|
|
|
struct cpu_cacheinfo *ci;
|
|
|
|
struct cacheinfo *llc;
|
|
|
|
unsigned int nr_shared;
|
|
|
|
|
|
|
|
if (!last_level_cache_is_valid(cpu))
|
|
|
|
return;
|
|
|
|
|
|
|
|
ci = ci_cacheinfo(cpu);
|
|
|
|
llc = per_cpu_cacheinfo_idx(cpu, cache_leaves(cpu) - 1);
|
|
|
|
|
|
|
|
if (llc->type != CACHE_TYPE_DATA && llc->type != CACHE_TYPE_UNIFIED)
|
|
|
|
return;
|
|
|
|
|
|
|
|
nr_shared = cpumask_weight(&llc->shared_cpu_map);
|
|
|
|
if (nr_shared)
|
|
|
|
ci->per_cpu_data_slice_size = llc->size / nr_shared;
|
|
|
|
}
|
|
|
|
|
2024-01-26 16:19:44 +08:00
|
|
|
static void update_per_cpu_data_slice_size(bool cpu_online, unsigned int cpu,
|
|
|
|
cpumask_t *cpu_map)
|
2023-10-16 13:29:55 +08:00
|
|
|
{
|
|
|
|
unsigned int icpu;
|
|
|
|
|
2024-01-26 16:19:44 +08:00
|
|
|
for_each_cpu(icpu, cpu_map) {
|
2023-10-16 13:29:55 +08:00
|
|
|
if (!cpu_online && icpu == cpu)
|
|
|
|
continue;
|
|
|
|
update_per_cpu_data_slice_size_cpu(icpu);
|
2024-01-26 16:19:44 +08:00
|
|
|
setup_pcp_cacheinfo(icpu);
|
2023-10-16 13:29:55 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-11-03 15:50:08 +01:00
|
|
|
static int cacheinfo_cpu_online(unsigned int cpu)
|
2014-09-30 14:48:25 +01:00
|
|
|
{
|
2016-11-03 15:50:08 +01:00
|
|
|
int rc = detect_cache_attributes(cpu);
|
2024-01-26 16:19:44 +08:00
|
|
|
cpumask_t *cpu_map;
|
2014-09-30 14:48:25 +01:00
|
|
|
|
2016-11-03 15:50:08 +01:00
|
|
|
if (rc)
|
|
|
|
return rc;
|
|
|
|
rc = cache_add_dev(cpu);
|
|
|
|
if (rc)
|
2023-10-16 13:29:55 +08:00
|
|
|
goto err;
|
2024-01-26 16:19:44 +08:00
|
|
|
if (cpu_map_shared_cache(true, cpu, &cpu_map))
|
|
|
|
update_per_cpu_data_slice_size(true, cpu, cpu_map);
|
2023-10-16 13:29:55 +08:00
|
|
|
return 0;
|
|
|
|
err:
|
|
|
|
free_cache_attributes(cpu);
|
2016-11-03 15:50:08 +01:00
|
|
|
return rc;
|
2014-09-30 14:48:25 +01:00
|
|
|
}
|
|
|
|
|
2016-11-03 15:50:08 +01:00
|
|
|
static int cacheinfo_cpu_pre_down(unsigned int cpu)
|
2014-09-30 14:48:25 +01:00
|
|
|
{
|
2024-01-26 16:19:44 +08:00
|
|
|
cpumask_t *cpu_map;
|
|
|
|
unsigned int nr_shared;
|
|
|
|
|
|
|
|
nr_shared = cpu_map_shared_cache(false, cpu, &cpu_map);
|
2016-11-03 15:50:08 +01:00
|
|
|
if (cpumask_test_and_clear_cpu(cpu, &cache_dev_map))
|
|
|
|
cpu_cache_sysfs_exit(cpu);
|
|
|
|
|
|
|
|
free_cache_attributes(cpu);
|
2024-01-26 16:19:44 +08:00
|
|
|
if (nr_shared > 1)
|
|
|
|
update_per_cpu_data_slice_size(false, cpu, cpu_map);
|
2016-11-03 15:50:08 +01:00
|
|
|
return 0;
|
2014-09-30 14:48:25 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Register the cacheinfo hotplug callbacks for the
 * CPUHP_AP_BASE_CACHEINFO_ONLINE state: cacheinfo_cpu_online() and
 * cacheinfo_cpu_pre_down(). Returns the result of cpuhp_setup_state().
 */
static int __init cacheinfo_sysfs_init(void)
{
	int ret;

	ret = cpuhp_setup_state(CPUHP_AP_BASE_CACHEINFO_ONLINE,
				"base/cacheinfo:online",
				cacheinfo_cpu_online, cacheinfo_cpu_pre_down);

	return ret;
}
device_initcall(cacheinfo_sysfs_init);
|