linux-next/drivers/of/of_numa.c

188 lines
3.9 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0
/*
* OF NUMA Parsing support.
*
* Copyright (C) 2015 - 2016 Cavium Inc.
*/
#define pr_fmt(fmt) "OF: NUMA: " fmt
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/nodemask.h>
mm: introduce numa_memblks Move code dealing with numa_memblks from arch/x86 to mm/ and add Kconfig options to let x86 select it in its Kconfig. This code will be later reused by arch_numa. No functional changes. Link: https://lkml.kernel.org/r/20240807064110.1003856-18-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org> Tested-by: Zi Yan <ziy@nvidia.com> # for x86_64 and arm64 Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Tested-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> [arm64 + CXL via QEMU] Acked-by: Dan Williams <dan.j.williams@intel.com> Acked-by: David Hildenbrand <david@redhat.com> Cc: Alexander Gordeev <agordeev@linux.ibm.com> Cc: Andreas Larsson <andreas@gaisler.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Borislav Petkov <bp@alien8.de> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Christophe Leroy <christophe.leroy@csgroup.eu> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: Davidlohr Bueso <dave@stgolabs.net> Cc: David S. Miller <davem@davemloft.net> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Heiko Carstens <hca@linux.ibm.com> Cc: Huacai Chen <chenhuacai@kernel.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jiaxun Yang <jiaxun.yang@flygoat.com> Cc: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Palmer Dabbelt <palmer@dabbelt.com> Cc: Rafael J. Wysocki <rafael@kernel.org> Cc: Rob Herring (Arm) <robh@kernel.org> Cc: Samuel Holland <samuel.holland@sifive.com> Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Vasily Gorbik <gor@linux.ibm.com> Cc: Will Deacon <will@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2024-08-07 06:41:01 +00:00
#include <linux/numa_memblks.h>
#include <asm/numa.h>
/* define default numa node to 0 */
#define DEFAULT_NODE 0
/*
* Even though we connect cpus to numa domains later in SMP
* init, we need to know the node ids now for all cpus.
*/
static void __init of_numa_parse_cpu_nodes(void)
{
u32 nid;
int r;
struct device_node *np;
for_each_of_cpu_node(np) {
r = of_property_read_u32(np, "numa-node-id", &nid);
if (r)
continue;
pr_debug("CPU on %u\n", nid);
if (nid >= MAX_NUMNODES)
pr_warn("Node id %u exceeds maximum value\n", nid);
else
node_set(nid, numa_nodes_parsed);
}
}
static int __init of_numa_parse_memory_nodes(void)
{
struct device_node *np = NULL;
struct resource rsrc;
u32 nid;
of, numa: return -EINVAL when no numa-node-id is found Currently of_numa_parse_memory_nodes() returns 0 if no "memory" node in device tree contains "numa-node-id" property. This makes of_numa_init() to return "success" despite no NUMA nodes were actually parsed and set up. arch_numa workarounds this by returning an error if numa_nodes_parsed is empty. numa_memblks however would WARN() in such case and since it will be used by arch_numa shortly, such warning is not desirable. Make sure of_numa_init() returns -EINVAL when no NUMA node information was found in the device tree. Link: https://lkml.kernel.org/r/20240807064110.1003856-24-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Tested-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> [arm64 + CXL via QEMU] Acked-by: Dan Williams <dan.j.williams@intel.com> Acked-by: David Hildenbrand <david@redhat.com> Cc: Alexander Gordeev <agordeev@linux.ibm.com> Cc: Andreas Larsson <andreas@gaisler.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Borislav Petkov <bp@alien8.de> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Christophe Leroy <christophe.leroy@csgroup.eu> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: Davidlohr Bueso <dave@stgolabs.net> Cc: David S. Miller <davem@davemloft.net> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Heiko Carstens <hca@linux.ibm.com> Cc: Huacai Chen <chenhuacai@kernel.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jiaxun Yang <jiaxun.yang@flygoat.com> Cc: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Palmer Dabbelt <palmer@dabbelt.com> Cc: Rafael J. Wysocki <rafael@kernel.org> Cc: Rob Herring (Arm) <robh@kernel.org> Cc: Samuel Holland <samuel.holland@sifive.com> Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Vasily Gorbik <gor@linux.ibm.com> Cc: Will Deacon <will@kernel.org> Cc: Zi Yan <ziy@nvidia.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2024-08-07 06:41:07 +00:00
int i, r = -EINVAL;
for_each_node_by_type(np, "memory") {
r = of_property_read_u32(np, "numa-node-id", &nid);
if (r == -EINVAL)
/*
* property doesn't exist if -EINVAL, continue
* looking for more memory nodes with
* "numa-node-id" property
*/
continue;
if (nid >= MAX_NUMNODES) {
pr_warn("Node id %u exceeds maximum value\n", nid);
r = -EINVAL;
}
for (i = 0; !r && !of_address_to_resource(np, i, &rsrc); i++)
r = numa_add_memblk(nid, rsrc.start, rsrc.end + 1);
if (!i || r) {
of_node_put(np);
pr_err("bad property in memory node\n");
return r ? : -EINVAL;
}
}
of, numa: return -EINVAL when no numa-node-id is found Currently of_numa_parse_memory_nodes() returns 0 if no "memory" node in device tree contains "numa-node-id" property. This makes of_numa_init() to return "success" despite no NUMA nodes were actually parsed and set up. arch_numa workarounds this by returning an error if numa_nodes_parsed is empty. numa_memblks however would WARN() in such case and since it will be used by arch_numa shortly, such warning is not desirable. Make sure of_numa_init() returns -EINVAL when no NUMA node information was found in the device tree. Link: https://lkml.kernel.org/r/20240807064110.1003856-24-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Tested-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> [arm64 + CXL via QEMU] Acked-by: Dan Williams <dan.j.williams@intel.com> Acked-by: David Hildenbrand <david@redhat.com> Cc: Alexander Gordeev <agordeev@linux.ibm.com> Cc: Andreas Larsson <andreas@gaisler.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Borislav Petkov <bp@alien8.de> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Christophe Leroy <christophe.leroy@csgroup.eu> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: Davidlohr Bueso <dave@stgolabs.net> Cc: David S. Miller <davem@davemloft.net> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Heiko Carstens <hca@linux.ibm.com> Cc: Huacai Chen <chenhuacai@kernel.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jiaxun Yang <jiaxun.yang@flygoat.com> Cc: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Palmer Dabbelt <palmer@dabbelt.com> Cc: Rafael J. Wysocki <rafael@kernel.org> Cc: Rob Herring (Arm) <robh@kernel.org> Cc: Samuel Holland <samuel.holland@sifive.com> Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Vasily Gorbik <gor@linux.ibm.com> Cc: Will Deacon <will@kernel.org> Cc: Zi Yan <ziy@nvidia.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2024-08-07 06:41:07 +00:00
return r;
}
static int __init of_numa_parse_distance_map_v1(struct device_node *map)
{
const __be32 *matrix;
int entry_count;
int i;
pr_info("parsing numa-distance-map-v1\n");
matrix = of_get_property(map, "distance-matrix", NULL);
if (!matrix) {
pr_err("No distance-matrix property in distance-map\n");
return -EINVAL;
}
entry_count = of_property_count_u32_elems(map, "distance-matrix");
if (entry_count <= 0) {
pr_err("Invalid distance-matrix\n");
return -EINVAL;
}
for (i = 0; i + 2 < entry_count; i += 3) {
u32 nodea, nodeb, distance;
nodea = of_read_number(matrix, 1);
matrix++;
nodeb = of_read_number(matrix, 1);
matrix++;
distance = of_read_number(matrix, 1);
matrix++;
of, numa: Validate some distance map rules Currently the NUMA distance map parsing does not validate the distance table for the distance-matrix rules 1-2 in [1]. However the arch NUMA code may enforce some of these rules, but not all. Such is the case for the arm64 port, which does not enforce the rule that the distance between separates nodes cannot equal LOCAL_DISTANCE. The patch adds the following rules validation: - distance of node to self equals LOCAL_DISTANCE - distance of separate nodes > LOCAL_DISTANCE This change avoids a yet-unresolved crash reported in [2]. A note on dealing with symmetrical distances between nodes: Validating symmetrical distances between nodes is difficult. If it were mandated in the bindings that every distance must be recorded in the table, then it would be easy. However, it isn't. In addition to this, it is also possible to record [b, a] distance only (and not [a, b]). So, when processing the table for [b, a], we cannot assert that current distance of [a, b] != [b, a] as invalid, as [a, b] distance may not be present in the table and current distance would be default at REMOTE_DISTANCE. As such, we maintain the policy that we overwrite distance [a, b] = [b, a] for b > a. This policy is different to kernel ACPI SLIT validation, which allows non-symmetrical distances (ACPI spec SLIT rules allow it). However, the distance debug message is dropped as it may be misleading (for a distance which is later overwritten). Some final notes on semantics: - It is implied that it is the responsibility of the arch NUMA code to reset the NUMA distance map for an error in distance map parsing. - It is the responsibility of the FW NUMA topology parsing (whether OF or ACPI) to enforce NUMA distance rules, and not arch NUMA code. [1] Documents/devicetree/bindings/numa.txt [2] https://www.spinics.net/lists/arm-kernel/msg683304.html Cc: stable@vger.kernel.org # 4.7 Signed-off-by: John Garry <john.garry@huawei.com> Acked-by: Will Deacon <will.deacon@arm.com> Signed-off-by: Rob Herring <robh@kernel.org>
2018-11-08 10:17:03 +00:00
if ((nodea == nodeb && distance != LOCAL_DISTANCE) ||
(nodea != nodeb && distance <= LOCAL_DISTANCE)) {
pr_err("Invalid distance[node%d -> node%d] = %d\n",
nodea, nodeb, distance);
return -EINVAL;
}
node_set(nodea, numa_nodes_parsed);
numa_set_distance(nodea, nodeb, distance);
/* Set default distance of node B->A same as A->B */
if (nodeb > nodea)
numa_set_distance(nodeb, nodea, distance);
}
return 0;
}
static int __init of_numa_parse_distance_map(void)
{
int ret = 0;
struct device_node *np;
np = of_find_compatible_node(NULL, NULL,
"numa-distance-map-v1");
if (np)
ret = of_numa_parse_distance_map_v1(np);
of_node_put(np);
return ret;
}
int of_node_to_nid(struct device_node *device)
{
struct device_node *np;
u32 nid;
int r = -ENODATA;
np = of_node_get(device);
while (np) {
r = of_property_read_u32(np, "numa-node-id", &nid);
/*
* -EINVAL indicates the property was not found, and
* we walk up the tree trying to find a parent with a
* "numa-node-id". Any other type of error indicates
* a bad device tree and we give up.
*/
if (r != -EINVAL)
break;
np = of_get_next_parent(np);
}
if (np && r)
pr_warn("Invalid \"numa-node-id\" property in node %pOFn\n",
np);
of_node_put(np);
of, numa: Return NUMA_NO_NODE from disable of_node_to_nid() if nid not possible. On arm64 NUMA kernels we can pass "numa=off" on the command line to disable NUMA. A side effect of this is that kmalloc_node() calls to non-zero nodes will crash the system with an OOPS: [ 0.000000] ITS@0x0000901000020000: allocated 2097152 Devices @10002000000 (flat, esz 8, psz 64K, shr 1) [ 0.000000] Unable to handle kernel NULL pointer dereference at virtual address 00001680 [ 0.000000] pgd = fffffc0009470000 [ 0.000000] [00001680] *pgd=0000010ffff90003, *pud=0000010ffff90003, *pmd=0000010ffff90003, *pte=0000000000000000 [ 0.000000] Internal error: Oops: 96000006 [#1] SMP . . . [ 0.000000] [<fffffc00081c8950>] __alloc_pages_nodemask+0xa4/0xe68 [ 0.000000] [<fffffc000821fa70>] new_slab+0xd0/0x564 [ 0.000000] [<fffffc0008221e24>] ___slab_alloc+0x2e4/0x514 [ 0.000000] [<fffffc0008239498>] __slab_alloc+0x48/0x58 [ 0.000000] [<fffffc0008222c20>] __kmalloc_node+0xd0/0x2dc [ 0.000000] [<fffffc0008115374>] __irq_domain_add+0x7c/0x164 [ 0.000000] [<fffffc0008b461dc>] its_probe+0x784/0x81c [ 0.000000] [<fffffc0008b462bc>] its_init+0x48/0x1b0 [ 0.000000] [<fffffc0008b4543c>] gic_init_bases+0x228/0x360 [ 0.000000] [<fffffc0008b456bc>] gic_of_init+0x148/0x1cc [ 0.000000] [<fffffc0008b5aec8>] of_irq_init+0x184/0x298 [ 0.000000] [<fffffc0008b43f9c>] irqchip_init+0x14/0x38 [ 0.000000] [<fffffc0008b12d60>] init_IRQ+0xc/0x30 [ 0.000000] [<fffffc0008b10a3c>] start_kernel+0x240/0x3b8 [ 0.000000] [<fffffc0008b101c4>] __primary_switched+0x30/0x6c [ 0.000000] Code: 912ec2a0 b9403809 0a0902fb 37b007db (f9400300) . . . This is caused by code like this in kernel/irq/irqdomain.c domain = kzalloc_node(sizeof(*domain) + (sizeof(unsigned int) * size), GFP_KERNEL, of_node_to_nid(of_node)); When NUMA is disabled, the concept of a node is really undefined, so of_node_to_nid() should unconditionally return NUMA_NO_NODE. Fix by returning NUMA_NO_NODE when the nid is not in the set of possible nodes. Reported-by: Gilbert Netzer <noname@pdc.kth.se> Signed-off-by: David Daney <david.daney@cavium.com> Cc: stable@vger.kernel.org # 4.7+ Signed-off-by: Rob Herring <robh@kernel.org>
2016-10-28 21:15:02 +00:00
/*
* If numa=off passed on command line, or with a defective
* device tree, the nid may not be in the set of possible
* nodes. Check for this case and return NUMA_NO_NODE.
*/
if (!r && nid < MAX_NUMNODES && node_possible(nid))
return nid;
return NUMA_NO_NODE;
}
int __init of_numa_init(void)
{
int r;
of_numa_parse_cpu_nodes();
r = of_numa_parse_memory_nodes();
if (r)
return r;
return of_numa_parse_distance_map();
}