mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-06 14:05:39 +00:00
Merge branch 'for-5.7/numa' into libnvdimm-for-next
- Promote numa_map_to_online_node() to a cross-kernel generic facility.
- Save x86 NUMA information to allow node-id lookups for reserved memory ranges, and deploy that capability for the e820-pmem driver.
- Introduce phys_to_target_node() for drivers that want to know the resulting NUMA node if a given reserved address range were onlined.
This commit is contained in:
commit
d3b88655c0
@ -285,25 +285,6 @@ int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int papr_scm_node(int node)
|
||||
{
|
||||
int min_dist = INT_MAX, dist;
|
||||
int nid, min_node;
|
||||
|
||||
if ((node == NUMA_NO_NODE) || node_online(node))
|
||||
return node;
|
||||
|
||||
min_node = first_online_node;
|
||||
for_each_online_node(nid) {
|
||||
dist = node_distance(node, nid);
|
||||
if (dist < min_dist) {
|
||||
min_dist = dist;
|
||||
min_node = nid;
|
||||
}
|
||||
}
|
||||
return min_node;
|
||||
}
|
||||
|
||||
static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
|
||||
{
|
||||
struct device *dev = &p->pdev->dev;
|
||||
@ -349,7 +330,7 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
|
||||
|
||||
memset(&ndr_desc, 0, sizeof(ndr_desc));
|
||||
target_nid = dev_to_node(&p->pdev->dev);
|
||||
online_nid = papr_scm_node(target_nid);
|
||||
online_nid = numa_map_to_online_node(target_nid);
|
||||
ndr_desc.numa_node = online_nid;
|
||||
ndr_desc.target_node = target_nid;
|
||||
ndr_desc.res = &p->res;
|
||||
|
@ -1664,6 +1664,7 @@ config X86_PMEM_LEGACY
|
||||
depends on PHYS_ADDR_T_64BIT
|
||||
depends on BLK_DEV
|
||||
select X86_PMEM_LEGACY_DEVICE
|
||||
select NUMA_KEEP_MEMINFO if NUMA
|
||||
select LIBNVDIMM
|
||||
help
|
||||
Treat memory marked using the non-standard e820 type of 12 as used
|
||||
|
@ -25,11 +25,8 @@ nodemask_t numa_nodes_parsed __initdata;
|
||||
struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
|
||||
EXPORT_SYMBOL(node_data);
|
||||
|
||||
static struct numa_meminfo numa_meminfo
|
||||
#ifndef CONFIG_MEMORY_HOTPLUG
|
||||
__initdata
|
||||
#endif
|
||||
;
|
||||
static struct numa_meminfo numa_meminfo __initdata_or_meminfo;
|
||||
static struct numa_meminfo numa_reserved_meminfo __initdata_or_meminfo;
|
||||
|
||||
static int numa_distance_cnt;
|
||||
static u8 *numa_distance;
|
||||
@ -168,6 +165,19 @@ void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi)
|
||||
(mi->nr_blks - idx) * sizeof(mi->blk[0]));
|
||||
}
|
||||
|
||||
/**
|
||||
* numa_move_tail_memblk - Move a numa_memblk from one numa_meminfo to another
|
||||
* @dst: numa_meminfo whose tail receives the block
|
||||
* @idx: Index, within @src, of the memblk to move
|
||||
* @src: numa_meminfo the memblk is taken from
|
||||
*
* The entry @src->blk[@idx] is first copied into the slot at index
* @dst->nr_blks (which is then incremented), and only afterwards removed
* from @src with numa_remove_memblk_from().
*
* NOTE(review): no capacity check on @dst->blk is visible in this function;
* presumably callers guarantee there is room -- confirm.
*/
|
||||
static void __init numa_move_tail_memblk(struct numa_meminfo *dst, int idx,
|
||||
struct numa_meminfo *src)
|
||||
{
|
||||
dst->blk[dst->nr_blks++] = src->blk[idx];
|
||||
numa_remove_memblk_from(idx, src);
|
||||
}
|
||||
|
||||
/**
|
||||
* numa_add_memblk - Add one numa_memblk to numa_meminfo
|
||||
* @nid: NUMA node ID of the new memblk
|
||||
@ -237,14 +247,19 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
|
||||
for (i = 0; i < mi->nr_blks; i++) {
|
||||
struct numa_memblk *bi = &mi->blk[i];
|
||||
|
||||
/* make sure all blocks are inside the limits */
|
||||
/* move / save reserved memory ranges */
|
||||
if (!memblock_overlaps_region(&memblock.memory,
|
||||
bi->start, bi->end - bi->start)) {
|
||||
numa_move_tail_memblk(&numa_reserved_meminfo, i--, mi);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* make sure all non-reserved blocks are inside the limits */
|
||||
bi->start = max(bi->start, low);
|
||||
bi->end = min(bi->end, high);
|
||||
|
||||
/* and there's no empty or non-exist block */
|
||||
if (bi->start >= bi->end ||
|
||||
!memblock_overlaps_region(&memblock.memory,
|
||||
bi->start, bi->end - bi->start))
|
||||
/* and there's no empty block */
|
||||
if (bi->start >= bi->end)
|
||||
numa_remove_memblk_from(i--, mi);
|
||||
}
|
||||
|
||||
@ -881,16 +896,38 @@ EXPORT_SYMBOL(cpumask_of_node);
|
||||
|
||||
#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
|
||||
|
||||
#ifdef CONFIG_MEMORY_HOTPLUG
|
||||
int memory_add_physaddr_to_nid(u64 start)
|
||||
#ifdef CONFIG_NUMA_KEEP_MEMINFO
|
||||
static int meminfo_to_nid(struct numa_meminfo *mi, u64 start)
|
||||
{
|
||||
struct numa_meminfo *mi = &numa_meminfo;
|
||||
int nid = mi->blk[0].nid;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < mi->nr_blks; i++)
|
||||
if (mi->blk[i].start <= start && mi->blk[i].end > start)
|
||||
nid = mi->blk[i].nid;
|
||||
return mi->blk[i].nid;
|
||||
return NUMA_NO_NODE;
|
||||
}
|
||||
|
||||
/**
 * phys_to_target_node - Look up the node id recorded for a physical address
 * @start: physical address to look up
 *
 * numa_meminfo is searched first through meminfo_to_nid(); only when that
 * lookup yields NUMA_NO_NODE is numa_reserved_meminfo searched.
 *
 * Return: the node id from numa_meminfo if that lookup did not yield
 * NUMA_NO_NODE, otherwise whatever the numa_reserved_meminfo lookup returns.
 */
int phys_to_target_node(phys_addr_t start)
|
||||
{
|
||||
int nid = meminfo_to_nid(&numa_meminfo, start);
|
||||
|
||||
/*
|
||||
* Prefer online nodes, but if reserved memory might be
|
||||
* hot-added continue the search with reserved ranges.
|
||||
*/
|
||||
if (nid != NUMA_NO_NODE)
|
||||
return nid;
|
||||
|
||||
return meminfo_to_nid(&numa_reserved_meminfo, start);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(phys_to_target_node);
|
||||
|
||||
/**
 * memory_add_physaddr_to_nid - Pick a node id for a physical address
 * @start: physical address to look up
 *
 * Looks @start up in numa_meminfo through meminfo_to_nid().
 *
 * Return: the node id found by the lookup; when the lookup yields
 * NUMA_NO_NODE, the node id of numa_meminfo.blk[0] is returned instead.
 */
int memory_add_physaddr_to_nid(u64 start)
|
||||
{
|
||||
int nid = meminfo_to_nid(&numa_meminfo, start);
|
||||
|
||||
/* No block matched: fall back to the node of the first recorded block. */
if (nid == NUMA_NO_NODE)
|
||||
nid = numa_meminfo.blk[0].nid;
|
||||
return nid;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
|
||||
|
@ -72,47 +72,6 @@ int acpi_map_pxm_to_node(int pxm)
|
||||
}
|
||||
EXPORT_SYMBOL(acpi_map_pxm_to_node);
|
||||
|
||||
/**
|
||||
* acpi_map_pxm_to_online_node - Map proximity ID to online node
|
||||
* @pxm: ACPI proximity ID
|
||||
*
|
||||
* This is similar to acpi_map_pxm_to_node(), but always returns an online
|
||||
* node. When the mapped node from a given proximity ID is offline, it
|
||||
* looks up the node distance table and returns the nearest online node.
|
||||
*
|
||||
* ACPI device drivers, which are called after the NUMA initialization has
|
||||
* completed in the kernel, can call this interface to obtain their device
|
||||
* NUMA topology from ACPI tables. Such drivers do not have to deal with
|
||||
* offline nodes. A node may be offline when a device proximity ID is
|
||||
* unique, SRAT memory entry does not exist, or NUMA is disabled, ex.
|
||||
* "numa=off" on x86.
|
||||
*/
|
||||
int acpi_map_pxm_to_online_node(int pxm)
|
||||
{
|
||||
int node, min_node;
|
||||
|
||||
node = acpi_map_pxm_to_node(pxm);
|
||||
|
||||
if (node == NUMA_NO_NODE)
|
||||
node = 0;
|
||||
|
||||
min_node = node;
|
||||
if (!node_online(node)) {
|
||||
int min_dist = INT_MAX, dist, n;
|
||||
|
||||
for_each_online_node(n) {
|
||||
dist = node_distance(node, n);
|
||||
if (dist < min_dist) {
|
||||
min_dist = dist;
|
||||
min_node = n;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return min_node;
|
||||
}
|
||||
EXPORT_SYMBOL(acpi_map_pxm_to_online_node);
|
||||
|
||||
static void __init
|
||||
acpi_table_print_srat_entry(struct acpi_subtable_header *header)
|
||||
{
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include <linux/memory_hotplug.h>
|
||||
#include <linux/libnvdimm.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/numa.h>
|
||||
|
||||
static int e820_pmem_remove(struct platform_device *pdev)
|
||||
{
|
||||
@ -16,27 +17,16 @@ static int e820_pmem_remove(struct platform_device *pdev)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MEMORY_HOTPLUG
|
||||
static int e820_range_to_nid(resource_size_t addr)
|
||||
{
|
||||
return memory_add_physaddr_to_nid(addr);
|
||||
}
|
||||
#else
|
||||
static int e820_range_to_nid(resource_size_t addr)
|
||||
{
|
||||
return NUMA_NO_NODE;
|
||||
}
|
||||
#endif
|
||||
|
||||
static int e820_register_one(struct resource *res, void *data)
|
||||
{
|
||||
struct nd_region_desc ndr_desc;
|
||||
struct nvdimm_bus *nvdimm_bus = data;
|
||||
int nid = phys_to_target_node(res->start);
|
||||
|
||||
memset(&ndr_desc, 0, sizeof(ndr_desc));
|
||||
ndr_desc.res = res;
|
||||
ndr_desc.numa_node = e820_range_to_nid(res->start);
|
||||
ndr_desc.target_node = ndr_desc.numa_node;
|
||||
ndr_desc.numa_node = numa_map_to_online_node(nid);
|
||||
ndr_desc.target_node = nid;
|
||||
set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
|
||||
if (!nvdimm_pmem_region_create(nvdimm_bus, &ndr_desc))
|
||||
return -ENXIO;
|
||||
|
@ -416,9 +416,30 @@ extern void acpi_osi_setup(char *str);
|
||||
extern bool acpi_osi_is_win8(void);
|
||||
|
||||
#ifdef CONFIG_ACPI_NUMA
|
||||
int acpi_map_pxm_to_online_node(int pxm);
|
||||
int acpi_map_pxm_to_node(int pxm);
|
||||
int acpi_get_node(acpi_handle handle);
|
||||
|
||||
/**
|
||||
* acpi_map_pxm_to_online_node - Map proximity ID to online node
|
||||
* @pxm: ACPI proximity ID
|
||||
*
|
||||
* This is similar to acpi_map_pxm_to_node(), but always returns an online
|
||||
* node. When the mapped node from a given proximity ID is offline, it
|
||||
* looks up the node distance table and returns the nearest online node.
|
||||
*
|
||||
* ACPI device drivers, which are called after the NUMA initialization has
|
||||
* completed in the kernel, can call this interface to obtain their device
|
||||
* NUMA topology from ACPI tables. Such drivers do not have to deal with
|
||||
* offline nodes. A node may be offline when a device proximity ID is
|
||||
* unique, SRAT memory entry does not exist, or NUMA is disabled, ex.
|
||||
* "numa=off" on x86.
|
||||
*
* Return: the result of numa_map_to_online_node() applied to the node that
* acpi_map_pxm_to_node() reports for @pxm.
*
* NOTE(review): "always returns an online node" depends on how
* numa_map_to_online_node() treats NUMA_NO_NODE; confirm the statement still
* holds when acpi_map_pxm_to_node() cannot map @pxm.
*/
|
||||
static inline int acpi_map_pxm_to_online_node(int pxm)
|
||||
{
|
||||
int node = acpi_map_pxm_to_node(pxm);
|
||||
|
||||
return numa_map_to_online_node(node);
|
||||
}
|
||||
#else
|
||||
static inline int acpi_map_pxm_to_online_node(int pxm)
|
||||
{
|
||||
|
@ -1,7 +1,7 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _LINUX_NUMA_H
|
||||
#define _LINUX_NUMA_H
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
#ifdef CONFIG_NODES_SHIFT
|
||||
#define NODES_SHIFT CONFIG_NODES_SHIFT
|
||||
@ -13,4 +13,32 @@
|
||||
|
||||
#define NUMA_NO_NODE (-1)
|
||||
|
||||
/* optionally keep NUMA memory info available post init */
|
||||
#ifdef CONFIG_NUMA_KEEP_MEMINFO
|
||||
#define __initdata_or_meminfo
|
||||
#else
|
||||
#define __initdata_or_meminfo __initdata
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_NUMA
|
||||
/* Generic implementation available */
|
||||
int numa_map_to_online_node(int node);
|
||||
|
||||
/*
|
||||
* Optional architecture specific implementation, users need a "depends
|
||||
* on $ARCH"
|
||||
*/
|
||||
int phys_to_target_node(phys_addr_t addr);
|
||||
#else
|
||||
/* !CONFIG_NUMA stub: @node is ignored and NUMA_NO_NODE is always returned. */
static inline int numa_map_to_online_node(int node)
|
||||
{
|
||||
return NUMA_NO_NODE;
|
||||
}
|
||||
|
||||
/* !CONFIG_NUMA stub: @addr is ignored and NUMA_NO_NODE is always returned. */
static inline int phys_to_target_node(phys_addr_t addr)
|
||||
{
|
||||
return NUMA_NO_NODE;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _LINUX_NUMA_H */
|
||||
|
@ -139,6 +139,10 @@ config HAVE_FAST_GUP
|
||||
config ARCH_KEEP_MEMBLOCK
|
||||
bool
|
||||
|
||||
# Keep arch NUMA mapping infrastructure post-init.
|
||||
config NUMA_KEEP_MEMINFO
|
||||
bool
|
||||
|
||||
config MEMORY_ISOLATION
|
||||
bool
|
||||
|
||||
@ -154,6 +158,7 @@ config MEMORY_HOTPLUG
|
||||
bool "Allow for memory hot-add"
|
||||
depends on SPARSEMEM || X86_64_ACPI_NUMA
|
||||
depends on ARCH_ENABLE_MEMORY_HOTPLUG
|
||||
select NUMA_KEEP_MEMINFO if NUMA
|
||||
|
||||
config MEMORY_HOTPLUG_SPARSE
|
||||
def_bool y
|
||||
|
@ -127,6 +127,32 @@ static struct mempolicy default_policy = {
|
||||
|
||||
static struct mempolicy preferred_node_policy[MAX_NUMNODES];
|
||||
|
||||
/**
|
||||
* numa_map_to_online_node - Find closest online node
|
||||
* @node: Node id to start the search
|
||||
*
|
||||
* Lookup the next closest node by distance if @node is not online.
*
* Return: @node itself when it is NUMA_NO_NODE or already online. Otherwise
* the online node with the smallest node_distance() from @node; on equal
* distances the node visited first by for_each_online_node() is kept. If the
* loop selects nothing, @node is returned unchanged.
|
||||
*/
|
||||
int numa_map_to_online_node(int node)
|
||||
{
|
||||
int min_dist = INT_MAX, dist, n, min_node;
|
||||
|
||||
if (node == NUMA_NO_NODE || node_online(node))
|
||||
return node;
|
||||
|
||||
/* Default to the input so the result is defined even if nothing is closer. */
min_node = node;
|
||||
for_each_online_node(n) {
|
||||
dist = node_distance(node, n);
|
||||
if (dist < min_dist) {
|
||||
min_dist = dist;
|
||||
min_node = n;
|
||||
}
|
||||
}
|
||||
|
||||
return min_node;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(numa_map_to_online_node);
|
||||
|
||||
struct mempolicy *get_task_policy(struct task_struct *p)
|
||||
{
|
||||
struct mempolicy *pol = p->mempolicy;
|
||||
|
Loading…
Reference in New Issue
Block a user