mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-10 07:00:48 +00:00
x86-64, NUMA: Kill numa_nodes[]
numa_nodes[] doesn't carry any information which isn't present in numa_meminfo. Each entry is simply the min/max range of all the memblks for the node. This is not only redundant but also inaccurate when memblks for different nodes interleave - for example, find_node_by_addr() can return the wrong nodeid. Kill numa_nodes[] and always use numa_meminfo instead. * nodes_cover_memory() is renamed to numa_meminfo_cover_memory() and now operates on numa_meminfo and returns bool. * setup_node_bootmem() needs min/max range. Compute the range on the fly. setup_node_bootmem() invocation is restructured to use an outer loop instead of hardcoding the double invocations. * find_node_by_addr() now operates on numa_meminfo. * setup_physnodes() builds physnodes[] from memblks. This will go away when emulation code is updated to use struct numa_meminfo. This patch also makes the following misc changes. * Clearing of nodes_add[] is converted to memset(). * numa_add_memblk() in amd_numa_init() is moved down a bit for consistency. Signed-off-by: Tejun Heo <tj@kernel.org> Cc: Yinghai Lu <yinghai@kernel.org> Cc: Brian Gerst <brgerst@gmail.com> Cc: Cyrill Gorcunov <gorcunov@gmail.com> Cc: Shaohui Zheng <shaohui.zheng@intel.com> Cc: David Rientjes <rientjes@google.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: H. Peter Anvin <hpa@linux.intel.com>
This commit is contained in:
parent
a844ef46fa
commit
91556237ec
@ -26,7 +26,6 @@ extern void setup_node_bootmem(int nodeid, unsigned long start,
|
||||
|
||||
extern nodemask_t cpu_nodes_parsed __initdata;
|
||||
extern nodemask_t mem_nodes_parsed __initdata;
|
||||
extern struct bootnode numa_nodes[MAX_NUMNODES] __initdata;
|
||||
|
||||
extern int __cpuinit numa_cpu_node(int cpu);
|
||||
extern int __init numa_add_memblk(int nodeid, u64 start, u64 end);
|
||||
|
@ -165,12 +165,8 @@ int __init amd_numa_init(void)
|
||||
pr_info("Node %d MemBase %016lx Limit %016lx\n",
|
||||
nodeid, base, limit);
|
||||
|
||||
numa_nodes[nodeid].start = base;
|
||||
numa_nodes[nodeid].end = limit;
|
||||
numa_add_memblk(nodeid, base, limit);
|
||||
|
||||
prevbase = base;
|
||||
|
||||
numa_add_memblk(nodeid, base, limit);
|
||||
node_set(nodeid, mem_nodes_parsed);
|
||||
node_set(nodeid, cpu_nodes_parsed);
|
||||
}
|
||||
|
@ -46,8 +46,6 @@ static unsigned long __initdata nodemap_size;
|
||||
|
||||
static struct numa_meminfo numa_meminfo __initdata;
|
||||
|
||||
struct bootnode numa_nodes[MAX_NUMNODES] __initdata;
|
||||
|
||||
/*
|
||||
* Given a shift value, try to populate memnodemap[]
|
||||
* Returns :
|
||||
@ -349,17 +347,17 @@ static int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
|
||||
* Sanity check to catch more bad NUMA configurations (they are amazingly
|
||||
* common). Make sure the nodes cover all memory.
|
||||
*/
|
||||
static int __init nodes_cover_memory(const struct bootnode *nodes)
|
||||
static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
|
||||
{
|
||||
unsigned long numaram, e820ram;
|
||||
int i;
|
||||
|
||||
numaram = 0;
|
||||
for_each_node_mask(i, mem_nodes_parsed) {
|
||||
unsigned long s = nodes[i].start >> PAGE_SHIFT;
|
||||
unsigned long e = nodes[i].end >> PAGE_SHIFT;
|
||||
for (i = 0; i < mi->nr_blks; i++) {
|
||||
unsigned long s = mi->blk[i].start >> PAGE_SHIFT;
|
||||
unsigned long e = mi->blk[i].end >> PAGE_SHIFT;
|
||||
numaram += e - s;
|
||||
numaram -= __absent_pages_in_range(i, s, e);
|
||||
numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e);
|
||||
if ((long)numaram < 0)
|
||||
numaram = 0;
|
||||
}
|
||||
@ -371,14 +369,14 @@ static int __init nodes_cover_memory(const struct bootnode *nodes)
|
||||
printk(KERN_ERR "NUMA: nodes only cover %luMB of your %luMB e820 RAM. Not used.\n",
|
||||
(numaram << PAGE_SHIFT) >> 20,
|
||||
(e820ram << PAGE_SHIFT) >> 20);
|
||||
return 0;
|
||||
return false;
|
||||
}
|
||||
return 1;
|
||||
return true;
|
||||
}
|
||||
|
||||
static int __init numa_register_memblks(struct numa_meminfo *mi)
|
||||
{
|
||||
int i;
|
||||
int i, j, nid;
|
||||
|
||||
/* Account for nodes with cpus and no memory */
|
||||
nodes_or(node_possible_map, mem_nodes_parsed, cpu_nodes_parsed);
|
||||
@ -398,23 +396,34 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
|
||||
|
||||
/* for out of order entries */
|
||||
sort_node_map();
|
||||
if (!nodes_cover_memory(numa_nodes))
|
||||
if (!numa_meminfo_cover_memory(mi))
|
||||
return -EINVAL;
|
||||
|
||||
init_memory_mapping_high();
|
||||
|
||||
/* Finally register nodes. */
|
||||
for_each_node_mask(i, node_possible_map)
|
||||
setup_node_bootmem(i, numa_nodes[i].start, numa_nodes[i].end);
|
||||
|
||||
/*
|
||||
* Try again in case setup_node_bootmem missed one due to missing
|
||||
* bootmem.
|
||||
* Finally register nodes. Do it twice in case setup_node_bootmem
|
||||
* missed one due to missing bootmem.
|
||||
*/
|
||||
for_each_node_mask(i, node_possible_map)
|
||||
if (!node_online(i))
|
||||
setup_node_bootmem(i, numa_nodes[i].start,
|
||||
numa_nodes[i].end);
|
||||
for (i = 0; i < 2; i++) {
|
||||
for_each_node_mask(nid, node_possible_map) {
|
||||
u64 start = (u64)max_pfn << PAGE_SHIFT;
|
||||
u64 end = 0;
|
||||
|
||||
if (node_online(nid))
|
||||
continue;
|
||||
|
||||
for (j = 0; j < mi->nr_blks; j++) {
|
||||
if (nid != mi->blk[j].nid)
|
||||
continue;
|
||||
start = min(mi->blk[j].start, start);
|
||||
end = max(mi->blk[j].end, end);
|
||||
}
|
||||
|
||||
if (start < end)
|
||||
setup_node_bootmem(nid, start, end);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -432,33 +441,41 @@ void __init numa_emu_cmdline(char *str)
|
||||
|
||||
int __init find_node_by_addr(unsigned long addr)
|
||||
{
|
||||
int ret = NUMA_NO_NODE;
|
||||
const struct numa_meminfo *mi = &numa_meminfo;
|
||||
int i;
|
||||
|
||||
for_each_node_mask(i, mem_nodes_parsed) {
|
||||
for (i = 0; i < mi->nr_blks; i++) {
|
||||
/*
|
||||
* Find the real node that this emulated node appears on. For
|
||||
* the sake of simplicity, we only use a real node's starting
|
||||
* address to determine which emulated node it appears on.
|
||||
*/
|
||||
if (addr >= numa_nodes[i].start && addr < numa_nodes[i].end) {
|
||||
ret = i;
|
||||
break;
|
||||
}
|
||||
if (addr >= mi->blk[i].start && addr < mi->blk[i].end)
|
||||
return mi->blk[i].nid;
|
||||
}
|
||||
return ret;
|
||||
return NUMA_NO_NODE;
|
||||
}
|
||||
|
||||
static int __init setup_physnodes(unsigned long start, unsigned long end)
|
||||
{
|
||||
const struct numa_meminfo *mi = &numa_meminfo;
|
||||
int ret = 0;
|
||||
int i;
|
||||
|
||||
memset(physnodes, 0, sizeof(physnodes));
|
||||
|
||||
for_each_node_mask(i, mem_nodes_parsed) {
|
||||
physnodes[i].start = numa_nodes[i].start;
|
||||
physnodes[i].end = numa_nodes[i].end;
|
||||
for (i = 0; i < mi->nr_blks; i++) {
|
||||
int nid = mi->blk[i].nid;
|
||||
|
||||
if (physnodes[nid].start == physnodes[nid].end) {
|
||||
physnodes[nid].start = mi->blk[i].start;
|
||||
physnodes[nid].end = mi->blk[i].end;
|
||||
} else {
|
||||
physnodes[nid].start = min(physnodes[nid].start,
|
||||
mi->blk[i].start);
|
||||
physnodes[nid].end = max(physnodes[nid].end,
|
||||
mi->blk[i].end);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@ -809,8 +826,6 @@ static int dummy_numa_init(void)
|
||||
node_set(0, cpu_nodes_parsed);
|
||||
node_set(0, mem_nodes_parsed);
|
||||
numa_add_memblk(0, 0, (u64)max_pfn << PAGE_SHIFT);
|
||||
numa_nodes[0].start = 0;
|
||||
numa_nodes[0].end = (u64)max_pfn << PAGE_SHIFT;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -841,7 +856,6 @@ void __init initmem_init(void)
|
||||
nodes_clear(node_possible_map);
|
||||
nodes_clear(node_online_map);
|
||||
memset(&numa_meminfo, 0, sizeof(numa_meminfo));
|
||||
memset(numa_nodes, 0, sizeof(numa_nodes));
|
||||
remove_all_active_ranges();
|
||||
|
||||
if (numa_init[i]() < 0)
|
||||
|
@ -37,13 +37,9 @@ static __init int setup_node(int pxm)
|
||||
|
||||
static __init void bad_srat(void)
|
||||
{
|
||||
int i;
|
||||
printk(KERN_ERR "SRAT: SRAT not used.\n");
|
||||
acpi_numa = -1;
|
||||
for (i = 0; i < MAX_NUMNODES; i++) {
|
||||
numa_nodes[i].start = numa_nodes[i].end = 0;
|
||||
nodes_add[i].start = nodes_add[i].end = 0;
|
||||
}
|
||||
memset(nodes_add, 0, sizeof(nodes_add));
|
||||
}
|
||||
|
||||
static __init inline int srat_disabled(void)
|
||||
@ -210,7 +206,6 @@ update_nodes_add(int node, unsigned long start, unsigned long end)
|
||||
void __init
|
||||
acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
|
||||
{
|
||||
struct bootnode *nd;
|
||||
unsigned long start, end;
|
||||
int node, pxm;
|
||||
|
||||
@ -243,18 +238,9 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
|
||||
printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm,
|
||||
start, end);
|
||||
|
||||
if (!(ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE)) {
|
||||
nd = &numa_nodes[node];
|
||||
if (!node_test_and_set(node, mem_nodes_parsed)) {
|
||||
nd->start = start;
|
||||
nd->end = end;
|
||||
} else {
|
||||
if (start < nd->start)
|
||||
nd->start = start;
|
||||
if (nd->end < end)
|
||||
nd->end = end;
|
||||
}
|
||||
} else
|
||||
if (!(ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE))
|
||||
node_set(node, mem_nodes_parsed);
|
||||
else
|
||||
update_nodes_add(node, start, end);
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user