mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-10 15:10:38 +00:00
x86-64, NUMA: Make emulation code build numa_meminfo and share the registration path
The NUMA emulation code built a nodes[] array and had its own registration path to set up the emulated nodes. Update it so that it generates an emulated numa_meminfo, returns control to initmem_init(), and shares the same registration path with the non-emulated cases.

Because {acpi|amd}_fake_nodes() expect a nodes[] parameter, fake_physnodes() now generates nodes[] from numa_meminfo. This will go away with further updates.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Shaohui Zheng <shaohui.zheng@intel.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: H. Peter Anvin <hpa@linux.intel.com>
This commit is contained in:
parent
9d073caeb3
commit
c88aea7a70
@ -541,7 +541,6 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
|
|||||||
|
|
||||||
#ifdef CONFIG_NUMA_EMU
|
#ifdef CONFIG_NUMA_EMU
|
||||||
/* Numa emulation */
|
/* Numa emulation */
|
||||||
static struct bootnode nodes[MAX_NUMNODES] __initdata;
|
|
||||||
static struct bootnode physnodes[MAX_NUMNODES] __initdata;
|
static struct bootnode physnodes[MAX_NUMNODES] __initdata;
|
||||||
|
|
||||||
static int emu_nid_to_phys[MAX_NUMNODES] __cpuinitdata;
|
static int emu_nid_to_phys[MAX_NUMNODES] __cpuinitdata;
|
||||||
@ -626,9 +625,24 @@ static int __init setup_physnodes(unsigned long start, unsigned long end)
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void __init fake_physnodes(int acpi, int amd, int nr_nodes)
|
static void __init fake_physnodes(int acpi, int amd,
|
||||||
|
const struct numa_meminfo *ei)
|
||||||
{
|
{
|
||||||
int i;
|
static struct bootnode nodes[MAX_NUMNODES] __initdata;
|
||||||
|
int i, nr_nodes = 0;
|
||||||
|
|
||||||
|
for (i = 0; i < ei->nr_blks; i++) {
|
||||||
|
int nid = ei->blk[i].nid;
|
||||||
|
|
||||||
|
if (nodes[nid].start == nodes[nid].end) {
|
||||||
|
nodes[nid].start = ei->blk[i].start;
|
||||||
|
nodes[nid].end = ei->blk[i].end;
|
||||||
|
nr_nodes++;
|
||||||
|
} else {
|
||||||
|
nodes[nid].start = min(ei->blk[i].start, nodes[nid].start);
|
||||||
|
nodes[nid].end = max(ei->blk[i].end, nodes[nid].end);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
BUG_ON(acpi && amd);
|
BUG_ON(acpi && amd);
|
||||||
#ifdef CONFIG_ACPI_NUMA
|
#ifdef CONFIG_ACPI_NUMA
|
||||||
@ -645,45 +659,44 @@ static void __init fake_physnodes(int acpi, int amd, int nr_nodes)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Setups up nid to range from addr to addr + size. If the end
|
* Sets up nid to range from @start to @end. The return value is -errno if
|
||||||
* boundary is greater than max_addr, then max_addr is used instead.
|
* something went wrong, 0 otherwise.
|
||||||
* The return value is 0 if there is additional memory left for
|
|
||||||
* allocation past addr and -1 otherwise. addr is adjusted to be at
|
|
||||||
* the end of the node.
|
|
||||||
*/
|
*/
|
||||||
static int __init setup_node_range(int nid, int physnid,
|
static int __init emu_setup_memblk(struct numa_meminfo *ei,
|
||||||
u64 *addr, u64 size, u64 max_addr)
|
int nid, int physnid, u64 start, u64 end)
|
||||||
{
|
{
|
||||||
int ret = 0;
|
struct numa_memblk *eb = &ei->blk[ei->nr_blks];
|
||||||
nodes[nid].start = *addr;
|
|
||||||
*addr += size;
|
if (ei->nr_blks >= NR_NODE_MEMBLKS) {
|
||||||
if (*addr >= max_addr) {
|
pr_err("NUMA: Too many emulated memblks, failing emulation\n");
|
||||||
*addr = max_addr;
|
return -EINVAL;
|
||||||
ret = -1;
|
|
||||||
}
|
}
|
||||||
nodes[nid].end = *addr;
|
|
||||||
node_set(nid, node_possible_map);
|
ei->nr_blks++;
|
||||||
|
eb->start = start;
|
||||||
|
eb->end = end;
|
||||||
|
eb->nid = nid;
|
||||||
|
|
||||||
if (emu_nid_to_phys[nid] == NUMA_NO_NODE)
|
if (emu_nid_to_phys[nid] == NUMA_NO_NODE)
|
||||||
emu_nid_to_phys[nid] = physnid;
|
emu_nid_to_phys[nid] = physnid;
|
||||||
|
|
||||||
printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", nid,
|
printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", nid,
|
||||||
nodes[nid].start, nodes[nid].end,
|
eb->start, eb->end, (eb->end - eb->start) >> 20);
|
||||||
(nodes[nid].end - nodes[nid].start) >> 20);
|
return 0;
|
||||||
return ret;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Sets up nr_nodes fake nodes interleaved over physical nodes ranging from addr
|
* Sets up nr_nodes fake nodes interleaved over physical nodes ranging from addr
|
||||||
* to max_addr. The return value is the number of nodes allocated.
|
* to max_addr. The return value is the number of nodes allocated.
|
||||||
*/
|
*/
|
||||||
static int __init split_nodes_interleave(u64 addr, u64 max_addr, int nr_nodes)
|
static int __init split_nodes_interleave(struct numa_meminfo *ei,
|
||||||
|
u64 addr, u64 max_addr, int nr_nodes)
|
||||||
{
|
{
|
||||||
nodemask_t physnode_mask = NODE_MASK_NONE;
|
nodemask_t physnode_mask = NODE_MASK_NONE;
|
||||||
u64 size;
|
u64 size;
|
||||||
int big;
|
int big;
|
||||||
int ret = 0;
|
int nid = 0;
|
||||||
int i;
|
int i, ret;
|
||||||
|
|
||||||
if (nr_nodes <= 0)
|
if (nr_nodes <= 0)
|
||||||
return -1;
|
return -1;
|
||||||
@ -721,7 +734,7 @@ static int __init split_nodes_interleave(u64 addr, u64 max_addr, int nr_nodes)
|
|||||||
u64 end = physnodes[i].start + size;
|
u64 end = physnodes[i].start + size;
|
||||||
u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN);
|
u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN);
|
||||||
|
|
||||||
if (ret < big)
|
if (nid < big)
|
||||||
end += FAKE_NODE_MIN_SIZE;
|
end += FAKE_NODE_MIN_SIZE;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -760,16 +773,21 @@ static int __init split_nodes_interleave(u64 addr, u64 max_addr, int nr_nodes)
|
|||||||
* happen as a result of rounding down each node's size
|
* happen as a result of rounding down each node's size
|
||||||
* to FAKE_NODE_MIN_SIZE.
|
* to FAKE_NODE_MIN_SIZE.
|
||||||
*/
|
*/
|
||||||
if (nodes_weight(physnode_mask) + ret >= nr_nodes)
|
if (nodes_weight(physnode_mask) + nid >= nr_nodes)
|
||||||
end = physnodes[i].end;
|
end = physnodes[i].end;
|
||||||
|
|
||||||
if (setup_node_range(ret++, i, &physnodes[i].start,
|
ret = emu_setup_memblk(ei, nid++, i,
|
||||||
end - physnodes[i].start,
|
physnodes[i].start,
|
||||||
physnodes[i].end) < 0)
|
min(end, physnodes[i].end));
|
||||||
|
if (ret < 0)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
physnodes[i].start = min(end, physnodes[i].end);
|
||||||
|
if (physnodes[i].start == physnodes[i].end)
|
||||||
node_clear(i, physnode_mask);
|
node_clear(i, physnode_mask);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return ret;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -794,12 +812,13 @@ static u64 __init find_end_of_node(u64 start, u64 max_addr, u64 size)
|
|||||||
* Sets up fake nodes of `size' interleaved over physical nodes ranging from
|
* Sets up fake nodes of `size' interleaved over physical nodes ranging from
|
||||||
* `addr' to `max_addr'. The return value is the number of nodes allocated.
|
* `addr' to `max_addr'. The return value is the number of nodes allocated.
|
||||||
*/
|
*/
|
||||||
static int __init split_nodes_size_interleave(u64 addr, u64 max_addr, u64 size)
|
static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
|
||||||
|
u64 addr, u64 max_addr, u64 size)
|
||||||
{
|
{
|
||||||
nodemask_t physnode_mask = NODE_MASK_NONE;
|
nodemask_t physnode_mask = NODE_MASK_NONE;
|
||||||
u64 min_size;
|
u64 min_size;
|
||||||
int ret = 0;
|
int nid = 0;
|
||||||
int i;
|
int i, ret;
|
||||||
|
|
||||||
if (!size)
|
if (!size)
|
||||||
return -1;
|
return -1;
|
||||||
@ -854,30 +873,31 @@ static int __init split_nodes_size_interleave(u64 addr, u64 max_addr, u64 size)
|
|||||||
memblock_x86_hole_size(end, physnodes[i].end) < size)
|
memblock_x86_hole_size(end, physnodes[i].end) < size)
|
||||||
end = physnodes[i].end;
|
end = physnodes[i].end;
|
||||||
|
|
||||||
/*
|
ret = emu_setup_memblk(ei, nid++, i,
|
||||||
* Setup the fake node that will be allocated as bootmem
|
physnodes[i].start,
|
||||||
* later. If setup_node_range() returns non-zero, there
|
min(end, physnodes[i].end));
|
||||||
* is no more memory available on this physical node.
|
if (ret < 0)
|
||||||
*/
|
return ret;
|
||||||
if (setup_node_range(ret++, i, &physnodes[i].start,
|
|
||||||
end - physnodes[i].start,
|
physnodes[i].start = min(end, physnodes[i].end);
|
||||||
physnodes[i].end) < 0)
|
if (physnodes[i].start == physnodes[i].end)
|
||||||
node_clear(i, physnode_mask);
|
node_clear(i, physnode_mask);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return ret;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Sets up the system RAM area from start_pfn to last_pfn according to the
|
* Sets up the system RAM area from start_pfn to last_pfn according to the
|
||||||
* numa=fake command-line option.
|
* numa=fake command-line option.
|
||||||
*/
|
*/
|
||||||
static int __init numa_emulation(int acpi, int amd)
|
static bool __init numa_emulation(int acpi, int amd)
|
||||||
{
|
{
|
||||||
static struct numa_meminfo ei __initdata;
|
static struct numa_meminfo ei __initdata;
|
||||||
const u64 max_addr = max_pfn << PAGE_SHIFT;
|
const u64 max_addr = max_pfn << PAGE_SHIFT;
|
||||||
int num_nodes;
|
int i, ret;
|
||||||
int i;
|
|
||||||
|
memset(&ei, 0, sizeof(ei));
|
||||||
|
|
||||||
for (i = 0; i < MAX_NUMNODES; i++)
|
for (i = 0; i < MAX_NUMNODES; i++)
|
||||||
emu_nid_to_phys[i] = NUMA_NO_NODE;
|
emu_nid_to_phys[i] = NUMA_NO_NODE;
|
||||||
@ -891,52 +911,33 @@ static int __init numa_emulation(int acpi, int amd)
|
|||||||
u64 size;
|
u64 size;
|
||||||
|
|
||||||
size = memparse(emu_cmdline, &emu_cmdline);
|
size = memparse(emu_cmdline, &emu_cmdline);
|
||||||
num_nodes = split_nodes_size_interleave(0, max_addr, size);
|
ret = split_nodes_size_interleave(&ei, 0, max_addr, size);
|
||||||
} else {
|
} else {
|
||||||
unsigned long n;
|
unsigned long n;
|
||||||
|
|
||||||
n = simple_strtoul(emu_cmdline, NULL, 0);
|
n = simple_strtoul(emu_cmdline, NULL, 0);
|
||||||
num_nodes = split_nodes_interleave(0, max_addr, n);
|
ret = split_nodes_interleave(&ei, 0, max_addr, n);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (num_nodes < 0)
|
if (ret < 0)
|
||||||
return num_nodes;
|
return false;
|
||||||
|
|
||||||
|
if (numa_cleanup_meminfo(&ei) < 0) {
|
||||||
|
pr_warning("NUMA: Warning: constructed meminfo invalid, disabling emulation\n");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* commit */
|
||||||
|
numa_meminfo = ei;
|
||||||
|
|
||||||
/* make sure all emulated nodes are mapped to a physical node */
|
/* make sure all emulated nodes are mapped to a physical node */
|
||||||
for (i = 0; i < ARRAY_SIZE(emu_nid_to_phys); i++)
|
for (i = 0; i < ARRAY_SIZE(emu_nid_to_phys); i++)
|
||||||
if (emu_nid_to_phys[i] == NUMA_NO_NODE)
|
if (emu_nid_to_phys[i] == NUMA_NO_NODE)
|
||||||
emu_nid_to_phys[i] = 0;
|
emu_nid_to_phys[i] = 0;
|
||||||
|
|
||||||
ei.nr_blks = num_nodes;
|
fake_physnodes(acpi, amd, &ei);
|
||||||
for (i = 0; i < ei.nr_blks; i++) {
|
|
||||||
ei.blk[i].start = nodes[i].start;
|
|
||||||
ei.blk[i].end = nodes[i].end;
|
|
||||||
ei.blk[i].nid = i;
|
|
||||||
}
|
|
||||||
|
|
||||||
memnode_shift = compute_hash_shift(&ei);
|
|
||||||
if (memnode_shift < 0) {
|
|
||||||
memnode_shift = 0;
|
|
||||||
printk(KERN_ERR "No NUMA hash function found. NUMA emulation "
|
|
||||||
"disabled.\n");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* We need to vacate all active ranges that may have been registered for
|
|
||||||
* the e820 memory map.
|
|
||||||
*/
|
|
||||||
remove_all_active_ranges();
|
|
||||||
for_each_node_mask(i, node_possible_map)
|
|
||||||
memblock_x86_register_active_regions(i, nodes[i].start >> PAGE_SHIFT,
|
|
||||||
nodes[i].end >> PAGE_SHIFT);
|
|
||||||
init_memory_mapping_high();
|
|
||||||
for_each_node_mask(i, node_possible_map)
|
|
||||||
setup_node_bootmem(i, nodes[i].start, nodes[i].end);
|
|
||||||
fake_physnodes(acpi, amd, num_nodes);
|
|
||||||
numa_init_array();
|
|
||||||
numa_emu_dist = true;
|
numa_emu_dist = true;
|
||||||
return 0;
|
return true;
|
||||||
}
|
}
|
||||||
#endif /* CONFIG_NUMA_EMU */
|
#endif /* CONFIG_NUMA_EMU */
|
||||||
|
|
||||||
@ -988,15 +989,13 @@ void __init initmem_init(void)
|
|||||||
continue;
|
continue;
|
||||||
#ifdef CONFIG_NUMA_EMU
|
#ifdef CONFIG_NUMA_EMU
|
||||||
setup_physnodes(0, max_pfn << PAGE_SHIFT);
|
setup_physnodes(0, max_pfn << PAGE_SHIFT);
|
||||||
if (emu_cmdline && !numa_emulation(i == 0, i == 1))
|
/*
|
||||||
return;
|
* If requested, try emulation. If emulation is not used,
|
||||||
|
* build identity emu_nid_to_phys[] for numa_add_cpu()
|
||||||
/* not emulating, build identity mapping for numa_add_cpu() */
|
*/
|
||||||
for (j = 0; j < ARRAY_SIZE(emu_nid_to_phys); j++)
|
if (!emu_cmdline || !numa_emulation(i == 0, i == 1))
|
||||||
emu_nid_to_phys[j] = j;
|
for (j = 0; j < ARRAY_SIZE(emu_nid_to_phys); j++)
|
||||||
|
emu_nid_to_phys[j] = j;
|
||||||
nodes_clear(node_possible_map);
|
|
||||||
nodes_clear(node_online_map);
|
|
||||||
#endif
|
#endif
|
||||||
if (numa_register_memblks(&numa_meminfo) < 0)
|
if (numa_register_memblks(&numa_meminfo) < 0)
|
||||||
continue;
|
continue;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user