diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index 8dfa3054f10f..15d23da2455f 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -173,6 +173,12 @@ config ACPI_SRAT
 	bool
 	default y
 	depends on NUMA && (X86_SUMMIT || X86_GENERICARCH)
+	select ACPI_NUMA
+
+config HAVE_ARCH_PARSE_SRAT
+	bool
+	default y
+	depends on ACPI_SRAT
 
 config X86_SUMMIT_NUMA
 	bool
diff --git a/arch/i386/kernel/srat.c b/arch/i386/kernel/srat.c
index 52b3ed5d2cb5..989c85255dbe 100644
--- a/arch/i386/kernel/srat.c
+++ b/arch/i386/kernel/srat.c
@@ -39,7 +39,6 @@
 #define NODE_ARRAY_OFFSET(x)	((x) % 8)	/* 8 bits/char */
 #define BMAP_SET(bmap, bit)	((bmap)[NODE_ARRAY_INDEX(bit)] |= 1 << NODE_ARRAY_OFFSET(bit))
 #define BMAP_TEST(bmap, bit)	((bmap)[NODE_ARRAY_INDEX(bit)] & (1 << NODE_ARRAY_OFFSET(bit)))
-#define MAX_PXM_DOMAINS		256	/* 1 byte and no promises about values */
 /* bitmap length; _PXM is at most 255 */
 #define PXM_BITMAP_LEN (MAX_PXM_DOMAINS / 8)
 static u8 pxm_bitmap[PXM_BITMAP_LEN];	/* bitmap of proximity domains */
@@ -213,19 +212,11 @@ static __init void node_read_chunk(int nid, struct node_memory_chunk_s *memory_c
 	node_end_pfn[nid] = memory_chunk->end_pfn;
 }
 
-static u8 pxm_to_nid_map[MAX_PXM_DOMAINS];/* _PXM to logical node ID map */
-
-int pxm_to_node(int pxm)
-{
-	return pxm_to_nid_map[pxm];
-}
-
 /* Parse the ACPI Static Resource Affinity Table */
 static int __init acpi20_parse_srat(struct acpi_table_srat *sratp)
 {
 	u8 *start, *end, *p;
 	int i, j, nid;
-	u8 nid_to_pxm_map[MAX_NUMNODES];/* logical node ID to _PXM map */
 
 	start = (u8 *)(&(sratp->reserved) + 1);	/* skip header */
 	p = start;
@@ -235,10 +226,6 @@ static int __init acpi20_parse_srat(struct acpi_table_srat *sratp)
 	memset(node_memory_chunk, 0, sizeof(node_memory_chunk));
 	memset(zholes_size, 0, sizeof(zholes_size));
 
-	/* -1 in these maps means not available */
-	memset(pxm_to_nid_map, -1, sizeof(pxm_to_nid_map));
-	memset(nid_to_pxm_map, -1, sizeof(nid_to_pxm_map));
-
 	num_memory_chunks = 0;
 	while (p < end) {
 		switch (*p) {
@@ -278,9 +265,9 @@ static int __init acpi20_parse_srat(struct acpi_table_srat *sratp)
 	nodes_clear(node_online_map);
 	for (i = 0; i < MAX_PXM_DOMAINS; i++) {
 		if (BMAP_TEST(pxm_bitmap, i)) {
-			nid = num_online_nodes();
-			pxm_to_nid_map[i] = nid;
-			nid_to_pxm_map[nid] = i;
-			node_set_online(nid);
+			nid = acpi_map_pxm_to_node(i);
+			/* negative: every logical node id is already in use */
+			if (nid >= 0)
+				node_set_online(nid);
 		}
 	}
@@ -288,7 +275,7 @@ static int __init acpi20_parse_srat(struct acpi_table_srat *sratp)
 
 	/* set cnode id in memory chunk structure */
 	for (i = 0; i < num_memory_chunks; i++)
-		node_memory_chunk[i].nid = pxm_to_nid_map[node_memory_chunk[i].pxm];
+		node_memory_chunk[i].nid = pxm_to_node(node_memory_chunk[i].pxm);
 
 	printk("pxm bitmap: ");
 	for (i = 0; i < sizeof(pxm_bitmap); i++) {
diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
index bdccd0b1eb60..3ce443e6c016 100644
--- a/arch/ia64/hp/common/sba_iommu.c
+++ b/arch/ia64/hp/common/sba_iommu.c
@@ -1958,7 +1958,7 @@ sba_map_ioc_to_node(struct ioc *ioc, acpi_handle handle)
 	if (pxm < 0)
 		return;
 
-	node = pxm_to_nid_map[pxm];
+	node = pxm_to_node(pxm);
 
 	if (node >= MAX_NUMNODES || !node_online(node))
 		return;
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 58c93a30348c..d1c52cf67882 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -415,9 +415,6 @@ static int __initdata srat_num_cpus; /* number of cpus */
 static u32 __devinitdata pxm_flag[PXM_FLAG_LEN];
 #define pxm_bit_set(bit)	(set_bit(bit,(void *)pxm_flag))
 #define pxm_bit_test(bit)	(test_bit(bit,(void *)pxm_flag))
-/* maps to convert between proximity domain and logical node ID */
-int __devinitdata pxm_to_nid_map[MAX_PXM_DOMAINS];
-int __initdata nid_to_pxm_map[MAX_NUMNODES];
 static struct acpi_table_slit __initdata *slit_table;
 
 static int get_processor_proximity_domain(struct acpi_table_processor_affinity *pa)
@@ -533,22 +530,19 @@ void __init acpi_numa_arch_fixup(void)
 	 * MCD - This can probably be dropped now.  No need for pxm ID to node ID
 	 * mapping with sparse node numbering iff MAX_PXM_DOMAINS <= MAX_NUMNODES.
 	 */
-	/* calculate total number of nodes in system from PXM bitmap */
-	memset(pxm_to_nid_map, -1, sizeof(pxm_to_nid_map));
-	memset(nid_to_pxm_map, -1, sizeof(nid_to_pxm_map));
 	nodes_clear(node_online_map);
 	for (i = 0; i < MAX_PXM_DOMAINS; i++) {
 		if (pxm_bit_test(i)) {
-			int nid = num_online_nodes();
-			pxm_to_nid_map[i] = nid;
-			nid_to_pxm_map[nid] = i;
-			node_set_online(nid);
+			int nid = acpi_map_pxm_to_node(i);
+			/* negative: every logical node id is already in use */
+			if (nid >= 0)
+				node_set_online(nid);
 		}
 	}
 
 	/* set logical node id in memory chunk structure */
 	for (i = 0; i < num_node_memblks; i++)
-		node_memblk[i].nid = pxm_to_nid_map[node_memblk[i].nid];
+		node_memblk[i].nid = pxm_to_node(node_memblk[i].nid);
 
 	/* assign memory bank numbers for each chunk on each node */
 	for_each_online_node(i) {
@@ -562,7 +556,7 @@ void __init acpi_numa_arch_fixup(void)
 
 	/* set logical node id in cpu structure */
 	for (i = 0; i < srat_num_cpus; i++)
-		node_cpuid[i].nid = pxm_to_nid_map[node_cpuid[i].nid];
+		node_cpuid[i].nid = pxm_to_node(node_cpuid[i].nid);
 
 	printk(KERN_INFO "Number of logical nodes in system = %d\n",
 	       num_online_nodes());
@@ -575,11 +569,11 @@ void __init acpi_numa_arch_fixup(void)
 	for (i = 0; i < slit_table->localities; i++) {
 		if (!pxm_bit_test(i))
 			continue;
-		node_from = pxm_to_nid_map[i];
+		node_from = pxm_to_node(i);
 		for (j = 0; j < slit_table->localities; j++) {
 			if (!pxm_bit_test(j))
 				continue;
-			node_to = pxm_to_nid_map[j];
+			node_to = pxm_to_node(j);
 			node_distance(node_from, node_to) =
 			    slit_table->entry[i * slit_table->localities + j];
 		}
@@ -785,9 +779,9 @@ int acpi_map_cpu2node(acpi_handle handle, int cpu, long physid)
 
 	/*
 	 * Assuming that the container driver would have set the proximity
-	 * domain and would have initialized pxm_to_nid_map[pxm_id] && pxm_flag
+	 * domain and would have initialized pxm_to_node(pxm_id) && pxm_flag
 	 */
-	node_cpuid[cpu].nid = (pxm_id < 0) ? 0 : pxm_to_nid_map[pxm_id];
+	node_cpuid[cpu].nid = (pxm_id < 0) ? 0 : pxm_to_node(pxm_id);
 
 	node_cpuid[cpu].phys_id = physid;
 #endif
@@ -966,7 +960,7 @@ acpi_map_iosapic(acpi_handle handle, u32 depth, void *context, void **ret)
 	if (pxm < 0)
 		return AE_OK;
 
-	node = pxm_to_nid_map[pxm];
+	node = pxm_to_node(pxm);
 
 	if (node >= MAX_NUMNODES || !node_online(node) ||
 	    cpus_empty(node_to_cpumask(node)))
diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index ab829a22f8a4..cf7751b99d1c 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -352,7 +352,7 @@ pci_acpi_scan_root(struct acpi_device *device, int domain, int bus)
 	pxm = acpi_get_pxm(controller->acpi_handle);
 #ifdef CONFIG_NUMA
 	if (pxm >= 0)
-		controller->node = pxm_to_nid_map[pxm];
+		controller->node = pxm_to_node(pxm);
 #endif
 
 	acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_window,
diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c
index 30988dfbddff..93577abae36d 100644
--- a/arch/ia64/sn/kernel/setup.c
+++ b/arch/ia64/sn/kernel/setup.c
@@ -139,7 +139,7 @@ static int __init pxm_to_nasid(int pxm)
 	int i;
 	int nid;
 
-	nid = pxm_to_nid_map[pxm];
+	nid = pxm_to_node(pxm);
 	for (i = 0; i < num_node_memblks; i++) {
 		if (node_memblk[i].nid == nid) {
 			return NASID_GET(node_memblk[i].start_paddr);
@@ -704,7 +704,7 @@ void __init build_cnode_tables(void)
 	 * cnode == node for all C & M bricks.
 	 */
 	for_each_online_node(node) {
-		nasid = pxm_to_nasid(nid_to_pxm_map[node]);
+		nasid = pxm_to_nasid(node_to_pxm(node));
 		sn_cnodeid_to_nasid[node] = nasid;
 		physical_node_map[nasid] = node;
 	}
diff --git a/arch/x86_64/mm/srat.c b/arch/x86_64/mm/srat.c
index 474df22c6ed2..502fce65e96a 100644
--- a/arch/x86_64/mm/srat.c
+++ b/arch/x86_64/mm/srat.c
@@ -30,7 +30,6 @@
 static struct acpi_table_slit *acpi_slit;
 
 static nodemask_t nodes_parsed __initdata;
-static nodemask_t nodes_found __initdata;
 static struct bootnode nodes[MAX_NUMNODES] __initdata;
 static struct bootnode nodes_add[MAX_NUMNODES] __initdata;
 static int found_add_area __initdata;
@@ -38,33 +37,14 @@ int hotadd_percent __initdata = 0;
 #ifndef RESERVE_HOTADD
 #define hotadd_percent 0	/* Ignore all settings */
 #endif
-static u8 pxm2node[256] = { [0 ... 255] = 0xff };
 
 /* Too small nodes confuse the VM badly. Usually they result
    from BIOS bugs. */
 #define NODE_MIN_SIZE (4*1024*1024)
 
-static int node_to_pxm(int n);
-
-int pxm_to_node(int pxm)
-{
-	if ((unsigned)pxm >= 256)
-		return -1;
-	/* Extend 0xff to (int)-1 */
-	return (signed char)pxm2node[pxm];
-}
-
 static __init int setup_node(int pxm)
 {
-	unsigned node = pxm2node[pxm];
-	if (node == 0xff) {
-		if (nodes_weight(nodes_found) >= MAX_NUMNODES)
-			return -1;
-		node = first_unset_node(nodes_found);
-		node_set(node, nodes_found);
-		pxm2node[pxm] = node;
-	}
-	return pxm2node[pxm];
+	return acpi_map_pxm_to_node(pxm);
 }
 
 static __init int conflicting_nodes(unsigned long start, unsigned long end)
@@ -440,17 +420,6 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
 	return 0;
 }
 
-static int node_to_pxm(int n)
-{
-	int i;
-	if (pxm2node[n] == n)
-		return n;
-	for (i = 0; i < 256; i++)
-		if (pxm2node[i] == n)
-			return i;
-	return 0;
-}
-
 void __init srat_reserve_add_area(int nodeid)
 {
 	if (found_add_area && nodes_add[nodeid].end) {
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index c24652d31bf9..230c53852231 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -162,7 +162,7 @@ config ACPI_THERMAL
 config ACPI_NUMA
 	bool "NUMA support"
 	depends on NUMA
-	depends on (IA64 || X86_64)
+	depends on (X86 || IA64)
 	default y if IA64_GENERIC || IA64_SGI_SN2
 
 config ACPI_ASUS
diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c
index 64b98e82feb7..e2c1a16078c9 100644
--- a/drivers/acpi/numa.c
+++ b/drivers/acpi/numa.c
@@ -36,12 +36,79 @@
 #define _COMPONENT	ACPI_NUMA
 ACPI_MODULE_NAME("numa")
 
+static nodemask_t nodes_found_map = NODE_MASK_NONE;
+#define PXM_INVAL	(-1)
+#define NID_INVAL	(-1)
+
+/* maps to convert between proximity domain and logical node ID */
+int __cpuinitdata pxm_to_node_map[MAX_PXM_DOMAINS]
+				= { [0 ... MAX_PXM_DOMAINS - 1] = NID_INVAL };
+int __cpuinitdata node_to_pxm_map[MAX_NUMNODES]
+				= { [0 ... MAX_NUMNODES - 1] = PXM_INVAL };
+
 extern int __init acpi_table_parse_madt_family(enum acpi_table_id id,
 					       unsigned long madt_size,
 					       int entry_id,
 					       acpi_madt_entry_handler handler,
 					       unsigned int max_entries);
 
+/* Look up the logical node for @pxm; NID_INVAL if unmapped or out of range. */
+int __cpuinit pxm_to_node(int pxm)
+{
+	if (pxm < 0 || pxm >= MAX_PXM_DOMAINS)
+		return NID_INVAL;
+	return pxm_to_node_map[pxm];
+}
+
+/* Look up the pxm for @node; PXM_INVAL if unmapped or out of range. */
+int __cpuinit node_to_pxm(int node)
+{
+	if (node < 0 || node >= MAX_NUMNODES)
+		return PXM_INVAL;
+	return node_to_pxm_map[node];
+}
+
+/*
+ * Return the logical node for @pxm, assigning the first unused node id the
+ * first time a domain is seen.  Returns NID_INVAL when @pxm is out of range
+ * or every node id is already taken.
+ */
+int __cpuinit acpi_map_pxm_to_node(int pxm)
+{
+	int node;
+
+	if (pxm < 0 || pxm >= MAX_PXM_DOMAINS)
+		return NID_INVAL;
+
+	node = pxm_to_node_map[pxm];
+	if (node < 0) {
+		if (nodes_weight(nodes_found_map) >= MAX_NUMNODES)
+			return NID_INVAL;
+		node = first_unset_node(nodes_found_map);
+		pxm_to_node_map[pxm] = node;
+		node_to_pxm_map[node] = pxm;
+		node_set(node, nodes_found_map);
+	}
+
+	return node;
+}
+
+/* Drop the mapping for @node in both tables and release its node id. */
+void __cpuinit acpi_unmap_pxm_to_node(int node)
+{
+	int pxm;
+
+	if (node < 0 || node >= MAX_NUMNODES)
+		return;
+
+	pxm = node_to_pxm_map[node];
+	/* an unmapped node holds PXM_INVAL, which must not be used as an index */
+	if (pxm >= 0)
+		pxm_to_node_map[pxm] = NID_INVAL;
+	node_to_pxm_map[node] = PXM_INVAL;
+	node_clear(node, nodes_found_map);
+}
+
 void __init acpi_table_print_srat_entry(acpi_table_entry_header * header)
 {
 
diff --git a/include/acpi/acpi_numa.h b/include/acpi/acpi_numa.h
new file mode 100644
index 000000000000..1049f2a0a6db
--- /dev/null
+++ b/include/acpi/acpi_numa.h
@@ -0,0 +1,25 @@
+#ifndef __ACPI_NUMA_H
+#define __ACPI_NUMA_H
+
+#ifdef CONFIG_ACPI_NUMA
+#include <linux/numa.h>
+
+/* Proximity bitmap length */
+#if MAX_NUMNODES > 256
+#define MAX_PXM_DOMAINS MAX_NUMNODES
+#else
+#define MAX_PXM_DOMAINS (256)	/* Old pxm spec is defined 8 bit */
+#endif
+
+/* lookup tables between proximity domain (pxm) and logical node id */
+extern int __cpuinitdata pxm_to_node_map[MAX_PXM_DOMAINS];
+extern int __cpuinitdata node_to_pxm_map[MAX_NUMNODES];
+
+/* accessors and (un)mapping helpers for the two tables above */
+extern int __cpuinit pxm_to_node(int);
+extern int __cpuinit node_to_pxm(int);
+extern int __cpuinit acpi_map_pxm_to_node(int);
+extern void __cpuinit acpi_unmap_pxm_to_node(int);
+
+#endif	/* CONFIG_ACPI_NUMA */
+#endif	/* __ACPI_NUMA_H */
diff --git a/include/asm-x86_64/numa.h b/include/asm-x86_64/numa.h
index 1cc92fe02503..933ff11ece15 100644
--- a/include/asm-x86_64/numa.h
+++ b/include/asm-x86_64/numa.h
@@ -8,7 +8,6 @@ struct bootnode {
 };
 
 extern int compute_hash_shift(struct bootnode *nodes, int numnodes);
-extern int pxm_to_node(int nid);
 
 #define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
 
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 1cf0b91d05bd..90d6df1551ed 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -37,6 +37,7 @@
 #include
 #include
 #include
+#include <acpi/acpi_numa.h>
 #include
 
 
@@ -407,10 +408,18 @@ void acpi_table_print_madt_entry (acpi_table_entry_header *madt);
 void acpi_table_print_srat_entry (acpi_table_entry_header *srat);
 
 /* the following four functions are architecture-dependent */
+#ifdef CONFIG_HAVE_ARCH_PARSE_SRAT
+#define NR_NODE_MEMBLKS MAX_NUMNODES
+#define acpi_numa_slit_init(slit) do {} while (0)
+#define acpi_numa_processor_affinity_init(pa) do {} while (0)
+#define acpi_numa_memory_affinity_init(ma) do {} while (0)
+#define acpi_numa_arch_fixup() do {} while (0)
+#else
 void acpi_numa_slit_init (struct acpi_table_slit *slit);
 void acpi_numa_processor_affinity_init (struct acpi_table_processor_affinity *pa);
 void acpi_numa_memory_affinity_init (struct acpi_table_memory_affinity *ma);
 void acpi_numa_arch_fixup(void);
+#endif
 
 #ifdef CONFIG_ACPI_HOTPLUG_CPU
 /* Arch dependent functions for cpu hotplug support */