mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-06 05:02:31 +00:00
cc6515591b
Patch series "drivers/base/memory: determine and store zone for single-zone memory blocks", v2. I remember talking to Michal in the past about removing test_pages_in_a_zone(), which we use for: * verifying that a memory block we intend to offline is really only managed by a single zone. We don't support offlining of memory blocks that are managed by multiple zones (e.g., multiple nodes, DMA and DMA32) * exposing that zone to user space via /sys/devices/system/memory/memory*/valid_zones Now that I identified some more cases where test_pages_in_a_zone() might go wrong, and we received a UBSAN report (see patch #3), let's get rid of this PFN walker. So instead of detecting the zone at runtime with test_pages_in_a_zone() by scanning the memmap, let's determine and remember for each memory block if it's managed by a single zone. The stored zone can then be used for the above two cases, avoiding a manual lookup using test_pages_in_a_zone(). This avoids eventually stumbling over uninitialized memmaps in corner cases, especially when ZONE_DEVICE ranges partly fall into memory blocks (that are responsible for managing System RAM). Handling memory onlining is easy, because we online to exactly one zone. Handling boot memory is trickier, because we want to avoid scanning all zones of all nodes to detect possible zones that overlap with the physical memory region of interest. Fortunately, we already have code that determines the applicable nodes for a memory block, to create sysfs links -- we'll hook into that. Patch #1 is a simple cleanup I had lying around for a long time. Patch #2 contains the main logic to remove test_pages_in_a_zone() and further details. [1] https://lkml.kernel.org/r/20220128144540.153902-1-david@redhat.com [2] https://lkml.kernel.org/r/20220203105212.30385-1-david@redhat.com This patch (of 2): Let's adjust the stale terminology, making it match unregister_memory_block_under_nodes() and do_register_memory_block_under_node(). 
We're dealing with memory block devices, which span 1..X memory sections. Link: https://lkml.kernel.org/r/20220210184359.235565-1-david@redhat.com Link: https://lkml.kernel.org/r/20220210184359.235565-2-david@redhat.com Signed-off-by: David Hildenbrand <david@redhat.com> Acked-by: Oscar Salvador <osalvador@suse.de> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Michal Hocko <mhocko@suse.com> Cc: "Rafael J. Wysocki" <rafael@kernel.org> Cc: Rafael Parra <rparrazo@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
194 lines
4.8 KiB
C
194 lines
4.8 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
/*
|
|
* include/linux/node.h - generic node definition
|
|
*
|
|
* This is mainly for topological representation. We define the
|
|
* basic 'struct node' here, which can be embedded in per-arch
|
|
* definitions of processors.
|
|
*
|
|
* Basic handling of the devices is done in drivers/base/node.c
|
|
* and system devices are handled in drivers/base/sys.c.
|
|
*
|
|
* Nodes are exported via driverfs in the class/node/devices/
|
|
* directory.
|
|
*/
|
|
#ifndef _LINUX_NODE_H_
|
|
#define _LINUX_NODE_H_
|
|
|
|
#include <linux/device.h>
|
|
#include <linux/cpumask.h>
|
|
#include <linux/list.h>
|
|
#include <linux/workqueue.h>
|
|
|
|
/**
 * struct node_hmem_attrs - performance attributes of heterogeneous memory
 *
 * @read_bandwidth:	bandwidth for reads, in MB/s
 * @write_bandwidth:	bandwidth for writes, in MB/s
 * @read_latency:	latency of reads, in nanoseconds
 * @write_latency:	latency of writes, in nanoseconds
 */
struct node_hmem_attrs {
	unsigned int read_bandwidth;
	unsigned int write_bandwidth;
	unsigned int read_latency;
	unsigned int write_latency;
};
|
|
|
|
/*
 * Values for the 'indexing' member of struct node_cache_attrs.
 * The numeric values are spelled out; they match the implicit numbering.
 */
enum cache_indexing {
	NODE_CACHE_DIRECT_MAP	= 0,
	NODE_CACHE_INDEXED	= 1,
	NODE_CACHE_OTHER	= 2,
};
|
|
|
|
/*
 * Values for the 'write_policy' member of struct node_cache_attrs.
 * The numeric values are spelled out; they match the implicit numbering.
 */
enum cache_write_policy {
	NODE_CACHE_WRITE_BACK		= 0,
	NODE_CACHE_WRITE_THROUGH	= 1,
	NODE_CACHE_WRITE_OTHER		= 2,
};
|
|
|
|
/**
|
|
* struct node_cache_attrs - system memory caching attributes
|
|
*
|
|
* @indexing: The ways memory blocks may be placed in cache
|
|
* @write_policy: Write back or write through policy
|
|
* @size: Total size of cache in bytes
|
|
* @line_size: Number of bytes fetched on a cache miss
|
|
* @level: The cache hierarchy level
|
|
*/
|
|
struct node_cache_attrs {
|
|
enum cache_indexing indexing;
|
|
enum cache_write_policy write_policy;
|
|
u64 size;
|
|
u16 line_size;
|
|
u8 level;
|
|
};
|
|
|
|
#ifdef CONFIG_HMEM_REPORTING
/* Implemented out of line when heterogeneous-memory reporting is enabled. */
void node_add_cache(unsigned int nid, struct node_cache_attrs *cache_attrs);
void node_set_perf_attrs(unsigned int nid, struct node_hmem_attrs *hmem_attrs,
			 unsigned int access);
#else
/* Without CONFIG_HMEM_REPORTING both calls compile to nothing. */
static inline void node_add_cache(unsigned int nid,
				  struct node_cache_attrs *cache_attrs)
{
}

static inline void node_set_perf_attrs(unsigned int nid,
				       struct node_hmem_attrs *hmem_attrs,
				       unsigned int access)
{
}
#endif
|
|
|
|
struct node {
|
|
struct device dev;
|
|
struct list_head access_list;
|
|
|
|
#if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_HUGETLBFS)
|
|
struct work_struct node_work;
|
|
#endif
|
|
#ifdef CONFIG_HMEM_REPORTING
|
|
struct list_head cache_attrs;
|
|
struct device *cache_dev;
|
|
#endif
|
|
};
|
|
|
|
/* Only pointers to struct memory_block are used here; a forward declaration suffices. */
struct memory_block;

/* Table of node devices; defined outside this header. */
extern struct node *node_devices[];

/* Callback taking a node; see register_hugetlbfs_with_node(). */
typedef void (*node_registration_func_t)(struct node *node);
|
|
|
|
#if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_NUMA)
|
|
void register_memory_blocks_under_node(int nid, unsigned long start_pfn,
|
|
unsigned long end_pfn,
|
|
enum meminit_context context);
|
|
#else
|
|
static inline void register_memory_blocks_under_node(int nid, unsigned long start_pfn,
|
|
unsigned long end_pfn,
|
|
enum meminit_context context)
|
|
{
|
|
}
|
|
#endif
|
|
|
|
/* Declared unconditionally, independent of CONFIG_NUMA. */
void unregister_node(struct node *node);
|
|
#ifdef CONFIG_NUMA
|
|
void node_dev_init(void);

/* Low-level node registration; meant to be called by memory hotplug only. */
int __register_one_node(int nid);
|
|
|
|
/* Registers an online node */
|
|
static inline int register_one_node(int nid)
|
|
{
|
|
int error = 0;
|
|
|
|
if (node_online(nid)) {
|
|
struct pglist_data *pgdat = NODE_DATA(nid);
|
|
unsigned long start_pfn = pgdat->node_start_pfn;
|
|
unsigned long end_pfn = start_pfn + pgdat->node_spanned_pages;
|
|
|
|
error = __register_one_node(nid);
|
|
if (error)
|
|
return error;
|
|
register_memory_blocks_under_node(nid, start_pfn, end_pfn,
|
|
MEMINIT_EARLY);
|
|
}
|
|
|
|
return error;
|
|
}
|
|
|
|
/* CONFIG_NUMA prototypes; the !CONFIG_NUMA branch provides inline stubs. */
void unregister_one_node(int nid);
int register_cpu_under_node(unsigned int cpu, unsigned int nid);
int unregister_cpu_under_node(unsigned int cpu, unsigned int nid);
void unregister_memory_block_under_nodes(struct memory_block *mem_blk);

int register_memory_node_under_compute_node(unsigned int mem_nid,
					    unsigned int cpu_nid,
					    unsigned int access);
|
|
|
|
#ifdef CONFIG_HUGETLBFS
/* Takes a pair of node callbacks: one to register, one to unregister. */
void register_hugetlbfs_with_node(node_registration_func_t doregister,
				  node_registration_func_t unregister);
#endif
|
|
#else
|
|
/* !CONFIG_NUMA: nothing to initialise. */
static inline void node_dev_init(void)
{
}
|
|
/* !CONFIG_NUMA: no-op that always returns 0. */
static inline int __register_one_node(int nid)
{
	return 0;
}
|
|
/* !CONFIG_NUMA: no-op that always returns 0. */
static inline int register_one_node(int nid)
{
	return 0;
}
|
|
/*
 * !CONFIG_NUMA stub: nothing to unregister.
 *
 * Returns void, like the CONFIG_NUMA prototype
 * "void unregister_one_node(int nid)", so callers see the same signature
 * in every configuration.
 */
static inline void unregister_one_node(int nid)
{
}
|
|
/* !CONFIG_NUMA: no-op that always returns 0. */
static inline int register_cpu_under_node(unsigned int cpu, unsigned int nid)
{
	return 0;
}
|
|
/* !CONFIG_NUMA: no-op that always returns 0. */
static inline int unregister_cpu_under_node(unsigned int cpu, unsigned int nid)
{
	return 0;
}
|
|
/* !CONFIG_NUMA: no-op; @mem_blk is not touched. */
static inline void
unregister_memory_block_under_nodes(struct memory_block *mem_blk)
{
}
|
|
|
|
/* !CONFIG_NUMA: no-op; neither callback is stored or called. */
static inline void
register_hugetlbfs_with_node(node_registration_func_t reg,
			     node_registration_func_t unreg)
{
}
|
|
#endif
|
|
|
|
/* Map a pointer to the embedded 'dev' member back to its struct node. */
#define to_node(device)	container_of(device, struct node, dev)
|
|
|
|
static inline bool node_is_toptier(int node)
|
|
{
|
|
return node_state(node, N_CPU);
|
|
}
|
|
|
|
#endif /* _LINUX_NODE_H_ */
|