mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-16 18:26:42 +00:00
02c163e959
- Supplement ACPI HMAT reported memory performance with native CXL memory performance enumeration - Add support for CXL error injection via the ACPI EINJ mechanism - Cleanup CXL DOE and CDAT integration - Miscellaneous cleanups and fixes -----BEGIN PGP SIGNATURE----- iHUEABYKAB0WIQSbo+XnGs+rwLz9XGXfioYZHlFsZwUCZfSddgAKCRDfioYZHlFs Z+QJAQC0DaU/QbziEUFgd6r92nLA9PHLWi2zhjsSjyuJ3kh3IQD+KN0h8IZ+Av05 EOjLw21+ejwJ2dtCDcy2dlSpS6653wc= =czLR -----END PGP SIGNATURE----- Merge tag 'cxl-for-6.9' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl Pull CXL updates from Dan Williams: "CXL has mechanisms to enumerate the performance characteristics of memory devices. Those mechanisms allow Linux to build the equivalent of ACPI SRAT, SLIT, and HMAT tables dynamically at runtime. That capability is necessary because static ACPI can not represent dynamic CXL configurations (and reconfigurations). So, building on the v6.8 work to add "Quality of Service" enumeration, this update plumbs CXL "access coordinates" (read/write access latency and bandwidth) in all the same places that ACPI HMAT feeds similar data. Follow-on patches from the -mm side can then use that data to feed mechanisms like mm/memory-tiers.c. Greg has acked the touch to drivers/base/. The other feature update this cycle is support for CXL error injection via the ACPI EINJ module. That facility enables injection of bus protocol errors provided the user knows the magic address values to insert in the interface. To hide that magic, and make this easier to use, new error injection attributes were added to CXL debugfs. That interface injects the errors relative to a CXL object rather than require user tooling to know how to lookup and inject RCRB (Root Complex Register Block) addresses into the raw EINJ debugfs interface. It received some helpful review comments from Tony, but no explicit acks from the ACPI side. 
The primary user visible change for existing EINJ users is that they may find that einj.ko was already loaded by cxl_core.ko. Previously, einj.ko was only loaded on demand. The usual collection of miscellaneous cleanups are also present this cycle. Summary: - Supplement ACPI HMAT reported memory performance with native CXL memory performance enumeration - Add support for CXL error injection via the ACPI EINJ mechanism - Cleanup CXL DOE and CDAT integration - Miscellaneous cleanups and fixes" * tag 'cxl-for-6.9' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl: (21 commits) Documentation/ABI/testing/debugfs-cxl: Fix "Unexpected indentation" lib/firmware_table: Provide buffer length argument to cdat_table_parse() cxl/pci: Get rid of pointer arithmetic reading CDAT table cxl/pci: Rename DOE mailbox handle to doe_mb cxl: Fix the incorrect assignment of SSLBIS entry pointer initial location cxl/core: Add CXL EINJ debugfs files EINJ, Documentation: Update EINJ kernel doc EINJ: Add CXL error type support EINJ: Migrate to a platform driver cxl/region: Deal with numa nodes not enumerated by SRAT cxl/region: Add memory hotplug notifier for cxl region cxl/region: Add sysfs attribute for locality attributes of CXL regions cxl/region: Calculate performance data for a region cxl: Set cxlmd->endpoint before adding port device cxl: Move QoS class to be calculated from the nearest CPU cxl: Split out host bridge access coordinates cxl: Split out combine_coordinates() for common shared usage ACPI: HMAT / cxl: Add retrieval of generic port coordinates for both access classes ACPI: HMAT: Introduce 2 levels of generic port access class base/node / ACPI: Enumerate node access class for 'struct access_coordinate' ...
193 lines
6.4 KiB
C
193 lines
6.4 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
/*
|
|
* include/linux/memory.h - generic memory definition
|
|
*
|
|
* This is mainly for topological representation. We define the
|
|
* basic "struct memory_block" here, which can be embedded in per-arch
|
|
* definitions or NUMA information.
|
|
*
|
|
* Basic handling of the devices is done in drivers/base/memory.c
|
|
* and system devices are handled in drivers/base/sys.c.
|
|
*
|
|
* Memory blocks are exported via sysfs in the class/memory/devices/
|
|
* directory.
|
|
*
|
|
*/
|
|
#ifndef _LINUX_MEMORY_H_
|
|
#define _LINUX_MEMORY_H_
|
|
|
|
#include <linux/node.h>
|
|
#include <linux/compiler.h>
|
|
#include <linux/mutex.h>
|
|
|
|
/* Lower bound for the memory block size: 1UL << SECTION_SIZE_BITS (presumably bytes - confirm). */
#define MIN_MEMORY_BLOCK_SIZE (1UL << SECTION_SIZE_BITS)
|
|
|
|
/**
|
|
* struct memory_group - a logical group of memory blocks
|
|
* @nid: The node id for all memory blocks inside the memory group.
|
|
* @memory_blocks: List of all memory blocks belonging to this memory group.
|
|
* @present_kernel_pages: Present (online) memory outside ZONE_MOVABLE of this
|
|
* memory group.
|
|
* @present_movable_pages: Present (online) memory in ZONE_MOVABLE of this
|
|
* memory group.
|
|
* @is_dynamic: The memory group type: static vs. dynamic
|
|
* @s.max_pages: Valid with &memory_group.is_dynamic == false. The maximum
|
|
* number of pages we'll have in this static memory group.
|
|
* @d.unit_pages: Valid with &memory_group.is_dynamic == true. Unit in pages
|
|
* in which memory is added/removed in this dynamic memory group.
|
|
* This granularity defines the alignment of a unit in physical
|
|
* address space; it has to be at least as big as a single
|
|
* memory block.
|
|
*
|
|
* A memory group logically groups memory blocks; each memory block
|
|
* belongs to at most one memory group. A memory group corresponds to
|
|
* a memory device, such as a DIMM or a NUMA node, which spans multiple
|
|
* memory blocks and might even span multiple non-contiguous physical memory
|
|
* ranges.
|
|
*
|
|
* Modification of members after registration is serialized by memory
|
|
* hot(un)plug code.
|
|
*/
|
|
struct memory_group {
|
|
int nid;
|
|
struct list_head memory_blocks;
|
|
unsigned long present_kernel_pages;
|
|
unsigned long present_movable_pages;
|
|
bool is_dynamic;
|
|
union {
|
|
struct {
|
|
unsigned long max_pages;
|
|
} s;
|
|
struct {
|
|
unsigned long unit_pages;
|
|
} d;
|
|
};
|
|
};
|
|
|
|
struct memory_block {
|
|
unsigned long start_section_nr;
|
|
unsigned long state; /* serialized by the dev->lock */
|
|
int online_type; /* for passing data to online routine */
|
|
int nid; /* NID for this memory block */
|
|
/*
|
|
* The single zone of this memory block if all PFNs of this memory block
|
|
* that are System RAM (not a memory hole, not ZONE_DEVICE ranges) are
|
|
* managed by a single zone. NULL if multiple zones (including nodes)
|
|
* apply.
|
|
*/
|
|
struct zone *zone;
|
|
struct device dev;
|
|
struct vmem_altmap *altmap;
|
|
struct memory_group *group; /* group (if any) for this block */
|
|
struct list_head group_next; /* next block inside memory group */
|
|
#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG)
|
|
atomic_long_t nr_hwpoison;
|
|
#endif
|
|
};
|
|
|
|
/* Declarations only; the definitions are not part of this header. */
int arch_get_memory_phys_device(unsigned long start_pfn);
unsigned long memory_block_size_bytes(void);
int set_memory_block_size_order(unsigned int order);
|
|
|
|
/*
 * Memory state/action values, one distinct bit each. The entries marked
 * "exposed to userspace" are shown to userspace as text strings in sysfs.
 */
#define MEM_ONLINE		(1 << 0)	/* exposed to userspace */
#define MEM_GOING_OFFLINE	(1 << 1)	/* exposed to userspace */
#define MEM_OFFLINE		(1 << 2)	/* exposed to userspace */
#define MEM_GOING_ONLINE	(1 << 3)
#define MEM_CANCEL_ONLINE	(1 << 4)
#define MEM_CANCEL_OFFLINE	(1 << 5)
#define MEM_PREPARE_ONLINE	(1 << 6)
#define MEM_FINISH_OFFLINE	(1 << 7)
|
|
|
|
struct memory_notify {
	/*
	 * altmap_start_pfn and altmap_nr_pages describe the altmap range.
	 * They are meant to be used only by MEM_PREPARE_ONLINE and
	 * MEM_FINISH_OFFLINE notifiers.
	 */
	unsigned long altmap_start_pfn;
	unsigned long altmap_nr_pages;
	unsigned long start_pfn;
	unsigned long nr_pages;
	int status_change_nid_normal;
	int status_change_nid;
};
|
|
|
|
/* Forward declarations: this header only needs pointers to these types. */
struct notifier_block;
struct mem_section;
|
|
|
|
/*
 * Priorities of the hotplug memory callback routines. The callback chain
 * stores its entries in decreasing priority order.
 */
#define DEFAULT_CALLBACK_PRI	0
#define SLAB_CALLBACK_PRI	1
#define HMAT_CALLBACK_PRI	2
#define CXL_CALLBACK_PRI	5
#define MM_COMPUTE_BATCH_PRI	10
#define CPUSET_CALLBACK_PRI	10
#define MEMTIER_HOTPLUG_PRI	100
#define KSM_CALLBACK_PRI	100
|
|
|
|
#ifndef CONFIG_MEMORY_HOTPLUG
|
|
/* Stub for !CONFIG_MEMORY_HOTPLUG builds: does nothing. */
static inline void memory_dev_init(void)
{
}
|
|
/* Stub for !CONFIG_MEMORY_HOTPLUG builds: @nb is ignored, always returns 0. */
static inline int register_memory_notifier(struct notifier_block *nb)
{
	return 0;
}
|
|
/* Stub for !CONFIG_MEMORY_HOTPLUG builds: @nb is ignored, nothing is done. */
static inline void unregister_memory_notifier(struct notifier_block *nb)
{
}
|
|
/* Stub for !CONFIG_MEMORY_HOTPLUG builds: @val and @v are ignored, always returns 0. */
static inline int memory_notify(unsigned long val, void *v)
{
	return 0;
}
|
|
/*
 * Stub for !CONFIG_MEMORY_HOTPLUG builds: @fn and @pri are ignored and 0 is
 * always returned. Function counterpart of the hotplug_memory_notifier()
 * macro that is used when CONFIG_MEMORY_HOTPLUG is set.
 */
static inline int hotplug_memory_notifier(notifier_fn_t fn, int pri)
{
	return 0;
}
|
|
#else /* CONFIG_MEMORY_HOTPLUG */
|
|
/* Memory notifier (un)registration. */
int register_memory_notifier(struct notifier_block *nb);
void unregister_memory_notifier(struct notifier_block *nb);

/* Memory block device creation and removal. */
int create_memory_block_devices(unsigned long start, unsigned long size,
				struct vmem_altmap *altmap,
				struct memory_group *group);
void remove_memory_block_devices(unsigned long start, unsigned long size);

void memory_dev_init(void);
int memory_notify(unsigned long val, void *v);
struct memory_block *find_memory_block(unsigned long section_nr);

/* Callback type and walkers for memory blocks; @arg is passed to @func. */
typedef int (*walk_memory_blocks_func_t)(struct memory_block *, void *);
int walk_memory_blocks(unsigned long start, unsigned long size,
		       void *arg, walk_memory_blocks_func_t func);
int for_each_memory_block(void *arg, walk_memory_blocks_func_t func);

/*
 * Memory group registration, lookup and traversal.
 * NOTE(review): the register functions presumably return the new group's id
 * (the "mgid" taken by the functions below) - confirm in the implementation.
 */
int memory_group_register_static(int nid, unsigned long max_pages);
int memory_group_register_dynamic(int nid, unsigned long unit_pages);
int memory_group_unregister(int mgid);
struct memory_group *memory_group_find_by_id(int mgid);
typedef int (*walk_memory_groups_func_t)(struct memory_group *, void *);
int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func,
			       struct memory_group *excluded, void *arg);
|
|
/*
 * hotplug_memory_notifier - define and register a memory notifier in one step
 * @fn:  notifier callback; also used to build the variable name fn##_mem_nb,
 *       so it has to be a plain identifier
 * @pri: value stored in the notifier_block's priority field
 *
 * Expands to a statement expression that declares a static __meminitdata
 * notifier_block and evaluates to the result of register_memory_notifier().
 */
#define hotplug_memory_notifier(fn, pri) ({				\
	static __meminitdata struct notifier_block fn##_mem_nb = {	\
		.notifier_call	= fn,					\
		.priority	= pri,					\
	};								\
	register_memory_notifier(&fn##_mem_nb);				\
})
|
|
|
|
#ifdef CONFIG_NUMA
|
|
/* Declared only when both CONFIG_MEMORY_HOTPLUG and CONFIG_NUMA are set. */
void memory_block_add_nid(struct memory_block *mem, int nid,
			  enum meminit_context context);
|
|
#endif /* CONFIG_NUMA */
|
|
#endif /* CONFIG_MEMORY_HOTPLUG */
|
|
|
|
/*
 * Mutex protecting kernel text modification (code patching). Holders of this
 * lock are allowed to sleep.
 */
extern struct mutex text_mutex;
|
|
|
|
#endif /* _LINUX_MEMORY_H_ */
|