mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-20 07:09:58 +00:00
7c3dc440b1
- CXL RAM region enumeration: instantiate 'struct cxl_region' objects for platform firmware created memory regions - CXL RAM region provisioning: complement the existing PMEM region creation support with RAM region support - "Soft Reservation" policy change: Online (memory hot-add) soft-reserved memory (EFI_MEMORY_SP) by default, but still allow for setting aside such memory for dedicated access via device-dax. - CXL Events and Interrupts: Takeover CXL event handling from platform-firmware (ACPI calls this CXL Memory Error Reporting) and export CXL Events via Linux Trace Events. - Convey CXL _OSC results to drivers: Similar to PCI, let the CXL subsystem interrogate the result of CXL _OSC negotiation. - Emulate CXL DVSEC Range Registers as "decoders": Allow for first-generation devices that pre-date the definition of the CXL HDM Decoder Capability to translate the CXL DVSEC Range Registers into 'struct cxl_decoder' objects. - Set timestamp: Per spec, set the device timestamp in case of hotplug, or if platform-firwmare failed to set it. - General fixups: linux-next build issues, non-urgent fixes for pre-production hardware, unit test fixes, spelling and debug message improvements. -----BEGIN PGP SIGNATURE----- iHUEABYKAB0WIQSbo+XnGs+rwLz9XGXfioYZHlFsZwUCY/WYcgAKCRDfioYZHlFs Z6m3APkBUtiEEm1o8ikdu5llUS1OTLBwqjJDwGMTyf8X/WDXhgD+J2mLsCgARS7X 5IS0RAtefutrW5sQpUucPM7QiLuraAY= =kOXC -----END PGP SIGNATURE----- Merge tag 'cxl-for-6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl Pull Compute Express Link (CXL) updates from Dan Williams: "To date Linux has been dependent on platform-firmware to map CXL RAM regions and handle events / errors from devices. With this update we can now parse / update the CXL memory layout, and report events / errors from devices. This is a precursor for the CXL subsystem to handle the end-to-end "RAS" flow for CXL memory. i.e. 
the flow that for DDR-attached-DRAM is handled by the EDAC driver where it maps system physical address events to a field-replaceable-unit (FRU / endpoint device). In general, CXL has the potential to standardize what has historically been a pile of memory-controller-specific error handling logic. Another change of note is the default policy for handling RAM-backed device-dax instances. Previously the default access mode was "device", mmap(2) a device special file to access memory. The new default is "kmem" where the address range is assigned to the core-mm via add_memory_driver_managed(). This saves typical users from wondering why their platform memory is not visible via free(1) and stuck behind a device-file. At the same time it allows expert users to deploy policy to, for example, get dedicated access to high performance memory, or hide low performance memory from general purpose kernel allocations. This affects not only CXL, but also systems with high-bandwidth-memory that platform-firmware tags with the EFI_MEMORY_SP (special purpose) designation. Summary: - CXL RAM region enumeration: instantiate 'struct cxl_region' objects for platform firmware created memory regions - CXL RAM region provisioning: complement the existing PMEM region creation support with RAM region support - "Soft Reservation" policy change: Online (memory hot-add) soft-reserved memory (EFI_MEMORY_SP) by default, but still allow for setting aside such memory for dedicated access via device-dax. - CXL Events and Interrupts: Takeover CXL event handling from platform-firmware (ACPI calls this CXL Memory Error Reporting) and export CXL Events via Linux Trace Events. - Convey CXL _OSC results to drivers: Similar to PCI, let the CXL subsystem interrogate the result of CXL _OSC negotiation. 
- Emulate CXL DVSEC Range Registers as "decoders": Allow for first-generation devices that pre-date the definition of the CXL HDM Decoder Capability to translate the CXL DVSEC Range Registers into 'struct cxl_decoder' objects. - Set timestamp: Per spec, set the device timestamp in case of hotplug, or if platform-firmware failed to set it. - General fixups: linux-next build issues, non-urgent fixes for pre-production hardware, unit test fixes, spelling and debug message improvements" * tag 'cxl-for-6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl: (66 commits) dax/kmem: Fix leak of memory-hotplug resources cxl/mem: Add kdoc param for event log driver state cxl/trace: Add serial number to trace points cxl/trace: Add host output to trace points cxl/trace: Standardize device information output cxl/pci: Remove locked check for dvsec_range_allowed() cxl/hdm: Add emulation when HDM decoders are not committed cxl/hdm: Create emulated cxl_hdm for devices that do not have HDM decoders cxl/hdm: Emulate HDM decoder from DVSEC range registers cxl/pci: Refactor cxl_hdm_decode_init() cxl/port: Export cxl_dvsec_rr_decode() to cxl_port cxl/pci: Break out range register decoding from cxl_hdm_decode_init() cxl: add RAS status unmasking for CXL cxl: remove unnecessary calling of pci_enable_pcie_error_reporting() dax/hmem: build hmem device support as module if possible dax: cxl: add CXL_REGION dependency cxl: avoid returning uninitialized error code cxl/pmem: Fix nvdimm registration races cxl/mem: Fix UAPI command comment cxl/uapi: Tag commands from cxl_query_cmd() ...
389 lines
9.3 KiB
C
389 lines
9.3 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/* Copyright(c) 2020 Intel Corporation. */
|
|
|
|
#include <linux/device.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/idr.h>
|
|
#include <linux/pci.h>
|
|
#include <cxlmem.h>
|
|
#include "core.h"
|
|
|
|
/*
 * Serializes the ioctl path against changes to a memdev's device state:
 * cxl_memdev_ioctl() holds it for read around command dispatch, while
 * set_exclusive_cxl_commands(), clear_exclusive_cxl_commands() and
 * cxl_memdev_shutdown() hold it for write.
 */
static DECLARE_RWSEM(cxl_memdev_rwsem);
|
|
|
|
/*
 * An entire PCI topology full of devices should be enough for any
 * config
 */
|
|
/* Caps the ids handed out by cxl_memdev_ida and sizes the chrdev region. */
#define CXL_MEM_MAX_DEVS 65536
|
|
|
|
/* Char device major, filled in by cxl_memdev_init() from alloc_chrdev_region(). */
static int cxl_mem_major;
|
|
/*
 * Allocator for cxl_memdev ids: an id is taken in cxl_memdev_alloc() and
 * returned in cxl_memdev_release(). The id doubles as the chrdev minor.
 */
static DEFINE_IDA(cxl_memdev_ida);
|
|
|
|
static void cxl_memdev_release(struct device *dev)
|
|
{
|
|
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
|
|
|
|
ida_free(&cxl_memdev_ida, cxlmd->id);
|
|
kfree(cxlmd);
|
|
}
|
|
|
|
static char *cxl_memdev_devnode(const struct device *dev, umode_t *mode, kuid_t *uid,
|
|
kgid_t *gid)
|
|
{
|
|
return kasprintf(GFP_KERNEL, "cxl/%s", dev_name(dev));
|
|
}
|
|
|
|
static ssize_t firmware_version_show(struct device *dev,
|
|
struct device_attribute *attr, char *buf)
|
|
{
|
|
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
|
|
struct cxl_dev_state *cxlds = cxlmd->cxlds;
|
|
|
|
return sysfs_emit(buf, "%.16s\n", cxlds->firmware_version);
|
|
}
|
|
static DEVICE_ATTR_RO(firmware_version);
|
|
|
|
static ssize_t payload_max_show(struct device *dev,
|
|
struct device_attribute *attr, char *buf)
|
|
{
|
|
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
|
|
struct cxl_dev_state *cxlds = cxlmd->cxlds;
|
|
|
|
return sysfs_emit(buf, "%zu\n", cxlds->payload_size);
|
|
}
|
|
static DEVICE_ATTR_RO(payload_max);
|
|
|
|
static ssize_t label_storage_size_show(struct device *dev,
|
|
struct device_attribute *attr, char *buf)
|
|
{
|
|
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
|
|
struct cxl_dev_state *cxlds = cxlmd->cxlds;
|
|
|
|
return sysfs_emit(buf, "%zu\n", cxlds->lsa_size);
|
|
}
|
|
static DEVICE_ATTR_RO(label_storage_size);
|
|
|
|
static ssize_t ram_size_show(struct device *dev, struct device_attribute *attr,
|
|
char *buf)
|
|
{
|
|
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
|
|
struct cxl_dev_state *cxlds = cxlmd->cxlds;
|
|
unsigned long long len = resource_size(&cxlds->ram_res);
|
|
|
|
return sysfs_emit(buf, "%#llx\n", len);
|
|
}
|
|
|
|
static struct device_attribute dev_attr_ram_size =
|
|
__ATTR(size, 0444, ram_size_show, NULL);
|
|
|
|
static ssize_t pmem_size_show(struct device *dev, struct device_attribute *attr,
|
|
char *buf)
|
|
{
|
|
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
|
|
struct cxl_dev_state *cxlds = cxlmd->cxlds;
|
|
unsigned long long len = resource_size(&cxlds->pmem_res);
|
|
|
|
return sysfs_emit(buf, "%#llx\n", len);
|
|
}
|
|
|
|
static struct device_attribute dev_attr_pmem_size =
|
|
__ATTR(size, 0444, pmem_size_show, NULL);
|
|
|
|
static ssize_t serial_show(struct device *dev, struct device_attribute *attr,
|
|
char *buf)
|
|
{
|
|
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
|
|
struct cxl_dev_state *cxlds = cxlmd->cxlds;
|
|
|
|
return sysfs_emit(buf, "%#llx\n", cxlds->serial);
|
|
}
|
|
static DEVICE_ATTR_RO(serial);
|
|
|
|
/*
 * sysfs "numa_node" (read-only): emit the node id that dev_to_node()
 * reports for this memdev, as a signed decimal.
 *
 * Formats through sysfs_emit(), matching every other show() callback in
 * this file, rather than an unbounded sprintf() into the sysfs buffer.
 */
static ssize_t numa_node_show(struct device *dev, struct device_attribute *attr,
			      char *buf)
{
	return sysfs_emit(buf, "%d\n", dev_to_node(dev));
}
static DEVICE_ATTR_RO(numa_node);
|
|
|
|
static struct attribute *cxl_memdev_attributes[] = {
|
|
&dev_attr_serial.attr,
|
|
&dev_attr_firmware_version.attr,
|
|
&dev_attr_payload_max.attr,
|
|
&dev_attr_label_storage_size.attr,
|
|
&dev_attr_numa_node.attr,
|
|
NULL,
|
|
};
|
|
|
|
static struct attribute *cxl_memdev_pmem_attributes[] = {
|
|
&dev_attr_pmem_size.attr,
|
|
NULL,
|
|
};
|
|
|
|
static struct attribute *cxl_memdev_ram_attributes[] = {
|
|
&dev_attr_ram_size.attr,
|
|
NULL,
|
|
};
|
|
|
|
static umode_t cxl_memdev_visible(struct kobject *kobj, struct attribute *a,
|
|
int n)
|
|
{
|
|
if (!IS_ENABLED(CONFIG_NUMA) && a == &dev_attr_numa_node.attr)
|
|
return 0;
|
|
return a->mode;
|
|
}
|
|
|
|
static struct attribute_group cxl_memdev_attribute_group = {
|
|
.attrs = cxl_memdev_attributes,
|
|
.is_visible = cxl_memdev_visible,
|
|
};
|
|
|
|
static struct attribute_group cxl_memdev_ram_attribute_group = {
|
|
.name = "ram",
|
|
.attrs = cxl_memdev_ram_attributes,
|
|
};
|
|
|
|
static struct attribute_group cxl_memdev_pmem_attribute_group = {
|
|
.name = "pmem",
|
|
.attrs = cxl_memdev_pmem_attributes,
|
|
};
|
|
|
|
static const struct attribute_group *cxl_memdev_attribute_groups[] = {
|
|
&cxl_memdev_attribute_group,
|
|
&cxl_memdev_ram_attribute_group,
|
|
&cxl_memdev_pmem_attribute_group,
|
|
NULL,
|
|
};
|
|
|
|
static const struct device_type cxl_memdev_type = {
|
|
.name = "cxl_memdev",
|
|
.release = cxl_memdev_release,
|
|
.devnode = cxl_memdev_devnode,
|
|
.groups = cxl_memdev_attribute_groups,
|
|
};
|
|
|
|
bool is_cxl_memdev(const struct device *dev)
|
|
{
|
|
return dev->type == &cxl_memdev_type;
|
|
}
|
|
EXPORT_SYMBOL_NS_GPL(is_cxl_memdev, CXL);
|
|
|
|
/**
|
|
* set_exclusive_cxl_commands() - atomically disable user cxl commands
|
|
* @cxlds: The device state to operate on
|
|
* @cmds: bitmap of commands to mark exclusive
|
|
*
|
|
* Grab the cxl_memdev_rwsem in write mode to flush in-flight
|
|
* invocations of the ioctl path and then disable future execution of
|
|
* commands with the command ids set in @cmds.
|
|
*/
|
|
void set_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds)
|
|
{
|
|
down_write(&cxl_memdev_rwsem);
|
|
bitmap_or(cxlds->exclusive_cmds, cxlds->exclusive_cmds, cmds,
|
|
CXL_MEM_COMMAND_ID_MAX);
|
|
up_write(&cxl_memdev_rwsem);
|
|
}
|
|
EXPORT_SYMBOL_NS_GPL(set_exclusive_cxl_commands, CXL);
|
|
|
|
/**
|
|
* clear_exclusive_cxl_commands() - atomically enable user cxl commands
|
|
* @cxlds: The device state to modify
|
|
* @cmds: bitmap of commands to mark available for userspace
|
|
*/
|
|
void clear_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds)
|
|
{
|
|
down_write(&cxl_memdev_rwsem);
|
|
bitmap_andnot(cxlds->exclusive_cmds, cxlds->exclusive_cmds, cmds,
|
|
CXL_MEM_COMMAND_ID_MAX);
|
|
up_write(&cxl_memdev_rwsem);
|
|
}
|
|
EXPORT_SYMBOL_NS_GPL(clear_exclusive_cxl_commands, CXL);
|
|
|
|
static void cxl_memdev_shutdown(struct device *dev)
|
|
{
|
|
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
|
|
|
|
down_write(&cxl_memdev_rwsem);
|
|
cxlmd->cxlds = NULL;
|
|
up_write(&cxl_memdev_rwsem);
|
|
}
|
|
|
|
static void cxl_memdev_unregister(void *_cxlmd)
|
|
{
|
|
struct cxl_memdev *cxlmd = _cxlmd;
|
|
struct device *dev = &cxlmd->dev;
|
|
|
|
cxl_memdev_shutdown(dev);
|
|
cdev_device_del(&cxlmd->cdev, dev);
|
|
put_device(dev);
|
|
}
|
|
|
|
static void detach_memdev(struct work_struct *work)
|
|
{
|
|
struct cxl_memdev *cxlmd;
|
|
|
|
cxlmd = container_of(work, typeof(*cxlmd), detach_work);
|
|
device_release_driver(&cxlmd->dev);
|
|
put_device(&cxlmd->dev);
|
|
}
|
|
|
|
/* Lockdep class that cxl_memdev_alloc() assigns to each memdev's dev->mutex. */
static struct lock_class_key cxl_memdev_key;
|
|
|
|
static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds,
|
|
const struct file_operations *fops)
|
|
{
|
|
struct cxl_memdev *cxlmd;
|
|
struct device *dev;
|
|
struct cdev *cdev;
|
|
int rc;
|
|
|
|
cxlmd = kzalloc(sizeof(*cxlmd), GFP_KERNEL);
|
|
if (!cxlmd)
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
rc = ida_alloc_max(&cxl_memdev_ida, CXL_MEM_MAX_DEVS - 1, GFP_KERNEL);
|
|
if (rc < 0)
|
|
goto err;
|
|
cxlmd->id = rc;
|
|
cxlmd->depth = -1;
|
|
|
|
dev = &cxlmd->dev;
|
|
device_initialize(dev);
|
|
lockdep_set_class(&dev->mutex, &cxl_memdev_key);
|
|
dev->parent = cxlds->dev;
|
|
dev->bus = &cxl_bus_type;
|
|
dev->devt = MKDEV(cxl_mem_major, cxlmd->id);
|
|
dev->type = &cxl_memdev_type;
|
|
device_set_pm_not_required(dev);
|
|
INIT_WORK(&cxlmd->detach_work, detach_memdev);
|
|
|
|
cdev = &cxlmd->cdev;
|
|
cdev_init(cdev, fops);
|
|
return cxlmd;
|
|
|
|
err:
|
|
kfree(cxlmd);
|
|
return ERR_PTR(rc);
|
|
}
|
|
|
|
static long __cxl_memdev_ioctl(struct cxl_memdev *cxlmd, unsigned int cmd,
|
|
unsigned long arg)
|
|
{
|
|
switch (cmd) {
|
|
case CXL_MEM_QUERY_COMMANDS:
|
|
return cxl_query_cmd(cxlmd, (void __user *)arg);
|
|
case CXL_MEM_SEND_COMMAND:
|
|
return cxl_send_cmd(cxlmd, (void __user *)arg);
|
|
default:
|
|
return -ENOTTY;
|
|
}
|
|
}
|
|
|
|
static long cxl_memdev_ioctl(struct file *file, unsigned int cmd,
|
|
unsigned long arg)
|
|
{
|
|
struct cxl_memdev *cxlmd = file->private_data;
|
|
int rc = -ENXIO;
|
|
|
|
down_read(&cxl_memdev_rwsem);
|
|
if (cxlmd->cxlds)
|
|
rc = __cxl_memdev_ioctl(cxlmd, cmd, arg);
|
|
up_read(&cxl_memdev_rwsem);
|
|
|
|
return rc;
|
|
}
|
|
|
|
static int cxl_memdev_open(struct inode *inode, struct file *file)
|
|
{
|
|
struct cxl_memdev *cxlmd =
|
|
container_of(inode->i_cdev, typeof(*cxlmd), cdev);
|
|
|
|
get_device(&cxlmd->dev);
|
|
file->private_data = cxlmd;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int cxl_memdev_release_file(struct inode *inode, struct file *file)
|
|
{
|
|
struct cxl_memdev *cxlmd =
|
|
container_of(inode->i_cdev, typeof(*cxlmd), cdev);
|
|
|
|
put_device(&cxlmd->dev);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static const struct file_operations cxl_memdev_fops = {
|
|
.owner = THIS_MODULE,
|
|
.unlocked_ioctl = cxl_memdev_ioctl,
|
|
.open = cxl_memdev_open,
|
|
.release = cxl_memdev_release_file,
|
|
.compat_ioctl = compat_ptr_ioctl,
|
|
.llseek = noop_llseek,
|
|
};
|
|
|
|
struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds)
|
|
{
|
|
struct cxl_memdev *cxlmd;
|
|
struct device *dev;
|
|
struct cdev *cdev;
|
|
int rc;
|
|
|
|
cxlmd = cxl_memdev_alloc(cxlds, &cxl_memdev_fops);
|
|
if (IS_ERR(cxlmd))
|
|
return cxlmd;
|
|
|
|
dev = &cxlmd->dev;
|
|
rc = dev_set_name(dev, "mem%d", cxlmd->id);
|
|
if (rc)
|
|
goto err;
|
|
|
|
/*
|
|
* Activate ioctl operations, no cxl_memdev_rwsem manipulation
|
|
* needed as this is ordered with cdev_add() publishing the device.
|
|
*/
|
|
cxlmd->cxlds = cxlds;
|
|
cxlds->cxlmd = cxlmd;
|
|
|
|
cdev = &cxlmd->cdev;
|
|
rc = cdev_device_add(cdev, dev);
|
|
if (rc)
|
|
goto err;
|
|
|
|
rc = devm_add_action_or_reset(cxlds->dev, cxl_memdev_unregister, cxlmd);
|
|
if (rc)
|
|
return ERR_PTR(rc);
|
|
return cxlmd;
|
|
|
|
err:
|
|
/*
|
|
* The cdev was briefly live, shutdown any ioctl operations that
|
|
* saw that state.
|
|
*/
|
|
cxl_memdev_shutdown(dev);
|
|
put_device(dev);
|
|
return ERR_PTR(rc);
|
|
}
|
|
EXPORT_SYMBOL_NS_GPL(devm_cxl_add_memdev, CXL);
|
|
|
|
__init int cxl_memdev_init(void)
|
|
{
|
|
dev_t devt;
|
|
int rc;
|
|
|
|
rc = alloc_chrdev_region(&devt, 0, CXL_MEM_MAX_DEVS, "cxl");
|
|
if (rc)
|
|
return rc;
|
|
|
|
cxl_mem_major = MAJOR(devt);
|
|
|
|
return 0;
|
|
}
|
|
|
|
void cxl_memdev_exit(void)
|
|
{
|
|
unregister_chrdev_region(MKDEV(cxl_mem_major, 0), CXL_MEM_MAX_DEVS);
|
|
}
|