Dave Jiang 793a539ac7 cxl: Explicitly initialize resources when media is not ready
When media is not ready do not assume that the capacity information from
the identify command is valid, i.e. ->total_bytes
->partition_align_bytes ->{volatile,persistent}_only_bytes. Explicitly
zero out the capacity resources and exit early.

Given zero-init of those fields this patch is functionally equivalent to
the prior state, but it improves readability and robustness going
forward.

Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Link: https://lore.kernel.org/r/168506118166.3004974.13523455340007852589.stgit@djiang5-mobl3
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2023-05-26 13:34:39 -07:00

1281 lines
34 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2020 Intel Corporation. All rights reserved. */
#include <linux/security.h>
#include <linux/debugfs.h>
#include <linux/ktime.h>
#include <linux/mutex.h>
#include <asm/unaligned.h>
#include <cxlpci.h>
#include <cxlmem.h>
#include <cxl.h>
#include "core.h"
#include "trace.h"
static bool cxl_raw_allow_all;
/**
* DOC: cxl mbox
*
* Core implementation of the CXL 2.0 Type-3 Memory Device Mailbox. The
* implementation is used by the cxl_pci driver to initialize the device
* and implement the cxl_mem.h IOCTL UAPI. It also implements the
* backend of the cxl_pmem_ctl() transport for LIBNVDIMM.
*/
#define cxl_for_each_cmd(cmd) \
for ((cmd) = &cxl_mem_commands[0]; \
((cmd) - cxl_mem_commands) < ARRAY_SIZE(cxl_mem_commands); (cmd)++)
#define CXL_CMD(_id, sin, sout, _flags) \
[CXL_MEM_COMMAND_ID_##_id] = { \
.info = { \
.id = CXL_MEM_COMMAND_ID_##_id, \
.size_in = sin, \
.size_out = sout, \
}, \
.opcode = CXL_MBOX_OP_##_id, \
.flags = _flags, \
}
#define CXL_VARIABLE_PAYLOAD ~0U
/*
* This table defines the supported mailbox commands for the driver. This table
* is made up of a UAPI structure. Non-negative values as parameters in the
* table will be validated against the user's input. For example, if size_in is
* 0, and the user passed in 1, it is an error.
*/
static struct cxl_mem_command cxl_mem_commands[CXL_MEM_COMMAND_ID_MAX] = {
CXL_CMD(IDENTIFY, 0, 0x43, CXL_CMD_FLAG_FORCE_ENABLE),
#ifdef CONFIG_CXL_MEM_RAW_COMMANDS
CXL_CMD(RAW, CXL_VARIABLE_PAYLOAD, CXL_VARIABLE_PAYLOAD, 0),
#endif
CXL_CMD(GET_SUPPORTED_LOGS, 0, CXL_VARIABLE_PAYLOAD, CXL_CMD_FLAG_FORCE_ENABLE),
CXL_CMD(GET_FW_INFO, 0, 0x50, 0),
CXL_CMD(GET_PARTITION_INFO, 0, 0x20, 0),
CXL_CMD(GET_LSA, 0x8, CXL_VARIABLE_PAYLOAD, 0),
CXL_CMD(GET_HEALTH_INFO, 0, 0x12, 0),
CXL_CMD(GET_LOG, 0x18, CXL_VARIABLE_PAYLOAD, CXL_CMD_FLAG_FORCE_ENABLE),
CXL_CMD(SET_PARTITION_INFO, 0x0a, 0, 0),
CXL_CMD(SET_LSA, CXL_VARIABLE_PAYLOAD, 0, 0),
CXL_CMD(GET_ALERT_CONFIG, 0, 0x10, 0),
CXL_CMD(SET_ALERT_CONFIG, 0xc, 0, 0),
CXL_CMD(GET_SHUTDOWN_STATE, 0, 0x1, 0),
CXL_CMD(SET_SHUTDOWN_STATE, 0x1, 0, 0),
CXL_CMD(GET_SCAN_MEDIA_CAPS, 0x10, 0x4, 0),
};
/*
* Commands that RAW doesn't permit. The rationale for each:
*
* CXL_MBOX_OP_ACTIVATE_FW: Firmware activation requires adjustment /
* coordination of transaction timeout values at the root bridge level.
*
* CXL_MBOX_OP_SET_PARTITION_INFO: The device memory map may change live
* and needs to be coordinated with HDM updates.
*
* CXL_MBOX_OP_SET_LSA: The label storage area may be cached by the
* driver and any writes from userspace invalidates those contents.
*
* CXL_MBOX_OP_SET_SHUTDOWN_STATE: Set shutdown state assumes no writes
* to the device after it is marked clean, userspace can not make that
* assertion.
*
* CXL_MBOX_OP_[GET_]SCAN_MEDIA: The kernel provides a native error list that
* is kept up to date with patrol notifications and error management.
*
* CXL_MBOX_OP_[GET_,INJECT_,CLEAR_]POISON: These commands require kernel
* driver orchestration for safety.
*/
static u16 cxl_disabled_raw_commands[] = {
CXL_MBOX_OP_ACTIVATE_FW,
CXL_MBOX_OP_SET_PARTITION_INFO,
CXL_MBOX_OP_SET_LSA,
CXL_MBOX_OP_SET_SHUTDOWN_STATE,
CXL_MBOX_OP_SCAN_MEDIA,
CXL_MBOX_OP_GET_SCAN_MEDIA,
CXL_MBOX_OP_GET_POISON,
CXL_MBOX_OP_INJECT_POISON,
CXL_MBOX_OP_CLEAR_POISON,
};
/*
* Command sets that RAW doesn't permit. All opcodes in this set are
* disabled because they pass plain text security payloads over the
* user/kernel boundary. This functionality is intended to be wrapped
* behind the keys ABI which allows for encrypted payloads in the UAPI
*/
static u8 security_command_sets[] = {
0x44, /* Sanitize */
0x45, /* Persistent Memory Data-at-rest Security */
0x46, /* Security Passthrough */
};
static bool cxl_is_security_command(u16 opcode)
{
int i;
for (i = 0; i < ARRAY_SIZE(security_command_sets); i++)
if (security_command_sets[i] == (opcode >> 8))
return true;
return false;
}
static bool cxl_is_poison_command(u16 opcode)
{
#define CXL_MBOX_OP_POISON_CMDS 0x43
if ((opcode >> 8) == CXL_MBOX_OP_POISON_CMDS)
return true;
return false;
}
static void cxl_set_poison_cmd_enabled(struct cxl_poison_state *poison,
u16 opcode)
{
switch (opcode) {
case CXL_MBOX_OP_GET_POISON:
set_bit(CXL_POISON_ENABLED_LIST, poison->enabled_cmds);
break;
case CXL_MBOX_OP_INJECT_POISON:
set_bit(CXL_POISON_ENABLED_INJECT, poison->enabled_cmds);
break;
case CXL_MBOX_OP_CLEAR_POISON:
set_bit(CXL_POISON_ENABLED_CLEAR, poison->enabled_cmds);
break;
case CXL_MBOX_OP_GET_SCAN_MEDIA_CAPS:
set_bit(CXL_POISON_ENABLED_SCAN_CAPS, poison->enabled_cmds);
break;
case CXL_MBOX_OP_SCAN_MEDIA:
set_bit(CXL_POISON_ENABLED_SCAN_MEDIA, poison->enabled_cmds);
break;
case CXL_MBOX_OP_GET_SCAN_MEDIA:
set_bit(CXL_POISON_ENABLED_SCAN_RESULTS, poison->enabled_cmds);
break;
default:
break;
}
}
static struct cxl_mem_command *cxl_mem_find_command(u16 opcode)
{
struct cxl_mem_command *c;
cxl_for_each_cmd(c)
if (c->opcode == opcode)
return c;
return NULL;
}
static const char *cxl_mem_opcode_to_name(u16 opcode)
{
struct cxl_mem_command *c;
c = cxl_mem_find_command(opcode);
if (!c)
return NULL;
return cxl_command_names[c->info.id].name;
}
/**
* cxl_internal_send_cmd() - Kernel internal interface to send a mailbox command
* @cxlds: The device data for the operation
* @mbox_cmd: initialized command to execute
*
* Context: Any context.
* Return:
* * %>=0 - Number of bytes returned in @out.
* * %-E2BIG - Payload is too large for hardware.
* * %-EBUSY - Couldn't acquire exclusive mailbox access.
* * %-EFAULT - Hardware error occurred.
* * %-ENXIO - Command completed, but device reported an error.
* * %-EIO - Unexpected output size.
*
* Mailbox commands may execute successfully yet the device itself reported an
* error. While this distinction can be useful for commands from userspace, the
* kernel will only be able to use results when both are successful.
*/
int cxl_internal_send_cmd(struct cxl_dev_state *cxlds,
struct cxl_mbox_cmd *mbox_cmd)
{
size_t out_size, min_out;
int rc;
if (mbox_cmd->size_in > cxlds->payload_size ||
mbox_cmd->size_out > cxlds->payload_size)
return -E2BIG;
out_size = mbox_cmd->size_out;
min_out = mbox_cmd->min_out;
rc = cxlds->mbox_send(cxlds, mbox_cmd);
/*
* EIO is reserved for a payload size mismatch and mbox_send()
* may not return this error.
*/
if (WARN_ONCE(rc == -EIO, "Bad return code: -EIO"))
return -ENXIO;
if (rc)
return rc;
if (mbox_cmd->return_code != CXL_MBOX_CMD_RC_SUCCESS)
return cxl_mbox_cmd_rc2errno(mbox_cmd);
if (!out_size)
return 0;
/*
* Variable sized output needs to at least satisfy the caller's
* minimum if not the fully requested size.
*/
if (min_out == 0)
min_out = out_size;
if (mbox_cmd->size_out < min_out)
return -EIO;
return 0;
}
EXPORT_SYMBOL_NS_GPL(cxl_internal_send_cmd, CXL);
static bool cxl_mem_raw_command_allowed(u16 opcode)
{
int i;
if (!IS_ENABLED(CONFIG_CXL_MEM_RAW_COMMANDS))
return false;
if (security_locked_down(LOCKDOWN_PCI_ACCESS))
return false;
if (cxl_raw_allow_all)
return true;
if (cxl_is_security_command(opcode))
return false;
for (i = 0; i < ARRAY_SIZE(cxl_disabled_raw_commands); i++)
if (cxl_disabled_raw_commands[i] == opcode)
return false;
return true;
}
/**
* cxl_payload_from_user_allowed() - Check contents of in_payload.
* @opcode: The mailbox command opcode.
* @payload_in: Pointer to the input payload passed in from user space.
*
* Return:
* * true - payload_in passes check for @opcode.
* * false - payload_in contains invalid or unsupported values.
*
* The driver may inspect payload contents before sending a mailbox
* command from user space to the device. The intent is to reject
* commands with input payloads that are known to be unsafe. This
* check is not intended to replace the users careful selection of
* mailbox command parameters and makes no guarantee that the user
* command will succeed, nor that it is appropriate.
*
* The specific checks are determined by the opcode.
*/
static bool cxl_payload_from_user_allowed(u16 opcode, void *payload_in)
{
switch (opcode) {
case CXL_MBOX_OP_SET_PARTITION_INFO: {
struct cxl_mbox_set_partition_info *pi = payload_in;
if (pi->flags & CXL_SET_PARTITION_IMMEDIATE_FLAG)
return false;
break;
}
default:
break;
}
return true;
}
static int cxl_mbox_cmd_ctor(struct cxl_mbox_cmd *mbox,
struct cxl_dev_state *cxlds, u16 opcode,
size_t in_size, size_t out_size, u64 in_payload)
{
*mbox = (struct cxl_mbox_cmd) {
.opcode = opcode,
.size_in = in_size,
};
if (in_size) {
mbox->payload_in = vmemdup_user(u64_to_user_ptr(in_payload),
in_size);
if (IS_ERR(mbox->payload_in))
return PTR_ERR(mbox->payload_in);
if (!cxl_payload_from_user_allowed(opcode, mbox->payload_in)) {
dev_dbg(cxlds->dev, "%s: input payload not allowed\n",
cxl_mem_opcode_to_name(opcode));
kvfree(mbox->payload_in);
return -EBUSY;
}
}
/* Prepare to handle a full payload for variable sized output */
if (out_size == CXL_VARIABLE_PAYLOAD)
mbox->size_out = cxlds->payload_size;
else
mbox->size_out = out_size;
if (mbox->size_out) {
mbox->payload_out = kvzalloc(mbox->size_out, GFP_KERNEL);
if (!mbox->payload_out) {
kvfree(mbox->payload_in);
return -ENOMEM;
}
}
return 0;
}
static void cxl_mbox_cmd_dtor(struct cxl_mbox_cmd *mbox)
{
kvfree(mbox->payload_in);
kvfree(mbox->payload_out);
}
static int cxl_to_mem_cmd_raw(struct cxl_mem_command *mem_cmd,
const struct cxl_send_command *send_cmd,
struct cxl_dev_state *cxlds)
{
if (send_cmd->raw.rsvd)
return -EINVAL;
/*
* Unlike supported commands, the output size of RAW commands
* gets passed along without further checking, so it must be
* validated here.
*/
if (send_cmd->out.size > cxlds->payload_size)
return -EINVAL;
if (!cxl_mem_raw_command_allowed(send_cmd->raw.opcode))
return -EPERM;
dev_WARN_ONCE(cxlds->dev, true, "raw command path used\n");
*mem_cmd = (struct cxl_mem_command) {
.info = {
.id = CXL_MEM_COMMAND_ID_RAW,
.size_in = send_cmd->in.size,
.size_out = send_cmd->out.size,
},
.opcode = send_cmd->raw.opcode
};
return 0;
}
static int cxl_to_mem_cmd(struct cxl_mem_command *mem_cmd,
const struct cxl_send_command *send_cmd,
struct cxl_dev_state *cxlds)
{
struct cxl_mem_command *c = &cxl_mem_commands[send_cmd->id];
const struct cxl_command_info *info = &c->info;
if (send_cmd->flags & ~CXL_MEM_COMMAND_FLAG_MASK)
return -EINVAL;
if (send_cmd->rsvd)
return -EINVAL;
if (send_cmd->in.rsvd || send_cmd->out.rsvd)
return -EINVAL;
/* Check that the command is enabled for hardware */
if (!test_bit(info->id, cxlds->enabled_cmds))
return -ENOTTY;
/* Check that the command is not claimed for exclusive kernel use */
if (test_bit(info->id, cxlds->exclusive_cmds))
return -EBUSY;
/* Check the input buffer is the expected size */
if ((info->size_in != CXL_VARIABLE_PAYLOAD) &&
(info->size_in != send_cmd->in.size))
return -ENOMEM;
/* Check the output buffer is at least large enough */
if ((info->size_out != CXL_VARIABLE_PAYLOAD) &&
(send_cmd->out.size < info->size_out))
return -ENOMEM;
*mem_cmd = (struct cxl_mem_command) {
.info = {
.id = info->id,
.flags = info->flags,
.size_in = send_cmd->in.size,
.size_out = send_cmd->out.size,
},
.opcode = c->opcode
};
return 0;
}
/**
* cxl_validate_cmd_from_user() - Check fields for CXL_MEM_SEND_COMMAND.
* @mbox_cmd: Sanitized and populated &struct cxl_mbox_cmd.
* @cxlds: The device data for the operation
* @send_cmd: &struct cxl_send_command copied in from userspace.
*
* Return:
* * %0 - @out_cmd is ready to send.
* * %-ENOTTY - Invalid command specified.
* * %-EINVAL - Reserved fields or invalid values were used.
* * %-ENOMEM - Input or output buffer wasn't sized properly.
* * %-EPERM - Attempted to use a protected command.
* * %-EBUSY - Kernel has claimed exclusive access to this opcode
*
* The result of this command is a fully validated command in @mbox_cmd that is
* safe to send to the hardware.
*/
static int cxl_validate_cmd_from_user(struct cxl_mbox_cmd *mbox_cmd,
struct cxl_dev_state *cxlds,
const struct cxl_send_command *send_cmd)
{
struct cxl_mem_command mem_cmd;
int rc;
if (send_cmd->id == 0 || send_cmd->id >= CXL_MEM_COMMAND_ID_MAX)
return -ENOTTY;
/*
* The user can never specify an input payload larger than what hardware
* supports, but output can be arbitrarily large (simply write out as
* much data as the hardware provides).
*/
if (send_cmd->in.size > cxlds->payload_size)
return -EINVAL;
/* Sanitize and construct a cxl_mem_command */
if (send_cmd->id == CXL_MEM_COMMAND_ID_RAW)
rc = cxl_to_mem_cmd_raw(&mem_cmd, send_cmd, cxlds);
else
rc = cxl_to_mem_cmd(&mem_cmd, send_cmd, cxlds);
if (rc)
return rc;
/* Sanitize and construct a cxl_mbox_cmd */
return cxl_mbox_cmd_ctor(mbox_cmd, cxlds, mem_cmd.opcode,
mem_cmd.info.size_in, mem_cmd.info.size_out,
send_cmd->in.payload);
}
int cxl_query_cmd(struct cxl_memdev *cxlmd,
struct cxl_mem_query_commands __user *q)
{
struct device *dev = &cxlmd->dev;
struct cxl_mem_command *cmd;
u32 n_commands;
int j = 0;
dev_dbg(dev, "Query IOCTL\n");
if (get_user(n_commands, &q->n_commands))
return -EFAULT;
/* returns the total number if 0 elements are requested. */
if (n_commands == 0)
return put_user(ARRAY_SIZE(cxl_mem_commands), &q->n_commands);
/*
* otherwise, return max(n_commands, total commands) cxl_command_info
* structures.
*/
cxl_for_each_cmd(cmd) {
struct cxl_command_info info = cmd->info;
if (test_bit(info.id, cxlmd->cxlds->enabled_cmds))
info.flags |= CXL_MEM_COMMAND_FLAG_ENABLED;
if (test_bit(info.id, cxlmd->cxlds->exclusive_cmds))
info.flags |= CXL_MEM_COMMAND_FLAG_EXCLUSIVE;
if (copy_to_user(&q->commands[j++], &info, sizeof(info)))
return -EFAULT;
if (j == n_commands)
break;
}
return 0;
}
/**
* handle_mailbox_cmd_from_user() - Dispatch a mailbox command for userspace.
* @cxlds: The device data for the operation
* @mbox_cmd: The validated mailbox command.
* @out_payload: Pointer to userspace's output payload.
* @size_out: (Input) Max payload size to copy out.
* (Output) Payload size hardware generated.
* @retval: Hardware generated return code from the operation.
*
* Return:
* * %0 - Mailbox transaction succeeded. This implies the mailbox
* protocol completed successfully not that the operation itself
* was successful.
* * %-ENOMEM - Couldn't allocate a bounce buffer.
* * %-EFAULT - Something happened with copy_to/from_user.
* * %-EINTR - Mailbox acquisition interrupted.
* * %-EXXX - Transaction level failures.
*
* Dispatches a mailbox command on behalf of a userspace request.
* The output payload is copied to userspace.
*
* See cxl_send_cmd().
*/
static int handle_mailbox_cmd_from_user(struct cxl_dev_state *cxlds,
struct cxl_mbox_cmd *mbox_cmd,
u64 out_payload, s32 *size_out,
u32 *retval)
{
struct device *dev = cxlds->dev;
int rc;
dev_dbg(dev,
"Submitting %s command for user\n"
"\topcode: %x\n"
"\tsize: %zx\n",
cxl_mem_opcode_to_name(mbox_cmd->opcode),
mbox_cmd->opcode, mbox_cmd->size_in);
rc = cxlds->mbox_send(cxlds, mbox_cmd);
if (rc)
goto out;
/*
* @size_out contains the max size that's allowed to be written back out
* to userspace. While the payload may have written more output than
* this it will have to be ignored.
*/
if (mbox_cmd->size_out) {
dev_WARN_ONCE(dev, mbox_cmd->size_out > *size_out,
"Invalid return size\n");
if (copy_to_user(u64_to_user_ptr(out_payload),
mbox_cmd->payload_out, mbox_cmd->size_out)) {
rc = -EFAULT;
goto out;
}
}
*size_out = mbox_cmd->size_out;
*retval = mbox_cmd->return_code;
out:
cxl_mbox_cmd_dtor(mbox_cmd);
return rc;
}
int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s)
{
struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct device *dev = &cxlmd->dev;
struct cxl_send_command send;
struct cxl_mbox_cmd mbox_cmd;
int rc;
dev_dbg(dev, "Send IOCTL\n");
if (copy_from_user(&send, s, sizeof(send)))
return -EFAULT;
rc = cxl_validate_cmd_from_user(&mbox_cmd, cxlmd->cxlds, &send);
if (rc)
return rc;
rc = handle_mailbox_cmd_from_user(cxlds, &mbox_cmd, send.out.payload,
&send.out.size, &send.retval);
if (rc)
return rc;
if (copy_to_user(s, &send, sizeof(send)))
return -EFAULT;
return 0;
}
static int cxl_xfer_log(struct cxl_dev_state *cxlds, uuid_t *uuid, u32 *size, u8 *out)
{
u32 remaining = *size;
u32 offset = 0;
while (remaining) {
u32 xfer_size = min_t(u32, remaining, cxlds->payload_size);
struct cxl_mbox_cmd mbox_cmd;
struct cxl_mbox_get_log log;
int rc;
log = (struct cxl_mbox_get_log) {
.uuid = *uuid,
.offset = cpu_to_le32(offset),
.length = cpu_to_le32(xfer_size),
};
mbox_cmd = (struct cxl_mbox_cmd) {
.opcode = CXL_MBOX_OP_GET_LOG,
.size_in = sizeof(log),
.payload_in = &log,
.size_out = xfer_size,
.payload_out = out,
};
rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
/*
* The output payload length that indicates the number
* of valid bytes can be smaller than the Log buffer
* size.
*/
if (rc == -EIO && mbox_cmd.size_out < xfer_size) {
offset += mbox_cmd.size_out;
break;
}
if (rc < 0)
return rc;
out += xfer_size;
remaining -= xfer_size;
offset += xfer_size;
}
*size = offset;
return 0;
}
/**
* cxl_walk_cel() - Walk through the Command Effects Log.
* @cxlds: The device data for the operation
* @size: Length of the Command Effects Log.
* @cel: CEL
*
* Iterate over each entry in the CEL and determine if the driver supports the
* command. If so, the command is enabled for the device and can be used later.
*/
static void cxl_walk_cel(struct cxl_dev_state *cxlds, size_t size, u8 *cel)
{
struct cxl_cel_entry *cel_entry;
const int cel_entries = size / sizeof(*cel_entry);
int i;
cel_entry = (struct cxl_cel_entry *) cel;
for (i = 0; i < cel_entries; i++) {
u16 opcode = le16_to_cpu(cel_entry[i].opcode);
struct cxl_mem_command *cmd = cxl_mem_find_command(opcode);
if (!cmd && !cxl_is_poison_command(opcode)) {
dev_dbg(cxlds->dev,
"Opcode 0x%04x unsupported by driver\n", opcode);
continue;
}
if (cmd)
set_bit(cmd->info.id, cxlds->enabled_cmds);
if (cxl_is_poison_command(opcode))
cxl_set_poison_cmd_enabled(&cxlds->poison, opcode);
dev_dbg(cxlds->dev, "Opcode 0x%04x enabled\n", opcode);
}
}
static struct cxl_mbox_get_supported_logs *cxl_get_gsl(struct cxl_dev_state *cxlds)
{
struct cxl_mbox_get_supported_logs *ret;
struct cxl_mbox_cmd mbox_cmd;
int rc;
ret = kvmalloc(cxlds->payload_size, GFP_KERNEL);
if (!ret)
return ERR_PTR(-ENOMEM);
mbox_cmd = (struct cxl_mbox_cmd) {
.opcode = CXL_MBOX_OP_GET_SUPPORTED_LOGS,
.size_out = cxlds->payload_size,
.payload_out = ret,
/* At least the record number field must be valid */
.min_out = 2,
};
rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
if (rc < 0) {
kvfree(ret);
return ERR_PTR(rc);
}
return ret;
}
enum {
CEL_UUID,
VENDOR_DEBUG_UUID,
};
/* See CXL 2.0 Table 170. Get Log Input Payload */
static const uuid_t log_uuid[] = {
[CEL_UUID] = DEFINE_CXL_CEL_UUID,
[VENDOR_DEBUG_UUID] = DEFINE_CXL_VENDOR_DEBUG_UUID,
};
/**
* cxl_enumerate_cmds() - Enumerate commands for a device.
* @cxlds: The device data for the operation
*
* Returns 0 if enumerate completed successfully.
*
* CXL devices have optional support for certain commands. This function will
* determine the set of supported commands for the hardware and update the
* enabled_cmds bitmap in the @cxlds.
*/
int cxl_enumerate_cmds(struct cxl_dev_state *cxlds)
{
struct cxl_mbox_get_supported_logs *gsl;
struct device *dev = cxlds->dev;
struct cxl_mem_command *cmd;
int i, rc;
gsl = cxl_get_gsl(cxlds);
if (IS_ERR(gsl))
return PTR_ERR(gsl);
rc = -ENOENT;
for (i = 0; i < le16_to_cpu(gsl->entries); i++) {
u32 size = le32_to_cpu(gsl->entry[i].size);
uuid_t uuid = gsl->entry[i].uuid;
u8 *log;
dev_dbg(dev, "Found LOG type %pU of size %d", &uuid, size);
if (!uuid_equal(&uuid, &log_uuid[CEL_UUID]))
continue;
log = kvmalloc(size, GFP_KERNEL);
if (!log) {
rc = -ENOMEM;
goto out;
}
rc = cxl_xfer_log(cxlds, &uuid, &size, log);
if (rc) {
kvfree(log);
goto out;
}
cxl_walk_cel(cxlds, size, log);
kvfree(log);
/* In case CEL was bogus, enable some default commands. */
cxl_for_each_cmd(cmd)
if (cmd->flags & CXL_CMD_FLAG_FORCE_ENABLE)
set_bit(cmd->info.id, cxlds->enabled_cmds);
/* Found the required CEL */
rc = 0;
}
out:
kvfree(gsl);
return rc;
}
EXPORT_SYMBOL_NS_GPL(cxl_enumerate_cmds, CXL);
/*
* General Media Event Record
* CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43
*/
static const uuid_t gen_media_event_uuid =
UUID_INIT(0xfbcd0a77, 0xc260, 0x417f,
0x85, 0xa9, 0x08, 0x8b, 0x16, 0x21, 0xeb, 0xa6);
/*
* DRAM Event Record
* CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44
*/
static const uuid_t dram_event_uuid =
UUID_INIT(0x601dcbb3, 0x9c06, 0x4eab,
0xb8, 0xaf, 0x4e, 0x9b, 0xfb, 0x5c, 0x96, 0x24);
/*
* Memory Module Event Record
* CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45
*/
static const uuid_t mem_mod_event_uuid =
UUID_INIT(0xfe927475, 0xdd59, 0x4339,
0xa5, 0x86, 0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74);
static void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
enum cxl_event_log_type type,
struct cxl_event_record_raw *record)
{
uuid_t *id = &record->hdr.id;
if (uuid_equal(id, &gen_media_event_uuid)) {
struct cxl_event_gen_media *rec =
(struct cxl_event_gen_media *)record;
trace_cxl_general_media(cxlmd, type, rec);
} else if (uuid_equal(id, &dram_event_uuid)) {
struct cxl_event_dram *rec = (struct cxl_event_dram *)record;
trace_cxl_dram(cxlmd, type, rec);
} else if (uuid_equal(id, &mem_mod_event_uuid)) {
struct cxl_event_mem_module *rec =
(struct cxl_event_mem_module *)record;
trace_cxl_memory_module(cxlmd, type, rec);
} else {
/* For unknown record types print just the header */
trace_cxl_generic_event(cxlmd, type, record);
}
}
static int cxl_clear_event_record(struct cxl_dev_state *cxlds,
enum cxl_event_log_type log,
struct cxl_get_event_payload *get_pl)
{
struct cxl_mbox_clear_event_payload *payload;
u16 total = le16_to_cpu(get_pl->record_count);
u8 max_handles = CXL_CLEAR_EVENT_MAX_HANDLES;
size_t pl_size = struct_size(payload, handles, max_handles);
struct cxl_mbox_cmd mbox_cmd;
u16 cnt;
int rc = 0;
int i;
/* Payload size may limit the max handles */
if (pl_size > cxlds->payload_size) {
max_handles = (cxlds->payload_size - sizeof(*payload)) /
sizeof(__le16);
pl_size = struct_size(payload, handles, max_handles);
}
payload = kvzalloc(pl_size, GFP_KERNEL);
if (!payload)
return -ENOMEM;
*payload = (struct cxl_mbox_clear_event_payload) {
.event_log = log,
};
mbox_cmd = (struct cxl_mbox_cmd) {
.opcode = CXL_MBOX_OP_CLEAR_EVENT_RECORD,
.payload_in = payload,
.size_in = pl_size,
};
/*
* Clear Event Records uses u8 for the handle cnt while Get Event
* Record can return up to 0xffff records.
*/
i = 0;
for (cnt = 0; cnt < total; cnt++) {
payload->handles[i++] = get_pl->records[cnt].hdr.handle;
dev_dbg(cxlds->dev, "Event log '%d': Clearing %u\n",
log, le16_to_cpu(payload->handles[i]));
if (i == max_handles) {
payload->nr_recs = i;
rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
if (rc)
goto free_pl;
i = 0;
}
}
/* Clear what is left if any */
if (i) {
payload->nr_recs = i;
mbox_cmd.size_in = struct_size(payload, handles, i);
rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
if (rc)
goto free_pl;
}
free_pl:
kvfree(payload);
return rc;
}
static void cxl_mem_get_records_log(struct cxl_dev_state *cxlds,
enum cxl_event_log_type type)
{
struct cxl_get_event_payload *payload;
struct cxl_mbox_cmd mbox_cmd;
u8 log_type = type;
u16 nr_rec;
mutex_lock(&cxlds->event.log_lock);
payload = cxlds->event.buf;
mbox_cmd = (struct cxl_mbox_cmd) {
.opcode = CXL_MBOX_OP_GET_EVENT_RECORD,
.payload_in = &log_type,
.size_in = sizeof(log_type),
.payload_out = payload,
.size_out = cxlds->payload_size,
.min_out = struct_size(payload, records, 0),
};
do {
int rc, i;
rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
if (rc) {
dev_err_ratelimited(cxlds->dev,
"Event log '%d': Failed to query event records : %d",
type, rc);
break;
}
nr_rec = le16_to_cpu(payload->record_count);
if (!nr_rec)
break;
for (i = 0; i < nr_rec; i++)
cxl_event_trace_record(cxlds->cxlmd, type,
&payload->records[i]);
if (payload->flags & CXL_GET_EVENT_FLAG_OVERFLOW)
trace_cxl_overflow(cxlds->cxlmd, type, payload);
rc = cxl_clear_event_record(cxlds, type, payload);
if (rc) {
dev_err_ratelimited(cxlds->dev,
"Event log '%d': Failed to clear events : %d",
type, rc);
break;
}
} while (nr_rec);
mutex_unlock(&cxlds->event.log_lock);
}
/**
* cxl_mem_get_event_records - Get Event Records from the device
* @cxlds: The device data for the operation
* @status: Event Status register value identifying which events are available.
*
* Retrieve all event records available on the device, report them as trace
* events, and clear them.
*
* See CXL rev 3.0 @8.2.9.2.2 Get Event Records
* See CXL rev 3.0 @8.2.9.2.3 Clear Event Records
*/
void cxl_mem_get_event_records(struct cxl_dev_state *cxlds, u32 status)
{
dev_dbg(cxlds->dev, "Reading event logs: %x\n", status);
if (status & CXLDEV_EVENT_STATUS_FATAL)
cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_FATAL);
if (status & CXLDEV_EVENT_STATUS_FAIL)
cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_FAIL);
if (status & CXLDEV_EVENT_STATUS_WARN)
cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_WARN);
if (status & CXLDEV_EVENT_STATUS_INFO)
cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_INFO);
}
EXPORT_SYMBOL_NS_GPL(cxl_mem_get_event_records, CXL);
/**
* cxl_mem_get_partition_info - Get partition info
* @cxlds: The device data for the operation
*
* Retrieve the current partition info for the device specified. The active
* values are the current capacity in bytes. If not 0, the 'next' values are
* the pending values, in bytes, which take affect on next cold reset.
*
* Return: 0 if no error: or the result of the mailbox command.
*
* See CXL @8.2.9.5.2.1 Get Partition Info
*/
static int cxl_mem_get_partition_info(struct cxl_dev_state *cxlds)
{
struct cxl_mbox_get_partition_info pi;
struct cxl_mbox_cmd mbox_cmd;
int rc;
mbox_cmd = (struct cxl_mbox_cmd) {
.opcode = CXL_MBOX_OP_GET_PARTITION_INFO,
.size_out = sizeof(pi),
.payload_out = &pi,
};
rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
if (rc)
return rc;
cxlds->active_volatile_bytes =
le64_to_cpu(pi.active_volatile_cap) * CXL_CAPACITY_MULTIPLIER;
cxlds->active_persistent_bytes =
le64_to_cpu(pi.active_persistent_cap) * CXL_CAPACITY_MULTIPLIER;
cxlds->next_volatile_bytes =
le64_to_cpu(pi.next_volatile_cap) * CXL_CAPACITY_MULTIPLIER;
cxlds->next_persistent_bytes =
le64_to_cpu(pi.next_volatile_cap) * CXL_CAPACITY_MULTIPLIER;
return 0;
}
/**
* cxl_dev_state_identify() - Send the IDENTIFY command to the device.
* @cxlds: The device data for the operation
*
* Return: 0 if identify was executed successfully or media not ready.
*
* This will dispatch the identify command to the device and on success populate
* structures to be exported to sysfs.
*/
int cxl_dev_state_identify(struct cxl_dev_state *cxlds)
{
/* See CXL 2.0 Table 175 Identify Memory Device Output Payload */
struct cxl_mbox_identify id;
struct cxl_mbox_cmd mbox_cmd;
u32 val;
int rc;
if (!cxlds->media_ready)
return 0;
mbox_cmd = (struct cxl_mbox_cmd) {
.opcode = CXL_MBOX_OP_IDENTIFY,
.size_out = sizeof(id),
.payload_out = &id,
};
rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
if (rc < 0)
return rc;
cxlds->total_bytes =
le64_to_cpu(id.total_capacity) * CXL_CAPACITY_MULTIPLIER;
cxlds->volatile_only_bytes =
le64_to_cpu(id.volatile_capacity) * CXL_CAPACITY_MULTIPLIER;
cxlds->persistent_only_bytes =
le64_to_cpu(id.persistent_capacity) * CXL_CAPACITY_MULTIPLIER;
cxlds->partition_align_bytes =
le64_to_cpu(id.partition_align) * CXL_CAPACITY_MULTIPLIER;
cxlds->lsa_size = le32_to_cpu(id.lsa_size);
memcpy(cxlds->firmware_version, id.fw_revision, sizeof(id.fw_revision));
if (test_bit(CXL_POISON_ENABLED_LIST, cxlds->poison.enabled_cmds)) {
val = get_unaligned_le24(id.poison_list_max_mer);
cxlds->poison.max_errors = min_t(u32, val, CXL_POISON_LIST_MAX);
}
return 0;
}
EXPORT_SYMBOL_NS_GPL(cxl_dev_state_identify, CXL);
static int add_dpa_res(struct device *dev, struct resource *parent,
struct resource *res, resource_size_t start,
resource_size_t size, const char *type)
{
int rc;
res->name = type;
res->start = start;
res->end = start + size - 1;
res->flags = IORESOURCE_MEM;
if (resource_size(res) == 0) {
dev_dbg(dev, "DPA(%s): no capacity\n", res->name);
return 0;
}
rc = request_resource(parent, res);
if (rc) {
dev_err(dev, "DPA(%s): failed to track %pr (%d)\n", res->name,
res, rc);
return rc;
}
dev_dbg(dev, "DPA(%s): %pr\n", res->name, res);
return 0;
}
int cxl_mem_create_range_info(struct cxl_dev_state *cxlds)
{
struct device *dev = cxlds->dev;
int rc;
if (!cxlds->media_ready) {
cxlds->dpa_res = DEFINE_RES_MEM(0, 0);
cxlds->ram_res = DEFINE_RES_MEM(0, 0);
cxlds->pmem_res = DEFINE_RES_MEM(0, 0);
return 0;
}
cxlds->dpa_res =
(struct resource)DEFINE_RES_MEM(0, cxlds->total_bytes);
if (cxlds->partition_align_bytes == 0) {
rc = add_dpa_res(dev, &cxlds->dpa_res, &cxlds->ram_res, 0,
cxlds->volatile_only_bytes, "ram");
if (rc)
return rc;
return add_dpa_res(dev, &cxlds->dpa_res, &cxlds->pmem_res,
cxlds->volatile_only_bytes,
cxlds->persistent_only_bytes, "pmem");
}
rc = cxl_mem_get_partition_info(cxlds);
if (rc) {
dev_err(dev, "Failed to query partition information\n");
return rc;
}
rc = add_dpa_res(dev, &cxlds->dpa_res, &cxlds->ram_res, 0,
cxlds->active_volatile_bytes, "ram");
if (rc)
return rc;
return add_dpa_res(dev, &cxlds->dpa_res, &cxlds->pmem_res,
cxlds->active_volatile_bytes,
cxlds->active_persistent_bytes, "pmem");
}
EXPORT_SYMBOL_NS_GPL(cxl_mem_create_range_info, CXL);
int cxl_set_timestamp(struct cxl_dev_state *cxlds)
{
struct cxl_mbox_cmd mbox_cmd;
struct cxl_mbox_set_timestamp_in pi;
int rc;
pi.timestamp = cpu_to_le64(ktime_get_real_ns());
mbox_cmd = (struct cxl_mbox_cmd) {
.opcode = CXL_MBOX_OP_SET_TIMESTAMP,
.size_in = sizeof(pi),
.payload_in = &pi,
};
rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
/*
* Command is optional. Devices may have another way of providing
* a timestamp, or may return all 0s in timestamp fields.
* Don't report an error if this command isn't supported
*/
if (rc && (mbox_cmd.return_code != CXL_MBOX_CMD_RC_UNSUPPORTED))
return rc;
return 0;
}
EXPORT_SYMBOL_NS_GPL(cxl_set_timestamp, CXL);
int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len,
struct cxl_region *cxlr)
{
struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct cxl_mbox_poison_out *po;
struct cxl_mbox_poison_in pi;
struct cxl_mbox_cmd mbox_cmd;
int nr_records = 0;
int rc;
rc = mutex_lock_interruptible(&cxlds->poison.lock);
if (rc)
return rc;
po = cxlds->poison.list_out;
pi.offset = cpu_to_le64(offset);
pi.length = cpu_to_le64(len / CXL_POISON_LEN_MULT);
mbox_cmd = (struct cxl_mbox_cmd) {
.opcode = CXL_MBOX_OP_GET_POISON,
.size_in = sizeof(pi),
.payload_in = &pi,
.size_out = cxlds->payload_size,
.payload_out = po,
.min_out = struct_size(po, record, 0),
};
do {
rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
if (rc)
break;
for (int i = 0; i < le16_to_cpu(po->count); i++)
trace_cxl_poison(cxlmd, cxlr, &po->record[i],
po->flags, po->overflow_ts,
CXL_POISON_TRACE_LIST);
/* Protect against an uncleared _FLAG_MORE */
nr_records = nr_records + le16_to_cpu(po->count);
if (nr_records >= cxlds->poison.max_errors) {
dev_dbg(&cxlmd->dev, "Max Error Records reached: %d\n",
nr_records);
break;
}
} while (po->flags & CXL_POISON_FLAG_MORE);
mutex_unlock(&cxlds->poison.lock);
return rc;
}
EXPORT_SYMBOL_NS_GPL(cxl_mem_get_poison, CXL);
static void free_poison_buf(void *buf)
{
kvfree(buf);
}
/* Get Poison List output buffer is protected by cxlds->poison.lock */
static int cxl_poison_alloc_buf(struct cxl_dev_state *cxlds)
{
cxlds->poison.list_out = kvmalloc(cxlds->payload_size, GFP_KERNEL);
if (!cxlds->poison.list_out)
return -ENOMEM;
return devm_add_action_or_reset(cxlds->dev, free_poison_buf,
cxlds->poison.list_out);
}
int cxl_poison_state_init(struct cxl_dev_state *cxlds)
{
int rc;
if (!test_bit(CXL_POISON_ENABLED_LIST, cxlds->poison.enabled_cmds))
return 0;
rc = cxl_poison_alloc_buf(cxlds);
if (rc) {
clear_bit(CXL_POISON_ENABLED_LIST, cxlds->poison.enabled_cmds);
return rc;
}
mutex_init(&cxlds->poison.lock);
return 0;
}
EXPORT_SYMBOL_NS_GPL(cxl_poison_state_init, CXL);
struct cxl_dev_state *cxl_dev_state_create(struct device *dev)
{
struct cxl_dev_state *cxlds;
cxlds = devm_kzalloc(dev, sizeof(*cxlds), GFP_KERNEL);
if (!cxlds) {
dev_err(dev, "No memory available\n");
return ERR_PTR(-ENOMEM);
}
mutex_init(&cxlds->mbox_mutex);
mutex_init(&cxlds->event.log_lock);
cxlds->dev = dev;
return cxlds;
}
EXPORT_SYMBOL_NS_GPL(cxl_dev_state_create, CXL);
void __init cxl_mbox_init(void)
{
struct dentry *mbox_debugfs;
mbox_debugfs = cxl_debugfs_create_dir("mbox");
debugfs_create_bool("raw_allow_all", 0600, mbox_debugfs,
&cxl_raw_allow_all);
}