linux-stable/include/uapi/linux/virtio_mem.h
David Hildenbrand e4544c550e virtio-mem: support suspend+resume
With virtio-mem, primarily hibernation is problematic: as the machine shuts
down, the virtio-mem device loses its state. Powering the machine back up
is like losing a bunch of DIMMs. While there would be ways to add limited
support, suspend+resume is more commonly used for VMs and "easier" to
support cleanly.

s2idle can be supported without any device dependencies. Similarly, one
would expect suspend-to-ram (i.e., S3) to work out of the box. However,
QEMU currently unplugs all device memory when resuming the VM, using a
cold reset on the "wakeup" path. In order to support S3, we need a feature
flag for the device to tell us if memory remains plugged when waking up. In
the future, QEMU will implement this feature.

So let's always support s2idle and support S3 with plugged memory only if
the device indicates support. Block hibernation early using the PM
notifier.

Trying to hibernate now fails early:
	# echo disk > /sys/power/state
	[   26.455369] PM: hibernation: hibernation entry
	[   26.458271] virtio_mem virtio0: hibernation is not supported.
	[   26.462498] PM: hibernation: hibernation exit
	-bash: echo: write error: Operation not permitted

s2idle works even without the new feature bit:
	# echo s2idle > /sys/power/mem_sleep
	# echo mem > /sys/power/state
	[   52.083725] PM: suspend entry (s2idle)
	[   52.095950] Filesystems sync: 0.010 seconds
	[   52.101493] Freezing user space processes
	[   52.104213] Freezing user space processes completed (elapsed 0.001 seconds)
	[   52.106520] OOM killer disabled.
	[   52.107655] Freezing remaining freezable tasks
	[   52.110880] Freezing remaining freezable tasks completed (elapsed 0.001 seconds)
	[   52.113296] printk: Suspending console(s) (use no_console_suspend to debug)

S3 does not work without the feature bit when memory is plugged:
	# echo deep > /sys/power/mem_sleep
	# echo mem > /sys/power/state
	[   32.788281] PM: suspend entry (deep)
	[   32.816630] Filesystems sync: 0.027 seconds
	[   32.820029] Freezing user space processes
	[   32.823870] Freezing user space processes completed (elapsed 0.001 seconds)
	[   32.827756] OOM killer disabled.
	[   32.829608] Freezing remaining freezable tasks
	[   32.833842] Freezing remaining freezable tasks completed (elapsed 0.001 seconds)
	[   32.837953] printk: Suspending console(s) (use no_console_suspend to debug)
	[   32.916172] virtio_mem virtio0: suspend+resume with plugged memory is not supported
	[   32.916181] virtio-pci 0000:00:02.0: PM: pci_pm_suspend(): virtio_pci_freeze+0x0/0x50 returns -1
	[   32.916197] virtio-pci 0000:00:02.0: PM: dpm_run_callback(): pci_pm_suspend+0x0/0x170 returns -1
	[   32.916210] virtio-pci 0000:00:02.0: PM: failed to suspend async: error -1

But S3 works with the new feature bit when memory is plugged (patched
QEMU):
	# echo deep > /sys/power/mem_sleep
	# echo mem > /sys/power/state
	[   33.983694] PM: suspend entry (deep)
	[   34.009828] Filesystems sync: 0.024 seconds
	[   34.013589] Freezing user space processes
	[   34.016722] Freezing user space processes completed (elapsed 0.001 seconds)
	[   34.019092] OOM killer disabled.
	[   34.020291] Freezing remaining freezable tasks
	[   34.023549] Freezing remaining freezable tasks completed (elapsed 0.001 seconds)
	[   34.026090] printk: Suspending console(s) (use no_console_suspend to debug)

Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
Message-Id: <20240318120645.105664-1-david@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2024-05-22 08:31:15 -04:00

217 lines
7.1 KiB
C

/* SPDX-License-Identifier: BSD-3-Clause */
/*
* Virtio Mem Device
*
* Copyright Red Hat, Inc. 2020
*
* Authors:
* David Hildenbrand <david@redhat.com>
*
* This header is BSD licensed so anyone can use the definitions
* to implement compatible drivers/servers:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of IBM nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL IBM OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef _LINUX_VIRTIO_MEM_H
#define _LINUX_VIRTIO_MEM_H
#include <linux/types.h>
#include <linux/virtio_types.h>
#include <linux/virtio_ids.h>
#include <linux/virtio_config.h>
/*
* Each virtio-mem device manages a dedicated region in physical address
* space. Each device can belong to a single NUMA node, multiple devices
* for a single NUMA node are possible. A virtio-mem device is like a
* "resizable DIMM" consisting of small memory blocks that can be plugged
* or unplugged. The device driver is responsible for (un)plugging memory
* blocks on demand.
*
* Virtio-mem devices can only operate on their assigned memory region in
* order to (un)plug memory. A device cannot (un)plug memory belonging to
* other devices.
*
* The "region_size" corresponds to the maximum amount of memory that can
* be provided by a device. The "size" corresponds to the amount of memory
* that is currently plugged. "requested_size" corresponds to a request
* from the device to the device driver to (un)plug blocks. The
* device driver should try to (un)plug blocks in order to reach the
* "requested_size". It is impossible to plug more memory than requested.
*
* The "usable_region_size" represents the memory region that can actually
* be used to (un)plug memory. It is always at least as big as the
* "requested_size" and will grow dynamically. It will only shrink when
* explicitly triggered (VIRTIO_MEM_REQ_UNPLUG).
*
* There are no guarantees what will happen if unplugged memory is
* read/written. In general, unplugged memory should not be touched, because
* the resulting action is undefined. There is one exception: without
* VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE, unplugged memory inside the usable
* region can be read, to simplify creation of memory dumps.
*
* It can happen that the device cannot process a request, because it is
* busy. The device driver has to retry later.
*
* Usually, during system resets all memory will get unplugged, so the
* device driver can start with a clean state. However, in specific
* scenarios (if the device is busy) it can happen that the device still
* has memory plugged. The device driver can request to unplug all memory
* (VIRTIO_MEM_REQ_UNPLUG) - which might take a while to succeed if the
* device is busy.
*/
/* --- virtio-mem: feature bits --- */
/* node_id is an ACPI PXM and is valid */
#define VIRTIO_MEM_F_ACPI_PXM 0
/* unplugged memory must not be accessed */
#define VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE 1
/* plugged memory will remain plugged when suspending+resuming */
#define VIRTIO_MEM_F_PERSISTENT_SUSPEND 2
/* --- virtio-mem: guest -> host requests --- */
/* request to plug memory blocks */
#define VIRTIO_MEM_REQ_PLUG 0
/* request to unplug memory blocks */
#define VIRTIO_MEM_REQ_UNPLUG 1
/* request to unplug all blocks and shrink the usable size */
#define VIRTIO_MEM_REQ_UNPLUG_ALL 2
/* request information about the plugged state of memory blocks */
#define VIRTIO_MEM_REQ_STATE 3
struct virtio_mem_req_plug {
__virtio64 addr;
__virtio16 nb_blocks;
__virtio16 padding[3];
};
struct virtio_mem_req_unplug {
__virtio64 addr;
__virtio16 nb_blocks;
__virtio16 padding[3];
};
struct virtio_mem_req_state {
__virtio64 addr;
__virtio16 nb_blocks;
__virtio16 padding[3];
};
struct virtio_mem_req {
__virtio16 type;
__virtio16 padding[3];
union {
struct virtio_mem_req_plug plug;
struct virtio_mem_req_unplug unplug;
struct virtio_mem_req_state state;
} u;
};
/* --- virtio-mem: host -> guest response --- */
/*
* Request processed successfully, applicable for
* - VIRTIO_MEM_REQ_PLUG
* - VIRTIO_MEM_REQ_UNPLUG
* - VIRTIO_MEM_REQ_UNPLUG_ALL
* - VIRTIO_MEM_REQ_STATE
*/
#define VIRTIO_MEM_RESP_ACK 0
/*
* Request denied - e.g. trying to plug more than requested, applicable for
* - VIRTIO_MEM_REQ_PLUG
*/
#define VIRTIO_MEM_RESP_NACK 1
/*
* Request cannot be processed right now, try again later, applicable for
* - VIRTIO_MEM_REQ_PLUG
* - VIRTIO_MEM_REQ_UNPLUG
* - VIRTIO_MEM_REQ_UNPLUG_ALL
*/
#define VIRTIO_MEM_RESP_BUSY 2
/*
* Error in request (e.g. addresses/alignment), applicable for
* - VIRTIO_MEM_REQ_PLUG
* - VIRTIO_MEM_REQ_UNPLUG
* - VIRTIO_MEM_REQ_STATE
*/
#define VIRTIO_MEM_RESP_ERROR 3
/* State of memory blocks is "plugged" */
#define VIRTIO_MEM_STATE_PLUGGED 0
/* State of memory blocks is "unplugged" */
#define VIRTIO_MEM_STATE_UNPLUGGED 1
/* State of memory blocks is "mixed" */
#define VIRTIO_MEM_STATE_MIXED 2
struct virtio_mem_resp_state {
__virtio16 state;
};
struct virtio_mem_resp {
__virtio16 type;
__virtio16 padding[3];
union {
struct virtio_mem_resp_state state;
} u;
};
/* --- virtio-mem: configuration --- */
struct virtio_mem_config {
/* Block size and alignment. Cannot change. */
__le64 block_size;
/* Valid with VIRTIO_MEM_F_ACPI_PXM. Cannot change. */
__le16 node_id;
__u8 padding[6];
/* Start address of the memory region. Cannot change. */
__le64 addr;
/* Region size (maximum). Cannot change. */
__le64 region_size;
/*
* Currently usable region size. Can grow up to region_size. Can
* shrink due to VIRTIO_MEM_REQ_UNPLUG_ALL (in which case no config
* update will be sent).
*/
__le64 usable_region_size;
/*
* Currently used size. Changes due to plug/unplug requests, but no
* config updates will be sent.
*/
__le64 plugged_size;
/* Requested size. New plug requests cannot exceed it. Can change. */
__le64 requested_size;
};
#endif /* _LINUX_VIRTIO_MEM_H */