vfio/virtio: Add support for the basic live migration functionality

Add support for basic live migration functionality in VFIO over
virtio-net devices, aligned with the virtio device specification 1.4.

This includes the following VFIO features:
VFIO_MIGRATION_STOP_COPY, VFIO_MIGRATION_P2P.

The implementation registers with the VFIO subsystem using vfio_pci_core
and then incorporates the virtio-specific logic for the migration
process.

The migration follows the definitions in uapi/vfio.h and leverages the
virtio VF-to-PF admin queue command channel to execute device-part-related
commands.

Additional Notes:
-----------------
The kernel protocol between the source and target devices contains a
header with metadata, including record size, tag, and flags.

The record size allows the target to recognize and read a complete image
from the source before passing the device part data. This adheres to the
virtio device specification, which mandates that partial device parts
cannot be supplied.

The tag and flags serve as placeholders for future extensions of the
kernel protocol between the source and target, ensuring backward and
forward compatibility.

Both the source and target comply with the virtio device specification
by using a device part object with a unique ID as part of the migration
process. Since this resource is limited to a maximum of 255, its
lifecycle is confined to periods with an active live migration flow.

According to the virtio specification, a device has only two modes:
RUNNING and STOPPED. As a result, certain VFIO transitions (i.e.,
RUNNING_P2P->STOP, STOP->RUNNING_P2P) are treated as no-ops. When
transitioning to RUNNING_P2P, the device state is set to STOP, and it
will remain STOPPED until the transition out of RUNNING_P2P->RUNNING, at
which point it returns to RUNNING. During the transition to STOP, the
virtio device only stops initiating outgoing requests (e.g., DMA, MSI-X)
but must still accept incoming operations.

Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Link: https://lore.kernel.org/r/20241113115200.209269-6-yishaih@nvidia.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
This commit is contained in:
Yishai Hadas 2024-11-13 13:51:58 +02:00 committed by Alex Williamson
parent 52a22c0ed0
commit 0bbc82e4ec
4 changed files with 1285 additions and 25 deletions

View File

@ -1,3 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_VIRTIO_VFIO_PCI) += virtio-vfio-pci.o
virtio-vfio-pci-y := main.o
virtio-vfio-pci-y := main.o migrate.o

View File

@ -0,0 +1,104 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef VIRTIO_VFIO_COMMON_H
#define VIRTIO_VFIO_COMMON_H
#include <linux/kernel.h>
#include <linux/virtio.h>
#include <linux/vfio_pci_core.h>
#include <linux/virtio_pci.h>
/* Overall state of a migration file; only the error state is tracked today. */
enum virtiovf_migf_state {
VIRTIOVF_MIGF_STATE_ERROR = 1,
};
/*
 * Load-side (resuming) state machine: the kernel protocol header is read
 * first, then device-part data is prepared, read and loaded one complete
 * chunk at a time, per the migration flow described in the commit log.
 */
enum virtiovf_load_state {
VIRTIOVF_LOAD_STATE_READ_HEADER,
VIRTIOVF_LOAD_STATE_PREP_HEADER_DATA,
VIRTIOVF_LOAD_STATE_READ_HEADER_DATA,
VIRTIOVF_LOAD_STATE_PREP_CHUNK,
VIRTIOVF_LOAD_STATE_READ_CHUNK,
VIRTIOVF_LOAD_STATE_LOAD_CHUNK,
};
/*
 * A scatter-gather backed buffer holding a slice of migration data at a
 * given offset (start_pos) within the migration file stream.
 */
struct virtiovf_data_buffer {
struct sg_append_table table;
loff_t start_pos;
u64 length;
u64 allocated_length;
struct list_head buf_elm;
u8 include_header_object:1;
struct virtiovf_migration_file *migf;
/* Optimize virtiovf_get_migration_page() for sequential access */
struct scatterlist *last_offset_sg;
unsigned int sg_last_entry;
unsigned long last_offset;
};
/*
 * Flags placed in the migration header; reserved for future protocol
 * extensions to keep backward/forward compatibility between source and
 * target kernels.
 */
enum virtiovf_migf_header_flags {
VIRTIOVF_MIGF_HEADER_FLAGS_TAG_MANDATORY = 0,
VIRTIOVF_MIGF_HEADER_FLAGS_TAG_OPTIONAL = 1 << 0,
};
/* Record tags; only device data is defined at this point. */
enum virtiovf_migf_header_tag {
VIRTIOVF_MIGF_HEADER_TAG_DEVICE_DATA = 0,
};
/*
 * On-the-wire header of the kernel migration protocol. record_size lets
 * the target read a complete image before supplying device-part data,
 * as partial device parts cannot be supplied per the virtio spec.
 * All multi-byte fields are little-endian.
 */
struct virtiovf_migration_header {
__le64 record_size;
/* For future use in case we may need to change the kernel protocol */
__le32 flags; /* Use virtiovf_migf_header_flags */
__le32 tag; /* Use virtiovf_migf_header_tag */
__u8 data[]; /* Its size is given in the record_size */
};
/*
 * Per-direction migration file (save on the source, resume on the target)
 * exposed to user space as a file descriptor.
 */
struct virtiovf_migration_file {
struct file *filp;
/* synchronize access to the file state */
struct mutex lock;
loff_t max_pos;
u64 record_size;
u32 record_tag;
/* Device part object ID is valid only while has_obj_id is set; the
 * ID space is limited (max 255), so its lifetime is confined to an
 * active migration flow. */
u8 has_obj_id:1;
u32 obj_id;
enum virtiovf_migf_state state;
enum virtiovf_load_state load_state;
/* synchronize access to the lists */
spinlock_t list_lock;
struct list_head buf_list;
struct list_head avail_list;
struct virtiovf_data_buffer *buf;
struct virtiovf_data_buffer *buf_header;
struct virtiovf_pci_core_device *virtvdev;
};
/*
 * Driver-private device state layered over vfio_pci_core; combines the
 * legacy BAR0 emulation fields with the live-migration (LM) state.
 */
struct virtiovf_pci_core_device {
struct vfio_pci_core_device core_device;
u8 *bar0_virtual_buf;
/* synchronize access to the virtual buf */
struct mutex bar_mutex;
void __iomem *notify_addr;
u64 notify_offset;
__le32 pci_base_addr_0;
__le16 pci_cmd;
u8 bar0_virtual_buf_size;
u8 notify_bar;
/* LM related */
u8 migrate_cap:1;
u8 deferred_reset:1;
/* protect migration state */
struct mutex state_mutex;
enum vfio_device_mig_state mig_state;
/* protect the reset_done flow */
spinlock_t reset_lock;
struct virtiovf_migration_file *resuming_migf;
struct virtiovf_migration_file *saving_migf;
};
/* Live-migration entry points implemented in migrate.o. */
void virtiovf_set_migratable(struct virtiovf_pci_core_device *virtvdev);
void virtiovf_open_migration(struct virtiovf_pci_core_device *virtvdev);
void virtiovf_close_migration(struct virtiovf_pci_core_device *virtvdev);
void virtiovf_migration_reset_done(struct pci_dev *pdev);
#endif /* VIRTIO_VFIO_COMMON_H */

View File

@ -16,18 +16,7 @@
#include <linux/virtio_net.h>
#include <linux/virtio_pci_admin.h>
struct virtiovf_pci_core_device {
struct vfio_pci_core_device core_device;
u8 *bar0_virtual_buf;
/* synchronize access to the virtual buf */
struct mutex bar_mutex;
void __iomem *notify_addr;
u64 notify_offset;
__le32 pci_base_addr_0;
__le16 pci_cmd;
u8 bar0_virtual_buf_size;
u8 notify_bar;
};
#include "common.h"
static int
virtiovf_issue_legacy_rw_cmd(struct virtiovf_pci_core_device *virtvdev,
@ -355,8 +344,8 @@ virtiovf_set_notify_addr(struct virtiovf_pci_core_device *virtvdev)
static int virtiovf_pci_open_device(struct vfio_device *core_vdev)
{
struct virtiovf_pci_core_device *virtvdev = container_of(
core_vdev, struct virtiovf_pci_core_device, core_device.vdev);
struct virtiovf_pci_core_device *virtvdev = container_of(core_vdev,
struct virtiovf_pci_core_device, core_device.vdev);
struct vfio_pci_core_device *vdev = &virtvdev->core_device;
int ret;
@ -377,10 +366,20 @@ static int virtiovf_pci_open_device(struct vfio_device *core_vdev)
}
}
virtiovf_open_migration(virtvdev);
vfio_pci_core_finish_enable(vdev);
return 0;
}
/*
 * VFIO close_device callback: tear down any live-migration state for
 * this VF, then fall through to the generic vfio-pci core close path.
 */
static void virtiovf_pci_close_device(struct vfio_device *core_vdev)
{
struct virtiovf_pci_core_device *virtvdev = container_of(core_vdev,
struct virtiovf_pci_core_device, core_device.vdev);
virtiovf_close_migration(virtvdev);
vfio_pci_core_close_device(core_vdev);
}
static int virtiovf_get_device_config_size(unsigned short device)
{
/* Network card */
@ -406,8 +405,8 @@ static int virtiovf_read_notify_info(struct virtiovf_pci_core_device *virtvdev)
static int virtiovf_pci_init_device(struct vfio_device *core_vdev)
{
struct virtiovf_pci_core_device *virtvdev = container_of(
core_vdev, struct virtiovf_pci_core_device, core_device.vdev);
struct virtiovf_pci_core_device *virtvdev = container_of(core_vdev,
struct virtiovf_pci_core_device, core_device.vdev);
struct pci_dev *pdev;
int ret;
@ -416,6 +415,10 @@ static int virtiovf_pci_init_device(struct vfio_device *core_vdev)
return ret;
pdev = virtvdev->core_device.pdev;
/*
* The vfio_device_ops.init() callback is set to virtiovf_pci_init_device()
* only when legacy I/O is supported. Now, let's initialize it.
*/
ret = virtiovf_read_notify_info(virtvdev);
if (ret)
return ret;
@ -433,19 +436,38 @@ static int virtiovf_pci_init_device(struct vfio_device *core_vdev)
static void virtiovf_pci_core_release_dev(struct vfio_device *core_vdev)
{
struct virtiovf_pci_core_device *virtvdev = container_of(
core_vdev, struct virtiovf_pci_core_device, core_device.vdev);
struct virtiovf_pci_core_device *virtvdev = container_of(core_vdev,
struct virtiovf_pci_core_device, core_device.vdev);
kfree(virtvdev->bar0_virtual_buf);
vfio_pci_core_release_dev(core_vdev);
}
static const struct vfio_device_ops virtiovf_vfio_pci_tran_ops = {
.name = "virtio-vfio-pci-trans",
static const struct vfio_device_ops virtiovf_vfio_pci_lm_ops = {
.name = "virtio-vfio-pci-lm",
.init = vfio_pci_core_init_dev,
.release = virtiovf_pci_core_release_dev,
.open_device = virtiovf_pci_open_device,
.close_device = virtiovf_pci_close_device,
.ioctl = vfio_pci_core_ioctl,
.device_feature = vfio_pci_core_ioctl_feature,
.read = vfio_pci_core_read,
.write = vfio_pci_core_write,
.mmap = vfio_pci_core_mmap,
.request = vfio_pci_core_request,
.match = vfio_pci_core_match,
.bind_iommufd = vfio_iommufd_physical_bind,
.unbind_iommufd = vfio_iommufd_physical_unbind,
.attach_ioas = vfio_iommufd_physical_attach_ioas,
.detach_ioas = vfio_iommufd_physical_detach_ioas,
};
static const struct vfio_device_ops virtiovf_vfio_pci_tran_lm_ops = {
.name = "virtio-vfio-pci-trans-lm",
.init = virtiovf_pci_init_device,
.release = virtiovf_pci_core_release_dev,
.open_device = virtiovf_pci_open_device,
.close_device = vfio_pci_core_close_device,
.close_device = virtiovf_pci_close_device,
.ioctl = virtiovf_vfio_pci_core_ioctl,
.device_feature = vfio_pci_core_ioctl_feature,
.read = virtiovf_pci_core_read,
@ -490,17 +512,28 @@ static int virtiovf_pci_probe(struct pci_dev *pdev,
{
const struct vfio_device_ops *ops = &virtiovf_vfio_pci_ops;
struct virtiovf_pci_core_device *virtvdev;
bool sup_legacy_io = false;
bool sup_lm = false;
int ret;
if (pdev->is_virtfn && virtio_pci_admin_has_legacy_io(pdev) &&
!virtiovf_bar0_exists(pdev))
ops = &virtiovf_vfio_pci_tran_ops;
if (pdev->is_virtfn) {
sup_legacy_io = virtio_pci_admin_has_legacy_io(pdev) &&
!virtiovf_bar0_exists(pdev);
sup_lm = virtio_pci_admin_has_dev_parts(pdev);
if (sup_legacy_io)
ops = &virtiovf_vfio_pci_tran_lm_ops;
else if (sup_lm)
ops = &virtiovf_vfio_pci_lm_ops;
}
virtvdev = vfio_alloc_device(virtiovf_pci_core_device, core_device.vdev,
&pdev->dev, ops);
if (IS_ERR(virtvdev))
return PTR_ERR(virtvdev);
if (sup_lm)
virtiovf_set_migratable(virtvdev);
dev_set_drvdata(&pdev->dev, &virtvdev->core_device);
ret = vfio_pci_core_register_device(&virtvdev->core_device);
if (ret)
@ -532,6 +565,7 @@ static void virtiovf_pci_aer_reset_done(struct pci_dev *pdev)
struct virtiovf_pci_core_device *virtvdev = dev_get_drvdata(&pdev->dev);
virtvdev->pci_cmd = 0;
virtiovf_migration_reset_done(pdev);
}
static const struct pci_error_handlers virtiovf_err_handlers = {

File diff suppressed because it is too large Load Diff