// SPDX-License-Identifier: GPL-2.0-only
/*
 * VFIO PCI interrupt handling
 *
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *	Author: Alex Williamson <alex.williamson@redhat.com>
 *
 * Derived from original vfio:
 * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
 * Author: Tom Lyon, pugs@cisco.com
 */

#include <linux/device.h>
#include <linux/interrupt.h>
#include <linux/eventfd.h>
#include <linux/msi.h>
#include <linux/pci.h>
#include <linux/file.h>
#include <linux/vfio.h>
#include <linux/wait.h>
#include <linux/slab.h>

#include "vfio_pci_priv.h"

struct vfio_pci_irq_ctx {
	struct eventfd_ctx		*trigger;
	struct virqfd			*unmask;
	struct virqfd			*mask;
	char				*name;
	bool				masked;
	struct irq_bypass_producer	producer;
};

static bool irq_is(struct vfio_pci_core_device *vdev, int type)
{
	return vdev->irq_type == type;
}

static bool is_intx(struct vfio_pci_core_device *vdev)
{
	return vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX;
}

static bool is_irq_none(struct vfio_pci_core_device *vdev)
{
	return !(vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX ||
		 vdev->irq_type == VFIO_PCI_MSI_IRQ_INDEX ||
		 vdev->irq_type == VFIO_PCI_MSIX_IRQ_INDEX);
}

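/*
 * Note: per-vector interrupt contexts live in the vdev->ctx xarray, indexed
 * by vector number (INTx always uses index 0).  The helpers below wrap
 * lookup, allocation and removal of those entries.
 */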
static
struct vfio_pci_irq_ctx *vfio_irq_ctx_get(struct vfio_pci_core_device *vdev,
					  unsigned long index)
{
	return xa_load(&vdev->ctx, index);
}

static void vfio_irq_ctx_free(struct vfio_pci_core_device *vdev,
			      struct vfio_pci_irq_ctx *ctx, unsigned long index)
{
	xa_erase(&vdev->ctx, index);
	kfree(ctx);
}

static struct vfio_pci_irq_ctx *
vfio_irq_ctx_alloc(struct vfio_pci_core_device *vdev, unsigned long index)
{
	struct vfio_pci_irq_ctx *ctx;
	int ret;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL_ACCOUNT);
	if (!ctx)
		return NULL;

	ret = xa_insert(&vdev->ctx, index, ctx, GFP_KERNEL_ACCOUNT);
	if (ret) {
		kfree(ctx);
		return NULL;
	}

	return ctx;
}

/*
 * INTx
 */
static void vfio_send_intx_eventfd(void *opaque, void *unused)
{
	struct vfio_pci_core_device *vdev = opaque;

	if (likely(is_intx(vdev) && !vdev->virq_disabled)) {
		struct vfio_pci_irq_ctx *ctx;
		struct eventfd_ctx *trigger;

		ctx = vfio_irq_ctx_get(vdev, 0);
		if (WARN_ON_ONCE(!ctx))
			return;

		trigger = READ_ONCE(ctx->trigger);
		if (likely(trigger))
			eventfd_signal(trigger);
	}
}

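/*
 * Note: the READ_ONCE() of ctx->trigger above pairs with the WRITE_ONCE()
 * in vfio_intx_set_signal(), which may swap the trigger eventfd while the
 * IRQ handler or an unmask virqfd is still in flight.
 */
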
/* Returns true if the INTx vfio_pci_irq_ctx.masked value is changed. */
static bool __vfio_pci_intx_mask(struct vfio_pci_core_device *vdev)
{
	struct pci_dev *pdev = vdev->pdev;
	struct vfio_pci_irq_ctx *ctx;
	unsigned long flags;
	bool masked_changed = false;

	lockdep_assert_held(&vdev->igate);

	spin_lock_irqsave(&vdev->irqlock, flags);

	/*
	 * Masking can come from interrupt, ioctl, or config space
	 * via INTx disable.  The latter means this can get called
	 * even when not using intx delivery.  In this case, just
	 * try to have the physical bit follow the virtual bit.
	 */
	if (unlikely(!is_intx(vdev))) {
		if (vdev->pci_2_3)
			pci_intx(pdev, 0);
		goto out_unlock;
	}

	ctx = vfio_irq_ctx_get(vdev, 0);
	if (WARN_ON_ONCE(!ctx))
		goto out_unlock;

	if (!ctx->masked) {
		/*
		 * Can't use check_and_mask here because we always want to
		 * mask, not just when something is pending.
		 */
		if (vdev->pci_2_3)
			pci_intx(pdev, 0);
		else
			disable_irq_nosync(pdev->irq);

		ctx->masked = true;
		masked_changed = true;
	}

out_unlock:
	spin_unlock_irqrestore(&vdev->irqlock, flags);
	return masked_changed;
}

bool vfio_pci_intx_mask(struct vfio_pci_core_device *vdev)
{
	bool mask_changed;

	mutex_lock(&vdev->igate);
	mask_changed = __vfio_pci_intx_mask(vdev);
	mutex_unlock(&vdev->igate);

	return mask_changed;
}

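/*
 * Note: the return value lets callers such as the runtime-PM suspend path
 * record whether this call actually changed the mask state, so that INTx is
 * only unmasked again on resume if it was masked here.
 */
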
/*
 * If this is triggered by an eventfd, we can't call eventfd_signal
 * or else we'll deadlock on the eventfd wait queue.  Return >0 when
 * a signal is necessary, which can then be handled via a work queue
 * or directly depending on the caller.
 */
static int vfio_pci_intx_unmask_handler(void *opaque, void *unused)
{
	struct vfio_pci_core_device *vdev = opaque;
	struct pci_dev *pdev = vdev->pdev;
	struct vfio_pci_irq_ctx *ctx;
	unsigned long flags;
	int ret = 0;

	spin_lock_irqsave(&vdev->irqlock, flags);

	/*
	 * Unmasking comes from ioctl or config, so again, have the
	 * physical bit follow the virtual even when not using INTx.
	 */
	if (unlikely(!is_intx(vdev))) {
		if (vdev->pci_2_3)
			pci_intx(pdev, 1);
		goto out_unlock;
	}

	ctx = vfio_irq_ctx_get(vdev, 0);
	if (WARN_ON_ONCE(!ctx))
		goto out_unlock;

	if (ctx->masked && !vdev->virq_disabled) {
		/*
		 * A pending interrupt here would immediately trigger,
		 * but we can avoid that overhead by just re-sending
		 * the interrupt to the user.
		 */
		if (vdev->pci_2_3) {
			if (!pci_check_and_unmask_intx(pdev))
				ret = 1;
		} else
			enable_irq(pdev->irq);

		ctx->masked = (ret > 0);
	}

out_unlock:
	spin_unlock_irqrestore(&vdev->irqlock, flags);

	return ret;
}

static void __vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev)
{
	lockdep_assert_held(&vdev->igate);

	if (vfio_pci_intx_unmask_handler(vdev, NULL) > 0)
		vfio_send_intx_eventfd(vdev, NULL);
}

void vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev)
{
	mutex_lock(&vdev->igate);
	__vfio_pci_intx_unmask(vdev);
	mutex_unlock(&vdev->igate);
}

static irqreturn_t vfio_intx_handler(int irq, void *dev_id)
{
	struct vfio_pci_core_device *vdev = dev_id;
	struct vfio_pci_irq_ctx *ctx;
	unsigned long flags;
	int ret = IRQ_NONE;

	ctx = vfio_irq_ctx_get(vdev, 0);
	if (WARN_ON_ONCE(!ctx))
		return ret;

	spin_lock_irqsave(&vdev->irqlock, flags);

	if (!vdev->pci_2_3) {
		disable_irq_nosync(vdev->pdev->irq);
		ctx->masked = true;
		ret = IRQ_HANDLED;
	} else if (!ctx->masked &&  /* may be shared */
		   pci_check_and_mask_intx(vdev->pdev)) {
		ctx->masked = true;
		ret = IRQ_HANDLED;
	}

	spin_unlock_irqrestore(&vdev->irqlock, flags);

	if (ret == IRQ_HANDLED)
		vfio_send_intx_eventfd(vdev, NULL);

	return ret;
}

static int vfio_intx_enable(struct vfio_pci_core_device *vdev,
			    struct eventfd_ctx *trigger)
{
	struct pci_dev *pdev = vdev->pdev;
	struct vfio_pci_irq_ctx *ctx;
	unsigned long irqflags;
	char *name;
	int ret;

	if (!is_irq_none(vdev))
		return -EINVAL;

	if (!pdev->irq)
		return -ENODEV;

	name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-intx(%s)", pci_name(pdev));
	if (!name)
		return -ENOMEM;

	ctx = vfio_irq_ctx_alloc(vdev, 0);
	if (!ctx)
		return -ENOMEM;

	ctx->name = name;
	ctx->trigger = trigger;

	/*
	 * Fill the initial masked state based on virq_disabled.  After
	 * enable, changing the DisINTx bit in vconfig directly changes INTx
	 * masking.  igate prevents races during setup, once running masked
	 * is protected via irqlock.
	 *
	 * Devices supporting DisINTx also reflect the current mask state in
	 * the physical DisINTx bit, which is not affected during IRQ setup.
	 *
	 * Devices without DisINTx support require an exclusive interrupt.
	 * IRQ masking is performed at the IRQ chip.  Again, igate protects
	 * against races during setup and IRQ handlers and irqfds are not
	 * yet active, therefore masked is stable and can be used to
	 * conditionally auto-enable the IRQ.
	 *
	 * irq_type must be stable while the IRQ handler is registered,
	 * therefore it must be set before request_irq().
	 */
	ctx->masked = vdev->virq_disabled;
	if (vdev->pci_2_3) {
		pci_intx(pdev, !ctx->masked);
		irqflags = IRQF_SHARED;
	} else {
		irqflags = ctx->masked ? IRQF_NO_AUTOEN : 0;
	}

	vdev->irq_type = VFIO_PCI_INTX_IRQ_INDEX;

	ret = request_irq(pdev->irq, vfio_intx_handler,
			  irqflags, ctx->name, vdev);
	if (ret) {
		vdev->irq_type = VFIO_PCI_NUM_IRQS;
		kfree(name);
		vfio_irq_ctx_free(vdev, ctx, 0);
		return ret;
	}

	return 0;
}

static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev,
				struct eventfd_ctx *trigger)
{
	struct pci_dev *pdev = vdev->pdev;
	struct vfio_pci_irq_ctx *ctx;
	struct eventfd_ctx *old;

	ctx = vfio_irq_ctx_get(vdev, 0);
	if (WARN_ON_ONCE(!ctx))
		return -EINVAL;

	old = ctx->trigger;

	WRITE_ONCE(ctx->trigger, trigger);

	/* Releasing an old ctx requires synchronizing in-flight users */
	if (old) {
		synchronize_irq(pdev->irq);
		vfio_virqfd_flush_thread(&ctx->unmask);
		eventfd_ctx_put(old);
	}

	return 0;
}

static void vfio_intx_disable(struct vfio_pci_core_device *vdev)
{
	struct pci_dev *pdev = vdev->pdev;
	struct vfio_pci_irq_ctx *ctx;

	ctx = vfio_irq_ctx_get(vdev, 0);
	WARN_ON_ONCE(!ctx);
	if (ctx) {
		vfio_virqfd_disable(&ctx->unmask);
		vfio_virqfd_disable(&ctx->mask);
		free_irq(pdev->irq, vdev);
		if (ctx->trigger)
			eventfd_ctx_put(ctx->trigger);
		kfree(ctx->name);
		vfio_irq_ctx_free(vdev, ctx, 0);
	}
	vdev->irq_type = VFIO_PCI_NUM_IRQS;
}

/*
 * MSI/MSI-X
 */
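/*
 * Note: the MSI/MSI-X paths below bracket vector allocation, IRQ
 * (de)registration and MSI-X message programming with
 * vfio_pci_memory_lock_and_enable()/vfio_pci_memory_unlock_and_restore(),
 * since the MSI-X vector table lives in device MMIO and memory decoding
 * must be enabled and held stable while it is touched.
 */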
static irqreturn_t vfio_msihandler(int irq, void *arg)
{
	struct eventfd_ctx *trigger = arg;

	eventfd_signal(trigger);
	return IRQ_HANDLED;
}

static int vfio_msi_enable(struct vfio_pci_core_device *vdev, int nvec, bool msix)
{
	struct pci_dev *pdev = vdev->pdev;
	unsigned int flag = msix ? PCI_IRQ_MSIX : PCI_IRQ_MSI;
	int ret;
	u16 cmd;

	if (!is_irq_none(vdev))
		return -EINVAL;

	/* return the number of supported vectors if we can't get all: */
	cmd = vfio_pci_memory_lock_and_enable(vdev);
	ret = pci_alloc_irq_vectors(pdev, 1, nvec, flag);
	if (ret < nvec) {
		if (ret > 0)
			pci_free_irq_vectors(pdev);
		vfio_pci_memory_unlock_and_restore(vdev, cmd);
		return ret;
	}
	vfio_pci_memory_unlock_and_restore(vdev, cmd);

	vdev->irq_type = msix ? VFIO_PCI_MSIX_IRQ_INDEX :
				VFIO_PCI_MSI_IRQ_INDEX;

	if (!msix) {
		/*
		 * Compute the virtual hardware field for max msi vectors -
		 * it is the log base 2 of the number of vectors.
		 */
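		/*
		 * e.g. nvec = 3: fls(3 * 2 - 1) - 1 = fls(5) - 1 = 2, so the
		 * virtual field advertises 2^2 = 4 vectors (nvec rounded up
		 * to a power of two).
		 */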
		vdev->msi_qmax = fls(nvec * 2 - 1) - 1;
	}

	return 0;
}

/*
 * vfio_msi_alloc_irq() returns the Linux IRQ number of an MSI or MSI-X device
 * interrupt vector. If a Linux IRQ number is not available then a new
 * interrupt is allocated if dynamic MSI-X is supported.
 *
 * Where is vfio_msi_free_irq()? Allocated interrupts are maintained,
 * essentially forming a cache that subsequent allocations can draw from.
 * Interrupts are freed using pci_free_irq_vectors() when MSI/MSI-X is
 * disabled.
 */
static int vfio_msi_alloc_irq(struct vfio_pci_core_device *vdev,
			      unsigned int vector, bool msix)
{
	struct pci_dev *pdev = vdev->pdev;
	struct msi_map map;
	int irq;
	u16 cmd;

	irq = pci_irq_vector(pdev, vector);
	if (WARN_ON_ONCE(irq == 0))
		return -EINVAL;
	if (irq > 0 || !msix || !vdev->has_dyn_msix)
		return irq;

	cmd = vfio_pci_memory_lock_and_enable(vdev);
	map = pci_msix_alloc_irq_at(pdev, vector, NULL);
	vfio_pci_memory_unlock_and_restore(vdev, cmd);

	return map.index < 0 ? map.index : map.virq;
}

static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
				      unsigned int vector, int fd, bool msix)
{
	struct pci_dev *pdev = vdev->pdev;
	struct vfio_pci_irq_ctx *ctx;
	struct eventfd_ctx *trigger;
	int irq = -EINVAL, ret;
	u16 cmd;

	ctx = vfio_irq_ctx_get(vdev, vector);

	if (ctx) {
		irq_bypass_unregister_producer(&ctx->producer);
		irq = pci_irq_vector(pdev, vector);
		cmd = vfio_pci_memory_lock_and_enable(vdev);
		free_irq(irq, ctx->trigger);
		vfio_pci_memory_unlock_and_restore(vdev, cmd);
		/* Interrupt stays allocated, will be freed at MSI-X disable. */
		kfree(ctx->name);
		eventfd_ctx_put(ctx->trigger);
		vfio_irq_ctx_free(vdev, ctx, vector);
	}

	if (fd < 0)
		return 0;

	if (irq == -EINVAL) {
		/* Interrupt stays allocated, will be freed at MSI-X disable. */
		irq = vfio_msi_alloc_irq(vdev, vector, msix);
		if (irq < 0)
			return irq;
	}

	ctx = vfio_irq_ctx_alloc(vdev, vector);
	if (!ctx)
		return -ENOMEM;

	ctx->name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-msi%s[%d](%s)",
			      msix ? "x" : "", vector, pci_name(pdev));
	if (!ctx->name) {
		ret = -ENOMEM;
		goto out_free_ctx;
	}

	trigger = eventfd_ctx_fdget(fd);
	if (IS_ERR(trigger)) {
		ret = PTR_ERR(trigger);
		goto out_free_name;
	}

	/*
	 * If the vector was previously allocated, refresh the on-device
	 * message data before enabling in case it had been cleared or
	 * corrupted (e.g. due to backdoor resets) since writing.
	 */
	cmd = vfio_pci_memory_lock_and_enable(vdev);
	if (msix) {
		struct msi_msg msg;

		get_cached_msi_msg(irq, &msg);
		pci_write_msi_msg(irq, &msg);
	}

	ret = request_irq(irq, vfio_msihandler, 0, ctx->name, trigger);
	vfio_pci_memory_unlock_and_restore(vdev, cmd);
	if (ret)
		goto out_put_eventfd_ctx;

	ctx->producer.token = trigger;
	ctx->producer.irq = irq;
	ret = irq_bypass_register_producer(&ctx->producer);
	if (unlikely(ret)) {
		dev_info(&pdev->dev,
		"irq bypass producer (token %p) registration fails: %d\n",
		ctx->producer.token, ret);

		ctx->producer.token = NULL;
	}
	ctx->trigger = trigger;

	return 0;

out_put_eventfd_ctx:
	eventfd_ctx_put(trigger);
out_free_name:
	kfree(ctx->name);
out_free_ctx:
	vfio_irq_ctx_free(vdev, ctx, vector);
	return ret;
}

static int vfio_msi_set_block(struct vfio_pci_core_device *vdev, unsigned start,
			      unsigned count, int32_t *fds, bool msix)
{
	unsigned int i, j;
	int ret = 0;

	for (i = 0, j = start; i < count && !ret; i++, j++) {
		int fd = fds ? fds[i] : -1;
		ret = vfio_msi_set_vector_signal(vdev, j, fd, msix);
	}

	if (ret) {
		for (i = start; i < j; i++)
			vfio_msi_set_vector_signal(vdev, i, -1, msix);
	}

	return ret;
}

static void vfio_msi_disable(struct vfio_pci_core_device *vdev, bool msix)
|
2012-07-31 08:16:24 -06:00
|
|
|
{
|
|
|
|
struct pci_dev *pdev = vdev->pdev;
|
2023-05-11 08:44:30 -07:00
|
|
|
struct vfio_pci_irq_ctx *ctx;
|
2023-05-11 08:44:32 -07:00
|
|
|
unsigned long i;
|
vfio-pci: Invalidate mmaps and block MMIO access on disabled memory
Accessing the disabled memory space of a PCI device would typically
result in a master abort response on conventional PCI, or an
unsupported request on PCI express. The user would generally see
these as a -1 response for the read return data and the write would be
silently discarded, possibly with an uncorrected, non-fatal AER error
triggered on the host. Some systems however take it upon themselves
to bring down the entire system when they see something that might
indicate a loss of data, such as this discarded write to a disabled
memory space.
To avoid this, we want to try to block the user from accessing memory
spaces while they're disabled. We start with a semaphore around the
memory enable bit, where writers modify the memory enable state and
must be serialized, while readers make use of the memory region and
can access in parallel. Writers include both direct manipulation via
the command register, as well as any reset path where the internal
mechanics of the reset may both explicitly and implicitly disable
memory access, and manipulation of the MSI-X configuration, where the
MSI-X vector table resides in MMIO space of the device. Readers
include the read and write file ops to access the vfio device fd
offsets as well as memory mapped access. In the latter case, we make
use of our new vma list support to zap, or invalidate, those memory
mappings in order to force them to be faulted back in on access.
Our semaphore usage will stall user access to MMIO spaces across
internal operations like reset, but the user might experience new
behavior when trying to access the MMIO space while disabled via the
PCI command register. Access via read or write while disabled will
return -EIO and access via memory maps will result in a SIGBUS. This
is expected to be compatible with known use cases and potentially
provides better error handling capabilities than present in the
hardware, while avoiding the more readily accessible and severe
platform error responses that might otherwise occur.
Fixes: CVE-2020-12888
Reviewed-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2020-04-22 13:48:11 -06:00
|
|
|
u16 cmd;
|
2012-07-31 08:16:24 -06:00
|
|
|
|
2023-05-11 08:44:32 -07:00
|
|
|
xa_for_each(&vdev->ctx, i, ctx) {
|
|
|
|
vfio_virqfd_disable(&ctx->unmask);
|
|
|
|
vfio_virqfd_disable(&ctx->mask);
|
|
|
|
vfio_msi_set_vector_signal(vdev, i, -1, msix);
|
2012-07-31 08:16:24 -06:00
|
|
|
}
|
|
|
|
|
2020-04-22 13:48:11 -06:00
|
|
|
cmd = vfio_pci_memory_lock_and_enable(vdev);
|
2016-09-11 15:31:26 +02:00
|
|
|
pci_free_irq_vectors(pdev);
|
2020-04-22 13:48:11 -06:00
|
|
|
vfio_pci_memory_unlock_and_restore(vdev, cmd);
|
2012-07-31 08:16:24 -06:00
|
|
|
|
2016-09-26 13:52:19 -06:00
|
|
|
/*
|
|
|
|
* Both disable paths above use pci_intx_for_msi() to clear DisINTx
|
|
|
|
* via their shutdown paths. Restore for NoINTx devices.
|
|
|
|
*/
|
|
|
|
if (vdev->nointx)
|
|
|
|
pci_intx(pdev, 0);
|
|
|
|
|
2012-07-31 08:16:24 -06:00
|
|
|
vdev->irq_type = VFIO_PCI_NUM_IRQS;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* IOCTL support
|
|
|
|
*/
|
2021-08-26 13:39:02 +03:00
|
|
|
static int vfio_pci_set_intx_unmask(struct vfio_pci_core_device *vdev,
|
2012-07-31 08:16:24 -06:00
|
|
|
unsigned index, unsigned start,
|
|
|
|
unsigned count, uint32_t flags, void *data)
|
|
|
|
{
|
|
|
|
if (!is_intx(vdev) || start != 0 || count != 1)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
if (flags & VFIO_IRQ_SET_DATA_NONE) {
|
2024-03-08 16:05:23 -07:00
|
|
|
__vfio_pci_intx_unmask(vdev);
|
2012-07-31 08:16:24 -06:00
|
|
|
} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
|
|
|
|
uint8_t unmask = *(uint8_t *)data;
|
|
|
|
if (unmask)
|
2024-03-08 16:05:23 -07:00
|
|
|
__vfio_pci_intx_unmask(vdev);
|
2012-07-31 08:16:24 -06:00
|
|
|
} else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
|
2023-05-11 08:44:30 -07:00
|
|
|
struct vfio_pci_irq_ctx *ctx = vfio_irq_ctx_get(vdev, 0);
|
2012-07-31 08:16:24 -06:00
|
|
|
int32_t fd = *(int32_t *)data;
|
2023-05-11 08:44:30 -07:00
|
|
|
|
|
|
|
if (WARN_ON_ONCE(!ctx))
|
|
|
|
return -EINVAL;
|
2012-07-31 08:16:24 -06:00
|
|
|
if (fd >= 0)
|
2015-03-16 14:08:53 -06:00
|
|
|
return vfio_virqfd_enable((void *) vdev,
|
2015-03-16 14:08:51 -06:00
|
|
|
vfio_pci_intx_unmask_handler,
|
|
|
|
vfio_send_intx_eventfd, NULL,
|
2023-05-11 08:44:30 -07:00
|
|
|
&ctx->unmask, fd);
|
2012-07-31 08:16:24 -06:00
|
|
|
|
2023-05-11 08:44:30 -07:00
|
|
|
vfio_virqfd_disable(&ctx->unmask);
|
2012-07-31 08:16:24 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2021-08-26 13:39:02 +03:00
|
|
|
static int vfio_pci_set_intx_mask(struct vfio_pci_core_device *vdev,
|
2012-07-31 08:16:24 -06:00
|
|
|
unsigned index, unsigned start,
|
|
|
|
unsigned count, uint32_t flags, void *data)
|
|
|
|
{
|
|
|
|
if (!is_intx(vdev) || start != 0 || count != 1)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
if (flags & VFIO_IRQ_SET_DATA_NONE) {
|
2024-03-08 16:05:23 -07:00
|
|
|
__vfio_pci_intx_mask(vdev);
|
2012-07-31 08:16:24 -06:00
|
|
|
} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
|
|
|
|
uint8_t mask = *(uint8_t *)data;
|
|
|
|
if (mask)
|
2024-03-08 16:05:23 -07:00
|
|
|
__vfio_pci_intx_mask(vdev);
|
2012-07-31 08:16:24 -06:00
|
|
|
} else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
|
|
|
|
return -ENOTTY; /* XXX implement me */
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2021-08-26 13:39:02 +03:00
|
|
|
static int vfio_pci_set_intx_trigger(struct vfio_pci_core_device *vdev,
|
2012-07-31 08:16:24 -06:00
|
|
|
unsigned index, unsigned start,
|
|
|
|
unsigned count, uint32_t flags, void *data)
|
|
|
|
{
|
|
|
|
if (is_intx(vdev) && !count && (flags & VFIO_IRQ_SET_DATA_NONE)) {
|
|
|
|
vfio_intx_disable(vdev);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!(is_intx(vdev) || is_irq_none(vdev)) || start != 0 || count != 1)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
|
2024-03-08 16:05:25 -07:00
|
|
|
struct eventfd_ctx *trigger = NULL;
|
2012-07-31 08:16:24 -06:00
|
|
|
int32_t fd = *(int32_t *)data;
|
|
|
|
int ret;
|
|
|
|
|
2024-03-08 16:05:25 -07:00
|
|
|
if (fd >= 0) {
|
|
|
|
trigger = eventfd_ctx_fdget(fd);
|
|
|
|
if (IS_ERR(trigger))
|
|
|
|
return PTR_ERR(trigger);
|
|
|
|
}
|
2012-07-31 08:16:24 -06:00
|
|
|
|
2024-03-08 16:05:25 -07:00
|
|
|
if (is_intx(vdev))
|
|
|
|
ret = vfio_intx_set_signal(vdev, trigger);
|
|
|
|
else
|
|
|
|
ret = vfio_intx_enable(vdev, trigger);
|
2012-07-31 08:16:24 -06:00
|
|
|
|
2024-03-08 16:05:25 -07:00
|
|
|
if (ret && trigger)
|
|
|
|
eventfd_ctx_put(trigger);
|
2012-07-31 08:16:24 -06:00
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!is_intx(vdev))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
if (flags & VFIO_IRQ_SET_DATA_NONE) {
|
|
|
|
vfio_send_intx_eventfd(vdev, NULL);
|
|
|
|
} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
|
|
|
|
uint8_t trigger = *(uint8_t *)data;
|
|
|
|
if (trigger)
|
|
|
|
vfio_send_intx_eventfd(vdev, NULL);
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2021-08-26 13:39:02 +03:00
|
|
|
static int vfio_pci_set_msi_trigger(struct vfio_pci_core_device *vdev,
|
2012-07-31 08:16:24 -06:00
|
|
|
unsigned index, unsigned start,
|
|
|
|
unsigned count, uint32_t flags, void *data)
|
|
|
|
{
|
2023-05-11 08:44:30 -07:00
|
|
|
struct vfio_pci_irq_ctx *ctx;
|
2023-05-11 08:44:29 -07:00
|
|
|
unsigned int i;
|
2012-07-31 08:16:24 -06:00
|
|
|
bool msix = (index == VFIO_PCI_MSIX_IRQ_INDEX);
|
|
|
|
|
|
|
|
if (irq_is(vdev, index) && !count && (flags & VFIO_IRQ_SET_DATA_NONE)) {
|
|
|
|
vfio_msi_disable(vdev, msix);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!(irq_is(vdev, index) || is_irq_none(vdev)))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
|
|
|
|
int32_t *fds = data;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (vdev->irq_type == index)
|
|
|
|
return vfio_msi_set_block(vdev, start, count,
|
|
|
|
fds, msix);
|
|
|
|
|
|
|
|
ret = vfio_msi_enable(vdev, start + count, msix);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
ret = vfio_msi_set_block(vdev, start, count, fds, msix);
|
|
|
|
if (ret)
|
|
|
|
vfio_msi_disable(vdev, msix);
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2023-05-11 08:44:33 -07:00
|
|
|
if (!irq_is(vdev, index))
|
2012-07-31 08:16:24 -06:00
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
for (i = start; i < start + count; i++) {
|
2023-05-11 08:44:30 -07:00
|
|
|
ctx = vfio_irq_ctx_get(vdev, i);
|
2023-05-11 08:44:32 -07:00
|
|
|
if (!ctx)
|
2012-07-31 08:16:24 -06:00
|
|
|
continue;
|
|
|
|
if (flags & VFIO_IRQ_SET_DATA_NONE) {
|
2023-11-22 13:48:23 +01:00
|
|
|
eventfd_signal(ctx->trigger);
|
2012-07-31 08:16:24 -06:00
|
|
|
} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
|
|
|
|
uint8_t *bools = data;
|
|
|
|
if (bools[i - start])
|
2023-11-22 13:48:23 +01:00
|
|
|
eventfd_signal(ctx->trigger);
|
2012-07-31 08:16:24 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2015-02-06 15:05:07 -07:00
|
|
|
static int vfio_pci_set_ctx_trigger_single(struct eventfd_ctx **ctx,
|
2016-08-08 16:16:23 -06:00
|
|
|
unsigned int count, uint32_t flags,
|
|
|
|
void *data)
|
2013-03-11 09:31:22 -06:00
|
|
|
{
|
|
|
|
/* DATA_NONE/DATA_BOOL enables loopback testing */
|
|
|
|
if (flags & VFIO_IRQ_SET_DATA_NONE) {
|
2016-08-08 16:16:23 -06:00
|
|
|
if (*ctx) {
|
|
|
|
if (count) {
|
2023-11-22 13:48:23 +01:00
|
|
|
eventfd_signal(*ctx);
|
2016-08-08 16:16:23 -06:00
|
|
|
} else {
|
|
|
|
eventfd_ctx_put(*ctx);
|
|
|
|
*ctx = NULL;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
2013-03-11 09:31:22 -06:00
|
|
|
} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
|
2016-08-08 16:16:23 -06:00
|
|
|
uint8_t trigger;
|
|
|
|
|
|
|
|
if (!count)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
trigger = *(uint8_t *)data;
|
2015-02-06 15:05:07 -07:00
|
|
|
if (trigger && *ctx)
|
2023-11-22 13:48:23 +01:00
|
|
|
eventfd_signal(*ctx);
|
2013-03-11 09:31:22 -06:00
|
|
|
|
|
|
|
return 0;
|
2016-08-08 16:16:23 -06:00
|
|
|
} else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
|
|
|
|
int32_t fd;
|
|
|
|
|
|
|
|
if (!count)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
fd = *(int32_t *)data;
|
|
|
|
if (fd == -1) {
|
|
|
|
if (*ctx)
|
|
|
|
eventfd_ctx_put(*ctx);
|
|
|
|
*ctx = NULL;
|
|
|
|
} else if (fd >= 0) {
|
|
|
|
struct eventfd_ctx *efdctx;
|
|
|
|
|
|
|
|
efdctx = eventfd_ctx_fdget(fd);
|
|
|
|
if (IS_ERR(efdctx))
|
|
|
|
return PTR_ERR(efdctx);
|
|
|
|
|
|
|
|
if (*ctx)
|
|
|
|
eventfd_ctx_put(*ctx);
|
|
|
|
|
|
|
|
*ctx = efdctx;
|
|
|
|
}
|
2013-03-11 09:31:22 -06:00
|
|
|
return 0;
|
2016-08-08 16:16:23 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
return -EINVAL;
|
2013-03-11 09:31:22 -06:00
|
|
|
}
|
2015-02-06 15:05:07 -07:00
|
|
|
|
2021-08-26 13:39:02 +03:00
|
|
|
static int vfio_pci_set_err_trigger(struct vfio_pci_core_device *vdev,
|
2015-02-06 15:05:07 -07:00
|
|
|
unsigned index, unsigned start,
|
|
|
|
unsigned count, uint32_t flags, void *data)
|
|
|
|
{
|
2016-08-08 16:16:23 -06:00
|
|
|
if (index != VFIO_PCI_ERR_IRQ_INDEX || start != 0 || count > 1)
|
2015-02-06 15:05:07 -07:00
|
|
|
return -EINVAL;
|
|
|
|
|
2016-08-08 16:16:23 -06:00
|
|
|
return vfio_pci_set_ctx_trigger_single(&vdev->err_trigger,
|
|
|
|
count, flags, data);
|
2015-02-06 15:05:07 -07:00
|
|
|
}
|
|
|
|
|
2021-08-26 13:39:02 +03:00
|
|
|
static int vfio_pci_set_req_trigger(struct vfio_pci_core_device *vdev,
|
2015-02-06 15:05:08 -07:00
|
|
|
unsigned index, unsigned start,
|
|
|
|
unsigned count, uint32_t flags, void *data)
|
|
|
|
{
|
2016-08-08 16:16:23 -06:00
|
|
|
if (index != VFIO_PCI_REQ_IRQ_INDEX || start != 0 || count > 1)
|
2015-02-06 15:05:08 -07:00
|
|
|
return -EINVAL;
|
|
|
|
|
2016-08-08 16:16:23 -06:00
|
|
|
return vfio_pci_set_ctx_trigger_single(&vdev->req_trigger,
|
|
|
|
count, flags, data);
|
2015-02-06 15:05:08 -07:00
|
|
|
}
|
|
|
|
|
2021-08-26 13:39:02 +03:00
|
|
|
int vfio_pci_set_irqs_ioctl(struct vfio_pci_core_device *vdev, uint32_t flags,
|
2012-07-31 08:16:24 -06:00
|
|
|
unsigned index, unsigned start, unsigned count,
|
|
|
|
void *data)
|
|
|
|
{
|
2021-08-26 13:39:02 +03:00
|
|
|
int (*func)(struct vfio_pci_core_device *vdev, unsigned index,
|
2012-07-31 08:16:24 -06:00
|
|
|
unsigned start, unsigned count, uint32_t flags,
|
|
|
|
void *data) = NULL;
|
|
|
|
|
|
|
|
switch (index) {
|
|
|
|
case VFIO_PCI_INTX_IRQ_INDEX:
|
|
|
|
switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
|
|
|
|
case VFIO_IRQ_SET_ACTION_MASK:
|
|
|
|
func = vfio_pci_set_intx_mask;
|
|
|
|
break;
|
|
|
|
case VFIO_IRQ_SET_ACTION_UNMASK:
|
|
|
|
func = vfio_pci_set_intx_unmask;
|
|
|
|
break;
|
|
|
|
case VFIO_IRQ_SET_ACTION_TRIGGER:
|
|
|
|
func = vfio_pci_set_intx_trigger;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case VFIO_PCI_MSI_IRQ_INDEX:
|
|
|
|
case VFIO_PCI_MSIX_IRQ_INDEX:
|
|
|
|
switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
|
|
|
|
case VFIO_IRQ_SET_ACTION_MASK:
|
|
|
|
case VFIO_IRQ_SET_ACTION_UNMASK:
|
|
|
|
/* XXX Need masking support exported */
|
|
|
|
break;
|
|
|
|
case VFIO_IRQ_SET_ACTION_TRIGGER:
|
|
|
|
func = vfio_pci_set_msi_trigger;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
2013-03-11 09:31:22 -06:00
|
|
|
case VFIO_PCI_ERR_IRQ_INDEX:
|
|
|
|
switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
|
|
|
|
case VFIO_IRQ_SET_ACTION_TRIGGER:
|
|
|
|
if (pci_is_pcie(vdev->pdev))
|
|
|
|
func = vfio_pci_set_err_trigger;
|
|
|
|
break;
|
|
|
|
}
|
2015-03-12 14:43:12 +11:00
|
|
|
break;
|
2015-02-06 15:05:08 -07:00
|
|
|
case VFIO_PCI_REQ_IRQ_INDEX:
|
|
|
|
switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
|
|
|
|
case VFIO_IRQ_SET_ACTION_TRIGGER:
|
|
|
|
func = vfio_pci_set_req_trigger;
|
|
|
|
break;
|
|
|
|
}
|
2015-03-12 14:43:12 +11:00
|
|
|
break;
|
2012-07-31 08:16:24 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
if (!func)
|
|
|
|
return -ENOTTY;
|
|
|
|
|
|
|
|
return func(vdev, index, start, count, flags, data);
|
|
|
|
}
|
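For context, the dispatcher above is reached from userspace through the VFIO_DEVICE_SET_IRQS ioctl. A minimal, illustrative caller that wires an eventfd to MSI vector 0 (the TRIGGER/EVENTFD path handled by vfio_pci_set_msi_trigger()) might look like the sketch below; device_fd is assumed to be an already-open vfio device file descriptor and set_msi_eventfd() is a hypothetical name, not an API from this file.

/* Illustrative userspace sketch, not part of this file. */
#include <linux/vfio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>

static int set_msi_eventfd(int device_fd)
{
	size_t argsz = sizeof(struct vfio_irq_set) + sizeof(int32_t);
	struct vfio_irq_set *irq_set = calloc(1, argsz);
	int efd, ret;

	if (!irq_set)
		return -1;

	efd = eventfd(0, EFD_CLOEXEC);
	if (efd < 0) {
		free(irq_set);
		return -1;
	}

	/* DATA_EVENTFD + ACTION_TRIGGER on MSI vector 0 */
	irq_set->argsz = argsz;
	irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_PCI_MSI_IRQ_INDEX;
	irq_set->start = 0;
	irq_set->count = 1;
	memcpy(irq_set->data, &efd, sizeof(int32_t));

	ret = ioctl(device_fd, VFIO_DEVICE_SET_IRQS, irq_set);
	free(irq_set);
	return ret;
}

Passing count = 0 with VFIO_IRQ_SET_DATA_NONE on the same index would instead tear the MSI setup down, as handled at the top of vfio_pci_set_msi_trigger().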