PCI: Expose PCIe Resizable BAR support via sysfs

Add a simple sysfs interface to Resizable BAR support, largely for the
purposes of assigning such devices to a VM through VFIO.  Resizable BARs
present a difficult feature to expose to a VM through emulation, as
resizing a BAR is done on the host.  It can fail, and often does, but we
have no means via emulation of a PCIe REBAR capability to handle the error
cases.

A vfio-pci specific ioctl interface is also cumbersome as there are often
multiple devices within the same bridge aperture and handling them is a
challenge.  In the interface proposed here, expanding a BAR potentially
requires such devices to be soft-removed during the resize operation and
rescanned after, in order for all the necessary resources to be released.
A pci-sysfs interface is also more universal than a vfio specific
interface.

Please see the ABI documentation update for usage.

Link: https://lore.kernel.org/r/166336088796.3597940.14973499936692558556.stgit@omen
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Cc: Krzysztof Wilczyński <kw@linux.com>
This commit is contained in:
Alex Williamson 2022-09-16 14:44:48 -06:00 committed by Bjorn Helgaas
parent 568035b01c
commit 91fa127794
2 changed files with 141 additions and 0 deletions

View File

@ -457,3 +457,36 @@ Description:
The file is writable if the PF is bound to a driver that
implements ->sriov_set_msix_vec_count().
What: /sys/bus/pci/devices/.../resourceN_resize
Date: September 2022
Contact: Alex Williamson <alex.williamson@redhat.com>
Description:
These files provide an interface to PCIe Resizable BAR support.
A file is created for each BAR resource (N) supported by the
PCIe Resizable BAR extended capability of the device. Reading
each file exposes the bitmap of available resource sizes:
# cat resource1_resize
00000000000001c0
The bitmap represents supported resource sizes for the BAR,
where bit0 = 1MB, bit1 = 2MB, bit2 = 4MB, etc. In the above
example the device supports 64MB, 128MB, and 256MB BAR sizes.
When writing the file, the user provides the bit position of
the desired resource size, for example:
# echo 7 > resource1_resize
This indicates to set the size value corresponding to bit 7,
128MB. The resulting size is 2 ^ (bit# + 20). This definition
matches the PCIe specification of this capability.
In order to make use of resource resizing, all PCI drivers must
be unbound from the device, and peer devices under the same
parent bridge may need to be soft removed. In the case of
VGA devices, writing a resize value will remove low level
console drivers from the device. Raw users of pci-sysfs
resourceN attributes must be terminated prior to resizing.
Success of the resizing operation is not guaranteed.

View File

@ -28,6 +28,7 @@
#include <linux/pm_runtime.h> #include <linux/pm_runtime.h>
#include <linux/msi.h> #include <linux/msi.h>
#include <linux/of.h> #include <linux/of.h>
#include <linux/aperture.h>
#include "pci.h" #include "pci.h"
static int sysfs_initialized; /* = 0 */ static int sysfs_initialized; /* = 0 */
@ -1373,6 +1374,112 @@ static const struct attribute_group pci_dev_reset_attr_group = {
.is_visible = pci_dev_reset_attr_is_visible, .is_visible = pci_dev_reset_attr_is_visible,
}; };
#define pci_dev_resource_resize_attr(n) \
static ssize_t resource##n##_resize_show(struct device *dev, \
struct device_attribute *attr, \
char * buf) \
{ \
struct pci_dev *pdev = to_pci_dev(dev); \
ssize_t ret; \
\
pci_config_pm_runtime_get(pdev); \
\
ret = sysfs_emit(buf, "%016llx\n", \
(u64)pci_rebar_get_possible_sizes(pdev, n)); \
\
pci_config_pm_runtime_put(pdev); \
\
return ret; \
} \
\
static ssize_t resource##n##_resize_store(struct device *dev, \
struct device_attribute *attr,\
const char *buf, size_t count)\
{ \
struct pci_dev *pdev = to_pci_dev(dev); \
unsigned long size, flags; \
int ret, i; \
u16 cmd; \
\
if (kstrtoul(buf, 0, &size) < 0) \
return -EINVAL; \
\
device_lock(dev); \
if (dev->driver) { \
ret = -EBUSY; \
goto unlock; \
} \
\
pci_config_pm_runtime_get(pdev); \
\
if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) { \
ret = aperture_remove_conflicting_pci_devices(pdev, \
"resourceN_resize"); \
if (ret) \
goto pm_put; \
} \
\
pci_read_config_word(pdev, PCI_COMMAND, &cmd); \
pci_write_config_word(pdev, PCI_COMMAND, \
cmd & ~PCI_COMMAND_MEMORY); \
\
flags = pci_resource_flags(pdev, n); \
\
pci_remove_resource_files(pdev); \
\
for (i = 0; i < PCI_STD_NUM_BARS; i++) { \
if (pci_resource_len(pdev, i) && \
pci_resource_flags(pdev, i) == flags) \
pci_release_resource(pdev, i); \
} \
\
ret = pci_resize_resource(pdev, n, size); \
\
pci_assign_unassigned_bus_resources(pdev->bus); \
\
if (pci_create_resource_files(pdev)) \
pci_warn(pdev, "Failed to recreate resource files after BAR resizing\n");\
\
pci_write_config_word(pdev, PCI_COMMAND, cmd); \
pm_put: \
pci_config_pm_runtime_put(pdev); \
unlock: \
device_unlock(dev); \
\
return ret ? ret : count; \
} \
static DEVICE_ATTR_RW(resource##n##_resize)
/*
 * Instantiate the resourceN_resize show/store handlers and the matching
 * device attribute for each of BARs 0 through 5.
 */
pci_dev_resource_resize_attr(0);
pci_dev_resource_resize_attr(1);
pci_dev_resource_resize_attr(2);
pci_dev_resource_resize_attr(3);
pci_dev_resource_resize_attr(4);
pci_dev_resource_resize_attr(5);
/*
 * NULL-terminated list of the resourceN_resize attributes.  Entry N is the
 * attribute for BAR N; resource_resize_is_visible() uses the index it is
 * given as the BAR number, so the order here must not change.
 */
static struct attribute *resource_resize_attrs[] = {
	[0] = &dev_attr_resource0_resize.attr,
	[1] = &dev_attr_resource1_resize.attr,
	[2] = &dev_attr_resource2_resize.attr,
	[3] = &dev_attr_resource3_resize.attr,
	[4] = &dev_attr_resource4_resize.attr,
	[5] = &dev_attr_resource5_resize.attr,
	[6] = NULL,
};
/*
 * Visibility callback for the resourceN_resize attribute group.
 *
 * @n is used as the BAR number.  The attribute is hidden (mode 0) when
 * pci_rebar_get_current_size() reports an error for that BAR; otherwise the
 * attribute keeps its declared mode.
 */
static umode_t resource_resize_is_visible(struct kobject *kobj,
					  struct attribute *a, int n)
{
	struct device *dev = kobj_to_dev(kobj);
	struct pci_dev *pdev = to_pci_dev(dev);

	if (pci_rebar_get_current_size(pdev, n) < 0)
		return 0;

	return a->mode;
}
/*
 * Attribute group tying the resourceN_resize attributes to their
 * per-attribute visibility check.
 */
static const struct attribute_group pci_dev_resource_resize_group = {
	.is_visible	= resource_resize_is_visible,
	.attrs		= resource_resize_attrs,
};
int __must_check pci_create_sysfs_dev_files(struct pci_dev *pdev) int __must_check pci_create_sysfs_dev_files(struct pci_dev *pdev)
{ {
if (!sysfs_initialized) if (!sysfs_initialized)
@ -1494,6 +1601,7 @@ const struct attribute_group *pci_dev_groups[] = {
#ifdef CONFIG_ACPI #ifdef CONFIG_ACPI
&pci_dev_acpi_attr_group, &pci_dev_acpi_attr_group,
#endif #endif
&pci_dev_resource_resize_group,
NULL, NULL,
}; };