From 9dcf01d95721261844d8c07c142efc143f7d38e3 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Tue, 18 May 2021 22:21:32 +0300 Subject: [PATCH 01/19] vfio: centralize module refcount in subsystem layer Remove code duplication and move module refcounting to the subsystem module. Signed-off-by: Max Gurtovoy Reviewed-by: Eric Auger Link: https://lore.kernel.org/r/20210518192133.59195-2-mgurtovoy@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/fsl-mc/vfio_fsl_mc.c | 16 +++------------- drivers/vfio/mdev/vfio_mdev.c | 13 +------------ drivers/vfio/pci/vfio_pci.c | 7 ------- drivers/vfio/platform/vfio_platform_common.c | 6 ------ drivers/vfio/vfio.c | 10 ++++++++++ 5 files changed, 14 insertions(+), 38 deletions(-) diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c b/drivers/vfio/fsl-mc/vfio_fsl_mc.c index 980e59551301..90cad109583b 100644 --- a/drivers/vfio/fsl-mc/vfio_fsl_mc.c +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c @@ -140,26 +140,18 @@ static int vfio_fsl_mc_open(struct vfio_device *core_vdev) { struct vfio_fsl_mc_device *vdev = container_of(core_vdev, struct vfio_fsl_mc_device, vdev); - int ret; - - if (!try_module_get(THIS_MODULE)) - return -ENODEV; + int ret = 0; mutex_lock(&vdev->reflck->lock); if (!vdev->refcnt) { ret = vfio_fsl_mc_regions_init(vdev); if (ret) - goto err_reg_init; + goto out; } vdev->refcnt++; - +out: mutex_unlock(&vdev->reflck->lock); - return 0; - -err_reg_init: - mutex_unlock(&vdev->reflck->lock); - module_put(THIS_MODULE); return ret; } @@ -196,8 +188,6 @@ static void vfio_fsl_mc_release(struct vfio_device *core_vdev) } mutex_unlock(&vdev->reflck->lock); - - module_put(THIS_MODULE); } static long vfio_fsl_mc_ioctl(struct vfio_device *core_vdev, diff --git a/drivers/vfio/mdev/vfio_mdev.c b/drivers/vfio/mdev/vfio_mdev.c index 922729071c5a..5ef4815609ed 100644 --- a/drivers/vfio/mdev/vfio_mdev.c +++ b/drivers/vfio/mdev/vfio_mdev.c @@ -26,19 +26,10 @@ static int vfio_mdev_open(struct vfio_device *core_vdev) struct mdev_device *mdev = 
to_mdev_device(core_vdev->dev); struct mdev_parent *parent = mdev->type->parent; - int ret; - if (unlikely(!parent->ops->open)) return -EINVAL; - if (!try_module_get(THIS_MODULE)) - return -ENODEV; - - ret = parent->ops->open(mdev); - if (ret) - module_put(THIS_MODULE); - - return ret; + return parent->ops->open(mdev); } static void vfio_mdev_release(struct vfio_device *core_vdev) @@ -48,8 +39,6 @@ static void vfio_mdev_release(struct vfio_device *core_vdev) if (likely(parent->ops->release)) parent->ops->release(mdev); - - module_put(THIS_MODULE); } static long vfio_mdev_unlocked_ioctl(struct vfio_device *core_vdev, diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index bd7c482c948a..f6729baa1bf4 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -558,8 +558,6 @@ static void vfio_pci_release(struct vfio_device *core_vdev) } mutex_unlock(&vdev->reflck->lock); - - module_put(THIS_MODULE); } static int vfio_pci_open(struct vfio_device *core_vdev) @@ -568,9 +566,6 @@ static int vfio_pci_open(struct vfio_device *core_vdev) container_of(core_vdev, struct vfio_pci_device, vdev); int ret = 0; - if (!try_module_get(THIS_MODULE)) - return -ENODEV; - mutex_lock(&vdev->reflck->lock); if (!vdev->refcnt) { @@ -584,8 +579,6 @@ static int vfio_pci_open(struct vfio_device *core_vdev) vdev->refcnt++; error: mutex_unlock(&vdev->reflck->lock); - if (ret) - module_put(THIS_MODULE); return ret; } diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c index 470fcf7dac56..703164df7637 100644 --- a/drivers/vfio/platform/vfio_platform_common.c +++ b/drivers/vfio/platform/vfio_platform_common.c @@ -241,8 +241,6 @@ static void vfio_platform_release(struct vfio_device *core_vdev) } mutex_unlock(&driver_lock); - - module_put(vdev->parent_module); } static int vfio_platform_open(struct vfio_device *core_vdev) @@ -251,9 +249,6 @@ static int vfio_platform_open(struct vfio_device *core_vdev) 
container_of(core_vdev, struct vfio_platform_device, vdev); int ret; - if (!try_module_get(vdev->parent_module)) - return -ENODEV; - mutex_lock(&driver_lock); if (!vdev->refcnt) { @@ -291,7 +286,6 @@ static int vfio_platform_open(struct vfio_device *core_vdev) vfio_platform_regions_cleanup(vdev); err_reg: mutex_unlock(&driver_lock); - module_put(vdev->parent_module); return ret; } diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 5e631c359ef2..02cc51ce6891 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -1369,8 +1369,14 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf) if (IS_ERR(device)) return PTR_ERR(device); + if (!try_module_get(device->dev->driver->owner)) { + vfio_device_put(device); + return -ENODEV; + } + ret = device->ops->open(device); if (ret) { + module_put(device->dev->driver->owner); vfio_device_put(device); return ret; } @@ -1382,6 +1388,7 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf) ret = get_unused_fd_flags(O_CLOEXEC); if (ret < 0) { device->ops->release(device); + module_put(device->dev->driver->owner); vfio_device_put(device); return ret; } @@ -1392,6 +1399,7 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf) put_unused_fd(ret); ret = PTR_ERR(filep); device->ops->release(device); + module_put(device->dev->driver->owner); vfio_device_put(device); return ret; } @@ -1550,6 +1558,8 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep) device->ops->release(device); + module_put(device->dev->driver->owner); + vfio_group_try_dissolve_container(device->group); vfio_device_put(device); From 3b62a62429b26709895846180c93f0c21547f7ac Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Tue, 18 May 2021 22:21:33 +0300 Subject: [PATCH 02/19] vfio/platform: remove unneeded parent_module attribute The vfio core driver is now taking refcount on the provider drivers, remove redundant parent_module attribute from vfio_platform_device 
structure. Signed-off-by: Max Gurtovoy Acked-by: Eric Auger Link: https://lore.kernel.org/r/20210518192133.59195-3-mgurtovoy@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/platform/vfio_amba.c | 1 - drivers/vfio/platform/vfio_platform.c | 1 - drivers/vfio/platform/vfio_platform_private.h | 1 - 3 files changed, 3 deletions(-) diff --git a/drivers/vfio/platform/vfio_amba.c b/drivers/vfio/platform/vfio_amba.c index f970eb2a999f..badfffea14fb 100644 --- a/drivers/vfio/platform/vfio_amba.c +++ b/drivers/vfio/platform/vfio_amba.c @@ -59,7 +59,6 @@ static int vfio_amba_probe(struct amba_device *adev, const struct amba_id *id) vdev->flags = VFIO_DEVICE_FLAGS_AMBA; vdev->get_resource = get_amba_resource; vdev->get_irq = get_amba_irq; - vdev->parent_module = THIS_MODULE; vdev->reset_required = false; ret = vfio_platform_probe_common(vdev, &adev->dev); diff --git a/drivers/vfio/platform/vfio_platform.c b/drivers/vfio/platform/vfio_platform.c index e4027799a154..68a1c87066d7 100644 --- a/drivers/vfio/platform/vfio_platform.c +++ b/drivers/vfio/platform/vfio_platform.c @@ -50,7 +50,6 @@ static int vfio_platform_probe(struct platform_device *pdev) vdev->flags = VFIO_DEVICE_FLAGS_PLATFORM; vdev->get_resource = get_platform_resource; vdev->get_irq = get_platform_irq; - vdev->parent_module = THIS_MODULE; vdev->reset_required = reset_required; ret = vfio_platform_probe_common(vdev, &pdev->dev); diff --git a/drivers/vfio/platform/vfio_platform_private.h b/drivers/vfio/platform/vfio_platform_private.h index a5ba82c8cbc3..dfb834c13659 100644 --- a/drivers/vfio/platform/vfio_platform_private.h +++ b/drivers/vfio/platform/vfio_platform_private.h @@ -50,7 +50,6 @@ struct vfio_platform_device { u32 num_irqs; int refcnt; struct mutex igate; - struct module *parent_module; const char *compat; const char *acpihid; struct module *reset_module; From 204db60c83574559a8e08ce4bbd0029d56b8ab2e Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 17 Jun 2021 16:22:09 +0200 Subject: 
[PATCH 03/19] driver core: Pull required checks into driver_probe_device() Checking if the dev is dead or if the dev is already bound is a required precondition to invoking driver_probe_device(). All the call chains leading here duplicate these checks. Add it directly to driver_probe_device() so the precondition is clear and remove the checks from device_driver_attach() and __driver_attach_async_helper(). The other call chain going through __device_attach_driver() does have these same checks but they are inlined into logic higher up the call stack and can't be removed. The sysfs uAPI call chain starting at bind_store() is a bit confused because it reads dev->driver unlocked and returns -ENODEV if it is !NULL, otherwise it reads it again under lock and returns 0 if it is !NULL. Fix this to always return -EBUSY and always read dev->driver under its lock. Done in preparation for the next patches which will add additional callers to driver_probe_device() and will need these checks as well. Signed-off-by: Jason Gunthorpe [hch: drop the extra checks in device_driver_attach and bind_store] Signed-off-by: Christoph Hellwig Reviewed-by: Greg Kroah-Hartman Reviewed-by: Cornelia Huck Link: https://lore.kernel.org/r/20210617142218.1877096-2-hch@lst.de Signed-off-by: Alex Williamson --- drivers/base/bus.c | 2 +- drivers/base/dd.c | 32 ++++++++++---------------------- 2 files changed, 11 insertions(+), 23 deletions(-) diff --git a/drivers/base/bus.c b/drivers/base/bus.c index 36d0c654ea61..7de13302e8c8 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -210,7 +210,7 @@ static ssize_t bind_store(struct device_driver *drv, const char *buf, int err = -ENODEV; dev = bus_find_device_by_name(bus, NULL, buf); - if (dev && dev->driver == NULL && driver_match_device(drv, dev)) { + if (dev && driver_match_device(drv, dev)) { err = device_driver_attach(drv, dev); if (err > 0) { diff --git a/drivers/base/dd.c b/drivers/base/dd.c index ecd7cf848daf..7477d3322b3a 100644 --- 
a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -733,8 +733,9 @@ EXPORT_SYMBOL_GPL(wait_for_device_probe); * @drv: driver to bind a device to * @dev: device to try to bind to the driver * - * This function returns -ENODEV if the device is not registered, - * 1 if the device is bound successfully and 0 otherwise. + * This function returns -ENODEV if the device is not registered, -EBUSY if it + * already has a driver, and 1 if the device is bound successfully and 0 + * otherwise. * * This function must be called with @dev lock held. When called for a * USB interface, @dev->parent lock must be held as well. @@ -745,8 +746,10 @@ static int driver_probe_device(struct device_driver *drv, struct device *dev) { int ret = 0; - if (!device_is_registered(dev)) + if (dev->p->dead || !device_is_registered(dev)) return -ENODEV; + if (dev->driver) + return -EBUSY; dev->can_match = true; pr_debug("bus: '%s': %s: matched device %s with driver %s\n", @@ -1027,17 +1030,10 @@ static void __device_driver_unlock(struct device *dev, struct device *parent) */ int device_driver_attach(struct device_driver *drv, struct device *dev) { - int ret = 0; + int ret; __device_driver_lock(dev, dev->parent); - - /* - * If device has been removed or someone has already successfully - * bound a driver before us just skip the driver probe call. - */ - if (!dev->p->dead && !dev->driver) - ret = driver_probe_device(drv, dev); - + ret = driver_probe_device(drv, dev); __device_driver_unlock(dev, dev->parent); return ret; @@ -1047,19 +1043,11 @@ static void __driver_attach_async_helper(void *_dev, async_cookie_t cookie) { struct device *dev = _dev; struct device_driver *drv; - int ret = 0; + int ret; __device_driver_lock(dev, dev->parent); - drv = dev->p->async_driver; - - /* - * If device has been removed or someone has already successfully - * bound a driver before us just skip the driver probe call. 
- */ - if (!dev->p->dead && !dev->driver) - ret = driver_probe_device(drv, dev); - + ret = driver_probe_device(drv, dev); __device_driver_unlock(dev, dev->parent); dev_dbg(dev, "driver %s async attach completed: %d\n", drv->name, ret); From e1499647c69c72c4583273e773d8c2786cb4bee9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 17 Jun 2021 16:22:10 +0200 Subject: [PATCH 04/19] driver core: Better distinguish probe errors in really_probe really_probe tries to special case errors from ->probe, but due to all other initialization added to the function over time now a lot of internal errors hit that code path as well. Untangle that by adding a new probe_err local variable and apply the special casing only to that. Signed-off-by: Christoph Hellwig Reviewed-by: Cornelia Huck Reviewed-by: Greg Kroah-Hartman Reviewed-by: Kirti Wankhede Link: https://lore.kernel.org/r/20210617142218.1877096-3-hch@lst.de Signed-off-by: Alex Williamson --- drivers/base/dd.c | 72 +++++++++++++++++++++++++++-------------------- 1 file changed, 42 insertions(+), 30 deletions(-) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 7477d3322b3a..fd83817240e6 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -513,12 +513,44 @@ static ssize_t state_synced_show(struct device *dev, } static DEVICE_ATTR_RO(state_synced); + +static int call_driver_probe(struct device *dev, struct device_driver *drv) +{ + int ret = 0; + + if (dev->bus->probe) + ret = dev->bus->probe(dev); + else if (drv->probe) + ret = drv->probe(dev); + + switch (ret) { + case 0: + break; + case -EPROBE_DEFER: + /* Driver requested deferred probing */ + dev_dbg(dev, "Driver %s requests probe deferral\n", drv->name); + break; + case -ENODEV: + case -ENXIO: + pr_debug("%s: probe of %s rejects match %d\n", + drv->name, dev_name(dev), ret); + break; + default: + /* driver matched but the probe failed */ + pr_warn("%s: probe of %s failed with error %d\n", + drv->name, dev_name(dev), ret); + break; + } + + return ret; 
+} + static int really_probe(struct device *dev, struct device_driver *drv) { - int ret = -EPROBE_DEFER; int local_trigger_count = atomic_read(&deferred_trigger_count); bool test_remove = IS_ENABLED(CONFIG_DEBUG_TEST_DRIVER_REMOVE) && !drv->suppress_bind_attrs; + int ret = -EPROBE_DEFER, probe_ret = 0; if (defer_all_probes) { /* @@ -572,14 +604,14 @@ static int really_probe(struct device *dev, struct device_driver *drv) goto probe_failed; } - if (dev->bus->probe) { - ret = dev->bus->probe(dev); - if (ret) - goto probe_failed; - } else if (drv->probe) { - ret = drv->probe(dev); - if (ret) - goto probe_failed; + probe_ret = call_driver_probe(dev, drv); + if (probe_ret) { + /* + * Ignore errors returned by ->probe so that the next driver can + * try its luck. + */ + ret = 0; + goto probe_failed; } if (device_add_groups(dev, drv->dev_groups)) { @@ -650,28 +682,8 @@ static int really_probe(struct device *dev, struct device_driver *drv) dev->pm_domain->dismiss(dev); pm_runtime_reinit(dev); dev_pm_set_driver_flags(dev, 0); - - switch (ret) { - case -EPROBE_DEFER: - /* Driver requested deferred probing */ - dev_dbg(dev, "Driver %s requests probe deferral\n", drv->name); + if (probe_ret == -EPROBE_DEFER) driver_deferred_probe_add_trigger(dev, local_trigger_count); - break; - case -ENODEV: - case -ENXIO: - pr_debug("%s: probe of %s rejects match %d\n", - drv->name, dev_name(dev), ret); - break; - default: - /* driver matched but the probe failed */ - pr_warn("%s: probe of %s failed with error %d\n", - drv->name, dev_name(dev), ret); - } - /* - * Ignore errors returned by ->probe so that the next driver can try - * its luck. 
- */ - ret = 0; done: atomic_dec(&probe_count); wake_up_all(&probe_waitqueue); From ef6dcbdd8eb2f44dce70a3abecc32d43cc5f3e64 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 17 Jun 2021 16:22:11 +0200 Subject: [PATCH 05/19] driver core: Flow the return code from ->probe() through to sysfs bind Currently really_probe() returns 1 on success and 0 if the probe() call fails. This return code arrangement is designed to be useful for __device_attach_driver() which is walking the device list and trying every driver. 0 means to keep trying. However, it is not useful for the other places that call through to really_probe() that do actually want to see the probe() return code. For instance bind_store() would be better to return the actual error code from the driver's probe method, not discarding it and returning -ENODEV. Reorganize things so that really_probe() returns the error code from ->probe as a (inverted) positive number, and 0 for successful attach. With this, __device_attach_driver can ignore the (positive) probe errors, return 1 to exit the loop for a successful binding and pass on the other negative errors, while device_driver_attach simply inverts the positive errors and returns all errors to the sysfs code. 
Signed-off-by: Christoph Hellwig Reviewed-by: Greg Kroah-Hartman Reviewed-by: Cornelia Huck Link: https://lore.kernel.org/r/20210617142218.1877096-4-hch@lst.de Signed-off-by: Alex Williamson --- drivers/base/bus.c | 6 +----- drivers/base/dd.c | 29 ++++++++++++++++++++--------- 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/drivers/base/bus.c b/drivers/base/bus.c index 7de13302e8c8..1f6b4bd61056 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -212,13 +212,9 @@ static ssize_t bind_store(struct device_driver *drv, const char *buf, dev = bus_find_device_by_name(bus, NULL, buf); if (dev && driver_match_device(drv, dev)) { err = device_driver_attach(drv, dev); - - if (err > 0) { + if (!err) { /* success */ err = count; - } else if (err == 0) { - /* driver didn't accept device */ - err = -ENODEV; } } put_device(dev); diff --git a/drivers/base/dd.c b/drivers/base/dd.c index fd83817240e6..25341f52198c 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -607,10 +607,10 @@ static int really_probe(struct device *dev, struct device_driver *drv) probe_ret = call_driver_probe(dev, drv); if (probe_ret) { /* - * Ignore errors returned by ->probe so that the next driver can - * try its luck. + * Return probe errors as positive values so that the callers + * can distinguish them from other errors. */ - ret = 0; + ret = -probe_ret; goto probe_failed; } @@ -653,7 +653,6 @@ static int really_probe(struct device *dev, struct device_driver *drv) dev->pm_domain->sync(dev); driver_bound(dev); - ret = 1; pr_debug("bus: '%s': %s: bound device %s to driver %s\n", drv->bus->name, __func__, dev_name(dev), drv->name); goto done; @@ -746,8 +745,8 @@ EXPORT_SYMBOL_GPL(wait_for_device_probe); * @dev: device to try to bind to the driver * * This function returns -ENODEV if the device is not registered, -EBUSY if it - * already has a driver, and 1 if the device is bound successfully and 0 - * otherwise. 
+ * already has a driver, 0 if the device is bound successfully and a positive + * (inverted) error code for failures from the ->probe method. * * This function must be called with @dev lock held. When called for a * USB interface, @dev->parent lock must be held as well. @@ -882,7 +881,14 @@ static int __device_attach_driver(struct device_driver *drv, void *_data) if (data->check_async && async_allowed != data->want_async) return 0; - return driver_probe_device(drv, dev); + /* + * Ignore errors returned by ->probe so that the next driver can try + * its luck. + */ + ret = driver_probe_device(drv, dev); + if (ret < 0) + return ret; + return ret == 0; } static void __device_attach_async_helper(void *_dev, async_cookie_t cookie) @@ -1038,7 +1044,7 @@ static void __device_driver_unlock(struct device *dev, struct device *parent) * @dev: Device to attach it to * * Manually attach driver to a device. Will acquire both @dev lock and - * @dev->parent lock if needed. + * @dev->parent lock if needed. Returns 0 on success, -ERR on failure. */ int device_driver_attach(struct device_driver *drv, struct device *dev) { @@ -1048,6 +1054,9 @@ int device_driver_attach(struct device_driver *drv, struct device *dev) ret = driver_probe_device(drv, dev); __device_driver_unlock(dev, dev->parent); + /* also return probe errors as normal negative errnos */ + if (ret > 0) + ret = -ret; return ret; } @@ -1114,7 +1123,9 @@ static int __driver_attach(struct device *dev, void *data) return 0; } - device_driver_attach(drv, dev); + __device_driver_lock(dev, dev->parent); + driver_probe_device(drv, dev); + __device_driver_unlock(dev, dev->parent); return 0; } From 45ddcb42949f825f0caa25352e825cede94b6aba Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 17 Jun 2021 16:22:12 +0200 Subject: [PATCH 06/19] driver core: Don't return EPROBE_DEFER to userspace during sysfs bind EPROBE_DEFER is an internal kernel error code and it should not be leaked to userspace via the bind_store() sysfs. 
Userspace doesn't have this constant and cannot understand it. Further, it doesn't really make sense to have userspace trigger a deferred probe via bind_store(), which could eventually succeed, while simultaneously returning an error back. Resolve this by splitting driver_probe_device so that the version used by the sysfs binding turns EPROBE_DEFER into -EAGAIN, while the one used for internal binding keeps the error code, and calls driver_deferred_probe_add where needed. This also allows nicely splitting out the defer_all_probes / probe_count checks so that they actually allow for full device_{block,unblock}_probing protection while not bothering the sysfs bind case. Signed-off-by: Christoph Hellwig Reviewed-by: Cornelia Huck Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20210617142218.1877096-5-hch@lst.de Signed-off-by: Alex Williamson --- drivers/base/dd.c | 84 +++++++++++++++++++++++++---------------------- 1 file changed, 45 insertions(+), 39 deletions(-) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 25341f52198c..1d8012459587 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -491,15 +491,6 @@ EXPORT_SYMBOL_GPL(device_bind_driver); static atomic_t probe_count = ATOMIC_INIT(0); static DECLARE_WAIT_QUEUE_HEAD(probe_waitqueue); -static void driver_deferred_probe_add_trigger(struct device *dev, - int local_trigger_count) -{ - driver_deferred_probe_add(dev); - /* Did a trigger occur while probing? 
Need to re-trigger if yes */ - if (local_trigger_count != atomic_read(&deferred_trigger_count)) - driver_deferred_probe_trigger(); -} - static ssize_t state_synced_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -547,10 +538,9 @@ static int call_driver_probe(struct device *dev, struct device_driver *drv) static int really_probe(struct device *dev, struct device_driver *drv) { - int local_trigger_count = atomic_read(&deferred_trigger_count); bool test_remove = IS_ENABLED(CONFIG_DEBUG_TEST_DRIVER_REMOVE) && !drv->suppress_bind_attrs; - int ret = -EPROBE_DEFER, probe_ret = 0; + int ret; if (defer_all_probes) { /* @@ -559,17 +549,13 @@ static int really_probe(struct device *dev, struct device_driver *drv) * wait_for_device_probe() right after that to avoid any races. */ dev_dbg(dev, "Driver %s force probe deferral\n", drv->name); - driver_deferred_probe_add(dev); - return ret; + return -EPROBE_DEFER; } ret = device_links_check_suppliers(dev); - if (ret == -EPROBE_DEFER) - driver_deferred_probe_add_trigger(dev, local_trigger_count); if (ret) return ret; - atomic_inc(&probe_count); pr_debug("bus: '%s': %s: probing driver %s with device %s\n", drv->bus->name, __func__, drv->name, dev_name(dev)); if (!list_empty(&dev->devres_head)) { @@ -604,13 +590,13 @@ static int really_probe(struct device *dev, struct device_driver *drv) goto probe_failed; } - probe_ret = call_driver_probe(dev, drv); - if (probe_ret) { + ret = call_driver_probe(dev, drv); + if (ret) { /* * Return probe errors as positive values so that the callers * can distinguish them from other errors. 
*/ - ret = -probe_ret; + ret = -ret; goto probe_failed; } @@ -681,11 +667,7 @@ static int really_probe(struct device *dev, struct device_driver *drv) dev->pm_domain->dismiss(dev); pm_runtime_reinit(dev); dev_pm_set_driver_flags(dev, 0); - if (probe_ret == -EPROBE_DEFER) - driver_deferred_probe_add_trigger(dev, local_trigger_count); done: - atomic_dec(&probe_count); - wake_up_all(&probe_waitqueue); return ret; } @@ -739,21 +721,7 @@ void wait_for_device_probe(void) } EXPORT_SYMBOL_GPL(wait_for_device_probe); -/** - * driver_probe_device - attempt to bind device & driver together - * @drv: driver to bind a device to - * @dev: device to try to bind to the driver - * - * This function returns -ENODEV if the device is not registered, -EBUSY if it - * already has a driver, 0 if the device is bound successfully and a positive - * (inverted) error code for failures from the ->probe method. - * - * This function must be called with @dev lock held. When called for a - * USB interface, @dev->parent lock must be held as well. - * - * If the device has a parent, runtime-resume the parent before driver probing. - */ -static int driver_probe_device(struct device_driver *drv, struct device *dev) +static int __driver_probe_device(struct device_driver *drv, struct device *dev) { int ret = 0; @@ -784,6 +752,42 @@ static int driver_probe_device(struct device_driver *drv, struct device *dev) return ret; } +/** + * driver_probe_device - attempt to bind device & driver together + * @drv: driver to bind a device to + * @dev: device to try to bind to the driver + * + * This function returns -ENODEV if the device is not registered, -EBUSY if it + * already has a driver, 0 if the device is bound successfully and a positive + * (inverted) error code for failures from the ->probe method. + * + * This function must be called with @dev lock held. When called for a + * USB interface, @dev->parent lock must be held as well. 
+ * + * If the device has a parent, runtime-resume the parent before driver probing. + */ +static int driver_probe_device(struct device_driver *drv, struct device *dev) +{ + int trigger_count = atomic_read(&deferred_trigger_count); + int ret; + + atomic_inc(&probe_count); + ret = __driver_probe_device(drv, dev); + if (ret == -EPROBE_DEFER || ret == EPROBE_DEFER) { + driver_deferred_probe_add(dev); + + /* + * Did a trigger occur while probing? Need to re-trigger if yes + */ + if (trigger_count != atomic_read(&deferred_trigger_count) && + !defer_all_probes) + driver_deferred_probe_trigger(); + } + atomic_dec(&probe_count); + wake_up_all(&probe_waitqueue); + return ret; +} + static inline bool cmdline_requested_async_probing(const char *drv_name) { return parse_option_str(async_probe_drv_names, drv_name); @@ -1051,12 +1055,14 @@ int device_driver_attach(struct device_driver *drv, struct device *dev) int ret; __device_driver_lock(dev, dev->parent); - ret = driver_probe_device(drv, dev); + ret = __driver_probe_device(drv, dev); __device_driver_unlock(dev, dev->parent); /* also return probe errors as normal negative errnos */ if (ret > 0) ret = -ret; + if (ret == -EPROBE_DEFER) + return -EAGAIN; return ret; } From 0d9f837c6958a4c14e6bcb5c5edf6c851d65f507 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 17 Jun 2021 16:22:13 +0200 Subject: [PATCH 07/19] driver core: Export device_driver_attach() This is intended as a replacement API for device_bind_driver(). It has at least the following benefits: - Internal locking. Few of the users of device_bind_driver() follow the locking rules - Calls device driver probe() internally. Notably this means that devm support for probe works correctly as probe() error will call devres_release_all() - struct device_driver -> dev_groups is supported - Simplified calling convention, no need to manually call probe(). 
The general usage is for situations that already know what driver to bind and need to ensure the bind is synchronized with other logic. Call device_driver_attach() after device_add(). If probe() returns a failure then this will be preserved up through to the error return of device_driver_attach(). Signed-off-by: Jason Gunthorpe Signed-off-by: Christoph Hellwig Reviewed-by: Cornelia Huck Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20210617142218.1877096-6-hch@lst.de Signed-off-by: Alex Williamson --- drivers/base/base.h | 1 - drivers/base/dd.c | 3 +++ include/linux/device.h | 2 ++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/base/base.h b/drivers/base/base.h index e5f9b7e656c3..404db83ee5ec 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h @@ -152,7 +152,6 @@ extern int driver_add_groups(struct device_driver *drv, const struct attribute_group **groups); extern void driver_remove_groups(struct device_driver *drv, const struct attribute_group **groups); -int device_driver_attach(struct device_driver *drv, struct device *dev); void device_driver_detach(struct device *dev); extern char *make_class_name(const char *name, struct kobject *kobj); diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 1d8012459587..daeb9b5763ae 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -471,6 +471,8 @@ static void driver_sysfs_remove(struct device *dev) * (It is ok to call with no other effort from a driver's probe() method.) * * This function must be called with the device lock held. + * + * Callers should prefer to use device_driver_attach() instead. 
*/ int device_bind_driver(struct device *dev) { @@ -1065,6 +1067,7 @@ int device_driver_attach(struct device_driver *drv, struct device *dev) return -EAGAIN; return ret; } +EXPORT_SYMBOL_GPL(device_driver_attach); static void __driver_attach_async_helper(void *_dev, async_cookie_t cookie) { diff --git a/include/linux/device.h b/include/linux/device.h index f1a00040fa53..d8b9c9e7d493 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -845,6 +845,8 @@ static inline void *dev_get_platdata(const struct device *dev) * Manual binding of a device to driver. See drivers/base/bus.c * for information on use. */ +int __must_check device_driver_attach(struct device_driver *drv, + struct device *dev); int __must_check device_bind_driver(struct device *dev); void device_release_driver(struct device *dev); int __must_check device_attach(struct device *dev); From af3ab3f9b986cdbc1b97b8a3341ce78851edb0dd Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 17 Jun 2021 16:22:14 +0200 Subject: [PATCH 08/19] vfio/mdev: Remove CONFIG_VFIO_MDEV_DEVICE For some reason the vfio_mdev shim mdev_driver has its own module and kconfig. As the next patch requires access to it from mdev.ko merge the two modules together and remove VFIO_MDEV_DEVICE. A later patch deletes this driver entirely. 
Signed-off-by: Jason Gunthorpe Signed-off-by: Christoph Hellwig Reviewed-by: Cornelia Huck Reviewed-by: Greg Kroah-Hartman Reviewed-by: Kirti Wankhede Link: https://lore.kernel.org/r/20210617142218.1877096-7-hch@lst.de Signed-off-by: Alex Williamson --- Documentation/s390/vfio-ap.rst | 1 - arch/s390/Kconfig | 2 +- drivers/gpu/drm/i915/Kconfig | 2 +- drivers/vfio/mdev/Kconfig | 7 ------- drivers/vfio/mdev/Makefile | 3 +-- drivers/vfio/mdev/mdev_core.c | 16 ++++++++++++++-- drivers/vfio/mdev/mdev_private.h | 2 ++ drivers/vfio/mdev/vfio_mdev.c | 24 +----------------------- samples/Kconfig | 6 +++--- 9 files changed, 23 insertions(+), 40 deletions(-) diff --git a/Documentation/s390/vfio-ap.rst b/Documentation/s390/vfio-ap.rst index e15436599086..f57ae621f33e 100644 --- a/Documentation/s390/vfio-ap.rst +++ b/Documentation/s390/vfio-ap.rst @@ -514,7 +514,6 @@ These are the steps: * S390_AP_IOMMU * VFIO * VFIO_MDEV - * VFIO_MDEV_DEVICE * KVM If using make menuconfig select the following to build the vfio_ap module:: diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index b4c7c34069f8..ea63fd22e119 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -768,7 +768,7 @@ config VFIO_CCW config VFIO_AP def_tristate n prompt "VFIO support for AP devices" - depends on S390_AP_IOMMU && VFIO_MDEV_DEVICE && KVM + depends on S390_AP_IOMMU && VFIO_MDEV && KVM depends on ZCRYPT help This driver grants access to Adjunct Processor (AP) devices diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index 1e1cb245fca7..53bc68631861 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -124,7 +124,7 @@ config DRM_I915_GVT_KVMGT tristate "Enable KVM/VFIO support for Intel GVT-g" depends on DRM_I915_GVT depends on KVM - depends on VFIO_MDEV && VFIO_MDEV_DEVICE + depends on VFIO_MDEV default n help Choose this option if you want to enable KVMGT support for diff --git a/drivers/vfio/mdev/Kconfig b/drivers/vfio/mdev/Kconfig index 
5da27f2100f9..763c877a1318 100644 --- a/drivers/vfio/mdev/Kconfig +++ b/drivers/vfio/mdev/Kconfig @@ -9,10 +9,3 @@ config VFIO_MDEV See Documentation/driver-api/vfio-mediated-device.rst for more details. If you don't know what do here, say N. - -config VFIO_MDEV_DEVICE - tristate "VFIO driver for Mediated devices" - depends on VFIO && VFIO_MDEV - default n - help - VFIO based driver for Mediated devices. diff --git a/drivers/vfio/mdev/Makefile b/drivers/vfio/mdev/Makefile index 101516fdf375..ff9ecd802125 100644 --- a/drivers/vfio/mdev/Makefile +++ b/drivers/vfio/mdev/Makefile @@ -1,6 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -mdev-y := mdev_core.o mdev_sysfs.o mdev_driver.o +mdev-y := mdev_core.o mdev_sysfs.o mdev_driver.o vfio_mdev.o obj-$(CONFIG_VFIO_MDEV) += mdev.o -obj-$(CONFIG_VFIO_MDEV_DEVICE) += vfio_mdev.o diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c index 2a85d6fcb7dd..ff8c1a845166 100644 --- a/drivers/vfio/mdev/mdev_core.c +++ b/drivers/vfio/mdev/mdev_core.c @@ -360,11 +360,24 @@ int mdev_device_remove(struct mdev_device *mdev) static int __init mdev_init(void) { - return mdev_bus_register(); + int rc; + + rc = mdev_bus_register(); + if (rc) + return rc; + rc = mdev_register_driver(&vfio_mdev_driver); + if (rc) + goto err_bus; + return 0; +err_bus: + mdev_bus_unregister(); + return rc; } static void __exit mdev_exit(void) { + mdev_unregister_driver(&vfio_mdev_driver); + if (mdev_bus_compat_class) class_compat_unregister(mdev_bus_compat_class); @@ -378,4 +391,3 @@ MODULE_VERSION(DRIVER_VERSION); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); -MODULE_SOFTDEP("post: vfio_mdev"); diff --git a/drivers/vfio/mdev/mdev_private.h b/drivers/vfio/mdev/mdev_private.h index 6999c89db7b1..afbad7b0a14a 100644 --- a/drivers/vfio/mdev/mdev_private.h +++ b/drivers/vfio/mdev/mdev_private.h @@ -37,6 +37,8 @@ struct mdev_type { #define to_mdev_type(_kobj) \ container_of(_kobj, struct mdev_type, 
kobj) +extern struct mdev_driver vfio_mdev_driver; + int parent_create_sysfs_files(struct mdev_parent *parent); void parent_remove_sysfs_files(struct mdev_parent *parent); diff --git a/drivers/vfio/mdev/vfio_mdev.c b/drivers/vfio/mdev/vfio_mdev.c index 922729071c5a..d5b4eede47c1 100644 --- a/drivers/vfio/mdev/vfio_mdev.c +++ b/drivers/vfio/mdev/vfio_mdev.c @@ -17,10 +17,6 @@ #include "mdev_private.h" -#define DRIVER_VERSION "0.1" -#define DRIVER_AUTHOR "NVIDIA Corporation" -#define DRIVER_DESC "VFIO based driver for Mediated device" - static int vfio_mdev_open(struct vfio_device *core_vdev) { struct mdev_device *mdev = to_mdev_device(core_vdev->dev); @@ -151,7 +147,7 @@ static void vfio_mdev_remove(struct mdev_device *mdev) kfree(vdev); } -static struct mdev_driver vfio_mdev_driver = { +struct mdev_driver vfio_mdev_driver = { .driver = { .name = "vfio_mdev", .owner = THIS_MODULE, @@ -160,21 +156,3 @@ static struct mdev_driver vfio_mdev_driver = { .probe = vfio_mdev_probe, .remove = vfio_mdev_remove, }; - -static int __init vfio_mdev_init(void) -{ - return mdev_register_driver(&vfio_mdev_driver); -} - -static void __exit vfio_mdev_exit(void) -{ - mdev_unregister_driver(&vfio_mdev_driver); -} - -module_init(vfio_mdev_init) -module_exit(vfio_mdev_exit) - -MODULE_VERSION(DRIVER_VERSION); -MODULE_LICENSE("GPL v2"); -MODULE_AUTHOR(DRIVER_AUTHOR); -MODULE_DESCRIPTION(DRIVER_DESC); diff --git a/samples/Kconfig b/samples/Kconfig index b5a1a7aa7e23..b0503ef058d3 100644 --- a/samples/Kconfig +++ b/samples/Kconfig @@ -154,14 +154,14 @@ config SAMPLE_UHID config SAMPLE_VFIO_MDEV_MTTY tristate "Build VFIO mtty example mediated device sample code -- loadable modules only" - depends on VFIO_MDEV_DEVICE && m + depends on VFIO_MDEV && m help Build a virtual tty sample driver for use as a VFIO mediated device config SAMPLE_VFIO_MDEV_MDPY tristate "Build VFIO mdpy example mediated device sample code -- loadable modules only" - depends on VFIO_MDEV_DEVICE && m + depends on VFIO_MDEV && 
m help Build a virtual display sample driver for use as a VFIO mediated device. It is a simple framebuffer and supports @@ -178,7 +178,7 @@ config SAMPLE_VFIO_MDEV_MDPY_FB config SAMPLE_VFIO_MDEV_MBOCHS tristate "Build VFIO mdpy example mediated device sample code -- loadable modules only" - depends on VFIO_MDEV_DEVICE && m + depends on VFIO_MDEV && m select DMA_SHARED_BUFFER help Build a virtual display sample driver for use as a VFIO From 88a21f265ce50a17e6e71e3fb4467625cf234c5a Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 17 Jun 2021 16:22:15 +0200 Subject: [PATCH 09/19] vfio/mdev: Allow the mdev_parent_ops to specify the device driver to bind This allows a mdev driver to opt out of using vfio_mdev.c, instead the driver will provide a 'struct mdev_driver' and register directly with the driver core. Much of mdev_parent_ops becomes unused in this mode: - create()/remove() are done via the mdev_driver probe()/remove() - mdev_attr_groups becomes mdev_driver driver.dev_groups - Wrapper function callbacks are replaced with the same ones from struct vfio_device_ops Signed-off-by: Jason Gunthorpe Signed-off-by: Christoph Hellwig Reviewed-by: Cornelia Huck Reviewed-by: Greg Kroah-Hartman Reviewed-by: Kirti Wankhede Link: https://lore.kernel.org/r/20210617142218.1877096-8-hch@lst.de Signed-off-by: Alex Williamson --- .../driver-api/vfio-mediated-device.rst | 35 +++++++------------ drivers/vfio/mdev/mdev_core.c | 30 +++++++++++----- drivers/vfio/mdev/mdev_driver.c | 10 ++++++ include/linux/mdev.h | 2 ++ 4 files changed, 46 insertions(+), 31 deletions(-) diff --git a/Documentation/driver-api/vfio-mediated-device.rst b/Documentation/driver-api/vfio-mediated-device.rst index 1779b85f014e..9f26079cacae 100644 --- a/Documentation/driver-api/vfio-mediated-device.rst +++ b/Documentation/driver-api/vfio-mediated-device.rst @@ -93,7 +93,7 @@ interfaces: Registration Interface for a Mediated Bus Driver ------------------------------------------------ -The registration 
interface for a mediated bus driver provides the following +The registration interface for a mediated device driver provides the following structure to represent a mediated device's driver:: /* @@ -136,37 +136,26 @@ The structures in the mdev_parent_ops structure are as follows: * dev_attr_groups: attributes of the parent device * mdev_attr_groups: attributes of the mediated device * supported_config: attributes to define supported configurations +* device_driver: device driver to bind for mediated device instances -The functions in the mdev_parent_ops structure are as follows: +The mdev_parent_ops also still has various function pointers. These exist +for historical reasons only and shall not be used for new drivers. -* create: allocate basic resources in a driver for a mediated device -* remove: free resources in a driver when a mediated device is destroyed - -(Note that mdev-core provides no implicit serialization of create/remove -callbacks per mdev parent device, per mdev type, or any other categorization. -Vendor drivers are expected to be fully asynchronous in this respect or -provide their own internal resource protection.)
- -The callbacks in the mdev_parent_ops structure are as follows: - -* open: open callback of mediated device -* close: close callback of mediated device -* ioctl: ioctl callback of mediated device -* read : read emulation callback -* write: write emulation callback -* mmap: mmap emulation callback - -A driver should use the mdev_parent_ops structure in the function call to -register itself with the mdev core driver:: +When a driver wants to add the GUID creation sysfs to an existing device it has +probed, it should call:: extern int mdev_register_device(struct device *dev, const struct mdev_parent_ops *ops); -However, the mdev_parent_ops structure is not required in the function call -that a driver should use to unregister itself with the mdev core driver:: +This will provide the 'mdev_supported_types/XX/create' files which can then be +used to trigger the creation of an mdev_device. The created mdev_device will be +attached to the specified driver. + +When the driver needs to remove itself it calls:: extern void mdev_unregister_device(struct device *dev); +This will unbind and destroy all the created mdevs and remove the sysfs files.
Mediated Device Management Interface Through sysfs ================================================== diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c index ff8c1a845166..e4581ec093a6 100644 --- a/drivers/vfio/mdev/mdev_core.c +++ b/drivers/vfio/mdev/mdev_core.c @@ -94,9 +94,11 @@ static void mdev_device_remove_common(struct mdev_device *mdev) mdev_remove_sysfs_files(mdev); device_del(&mdev->dev); lockdep_assert_held(&parent->unreg_sem); - ret = parent->ops->remove(mdev); - if (ret) - dev_err(&mdev->dev, "Remove failed: err=%d\n", ret); + if (parent->ops->remove) { + ret = parent->ops->remove(mdev); + if (ret) + dev_err(&mdev->dev, "Remove failed: err=%d\n", ret); + } /* Balances with device_initialize() */ put_device(&mdev->dev); @@ -127,7 +129,9 @@ int mdev_register_device(struct device *dev, const struct mdev_parent_ops *ops) char *envp[] = { env_string, NULL }; /* check for mandatory ops */ - if (!ops || !ops->create || !ops->remove || !ops->supported_type_groups) + if (!ops || !ops->supported_type_groups) + return -EINVAL; + if (!ops->device_driver && (!ops->create || !ops->remove)) return -EINVAL; dev = get_device(dev); @@ -256,6 +260,7 @@ int mdev_device_create(struct mdev_type *type, const guid_t *uuid) int ret; struct mdev_device *mdev, *tmp; struct mdev_parent *parent = type->parent; + struct mdev_driver *drv = parent->ops->device_driver; mutex_lock(&mdev_list_lock); @@ -296,14 +301,22 @@ int mdev_device_create(struct mdev_type *type, const guid_t *uuid) goto out_put_device; } - ret = parent->ops->create(mdev); - if (ret) - goto out_unlock; + if (parent->ops->create) { + ret = parent->ops->create(mdev); + if (ret) + goto out_unlock; + } ret = device_add(&mdev->dev); if (ret) goto out_remove; + if (!drv) + drv = &vfio_mdev_driver; + ret = device_driver_attach(&drv->driver, &mdev->dev); + if (ret) + goto out_del; + ret = mdev_create_sysfs_files(mdev); if (ret) goto out_del; @@ -317,7 +330,8 @@ int mdev_device_create(struct 
mdev_type *type, const guid_t *uuid) out_del: device_del(&mdev->dev); out_remove: - parent->ops->remove(mdev); + if (parent->ops->remove) + parent->ops->remove(mdev); out_unlock: up_read(&parent->unreg_sem); out_put_device: diff --git a/drivers/vfio/mdev/mdev_driver.c b/drivers/vfio/mdev/mdev_driver.c index 041699571b7e..c368ec824e2b 100644 --- a/drivers/vfio/mdev/mdev_driver.c +++ b/drivers/vfio/mdev/mdev_driver.c @@ -71,10 +71,20 @@ static int mdev_remove(struct device *dev) return 0; } +static int mdev_match(struct device *dev, struct device_driver *drv) +{ + /* + * No drivers automatically match. Drivers are only bound by explicit + * device_driver_attach() + */ + return 0; +} + struct bus_type mdev_bus_type = { .name = "mdev", .probe = mdev_probe, .remove = mdev_remove, + .match = mdev_match, }; EXPORT_SYMBOL_GPL(mdev_bus_type); diff --git a/include/linux/mdev.h b/include/linux/mdev.h index 1fb34ea394ad..3a38598c2605 100644 --- a/include/linux/mdev.h +++ b/include/linux/mdev.h @@ -55,6 +55,7 @@ struct device *mtype_get_parent_dev(struct mdev_type *mtype); * register the device to mdev module. * * @owner: The module owner. + * @device_driver: Which device driver to probe() on newly created devices * @dev_attr_groups: Attributes of the parent device. * @mdev_attr_groups: Attributes of the mediated device. * @supported_type_groups: Attributes to define supported types. 
It is mandatory @@ -103,6 +104,7 @@ struct device *mtype_get_parent_dev(struct mdev_type *mtype); **/ struct mdev_parent_ops { struct module *owner; + struct mdev_driver *device_driver; const struct attribute_group **dev_attr_groups; const struct attribute_group **mdev_attr_groups; struct attribute_group **supported_type_groups; From 09177ac9192198bec24a81c822ebeef4197c3c8b Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 17 Jun 2021 16:22:16 +0200 Subject: [PATCH 10/19] vfio/mtty: Convert to use vfio_register_group_dev() This is straightforward conversion, the mdev_state is actually serving as the vfio_device and we can replace all the mdev_get_drvdata()'s and the wonky dead code with a simple container_of() Reviewed-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe Signed-off-by: Christoph Hellwig Reviewed-by: Greg Kroah-Hartman Reviewed-by: Kirti Wankhede Link: https://lore.kernel.org/r/20210617142218.1877096-9-hch@lst.de Signed-off-by: Alex Williamson --- samples/vfio-mdev/mtty.c | 185 ++++++++++++++++++--------------------- 1 file changed, 83 insertions(+), 102 deletions(-) diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c index b9b24be4abda..faf9b8e8873a 100644 --- a/samples/vfio-mdev/mtty.c +++ b/samples/vfio-mdev/mtty.c @@ -127,6 +127,7 @@ struct serial_port { /* State of each mdev device */ struct mdev_state { + struct vfio_device vdev; int irq_fd; struct eventfd_ctx *intx_evtfd; struct eventfd_ctx *msi_evtfd; @@ -150,6 +151,8 @@ static const struct file_operations vd_fops = { .owner = THIS_MODULE, }; +static const struct vfio_device_ops mtty_dev_ops; + /* function prototypes */ static int mtty_trigger_interrupt(struct mdev_state *mdev_state); @@ -631,23 +634,16 @@ static void mdev_read_base(struct mdev_state *mdev_state) } } -static ssize_t mdev_access(struct mdev_device *mdev, u8 *buf, size_t count, +static ssize_t mdev_access(struct mdev_state *mdev_state, u8 *buf, size_t count, loff_t pos, bool is_write) { - struct mdev_state 
*mdev_state; unsigned int index; loff_t offset; int ret = 0; - if (!mdev || !buf) + if (!buf) return -EINVAL; - mdev_state = mdev_get_drvdata(mdev); - if (!mdev_state) { - pr_err("%s mdev_state not found\n", __func__); - return -EINVAL; - } - mutex_lock(&mdev_state->ops_lock); index = MTTY_VFIO_PCI_OFFSET_TO_INDEX(pos); @@ -708,15 +704,18 @@ static ssize_t mdev_access(struct mdev_device *mdev, u8 *buf, size_t count, return ret; } -static int mtty_create(struct mdev_device *mdev) +static int mtty_probe(struct mdev_device *mdev) { struct mdev_state *mdev_state; int nr_ports = mdev_get_type_group_id(mdev) + 1; + int ret; mdev_state = kzalloc(sizeof(struct mdev_state), GFP_KERNEL); if (mdev_state == NULL) return -ENOMEM; + vfio_init_group_dev(&mdev_state->vdev, &mdev->dev, &mtty_dev_ops); + mdev_state->nr_ports = nr_ports; mdev_state->irq_index = -1; mdev_state->s[0].max_fifo_size = MAX_FIFO_SIZE; @@ -731,7 +730,6 @@ static int mtty_create(struct mdev_device *mdev) mutex_init(&mdev_state->ops_lock); mdev_state->mdev = mdev; - mdev_set_drvdata(mdev, mdev_state); mtty_create_config_space(mdev_state); @@ -739,50 +737,40 @@ static int mtty_create(struct mdev_device *mdev) list_add(&mdev_state->next, &mdev_devices_list); mutex_unlock(&mdev_list_lock); + ret = vfio_register_group_dev(&mdev_state->vdev); + if (ret) { + kfree(mdev_state); + return ret; + } + dev_set_drvdata(&mdev->dev, mdev_state); return 0; } -static int mtty_remove(struct mdev_device *mdev) +static void mtty_remove(struct mdev_device *mdev) { - struct mdev_state *mds, *tmp_mds; - struct mdev_state *mdev_state = mdev_get_drvdata(mdev); - int ret = -EINVAL; + struct mdev_state *mdev_state = dev_get_drvdata(&mdev->dev); + vfio_unregister_group_dev(&mdev_state->vdev); mutex_lock(&mdev_list_lock); - list_for_each_entry_safe(mds, tmp_mds, &mdev_devices_list, next) { - if (mdev_state == mds) { - list_del(&mdev_state->next); - mdev_set_drvdata(mdev, NULL); - kfree(mdev_state->vconfig); - kfree(mdev_state); - ret = 
0; - break; - } - } + list_del(&mdev_state->next); mutex_unlock(&mdev_list_lock); - return ret; + kfree(mdev_state->vconfig); + kfree(mdev_state); } -static int mtty_reset(struct mdev_device *mdev) +static int mtty_reset(struct mdev_state *mdev_state) { - struct mdev_state *mdev_state; - - if (!mdev) - return -EINVAL; - - mdev_state = mdev_get_drvdata(mdev); - if (!mdev_state) - return -EINVAL; - pr_info("%s: called\n", __func__); return 0; } -static ssize_t mtty_read(struct mdev_device *mdev, char __user *buf, +static ssize_t mtty_read(struct vfio_device *vdev, char __user *buf, size_t count, loff_t *ppos) { + struct mdev_state *mdev_state = + container_of(vdev, struct mdev_state, vdev); unsigned int done = 0; int ret; @@ -792,7 +780,7 @@ static ssize_t mtty_read(struct mdev_device *mdev, char __user *buf, if (count >= 4 && !(*ppos % 4)) { u32 val; - ret = mdev_access(mdev, (u8 *)&val, sizeof(val), + ret = mdev_access(mdev_state, (u8 *)&val, sizeof(val), *ppos, false); if (ret <= 0) goto read_err; @@ -804,7 +792,7 @@ static ssize_t mtty_read(struct mdev_device *mdev, char __user *buf, } else if (count >= 2 && !(*ppos % 2)) { u16 val; - ret = mdev_access(mdev, (u8 *)&val, sizeof(val), + ret = mdev_access(mdev_state, (u8 *)&val, sizeof(val), *ppos, false); if (ret <= 0) goto read_err; @@ -816,7 +804,7 @@ static ssize_t mtty_read(struct mdev_device *mdev, char __user *buf, } else { u8 val; - ret = mdev_access(mdev, (u8 *)&val, sizeof(val), + ret = mdev_access(mdev_state, (u8 *)&val, sizeof(val), *ppos, false); if (ret <= 0) goto read_err; @@ -839,9 +827,11 @@ static ssize_t mtty_read(struct mdev_device *mdev, char __user *buf, return -EFAULT; } -static ssize_t mtty_write(struct mdev_device *mdev, const char __user *buf, +static ssize_t mtty_write(struct vfio_device *vdev, const char __user *buf, size_t count, loff_t *ppos) { + struct mdev_state *mdev_state = + container_of(vdev, struct mdev_state, vdev); unsigned int done = 0; int ret; @@ -854,7 +844,7 @@ static 
ssize_t mtty_write(struct mdev_device *mdev, const char __user *buf, if (copy_from_user(&val, buf, sizeof(val))) goto write_err; - ret = mdev_access(mdev, (u8 *)&val, sizeof(val), + ret = mdev_access(mdev_state, (u8 *)&val, sizeof(val), *ppos, true); if (ret <= 0) goto write_err; @@ -866,7 +856,7 @@ static ssize_t mtty_write(struct mdev_device *mdev, const char __user *buf, if (copy_from_user(&val, buf, sizeof(val))) goto write_err; - ret = mdev_access(mdev, (u8 *)&val, sizeof(val), + ret = mdev_access(mdev_state, (u8 *)&val, sizeof(val), *ppos, true); if (ret <= 0) goto write_err; @@ -878,7 +868,7 @@ static ssize_t mtty_write(struct mdev_device *mdev, const char __user *buf, if (copy_from_user(&val, buf, sizeof(val))) goto write_err; - ret = mdev_access(mdev, (u8 *)&val, sizeof(val), + ret = mdev_access(mdev_state, (u8 *)&val, sizeof(val), *ppos, true); if (ret <= 0) goto write_err; @@ -896,19 +886,11 @@ static ssize_t mtty_write(struct mdev_device *mdev, const char __user *buf, return -EFAULT; } -static int mtty_set_irqs(struct mdev_device *mdev, uint32_t flags, +static int mtty_set_irqs(struct mdev_state *mdev_state, uint32_t flags, unsigned int index, unsigned int start, unsigned int count, void *data) { int ret = 0; - struct mdev_state *mdev_state; - - if (!mdev) - return -EINVAL; - - mdev_state = mdev_get_drvdata(mdev); - if (!mdev_state) - return -EINVAL; mutex_lock(&mdev_state->ops_lock); switch (index) { @@ -1024,21 +1006,13 @@ static int mtty_trigger_interrupt(struct mdev_state *mdev_state) return ret; } -static int mtty_get_region_info(struct mdev_device *mdev, +static int mtty_get_region_info(struct mdev_state *mdev_state, struct vfio_region_info *region_info, u16 *cap_type_id, void **cap_type) { unsigned int size = 0; - struct mdev_state *mdev_state; u32 bar_index; - if (!mdev) - return -EINVAL; - - mdev_state = mdev_get_drvdata(mdev); - if (!mdev_state) - return -EINVAL; - bar_index = region_info->index; if (bar_index >= VFIO_PCI_NUM_REGIONS) return 
-EINVAL; @@ -1073,8 +1047,7 @@ static int mtty_get_region_info(struct mdev_device *mdev, return 0; } -static int mtty_get_irq_info(struct mdev_device *mdev, - struct vfio_irq_info *irq_info) +static int mtty_get_irq_info(struct vfio_irq_info *irq_info) { switch (irq_info->index) { case VFIO_PCI_INTX_IRQ_INDEX: @@ -1098,8 +1071,7 @@ static int mtty_get_irq_info(struct mdev_device *mdev, return 0; } -static int mtty_get_device_info(struct mdev_device *mdev, - struct vfio_device_info *dev_info) +static int mtty_get_device_info(struct vfio_device_info *dev_info) { dev_info->flags = VFIO_DEVICE_FLAGS_PCI; dev_info->num_regions = VFIO_PCI_NUM_REGIONS; @@ -1108,19 +1080,13 @@ static int mtty_get_device_info(struct mdev_device *mdev, return 0; } -static long mtty_ioctl(struct mdev_device *mdev, unsigned int cmd, +static long mtty_ioctl(struct vfio_device *vdev, unsigned int cmd, unsigned long arg) { + struct mdev_state *mdev_state = + container_of(vdev, struct mdev_state, vdev); int ret = 0; unsigned long minsz; - struct mdev_state *mdev_state; - - if (!mdev) - return -EINVAL; - - mdev_state = mdev_get_drvdata(mdev); - if (!mdev_state) - return -ENODEV; switch (cmd) { case VFIO_DEVICE_GET_INFO: @@ -1135,7 +1101,7 @@ static long mtty_ioctl(struct mdev_device *mdev, unsigned int cmd, if (info.argsz < minsz) return -EINVAL; - ret = mtty_get_device_info(mdev, &info); + ret = mtty_get_device_info(&info); if (ret) return ret; @@ -1160,7 +1126,7 @@ static long mtty_ioctl(struct mdev_device *mdev, unsigned int cmd, if (info.argsz < minsz) return -EINVAL; - ret = mtty_get_region_info(mdev, &info, &cap_type_id, + ret = mtty_get_region_info(mdev_state, &info, &cap_type_id, &cap_type); if (ret) return ret; @@ -1184,7 +1150,7 @@ static long mtty_ioctl(struct mdev_device *mdev, unsigned int cmd, (info.index >= mdev_state->dev_info.num_irqs)) return -EINVAL; - ret = mtty_get_irq_info(mdev, &info); + ret = mtty_get_irq_info(&info); if (ret) return ret; @@ -1218,25 +1184,25 @@ static long 
mtty_ioctl(struct mdev_device *mdev, unsigned int cmd, return PTR_ERR(data); } - ret = mtty_set_irqs(mdev, hdr.flags, hdr.index, hdr.start, + ret = mtty_set_irqs(mdev_state, hdr.flags, hdr.index, hdr.start, hdr.count, data); kfree(ptr); return ret; } case VFIO_DEVICE_RESET: - return mtty_reset(mdev); + return mtty_reset(mdev_state); } return -ENOTTY; } -static int mtty_open(struct mdev_device *mdev) +static int mtty_open(struct vfio_device *vdev) { pr_info("%s\n", __func__); return 0; } -static void mtty_close(struct mdev_device *mdev) +static void mtty_close(struct vfio_device *mdev) { pr_info("%s\n", __func__); } @@ -1351,18 +1317,31 @@ static struct attribute_group *mdev_type_groups[] = { NULL, }; +static const struct vfio_device_ops mtty_dev_ops = { + .name = "vfio-mtty", + .open = mtty_open, + .release = mtty_close, + .read = mtty_read, + .write = mtty_write, + .ioctl = mtty_ioctl, +}; + +static struct mdev_driver mtty_driver = { + .driver = { + .name = "mtty", + .owner = THIS_MODULE, + .mod_name = KBUILD_MODNAME, + .dev_groups = mdev_dev_groups, + }, + .probe = mtty_probe, + .remove = mtty_remove, +}; + static const struct mdev_parent_ops mdev_fops = { .owner = THIS_MODULE, + .device_driver = &mtty_driver, .dev_attr_groups = mtty_dev_groups, - .mdev_attr_groups = mdev_dev_groups, .supported_type_groups = mdev_type_groups, - .create = mtty_create, - .remove = mtty_remove, - .open = mtty_open, - .release = mtty_close, - .read = mtty_read, - .write = mtty_write, - .ioctl = mtty_ioctl, }; static void mtty_device_release(struct device *dev) @@ -1393,12 +1372,16 @@ static int __init mtty_dev_init(void) pr_info("major_number:%d\n", MAJOR(mtty_dev.vd_devt)); + ret = mdev_register_driver(&mtty_driver); + if (ret) + goto err_cdev; + mtty_dev.vd_class = class_create(THIS_MODULE, MTTY_CLASS_NAME); if (IS_ERR(mtty_dev.vd_class)) { pr_err("Error: failed to register mtty_dev class\n"); ret = PTR_ERR(mtty_dev.vd_class); - goto failed1; + goto err_driver; } mtty_dev.dev.class 
= mtty_dev.vd_class; @@ -1407,28 +1390,25 @@ static int __init mtty_dev_init(void) ret = device_register(&mtty_dev.dev); if (ret) - goto failed2; + goto err_class; ret = mdev_register_device(&mtty_dev.dev, &mdev_fops); if (ret) - goto failed3; + goto err_device; mutex_init(&mdev_list_lock); INIT_LIST_HEAD(&mdev_devices_list); + return 0; - goto all_done; - -failed3: - +err_device: device_unregister(&mtty_dev.dev); -failed2: +err_class: class_destroy(mtty_dev.vd_class); - -failed1: +err_driver: + mdev_unregister_driver(&mtty_driver); +err_cdev: cdev_del(&mtty_dev.vd_cdev); unregister_chrdev_region(mtty_dev.vd_devt, MINORMASK + 1); - -all_done: return ret; } @@ -1439,6 +1419,7 @@ static void __exit mtty_dev_exit(void) device_unregister(&mtty_dev.dev); idr_destroy(&mtty_dev.vd_idr); + mdev_unregister_driver(&mtty_driver); cdev_del(&mtty_dev.vd_cdev); unregister_chrdev_region(mtty_dev.vd_devt, MINORMASK + 1); class_destroy(mtty_dev.vd_class); From 437e41368c01fba8c220d7ca2f6b9d7fde92beee Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 17 Jun 2021 16:22:17 +0200 Subject: [PATCH 11/19] vfio/mdpy: Convert to use vfio_register_group_dev() This is straightforward conversion, the mdev_state is actually serving as the vfio_device and we can replace all the mdev_get_drvdata()'s and the wonky dead code with a simple container_of(). 
Reviewed-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe Signed-off-by: Christoph Hellwig Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20210617142218.1877096-10-hch@lst.de Signed-off-by: Alex Williamson --- samples/vfio-mdev/mdpy.c | 159 ++++++++++++++++++++++----------------- 1 file changed, 88 insertions(+), 71 deletions(-) diff --git a/samples/vfio-mdev/mdpy.c b/samples/vfio-mdev/mdpy.c index e889c1cf8fd1..7e9c9df0f05b 100644 --- a/samples/vfio-mdev/mdpy.c +++ b/samples/vfio-mdev/mdpy.c @@ -85,9 +85,11 @@ static struct class *mdpy_class; static struct cdev mdpy_cdev; static struct device mdpy_dev; static u32 mdpy_count; +static const struct vfio_device_ops mdpy_dev_ops; /* State of each mdev device */ struct mdev_state { + struct vfio_device vdev; u8 *vconfig; u32 bar_mask; struct mutex ops_lock; @@ -162,11 +164,9 @@ static void handle_pci_cfg_write(struct mdev_state *mdev_state, u16 offset, } } -static ssize_t mdev_access(struct mdev_device *mdev, char *buf, size_t count, - loff_t pos, bool is_write) +static ssize_t mdev_access(struct mdev_state *mdev_state, char *buf, + size_t count, loff_t pos, bool is_write) { - struct mdev_state *mdev_state = mdev_get_drvdata(mdev); - struct device *dev = mdev_dev(mdev); int ret = 0; mutex_lock(&mdev_state->ops_lock); @@ -187,8 +187,9 @@ static ssize_t mdev_access(struct mdev_device *mdev, char *buf, size_t count, memcpy(buf, mdev_state->memblk, count); } else { - dev_info(dev, "%s: %s @0x%llx (unhandled)\n", - __func__, is_write ? "WR" : "RD", pos); + dev_info(mdev_state->vdev.dev, + "%s: %s @0x%llx (unhandled)\n", __func__, + is_write ? 
"WR" : "RD", pos); ret = -1; goto accessfailed; } @@ -202,9 +203,8 @@ static ssize_t mdev_access(struct mdev_device *mdev, char *buf, size_t count, return ret; } -static int mdpy_reset(struct mdev_device *mdev) +static int mdpy_reset(struct mdev_state *mdev_state) { - struct mdev_state *mdev_state = mdev_get_drvdata(mdev); u32 stride, i; /* initialize with gray gradient */ @@ -216,13 +216,14 @@ static int mdpy_reset(struct mdev_device *mdev) return 0; } -static int mdpy_create(struct mdev_device *mdev) +static int mdpy_probe(struct mdev_device *mdev) { const struct mdpy_type *type = &mdpy_types[mdev_get_type_group_id(mdev)]; struct device *dev = mdev_dev(mdev); struct mdev_state *mdev_state; u32 fbsize; + int ret; if (mdpy_count >= max_devices) return -ENOMEM; @@ -230,6 +231,7 @@ static int mdpy_create(struct mdev_device *mdev) mdev_state = kzalloc(sizeof(struct mdev_state), GFP_KERNEL); if (mdev_state == NULL) return -ENOMEM; + vfio_init_group_dev(&mdev_state->vdev, &mdev->dev, &mdpy_dev_ops); mdev_state->vconfig = kzalloc(MDPY_CONFIG_SPACE_SIZE, GFP_KERNEL); if (mdev_state->vconfig == NULL) { @@ -250,36 +252,41 @@ static int mdpy_create(struct mdev_device *mdev) mutex_init(&mdev_state->ops_lock); mdev_state->mdev = mdev; - mdev_set_drvdata(mdev, mdev_state); - mdev_state->type = type; mdev_state->memsize = fbsize; mdpy_create_config_space(mdev_state); - mdpy_reset(mdev); + mdpy_reset(mdev_state); mdpy_count++; + + ret = vfio_register_group_dev(&mdev_state->vdev); + if (ret) { + kfree(mdev_state); + return ret; + } + dev_set_drvdata(&mdev->dev, mdev_state); return 0; } -static int mdpy_remove(struct mdev_device *mdev) +static void mdpy_remove(struct mdev_device *mdev) { - struct mdev_state *mdev_state = mdev_get_drvdata(mdev); - struct device *dev = mdev_dev(mdev); + struct mdev_state *mdev_state = dev_get_drvdata(&mdev->dev); - dev_info(dev, "%s\n", __func__); + dev_info(&mdev->dev, "%s\n", __func__); - mdev_set_drvdata(mdev, NULL); + 
vfio_unregister_group_dev(&mdev_state->vdev); vfree(mdev_state->memblk); kfree(mdev_state->vconfig); kfree(mdev_state); mdpy_count--; - return 0; } -static ssize_t mdpy_read(struct mdev_device *mdev, char __user *buf, +static ssize_t mdpy_read(struct vfio_device *vdev, char __user *buf, size_t count, loff_t *ppos) { + struct mdev_state *mdev_state = + container_of(vdev, struct mdev_state, vdev); unsigned int done = 0; int ret; @@ -289,8 +296,8 @@ static ssize_t mdpy_read(struct mdev_device *mdev, char __user *buf, if (count >= 4 && !(*ppos % 4)) { u32 val; - ret = mdev_access(mdev, (char *)&val, sizeof(val), - *ppos, false); + ret = mdev_access(mdev_state, (char *)&val, sizeof(val), + *ppos, false); if (ret <= 0) goto read_err; @@ -301,7 +308,7 @@ static ssize_t mdpy_read(struct mdev_device *mdev, char __user *buf, } else if (count >= 2 && !(*ppos % 2)) { u16 val; - ret = mdev_access(mdev, (char *)&val, sizeof(val), + ret = mdev_access(mdev_state, (char *)&val, sizeof(val), *ppos, false); if (ret <= 0) goto read_err; @@ -313,7 +320,7 @@ static ssize_t mdpy_read(struct mdev_device *mdev, char __user *buf, } else { u8 val; - ret = mdev_access(mdev, (char *)&val, sizeof(val), + ret = mdev_access(mdev_state, (char *)&val, sizeof(val), *ppos, false); if (ret <= 0) goto read_err; @@ -336,9 +343,11 @@ static ssize_t mdpy_read(struct mdev_device *mdev, char __user *buf, return -EFAULT; } -static ssize_t mdpy_write(struct mdev_device *mdev, const char __user *buf, +static ssize_t mdpy_write(struct vfio_device *vdev, const char __user *buf, size_t count, loff_t *ppos) { + struct mdev_state *mdev_state = + container_of(vdev, struct mdev_state, vdev); unsigned int done = 0; int ret; @@ -351,7 +360,7 @@ static ssize_t mdpy_write(struct mdev_device *mdev, const char __user *buf, if (copy_from_user(&val, buf, sizeof(val))) goto write_err; - ret = mdev_access(mdev, (char *)&val, sizeof(val), + ret = mdev_access(mdev_state, (char *)&val, sizeof(val), *ppos, true); if (ret <= 0) 
goto write_err; @@ -363,7 +372,7 @@ static ssize_t mdpy_write(struct mdev_device *mdev, const char __user *buf, if (copy_from_user(&val, buf, sizeof(val))) goto write_err; - ret = mdev_access(mdev, (char *)&val, sizeof(val), + ret = mdev_access(mdev_state, (char *)&val, sizeof(val), *ppos, true); if (ret <= 0) goto write_err; @@ -375,7 +384,7 @@ static ssize_t mdpy_write(struct mdev_device *mdev, const char __user *buf, if (copy_from_user(&val, buf, sizeof(val))) goto write_err; - ret = mdev_access(mdev, (char *)&val, sizeof(val), + ret = mdev_access(mdev_state, (char *)&val, sizeof(val), *ppos, true); if (ret <= 0) goto write_err; @@ -393,9 +402,10 @@ static ssize_t mdpy_write(struct mdev_device *mdev, const char __user *buf, return -EFAULT; } -static int mdpy_mmap(struct mdev_device *mdev, struct vm_area_struct *vma) +static int mdpy_mmap(struct vfio_device *vdev, struct vm_area_struct *vma) { - struct mdev_state *mdev_state = mdev_get_drvdata(mdev); + struct mdev_state *mdev_state = + container_of(vdev, struct mdev_state, vdev); if (vma->vm_pgoff != MDPY_MEMORY_BAR_OFFSET >> PAGE_SHIFT) return -EINVAL; @@ -409,16 +419,10 @@ static int mdpy_mmap(struct mdev_device *mdev, struct vm_area_struct *vma) return remap_vmalloc_range(vma, mdev_state->memblk, 0); } -static int mdpy_get_region_info(struct mdev_device *mdev, +static int mdpy_get_region_info(struct mdev_state *mdev_state, struct vfio_region_info *region_info, u16 *cap_type_id, void **cap_type) { - struct mdev_state *mdev_state; - - mdev_state = mdev_get_drvdata(mdev); - if (!mdev_state) - return -EINVAL; - if (region_info->index >= VFIO_PCI_NUM_REGIONS && region_info->index != MDPY_DISPLAY_REGION) return -EINVAL; @@ -447,15 +451,13 @@ static int mdpy_get_region_info(struct mdev_device *mdev, return 0; } -static int mdpy_get_irq_info(struct mdev_device *mdev, - struct vfio_irq_info *irq_info) +static int mdpy_get_irq_info(struct vfio_irq_info *irq_info) { irq_info->count = 0; return 0; } -static int 
mdpy_get_device_info(struct mdev_device *mdev, - struct vfio_device_info *dev_info) +static int mdpy_get_device_info(struct vfio_device_info *dev_info) { dev_info->flags = VFIO_DEVICE_FLAGS_PCI; dev_info->num_regions = VFIO_PCI_NUM_REGIONS; @@ -463,11 +465,9 @@ static int mdpy_get_device_info(struct mdev_device *mdev, return 0; } -static int mdpy_query_gfx_plane(struct mdev_device *mdev, +static int mdpy_query_gfx_plane(struct mdev_state *mdev_state, struct vfio_device_gfx_plane_info *plane) { - struct mdev_state *mdev_state = mdev_get_drvdata(mdev); - if (plane->flags & VFIO_GFX_PLANE_TYPE_PROBE) { if (plane->flags == (VFIO_GFX_PLANE_TYPE_PROBE | VFIO_GFX_PLANE_TYPE_REGION)) @@ -496,14 +496,13 @@ static int mdpy_query_gfx_plane(struct mdev_device *mdev, return 0; } -static long mdpy_ioctl(struct mdev_device *mdev, unsigned int cmd, +static long mdpy_ioctl(struct vfio_device *vdev, unsigned int cmd, unsigned long arg) { int ret = 0; unsigned long minsz; - struct mdev_state *mdev_state; - - mdev_state = mdev_get_drvdata(mdev); + struct mdev_state *mdev_state = + container_of(vdev, struct mdev_state, vdev); switch (cmd) { case VFIO_DEVICE_GET_INFO: @@ -518,7 +517,7 @@ static long mdpy_ioctl(struct mdev_device *mdev, unsigned int cmd, if (info.argsz < minsz) return -EINVAL; - ret = mdpy_get_device_info(mdev, &info); + ret = mdpy_get_device_info(&info); if (ret) return ret; @@ -543,7 +542,7 @@ static long mdpy_ioctl(struct mdev_device *mdev, unsigned int cmd, if (info.argsz < minsz) return -EINVAL; - ret = mdpy_get_region_info(mdev, &info, &cap_type_id, + ret = mdpy_get_region_info(mdev_state, &info, &cap_type_id, &cap_type); if (ret) return ret; @@ -567,7 +566,7 @@ static long mdpy_ioctl(struct mdev_device *mdev, unsigned int cmd, (info.index >= mdev_state->dev_info.num_irqs)) return -EINVAL; - ret = mdpy_get_irq_info(mdev, &info); + ret = mdpy_get_irq_info(&info); if (ret) return ret; @@ -590,7 +589,7 @@ static long mdpy_ioctl(struct mdev_device *mdev, unsigned int 
cmd, if (plane.argsz < minsz) return -EINVAL; - ret = mdpy_query_gfx_plane(mdev, &plane); + ret = mdpy_query_gfx_plane(mdev_state, &plane); if (ret) return ret; @@ -604,12 +603,12 @@ static long mdpy_ioctl(struct mdev_device *mdev, unsigned int cmd, return -EINVAL; case VFIO_DEVICE_RESET: - return mdpy_reset(mdev); + return mdpy_reset(mdev_state); } return -ENOTTY; } -static int mdpy_open(struct mdev_device *mdev) +static int mdpy_open(struct vfio_device *vdev) { if (!try_module_get(THIS_MODULE)) return -ENODEV; @@ -617,7 +616,7 @@ static int mdpy_open(struct mdev_device *mdev) return 0; } -static void mdpy_close(struct mdev_device *mdev) +static void mdpy_close(struct vfio_device *vdev) { module_put(THIS_MODULE); } @@ -626,8 +625,7 @@ static ssize_t resolution_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct mdev_device *mdev = mdev_from_dev(dev); - struct mdev_state *mdev_state = mdev_get_drvdata(mdev); + struct mdev_state *mdev_state = dev_get_drvdata(dev); return sprintf(buf, "%dx%d\n", mdev_state->type->width, @@ -716,18 +714,30 @@ static struct attribute_group *mdev_type_groups[] = { NULL, }; +static const struct vfio_device_ops mdpy_dev_ops = { + .open = mdpy_open, + .release = mdpy_close, + .read = mdpy_read, + .write = mdpy_write, + .ioctl = mdpy_ioctl, + .mmap = mdpy_mmap, +}; + +static struct mdev_driver mdpy_driver = { + .driver = { + .name = "mdpy", + .owner = THIS_MODULE, + .mod_name = KBUILD_MODNAME, + .dev_groups = mdev_dev_groups, + }, + .probe = mdpy_probe, + .remove = mdpy_remove, +}; + static const struct mdev_parent_ops mdev_fops = { .owner = THIS_MODULE, - .mdev_attr_groups = mdev_dev_groups, + .device_driver = &mdpy_driver, .supported_type_groups = mdev_type_groups, - .create = mdpy_create, - .remove = mdpy_remove, - .open = mdpy_open, - .release = mdpy_close, - .read = mdpy_read, - .write = mdpy_write, - .ioctl = mdpy_ioctl, - .mmap = mdpy_mmap, }; static const struct file_operations vd_fops = { @@ -752,11 
+762,15 @@ static int __init mdpy_dev_init(void) cdev_add(&mdpy_cdev, mdpy_devt, MINORMASK + 1); pr_info("%s: major %d\n", __func__, MAJOR(mdpy_devt)); + ret = mdev_register_driver(&mdpy_driver); + if (ret) + goto err_cdev; + mdpy_class = class_create(THIS_MODULE, MDPY_CLASS_NAME); if (IS_ERR(mdpy_class)) { pr_err("Error: failed to register mdpy_dev class\n"); ret = PTR_ERR(mdpy_class); - goto failed1; + goto err_driver; } mdpy_dev.class = mdpy_class; mdpy_dev.release = mdpy_device_release; @@ -764,19 +778,21 @@ static int __init mdpy_dev_init(void) ret = device_register(&mdpy_dev); if (ret) - goto failed2; + goto err_class; ret = mdev_register_device(&mdpy_dev, &mdev_fops); if (ret) - goto failed3; + goto err_device; return 0; -failed3: +err_device: device_unregister(&mdpy_dev); -failed2: +err_class: class_destroy(mdpy_class); -failed1: +err_driver: + mdev_unregister_driver(&mdpy_driver); +err_cdev: cdev_del(&mdpy_cdev); unregister_chrdev_region(mdpy_devt, MINORMASK + 1); return ret; @@ -788,6 +804,7 @@ static void __exit mdpy_dev_exit(void) mdev_unregister_device(&mdpy_dev); device_unregister(&mdpy_dev); + mdev_unregister_driver(&mdpy_driver); cdev_del(&mdpy_cdev); unregister_chrdev_region(mdpy_devt, MINORMASK + 1); class_destroy(mdpy_class); From 681c1615f8914451cfd432ad30e2f307b6490542 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 17 Jun 2021 16:22:18 +0200 Subject: [PATCH 12/19] vfio/mbochs: Convert to use vfio_register_group_dev() This is straightforward conversion, the mdev_state is actually serving as the vfio_device and we can replace all the mdev_get_drvdata()'s and the wonky dead code with a simple container_of(). 
Reviewed-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe Signed-off-by: Christoph Hellwig Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20210617142218.1877096-11-hch@lst.de Signed-off-by: Alex Williamson --- samples/vfio-mdev/mbochs.c | 163 +++++++++++++++++++++---------------- 1 file changed, 91 insertions(+), 72 deletions(-) diff --git a/samples/vfio-mdev/mbochs.c b/samples/vfio-mdev/mbochs.c index 881ef9a7296f..6c0f229db36a 100644 --- a/samples/vfio-mdev/mbochs.c +++ b/samples/vfio-mdev/mbochs.c @@ -130,6 +130,7 @@ static struct class *mbochs_class; static struct cdev mbochs_cdev; static struct device mbochs_dev; static int mbochs_used_mbytes; +static const struct vfio_device_ops mbochs_dev_ops; struct vfio_region_info_ext { struct vfio_region_info base; @@ -160,6 +161,7 @@ struct mbochs_dmabuf { /* State of each mdev device */ struct mdev_state { + struct vfio_device vdev; u8 *vconfig; u64 bar_mask[3]; u32 memory_bar_mask; @@ -425,11 +427,9 @@ static void handle_edid_blob(struct mdev_state *mdev_state, u16 offset, memcpy(buf, mdev_state->edid_blob + offset, count); } -static ssize_t mdev_access(struct mdev_device *mdev, char *buf, size_t count, - loff_t pos, bool is_write) +static ssize_t mdev_access(struct mdev_state *mdev_state, char *buf, + size_t count, loff_t pos, bool is_write) { - struct mdev_state *mdev_state = mdev_get_drvdata(mdev); - struct device *dev = mdev_dev(mdev); struct page *pg; loff_t poff; char *map; @@ -478,7 +478,7 @@ static ssize_t mdev_access(struct mdev_device *mdev, char *buf, size_t count, put_page(pg); } else { - dev_dbg(dev, "%s: %s @0x%llx (unhandled)\n", + dev_dbg(mdev_state->vdev.dev, "%s: %s @0x%llx (unhandled)\n", __func__, is_write ? 
"WR" : "RD", pos); ret = -1; goto accessfailed; @@ -493,9 +493,8 @@ static ssize_t mdev_access(struct mdev_device *mdev, char *buf, size_t count, return ret; } -static int mbochs_reset(struct mdev_device *mdev) +static int mbochs_reset(struct mdev_state *mdev_state) { - struct mdev_state *mdev_state = mdev_get_drvdata(mdev); u32 size64k = mdev_state->memsize / (64 * 1024); int i; @@ -506,12 +505,13 @@ static int mbochs_reset(struct mdev_device *mdev) return 0; } -static int mbochs_create(struct mdev_device *mdev) +static int mbochs_probe(struct mdev_device *mdev) { const struct mbochs_type *type = &mbochs_types[mdev_get_type_group_id(mdev)]; struct device *dev = mdev_dev(mdev); struct mdev_state *mdev_state; + int ret = -ENOMEM; if (type->mbytes + mbochs_used_mbytes > max_mbytes) return -ENOMEM; @@ -519,6 +519,7 @@ static int mbochs_create(struct mdev_device *mdev) mdev_state = kzalloc(sizeof(struct mdev_state), GFP_KERNEL); if (mdev_state == NULL) return -ENOMEM; + vfio_init_group_dev(&mdev_state->vdev, &mdev->dev, &mbochs_dev_ops); mdev_state->vconfig = kzalloc(MBOCHS_CONFIG_SPACE_SIZE, GFP_KERNEL); if (mdev_state->vconfig == NULL) @@ -537,7 +538,6 @@ static int mbochs_create(struct mdev_device *mdev) mutex_init(&mdev_state->ops_lock); mdev_state->mdev = mdev; - mdev_set_drvdata(mdev, mdev_state); INIT_LIST_HEAD(&mdev_state->dmabufs); mdev_state->next_id = 1; @@ -547,32 +547,38 @@ static int mbochs_create(struct mdev_device *mdev) mdev_state->edid_regs.edid_offset = MBOCHS_EDID_BLOB_OFFSET; mdev_state->edid_regs.edid_max_size = sizeof(mdev_state->edid_blob); mbochs_create_config_space(mdev_state); - mbochs_reset(mdev); + mbochs_reset(mdev_state); mbochs_used_mbytes += type->mbytes; + + ret = vfio_register_group_dev(&mdev_state->vdev); + if (ret) + goto err_mem; + dev_set_drvdata(&mdev->dev, mdev_state); return 0; err_mem: kfree(mdev_state->vconfig); kfree(mdev_state); - return -ENOMEM; + return ret; } -static int mbochs_remove(struct mdev_device *mdev) +static 
void mbochs_remove(struct mdev_device *mdev) { - struct mdev_state *mdev_state = mdev_get_drvdata(mdev); + struct mdev_state *mdev_state = dev_get_drvdata(&mdev->dev); mbochs_used_mbytes -= mdev_state->type->mbytes; - mdev_set_drvdata(mdev, NULL); + vfio_unregister_group_dev(&mdev_state->vdev); kfree(mdev_state->pages); kfree(mdev_state->vconfig); kfree(mdev_state); - return 0; } -static ssize_t mbochs_read(struct mdev_device *mdev, char __user *buf, +static ssize_t mbochs_read(struct vfio_device *vdev, char __user *buf, size_t count, loff_t *ppos) { + struct mdev_state *mdev_state = + container_of(vdev, struct mdev_state, vdev); unsigned int done = 0; int ret; @@ -582,7 +588,7 @@ static ssize_t mbochs_read(struct mdev_device *mdev, char __user *buf, if (count >= 4 && !(*ppos % 4)) { u32 val; - ret = mdev_access(mdev, (char *)&val, sizeof(val), + ret = mdev_access(mdev_state, (char *)&val, sizeof(val), *ppos, false); if (ret <= 0) goto read_err; @@ -594,7 +600,7 @@ static ssize_t mbochs_read(struct mdev_device *mdev, char __user *buf, } else if (count >= 2 && !(*ppos % 2)) { u16 val; - ret = mdev_access(mdev, (char *)&val, sizeof(val), + ret = mdev_access(mdev_state, (char *)&val, sizeof(val), *ppos, false); if (ret <= 0) goto read_err; @@ -606,7 +612,7 @@ static ssize_t mbochs_read(struct mdev_device *mdev, char __user *buf, } else { u8 val; - ret = mdev_access(mdev, (char *)&val, sizeof(val), + ret = mdev_access(mdev_state, (char *)&val, sizeof(val), *ppos, false); if (ret <= 0) goto read_err; @@ -629,9 +635,11 @@ static ssize_t mbochs_read(struct mdev_device *mdev, char __user *buf, return -EFAULT; } -static ssize_t mbochs_write(struct mdev_device *mdev, const char __user *buf, +static ssize_t mbochs_write(struct vfio_device *vdev, const char __user *buf, size_t count, loff_t *ppos) { + struct mdev_state *mdev_state = + container_of(vdev, struct mdev_state, vdev); unsigned int done = 0; int ret; @@ -644,7 +652,7 @@ static ssize_t mbochs_write(struct mdev_device 
*mdev, const char __user *buf, if (copy_from_user(&val, buf, sizeof(val))) goto write_err; - ret = mdev_access(mdev, (char *)&val, sizeof(val), + ret = mdev_access(mdev_state, (char *)&val, sizeof(val), *ppos, true); if (ret <= 0) goto write_err; @@ -656,7 +664,7 @@ static ssize_t mbochs_write(struct mdev_device *mdev, const char __user *buf, if (copy_from_user(&val, buf, sizeof(val))) goto write_err; - ret = mdev_access(mdev, (char *)&val, sizeof(val), + ret = mdev_access(mdev_state, (char *)&val, sizeof(val), *ppos, true); if (ret <= 0) goto write_err; @@ -668,7 +676,7 @@ static ssize_t mbochs_write(struct mdev_device *mdev, const char __user *buf, if (copy_from_user(&val, buf, sizeof(val))) goto write_err; - ret = mdev_access(mdev, (char *)&val, sizeof(val), + ret = mdev_access(mdev_state, (char *)&val, sizeof(val), *ppos, true); if (ret <= 0) goto write_err; @@ -754,9 +762,10 @@ static const struct vm_operations_struct mbochs_region_vm_ops = { .fault = mbochs_region_vm_fault, }; -static int mbochs_mmap(struct mdev_device *mdev, struct vm_area_struct *vma) +static int mbochs_mmap(struct vfio_device *vdev, struct vm_area_struct *vma) { - struct mdev_state *mdev_state = mdev_get_drvdata(mdev); + struct mdev_state *mdev_state = + container_of(vdev, struct mdev_state, vdev); if (vma->vm_pgoff != MBOCHS_MEMORY_BAR_OFFSET >> PAGE_SHIFT) return -EINVAL; @@ -963,7 +972,7 @@ mbochs_dmabuf_find_by_id(struct mdev_state *mdev_state, u32 id) static int mbochs_dmabuf_export(struct mbochs_dmabuf *dmabuf) { struct mdev_state *mdev_state = dmabuf->mdev_state; - struct device *dev = mdev_dev(mdev_state->mdev); + struct device *dev = mdev_state->vdev.dev; DEFINE_DMA_BUF_EXPORT_INFO(exp_info); struct dma_buf *buf; @@ -991,15 +1000,10 @@ static int mbochs_dmabuf_export(struct mbochs_dmabuf *dmabuf) return 0; } -static int mbochs_get_region_info(struct mdev_device *mdev, +static int mbochs_get_region_info(struct mdev_state *mdev_state, struct vfio_region_info_ext *ext) { struct 
vfio_region_info *region_info = &ext->base; - struct mdev_state *mdev_state; - - mdev_state = mdev_get_drvdata(mdev); - if (!mdev_state) - return -EINVAL; if (region_info->index >= MBOCHS_NUM_REGIONS) return -EINVAL; @@ -1047,15 +1051,13 @@ static int mbochs_get_region_info(struct mdev_device *mdev, return 0; } -static int mbochs_get_irq_info(struct mdev_device *mdev, - struct vfio_irq_info *irq_info) +static int mbochs_get_irq_info(struct vfio_irq_info *irq_info) { irq_info->count = 0; return 0; } -static int mbochs_get_device_info(struct mdev_device *mdev, - struct vfio_device_info *dev_info) +static int mbochs_get_device_info(struct vfio_device_info *dev_info) { dev_info->flags = VFIO_DEVICE_FLAGS_PCI; dev_info->num_regions = MBOCHS_NUM_REGIONS; @@ -1063,11 +1065,9 @@ static int mbochs_get_device_info(struct mdev_device *mdev, return 0; } -static int mbochs_query_gfx_plane(struct mdev_device *mdev, +static int mbochs_query_gfx_plane(struct mdev_state *mdev_state, struct vfio_device_gfx_plane_info *plane) { - struct mdev_state *mdev_state = mdev_get_drvdata(mdev); - struct device *dev = mdev_dev(mdev); struct mbochs_dmabuf *dmabuf; struct mbochs_mode mode; int ret; @@ -1121,18 +1121,16 @@ static int mbochs_query_gfx_plane(struct mdev_device *mdev, done: if (plane->drm_plane_type == DRM_PLANE_TYPE_PRIMARY && mdev_state->active_id != plane->dmabuf_id) { - dev_dbg(dev, "%s: primary: %d => %d\n", __func__, - mdev_state->active_id, plane->dmabuf_id); + dev_dbg(mdev_state->vdev.dev, "%s: primary: %d => %d\n", + __func__, mdev_state->active_id, plane->dmabuf_id); mdev_state->active_id = plane->dmabuf_id; } mutex_unlock(&mdev_state->ops_lock); return 0; } -static int mbochs_get_gfx_dmabuf(struct mdev_device *mdev, - u32 id) +static int mbochs_get_gfx_dmabuf(struct mdev_state *mdev_state, u32 id) { - struct mdev_state *mdev_state = mdev_get_drvdata(mdev); struct mbochs_dmabuf *dmabuf; mutex_lock(&mdev_state->ops_lock); @@ -1154,9 +1152,11 @@ static int 
mbochs_get_gfx_dmabuf(struct mdev_device *mdev, return dma_buf_fd(dmabuf->buf, 0); } -static long mbochs_ioctl(struct mdev_device *mdev, unsigned int cmd, - unsigned long arg) +static long mbochs_ioctl(struct vfio_device *vdev, unsigned int cmd, + unsigned long arg) { + struct mdev_state *mdev_state = + container_of(vdev, struct mdev_state, vdev); int ret = 0; unsigned long minsz, outsz; @@ -1173,7 +1173,7 @@ static long mbochs_ioctl(struct mdev_device *mdev, unsigned int cmd, if (info.argsz < minsz) return -EINVAL; - ret = mbochs_get_device_info(mdev, &info); + ret = mbochs_get_device_info(&info); if (ret) return ret; @@ -1197,7 +1197,7 @@ static long mbochs_ioctl(struct mdev_device *mdev, unsigned int cmd, if (outsz > sizeof(info)) return -EINVAL; - ret = mbochs_get_region_info(mdev, &info); + ret = mbochs_get_region_info(mdev_state, &info); if (ret) return ret; @@ -1220,7 +1220,7 @@ static long mbochs_ioctl(struct mdev_device *mdev, unsigned int cmd, (info.index >= VFIO_PCI_NUM_IRQS)) return -EINVAL; - ret = mbochs_get_irq_info(mdev, &info); + ret = mbochs_get_irq_info(&info); if (ret) return ret; @@ -1243,7 +1243,7 @@ static long mbochs_ioctl(struct mdev_device *mdev, unsigned int cmd, if (plane.argsz < minsz) return -EINVAL; - ret = mbochs_query_gfx_plane(mdev, &plane); + ret = mbochs_query_gfx_plane(mdev_state, &plane); if (ret) return ret; @@ -1260,19 +1260,19 @@ static long mbochs_ioctl(struct mdev_device *mdev, unsigned int cmd, if (get_user(dmabuf_id, (__u32 __user *)arg)) return -EFAULT; - return mbochs_get_gfx_dmabuf(mdev, dmabuf_id); + return mbochs_get_gfx_dmabuf(mdev_state, dmabuf_id); } case VFIO_DEVICE_SET_IRQS: return -EINVAL; case VFIO_DEVICE_RESET: - return mbochs_reset(mdev); + return mbochs_reset(mdev_state); } return -ENOTTY; } -static int mbochs_open(struct mdev_device *mdev) +static int mbochs_open(struct vfio_device *vdev) { if (!try_module_get(THIS_MODULE)) return -ENODEV; @@ -1280,9 +1280,10 @@ static int mbochs_open(struct mdev_device 
*mdev) return 0; } -static void mbochs_close(struct mdev_device *mdev) +static void mbochs_close(struct vfio_device *vdev) { - struct mdev_state *mdev_state = mdev_get_drvdata(mdev); + struct mdev_state *mdev_state = + container_of(vdev, struct mdev_state, vdev); struct mbochs_dmabuf *dmabuf, *tmp; mutex_lock(&mdev_state->ops_lock); @@ -1306,8 +1307,7 @@ static ssize_t memory_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct mdev_device *mdev = mdev_from_dev(dev); - struct mdev_state *mdev_state = mdev_get_drvdata(mdev); + struct mdev_state *mdev_state = dev_get_drvdata(dev); return sprintf(buf, "%d MB\n", mdev_state->type->mbytes); } @@ -1398,18 +1398,30 @@ static struct attribute_group *mdev_type_groups[] = { NULL, }; +static const struct vfio_device_ops mbochs_dev_ops = { + .open = mbochs_open, + .release = mbochs_close, + .read = mbochs_read, + .write = mbochs_write, + .ioctl = mbochs_ioctl, + .mmap = mbochs_mmap, +}; + +static struct mdev_driver mbochs_driver = { + .driver = { + .name = "mbochs", + .owner = THIS_MODULE, + .mod_name = KBUILD_MODNAME, + .dev_groups = mdev_dev_groups, + }, + .probe = mbochs_probe, + .remove = mbochs_remove, +}; + static const struct mdev_parent_ops mdev_fops = { .owner = THIS_MODULE, - .mdev_attr_groups = mdev_dev_groups, + .device_driver = &mbochs_driver, .supported_type_groups = mdev_type_groups, - .create = mbochs_create, - .remove = mbochs_remove, - .open = mbochs_open, - .release = mbochs_close, - .read = mbochs_read, - .write = mbochs_write, - .ioctl = mbochs_ioctl, - .mmap = mbochs_mmap, }; static const struct file_operations vd_fops = { @@ -1434,11 +1446,15 @@ static int __init mbochs_dev_init(void) cdev_add(&mbochs_cdev, mbochs_devt, MINORMASK + 1); pr_info("%s: major %d\n", __func__, MAJOR(mbochs_devt)); + ret = mdev_register_driver(&mbochs_driver); + if (ret) + goto err_cdev; + mbochs_class = class_create(THIS_MODULE, MBOCHS_CLASS_NAME); if (IS_ERR(mbochs_class)) { pr_err("Error: failed to 
register mbochs_dev class\n"); ret = PTR_ERR(mbochs_class); - goto failed1; + goto err_driver; } mbochs_dev.class = mbochs_class; mbochs_dev.release = mbochs_device_release; @@ -1446,19 +1462,21 @@ static int __init mbochs_dev_init(void) ret = device_register(&mbochs_dev); if (ret) - goto failed2; + goto err_class; ret = mdev_register_device(&mbochs_dev, &mdev_fops); if (ret) - goto failed3; + goto err_device; return 0; -failed3: +err_device: device_unregister(&mbochs_dev); -failed2: +err_class: class_destroy(mbochs_class); -failed1: +err_driver: + mdev_unregister_driver(&mbochs_driver); +err_cdev: cdev_del(&mbochs_cdev); unregister_chrdev_region(mbochs_devt, MINORMASK + 1); return ret; @@ -1470,6 +1488,7 @@ static void __exit mbochs_dev_exit(void) mdev_unregister_device(&mbochs_dev); device_unregister(&mbochs_dev); + mdev_unregister_driver(&mbochs_driver); cdev_del(&mbochs_cdev); unregister_chrdev_region(mbochs_devt, MINORMASK + 1); class_destroy(mbochs_class); From c7396f2eac2bf9d767d9cf49bd26224fbb894aaf Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Tue, 8 Jun 2021 14:28:41 +0300 Subject: [PATCH 13/19] vfio/iommu_type1: rename vfio_group struct to vfio_iommu_group The vfio_group structure is already defined in vfio module so in order to improve code readability and for simplicity, rename the vfio_group structure in vfio_iommu_type1 module to vfio_iommu_group. 
Signed-off-by: Max Gurtovoy Link: https://lore.kernel.org/r/20210608112841.51897-1-mgurtovoy@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 34 +++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index a3e925a41b0d..830beb920a14 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -110,7 +110,7 @@ struct vfio_batch { int offset; /* of next entry in pages */ }; -struct vfio_group { +struct vfio_iommu_group { struct iommu_group *iommu_group; struct list_head next; bool mdev_group; /* An mdev group */ @@ -160,8 +160,9 @@ struct vfio_regions { static int put_pfn(unsigned long pfn, int prot); -static struct vfio_group *vfio_iommu_find_iommu_group(struct vfio_iommu *iommu, - struct iommu_group *iommu_group); +static struct vfio_iommu_group* +vfio_iommu_find_iommu_group(struct vfio_iommu *iommu, + struct iommu_group *iommu_group); /* * This code handles mapping and unmapping of user data buffers @@ -836,7 +837,7 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, unsigned long *phys_pfn) { struct vfio_iommu *iommu = iommu_data; - struct vfio_group *group; + struct vfio_iommu_group *group; int i, j, ret; unsigned long remote_vaddr; struct vfio_dma *dma; @@ -1875,10 +1876,10 @@ static void vfio_test_domain_fgsp(struct vfio_domain *domain) __free_pages(pages, order); } -static struct vfio_group *find_iommu_group(struct vfio_domain *domain, - struct iommu_group *iommu_group) +static struct vfio_iommu_group *find_iommu_group(struct vfio_domain *domain, + struct iommu_group *iommu_group) { - struct vfio_group *g; + struct vfio_iommu_group *g; list_for_each_entry(g, &domain->group_list, next) { if (g->iommu_group == iommu_group) @@ -1888,11 +1889,12 @@ static struct vfio_group *find_iommu_group(struct vfio_domain *domain, return NULL; } -static struct vfio_group *vfio_iommu_find_iommu_group(struct 
vfio_iommu *iommu, - struct iommu_group *iommu_group) +static struct vfio_iommu_group* +vfio_iommu_find_iommu_group(struct vfio_iommu *iommu, + struct iommu_group *iommu_group) { struct vfio_domain *domain; - struct vfio_group *group = NULL; + struct vfio_iommu_group *group = NULL; list_for_each_entry(domain, &iommu->domain_list, next) { group = find_iommu_group(domain, iommu_group); @@ -1967,7 +1969,7 @@ static int vfio_mdev_detach_domain(struct device *dev, void *data) } static int vfio_iommu_attach_group(struct vfio_domain *domain, - struct vfio_group *group) + struct vfio_iommu_group *group) { if (group->mdev_group) return iommu_group_for_each_dev(group->iommu_group, @@ -1978,7 +1980,7 @@ static int vfio_iommu_attach_group(struct vfio_domain *domain, } static void vfio_iommu_detach_group(struct vfio_domain *domain, - struct vfio_group *group) + struct vfio_iommu_group *group) { if (group->mdev_group) iommu_group_for_each_dev(group->iommu_group, domain->domain, @@ -2242,7 +2244,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, struct iommu_group *iommu_group) { struct vfio_iommu *iommu = iommu_data; - struct vfio_group *group; + struct vfio_iommu_group *group; struct vfio_domain *domain, *d; struct bus_type *bus = NULL; int ret; @@ -2518,7 +2520,7 @@ static int vfio_iommu_resv_refresh(struct vfio_iommu *iommu, struct list_head *iova_copy) { struct vfio_domain *d; - struct vfio_group *g; + struct vfio_iommu_group *g; struct vfio_iova *node; dma_addr_t start, end; LIST_HEAD(resv_regions); @@ -2560,7 +2562,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, { struct vfio_iommu *iommu = iommu_data; struct vfio_domain *domain; - struct vfio_group *group; + struct vfio_iommu_group *group; bool update_dirty_scope = false; LIST_HEAD(iova_copy); @@ -2681,7 +2683,7 @@ static void *vfio_iommu_type1_open(unsigned long arg) static void vfio_release_domain(struct vfio_domain *domain, bool external) { - struct vfio_group *group, *group_tmp; + struct 
vfio_iommu_group *group, *group_tmp; list_for_each_entry_safe(group, group_tmp, &domain->group_list, next) { From 0af5160edb87b1868eba514422d3991628a018f8 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 22 Jun 2021 19:37:10 +0100 Subject: [PATCH 14/19] vfio/mdpy: Fix memory leak of object mdev_state->vconfig In the case where the call to vfio_register_group_dev fails the error return path kfree's mdev_state but not mdev_state->vconfig. Fix this by kfree'ing mdev_state->vconfig before returning. Addresses-Coverity: ("Resource leak") Fixes: 437e41368c01 ("vfio/mdpy: Convert to use vfio_register_group_dev()") Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20210622183710.28954-1-colin.king@canonical.com Signed-off-by: Alex Williamson --- samples/vfio-mdev/mdpy.c | 1 + 1 file changed, 1 insertion(+) diff --git a/samples/vfio-mdev/mdpy.c b/samples/vfio-mdev/mdpy.c index 7e9c9df0f05b..393c9df6f6a0 100644 --- a/samples/vfio-mdev/mdpy.c +++ b/samples/vfio-mdev/mdpy.c @@ -261,6 +261,7 @@ static int mdpy_probe(struct mdev_device *mdev) ret = vfio_register_group_dev(&mdev_state->vdev); if (ret) { + kfree(mdev_state->vconfig); kfree(mdev_state); return ret; } From e3a9b1212b9d6cb20751196e338f4a5138d539d3 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Tue, 22 Jun 2021 19:28:23 -0700 Subject: [PATCH 15/19] PCI: Export pci_dev_trylock() and pci_dev_unlock() Other places in the kernel use this form, and so just provide a common path for it. 
Acked-by: Bjorn Helgaas Signed-off-by: Luis Chamberlain Reviewed-by: Cornelia Huck Link: https://lore.kernel.org/r/20210623022824.308041-2-mcgrof@kernel.org Signed-off-by: Alex Williamson --- drivers/pci/pci.c | 6 ++++-- include/linux/pci.h | 3 +++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index b717680377a9..10f08ed3589f 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -5028,7 +5028,7 @@ static void pci_dev_lock(struct pci_dev *dev) } /* Return 1 on successful lock, 0 on contention */ -static int pci_dev_trylock(struct pci_dev *dev) +int pci_dev_trylock(struct pci_dev *dev) { if (pci_cfg_access_trylock(dev)) { if (device_trylock(&dev->dev)) @@ -5038,12 +5038,14 @@ static int pci_dev_trylock(struct pci_dev *dev) return 0; } +EXPORT_SYMBOL_GPL(pci_dev_trylock); -static void pci_dev_unlock(struct pci_dev *dev) +void pci_dev_unlock(struct pci_dev *dev) { device_unlock(&dev->dev); pci_cfg_access_unlock(dev); } +EXPORT_SYMBOL_GPL(pci_dev_unlock); static void pci_dev_save_and_disable(struct pci_dev *dev) { diff --git a/include/linux/pci.h b/include/linux/pci.h index 24306504226a..7765c325706a 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1621,6 +1621,9 @@ void pci_cfg_access_lock(struct pci_dev *dev); bool pci_cfg_access_trylock(struct pci_dev *dev); void pci_cfg_access_unlock(struct pci_dev *dev); +int pci_dev_trylock(struct pci_dev *dev); +void pci_dev_unlock(struct pci_dev *dev); + /* * PCI domain support. Sometimes called PCI segment (eg by ACPI), * a PCI domain is defined to be a set of PCI buses which share From 742b4c0d1efe7a7640ad17f1bbf696a1305f6495 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Tue, 22 Jun 2021 19:28:24 -0700 Subject: [PATCH 16/19] vfio: use the new pci_dev_trylock() helper to simplify try lock Use the new pci_dev_trylock() helper to simplify our locking. 
Signed-off-by: Luis Chamberlain Reviewed-by: Cornelia Huck Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20210623022824.308041-3-mcgrof@kernel.org Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index f6729baa1bf4..759dfb118712 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -477,13 +477,10 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev) * We can not use the "try" reset interface here, which will * overwrite the previously restored configuration information. */ - if (vdev->reset_works && pci_cfg_access_trylock(pdev)) { - if (device_trylock(&pdev->dev)) { - if (!__pci_reset_function_locked(pdev)) - vdev->needs_reset = false; - device_unlock(&pdev->dev); - } - pci_cfg_access_unlock(pdev); + if (vdev->reset_works && pci_dev_trylock(pdev)) { + if (!__pci_reset_function_locked(pdev)) + vdev->needs_reset = false; + pci_dev_unlock(pdev); } pci_restore_state(pdev); From 0dd1b7fc3e7d30802d5839f6bf8957023b437ad4 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 25 Jun 2021 12:56:04 -0300 Subject: [PATCH 17/19] vfio/mtty: Delete mdev_devices_list Dan points out that an error case left things on this list. It is also missing locking in available_instances_show(). Further study shows the list isn't needed at all, just store the total ports in use in an atomic and delete the whole thing. 
Reported-by: Dan Carpenter Fixes: 09177ac91921 ("vfio/mtty: Convert to use vfio_register_group_dev()") Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/0-v1-0bc56b362ca7+62-mtty_used_ports_jgg@nvidia.com Reviewed-by: Cornelia Huck Signed-off-by: Alex Williamson --- samples/vfio-mdev/mtty.c | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c index faf9b8e8873a..ffbaf07a17ea 100644 --- a/samples/vfio-mdev/mtty.c +++ b/samples/vfio-mdev/mtty.c @@ -144,8 +144,7 @@ struct mdev_state { int nr_ports; }; -static struct mutex mdev_list_lock; -static struct list_head mdev_devices_list; +static atomic_t mdev_used_ports; static const struct file_operations vd_fops = { .owner = THIS_MODULE, @@ -733,15 +732,13 @@ static int mtty_probe(struct mdev_device *mdev) mtty_create_config_space(mdev_state); - mutex_lock(&mdev_list_lock); - list_add(&mdev_state->next, &mdev_devices_list); - mutex_unlock(&mdev_list_lock); - ret = vfio_register_group_dev(&mdev_state->vdev); if (ret) { kfree(mdev_state); return ret; } + atomic_add(mdev_state->nr_ports, &mdev_used_ports); + dev_set_drvdata(&mdev->dev, mdev_state); return 0; } @@ -750,10 +747,8 @@ static void mtty_remove(struct mdev_device *mdev) { struct mdev_state *mdev_state = dev_get_drvdata(&mdev->dev); + atomic_sub(mdev_state->nr_ports, &mdev_used_ports); vfio_unregister_group_dev(&mdev_state->vdev); - mutex_lock(&mdev_list_lock); - list_del(&mdev_state->next); - mutex_unlock(&mdev_list_lock); kfree(mdev_state->vconfig); kfree(mdev_state); @@ -1274,14 +1269,10 @@ static ssize_t available_instances_show(struct mdev_type *mtype, struct mdev_type_attribute *attr, char *buf) { - struct mdev_state *mds; unsigned int ports = mtype_get_type_group_id(mtype) + 1; - int used = 0; - list_for_each_entry(mds, &mdev_devices_list, next) - used += mds->nr_ports; - - return sprintf(buf, "%d\n", (MAX_MTTYS - used)/ports); + return sprintf(buf, "%d\n", 
+ (MAX_MTTYS - atomic_read(&mdev_used_ports)) / ports); } static MDEV_TYPE_ATTR_RO(available_instances); @@ -1395,9 +1386,6 @@ static int __init mtty_dev_init(void) ret = mdev_register_device(&mtty_dev.dev, &mdev_fops); if (ret) goto err_device; - - mutex_init(&mdev_list_lock); - INIT_LIST_HEAD(&mdev_devices_list); return 0; err_device: From 97d0a6874478802b68e3bea7aa9b9a333d257182 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 25 Jun 2021 15:20:06 -0600 Subject: [PATCH 18/19] vfio/mtty: Enforce available_instances The sample mtty mdev driver doesn't actually enforce the number of device instances it claims are available. Implement this properly. Link: https://lore.kernel.org/r/162465624894.3338367.12935940647049917981.stgit@omen Reviewed-by: Jason Gunthorpe Reviewed-by: Cornelia Huck Reviewed-by: Kirti Wankhede Signed-off-by: Alex Williamson --- samples/vfio-mdev/mtty.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c index ffbaf07a17ea..8b26fecc4afe 100644 --- a/samples/vfio-mdev/mtty.c +++ b/samples/vfio-mdev/mtty.c @@ -144,7 +144,7 @@ struct mdev_state { int nr_ports; }; -static atomic_t mdev_used_ports; +static atomic_t mdev_avail_ports = ATOMIC_INIT(MAX_MTTYS); static const struct file_operations vd_fops = { .owner = THIS_MODULE, @@ -707,11 +707,20 @@ static int mtty_probe(struct mdev_device *mdev) { struct mdev_state *mdev_state; int nr_ports = mdev_get_type_group_id(mdev) + 1; + int avail_ports = atomic_read(&mdev_avail_ports); int ret; + do { + if (avail_ports < nr_ports) + return -ENOSPC; + } while (!atomic_try_cmpxchg(&mdev_avail_ports, + &avail_ports, avail_ports - nr_ports)); + mdev_state = kzalloc(sizeof(struct mdev_state), GFP_KERNEL); - if (mdev_state == NULL) + if (mdev_state == NULL) { + atomic_add(nr_ports, &mdev_avail_ports); return -ENOMEM; + } vfio_init_group_dev(&mdev_state->vdev, &mdev->dev, &mtty_dev_ops); @@ -724,6 +733,7 @@ static int 
mtty_probe(struct mdev_device *mdev) if (mdev_state->vconfig == NULL) { kfree(mdev_state); + atomic_add(nr_ports, &mdev_avail_ports); return -ENOMEM; } @@ -735,9 +745,9 @@ static int mtty_probe(struct mdev_device *mdev) ret = vfio_register_group_dev(&mdev_state->vdev); if (ret) { kfree(mdev_state); + atomic_add(nr_ports, &mdev_avail_ports); return ret; } - atomic_add(mdev_state->nr_ports, &mdev_used_ports); dev_set_drvdata(&mdev->dev, mdev_state); return 0; @@ -746,12 +756,13 @@ static int mtty_probe(struct mdev_device *mdev) static void mtty_remove(struct mdev_device *mdev) { struct mdev_state *mdev_state = dev_get_drvdata(&mdev->dev); + int nr_ports = mdev_state->nr_ports; - atomic_sub(mdev_state->nr_ports, &mdev_used_ports); vfio_unregister_group_dev(&mdev_state->vdev); kfree(mdev_state->vconfig); kfree(mdev_state); + atomic_add(nr_ports, &mdev_avail_ports); } static int mtty_reset(struct mdev_state *mdev_state) @@ -1271,8 +1282,7 @@ static ssize_t available_instances_show(struct mdev_type *mtype, { unsigned int ports = mtype_get_type_group_id(mtype) + 1; - return sprintf(buf, "%d\n", - (MAX_MTTYS - atomic_read(&mdev_used_ports)) / ports); + return sprintf(buf, "%d\n", atomic_read(&mdev_avail_ports) / ports); } static MDEV_TYPE_ATTR_RO(available_instances); From 6a45ece4c9af473555f01f0f8b97eba56e3c7d0d Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 28 Jun 2021 14:08:12 -0600 Subject: [PATCH 19/19] vfio/pci: Handle concurrent vma faults io_remap_pfn_range() will trigger a BUG_ON if it encounters a populated pte within the mapping range. This can occur because we map the entire vma on fault and multiple faults can be blocked behind the vma_lock. This leads to traces like the one reported below. We can use our vma_list to test whether a given vma is mapped to avoid this issue. [ 1591.733256] kernel BUG at mm/memory.c:2177! 
[ 1591.739515] Internal error: Oops - BUG: 0 [#1] PREEMPT SMP [ 1591.747381] Modules linked in: vfio_iommu_type1 vfio_pci vfio_virqfd vfio pv680_mii(O) [ 1591.760536] CPU: 2 PID: 227 Comm: lcore-worker-2 Tainted: G O 5.11.0-rc3+ #1 [ 1591.770735] Hardware name: , BIOS HixxxxFPGA 1P B600 V121-1 [ 1591.778872] pstate: 40400009 (nZcv daif +PAN -UAO -TCO BTYPE=--) [ 1591.786134] pc : remap_pfn_range+0x214/0x340 [ 1591.793564] lr : remap_pfn_range+0x1b8/0x340 [ 1591.799117] sp : ffff80001068bbd0 [ 1591.803476] x29: ffff80001068bbd0 x28: 0000042eff6f0000 [ 1591.810404] x27: 0000001100910000 x26: 0000001300910000 [ 1591.817457] x25: 0068000000000fd3 x24: ffffa92f1338e358 [ 1591.825144] x23: 0000001140000000 x22: 0000000000000041 [ 1591.832506] x21: 0000001300910000 x20: ffffa92f141a4000 [ 1591.839520] x19: 0000001100a00000 x18: 0000000000000000 [ 1591.846108] x17: 0000000000000000 x16: ffffa92f11844540 [ 1591.853570] x15: 0000000000000000 x14: 0000000000000000 [ 1591.860768] x13: fffffc0000000000 x12: 0000000000000880 [ 1591.868053] x11: ffff0821bf3d01d0 x10: ffff5ef2abd89000 [ 1591.875932] x9 : ffffa92f12ab0064 x8 : ffffa92f136471c0 [ 1591.883208] x7 : 0000001140910000 x6 : 0000000200000000 [ 1591.890177] x5 : 0000000000000001 x4 : 0000000000000001 [ 1591.896656] x3 : 0000000000000000 x2 : 0168044000000fd3 [ 1591.903215] x1 : ffff082126261880 x0 : fffffc2084989868 [ 1591.910234] Call trace: [ 1591.914837] remap_pfn_range+0x214/0x340 [ 1591.921765] vfio_pci_mmap_fault+0xac/0x130 [vfio_pci] [ 1591.931200] __do_fault+0x44/0x12c [ 1591.937031] handle_mm_fault+0xcc8/0x1230 [ 1591.942475] do_page_fault+0x16c/0x484 [ 1591.948635] do_translation_fault+0xbc/0xd8 [ 1591.954171] do_mem_abort+0x4c/0xc0 [ 1591.960316] el0_da+0x40/0x80 [ 1591.965585] el0_sync_handler+0x168/0x1b0 [ 1591.971608] el0_sync+0x174/0x180 [ 1591.978312] Code: eb1b027f 540000c0 f9400022 b4fffe02 (d4210000) Fixes: 11c4cd07ba11 ("vfio-pci: Fault mmaps to enable vma tracking") Reported-by: Zeng Tao Suggested-by: 
Zeng Tao Link: https://lore.kernel.org/r/162497742783.3883260.3282953006487785034.stgit@omen Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci.c | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 759dfb118712..318864d52837 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -1584,6 +1584,7 @@ static vm_fault_t vfio_pci_mmap_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct vfio_pci_device *vdev = vma->vm_private_data; + struct vfio_pci_mmap_vma *mmap_vma; vm_fault_t ret = VM_FAULT_NOPAGE; mutex_lock(&vdev->vma_lock); @@ -1591,24 +1592,36 @@ static vm_fault_t vfio_pci_mmap_fault(struct vm_fault *vmf) if (!__vfio_pci_memory_enabled(vdev)) { ret = VM_FAULT_SIGBUS; - mutex_unlock(&vdev->vma_lock); + goto up_out; + } + + /* + * We populate the whole vma on fault, so we need to test whether + * the vma has already been mapped, such as for concurrent faults + * to the same vma. io_remap_pfn_range() will trigger a BUG_ON if + * we ask it to fill the same range again. + */ + list_for_each_entry(mmap_vma, &vdev->vma_list, vma_next) { + if (mmap_vma->vma == vma) + goto up_out; + } + + if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, + vma->vm_end - vma->vm_start, + vma->vm_page_prot)) { + ret = VM_FAULT_SIGBUS; + zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start); goto up_out; } if (__vfio_pci_add_vma(vdev, vma)) { ret = VM_FAULT_OOM; - mutex_unlock(&vdev->vma_lock); - goto up_out; + zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start); } - mutex_unlock(&vdev->vma_lock); - - if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, - vma->vm_end - vma->vm_start, vma->vm_page_prot)) - ret = VM_FAULT_SIGBUS; - up_out: up_read(&vdev->memory_lock); + mutex_unlock(&vdev->vma_lock); return ret; }