mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-07 14:32:23 +00:00
ptp: fix the race between the release of ptp_clock and cdev
In a case when a ptp chardev (like /dev/ptp0) is open but an underlying device is removed, closing this file leads to a race. This reproduces easily in a kvm virtual machine: ts# cat openptp0.c int main() { ... fp = fopen("/dev/ptp0", "r"); ... sleep(10); } ts# uname -r 5.5.0-rc3-46cf053e ts# cat /proc/cmdline ... slub_debug=FZP ts# modprobe ptp_kvm ts# ./openptp0 & [1] 670 opened /dev/ptp0, sleeping 10s... ts# rmmod ptp_kvm ts# ls /dev/ptp* ls: cannot access '/dev/ptp*': No such file or directory ts# ...woken up [ 48.010809] general protection fault: 0000 [#1] SMP [ 48.012502] CPU: 6 PID: 658 Comm: openptp0 Not tainted 5.5.0-rc3-46cf053e #25 [ 48.014624] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), ... [ 48.016270] RIP: 0010:module_put.part.0+0x7/0x80 [ 48.017939] RSP: 0018:ffffb3850073be00 EFLAGS: 00010202 [ 48.018339] RAX: 000000006b6b6b6b RBX: 6b6b6b6b6b6b6b6b RCX: ffff89a476c00ad0 [ 48.018936] RDX: fffff65a08d3ea08 RSI: 0000000000000247 RDI: 6b6b6b6b6b6b6b6b [ 48.019470] ... ^^^ a slub poison [ 48.023854] Call Trace: [ 48.024050] __fput+0x21f/0x240 [ 48.024288] task_work_run+0x79/0x90 [ 48.024555] do_exit+0x2af/0xab0 [ 48.024799] ? vfs_write+0x16a/0x190 [ 48.025082] do_group_exit+0x35/0x90 [ 48.025387] __x64_sys_exit_group+0xf/0x10 [ 48.025737] do_syscall_64+0x3d/0x130 [ 48.026056] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 48.026479] RIP: 0033:0x7f53b12082f6 [ 48.026792] ... [ 48.030945] Modules linked in: ptp i6300esb watchdog [last unloaded: ptp_kvm] [ 48.045001] Fixing recursive fault but reboot is needed! This happens in: static void __fput(struct file *file) { ... 
if (file->f_op->release) file->f_op->release(inode, file); <<< cdev is kfree'd here if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL && !(mode & FMODE_PATH))) { cdev_put(inode->i_cdev); <<< cdev fields are accessed here Namely: __fput() posix_clock_release() kref_put(&clk->kref, delete_clock) <<< the last reference delete_clock() delete_ptp_clock() kfree(ptp) <<< cdev is embedded in ptp cdev_put module_put(p->owner) <<< *p is kfree'd, bang! Here cdev is embedded in posix_clock which is embedded in ptp_clock. The race happens because ptp_clock's lifetime is controlled by two refcounts: kref and cdev.kobj in posix_clock. This is wrong. Make ptp_clock's sysfs device a parent of cdev with cdev_device_add() created especially for such cases. This way the parent device with its ptp_clock is not released until all references to the cdev are released. This adds a requirement that an initialized but not exposed struct device should be provided to posix_clock_register() by a caller instead of a simple dev_t. This approach was adopted from the commit 72139dfa24
("watchdog: Fix the race between the release of watchdog_core_data and cdev"). See details of the implementation in the commit 233ed09d7f
("chardev: add helper function to register char devs with a struct device"). Link: https://lore.kernel.org/linux-fsdevel/20191125125342.6189-1-vdronov@redhat.com/T/#u Analyzed-by: Stephen Johnston <sjohnsto@redhat.com> Analyzed-by: Vern Lovejoy <vlovejoy@redhat.com> Signed-off-by: Vladis Dronov <vdronov@redhat.com> Acked-by: Richard Cochran <richardcochran@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
54fa49ee88
commit
a33121e548
@ -166,9 +166,9 @@ static struct posix_clock_operations ptp_clock_ops = {
|
|||||||
.read = ptp_read,
|
.read = ptp_read,
|
||||||
};
|
};
|
||||||
|
|
||||||
static void delete_ptp_clock(struct posix_clock *pc)
|
static void ptp_clock_release(struct device *dev)
|
||||||
{
|
{
|
||||||
struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
|
struct ptp_clock *ptp = container_of(dev, struct ptp_clock, dev);
|
||||||
|
|
||||||
mutex_destroy(&ptp->tsevq_mux);
|
mutex_destroy(&ptp->tsevq_mux);
|
||||||
mutex_destroy(&ptp->pincfg_mux);
|
mutex_destroy(&ptp->pincfg_mux);
|
||||||
@ -213,7 +213,6 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
|
|||||||
}
|
}
|
||||||
|
|
||||||
ptp->clock.ops = ptp_clock_ops;
|
ptp->clock.ops = ptp_clock_ops;
|
||||||
ptp->clock.release = delete_ptp_clock;
|
|
||||||
ptp->info = info;
|
ptp->info = info;
|
||||||
ptp->devid = MKDEV(major, index);
|
ptp->devid = MKDEV(major, index);
|
||||||
ptp->index = index;
|
ptp->index = index;
|
||||||
@ -236,15 +235,6 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
|
|||||||
if (err)
|
if (err)
|
||||||
goto no_pin_groups;
|
goto no_pin_groups;
|
||||||
|
|
||||||
/* Create a new device in our class. */
|
|
||||||
ptp->dev = device_create_with_groups(ptp_class, parent, ptp->devid,
|
|
||||||
ptp, ptp->pin_attr_groups,
|
|
||||||
"ptp%d", ptp->index);
|
|
||||||
if (IS_ERR(ptp->dev)) {
|
|
||||||
err = PTR_ERR(ptp->dev);
|
|
||||||
goto no_device;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Register a new PPS source. */
|
/* Register a new PPS source. */
|
||||||
if (info->pps) {
|
if (info->pps) {
|
||||||
struct pps_source_info pps;
|
struct pps_source_info pps;
|
||||||
@ -260,8 +250,18 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Create a posix clock. */
|
/* Initialize a new device of our class in our clock structure. */
|
||||||
err = posix_clock_register(&ptp->clock, ptp->devid);
|
device_initialize(&ptp->dev);
|
||||||
|
ptp->dev.devt = ptp->devid;
|
||||||
|
ptp->dev.class = ptp_class;
|
||||||
|
ptp->dev.parent = parent;
|
||||||
|
ptp->dev.groups = ptp->pin_attr_groups;
|
||||||
|
ptp->dev.release = ptp_clock_release;
|
||||||
|
dev_set_drvdata(&ptp->dev, ptp);
|
||||||
|
dev_set_name(&ptp->dev, "ptp%d", ptp->index);
|
||||||
|
|
||||||
|
/* Create a posix clock and link it to the device. */
|
||||||
|
err = posix_clock_register(&ptp->clock, &ptp->dev);
|
||||||
if (err) {
|
if (err) {
|
||||||
pr_err("failed to create posix clock\n");
|
pr_err("failed to create posix clock\n");
|
||||||
goto no_clock;
|
goto no_clock;
|
||||||
@ -273,8 +273,6 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
|
|||||||
if (ptp->pps_source)
|
if (ptp->pps_source)
|
||||||
pps_unregister_source(ptp->pps_source);
|
pps_unregister_source(ptp->pps_source);
|
||||||
no_pps:
|
no_pps:
|
||||||
device_destroy(ptp_class, ptp->devid);
|
|
||||||
no_device:
|
|
||||||
ptp_cleanup_pin_groups(ptp);
|
ptp_cleanup_pin_groups(ptp);
|
||||||
no_pin_groups:
|
no_pin_groups:
|
||||||
if (ptp->kworker)
|
if (ptp->kworker)
|
||||||
@ -304,7 +302,6 @@ int ptp_clock_unregister(struct ptp_clock *ptp)
|
|||||||
if (ptp->pps_source)
|
if (ptp->pps_source)
|
||||||
pps_unregister_source(ptp->pps_source);
|
pps_unregister_source(ptp->pps_source);
|
||||||
|
|
||||||
device_destroy(ptp_class, ptp->devid);
|
|
||||||
ptp_cleanup_pin_groups(ptp);
|
ptp_cleanup_pin_groups(ptp);
|
||||||
|
|
||||||
posix_clock_unregister(&ptp->clock);
|
posix_clock_unregister(&ptp->clock);
|
||||||
|
@ -28,7 +28,7 @@ struct timestamp_event_queue {
|
|||||||
|
|
||||||
struct ptp_clock {
|
struct ptp_clock {
|
||||||
struct posix_clock clock;
|
struct posix_clock clock;
|
||||||
struct device *dev;
|
struct device dev;
|
||||||
struct ptp_clock_info *info;
|
struct ptp_clock_info *info;
|
||||||
dev_t devid;
|
dev_t devid;
|
||||||
int index; /* index into clocks.map */
|
int index; /* index into clocks.map */
|
||||||
|
@ -69,29 +69,32 @@ struct posix_clock_operations {
|
|||||||
*
|
*
|
||||||
* @ops: Functional interface to the clock
|
* @ops: Functional interface to the clock
|
||||||
* @cdev: Character device instance for this clock
|
* @cdev: Character device instance for this clock
|
||||||
* @kref: Reference count.
|
* @dev: Pointer to the clock's device.
|
||||||
* @rwsem: Protects the 'zombie' field from concurrent access.
|
* @rwsem: Protects the 'zombie' field from concurrent access.
|
||||||
* @zombie: If 'zombie' is true, then the hardware has disappeared.
|
* @zombie: If 'zombie' is true, then the hardware has disappeared.
|
||||||
* @release: A function to free the structure when the reference count reaches
|
|
||||||
* zero. May be NULL if structure is statically allocated.
|
|
||||||
*
|
*
|
||||||
* Drivers should embed their struct posix_clock within a private
|
* Drivers should embed their struct posix_clock within a private
|
||||||
* structure, obtaining a reference to it during callbacks using
|
* structure, obtaining a reference to it during callbacks using
|
||||||
* container_of().
|
* container_of().
|
||||||
|
*
|
||||||
|
* Drivers should supply an initialized but not exposed struct device
|
||||||
|
* to posix_clock_register(). It is used to manage lifetime of the
|
||||||
|
* driver's private structure. Its 'release' field should be set to
|
||||||
|
* a release function for this private structure.
|
||||||
*/
|
*/
|
||||||
struct posix_clock {
|
struct posix_clock {
|
||||||
struct posix_clock_operations ops;
|
struct posix_clock_operations ops;
|
||||||
struct cdev cdev;
|
struct cdev cdev;
|
||||||
struct kref kref;
|
struct device *dev;
|
||||||
struct rw_semaphore rwsem;
|
struct rw_semaphore rwsem;
|
||||||
bool zombie;
|
bool zombie;
|
||||||
void (*release)(struct posix_clock *clk);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* posix_clock_register() - register a new clock
|
* posix_clock_register() - register a new clock
|
||||||
* @clk: Pointer to the clock. Caller must provide 'ops' and 'release'
|
* @clk: Pointer to the clock. Caller must provide 'ops' field
|
||||||
* @devid: Allocated device id
|
* @dev: Pointer to the initialized device. Caller must provide
|
||||||
|
* 'release' field
|
||||||
*
|
*
|
||||||
* A clock driver calls this function to register itself with the
|
* A clock driver calls this function to register itself with the
|
||||||
* clock device subsystem. If 'clk' points to dynamically allocated
|
* clock device subsystem. If 'clk' points to dynamically allocated
|
||||||
@ -100,7 +103,7 @@ struct posix_clock {
|
|||||||
*
|
*
|
||||||
* Returns zero on success, non-zero otherwise.
|
* Returns zero on success, non-zero otherwise.
|
||||||
*/
|
*/
|
||||||
int posix_clock_register(struct posix_clock *clk, dev_t devid);
|
int posix_clock_register(struct posix_clock *clk, struct device *dev);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* posix_clock_unregister() - unregister a clock
|
* posix_clock_unregister() - unregister a clock
|
||||||
|
@ -14,8 +14,6 @@
|
|||||||
|
|
||||||
#include "posix-timers.h"
|
#include "posix-timers.h"
|
||||||
|
|
||||||
static void delete_clock(struct kref *kref);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Returns NULL if the posix_clock instance attached to 'fp' is old and stale.
|
* Returns NULL if the posix_clock instance attached to 'fp' is old and stale.
|
||||||
*/
|
*/
|
||||||
@ -125,7 +123,7 @@ static int posix_clock_open(struct inode *inode, struct file *fp)
|
|||||||
err = 0;
|
err = 0;
|
||||||
|
|
||||||
if (!err) {
|
if (!err) {
|
||||||
kref_get(&clk->kref);
|
get_device(clk->dev);
|
||||||
fp->private_data = clk;
|
fp->private_data = clk;
|
||||||
}
|
}
|
||||||
out:
|
out:
|
||||||
@ -141,7 +139,7 @@ static int posix_clock_release(struct inode *inode, struct file *fp)
|
|||||||
if (clk->ops.release)
|
if (clk->ops.release)
|
||||||
err = clk->ops.release(clk);
|
err = clk->ops.release(clk);
|
||||||
|
|
||||||
kref_put(&clk->kref, delete_clock);
|
put_device(clk->dev);
|
||||||
|
|
||||||
fp->private_data = NULL;
|
fp->private_data = NULL;
|
||||||
|
|
||||||
@ -161,38 +159,35 @@ static const struct file_operations posix_clock_file_operations = {
|
|||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
int posix_clock_register(struct posix_clock *clk, dev_t devid)
|
int posix_clock_register(struct posix_clock *clk, struct device *dev)
|
||||||
{
|
{
|
||||||
int err;
|
int err;
|
||||||
|
|
||||||
kref_init(&clk->kref);
|
|
||||||
init_rwsem(&clk->rwsem);
|
init_rwsem(&clk->rwsem);
|
||||||
|
|
||||||
cdev_init(&clk->cdev, &posix_clock_file_operations);
|
cdev_init(&clk->cdev, &posix_clock_file_operations);
|
||||||
|
err = cdev_device_add(&clk->cdev, dev);
|
||||||
|
if (err) {
|
||||||
|
pr_err("%s unable to add device %d:%d\n",
|
||||||
|
dev_name(dev), MAJOR(dev->devt), MINOR(dev->devt));
|
||||||
|
return err;
|
||||||
|
}
|
||||||
clk->cdev.owner = clk->ops.owner;
|
clk->cdev.owner = clk->ops.owner;
|
||||||
err = cdev_add(&clk->cdev, devid, 1);
|
clk->dev = dev;
|
||||||
|
|
||||||
return err;
|
return 0;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(posix_clock_register);
|
EXPORT_SYMBOL_GPL(posix_clock_register);
|
||||||
|
|
||||||
static void delete_clock(struct kref *kref)
|
|
||||||
{
|
|
||||||
struct posix_clock *clk = container_of(kref, struct posix_clock, kref);
|
|
||||||
|
|
||||||
if (clk->release)
|
|
||||||
clk->release(clk);
|
|
||||||
}
|
|
||||||
|
|
||||||
void posix_clock_unregister(struct posix_clock *clk)
|
void posix_clock_unregister(struct posix_clock *clk)
|
||||||
{
|
{
|
||||||
cdev_del(&clk->cdev);
|
cdev_device_del(&clk->cdev, clk->dev);
|
||||||
|
|
||||||
down_write(&clk->rwsem);
|
down_write(&clk->rwsem);
|
||||||
clk->zombie = true;
|
clk->zombie = true;
|
||||||
up_write(&clk->rwsem);
|
up_write(&clk->rwsem);
|
||||||
|
|
||||||
kref_put(&clk->kref, delete_clock);
|
put_device(clk->dev);
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(posix_clock_unregister);
|
EXPORT_SYMBOL_GPL(posix_clock_unregister);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user