Nick Piggin b31ca3f5df sysfs: fix deadlock
On Thu, Sep 11, 2008 at 10:27:10AM +0200, Ingo Molnar wrote:

> and it's working fine on most boxes. One testbox found this new locking
> scenario:
>
> PM: Adding info for No Bus:vcsa7
> EDAC DEBUG: MC0: i82860_check()
>
> =======================================================
> [ INFO: possible circular locking dependency detected ]
> 2.6.27-rc6-tip #1
> -------------------------------------------------------
> X/4873 is trying to acquire lock:
>  (&bb->mutex){--..}, at: [<c020ba20>] mmap+0x40/0xa0
>
> but task is already holding lock:
>  (&mm->mmap_sem){----}, at: [<c0125a1e>] sys_mmap2+0x8e/0xc0
>
> which lock already depends on the new lock.
>
>
> the existing dependency chain (in reverse order) is:
>
> -> #1 (&mm->mmap_sem){----}:
>        [<c017dc96>] validate_chain+0xa96/0xf50
>        [<c017ef2b>] __lock_acquire+0x2cb/0x5b0
>        [<c017f299>] lock_acquire+0x89/0xc0
>        [<c01aa8fb>] might_fault+0x6b/0x90
>        [<c040b618>] copy_to_user+0x38/0x60
>        [<c020bcfb>] read+0xfb/0x170
>        [<c01c09a5>] vfs_read+0x95/0x110
>        [<c01c1443>] sys_pread64+0x63/0x80
>        [<c012146f>] sysenter_do_call+0x12/0x43
>        [<ffffffff>] 0xffffffff
>
> -> #0 (&bb->mutex){--..}:
>        [<c017d8b7>] validate_chain+0x6b7/0xf50
>        [<c017ef2b>] __lock_acquire+0x2cb/0x5b0
>        [<c017f299>] lock_acquire+0x89/0xc0
>        [<c0d6f2ab>] __mutex_lock_common+0xab/0x3c0
>        [<c0d6f698>] mutex_lock_nested+0x38/0x50
>        [<c020ba20>] mmap+0x40/0xa0
>        [<c01b111e>] mmap_region+0x14e/0x450
>        [<c01b170f>] do_mmap_pgoff+0x2ef/0x310
>        [<c0125a3d>] sys_mmap2+0xad/0xc0
>        [<c012146f>] sysenter_do_call+0x12/0x43
>        [<ffffffff>] 0xffffffff
>
> other info that might help us debug this:
>
> 1 lock held by X/4873:
>  #0:  (&mm->mmap_sem){----}, at: [<c0125a1e>] sys_mmap2+0x8e/0xc0
>
> stack backtrace:
> Pid: 4873, comm: X Not tainted 2.6.27-rc6-tip #1
>  [<c017cd09>] print_circular_bug_tail+0x79/0xc0
>  [<c017d8b7>] validate_chain+0x6b7/0xf50
>  [<c017a5b5>] ? trace_hardirqs_off_caller+0x15/0xb0
>  [<c017ef2b>] __lock_acquire+0x2cb/0x5b0
>  [<c017f299>] lock_acquire+0x89/0xc0
>  [<c020ba20>] ? mmap+0x40/0xa0
>  [<c0d6f2ab>] __mutex_lock_common+0xab/0x3c0
>  [<c020ba20>] ? mmap+0x40/0xa0
>  [<c0d6f698>] mutex_lock_nested+0x38/0x50
>  [<c020ba20>] ? mmap+0x40/0xa0
>  [<c020ba20>] mmap+0x40/0xa0
>  [<c01b111e>] mmap_region+0x14e/0x450
>  [<c01afb88>] ? arch_get_unmapped_area_topdown+0xf8/0x160
>  [<c01b170f>] do_mmap_pgoff+0x2ef/0x310
>  [<c0125a3d>] sys_mmap2+0xad/0xc0
>  [<c012146f>] sysenter_do_call+0x12/0x43
>  [<c0120000>] ? __switch_to+0x130/0x220
>  =======================
> evbug.c: Event. Dev: input3, Type: 20, Code: 0, Value: 500
> warning: `sudo' uses deprecated v2 capabilities in a way that may be insecure.
>
> i've attached the config.
>
> at first sight it looks like a genuine bug in fs/sysfs/bin.c?

Yes, it is a real bug by the looks. bin.c takes bb->mutex under mmap_sem
when it is mmapped, and then does its copy_*_user under bb->mutex too.

Here is a basic fix for the sysfs lor.


From: Nick Piggin <npiggin@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
2008-10-16 09:24:50 -07:00

280 lines
5.9 KiB
C

/*
* fs/sysfs/bin.c - sysfs binary file implementation
*
* Copyright (c) 2003 Patrick Mochel
* Copyright (c) 2003 Matthew Wilcox
* Copyright (c) 2004 Silicon Graphics, Inc.
* Copyright (c) 2007 SUSE Linux Products GmbH
* Copyright (c) 2007 Tejun Heo <teheo@suse.de>
*
* This file is released under the GPLv2.
*
* Please see Documentation/filesystems/sysfs.txt for more information.
*/
#undef DEBUG
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/kobject.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <asm/uaccess.h>
#include "sysfs.h"
struct bin_buffer {
struct mutex mutex;
void *buffer;
int mmapped;
};
static int
fill_read(struct dentry *dentry, char *buffer, loff_t off, size_t count)
{
struct sysfs_dirent *attr_sd = dentry->d_fsdata;
struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr;
struct kobject *kobj = attr_sd->s_parent->s_dir.kobj;
int rc;
/* need attr_sd for attr, its parent for kobj */
if (!sysfs_get_active_two(attr_sd))
return -ENODEV;
rc = -EIO;
if (attr->read)
rc = attr->read(kobj, attr, buffer, off, count);
sysfs_put_active_two(attr_sd);
return rc;
}
static ssize_t
read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off)
{
struct bin_buffer *bb = file->private_data;
struct dentry *dentry = file->f_path.dentry;
int size = dentry->d_inode->i_size;
loff_t offs = *off;
int count = min_t(size_t, bytes, PAGE_SIZE);
char *temp;
if (size) {
if (offs > size)
return 0;
if (offs + count > size)
count = size - offs;
}
temp = kmalloc(count, GFP_KERNEL);
if (!temp)
return -ENOMEM;
mutex_lock(&bb->mutex);
count = fill_read(dentry, bb->buffer, offs, count);
if (count < 0) {
mutex_unlock(&bb->mutex);
goto out_free;
}
memcpy(temp, bb->buffer, count);
mutex_unlock(&bb->mutex);
if (copy_to_user(userbuf, temp, count)) {
count = -EFAULT;
goto out_free;
}
pr_debug("offs = %lld, *off = %lld, count = %d\n", offs, *off, count);
*off = offs + count;
out_free:
kfree(temp);
return count;
}
static int
flush_write(struct dentry *dentry, char *buffer, loff_t offset, size_t count)
{
struct sysfs_dirent *attr_sd = dentry->d_fsdata;
struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr;
struct kobject *kobj = attr_sd->s_parent->s_dir.kobj;
int rc;
/* need attr_sd for attr, its parent for kobj */
if (!sysfs_get_active_two(attr_sd))
return -ENODEV;
rc = -EIO;
if (attr->write)
rc = attr->write(kobj, attr, buffer, offset, count);
sysfs_put_active_two(attr_sd);
return rc;
}
static ssize_t write(struct file *file, const char __user *userbuf,
size_t bytes, loff_t *off)
{
struct bin_buffer *bb = file->private_data;
struct dentry *dentry = file->f_path.dentry;
int size = dentry->d_inode->i_size;
loff_t offs = *off;
int count = min_t(size_t, bytes, PAGE_SIZE);
char *temp;
if (size) {
if (offs > size)
return 0;
if (offs + count > size)
count = size - offs;
}
temp = kmalloc(count, GFP_KERNEL);
if (!temp)
return -ENOMEM;
if (copy_from_user(temp, userbuf, count)) {
count = -EFAULT;
goto out_free;
}
mutex_lock(&bb->mutex);
memcpy(bb->buffer, temp, count);
count = flush_write(dentry, bb->buffer, offs, count);
mutex_unlock(&bb->mutex);
if (count > 0)
*off = offs + count;
out_free:
kfree(temp);
return count;
}
static int mmap(struct file *file, struct vm_area_struct *vma)
{
struct bin_buffer *bb = file->private_data;
struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr;
struct kobject *kobj = attr_sd->s_parent->s_dir.kobj;
int rc;
mutex_lock(&bb->mutex);
/* need attr_sd for attr, its parent for kobj */
if (!sysfs_get_active_two(attr_sd))
return -ENODEV;
rc = -EINVAL;
if (attr->mmap)
rc = attr->mmap(kobj, attr, vma);
if (rc == 0 && !bb->mmapped)
bb->mmapped = 1;
else
sysfs_put_active_two(attr_sd);
mutex_unlock(&bb->mutex);
return rc;
}
static int open(struct inode * inode, struct file * file)
{
struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr;
struct bin_buffer *bb = NULL;
int error;
/* binary file operations requires both @sd and its parent */
if (!sysfs_get_active_two(attr_sd))
return -ENODEV;
error = -EACCES;
if ((file->f_mode & FMODE_WRITE) && !(attr->write || attr->mmap))
goto err_out;
if ((file->f_mode & FMODE_READ) && !(attr->read || attr->mmap))
goto err_out;
error = -ENOMEM;
bb = kzalloc(sizeof(*bb), GFP_KERNEL);
if (!bb)
goto err_out;
bb->buffer = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (!bb->buffer)
goto err_out;
mutex_init(&bb->mutex);
file->private_data = bb;
/* open succeeded, put active references */
sysfs_put_active_two(attr_sd);
return 0;
err_out:
sysfs_put_active_two(attr_sd);
kfree(bb);
return error;
}
static int release(struct inode * inode, struct file * file)
{
struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
struct bin_buffer *bb = file->private_data;
if (bb->mmapped)
sysfs_put_active_two(attr_sd);
kfree(bb->buffer);
kfree(bb);
return 0;
}
const struct file_operations bin_fops = {
.read = read,
.write = write,
.mmap = mmap,
.llseek = generic_file_llseek,
.open = open,
.release = release,
};
/**
* sysfs_create_bin_file - create binary file for object.
* @kobj: object.
* @attr: attribute descriptor.
*/
int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr)
{
BUG_ON(!kobj || !kobj->sd || !attr);
return sysfs_add_file(kobj->sd, &attr->attr, SYSFS_KOBJ_BIN_ATTR);
}
/**
* sysfs_remove_bin_file - remove binary file for object.
* @kobj: object.
* @attr: attribute descriptor.
*/
void sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr)
{
sysfs_hash_and_remove(kobj->sd, attr->attr.name);
}
EXPORT_SYMBOL_GPL(sysfs_create_bin_file);
EXPORT_SYMBOL_GPL(sysfs_remove_bin_file);