2005-04-16 15:20:36 -07:00
|
|
|
/*
|
2011-07-16 16:45:13 +08:00
|
|
|
* inode.c - part of debugfs, a tiny little debug file system
|
2005-04-16 15:20:36 -07:00
|
|
|
*
|
|
|
|
* Copyright (C) 2004 Greg Kroah-Hartman <greg@kroah.com>
|
|
|
|
* Copyright (C) 2004 IBM Inc.
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU General Public License version
|
|
|
|
* 2 as published by the Free Software Foundation.
|
|
|
|
*
|
|
|
|
* debugfs is for people to use instead of /proc or /sys.
|
|
|
|
* See Documentation/DocBook/kernel-api for more details.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/module.h>
|
|
|
|
#include <linux/fs.h>
|
|
|
|
#include <linux/mount.h>
|
|
|
|
#include <linux/pagemap.h>
|
|
|
|
#include <linux/init.h>
|
2006-11-25 11:09:26 -08:00
|
|
|
#include <linux/kobject.h>
|
2005-04-16 15:20:36 -07:00
|
|
|
#include <linux/namei.h>
|
|
|
|
#include <linux/debugfs.h>
|
2006-11-24 13:45:37 -05:00
|
|
|
#include <linux/fsnotify.h>
|
2007-02-13 12:13:54 +01:00
|
|
|
#include <linux/string.h>
|
2012-01-25 11:52:28 +01:00
|
|
|
#include <linux/seq_file.h>
|
|
|
|
#include <linux/parser.h>
|
2008-10-07 14:00:12 -04:00
|
|
|
#include <linux/magic.h>
|
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h
percpu.h is included by sched.h and module.h and thus ends up being
included when building most .c files. percpu.h includes slab.h which
in turn includes gfp.h making everything defined by the two files
universally available and complicating inclusion dependencies.
percpu.h -> slab.h dependency is about to be removed. Prepare for
this change by updating users of gfp and slab facilities include those
headers directly instead of assuming availability. As this conversion
needs to touch large number of source files, the following script is
used as the basis of conversion.
http://userweb.kernel.org/~tj/misc/slabh-sweep.py
The script does the followings.
* Scan files for gfp and slab usages and update includes such that
only the necessary includes are there. ie. if only gfp is used,
gfp.h, if slab is used, slab.h.
* When the script inserts a new include, it looks at the include
blocks and try to put the new include such that its order conforms
to its surrounding. It's put in the include block which contains
core kernel includes, in the same order that the rest are ordered -
alphabetical, Christmas tree, rev-Xmas-tree or at the end if there
doesn't seem to be any matching order.
* If the script can't find a place to put a new include (mostly
because the file doesn't have fitting include block), it prints out
an error message indicating which .h file needs to be added to the
file.
The conversion was done in the following steps.
1. The initial automatic conversion of all .c files updated slightly
over 4000 files, deleting around 700 includes and adding ~480 gfp.h
and ~3000 slab.h inclusions. The script emitted errors for ~400
files.
2. Each error was manually checked. Some didn't need the inclusion,
some needed manual addition while adding it to implementation .h or
embedding .c file was more appropriate for others. This step added
inclusions to around 150 files.
3. The script was run again and the output was compared to the edits
from #2 to make sure no file was left behind.
4. Several build tests were done and a couple of problems were fixed.
e.g. lib/decompress_*.c used malloc/free() wrappers around slab
APIs requiring slab.h to be added manually.
5. The script was run on all .h files but without automatically
editing them as sprinkling gfp.h and slab.h inclusions around .h
files could easily lead to inclusion dependency hell. Most gfp.h
inclusion directives were ignored as stuff from gfp.h was usually
widely available and often used in preprocessor macros. Each
slab.h inclusion directive was examined and added manually as
necessary.
6. percpu.h was updated not to include slab.h.
7. Build test were done on the following configurations and failures
were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my
distributed build env didn't work with gcov compiles) and a few
more options had to be turned off depending on archs to make things
build (like ipr on powerpc/64 which failed due to missing writeq).
* x86 and x86_64 UP and SMP allmodconfig and a custom test config.
* powerpc and powerpc64 SMP allmodconfig
* sparc and sparc64 SMP allmodconfig
* ia64 SMP allmodconfig
* s390 SMP allmodconfig
* alpha SMP allmodconfig
* um on x86_64 SMP allmodconfig
8. percpu.h modifications were reverted so that it could be applied as
a separate patch and serve as bisection point.
Given the fact that I had only a couple of failures from tests on step
6, I'm fairly confident about the coverage of this conversion patch.
If there is a breakage, it's likely to be something in one of the arch
headers which should be easily discoverable on most builds of
the specific arch.
Signed-off-by: Tejun Heo <tj@kernel.org>
Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 17:04:11 +09:00
|
|
|
#include <linux/slab.h>
|
2005-04-16 15:20:36 -07:00
|
|
|
|
2012-08-27 13:32:15 -07:00
|
|
|
#define DEBUGFS_DEFAULT_MODE 0700
|
2012-01-25 11:52:28 +01:00
|
|
|
|
2005-04-16 15:20:36 -07:00
|
|
|
static struct vfsmount *debugfs_mount;
|
|
|
|
static int debugfs_mount_count;
|
2009-03-22 23:10:44 +01:00
|
|
|
static bool debugfs_registered;
|
2005-04-16 15:20:36 -07:00
|
|
|
|
2011-07-24 04:33:43 -04:00
|
|
|
/*
 * Allocate a new inode on @sb and set it up for the file type encoded in
 * @mode.
 *
 *  - regular file: i_fop is @fops (debugfs_file_operations when @fops is
 *    NULL) and i_private is @data;
 *  - symlink: i_op is debugfs_link_operations and i_private is @data;
 *  - directory: the simple_dir operations are installed and the link count
 *    is bumped once;
 *  - anything else is handed to init_special_inode() together with @dev.
 *
 * Returns the new inode, or NULL when new_inode() fails.
 */
static struct inode *debugfs_get_inode(struct super_block *sb, umode_t mode, dev_t dev,
				       void *data, const struct file_operations *fops)
{
	struct inode *inode;

	inode = new_inode(sb);
	if (!inode)
		return NULL;

	inode->i_ino = get_next_ino();
	inode->i_mode = mode;
	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;

	switch (mode & S_IFMT) {
	case S_IFREG:
		if (fops)
			inode->i_fop = fops;
		else
			inode->i_fop = &debugfs_file_operations;
		inode->i_private = data;
		break;
	case S_IFLNK:
		inode->i_op = &debugfs_link_operations;
		inode->i_private = data;
		break;
	case S_IFDIR:
		inode->i_op = &simple_dir_inode_operations;
		inode->i_fop = &simple_dir_operations;

		/*
		 * directory inodes start off with i_nlink == 2
		 * (for "." entry)
		 */
		inc_nlink(inode);
		break;
	default:
		init_special_inode(inode, mode, dev);
		break;
	}

	return inode;
}
|
|
|
|
|
|
|
|
/* SMP-safe */
|
|
|
|
static int debugfs_mknod(struct inode *dir, struct dentry *dentry,
|
2011-07-24 04:33:43 -04:00
|
|
|
umode_t mode, dev_t dev, void *data,
|
2009-11-17 14:40:26 -08:00
|
|
|
const struct file_operations *fops)
|
2005-04-16 15:20:36 -07:00
|
|
|
{
|
2006-06-08 10:26:39 +02:00
|
|
|
struct inode *inode;
|
2005-04-16 15:20:36 -07:00
|
|
|
int error = -EPERM;
|
|
|
|
|
|
|
|
if (dentry->d_inode)
|
|
|
|
return -EEXIST;
|
|
|
|
|
2009-11-17 14:40:26 -08:00
|
|
|
inode = debugfs_get_inode(dir->i_sb, mode, dev, data, fops);
|
2005-04-16 15:20:36 -07:00
|
|
|
if (inode) {
|
|
|
|
d_instantiate(dentry, inode);
|
|
|
|
dget(dentry);
|
|
|
|
error = 0;
|
|
|
|
}
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
2015-01-25 13:50:23 -05:00
|
|
|
/*
 * Turn @dentry into a directory inside its parent.
 *
 * Only the rwx bits and the sticky bit of @mode are kept. On success the
 * parent's link count is incremented and an fsnotify mkdir event is raised.
 *
 * Returns 0 on success or the error reported by debugfs_mknod().
 */
static int debugfs_mkdir(struct dentry *dentry, umode_t mode)
{
	struct inode *parent_inode = dentry->d_parent->d_inode;
	umode_t dir_mode = S_IFDIR | (mode & (S_IRWXUGO | S_ISVTX));
	int err;

	err = debugfs_mknod(parent_inode, dentry, dir_mode, 0, NULL, NULL);
	if (err)
		return err;

	inc_nlink(parent_inode);
	fsnotify_mkdir(parent_inode, dentry);
	return 0;
}
|
|
|
|
|
2015-01-25 13:50:23 -05:00
|
|
|
/*
 * Turn @dentry into a symbolic link inside its parent.
 *
 * @data is passed through to debugfs_mknod() and ends up in the inode's
 * i_private. Returns whatever debugfs_mknod() returns.
 */
static int debugfs_link(struct dentry *dentry, umode_t mode,
			void *data)
{
	struct inode *parent_inode = dentry->d_parent->d_inode;
	umode_t link_mode = S_IFLNK | (mode & S_IALLUGO);

	return debugfs_mknod(parent_inode, dentry, link_mode, 0, data, NULL);
}
|
|
|
|
|
2015-01-25 13:50:23 -05:00
|
|
|
/*
 * Turn @dentry into a regular file inside its parent.
 *
 * The type bits of @mode are replaced by S_IFREG; @data and @fops are
 * forwarded to debugfs_mknod(). An fsnotify create event is raised on
 * success.
 *
 * Returns 0 on success or the error reported by debugfs_mknod().
 */
static int debugfs_create(struct dentry *dentry, umode_t mode,
			  void *data, const struct file_operations *fops)
{
	struct inode *parent_inode = dentry->d_parent->d_inode;
	umode_t file_mode = S_IFREG | (mode & S_IALLUGO);
	int err;

	err = debugfs_mknod(parent_inode, dentry, file_mode, 0, data, fops);
	if (err)
		return err;

	fsnotify_create(parent_inode, dentry);
	return 0;
}
|
|
|
|
|
|
|
|
static inline int debugfs_positive(struct dentry *dentry)
|
|
|
|
{
|
|
|
|
return dentry->d_inode && !d_unhashed(dentry);
|
|
|
|
}
|
|
|
|
|
2012-01-25 11:52:28 +01:00
|
|
|
struct debugfs_mount_opts {
|
2012-04-03 14:01:31 -07:00
|
|
|
kuid_t uid;
|
|
|
|
kgid_t gid;
|
2012-01-25 11:52:28 +01:00
|
|
|
umode_t mode;
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Token identifiers used by the tokens[] table and debugfs_parse_options(). */
enum {
	Opt_uid,	/* "uid=" */
	Opt_gid,	/* "gid=" */
	Opt_mode,	/* "mode=" */
	Opt_err		/* entry with the NULL pattern in tokens[] */
};
|
|
|
|
|
|
|
|
static const match_table_t tokens = {
|
|
|
|
{Opt_uid, "uid=%u"},
|
|
|
|
{Opt_gid, "gid=%u"},
|
|
|
|
{Opt_mode, "mode=%o"},
|
|
|
|
{Opt_err, NULL}
|
|
|
|
};
|
|
|
|
|
|
|
|
struct debugfs_fs_info {
|
|
|
|
struct debugfs_mount_opts mount_opts;
|
|
|
|
};
|
|
|
|
|
|
|
|
static int debugfs_parse_options(char *data, struct debugfs_mount_opts *opts)
|
|
|
|
{
|
|
|
|
substring_t args[MAX_OPT_ARGS];
|
|
|
|
int option;
|
|
|
|
int token;
|
2012-04-03 14:01:31 -07:00
|
|
|
kuid_t uid;
|
|
|
|
kgid_t gid;
|
2012-01-25 11:52:28 +01:00
|
|
|
char *p;
|
|
|
|
|
|
|
|
opts->mode = DEBUGFS_DEFAULT_MODE;
|
|
|
|
|
|
|
|
while ((p = strsep(&data, ",")) != NULL) {
|
|
|
|
if (!*p)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
token = match_token(p, tokens, args);
|
|
|
|
switch (token) {
|
|
|
|
case Opt_uid:
|
|
|
|
if (match_int(&args[0], &option))
|
|
|
|
return -EINVAL;
|
2012-04-03 14:01:31 -07:00
|
|
|
uid = make_kuid(current_user_ns(), option);
|
|
|
|
if (!uid_valid(uid))
|
|
|
|
return -EINVAL;
|
|
|
|
opts->uid = uid;
|
2012-01-25 11:52:28 +01:00
|
|
|
break;
|
|
|
|
case Opt_gid:
|
2013-01-02 08:54:37 -05:00
|
|
|
if (match_int(&args[0], &option))
|
2012-01-25 11:52:28 +01:00
|
|
|
return -EINVAL;
|
2012-04-03 14:01:31 -07:00
|
|
|
gid = make_kgid(current_user_ns(), option);
|
|
|
|
if (!gid_valid(gid))
|
|
|
|
return -EINVAL;
|
|
|
|
opts->gid = gid;
|
2012-01-25 11:52:28 +01:00
|
|
|
break;
|
|
|
|
case Opt_mode:
|
|
|
|
if (match_octal(&args[0], &option))
|
|
|
|
return -EINVAL;
|
|
|
|
opts->mode = option & S_IALLUGO;
|
|
|
|
break;
|
|
|
|
/*
|
|
|
|
* We might like to report bad mount options here;
|
|
|
|
* but traditionally debugfs has ignored all mount options
|
|
|
|
*/
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int debugfs_apply_options(struct super_block *sb)
|
|
|
|
{
|
|
|
|
struct debugfs_fs_info *fsi = sb->s_fs_info;
|
|
|
|
struct inode *inode = sb->s_root->d_inode;
|
|
|
|
struct debugfs_mount_opts *opts = &fsi->mount_opts;
|
|
|
|
|
|
|
|
inode->i_mode &= ~S_IALLUGO;
|
|
|
|
inode->i_mode |= opts->mode;
|
|
|
|
|
|
|
|
inode->i_uid = opts->uid;
|
|
|
|
inode->i_gid = opts->gid;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int debugfs_remount(struct super_block *sb, int *flags, char *data)
|
|
|
|
{
|
|
|
|
int err;
|
|
|
|
struct debugfs_fs_info *fsi = sb->s_fs_info;
|
|
|
|
|
2014-03-13 10:14:33 -04:00
|
|
|
sync_filesystem(sb);
|
2012-01-25 11:52:28 +01:00
|
|
|
err = debugfs_parse_options(data, &fsi->mount_opts);
|
|
|
|
if (err)
|
|
|
|
goto fail;
|
|
|
|
|
|
|
|
debugfs_apply_options(sb);
|
|
|
|
|
|
|
|
fail:
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int debugfs_show_options(struct seq_file *m, struct dentry *root)
|
|
|
|
{
|
|
|
|
struct debugfs_fs_info *fsi = root->d_sb->s_fs_info;
|
|
|
|
struct debugfs_mount_opts *opts = &fsi->mount_opts;
|
|
|
|
|
2012-04-03 14:01:31 -07:00
|
|
|
if (!uid_eq(opts->uid, GLOBAL_ROOT_UID))
|
|
|
|
seq_printf(m, ",uid=%u",
|
|
|
|
from_kuid_munged(&init_user_ns, opts->uid));
|
|
|
|
if (!gid_eq(opts->gid, GLOBAL_ROOT_GID))
|
|
|
|
seq_printf(m, ",gid=%u",
|
|
|
|
from_kgid_munged(&init_user_ns, opts->gid));
|
2012-01-25 11:52:28 +01:00
|
|
|
if (opts->mode != DEBUGFS_DEFAULT_MODE)
|
|
|
|
seq_printf(m, ",mode=%o", opts->mode);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static const struct super_operations debugfs_super_operations = {
|
|
|
|
.statfs = simple_statfs,
|
|
|
|
.remount_fs = debugfs_remount,
|
|
|
|
.show_options = debugfs_show_options,
|
|
|
|
};
|
|
|
|
|
2005-04-16 15:20:36 -07:00
|
|
|
static int debug_fill_super(struct super_block *sb, void *data, int silent)
|
|
|
|
{
|
|
|
|
static struct tree_descr debug_files[] = {{""}};
|
2012-01-25 11:52:28 +01:00
|
|
|
struct debugfs_fs_info *fsi;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
save_mount_options(sb, data);
|
|
|
|
|
|
|
|
fsi = kzalloc(sizeof(struct debugfs_fs_info), GFP_KERNEL);
|
|
|
|
sb->s_fs_info = fsi;
|
|
|
|
if (!fsi) {
|
|
|
|
err = -ENOMEM;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
err = debugfs_parse_options(data, &fsi->mount_opts);
|
|
|
|
if (err)
|
|
|
|
goto fail;
|
|
|
|
|
|
|
|
err = simple_fill_super(sb, DEBUGFS_MAGIC, debug_files);
|
|
|
|
if (err)
|
|
|
|
goto fail;
|
|
|
|
|
|
|
|
sb->s_op = &debugfs_super_operations;
|
|
|
|
|
|
|
|
debugfs_apply_options(sb);
|
|
|
|
|
|
|
|
return 0;
|
2005-04-16 15:20:36 -07:00
|
|
|
|
2012-01-25 11:52:28 +01:00
|
|
|
fail:
|
|
|
|
kfree(fsi);
|
|
|
|
sb->s_fs_info = NULL;
|
|
|
|
return err;
|
2005-04-16 15:20:36 -07:00
|
|
|
}
|
|
|
|
|
2010-07-25 01:48:30 +04:00
|
|
|
static struct dentry *debug_mount(struct file_system_type *fs_type,
|
[PATCH] VFS: Permit filesystem to override root dentry on mount
Extend the get_sb() filesystem operation to take an extra argument that
permits the VFS to pass in the target vfsmount that defines the mountpoint.
The filesystem is then required to manually set the superblock and root dentry
pointers. For most filesystems, this should be done with simple_set_mnt()
which will set the superblock pointer and then set the root dentry to the
superblock's s_root (as per the old default behaviour).
The get_sb() op now returns an integer as there's now no need to return the
superblock pointer.
This patch permits a superblock to be implicitly shared amongst several mount
points, such as can be done with NFS to avoid potential inode aliasing. In
such a case, simple_set_mnt() would not be called, and instead the mnt_root
and mnt_sb would be set directly.
The patch also makes the following changes:
(*) the get_sb_*() convenience functions in the core kernel now take a vfsmount
pointer argument and return an integer, so most filesystems have to change
very little.
(*) If one of the convenience function is not used, then get_sb() should
normally call simple_set_mnt() to instantiate the vfsmount. This will
always return 0, and so can be tail-called from get_sb().
(*) generic_shutdown_super() now calls shrink_dcache_sb() to clean up the
dcache upon superblock destruction rather than shrink_dcache_anon().
This is required because the superblock may now have multiple trees that
aren't actually bound to s_root, but that still need to be cleaned up. The
currently called functions assume that the whole tree is rooted at s_root,
and that anonymous dentries are not the roots of trees which results in
dentries being left unculled.
However, with the way NFS superblock sharing are currently set to be
implemented, these assumptions are violated: the root of the filesystem is
simply a dummy dentry and inode (the real inode for '/' may well be
inaccessible), and all the vfsmounts are rooted on anonymous[*] dentries
with child trees.
[*] Anonymous until discovered from another tree.
(*) The documentation has been adjusted, including the additional bit of
changing ext2_* into foo_* in the documentation.
[akpm@osdl.org: convert ipath_fs, do other stuff]
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Nathan Scott <nathans@sgi.com>
Cc: Roland Dreier <rolandd@cisco.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-06-23 02:02:57 -07:00
|
|
|
int flags, const char *dev_name,
|
2010-07-25 01:48:30 +04:00
|
|
|
void *data)
|
2005-04-16 15:20:36 -07:00
|
|
|
{
|
2010-07-25 01:48:30 +04:00
|
|
|
return mount_single(fs_type, flags, data, debug_fill_super);
|
2005-04-16 15:20:36 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
static struct file_system_type debug_fs_type = {
|
|
|
|
.owner = THIS_MODULE,
|
|
|
|
.name = "debugfs",
|
2010-07-25 01:48:30 +04:00
|
|
|
.mount = debug_mount,
|
2005-04-16 15:20:36 -07:00
|
|
|
.kill_sb = kill_litter_super,
|
|
|
|
};
|
2013-03-02 19:39:14 -08:00
|
|
|
MODULE_ALIAS_FS("debugfs");
|
2005-04-16 15:20:36 -07:00
|
|
|
|
2012-08-09 15:40:39 -07:00
|
|
|
/*
 * Common worker for the debugfs_create_*() entry points.
 *
 * Pins the internal debugfs mount, looks up @name under @parent (the root
 * of the debugfs mount when @parent is NULL) with the parent's i_mutex
 * held, and creates a directory, symlink or regular file depending on the
 * type bits in @mode.
 *
 * Returns the new dentry on success. On any failure the mount pin is
 * dropped again (if it was taken) and NULL is returned.
 */
static struct dentry *__create_file(const char *name, umode_t mode,
				    struct dentry *parent, void *data,
				    const struct file_operations *fops)
{
	struct dentry *dentry;
	int error;

	pr_debug("debugfs: creating file '%s'\n", name);

	error = simple_pin_fs(&debug_fs_type, &debugfs_mount,
			      &debugfs_mount_count);
	if (error)
		return NULL;

	/*
	 * Without an explicit parent the entry goes into the root of the
	 * filesystem, whose dentry is reachable through the vfsmount we
	 * have just pinned.
	 */
	if (!parent)
		parent = debugfs_mount->mnt_root;

	mutex_lock(&parent->d_inode->i_mutex);
	dentry = lookup_one_len(name, parent, strlen(name));
	if (IS_ERR(dentry)) {
		error = PTR_ERR(dentry);
	} else {
		if (S_ISDIR(mode))
			error = debugfs_mkdir(dentry, mode);
		else if (S_ISLNK(mode))
			error = debugfs_link(dentry, mode, data);
		else
			error = debugfs_create(dentry, mode, data, fops);
		/* Drop the reference obtained from lookup_one_len(). */
		dput(dentry);
	}
	mutex_unlock(&parent->d_inode->i_mutex);

	if (!error)
		return dentry;

	simple_release_fs(&debugfs_mount, &debugfs_mount_count);
	return NULL;
}
|
|
|
|
|
2005-04-16 15:20:36 -07:00
|
|
|
/**
|
|
|
|
* debugfs_create_file - create a file in the debugfs filesystem
|
|
|
|
* @name: a pointer to a string containing the name of the file to create.
|
2009-10-31 18:26:52 -03:00
|
|
|
* @mode: the permission that the file should have.
|
2005-04-16 15:20:36 -07:00
|
|
|
* @parent: a pointer to the parent dentry for this file. This should be a
|
2014-02-18 22:54:36 +09:00
|
|
|
* directory dentry if set. If this parameter is NULL, then the
|
2005-04-16 15:20:36 -07:00
|
|
|
* file will be created in the root of the debugfs filesystem.
|
|
|
|
* @data: a pointer to something that the caller will want to get to later
|
2006-09-27 01:50:46 -07:00
|
|
|
* on. The inode.i_private pointer will point to this value on
|
2005-04-16 15:20:36 -07:00
|
|
|
* the open() call.
|
|
|
|
* @fops: a pointer to a struct file_operations that should be used for
|
|
|
|
* this file.
|
|
|
|
*
|
|
|
|
* This is the basic "create a file" function for debugfs. It allows for a
|
2009-10-31 18:26:52 -03:00
|
|
|
* wide range of flexibility in creating a file, or a directory (if you want
|
|
|
|
* to create a directory, the debugfs_create_dir() function is
|
2005-04-16 15:20:36 -07:00
|
|
|
* recommended to be used instead.)
|
|
|
|
*
|
|
|
|
* This function will return a pointer to a dentry if it succeeds. This
|
|
|
|
* pointer must be passed to the debugfs_remove() function when the file is
|
|
|
|
* to be removed (no automatic cleanup happens if your module is unloaded,
|
2006-07-20 08:16:42 -07:00
|
|
|
* you are responsible here.) If an error occurs, %NULL will be returned.
|
2005-04-16 15:20:36 -07:00
|
|
|
*
|
2006-07-20 08:16:42 -07:00
|
|
|
* If debugfs is not enabled in the kernel, the value -%ENODEV will be
|
2007-02-14 07:57:47 +01:00
|
|
|
* returned.
|
2005-04-16 15:20:36 -07:00
|
|
|
*/
|
2011-07-24 04:33:43 -04:00
|
|
|
struct dentry *debugfs_create_file(const char *name, umode_t mode,
|
2005-04-16 15:20:36 -07:00
|
|
|
struct dentry *parent, void *data,
|
2006-03-28 01:56:41 -08:00
|
|
|
const struct file_operations *fops)
|
2005-04-16 15:20:36 -07:00
|
|
|
{
|
2012-06-09 20:28:22 -04:00
|
|
|
switch (mode & S_IFMT) {
|
|
|
|
case S_IFREG:
|
|
|
|
case 0:
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
BUG();
|
2005-04-16 15:20:36 -07:00
|
|
|
}
|
2012-06-09 20:28:22 -04:00
|
|
|
|
|
|
|
return __create_file(name, mode, parent, data, fops);
|
2005-04-16 15:20:36 -07:00
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(debugfs_create_file);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* debugfs_create_dir - create a directory in the debugfs filesystem
|
|
|
|
* @name: a pointer to a string containing the name of the directory to
|
|
|
|
* create.
|
|
|
|
* @parent: a pointer to the parent dentry for this file. This should be a
|
2014-02-18 22:54:36 +09:00
|
|
|
* directory dentry if set. If this parameter is NULL, then the
|
2005-04-16 15:20:36 -07:00
|
|
|
* directory will be created in the root of the debugfs filesystem.
|
|
|
|
*
|
|
|
|
* This function creates a directory in debugfs with the given name.
|
|
|
|
*
|
|
|
|
* This function will return a pointer to a dentry if it succeeds. This
|
|
|
|
* pointer must be passed to the debugfs_remove() function when the file is
|
|
|
|
* to be removed (no automatic cleanup happens if your module is unloaded,
|
2006-07-20 08:16:42 -07:00
|
|
|
* you are responsible here.) If an error occurs, %NULL will be returned.
|
2005-04-16 15:20:36 -07:00
|
|
|
*
|
2006-07-20 08:16:42 -07:00
|
|
|
* If debugfs is not enabled in the kernel, the value -%ENODEV will be
|
2007-02-14 07:57:47 +01:00
|
|
|
* returned.
|
2005-04-16 15:20:36 -07:00
|
|
|
*/
|
|
|
|
struct dentry *debugfs_create_dir(const char *name, struct dentry *parent)
|
|
|
|
{
|
2012-06-09 20:28:22 -04:00
|
|
|
return __create_file(name, S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO,
|
2005-04-16 15:20:36 -07:00
|
|
|
parent, NULL, NULL);
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(debugfs_create_dir);
|
|
|
|
|
2007-02-13 12:13:54 +01:00
|
|
|
/**
|
|
|
|
* debugfs_create_symlink- create a symbolic link in the debugfs filesystem
|
|
|
|
* @name: a pointer to a string containing the name of the symbolic link to
|
|
|
|
* create.
|
|
|
|
* @parent: a pointer to the parent dentry for this symbolic link. This
|
2014-02-18 22:54:36 +09:00
|
|
|
* should be a directory dentry if set. If this parameter is NULL,
|
2007-02-13 12:13:54 +01:00
|
|
|
* then the symbolic link will be created in the root of the debugfs
|
|
|
|
* filesystem.
|
|
|
|
* @target: a pointer to a string containing the path to the target of the
|
|
|
|
* symbolic link.
|
|
|
|
*
|
|
|
|
* This function creates a symbolic link with the given name in debugfs that
|
|
|
|
* links to the given target path.
|
|
|
|
*
|
|
|
|
* This function will return a pointer to a dentry if it succeeds. This
|
|
|
|
* pointer must be passed to the debugfs_remove() function when the symbolic
|
|
|
|
* link is to be removed (no automatic cleanup happens if your module is
|
|
|
|
* unloaded, you are responsible here.) If an error occurs, %NULL will be
|
|
|
|
* returned.
|
|
|
|
*
|
|
|
|
* If debugfs is not enabled in the kernel, the value -%ENODEV will be
|
2007-02-14 07:57:47 +01:00
|
|
|
* returned.
|
2007-02-13 12:13:54 +01:00
|
|
|
*/
|
|
|
|
struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent,
|
|
|
|
const char *target)
|
|
|
|
{
|
|
|
|
struct dentry *result;
|
|
|
|
char *link;
|
|
|
|
|
|
|
|
link = kstrdup(target, GFP_KERNEL);
|
|
|
|
if (!link)
|
|
|
|
return NULL;
|
|
|
|
|
2012-06-09 20:28:22 -04:00
|
|
|
result = __create_file(name, S_IFLNK | S_IRWXUGO, parent, link, NULL);
|
2007-02-13 12:13:54 +01:00
|
|
|
if (!result)
|
|
|
|
kfree(link);
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(debugfs_create_symlink);
|
|
|
|
|
2011-02-07 15:00:27 +01:00
|
|
|
static int __debugfs_remove(struct dentry *dentry, struct dentry *parent)
|
2008-07-01 15:14:51 +02:00
|
|
|
{
|
|
|
|
int ret = 0;
|
|
|
|
|
|
|
|
if (debugfs_positive(dentry)) {
|
|
|
|
if (dentry->d_inode) {
|
|
|
|
dget(dentry);
|
|
|
|
switch (dentry->d_inode->i_mode & S_IFMT) {
|
|
|
|
case S_IFDIR:
|
|
|
|
ret = simple_rmdir(parent->d_inode, dentry);
|
|
|
|
break;
|
|
|
|
case S_IFLNK:
|
|
|
|
kfree(dentry->d_inode->i_private);
|
|
|
|
/* fall through */
|
|
|
|
default:
|
|
|
|
simple_unlink(parent->d_inode, dentry);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (!ret)
|
|
|
|
d_delete(dentry);
|
|
|
|
dput(dentry);
|
|
|
|
}
|
|
|
|
}
|
2011-02-07 15:00:27 +01:00
|
|
|
return ret;
|
2008-07-01 15:14:51 +02:00
|
|
|
}
|
|
|
|
|
2005-04-16 15:20:36 -07:00
|
|
|
/**
|
|
|
|
* debugfs_remove - removes a file or directory from the debugfs filesystem
|
|
|
|
* @dentry: a pointer to a the dentry of the file or directory to be
|
|
|
|
* removed.
|
|
|
|
*
|
|
|
|
* This function removes a file or directory in debugfs that was previously
|
|
|
|
* created with a call to another debugfs function (like
|
2006-10-03 23:28:36 +02:00
|
|
|
* debugfs_create_file() or variants thereof.)
|
2005-04-16 15:20:36 -07:00
|
|
|
*
|
|
|
|
* This function is required to be called in order for the file to be
|
|
|
|
* removed, no automatic cleanup of files will happen when a module is
|
|
|
|
* removed, you are responsible here.
|
|
|
|
*/
|
|
|
|
void debugfs_remove(struct dentry *dentry)
|
|
|
|
{
|
|
|
|
struct dentry *parent;
|
2011-02-07 15:00:27 +01:00
|
|
|
int ret;
|
|
|
|
|
2012-05-23 15:13:07 +02:00
|
|
|
if (IS_ERR_OR_NULL(dentry))
|
2005-04-16 15:20:36 -07:00
|
|
|
return;
|
|
|
|
|
|
|
|
parent = dentry->d_parent;
|
|
|
|
if (!parent || !parent->d_inode)
|
|
|
|
return;
|
|
|
|
|
2006-01-09 15:59:24 -08:00
|
|
|
mutex_lock(&parent->d_inode->i_mutex);
|
2011-02-07 15:00:27 +01:00
|
|
|
ret = __debugfs_remove(dentry, parent);
|
2008-07-01 15:14:51 +02:00
|
|
|
mutex_unlock(&parent->d_inode->i_mutex);
|
2011-02-07 15:00:27 +01:00
|
|
|
if (!ret)
|
|
|
|
simple_release_fs(&debugfs_mount, &debugfs_mount_count);
|
2008-07-01 15:14:51 +02:00
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(debugfs_remove);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* debugfs_remove_recursive - recursively removes a directory
|
|
|
|
* @dentry: a pointer to the dentry of the directory to be removed.
|
|
|
|
*
|
|
|
|
* This function recursively removes a directory tree in debugfs that
|
|
|
|
* was previously created with a call to another debugfs function
|
|
|
|
* (like debugfs_create_file() or variants thereof.)
|
|
|
|
*
|
|
|
|
* This function is required to be called in order for the file to be
|
|
|
|
* removed, no automatic cleanup of files will happen when a module is
|
|
|
|
* removed, you are responsible here.
|
|
|
|
*/
|
|
|
|
void debugfs_remove_recursive(struct dentry *dentry)
|
|
|
|
{
|
debugfs: Fix corrupted loop in debugfs_remove_recursive
[ I'm currently running my tests on it now, and so far, after a few
hours it has yet to blow up. I'll run it for 24 hours which it never
succeeded in the past. ]
The tracing code has a way to make directories within the debugfs file
system as well as deleting them using mkdir/rmdir in the instance
directory. This is very limited in functionality, such as there is
no renames, and the parent directory "instance" can not be modified.
The tracing code creates the instance directory from the debugfs code
and then replaces the dentry->d_inode->i_op with its own to allow
for mkdir/rmdir to work.
When these are called, the d_entry and inode locks need to be released
to call the instance creation and deletion code. That code has its own
accounting and locking to serialize everything to prevent multiple
users from causing harm. As the parent "instance" directory can not
be modified this simplifies things.
I created a stress test that creates several threads that randomly
creates and deletes directories thousands of times a second. The code
stood up to this test and I submitted it a while ago.
Recently I added a new test that adds readers to the mix. While the
instance directories were being added and deleted, readers would read
from these directories and even enable tracing within them. This test
was able to trigger a bug:
general protection fault: 0000 [#1] PREEMPT SMP
Modules linked in: ...
CPU: 3 PID: 17789 Comm: rmdir Tainted: G W 3.15.0-rc2-test+ #41
Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./To be filled by O.E.M., BIOS SDBLI944.86P 05/08/2007
task: ffff88003786ca60 ti: ffff880077018000 task.ti: ffff880077018000
RIP: 0010:[<ffffffff811ed5eb>] [<ffffffff811ed5eb>] debugfs_remove_recursive+0x1bd/0x367
RSP: 0018:ffff880077019df8 EFLAGS: 00010246
RAX: 0000000000000002 RBX: ffff88006f0fe490 RCX: 0000000000000000
RDX: dead000000100058 RSI: 0000000000000246 RDI: ffff88003786d454
RBP: ffff88006f0fe640 R08: 0000000000000628 R09: 0000000000000000
R10: 0000000000000628 R11: ffff8800795110a0 R12: ffff88006f0fe640
R13: ffff88006f0fe640 R14: ffffffff81817d0b R15: ffffffff818188b7
FS: 00007ff13ae24700(0000) GS:ffff88007d580000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b
CR2: 0000003054ec7be0 CR3: 0000000076d51000 CR4: 00000000000007e0
Stack:
ffff88007a41ebe0 dead000000100058 00000000fffffffe ffff88006f0fe640
0000000000000000 ffff88006f0fe678 ffff88007a41ebe0 ffff88003793a000
00000000fffffffe ffffffff810bde82 ffff88006f0fe640 ffff88007a41eb28
Call Trace:
[<ffffffff810bde82>] ? instance_rmdir+0x15b/0x1de
[<ffffffff81132e2d>] ? vfs_rmdir+0x80/0xd3
[<ffffffff81132f51>] ? do_rmdir+0xd1/0x139
[<ffffffff8124ad9e>] ? trace_hardirqs_on_thunk+0x3a/0x3c
[<ffffffff814fea62>] ? system_call_fastpath+0x16/0x1b
Code: fe ff ff 48 8d 75 30 48 89 df e8 c9 fd ff ff 85 c0 75 13 48 c7 c6 b8 cc d2 81 48 c7 c7 b0 cc d2 81 e8 8c 7a f5 ff 48 8b 54 24 08 <48> 8b 82 a8 00 00 00 48 89 d3 48 2d a8 00 00 00 48 89 44 24 08
RIP [<ffffffff811ed5eb>] debugfs_remove_recursive+0x1bd/0x367
RSP <ffff880077019df8>
It took a while, but every time it triggered, it was always in the
same place:
list_for_each_entry_safe(child, next, &parent->d_subdirs, d_u.d_child) {
Where the child->d_u.d_child seemed to be corrupted. I added lots of
trace_printk()s to see what was wrong, and sure enough, it was always
the child's d_u.d_child field. I looked around to see what touches
it and noticed that in __dentry_kill() which calls dentry_free():
static void dentry_free(struct dentry *dentry)
{
/* if dentry was never visible to RCU, immediate free is OK */
if (!(dentry->d_flags & DCACHE_RCUACCESS))
__d_free(&dentry->d_u.d_rcu);
else
call_rcu(&dentry->d_u.d_rcu, __d_free);
}
I also noticed that __dentry_kill() unlinks the child->d_u.child
under the parent->d_lock spin_lock.
Looking back at the loop in debugfs_remove_recursive() it never takes the
parent->d_lock to do the list walk. Adding more tracing, I was able to
prove this was the issue:
ftrace-t-15385 1.... 246662024us : dentry_kill <ffffffff81138b91>: free ffff88006d573600
rmdir-15409 2.... 246662024us : debugfs_remove_recursive <ffffffff811ec7e5>: child=ffff88006d573600 next=dead000000100058
The dentry_kill freed ffff88006d573600 just as the remove recursive was walking
it.
In order to fix this, the list walk needs to be modified a bit to take
the parent->d_lock. The safe version is no longer necessary, as every
time we remove a child, the parent->d_lock must be released and the
list walk must start over. Each time a child is removed, even though it
may still be on the list, it should be skipped by the first check
in the loop:
if (!debugfs_positive(child))
continue;
Cc: stable@vger.kernel.org
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2014-06-09 14:06:07 -04:00
|
|
|
struct dentry *child, *parent;
|
2008-07-01 15:14:51 +02:00
|
|
|
|
2012-05-23 15:13:07 +02:00
|
|
|
if (IS_ERR_OR_NULL(dentry))
|
2008-07-01 15:14:51 +02:00
|
|
|
return;
|
|
|
|
|
|
|
|
parent = dentry->d_parent;
|
|
|
|
if (!parent || !parent->d_inode)
|
|
|
|
return;
|
|
|
|
|
|
|
|
parent = dentry;
|
2013-07-26 17:12:56 +02:00
|
|
|
down:
|
2008-07-01 15:14:51 +02:00
|
|
|
mutex_lock(&parent->d_inode->i_mutex);
|
debugfs: Fix corrupted loop in debugfs_remove_recursive
[ I'm currently running my tests on it now, and so far, after a few
hours it has yet to blow up. I'll run it for 24 hours which it never
succeeded in the past. ]
The tracing code has a way to make directories within the debugfs file
system as well as deleting them using mkdir/rmdir in the instance
directory. This is very limited in functionality, such as there is
no renames, and the parent directory "instance" can not be modified.
The tracing code creates the instance directory from the debugfs code
and then replaces the dentry->d_inode->i_op with its own to allow
for mkdir/rmdir to work.
When these are called, the d_entry and inode locks need to be released
to call the instance creation and deletion code. That code has its own
accounting and locking to serialize everything to prevent multiple
users from causing harm. As the parent "instance" directory can not
be modified this simplifies things.
I created a stress test that creates several threads that randomly
creates and deletes directories thousands of times a second. The code
stood up to this test and I submitted it a while ago.
Recently I added a new test that adds readers to the mix. While the
instance directories were being added and deleted, readers would read
from these directories and even enable tracing within them. This test
was able to trigger a bug:
general protection fault: 0000 [#1] PREEMPT SMP
Modules linked in: ...
CPU: 3 PID: 17789 Comm: rmdir Tainted: G W 3.15.0-rc2-test+ #41
Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./To be filled by O.E.M., BIOS SDBLI944.86P 05/08/2007
task: ffff88003786ca60 ti: ffff880077018000 task.ti: ffff880077018000
RIP: 0010:[<ffffffff811ed5eb>] [<ffffffff811ed5eb>] debugfs_remove_recursive+0x1bd/0x367
RSP: 0018:ffff880077019df8 EFLAGS: 00010246
RAX: 0000000000000002 RBX: ffff88006f0fe490 RCX: 0000000000000000
RDX: dead000000100058 RSI: 0000000000000246 RDI: ffff88003786d454
RBP: ffff88006f0fe640 R08: 0000000000000628 R09: 0000000000000000
R10: 0000000000000628 R11: ffff8800795110a0 R12: ffff88006f0fe640
R13: ffff88006f0fe640 R14: ffffffff81817d0b R15: ffffffff818188b7
FS: 00007ff13ae24700(0000) GS:ffff88007d580000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b
CR2: 0000003054ec7be0 CR3: 0000000076d51000 CR4: 00000000000007e0
Stack:
ffff88007a41ebe0 dead000000100058 00000000fffffffe ffff88006f0fe640
0000000000000000 ffff88006f0fe678 ffff88007a41ebe0 ffff88003793a000
00000000fffffffe ffffffff810bde82 ffff88006f0fe640 ffff88007a41eb28
Call Trace:
[<ffffffff810bde82>] ? instance_rmdir+0x15b/0x1de
[<ffffffff81132e2d>] ? vfs_rmdir+0x80/0xd3
[<ffffffff81132f51>] ? do_rmdir+0xd1/0x139
[<ffffffff8124ad9e>] ? trace_hardirqs_on_thunk+0x3a/0x3c
[<ffffffff814fea62>] ? system_call_fastpath+0x16/0x1b
Code: fe ff ff 48 8d 75 30 48 89 df e8 c9 fd ff ff 85 c0 75 13 48 c7 c6 b8 cc d2 81 48 c7 c7 b0 cc d2 81 e8 8c 7a f5 ff 48 8b 54 24 08 <48> 8b 82 a8 00 00 00 48 89 d3 48 2d a8 00 00 00 48 89 44 24 08
RIP [<ffffffff811ed5eb>] debugfs_remove_recursive+0x1bd/0x367
RSP <ffff880077019df8>
It took a while, but every time it triggered, it was always in the
same place:
list_for_each_entry_safe(child, next, &parent->d_subdirs, d_u.d_child) {
Where the child->d_u.d_child seemed to be corrupted. I added lots of
trace_printk()s to see what was wrong, and sure enough, it was always
the child's d_u.d_child field. I looked around to see what touches
it and noticed that in __dentry_kill() which calls dentry_free():
static void dentry_free(struct dentry *dentry)
{
/* if dentry was never visible to RCU, immediate free is OK */
if (!(dentry->d_flags & DCACHE_RCUACCESS))
__d_free(&dentry->d_u.d_rcu);
else
call_rcu(&dentry->d_u.d_rcu, __d_free);
}
I also noticed that __dentry_kill() unlinks the child->d_u.child
under the parent->d_lock spin_lock.
Looking back at the loop in debugfs_remove_recursive() it never takes the
parent->d_lock to do the list walk. Adding more tracing, I was able to
prove this was the issue:
ftrace-t-15385 1.... 246662024us : dentry_kill <ffffffff81138b91>: free ffff88006d573600
rmdir-15409 2.... 246662024us : debugfs_remove_recursive <ffffffff811ec7e5>: child=ffff88006d573600 next=dead000000100058
The dentry_kill freed ffff88006d573600 just as the remove recursive was walking
it.
In order to fix this, the list walk needs to be modified a bit to take
the parent->d_lock. The safe version is no longer necessary, as every
time we remove a child, the parent->d_lock must be released and the
list walk must start over. Each time a child is removed, even though it
may still be on the list, it should be skipped by the first check
in the loop:
if (!debugfs_positive(child))
continue;
Cc: stable@vger.kernel.org
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2014-06-09 14:06:07 -04:00
|
|
|
loop:
|
|
|
|
/*
|
|
|
|
* The parent->d_subdirs is protected by the d_lock. Outside that
|
|
|
|
* lock, the child can be unlinked and set to be freed which can
|
|
|
|
* use the d_u.d_child as the rcu head and corrupt this list.
|
|
|
|
*/
|
|
|
|
spin_lock(&parent->d_lock);
|
2014-10-26 19:19:16 -04:00
|
|
|
list_for_each_entry(child, &parent->d_subdirs, d_child) {
|
2013-07-26 17:12:56 +02:00
|
|
|
if (!debugfs_positive(child))
|
|
|
|
continue;
|
2008-07-01 15:14:51 +02:00
|
|
|
|
2013-07-26 17:12:56 +02:00
|
|
|
/* perhaps simple_empty(child) makes more sense */
|
2008-07-01 15:14:51 +02:00
|
|
|
if (!list_empty(&child->d_subdirs)) {
|
debugfs: Fix corrupted loop in debugfs_remove_recursive
[ I'm currently running my tests on it now, and so far, after a few
hours it has yet to blow up. I'll run it for 24 hours which it never
succeeded in the past. ]
The tracing code has a way to make directories within the debugfs file
system as well as deleting them using mkdir/rmdir in the instance
directory. This is very limited in functionality, such as there is
no renames, and the parent directory "instance" can not be modified.
The tracing code creates the instance directory from the debugfs code
and then replaces the dentry->d_inode->i_op with its own to allow
for mkdir/rmdir to work.
When these are called, the d_entry and inode locks need to be released
to call the instance creation and deletion code. That code has its own
accounting and locking to serialize everything to prevent multiple
users from causing harm. As the parent "instance" directory can not
be modified this simplifies things.
I created a stress test that creates several threads that randomly
creates and deletes directories thousands of times a second. The code
stood up to this test and I submitted it a while ago.
Recently I added a new test that adds readers to the mix. While the
instance directories were being added and deleted, readers would read
from these directories and even enable tracing within them. This test
was able to trigger a bug:
general protection fault: 0000 [#1] PREEMPT SMP
Modules linked in: ...
CPU: 3 PID: 17789 Comm: rmdir Tainted: G W 3.15.0-rc2-test+ #41
Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./To be filled by O.E.M., BIOS SDBLI944.86P 05/08/2007
task: ffff88003786ca60 ti: ffff880077018000 task.ti: ffff880077018000
RIP: 0010:[<ffffffff811ed5eb>] [<ffffffff811ed5eb>] debugfs_remove_recursive+0x1bd/0x367
RSP: 0018:ffff880077019df8 EFLAGS: 00010246
RAX: 0000000000000002 RBX: ffff88006f0fe490 RCX: 0000000000000000
RDX: dead000000100058 RSI: 0000000000000246 RDI: ffff88003786d454
RBP: ffff88006f0fe640 R08: 0000000000000628 R09: 0000000000000000
R10: 0000000000000628 R11: ffff8800795110a0 R12: ffff88006f0fe640
R13: ffff88006f0fe640 R14: ffffffff81817d0b R15: ffffffff818188b7
FS: 00007ff13ae24700(0000) GS:ffff88007d580000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b
CR2: 0000003054ec7be0 CR3: 0000000076d51000 CR4: 00000000000007e0
Stack:
ffff88007a41ebe0 dead000000100058 00000000fffffffe ffff88006f0fe640
0000000000000000 ffff88006f0fe678 ffff88007a41ebe0 ffff88003793a000
00000000fffffffe ffffffff810bde82 ffff88006f0fe640 ffff88007a41eb28
Call Trace:
[<ffffffff810bde82>] ? instance_rmdir+0x15b/0x1de
[<ffffffff81132e2d>] ? vfs_rmdir+0x80/0xd3
[<ffffffff81132f51>] ? do_rmdir+0xd1/0x139
[<ffffffff8124ad9e>] ? trace_hardirqs_on_thunk+0x3a/0x3c
[<ffffffff814fea62>] ? system_call_fastpath+0x16/0x1b
Code: fe ff ff 48 8d 75 30 48 89 df e8 c9 fd ff ff 85 c0 75 13 48 c7 c6 b8 cc d2 81 48 c7 c7 b0 cc d2 81 e8 8c 7a f5 ff 48 8b 54 24 08 <48> 8b 82 a8 00 00 00 48 89 d3 48 2d a8 00 00 00 48 89 44 24 08
RIP [<ffffffff811ed5eb>] debugfs_remove_recursive+0x1bd/0x367
RSP <ffff880077019df8>
It took a while, but every time it triggered, it was always in the
same place:
list_for_each_entry_safe(child, next, &parent->d_subdirs, d_u.d_child) {
Where the child->d_u.d_child seemed to be corrupted. I added lots of
trace_printk()s to see what was wrong, and sure enough, it was always
the child's d_u.d_child field. I looked around to see what touches
it and noticed that in __dentry_kill() which calls dentry_free():
static void dentry_free(struct dentry *dentry)
{
/* if dentry was never visible to RCU, immediate free is OK */
if (!(dentry->d_flags & DCACHE_RCUACCESS))
__d_free(&dentry->d_u.d_rcu);
else
call_rcu(&dentry->d_u.d_rcu, __d_free);
}
I also noticed that __dentry_kill() unlinks the child->d_u.child
under the parent->d_lock spin_lock.
Looking back at the loop in debugfs_remove_recursive() it never takes the
parent->d_lock to do the list walk. Adding more tracing, I was able to
prove this was the issue:
ftrace-t-15385 1.... 246662024us : dentry_kill <ffffffff81138b91>: free ffff88006d573600
rmdir-15409 2.... 246662024us : debugfs_remove_recursive <ffffffff811ec7e5>: child=ffff88006d573600 next=dead000000100058
The dentry_kill freed ffff88006d573600 just as the remove recursive was walking
it.
In order to fix this, the list walk needs to be modified a bit to take
the parent->d_lock. The safe version is no longer necessary, as every
time we remove a child, the parent->d_lock must be released and the
list walk must start over. Each time a child is removed, even though it
may still be on the list, it should be skipped by the first check
in the loop:
if (!debugfs_positive(child))
continue;
Cc: stable@vger.kernel.org
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2014-06-09 14:06:07 -04:00
|
|
|
spin_unlock(&parent->d_lock);
|
2008-07-01 15:14:51 +02:00
|
|
|
mutex_unlock(&parent->d_inode->i_mutex);
|
|
|
|
parent = child;
|
2013-07-26 17:12:56 +02:00
|
|
|
goto down;
|
2005-04-16 15:20:36 -07:00
|
|
|
}
|
debugfs: Fix corrupted loop in debugfs_remove_recursive
[ I'm currently running my tests on it now, and so far, after a few
hours it has yet to blow up. I'll run it for 24 hours which it never
succeeded in the past. ]
The tracing code has a way to make directories within the debugfs file
system as well as deleting them using mkdir/rmdir in the instance
directory. This is very limited in functionality, such as there is
no renames, and the parent directory "instance" can not be modified.
The tracing code creates the instance directory from the debugfs code
and then replaces the dentry->d_inode->i_op with its own to allow
for mkdir/rmdir to work.
When these are called, the d_entry and inode locks need to be released
to call the instance creation and deletion code. That code has its own
accounting and locking to serialize everything to prevent multiple
users from causing harm. As the parent "instance" directory can not
be modified this simplifies things.
I created a stress test that creates several threads that randomly
creates and deletes directories thousands of times a second. The code
stood up to this test and I submitted it a while ago.
Recently I added a new test that adds readers to the mix. While the
instance directories were being added and deleted, readers would read
from these directories and even enable tracing within them. This test
was able to trigger a bug:
general protection fault: 0000 [#1] PREEMPT SMP
Modules linked in: ...
CPU: 3 PID: 17789 Comm: rmdir Tainted: G W 3.15.0-rc2-test+ #41
Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./To be filled by O.E.M., BIOS SDBLI944.86P 05/08/2007
task: ffff88003786ca60 ti: ffff880077018000 task.ti: ffff880077018000
RIP: 0010:[<ffffffff811ed5eb>] [<ffffffff811ed5eb>] debugfs_remove_recursive+0x1bd/0x367
RSP: 0018:ffff880077019df8 EFLAGS: 00010246
RAX: 0000000000000002 RBX: ffff88006f0fe490 RCX: 0000000000000000
RDX: dead000000100058 RSI: 0000000000000246 RDI: ffff88003786d454
RBP: ffff88006f0fe640 R08: 0000000000000628 R09: 0000000000000000
R10: 0000000000000628 R11: ffff8800795110a0 R12: ffff88006f0fe640
R13: ffff88006f0fe640 R14: ffffffff81817d0b R15: ffffffff818188b7
FS: 00007ff13ae24700(0000) GS:ffff88007d580000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b
CR2: 0000003054ec7be0 CR3: 0000000076d51000 CR4: 00000000000007e0
Stack:
ffff88007a41ebe0 dead000000100058 00000000fffffffe ffff88006f0fe640
0000000000000000 ffff88006f0fe678 ffff88007a41ebe0 ffff88003793a000
00000000fffffffe ffffffff810bde82 ffff88006f0fe640 ffff88007a41eb28
Call Trace:
[<ffffffff810bde82>] ? instance_rmdir+0x15b/0x1de
[<ffffffff81132e2d>] ? vfs_rmdir+0x80/0xd3
[<ffffffff81132f51>] ? do_rmdir+0xd1/0x139
[<ffffffff8124ad9e>] ? trace_hardirqs_on_thunk+0x3a/0x3c
[<ffffffff814fea62>] ? system_call_fastpath+0x16/0x1b
Code: fe ff ff 48 8d 75 30 48 89 df e8 c9 fd ff ff 85 c0 75 13 48 c7 c6 b8 cc d2 81 48 c7 c7 b0 cc d2 81 e8 8c 7a f5 ff 48 8b 54 24 08 <48> 8b 82 a8 00 00 00 48 89 d3 48 2d a8 00 00 00 48 89 44 24 08
RIP [<ffffffff811ed5eb>] debugfs_remove_recursive+0x1bd/0x367
RSP <ffff880077019df8>
It took a while, but every time it triggered, it was always in the
same place:
list_for_each_entry_safe(child, next, &parent->d_subdirs, d_u.d_child) {
Where the child->d_u.d_child seemed to be corrupted. I added lots of
trace_printk()s to see what was wrong, and sure enough, it was always
the child's d_u.d_child field. I looked around to see what touches
it and noticed that in __dentry_kill() which calls dentry_free():
static void dentry_free(struct dentry *dentry)
{
/* if dentry was never visible to RCU, immediate free is OK */
if (!(dentry->d_flags & DCACHE_RCUACCESS))
__d_free(&dentry->d_u.d_rcu);
else
call_rcu(&dentry->d_u.d_rcu, __d_free);
}
I also noticed that __dentry_kill() unlinks the child->d_u.child
under the parent->d_lock spin_lock.
Looking back at the loop in debugfs_remove_recursive() it never takes the
parent->d_lock to do the list walk. Adding more tracing, I was able to
prove this was the issue:
ftrace-t-15385 1.... 246662024us : dentry_kill <ffffffff81138b91>: free ffff88006d573600
rmdir-15409 2.... 246662024us : debugfs_remove_recursive <ffffffff811ec7e5>: child=ffff88006d573600 next=dead000000100058
The dentry_kill freed ffff88006d573600 just as the remove recursive was walking
it.
In order to fix this, the list walk needs to be modified a bit to take
the parent->d_lock. The safe version is no longer necessary, as every
time we remove a child, the parent->d_lock must be released and the
list walk must start over. Each time a child is removed, even though it
may still be on the list, it should be skipped by the first check
in the loop:
if (!debugfs_positive(child))
continue;
Cc: stable@vger.kernel.org
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2014-06-09 14:06:07 -04:00
|
|
|
|
|
|
|
spin_unlock(&parent->d_lock);
|
|
|
|
|
2013-07-26 17:12:56 +02:00
|
|
|
if (!__debugfs_remove(child, parent))
|
|
|
|
simple_release_fs(&debugfs_mount, &debugfs_mount_count);
|
debugfs: Fix corrupted loop in debugfs_remove_recursive
[ I'm currently running my tests on it now, and so far, after a few
hours it has yet to blow up. I'll run it for 24 hours which it never
succeeded in the past. ]
The tracing code has a way to make directories within the debugfs file
system as well as deleting them using mkdir/rmdir in the instance
directory. This is very limited in functionality, such as there is
no renames, and the parent directory "instance" can not be modified.
The tracing code creates the instance directory from the debugfs code
and then replaces the dentry->d_inode->i_op with its own to allow
for mkdir/rmdir to work.
When these are called, the d_entry and inode locks need to be released
to call the instance creation and deletion code. That code has its own
accounting and locking to serialize everything to prevent multiple
users from causing harm. As the parent "instance" directory can not
be modified this simplifies things.
I created a stress test that creates several threads that randomly
creates and deletes directories thousands of times a second. The code
stood up to this test and I submitted it a while ago.
Recently I added a new test that adds readers to the mix. While the
instance directories were being added and deleted, readers would read
from these directories and even enable tracing within them. This test
was able to trigger a bug:
general protection fault: 0000 [#1] PREEMPT SMP
Modules linked in: ...
CPU: 3 PID: 17789 Comm: rmdir Tainted: G W 3.15.0-rc2-test+ #41
Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./To be filled by O.E.M., BIOS SDBLI944.86P 05/08/2007
task: ffff88003786ca60 ti: ffff880077018000 task.ti: ffff880077018000
RIP: 0010:[<ffffffff811ed5eb>] [<ffffffff811ed5eb>] debugfs_remove_recursive+0x1bd/0x367
RSP: 0018:ffff880077019df8 EFLAGS: 00010246
RAX: 0000000000000002 RBX: ffff88006f0fe490 RCX: 0000000000000000
RDX: dead000000100058 RSI: 0000000000000246 RDI: ffff88003786d454
RBP: ffff88006f0fe640 R08: 0000000000000628 R09: 0000000000000000
R10: 0000000000000628 R11: ffff8800795110a0 R12: ffff88006f0fe640
R13: ffff88006f0fe640 R14: ffffffff81817d0b R15: ffffffff818188b7
FS: 00007ff13ae24700(0000) GS:ffff88007d580000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b
CR2: 0000003054ec7be0 CR3: 0000000076d51000 CR4: 00000000000007e0
Stack:
ffff88007a41ebe0 dead000000100058 00000000fffffffe ffff88006f0fe640
0000000000000000 ffff88006f0fe678 ffff88007a41ebe0 ffff88003793a000
00000000fffffffe ffffffff810bde82 ffff88006f0fe640 ffff88007a41eb28
Call Trace:
[<ffffffff810bde82>] ? instance_rmdir+0x15b/0x1de
[<ffffffff81132e2d>] ? vfs_rmdir+0x80/0xd3
[<ffffffff81132f51>] ? do_rmdir+0xd1/0x139
[<ffffffff8124ad9e>] ? trace_hardirqs_on_thunk+0x3a/0x3c
[<ffffffff814fea62>] ? system_call_fastpath+0x16/0x1b
Code: fe ff ff 48 8d 75 30 48 89 df e8 c9 fd ff ff 85 c0 75 13 48 c7 c6 b8 cc d2 81 48 c7 c7 b0 cc d2 81 e8 8c 7a f5 ff 48 8b 54 24 08 <48> 8b 82 a8 00 00 00 48 89 d3 48 2d a8 00 00 00 48 89 44 24 08
RIP [<ffffffff811ed5eb>] debugfs_remove_recursive+0x1bd/0x367
RSP <ffff880077019df8>
It took a while, but every time it triggered, it was always in the
same place:
list_for_each_entry_safe(child, next, &parent->d_subdirs, d_u.d_child) {
Where the child->d_u.d_child seemed to be corrupted. I added lots of
trace_printk()s to see what was wrong, and sure enough, it was always
the child's d_u.d_child field. I looked around to see what touches
it and noticed that in __dentry_kill() which calls dentry_free():
static void dentry_free(struct dentry *dentry)
{
/* if dentry was never visible to RCU, immediate free is OK */
if (!(dentry->d_flags & DCACHE_RCUACCESS))
__d_free(&dentry->d_u.d_rcu);
else
call_rcu(&dentry->d_u.d_rcu, __d_free);
}
I also noticed that __dentry_kill() unlinks the child->d_u.child
under the parent->d_lock spin_lock.
Looking back at the loop in debugfs_remove_recursive() it never takes the
parent->d_lock to do the list walk. Adding more tracing, I was able to
prove this was the issue:
ftrace-t-15385 1.... 246662024us : dentry_kill <ffffffff81138b91>: free ffff88006d573600
rmdir-15409 2.... 246662024us : debugfs_remove_recursive <ffffffff811ec7e5>: child=ffff88006d573600 next=dead000000100058
The dentry_kill freed ffff88006d573600 just as the remove recursive was walking
it.
In order to fix this, the list walk needs to be modified a bit to take
the parent->d_lock. The safe version is no longer necessary, as every
time we remove a child, the parent->d_lock must be released and the
list walk must start over. Each time a child is removed, even though it
may still be on the list, it should be skipped by the first check
in the loop:
if (!debugfs_positive(child))
continue;
Cc: stable@vger.kernel.org
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2014-06-09 14:06:07 -04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* The parent->d_lock protects against the child being unlinked
|
|
|
|
* from d_subdirs. When releasing the parent->d_lock we can
|
|
|
|
* no longer trust that the next pointer is valid.
|
|
|
|
* Restart the loop. We'll skip this one with the
|
|
|
|
* debugfs_positive() check.
|
|
|
|
*/
|
|
|
|
goto loop;
|
2005-04-16 15:20:36 -07:00
|
|
|
}
|
debugfs: Fix corrupted loop in debugfs_remove_recursive
[ I'm currently running my tests on it now, and so far, after a few
hours it has yet to blow up. I'll run it for 24 hours which it never
succeeded in the past. ]
The tracing code has a way to make directories within the debugfs file
system as well as deleting them using mkdir/rmdir in the instance
directory. This is very limited in functionality, such as there is
no renames, and the parent directory "instance" can not be modified.
The tracing code creates the instance directory from the debugfs code
and then replaces the dentry->d_inode->i_op with its own to allow
for mkdir/rmdir to work.
When these are called, the d_entry and inode locks need to be released
to call the instance creation and deletion code. That code has its own
accounting and locking to serialize everything to prevent multiple
users from causing harm. As the parent "instance" directory can not
be modified this simplifies things.
I created a stress test that creates several threads that randomly
creates and deletes directories thousands of times a second. The code
stood up to this test and I submitted it a while ago.
Recently I added a new test that adds readers to the mix. While the
instance directories were being added and deleted, readers would read
from these directories and even enable tracing within them. This test
was able to trigger a bug:
general protection fault: 0000 [#1] PREEMPT SMP
Modules linked in: ...
CPU: 3 PID: 17789 Comm: rmdir Tainted: G W 3.15.0-rc2-test+ #41
Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./To be filled by O.E.M., BIOS SDBLI944.86P 05/08/2007
task: ffff88003786ca60 ti: ffff880077018000 task.ti: ffff880077018000
RIP: 0010:[<ffffffff811ed5eb>] [<ffffffff811ed5eb>] debugfs_remove_recursive+0x1bd/0x367
RSP: 0018:ffff880077019df8 EFLAGS: 00010246
RAX: 0000000000000002 RBX: ffff88006f0fe490 RCX: 0000000000000000
RDX: dead000000100058 RSI: 0000000000000246 RDI: ffff88003786d454
RBP: ffff88006f0fe640 R08: 0000000000000628 R09: 0000000000000000
R10: 0000000000000628 R11: ffff8800795110a0 R12: ffff88006f0fe640
R13: ffff88006f0fe640 R14: ffffffff81817d0b R15: ffffffff818188b7
FS: 00007ff13ae24700(0000) GS:ffff88007d580000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b
CR2: 0000003054ec7be0 CR3: 0000000076d51000 CR4: 00000000000007e0
Stack:
ffff88007a41ebe0 dead000000100058 00000000fffffffe ffff88006f0fe640
0000000000000000 ffff88006f0fe678 ffff88007a41ebe0 ffff88003793a000
00000000fffffffe ffffffff810bde82 ffff88006f0fe640 ffff88007a41eb28
Call Trace:
[<ffffffff810bde82>] ? instance_rmdir+0x15b/0x1de
[<ffffffff81132e2d>] ? vfs_rmdir+0x80/0xd3
[<ffffffff81132f51>] ? do_rmdir+0xd1/0x139
[<ffffffff8124ad9e>] ? trace_hardirqs_on_thunk+0x3a/0x3c
[<ffffffff814fea62>] ? system_call_fastpath+0x16/0x1b
Code: fe ff ff 48 8d 75 30 48 89 df e8 c9 fd ff ff 85 c0 75 13 48 c7 c6 b8 cc d2 81 48 c7 c7 b0 cc d2 81 e8 8c 7a f5 ff 48 8b 54 24 08 <48> 8b 82 a8 00 00 00 48 89 d3 48 2d a8 00 00 00 48 89 44 24 08
RIP [<ffffffff811ed5eb>] debugfs_remove_recursive+0x1bd/0x367
RSP <ffff880077019df8>
It took a while, but every time it triggered, it was always in the
same place:
list_for_each_entry_safe(child, next, &parent->d_subdirs, d_u.d_child) {
Where the child->d_u.d_child seemed to be corrupted. I added lots of
trace_printk()s to see what was wrong, and sure enough, it was always
the child's d_u.d_child field. I looked around to see what touches
it and noticed that in __dentry_kill() which calls dentry_free():
static void dentry_free(struct dentry *dentry)
{
/* if dentry was never visible to RCU, immediate free is OK */
if (!(dentry->d_flags & DCACHE_RCUACCESS))
__d_free(&dentry->d_u.d_rcu);
else
call_rcu(&dentry->d_u.d_rcu, __d_free);
}
I also noticed that __dentry_kill() unlinks the child->d_u.child
under the parent->d_lock spin_lock.
Looking back at the loop in debugfs_remove_recursive() it never takes the
parent->d_lock to do the list walk. Adding more tracing, I was able to
prove this was the issue:
ftrace-t-15385 1.... 246662024us : dentry_kill <ffffffff81138b91>: free ffff88006d573600
rmdir-15409 2.... 246662024us : debugfs_remove_recursive <ffffffff811ec7e5>: child=ffff88006d573600 next=dead000000100058
The dentry_kill freed ffff88006d573600 just as the remove recursive was walking
it.
In order to fix this, the list walk needs to be modified a bit to take
the parent->d_lock. The safe version is no longer necessary, as every
time we remove a child, the parent->d_lock must be released and the
list walk must start over. Each time a child is removed, even though it
may still be on the list, it should be skipped by the first check
in the loop:
if (!debugfs_positive(child))
continue;
Cc: stable@vger.kernel.org
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2014-06-09 14:06:07 -04:00
|
|
|
spin_unlock(&parent->d_lock);
|
2008-07-01 15:14:51 +02:00
|
|
|
|
2013-07-26 17:12:56 +02:00
|
|
|
mutex_unlock(&parent->d_inode->i_mutex);
|
|
|
|
child = parent;
|
|
|
|
parent = parent->d_parent;
|
2008-07-01 15:14:51 +02:00
|
|
|
mutex_lock(&parent->d_inode->i_mutex);
|
2013-07-26 17:12:56 +02:00
|
|
|
|
debugfs: Fix corrupted loop in debugfs_remove_recursive
[ I'm currently running my tests on it now, and so far, after a few
hours it has yet to blow up. I'll run it for 24 hours which it never
succeeded in the past. ]
The tracing code has a way to make directories within the debugfs file
system as well as deleting them using mkdir/rmdir in the instance
directory. This is very limited in functionality, such as there is
no renames, and the parent directory "instance" can not be modified.
The tracing code creates the instance directory from the debugfs code
and then replaces the dentry->d_inode->i_op with its own to allow
for mkdir/rmdir to work.
When these are called, the d_entry and inode locks need to be released
to call the instance creation and deletion code. That code has its own
accounting and locking to serialize everything to prevent multiple
users from causing harm. As the parent "instance" directory can not
be modified this simplifies things.
I created a stress test that creates several threads that randomly
creates and deletes directories thousands of times a second. The code
stood up to this test and I submitted it a while ago.
Recently I added a new test that adds readers to the mix. While the
instance directories were being added and deleted, readers would read
from these directories and even enable tracing within them. This test
was able to trigger a bug:
general protection fault: 0000 [#1] PREEMPT SMP
Modules linked in: ...
CPU: 3 PID: 17789 Comm: rmdir Tainted: G W 3.15.0-rc2-test+ #41
Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./To be filled by O.E.M., BIOS SDBLI944.86P 05/08/2007
task: ffff88003786ca60 ti: ffff880077018000 task.ti: ffff880077018000
RIP: 0010:[<ffffffff811ed5eb>] [<ffffffff811ed5eb>] debugfs_remove_recursive+0x1bd/0x367
RSP: 0018:ffff880077019df8 EFLAGS: 00010246
RAX: 0000000000000002 RBX: ffff88006f0fe490 RCX: 0000000000000000
RDX: dead000000100058 RSI: 0000000000000246 RDI: ffff88003786d454
RBP: ffff88006f0fe640 R08: 0000000000000628 R09: 0000000000000000
R10: 0000000000000628 R11: ffff8800795110a0 R12: ffff88006f0fe640
R13: ffff88006f0fe640 R14: ffffffff81817d0b R15: ffffffff818188b7
FS: 00007ff13ae24700(0000) GS:ffff88007d580000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b
CR2: 0000003054ec7be0 CR3: 0000000076d51000 CR4: 00000000000007e0
Stack:
ffff88007a41ebe0 dead000000100058 00000000fffffffe ffff88006f0fe640
0000000000000000 ffff88006f0fe678 ffff88007a41ebe0 ffff88003793a000
00000000fffffffe ffffffff810bde82 ffff88006f0fe640 ffff88007a41eb28
Call Trace:
[<ffffffff810bde82>] ? instance_rmdir+0x15b/0x1de
[<ffffffff81132e2d>] ? vfs_rmdir+0x80/0xd3
[<ffffffff81132f51>] ? do_rmdir+0xd1/0x139
[<ffffffff8124ad9e>] ? trace_hardirqs_on_thunk+0x3a/0x3c
[<ffffffff814fea62>] ? system_call_fastpath+0x16/0x1b
Code: fe ff ff 48 8d 75 30 48 89 df e8 c9 fd ff ff 85 c0 75 13 48 c7 c6 b8 cc d2 81 48 c7 c7 b0 cc d2 81 e8 8c 7a f5 ff 48 8b 54 24 08 <48> 8b 82 a8 00 00 00 48 89 d3 48 2d a8 00 00 00 48 89 44 24 08
RIP [<ffffffff811ed5eb>] debugfs_remove_recursive+0x1bd/0x367
RSP <ffff880077019df8>
It took a while, but every time it triggered, it was always in the
same place:
list_for_each_entry_safe(child, next, &parent->d_subdirs, d_u.d_child) {
Where the child->d_u.d_child seemed to be corrupted. I added lots of
trace_printk()s to see what was wrong, and sure enough, it was always
the child's d_u.d_child field. I looked around to see what touches
it and noticed that in __dentry_kill() which calls dentry_free():
static void dentry_free(struct dentry *dentry)
{
/* if dentry was never visible to RCU, immediate free is OK */
if (!(dentry->d_flags & DCACHE_RCUACCESS))
__d_free(&dentry->d_u.d_rcu);
else
call_rcu(&dentry->d_u.d_rcu, __d_free);
}
I also noticed that __dentry_kill() unlinks the child->d_u.d_child
under the parent->d_lock spin_lock.
Looking back at the loop in debugfs_remove_recursive() it never takes the
parent->d_lock to do the list walk. Adding more tracing, I was able to
prove this was the issue:
ftrace-t-15385 1.... 246662024us : dentry_kill <ffffffff81138b91>: free ffff88006d573600
rmdir-15409 2.... 246662024us : debugfs_remove_recursive <ffffffff811ec7e5>: child=ffff88006d573600 next=dead000000100058
The dentry_kill freed ffff88006d573600 just as the remove recursive was walking
it.
In order to fix this, the list walk needs to be modified a bit to take
the parent->d_lock. The safe version is no longer necessary, as every
time we remove a child, the parent->d_lock must be released and the
list walk must start over. Each time a child is removed, even though it
may still be on the list, it should be skipped by the first check
in the loop:
if (!debugfs_positive(child))
continue;
Cc: stable@vger.kernel.org
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2014-06-09 14:06:07 -04:00
|
|
|
if (child != dentry)
|
|
|
|
/* go up */
|
|
|
|
goto loop;
|
2013-07-26 17:12:56 +02:00
|
|
|
|
|
|
|
if (!__debugfs_remove(child, parent))
|
|
|
|
simple_release_fs(&debugfs_mount, &debugfs_mount_count);
|
2006-01-09 15:59:24 -08:00
|
|
|
mutex_unlock(&parent->d_inode->i_mutex);
|
2005-04-16 15:20:36 -07:00
|
|
|
}
|
2008-07-01 15:14:51 +02:00
|
|
|
EXPORT_SYMBOL_GPL(debugfs_remove_recursive);
|
2005-04-16 15:20:36 -07:00
|
|
|
|
2007-05-09 13:19:52 +02:00
|
|
|
/**
|
|
|
|
* debugfs_rename - rename a file/directory in the debugfs filesystem
|
|
|
|
* @old_dir: a pointer to the parent dentry for the renamed object. This
|
|
|
|
* should be a directory dentry.
|
|
|
|
* @old_dentry: dentry of an object to be renamed.
|
|
|
|
* @new_dir: a pointer to the parent dentry where the object should be
|
|
|
|
* moved. This should be a directory dentry.
|
|
|
|
* @new_name: a pointer to a string containing the target name.
|
|
|
|
*
|
|
|
|
* This function renames a file/directory in debugfs. The target must not
|
|
|
|
* exist for rename to succeed.
|
|
|
|
*
|
|
|
|
* This function will return a pointer to old_dentry (which is updated to
|
|
|
|
* reflect renaming) if it succeeds. If an error occurs, %NULL will be
|
|
|
|
* returned.
|
|
|
|
*
|
|
|
|
* If debugfs is not enabled in the kernel, the value -%ENODEV will be
|
|
|
|
* returned.
|
|
|
|
*/
|
|
|
|
struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry,
|
|
|
|
struct dentry *new_dir, const char *new_name)
|
|
|
|
{
|
|
|
|
int error;
|
|
|
|
struct dentry *dentry = NULL, *trap;
|
|
|
|
const char *old_name;
|
|
|
|
|
|
|
|
trap = lock_rename(new_dir, old_dir);
|
|
|
|
/* Source or destination directories don't exist? */
|
|
|
|
if (!old_dir->d_inode || !new_dir->d_inode)
|
|
|
|
goto exit;
|
|
|
|
/* Source does not exist, cyclic rename, or mountpoint? */
|
|
|
|
if (!old_dentry->d_inode || old_dentry == trap ||
|
|
|
|
d_mountpoint(old_dentry))
|
|
|
|
goto exit;
|
|
|
|
dentry = lookup_one_len(new_name, new_dir, strlen(new_name));
|
|
|
|
/* Lookup failed, cyclic rename or target exists? */
|
|
|
|
if (IS_ERR(dentry) || dentry == trap || dentry->d_inode)
|
|
|
|
goto exit;
|
|
|
|
|
|
|
|
old_name = fsnotify_oldname_init(old_dentry->d_name.name);
|
|
|
|
|
|
|
|
error = simple_rename(old_dir->d_inode, old_dentry, new_dir->d_inode,
|
|
|
|
dentry);
|
|
|
|
if (error) {
|
|
|
|
fsnotify_oldname_free(old_name);
|
|
|
|
goto exit;
|
|
|
|
}
|
|
|
|
d_move(old_dentry, dentry);
|
|
|
|
fsnotify_move(old_dir->d_inode, new_dir->d_inode, old_name,
|
2009-12-25 04:57:57 -05:00
|
|
|
S_ISDIR(old_dentry->d_inode->i_mode),
|
2007-06-07 12:19:32 -04:00
|
|
|
NULL, old_dentry);
|
2007-05-09 13:19:52 +02:00
|
|
|
fsnotify_oldname_free(old_name);
|
|
|
|
unlock_rename(new_dir, old_dir);
|
|
|
|
dput(dentry);
|
|
|
|
return old_dentry;
|
|
|
|
exit:
|
|
|
|
if (dentry && !IS_ERR(dentry))
|
|
|
|
dput(dentry);
|
|
|
|
unlock_rename(new_dir, old_dir);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(debugfs_rename);
|
|
|
|
|
2009-03-22 23:10:44 +01:00
|
|
|
/**
|
|
|
|
* debugfs_initialized - Tells whether debugfs has been registered
|
|
|
|
*/
|
|
|
|
bool debugfs_initialized(void)
|
|
|
|
{
|
|
|
|
return debugfs_registered;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(debugfs_initialized);
|
|
|
|
|
|
|
|
|
2007-10-29 20:13:17 +01:00
|
|
|
static struct kobject *debug_kobj;
|
2005-04-16 15:20:36 -07:00
|
|
|
|
|
|
|
static int __init debugfs_init(void)
|
|
|
|
{
|
|
|
|
int retval;
|
|
|
|
|
2007-11-06 10:36:58 -08:00
|
|
|
debug_kobj = kobject_create_and_add("debug", kernel_kobj);
|
2007-10-29 20:13:17 +01:00
|
|
|
if (!debug_kobj)
|
|
|
|
return -EINVAL;
|
2005-04-16 15:20:36 -07:00
|
|
|
|
|
|
|
retval = register_filesystem(&debug_fs_type);
|
|
|
|
if (retval)
|
2007-12-20 08:13:05 -08:00
|
|
|
kobject_put(debug_kobj);
|
2009-03-22 23:10:44 +01:00
|
|
|
else
|
|
|
|
debugfs_registered = true;
|
|
|
|
|
2005-04-16 15:20:36 -07:00
|
|
|
return retval;
|
|
|
|
}
|
|
|
|
core_initcall(debugfs_init);
|
|
|
|
|