1042 lines
26 KiB
C
Raw Normal View History

/*
* linux/fs/reiserfs/xattr.c
*
* Copyright (c) 2002 by Jeff Mahoney, <jeffm@suse.com>
*
*/
/*
* In order to implement EA/ACLs in a clean, backwards compatible manner,
* they are implemented as files in a "private" directory.
* Each EA is in it's own file, with the directory layout like so (/ is assumed
* to be relative to fs root). Inside the /.reiserfs_priv/xattrs directory,
* directories named using the capital-hex form of the objectid and
* generation number are used. Inside each directory are individual files
* named with the name of the extended attribute.
*
* So, for objectid 12648430, we could have:
* /.reiserfs_priv/xattrs/C0FFEE.0/system.posix_acl_access
* /.reiserfs_priv/xattrs/C0FFEE.0/system.posix_acl_default
* /.reiserfs_priv/xattrs/C0FFEE.0/user.Content-Type
* .. or similar.
*
* The file contents are the text of the EA. The size is known based on the
* stat data describing the file.
*
* In the case of system.posix_acl_access and system.posix_acl_default, since
* these are special cases for filesystem ACLs, they are interpreted by the
* kernel, in addition, they are negatively and positively cached and attached
* to the inode so that unnecessary lookups are avoided.
*
* Locking works like so:
* Directory components (xattr root, xattr dir) are protectd by their i_mutex.
* The xattrs themselves are protected by the xattr_sem.
*/
#include "reiserfs.h"
#include <linux/capability.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/errno.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 17:04:11 +09:00
#include <linux/gfp.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/pagemap.h>
#include <linux/xattr.h>
#include "xattr.h"
#include "acl.h"
#include <asm/uaccess.h>
#include <net/checksum.h>
#include <linux/stat.h>
#include <linux/quotaops.h>
#include <linux/security.h>
#include <linux/posix_acl_xattr.h>
#define PRIVROOT_NAME ".reiserfs_priv"
#define XAROOT_NAME "xattrs"
/*
* Helpers for inode ops. We do this so that we don't have all the VFS
* overhead and also for proper i_mutex annotation.
* dir->i_mutex must be held for all of them.
*/
#ifdef CONFIG_REISERFS_FS_XATTR
static int xattr_create(struct inode *dir, struct dentry *dentry, int mode)
{
BUG_ON(!mutex_is_locked(&dir->i_mutex));
return dir->i_op->create(dir, dentry, mode, true);
}
#endif
static int xattr_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
BUG_ON(!mutex_is_locked(&dir->i_mutex));
return dir->i_op->mkdir(dir, dentry, mode);
}
/*
* We use I_MUTEX_CHILD here to silence lockdep. It's safe because xattr
* mutation ops aren't called during rename or splace, which are the
* only other users of I_MUTEX_CHILD. It violates the ordering, but that's
* better than allocating another subclass just for this code.
*/
static int xattr_unlink(struct inode *dir, struct dentry *dentry)
{
int error;
BUG_ON(!mutex_is_locked(&dir->i_mutex));
mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
error = dir->i_op->unlink(dir, dentry);
mutex_unlock(&dentry->d_inode->i_mutex);
if (!error)
d_delete(dentry);
return error;
}
static int xattr_rmdir(struct inode *dir, struct dentry *dentry)
{
int error;
BUG_ON(!mutex_is_locked(&dir->i_mutex));
mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
error = dir->i_op->rmdir(dir, dentry);
if (!error)
dentry->d_inode->i_flags |= S_DEAD;
mutex_unlock(&dentry->d_inode->i_mutex);
if (!error)
d_delete(dentry);
return error;
}
#define xattr_may_create(flags) (!flags || flags & XATTR_CREATE)
static struct dentry *open_xa_root(struct super_block *sb, int flags)
{
struct dentry *privroot = REISERFS_SB(sb)->priv_root;
struct dentry *xaroot;
if (!privroot->d_inode)
return ERR_PTR(-ENODATA);
mutex_lock_nested(&privroot->d_inode->i_mutex, I_MUTEX_XATTR);
xaroot = dget(REISERFS_SB(sb)->xattr_root);
if (!xaroot)
xaroot = ERR_PTR(-ENODATA);
else if (!xaroot->d_inode) {
int err = -ENODATA;
if (xattr_may_create(flags))
err = xattr_mkdir(privroot->d_inode, xaroot, 0700);
if (err) {
dput(xaroot);
xaroot = ERR_PTR(err);
}
}
mutex_unlock(&privroot->d_inode->i_mutex);
return xaroot;
}
static struct dentry *open_xa_dir(const struct inode *inode, int flags)
{
struct dentry *xaroot, *xadir;
char namebuf[17];
xaroot = open_xa_root(inode->i_sb, flags);
if (IS_ERR(xaroot))
return xaroot;
snprintf(namebuf, sizeof(namebuf), "%X.%X",
le32_to_cpu(INODE_PKEY(inode)->k_objectid),
inode->i_generation);
mutex_lock_nested(&xaroot->d_inode->i_mutex, I_MUTEX_XATTR);
xadir = lookup_one_len(namebuf, xaroot, strlen(namebuf));
if (!IS_ERR(xadir) && !xadir->d_inode) {
int err = -ENODATA;
if (xattr_may_create(flags))
err = xattr_mkdir(xaroot->d_inode, xadir, 0700);
if (err) {
dput(xadir);
xadir = ERR_PTR(err);
}
}
mutex_unlock(&xaroot->d_inode->i_mutex);
dput(xaroot);
return xadir;
}
/*
* The following are side effects of other operations that aren't explicitly
* modifying extended attributes. This includes operations such as permissions
* or ownership changes, object deletions, etc.
*/
struct reiserfs_dentry_buf {
struct dir_context ctx;
struct dentry *xadir;
int count;
struct dentry *dentries[8];
};
static int
fill_with_dentries(void *buf, const char *name, int namelen, loff_t offset,
u64 ino, unsigned int d_type)
{
struct reiserfs_dentry_buf *dbuf = buf;
struct dentry *dentry;
WARN_ON_ONCE(!mutex_is_locked(&dbuf->xadir->d_inode->i_mutex));
if (dbuf->count == ARRAY_SIZE(dbuf->dentries))
return -ENOSPC;
if (name[0] == '.' && (namelen < 2 ||
(namelen == 2 && name[1] == '.')))
return 0;
dentry = lookup_one_len(name, dbuf->xadir, namelen);
if (IS_ERR(dentry)) {
return PTR_ERR(dentry);
} else if (!dentry->d_inode) {
/* A directory entry exists, but no file? */
reiserfs_error(dentry->d_sb, "xattr-20003",
"Corrupted directory: xattr %s listed but "
"not found for file %s.\n",
dentry->d_name.name, dbuf->xadir->d_name.name);
dput(dentry);
return -EIO;
}
dbuf->dentries[dbuf->count++] = dentry;
return 0;
}
static void
cleanup_dentry_buf(struct reiserfs_dentry_buf *buf)
{
int i;
for (i = 0; i < buf->count; i++)
if (buf->dentries[i])
dput(buf->dentries[i]);
}
static int reiserfs_for_each_xattr(struct inode *inode,
int (*action)(struct dentry *, void *),
void *data)
{
struct dentry *dir;
int i, err = 0;
struct reiserfs_dentry_buf buf = {
.ctx.actor = fill_with_dentries,
};
/* Skip out, an xattr has no xattrs associated with it */
if (IS_PRIVATE(inode) || get_inode_sd_version(inode) == STAT_DATA_V1)
return 0;
dir = open_xa_dir(inode, XATTR_REPLACE);
if (IS_ERR(dir)) {
err = PTR_ERR(dir);
goto out;
} else if (!dir->d_inode) {
err = 0;
goto out_dir;
}
mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_XATTR);
reiserfs: Fix reiserfs lock <-> i_mutex dependency inversion on xattr While deleting the xattrs of an inode, we hold the reiserfs lock and grab the inode->i_mutex of the targeted inode and the root private xattr directory. Later on, we may relax the reiserfs lock for various reasons, this creates inverted dependencies. We can remove the reiserfs lock -> i_mutex dependency by relaxing the former before calling open_xa_dir(). This is fine because the lookup and creation of xattr private directories done in open_xa_dir() are covered by the targeted inode mutexes. And deeper operations in the tree are still done under the write lock. This fixes the following lockdep report: ======================================================= [ INFO: possible circular locking dependency detected ] 2.6.32-atom #173 ------------------------------------------------------- cp/3204 is trying to acquire lock: (&REISERFS_SB(s)->lock){+.+.+.}, at: [<c11432b9>] reiserfs_write_lock_once+0x29/0x50 but task is already holding lock: (&sb->s_type->i_mutex_key#4/3){+.+.+.}, at: [<c1141e18>] open_xa_dir+0xd8/0x1b0 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&sb->s_type->i_mutex_key#4/3){+.+.+.}: [<c105ea7f>] __lock_acquire+0x11ff/0x19e0 [<c105f2c8>] lock_acquire+0x68/0x90 [<c1401a2b>] mutex_lock_nested+0x5b/0x340 [<c1141d83>] open_xa_dir+0x43/0x1b0 [<c1142722>] reiserfs_for_each_xattr+0x62/0x260 [<c114299a>] reiserfs_delete_xattrs+0x1a/0x60 [<c111ea1f>] reiserfs_delete_inode+0x9f/0x150 [<c10c9c32>] generic_delete_inode+0xa2/0x170 [<c10c9d4f>] generic_drop_inode+0x4f/0x70 [<c10c8b07>] iput+0x47/0x50 [<c10c0965>] do_unlinkat+0xd5/0x160 [<c10c0a00>] sys_unlink+0x10/0x20 [<c1002ec4>] sysenter_do_call+0x12/0x32 -> #0 (&REISERFS_SB(s)->lock){+.+.+.}: [<c105f176>] __lock_acquire+0x18f6/0x19e0 [<c105f2c8>] lock_acquire+0x68/0x90 [<c1401a2b>] mutex_lock_nested+0x5b/0x340 [<c11432b9>] reiserfs_write_lock_once+0x29/0x50 [<c1117012>] reiserfs_lookup+0x62/0x140 [<c10bd85f>] __lookup_hash+0xef/0x110 [<c10bf21d>] lookup_one_len+0x8d/0xc0 [<c1141e2a>] open_xa_dir+0xea/0x1b0 [<c1141fe5>] xattr_lookup+0x15/0x160 [<c1142476>] reiserfs_xattr_get+0x56/0x2a0 [<c1144042>] reiserfs_get_acl+0xa2/0x360 [<c114461a>] reiserfs_cache_default_acl+0x3a/0x160 [<c111789c>] reiserfs_mkdir+0x6c/0x2c0 [<c10bea96>] vfs_mkdir+0xd6/0x180 [<c10c0c10>] sys_mkdirat+0xc0/0xd0 [<c10c0c40>] sys_mkdir+0x20/0x30 [<c1002ec4>] sysenter_do_call+0x12/0x32 other info that might help us debug this: 2 locks held by cp/3204: #0: (&sb->s_type->i_mutex_key#4/1){+.+.+.}, at: [<c10bd8d6>] lookup_create+0x26/0xa0 #1: (&sb->s_type->i_mutex_key#4/3){+.+.+.}, at: [<c1141e18>] open_xa_dir+0xd8/0x1b0 stack backtrace: Pid: 3204, comm: cp Not tainted 2.6.32-atom #173 Call Trace: [<c13ff993>] ? printk+0x18/0x1a [<c105d33a>] print_circular_bug+0xca/0xd0 [<c105f176>] __lock_acquire+0x18f6/0x19e0 [<c105d3aa>] ? check_usage+0x6a/0x460 [<c105f2c8>] lock_acquire+0x68/0x90 [<c11432b9>] ? reiserfs_write_lock_once+0x29/0x50 [<c11432b9>] ? reiserfs_write_lock_once+0x29/0x50 [<c1401a2b>] mutex_lock_nested+0x5b/0x340 [<c11432b9>] ? reiserfs_write_lock_once+0x29/0x50 [<c11432b9>] reiserfs_write_lock_once+0x29/0x50 [<c1117012>] reiserfs_lookup+0x62/0x140 [<c105ccca>] ? debug_check_no_locks_freed+0x8a/0x140 [<c105cbe4>] ? trace_hardirqs_on_caller+0x124/0x170 [<c10bd85f>] __lookup_hash+0xef/0x110 [<c10bf21d>] lookup_one_len+0x8d/0xc0 [<c1141e2a>] open_xa_dir+0xea/0x1b0 [<c1141fe5>] xattr_lookup+0x15/0x160 [<c1142476>] reiserfs_xattr_get+0x56/0x2a0 [<c1144042>] reiserfs_get_acl+0xa2/0x360 [<c10ca2e7>] ? new_inode+0x27/0xa0 [<c114461a>] reiserfs_cache_default_acl+0x3a/0x160 [<c1402eb7>] ? _spin_unlock+0x27/0x40 [<c111789c>] reiserfs_mkdir+0x6c/0x2c0 [<c10c7cb8>] ? __d_lookup+0x108/0x190 [<c105c932>] ? mark_held_locks+0x62/0x80 [<c1401c8d>] ? mutex_lock_nested+0x2bd/0x340 [<c10bd17a>] ? generic_permission+0x1a/0xa0 [<c11788fe>] ? security_inode_permission+0x1e/0x20 [<c10bea96>] vfs_mkdir+0xd6/0x180 [<c10c0c10>] sys_mkdirat+0xc0/0xd0 [<c10505c6>] ? up_read+0x16/0x30 [<c1002fd8>] ? restore_all_notrace+0x0/0x18 [<c10c0c40>] sys_mkdir+0x20/0x30 [<c1002ec4>] sysenter_do_call+0x12/0x32 v2: Don't drop reiserfs_mutex_lock_nested_safe() as we'll still need it later Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com> Tested-by: Christian Kujau <lists@nerdbynature.de> Cc: Alexander Beregalov <a.beregalov@gmail.com> Cc: Chris Mason <chris.mason@oracle.com> Cc: Ingo Molnar <mingo@elte.hu>
2009-12-30 05:06:21 +01:00
buf.xadir = dir;
while (1) {
err = reiserfs_readdir_inode(dir->d_inode, &buf.ctx);
if (err)
break;
if (!buf.count)
break;
for (i = 0; !err && i < buf.count && buf.dentries[i]; i++) {
struct dentry *dentry = buf.dentries[i];
if (!S_ISDIR(dentry->d_inode->i_mode))
err = action(dentry, data);
dput(dentry);
buf.dentries[i] = NULL;
}
if (err)
break;
buf.count = 0;
}
mutex_unlock(&dir->d_inode->i_mutex);
cleanup_dentry_buf(&buf);
if (!err) {
/*
* We start a transaction here to avoid a ABBA situation
* between the xattr root's i_mutex and the journal lock.
* This doesn't incur much additional overhead since the
* new transaction will just nest inside the
* outer transaction.
*/
int blocks = JOURNAL_PER_BALANCE_CNT * 2 + 2 +
4 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb);
struct reiserfs_transaction_handle th;
reiserfs_write_lock(inode->i_sb);
err = journal_begin(&th, inode->i_sb, blocks);
reiserfs_write_unlock(inode->i_sb);
if (!err) {
int jerror;
mutex_lock_nested(&dir->d_parent->d_inode->i_mutex,
I_MUTEX_XATTR);
err = action(dir, data);
reiserfs_write_lock(inode->i_sb);
jerror = journal_end(&th);
reiserfs_write_unlock(inode->i_sb);
mutex_unlock(&dir->d_parent->d_inode->i_mutex);
err = jerror ?: err;
}
}
out_dir:
dput(dir);
out:
/* -ENODATA isn't an error */
if (err == -ENODATA)
err = 0;
return err;
}
static int delete_one_xattr(struct dentry *dentry, void *data)
{
struct inode *dir = dentry->d_parent->d_inode;
/* This is the xattr dir, handle specially. */
if (S_ISDIR(dentry->d_inode->i_mode))
return xattr_rmdir(dir, dentry);
return xattr_unlink(dir, dentry);
}
static int chown_one_xattr(struct dentry *dentry, void *data)
{
struct iattr *attrs = data;
int ia_valid = attrs->ia_valid;
int err;
/*
* We only want the ownership bits. Otherwise, we'll do
* things like change a directory to a regular file if
* ATTR_MODE is set.
*/
attrs->ia_valid &= (ATTR_UID|ATTR_GID);
err = reiserfs_setattr(dentry, attrs);
attrs->ia_valid = ia_valid;
return err;
}
/* No i_mutex, but the inode is unconnected. */
int reiserfs_delete_xattrs(struct inode *inode)
{
int err = reiserfs_for_each_xattr(inode, delete_one_xattr, NULL);
if (err)
reiserfs_warning(inode->i_sb, "jdm-20004",
"Couldn't delete all xattrs (%d)\n", err);
return err;
}
/* inode->i_mutex: down */
int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs)
{
int err = reiserfs_for_each_xattr(inode, chown_one_xattr, attrs);
if (err)
reiserfs_warning(inode->i_sb, "jdm-20007",
"Couldn't chown all xattrs (%d)\n", err);
return err;
}
#ifdef CONFIG_REISERFS_FS_XATTR
/*
* Returns a dentry corresponding to a specific extended attribute file
* for the inode. If flags allow, the file is created. Otherwise, a
* valid or negative dentry, or an error is returned.
*/
static struct dentry *xattr_lookup(struct inode *inode, const char *name,
int flags)
{
struct dentry *xadir, *xafile;
int err = 0;
xadir = open_xa_dir(inode, flags);
if (IS_ERR(xadir))
return ERR_CAST(xadir);
mutex_lock_nested(&xadir->d_inode->i_mutex, I_MUTEX_XATTR);
xafile = lookup_one_len(name, xadir, strlen(name));
if (IS_ERR(xafile)) {
err = PTR_ERR(xafile);
goto out;
}
if (xafile->d_inode && (flags & XATTR_CREATE))
err = -EEXIST;
if (!xafile->d_inode) {
err = -ENODATA;
if (xattr_may_create(flags))
err = xattr_create(xadir->d_inode, xafile,
0700|S_IFREG);
}
if (err)
dput(xafile);
out:
mutex_unlock(&xadir->d_inode->i_mutex);
dput(xadir);
if (err)
return ERR_PTR(err);
return xafile;
}
/* Internal operations on file data */
static inline void reiserfs_put_page(struct page *page)
{
kunmap(page);
page_cache_release(page);
}
static struct page *reiserfs_get_page(struct inode *dir, size_t n)
{
struct address_space *mapping = dir->i_mapping;
struct page *page;
/*
* We can deadlock if we try to free dentries,
* and an unlink/rmdir has just occurred - GFP_NOFS avoids this
*/
mapping_set_gfp_mask(mapping, GFP_NOFS);
page = read_mapping_page(mapping, n >> PAGE_CACHE_SHIFT, NULL);
if (!IS_ERR(page)) {
kmap(page);
if (PageError(page))
goto fail;
}
return page;
fail:
reiserfs_put_page(page);
return ERR_PTR(-EIO);
}
static inline __u32 xattr_hash(const char *msg, int len)
{
return csum_partial(msg, len, 0);
}
int reiserfs_commit_write(struct file *f, struct page *page,
unsigned from, unsigned to);
static void update_ctime(struct inode *inode)
{
struct timespec now = current_fs_time(inode->i_sb);
if (inode_unhashed(inode) || !inode->i_nlink ||
timespec_equal(&inode->i_ctime, &now))
return;
inode->i_ctime = CURRENT_TIME_SEC;
mark_inode_dirty(inode);
}
static int lookup_and_delete_xattr(struct inode *inode, const char *name)
{
int err = 0;
struct dentry *dentry, *xadir;
xadir = open_xa_dir(inode, XATTR_REPLACE);
if (IS_ERR(xadir))
return PTR_ERR(xadir);
mutex_lock_nested(&xadir->d_inode->i_mutex, I_MUTEX_XATTR);
dentry = lookup_one_len(name, xadir, strlen(name));
if (IS_ERR(dentry)) {
err = PTR_ERR(dentry);
goto out_dput;
}
if (dentry->d_inode) {
err = xattr_unlink(xadir->d_inode, dentry);
update_ctime(inode);
}
dput(dentry);
out_dput:
mutex_unlock(&xadir->d_inode->i_mutex);
dput(xadir);
return err;
}
/* Generic extended attribute operations that can be used by xa plugins */
/*
* inode->i_mutex: down
*/
int
reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
struct inode *inode, const char *name,
const void *buffer, size_t buffer_size, int flags)
{
int err = 0;
struct dentry *dentry;
struct page *page;
char *data;
size_t file_pos = 0;
size_t buffer_pos = 0;
size_t new_size;
__u32 xahash = 0;
if (get_inode_sd_version(inode) == STAT_DATA_V1)
return -EOPNOTSUPP;
if (!buffer) {
err = lookup_and_delete_xattr(inode, name);
return err;
}
dentry = xattr_lookup(inode, name, flags);
if (IS_ERR(dentry))
return PTR_ERR(dentry);
reiserfs: Relax lock before open xattr dir in reiserfs_xattr_set_handle() We call xattr_lookup() from reiserfs_xattr_get(). We then hold the reiserfs lock when we grab the i_mutex. But later, we may relax the reiserfs lock, creating dependency inversion between both locks. The lookups and creation jobs ar already protected by the inode mutex, so we can safely relax the reiserfs lock, dropping the unwanted reiserfs lock -> i_mutex dependency, as shown in the following lockdep report: ======================================================= [ INFO: possible circular locking dependency detected ] 2.6.32-atom #173 ------------------------------------------------------- cp/3204 is trying to acquire lock: (&REISERFS_SB(s)->lock){+.+.+.}, at: [<c11432b9>] reiserfs_write_lock_once+0x29/0x50 but task is already holding lock: (&sb->s_type->i_mutex_key#4/3){+.+.+.}, at: [<c1141e18>] open_xa_dir+0xd8/0x1b0 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&sb->s_type->i_mutex_key#4/3){+.+.+.}: [<c105ea7f>] __lock_acquire+0x11ff/0x19e0 [<c105f2c8>] lock_acquire+0x68/0x90 [<c1401a2b>] mutex_lock_nested+0x5b/0x340 [<c1141d83>] open_xa_dir+0x43/0x1b0 [<c1142722>] reiserfs_for_each_xattr+0x62/0x260 [<c114299a>] reiserfs_delete_xattrs+0x1a/0x60 [<c111ea1f>] reiserfs_delete_inode+0x9f/0x150 [<c10c9c32>] generic_delete_inode+0xa2/0x170 [<c10c9d4f>] generic_drop_inode+0x4f/0x70 [<c10c8b07>] iput+0x47/0x50 [<c10c0965>] do_unlinkat+0xd5/0x160 [<c10c0a00>] sys_unlink+0x10/0x20 [<c1002ec4>] sysenter_do_call+0x12/0x32 -> #0 (&REISERFS_SB(s)->lock){+.+.+.}: [<c105f176>] __lock_acquire+0x18f6/0x19e0 [<c105f2c8>] lock_acquire+0x68/0x90 [<c1401a2b>] mutex_lock_nested+0x5b/0x340 [<c11432b9>] reiserfs_write_lock_once+0x29/0x50 [<c1117012>] reiserfs_lookup+0x62/0x140 [<c10bd85f>] __lookup_hash+0xef/0x110 [<c10bf21d>] lookup_one_len+0x8d/0xc0 [<c1141e2a>] open_xa_dir+0xea/0x1b0 [<c1141fe5>] xattr_lookup+0x15/0x160 [<c1142476>] reiserfs_xattr_get+0x56/0x2a0 [<c1144042>] reiserfs_get_acl+0xa2/0x360 [<c114461a>] reiserfs_cache_default_acl+0x3a/0x160 [<c111789c>] reiserfs_mkdir+0x6c/0x2c0 [<c10bea96>] vfs_mkdir+0xd6/0x180 [<c10c0c10>] sys_mkdirat+0xc0/0xd0 [<c10c0c40>] sys_mkdir+0x20/0x30 [<c1002ec4>] sysenter_do_call+0x12/0x32 other info that might help us debug this: 2 locks held by cp/3204: #0: (&sb->s_type->i_mutex_key#4/1){+.+.+.}, at: [<c10bd8d6>] lookup_create+0x26/0xa0 #1: (&sb->s_type->i_mutex_key#4/3){+.+.+.}, at: [<c1141e18>] open_xa_dir+0xd8/0x1b0 stack backtrace: Pid: 3204, comm: cp Not tainted 2.6.32-atom #173 Call Trace: [<c13ff993>] ? printk+0x18/0x1a [<c105d33a>] print_circular_bug+0xca/0xd0 [<c105f176>] __lock_acquire+0x18f6/0x19e0 [<c105d3aa>] ? check_usage+0x6a/0x460 [<c105f2c8>] lock_acquire+0x68/0x90 [<c11432b9>] ? reiserfs_write_lock_once+0x29/0x50 [<c11432b9>] ? reiserfs_write_lock_once+0x29/0x50 [<c1401a2b>] mutex_lock_nested+0x5b/0x340 [<c11432b9>] ? reiserfs_write_lock_once+0x29/0x50 [<c11432b9>] reiserfs_write_lock_once+0x29/0x50 [<c1117012>] reiserfs_lookup+0x62/0x140 [<c105ccca>] ? debug_check_no_locks_freed+0x8a/0x140 [<c105cbe4>] ? trace_hardirqs_on_caller+0x124/0x170 [<c10bd85f>] __lookup_hash+0xef/0x110 [<c10bf21d>] lookup_one_len+0x8d/0xc0 [<c1141e2a>] open_xa_dir+0xea/0x1b0 [<c1141fe5>] xattr_lookup+0x15/0x160 [<c1142476>] reiserfs_xattr_get+0x56/0x2a0 [<c1144042>] reiserfs_get_acl+0xa2/0x360 [<c10ca2e7>] ? new_inode+0x27/0xa0 [<c114461a>] reiserfs_cache_default_acl+0x3a/0x160 [<c1402eb7>] ? _spin_unlock+0x27/0x40 [<c111789c>] reiserfs_mkdir+0x6c/0x2c0 [<c10c7cb8>] ? __d_lookup+0x108/0x190 [<c105c932>] ? mark_held_locks+0x62/0x80 [<c1401c8d>] ? mutex_lock_nested+0x2bd/0x340 [<c10bd17a>] ? generic_permission+0x1a/0xa0 [<c11788fe>] ? security_inode_permission+0x1e/0x20 [<c10bea96>] vfs_mkdir+0xd6/0x180 [<c10c0c10>] sys_mkdirat+0xc0/0xd0 [<c10505c6>] ? up_read+0x16/0x30 [<c1002fd8>] ? restore_all_notrace+0x0/0x18 [<c10c0c40>] sys_mkdir+0x20/0x30 [<c1002ec4>] sysenter_do_call+0x12/0x32 Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com> Tested-by: Christian Kujau <lists@nerdbynature.de> Cc: Alexander Beregalov <a.beregalov@gmail.com> Cc: Chris Mason <chris.mason@oracle.com> Cc: Ingo Molnar <mingo@elte.hu>
2009-12-30 07:03:53 +01:00
down_write(&REISERFS_I(inode)->i_xattr_sem);
xahash = xattr_hash(buffer, buffer_size);
while (buffer_pos < buffer_size || buffer_pos == 0) {
size_t chunk;
size_t skip = 0;
size_t page_offset = (file_pos & (PAGE_CACHE_SIZE - 1));
if (buffer_size - buffer_pos > PAGE_CACHE_SIZE)
chunk = PAGE_CACHE_SIZE;
else
chunk = buffer_size - buffer_pos;
page = reiserfs_get_page(dentry->d_inode, file_pos);
if (IS_ERR(page)) {
err = PTR_ERR(page);
goto out_unlock;
}
lock_page(page);
data = page_address(page);
if (file_pos == 0) {
struct reiserfs_xattr_header *rxh;
skip = file_pos = sizeof(struct reiserfs_xattr_header);
if (chunk + skip > PAGE_CACHE_SIZE)
chunk = PAGE_CACHE_SIZE - skip;
rxh = (struct reiserfs_xattr_header *)data;
rxh->h_magic = cpu_to_le32(REISERFS_XATTR_MAGIC);
rxh->h_hash = cpu_to_le32(xahash);
}
reiserfs_write_lock(inode->i_sb);
err = __reiserfs_write_begin(page, page_offset, chunk + skip);
if (!err) {
if (buffer)
memcpy(data + skip, buffer + buffer_pos, chunk);
err = reiserfs_commit_write(NULL, page, page_offset,
page_offset + chunk +
skip);
}
reiserfs_write_unlock(inode->i_sb);
unlock_page(page);
reiserfs_put_page(page);
buffer_pos += chunk;
file_pos += chunk;
skip = 0;
if (err || buffer_size == 0 || !buffer)
break;
}
new_size = buffer_size + sizeof(struct reiserfs_xattr_header);
if (!err && new_size < i_size_read(dentry->d_inode)) {
struct iattr newattrs = {
.ia_ctime = current_fs_time(inode->i_sb),
.ia_size = new_size,
.ia_valid = ATTR_SIZE | ATTR_CTIME,
};
mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_XATTR);
inode_dio_wait(dentry->d_inode);
err = reiserfs_setattr(dentry, &newattrs);
mutex_unlock(&dentry->d_inode->i_mutex);
} else
update_ctime(inode);
out_unlock:
up_write(&REISERFS_I(inode)->i_xattr_sem);
dput(dentry);
return err;
}
/* We need to start a transaction to maintain lock ordering */
int reiserfs_xattr_set(struct inode *inode, const char *name,
const void *buffer, size_t buffer_size, int flags)
{
struct reiserfs_transaction_handle th;
int error, error2;
size_t jbegin_count = reiserfs_xattr_nblocks(inode, buffer_size);
if (!(flags & XATTR_REPLACE))
jbegin_count += reiserfs_xattr_jcreate_nblocks(inode);
reiserfs_write_lock(inode->i_sb);
error = journal_begin(&th, inode->i_sb, jbegin_count);
reiserfs_write_unlock(inode->i_sb);
if (error) {
return error;
}
error = reiserfs_xattr_set_handle(&th, inode, name,
buffer, buffer_size, flags);
reiserfs_write_lock(inode->i_sb);
error2 = journal_end(&th);
reiserfs_write_unlock(inode->i_sb);
if (error == 0)
error = error2;
return error;
}
/*
* inode->i_mutex: down
*/
int
reiserfs_xattr_get(struct inode *inode, const char *name, void *buffer,
size_t buffer_size)
{
ssize_t err = 0;
struct dentry *dentry;
size_t isize;
size_t file_pos = 0;
size_t buffer_pos = 0;
struct page *page;
__u32 hash = 0;
if (name == NULL)
return -EINVAL;
/*
* We can't have xattrs attached to v1 items since they don't have
* generation numbers
*/
if (get_inode_sd_version(inode) == STAT_DATA_V1)
return -EOPNOTSUPP;
dentry = xattr_lookup(inode, name, XATTR_REPLACE);
if (IS_ERR(dentry)) {
err = PTR_ERR(dentry);
goto out;
}
down_read(&REISERFS_I(inode)->i_xattr_sem);
isize = i_size_read(dentry->d_inode);
/* Just return the size needed */
if (buffer == NULL) {
err = isize - sizeof(struct reiserfs_xattr_header);
goto out_unlock;
}
if (buffer_size < isize - sizeof(struct reiserfs_xattr_header)) {
err = -ERANGE;
goto out_unlock;
}
while (file_pos < isize) {
size_t chunk;
char *data;
size_t skip = 0;
if (isize - file_pos > PAGE_CACHE_SIZE)
chunk = PAGE_CACHE_SIZE;
else
chunk = isize - file_pos;
page = reiserfs_get_page(dentry->d_inode, file_pos);
if (IS_ERR(page)) {
err = PTR_ERR(page);
goto out_unlock;
}
lock_page(page);
data = page_address(page);
if (file_pos == 0) {
struct reiserfs_xattr_header *rxh =
(struct reiserfs_xattr_header *)data;
skip = file_pos = sizeof(struct reiserfs_xattr_header);
chunk -= skip;
/* Magic doesn't match up.. */
if (rxh->h_magic != cpu_to_le32(REISERFS_XATTR_MAGIC)) {
unlock_page(page);
reiserfs_put_page(page);
reiserfs_warning(inode->i_sb, "jdm-20001",
"Invalid magic for xattr (%s) "
"associated with %k", name,
INODE_PKEY(inode));
err = -EIO;
goto out_unlock;
}
hash = le32_to_cpu(rxh->h_hash);
}
memcpy(buffer + buffer_pos, data + skip, chunk);
unlock_page(page);
reiserfs_put_page(page);
file_pos += chunk;
buffer_pos += chunk;
skip = 0;
}
err = isize - sizeof(struct reiserfs_xattr_header);
if (xattr_hash(buffer, isize - sizeof(struct reiserfs_xattr_header)) !=
hash) {
reiserfs_warning(inode->i_sb, "jdm-20002",
"Invalid hash for xattr (%s) associated "
"with %k", name, INODE_PKEY(inode));
err = -EIO;
}
out_unlock:
up_read(&REISERFS_I(inode)->i_xattr_sem);
dput(dentry);
out:
return err;
}
/*
* In order to implement different sets of xattr operations for each xattr
* prefix with the generic xattr API, a filesystem should create a
* null-terminated array of struct xattr_handler (one for each prefix) and
* hang a pointer to it off of the s_xattr field of the superblock.
*
* The generic_fooxattr() functions will use this list to dispatch xattr
* operations to the correct xattr_handler.
*/
#define for_each_xattr_handler(handlers, handler) \
for ((handler) = *(handlers)++; \
(handler) != NULL; \
(handler) = *(handlers)++)
/* This is the implementation for the xattr plugin infrastructure */
static inline const struct xattr_handler *
find_xattr_handler_prefix(const struct xattr_handler **handlers,
const char *name)
{
const struct xattr_handler *xah;
if (!handlers)
return NULL;
for_each_xattr_handler(handlers, xah) {
if (strncmp(xah->prefix, name, strlen(xah->prefix)) == 0)
break;
}
return xah;
}
/*
* Inode operation getxattr()
*/
ssize_t
reiserfs_getxattr(struct dentry * dentry, const char *name, void *buffer,
size_t size)
{
const struct xattr_handler *handler;
handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name);
if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
return -EOPNOTSUPP;
return handler->get(dentry, name, buffer, size, handler->flags);
}
/*
* Inode operation setxattr()
*
* dentry->d_inode->i_mutex down
*/
int
reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value,
size_t size, int flags)
{
const struct xattr_handler *handler;
handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name);
if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
return -EOPNOTSUPP;
return handler->set(dentry, name, value, size, flags, handler->flags);
}
/*
* Inode operation removexattr()
*
* dentry->d_inode->i_mutex down
*/
int reiserfs_removexattr(struct dentry *dentry, const char *name)
{
const struct xattr_handler *handler;
handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name);
if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
return -EOPNOTSUPP;
return handler->set(dentry, name, NULL, 0, XATTR_REPLACE, handler->flags);
}
struct listxattr_buf {
struct dir_context ctx;
size_t size;
size_t pos;
char *buf;
struct dentry *dentry;
};
static int listxattr_filler(void *buf, const char *name, int namelen,
loff_t offset, u64 ino, unsigned int d_type)
{
struct listxattr_buf *b = (struct listxattr_buf *)buf;
size_t size;
if (name[0] != '.' ||
(namelen != 1 && (name[1] != '.' || namelen != 2))) {
const struct xattr_handler *handler;
handler = find_xattr_handler_prefix(b->dentry->d_sb->s_xattr,
name);
if (!handler) /* Unsupported xattr name */
return 0;
if (b->buf) {
size = handler->list(b->dentry, b->buf + b->pos,
b->size, name, namelen,
handler->flags);
if (size > b->size)
return -ERANGE;
} else {
size = handler->list(b->dentry, NULL, 0, name,
namelen, handler->flags);
}
b->pos += size;
}
return 0;
}
/*
* Inode operation listxattr()
*
* We totally ignore the generic listxattr here because it would be stupid
* not to. Since the xattrs are organized in a directory, we can just
* readdir to find them.
*/
ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size)
{
struct dentry *dir;
int err = 0;
struct listxattr_buf buf = {
.ctx.actor = listxattr_filler,
.dentry = dentry,
.buf = buffer,
.size = buffer ? size : 0,
};
if (!dentry->d_inode)
return -EINVAL;
if (!dentry->d_sb->s_xattr ||
get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
return -EOPNOTSUPP;
dir = open_xa_dir(dentry->d_inode, XATTR_REPLACE);
if (IS_ERR(dir)) {
err = PTR_ERR(dir);
if (err == -ENODATA)
err = 0; /* Not an error if there aren't any xattrs */
goto out;
}
mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_XATTR);
err = reiserfs_readdir_inode(dir->d_inode, &buf.ctx);
mutex_unlock(&dir->d_inode->i_mutex);
if (!err)
err = buf.pos;
dput(dir);
out:
return err;
}
static int create_privroot(struct dentry *dentry)
{
int err;
struct inode *inode = dentry->d_parent->d_inode;
WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex));
err = xattr_mkdir(inode, dentry, 0700);
if (err || !dentry->d_inode) {
reiserfs_warning(dentry->d_sb, "jdm-20006",
"xattrs/ACLs enabled and couldn't "
"find/create .reiserfs_priv. "
"Failing mount.");
return -EOPNOTSUPP;
}
dentry->d_inode->i_flags |= S_PRIVATE;
reiserfs_info(dentry->d_sb, "Created %s - reserved for xattr "
"storage.\n", PRIVROOT_NAME);
return 0;
}
#else
int __init reiserfs_xattr_register_handlers(void) { return 0; }
void reiserfs_xattr_unregister_handlers(void) {}
static int create_privroot(struct dentry *dentry) { return 0; }
#endif
/* Actual operations that are exported to VFS-land */
static const struct xattr_handler *reiserfs_xattr_handlers[] = {
#ifdef CONFIG_REISERFS_FS_XATTR
&reiserfs_xattr_user_handler,
&reiserfs_xattr_trusted_handler,
#endif
#ifdef CONFIG_REISERFS_FS_SECURITY
&reiserfs_xattr_security_handler,
#endif
#ifdef CONFIG_REISERFS_FS_POSIX_ACL
&posix_acl_access_xattr_handler,
&posix_acl_default_xattr_handler,
#endif
NULL
};
static int xattr_mount_check(struct super_block *s)
{
/*
* We need generation numbers to ensure that the oid mapping is correct
* v3.5 filesystems don't have them.
*/
if (old_format_only(s)) {
if (reiserfs_xattrs_optional(s)) {
/*
* Old format filesystem, but optional xattrs have
* been enabled. Error out.
*/
reiserfs_warning(s, "jdm-2005",
"xattrs/ACLs not supported "
"on pre-v3.6 format filesystems. "
"Failing mount.");
return -EOPNOTSUPP;
}
}
return 0;
}
int reiserfs_permission(struct inode *inode, int mask)
{
/*
* We don't do permission checks on the internal objects.
* Permissions are determined by the "owning" object.
*/
if (IS_PRIVATE(inode))
return 0;
return generic_permission(inode, mask);
}
static int xattr_hide_revalidate(struct dentry *dentry, unsigned int flags)
{
return -EPERM;
}
static const struct dentry_operations xattr_lookup_poison_ops = {
.d_revalidate = xattr_hide_revalidate,
};
int reiserfs_lookup_privroot(struct super_block *s)
{
struct dentry *dentry;
int err = 0;
/* If we don't have the privroot located yet - go find it */
mutex_lock(&s->s_root->d_inode->i_mutex);
dentry = lookup_one_len(PRIVROOT_NAME, s->s_root,
strlen(PRIVROOT_NAME));
if (!IS_ERR(dentry)) {
REISERFS_SB(s)->priv_root = dentry;
d_set_d_op(dentry, &xattr_lookup_poison_ops);
if (dentry->d_inode)
dentry->d_inode->i_flags |= S_PRIVATE;
} else
err = PTR_ERR(dentry);
mutex_unlock(&s->s_root->d_inode->i_mutex);
return err;
}
/*
* We need to take a copy of the mount flags since things like
* MS_RDONLY don't get set until *after* we're called.
* mount_flags != mount_options
*/
int reiserfs_xattr_init(struct super_block *s, int mount_flags)
{
int err = 0;
struct dentry *privroot = REISERFS_SB(s)->priv_root;
err = xattr_mount_check(s);
if (err)
goto error;
if (!privroot->d_inode && !(mount_flags & MS_RDONLY)) {
mutex_lock(&s->s_root->d_inode->i_mutex);
err = create_privroot(REISERFS_SB(s)->priv_root);
mutex_unlock(&s->s_root->d_inode->i_mutex);
}
if (privroot->d_inode) {
s->s_xattr = reiserfs_xattr_handlers;
mutex_lock(&privroot->d_inode->i_mutex);
if (!REISERFS_SB(s)->xattr_root) {
struct dentry *dentry;
dentry = lookup_one_len(XAROOT_NAME, privroot,
strlen(XAROOT_NAME));
if (!IS_ERR(dentry))
REISERFS_SB(s)->xattr_root = dentry;
else
err = PTR_ERR(dentry);
}
mutex_unlock(&privroot->d_inode->i_mutex);
}
error:
if (err) {
clear_bit(REISERFS_XATTRS_USER, &REISERFS_SB(s)->s_mount_opt);
clear_bit(REISERFS_POSIXACL, &REISERFS_SB(s)->s_mount_opt);
}
/* The super_block MS_POSIXACL must mirror the (no)acl mount option. */
if (reiserfs_posixacl(s))
s->s_flags |= MS_POSIXACL;
else
s->s_flags &= ~MS_POSIXACL;
return err;
}