Mimi Zohar c09c59e6a0 ima: replace GFP_KERNEL with GFP_NOFS
While running fsstress tests on NFSv4-mounted ext3 and ext4
filesystems, the following call trace was generated on the NFS
server machine.

Replace GFP_KERNEL with GFP_NOFS in ima_iint_insert() to avoid a
potential deadlock.

     =================================
    [ INFO: inconsistent lock state ]
    2.6.31-31.el6.x86_64 #1
    ---------------------------------
    inconsistent {RECLAIM_FS-ON-W} -> {IN-RECLAIM_FS-W} usage.
    kswapd2/75 [HC0[0]:SC0[0]:HE1:SE1] takes:
     (jbd2_handle){+.+.?.}, at: [<ffffffff811edd5e>] jbd2_journal_start+0xfe/0x13f
    {RECLAIM_FS-ON-W} state was registered at:
      [<ffffffff81091e40>] mark_held_locks+0x65/0x99
      [<ffffffff81091f31>] lockdep_trace_alloc+0xbd/0xf5
      [<ffffffff81126fdd>] kmem_cache_alloc+0x40/0x185
      [<ffffffff812344d7>] ima_iint_insert+0x3d/0xf1
      [<ffffffff812345b0>] ima_inode_alloc+0x25/0x44
      [<ffffffff811484ac>] inode_init_always+0xec/0x271
      [<ffffffff81148682>] alloc_inode+0x51/0xa1
      [<ffffffff81148700>] new_inode+0x2e/0x94
      [<ffffffff811b2f08>] ext4_new_inode+0xb8/0xdc9
      [<ffffffff811be611>] ext4_create+0xcf/0x175
      [<ffffffff8113e2cd>] vfs_create+0x82/0xb8
      [<ffffffff8113f337>] do_filp_open+0x32c/0x9ee
      [<ffffffff811309b9>] do_sys_open+0x6c/0x12c
      [<ffffffff81130adc>] sys_open+0x2e/0x44
      [<ffffffff81011e42>] system_call_fastpath+0x16/0x1b
      [<ffffffffffffffff>] 0xffffffffffffffff
    irq event stamp: 90371
    hardirqs last  enabled at (90371): [<ffffffff8112708d>]
    kmem_cache_alloc+0xf0/0x185
    hardirqs last disabled at (90370): [<ffffffff81127026>]
    kmem_cache_alloc+0x89/0x185
    softirqs last  enabled at (89492): [<ffffffff81068ecf>]
    __do_softirq+0x1bf/0x1eb
    softirqs last disabled at (89477): [<ffffffff8101312c>] call_softirq+0x1c/0x30

    other info that might help us debug this:
    2 locks held by kswapd2/75:
     #0:  (shrinker_rwsem){++++..}, at: [<ffffffff810f98ba>] shrink_slab+0x44/0x177
     #1:  (&type->s_umount_key#25){++++..}, at: [<ffffffff811450ba>]

Reported-by: Muni P. Beerakam <mbeeraka@in.ibm.com>
Reported-by: Amit K. Arora <amitarora@in.ibm.com>
Cc: stable@kernel.org
Signed-off-by: Mimi Zohar <zohar@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>
2009-11-19 08:42:01 +11:00

205 lines
4.8 KiB
C

/*
* Copyright (C) 2008 IBM Corporation
*
* Authors:
* Mimi Zohar <zohar@us.ibm.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation, version 2 of the
* License.
*
* File: ima_iint.c
* - implements the IMA hooks: ima_inode_alloc, ima_inode_free
* - cache integrity information associated with an inode
* using a radix tree.
*/
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/radix-tree.h>
#include "ima.h"
/* The function written below as ima_iint_delete is compiled under the
 * name ima_inode_free, the inode-free hook listed in the file header.
 */
#define ima_iint_delete ima_inode_free
/* Radix tree of iints, keyed by the address of the owning inode. */
RADIX_TREE(ima_iint_store, GFP_ATOMIC);
/* Taken around insertions into, deletions from, and the -EEXIST lookup
 * in ima_iint_store.
 */
DEFINE_SPINLOCK(ima_iint_lock);
/* Slab cache that iint objects are allocated from and returned to;
 * created by ima_iintcache_init().
 */
static struct kmem_cache *iint_cache __read_mostly;
/* ima_iint_find_get - look up the iint cached for an inode
 *
 * The radix tree is searched inside an RCU read-side section and, when
 * an entry is found, a reference is taken on it before that section is
 * left.  The caller owns this reference and must put it when done.
 *
 * Returns the iint, or NULL when no entry exists for @inode.
 */
struct ima_iint_cache *ima_iint_find_get(struct inode *inode)
{
	struct ima_iint_cache *entry;

	rcu_read_lock();
	entry = radix_tree_lookup(&ima_iint_store, (unsigned long)inode);
	if (entry)
		kref_get(&entry->refcount);
	rcu_read_unlock();

	return entry;
}
/* ima_iint_insert - allocate an iint for an inode and add it to the tree
 *
 * Allocates an iint from the iint_cache slab and inserts it into the
 * radix tree, keyed by the address of @inode.
 *
 * Both allocations use GFP_NOFS rather than GFP_KERNEL: this function is
 * reached from inode allocation inside a filesystem, and allowing the
 * allocator to recurse into filesystem reclaim there can deadlock.
 *
 * No reference is taken on the returned iint; callers that keep the
 * pointer must take one themselves.
 *
 * Returns the newly inserted iint on success.  If an iint is already
 * present for @inode (-EEXIST), the new object is freed and the result of
 * looking the existing entry up is returned instead.  Returns NULL when
 * IMA is not initialized or on any other failure.
 */
struct ima_iint_cache *ima_iint_insert(struct inode *inode)
{
	struct ima_iint_cache *iint = NULL;
	int rc = 0;

	if (!ima_initialized)
		return iint;

	iint = kmem_cache_alloc(iint_cache, GFP_NOFS);
	if (!iint)
		return iint;

	/*
	 * radix_tree_preload() returns with preemption disabled only when
	 * it succeeds.  On failure we must skip radix_tree_preload_end(),
	 * otherwise preemption would be enabled once too often.
	 */
	rc = radix_tree_preload(GFP_NOFS);
	if (rc < 0)
		goto out;

	spin_lock(&ima_iint_lock);
	rc = radix_tree_insert(&ima_iint_store, (unsigned long)inode, iint);
	spin_unlock(&ima_iint_lock);
	/* Pairs with the successful radix_tree_preload() above. */
	radix_tree_preload_end();
out:
	if (rc < 0) {
		/* Our object never made it into the tree; give it back. */
		kmem_cache_free(iint_cache, iint);
		if (rc == -EEXIST) {
			/* Someone else inserted first: hand out their iint. */
			spin_lock(&ima_iint_lock);
			iint = radix_tree_lookup(&ima_iint_store,
						 (unsigned long)inode);
			spin_unlock(&ima_iint_lock);
		} else
			iint = NULL;
	}
	return iint;
}
/**
 * ima_inode_alloc - allocate an iint associated with an inode
 * @inode: pointer to the inode
 *
 * Does nothing and reports success while IMA is not yet initialized.
 * Otherwise delegates to ima_iint_insert().
 *
 * Return 0 on success, 1 on failure.
 */
int ima_inode_alloc(struct inode *inode)
{
	if (!ima_initialized)
		return 0;

	return ima_iint_insert(inode) ? 0 : 1;
}
/* ima_iint_find_insert_get - get the iint for an inode, creating it if needed
 *
 * Normally the iint is inserted when the inode is allocated.  Inodes that
 * were allocated before late_initcall have none, so when the lookup fails
 * an iint is allocated, initialized and inserted here.
 *
 * (Can't initialize at security_initcall before any inodes are
 * allocated, got to wait at least until proc_init.)
 *
 * Either way a reference is taken on the iint that is returned.
 *
 * Return the iint, or NULL if it could be neither found nor inserted.
 */
struct ima_iint_cache *ima_iint_find_insert_get(struct inode *inode)
{
	struct ima_iint_cache *iint = ima_iint_find_get(inode);

	if (!iint) {
		/* Not cached yet: insert one and take our reference on it. */
		iint = ima_iint_insert(inode);
		if (iint)
			kref_get(&iint->refcount);
	}
	return iint;
}
EXPORT_SYMBOL_GPL(ima_iint_find_insert_get);
/* iint_free - kref release callback, called when the iint refcount hits zero
 *
 * Resets the fields that init_once() also initializes (version, flags,
 * the three counters and the refcount) and returns the object to
 * iint_cache.  A counter that is still non-zero at this point is logged
 * before it is cleared.
 */
void iint_free(struct kref *kref)
{
	struct ima_iint_cache *iint;

	iint = container_of(kref, struct ima_iint_cache, refcount);

	if (iint->readcount != 0) {
		printk(KERN_INFO "%s: readcount: %ld\n", __func__,
		       iint->readcount);
		iint->readcount = 0;
	}
	if (iint->writecount != 0) {
		printk(KERN_INFO "%s: writecount: %ld\n", __func__,
		       iint->writecount);
		iint->writecount = 0;
	}
	if (iint->opencount != 0) {
		printk(KERN_INFO "%s: opencount: %ld\n", __func__,
		       iint->opencount);
		iint->opencount = 0;
	}

	iint->version = 0;
	iint->flags = 0UL;
	/* Same starting refcount that init_once() sets. */
	kref_set(&iint->refcount, 1);
	kmem_cache_free(iint_cache, iint);
}
/* iint_rcu_free - RCU callback queued by ima_iint_delete()
 *
 * Puts one reference on the iint that embeds @rcu_head; iint_free() runs
 * if that was the last reference.
 */
void iint_rcu_free(struct rcu_head *rcu_head)
{
	struct ima_iint_cache *iint;

	iint = container_of(rcu_head, struct ima_iint_cache, rcu);
	kref_put(&iint->refcount, iint_free);
}
/**
 * ima_iint_delete - called on integrity_inode_free
 * @inode: pointer to the inode
 *
 * Remove the integrity information (iint) associated with an inode from
 * the radix tree and queue an RCU callback (iint_rcu_free) to put it.
 * Does nothing while IMA is not initialized or when the inode has no
 * iint.
 */
void ima_iint_delete(struct inode *inode)
{
	struct ima_iint_cache *victim;

	if (!ima_initialized)
		return;

	spin_lock(&ima_iint_lock);
	victim = radix_tree_delete(&ima_iint_store, (unsigned long)inode);
	spin_unlock(&ima_iint_lock);

	if (!victim)
		return;

	call_rcu(&victim->rcu, iint_rcu_free);
}
/* init_once - constructor passed to kmem_cache_create() for iint_cache
 * @foo: the object being constructed, a struct ima_iint_cache
 *
 * Zero-fills the whole object, which leaves version, flags and the
 * read/write/open counters at 0, then initializes the mutex and sets
 * the reference count to 1.
 */
static void init_once(void *foo)
{
	struct ima_iint_cache *iint = foo;

	/* One memset clears version, flags and all three counters. */
	memset(iint, 0, sizeof(*iint));
	mutex_init(&iint->mutex);
	kref_set(&iint->refcount, 1);
}
/* ima_iintcache_init - create the slab cache that iint objects come from
 *
 * Objects are sized for struct ima_iint_cache and constructed by
 * init_once().  The result is stored in iint_cache without a NULL check;
 * SLAB_PANIC is requested, which presumably makes a failed creation
 * panic instead of returning NULL.
 */
void __init ima_iintcache_init(void)
{
	iint_cache = kmem_cache_create("iint_cache",
				       sizeof(struct ima_iint_cache),
				       0, SLAB_PANIC, init_once);
}