linux-next/fs/nfs/delegation.c
Trond Myklebust 412c77cee6 NFSv4: Defer inode revalidation when setting up a delegation
Currently we force a synchronous call to __nfs_revalidate_inode() in
nfs_inode_set_delegation(). This not only ensures that we cannot call
nfs_inode_set_delegation from an asynchronous context, but it also slows
down any call to open().

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
2007-07-10 23:40:41 -04:00

472 lines
12 KiB
C

/*
* linux/fs/nfs/delegation.c
*
* Copyright (C) 2004 Trond Myklebust
*
* NFS file delegation management
*
*/
#include <linux/completion.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/nfs4.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_xdr.h>
#include "nfs4_fs.h"
#include "delegation.h"
#include "internal.h"
static void nfs_free_delegation(struct nfs_delegation *delegation)
{
if (delegation->cred)
put_rpccred(delegation->cred);
kfree(delegation);
}
static void nfs_free_delegation_callback(struct rcu_head *head)
{
struct nfs_delegation *delegation = container_of(head, struct nfs_delegation, rcu);
nfs_free_delegation(delegation);
}
static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state)
{
struct inode *inode = state->inode;
struct file_lock *fl;
int status;
for (fl = inode->i_flock; fl != 0; fl = fl->fl_next) {
if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK)))
continue;
if ((struct nfs_open_context *)fl->fl_file->private_data != ctx)
continue;
status = nfs4_lock_delegation_recall(state, fl);
if (status >= 0)
continue;
switch (status) {
default:
printk(KERN_ERR "%s: unhandled error %d.\n",
__FUNCTION__, status);
case -NFS4ERR_EXPIRED:
/* kill_proc(fl->fl_pid, SIGLOST, 1); */
case -NFS4ERR_STALE_CLIENTID:
nfs4_schedule_state_recovery(NFS_SERVER(inode)->nfs_client);
goto out_err;
}
}
return 0;
out_err:
return status;
}
static void nfs_delegation_claim_opens(struct inode *inode, const nfs4_stateid *stateid)
{
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_open_context *ctx;
struct nfs4_state *state;
int err;
again:
spin_lock(&inode->i_lock);
list_for_each_entry(ctx, &nfsi->open_files, list) {
state = ctx->state;
if (state == NULL)
continue;
if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
continue;
if (memcmp(state->stateid.data, stateid->data, sizeof(state->stateid.data)) != 0)
continue;
get_nfs_open_context(ctx);
spin_unlock(&inode->i_lock);
err = nfs4_open_delegation_recall(ctx, state, stateid);
if (err >= 0)
err = nfs_delegation_claim_locks(ctx, state);
put_nfs_open_context(ctx);
if (err != 0)
return;
goto again;
}
spin_unlock(&inode->i_lock);
}
/*
* Set up a delegation on an inode
*/
void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res)
{
struct nfs_delegation *delegation = NFS_I(inode)->delegation;
if (delegation == NULL)
return;
memcpy(delegation->stateid.data, res->delegation.data,
sizeof(delegation->stateid.data));
delegation->type = res->delegation_type;
delegation->maxsize = res->maxsize;
put_rpccred(cred);
delegation->cred = get_rpccred(cred);
delegation->flags &= ~NFS_DELEGATION_NEED_RECLAIM;
NFS_I(inode)->delegation_state = delegation->type;
smp_wmb();
}
/*
* Set up a delegation on an inode
*/
int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res)
{
struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_delegation *delegation;
int status = 0;
delegation = kmalloc(sizeof(*delegation), GFP_KERNEL);
if (delegation == NULL)
return -ENOMEM;
memcpy(delegation->stateid.data, res->delegation.data,
sizeof(delegation->stateid.data));
delegation->type = res->delegation_type;
delegation->maxsize = res->maxsize;
delegation->change_attr = nfsi->change_attr;
delegation->cred = get_rpccred(cred);
delegation->inode = inode;
spin_lock(&clp->cl_lock);
if (rcu_dereference(nfsi->delegation) == NULL) {
list_add_rcu(&delegation->super_list, &clp->cl_delegations);
nfsi->delegation_state = delegation->type;
rcu_assign_pointer(nfsi->delegation, delegation);
delegation = NULL;
} else {
if (memcmp(&delegation->stateid, &nfsi->delegation->stateid,
sizeof(delegation->stateid)) != 0 ||
delegation->type != nfsi->delegation->type) {
printk("%s: server %u.%u.%u.%u, handed out a duplicate delegation!\n",
__FUNCTION__, NIPQUAD(clp->cl_addr.sin_addr));
status = -EIO;
}
}
/* Ensure we revalidate the attributes and page cache! */
spin_lock(&inode->i_lock);
nfsi->cache_validity |= NFS_INO_REVAL_FORCED;
spin_unlock(&inode->i_lock);
spin_unlock(&clp->cl_lock);
kfree(delegation);
return status;
}
static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation)
{
int res = 0;
res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid);
call_rcu(&delegation->rcu, nfs_free_delegation_callback);
return res;
}
/* Sync all data to disk upon delegation return */
static void nfs_msync_inode(struct inode *inode)
{
filemap_fdatawrite(inode->i_mapping);
nfs_wb_all(inode);
filemap_fdatawait(inode->i_mapping);
}
/*
* Basic procedure for returning a delegation to the server
*/
static int __nfs_inode_return_delegation(struct inode *inode, struct nfs_delegation *delegation)
{
struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
struct nfs_inode *nfsi = NFS_I(inode);
nfs_msync_inode(inode);
down_read(&clp->cl_sem);
/* Guard against new delegated open calls */
down_write(&nfsi->rwsem);
nfs_delegation_claim_opens(inode, &delegation->stateid);
up_write(&nfsi->rwsem);
up_read(&clp->cl_sem);
nfs_msync_inode(inode);
return nfs_do_return_delegation(inode, delegation);
}
static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi, const nfs4_stateid *stateid)
{
struct nfs_delegation *delegation = rcu_dereference(nfsi->delegation);
if (delegation == NULL)
goto nomatch;
if (stateid != NULL && memcmp(delegation->stateid.data, stateid->data,
sizeof(delegation->stateid.data)) != 0)
goto nomatch;
list_del_rcu(&delegation->super_list);
nfsi->delegation_state = 0;
rcu_assign_pointer(nfsi->delegation, NULL);
return delegation;
nomatch:
return NULL;
}
int nfs_inode_return_delegation(struct inode *inode)
{
struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_delegation *delegation;
int err = 0;
if (rcu_dereference(nfsi->delegation) != NULL) {
spin_lock(&clp->cl_lock);
delegation = nfs_detach_delegation_locked(nfsi, NULL);
spin_unlock(&clp->cl_lock);
if (delegation != NULL)
err = __nfs_inode_return_delegation(inode, delegation);
}
return err;
}
/*
* Return all delegations associated to a super block
*/
void nfs_return_all_delegations(struct super_block *sb)
{
struct nfs_client *clp = NFS_SB(sb)->nfs_client;
struct nfs_delegation *delegation;
struct inode *inode;
if (clp == NULL)
return;
restart:
rcu_read_lock();
list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
if (delegation->inode->i_sb != sb)
continue;
inode = igrab(delegation->inode);
if (inode == NULL)
continue;
spin_lock(&clp->cl_lock);
delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL);
spin_unlock(&clp->cl_lock);
rcu_read_unlock();
if (delegation != NULL)
__nfs_inode_return_delegation(inode, delegation);
iput(inode);
goto restart;
}
rcu_read_unlock();
}
static int nfs_do_expire_all_delegations(void *ptr)
{
struct nfs_client *clp = ptr;
struct nfs_delegation *delegation;
struct inode *inode;
allow_signal(SIGKILL);
restart:
if (test_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state) != 0)
goto out;
if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0)
goto out;
rcu_read_lock();
list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
inode = igrab(delegation->inode);
if (inode == NULL)
continue;
spin_lock(&clp->cl_lock);
delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL);
spin_unlock(&clp->cl_lock);
rcu_read_unlock();
if (delegation)
__nfs_inode_return_delegation(inode, delegation);
iput(inode);
goto restart;
}
rcu_read_unlock();
out:
nfs_put_client(clp);
module_put_and_exit(0);
}
void nfs_expire_all_delegations(struct nfs_client *clp)
{
struct task_struct *task;
__module_get(THIS_MODULE);
atomic_inc(&clp->cl_count);
task = kthread_run(nfs_do_expire_all_delegations, clp,
"%u.%u.%u.%u-delegreturn",
NIPQUAD(clp->cl_addr.sin_addr));
if (!IS_ERR(task))
return;
nfs_put_client(clp);
module_put(THIS_MODULE);
}
/*
* Return all delegations following an NFS4ERR_CB_PATH_DOWN error.
*/
void nfs_handle_cb_pathdown(struct nfs_client *clp)
{
struct nfs_delegation *delegation;
struct inode *inode;
if (clp == NULL)
return;
restart:
rcu_read_lock();
list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
inode = igrab(delegation->inode);
if (inode == NULL)
continue;
spin_lock(&clp->cl_lock);
delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL);
spin_unlock(&clp->cl_lock);
rcu_read_unlock();
if (delegation != NULL)
__nfs_inode_return_delegation(inode, delegation);
iput(inode);
goto restart;
}
rcu_read_unlock();
}
struct recall_threadargs {
struct inode *inode;
struct nfs_client *clp;
const nfs4_stateid *stateid;
struct completion started;
int result;
};
static int recall_thread(void *data)
{
struct recall_threadargs *args = (struct recall_threadargs *)data;
struct inode *inode = igrab(args->inode);
struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_delegation *delegation;
daemonize("nfsv4-delegreturn");
nfs_msync_inode(inode);
down_read(&clp->cl_sem);
down_write(&nfsi->rwsem);
spin_lock(&clp->cl_lock);
delegation = nfs_detach_delegation_locked(nfsi, args->stateid);
if (delegation != NULL)
args->result = 0;
else
args->result = -ENOENT;
spin_unlock(&clp->cl_lock);
complete(&args->started);
nfs_delegation_claim_opens(inode, args->stateid);
up_write(&nfsi->rwsem);
up_read(&clp->cl_sem);
nfs_msync_inode(inode);
if (delegation != NULL)
nfs_do_return_delegation(inode, delegation);
iput(inode);
module_put_and_exit(0);
}
/*
* Asynchronous delegation recall!
*/
int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid)
{
struct recall_threadargs data = {
.inode = inode,
.stateid = stateid,
};
int status;
init_completion(&data.started);
__module_get(THIS_MODULE);
status = kernel_thread(recall_thread, &data, CLONE_KERNEL);
if (status < 0)
goto out_module_put;
wait_for_completion(&data.started);
return data.result;
out_module_put:
module_put(THIS_MODULE);
return status;
}
/*
* Retrieve the inode associated with a delegation
*/
struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle)
{
struct nfs_delegation *delegation;
struct inode *res = NULL;
rcu_read_lock();
list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
if (nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) {
res = igrab(delegation->inode);
break;
}
}
rcu_read_unlock();
return res;
}
/*
* Mark all delegations as needing to be reclaimed
*/
void nfs_delegation_mark_reclaim(struct nfs_client *clp)
{
struct nfs_delegation *delegation;
rcu_read_lock();
list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list)
delegation->flags |= NFS_DELEGATION_NEED_RECLAIM;
rcu_read_unlock();
}
/*
* Reap all unclaimed delegations after reboot recovery is done
*/
void nfs_delegation_reap_unclaimed(struct nfs_client *clp)
{
struct nfs_delegation *delegation;
restart:
rcu_read_lock();
list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
if ((delegation->flags & NFS_DELEGATION_NEED_RECLAIM) == 0)
continue;
spin_lock(&clp->cl_lock);
delegation = nfs_detach_delegation_locked(NFS_I(delegation->inode), NULL);
spin_unlock(&clp->cl_lock);
rcu_read_unlock();
if (delegation != NULL)
call_rcu(&delegation->rcu, nfs_free_delegation_callback);
goto restart;
}
rcu_read_unlock();
}
int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode)
{
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_delegation *delegation;
int ret = 0;
rcu_read_lock();
delegation = rcu_dereference(nfsi->delegation);
if (delegation != NULL) {
memcpy(dst->data, delegation->stateid.data, sizeof(dst->data));
ret = 1;
}
rcu_read_unlock();
return ret;
}