linux-stable/fs/nfs/delegation.c

1480 lines
40 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-only
/*
* linux/fs/nfs/delegation.c
*
* Copyright (C) 2004 Trond Myklebust
*
* NFS file delegation management
*
*/
#include <linux/completion.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/sched.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. 
This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 17:04:11 +09:00
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/iversion.h>
#include <linux/nfs4.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_xdr.h>
#include "nfs4_fs.h"
#include "nfs4session.h"
#include "delegation.h"
#include "internal.h"
#include "nfs4trace.h"
/* Initial value assigned to nfs_delegation_watermark below. */
#define NFS_DEFAULT_DELEGATION_WATERMARK (5000U)
/*
 * Running count of delegations that are not flagged NFS_DELEGATION_REVOKED.
 * In this file it is decremented when a delegation is first marked revoked
 * and incremented again whenever the REVOKED flag is cleared.
 */
static atomic_long_t nfs_active_delegations;
/*
 * NOTE(review): the consumers of this watermark are outside this excerpt;
 * presumably it is compared against nfs_active_delegations - confirm.
 */
static unsigned nfs_delegation_watermark = NFS_DEFAULT_DELEGATION_WATERMARK;
static void __nfs_free_delegation(struct nfs_delegation *delegation)
NFSv4: Don't call put_rpccred() from an rcu callback Doing so would require us to introduce bh-safe locks into put_rpccred(). This patch fixes the lockdep complaint reported by Marc Dietrich: inconsistent {softirq-on-W} -> {in-softirq-W} usage. swapper/0 [HC0[0]:SC1[1]:HE1:SE0] takes: (rpc_credcache_lock){-+..}, at: [<c01dc487>] _atomic_dec_and_lock+0x17/0x60 {softirq-on-W} state was registered at: [<c013e870>] __lock_acquire+0x650/0x1030 [<c013f2b1>] lock_acquire+0x61/0x80 [<c02db9ac>] _spin_lock+0x2c/0x40 [<c01dc487>] _atomic_dec_and_lock+0x17/0x60 [<dced55fd>] put_rpccred+0x5d/0x100 [sunrpc] [<dced56c1>] rpcauth_unbindcred+0x21/0x60 [sunrpc] [<dced3fd4>] a0 [sunrpc] [<dcecefe0>] rpc_call_sync+0x30/0x40 [sunrpc] [<dcedc73b>] rpcb_register+0xdb/0x180 [sunrpc] [<dced65b3>] svc_register+0x93/0x160 [sunrpc] [<dced6ebe>] __svc_create+0x1ee/0x220 [sunrpc] [<dced7053>] svc_create+0x13/0x20 [sunrpc] [<dcf6d722>] nfs_callback_up+0x82/0x120 [nfs] [<dcf48f36>] nfs_get_client+0x176/0x390 [nfs] [<dcf49181>] nfs4_set_client+0x31/0x190 [nfs] [<dcf49983>] nfs4_create_server+0x63/0x3b0 [nfs] [<dcf52426>] nfs4_get_sb+0x346/0x5b0 [nfs] [<c017b444>] vfs_kern_mount+0x94/0x110 [<c0190a62>] do_mount+0x1f2/0x7d0 [<c01910a6>] sys_mount+0x66/0xa0 [<c0104046>] syscall_call+0x7/0xb [<ffffffff>] 0xffffffff irq event stamp: 5277830 hardirqs last enabled at (5277830): [<c017530a>] kmem_cache_free+0x8a/0xc0 hardirqs last disabled at (5277829): [<c01752d2>] kmem_cache_free+0x52/0xc0 softirqs last enabled at (5277798): [<c0124173>] __do_softirq+0xa3/0xc0 softirqs last disabled at (5277817): [<c01241d7>] do_softirq+0x47/0x50 other info that might help us debug this: no locks held by swapper/0. 
stack backtrace: [<c0104fda>] show_trace_log_lvl+0x1a/0x30 [<c0105c02>] show_trace+0x12/0x20 [<c0105d15>] dump_stack+0x15/0x20 [<c013ccc3>] print_usage_bug+0x153/0x160 [<c013d8b9>] mark_lock+0x449/0x620 [<c013e824>] __lock_acquire+0x604/0x1030 [<c013f2b1>] lock_acquire+0x61/0x80 [<c02db9ac>] _spin_lock+0x2c/0x40 [<c01dc487>] _atomic_dec_and_lock+0x17/0x60 [<dced55fd>] put_rpccred+0x5d/0x100 [sunrpc] [<dcf6bf83>] nfs_free_delegation_callback+0x13/0x20 [nfs] [<c012f9ea>] __rcu_process_callbacks+0x6a/0x1c0 [<c012fb52>] rcu_process_callbacks+0x12/0x30 [<c0124218>] tasklet_action+0x38/0x80 [<c0124125>] __do_softirq+0x55/0xc0 [<c01241d7>] do_softirq+0x47/0x50 [<c0124605>] irq_exit+0x35/0x40 [<c0112463>] smp_apic_timer_interrupt+0x43/0x80 [<c0104a77>] apic_timer_interrupt+0x33/0x38 [<c02690df>] cpuidle_idle_call+0x6f/0x90 [<c01023c3>] cpu_idle+0x43/0x70 [<c02d8c27>] rest_init+0x47/0x50 [<c03bcb6a>] start_kernel+0x22a/0x2b0 [<00000000>] 0x0 ======================= Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
2007-08-06 12:18:34 -04:00
{
put_cred(delegation->cred);
delegation->cred = NULL;
kfree_rcu(delegation, rcu);
}
/*
 * Flag a delegation as revoked.  Only the first caller to set the REVOKED
 * bit performs the bookkeeping: the stateid is invalidated and the active
 * delegation count is decremented.  Unless a return is already in progress
 * (RETURNING set), the inode's "verifier delegated" state is cleared too.
 */
static void nfs_mark_delegation_revoked(struct nfs_delegation *delegation)
{
	/* Someone else already revoked it: nothing left to account for. */
	if (test_and_set_bit(NFS_DELEGATION_REVOKED, &delegation->flags))
		return;

	delegation->stateid.type = NFS4_INVALID_STATEID_TYPE;
	atomic_long_dec(&nfs_active_delegations);

	if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags))
		return;
	nfs_clear_verifier_delegated(delegation->inode);
}
static struct nfs_delegation *nfs_get_delegation(struct nfs_delegation *delegation)
{
refcount_inc(&delegation->refcount);
return delegation;
}
static void nfs_put_delegation(struct nfs_delegation *delegation)
{
if (refcount_dec_and_test(&delegation->refcount))
__nfs_free_delegation(delegation);
}
static void nfs_free_delegation(struct nfs_delegation *delegation)
{
nfs_mark_delegation_revoked(delegation);
nfs_put_delegation(delegation);
}
/**
 * nfs_mark_delegation_referenced - flag a delegation as recently used
 * @delegation: delegation whose NFS_DELEGATION_REFERENCED bit is to be set
 *
 * Atomically sets the REFERENCED bit in @delegation->flags.  No locking is
 * performed here.
 */
void nfs_mark_delegation_referenced(struct nfs_delegation *delegation)
{
	set_bit(NFS_DELEGATION_REFERENCED, &delegation->flags);
}
static void nfs_mark_return_delegation(struct nfs_server *server,
struct nfs_delegation *delegation)
{
set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state);
}
/*
 * A delegation is usable for @flags when it exists, its type covers every
 * bit in @flags, and it is neither revoked nor in the middle of being
 * returned.
 */
static bool
nfs4_is_valid_delegation(const struct nfs_delegation *delegation,
		fmode_t flags)
{
	if (delegation == NULL)
		return false;
	if ((delegation->type & flags) != flags)
		return false;
	if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags))
		return false;
	return !test_bit(NFS_DELEGATION_RETURNING, &delegation->flags);
}
/*
 * Return the inode's delegation if it is currently valid (of any type),
 * otherwise NULL.
 *
 * NOTE(review): this uses rcu_dereference() without taking rcu_read_lock()
 * itself, so callers are presumably expected to be inside an RCU read-side
 * critical section - confirm against callers.
 */
struct nfs_delegation *nfs4_get_valid_delegation(const struct inode *inode)
{
	struct nfs_delegation *deleg = rcu_dereference(NFS_I(inode)->delegation);

	return nfs4_is_valid_delegation(deleg, 0) ? deleg : NULL;
}
/*
 * Common helper for the delegation checks: report (1/0) whether @inode
 * holds a valid delegation covering the read/write bits of @flags.  When
 * @mark is true and a delegation is found, it is also flagged as
 * referenced.  The lookup runs under rcu_read_lock().
 */
static int
nfs4_do_check_delegation(struct inode *inode, fmode_t flags, bool mark)
{
	struct nfs_delegation *deleg;
	int found;

	/* Only the access-mode bits are meaningful for the comparison. */
	flags &= FMODE_READ|FMODE_WRITE;

	rcu_read_lock();
	deleg = rcu_dereference(NFS_I(inode)->delegation);
	found = nfs4_is_valid_delegation(deleg, flags) ? 1 : 0;
	if (found && mark)
		nfs_mark_delegation_referenced(deleg);
	rcu_read_unlock();

	return found;
}
/**
 * nfs4_have_delegation - test for a delegation and mark it referenced
 * @inode: inode to check
 * @flags: delegation types to check for
 *
 * If a matching valid delegation exists it is flagged
 * NFS_DELEGATION_REFERENCED as a side effect.
 *
 * Returns one if inode has the indicated delegation, otherwise zero.
 */
int nfs4_have_delegation(struct inode *inode, fmode_t flags)
{
	const bool mark_referenced = true;

	return nfs4_do_check_delegation(inode, flags, mark_referenced);
}
/*
 * nfs4_check_delegation - test whether @inode holds a delegation of the
 * types in @flags, leaving NFS_DELEGATION_REFERENCED untouched.
 *
 * Returns one if such a delegation exists, otherwise zero.
 */
int nfs4_check_delegation(struct inode *inode, fmode_t flags)
{
	const bool mark_referenced = false;

	return nfs4_do_check_delegation(inode, flags, mark_referenced);
}
/*
 * Walk the inode's POSIX and then flock lock lists and, for every lock whose
 * open context refers to @state, call nfs4_lock_delegation_recall() with the
 * delegation @stateid.
 *
 * Returns 0 when the inode has no lock context; otherwise the status of the
 * last recall performed (0 if none was needed).  A negative status aborts
 * the walk immediately.
 */
static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_stateid *stateid)
{
struct inode *inode = state->inode;
struct file_lock *fl;
struct file_lock_context *flctx = inode->i_flctx;
struct list_head *list;
int status = 0;
/* No lock context: nothing to claim. */
if (flctx == NULL)
goto out;
/* First pass covers POSIX locks; the flock list follows via "restart". */
list = &flctx->flc_posix;
spin_lock(&flctx->flc_lock);
restart:
list_for_each_entry(fl, list, fl_list) {
/* Skip locks that belong to a different open state. */
if (nfs_file_open_context(fl->fl_file)->state != state)
continue;
/* flc_lock is released around the recall call and retaken afterwards. */
spin_unlock(&flctx->flc_lock);
status = nfs4_lock_delegation_recall(fl, state, stateid);
/* On failure we leave with flc_lock already released. */
if (status < 0)
goto out;
spin_lock(&flctx->flc_lock);
}
/* POSIX list done: repeat the same walk over the flock list. */
if (list == &flctx->flc_posix) {
list = &flctx->flc_flock;
goto restart;
}
spin_unlock(&flctx->flc_lock);
out:
return status;
}
/*
 * For every open context on @inode whose state is delegated and matches
 * @stateid, recall the open and then the associated locks.
 *
 * The open_files list is scanned under rcu_read_lock(); once a candidate is
 * found a reference is taken on it, the RCU lock is dropped, and the work is
 * done under the state owner's so_delegreturn_mutex.  After each context
 * that is processed successfully the scan starts over from the head of the
 * list.
 *
 * Returns 0 once no matching context remains, the first non-zero status
 * from the recall helpers, or -EAGAIN if so_reclaim_seqcount changed while
 * a context was being processed.
 *
 * Note: @type is not referenced in this function body.
 */
static int nfs_delegation_claim_opens(struct inode *inode,
const nfs4_stateid *stateid, fmode_t type)
{
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_open_context *ctx;
struct nfs4_state_owner *sp;
struct nfs4_state *state;
unsigned int seq;
int err;
again:
rcu_read_lock();
list_for_each_entry_rcu(ctx, &nfsi->open_files, list) {
state = ctx->state;
/* Filter: only valid, delegated open states using this stateid. */
if (state == NULL)
continue;
if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
continue;
if (!nfs4_valid_open_stateid(state))
continue;
if (!nfs4_stateid_match(&state->stateid, stateid))
continue;
/* Pin the context before leaving the RCU section; skip it if that fails. */
if (!get_nfs_open_context(ctx))
continue;
rcu_read_unlock();
sp = state->owner;
/* Block nfs4_proc_unlck */
mutex_lock(&sp->so_delegreturn_mutex);
/* Sample the reclaim sequence so a concurrent change can be detected below. */
seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
err = nfs4_open_delegation_recall(ctx, state, stateid);
if (!err)
err = nfs_delegation_claim_locks(state, stateid);
/* The sequence count moved while we worked: ask the caller to retry. */
if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
err = -EAGAIN;
mutex_unlock(&sp->so_delegreturn_mutex);
put_nfs_open_context(ctx);
if (err != 0)
return err;
/*
 * The RCU lock was dropped, so rescan from the start.
 * NOTE(review): termination presumably relies on the recall making this
 * context fail one of the filters above - confirm.
 */
goto again;
}
rcu_read_unlock();
return 0;
}
/**
 * nfs_inode_reclaim_delegation - process a delegation reclaim request
 * @inode: inode to process
 * @cred: credential to use for request
 * @type: delegation type
 * @stateid: delegation stateid
 * @pagemod_limit: write delegation "space_limit"
 *
 * If the inode already carries a delegation, it is refreshed in place
 * under the delegation's spinlock: stateid, type, page-modification limit
 * and credential are replaced, NEED_RECLAIM is cleared, and a delegation
 * that had been revoked is counted as active again.  Otherwise a new
 * delegation is set up via nfs_inode_set_delegation(), whose return value
 * is ignored here.
 */
void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred,
		fmode_t type, const nfs4_stateid *stateid,
		unsigned long pagemod_limit)
{
	struct nfs_delegation *deleg;
	const struct cred *stale_cred;

	rcu_read_lock();
	deleg = rcu_dereference(NFS_I(inode)->delegation);
	if (deleg == NULL) {
		/* Nothing to refresh: install a brand new delegation. */
		rcu_read_unlock();
		nfs_inode_set_delegation(inode, cred, type, stateid,
					 pagemod_limit);
		return;
	}

	spin_lock(&deleg->lock);
	nfs4_stateid_copy(&deleg->stateid, stateid);
	deleg->type = type;
	deleg->pagemod_limit = pagemod_limit;
	stale_cred = deleg->cred;
	deleg->cred = get_cred(cred);
	clear_bit(NFS_DELEGATION_NEED_RECLAIM, &deleg->flags);
	if (test_and_clear_bit(NFS_DELEGATION_REVOKED, &deleg->flags))
		atomic_long_inc(&nfs_active_delegations);
	spin_unlock(&deleg->lock);
	rcu_read_unlock();

	/* The old credential is released only after both locks are dropped. */
	put_cred(stale_cred);
	trace_nfs4_reclaim_delegation(inode, type);
}
/*
 * Send a DELEGRETURN for @delegation unless it has already been revoked,
 * in which case nothing is sent and 0 is returned.  The credential is
 * pinned under the delegation lock for the duration of the call.
 *
 * Returns the result of nfs4_proc_delegreturn(), or 0 if skipped.
 */
static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync)
{
	const struct cred *cred;
	int res;

	if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags))
		return 0;

	spin_lock(&delegation->lock);
	cred = get_cred(delegation->cred);
	spin_unlock(&delegation->lock);

	res = nfs4_proc_delegreturn(inode, cred, &delegation->stateid, issync);
	put_cred(cred);
	return res;
}
/*
 * Try to take a reference on the inode a delegation is attached to.
 * Returns the inode on success.  If the delegation has no inode, or
 * igrab() fails, the delegation is flagged NFS_DELEGATION_INODE_FREEING
 * and NULL is returned.  Runs under the delegation's spinlock.
 */
static struct inode *nfs_delegation_grab_inode(struct nfs_delegation *delegation)
{
	struct inode *grabbed;

	spin_lock(&delegation->lock);
	grabbed = delegation->inode;
	if (grabbed != NULL)
		grabbed = igrab(grabbed);
	if (grabbed == NULL)
		set_bit(NFS_DELEGATION_INODE_FREEING, &delegation->flags);
	spin_unlock(&delegation->lock);

	return grabbed;
}
static struct nfs_delegation *
nfs_start_delegation_return_locked(struct nfs_inode *nfsi)
{
struct nfs_delegation *ret = NULL;
struct nfs_delegation *delegation = rcu_dereference(nfsi->delegation);
if (delegation == NULL)
goto out;
spin_lock(&delegation->lock);
if (!test_and_set_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) {
clear_bit(NFS_DELEGATION_RETURN_DELAYED, &delegation->flags);
/* Refcount matched in nfs_end_delegation_return() */
ret = nfs_get_delegation(delegation);
}
spin_unlock(&delegation->lock);
if (ret)
nfs_clear_verifier_delegated(&nfsi->vfs_inode);
out:
return ret;
}
/*
 * Wrapper around nfs_start_delegation_return_locked() that supplies the
 * RCU read-side critical section itself.
 */
static struct nfs_delegation *
nfs_start_delegation_return(struct nfs_inode *nfsi)
{
	struct nfs_delegation *claimed;

	rcu_read_lock();
	claimed = nfs_start_delegation_return_locked(nfsi);
	rcu_read_unlock();

	return claimed;
}
static void nfs_abort_delegation_return(struct nfs_delegation *delegation,
struct nfs_client *clp, int err)
{
spin_lock(&delegation->lock);
clear_bit(NFS_DELEGATION_RETURNING, &delegation->flags);
if (err == -EAGAIN) {
set_bit(NFS_DELEGATION_RETURN_DELAYED, &delegation->flags);
set_bit(NFS4CLNT_DELEGRETURN_DELAYED, &clp->cl_state);
}
spin_unlock(&delegation->lock);
}
static struct nfs_delegation *
nfs_detach_delegation_locked(struct nfs_inode *nfsi,
struct nfs_delegation *delegation,
struct nfs_client *clp)
{
struct nfs_delegation *deleg_cur =
NFS: Fix RCU issues in the NFSv4 delegation code Fix a number of RCU issues in the NFSv4 delegation code. (1) delegation->cred doesn't need to be RCU protected as it's essentially an invariant refcounted structure. By the time we get to nfs_free_delegation(), the delegation is being released, so no one else should be attempting to use the saved credentials, and they can be cleared. However, since the list of delegations could still be under traversal at this point by such as nfs_client_return_marked_delegations(), the cred should be released in nfs_do_free_delegation() rather than in nfs_free_delegation(). Simply using rcu_assign_pointer() to clear it is insufficient as that doesn't stop the cred from being destroyed, and nor does calling put_rpccred() after call_rcu(), given that the latter is asynchronous. (2) nfs_detach_delegation_locked() and nfs_inode_set_delegation() should use rcu_derefence_protected() because they can only be called if nfs_client::cl_lock is held, and that guards against anyone changing nfsi->delegation under it. Furthermore, the barrier imposed by rcu_dereference() is superfluous, given that the spin_lock() is also a barrier. (3) nfs_detach_delegation_locked() is now passed a pointer to the nfs_client struct so that it can issue lockdep advice based on clp->cl_lock for (2). (4) nfs_inode_return_delegation_noreclaim() and nfs_inode_return_delegation() should use rcu_access_pointer() outside the spinlocked region as they merely examine the pointer and don't follow it, thus rendering unnecessary the need to impose a partial ordering over the one item of interest. These result in an RCU warning like the following: [ INFO: suspicious rcu_dereference_check() usage. ] --------------------------------------------------- fs/nfs/delegation.c:332 invoked rcu_dereference_check() without protection! 
other info that might help us debug this: rcu_scheduler_active = 1, debug_locks = 0 2 locks held by mount.nfs4/2281: #0: (&type->s_umount_key#34){+.+...}, at: [<ffffffff810b25b4>] deactivate_super+0x60/0x80 #1: (iprune_sem){+.+...}, at: [<ffffffff810c332a>] invalidate_inodes+0x39/0x13a stack backtrace: Pid: 2281, comm: mount.nfs4 Not tainted 2.6.34-rc1-cachefs #110 Call Trace: [<ffffffff8105149f>] lockdep_rcu_dereference+0xaa/0xb2 [<ffffffffa00b4591>] nfs_inode_return_delegation_noreclaim+0x5b/0xa0 [nfs] [<ffffffffa0095d63>] nfs4_clear_inode+0x11/0x1e [nfs] [<ffffffff810c2d92>] clear_inode+0x9e/0xf8 [<ffffffff810c3028>] dispose_list+0x67/0x10e [<ffffffff810c340d>] invalidate_inodes+0x11c/0x13a [<ffffffff810b1dc1>] generic_shutdown_super+0x42/0xf4 [<ffffffff810b1ebe>] kill_anon_super+0x11/0x4f [<ffffffffa009893c>] nfs4_kill_super+0x3f/0x72 [nfs] [<ffffffff810b25bc>] deactivate_super+0x68/0x80 [<ffffffff810c6744>] mntput_no_expire+0xbb/0xf8 [<ffffffff810c681b>] release_mounts+0x9a/0xb0 [<ffffffff810c689b>] put_mnt_ns+0x6a/0x79 [<ffffffffa00983a1>] nfs_follow_remote_path+0x5a/0x146 [nfs] [<ffffffffa0098334>] ? nfs_do_root_mount+0x82/0x95 [nfs] [<ffffffffa00985a9>] nfs4_try_mount+0x75/0xaf [nfs] [<ffffffffa0098874>] nfs4_get_sb+0x291/0x31a [nfs] [<ffffffff810b2059>] vfs_kern_mount+0xb8/0x177 [<ffffffff810b2176>] do_kern_mount+0x48/0xe8 [<ffffffff810c810b>] do_mount+0x782/0x7f9 [<ffffffff810c8205>] sys_mount+0x83/0xbe [<ffffffff81001eeb>] system_call_fastpath+0x16/0x1b Also on: fs/nfs/delegation.c:215 invoked rcu_dereference_check() without protection! [<ffffffff8105149f>] lockdep_rcu_dereference+0xaa/0xb2 [<ffffffffa00b4223>] nfs_inode_set_delegation+0xfe/0x219 [nfs] [<ffffffffa00a9c6f>] nfs4_opendata_to_nfs4_state+0x2c2/0x30d [nfs] [<ffffffffa00aa15d>] nfs4_do_open+0x2a6/0x3a6 [nfs] ... And: fs/nfs/delegation.c:40 invoked rcu_dereference_check() without protection! 
[<ffffffff8105149f>] lockdep_rcu_dereference+0xaa/0xb2 [<ffffffffa00b3bef>] nfs_free_delegation+0x3d/0x6e [nfs] [<ffffffffa00b3e71>] nfs_do_return_delegation+0x26/0x30 [nfs] [<ffffffffa00b406a>] __nfs_inode_return_delegation+0x1ef/0x1fe [nfs] [<ffffffffa00b448a>] nfs_client_return_marked_delegations+0xc9/0x124 [nfs] ... Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
2010-05-01 12:37:18 -04:00
rcu_dereference_protected(nfsi->delegation,
lockdep_is_held(&clp->cl_lock));
if (deleg_cur == NULL || delegation != deleg_cur)
return NULL;
spin_lock(&delegation->lock);
if (!delegation->inode) {
spin_unlock(&delegation->lock);
return NULL;
}
list_del_rcu(&delegation->super_list);
delegation->inode = NULL;
rcu_assign_pointer(nfsi->delegation, NULL);
spin_unlock(&delegation->lock);
return delegation;
}
/*
 * Detach @delegation from @nfsi while holding the cl_lock of the client
 * that owns @server.  Returns the detached delegation, or NULL if the
 * locked helper declined to detach it.
 */
static struct nfs_delegation *nfs_detach_delegation(struct nfs_inode *nfsi,
		struct nfs_delegation *delegation,
		struct nfs_server *server)
{
	struct nfs_client *clp = server->nfs_client;
	struct nfs_delegation *detached;

	spin_lock(&clp->cl_lock);
	detached = nfs_detach_delegation_locked(nfsi, delegation, clp);
	spin_unlock(&clp->cl_lock);

	return detached;
}
/*
 * Look up the delegation currently attached to @inode (under RCU) and, if
 * there is one, detach it.  Returns the detached delegation, or NULL when
 * the inode had none or the detach did not happen.
 */
static struct nfs_delegation *
nfs_inode_detach_delegation(struct inode *inode)
{
	struct nfs_inode *nfsi = NFS_I(inode);
	struct nfs_delegation *deleg;

	rcu_read_lock();
	deleg = rcu_dereference(nfsi->delegation);
	if (deleg != NULL)
		deleg = nfs_detach_delegation(nfsi, deleg, NFS_SERVER(inode));
	rcu_read_unlock();

	return deleg;
}
/*
 * Replace the delegation's credential with @cred when cred_fscmp() reports
 * that the two differ.  The swap is done with xchg() and the reference on
 * the previous credential is dropped afterwards.
 */
static void
nfs_update_delegation_cred(struct nfs_delegation *delegation,
		const struct cred *cred)
{
	const struct cred *previous;

	if (cred_fscmp(delegation->cred, cred) == 0)
		return;

	previous = xchg(&delegation->cred, get_cred(cred));
	put_cred(previous);
}
/*
 * Refresh an existing delegation from @update, but only when @update
 * carries a newer stateid according to nfs4_stateid_is_newer().
 *
 * The seqid is written first, followed by a write barrier, and then the
 * type and page-modification limit.  If the existing delegation had been
 * revoked, its change attribute and credential are refreshed as well, the
 * REVOKED flag is cleared, and it is counted as active again.
 */
static void
nfs_update_inplace_delegation(struct nfs_delegation *delegation,
const struct nfs_delegation *update)
{
if (nfs4_stateid_is_newer(&update->stateid, &delegation->stateid)) {
delegation->stateid.seqid = update->stateid.seqid;
/*
 * Order the seqid store before the stores below.
 * NOTE(review): presumably pairs with a read barrier in a reader
 * outside this excerpt - confirm.
 */
smp_wmb();
delegation->type = update->type;
delegation->pagemod_limit = update->pagemod_limit;
/* A revoked delegation is being brought back to life. */
if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) {
delegation->change_attr = update->change_attr;
nfs_update_delegation_cred(delegation, update->cred);
/* smp_mb__before_atomic() is implicit due to xchg() */
clear_bit(NFS_DELEGATION_REVOKED, &delegation->flags);
atomic_long_inc(&nfs_active_delegations);
}
}
}
/**
* nfs_inode_set_delegation - set up a delegation on an inode
* @inode: inode to which delegation applies
* @cred: cred to use for subsequent delegation processing
* @type: delegation type
* @stateid: delegation stateid
* @pagemod_limit: write delegation "space_limit"
*
* Returns zero on success, or a negative errno value.
*/
int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred,
fmode_t type,
const nfs4_stateid *stateid,
unsigned long pagemod_limit)
{
struct nfs_server *server = NFS_SERVER(inode);
struct nfs_client *clp = server->nfs_client;
struct nfs_inode *nfsi = NFS_I(inode);
NFS: Fix RCU issues in the NFSv4 delegation code Fix a number of RCU issues in the NFSv4 delegation code. (1) delegation->cred doesn't need to be RCU protected as it's essentially an invariant refcounted structure. By the time we get to nfs_free_delegation(), the delegation is being released, so no one else should be attempting to use the saved credentials, and they can be cleared. However, since the list of delegations could still be under traversal at this point by such as nfs_client_return_marked_delegations(), the cred should be released in nfs_do_free_delegation() rather than in nfs_free_delegation(). Simply using rcu_assign_pointer() to clear it is insufficient as that doesn't stop the cred from being destroyed, and nor does calling put_rpccred() after call_rcu(), given that the latter is asynchronous. (2) nfs_detach_delegation_locked() and nfs_inode_set_delegation() should use rcu_derefence_protected() because they can only be called if nfs_client::cl_lock is held, and that guards against anyone changing nfsi->delegation under it. Furthermore, the barrier imposed by rcu_dereference() is superfluous, given that the spin_lock() is also a barrier. (3) nfs_detach_delegation_locked() is now passed a pointer to the nfs_client struct so that it can issue lockdep advice based on clp->cl_lock for (2). (4) nfs_inode_return_delegation_noreclaim() and nfs_inode_return_delegation() should use rcu_access_pointer() outside the spinlocked region as they merely examine the pointer and don't follow it, thus rendering unnecessary the need to impose a partial ordering over the one item of interest. These result in an RCU warning like the following: [ INFO: suspicious rcu_dereference_check() usage. ] --------------------------------------------------- fs/nfs/delegation.c:332 invoked rcu_dereference_check() without protection! 
other info that might help us debug this: rcu_scheduler_active = 1, debug_locks = 0 2 locks held by mount.nfs4/2281: #0: (&type->s_umount_key#34){+.+...}, at: [<ffffffff810b25b4>] deactivate_super+0x60/0x80 #1: (iprune_sem){+.+...}, at: [<ffffffff810c332a>] invalidate_inodes+0x39/0x13a stack backtrace: Pid: 2281, comm: mount.nfs4 Not tainted 2.6.34-rc1-cachefs #110 Call Trace: [<ffffffff8105149f>] lockdep_rcu_dereference+0xaa/0xb2 [<ffffffffa00b4591>] nfs_inode_return_delegation_noreclaim+0x5b/0xa0 [nfs] [<ffffffffa0095d63>] nfs4_clear_inode+0x11/0x1e [nfs] [<ffffffff810c2d92>] clear_inode+0x9e/0xf8 [<ffffffff810c3028>] dispose_list+0x67/0x10e [<ffffffff810c340d>] invalidate_inodes+0x11c/0x13a [<ffffffff810b1dc1>] generic_shutdown_super+0x42/0xf4 [<ffffffff810b1ebe>] kill_anon_super+0x11/0x4f [<ffffffffa009893c>] nfs4_kill_super+0x3f/0x72 [nfs] [<ffffffff810b25bc>] deactivate_super+0x68/0x80 [<ffffffff810c6744>] mntput_no_expire+0xbb/0xf8 [<ffffffff810c681b>] release_mounts+0x9a/0xb0 [<ffffffff810c689b>] put_mnt_ns+0x6a/0x79 [<ffffffffa00983a1>] nfs_follow_remote_path+0x5a/0x146 [nfs] [<ffffffffa0098334>] ? nfs_do_root_mount+0x82/0x95 [nfs] [<ffffffffa00985a9>] nfs4_try_mount+0x75/0xaf [nfs] [<ffffffffa0098874>] nfs4_get_sb+0x291/0x31a [nfs] [<ffffffff810b2059>] vfs_kern_mount+0xb8/0x177 [<ffffffff810b2176>] do_kern_mount+0x48/0xe8 [<ffffffff810c810b>] do_mount+0x782/0x7f9 [<ffffffff810c8205>] sys_mount+0x83/0xbe [<ffffffff81001eeb>] system_call_fastpath+0x16/0x1b Also on: fs/nfs/delegation.c:215 invoked rcu_dereference_check() without protection! [<ffffffff8105149f>] lockdep_rcu_dereference+0xaa/0xb2 [<ffffffffa00b4223>] nfs_inode_set_delegation+0xfe/0x219 [nfs] [<ffffffffa00a9c6f>] nfs4_opendata_to_nfs4_state+0x2c2/0x30d [nfs] [<ffffffffa00aa15d>] nfs4_do_open+0x2a6/0x3a6 [nfs] ... And: fs/nfs/delegation.c:40 invoked rcu_dereference_check() without protection! 
[<ffffffff8105149f>] lockdep_rcu_dereference+0xaa/0xb2 [<ffffffffa00b3bef>] nfs_free_delegation+0x3d/0x6e [nfs] [<ffffffffa00b3e71>] nfs_do_return_delegation+0x26/0x30 [nfs] [<ffffffffa00b406a>] __nfs_inode_return_delegation+0x1ef/0x1fe [nfs] [<ffffffffa00b448a>] nfs_client_return_marked_delegations+0xc9/0x124 [nfs] ... Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
2010-05-01 12:37:18 -04:00
struct nfs_delegation *delegation, *old_delegation;
struct nfs_delegation *freeme = NULL;
int status = 0;
delegation = kmalloc(sizeof(*delegation), GFP_KERNEL_ACCOUNT);
if (delegation == NULL)
return -ENOMEM;
nfs4_stateid_copy(&delegation->stateid, stateid);
refcount_set(&delegation->refcount, 1);
delegation->type = type;
delegation->pagemod_limit = pagemod_limit;
delegation->change_attr = inode_peek_iversion_raw(inode);
delegation->cred = get_cred(cred);
delegation->inode = inode;
delegation->flags = 1<<NFS_DELEGATION_REFERENCED;
spin_lock_init(&delegation->lock);
spin_lock(&clp->cl_lock);
NFS: Fix RCU issues in the NFSv4 delegation code Fix a number of RCU issues in the NFSv4 delegation code. (1) delegation->cred doesn't need to be RCU protected as it's essentially an invariant refcounted structure. By the time we get to nfs_free_delegation(), the delegation is being released, so no one else should be attempting to use the saved credentials, and they can be cleared. However, since the list of delegations could still be under traversal at this point by such as nfs_client_return_marked_delegations(), the cred should be released in nfs_do_free_delegation() rather than in nfs_free_delegation(). Simply using rcu_assign_pointer() to clear it is insufficient as that doesn't stop the cred from being destroyed, and nor does calling put_rpccred() after call_rcu(), given that the latter is asynchronous. (2) nfs_detach_delegation_locked() and nfs_inode_set_delegation() should use rcu_dereference_protected() because they can only be called if nfs_client::cl_lock is held, and that guards against anyone changing nfsi->delegation under it. Furthermore, the barrier imposed by rcu_dereference() is superfluous, given that the spin_lock() is also a barrier. (3) nfs_detach_delegation_locked() is now passed a pointer to the nfs_client struct so that it can issue lockdep advice based on clp->cl_lock for (2). (4) nfs_inode_return_delegation_noreclaim() and nfs_inode_return_delegation() should use rcu_access_pointer() outside the spinlocked region as they merely examine the pointer and don't follow it, thus rendering unnecessary the need to impose a partial ordering over the one item of interest. These result in an RCU warning like the following: [ INFO: suspicious rcu_dereference_check() usage. ] --------------------------------------------------- fs/nfs/delegation.c:332 invoked rcu_dereference_check() without protection! 
other info that might help us debug this: rcu_scheduler_active = 1, debug_locks = 0 2 locks held by mount.nfs4/2281: #0: (&type->s_umount_key#34){+.+...}, at: [<ffffffff810b25b4>] deactivate_super+0x60/0x80 #1: (iprune_sem){+.+...}, at: [<ffffffff810c332a>] invalidate_inodes+0x39/0x13a stack backtrace: Pid: 2281, comm: mount.nfs4 Not tainted 2.6.34-rc1-cachefs #110 Call Trace: [<ffffffff8105149f>] lockdep_rcu_dereference+0xaa/0xb2 [<ffffffffa00b4591>] nfs_inode_return_delegation_noreclaim+0x5b/0xa0 [nfs] [<ffffffffa0095d63>] nfs4_clear_inode+0x11/0x1e [nfs] [<ffffffff810c2d92>] clear_inode+0x9e/0xf8 [<ffffffff810c3028>] dispose_list+0x67/0x10e [<ffffffff810c340d>] invalidate_inodes+0x11c/0x13a [<ffffffff810b1dc1>] generic_shutdown_super+0x42/0xf4 [<ffffffff810b1ebe>] kill_anon_super+0x11/0x4f [<ffffffffa009893c>] nfs4_kill_super+0x3f/0x72 [nfs] [<ffffffff810b25bc>] deactivate_super+0x68/0x80 [<ffffffff810c6744>] mntput_no_expire+0xbb/0xf8 [<ffffffff810c681b>] release_mounts+0x9a/0xb0 [<ffffffff810c689b>] put_mnt_ns+0x6a/0x79 [<ffffffffa00983a1>] nfs_follow_remote_path+0x5a/0x146 [nfs] [<ffffffffa0098334>] ? nfs_do_root_mount+0x82/0x95 [nfs] [<ffffffffa00985a9>] nfs4_try_mount+0x75/0xaf [nfs] [<ffffffffa0098874>] nfs4_get_sb+0x291/0x31a [nfs] [<ffffffff810b2059>] vfs_kern_mount+0xb8/0x177 [<ffffffff810b2176>] do_kern_mount+0x48/0xe8 [<ffffffff810c810b>] do_mount+0x782/0x7f9 [<ffffffff810c8205>] sys_mount+0x83/0xbe [<ffffffff81001eeb>] system_call_fastpath+0x16/0x1b Also on: fs/nfs/delegation.c:215 invoked rcu_dereference_check() without protection! [<ffffffff8105149f>] lockdep_rcu_dereference+0xaa/0xb2 [<ffffffffa00b4223>] nfs_inode_set_delegation+0xfe/0x219 [nfs] [<ffffffffa00a9c6f>] nfs4_opendata_to_nfs4_state+0x2c2/0x30d [nfs] [<ffffffffa00aa15d>] nfs4_do_open+0x2a6/0x3a6 [nfs] ... And: fs/nfs/delegation.c:40 invoked rcu_dereference_check() without protection! 
[<ffffffff8105149f>] lockdep_rcu_dereference+0xaa/0xb2 [<ffffffffa00b3bef>] nfs_free_delegation+0x3d/0x6e [nfs] [<ffffffffa00b3e71>] nfs_do_return_delegation+0x26/0x30 [nfs] [<ffffffffa00b406a>] __nfs_inode_return_delegation+0x1ef/0x1fe [nfs] [<ffffffffa00b448a>] nfs_client_return_marked_delegations+0xc9/0x124 [nfs] ... Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
2010-05-01 12:37:18 -04:00
old_delegation = rcu_dereference_protected(nfsi->delegation,
lockdep_is_held(&clp->cl_lock));
if (old_delegation == NULL)
goto add_new;
/* Is this an update of the existing delegation? */
if (nfs4_stateid_match_other(&old_delegation->stateid,
&delegation->stateid)) {
spin_lock(&old_delegation->lock);
nfs_update_inplace_delegation(old_delegation,
delegation);
spin_unlock(&old_delegation->lock);
goto out;
}
if (!test_bit(NFS_DELEGATION_REVOKED, &old_delegation->flags)) {
/*
* Deal with broken servers that hand out two
* delegations for the same file.
* Allow for upgrades to a WRITE delegation, but
* nothing else.
*/
dfprintk(FILE, "%s: server %s handed out "
"a duplicate delegation!\n",
__func__, clp->cl_hostname);
if (delegation->type == old_delegation->type ||
!(delegation->type & FMODE_WRITE)) {
freeme = delegation;
delegation = NULL;
goto out;
}
if (test_and_set_bit(NFS_DELEGATION_RETURNING,
&old_delegation->flags))
goto out;
}
freeme = nfs_detach_delegation_locked(nfsi, old_delegation, clp);
if (freeme == NULL)
goto out;
add_new:
/*
* If we didn't revalidate the change attribute before setting
* the delegation, then pre-emptively ask for a full attribute
* cache revalidation.
*/
spin_lock(&inode->i_lock);
if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_CHANGE)
nfs_set_cache_invalid(inode,
NFS_INO_INVALID_ATIME | NFS_INO_INVALID_CTIME |
NFS_INO_INVALID_MTIME | NFS_INO_INVALID_SIZE |
NFS_INO_INVALID_BLOCKS | NFS_INO_INVALID_NLINK |
NFS_INO_INVALID_OTHER | NFS_INO_INVALID_DATA |
NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL |
NFS_INO_INVALID_XATTR);
spin_unlock(&inode->i_lock);
list_add_tail_rcu(&delegation->super_list, &server->delegations);
rcu_assign_pointer(nfsi->delegation, delegation);
delegation = NULL;
atomic_long_inc(&nfs_active_delegations);
trace_nfs4_set_delegation(inode, type);
out:
spin_unlock(&clp->cl_lock);
if (delegation != NULL)
__nfs_free_delegation(delegation);
if (freeme != NULL) {
nfs_do_return_delegation(inode, freeme, 0);
nfs_free_delegation(freeme);
}
return status;
}
/*
 * Basic procedure for returning a delegation to the server.
 *
 * @inode:      inode the delegation belongs to
 * @delegation: delegation to return; NULL makes this a no-op returning 0
 * @issync:     non-zero for a synchronous return; when zero the lease break
 *              is issued with O_NONBLOCK and -EAGAIN is not retried
 *
 * Breaks outstanding leases, re-claims the opens covered by the delegation
 * and then hands the delegation to nfs_do_return_delegation().  If any of
 * the preparatory steps fails, the return is aborted through
 * nfs_abort_delegation_return() and the error is propagated.
 *
 * The delegation reference is dropped on every path that gets past the
 * NULL check.
 */
static int nfs_end_delegation_return(struct inode *inode,
				     struct nfs_delegation *delegation,
				     int issync)
{
	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
	unsigned int lease_mode = O_WRONLY | O_RDWR;
	int err;

	if (!delegation)
		return 0;

	if (!issync)
		lease_mode |= O_NONBLOCK;

	/* Recall of any remaining application leases */
	err = break_lease(inode, lease_mode);

	while (!err) {
		/* Nothing to claim once the delegation has been revoked. */
		if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags))
			break;

		err = nfs_delegation_claim_opens(inode, &delegation->stateid,
						 delegation->type);
		if (issync && err == -EAGAIN) {
			/* Guard against state recovery, then try again. */
			err = nfs4_wait_clnt_recover(clp);
			continue;
		}
		break;
	}

	if (err)
		nfs_abort_delegation_return(delegation, clp, err);
	else
		err = nfs_do_return_delegation(inode, delegation, issync);

	/* Refcount matched in nfs_start_delegation_return_locked() */
	nfs_put_delegation(delegation);
	return err;
}
/*
 * Decide whether @delegation should be returned now.
 *
 * A delegation qualifies when NFS_DELEGATION_RETURN was set (the bit is
 * consumed here), or when NFS_DELEGATION_RETURN_IF_CLOSED is set and the
 * delegation's inode has an empty open_files list.  A positive decision
 * also clears NFS_DELEGATION_RETURN_IF_CLOSED.
 *
 * Regardless of the above, the answer is false while the delegation is
 * flagged RETURNING, RETURN_DELAYED or REVOKED.
 */
static bool nfs_delegation_need_return(struct nfs_delegation *delegation)
{
	bool need_return = false;

	if (test_and_clear_bit(NFS_DELEGATION_RETURN, &delegation->flags)) {
		need_return = true;
	} else if (test_bit(NFS_DELEGATION_RETURN_IF_CLOSED,
			    &delegation->flags)) {
		struct inode *inode;

		/* delegation->inode is sampled under the delegation lock. */
		spin_lock(&delegation->lock);
		inode = delegation->inode;
		need_return = inode && list_empty(&NFS_I(inode)->open_files);
		spin_unlock(&delegation->lock);
	}

	if (need_return)
		clear_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags);

	if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags) ||
	    test_bit(NFS_DELEGATION_RETURN_DELAYED, &delegation->flags) ||
	    test_bit(NFS_DELEGATION_REVOKED, &delegation->flags))
		return false;

	return need_return;
}
/*
 * Walk @server's delegation list and return every delegation for which
 * nfs_delegation_need_return() says so.
 *
 * @server: nfs_server whose delegations are scanned
 * @data:   unused
 *
 * Returns 0 once the list has been walked without finding further work,
 * or the first non-zero result of nfs_end_delegation_return().  In the
 * error case NFS4CLNT_DELEGRETURN is set again on the client state.
 */
static int nfs_server_return_marked_delegations(struct nfs_server *server,
						void __always_unused *data)
{
	struct nfs_delegation *delegation;
	struct nfs_delegation *last_kept;
	struct inode *inode;
	struct inode *place_holder = NULL;
	struct nfs_delegation *place_holder_deleg = NULL;
	int err = 0;

restart:
	/*
	 * To avoid quadratic looping we hold a reference to an inode,
	 * place_holder.  Each time we restart, the walk over the server's
	 * delegations resumes from the delegation of that inode, provided
	 * it is still the one recorded in place_holder_deleg; otherwise the
	 * walk starts over from the head of the list.
	 *
	 * last_kept is an RCU-protected pointer to a delegation which
	 * wasn't marked for return and might be a good choice for
	 * the next place_holder.
	 */
	last_kept = NULL;
	delegation = NULL;
	rcu_read_lock();
	if (place_holder)
		delegation = rcu_dereference(NFS_I(place_holder)->delegation);
	if (!delegation || delegation != place_holder_deleg)
		delegation = list_entry_rcu(server->delegations.next,
					    struct nfs_delegation, super_list);

	list_for_each_entry_from_rcu(delegation, &server->delegations,
				     super_list) {
		struct inode *stale_holder = NULL;

		if (test_bit(NFS_DELEGATION_INODE_FREEING, &delegation->flags))
			continue;

		if (!nfs_delegation_need_return(delegation)) {
			if (nfs4_is_valid_delegation(delegation, 0))
				last_kept = delegation;
			continue;
		}

		/* Advance the place holder to the most recent keeper. */
		if (last_kept) {
			struct inode *grabbed =
				nfs_delegation_grab_inode(last_kept);

			if (grabbed) {
				stale_holder = place_holder;
				place_holder = grabbed;
				place_holder_deleg = last_kept;
			}
		}

		inode = nfs_delegation_grab_inode(delegation);
		if (!inode) {
			rcu_read_unlock();
			iput(stale_holder);
			goto restart;
		}
		delegation = nfs_start_delegation_return_locked(NFS_I(inode));
		rcu_read_unlock();

		/* The previous place holder is released outside the RCU section. */
		iput(stale_holder);

		err = nfs_end_delegation_return(inode, delegation, 0);
		iput(inode);
		cond_resched();
		if (err) {
			set_bit(NFS4CLNT_DELEGRETURN,
				&server->nfs_client->cl_state);
			goto out;
		}
		goto restart;
	}
	rcu_read_unlock();
out:
	iput(place_holder);
	return err;
}
/*
 * Re-mark every delegation of @server that carries
 * NFS_DELEGATION_RETURN_DELAYED for return and drop the delayed flag.
 *
 * Iterates with list_for_each_entry_rcu(); the visible caller holds
 * rcu_read_lock() around the call.
 *
 * Returns true if at least one delayed delegation was found.
 */
static bool nfs_server_clear_delayed_delegations(struct nfs_server *server)
{
	struct nfs_delegation *deleg;
	bool found = false;

	list_for_each_entry_rcu (deleg, &server->delegations, super_list) {
		if (test_bit(NFS_DELEGATION_RETURN_DELAYED, &deleg->flags)) {
			nfs_mark_return_delegation(server, deleg);
			clear_bit(NFS_DELEGATION_RETURN_DELAYED, &deleg->flags);
			found = true;
		}
	}
	return found;
}
static bool nfs_client_clear_delayed_delegations(struct nfs_client *clp)
{
struct nfs_server *server;
bool ret = false;
if (!test_and_clear_bit(NFS4CLNT_DELEGRETURN_DELAYED, &clp->cl_state))
goto out;
rcu_read_lock();
list_for_each_entry_rcu (server, &clp->cl_superblocks, client_link) {
if (nfs_server_clear_delayed_delegations(server))
ret = true;
}
rcu_read_unlock();
out:
return ret;
}
/**
 * nfs_client_return_marked_delegations - return previously marked delegations
 * @clp: nfs_client to process
 *
 * Runs nfs_server_return_marked_delegations() over each server of @clp.
 *
 * Note that this function is designed to be called by the state
 * manager thread. For this reason, it cannot flush the dirty data,
 * since that could deadlock in case of a state recovery error.
 *
 * Returns zero on success, or a negative errno value.
 */
int nfs_client_return_marked_delegations(struct nfs_client *clp)
{
	int status;

	status = nfs_client_for_each_server(
		clp, nfs_server_return_marked_delegations, NULL);
	if (status != 0)
		return status;

	/* If a return was delayed, sleep to prevent hard looping */
	if (nfs_client_clear_delayed_delegations(clp))
		ssleep(1);

	return 0;
}
/**
 * nfs_inode_evict_delegation - return delegation, don't reclaim opens
 * @inode: inode to process
 *
 * Detaches the delegation from @inode, flags it as RETURNING and
 * INODE_FREEING, returns it synchronously and frees it.
 *
 * Does not protect against delegation reclaims, therefore really only safe
 * to be called from nfs4_clear_inode(). Guaranteed to always free
 * the delegation structure.
 */
void nfs_inode_evict_delegation(struct inode *inode)
{
	struct nfs_delegation *delegation = nfs_inode_detach_delegation(inode);

	if (!delegation)
		return;

	set_bit(NFS_DELEGATION_RETURNING, &delegation->flags);
	set_bit(NFS_DELEGATION_INODE_FREEING, &delegation->flags);
	nfs_do_return_delegation(inode, delegation, 1);
	nfs_free_delegation(delegation);
}
/**
 * nfs4_inode_return_delegation - synchronously return a delegation
 * @inode: inode to process
 *
 * If @inode has a delegation that can be returned, break any application
 * leases, write back dirty data for regular files (on the assumption
 * that if we need to return the delegation, then we should stop caching)
 * and then return the delegation synchronously.
 *
 * Returns zero on success or when there is nothing to return, otherwise
 * a negative errno value.
 */
int nfs4_inode_return_delegation(struct inode *inode)
{
	struct nfs_delegation *delegation;

	delegation = nfs_start_delegation_return(NFS_I(inode));
	if (!delegation)
		return 0;

	/* Synchronous recall of any application leases */
	break_lease(inode, O_WRONLY | O_RDWR);

	if (S_ISREG(inode->i_mode))
		nfs_wb_all(inode);

	return nfs_end_delegation_return(inode, delegation, 1);
}
/**
 * nfs4_inode_return_delegation_on_close - asynchronously return a delegation
 * @inode: inode to process (may be NULL, in which case nothing is done)
 *
 * This routine is called on file close in order to determine if the
 * inode delegation needs to be returned immediately.  That is the case
 * when the delegation is flagged NFS_DELEGATION_RETURN_IF_CLOSED or the
 * number of active delegations has reached nfs_delegation_watermark,
 * and the inode has no remaining open files and the delegation is not
 * already being returned.
 */
void nfs4_inode_return_delegation_on_close(struct inode *inode)
{
	struct nfs_delegation *delegation;
	struct nfs_delegation *to_return = NULL;

	if (!inode)
		return;

	rcu_read_lock();
	delegation = nfs4_get_valid_delegation(inode);
	if (delegation &&
	    (test_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags) ||
	     atomic_long_read(&nfs_active_delegations) >=
		     nfs_delegation_watermark)) {
		spin_lock(&delegation->lock);
		if (delegation->inode &&
		    list_empty(&NFS_I(inode)->open_files) &&
		    !test_and_set_bit(NFS_DELEGATION_RETURNING,
				      &delegation->flags)) {
			clear_bit(NFS_DELEGATION_RETURN_IF_CLOSED,
				  &delegation->flags);
			/* Refcount matched in nfs_end_delegation_return() */
			to_return = nfs_get_delegation(delegation);
		}
		spin_unlock(&delegation->lock);

		if (to_return)
			nfs_clear_verifier_delegated(inode);
	}
	rcu_read_unlock();

	/* A NULL to_return makes this a no-op. */
	nfs_end_delegation_return(inode, to_return, 0);
}
/**
* nfs4_inode_make_writeable
* @inode: pointer to inode
*
* Make the inode writeable by returning the delegation if necessary
*
* Returns zero on success, or a negative errno value.
*/
int nfs4_inode_make_writeable(struct inode *inode)
{
struct nfs_delegation *delegation;
rcu_read_lock();
delegation = nfs4_get_valid_delegation(inode);
if (delegation == NULL ||
(nfs4_has_session(NFS_SERVER(inode)->nfs_client) &&
(delegation->type & FMODE_WRITE))) {
rcu_read_unlock();
return 0;
}
rcu_read_unlock();
return nfs4_inode_return_delegation(inode);
}
static void nfs_mark_return_if_closed_delegation(struct nfs_server *server,
struct nfs_delegation *delegation)
{
set_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags);
set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state);
}
/*
 * Mark every delegation on @server's list for return.
 *
 * Iterates with list_for_each_entry_rcu(); the visible callers hold
 * rcu_read_lock() around the call.
 *
 * Returns true if the list contained at least one delegation.
 */
static bool nfs_server_mark_return_all_delegations(struct nfs_server *server)
{
	struct nfs_delegation *deleg;
	bool marked_any = false;

	list_for_each_entry_rcu(deleg, &server->delegations, super_list) {
		nfs_mark_return_delegation(server, deleg);
		marked_any = true;
	}

	return marked_any;
}
static void nfs_client_mark_return_all_delegations(struct nfs_client *clp)
{
struct nfs_server *server;
rcu_read_lock();
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
nfs_server_mark_return_all_delegations(server);
rcu_read_unlock();
}
/*
 * Schedule the state manager for @clp, but only if NFS4CLNT_DELEGRETURN
 * is set in the client state.
 */
static void nfs_delegation_run_state_manager(struct nfs_client *clp)
{
	if (!test_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state))
		return;

	nfs4_schedule_state_manager(clp);
}
/**
 * nfs_expire_all_delegations - mark all delegations of a client for return
 * @clp: client to process
 *
 * Marks every delegation held under @clp for return and then schedules
 * the state manager if NFS4CLNT_DELEGRETURN ended up set.
 */
void nfs_expire_all_delegations(struct nfs_client *clp)
{
	nfs_client_mark_return_all_delegations(clp);

	nfs_delegation_run_state_manager(clp);
}
/**
 * nfs_server_return_all_delegations - return delegations for one superblock
 * @server: pointer to nfs_server to process
 *
 * Marks every delegation of @server for return.  If there was anything
 * to mark, schedules the state manager and waits for the client to
 * finish recovery.  Does nothing when @server has no nfs_client.
 */
void nfs_server_return_all_delegations(struct nfs_server *server)
{
	struct nfs_client *clp = server->nfs_client;
	bool marked;

	if (!clp)
		return;

	rcu_read_lock();
	marked = nfs_server_mark_return_all_delegations(server);
	rcu_read_unlock();

	if (!marked)
		return;

	nfs4_schedule_state_manager(clp);
	nfs4_wait_clnt_recover(clp);
}
/*
 * Flag the delegations on @server whose type overlaps @flags as
 * "return if closed".
 *
 * A delegation whose type is exactly FMODE_READ|FMODE_WRITE is left alone
 * unless @flags includes FMODE_WRITE.
 *
 * No lock is taken here; the caller in this file holds rcu_read_lock().
 */
static void nfs_mark_return_unused_delegation_types(struct nfs_server *server,
		fmode_t flags)
{
	struct nfs_delegation *pos;

	list_for_each_entry_rcu(pos, &server->delegations, super_list) {
		if (pos->type == (FMODE_READ|FMODE_WRITE) &&
		    !(flags & FMODE_WRITE))
			continue;
		if (!(pos->type & flags))
			continue;
		nfs_mark_return_if_closed_delegation(server, pos);
	}
}
/*
 * Apply nfs_mark_return_unused_delegation_types() with @flags to every
 * superblock attached to @clp, inside one RCU read-side critical section.
 */
static void nfs_client_mark_return_unused_delegation_types(struct nfs_client *clp,
		fmode_t flags)
{
	struct nfs_server *srv;

	rcu_read_lock();
	list_for_each_entry_rcu(srv, &clp->cl_superblocks, client_link) {
		nfs_mark_return_unused_delegation_types(srv, flags);
	}
	rcu_read_unlock();
}
/*
 * Mark the delegation attached to @inode as revoked and start state
 * recovery for it.
 *
 * With a NULL @stateid the delegation's current stateid is copied and
 * used for the recovery call. With a non-NULL @stateid the delegation is
 * only revoked when the "other" fields match and the delegation's stateid
 * is not newer than @stateid; a non-zero seqid from @stateid is stored
 * into the delegation under delegation->lock.
 */
static void nfs_revoke_delegation(struct inode *inode,
		const nfs4_stateid *stateid)
{
	struct nfs_delegation *deleg;
	nfs4_stateid snapshot;
	bool revoked = false;

	rcu_read_lock();
	deleg = rcu_dereference(NFS_I(inode)->delegation);
	if (!deleg)
		goto unlock;

	if (!stateid) {
		/* No stateid supplied: recover using the current one. */
		nfs4_stateid_copy(&snapshot, &deleg->stateid);
		stateid = &snapshot;
		goto revoke;
	}

	if (!nfs4_stateid_match_other(stateid, &deleg->stateid))
		goto unlock;

	spin_lock(&deleg->lock);
	if (stateid->seqid) {
		if (nfs4_stateid_is_newer(&deleg->stateid, stateid)) {
			/* The delegation has moved on; leave it alone. */
			spin_unlock(&deleg->lock);
			goto unlock;
		}
		deleg->stateid.seqid = stateid->seqid;
	}
	spin_unlock(&deleg->lock);

revoke:
	nfs_mark_delegation_revoked(deleg);
	revoked = true;
unlock:
	rcu_read_unlock();
	if (revoked)
		nfs_inode_find_state_and_recover(inode, stateid);
}
/*
 * Exported entry point that forwards to nfs_revoke_delegation().
 * @stateid may be NULL, in which case the delegation's own stateid is used.
 */
void nfs_remove_bad_delegation(struct inode *inode,
			       const nfs4_stateid *stateid)
{
	nfs_revoke_delegation(inode, stateid);
}
EXPORT_SYMBOL_GPL(nfs_remove_bad_delegation);
/*
 * Record that the delegation identified by @stateid has been returned.
 *
 * Does nothing when @inode is NULL. Otherwise, if @inode's delegation
 * matches the "other" field of @stateid, NFS_DELEGATION_RETURNING is
 * cleared. The delegation is also marked revoked unless @stateid carries
 * a non-zero seqid and the delegation's stateid is newer.
 *
 * nfs_inode_find_state_and_recover() is called for every non-NULL @inode,
 * whether or not a matching delegation was found.
 */
void nfs_delegation_mark_returned(struct inode *inode,
		const nfs4_stateid *stateid)
{
	struct nfs_delegation *deleg;

	if (!inode)
		return;

	rcu_read_lock();
	deleg = rcu_dereference(NFS_I(inode)->delegation);
	if (deleg) {
		spin_lock(&deleg->lock);
		if (nfs4_stateid_match_other(stateid, &deleg->stateid)) {
			bool superseded = false;

			if (stateid->seqid) {
				/* A newer delegation stateid must not be marked returned. */
				if (nfs4_stateid_is_newer(&deleg->stateid, stateid))
					superseded = true;
				else if (deleg->stateid.seqid != stateid->seqid)
					deleg->stateid.seqid = stateid->seqid;
			}
			if (!superseded)
				nfs_mark_delegation_revoked(deleg);
			clear_bit(NFS_DELEGATION_RETURNING, &deleg->flags);
		}
		spin_unlock(&deleg->lock);
	}
	rcu_read_unlock();

	nfs_inode_find_state_and_recover(inode, stateid);
}
/**
 * nfs_expire_unused_delegation_types - expire delegations of given types
 * @clp: client to process
 * @flags: delegation types to expire
 *
 * Flags the delegations of @clp whose type overlaps @flags as
 * "return if closed", then schedules the state manager if
 * NFS4CLNT_DELEGRETURN is set.
 */
void nfs_expire_unused_delegation_types(struct nfs_client *clp, fmode_t flags)
{
	/* Step 1: flag the delegations of the requested types. */
	nfs_client_mark_return_unused_delegation_types(clp, flags);

	/* Step 2: schedule the state manager if anything was flagged. */
	nfs_delegation_run_state_manager(clp);
}
/*
 * For each delegation on @server, test-and-clear
 * NFS_DELEGATION_REFERENCED. Delegations that did not have the bit set
 * are flagged as "return if closed".
 *
 * No lock is taken here; the caller in this file holds rcu_read_lock().
 */
static void nfs_mark_return_unreferenced_delegations(struct nfs_server *server)
{
	struct nfs_delegation *pos;

	list_for_each_entry_rcu(pos, &server->delegations, super_list) {
		if (!test_and_clear_bit(NFS_DELEGATION_REFERENCED, &pos->flags))
			nfs_mark_return_if_closed_delegation(server, pos);
	}
}
/**
* nfs_expire_unreferenced_delegations - Eliminate unused delegations
* @clp: nfs_client to process
*
*/
void nfs_expire_unreferenced_delegations(struct nfs_client *clp)
{
struct nfs_server *server;
rcu_read_lock();
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
nfs_mark_return_unreferenced_delegations(server);
rcu_read_unlock();
nfs_delegation_run_state_manager(clp);
}
/**
 * nfs_async_inode_return_delegation - asynchronously return a delegation
 * @inode: inode to process
 * @stateid: state ID information
 *
 * A NULL @stateid matches any valid delegation held on @inode.
 *
 * Returns zero on success, or -ENOENT if @inode has no valid delegation
 * or the delegation does not match @stateid.
 */
int nfs_async_inode_return_delegation(struct inode *inode,
				      const nfs4_stateid *stateid)
{
	struct nfs_server *server = NFS_SERVER(inode);
	struct nfs_client *clp = server->nfs_client;
	struct nfs_delegation *deleg;
	int status = -ENOENT;

	rcu_read_lock();
	deleg = nfs4_get_valid_delegation(inode);
	if (deleg != NULL &&
	    (stateid == NULL ||
	     clp->cl_mvops->match_stateid(&deleg->stateid, stateid))) {
		nfs_mark_return_delegation(server, deleg);
		status = 0;
	}
	rcu_read_unlock();

	if (status)
		return status;

	/* If there are any application leases or delegations, recall them */
	break_lease(inode, O_WRONLY | O_RDWR | O_NONBLOCK);

	nfs_delegation_run_state_manager(clp);
	return 0;
}
/*
 * Search one server's delegation list for a non-revoked delegation whose
 * inode has the file handle @fhandle.
 *
 * The list is walked with list_for_each_entry_rcu() and rcu_read_lock() is
 * not taken here; the caller in this file, nfs_delegation_find_inode(),
 * holds it across the call.
 *
 * Returns:
 *   - the inode, with a reference taken by igrab(), when a match was found
 *     and both nfs_sb_active() and igrab() succeeded;
 *   - ERR_PTR(-EAGAIN) when a match was found but nfs_sb_active() or
 *     igrab() failed;
 *   - ERR_PTR(-ENOENT) when no delegation on this server matched.
 *
 * NOTE(review): on the success path the nfs_sb_active() reference is not
 * dropped here - presumably the caller releases it; confirm.
 */
static struct inode *
nfs_delegation_find_inode_server(struct nfs_server *server,
const struct nfs_fh *fhandle)
{
struct nfs_delegation *delegation;
struct super_block *freeme = NULL;
struct inode *res = NULL;
list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
/* delegation->inode and the match are checked under the delegation lock. */
spin_lock(&delegation->lock);
if (delegation->inode != NULL &&
!test_bit(NFS_DELEGATION_REVOKED, &delegation->flags) &&
nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) {
/* Only grab the inode when the superblock could be marked active. */
if (nfs_sb_active(server->super)) {
freeme = server->super;
res = igrab(delegation->inode);
}
spin_unlock(&delegation->lock);
if (res != NULL)
return res;
/*
 * igrab() failed after nfs_sb_active() succeeded: undo the activation.
 * The RCU read lock is dropped around nfs_sb_deactive() and retaken so
 * the caller still sees it held (presumably because nfs_sb_deactive()
 * may sleep - confirm).
 */
if (freeme) {
rcu_read_unlock();
nfs_sb_deactive(freeme);
rcu_read_lock();
}
return ERR_PTR(-EAGAIN);
}
spin_unlock(&delegation->lock);
}
return ERR_PTR(-ENOENT);
}
/**
 * nfs_delegation_find_inode - retrieve the inode associated with a delegation
 * @clp: client state handle
 * @fhandle: filehandle from a delegation recall
 *
 * Searches each superblock of @clp in turn and stops at the first one
 * that yields anything other than ERR_PTR(-ENOENT).
 *
 * Returns a pointer to the inode matching "fhandle" on success. On failure
 * an ERR_PTR() is returned, never NULL: ERR_PTR(-ENOENT) if no matching
 * inode was found, or the error reported by the per-server lookup
 * (ERR_PTR(-EAGAIN)).
 */
struct inode *nfs_delegation_find_inode(struct nfs_client *clp,
					const struct nfs_fh *fhandle)
{
	struct inode *found = ERR_PTR(-ENOENT);
	struct nfs_server *srv;

	rcu_read_lock();
	list_for_each_entry_rcu(srv, &clp->cl_superblocks, client_link) {
		found = nfs_delegation_find_inode_server(srv, fhandle);
		if (found != ERR_PTR(-ENOENT))
			break;
	}
	rcu_read_unlock();

	return found;
}
/*
 * Set NFS_DELEGATION_NEED_RECLAIM on every delegation of @server that is
 * not flagged NFS_DELEGATION_TEST_EXPIRED.
 *
 * No lock is taken here; the caller in this file holds rcu_read_lock().
 */
static void nfs_delegation_mark_reclaim_server(struct nfs_server *server)
{
	struct nfs_delegation *pos;

	list_for_each_entry_rcu(pos, &server->delegations, super_list) {
		/*
		 * A delegation that may have been admin revoked cannot be
		 * reclaimed, so it is left unmarked.
		 */
		if (!test_bit(NFS_DELEGATION_TEST_EXPIRED, &pos->flags))
			set_bit(NFS_DELEGATION_NEED_RECLAIM, &pos->flags);
	}
}
/**
* nfs_delegation_mark_reclaim - mark all delegations as needing to be reclaimed
* @clp: nfs_client to process
*
*/
void nfs_delegation_mark_reclaim(struct nfs_client *clp)
{
struct nfs_server *server;
rcu_read_lock();
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
nfs_delegation_mark_reclaim_server(server);
rcu_read_unlock();
}
/*
 * Detach and free the delegations on @server that are still flagged
 * NFS_DELEGATION_NEED_RECLAIM.
 *
 * Delegations flagged NFS_DELEGATION_INODE_FREEING or
 * NFS_DELEGATION_RETURNING are skipped. @data is unused. Always returns 0.
 *
 * Passed as the per-server callback to nfs_client_for_each_server() by
 * nfs_delegation_reap_unclaimed().
 */
static int nfs_server_reap_unclaimed_delegations(struct nfs_server *server,
void __always_unused *data)
{
struct nfs_delegation *delegation;
struct inode *inode;
/*
 * Handling one delegation drops the RCU read lock, so the walk starts
 * over from the head of the list after each one.
 */
restart:
rcu_read_lock();
restart_locked:
list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
if (test_bit(NFS_DELEGATION_INODE_FREEING,
&delegation->flags) ||
test_bit(NFS_DELEGATION_RETURNING,
&delegation->flags) ||
test_bit(NFS_DELEGATION_NEED_RECLAIM,
&delegation->flags) == 0)
continue;
inode = nfs_delegation_grab_inode(delegation);
/*
 * Could not get the inode: rescan with the RCU read lock still held.
 * NOTE(review): this relies on the failed grab leaving the delegation in a
 * state the filter above skips (e.g. INODE_FREEING) - confirm.
 */
if (inode == NULL)
goto restart_locked;
/* From here on, "delegation" is whatever the inode currently holds. */
delegation = nfs_start_delegation_return_locked(NFS_I(inode));
rcu_read_unlock();
if (delegation != NULL) {
/* Free only if this call actually detached the delegation. */
if (nfs_detach_delegation(NFS_I(inode), delegation,
server) != NULL)
nfs_free_delegation(delegation);
/* Match nfs_start_delegation_return_locked */
nfs_put_delegation(delegation);
}
iput(inode);
cond_resched();
goto restart;
}
rcu_read_unlock();
return 0;
}
/**
 * nfs_delegation_reap_unclaimed - reap unclaimed delegations after reboot recovery is done
 * @clp: nfs_client to process
 *
 * Runs nfs_server_reap_unclaimed_delegations() on each server of @clp via
 * nfs_client_for_each_server(). No private data is passed to the callback.
 */
void nfs_delegation_reap_unclaimed(struct nfs_client *clp)
{
	nfs_client_for_each_server(clp,
				   nfs_server_reap_unclaimed_delegations,
				   NULL);
}
/*
 * Returns true if any of NFS4CLNT_CHECK_LEASE, NFS4CLNT_LEASE_EXPIRED or
 * NFS4CLNT_SESSION_RESET is set in @clp's state word.
 */
static inline bool nfs4_server_rebooted(const struct nfs_client *clp)
{
	const unsigned long reboot_bits = BIT(NFS4CLNT_CHECK_LEASE) |
					  BIT(NFS4CLNT_LEASE_EXPIRED) |
					  BIT(NFS4CLNT_SESSION_RESET);

	return (clp->cl_state & reboot_bits) != 0;
}
/*
 * Queue @delegation for an expiry test: clear NFS_DELEGATION_NEED_RECLAIM,
 * set NFS_DELEGATION_TEST_EXPIRED, and set NFS4CLNT_DELEGATION_EXPIRED on
 * the owning client.
 *
 * Delegations whose stateid type is NFS4_INVALID_STATEID_TYPE are left
 * untouched.
 */
static void nfs_mark_test_expired_delegation(struct nfs_server *server,
		struct nfs_delegation *delegation)
{
	if (delegation->stateid.type != NFS4_INVALID_STATEID_TYPE) {
		clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags);
		set_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags);
		set_bit(NFS4CLNT_DELEGATION_EXPIRED,
			&server->nfs_client->cl_state);
	}
}
/*
 * Queue the delegation currently attached to @inode, if there is one, for
 * an expiry test. The lookup and marking run under rcu_read_lock().
 */
static void nfs_inode_mark_test_expired_delegation(struct nfs_server *server,
		struct inode *inode)
{
	struct nfs_delegation *deleg;

	rcu_read_lock();
	deleg = rcu_dereference(NFS_I(inode)->delegation);
	if (deleg != NULL)
		nfs_mark_test_expired_delegation(server, deleg);
	rcu_read_unlock();
}
static void nfs_delegation_mark_test_expired_server(struct nfs_server *server)
{
struct nfs_delegation *delegation;
list_for_each_entry_rcu(delegation, &server->delegations, super_list)
nfs_mark_test_expired_delegation(server, delegation);
}
/**
* nfs_mark_test_expired_all_delegations - mark all delegations for testing
* @clp: nfs_client to process
*
* Iterates through all the delegations associated with this server and
* marks them as needing to be checked for validity.
*/
void nfs_mark_test_expired_all_delegations(struct nfs_client *clp)
{
struct nfs_server *server;
rcu_read_lock();
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
nfs_delegation_mark_test_expired_server(server);
rcu_read_unlock();
}
/**
 * nfs_test_expired_all_delegations - test all delegations for a client
 * @clp: nfs_client to process
 *
 * Helper for handling "recallable state revoked" status from server.
 * Marks every delegation of @clp for an expiry test and then schedules
 * the state manager unconditionally.
 */
void nfs_test_expired_all_delegations(struct nfs_client *clp)
{
	/* Step 1: mark every delegation for testing. */
	nfs_mark_test_expired_all_delegations(clp);

	/* Step 2: schedule the state manager. */
	nfs4_schedule_state_manager(clp);
}
/*
 * Call the minor-version test_and_free_expired() operation on @stateid.
 * If it reports -NFS4ERR_EXPIRED or -NFS4ERR_BAD_STATEID, remove the
 * delegation from @inode as bad.
 *
 * Does nothing when @cred is NULL.
 */
static void
nfs_delegation_test_free_expired(struct inode *inode,
		nfs4_stateid *stateid,
		const struct cred *cred)
{
	struct nfs_server *server = NFS_SERVER(inode);
	const struct nfs4_minor_version_ops *mvops =
		server->nfs_client->cl_mvops;

	if (cred == NULL)
		return;

	switch (mvops->test_and_free_expired(server, stateid, cred)) {
	case -NFS4ERR_EXPIRED:
	case -NFS4ERR_BAD_STATEID:
		nfs_remove_bad_delegation(inode, stateid);
		break;
	default:
		break;
	}
}
/*
 * Test each delegation on @server that is flagged
 * NFS_DELEGATION_TEST_EXPIRED.
 *
 * Delegations flagged NFS_DELEGATION_INODE_FREEING or
 * NFS_DELEGATION_RETURNING are skipped. @data is unused.
 *
 * Returns 0 once no delegation on the list remains to be tested, or -EAGAIN
 * if nfs4_server_rebooted() is true after a test. In that case the inode's
 * delegation is re-marked for testing before returning.
 *
 * Passed as the per-server callback to nfs_client_for_each_server() by
 * nfs_reap_expired_delegations().
 */
static int nfs_server_reap_expired_delegations(struct nfs_server *server,
void __always_unused *data)
{
struct nfs_delegation *delegation;
struct inode *inode;
const struct cred *cred;
nfs4_stateid stateid;
/*
 * Testing one delegation drops the RCU read lock, so the walk starts over
 * from the head of the list after each one.
 */
restart:
rcu_read_lock();
restart_locked:
list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
if (test_bit(NFS_DELEGATION_INODE_FREEING,
&delegation->flags) ||
test_bit(NFS_DELEGATION_RETURNING,
&delegation->flags) ||
test_bit(NFS_DELEGATION_TEST_EXPIRED,
&delegation->flags) == 0)
continue;
inode = nfs_delegation_grab_inode(delegation);
/*
 * Could not get the inode: rescan with the RCU read lock still held.
 * NOTE(review): this relies on the failed grab leaving the delegation in a
 * state the filter above skips (e.g. INODE_FREEING) - confirm.
 */
if (inode == NULL)
goto restart_locked;
/* Take the credential reference and stateid copy under the delegation lock. */
spin_lock(&delegation->lock);
cred = get_cred_rcu(delegation->cred);
nfs4_stateid_copy(&stateid, &delegation->stateid);
spin_unlock(&delegation->lock);
/* Clear the flag before leaving the RCU section and running the test. */
clear_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags);
rcu_read_unlock();
/* The test uses the local stateid copy, not the delegation itself. */
nfs_delegation_test_free_expired(inode, &stateid, cred);
put_cred(cred);
if (!nfs4_server_rebooted(server->nfs_client)) {
iput(inode);
cond_resched();
goto restart;
}
/* Reboot-related state bits are set: re-mark this delegation and stop. */
nfs_inode_mark_test_expired_delegation(server,inode);
iput(inode);
return -EAGAIN;
}
rcu_read_unlock();
return 0;
}
/**
 * nfs_reap_expired_delegations - reap expired delegations
 * @clp: nfs_client to process
 *
 * Iterates through all the delegations associated with this client and
 * checks whether they may have been revoked. This function is usually
 * expected to be called in cases where the server may have lost its
 * lease.
 */
void nfs_reap_expired_delegations(struct nfs_client *clp)
{
	nfs_client_for_each_server(clp,
				   nfs_server_reap_expired_delegations,
				   NULL);
}
void nfs_inode_find_delegation_state_and_recover(struct inode *inode,
const nfs4_stateid *stateid)
{
struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
struct nfs_delegation *delegation;
bool found = false;
rcu_read_lock();
delegation = rcu_dereference(NFS_I(inode)->delegation);
if (delegation &&
nfs4_stateid_match_or_older(&delegation->stateid, stateid) &&
!test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) {
nfs_mark_test_expired_delegation(NFS_SERVER(inode), delegation);
found = true;
}
rcu_read_unlock();
if (found)
nfs4_schedule_state_manager(clp);
}
/**
* nfs_delegations_present - check for existence of delegations
* @clp: client state handle
*
* Returns one if there are any nfs_delegation structures attached
* to this nfs_client.
*/
int nfs_delegations_present(struct nfs_client *clp)
{
struct nfs_server *server;
int ret = 0;
rcu_read_lock();
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
if (!list_empty(&server->delegations)) {
ret = 1;
break;
}
rcu_read_unlock();
return ret;
}
/**
 * nfs4_refresh_delegation_stateid - Update delegation stateid seqid
 * @dst: stateid to refresh
 * @inode: inode to check
 *
 * Returns "true" and updates "dst->seqid" if @inode holds a non-revoked
 * delegation whose stateid matches the "other" field of @dst and is newer
 * than @dst. Otherwise "false" is returned, including when @inode is NULL.
 */
bool nfs4_refresh_delegation_stateid(nfs4_stateid *dst, struct inode *inode)
{
	struct nfs_delegation *deleg;
	bool refreshed = false;

	if (inode == NULL)
		return false;

	rcu_read_lock();
	deleg = rcu_dereference(NFS_I(inode)->delegation);
	if (deleg == NULL)
		goto unlock;
	if (!nfs4_stateid_match_other(dst, &deleg->stateid))
		goto unlock;
	if (!nfs4_stateid_is_newer(&deleg->stateid, dst))
		goto unlock;
	if (test_bit(NFS_DELEGATION_REVOKED, &deleg->flags))
		goto unlock;

	dst->seqid = deleg->stateid.seqid;
	refreshed = true;
unlock:
	rcu_read_unlock();
	return refreshed;
}
/**
 * nfs4_copy_delegation_stateid - Copy inode's state ID information
 * @inode: inode to check
 * @flags: delegation type requirement
 * @dst: stateid data structure to fill in
 * @cred: optional argument to retrieve credential
 *
 * Only the FMODE_READ and FMODE_WRITE bits of @flags are considered.
 *
 * Returns "true" and fills in "dst" if @inode has a delegation that is
 * valid for @flags; the delegation is then also marked as referenced and,
 * when @cred is non-NULL, a reference to the delegation's credential is
 * stored in "*cred". Otherwise "false" is returned.
 */
bool nfs4_copy_delegation_stateid(struct inode *inode, fmode_t flags,
				  nfs4_stateid *dst, const struct cred **cred)
{
	struct nfs_inode *nfsi = NFS_I(inode);
	struct nfs_delegation *deleg;
	bool valid = false;

	flags &= FMODE_READ|FMODE_WRITE;

	rcu_read_lock();
	deleg = rcu_dereference(nfsi->delegation);
	if (deleg) {
		/* Validity check and copy both happen under the delegation lock. */
		spin_lock(&deleg->lock);
		valid = nfs4_is_valid_delegation(deleg, flags);
		if (valid) {
			nfs4_stateid_copy(dst, &deleg->stateid);
			nfs_mark_delegation_referenced(deleg);
			if (cred)
				*cred = get_cred(deleg->cred);
		}
		spin_unlock(&deleg->lock);
	}
	rcu_read_unlock();

	return valid;
}
/**
 * nfs4_delegation_flush_on_close - Check if we must flush file on close
 * @inode: inode to check
 *
 * Returns "false" only when @inode holds a delegation with FMODE_WRITE
 * and the inode's count of outstanding requests is below the delegation's
 * 'pagemod_limit'. In every other case the file must be flushed on close
 * and "true" is returned.
 */
bool nfs4_delegation_flush_on_close(const struct inode *inode)
{
	struct nfs_inode *nfsi = NFS_I(inode);
	struct nfs_delegation *deleg;
	bool must_flush = true;

	rcu_read_lock();
	deleg = rcu_dereference(nfsi->delegation);
	if (deleg != NULL && (deleg->type & FMODE_WRITE) &&
	    atomic_long_read(&nfsi->nrequests) < deleg->pagemod_limit)
		must_flush = false;
	rcu_read_unlock();

	return must_flush;
}
/*
 * Expose nfs_delegation_watermark as the module parameter
 * "delegation_watermark" (type uint, permissions 0644).
 */
module_param_named(delegation_watermark, nfs_delegation_watermark, uint, 0644);