mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-03 19:55:31 +00:00
rename(): avoid a deadlock in the case of parents having no common ancestor
... and fix the directory locking documentation and proof of correctness. Holding ->s_vfs_rename_mutex *almost* prevents ->d_parent changes; the case where we really don't want it is splicing the root of disconnected tree to somewhere. In other words, ->s_vfs_rename_mutex is sufficient to stabilize "X is an ancestor of Y" only if X and Y are already in the same tree. Otherwise it can go from false to true, and one can construct a deadlock on that. Make lock_two_directories() report an error in such case and update the callers of lock_rename()/lock_rename_child() to handle such errors. And yes, such conditions are not impossible to create ;-/ Reviewed-by: Jan Kara <jack@suse.cz> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
This commit is contained in:
parent
dbd4540df2
commit
a8b0026847
@ -11,130 +11,268 @@ When taking the i_rwsem on multiple non-directory objects, we
|
|||||||
always acquire the locks in order by increasing address. We'll call
|
always acquire the locks in order by increasing address. We'll call
|
||||||
that "inode pointer" order in the following.
|
that "inode pointer" order in the following.
|
||||||
|
|
||||||
For our purposes all operations fall in 5 classes:
|
|
||||||
|
|
||||||
1) read access. Locking rules: caller locks directory we are accessing.
|
Primitives
|
||||||
The lock is taken shared.
|
==========
|
||||||
|
|
||||||
2) object creation. Locking rules: same as above, but the lock is taken
|
For our purposes all operations fall in 6 classes:
|
||||||
exclusive.
|
|
||||||
|
|
||||||
3) object removal. Locking rules: caller locks parent, finds victim,
|
1. read access. Locking rules:
|
||||||
locks victim and calls the method. Locks are exclusive.
|
|
||||||
|
|
||||||
4) rename() that is _not_ cross-directory. Locking rules: caller locks
|
* lock the directory we are accessing (shared)
|
||||||
the parent and finds source and target. Then we decide which of the
|
|
||||||
source and target need to be locked. Source needs to be locked if it's a
|
|
||||||
non-directory; target - if it's a non-directory or about to be removed.
|
|
||||||
Take the locks that need to be taken, in inode pointer order if need
|
|
||||||
to take both (that can happen only when both source and target are
|
|
||||||
non-directories - the source because it wouldn't be locked otherwise
|
|
||||||
and the target because mixing directory and non-directory is allowed
|
|
||||||
only with RENAME_EXCHANGE, and that won't be removing the target).
|
|
||||||
After the locks had been taken, call the method. All locks are exclusive.
|
|
||||||
|
|
||||||
5) link creation. Locking rules:
|
2. object creation. Locking rules:
|
||||||
|
|
||||||
* lock parent
|
* lock the directory we are accessing (exclusive)
|
||||||
* check that source is not a directory
|
|
||||||
* lock source
|
|
||||||
* call the method.
|
|
||||||
|
|
||||||
All locks are exclusive.
|
3. object removal. Locking rules:
|
||||||
|
|
||||||
6) cross-directory rename. The trickiest in the whole bunch. Locking
|
* lock the parent (exclusive)
|
||||||
rules:
|
* find the victim
|
||||||
|
* lock the victim (exclusive)
|
||||||
|
|
||||||
|
4. link creation. Locking rules:
|
||||||
|
|
||||||
|
* lock the parent (exclusive)
|
||||||
|
* check that the source is not a directory
|
||||||
|
* lock the source (exclusive; probably could be weakened to shared)
|
||||||
|
|
||||||
|
5. rename that is _not_ cross-directory. Locking rules:
|
||||||
|
|
||||||
|
* lock the parent (exclusive)
|
||||||
|
* find the source and target
|
||||||
|
* decide which of the source and target need to be locked.
|
||||||
|
The source needs to be locked if it's a non-directory, target - if it's
|
||||||
|
a non-directory or about to be removed.
|
||||||
|
* take the locks that need to be taken (exlusive), in inode pointer order
|
||||||
|
if need to take both (that can happen only when both source and target
|
||||||
|
are non-directories - the source because it wouldn't need to be locked
|
||||||
|
otherwise and the target because mixing directory and non-directory is
|
||||||
|
allowed only with RENAME_EXCHANGE, and that won't be removing the target).
|
||||||
|
|
||||||
|
6. cross-directory rename. The trickiest in the whole bunch. Locking rules:
|
||||||
|
|
||||||
* lock the filesystem
|
* lock the filesystem
|
||||||
* lock parents in "ancestors first" order. If one is not ancestor of
|
* if the parents don't have a common ancestor, fail the operation.
|
||||||
the other, lock the parent of source first.
|
* lock the parents in "ancestors first" order (exclusive). If neither is an
|
||||||
* find source and target.
|
ancestor of the other, lock the parent of source first.
|
||||||
* if old parent is equal to or is a descendent of target
|
* find the source and target.
|
||||||
fail with -ENOTEMPTY
|
* verify that the source is not a descendent of the target and
|
||||||
* if new parent is equal to or is a descendent of source
|
target is not a descendent of source; fail the operation otherwise.
|
||||||
fail with -ELOOP
|
* lock the subdirectories involved (exclusive), source before target.
|
||||||
* Lock subdirectories involved (source before target).
|
* lock the non-directories involved (exclusive), in inode pointer order.
|
||||||
* Lock non-directories involved, in inode pointer order.
|
|
||||||
* call the method.
|
|
||||||
|
|
||||||
All ->i_rwsem are taken exclusive.
|
The rules above obviously guarantee that all directories that are going
|
||||||
|
to be read, modified or removed by method will be locked by the caller.
|
||||||
|
|
||||||
The rules above obviously guarantee that all directories that are going to be
|
|
||||||
read, modified or removed by method will be locked by caller.
|
|
||||||
|
|
||||||
|
Splicing
|
||||||
|
========
|
||||||
|
|
||||||
|
There is one more thing to consider - splicing. It's not an operation
|
||||||
|
in its own right; it may happen as part of lookup. We speak of the
|
||||||
|
operations on directory trees, but we obviously do not have the full
|
||||||
|
picture of those - especially for network filesystems. What we have
|
||||||
|
is a bunch of subtrees visible in dcache and locking happens on those.
|
||||||
|
Trees grow as we do operations; memory pressure prunes them. Normally
|
||||||
|
that's not a problem, but there is a nasty twist - what should we do
|
||||||
|
when one growing tree reaches the root of another? That can happen in
|
||||||
|
several scenarios, starting from "somebody mounted two nested subtrees
|
||||||
|
from the same NFS4 server and doing lookups in one of them has reached
|
||||||
|
the root of another"; there's also open-by-fhandle stuff, and there's a
|
||||||
|
possibility that directory we see in one place gets moved by the server
|
||||||
|
to another and we run into it when we do a lookup.
|
||||||
|
|
||||||
|
For a lot of reasons we want to have the same directory present in dcache
|
||||||
|
only once. Multiple aliases are not allowed. So when lookup runs into
|
||||||
|
a subdirectory that already has an alias, something needs to be done with
|
||||||
|
dcache trees. Lookup is already holding the parent locked. If alias is
|
||||||
|
a root of separate tree, it gets attached to the directory we are doing a
|
||||||
|
lookup in, under the name we'd been looking for. If the alias is already
|
||||||
|
a child of the directory we are looking in, it changes name to the one
|
||||||
|
we'd been looking for. No extra locking is involved in these two cases.
|
||||||
|
However, if it's a child of some other directory, the things get trickier.
|
||||||
|
First of all, we verify that it is *not* an ancestor of our directory
|
||||||
|
and fail the lookup if it is. Then we try to lock the filesystem and the
|
||||||
|
current parent of the alias. If either trylock fails, we fail the lookup.
|
||||||
|
If trylocks succeed, we detach the alias from its current parent and
|
||||||
|
attach to our directory, under the name we are looking for.
|
||||||
|
|
||||||
|
Note that splicing does *not* involve any modification of the filesystem;
|
||||||
|
all we change is the view in dcache. Moreover, holding a directory locked
|
||||||
|
exclusive prevents such changes involving its children and holding the
|
||||||
|
filesystem lock prevents any changes of tree topology, other than having a
|
||||||
|
root of one tree becoming a child of directory in another. In particular,
|
||||||
|
if two dentries have been found to have a common ancestor after taking
|
||||||
|
the filesystem lock, their relationship will remain unchanged until
|
||||||
|
the lock is dropped. So from the directory operations' point of view
|
||||||
|
splicing is almost irrelevant - the only place where it matters is one
|
||||||
|
step in cross-directory renames; we need to be careful when checking if
|
||||||
|
parents have a common ancestor.
|
||||||
|
|
||||||
|
|
||||||
|
Multiple-filesystem stuff
|
||||||
|
=========================
|
||||||
|
|
||||||
|
For some filesystems a method can involve a directory operation on
|
||||||
|
another filesystem; it may be ecryptfs doing operation in the underlying
|
||||||
|
filesystem, overlayfs doing something to the layers, network filesystem
|
||||||
|
using a local one as a cache, etc. In all such cases the operations
|
||||||
|
on other filesystems must follow the same locking rules. Moreover, "a
|
||||||
|
directory operation on this filesystem might involve directory operations
|
||||||
|
on that filesystem" should be an asymmetric relation (or, if you will,
|
||||||
|
it should be possible to rank the filesystems so that directory operation
|
||||||
|
on a filesystem could trigger directory operations only on higher-ranked
|
||||||
|
ones - in these terms overlayfs ranks lower than its layers, network
|
||||||
|
filesystem ranks lower than whatever it caches on, etc.)
|
||||||
|
|
||||||
|
|
||||||
|
Deadlock avoidance
|
||||||
|
==================
|
||||||
|
|
||||||
If no directory is its own ancestor, the scheme above is deadlock-free.
|
If no directory is its own ancestor, the scheme above is deadlock-free.
|
||||||
|
|
||||||
Proof:
|
Proof:
|
||||||
|
|
||||||
[XXX: will be updated once we are done massaging the lock_rename()]
|
There is a ranking on the locks, such that all primitives take
|
||||||
First of all, at any moment we have a linear ordering of the
|
them in order of non-decreasing rank. Namely,
|
||||||
objects - A < B iff (A is an ancestor of B) or (B is not an ancestor
|
|
||||||
of A and ptr(A) < ptr(B)).
|
|
||||||
|
|
||||||
That ordering can change. However, the following is true:
|
* rank ->i_rwsem of non-directories on given filesystem in inode pointer
|
||||||
|
order.
|
||||||
|
* put ->i_rwsem of all directories on a filesystem at the same rank,
|
||||||
|
lower than ->i_rwsem of any non-directory on the same filesystem.
|
||||||
|
* put ->s_vfs_rename_mutex at rank lower than that of any ->i_rwsem
|
||||||
|
on the same filesystem.
|
||||||
|
* among the locks on different filesystems use the relative
|
||||||
|
rank of those filesystems.
|
||||||
|
|
||||||
(1) if object removal or non-cross-directory rename holds lock on A and
|
For example, if we have NFS filesystem caching on a local one, we have
|
||||||
attempts to acquire lock on B, A will remain the parent of B until we
|
|
||||||
acquire the lock on B. (Proof: only cross-directory rename can change
|
|
||||||
the parent of object and it would have to lock the parent).
|
|
||||||
|
|
||||||
(2) if cross-directory rename holds the lock on filesystem, order will not
|
1. ->s_vfs_rename_mutex of NFS filesystem
|
||||||
change until rename acquires all locks. (Proof: other cross-directory
|
2. ->i_rwsem of directories on that NFS filesystem, same rank for all
|
||||||
renames will be blocked on filesystem lock and we don't start changing
|
3. ->i_rwsem of non-directories on that filesystem, in order of
|
||||||
the order until we had acquired all locks).
|
increasing address of inode
|
||||||
|
4. ->s_vfs_rename_mutex of local filesystem
|
||||||
|
5. ->i_rwsem of directories on the local filesystem, same rank for all
|
||||||
|
6. ->i_rwsem of non-directories on local filesystem, in order of
|
||||||
|
increasing address of inode.
|
||||||
|
|
||||||
(3) locks on non-directory objects are acquired only after locks on
|
It's easy to verify that operations never take a lock with rank
|
||||||
directory objects, and are acquired in inode pointer order.
|
lower than that of an already held lock.
|
||||||
(Proof: all operations but renames take lock on at most one
|
|
||||||
non-directory object, except renames, which take locks on source and
|
|
||||||
target in inode pointer order in the case they are not directories.)
|
|
||||||
|
|
||||||
Now consider the minimal deadlock. Each process is blocked on
|
Suppose deadlocks are possible. Consider the minimal deadlocked
|
||||||
attempt to acquire some lock and already holds at least one lock. Let's
|
set of threads. It is a cycle of several threads, each blocked on a lock
|
||||||
consider the set of contended locks. First of all, filesystem lock is
|
held by the next thread in the cycle.
|
||||||
not contended, since any process blocked on it is not holding any locks.
|
|
||||||
Thus all processes are blocked on ->i_rwsem.
|
|
||||||
|
|
||||||
By (3), any process holding a non-directory lock can only be
|
Since the locking order is consistent with the ranking, all
|
||||||
waiting on another non-directory lock with a larger address. Therefore
|
contended locks in the minimal deadlock will be of the same rank,
|
||||||
the process holding the "largest" such lock can always make progress, and
|
i.e. they all will be ->i_rwsem of directories on the same filesystem.
|
||||||
non-directory objects are not included in the set of contended locks.
|
Moreover, without loss of generality we can assume that all operations
|
||||||
|
are done directly to that filesystem and none of them has actually
|
||||||
|
reached the method call.
|
||||||
|
|
||||||
Thus link creation can't be a part of deadlock - it can't be
|
In other words, we have a cycle of threads, T1,..., Tn,
|
||||||
blocked on source and it means that it doesn't hold any locks.
|
and the same number of directories (D1,...,Dn) such that
|
||||||
|
|
||||||
Any contended object is either held by cross-directory rename or
|
T1 is blocked on D1 which is held by T2
|
||||||
has a child that is also contended. Indeed, suppose that it is held by
|
|
||||||
operation other than cross-directory rename. Then the lock this operation
|
|
||||||
is blocked on belongs to child of that object due to (1).
|
|
||||||
|
|
||||||
It means that one of the operations is cross-directory rename.
|
T2 is blocked on D2 which is held by T3
|
||||||
Otherwise the set of contended objects would be infinite - each of them
|
|
||||||
would have a contended child and we had assumed that no object is its
|
|
||||||
own descendent. Moreover, there is exactly one cross-directory rename
|
|
||||||
(see above).
|
|
||||||
|
|
||||||
Consider the object blocking the cross-directory rename. One
|
...
|
||||||
of its descendents is locked by cross-directory rename (otherwise we
|
|
||||||
would again have an infinite set of contended objects). But that
|
|
||||||
means that cross-directory rename is taking locks out of order. Due
|
|
||||||
to (2) the order hadn't changed since we had acquired filesystem lock.
|
|
||||||
But locking rules for cross-directory rename guarantee that we do not
|
|
||||||
try to acquire lock on descendent before the lock on ancestor.
|
|
||||||
Contradiction. I.e. deadlock is impossible. Q.E.D.
|
|
||||||
|
|
||||||
|
Tn is blocked on Dn which is held by T1.
|
||||||
|
|
||||||
|
Each operation in the minimal cycle must have locked at least
|
||||||
|
one directory and blocked on attempt to lock another. That leaves
|
||||||
|
only 3 possible operations: directory removal (locks parent, then
|
||||||
|
child), same-directory rename killing a subdirectory (ditto) and
|
||||||
|
cross-directory rename of some sort.
|
||||||
|
|
||||||
|
There must be a cross-directory rename in the set; indeed,
|
||||||
|
if all operations had been of the "lock parent, then child" sort
|
||||||
|
we would have Dn a parent of D1, which is a parent of D2, which is
|
||||||
|
a parent of D3, ..., which is a parent of Dn. Relationships couldn't
|
||||||
|
have changed since the moment directory locks had been acquired,
|
||||||
|
so they would all hold simultaneously at the deadlock time and
|
||||||
|
we would have a loop.
|
||||||
|
|
||||||
|
Since all operations are on the same filesystem, there can't be
|
||||||
|
more than one cross-directory rename among them. Without loss of
|
||||||
|
generality we can assume that T1 is the one doing a cross-directory
|
||||||
|
rename and everything else is of the "lock parent, then child" sort.
|
||||||
|
|
||||||
|
In other words, we have a cross-directory rename that locked
|
||||||
|
Dn and blocked on attempt to lock D1, which is a parent of D2, which is
|
||||||
|
a parent of D3, ..., which is a parent of Dn. Relationships between
|
||||||
|
D1,...,Dn all hold simultaneously at the deadlock time. Moreover,
|
||||||
|
cross-directory rename does not get to locking any directories until it
|
||||||
|
has acquired filesystem lock and verified that directories involved have
|
||||||
|
a common ancestor, which guarantees that ancestry relationships between
|
||||||
|
all of them had been stable.
|
||||||
|
|
||||||
|
Consider the order in which directories are locked by the
|
||||||
|
cross-directory rename; parents first, then possibly their children.
|
||||||
|
Dn and D1 would have to be among those, with Dn locked before D1.
|
||||||
|
Which pair could it be?
|
||||||
|
|
||||||
|
It can't be the parents - indeed, since D1 is an ancestor of Dn,
|
||||||
|
it would be the first parent to be locked. Therefore at least one of the
|
||||||
|
children must be involved and thus neither of them could be a descendent
|
||||||
|
of another - otherwise the operation would not have progressed past
|
||||||
|
locking the parents.
|
||||||
|
|
||||||
|
It can't be a parent and its child; otherwise we would've had
|
||||||
|
a loop, since the parents are locked before the children, so the parent
|
||||||
|
would have to be a descendent of its child.
|
||||||
|
|
||||||
|
It can't be a parent and a child of another parent either.
|
||||||
|
Otherwise the child of the parent in question would've been a descendent
|
||||||
|
of another child.
|
||||||
|
|
||||||
|
That leaves only one possibility - namely, both Dn and D1 are
|
||||||
|
among the children, in some order. But that is also impossible, since
|
||||||
|
neither of the children is a descendent of another.
|
||||||
|
|
||||||
|
That concludes the proof, since the set of operations with the
|
||||||
|
properties requiered for a minimal deadlock can not exist.
|
||||||
|
|
||||||
|
Note that the check for having a common ancestor in cross-directory
|
||||||
|
rename is crucial - without it a deadlock would be possible. Indeed,
|
||||||
|
suppose the parents are initially in different trees; we would lock the
|
||||||
|
parent of source, then try to lock the parent of target, only to have
|
||||||
|
an unrelated lookup splice a distant ancestor of source to some distant
|
||||||
|
descendent of the parent of target. At that point we have cross-directory
|
||||||
|
rename holding the lock on parent of source and trying to lock its
|
||||||
|
distant ancestor. Add a bunch of rmdir() attempts on all directories
|
||||||
|
in between (all of those would fail with -ENOTEMPTY, had they ever gotten
|
||||||
|
the locks) and voila - we have a deadlock.
|
||||||
|
|
||||||
|
Loop avoidance
|
||||||
|
==============
|
||||||
|
|
||||||
These operations are guaranteed to avoid loop creation. Indeed,
|
These operations are guaranteed to avoid loop creation. Indeed,
|
||||||
the only operation that could introduce loops is cross-directory rename.
|
the only operation that could introduce loops is cross-directory rename.
|
||||||
Since the only new (parent, child) pair added by rename() is (new parent,
|
Suppose after the operation there is a loop; since there hadn't been such
|
||||||
source), such loop would have to contain these objects and the rest of it
|
loops before the operation, at least on of the nodes in that loop must've
|
||||||
would have to exist before rename(). I.e. at the moment of loop creation
|
had its parent changed. In other words, the loop must be passing through
|
||||||
rename() responsible for that would be holding filesystem lock and new parent
|
the source or, in case of exchange, possibly the target.
|
||||||
would have to be equal to or a descendent of source. But that means that
|
|
||||||
new parent had been equal to or a descendent of source since the moment when
|
Since the operation has succeeded, neither source nor target could have
|
||||||
we had acquired filesystem lock and rename() would fail with -ELOOP in that
|
been ancestors of each other. Therefore the chain of ancestors starting
|
||||||
case.
|
in the parent of source could not have passed through the target and
|
||||||
|
vice versa. On the other hand, the chain of ancestors of any node could
|
||||||
|
not have passed through the node itself, or we would've had a loop before
|
||||||
|
the operation. But everything other than source and target has kept
|
||||||
|
the parent after the operation, so the operation does not change the
|
||||||
|
chains of ancestors of (ex-)parents of source and target. In particular,
|
||||||
|
those chains must end after a finite number of steps.
|
||||||
|
|
||||||
|
Now consider the loop created by the operation. It passes through either
|
||||||
|
source or target; the next node in the loop would be the ex-parent of
|
||||||
|
target or source resp. After that the loop would follow the chain of
|
||||||
|
ancestors of that parent. But as we have just shown, that chain must
|
||||||
|
end after a finite number of steps, which means that it can't be a part
|
||||||
|
of any loop. Q.E.D.
|
||||||
|
|
||||||
While this locking scheme works for arbitrary DAGs, it relies on
|
While this locking scheme works for arbitrary DAGs, it relies on
|
||||||
ability to check that directory is a descendent of another object. Current
|
ability to check that directory is a descendent of another object. Current
|
||||||
|
@ -1079,3 +1079,12 @@ On same-directory ->rename() the (tautological) update of .. is not protected
|
|||||||
by any locks; just don't do it if the old parent is the same as the new one.
|
by any locks; just don't do it if the old parent is the same as the new one.
|
||||||
We really can't lock two subdirectories in same-directory rename - not without
|
We really can't lock two subdirectories in same-directory rename - not without
|
||||||
deadlocks.
|
deadlocks.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**mandatory**
|
||||||
|
|
||||||
|
lock_rename() and lock_rename_child() may fail in cross-directory case, if
|
||||||
|
their arguments do not have a common ancestor. In that case ERR_PTR(-EXDEV)
|
||||||
|
is returned, with no locks taken. In-tree users updated; out-of-tree ones
|
||||||
|
would need to do so.
|
||||||
|
@ -305,6 +305,8 @@ int cachefiles_bury_object(struct cachefiles_cache *cache,
|
|||||||
|
|
||||||
/* do the multiway lock magic */
|
/* do the multiway lock magic */
|
||||||
trap = lock_rename(cache->graveyard, dir);
|
trap = lock_rename(cache->graveyard, dir);
|
||||||
|
if (IS_ERR(trap))
|
||||||
|
return PTR_ERR(trap);
|
||||||
|
|
||||||
/* do some checks before getting the grave dentry */
|
/* do some checks before getting the grave dentry */
|
||||||
if (rep->d_parent != dir || IS_DEADDIR(d_inode(rep))) {
|
if (rep->d_parent != dir || IS_DEADDIR(d_inode(rep))) {
|
||||||
|
@ -599,6 +599,8 @@ ecryptfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
|
|||||||
target_inode = d_inode(new_dentry);
|
target_inode = d_inode(new_dentry);
|
||||||
|
|
||||||
trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
|
trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
|
||||||
|
if (IS_ERR(trap))
|
||||||
|
return PTR_ERR(trap);
|
||||||
dget(lower_new_dentry);
|
dget(lower_new_dentry);
|
||||||
rc = -EINVAL;
|
rc = -EINVAL;
|
||||||
if (lower_old_dentry->d_parent != lower_old_dir_dentry)
|
if (lower_old_dentry->d_parent != lower_old_dir_dentry)
|
||||||
|
37
fs/namei.c
37
fs/namei.c
@ -3014,21 +3014,37 @@ static inline int may_create(struct mnt_idmap *idmap,
|
|||||||
return inode_permission(idmap, dir, MAY_WRITE | MAY_EXEC);
|
return inode_permission(idmap, dir, MAY_WRITE | MAY_EXEC);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// p1 != p2, both are on the same filesystem, ->s_vfs_rename_mutex is held
|
||||||
static struct dentry *lock_two_directories(struct dentry *p1, struct dentry *p2)
|
static struct dentry *lock_two_directories(struct dentry *p1, struct dentry *p2)
|
||||||
{
|
{
|
||||||
struct dentry *p;
|
struct dentry *p = p1, *q = p2, *r;
|
||||||
|
|
||||||
p = d_ancestor(p2, p1);
|
while ((r = p->d_parent) != p2 && r != p)
|
||||||
if (p) {
|
p = r;
|
||||||
|
if (r == p2) {
|
||||||
|
// p is a child of p2 and an ancestor of p1 or p1 itself
|
||||||
inode_lock_nested(p2->d_inode, I_MUTEX_PARENT);
|
inode_lock_nested(p2->d_inode, I_MUTEX_PARENT);
|
||||||
inode_lock_nested(p1->d_inode, I_MUTEX_PARENT2);
|
inode_lock_nested(p1->d_inode, I_MUTEX_PARENT2);
|
||||||
return p;
|
return p;
|
||||||
}
|
}
|
||||||
|
// p is the root of connected component that contains p1
|
||||||
p = d_ancestor(p1, p2);
|
// p2 does not occur on the path from p to p1
|
||||||
inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
|
while ((r = q->d_parent) != p1 && r != p && r != q)
|
||||||
inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2);
|
q = r;
|
||||||
return p;
|
if (r == p1) {
|
||||||
|
// q is a child of p1 and an ancestor of p2 or p2 itself
|
||||||
|
inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
|
||||||
|
inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2);
|
||||||
|
return q;
|
||||||
|
} else if (likely(r == p)) {
|
||||||
|
// both p2 and p1 are descendents of p
|
||||||
|
inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
|
||||||
|
inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2);
|
||||||
|
return NULL;
|
||||||
|
} else { // no common ancestor at the time we'd been called
|
||||||
|
mutex_unlock(&p1->d_sb->s_vfs_rename_mutex);
|
||||||
|
return ERR_PTR(-EXDEV);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -4947,6 +4963,10 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
|
|||||||
|
|
||||||
retry_deleg:
|
retry_deleg:
|
||||||
trap = lock_rename(new_path.dentry, old_path.dentry);
|
trap = lock_rename(new_path.dentry, old_path.dentry);
|
||||||
|
if (IS_ERR(trap)) {
|
||||||
|
error = PTR_ERR(trap);
|
||||||
|
goto exit_lock_rename;
|
||||||
|
}
|
||||||
|
|
||||||
old_dentry = lookup_one_qstr_excl(&old_last, old_path.dentry,
|
old_dentry = lookup_one_qstr_excl(&old_last, old_path.dentry,
|
||||||
lookup_flags);
|
lookup_flags);
|
||||||
@ -5014,6 +5034,7 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
|
|||||||
dput(old_dentry);
|
dput(old_dentry);
|
||||||
exit3:
|
exit3:
|
||||||
unlock_rename(new_path.dentry, old_path.dentry);
|
unlock_rename(new_path.dentry, old_path.dentry);
|
||||||
|
exit_lock_rename:
|
||||||
if (delegated_inode) {
|
if (delegated_inode) {
|
||||||
error = break_deleg_wait(&delegated_inode);
|
error = break_deleg_wait(&delegated_inode);
|
||||||
if (!error)
|
if (!error)
|
||||||
|
@ -1813,6 +1813,10 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
|
|||||||
}
|
}
|
||||||
|
|
||||||
trap = lock_rename(tdentry, fdentry);
|
trap = lock_rename(tdentry, fdentry);
|
||||||
|
if (IS_ERR(trap)) {
|
||||||
|
err = (rqstp->rq_vers == 2) ? nfserr_acces : nfserr_xdev;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
err = fh_fill_pre_attrs(ffhp);
|
err = fh_fill_pre_attrs(ffhp);
|
||||||
if (err != nfs_ok)
|
if (err != nfs_ok)
|
||||||
goto out_unlock;
|
goto out_unlock;
|
||||||
|
@ -722,7 +722,7 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
|
|||||||
struct inode *inode;
|
struct inode *inode;
|
||||||
struct inode *udir = d_inode(c->destdir), *wdir = d_inode(c->workdir);
|
struct inode *udir = d_inode(c->destdir), *wdir = d_inode(c->workdir);
|
||||||
struct path path = { .mnt = ovl_upper_mnt(ofs) };
|
struct path path = { .mnt = ovl_upper_mnt(ofs) };
|
||||||
struct dentry *temp, *upper;
|
struct dentry *temp, *upper, *trap;
|
||||||
struct ovl_cu_creds cc;
|
struct ovl_cu_creds cc;
|
||||||
int err;
|
int err;
|
||||||
struct ovl_cattr cattr = {
|
struct ovl_cattr cattr = {
|
||||||
@ -760,9 +760,11 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
|
|||||||
* If temp was moved, abort without the cleanup.
|
* If temp was moved, abort without the cleanup.
|
||||||
*/
|
*/
|
||||||
ovl_start_write(c->dentry);
|
ovl_start_write(c->dentry);
|
||||||
if (lock_rename(c->workdir, c->destdir) != NULL ||
|
trap = lock_rename(c->workdir, c->destdir);
|
||||||
temp->d_parent != c->workdir) {
|
if (trap || temp->d_parent != c->workdir) {
|
||||||
err = -EIO;
|
err = -EIO;
|
||||||
|
if (IS_ERR(trap))
|
||||||
|
goto out;
|
||||||
goto unlock;
|
goto unlock;
|
||||||
} else if (err) {
|
} else if (err) {
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
@ -803,6 +805,7 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
|
|||||||
ovl_set_flag(OVL_WHITEOUTS, inode);
|
ovl_set_flag(OVL_WHITEOUTS, inode);
|
||||||
unlock:
|
unlock:
|
||||||
unlock_rename(c->workdir, c->destdir);
|
unlock_rename(c->workdir, c->destdir);
|
||||||
|
out:
|
||||||
ovl_end_write(c->dentry);
|
ovl_end_write(c->dentry);
|
||||||
|
|
||||||
return err;
|
return err;
|
||||||
|
@ -1180,6 +1180,10 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir,
|
|||||||
}
|
}
|
||||||
|
|
||||||
trap = lock_rename(new_upperdir, old_upperdir);
|
trap = lock_rename(new_upperdir, old_upperdir);
|
||||||
|
if (IS_ERR(trap)) {
|
||||||
|
err = PTR_ERR(trap);
|
||||||
|
goto out_revert_creds;
|
||||||
|
}
|
||||||
|
|
||||||
olddentry = ovl_lookup_upper(ofs, old->d_name.name, old_upperdir,
|
olddentry = ovl_lookup_upper(ofs, old->d_name.name, old_upperdir,
|
||||||
old->d_name.len);
|
old->d_name.len);
|
||||||
|
@ -439,8 +439,10 @@ static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir)
|
|||||||
bool ok = false;
|
bool ok = false;
|
||||||
|
|
||||||
if (workdir != upperdir) {
|
if (workdir != upperdir) {
|
||||||
ok = (lock_rename(workdir, upperdir) == NULL);
|
struct dentry *trap = lock_rename(workdir, upperdir);
|
||||||
unlock_rename(workdir, upperdir);
|
if (!IS_ERR(trap))
|
||||||
|
unlock_rename(workdir, upperdir);
|
||||||
|
ok = (trap == NULL);
|
||||||
}
|
}
|
||||||
return ok;
|
return ok;
|
||||||
}
|
}
|
||||||
|
@ -1198,12 +1198,17 @@ void ovl_nlink_end(struct dentry *dentry)
|
|||||||
|
|
||||||
int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir)
|
int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir)
|
||||||
{
|
{
|
||||||
|
struct dentry *trap;
|
||||||
|
|
||||||
/* Workdir should not be the same as upperdir */
|
/* Workdir should not be the same as upperdir */
|
||||||
if (workdir == upperdir)
|
if (workdir == upperdir)
|
||||||
goto err;
|
goto err;
|
||||||
|
|
||||||
/* Workdir should not be subdir of upperdir and vice versa */
|
/* Workdir should not be subdir of upperdir and vice versa */
|
||||||
if (lock_rename(workdir, upperdir) != NULL)
|
trap = lock_rename(workdir, upperdir);
|
||||||
|
if (IS_ERR(trap))
|
||||||
|
goto err;
|
||||||
|
if (trap)
|
||||||
goto err_unlock;
|
goto err_unlock;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -708,6 +708,10 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path,
|
|||||||
goto out2;
|
goto out2;
|
||||||
|
|
||||||
trap = lock_rename_child(old_child, new_path.dentry);
|
trap = lock_rename_child(old_child, new_path.dentry);
|
||||||
|
if (IS_ERR(trap)) {
|
||||||
|
err = PTR_ERR(trap);
|
||||||
|
goto out_drop_write;
|
||||||
|
}
|
||||||
|
|
||||||
old_parent = dget(old_child->d_parent);
|
old_parent = dget(old_child->d_parent);
|
||||||
if (d_unhashed(old_child)) {
|
if (d_unhashed(old_child)) {
|
||||||
@ -770,6 +774,7 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path,
|
|||||||
out3:
|
out3:
|
||||||
dput(old_parent);
|
dput(old_parent);
|
||||||
unlock_rename(old_parent, new_path.dentry);
|
unlock_rename(old_parent, new_path.dentry);
|
||||||
|
out_drop_write:
|
||||||
mnt_drop_write(old_path->mnt);
|
mnt_drop_write(old_path->mnt);
|
||||||
out2:
|
out2:
|
||||||
path_put(&new_path);
|
path_put(&new_path);
|
||||||
|
Loading…
Reference in New Issue
Block a user