mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-16 13:34:30 +00:00
ocfs2: reflink deadlock when cloning a file to the same directory simultaneously
Running reflink from multiple nodes simultaneously to clone a file to the same directory probably triggers a deadlock issue. For example, there is a three node ocfs2 cluster, each node mounts the ocfs2 file system to /mnt/shared, and run the reflink command from each node repeatedly, like reflink "/mnt/shared/test" \ "/mnt/shared/.snapshots/test.`date +%m%d%H%M%S`.`hostname`" then, reflink command process will be hung on each node, and you can't list this file system directory. The problematic reflink command process is blocked at one node, task:reflink state:D stack: 0 pid: 1283 ppid: 4154 Call Trace: __schedule+0x2fd/0x750 schedule+0x2f/0xa0 schedule_timeout+0x1cc/0x310 ? ocfs2_control_cfu+0x50/0x50 [ocfs2_stack_user] ? 0xffffffffc0e3e000 wait_for_completion+0xba/0x140 ? wake_up_q+0xa0/0xa0 __ocfs2_cluster_lock.isra.41+0x3b5/0x820 [ocfs2] ? ocfs2_inode_lock_full_nested+0x1fc/0x960 [ocfs2] ocfs2_inode_lock_full_nested+0x1fc/0x960 [ocfs2] ocfs2_init_security_and_acl+0xbe/0x1d0 [ocfs2] ocfs2_reflink+0x436/0x4c0 [ocfs2] ? ocfs2_reflink_ioctl+0x2ca/0x360 [ocfs2] ocfs2_reflink_ioctl+0x2ca/0x360 [ocfs2] ocfs2_ioctl+0x25e/0x670 [ocfs2] do_vfs_ioctl+0xa0/0x680 ksys_ioctl+0x70/0x80 __x64_sys_ioctl+0x16/0x20 do_syscall_64+0x5b/0x1e0 The other reflink command processes are blocked at other nodes, task:reflink state:D stack: 0 pid:29759 ppid: 4088 Call Trace: __schedule+0x2fd/0x750 schedule+0x2f/0xa0 schedule_timeout+0x1cc/0x310 ? ocfs2_control_cfu+0x50/0x50 [ocfs2_stack_user] ? 0xffffffffc0b19000 wait_for_completion+0xba/0x140 ? wake_up_q+0xa0/0xa0 __ocfs2_cluster_lock.isra.41+0x3b5/0x820 [ocfs2] ? ocfs2_inode_lock_full_nested+0x1fc/0x960 [ocfs2] ocfs2_inode_lock_full_nested+0x1fc/0x960 [ocfs2] ocfs2_mv_orphaned_inode_to_new+0x87/0x7e0 [ocfs2] ocfs2_reflink+0x335/0x4c0 [ocfs2] ? 
ocfs2_reflink_ioctl+0x2ca/0x360 [ocfs2] ocfs2_reflink_ioctl+0x2ca/0x360 [ocfs2] ocfs2_ioctl+0x25e/0x670 [ocfs2] do_vfs_ioctl+0xa0/0x680 ksys_ioctl+0x70/0x80 __x64_sys_ioctl+0x16/0x20 do_syscall_64+0x5b/0x1e0 or task:reflink state:D stack: 0 pid:18465 ppid: 4156 Call Trace: __schedule+0x302/0x940 ? usleep_range+0x80/0x80 schedule+0x46/0xb0 schedule_timeout+0xff/0x140 ? ocfs2_control_cfu+0x50/0x50 [ocfs2_stack_user] ? 0xffffffffc0c3b000 __wait_for_common+0xb9/0x170 __ocfs2_cluster_lock.constprop.0+0x1d6/0x860 [ocfs2] ? ocfs2_wait_for_recovery+0x49/0xd0 [ocfs2] ? ocfs2_inode_lock_full_nested+0x30f/0xa50 [ocfs2] ocfs2_inode_lock_full_nested+0x30f/0xa50 [ocfs2] ocfs2_inode_lock_tracker+0xf2/0x2b0 [ocfs2] ? dput+0x32/0x2f0 ocfs2_permission+0x45/0xe0 [ocfs2] inode_permission+0xcc/0x170 link_path_walk.part.0.constprop.0+0x2a2/0x380 ? path_init+0x2c1/0x3f0 path_parentat+0x3c/0x90 filename_parentat+0xc1/0x1d0 ? filename_lookup+0x138/0x1c0 filename_create+0x43/0x160 ocfs2_reflink_ioctl+0xe6/0x380 [ocfs2] ocfs2_ioctl+0x1ea/0x2c0 [ocfs2] ? do_sys_openat2+0x81/0x150 __x64_sys_ioctl+0x82/0xb0 do_syscall_64+0x61/0xb0 The deadlock is caused by multiple acquiring the destination directory inode dlm lock in ocfs2_reflink function, we should acquire this directory inode dlm lock at the beginning, and hold this dlm lock until end of the function. Link: https://lkml.kernel.org/r/20210729110230.18983-1-ghe@suse.com Signed-off-by: Gang He <ghe@suse.com> Cc: Mark Fasheh <mark@fasheh.com> Cc: Joel Becker <jlbec@evilplan.org> Cc: Junxiao Bi <junxiao.bi@oracle.com> Cc: Joseph Qi <jiangqi903@gmail.com> Cc: Changwei Ge <gechangwei@live.cn> Cc: Gang He <ghe@suse.com> Cc: Jun Piao <piaojun@huawei.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
parent
ee56c3e8ee
commit
409745064d
@ -2489,6 +2489,7 @@ out:
|
||||
}
|
||||
|
||||
int ocfs2_create_inode_in_orphan(struct inode *dir,
|
||||
struct buffer_head **dir_bh,
|
||||
int mode,
|
||||
struct inode **new_inode)
|
||||
{
|
||||
@ -2597,13 +2598,16 @@ leave:
|
||||
|
||||
brelse(new_di_bh);
|
||||
|
||||
if (!status)
|
||||
*new_inode = inode;
|
||||
|
||||
ocfs2_free_dir_lookup_result(&orphan_insert);
|
||||
|
||||
ocfs2_inode_unlock(dir, 1);
|
||||
brelse(parent_di_bh);
|
||||
if (!status) {
|
||||
*new_inode = inode;
|
||||
*dir_bh = parent_di_bh;
|
||||
} else {
|
||||
ocfs2_inode_unlock(dir, 1);
|
||||
brelse(parent_di_bh);
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
@ -2760,11 +2764,11 @@ bail:
|
||||
}
|
||||
|
||||
int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
|
||||
struct buffer_head *dir_bh,
|
||||
struct inode *inode,
|
||||
struct dentry *dentry)
|
||||
{
|
||||
int status = 0;
|
||||
struct buffer_head *parent_di_bh = NULL;
|
||||
handle_t *handle = NULL;
|
||||
struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
|
||||
struct ocfs2_dinode *dir_di, *di;
|
||||
@ -2778,14 +2782,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
|
||||
(unsigned long long)OCFS2_I(dir)->ip_blkno,
|
||||
(unsigned long long)OCFS2_I(inode)->ip_blkno);
|
||||
|
||||
status = ocfs2_inode_lock(dir, &parent_di_bh, 1);
|
||||
if (status < 0) {
|
||||
if (status != -ENOENT)
|
||||
mlog_errno(status);
|
||||
return status;
|
||||
}
|
||||
|
||||
dir_di = (struct ocfs2_dinode *) parent_di_bh->b_data;
|
||||
dir_di = (struct ocfs2_dinode *) dir_bh->b_data;
|
||||
if (!dir_di->i_links_count) {
|
||||
/* can't make a file in a deleted directory. */
|
||||
status = -ENOENT;
|
||||
@ -2798,7 +2795,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
|
||||
goto leave;
|
||||
|
||||
/* get a spot inside the dir. */
|
||||
status = ocfs2_prepare_dir_for_insert(osb, dir, parent_di_bh,
|
||||
status = ocfs2_prepare_dir_for_insert(osb, dir, dir_bh,
|
||||
dentry->d_name.name,
|
||||
dentry->d_name.len, &lookup);
|
||||
if (status < 0) {
|
||||
@ -2862,7 +2859,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
|
||||
ocfs2_journal_dirty(handle, di_bh);
|
||||
|
||||
status = ocfs2_add_entry(handle, dentry, inode,
|
||||
OCFS2_I(inode)->ip_blkno, parent_di_bh,
|
||||
OCFS2_I(inode)->ip_blkno, dir_bh,
|
||||
&lookup);
|
||||
if (status < 0) {
|
||||
mlog_errno(status);
|
||||
@ -2886,10 +2883,7 @@ orphan_unlock:
|
||||
iput(orphan_dir_inode);
|
||||
leave:
|
||||
|
||||
ocfs2_inode_unlock(dir, 1);
|
||||
|
||||
brelse(di_bh);
|
||||
brelse(parent_di_bh);
|
||||
brelse(orphan_dir_bh);
|
||||
|
||||
ocfs2_free_dir_lookup_result(&lookup);
|
||||
|
@ -24,6 +24,7 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
|
||||
struct buffer_head *orphan_dir_bh,
|
||||
bool dio);
|
||||
int ocfs2_create_inode_in_orphan(struct inode *dir,
|
||||
struct buffer_head **dir_bh,
|
||||
int mode,
|
||||
struct inode **new_inode);
|
||||
int ocfs2_add_inode_to_orphan(struct ocfs2_super *osb,
|
||||
@ -32,6 +33,7 @@ int ocfs2_del_inode_from_orphan(struct ocfs2_super *osb,
|
||||
struct inode *inode, struct buffer_head *di_bh,
|
||||
int update_isize, loff_t end);
|
||||
int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
|
||||
struct buffer_head *dir_bh,
|
||||
struct inode *new_inode,
|
||||
struct dentry *new_dentry);
|
||||
|
||||
|
@ -4252,7 +4252,7 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
|
||||
{
|
||||
int error, had_lock;
|
||||
struct inode *inode = d_inode(old_dentry);
|
||||
struct buffer_head *old_bh = NULL;
|
||||
struct buffer_head *old_bh = NULL, *dir_bh = NULL;
|
||||
struct inode *new_orphan_inode = NULL;
|
||||
struct ocfs2_lock_holder oh;
|
||||
|
||||
@ -4260,7 +4260,7 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
|
||||
error = ocfs2_create_inode_in_orphan(dir, inode->i_mode,
|
||||
error = ocfs2_create_inode_in_orphan(dir, &dir_bh, inode->i_mode,
|
||||
&new_orphan_inode);
|
||||
if (error) {
|
||||
mlog_errno(error);
|
||||
@ -4306,13 +4306,15 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
|
||||
|
||||
/* If the security isn't preserved, we need to re-initialize them. */
|
||||
if (!preserve) {
|
||||
error = ocfs2_init_security_and_acl(dir, new_orphan_inode,
|
||||
error = ocfs2_init_security_and_acl(dir, dir_bh,
|
||||
new_orphan_inode,
|
||||
&new_dentry->d_name);
|
||||
if (error)
|
||||
mlog_errno(error);
|
||||
}
|
||||
if (!error) {
|
||||
error = ocfs2_mv_orphaned_inode_to_new(dir, new_orphan_inode,
|
||||
error = ocfs2_mv_orphaned_inode_to_new(dir, dir_bh,
|
||||
new_orphan_inode,
|
||||
new_dentry);
|
||||
if (error)
|
||||
mlog_errno(error);
|
||||
@ -4330,6 +4332,11 @@ out:
|
||||
iput(new_orphan_inode);
|
||||
}
|
||||
|
||||
if (dir_bh) {
|
||||
ocfs2_inode_unlock(dir, 1);
|
||||
brelse(dir_bh);
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
|
@ -7203,16 +7203,13 @@ out:
|
||||
/*
|
||||
* Initialize security and acl for an already created inode.
|
||||
* Used for reflink a non-preserve-security file.
|
||||
*
|
||||
* It uses common api like ocfs2_xattr_set, so the caller
|
||||
* must not hold any lock except i_rwsem.
|
||||
*/
|
||||
int ocfs2_init_security_and_acl(struct inode *dir,
|
||||
struct buffer_head *dir_bh,
|
||||
struct inode *inode,
|
||||
const struct qstr *qstr)
|
||||
{
|
||||
int ret = 0;
|
||||
struct buffer_head *dir_bh = NULL;
|
||||
|
||||
ret = ocfs2_init_security_get(inode, dir, qstr, NULL);
|
||||
if (ret) {
|
||||
@ -7220,17 +7217,10 @@ int ocfs2_init_security_and_acl(struct inode *dir,
|
||||
goto leave;
|
||||
}
|
||||
|
||||
ret = ocfs2_inode_lock(dir, &dir_bh, 0);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
goto leave;
|
||||
}
|
||||
ret = ocfs2_init_acl(NULL, inode, dir, NULL, dir_bh, NULL, NULL);
|
||||
if (ret)
|
||||
mlog_errno(ret);
|
||||
|
||||
ocfs2_inode_unlock(dir, 0);
|
||||
brelse(dir_bh);
|
||||
leave:
|
||||
return ret;
|
||||
}
|
||||
|
@ -83,6 +83,7 @@ int ocfs2_reflink_xattrs(struct inode *old_inode,
|
||||
struct buffer_head *new_bh,
|
||||
bool preserve_security);
|
||||
int ocfs2_init_security_and_acl(struct inode *dir,
|
||||
struct buffer_head *dir_bh,
|
||||
struct inode *inode,
|
||||
const struct qstr *qstr);
|
||||
#endif /* OCFS2_XATTR_H */
|
||||
|
Loading…
x
Reference in New Issue
Block a user