More VFS fixes for 5.10-rc4:

- Minor cleanups of the sb_start_* fs freeze helpers.
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEUzaAxoMeQq6m2jMV+H93GTRKtOsFAl+sDaIACgkQ+H93GTRK
 tOu4sw//bIdBw11YfI9sPtMJR/RkK3lm/pU4A/eJYGD65Mzk8J4kNi6jXKuyqQ8e
 /RpTqKWOwVW05Qg5HlKTxXRyr5Q788+EuBQH2t8VukWVdAgK2TFvNTTXb7QDsNSD
 SneC7Sox3CEO+vYnBsr7tUjfl7AYH0uFTxLkvpYqSQBn2+jo2x0s7NyKKZSDAASI
 +Rmhinw4QjjAHYC54nBy6Q47XhrZJj7XCODJdEql81cKSJUvjCo3url3sNvGXXNW
 oXbs5IO5cVQrQx6n9rQxCfkN1dz9c/CBopYFwdgmg76Bj4VLSzCYVecnMeDl53pV
 3jXesNtJcR2dz64e98K1Moof2dHSm0/NP0Q7KnMYEaGEl6tAtyjSx9lL2Qd6npG+
 mG460UHd/7RHXoH/BTaCrtHHyA4pApHMqf+w3R2ienxrltKUJAEfGM/5x8o0ikWx
 laeT0L/m6Yv/dGnDvNthhoF84tCiQUnxg+UeXiKv4R9uFL1bKMFPw5i1zWuXqqaX
 yZPqUY1tiecQskr89AimOVI64L2MJ4DgBey1JzNL/XzPtw55Qu+LR6MkkaIC08Wu
 ubGJTm6fPw3Cz8JYgn4WIgKB9Q7yAoKsyl0mGLQh2SJT1FS8WLct+SRPwXcMVfJT
 VpkgjJW/ak5L+XfQU6Ev39zUasEAqdaxvPoTxUfne6spUiNbgrk=
 =ZC9a
 -----END PGP SIGNATURE-----

Merge tag 'vfs-5.10-fixes-2' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull fs freeze fix and cleanups from Darrick Wong:
 "A single vfs fix for 5.10, along with two subsequent cleanups.

  A very long time ago, a hack was added to the vfs fs freeze protection
  code to work around lockdep complaints about XFS, which would try to
  run a transaction (which requires intwrite protection) to finalize an
  xfs freeze (by which time the vfs had already taken intwrite).

  Fast forward a few years, and XFS fixed the recursive intwrite problem
  on its own, and the hack became unnecessary. Fast forward almost a
  decade, and latent bugs in the code converting this hack from freeze
  flags to freeze locks combine with lockdep bugs to make this reproduce
  frequently enough to notice page faults racing with freeze.

  Since the hack is unnecessary and causes thread race errors, just get
  rid of it completely. Making this kind of vfs change midway through a
  cycle makes me nervous, but a large enough number of the usual
  VFS/ext4/XFS/btrfs suspects have said this looks good and solves a
  real problem vector.

  And once that removal is done, __sb_start_write is now simple enough
  that it becomes possible to refactor the function into smaller,
  simpler static inline helpers in linux/fs.h. The cleanup is
  straightforward.

  Summary:

   - Finally remove the "convert to trylock" weirdness in the fs freezer
     code. It was necessary 10 years ago to deal with nested
     transactions in XFS, but we've long since removed that; and now
     this is causing subtle race conditions when lockdep goes offline
     and sb_start_* aren't prepared to retry a trylock failure.

   - Minor cleanups of the sb_start_* fs freeze helpers"

* tag 'vfs-5.10-fixes-2' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  vfs: move __sb_{start,end}_write* to fs.h
  vfs: separate __sb_start_write into blocking and non-blocking helpers
  vfs: remove lockdep bogosity in __sb_start_write
This commit is contained in:
Linus Torvalds 2020-11-13 16:07:53 -08:00
commit f01c30de86
4 changed files with 29 additions and 63 deletions

View File

@ -1572,7 +1572,7 @@ static int aio_write(struct kiocb *req, const struct iocb *iocb,
* we return to userspace. * we return to userspace.
*/ */
if (S_ISREG(file_inode(file)->i_mode)) { if (S_ISREG(file_inode(file)->i_mode)) {
__sb_start_write(file_inode(file)->i_sb, SB_FREEZE_WRITE, true); sb_start_write(file_inode(file)->i_sb);
__sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE); __sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE);
} }
req->ki_flags |= IOCB_WRITE; req->ki_flags |= IOCB_WRITE;

View File

@ -3547,8 +3547,7 @@ static int io_write(struct io_kiocb *req, bool force_nonblock,
* we return to userspace. * we return to userspace.
*/ */
if (req->flags & REQ_F_ISREG) { if (req->flags & REQ_F_ISREG) {
__sb_start_write(file_inode(req->file)->i_sb, sb_start_write(file_inode(req->file)->i_sb);
SB_FREEZE_WRITE, true);
__sb_writers_release(file_inode(req->file)->i_sb, __sb_writers_release(file_inode(req->file)->i_sb,
SB_FREEZE_WRITE); SB_FREEZE_WRITE);
} }

View File

@ -1631,55 +1631,6 @@ int super_setup_bdi(struct super_block *sb)
} }
EXPORT_SYMBOL(super_setup_bdi); EXPORT_SYMBOL(super_setup_bdi);
/*
* This is an internal function, please use sb_end_{write,pagefault,intwrite}
* instead.
*/
void __sb_end_write(struct super_block *sb, int level)
{
percpu_up_read(sb->s_writers.rw_sem + level-1);
}
EXPORT_SYMBOL(__sb_end_write);
/*
* This is an internal function, please use sb_start_{write,pagefault,intwrite}
* instead.
*/
int __sb_start_write(struct super_block *sb, int level, bool wait)
{
bool force_trylock = false;
int ret = 1;
#ifdef CONFIG_LOCKDEP
/*
* We want lockdep to tell us about possible deadlocks with freezing
* but it's it bit tricky to properly instrument it. Getting a freeze
* protection works as getting a read lock but there are subtle
* problems. XFS for example gets freeze protection on internal level
* twice in some cases, which is OK only because we already hold a
* freeze protection also on higher level. Due to these cases we have
* to use wait == F (trylock mode) which must not fail.
*/
if (wait) {
int i;
for (i = 0; i < level - 1; i++)
if (percpu_rwsem_is_held(sb->s_writers.rw_sem + i)) {
force_trylock = true;
break;
}
}
#endif
if (wait && !force_trylock)
percpu_down_read(sb->s_writers.rw_sem + level-1);
else
ret = percpu_down_read_trylock(sb->s_writers.rw_sem + level-1);
WARN_ON(force_trylock && !ret);
return ret;
}
EXPORT_SYMBOL(__sb_start_write);
/** /**
* sb_wait_write - wait until all writers to given file system finish * sb_wait_write - wait until all writers to given file system finish
* @sb: the super for which we wait * @sb: the super for which we wait

View File

@ -1580,8 +1580,24 @@ extern struct timespec64 current_time(struct inode *inode);
* Snapshotting support. * Snapshotting support.
*/ */
void __sb_end_write(struct super_block *sb, int level); /*
int __sb_start_write(struct super_block *sb, int level, bool wait); * These are internal functions, please use sb_start_{write,pagefault,intwrite}
* instead.
*/
static inline void __sb_end_write(struct super_block *sb, int level)
{
percpu_up_read(sb->s_writers.rw_sem + level-1);
}
static inline void __sb_start_write(struct super_block *sb, int level)
{
percpu_down_read(sb->s_writers.rw_sem + level - 1);
}
static inline bool __sb_start_write_trylock(struct super_block *sb, int level)
{
return percpu_down_read_trylock(sb->s_writers.rw_sem + level - 1);
}
#define __sb_writers_acquired(sb, lev) \ #define __sb_writers_acquired(sb, lev) \
percpu_rwsem_acquire(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_) percpu_rwsem_acquire(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_)
@ -1645,12 +1661,12 @@ static inline void sb_end_intwrite(struct super_block *sb)
*/ */
static inline void sb_start_write(struct super_block *sb) static inline void sb_start_write(struct super_block *sb)
{ {
__sb_start_write(sb, SB_FREEZE_WRITE, true); __sb_start_write(sb, SB_FREEZE_WRITE);
} }
static inline int sb_start_write_trylock(struct super_block *sb) static inline bool sb_start_write_trylock(struct super_block *sb)
{ {
return __sb_start_write(sb, SB_FREEZE_WRITE, false); return __sb_start_write_trylock(sb, SB_FREEZE_WRITE);
} }
/** /**
@ -1674,7 +1690,7 @@ static inline int sb_start_write_trylock(struct super_block *sb)
*/ */
static inline void sb_start_pagefault(struct super_block *sb) static inline void sb_start_pagefault(struct super_block *sb)
{ {
__sb_start_write(sb, SB_FREEZE_PAGEFAULT, true); __sb_start_write(sb, SB_FREEZE_PAGEFAULT);
} }
/* /*
@ -1692,12 +1708,12 @@ static inline void sb_start_pagefault(struct super_block *sb)
*/ */
static inline void sb_start_intwrite(struct super_block *sb) static inline void sb_start_intwrite(struct super_block *sb)
{ {
__sb_start_write(sb, SB_FREEZE_FS, true); __sb_start_write(sb, SB_FREEZE_FS);
} }
static inline int sb_start_intwrite_trylock(struct super_block *sb) static inline bool sb_start_intwrite_trylock(struct super_block *sb)
{ {
return __sb_start_write(sb, SB_FREEZE_FS, false); return __sb_start_write_trylock(sb, SB_FREEZE_FS);
} }
@ -2756,14 +2772,14 @@ static inline void file_start_write(struct file *file)
{ {
if (!S_ISREG(file_inode(file)->i_mode)) if (!S_ISREG(file_inode(file)->i_mode))
return; return;
__sb_start_write(file_inode(file)->i_sb, SB_FREEZE_WRITE, true); sb_start_write(file_inode(file)->i_sb);
} }
static inline bool file_start_write_trylock(struct file *file) static inline bool file_start_write_trylock(struct file *file)
{ {
if (!S_ISREG(file_inode(file)->i_mode)) if (!S_ISREG(file_inode(file)->i_mode))
return true; return true;
return __sb_start_write(file_inode(file)->i_sb, SB_FREEZE_WRITE, false); return sb_start_write_trylock(file_inode(file)->i_sb);
} }
static inline void file_end_write(struct file *file) static inline void file_end_write(struct file *file)