mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-06 05:06:29 +00:00
v6.6-rc4.vfs.fixes
-----BEGIN PGP SIGNATURE----- iHUEABYKAB0WIQRAhzRXHqcMeLMyaSiRxhvAZXjcogUCZRKHuAAKCRCRxhvAZXjc ohOLAQDU9Fxq5UdqCdmsyi/b24XJFZlQhcVIZy2Hrhcor9TiVQEAjuECGlxFPSgj atVOWLdugDJquiHextqTEMgIecJpNw4= =uINF -----END PGP SIGNATURE----- Merge tag 'v6.6-rc4.vfs.fixes' of gitolite.kernel.org:pub/scm/linux/kernel/git/vfs/vfs Pull vfs fixes from Christian Brauner: "This contains the usual miscellaneous fixes and cleanups for vfs and individual fses: Fixes: - Revert ki_pos on error from buffered writes for direct io fallback - Add missing documentation for block device and superblock handling for changes merged this cycle - Fix reiserfs flexible array usage - Ensure that overlayfs sets ctime when setting mtime and atime - Disable deferred caller completions with overlayfs writes until proper support exists Cleanups: - Remove duplicate initialization in pipe code - Annotate aio kioctx_table with __counted_by" * tag 'v6.6-rc4.vfs.fixes' of gitolite.kernel.org:pub/scm/linux/kernel/git/vfs/vfs: overlayfs: set ctime when setting mtime and atime ntfs3: put resources during ntfs_fill_super() ovl: disable IOCB_DIO_CALLER_COMP porting: document superblock as block device holder porting: document new block device opening order fs/pipe: remove duplicate "offset" initializer fs-writeback: do not requeue a clean inode having skipped pages aio: Annotate struct kioctx_table with __counted_by direct_write_fallback(): on error revert the ->ki_pos update from buffered write reiserfs: Replace 1-element array with C99 style flex-array
This commit is contained in:
commit
84422aee15
@ -949,3 +949,99 @@ mmap_lock held. All in-tree users have been audited and do not seem to
|
|||||||
depend on the mmap_lock being held, but out of tree users should verify
|
depend on the mmap_lock being held, but out of tree users should verify
|
||||||
for themselves. If they do need it, they can return VM_FAULT_RETRY to
|
for themselves. If they do need it, they can return VM_FAULT_RETRY to
|
||||||
be called with the mmap_lock held.
|
be called with the mmap_lock held.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**mandatory**
|
||||||
|
|
||||||
|
The order of opening block devices and matching or creating superblocks has
|
||||||
|
changed.
|
||||||
|
|
||||||
|
The old logic opened block devices first and then tried to find a
|
||||||
|
suitable superblock to reuse based on the block device pointer.
|
||||||
|
|
||||||
|
The new logic tries to find a suitable superblock first based on the device
|
||||||
|
number, and opening the block device afterwards.
|
||||||
|
|
||||||
|
Since opening block devices cannot happen under s_umount because of lock
|
||||||
|
ordering requirements, s_umount is now dropped while opening block devices and
|
||||||
|
reacquired before calling fill_super().
|
||||||
|
|
||||||
|
In the old logic concurrent mounters would find the superblock on the list of
|
||||||
|
superblocks for the filesystem type. Since the first opener of the block device
|
||||||
|
would hold s_umount they would wait until the superblock became either born or
|
||||||
|
was discarded due to initialization failure.
|
||||||
|
|
||||||
|
Since the new logic drops s_umount, concurrent mounters could grab s_umount and
|
||||||
|
would spin. Instead they are now made to wait using an explicit wait-wake
|
||||||
|
mechanism without having to hold s_umount.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**mandatory**
|
||||||
|
|
||||||
|
The holder of a block device is now the superblock.
|
||||||
|
|
||||||
|
The holder of a block device used to be the file_system_type which wasn't
|
||||||
|
particularly useful. It wasn't possible to go from block device to owning
|
||||||
|
superblock without matching on the device pointer stored in the superblock.
|
||||||
|
This mechanism would only work for a single device so the block layer couldn't
|
||||||
|
find the owning superblock of any additional devices.
|
||||||
|
|
||||||
|
In the old mechanism reusing or creating a superblock for a racing mount(2) and
|
||||||
|
umount(2) relied on the file_system_type as the holder. This was severely
|
||||||
|
underdocumented however:
|
||||||
|
|
||||||
|
(1) Any concurrent mounter that managed to grab an active reference on an
|
||||||
|
existing superblock was made to wait until the superblock either became
|
||||||
|
ready or until the superblock was removed from the list of superblocks of
|
||||||
|
the filesystem type. If the superblock is ready the caller would simply
|
||||||
|
reuse it.
|
||||||
|
|
||||||
|
(2) If the mounter came after deactivate_locked_super() but before
|
||||||
|
the superblock had been removed from the list of superblocks of the
|
||||||
|
filesystem type the mounter would wait until the superblock was shut down,
|
||||||
|
reuse the block device and allocate a new superblock.
|
||||||
|
|
||||||
|
(3) If the mounter came after deactivate_locked_super() and after
|
||||||
|
the superblock had been removed from the list of superblocks of the
|
||||||
|
filesystem type the mounter would reuse the block device and allocate a new
|
||||||
|
superblock (the bd_holder pointer may still be set to the filesystem type).
|
||||||
|
|
||||||
|
Because the holder of the block device was the file_system_type any concurrent
|
||||||
|
mounter could open the block devices of any superblock of the same
|
||||||
|
file_system_type without risking seeing EBUSY because the block device was
|
||||||
|
still in use by another superblock.
|
||||||
|
|
||||||
|
Making the superblock the owner of the block device changes this as the holder
|
||||||
|
is now a unique superblock and thus block devices associated with it cannot be
|
||||||
|
reused by concurrent mounters. So a concurrent mounter in (2) could suddenly
|
||||||
|
see EBUSY when trying to open a block device whose holder was a different
|
||||||
|
superblock.
|
||||||
|
|
||||||
|
The new logic thus waits until the superblock and the devices are shut down in
|
||||||
|
->kill_sb(). Removal of the superblock from the list of superblocks of the
|
||||||
|
filesystem type is now moved to a later point when the devices are closed:
|
||||||
|
|
||||||
|
(1) Any concurrent mounter managing to grab an active reference on an existing
|
||||||
|
superblock is made to wait until the superblock is either ready or until
|
||||||
|
the superblock and all devices are shut down in ->kill_sb(). If the
|
||||||
|
superblock is ready the caller will simply reuse it.
|
||||||
|
|
||||||
|
(2) If the mounter comes after deactivate_locked_super() but before
|
||||||
|
the superblock has been removed from the list of superblocks of the
|
||||||
|
filesystem type the mounter is made to wait until the superblock and the
|
||||||
|
devices are shut down in ->kill_sb() and the superblock is removed from the
|
||||||
|
list of superblocks of the filesystem type. The mounter will allocate a new
|
||||||
|
superblock and grab ownership of the block device (the bd_holder pointer of
|
||||||
|
the block device will be set to the newly allocated superblock).
|
||||||
|
|
||||||
|
(3) This case is now collapsed into (2) as the superblock is left on the list
|
||||||
|
of superblocks of the filesystem type until all devices are shut down in
|
||||||
|
->kill_sb(). In other words, if the superblock isn't on the list of
|
||||||
|
superblocks of the filesystem type anymore then it has given up ownership of
|
||||||
|
all associated block devices (the bd_holder pointer is NULL).
|
||||||
|
|
||||||
|
As this is a VFS level change it has no practical consequences for filesystems
|
||||||
|
other than that all of them must use one of the provided kill_litter_super(),
|
||||||
|
kill_anon_super(), or kill_block_super() helpers.
|
||||||
|
2
fs/aio.c
2
fs/aio.c
@ -80,7 +80,7 @@ struct aio_ring {
|
|||||||
struct kioctx_table {
|
struct kioctx_table {
|
||||||
struct rcu_head rcu;
|
struct rcu_head rcu;
|
||||||
unsigned nr;
|
unsigned nr;
|
||||||
struct kioctx __rcu *table[];
|
struct kioctx __rcu *table[] __counted_by(nr);
|
||||||
};
|
};
|
||||||
|
|
||||||
struct kioctx_cpu {
|
struct kioctx_cpu {
|
||||||
|
@ -1535,10 +1535,15 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
|
|||||||
|
|
||||||
if (wbc->pages_skipped) {
|
if (wbc->pages_skipped) {
|
||||||
/*
|
/*
|
||||||
* writeback is not making progress due to locked
|
* Writeback is not making progress due to locked buffers.
|
||||||
* buffers. Skip this inode for now.
|
* Skip this inode for now. Although having skipped pages
|
||||||
|
* is odd for clean inodes, it can happen for some
|
||||||
|
* filesystems so handle that gracefully.
|
||||||
*/
|
*/
|
||||||
redirty_tail_locked(inode, wb);
|
if (inode->i_state & I_DIRTY_ALL)
|
||||||
|
redirty_tail_locked(inode, wb);
|
||||||
|
else
|
||||||
|
inode_cgwb_move_to_attached(inode, wb);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1903,6 +1903,7 @@ ssize_t direct_write_fallback(struct kiocb *iocb, struct iov_iter *iter,
|
|||||||
* We don't know how much we wrote, so just return the number of
|
* We don't know how much we wrote, so just return the number of
|
||||||
* bytes which were direct-written
|
* bytes which were direct-written
|
||||||
*/
|
*/
|
||||||
|
iocb->ki_pos -= buffered_written;
|
||||||
if (direct_written)
|
if (direct_written)
|
||||||
return direct_written;
|
return direct_written;
|
||||||
return err;
|
return err;
|
||||||
|
@ -1562,6 +1562,7 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc)
|
|||||||
put_inode_out:
|
put_inode_out:
|
||||||
iput(inode);
|
iput(inode);
|
||||||
out:
|
out:
|
||||||
|
ntfs3_put_sbi(sbi);
|
||||||
kfree(boot2);
|
kfree(boot2);
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
@ -337,7 +337,7 @@ static int ovl_set_timestamps(struct ovl_fs *ofs, struct dentry *upperdentry,
|
|||||||
{
|
{
|
||||||
struct iattr attr = {
|
struct iattr attr = {
|
||||||
.ia_valid =
|
.ia_valid =
|
||||||
ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET,
|
ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET | ATTR_CTIME,
|
||||||
.ia_atime = stat->atime,
|
.ia_atime = stat->atime,
|
||||||
.ia_mtime = stat->mtime,
|
.ia_mtime = stat->mtime,
|
||||||
};
|
};
|
||||||
|
@ -391,6 +391,12 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
|
|||||||
if (!ovl_should_sync(OVL_FS(inode->i_sb)))
|
if (!ovl_should_sync(OVL_FS(inode->i_sb)))
|
||||||
ifl &= ~(IOCB_DSYNC | IOCB_SYNC);
|
ifl &= ~(IOCB_DSYNC | IOCB_SYNC);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Overlayfs doesn't support deferred completions, don't copy
|
||||||
|
* this property in case it is set by the issuer.
|
||||||
|
*/
|
||||||
|
ifl &= ~IOCB_DIO_CALLER_COMP;
|
||||||
|
|
||||||
old_cred = ovl_override_creds(file_inode(file)->i_sb);
|
old_cred = ovl_override_creds(file_inode(file)->i_sb);
|
||||||
if (is_sync_kiocb(iocb)) {
|
if (is_sync_kiocb(iocb)) {
|
||||||
file_start_write(real.file);
|
file_start_write(real.file);
|
||||||
|
@ -537,7 +537,6 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
ret += copied;
|
ret += copied;
|
||||||
buf->offset = 0;
|
|
||||||
buf->len = copied;
|
buf->len = copied;
|
||||||
|
|
||||||
if (!iov_iter_count(from))
|
if (!iov_iter_count(from))
|
||||||
|
@ -2699,7 +2699,7 @@ struct reiserfs_iget_args {
|
|||||||
#define get_journal_desc_magic(bh) (bh->b_data + bh->b_size - 12)
|
#define get_journal_desc_magic(bh) (bh->b_data + bh->b_size - 12)
|
||||||
|
|
||||||
#define journal_trans_half(blocksize) \
|
#define journal_trans_half(blocksize) \
|
||||||
((blocksize - sizeof (struct reiserfs_journal_desc) + sizeof (__u32) - 12) / sizeof (__u32))
|
((blocksize - sizeof(struct reiserfs_journal_desc) - 12) / sizeof(__u32))
|
||||||
|
|
||||||
/* journal.c see journal.c for all the comments here */
|
/* journal.c see journal.c for all the comments here */
|
||||||
|
|
||||||
@ -2711,7 +2711,7 @@ struct reiserfs_journal_desc {
|
|||||||
__le32 j_len;
|
__le32 j_len;
|
||||||
|
|
||||||
__le32 j_mount_id; /* mount id of this trans */
|
__le32 j_mount_id; /* mount id of this trans */
|
||||||
__le32 j_realblock[1]; /* real locations for each block */
|
__le32 j_realblock[]; /* real locations for each block */
|
||||||
};
|
};
|
||||||
|
|
||||||
#define get_desc_trans_id(d) le32_to_cpu((d)->j_trans_id)
|
#define get_desc_trans_id(d) le32_to_cpu((d)->j_trans_id)
|
||||||
@ -2726,7 +2726,7 @@ struct reiserfs_journal_desc {
|
|||||||
struct reiserfs_journal_commit {
|
struct reiserfs_journal_commit {
|
||||||
__le32 j_trans_id; /* must match j_trans_id from the desc block */
|
__le32 j_trans_id; /* must match j_trans_id from the desc block */
|
||||||
__le32 j_len; /* ditto */
|
__le32 j_len; /* ditto */
|
||||||
__le32 j_realblock[1]; /* real locations for each block */
|
__le32 j_realblock[]; /* real locations for each block */
|
||||||
};
|
};
|
||||||
|
|
||||||
#define get_commit_trans_id(c) le32_to_cpu((c)->j_trans_id)
|
#define get_commit_trans_id(c) le32_to_cpu((c)->j_trans_id)
|
||||||
|
Loading…
Reference in New Issue
Block a user