Changes since last update:

- Fix (pcluster) memory leak and (sbi) UAF after umounting;
 
  - Fix a case of PSI memstall mis-accounting;
 
  - Use buffered I/Os by default for file-backed mounts.
 -----BEGIN PGP SIGNATURE-----
 
 iQJFBAABCgAvFiEEQ0A6bDUS9Y+83NPFUXZn5Zlu5qoFAmdhgxQRHHhpYW5nQGtl
 cm5lbC5vcmcACgkQUXZn5Zlu5qrLmxAAm621Zq5Jz+AlN2HvBpyfIjD8eXtdCEd6
 8r6+2e5aw8HpZKyKBo1ET3gTSA9KO4FbdZl0S9e+SfPJDa/Tak4e5mzaF8su1LnS
 bzg3MQwU8W7bahsKn6OOnC4pTFvKL1ZdLvujbqjEDYXEP2cUEjxtZbHPpbTCRpte
 lhbN9444lfJevtyaNK92SP5NQjPYNDN0J6QJZIZuRMB9IDA2zsiuzBnqUVMkGbRx
 iiH3gsWo0l554RXY81rMwLLHMsW79Qc5fBD2pmkzzp1ioH8YyY0+aylZi/ps9tcr
 xgOGZNKJT3fouhPVSE/QMdiqlNZW8qd/jwc3S0l8yeYn55pHftKCC0wysrGkXjVw
 ODHU6WYWSNtZ2uxCU44lDKVnse4fIksFX7w1/BZer7dZy8kUNZ4hexLQp+kSBpFs
 QKK3bJpN85GfNndk9X+vk6MFPHpEougJNiywVMAPCa55heeCMTES+vW5WjpIBjuz
 hyU26y5xELAbK4T+VmNlNh16LEbV1rUyvBHaq4vhVJensEQQu8pusqQH0gMYZi3l
 Bn5drLmsSG6zaMeeBc14609f3IBJBgkzIi7G5wFuIK4viqcRkh0nCf1c6D10vgST
 G+8CTwks6c2TTHANvIPzs3Ciw6FTBQym/CJSItPcoLpc5xoDfcAYA2uuCyhz9khZ
 A3kR3lNe0e0=
 =Idg8
 -----END PGP SIGNATURE-----

Merge tag 'erofs-for-6.13-rc4-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs

Pull erofs fixes from Gao Xiang:
 "The first one fixes a syzbot UAF report caused by a commit introduced
  in this cycle, but it also addresses a longstanding memory leak. The
  second one resolves a PSI memstall mis-accounting issue.

  The remaining patches switch file-backed mounts to use buffered I/Os
  by default instead of direct I/Os, since the page cache of underlay
  files is typically valid and maybe even dirty. This change also aligns
  with the default policy of loopback devices. A mount option has been
  added to try to use direct I/Os explicitly.

  Summary:

   - Fix (pcluster) memory leak and (sbi) UAF after umounting

   - Fix a case of PSI memstall mis-accounting

   - Use buffered I/Os by default for file-backed mounts"

* tag 'erofs-for-6.13-rc4-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs:
  erofs: use buffered I/O for file-backed mounts by default
  erofs: reference `struct erofs_device_info` for erofs_map_dev
  erofs: use `struct erofs_device_info` for the primary device
  erofs: add erofs_sb_free() helper
  MAINTAINERS: erofs: update Yue Hu's email address
  erofs: fix PSI memstall accounting
  erofs: fix rare pcluster memory leak after unmounting
This commit is contained in:
Linus Torvalds 2024-12-17 09:04:42 -08:00
commit ed90ed56e4
8 changed files with 80 additions and 83 deletions

View File

@ -8453,7 +8453,7 @@ F: include/video/s1d13xxxfb.h
EROFS FILE SYSTEM
M: Gao Xiang <xiang@kernel.org>
M: Chao Yu <chao@kernel.org>
R: Yue Hu <huyue2@coolpad.com>
R: Yue Hu <zbestahu@gmail.com>
R: Jeffle Xu <jefflexu@linux.alibaba.com>
R: Sandeep Dhavale <dhavale@google.com>
L: linux-erofs@lists.ozlabs.org

View File

@ -56,10 +56,10 @@ void erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb)
buf->file = NULL;
if (erofs_is_fileio_mode(sbi)) {
buf->file = sbi->fdev; /* some fs like FUSE needs it */
buf->file = sbi->dif0.file; /* some fs like FUSE needs it */
buf->mapping = buf->file->f_mapping;
} else if (erofs_is_fscache_mode(sb))
buf->mapping = sbi->s_fscache->inode->i_mapping;
buf->mapping = sbi->dif0.fscache->inode->i_mapping;
else
buf->mapping = sb->s_bdev->bd_mapping;
}
@ -179,19 +179,13 @@ int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map)
}
static void erofs_fill_from_devinfo(struct erofs_map_dev *map,
struct erofs_device_info *dif)
struct super_block *sb, struct erofs_device_info *dif)
{
map->m_sb = sb;
map->m_dif = dif;
map->m_bdev = NULL;
map->m_fp = NULL;
if (dif->file) {
if (S_ISBLK(file_inode(dif->file)->i_mode))
map->m_bdev = file_bdev(dif->file);
else
map->m_fp = dif->file;
}
map->m_daxdev = dif->dax_dev;
map->m_dax_part_off = dif->dax_part_off;
map->m_fscache = dif->fscache;
if (dif->file && S_ISBLK(file_inode(dif->file)->i_mode))
map->m_bdev = file_bdev(dif->file);
}
int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
@ -201,12 +195,8 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
erofs_off_t startoff, length;
int id;
map->m_bdev = sb->s_bdev;
map->m_daxdev = EROFS_SB(sb)->dax_dev;
map->m_dax_part_off = EROFS_SB(sb)->dax_part_off;
map->m_fscache = EROFS_SB(sb)->s_fscache;
map->m_fp = EROFS_SB(sb)->fdev;
erofs_fill_from_devinfo(map, sb, &EROFS_SB(sb)->dif0);
map->m_bdev = sb->s_bdev; /* use s_bdev for the primary device */
if (map->m_deviceid) {
down_read(&devs->rwsem);
dif = idr_find(&devs->tree, map->m_deviceid - 1);
@ -219,7 +209,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
up_read(&devs->rwsem);
return 0;
}
erofs_fill_from_devinfo(map, dif);
erofs_fill_from_devinfo(map, sb, dif);
up_read(&devs->rwsem);
} else if (devs->extra_devices && !devs->flatdev) {
down_read(&devs->rwsem);
@ -232,7 +222,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
if (map->m_pa >= startoff &&
map->m_pa < startoff + length) {
map->m_pa -= startoff;
erofs_fill_from_devinfo(map, dif);
erofs_fill_from_devinfo(map, sb, dif);
break;
}
}
@ -302,7 +292,7 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
iomap->offset = map.m_la;
if (flags & IOMAP_DAX)
iomap->dax_dev = mdev.m_daxdev;
iomap->dax_dev = mdev.m_dif->dax_dev;
else
iomap->bdev = mdev.m_bdev;
iomap->length = map.m_llen;
@ -331,7 +321,7 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
iomap->type = IOMAP_MAPPED;
iomap->addr = mdev.m_pa;
if (flags & IOMAP_DAX)
iomap->addr += mdev.m_dax_part_off;
iomap->addr += mdev.m_dif->dax_part_off;
}
return 0;
}

View File

@ -9,6 +9,7 @@ struct erofs_fileio_rq {
struct bio_vec bvecs[BIO_MAX_VECS];
struct bio bio;
struct kiocb iocb;
struct super_block *sb;
};
struct erofs_fileio {
@ -52,8 +53,9 @@ static void erofs_fileio_rq_submit(struct erofs_fileio_rq *rq)
rq->iocb.ki_pos = rq->bio.bi_iter.bi_sector << SECTOR_SHIFT;
rq->iocb.ki_ioprio = get_current_ioprio();
rq->iocb.ki_complete = erofs_fileio_ki_complete;
rq->iocb.ki_flags = (rq->iocb.ki_filp->f_mode & FMODE_CAN_ODIRECT) ?
IOCB_DIRECT : 0;
if (test_opt(&EROFS_SB(rq->sb)->opt, DIRECT_IO) &&
rq->iocb.ki_filp->f_mode & FMODE_CAN_ODIRECT)
rq->iocb.ki_flags = IOCB_DIRECT;
iov_iter_bvec(&iter, ITER_DEST, rq->bvecs, rq->bio.bi_vcnt,
rq->bio.bi_iter.bi_size);
ret = vfs_iocb_iter_read(rq->iocb.ki_filp, &rq->iocb, &iter);
@ -67,7 +69,8 @@ static struct erofs_fileio_rq *erofs_fileio_rq_alloc(struct erofs_map_dev *mdev)
GFP_KERNEL | __GFP_NOFAIL);
bio_init(&rq->bio, NULL, rq->bvecs, BIO_MAX_VECS, REQ_OP_READ);
rq->iocb.ki_filp = mdev->m_fp;
rq->iocb.ki_filp = mdev->m_dif->file;
rq->sb = mdev->m_sb;
return rq;
}

View File

@ -198,7 +198,7 @@ struct bio *erofs_fscache_bio_alloc(struct erofs_map_dev *mdev)
io = kmalloc(sizeof(*io), GFP_KERNEL | __GFP_NOFAIL);
bio_init(&io->bio, NULL, io->bvecs, BIO_MAX_VECS, REQ_OP_READ);
io->io.private = mdev->m_fscache->cookie;
io->io.private = mdev->m_dif->fscache->cookie;
io->io.end_io = erofs_fscache_bio_endio;
refcount_set(&io->io.ref, 1);
return &io->bio;
@ -316,7 +316,7 @@ static int erofs_fscache_data_read_slice(struct erofs_fscache_rq *req)
if (!io)
return -ENOMEM;
iov_iter_xarray(&io->iter, ITER_DEST, &mapping->i_pages, pos, count);
ret = erofs_fscache_read_io_async(mdev.m_fscache->cookie,
ret = erofs_fscache_read_io_async(mdev.m_dif->fscache->cookie,
mdev.m_pa + (pos - map.m_la), io);
erofs_fscache_req_io_put(io);
@ -657,7 +657,7 @@ int erofs_fscache_register_fs(struct super_block *sb)
if (IS_ERR(fscache))
return PTR_ERR(fscache);
sbi->s_fscache = fscache;
sbi->dif0.fscache = fscache;
return 0;
}
@ -665,14 +665,14 @@ void erofs_fscache_unregister_fs(struct super_block *sb)
{
struct erofs_sb_info *sbi = EROFS_SB(sb);
erofs_fscache_unregister_cookie(sbi->s_fscache);
erofs_fscache_unregister_cookie(sbi->dif0.fscache);
if (sbi->domain)
erofs_fscache_domain_put(sbi->domain);
else
fscache_relinquish_volume(sbi->volume, NULL, false);
sbi->s_fscache = NULL;
sbi->dif0.fscache = NULL;
sbi->volume = NULL;
sbi->domain = NULL;
}

View File

@ -107,6 +107,7 @@ struct erofs_xattr_prefix_item {
};
struct erofs_sb_info {
struct erofs_device_info dif0;
struct erofs_mount_opts opt; /* options */
#ifdef CONFIG_EROFS_FS_ZIP
/* list for all registered superblocks, mainly for shrinker */
@ -124,13 +125,9 @@ struct erofs_sb_info {
struct erofs_sb_lz4_info lz4;
#endif /* CONFIG_EROFS_FS_ZIP */
struct file *fdev;
struct inode *packed_inode;
struct erofs_dev_context *devs;
struct dax_device *dax_dev;
u64 dax_part_off;
u64 total_blocks;
u32 primarydevice_blocks;
u32 meta_blkaddr;
#ifdef CONFIG_EROFS_FS_XATTR
@ -166,7 +163,6 @@ struct erofs_sb_info {
/* fscache support */
struct fscache_volume *volume;
struct erofs_fscache *s_fscache;
struct erofs_domain *domain;
char *fsid;
char *domain_id;
@ -180,6 +176,7 @@ struct erofs_sb_info {
#define EROFS_MOUNT_POSIX_ACL 0x00000020
#define EROFS_MOUNT_DAX_ALWAYS 0x00000040
#define EROFS_MOUNT_DAX_NEVER 0x00000080
#define EROFS_MOUNT_DIRECT_IO 0x00000100
#define clear_opt(opt, option) ((opt)->mount_opt &= ~EROFS_MOUNT_##option)
#define set_opt(opt, option) ((opt)->mount_opt |= EROFS_MOUNT_##option)
@ -187,7 +184,7 @@ struct erofs_sb_info {
static inline bool erofs_is_fileio_mode(struct erofs_sb_info *sbi)
{
return IS_ENABLED(CONFIG_EROFS_FS_BACKED_BY_FILE) && sbi->fdev;
return IS_ENABLED(CONFIG_EROFS_FS_BACKED_BY_FILE) && sbi->dif0.file;
}
static inline bool erofs_is_fscache_mode(struct super_block *sb)
@ -357,11 +354,9 @@ enum {
};
struct erofs_map_dev {
struct erofs_fscache *m_fscache;
struct super_block *m_sb;
struct erofs_device_info *m_dif;
struct block_device *m_bdev;
struct dax_device *m_daxdev;
struct file *m_fp;
u64 m_dax_part_off;
erofs_off_t m_pa;
unsigned int m_deviceid;

View File

@ -203,7 +203,7 @@ static int erofs_scan_devices(struct super_block *sb,
struct erofs_device_info *dif;
int id, err = 0;
sbi->total_blocks = sbi->primarydevice_blocks;
sbi->total_blocks = sbi->dif0.blocks;
if (!erofs_sb_has_device_table(sbi))
ondisk_extradevs = 0;
else
@ -307,7 +307,7 @@ static int erofs_read_superblock(struct super_block *sb)
sbi->sb_size);
goto out;
}
sbi->primarydevice_blocks = le32_to_cpu(dsb->blocks);
sbi->dif0.blocks = le32_to_cpu(dsb->blocks);
sbi->meta_blkaddr = le32_to_cpu(dsb->meta_blkaddr);
#ifdef CONFIG_EROFS_FS_XATTR
sbi->xattr_blkaddr = le32_to_cpu(dsb->xattr_blkaddr);
@ -364,14 +364,8 @@ static void erofs_default_options(struct erofs_sb_info *sbi)
}
enum {
Opt_user_xattr,
Opt_acl,
Opt_cache_strategy,
Opt_dax,
Opt_dax_enum,
Opt_device,
Opt_fsid,
Opt_domain_id,
Opt_user_xattr, Opt_acl, Opt_cache_strategy, Opt_dax, Opt_dax_enum,
Opt_device, Opt_fsid, Opt_domain_id, Opt_directio,
Opt_err
};
@ -398,6 +392,7 @@ static const struct fs_parameter_spec erofs_fs_parameters[] = {
fsparam_string("device", Opt_device),
fsparam_string("fsid", Opt_fsid),
fsparam_string("domain_id", Opt_domain_id),
fsparam_flag_no("directio", Opt_directio),
{}
};
@ -511,6 +506,16 @@ static int erofs_fc_parse_param(struct fs_context *fc,
errorfc(fc, "%s option not supported", erofs_fs_parameters[opt].name);
break;
#endif
case Opt_directio:
#ifdef CONFIG_EROFS_FS_BACKED_BY_FILE
if (result.boolean)
set_opt(&sbi->opt, DIRECT_IO);
else
clear_opt(&sbi->opt, DIRECT_IO);
#else
errorfc(fc, "%s option not supported", erofs_fs_parameters[opt].name);
#endif
break;
default:
return -ENOPARAM;
}
@ -602,9 +607,8 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
return -EINVAL;
}
sbi->dax_dev = fs_dax_get_by_bdev(sb->s_bdev,
&sbi->dax_part_off,
NULL, NULL);
sbi->dif0.dax_dev = fs_dax_get_by_bdev(sb->s_bdev,
&sbi->dif0.dax_part_off, NULL, NULL);
}
err = erofs_read_superblock(sb);
@ -627,7 +631,7 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
}
if (test_opt(&sbi->opt, DAX_ALWAYS)) {
if (!sbi->dax_dev) {
if (!sbi->dif0.dax_dev) {
errorfc(fc, "DAX unsupported by block device. Turning off DAX.");
clear_opt(&sbi->opt, DAX_ALWAYS);
} else if (sbi->blkszbits != PAGE_SHIFT) {
@ -703,16 +707,18 @@ static int erofs_fc_get_tree(struct fs_context *fc)
GET_TREE_BDEV_QUIET_LOOKUP : 0);
#ifdef CONFIG_EROFS_FS_BACKED_BY_FILE
if (ret == -ENOTBLK) {
struct file *file;
if (!fc->source)
return invalf(fc, "No source specified");
sbi->fdev = filp_open(fc->source, O_RDONLY | O_LARGEFILE, 0);
if (IS_ERR(sbi->fdev))
return PTR_ERR(sbi->fdev);
file = filp_open(fc->source, O_RDONLY | O_LARGEFILE, 0);
if (IS_ERR(file))
return PTR_ERR(file);
sbi->dif0.file = file;
if (S_ISREG(file_inode(sbi->fdev)->i_mode) &&
sbi->fdev->f_mapping->a_ops->read_folio)
if (S_ISREG(file_inode(sbi->dif0.file)->i_mode) &&
sbi->dif0.file->f_mapping->a_ops->read_folio)
return get_tree_nodev(fc, erofs_fc_fill_super);
fput(sbi->fdev);
}
#endif
return ret;
@ -763,17 +769,22 @@ static void erofs_free_dev_context(struct erofs_dev_context *devs)
kfree(devs);
}
static void erofs_sb_free(struct erofs_sb_info *sbi)
{
erofs_free_dev_context(sbi->devs);
kfree(sbi->fsid);
kfree(sbi->domain_id);
if (sbi->dif0.file)
fput(sbi->dif0.file);
kfree(sbi);
}
static void erofs_fc_free(struct fs_context *fc)
{
struct erofs_sb_info *sbi = fc->s_fs_info;
if (!sbi)
return;
erofs_free_dev_context(sbi->devs);
kfree(sbi->fsid);
kfree(sbi->domain_id);
kfree(sbi);
if (sbi) /* free here if an error occurs before transferring to sb */
erofs_sb_free(sbi);
}
static const struct fs_context_operations erofs_context_ops = {
@ -809,19 +820,14 @@ static void erofs_kill_sb(struct super_block *sb)
{
struct erofs_sb_info *sbi = EROFS_SB(sb);
if ((IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && sbi->fsid) || sbi->fdev)
if ((IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && sbi->fsid) ||
sbi->dif0.file)
kill_anon_super(sb);
else
kill_block_super(sb);
erofs_free_dev_context(sbi->devs);
fs_put_dax(sbi->dax_dev, NULL);
fs_put_dax(sbi->dif0.dax_dev, NULL);
erofs_fscache_unregister_fs(sb);
kfree(sbi->fsid);
kfree(sbi->domain_id);
if (sbi->fdev)
fput(sbi->fdev);
kfree(sbi);
erofs_sb_free(sbi);
sb->s_fs_info = NULL;
}
@ -947,6 +953,8 @@ static int erofs_show_options(struct seq_file *seq, struct dentry *root)
seq_puts(seq, ",dax=always");
if (test_opt(opt, DAX_NEVER))
seq_puts(seq, ",dax=never");
if (erofs_is_fileio_mode(sbi) && test_opt(opt, DIRECT_IO))
seq_puts(seq, ",directio");
#ifdef CONFIG_EROFS_FS_ONDEMAND
if (sbi->fsid)
seq_printf(seq, ",fsid=%s", sbi->fsid);

View File

@ -1792,9 +1792,9 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
erofs_fscache_submit_bio(bio);
else
submit_bio(bio);
if (memstall)
psi_memstall_leave(&pflags);
}
if (memstall)
psi_memstall_leave(&pflags);
/*
* although background is preferred, no one is pending for submission.

View File

@ -230,9 +230,10 @@ void erofs_shrinker_unregister(struct super_block *sb)
struct erofs_sb_info *const sbi = EROFS_SB(sb);
mutex_lock(&sbi->umount_mutex);
/* clean up all remaining pclusters in memory */
z_erofs_shrink_scan(sbi, ~0UL);
while (!xa_empty(&sbi->managed_pslots)) {
z_erofs_shrink_scan(sbi, ~0UL);
cond_resched();
}
spin_lock(&erofs_sb_list_lock);
list_del(&sbi->list);
spin_unlock(&erofs_sb_list_lock);