diff --git a/fs/file.c b/fs/file.c index fb1011cf6b4a..25c6e53b03f8 100644 --- a/fs/file.c +++ b/fs/file.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "internal.h" diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 3bee9b5dba5e..fe09c2093a93 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -349,11 +349,13 @@ static int hfs_fill_super(struct super_block *sb, struct fs_context *fc) goto bail_no_root; res = hfs_cat_find_brec(sb, HFS_ROOT_CNID, &fd); if (!res) { - if (fd.entrylength > sizeof(rec) || fd.entrylength < 0) { + if (fd.entrylength != sizeof(rec.dir)) { res = -EIO; goto bail_hfs_find; } hfs_bnode_read(fd.bnode, &rec, fd.entryoffset, fd.entrylength); + if (rec.type != HFS_CDR_DIR) + res = -EIO; } if (res) goto bail_hfs_find; diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 955f19e27e47..54dc27d92781 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -1774,7 +1774,8 @@ static bool iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t pos) */ static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, struct writeback_control *wbc, struct folio *folio, - struct inode *inode, loff_t pos, unsigned len) + struct inode *inode, loff_t pos, loff_t end_pos, + unsigned len) { struct iomap_folio_state *ifs = folio->private; size_t poff = offset_in_folio(folio, pos); @@ -1793,15 +1794,60 @@ static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, if (ifs) atomic_add(len, &ifs->write_bytes_pending); + + /* + * Clamp io_offset and io_size to the incore EOF so that ondisk + * file size updates in the ioend completion are byte-accurate. + * This avoids recovering files with zeroed tail regions when + * writeback races with appending writes: + * + * Thread 1: Thread 2: + * ------------ ----------- + * write [A, A+B] + * update inode size to A+B + * submit I/O [A, A+BS] + * write [A+B, A+B+C] + * update inode size to A+B+C + * + * + * + * After reboot: + * 1) with A+B+C < A+BS, the file has zero padding in range + * [A+B, A+B+C] + * + * |< Block Size (BS) >| + * |DDDDDDDDDDDD0000000000000| + * ^ ^ ^ + * A A+B A+B+C + * (EOF) + * + * 2) with A+B+C > A+BS, the file has zero padding in range + * [A+B, A+BS] + * + * |< Block Size (BS) >|< Block Size (BS) >| + * |DDDDDDDDDDDD0000000000000|00000000000000000000000000| + * ^ ^ ^ ^ + * A A+B A+BS A+B+C + * (EOF) + * + * D = Valid Data + * 0 = Zero Padding + * + * Note that this defeats the ability to chain the ioends of + * appending writes. + */ wpc->ioend->io_size += len; + if (wpc->ioend->io_offset + wpc->ioend->io_size > end_pos) + wpc->ioend->io_size = end_pos - wpc->ioend->io_offset; + wbc_account_cgroup_owner(wbc, folio, len); return 0; } static int iomap_writepage_map_blocks(struct iomap_writepage_ctx *wpc, struct writeback_control *wbc, struct folio *folio, - struct inode *inode, u64 pos, unsigned dirty_len, - unsigned *count) + struct inode *inode, u64 pos, u64 end_pos, + unsigned dirty_len, unsigned *count) { int error; @@ -1826,7 +1872,7 @@ static int iomap_writepage_map_blocks(struct iomap_writepage_ctx *wpc, break; default: error = iomap_add_to_ioend(wpc, wbc, folio, inode, pos, - map_len); + end_pos, map_len); if (!error) (*count)++; break; @@ -1897,11 +1943,11 @@ static bool iomap_writepage_handle_eof(struct folio *folio, struct inode *inode, * remaining memory is zeroed when mapped, and writes to that * region are not written out to the file. * - * Also adjust the writeback range to skip all blocks entirely - * beyond i_size. + * Also adjust the end_pos to the end of file and skip writeback + * for all blocks entirely beyond i_size. */ folio_zero_segment(folio, poff, folio_size(folio)); - *end_pos = round_up(isize, i_blocksize(inode)); + *end_pos = isize; } return true; @@ -1914,6 +1960,7 @@ static int iomap_writepage_map(struct iomap_writepage_ctx *wpc, struct inode *inode = folio->mapping->host; u64 pos = folio_pos(folio); u64 end_pos = pos + folio_size(folio); + u64 end_aligned = 0; unsigned count = 0; int error = 0; u32 rlen; @@ -1955,9 +2002,10 @@ static int iomap_writepage_map(struct iomap_writepage_ctx *wpc, /* * Walk through the folio to find dirty areas to write back. */ - while ((rlen = iomap_find_dirty_range(folio, &pos, end_pos))) { + end_aligned = round_up(end_pos, i_blocksize(inode)); + while ((rlen = iomap_find_dirty_range(folio, &pos, end_aligned))) { error = iomap_writepage_map_blocks(wpc, wbc, folio, inode, - pos, rlen, &count); + pos, end_pos, rlen, &count); if (error) break; pos += rlen; diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 9153ff3a08e7..e8e80761ac73 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -772,9 +772,9 @@ void jbd2_journal_commit_transaction(journal_t *journal) /* * If the journal is not located on the file system device, * then we must flush the file system device before we issue - * the commit record + * the commit record and update the journal tail sequence. */ - if (commit_transaction->t_need_data_flush && + if ((commit_transaction->t_need_data_flush || update_tail) && (journal->j_fs_dev != journal->j_dev) && (journal->j_flags & JBD2_BARRIER)) blkdev_issue_flush(journal->j_fs_dev); diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index 4556e4689024..ce63d5fde9c3 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -654,7 +654,7 @@ static void flush_descriptor(journal_t *journal, set_buffer_jwrite(descriptor); BUFFER_TRACE(descriptor, "write"); set_buffer_dirty(descriptor); - write_dirty_buffer(descriptor, REQ_SYNC); + write_dirty_buffer(descriptor, JBD2_JOURNAL_REQ_FLAGS); } #endif diff --git a/fs/namespace.c b/fs/namespace.c index 23e81c2a1e3f..6eec7794f707 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2055,9 +2055,15 @@ SYSCALL_DEFINE1(oldumount, char __user *, name) static bool is_mnt_ns_file(struct dentry *dentry) { + struct ns_common *ns; + /* Is this a proxy for a mount namespace? */ - return dentry->d_op == &ns_dentry_operations && - dentry->d_fsdata == &mntns_operations; + if (dentry->d_op != &ns_dentry_operations) + return false; + + ns = d_inode(dentry)->i_private; + + return ns->ops == &mntns_operations; } struct ns_common *from_mnt_ns(struct mnt_namespace *mnt) diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index eacafe46e3b6..aa4712362b3b 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -40,24 +40,15 @@ #define EXPKEY_HASHMAX (1 << EXPKEY_HASHBITS) #define EXPKEY_HASHMASK (EXPKEY_HASHMAX -1) -static void expkey_put_work(struct work_struct *work) +static void expkey_put(struct kref *ref) { - struct svc_expkey *key = - container_of(to_rcu_work(work), struct svc_expkey, ek_rcu_work); + struct svc_expkey *key = container_of(ref, struct svc_expkey, h.ref); if (test_bit(CACHE_VALID, &key->h.flags) && !test_bit(CACHE_NEGATIVE, &key->h.flags)) path_put(&key->ek_path); auth_domain_put(key->ek_client); - kfree(key); -} - -static void expkey_put(struct kref *ref) -{ - struct svc_expkey *key = container_of(ref, struct svc_expkey, h.ref); - - INIT_RCU_WORK(&key->ek_rcu_work, expkey_put_work); - queue_rcu_work(system_wq, &key->ek_rcu_work); + kfree_rcu(key, ek_rcu); } static int expkey_upcall(struct cache_detail *cd, struct cache_head *h) @@ -364,26 +355,16 @@ static void export_stats_destroy(struct export_stats *stats) EXP_STATS_COUNTERS_NUM); } -static void svc_export_put_work(struct work_struct *work) +static void svc_export_put(struct kref *ref) { - struct svc_export *exp = - container_of(to_rcu_work(work), struct svc_export, ex_rcu_work); - + struct svc_export *exp = container_of(ref, struct svc_export, h.ref); path_put(&exp->ex_path); auth_domain_put(exp->ex_client); nfsd4_fslocs_free(&exp->ex_fslocs); export_stats_destroy(exp->ex_stats); kfree(exp->ex_stats); kfree(exp->ex_uuid); - kfree(exp); -} - -static void svc_export_put(struct kref *ref) -{ - struct svc_export *exp = container_of(ref, struct svc_export, h.ref); - - INIT_RCU_WORK(&exp->ex_rcu_work, svc_export_put_work); - queue_rcu_work(system_wq, &exp->ex_rcu_work); + kfree_rcu(exp, ex_rcu); } static int svc_export_upcall(struct cache_detail *cd, struct cache_head *h) diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h index 6f2fbaae01fa..4d92b99c1ffd 100644 --- a/fs/nfsd/export.h +++ b/fs/nfsd/export.h @@ -75,7 +75,7 @@ struct svc_export { u32 ex_layout_types; struct nfsd4_deviceid_map *ex_devid_map; struct cache_detail *cd; - struct rcu_work ex_rcu_work; + struct rcu_head ex_rcu; unsigned long ex_xprtsec_modes; struct export_stats *ex_stats; }; @@ -92,7 +92,7 @@ struct svc_expkey { u32 ek_fsid[6]; struct path ek_path; - struct rcu_work ek_rcu_work; + struct rcu_head ek_rcu; }; #define EX_ISSYNC(exp) (!((exp)->ex_flags & NFSEXP_ASYNC)) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index f8a10f90bc7a..ad44ad49274f 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1347,7 +1347,6 @@ static void nfs4_put_copy(struct nfsd4_copy *copy) { if (!refcount_dec_and_test(©->refcount)) return; - atomic_dec(©->cp_nn->pending_async_copies); kfree(copy->cp_src); kfree(copy); } @@ -1870,6 +1869,7 @@ static int nfsd4_do_async_copy(void *data) set_bit(NFSD4_COPY_F_COMPLETED, ©->cp_flags); trace_nfsd_copy_async_done(copy); nfsd4_send_cb_offload(copy); + atomic_dec(©->cp_nn->pending_async_copies); return 0; } @@ -1927,19 +1927,19 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, /* Arbitrary cap on number of pending async copy operations */ if (atomic_inc_return(&nn->pending_async_copies) > (int)rqstp->rq_pool->sp_nrthreads) - goto out_err; + goto out_dec_async_copy_err; async_copy->cp_src = kmalloc(sizeof(*async_copy->cp_src), GFP_KERNEL); if (!async_copy->cp_src) - goto out_err; + goto out_dec_async_copy_err; if (!nfs4_init_copy_state(nn, copy)) - goto out_err; + goto out_dec_async_copy_err; memcpy(&result->cb_stateid, ©->cp_stateid.cs_stid, sizeof(result->cb_stateid)); dup_copy_fields(copy, async_copy); async_copy->copy_task = kthread_create(nfsd4_do_async_copy, async_copy, "%s", "copy thread"); if (IS_ERR(async_copy->copy_task)) - goto out_err; + goto out_dec_async_copy_err; spin_lock(&async_copy->cp_clp->async_lock); list_add(&async_copy->copies, &async_copy->cp_clp->async_copies); @@ -1954,6 +1954,9 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, trace_nfsd_copy_done(copy, status); release_copy_files(copy); return status; +out_dec_async_copy_err: + if (async_copy) + atomic_dec(&nn->pending_async_copies); out_err: if (nfsd4_ssc_is_inter(copy)) { /* diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c index 85925ec0051a..3310d1ad4d0e 100644 --- a/fs/qnx6/inode.c +++ b/fs/qnx6/inode.c @@ -179,8 +179,7 @@ static int qnx6_statfs(struct dentry *dentry, struct kstatfs *buf) */ static const char *qnx6_checkroot(struct super_block *s) { - static char match_root[2][3] = {".\0\0", "..\0"}; - int i, error = 0; + int error = 0; struct qnx6_dir_entry *dir_entry; struct inode *root = d_inode(s->s_root); struct address_space *mapping = root->i_mapping; @@ -189,11 +188,9 @@ static const char *qnx6_checkroot(struct super_block *s) if (IS_ERR(folio)) return "error reading root directory"; dir_entry = kmap_local_folio(folio, 0); - for (i = 0; i < 2; i++) { - /* maximum 3 bytes - due to match_root limitation */ - if (strncmp(dir_entry[i].de_fname, match_root[i], 3)) - error = 1; - } + if (memcmp(dir_entry[0].de_fname, ".", 2) || + memcmp(dir_entry[1].de_fname, "..", 3)) + error = 1; folio_release_kmap(folio, dir_entry); if (error) return "error reading root directory."; diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 5675af6b740c..75bf54e76f3b 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -335,7 +335,7 @@ struct iomap_ioend { u16 io_type; u16 io_flags; /* IOMAP_F_* */ struct inode *io_inode; /* file being written to */ - size_t io_size; /* size of the extent */ + size_t io_size; /* size of data within eof */ loff_t io_offset; /* offset in the file */ sector_t io_sector; /* start sector of ioend */ struct bio io_bio; /* MUST BE LAST! */