From 7059a9aa4b6b8c6daf257a3978a4d8c476c29a96 Mon Sep 17 00:00:00 2001 From: Changcheng Liu Date: Sun, 12 Mar 2023 17:25:22 +0800 Subject: [PATCH 01/13] eventpoll: align comment with nested epoll limitation fix comment in commit 02edc6fc4d5f ("epoll: comment the funky #ifdef") Signed-off-by: Liu, Changcheng Signed-off-by: Christian Brauner (Microsoft) --- fs/eventpoll.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 64659b110973..f6d25050dd7a 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -483,8 +483,8 @@ static inline void ep_set_busy_poll_napi_id(struct epitem *epi) * (efd1) notices that it may have some event ready, so it needs to wake up * the waiters on its poll wait list (efd2). So it calls ep_poll_safewake() * that ends up in another wake_up(), after having checked about the - * recursion constraints. That are, no more than EP_MAX_POLLWAKE_NESTS, to - * avoid stack blasting. + * recursion constraints. That is, no more than EP_MAX_NESTS, to avoid + * stack blasting. * * When CONFIG_DEBUG_LOCK_ALLOC is enabled, make sure lockdep can handle * this special case of epoll. From 3e27877ac565807135209933e0894d8c522cc520 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 10 Mar 2023 15:12:03 -0800 Subject: [PATCH 02/13] devpts: simplify two-level sysctl registration for pty_kern_table There is no need to declare two tables to just create directories, this can easily be done with a prefix path with register_sysctl(). Simplify this registration.
Signed-off-by: Luis Chamberlain Signed-off-by: Christian Brauner (Microsoft) --- fs/devpts/inode.c | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 4f25015aa534..fe3db0eda8e4 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -72,24 +72,6 @@ static struct ctl_table pty_table[] = { {} }; -static struct ctl_table pty_kern_table[] = { - { - .procname = "pty", - .mode = 0555, - .child = pty_table, - }, - {} -}; - -static struct ctl_table pty_root_table[] = { - { - .procname = "kernel", - .mode = 0555, - .child = pty_kern_table, - }, - {} -}; - struct pts_mount_opts { int setuid; int setgid; @@ -630,7 +612,7 @@ static int __init init_devpts_fs(void) { int err = register_filesystem(&devpts_fs_type); if (!err) { - register_sysctl_table(pty_root_table); + register_sysctl("kernel/pty", pty_table); } return err; } From 5d3ca5968480758a29a0b2777da9049a7c5134e3 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 6 Mar 2023 15:11:42 +0100 Subject: [PATCH 03/13] Documentation: update idmappings.rst Quite a lot has changed over the last few kernel releases with the introduction of vfs{g,u}id_t and struct mnt_idmap. Update the documentation accordingly. Cc: Seth Forshee Signed-off-by: Christian Brauner (Microsoft) --- Documentation/filesystems/idmappings.rst | 178 ++++++++++++++++------- 1 file changed, 125 insertions(+), 53 deletions(-) diff --git a/Documentation/filesystems/idmappings.rst b/Documentation/filesystems/idmappings.rst index b9b31066aef2..ad6d21640576 100644 --- a/Documentation/filesystems/idmappings.rst +++ b/Documentation/filesystems/idmappings.rst @@ -241,7 +241,7 @@ according to the filesystem's idmapping as this would give the wrong owner if the caller is using an idmapping. So the kernel will map the id back up in the idmapping of the caller. 
Let's -assume the caller has the slighly unconventional idmapping +assume the caller has the somewhat unconventional idmapping ``u3000:k20000:r10000`` then ``k21000`` would map back up to ``u4000``. Consequently the user would see that this file is owned by ``u4000``. @@ -320,6 +320,10 @@ and equally wrong:: from_kuid(u20000:k0:r10000, u1000) = k21000 ~~~~~ +Since userspace ids have type ``uid_t`` and ``gid_t`` and kernel ids have type +``kuid_t`` and ``kgid_t`` the compiler will throw an error when they are +conflated. So the two examples above would cause a compilation failure. + Idmappings when creating filesystem objects ------------------------------------------- @@ -623,42 +627,105 @@ privileged users in the initial user namespace. However, it is perfectly possible to combine idmapped mounts with filesystems mountable inside user namespaces. We will touch on this further below. +Filesystem types vs idmapped mount types +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +With the introduction of idmapped mounts we need to distinguish between +filesystem ownership and mount ownership of a VFS object such as an inode. The +owner of an inode might be different when looked at from a filesystem +perspective than when looked at from an idmapped mount. Such fundamental +conceptual distinctions should almost always be clearly expressed in the code. +So, to distinguish idmapped mount ownership from filesystem ownership separate +types have been introduced. + +If a uid or gid has been generated using the filesystem or caller's idmapping +then we will use the ``kuid_t`` and ``kgid_t`` types. However, if a uid or gid +has been generated using a mount idmapping then we will be using the dedicated +``vfsuid_t`` and ``vfsgid_t`` types. + +All VFS helpers that generate or take uids and gids as arguments use the +``vfsuid_t`` and ``vfsgid_t`` types and we will be able to rely on the compiler +to catch errors that originate from conflating filesystem and VFS uids and gids.
+ +The ``vfsuid_t`` and ``vfsgid_t`` types are often mapped from and to ``kuid_t`` +and ``kgid_t`` types similar to how ``kuid_t`` and ``kgid_t`` types are mapped +from and to ``uid_t`` and ``gid_t`` types:: + + uid_t <--> kuid_t <--> vfsuid_t + gid_t <--> kgid_t <--> vfsgid_t + +Whenever we report ownership based on a ``vfsuid_t`` or ``vfsgid_t`` type, +e.g., during ``stat()``, or store ownership information in a shared VFS object +based on a ``vfsuid_t`` or ``vfsgid_t`` type, e.g., during ``chown()`` we can +use the ``vfsuid_into_kuid()`` and ``vfsgid_into_kgid()`` helpers. + +To illustrate why these helpers currently exist, consider what happens when we +change ownership of an inode from an idmapped mount. After we generated +a ``vfsuid_t`` or ``vfsgid_t`` based on the mount idmapping we later commit to +this ``vfsuid_t`` or ``vfsgid_t`` to become the new filesystem-wide ownership. +Thus, we are turning the ``vfsuid_t`` or ``vfsgid_t`` into a global ``kuid_t`` +or ``kgid_t``. And this can be done by using ``vfsuid_into_kuid()`` and +``vfsgid_into_kgid()``. + +Note, whenever a shared VFS object, e.g., a cached ``struct inode`` or a cached +``struct posix_acl``, stores ownership information a filesystem or "global" +``kuid_t`` and ``kgid_t`` must be used. Ownership expressed via ``vfsuid_t`` +and ``vfsgid_t`` is specific to an idmapped mount. + +We already noted that ``vfsuid_t`` and ``vfsgid_t`` types are generated based +on mount idmappings whereas ``kuid_t`` and ``kgid_t`` types are generated based +on filesystem idmappings. To prevent abusing filesystem idmappings to generate +``vfsuid_t`` or ``vfsgid_t`` types or mount idmappings to generate ``kuid_t`` +or ``kgid_t`` types filesystem idmappings and mount idmappings are different +types as well. + +All helpers that map to or from ``vfsuid_t`` and ``vfsgid_t`` types require +a mount idmapping to be passed which is of type ``struct mnt_idmap``. Passing +a filesystem or caller idmapping will cause a compilation error.
+ +Similar to how we prefix all userspace ids in this document with ``u`` and all +kernel ids with ``k`` we will prefix all VFS ids with ``v``. So a mount +idmapping will be written as: ``u0:v10000:r10000``. + Remapping helpers ~~~~~~~~~~~~~~~~~ Idmapping functions were added that translate between idmappings. They make use -of the remapping algorithm we've introduced earlier. We're going to look at -two: +of the remapping algorithm we've introduced earlier. We're going to look at: -- ``i_uid_into_mnt()`` and ``i_gid_into_mnt()`` +- ``i_uid_into_vfsuid()`` and ``i_gid_into_vfsgid()`` - The ``i_*id_into_mnt()`` functions translate filesystem's kernel ids into - kernel ids in the mount's idmapping:: + The ``i_*id_into_vfs*id()`` functions translate filesystem's kernel ids into + VFS ids in the mount's idmapping:: /* Map the filesystem's kernel id up into a userspace id in the filesystem's idmapping. */ from_kuid(filesystem, kid) = uid - /* Map the filesystem's userspace id down ito a kernel id in the mount's idmapping. */ + /* Map the filesystem's userspace id down into a VFS id in the mount's idmapping. */ make_kuid(mount, uid) = kuid - ``mapped_fsuid()`` and ``mapped_fsgid()`` The ``mapped_fs*id()`` functions translate the caller's kernel ids into kernel ids in the filesystem's idmapping. This translation is achieved by - remapping the caller's kernel ids using the mount's idmapping:: + remapping the caller's VFS ids using the mount's idmapping:: - /* Map the caller's kernel id up into a userspace id in the mount's idmapping. */ + /* Map the caller's VFS id up into a userspace id in the mount's idmapping. */ from_kuid(mount, kid) = uid /* Map the mount's userspace id down into a kernel id in the filesystem's idmapping. */ make_kuid(filesystem, uid) = kuid +- ``vfsuid_into_kuid()`` and ``vfsgid_into_kgid()`` + + Whenever ownership based on a ``vfsuid_t`` or ``vfsgid_t`` is reported or stored in a shared VFS object, these helpers turn it into a ``kuid_t`` or ``kgid_t``. + Note that these two functions invert each other.
Consider the following idmappings:: caller idmapping: u0:k10000:r10000 filesystem idmapping: u0:k20000:r10000 - mount idmapping: u0:k10000:r10000 + mount idmapping: u0:v10000:r10000 Assume a file owned by ``u1000`` is read from disk. The filesystem maps this id to ``k21000`` according to its idmapping. This is what is stored in the @@ -669,20 +736,21 @@ would usually simply use the crossmapping algorithm and map the filesystem's kernel id up to a userspace id in the caller's idmapping. But when the caller is accessing the file on an idmapped mount the kernel will -first call ``i_uid_into_mnt()`` thereby translating the filesystem's kernel id -into a kernel id in the mount's idmapping:: +first call ``i_uid_into_vfsuid()`` thereby translating the filesystem's kernel +id into a VFS id in the mount's idmapping:: - i_uid_into_mnt(k21000): + i_uid_into_vfsuid(k21000): /* Map the filesystem's kernel id up into a userspace id. */ from_kuid(u0:k20000:r10000, k21000) = u1000 - /* Map the filesystem's userspace id down ito a kernel id in the mount's idmapping. */ - make_kuid(u0:k10000:r10000, u1000) = k11000 + /* Map the filesystem's userspace id down into a VFS id in the mount's idmapping. */ + make_kuid(u0:v10000:r10000, u1000) = v11000 Finally, when the kernel reports the owner to the caller it will turn the -kernel id in the mount's idmapping into a userspace id in the caller's +VFS id in the mount's idmapping into a userspace id in the caller's idmapping:: + k11000 = vfsuid_into_kuid(v11000) from_kuid(u0:k10000:r10000, k11000) = u1000 We can test whether this algorithm really works by verifying what happens when @@ -696,18 +764,19 @@ fails. 
But when the caller is accessing the file on an idmapped mount the kernel will first call ``mapped_fs*id()`` thereby translating the caller's kernel id into -a kernel id according to the mount's idmapping:: +a VFS id according to the mount's idmapping:: mapped_fsuid(k11000): /* Map the caller's kernel id up into a userspace id in the mount's idmapping. */ from_kuid(u0:k10000:r10000, k11000) = u1000 /* Map the mount's userspace id down into a kernel id in the filesystem's idmapping. */ - make_kuid(u0:k20000:r10000, u1000) = k21000 + make_kuid(u0:v20000:r10000, u1000) = v21000 -When finally writing to disk the kernel will then map ``k21000`` up into a +When finally writing to disk the kernel will then map ``v21000`` up into a userspace id in the filesystem's idmapping:: + k21000 = vfsuid_into_kuid(v21000) from_kuid(u0:k20000:r10000, k21000) = u1000 As we can see, we end up with an invertible and therefore information @@ -725,7 +794,7 @@ Example 2 reconsidered caller id: u1000 caller idmapping: u0:k10000:r10000 filesystem idmapping: u0:k20000:r10000 - mount idmapping: u0:k10000:r10000 + mount idmapping: u0:v10000:r10000 When the caller is using a non-initial idmapping the common case is to attach the same idmapping to the mount. We now perform three steps: @@ -734,12 +803,12 @@ the same idmapping to the mount. We now perform three steps: make_kuid(u0:k10000:r10000, u1000) = k11000 -2. Translate the caller's kernel id into a kernel id in the filesystem's +2. Translate the caller's VFS id into a kernel id in the filesystem's idmapping:: - mapped_fsuid(k11000): - /* Map the kernel id up into a userspace id in the mount's idmapping. */ - from_kuid(u0:k10000:r10000, k11000) = u1000 + mapped_fsuid(v11000): + /* Map the VFS id up into a userspace id in the mount's idmapping. */ + from_kuid(u0:v10000:r10000, v11000) = u1000 /* Map the userspace id down into a kernel id in the filesystem's idmapping. 
*/ make_kuid(u0:k20000:r10000, u1000) = k21000 @@ -759,7 +828,7 @@ Example 3 reconsidered caller id: u1000 caller idmapping: u0:k10000:r10000 filesystem idmapping: u0:k0:r4294967295 - mount idmapping: u0:k10000:r10000 + mount idmapping: u0:v10000:r10000 The same translation algorithm works with the third example. @@ -767,12 +836,12 @@ The same translation algorithm works with the third example. make_kuid(u0:k10000:r10000, u1000) = k11000 -2. Translate the caller's kernel id into a kernel id in the filesystem's +2. Translate the caller's VFS id into a kernel id in the filesystem's idmapping:: - mapped_fsuid(k11000): - /* Map the kernel id up into a userspace id in the mount's idmapping. */ - from_kuid(u0:k10000:r10000, k11000) = u1000 + mapped_fsuid(v11000): + /* Map the VFS id up into a userspace id in the mount's idmapping. */ + from_kuid(u0:v10000:r10000, v11000) = u1000 /* Map the userspace id down into a kernel id in the filesystem's idmapping. */ make_kuid(u0:k0:r4294967295, u1000) = k1000 @@ -792,7 +861,7 @@ Example 4 reconsidered file id: u1000 caller idmapping: u0:k10000:r10000 filesystem idmapping: u0:k0:r4294967295 - mount idmapping: u0:k10000:r10000 + mount idmapping: u0:v10000:r10000 In order to report ownership to userspace the kernel now does three steps using the translation algorithm we introduced earlier: @@ -802,17 +871,18 @@ the translation algorithm we introduced earlier: make_kuid(u0:k0:r4294967295, u1000) = k1000 -2. Translate the kernel id into a kernel id in the mount's idmapping:: +2. Translate the kernel id into a VFS id in the mount's idmapping:: - i_uid_into_mnt(k1000): + i_uid_into_vfsuid(k1000): /* Map the kernel id up into a userspace id in the filesystem's idmapping. */ from_kuid(u0:k0:r4294967295, k1000) = u1000 - /* Map the userspace id down into a kernel id in the mounts's idmapping. */ - make_kuid(u0:k10000:r10000, u1000) = k11000 + /* Map the userspace id down into a VFS id in the mounts's idmapping. 
*/ + make_kuid(u0:v10000:r10000, u1000) = v11000 -3. Map the kernel id up into a userspace id in the caller's idmapping:: +3. Map the VFS id up into a userspace id in the caller's idmapping:: + k11000 = vfsuid_into_kuid(v11000) from_kuid(u0:k10000:r10000, k11000) = u1000 Earlier, the caller's kernel id couldn't be crossmapped in the filesystems's @@ -828,7 +898,7 @@ Example 5 reconsidered file id: u1000 caller idmapping: u0:k10000:r10000 filesystem idmapping: u0:k20000:r10000 - mount idmapping: u0:k10000:r10000 + mount idmapping: u0:v10000:r10000 Again, in order to report ownership to userspace the kernel now does three steps using the translation algorithm we introduced earlier: @@ -838,17 +908,18 @@ steps using the translation algorithm we introduced earlier: make_kuid(u0:k20000:r10000, u1000) = k21000 -2. Translate the kernel id into a kernel id in the mount's idmapping:: +2. Translate the kernel id into a VFS id in the mount's idmapping:: - i_uid_into_mnt(k21000): + i_uid_into_vfsuid(k21000): /* Map the kernel id up into a userspace id in the filesystem's idmapping. */ from_kuid(u0:k20000:r10000, k21000) = u1000 - /* Map the userspace id down into a kernel id in the mounts's idmapping. */ - make_kuid(u0:k10000:r10000, u1000) = k11000 + /* Map the userspace id down into a VFS id in the mounts's idmapping. */ + make_kuid(u0:v10000:r10000, u1000) = v11000 -3. Map the kernel id up into a userspace id in the caller's idmapping:: +3. Map the VFS id up into a userspace id in the caller's idmapping:: + k11000 = vfsuid_into_kuid(v11000) from_kuid(u0:k10000:r10000, k11000) = u1000 Earlier, the file's kernel id couldn't be crossmapped in the filesystems's @@ -899,23 +970,23 @@ from above::: caller id: u1125 caller idmapping: u0:k0:r4294967295 filesystem idmapping: u0:k0:r4294967295 - mount idmapping: u1000:k1125:r1 + mount idmapping: u1000:v1125:r1 1. Map the caller's userspace ids into kernel ids in the caller's idmapping:: make_kuid(u0:k0:r4294967295, u1125) = k1125 -2. 
Translate the caller's kernel id into a kernel id in the filesystem's +2. Translate the caller's VFS id into a kernel id in the filesystem's idmapping:: - mapped_fsuid(k1125): - /* Map the kernel id up into a userspace id in the mount's idmapping. */ - from_kuid(u1000:k1125:r1, k1125) = u1000 + mapped_fsuid(v1125): + /* Map the VFS id up into a userspace id in the mount's idmapping. */ + from_kuid(u1000:v1125:r1, v1125) = u1000 /* Map the userspace id down into a kernel id in the filesystem's idmapping. */ make_kuid(u0:k0:r4294967295, u1000) = k1000 -2. Verify that the caller's kernel ids can be mapped to userspace ids in the +2. Verify that the caller's filesystem ids can be mapped to userspace ids in the filesystem's idmapping:: from_kuid(u0:k0:r4294967295, k1000) = u1000 @@ -930,24 +1001,25 @@ on their work computer: file id: u1000 caller idmapping: u0:k0:r4294967295 filesystem idmapping: u0:k0:r4294967295 - mount idmapping: u1000:k1125:r1 + mount idmapping: u1000:v1125:r1 1. Map the userspace id on disk down into a kernel id in the filesystem's idmapping:: make_kuid(u0:k0:r4294967295, u1000) = k1000 -2. Translate the kernel id into a kernel id in the mount's idmapping:: +2. Translate the kernel id into a VFS id in the mount's idmapping:: - i_uid_into_mnt(k1000): + i_uid_into_vfsuid(k1000): /* Map the kernel id up into a userspace id in the filesystem's idmapping. */ from_kuid(u0:k0:r4294967295, k1000) = u1000 - /* Map the userspace id down into a kernel id in the mounts's idmapping. */ - make_kuid(u1000:k1125:r1, u1000) = k1125 + /* Map the userspace id down into a VFS id in the mounts's idmapping. */ + make_kuid(u1000:v1125:r1, u1000) = v1125 -3. Map the kernel id up into a userspace id in the caller's idmapping:: +3. 
Map the VFS id up into a userspace id in the caller's idmapping:: + k1125 = vfsuid_into_kuid(v1125) from_kuid(u0:k0:r4294967295, k1125) = u1125 So ultimately the caller will be reported that the file belongs to ``u1125`` From 74e60b8b2f0fe3702710e648a31725ee8224dbdf Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 14 Mar 2023 17:09:06 +0200 Subject: [PATCH 04/13] fs/namespace: fnic: Switch to use %ptTd Use %ptTd instead of open-coded variant to print contents of time64_t type in human readable form. Signed-off-by: Andy Shevchenko Signed-off-by: Christian Brauner (Microsoft) --- fs/namespace.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index bc0f15257b49..a6e4c3a3179f 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2617,15 +2617,12 @@ static void mnt_warn_timestamp_expiry(struct path *mountpoint, struct vfsmount * (ktime_get_real_seconds() + TIME_UPTIME_SEC_MAX > sb->s_time_max)) { char *buf = (char *)__get_free_page(GFP_KERNEL); char *mntpath = buf ? d_path(mountpoint, buf, PAGE_SIZE) : ERR_PTR(-ENOMEM); - struct tm tm; - time64_to_tm(sb->s_time_max, 0, &tm); - - pr_warn("%s filesystem being %s at %s supports timestamps until %04ld (0x%llx)\n", + pr_warn("%s filesystem being %s at %s supports timestamps until %ptTd (0x%llx)\n", sb->s_type->name, is_mounted(mnt) ? "remounted" : "mounted", - mntpath, - tm.tm_year+1900, (unsigned long long)sb->s_time_max); + mntpath, &sb->s_time_max, + (unsigned long long)sb->s_time_max); free_page((unsigned long)buf); sb->s_iflags |= SB_I_TS_EXPIRY_WARNED; From 4e04143c869c5b6d499fbd5083caa860d5c942c3 Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Thu, 16 Mar 2023 14:07:51 +0100 Subject: [PATCH 05/13] fs_context: drop the unused lsm_flags member This isn't ever used by VFS now, and it couldn't even work. 
Any FS that uses the SECURITY_LSM_NATIVE_LABELS flag needs to also process the value returned back from the LSM, so it needs to do its security_sb_set_mnt_opts() call on its own anyway. Signed-off-by: Ondrej Mosnacek Signed-off-by: Christian Brauner (Microsoft) --- Documentation/filesystems/mount_api.rst | 1 - fs/nfs/super.c | 3 --- include/linux/fs_context.h | 1 - include/linux/security.h | 2 +- 4 files changed, 1 insertion(+), 6 deletions(-) diff --git a/Documentation/filesystems/mount_api.rst b/Documentation/filesystems/mount_api.rst index 63204d2094fd..9aaf6ef75eb5 100644 --- a/Documentation/filesystems/mount_api.rst +++ b/Documentation/filesystems/mount_api.rst @@ -79,7 +79,6 @@ context. This is represented by the fs_context structure:: unsigned int sb_flags; unsigned int sb_flags_mask; unsigned int s_iflags; - unsigned int lsm_flags; enum fs_context_purpose purpose:8; ... }; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 05ae23657527..397c096d874e 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1274,9 +1274,6 @@ int nfs_get_tree_common(struct fs_context *fc) if (ctx->clone_data.sb->s_flags & SB_SYNCHRONOUS) fc->sb_flags |= SB_SYNCHRONOUS; - if (server->caps & NFS_CAP_SECURITY_LABEL) - fc->lsm_flags |= SECURITY_LSM_NATIVE_LABELS; - /* Get a superblock - note that we may end up sharing one that already exists */ fc->s_fs_info = server; s = sget_fc(fc, compare_super, nfs_set_super); diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h index 5469ffee21c7..ff6341e09925 100644 --- a/include/linux/fs_context.h +++ b/include/linux/fs_context.h @@ -104,7 +104,6 @@ struct fs_context { unsigned int sb_flags; /* Proposed superblock flags (SB_*) */ unsigned int sb_flags_mask; /* Superblock flags that were changed */ unsigned int s_iflags; /* OR'd with sb->s_iflags */ - unsigned int lsm_flags; /* Information flags from the fs to the LSM */ enum fs_context_purpose purpose:8; enum fs_context_phase phase:8; /* The phase the context is in */ bool 
need_free:1; /* Need to call ops->free() */ diff --git a/include/linux/security.h b/include/linux/security.h index 5984d0d550b4..db9b659b02b9 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -68,7 +68,7 @@ struct watch_notification; /* If capable is being called by a setid function */ #define CAP_OPT_INSETID BIT(2) -/* LSM Agnostic defines for fs_context::lsm_flags */ +/* LSM Agnostic defines for security_sb_set_mnt_opts() flags */ #define SECURITY_LSM_NATIVE_LABELS 1 struct ctl_table; From dc7cb2d29805fe4fa4000fc0b09740fc24c93408 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Thu, 23 Mar 2023 10:32:59 +0800 Subject: [PATCH 06/13] fs/buffer: Remove redundant assignment to err Variable 'err' set but not used. fs/buffer.c:2613:2: warning: Value stored to 'err' is never read. Reported-by: Abaci Robot Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=4589 Signed-off-by: Jiapeng Chong Signed-off-by: Christian Brauner --- fs/buffer.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index 9e1e2add541e..10390f53f3f5 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2581,7 +2581,7 @@ int block_truncate_page(struct address_space *mapping, struct inode *inode = mapping->host; struct page *page; struct buffer_head *bh; - int err; + int err = 0; blocksize = i_blocksize(inode); length = offset & (blocksize - 1); @@ -2594,9 +2594,8 @@ int block_truncate_page(struct address_space *mapping, iblock = (sector_t)index << (PAGE_SHIFT - inode->i_blkbits); page = grab_cache_page(mapping, index); - err = -ENOMEM; if (!page) - goto out; + return -ENOMEM; if (!page_has_buffers(page)) create_empty_buffers(page, blocksize, 0); @@ -2610,7 +2609,6 @@ int block_truncate_page(struct address_space *mapping, pos += blocksize; } - err = 0; if (!buffer_mapped(bh)) { WARN_ON(bh->b_size != blocksize); err = get_block(inode, iblock, bh, 0); @@ -2634,12 +2632,11 @@ int block_truncate_page(struct address_space *mapping, 
zero_user(page, offset, length); mark_buffer_dirty(bh); - err = 0; unlock: unlock_page(page); put_page(page); -out: + return err; } EXPORT_SYMBOL(block_truncate_page); From d98ffa1aca264ce547b9135135f83d81cfe4345f Mon Sep 17 00:00:00 2001 From: Stephen Kitt Date: Sat, 25 Mar 2023 09:22:32 +0100 Subject: [PATCH 07/13] Update relatime comments to include equality relatime also updates atime if the previous atime is equal to one or both of the ctime and mtime; a non-strict interpretation of "earlier than" and "younger than" in the comments allows this, but for clarity, this makes it explicit. Pointed out by "epiii2" and "ctrl-alt-delor" in https://unix.stackexchange.com/q/740862/86440. Signed-off-by: Stephen Kitt Signed-off-by: Christian Brauner --- fs/inode.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index 4558dc2f1355..3ec5a8f7b644 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1804,8 +1804,8 @@ EXPORT_SYMBOL(bmap); /* * With relative atime, only update atime if the previous atime is - * earlier than either the ctime or mtime or if at least a day has - * passed since the last atime update. + * earlier than or equal to either the ctime or mtime, + * or if at least a day has passed since the last atime update. */ static int relatime_need_update(struct vfsmount *mnt, struct inode *inode, struct timespec64 now) @@ -1814,12 +1814,12 @@ static int relatime_need_update(struct vfsmount *mnt, struct inode *inode, if (!(mnt->mnt_flags & MNT_RELATIME)) return 1; /* - * Is mtime younger than atime? If yes, update atime: + * Is mtime younger than or equal to atime? If yes, update atime: */ if (timespec64_compare(&inode->i_mtime, &inode->i_atime) >= 0) return 1; /* - * Is ctime younger than atime? If yes, update atime: + * Is ctime younger than or equal to atime? 
If yes, update atime: */ if (timespec64_compare(&inode->i_ctime, &inode->i_atime) >= 0) return 1; From 4f704d9a8352f5c0a8fcdb6213b934630342bd44 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 14 Mar 2023 12:51:10 +0100 Subject: [PATCH 08/13] nfs: use vfs setgid helper We've aligned setgid behavior over multiple kernel releases. The details can be found in the following two merge messages: cf619f891971 ("Merge tag 'fs.ovl.setgid.v6.2') 426b4ca2d6a5 ("Merge tag 'fs.setgid.v6.0') Consistent setgid stripping behavior is now encapsulated in the setattr_should_drop_sgid() helper which is used by all filesystems that strip setgid bits outside of vfs proper. Switch nfs to rely on this helper as well. Without this patch the setgid stripping tests in xfstests will fail. Signed-off-by: Christian Brauner (Microsoft) Reviewed-by: Christoph Hellwig Message-Id: <20230313-fs-nfs-setgid-v2-1-9a59f436cfc0@kernel.org> Signed-off-by: Christian Brauner --- fs/attr.c | 1 + fs/internal.h | 2 -- fs/nfs/inode.c | 4 +--- include/linux/fs.h | 2 ++ 4 files changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/attr.c b/fs/attr.c index aca9ff7aed33..d60dc1edb526 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -47,6 +47,7 @@ int setattr_should_drop_sgid(struct mnt_idmap *idmap, return ATTR_KILL_SGID; return 0; } +EXPORT_SYMBOL(setattr_should_drop_sgid); /** * setattr_should_drop_suidgid - determine whether the set{g,u}id bit needs to diff --git a/fs/internal.h b/fs/internal.h index dc4eb91a577a..ab36ed8fa41c 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -259,8 +259,6 @@ ssize_t __kernel_write_iter(struct file *file, struct iov_iter *from, loff_t *po /* * fs/attr.c */ -int setattr_should_drop_sgid(struct mnt_idmap *idmap, - const struct inode *inode); struct mnt_idmap *alloc_mnt_idmap(struct user_namespace *mnt_userns); struct mnt_idmap *mnt_idmap_get(struct mnt_idmap *idmap); void mnt_idmap_put(struct mnt_idmap *idmap); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 
222a28320e1c..97a76706fd54 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -717,9 +717,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, if ((attr->ia_valid & ATTR_KILL_SUID) != 0 && inode->i_mode & S_ISUID) inode->i_mode &= ~S_ISUID; - if ((attr->ia_valid & ATTR_KILL_SGID) != 0 && - (inode->i_mode & (S_ISGID | S_IXGRP)) == - (S_ISGID | S_IXGRP)) + if (setattr_should_drop_sgid(&nop_mnt_idmap, inode)) inode->i_mode &= ~S_ISGID; if ((attr->ia_valid & ATTR_MODE) != 0) { int mode = attr->ia_mode & S_IALLUGO; diff --git a/include/linux/fs.h b/include/linux/fs.h index c85916e9f7db..af95b64fc810 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2675,6 +2675,8 @@ extern struct inode *new_inode(struct super_block *sb); extern void free_inode_nonrcu(struct inode *inode); extern int setattr_should_drop_suidgid(struct mnt_idmap *, struct inode *); extern int file_remove_privs(struct file *); +int setattr_should_drop_sgid(struct mnt_idmap *idmap, + const struct inode *inode); /* * This must be used for allocating filesystems specific inodes to set From 364595a6851bf64e1c38224ae68f5dd6651906d1 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 30 Mar 2023 06:41:43 -0400 Subject: [PATCH 09/13] fs: consolidate duplicate dt_type helpers There are three copies of the same dt_type helper sprinkled around the tree. Convert them to use the common fs_umode_to_dtype function instead, which has the added advantage of properly returning DT_UNKNOWN when given a mode that contains an unrecognized type. 
Cc: Chuck Lever Cc: Phillip Potter Suggested-by: Christian Brauner Signed-off-by: Jeff Layton Acked-by: Greg Kroah-Hartman Reviewed-by: Christian Brauner Message-Id: <20230330104144.75547-1-jlayton@kernel.org> Signed-off-by: Christian Brauner --- fs/configfs/dir.c | 9 ++------- fs/kernfs/dir.c | 8 +------- fs/libfs.c | 9 ++------- 3 files changed, 5 insertions(+), 21 deletions(-) diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 4afcbbe63e68..18677cd4e62f 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -1599,12 +1599,6 @@ static int configfs_dir_close(struct inode *inode, struct file *file) return 0; } -/* Relationship between s_mode and the DT_xxx types */ -static inline unsigned char dt_type(struct configfs_dirent *sd) -{ - return (sd->s_mode >> 12) & 15; -} - static int configfs_readdir(struct file *file, struct dir_context *ctx) { struct dentry *dentry = file->f_path.dentry; @@ -1654,7 +1648,8 @@ static int configfs_readdir(struct file *file, struct dir_context *ctx) name = configfs_get_name(next); len = strlen(name); - if (!dir_emit(ctx, name, len, ino, dt_type(next))) + if (!dir_emit(ctx, name, len, ino, + fs_umode_to_dtype(next->s_mode))) return 0; spin_lock(&configfs_dirent_lock); diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index ef00b5fe8cee..90de0e498371 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -1748,12 +1748,6 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, return error; } -/* Relationship between mode and the DT_xxx types */ -static inline unsigned char dt_type(struct kernfs_node *kn) -{ - return (kn->mode >> 12) & 15; -} - static int kernfs_dir_fop_release(struct inode *inode, struct file *filp) { kernfs_put(filp->private_data); @@ -1831,7 +1825,7 @@ static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx) pos; pos = kernfs_dir_next_pos(ns, parent, ctx->pos, pos)) { const char *name = pos->name; - unsigned int type = dt_type(pos); + unsigned int type = 
fs_umode_to_dtype(pos->mode); int len = strlen(name); ino_t ino = kernfs_ino(pos); diff --git a/fs/libfs.c b/fs/libfs.c index 4eda519c3002..89cf614a3271 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -174,12 +174,6 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence) } EXPORT_SYMBOL(dcache_dir_lseek); -/* Relationship between i_mode and the DT_xxx types */ -static inline unsigned char dt_type(struct inode *inode) -{ - return (inode->i_mode >> 12) & 15; -} - /* * Directory is locked and all positive dentries in it are safe, since * for ramfs-type trees they can't go away without unlink() or rmdir(), @@ -206,7 +200,8 @@ int dcache_readdir(struct file *file, struct dir_context *ctx) while ((next = scan_positives(cursor, p, 1, next)) != NULL) { if (!dir_emit(ctx, next->d_name.name, next->d_name.len, - d_inode(next)->i_ino, dt_type(d_inode(next)))) + d_inode(next)->i_ino, + fs_umode_to_dtype(d_inode(next)->i_mode))) break; ctx->pos++; p = &next->d_child; From 983652c691990b3257a07f67f4263eb847baa82d Mon Sep 17 00:00:00 2001 From: Chung-Chiang Cheng Date: Wed, 22 Mar 2023 14:25:19 +0800 Subject: [PATCH 10/13] splice: report related fsnotify events The fsnotify ACCESS and MODIFY event are missing when manipulating a file with splice(2). 
Signed-off-by: Chung-Chiang Cheng Reviewed-by: Amir Goldstein Acked-by: Jan Kara Message-Id: <20230322062519.409752-1-cccheng@synology.com> Signed-off-by: Christian Brauner --- fs/splice.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/splice.c b/fs/splice.c index 2c3dec2b6dfa..0af8d150394f 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -30,6 +30,7 @@ #include #include #include +#include <linux/fsnotify.h> #include #include #include @@ -1165,6 +1166,9 @@ long do_splice(struct file *in, loff_t *off_in, struct file *out, ret = do_splice_from(ipipe, out, &offset, len, flags); file_end_write(out); + if (ret > 0) + fsnotify_modify(out); + if (!off_out) out->f_pos = offset; else @@ -1188,6 +1192,10 @@ long do_splice(struct file *in, loff_t *off_in, struct file *out, flags |= SPLICE_F_NONBLOCK; ret = splice_file_to_pipe(in, opipe, &offset, len, flags); + + if (ret > 0) + fsnotify_access(in); + if (!off_in) in->f_pos = offset; else From 113348a44b8622b497fb884f41c8659481ad0b04 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Thu, 6 Apr 2023 03:20:02 +0800 Subject: [PATCH 11/13] eventfd: use wait_event_interruptible_locked_irq() helper wait_event_interruptible_locked_irq was introduced by commit 22c43c81a51e ("wait_event_interruptible_locked() interface"), but older code such as eventfd_{write,read} still uses the open-coded implementation. Inspired by commit 8120a8aadb20 ("fs/timerfd.c: make use of wait_event_interruptible_locked_irq()"), this patch replaces the open-coded implementation with a single macro call. No functional change intended. 
Signed-off-by: Wen Yang Reviewed-by: Eric Biggers Reviewed-by: Jens Axboe Cc: Alexander Viro Cc: Christoph Hellwig Cc: Dylan Yudaken Cc: Jens Axboe Cc: David Woodhouse Cc: Fu Wei Cc: Paolo Bonzini Cc: Michal Nazarewicz Cc: Matthew Wilcox Cc: linux-fsdevel@vger.kernel.org Cc: linux-kernel@vger.kernel.org Message-Id: Signed-off-by: Christian Brauner --- fs/eventfd.c | 41 +++++++---------------------------------- 1 file changed, 7 insertions(+), 34 deletions(-) diff --git a/fs/eventfd.c b/fs/eventfd.c index 249ca6c0b784..95850a13ce8d 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -228,7 +228,6 @@ static ssize_t eventfd_read(struct kiocb *iocb, struct iov_iter *to) struct file *file = iocb->ki_filp; struct eventfd_ctx *ctx = file->private_data; __u64 ucnt = 0; - DECLARE_WAITQUEUE(wait, current); if (iov_iter_count(to) < sizeof(ucnt)) return -EINVAL; @@ -239,23 +238,11 @@ static ssize_t eventfd_read(struct kiocb *iocb, struct iov_iter *to) spin_unlock_irq(&ctx->wqh.lock); return -EAGAIN; } - __add_wait_queue(&ctx->wqh, &wait); - for (;;) { - set_current_state(TASK_INTERRUPTIBLE); - if (ctx->count) - break; - if (signal_pending(current)) { - __remove_wait_queue(&ctx->wqh, &wait); - __set_current_state(TASK_RUNNING); - spin_unlock_irq(&ctx->wqh.lock); - return -ERESTARTSYS; - } + + if (wait_event_interruptible_locked_irq(ctx->wqh, ctx->count)) { spin_unlock_irq(&ctx->wqh.lock); - schedule(); - spin_lock_irq(&ctx->wqh.lock); + return -ERESTARTSYS; } - __remove_wait_queue(&ctx->wqh, &wait); - __set_current_state(TASK_RUNNING); } eventfd_ctx_do_read(ctx, &ucnt); current->in_eventfd = 1; @@ -275,7 +262,6 @@ static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t c struct eventfd_ctx *ctx = file->private_data; ssize_t res; __u64 ucnt; - DECLARE_WAITQUEUE(wait, current); if (count < sizeof(ucnt)) return -EINVAL; @@ -288,23 +274,10 @@ static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t c if (ULLONG_MAX - ctx->count > ucnt) res = 
sizeof(ucnt); else if (!(file->f_flags & O_NONBLOCK)) { - __add_wait_queue(&ctx->wqh, &wait); - for (res = 0;;) { - set_current_state(TASK_INTERRUPTIBLE); - if (ULLONG_MAX - ctx->count > ucnt) { - res = sizeof(ucnt); - break; - } - if (signal_pending(current)) { - res = -ERESTARTSYS; - break; - } - spin_unlock_irq(&ctx->wqh.lock); - schedule(); - spin_lock_irq(&ctx->wqh.lock); - } - __remove_wait_queue(&ctx->wqh, &wait); - __set_current_state(TASK_RUNNING); + res = wait_event_interruptible_locked_irq(ctx->wqh, + ULLONG_MAX - ctx->count > ucnt); + if (!res) + res = sizeof(ucnt); } if (likely(res > 0)) { ctx->count += ucnt; From 4ea2a8d84c75e20b4d9b5d9010879cdb89f2e384 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 28 Mar 2023 18:13:07 +0200 Subject: [PATCH 12/13] pnode: pass mountpoint directly Currently, we use a global variable to stash the destination mountpoint. All global variables are changed in propagate_one(). The mountpoint variable is one of the few which doesn't change after initialization. Instead, just pass the destination mountpoint directly making it easy to verify directly in propagate_mnt() that the destination mountpoint never changes. 
Reviewed-by: Seth Forshee (DigitalOcean) Message-Id: <20230202-fs-move-mount-replace-v2-2-f53cd31d6392@kernel.org> Signed-off-by: Christian Brauner --- fs/pnode.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/fs/pnode.c b/fs/pnode.c index 468e4e65a615..3cede8b18c8b 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -214,7 +214,6 @@ static struct mount *next_group(struct mount *m, struct mount *origin) /* all accesses are serialized by namespace_sem */ static struct mount *last_dest, *first_source, *last_source, *dest_master; -static struct mountpoint *mp; static struct hlist_head *list; static inline bool peers(struct mount *m1, struct mount *m2) @@ -222,7 +221,7 @@ static inline bool peers(struct mount *m1, struct mount *m2) return m1->mnt_group_id == m2->mnt_group_id && m1->mnt_group_id; } -static int propagate_one(struct mount *m) +static int propagate_one(struct mount *m, struct mountpoint *dest_mp) { struct mount *child; int type; @@ -230,7 +229,7 @@ static int propagate_one(struct mount *m) if (IS_MNT_NEW(m)) return 0; /* skip if mountpoint isn't covered by it */ - if (!is_subdir(mp->m_dentry, m->mnt.mnt_root)) + if (!is_subdir(dest_mp->m_dentry, m->mnt.mnt_root)) return 0; if (peers(m, last_dest)) { type = CL_MAKE_SHARED; @@ -262,7 +261,7 @@ static int propagate_one(struct mount *m) if (IS_ERR(child)) return PTR_ERR(child); read_seqlock_excl(&mount_lock); - mnt_set_mountpoint(m, mp, child); + mnt_set_mountpoint(m, dest_mp, child); if (m->mnt_master != dest_master) SET_MNT_MARK(m->mnt_master); read_sequnlock_excl(&mount_lock); @@ -299,13 +298,12 @@ int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp, last_dest = dest_mnt; first_source = source_mnt; last_source = source_mnt; - mp = dest_mp; list = tree_list; dest_master = dest_mnt->mnt_master; /* all peers of dest_mnt, except dest_mnt itself */ for (n = next_peer(dest_mnt); n != dest_mnt; n = next_peer(n)) { - ret = propagate_one(n); + ret = propagate_one(n, dest_mp); 
if (ret) goto out; } @@ -316,7 +314,7 @@ int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp, /* everything in that slave group */ n = m; do { - ret = propagate_one(n); + ret = propagate_one(n, dest_mp); if (ret) goto out; n = next_peer(n); From 81b21c0f0138ff5a499eafc3eb0578ad2a99622c Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Tue, 11 Apr 2023 19:57:33 +0900 Subject: [PATCH 13/13] fs: hfsplus: remove WARN_ON() from hfsplus_cat_{read,write}_inode() syzbot is hitting WARN_ON() in hfsplus_cat_{read,write}_inode(), for a crafted filesystem image can contain a bogus length. These conditions are not kernel bugs that would justify a kernel panic. Reported-by: syzbot Link: https://syzkaller.appspot.com/bug?extid=e2787430e752a92b8750 Reported-by: syzbot Link: https://syzkaller.appspot.com/bug?extid=4913dca2ea6e4d43f3f1 Signed-off-by: Tetsuo Handa Reviewed-by: Viacheslav Dubeyko Message-Id: <15308173-5252-d6a3-ae3b-e96d46cb6f41@I-love.SAKURA.ne.jp> Signed-off-by: Christian Brauner --- fs/hfsplus/inode.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index abb91f5fae92..b21660475ac1 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -511,7 +511,11 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) if (type == HFSPLUS_FOLDER) { struct hfsplus_cat_folder *folder = &entry.folder; - WARN_ON(fd->entrylength < sizeof(struct hfsplus_cat_folder)); + if (fd->entrylength < sizeof(struct hfsplus_cat_folder)) { + pr_err("bad catalog folder entry\n"); + res = -EIO; + goto out; + } hfs_bnode_read(fd->bnode, &entry, fd->entryoffset, sizeof(struct hfsplus_cat_folder)); hfsplus_get_perms(inode, &folder->permissions, 1); @@ -531,7 +535,11 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) } else if (type == HFSPLUS_FILE) { struct hfsplus_cat_file *file = &entry.file; - WARN_ON(fd->entrylength < sizeof(struct 
hfsplus_cat_file)); + if (fd->entrylength < sizeof(struct hfsplus_cat_file)) { + pr_err("bad catalog file entry\n"); + res = -EIO; + goto out; + } hfs_bnode_read(fd->bnode, &entry, fd->entryoffset, sizeof(struct hfsplus_cat_file)); @@ -562,6 +570,7 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) pr_err("bad catalog entry used to create inode\n"); res = -EIO; } +out: return res; } @@ -570,6 +579,7 @@ int hfsplus_cat_write_inode(struct inode *inode) struct inode *main_inode = inode; struct hfs_find_data fd; hfsplus_cat_entry entry; + int res = 0; if (HFSPLUS_IS_RSRC(inode)) main_inode = HFSPLUS_I(inode)->rsrc_inode; @@ -588,7 +598,11 @@ int hfsplus_cat_write_inode(struct inode *inode) if (S_ISDIR(main_inode->i_mode)) { struct hfsplus_cat_folder *folder = &entry.folder; - WARN_ON(fd.entrylength < sizeof(struct hfsplus_cat_folder)); + if (fd.entrylength < sizeof(struct hfsplus_cat_folder)) { + pr_err("bad catalog folder entry\n"); + res = -EIO; + goto out; + } hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, sizeof(struct hfsplus_cat_folder)); /* simple node checks? */ @@ -613,7 +627,11 @@ int hfsplus_cat_write_inode(struct inode *inode) } else { struct hfsplus_cat_file *file = &entry.file; - WARN_ON(fd.entrylength < sizeof(struct hfsplus_cat_file)); + if (fd.entrylength < sizeof(struct hfsplus_cat_file)) { + pr_err("bad catalog file entry\n"); + res = -EIO; + goto out; + } hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, sizeof(struct hfsplus_cat_file)); hfsplus_inode_write_fork(inode, &file->data_fork); @@ -634,7 +652,7 @@ int hfsplus_cat_write_inode(struct inode *inode) set_bit(HFSPLUS_I_CAT_DIRTY, &HFSPLUS_I(inode)->flags); out: hfs_find_exit(&fd); - return 0; + return res; } int hfsplus_fileattr_get(struct dentry *dentry, struct fileattr *fa)