linux-next/fs/utimes.c
Linus Torvalds 05e6295f7b fs.idmapped.v6.3
-----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQRAhzRXHqcMeLMyaSiRxhvAZXjcogUCY+5NlQAKCRCRxhvAZXjc
 orOaAP9i2h3OJy95nO2Fpde0Bt2UT+oulKCCcGlvXJ8/+TQpyQD/ZQq47gFQ0EAz
 Br5NxeyGeecAb0lHpFz+CpLGsxMrMwQ=
 =+BG5
 -----END PGP SIGNATURE-----

Merge tag 'fs.idmapped.v6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/idmapping

Pull vfs idmapping updates from Christian Brauner:

 - Last cycle we introduced the dedicated struct mnt_idmap type for
   mount idmapping and the required infrastucture in 256c8aed2b ("fs:
   introduce dedicated idmap type for mounts"). As promised in last
   cycle's pull request message this converts everything to rely on
   struct mnt_idmap.

   Currently we still pass around the plain namespace that was attached
   to a mount. This is in general pretty convenient but it makes it easy
   to conflate namespaces that are relevant on the filesystem with
   namespaces that are relevant on the mount level. Especially for
   non-vfs developers without detailed knowledge in this area this was a
   potential source for bugs.

   This finishes the conversion. Instead of passing the plain namespace
   around this updates all places that currently take a pointer to a
   mnt_userns with a pointer to struct mnt_idmap.

   Now that the conversion is done all helpers down to the really
   low-level helpers only accept a struct mnt_idmap argument instead of
   two namespace arguments.

   Conflating mount and other idmappings will now cause the compiler to
   complain loudly thus eliminating the possibility of any bugs. This
   makes it impossible for filesystem developers to mix up mount and
   filesystem idmappings as they are two distinct types and require
   distinct helpers that cannot be used interchangeably.

   Everything associated with struct mnt_idmap is moved into a single
   separate file. With that change no code can poke around in struct
   mnt_idmap. It can only be interacted with through dedicated helpers.
   That means all filesystems are and all of the vfs is completely
   oblivious to the actual implementation of idmappings.

   We are now also able to extend struct mnt_idmap as we see fit. For
   example, we can decouple it completely from namespaces for users that
   don't require or don't want to use them at all. We can also extend
   the concept of idmappings so we can cover filesystem specific
   requirements.

   In combination with the vfs{g,u}id_t work we finished in v6.2 this
   makes this feature substantially more robust and thus difficult to
   implement wrong by a given filesystem and also protects the vfs.

 - Enable idmapped mounts for tmpfs and fulfill a longstanding request.

   A long-standing request from users had been to make it possible to
   create idmapped mounts for tmpfs. For example, to share the host's
   tmpfs mount between multiple sandboxes. This is a prerequisite for
   some advanced Kubernetes cases. Systemd also has a range of use-cases
   to increase service isolation. And there are more users of this.

   However, with all of the other work going on this was way down on the
   priority list but luckily someone other than ourselves picked this
   up.

   As usual the patch is tiny as all the infrastructure work had been
   done multiple kernel releases ago. In addition to all the tests that
   we already have I requested that Rodrigo add a dedicated tmpfs
   testsuite for idmapped mounts to xfstests. It is to be included into
   xfstests during the v6.3 development cycle. This should add a slew of
   additional tests.

* tag 'fs.idmapped.v6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/idmapping: (26 commits)
  shmem: support idmapped mounts for tmpfs
  fs: move mnt_idmap
  fs: port vfs{g,u}id helpers to mnt_idmap
  fs: port fs{g,u}id helpers to mnt_idmap
  fs: port i_{g,u}id_into_vfs{g,u}id() to mnt_idmap
  fs: port i_{g,u}id_{needs_}update() to mnt_idmap
  quota: port to mnt_idmap
  fs: port privilege checking helpers to mnt_idmap
  fs: port inode_owner_or_capable() to mnt_idmap
  fs: port inode_init_owner() to mnt_idmap
  fs: port acl to mnt_idmap
  fs: port xattr to mnt_idmap
  fs: port ->permission() to pass mnt_idmap
  fs: port ->fileattr_set() to pass mnt_idmap
  fs: port ->set_acl() to pass mnt_idmap
  fs: port ->get_acl() to pass mnt_idmap
  fs: port ->tmpfile() to pass mnt_idmap
  fs: port ->rename() to pass mnt_idmap
  fs: port ->mknod() to pass mnt_idmap
  fs: port ->mkdir() to pass mnt_idmap
  ...
2023-02-20 11:53:11 -08:00

301 lines
7.7 KiB
C

// SPDX-License-Identifier: GPL-2.0
#include <linux/file.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/utime.h>
#include <linux/syscalls.h>
#include <linux/uaccess.h>
#include <linux/compat.h>
#include <asm/unistd.h>
#include <linux/filelock.h>
static bool nsec_valid(long nsec)
{
if (nsec == UTIME_OMIT || nsec == UTIME_NOW)
return true;
return nsec >= 0 && nsec <= 999999999;
}
int vfs_utimes(const struct path *path, struct timespec64 *times)
{
int error;
struct iattr newattrs;
struct inode *inode = path->dentry->d_inode;
struct inode *delegated_inode = NULL;
if (times) {
if (!nsec_valid(times[0].tv_nsec) ||
!nsec_valid(times[1].tv_nsec))
return -EINVAL;
if (times[0].tv_nsec == UTIME_NOW &&
times[1].tv_nsec == UTIME_NOW)
times = NULL;
}
error = mnt_want_write(path->mnt);
if (error)
goto out;
newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME;
if (times) {
if (times[0].tv_nsec == UTIME_OMIT)
newattrs.ia_valid &= ~ATTR_ATIME;
else if (times[0].tv_nsec != UTIME_NOW) {
newattrs.ia_atime = times[0];
newattrs.ia_valid |= ATTR_ATIME_SET;
}
if (times[1].tv_nsec == UTIME_OMIT)
newattrs.ia_valid &= ~ATTR_MTIME;
else if (times[1].tv_nsec != UTIME_NOW) {
newattrs.ia_mtime = times[1];
newattrs.ia_valid |= ATTR_MTIME_SET;
}
/*
* Tell setattr_prepare(), that this is an explicit time
* update, even if neither ATTR_ATIME_SET nor ATTR_MTIME_SET
* were used.
*/
newattrs.ia_valid |= ATTR_TIMES_SET;
} else {
newattrs.ia_valid |= ATTR_TOUCH;
}
retry_deleg:
inode_lock(inode);
error = notify_change(mnt_idmap(path->mnt), path->dentry, &newattrs,
&delegated_inode);
inode_unlock(inode);
if (delegated_inode) {
error = break_deleg_wait(&delegated_inode);
if (!error)
goto retry_deleg;
}
mnt_drop_write(path->mnt);
out:
return error;
}
static int do_utimes_path(int dfd, const char __user *filename,
struct timespec64 *times, int flags)
{
struct path path;
int lookup_flags = 0, error;
if (flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH))
return -EINVAL;
if (!(flags & AT_SYMLINK_NOFOLLOW))
lookup_flags |= LOOKUP_FOLLOW;
if (flags & AT_EMPTY_PATH)
lookup_flags |= LOOKUP_EMPTY;
retry:
error = user_path_at(dfd, filename, lookup_flags, &path);
if (error)
return error;
error = vfs_utimes(&path, times);
path_put(&path);
if (retry_estale(error, lookup_flags)) {
lookup_flags |= LOOKUP_REVAL;
goto retry;
}
return error;
}
static int do_utimes_fd(int fd, struct timespec64 *times, int flags)
{
struct fd f;
int error;
if (flags)
return -EINVAL;
f = fdget(fd);
if (!f.file)
return -EBADF;
error = vfs_utimes(&f.file->f_path, times);
fdput(f);
return error;
}
/*
* do_utimes - change times on filename or file descriptor
* @dfd: open file descriptor, -1 or AT_FDCWD
* @filename: path name or NULL
* @times: new times or NULL
* @flags: zero or more flags (only AT_SYMLINK_NOFOLLOW for the moment)
*
* If filename is NULL and dfd refers to an open file, then operate on
* the file. Otherwise look up filename, possibly using dfd as a
* starting point.
*
* If times==NULL, set access and modification to current time,
* must be owner or have write permission.
* Else, update from *times, must be owner or super user.
*/
long do_utimes(int dfd, const char __user *filename, struct timespec64 *times,
int flags)
{
if (filename == NULL && dfd != AT_FDCWD)
return do_utimes_fd(dfd, times, flags);
return do_utimes_path(dfd, filename, times, flags);
}
SYSCALL_DEFINE4(utimensat, int, dfd, const char __user *, filename,
struct __kernel_timespec __user *, utimes, int, flags)
{
struct timespec64 tstimes[2];
if (utimes) {
if ((get_timespec64(&tstimes[0], &utimes[0]) ||
get_timespec64(&tstimes[1], &utimes[1])))
return -EFAULT;
/* Nothing to do, we must not even check the path. */
if (tstimes[0].tv_nsec == UTIME_OMIT &&
tstimes[1].tv_nsec == UTIME_OMIT)
return 0;
}
return do_utimes(dfd, filename, utimes ? tstimes : NULL, flags);
}
#ifdef __ARCH_WANT_SYS_UTIME
/*
* futimesat(), utimes() and utime() are older versions of utimensat()
* that are provided for compatibility with traditional C libraries.
* On modern architectures, we always use libc wrappers around
* utimensat() instead.
*/
static long do_futimesat(int dfd, const char __user *filename,
struct __kernel_old_timeval __user *utimes)
{
struct __kernel_old_timeval times[2];
struct timespec64 tstimes[2];
if (utimes) {
if (copy_from_user(&times, utimes, sizeof(times)))
return -EFAULT;
/* This test is needed to catch all invalid values. If we
would test only in do_utimes we would miss those invalid
values truncated by the multiplication with 1000. Note
that we also catch UTIME_{NOW,OMIT} here which are only
valid for utimensat. */
if (times[0].tv_usec >= 1000000 || times[0].tv_usec < 0 ||
times[1].tv_usec >= 1000000 || times[1].tv_usec < 0)
return -EINVAL;
tstimes[0].tv_sec = times[0].tv_sec;
tstimes[0].tv_nsec = 1000 * times[0].tv_usec;
tstimes[1].tv_sec = times[1].tv_sec;
tstimes[1].tv_nsec = 1000 * times[1].tv_usec;
}
return do_utimes(dfd, filename, utimes ? tstimes : NULL, 0);
}
SYSCALL_DEFINE3(futimesat, int, dfd, const char __user *, filename,
struct __kernel_old_timeval __user *, utimes)
{
return do_futimesat(dfd, filename, utimes);
}
SYSCALL_DEFINE2(utimes, char __user *, filename,
struct __kernel_old_timeval __user *, utimes)
{
return do_futimesat(AT_FDCWD, filename, utimes);
}
SYSCALL_DEFINE2(utime, char __user *, filename, struct utimbuf __user *, times)
{
struct timespec64 tv[2];
if (times) {
if (get_user(tv[0].tv_sec, &times->actime) ||
get_user(tv[1].tv_sec, &times->modtime))
return -EFAULT;
tv[0].tv_nsec = 0;
tv[1].tv_nsec = 0;
}
return do_utimes(AT_FDCWD, filename, times ? tv : NULL, 0);
}
#endif
#ifdef CONFIG_COMPAT_32BIT_TIME
/*
* Not all architectures have sys_utime, so implement this in terms
* of sys_utimes.
*/
#ifdef __ARCH_WANT_SYS_UTIME32
SYSCALL_DEFINE2(utime32, const char __user *, filename,
struct old_utimbuf32 __user *, t)
{
struct timespec64 tv[2];
if (t) {
if (get_user(tv[0].tv_sec, &t->actime) ||
get_user(tv[1].tv_sec, &t->modtime))
return -EFAULT;
tv[0].tv_nsec = 0;
tv[1].tv_nsec = 0;
}
return do_utimes(AT_FDCWD, filename, t ? tv : NULL, 0);
}
#endif
SYSCALL_DEFINE4(utimensat_time32, unsigned int, dfd, const char __user *, filename, struct old_timespec32 __user *, t, int, flags)
{
struct timespec64 tv[2];
if (t) {
if (get_old_timespec32(&tv[0], &t[0]) ||
get_old_timespec32(&tv[1], &t[1]))
return -EFAULT;
if (tv[0].tv_nsec == UTIME_OMIT && tv[1].tv_nsec == UTIME_OMIT)
return 0;
}
return do_utimes(dfd, filename, t ? tv : NULL, flags);
}
#ifdef __ARCH_WANT_SYS_UTIME32
static long do_compat_futimesat(unsigned int dfd, const char __user *filename,
struct old_timeval32 __user *t)
{
struct timespec64 tv[2];
if (t) {
if (get_user(tv[0].tv_sec, &t[0].tv_sec) ||
get_user(tv[0].tv_nsec, &t[0].tv_usec) ||
get_user(tv[1].tv_sec, &t[1].tv_sec) ||
get_user(tv[1].tv_nsec, &t[1].tv_usec))
return -EFAULT;
if (tv[0].tv_nsec >= 1000000 || tv[0].tv_nsec < 0 ||
tv[1].tv_nsec >= 1000000 || tv[1].tv_nsec < 0)
return -EINVAL;
tv[0].tv_nsec *= 1000;
tv[1].tv_nsec *= 1000;
}
return do_utimes(dfd, filename, t ? tv : NULL, 0);
}
SYSCALL_DEFINE3(futimesat_time32, unsigned int, dfd,
const char __user *, filename,
struct old_timeval32 __user *, t)
{
return do_compat_futimesat(dfd, filename, t);
}
SYSCALL_DEFINE2(utimes_time32, const char __user *, filename, struct old_timeval32 __user *, t)
{
return do_compat_futimesat(AT_FDCWD, filename, t);
}
#endif
#endif