Merge branch 'vfs-6.14.misc' into vfs.all

This commit is contained in:
Christian Brauner 2024-12-17 21:41:49 +01:00
commit 4554288d75
No known key found for this signature in database
GPG Key ID: 91C61BC06578DCA2
22 changed files with 480 additions and 184 deletions

View File

@ -12,21 +12,10 @@ returns a list of extents.
Request Basics
--------------
A fiemap request is encoded within struct fiemap::
struct fiemap {
__u64 fm_start; /* logical offset (inclusive) at
* which to start mapping (in) */
__u64 fm_length; /* logical length of mapping which
* userspace cares about (in) */
__u32 fm_flags; /* FIEMAP_FLAG_* flags for request (in/out) */
__u32 fm_mapped_extents; /* number of extents that were
* mapped (out) */
__u32 fm_extent_count; /* size of fm_extents array (in) */
__u32 fm_reserved;
struct fiemap_extent fm_extents[0]; /* array of mapped extents (out) */
};
A fiemap request is encoded within struct fiemap:
.. kernel-doc:: include/uapi/linux/fiemap.h
:identifiers: fiemap
fm_start, and fm_length specify the logical range within the file
which the process would like mappings for. Extents returned mirror
@ -60,6 +49,8 @@ FIEMAP_FLAG_XATTR
If this flag is set, the extents returned will describe the inodes
extended attribute lookup tree, instead of its data tree.
FIEMAP_FLAG_CACHE
This flag requests caching of the extents.
Extent Mapping
--------------
@ -77,18 +68,10 @@ complete the requested range and will not have the FIEMAP_EXTENT_LAST
flag set (see the next section on extent flags).
Each extent is described by a single fiemap_extent structure as
returned in fm_extents::
returned in fm_extents:
struct fiemap_extent {
__u64 fe_logical; /* logical offset in bytes for the start of
* the extent */
__u64 fe_physical; /* physical offset in bytes for the start
* of the extent */
__u64 fe_length; /* length in bytes for the extent */
__u64 fe_reserved64[2];
__u32 fe_flags; /* FIEMAP_EXTENT_* flags for this extent */
__u32 fe_reserved[3];
};
.. kernel-doc:: include/uapi/linux/fiemap.h
:identifiers: fiemap_extent
All offsets and lengths are in bytes and mirror those on disk. It is valid
for an extents logical offset to start before the request or its logical
@ -175,6 +158,8 @@ FIEMAP_EXTENT_MERGED
userspace would be highly inefficient, the kernel will try to merge most
adjacent blocks into 'extents'.
FIEMAP_EXTENT_SHARED
This flag is set to request that space be shared with other files.
VFS -> File System Implementation
---------------------------------
@ -191,14 +176,10 @@ each discovered extent::
u64 len);
->fiemap is passed struct fiemap_extent_info which describes the
fiemap request::
fiemap request:
struct fiemap_extent_info {
unsigned int fi_flags; /* Flags as passed from user */
unsigned int fi_extents_mapped; /* Number of mapped extents */
unsigned int fi_extents_max; /* Size of fiemap_extent array */
struct fiemap_extent *fi_extents_start; /* Start of fiemap_extent array */
};
.. kernel-doc:: include/linux/fiemap.h
:identifiers: fiemap_extent_info
It is intended that the file system should not need to access any of this
structure directly. Filesystem handlers should be tolerant to signals and return

View File

@ -5006,10 +5006,11 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
if (IS_ENCRYPTED(inode)) {
inode->i_op = &ext4_encrypted_symlink_inode_operations;
} else if (ext4_inode_is_fast_symlink(inode)) {
inode->i_link = (char *)ei->i_data;
inode->i_op = &ext4_fast_symlink_inode_operations;
nd_terminate_link(ei->i_data, inode->i_size,
sizeof(ei->i_data) - 1);
inode_set_cached_link(inode, (char *)ei->i_data,
inode->i_size);
} else {
inode->i_op = &ext4_symlink_inode_operations;
}

View File

@ -3418,7 +3418,6 @@ static int ext4_symlink(struct mnt_idmap *idmap, struct inode *dir,
inode->i_op = &ext4_symlink_inode_operations;
} else {
inode->i_op = &ext4_fast_symlink_inode_operations;
inode->i_link = (char *)&EXT4_I(inode)->i_data;
}
}
@ -3434,6 +3433,9 @@ static int ext4_symlink(struct mnt_idmap *idmap, struct inode *dir,
disk_link.len);
inode->i_size = disk_link.len - 1;
EXT4_I(inode)->i_disksize = inode->i_size;
if (!IS_ENCRYPTED(inode))
inode_set_cached_link(inode, (char *)&EXT4_I(inode)->i_data,
inode->i_size);
}
err = ext4_add_nondir(handle, dentry, &inode);
if (handle)

View File

@ -279,10 +279,6 @@ static int expand_files(struct files_struct *files, unsigned int nr)
if (nr < fdt->max_fds)
return 0;
/* Can we expand? */
if (nr >= sysctl_nr_open)
return -EMFILE;
if (unlikely(files->resize_in_progress)) {
spin_unlock(&files->file_lock);
wait_event(files->resize_wait, !files->resize_in_progress);
@ -290,6 +286,10 @@ static int expand_files(struct files_struct *files, unsigned int nr)
goto repeat;
}
/* Can we expand? */
if (unlikely(nr >= sysctl_nr_open))
return -EMFILE;
/* All good, so we try */
files->resize_in_progress = true;
error = expand_fdtable(files, nr);
@ -1231,17 +1231,9 @@ __releases(&files->file_lock)
/*
* We need to detect attempts to do dup2() over allocated but still
* not finished descriptor. NB: OpenBSD avoids that at the price of
* extra work in their equivalent of fget() - they insert struct
* file immediately after grabbing descriptor, mark it larval if
* more work (e.g. actual opening) is needed and make sure that
* fget() treats larval files as absent. Potentially interesting,
* but while extra work in fget() is trivial, locking implications
* and amount of surgery on open()-related paths in VFS are not.
* FreeBSD fails with -EBADF in the same situation, NetBSD "solution"
* deadlocks in rather amusing ways, AFAICS. All of that is out of
* scope of POSIX or SUS, since neither considers shared descriptor
* tables and this condition does not arise without those.
* not finished descriptor.
*
* POSIX is silent on the issue, we return -EBUSY.
*/
fdt = files_fdtable(files);
fd = array_index_nospec(fd, fdt->max_fds);

View File

@ -128,7 +128,7 @@ static struct ctl_table fs_stat_sysctls[] = {
.data = &sysctl_nr_open,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.proc_handler = proc_douintvec_minmax,
.extra1 = &sysctl_nr_open_min,
.extra2 = &sysctl_nr_open_max,
},
@ -478,6 +478,8 @@ static void ____fput(struct callback_head *work)
__fput(container_of(work, struct file, f_task_work));
}
static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput);
/*
* If kernel thread really needs to have the final fput() it has done
* to complete, call this. The only user right now is the boot - we
@ -491,11 +493,10 @@ static void ____fput(struct callback_head *work)
void flush_delayed_fput(void)
{
delayed_fput(NULL);
flush_delayed_work(&delayed_fput_work);
}
EXPORT_SYMBOL_GPL(flush_delayed_fput);
static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput);
void fput(struct file *file)
{
if (file_ref_put(&file->f_ref)) {

View File

@ -493,7 +493,7 @@ static void put_fc_log(struct fs_context *fc)
if (log) {
if (refcount_dec_and_test(&log->usage)) {
fc->log.log = NULL;
for (i = 0; i <= 7; i++)
for (i = 0; i < ARRAY_SIZE(log->buffer) ; i++)
if (log->need_free & (1 << i))
kfree(log->buffer[i]);
kfree(log);

View File

@ -245,9 +245,17 @@ const struct inode_operations simple_dir_inode_operations = {
};
EXPORT_SYMBOL(simple_dir_inode_operations);
/* 0 is '.', 1 is '..', so always start with offset 2 or more */
/* simple_offset_add() allocation range */
enum {
DIR_OFFSET_MIN = 2,
DIR_OFFSET_MIN = 3,
DIR_OFFSET_MAX = LONG_MAX - 1,
};
/* simple_offset_add() never assigns these to a dentry */
enum {
DIR_OFFSET_FIRST = 2, /* Find first real entry */
DIR_OFFSET_EOD = LONG_MAX, /* Marks EOD */
};
static void offset_set(struct dentry *dentry, long offset)
@ -291,8 +299,11 @@ int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry)
return -EBUSY;
ret = mtree_alloc_cyclic(&octx->mt, &offset, dentry, DIR_OFFSET_MIN,
LONG_MAX, &octx->next_offset, GFP_KERNEL);
if (ret < 0)
DIR_OFFSET_MAX, &octx->next_offset,
GFP_KERNEL);
if (unlikely(ret == -EBUSY))
return -ENOSPC;
if (unlikely(ret < 0))
return ret;
offset_set(dentry, offset);
@ -329,38 +340,6 @@ void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry)
offset_set(dentry, 0);
}
/**
* simple_offset_empty - Check if a dentry can be unlinked
* @dentry: dentry to be tested
*
* Returns 0 if @dentry is a non-empty directory; otherwise returns 1.
*/
int simple_offset_empty(struct dentry *dentry)
{
struct inode *inode = d_inode(dentry);
struct offset_ctx *octx;
struct dentry *child;
unsigned long index;
int ret = 1;
if (!inode || !S_ISDIR(inode->i_mode))
return ret;
index = DIR_OFFSET_MIN;
octx = inode->i_op->get_offset_ctx(inode);
mt_for_each(&octx->mt, child, index, LONG_MAX) {
spin_lock(&child->d_lock);
if (simple_positive(child)) {
spin_unlock(&child->d_lock);
ret = 0;
break;
}
spin_unlock(&child->d_lock);
}
return ret;
}
/**
* simple_offset_rename - handle directory offsets for rename
* @old_dir: parent directory of source entry
@ -454,14 +433,6 @@ void simple_offset_destroy(struct offset_ctx *octx)
mtree_destroy(&octx->mt);
}
static int offset_dir_open(struct inode *inode, struct file *file)
{
struct offset_ctx *ctx = inode->i_op->get_offset_ctx(inode);
file->private_data = (void *)ctx->next_offset;
return 0;
}
/**
* offset_dir_llseek - Advance the read position of a directory descriptor
* @file: an open directory whose position is to be updated
@ -475,9 +446,6 @@ static int offset_dir_open(struct inode *inode, struct file *file)
*/
static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence)
{
struct inode *inode = file->f_inode;
struct offset_ctx *ctx = inode->i_op->get_offset_ctx(inode);
switch (whence) {
case SEEK_CUR:
offset += file->f_pos;
@ -490,25 +458,46 @@ static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence)
return -EINVAL;
}
/* In this case, ->private_data is protected by f_pos_lock */
if (!offset)
file->private_data = (void *)ctx->next_offset;
return vfs_setpos(file, offset, LONG_MAX);
}
static struct dentry *offset_find_next(struct offset_ctx *octx, loff_t offset)
/* Cf. find_next_child() */
static struct dentry *find_next_sibling_locked(struct dentry *parent,
struct dentry *dentry)
{
MA_STATE(mas, &octx->mt, offset, offset);
struct dentry *found = NULL;
hlist_for_each_entry_from(dentry, d_sib) {
if (!simple_positive(dentry))
continue;
spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
if (simple_positive(dentry))
found = dget_dlock(dentry);
spin_unlock(&dentry->d_lock);
if (likely(found))
break;
}
return found;
}
static noinline_for_stack struct dentry *
offset_dir_lookup(struct file *file, loff_t offset)
{
struct dentry *parent = file->f_path.dentry;
struct dentry *child, *found = NULL;
struct inode *inode = d_inode(parent);
struct offset_ctx *octx = inode->i_op->get_offset_ctx(inode);
MA_STATE(mas, &octx->mt, offset, offset);
rcu_read_lock();
child = mas_find(&mas, LONG_MAX);
child = mas_find(&mas, DIR_OFFSET_MAX);
if (!child)
goto out;
spin_lock(&child->d_lock);
if (simple_positive(child))
found = dget_dlock(child);
spin_unlock(&child->d_lock);
spin_lock(&parent->d_lock);
found = find_next_sibling_locked(parent, child);
spin_unlock(&parent->d_lock);
out:
rcu_read_unlock();
return found;
@ -517,35 +506,46 @@ static struct dentry *offset_find_next(struct offset_ctx *octx, loff_t offset)
static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry)
{
struct inode *inode = d_inode(dentry);
long offset = dentry2offset(dentry);
return ctx->actor(ctx, dentry->d_name.name, dentry->d_name.len, offset,
inode->i_ino, fs_umode_to_dtype(inode->i_mode));
return dir_emit(ctx, dentry->d_name.name, dentry->d_name.len,
inode->i_ino, fs_umode_to_dtype(inode->i_mode));
}
static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx, long last_index)
static void offset_iterate_dir(struct file *file, struct dir_context *ctx)
{
struct offset_ctx *octx = inode->i_op->get_offset_ctx(inode);
struct dentry *dir = file->f_path.dentry;
struct dentry *dentry;
if (ctx->pos == DIR_OFFSET_FIRST) {
spin_lock(&dir->d_lock);
dentry = find_next_sibling_locked(dir, d_first_child(dir));
spin_unlock(&dir->d_lock);
} else
dentry = offset_dir_lookup(file, ctx->pos);
if (!dentry)
goto out_eod;
while (true) {
dentry = offset_find_next(octx, ctx->pos);
if (!dentry)
return;
struct dentry *next;
if (dentry2offset(dentry) >= last_index) {
dput(dentry);
return;
}
ctx->pos = dentry2offset(dentry);
if (!offset_dir_emit(ctx, dentry))
break;
if (!offset_dir_emit(ctx, dentry)) {
dput(dentry);
return;
}
ctx->pos = dentry2offset(dentry) + 1;
spin_lock(&dir->d_lock);
next = find_next_sibling_locked(dir, d_next_sibling(dentry));
spin_unlock(&dir->d_lock);
dput(dentry);
if (!next)
goto out_eod;
dentry = next;
}
dput(dentry);
return;
out_eod:
ctx->pos = DIR_OFFSET_EOD;
}
/**
@ -565,6 +565,8 @@ static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx, lon
*
* On return, @ctx->pos contains an offset that will read the next entry
* in this directory when offset_readdir() is called again with @ctx.
* Caller places this value in the d_off field of the last entry in the
* user's buffer.
*
* Return values:
* %0 - Complete
@ -572,19 +574,17 @@ static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx, lon
static int offset_readdir(struct file *file, struct dir_context *ctx)
{
struct dentry *dir = file->f_path.dentry;
long last_index = (long)file->private_data;
lockdep_assert_held(&d_inode(dir)->i_rwsem);
if (!dir_emit_dots(file, ctx))
return 0;
offset_iterate_dir(d_inode(dir), ctx, last_index);
if (ctx->pos != DIR_OFFSET_EOD)
offset_iterate_dir(file, ctx);
return 0;
}
const struct file_operations simple_offset_dir_operations = {
.open = offset_dir_open,
.llseek = offset_dir_llseek,
.iterate_shared = offset_readdir,
.read = generic_read_dir,

View File

@ -5272,19 +5272,16 @@ SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newna
getname(newname), 0);
}
int readlink_copy(char __user *buffer, int buflen, const char *link)
int readlink_copy(char __user *buffer, int buflen, const char *link, int linklen)
{
int len = PTR_ERR(link);
if (IS_ERR(link))
goto out;
int copylen;
len = strlen(link);
if (len > (unsigned) buflen)
len = buflen;
if (copy_to_user(buffer, link, len))
len = -EFAULT;
out:
return len;
copylen = linklen;
if (unlikely(copylen > (unsigned) buflen))
copylen = buflen;
if (copy_to_user(buffer, link, copylen))
copylen = -EFAULT;
return copylen;
}
/**
@ -5304,6 +5301,9 @@ int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen)
const char *link;
int res;
if (inode->i_opflags & IOP_CACHED_LINK)
return readlink_copy(buffer, buflen, inode->i_link, inode->i_linklen);
if (unlikely(!(inode->i_opflags & IOP_DEFAULT_READLINK))) {
if (unlikely(inode->i_op->readlink))
return inode->i_op->readlink(dentry, buffer, buflen);
@ -5322,7 +5322,7 @@ int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen)
if (IS_ERR(link))
return PTR_ERR(link);
}
res = readlink_copy(buffer, buflen, link);
res = readlink_copy(buffer, buflen, link, strlen(link));
do_delayed_call(&done);
return res;
}
@ -5391,10 +5391,14 @@ EXPORT_SYMBOL(page_put_link);
int page_readlink(struct dentry *dentry, char __user *buffer, int buflen)
{
const char *link;
int res;
DEFINE_DELAYED_CALL(done);
int res = readlink_copy(buffer, buflen,
page_get_link(dentry, d_inode(dentry),
&done));
link = page_get_link(dentry, d_inode(dentry), &done);
res = PTR_ERR(link);
if (!IS_ERR(link))
res = readlink_copy(buffer, buflen, link, strlen(link));
do_delayed_call(&done);
return res;
}

View File

@ -5044,6 +5044,10 @@ static int statmount_mnt_opts(struct kstatmount *s, struct seq_file *seq)
if (sb->s_op->show_options) {
size_t start = seq->count;
err = security_sb_show_options(seq, sb);
if (err)
return err;
err = sb->s_op->show_options(seq, mnt->mnt_root);
if (err)
return err;

View File

@ -611,10 +611,10 @@ int propagate_umount(struct list_head *list)
continue;
} else if (child->mnt.mnt_flags & MNT_UMOUNT) {
/*
* We have come accross an partially unmounted
* mount in list that has not been visited yet.
* Remember it has been visited and continue
* about our merry way.
* We have come across a partially unmounted
* mount in a list that has not been visited
* yet. Remember it has been visited and
* continue about our merry way.
*/
list_add_tail(&child->mnt_umounting, &visited);
continue;

View File

@ -83,7 +83,7 @@ static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int bufl
if (ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) {
res = ns_get_name(name, sizeof(name), task, ns_ops);
if (res >= 0)
res = readlink_copy(buffer, buflen, name);
res = readlink_copy(buffer, buflen, name, strlen(name));
}
put_task_struct(task);
return res;

View File

@ -5,12 +5,18 @@
#include <uapi/linux/fiemap.h>
#include <linux/fs.h>
/**
* struct fiemap_extent_info - fiemap request to a filesystem
* @fi_flags: Flags as passed from user
* @fi_extents_mapped: Number of mapped extents
* @fi_extents_max: Size of fiemap_extent array
* @fi_extents_start: Start of fiemap_extent array
*/
struct fiemap_extent_info {
unsigned int fi_flags; /* Flags as passed from user */
unsigned int fi_extents_mapped; /* Number of mapped extents */
unsigned int fi_extents_max; /* Size of fiemap_extent array */
struct fiemap_extent __user *fi_extents_start; /* Start of
fiemap_extent array */
unsigned int fi_flags;
unsigned int fi_extents_mapped;
unsigned int fi_extents_max;
struct fiemap_extent __user *fi_extents_start;
};
int fiemap_prep(struct inode *inode, struct fiemap_extent_info *fieinfo,

View File

@ -626,6 +626,7 @@ is_uncached_acl(struct posix_acl *acl)
#define IOP_XATTR 0x0008
#define IOP_DEFAULT_READLINK 0x0010
#define IOP_MGTIME 0x0020
#define IOP_CACHED_LINK 0x0040
/*
* Keep mostly read-only and often accessed (especially for
@ -723,7 +724,10 @@ struct inode {
};
struct file_lock_context *i_flctx;
struct address_space i_data;
struct list_head i_devices;
union {
struct list_head i_devices;
int i_linklen;
};
union {
struct pipe_inode_info *i_pipe;
struct cdev *i_cdev;
@ -749,6 +753,13 @@ struct inode {
void *i_private; /* fs or device private pointer */
} __randomize_layout;
static inline void inode_set_cached_link(struct inode *inode, char *link, int linklen)
{
inode->i_link = link;
inode->i_linklen = linklen;
inode->i_opflags |= IOP_CACHED_LINK;
}
/*
* Get bit address from inode->i_state to use with wait_var_event()
* infrastructre.
@ -3351,7 +3362,7 @@ extern const struct file_operations generic_ro_fops;
#define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m))
extern int readlink_copy(char __user *, int, const char *);
extern int readlink_copy(char __user *, int, const char *, int);
extern int page_readlink(struct dentry *, char __user *, int);
extern const char *page_get_link(struct dentry *, struct inode *,
struct delayed_call *);
@ -3468,7 +3479,6 @@ struct offset_ctx {
void simple_offset_init(struct offset_ctx *octx);
int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry);
void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry);
int simple_offset_empty(struct dentry *dentry);
int simple_offset_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry);
int simple_offset_rename_exchange(struct inode *old_dir,

View File

@ -76,7 +76,7 @@ struct vfsmount {
static inline struct mnt_idmap *mnt_idmap(const struct vfsmount *mnt)
{
/* Pairs with smp_store_release() in do_idmap_mount(). */
return smp_load_acquire(&mnt->mnt_idmap);
return READ_ONCE(mnt->mnt_idmap);
}
extern int mnt_want_write(struct vfsmount *mnt);

View File

@ -272,7 +272,7 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex)
({ \
unsigned __seq; \
\
while ((__seq = seqprop_sequence(s)) & 1) \
while (unlikely((__seq = seqprop_sequence(s)) & 1)) \
cpu_relax(); \
\
kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); \

View File

@ -14,37 +14,56 @@
#include <linux/types.h>
/**
* struct fiemap_extent - description of one fiemap extent
* @fe_logical: byte offset of the extent in the file
* @fe_physical: byte offset of extent on disk
* @fe_length: length in bytes for this extent
* @fe_flags: FIEMAP_EXTENT_* flags for this extent
*/
struct fiemap_extent {
__u64 fe_logical; /* logical offset in bytes for the start of
* the extent from the beginning of the file */
__u64 fe_physical; /* physical offset in bytes for the start
* of the extent from the beginning of the disk */
__u64 fe_length; /* length in bytes for this extent */
__u64 fe_logical;
__u64 fe_physical;
__u64 fe_length;
/* private: */
__u64 fe_reserved64[2];
__u32 fe_flags; /* FIEMAP_EXTENT_* flags for this extent */
/* public: */
__u32 fe_flags;
/* private: */
__u32 fe_reserved[3];
};
/**
* struct fiemap - file extent mappings
* @fm_start: byte offset (inclusive) at which to start mapping (in)
* @fm_length: logical length of mapping which userspace wants (in)
* @fm_flags: FIEMAP_FLAG_* flags for request (in/out)
* @fm_mapped_extents: number of extents that were mapped (out)
* @fm_extent_count: size of fm_extents array (in)
* @fm_extents: array of mapped extents (out)
*/
struct fiemap {
__u64 fm_start; /* logical offset (inclusive) at
* which to start mapping (in) */
__u64 fm_length; /* logical length of mapping which
* userspace wants (in) */
__u32 fm_flags; /* FIEMAP_FLAG_* flags for request (in/out) */
__u32 fm_mapped_extents;/* number of extents that were mapped (out) */
__u32 fm_extent_count; /* size of fm_extents array (in) */
__u64 fm_start;
__u64 fm_length;
__u32 fm_flags;
__u32 fm_mapped_extents;
__u32 fm_extent_count;
/* private: */
__u32 fm_reserved;
struct fiemap_extent fm_extents[]; /* array of mapped extents (out) */
/* public: */
struct fiemap_extent fm_extents[];
};
#define FIEMAP_MAX_OFFSET (~0ULL)
/* flags used in fm_flags: */
#define FIEMAP_FLAG_SYNC 0x00000001 /* sync file data before map */
#define FIEMAP_FLAG_XATTR 0x00000002 /* map extended attribute tree */
#define FIEMAP_FLAG_CACHE 0x00000004 /* request caching of the extents */
#define FIEMAP_FLAGS_COMPAT (FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR)
/* flags used in fe_flags: */
#define FIEMAP_EXTENT_LAST 0x00000001 /* Last extent in file. */
#define FIEMAP_EXTENT_UNKNOWN 0x00000002 /* Data location unknown. */
#define FIEMAP_EXTENT_DELALLOC 0x00000004 /* Location still pending.

View File

@ -71,7 +71,7 @@ static void watch_queue_pipe_buf_release(struct pipe_inode_info *pipe,
bit /= WATCH_QUEUE_NOTE_SIZE;
page = buf->page;
bit += page->index;
bit += page->private;
set_bit(bit, wqueue->notes_bitmap);
generic_pipe_buf_release(pipe, buf);
@ -278,7 +278,7 @@ long watch_queue_set_size(struct pipe_inode_info *pipe, unsigned int nr_notes)
pages[i] = alloc_page(GFP_KERNEL);
if (!pages[i])
goto error_p;
pages[i]->index = i * WATCH_QUEUE_NOTES_PER_PAGE;
pages[i]->private = i * WATCH_QUEUE_NOTES_PER_PAGE;
}
bitmap = bitmap_alloc(nr_notes, GFP_KERNEL);

View File

@ -3818,7 +3818,7 @@ static int shmem_unlink(struct inode *dir, struct dentry *dentry)
static int shmem_rmdir(struct inode *dir, struct dentry *dentry)
{
if (!simple_offset_empty(dentry))
if (!simple_empty(dentry))
return -ENOTEMPTY;
drop_nlink(d_inode(dentry));
@ -3875,7 +3875,7 @@ static int shmem_rename2(struct mnt_idmap *idmap,
return simple_offset_rename_exchange(old_dir, old_dentry,
new_dir, new_dentry);
if (!simple_offset_empty(new_dentry))
if (!simple_empty(new_dentry))
return -ENOTEMPTY;
if (flags & RENAME_WHITEOUT) {
@ -3914,6 +3914,7 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir,
int len;
struct inode *inode;
struct folio *folio;
char *link;
len = strlen(symname) + 1;
if (len > PAGE_SIZE)
@ -3935,12 +3936,13 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir,
inode->i_size = len-1;
if (len <= SHORT_SYMLINK_LEN) {
inode->i_link = kmemdup(symname, len, GFP_KERNEL);
if (!inode->i_link) {
link = kmemdup(symname, len, GFP_KERNEL);
if (!link) {
error = -ENOMEM;
goto out_remove_offset;
}
inode->i_op = &shmem_short_symlink_operations;
inode_set_cached_link(inode, link, len - 1);
} else {
inode_nohighmem(inode);
inode->i_mapping->a_ops = &shmem_aops;

View File

@ -1,3 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
/test-fsmount
/test-statx
/mountinfo

View File

@ -1,4 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
userprogs-always-y += test-fsmount test-statx
userprogs-always-y += test-fsmount test-statx mountinfo
userccflags += -I usr/include

273
samples/vfs/mountinfo.c Normal file
View File

@ -0,0 +1,273 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Use pidfds, nsfds, listmount() and statmount() mimic the
* contents of /proc/self/mountinfo.
*/
#define _GNU_SOURCE
#include <stdio.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <linux/pidfd.h>
#include <linux/mount.h>
#include <linux/nsfs.h>
#include <unistd.h>
#include <alloca.h>
#include <getopt.h>
#include <stdlib.h>
#include <stdbool.h>
#include <errno.h>
/* max mounts per listmount call */
#define MAXMOUNTS 1024
/* size of struct statmount (including trailing string buffer) */
#define STATMOUNT_BUFSIZE 4096
static bool ext_format;
/*
* There are no bindings in glibc for listmount() and statmount() (yet),
* make our own here.
*/
static int statmount(uint64_t mnt_id, uint64_t mnt_ns_id, uint64_t mask,
struct statmount *buf, size_t bufsize,
unsigned int flags)
{
struct mnt_id_req req = {
.size = MNT_ID_REQ_SIZE_VER0,
.mnt_id = mnt_id,
.param = mask,
};
if (mnt_ns_id) {
req.size = MNT_ID_REQ_SIZE_VER1;
req.mnt_ns_id = mnt_ns_id;
}
return syscall(__NR_statmount, &req, buf, bufsize, flags);
}
static ssize_t listmount(uint64_t mnt_id, uint64_t mnt_ns_id,
uint64_t last_mnt_id, uint64_t list[], size_t num,
unsigned int flags)
{
struct mnt_id_req req = {
.size = MNT_ID_REQ_SIZE_VER0,
.mnt_id = mnt_id,
.param = last_mnt_id,
};
if (mnt_ns_id) {
req.size = MNT_ID_REQ_SIZE_VER1;
req.mnt_ns_id = mnt_ns_id;
}
return syscall(__NR_listmount, &req, list, num, flags);
}
static void show_mnt_attrs(uint64_t flags)
{
printf("%s", flags & MOUNT_ATTR_RDONLY ? "ro" : "rw");
if (flags & MOUNT_ATTR_NOSUID)
printf(",nosuid");
if (flags & MOUNT_ATTR_NODEV)
printf(",nodev");
if (flags & MOUNT_ATTR_NOEXEC)
printf(",noexec");
switch (flags & MOUNT_ATTR__ATIME) {
case MOUNT_ATTR_RELATIME:
printf(",relatime");
break;
case MOUNT_ATTR_NOATIME:
printf(",noatime");
break;
case MOUNT_ATTR_STRICTATIME:
/* print nothing */
break;
}
if (flags & MOUNT_ATTR_NODIRATIME)
printf(",nodiratime");
if (flags & MOUNT_ATTR_NOSYMFOLLOW)
printf(",nosymfollow");
if (flags & MOUNT_ATTR_IDMAP)
printf(",idmapped");
}
static void show_propagation(struct statmount *sm)
{
if (sm->mnt_propagation & MS_SHARED)
printf(" shared:%llu", sm->mnt_peer_group);
if (sm->mnt_propagation & MS_SLAVE) {
printf(" master:%llu", sm->mnt_master);
if (sm->propagate_from && sm->propagate_from != sm->mnt_master)
printf(" propagate_from:%llu", sm->propagate_from);
}
if (sm->mnt_propagation & MS_UNBINDABLE)
printf(" unbindable");
}
static void show_sb_flags(uint64_t flags)
{
printf("%s", flags & MS_RDONLY ? "ro" : "rw");
if (flags & MS_SYNCHRONOUS)
printf(",sync");
if (flags & MS_DIRSYNC)
printf(",dirsync");
if (flags & MS_MANDLOCK)
printf(",mand");
if (flags & MS_LAZYTIME)
printf(",lazytime");
}
static int dump_mountinfo(uint64_t mnt_id, uint64_t mnt_ns_id)
{
int ret;
struct statmount *buf = alloca(STATMOUNT_BUFSIZE);
const uint64_t mask = STATMOUNT_SB_BASIC | STATMOUNT_MNT_BASIC |
STATMOUNT_PROPAGATE_FROM | STATMOUNT_FS_TYPE |
STATMOUNT_MNT_ROOT | STATMOUNT_MNT_POINT |
STATMOUNT_MNT_OPTS | STATMOUNT_FS_SUBTYPE |
STATMOUNT_SB_SOURCE;
ret = statmount(mnt_id, mnt_ns_id, mask, buf, STATMOUNT_BUFSIZE, 0);
if (ret < 0) {
perror("statmount");
return 1;
}
if (ext_format)
printf("0x%lx 0x%lx 0x%llx ", mnt_ns_id, mnt_id, buf->mnt_parent_id);
printf("%u %u %u:%u %s %s ", buf->mnt_id_old, buf->mnt_parent_id_old,
buf->sb_dev_major, buf->sb_dev_minor,
&buf->str[buf->mnt_root],
&buf->str[buf->mnt_point]);
show_mnt_attrs(buf->mnt_attr);
show_propagation(buf);
printf(" - %s", &buf->str[buf->fs_type]);
if (buf->mask & STATMOUNT_FS_SUBTYPE)
printf(".%s", &buf->str[buf->fs_subtype]);
if (buf->mask & STATMOUNT_SB_SOURCE)
printf(" %s ", &buf->str[buf->sb_source]);
else
printf(" :none ");
show_sb_flags(buf->sb_flags);
if (buf->mask & STATMOUNT_MNT_OPTS)
printf(",%s", &buf->str[buf->mnt_opts]);
printf("\n");
return 0;
}
static int dump_mounts(uint64_t mnt_ns_id)
{
uint64_t mntid[MAXMOUNTS];
uint64_t last_mnt_id = 0;
ssize_t count;
int i;
/*
* Get a list of all mntids in mnt_ns_id. If it returns MAXMOUNTS
* mounts, then go again until we get everything.
*/
do {
count = listmount(LSMT_ROOT, mnt_ns_id, last_mnt_id, mntid, MAXMOUNTS, 0);
if (count < 0 || count > MAXMOUNTS) {
errno = count < 0 ? errno : count;
perror("listmount");
return 1;
}
/* Walk the returned mntids and print info about each */
for (i = 0; i < count; ++i) {
int ret = dump_mountinfo(mntid[i], mnt_ns_id);
if (ret != 0)
return ret;
}
/* Set up last_mnt_id to pick up where we left off */
last_mnt_id = mntid[count - 1];
} while (count == MAXMOUNTS);
return 0;
}
static void usage(const char * const prog)
{
printf("Usage:\n");
printf("%s [-e] [-p pid] [-r] [-h]\n", prog);
printf(" -e: extended format\n");
printf(" -h: print usage message\n");
printf(" -p: get mount namespace from given pid\n");
printf(" -r: recursively print all mounts in all child namespaces\n");
}
int main(int argc, char * const *argv)
{
struct mnt_ns_info mni = { .size = MNT_NS_INFO_SIZE_VER0 };
int pidfd, mntns, ret, opt;
pid_t pid = getpid();
bool recursive = false;
while ((opt = getopt(argc, argv, "ehp:r")) != -1) {
switch (opt) {
case 'e':
ext_format = true;
break;
case 'h':
usage(argv[0]);
return 0;
case 'p':
pid = atoi(optarg);
break;
case 'r':
recursive = true;
break;
}
}
/* Get a pidfd for pid */
pidfd = syscall(SYS_pidfd_open, pid, 0);
if (pidfd < 0) {
perror("pidfd_open");
return 1;
}
/* Get the mnt namespace for pidfd */
mntns = ioctl(pidfd, PIDFD_GET_MNT_NAMESPACE, NULL);
if (mntns < 0) {
perror("PIDFD_GET_MNT_NAMESPACE");
return 1;
}
close(pidfd);
/* get info about mntns. In particular, the mnt_ns_id */
ret = ioctl(mntns, NS_MNT_GET_INFO, &mni);
if (ret < 0) {
perror("NS_MNT_GET_INFO");
return 1;
}
do {
int ret;
ret = dump_mounts(mni.mnt_ns_id);
if (ret)
return ret;
if (!recursive)
break;
/* get the next mntns (and overwrite the old mount ns info) */
ret = ioctl(mntns, NS_MNT_GET_NEXT, &mni);
close(mntns);
mntns = ret;
} while (mntns >= 0);
return 0;
}

View File

@ -2612,7 +2612,7 @@ static int policy_readlink(struct dentry *dentry, char __user *buffer,
res = snprintf(name, sizeof(name), "%s:[%lu]", AAFS_NAME,
d_inode(dentry)->i_ino);
if (res > 0 && res < sizeof(name))
res = readlink_copy(buffer, buflen, name);
res = readlink_copy(buffer, buflen, name, strlen(name));
else
res = -ENOENT;