mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-15 13:15:57 +00:00
Merge branch 'vfs-6.14.misc' into vfs.all
This commit is contained in:
commit
c9e970fded
@ -12,21 +12,10 @@ returns a list of extents.
|
|||||||
Request Basics
|
Request Basics
|
||||||
--------------
|
--------------
|
||||||
|
|
||||||
A fiemap request is encoded within struct fiemap::
|
A fiemap request is encoded within struct fiemap:
|
||||||
|
|
||||||
struct fiemap {
|
|
||||||
__u64 fm_start; /* logical offset (inclusive) at
|
|
||||||
* which to start mapping (in) */
|
|
||||||
__u64 fm_length; /* logical length of mapping which
|
|
||||||
* userspace cares about (in) */
|
|
||||||
__u32 fm_flags; /* FIEMAP_FLAG_* flags for request (in/out) */
|
|
||||||
__u32 fm_mapped_extents; /* number of extents that were
|
|
||||||
* mapped (out) */
|
|
||||||
__u32 fm_extent_count; /* size of fm_extents array (in) */
|
|
||||||
__u32 fm_reserved;
|
|
||||||
struct fiemap_extent fm_extents[0]; /* array of mapped extents (out) */
|
|
||||||
};
|
|
||||||
|
|
||||||
|
.. kernel-doc:: include/uapi/linux/fiemap.h
|
||||||
|
:identifiers: fiemap
|
||||||
|
|
||||||
fm_start and fm_length specify the logical range within the file
|
fm_start and fm_length specify the logical range within the file
|
||||||
which the process would like mappings for. Extents returned mirror
|
which the process would like mappings for. Extents returned mirror
|
||||||
@ -60,6 +49,8 @@ FIEMAP_FLAG_XATTR
|
|||||||
If this flag is set, the extents returned will describe the inode's
|
If this flag is set, the extents returned will describe the inode's
|
||||||
extended attribute lookup tree, instead of its data tree.
|
extended attribute lookup tree, instead of its data tree.
|
||||||
|
|
||||||
|
FIEMAP_FLAG_CACHE
|
||||||
|
This flag requests caching of the extents.
|
||||||
|
|
||||||
Extent Mapping
|
Extent Mapping
|
||||||
--------------
|
--------------
|
||||||
@ -77,18 +68,10 @@ complete the requested range and will not have the FIEMAP_EXTENT_LAST
|
|||||||
flag set (see the next section on extent flags).
|
flag set (see the next section on extent flags).
|
||||||
|
|
||||||
Each extent is described by a single fiemap_extent structure as
|
Each extent is described by a single fiemap_extent structure as
|
||||||
returned in fm_extents::
|
returned in fm_extents:
|
||||||
|
|
||||||
struct fiemap_extent {
|
.. kernel-doc:: include/uapi/linux/fiemap.h
|
||||||
__u64 fe_logical; /* logical offset in bytes for the start of
|
:identifiers: fiemap_extent
|
||||||
* the extent */
|
|
||||||
__u64 fe_physical; /* physical offset in bytes for the start
|
|
||||||
* of the extent */
|
|
||||||
__u64 fe_length; /* length in bytes for the extent */
|
|
||||||
__u64 fe_reserved64[2];
|
|
||||||
__u32 fe_flags; /* FIEMAP_EXTENT_* flags for this extent */
|
|
||||||
__u32 fe_reserved[3];
|
|
||||||
};
|
|
||||||
|
|
||||||
All offsets and lengths are in bytes and mirror those on disk. It is valid
|
All offsets and lengths are in bytes and mirror those on disk. It is valid
|
||||||
for an extent's logical offset to start before the request or its logical
|
for an extent's logical offset to start before the request or its logical
|
||||||
@ -175,6 +158,8 @@ FIEMAP_EXTENT_MERGED
|
|||||||
userspace would be highly inefficient, the kernel will try to merge most
|
userspace would be highly inefficient, the kernel will try to merge most
|
||||||
adjacent blocks into 'extents'.
|
adjacent blocks into 'extents'.
|
||||||
|
|
||||||
|
FIEMAP_EXTENT_SHARED
|
||||||
|
This flag is set to request that space be shared with other files.
|
||||||
|
|
||||||
VFS -> File System Implementation
|
VFS -> File System Implementation
|
||||||
---------------------------------
|
---------------------------------
|
||||||
@ -191,14 +176,10 @@ each discovered extent::
|
|||||||
u64 len);
|
u64 len);
|
||||||
|
|
||||||
->fiemap is passed struct fiemap_extent_info which describes the
|
->fiemap is passed struct fiemap_extent_info which describes the
|
||||||
fiemap request::
|
fiemap request:
|
||||||
|
|
||||||
struct fiemap_extent_info {
|
.. kernel-doc:: include/linux/fiemap.h
|
||||||
unsigned int fi_flags; /* Flags as passed from user */
|
:identifiers: fiemap_extent_info
|
||||||
unsigned int fi_extents_mapped; /* Number of mapped extents */
|
|
||||||
unsigned int fi_extents_max; /* Size of fiemap_extent array */
|
|
||||||
struct fiemap_extent *fi_extents_start; /* Start of fiemap_extent array */
|
|
||||||
};
|
|
||||||
|
|
||||||
It is intended that the file system should not need to access any of this
|
It is intended that the file system should not need to access any of this
|
||||||
structure directly. Filesystem handlers should be tolerant to signals and return
|
structure directly. Filesystem handlers should be tolerant to signals and return
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
# SPDX-License-Identifier: GPL-2.0-only
|
# SPDX-License-Identifier: GPL-2.0-only
|
||||||
config VBOXGUEST
|
config VBOXGUEST
|
||||||
tristate "Virtual Box Guest integration support"
|
tristate "Virtual Box Guest integration support"
|
||||||
depends on X86 && PCI && INPUT
|
depends on (ARM64 || X86) && PCI && INPUT
|
||||||
help
|
help
|
||||||
This is a driver for the Virtual Box Guest PCI device used in
|
This is a driver for the Virtual Box Guest PCI device used in
|
||||||
Virtual Box virtual machines. Enabling this driver will add
|
Virtual Box virtual machines. Enabling this driver will add
|
||||||
|
@ -5006,10 +5006,11 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
|
|||||||
if (IS_ENCRYPTED(inode)) {
|
if (IS_ENCRYPTED(inode)) {
|
||||||
inode->i_op = &ext4_encrypted_symlink_inode_operations;
|
inode->i_op = &ext4_encrypted_symlink_inode_operations;
|
||||||
} else if (ext4_inode_is_fast_symlink(inode)) {
|
} else if (ext4_inode_is_fast_symlink(inode)) {
|
||||||
inode->i_link = (char *)ei->i_data;
|
|
||||||
inode->i_op = &ext4_fast_symlink_inode_operations;
|
inode->i_op = &ext4_fast_symlink_inode_operations;
|
||||||
nd_terminate_link(ei->i_data, inode->i_size,
|
nd_terminate_link(ei->i_data, inode->i_size,
|
||||||
sizeof(ei->i_data) - 1);
|
sizeof(ei->i_data) - 1);
|
||||||
|
inode_set_cached_link(inode, (char *)ei->i_data,
|
||||||
|
inode->i_size);
|
||||||
} else {
|
} else {
|
||||||
inode->i_op = &ext4_symlink_inode_operations;
|
inode->i_op = &ext4_symlink_inode_operations;
|
||||||
}
|
}
|
||||||
|
@ -3418,7 +3418,6 @@ retry:
|
|||||||
inode->i_op = &ext4_symlink_inode_operations;
|
inode->i_op = &ext4_symlink_inode_operations;
|
||||||
} else {
|
} else {
|
||||||
inode->i_op = &ext4_fast_symlink_inode_operations;
|
inode->i_op = &ext4_fast_symlink_inode_operations;
|
||||||
inode->i_link = (char *)&EXT4_I(inode)->i_data;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3434,6 +3433,9 @@ retry:
|
|||||||
disk_link.len);
|
disk_link.len);
|
||||||
inode->i_size = disk_link.len - 1;
|
inode->i_size = disk_link.len - 1;
|
||||||
EXT4_I(inode)->i_disksize = inode->i_size;
|
EXT4_I(inode)->i_disksize = inode->i_size;
|
||||||
|
if (!IS_ENCRYPTED(inode))
|
||||||
|
inode_set_cached_link(inode, (char *)&EXT4_I(inode)->i_data,
|
||||||
|
inode->i_size);
|
||||||
}
|
}
|
||||||
err = ext4_add_nondir(handle, dentry, &inode);
|
err = ext4_add_nondir(handle, dentry, &inode);
|
||||||
if (handle)
|
if (handle)
|
||||||
|
22
fs/file.c
22
fs/file.c
@ -279,10 +279,6 @@ repeat:
|
|||||||
if (nr < fdt->max_fds)
|
if (nr < fdt->max_fds)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
/* Can we expand? */
|
|
||||||
if (nr >= sysctl_nr_open)
|
|
||||||
return -EMFILE;
|
|
||||||
|
|
||||||
if (unlikely(files->resize_in_progress)) {
|
if (unlikely(files->resize_in_progress)) {
|
||||||
spin_unlock(&files->file_lock);
|
spin_unlock(&files->file_lock);
|
||||||
wait_event(files->resize_wait, !files->resize_in_progress);
|
wait_event(files->resize_wait, !files->resize_in_progress);
|
||||||
@ -290,6 +286,10 @@ repeat:
|
|||||||
goto repeat;
|
goto repeat;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Can we expand? */
|
||||||
|
if (unlikely(nr >= sysctl_nr_open))
|
||||||
|
return -EMFILE;
|
||||||
|
|
||||||
/* All good, so we try */
|
/* All good, so we try */
|
||||||
files->resize_in_progress = true;
|
files->resize_in_progress = true;
|
||||||
error = expand_fdtable(files, nr);
|
error = expand_fdtable(files, nr);
|
||||||
@ -1231,17 +1231,9 @@ __releases(&files->file_lock)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* We need to detect attempts to do dup2() over allocated but still
|
* We need to detect attempts to do dup2() over allocated but still
|
||||||
* not finished descriptor. NB: OpenBSD avoids that at the price of
|
* not finished descriptor.
|
||||||
* extra work in their equivalent of fget() - they insert struct
|
*
|
||||||
* file immediately after grabbing descriptor, mark it larval if
|
* POSIX is silent on the issue, we return -EBUSY.
|
||||||
* more work (e.g. actual opening) is needed and make sure that
|
|
||||||
* fget() treats larval files as absent. Potentially interesting,
|
|
||||||
* but while extra work in fget() is trivial, locking implications
|
|
||||||
* and amount of surgery on open()-related paths in VFS are not.
|
|
||||||
* FreeBSD fails with -EBADF in the same situation, NetBSD "solution"
|
|
||||||
* deadlocks in rather amusing ways, AFAICS. All of that is out of
|
|
||||||
* scope of POSIX or SUS, since neither considers shared descriptor
|
|
||||||
* tables and this condition does not arise without those.
|
|
||||||
*/
|
*/
|
||||||
fdt = files_fdtable(files);
|
fdt = files_fdtable(files);
|
||||||
fd = array_index_nospec(fd, fdt->max_fds);
|
fd = array_index_nospec(fd, fdt->max_fds);
|
||||||
|
@ -128,7 +128,7 @@ static struct ctl_table fs_stat_sysctls[] = {
|
|||||||
.data = &sysctl_nr_open,
|
.data = &sysctl_nr_open,
|
||||||
.maxlen = sizeof(unsigned int),
|
.maxlen = sizeof(unsigned int),
|
||||||
.mode = 0644,
|
.mode = 0644,
|
||||||
.proc_handler = proc_dointvec_minmax,
|
.proc_handler = proc_douintvec_minmax,
|
||||||
.extra1 = &sysctl_nr_open_min,
|
.extra1 = &sysctl_nr_open_min,
|
||||||
.extra2 = &sysctl_nr_open_max,
|
.extra2 = &sysctl_nr_open_max,
|
||||||
},
|
},
|
||||||
@ -478,6 +478,8 @@ static void ____fput(struct callback_head *work)
|
|||||||
__fput(container_of(work, struct file, f_task_work));
|
__fput(container_of(work, struct file, f_task_work));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If kernel thread really needs to have the final fput() it has done
|
* If kernel thread really needs to have the final fput() it has done
|
||||||
* to complete, call this. The only user right now is the boot - we
|
* to complete, call this. The only user right now is the boot - we
|
||||||
@ -491,11 +493,10 @@ static void ____fput(struct callback_head *work)
|
|||||||
void flush_delayed_fput(void)
|
void flush_delayed_fput(void)
|
||||||
{
|
{
|
||||||
delayed_fput(NULL);
|
delayed_fput(NULL);
|
||||||
|
flush_delayed_work(&delayed_fput_work);
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(flush_delayed_fput);
|
EXPORT_SYMBOL_GPL(flush_delayed_fput);
|
||||||
|
|
||||||
static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput);
|
|
||||||
|
|
||||||
void fput(struct file *file)
|
void fput(struct file *file)
|
||||||
{
|
{
|
||||||
if (file_ref_put(&file->f_ref)) {
|
if (file_ref_put(&file->f_ref)) {
|
||||||
|
@ -493,7 +493,7 @@ static void put_fc_log(struct fs_context *fc)
|
|||||||
if (log) {
|
if (log) {
|
||||||
if (refcount_dec_and_test(&log->usage)) {
|
if (refcount_dec_and_test(&log->usage)) {
|
||||||
fc->log.log = NULL;
|
fc->log.log = NULL;
|
||||||
for (i = 0; i <= 7; i++)
|
for (i = 0; i < ARRAY_SIZE(log->buffer) ; i++)
|
||||||
if (log->need_free & (1 << i))
|
if (log->need_free & (1 << i))
|
||||||
kfree(log->buffer[i]);
|
kfree(log->buffer[i]);
|
||||||
kfree(log);
|
kfree(log);
|
||||||
|
34
fs/namei.c
34
fs/namei.c
@ -5272,19 +5272,16 @@ SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newna
|
|||||||
getname(newname), 0);
|
getname(newname), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
int readlink_copy(char __user *buffer, int buflen, const char *link)
|
int readlink_copy(char __user *buffer, int buflen, const char *link, int linklen)
|
||||||
{
|
{
|
||||||
int len = PTR_ERR(link);
|
int copylen;
|
||||||
if (IS_ERR(link))
|
|
||||||
goto out;
|
|
||||||
|
|
||||||
len = strlen(link);
|
copylen = linklen;
|
||||||
if (len > (unsigned) buflen)
|
if (unlikely(copylen > (unsigned) buflen))
|
||||||
len = buflen;
|
copylen = buflen;
|
||||||
if (copy_to_user(buffer, link, len))
|
if (copy_to_user(buffer, link, copylen))
|
||||||
len = -EFAULT;
|
copylen = -EFAULT;
|
||||||
out:
|
return copylen;
|
||||||
return len;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -5304,6 +5301,9 @@ int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen)
|
|||||||
const char *link;
|
const char *link;
|
||||||
int res;
|
int res;
|
||||||
|
|
||||||
|
if (inode->i_opflags & IOP_CACHED_LINK)
|
||||||
|
return readlink_copy(buffer, buflen, inode->i_link, inode->i_linklen);
|
||||||
|
|
||||||
if (unlikely(!(inode->i_opflags & IOP_DEFAULT_READLINK))) {
|
if (unlikely(!(inode->i_opflags & IOP_DEFAULT_READLINK))) {
|
||||||
if (unlikely(inode->i_op->readlink))
|
if (unlikely(inode->i_op->readlink))
|
||||||
return inode->i_op->readlink(dentry, buffer, buflen);
|
return inode->i_op->readlink(dentry, buffer, buflen);
|
||||||
@ -5322,7 +5322,7 @@ int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen)
|
|||||||
if (IS_ERR(link))
|
if (IS_ERR(link))
|
||||||
return PTR_ERR(link);
|
return PTR_ERR(link);
|
||||||
}
|
}
|
||||||
res = readlink_copy(buffer, buflen, link);
|
res = readlink_copy(buffer, buflen, link, strlen(link));
|
||||||
do_delayed_call(&done);
|
do_delayed_call(&done);
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
@ -5391,10 +5391,14 @@ EXPORT_SYMBOL(page_put_link);
|
|||||||
|
|
||||||
int page_readlink(struct dentry *dentry, char __user *buffer, int buflen)
|
int page_readlink(struct dentry *dentry, char __user *buffer, int buflen)
|
||||||
{
|
{
|
||||||
|
const char *link;
|
||||||
|
int res;
|
||||||
|
|
||||||
DEFINE_DELAYED_CALL(done);
|
DEFINE_DELAYED_CALL(done);
|
||||||
int res = readlink_copy(buffer, buflen,
|
link = page_get_link(dentry, d_inode(dentry), &done);
|
||||||
page_get_link(dentry, d_inode(dentry),
|
res = PTR_ERR(link);
|
||||||
&done));
|
if (!IS_ERR(link))
|
||||||
|
res = readlink_copy(buffer, buflen, link, strlen(link));
|
||||||
do_delayed_call(&done);
|
do_delayed_call(&done);
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
19
fs/pipe.c
19
fs/pipe.c
@ -253,7 +253,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
|
|||||||
size_t total_len = iov_iter_count(to);
|
size_t total_len = iov_iter_count(to);
|
||||||
struct file *filp = iocb->ki_filp;
|
struct file *filp = iocb->ki_filp;
|
||||||
struct pipe_inode_info *pipe = filp->private_data;
|
struct pipe_inode_info *pipe = filp->private_data;
|
||||||
bool was_full, wake_next_reader = false;
|
bool wake_writer = false, wake_next_reader = false;
|
||||||
ssize_t ret;
|
ssize_t ret;
|
||||||
|
|
||||||
/* Null read succeeds. */
|
/* Null read succeeds. */
|
||||||
@ -264,14 +264,13 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
|
|||||||
mutex_lock(&pipe->mutex);
|
mutex_lock(&pipe->mutex);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We only wake up writers if the pipe was full when we started
|
* We only wake up writers if the pipe was full when we started reading
|
||||||
* reading in order to avoid unnecessary wakeups.
|
* and it is no longer full after reading to avoid unnecessary wakeups.
|
||||||
*
|
*
|
||||||
* But when we do wake up writers, we do so using a sync wakeup
|
* But when we do wake up writers, we do so using a sync wakeup
|
||||||
* (WF_SYNC), because we want them to get going and generate more
|
* (WF_SYNC), because we want them to get going and generate more
|
||||||
* data for us.
|
* data for us.
|
||||||
*/
|
*/
|
||||||
was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
|
|
||||||
for (;;) {
|
for (;;) {
|
||||||
/* Read ->head with a barrier vs post_one_notification() */
|
/* Read ->head with a barrier vs post_one_notification() */
|
||||||
unsigned int head = smp_load_acquire(&pipe->head);
|
unsigned int head = smp_load_acquire(&pipe->head);
|
||||||
@ -340,8 +339,10 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
|
|||||||
buf->len = 0;
|
buf->len = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!buf->len)
|
if (!buf->len) {
|
||||||
|
wake_writer |= pipe_full(head, tail, pipe->max_usage);
|
||||||
tail = pipe_update_tail(pipe, buf, tail);
|
tail = pipe_update_tail(pipe, buf, tail);
|
||||||
|
}
|
||||||
total_len -= chars;
|
total_len -= chars;
|
||||||
if (!total_len)
|
if (!total_len)
|
||||||
break; /* common path: read succeeded */
|
break; /* common path: read succeeded */
|
||||||
@ -377,7 +378,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
|
|||||||
* _very_ unlikely case that the pipe was full, but we got
|
* _very_ unlikely case that the pipe was full, but we got
|
||||||
* no data.
|
* no data.
|
||||||
*/
|
*/
|
||||||
if (unlikely(was_full))
|
if (unlikely(wake_writer))
|
||||||
wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
|
wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
|
||||||
kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
|
kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
|
||||||
|
|
||||||
@ -390,15 +391,15 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
|
|||||||
if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0)
|
if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0)
|
||||||
return -ERESTARTSYS;
|
return -ERESTARTSYS;
|
||||||
|
|
||||||
mutex_lock(&pipe->mutex);
|
wake_writer = false;
|
||||||
was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
|
|
||||||
wake_next_reader = true;
|
wake_next_reader = true;
|
||||||
|
mutex_lock(&pipe->mutex);
|
||||||
}
|
}
|
||||||
if (pipe_empty(pipe->head, pipe->tail))
|
if (pipe_empty(pipe->head, pipe->tail))
|
||||||
wake_next_reader = false;
|
wake_next_reader = false;
|
||||||
mutex_unlock(&pipe->mutex);
|
mutex_unlock(&pipe->mutex);
|
||||||
|
|
||||||
if (was_full)
|
if (wake_writer)
|
||||||
wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
|
wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
|
||||||
if (wake_next_reader)
|
if (wake_next_reader)
|
||||||
wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
|
wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
|
||||||
|
@ -611,10 +611,10 @@ int propagate_umount(struct list_head *list)
|
|||||||
continue;
|
continue;
|
||||||
} else if (child->mnt.mnt_flags & MNT_UMOUNT) {
|
} else if (child->mnt.mnt_flags & MNT_UMOUNT) {
|
||||||
/*
|
/*
|
||||||
* We have come accross an partially unmounted
|
* We have come across a partially unmounted
|
||||||
* mount in list that has not been visited yet.
|
* mount in a list that has not been visited
|
||||||
* Remember it has been visited and continue
|
* yet. Remember it has been visited and
|
||||||
* about our merry way.
|
* continue about our merry way.
|
||||||
*/
|
*/
|
||||||
list_add_tail(&child->mnt_umounting, &visited);
|
list_add_tail(&child->mnt_umounting, &visited);
|
||||||
continue;
|
continue;
|
||||||
|
@ -500,7 +500,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
|
|||||||
* a program is not able to use ptrace(2) in that case. It is
|
* a program is not able to use ptrace(2) in that case. It is
|
||||||
* safe because the task has stopped executing permanently.
|
* safe because the task has stopped executing permanently.
|
||||||
*/
|
*/
|
||||||
if (permitted && (task->flags & (PF_EXITING|PF_DUMPCORE))) {
|
if (permitted && (task->flags & (PF_EXITING|PF_DUMPCORE|PF_POSTCOREDUMP))) {
|
||||||
if (try_get_task_stack(task)) {
|
if (try_get_task_stack(task)) {
|
||||||
eip = KSTK_EIP(task);
|
eip = KSTK_EIP(task);
|
||||||
esp = KSTK_ESP(task);
|
esp = KSTK_ESP(task);
|
||||||
|
@ -83,7 +83,7 @@ static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int bufl
|
|||||||
if (ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) {
|
if (ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) {
|
||||||
res = ns_get_name(name, sizeof(name), task, ns_ops);
|
res = ns_get_name(name, sizeof(name), task, ns_ops);
|
||||||
if (res >= 0)
|
if (res >= 0)
|
||||||
res = readlink_copy(buffer, buflen, name);
|
res = readlink_copy(buffer, buflen, name, strlen(name));
|
||||||
}
|
}
|
||||||
put_task_struct(task);
|
put_task_struct(task);
|
||||||
return res;
|
return res;
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
config VBOXSF_FS
|
config VBOXSF_FS
|
||||||
tristate "VirtualBox guest shared folder (vboxsf) support"
|
tristate "VirtualBox guest shared folder (vboxsf) support"
|
||||||
depends on X86 && VBOXGUEST
|
depends on (ARM64 || X86) && VBOXGUEST
|
||||||
select NLS
|
select NLS
|
||||||
help
|
help
|
||||||
VirtualBox hosts can share folders with guests, this driver
|
VirtualBox hosts can share folders with guests, this driver
|
||||||
|
@ -5,12 +5,18 @@
|
|||||||
#include <uapi/linux/fiemap.h>
|
#include <uapi/linux/fiemap.h>
|
||||||
#include <linux/fs.h>
|
#include <linux/fs.h>
|
||||||
|
|
||||||
|
/**
|
||||||
|
* struct fiemap_extent_info - fiemap request to a filesystem
|
||||||
|
* @fi_flags: Flags as passed from user
|
||||||
|
* @fi_extents_mapped: Number of mapped extents
|
||||||
|
* @fi_extents_max: Size of fiemap_extent array
|
||||||
|
* @fi_extents_start: Start of fiemap_extent array
|
||||||
|
*/
|
||||||
struct fiemap_extent_info {
|
struct fiemap_extent_info {
|
||||||
unsigned int fi_flags; /* Flags as passed from user */
|
unsigned int fi_flags;
|
||||||
unsigned int fi_extents_mapped; /* Number of mapped extents */
|
unsigned int fi_extents_mapped;
|
||||||
unsigned int fi_extents_max; /* Size of fiemap_extent array */
|
unsigned int fi_extents_max;
|
||||||
struct fiemap_extent __user *fi_extents_start; /* Start of
|
struct fiemap_extent __user *fi_extents_start;
|
||||||
fiemap_extent array */
|
|
||||||
};
|
};
|
||||||
|
|
||||||
int fiemap_prep(struct inode *inode, struct fiemap_extent_info *fieinfo,
|
int fiemap_prep(struct inode *inode, struct fiemap_extent_info *fieinfo,
|
||||||
|
@ -322,6 +322,7 @@ struct readahead_control;
|
|||||||
#define IOCB_NOWAIT (__force int) RWF_NOWAIT
|
#define IOCB_NOWAIT (__force int) RWF_NOWAIT
|
||||||
#define IOCB_APPEND (__force int) RWF_APPEND
|
#define IOCB_APPEND (__force int) RWF_APPEND
|
||||||
#define IOCB_ATOMIC (__force int) RWF_ATOMIC
|
#define IOCB_ATOMIC (__force int) RWF_ATOMIC
|
||||||
|
#define IOCB_DONTCACHE (__force int) RWF_DONTCACHE
|
||||||
|
|
||||||
/* non-RWF related bits - start at 16 */
|
/* non-RWF related bits - start at 16 */
|
||||||
#define IOCB_EVENTFD (1 << 16)
|
#define IOCB_EVENTFD (1 << 16)
|
||||||
@ -356,7 +357,8 @@ struct readahead_control;
|
|||||||
{ IOCB_SYNC, "SYNC" }, \
|
{ IOCB_SYNC, "SYNC" }, \
|
||||||
{ IOCB_NOWAIT, "NOWAIT" }, \
|
{ IOCB_NOWAIT, "NOWAIT" }, \
|
||||||
{ IOCB_APPEND, "APPEND" }, \
|
{ IOCB_APPEND, "APPEND" }, \
|
||||||
{ IOCB_ATOMIC, "ATOMIC"}, \
|
{ IOCB_ATOMIC, "ATOMIC" }, \
|
||||||
|
{ IOCB_DONTCACHE, "DONTCACHE" }, \
|
||||||
{ IOCB_EVENTFD, "EVENTFD"}, \
|
{ IOCB_EVENTFD, "EVENTFD"}, \
|
||||||
{ IOCB_DIRECT, "DIRECT" }, \
|
{ IOCB_DIRECT, "DIRECT" }, \
|
||||||
{ IOCB_WRITE, "WRITE" }, \
|
{ IOCB_WRITE, "WRITE" }, \
|
||||||
@ -626,6 +628,7 @@ is_uncached_acl(struct posix_acl *acl)
|
|||||||
#define IOP_XATTR 0x0008
|
#define IOP_XATTR 0x0008
|
||||||
#define IOP_DEFAULT_READLINK 0x0010
|
#define IOP_DEFAULT_READLINK 0x0010
|
||||||
#define IOP_MGTIME 0x0020
|
#define IOP_MGTIME 0x0020
|
||||||
|
#define IOP_CACHED_LINK 0x0040
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Keep mostly read-only and often accessed (especially for
|
* Keep mostly read-only and often accessed (especially for
|
||||||
@ -723,7 +726,10 @@ struct inode {
|
|||||||
};
|
};
|
||||||
struct file_lock_context *i_flctx;
|
struct file_lock_context *i_flctx;
|
||||||
struct address_space i_data;
|
struct address_space i_data;
|
||||||
struct list_head i_devices;
|
union {
|
||||||
|
struct list_head i_devices;
|
||||||
|
int i_linklen;
|
||||||
|
};
|
||||||
union {
|
union {
|
||||||
struct pipe_inode_info *i_pipe;
|
struct pipe_inode_info *i_pipe;
|
||||||
struct cdev *i_cdev;
|
struct cdev *i_cdev;
|
||||||
@ -749,6 +755,13 @@ struct inode {
|
|||||||
void *i_private; /* fs or device private pointer */
|
void *i_private; /* fs or device private pointer */
|
||||||
} __randomize_layout;
|
} __randomize_layout;
|
||||||
|
|
||||||
|
static inline void inode_set_cached_link(struct inode *inode, char *link, int linklen)
|
||||||
|
{
|
||||||
|
inode->i_link = link;
|
||||||
|
inode->i_linklen = linklen;
|
||||||
|
inode->i_opflags |= IOP_CACHED_LINK;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Get bit address from inode->i_state to use with wait_var_event()
|
* Get bit address from inode->i_state to use with wait_var_event()
|
||||||
* infrastructure.
|
* infrastructure.
|
||||||
@ -2127,6 +2140,8 @@ struct file_operations {
|
|||||||
#define FOP_UNSIGNED_OFFSET ((__force fop_flags_t)(1 << 5))
|
#define FOP_UNSIGNED_OFFSET ((__force fop_flags_t)(1 << 5))
|
||||||
/* Supports asynchronous lock callbacks */
|
/* Supports asynchronous lock callbacks */
|
||||||
#define FOP_ASYNC_LOCK ((__force fop_flags_t)(1 << 6))
|
#define FOP_ASYNC_LOCK ((__force fop_flags_t)(1 << 6))
|
||||||
|
/* File system supports uncached read/write buffered IO */
|
||||||
|
#define FOP_DONTCACHE ((__force fop_flags_t)(1 << 7))
|
||||||
|
|
||||||
/* Wrap a directory iterator that needs exclusive inode access */
|
/* Wrap a directory iterator that needs exclusive inode access */
|
||||||
int wrap_directory_iterator(struct file *, struct dir_context *,
|
int wrap_directory_iterator(struct file *, struct dir_context *,
|
||||||
@ -3351,7 +3366,7 @@ extern const struct file_operations generic_ro_fops;
|
|||||||
|
|
||||||
#define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m))
|
#define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m))
|
||||||
|
|
||||||
extern int readlink_copy(char __user *, int, const char *);
|
extern int readlink_copy(char __user *, int, const char *, int);
|
||||||
extern int page_readlink(struct dentry *, char __user *, int);
|
extern int page_readlink(struct dentry *, char __user *, int);
|
||||||
extern const char *page_get_link(struct dentry *, struct inode *,
|
extern const char *page_get_link(struct dentry *, struct inode *,
|
||||||
struct delayed_call *);
|
struct delayed_call *);
|
||||||
@ -3614,6 +3629,14 @@ static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags,
|
|||||||
if (!(ki->ki_filp->f_mode & FMODE_CAN_ATOMIC_WRITE))
|
if (!(ki->ki_filp->f_mode & FMODE_CAN_ATOMIC_WRITE))
|
||||||
return -EOPNOTSUPP;
|
return -EOPNOTSUPP;
|
||||||
}
|
}
|
||||||
|
if (flags & RWF_DONTCACHE) {
|
||||||
|
/* file system must support it */
|
||||||
|
if (!(ki->ki_filp->f_op->fop_flags & FOP_DONTCACHE))
|
||||||
|
return -EOPNOTSUPP;
|
||||||
|
/* DAX mappings not supported */
|
||||||
|
if (IS_DAX(ki->ki_filp->f_mapping->host))
|
||||||
|
return -EOPNOTSUPP;
|
||||||
|
}
|
||||||
kiocb_flags |= (__force int) (flags & RWF_SUPPORTED);
|
kiocb_flags |= (__force int) (flags & RWF_SUPPORTED);
|
||||||
if (flags & RWF_SYNC)
|
if (flags & RWF_SYNC)
|
||||||
kiocb_flags |= IOCB_DSYNC;
|
kiocb_flags |= IOCB_DSYNC;
|
||||||
|
@ -75,7 +75,7 @@ struct vfsmount {
|
|||||||
static inline struct mnt_idmap *mnt_idmap(const struct vfsmount *mnt)
|
static inline struct mnt_idmap *mnt_idmap(const struct vfsmount *mnt)
|
||||||
{
|
{
|
||||||
/* Pairs with smp_store_release() in do_idmap_mount(). */
|
/* Pairs with smp_store_release() in do_idmap_mount(). */
|
||||||
return smp_load_acquire(&mnt->mnt_idmap);
|
return READ_ONCE(mnt->mnt_idmap);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern int mnt_want_write(struct vfsmount *mnt);
|
extern int mnt_want_write(struct vfsmount *mnt);
|
||||||
|
@ -272,7 +272,7 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex)
|
|||||||
({ \
|
({ \
|
||||||
unsigned __seq; \
|
unsigned __seq; \
|
||||||
\
|
\
|
||||||
while ((__seq = seqprop_sequence(s)) & 1) \
|
while (unlikely((__seq = seqprop_sequence(s)) & 1)) \
|
||||||
cpu_relax(); \
|
cpu_relax(); \
|
||||||
\
|
\
|
||||||
kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); \
|
kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); \
|
||||||
|
@ -14,37 +14,56 @@
|
|||||||
|
|
||||||
#include <linux/types.h>
|
#include <linux/types.h>
|
||||||
|
|
||||||
|
/**
|
||||||
|
* struct fiemap_extent - description of one fiemap extent
|
||||||
|
* @fe_logical: byte offset of the extent in the file
|
||||||
|
* @fe_physical: byte offset of extent on disk
|
||||||
|
* @fe_length: length in bytes for this extent
|
||||||
|
* @fe_flags: FIEMAP_EXTENT_* flags for this extent
|
||||||
|
*/
|
||||||
struct fiemap_extent {
|
struct fiemap_extent {
|
||||||
__u64 fe_logical; /* logical offset in bytes for the start of
|
__u64 fe_logical;
|
||||||
* the extent from the beginning of the file */
|
__u64 fe_physical;
|
||||||
__u64 fe_physical; /* physical offset in bytes for the start
|
__u64 fe_length;
|
||||||
* of the extent from the beginning of the disk */
|
/* private: */
|
||||||
__u64 fe_length; /* length in bytes for this extent */
|
|
||||||
__u64 fe_reserved64[2];
|
__u64 fe_reserved64[2];
|
||||||
__u32 fe_flags; /* FIEMAP_EXTENT_* flags for this extent */
|
/* public: */
|
||||||
|
__u32 fe_flags;
|
||||||
|
/* private: */
|
||||||
__u32 fe_reserved[3];
|
__u32 fe_reserved[3];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* struct fiemap - file extent mappings
|
||||||
|
* @fm_start: byte offset (inclusive) at which to start mapping (in)
|
||||||
|
* @fm_length: logical length of mapping which userspace wants (in)
|
||||||
|
* @fm_flags: FIEMAP_FLAG_* flags for request (in/out)
|
||||||
|
* @fm_mapped_extents: number of extents that were mapped (out)
|
||||||
|
* @fm_extent_count: size of fm_extents array (in)
|
||||||
|
* @fm_extents: array of mapped extents (out)
|
||||||
|
*/
|
||||||
struct fiemap {
|
struct fiemap {
|
||||||
__u64 fm_start; /* logical offset (inclusive) at
|
__u64 fm_start;
|
||||||
* which to start mapping (in) */
|
__u64 fm_length;
|
||||||
__u64 fm_length; /* logical length of mapping which
|
__u32 fm_flags;
|
||||||
* userspace wants (in) */
|
__u32 fm_mapped_extents;
|
||||||
__u32 fm_flags; /* FIEMAP_FLAG_* flags for request (in/out) */
|
__u32 fm_extent_count;
|
||||||
__u32 fm_mapped_extents;/* number of extents that were mapped (out) */
|
/* private: */
|
||||||
__u32 fm_extent_count; /* size of fm_extents array (in) */
|
|
||||||
__u32 fm_reserved;
|
__u32 fm_reserved;
|
||||||
struct fiemap_extent fm_extents[]; /* array of mapped extents (out) */
|
/* public: */
|
||||||
|
struct fiemap_extent fm_extents[];
|
||||||
};
|
};
|
||||||
|
|
||||||
#define FIEMAP_MAX_OFFSET (~0ULL)
|
#define FIEMAP_MAX_OFFSET (~0ULL)
|
||||||
|
|
||||||
|
/* flags used in fm_flags: */
|
||||||
#define FIEMAP_FLAG_SYNC 0x00000001 /* sync file data before map */
|
#define FIEMAP_FLAG_SYNC 0x00000001 /* sync file data before map */
|
||||||
#define FIEMAP_FLAG_XATTR 0x00000002 /* map extended attribute tree */
|
#define FIEMAP_FLAG_XATTR 0x00000002 /* map extended attribute tree */
|
||||||
#define FIEMAP_FLAG_CACHE 0x00000004 /* request caching of the extents */
|
#define FIEMAP_FLAG_CACHE 0x00000004 /* request caching of the extents */
|
||||||
|
|
||||||
#define FIEMAP_FLAGS_COMPAT (FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR)
|
#define FIEMAP_FLAGS_COMPAT (FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR)
|
||||||
|
|
||||||
|
/* flags used in fe_flags: */
|
||||||
#define FIEMAP_EXTENT_LAST 0x00000001 /* Last extent in file. */
|
#define FIEMAP_EXTENT_LAST 0x00000001 /* Last extent in file. */
|
||||||
#define FIEMAP_EXTENT_UNKNOWN 0x00000002 /* Data location unknown. */
|
#define FIEMAP_EXTENT_UNKNOWN 0x00000002 /* Data location unknown. */
|
||||||
#define FIEMAP_EXTENT_DELALLOC 0x00000004 /* Location still pending.
|
#define FIEMAP_EXTENT_DELALLOC 0x00000004 /* Location still pending.
|
||||||
|
@ -332,9 +332,13 @@ typedef int __bitwise __kernel_rwf_t;
|
|||||||
/* Atomic Write */
|
/* Atomic Write */
|
||||||
#define RWF_ATOMIC ((__force __kernel_rwf_t)0x00000040)
|
#define RWF_ATOMIC ((__force __kernel_rwf_t)0x00000040)
|
||||||
|
|
||||||
|
/* buffered IO that drops the cache after reading or writing data */
|
||||||
|
#define RWF_DONTCACHE ((__force __kernel_rwf_t)0x00000080)
|
||||||
|
|
||||||
/* mask of flags supported by the kernel */
|
/* mask of flags supported by the kernel */
|
||||||
#define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\
|
#define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\
|
||||||
RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC)
|
RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC |\
|
||||||
|
RWF_DONTCACHE)
|
||||||
|
|
||||||
#define PROCFS_IOCTL_MAGIC 'f'
|
#define PROCFS_IOCTL_MAGIC 'f'
|
||||||
|
|
||||||
|
@ -71,7 +71,7 @@ static void watch_queue_pipe_buf_release(struct pipe_inode_info *pipe,
|
|||||||
bit /= WATCH_QUEUE_NOTE_SIZE;
|
bit /= WATCH_QUEUE_NOTE_SIZE;
|
||||||
|
|
||||||
page = buf->page;
|
page = buf->page;
|
||||||
bit += page->index;
|
bit += page->private;
|
||||||
|
|
||||||
set_bit(bit, wqueue->notes_bitmap);
|
set_bit(bit, wqueue->notes_bitmap);
|
||||||
generic_pipe_buf_release(pipe, buf);
|
generic_pipe_buf_release(pipe, buf);
|
||||||
@ -278,7 +278,7 @@ long watch_queue_set_size(struct pipe_inode_info *pipe, unsigned int nr_notes)
|
|||||||
pages[i] = alloc_page(GFP_KERNEL);
|
pages[i] = alloc_page(GFP_KERNEL);
|
||||||
if (!pages[i])
|
if (!pages[i])
|
||||||
goto error_p;
|
goto error_p;
|
||||||
pages[i]->index = i * WATCH_QUEUE_NOTES_PER_PAGE;
|
pages[i]->private = i * WATCH_QUEUE_NOTES_PER_PAGE;
|
||||||
}
|
}
|
||||||
|
|
||||||
bitmap = bitmap_alloc(nr_notes, GFP_KERNEL);
|
bitmap = bitmap_alloc(nr_notes, GFP_KERNEL);
|
||||||
|
@ -3917,6 +3917,7 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir,
|
|||||||
int len;
|
int len;
|
||||||
struct inode *inode;
|
struct inode *inode;
|
||||||
struct folio *folio;
|
struct folio *folio;
|
||||||
|
char *link;
|
||||||
|
|
||||||
len = strlen(symname) + 1;
|
len = strlen(symname) + 1;
|
||||||
if (len > PAGE_SIZE)
|
if (len > PAGE_SIZE)
|
||||||
@ -3938,12 +3939,13 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir,
|
|||||||
|
|
||||||
inode->i_size = len-1;
|
inode->i_size = len-1;
|
||||||
if (len <= SHORT_SYMLINK_LEN) {
|
if (len <= SHORT_SYMLINK_LEN) {
|
||||||
inode->i_link = kmemdup(symname, len, GFP_KERNEL);
|
link = kmemdup(symname, len, GFP_KERNEL);
|
||||||
if (!inode->i_link) {
|
if (!link) {
|
||||||
error = -ENOMEM;
|
error = -ENOMEM;
|
||||||
goto out_remove_offset;
|
goto out_remove_offset;
|
||||||
}
|
}
|
||||||
inode->i_op = &shmem_short_symlink_operations;
|
inode->i_op = &shmem_short_symlink_operations;
|
||||||
|
inode_set_cached_link(inode, link, len - 1);
|
||||||
} else {
|
} else {
|
||||||
inode_nohighmem(inode);
|
inode_nohighmem(inode);
|
||||||
inode->i_mapping->a_ops = &shmem_aops;
|
inode->i_mapping->a_ops = &shmem_aops;
|
||||||
|
@ -2612,7 +2612,7 @@ static int policy_readlink(struct dentry *dentry, char __user *buffer,
|
|||||||
res = snprintf(name, sizeof(name), "%s:[%lu]", AAFS_NAME,
|
res = snprintf(name, sizeof(name), "%s:[%lu]", AAFS_NAME,
|
||||||
d_inode(dentry)->i_ino);
|
d_inode(dentry)->i_ino);
|
||||||
if (res > 0 && res < sizeof(name))
|
if (res > 0 && res < sizeof(name))
|
||||||
res = readlink_copy(buffer, buflen, name);
|
res = readlink_copy(buffer, buflen, name, strlen(name));
|
||||||
else
|
else
|
||||||
res = -ENOENT;
|
res = -ENOENT;
|
||||||
|
|
||||||
|
7
tools/testing/selftests/coredump/Makefile
Normal file
7
tools/testing/selftests/coredump/Makefile
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
# SPDX-License-Identifier: GPL-2.0-only

# Append instead of assigning so flags inherited from the environment or from
# the including build are kept. stackdump_test creates threads, so it has to
# be compiled and linked with -pthread.
CFLAGS += $(KHDR_INCLUDES) -pthread

TEST_GEN_PROGS := stackdump_test
TEST_FILES := stackdump

include ../lib.mk
|
50
tools/testing/selftests/coredump/README.rst
Normal file
50
tools/testing/selftests/coredump/README.rst
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
coredump selftest
|
||||||
|
=================
|
||||||
|
|
||||||
|
Background context
|
||||||
|
------------------
|
||||||
|
|
||||||
|
`coredump` is a feature which dumps a process's memory space when the process terminates
|
||||||
|
unexpectedly (e.g. due to segmentation fault), which can be useful for debugging. By default,
|
||||||
|
`coredump` dumps the memory to the file named `core`, but this behavior can be changed by writing a
|
||||||
|
different file name to `/proc/sys/kernel/core_pattern`. Furthermore, `coredump` can be piped to a
|
||||||
|
user-space program by writing the pipe symbol (`|`) followed by the command to be executed to
|
||||||
|
`/proc/sys/kernel/core_pattern`. For the full description, see `man 5 core`.
|
||||||
|
|
||||||
|
The piped user program may be interested in reading the stack pointers of the crashed process. The
|
||||||
|
crashed process's stack pointers can be read from `procfs`: it is the `kstkesp` field in
|
||||||
|
`/proc/$PID/stat`. See `man 5 proc` for all the details.
|
||||||
|
|
||||||
|
The problem
|
||||||
|
-----------
|
||||||
|
While a thread is active, the stack pointer is unsafe to read and therefore the `kstkesp` field
|
||||||
|
reads zero. But when the thread is dead (e.g. during a coredump), this field should have a valid
|
||||||
|
value.
|
||||||
|
|
||||||
|
However, this was broken in the past and `kstkesp` was zero even during coredump:
|
||||||
|
|
||||||
|
* commit 0a1eb2d474ed ("fs/proc: Stop reporting eip and esp in /proc/PID/stat") changed kstkesp to
|
||||||
|
always be zero
|
||||||
|
|
||||||
|
* commit fd7d56270b52 ("fs/proc: Report eip/esp in /prod/PID/stat for coredumping") fixed it for the
|
||||||
|
coredumping thread. However, other threads in a coredumping process still had the problem.
|
||||||
|
|
||||||
|
* commit cb8f381f1613 ("fs/proc/array.c: allow reporting eip/esp for all coredumping threads") fixed
|
||||||
|
it for all threads in a coredumping process.
|
||||||
|
|
||||||
|
* commit 92307383082d ("coredump: Don't perform any cleanups before dumping core") broke it again
|
||||||
|
for the other threads in a coredumping process.
|
||||||
|
|
||||||
|
The problem has been fixed now, but considering the history, it may appear again in the future.
|
||||||
|
|
||||||
|
The goal of this test
|
||||||
|
---------------------
|
||||||
|
This test detects problems with reading `kstkesp` during coredump by doing the following:
|
||||||
|
|
||||||
|
#. Tell the kernel to execute the "stackdump" script when a coredump happens. This script
|
||||||
|
reads the stack pointers of all threads of crashed processes.
|
||||||
|
|
||||||
|
#. Spawn a child process that creates some threads and then crashes.
|
||||||
|
|
||||||
|
#. Read the output from the "stackdump" script, and make sure all stack pointer values are
|
||||||
|
non-zero.
|
14
tools/testing/selftests/coredump/stackdump
Executable file
14
tools/testing/selftests/coredump/stackdump
Executable file
@ -0,0 +1,14 @@
|
|||||||
|
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
#
# Writes field 29 of /proc/<tid>/stat for every task of a process, one value
# per line.
#
# $1: PID of the process whose tasks are inspected
# $2: path of the file that receives the values

CRASH_PROGRAM_ID=$1
STACKDUMP_FILE=$2

# Stage the output next to the destination: the final mv is then a rename
# within one filesystem, so a reader polling for $STACKDUMP_FILE never sees a
# partially written file.
TMP=$(mktemp "$STACKDUMP_FILE.XXXXXX") || exit 1

for t in /proc/"$CRASH_PROGRAM_ID"/task/*; do
	tid=$(basename "$t")
	awk '{print $29}' "/proc/$tid/stat" >> "$TMP"
done

mv "$TMP" "$STACKDUMP_FILE"
|
151
tools/testing/selftests/coredump/stackdump_test.c
Normal file
151
tools/testing/selftests/coredump/stackdump_test.c
Normal file
@ -0,0 +1,151 @@
|
|||||||
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
|
|
||||||
|
#include <fcntl.h>
#include <libgen.h>
#include <linux/limits.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/resource.h>
#include <sys/wait.h>
#include <unistd.h>
|
||||||
|
|
||||||
|
#include "../kselftest_harness.h"
|
||||||
|
|
||||||
|
#define STACKDUMP_FILE "stack_values"
|
||||||
|
#define STACKDUMP_SCRIPT "stackdump"
|
||||||
|
#define NUM_THREAD_SPAWN 128
|
||||||
|
|
||||||
|
/*
 * Thread start routine that never returns: it sleeps in pause() forever.
 *
 * @arg: unused. It is named because omitting a parameter name in a function
 *       definition is only valid since C23.
 */
static void *do_nothing(void *arg)
{
	(void)arg;

	while (1)
		pause();

	return NULL;
}
|
||||||
|
|
||||||
|
static void crashing_child(void)
|
||||||
|
{
|
||||||
|
pthread_t thread;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (i = 0; i < NUM_THREAD_SPAWN; ++i)
|
||||||
|
pthread_create(&thread, NULL, do_nothing, NULL);
|
||||||
|
|
||||||
|
/* crash on purpose */
|
||||||
|
i = *(int *)NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Per-test state of the "coredump" fixture. */
FIXTURE(coredump)
{
	/* NUL-terminated text buffer for a core_pattern value */
	char original_core_pattern[256];
};
|
||||||
|
|
||||||
|
FIXTURE_SETUP(coredump)
|
||||||
|
{
|
||||||
|
char buf[PATH_MAX];
|
||||||
|
FILE *file;
|
||||||
|
char *dir;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
file = fopen("/proc/sys/kernel/core_pattern", "r");
|
||||||
|
ASSERT_NE(NULL, file);
|
||||||
|
|
||||||
|
ret = fread(self->original_core_pattern, 1, sizeof(self->original_core_pattern), file);
|
||||||
|
ASSERT_TRUE(ret || feof(file));
|
||||||
|
ASSERT_LT(ret, sizeof(self->original_core_pattern));
|
||||||
|
|
||||||
|
self->original_core_pattern[ret] = '\0';
|
||||||
|
|
||||||
|
ret = fclose(file);
|
||||||
|
ASSERT_EQ(0, ret);
|
||||||
|
}
|
||||||
|
|
||||||
|
FIXTURE_TEARDOWN(coredump)
|
||||||
|
{
|
||||||
|
const char *reason;
|
||||||
|
FILE *file;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
unlink(STACKDUMP_FILE);
|
||||||
|
|
||||||
|
file = fopen("/proc/sys/kernel/core_pattern", "w");
|
||||||
|
if (!file) {
|
||||||
|
reason = "Unable to open core_pattern";
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = fprintf(file, "%s", self->original_core_pattern);
|
||||||
|
if (ret < 0) {
|
||||||
|
reason = "Unable to write to core_pattern";
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = fclose(file);
|
||||||
|
if (ret) {
|
||||||
|
reason = "Unable to close core_pattern";
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
|
return;
|
||||||
|
fail:
|
||||||
|
/* This should never happen */
|
||||||
|
fprintf(stderr, "Failed to cleanup stackdump test: %s\n", reason);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(coredump, stackdump)
|
||||||
|
{
|
||||||
|
struct sigaction action = {};
|
||||||
|
unsigned long long stack;
|
||||||
|
char *test_dir, *line;
|
||||||
|
size_t line_length;
|
||||||
|
char buf[PATH_MAX];
|
||||||
|
int ret, i;
|
||||||
|
FILE *file;
|
||||||
|
pid_t pid;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Step 1: Setup core_pattern so that the stackdump script is executed when the child
|
||||||
|
* process crashes
|
||||||
|
*/
|
||||||
|
ret = readlink("/proc/self/exe", buf, sizeof(buf));
|
||||||
|
ASSERT_NE(-1, ret);
|
||||||
|
ASSERT_LT(ret, sizeof(buf));
|
||||||
|
buf[ret] = '\0';
|
||||||
|
|
||||||
|
test_dir = dirname(buf);
|
||||||
|
|
||||||
|
file = fopen("/proc/sys/kernel/core_pattern", "w");
|
||||||
|
ASSERT_NE(NULL, file);
|
||||||
|
|
||||||
|
ret = fprintf(file, "|%1$s/%2$s %%P %1$s/%3$s", test_dir, STACKDUMP_SCRIPT, STACKDUMP_FILE);
|
||||||
|
ASSERT_LT(0, ret);
|
||||||
|
|
||||||
|
ret = fclose(file);
|
||||||
|
ASSERT_EQ(0, ret);
|
||||||
|
|
||||||
|
/* Step 2: Create a process who spawns some threads then crashes */
|
||||||
|
pid = fork();
|
||||||
|
ASSERT_TRUE(pid >= 0);
|
||||||
|
if (pid == 0)
|
||||||
|
crashing_child();
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Step 3: Wait for the stackdump script to write the stack pointers to the stackdump file
|
||||||
|
*/
|
||||||
|
for (i = 0; i < 10; ++i) {
|
||||||
|
file = fopen(STACKDUMP_FILE, "r");
|
||||||
|
if (file)
|
||||||
|
break;
|
||||||
|
sleep(1);
|
||||||
|
}
|
||||||
|
ASSERT_NE(file, NULL);
|
||||||
|
|
||||||
|
/* Step 4: Make sure all stack pointer values are non-zero */
|
||||||
|
for (i = 0; -1 != getline(&line, &line_length, file); ++i) {
|
||||||
|
stack = strtoull(line, NULL, 10);
|
||||||
|
ASSERT_NE(stack, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
ASSERT_EQ(i, 1 + NUM_THREAD_SPAWN);
|
||||||
|
|
||||||
|
fclose(file);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_HARNESS_MAIN
|
Loading…
x
Reference in New Issue
Block a user