mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-01 10:42:11 +00:00
mm/madvise: unrestrict process_madvise() for current process
The process_madvise() call was introduced in commitecb8ac8b1f
("mm/madvise: introduce process_madvise() syscall: an external memory hinting API") as a means of performing madvise() operations on another process. However, as it provides the means by which to perform multiple madvise() operations in a batch via an iovec, it is useful to utilise the same interface for performing operations on the current process rather than a remote one. Commit22af8caff7
("mm/madvise: process_madvise() drop capability check if same mm") removed the need for a caller invoking process_madvise() on its own pidfd to possess the CAP_SYS_NICE capability, however this leaves the restrictions on operation in place. Resolve this by only applying the restriction on operations when accessing a remote process. Moving forward we plan to implement a simpler means of specifying this condition other than needing to establish a self pidfd, perhaps in the form of a sentinel pidfd. Also take the opportunity to refactor the system call implementation abstracting the vectorised operation. Link: https://lkml.kernel.org/r/20240926151019.82902-1-lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> Acked-by: Shakeel Butt <shakeel.butt@linux.dev> Acked-by: Vlastimil Babka <vbabka@suse.cz> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Christian Brauner <brauner@kernel.org> Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com> Cc: Minchan Kim <minchan@kernel.org> Cc: Pedro Falcato <pedro.falcato@gmail.com> Cc: Suren Baghdasaryan <surenb@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
parent
f33cea94e3
commit
021781b012
55
mm/madvise.c
55
mm/madvise.c
@ -1208,7 +1208,8 @@ madvise_behavior_valid(int behavior)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool process_madvise_behavior_valid(int behavior)
|
/* Can we invoke process_madvise() on a remote mm for the specified behavior? */
|
||||||
|
static bool process_madvise_remote_valid(int behavior)
|
||||||
{
|
{
|
||||||
switch (behavior) {
|
switch (behavior) {
|
||||||
case MADV_COLD:
|
case MADV_COLD:
|
||||||
@ -1477,6 +1478,28 @@ SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior)
|
|||||||
return do_madvise(current->mm, start, len_in, behavior);
|
return do_madvise(current->mm, start, len_in, behavior);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Perform an madvise operation over a vector of addresses and lengths. */
|
||||||
|
static ssize_t vector_madvise(struct mm_struct *mm, struct iov_iter *iter,
|
||||||
|
int behavior)
|
||||||
|
{
|
||||||
|
ssize_t ret = 0;
|
||||||
|
size_t total_len;
|
||||||
|
|
||||||
|
total_len = iov_iter_count(iter);
|
||||||
|
|
||||||
|
while (iov_iter_count(iter)) {
|
||||||
|
ret = do_madvise(mm, (unsigned long)iter_iov_addr(iter),
|
||||||
|
iter_iov_len(iter), behavior);
|
||||||
|
if (ret < 0)
|
||||||
|
break;
|
||||||
|
iov_iter_advance(iter, iter_iov_len(iter));
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = (total_len - iov_iter_count(iter)) ? : ret;
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec,
|
SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec,
|
||||||
size_t, vlen, int, behavior, unsigned int, flags)
|
size_t, vlen, int, behavior, unsigned int, flags)
|
||||||
{
|
{
|
||||||
@ -1486,7 +1509,6 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec,
|
|||||||
struct iov_iter iter;
|
struct iov_iter iter;
|
||||||
struct task_struct *task;
|
struct task_struct *task;
|
||||||
struct mm_struct *mm;
|
struct mm_struct *mm;
|
||||||
size_t total_len;
|
|
||||||
unsigned int f_flags;
|
unsigned int f_flags;
|
||||||
|
|
||||||
if (flags != 0) {
|
if (flags != 0) {
|
||||||
@ -1504,11 +1526,6 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec,
|
|||||||
goto free_iov;
|
goto free_iov;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!process_madvise_behavior_valid(behavior)) {
|
|
||||||
ret = -EINVAL;
|
|
||||||
goto release_task;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Require PTRACE_MODE_READ to avoid leaking ASLR metadata. */
|
/* Require PTRACE_MODE_READ to avoid leaking ASLR metadata. */
|
||||||
mm = mm_access(task, PTRACE_MODE_READ_FSCREDS);
|
mm = mm_access(task, PTRACE_MODE_READ_FSCREDS);
|
||||||
if (IS_ERR(mm)) {
|
if (IS_ERR(mm)) {
|
||||||
@ -1516,26 +1533,26 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec,
|
|||||||
goto release_task;
|
goto release_task;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We need only perform this check if we are attempting to manipulate a
|
||||||
|
* remote process's address space.
|
||||||
|
*/
|
||||||
|
if (mm != current->mm && !process_madvise_remote_valid(behavior)) {
|
||||||
|
ret = -EINVAL;
|
||||||
|
goto release_mm;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Require CAP_SYS_NICE for influencing process performance. Note that
|
* Require CAP_SYS_NICE for influencing process performance. Note that
|
||||||
* only non-destructive hints are currently supported.
|
* only non-destructive hints are currently supported for remote
|
||||||
|
* processes.
|
||||||
*/
|
*/
|
||||||
if (mm != current->mm && !capable(CAP_SYS_NICE)) {
|
if (mm != current->mm && !capable(CAP_SYS_NICE)) {
|
||||||
ret = -EPERM;
|
ret = -EPERM;
|
||||||
goto release_mm;
|
goto release_mm;
|
||||||
}
|
}
|
||||||
|
|
||||||
total_len = iov_iter_count(&iter);
|
ret = vector_madvise(mm, &iter, behavior);
|
||||||
|
|
||||||
while (iov_iter_count(&iter)) {
|
|
||||||
ret = do_madvise(mm, (unsigned long)iter_iov_addr(&iter),
|
|
||||||
iter_iov_len(&iter), behavior);
|
|
||||||
if (ret < 0)
|
|
||||||
break;
|
|
||||||
iov_iter_advance(&iter, iter_iov_len(&iter));
|
|
||||||
}
|
|
||||||
|
|
||||||
ret = (total_len - iov_iter_count(&iter)) ? : ret;
|
|
||||||
|
|
||||||
release_mm:
|
release_mm:
|
||||||
mmput(mm);
|
mmput(mm);
|
||||||
|
Loading…
Reference in New Issue
Block a user