fs/proc: Introduce /proc/pid/timens_offsets

API to set time namespace offsets for child processes, e.g.:
echo "$clockid $offset_sec $offset_nsec" > /proc/self/timens_offsets

Co-developed-by: Dmitry Safonov <dima@arista.com>
Signed-off-by: Andrei Vagin <avagin@gmail.com>
Signed-off-by: Dmitry Safonov <dima@arista.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20191112012724.250792-28-dima@arista.com
This commit is contained in:
Andrei Vagin 2019-11-12 01:27:16 +00:00 committed by Thomas Gleixner
parent 70ddf65184
commit 04a8682a71
3 changed files with 205 additions and 0 deletions

View File

@ -94,6 +94,7 @@
#include <linux/sched/debug.h> #include <linux/sched/debug.h>
#include <linux/sched/stat.h> #include <linux/sched/stat.h>
#include <linux/posix-timers.h> #include <linux/posix-timers.h>
#include <linux/time_namespace.h>
#include <trace/events/oom.h> #include <trace/events/oom.h>
#include "internal.h" #include "internal.h"
#include "fd.h" #include "fd.h"
@ -1533,6 +1534,96 @@ static const struct file_operations proc_pid_sched_autogroup_operations = {
#endif /* CONFIG_SCHED_AUTOGROUP */ #endif /* CONFIG_SCHED_AUTOGROUP */
#ifdef CONFIG_TIME_NS
/*
 * seq_file show callback for the timens_offsets proc file.
 *
 * Looks up the task behind the file's inode and lets
 * proc_timens_show_offsets() print its offsets into @m.
 *
 * Returns 0 on success, or -ESRCH when the task cannot be looked up.
 */
static int timens_offsets_show(struct seq_file *m, void *v)
{
	struct task_struct *task = get_proc_task(file_inode(m->file));

	if (task) {
		proc_timens_show_offsets(task, m);
		/* Balance the get_proc_task() above. */
		put_task_struct(task);
		return 0;
	}

	return -ESRCH;
}
/*
 * Write handler for the timens_offsets proc file.
 *
 * Parses up to ARRAY_SIZE(offsets) newline-separated records of the form
 * "<clockid> <offset_sec> <offset_nsec>" and hands them to
 * proc_timens_set_offset() for the task behind the file's inode.
 *
 * Returns the number of bytes consumed on success. Fails with -EINVAL when
 * the write is not at position 0, is PAGE_SIZE bytes or larger, or contains
 * a malformed record; with -ESRCH when the task cannot be looked up; and
 * otherwise with whatever memdup_user_nul() or proc_timens_set_offset()
 * reports.
 */
static ssize_t timens_offsets_write(struct file *file, const char __user *buf,
				    size_t count, loff_t *ppos)
{
	struct inode *inode = file_inode(file);
	struct proc_timens_offset offsets[2];
	char *kbuf = NULL, *pos, *next_line;
	struct task_struct *p;
	int ret, noffsets;

	/* Only allow < page size writes at the beginning of the file */
	if ((*ppos != 0) || (count >= PAGE_SIZE))
		return -EINVAL;

	/* Slurp in the user data */
	kbuf = memdup_user_nul(buf, count);
	if (IS_ERR(kbuf))
		return PTR_ERR(kbuf);

	/* Parse the user data */
	ret = -EINVAL;
	noffsets = 0;
	for (pos = kbuf; pos; pos = next_line) {
		struct proc_timens_offset *off = &offsets[noffsets];
		int err;

		/* Find the end of line and ensure we don't look past it */
		next_line = strchr(pos, '\n');
		if (next_line) {
			*next_line = '\0';
			next_line++;
			if (*next_line == '\0')
				next_line = NULL;
		}

		err = sscanf(pos, "%u %lld %lu", &off->clockid,
				&off->val.tv_sec, &off->val.tv_nsec);
		/*
		 * tv_nsec is scanned as unsigned but stored in a signed
		 * field, so an input above LONG_MAX shows up here as a
		 * negative value. The upper-bound check alone would let it
		 * through; reject it explicitly so only nanoseconds in
		 * [0, NSEC_PER_SEC) are accepted.
		 */
		if (err != 3 || off->val.tv_nsec < 0 ||
		    off->val.tv_nsec >= NSEC_PER_SEC)
			goto out;

		noffsets++;
		if (noffsets == ARRAY_SIZE(offsets)) {
			/*
			 * The array is full: report only the bytes that were
			 * actually parsed so the caller sees a short write.
			 */
			if (next_line)
				count = next_line - kbuf;
			break;
		}
	}

	ret = -ESRCH;
	p = get_proc_task(inode);
	if (!p)
		goto out;
	ret = proc_timens_set_offset(file, p, offsets, noffsets);
	put_task_struct(p);
	if (ret)
		goto out;

	ret = count;
out:
	kfree(kbuf);
	return ret;
}
/*
 * Open handler for the timens_offsets proc file: sets up a seq_file via
 * single_open() with timens_offsets_show() as the show callback and the
 * inode as its data argument. Returns single_open()'s result unchanged.
 */
static int timens_offsets_open(struct inode *inode, struct file *filp)
{
	int rc = single_open(filp, timens_offsets_show, inode);

	return rc;
}
/*
 * File operations for the timens_offsets proc file. Reading goes through the
 * generic seq_file helpers; writing is handled by timens_offsets_write().
 */
static const struct file_operations proc_timens_offsets_operations = {
	/* lifetime of the seq_file state */
	.open		= timens_offsets_open,
	.release	= single_release,
	/* read side: generic seq_file helpers */
	.read		= seq_read,
	.llseek		= seq_lseek,
	/* write side: parse and apply new offsets */
	.write		= timens_offsets_write,
};
#endif /* CONFIG_TIME_NS */
static ssize_t comm_write(struct file *file, const char __user *buf, static ssize_t comm_write(struct file *file, const char __user *buf,
size_t count, loff_t *offset) size_t count, loff_t *offset)
{ {
@ -3015,6 +3106,9 @@ static const struct pid_entry tgid_base_stuff[] = {
#endif #endif
#ifdef CONFIG_SCHED_AUTOGROUP #ifdef CONFIG_SCHED_AUTOGROUP
REG("autogroup", S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations), REG("autogroup", S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations),
#endif
#ifdef CONFIG_TIME_NS
REG("timens_offsets", S_IRUGO|S_IWUSR, proc_timens_offsets_operations),
#endif #endif
REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
#ifdef CONFIG_HAVE_ARCH_TRACEHOOK #ifdef CONFIG_HAVE_ARCH_TRACEHOOK

View File

@ -52,6 +52,16 @@ static inline void put_time_ns(struct time_namespace *ns)
kref_put(&ns->kref, free_time_ns); kref_put(&ns->kref, free_time_ns);
} }
/* Print the time namespace offsets for task @p into seq_file @m. */
void proc_timens_show_offsets(struct task_struct *p, struct seq_file *m);
/*
 * One offset request as passed to proc_timens_set_offset(): the clock it
 * applies to and the offset value to store for that clock.
 */
struct proc_timens_offset {
/* Clock identifier the offset applies to. */
int clockid;
/* Offset value (seconds and nanoseconds). */
struct timespec64 val;
};
/*
 * Apply @n entries from @offsets on behalf of task @p; @file is the file the
 * request arrived through. Returns an int status.
 */
int proc_timens_set_offset(struct file *file, struct task_struct *p,
struct proc_timens_offset *offsets, int n);
static inline void timens_add_monotonic(struct timespec64 *ts) static inline void timens_add_monotonic(struct timespec64 *ts)
{ {
struct timens_offsets *ns_offsets = &current->nsproxy->time_ns->offsets; struct timens_offsets *ns_offsets = &current->nsproxy->time_ns->offsets;

View File

@ -8,6 +8,7 @@
#include <linux/user_namespace.h> #include <linux/user_namespace.h>
#include <linux/sched/signal.h> #include <linux/sched/signal.h>
#include <linux/sched/task.h> #include <linux/sched/task.h>
#include <linux/seq_file.h>
#include <linux/proc_ns.h> #include <linux/proc_ns.h>
#include <linux/export.h> #include <linux/export.h>
#include <linux/time.h> #include <linux/time.h>
@ -334,6 +335,106 @@ static struct user_namespace *timens_owner(struct ns_common *ns)
return to_time_ns(ns)->user_ns; return to_time_ns(ns)->user_ns;
} }
/* Print one "<clockid> <sec> <nsec>" line for @ts into seq_file @m. */
static void show_offset(struct seq_file *m, int clockid, struct timespec64 *ts)
{
	const long long sec = ts->tv_sec;
	const long nsec = ts->tv_nsec;

	seq_printf(m, "%d %lld %ld\n", clockid, sec, nsec);
}
/*
 * Print the offsets of the time namespace obtained from
 * timens_for_children_get() for task @p into seq_file @m: one line for
 * CLOCK_MONOTONIC followed by one for CLOCK_BOOTTIME.
 *
 * Prints nothing when no namespace is returned for the task.
 */
void proc_timens_show_offsets(struct task_struct *p, struct seq_file *m)
{
	struct ns_common *ns_ref = timens_for_children_get(p);
	struct time_namespace *tns;

	if (!ns_ref)
		return;

	tns = to_time_ns(ns_ref);

	show_offset(m, CLOCK_MONOTONIC, &tns->offsets.monotonic);
	show_offset(m, CLOCK_BOOTTIME, &tns->offsets.boottime);

	/* Done with the namespace obtained above. */
	put_time_ns(tns);
}
int proc_timens_set_offset(struct file *file, struct task_struct *p,
struct proc_timens_offset *offsets, int noffsets)
{
struct ns_common *ns;
struct time_namespace *time_ns;
struct timespec64 tp;
int i, err;
ns = timens_for_children_get(p);
if (!ns)
return -ESRCH;
time_ns = to_time_ns(ns);
if (!file_ns_capable(file, time_ns->user_ns, CAP_SYS_TIME)) {
put_time_ns(time_ns);
return -EPERM;
}
for (i = 0; i < noffsets; i++) {
struct proc_timens_offset *off = &offsets[i];
switch (off->clockid) {
case CLOCK_MONOTONIC:
ktime_get_ts64(&tp);
break;
case CLOCK_BOOTTIME:
ktime_get_boottime_ts64(&tp);
break;
default:
err = -EINVAL;
goto out;
}
err = -ERANGE;
if (off->val.tv_sec > KTIME_SEC_MAX ||
off->val.tv_sec < -KTIME_SEC_MAX)
goto out;
tp = timespec64_add(tp, off->val);
/*
* KTIME_SEC_MAX is divided by 2 to be sure that KTIME_MAX is
* still unreachable.
*/
if (tp.tv_sec < 0 || tp.tv_sec > KTIME_SEC_MAX / 2)
goto out;
}
mutex_lock(&offset_lock);
if (time_ns->frozen_offsets) {
err = -EACCES;
goto out_unlock;
}
err = 0;
/* Don't report errors after this line */
for (i = 0; i < noffsets; i++) {
struct proc_timens_offset *off = &offsets[i];
struct timespec64 *offset = NULL;
switch (off->clockid) {
case CLOCK_MONOTONIC:
offset = &time_ns->offsets.monotonic;
break;
case CLOCK_BOOTTIME:
offset = &time_ns->offsets.boottime;
break;
}
*offset = off->val;
}
out_unlock:
mutex_unlock(&offset_lock);
out:
put_time_ns(time_ns);
return err;
}
const struct proc_ns_operations timens_operations = { const struct proc_ns_operations timens_operations = {
.name = "time", .name = "time",
.type = CLONE_NEWTIME, .type = CLONE_NEWTIME,