2019-06-01 10:08:55 +02:00
|
|
|
// SPDX-License-Identifier: GPL-2.0-only
|
2016-07-30 13:58:49 -05:00
|
|
|
|
|
|
|
#include <linux/stat.h>
|
|
|
|
#include <linux/sysctl.h>
|
|
|
|
#include <linux/slab.h>
|
2017-02-02 17:54:15 +01:00
|
|
|
#include <linux/cred.h>
|
2016-08-08 13:54:50 -05:00
|
|
|
#include <linux/hash.h>
|
2018-04-05 16:25:34 -07:00
|
|
|
#include <linux/kmemleak.h>
|
2016-07-30 13:58:49 -05:00
|
|
|
#include <linux/user_namespace.h>
|
|
|
|
|
2021-04-22 14:27:09 +02:00
|
|
|
struct ucounts init_ucounts = {
|
|
|
|
.ns = &init_user_ns,
|
|
|
|
.uid = GLOBAL_ROOT_UID,
|
2021-04-22 14:27:10 +02:00
|
|
|
.count = ATOMIC_INIT(1),
|
2021-04-22 14:27:09 +02:00
|
|
|
};
|
|
|
|
|
2016-08-08 13:54:50 -05:00
|
|
|
#define UCOUNTS_HASHTABLE_BITS 10
|
|
|
|
static struct hlist_head ucounts_hashtable[(1 << UCOUNTS_HASHTABLE_BITS)];
|
|
|
|
static DEFINE_SPINLOCK(ucounts_lock);
|
|
|
|
|
|
|
|
#define ucounts_hashfn(ns, uid) \
|
|
|
|
hash_long((unsigned long)__kuid_val(uid) + (unsigned long)(ns), \
|
|
|
|
UCOUNTS_HASHTABLE_BITS)
|
|
|
|
#define ucounts_hashentry(ns, uid) \
|
|
|
|
(ucounts_hashtable + ucounts_hashfn(ns, uid))
|
|
|
|
|
|
|
|
|
2016-07-30 13:58:49 -05:00
|
|
|
#ifdef CONFIG_SYSCTL
|
|
|
|
static struct ctl_table_set *
|
|
|
|
set_lookup(struct ctl_table_root *root)
|
|
|
|
{
|
|
|
|
return ¤t_user_ns()->set;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int set_is_seen(struct ctl_table_set *set)
|
|
|
|
{
|
|
|
|
return ¤t_user_ns()->set == set;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int set_permissions(struct ctl_table_header *head,
|
|
|
|
struct ctl_table *table)
|
|
|
|
{
|
|
|
|
struct user_namespace *user_ns =
|
|
|
|
container_of(head->set, struct user_namespace, set);
|
|
|
|
int mode;
|
|
|
|
|
|
|
|
/* Allow users with CAP_SYS_RESOURCE unrestrained access */
|
|
|
|
if (ns_capable(user_ns, CAP_SYS_RESOURCE))
|
|
|
|
mode = (table->mode & S_IRWXU) >> 6;
|
|
|
|
else
|
|
|
|
/* Allow all others at most read-only access */
|
|
|
|
mode = table->mode & S_IROTH;
|
|
|
|
return (mode << 6) | (mode << 3) | mode;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct ctl_table_root set_root = {
|
|
|
|
.lookup = set_lookup,
|
|
|
|
.permissions = set_permissions,
|
|
|
|
};
|
|
|
|
|
2021-07-30 08:28:54 +02:00
|
|
|
static long ue_zero = 0;
|
|
|
|
static long ue_int_max = INT_MAX;
|
|
|
|
|
|
|
|
#define UCOUNT_ENTRY(name) \
|
|
|
|
{ \
|
|
|
|
.procname = name, \
|
|
|
|
.maxlen = sizeof(long), \
|
|
|
|
.mode = 0644, \
|
|
|
|
.proc_handler = proc_doulongvec_minmax, \
|
|
|
|
.extra1 = &ue_zero, \
|
|
|
|
.extra2 = &ue_int_max, \
|
2016-08-08 14:41:52 -05:00
|
|
|
}
|
2016-08-08 13:54:50 -05:00
|
|
|
/*
 * Template sysctl table that setup_userns_sysctls() duplicates for each user
 * namespace. Entry i is wired to ns->ucount_max[i], so the order here must
 * not change; setup_userns_sysctls() checks at build time that the array has
 * exactly UCOUNT_COUNTS + 1 elements.
 */
static struct ctl_table user_table[] = {
	UCOUNT_ENTRY("max_user_namespaces"),
	UCOUNT_ENTRY("max_pid_namespaces"),
	UCOUNT_ENTRY("max_uts_namespaces"),
	UCOUNT_ENTRY("max_ipc_namespaces"),
	UCOUNT_ENTRY("max_net_namespaces"),
	UCOUNT_ENTRY("max_mnt_namespaces"),
	UCOUNT_ENTRY("max_cgroup_namespaces"),
	UCOUNT_ENTRY("max_time_namespaces"),
#ifdef CONFIG_INOTIFY_USER
	UCOUNT_ENTRY("max_inotify_instances"),
	UCOUNT_ENTRY("max_inotify_watches"),
#endif
#ifdef CONFIG_FANOTIFY
	UCOUNT_ENTRY("max_fanotify_groups"),
	UCOUNT_ENTRY("max_fanotify_marks"),
#endif
	/*
	 * Four nameless slots.
	 * NOTE(review): presumably these stand in for ucount types that have
	 * no sysctl file of their own - confirm against enum ucount_type.
	 */
	{ },
	{ },
	{ },
	{ },
	/* Final empty element */
	{ }
};
|
|
|
|
#endif /* CONFIG_SYSCTL */
|
|
|
|
|
|
|
|
bool setup_userns_sysctls(struct user_namespace *ns)
|
|
|
|
{
|
|
|
|
#ifdef CONFIG_SYSCTL
|
|
|
|
struct ctl_table *tbl;
|
2020-04-07 17:46:43 +02:00
|
|
|
|
|
|
|
BUILD_BUG_ON(ARRAY_SIZE(user_table) != UCOUNT_COUNTS + 1);
|
2016-07-30 13:58:49 -05:00
|
|
|
setup_sysctl_set(&ns->set, &set_root, set_is_seen);
|
2016-08-08 13:54:50 -05:00
|
|
|
tbl = kmemdup(user_table, sizeof(user_table), GFP_KERNEL);
|
2016-07-30 13:58:49 -05:00
|
|
|
if (tbl) {
|
2016-08-08 14:41:52 -05:00
|
|
|
int i;
|
|
|
|
for (i = 0; i < UCOUNT_COUNTS; i++) {
|
|
|
|
tbl[i].data = &ns->ucount_max[i];
|
|
|
|
}
|
2016-08-08 13:54:50 -05:00
|
|
|
ns->sysctls = __register_sysctl_table(&ns->set, "user", tbl);
|
2016-07-30 13:58:49 -05:00
|
|
|
}
|
|
|
|
if (!ns->sysctls) {
|
|
|
|
kfree(tbl);
|
|
|
|
retire_sysctl_set(&ns->set);
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Tear down what setup_userns_sysctls() created for @ns: unregister the
 * sysctl table, retire the table set and free the duplicated table.
 * A no-op without CONFIG_SYSCTL.
 */
void retire_userns_sysctls(struct user_namespace *ns)
{
#ifdef CONFIG_SYSCTL
	/* Fetch the table pointer first; the header is unregistered next. */
	struct ctl_table *copy = ns->sysctls->ctl_table_arg;

	unregister_sysctl_table(ns->sysctls);
	retire_sysctl_set(&ns->set);
	kfree(copy);
#endif
}
|
|
|
|
|
2016-08-08 13:54:50 -05:00
|
|
|
/*
 * Walk the bucket @hashent and return the entry whose namespace is @ns and
 * whose uid equals @uid, or NULL when the bucket holds no such entry.
 * No reference is taken on the returned entry. The callers visible in this
 * file invoke it with ucounts_lock held.
 */
static struct ucounts *find_ucounts(struct user_namespace *ns, kuid_t uid, struct hlist_head *hashent)
{
	struct ucounts *pos;

	hlist_for_each_entry(pos, hashent, node) {
		if (pos->ns != ns)
			continue;
		if (!uid_eq(pos->uid, uid))
			continue;
		return pos;
	}

	return NULL;
}
|
|
|
|
|
2021-04-22 14:27:09 +02:00
|
|
|
static void hlist_add_ucounts(struct ucounts *ucounts)
|
|
|
|
{
|
|
|
|
struct hlist_head *hashent = ucounts_hashentry(ucounts->ns, ucounts->uid);
|
|
|
|
spin_lock_irq(&ucounts_lock);
|
|
|
|
hlist_add_head(&ucounts->node, hashent);
|
|
|
|
spin_unlock_irq(&ucounts_lock);
|
|
|
|
}
|
|
|
|
|
2021-10-16 14:05:34 -05:00
|
|
|
static inline bool get_ucounts_or_wrap(struct ucounts *ucounts)
|
|
|
|
{
|
|
|
|
/* Returns true on a successful get, false if the count wraps. */
|
|
|
|
return !atomic_add_negative(1, &ucounts->count);
|
|
|
|
}
|
|
|
|
|
2021-04-22 14:27:10 +02:00
|
|
|
struct ucounts *get_ucounts(struct ucounts *ucounts)
|
|
|
|
{
|
2021-10-16 14:05:34 -05:00
|
|
|
if (!get_ucounts_or_wrap(ucounts)) {
|
2021-04-22 14:27:10 +02:00
|
|
|
put_ucounts(ucounts);
|
|
|
|
ucounts = NULL;
|
|
|
|
}
|
|
|
|
return ucounts;
|
|
|
|
}
|
|
|
|
|
2021-04-22 14:27:09 +02:00
|
|
|
/*
 * Look up the ucounts entry for (@ns, @uid), creating it if necessary, and
 * return it with a reference held.
 *
 * Returns NULL if a new entry could not be allocated or if the reference
 * count of an existing entry wrapped.
 */
struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid)
{
	struct hlist_head *bucket = ucounts_hashentry(ns, uid);
	struct ucounts *found, *fresh;
	bool overflowed;

	spin_lock_irq(&ucounts_lock);
	found = find_ucounts(ns, uid, bucket);
	if (found)
		goto take_ref;

	/* The lock is dropped around the GFP_KERNEL allocation. */
	spin_unlock_irq(&ucounts_lock);

	fresh = kzalloc(sizeof(*fresh), GFP_KERNEL);
	if (!fresh)
		return NULL;

	fresh->ns = ns;
	fresh->uid = uid;
	atomic_set(&fresh->count, 1);

	/* Somebody may have inserted the same entry while we were unlocked. */
	spin_lock_irq(&ucounts_lock);
	found = find_ucounts(ns, uid, bucket);
	if (!found) {
		hlist_add_head(&fresh->node, bucket);
		/* Each hashed entry holds a reference on its namespace. */
		get_user_ns(fresh->ns);
		spin_unlock_irq(&ucounts_lock);
		return fresh;
	}

	/* Lost the race: discard our copy and use the existing entry. */
	kfree(fresh);

take_ref:
	/*
	 * The reference must be taken while ucounts_lock is still held.
	 * put_ucounts() only drops the last reference under this lock, so the
	 * entry just found cannot be freed before its count is incremented.
	 */
	overflowed = !get_ucounts_or_wrap(found);
	spin_unlock_irq(&ucounts_lock);

	if (overflowed) {
		put_ucounts(found);
		return NULL;
	}

	return found;
}
|
|
|
|
|
2021-04-22 14:27:09 +02:00
|
|
|
void put_ucounts(struct ucounts *ucounts)
|
2016-08-08 13:54:50 -05:00
|
|
|
{
|
2017-01-20 15:21:35 +02:00
|
|
|
unsigned long flags;
|
|
|
|
|
ucounts: Fix race condition between alloc_ucounts and put_ucounts
The race happens because put_ucounts() doesn't use spinlock and
get_ucounts is not under spinlock:
CPU0 CPU1
---- ----
alloc_ucounts() put_ucounts()
spin_lock_irq(&ucounts_lock);
ucounts = find_ucounts(ns, uid, hashent);
atomic_dec_and_test(&ucounts->count))
spin_unlock_irq(&ucounts_lock);
spin_lock_irqsave(&ucounts_lock, flags);
hlist_del_init(&ucounts->node);
spin_unlock_irqrestore(&ucounts_lock, flags);
kfree(ucounts);
ucounts = get_ucounts(ucounts);
==================================================================
BUG: KASAN: use-after-free in instrument_atomic_read_write include/linux/instrumented.h:101 [inline]
BUG: KASAN: use-after-free in atomic_add_negative include/asm-generic/atomic-instrumented.h:556 [inline]
BUG: KASAN: use-after-free in get_ucounts kernel/ucount.c:152 [inline]
BUG: KASAN: use-after-free in get_ucounts kernel/ucount.c:150 [inline]
BUG: KASAN: use-after-free in alloc_ucounts+0x19b/0x5b0 kernel/ucount.c:188
Write of size 4 at addr ffff88802821e41c by task syz-executor.4/16785
CPU: 1 PID: 16785 Comm: syz-executor.4 Not tainted 5.14.0-rc1-next-20210712-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
__dump_stack lib/dump_stack.c:88 [inline]
dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:105
print_address_description.constprop.0.cold+0x6c/0x309 mm/kasan/report.c:233
__kasan_report mm/kasan/report.c:419 [inline]
kasan_report.cold+0x83/0xdf mm/kasan/report.c:436
check_region_inline mm/kasan/generic.c:183 [inline]
kasan_check_range+0x13d/0x180 mm/kasan/generic.c:189
instrument_atomic_read_write include/linux/instrumented.h:101 [inline]
atomic_add_negative include/asm-generic/atomic-instrumented.h:556 [inline]
get_ucounts kernel/ucount.c:152 [inline]
get_ucounts kernel/ucount.c:150 [inline]
alloc_ucounts+0x19b/0x5b0 kernel/ucount.c:188
set_cred_ucounts+0x171/0x3a0 kernel/cred.c:684
__sys_setuid+0x285/0x400 kernel/sys.c:623
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x44/0xae
RIP: 0033:0x4665d9
Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007fde54097188 EFLAGS: 00000246 ORIG_RAX: 0000000000000069
RAX: ffffffffffffffda RBX: 000000000056bf80 RCX: 00000000004665d9
RDX: 0000000000000000 RSI: 0000000000000000 RDI: 00000000000000ff
RBP: 00000000004bfcb9 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 000000000056bf80
R13: 00007ffc8655740f R14: 00007fde54097300 R15: 0000000000022000
Allocated by task 16784:
kasan_save_stack+0x1b/0x40 mm/kasan/common.c:38
kasan_set_track mm/kasan/common.c:46 [inline]
set_alloc_info mm/kasan/common.c:434 [inline]
____kasan_kmalloc mm/kasan/common.c:513 [inline]
____kasan_kmalloc mm/kasan/common.c:472 [inline]
__kasan_kmalloc+0x9b/0xd0 mm/kasan/common.c:522
kmalloc include/linux/slab.h:591 [inline]
kzalloc include/linux/slab.h:721 [inline]
alloc_ucounts+0x23d/0x5b0 kernel/ucount.c:169
set_cred_ucounts+0x171/0x3a0 kernel/cred.c:684
__sys_setuid+0x285/0x400 kernel/sys.c:623
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x44/0xae
Freed by task 16785:
kasan_save_stack+0x1b/0x40 mm/kasan/common.c:38
kasan_set_track+0x1c/0x30 mm/kasan/common.c:46
kasan_set_free_info+0x20/0x30 mm/kasan/generic.c:360
____kasan_slab_free mm/kasan/common.c:366 [inline]
____kasan_slab_free mm/kasan/common.c:328 [inline]
__kasan_slab_free+0xfb/0x130 mm/kasan/common.c:374
kasan_slab_free include/linux/kasan.h:229 [inline]
slab_free_hook mm/slub.c:1650 [inline]
slab_free_freelist_hook+0xdf/0x240 mm/slub.c:1675
slab_free mm/slub.c:3235 [inline]
kfree+0xeb/0x650 mm/slub.c:4295
put_ucounts kernel/ucount.c:200 [inline]
put_ucounts+0x117/0x150 kernel/ucount.c:192
put_cred_rcu+0x27a/0x520 kernel/cred.c:124
rcu_do_batch kernel/rcu/tree.c:2550 [inline]
rcu_core+0x7ab/0x1380 kernel/rcu/tree.c:2785
__do_softirq+0x29b/0x9c2 kernel/softirq.c:558
Last potentially related work creation:
kasan_save_stack+0x1b/0x40 mm/kasan/common.c:38
kasan_record_aux_stack+0xe5/0x110 mm/kasan/generic.c:348
insert_work+0x48/0x370 kernel/workqueue.c:1332
__queue_work+0x5c1/0xed0 kernel/workqueue.c:1498
queue_work_on+0xee/0x110 kernel/workqueue.c:1525
queue_work include/linux/workqueue.h:507 [inline]
call_usermodehelper_exec+0x1f0/0x4c0 kernel/umh.c:435
kobject_uevent_env+0xf8f/0x1650 lib/kobject_uevent.c:618
netdev_queue_add_kobject net/core/net-sysfs.c:1621 [inline]
netdev_queue_update_kobjects+0x374/0x450 net/core/net-sysfs.c:1655
register_queue_kobjects net/core/net-sysfs.c:1716 [inline]
netdev_register_kobject+0x35a/0x430 net/core/net-sysfs.c:1959
register_netdevice+0xd33/0x1500 net/core/dev.c:10331
nsim_init_netdevsim drivers/net/netdevsim/netdev.c:317 [inline]
nsim_create+0x381/0x4d0 drivers/net/netdevsim/netdev.c:364
__nsim_dev_port_add+0x32e/0x830 drivers/net/netdevsim/dev.c:1295
nsim_dev_port_add_all+0x53/0x150 drivers/net/netdevsim/dev.c:1355
nsim_dev_probe+0xcb5/0x1190 drivers/net/netdevsim/dev.c:1496
call_driver_probe drivers/base/dd.c:517 [inline]
really_probe+0x23c/0xcd0 drivers/base/dd.c:595
__driver_probe_device+0x338/0x4d0 drivers/base/dd.c:747
driver_probe_device+0x4c/0x1a0 drivers/base/dd.c:777
__device_attach_driver+0x20b/0x2f0 drivers/base/dd.c:894
bus_for_each_drv+0x15f/0x1e0 drivers/base/bus.c:427
__device_attach+0x228/0x4a0 drivers/base/dd.c:965
bus_probe_device+0x1e4/0x290 drivers/base/bus.c:487
device_add+0xc2f/0x2180 drivers/base/core.c:3356
nsim_bus_dev_new drivers/net/netdevsim/bus.c:431 [inline]
new_device_store+0x436/0x710 drivers/net/netdevsim/bus.c:298
bus_attr_store+0x72/0xa0 drivers/base/bus.c:122
sysfs_kf_write+0x110/0x160 fs/sysfs/file.c:139
kernfs_fop_write_iter+0x342/0x500 fs/kernfs/file.c:296
call_write_iter include/linux/fs.h:2152 [inline]
new_sync_write+0x426/0x650 fs/read_write.c:518
vfs_write+0x75a/0xa40 fs/read_write.c:605
ksys_write+0x12d/0x250 fs/read_write.c:658
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x44/0xae
Second to last potentially related work creation:
kasan_save_stack+0x1b/0x40 mm/kasan/common.c:38
kasan_record_aux_stack+0xe5/0x110 mm/kasan/generic.c:348
insert_work+0x48/0x370 kernel/workqueue.c:1332
__queue_work+0x5c1/0xed0 kernel/workqueue.c:1498
queue_work_on+0xee/0x110 kernel/workqueue.c:1525
queue_work include/linux/workqueue.h:507 [inline]
call_usermodehelper_exec+0x1f0/0x4c0 kernel/umh.c:435
kobject_uevent_env+0xf8f/0x1650 lib/kobject_uevent.c:618
kobject_synth_uevent+0x701/0x850 lib/kobject_uevent.c:208
uevent_store+0x20/0x50 drivers/base/core.c:2371
dev_attr_store+0x50/0x80 drivers/base/core.c:2072
sysfs_kf_write+0x110/0x160 fs/sysfs/file.c:139
kernfs_fop_write_iter+0x342/0x500 fs/kernfs/file.c:296
call_write_iter include/linux/fs.h:2152 [inline]
new_sync_write+0x426/0x650 fs/read_write.c:518
vfs_write+0x75a/0xa40 fs/read_write.c:605
ksys_write+0x12d/0x250 fs/read_write.c:658
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x44/0xae
The buggy address belongs to the object at ffff88802821e400
which belongs to the cache kmalloc-192 of size 192
The buggy address is located 28 bytes inside of
192-byte region [ffff88802821e400, ffff88802821e4c0)
The buggy address belongs to the page:
page:ffffea0000a08780 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x2821e
flags: 0xfff00000000200(slab|node=0|zone=1|lastcpupid=0x7ff)
raw: 00fff00000000200 dead000000000100 dead000000000122 ffff888010841a00
raw: 0000000000000000 0000000080100010 00000001ffffffff 0000000000000000
page dumped because: kasan: bad access detected
page_owner tracks the page as allocated
page last allocated via order 0, migratetype Unmovable, gfp_mask 0x12cc0(GFP_KERNEL|__GFP_NOWARN|__GFP_NORETRY), pid 1, ts 12874702440, free_ts 12637793385
prep_new_page mm/page_alloc.c:2433 [inline]
get_page_from_freelist+0xa72/0x2f80 mm/page_alloc.c:4166
__alloc_pages+0x1b2/0x500 mm/page_alloc.c:5374
alloc_page_interleave+0x1e/0x200 mm/mempolicy.c:2119
alloc_pages+0x238/0x2a0 mm/mempolicy.c:2242
alloc_slab_page mm/slub.c:1713 [inline]
allocate_slab+0x32b/0x4c0 mm/slub.c:1853
new_slab mm/slub.c:1916 [inline]
new_slab_objects mm/slub.c:2662 [inline]
___slab_alloc+0x4ba/0x820 mm/slub.c:2825
__slab_alloc.constprop.0+0xa7/0xf0 mm/slub.c:2865
slab_alloc_node mm/slub.c:2947 [inline]
slab_alloc mm/slub.c:2989 [inline]
__kmalloc+0x312/0x330 mm/slub.c:4133
kmalloc include/linux/slab.h:596 [inline]
kzalloc include/linux/slab.h:721 [inline]
__register_sysctl_table+0x112/0x1090 fs/proc/proc_sysctl.c:1318
rds_tcp_init_net+0x1db/0x4f0 net/rds/tcp.c:551
ops_init+0xaf/0x470 net/core/net_namespace.c:140
__register_pernet_operations net/core/net_namespace.c:1137 [inline]
register_pernet_operations+0x35a/0x850 net/core/net_namespace.c:1214
register_pernet_device+0x26/0x70 net/core/net_namespace.c:1301
rds_tcp_init+0x77/0xe0 net/rds/tcp.c:717
do_one_initcall+0x103/0x650 init/main.c:1285
do_initcall_level init/main.c:1360 [inline]
do_initcalls init/main.c:1376 [inline]
do_basic_setup init/main.c:1396 [inline]
kernel_init_freeable+0x6b8/0x741 init/main.c:1598
page last free stack trace:
reset_page_owner include/linux/page_owner.h:24 [inline]
free_pages_prepare mm/page_alloc.c:1343 [inline]
free_pcp_prepare+0x312/0x7d0 mm/page_alloc.c:1394
free_unref_page_prepare mm/page_alloc.c:3329 [inline]
free_unref_page+0x19/0x690 mm/page_alloc.c:3408
__vunmap+0x783/0xb70 mm/vmalloc.c:2587
free_work+0x58/0x70 mm/vmalloc.c:82
process_one_work+0x98d/0x1630 kernel/workqueue.c:2276
worker_thread+0x658/0x11f0 kernel/workqueue.c:2422
kthread+0x3e5/0x4d0 kernel/kthread.c:319
ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295
Memory state around the buggy address:
ffff88802821e300: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
ffff88802821e380: 00 00 00 00 00 fc fc fc fc fc fc fc fc fc fc fc
>ffff88802821e400: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
^
ffff88802821e480: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc
ffff88802821e500: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
==================================================================
- The race fix has two parts.
* Changing the code to guarantee that ucounts->count is only decremented
when ucounts_lock is held. This guarantees that find_ucounts
will never find a structure with a zero reference count.
* Changing alloc_ucounts to increment ucounts->count while
ucounts_lock is held. This guarantees the reference count on the
found data structure will not be decremented to zero (and the data
structure freed) before the reference count is incremented.
-- Eric Biederman
Reported-by: syzbot+01985d7909f9468f013c@syzkaller.appspotmail.com
Reported-by: syzbot+59dd63761094a80ad06d@syzkaller.appspotmail.com
Reported-by: syzbot+6cd79f45bb8fa1c9eeae@syzkaller.appspotmail.com
Reported-by: syzbot+b6e65bd125a05f803d6b@syzkaller.appspotmail.com
Fixes: b6c336528926 ("Use atomic_t for ucounts reference counting")
Cc: Hillf Danton <hdanton@sina.com>
Signed-off-by: Alexey Gladkov <legion@kernel.org>
Link: https://lkml.kernel.org/r/7b2ace1759b281cdd2d66101d6b305deef722efb.1627397820.git.legion@kernel.org
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
2021-07-27 17:24:18 +02:00
|
|
|
if (atomic_dec_and_lock_irqsave(&ucounts->count, &ucounts_lock, flags)) {
|
2016-08-08 13:54:50 -05:00
|
|
|
hlist_del_init(&ucounts->node);
|
2021-04-22 14:27:10 +02:00
|
|
|
spin_unlock_irqrestore(&ucounts_lock, flags);
|
2022-01-24 12:46:50 -06:00
|
|
|
put_user_ns(ucounts->ns);
|
2021-04-22 14:27:10 +02:00
|
|
|
kfree(ucounts);
|
|
|
|
}
|
2016-08-08 13:54:50 -05:00
|
|
|
}
|
|
|
|
|
2021-04-22 14:27:08 +02:00
|
|
|
/*
 * Atomically increment *v by one, but only while its value is below @u.
 *
 * @v: counter to increment
 * @u: exclusive upper bound for the value before the increment
 *
 * @u is a long so that it has the same width as the counter: the caller
 * in this file passes a long limit, and an int parameter would silently
 * truncate it.
 *
 * Returns true if the increment was performed, false if the counter was
 * already >= @u (in which case *v is left unchanged).
 */
static inline bool atomic_long_inc_below(atomic_long_t *v, long u)
{
	long c, old;

	c = atomic_long_read(v);
	for (;;) {
		if (unlikely(c >= u))
			return false;
		old = atomic_long_cmpxchg(v, c, c+1);
		if (likely(old == c))
			return true;
		/* Somebody else changed *v first; retry from the value we saw. */
		c = old;
	}
}
|
|
|
|
|
2016-08-08 14:41:52 -05:00
|
|
|
/*
 * Charge one unit of @type to the ucounts of (@ns, @uid) and to every
 * ucounts reached from it by following ->ns->ucounts.
 *
 * Each level is incremented only if it is still below the ucount_max[type]
 * of that level's namespace.  If any level refuses, the increments already
 * made on the earlier levels are undone and the ucounts reference is
 * dropped.
 *
 * Returns the ucounts obtained from alloc_ucounts() on success, or NULL if
 * a limit was hit.  A NULL result from alloc_ucounts() is returned as-is.
 */
struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid,
			   enum ucount_type type)
{
	struct ucounts *ucounts = alloc_ucounts(ns, uid);
	struct ucounts *cur, *failed;

	cur = ucounts;
	while (cur) {
		struct user_namespace *cur_ns = cur->ns;
		long limit = READ_ONCE(cur_ns->ucount_max[type]);

		if (!atomic_long_inc_below(&cur->ucount[type], limit))
			break;
		cur = cur_ns->ucounts;
	}

	/* Walked off the end of the chain: every level accepted the charge. */
	if (!cur)
		return ucounts;

	/* Roll back the levels that were charged before the failing one. */
	failed = cur;
	for (cur = ucounts; cur != failed; cur = cur->ns->ucounts)
		atomic_long_dec(&cur->ucount[type]);

	put_ucounts(ucounts);
	return NULL;
}
|
|
|
|
|
2016-08-08 14:41:52 -05:00
|
|
|
void dec_ucount(struct ucounts *ucounts, enum ucount_type type)
|
2016-08-08 13:41:24 -05:00
|
|
|
{
|
2016-08-08 13:54:50 -05:00
|
|
|
struct ucounts *iter;
|
|
|
|
for (iter = ucounts; iter; iter = iter->ns->ucounts) {
|
2021-04-22 14:27:08 +02:00
|
|
|
long dec = atomic_long_dec_if_positive(&iter->ucount[type]);
|
2016-08-08 13:41:24 -05:00
|
|
|
WARN_ON_ONCE(dec < 0);
|
|
|
|
}
|
2016-08-08 13:54:50 -05:00
|
|
|
put_ucounts(ucounts);
|
2016-08-08 13:41:24 -05:00
|
|
|
}
|
|
|
|
|
2021-04-22 14:27:11 +02:00
|
|
|
long inc_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v)
|
|
|
|
{
|
|
|
|
struct ucounts *iter;
|
2021-11-29 21:37:25 +01:00
|
|
|
long max = LONG_MAX;
|
2021-04-22 14:27:11 +02:00
|
|
|
long ret = 0;
|
|
|
|
|
|
|
|
for (iter = ucounts; iter; iter = iter->ns->ucounts) {
|
|
|
|
long new = atomic_long_add_return(v, &iter->ucount[type]);
|
|
|
|
if (new < 0 || new > max)
|
|
|
|
ret = LONG_MAX;
|
|
|
|
else if (iter == ucounts)
|
|
|
|
ret = new;
|
2021-11-29 21:37:25 +01:00
|
|
|
max = READ_ONCE(iter->ns->ucount_max[type]);
|
2021-04-22 14:27:11 +02:00
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v)
|
|
|
|
{
|
|
|
|
struct ucounts *iter;
|
2021-04-30 13:00:26 -05:00
|
|
|
long new = -1; /* Silence compiler warning */
|
2021-04-22 14:27:11 +02:00
|
|
|
for (iter = ucounts; iter; iter = iter->ns->ucounts) {
|
2021-10-18 11:22:20 -05:00
|
|
|
long dec = atomic_long_sub_return(v, &iter->ucount[type]);
|
2021-04-22 14:27:11 +02:00
|
|
|
WARN_ON_ONCE(dec < 0);
|
|
|
|
if (iter == ucounts)
|
|
|
|
new = dec;
|
|
|
|
}
|
|
|
|
return (new == 0);
|
|
|
|
}
|
|
|
|
|
2021-10-16 15:59:49 -05:00
|
|
|
static void do_dec_rlimit_put_ucounts(struct ucounts *ucounts,
|
|
|
|
struct ucounts *last, enum ucount_type type)
|
|
|
|
{
|
|
|
|
struct ucounts *iter, *next;
|
|
|
|
for (iter = ucounts; iter != last; iter = next) {
|
2021-10-18 11:22:20 -05:00
|
|
|
long dec = atomic_long_sub_return(1, &iter->ucount[type]);
|
2021-10-16 15:59:49 -05:00
|
|
|
WARN_ON_ONCE(dec < 0);
|
|
|
|
next = iter->ns->ucounts;
|
|
|
|
if (dec == 0)
|
|
|
|
put_ucounts(iter);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum ucount_type type)
|
|
|
|
{
|
|
|
|
do_dec_rlimit_put_ucounts(ucounts, NULL, type);
|
|
|
|
}
|
|
|
|
|
|
|
|
long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum ucount_type type)
|
|
|
|
{
|
|
|
|
/* Caller must hold a reference to ucounts */
|
|
|
|
struct ucounts *iter;
|
2021-11-29 21:37:25 +01:00
|
|
|
long max = LONG_MAX;
|
2021-10-16 15:59:49 -05:00
|
|
|
long dec, ret = 0;
|
|
|
|
|
|
|
|
for (iter = ucounts; iter; iter = iter->ns->ucounts) {
|
|
|
|
long new = atomic_long_add_return(1, &iter->ucount[type]);
|
|
|
|
if (new < 0 || new > max)
|
|
|
|
goto unwind;
|
|
|
|
if (iter == ucounts)
|
|
|
|
ret = new;
|
2021-11-29 21:37:25 +01:00
|
|
|
max = READ_ONCE(iter->ns->ucount_max[type]);
|
2021-10-16 15:59:49 -05:00
|
|
|
/*
|
|
|
|
* Grab an extra ucount reference for the caller when
|
|
|
|
* the rlimit count was previously 0.
|
|
|
|
*/
|
|
|
|
if (new != 1)
|
|
|
|
continue;
|
|
|
|
if (!get_ucounts(iter))
|
|
|
|
goto dec_unwind;
|
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
dec_unwind:
|
2021-10-18 11:22:20 -05:00
|
|
|
dec = atomic_long_sub_return(1, &iter->ucount[type]);
|
2021-10-16 15:59:49 -05:00
|
|
|
WARN_ON_ONCE(dec < 0);
|
|
|
|
unwind:
|
|
|
|
do_dec_rlimit_put_ucounts(ucounts, iter, type);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2021-11-29 21:37:25 +01:00
|
|
|
bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long rlimit)
|
2021-04-22 14:27:11 +02:00
|
|
|
{
|
|
|
|
struct ucounts *iter;
|
2021-11-29 21:37:25 +01:00
|
|
|
long max = rlimit;
|
|
|
|
if (rlimit > LONG_MAX)
|
|
|
|
max = LONG_MAX;
|
2021-04-22 14:27:11 +02:00
|
|
|
for (iter = ucounts; iter; iter = iter->ns->ucounts) {
|
2022-02-09 18:09:41 -06:00
|
|
|
long val = get_ucounts_value(iter, type);
|
|
|
|
if (val < 0 || val > max)
|
2021-04-22 14:27:11 +02:00
|
|
|
return true;
|
2021-11-29 21:37:25 +01:00
|
|
|
max = READ_ONCE(iter->ns->ucount_max[type]);
|
2021-04-22 14:27:11 +02:00
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2016-07-30 13:58:49 -05:00
|
|
|
static __init int user_namespace_sysctl_init(void)
|
|
|
|
{
|
|
|
|
#ifdef CONFIG_SYSCTL
|
2016-08-08 13:54:50 -05:00
|
|
|
static struct ctl_table_header *user_header;
|
2016-07-30 13:58:49 -05:00
|
|
|
static struct ctl_table empty[1];
|
|
|
|
/*
|
2016-08-08 13:54:50 -05:00
|
|
|
* It is necessary to register the user directory in the
|
2016-07-30 13:58:49 -05:00
|
|
|
* default set so that registrations in the child sets work
|
|
|
|
* properly.
|
|
|
|
*/
|
2016-08-08 13:54:50 -05:00
|
|
|
user_header = register_sysctl("user", empty);
|
2017-02-08 14:30:50 -08:00
|
|
|
kmemleak_ignore(user_header);
|
2016-08-08 13:54:50 -05:00
|
|
|
BUG_ON(!user_header);
|
2016-07-30 13:58:49 -05:00
|
|
|
BUG_ON(!setup_userns_sysctls(&init_user_ns));
|
|
|
|
#endif
|
2021-04-22 14:27:09 +02:00
|
|
|
hlist_add_ucounts(&init_ucounts);
|
2021-04-22 14:27:11 +02:00
|
|
|
inc_rlimit_ucounts(&init_ucounts, UCOUNT_RLIMIT_NPROC, 1);
|
2016-07-30 13:58:49 -05:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
subsys_initcall(user_namespace_sysctl_init);
|