2019-06-04 10:11:33 +02:00
// SPDX-License-Identifier: GPL-2.0-only
2005-04-16 15:20:36 -07:00
/*
2023-07-18 13:13:35 -04:00
* Security - Enhanced Linux ( SELinux ) security module
2005-04-16 15:20:36 -07:00
*
* This file contains the SELinux hook function implementations .
*
2023-07-19 11:12:50 -04:00
* Authors : Stephen Smalley , < stephen . smalley . work @ gmail . com >
2008-04-17 13:17:49 -04:00
* Chris Vance , < cvance @ nai . com >
* Wayne Salamon , < wsalamon @ nai . com >
* James Morris < jmorris @ redhat . com >
2005-04-16 15:20:36 -07:00
*
* Copyright ( C ) 2001 , 2002 Networks Associates Technology , Inc .
2008-07-04 09:47:13 +10:00
* Copyright ( C ) 2003 - 2008 Red Hat , Inc . , James Morris < jmorris @ redhat . com >
* Eric Paris < eparis @ redhat . com >
2005-04-16 15:20:36 -07:00
* Copyright ( C ) 2004 - 2005 Trusted Computer Solutions , Inc .
2008-04-17 13:17:49 -04:00
* < dgoeddel @ trustedcs . com >
2009-08-28 18:12:49 -04:00
* Copyright ( C ) 2006 , 2007 , 2009 Hewlett - Packard Development Company , L . P .
2011-08-01 11:10:33 +00:00
* Paul Moore < paul @ paul - moore . com >
2007-09-14 09:27:07 +09:00
* Copyright ( C ) 2007 Hitachi Software Engineering Co . , Ltd .
2008-04-17 13:17:49 -04:00
* Yuichi Nakamura < ynakam @ hitachisoft . jp >
2017-05-19 15:48:56 +03:00
* Copyright ( C ) 2016 Mellanox Technologies
2005-04-16 15:20:36 -07:00
*/
# include <linux/init.h>
2011-02-25 15:39:20 -05:00
# include <linux/kd.h>
2005-04-16 15:20:36 -07:00
# include <linux/kernel.h>
2020-10-02 10:38:15 -07:00
# include <linux/kernel_read_file.h>
2005-04-16 15:20:36 -07:00
# include <linux/errno.h>
2017-02-08 18:51:30 +01:00
# include <linux/sched/signal.h>
2017-02-08 18:51:36 +01:00
# include <linux/sched/task.h>
2015-05-02 15:10:46 -07:00
# include <linux/lsm_hooks.h>
2005-04-16 15:20:36 -07:00
# include <linux/xattr.h>
# include <linux/capability.h>
# include <linux/unistd.h>
# include <linux/mm.h>
# include <linux/mman.h>
# include <linux/slab.h>
# include <linux/pagemap.h>
2011-02-25 15:39:20 -05:00
# include <linux/proc_fs.h>
2005-04-16 15:20:36 -07:00
# include <linux/swap.h>
# include <linux/spinlock.h>
# include <linux/syscalls.h>
2011-02-01 11:05:39 -05:00
# include <linux/dcache.h>
2005-04-16 15:20:36 -07:00
# include <linux/file.h>
2008-04-24 07:44:08 -04:00
# include <linux/fdtable.h>
2005-04-16 15:20:36 -07:00
# include <linux/namei.h>
# include <linux/mount.h>
2018-11-01 23:07:24 +00:00
# include <linux/fs_context.h>
# include <linux/fs_parser.h>
2005-04-16 15:20:36 -07:00
# include <linux/netfilter_ipv4.h>
# include <linux/netfilter_ipv6.h>
# include <linux/tty.h>
# include <net/icmp.h>
2007-10-10 17:30:46 -07:00
# include <net/ip.h> /* for local_port_range[] */
2005-04-16 15:20:36 -07:00
# include <net/tcp.h> /* struct or_callable used in sock_rcv_skb */
2013-12-04 16:10:45 -05:00
# include <net/inet_connection_sock.h>
2008-01-29 08:38:23 -05:00
# include <net/net_namespace.h>
2008-01-29 08:43:36 -05:00
# include <net/netlabel.h>
2008-05-14 11:27:45 -04:00
# include <linux/uaccess.h>
2005-04-16 15:20:36 -07:00
# include <asm/ioctls.h>
2011-07-26 16:09:06 -07:00
# include <linux/atomic.h>
2005-04-16 15:20:36 -07:00
# include <linux/bitops.h>
# include <linux/interrupt.h>
# include <linux/netdevice.h> /* for network interface checks */
2013-03-27 06:49:35 +00:00
# include <net/netlink.h>
2005-04-16 15:20:36 -07:00
# include <linux/tcp.h>
# include <linux/udp.h>
2006-11-13 16:09:01 -08:00
# include <linux/dccp.h>
2018-02-13 20:57:18 +00:00
# include <linux/sctp.h>
# include <net/sctp/structs.h>
2005-04-16 15:20:36 -07:00
# include <linux/quota.h>
# include <linux/un.h> /* for Unix socket types */
# include <net/af_unix.h> /* for Unix socket types */
# include <linux/parser.h>
# include <linux/nfs_mount.h>
# include <net/ipv6.h>
# include <linux/hugetlb.h>
# include <linux/personality.h>
# include <linux/audit.h>
2005-06-30 02:58:51 -07:00
# include <linux/string.h>
2006-09-25 23:32:01 -07:00
# include <linux/mutex.h>
timers: fix itimer/many thread hang
Overview
This patch reworks the handling of POSIX CPU timers, including the
ITIMER_PROF, ITIMER_VIRT timers and rlimit handling. It was put together
with the help of Roland McGrath, the owner and original writer of this code.
The problem we ran into, and the reason for this rework, has to do with using
a profiling timer in a process with a large number of threads. It appears
that the performance of the old implementation of run_posix_cpu_timers() was
at least O(n*3) (where "n" is the number of threads in a process) or worse.
Everything is fine with an increasing number of threads until the time taken
for that routine to run becomes the same as or greater than the tick time, at
which point things degrade rather quickly.
This patch fixes bug 9906, "Weird hang with NPTL and SIGPROF."
Code Changes
This rework corrects the implementation of run_posix_cpu_timers() to make it
run in constant time for a particular machine. (Performance may vary between
one machine and another depending upon whether the kernel is built as single-
or multiprocessor and, in the latter case, depending upon the number of
running processors.) To do this, at each tick we now update fields in
signal_struct as well as task_struct. The run_posix_cpu_timers() function
uses those fields to make its decisions.
We define a new structure, "task_cputime," to contain user, system and
scheduler times and use these in appropriate places:
struct task_cputime {
cputime_t utime;
cputime_t stime;
unsigned long long sum_exec_runtime;
};
This is included in the structure "thread_group_cputime," which is a new
substructure of signal_struct and which varies for uniprocessor versus
multiprocessor kernels. For uniprocessor kernels, it uses "task_cputime" as
a simple substructure, while for multiprocessor kernels it is a pointer:
struct thread_group_cputime {
struct task_cputime totals;
};
struct thread_group_cputime {
struct task_cputime *totals;
};
We also add a new task_cputime substructure directly to signal_struct, to
cache the earliest expiration of process-wide timers, and task_cputime also
replaces the it_*_expires fields of task_struct (used for earliest expiration
of thread timers). The "thread_group_cputime" structure contains process-wide
timers that are updated via account_user_time() and friends. In the non-SMP
case the structure is a simple aggregator; unfortunately in the SMP case that
simplicity was not achievable due to cache-line contention between CPUs (in
one measured case performance was actually _worse_ on a 16-cpu system than
the same test on a 4-cpu system, due to this contention). For SMP, the
thread_group_cputime counters are maintained as a per-cpu structure allocated
using alloc_percpu(). The timer functions update only the timer field in
the structure corresponding to the running CPU, obtained using per_cpu_ptr().
We define a set of inline functions in sched.h that we use to maintain the
thread_group_cputime structure and hide the differences between UP and SMP
implementations from the rest of the kernel. The thread_group_cputime_init()
function initializes the thread_group_cputime structure for the given task.
The thread_group_cputime_alloc() is a no-op for UP; for SMP it calls the
out-of-line function thread_group_cputime_alloc_smp() to allocate and fill
in the per-cpu structures and fields. The thread_group_cputime_free()
function, also a no-op for UP, in SMP frees the per-cpu structures. The
thread_group_cputime_clone_thread() function (also a UP no-op) for SMP calls
thread_group_cputime_alloc() if the per-cpu structures haven't yet been
allocated. The thread_group_cputime() function fills the task_cputime
structure it is passed with the contents of the thread_group_cputime fields;
in UP it's that simple but in SMP it must also safely check that tsk->signal
is non-NULL (if it is it just uses the appropriate fields of task_struct) and,
if so, sums the per-cpu values for each online CPU. Finally, the three
functions account_group_user_time(), account_group_system_time() and
account_group_exec_runtime() are used by timer functions to update the
respective fields of the thread_group_cputime structure.
Non-SMP operation is trivial and will not be mentioned further.
The per-cpu structure is always allocated when a task creates its first new
thread, via a call to thread_group_cputime_clone_thread() from copy_signal().
It is freed at process exit via a call to thread_group_cputime_free() from
cleanup_signal().
All functions that formerly summed utime/stime/sum_sched_runtime values from
all threads in the thread group now use thread_group_cputime() to
snapshot the values in the thread_group_cputime structure or the values in
the task structure itself if the per-cpu structure hasn't been allocated.
Finally, the code in kernel/posix-cpu-timers.c has changed quite a bit.
The run_posix_cpu_timers() function has been split into a fast path and a
slow path; the former safely checks whether there are any expired thread
timers and, if not, just returns, while the slow path does the heavy lifting.
With the dedicated thread group fields, timers are no longer "rebalanced" and
the process_timer_rebalance() function and related code has gone away. All
summing loops are gone and all code that used them now uses the
thread_group_cputime() inline. When process-wide timers are set, the new
task_cputime structure in signal_struct is used to cache the earliest
expiration; this is checked in the fast path.
Performance
The fix appears not to add significant overhead to existing operations. It
generally performs the same as the current code except in two cases, one in
which it performs slightly worse (Case 5 below) and one in which it performs
very significantly better (Case 2 below). Overall it's a wash except in those
two cases.
I've since done somewhat more involved testing on a dual-core Opteron system.
Case 1: With no itimer running, for a test with 100,000 threads, the fixed
kernel took 1428.5 seconds, 513 seconds more than the unfixed system,
all of which was spent in the system. There were twice as many
voluntary context switches with the fix as without it.
Case 2: With an itimer running at .01 second ticks and 4000 threads (the most
an unmodified kernel can handle), the fixed kernel ran the test in
eight percent of the time (5.8 seconds as opposed to 70 seconds) and
had better tick accuracy (.012 seconds per tick as opposed to .023
seconds per tick).
Case 3: A 4000-thread test with an initial timer tick of .01 second and an
interval of 10,000 seconds (i.e. a timer that ticks only once) had
very nearly the same performance in both cases: 6.3 seconds elapsed
for the fixed kernel versus 5.5 seconds for the unfixed kernel.
With fewer threads (eight in these tests), the Case 1 test ran in essentially
the same time on both the modified and unmodified kernels (5.2 seconds versus
5.8 seconds). The Case 2 test ran in about the same time as well, 5.9 seconds
versus 5.4 seconds but again with much better tick accuracy, .013 seconds per
tick versus .025 seconds per tick for the unmodified kernel.
Since the fix affected the rlimit code, I also tested soft and hard CPU limits.
Case 4: With a hard CPU limit of 20 seconds and eight threads (and an itimer
running), the modified kernel was very slightly favored in that while
it killed the process in 19.997 seconds of CPU time (5.002 seconds of
wall time), only .003 seconds of that was system time, the rest was
user time. The unmodified kernel killed the process in 20.001 seconds
of CPU (5.014 seconds of wall time) of which .016 seconds was system
time. Really, though, the results were too close to call. The results
were essentially the same with no itimer running.
Case 5: With a soft limit of 20 seconds and a hard limit of 2000 seconds
(where the hard limit would never be reached) and an itimer running,
the modified kernel exhibited worse tick accuracy than the unmodified
kernel: .050 seconds/tick versus .028 seconds/tick. Otherwise,
performance was almost indistinguishable. With no itimer running this
test exhibited virtually identical behavior and times in both cases.
In times past I did some limited performance testing. Those results are below.
On a four-cpu Opteron system without this fix, a sixteen-thread test executed
in 3569.991 seconds, of which user was 3568.435s and system was 1.556s. On
the same system with the fix, user and elapsed time were about the same, but
system time dropped to 0.007 seconds. Performance with eight, four and one
thread were comparable. Interestingly, the timer ticks with the fix seemed
more accurate: The sixteen-thread test with the fix received 149543 ticks
for 0.024 seconds per tick, while the same test without the fix received 58720
for 0.061 seconds per tick. Both cases were configured for an interval of
0.01 seconds. Again, the other tests were comparable. Each thread in this
test computed the primes up to 25,000,000.
I also did a test with a large number of threads, 100,000 threads, which is
impossible without the fix. In this case each thread computed the primes only
up to 10,000 (to make the runtime manageable). System time dominated, at
1546.968 seconds out of a total 2176.906 seconds (giving a user time of
629.938s). It received 147651 ticks for 0.015 seconds per tick, still quite
accurate. There is obviously no comparable test without the fix.
Signed-off-by: Frank Mayhar <fmayhar@google.com>
Cc: Roland McGrath <roland@redhat.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-09-12 09:54:39 -07:00
# include <linux/posix-timers.h>
2010-02-03 15:36:43 -08:00
# include <linux/syslog.h>
userns: security: make capabilities relative to the user namespace
- Introduce ns_capable to test for a capability in a non-default
user namespace.
- Teach cap_capable to handle capabilities in a non-default
user namespace.
The motivation is to get to the unprivileged creation of new
namespaces. It looks like this gets us 90% of the way there, with
only potential uid confusion issues left.
I still need to handle getting all caps after creation but otherwise I
think I have a good starter patch that achieves all of your goals.
Changelog:
11/05/2010: [serge] add apparmor
12/14/2010: [serge] fix capabilities to created user namespaces
Without this, if user serge creates a user_ns, he won't have
capabilities to the user_ns he created. This is because we
were first checking whether his effective caps had the caps
he needed and returning -EPERM if not, and THEN checking whether
he was the creator. Reverse those checks.
12/16/2010: [serge] security_real_capable needs ns argument in !security case
01/11/2011: [serge] add task_ns_capable helper
01/11/2011: [serge] add nsown_capable() helper per Bastian Blank suggestion
02/16/2011: [serge] fix a logic bug: the root user is always creator of
init_user_ns, but should not always have capabilities to
it! Fix the check in cap_capable().
02/21/2011: Add the required user_ns parameter to security_capable,
fixing a compile failure.
02/23/2011: Convert some macros to functions as per akpm comments. Some
couldn't be converted because we can't easily forward-declare
them (they are inline if !SECURITY, extern if SECURITY). Add
a current_user_ns function so we can use it in capability.h
without #including cred.h. Move all forward declarations
together to the top of the #ifdef __KERNEL__ section, and use
kernel-doc format.
02/23/2011: Per dhowells, clean up comment in cap_capable().
02/23/2011: Per akpm, remove unreachable 'return -EPERM' in cap_capable.
(Original written and signed off by Eric; latest, modified version
acked by him)
[akpm@linux-foundation.org: fix build]
[akpm@linux-foundation.org: export current_user_ns() for ecryptfs]
[serge.hallyn@canonical.com: remove unneeded extra argument in selinux's task_has_capability]
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Serge E. Hallyn <serge.hallyn@canonical.com>
Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
Acked-by: Daniel Lezcano <daniel.lezcano@free.fr>
Acked-by: David Howells <dhowells@redhat.com>
Cc: James Morris <jmorris@namei.org>
Signed-off-by: Serge E. Hallyn <serge.hallyn@canonical.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2011-03-23 16:43:17 -07:00
# include <linux/user_namespace.h>
2011-05-26 20:52:10 -04:00
# include <linux/export.h>
2012-02-13 03:58:52 +00:00
# include <linux/msg.h>
# include <linux/shm.h>
2023-12-11 13:58:25 -05:00
# include <uapi/linux/shm.h>
2017-10-18 13:00:25 -07:00
# include <linux/bpf.h>
2019-02-22 15:57:17 +01:00
# include <linux/kernfs.h>
# include <linux/stringhash.h> /* for hashlen_string() */
2018-11-01 23:07:23 +00:00
# include <uapi/linux/mount.h>
fanotify, inotify, dnotify, security: add security hook for fs notifications
As of now, setting watches on filesystem objects has, at most, applied a
check for read access to the inode, and in the case of fanotify, requires
CAP_SYS_ADMIN. No specific security hook or permission check has been
provided to control the setting of watches. Using any of inotify, dnotify,
or fanotify, it is possible to observe, not only write-like operations, but
even read access to a file. Modeling the watch as being merely a read from
the file is insufficient for the needs of SELinux. This is due to the fact
that read access should not necessarily imply access to information about
when another process reads from a file. Furthermore, fanotify watches grant
more power to an application in the form of permission events. While
notification events are solely unidirectional (i.e. they only pass
information to the receiving application), permission events are blocking.
Permission events make a request to the receiving application which will
then reply with a decision as to whether or not that action may be
completed. This causes the issue of the watching application having the
ability to exercise control over the triggering process. Without drawing a
distinction within the permission check, the ability to read would imply
the greater ability to control an application. Additionally, mount and
superblock watches apply to all files within the same mount or superblock.
Read access to one file should not necessarily imply the ability to watch
all files accessed within a given mount or superblock.
In order to solve these issues, a new LSM hook is implemented and has been
placed within the system calls for marking filesystem objects with inotify,
fanotify, and dnotify watches. These calls to the hook are placed at the
point at which the target path has been resolved and are provided with the
path struct, the mask of requested notification events, and the type of
object on which the mark is being set (inode, superblock, or mount). The
mask and obj_type have already been translated into common FS_* values
shared by the entirety of the fs notification infrastructure. The path
struct is passed rather than just the inode so that the mount is available,
particularly for mount watches. This also allows for use of the hook by
pathname-based security modules. However, since the hook is intended for
use even by inode based security modules, it is not placed under the
CONFIG_SECURITY_PATH conditional. Otherwise, the inode-based security
modules would need to enable all of the path hooks, even though they do not
use any of them.
This only provides a hook at the point of setting a watch, and presumes
that permission to set a particular watch implies the ability to receive
all notification about that object which match the mask. This is all that
is required for SELinux. If other security modules require additional hooks
or infrastructure to control delivery of notification, these can be added
by them. It does not make sense for us to propose hooks for which we have
no implementation. The understanding that all notifications received by the
requesting application are all strictly of a type for which the application
has been granted permission shows that this implementation is sufficient in
its coverage.
Security modules wishing to provide complete control over fanotify must
also implement a security_file_open hook that validates that the access
requested by the watching application is authorized. Fanotify has the issue
that it returns a file descriptor with the file mode specified during
fanotify_init() to the watching process on event. This is already covered
by the LSM security_file_open hook if the security module implements
checking of the requested file mode there. Otherwise, a watching process
can obtain escalated access to a file for which it has not been authorized.
The selinux_path_notify hook implementation works by adding five new file
permissions: watch, watch_mount, watch_sb, watch_reads, and watch_with_perm
(descriptions about which will follow), and one new filesystem permission:
watch (which is applied to superblock checks). The hook then decides which
subset of these permissions must be held by the requesting application
based on the contents of the provided mask and the obj_type. The
selinux_file_open hook already checks the requested file mode and therefore
ensures that a watching process cannot escalate its access through
fanotify.
The watch, watch_mount, and watch_sb permissions are the baseline
permissions for setting a watch on an object and each are a requirement for
any watch to be set on a file, mount, or superblock respectively. It should
be noted that having either of the other two permissions (watch_reads and
watch_with_perm) does not imply the watch, watch_mount, or watch_sb
permission. Superblock watches further require the filesystem watch
permission to the superblock. As there is no labeled object in view for
mounts, there is no specific check for mount watches beyond watch_mount to
the inode. Such a check could be added in the future, if a suitable labeled
object existed representing the mount.
The watch_reads permission is required to receive notifications from
read-exclusive events on filesystem objects. These events include accessing
a file for the purpose of reading and closing a file which has been opened
read-only. This distinction has been drawn in order to provide a direct
indication in the policy for this otherwise not obvious capability. Read
access to a file should not necessarily imply the ability to observe read
events on a file.
Finally, watch_with_perm only applies to fanotify masks since it is the
only way to set a mask which allows for the blocking, permission event.
This permission is needed for any watch which is of this type. Though
fanotify requires CAP_SYS_ADMIN, this is insufficient as it gives implicit
trust to root, which we do not do, and does not support least privilege.
Signed-off-by: Aaron Goidel <acgoide@tycho.nsa.gov>
Acked-by: Casey Schaufler <casey@schaufler-ca.com>
Acked-by: Jan Kara <jack@suse.cz>
Signed-off-by: Paul Moore <paul@paul-moore.com>
2019-08-12 11:20:00 -04:00
# include <linux/fsnotify.h>
# include <linux/fanotify.h>
2023-12-01 00:57:35 +00:00
# include <linux/io_uring/cmd.h>
2023-09-12 13:56:46 -07:00
# include <uapi/linux/lsm.h>
2005-04-16 15:20:36 -07:00
# include "avc.h"
# include "objsec.h"
# include "netif.h"
2008-01-29 08:38:13 -05:00
# include "netnode.h"
2008-04-10 10:48:14 -04:00
# include "netport.h"
2017-05-19 15:48:59 +03:00
# include "ibpkey.h"
[LSM-IPSec]: Per-packet access control.
This patch series implements per packet access control via the
extension of the Linux Security Modules (LSM) interface by hooks in
the XFRM and pfkey subsystems that leverage IPSec security
associations to label packets. Extensions to the SELinux LSM are
included that leverage the patch for this purpose.
This patch implements the changes necessary to the SELinux LSM to
create, deallocate, and use security contexts for policies
(xfrm_policy) and security associations (xfrm_state) that enable
control of a socket's ability to send and receive packets.
Patch purpose:
The patch is designed to enable the SELinux LSM to implement access
control on individual packets based on the strongly authenticated
IPSec security association. Such access controls augment the existing
ones in SELinux based on network interface and IP address. The former
are very coarse-grained, and the latter can be spoofed. By using
IPSec, the SELinux can control access to remote hosts based on
cryptographic keys generated using the IPSec mechanism. This enables
access control on a per-machine basis or per-application if the remote
machine is running the same mechanism and trusted to enforce the
access control policy.
Patch design approach:
The patch's main function is to authorize a socket's access to a IPSec
policy based on their security contexts. Since the communication is
implemented by a security association, the patch ensures that the
security associations negotiated and used have the same security
context. The patch enables allocation and deallocation of such
security contexts for policies and security associations. It also
enables copying of the security context when policies are cloned.
Lastly, the patch ensures that packets that are sent without using a
IPSec security association with a security context are allowed to be
sent in that manner.
A presentation available at
www.selinux-symposium.org/2005/presentations/session2/2-3-jaeger.pdf
from the SELinux symposium describes the overall approach.
Patch implementation details:
The function which authorizes a socket to perform a requested
operation (send/receive) on a IPSec policy (xfrm_policy) is
selinux_xfrm_policy_lookup. The Netfilter and rcv_skb hooks ensure
that if an IPSec SA with a security association has not been used,
then the socket is allowed to send or receive the packet,
respectively.
The patch implements SELinux function for allocating security contexts
when policies (xfrm_policy) are created via the pfkey or xfrm_user
interfaces via selinux_xfrm_policy_alloc. When a security association
is built, SELinux allocates the security context designated by the
XFRM subsystem which is based on that of the authorized policy via
selinux_xfrm_state_alloc.
When a xfrm_policy is cloned, the security context of that policy, if
any, is copied to the clone via selinux_xfrm_policy_clone.
When a xfrm_policy or xfrm_state is freed, its security context, if
any is also freed at selinux_xfrm_policy_free or
selinux_xfrm_state_free.
Testing:
The SELinux authorization function is tested using ipsec-tools. We
created policies and security associations with particular security
contexts and added SELinux access control policy entries to verify the
authorization decision. We also made sure that packets for which no
security context was supplied (which either did or did not use
security associations) were authorized using an unlabelled context.
Signed-off-by: Trent Jaeger <tjaeger@cse.psu.edu>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
2005-12-13 23:12:40 -08:00
# include "xfrm.h"
2007-02-28 15:14:23 -05:00
# include "netlabel.h"
2008-03-01 22:03:14 +02:00
# include "audit.h"
2011-08-30 12:52:32 +10:00
# include "avc_ss.h"
2005-04-16 15:20:36 -07:00
security: Allow all LSMs to provide xattrs for inode_init_security hook
Currently, the LSM infrastructure supports only one LSM providing an xattr
and EVM calculating the HMAC on that xattr, plus other inode metadata.
Allow all LSMs to provide one or multiple xattrs, by extending the security
blob reservation mechanism. Introduce the new lbs_xattr_count field of the
lsm_blob_sizes structure, so that each LSM can specify how many xattrs it
needs, and the LSM infrastructure knows how many xattr slots it should
allocate.
Modify the inode_init_security hook definition, by passing the full
xattr array allocated in security_inode_init_security(), and the current
number of xattr slots in that array filled by LSMs. The first parameter
would allow EVM to access and calculate the HMAC on xattrs supplied by
other LSMs, the second to not leave gaps in the xattr array, when an LSM
requested but did not provide xattrs (e.g. if it is not initialized).
Introduce lsm_get_xattr_slot(), which LSMs can call as many times as the
number specified in the lbs_xattr_count field of the lsm_blob_sizes
structure. During each call, lsm_get_xattr_slot() increments the number of
filled xattrs, so that at the next invocation it returns the next xattr
slot to fill.
Cleanup security_inode_init_security(). Unify the !initxattrs and
initxattrs case by simply not allocating the new_xattrs array in the
former. Update the documentation to reflect the changes, and fix the
description of the xattr name, as it is not allocated anymore.
Adapt both SELinux and Smack to use the new definition of the
inode_init_security hook, and to call lsm_get_xattr_slot() to obtain and
fill the reserved slots in the xattr array.
Move the xattr->name assignment after the xattr->value one, so that it is
done only in case of successful memory allocation.
Finally, change the default return value of the inode_init_security hook
from zero to -EOPNOTSUPP, so that BPF LSM correctly follows the hook
conventions.
Reported-by: Nicolas Bouchinet <nicolas.bouchinet@clip-os.org>
Link: https://lore.kernel.org/linux-integrity/Y1FTSIo+1x+4X0LS@archlinux/
Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Acked-by: Casey Schaufler <casey@schaufler-ca.com>
[PM: minor comment and variable tweaks, approved by RS]
Signed-off-by: Paul Moore <paul@paul-moore.com>
2023-06-10 09:57:35 +02:00
/*
 * Compile-time constant with value 1.
 * NOTE(review): presumably the number of xattr slots SELinux requests for
 * the inode_init_security hook; the use site is outside this view - confirm.
 */
# define SELINUX_INODE_INIT_XATTRS 1
2018-03-01 18:48:02 -05:00
/*
 * Definition of the global selinux_state object (external linkage, no
 * explicit initializer). The layout of struct selinux_state is declared
 * outside this view.
 */
struct selinux_state selinux_state ;
2008-01-29 08:43:36 -05:00
/*
 * SECMARK reference count: file-local atomic counter, initialised to 0.
 * The code that raises and lowers it is outside this view.
 */
2011-08-17 11:08:43 +10:00
static atomic_t selinux_secmark_refcount = ATOMIC_INIT ( 0 ) ;
2008-01-29 08:43:36 -05:00
2005-04-16 15:20:36 -07:00
/*
 * Boot-time value of the enforcing flag.
 *
 * With CONFIG_SECURITY_SELINUX_DEVELOP, selinux_enforcing_boot is an
 * init-only variable that the boot option registered below can set.
 * Without it, the same name is a macro that always expands to 1.
 */
# ifdef CONFIG_SECURITY_SELINUX_DEVELOP
2019-12-17 09:15:10 -05:00
/* No initializer: static storage, so the value is 0 unless the option sets it. */
static int selinux_enforcing_boot __initdata ;
2005-04-16 15:20:36 -07:00
/*
 * enforcing_setup - parse the value given to the enforcing boot option
 * @str: text of the option value
 *
 * Parses @str with kstrtoul() using base argument 0 (radix chosen from the
 * string's prefix, per the kstrtoul() documentation). On success the result
 * is normalised to 0 or 1 and stored in selinux_enforcing_boot. On a parse
 * error the variable is left untouched and the error is not reported.
 *
 * Return: always 1, whether or not parsing succeeded.
 * NOTE(review): presumably the __setup() "option handled" convention -
 * confirm against include/linux/init.h.
 */
static int __init enforcing_setup ( char * str )
{
2008-05-14 11:27:45 -04:00
unsigned long enforcing ;
2014-02-05 15:13:14 +09:00
/* kstrtoul() returns 0 on success, hence the negation. */
if ( ! kstrtoul ( str , 0 , & enforcing ) )
2018-03-01 18:48:02 -05:00
/* Any non-zero value collapses to 1. */
selinux_enforcing_boot = enforcing ? 1 : 0 ;
2005-04-16 15:20:36 -07:00
return 1 ;
}
/* Register enforcing_setup() as the handler for this boot option string. */
__setup ( " enforcing= " , enforcing_setup ) ;
2018-03-01 18:48:02 -05:00
# else
/* No boot option in this configuration: the flag is the constant 1. */
# define selinux_enforcing_boot 1
2005-04-16 15:20:36 -07:00
# endif
2019-12-17 09:15:10 -05:00
/*
 * Boot-time enabled flag: init-only data with external linkage, defaulting
 * to 1. It can only be changed from the command line when
 * CONFIG_SECURITY_SELINUX_BOOTPARAM is set (handler below).
 */
int selinux_enabled_boot __initdata = 1 ;
2005-04-16 15:20:36 -07:00
# ifdef CONFIG_SECURITY_SELINUX_BOOTPARAM
/*
 * selinux_enabled_setup - parse the value given to the selinux boot option
 * @str: text of the option value
 *
 * Parses @str with kstrtoul() using base argument 0 (radix chosen from the
 * string's prefix, per the kstrtoul() documentation). On success the result
 * is normalised to 0 or 1 and stored in selinux_enabled_boot. On a parse
 * error the default is kept and the error is not reported.
 *
 * Return: always 1, whether or not parsing succeeded.
 * NOTE(review): presumably the __setup() "option handled" convention -
 * confirm against include/linux/init.h.
 */
static int __init selinux_enabled_setup ( char * str )
{
2008-05-14 11:27:45 -04:00
unsigned long enabled ;
2014-02-05 15:13:14 +09:00
/* kstrtoul() returns 0 on success, hence the negation. */
if ( ! kstrtoul ( str , 0 , & enabled ) )
2019-12-17 09:15:10 -05:00
/* Any non-zero value collapses to 1. */
selinux_enabled_boot = enabled ? 1 : 0 ;
2005-04-16 15:20:36 -07:00
return 1 ;
}
/* Register selinux_enabled_setup() as the handler for this boot option string. */
__setup ( " selinux= " , selinux_enabled_setup ) ;
# endif
2018-03-01 18:48:02 -05:00
/*
 * Kernel parameter handler for the checkreqprot option.  The parsed value is
 * not stored anywhere: the handler only logs an error when a non-zero value
 * is supplied, so the option is accepted but has no effect here.  Always
 * returns 1.
 */
static int __init checkreqprot_setup ( char * str )
{
unsigned long checkreqprot ;
2020-01-08 11:24:47 -05:00
if ( ! kstrtoul ( str , 0 , & checkreqprot ) ) {
if ( checkreqprot )
selinux: remove the 'checkreqprot' functionality
We originally promised that the SELinux 'checkreqprot' functionality
would be removed no sooner than June 2021, and now that it is March
2023 it seems like it is a good time to do the final removal. The
deprecation notice in the kernel provides plenty of detail on why
'checkreqprot' is not desirable, with the key point repeated below:
This was a compatibility mechanism for legacy userspace and
for the READ_IMPLIES_EXEC personality flag. However, if set to
1, it weakens security by allowing mappings to be made executable
without authorization by policy. The default value of checkreqprot
at boot was changed starting in Linux v4.4 to 0 (i.e. check the
actual protection), and Android and Linux distributions have been
explicitly writing a "0" to /sys/fs/selinux/checkreqprot during
initialization for some time.
Along with the official deprecation notice, we have been discussing
this on-list and directly with several of the larger SELinux-based
distros and everyone is happy to see this feature finally removed.
In an attempt to catch all of the smaller, and DIY, Linux systems
we have been writing a deprecation notice URL into the kernel log,
along with a growing ssleep() penalty, when admins enabled
checkreqprot at runtime or via the kernel command line. We have
yet to have anyone come to us and raise an objection to the
deprecation or planned removal.
It is worth noting that while this patch removes the checkreqprot
functionality, it leaves the user visible interfaces (kernel command
line and selinuxfs file) intact, just inert. This should help
prevent breakages with existing userspace tools that correctly, but
unnecessarily, disable checkreqprot at boot or runtime. Admins
that attempt to enable checkreqprot will be met with a removal
message in the kernel log.
Acked-by: Stephen Smalley <stephen.smalley.work@gmail.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
2023-03-16 11:43:08 -04:00
pr_err ( " SELinux: checkreqprot set to 1 via kernel parameter. This is no longer supported. \n " ) ;
2020-01-08 11:24:47 -05:00
}
2018-03-01 18:48:02 -05:00
/* same return value whether or not the string parsed */
return 1 ;
}
__setup ( " checkreqprot= " , checkreqprot_setup ) ;
2008-01-29 08:43:36 -05:00
/**
* selinux_secmark_enabled - Check to see if SECMARK is currently enabled
*
* Description :
* This function checks the SECMARK reference counter to see if any SECMARK
* targets are currently configured , if the reference counter is greater than
* zero SECMARK is considered to be enabled . Returns true ( 1 ) if SECMARK is
2013-05-03 09:05:39 -04:00
* enabled , false ( 0 ) if SECMARK is disabled . If the always_check_network
* policy capability is enabled , SECMARK is always considered enabled .
2008-01-29 08:43:36 -05:00
*
*/
static int selinux_secmark_enabled ( void )
{
2018-03-01 18:48:02 -05:00
return ( selinux_policycap_alwaysnetwork ( ) | |
atomic_read ( & selinux_secmark_refcount ) ) ;
2013-05-03 09:05:39 -04:00
}
/**
 * selinux_peerlbl_enabled - Check to see if peer labeling is currently enabled
 *
 * Description:
 * Peer labeling is considered enabled when the always_check_network policy
 * capability is set, or when either NetLabel or labeled IPSEC (xfrm) reports
 * itself enabled.  Returns true (1) if any of these hold and false (0) if
 * none do.
 *
 */
static int selinux_peerlbl_enabled(void)
{
	/* checks run in this order and stop at the first one that succeeds */
	if (selinux_policycap_alwaysnetwork())
		return 1;
	if (netlbl_enabled())
		return 1;

	return selinux_xfrm_enabled() ? 1 : 0;
}
2014-06-26 14:33:56 -04:00
/*
 * AVC callback: on AVC_CALLBACK_RESET flush the netif, netnode and netport
 * caches and then call synchronize_net().  Every other event is ignored.
 * Always returns 0.
 */
static int selinux_netcache_avc_callback(u32 event)
{
	if (event != AVC_CALLBACK_RESET)
		return 0;

	sel_netif_flush();
	sel_netnode_flush();
	sel_netport_flush();
	synchronize_net();

	return 0;
}
2017-05-19 15:48:53 +03:00
/*
 * AVC callback: on AVC_CALLBACK_RESET flush the InfiniBand pkey cache and
 * raise an LSM_POLICY_CHANGE event on the blocking LSM notifier chain.
 * Every other event is ignored.  Always returns 0.
 */
static int selinux_lsm_notifier_avc_callback(u32 event)
{
	if (event != AVC_CALLBACK_RESET)
		return 0;

	sel_ib_pkey_flush();
	call_blocking_lsm_notifier(LSM_POLICY_CHANGE, NULL);

	return 0;
}
CRED: Inaugurate COW credentials
Inaugurate copy-on-write credentials management. This uses RCU to manage the
credentials pointer in the task_struct with respect to accesses by other tasks.
A process may only modify its own credentials, and so does not need locking to
access or modify its own credentials.
A mutex (cred_replace_mutex) is added to the task_struct to control the effect
of PTRACE_ATTACHED on credential calculations, particularly with respect to
execve().
With this patch, the contents of an active credentials struct may not be
changed directly; rather a new set of credentials must be prepared, modified
and committed using something like the following sequence of events:
struct cred *new = prepare_creds();
int ret = blah(new);
if (ret < 0) {
abort_creds(new);
return ret;
}
return commit_creds(new);
There are some exceptions to this rule: the keyrings pointed to by the active
credentials may be instantiated - keyrings violate the COW rule as managing
COW keyrings is tricky, given that it is possible for a task to directly alter
the keys in a keyring in use by another task.
To help enforce this, various pointers to sets of credentials, such as those in
the task_struct, are declared const. The purpose of this is compile-time
discouragement of altering credentials through those pointers. Once a set of
credentials has been made public through one of these pointers, it may not be
modified, except under special circumstances:
(1) Its reference count may be incremented and decremented.
(2) The keyrings to which it points may be modified, but not replaced.
The only safe way to modify anything else is to create a replacement and commit
using the functions described in Documentation/credentials.txt (which will be
added by a later patch).
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
This now prepares and commits credentials in various places in the
security code rather than altering the current creds directly.
(2) Temporary credential overrides.
do_coredump() and sys_faccessat() now prepare their own credentials and
temporarily override the ones currently on the acting thread, whilst
preventing interference from other threads by holding cred_replace_mutex
on the thread being dumped.
This will be replaced in a future patch by something that hands down the
credentials directly to the functions being called, rather than altering
the task's objective credentials.
(3) LSM interface.
A number of functions have been changed, added or removed:
(*) security_capset_check(), ->capset_check()
(*) security_capset_set(), ->capset_set()
Removed in favour of security_capset().
(*) security_capset(), ->capset()
New. This is passed a pointer to the new creds, a pointer to the old
creds and the proposed capability sets. It should fill in the new
creds or return an error. All pointers, barring the pointer to the
new creds, are now const.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
Changed; now returns a value, which will cause the process to be
killed if it's an error.
(*) security_task_alloc(), ->task_alloc_security()
Removed in favour of security_prepare_creds().
(*) security_cred_free(), ->cred_free()
New. Free security data attached to cred->security.
(*) security_prepare_creds(), ->cred_prepare()
New. Duplicate any security data attached to cred->security.
(*) security_commit_creds(), ->cred_commit()
New. Apply any security effects for the upcoming installation of new
security by commit_creds().
(*) security_task_post_setuid(), ->task_post_setuid()
Removed in favour of security_task_fix_setuid().
(*) security_task_fix_setuid(), ->task_fix_setuid()
Fix up the proposed new credentials for setuid(). This is used by
cap_set_fix_setuid() to implicitly adjust capabilities in line with
setuid() changes. Changes are made to the new credentials, rather
than the task itself as in security_task_post_setuid().
(*) security_task_reparent_to_init(), ->task_reparent_to_init()
Removed. Instead the task being reparented to init is referred
directly to init's credentials.
NOTE! This results in the loss of some state: SELinux's osid no
longer records the sid of the thread that forked it.
(*) security_key_alloc(), ->key_alloc()
(*) security_key_permission(), ->key_permission()
Changed. These now take cred pointers rather than task pointers to
refer to the security context.
(4) sys_capset().
This has been simplified and uses less locking. The LSM functions it
calls have been merged.
(5) reparent_to_kthreadd().
This gives the current thread the same credentials as init by simply using
commit_thread() to point that way.
(6) __sigqueue_alloc() and switch_uid()
__sigqueue_alloc() can't stop the target task from changing its creds
beneath it, so this function gets a reference to the currently applicable
user_struct which it then passes into the sigqueue struct it returns if
successful.
switch_uid() is now called from commit_creds(), and possibly should be
folded into that. commit_creds() should take care of protecting
__sigqueue_alloc().
(7) [sg]et[ug]id() and co and [sg]et_current_groups.
The set functions now all use prepare_creds(), commit_creds() and
abort_creds() to build and check a new set of credentials before applying
it.
security_task_set[ug]id() is called inside the prepared section. This
guarantees that nothing else will affect the creds until we've finished.
The calling of set_dumpable() has been moved into commit_creds().
Much of the functionality of set_user() has been moved into
commit_creds().
The get functions all simply access the data directly.
(8) security_task_prctl() and cap_task_prctl().
security_task_prctl() has been modified to return -ENOSYS if it doesn't
want to handle a function, or otherwise return the return value directly
rather than through an argument.
Additionally, cap_task_prctl() now prepares a new set of credentials, even
if it doesn't end up using it.
(9) Keyrings.
A number of changes have been made to the keyrings code:
(a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have
all been dropped and built in to the credentials functions directly.
They may want separating out again later.
(b) key_alloc() and search_process_keyrings() now take a cred pointer
rather than a task pointer to specify the security context.
(c) copy_creds() gives a new thread within the same thread group a new
thread keyring if its parent had one, otherwise it discards the thread
keyring.
(d) The authorisation key now points directly to the credentials to extend
the search into rather than pointing to the task that carries them.
(e) Installing thread, process or session keyrings causes a new set of
credentials to be created, even though it's not strictly necessary for
process or session keyrings (they're shared).
(10) Usermode helper.
The usermode helper code now carries a cred struct pointer in its
subprocess_info struct instead of a new session keyring pointer. This set
of credentials is derived from init_cred and installed on the new process
after it has been cloned.
call_usermodehelper_setup() allocates the new credentials and
call_usermodehelper_freeinfo() discards them if they haven't been used. A
special cred function (prepare_usermodeinfo_creds()) is provided
specifically for call_usermodehelper_setup() to call.
call_usermodehelper_setkeys() adjusts the credentials to sport the
supplied keyring as the new session keyring.
(11) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) selinux_setprocattr() no longer does its check for whether the
current ptracer can access processes with the new SID inside the lock
that covers getting the ptracer's SID. Whilst this lock ensures that
the check is done with the ptracer pinned, the result is only valid
until the lock is released, so there's no point doing it inside the
lock.
(12) is_single_threaded().
This function has been extracted from selinux_setprocattr() and put into
a file of its own in the lib/ directory as join_session_keyring() now
wants to use it too.
The code in SELinux just checked to see whether a task shared mm_structs
with other tasks (CLONE_VM), but that isn't good enough. We really want
to know if they're part of the same thread group (CLONE_THREAD).
(13) nfsd.
The NFS server daemon now has to use the COW credentials to set the
credentials it is going to use. It really needs to pass the credentials
down to the functions it calls, but it can't do that until other patches
in this series have been applied.
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:23 +11:00
/*
* initialise the security for the init task
*/
static void cred_init_security ( void )
2005-04-16 15:20:36 -07:00
{
struct task_security_struct * tsec ;
2022-01-27 10:45:59 -05:00
tsec = selinux_cred ( unrcu_pointer ( current - > real_cred ) ) ;
CRED: Inaugurate COW credentials
Inaugurate copy-on-write credentials management. This uses RCU to manage the
credentials pointer in the task_struct with respect to accesses by other tasks.
A process may only modify its own credentials, and so does not need locking to
access or modify its own credentials.
A mutex (cred_replace_mutex) is added to the task_struct to control the effect
of PTRACE_ATTACHED on credential calculations, particularly with respect to
execve().
With this patch, the contents of an active credentials struct may not be
changed directly; rather a new set of credentials must be prepared, modified
and committed using something like the following sequence of events:
struct cred *new = prepare_creds();
int ret = blah(new);
if (ret < 0) {
abort_creds(new);
return ret;
}
return commit_creds(new);
There are some exceptions to this rule: the keyrings pointed to by the active
credentials may be instantiated - keyrings violate the COW rule as managing
COW keyrings is tricky, given that it is possible for a task to directly alter
the keys in a keyring in use by another task.
To help enforce this, various pointers to sets of credentials, such as those in
the task_struct, are declared const. The purpose of this is compile-time
discouragement of altering credentials through those pointers. Once a set of
credentials has been made public through one of these pointers, it may not be
modified, except under special circumstances:
(1) Its reference count may incremented and decremented.
(2) The keyrings to which it points may be modified, but not replaced.
The only safe way to modify anything else is to create a replacement and commit
using the functions described in Documentation/credentials.txt (which will be
added by a later patch).
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
This now prepares and commits credentials in various places in the
security code rather than altering the current creds directly.
(2) Temporary credential overrides.
do_coredump() and sys_faccessat() now prepare their own credentials and
temporarily override the ones currently on the acting thread, whilst
preventing interference from other threads by holding cred_replace_mutex
on the thread being dumped.
This will be replaced in a future patch by something that hands down the
credentials directly to the functions being called, rather than altering
the task's objective credentials.
(3) LSM interface.
A number of functions have been changed, added or removed:
(*) security_capset_check(), ->capset_check()
(*) security_capset_set(), ->capset_set()
Removed in favour of security_capset().
(*) security_capset(), ->capset()
New. This is passed a pointer to the new creds, a pointer to the old
creds and the proposed capability sets. It should fill in the new
creds or return an error. All pointers, barring the pointer to the
new creds, are now const.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
Changed; now returns a value, which will cause the process to be
killed if it's an error.
(*) security_task_alloc(), ->task_alloc_security()
Removed in favour of security_prepare_creds().
(*) security_cred_free(), ->cred_free()
New. Free security data attached to cred->security.
(*) security_prepare_creds(), ->cred_prepare()
New. Duplicate any security data attached to cred->security.
(*) security_commit_creds(), ->cred_commit()
New. Apply any security effects for the upcoming installation of new
security by commit_creds().
(*) security_task_post_setuid(), ->task_post_setuid()
Removed in favour of security_task_fix_setuid().
(*) security_task_fix_setuid(), ->task_fix_setuid()
Fix up the proposed new credentials for setuid(). This is used by
cap_set_fix_setuid() to implicitly adjust capabilities in line with
setuid() changes. Changes are made to the new credentials, rather
than the task itself as in security_task_post_setuid().
(*) security_task_reparent_to_init(), ->task_reparent_to_init()
Removed. Instead the task being reparented to init is referred
directly to init's credentials.
NOTE! This results in the loss of some state: SELinux's osid no
longer records the sid of the thread that forked it.
(*) security_key_alloc(), ->key_alloc()
(*) security_key_permission(), ->key_permission()
Changed. These now take cred pointers rather than task pointers to
refer to the security context.
(4) sys_capset().
This has been simplified and uses less locking. The LSM functions it
calls have been merged.
(5) reparent_to_kthreadd().
This gives the current thread the same credentials as init by simply using
commit_thread() to point that way.
(6) __sigqueue_alloc() and switch_uid()
__sigqueue_alloc() can't stop the target task from changing its creds
beneath it, so this function gets a reference to the currently applicable
user_struct which it then passes into the sigqueue struct it returns if
successful.
switch_uid() is now called from commit_creds(), and possibly should be
folded into that. commit_creds() should take care of protecting
__sigqueue_alloc().
(7) [sg]et[ug]id() and co and [sg]et_current_groups.
The set functions now all use prepare_creds(), commit_creds() and
abort_creds() to build and check a new set of credentials before applying
it.
security_task_set[ug]id() is called inside the prepared section. This
guarantees that nothing else will affect the creds until we've finished.
The calling of set_dumpable() has been moved into commit_creds().
Much of the functionality of set_user() has been moved into
commit_creds().
The get functions all simply access the data directly.
(8) security_task_prctl() and cap_task_prctl().
security_task_prctl() has been modified to return -ENOSYS if it doesn't
want to handle a function, or otherwise return the return value directly
rather than through an argument.
Additionally, cap_task_prctl() now prepares a new set of credentials, even
if it doesn't end up using it.
(9) Keyrings.
A number of changes have been made to the keyrings code:
(a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have
all been dropped and built in to the credentials functions directly.
They may want separating out again later.
(b) key_alloc() and search_process_keyrings() now take a cred pointer
rather than a task pointer to specify the security context.
(c) copy_creds() gives a new thread within the same thread group a new
thread keyring if its parent had one, otherwise it discards the thread
keyring.
(d) The authorisation key now points directly to the credentials to extend
the search into rather pointing to the task that carries them.
(e) Installing thread, process or session keyrings causes a new set of
credentials to be created, even though it's not strictly necessary for
process or session keyrings (they're shared).
(10) Usermode helper.
The usermode helper code now carries a cred struct pointer in its
subprocess_info struct instead of a new session keyring pointer. This set
of credentials is derived from init_cred and installed on the new process
after it has been cloned.
call_usermodehelper_setup() allocates the new credentials and
call_usermodehelper_freeinfo() discards them if they haven't been used. A
special cred function (prepare_usermodeinfo_creds()) is provided
specifically for call_usermodehelper_setup() to call.
call_usermodehelper_setkeys() adjusts the credentials to sport the
supplied keyring as the new session keyring.
(11) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) selinux_setprocattr() no longer does its check for whether the
current ptracer can access processes with the new SID inside the lock
that covers getting the ptracer's SID. Whilst this lock ensures that
the check is done with the ptracer pinned, the result is only valid
until the lock is released, so there's no point doing it inside the
lock.
(12) is_single_threaded().
This function has been extracted from selinux_setprocattr() and put into
a file of its own in the lib/ directory as join_session_keyring() now
wants to use it too.
The code in SELinux just checked to see whether a task shared mm_structs
with other tasks (CLONE_VM), but that isn't good enough. We really want
to know if they're part of the same thread group (CLONE_THREAD).
(13) nfsd.
The NFS server daemon now has to use the COW credentials to set the
credentials it is going to use. It really needs to pass the credentials
down to the functions it calls, but it can't do that until other patches
in this series have been applied.
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:23 +11:00
tsec - > osid = tsec - > sid = SECINITSID_KERNEL ;
2005-04-16 15:20:36 -07:00
}
2008-11-14 10:39:21 +11:00
/*
* get the security ID of a set of credentials
*/
static inline u32 cred_sid ( const struct cred * cred )
{
const struct task_security_struct * tsec ;
2018-09-21 17:17:16 -07:00
tsec = selinux_cred ( cred ) ;
2008-11-14 10:39:21 +11:00
return tsec - > sid ;
}
2023-07-19 13:37:49 +02:00
/*
 * Initialise @ad as a network audit record backed by @net.
 *
 * @net receives @ifindex, @sk and @family; @ad is typed as
 * LSM_AUDIT_DATA_NET and made to point at @net.
 */
static void __ad_net_init(struct common_audit_data *ad,
			  struct lsm_network_audit *net,
			  int ifindex, struct sock *sk, u16 family)
{
	/* fill in the network details first ... */
	net->family = family;
	net->sk = sk;
	net->netif = ifindex;

	/* ... then attach them to the audit record */
	ad->u.net = net;
	ad->type = LSM_AUDIT_DATA_NET;
}
static void ad_net_init_from_sk ( struct common_audit_data * ad ,
struct lsm_network_audit * net ,
struct sock * sk )
{
__ad_net_init ( ad , net , 0 , sk , 0 ) ;
}
/*
 * Initialise a network audit record from an interface index and address
 * family; no socket is recorded (the sk field is set to NULL).
 */
static void ad_net_init_from_iif(struct common_audit_data *ad,
				 struct lsm_network_audit *net,
				 int ifindex, u16 family)
{
	struct sock *no_sock = NULL;

	__ad_net_init(ad, net, ifindex, no_sock, family);
}
2008-11-14 10:39:19 +11:00
/*
2008-11-14 10:39:26 +11:00
* get the objective security ID of a task
2008-11-14 10:39:19 +11:00
*/
2021-02-18 15:13:40 -05:00
static inline u32 task_sid_obj ( const struct task_struct * task )
2008-11-14 10:39:19 +11:00
{
u32 sid ;
rcu_read_lock ( ) ;
2008-11-14 10:39:21 +11:00
sid = cred_sid ( __task_cred ( task ) ) ;
2008-11-14 10:39:19 +11:00
rcu_read_unlock ( ) ;
return sid ;
}
2015-12-24 11:09:40 -05:00
static int inode_doinit_with_dentry ( struct inode * inode , struct dentry * opt_dentry ) ;
/*
* Try reloading inode security labels that have been marked as invalid . The
* @ may_sleep parameter indicates when sleeping and thus reloading labels is
2016-11-10 22:18:27 +01:00
* allowed ; when set to false , returns - ECHILD when the label is
2018-04-24 21:31:02 -04:00
* invalid . The @ dentry parameter should be set to a dentry of the inode .
2015-12-24 11:09:40 -05:00
*/
static int __inode_security_revalidate ( struct inode * inode ,
2018-04-24 21:31:02 -04:00
struct dentry * dentry ,
2015-12-24 11:09:40 -05:00
bool may_sleep )
{
2018-09-21 17:19:11 -07:00
struct inode_security_struct * isec = selinux_inode ( inode ) ;
2015-12-24 11:09:40 -05:00
might_sleep_if ( may_sleep ) ;
2024-08-26 08:47:09 -04:00
/*
* The check of isec - > initialized below is racy but
* inode_doinit_with_dentry ( ) will recheck with
* isec - > lock held .
*/
2023-03-09 13:30:37 -05:00
if ( selinux_initialized ( ) & &
2024-08-26 08:47:09 -04:00
data_race ( isec - > initialized ! = LABEL_INITIALIZED ) ) {
2015-12-24 11:09:40 -05:00
if ( ! may_sleep )
return - ECHILD ;
/*
* Try reloading the inode security label . This will fail if
* @ opt_dentry is NULL and no dentry for this inode can be
* found ; in that case , continue using the old label .
*/
2018-04-24 21:31:02 -04:00
inode_doinit_with_dentry ( inode , dentry ) ;
2015-12-24 11:09:40 -05:00
}
return 0 ;
}
/*
 * Get the security label of an inode without attempting to revalidate it.
 */
static struct inode_security_struct *inode_security_novalidate(struct inode *inode)
{
	struct inode_security_struct *isec = selinux_inode(inode);

	return isec;
}
/*
 * Get the security label of an inode, revalidating it first.  Sleeping is
 * only permitted when @rcu is false; if revalidation reports an error, that
 * error is returned encoded with ERR_PTR().
 */
static struct inode_security_struct *inode_security_rcu(struct inode *inode, bool rcu)
{
	const bool may_sleep = !rcu;
	int rc;

	rc = __inode_security_revalidate(inode, NULL, may_sleep);
	if (!rc)
		return selinux_inode(inode);

	return ERR_PTR(rc);
}
2015-12-24 11:09:39 -05:00
/*
* Get the security label of an inode .
*/
static struct inode_security_struct * inode_security ( struct inode * inode )
{
2015-12-24 11:09:40 -05:00
__inode_security_revalidate ( inode , NULL , true ) ;
2018-09-21 17:19:11 -07:00
return selinux_inode ( inode ) ;
2015-12-24 11:09:39 -05:00
}
2016-04-19 16:36:28 -04:00
/*
 * Get the security label of a dentry's backing inode without attempting to
 * revalidate it.
 */
static struct inode_security_struct *backing_inode_security_novalidate(struct dentry *dentry)
{
	return selinux_inode(d_backing_inode(dentry));
}
2015-12-24 11:09:39 -05:00
/*
* Get the security label of a dentry ' s backing inode .
*/
static struct inode_security_struct * backing_inode_security ( struct dentry * dentry )
{
struct inode * inode = d_backing_inode ( dentry ) ;
2015-12-24 11:09:40 -05:00
__inode_security_revalidate ( inode , dentry , true ) ;
2018-09-21 17:19:11 -07:00
return selinux_inode ( inode ) ;
2015-12-24 11:09:39 -05:00
}
2005-04-16 15:20:36 -07:00
static void inode_free_security ( struct inode * inode )
{
2018-09-21 17:19:11 -07:00
struct inode_security_struct * isec = selinux_inode ( inode ) ;
2018-09-21 17:19:29 -07:00
struct superblock_security_struct * sbsec ;
2005-04-16 15:20:36 -07:00
2018-09-21 17:19:29 -07:00
if ( ! isec )
return ;
2021-04-22 17:41:15 +02:00
sbsec = selinux_superblock ( inode - > i_sb ) ;
2015-07-10 17:19:56 -04:00
/*
* As not all inode security structures are in a list , we check for
* empty list outside of the lock to make sure that we won ' t waste
* time taking a lock doing nothing .
*
* The list_del_init ( ) function can be safely called more than once .
* It should not be possible for this function to be called with
* concurrent list_add ( ) , but for better safety against future changes
* in the code , we use list_empty_careful ( ) here .
*/
if ( ! list_empty_careful ( & isec - > list ) ) {
spin_lock ( & sbsec - > isec_lock ) ;
2005-04-16 15:20:36 -07:00
list_del_init ( & isec - > list ) ;
2015-07-10 17:19:56 -04:00
spin_unlock ( & sbsec - > isec_lock ) ;
}
2005-04-16 15:20:36 -07:00
}
2018-12-13 15:04:59 -05:00
struct selinux_mnt_opts {
2022-01-31 13:57:36 -05:00
u32 fscontext_sid ;
u32 context_sid ;
u32 rootcontext_sid ;
u32 defcontext_sid ;
2018-12-13 15:04:59 -05:00
} ;
2018-12-13 13:41:47 -05:00
/*
 * Release a mount options object; @mnt_opts is handed straight to kfree().
 */
static void selinux_free_mnt_opts(void *mnt_opts)
{
	kfree(mnt_opts);
}
2005-04-16 15:20:36 -07:00
/*
 * Identifiers for the SELinux mount options; Opt_error marks text that
 * matched none of them.
 */
enum {
	Opt_error	= -1,	/* no option matched */
	Opt_context	= 0,
	Opt_defcontext	= 1,
	Opt_fscontext	= 2,
	Opt_rootcontext	= 3,
	Opt_seclabel	= 4,
};
2018-12-17 10:14:16 -05:00
# define A(s, has_arg) {#s, sizeof(#s) - 1, Opt_##s, has_arg}
2023-04-20 17:05:03 +02:00
static const struct {
2018-12-14 22:44:50 -05:00
const char * name ;
int len ;
int opt ;
bool has_arg ;
} tokens [ ] = {
2018-12-17 10:14:16 -05:00
A ( context , true ) ,
A ( fscontext , true ) ,
A ( defcontext , true ) ,
A ( rootcontext , true ) ,
A ( seclabel , false ) ,
2005-04-16 15:20:36 -07:00
} ;
2018-12-14 22:44:50 -05:00
# undef A
/*
 * Match the option text @s, of length @l, against the tokens [ ] table.
 *
 * For an option that takes an argument, the name must be followed by the
 * separator character and * @arg is set to the text just past it.  For an
 * option without an argument the name must span all @l characters, and
 * * @arg is left untouched.
 *
 * Returns the matching entry ' s opt value , or Opt_error when nothing matches .
 *
 * NOTE(review): the separator character literal below looks space-padded by
 * text extraction; the intended value is presumably a single equals sign --
 * confirm against the real file.
 * NOTE(review): the length test compares a size_t with the int @l ; callers
 * presumably never pass a negative @l -- confirm.
 */
static int match_opt_prefix ( char * s , int l , char * * arg )
{
int i ;
for ( i = 0 ; i < ARRAY_SIZE ( tokens ) ; i + + ) {
size_t len = tokens [ i ] . len ;
/* skip names longer than the input or that are not a prefix of @s */
if ( len > l | | memcmp ( s , tokens [ i ] . name , len ) )
continue ;
if ( tokens [ i ] . has_arg ) {
/* reject a bare name and a name not followed by the separator */
if ( len = = l | | s [ len ] ! = ' = ' )
continue ;
/* the argument starts right after the separator */
* arg = s + len + 1 ;
} else if ( len ! = l )
continue ;
return tokens [ i ] . opt ;
}
return Opt_error ;
}
2005-04-16 15:20:36 -07:00
/* Log message text for duplicate or incompatible SELinux mount options. */
# define SEL_MOUNT_FAIL_MSG "SELinux: duplicate or incompatible mount options\n"
2006-07-10 04:43:53 -07:00
/*
 * Check whether @cred may relabel a superblock to @sid.
 *
 * Two filesystem-class permissions are checked in order, stopping at the
 * first failure: RELABELFROM against the superblock's current SID, then
 * RELABELTO against @sid.  Returns 0 when both are granted, otherwise the
 * result of the failing avc_has_perm() call.
 */
static int may_context_mount_sb_relabel(u32 sid,
			struct superblock_security_struct *sbsec,
			const struct cred *cred)
{
	const struct task_security_struct *tsec = selinux_cred(cred);
	int err;

	err = avc_has_perm(tsec->sid, sbsec->sid, SECCLASS_FILESYSTEM,
			   FILESYSTEM__RELABELFROM, NULL);
	if (!err)
		err = avc_has_perm(tsec->sid, sid, SECCLASS_FILESYSTEM,
				   FILESYSTEM__RELABELTO, NULL);

	return err;
}
2006-07-10 04:43:55 -07:00
/*
 * Check whether @cred may have inodes on a superblock labelled with @sid.
 *
 * Two filesystem-class permissions are checked in order, stopping at the
 * first failure: RELABELFROM for the credential's SID against the
 * superblock's SID, then ASSOCIATE for @sid against the superblock's SID.
 * Returns 0 when both are granted, otherwise the result of the failing
 * avc_has_perm() call.
 */
static int may_context_mount_inode_relabel(u32 sid,
			struct superblock_security_struct *sbsec,
			const struct cred *cred)
{
	const struct task_security_struct *tsec = selinux_cred(cred);
	int err;

	err = avc_has_perm(tsec->sid, sbsec->sid, SECCLASS_FILESYSTEM,
			   FILESYSTEM__RELABELFROM, NULL);
	if (!err)
		err = avc_has_perm(sid, sbsec->sid, SECCLASS_FILESYSTEM,
				   FILESYSTEM__ASSOCIATE, NULL);

	return err;
}
2018-12-21 21:18:52 +01:00
/*
 * Report whether @sb ' s filesystem type is one of the genfs - labelled types
 * singled out below.  The result is non-zero when the type name equals one
 * of the unconditional names, or - only when
 * selinux_policycap_cgroupseclabel ( ) is true - one of the two cgroup names.
 *
 * NOTE(review): the filesystem name literals look space-padded by text
 * extraction; confirm the exact strings against the real file.
 */
static int selinux_is_genfs_special_handling ( struct super_block * sb )
2012-10-10 14:27:35 -04:00
{
2018-12-21 21:18:52 +01:00
/* Special handling. Genfs but also in-core setxattr handler */
return ! strcmp ( sb - > s_type - > name , " sysfs " ) | |
2015-02-04 11:34:30 -05:00
! strcmp ( sb - > s_type - > name , " pstore " ) | |
! strcmp ( sb - > s_type - > name , " debugfs " ) | |
2017-01-09 10:07:30 -05:00
! strcmp ( sb - > s_type - > name , " tracefs " ) | |
2017-02-28 10:35:56 -05:00
! strcmp ( sb - > s_type - > name , " rootfs " ) | |
2018-03-01 18:48:02 -05:00
/* the cgroup types only count when the policy capability is enabled */
( selinux_policycap_cgroupseclabel ( ) & &
2017-02-28 10:35:56 -05:00
( ! strcmp ( sb - > s_type - > name , " cgroup " ) | |
! strcmp ( sb - > s_type - > name , " cgroup2 " ) ) ) ;
2012-10-10 14:27:35 -04:00
}
2018-12-21 21:18:52 +01:00
static int selinux_is_sblabel_mnt ( struct super_block * sb )
{
2021-04-22 17:41:15 +02:00
struct superblock_security_struct * sbsec = selinux_superblock ( sb ) ;
2018-12-21 21:18:52 +01:00
/*
* IMPORTANT : Double - check logic in this function when adding a new
* SECURITY_FS_USE_ * definition !
*/
BUILD_BUG_ON ( SECURITY_FS_USE_MAX ! = 7 ) ;
switch ( sbsec - > behavior ) {
case SECURITY_FS_USE_XATTR :
case SECURITY_FS_USE_TRANS :
case SECURITY_FS_USE_TASK :
case SECURITY_FS_USE_NATIVE :
return 1 ;
case SECURITY_FS_USE_GENFS :
return selinux_is_genfs_special_handling ( sb ) ;
/* Never allow relabeling on context mounts */
case SECURITY_FS_USE_MNTPOINT :
case SECURITY_FS_USE_NONE :
default :
return 0 ;
}
}
2021-01-13 13:38:02 +01:00
/*
 * Check that the root directory of a superblock can serve the SELinux
 * xattr, and switch the superblock to genfs labeling when it cannot.
 *
 * @sb: superblock to probe; its root dentry and backing inode are used.
 *
 * Returns 0 when the root inode has IOP_XATTR set and the getxattr probe
 * either succeeds or yields -ENODATA, or when the genfs fallback succeeds
 * (sbsec->behavior and sbsec->sid are then overwritten). Returns the
 * getxattr error for any failure other than -ENODATA and -EOPNOTSUPP, and
 * -EOPNOTSUPP when the genfs lookup in the fallback path fails.
 */
static int sb_check_xattr_support ( struct super_block * sb )
{
2022-01-25 15:11:33 +08:00
struct superblock_security_struct * sbsec = selinux_superblock ( sb ) ;
2021-01-13 13:38:02 +01:00
struct dentry * root = sb - > s_root ;
struct inode * root_inode = d_backing_inode ( root ) ;
u32 sid ;
int rc ;
/*
* Make sure that the xattr handler exists and that no
* error other than - ENODATA is returned by getxattr on
* the root directory . - ENODATA is ok , as this may be
* the first boot of the SELinux kernel before we have
* assigned xattr values to the filesystem .
*/
if ( ! ( root_inode - > i_opflags & IOP_XATTR ) ) {
pr_warn ( " SELinux: (dev %s, type %s) has no xattr support \n " ,
sb - > s_id , sb - > s_type - > name ) ;
goto fallback ;
}
/* Probe with a NULL buffer and zero size; only the return code is examined. */
rc = __vfs_getxattr ( root , root_inode , XATTR_NAME_SELINUX , NULL , 0 ) ;
if ( rc < 0 & & rc ! = - ENODATA ) {
if ( rc = = - EOPNOTSUPP ) {
pr_warn ( " SELinux: (dev %s, type %s) has no security xattr handler \n " ,
sb - > s_id , sb - > s_type - > name ) ;
goto fallback ;
} else {
pr_warn ( " SELinux: (dev %s, type %s) getxattr errno %d \n " ,
sb - > s_id , sb - > s_type - > name , - rc ) ;
return rc ;
}
}
return 0 ;
fallback :
/* No xattr support - try to fallback to genfs if possible. */
2023-03-09 13:30:37 -05:00
rc = security_genfs_sid ( sb - > s_type - > name , " / " ,
2021-01-13 13:38:02 +01:00
SECCLASS_DIR , & sid ) ;
/* Any genfs lookup failure is reported uniformly as -EOPNOTSUPP. */
if ( rc )
return - EOPNOTSUPP ;
pr_warn ( " SELinux: (dev %s, type %s) falling back to genfs \n " ,
sb - > s_id , sb - > s_type - > name ) ;
sbsec - > behavior = SECURITY_FS_USE_GENFS ;
sbsec - > sid = sid ;
return 0 ;
}
2007-11-30 13:00:35 -05:00
static int sb_finish_set_opts ( struct super_block * sb )
2005-04-16 15:20:36 -07:00
{
2021-04-22 17:41:15 +02:00
struct superblock_security_struct * sbsec = selinux_superblock ( sb ) ;
2007-11-30 13:00:35 -05:00
struct dentry * root = sb - > s_root ;
2015-03-17 22:26:22 +00:00
struct inode * root_inode = d_backing_inode ( root ) ;
2007-11-30 13:00:35 -05:00
int rc = 0 ;
2005-04-16 15:20:36 -07:00
2007-11-30 13:00:35 -05:00
if ( sbsec - > behavior = = SECURITY_FS_USE_XATTR ) {
2021-01-13 13:38:02 +01:00
rc = sb_check_xattr_support ( sb ) ;
if ( rc )
return rc ;
2007-11-30 13:00:35 -05:00
}
2005-04-16 15:20:36 -07:00
2012-08-24 15:59:14 -04:00
sbsec - > flags | = SE_SBINITIALIZED ;
2017-06-05 11:45:04 -04:00
/*
* Explicitly set or clear SBLABEL_MNT . It ' s not sufficient to simply
* leave the flag untouched because sb_clone_mnt_opts might be handing
* us a superblock that needs the flag to be cleared .
*/
2012-10-10 14:27:35 -04:00
if ( selinux_is_sblabel_mnt ( sb ) )
2012-10-09 10:56:25 -04:00
sbsec - > flags | = SBLABEL_MNT ;
2017-06-05 11:45:04 -04:00
else
sbsec - > flags & = ~ SBLABEL_MNT ;
2009-09-09 14:25:37 -04:00
2007-11-30 13:00:35 -05:00
/* Initialize the root inode. */
rc = inode_doinit_with_dentry ( root_inode , root ) ;
2005-04-16 15:20:36 -07:00
2007-11-30 13:00:35 -05:00
/* Initialize any other inodes associated with the superblock, e.g.
inodes created prior to initial policy load or inodes created
during get_sb by a pseudo filesystem that directly
populates itself . */
spin_lock ( & sbsec - > isec_lock ) ;
2018-12-10 15:34:12 -05:00
while ( ! list_empty ( & sbsec - > isec_head ) ) {
2007-11-30 13:00:35 -05:00
struct inode_security_struct * isec =
2018-12-10 15:34:12 -05:00
list_first_entry ( & sbsec - > isec_head ,
2007-11-30 13:00:35 -05:00
struct inode_security_struct , list ) ;
struct inode * inode = isec - > inode ;
2014-10-06 16:32:52 -04:00
list_del_init ( & isec - > list ) ;
2007-11-30 13:00:35 -05:00
spin_unlock ( & sbsec - > isec_lock ) ;
inode = igrab ( inode ) ;
if ( inode ) {
if ( ! IS_PRIVATE ( inode ) )
2020-01-10 16:32:10 -05:00
inode_doinit_with_dentry ( inode , NULL ) ;
2007-11-30 13:00:35 -05:00
iput ( inode ) ;
}
spin_lock ( & sbsec - > isec_lock ) ;
}
spin_unlock ( & sbsec - > isec_lock ) ;
return rc ;
}
2005-04-16 15:20:36 -07:00
2007-11-30 13:00:35 -05:00
/*
 * Decide whether a mount option conflicts with what is already recorded
 * for the superblock.
 *
 * @sbsec:   SELinux security struct of the superblock being mounted.
 * @flag:    mount-option flag bit of the option under test.
 * @old_sid: SID currently stored for that option.
 * @new_sid: SID requested by this mount.
 *
 * Returns 1 when the option is unacceptable, 0 when it is fine.
 */
static int bad_option(struct superblock_security_struct *sbsec, char flag,
		      u32 old_sid, u32 new_sid)
{
	char requested = sbsec->flags & SE_MNTMASK;

	if (!(sbsec->flags & SE_SBINITIALIZED)) {
		/*
		 * First setup of this superblock: the only possible problem
		 * is the same option being passed twice,
		 * aka someone passed context=a,context=b
		 */
		return (requested & flag) ? 1 : 0;
	}

	/*
	 * Superblock already set up: the old mount command must have used
	 * the same option with the same SID.
	 */
	if ((sbsec->flags & flag) && old_sid == new_sid)
		return 0;

	return 1;
}
2008-03-05 10:31:54 -05:00
2007-11-30 13:00:35 -05:00
/*
 * Allow filesystems with binary mount data to explicitly set mount point
 * labeling information.
 *
 * Applies the SIDs carried in the mount options (fscontext, context,
 * rootcontext, defcontext) to the SELinux security struct of the superblock,
 * selects the labeling behavior, and finishes initialization through
 * sb_finish_set_opts().
 *
 * @sb:             superblock being mounted.
 * @mnt_opts:       struct selinux_mnt_opts with the requested SIDs, or NULL
 *                  when no security options were given.
 * @kern_flags:     flags from the caller; SECURITY_LSM_NATIVE_LABELS is the
 *                  only bit examined here.
 * @set_kern_flags: output flags; must be non-NULL whenever kern_flags is
 *                  non-zero.
 *
 * Returns 0 on success (including the deferred case before SELinux is
 * initialized and no options were given). Returns -EINVAL when kern_flags is
 * set without set_kern_flags, when options are supplied before SELinux is
 * initialized, when the options conflict with an already configured
 * superblock, or when defcontext is requested for a behavior other than
 * xattr or native. Returns -EACCES when contexts are supplied for a mount
 * outside the initial user namespace on a filesystem type that is not
 * exempted below. Otherwise returns the error of the failing helper or the
 * result of sb_finish_set_opts().
 */
2008-03-05 10:31:54 -05:00
static int selinux_set_mnt_opts ( struct super_block * sb ,
2018-12-13 13:41:47 -05:00
void * mnt_opts ,
2013-05-22 12:50:36 -04:00
unsigned long kern_flags ,
unsigned long * set_kern_flags )
2007-11-30 13:00:35 -05:00
{
2008-11-14 10:39:19 +11:00
const struct cred * cred = current_cred ( ) ;
2021-04-22 17:41:15 +02:00
struct superblock_security_struct * sbsec = selinux_superblock ( sb ) ;
2020-11-04 13:01:10 +01:00
struct dentry * root = sb - > s_root ;
2018-12-13 15:04:59 -05:00
struct selinux_mnt_opts * opts = mnt_opts ;
2016-04-19 16:36:28 -04:00
struct inode_security_struct * root_isec ;
2007-11-30 13:00:35 -05:00
u32 fscontext_sid = 0 , context_sid = 0 , rootcontext_sid = 0 ;
u32 defcontext_sid = 0 ;
2018-12-13 15:04:59 -05:00
int rc = 0 ;
2007-11-30 13:00:35 -05:00
2023-05-29 16:05:27 +02:00
/*
* Specifying internal flags without providing a place to
* place the results is not allowed
*/
if ( kern_flags & & ! set_kern_flags )
return - EINVAL ;
2007-11-30 13:00:35 -05:00
/* Everything below runs with sbsec->lock held; every exit goes through the out label. */
mutex_lock ( & sbsec - > lock ) ;
2023-03-09 13:30:37 -05:00
if ( ! selinux_initialized ( ) ) {
2018-12-13 15:04:59 -05:00
if ( ! opts ) {
2007-11-30 13:00:35 -05:00
/* Defer initialization until selinux_complete_init,
after the initial policy is loaded and the security
server is ready to handle calls . */
2023-05-29 16:05:27 +02:00
if ( kern_flags & SECURITY_LSM_NATIVE_LABELS ) {
sbsec - > flags | = SE_SBNATIVE ;
* set_kern_flags | = SECURITY_LSM_NATIVE_LABELS ;
}
2007-11-30 13:00:35 -05:00
goto out ;
}
rc = - EINVAL ;
2018-06-12 10:09:03 +02:00
pr_warn ( " SELinux: Unable to set superblock options "
2008-04-17 11:52:44 -04:00
" before the security server is initialized \n " ) ;
2005-04-16 15:20:36 -07:00
goto out ;
2007-11-30 13:00:35 -05:00
}
2005-04-16 15:20:36 -07:00
2008-03-05 10:31:54 -05:00
/*
* Binary mount data FS will come through this function twice . Once
* from an explicit call and once from the generic calls from the vfs .
* Since the generic VFS calls will not contain any security mount data
* we need to skip the double mount verification .
*
* This does open a hole in which we will not notice if the first
2022-06-09 00:36:16 +02:00
* mount using this sb set explicit options and a second mount using
2008-03-05 10:31:54 -05:00
* this sb does not set any security options . ( The first options
* will be used for both mounts )
*/
2009-01-16 09:22:02 -05:00
if ( ( sbsec - > flags & SE_SBINITIALIZED ) & & ( sb - > s_type - > fs_flags & FS_BINARY_MOUNTDATA )
2018-12-13 15:04:59 -05:00
& & ! opts )
2008-05-14 11:27:45 -04:00
goto out ;
2008-03-05 10:31:54 -05:00
2016-04-19 16:36:28 -04:00
root_isec = backing_inode_security_novalidate ( root ) ;
2007-11-30 13:00:35 -05:00
/*
* parse the mount options , check if they are valid sids .
* also check if someone is trying to mount the same sb more
* than once with different security options .
*/
2018-12-13 15:04:59 -05:00
if ( opts ) {
2022-02-02 13:55:29 +01:00
if ( opts - > fscontext_sid ) {
fscontext_sid = opts - > fscontext_sid ;
2007-11-30 13:00:35 -05:00
if ( bad_option ( sbsec , FSCONTEXT_MNT , sbsec - > sid ,
fscontext_sid ) )
goto out_double_mount ;
sbsec - > flags | = FSCONTEXT_MNT ;
2018-12-13 15:04:59 -05:00
}
2022-02-02 13:55:29 +01:00
if ( opts - > context_sid ) {
context_sid = opts - > context_sid ;
2007-11-30 13:00:35 -05:00
if ( bad_option ( sbsec , CONTEXT_MNT , sbsec - > mntpoint_sid ,
context_sid ) )
goto out_double_mount ;
sbsec - > flags | = CONTEXT_MNT ;
2018-12-13 15:04:59 -05:00
}
2022-02-02 13:55:29 +01:00
if ( opts - > rootcontext_sid ) {
rootcontext_sid = opts - > rootcontext_sid ;
2007-11-30 13:00:35 -05:00
if ( bad_option ( sbsec , ROOTCONTEXT_MNT , root_isec - > sid ,
rootcontext_sid ) )
goto out_double_mount ;
sbsec - > flags | = ROOTCONTEXT_MNT ;
2018-12-13 15:04:59 -05:00
}
2022-02-02 13:55:29 +01:00
if ( opts - > defcontext_sid ) {
defcontext_sid = opts - > defcontext_sid ;
2007-11-30 13:00:35 -05:00
if ( bad_option ( sbsec , DEFCONTEXT_MNT , sbsec - > def_sid ,
defcontext_sid ) )
goto out_double_mount ;
sbsec - > flags | = DEFCONTEXT_MNT ;
2005-04-16 15:20:36 -07:00
}
2007-11-30 13:00:35 -05:00
}
2009-01-16 09:22:02 -05:00
if ( sbsec - > flags & SE_SBINITIALIZED ) {
2007-11-30 13:00:35 -05:00
/* previously mounted with options, but not on this attempt? */
2018-12-13 15:04:59 -05:00
if ( ( sbsec - > flags & SE_MNTMASK ) & & ! opts )
2007-11-30 13:00:35 -05:00
goto out_double_mount ;
rc = 0 ;
goto out ;
}
/* Record filesystem-type specific flags, keyed on the type name. */
2008-07-15 18:32:49 +10:00
if ( strcmp ( sb - > s_type - > name , " proc " ) = = 0 )
2015-06-04 16:22:17 -04:00
sbsec - > flags | = SE_SBPROC | SE_SBGENFS ;
2015-06-04 16:22:17 -04:00
if ( ! strcmp ( sb - > s_type - > name , " debugfs " ) | |
2017-06-20 09:35:33 -07:00
! strcmp ( sb - > s_type - > name , " tracefs " ) | |
2020-02-01 17:46:23 -08:00
! strcmp ( sb - > s_type - > name , " binder " ) | |
2020-02-07 10:01:49 -08:00
! strcmp ( sb - > s_type - > name , " bpf " ) | |
2021-09-28 17:39:31 +02:00
! strcmp ( sb - > s_type - > name , " pstore " ) | |
! strcmp ( sb - > s_type - > name , " securityfs " ) )
2019-02-22 15:57:14 +01:00
sbsec - > flags | = SE_SBGENFS ;
if ( ! strcmp ( sb - > s_type - > name , " sysfs " ) | |
2017-02-09 17:02:42 +01:00
! strcmp ( sb - > s_type - > name , " cgroup " ) | |
! strcmp ( sb - > s_type - > name , " cgroup2 " ) )
2019-02-22 15:57:14 +01:00
sbsec - > flags | = SE_SBGENFS | SE_SBGENFS_XATTR ;
2007-11-30 13:00:35 -05:00
2013-05-22 12:50:37 -04:00
if ( ! sbsec - > behavior ) {
/*
* Determine the labeling behavior to use for this
* filesystem type .
*/
2023-03-09 13:30:37 -05:00
rc = security_fs_use ( sb ) ;
2013-05-22 12:50:37 -04:00
if ( rc ) {
2018-06-12 10:09:03 +02:00
pr_warn ( " %s: security_fs_use(%s) returned %d \n " ,
2013-05-22 12:50:37 -04:00
__func__ , sb - > s_type - > name , rc ) ;
goto out ;
}
2007-11-30 13:00:35 -05:00
}
2016-04-26 14:36:20 -05:00
/*
2017-01-09 10:07:31 -05:00
* If this is a user namespace mount and the filesystem type is not
* explicitly whitelisted , then no contexts are allowed on the command
* line and security labels must be ignored .
2016-04-26 14:36:20 -05:00
*/
2017-01-09 10:07:31 -05:00
if ( sb - > s_user_ns ! = & init_user_ns & &
strcmp ( sb - > s_type - > name , " tmpfs " ) & &
strcmp ( sb - > s_type - > name , " ramfs " ) & &
2021-02-11 13:03:03 -05:00
strcmp ( sb - > s_type - > name , " devpts " ) & &
strcmp ( sb - > s_type - > name , " overlay " ) ) {
2016-04-26 14:36:20 -05:00
if ( context_sid | | fscontext_sid | | rootcontext_sid | |
defcontext_sid ) {
rc = - EACCES ;
goto out ;
}
if ( sbsec - > behavior = = SECURITY_FS_USE_XATTR ) {
sbsec - > behavior = SECURITY_FS_USE_MNTPOINT ;
2023-03-09 13:30:37 -05:00
rc = security_transition_sid ( current_sid ( ) ,
2018-03-01 18:48:02 -05:00
current_sid ( ) ,
2016-04-26 14:36:20 -05:00
SECCLASS_FILE , NULL ,
& sbsec - > mntpoint_sid ) ;
if ( rc )
goto out ;
}
goto out_set_opts ;
}
2007-11-30 13:00:35 -05:00
/* sets the context of the superblock for the fs being mounted. */
if ( fscontext_sid ) {
2008-11-14 10:39:19 +11:00
rc = may_context_mount_sb_relabel ( fscontext_sid , sbsec , cred ) ;
2005-04-16 15:20:36 -07:00
if ( rc )
2007-11-30 13:00:35 -05:00
goto out ;
2005-04-16 15:20:36 -07:00
2007-11-30 13:00:35 -05:00
sbsec - > sid = fscontext_sid ;
2006-07-10 04:43:53 -07:00
}
/*
* Switch to using mount point labeling behavior .
* sets the label used on all file below the mountpoint , and will set
* the superblock context if not already set .
*/
2023-05-29 16:05:27 +02:00
if ( sbsec - > flags & SE_SBNATIVE ) {
/*
* This means we are initializing a superblock that has been
* mounted before the SELinux was initialized and the
* filesystem requested native labeling . We had already
* returned SECURITY_LSM_NATIVE_LABELS in * set_kern_flags
* in the original mount attempt , so now we just need to set
* the SECURITY_FS_USE_NATIVE behavior .
*/
sbsec - > behavior = SECURITY_FS_USE_NATIVE ;
} else if ( kern_flags & SECURITY_LSM_NATIVE_LABELS & & ! context_sid ) {
2013-05-22 12:50:37 -04:00
sbsec - > behavior = SECURITY_FS_USE_NATIVE ;
* set_kern_flags | = SECURITY_LSM_NATIVE_LABELS ;
}
2007-11-30 13:00:35 -05:00
if ( context_sid ) {
if ( ! fscontext_sid ) {
2008-11-14 10:39:19 +11:00
rc = may_context_mount_sb_relabel ( context_sid , sbsec ,
cred ) ;
2006-07-14 00:24:33 -07:00
if ( rc )
2007-11-30 13:00:35 -05:00
goto out ;
sbsec - > sid = context_sid ;
2006-07-14 00:24:33 -07:00
} else {
2008-11-14 10:39:19 +11:00
rc = may_context_mount_inode_relabel ( context_sid , sbsec ,
cred ) ;
2006-07-14 00:24:33 -07:00
if ( rc )
2007-11-30 13:00:35 -05:00
goto out ;
2006-07-14 00:24:33 -07:00
}
/* Without an explicit rootcontext, the context SID is also applied to the root inode below. */
2007-11-30 13:00:35 -05:00
if ( ! rootcontext_sid )
rootcontext_sid = context_sid ;
2005-04-16 15:20:36 -07:00
2007-11-30 13:00:35 -05:00
sbsec - > mntpoint_sid = context_sid ;
2006-07-10 04:43:53 -07:00
sbsec - > behavior = SECURITY_FS_USE_MNTPOINT ;
2005-04-16 15:20:36 -07:00
}
2007-11-30 13:00:35 -05:00
if ( rootcontext_sid ) {
2008-11-14 10:39:19 +11:00
rc = may_context_mount_inode_relabel ( rootcontext_sid , sbsec ,
cred ) ;
2006-07-10 04:43:55 -07:00
if ( rc )
2007-11-30 13:00:35 -05:00
goto out ;
2006-07-10 04:43:55 -07:00
2007-11-30 13:00:35 -05:00
root_isec - > sid = rootcontext_sid ;
2015-12-24 11:09:40 -05:00
root_isec - > initialized = LABEL_INITIALIZED ;
2006-07-10 04:43:55 -07:00
}
2007-11-30 13:00:35 -05:00
if ( defcontext_sid ) {
2013-05-22 12:50:37 -04:00
if ( sbsec - > behavior ! = SECURITY_FS_USE_XATTR & &
sbsec - > behavior ! = SECURITY_FS_USE_NATIVE ) {
2007-11-30 13:00:35 -05:00
rc = - EINVAL ;
2018-06-12 10:09:03 +02:00
pr_warn ( " SELinux: defcontext option is "
2007-11-30 13:00:35 -05:00
" invalid for this filesystem type \n " ) ;
goto out ;
2005-04-16 15:20:36 -07:00
}
2007-11-30 13:00:35 -05:00
if ( defcontext_sid ! = sbsec - > def_sid ) {
rc = may_context_mount_inode_relabel ( defcontext_sid ,
2008-11-14 10:39:19 +11:00
sbsec , cred ) ;
2007-11-30 13:00:35 -05:00
if ( rc )
goto out ;
}
2005-04-16 15:20:36 -07:00
2007-11-30 13:00:35 -05:00
sbsec - > def_sid = defcontext_sid ;
2005-04-16 15:20:36 -07:00
}
/* Common tail: finish superblock setup; its result becomes the return value. */
2016-04-26 14:36:20 -05:00
out_set_opts :
2007-11-30 13:00:35 -05:00
rc = sb_finish_set_opts ( sb ) ;
2005-04-16 15:20:36 -07:00
out :
2007-11-30 13:00:35 -05:00
mutex_unlock ( & sbsec - > lock ) ;
2005-04-16 15:20:36 -07:00
return rc ;
2007-11-30 13:00:35 -05:00
out_double_mount :
rc = - EINVAL ;
2018-06-12 10:09:03 +02:00
pr_warn ( " SELinux: mount invalid. Same superblock, different "
2018-12-13 15:04:59 -05:00
" security settings for (dev %s, type %s) \n " , sb - > s_id ,
sb - > s_type - > name ) ;
2007-11-30 13:00:35 -05:00
goto out ;
2005-04-16 15:20:36 -07:00
}
selinux: make security_sb_clone_mnt_opts return an error on context mismatch
I had the following problem reported a while back. If you mount the
same filesystem twice using NFSv4 with different contexts, then the
second context= option is ignored. For instance:
# mount server:/export /mnt/test1
# mount server:/export /mnt/test2 -o context=system_u:object_r:tmp_t:s0
# ls -dZ /mnt/test1
drwxrwxrwt. root root system_u:object_r:nfs_t:s0 /mnt/test1
# ls -dZ /mnt/test2
drwxrwxrwt. root root system_u:object_r:nfs_t:s0 /mnt/test2
When we call into SELinux to set the context of a "cloned" superblock,
it will currently just bail out when it notices that we're reusing an
existing superblock. Since the existing superblock is already set up and
presumably in use, we can't go overwriting its context with the one from
the "original" sb. Because of this, the second context= option in this
case cannot take effect.
This patch fixes this by turning security_sb_clone_mnt_opts into an int
return operation. When it finds that the "new" superblock that it has
been handed is already set up, it checks to see whether the contexts on
the old superblock match it. If it does, then it will just return
success, otherwise it'll return -EBUSY and emit a printk to tell the
admin why the second mount failed.
Note that this patch may cause casualties. The NFSv4 code relies on
being able to walk down to an export from the pseudoroot. If you mount
filesystems that are nested within one another with different contexts,
then this patch will make those mounts fail in new and "exciting" ways.
For instance, suppose that /export is a separate filesystem on the
server:
# mount server:/ /mnt/test1
# mount salusa:/export /mnt/test2 -o context=system_u:object_r:tmp_t:s0
mount.nfs: an incorrect mount option was specified
...with the printk in the ring buffer. Because we *might* eventually
walk down to /mnt/test1/export, the mount is denied due to this patch.
The second mount needs the pseudoroot superblock, but that's already
present with the wrong context.
OTOH, if we mount these in the reverse order, then both mounts work,
because the pseudoroot superblock created when mounting /export is
discarded once that mount is done. If we then however try to walk into
that directory, the automount fails for the similar reasons:
# cd /mnt/test1/scratch/
-bash: cd: /mnt/test1/scratch: Device or resource busy
The story I've gotten from the SELinux folks that I've talked to is that
this is desirable behavior. In SELinux-land, mounting the same data
under different contexts is wrong -- there can be only one.
Cc: Steve Dickson <steved@redhat.com>
Cc: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Acked-by: Eric Paris <eparis@redhat.com>
Signed-off-by: James Morris <james.l.morris@oracle.com>
2013-04-01 08:14:24 -04:00
/*
 * Compare the SELinux mount-option state of two superblocks.
 *
 * @oldsb: superblock whose settings act as the reference.
 * @newsb: superblock checked against the reference; named in the warning.
 *
 * Returns 0 when both superblocks carry the same SE_MNTMASK flag bits and,
 * for every context flag set on oldsb, the matching SID is equal: the
 * superblock SID for FSCONTEXT_MNT, mntpoint_sid for CONTEXT_MNT, def_sid
 * for DEFCONTEXT_MNT, and the root inode SID for ROOTCONTEXT_MNT. On any
 * mismatch a warning is logged and -EBUSY is returned.
 */
static int selinux_cmp_sb_context ( const struct super_block * oldsb ,
const struct super_block * newsb )
{
2021-04-22 17:41:15 +02:00
struct superblock_security_struct * old = selinux_superblock ( oldsb ) ;
struct superblock_security_struct * new = selinux_superblock ( newsb ) ;
selinux: make security_sb_clone_mnt_opts return an error on context mismatch
I had the following problem reported a while back. If you mount the
same filesystem twice using NFSv4 with different contexts, then the
second context= option is ignored. For instance:
# mount server:/export /mnt/test1
# mount server:/export /mnt/test2 -o context=system_u:object_r:tmp_t:s0
# ls -dZ /mnt/test1
drwxrwxrwt. root root system_u:object_r:nfs_t:s0 /mnt/test1
# ls -dZ /mnt/test2
drwxrwxrwt. root root system_u:object_r:nfs_t:s0 /mnt/test2
When we call into SELinux to set the context of a "cloned" superblock,
it will currently just bail out when it notices that we're reusing an
existing superblock. Since the existing superblock is already set up and
presumably in use, we can't go overwriting its context with the one from
the "original" sb. Because of this, the second context= option in this
case cannot take effect.
This patch fixes this by turning security_sb_clone_mnt_opts into an int
return operation. When it finds that the "new" superblock that it has
been handed is already set up, it checks to see whether the contexts on
the old superblock match it. If it does, then it will just return
success, otherwise it'll return -EBUSY and emit a printk to tell the
admin why the second mount failed.
Note that this patch may cause casualties. The NFSv4 code relies on
being able to walk down to an export from the pseudoroot. If you mount
filesystems that are nested within one another with different contexts,
then this patch will make those mounts fail in new and "exciting" ways.
For instance, suppose that /export is a separate filesystem on the
server:
# mount server:/ /mnt/test1
# mount salusa:/export /mnt/test2 -o context=system_u:object_r:tmp_t:s0
mount.nfs: an incorrect mount option was specified
...with the printk in the ring buffer. Because we *might* eventually
walk down to /mnt/test1/export, the mount is denied due to this patch.
The second mount needs the pseudoroot superblock, but that's already
present with the wrong context.
OTOH, if we mount these in the reverse order, then both mounts work,
because the pseudoroot superblock created when mounting /export is
discarded once that mount is done. If we then however try to walk into
that directory, the automount fails for the similar reasons:
# cd /mnt/test1/scratch/
-bash: cd: /mnt/test1/scratch: Device or resource busy
The story I've gotten from the SELinux folks that I've talked to is that
this is desirable behavior. In SELinux-land, mounting the same data
under different contexts is wrong -- there can be only one.
Cc: Steve Dickson <steved@redhat.com>
Cc: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Acked-by: Eric Paris <eparis@redhat.com>
Signed-off-by: James Morris <james.l.morris@oracle.com>
2013-04-01 08:14:24 -04:00
char oldflags = old - > flags & SE_MNTMASK ;
char newflags = new - > flags & SE_MNTMASK ;
if ( oldflags ! = newflags )
goto mismatch ;
if ( ( oldflags & FSCONTEXT_MNT ) & & old - > sid ! = new - > sid )
goto mismatch ;
if ( ( oldflags & CONTEXT_MNT ) & & old - > mntpoint_sid ! = new - > mntpoint_sid )
goto mismatch ;
if ( ( oldflags & DEFCONTEXT_MNT ) & & old - > def_sid ! = new - > def_sid )
goto mismatch ;
if ( oldflags & ROOTCONTEXT_MNT ) {
2015-12-24 11:09:39 -05:00
struct inode_security_struct * oldroot = backing_inode_security ( oldsb - > s_root ) ;
struct inode_security_struct * newroot = backing_inode_security ( newsb - > s_root ) ;
selinux: make security_sb_clone_mnt_opts return an error on context mismatch
I had the following problem reported a while back. If you mount the
same filesystem twice using NFSv4 with different contexts, then the
second context= option is ignored. For instance:
# mount server:/export /mnt/test1
# mount server:/export /mnt/test2 -o context=system_u:object_r:tmp_t:s0
# ls -dZ /mnt/test1
drwxrwxrwt. root root system_u:object_r:nfs_t:s0 /mnt/test1
# ls -dZ /mnt/test2
drwxrwxrwt. root root system_u:object_r:nfs_t:s0 /mnt/test2
When we call into SELinux to set the context of a "cloned" superblock,
it will currently just bail out when it notices that we're reusing an
existing superblock. Since the existing superblock is already set up and
presumably in use, we can't go overwriting its context with the one from
the "original" sb. Because of this, the second context= option in this
case cannot take effect.
This patch fixes this by turning security_sb_clone_mnt_opts into an int
return operation. When it finds that the "new" superblock that it has
been handed is already set up, it checks to see whether the contexts on
the old superblock match it. If it does, then it will just return
success, otherwise it'll return -EBUSY and emit a printk to tell the
admin why the second mount failed.
Note that this patch may cause casualties. The NFSv4 code relies on
being able to walk down to an export from the pseudoroot. If you mount
filesystems that are nested within one another with different contexts,
then this patch will make those mounts fail in new and "exciting" ways.
For instance, suppose that /export is a separate filesystem on the
server:
# mount server:/ /mnt/test1
# mount salusa:/export /mnt/test2 -o context=system_u:object_r:tmp_t:s0
mount.nfs: an incorrect mount option was specified
...with the printk in the ring buffer. Because we *might* eventually
walk down to /mnt/test1/export, the mount is denied due to this patch.
The second mount needs the pseudoroot superblock, but that's already
present with the wrong context.
OTOH, if we mount these in the reverse order, then both mounts work,
because the pseudoroot superblock created when mounting /export is
discarded once that mount is done. If we then however try to walk into
that directory, the automount fails for the similar reasons:
# cd /mnt/test1/scratch/
-bash: cd: /mnt/test1/scratch: Device or resource busy
The story I've gotten from the SELinux folks that I've talked to is that
this is desirable behavior. In SELinux-land, mounting the same data
under different contexts is wrong -- there can be only one.
Cc: Steve Dickson <steved@redhat.com>
Cc: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Acked-by: Eric Paris <eparis@redhat.com>
Signed-off-by: James Morris <james.l.morris@oracle.com>
2013-04-01 08:14:24 -04:00
if ( oldroot - > sid ! = newroot - > sid )
goto mismatch ;
}
return 0 ;
mismatch :
2018-06-12 10:09:03 +02:00
pr_warn ( " SELinux: mount invalid. Same superblock, "
selinux: make security_sb_clone_mnt_opts return an error on context mismatch
I had the following problem reported a while back. If you mount the
same filesystem twice using NFSv4 with different contexts, then the
second context= option is ignored. For instance:
# mount server:/export /mnt/test1
# mount server:/export /mnt/test2 -o context=system_u:object_r:tmp_t:s0
# ls -dZ /mnt/test1
drwxrwxrwt. root root system_u:object_r:nfs_t:s0 /mnt/test1
# ls -dZ /mnt/test2
drwxrwxrwt. root root system_u:object_r:nfs_t:s0 /mnt/test2
When we call into SELinux to set the context of a "cloned" superblock,
it will currently just bail out when it notices that we're reusing an
existing superblock. Since the existing superblock is already set up and
presumably in use, we can't go overwriting its context with the one from
the "original" sb. Because of this, the second context= option in this
case cannot take effect.
This patch fixes this by turning security_sb_clone_mnt_opts into an int
return operation. When it finds that the "new" superblock that it has
been handed is already set up, it checks to see whether the contexts on
the old superblock match it. If it does, then it will just return
success, otherwise it'll return -EBUSY and emit a printk to tell the
admin why the second mount failed.
Note that this patch may cause casualties. The NFSv4 code relies on
being able to walk down to an export from the pseudoroot. If you mount
filesystems that are nested within one another with different contexts,
then this patch will make those mounts fail in new and "exciting" ways.
For instance, suppose that /export is a separate filesystem on the
server:
# mount server:/ /mnt/test1
# mount salusa:/export /mnt/test2 -o context=system_u:object_r:tmp_t:s0
mount.nfs: an incorrect mount option was specified
...with the printk in the ring buffer. Because we *might* eventually
walk down to /mnt/test1/export, the mount is denied due to this patch.
The second mount needs the pseudoroot superblock, but that's already
present with the wrong context.
OTOH, if we mount these in the reverse order, then both mounts work,
because the pseudoroot superblock created when mounting /export is
discarded once that mount is done. If we then however try to walk into
that directory, the automount fails for the similar reasons:
# cd /mnt/test1/scratch/
-bash: cd: /mnt/test1/scratch: Device or resource busy
The story I've gotten from the SELinux folks that I've talked to is that
this is desirable behavior. In SELinux-land, mounting the same data
under different contexts is wrong -- there can be only one.
Cc: Steve Dickson <steved@redhat.com>
Cc: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Acked-by: Eric Paris <eparis@redhat.com>
Signed-off-by: James Morris <james.l.morris@oracle.com>
2013-04-01 08:14:24 -04:00
" different security settings for (dev %s, "
" type %s) \n " , newsb - > s_id , newsb - > s_type - > name ) ;
return - EBUSY ;
}
/*
 * selinux_sb_clone_mnt_opts - copy SELinux mount labeling state from one
 * superblock to another.
 *
 * @oldsb: superblock whose security settings are the source
 * @newsb: superblock to be set up (or verified, if it is already set up)
 * @kern_flags: caller-supplied flags; only SECURITY_LSM_NATIVE_LABELS is
 *              examined here
 * @set_kern_flags: out-parameter that receives SECURITY_LSM_NATIVE_LABELS
 *                  when native labeling is selected; must be non-NULL
 *                  whenever @kern_flags is non-zero
 *
 * Behaviour, in order:
 *  - -EINVAL if @kern_flags is set but @set_kern_flags is NULL.
 *  - If SELinux is not yet initialized, only the native-labels request is
 *    recorded (SE_SBNATIVE) and 0 is returned.
 *  - If @newsb is already SE_SBINITIALIZED, nothing is copied; the result
 *    of selinux_cmp_sb_context(oldsb, newsb) is returned instead.
 *  - Otherwise flags, sid, def_sid and behavior are copied, the context
 *    and rootcontext SIDs are applied to @newsb and its root inode, and
 *    sb_finish_set_opts() is called.
 *
 * newsbsec->lock is held throughout, except on the already-initialized
 * path, where it is released before the comparison is made.
 *
 * Return: 0 on success, -EINVAL as above, an error from security_fs_use(),
 * or the result of selinux_cmp_sb_context().
 */
static int selinux_sb_clone_mnt_opts ( const struct super_block * oldsb ,
2017-06-05 11:45:04 -04:00
struct super_block * newsb ,
unsigned long kern_flags ,
unsigned long * set_kern_flags )
2005-04-16 15:20:36 -07:00
{
2017-06-05 11:45:04 -04:00
int rc = 0 ;
2021-04-22 17:41:15 +02:00
const struct superblock_security_struct * oldsbsec =
selinux_superblock ( oldsb ) ;
struct superblock_security_struct * newsbsec = selinux_superblock ( newsb ) ;
2005-04-16 15:20:36 -07:00
2007-11-30 13:00:35 -05:00
/* Record which context-style mount options the old superblock carries. */
int set_fscontext = ( oldsbsec - > flags & FSCONTEXT_MNT ) ;
int set_context = ( oldsbsec - > flags & CONTEXT_MNT ) ;
int set_rootcontext = ( oldsbsec - > flags & ROOTCONTEXT_MNT ) ;
2005-04-16 15:20:36 -07:00
2017-06-05 11:45:04 -04:00
/*
* Specifying internal flags without providing a place to
* place the results is not allowed .
*/
if ( kern_flags & & ! set_kern_flags )
return - EINVAL ;
2023-05-29 16:05:27 +02:00
/* Every exit below this point must release newsbsec->lock. */
mutex_lock ( & newsbsec - > lock ) ;
/*
* if the parent was able to be mounted it clearly had no special lsm
* mount options . thus we can safely deal with this superblock later
*/
if ( ! selinux_initialized ( ) ) {
if ( kern_flags & SECURITY_LSM_NATIVE_LABELS ) {
newsbsec - > flags | = SE_SBNATIVE ;
* set_kern_flags | = SECURITY_LSM_NATIVE_LABELS ;
}
goto out ;
}
2007-11-30 13:00:35 -05:00
/* how can we clone if the old one wasn't set up?? */
2009-01-16 09:22:02 -05:00
BUG_ON ( ! ( oldsbsec - > flags & SE_SBINITIALIZED ) ) ;
2007-11-30 13:00:35 -05:00
selinux: make security_sb_clone_mnt_opts return an error on context mismatch
I had the following problem reported a while back. If you mount the
same filesystem twice using NFSv4 with different contexts, then the
second context= option is ignored. For instance:
# mount server:/export /mnt/test1
# mount server:/export /mnt/test2 -o context=system_u:object_r:tmp_t:s0
# ls -dZ /mnt/test1
drwxrwxrwt. root root system_u:object_r:nfs_t:s0 /mnt/test1
# ls -dZ /mnt/test2
drwxrwxrwt. root root system_u:object_r:nfs_t:s0 /mnt/test2
When we call into SELinux to set the context of a "cloned" superblock,
it will currently just bail out when it notices that we're reusing an
existing superblock. Since the existing superblock is already set up and
presumably in use, we can't go overwriting its context with the one from
the "original" sb. Because of this, the second context= option in this
case cannot take effect.
This patch fixes this by turning security_sb_clone_mnt_opts into an int
return operation. When it finds that the "new" superblock that it has
been handed is already set up, it checks to see whether the contexts on
the old superblock match it. If it does, then it will just return
success, otherwise it'll return -EBUSY and emit a printk to tell the
admin why the second mount failed.
Note that this patch may cause casualties. The NFSv4 code relies on
being able to walk down to an export from the pseudoroot. If you mount
filesystems that are nested within one another with different contexts,
then this patch will make those mounts fail in new and "exciting" ways.
For instance, suppose that /export is a separate filesystem on the
server:
# mount server:/ /mnt/test1
# mount salusa:/export /mnt/test2 -o context=system_u:object_r:tmp_t:s0
mount.nfs: an incorrect mount option was specified
...with the printk in the ring buffer. Because we *might* eventually
walk down to /mnt/test1/export, the mount is denied due to this patch.
The second mount needs the pseudoroot superblock, but that's already
present with the wrong context.
OTOH, if we mount these in the reverse order, then both mounts work,
because the pseudoroot superblock created when mounting /export is
discarded once that mount is done. If we then however try to walk into
that directory, the automount fails for the similar reasons:
# cd /mnt/test1/scratch/
-bash: cd: /mnt/test1/scratch: Device or resource busy
The story I've gotten from the SELinux folks that I've talked to is that
this is desirable behavior. In SELinux-land, mounting the same data
under different contexts is wrong -- there can be only one.
Cc: Steve Dickson <steved@redhat.com>
Cc: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Acked-by: Eric Paris <eparis@redhat.com>
Signed-off-by: James Morris <james.l.morris@oracle.com>
2013-04-01 08:14:24 -04:00
/* if fs is reusing a sb, make sure that the contexts match */
2019-03-05 16:17:58 -05:00
if ( newsbsec - > flags & SE_SBINITIALIZED ) {
2023-05-29 16:05:27 +02:00
mutex_unlock ( & newsbsec - > lock ) ;
2019-03-05 16:17:58 -05:00
if ( ( kern_flags & SECURITY_LSM_NATIVE_LABELS ) & & ! set_context )
* set_kern_flags | = SECURITY_LSM_NATIVE_LABELS ;
selinux: make security_sb_clone_mnt_opts return an error on context mismatch
I had the following problem reported a while back. If you mount the
same filesystem twice using NFSv4 with different contexts, then the
second context= option is ignored. For instance:
# mount server:/export /mnt/test1
# mount server:/export /mnt/test2 -o context=system_u:object_r:tmp_t:s0
# ls -dZ /mnt/test1
drwxrwxrwt. root root system_u:object_r:nfs_t:s0 /mnt/test1
# ls -dZ /mnt/test2
drwxrwxrwt. root root system_u:object_r:nfs_t:s0 /mnt/test2
When we call into SELinux to set the context of a "cloned" superblock,
it will currently just bail out when it notices that we're reusing an
existing superblock. Since the existing superblock is already set up and
presumably in use, we can't go overwriting its context with the one from
the "original" sb. Because of this, the second context= option in this
case cannot take effect.
This patch fixes this by turning security_sb_clone_mnt_opts into an int
return operation. When it finds that the "new" superblock that it has
been handed is already set up, it checks to see whether the contexts on
the old superblock match it. If it does, then it will just return
success, otherwise it'll return -EBUSY and emit a printk to tell the
admin why the second mount failed.
Note that this patch may cause casualties. The NFSv4 code relies on
being able to walk down to an export from the pseudoroot. If you mount
filesystems that are nested within one another with different contexts,
then this patch will make those mounts fail in new and "exciting" ways.
For instance, suppose that /export is a separate filesystem on the
server:
# mount server:/ /mnt/test1
# mount salusa:/export /mnt/test2 -o context=system_u:object_r:tmp_t:s0
mount.nfs: an incorrect mount option was specified
...with the printk in the ring buffer. Because we *might* eventually
walk down to /mnt/test1/export, the mount is denied due to this patch.
The second mount needs the pseudoroot superblock, but that's already
present with the wrong context.
OTOH, if we mount these in the reverse order, then both mounts work,
because the pseudoroot superblock created when mounting /export is
discarded once that mount is done. If we then however try to walk into
that directory, the automount fails for the similar reasons:
# cd /mnt/test1/scratch/
-bash: cd: /mnt/test1/scratch: Device or resource busy
The story I've gotten from the SELinux folks that I've talked to is that
this is desirable behavior. In SELinux-land, mounting the same data
under different contexts is wrong -- there can be only one.
Cc: Steve Dickson <steved@redhat.com>
Cc: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Acked-by: Eric Paris <eparis@redhat.com>
Signed-off-by: James Morris <james.l.morris@oracle.com>
2013-04-01 08:14:24 -04:00
return selinux_cmp_sb_context ( oldsb , newsb ) ;
2019-03-05 16:17:58 -05:00
}
2008-04-09 14:08:35 -04:00
2007-11-30 13:00:35 -05:00
newsbsec - > flags = oldsbsec - > flags ;
newsbsec - > sid = oldsbsec - > sid ;
newsbsec - > def_sid = oldsbsec - > def_sid ;
newsbsec - > behavior = oldsbsec - > behavior ;
2017-06-05 11:45:04 -04:00
if ( newsbsec - > behavior = = SECURITY_FS_USE_NATIVE & &
! ( kern_flags & SECURITY_LSM_NATIVE_LABELS ) & & ! set_context ) {
2023-03-09 13:30:37 -05:00
rc = security_fs_use ( newsb ) ;
2017-06-05 11:45:04 -04:00
if ( rc )
goto out ;
}
if ( kern_flags & SECURITY_LSM_NATIVE_LABELS & & ! set_context ) {
newsbsec - > behavior = SECURITY_FS_USE_NATIVE ;
* set_kern_flags | = SECURITY_LSM_NATIVE_LABELS ;
}
2007-11-30 13:00:35 -05:00
if ( set_context ) {
u32 sid = oldsbsec - > mntpoint_sid ;
if ( ! set_fscontext )
newsbsec - > sid = sid ;
if ( ! set_rootcontext ) {
2015-12-24 11:09:39 -05:00
struct inode_security_struct * newisec = backing_inode_security ( newsb - > s_root ) ;
2007-11-30 13:00:35 -05:00
newisec - > sid = sid ;
}
newsbsec - > mntpoint_sid = sid ;
2005-04-16 15:20:36 -07:00
}
2007-11-30 13:00:35 -05:00
if ( set_rootcontext ) {
2015-12-24 11:09:39 -05:00
const struct inode_security_struct * oldisec = backing_inode_security ( oldsb - > s_root ) ;
struct inode_security_struct * newisec = backing_inode_security ( newsb - > s_root ) ;
2005-04-16 15:20:36 -07:00
2007-11-30 13:00:35 -05:00
newisec - > sid = oldisec - > sid ;
2005-04-16 15:20:36 -07:00
}
2007-11-30 13:00:35 -05:00
sb_finish_set_opts ( newsb ) ;
2017-06-05 11:45:04 -04:00
out :
2007-11-30 13:00:35 -05:00
mutex_unlock ( & newsbsec - > lock ) ;
2017-06-05 11:45:04 -04:00
return rc ;
2007-11-30 13:00:35 -05:00
}
2022-06-17 17:44:12 +08:00
/*
2023-04-20 17:04:59 +02:00
* NOTE: the caller is responsible for freeing the memory, even on error.
2022-06-17 17:44:12 +08:00
*/
2018-12-14 20:28:15 -05:00
/*
 * selinux_add_opt - record one SELinux mount option as a SID.
 *
 * @token: which option is being added (Opt_context, Opt_fscontext,
 *         Opt_rootcontext, Opt_defcontext or Opt_seclabel)
 * @s: security context string supplied for the option
 * @mnt_opts: in/out pointer to the struct selinux_mnt_opts being built;
 *            the structure is allocated here when *mnt_opts is NULL
 *
 * Opt_seclabel is accepted and ignored. For the other tokens the context
 * string is converted to a SID and stored in the matching field. An option
 * whose field already holds a non-zero SID is rejected, as is combining
 * context= with defcontext= in either order.
 *
 * This function never frees *mnt_opts: if it fails after the allocation,
 * the structure stays reachable through *mnt_opts.
 *
 * Return: 0 on success; -EINVAL for a NULL @s, for SELinux not being
 * initialized, for an unknown token or for a rejected option; -ENOMEM if
 * the allocation fails; otherwise the error from
 * security_context_str_to_sid().
 */
static int selinux_add_opt ( int token , const char * s , void * * mnt_opts )
2007-11-30 13:00:35 -05:00
{
2018-12-14 20:28:15 -05:00
struct selinux_mnt_opts * opts = * mnt_opts ;
2022-02-02 13:55:29 +01:00
u32 * dst_sid ;
int rc ;
2005-04-16 15:20:36 -07:00
2021-12-21 15:01:29 -05:00
if ( token = = Opt_seclabel )
/* eaten and completely ignored */
2018-12-14 22:44:50 -05:00
return 0 ;
2021-12-10 04:03:58 -08:00
if ( ! s )
2022-06-17 17:44:12 +08:00
return - EINVAL ;
2008-03-05 10:31:54 -05:00
2023-03-09 13:30:37 -05:00
if ( ! selinux_initialized ( ) ) {
2022-02-02 13:55:29 +01:00
pr_warn ( " SELinux: Unable to set superblock options before the security server is initialized \n " ) ;
return - EINVAL ;
}
2018-12-14 20:28:15 -05:00
/* Allocate the option container on first use and publish it to the caller. */
if ( ! opts ) {
2021-12-21 15:01:29 -05:00
opts = kzalloc ( sizeof ( * opts ) , GFP_KERNEL ) ;
2018-12-14 20:28:15 -05:00
if ( ! opts )
return - ENOMEM ;
* mnt_opts = opts ;
2007-11-30 13:00:35 -05:00
}
2021-12-10 04:03:58 -08:00
2018-12-14 20:28:15 -05:00
/*
 * Select the SID slot for this option. A non-zero slot means the option
 * was already given, which is an error.
 */
switch ( token ) {
case Opt_context :
2022-02-02 13:55:29 +01:00
if ( opts - > context_sid | | opts - > defcontext_sid )
2021-12-21 15:01:29 -05:00
goto err ;
2022-02-02 13:55:29 +01:00
dst_sid = & opts - > context_sid ;
2018-12-14 20:28:15 -05:00
break ;
case Opt_fscontext :
2022-02-02 13:55:29 +01:00
if ( opts - > fscontext_sid )
2021-12-21 15:01:29 -05:00
goto err ;
2022-02-02 13:55:29 +01:00
dst_sid = & opts - > fscontext_sid ;
2018-12-14 20:28:15 -05:00
break ;
case Opt_rootcontext :
2022-02-02 13:55:29 +01:00
if ( opts - > rootcontext_sid )
2021-12-21 15:01:29 -05:00
goto err ;
2022-02-02 13:55:29 +01:00
dst_sid = & opts - > rootcontext_sid ;
2018-12-14 20:28:15 -05:00
break ;
case Opt_defcontext :
2022-02-02 13:55:29 +01:00
if ( opts - > context_sid | | opts - > defcontext_sid )
2021-12-21 15:01:29 -05:00
goto err ;
2022-02-02 13:55:29 +01:00
dst_sid = & opts - > defcontext_sid ;
2018-12-14 20:28:15 -05:00
break ;
2022-02-02 13:55:29 +01:00
default :
WARN_ON ( 1 ) ;
return - EINVAL ;
2007-11-30 13:00:35 -05:00
}
2023-03-09 13:30:37 -05:00
/* Convert the context string; the SID is written straight into the chosen slot. */
rc = security_context_str_to_sid ( s , dst_sid , GFP_KERNEL ) ;
2022-02-02 13:55:29 +01:00
if ( rc )
pr_warn ( " SELinux: security_context_str_to_sid (%s) failed with errno=%d \n " ,
s , rc ) ;
return rc ;
2019-06-12 21:28:21 +08:00
2021-12-21 15:01:29 -05:00
/* Duplicate or conflicting option. */
err :
2018-12-14 20:28:15 -05:00
pr_warn ( SEL_MOUNT_FAIL_MSG ) ;
return - EINVAL ;
2008-03-05 10:31:54 -05:00
}
2005-04-16 15:20:36 -07:00
2018-12-13 00:24:36 -05:00
/*
 * show_sid - write the security context for a SID to a seq_file.
 *
 * @m: seq_file to write to
 * @sid: SID to be rendered
 *
 * On a successful SID-to-context lookup this emits an equals sign followed
 * by the context string, with the characters listed in the seq_escape()
 * call escaped. The context is wrapped in double quotes when strchr()
 * finds the separator character in it. Nothing is written if the lookup
 * fails.
 *
 * Return: the result of security_sid_to_context() (0 on success).
 */
static int show_sid ( struct seq_file * m , u32 sid )
2008-07-04 09:47:13 +10:00
{
2018-12-13 00:24:36 -05:00
char * context = NULL ;
u32 len ;
int rc ;
2009-01-16 09:22:03 -05:00
2023-03-09 13:30:37 -05:00
rc = security_sid_to_context ( sid , & context , & len ) ;
2018-12-13 00:24:36 -05:00
if ( ! rc ) {
2022-02-17 15:21:28 +01:00
/* Decide up front whether the value needs to be quoted. */
bool has_comma = strchr ( context , ' , ' ) ;
2008-07-04 09:47:13 +10:00
2018-11-01 23:07:24 +00:00
seq_putc ( m , ' = ' ) ;
2008-07-04 09:47:13 +10:00
if ( has_comma )
seq_putc ( m , ' \" ' ) ;
2018-12-13 00:24:36 -05:00
seq_escape ( m , context , " \" \n \\ " ) ;
2008-07-04 09:47:13 +10:00
if ( has_comma )
seq_putc ( m , ' \" ' ) ;
}
2018-12-13 00:24:36 -05:00
/* context is still NULL if the lookup did not set it; kfree() is called either way. */
kfree ( context ) ;
return rc ;
2008-07-04 09:47:13 +10:00
}
/*
 * selinux_sb_show_options - emit the SELinux mount options of a superblock.
 *
 * @m: seq_file to write to
 * @sb: superblock whose options are shown
 *
 * Nothing is written when the superblock security state is not marked
 * SE_SBINITIALIZED or when SELinux itself is not initialized. Otherwise,
 * for each of FSCONTEXT_MNT, CONTEXT_MNT, DEFCONTEXT_MNT and
 * ROOTCONTEXT_MNT set in sbsec->flags, a separator, the option name and
 * the corresponding context (via show_sid()) are written, in that order.
 * SBLABEL_MNT adds the bare SECLABEL_STR option with no value.
 *
 * Return: 0 on success, or the first error returned by show_sid(); output
 * written before the failure is not undone here.
 */
static int selinux_sb_show_options ( struct seq_file * m , struct super_block * sb )
{
2021-04-22 17:41:15 +02:00
struct superblock_security_struct * sbsec = selinux_superblock ( sb ) ;
2008-07-04 09:47:13 +10:00
int rc ;
2018-12-13 00:24:36 -05:00
if ( ! ( sbsec - > flags & SE_SBINITIALIZED ) )
return 0 ;
2008-07-04 09:47:13 +10:00
2023-03-09 13:30:37 -05:00
if ( ! selinux_initialized ( ) )
2018-12-13 00:24:36 -05:00
return 0 ;
2008-07-04 09:47:13 +10:00
2018-12-13 00:24:36 -05:00
/* fscontext: the SID stored on the superblock itself. */
if ( sbsec - > flags & FSCONTEXT_MNT ) {
seq_putc ( m , ' , ' ) ;
seq_puts ( m , FSCONTEXT_STR ) ;
rc = show_sid ( m , sbsec - > sid ) ;
if ( rc )
return rc ;
}
/* context: the mountpoint SID. */
if ( sbsec - > flags & CONTEXT_MNT ) {
seq_putc ( m , ' , ' ) ;
seq_puts ( m , CONTEXT_STR ) ;
rc = show_sid ( m , sbsec - > mntpoint_sid ) ;
if ( rc )
return rc ;
}
/* defcontext: the default SID. */
if ( sbsec - > flags & DEFCONTEXT_MNT ) {
seq_putc ( m , ' , ' ) ;
seq_puts ( m , DEFCONTEXT_STR ) ;
rc = show_sid ( m , sbsec - > def_sid ) ;
if ( rc )
return rc ;
}
/* rootcontext: taken from the security blob of the root inode. */
if ( sbsec - > flags & ROOTCONTEXT_MNT ) {
2020-11-04 13:01:10 +01:00
struct dentry * root = sb - > s_root ;
2018-12-13 00:24:36 -05:00
struct inode_security_struct * isec = backing_inode_security ( root ) ;
seq_putc ( m , ' , ' ) ;
seq_puts ( m , ROOTCONTEXT_STR ) ;
rc = show_sid ( m , isec - > sid ) ;
if ( rc )
return rc ;
}
/* seclabel carries no value, only the option name. */
if ( sbsec - > flags & SBLABEL_MNT ) {
seq_putc ( m , ' , ' ) ;
2018-11-01 23:07:24 +00:00
seq_puts ( m , SECLABEL_STR ) ;
2018-12-13 00:24:36 -05:00
}
return 0 ;
2008-07-04 09:47:13 +10:00
}
2005-04-16 15:20:36 -07:00
/*
 * Map the file-type bits of an inode mode to the SELinux object class for
 * that kind of file. Any type not listed here, regular files included, is
 * classified as SECCLASS_FILE.
 */
static inline u16 inode_mode_to_security_class(umode_t mode)
{
	const unsigned int fmt = mode & S_IFMT;

	if (fmt == S_IFSOCK)
		return SECCLASS_SOCK_FILE;
	if (fmt == S_IFLNK)
		return SECCLASS_LNK_FILE;
	if (fmt == S_IFBLK)
		return SECCLASS_BLK_FILE;
	if (fmt == S_IFDIR)
		return SECCLASS_DIR;
	if (fmt == S_IFCHR)
		return SECCLASS_CHR_FILE;
	if (fmt == S_IFIFO)
		return SECCLASS_FIFO_FILE;

	/* S_IFREG and every unrecognised type share the same class. */
	return SECCLASS_FILE;
}
[PATCH] SELinux - fix SCTP socket bug and general IP protocol handling
The following patch updates the way SELinux classifies and handles IP
based protocols.
Currently, IP sockets are classified by SELinux as being either TCP, UDP
or 'Raw', the latter being a default for IP socket that is not TCP or UDP.
The classification code is out of date and uses only the socket type
parameter to socket(2) to determine the class of IP socket. So, any
socket created with SOCK_STREAM will be classified by SELinux as TCP, and
SOCK_DGRAM as UDP. Also, other socket types such as SOCK_SEQPACKET and
SOCK_DCCP are currently ignored by SELinux, which classifies them as
generic sockets, which means they don't even get basic IP level checking.
This patch changes the SELinux IP socket classification logic, so that
only an IPPROTO_IP protocol value passed to socket(2) classifies the socket
as TCP or UDP. The patch also drops the check for SOCK_RAW and converts
it into a default, so that socket types like SOCK_DCCP and SOCK_SEQPACKET
are classified as SECCLASS_RAWIP_SOCKET (instead of generic sockets).
Note that protocol-specific support for SCTP, DCCP etc. is not addressed
here, we're just getting these protocols checked at the IP layer.
This fixes a reported problem where SCTP sockets were being recognized as
generic SELinux sockets yet still being passed in one case to an IP level
check, which then fails for generic sockets.
It will also fix bugs where any SOCK_STREAM socket is classified as TCP or
any SOCK_DGRAM socket is classified as UDP.
This patch also unifies the way IP sockets classes are determined in
selinux_socket_bind(), so we use the already calculated value instead of
trying to recalculate it.
Signed-off-by: James Morris <jmorris@namei.org>
Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-09-30 14:24:34 -04:00
/*
 * default_protocol_stream - does @protocol select the default stream class?
 *
 * @protocol: protocol argument given at socket creation
 *
 * Return: non-zero when @protocol is IPPROTO_IP, IPPROTO_TCP or
 * IPPROTO_MPTCP, zero for any other value.
 */
static inline int default_protocol_stream(int protocol)
{
	return (protocol == IPPROTO_IP || protocol == IPPROTO_TCP ||
		protocol == IPPROTO_MPTCP);
}
/*
 * default_protocol_dgram - does @protocol select the default datagram class?
 *
 * @protocol: protocol argument given at socket creation
 *
 * Return: non-zero when @protocol is IPPROTO_IP or IPPROTO_UDP, zero for
 * any other value.
 */
static inline int default_protocol_dgram(int protocol)
{
	return (protocol == IPPROTO_IP || protocol == IPPROTO_UDP);
}
2005-04-16 15:20:36 -07:00
static inline u16 socket_type_to_security_class ( int family , int type , int protocol )
{
2023-07-06 15:23:27 +02:00
bool extsockclass = selinux_policycap_extsockclass ( ) ;
selinux: support distinctions among all network address families
Extend SELinux to support distinctions among all network address families
implemented by the kernel by defining new socket security classes
and mapping to them. Otherwise, many sockets are mapped to the generic
socket class and are indistinguishable in policy. This has come up
previously with regard to selectively allowing access to bluetooth sockets,
and more recently with regard to selectively allowing access to AF_ALG
sockets. Guido Trentalancia submitted a patch that took a similar approach
to add only support for distinguishing AF_ALG sockets, but this generalizes
his approach to handle all address families implemented by the kernel.
Socket security classes are also added for ICMP and SCTP sockets.
Socket security classes were not defined for AF_* values that are reserved
but unimplemented in the kernel, e.g. AF_NETBEUI, AF_SECURITY, AF_ASH,
AF_ECONET, AF_SNA, AF_WANPIPE.
Backward compatibility is provided by only enabling the finer-grained
socket classes if a new policy capability is set in the policy; older
policies will behave as before. The legacy redhat1 policy capability
that was only ever used in testing within Fedora for ptrace_child
is reclaimed for this purpose; as far as I can tell, this policy
capability is not enabled in any supported distro policy.
Add a pair of conditional compilation guards to detect when new AF_* values
are added so that we can update SELinux accordingly rather than having to
belatedly update it long after new address families are introduced.
Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: Paul Moore <paul@paul-moore.com>
2017-01-09 10:07:30 -05:00
2005-04-16 15:20:36 -07:00
switch ( family ) {
case PF_UNIX :
switch ( type ) {
case SOCK_STREAM :
case SOCK_SEQPACKET :
return SECCLASS_UNIX_STREAM_SOCKET ;
case SOCK_DGRAM :
2017-07-25 15:13:41 -04:00
case SOCK_RAW :
2005-04-16 15:20:36 -07:00
return SECCLASS_UNIX_DGRAM_SOCKET ;
}
break ;
case PF_INET :
case PF_INET6 :
switch ( type ) {
case SOCK_STREAM :
selinux: support distinctions among all network address families
Extend SELinux to support distinctions among all network address families
implemented by the kernel by defining new socket security classes
and mapping to them. Otherwise, many sockets are mapped to the generic
socket class and are indistinguishable in policy. This has come up
previously with regard to selectively allowing access to bluetooth sockets,
and more recently with regard to selectively allowing access to AF_ALG
sockets. Guido Trentalancia submitted a patch that took a similar approach
to add only support for distinguishing AF_ALG sockets, but this generalizes
his approach to handle all address families implemented by the kernel.
Socket security classes are also added for ICMP and SCTP sockets.
Socket security classes were not defined for AF_* values that are reserved
but unimplemented in the kernel, e.g. AF_NETBEUI, AF_SECURITY, AF_ASH,
AF_ECONET, AF_SNA, AF_WANPIPE.
Backward compatibility is provided by only enabling the finer-grained
socket classes if a new policy capability is set in the policy; older
policies will behave as before. The legacy redhat1 policy capability
that was only ever used in testing within Fedora for ptrace_child
is reclaimed for this purpose; as far as I can tell, this policy
capability is not enabled in any supported distro policy.
Add a pair of conditional compilation guards to detect when new AF_* values
are added so that we can update SELinux accordingly rather than having to
belatedly update it long after new address families are introduced.
Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: Paul Moore <paul@paul-moore.com>
2017-01-09 10:07:30 -05:00
case SOCK_SEQPACKET :
[PATCH] SELinux - fix SCTP socket bug and general IP protocol handling
The following patch updates the way SELinux classifies and handles IP
based protocols.
Currently, IP sockets are classified by SELinux as being either TCP, UDP
or 'Raw', the latter being a default for IP socket that is not TCP or UDP.
The classification code is out of date and uses only the socket type
parameter to socket(2) to determine the class of IP socket. So, any
socket created with SOCK_STREAM will be classified by SELinux as TCP, and
SOCK_DGRAM as UDP. Also, other socket types such as SOCK_SEQPACKET and
SOCK_DCCP are currently ignored by SELinux, which classifies them as
generic sockets, which means they don't even get basic IP level checking.
This patch changes the SELinux IP socket classification logic, so that
only an IPPROTO_IP protocol value passed to socket(2) classify the socket
as TCP or UDP. The patch also drops the check for SOCK_RAW and converts
it into a default, so that socket types like SOCK_DCCP and SOCK_SEQPACKET
are classified as SECCLASS_RAWIP_SOCKET (instead of generic sockets).
Note that protocol-specific support for SCTP, DCCP etc. is not addressed
here, we're just getting these protocols checked at the IP layer.
This fixes a reported problem where SCTP sockets were being recognized as
generic SELinux sockets yet still being passed in one case to an IP level
check, which then fails for generic sockets.
It will also fix bugs where any SOCK_STREAM socket is classified as TCP or
any SOCK_DGRAM socket is classified as UDP.
This patch also unifies the way IP sockets classes are determined in
selinux_socket_bind(), so we use the already calculated value instead of
trying to recalculate it.
Signed-off-by: James Morris <jmorris@namei.org>
Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-09-30 14:24:34 -04:00
if ( default_protocol_stream ( protocol ) )
return SECCLASS_TCP_SOCKET ;
selinux: support distinctions among all network address families
Extend SELinux to support distinctions among all network address families
implemented by the kernel by defining new socket security classes
and mapping to them. Otherwise, many sockets are mapped to the generic
socket class and are indistinguishable in policy. This has come up
previously with regard to selectively allowing access to bluetooth sockets,
and more recently with regard to selectively allowing access to AF_ALG
sockets. Guido Trentalancia submitted a patch that took a similar approach
to add only support for distinguishing AF_ALG sockets, but this generalizes
his approach to handle all address families implemented by the kernel.
Socket security classes are also added for ICMP and SCTP sockets.
Socket security classes were not defined for AF_* values that are reserved
but unimplemented in the kernel, e.g. AF_NETBEUI, AF_SECURITY, AF_ASH,
AF_ECONET, AF_SNA, AF_WANPIPE.
Backward compatibility is provided by only enabling the finer-grained
socket classes if a new policy capability is set in the policy; older
policies will behave as before. The legacy redhat1 policy capability
that was only ever used in testing within Fedora for ptrace_child
is reclaimed for this purpose; as far as I can tell, this policy
capability is not enabled in any supported distro policy.
Add a pair of conditional compilation guards to detect when new AF_* values
are added so that we can update SELinux accordingly rather than having to
belatedly update it long after new address families are introduced.
Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: Paul Moore <paul@paul-moore.com>
2017-01-09 10:07:30 -05:00
else if ( extsockclass & & protocol = = IPPROTO_SCTP )
return SECCLASS_SCTP_SOCKET ;
[PATCH] SELinux - fix SCTP socket bug and general IP protocol handling
The following patch updates the way SELinux classifies and handles IP
based protocols.
Currently, IP sockets are classified by SELinux as being either TCP, UDP
or 'Raw', the latter being a default for IP socket that is not TCP or UDP.
The classification code is out of date and uses only the socket type
parameter to socket(2) to determine the class of IP socket. So, any
socket created with SOCK_STREAM will be classified by SELinux as TCP, and
SOCK_DGRAM as UDP. Also, other socket types such as SOCK_SEQPACKET and
SOCK_DCCP are currently ignored by SELinux, which classifies them as
generic sockets, which means they don't even get basic IP level checking.
This patch changes the SELinux IP socket classification logic, so that
only an IPPROTO_IP protocol value passed to socket(2) classify the socket
as TCP or UDP. The patch also drops the check for SOCK_RAW and converts
it into a default, so that socket types like SOCK_DCCP and SOCK_SEQPACKET
are classified as SECCLASS_RAWIP_SOCKET (instead of generic sockets).
Note that protocol-specific support for SCTP, DCCP etc. is not addressed
here, we're just getting these protocols checked at the IP layer.
This fixes a reported problem where SCTP sockets were being recognized as
generic SELinux sockets yet still being passed in one case to an IP level
check, which then fails for generic sockets.
It will also fix bugs where any SOCK_STREAM socket is classified as TCP or
any SOCK_DGRAM socket is classified as UDP.
This patch also unifies the way IP sockets classes are determined in
selinux_socket_bind(), so we use the already calculated value instead of
trying to recalculate it.
Signed-off-by: James Morris <jmorris@namei.org>
Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-09-30 14:24:34 -04:00
else
return SECCLASS_RAWIP_SOCKET ;
2005-04-16 15:20:36 -07:00
case SOCK_DGRAM :
[PATCH] SELinux - fix SCTP socket bug and general IP protocol handling
The following patch updates the way SELinux classifies and handles IP
based protocols.
Currently, IP sockets are classified by SELinux as being either TCP, UDP
or 'Raw', the latter being a default for IP socket that is not TCP or UDP.
The classification code is out of date and uses only the socket type
parameter to socket(2) to determine the class of IP socket. So, any
socket created with SOCK_STREAM will be classified by SELinux as TCP, and
SOCK_DGRAM as UDP. Also, other socket types such as SOCK_SEQPACKET and
SOCK_DCCP are currently ignored by SELinux, which classifies them as
generic sockets, which means they don't even get basic IP level checking.
This patch changes the SELinux IP socket classification logic, so that
only an IPPROTO_IP protocol value passed to socket(2) classify the socket
as TCP or UDP. The patch also drops the check for SOCK_RAW and converts
it into a default, so that socket types like SOCK_DCCP and SOCK_SEQPACKET
are classified as SECCLASS_RAWIP_SOCKET (instead of generic sockets).
Note that protocol-specific support for SCTP, DCCP etc. is not addressed
here, we're just getting these protocols checked at the IP layer.
This fixes a reported problem where SCTP sockets were being recognized as
generic SELinux sockets yet still being passed in one case to an IP level
check, which then fails for generic sockets.
It will also fix bugs where any SOCK_STREAM socket is classified as TCP or
any SOCK_DGRAM socket is classified as UDP.
This patch also unifies the way IP sockets classes are determined in
selinux_socket_bind(), so we use the already calculated value instead of
trying to recalculate it.
Signed-off-by: James Morris <jmorris@namei.org>
Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-09-30 14:24:34 -04:00
if ( default_protocol_dgram ( protocol ) )
return SECCLASS_UDP_SOCKET ;
2017-01-09 10:07:31 -05:00
else if ( extsockclass & & ( protocol = = IPPROTO_ICMP | |
protocol = = IPPROTO_ICMPV6 ) )
selinux: support distinctions among all network address families
Extend SELinux to support distinctions among all network address families
implemented by the kernel by defining new socket security classes
and mapping to them. Otherwise, many sockets are mapped to the generic
socket class and are indistinguishable in policy. This has come up
previously with regard to selectively allowing access to bluetooth sockets,
and more recently with regard to selectively allowing access to AF_ALG
sockets. Guido Trentalancia submitted a patch that took a similar approach
to add only support for distinguishing AF_ALG sockets, but this generalizes
his approach to handle all address families implemented by the kernel.
Socket security classes are also added for ICMP and SCTP sockets.
Socket security classes were not defined for AF_* values that are reserved
but unimplemented in the kernel, e.g. AF_NETBEUI, AF_SECURITY, AF_ASH,
AF_ECONET, AF_SNA, AF_WANPIPE.
Backward compatibility is provided by only enabling the finer-grained
socket classes if a new policy capability is set in the policy; older
policies will behave as before. The legacy redhat1 policy capability
that was only ever used in testing within Fedora for ptrace_child
is reclaimed for this purpose; as far as I can tell, this policy
capability is not enabled in any supported distro policy.
Add a pair of conditional compilation guards to detect when new AF_* values
are added so that we can update SELinux accordingly rather than having to
belatedly update it long after new address families are introduced.
Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: Paul Moore <paul@paul-moore.com>
2017-01-09 10:07:30 -05:00
return SECCLASS_ICMP_SOCKET ;
[PATCH] SELinux - fix SCTP socket bug and general IP protocol handling
The following patch updates the way SELinux classifies and handles IP
based protocols.
Currently, IP sockets are classified by SELinux as being either TCP, UDP
or 'Raw', the latter being a default for IP socket that is not TCP or UDP.
The classification code is out of date and uses only the socket type
parameter to socket(2) to determine the class of IP socket. So, any
socket created with SOCK_STREAM will be classified by SELinux as TCP, and
SOCK_DGRAM as UDP. Also, other socket types such as SOCK_SEQPACKET and
SOCK_DCCP are currently ignored by SELinux, which classifies them as
generic sockets, which means they don't even get basic IP level checking.
This patch changes the SELinux IP socket classification logic, so that
only an IPPROTO_IP protocol value passed to socket(2) classifies the socket
as TCP or UDP. The patch also drops the check for SOCK_RAW and converts
it into a default, so that socket types like SOCK_DCCP and SOCK_SEQPACKET
are classified as SECCLASS_RAWIP_SOCKET (instead of generic sockets).
Note that protocol-specific support for SCTP, DCCP etc. is not addressed
here, we're just getting these protocols checked at the IP layer.
This fixes a reported problem where SCTP sockets were being recognized as
generic SELinux sockets yet still being passed in one case to an IP level
check, which then fails for generic sockets.
It will also fix bugs where any SOCK_STREAM socket is classified as TCP or
any SOCK_DGRAM socket is classified as UDP.
This patch also unifies the way IP sockets classes are determined in
selinux_socket_bind(), so we use the already calculated value instead of
trying to recalculate it.
Signed-off-by: James Morris <jmorris@namei.org>
Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-09-30 14:24:34 -04:00
else
return SECCLASS_RAWIP_SOCKET ;
2006-11-13 16:09:01 -08:00
case SOCK_DCCP :
return SECCLASS_DCCP_SOCKET ;
[PATCH] SELinux - fix SCTP socket bug and general IP protocol handling
The following patch updates the way SELinux classifies and handles IP
based protocols.
Currently, IP sockets are classified by SELinux as being either TCP, UDP
or 'Raw', the latter being a default for IP socket that is not TCP or UDP.
The classification code is out of date and uses only the socket type
parameter to socket(2) to determine the class of IP socket. So, any
socket created with SOCK_STREAM will be classified by SELinux as TCP, and
SOCK_DGRAM as UDP. Also, other socket types such as SOCK_SEQPACKET and
SOCK_DCCP are currently ignored by SELinux, which classifies them as
generic sockets, which means they don't even get basic IP level checking.
This patch changes the SELinux IP socket classification logic, so that
only an IPPROTO_IP protocol value passed to socket(2) classifies the socket
as TCP or UDP. The patch also drops the check for SOCK_RAW and converts
it into a default, so that socket types like SOCK_DCCP and SOCK_SEQPACKET
are classified as SECCLASS_RAWIP_SOCKET (instead of generic sockets).
Note that protocol-specific support for SCTP, DCCP etc. is not addressed
here, we're just getting these protocols checked at the IP layer.
This fixes a reported problem where SCTP sockets were being recognized as
generic SELinux sockets yet still being passed in one case to an IP level
check, which then fails for generic sockets.
It will also fix bugs where any SOCK_STREAM socket is classified as TCP or
any SOCK_DGRAM socket is classified as UDP.
This patch also unifies the way IP sockets classes are determined in
selinux_socket_bind(), so we use the already calculated value instead of
trying to recalculate it.
Signed-off-by: James Morris <jmorris@namei.org>
Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-09-30 14:24:34 -04:00
default :
2005-04-16 15:20:36 -07:00
return SECCLASS_RAWIP_SOCKET ;
}
break ;
case PF_NETLINK :
switch ( protocol ) {
case NETLINK_ROUTE :
return SECCLASS_NETLINK_ROUTE_SOCKET ;
2011-12-06 07:56:43 +00:00
case NETLINK_SOCK_DIAG :
2005-04-16 15:20:36 -07:00
return SECCLASS_NETLINK_TCPDIAG_SOCKET ;
case NETLINK_NFLOG :
return SECCLASS_NETLINK_NFLOG_SOCKET ;
case NETLINK_XFRM :
return SECCLASS_NETLINK_XFRM_SOCKET ;
case NETLINK_SELINUX :
return SECCLASS_NETLINK_SELINUX_SOCKET ;
2015-06-04 16:22:16 -04:00
case NETLINK_ISCSI :
return SECCLASS_NETLINK_ISCSI_SOCKET ;
2005-04-16 15:20:36 -07:00
case NETLINK_AUDIT :
return SECCLASS_NETLINK_AUDIT_SOCKET ;
2015-06-04 16:22:16 -04:00
case NETLINK_FIB_LOOKUP :
return SECCLASS_NETLINK_FIB_LOOKUP_SOCKET ;
case NETLINK_CONNECTOR :
return SECCLASS_NETLINK_CONNECTOR_SOCKET ;
case NETLINK_NETFILTER :
return SECCLASS_NETLINK_NETFILTER_SOCKET ;
2005-04-16 15:20:36 -07:00
case NETLINK_DNRTMSG :
return SECCLASS_NETLINK_DNRT_SOCKET ;
2005-04-16 15:24:13 -07:00
case NETLINK_KOBJECT_UEVENT :
return SECCLASS_NETLINK_KOBJECT_UEVENT_SOCKET ;
2015-06-04 16:22:16 -04:00
case NETLINK_GENERIC :
return SECCLASS_NETLINK_GENERIC_SOCKET ;
case NETLINK_SCSITRANSPORT :
return SECCLASS_NETLINK_SCSITRANSPORT_SOCKET ;
case NETLINK_RDMA :
return SECCLASS_NETLINK_RDMA_SOCKET ;
case NETLINK_CRYPTO :
return SECCLASS_NETLINK_CRYPTO_SOCKET ;
2005-04-16 15:20:36 -07:00
default :
return SECCLASS_NETLINK_SOCKET ;
}
case PF_PACKET :
return SECCLASS_PACKET_SOCKET ;
case PF_KEY :
return SECCLASS_KEY_SOCKET ;
2006-06-09 00:25:03 -07:00
case PF_APPLETALK :
return SECCLASS_APPLETALK_SOCKET ;
2005-04-16 15:20:36 -07:00
}
selinux: support distinctions among all network address families
Extend SELinux to support distinctions among all network address families
implemented by the kernel by defining new socket security classes
and mapping to them. Otherwise, many sockets are mapped to the generic
socket class and are indistinguishable in policy. This has come up
previously with regard to selectively allowing access to bluetooth sockets,
and more recently with regard to selectively allowing access to AF_ALG
sockets. Guido Trentalancia submitted a patch that took a similar approach
to add only support for distinguishing AF_ALG sockets, but this generalizes
his approach to handle all address families implemented by the kernel.
Socket security classes are also added for ICMP and SCTP sockets.
Socket security classes were not defined for AF_* values that are reserved
but unimplemented in the kernel, e.g. AF_NETBEUI, AF_SECURITY, AF_ASH,
AF_ECONET, AF_SNA, AF_WANPIPE.
Backward compatibility is provided by only enabling the finer-grained
socket classes if a new policy capability is set in the policy; older
policies will behave as before. The legacy redhat1 policy capability
that was only ever used in testing within Fedora for ptrace_child
is reclaimed for this purpose; as far as I can tell, this policy
capability is not enabled in any supported distro policy.
Add a pair of conditional compilation guards to detect when new AF_* values
are added so that we can update SELinux accordingly rather than having to
belatedly update it long after new address families are introduced.
Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: Paul Moore <paul@paul-moore.com>
2017-01-09 10:07:30 -05:00
if ( extsockclass ) {
switch ( family ) {
case PF_AX25 :
return SECCLASS_AX25_SOCKET ;
case PF_IPX :
return SECCLASS_IPX_SOCKET ;
case PF_NETROM :
return SECCLASS_NETROM_SOCKET ;
case PF_ATMPVC :
return SECCLASS_ATMPVC_SOCKET ;
case PF_X25 :
return SECCLASS_X25_SOCKET ;
case PF_ROSE :
return SECCLASS_ROSE_SOCKET ;
case PF_DECnet :
return SECCLASS_DECNET_SOCKET ;
case PF_ATMSVC :
return SECCLASS_ATMSVC_SOCKET ;
case PF_RDS :
return SECCLASS_RDS_SOCKET ;
case PF_IRDA :
return SECCLASS_IRDA_SOCKET ;
case PF_PPPOX :
return SECCLASS_PPPOX_SOCKET ;
case PF_LLC :
return SECCLASS_LLC_SOCKET ;
case PF_CAN :
return SECCLASS_CAN_SOCKET ;
case PF_TIPC :
return SECCLASS_TIPC_SOCKET ;
case PF_BLUETOOTH :
return SECCLASS_BLUETOOTH_SOCKET ;
case PF_IUCV :
return SECCLASS_IUCV_SOCKET ;
case PF_RXRPC :
return SECCLASS_RXRPC_SOCKET ;
case PF_ISDN :
return SECCLASS_ISDN_SOCKET ;
case PF_PHONET :
return SECCLASS_PHONET_SOCKET ;
case PF_IEEE802154 :
return SECCLASS_IEEE802154_SOCKET ;
case PF_CAIF :
return SECCLASS_CAIF_SOCKET ;
case PF_ALG :
return SECCLASS_ALG_SOCKET ;
case PF_NFC :
return SECCLASS_NFC_SOCKET ;
case PF_VSOCK :
return SECCLASS_VSOCK_SOCKET ;
case PF_KCM :
return SECCLASS_KCM_SOCKET ;
case PF_QIPCRTR :
return SECCLASS_QIPCRTR_SOCKET ;
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
"Highlights:
1) Support TX_RING in AF_PACKET TPACKET_V3 mode, from Sowmini
Varadhan.
2) Simplify classifier state on sk_buff in order to shrink it a bit.
From Willem de Bruijn.
3) Introduce SIPHASH and its usage for secure sequence numbers and
syncookies. From Jason A. Donenfeld.
4) Reduce CPU usage for ICMP replies we are going to limit or
suppress, from Jesper Dangaard Brouer.
5) Introduce Shared Memory Communications socket layer, from Ursula
Braun.
6) Add RACK loss detection and allow it to actually trigger fast
recovery instead of just assisting after other algorithms have
triggered it. From Yuchung Cheng.
7) Add xmit_more and BQL support to mvneta driver, from Simon Guinot.
8) skb_cow_data avoidance in esp4 and esp6, from Steffen Klassert.
9) Export MPLS packet stats via netlink, from Robert Shearman.
10) Significantly improve inet port bind conflict handling, especially
when an application is restarted and changes its setting of
reuseport. From Josef Bacik.
11) Implement TX batching in vhost_net, from Jason Wang.
12) Extend the dummy device so that VF (virtual function) features,
such as configuration, can be more easily tested. From Phil
Sutter.
13) Avoid two atomic ops per page on x86 in bnx2x driver, from Eric
Dumazet.
14) Add new bpf MAP, implementing a longest prefix match trie. From
Daniel Mack.
15) Packet sample offloading support in mlxsw driver, from Yotam Gigi.
16) Add new aquantia driver, from David VomLehn.
17) Add bpf tracepoints, from Daniel Borkmann.
18) Add support for port mirroring to b53 and bcm_sf2 drivers, from
Florian Fainelli.
19) Remove custom busy polling in many drivers, it is done in the core
networking since 4.5 times. From Eric Dumazet.
20) Support XDP adjust_head in virtio_net, from John Fastabend.
21) Fix several major holes in neighbour entry confirmation, from
Julian Anastasov.
22) Add XDP support to bnxt_en driver, from Michael Chan.
23) VXLAN offloads for enic driver, from Govindarajulu Varadarajan.
24) Add IPVTAP driver (IP-VLAN based tap driver) from Sainath Grandhi.
25) Support GRO in IPSEC protocols, from Steffen Klassert"
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1764 commits)
Revert "ath10k: Search SMBIOS for OEM board file extension"
net: socket: fix recvmmsg not returning error from sock_error
bnxt_en: use eth_hw_addr_random()
bpf: fix unlocking of jited image when module ronx not set
arch: add ARCH_HAS_SET_MEMORY config
net: napi_watchdog() can use napi_schedule_irqoff()
tcp: Revert "tcp: tcp_probe: use spin_lock_bh()"
net/hsr: use eth_hw_addr_random()
net: mvpp2: enable building on 64-bit platforms
net: mvpp2: switch to build_skb() in the RX path
net: mvpp2: simplify MVPP2_PRS_RI_* definitions
net: mvpp2: fix indentation of MVPP2_EXT_GLOBAL_CTRL_DEFAULT
net: mvpp2: remove unused register definitions
net: mvpp2: simplify mvpp2_bm_bufs_add()
net: mvpp2: drop useless fields in mvpp2_bm_pool and related code
net: mvpp2: remove unused 'tx_skb' field of 'struct mvpp2_tx_queue'
net: mvpp2: release reference to txq_cpu[] entry after unmapping
net: mvpp2: handle too large value in mvpp2_rx_time_coal_set()
net: mvpp2: handle too large value handling in mvpp2_rx_pkts_coal_set()
net: mvpp2: remove useless arguments in mvpp2_rx_{pkts, time}_coal_set
...
2017-02-22 10:15:09 -08:00
case PF_SMC :
return SECCLASS_SMC_SOCKET ;
2018-05-02 13:01:22 +02:00
case PF_XDP :
return SECCLASS_XDP_SOCKET ;
2021-07-29 10:20:39 +08:00
case PF_MCTP :
return SECCLASS_MCTP_SOCKET ;
# if PF_MAX > 46
selinux: support distinctions among all network address families
Extend SELinux to support distinctions among all network address families
implemented by the kernel by defining new socket security classes
and mapping to them. Otherwise, many sockets are mapped to the generic
socket class and are indistinguishable in policy. This has come up
previously with regard to selectively allowing access to bluetooth sockets,
and more recently with regard to selectively allowing access to AF_ALG
sockets. Guido Trentalancia submitted a patch that took a similar approach
to add only support for distinguishing AF_ALG sockets, but this generalizes
his approach to handle all address families implemented by the kernel.
Socket security classes are also added for ICMP and SCTP sockets.
Socket security classes were not defined for AF_* values that are reserved
but unimplemented in the kernel, e.g. AF_NETBEUI, AF_SECURITY, AF_ASH,
AF_ECONET, AF_SNA, AF_WANPIPE.
Backward compatibility is provided by only enabling the finer-grained
socket classes if a new policy capability is set in the policy; older
policies will behave as before. The legacy redhat1 policy capability
that was only ever used in testing within Fedora for ptrace_child
is reclaimed for this purpose; as far as I can tell, this policy
capability is not enabled in any supported distro policy.
Add a pair of conditional compilation guards to detect when new AF_* values
are added so that we can update SELinux accordingly rather than having to
belatedly update it long after new address families are introduced.
Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: Paul Moore <paul@paul-moore.com>
2017-01-09 10:07:30 -05:00
# error New address family defined, please update this function.
# endif
}
}
2005-04-16 15:20:36 -07:00
return SECCLASS_SOCKET ;
}
2015-06-04 16:22:17 -04:00
/*
 * selinux_genfs_get_sid - look up a SID for a dentry via security_genfs_sid().
 * @dentry: dentry whose path is built with dentry_path_raw()
 * @tclass: security class, passed through to security_genfs_sid()
 * @flags: superblock flags; only SE_SBPROC is examined here
 * @sid: output SID
 *
 * The lookup is keyed on the superblock's s_type->name and the dentry path.
 *
 * Returns 0 on success, including the -ENOENT case, which is converted to
 * success with *sid set to SECINITSID_UNLABELED. Returns -ENOMEM if the
 * scratch page cannot be allocated, the error encoded in the pointer returned
 * by dentry_path_raw(), or any other error from security_genfs_sid().
 */
static int selinux_genfs_get_sid ( struct dentry * dentry ,
u16 tclass ,
u16 flags ,
u32 * sid )
2005-04-16 15:20:36 -07:00
{
2011-02-01 18:42:22 +02:00
int rc ;
2016-04-10 01:33:30 -04:00
struct super_block * sb = dentry - > d_sb ;
2011-02-01 18:42:22 +02:00
char * buffer , * path ;
2005-04-16 15:20:36 -07:00
2008-04-17 13:17:49 -04:00
/* One page of scratch space in which the path string is built. */
buffer = ( char * ) __get_free_page ( GFP_KERNEL ) ;
2005-04-16 15:20:36 -07:00
if ( ! buffer )
return - ENOMEM ;
2011-02-01 18:42:22 +02:00
path = dentry_path_raw ( dentry , buffer , PAGE_SIZE ) ;
if ( IS_ERR ( path ) )
rc = PTR_ERR ( path ) ;
else {
2015-06-04 16:22:17 -04:00
if ( flags & SE_SBPROC ) {
/* each process gets a /proc/PID/ entry. Strip off the
* PID part to get a valid selinux labeling .
* e . g . / proc / 1 / net / rpc / nfs - > / net / rpc / nfs */
/* Each digit right after the leading '/' is overwritten with '/'
 * and path is advanced by one, consuming a numeric first component. */
while ( path [ 1 ] > = ' 0 ' & & path [ 1 ] < = ' 9 ' ) {
path [ 1 ] = ' / ' ;
path + + ;
}
2011-02-01 18:42:22 +02:00
}
2023-03-09 13:30:37 -05:00
rc = security_genfs_sid ( sb - > s_type - > name ,
2018-03-01 18:48:02 -05:00
path , tclass , sid ) ;
2018-09-04 16:51:36 -04:00
if ( rc = = - ENOENT ) {
/* No match in policy, mark as unlabeled. */
* sid = SECINITSID_UNLABELED ;
rc = 0 ;
}
2005-04-16 15:20:36 -07:00
}
/* The scratch page is released on both the success and the error path. */
free_page ( ( unsigned long ) buffer ) ;
return rc ;
}
2019-02-22 15:57:14 +01:00
/*
 * inode_doinit_use_xattr - derive a SID from an inode's XATTR_NAME_SELINUX
 * extended attribute.
 * @inode: inode whose xattr is read
 * @dentry: dentry passed to __vfs_getxattr() together with @inode
 * @def_sid: SID stored in *@sid when the xattr lookup returns -ENODATA; also
 *           passed to security_context_to_sid_default()
 * @sid: output SID
 *
 * Returns -ENOMEM on allocation failure, any negative __vfs_getxattr() error
 * other than -ENODATA, and 0 otherwise. A failure of
 * security_context_to_sid_default() is only logged; 0 is still returned in
 * that case.
 */
static int inode_doinit_use_xattr ( struct inode * inode , struct dentry * dentry ,
u32 def_sid , u32 * sid )
{
# define INITCONTEXTLEN 255
char * context ;
unsigned int len ;
int rc ;
/* First attempt uses a fixed-size buffer; the extra byte holds the
 * terminating NUL written just below. */
len = INITCONTEXTLEN ;
context = kmalloc ( len + 1 , GFP_NOFS ) ;
if ( ! context )
return - ENOMEM ;
context [ len ] = ' \0 ' ;
rc = __vfs_getxattr ( dentry , inode , XATTR_NAME_SELINUX , context , len ) ;
if ( rc = = - ERANGE ) {
/* The first buffer is freed before the retry, so the early returns
 * in this branch do not leak it. */
kfree ( context ) ;
/* Need a larger buffer. Query for the right size. */
rc = __vfs_getxattr ( dentry , inode , XATTR_NAME_SELINUX , NULL , 0 ) ;
if ( rc < 0 )
return rc ;
len = rc ;
context = kmalloc ( len + 1 , GFP_NOFS ) ;
if ( ! context )
return - ENOMEM ;
context [ len ] = ' \0 ' ;
rc = __vfs_getxattr ( dentry , inode , XATTR_NAME_SELINUX ,
context , len ) ;
}
/* Lookup failed: -ENODATA falls back to def_sid and succeeds; any other
 * error is logged and returned. */
if ( rc < 0 ) {
kfree ( context ) ;
if ( rc ! = - ENODATA ) {
pr_warn ( " SELinux: %s: getxattr returned %d for dev=%s ino=%ld \n " ,
__func__ , - rc , inode - > i_sb - > s_id , inode - > i_ino ) ;
return rc ;
}
* sid = def_sid ;
return 0 ;
}
/* rc is non-negative here: it is the value returned by __vfs_getxattr()
 * and is passed on as the context length. */
2023-03-09 13:30:37 -05:00
rc = security_context_to_sid_default ( context , rc , sid ,
2019-02-22 15:57:14 +01:00
def_sid , GFP_NOFS ) ;
if ( rc ) {
char * dev = inode - > i_sb - > s_id ;
unsigned long ino = inode - > i_ino ;
if ( rc = = - EINVAL ) {
pr_notice_ratelimited ( " SELinux: inode=%lu on dev=%s was found to have an invalid context=%s. This indicates you may need to relabel the inode or the filesystem in question. \n " ,
ino , dev , context ) ;
} else {
/* NOTE(review): ino is unsigned long but is printed with %ld here,
 * while the notice above uses %lu - confirm whether this is intended. */
pr_warn ( " SELinux: %s: context_to_sid(%s) returned %d for dev=%s ino=%ld \n " ,
__func__ , context , - rc , dev , ino ) ;
}
}
kfree ( context ) ;
/* Conversion errors were logged above but are not propagated to the caller. */
return 0 ;
}
2005-04-16 15:20:36 -07:00
/* The inode's security attributes must be initialized before first use. */
static int inode_doinit_with_dentry ( struct inode * inode , struct dentry * opt_dentry )
{
struct superblock_security_struct * sbsec = NULL ;
2018-09-21 17:19:11 -07:00
struct inode_security_struct * isec = selinux_inode ( inode ) ;
selinux: Convert isec->lock into a spinlock
Convert isec->lock from a mutex into a spinlock. Instead of holding
the lock while sleeping in inode_doinit_with_dentry, set
isec->initialized to LABEL_PENDING and release the lock. Then, when
the sid has been determined, re-acquire the lock. If isec->initialized
is still set to LABEL_PENDING, set isec->sid; otherwise, the sid has
been set by another task (LABEL_INITIALIZED) or invalidated
(LABEL_INVALID) in the meantime.
This fixes a deadlock on gfs2 where
* one task is in inode_doinit_with_dentry -> gfs2_getxattr, holds
isec->lock, and tries to acquire the inode's glock, and
* another task is in do_xmote -> inode_go_inval ->
selinux_inode_invalidate_secctx, holds the inode's glock, and
tries to acquire isec->lock.
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
[PM: minor tweaks to keep checkpatch.pl happy]
Signed-off-by: Paul Moore <paul@paul-moore.com>
2016-11-15 11:06:40 +01:00
u32 task_sid , sid = 0 ;
u16 sclass ;
2005-04-16 15:20:36 -07:00
struct dentry * dentry ;
int rc = 0 ;
2015-12-24 11:09:40 -05:00
if ( isec - > initialized = = LABEL_INITIALIZED )
2016-11-10 22:18:29 +01:00
return 0 ;
2005-04-16 15:20:36 -07:00
selinux: Convert isec->lock into a spinlock
Convert isec->lock from a mutex into a spinlock. Instead of holding
the lock while sleeping in inode_doinit_with_dentry, set
isec->initialized to LABEL_PENDING and release the lock. Then, when
the sid has been determined, re-acquire the lock. If isec->initialized
is still set to LABEL_PENDING, set isec->sid; otherwise, the sid has
been set by another task (LABEL_INITIALIZED) or invalidated
(LABEL_INVALID) in the meantime.
This fixes a deadlock on gfs2 where
* one task is in inode_doinit_with_dentry -> gfs2_getxattr, holds
isec->lock, and tries to acquire the inode's glock, and
* another task is in do_xmote -> inode_go_inval ->
selinux_inode_invalidate_secctx, holds the inode's glock, and
tries to acquire isec->lock.
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
[PM: minor tweaks to keep checkpatch.pl happy]
Signed-off-by: Paul Moore <paul@paul-moore.com>
2016-11-15 11:06:40 +01:00
spin_lock ( & isec - > lock ) ;
2015-12-24 11:09:40 -05:00
if ( isec - > initialized = = LABEL_INITIALIZED )
2006-09-25 23:32:01 -07:00
goto out_unlock ;
2005-04-16 15:20:36 -07:00
2016-11-10 22:18:29 +01:00
if ( isec - > sclass = = SECCLASS_FILE )
isec - > sclass = inode_mode_to_security_class ( inode - > i_mode ) ;
2021-04-22 17:41:15 +02:00
sbsec = selinux_superblock ( inode - > i_sb ) ;
2009-01-16 09:22:02 -05:00
if ( ! ( sbsec - > flags & SE_SBINITIALIZED ) ) {
2005-04-16 15:20:36 -07:00
/* Defer initialization until selinux_complete_init,
after the initial policy is loaded and the security
server is ready to handle calls . */
spin_lock ( & sbsec - > isec_lock ) ;
if ( list_empty ( & isec - > list ) )
list_add ( & isec - > list , & sbsec - > isec_head ) ;
spin_unlock ( & sbsec - > isec_lock ) ;
2006-09-25 23:32:01 -07:00
goto out_unlock ;
2005-04-16 15:20:36 -07:00
}
selinux: Convert isec->lock into a spinlock
Convert isec->lock from a mutex into a spinlock. Instead of holding
the lock while sleeping in inode_doinit_with_dentry, set
isec->initialized to LABEL_PENDING and release the lock. Then, when
the sid has been determined, re-acquire the lock. If isec->initialized
is still set to LABEL_PENDING, set isec->sid; otherwise, the sid has
been set by another task (LABEL_INITIALIZED) or invalidated
(LABEL_INVALID) in the meantime.
This fixes a deadlock on gfs2 where
* one task is in inode_doinit_with_dentry -> gfs2_getxattr, holds
isec->lock, and tries to acquire the inode's glock, and
* another task is in do_xmote -> inode_go_inval ->
selinux_inode_invalidate_secctx, holds the inode's glock, and
tries to acquire isec->lock.
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
[PM: minor tweaks to keep checkpatch.pl happy]
Signed-off-by: Paul Moore <paul@paul-moore.com>
2016-11-15 11:06:40 +01:00
sclass = isec - > sclass ;
task_sid = isec - > task_sid ;
sid = isec - > sid ;
isec - > initialized = LABEL_PENDING ;
spin_unlock ( & isec - > lock ) ;
2005-04-16 15:20:36 -07:00
switch ( sbsec - > behavior ) {
2023-05-29 16:05:27 +02:00
/*
* In case of SECURITY_FS_USE_NATIVE we need to re - fetch the labels
* via xattr when called from delayed_superblock_init ( ) .
*/
2013-05-22 12:50:37 -04:00
case SECURITY_FS_USE_NATIVE :
2005-04-16 15:20:36 -07:00
case SECURITY_FS_USE_XATTR :
2016-09-29 17:48:42 +02:00
if ( ! ( inode - > i_opflags & IOP_XATTR ) ) {
selinux: Convert isec->lock into a spinlock
Convert isec->lock from a mutex into a spinlock. Instead of holding
the lock while sleeping in inode_doinit_with_dentry, set
isec->initialized to LABEL_PENDING and release the lock. Then, when
the sid has been determined, re-acquire the lock. If isec->initialized
is still set to LABEL_PENDING, set isec->sid; otherwise, the sid has
been set by another task (LABEL_INITIALIZED) or invalidated
(LABEL_INVALID) in the meantime.
This fixes a deadlock on gfs2 where
* one task is in inode_doinit_with_dentry -> gfs2_getxattr, holds
isec->lock, and tries to acquire the inode's glock, and
* another task is in do_xmote -> inode_go_inval ->
selinux_inode_invalidate_secctx, holds the inode's glock, and
tries to acquire isec->lock.
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
[PM: minor tweaks to keep checkpatch.pl happy]
Signed-off-by: Paul Moore <paul@paul-moore.com>
2016-11-15 11:06:40 +01:00
sid = sbsec - > def_sid ;
2005-04-16 15:20:36 -07:00
break ;
}
/* Need a dentry, since the xattr API requires one.
Life would be simpler if we could just pass the inode . */
if ( opt_dentry ) {
/* Called from d_instantiate or d_splice_alias. */
dentry = dget ( opt_dentry ) ;
} else {
2018-04-25 10:28:38 -04:00
/*
* Called from selinux_complete_init , try to find a dentry .
* Some filesystems really want a connected one , so try
* that first . We could split SECURITY_FS_USE_XATTR in
* two , depending upon that . . .
*/
2005-04-16 15:20:36 -07:00
dentry = d_find_alias ( inode ) ;
2018-04-25 10:28:38 -04:00
if ( ! dentry )
dentry = d_find_any_alias ( inode ) ;
2005-04-16 15:20:36 -07:00
}
if ( ! dentry ) {
2009-03-09 14:35:58 -04:00
/*
* this can be hit on boot when a file is accessed
* before the policy is loaded . When we load policy we
* may find inodes that have no dentry on the
* sbsec - > isec_head list . No reason to complain as these
* will get fixed up the next time we go through
* inode_doinit with a dentry , before these inodes could
* be used again by userspace .
*/
2020-11-03 11:49:38 -05:00
goto out_invalid ;
2005-04-16 15:20:36 -07:00
}
2019-02-22 15:57:14 +01:00
rc = inode_doinit_use_xattr ( inode , dentry , sbsec - > def_sid ,
& sid ) ;
2005-04-16 15:20:36 -07:00
dput ( dentry ) ;
2019-02-22 15:57:14 +01:00
if ( rc )
goto out ;
2005-04-16 15:20:36 -07:00
break ;
case SECURITY_FS_USE_TASK :
selinux: Convert isec->lock into a spinlock
Convert isec->lock from a mutex into a spinlock. Instead of holding
the lock while sleeping in inode_doinit_with_dentry, set
isec->initialized to LABEL_PENDING and release the lock. Then, when
the sid has been determined, re-acquire the lock. If isec->initialized
is still set to LABEL_PENDING, set isec->sid; otherwise, the sid has
been set by another task (LABEL_INITIALIZED) or invalidated
(LABEL_INVALID) in the meantime.
This fixes a deadlock on gfs2 where
* one task is in inode_doinit_with_dentry -> gfs2_getxattr, holds
isec->lock, and tries to acquire the inode's glock, and
* another task is in do_xmote -> inode_go_inval ->
selinux_inode_invalidate_secctx, holds the inode's glock, and
tries to acquire isec->lock.
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
[PM: minor tweaks to keep checkpatch.pl happy]
Signed-off-by: Paul Moore <paul@paul-moore.com>
2016-11-15 11:06:40 +01:00
sid = task_sid ;
2005-04-16 15:20:36 -07:00
break ;
case SECURITY_FS_USE_TRANS :
/* Default to the fs SID. */
selinux: Convert isec->lock into a spinlock
Convert isec->lock from a mutex into a spinlock. Instead of holding
the lock while sleeping in inode_doinit_with_dentry, set
isec->initialized to LABEL_PENDING and release the lock. Then, when
the sid has been determined, re-acquire the lock. If isec->initialized
is still set to LABEL_PENDING, set isec->sid; otherwise, the sid has
been set by another task (LABEL_INITIALIZED) or invalidated
(LABEL_INVALID) in the meantime.
This fixes a deadlock on gfs2 where
* one task is in inode_doinit_with_dentry -> gfs2_getxattr, holds
isec->lock, and tries to acquire the inode's glock, and
* another task is in do_xmote -> inode_go_inval ->
selinux_inode_invalidate_secctx, holds the inode's glock, and
tries to acquire isec->lock.
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
[PM: minor tweaks to keep checkpatch.pl happy]
Signed-off-by: Paul Moore <paul@paul-moore.com>
2016-11-15 11:06:40 +01:00
sid = sbsec - > sid ;
2005-04-16 15:20:36 -07:00
/* Try to obtain a transition SID. */
2023-03-09 13:30:37 -05:00
rc = security_transition_sid ( task_sid , sid ,
2018-03-01 18:48:02 -05:00
sclass , NULL , & sid ) ;
2005-04-16 15:20:36 -07:00
if ( rc )
selinux: Convert isec->lock into a spinlock
Convert isec->lock from a mutex into a spinlock. Instead of holding
the lock while sleeping in inode_doinit_with_dentry, set
isec->initialized to LABEL_PENDING and release the lock. Then, when
the sid has been determined, re-acquire the lock. If isec->initialized
is still set to LABEL_PENDING, set isec->sid; otherwise, the sid has
been set by another task (LABEL_INITIALIZED) or invalidated
(LABEL_INVALID) in the meantime.
This fixes a deadlock on gfs2 where
* one task is in inode_doinit_with_dentry -> gfs2_getxattr, holds
isec->lock, and tries to acquire the inode's glock, and
* another task is in do_xmote -> inode_go_inval ->
selinux_inode_invalidate_secctx, holds the inode's glock, and
tries to acquire isec->lock.
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
[PM: minor tweaks to keep checkpatch.pl happy]
Signed-off-by: Paul Moore <paul@paul-moore.com>
2016-11-15 11:06:40 +01:00
goto out ;
2005-04-16 15:20:36 -07:00
break ;
2006-07-10 04:43:53 -07:00
case SECURITY_FS_USE_MNTPOINT :
selinux: Convert isec->lock into a spinlock
Convert isec->lock from a mutex into a spinlock. Instead of holding
the lock while sleeping in inode_doinit_with_dentry, set
isec->initialized to LABEL_PENDING and release the lock. Then, when
the sid has been determined, re-acquire the lock. If isec->initialized
is still set to LABEL_PENDING, set isec->sid; otherwise, the sid has
been set by another task (LABEL_INITIALIZED) or invalidated
(LABEL_INVALID) in the meantime.
This fixes a deadlock on gfs2 where
* one task is in inode_doinit_with_dentry -> gfs2_getxattr, holds
isec->lock, and tries to acquire the inode's glock, and
* another task is in do_xmote -> inode_go_inval ->
selinux_inode_invalidate_secctx, holds the inode's glock, and
tries to acquire isec->lock.
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
[PM: minor tweaks to keep checkpatch.pl happy]
Signed-off-by: Paul Moore <paul@paul-moore.com>
2016-11-15 11:06:40 +01:00
sid = sbsec - > mntpoint_sid ;
2006-07-10 04:43:53 -07:00
break ;
2005-04-16 15:20:36 -07:00
default :
2006-07-10 04:43:53 -07:00
/* Default to the fs superblock SID. */
selinux: Convert isec->lock into a spinlock
Convert isec->lock from a mutex into a spinlock. Instead of holding
the lock while sleeping in inode_doinit_with_dentry, set
isec->initialized to LABEL_PENDING and release the lock. Then, when
the sid has been determined, re-acquire the lock. If isec->initialized
is still set to LABEL_PENDING, set isec->sid; otherwise, the sid has
been set by another task (LABEL_INITIALIZED) or invalidated
(LABEL_INVALID) in the meantime.
This fixes a deadlock on gfs2 where
* one task is in inode_doinit_with_dentry -> gfs2_getxattr, holds
isec->lock, and tries to acquire the inode's glock, and
* another task is in do_xmote -> inode_go_inval ->
selinux_inode_invalidate_secctx, holds the inode's glock, and
tries to acquire isec->lock.
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
[PM: minor tweaks to keep checkpatch.pl happy]
Signed-off-by: Paul Moore <paul@paul-moore.com>
2016-11-15 11:06:40 +01:00
sid = sbsec - > sid ;
2005-04-16 15:20:36 -07:00
2020-01-28 20:16:48 +01:00
if ( ( sbsec - > flags & SE_SBGENFS ) & &
( ! S_ISLNK ( inode - > i_mode ) | |
selinux_policycap_genfs_seclabel_symlinks ( ) ) ) {
2014-03-19 16:46:18 -04:00
/* We must have a dentry to determine the label on
* procfs inodes */
2018-04-25 10:28:38 -04:00
if ( opt_dentry ) {
2014-03-19 16:46:18 -04:00
/* Called from d_instantiate or
* d_splice_alias . */
dentry = dget ( opt_dentry ) ;
2018-04-25 10:28:38 -04:00
} else {
2014-03-19 16:46:18 -04:00
/* Called from selinux_complete_init, try to
2018-04-25 10:28:38 -04:00
* find a dentry . Some filesystems really want
* a connected one , so try that first .
*/
2014-03-19 16:46:18 -04:00
dentry = d_find_alias ( inode ) ;
2018-04-25 10:28:38 -04:00
if ( ! dentry )
dentry = d_find_any_alias ( inode ) ;
}
2014-03-19 16:46:18 -04:00
/*
* This can be hit on boot when a file is accessed
* before the policy is loaded . When we load policy we
* may find inodes that have no dentry on the
* sbsec - > isec_head list . No reason to complain as
* these will get fixed up the next time we go through
* inode_doinit ( ) with a dentry , before these inodes
* could be used again by userspace .
*/
if ( ! dentry )
2020-11-03 11:49:38 -05:00
goto out_invalid ;
selinux: Convert isec->lock into a spinlock
Convert isec->lock from a mutex into a spinlock. Instead of holding
the lock while sleeping in inode_doinit_with_dentry, set
isec->initialized to LABEL_PENDING and release the lock. Then, when
the sid has been determined, re-acquire the lock. If isec->initialized
is still set to LABEL_PENDING, set isec->sid; otherwise, the sid has
been set by another task (LABEL_INITIALIZED) or invalidated
(LABEL_INVALID) in the meantime.
This fixes a deadlock on gfs2 where
* one task is in inode_doinit_with_dentry -> gfs2_getxattr, holds
isec->lock, and tries to acquire the inode's glock, and
* another task is in do_xmote -> inode_go_inval ->
selinux_inode_invalidate_secctx, holds the inode's glock, and
tries to acquire isec->lock.
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
[PM: minor tweaks to keep checkpatch.pl happy]
Signed-off-by: Paul Moore <paul@paul-moore.com>
2016-11-15 11:06:40 +01:00
rc = selinux_genfs_get_sid ( dentry , sclass ,
2015-06-04 16:22:17 -04:00
sbsec - > flags , & sid ) ;
2019-02-22 15:57:14 +01:00
if ( rc ) {
dput ( dentry ) ;
selinux: Convert isec->lock into a spinlock
Convert isec->lock from a mutex into a spinlock. Instead of holding
the lock while sleeping in inode_doinit_with_dentry, set
isec->initialized to LABEL_PENDING and release the lock. Then, when
the sid has been determined, re-acquire the lock. If isec->initialized
is still set to LABEL_PENDING, set isec->sid; otherwise, the sid has
been set by another task (LABEL_INITIALIZED) or invalidated
(LABEL_INVALID) in the meantime.
This fixes a deadlock on gfs2 where
* one task is in inode_doinit_with_dentry -> gfs2_getxattr, holds
isec->lock, and tries to acquire the inode's glock, and
* another task is in do_xmote -> inode_go_inval ->
selinux_inode_invalidate_secctx, holds the inode's glock, and
tries to acquire isec->lock.
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
[PM: minor tweaks to keep checkpatch.pl happy]
Signed-off-by: Paul Moore <paul@paul-moore.com>
2016-11-15 11:06:40 +01:00
goto out ;
2019-02-22 15:57:14 +01:00
}
if ( ( sbsec - > flags & SE_SBGENFS_XATTR ) & &
( inode - > i_opflags & IOP_XATTR ) ) {
rc = inode_doinit_use_xattr ( inode , dentry ,
sid , & sid ) ;
if ( rc ) {
dput ( dentry ) ;
goto out ;
}
}
dput ( dentry ) ;
2005-04-16 15:20:36 -07:00
}
break ;
}
selinux: Convert isec->lock into a spinlock
Convert isec->lock from a mutex into a spinlock. Instead of holding
the lock while sleeping in inode_doinit_with_dentry, set
isec->initialized to LABEL_PENDING and release the lock. Then, when
the sid has been determined, re-acquire the lock. If isec->initialized
is still set to LABEL_PENDING, set isec->sid; otherwise, the sid has
been set by another task (LABEL_INITIALIZED) or invalidated
(LABEL_INVALID) in the meantime.
This fixes a deadlock on gfs2 where
* one task is in inode_doinit_with_dentry -> gfs2_getxattr, holds
isec->lock, and tries to acquire the inode's glock, and
* another task is in do_xmote -> inode_go_inval ->
selinux_inode_invalidate_secctx, holds the inode's glock, and
tries to acquire isec->lock.
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
[PM: minor tweaks to keep checkpatch.pl happy]
Signed-off-by: Paul Moore <paul@paul-moore.com>
2016-11-15 11:06:40 +01:00
out :
spin_lock ( & isec - > lock ) ;
if ( isec - > initialized = = LABEL_PENDING ) {
2020-11-03 11:49:38 -05:00
if ( rc ) {
selinux: Convert isec->lock into a spinlock
Convert isec->lock from a mutex into a spinlock. Instead of holding
the lock while sleeping in inode_doinit_with_dentry, set
isec->initialized to LABEL_PENDING and release the lock. Then, when
the sid has been determined, re-acquire the lock. If isec->initialized
is still set to LABEL_PENDING, set isec->sid; otherwise, the sid has
been set by another task (LABEL_INITIALIZED) or invalidated
(LABEL_INVALID) in the meantime.
This fixes a deadlock on gfs2 where
* one task is in inode_doinit_with_dentry -> gfs2_getxattr, holds
isec->lock, and tries to acquire the inode's glock, and
* another task is in do_xmote -> inode_go_inval ->
selinux_inode_invalidate_secctx, holds the inode's glock, and
tries to acquire isec->lock.
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
[PM: minor tweaks to keep checkpatch.pl happy]
Signed-off-by: Paul Moore <paul@paul-moore.com>
2016-11-15 11:06:40 +01:00
isec - > initialized = LABEL_INVALID ;
goto out_unlock ;
}
isec - > initialized = LABEL_INITIALIZED ;
isec - > sid = sid ;
}
2005-04-16 15:20:36 -07:00
2006-09-25 23:32:01 -07:00
out_unlock :
selinux: Convert isec->lock into a spinlock
Convert isec->lock from a mutex into a spinlock. Instead of holding
the lock while sleeping in inode_doinit_with_dentry, set
isec->initialized to LABEL_PENDING and release the lock. Then, when
the sid has been determined, re-acquire the lock. If isec->initialized
is still set to LABEL_PENDING, set isec->sid; otherwise, the sid has
been set by another task (LABEL_INITIALIZED) or invalidated
(LABEL_INVALID) in the meantime.
This fixes a deadlock on gfs2 where
* one task is in inode_doinit_with_dentry -> gfs2_getxattr, holds
isec->lock, and tries to acquire the inode's glock, and
* another task is in do_xmote -> inode_go_inval ->
selinux_inode_invalidate_secctx, holds the inode's glock, and
tries to acquire isec->lock.
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
[PM: minor tweaks to keep checkpatch.pl happy]
Signed-off-by: Paul Moore <paul@paul-moore.com>
2016-11-15 11:06:40 +01:00
spin_unlock ( & isec - > lock ) ;
2005-04-16 15:20:36 -07:00
return rc ;
2020-11-03 11:49:38 -05:00
out_invalid :
spin_lock ( & isec - > lock ) ;
if ( isec - > initialized = = LABEL_PENDING ) {
isec - > initialized = LABEL_INVALID ;
isec - > sid = sid ;
}
spin_unlock ( & isec - > lock ) ;
return 0 ;
2005-04-16 15:20:36 -07:00
}
/* Translate a Linux signal number into the matching PROCESS__* access vector. */
static inline u32 signal_to_av(int sig)
{
	switch (sig) {
	case SIGCHLD:
		/* Commonly granted from child to parent. */
		return PROCESS__SIGCHLD;
	case SIGKILL:
		/* Cannot be caught or ignored. */
		return PROCESS__SIGKILL;
	case SIGSTOP:
		/* Cannot be caught or ignored. */
		return PROCESS__SIGSTOP;
	default:
		/* Every other signal shares one permission. */
		return PROCESS__SIGNAL;
	}
}
2008-02-07 11:21:04 -05:00
/*
 * Build-time guard: stop the compile if the kernel defines a capability
 * numbered above 63.
 */
# if CAP_LAST_CAP > 63
# error Fix SELinux to handle capabilities > 63.
# endif
2005-04-16 15:20:36 -07:00
/* Check whether a task is allowed to use a capability. */
2012-01-03 12:25:14 -05:00
static int cred_has_capability ( const struct cred * cred ,
2019-01-07 16:10:53 -08:00
int cap , unsigned int opts , bool initns )
2005-04-16 15:20:36 -07:00
{
2009-07-14 12:14:09 -04:00
struct common_audit_data ad ;
2008-11-11 22:02:50 +11:00
struct av_decision avd ;
2008-02-07 11:21:04 -05:00
u16 sclass ;
CRED: Fix regression in cap_capable() as shown up by sys_faccessat() [ver #3]
Fix a regression in cap_capable() due to:
commit 3b11a1decef07c19443d24ae926982bc8ec9f4c0
Author: David Howells <dhowells@redhat.com>
Date: Fri Nov 14 10:39:26 2008 +1100
CRED: Differentiate objective and effective subjective credentials on a task
The problem is that the above patch allows a process to have two sets of
credentials, and for the most part uses the subjective credentials when
accessing current's creds.
There is, however, one exception: cap_capable(), and thus capable(), uses the
real/objective credentials of the target task, whether or not it is the current
task.
Ordinarily this doesn't matter, since usually the two cred pointers in current
point to the same set of creds. However, sys_faccessat() makes use of this
facility to override the credentials of the calling process to make its test,
without affecting the creds as seen from other processes.
One of the things sys_faccessat() does is to make an adjustment to the
effective capabilities mask, which cap_capable(), as it stands, then ignores.
The affected capability check is in generic_permission():
if (!(mask & MAY_EXEC) || execute_ok(inode))
if (capable(CAP_DAC_OVERRIDE))
return 0;
This change passes the set of credentials to be tested down into the commoncap
and SELinux code. The security functions called by capable() and
has_capability() select the appropriate set of credentials from the process
being checked.
This can be tested by compiling the following program from the XFS testsuite:
/*
* t_access_root.c - trivial test program to show permission bug.
*
* Written by Michael Kerrisk - copyright ownership not pursued.
* Sourced from: http://linux.derkeiler.com/Mailing-Lists/Kernel/2003-10/6030.html
*/
#include <limits.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <sys/stat.h>
#define UID 500
#define GID 100
#define PERM 0
#define TESTPATH "/tmp/t_access"
static void
errExit(char *msg)
{
perror(msg);
exit(EXIT_FAILURE);
} /* errExit */
static void
accessTest(char *file, int mask, char *mstr)
{
printf("access(%s, %s) returns %d\n", file, mstr, access(file, mask));
} /* accessTest */
int
main(int argc, char *argv[])
{
int fd, perm, uid, gid;
char *testpath;
char cmd[PATH_MAX + 20];
testpath = (argc > 1) ? argv[1] : TESTPATH;
perm = (argc > 2) ? strtoul(argv[2], NULL, 8) : PERM;
uid = (argc > 3) ? atoi(argv[3]) : UID;
gid = (argc > 4) ? atoi(argv[4]) : GID;
unlink(testpath);
fd = open(testpath, O_RDWR | O_CREAT, 0);
if (fd == -1) errExit("open");
if (fchown(fd, uid, gid) == -1) errExit("fchown");
if (fchmod(fd, perm) == -1) errExit("fchmod");
close(fd);
snprintf(cmd, sizeof(cmd), "ls -l %s", testpath);
system(cmd);
if (seteuid(uid) == -1) errExit("seteuid");
accessTest(testpath, 0, "0");
accessTest(testpath, R_OK, "R_OK");
accessTest(testpath, W_OK, "W_OK");
accessTest(testpath, X_OK, "X_OK");
accessTest(testpath, R_OK | W_OK, "R_OK | W_OK");
accessTest(testpath, R_OK | X_OK, "R_OK | X_OK");
accessTest(testpath, W_OK | X_OK, "W_OK | X_OK");
accessTest(testpath, R_OK | W_OK | X_OK, "R_OK | W_OK | X_OK");
exit(EXIT_SUCCESS);
} /* main */
This can be run against an Ext3 filesystem as well as against an XFS
filesystem. If successful, it will show:
[root@andromeda src]# ./t_access_root /tmp/xxx 0 4043 4043
---------- 1 dhowells dhowells 0 2008-12-31 03:00 /tmp/xxx
access(/tmp/xxx, 0) returns 0
access(/tmp/xxx, R_OK) returns 0
access(/tmp/xxx, W_OK) returns 0
access(/tmp/xxx, X_OK) returns -1
access(/tmp/xxx, R_OK | W_OK) returns 0
access(/tmp/xxx, R_OK | X_OK) returns -1
access(/tmp/xxx, W_OK | X_OK) returns -1
access(/tmp/xxx, R_OK | W_OK | X_OK) returns -1
If unsuccessful, it will show:
[root@andromeda src]# ./t_access_root /tmp/xxx 0 4043 4043
---------- 1 dhowells dhowells 0 2008-12-31 02:56 /tmp/xxx
access(/tmp/xxx, 0) returns 0
access(/tmp/xxx, R_OK) returns -1
access(/tmp/xxx, W_OK) returns -1
access(/tmp/xxx, X_OK) returns -1
access(/tmp/xxx, R_OK | W_OK) returns -1
access(/tmp/xxx, R_OK | X_OK) returns -1
access(/tmp/xxx, W_OK | X_OK) returns -1
access(/tmp/xxx, R_OK | W_OK | X_OK) returns -1
I've also tested the fix with the SELinux and syscalls LTP testsuites.
Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: J. Bruce Fields <bfields@citi.umich.edu>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>
2009-01-06 22:27:01 +00:00
u32 sid = cred_sid ( cred ) ;
2008-02-07 11:21:04 -05:00
u32 av = CAP_TO_MASK ( cap ) ;
2008-11-11 22:02:50 +11:00
int rc ;
2005-04-16 15:20:36 -07:00
2012-04-04 15:01:43 -04:00
ad . type = LSM_AUDIT_DATA_CAP ;
2005-04-16 15:20:36 -07:00
ad . u . cap = cap ;
2008-02-07 11:21:04 -05:00
switch ( CAP_TO_INDEX ( cap ) ) {
case 0 :
2016-04-08 13:52:00 -04:00
sclass = initns ? SECCLASS_CAPABILITY : SECCLASS_CAP_USERNS ;
2008-02-07 11:21:04 -05:00
break ;
case 1 :
2016-04-08 13:52:00 -04:00
sclass = initns ? SECCLASS_CAPABILITY2 : SECCLASS_CAP2_USERNS ;
2008-02-07 11:21:04 -05:00
break ;
default :
2018-06-12 10:09:03 +02:00
pr_err ( " SELinux: out of range capability %d \n " , cap ) ;
2008-02-07 11:21:04 -05:00
BUG ( ) ;
2011-04-20 10:21:28 -04:00
return - EINVAL ;
2008-02-07 11:21:04 -05:00
}
2008-11-11 22:02:50 +11:00
2023-03-09 13:30:37 -05:00
rc = avc_has_perm_noaudit ( sid , sid , sclass , av , 0 , & avd ) ;
2019-01-07 16:10:53 -08:00
if ( ! ( opts & CAP_OPT_NOAUDIT ) ) {
2023-03-09 13:30:37 -05:00
int rc2 = avc_audit ( sid , sid , sclass , av , & avd , rc , & ad ) ;
2011-04-25 16:26:29 -04:00
if ( rc2 )
return rc2 ;
}
2008-11-11 22:02:50 +11:00
return rc ;
2005-04-16 15:20:36 -07:00
}
/* Check whether a task has a particular permission to an inode.
The ' adp ' parameter is optional and allows other audit
data to be passed ( e . g . the dentry ) . */
2008-11-14 10:39:21 +11:00
static int inode_has_perm ( const struct cred * cred ,
2005-04-16 15:20:36 -07:00
struct inode * inode ,
u32 perms ,
2013-10-04 12:54:11 -07:00
struct common_audit_data * adp )
2005-04-16 15:20:36 -07:00
{
struct inode_security_struct * isec ;
2008-11-14 10:39:19 +11:00
u32 sid ;
2005-04-16 15:20:36 -07:00
2008-04-17 13:17:49 -04:00
if ( unlikely ( IS_PRIVATE ( inode ) ) )
2007-02-14 00:34:16 -08:00
return 0 ;
2008-11-14 10:39:21 +11:00
sid = cred_sid ( cred ) ;
2018-09-21 17:19:11 -07:00
isec = selinux_inode ( inode ) ;
2005-04-16 15:20:36 -07:00
2023-03-09 13:30:37 -05:00
return avc_has_perm ( sid , isec - > sid , isec - > sclass , perms , adp ) ;
2005-04-16 15:20:36 -07:00
}
/* Same as inode_has_perm, but pass explicit audit data containing
the dentry to help the auditing code to more easily generate the
pathname if needed . */
2008-11-14 10:39:21 +11:00
static inline int dentry_has_perm ( const struct cred * cred ,
2005-04-16 15:20:36 -07:00
struct dentry * dentry ,
u32 av )
{
2015-03-17 22:26:22 +00:00
struct inode * inode = d_backing_inode ( dentry ) ;
2009-07-14 12:14:09 -04:00
struct common_audit_data ad ;
2008-11-14 10:39:21 +11:00
2012-04-04 15:01:43 -04:00
ad . type = LSM_AUDIT_DATA_DENTRY ;
2011-04-28 16:04:24 -04:00
ad . u . dentry = dentry ;
2015-12-24 11:09:40 -05:00
__inode_security_revalidate ( inode , dentry , true ) ;
2013-10-04 12:54:11 -07:00
return inode_has_perm ( cred , inode , av , & ad ) ;
2011-04-28 16:04:24 -04:00
}
/* Same as inode_has_perm, but pass explicit audit data containing
the path to help the auditing code to more easily generate the
pathname if needed . */
static inline int path_has_perm ( const struct cred * cred ,
2015-03-08 19:28:30 -04:00
const struct path * path ,
2011-04-28 16:04:24 -04:00
u32 av )
{
2015-03-17 22:26:22 +00:00
struct inode * inode = d_backing_inode ( path - > dentry ) ;
2011-04-28 16:04:24 -04:00
struct common_audit_data ad ;
2012-04-04 15:01:43 -04:00
ad . type = LSM_AUDIT_DATA_PATH ;
2011-04-28 16:04:24 -04:00
ad . u . path = * path ;
2015-12-24 11:09:40 -05:00
__inode_security_revalidate ( inode , path - > dentry , true ) ;
2013-10-04 12:54:11 -07:00
return inode_has_perm ( cred , inode , av , & ad ) ;
2005-04-16 15:20:36 -07:00
}
2013-06-13 23:37:55 +01:00
/*
 * Like path_has_perm(), but the inode comes from the file struct and the
 * file itself is recorded as the audit data.
 */
static inline int file_path_has_perm(const struct cred *cred,
				     struct file *file,
				     u32 av)
{
	struct common_audit_data file_ad;

	file_ad.type = LSM_AUDIT_DATA_FILE;
	file_ad.u.file = file;

	return inode_has_perm(cred, file_inode(file), av, &file_ad);
}
2017-10-18 13:00:26 -07:00
/*
 * Forward declaration: file_has_perm() and selinux_binder_transfer_file()
 * call bpf_fd_pass() under the same CONFIG_BPF_SYSCALL guard.
 */
# ifdef CONFIG_BPF_SYSCALL
2023-08-12 20:31:08 +05:00
static int bpf_fd_pass ( const struct file * file , u32 sid ) ;
2017-10-18 13:00:26 -07:00
# endif
2005-04-16 15:20:36 -07:00
/* Check whether a task can use an open file descriptor to
access an inode in a given way . Check access to the
descriptor itself , and then use dentry_has_perm to
check a particular permission to the file .
Access to the descriptor is implicitly granted if it
has the same SID as the process . If av is zero , then
access to the file is not checked , e . g . for cases
where only the descriptor is affected like seek . */
2008-11-14 10:39:21 +11:00
static int file_has_perm ( const struct cred * cred ,
struct file * file ,
u32 av )
2005-04-16 15:20:36 -07:00
{
2018-09-21 17:22:32 -07:00
struct file_security_struct * fsec = selinux_file ( file ) ;
2013-01-23 17:07:38 -05:00
struct inode * inode = file_inode ( file ) ;
2009-07-14 12:14:09 -04:00
struct common_audit_data ad ;
2008-11-14 10:39:21 +11:00
u32 sid = cred_sid ( cred ) ;
2005-04-16 15:20:36 -07:00
int rc ;
2016-09-09 11:37:49 -04:00
ad . type = LSM_AUDIT_DATA_FILE ;
ad . u . file = file ;
2005-04-16 15:20:36 -07:00
2008-11-14 10:39:19 +11:00
if ( sid ! = fsec - > sid ) {
2023-03-09 13:30:37 -05:00
rc = avc_has_perm ( sid , fsec - > sid ,
2005-04-16 15:20:36 -07:00
SECCLASS_FD ,
FD__USE ,
& ad ) ;
if ( rc )
2008-11-14 10:39:21 +11:00
goto out ;
2005-04-16 15:20:36 -07:00
}
2017-10-18 13:00:26 -07:00
# ifdef CONFIG_BPF_SYSCALL
rc = bpf_fd_pass ( file , cred_sid ( cred ) ) ;
if ( rc )
return rc ;
# endif
2005-04-16 15:20:36 -07:00
/* av is zero if only checking access to the descriptor. */
2008-11-14 10:39:21 +11:00
rc = 0 ;
2005-04-16 15:20:36 -07:00
if ( av )
2013-10-04 12:54:11 -07:00
rc = inode_has_perm ( cred , inode , av , & ad ) ;
2005-04-16 15:20:36 -07:00
2008-11-14 10:39:21 +11:00
out :
return rc ;
2005-04-16 15:20:36 -07:00
}
2015-07-10 17:19:58 -04:00
/*
* Determine the label for an inode that might be unioned .
*/
2016-07-13 10:44:51 -04:00
static int
selinux_determine_inode_label ( const struct task_security_struct * tsec ,
struct inode * dir ,
const struct qstr * name , u16 tclass ,
u32 * _new_isid )
2015-07-10 17:19:58 -04:00
{
2021-04-22 17:41:15 +02:00
const struct superblock_security_struct * sbsec =
selinux_superblock ( dir - > i_sb ) ;
2015-07-10 17:19:58 -04:00
if ( ( sbsec - > flags & SE_SBINITIALIZED ) & &
( sbsec - > behavior = = SECURITY_FS_USE_MNTPOINT ) ) {
* _new_isid = sbsec - > mntpoint_sid ;
} else if ( ( sbsec - > flags & SBLABEL_MNT ) & &
tsec - > create_sid ) {
* _new_isid = tsec - > create_sid ;
} else {
2016-04-04 14:14:42 -04:00
const struct inode_security_struct * dsec = inode_security ( dir ) ;
2023-03-09 13:30:37 -05:00
return security_transition_sid ( tsec - > sid ,
2018-03-01 18:48:02 -05:00
dsec - > sid , tclass ,
2015-07-10 17:19:58 -04:00
name , _new_isid ) ;
}
return 0 ;
}
2005-04-16 15:20:36 -07:00
/*
 * Check whether the current task can create a file of class 'tclass'
 * named by 'dentry' inside directory 'dir'.
 *
 * Three checks run in order, stopping at the first failure:
 *   - add_name and search on the parent directory;
 *   - create on the label the new object would receive;
 *   - associate between that label and the filesystem.
 */
static int may_create(struct inode *dir,
		      struct dentry *dentry,
		      u16 tclass)
{
	const struct task_security_struct *tsec = selinux_cred(current_cred());
	struct inode_security_struct *dsec = inode_security(dir);
	struct superblock_security_struct *sbsec = selinux_superblock(dir->i_sb);
	const u32 sid = tsec->sid;
	struct common_audit_data ad;
	u32 newsid;
	int rc;

	ad.type = LSM_AUDIT_DATA_DENTRY;
	ad.u.dentry = dentry;

	rc = avc_has_perm(sid, dsec->sid, SECCLASS_DIR,
			  DIR__ADD_NAME | DIR__SEARCH, &ad);
	if (rc)
		return rc;

	rc = selinux_determine_inode_label(tsec, dir, &dentry->d_name,
					   tclass, &newsid);
	if (rc)
		return rc;

	rc = avc_has_perm(sid, newsid, tclass, FILE__CREATE, &ad);
	if (rc)
		return rc;

	return avc_has_perm(newsid, sbsec->sid, SECCLASS_FILESYSTEM,
			    FILESYSTEM__ASSOCIATE, &ad);
}
2008-04-17 13:17:49 -04:00
/* Operation selectors accepted as the 'kind' argument of may_link(). */
# define MAY_LINK 0
# define MAY_UNLINK 1
# define MAY_RMDIR 2
2005-04-16 15:20:36 -07:00
/* Check whether a task can link, unlink, or rmdir a file/directory. */
static int may_link ( struct inode * dir ,
struct dentry * dentry ,
int kind )
{
struct inode_security_struct * dsec , * isec ;
2009-07-14 12:14:09 -04:00
struct common_audit_data ad ;
2008-11-14 10:39:19 +11:00
u32 sid = current_sid ( ) ;
2005-04-16 15:20:36 -07:00
u32 av ;
int rc ;
2015-12-24 11:09:39 -05:00
dsec = inode_security ( dir ) ;
isec = backing_inode_security ( dentry ) ;
2005-04-16 15:20:36 -07:00
2012-04-04 15:01:43 -04:00
ad . type = LSM_AUDIT_DATA_DENTRY ;
2011-04-25 13:10:27 -04:00
ad . u . dentry = dentry ;
2005-04-16 15:20:36 -07:00
av = DIR__SEARCH ;
av | = ( kind ? DIR__REMOVE_NAME : DIR__ADD_NAME ) ;
2023-03-09 13:30:37 -05:00
rc = avc_has_perm ( sid , dsec - > sid , SECCLASS_DIR , av , & ad ) ;
2005-04-16 15:20:36 -07:00
if ( rc )
return rc ;
switch ( kind ) {
case MAY_LINK :
av = FILE__LINK ;
break ;
case MAY_UNLINK :
av = FILE__UNLINK ;
break ;
case MAY_RMDIR :
av = DIR__RMDIR ;
break ;
default :
2018-06-12 10:09:03 +02:00
pr_warn ( " SELinux: %s: unrecognized kind %d \n " ,
2008-04-17 11:52:44 -04:00
__func__ , kind ) ;
2005-04-16 15:20:36 -07:00
return 0 ;
}
2023-03-09 13:30:37 -05:00
rc = avc_has_perm ( sid , isec - > sid , isec - > sclass , av , & ad ) ;
2005-04-16 15:20:36 -07:00
return rc ;
}
static inline int may_rename ( struct inode * old_dir ,
struct dentry * old_dentry ,
struct inode * new_dir ,
struct dentry * new_dentry )
{
struct inode_security_struct * old_dsec , * new_dsec , * old_isec , * new_isec ;
2009-07-14 12:14:09 -04:00
struct common_audit_data ad ;
2008-11-14 10:39:19 +11:00
u32 sid = current_sid ( ) ;
2005-04-16 15:20:36 -07:00
u32 av ;
int old_is_dir , new_is_dir ;
int rc ;
2015-12-24 11:09:39 -05:00
old_dsec = inode_security ( old_dir ) ;
old_isec = backing_inode_security ( old_dentry ) ;
VFS: (Scripted) Convert S_ISLNK/DIR/REG(dentry->d_inode) to d_is_*(dentry)
Convert the following where appropriate:
(1) S_ISLNK(dentry->d_inode) to d_is_symlink(dentry).
(2) S_ISREG(dentry->d_inode) to d_is_reg(dentry).
(3) S_ISDIR(dentry->d_inode) to d_is_dir(dentry). This is actually more
complicated than it appears as some calls should be converted to
d_can_lookup() instead. The difference is whether the directory in
question is a real dir with a ->lookup op or whether it's a fake dir with
a ->d_automount op.
In some circumstances, we can subsume checks for dentry->d_inode not being
NULL into this, provided we the code isn't in a filesystem that expects
d_inode to be NULL if the dirent really *is* negative (ie. if we're going to
use d_inode() rather than d_backing_inode() to get the inode pointer).
Note that the dentry type field may be set to something other than
DCACHE_MISS_TYPE when d_inode is NULL in the case of unionmount, where the VFS
manages the fall-through from a negative dentry to a lower layer. In such a
case, the dentry type of the negative union dentry is set to the same as the
type of the lower dentry.
However, if you know d_inode is not NULL at the call site, then you can use
the d_is_xxx() functions even in a filesystem.
There is one further complication: a 0,0 chardev dentry may be labelled
DCACHE_WHITEOUT_TYPE rather than DCACHE_SPECIAL_TYPE. Strictly, this was
intended for special directory entry types that don't have attached inodes.
The following perl+coccinelle script was used:
use strict;
my @callers;
open($fd, 'git grep -l \'S_IS[A-Z].*->d_inode\' |') ||
die "Can't grep for S_ISDIR and co. callers";
@callers = <$fd>;
close($fd);
unless (@callers) {
print "No matches\n";
exit(0);
}
my @cocci = (
'@@',
'expression E;',
'@@',
'',
'- S_ISLNK(E->d_inode->i_mode)',
'+ d_is_symlink(E)',
'',
'@@',
'expression E;',
'@@',
'',
'- S_ISDIR(E->d_inode->i_mode)',
'+ d_is_dir(E)',
'',
'@@',
'expression E;',
'@@',
'',
'- S_ISREG(E->d_inode->i_mode)',
'+ d_is_reg(E)' );
my $coccifile = "tmp.sp.cocci";
open($fd, ">$coccifile") || die $coccifile;
print($fd "$_\n") || die $coccifile foreach (@cocci);
close($fd);
foreach my $file (@callers) {
chomp $file;
print "Processing ", $file, "\n";
system("spatch", "--sp-file", $coccifile, $file, "--in-place", "--no-show-diff") == 0 ||
die "spatch failed";
}
[AV: overlayfs parts skipped]
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2015-01-29 12:02:35 +00:00
old_is_dir = d_is_dir ( old_dentry ) ;
2015-12-24 11:09:39 -05:00
new_dsec = inode_security ( new_dir ) ;
2005-04-16 15:20:36 -07:00
2012-04-04 15:01:43 -04:00
ad . type = LSM_AUDIT_DATA_DENTRY ;
2005-04-16 15:20:36 -07:00
2011-04-25 13:10:27 -04:00
ad . u . dentry = old_dentry ;
2023-03-09 13:30:37 -05:00
rc = avc_has_perm ( sid , old_dsec - > sid , SECCLASS_DIR ,
2005-04-16 15:20:36 -07:00
DIR__REMOVE_NAME | DIR__SEARCH , & ad ) ;
if ( rc )
return rc ;
2023-03-09 13:30:37 -05:00
rc = avc_has_perm ( sid , old_isec - > sid ,
2005-04-16 15:20:36 -07:00
old_isec - > sclass , FILE__RENAME , & ad ) ;
if ( rc )
return rc ;
if ( old_is_dir & & new_dir ! = old_dir ) {
2023-03-09 13:30:37 -05:00
rc = avc_has_perm ( sid , old_isec - > sid ,
2005-04-16 15:20:36 -07:00
old_isec - > sclass , DIR__REPARENT , & ad ) ;
if ( rc )
return rc ;
}
2011-04-25 13:10:27 -04:00
ad . u . dentry = new_dentry ;
2005-04-16 15:20:36 -07:00
av = DIR__ADD_NAME | DIR__SEARCH ;
2015-01-29 12:02:33 +00:00
if ( d_is_positive ( new_dentry ) )
2005-04-16 15:20:36 -07:00
av | = DIR__REMOVE_NAME ;
2023-03-09 13:30:37 -05:00
rc = avc_has_perm ( sid , new_dsec - > sid , SECCLASS_DIR , av , & ad ) ;
2005-04-16 15:20:36 -07:00
if ( rc )
return rc ;
2015-01-29 12:02:33 +00:00
if ( d_is_positive ( new_dentry ) ) {
2015-12-24 11:09:39 -05:00
new_isec = backing_inode_security ( new_dentry ) ;
VFS: (Scripted) Convert S_ISLNK/DIR/REG(dentry->d_inode) to d_is_*(dentry)
Convert the following where appropriate:
(1) S_ISLNK(dentry->d_inode) to d_is_symlink(dentry).
(2) S_ISREG(dentry->d_inode) to d_is_reg(dentry).
(3) S_ISDIR(dentry->d_inode) to d_is_dir(dentry). This is actually more
complicated than it appears as some calls should be converted to
d_can_lookup() instead. The difference is whether the directory in
question is a real dir with a ->lookup op or whether it's a fake dir with
a ->d_automount op.
In some circumstances, we can subsume checks for dentry->d_inode not being
NULL into this, provided we the code isn't in a filesystem that expects
d_inode to be NULL if the dirent really *is* negative (ie. if we're going to
use d_inode() rather than d_backing_inode() to get the inode pointer).
Note that the dentry type field may be set to something other than
DCACHE_MISS_TYPE when d_inode is NULL in the case of unionmount, where the VFS
manages the fall-through from a negative dentry to a lower layer. In such a
case, the dentry type of the negative union dentry is set to the same as the
type of the lower dentry.
However, if you know d_inode is not NULL at the call site, then you can use
the d_is_xxx() functions even in a filesystem.
There is one further complication: a 0,0 chardev dentry may be labelled
DCACHE_WHITEOUT_TYPE rather than DCACHE_SPECIAL_TYPE. Strictly, this was
intended for special directory entry types that don't have attached inodes.
The following perl+coccinelle script was used:
use strict;
my @callers;
open($fd, 'git grep -l \'S_IS[A-Z].*->d_inode\' |') ||
die "Can't grep for S_ISDIR and co. callers";
@callers = <$fd>;
close($fd);
unless (@callers) {
print "No matches\n";
exit(0);
}
my @cocci = (
'@@',
'expression E;',
'@@',
'',
'- S_ISLNK(E->d_inode->i_mode)',
'+ d_is_symlink(E)',
'',
'@@',
'expression E;',
'@@',
'',
'- S_ISDIR(E->d_inode->i_mode)',
'+ d_is_dir(E)',
'',
'@@',
'expression E;',
'@@',
'',
'- S_ISREG(E->d_inode->i_mode)',
'+ d_is_reg(E)' );
my $coccifile = "tmp.sp.cocci";
open($fd, ">$coccifile") || die $coccifile;
print($fd "$_\n") || die $coccifile foreach (@cocci);
close($fd);
foreach my $file (@callers) {
chomp $file;
print "Processing ", $file, "\n";
system("spatch", "--sp-file", $coccifile, $file, "--in-place", "--no-show-diff") == 0 ||
die "spatch failed";
}
[AV: overlayfs parts skipped]
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2015-01-29 12:02:35 +00:00
new_is_dir = d_is_dir ( new_dentry ) ;
2023-03-09 13:30:37 -05:00
rc = avc_has_perm ( sid , new_isec - > sid ,
2005-04-16 15:20:36 -07:00
new_isec - > sclass ,
( new_is_dir ? DIR__RMDIR : FILE__UNLINK ) , & ad ) ;
if ( rc )
return rc ;
}
return 0 ;
}
/* Check whether a task can perform a filesystem operation. */
2008-11-14 10:39:21 +11:00
static int superblock_has_perm ( const struct cred * cred ,
2023-08-23 11:44:41 +05:00
const struct super_block * sb ,
2005-04-16 15:20:36 -07:00
u32 perms ,
2009-07-14 12:14:09 -04:00
struct common_audit_data * ad )
2005-04-16 15:20:36 -07:00
{
struct superblock_security_struct * sbsec ;
2008-11-14 10:39:21 +11:00
u32 sid = cred_sid ( cred ) ;
2005-04-16 15:20:36 -07:00
2021-04-22 17:41:15 +02:00
sbsec = selinux_superblock ( sb ) ;
2023-03-09 13:30:37 -05:00
return avc_has_perm ( sid , sbsec - > sid , SECCLASS_FILESYSTEM , perms , ad ) ;
2005-04-16 15:20:36 -07:00
}
/* Convert a Linux mode and permission mask to an access vector. */
static inline u32 file_mask_to_av ( int mode , int mask )
{
u32 av = 0 ;
2011-07-25 20:49:29 -04:00
if ( ! S_ISDIR ( mode ) ) {
2005-04-16 15:20:36 -07:00
if ( mask & MAY_EXEC )
av | = FILE__EXECUTE ;
if ( mask & MAY_READ )
av | = FILE__READ ;
if ( mask & MAY_APPEND )
av | = FILE__APPEND ;
else if ( mask & MAY_WRITE )
av | = FILE__WRITE ;
} else {
if ( mask & MAY_EXEC )
av | = DIR__SEARCH ;
if ( mask & MAY_WRITE )
av | = DIR__WRITE ;
if ( mask & MAY_READ )
av | = DIR__READ ;
}
return av ;
}
2008-10-29 17:06:46 -04:00
/* Convert a Linux file to an access vector. */
2023-08-12 20:31:08 +05:00
static inline u32 file_to_av ( const struct file * file )
2008-10-29 17:06:46 -04:00
{
u32 av = 0 ;
if ( file - > f_mode & FMODE_READ )
av | = FILE__READ ;
if ( file - > f_mode & FMODE_WRITE ) {
if ( file - > f_flags & O_APPEND )
av | = FILE__APPEND ;
else
av | = FILE__WRITE ;
}
if ( ! av ) {
/*
* Special file opened with flags 3 for ioctl - only use .
*/
av = FILE__IOCTL ;
}
return av ;
}
2008-02-28 12:58:40 -05:00
/*
2020-08-07 09:51:34 -07:00
* Convert a file to an access vector and include the correct
2008-02-28 12:58:40 -05:00
* open permission .
*/
2008-10-29 17:06:46 -04:00
static inline u32 open_file_to_av ( struct file * file )
2008-02-28 12:58:40 -05:00
{
2008-10-29 17:06:46 -04:00
u32 av = file_to_av ( file ) ;
2017-05-12 12:41:24 -04:00
struct inode * inode = file_inode ( file ) ;
2008-02-28 12:58:40 -05:00
2018-03-01 18:48:02 -05:00
if ( selinux_policycap_openperm ( ) & &
inode - > i_sb - > s_magic ! = SOCKFS_MAGIC )
2010-07-23 11:44:09 -04:00
av | = FILE__OPEN ;
2008-02-28 12:58:40 -05:00
return av ;
}
2005-04-16 15:20:36 -07:00
/* Hook functions begin here. */
2021-10-12 09:56:13 -07:00
static int selinux_binder_set_context_mgr ( const struct cred * mgr )
2015-01-21 10:54:10 -05:00
{
2023-03-09 13:30:37 -05:00
return avc_has_perm ( current_sid ( ) , cred_sid ( mgr ) , SECCLASS_BINDER ,
2015-01-21 10:54:10 -05:00
BINDER__SET_CONTEXT_MGR , NULL ) ;
}
2021-10-12 09:56:13 -07:00
static int selinux_binder_transaction ( const struct cred * from ,
const struct cred * to )
2015-01-21 10:54:10 -05:00
{
u32 mysid = current_sid ( ) ;
2021-10-12 09:56:13 -07:00
u32 fromsid = cred_sid ( from ) ;
u32 tosid = cred_sid ( to ) ;
2015-01-21 10:54:10 -05:00
int rc ;
if ( mysid ! = fromsid ) {
2023-03-09 13:30:37 -05:00
rc = avc_has_perm ( mysid , fromsid , SECCLASS_BINDER ,
2015-01-21 10:54:10 -05:00
BINDER__IMPERSONATE , NULL ) ;
if ( rc )
return rc ;
}
2023-03-09 13:30:37 -05:00
return avc_has_perm ( fromsid , tosid ,
2021-02-18 15:13:40 -05:00
SECCLASS_BINDER , BINDER__CALL , NULL ) ;
2015-01-21 10:54:10 -05:00
}
2021-10-12 09:56:13 -07:00
static int selinux_binder_transfer_binder ( const struct cred * from ,
const struct cred * to )
2015-01-21 10:54:10 -05:00
{
2023-03-09 13:30:37 -05:00
return avc_has_perm ( cred_sid ( from ) , cred_sid ( to ) ,
2021-02-18 15:13:40 -05:00
SECCLASS_BINDER , BINDER__TRANSFER ,
2015-01-21 10:54:10 -05:00
NULL ) ;
}
2021-10-12 09:56:13 -07:00
static int selinux_binder_transfer_file ( const struct cred * from ,
const struct cred * to ,
2023-08-12 20:31:08 +05:00
const struct file * file )
2015-01-21 10:54:10 -05:00
{
2021-10-12 09:56:13 -07:00
u32 sid = cred_sid ( to ) ;
2018-09-21 17:22:32 -07:00
struct file_security_struct * fsec = selinux_file ( file ) ;
2015-12-24 11:09:39 -05:00
struct dentry * dentry = file - > f_path . dentry ;
2016-04-04 14:14:42 -04:00
struct inode_security_struct * isec ;
2015-01-21 10:54:10 -05:00
struct common_audit_data ad ;
int rc ;
ad . type = LSM_AUDIT_DATA_PATH ;
ad . u . path = file - > f_path ;
if ( sid ! = fsec - > sid ) {
2023-03-09 13:30:37 -05:00
rc = avc_has_perm ( sid , fsec - > sid ,
2015-01-21 10:54:10 -05:00
SECCLASS_FD ,
FD__USE ,
& ad ) ;
if ( rc )
return rc ;
}
2017-10-18 13:00:26 -07:00
# ifdef CONFIG_BPF_SYSCALL
rc = bpf_fd_pass ( file , sid ) ;
if ( rc )
return rc ;
# endif
2015-12-24 11:09:39 -05:00
if ( unlikely ( IS_PRIVATE ( d_backing_inode ( dentry ) ) ) )
2015-01-21 10:54:10 -05:00
return 0 ;
2016-04-04 14:14:42 -04:00
isec = backing_inode_security ( dentry ) ;
2023-03-09 13:30:37 -05:00
return avc_has_perm ( sid , isec - > sid , isec - > sclass , file_to_av ( file ) ,
2015-01-21 10:54:10 -05:00
& ad ) ;
}
2009-05-07 19:26:19 +10:00
static int selinux_ptrace_access_check ( struct task_struct * child ,
2021-02-18 15:13:40 -05:00
unsigned int mode )
2005-04-16 15:20:36 -07:00
{
2017-01-09 10:07:31 -05:00
u32 sid = current_sid ( ) ;
2021-02-18 15:13:40 -05:00
u32 csid = task_sid_obj ( child ) ;
2017-01-09 10:07:31 -05:00
if ( mode & PTRACE_MODE_READ )
2023-03-09 13:30:37 -05:00
return avc_has_perm ( sid , csid , SECCLASS_FILE , FILE__READ ,
NULL ) ;
Security: split proc ptrace checking into read vs. attach
Enable security modules to distinguish reading of process state via
proc from full ptrace access by renaming ptrace_may_attach to
ptrace_may_access and adding a mode argument indicating whether only
read access or full attach access is requested. This allows security
modules to permit access to reading process state without granting
full ptrace access. The base DAC/capability checking remains unchanged.
Read access to /proc/pid/mem continues to apply a full ptrace attach
check since check_mem_permission() already requires the current task
to already be ptracing the target. The other ptrace checks within
proc for elements like environ, maps, and fds are changed to pass the
read mode instead of attach.
In the SELinux case, we model such reading of process state as a
reading of a proc file labeled with the target process' label. This
enables SELinux policy to permit such reading of process state without
permitting control or manipulation of the target process, as there are
a number of cases where programs probe for such information via proc
but do not need to be able to control the target (e.g. procps,
lsof, PolicyKit, ConsoleKit). At present we have to choose between
allowing full ptrace in policy (more permissive than required/desired)
or breaking functionality (or in some cases just silencing the denials
via dontaudit rules but this can hide genuine attacks).
This version of the patch incorporates comments from Casey Schaufler
(change/replace existing ptrace_may_attach interface, pass access
mode), and Chris Wright (provide greater consistency in the checking).
Note that like their predecessors __ptrace_may_attach and
ptrace_may_attach, the __ptrace_may_access and ptrace_may_access
interfaces use different return value conventions from each other (0
or -errno vs. 1 or 0). I retained this difference to avoid any
changes to the caller logic but made the difference clearer by
changing the latter interface to return a bool rather than an int and
by adding a comment about it to ptrace.h for any future callers.
Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Acked-by: Chris Wright <chrisw@sous-sol.org>
Signed-off-by: James Morris <jmorris@namei.org>
2008-05-19 08:32:49 -04:00
2023-03-09 13:30:37 -05:00
return avc_has_perm ( sid , csid , SECCLASS_PROCESS , PROCESS__PTRACE ,
NULL ) ;
security: Fix setting of PF_SUPERPRIV by __capable()
Fix the setting of PF_SUPERPRIV by __capable() as it could corrupt the flags
the target process if that is not the current process and it is trying to
change its own flags in a different way at the same time.
__capable() is using neither atomic ops nor locking to protect t->flags. This
patch removes __capable() and introduces has_capability() that doesn't set
PF_SUPERPRIV on the process being queried.
This patch further splits security_ptrace() in two:
(1) security_ptrace_may_access(). This passes judgement on whether one
process may access another only (PTRACE_MODE_ATTACH for ptrace() and
PTRACE_MODE_READ for /proc), and takes a pointer to the child process.
current is the parent.
(2) security_ptrace_traceme(). This passes judgement on PTRACE_TRACEME only,
and takes only a pointer to the parent process. current is the child.
In Smack and commoncap, this uses has_capability() to determine whether
the parent will be permitted to use PTRACE_ATTACH if normal checks fail.
This does not set PF_SUPERPRIV.
Two of the instances of __capable() actually only act on current, and so have
been changed to calls to capable().
Of the places that were using __capable():
(1) The OOM killer calls __capable() thrice when weighing the killability of a
process. All of these now use has_capability().
(2) cap_ptrace() and smack_ptrace() were using __capable() to check to see
whether the parent was allowed to trace any process. As mentioned above,
these have been split. For PTRACE_ATTACH and /proc, capable() is now
used, and for PTRACE_TRACEME, has_capability() is used.
(3) cap_safe_nice() only ever saw current, so now uses capable().
(4) smack_setprocattr() rejected accesses to tasks other than current just
after calling __capable(), so the order of these two tests have been
switched and capable() is used instead.
(5) In smack_file_send_sigiotask(), we need to allow privileged processes to
receive SIGIO on files they're manipulating.
(6) In smack_task_wait(), we let a process wait for a privileged process,
whether or not the process doing the waiting is privileged.
I've tested this with the LTP SELinux and syscalls testscripts.
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Acked-by: Casey Schaufler <casey@schaufler-ca.com>
Acked-by: Andrew G. Morgan <morgan@kernel.org>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: James Morris <jmorris@namei.org>
2008-08-14 11:37:28 +01:00
}
static int selinux_ptrace_traceme ( struct task_struct * parent )
{
2023-03-09 13:30:37 -05:00
return avc_has_perm ( task_sid_obj ( parent ) , task_sid_obj ( current ) ,
2021-02-18 15:13:40 -05:00
SECCLASS_PROCESS , PROCESS__PTRACE , NULL ) ;
2005-04-16 15:20:36 -07:00
}
2023-08-07 11:59:29 +05:00
/*
 * Check PROCESS__GETCAP with the current task's SID first and @target's
 * SID second.  The three capability-set pointers are not used by this check.
 */
static int selinux_capget(const struct task_struct *target,
			  kernel_cap_t *effective, kernel_cap_t *inheritable,
			  kernel_cap_t *permitted)
{
	u32 self_sid = current_sid();
	u32 target_sid = task_sid_obj(target);

	return avc_has_perm(self_sid, target_sid, SECCLASS_PROCESS,
			    PROCESS__GETCAP, NULL);
}
CRED: Inaugurate COW credentials
Inaugurate copy-on-write credentials management. This uses RCU to manage the
credentials pointer in the task_struct with respect to accesses by other tasks.
A process may only modify its own credentials, and so does not need locking to
access or modify its own credentials.
A mutex (cred_replace_mutex) is added to the task_struct to control the effect
of PTRACE_ATTACHED on credential calculations, particularly with respect to
execve().
With this patch, the contents of an active credentials struct may not be
changed directly; rather a new set of credentials must be prepared, modified
and committed using something like the following sequence of events:
struct cred *new = prepare_creds();
int ret = blah(new);
if (ret < 0) {
abort_creds(new);
return ret;
}
return commit_creds(new);
There are some exceptions to this rule: the keyrings pointed to by the active
credentials may be instantiated - keyrings violate the COW rule as managing
COW keyrings is tricky, given that it is possible for a task to directly alter
the keys in a keyring in use by another task.
To help enforce this, various pointers to sets of credentials, such as those in
the task_struct, are declared const. The purpose of this is compile-time
discouragement of altering credentials through those pointers. Once a set of
credentials has been made public through one of these pointers, it may not be
modified, except under special circumstances:
(1) Its reference count may be incremented and decremented.
(2) The keyrings to which it points may be modified, but not replaced.
The only safe way to modify anything else is to create a replacement and commit
using the functions described in Documentation/credentials.txt (which will be
added by a later patch).
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
This now prepares and commits credentials in various places in the
security code rather than altering the current creds directly.
(2) Temporary credential overrides.
do_coredump() and sys_faccessat() now prepare their own credentials and
temporarily override the ones currently on the acting thread, whilst
preventing interference from other threads by holding cred_replace_mutex
on the thread being dumped.
This will be replaced in a future patch by something that hands down the
credentials directly to the functions being called, rather than altering
the task's objective credentials.
(3) LSM interface.
A number of functions have been changed, added or removed:
(*) security_capset_check(), ->capset_check()
(*) security_capset_set(), ->capset_set()
Removed in favour of security_capset().
(*) security_capset(), ->capset()
New. This is passed a pointer to the new creds, a pointer to the old
creds and the proposed capability sets. It should fill in the new
creds or return an error. All pointers, barring the pointer to the
new creds, are now const.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
Changed; now returns a value, which will cause the process to be
killed if it's an error.
(*) security_task_alloc(), ->task_alloc_security()
Removed in favour of security_prepare_creds().
(*) security_cred_free(), ->cred_free()
New. Free security data attached to cred->security.
(*) security_prepare_creds(), ->cred_prepare()
New. Duplicate any security data attached to cred->security.
(*) security_commit_creds(), ->cred_commit()
New. Apply any security effects for the upcoming installation of new
security by commit_creds().
(*) security_task_post_setuid(), ->task_post_setuid()
Removed in favour of security_task_fix_setuid().
(*) security_task_fix_setuid(), ->task_fix_setuid()
Fix up the proposed new credentials for setuid(). This is used by
cap_set_fix_setuid() to implicitly adjust capabilities in line with
setuid() changes. Changes are made to the new credentials, rather
than the task itself as in security_task_post_setuid().
(*) security_task_reparent_to_init(), ->task_reparent_to_init()
Removed. Instead the task being reparented to init is referred
directly to init's credentials.
NOTE! This results in the loss of some state: SELinux's osid no
longer records the sid of the thread that forked it.
(*) security_key_alloc(), ->key_alloc()
(*) security_key_permission(), ->key_permission()
Changed. These now take cred pointers rather than task pointers to
refer to the security context.
(4) sys_capset().
This has been simplified and uses less locking. The LSM functions it
calls have been merged.
(5) reparent_to_kthreadd().
This gives the current thread the same credentials as init by simply using
commit_thread() to point that way.
(6) __sigqueue_alloc() and switch_uid()
__sigqueue_alloc() can't stop the target task from changing its creds
beneath it, so this function gets a reference to the currently applicable
user_struct which it then passes into the sigqueue struct it returns if
successful.
switch_uid() is now called from commit_creds(), and possibly should be
folded into that. commit_creds() should take care of protecting
__sigqueue_alloc().
(7) [sg]et[ug]id() and co and [sg]et_current_groups.
The set functions now all use prepare_creds(), commit_creds() and
abort_creds() to build and check a new set of credentials before applying
it.
security_task_set[ug]id() is called inside the prepared section. This
guarantees that nothing else will affect the creds until we've finished.
The calling of set_dumpable() has been moved into commit_creds().
Much of the functionality of set_user() has been moved into
commit_creds().
The get functions all simply access the data directly.
(8) security_task_prctl() and cap_task_prctl().
security_task_prctl() has been modified to return -ENOSYS if it doesn't
want to handle a function, or otherwise return the return value directly
rather than through an argument.
Additionally, cap_task_prctl() now prepares a new set of credentials, even
if it doesn't end up using it.
(9) Keyrings.
A number of changes have been made to the keyrings code:
(a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have
all been dropped and built in to the credentials functions directly.
They may want separating out again later.
(b) key_alloc() and search_process_keyrings() now take a cred pointer
rather than a task pointer to specify the security context.
(c) copy_creds() gives a new thread within the same thread group a new
thread keyring if its parent had one, otherwise it discards the thread
keyring.
(d) The authorisation key now points directly to the credentials to extend
the search into rather than pointing to the task that carries them.
(e) Installing thread, process or session keyrings causes a new set of
credentials to be created, even though it's not strictly necessary for
process or session keyrings (they're shared).
(10) Usermode helper.
The usermode helper code now carries a cred struct pointer in its
subprocess_info struct instead of a new session keyring pointer. This set
of credentials is derived from init_cred and installed on the new process
after it has been cloned.
call_usermodehelper_setup() allocates the new credentials and
call_usermodehelper_freeinfo() discards them if they haven't been used. A
special cred function (prepare_usermodeinfo_creds()) is provided
specifically for call_usermodehelper_setup() to call.
call_usermodehelper_setkeys() adjusts the credentials to sport the
supplied keyring as the new session keyring.
(11) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) selinux_setprocattr() no longer does its check for whether the
current ptracer can access processes with the new SID inside the lock
that covers getting the ptracer's SID. Whilst this lock ensures that
the check is done with the ptracer pinned, the result is only valid
until the lock is released, so there's no point doing it inside the
lock.
(12) is_single_threaded().
This function has been extracted from selinux_setprocattr() and put into
a file of its own in the lib/ directory as join_session_keyring() now
wants to use it too.
The code in SELinux just checked to see whether a task shared mm_structs
with other tasks (CLONE_VM), but that isn't good enough. We really want
to know if they're part of the same thread group (CLONE_THREAD).
(13) nfsd.
The NFS server daemon now has to use the COW credentials to set the
credentials it is going to use. It really needs to pass the credentials
down to the functions it calls, but it can't do that until other patches
in this series have been applied.
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:23 +11:00
static int selinux_capset ( struct cred * new , const struct cred * old ,
const kernel_cap_t * effective ,
const kernel_cap_t * inheritable ,
const kernel_cap_t * permitted )
2005-04-16 15:20:36 -07:00
{
2023-03-09 13:30:37 -05:00
return avc_has_perm ( cred_sid ( old ) , cred_sid ( new ) , SECCLASS_PROCESS ,
2017-01-09 10:07:31 -05:00
PROCESS__SETCAP , NULL ) ;
2005-04-16 15:20:36 -07:00
}
2009-01-30 10:05:06 +11:00
/*
* ( This comment used to live with the selinux_task_setuid hook ,
* which was removed ) .
*
* Since setuid only affects the current process , and since the SELinux
* controls are not based on the Linux identity attributes , SELinux does not
* need to control this operation . However , SELinux does control the use of
* the CAP_SETUID and CAP_SETGID capabilities using the capable hook .
*/
2012-01-03 12:25:14 -05:00
static int selinux_capable ( const struct cred * cred , struct user_namespace * ns ,
2019-01-07 16:10:53 -08:00
int cap , unsigned int opts )
2005-04-16 15:20:36 -07:00
{
2019-01-07 16:10:53 -08:00
return cred_has_capability ( cred , cap , opts , ns = = & init_user_ns ) ;
2005-04-16 15:20:36 -07:00
}
2023-08-23 11:44:41 +05:00
static int selinux_quotactl ( int cmds , int type , int id , const struct super_block * sb )
2005-04-16 15:20:36 -07:00
{
2008-11-14 10:39:21 +11:00
const struct cred * cred = current_cred ( ) ;
2005-04-16 15:20:36 -07:00
int rc = 0 ;
if ( ! sb )
return 0 ;
switch ( cmds ) {
2008-04-17 13:17:49 -04:00
case Q_SYNC :
case Q_QUOTAON :
case Q_QUOTAOFF :
case Q_SETINFO :
case Q_SETQUOTA :
2020-02-20 15:32:34 +00:00
case Q_XQUOTAOFF :
case Q_XQUOTAON :
case Q_XSETQLIM :
2008-11-14 10:39:21 +11:00
rc = superblock_has_perm ( cred , sb , FILESYSTEM__QUOTAMOD , NULL ) ;
2008-04-17 13:17:49 -04:00
break ;
case Q_GETFMT :
case Q_GETINFO :
case Q_GETQUOTA :
2020-02-20 15:32:34 +00:00
case Q_XGETQUOTA :
case Q_XGETQSTAT :
case Q_XGETQSTATV :
case Q_XGETNEXTQUOTA :
2008-11-14 10:39:21 +11:00
rc = superblock_has_perm ( cred , sb , FILESYSTEM__QUOTAGET , NULL ) ;
2008-04-17 13:17:49 -04:00
break ;
default :
rc = 0 ; /* let the kernel handle invalid cmds */
break ;
2005-04-16 15:20:36 -07:00
}
return rc ;
}
static int selinux_quota_on ( struct dentry * dentry )
{
2008-11-14 10:39:21 +11:00
const struct cred * cred = current_cred ( ) ;
2011-04-28 16:04:24 -04:00
return dentry_has_perm ( cred , dentry , FILE__QUOTAON ) ;
2005-04-16 15:20:36 -07:00
}
2010-11-15 18:36:29 -05:00
static int selinux_syslog ( int type )
2005-04-16 15:20:36 -07:00
{
switch ( type ) {
2010-02-03 15:37:13 -08:00
case SYSLOG_ACTION_READ_ALL : /* Read last kernel messages */
case SYSLOG_ACTION_SIZE_BUFFER : /* Return size of the log buffer */
2023-03-09 13:30:37 -05:00
return avc_has_perm ( current_sid ( ) , SECINITSID_KERNEL ,
2017-01-09 10:07:31 -05:00
SECCLASS_SYSTEM , SYSTEM__SYSLOG_READ , NULL ) ;
2010-02-03 15:37:13 -08:00
case SYSLOG_ACTION_CONSOLE_OFF : /* Disable logging to console */
case SYSLOG_ACTION_CONSOLE_ON : /* Enable logging to console */
/* Set level of messages printed to console */
case SYSLOG_ACTION_CONSOLE_LEVEL :
2023-03-09 13:30:37 -05:00
return avc_has_perm ( current_sid ( ) , SECINITSID_KERNEL ,
2017-01-09 10:07:31 -05:00
SECCLASS_SYSTEM , SYSTEM__SYSLOG_CONSOLE ,
NULL ) ;
2005-04-16 15:20:36 -07:00
}
2017-01-09 10:07:31 -05:00
/* All other syslog types */
2023-03-09 13:30:37 -05:00
return avc_has_perm ( current_sid ( ) , SECINITSID_KERNEL ,
2017-01-09 10:07:31 -05:00
SECCLASS_SYSTEM , SYSTEM__SYSLOG_MOD , NULL ) ;
2005-04-16 15:20:36 -07:00
}
/*
2024-07-24 10:06:58 +08:00
* Check permission for allocating a new virtual mapping . Returns
* 0 if permission is granted , negative error code if not .
2005-04-16 15:20:36 -07:00
*
* Do not audit the selinux permission check , as this is applied to all
* processes that allocate mappings .
*/
2007-08-22 14:01:28 -07:00
static int selinux_vm_enough_memory ( struct mm_struct * mm , long pages )
2005-04-16 15:20:36 -07:00
{
2024-07-24 10:06:58 +08:00
return cred_has_capability ( current_cred ( ) , CAP_SYS_ADMIN ,
CAP_OPT_NOAUDIT , true ) ;
2005-04-16 15:20:36 -07:00
}
/* binprm security operations */
2017-01-09 10:07:31 -05:00
static u32 ptrace_parent_sid ( void )
2016-03-30 21:41:21 -04:00
{
u32 sid = 0 ;
struct task_struct * tracer ;
rcu_read_lock ( ) ;
2017-01-09 10:07:31 -05:00
tracer = ptrace_parent ( current ) ;
2016-03-30 21:41:21 -04:00
if ( tracer )
2021-02-18 15:13:40 -05:00
sid = task_sid_obj ( tracer ) ;
2016-03-30 21:41:21 -04:00
rcu_read_unlock ( ) ;
return sid ;
}
2014-08-04 13:36:49 -04:00
static int check_nnp_nosuid ( const struct linux_binprm * bprm ,
const struct task_security_struct * old_tsec ,
const struct task_security_struct * new_tsec )
{
int nnp = ( bprm - > unsafe & LSM_UNSAFE_NO_NEW_PRIVS ) ;
fs: Treat foreign mounts as nosuid
If a process gets access to a mount from a different user
namespace, that process should not be able to take advantage of
setuid files or selinux entrypoints from that filesystem. Prevent
this by treating mounts from other mount namespaces and those not
owned by current_user_ns() or an ancestor as nosuid.
This will make it safer to allow more complex filesystems to be
mounted in non-root user namespaces.
This does not remove the need for MNT_LOCK_NOSUID. The setuid,
setgid, and file capability bits can no longer be abused if code in
a user namespace were to clear nosuid on an untrusted filesystem,
but this patch, by itself, is insufficient to protect the system
from abuse of files that, when execed, would increase MAC privilege.
As a more concrete explanation, any task that can manipulate a
vfsmount associated with a given user namespace already has
capabilities in that namespace and all of its descendents. If they
can cause a malicious setuid, setgid, or file-caps executable to
appear in that mount, then that executable will only allow them to
elevate privileges in exactly the set of namespaces in which they
are already privileged.
On the other hand, if they can cause a malicious executable to
appear with a dangerous MAC label, running it could change the
caller's security context in a way that should not have been
possible, even inside the namespace in which the task is confined.
As a hardening measure, this would have made CVE-2014-5207 much
more difficult to exploit.
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Seth Forshee <seth.forshee@canonical.com>
Acked-by: James Morris <james.l.morris@oracle.com>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
2016-06-23 16:41:05 -05:00
int nosuid = ! mnt_may_suid ( bprm - > file - > f_path . mnt ) ;
2014-08-04 13:36:49 -04:00
int rc ;
selinux: Generalize support for NNP/nosuid SELinux domain transitions
As systemd ramps up enabling NNP (NoNewPrivileges) for system services,
it is increasingly breaking SELinux domain transitions for those services
and their descendants. systemd enables NNP not only for services whose
unit files explicitly specify NoNewPrivileges=yes but also for services
whose unit files specify any of the following options in combination with
running without CAP_SYS_ADMIN (e.g. specifying User= or a
CapabilityBoundingSet= without CAP_SYS_ADMIN): SystemCallFilter=,
SystemCallArchitectures=, RestrictAddressFamilies=, RestrictNamespaces=,
PrivateDevices=, ProtectKernelTunables=, ProtectKernelModules=,
MemoryDenyWriteExecute=, or RestrictRealtime= as per the systemd.exec(5)
man page.
The end result is bad for the security of both SELinux-disabled and
SELinux-enabled systems. Packagers have to turn off these
options in the unit files to preserve SELinux domain transitions. For
users who choose to disable SELinux, this means that they miss out on
at least having the systemd-supported protections. For users who keep
SELinux enabled, they may still be missing out on some protections
because it isn't necessarily guaranteed that the SELinux policy for
that service provides the same protections in all cases.
commit 7b0d0b40cd78 ("selinux: Permit bounded transitions under
NO_NEW_PRIVS or NOSUID.") allowed bounded transitions under NNP in
order to support limited usage for sandboxing programs. However,
defining typebounds for all of the affected service domains
is impractical to implement in policy, since typebounds requires us
to ensure that each domain is allowed everything all of its descendant
domains are allowed, and this has to be repeated for the entire chain
of domain transitions. There is no way to clone all allow rules from
descendants to their ancestors in policy currently, and doing so would
be undesirable even if it were practical, as it requires leaking
permissions to objects and operations into ancestor domains that could
weaken their own security in order to allow them to the descendants
(e.g. if a descendant requires execmem permission, then so do all of
its ancestors; if a descendant requires execute permission to a file,
then so do all of its ancestors; if a descendant requires read to a
symbolic link or temporary file, then so do all of its ancestors...).
SELinux domains are intentionally not hierarchical / bounded in this
manner normally, and making them so would undermine their protections
and least privilege.
We have long had a similar tension with SELinux transitions and nosuid
mounts, albeit not as severe. Users often have had to choose between
retaining nosuid on a mount and allowing SELinux domain transitions on
files within those mounts. This likewise leads to unfortunate tradeoffs
in security.
Decouple NNP/nosuid from SELinux transitions, so that we don't have to
make a choice between them. Introduce a nnp_nosuid_transition policy
capability that enables transitions under NNP/nosuid to be based on
a permission (nnp_transition for NNP; nosuid_transition for nosuid)
between the old and new contexts in addition to the current support
for bounded transitions. Domain transitions can then be allowed in
policy without requiring the parent to be a strict superset of all of
its children.
With this change, systemd unit files can be left unmodified from upstream.
SELinux-disabled and SELinux-enabled users will benefit from retaining any
of the systemd-provided protections. SELinux policy will only need to
be adapted to enable the new policy capability and to allow the
new permissions between domain pairs as appropriate.
NB: Allowing nnp_transition between two contexts opens up the potential
for the old context to subvert the new context by installing seccomp
filters before the execve. Allowing nosuid_transition between two contexts
opens up the potential for a context transition to occur on a file from
an untrusted filesystem (e.g. removable media or remote filesystem). Use
with care.
Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: Paul Moore <paul@paul-moore.com>
2017-07-31 10:12:46 -04:00
u32 av ;
2014-08-04 13:36:49 -04:00
if ( ! nnp & & ! nosuid )
return 0 ; /* neither NNP nor nosuid */
if ( new_tsec - > sid = = old_tsec - > sid )
return 0 ; /* No change in credentials */
/*
selinux: Generalize support for NNP/nosuid SELinux domain transitions
As systemd ramps up enabling NNP (NoNewPrivileges) for system services,
it is increasingly breaking SELinux domain transitions for those services
and their descendants. systemd enables NNP not only for services whose
unit files explicitly specify NoNewPrivileges=yes but also for services
whose unit files specify any of the following options in combination with
running without CAP_SYS_ADMIN (e.g. specifying User= or a
CapabilityBoundingSet= without CAP_SYS_ADMIN): SystemCallFilter=,
SystemCallArchitectures=, RestrictAddressFamilies=, RestrictNamespaces=,
PrivateDevices=, ProtectKernelTunables=, ProtectKernelModules=,
MemoryDenyWriteExecute=, or RestrictRealtime= as per the systemd.exec(5)
man page.
The end result is bad for the security of both SELinux-disabled and
SELinux-enabled systems. Packagers have to turn off these
options in the unit files to preserve SELinux domain transitions. For
users who choose to disable SELinux, this means that they miss out on
at least having the systemd-supported protections. For users who keep
SELinux enabled, they may still be missing out on some protections
because it isn't necessarily guaranteed that the SELinux policy for
that service provides the same protections in all cases.
commit 7b0d0b40cd78 ("selinux: Permit bounded transitions under
NO_NEW_PRIVS or NOSUID.") allowed bounded transitions under NNP in
order to support limited usage for sandboxing programs. However,
defining typebounds for all of the affected service domains
is impractical to implement in policy, since typebounds requires us
to ensure that each domain is allowed everything all of its descendant
domains are allowed, and this has to be repeated for the entire chain
of domain transitions. There is no way to clone all allow rules from
descendants to their ancestors in policy currently, and doing so would
be undesirable even if it were practical, as it requires leaking
permissions to objects and operations into ancestor domains that could
weaken their own security in order to allow them to the descendants
(e.g. if a descendant requires execmem permission, then so do all of
its ancestors; if a descendant requires execute permission to a file,
then so do all of its ancestors; if a descendant requires read to a
symbolic link or temporary file, then so do all of its ancestors...).
SELinux domains are intentionally not hierarchical / bounded in this
manner normally, and making them so would undermine their protections
and least privilege.
We have long had a similar tension with SELinux transitions and nosuid
mounts, albeit not as severe. Users often have had to choose between
retaining nosuid on a mount and allowing SELinux domain transitions on
files within those mounts. This likewise leads to unfortunate tradeoffs
in security.
Decouple NNP/nosuid from SELinux transitions, so that we don't have to
make a choice between them. Introduce a nnp_nosuid_transition policy
capability that enables transitions under NNP/nosuid to be based on
a permission (nnp_transition for NNP; nosuid_transition for nosuid)
between the old and new contexts in addition to the current support
for bounded transitions. Domain transitions can then be allowed in
policy without requiring the parent to be a strict superset of all of
its children.
With this change, systemd unit files can be left unmodified from upstream.
SELinux-disabled and SELinux-enabled users will benefit from retaining any
of the systemd-provided protections. SELinux policy will only need to
be adapted to enable the new policy capability and to allow the
new permissions between domain pairs as appropriate.
NB: Allowing nnp_transition between two contexts opens up the potential
for the old context to subvert the new context by installing seccomp
filters before the execve. Allowing nosuid_transition between two contexts
opens up the potential for a context transition to occur on a file from
an untrusted filesystem (e.g. removable media or remote filesystem). Use
with care.
Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: Paul Moore <paul@paul-moore.com>
2017-07-31 10:12:46 -04:00
* If the policy enables the nnp_nosuid_transition policy capability ,
* then we permit transitions under NNP or nosuid if the
* policy allows the corresponding permission between
* the old and new contexts .
2014-08-04 13:36:49 -04:00
*/
2018-03-01 18:48:02 -05:00
if ( selinux_policycap_nnp_nosuid_transition ( ) ) {
selinux: Generalize support for NNP/nosuid SELinux domain transitions
As systemd ramps up enabling NNP (NoNewPrivileges) for system services,
it is increasingly breaking SELinux domain transitions for those services
and their descendants. systemd enables NNP not only for services whose
unit files explicitly specify NoNewPrivileges=yes but also for services
whose unit files specify any of the following options in combination with
running without CAP_SYS_ADMIN (e.g. specifying User= or a
CapabilityBoundingSet= without CAP_SYS_ADMIN): SystemCallFilter=,
SystemCallArchitectures=, RestrictAddressFamilies=, RestrictNamespaces=,
PrivateDevices=, ProtectKernelTunables=, ProtectKernelModules=,
MemoryDenyWriteExecute=, or RestrictRealtime= as per the systemd.exec(5)
man page.
The end result is bad for the security of both SELinux-disabled and
SELinux-enabled systems. Packagers have to turn off these
options in the unit files to preserve SELinux domain transitions. For
users who choose to disable SELinux, this means that they miss out on
at least having the systemd-supported protections. For users who keep
SELinux enabled, they may still be missing out on some protections
because it isn't necessarily guaranteed that the SELinux policy for
that service provides the same protections in all cases.
commit 7b0d0b40cd78 ("selinux: Permit bounded transitions under
NO_NEW_PRIVS or NOSUID.") allowed bounded transitions under NNP in
order to support limited usage for sandboxing programs. However,
defining typebounds for all of the affected service domains
is impractical to implement in policy, since typebounds requires us
to ensure that each domain is allowed everything all of its descendant
domains are allowed, and this has to be repeated for the entire chain
of domain transitions. There is no way to clone all allow rules from
descendants to their ancestors in policy currently, and doing so would
be undesirable even if it were practical, as it requires leaking
permissions to objects and operations into ancestor domains that could
weaken their own security in order to allow them to the descendants
(e.g. if a descendant requires execmem permission, then so do all of
its ancestors; if a descendant requires execute permission to a file,
then so do all of its ancestors; if a descendant requires read to a
symbolic link or temporary file, then so do all of its ancestors...).
SELinux domains are intentionally not hierarchical / bounded in this
manner normally, and making them so would undermine their protections
and least privilege.
We have long had a similar tension with SELinux transitions and nosuid
mounts, albeit not as severe. Users often have had to choose between
retaining nosuid on a mount and allowing SELinux domain transitions on
files within those mounts. This likewise leads to unfortunate tradeoffs
in security.
Decouple NNP/nosuid from SELinux transitions, so that we don't have to
make a choice between them. Introduce a nnp_nosuid_transition policy
capability that enables transitions under NNP/nosuid to be based on
a permission (nnp_transition for NNP; nosuid_transition for nosuid)
between the old and new contexts in addition to the current support
for bounded transitions. Domain transitions can then be allowed in
policy without requiring the parent to be a strict superset of all of
its children.
With this change, systemd unit files can be left unmodified from upstream.
SELinux-disabled and SELinux-enabled users will benefit from retaining any
of the systemd-provided protections. SELinux policy will only need to
be adapted to enable the new policy capability and to allow the
new permissions between domain pairs as appropriate.
NB: Allowing nnp_transition between two contexts opens up the potential
for the old context to subvert the new context by installing seccomp
filters before the execve. Allowing nosuid_transition between two contexts
opens up the potential for a context transition to occur on a file from
an untrusted filesystem (e.g. removable media or remote filesystem). Use
with care.
Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: Paul Moore <paul@paul-moore.com>
2017-07-31 10:12:46 -04:00
av = 0 ;
2014-08-04 13:36:49 -04:00
if ( nnp )
selinux: Generalize support for NNP/nosuid SELinux domain transitions
As systemd ramps up enabling NNP (NoNewPrivileges) for system services,
it is increasingly breaking SELinux domain transitions for those services
and their descendants. systemd enables NNP not only for services whose
unit files explicitly specify NoNewPrivileges=yes but also for services
whose unit files specify any of the following options in combination with
running without CAP_SYS_ADMIN (e.g. specifying User= or a
CapabilityBoundingSet= without CAP_SYS_ADMIN): SystemCallFilter=,
SystemCallArchitectures=, RestrictAddressFamilies=, RestrictNamespaces=,
PrivateDevices=, ProtectKernelTunables=, ProtectKernelModules=,
MemoryDenyWriteExecute=, or RestrictRealtime= as per the systemd.exec(5)
man page.
The end result is bad for the security of both SELinux-disabled and
SELinux-enabled systems. Packagers have to turn off these
options in the unit files to preserve SELinux domain transitions. For
users who choose to disable SELinux, this means that they miss out on
at least having the systemd-supported protections. For users who keep
SELinux enabled, they may still be missing out on some protections
because it isn't necessarily guaranteed that the SELinux policy for
that service provides the same protections in all cases.
commit 7b0d0b40cd78 ("selinux: Permit bounded transitions under
NO_NEW_PRIVS or NOSUID.") allowed bounded transitions under NNP in
order to support limited usage for sandboxing programs. However,
defining typebounds for all of the affected service domains
is impractical to implement in policy, since typebounds requires us
to ensure that each domain is allowed everything all of its descendant
domains are allowed, and this has to be repeated for the entire chain
of domain transitions. There is no way to clone all allow rules from
descendants to their ancestors in policy currently, and doing so would
be undesirable even if it were practical, as it requires leaking
permissions to objects and operations into ancestor domains that could
weaken their own security in order to allow them to the descendants
(e.g. if a descendant requires execmem permission, then so do all of
its ancestors; if a descendant requires execute permission to a file,
then so do all of its ancestors; if a descendant requires read to a
symbolic link or temporary file, then so do all of its ancestors...).
SELinux domains are intentionally not hierarchical / bounded in this
manner normally, and making them so would undermine their protections
and least privilege.
We have long had a similar tension with SELinux transitions and nosuid
mounts, albeit not as severe. Users often have had to choose between
retaining nosuid on a mount and allowing SELinux domain transitions on
files within those mounts. This likewise leads to unfortunate tradeoffs
in security.
Decouple NNP/nosuid from SELinux transitions, so that we don't have to
make a choice between them. Introduce a nnp_nosuid_transition policy
capability that enables transitions under NNP/nosuid to be based on
a permission (nnp_transition for NNP; nosuid_transition for nosuid)
between the old and new contexts in addition to the current support
for bounded transitions. Domain transitions can then be allowed in
policy without requiring the parent to be a strict superset of all of
its children.
With this change, systemd unit files can be left unmodified from upstream.
SELinux-disabled and SELinux-enabled users will benefit from retaining any
of the systemd-provided protections. SELinux policy will only need to
be adapted to enable the new policy capability and to allow the
new permissions between domain pairs as appropriate.
NB: Allowing nnp_transition between two contexts opens up the potential
for the old context to subvert the new context by installing seccomp
filters before the execve. Allowing nosuid_transition between two contexts
opens up the potential for a context transition to occur on a file from
an untrusted filesystem (e.g. removable media or remote filesystem). Use
with care.
Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: Paul Moore <paul@paul-moore.com>
2017-07-31 10:12:46 -04:00
av | = PROCESS2__NNP_TRANSITION ;
if ( nosuid )
av | = PROCESS2__NOSUID_TRANSITION ;
2023-03-09 13:30:37 -05:00
rc = avc_has_perm ( old_tsec - > sid , new_tsec - > sid ,
selinux: Generalize support for NNP/nosuid SELinux domain transitions
As systemd ramps up enabling NNP (NoNewPrivileges) for system services,
it is increasingly breaking SELinux domain transitions for those services
and their descendants. systemd enables NNP not only for services whose
unit files explicitly specify NoNewPrivileges=yes but also for services
whose unit files specify any of the following options in combination with
running without CAP_SYS_ADMIN (e.g. specifying User= or a
CapabilityBoundingSet= without CAP_SYS_ADMIN): SystemCallFilter=,
SystemCallArchitectures=, RestrictAddressFamilies=, RestrictNamespaces=,
PrivateDevices=, ProtectKernelTunables=, ProtectKernelModules=,
MemoryDenyWriteExecute=, or RestrictRealtime= as per the systemd.exec(5)
man page.
The end result is bad for the security of both SELinux-disabled and
SELinux-enabled systems. Packagers have to turn off these
options in the unit files to preserve SELinux domain transitions. For
users who choose to disable SELinux, this means that they miss out on
at least having the systemd-supported protections. For users who keep
SELinux enabled, they may still be missing out on some protections
because it isn't necessarily guaranteed that the SELinux policy for
that service provides the same protections in all cases.
commit 7b0d0b40cd78 ("selinux: Permit bounded transitions under
NO_NEW_PRIVS or NOSUID.") allowed bounded transitions under NNP in
order to support limited usage for sandboxing programs. However,
defining typebounds for all of the affected service domains
is impractical to implement in policy, since typebounds requires us
to ensure that each domain is allowed everything all of its descendant
domains are allowed, and this has to be repeated for the entire chain
of domain transitions. There is no way to clone all allow rules from
descendants to their ancestors in policy currently, and doing so would
be undesirable even if it were practical, as it requires leaking
permissions to objects and operations into ancestor domains that could
weaken their own security in order to allow them to the descendants
(e.g. if a descendant requires execmem permission, then so do all of
its ancestors; if a descendant requires execute permission to a file,
then so do all of its ancestors; if a descendant requires read to a
symbolic link or temporary file, then so do all of its ancestors...).
SELinux domains are intentionally not hierarchical / bounded in this
manner normally, and making them so would undermine their protections
and least privilege.
We have long had a similar tension with SELinux transitions and nosuid
mounts, albeit not as severe. Users often have had to choose between
retaining nosuid on a mount and allowing SELinux domain transitions on
files within those mounts. This likewise leads to unfortunate tradeoffs
in security.
Decouple NNP/nosuid from SELinux transitions, so that we don't have to
make a choice between them. Introduce a nnp_nosuid_transition policy
capability that enables transitions under NNP/nosuid to be based on
a permission (nnp_transition for NNP; nosuid_transition for nosuid)
between the old and new contexts in addition to the current support
for bounded transitions. Domain transitions can then be allowed in
policy without requiring the parent to be a strict superset of all of
its children.
With this change, systemd unit files can be left unmodified from upstream.
SELinux-disabled and SELinux-enabled users will benefit from retaining any
of the systemd-provided protections. SELinux policy will only need to
be adapted to enable the new policy capability and to allow the
new permissions between domain pairs as appropriate.
NB: Allowing nnp_transition between two contexts opens up the potential
for the old context to subvert the new context by installing seccomp
filters before the execve. Allowing nosuid_transition between two contexts
opens up the potential for a context transition to occur on a file from
an untrusted filesystem (e.g. removable media or remote filesystem). Use
with care.
Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: Paul Moore <paul@paul-moore.com>
2017-07-31 10:12:46 -04:00
SECCLASS_PROCESS2 , av , NULL ) ;
if ( ! rc )
return 0 ;
2014-08-04 13:36:49 -04:00
}
selinux: Generalize support for NNP/nosuid SELinux domain transitions
As systemd ramps up enabling NNP (NoNewPrivileges) for system services,
it is increasingly breaking SELinux domain transitions for those services
and their descendants. systemd enables NNP not only for services whose
unit files explicitly specify NoNewPrivileges=yes but also for services
whose unit files specify any of the following options in combination with
running without CAP_SYS_ADMIN (e.g. specifying User= or a
CapabilityBoundingSet= without CAP_SYS_ADMIN): SystemCallFilter=,
SystemCallArchitectures=, RestrictAddressFamilies=, RestrictNamespaces=,
PrivateDevices=, ProtectKernelTunables=, ProtectKernelModules=,
MemoryDenyWriteExecute=, or RestrictRealtime= as per the systemd.exec(5)
man page.
The end result is bad for the security of both SELinux-disabled and
SELinux-enabled systems. Packagers have to turn off these
options in the unit files to preserve SELinux domain transitions. For
users who choose to disable SELinux, this means that they miss out on
at least having the systemd-supported protections. For users who keep
SELinux enabled, they may still be missing out on some protections
because it isn't necessarily guaranteed that the SELinux policy for
that service provides the same protections in all cases.
commit 7b0d0b40cd78 ("selinux: Permit bounded transitions under
NO_NEW_PRIVS or NOSUID.") allowed bounded transitions under NNP in
order to support limited usage for sandboxing programs. However,
defining typebounds for all of the affected service domains
is impractical to implement in policy, since typebounds requires us
to ensure that each domain is allowed everything all of its descendant
domains are allowed, and this has to be repeated for the entire chain
of domain transitions. There is no way to clone all allow rules from
descendants to their ancestors in policy currently, and doing so would
be undesirable even if it were practical, as it requires leaking
permissions to objects and operations into ancestor domains that could
weaken their own security in order to allow them to the descendants
(e.g. if a descendant requires execmem permission, then so do all of
its ancestors; if a descendant requires execute permission to a file,
then so do all of its ancestors; if a descendant requires read to a
symbolic link or temporary file, then so do all of its ancestors...).
SELinux domains are intentionally not hierarchical / bounded in this
manner normally, and making them so would undermine their protections
and least privilege.
We have long had a similar tension with SELinux transitions and nosuid
mounts, albeit not as severe. Users often have had to choose between
retaining nosuid on a mount and allowing SELinux domain transitions on
files within those mounts. This likewise leads to unfortunate tradeoffs
in security.
Decouple NNP/nosuid from SELinux transitions, so that we don't have to
make a choice between them. Introduce a nnp_nosuid_transition policy
capability that enables transitions under NNP/nosuid to be based on
a permission (nnp_transition for NNP; nosuid_transition for nosuid)
between the old and new contexts in addition to the current support
for bounded transitions. Domain transitions can then be allowed in
policy without requiring the parent to be a strict superset of all of
its children.
With this change, systemd unit files can be left unmodified from upstream.
SELinux-disabled and SELinux-enabled users will benefit from retaining any
of the systemd-provided protections. SELinux policy will only need to
be adapted to enable the new policy capability and to allow the
new permissions between domain pairs as appropriate.
NB: Allowing nnp_transition between two contexts opens up the potential
for the old context to subvert the new context by installing seccomp
filters before the execve. Allowing nosuid_transition between two contexts
opens up the potential for a context transition to occur on a file from
an untrusted filesystem (e.g. removable media or remote filesystem). Use
with care.
Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: Paul Moore <paul@paul-moore.com>
2017-07-31 10:12:46 -04:00
/*
* We also permit NNP or nosuid transitions to bounded SIDs ,
* i . e . SIDs that are guaranteed to only be allowed a subset
* of the permissions of the current SID .
*/
2023-03-09 13:30:37 -05:00
rc = security_bounded_transition ( old_tsec - > sid ,
2018-03-01 18:48:02 -05:00
new_tsec - > sid ) ;
selinux: Generalize support for NNP/nosuid SELinux domain transitions
As systemd ramps up enabling NNP (NoNewPrivileges) for system services,
it is increasingly breaking SELinux domain transitions for those services
and their descendants. systemd enables NNP not only for services whose
unit files explicitly specify NoNewPrivileges=yes but also for services
whose unit files specify any of the following options in combination with
running without CAP_SYS_ADMIN (e.g. specifying User= or a
CapabilityBoundingSet= without CAP_SYS_ADMIN): SystemCallFilter=,
SystemCallArchitectures=, RestrictAddressFamilies=, RestrictNamespaces=,
PrivateDevices=, ProtectKernelTunables=, ProtectKernelModules=,
MemoryDenyWriteExecute=, or RestrictRealtime= as per the systemd.exec(5)
man page.
The end result is bad for the security of both SELinux-disabled and
SELinux-enabled systems. Packagers have to turn off these
options in the unit files to preserve SELinux domain transitions. For
users who choose to disable SELinux, this means that they miss out on
at least having the systemd-supported protections. For users who keep
SELinux enabled, they may still be missing out on some protections
because it isn't necessarily guaranteed that the SELinux policy for
that service provides the same protections in all cases.
commit 7b0d0b40cd78 ("selinux: Permit bounded transitions under
NO_NEW_PRIVS or NOSUID.") allowed bounded transitions under NNP in
order to support limited usage for sandboxing programs. However,
defining typebounds for all of the affected service domains
is impractical to implement in policy, since typebounds requires us
to ensure that each domain is allowed everything all of its descendant
domains are allowed, and this has to be repeated for the entire chain
of domain transitions. There is no way to clone all allow rules from
descendants to their ancestors in policy currently, and doing so would
be undesirable even if it were practical, as it requires leaking
permissions to objects and operations into ancestor domains that could
weaken their own security in order to allow them to the descendants
(e.g. if a descendant requires execmem permission, then so do all of
its ancestors; if a descendant requires execute permission to a file,
then so do all of its ancestors; if a descendant requires read to a
symbolic link or temporary file, then so do all of its ancestors...).
SELinux domains are intentionally not hierarchical / bounded in this
manner normally, and making them so would undermine their protections
and least privilege.
We have long had a similar tension with SELinux transitions and nosuid
mounts, albeit not as severe. Users often have had to choose between
retaining nosuid on a mount and allowing SELinux domain transitions on
files within those mounts. This likewise leads to unfortunate tradeoffs
in security.
Decouple NNP/nosuid from SELinux transitions, so that we don't have to
make a choice between them. Introduce a nnp_nosuid_transition policy
capability that enables transitions under NNP/nosuid to be based on
a permission (nnp_transition for NNP; nosuid_transition for nosuid)
between the old and new contexts in addition to the current support
for bounded transitions. Domain transitions can then be allowed in
policy without requiring the parent to be a strict superset of all of
its children.
With this change, systemd unit files can be left unmodified from upstream.
SELinux-disabled and SELinux-enabled users will benefit from retaining any
of the systemd-provided protections. SELinux policy will only need to
be adapted to enable the new policy capability and to allow the
new permissions between domain pairs as appropriate.
NB: Allowing nnp_transition between two contexts opens up the potential
for the old context to subvert the new context by installing seccomp
filters before the execve. Allowing nosuid_transition between two contexts
opens up the potential for a context transition to occur on a file from
an untrusted filesystem (e.g. removable media or remote filesystem). Use
with care.
Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: Paul Moore <paul@paul-moore.com>
2017-07-31 10:12:46 -04:00
if ( ! rc )
return 0 ;
/*
* On failure , preserve the errno values for NNP vs nosuid .
* NNP : Operation not permitted for caller .
* nosuid : Permission denied to file .
*/
if ( nnp )
return - EPERM ;
return - EACCES ;
2014-08-04 13:36:49 -04:00
}
2020-03-22 15:46:24 -05:00
static int selinux_bprm_creds_for_exec ( struct linux_binprm * bprm )
2005-04-16 15:20:36 -07:00
{
CRED: Make execve() take advantage of copy-on-write credentials
Make execve() take advantage of copy-on-write credentials, allowing it to set
up the credentials in advance, and then commit the whole lot after the point
of no return.
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
The credential bits from struct linux_binprm are, for the most part,
replaced with a single credentials pointer (bprm->cred). This means that
all the creds can be calculated in advance and then applied at the point
of no return with no possibility of failure.
I would like to replace bprm->cap_effective with:
cap_isclear(bprm->cap_effective)
but this seems impossible due to special behaviour for processes of pid 1
(they always retain their parent's capability masks where normally they'd
be changed - see cap_bprm_set_creds()).
The following sequence of events now happens:
(a) At the start of do_execve, the current task's cred_exec_mutex is
locked to prevent PTRACE_ATTACH from obsoleting the calculation of
creds that we make.
(a) prepare_exec_creds() is then called to make a copy of the current
task's credentials and prepare it. This copy is then assigned to
bprm->cred.
This renders security_bprm_alloc() and security_bprm_free()
unnecessary, and so they've been removed.
(b) The determination of unsafe execution is now performed immediately
after (a) rather than later on in the code. The result is stored in
bprm->unsafe for future reference.
(c) prepare_binprm() is called, possibly multiple times.
(i) This applies the result of set[ug]id binaries to the new creds
attached to bprm->cred. Personality bit clearance is recorded,
but now deferred on the basis that the exec procedure may yet
fail.
(ii) This then calls the new security_bprm_set_creds(). This should
calculate the new LSM and capability credentials into *bprm->cred.
This folds together security_bprm_set() and parts of
security_bprm_apply_creds() (these two have been removed).
Anything that might fail must be done at this point.
(iii) bprm->cred_prepared is set to 1.
bprm->cred_prepared is 0 on the first pass of the security
calculations, and 1 on all subsequent passes. This allows SELinux
in (ii) to base its calculations only on the initial script and
not on the interpreter.
(d) flush_old_exec() is called to commit the task to execution. This
performs the following steps with regard to credentials:
(i) Clear pdeath_signal and set dumpable in certain circumstances that
may not be covered by commit_creds().
(ii) Clear any bits in current->personality that were deferred from
(c.i).
(e) install_exec_creds() [compute_creds() as was] is called to install the
new credentials. This performs the following steps with regard to
credentials:
(i) Calls security_bprm_committing_creds() to apply any security
requirements, such as flushing unauthorised files in SELinux, that
must be done before the credentials are changed.
This is made up of bits of security_bprm_apply_creds() and
security_bprm_post_apply_creds(), both of which have been removed.
This function is not allowed to fail; anything that might fail
must have been done in (c.ii).
(ii) Calls commit_creds() to apply the new credentials in a single
assignment (more or less). Possibly pdeath_signal and dumpable
should be part of struct creds.
(iii) Unlocks the task's cred_replace_mutex, thus allowing
PTRACE_ATTACH to take place.
(iv) Clears the bprm->cred pointer as the credentials it was holding
are now immutable.
(v) Calls security_bprm_committed_creds() to apply any security
alterations that must be done after the creds have been changed.
SELinux uses this to flush signals and signal handlers.
(f) If an error occurs before (d.i), bprm_free() will call abort_creds()
to destroy the proposed new credentials and will then unlock
cred_replace_mutex. No changes to the credentials will have been
made.
(2) LSM interface.
A number of functions have been changed, added or removed:
(*) security_bprm_alloc(), ->bprm_alloc_security()
(*) security_bprm_free(), ->bprm_free_security()
Removed in favour of preparing new credentials and modifying those.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
(*) security_bprm_post_apply_creds(), ->bprm_post_apply_creds()
Removed; split between security_bprm_set_creds(),
security_bprm_committing_creds() and security_bprm_committed_creds().
(*) security_bprm_set(), ->bprm_set_security()
Removed; folded into security_bprm_set_creds().
(*) security_bprm_set_creds(), ->bprm_set_creds()
New. The new credentials in bprm->cred should be checked and set up
as appropriate. bprm->cred_prepared is 0 on the first call, 1 on the
second and subsequent calls.
(*) security_bprm_committing_creds(), ->bprm_committing_creds()
(*) security_bprm_committed_creds(), ->bprm_committed_creds()
New. Apply the security effects of the new credentials. This
includes closing unauthorised files in SELinux. This function may not
fail. When the former is called, the creds haven't yet been applied
to the process; when the latter is called, they have.
The former may access bprm->cred, the latter may not.
(3) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) The bprm_security_struct struct has been removed in favour of using
the credentials-under-construction approach.
(c) flush_unauthorized_files() now takes a cred pointer and passes it on
to inode_has_perm(), file_has_perm() and dentry_open().
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:24 +11:00
const struct task_security_struct * old_tsec ;
struct task_security_struct * new_tsec ;
2005-04-16 15:20:36 -07:00
struct inode_security_struct * isec ;
2009-07-14 12:14:09 -04:00
struct common_audit_data ad ;
2013-01-23 17:07:38 -05:00
struct inode * inode = file_inode ( bprm - > file ) ;
2005-04-16 15:20:36 -07:00
int rc ;
CRED: Make execve() take advantage of copy-on-write credentials
Make execve() take advantage of copy-on-write credentials, allowing it to set
up the credentials in advance, and then commit the whole lot after the point
of no return.
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
The credential bits from struct linux_binprm are, for the most part,
replaced with a single credentials pointer (bprm->cred). This means that
all the creds can be calculated in advance and then applied at the point
of no return with no possibility of failure.
I would like to replace bprm->cap_effective with:
cap_isclear(bprm->cap_effective)
but this seems impossible due to special behaviour for processes of pid 1
(they always retain their parent's capability masks where normally they'd
be changed - see cap_bprm_set_creds()).
The following sequence of events now happens:
(a) At the start of do_execve, the current task's cred_exec_mutex is
locked to prevent PTRACE_ATTACH from obsoleting the calculation of
creds that we make.
(a) prepare_exec_creds() is then called to make a copy of the current
task's credentials and prepare it. This copy is then assigned to
bprm->cred.
This renders security_bprm_alloc() and security_bprm_free()
unnecessary, and so they've been removed.
(b) The determination of unsafe execution is now performed immediately
after (a) rather than later on in the code. The result is stored in
bprm->unsafe for future reference.
(c) prepare_binprm() is called, possibly multiple times.
(i) This applies the result of set[ug]id binaries to the new creds
attached to bprm->cred. Personality bit clearance is recorded,
but now deferred on the basis that the exec procedure may yet
fail.
(ii) This then calls the new security_bprm_set_creds(). This should
calculate the new LSM and capability credentials into *bprm->cred.
This folds together security_bprm_set() and parts of
security_bprm_apply_creds() (these two have been removed).
Anything that might fail must be done at this point.
(iii) bprm->cred_prepared is set to 1.
bprm->cred_prepared is 0 on the first pass of the security
calculations, and 1 on all subsequent passes. This allows SELinux
in (ii) to base its calculations only on the initial script and
not on the interpreter.
(d) flush_old_exec() is called to commit the task to execution. This
performs the following steps with regard to credentials:
(i) Clear pdeath_signal and set dumpable in certain circumstances that
may not be covered by commit_creds().
(ii) Clear any bits in current->personality that were deferred from
(c.i).
(e) install_exec_creds() [compute_creds() as was] is called to install the
new credentials. This performs the following steps with regard to
credentials:
(i) Calls security_bprm_committing_creds() to apply any security
requirements, such as flushing unauthorised files in SELinux, that
must be done before the credentials are changed.
This is made up of bits of security_bprm_apply_creds() and
security_bprm_post_apply_creds(), both of which have been removed.
This function is not allowed to fail; anything that might fail
must have been done in (c.ii).
(ii) Calls commit_creds() to apply the new credentials in a single
assignment (more or less). Possibly pdeath_signal and dumpable
should be part of struct creds.
(iii) Unlocks the task's cred_replace_mutex, thus allowing
PTRACE_ATTACH to take place.
(iv) Clears the bprm->cred pointer as the credentials it was holding
are now immutable.
(v) Calls security_bprm_committed_creds() to apply any security
alterations that must be done after the creds have been changed.
SELinux uses this to flush signals and signal handlers.
(f) If an error occurs before (d.i), bprm_free() will call abort_creds()
to destroy the proposed new credentials and will then unlock
cred_replace_mutex. No changes to the credentials will have been
made.
(2) LSM interface.
A number of functions have been changed, added or removed:
(*) security_bprm_alloc(), ->bprm_alloc_security()
(*) security_bprm_free(), ->bprm_free_security()
Removed in favour of preparing new credentials and modifying those.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
(*) security_bprm_post_apply_creds(), ->bprm_post_apply_creds()
Removed; split between security_bprm_set_creds(),
security_bprm_committing_creds() and security_bprm_committed_creds().
(*) security_bprm_set(), ->bprm_set_security()
Removed; folded into security_bprm_set_creds().
(*) security_bprm_set_creds(), ->bprm_set_creds()
New. The new credentials in bprm->cred should be checked and set up
as appropriate. bprm->cred_prepared is 0 on the first call, 1 on the
second and subsequent calls.
(*) security_bprm_committing_creds(), ->bprm_committing_creds()
(*) security_bprm_committed_creds(), ->bprm_committed_creds()
New. Apply the security effects of the new credentials. This
includes closing unauthorised files in SELinux. This function may not
fail. When the former is called, the creds haven't yet been applied
to the process; when the latter is called, they have.
The former may access bprm->cred, the latter may not.
(3) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) The bprm_security_struct struct has been removed in favour of using
the credentials-under-construction approach.
(c) flush_unauthorized_files() now takes a cred pointer and passes it on
to inode_has_perm(), file_has_perm() and dentry_open().
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:24 +11:00
/* SELinux context only depends on initial program or script and not
* the script interpreter */
2005-04-16 15:20:36 -07:00
2018-09-21 17:17:16 -07:00
old_tsec = selinux_cred ( current_cred ( ) ) ;
new_tsec = selinux_cred ( bprm - > cred ) ;
2015-12-24 11:09:39 -05:00
isec = inode_security ( inode ) ;
2005-04-16 15:20:36 -07:00
/* Default to the current task SID. */
CRED: Make execve() take advantage of copy-on-write credentials
Make execve() take advantage of copy-on-write credentials, allowing it to set
up the credentials in advance, and then commit the whole lot after the point
of no return.
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
The credential bits from struct linux_binprm are, for the most part,
replaced with a single credentials pointer (bprm->cred). This means that
all the creds can be calculated in advance and then applied at the point
of no return with no possibility of failure.
I would like to replace bprm->cap_effective with:
cap_isclear(bprm->cap_effective)
but this seems impossible due to special behaviour for processes of pid 1
(they always retain their parent's capability masks where normally they'd
be changed - see cap_bprm_set_creds()).
The following sequence of events now happens:
(a) At the start of do_execve, the current task's cred_exec_mutex is
locked to prevent PTRACE_ATTACH from obsoleting the calculation of
creds that we make.
(a) prepare_exec_creds() is then called to make a copy of the current
task's credentials and prepare it. This copy is then assigned to
bprm->cred.
This renders security_bprm_alloc() and security_bprm_free()
unnecessary, and so they've been removed.
(b) The determination of unsafe execution is now performed immediately
after (a) rather than later on in the code. The result is stored in
bprm->unsafe for future reference.
(c) prepare_binprm() is called, possibly multiple times.
(i) This applies the result of set[ug]id binaries to the new creds
attached to bprm->cred. Personality bit clearance is recorded,
but now deferred on the basis that the exec procedure may yet
fail.
(ii) This then calls the new security_bprm_set_creds(). This should
calculate the new LSM and capability credentials into *bprm->cred.
This folds together security_bprm_set() and parts of
security_bprm_apply_creds() (these two have been removed).
Anything that might fail must be done at this point.
(iii) bprm->cred_prepared is set to 1.
bprm->cred_prepared is 0 on the first pass of the security
calculations, and 1 on all subsequent passes. This allows SELinux
in (ii) to base its calculations only on the initial script and
not on the interpreter.
(d) flush_old_exec() is called to commit the task to execution. This
performs the following steps with regard to credentials:
(i) Clear pdeath_signal and set dumpable in certain circumstances that
may not be covered by commit_creds().
(ii) Clear any bits in current->personality that were deferred from
(c.i).
(e) install_exec_creds() [compute_creds() as was] is called to install the
new credentials. This performs the following steps with regard to
credentials:
(i) Calls security_bprm_committing_creds() to apply any security
requirements, such as flushing unauthorised files in SELinux, that
must be done before the credentials are changed.
This is made up of bits of security_bprm_apply_creds() and
security_bprm_post_apply_creds(), both of which have been removed.
This function is not allowed to fail; anything that might fail
must have been done in (c.ii).
(ii) Calls commit_creds() to apply the new credentials in a single
assignment (more or less). Possibly pdeath_signal and dumpable
should be part of struct creds.
(iii) Unlocks the task's cred_replace_mutex, thus allowing
PTRACE_ATTACH to take place.
(iv) Clears the bprm->cred pointer as the credentials it was holding
are now immutable.
(v) Calls security_bprm_committed_creds() to apply any security
alterations that must be done after the creds have been changed.
SELinux uses this to flush signals and signal handlers.
(f) If an error occurs before (d.i), bprm_free() will call abort_creds()
to destroy the proposed new credentials and will then unlock
cred_replace_mutex. No changes to the credentials will have been
made.
(2) LSM interface.
A number of functions have been changed, added or removed:
(*) security_bprm_alloc(), ->bprm_alloc_security()
(*) security_bprm_free(), ->bprm_free_security()
Removed in favour of preparing new credentials and modifying those.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
(*) security_bprm_post_apply_creds(), ->bprm_post_apply_creds()
Removed; split between security_bprm_set_creds(),
security_bprm_committing_creds() and security_bprm_committed_creds().
(*) security_bprm_set(), ->bprm_set_security()
Removed; folded into security_bprm_set_creds().
(*) security_bprm_set_creds(), ->bprm_set_creds()
New. The new credentials in bprm->cred should be checked and set up
as appropriate. bprm->cred_prepared is 0 on the first call, 1 on the
second and subsequent calls.
(*) security_bprm_committing_creds(), ->bprm_committing_creds()
(*) security_bprm_committed_creds(), ->bprm_committed_creds()
New. Apply the security effects of the new credentials. This
includes closing unauthorised files in SELinux. This function may not
fail. When the former is called, the creds haven't yet been applied
to the process; when the latter is called, they have.
The former may access bprm->cred, the latter may not.
(3) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) The bprm_security_struct struct has been removed in favour of using
the credentials-under-construction approach.
(c) flush_unauthorized_files() now takes a cred pointer and passes it on
to inode_has_perm(), file_has_perm() and dentry_open().
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:24 +11:00
new_tsec - > sid = old_tsec - > sid ;
new_tsec - > osid = old_tsec - > sid ;
2005-04-16 15:20:36 -07:00
2006-06-27 02:53:42 -07:00
/* Reset fs, key, and sock SIDs on execve. */
CRED: Make execve() take advantage of copy-on-write credentials
Make execve() take advantage of copy-on-write credentials, allowing it to set
up the credentials in advance, and then commit the whole lot after the point
of no return.
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
The credential bits from struct linux_binprm are, for the most part,
replaced with a single credentials pointer (bprm->cred). This means that
all the creds can be calculated in advance and then applied at the point
of no return with no possibility of failure.
I would like to replace bprm->cap_effective with:
cap_isclear(bprm->cap_effective)
but this seems impossible due to special behaviour for processes of pid 1
(they always retain their parent's capability masks where normally they'd
be changed - see cap_bprm_set_creds()).
The following sequence of events now happens:
(a) At the start of do_execve, the current task's cred_exec_mutex is
locked to prevent PTRACE_ATTACH from obsoleting the calculation of
creds that we make.
(a) prepare_exec_creds() is then called to make a copy of the current
task's credentials and prepare it. This copy is then assigned to
bprm->cred.
This renders security_bprm_alloc() and security_bprm_free()
unnecessary, and so they've been removed.
(b) The determination of unsafe execution is now performed immediately
after (a) rather than later on in the code. The result is stored in
bprm->unsafe for future reference.
(c) prepare_binprm() is called, possibly multiple times.
(i) This applies the result of set[ug]id binaries to the new creds
attached to bprm->cred. Personality bit clearance is recorded,
but now deferred on the basis that the exec procedure may yet
fail.
(ii) This then calls the new security_bprm_set_creds(). This should
calculate the new LSM and capability credentials into *bprm->cred.
This folds together security_bprm_set() and parts of
security_bprm_apply_creds() (these two have been removed).
Anything that might fail must be done at this point.
(iii) bprm->cred_prepared is set to 1.
bprm->cred_prepared is 0 on the first pass of the security
calculations, and 1 on all subsequent passes. This allows SELinux
in (ii) to base its calculations only on the initial script and
not on the interpreter.
(d) flush_old_exec() is called to commit the task to execution. This
performs the following steps with regard to credentials:
(i) Clear pdeath_signal and set dumpable in certain circumstances that
may not be covered by commit_creds().
(ii) Clear any bits in current->personality that were deferred from
(c.i).
(e) install_exec_creds() [compute_creds() as was] is called to install the
new credentials. This performs the following steps with regard to
credentials:
(i) Calls security_bprm_committing_creds() to apply any security
requirements, such as flushing unauthorised files in SELinux, that
must be done before the credentials are changed.
This is made up of bits of security_bprm_apply_creds() and
security_bprm_post_apply_creds(), both of which have been removed.
This function is not allowed to fail; anything that might fail
must have been done in (c.ii).
(ii) Calls commit_creds() to apply the new credentials in a single
assignment (more or less). Possibly pdeath_signal and dumpable
should be part of struct creds.
(iii) Unlocks the task's cred_replace_mutex, thus allowing
PTRACE_ATTACH to take place.
(iv) Clears the bprm->cred pointer as the credentials it was holding
are now immutable.
(v) Calls security_bprm_committed_creds() to apply any security
alterations that must be done after the creds have been changed.
SELinux uses this to flush signals and signal handlers.
(f) If an error occurs before (d.i), bprm_free() will call abort_creds()
to destroy the proposed new credentials and will then unlock
cred_replace_mutex. No changes to the credentials will have been
made.
(2) LSM interface.
A number of functions have been changed, added or removed:
(*) security_bprm_alloc(), ->bprm_alloc_security()
(*) security_bprm_free(), ->bprm_free_security()
Removed in favour of preparing new credentials and modifying those.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
(*) security_bprm_post_apply_creds(), ->bprm_post_apply_creds()
Removed; split between security_bprm_set_creds(),
security_bprm_committing_creds() and security_bprm_committed_creds().
(*) security_bprm_set(), ->bprm_set_security()
Removed; folded into security_bprm_set_creds().
(*) security_bprm_set_creds(), ->bprm_set_creds()
New. The new credentials in bprm->cred should be checked and set up
as appropriate. bprm->cred_prepared is 0 on the first call, 1 on the
second and subsequent calls.
(*) security_bprm_committing_creds(), ->bprm_committing_creds()
(*) security_bprm_committed_creds(), ->bprm_committed_creds()
New. Apply the security effects of the new credentials. This
includes closing unauthorised files in SELinux. This function may not
fail. When the former is called, the creds haven't yet been applied
to the process; when the latter is called, they have.
The former may access bprm->cred, the latter may not.
(3) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) The bprm_security_struct struct has been removed in favour of using
the credentials-under-construction approach.
(c) flush_unauthorized_files() now takes a cred pointer and passes it on
to inode_has_perm(), file_has_perm() and dentry_open().
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:24 +11:00
new_tsec - > create_sid = 0 ;
new_tsec - > keycreate_sid = 0 ;
new_tsec - > sockcreate_sid = 0 ;
2005-04-16 15:20:36 -07:00
selinux: introduce an initial SID for early boot processes
Currently, SELinux doesn't allow distinguishing between kernel threads
and userspace processes that are started before the policy is first
loaded - both get the label corresponding to the kernel SID. The only
way a process that persists from early boot can get a meaningful label
is by doing a voluntary dyntransition or re-executing itself.
Reusing the kernel label for userspace processes is problematic for
several reasons:
1. The kernel is considered to be a privileged domain and generally
needs to have a wide range of permissions allowed to work correctly,
which prevents the policy writer from effectively hardening against
early boot processes that might remain running unintentionally after
the policy is loaded (they represent a potential extra attack surface
that should be mitigated).
2. Despite the kernel being treated as a privileged domain, the policy
writer may want to impose certain special limitations on kernel
threads that may conflict with the requirements of intentional early
boot processes. For example, it is a good hardening practice to limit
what executables the kernel can execute as usermode helpers and to
confine the resulting usermode helper processes. However, a
(legitimate) process surviving from early boot may need to execute a
different set of executables.
3. As currently implemented, overlayfs remembers the security context of
the process that created an overlayfs mount and uses it to bound
subsequent operations on files using this context. If an overlayfs
mount is created before the SELinux policy is loaded, these "mounter"
checks are made against the kernel context, which may clash with
restrictions on the kernel domain (see 2.).
To resolve this, introduce a new initial SID (reusing the slot of the
former "init" initial SID) that will be assigned to any userspace
process started before the policy is first loaded. This is easy to do,
as we can simply label any process that goes through the
bprm_creds_for_exec LSM hook with the new init-SID instead of
propagating the kernel SID from the parent.
To provide backwards compatibility for existing policies that are
unaware of this new semantic of the "init" initial SID, introduce a new
policy capability "userspace_initial_context" and set the "init" SID to
the same context as the "kernel" SID unless this capability is set by
the policy.
Another small backwards compatibility measure is needed in
security_sid_to_context_core() for before the initial SELinux policy
load - see the code comment for explanation.
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
Reviewed-by: Stephen Smalley <stephen.smalley.work@gmail.com>
[PM: edited comments based on feedback/discussion]
Signed-off-by: Paul Moore <paul@paul-moore.com>
2023-11-14 16:51:16 +01:00
/*
* Before policy is loaded , label any task outside kernel space
* as SECINITSID_INIT , so that any userspace tasks surviving from
* early boot end up with a label different from SECINITSID_KERNEL
* ( if the policy chooses to set SECINITSID_INIT ! = SECINITSID_KERNEL ) .
*/
if ( ! selinux_initialized ( ) ) {
new_tsec - > sid = SECINITSID_INIT ;
/* also clear the exec_sid just in case */
new_tsec - > exec_sid = 0 ;
return 0 ;
}
CRED: Make execve() take advantage of copy-on-write credentials
Make execve() take advantage of copy-on-write credentials, allowing it to set
up the credentials in advance, and then commit the whole lot after the point
of no return.
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
The credential bits from struct linux_binprm are, for the most part,
replaced with a single credentials pointer (bprm->cred). This means that
all the creds can be calculated in advance and then applied at the point
of no return with no possibility of failure.
I would like to replace bprm->cap_effective with:
cap_isclear(bprm->cap_effective)
but this seems impossible due to special behaviour for processes of pid 1
(they always retain their parent's capability masks where normally they'd
be changed - see cap_bprm_set_creds()).
The following sequence of events now happens:
(a) At the start of do_execve, the current task's cred_exec_mutex is
locked to prevent PTRACE_ATTACH from obsoleting the calculation of
creds that we make.
(a) prepare_exec_creds() is then called to make a copy of the current
task's credentials and prepare it. This copy is then assigned to
bprm->cred.
This renders security_bprm_alloc() and security_bprm_free()
unnecessary, and so they've been removed.
(b) The determination of unsafe execution is now performed immediately
after (a) rather than later on in the code. The result is stored in
bprm->unsafe for future reference.
(c) prepare_binprm() is called, possibly multiple times.
(i) This applies the result of set[ug]id binaries to the new creds
attached to bprm->cred. Personality bit clearance is recorded,
but now deferred on the basis that the exec procedure may yet
fail.
(ii) This then calls the new security_bprm_set_creds(). This should
calculate the new LSM and capability credentials into *bprm->cred.
This folds together security_bprm_set() and parts of
security_bprm_apply_creds() (these two have been removed).
Anything that might fail must be done at this point.
(iii) bprm->cred_prepared is set to 1.
bprm->cred_prepared is 0 on the first pass of the security
calculations, and 1 on all subsequent passes. This allows SELinux
in (ii) to base its calculations only on the initial script and
not on the interpreter.
(d) flush_old_exec() is called to commit the task to execution. This
performs the following steps with regard to credentials:
(i) Clear pdeath_signal and set dumpable in certain circumstances that
may not be covered by commit_creds().
(ii) Clear any bits in current->personality that were deferred from
(c.i).
(e) install_exec_creds() [compute_creds() as was] is called to install the
new credentials. This performs the following steps with regard to
credentials:
(i) Calls security_bprm_committing_creds() to apply any security
requirements, such as flushing unauthorised files in SELinux, that
must be done before the credentials are changed.
This is made up of bits of security_bprm_apply_creds() and
security_bprm_post_apply_creds(), both of which have been removed.
This function is not allowed to fail; anything that might fail
must have been done in (c.ii).
(ii) Calls commit_creds() to apply the new credentials in a single
assignment (more or less). Possibly pdeath_signal and dumpable
should be part of struct creds.
(iii) Unlocks the task's cred_replace_mutex, thus allowing
PTRACE_ATTACH to take place.
(iv) Clears the bprm->cred pointer as the credentials it was holding
are now immutable.
(v) Calls security_bprm_committed_creds() to apply any security
alterations that must be done after the creds have been changed.
SELinux uses this to flush signals and signal handlers.
(f) If an error occurs before (d.i), bprm_free() will call abort_creds()
to destroy the proposed new credentials and will then unlock
cred_replace_mutex. No changes to the credentials will have been
made.
(2) LSM interface.
A number of functions have been changed, added or removed:
(*) security_bprm_alloc(), ->bprm_alloc_security()
(*) security_bprm_free(), ->bprm_free_security()
Removed in favour of preparing new credentials and modifying those.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
(*) security_bprm_post_apply_creds(), ->bprm_post_apply_creds()
Removed; split between security_bprm_set_creds(),
security_bprm_committing_creds() and security_bprm_committed_creds().
(*) security_bprm_set(), ->bprm_set_security()
Removed; folded into security_bprm_set_creds().
(*) security_bprm_set_creds(), ->bprm_set_creds()
New. The new credentials in bprm->cred should be checked and set up
as appropriate. bprm->cred_prepared is 0 on the first call, 1 on the
second and subsequent calls.
(*) security_bprm_committing_creds(), ->bprm_committing_creds()
(*) security_bprm_committed_creds(), ->bprm_committed_creds()
New. Apply the security effects of the new credentials. This
includes closing unauthorised files in SELinux. This function may not
fail. When the former is called, the creds haven't yet been applied
to the process; when the latter is called, they have.
The former may access bprm->cred, the latter may not.
(3) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) The bprm_security_struct struct has been removed in favour of using
the credentials-under-construction approach.
(c) flush_unauthorized_files() now takes a cred pointer and passes it on
to inode_has_perm(), file_has_perm() and dentry_open().
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:24 +11:00
if ( old_tsec - > exec_sid ) {
new_tsec - > sid = old_tsec - > exec_sid ;
2005-04-16 15:20:36 -07:00
/* Reset exec SID on execve. */
CRED: Make execve() take advantage of copy-on-write credentials
Make execve() take advantage of copy-on-write credentials, allowing it to set
up the credentials in advance, and then commit the whole lot after the point
of no return.
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
The credential bits from struct linux_binprm are, for the most part,
replaced with a single credentials pointer (bprm->cred). This means that
all the creds can be calculated in advance and then applied at the point
of no return with no possibility of failure.
I would like to replace bprm->cap_effective with:
cap_isclear(bprm->cap_effective)
but this seems impossible due to special behaviour for processes of pid 1
(they always retain their parent's capability masks where normally they'd
be changed - see cap_bprm_set_creds()).
The following sequence of events now happens:
(a) At the start of do_execve, the current task's cred_exec_mutex is
locked to prevent PTRACE_ATTACH from obsoleting the calculation of
creds that we make.
(a) prepare_exec_creds() is then called to make a copy of the current
task's credentials and prepare it. This copy is then assigned to
bprm->cred.
This renders security_bprm_alloc() and security_bprm_free()
unnecessary, and so they've been removed.
(b) The determination of unsafe execution is now performed immediately
after (a) rather than later on in the code. The result is stored in
bprm->unsafe for future reference.
(c) prepare_binprm() is called, possibly multiple times.
(i) This applies the result of set[ug]id binaries to the new creds
attached to bprm->cred. Personality bit clearance is recorded,
but now deferred on the basis that the exec procedure may yet
fail.
(ii) This then calls the new security_bprm_set_creds(). This should
calculate the new LSM and capability credentials into *bprm->cred.
This folds together security_bprm_set() and parts of
security_bprm_apply_creds() (these two have been removed).
Anything that might fail must be done at this point.
(iii) bprm->cred_prepared is set to 1.
bprm->cred_prepared is 0 on the first pass of the security
calculations, and 1 on all subsequent passes. This allows SELinux
in (ii) to base its calculations only on the initial script and
not on the interpreter.
(d) flush_old_exec() is called to commit the task to execution. This
performs the following steps with regard to credentials:
(i) Clear pdeath_signal and set dumpable in certain circumstances that
may not be covered by commit_creds().
(ii) Clear any bits in current->personality that were deferred from
(c.i).
(e) install_exec_creds() [compute_creds() as was] is called to install the
new credentials. This performs the following steps with regard to
credentials:
(i) Calls security_bprm_committing_creds() to apply any security
requirements, such as flushing unauthorised files in SELinux, that
must be done before the credentials are changed.
This is made up of bits of security_bprm_apply_creds() and
security_bprm_post_apply_creds(), both of which have been removed.
This function is not allowed to fail; anything that might fail
must have been done in (c.ii).
(ii) Calls commit_creds() to apply the new credentials in a single
assignment (more or less). Possibly pdeath_signal and dumpable
should be part of struct creds.
(iii) Unlocks the task's cred_replace_mutex, thus allowing
PTRACE_ATTACH to take place.
(iv) Clears the bprm->cred pointer as the credentials it was holding
are now immutable.
(v) Calls security_bprm_committed_creds() to apply any security
alterations that must be done after the creds have been changed.
SELinux uses this to flush signals and signal handlers.
(f) If an error occurs before (d.i), bprm_free() will call abort_creds()
to destroy the proposed new credentials and will then unlock
cred_replace_mutex. No changes to the credentials will have been
made.
(2) LSM interface.
A number of functions have been changed, added or removed:
(*) security_bprm_alloc(), ->bprm_alloc_security()
(*) security_bprm_free(), ->bprm_free_security()
Removed in favour of preparing new credentials and modifying those.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
(*) security_bprm_post_apply_creds(), ->bprm_post_apply_creds()
Removed; split between security_bprm_set_creds(),
security_bprm_committing_creds() and security_bprm_committed_creds().
(*) security_bprm_set(), ->bprm_set_security()
Removed; folded into security_bprm_set_creds().
(*) security_bprm_set_creds(), ->bprm_set_creds()
New. The new credentials in bprm->cred should be checked and set up
as appropriate. bprm->cred_prepared is 0 on the first call, 1 on the
second and subsequent calls.
(*) security_bprm_committing_creds(), ->bprm_committing_creds()
(*) security_bprm_committed_creds(), ->bprm_committed_creds()
New. Apply the security effects of the new credentials. This
includes closing unauthorised files in SELinux. This function may not
fail. When the former is called, the creds haven't yet been applied
to the process; when the latter is called, they have.
The former may access bprm->cred, the latter may not.
(3) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) The bprm_security_struct struct has been removed in favour of using
the credentials-under-construction approach.
(c) flush_unauthorized_files() now takes a cred pointer and passes it on
to inode_has_perm(), file_has_perm() and dentry_open().
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:24 +11:00
new_tsec - > exec_sid = 0 ;
Add PR_{GET,SET}_NO_NEW_PRIVS to prevent execve from granting privs
With this change, calling
prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)
disables privilege granting operations at execve-time. For example, a
process will not be able to execute a setuid binary to change their uid
or gid if this bit is set. The same is true for file capabilities.
Additionally, LSM_UNSAFE_NO_NEW_PRIVS is defined to ensure that
LSMs respect the requested behavior.
To determine if the NO_NEW_PRIVS bit is set, a task may call
prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
It returns 1 if set and 0 if it is not set. If any of the arguments are
non-zero, it will return -1 and set errno to -EINVAL.
(PR_SET_NO_NEW_PRIVS behaves similarly.)
This functionality is desired for the proposed seccomp filter patch
series. By using PR_SET_NO_NEW_PRIVS, it allows a task to modify the
system call behavior for itself and its child tasks without being
able to impact the behavior of a more privileged task.
Another potential use is making certain privileged operations
unprivileged. For example, chroot may be considered "safe" if it cannot
affect privileged tasks.
Note, this patch causes execve to fail when PR_SET_NO_NEW_PRIVS is
set and AppArmor is in use. It is fixed in a subsequent patch.
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Will Drewry <wad@chromium.org>
Acked-by: Eric Paris <eparis@redhat.com>
Acked-by: Kees Cook <keescook@chromium.org>
v18: updated change desc
v17: using new define values as per 3.4
Signed-off-by: James Morris <james.l.morris@oracle.com>
2012-04-12 16:47:50 -05:00
2014-08-04 13:36:49 -04:00
/* Fail on NNP or nosuid if not an allowed transition. */
rc = check_nnp_nosuid ( bprm , old_tsec , new_tsec ) ;
if ( rc )
return rc ;
2005-04-16 15:20:36 -07:00
} else {
/* Check for a default transition on this program. */
2023-03-09 13:30:37 -05:00
rc = security_transition_sid ( old_tsec - > sid ,
2018-03-01 18:48:02 -05:00
isec - > sid , SECCLASS_PROCESS , NULL ,
SELinux: Use dentry name in new object labeling
Currently SELinux has rules which label new objects according to 3 criteria.
The label of the process creating the object, the label of the parent
directory, and the type of object (reg, dir, char, block, etc.) This patch
adds a 4th criteria, the dentry name, thus we can distinguish between
creating a file in an etc_t directory called shadow and one called motd.
There is no file globbing, regex parsing, or anything mystical. Either the
policy exactly (strcmp) matches the dentry name of the object or it doesn't.
This patch has no changes from today if policy does not implement the new
rules.
Signed-off-by: Eric Paris <eparis@redhat.com>
2011-02-01 11:05:40 -05:00
& new_tsec - > sid ) ;
2005-04-16 15:20:36 -07:00
if ( rc )
return rc ;
2014-08-04 13:36:49 -04:00
/*
* Fall back to old SID on NNP or nosuid if not an allowed
* transition .
*/
rc = check_nnp_nosuid ( bprm , old_tsec , new_tsec ) ;
if ( rc )
new_tsec - > sid = old_tsec - > sid ;
2005-04-16 15:20:36 -07:00
}
2016-09-09 11:37:49 -04:00
ad . type = LSM_AUDIT_DATA_FILE ;
ad . u . file = bprm - > file ;
2005-04-16 15:20:36 -07:00
CRED: Make execve() take advantage of copy-on-write credentials
Make execve() take advantage of copy-on-write credentials, allowing it to set
up the credentials in advance, and then commit the whole lot after the point
of no return.
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
The credential bits from struct linux_binprm are, for the most part,
replaced with a single credentials pointer (bprm->cred). This means that
all the creds can be calculated in advance and then applied at the point
of no return with no possibility of failure.
I would like to replace bprm->cap_effective with:
cap_isclear(bprm->cap_effective)
but this seems impossible due to special behaviour for processes of pid 1
(they always retain their parent's capability masks where normally they'd
be changed - see cap_bprm_set_creds()).
The following sequence of events now happens:
(a) At the start of do_execve, the current task's cred_exec_mutex is
locked to prevent PTRACE_ATTACH from obsoleting the calculation of
creds that we make.
(a) prepare_exec_creds() is then called to make a copy of the current
task's credentials and prepare it. This copy is then assigned to
bprm->cred.
This renders security_bprm_alloc() and security_bprm_free()
unnecessary, and so they've been removed.
(b) The determination of unsafe execution is now performed immediately
after (a) rather than later on in the code. The result is stored in
bprm->unsafe for future reference.
(c) prepare_binprm() is called, possibly multiple times.
(i) This applies the result of set[ug]id binaries to the new creds
attached to bprm->cred. Personality bit clearance is recorded,
but now deferred on the basis that the exec procedure may yet
fail.
(ii) This then calls the new security_bprm_set_creds(). This should
calculate the new LSM and capability credentials into *bprm->cred.
This folds together security_bprm_set() and parts of
security_bprm_apply_creds() (these two have been removed).
Anything that might fail must be done at this point.
(iii) bprm->cred_prepared is set to 1.
bprm->cred_prepared is 0 on the first pass of the security
calculations, and 1 on all subsequent passes. This allows SELinux
in (ii) to base its calculations only on the initial script and
not on the interpreter.
(d) flush_old_exec() is called to commit the task to execution. This
performs the following steps with regard to credentials:
(i) Clear pdeath_signal and set dumpable in certain circumstances that
may not be covered by commit_creds().
(ii) Clear any bits in current->personality that were deferred from
(c.i).
(e) install_exec_creds() [compute_creds() as was] is called to install the
new credentials. This performs the following steps with regard to
credentials:
(i) Calls security_bprm_committing_creds() to apply any security
requirements, such as flushing unauthorised files in SELinux, that
must be done before the credentials are changed.
This is made up of bits of security_bprm_apply_creds() and
security_bprm_post_apply_creds(), both of which have been removed.
This function is not allowed to fail; anything that might fail
must have been done in (c.ii).
(ii) Calls commit_creds() to apply the new credentials in a single
assignment (more or less). Possibly pdeath_signal and dumpable
should be part of struct creds.
(iii) Unlocks the task's cred_replace_mutex, thus allowing
PTRACE_ATTACH to take place.
(iv) Clears the bprm->cred pointer as the credentials it was holding
are now immutable.
(v) Calls security_bprm_committed_creds() to apply any security
alterations that must be done after the creds have been changed.
SELinux uses this to flush signals and signal handlers.
(f) If an error occurs before (d.i), bprm_free() will call abort_creds()
to destroy the proposed new credentials and will then unlock
cred_replace_mutex. No changes to the credentials will have been
made.
(2) LSM interface.
A number of functions have been changed, added or removed:
(*) security_bprm_alloc(), ->bprm_alloc_security()
(*) security_bprm_free(), ->bprm_free_security()
Removed in favour of preparing new credentials and modifying those.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
(*) security_bprm_post_apply_creds(), ->bprm_post_apply_creds()
Removed; split between security_bprm_set_creds(),
security_bprm_committing_creds() and security_bprm_committed_creds().
(*) security_bprm_set(), ->bprm_set_security()
Removed; folded into security_bprm_set_creds().
(*) security_bprm_set_creds(), ->bprm_set_creds()
New. The new credentials in bprm->cred should be checked and set up
as appropriate. bprm->cred_prepared is 0 on the first call, 1 on the
second and subsequent calls.
(*) security_bprm_committing_creds(), ->bprm_committing_creds()
(*) security_bprm_committed_creds(), ->bprm_committed_creds()
New. Apply the security effects of the new credentials. This
includes closing unauthorised files in SELinux. This function may not
fail. When the former is called, the creds haven't yet been applied
to the process; when the latter is called, they have.
The former may access bprm->cred, the latter may not.
(3) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) The bprm_security_struct struct has been removed in favour of using
the credentials-under-construction approach.
(c) flush_unauthorized_files() now takes a cred pointer and passes it on
to inode_has_perm(), file_has_perm() and dentry_open().
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:24 +11:00
if ( new_tsec - > sid = = old_tsec - > sid ) {
2023-03-09 13:30:37 -05:00
rc = avc_has_perm ( old_tsec - > sid , isec - > sid ,
2005-04-16 15:20:36 -07:00
SECCLASS_FILE , FILE__EXECUTE_NO_TRANS , & ad ) ;
if ( rc )
return rc ;
} else {
/* Check permissions for the transition. */
2023-03-09 13:30:37 -05:00
rc = avc_has_perm ( old_tsec - > sid , new_tsec - > sid ,
2005-04-16 15:20:36 -07:00
SECCLASS_PROCESS , PROCESS__TRANSITION , & ad ) ;
if ( rc )
return rc ;
2023-03-09 13:30:37 -05:00
rc = avc_has_perm ( new_tsec - > sid , isec - > sid ,
2005-04-16 15:20:36 -07:00
SECCLASS_FILE , FILE__ENTRYPOINT , & ad ) ;
if ( rc )
return rc ;
CRED: Make execve() take advantage of copy-on-write credentials
Make execve() take advantage of copy-on-write credentials, allowing it to set
up the credentials in advance, and then commit the whole lot after the point
of no return.
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
The credential bits from struct linux_binprm are, for the most part,
replaced with a single credentials pointer (bprm->cred). This means that
all the creds can be calculated in advance and then applied at the point
of no return with no possibility of failure.
I would like to replace bprm->cap_effective with:
cap_isclear(bprm->cap_effective)
but this seems impossible due to special behaviour for processes of pid 1
(they always retain their parent's capability masks where normally they'd
be changed - see cap_bprm_set_creds()).
The following sequence of events now happens:
(a) At the start of do_execve, the current task's cred_exec_mutex is
locked to prevent PTRACE_ATTACH from obsoleting the calculation of
creds that we make.
(a) prepare_exec_creds() is then called to make a copy of the current
task's credentials and prepare it. This copy is then assigned to
bprm->cred.
This renders security_bprm_alloc() and security_bprm_free()
unnecessary, and so they've been removed.
(b) The determination of unsafe execution is now performed immediately
after (a) rather than later on in the code. The result is stored in
bprm->unsafe for future reference.
(c) prepare_binprm() is called, possibly multiple times.
(i) This applies the result of set[ug]id binaries to the new creds
attached to bprm->cred. Personality bit clearance is recorded,
but now deferred on the basis that the exec procedure may yet
fail.
(ii) This then calls the new security_bprm_set_creds(). This should
calculate the new LSM and capability credentials into *bprm->cred.
This folds together security_bprm_set() and parts of
security_bprm_apply_creds() (these two have been removed).
Anything that might fail must be done at this point.
(iii) bprm->cred_prepared is set to 1.
bprm->cred_prepared is 0 on the first pass of the security
calculations, and 1 on all subsequent passes. This allows SELinux
in (ii) to base its calculations only on the initial script and
not on the interpreter.
(d) flush_old_exec() is called to commit the task to execution. This
performs the following steps with regard to credentials:
(i) Clear pdeath_signal and set dumpable on certain circumstances that
may not be covered by commit_creds().
(ii) Clear any bits in current->personality that were deferred from
(c.i).
(e) install_exec_creds() [compute_creds() as was] is called to install the
new credentials. This performs the following steps with regard to
credentials:
(i) Calls security_bprm_committing_creds() to apply any security
requirements, such as flushing unauthorised files in SELinux, that
must be done before the credentials are changed.
This is made up of bits of security_bprm_apply_creds() and
security_bprm_post_apply_creds(), both of which have been removed.
This function is not allowed to fail; anything that might fail
must have been done in (c.ii).
(ii) Calls commit_creds() to apply the new credentials in a single
assignment (more or less). Possibly pdeath_signal and dumpable
should be part of struct creds.
(iii) Unlocks the task's cred_replace_mutex, thus allowing
PTRACE_ATTACH to take place.
(iv) Clears The bprm->cred pointer as the credentials it was holding
are now immutable.
(v) Calls security_bprm_committed_creds() to apply any security
alterations that must be done after the creds have been changed.
SELinux uses this to flush signals and signal handlers.
(f) If an error occurs before (d.i), bprm_free() will call abort_creds()
to destroy the proposed new credentials and will then unlock
cred_replace_mutex. No changes to the credentials will have been
made.
(2) LSM interface.
A number of functions have been changed, added or removed:
(*) security_bprm_alloc(), ->bprm_alloc_security()
(*) security_bprm_free(), ->bprm_free_security()
Removed in favour of preparing new credentials and modifying those.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
(*) security_bprm_post_apply_creds(), ->bprm_post_apply_creds()
Removed; split between security_bprm_set_creds(),
security_bprm_committing_creds() and security_bprm_committed_creds().
(*) security_bprm_set(), ->bprm_set_security()
Removed; folded into security_bprm_set_creds().
(*) security_bprm_set_creds(), ->bprm_set_creds()
New. The new credentials in bprm->creds should be checked and set up
as appropriate. bprm->cred_prepared is 0 on the first call, 1 on the
second and subsequent calls.
(*) security_bprm_committing_creds(), ->bprm_committing_creds()
(*) security_bprm_committed_creds(), ->bprm_committed_creds()
New. Apply the security effects of the new credentials. This
includes closing unauthorised files in SELinux. This function may not
fail. When the former is called, the creds haven't yet been applied
to the process; when the latter is called, they have.
The former may access bprm->cred, the latter may not.
(3) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) The bprm_security_struct struct has been removed in favour of using
the credentials-under-construction approach.
(c) flush_unauthorized_files() now takes a cred pointer and passes it on
to inode_has_perm(), file_has_perm() and dentry_open().
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:24 +11:00
/* Check for shared state */
if ( bprm - > unsafe & LSM_UNSAFE_SHARE ) {
2023-03-09 13:30:37 -05:00
rc = avc_has_perm ( old_tsec - > sid , new_tsec - > sid ,
CRED: Make execve() take advantage of copy-on-write credentials
Make execve() take advantage of copy-on-write credentials, allowing it to set
up the credentials in advance, and then commit the whole lot after the point
of no return.
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
The credential bits from struct linux_binprm are, for the most part,
replaced with a single credentials pointer (bprm->cred). This means that
all the creds can be calculated in advance and then applied at the point
of no return with no possibility of failure.
I would like to replace bprm->cap_effective with:
cap_isclear(bprm->cap_effective)
but this seems impossible due to special behaviour for processes of pid 1
(they always retain their parent's capability masks where normally they'd
be changed - see cap_bprm_set_creds()).
The following sequence of events now happens:
(a) At the start of do_execve, the current task's cred_exec_mutex is
locked to prevent PTRACE_ATTACH from obsoleting the calculation of
creds that we make.
(a) prepare_exec_creds() is then called to make a copy of the current
task's credentials and prepare it. This copy is then assigned to
bprm->cred.
This renders security_bprm_alloc() and security_bprm_free()
unnecessary, and so they've been removed.
(b) The determination of unsafe execution is now performed immediately
after (a) rather than later on in the code. The result is stored in
bprm->unsafe for future reference.
(c) prepare_binprm() is called, possibly multiple times.
(i) This applies the result of set[ug]id binaries to the new creds
attached to bprm->cred. Personality bit clearance is recorded,
but now deferred on the basis that the exec procedure may yet
fail.
(ii) This then calls the new security_bprm_set_creds(). This should
calculate the new LSM and capability credentials into *bprm->cred.
This folds together security_bprm_set() and parts of
security_bprm_apply_creds() (these two have been removed).
Anything that might fail must be done at this point.
(iii) bprm->cred_prepared is set to 1.
bprm->cred_prepared is 0 on the first pass of the security
calculations, and 1 on all subsequent passes. This allows SELinux
in (ii) to base its calculations only on the initial script and
not on the interpreter.
(d) flush_old_exec() is called to commit the task to execution. This
performs the following steps with regard to credentials:
(i) Clear pdeath_signal and set dumpable on certain circumstances that
may not be covered by commit_creds().
(ii) Clear any bits in current->personality that were deferred from
(c.i).
(e) install_exec_creds() [compute_creds() as was] is called to install the
new credentials. This performs the following steps with regard to
credentials:
(i) Calls security_bprm_committing_creds() to apply any security
requirements, such as flushing unauthorised files in SELinux, that
must be done before the credentials are changed.
This is made up of bits of security_bprm_apply_creds() and
security_bprm_post_apply_creds(), both of which have been removed.
This function is not allowed to fail; anything that might fail
must have been done in (c.ii).
(ii) Calls commit_creds() to apply the new credentials in a single
assignment (more or less). Possibly pdeath_signal and dumpable
should be part of struct creds.
(iii) Unlocks the task's cred_replace_mutex, thus allowing
PTRACE_ATTACH to take place.
(iv) Clears The bprm->cred pointer as the credentials it was holding
are now immutable.
(v) Calls security_bprm_committed_creds() to apply any security
alterations that must be done after the creds have been changed.
SELinux uses this to flush signals and signal handlers.
(f) If an error occurs before (d.i), bprm_free() will call abort_creds()
to destroy the proposed new credentials and will then unlock
cred_replace_mutex. No changes to the credentials will have been
made.
(2) LSM interface.
A number of functions have been changed, added or removed:
(*) security_bprm_alloc(), ->bprm_alloc_security()
(*) security_bprm_free(), ->bprm_free_security()
Removed in favour of preparing new credentials and modifying those.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
(*) security_bprm_post_apply_creds(), ->bprm_post_apply_creds()
Removed; split between security_bprm_set_creds(),
security_bprm_committing_creds() and security_bprm_committed_creds().
(*) security_bprm_set(), ->bprm_set_security()
Removed; folded into security_bprm_set_creds().
(*) security_bprm_set_creds(), ->bprm_set_creds()
New. The new credentials in bprm->creds should be checked and set up
as appropriate. bprm->cred_prepared is 0 on the first call, 1 on the
second and subsequent calls.
(*) security_bprm_committing_creds(), ->bprm_committing_creds()
(*) security_bprm_committed_creds(), ->bprm_committed_creds()
New. Apply the security effects of the new credentials. This
includes closing unauthorised files in SELinux. This function may not
fail. When the former is called, the creds haven't yet been applied
to the process; when the latter is called, they have.
The former may access bprm->cred, the latter may not.
(3) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) The bprm_security_struct struct has been removed in favour of using
the credentials-under-construction approach.
(c) flush_unauthorized_files() now takes a cred pointer and passes it on
to inode_has_perm(), file_has_perm() and dentry_open().
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:24 +11:00
SECCLASS_PROCESS , PROCESS__SHARE ,
NULL ) ;
if ( rc )
return - EPERM ;
}
/* Make sure that anyone attempting to ptrace over a task that
 * changes its SID has the appropriate permission */
2017-01-23 17:26:31 +13:00
if ( bprm - > unsafe & LSM_UNSAFE_PTRACE ) {
2017-01-09 10:07:31 -05:00
u32 ptsid = ptrace_parent_sid ( ) ;
CRED: Make execve() take advantage of copy-on-write credentials
Make execve() take advantage of copy-on-write credentials, allowing it to set
up the credentials in advance, and then commit the whole lot after the point
of no return.
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
The credential bits from struct linux_binprm are, for the most part,
replaced with a single credentials pointer (bprm->cred). This means that
all the creds can be calculated in advance and then applied at the point
of no return with no possibility of failure.
I would like to replace bprm->cap_effective with:
cap_isclear(bprm->cap_effective)
but this seems impossible due to special behaviour for processes of pid 1
(they always retain their parent's capability masks where normally they'd
be changed - see cap_bprm_set_creds()).
The following sequence of events now happens:
(a) At the start of do_execve, the current task's cred_exec_mutex is
locked to prevent PTRACE_ATTACH from obsoleting the calculation of
creds that we make.
(a) prepare_exec_creds() is then called to make a copy of the current
task's credentials and prepare it. This copy is then assigned to
bprm->cred.
This renders security_bprm_alloc() and security_bprm_free()
unnecessary, and so they've been removed.
(b) The determination of unsafe execution is now performed immediately
after (a) rather than later on in the code. The result is stored in
bprm->unsafe for future reference.
(c) prepare_binprm() is called, possibly multiple times.
(i) This applies the result of set[ug]id binaries to the new creds
attached to bprm->cred. Personality bit clearance is recorded,
but now deferred on the basis that the exec procedure may yet
fail.
(ii) This then calls the new security_bprm_set_creds(). This should
calculate the new LSM and capability credentials into *bprm->cred.
This folds together security_bprm_set() and parts of
security_bprm_apply_creds() (these two have been removed).
Anything that might fail must be done at this point.
(iii) bprm->cred_prepared is set to 1.
bprm->cred_prepared is 0 on the first pass of the security
calculations, and 1 on all subsequent passes. This allows SELinux
in (ii) to base its calculations only on the initial script and
not on the interpreter.
(d) flush_old_exec() is called to commit the task to execution. This
performs the following steps with regard to credentials:
(i) Clear pdeath_signal and set dumpable on certain circumstances that
may not be covered by commit_creds().
(ii) Clear any bits in current->personality that were deferred from
(c.i).
(e) install_exec_creds() [compute_creds() as was] is called to install the
new credentials. This performs the following steps with regard to
credentials:
(i) Calls security_bprm_committing_creds() to apply any security
requirements, such as flushing unauthorised files in SELinux, that
must be done before the credentials are changed.
This is made up of bits of security_bprm_apply_creds() and
security_bprm_post_apply_creds(), both of which have been removed.
This function is not allowed to fail; anything that might fail
must have been done in (c.ii).
(ii) Calls commit_creds() to apply the new credentials in a single
assignment (more or less). Possibly pdeath_signal and dumpable
should be part of struct creds.
(iii) Unlocks the task's cred_replace_mutex, thus allowing
PTRACE_ATTACH to take place.
(iv) Clears The bprm->cred pointer as the credentials it was holding
are now immutable.
(v) Calls security_bprm_committed_creds() to apply any security
alterations that must be done after the creds have been changed.
SELinux uses this to flush signals and signal handlers.
(f) If an error occurs before (d.i), bprm_free() will call abort_creds()
to destroy the proposed new credentials and will then unlock
cred_replace_mutex. No changes to the credentials will have been
made.
(2) LSM interface.
A number of functions have been changed, added or removed:
(*) security_bprm_alloc(), ->bprm_alloc_security()
(*) security_bprm_free(), ->bprm_free_security()
Removed in favour of preparing new credentials and modifying those.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
(*) security_bprm_post_apply_creds(), ->bprm_post_apply_creds()
Removed; split between security_bprm_set_creds(),
security_bprm_committing_creds() and security_bprm_committed_creds().
(*) security_bprm_set(), ->bprm_set_security()
Removed; folded into security_bprm_set_creds().
(*) security_bprm_set_creds(), ->bprm_set_creds()
New. The new credentials in bprm->creds should be checked and set up
as appropriate. bprm->cred_prepared is 0 on the first call, 1 on the
second and subsequent calls.
(*) security_bprm_committing_creds(), ->bprm_committing_creds()
(*) security_bprm_committed_creds(), ->bprm_committed_creds()
New. Apply the security effects of the new credentials. This
includes closing unauthorised files in SELinux. This function may not
fail. When the former is called, the creds haven't yet been applied
to the process; when the latter is called, they have.
The former may access bprm->cred, the latter may not.
(3) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) The bprm_security_struct struct has been removed in favour of using
the credentials-under-construction approach.
(c) flush_unauthorized_files() now takes a cred pointer and passes it on
to inode_has_perm(), file_has_perm() and dentry_open().
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:24 +11:00
if ( ptsid ! = 0 ) {
2023-03-09 13:30:37 -05:00
rc = avc_has_perm ( ptsid , new_tsec - > sid ,
CRED: Make execve() take advantage of copy-on-write credentials
Make execve() take advantage of copy-on-write credentials, allowing it to set
up the credentials in advance, and then commit the whole lot after the point
of no return.
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
The credential bits from struct linux_binprm are, for the most part,
replaced with a single credentials pointer (bprm->cred). This means that
all the creds can be calculated in advance and then applied at the point
of no return with no possibility of failure.
I would like to replace bprm->cap_effective with:
cap_isclear(bprm->cap_effective)
but this seems impossible due to special behaviour for processes of pid 1
(they always retain their parent's capability masks where normally they'd
be changed - see cap_bprm_set_creds()).
The following sequence of events now happens:
(a) At the start of do_execve, the current task's cred_exec_mutex is
locked to prevent PTRACE_ATTACH from obsoleting the calculation of
creds that we make.
(a) prepare_exec_creds() is then called to make a copy of the current
task's credentials and prepare it. This copy is then assigned to
bprm->cred.
This renders security_bprm_alloc() and security_bprm_free()
unnecessary, and so they've been removed.
(b) The determination of unsafe execution is now performed immediately
after (a) rather than later on in the code. The result is stored in
bprm->unsafe for future reference.
(c) prepare_binprm() is called, possibly multiple times.
(i) This applies the result of set[ug]id binaries to the new creds
attached to bprm->cred. Personality bit clearance is recorded,
but now deferred on the basis that the exec procedure may yet
fail.
(ii) This then calls the new security_bprm_set_creds(). This should
calculate the new LSM and capability credentials into *bprm->cred.
This folds together security_bprm_set() and parts of
security_bprm_apply_creds() (these two have been removed).
Anything that might fail must be done at this point.
(iii) bprm->cred_prepared is set to 1.
bprm->cred_prepared is 0 on the first pass of the security
calculations, and 1 on all subsequent passes. This allows SELinux
in (ii) to base its calculations only on the initial script and
not on the interpreter.
(d) flush_old_exec() is called to commit the task to execution. This
performs the following steps with regard to credentials:
(i) Clear pdeath_signal and set dumpable on certain circumstances that
may not be covered by commit_creds().
(ii) Clear any bits in current->personality that were deferred from
(c.i).
(e) install_exec_creds() [compute_creds() as was] is called to install the
new credentials. This performs the following steps with regard to
credentials:
(i) Calls security_bprm_committing_creds() to apply any security
requirements, such as flushing unauthorised files in SELinux, that
must be done before the credentials are changed.
This is made up of bits of security_bprm_apply_creds() and
security_bprm_post_apply_creds(), both of which have been removed.
This function is not allowed to fail; anything that might fail
must have been done in (c.ii).
(ii) Calls commit_creds() to apply the new credentials in a single
assignment (more or less). Possibly pdeath_signal and dumpable
should be part of struct creds.
(iii) Unlocks the task's cred_replace_mutex, thus allowing
PTRACE_ATTACH to take place.
(iv) Clears The bprm->cred pointer as the credentials it was holding
are now immutable.
(v) Calls security_bprm_committed_creds() to apply any security
alterations that must be done after the creds have been changed.
SELinux uses this to flush signals and signal handlers.
(f) If an error occurs before (d.i), bprm_free() will call abort_creds()
to destroy the proposed new credentials and will then unlock
cred_replace_mutex. No changes to the credentials will have been
made.
(2) LSM interface.
A number of functions have been changed, added or removed:
(*) security_bprm_alloc(), ->bprm_alloc_security()
(*) security_bprm_free(), ->bprm_free_security()
Removed in favour of preparing new credentials and modifying those.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
(*) security_bprm_post_apply_creds(), ->bprm_post_apply_creds()
Removed; split between security_bprm_set_creds(),
security_bprm_committing_creds() and security_bprm_committed_creds().
(*) security_bprm_set(), ->bprm_set_security()
Removed; folded into security_bprm_set_creds().
(*) security_bprm_set_creds(), ->bprm_set_creds()
New. The new credentials in bprm->creds should be checked and set up
as appropriate. bprm->cred_prepared is 0 on the first call, 1 on the
second and subsequent calls.
(*) security_bprm_committing_creds(), ->bprm_committing_creds()
(*) security_bprm_committed_creds(), ->bprm_committed_creds()
New. Apply the security effects of the new credentials. This
includes closing unauthorised files in SELinux. This function may not
fail. When the former is called, the creds haven't yet been applied
to the process; when the latter is called, they have.
The former may access bprm->cred, the latter may not.
(3) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) The bprm_security_struct struct has been removed in favour of using
the credentials-under-construction approach.
(c) flush_unauthorized_files() now takes a cred pointer and passes it on
to inode_has_perm(), file_has_perm() and dentry_open().
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:24 +11:00
SECCLASS_PROCESS ,
PROCESS__PTRACE , NULL ) ;
if ( rc )
return - EPERM ;
}
}
2005-04-16 15:20:36 -07:00
CRED: Make execve() take advantage of copy-on-write credentials
Make execve() take advantage of copy-on-write credentials, allowing it to set
up the credentials in advance, and then commit the whole lot after the point
of no return.
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
The credential bits from struct linux_binprm are, for the most part,
replaced with a single credentials pointer (bprm->cred). This means that
all the creds can be calculated in advance and then applied at the point
of no return with no possibility of failure.
I would like to replace bprm->cap_effective with:
cap_isclear(bprm->cap_effective)
but this seems impossible due to special behaviour for processes of pid 1
(they always retain their parent's capability masks where normally they'd
be changed - see cap_bprm_set_creds()).
The following sequence of events now happens:
(a) At the start of do_execve, the current task's cred_exec_mutex is
locked to prevent PTRACE_ATTACH from obsoleting the calculation of
creds that we make.
(a) prepare_exec_creds() is then called to make a copy of the current
task's credentials and prepare it. This copy is then assigned to
bprm->cred.
This renders security_bprm_alloc() and security_bprm_free()
unnecessary, and so they've been removed.
(b) The determination of unsafe execution is now performed immediately
after (a) rather than later on in the code. The result is stored in
bprm->unsafe for future reference.
(c) prepare_binprm() is called, possibly multiple times.
(i) This applies the result of set[ug]id binaries to the new creds
attached to bprm->cred. Personality bit clearance is recorded,
but now deferred on the basis that the exec procedure may yet
fail.
(ii) This then calls the new security_bprm_set_creds(). This should
calculate the new LSM and capability credentials into *bprm->cred.
This folds together security_bprm_set() and parts of
security_bprm_apply_creds() (these two have been removed).
Anything that might fail must be done at this point.
(iii) bprm->cred_prepared is set to 1.
bprm->cred_prepared is 0 on the first pass of the security
calculations, and 1 on all subsequent passes. This allows SELinux
in (ii) to base its calculations only on the initial script and
not on the interpreter.
(d) flush_old_exec() is called to commit the task to execution. This
performs the following steps with regard to credentials:
(i) Clear pdeath_signal and set dumpable on certain circumstances that
may not be covered by commit_creds().
(ii) Clear any bits in current->personality that were deferred from
(c.i).
(e) install_exec_creds() [compute_creds() as was] is called to install the
new credentials. This performs the following steps with regard to
credentials:
(i) Calls security_bprm_committing_creds() to apply any security
requirements, such as flushing unauthorised files in SELinux, that
must be done before the credentials are changed.
This is made up of bits of security_bprm_apply_creds() and
security_bprm_post_apply_creds(), both of which have been removed.
This function is not allowed to fail; anything that might fail
must have been done in (c.ii).
(ii) Calls commit_creds() to apply the new credentials in a single
assignment (more or less). Possibly pdeath_signal and dumpable
should be part of struct creds.
(iii) Unlocks the task's cred_replace_mutex, thus allowing
PTRACE_ATTACH to take place.
(iv) Clears The bprm->cred pointer as the credentials it was holding
are now immutable.
(v) Calls security_bprm_committed_creds() to apply any security
alterations that must be done after the creds have been changed.
SELinux uses this to flush signals and signal handlers.
(f) If an error occurs before (d.i), bprm_free() will call abort_creds()
to destroy the proposed new credentials and will then unlock
cred_replace_mutex. No changes to the credentials will have been
made.
(2) LSM interface.
A number of functions have been changed, added or removed:
(*) security_bprm_alloc(), ->bprm_alloc_security()
(*) security_bprm_free(), ->bprm_free_security()
Removed in favour of preparing new credentials and modifying those.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
(*) security_bprm_post_apply_creds(), ->bprm_post_apply_creds()
Removed; split between security_bprm_set_creds(),
security_bprm_committing_creds() and security_bprm_committed_creds().
(*) security_bprm_set(), ->bprm_set_security()
Removed; folded into security_bprm_set_creds().
(*) security_bprm_set_creds(), ->bprm_set_creds()
New. The new credentials in bprm->creds should be checked and set up
as appropriate. bprm->cred_prepared is 0 on the first call, 1 on the
second and subsequent calls.
(*) security_bprm_committing_creds(), ->bprm_committing_creds()
(*) security_bprm_committed_creds(), ->bprm_committed_creds()
New. Apply the security effects of the new credentials. This
includes closing unauthorised files in SELinux. This function may not
fail. When the former is called, the creds haven't yet been applied
to the process; when the latter is called, they have.
The former may access bprm->cred, the latter may not.
(3) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) The bprm_security_struct struct has been removed in favour of using
the credentials-under-construction approach.
(c) flush_unauthorized_files() now takes a cred pointer and passes it on
to inode_has_perm(), file_has_perm() and dentry_open().
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:24 +11:00
/* Clear any possibly unsafe personality bits on exec: */
bprm - > per_clear | = PER_CLEAR_ON_SETID ;
2008-11-14 10:39:19 +11:00
2005-04-16 15:20:36 -07:00
/* Enable secure mode for SID transitions unless
the noatsecure permission is granted between
the two SIDs, i.e. avc_has_perm() returns 0. */
2023-03-09 13:30:37 -05:00
rc = avc_has_perm ( old_tsec - > sid , new_tsec - > sid ,
2017-07-18 15:25:25 -07:00
SECCLASS_PROCESS , PROCESS__NOATSECURE ,
NULL ) ;
bprm - > secureexec | = ! ! rc ;
2005-04-16 15:20:36 -07:00
}
2017-07-18 15:25:25 -07:00
return 0 ;
2005-04-16 15:20:36 -07:00
}
2012-08-21 22:32:06 -04:00
/*
 * Check one open file against the opaque context @p.
 *
 * Passes @p, @file and the access vector computed by file_to_av(@file)
 * to file_has_perm().  When that check reports a non-zero result the
 * function returns @fd + 1; otherwise it returns 0.  The +1 offset keeps
 * descriptor 0 distinguishable from the "nothing to report" value.
 *
 * NOTE(review): @p is presumably the credential the file is being
 * checked against, and this looks like a per-descriptor callback whose
 * non-zero return stops the walk - confirm against the caller.
 */
static int match_file(const void *p, struct file *file, unsigned fd)
{
	if (file_has_perm(p, file, file_to_av(file)))
		return fd + 1;

	return 0;
}
2005-04-16 15:20:36 -07:00
/* Derived from fs/exec.c:flush_old_files. */
2008-11-14 10:39:22 +11:00
static inline void flush_unauthorized_files ( const struct cred * cred ,
struct files_struct * files )
2005-04-16 15:20:36 -07:00
{
struct file * file , * devnull = NULL ;
2006-09-25 23:32:03 -07:00
struct tty_struct * tty ;
2006-12-08 02:36:04 -08:00
int drop_tty = 0 ;
2012-08-21 22:32:06 -04:00
unsigned n ;
2005-04-16 15:20:36 -07:00
2006-12-08 02:36:04 -08:00
tty = get_current_tty ( ) ;
2005-04-16 15:20:36 -07:00
if ( tty ) {
2016-01-09 21:35:23 -08:00
spin_lock ( & tty - > files_lock ) ;
2008-10-31 17:40:00 -04:00
if ( ! list_empty ( & tty - > tty_files ) ) {
tty: fix fu_list abuse
tty: fix fu_list abuse
tty code abuses fu_list, which causes a bug in remount,ro handling.
If a tty device node is opened on a filesystem, then the last link to the inode
removed, the filesystem will be allowed to be remounted readonly. This is
because fs_may_remount_ro does not find the 0 link tty inode on the file sb
list (because the tty code incorrectly removed it to use for its own purpose).
This can result in a filesystem with errors after it is marked "clean".
Taking idea from Christoph's initial patch, allocate a tty private struct
at file->private_data and put our required list fields in there, linking
file and tty. This makes tty nodes behave the same way as other device nodes
and avoid meddling with the vfs, and avoids this bug.
The error handling is not trivial in the tty code, so for this bugfix, I take
the simple approach of using __GFP_NOFAIL and don't worry about memory errors.
This is not a problem because our allocator doesn't fail small allocs as a rule
anyway. So proper error handling is left as an exercise for tty hackers.
[ Arguably filesystem's device inode would ideally be divorced from the
driver's pseudo inode when it is opened, but in practice it's not clear whether
that will ever be worth implementing. ]
Cc: linux-kernel@vger.kernel.org
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Nick Piggin <npiggin@kernel.dk>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2010-08-18 04:37:36 +10:00
struct tty_file_private * file_priv ;
2008-10-31 17:40:00 -04:00
2005-04-16 15:20:36 -07:00
/* Revalidate access to controlling tty.
2013-06-13 23:37:55 +01:00
Use file_path_has_perm on the tty path directly
rather than using file_has_perm , as this particular
open file may belong to another process and we are
only interested in the inode - based check here . */
tty: fix fu_list abuse
tty: fix fu_list abuse
tty code abuses fu_list, which causes a bug in remount,ro handling.
If a tty device node is opened on a filesystem, then the last link to the inode
removed, the filesystem will be allowed to be remounted readonly. This is
because fs_may_remount_ro does not find the 0 link tty inode on the file sb
list (because the tty code incorrectly removed it to use for its own purpose).
This can result in a filesystem with errors after it is marked "clean".
Taking idea from Christoph's initial patch, allocate a tty private struct
at file->private_data and put our required list fields in there, linking
file and tty. This makes tty nodes behave the same way as other device nodes
and avoid meddling with the vfs, and avoids this bug.
The error handling is not trivial in the tty code, so for this bugfix, I take
the simple approach of using __GFP_NOFAIL and don't worry about memory errors.
This is not a problem because our allocator doesn't fail small allocs as a rule
anyway. So proper error handling is left as an exercise for tty hackers.
[ Arguably filesystem's device inode would ideally be divorced from the
driver's pseudo inode when it is opened, but in practice it's not clear whether
that will ever be worth implementing. ]
Cc: linux-kernel@vger.kernel.org
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Nick Piggin <npiggin@kernel.dk>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2010-08-18 04:37:36 +10:00
file_priv = list_first_entry ( & tty - > tty_files ,
struct tty_file_private , list ) ;
file = file_priv - > file ;
2013-06-13 23:37:55 +01:00
if ( file_path_has_perm ( cred , file , FILE__READ | FILE__WRITE ) )
2006-12-08 02:36:04 -08:00
drop_tty = 1 ;
2005-04-16 15:20:36 -07:00
}
2016-01-09 21:35:23 -08:00
spin_unlock ( & tty - > files_lock ) ;
2008-10-13 10:39:13 +01:00
tty_kref_put ( tty ) ;
2005-04-16 15:20:36 -07:00
}
2007-05-08 00:26:56 -07:00
/* Reset controlling tty. */
if ( drop_tty )
no_tty ( ) ;
2005-04-16 15:20:36 -07:00
/* Revalidate access to inherited open files. */
2012-08-21 22:32:06 -04:00
n = iterate_fd ( files , 0 , match_file , cred ) ;
if ( ! n ) /* none found? */
return ;
2005-04-16 15:20:36 -07:00
2012-08-21 22:32:06 -04:00
devnull = dentry_open ( & selinux_null , O_RDWR , cred ) ;
2012-10-16 13:30:07 -04:00
if ( IS_ERR ( devnull ) )
devnull = NULL ;
/* replace all the matching ones with this */
do {
replace_fd ( n - 1 , devnull , 0 ) ;
} while ( ( n = iterate_fd ( files , n , match_file , cred ) ) ! = 0 ) ;
if ( devnull )
2012-08-21 22:32:06 -04:00
fput ( devnull ) ;
2005-04-16 15:20:36 -07:00
}
CRED: Make execve() take advantage of copy-on-write credentials
Make execve() take advantage of copy-on-write credentials, allowing it to set
up the credentials in advance, and then commit the whole lot after the point
of no return.
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
The credential bits from struct linux_binprm are, for the most part,
replaced with a single credentials pointer (bprm->cred). This means that
all the creds can be calculated in advance and then applied at the point
of no return with no possibility of failure.
I would like to replace bprm->cap_effective with:
cap_isclear(bprm->cap_effective)
but this seems impossible due to special behaviour for processes of pid 1
(they always retain their parent's capability masks where normally they'd
be changed - see cap_bprm_set_creds()).
The following sequence of events now happens:
(a) At the start of do_execve, the current task's cred_exec_mutex is
locked to prevent PTRACE_ATTACH from obsoleting the calculation of
creds that we make.
(a) prepare_exec_creds() is then called to make a copy of the current
task's credentials and prepare it. This copy is then assigned to
bprm->cred.
This renders security_bprm_alloc() and security_bprm_free()
unnecessary, and so they've been removed.
(b) The determination of unsafe execution is now performed immediately
after (a) rather than later on in the code. The result is stored in
bprm->unsafe for future reference.
(c) prepare_binprm() is called, possibly multiple times.
(i) This applies the result of set[ug]id binaries to the new creds
attached to bprm->cred. Personality bit clearance is recorded,
but now deferred on the basis that the exec procedure may yet
fail.
(ii) This then calls the new security_bprm_set_creds(). This should
calculate the new LSM and capability credentials into *bprm->cred.
This folds together security_bprm_set() and parts of
security_bprm_apply_creds() (these two have been removed).
Anything that might fail must be done at this point.
(iii) bprm->cred_prepared is set to 1.
bprm->cred_prepared is 0 on the first pass of the security
calculations, and 1 on all subsequent passes. This allows SELinux
in (ii) to base its calculations only on the initial script and
not on the interpreter.
(d) flush_old_exec() is called to commit the task to execution. This
performs the following steps with regard to credentials:
(i) Clear pdeath_signal and set dumpable on certain circumstances that
may not be covered by commit_creds().
(ii) Clear any bits in current->personality that were deferred from
(c.i).
(e) install_exec_creds() [compute_creds() as was] is called to install the
new credentials. This performs the following steps with regard to
credentials:
(i) Calls security_bprm_committing_creds() to apply any security
requirements, such as flushing unauthorised files in SELinux, that
must be done before the credentials are changed.
This is made up of bits of security_bprm_apply_creds() and
security_bprm_post_apply_creds(), both of which have been removed.
This function is not allowed to fail; anything that might fail
must have been done in (c.ii).
(ii) Calls commit_creds() to apply the new credentials in a single
assignment (more or less). Possibly pdeath_signal and dumpable
should be part of struct creds.
(iii) Unlocks the task's cred_replace_mutex, thus allowing
PTRACE_ATTACH to take place.
(iv) Clears the bprm->cred pointer as the credentials it was holding
are now immutable.
(v) Calls security_bprm_committed_creds() to apply any security
alterations that must be done after the creds have been changed.
SELinux uses this to flush signals and signal handlers.
(f) If an error occurs before (d.i), bprm_free() will call abort_creds()
to destroy the proposed new credentials and will then unlock
cred_replace_mutex. No changes to the credentials will have been
made.
(2) LSM interface.
A number of functions have been changed, added or removed:
(*) security_bprm_alloc(), ->bprm_alloc_security()
(*) security_bprm_free(), ->bprm_free_security()
Removed in favour of preparing new credentials and modifying those.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
(*) security_bprm_post_apply_creds(), ->bprm_post_apply_creds()
Removed; split between security_bprm_set_creds(),
security_bprm_committing_creds() and security_bprm_committed_creds().
(*) security_bprm_set(), ->bprm_set_security()
Removed; folded into security_bprm_set_creds().
(*) security_bprm_set_creds(), ->bprm_set_creds()
New. The new credentials in bprm->cred should be checked and set up
as appropriate. bprm->cred_prepared is 0 on the first call, 1 on the
second and subsequent calls.
(*) security_bprm_committing_creds(), ->bprm_committing_creds()
(*) security_bprm_committed_creds(), ->bprm_committed_creds()
New. Apply the security effects of the new credentials. This
includes closing unauthorised files in SELinux. This function may not
fail. When the former is called, the creds haven't yet been applied
to the process; when the latter is called, they have.
The former may access bprm->cred, the latter may not.
(3) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) The bprm_security_struct struct has been removed in favour of using
the credentials-under-construction approach.
(c) flush_unauthorized_files() now takes a cred pointer and passes it on
to inode_has_perm(), file_has_perm() and dentry_open().
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:24 +11:00
/*
* Prepare a process for imminent new credential changes due to exec
*/
2023-08-23 12:47:56 +05:00
static void selinux_bprm_committing_creds ( const struct linux_binprm * bprm )
2005-04-16 15:20:36 -07:00
{
CRED: Make execve() take advantage of copy-on-write credentials
Make execve() take advantage of copy-on-write credentials, allowing it to set
up the credentials in advance, and then commit the whole lot after the point
of no return.
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
The credential bits from struct linux_binprm are, for the most part,
replaced with a single credentials pointer (bprm->cred). This means that
all the creds can be calculated in advance and then applied at the point
of no return with no possibility of failure.
I would like to replace bprm->cap_effective with:
cap_isclear(bprm->cap_effective)
but this seems impossible due to special behaviour for processes of pid 1
(they always retain their parent's capability masks where normally they'd
be changed - see cap_bprm_set_creds()).
The following sequence of events now happens:
(a) At the start of do_execve, the current task's cred_exec_mutex is
locked to prevent PTRACE_ATTACH from obsoleting the calculation of
creds that we make.
(a) prepare_exec_creds() is then called to make a copy of the current
task's credentials and prepare it. This copy is then assigned to
bprm->cred.
This renders security_bprm_alloc() and security_bprm_free()
unnecessary, and so they've been removed.
(b) The determination of unsafe execution is now performed immediately
after (a) rather than later on in the code. The result is stored in
bprm->unsafe for future reference.
(c) prepare_binprm() is called, possibly multiple times.
(i) This applies the result of set[ug]id binaries to the new creds
attached to bprm->cred. Personality bit clearance is recorded,
but now deferred on the basis that the exec procedure may yet
fail.
(ii) This then calls the new security_bprm_set_creds(). This should
calculate the new LSM and capability credentials into *bprm->cred.
This folds together security_bprm_set() and parts of
security_bprm_apply_creds() (these two have been removed).
Anything that might fail must be done at this point.
(iii) bprm->cred_prepared is set to 1.
bprm->cred_prepared is 0 on the first pass of the security
calculations, and 1 on all subsequent passes. This allows SELinux
in (ii) to base its calculations only on the initial script and
not on the interpreter.
(d) flush_old_exec() is called to commit the task to execution. This
performs the following steps with regard to credentials:
(i) Clear pdeath_signal and set dumpable on certain circumstances that
may not be covered by commit_creds().
(ii) Clear any bits in current->personality that were deferred from
(c.i).
(e) install_exec_creds() [compute_creds() as was] is called to install the
new credentials. This performs the following steps with regard to
credentials:
(i) Calls security_bprm_committing_creds() to apply any security
requirements, such as flushing unauthorised files in SELinux, that
must be done before the credentials are changed.
This is made up of bits of security_bprm_apply_creds() and
security_bprm_post_apply_creds(), both of which have been removed.
This function is not allowed to fail; anything that might fail
must have been done in (c.ii).
(ii) Calls commit_creds() to apply the new credentials in a single
assignment (more or less). Possibly pdeath_signal and dumpable
should be part of struct creds.
(iii) Unlocks the task's cred_replace_mutex, thus allowing
PTRACE_ATTACH to take place.
(iv) Clears the bprm->cred pointer as the credentials it was holding
are now immutable.
(v) Calls security_bprm_committed_creds() to apply any security
alterations that must be done after the creds have been changed.
SELinux uses this to flush signals and signal handlers.
(f) If an error occurs before (d.i), bprm_free() will call abort_creds()
to destroy the proposed new credentials and will then unlock
cred_replace_mutex. No changes to the credentials will have been
made.
(2) LSM interface.
A number of functions have been changed, added or removed:
(*) security_bprm_alloc(), ->bprm_alloc_security()
(*) security_bprm_free(), ->bprm_free_security()
Removed in favour of preparing new credentials and modifying those.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
(*) security_bprm_post_apply_creds(), ->bprm_post_apply_creds()
Removed; split between security_bprm_set_creds(),
security_bprm_committing_creds() and security_bprm_committed_creds().
(*) security_bprm_set(), ->bprm_set_security()
Removed; folded into security_bprm_set_creds().
(*) security_bprm_set_creds(), ->bprm_set_creds()
New. The new credentials in bprm->cred should be checked and set up
as appropriate. bprm->cred_prepared is 0 on the first call, 1 on the
second and subsequent calls.
(*) security_bprm_committing_creds(), ->bprm_committing_creds()
(*) security_bprm_committed_creds(), ->bprm_committed_creds()
New. Apply the security effects of the new credentials. This
includes closing unauthorised files in SELinux. This function may not
fail. When the former is called, the creds haven't yet been applied
to the process; when the latter is called, they have.
The former may access bprm->cred, the latter may not.
(3) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) The bprm_security_struct struct has been removed in favour of using
the credentials-under-construction approach.
(c) flush_unauthorized_files() now takes a cred pointer and passes it on
to inode_has_perm(), file_has_perm() and dentry_open().
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:24 +11:00
struct task_security_struct * new_tsec ;
struct rlimit * rlim , * initrlim ;
int rc , i ;
CRED: Inaugurate COW credentials
Inaugurate copy-on-write credentials management. This uses RCU to manage the
credentials pointer in the task_struct with respect to accesses by other tasks.
A process may only modify its own credentials, and so does not need locking to
access or modify its own credentials.
A mutex (cred_replace_mutex) is added to the task_struct to control the effect
of PTRACE_ATTACHED on credential calculations, particularly with respect to
execve().
With this patch, the contents of an active credentials struct may not be
changed directly; rather a new set of credentials must be prepared, modified
and committed using something like the following sequence of events:
struct cred *new = prepare_creds();
int ret = blah(new);
if (ret < 0) {
abort_creds(new);
return ret;
}
return commit_creds(new);
There are some exceptions to this rule: the keyrings pointed to by the active
credentials may be instantiated - keyrings violate the COW rule as managing
COW keyrings is tricky, given that it is possible for a task to directly alter
the keys in a keyring in use by another task.
To help enforce this, various pointers to sets of credentials, such as those in
the task_struct, are declared const. The purpose of this is compile-time
discouragement of altering credentials through those pointers. Once a set of
credentials has been made public through one of these pointers, it may not be
modified, except under special circumstances:
(1) Its reference count may be incremented and decremented.
(2) The keyrings to which it points may be modified, but not replaced.
The only safe way to modify anything else is to create a replacement and commit
using the functions described in Documentation/credentials.txt (which will be
added by a later patch).
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
This now prepares and commits credentials in various places in the
security code rather than altering the current creds directly.
(2) Temporary credential overrides.
do_coredump() and sys_faccessat() now prepare their own credentials and
temporarily override the ones currently on the acting thread, whilst
preventing interference from other threads by holding cred_replace_mutex
on the thread being dumped.
This will be replaced in a future patch by something that hands down the
credentials directly to the functions being called, rather than altering
the task's objective credentials.
(3) LSM interface.
A number of functions have been changed, added or removed:
(*) security_capset_check(), ->capset_check()
(*) security_capset_set(), ->capset_set()
Removed in favour of security_capset().
(*) security_capset(), ->capset()
New. This is passed a pointer to the new creds, a pointer to the old
creds and the proposed capability sets. It should fill in the new
creds or return an error. All pointers, barring the pointer to the
new creds, are now const.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
Changed; now returns a value, which will cause the process to be
killed if it's an error.
(*) security_task_alloc(), ->task_alloc_security()
Removed in favour of security_prepare_creds().
(*) security_cred_free(), ->cred_free()
New. Free security data attached to cred->security.
(*) security_prepare_creds(), ->cred_prepare()
New. Duplicate any security data attached to cred->security.
(*) security_commit_creds(), ->cred_commit()
New. Apply any security effects for the upcoming installation of new
security by commit_creds().
(*) security_task_post_setuid(), ->task_post_setuid()
Removed in favour of security_task_fix_setuid().
(*) security_task_fix_setuid(), ->task_fix_setuid()
Fix up the proposed new credentials for setuid(). This is used by
cap_set_fix_setuid() to implicitly adjust capabilities in line with
setuid() changes. Changes are made to the new credentials, rather
than the task itself as in security_task_post_setuid().
(*) security_task_reparent_to_init(), ->task_reparent_to_init()
Removed. Instead the task being reparented to init is referred
directly to init's credentials.
NOTE! This results in the loss of some state: SELinux's osid no
longer records the sid of the thread that forked it.
(*) security_key_alloc(), ->key_alloc()
(*) security_key_permission(), ->key_permission()
Changed. These now take cred pointers rather than task pointers to
refer to the security context.
(4) sys_capset().
This has been simplified and uses less locking. The LSM functions it
calls have been merged.
(5) reparent_to_kthreadd().
This gives the current thread the same credentials as init by simply using
commit_thread() to point that way.
(6) __sigqueue_alloc() and switch_uid()
__sigqueue_alloc() can't stop the target task from changing its creds
beneath it, so this function gets a reference to the currently applicable
user_struct which it then passes into the sigqueue struct it returns if
successful.
switch_uid() is now called from commit_creds(), and possibly should be
folded into that. commit_creds() should take care of protecting
__sigqueue_alloc().
(7) [sg]et[ug]id() and co and [sg]et_current_groups.
The set functions now all use prepare_creds(), commit_creds() and
abort_creds() to build and check a new set of credentials before applying
it.
security_task_set[ug]id() is called inside the prepared section. This
guarantees that nothing else will affect the creds until we've finished.
The calling of set_dumpable() has been moved into commit_creds().
Much of the functionality of set_user() has been moved into
commit_creds().
The get functions all simply access the data directly.
(8) security_task_prctl() and cap_task_prctl().
security_task_prctl() has been modified to return -ENOSYS if it doesn't
want to handle a function, or otherwise return the return value directly
rather than through an argument.
Additionally, cap_task_prctl() now prepares a new set of credentials, even
if it doesn't end up using it.
(9) Keyrings.
A number of changes have been made to the keyrings code:
(a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have
all been dropped and built in to the credentials functions directly.
They may want separating out again later.
(b) key_alloc() and search_process_keyrings() now take a cred pointer
rather than a task pointer to specify the security context.
(c) copy_creds() gives a new thread within the same thread group a new
thread keyring if its parent had one, otherwise it discards the thread
keyring.
(d) The authorisation key now points directly to the credentials to extend
the search into rather than pointing to the task that carries them.
(e) Installing thread, process or session keyrings causes a new set of
credentials to be created, even though it's not strictly necessary for
process or session keyrings (they're shared).
(10) Usermode helper.
The usermode helper code now carries a cred struct pointer in its
subprocess_info struct instead of a new session keyring pointer. This set
of credentials is derived from init_cred and installed on the new process
after it has been cloned.
call_usermodehelper_setup() allocates the new credentials and
call_usermodehelper_freeinfo() discards them if they haven't been used. A
special cred function (prepare_usermodeinfo_creds()) is provided
specifically for call_usermodehelper_setup() to call.
call_usermodehelper_setkeys() adjusts the credentials to sport the
supplied keyring as the new session keyring.
(11) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) selinux_setprocattr() no longer does its check for whether the
current ptracer can access processes with the new SID inside the lock
that covers getting the ptracer's SID. Whilst this lock ensures that
the check is done with the ptracer pinned, the result is only valid
until the lock is released, so there's no point doing it inside the
lock.
(12) is_single_threaded().
This function has been extracted from selinux_setprocattr() and put into
a file of its own in the lib/ directory as join_session_keyring() now
wants to use it too.
The code in SELinux just checked to see whether a task shared mm_structs
with other tasks (CLONE_VM), but that isn't good enough. We really want
to know if they're part of the same thread group (CLONE_THREAD).
(13) nfsd.
The NFS server daemon now has to use the COW credentials to set the
credentials it is going to use. It really needs to pass the credentials
down to the functions it calls, but it can't do that until other patches
in this series have been applied.
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:23 +11:00
2018-09-21 17:17:16 -07:00
new_tsec = selinux_cred ( bprm - > cred ) ;
CRED: Make execve() take advantage of copy-on-write credentials
Make execve() take advantage of copy-on-write credentials, allowing it to set
up the credentials in advance, and then commit the whole lot after the point
of no return.
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
The credential bits from struct linux_binprm are, for the most part,
replaced with a single credentials pointer (bprm->cred). This means that
all the creds can be calculated in advance and then applied at the point
of no return with no possibility of failure.
I would like to replace bprm->cap_effective with:
cap_isclear(bprm->cap_effective)
but this seems impossible due to special behaviour for processes of pid 1
(they always retain their parent's capability masks where normally they'd
be changed - see cap_bprm_set_creds()).
The following sequence of events now happens:
(a) At the start of do_execve, the current task's cred_exec_mutex is
locked to prevent PTRACE_ATTACH from obsoleting the calculation of
creds that we make.
(a) prepare_exec_creds() is then called to make a copy of the current
task's credentials and prepare it. This copy is then assigned to
bprm->cred.
This renders security_bprm_alloc() and security_bprm_free()
unnecessary, and so they've been removed.
(b) The determination of unsafe execution is now performed immediately
after (a) rather than later on in the code. The result is stored in
bprm->unsafe for future reference.
(c) prepare_binprm() is called, possibly multiple times.
(i) This applies the result of set[ug]id binaries to the new creds
attached to bprm->cred. Personality bit clearance is recorded,
but now deferred on the basis that the exec procedure may yet
fail.
(ii) This then calls the new security_bprm_set_creds(). This should
calculate the new LSM and capability credentials into *bprm->cred.
This folds together security_bprm_set() and parts of
security_bprm_apply_creds() (these two have been removed).
Anything that might fail must be done at this point.
(iii) bprm->cred_prepared is set to 1.
bprm->cred_prepared is 0 on the first pass of the security
calculations, and 1 on all subsequent passes. This allows SELinux
in (ii) to base its calculations only on the initial script and
not on the interpreter.
(d) flush_old_exec() is called to commit the task to execution. This
performs the following steps with regard to credentials:
(i) Clear pdeath_signal and set dumpable on certain circumstances that
may not be covered by commit_creds().
(ii) Clear any bits in current->personality that were deferred from
(c.i).
(e) install_exec_creds() [compute_creds() as was] is called to install the
new credentials. This performs the following steps with regard to
credentials:
(i) Calls security_bprm_committing_creds() to apply any security
requirements, such as flushing unauthorised files in SELinux, that
must be done before the credentials are changed.
This is made up of bits of security_bprm_apply_creds() and
security_bprm_post_apply_creds(), both of which have been removed.
This function is not allowed to fail; anything that might fail
must have been done in (c.ii).
(ii) Calls commit_creds() to apply the new credentials in a single
assignment (more or less). Possibly pdeath_signal and dumpable
should be part of struct creds.
(iii) Unlocks the task's cred_replace_mutex, thus allowing
PTRACE_ATTACH to take place.
(iv) Clears the bprm->cred pointer as the credentials it was holding
are now immutable.
(v) Calls security_bprm_committed_creds() to apply any security
alterations that must be done after the creds have been changed.
SELinux uses this to flush signals and signal handlers.
(f) If an error occurs before (d.i), bprm_free() will call abort_creds()
to destroy the proposed new credentials and will then unlock
cred_replace_mutex. No changes to the credentials will have been
made.
(2) LSM interface.
A number of functions have been changed, added or removed:
(*) security_bprm_alloc(), ->bprm_alloc_security()
(*) security_bprm_free(), ->bprm_free_security()
Removed in favour of preparing new credentials and modifying those.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
(*) security_bprm_post_apply_creds(), ->bprm_post_apply_creds()
Removed; split between security_bprm_set_creds(),
security_bprm_committing_creds() and security_bprm_committed_creds().
(*) security_bprm_set(), ->bprm_set_security()
Removed; folded into security_bprm_set_creds().
(*) security_bprm_set_creds(), ->bprm_set_creds()
New. The new credentials in bprm->cred should be checked and set up
as appropriate. bprm->cred_prepared is 0 on the first call, 1 on the
second and subsequent calls.
(*) security_bprm_committing_creds(), ->bprm_committing_creds()
(*) security_bprm_committed_creds(), ->bprm_committed_creds()
New. Apply the security effects of the new credentials. This
includes closing unauthorised files in SELinux. This function may not
fail. When the former is called, the creds haven't yet been applied
to the process; when the latter is called, they have.
The former may access bprm->cred, the latter may not.
(3) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) The bprm_security_struct struct has been removed in favour of using
the credentials-under-construction approach.
(c) flush_unauthorized_files() now takes a cred pointer and passes it on
to inode_has_perm(), file_has_perm() and dentry_open().
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:24 +11:00
if ( new_tsec - > sid = = new_tsec - > osid )
return ;
2005-04-16 15:20:36 -07:00
CRED: Make execve() take advantage of copy-on-write credentials
Make execve() take advantage of copy-on-write credentials, allowing it to set
up the credentials in advance, and then commit the whole lot after the point
of no return.
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
The credential bits from struct linux_binprm are, for the most part,
replaced with a single credentials pointer (bprm->cred). This means that
all the creds can be calculated in advance and then applied at the point
of no return with no possibility of failure.
I would like to replace bprm->cap_effective with:
cap_isclear(bprm->cap_effective)
but this seems impossible due to special behaviour for processes of pid 1
(they always retain their parent's capability masks where normally they'd
be changed - see cap_bprm_set_creds()).
The following sequence of events now happens:
(a) At the start of do_execve, the current task's cred_exec_mutex is
locked to prevent PTRACE_ATTACH from obsoleting the calculation of
creds that we make.
(a) prepare_exec_creds() is then called to make a copy of the current
task's credentials and prepare it. This copy is then assigned to
bprm->cred.
This renders security_bprm_alloc() and security_bprm_free()
unnecessary, and so they've been removed.
(b) The determination of unsafe execution is now performed immediately
after (a) rather than later on in the code. The result is stored in
bprm->unsafe for future reference.
(c) prepare_binprm() is called, possibly multiple times.
(i) This applies the result of set[ug]id binaries to the new creds
attached to bprm->cred. Personality bit clearance is recorded,
but now deferred on the basis that the exec procedure may yet
fail.
(ii) This then calls the new security_bprm_set_creds(). This should
calculate the new LSM and capability credentials into *bprm->cred.
This folds together security_bprm_set() and parts of
security_bprm_apply_creds() (these two have been removed).
Anything that might fail must be done at this point.
(iii) bprm->cred_prepared is set to 1.
bprm->cred_prepared is 0 on the first pass of the security
calculations, and 1 on all subsequent passes. This allows SELinux
in (ii) to base its calculations only on the initial script and
not on the interpreter.
(d) flush_old_exec() is called to commit the task to execution. This
performs the following steps with regard to credentials:
(i) Clear pdeath_signal and set dumpable on certain circumstances that
may not be covered by commit_creds().
(ii) Clear any bits in current->personality that were deferred from
(c.i).
(e) install_exec_creds() [compute_creds() as was] is called to install the
new credentials. This performs the following steps with regard to
credentials:
(i) Calls security_bprm_committing_creds() to apply any security
requirements, such as flushing unauthorised files in SELinux, that
must be done before the credentials are changed.
This is made up of bits of security_bprm_apply_creds() and
security_bprm_post_apply_creds(), both of which have been removed.
This function is not allowed to fail; anything that might fail
must have been done in (c.ii).
(ii) Calls commit_creds() to apply the new credentials in a single
assignment (more or less). Possibly pdeath_signal and dumpable
should be part of struct creds.
(iii) Unlocks the task's cred_replace_mutex, thus allowing
PTRACE_ATTACH to take place.
(iv) Clears the bprm->cred pointer as the credentials it was holding
are now immutable.
(v) Calls security_bprm_committed_creds() to apply any security
alterations that must be done after the creds have been changed.
SELinux uses this to flush signals and signal handlers.
(f) If an error occurs before (d.i), bprm_free() will call abort_creds()
to destroy the proposed new credentials and will then unlock
cred_replace_mutex. No changes to the credentials will have been
made.
(2) LSM interface.
A number of functions have been changed, added or removed:
(*) security_bprm_alloc(), ->bprm_alloc_security()
(*) security_bprm_free(), ->bprm_free_security()
Removed in favour of preparing new credentials and modifying those.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
(*) security_bprm_post_apply_creds(), ->bprm_post_apply_creds()
Removed; split between security_bprm_set_creds(),
security_bprm_committing_creds() and security_bprm_committed_creds().
(*) security_bprm_set(), ->bprm_set_security()
Removed; folded into security_bprm_set_creds().
(*) security_bprm_set_creds(), ->bprm_set_creds()
New. The new credentials in bprm->cred should be checked and set up
as appropriate. bprm->cred_prepared is 0 on the first call, 1 on the
second and subsequent calls.
(*) security_bprm_committing_creds(), ->bprm_committing_creds()
(*) security_bprm_committed_creds(), ->bprm_committed_creds()
New. Apply the security effects of the new credentials. This
includes closing unauthorised files in SELinux. This function may not
fail. When the former is called, the creds haven't yet been applied
to the process; when the latter is called, they have.
The former may access bprm->cred, the latter may not.
(3) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) The bprm_security_struct struct has been removed in favour of using
the credentials-under-construction approach.
(c) flush_unauthorized_files() now takes a cred pointer and passes it on
to inode_has_perm(), file_has_perm() and dentry_open().
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:24 +11:00
/* Close files for which the new task SID is not authorized. */
flush_unauthorized_files ( bprm - > cred , current - > files ) ;
2008-03-26 15:46:39 -07:00
CRED: Make execve() take advantage of copy-on-write credentials
Make execve() take advantage of copy-on-write credentials, allowing it to set
up the credentials in advance, and then commit the whole lot after the point
of no return.
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
The credential bits from struct linux_binprm are, for the most part,
replaced with a single credentials pointer (bprm->cred). This means that
all the creds can be calculated in advance and then applied at the point
of no return with no possibility of failure.
I would like to replace bprm->cap_effective with:
cap_isclear(bprm->cap_effective)
but this seems impossible due to special behaviour for processes of pid 1
(they always retain their parent's capability masks where normally they'd
be changed - see cap_bprm_set_creds()).
The following sequence of events now happens:
(a) At the start of do_execve, the current task's cred_exec_mutex is
locked to prevent PTRACE_ATTACH from obsoleting the calculation of
creds that we make.
(a) prepare_exec_creds() is then called to make a copy of the current
task's credentials and prepare it. This copy is then assigned to
bprm->cred.
This renders security_bprm_alloc() and security_bprm_free()
unnecessary, and so they've been removed.
(b) The determination of unsafe execution is now performed immediately
after (a) rather than later on in the code. The result is stored in
bprm->unsafe for future reference.
(c) prepare_binprm() is called, possibly multiple times.
(i) This applies the result of set[ug]id binaries to the new creds
attached to bprm->cred. Personality bit clearance is recorded,
but now deferred on the basis that the exec procedure may yet
fail.
(ii) This then calls the new security_bprm_set_creds(). This should
calculate the new LSM and capability credentials into *bprm->cred.
This folds together security_bprm_set() and parts of
security_bprm_apply_creds() (these two have been removed).
Anything that might fail must be done at this point.
(iii) bprm->cred_prepared is set to 1.
bprm->cred_prepared is 0 on the first pass of the security
calculations, and 1 on all subsequent passes. This allows SELinux
in (ii) to base its calculations only on the initial script and
not on the interpreter.
(d) flush_old_exec() is called to commit the task to execution. This
performs the following steps with regard to credentials:
(i) Clear pdeath_signal and set dumpable on certain circumstances that
may not be covered by commit_creds().
(ii) Clear any bits in current->personality that were deferred from
(c.i).
(e) install_exec_creds() [compute_creds() as was] is called to install the
new credentials. This performs the following steps with regard to
credentials:
(i) Calls security_bprm_committing_creds() to apply any security
requirements, such as flushing unauthorised files in SELinux, that
must be done before the credentials are changed.
This is made up of bits of security_bprm_apply_creds() and
security_bprm_post_apply_creds(), both of which have been removed.
This function is not allowed to fail; anything that might fail
must have been done in (c.ii).
(ii) Calls commit_creds() to apply the new credentials in a single
assignment (more or less). Possibly pdeath_signal and dumpable
should be part of struct creds.
(iii) Unlocks the task's cred_replace_mutex, thus allowing
PTRACE_ATTACH to take place.
(iv) Clears the bprm->cred pointer as the credentials it was holding
are now immutable.
(v) Calls security_bprm_committed_creds() to apply any security
alterations that must be done after the creds have been changed.
SELinux uses this to flush signals and signal handlers.
(f) If an error occurs before (d.i), bprm_free() will call abort_creds()
to destroy the proposed new credentials and will then unlock
cred_replace_mutex. No changes to the credentials will have been
made.
(2) LSM interface.
A number of functions have been changed, added or removed:
(*) security_bprm_alloc(), ->bprm_alloc_security()
(*) security_bprm_free(), ->bprm_free_security()
Removed in favour of preparing new credentials and modifying those.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
(*) security_bprm_post_apply_creds(), ->bprm_post_apply_creds()
Removed; split between security_bprm_set_creds(),
security_bprm_committing_creds() and security_bprm_committed_creds().
(*) security_bprm_set(), ->bprm_set_security()
Removed; folded into security_bprm_set_creds().
(*) security_bprm_set_creds(), ->bprm_set_creds()
New. The new credentials in bprm->cred should be checked and set up
as appropriate. bprm->cred_prepared is 0 on the first call, 1 on the
second and subsequent calls.
(*) security_bprm_committing_creds(), ->bprm_committing_creds()
(*) security_bprm_committed_creds(), ->bprm_committed_creds()
New. Apply the security effects of the new credentials. This
includes closing unauthorised files in SELinux. This function may not
fail. When the former is called, the creds haven't yet been applied
to the process; when the latter is called, they have.
The former may access bprm->cred, the latter may not.
(3) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) The bprm_security_struct struct has been removed in favour of using
the credentials-under-construction approach.
(c) flush_unauthorized_files() now takes a cred pointer and passes it on
to inode_has_perm(), file_has_perm() and dentry_open().
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:24 +11:00
/* Always clear parent death signal on SID transitions. */
current - > pdeath_signal = 0 ;
2008-03-26 15:46:39 -07:00
CRED: Make execve() take advantage of copy-on-write credentials
Make execve() take advantage of copy-on-write credentials, allowing it to set
up the credentials in advance, and then commit the whole lot after the point
of no return.
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
The credential bits from struct linux_binprm are, for the most part,
replaced with a single credentials pointer (bprm->cred). This means that
all the creds can be calculated in advance and then applied at the point
of no return with no possibility of failure.
I would like to replace bprm->cap_effective with:
cap_isclear(bprm->cap_effective)
but this seems impossible due to special behaviour for processes of pid 1
(they always retain their parent's capability masks where normally they'd
be changed - see cap_bprm_set_creds()).
The following sequence of events now happens:
(a) At the start of do_execve, the current task's cred_exec_mutex is
locked to prevent PTRACE_ATTACH from obsoleting the calculation of
creds that we make.
(a) prepare_exec_creds() is then called to make a copy of the current
task's credentials and prepare it. This copy is then assigned to
bprm->cred.
This renders security_bprm_alloc() and security_bprm_free()
unnecessary, and so they've been removed.
(b) The determination of unsafe execution is now performed immediately
after (a) rather than later on in the code. The result is stored in
bprm->unsafe for future reference.
(c) prepare_binprm() is called, possibly multiple times.
(i) This applies the result of set[ug]id binaries to the new creds
attached to bprm->cred. Personality bit clearance is recorded,
but now deferred on the basis that the exec procedure may yet
fail.
(ii) This then calls the new security_bprm_set_creds(). This should
calculate the new LSM and capability credentials into *bprm->cred.
This folds together security_bprm_set() and parts of
security_bprm_apply_creds() (these two have been removed).
Anything that might fail must be done at this point.
(iii) bprm->cred_prepared is set to 1.
bprm->cred_prepared is 0 on the first pass of the security
calculations, and 1 on all subsequent passes. This allows SELinux
in (ii) to base its calculations only on the initial script and
not on the interpreter.
(d) flush_old_exec() is called to commit the task to execution. This
performs the following steps with regard to credentials:
(i) Clear pdeath_signal and set dumpable on certain circumstances that
may not be covered by commit_creds().
(ii) Clear any bits in current->personality that were deferred from
(c.i).
(e) install_exec_creds() [compute_creds() as was] is called to install the
new credentials. This performs the following steps with regard to
credentials:
(i) Calls security_bprm_committing_creds() to apply any security
requirements, such as flushing unauthorised files in SELinux, that
must be done before the credentials are changed.
This is made up of bits of security_bprm_apply_creds() and
security_bprm_post_apply_creds(), both of which have been removed.
This function is not allowed to fail; anything that might fail
must have been done in (c.ii).
(ii) Calls commit_creds() to apply the new credentials in a single
assignment (more or less). Possibly pdeath_signal and dumpable
should be part of struct creds.
(iii) Unlocks the task's cred_replace_mutex, thus allowing
PTRACE_ATTACH to take place.
(iv) Clears the bprm->cred pointer as the credentials it was holding
are now immutable.
(v) Calls security_bprm_committed_creds() to apply any security
alterations that must be done after the creds have been changed.
SELinux uses this to flush signals and signal handlers.
(f) If an error occurs before (d.i), bprm_free() will call abort_creds()
to destroy the proposed new credentials and will then unlock
cred_replace_mutex. No changes to the credentials will have been
made.
(2) LSM interface.
A number of functions have been changed, added or removed:
(*) security_bprm_alloc(), ->bprm_alloc_security()
(*) security_bprm_free(), ->bprm_free_security()
Removed in favour of preparing new credentials and modifying those.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
(*) security_bprm_post_apply_creds(), ->bprm_post_apply_creds()
Removed; split between security_bprm_set_creds(),
security_bprm_committing_creds() and security_bprm_committed_creds().
(*) security_bprm_set(), ->bprm_set_security()
Removed; folded into security_bprm_set_creds().
(*) security_bprm_set_creds(), ->bprm_set_creds()
New. The new credentials in bprm->cred should be checked and set up
as appropriate. bprm->cred_prepared is 0 on the first call, 1 on the
second and subsequent calls.
(*) security_bprm_committing_creds(), ->bprm_committing_creds()
(*) security_bprm_committed_creds(), ->bprm_committed_creds()
New. Apply the security effects of the new credentials. This
includes closing unauthorised files in SELinux. This function may not
fail. When the former is called, the creds haven't yet been applied
to the process; when the latter is called, they have.
The former may access bprm->cred, the latter may not.
(3) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) The bprm_security_struct struct has been removed in favour of using
the credentials-under-construction approach.
(c) flush_unauthorized_files() now takes a cred pointer and passes it on
to inode_has_perm(), file_has_perm() and dentry_open().
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:24 +11:00
/* Check whether the new SID can inherit resource limits from the old
* SID . If not , reset all soft limits to the lower of the current
* task ' s hard limit and the init task ' s soft limit .
*
* Note that the setting of hard limits ( even to lower them ) can be
* controlled by the setrlimit check . The inclusion of the init task ' s
* soft limit into the computation is to avoid resetting soft limits
* higher than the default soft limit for cases where the default is
* lower than the hard limit , e . g . RLIMIT_CORE or RLIMIT_STACK .
*/
2023-03-09 13:30:37 -05:00
rc = avc_has_perm ( new_tsec - > osid , new_tsec - > sid , SECCLASS_PROCESS ,
CRED: Make execve() take advantage of copy-on-write credentials
Make execve() take advantage of copy-on-write credentials, allowing it to set
up the credentials in advance, and then commit the whole lot after the point
of no return.
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
The credential bits from struct linux_binprm are, for the most part,
replaced with a single credentials pointer (bprm->cred). This means that
all the creds can be calculated in advance and then applied at the point
of no return with no possibility of failure.
I would like to replace bprm->cap_effective with:
cap_isclear(bprm->cap_effective)
but this seems impossible due to special behaviour for processes of pid 1
(they always retain their parent's capability masks where normally they'd
be changed - see cap_bprm_set_creds()).
The following sequence of events now happens:
(a) At the start of do_execve, the current task's cred_exec_mutex is
locked to prevent PTRACE_ATTACH from obsoleting the calculation of
creds that we make.
(a) prepare_exec_creds() is then called to make a copy of the current
task's credentials and prepare it. This copy is then assigned to
bprm->cred.
This renders security_bprm_alloc() and security_bprm_free()
unnecessary, and so they've been removed.
(b) The determination of unsafe execution is now performed immediately
after (a) rather than later on in the code. The result is stored in
bprm->unsafe for future reference.
(c) prepare_binprm() is called, possibly multiple times.
(i) This applies the result of set[ug]id binaries to the new creds
attached to bprm->cred. Personality bit clearance is recorded,
but now deferred on the basis that the exec procedure may yet
fail.
(ii) This then calls the new security_bprm_set_creds(). This should
calculate the new LSM and capability credentials into *bprm->cred.
This folds together security_bprm_set() and parts of
security_bprm_apply_creds() (these two have been removed).
Anything that might fail must be done at this point.
(iii) bprm->cred_prepared is set to 1.
bprm->cred_prepared is 0 on the first pass of the security
calculations, and 1 on all subsequent passes. This allows SELinux
in (ii) to base its calculations only on the initial script and
not on the interpreter.
(d) flush_old_exec() is called to commit the task to execution. This
performs the following steps with regard to credentials:
(i) Clear pdeath_signal and set dumpable on certain circumstances that
may not be covered by commit_creds().
(ii) Clear any bits in current->personality that were deferred from
(c.i).
(e) install_exec_creds() [compute_creds() as was] is called to install the
new credentials. This performs the following steps with regard to
credentials:
(i) Calls security_bprm_committing_creds() to apply any security
requirements, such as flushing unauthorised files in SELinux, that
must be done before the credentials are changed.
This is made up of bits of security_bprm_apply_creds() and
security_bprm_post_apply_creds(), both of which have been removed.
This function is not allowed to fail; anything that might fail
must have been done in (c.ii).
(ii) Calls commit_creds() to apply the new credentials in a single
assignment (more or less). Possibly pdeath_signal and dumpable
should be part of struct creds.
(iii) Unlocks the task's cred_replace_mutex, thus allowing
PTRACE_ATTACH to take place.
(iv) Clears the bprm->cred pointer as the credentials it was holding
are now immutable.
(v) Calls security_bprm_committed_creds() to apply any security
alterations that must be done after the creds have been changed.
SELinux uses this to flush signals and signal handlers.
(f) If an error occurs before (d.i), bprm_free() will call abort_creds()
to destroy the proposed new credentials and will then unlock
cred_replace_mutex. No changes to the credentials will have been
made.
(2) LSM interface.
A number of functions have been changed, added or removed:
(*) security_bprm_alloc(), ->bprm_alloc_security()
(*) security_bprm_free(), ->bprm_free_security()
Removed in favour of preparing new credentials and modifying those.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
(*) security_bprm_post_apply_creds(), ->bprm_post_apply_creds()
Removed; split between security_bprm_set_creds(),
security_bprm_committing_creds() and security_bprm_committed_creds().
(*) security_bprm_set(), ->bprm_set_security()
Removed; folded into security_bprm_set_creds().
(*) security_bprm_set_creds(), ->bprm_set_creds()
New. The new credentials in bprm->cred should be checked and set up
as appropriate. bprm->cred_prepared is 0 on the first call, 1 on the
second and subsequent calls.
(*) security_bprm_committing_creds(), ->bprm_committing_creds()
(*) security_bprm_committed_creds(), ->bprm_committed_creds()
New. Apply the security effects of the new credentials. This
includes closing unauthorised files in SELinux. This function may not
fail. When the former is called, the creds haven't yet been applied
to the process; when the latter is called, they have.
The former may access bprm->cred, the latter may not.
(3) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) The bprm_security_struct struct has been removed in favour of using
the credentials-under-construction approach.
(c) flush_unauthorized_files() now takes a cred pointer and passes it on
to inode_has_perm(), file_has_perm() and dentry_open().
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:24 +11:00
PROCESS__RLIMITINH , NULL ) ;
if ( rc ) {
2010-06-23 22:43:32 +02:00
/* protect against do_prlimit() */
task_lock ( current ) ;
CRED: Make execve() take advantage of copy-on-write credentials
Make execve() take advantage of copy-on-write credentials, allowing it to set
up the credentials in advance, and then commit the whole lot after the point
of no return.
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
The credential bits from struct linux_binprm are, for the most part,
replaced with a single credentials pointer (bprm->cred). This means that
all the creds can be calculated in advance and then applied at the point
of no return with no possibility of failure.
I would like to replace bprm->cap_effective with:
cap_isclear(bprm->cap_effective)
but this seems impossible due to special behaviour for processes of pid 1
(they always retain their parent's capability masks where normally they'd
be changed - see cap_bprm_set_creds()).
The following sequence of events now happens:
(a) At the start of do_execve, the current task's cred_exec_mutex is
locked to prevent PTRACE_ATTACH from obsoleting the calculation of
creds that we make.
(a) prepare_exec_creds() is then called to make a copy of the current
task's credentials and prepare it. This copy is then assigned to
bprm->cred.
This renders security_bprm_alloc() and security_bprm_free()
unnecessary, and so they've been removed.
(b) The determination of unsafe execution is now performed immediately
after (a) rather than later on in the code. The result is stored in
bprm->unsafe for future reference.
(c) prepare_binprm() is called, possibly multiple times.
(i) This applies the result of set[ug]id binaries to the new creds
attached to bprm->cred. Personality bit clearance is recorded,
but now deferred on the basis that the exec procedure may yet
fail.
(ii) This then calls the new security_bprm_set_creds(). This should
calculate the new LSM and capability credentials into *bprm->cred.
This folds together security_bprm_set() and parts of
security_bprm_apply_creds() (these two have been removed).
Anything that might fail must be done at this point.
(iii) bprm->cred_prepared is set to 1.
bprm->cred_prepared is 0 on the first pass of the security
calculations, and 1 on all subsequent passes. This allows SELinux
in (ii) to base its calculations only on the initial script and
not on the interpreter.
(d) flush_old_exec() is called to commit the task to execution. This
performs the following steps with regard to credentials:
(i) Clear pdeath_signal and set dumpable on certain circumstances that
may not be covered by commit_creds().
(ii) Clear any bits in current->personality that were deferred from
(c.i).
(e) install_exec_creds() [compute_creds() as was] is called to install the
new credentials. This performs the following steps with regard to
credentials:
(i) Calls security_bprm_committing_creds() to apply any security
requirements, such as flushing unauthorised files in SELinux, that
must be done before the credentials are changed.
This is made up of bits of security_bprm_apply_creds() and
security_bprm_post_apply_creds(), both of which have been removed.
This function is not allowed to fail; anything that might fail
must have been done in (c.ii).
(ii) Calls commit_creds() to apply the new credentials in a single
assignment (more or less). Possibly pdeath_signal and dumpable
should be part of struct creds.
(iii) Unlocks the task's cred_replace_mutex, thus allowing
PTRACE_ATTACH to take place.
(iv) Clears the bprm->cred pointer as the credentials it was holding
are now immutable.
(v) Calls security_bprm_committed_creds() to apply any security
alterations that must be done after the creds have been changed.
SELinux uses this to flush signals and signal handlers.
(f) If an error occurs before (d.i), bprm_free() will call abort_creds()
to destroy the proposed new credentials and will then unlock
cred_replace_mutex. No changes to the credentials will have been
made.
(2) LSM interface.
A number of functions have been changed, added or removed:
(*) security_bprm_alloc(), ->bprm_alloc_security()
(*) security_bprm_free(), ->bprm_free_security()
Removed in favour of preparing new credentials and modifying those.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
(*) security_bprm_post_apply_creds(), ->bprm_post_apply_creds()
Removed; split between security_bprm_set_creds(),
security_bprm_committing_creds() and security_bprm_committed_creds().
(*) security_bprm_set(), ->bprm_set_security()
Removed; folded into security_bprm_set_creds().
(*) security_bprm_set_creds(), ->bprm_set_creds()
New. The new credentials in bprm->cred should be checked and set up
as appropriate. bprm->cred_prepared is 0 on the first call, 1 on the
second and subsequent calls.
(*) security_bprm_committing_creds(), ->bprm_committing_creds()
(*) security_bprm_committed_creds(), ->bprm_committed_creds()
New. Apply the security effects of the new credentials. This
includes closing unauthorised files in SELinux. This function may not
fail. When the former is called, the creds haven't yet been applied
to the process; when the latter is called, they have.
The former may access bprm->cred, the latter may not.
(3) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) The bprm_security_struct struct has been removed in favour of using
the credentials-under-construction approach.
(c) flush_unauthorized_files() now takes a cred pointer and passes it on
to inode_has_perm(), file_has_perm() and dentry_open().
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:24 +11:00
for ( i = 0 ; i < RLIM_NLIMITS ; i + + ) {
rlim = current - > signal - > rlim + i ;
initrlim = init_task . signal - > rlim + i ;
rlim - > rlim_cur = min ( rlim - > rlim_max , initrlim - > rlim_cur ) ;
2005-04-16 15:20:36 -07:00
}
2010-06-23 22:43:32 +02:00
task_unlock ( current ) ;
posix-timers: Make them configurable
Some embedded systems have no use for them. This removes about
25KB from the kernel binary size when configured out.
Corresponding syscalls are routed to a stub logging the attempt to
use those syscalls which should be enough of a clue if they were
disabled without proper consideration. They are: timer_create,
timer_gettime, timer_getoverrun, timer_settime, timer_delete,
clock_adjtime, setitimer, getitimer, alarm.
The clock_settime, clock_gettime, clock_getres and clock_nanosleep
syscalls are replaced by simple wrappers compatible with CLOCK_REALTIME,
CLOCK_MONOTONIC and CLOCK_BOOTTIME only which should cover the vast
majority of use cases with very little code.
Signed-off-by: Nicolas Pitre <nico@linaro.org>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: John Stultz <john.stultz@linaro.org>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
Cc: Paul Bolle <pebolle@tiscali.nl>
Cc: linux-kbuild@vger.kernel.org
Cc: netdev@vger.kernel.org
Cc: Michal Marek <mmarek@suse.com>
Cc: Edward Cree <ecree@solarflare.com>
Link: http://lkml.kernel.org/r/1478841010-28605-7-git-send-email-nicolas.pitre@linaro.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2016-11-11 00:10:10 -05:00
if ( IS_ENABLED ( CONFIG_POSIX_TIMERS ) )
update_rlimit_cpu ( current , rlimit ( RLIMIT_CPU ) ) ;
2005-04-16 15:20:36 -07:00
}
}
/*
CRED: Make execve() take advantage of copy-on-write credentials
Make execve() take advantage of copy-on-write credentials, allowing it to set
up the credentials in advance, and then commit the whole lot after the point
of no return.
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
The credential bits from struct linux_binprm are, for the most part,
replaced with a single credentials pointer (bprm->cred). This means that
all the creds can be calculated in advance and then applied at the point
of no return with no possibility of failure.
I would like to replace bprm->cap_effective with:
cap_isclear(bprm->cap_effective)
but this seems impossible due to special behaviour for processes of pid 1
(they always retain their parent's capability masks where normally they'd
be changed - see cap_bprm_set_creds()).
The following sequence of events now happens:
(a) At the start of do_execve, the current task's cred_exec_mutex is
locked to prevent PTRACE_ATTACH from obsoleting the calculation of
creds that we make.
(a) prepare_exec_creds() is then called to make a copy of the current
task's credentials and prepare it. This copy is then assigned to
bprm->cred.
This renders security_bprm_alloc() and security_bprm_free()
unnecessary, and so they've been removed.
(b) The determination of unsafe execution is now performed immediately
after (a) rather than later on in the code. The result is stored in
bprm->unsafe for future reference.
(c) prepare_binprm() is called, possibly multiple times.
(i) This applies the result of set[ug]id binaries to the new creds
attached to bprm->cred. Personality bit clearance is recorded,
but now deferred on the basis that the exec procedure may yet
fail.
(ii) This then calls the new security_bprm_set_creds(). This should
calculate the new LSM and capability credentials into *bprm->cred.
This folds together security_bprm_set() and parts of
security_bprm_apply_creds() (these two have been removed).
Anything that might fail must be done at this point.
(iii) bprm->cred_prepared is set to 1.
bprm->cred_prepared is 0 on the first pass of the security
calculations, and 1 on all subsequent passes. This allows SELinux
in (ii) to base its calculations only on the initial script and
not on the interpreter.
(d) flush_old_exec() is called to commit the task to execution. This
performs the following steps with regard to credentials:
(i) Clear pdeath_signal and set dumpable on certain circumstances that
may not be covered by commit_creds().
(ii) Clear any bits in current->personality that were deferred from
(c.i).
(e) install_exec_creds() [compute_creds() as was] is called to install the
new credentials. This performs the following steps with regard to
credentials:
(i) Calls security_bprm_committing_creds() to apply any security
requirements, such as flushing unauthorised files in SELinux, that
must be done before the credentials are changed.
This is made up of bits of security_bprm_apply_creds() and
security_bprm_post_apply_creds(), both of which have been removed.
This function is not allowed to fail; anything that might fail
must have been done in (c.ii).
(ii) Calls commit_creds() to apply the new credentials in a single
assignment (more or less). Possibly pdeath_signal and dumpable
should be part of struct creds.
(iii) Unlocks the task's cred_replace_mutex, thus allowing
PTRACE_ATTACH to take place.
(iv) Clears the bprm->cred pointer as the credentials it was holding
are now immutable.
(v) Calls security_bprm_committed_creds() to apply any security
alterations that must be done after the creds have been changed.
SELinux uses this to flush signals and signal handlers.
(f) If an error occurs before (d.i), bprm_free() will call abort_creds()
to destroy the proposed new credentials and will then unlock
cred_replace_mutex. No changes to the credentials will have been
made.
(2) LSM interface.
A number of functions have been changed, added or removed:
(*) security_bprm_alloc(), ->bprm_alloc_security()
(*) security_bprm_free(), ->bprm_free_security()
Removed in favour of preparing new credentials and modifying those.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
(*) security_bprm_post_apply_creds(), ->bprm_post_apply_creds()
Removed; split between security_bprm_set_creds(),
security_bprm_committing_creds() and security_bprm_committed_creds().
(*) security_bprm_set(), ->bprm_set_security()
Removed; folded into security_bprm_set_creds().
(*) security_bprm_set_creds(), ->bprm_set_creds()
New. The new credentials in bprm->cred should be checked and set up
as appropriate. bprm->cred_prepared is 0 on the first call, 1 on the
second and subsequent calls.
(*) security_bprm_committing_creds(), ->bprm_committing_creds()
(*) security_bprm_committed_creds(), ->bprm_committed_creds()
New. Apply the security effects of the new credentials. This
includes closing unauthorised files in SELinux. This function may not
fail. When the former is called, the creds haven't yet been applied
to the process; when the latter is called, they have.
The former may access bprm->cred, the latter may not.
(3) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) The bprm_security_struct struct has been removed in favour of using
the credentials-under-construction approach.
(c) flush_unauthorized_files() now takes a cred pointer and passes it on
to inode_has_perm(), file_has_perm() and dentry_open().
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:24 +11:00
* Clean up the process immediately after the installation of new credentials
* due to exec
2005-04-16 15:20:36 -07:00
*/
2023-08-23 13:16:40 +05:00
static void selinux_bprm_committed_creds ( const struct linux_binprm * bprm )
2005-04-16 15:20:36 -07:00
{
2018-09-21 17:17:16 -07:00
const struct task_security_struct * tsec = selinux_cred ( current_cred ( ) ) ;
CRED: Make execve() take advantage of copy-on-write credentials
Make execve() take advantage of copy-on-write credentials, allowing it to set
up the credentials in advance, and then commit the whole lot after the point
of no return.
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
The credential bits from struct linux_binprm are, for the most part,
replaced with a single credentials pointer (bprm->cred). This means that
all the creds can be calculated in advance and then applied at the point
of no return with no possibility of failure.
I would like to replace bprm->cap_effective with:
cap_isclear(bprm->cap_effective)
but this seems impossible due to special behaviour for processes of pid 1
(they always retain their parent's capability masks where normally they'd
be changed - see cap_bprm_set_creds()).
The following sequence of events now happens:
(a) At the start of do_execve, the current task's cred_exec_mutex is
locked to prevent PTRACE_ATTACH from obsoleting the calculation of
creds that we make.
(a) prepare_exec_creds() is then called to make a copy of the current
task's credentials and prepare it. This copy is then assigned to
bprm->cred.
This renders security_bprm_alloc() and security_bprm_free()
unnecessary, and so they've been removed.
(b) The determination of unsafe execution is now performed immediately
after (a) rather than later on in the code. The result is stored in
bprm->unsafe for future reference.
(c) prepare_binprm() is called, possibly multiple times.
(i) This applies the result of set[ug]id binaries to the new creds
attached to bprm->cred. Personality bit clearance is recorded,
but now deferred on the basis that the exec procedure may yet
fail.
(ii) This then calls the new security_bprm_set_creds(). This should
calculate the new LSM and capability credentials into *bprm->cred.
This folds together security_bprm_set() and parts of
security_bprm_apply_creds() (these two have been removed).
Anything that might fail must be done at this point.
(iii) bprm->cred_prepared is set to 1.
bprm->cred_prepared is 0 on the first pass of the security
calculations, and 1 on all subsequent passes. This allows SELinux
in (ii) to base its calculations only on the initial script and
not on the interpreter.
(d) flush_old_exec() is called to commit the task to execution. This
performs the following steps with regard to credentials:
(i) Clear pdeath_signal and set dumpable on certain circumstances that
may not be covered by commit_creds().
(ii) Clear any bits in current->personality that were deferred from
(c.i).
(e) install_exec_creds() [compute_creds() as was] is called to install the
new credentials. This performs the following steps with regard to
credentials:
(i) Calls security_bprm_committing_creds() to apply any security
requirements, such as flushing unauthorised files in SELinux, that
must be done before the credentials are changed.
This is made up of bits of security_bprm_apply_creds() and
security_bprm_post_apply_creds(), both of which have been removed.
This function is not allowed to fail; anything that might fail
must have been done in (c.ii).
(ii) Calls commit_creds() to apply the new credentials in a single
assignment (more or less). Possibly pdeath_signal and dumpable
should be part of struct creds.
(iii) Unlocks the task's cred_replace_mutex, thus allowing
PTRACE_ATTACH to take place.
(iv) Clears The bprm->cred pointer as the credentials it was holding
are now immutable.
(v) Calls security_bprm_committed_creds() to apply any security
alterations that must be done after the creds have been changed.
SELinux uses this to flush signals and signal handlers.
(f) If an error occurs before (d.i), bprm_free() will call abort_creds()
to destroy the proposed new credentials and will then unlock
cred_replace_mutex. No changes to the credentials will have been
made.
(2) LSM interface.
A number of functions have been changed, added or removed:
(*) security_bprm_alloc(), ->bprm_alloc_security()
(*) security_bprm_free(), ->bprm_free_security()
Removed in favour of preparing new credentials and modifying those.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
(*) security_bprm_post_apply_creds(), ->bprm_post_apply_creds()
Removed; split between security_bprm_set_creds(),
security_bprm_committing_creds() and security_bprm_committed_creds().
(*) security_bprm_set(), ->bprm_set_security()
Removed; folded into security_bprm_set_creds().
(*) security_bprm_set_creds(), ->bprm_set_creds()
New. The new credentials in bprm->creds should be checked and set up
as appropriate. bprm->cred_prepared is 0 on the first call, 1 on the
second and subsequent calls.
(*) security_bprm_committing_creds(), ->bprm_committing_creds()
(*) security_bprm_committed_creds(), ->bprm_committed_creds()
New. Apply the security effects of the new credentials. This
includes closing unauthorised files in SELinux. This function may not
fail. When the former is called, the creds haven't yet been applied
to the process; when the latter is called, they have.
The former may access bprm->cred, the latter may not.
(3) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) The bprm_security_struct struct has been removed in favour of using
the credentials-under-construction approach.
(c) flush_unauthorized_files() now takes a cred pointer and passes it on
to inode_has_perm(), file_has_perm() and dentry_open().
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:24 +11:00
u32 osid , sid ;
2019-10-25 21:37:43 +02:00
int rc ;
2005-04-16 15:20:36 -07:00
CRED: Make execve() take advantage of copy-on-write credentials
Make execve() take advantage of copy-on-write credentials, allowing it to set
up the credentials in advance, and then commit the whole lot after the point
of no return.
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
The credential bits from struct linux_binprm are, for the most part,
replaced with a single credentials pointer (bprm->cred). This means that
all the creds can be calculated in advance and then applied at the point
of no return with no possibility of failure.
I would like to replace bprm->cap_effective with:
cap_isclear(bprm->cap_effective)
but this seems impossible due to special behaviour for processes of pid 1
(they always retain their parent's capability masks where normally they'd
be changed - see cap_bprm_set_creds()).
The following sequence of events now happens:
(a) At the start of do_execve, the current task's cred_exec_mutex is
locked to prevent PTRACE_ATTACH from obsoleting the calculation of
creds that we make.
(a) prepare_exec_creds() is then called to make a copy of the current
task's credentials and prepare it. This copy is then assigned to
bprm->cred.
This renders security_bprm_alloc() and security_bprm_free()
unnecessary, and so they've been removed.
(b) The determination of unsafe execution is now performed immediately
after (a) rather than later on in the code. The result is stored in
bprm->unsafe for future reference.
(c) prepare_binprm() is called, possibly multiple times.
(i) This applies the result of set[ug]id binaries to the new creds
attached to bprm->cred. Personality bit clearance is recorded,
but now deferred on the basis that the exec procedure may yet
fail.
(ii) This then calls the new security_bprm_set_creds(). This should
calculate the new LSM and capability credentials into *bprm->cred.
This folds together security_bprm_set() and parts of
security_bprm_apply_creds() (these two have been removed).
Anything that might fail must be done at this point.
(iii) bprm->cred_prepared is set to 1.
bprm->cred_prepared is 0 on the first pass of the security
calculations, and 1 on all subsequent passes. This allows SELinux
in (ii) to base its calculations only on the initial script and
not on the interpreter.
(d) flush_old_exec() is called to commit the task to execution. This
performs the following steps with regard to credentials:
(i) Clear pdeath_signal and set dumpable on certain circumstances that
may not be covered by commit_creds().
(ii) Clear any bits in current->personality that were deferred from
(c.i).
(e) install_exec_creds() [compute_creds() as was] is called to install the
new credentials. This performs the following steps with regard to
credentials:
(i) Calls security_bprm_committing_creds() to apply any security
requirements, such as flushing unauthorised files in SELinux, that
must be done before the credentials are changed.
This is made up of bits of security_bprm_apply_creds() and
security_bprm_post_apply_creds(), both of which have been removed.
This function is not allowed to fail; anything that might fail
must have been done in (c.ii).
(ii) Calls commit_creds() to apply the new credentials in a single
assignment (more or less). Possibly pdeath_signal and dumpable
should be part of struct creds.
(iii) Unlocks the task's cred_replace_mutex, thus allowing
PTRACE_ATTACH to take place.
(iv) Clears The bprm->cred pointer as the credentials it was holding
are now immutable.
(v) Calls security_bprm_committed_creds() to apply any security
alterations that must be done after the creds have been changed.
SELinux uses this to flush signals and signal handlers.
(f) If an error occurs before (d.i), bprm_free() will call abort_creds()
to destroy the proposed new credentials and will then unlock
cred_replace_mutex. No changes to the credentials will have been
made.
(2) LSM interface.
A number of functions have been changed, added or removed:
(*) security_bprm_alloc(), ->bprm_alloc_security()
(*) security_bprm_free(), ->bprm_free_security()
Removed in favour of preparing new credentials and modifying those.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
(*) security_bprm_post_apply_creds(), ->bprm_post_apply_creds()
Removed; split between security_bprm_set_creds(),
security_bprm_committing_creds() and security_bprm_committed_creds().
(*) security_bprm_set(), ->bprm_set_security()
Removed; folded into security_bprm_set_creds().
(*) security_bprm_set_creds(), ->bprm_set_creds()
New. The new credentials in bprm->creds should be checked and set up
as appropriate. bprm->cred_prepared is 0 on the first call, 1 on the
second and subsequent calls.
(*) security_bprm_committing_creds(), ->bprm_committing_creds()
(*) security_bprm_committed_creds(), ->bprm_committed_creds()
New. Apply the security effects of the new credentials. This
includes closing unauthorised files in SELinux. This function may not
fail. When the former is called, the creds haven't yet been applied
to the process; when the latter is called, they have.
The former may access bprm->cred, the latter may not.
(3) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) The bprm_security_struct struct has been removed in favour of using
the credentials-under-construction approach.
(c) flush_unauthorized_files() now takes a cred pointer and passes it on
to inode_has_perm(), file_has_perm() and dentry_open().
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:24 +11:00
osid = tsec - > osid ;
sid = tsec - > sid ;
if ( sid = = osid )
2005-04-16 15:20:36 -07:00
return ;
CRED: Make execve() take advantage of copy-on-write credentials
Make execve() take advantage of copy-on-write credentials, allowing it to set
up the credentials in advance, and then commit the whole lot after the point
of no return.
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
The credential bits from struct linux_binprm are, for the most part,
replaced with a single credentials pointer (bprm->cred). This means that
all the creds can be calculated in advance and then applied at the point
of no return with no possibility of failure.
I would like to replace bprm->cap_effective with:
cap_isclear(bprm->cap_effective)
but this seems impossible due to special behaviour for processes of pid 1
(they always retain their parent's capability masks where normally they'd
be changed - see cap_bprm_set_creds()).
The following sequence of events now happens:
(a) At the start of do_execve, the current task's cred_exec_mutex is
locked to prevent PTRACE_ATTACH from obsoleting the calculation of
creds that we make.
(a) prepare_exec_creds() is then called to make a copy of the current
task's credentials and prepare it. This copy is then assigned to
bprm->cred.
This renders security_bprm_alloc() and security_bprm_free()
unnecessary, and so they've been removed.
(b) The determination of unsafe execution is now performed immediately
after (a) rather than later on in the code. The result is stored in
bprm->unsafe for future reference.
(c) prepare_binprm() is called, possibly multiple times.
(i) This applies the result of set[ug]id binaries to the new creds
attached to bprm->cred. Personality bit clearance is recorded,
but now deferred on the basis that the exec procedure may yet
fail.
(ii) This then calls the new security_bprm_set_creds(). This should
calculate the new LSM and capability credentials into *bprm->cred.
This folds together security_bprm_set() and parts of
security_bprm_apply_creds() (these two have been removed).
Anything that might fail must be done at this point.
(iii) bprm->cred_prepared is set to 1.
bprm->cred_prepared is 0 on the first pass of the security
calculations, and 1 on all subsequent passes. This allows SELinux
in (ii) to base its calculations only on the initial script and
not on the interpreter.
(d) flush_old_exec() is called to commit the task to execution. This
performs the following steps with regard to credentials:
(i) Clear pdeath_signal and set dumpable on certain circumstances that
may not be covered by commit_creds().
(ii) Clear any bits in current->personality that were deferred from
(c.i).
(e) install_exec_creds() [compute_creds() as was] is called to install the
new credentials. This performs the following steps with regard to
credentials:
(i) Calls security_bprm_committing_creds() to apply any security
requirements, such as flushing unauthorised files in SELinux, that
must be done before the credentials are changed.
This is made up of bits of security_bprm_apply_creds() and
security_bprm_post_apply_creds(), both of which have been removed.
This function is not allowed to fail; anything that might fail
must have been done in (c.ii).
(ii) Calls commit_creds() to apply the new credentials in a single
assignment (more or less). Possibly pdeath_signal and dumpable
should be part of struct creds.
(iii) Unlocks the task's cred_replace_mutex, thus allowing
PTRACE_ATTACH to take place.
(iv) Clears The bprm->cred pointer as the credentials it was holding
are now immutable.
(v) Calls security_bprm_committed_creds() to apply any security
alterations that must be done after the creds have been changed.
SELinux uses this to flush signals and signal handlers.
(f) If an error occurs before (d.i), bprm_free() will call abort_creds()
to destroy the proposed new credentials and will then unlock
cred_replace_mutex. No changes to the credentials will have been
made.
(2) LSM interface.
A number of functions have been changed, added or removed:
(*) security_bprm_alloc(), ->bprm_alloc_security()
(*) security_bprm_free(), ->bprm_free_security()
Removed in favour of preparing new credentials and modifying those.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
(*) security_bprm_post_apply_creds(), ->bprm_post_apply_creds()
Removed; split between security_bprm_set_creds(),
security_bprm_committing_creds() and security_bprm_committed_creds().
(*) security_bprm_set(), ->bprm_set_security()
Removed; folded into security_bprm_set_creds().
(*) security_bprm_set_creds(), ->bprm_set_creds()
New. The new credentials in bprm->creds should be checked and set up
as appropriate. bprm->cred_prepared is 0 on the first call, 1 on the
second and subsequent calls.
(*) security_bprm_committing_creds(), ->bprm_committing_creds()
(*) security_bprm_committed_creds(), ->bprm_committed_creds()
New. Apply the security effects of the new credentials. This
includes closing unauthorised files in SELinux. This function may not
fail. When the former is called, the creds haven't yet been applied
to the process; when the latter is called, they have.
The former may access bprm->cred, the latter may not.
(3) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) The bprm_security_struct struct has been removed in favour of using
the credentials-under-construction approach.
(c) flush_unauthorized_files() now takes a cred pointer and passes it on
to inode_has_perm(), file_has_perm() and dentry_open().
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:24 +11:00
/* Check whether the new SID can inherit signal state from the old SID.
* If not , clear itimers to avoid subsequent signal generation and
* flush and unblock signals .
*
* This must occur _after_ the task SID has been updated so that any
* kill done after the flush will be checked against the new SID .
*/
2023-03-09 13:30:37 -05:00
rc = avc_has_perm ( osid , sid , SECCLASS_PROCESS , PROCESS__SIGINH , NULL ) ;
2005-04-16 15:20:36 -07:00
if ( rc ) {
2019-10-25 21:37:43 +02:00
clear_itimer ( ) ;
2022-01-27 10:56:13 -05:00
spin_lock_irq ( & unrcu_pointer ( current - > sighand ) - > siglock ) ;
2015-06-04 16:22:16 -04:00
if ( ! fatal_signal_pending ( current ) ) {
flush_sigqueue ( & current - > pending ) ;
flush_sigqueue ( & current - > signal - > shared_pending ) ;
2009-04-29 13:45:05 +01:00
flush_signal_handlers ( current , 1 ) ;
sigemptyset ( & current - > blocked ) ;
2015-06-04 16:22:16 -04:00
recalc_sigpending ( ) ;
2009-04-29 13:45:05 +01:00
}
2022-01-27 10:56:13 -05:00
spin_unlock_irq ( & unrcu_pointer ( current - > sighand ) - > siglock ) ;
2005-04-16 15:20:36 -07:00
}
CRED: Make execve() take advantage of copy-on-write credentials
Make execve() take advantage of copy-on-write credentials, allowing it to set
up the credentials in advance, and then commit the whole lot after the point
of no return.
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
The credential bits from struct linux_binprm are, for the most part,
replaced with a single credentials pointer (bprm->cred). This means that
all the creds can be calculated in advance and then applied at the point
of no return with no possibility of failure.
I would like to replace bprm->cap_effective with:
cap_isclear(bprm->cap_effective)
but this seems impossible due to special behaviour for processes of pid 1
(they always retain their parent's capability masks where normally they'd
be changed - see cap_bprm_set_creds()).
The following sequence of events now happens:
(a) At the start of do_execve, the current task's cred_exec_mutex is
locked to prevent PTRACE_ATTACH from obsoleting the calculation of
creds that we make.
(a) prepare_exec_creds() is then called to make a copy of the current
task's credentials and prepare it. This copy is then assigned to
bprm->cred.
This renders security_bprm_alloc() and security_bprm_free()
unnecessary, and so they've been removed.
(b) The determination of unsafe execution is now performed immediately
after (a) rather than later on in the code. The result is stored in
bprm->unsafe for future reference.
(c) prepare_binprm() is called, possibly multiple times.
(i) This applies the result of set[ug]id binaries to the new creds
attached to bprm->cred. Personality bit clearance is recorded,
but now deferred on the basis that the exec procedure may yet
fail.
(ii) This then calls the new security_bprm_set_creds(). This should
calculate the new LSM and capability credentials into *bprm->cred.
This folds together security_bprm_set() and parts of
security_bprm_apply_creds() (these two have been removed).
Anything that might fail must be done at this point.
(iii) bprm->cred_prepared is set to 1.
bprm->cred_prepared is 0 on the first pass of the security
calculations, and 1 on all subsequent passes. This allows SELinux
in (ii) to base its calculations only on the initial script and
not on the interpreter.
(d) flush_old_exec() is called to commit the task to execution. This
performs the following steps with regard to credentials:
(i) Clear pdeath_signal and set dumpable on certain circumstances that
may not be covered by commit_creds().
(ii) Clear any bits in current->personality that were deferred from
(c.i).
(e) install_exec_creds() [compute_creds() as was] is called to install the
new credentials. This performs the following steps with regard to
credentials:
(i) Calls security_bprm_committing_creds() to apply any security
requirements, such as flushing unauthorised files in SELinux, that
must be done before the credentials are changed.
This is made up of bits of security_bprm_apply_creds() and
security_bprm_post_apply_creds(), both of which have been removed.
This function is not allowed to fail; anything that might fail
must have been done in (c.ii).
(ii) Calls commit_creds() to apply the new credentials in a single
assignment (more or less). Possibly pdeath_signal and dumpable
should be part of struct creds.
(iii) Unlocks the task's cred_replace_mutex, thus allowing
PTRACE_ATTACH to take place.
(iv) Clears The bprm->cred pointer as the credentials it was holding
are now immutable.
(v) Calls security_bprm_committed_creds() to apply any security
alterations that must be done after the creds have been changed.
SELinux uses this to flush signals and signal handlers.
(f) If an error occurs before (d.i), bprm_free() will call abort_creds()
to destroy the proposed new credentials and will then unlock
cred_replace_mutex. No changes to the credentials will have been
made.
(2) LSM interface.
A number of functions have been changed, added or removed:
(*) security_bprm_alloc(), ->bprm_alloc_security()
(*) security_bprm_free(), ->bprm_free_security()
Removed in favour of preparing new credentials and modifying those.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
(*) security_bprm_post_apply_creds(), ->bprm_post_apply_creds()
Removed; split between security_bprm_set_creds(),
security_bprm_committing_creds() and security_bprm_committed_creds().
(*) security_bprm_set(), ->bprm_set_security()
Removed; folded into security_bprm_set_creds().
(*) security_bprm_set_creds(), ->bprm_set_creds()
New. The new credentials in bprm->creds should be checked and set up
as appropriate. bprm->cred_prepared is 0 on the first call, 1 on the
second and subsequent calls.
(*) security_bprm_committing_creds(), ->bprm_committing_creds()
(*) security_bprm_committed_creds(), ->bprm_committed_creds()
New. Apply the security effects of the new credentials. This
includes closing unauthorised files in SELinux. This function may not
fail. When the former is called, the creds haven't yet been applied
to the process; when the latter is called, they have.
The former may access bprm->cred, the latter may not.
(3) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) The bprm_security_struct struct has been removed in favour of using
the credentials-under-construction approach.
(c) flush_unauthorized_files() now takes a cred pointer and passes it on
to inode_has_perm(), file_has_perm() and dentry_open().
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:24 +11:00
/* Wake up the parent if it is waiting so that it can recheck
* wait permission to the new task SID . */
2009-04-29 16:02:24 +02:00
read_lock ( & tasklist_lock ) ;
2022-01-27 10:56:13 -05:00
__wake_up_parent ( current , unrcu_pointer ( current - > real_parent ) ) ;
2009-04-29 16:02:24 +02:00
read_unlock ( & tasklist_lock ) ;
2005-04-16 15:20:36 -07:00
}
/* superblock security operations */
static int selinux_sb_alloc_security ( struct super_block * sb )
{
2021-04-22 17:41:15 +02:00
struct superblock_security_struct * sbsec = selinux_superblock ( sb ) ;
2020-01-10 16:32:10 -05:00
mutex_init ( & sbsec - > lock ) ;
INIT_LIST_HEAD ( & sbsec - > isec_head ) ;
spin_lock_init ( & sbsec - > isec_lock ) ;
sbsec - > sid = SECINITSID_UNLABELED ;
sbsec - > def_sid = SECINITSID_FILE ;
sbsec - > mntpoint_sid = SECINITSID_UNLABELED ;
return 0 ;
2005-04-16 15:20:36 -07:00
}
2018-12-14 21:56:23 -05:00
/*
 * Return the length of the first mount option in @s: the number of
 * characters that precede the first comma not enclosed in double quotes,
 * or the full string length when there is no such comma.
 *
 * Every '"' toggles the "inside quotes" state, so an unmatched opening
 * quote shields all commas that follow it.
 */
static inline int opt_len(const char *s)
{
	bool in_quotes = false;
	int n = 0;

	while (s[n] != '\0') {
		if (s[n] == '"')
			in_quotes = !in_quotes;
		else if (s[n] == ',' && !in_quotes)
			return n;
		n++;
	}
	return n;
}
2018-12-14 21:56:23 -05:00
static int selinux_sb_eat_lsm_opts ( char * options , void * * mnt_opts )
2005-04-16 15:20:36 -07:00
{
2018-12-14 21:56:23 -05:00
char * from = options ;
char * to = options ;
bool first = true ;
2019-06-12 21:55:38 +08:00
int rc ;
2005-04-16 15:20:36 -07:00
2018-12-14 21:56:23 -05:00
while ( 1 ) {
int len = opt_len ( from ) ;
2019-06-12 21:55:38 +08:00
int token ;
2018-12-14 21:56:23 -05:00
char * arg = NULL ;
2005-04-16 15:20:36 -07:00
2018-12-14 21:56:23 -05:00
token = match_opt_prefix ( from , len , & arg ) ;
2005-04-16 15:20:36 -07:00
2018-12-14 21:56:23 -05:00
if ( token ! = Opt_error ) {
char * p , * q ;
2005-04-16 15:20:36 -07:00
2018-12-14 21:56:23 -05:00
/* strip quotes */
if ( arg ) {
for ( p = q = arg ; p < from + len ; p + + ) {
char c = * p ;
if ( c ! = ' " ' )
* q + + = c ;
}
arg = kmemdup_nul ( arg , q - arg , GFP_KERNEL ) ;
2019-06-12 21:55:38 +08:00
if ( ! arg ) {
rc = - ENOMEM ;
goto free_opt ;
}
2018-12-14 21:56:23 -05:00
}
rc = selinux_add_opt ( token , arg , mnt_opts ) ;
2022-06-15 17:38:39 +02:00
kfree ( arg ) ;
arg = NULL ;
2018-12-14 21:56:23 -05:00
if ( unlikely ( rc ) ) {
2019-06-12 21:55:38 +08:00
goto free_opt ;
2018-12-14 21:56:23 -05:00
}
} else {
if ( ! first ) { // copy with preceding comma
from - - ;
len + + ;
}
if ( to ! = from )
memmove ( to , from , len ) ;
to + = len ;
first = false ;
2005-04-16 15:20:36 -07:00
}
2018-12-14 21:56:23 -05:00
if ( ! from [ len ] )
break ;
from + = len + 1 ;
}
* to = ' \0 ' ;
return 0 ;
2019-06-12 21:55:38 +08:00
free_opt :
if ( * mnt_opts ) {
selinux_free_mnt_opts ( * mnt_opts ) ;
* mnt_opts = NULL ;
}
return rc ;
2005-04-16 15:20:36 -07:00
}
2021-02-26 22:37:55 -05:00
static int selinux_sb_mnt_opts_compat ( struct super_block * sb , void * mnt_opts )
{
struct selinux_mnt_opts * opts = mnt_opts ;
2022-01-25 15:11:33 +08:00
struct superblock_security_struct * sbsec = selinux_superblock ( sb ) ;
2021-02-26 22:37:55 -05:00
/*
* Superblock not initialized ( i . e . no options ) - reject if any
* options specified , otherwise accept .
*/
if ( ! ( sbsec - > flags & SE_SBINITIALIZED ) )
return opts ? 1 : 0 ;
/*
* Superblock initialized and no options specified - reject if
* superblock has any options set , otherwise accept .
*/
if ( ! opts )
return ( sbsec - > flags & SE_MNTMASK ) ? 1 : 0 ;
2022-02-02 13:55:29 +01:00
if ( opts - > fscontext_sid ) {
if ( bad_option ( sbsec , FSCONTEXT_MNT , sbsec - > sid ,
opts - > fscontext_sid ) )
2021-02-26 22:37:55 -05:00
return 1 ;
}
2022-02-02 13:55:29 +01:00
if ( opts - > context_sid ) {
if ( bad_option ( sbsec , CONTEXT_MNT , sbsec - > mntpoint_sid ,
opts - > context_sid ) )
2021-02-26 22:37:55 -05:00
return 1 ;
}
2022-02-02 13:55:29 +01:00
if ( opts - > rootcontext_sid ) {
struct inode_security_struct * root_isec ;
2022-01-31 13:57:36 -05:00
2022-02-02 13:55:29 +01:00
root_isec = backing_inode_security ( sb - > s_root ) ;
if ( bad_option ( sbsec , ROOTCONTEXT_MNT , root_isec - > sid ,
opts - > rootcontext_sid ) )
2021-02-26 22:37:55 -05:00
return 1 ;
2022-02-02 13:55:29 +01:00
}
if ( opts - > defcontext_sid ) {
if ( bad_option ( sbsec , DEFCONTEXT_MNT , sbsec - > def_sid ,
opts - > defcontext_sid ) )
2021-02-26 22:37:55 -05:00
return 1 ;
}
return 0 ;
}
2018-12-13 13:41:47 -05:00
static int selinux_sb_remount ( struct super_block * sb , void * mnt_opts )
2011-03-03 16:09:14 -05:00
{
2018-12-13 15:04:59 -05:00
struct selinux_mnt_opts * opts = mnt_opts ;
2021-04-22 17:41:15 +02:00
struct superblock_security_struct * sbsec = selinux_superblock ( sb ) ;
2011-03-03 16:09:14 -05:00
if ( ! ( sbsec - > flags & SE_SBINITIALIZED ) )
return 0 ;
2018-12-13 13:41:47 -05:00
if ( ! opts )
2011-03-03 16:09:14 -05:00
return 0 ;
2022-02-02 13:55:29 +01:00
if ( opts - > fscontext_sid ) {
2022-01-31 13:57:37 -05:00
if ( bad_option ( sbsec , FSCONTEXT_MNT , sbsec - > sid ,
opts - > fscontext_sid ) )
2018-12-13 15:04:59 -05:00
goto out_bad_option ;
2011-03-03 16:09:14 -05:00
}
2022-02-02 13:55:29 +01:00
if ( opts - > context_sid ) {
2022-01-31 13:57:37 -05:00
if ( bad_option ( sbsec , CONTEXT_MNT , sbsec - > mntpoint_sid ,
opts - > context_sid ) )
2018-12-13 15:04:59 -05:00
goto out_bad_option ;
}
2022-02-02 13:55:29 +01:00
if ( opts - > rootcontext_sid ) {
2018-12-13 15:04:59 -05:00
struct inode_security_struct * root_isec ;
root_isec = backing_inode_security ( sb - > s_root ) ;
2022-01-31 13:57:37 -05:00
if ( bad_option ( sbsec , ROOTCONTEXT_MNT , root_isec - > sid ,
opts - > rootcontext_sid ) )
2018-12-13 15:04:59 -05:00
goto out_bad_option ;
}
2022-02-02 13:55:29 +01:00
if ( opts - > defcontext_sid ) {
2022-01-31 13:57:37 -05:00
if ( bad_option ( sbsec , DEFCONTEXT_MNT , sbsec - > def_sid ,
opts - > defcontext_sid ) )
2018-12-13 15:04:59 -05:00
goto out_bad_option ;
2011-03-03 16:09:14 -05:00
}
2018-12-01 23:06:57 -05:00
return 0 ;
2011-03-03 16:09:14 -05:00
out_bad_option :
2018-06-12 10:09:03 +02:00
pr_warn ( " SELinux: unable to change security options "
2013-12-15 11:17:45 -08:00
" during remount (dev %s, type=%s) \n " , sb - > s_id ,
sb - > s_type - > name ) ;
2018-12-01 23:06:57 -05:00
return - EINVAL ;
2011-03-03 16:09:14 -05:00
}
2023-08-23 14:01:28 +05:00
static int selinux_sb_kern_mount ( const struct super_block * sb )
2005-04-16 15:20:36 -07:00
{
2008-11-14 10:39:21 +11:00
const struct cred * cred = current_cred ( ) ;
2009-07-14 12:14:09 -04:00
struct common_audit_data ad ;
2008-12-19 11:41:10 +11:00
2012-04-04 15:01:43 -04:00
ad . type = LSM_AUDIT_DATA_DENTRY ;
2011-04-25 13:10:27 -04:00
ad . u . dentry = sb - > s_root ;
2008-11-14 10:39:21 +11:00
return superblock_has_perm ( cred , sb , FILESYSTEM__MOUNT , & ad ) ;
2005-04-16 15:20:36 -07:00
}
2006-06-23 02:02:58 -07:00
static int selinux_sb_statfs ( struct dentry * dentry )
2005-04-16 15:20:36 -07:00
{
2008-11-14 10:39:21 +11:00
const struct cred * cred = current_cred ( ) ;
2009-07-14 12:14:09 -04:00
struct common_audit_data ad ;
2005-04-16 15:20:36 -07:00
2012-04-04 15:01:43 -04:00
ad . type = LSM_AUDIT_DATA_DENTRY ;
2011-04-25 13:10:27 -04:00
ad . u . dentry = dentry - > d_sb - > s_root ;
2008-11-14 10:39:21 +11:00
return superblock_has_perm ( cred , dentry - > d_sb , FILESYSTEM__GETATTR , & ad ) ;
2005-04-16 15:20:36 -07:00
}
2012-10-11 11:42:01 -04:00
static int selinux_mount ( const char * dev_name ,
2016-03-25 14:52:53 -04:00
const struct path * path ,
2012-10-11 11:42:01 -04:00
const char * type ,
2008-04-17 13:17:49 -04:00
unsigned long flags ,
void * data )
2005-04-16 15:20:36 -07:00
{
2008-11-14 10:39:21 +11:00
const struct cred * cred = current_cred ( ) ;
2005-04-16 15:20:36 -07:00
if ( flags & MS_REMOUNT )
2011-12-07 18:16:57 -05:00
return superblock_has_perm ( cred , path - > dentry - > d_sb ,
2008-04-17 13:17:49 -04:00
FILESYSTEM__REMOUNT , NULL ) ;
2005-04-16 15:20:36 -07:00
else
2011-04-28 16:04:24 -04:00
return path_has_perm ( cred , path , FILE__MOUNTON ) ;
2005-04-16 15:20:36 -07:00
}
2020-01-17 15:24:07 -05:00
/*
 * Permission check for moving a mount: the current task's credentials
 * must hold FILE__MOUNTON on the destination @to_path.  @from_path is
 * not examined.
 *
 * Returns the result of path_has_perm().
 */
static int selinux_move_mount(const struct path *from_path,
			      const struct path *to_path)
{
	return path_has_perm(current_cred(), to_path, FILE__MOUNTON);
}
2005-04-16 15:20:36 -07:00
static int selinux_umount ( struct vfsmount * mnt , int flags )
{
2008-11-14 10:39:21 +11:00
const struct cred * cred = current_cred ( ) ;
2005-04-16 15:20:36 -07:00
2008-11-14 10:39:21 +11:00
return superblock_has_perm ( cred , mnt - > mnt_sb ,
2008-04-17 13:17:49 -04:00
FILESYSTEM__UNMOUNT , NULL ) ;
2005-04-16 15:20:36 -07:00
}
vfs, security: Fix automount superblock LSM init problem, preventing NFS sb sharing
When NFS superblocks are created by automounting, their LSM parameters
aren't set in the fs_context struct prior to sget_fc() being called,
leading to failure to match existing superblocks.
This bug leads to messages like the following appearing in dmesg when
fscache is enabled:
NFS: Cache volume key already in use (nfs,4.2,2,108,106a8c0,1,,,,100000,100000,2ee,3a98,1d4c,3a98,1)
Fix this by adding a new LSM hook to load fc->security for submount
creation.
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Link: https://lore.kernel.org/r/165962680944.3334508.6610023900349142034.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/165962729225.3357250.14350728846471527137.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/165970659095.2812394.6868894171102318796.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/166133579016.3678898.6283195019480567275.stgit@warthog.procyon.org.uk/ # v4
Link: https://lore.kernel.org/r/217595.1662033775@warthog.procyon.org.uk/ # v5
Fixes: 9bc61ab18b1d ("vfs: Introduce fs_context, switch vfs_kern_mount() to it.")
Fixes: 779df6a5480f ("NFS: Ensure security label is set for root inode")
Tested-by: Jeff Layton <jlayton@kernel.org>
Acked-by: Casey Schaufler <casey@schaufler-ca.com>
Acked-by: "Christian Brauner (Microsoft)" <brauner@kernel.org>
Acked-by: Paul Moore <paul@paul-moore.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Message-Id: <20230808-master-v9-1-e0ecde888221@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
2023-08-08 07:34:20 -04:00
static int selinux_fs_context_submount ( struct fs_context * fc ,
struct super_block * reference )
{
2023-09-11 16:23:58 +02:00
const struct superblock_security_struct * sbsec = selinux_superblock ( reference ) ;
vfs, security: Fix automount superblock LSM init problem, preventing NFS sb sharing
When NFS superblocks are created by automounting, their LSM parameters
aren't set in the fs_context struct prior to sget_fc() being called,
leading to failure to match existing superblocks.
This bug leads to messages like the following appearing in dmesg when
fscache is enabled:
NFS: Cache volume key already in use (nfs,4.2,2,108,106a8c0,1,,,,100000,100000,2ee,3a98,1d4c,3a98,1)
Fix this by adding a new LSM hook to load fc->security for submount
creation.
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Link: https://lore.kernel.org/r/165962680944.3334508.6610023900349142034.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/165962729225.3357250.14350728846471527137.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/165970659095.2812394.6868894171102318796.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/166133579016.3678898.6283195019480567275.stgit@warthog.procyon.org.uk/ # v4
Link: https://lore.kernel.org/r/217595.1662033775@warthog.procyon.org.uk/ # v5
Fixes: 9bc61ab18b1d ("vfs: Introduce fs_context, switch vfs_kern_mount() to it.")
Fixes: 779df6a5480f ("NFS: Ensure security label is set for root inode")
Tested-by: Jeff Layton <jlayton@kernel.org>
Acked-by: Casey Schaufler <casey@schaufler-ca.com>
Acked-by: "Christian Brauner (Microsoft)" <brauner@kernel.org>
Acked-by: Paul Moore <paul@paul-moore.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Message-Id: <20230808-master-v9-1-e0ecde888221@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
2023-08-08 07:34:20 -04:00
struct selinux_mnt_opts * opts ;
2023-09-11 16:23:58 +02:00
/*
* Ensure that fc - > security remains NULL when no options are set
* as expected by selinux_set_mnt_opts ( ) .
*/
if ( ! ( sbsec - > flags & ( FSCONTEXT_MNT | CONTEXT_MNT | DEFCONTEXT_MNT ) ) )
return 0 ;
vfs, security: Fix automount superblock LSM init problem, preventing NFS sb sharing
When NFS superblocks are created by automounting, their LSM parameters
aren't set in the fs_context struct prior to sget_fc() being called,
leading to failure to match existing superblocks.
This bug leads to messages like the following appearing in dmesg when
fscache is enabled:
NFS: Cache volume key already in use (nfs,4.2,2,108,106a8c0,1,,,,100000,100000,2ee,3a98,1d4c,3a98,1)
Fix this by adding a new LSM hook to load fc->security for submount
creation.
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Link: https://lore.kernel.org/r/165962680944.3334508.6610023900349142034.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/165962729225.3357250.14350728846471527137.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/165970659095.2812394.6868894171102318796.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/166133579016.3678898.6283195019480567275.stgit@warthog.procyon.org.uk/ # v4
Link: https://lore.kernel.org/r/217595.1662033775@warthog.procyon.org.uk/ # v5
Fixes: 9bc61ab18b1d ("vfs: Introduce fs_context, switch vfs_kern_mount() to it.")
Fixes: 779df6a5480f ("NFS: Ensure security label is set for root inode")
Tested-by: Jeff Layton <jlayton@kernel.org>
Acked-by: Casey Schaufler <casey@schaufler-ca.com>
Acked-by: "Christian Brauner (Microsoft)" <brauner@kernel.org>
Acked-by: Paul Moore <paul@paul-moore.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Message-Id: <20230808-master-v9-1-e0ecde888221@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
2023-08-08 07:34:20 -04:00
opts = kzalloc ( sizeof ( * opts ) , GFP_KERNEL ) ;
if ( ! opts )
return - ENOMEM ;
if ( sbsec - > flags & FSCONTEXT_MNT )
opts - > fscontext_sid = sbsec - > sid ;
if ( sbsec - > flags & CONTEXT_MNT )
opts - > context_sid = sbsec - > mntpoint_sid ;
if ( sbsec - > flags & DEFCONTEXT_MNT )
opts - > defcontext_sid = sbsec - > def_sid ;
fc - > security = opts ;
return 0 ;
}
2018-12-23 16:02:47 -05:00
static int selinux_fs_context_dup ( struct fs_context * fc ,
struct fs_context * src_fc )
{
const struct selinux_mnt_opts * src = src_fc - > security ;
if ( ! src )
return 0 ;
2022-02-02 13:55:29 +01:00
fc - > security = kmemdup ( src , sizeof ( * src ) , GFP_KERNEL ) ;
return fc - > security ? 0 : - ENOMEM ;
2018-12-23 16:02:47 -05:00
}
2019-09-07 07:23:15 -04:00
static const struct fs_parameter_spec selinux_fs_parameters [ ] = {
2018-11-01 23:07:24 +00:00
fsparam_string ( CONTEXT_STR , Opt_context ) ,
fsparam_string ( DEFCONTEXT_STR , Opt_defcontext ) ,
fsparam_string ( FSCONTEXT_STR , Opt_fscontext ) ,
fsparam_string ( ROOTCONTEXT_STR , Opt_rootcontext ) ,
fsparam_flag ( SECLABEL_STR , Opt_seclabel ) ,
{ }
} ;
static int selinux_fs_context_parse_param ( struct fs_context * fc ,
struct fs_parameter * param )
{
struct fs_parse_result result ;
2022-06-15 17:38:39 +02:00
int opt ;
2018-11-01 23:07:24 +00:00
2019-09-07 07:23:15 -04:00
opt = fs_parse ( fc , selinux_fs_parameters , param , & result ) ;
2018-11-01 23:07:24 +00:00
if ( opt < 0 )
return opt ;
2022-06-15 17:38:39 +02:00
return selinux_add_opt ( opt , param - > string , & fc - > security ) ;
2018-11-01 23:07:24 +00:00
}
2005-04-16 15:20:36 -07:00
/* inode security operations */
static int selinux_inode_alloc_security ( struct inode * inode )
{
2020-01-10 16:32:10 -05:00
struct inode_security_struct * isec = selinux_inode ( inode ) ;
u32 sid = current_sid ( ) ;
spin_lock_init ( & isec - > lock ) ;
INIT_LIST_HEAD ( & isec - > list ) ;
isec - > inode = inode ;
isec - > sid = SECINITSID_UNLABELED ;
isec - > sclass = SECCLASS_FILE ;
isec - > task_sid = sid ;
isec - > initialized = LABEL_INVALID ;
return 0 ;
2005-04-16 15:20:36 -07:00
}
/*
 * Hook called when @inode's security state is being released; all of the
 * work is delegated to inode_free_security().
 */
static void selinux_inode_free_security(struct inode *inode)
{
	inode_free_security(inode);
}
2013-05-22 12:50:34 -04:00
static int selinux_dentry_init_security ( struct dentry * dentry , int mode ,
2021-10-12 09:23:07 -04:00
const struct qstr * name ,
const char * * xattr_name , void * * ctx ,
2013-05-22 12:50:34 -04:00
u32 * ctxlen )
{
u32 newsid ;
int rc ;
2018-09-21 17:17:16 -07:00
rc = selinux_determine_inode_label ( selinux_cred ( current_cred ( ) ) ,
2016-07-13 10:44:51 -04:00
d_inode ( dentry - > d_parent ) , name ,
2015-07-10 17:19:58 -04:00
inode_mode_to_security_class ( mode ) ,
& newsid ) ;
if ( rc )
return rc ;
2013-05-22 12:50:34 -04:00
2021-10-12 09:23:07 -04:00
if ( xattr_name )
* xattr_name = XATTR_NAME_SELINUX ;
2023-03-09 13:30:37 -05:00
return security_sid_to_context ( newsid , ( char * * ) ctx ,
2018-03-01 18:48:02 -05:00
ctxlen ) ;
2013-05-22 12:50:34 -04:00
}
2016-07-13 10:44:53 -04:00
static int selinux_dentry_create_files_as ( struct dentry * dentry , int mode ,
struct qstr * name ,
const struct cred * old ,
struct cred * new )
{
u32 newsid ;
int rc ;
struct task_security_struct * tsec ;
2018-09-21 17:17:16 -07:00
rc = selinux_determine_inode_label ( selinux_cred ( old ) ,
2016-07-13 10:44:53 -04:00
d_inode ( dentry - > d_parent ) , name ,
inode_mode_to_security_class ( mode ) ,
& newsid ) ;
if ( rc )
return rc ;
2018-09-21 17:17:16 -07:00
tsec = selinux_cred ( new ) ;
2016-07-13 10:44:53 -04:00
tsec - > create_sid = newsid ;
return 0 ;
}
2005-09-09 13:01:35 -07:00
static int selinux_inode_init_security ( struct inode * inode , struct inode * dir ,
2013-07-25 05:44:02 +09:00
const struct qstr * qstr ,
security: Allow all LSMs to provide xattrs for inode_init_security hook
Currently, the LSM infrastructure supports only one LSM providing an xattr
and EVM calculating the HMAC on that xattr, plus other inode metadata.
Allow all LSMs to provide one or multiple xattrs, by extending the security
blob reservation mechanism. Introduce the new lbs_xattr_count field of the
lsm_blob_sizes structure, so that each LSM can specify how many xattrs it
needs, and the LSM infrastructure knows how many xattr slots it should
allocate.
Modify the inode_init_security hook definition, by passing the full
xattr array allocated in security_inode_init_security(), and the current
number of xattr slots in that array filled by LSMs. The first parameter
would allow EVM to access and calculate the HMAC on xattrs supplied by
other LSMs, the second to not leave gaps in the xattr array, when an LSM
requested but did not provide xattrs (e.g. if it is not initialized).
Introduce lsm_get_xattr_slot(), which LSMs can call as many times as the
number specified in the lbs_xattr_count field of the lsm_blob_sizes
structure. During each call, lsm_get_xattr_slot() increments the number of
filled xattrs, so that at the next invocation it returns the next xattr
slot to fill.
Cleanup security_inode_init_security(). Unify the !initxattrs and
initxattrs case by simply not allocating the new_xattrs array in the
former. Update the documentation to reflect the changes, and fix the
description of the xattr name, as it is not allocated anymore.
Adapt both SELinux and Smack to use the new definition of the
inode_init_security hook, and to call lsm_get_xattr_slot() to obtain and
fill the reserved slots in the xattr array.
Move the xattr->name assignment after the xattr->value one, so that it is
done only in case of successful memory allocation.
Finally, change the default return value of the inode_init_security hook
from zero to -EOPNOTSUPP, so that BPF LSM correctly follows the hook
conventions.
Reported-by: Nicolas Bouchinet <nicolas.bouchinet@clip-os.org>
Link: https://lore.kernel.org/linux-integrity/Y1FTSIo+1x+4X0LS@archlinux/
Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Acked-by: Casey Schaufler <casey@schaufler-ca.com>
[PM: minor comment and variable tweaks, approved by RS]
Signed-off-by: Paul Moore <paul@paul-moore.com>
2023-06-10 09:57:35 +02:00
struct xattr * xattrs , int * xattr_count )
2005-09-09 13:01:35 -07:00
{
2018-09-21 17:17:16 -07:00
const struct task_security_struct * tsec = selinux_cred ( current_cred ( ) ) ;
2005-09-09 13:01:35 -07:00
struct superblock_security_struct * sbsec ;
security: Allow all LSMs to provide xattrs for inode_init_security hook
Currently, the LSM infrastructure supports only one LSM providing an xattr
and EVM calculating the HMAC on that xattr, plus other inode metadata.
Allow all LSMs to provide one or multiple xattrs, by extending the security
blob reservation mechanism. Introduce the new lbs_xattr_count field of the
lsm_blob_sizes structure, so that each LSM can specify how many xattrs it
needs, and the LSM infrastructure knows how many xattr slots it should
allocate.
Modify the inode_init_security hook definition, by passing the full
xattr array allocated in security_inode_init_security(), and the current
number of xattr slots in that array filled by LSMs. The first parameter
would allow EVM to access and calculate the HMAC on xattrs supplied by
other LSMs, the second to not leave gaps in the xattr array, when an LSM
requested but did not provide xattrs (e.g. if it is not initialized).
Introduce lsm_get_xattr_slot(), which LSMs can call as many times as the
number specified in the lbs_xattr_count field of the lsm_blob_sizes
structure. During each call, lsm_get_xattr_slot() increments the number of
filled xattrs, so that at the next invocation it returns the next xattr
slot to fill.
Cleanup security_inode_init_security(). Unify the !initxattrs and
initxattrs case by simply not allocating the new_xattrs array in the
former. Update the documentation to reflect the changes, and fix the
description of the xattr name, as it is not allocated anymore.
Adapt both SELinux and Smack to use the new definition of the
inode_init_security hook, and to call lsm_get_xattr_slot() to obtain and
fill the reserved slots in the xattr array.
Move the xattr->name assignment after the xattr->value one, so that it is
done only in case of successful memory allocation.
Finally, change the default return value of the inode_init_security hook
from zero to -EOPNOTSUPP, so that BPF LSM correctly follows the hook
conventions.
Reported-by: Nicolas Bouchinet <nicolas.bouchinet@clip-os.org>
Link: https://lore.kernel.org/linux-integrity/Y1FTSIo+1x+4X0LS@archlinux/
Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Acked-by: Casey Schaufler <casey@schaufler-ca.com>
[PM: minor comment and variable tweaks, approved by RS]
Signed-off-by: Paul Moore <paul@paul-moore.com>
2023-06-10 09:57:35 +02:00
struct xattr * xattr = lsm_get_xattr_slot ( xattrs , xattr_count ) ;
2017-10-04 20:32:17 +02:00
u32 newsid , clen ;
2024-01-18 20:44:21 -05:00
u16 newsclass ;
2005-09-09 13:01:35 -07:00
int rc ;
2013-07-25 05:44:02 +09:00
char * context ;
2005-09-09 13:01:35 -07:00
2021-04-22 17:41:15 +02:00
sbsec = selinux_superblock ( dir - > i_sb ) ;
2005-09-09 13:01:35 -07:00
2008-11-14 10:39:19 +11:00
newsid = tsec - > create_sid ;
2024-01-18 20:44:21 -05:00
newsclass = inode_mode_to_security_class ( inode - > i_mode ) ;
rc = selinux_determine_inode_label ( tsec , dir , qstr , newsclass , & newsid ) ;
2015-07-10 17:19:58 -04:00
if ( rc )
return rc ;
2005-09-09 13:01:35 -07:00
2006-09-25 23:32:00 -07:00
/* Possibly defer initialization to selinux_complete_init. */
2009-01-16 09:22:02 -05:00
if ( sbsec - > flags & SE_SBINITIALIZED ) {
2018-09-21 17:19:11 -07:00
struct inode_security_struct * isec = selinux_inode ( inode ) ;
2024-01-18 20:44:21 -05:00
isec - > sclass = newsclass ;
2006-09-25 23:32:00 -07:00
isec - > sid = newsid ;
2015-12-24 11:09:40 -05:00
isec - > initialized = LABEL_INITIALIZED ;
2006-09-25 23:32:00 -07:00
}
2005-09-09 13:01:35 -07:00
2023-03-09 13:30:37 -05:00
if ( ! selinux_initialized ( ) | |
2020-01-07 14:31:53 +01:00
! ( sbsec - > flags & SBLABEL_MNT ) )
2005-11-08 21:34:33 -08:00
return - EOPNOTSUPP ;
security: Allow all LSMs to provide xattrs for inode_init_security hook
Currently, the LSM infrastructure supports only one LSM providing an xattr
and EVM calculating the HMAC on that xattr, plus other inode metadata.
Allow all LSMs to provide one or multiple xattrs, by extending the security
blob reservation mechanism. Introduce the new lbs_xattr_count field of the
lsm_blob_sizes structure, so that each LSM can specify how many xattrs it
needs, and the LSM infrastructure knows how many xattr slots it should
allocate.
Modify the inode_init_security hook definition, by passing the full
xattr array allocated in security_inode_init_security(), and the current
number of xattr slots in that array filled by LSMs. The first parameter
would allow EVM to access and calculate the HMAC on xattrs supplied by
other LSMs, the second to not leave gaps in the xattr array, when an LSM
requested but did not provide xattrs (e.g. if it is not initialized).
Introduce lsm_get_xattr_slot(), which LSMs can call as many times as the
number specified in the lbs_xattr_count field of the lsm_blob_sizes
structure. During each call, lsm_get_xattr_slot() increments the number of
filled xattrs, so that at the next invocation it returns the next xattr
slot to fill.
Cleanup security_inode_init_security(). Unify the !initxattrs and
initxattrs case by simply not allocating the new_xattrs array in the
former. Update the documentation to reflect the changes, and fix the
description of the xattr name, as it is not allocated anymore.
Adapt both SELinux and Smack to use the new definition of the
inode_init_security hook, and to call lsm_get_xattr_slot() to obtain and
fill the reserved slots in the xattr array.
Move the xattr->name assignment after the xattr->value one, so that it is
done only in case of successful memory allocation.
Finally, change the default return value of the inode_init_security hook
from zero to -EOPNOTSUPP, so that BPF LSM correctly follows the hook
conventions.
Reported-by: Nicolas Bouchinet <nicolas.bouchinet@clip-os.org>
Link: https://lore.kernel.org/linux-integrity/Y1FTSIo+1x+4X0LS@archlinux/
Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Acked-by: Casey Schaufler <casey@schaufler-ca.com>
[PM: minor comment and variable tweaks, approved by RS]
Signed-off-by: Paul Moore <paul@paul-moore.com>
2023-06-10 09:57:35 +02:00
if ( xattr ) {
2023-03-09 13:30:37 -05:00
rc = security_sid_to_context_force ( newsid ,
2018-03-01 18:48:02 -05:00
& context , & clen ) ;
2013-07-25 05:44:02 +09:00
if ( rc )
2005-09-09 13:01:43 -07:00
return rc ;
security: Allow all LSMs to provide xattrs for inode_init_security hook
Currently, the LSM infrastructure supports only one LSM providing an xattr
and EVM calculating the HMAC on that xattr, plus other inode metadata.
Allow all LSMs to provide one or multiple xattrs, by extending the security
blob reservation mechanism. Introduce the new lbs_xattr_count field of the
lsm_blob_sizes structure, so that each LSM can specify how many xattrs it
needs, and the LSM infrastructure knows how many xattr slots it should
allocate.
Modify the inode_init_security hook definition, by passing the full
xattr array allocated in security_inode_init_security(), and the current
number of xattr slots in that array filled by LSMs. The first parameter
would allow EVM to access and calculate the HMAC on xattrs supplied by
other LSMs, the second to not leave gaps in the xattr array, when an LSM
requested but did not provide xattrs (e.g. if it is not initialized).
Introduce lsm_get_xattr_slot(), which LSMs can call as many times as the
number specified in the lbs_xattr_count field of the lsm_blob_sizes
structure. During each call, lsm_get_xattr_slot() increments the number of
filled xattrs, so that at the next invocation it returns the next xattr
slot to fill.
Cleanup security_inode_init_security(). Unify the !initxattrs and
initxattrs case by simply not allocating the new_xattrs array in the
former. Update the documentation to reflect the changes, and fix the
description of the xattr name, as it is not allocated anymore.
Adapt both SELinux and Smack to use the new definition of the
inode_init_security hook, and to call lsm_get_xattr_slot() to obtain and
fill the reserved slots in the xattr array.
Move the xattr->name assignment after the xattr->value one, so that it is
done only in case of successful memory allocation.
Finally, change the default return value of the inode_init_security hook
from zero to -EOPNOTSUPP, so that BPF LSM correctly follows the hook
conventions.
Reported-by: Nicolas Bouchinet <nicolas.bouchinet@clip-os.org>
Link: https://lore.kernel.org/linux-integrity/Y1FTSIo+1x+4X0LS@archlinux/
Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Acked-by: Casey Schaufler <casey@schaufler-ca.com>
[PM: minor comment and variable tweaks, approved by RS]
Signed-off-by: Paul Moore <paul@paul-moore.com>
2023-06-10 09:57:35 +02:00
xattr - > value = context ;
xattr - > value_len = clen ;
xattr - > name = XATTR_SELINUX_SUFFIX ;
2005-09-09 13:01:35 -07:00
}
return 0 ;
}
2021-01-08 14:22:22 -08:00
/*
 * Label a freshly allocated anonymous inode and check that the current task
 * is allowed to create it.
 *
 * With a @context_inode the security class and SID are copied from it, and
 * -EACCES is returned if that inode is not LABEL_INITIALIZED. Without one
 * the class is SECCLASS_ANON_INODE and the SID comes from
 * security_transition_sid() using the current task SID and @name.
 *
 * Returns 0 immediately while SELinux is not initialized; otherwise an error
 * from the steps above or the result of the FILE__CREATE avc_has_perm() check.
 */
static int selinux_inode_init_security_anon ( struct inode * inode ,
const struct qstr * name ,
const struct inode * context_inode )
{
2024-03-15 18:31:00 +01:00
u32 sid = current_sid ( ) ;
2021-01-08 14:22:22 -08:00
struct common_audit_data ad ;
struct inode_security_struct * isec ;
int rc ;
2023-03-09 13:30:37 -05:00
if ( unlikely ( ! selinux_initialized ( ) ) )
2021-01-08 14:22:22 -08:00
return 0 ;
isec = selinux_inode ( inode ) ;
/*
* We only get here once per ephemeral inode . The inode has
* been initialized via inode_alloc_security but is otherwise
* untouched .
*/
if ( context_inode ) {
struct inode_security_struct * context_isec =
selinux_inode ( context_inode ) ;
if ( context_isec - > initialized ! = LABEL_INITIALIZED ) {
2023-07-18 21:00:24 +02:00
pr_err ( " SELinux: context_inode is not initialized \n " ) ;
2021-01-08 14:22:22 -08:00
return - EACCES ;
}
isec - > sclass = context_isec - > sclass ;
isec - > sid = context_isec - > sid ;
} else {
isec - > sclass = SECCLASS_ANON_INODE ;
rc = security_transition_sid (
2024-03-15 18:31:00 +01:00
sid , sid ,
2021-01-08 14:22:22 -08:00
isec - > sclass , name , & isec - > sid ) ;
if ( rc )
return rc ;
}
isec - > initialized = LABEL_INITIALIZED ;
/*
* Now that we ' ve initialized security , check whether we ' re
* allowed to actually create this type of anonymous inode .
*/
2022-03-08 18:09:26 +01:00
ad . type = LSM_AUDIT_DATA_ANONINODE ;
ad . u . anonclass = name ? ( const char * ) name - > name : " ? " ;
2021-01-08 14:22:22 -08:00
2024-03-15 18:31:00 +01:00
return avc_has_perm ( sid ,
2021-01-08 14:22:22 -08:00
isec - > sid ,
isec - > sclass ,
FILE__CREATE ,
& ad ) ;
}
2011-07-26 01:42:34 -04:00
/*
 * Regular file creation hook: delegates to may_create() with class
 * SECCLASS_FILE. @mode is not consulted here.
 */
static int selinux_inode_create ( struct inode * dir , struct dentry * dentry , umode_t mode )
2005-04-16 15:20:36 -07:00
{
return may_create ( dir , dentry , SECCLASS_FILE ) ;
}
/*
 * Hard link hook: asks may_link() whether @old_dentry may gain a new link
 * in @dir (MAY_LINK). @new_dentry is not consulted here.
 */
static int selinux_inode_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry)
{
	int rc = may_link(dir, old_dentry, MAY_LINK);

	return rc;
}
/*
 * Unlink hook: asks may_link() whether @dentry may be removed from @dir
 * (MAY_UNLINK).
 */
static int selinux_inode_unlink(struct inode *dir, struct dentry *dentry)
{
	int rc = may_link(dir, dentry, MAY_UNLINK);

	return rc;
}
/*
 * Symlink creation hook: delegates to may_create() with class
 * SECCLASS_LNK_FILE. The link target @name is not consulted here.
 */
static int selinux_inode_symlink(struct inode *dir, struct dentry *dentry, const char *name)
{
	int rc = may_create(dir, dentry, SECCLASS_LNK_FILE);

	return rc;
}
2011-07-26 01:41:39 -04:00
/*
 * Directory creation hook: delegates to may_create() with class
 * SECCLASS_DIR. @mask is not consulted here.
 */
static int selinux_inode_mkdir ( struct inode * dir , struct dentry * dentry , umode_t mask )
2005-04-16 15:20:36 -07:00
{
return may_create ( dir , dentry , SECCLASS_DIR ) ;
}
/*
 * Directory removal hook: asks may_link() whether @dentry may be removed
 * from @dir (MAY_RMDIR).
 */
static int selinux_inode_rmdir(struct inode *dir, struct dentry *dentry)
{
	int rc = may_link(dir, dentry, MAY_RMDIR);

	return rc;
}
2011-07-26 01:52:52 -04:00
/*
 * Special file creation hook: delegates to may_create() with the security
 * class derived from @mode by inode_mode_to_security_class(). @dev is not
 * consulted here.
 */
static int selinux_inode_mknod ( struct inode * dir , struct dentry * dentry , umode_t mode , dev_t dev )
2005-04-16 15:20:36 -07:00
{
return may_create ( dir , dentry , inode_mode_to_security_class ( mode ) ) ;
}
/*
 * Rename hook: passes all four arguments straight through to may_rename()
 * and returns its result.
 */
static int selinux_inode_rename ( struct inode * old_inode , struct dentry * old_dentry ,
2008-04-17 13:17:49 -04:00
struct inode * new_inode , struct dentry * new_dentry )
2005-04-16 15:20:36 -07:00
{
return may_rename ( old_inode , old_dentry , new_inode , new_dentry ) ;
}
/*
 * Readlink hook: requires FILE__READ on @dentry for the current credentials,
 * as decided by dentry_has_perm().
 */
static int selinux_inode_readlink ( struct dentry * dentry )
{
2008-11-14 10:39:21 +11:00
const struct cred * cred = current_cred ( ) ;
2011-04-28 16:04:24 -04:00
return dentry_has_perm ( cred , dentry , FILE__READ ) ;
2005-04-16 15:20:36 -07:00
}
2015-03-23 13:37:39 +11:00
/*
 * Follow-link hook: checks FILE__READ for the current task SID against the
 * label of @inode, with @dentry recorded as audit data.
 *
 * The label is fetched with inode_security_rcu(inode, rcu); if that returns
 * an error pointer, its error code is returned instead of running the check.
 */
static int selinux_inode_follow_link ( struct dentry * dentry , struct inode * inode ,
bool rcu )
2005-04-16 15:20:36 -07:00
{
2015-03-23 13:37:39 +11:00
struct common_audit_data ad ;
struct inode_security_struct * isec ;
2024-03-15 18:31:00 +01:00
u32 sid = current_sid ( ) ;
2005-04-16 15:20:36 -07:00
2015-03-23 13:37:39 +11:00
ad . type = LSM_AUDIT_DATA_DENTRY ;
ad . u . dentry = dentry ;
2015-12-24 11:09:40 -05:00
isec = inode_security_rcu ( inode , rcu ) ;
if ( IS_ERR ( isec ) )
return PTR_ERR ( isec ) ;
2015-03-23 13:37:39 +11:00
2023-03-09 13:30:37 -05:00
return avc_has_perm ( sid , isec - > sid , isec - > sclass , FILE__READ , & ad ) ;
2005-04-16 15:20:36 -07:00
}
2012-04-04 15:01:42 -04:00
/*
 * Audit helper for selinux_inode_permission(): fills in inode-based audit
 * data for @inode and forwards @perms, @audited, @denied and @result to
 * slow_avc_audit() together with the current task SID and the inode label.
 * Returns whatever slow_avc_audit() returns.
 */
static noinline int audit_inode_permission ( struct inode * inode ,
u32 perms , u32 audited , u32 denied ,
2019-11-22 12:22:45 -05:00
int result )
2005-04-16 15:20:36 -07:00
{
2010-07-23 11:44:03 -04:00
struct common_audit_data ad ;
2018-09-21 17:19:11 -07:00
struct inode_security_struct * isec = selinux_inode ( inode ) ;
2012-04-04 15:01:42 -04:00
2012-04-04 15:01:43 -04:00
ad . type = LSM_AUDIT_DATA_INODE ;
2012-04-04 15:01:42 -04:00
ad . u . inode = inode ;
2023-03-09 13:30:37 -05:00
return slow_avc_audit ( current_sid ( ) , isec - > sid , isec - > sclass , perms ,
2019-11-22 12:22:45 -05:00
audited , denied , result , & ad ) ;
2012-04-04 15:01:42 -04:00
}
2011-06-20 19:38:15 -04:00
/*
 * Inode permission hook.
 *
 * @mask is reduced to the MAY_READ, MAY_WRITE, MAY_EXEC and MAY_APPEND bits;
 * if none remain, or the inode is IS_PRIVATE(), access is granted without a
 * policy check. Otherwise the mask is translated by file_mask_to_av() and
 * checked with avc_has_perm_noaudit() for the current task SID.
 *
 * Auditing is handled separately: avc_audit_required() decides whether
 * anything must be logged (FILE__AUDIT_ACCESS is passed when MAY_ACCESS was
 * set in the original mask), and audit_inode_permission() does the logging.
 *
 * Returns the error from inode_security_rcu(), else a non-zero result from
 * audit_inode_permission(), else the access decision.
 */
static int selinux_inode_permission ( struct inode * inode , int mask )
2005-04-16 15:20:36 -07:00
{
2010-07-23 11:44:03 -04:00
u32 perms ;
bool from_access ;
2019-11-22 16:16:56 -05:00
bool no_block = mask & MAY_NOT_BLOCK ;
2012-04-04 15:01:42 -04:00
struct inode_security_struct * isec ;
2024-03-15 18:31:00 +01:00
u32 sid = current_sid ( ) ;
2012-04-04 15:01:42 -04:00
struct av_decision avd ;
int rc , rc2 ;
u32 audited , denied ;
2005-04-16 15:20:36 -07:00
2010-07-23 11:44:03 -04:00
from_access = mask & MAY_ACCESS ;
2010-07-23 11:43:57 -04:00
mask & = ( MAY_READ | MAY_WRITE | MAY_EXEC | MAY_APPEND ) ;
2010-07-23 11:44:03 -04:00
/* No permission to check. Existence test. */
if ( ! mask )
2005-04-16 15:20:36 -07:00
return 0 ;
2012-04-04 15:01:42 -04:00
if ( unlikely ( IS_PRIVATE ( inode ) ) )
return 0 ;
2010-07-23 11:44:03 -04:00
perms = file_mask_to_av ( inode - > i_mode , mask ) ;
2019-11-22 16:16:56 -05:00
isec = inode_security_rcu ( inode , no_block ) ;
2015-12-24 11:09:40 -05:00
if ( IS_ERR ( isec ) )
return PTR_ERR ( isec ) ;
2012-04-04 15:01:42 -04:00
2023-03-09 13:30:37 -05:00
rc = avc_has_perm_noaudit ( sid , isec - > sid , isec - > sclass , perms , 0 ,
2018-12-12 10:10:55 -05:00
& avd ) ;
2012-04-04 15:01:42 -04:00
audited = avc_audit_required ( perms , & avd , rc ,
from_access ? FILE__AUDIT_ACCESS : 0 ,
& denied ) ;
if ( likely ( ! audited ) )
return rc ;
2019-11-22 12:22:45 -05:00
/* a non-zero audit result takes precedence over the access decision */
rc2 = audit_inode_permission ( inode , perms , audited , denied , rc ) ;
2012-04-04 15:01:42 -04:00
if ( rc2 )
return rc2 ;
return rc ;
2005-04-16 15:20:36 -07:00
}
2024-02-15 11:30:57 +01:00
/*
 * Setattr hook: picks the permission to check from iattr->ia_valid.
 *
 * - With ATTR_FORCE set, the kill-suid/sgid, mode and force bits are masked
 *   out first; if nothing is left, no check is made and 0 is returned.
 * - A change of mode, uid, gid or an explicitly set timestamp requires
 *   FILE__SETATTR.
 * - Anything else requires FILE__WRITE, plus FILE__OPEN when the openperm
 *   policy capability is enabled, the inode is not on sockfs, and the size
 *   is being changed without ATTR_FILE.
 *
 * @idmap is not consulted here.
 */
static int selinux_inode_setattr ( struct mnt_idmap * idmap , struct dentry * dentry ,
struct iattr * iattr )
2005-04-16 15:20:36 -07:00
{
2008-11-14 10:39:21 +11:00
const struct cred * cred = current_cred ( ) ;
2017-05-12 12:41:24 -04:00
struct inode * inode = d_backing_inode ( dentry ) ;
2009-08-20 19:29:02 -07:00
unsigned int ia_valid = iattr - > ia_valid ;
2012-04-04 13:45:34 -04:00
__u32 av = FILE__WRITE ;
2005-04-16 15:20:36 -07:00
2009-08-20 19:29:02 -07:00
/* ATTR_FORCE is just used for ATTR_KILL_S[UG]ID. */
if ( ia_valid & ATTR_FORCE ) {
ia_valid & = ~ ( ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_MODE |
ATTR_FORCE ) ;
if ( ! ia_valid )
return 0 ;
}
2005-04-16 15:20:36 -07:00
2009-08-20 19:29:02 -07:00
if ( ia_valid & ( ATTR_MODE | ATTR_UID | ATTR_GID |
ATTR_ATIME_SET | ATTR_MTIME_SET | ATTR_TIMES_SET ) )
2011-04-28 16:04:24 -04:00
return dentry_has_perm ( cred , dentry , FILE__SETATTR ) ;
2005-04-16 15:20:36 -07:00
2018-03-01 18:48:02 -05:00
if ( selinux_policycap_openperm ( ) & &
2017-05-12 12:41:24 -04:00
inode - > i_sb - > s_magic ! = SOCKFS_MAGIC & &
( ia_valid & ATTR_SIZE ) & &
! ( ia_valid & ATTR_FILE ) )
2012-04-04 13:45:34 -04:00
av | = FILE__OPEN ;
return dentry_has_perm ( cred , dentry , av ) ;
2005-04-16 15:20:36 -07:00
}
2015-03-08 19:28:30 -04:00
/*
 * Getattr hook: requires FILE__GETATTR on @path for the current credentials,
 * as decided by path_has_perm().
 */
static int selinux_inode_getattr ( const struct path * path )
2005-04-16 15:20:36 -07:00
{
2015-03-08 19:28:30 -04:00
return path_has_perm ( current_cred ( ) , path , FILE__GETATTR ) ;
2005-04-16 15:20:36 -07:00
}
2017-04-20 11:31:30 -04:00
/*
 * Report whether the current credentials hold CAP_MAC_ADMIN.
 *
 * Two checks must both return zero: cap_capable() against init_user_ns and
 * cred_has_capability(). @audit selects CAP_OPT_NONE (true) or
 * CAP_OPT_NOAUDIT (false) as the option passed to both checks.
 *
 * Returns true only if both checks pass.
 */
static bool has_cap_mac_admin ( bool audit )
{
const struct cred * cred = current_cred ( ) ;
2019-01-07 16:10:53 -08:00
unsigned int opts = audit ? CAP_OPT_NONE : CAP_OPT_NOAUDIT ;
2017-04-20 11:31:30 -04:00
2019-01-07 16:10:53 -08:00
if ( cap_capable ( cred , & init_user_ns , CAP_MAC_ADMIN , opts ) )
2017-04-20 11:31:30 -04:00
return false ;
2019-01-07 16:10:53 -08:00
if ( cred_has_capability ( cred , CAP_MAC_ADMIN , opts , true ) )
2017-04-20 11:31:30 -04:00
return false ;
return true ;
}
lsm: fixup the inode xattr capability handling
The current security_inode_setxattr() and security_inode_removexattr()
hooks rely on individual LSMs to either call into the associated
capability hooks (cap_inode_setxattr() or cap_inode_removexattr()), or
return a magic value of 1 to indicate that the LSM layer itself should
perform the capability checks. Unfortunately, with the default return
value for these LSM hooks being 0, an individual LSM hook returning a
1 will cause the LSM hook processing to exit early, potentially
skipping a LSM. Thankfully, with the exception of the BPF LSM, none
of the LSMs which currently register inode xattr hooks should end up
returning a value of 1, and in the BPF LSM case, with the BPF LSM hooks
executing last there should be no real harm in stopping processing of
the LSM hooks. However, the reliance on the individual LSMs to either
call the capability hooks themselves, or signal the LSM with a return
value of 1, is fragile and relies on a specific set of LSMs being
enabled. This patch is an effort to resolve, or minimize, these
issues.
Before we discuss the solution, there are a few observations and
considerations that we need to take into account:
* BPF LSM registers an implementation for every LSM hook, and that
implementation simply returns the hook's default return value, a
0 in this case. We want to ensure that the default BPF LSM behavior
results in the capability checks being called.
* SELinux and Smack do not expect the traditional capability checks
to be applied to the xattrs that they "own".
* SELinux and Smack are currently written in such a way that the
xattr capability checks happen before any additional LSM specific
access control checks. SELinux does apply SELinux specific access
controls to all xattrs, even those not "owned" by SELinux.
* IMA and EVM also register xattr hooks but assume that the LSM layer
and specific LSMs have already authorized the basic xattr operation.
In order to ensure we perform the capability based access controls
before the individual LSM access controls, perform only one capability
access control check for each operation, and clarify the logic around
applying the capability controls, we need a mechanism to determine if
any of the enabled LSMs "own" a particular xattr and want to take
responsibility for controlling access to that xattr. The solution in
this patch is to create a new LSM hook, 'inode_xattr_skipcap', that is
not exported to the rest of the kernel via a security_XXX() function,
but is used by the LSM layer to determine if a LSM wants to control
access to a given xattr and avoid the traditional capability controls.
Registering an inode_xattr_skipcap hook is optional, if a LSM declines
to register an implementation, or uses an implementation that simply
returns the default value (0), there is no effect as the LSM continues
to enforce the capability based controls (unless another LSM takes
ownership of the xattr). If none of the LSMs signal that the
capability checks should be skipped, the capability check is performed
and if access is granted the individual LSM xattr access control hooks
are executed, keeping with the DAC-before-LSM convention.
Cc: stable@vger.kernel.org
Acked-by: Casey Schaufler <casey@schaufler-ca.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
2024-05-02 17:57:51 -04:00
/**
 * selinux_inode_xattr_skipcap - Does SELinux own access control for an xattr?
 * @name: name of the xattr
 *
 * Returns 1 when @name is exactly XATTR_NAME_SELINUX: SELinux "owns" the
 * access control rights to that xattr and the LSM layer should not enforce
 * the traditional capability based access controls on it. Returns 0 for any
 * other name: SELinux does not "own" it and defers to the LSM layer for
 * further access controls, including the capability based ones.
 */
static int selinux_inode_xattr_skipcap(const char *name)
{
	/* only the SELinux label xattr is exempt from the capability check */
	if (strcmp(name, XATTR_NAME_SELINUX) == 0)
		return 1;

	return 0;
}
2023-01-13 12:49:23 +01:00
/*
 * Setxattr hook: decide whether the current task may set xattr @name on
 * @dentry.
 *
 * Any xattr other than XATTR_NAME_SELINUX only needs FILE__SETATTR. For the
 * SELinux label itself the checks run in this order:
 *  - before SELinux is initialized, only inode_owner_or_capable() is checked
 *    (0 or -EPERM);
 *  - the superblock must have SBLABEL_MNT set, else -EOPNOTSUPP;
 *  - the caller must pass inode_owner_or_capable(), else -EPERM;
 *  - FILE__RELABELFROM is checked against the current inode label;
 *  - @value is mapped to a SID; on -EINVAL the mapping is retried with
 *    security_context_to_sid_force() if has_cap_mac_admin() allows it,
 *    otherwise the rejected context is written to the audit log (when an
 *    audit buffer can be obtained) and -EINVAL is returned;
 *  - FILE__RELABELTO on the new SID, security_validate_transition(), and
 *    finally FILESYSTEM__ASSOCIATE against the superblock SID.
 *
 * Returns 0 if every applicable check passes, otherwise the error from the
 * first failing step. @flags is not consulted here.
 */
static int selinux_inode_setxattr ( struct mnt_idmap * idmap ,
2021-01-21 14:19:29 +01:00
struct dentry * dentry , const char * name ,
2008-04-29 00:59:41 -07:00
const void * value , size_t size , int flags )
2005-04-16 15:20:36 -07:00
{
2015-03-17 22:26:22 +00:00
struct inode * inode = d_backing_inode ( dentry ) ;
2016-04-04 14:14:42 -04:00
struct inode_security_struct * isec ;
2005-04-16 15:20:36 -07:00
struct superblock_security_struct * sbsec ;
2009-07-14 12:14:09 -04:00
struct common_audit_data ad ;
2008-11-14 10:39:19 +11:00
u32 newsid , sid = current_sid ( ) ;
2005-04-16 15:20:36 -07:00
int rc = 0 ;
lsm: fixup the inode xattr capability handling
The current security_inode_setxattr() and security_inode_removexattr()
hooks rely on individual LSMs to either call into the associated
capability hooks (cap_inode_setxattr() or cap_inode_removexattr()), or
return a magic value of 1 to indicate that the LSM layer itself should
perform the capability checks. Unfortunately, with the default return
value for these LSM hooks being 0, an individual LSM hook returning a
1 will cause the LSM hook processing to exit early, potentially
skipping a LSM. Thankfully, with the exception of the BPF LSM, none
of the LSMs which currently register inode xattr hooks should end up
returning a value of 1, and in the BPF LSM case, with the BPF LSM hooks
executing last there should be no real harm in stopping processing of
the LSM hooks. However, the reliance on the individual LSMs to either
call the capability hooks themselves, or signal the LSM with a return
value of 1, is fragile and relies on a specific set of LSMs being
enabled. This patch is an effort to resolve, or minimize, these
issues.
Before we discuss the solution, there are a few observations and
considerations that we need to take into account:
* BPF LSM registers an implementation for every LSM hook, and that
implementation simply returns the hook's default return value, a
0 in this case. We want to ensure that the default BPF LSM behavior
results in the capability checks being called.
* SELinux and Smack do not expect the traditional capability checks
to be applied to the xattrs that they "own".
* SELinux and Smack are currently written in such a way that the
xattr capability checks happen before any additional LSM specific
access control checks. SELinux does apply SELinux specific access
controls to all xattrs, even those not "owned" by SELinux.
* IMA and EVM also register xattr hooks but assume that the LSM layer
and specific LSMs have already authorized the basic xattr operation.
In order to ensure we perform the capability based access controls
before the individual LSM access controls, perform only one capability
access control check for each operation, and clarify the logic around
applying the capability controls, we need a mechanism to determine if
any of the enabled LSMs "own" a particular xattr and want to take
responsibility for controlling access to that xattr. The solution in
this patch is to create a new LSM hook, 'inode_xattr_skipcap', that is
not exported to the rest of the kernel via a security_XXX() function,
but is used by the LSM layer to determine if a LSM wants to control
access to a given xattr and avoid the traditional capability controls.
Registering an inode_xattr_skipcap hook is optional, if a LSM declines
to register an implementation, or uses an implementation that simply
returns the default value (0), there is no effect as the LSM continues
to enforce the capability based controls (unless another LSM takes
ownership of the xattr). If none of the LSMs signal that the
capability checks should be skipped, the capability check is performed
and if access is granted the individual LSM xattr access control hooks
are executed, keeping with the DAC-before-LSM convention.
Cc: stable@vger.kernel.org
Acked-by: Casey Schaufler <casey@schaufler-ca.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
2024-05-02 17:57:51 -04:00
/* if not a selinux xattr, only check the ordinary setattr perm */
if ( strcmp ( name , XATTR_NAME_SELINUX ) )
2017-10-02 09:38:20 -05:00
return dentry_has_perm ( current_cred ( ) , dentry , FILE__SETATTR ) ;
2005-04-16 15:20:36 -07:00
2023-03-09 13:30:37 -05:00
if ( ! selinux_initialized ( ) )
2023-01-13 12:49:26 +01:00
return ( inode_owner_or_capable ( idmap , inode ) ? 0 : - EPERM ) ;
selinux: allow labeling before policy is loaded
Currently, the SELinux LSM prevents one from setting the
`security.selinux` xattr on an inode without a policy first being
loaded. However, this restriction is problematic: it makes it impossible
to have newly created files with the correct label before actually
loading the policy.
This is relevant in distributions like Fedora, where the policy is
loaded by systemd shortly after pivoting out of the initrd. In such
instances, all files created prior to pivoting will be unlabeled. One
then has to relabel them after pivoting, an operation which inherently
races with other processes trying to access those same files.
Going further, there are use cases for creating the entire root
filesystem on first boot from the initrd (e.g. Container Linux supports
this today[1], and we'd like to support it in Fedora CoreOS as well[2]).
One can imagine doing this in two ways: at the block device level (e.g.
laying down a disk image), or at the filesystem level. In the former,
labeling can simply be part of the image. But even in the latter
scenario, one still really wants to be able to set the right labels when
populating the new filesystem.
This patch enables this by changing behaviour in the following two ways:
1. allow `setxattr` if we're not initialized
2. don't try to set the in-core inode SID if we're not initialized;
instead leave it as `LABEL_INVALID` so that revalidation may be
attempted at a later time
Note the first hunk of this patch is mostly the same as a previously
discussed one[3], though it was part of a larger series which wasn't
accepted.
[1] https://coreos.com/os/docs/latest/root-filesystem-placement.html
[2] https://github.com/coreos/fedora-coreos-tracker/issues/94
[3] https://www.spinics.net/lists/linux-initramfs/msg04593.html
Co-developed-by: Victor Kamensky <kamensky@cisco.com>
Signed-off-by: Victor Kamensky <kamensky@cisco.com>
Signed-off-by: Jonathan Lebon <jlebon@redhat.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
2019-09-12 09:30:07 -04:00
2021-04-22 17:41:15 +02:00
sbsec = selinux_superblock ( inode - > i_sb ) ;
2012-10-09 10:56:25 -04:00
if ( ! ( sbsec - > flags & SBLABEL_MNT ) )
2005-04-16 15:20:36 -07:00
return - EOPNOTSUPP ;
2023-01-13 12:49:26 +01:00
if ( ! inode_owner_or_capable ( idmap , inode ) )
2005-04-16 15:20:36 -07:00
return - EPERM ;
2012-04-04 15:01:43 -04:00
ad . type = LSM_AUDIT_DATA_DENTRY ;
2011-04-25 13:10:27 -04:00
ad . u . dentry = dentry ;
2005-04-16 15:20:36 -07:00
2016-04-04 14:14:42 -04:00
isec = backing_inode_security ( dentry ) ;
2023-03-09 13:30:37 -05:00
rc = avc_has_perm ( sid , isec - > sid , isec - > sclass ,
2005-04-16 15:20:36 -07:00
FILE__RELABELFROM , & ad ) ;
if ( rc )
return rc ;
2023-03-09 13:30:37 -05:00
rc = security_context_to_sid ( value , size , & newsid ,
2018-03-01 18:48:02 -05:00
GFP_KERNEL ) ;
2008-05-07 13:03:20 -04:00
if ( rc = = - EINVAL ) {
2017-04-20 11:31:30 -04:00
if ( ! has_cap_mac_admin ( true ) ) {
2012-04-04 13:45:49 -04:00
struct audit_buffer * ab ;
size_t audit_size ;
/* We strip a nul only if it is at the end, otherwise the
* context contains a nul and we should audit that */
selinux: fix selinux_inode_setxattr oops
OK, what we have so far is e.g.
setxattr(path, name, whatever, 0, XATTR_REPLACE)
with name being good enough to get through xattr_permission().
Then we reach security_inode_setxattr() with the desired value and size.
Aha. name should begin with "security.selinux", or we won't get that
far in selinux_inode_setxattr(). Suppose we got there and have enough
permissions to relabel that sucker. We call security_context_to_sid()
with value == NULL, size == 0. OK, we want ss_initialized to be non-zero.
I.e. after everything had been set up and running. No problem...
We do 1-byte kmalloc(), zero-length memcpy() (which doesn't oops, even
thought the source is NULL) and put a NUL there. I.e. form an empty
string. string_to_context_struct() is called and looks for the first
':' in there. Not found, -EINVAL we get. OK, security_context_to_sid_core()
has rc == -EINVAL, force == 0, so it silently returns -EINVAL.
All it takes now is not having CAP_MAC_ADMIN and we are fucked.
All right, it might be a different bug (modulo strange code quoted in the
report), but it's real. Easily fixed, AFAICS:
Deal with size == 0, value == NULL case in selinux_inode_setxattr()
Cc: stable@vger.kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Tested-by: Dave Jones <davej@redhat.com>
Reported-by: Dave Jones <davej@redhat.com>
Signed-off-by: James Morris <james.l.morris@oracle.com>
2012-06-09 08:15:16 +01:00
if ( value ) {
2017-10-14 13:46:55 +01:00
const char * str = value ;
selinux: fix selinux_inode_setxattr oops
OK, what we have so far is e.g.
setxattr(path, name, whatever, 0, XATTR_REPLACE)
with name being good enough to get through xattr_permission().
Then we reach security_inode_setxattr() with the desired value and size.
Aha. name should begin with "security.selinux", or we won't get that
far in selinux_inode_setxattr(). Suppose we got there and have enough
permissions to relabel that sucker. We call security_context_to_sid()
with value == NULL, size == 0. OK, we want ss_initialized to be non-zero.
I.e. after everything had been set up and running. No problem...
We do 1-byte kmalloc(), zero-length memcpy() (which doesn't oops, even
thought the source is NULL) and put a NUL there. I.e. form an empty
string. string_to_context_struct() is called and looks for the first
':' in there. Not found, -EINVAL we get. OK, security_context_to_sid_core()
has rc == -EINVAL, force == 0, so it silently returns -EINVAL.
All it takes now is not having CAP_MAC_ADMIN and we are fucked.
All right, it might be a different bug (modulo strange code quoted in the
report), but it's real. Easily fixed, AFAICS:
Deal with size == 0, value == NULL case in selinux_inode_setxattr()
Cc: stable@vger.kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Tested-by: Dave Jones <davej@redhat.com>
Reported-by: Dave Jones <davej@redhat.com>
Signed-off-by: James Morris <james.l.morris@oracle.com>
2012-06-09 08:15:16 +01:00
if ( str [ size - 1 ] = = ' \0 ' )
audit_size = size - 1 ;
else
audit_size = size ;
} else {
audit_size = 0 ;
}
2018-05-12 21:58:20 -04:00
ab = audit_log_start ( audit_context ( ) ,
GFP_ATOMIC , AUDIT_SELINUX_ERR ) ;
2021-07-14 01:11:27 +01:00
if ( ! ab )
return rc ;
2012-04-04 13:45:49 -04:00
audit_log_format ( ab , " op=setxattr invalid_context= " ) ;
audit_log_n_untrustedstring ( ab , value , audit_size ) ;
audit_log_end ( ab ) ;
2008-05-07 13:03:20 -04:00
return rc ;
2012-04-04 13:45:49 -04:00
}
2023-03-09 13:30:37 -05:00
rc = security_context_to_sid_force ( value ,
2018-03-01 18:48:02 -05:00
size , & newsid ) ;
2008-05-07 13:03:20 -04:00
}
2005-04-16 15:20:36 -07:00
if ( rc )
return rc ;
2023-03-09 13:30:37 -05:00
rc = avc_has_perm ( sid , newsid , isec - > sclass ,
2005-04-16 15:20:36 -07:00
FILE__RELABELTO , & ad ) ;
if ( rc )
return rc ;
2023-03-09 13:30:37 -05:00
rc = security_validate_transition ( isec - > sid , newsid ,
2018-03-01 18:48:02 -05:00
sid , isec - > sclass ) ;
2005-04-16 15:20:36 -07:00
if ( rc )
return rc ;
2023-03-09 13:30:37 -05:00
return avc_has_perm ( newsid ,
2005-04-16 15:20:36 -07:00
sbsec - > sid ,
SECCLASS_FILESYSTEM ,
FILESYSTEM__ASSOCIATE ,
& ad ) ;
}
2023-01-13 12:49:24 +01:00
/*
 * ACL set hook: requires FILE__SETATTR on @dentry for the current
 * credentials. @idmap, @acl_name and @kacl are not consulted here.
 */
static int selinux_inode_set_acl ( struct mnt_idmap * idmap ,
2022-09-22 17:17:08 +02:00
struct dentry * dentry , const char * acl_name ,
struct posix_acl * kacl )
{
return dentry_has_perm ( current_cred ( ) , dentry , FILE__SETATTR ) ;
}
2023-01-13 12:49:24 +01:00
/*
 * ACL get hook: requires FILE__GETATTR on @dentry for the current
 * credentials. @idmap and @acl_name are not consulted here.
 */
static int selinux_inode_get_acl ( struct mnt_idmap * idmap ,
2022-09-22 17:17:08 +02:00
struct dentry * dentry , const char * acl_name )
{
return dentry_has_perm ( current_cred ( ) , dentry , FILE__GETATTR ) ;
}
2023-01-13 12:49:24 +01:00
/*
 * ACL removal hook: requires FILE__SETATTR on @dentry for the current
 * credentials. @idmap and @acl_name are not consulted here.
 */
static int selinux_inode_remove_acl ( struct mnt_idmap * idmap ,
2022-09-22 17:17:08 +02:00
struct dentry * dentry , const char * acl_name )
{
return dentry_has_perm ( current_cred ( ) , dentry , FILE__SETATTR ) ;
}
2008-04-29 00:59:41 -07:00
/*
 * Post-setxattr hook: refresh the in-core label of the inode behind @dentry
 * after XATTR_NAME_SELINUX has been set.
 *
 * Other xattr names are ignored, and nothing is done while SELinux is not
 * initialized. If @value cannot be mapped to a SID by
 * security_context_to_sid_force(), an error is logged and the in-core label
 * is left unchanged. Otherwise sclass (recomputed from the inode mode), sid
 * and initialized are updated while holding isec->lock.
 *
 * @flags is not consulted here.
 */
static void selinux_inode_post_setxattr ( struct dentry * dentry , const char * name ,
2008-05-14 11:27:45 -04:00
const void * value , size_t size ,
2008-04-29 00:59:41 -07:00
int flags )
2005-04-16 15:20:36 -07:00
{
2015-03-17 22:26:22 +00:00
struct inode * inode = d_backing_inode ( dentry ) ;
2016-04-04 14:14:42 -04:00
struct inode_security_struct * isec ;
2005-04-16 15:20:36 -07:00
u32 newsid ;
int rc ;
if ( strcmp ( name , XATTR_NAME_SELINUX ) ) {
/* Not an attribute we recognize, so nothing to do. */
return ;
}
2023-03-09 13:30:37 -05:00
if ( ! selinux_initialized ( ) ) {
selinux: allow labeling before policy is loaded
Currently, the SELinux LSM prevents one from setting the
`security.selinux` xattr on an inode without a policy first being
loaded. However, this restriction is problematic: it makes it impossible
to have newly created files with the correct label before actually
loading the policy.
This is relevant in distributions like Fedora, where the policy is
loaded by systemd shortly after pivoting out of the initrd. In such
instances, all files created prior to pivoting will be unlabeled. One
then has to relabel them after pivoting, an operation which inherently
races with other processes trying to access those same files.
Going further, there are use cases for creating the entire root
filesystem on first boot from the initrd (e.g. Container Linux supports
this today[1], and we'd like to support it in Fedora CoreOS as well[2]).
One can imagine doing this in two ways: at the block device level (e.g.
laying down a disk image), or at the filesystem level. In the former,
labeling can simply be part of the image. But even in the latter
scenario, one still really wants to be able to set the right labels when
populating the new filesystem.
This patch enables this by changing behaviour in the following two ways:
1. allow `setxattr` if we're not initialized
2. don't try to set the in-core inode SID if we're not initialized;
instead leave it as `LABEL_INVALID` so that revalidation may be
attempted at a later time
Note the first hunk of this patch is mostly the same as a previously
discussed one[3], though it was part of a larger series which wasn't
accepted.
[1] https://coreos.com/os/docs/latest/root-filesystem-placement.html
[2] https://github.com/coreos/fedora-coreos-tracker/issues/94
[3] https://www.spinics.net/lists/linux-initramfs/msg04593.html
Co-developed-by: Victor Kamensky <kamensky@cisco.com>
Signed-off-by: Victor Kamensky <kamensky@cisco.com>
Signed-off-by: Jonathan Lebon <jlebon@redhat.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
2019-09-12 09:30:07 -04:00
/* If we haven't even been initialized, then we can't validate
* against a policy , so leave the label as invalid . It may
* resolve to a valid label on the next revalidation try if
* we ' ve since initialized .
*/
return ;
}
2023-03-09 13:30:37 -05:00
rc = security_context_to_sid_force ( value , size ,
2018-03-01 18:48:02 -05:00
& newsid ) ;
2005-04-16 15:20:36 -07:00
if ( rc ) {
2018-06-12 10:09:03 +02:00
pr_err ( " SELinux: unable to map context to SID "
2008-05-07 13:03:20 -04:00
" for (%s, %lu), rc=%d \n " ,
inode - > i_sb - > s_id , inode - > i_ino , - rc ) ;
2005-04-16 15:20:36 -07:00
return ;
}
2016-04-04 14:14:42 -04:00
isec = backing_inode_security ( dentry ) ;
selinux: Convert isec->lock into a spinlock
Convert isec->lock from a mutex into a spinlock. Instead of holding
the lock while sleeping in inode_doinit_with_dentry, set
isec->initialized to LABEL_PENDING and release the lock. Then, when
the sid has been determined, re-acquire the lock. If isec->initialized
is still set to LABEL_PENDING, set isec->sid; otherwise, the sid has
been set by another task (LABEL_INITIALIZED) or invalidated
(LABEL_INVALID) in the meantime.
This fixes a deadlock on gfs2 where
* one task is in inode_doinit_with_dentry -> gfs2_getxattr, holds
isec->lock, and tries to acquire the inode's glock, and
* another task is in do_xmote -> inode_go_inval ->
selinux_inode_invalidate_secctx, holds the inode's glock, and
tries to acquire isec->lock.
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
[PM: minor tweaks to keep checkpatch.pl happy]
Signed-off-by: Paul Moore <paul@paul-moore.com>
2016-11-15 11:06:40 +01:00
spin_lock ( & isec - > lock ) ;
2013-05-22 12:50:44 -04:00
isec - > sclass = inode_mode_to_security_class ( inode - > i_mode ) ;
2005-04-16 15:20:36 -07:00
isec - > sid = newsid ;
2015-12-24 11:09:40 -05:00
isec - > initialized = LABEL_INITIALIZED ;
selinux: Convert isec->lock into a spinlock
Convert isec->lock from a mutex into a spinlock. Instead of holding
the lock while sleeping in inode_doinit_with_dentry, set
isec->initialized to LABEL_PENDING and release the lock. Then, when
the sid has been determined, re-acquire the lock. If isec->initialized
is still set to LABEL_PENDING, set isec->sid; otherwise, the sid has
been set by another task (LABEL_INITIALIZED) or invalidated
(LABEL_INVALID) in the meantime.
This fixes a deadlock on gfs2 where
* one task is in inode_doinit_with_dentry -> gfs2_getxattr, holds
isec->lock, and tries to acquire the inode's glock, and
* another task is in do_xmote -> inode_go_inval ->
selinux_inode_invalidate_secctx, holds the inode's glock, and
tries to acquire isec->lock.
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
[PM: minor tweaks to keep checkpatch.pl happy]
Signed-off-by: Paul Moore <paul@paul-moore.com>
2016-11-15 11:06:40 +01:00
spin_unlock ( & isec - > lock ) ;
2005-04-16 15:20:36 -07:00
}
2008-04-29 00:59:41 -07:00
/*
 * Getxattr hook: requires FILE__GETATTR on @dentry for the current
 * credentials, whatever the xattr @name is.
 */
static int selinux_inode_getxattr ( struct dentry * dentry , const char * name )
2005-04-16 15:20:36 -07:00
{
2008-11-14 10:39:21 +11:00
const struct cred * cred = current_cred ( ) ;
2011-04-28 16:04:24 -04:00
return dentry_has_perm ( cred , dentry , FILE__GETATTR ) ;
2005-04-16 15:20:36 -07:00
}
2008-04-17 13:17:49 -04:00
/*
 * Listxattr hook: requires FILE__GETATTR on @dentry for the current
 * credentials.
 */
static int selinux_inode_listxattr ( struct dentry * dentry )
2005-04-16 15:20:36 -07:00
{
2008-11-14 10:39:21 +11:00
const struct cred * cred = current_cred ( ) ;
2011-04-28 16:04:24 -04:00
return dentry_has_perm ( cred , dentry , FILE__GETATTR ) ;
2005-04-16 15:20:36 -07:00
}
2023-01-13 12:49:23 +01:00
/*
 * Removexattr hook.
 *
 * Removing any xattr other than XATTR_NAME_SELINUX requires FILE__SETATTR.
 * Removing XATTR_NAME_SELINUX itself is allowed (returns 0) only while
 * SELinux is not initialized; once it is, the request is always refused
 * with -EACCES. @idmap is not consulted here.
 */
static int selinux_inode_removexattr ( struct mnt_idmap * idmap ,
2021-01-21 14:19:29 +01:00
struct dentry * dentry , const char * name )
2005-04-16 15:20:36 -07:00
{
2024-07-03 17:00:20 -04:00
/* if not a selinux xattr, only check the ordinary setattr perm */
if ( strcmp ( name , XATTR_NAME_SELINUX ) )
2017-10-02 09:38:20 -05:00
return dentry_has_perm ( current_cred ( ) , dentry , FILE__SETATTR ) ;
2005-04-16 15:20:36 -07:00
2023-03-09 13:30:37 -05:00
if ( ! selinux_initialized ( ) )
2020-08-20 13:00:40 -04:00
return 0 ;
2005-04-16 15:20:36 -07:00
/* No one is allowed to remove a SELinux security label.
You can change the label , but all data must be labeled . */
return - EACCES ;
}
fanotify, inotify, dnotify, security: add security hook for fs notifications
As of now, setting watches on filesystem objects has, at most, applied a
check for read access to the inode, and in the case of fanotify, requires
CAP_SYS_ADMIN. No specific security hook or permission check has been
provided to control the setting of watches. Using any of inotify, dnotify,
or fanotify, it is possible to observe, not only write-like operations, but
even read access to a file. Modeling the watch as being merely a read from
the file is insufficient for the needs of SELinux. This is due to the fact
that read access should not necessarily imply access to information about
when another process reads from a file. Furthermore, fanotify watches grant
more power to an application in the form of permission events. While
notification events are solely unidirectional (i.e. they only pass
information to the receiving application), permission events are blocking.
Permission events make a request to the receiving application which will
then reply with a decision as to whether or not that action may be
completed. This causes the issue of the watching application having the
ability to exercise control over the triggering process. Without drawing a
distinction within the permission check, the ability to read would imply
the greater ability to control an application. Additionally, mount and
superblock watches apply to all files within the same mount or superblock.
Read access to one file should not necessarily imply the ability to watch
all files accessed within a given mount or superblock.
In order to solve these issues, a new LSM hook is implemented and has been
placed within the system calls for marking filesystem objects with inotify,
fanotify, and dnotify watches. These calls to the hook are placed at the
point at which the target path has been resolved and are provided with the
path struct, the mask of requested notification events, and the type of
object on which the mark is being set (inode, superblock, or mount). The
mask and obj_type have already been translated into common FS_* values
shared by the entirety of the fs notification infrastructure. The path
struct is passed rather than just the inode so that the mount is available,
particularly for mount watches. This also allows for use of the hook by
pathname-based security modules. However, since the hook is intended for
use even by inode based security modules, it is not placed under the
CONFIG_SECURITY_PATH conditional. Otherwise, the inode-based security
modules would need to enable all of the path hooks, even though they do not
use any of them.
This only provides a hook at the point of setting a watch, and presumes
that permission to set a particular watch implies the ability to receive
all notification about that object which match the mask. This is all that
is required for SELinux. If other security modules require additional hooks
or infrastructure to control delivery of notification, these can be added
by them. It does not make sense for us to propose hooks for which we have
no implementation. The understanding that all notifications received by the
requesting application are all strictly of a type for which the application
has been granted permission shows that this implementation is sufficient in
its coverage.
Security modules wishing to provide complete control over fanotify must
also implement a security_file_open hook that validates that the access
requested by the watching application is authorized. Fanotify has the issue
that it returns a file descriptor with the file mode specified during
fanotify_init() to the watching process on event. This is already covered
by the LSM security_file_open hook if the security module implements
checking of the requested file mode there. Otherwise, a watching process
can obtain escalated access to a file for which it has not been authorized.
The selinux_path_notify hook implementation works by adding five new file
permissions: watch, watch_mount, watch_sb, watch_reads, and watch_with_perm
(descriptions about which will follow), and one new filesystem permission:
watch (which is applied to superblock checks). The hook then decides which
subset of these permissions must be held by the requesting application
based on the contents of the provided mask and the obj_type. The
selinux_file_open hook already checks the requested file mode and therefore
ensures that a watching process cannot escalate its access through
fanotify.
The watch, watch_mount, and watch_sb permissions are the baseline
permissions for setting a watch on an object and each is a requirement for
any watch to be set on a file, mount, or superblock respectively. It should
be noted that having either of the other two permissions (watch_reads and
watch_with_perm) does not imply the watch, watch_mount, or watch_sb
permission. Superblock watches further require the filesystem watch
permission to the superblock. As there is no labeled object in view for
mounts, there is no specific check for mount watches beyond watch_mount to
the inode. Such a check could be added in the future, if a suitable labeled
object existed representing the mount.
The watch_reads permission is required to receive notifications from
read-exclusive events on filesystem objects. These events include accessing
a file for the purpose of reading and closing a file which has been opened
read-only. This distinction has been drawn in order to provide a direct
indication in the policy for this otherwise not obvious capability. Read
access to a file should not necessarily imply the ability to observe read
events on a file.
Finally, watch_with_perm only applies to fanotify masks since it is the
only way to set a mask which allows for the blocking, permission event.
This permission is needed for any watch which is of this type. Though
fanotify requires CAP_SYS_ADMIN, this is insufficient as it gives implicit
trust to root, which we do not do, and does not support least privilege.
Signed-off-by: Aaron Goidel <acgoide@tycho.nsa.gov>
Acked-by: Casey Schaufler <casey@schaufler-ca.com>
Acked-by: Jan Kara <jack@suse.cz>
Signed-off-by: Paul Moore <paul@paul-moore.com>
2019-08-12 11:20:00 -04:00
/*
 * Check whether the current task may place an fsnotify mark on @path.
 *
 * @path:     object the mark is being set on (also used for auditing)
 * @mask:     requested notification events
 * @obj_type: kind of mark (inode, mount or superblock)
 *
 * Returns -EINVAL for an unrecognised @obj_type, the error from the
 * filesystem:watch check for superblock marks, or otherwise the result of
 * the file-class permission check on @path.
 */
static int selinux_path_notify(const struct path *path, u64 mask,
			       unsigned int obj_type)
{
	struct common_audit_data ad;
	u32 perm;
	int ret;

	ad.type = LSM_AUDIT_DATA_PATH;
	ad.u.path = *path;

	/* Baseline permission is determined by the type of mark being set. */
	if (obj_type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) {
		perm = FILE__WATCH_MOUNT;
	} else if (obj_type == FSNOTIFY_OBJ_TYPE_SB) {
		/* Superblock marks get an additional filesystem-level check. */
		ret = superblock_has_perm(current_cred(), path->dentry->d_sb,
					  FILESYSTEM__WATCH, &ad);
		if (ret)
			return ret;
		perm = FILE__WATCH_SB;
	} else if (obj_type == FSNOTIFY_OBJ_TYPE_INODE) {
		perm = FILE__WATCH;
	} else {
		return -EINVAL;
	}

	/* Blocking watches require the file:watch_with_perm permission. */
	if (mask & (ALL_FSNOTIFY_PERM_EVENTS))
		perm |= FILE__WATCH_WITH_PERM;

	/* Watches on read-like events need the file:watch_reads permission. */
	if (mask & (FS_ACCESS | FS_ACCESS_PERM | FS_CLOSE_NOWRITE))
		perm |= FILE__WATCH_READS;

	return path_has_perm(current_cred(), path, perm);
}
2005-10-30 14:59:22 -08:00
/*
2008-05-21 14:16:12 -04:00
* Copy the inode security context value to the user .
2005-10-30 14:59:22 -08:00
*
* Permission check is handled by selinux_inode_getxattr hook .
*/
2023-01-13 12:49:22 +01:00
static int selinux_inode_getsecurity ( struct mnt_idmap * idmap ,
2021-01-21 14:19:29 +01:00
struct inode * inode , const char * name ,
void * * buffer , bool alloc )
2005-04-16 15:20:36 -07:00
{
2008-02-04 22:29:39 -08:00
u32 size ;
int error ;
char * context = NULL ;
2016-04-04 14:14:42 -04:00
struct inode_security_struct * isec ;
2005-10-30 14:59:22 -08:00
2020-05-28 10:39:40 -04:00
/*
* If we ' re not initialized yet , then we can ' t validate contexts , so
* just let vfs_getxattr fall back to using the on - disk xattr .
*/
2023-03-09 13:30:37 -05:00
if ( ! selinux_initialized ( ) | |
2020-05-28 10:39:40 -04:00
strcmp ( name , XATTR_SELINUX_SUFFIX ) )
2005-11-03 17:15:16 +00:00
return - EOPNOTSUPP ;
2005-10-30 14:59:22 -08:00
2008-05-21 14:16:12 -04:00
/*
* If the caller has CAP_MAC_ADMIN , then get the raw context
* value even if it is not defined by current policy ; otherwise ,
* use the in - core value under current policy .
* Use the non - auditing forms of the permission checks since
* getxattr may be called by unprivileged processes commonly
* and lack of permission just means that we fall back to the
* in - core context value , not a denial .
*/
2016-04-04 14:14:42 -04:00
isec = inode_security ( inode ) ;
2017-04-20 11:31:30 -04:00
if ( has_cap_mac_admin ( false ) )
2023-03-09 13:30:37 -05:00
error = security_sid_to_context_force ( isec - > sid , & context ,
2008-05-21 14:16:12 -04:00
& size ) ;
else
2023-03-09 13:30:37 -05:00
error = security_sid_to_context ( isec - > sid ,
2018-03-01 18:48:02 -05:00
& context , & size ) ;
2008-02-04 22:29:39 -08:00
if ( error )
return error ;
error = size ;
if ( alloc ) {
* buffer = context ;
goto out_nofree ;
}
kfree ( context ) ;
out_nofree :
return error ;
2005-04-16 15:20:36 -07:00
}
/*
 * Set the in-core security label of @inode from the context string in
 * @value (@size bytes).
 *
 * Returns 0 on success, -EOPNOTSUPP when @name is not the SELinux suffix
 * or the superblock lacks SBLABEL_MNT, -EACCES for an empty value, or the
 * error from converting the context to a SID.
 */
static int selinux_inode_setsecurity(struct inode *inode, const char *name,
				     const void *value, size_t size, int flags)
{
	struct inode_security_struct *isec = inode_security_novalidate(inode);
	struct superblock_security_struct *sbsec;
	u32 sid;
	int err;

	if (strcmp(name, XATTR_SELINUX_SUFFIX) != 0)
		return -EOPNOTSUPP;

	sbsec = selinux_superblock(inode->i_sb);
	if ((sbsec->flags & SBLABEL_MNT) == 0)
		return -EOPNOTSUPP;

	if (value == NULL || size == 0)
		return -EACCES;

	err = security_context_to_sid(value, size, &sid, GFP_KERNEL);
	if (err != 0)
		return err;

	/*
	 * isec->lock is a spinlock, so the SID is resolved before taking it
	 * and only the field updates happen while it is held.
	 */
	spin_lock(&isec->lock);
	isec->sclass = inode_mode_to_security_class(inode->i_mode);
	isec->sid = sid;
	isec->initialized = LABEL_INITIALIZED;
	spin_unlock(&isec->lock);

	return 0;
}
/*
 * Report the SELinux xattr name for @inode.
 *
 * Returns 0 when SELinux is not initialized.  Otherwise returns the size
 * of XATTR_NAME_SELINUX (sizeof, so the terminating NUL is counted); the
 * name is copied into @buffer only when @buffer is non-NULL and
 * @buffer_size is large enough.
 */
static int selinux_inode_listsecurity(struct inode *inode, char *buffer, size_t buffer_size)
{
	const int name_sz = sizeof(XATTR_NAME_SELINUX);

	if (!selinux_initialized())
		return 0;

	if (buffer != NULL && buffer_size >= name_sz)
		memcpy(buffer, XATTR_NAME_SELINUX, name_sz);

	return name_sz;
}
2024-10-09 10:32:16 -07:00
static void selinux_inode_getlsmprop ( struct inode * inode , struct lsm_prop * prop )
2008-03-01 21:52:30 +02:00
{
2016-02-18 12:04:08 +01:00
struct inode_security_struct * isec = inode_security_novalidate ( inode ) ;
2024-10-09 10:32:16 -07:00
prop - > selinux . secid = isec - > sid ;
2008-03-01 21:52:30 +02:00
}
2016-07-13 10:44:48 -04:00
static int selinux_inode_copy_up ( struct dentry * src , struct cred * * new )
{
2024-10-09 10:32:16 -07:00
struct lsm_prop prop ;
2016-07-13 10:44:48 -04:00
struct task_security_struct * tsec ;
struct cred * new_creds = * new ;
if ( new_creds = = NULL ) {
new_creds = prepare_creds ( ) ;
if ( ! new_creds )
return - ENOMEM ;
}
2018-09-21 17:17:16 -07:00
tsec = selinux_cred ( new_creds ) ;
2016-07-13 10:44:48 -04:00
/* Get label from overlay inode and set it in create_sid */
2024-10-09 10:32:16 -07:00
selinux_inode_getlsmprop ( d_inode ( src ) , & prop ) ;
tsec - > create_sid = prop . selinux . secid ;
2016-07-13 10:44:48 -04:00
* new = new_creds ;
return 0 ;
}
2024-02-23 12:25:05 -05:00
static int selinux_inode_copy_up_xattr ( struct dentry * dentry , const char * name )
2016-07-13 10:44:50 -04:00
{
/* The copy_up hook above sets the initial context on an inode, but we
* don ' t then want to overwrite it by blindly copying all the lower
2024-02-02 17:40:48 +11:00
* xattrs up . Instead , filter out SELinux - related xattrs following
* policy load .
2016-07-13 10:44:50 -04:00
*/
2024-07-24 10:06:59 +08:00
if ( selinux_initialized ( ) & & ! strcmp ( name , XATTR_NAME_SELINUX ) )
return - ECANCELED ; /* Discard */
2016-07-13 10:44:50 -04:00
/*
* Any other attribute apart from SELINUX is not claimed , supported
* by selinux .
*/
return - EOPNOTSUPP ;
}
2019-02-22 15:57:17 +01:00
/* kernfs node operations */
2019-03-22 22:04:00 +08:00
static int selinux_kernfs_init_security ( struct kernfs_node * kn_dir ,
struct kernfs_node * kn )
2019-02-22 15:57:17 +01:00
{
2019-09-04 10:32:48 -04:00
const struct task_security_struct * tsec = selinux_cred ( current_cred ( ) ) ;
2019-02-22 15:57:17 +01:00
u32 parent_sid , newsid , clen ;
int rc ;
char * context ;
2019-04-03 09:29:41 +02:00
rc = kernfs_xattr_get ( kn_dir , XATTR_NAME_SELINUX , NULL , 0 ) ;
2019-02-22 15:57:17 +01:00
if ( rc = = - ENODATA )
return 0 ;
else if ( rc < 0 )
return rc ;
clen = ( u32 ) rc ;
context = kmalloc ( clen , GFP_KERNEL ) ;
if ( ! context )
return - ENOMEM ;
2019-04-03 09:29:41 +02:00
rc = kernfs_xattr_get ( kn_dir , XATTR_NAME_SELINUX , context , clen ) ;
2019-02-22 15:57:17 +01:00
if ( rc < 0 ) {
kfree ( context ) ;
return rc ;
}
2023-03-09 13:30:37 -05:00
rc = security_context_to_sid ( context , clen , & parent_sid ,
2019-02-22 15:57:17 +01:00
GFP_KERNEL ) ;
kfree ( context ) ;
if ( rc )
return rc ;
if ( tsec - > create_sid ) {
newsid = tsec - > create_sid ;
} else {
u16 secclass = inode_mode_to_security_class ( kn - > mode ) ;
struct qstr q ;
q . name = kn - > name ;
q . hash_len = hashlen_string ( kn_dir , kn - > name ) ;
2023-03-09 13:30:37 -05:00
rc = security_transition_sid ( tsec - > sid ,
2019-02-22 15:57:17 +01:00
parent_sid , secclass , & q ,
& newsid ) ;
if ( rc )
return rc ;
}
2023-03-09 13:30:37 -05:00
rc = security_sid_to_context_force ( newsid ,
2019-02-22 15:57:17 +01:00
& context , & clen ) ;
if ( rc )
return rc ;
2019-04-03 09:29:41 +02:00
rc = kernfs_xattr_set ( kn , XATTR_NAME_SELINUX , context , clen ,
XATTR_CREATE ) ;
2019-02-22 15:57:17 +01:00
kfree ( context ) ;
return rc ;
}
2005-04-16 15:20:36 -07:00
/* file security operations */
2007-09-14 09:27:07 +09:00
static int selinux_revalidate_file_permission ( struct file * file , int mask )
2005-04-16 15:20:36 -07:00
{
2008-11-14 10:39:21 +11:00
const struct cred * cred = current_cred ( ) ;
2013-01-23 17:07:38 -05:00
struct inode * inode = file_inode ( file ) ;
2005-04-16 15:20:36 -07:00
/* file_mask_to_av won't add FILE__WRITE if MAY_APPEND is set */
if ( ( file - > f_flags & O_APPEND ) & & ( mask & MAY_WRITE ) )
mask | = MAY_APPEND ;
2009-03-27 17:10:34 -04:00
return file_has_perm ( cred , file ,
file_mask_to_av ( inode - > i_mode , mask ) ) ;
2005-04-16 15:20:36 -07:00
}
2007-09-14 09:27:07 +09:00
static int selinux_file_permission ( struct file * file , int mask )
{
2013-01-23 17:07:38 -05:00
struct inode * inode = file_inode ( file ) ;
2018-09-21 17:22:32 -07:00
struct file_security_struct * fsec = selinux_file ( file ) ;
2016-01-05 23:12:33 +01:00
struct inode_security_struct * isec ;
2009-06-22 14:54:53 -04:00
u32 sid = current_sid ( ) ;
2009-03-27 17:10:34 -04:00
if ( ! mask )
2007-09-14 09:27:07 +09:00
/* No permission to check. Existence test. */
return 0 ;
2016-01-05 23:12:33 +01:00
isec = inode_security ( inode ) ;
2009-06-22 14:54:53 -04:00
if ( sid = = fsec - > sid & & fsec - > isid = = isec - > sid & &
2023-03-09 13:30:37 -05:00
fsec - > pseqno = = avc_policy_seqno ( ) )
2012-04-04 13:45:40 -04:00
/* No change since file_open check. */
2009-06-22 14:54:53 -04:00
return 0 ;
2007-09-14 09:27:07 +09:00
return selinux_revalidate_file_permission ( file , mask ) ;
}
2005-04-16 15:20:36 -07:00
static int selinux_file_alloc_security ( struct file * file )
{
2020-01-10 16:32:10 -05:00
struct file_security_struct * fsec = selinux_file ( file ) ;
u32 sid = current_sid ( ) ;
fsec - > sid = sid ;
fsec - > fown_sid = sid ;
return 0 ;
2005-04-16 15:20:36 -07:00
}
2015-07-10 17:19:56 -04:00
/*
* Check whether a task has the ioctl permission and cmd
* operation to an inode .
*/
2015-10-21 17:44:27 -04:00
static int ioctl_has_perm ( const struct cred * cred , struct file * file ,
2015-07-10 17:19:56 -04:00
u32 requested , u16 cmd )
{
struct common_audit_data ad ;
2018-09-21 17:22:32 -07:00
struct file_security_struct * fsec = selinux_file ( file ) ;
2015-07-10 17:19:56 -04:00
struct inode * inode = file_inode ( file ) ;
2016-04-04 14:14:42 -04:00
struct inode_security_struct * isec ;
2015-07-10 17:19:56 -04:00
struct lsm_ioctlop_audit ioctl ;
u32 ssid = cred_sid ( cred ) ;
int rc ;
u8 driver = cmd > > 8 ;
u8 xperm = cmd & 0xff ;
ad . type = LSM_AUDIT_DATA_IOCTL_OP ;
ad . u . op = & ioctl ;
ad . u . op - > cmd = cmd ;
ad . u . op - > path = file - > f_path ;
if ( ssid ! = fsec - > sid ) {
2023-03-09 13:30:37 -05:00
rc = avc_has_perm ( ssid , fsec - > sid ,
2015-07-10 17:19:56 -04:00
SECCLASS_FD ,
FD__USE ,
& ad ) ;
if ( rc )
goto out ;
}
if ( unlikely ( IS_PRIVATE ( inode ) ) )
return 0 ;
2016-04-04 14:14:42 -04:00
isec = inode_security ( inode ) ;
2023-03-09 13:30:37 -05:00
rc = avc_has_extended_perms ( ssid , isec - > sid , isec - > sclass ,
2018-03-05 11:47:56 -05:00
requested , driver , xperm , & ad ) ;
2015-07-10 17:19:56 -04:00
out :
return rc ;
}
2005-04-16 15:20:36 -07:00
static int selinux_file_ioctl ( struct file * file , unsigned int cmd ,
unsigned long arg )
{
2008-11-14 10:39:21 +11:00
const struct cred * cred = current_cred ( ) ;
2011-02-25 15:39:20 -05:00
int error = 0 ;
2005-04-16 15:20:36 -07:00
2011-02-25 15:39:20 -05:00
switch ( cmd ) {
case FIONREAD :
case FIBMAP :
case FIGETBSZ :
2012-03-23 16:04:05 -04:00
case FS_IOC_GETFLAGS :
case FS_IOC_GETVERSION :
2011-02-25 15:39:20 -05:00
error = file_has_perm ( cred , file , FILE__GETATTR ) ;
break ;
2005-04-16 15:20:36 -07:00
2012-03-23 16:04:05 -04:00
case FS_IOC_SETFLAGS :
case FS_IOC_SETVERSION :
2011-02-25 15:39:20 -05:00
error = file_has_perm ( cred , file , FILE__SETATTR ) ;
break ;
/* sys_ioctl() checks */
case FIONBIO :
case FIOASYNC :
error = file_has_perm ( cred , file , 0 ) ;
break ;
2005-04-16 15:20:36 -07:00
2011-02-25 15:39:20 -05:00
case KDSKBENT :
case KDSKBSENT :
2012-01-03 12:25:14 -05:00
error = cred_has_capability ( cred , CAP_SYS_TTY_CONFIG ,
2019-01-07 16:10:53 -08:00
CAP_OPT_NONE , true ) ;
2011-02-25 15:39:20 -05:00
break ;
2022-02-25 17:54:38 +00:00
case FIOCLEX :
case FIONCLEX :
if ( ! selinux_policycap_ioctl_skip_cloexec ( ) )
error = ioctl_has_perm ( cred , file , FILE__IOCTL , ( u16 ) cmd ) ;
break ;
2011-02-25 15:39:20 -05:00
/* default case assumes that the command will go
* to the file ' s ioctl ( ) function .
*/
default :
2015-07-10 17:19:56 -04:00
error = ioctl_has_perm ( cred , file , FILE__IOCTL , ( u16 ) cmd ) ;
2011-02-25 15:39:20 -05:00
}
return error ;
2005-04-16 15:20:36 -07:00
}
lsm: new security_file_ioctl_compat() hook
Some ioctl commands do not require ioctl permission, but are routed to
other permissions such as FILE_GETATTR or FILE_SETATTR. This routing is
done by comparing the ioctl cmd to a set of 64-bit flags (FS_IOC_*).
However, if a 32-bit process is running on a 64-bit kernel, it emits
32-bit flags (FS_IOC32_*) for certain ioctl operations. These flags are
being checked erroneously, which leads to these ioctl operations being
routed to the ioctl permission, rather than the correct file
permissions.
This was also noted in a RED-PEN finding from a while back -
"/* RED-PEN how should LSM module know it's handling 32bit? */".
This patch introduces a new hook, security_file_ioctl_compat(), that is
called from the compat ioctl syscall. All current LSMs have been changed
to support this hook.
Reviewing the three places where we are currently using
security_file_ioctl(), it appears that only SELinux needs a dedicated
compat change; TOMOYO and SMACK appear to be functional without any
change.
Cc: stable@vger.kernel.org
Fixes: 0b24dcb7f2f7 ("Revert "selinux: simplify ioctl checking"")
Signed-off-by: Alfred Piccioni <alpic@google.com>
Reviewed-by: Stephen Smalley <stephen.smalley.work@gmail.com>
[PM: subject tweak, line length fixes, and alignment corrections]
Signed-off-by: Paul Moore <paul@paul-moore.com>
2023-12-19 10:09:09 +01:00
/*
 * Compat ioctl entry point.
 *
 * On a 64-bit kernel running 32-bit userspace the FS_IOC32_* command
 * values must not be compared against the 64-bit FS_IOC_* ones, so the
 * four FS_IOC32_* commands are translated to their native equivalents
 * before the check is delegated to selinux_file_ioctl().  Every other
 * command is passed through unchanged.
 */
static int selinux_file_ioctl_compat(struct file *file, unsigned int cmd,
				     unsigned long arg)
{
	unsigned int native_cmd = cmd;

	if (cmd == FS_IOC32_GETFLAGS)
		native_cmd = FS_IOC_GETFLAGS;
	else if (cmd == FS_IOC32_SETFLAGS)
		native_cmd = FS_IOC_SETFLAGS;
	else if (cmd == FS_IOC32_GETVERSION)
		native_cmd = FS_IOC_GETVERSION;
	else if (cmd == FS_IOC32_SETVERSION)
		native_cmd = FS_IOC_SETVERSION;

	return selinux_file_ioctl(file, native_cmd, arg);
}
2020-01-08 12:23:56 -05:00
/*
 * NOTE(review): the accompanying change description says the execmem-style
 * checks are enabled or disabled based on whether the initial task's
 * VM_DATA_DEFAULT_FLAGS include VM_EXEC; this flag presumably records that
 * decision.  The assignment is not visible here -- confirm at the init site.
 */
static int default_noexec __ro_after_init ;
selinux: generalize disabling of execmem for plt-in-heap archs
On Tue, 2010-04-27 at 11:47 -0700, David Miller wrote:
> From: "Tom \"spot\" Callaway" <tcallawa@redhat.com>
> Date: Tue, 27 Apr 2010 14:20:21 -0400
>
> > [root@apollo ~]$ cat /proc/2174/maps
> > 00010000-00014000 r-xp 00000000 fd:00 15466577
> > /sbin/mingetty
> > 00022000-00024000 rwxp 00002000 fd:00 15466577
> > /sbin/mingetty
> > 00024000-00046000 rwxp 00000000 00:00 0
> > [heap]
>
> SELINUX probably barfs on the executable heap, the PLT is in the HEAP
> just like powerpc32 and that's why VM_DATA_DEFAULT_FLAGS has to set
> both executable and writable.
>
> You also can't remove the CONFIG_PPC32 ifdefs in selinux, since
> because of the VM_DATA_DEFAULT_FLAGS setting used still in that arch,
> the heap will always have executable permission, just like sparc does.
> You have to support those binaries forever, whether you like it or not.
>
> Let's just replace the CONFIG_PPC32 ifdef in SELINUX with CONFIG_PPC32
> || CONFIG_SPARC as in Tom's original patch and let's be done with
> this.
>
> In fact I would go through all the arch/ header files and check the
> VM_DATA_DEFAULT_FLAGS settings and add the necessary new ifdefs to the
> SELINUX code so that other platforms don't have the pain of having to
> go through this process too.
To avoid maintaining per-arch ifdefs, it seems that we could just
directly use (VM_DATA_DEFAULT_FLAGS & VM_EXEC) as the basis for deciding
whether to enable or disable these checks. VM_DATA_DEFAULT_FLAGS isn't
constant on some architectures but instead depends on
current->personality, but we want this applied uniformly. So we'll just
use the initial task state to determine whether or not to enable these
checks.
Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: James Morris <jmorris@namei.org>
2010-04-28 15:57:57 -04:00
2005-04-16 15:20:36 -07:00
static int file_map_prot_check ( struct file * file , unsigned long prot , int shared )
{
2008-11-14 10:39:21 +11:00
const struct cred * cred = current_cred ( ) ;
2017-01-09 10:07:31 -05:00
u32 sid = cred_sid ( cred ) ;
CRED: Inaugurate COW credentials
Inaugurate copy-on-write credentials management. This uses RCU to manage the
credentials pointer in the task_struct with respect to accesses by other tasks.
A process may only modify its own credentials, and so does not need locking to
access or modify its own credentials.
A mutex (cred_replace_mutex) is added to the task_struct to control the effect
of PTRACE_ATTACHED on credential calculations, particularly with respect to
execve().
With this patch, the contents of an active credentials struct may not be
changed directly; rather a new set of credentials must be prepared, modified
and committed using something like the following sequence of events:
struct cred *new = prepare_creds();
int ret = blah(new);
if (ret < 0) {
abort_creds(new);
return ret;
}
return commit_creds(new);
There are some exceptions to this rule: the keyrings pointed to by the active
credentials may be instantiated - keyrings violate the COW rule as managing
COW keyrings is tricky, given that it is possible for a task to directly alter
the keys in a keyring in use by another task.
To help enforce this, various pointers to sets of credentials, such as those in
the task_struct, are declared const. The purpose of this is compile-time
discouragement of altering credentials through those pointers. Once a set of
credentials has been made public through one of these pointers, it may not be
modified, except under special circumstances:
(1) Its reference count may be incremented and decremented.
(2) The keyrings to which it points may be modified, but not replaced.
The only safe way to modify anything else is to create a replacement and commit
using the functions described in Documentation/credentials.txt (which will be
added by a later patch).
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
This now prepares and commits credentials in various places in the
security code rather than altering the current creds directly.
(2) Temporary credential overrides.
do_coredump() and sys_faccessat() now prepare their own credentials and
temporarily override the ones currently on the acting thread, whilst
preventing interference from other threads by holding cred_replace_mutex
on the thread being dumped.
This will be replaced in a future patch by something that hands down the
credentials directly to the functions being called, rather than altering
the task's objective credentials.
(3) LSM interface.
A number of functions have been changed, added or removed:
(*) security_capset_check(), ->capset_check()
(*) security_capset_set(), ->capset_set()
Removed in favour of security_capset().
(*) security_capset(), ->capset()
New. This is passed a pointer to the new creds, a pointer to the old
creds and the proposed capability sets. It should fill in the new
creds or return an error. All pointers, barring the pointer to the
new creds, are now const.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
Changed; now returns a value, which will cause the process to be
killed if it's an error.
(*) security_task_alloc(), ->task_alloc_security()
Removed in favour of security_prepare_creds().
(*) security_cred_free(), ->cred_free()
New. Free security data attached to cred->security.
(*) security_prepare_creds(), ->cred_prepare()
New. Duplicate any security data attached to cred->security.
(*) security_commit_creds(), ->cred_commit()
New. Apply any security effects for the upcoming installation of new
security by commit_creds().
(*) security_task_post_setuid(), ->task_post_setuid()
Removed in favour of security_task_fix_setuid().
(*) security_task_fix_setuid(), ->task_fix_setuid()
Fix up the proposed new credentials for setuid(). This is used by
cap_set_fix_setuid() to implicitly adjust capabilities in line with
setuid() changes. Changes are made to the new credentials, rather
than the task itself as in security_task_post_setuid().
(*) security_task_reparent_to_init(), ->task_reparent_to_init()
Removed. Instead the task being reparented to init is referred
directly to init's credentials.
NOTE! This results in the loss of some state: SELinux's osid no
longer records the sid of the thread that forked it.
(*) security_key_alloc(), ->key_alloc()
(*) security_key_permission(), ->key_permission()
Changed. These now take cred pointers rather than task pointers to
refer to the security context.
(4) sys_capset().
This has been simplified and uses less locking. The LSM functions it
calls have been merged.
(5) reparent_to_kthreadd().
This gives the current thread the same credentials as init by simply using
commit_thread() to point that way.
(6) __sigqueue_alloc() and switch_uid()
__sigqueue_alloc() can't stop the target task from changing its creds
beneath it, so this function gets a reference to the currently applicable
user_struct which it then passes into the sigqueue struct it returns if
successful.
switch_uid() is now called from commit_creds(), and possibly should be
folded into that. commit_creds() should take care of protecting
__sigqueue_alloc().
(7) [sg]et[ug]id() and co and [sg]et_current_groups.
The set functions now all use prepare_creds(), commit_creds() and
abort_creds() to build and check a new set of credentials before applying
it.
security_task_set[ug]id() is called inside the prepared section. This
guarantees that nothing else will affect the creds until we've finished.
The calling of set_dumpable() has been moved into commit_creds().
Much of the functionality of set_user() has been moved into
commit_creds().
The get functions all simply access the data directly.
(8) security_task_prctl() and cap_task_prctl().
security_task_prctl() has been modified to return -ENOSYS if it doesn't
want to handle a function, or otherwise return the return value directly
rather than through an argument.
Additionally, cap_task_prctl() now prepares a new set of credentials, even
if it doesn't end up using it.
(9) Keyrings.
A number of changes have been made to the keyrings code:
(a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have
all been dropped and built in to the credentials functions directly.
They may want separating out again later.
(b) key_alloc() and search_process_keyrings() now take a cred pointer
rather than a task pointer to specify the security context.
(c) copy_creds() gives a new thread within the same thread group a new
thread keyring if its parent had one, otherwise it discards the thread
keyring.
(d) The authorisation key now points directly to the credentials to extend
the search into rather than pointing to the task that carries them.
(e) Installing thread, process or session keyrings causes a new set of
credentials to be created, even though it's not strictly necessary for
process or session keyrings (they're shared).
(10) Usermode helper.
The usermode helper code now carries a cred struct pointer in its
subprocess_info struct instead of a new session keyring pointer. This set
of credentials is derived from init_cred and installed on the new process
after it has been cloned.
call_usermodehelper_setup() allocates the new credentials and
call_usermodehelper_freeinfo() discards them if they haven't been used. A
special cred function (prepare_usermodeinfo_creds()) is provided
specifically for call_usermodehelper_setup() to call.
call_usermodehelper_setkeys() adjusts the credentials to sport the
supplied keyring as the new session keyring.
(11) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) selinux_setprocattr() no longer does its check for whether the
current ptracer can access processes with the new SID inside the lock
that covers getting the ptracer's SID. Whilst this lock ensures that
the check is done with the ptracer pinned, the result is only valid
until the lock is released, so there's no point doing it inside the
lock.
(12) is_single_threaded().
This function has been extracted from selinux_setprocattr() and put into
a file of its own in the lib/ directory as join_session_keyring() now
wants to use it too.
The code in SELinux just checked to see whether a task shared mm_structs
with other tasks (CLONE_VM), but that isn't good enough. We really want
to know if they're part of the same thread group (CLONE_THREAD).
(13) nfsd.
The NFS server daemon now has to use the COW credentials to set the
credentials it is going to use. It really needs to pass the credentials
down to the functions it calls, but it can't do that until other patches
in this series have been applied.
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:23 +11:00
int rc = 0 ;
2008-11-14 10:39:21 +11:00
selinux: generalize disabling of execmem for plt-in-heap archs
On Tue, 2010-04-27 at 11:47 -0700, David Miller wrote:
> From: "Tom \"spot\" Callaway" <tcallawa@redhat.com>
> Date: Tue, 27 Apr 2010 14:20:21 -0400
>
> > [root@apollo ~]$ cat /proc/2174/maps
> > 00010000-00014000 r-xp 00000000 fd:00 15466577
> > /sbin/mingetty
> > 00022000-00024000 rwxp 00002000 fd:00 15466577
> > /sbin/mingetty
> > 00024000-00046000 rwxp 00000000 00:00 0
> > [heap]
>
> SELINUX probably barfs on the executable heap, the PLT is in the HEAP
> just like powerpc32 and that's why VM_DATA_DEFAULT_FLAGS has to set
> both executable and writable.
>
> You also can't remove the CONFIG_PPC32 ifdefs in selinux, since
> because of the VM_DATA_DEFAULT_FLAGS setting used still in that arch,
> the heap will always have executable permission, just like sparc does.
> You have to support those binaries forever, whether you like it or not.
>
> Let's just replace the CONFIG_PPC32 ifdef in SELINUX with CONFIG_PPC32
> || CONFIG_SPARC as in Tom's original patch and let's be done with
> this.
>
> In fact I would go through all the arch/ header files and check the
> VM_DATA_DEFAULT_FLAGS settings and add the necessary new ifdefs to the
> SELINUX code so that other platforms don't have the pain of having to
> go through this process too.
To avoid maintaining per-arch ifdefs, it seems that we could just
directly use (VM_DATA_DEFAULT_FLAGS & VM_EXEC) as the basis for deciding
whether to enable or disable these checks. VM_DATA_DEFAULT_FLAGS isn't
constant on some architectures but instead depends on
current->personality, but we want this applied uniformly. So we'll just
use the initial task state to determine whether or not to enable these
checks.
Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: James Morris <jmorris@namei.org>
2010-04-28 15:57:57 -04:00
if ( default_noexec & &
2015-07-10 09:40:59 -04:00
( prot & PROT_EXEC ) & & ( ! file | | IS_PRIVATE ( file_inode ( file ) ) | |
( ! shared & & ( prot & PROT_WRITE ) ) ) ) {
2005-04-16 15:20:36 -07:00
/*
* We are making executable an anonymous mapping or a
* private file mapping that will also be writable .
* This has an additional check .
*/
2023-03-09 13:30:37 -05:00
rc = avc_has_perm ( sid , sid , SECCLASS_PROCESS ,
2017-01-09 10:07:31 -05:00
PROCESS__EXECMEM , NULL ) ;
2005-04-16 15:20:36 -07:00
if ( rc )
CRED: Inaugurate COW credentials
Inaugurate copy-on-write credentials management. This uses RCU to manage the
credentials pointer in the task_struct with respect to accesses by other tasks.
A process may only modify its own credentials, and so does not need locking to
access or modify its own credentials.
A mutex (cred_replace_mutex) is added to the task_struct to control the effect
of PTRACE_ATTACHED on credential calculations, particularly with respect to
execve().
With this patch, the contents of an active credentials struct may not be
changed directly; rather a new set of credentials must be prepared, modified
and committed using something like the following sequence of events:
struct cred *new = prepare_creds();
int ret = blah(new);
if (ret < 0) {
abort_creds(new);
return ret;
}
return commit_creds(new);
There are some exceptions to this rule: the keyrings pointed to by the active
credentials may be instantiated - keyrings violate the COW rule as managing
COW keyrings is tricky, given that it is possible for a task to directly alter
the keys in a keyring in use by another task.
To help enforce this, various pointers to sets of credentials, such as those in
the task_struct, are declared const. The purpose of this is compile-time
discouragement of altering credentials through those pointers. Once a set of
credentials has been made public through one of these pointers, it may not be
modified, except under special circumstances:
(1) Its reference count may be incremented and decremented.
(2) The keyrings to which it points may be modified, but not replaced.
The only safe way to modify anything else is to create a replacement and commit
using the functions described in Documentation/credentials.txt (which will be
added by a later patch).
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
This now prepares and commits credentials in various places in the
security code rather than altering the current creds directly.
(2) Temporary credential overrides.
do_coredump() and sys_faccessat() now prepare their own credentials and
temporarily override the ones currently on the acting thread, whilst
preventing interference from other threads by holding cred_replace_mutex
on the thread being dumped.
This will be replaced in a future patch by something that hands down the
credentials directly to the functions being called, rather than altering
the task's objective credentials.
(3) LSM interface.
A number of functions have been changed, added or removed:
(*) security_capset_check(), ->capset_check()
(*) security_capset_set(), ->capset_set()
Removed in favour of security_capset().
(*) security_capset(), ->capset()
New. This is passed a pointer to the new creds, a pointer to the old
creds and the proposed capability sets. It should fill in the new
creds or return an error. All pointers, barring the pointer to the
new creds, are now const.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
Changed; now returns a value, which will cause the process to be
killed if it's an error.
(*) security_task_alloc(), ->task_alloc_security()
Removed in favour of security_prepare_creds().
(*) security_cred_free(), ->cred_free()
New. Free security data attached to cred->security.
(*) security_prepare_creds(), ->cred_prepare()
New. Duplicate any security data attached to cred->security.
(*) security_commit_creds(), ->cred_commit()
New. Apply any security effects for the upcoming installation of new
security by commit_creds().
(*) security_task_post_setuid(), ->task_post_setuid()
Removed in favour of security_task_fix_setuid().
(*) security_task_fix_setuid(), ->task_fix_setuid()
Fix up the proposed new credentials for setuid(). This is used by
cap_set_fix_setuid() to implicitly adjust capabilities in line with
setuid() changes. Changes are made to the new credentials, rather
than the task itself as in security_task_post_setuid().
(*) security_task_reparent_to_init(), ->task_reparent_to_init()
Removed. Instead the task being reparented to init is referred
directly to init's credentials.
NOTE! This results in the loss of some state: SELinux's osid no
longer records the sid of the thread that forked it.
(*) security_key_alloc(), ->key_alloc()
(*) security_key_permission(), ->key_permission()
Changed. These now take cred pointers rather than task pointers to
refer to the security context.
(4) sys_capset().
This has been simplified and uses less locking. The LSM functions it
calls have been merged.
(5) reparent_to_kthreadd().
This gives the current thread the same credentials as init by simply using
commit_thread() to point that way.
(6) __sigqueue_alloc() and switch_uid()
__sigqueue_alloc() can't stop the target task from changing its creds
beneath it, so this function gets a reference to the currently applicable
user_struct which it then passes into the sigqueue struct it returns if
successful.
switch_uid() is now called from commit_creds(), and possibly should be
folded into that. commit_creds() should take care of protecting
__sigqueue_alloc().
(7) [sg]et[ug]id() and co and [sg]et_current_groups.
The set functions now all use prepare_creds(), commit_creds() and
abort_creds() to build and check a new set of credentials before applying
it.
security_task_set[ug]id() is called inside the prepared section. This
guarantees that nothing else will affect the creds until we've finished.
The calling of set_dumpable() has been moved into commit_creds().
Much of the functionality of set_user() has been moved into
commit_creds().
The get functions all simply access the data directly.
(8) security_task_prctl() and cap_task_prctl().
security_task_prctl() has been modified to return -ENOSYS if it doesn't
want to handle a function, or otherwise return the return value directly
rather than through an argument.
Additionally, cap_task_prctl() now prepares a new set of credentials, even
if it doesn't end up using it.
(9) Keyrings.
A number of changes have been made to the keyrings code:
(a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have
all been dropped and built in to the credentials functions directly.
They may want separating out again later.
(b) key_alloc() and search_process_keyrings() now take a cred pointer
rather than a task pointer to specify the security context.
(c) copy_creds() gives a new thread within the same thread group a new
thread keyring if its parent had one, otherwise it discards the thread
keyring.
(d) The authorisation key now points directly to the credentials to extend
the search into rather than pointing to the task that carries them.
(e) Installing thread, process or session keyrings causes a new set of
credentials to be created, even though it's not strictly necessary for
process or session keyrings (they're shared).
(10) Usermode helper.
The usermode helper code now carries a cred struct pointer in its
subprocess_info struct instead of a new session keyring pointer. This set
of credentials is derived from init_cred and installed on the new process
after it has been cloned.
call_usermodehelper_setup() allocates the new credentials and
call_usermodehelper_freeinfo() discards them if they haven't been used. A
special cred function (prepare_usermodeinfo_creds()) is provided
specifically for call_usermodehelper_setup() to call.
call_usermodehelper_setkeys() adjusts the credentials to sport the
supplied keyring as the new session keyring.
(11) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) selinux_setprocattr() no longer does its check for whether the
current ptracer can access processes with the new SID inside the lock
that covers getting the ptracer's SID. Whilst this lock ensures that
the check is done with the ptracer pinned, the result is only valid
until the lock is released, so there's no point doing it inside the
lock.
(12) is_single_threaded().
This function has been extracted from selinux_setprocattr() and put into
a file of its own in the lib/ directory as join_session_keyring() now
wants to use it too.
The code in SELinux just checked to see whether a task shared mm_structs
with other tasks (CLONE_VM), but that isn't good enough. We really want
to know if they're part of the same thread group (CLONE_THREAD).
(13) nfsd.
The NFS server daemon now has to use the COW credentials to set the
credentials it is going to use. It really needs to pass the credentials
down to the functions it calls, but it can't do that until other patches
in this series have been applied.
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:23 +11:00
goto error ;
2005-04-16 15:20:36 -07:00
}
if ( file ) {
/* read access is always possible with a mapping */
u32 av = FILE__READ ;
/* write access only matters if the mapping is shared */
if ( shared & & ( prot & PROT_WRITE ) )
av | = FILE__WRITE ;
if ( prot & PROT_EXEC )
av | = FILE__EXECUTE ;
2008-11-14 10:39:21 +11:00
return file_has_perm ( cred , file , av ) ;
2005-04-16 15:20:36 -07:00
}
CRED: Inaugurate COW credentials
Inaugurate copy-on-write credentials management. This uses RCU to manage the
credentials pointer in the task_struct with respect to accesses by other tasks.
A process may only modify its own credentials, and so does not need locking to
access or modify its own credentials.
A mutex (cred_replace_mutex) is added to the task_struct to control the effect
of PTRACE_ATTACHED on credential calculations, particularly with respect to
execve().
With this patch, the contents of an active credentials struct may not be
changed directly; rather a new set of credentials must be prepared, modified
and committed using something like the following sequence of events:
struct cred *new = prepare_creds();
int ret = blah(new);
if (ret < 0) {
abort_creds(new);
return ret;
}
return commit_creds(new);
There are some exceptions to this rule: the keyrings pointed to by the active
credentials may be instantiated - keyrings violate the COW rule as managing
COW keyrings is tricky, given that it is possible for a task to directly alter
the keys in a keyring in use by another task.
To help enforce this, various pointers to sets of credentials, such as those in
the task_struct, are declared const. The purpose of this is compile-time
discouragement of altering credentials through those pointers. Once a set of
credentials has been made public through one of these pointers, it may not be
modified, except under special circumstances:
(1) Its reference count may be incremented and decremented.
(2) The keyrings to which it points may be modified, but not replaced.
The only safe way to modify anything else is to create a replacement and commit
using the functions described in Documentation/credentials.txt (which will be
added by a later patch).
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
This now prepares and commits credentials in various places in the
security code rather than altering the current creds directly.
(2) Temporary credential overrides.
do_coredump() and sys_faccessat() now prepare their own credentials and
temporarily override the ones currently on the acting thread, whilst
preventing interference from other threads by holding cred_replace_mutex
on the thread being dumped.
This will be replaced in a future patch by something that hands down the
credentials directly to the functions being called, rather than altering
the task's objective credentials.
(3) LSM interface.
A number of functions have been changed, added or removed:
(*) security_capset_check(), ->capset_check()
(*) security_capset_set(), ->capset_set()
Removed in favour of security_capset().
(*) security_capset(), ->capset()
New. This is passed a pointer to the new creds, a pointer to the old
creds and the proposed capability sets. It should fill in the new
creds or return an error. All pointers, barring the pointer to the
new creds, are now const.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
Changed; now returns a value, which will cause the process to be
killed if it's an error.
(*) security_task_alloc(), ->task_alloc_security()
Removed in favour of security_prepare_creds().
(*) security_cred_free(), ->cred_free()
New. Free security data attached to cred->security.
(*) security_prepare_creds(), ->cred_prepare()
New. Duplicate any security data attached to cred->security.
(*) security_commit_creds(), ->cred_commit()
New. Apply any security effects for the upcoming installation of new
security by commit_creds().
(*) security_task_post_setuid(), ->task_post_setuid()
Removed in favour of security_task_fix_setuid().
(*) security_task_fix_setuid(), ->task_fix_setuid()
Fix up the proposed new credentials for setuid(). This is used by
cap_set_fix_setuid() to implicitly adjust capabilities in line with
setuid() changes. Changes are made to the new credentials, rather
than the task itself as in security_task_post_setuid().
(*) security_task_reparent_to_init(), ->task_reparent_to_init()
Removed. Instead the task being reparented to init is referred
directly to init's credentials.
NOTE! This results in the loss of some state: SELinux's osid no
longer records the sid of the thread that forked it.
(*) security_key_alloc(), ->key_alloc()
(*) security_key_permission(), ->key_permission()
Changed. These now take cred pointers rather than task pointers to
refer to the security context.
(4) sys_capset().
This has been simplified and uses less locking. The LSM functions it
calls have been merged.
(5) reparent_to_kthreadd().
This gives the current thread the same credentials as init by simply using
commit_thread() to point that way.
(6) __sigqueue_alloc() and switch_uid()
__sigqueue_alloc() can't stop the target task from changing its creds
beneath it, so this function gets a reference to the currently applicable
user_struct which it then passes into the sigqueue struct it returns if
successful.
switch_uid() is now called from commit_creds(), and possibly should be
folded into that. commit_creds() should take care of protecting
__sigqueue_alloc().
(7) [sg]et[ug]id() and co and [sg]et_current_groups.
The set functions now all use prepare_creds(), commit_creds() and
abort_creds() to build and check a new set of credentials before applying
it.
security_task_set[ug]id() is called inside the prepared section. This
guarantees that nothing else will affect the creds until we've finished.
The calling of set_dumpable() has been moved into commit_creds().
Much of the functionality of set_user() has been moved into
commit_creds().
The get functions all simply access the data directly.
(8) security_task_prctl() and cap_task_prctl().
security_task_prctl() has been modified to return -ENOSYS if it doesn't
want to handle a function, or otherwise return the return value directly
rather than through an argument.
Additionally, cap_task_prctl() now prepares a new set of credentials, even
if it doesn't end up using it.
(9) Keyrings.
A number of changes have been made to the keyrings code:
(a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have
all been dropped and built in to the credentials functions directly.
They may want separating out again later.
(b) key_alloc() and search_process_keyrings() now take a cred pointer
rather than a task pointer to specify the security context.
(c) copy_creds() gives a new thread within the same thread group a new
thread keyring if its parent had one, otherwise it discards the thread
keyring.
(d) The authorisation key now points directly to the credentials to extend
the search into rather than pointing to the task that carries them.
(e) Installing thread, process or session keyrings causes a new set of
credentials to be created, even though it's not strictly necessary for
process or session keyrings (they're shared).
(10) Usermode helper.
The usermode helper code now carries a cred struct pointer in its
subprocess_info struct instead of a new session keyring pointer. This set
of credentials is derived from init_cred and installed on the new process
after it has been cloned.
call_usermodehelper_setup() allocates the new credentials and
call_usermodehelper_freeinfo() discards them if they haven't been used. A
special cred function (prepare_usermodeinfo_creds()) is provided
specifically for call_usermodehelper_setup() to call.
call_usermodehelper_setkeys() adjusts the credentials to sport the
supplied keyring as the new session keyring.
(11) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) selinux_setprocattr() no longer does its check for whether the
current ptracer can access processes with the new SID inside the lock
that covers getting the ptracer's SID. Whilst this lock ensures that
the check is done with the ptracer pinned, the result is only valid
until the lock is released, so there's no point doing it inside the
lock.
(12) is_single_threaded().
This function has been extracted from selinux_setprocattr() and put into
a file of its own in the lib/ directory as join_session_keyring() now
wants to use it too.
The code in SELinux just checked to see whether a task shared mm_structs
with other tasks (CLONE_VM), but that isn't good enough. We really want
to know if they're part of the same thread group (CLONE_THREAD).
(13) nfsd.
The NFS server daemon now has to use the COW credentials to set the
credentials it is going to use. It really needs to pass the credentials
down to the functions it calls, but it can't do that until other patches
in this series have been applied.
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:23 +11:00
error :
return rc ;
2005-04-16 15:20:36 -07:00
}
2012-05-30 13:30:51 -04:00
static int selinux_mmap_addr ( unsigned long addr )
2005-04-16 15:20:36 -07:00
{
2015-05-02 15:11:42 -07:00
int rc = 0 ;
2005-04-16 15:20:36 -07:00
2009-07-31 12:54:11 -04:00
if ( addr < CONFIG_LSM_MMAP_MIN_ADDR ) {
2014-03-19 16:46:11 -04:00
u32 sid = current_sid ( ) ;
2023-03-09 13:30:37 -05:00
rc = avc_has_perm ( sid , sid , SECCLASS_MEMPROTECT ,
2007-06-28 15:55:21 -04:00
MEMPROTECT__MMAP_ZERO , NULL ) ;
2009-07-31 12:54:05 -04:00
}
2014-03-19 16:46:11 -04:00
return rc ;
2012-05-30 13:30:51 -04:00
}
2005-04-16 15:20:36 -07:00
selinux: remove the 'checkreqprot' functionality
We originally promised that the SELinux 'checkreqprot' functionality
would be removed no sooner than June 2021, and now that it is March
2023 it seems like it is a good time to do the final removal. The
deprecation notice in the kernel provides plenty of detail on why
'checkreqprot' is not desirable, with the key point repeated below:
This was a compatibility mechanism for legacy userspace and
for the READ_IMPLIES_EXEC personality flag. However, if set to
1, it weakens security by allowing mappings to be made executable
without authorization by policy. The default value of checkreqprot
at boot was changed starting in Linux v4.4 to 0 (i.e. check the
actual protection), and Android and Linux distributions have been
explicitly writing a "0" to /sys/fs/selinux/checkreqprot during
initialization for some time.
Along with the official deprecation notice, we have been discussing
this on-list and directly with several of the larger SELinux-based
distros and everyone is happy to see this feature finally removed.
In an attempt to catch all of the smaller, and DIY, Linux systems
we have been writing a deprecation notice URL into the kernel log,
along with a growing ssleep() penalty, when admins enabled
checkreqprot at runtime or via the kernel command line. We have
yet to have anyone come to us and raise an objection to the
deprecation or planned removal.
It is worth noting that while this patch removes the checkreqprot
functionality, it leaves the user visible interfaces (kernel command
line and selinuxfs file) intact, just inert. This should help
prevent breakages with existing userspace tools that correctly, but
unnecessarily, disable checkreqprot at boot or runtime. Admins
that attempt to enable checkreqprot will be met with a removal
message in the kernel log.
Acked-by: Stephen Smalley <stephen.smalley.work@gmail.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
2023-03-16 11:43:08 -04:00
/*
 * selinux_mmap_file - permission checks for establishing a mapping.
 * @file:    file being mapped, or NULL when no file backs the mapping.
 * @reqprot: unused (marked __always_unused).
 * @prot:    protection bits handed on to file_map_prot_check().
 * @flags:   mmap flags; only the MAP_TYPE bits are examined here.
 *
 * When @file is non-NULL, FILE__MAP is first checked on the file's inode
 * using the current credentials; a non-zero result is returned immediately.
 * Otherwise (or on success) the decision is delegated to
 * file_map_prot_check(), telling it whether the mapping is MAP_SHARED.
 *
 * Returns 0 on success or the non-zero result of the failing check.
 */
static int selinux_mmap_file(struct file *file,
			     unsigned long reqprot __always_unused,
			     unsigned long prot, unsigned long flags)
{
	int is_shared = (flags & MAP_TYPE) == MAP_SHARED;

	if (file) {
		struct common_audit_data audit;
		int err;

		/* Audit record identifies the file whose inode is checked. */
		audit.type = LSM_AUDIT_DATA_FILE;
		audit.u.file = file;

		err = inode_has_perm(current_cred(), file_inode(file),
				     FILE__MAP, &audit);
		if (err)
			return err;
	}

	return file_map_prot_check(file, prot, is_shared);
}
static int selinux_file_mprotect ( struct vm_area_struct * vma ,
selinux: remove the 'checkreqprot' functionality
We originally promised that the SELinux 'checkreqprot' functionality
would be removed no sooner than June 2021, and now that it is March
2023 it seems like it is a good time to do the final removal. The
deprecation notice in the kernel provides plenty of detail on why
'checkreqprot' is not desirable, with the key point repeated below:
This was a compatibility mechanism for legacy userspace and
for the READ_IMPLIES_EXEC personality flag. However, if set to
1, it weakens security by allowing mappings to be made executable
without authorization by policy. The default value of checkreqprot
at boot was changed starting in Linux v4.4 to 0 (i.e. check the
actual protection), and Android and Linux distributions have been
explicitly writing a "0" to /sys/fs/selinux/checkreqprot during
initialization for some time.
Along with the official deprecation notice, we have been discussing
this on-list and directly with several of the larger SELinux-based
distros and everyone is happy to see this feature finally removed.
In an attempt to catch all of the smaller, and DIY, Linux systems
we have been writing a deprecation notice URL into the kernel log,
along with a growing ssleep() penalty, when admins enabled
checkreqprot at runtime or via the kernel command line. We have
yet to have anyone come to us and raise an objection to the
deprecation or planned removal.
It is worth noting that while this patch removes the checkreqprot
functionality, it leaves the user visible interfaces (kernel command
line and selinuxfs file) intact, just inert. This should help
prevent breakages with existing userspace tools that correctly, but
unnecessarily, disable checkreqprot at boot or runtime. Admins
that attempt to enable checkreqprot will be met with a removal
message in the kernel log.
Acked-by: Stephen Smalley <stephen.smalley.work@gmail.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
2023-03-16 11:43:08 -04:00
unsigned long reqprot __always_unused ,
2005-04-16 15:20:36 -07:00
unsigned long prot )
{
2008-11-14 10:39:21 +11:00
const struct cred * cred = current_cred ( ) ;
2017-01-09 10:07:31 -05:00
u32 sid = cred_sid ( cred ) ;
2005-04-16 15:20:36 -07:00
selinux: generalize disabling of execmem for plt-in-heap archs
On Tue, 2010-04-27 at 11:47 -0700, David Miller wrote:
> From: "Tom \"spot\" Callaway" <tcallawa@redhat.com>
> Date: Tue, 27 Apr 2010 14:20:21 -0400
>
> > [root@apollo ~]$ cat /proc/2174/maps
> > 00010000-00014000 r-xp 00000000 fd:00 15466577
> > /sbin/mingetty
> > 00022000-00024000 rwxp 00002000 fd:00 15466577
> > /sbin/mingetty
> > 00024000-00046000 rwxp 00000000 00:00 0
> > [heap]
>
> SELINUX probably barfs on the executable heap, the PLT is in the HEAP
> just like powerpc32 and that's why VM_DATA_DEFAULT_FLAGS has to set
> both executable and writable.
>
> You also can't remove the CONFIG_PPC32 ifdefs in selinux, since
> because of the VM_DATA_DEFAULT_FLAGS setting used still in that arch,
> the heap will always have executable permission, just like sparc does.
> You have to support those binaries forever, whether you like it or not.
>
> Let's just replace the CONFIG_PPC32 ifdef in SELINUX with CONFIG_PPC32
> || CONFIG_SPARC as in Tom's original patch and let's be done with
> this.
>
> In fact I would go through all the arch/ header files and check the
> VM_DATA_DEFAULT_FLAGS settings and add the necessary new ifdefs to the
> SELINUX code so that other platforms don't have the pain of having to
> go through this process too.
To avoid maintaining per-arch ifdefs, it seems that we could just
directly use (VM_DATA_DEFAULT_FLAGS & VM_EXEC) as the basis for deciding
whether to enable or disable these checks. VM_DATA_DEFAULT_FLAGS isn't
constant on some architectures but instead depends on
current->personality, but we want this applied uniformly. So we'll just
use the initial task state to determine whether or not to enable these
checks.
Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: James Morris <jmorris@namei.org>
2010-04-28 15:57:57 -04:00
if ( default_noexec & &
( prot & PROT_EXEC ) & & ! ( vma - > vm_flags & VM_EXEC ) ) {
2009-01-29 12:19:51 +11:00
int rc = 0 ;
2024-08-08 11:57:38 -04:00
/*
* We don ' t use the vma_is_initial_heap ( ) helper as it has
* a history of problems and is currently broken on systems
* where there is no heap , e . g . brk = = start_brk . Before
* replacing the conditional below with vma_is_initial_heap ( ) ,
* or something similar , please ensure that the logic is the
* same as what we have below or you have tested every possible
* corner case you can think to test .
*/
if ( vma - > vm_start > = vma - > vm_mm - > start_brk & &
vma - > vm_end < = vma - > vm_mm - > brk ) {
2023-03-09 13:30:37 -05:00
rc = avc_has_perm ( sid , sid , SECCLASS_PROCESS ,
2017-01-09 10:07:31 -05:00
PROCESS__EXECHEAP , NULL ) ;
2023-07-28 13:00:42 +08:00
} else if ( ! vma - > vm_file & & ( vma_is_initial_stack ( vma ) | |
2016-09-30 10:58:58 -07:00
vma_is_stack_for_current ( vma ) ) ) {
2023-03-09 13:30:37 -05:00
rc = avc_has_perm ( sid , sid , SECCLASS_PROCESS ,
2017-01-09 10:07:31 -05:00
PROCESS__EXECSTACK , NULL ) ;
2006-02-01 03:05:54 -08:00
} else if ( vma - > vm_file & & vma - > anon_vma ) {
/*
* We are making executable a file mapping that has
* had some COW done . Since pages might have been
* written , check ability to execute the possibly
* modified content . This typically should only
* occur for text relocations .
*/
CRED: Inaugurate COW credentials
Inaugurate copy-on-write credentials management. This uses RCU to manage the
credentials pointer in the task_struct with respect to accesses by other tasks.
A process may only modify its own credentials, and so does not need locking to
access or modify its own credentials.
A mutex (cred_replace_mutex) is added to the task_struct to control the effect
of PTRACE_ATTACHED on credential calculations, particularly with respect to
execve().
With this patch, the contents of an active credentials struct may not be
changed directly; rather a new set of credentials must be prepared, modified
and committed using something like the following sequence of events:
struct cred *new = prepare_creds();
int ret = blah(new);
if (ret < 0) {
abort_creds(new);
return ret;
}
return commit_creds(new);
There are some exceptions to this rule: the keyrings pointed to by the active
credentials may be instantiated - keyrings violate the COW rule as managing
COW keyrings is tricky, given that it is possible for a task to directly alter
the keys in a keyring in use by another task.
To help enforce this, various pointers to sets of credentials, such as those in
the task_struct, are declared const. The purpose of this is compile-time
discouragement of altering credentials through those pointers. Once a set of
credentials has been made public through one of these pointers, it may not be
modified, except under special circumstances:
(1) Its reference count may be incremented and decremented.
(2) The keyrings to which it points may be modified, but not replaced.
The only safe way to modify anything else is to create a replacement and commit
using the functions described in Documentation/credentials.txt (which will be
added by a later patch).
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
This now prepares and commits credentials in various places in the
security code rather than altering the current creds directly.
(2) Temporary credential overrides.
do_coredump() and sys_faccessat() now prepare their own credentials and
temporarily override the ones currently on the acting thread, whilst
preventing interference from other threads by holding cred_replace_mutex
on the thread being dumped.
This will be replaced in a future patch by something that hands down the
credentials directly to the functions being called, rather than altering
the task's objective credentials.
(3) LSM interface.
A number of functions have been changed, added or removed:
(*) security_capset_check(), ->capset_check()
(*) security_capset_set(), ->capset_set()
Removed in favour of security_capset().
(*) security_capset(), ->capset()
New. This is passed a pointer to the new creds, a pointer to the old
creds and the proposed capability sets. It should fill in the new
creds or return an error. All pointers, barring the pointer to the
new creds, are now const.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
Changed; now returns a value, which will cause the process to be
killed if it's an error.
(*) security_task_alloc(), ->task_alloc_security()
Removed in favour of security_prepare_creds().
(*) security_cred_free(), ->cred_free()
New. Free security data attached to cred->security.
(*) security_prepare_creds(), ->cred_prepare()
New. Duplicate any security data attached to cred->security.
(*) security_commit_creds(), ->cred_commit()
New. Apply any security effects for the upcoming installation of new
security by commit_creds().
(*) security_task_post_setuid(), ->task_post_setuid()
Removed in favour of security_task_fix_setuid().
(*) security_task_fix_setuid(), ->task_fix_setuid()
Fix up the proposed new credentials for setuid(). This is used by
cap_set_fix_setuid() to implicitly adjust capabilities in line with
setuid() changes. Changes are made to the new credentials, rather
than the task itself as in security_task_post_setuid().
(*) security_task_reparent_to_init(), ->task_reparent_to_init()
Removed. Instead the task being reparented to init is referred
directly to init's credentials.
NOTE! This results in the loss of some state: SELinux's osid no
longer records the sid of the thread that forked it.
(*) security_key_alloc(), ->key_alloc()
(*) security_key_permission(), ->key_permission()
Changed. These now take cred pointers rather than task pointers to
refer to the security context.
(4) sys_capset().
This has been simplified and uses less locking. The LSM functions it
calls have been merged.
(5) reparent_to_kthreadd().
This gives the current thread the same credentials as init by simply using
commit_thread() to point that way.
(6) __sigqueue_alloc() and switch_uid()
__sigqueue_alloc() can't stop the target task from changing its creds
beneath it, so this function gets a reference to the currently applicable
user_struct which it then passes into the sigqueue struct it returns if
successful.
switch_uid() is now called from commit_creds(), and possibly should be
folded into that. commit_creds() should take care of protecting
__sigqueue_alloc().
(7) [sg]et[ug]id() and co and [sg]et_current_groups.
The set functions now all use prepare_creds(), commit_creds() and
abort_creds() to build and check a new set of credentials before applying
it.
security_task_set[ug]id() is called inside the prepared section. This
guarantees that nothing else will affect the creds until we've finished.
The calling of set_dumpable() has been moved into commit_creds().
Much of the functionality of set_user() has been moved into
commit_creds().
The get functions all simply access the data directly.
(8) security_task_prctl() and cap_task_prctl().
security_task_prctl() has been modified to return -ENOSYS if it doesn't
want to handle a function, or otherwise return the return value directly
rather than through an argument.
Additionally, cap_task_prctl() now prepares a new set of credentials, even
if it doesn't end up using it.
(9) Keyrings.
A number of changes have been made to the keyrings code:
(a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have
all been dropped and built in to the credentials functions directly.
They may want separating out again later.
(b) key_alloc() and search_process_keyrings() now take a cred pointer
rather than a task pointer to specify the security context.
(c) copy_creds() gives a new thread within the same thread group a new
thread keyring if its parent had one, otherwise it discards the thread
keyring.
(d) The authorisation key now points directly to the credentials to extend
the search into rather than pointing to the task that carries them.
(e) Installing thread, process or session keyrings causes a new set of
credentials to be created, even though it's not strictly necessary for
process or session keyrings (they're shared).
(10) Usermode helper.
The usermode helper code now carries a cred struct pointer in its
subprocess_info struct instead of a new session keyring pointer. This set
of credentials is derived from init_cred and installed on the new process
after it has been cloned.
call_usermodehelper_setup() allocates the new credentials and
call_usermodehelper_freeinfo() discards them if they haven't been used. A
special cred function (prepare_usermodeinfo_creds()) is provided
specifically for call_usermodehelper_setup() to call.
call_usermodehelper_setkeys() adjusts the credentials to sport the
supplied keyring as the new session keyring.
(11) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) selinux_setprocattr() no longer does its check for whether the
current ptracer can access processes with the new SID inside the lock
that covers getting the ptracer's SID. Whilst this lock ensures that
the check is done with the ptracer pinned, the result is only valid
until the lock is released, so there's no point doing it inside the
lock.
(12) is_single_threaded().
This function has been extracted from selinux_setprocattr() and put into
a file of its own in the lib/ directory as join_session_keyring() now
wants to use it too.
The code in SELinux just checked to see whether a task shared mm_structs
with other tasks (CLONE_VM), but that isn't good enough. We really want
to know if they're part of the same thread group (CLONE_THREAD).
(13) nfsd.
The NFS server daemon now has to use the COW credentials to set the
credentials it is going to use. It really needs to pass the credentials
down to the functions it calls, but it can't do that until other patches
in this series have been applied.
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:23 +11:00
rc = file_has_perm ( cred , vma - > vm_file , FILE__EXECMOD ) ;
2006-02-01 03:05:54 -08:00
}
2005-06-25 14:54:34 -07:00
if ( rc )
return rc ;
}
2005-04-16 15:20:36 -07:00
return file_map_prot_check ( vma - > vm_file , prot , vma - > vm_flags & VM_SHARED ) ;
}
/*
 * Check whether the credentials of the current task grant FILE__LOCK
 * on @file. @cmd is accepted but not examined here.
 * Returns the result of file_has_perm().
 */
static int selinux_file_lock ( struct file * file , unsigned int cmd )
{
2008-11-14 10:39:21 +11:00
const struct cred * cred = current_cred ( ) ;
return file_has_perm ( cred , file , FILE__LOCK ) ;
2005-04-16 15:20:36 -07:00
}
/*
 * Permission check for fcntl commands on @file, made with the
 * credentials of the current task.
 *
 * @cmd selects which permission is checked:
 *  - F_SETFL that clears O_APPEND on a file that has it set: FILE__WRITE
 *  - other F_SETFL, owner/signal/flag queries and setters: the
 *    file_has_perm() call with an empty permission mask
 *  - record-lock commands: FILE__LOCK
 * Commands not listed in the switch are not checked and leave the
 * result at 0.
 *
 * Returns 0 or the result of the selected file_has_perm() call.
 */
static int selinux_file_fcntl ( struct file * file , unsigned int cmd ,
unsigned long arg )
{
2008-11-14 10:39:21 +11:00
const struct cred * cred = current_cred ( ) ;
2005-04-16 15:20:36 -07:00
int err = 0 ;
switch ( cmd ) {
2008-04-17 13:17:49 -04:00
case F_SETFL :
/* Dropping O_APPEND from a file that currently has it needs write permission */
if ( ( file - > f_flags & O_APPEND ) & & ! ( arg & O_APPEND ) ) {
2008-11-14 10:39:21 +11:00
err = file_has_perm ( cred , file , FILE__WRITE ) ;
2005-04-16 15:20:36 -07:00
break ;
2008-04-17 13:17:49 -04:00
}
2020-08-23 17:36:59 -05:00
/* Any other F_SETFL request is handled like the commands below */
fallthrough ;
2008-04-17 13:17:49 -04:00
case F_SETOWN :
case F_SETSIG :
case F_GETFL :
case F_GETOWN :
case F_GETSIG :
2012-07-30 14:43:00 -07:00
case F_GETOWNER_UIDS :
2008-04-17 13:17:49 -04:00
/* Just check FD__USE permission */
2008-11-14 10:39:21 +11:00
err = file_has_perm ( cred , file , 0 ) ;
2008-04-17 13:17:49 -04:00
break ;
/* Every record-lock command below is checked against FILE__LOCK */
case F_GETLK :
case F_SETLK :
case F_SETLKW :
2014-04-22 08:23:58 -04:00
case F_OFD_GETLK :
case F_OFD_SETLK :
case F_OFD_SETLKW :
2005-04-16 15:20:36 -07:00
/* The 64-bit lock commands are only listed when BITS_PER_LONG is 32 */
# if BITS_PER_LONG == 32
2008-04-17 13:17:49 -04:00
case F_GETLK64 :
case F_SETLK64 :
case F_SETLKW64 :
2005-04-16 15:20:36 -07:00
# endif
2008-11-14 10:39:21 +11:00
err = file_has_perm ( cred , file , FILE__LOCK ) ;
2008-04-17 13:17:49 -04:00
break ;
2005-04-16 15:20:36 -07:00
}
return err ;
}
2014-08-22 11:27:32 -04:00
/*
 * Record the SID of the current task as the owner SID (fown_sid) in the
 * SELinux security struct attached to @file. The stored value is read
 * back by selinux_file_send_sigiotask().
 */
static void selinux_file_set_fowner ( struct file * file )
2005-04-16 15:20:36 -07:00
{
struct file_security_struct * fsec ;
2018-09-21 17:22:32 -07:00
fsec = selinux_file ( file ) ;
2008-11-14 10:39:19 +11:00
fsec - > fown_sid = current_sid ( ) ;
2005-04-16 15:20:36 -07:00
}
/*
 * Check whether the owner SID saved on the file behind @fown may deliver
 * a signal to @tsk.
 *
 * The source SID is fsec->fown_sid of fown->file, the target SID is
 * task_sid_obj(tsk), and the permission is derived from @signum with
 * signal_to_av(). A @signum of 0 is treated as SIGIO.
 *
 * Returns the result of avc_has_perm() for SECCLASS_PROCESS.
 */
static int selinux_file_send_sigiotask ( struct task_struct * tsk ,
struct fown_struct * fown , int signum )
{
2008-04-17 13:17:49 -04:00
struct file * file ;
2021-02-18 15:13:40 -05:00
u32 sid = task_sid_obj ( tsk ) ;
2005-04-16 15:20:36 -07:00
u32 perm ;
struct file_security_struct * fsec ;
/* struct fown_struct is never outside the context of a struct file */
2024-08-09 18:00:01 +02:00
file = fown - > file ;
2005-04-16 15:20:36 -07:00
2018-09-21 17:22:32 -07:00
fsec = selinux_file ( file ) ;
2005-04-16 15:20:36 -07:00
if ( ! signum )
perm = signal_to_av ( SIGIO ) ; /* as per send_sigio_to_task */
else
perm = signal_to_av ( signum ) ;
2023-03-09 13:30:37 -05:00
return avc_has_perm ( fsec - > fown_sid , sid ,
2005-04-16 15:20:36 -07:00
SECCLASS_PROCESS , perm , NULL ) ;
}
/*
 * Check whether the credentials of the current task grant, on @file,
 * the permission set that file_to_av() computes for that file.
 * Returns the result of file_has_perm().
 */
static int selinux_file_receive ( struct file * file )
{
2008-11-14 10:39:21 +11:00
const struct cred * cred = current_cred ( ) ;
return file_has_perm ( cred , file , file_to_av ( file ) ) ;
2005-04-16 15:20:36 -07:00
}
2018-07-10 14:13:18 -04:00
/*
 * Open-time hook for @file.
 *
 * Copies the SID of the inode behind @file and the current AVC policy
 * sequence number into the SELinux file security struct, then re-checks
 * the permissions computed by open_file_to_av() using the credentials
 * stored in file->f_cred.
 *
 * Returns the result of file_path_has_perm().
 */
static int selinux_file_open ( struct file * file )
2007-09-14 09:27:07 +09:00
{
struct file_security_struct * fsec ;
struct inode_security_struct * isec ;
CRED: Inaugurate COW credentials
Inaugurate copy-on-write credentials management. This uses RCU to manage the
credentials pointer in the task_struct with respect to accesses by other tasks.
A process may only modify its own credentials, and so does not need locking to
access or modify its own credentials.
A mutex (cred_replace_mutex) is added to the task_struct to control the effect
of PTRACE_ATTACHED on credential calculations, particularly with respect to
execve().
With this patch, the contents of an active credentials struct may not be
changed directly; rather a new set of credentials must be prepared, modified
and committed using something like the following sequence of events:
struct cred *new = prepare_creds();
int ret = blah(new);
if (ret < 0) {
abort_creds(new);
return ret;
}
return commit_creds(new);
There are some exceptions to this rule: the keyrings pointed to by the active
credentials may be instantiated - keyrings violate the COW rule as managing
COW keyrings is tricky, given that it is possible for a task to directly alter
the keys in a keyring in use by another task.
To help enforce this, various pointers to sets of credentials, such as those in
the task_struct, are declared const. The purpose of this is compile-time
discouragement of altering credentials through those pointers. Once a set of
credentials has been made public through one of these pointers, it may not be
modified, except under special circumstances:
(1) Its reference count may incremented and decremented.
(2) The keyrings to which it points may be modified, but not replaced.
The only safe way to modify anything else is to create a replacement and commit
using the functions described in Documentation/credentials.txt (which will be
added by a later patch).
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
This now prepares and commits credentials in various places in the
security code rather than altering the current creds directly.
(2) Temporary credential overrides.
do_coredump() and sys_faccessat() now prepare their own credentials and
temporarily override the ones currently on the acting thread, whilst
preventing interference from other threads by holding cred_replace_mutex
on the thread being dumped.
This will be replaced in a future patch by something that hands down the
credentials directly to the functions being called, rather than altering
the task's objective credentials.
(3) LSM interface.
A number of functions have been changed, added or removed:
(*) security_capset_check(), ->capset_check()
(*) security_capset_set(), ->capset_set()
Removed in favour of security_capset().
(*) security_capset(), ->capset()
New. This is passed a pointer to the new creds, a pointer to the old
creds and the proposed capability sets. It should fill in the new
creds or return an error. All pointers, barring the pointer to the
new creds, are now const.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
Changed; now returns a value, which will cause the process to be
killed if it's an error.
(*) security_task_alloc(), ->task_alloc_security()
Removed in favour of security_prepare_creds().
(*) security_cred_free(), ->cred_free()
New. Free security data attached to cred->security.
(*) security_prepare_creds(), ->cred_prepare()
New. Duplicate any security data attached to cred->security.
(*) security_commit_creds(), ->cred_commit()
New. Apply any security effects for the upcoming installation of new
security by commit_creds().
(*) security_task_post_setuid(), ->task_post_setuid()
Removed in favour of security_task_fix_setuid().
(*) security_task_fix_setuid(), ->task_fix_setuid()
Fix up the proposed new credentials for setuid(). This is used by
cap_set_fix_setuid() to implicitly adjust capabilities in line with
setuid() changes. Changes are made to the new credentials, rather
than the task itself as in security_task_post_setuid().
(*) security_task_reparent_to_init(), ->task_reparent_to_init()
Removed. Instead the task being reparented to init is referred
directly to init's credentials.
NOTE! This results in the loss of some state: SELinux's osid no
longer records the sid of the thread that forked it.
(*) security_key_alloc(), ->key_alloc()
(*) security_key_permission(), ->key_permission()
Changed. These now take cred pointers rather than task pointers to
refer to the security context.
(4) sys_capset().
This has been simplified and uses less locking. The LSM functions it
calls have been merged.
(5) reparent_to_kthreadd().
This gives the current thread the same credentials as init by simply using
commit_thread() to point that way.
(6) __sigqueue_alloc() and switch_uid()
__sigqueue_alloc() can't stop the target task from changing its creds
beneath it, so this function gets a reference to the currently applicable
user_struct which it then passes into the sigqueue struct it returns if
successful.
switch_uid() is now called from commit_creds(), and possibly should be
folded into that. commit_creds() should take care of protecting
__sigqueue_alloc().
(7) [sg]et[ug]id() and co and [sg]et_current_groups.
The set functions now all use prepare_creds(), commit_creds() and
abort_creds() to build and check a new set of credentials before applying
it.
security_task_set[ug]id() is called inside the prepared section. This
guarantees that nothing else will affect the creds until we've finished.
The calling of set_dumpable() has been moved into commit_creds().
Much of the functionality of set_user() has been moved into
commit_creds().
The get functions all simply access the data directly.
(8) security_task_prctl() and cap_task_prctl().
security_task_prctl() has been modified to return -ENOSYS if it doesn't
want to handle a function, or otherwise return the return value directly
rather than through an argument.
Additionally, cap_task_prctl() now prepares a new set of credentials, even
if it doesn't end up using it.
(9) Keyrings.
A number of changes have been made to the keyrings code:
(a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have
all been dropped and built in to the credentials functions directly.
They may want separating out again later.
(b) key_alloc() and search_process_keyrings() now take a cred pointer
rather than a task pointer to specify the security context.
(c) copy_creds() gives a new thread within the same thread group a new
thread keyring if its parent had one, otherwise it discards the thread
keyring.
(d) The authorisation key now points directly to the credentials to extend
the search into rather pointing to the task that carries them.
(e) Installing thread, process or session keyrings causes a new set of
credentials to be created, even though it's not strictly necessary for
process or session keyrings (they're shared).
(10) Usermode helper.
The usermode helper code now carries a cred struct pointer in its
subprocess_info struct instead of a new session keyring pointer. This set
of credentials is derived from init_cred and installed on the new process
after it has been cloned.
call_usermodehelper_setup() allocates the new credentials and
call_usermodehelper_freeinfo() discards them if they haven't been used. A
special cred function (prepare_usermodeinfo_creds()) is provided
specifically for call_usermodehelper_setup() to call.
call_usermodehelper_setkeys() adjusts the credentials to sport the
supplied keyring as the new session keyring.
(11) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) selinux_setprocattr() no longer does its check for whether the
current ptracer can access processes with the new SID inside the lock
that covers getting the ptracer's SID. Whilst this lock ensures that
the check is done with the ptracer pinned, the result is only valid
until the lock is released, so there's no point doing it inside the
lock.
(12) is_single_threaded().
This function has been extracted from selinux_setprocattr() and put into
a file of its own in the lib/ directory as join_session_keyring() now
wants to use it too.
The code in SELinux just checked to see whether a task shared mm_structs
with other tasks (CLONE_VM), but that isn't good enough. We really want
to know if they're part of the same thread group (CLONE_THREAD).
(13) nfsd.
The NFS server daemon now has to use the COW credentials to set the
credentials it is going to use. It really needs to pass the credentials
down to the functions it calls, but it can't do that until other patches
in this series have been applied.
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:23 +11:00
2018-09-21 17:22:32 -07:00
fsec = selinux_file ( file ) ;
2015-12-24 11:09:39 -05:00
isec = inode_security ( file_inode ( file ) ) ;
2007-09-14 09:27:07 +09:00
/*
* Save inode label and policy sequence number
* at open - time so that selinux_file_permission
* can determine whether revalidation is necessary .
* Task label is already saved in the file security
* struct as its SID .
*/
fsec - > isid = isec - > sid ;
2023-03-09 13:30:37 -05:00
fsec - > pseqno = avc_policy_seqno ( ) ;
2007-09-14 09:27:07 +09:00
/*
* Since the inode label or policy seqno may have changed
* between the selinux_inode_permission check and the saving
* of state above , recheck that access is still permitted .
* Otherwise , access might never be revalidated against the
* new inode label or new policy .
* This check is not redundant - do not remove .
*/
2018-07-10 14:13:18 -04:00
return file_path_has_perm ( file - > f_cred , file , open_file_to_av ( file ) ) ;
2007-09-14 09:27:07 +09:00
}
2005-04-16 15:20:36 -07:00
/* task security operations */
2017-03-28 23:08:45 +09:00
/*
 * Task allocation hook: asks the AVC whether the SID of the current task
 * has PROCESS__FORK, with that same SID used as both source and target.
 * @task and @clone_flags are accepted but not examined here.
 * Returns the result of avc_has_perm().
 */
static int selinux_task_alloc ( struct task_struct * task ,
unsigned long clone_flags )
2005-04-16 15:20:36 -07:00
{
2017-01-09 10:07:31 -05:00
u32 sid = current_sid ( ) ;
2023-03-09 13:30:37 -05:00
return avc_has_perm ( sid , sid , SECCLASS_PROCESS , PROCESS__FORK , NULL ) ;
2005-04-16 15:20:36 -07:00
}
CRED: Inaugurate COW credentials
Inaugurate copy-on-write credentials management. This uses RCU to manage the
credentials pointer in the task_struct with respect to accesses by other tasks.
A process may only modify its own credentials, and so does not need locking to
access or modify its own credentials.
A mutex (cred_replace_mutex) is added to the task_struct to control the effect
of PTRACE_ATTACHED on credential calculations, particularly with respect to
execve().
With this patch, the contents of an active credentials struct may not be
changed directly; rather a new set of credentials must be prepared, modified
and committed using something like the following sequence of events:
struct cred *new = prepare_creds();
int ret = blah(new);
if (ret < 0) {
abort_creds(new);
return ret;
}
return commit_creds(new);
There are some exceptions to this rule: the keyrings pointed to by the active
credentials may be instantiated - keyrings violate the COW rule as managing
COW keyrings is tricky, given that it is possible for a task to directly alter
the keys in a keyring in use by another task.
To help enforce this, various pointers to sets of credentials, such as those in
the task_struct, are declared const. The purpose of this is compile-time
discouragement of altering credentials through those pointers. Once a set of
credentials has been made public through one of these pointers, it may not be
modified, except under special circumstances:
(1) Its reference count may be incremented and decremented.
(2) The keyrings to which it points may be modified, but not replaced.
The only safe way to modify anything else is to create a replacement and commit
using the functions described in Documentation/credentials.txt (which will be
added by a later patch).
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
This now prepares and commits credentials in various places in the
security code rather than altering the current creds directly.
(2) Temporary credential overrides.
do_coredump() and sys_faccessat() now prepare their own credentials and
temporarily override the ones currently on the acting thread, whilst
preventing interference from other threads by holding cred_replace_mutex
on the thread being dumped.
This will be replaced in a future patch by something that hands down the
credentials directly to the functions being called, rather than altering
the task's objective credentials.
(3) LSM interface.
A number of functions have been changed, added or removed:
(*) security_capset_check(), ->capset_check()
(*) security_capset_set(), ->capset_set()
Removed in favour of security_capset().
(*) security_capset(), ->capset()
New. This is passed a pointer to the new creds, a pointer to the old
creds and the proposed capability sets. It should fill in the new
creds or return an error. All pointers, barring the pointer to the
new creds, are now const.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
Changed; now returns a value, which will cause the process to be
killed if it's an error.
(*) security_task_alloc(), ->task_alloc_security()
Removed in favour of security_prepare_creds().
(*) security_cred_free(), ->cred_free()
New. Free security data attached to cred->security.
(*) security_prepare_creds(), ->cred_prepare()
New. Duplicate any security data attached to cred->security.
(*) security_commit_creds(), ->cred_commit()
New. Apply any security effects for the upcoming installation of new
security by commit_creds().
(*) security_task_post_setuid(), ->task_post_setuid()
Removed in favour of security_task_fix_setuid().
(*) security_task_fix_setuid(), ->task_fix_setuid()
Fix up the proposed new credentials for setuid(). This is used by
cap_set_fix_setuid() to implicitly adjust capabilities in line with
setuid() changes. Changes are made to the new credentials, rather
than the task itself as in security_task_post_setuid().
(*) security_task_reparent_to_init(), ->task_reparent_to_init()
Removed. Instead the task being reparented to init is referred
directly to init's credentials.
NOTE! This results in the loss of some state: SELinux's osid no
longer records the sid of the thread that forked it.
(*) security_key_alloc(), ->key_alloc()
(*) security_key_permission(), ->key_permission()
Changed. These now take cred pointers rather than task pointers to
refer to the security context.
(4) sys_capset().
This has been simplified and uses less locking. The LSM functions it
calls have been merged.
(5) reparent_to_kthreadd().
This gives the current thread the same credentials as init by simply using
commit_thread() to point that way.
(6) __sigqueue_alloc() and switch_uid()
__sigqueue_alloc() can't stop the target task from changing its creds
beneath it, so this function gets a reference to the currently applicable
user_struct which it then passes into the sigqueue struct it returns if
successful.
switch_uid() is now called from commit_creds(), and possibly should be
folded into that. commit_creds() should take care of protecting
__sigqueue_alloc().
(7) [sg]et[ug]id() and co and [sg]et_current_groups.
The set functions now all use prepare_creds(), commit_creds() and
abort_creds() to build and check a new set of credentials before applying
it.
security_task_set[ug]id() is called inside the prepared section. This
guarantees that nothing else will affect the creds until we've finished.
The calling of set_dumpable() has been moved into commit_creds().
Much of the functionality of set_user() has been moved into
commit_creds().
The get functions all simply access the data directly.
(8) security_task_prctl() and cap_task_prctl().
security_task_prctl() has been modified to return -ENOSYS if it doesn't
want to handle a function, or otherwise return the return value directly
rather than through an argument.
Additionally, cap_task_prctl() now prepares a new set of credentials, even
if it doesn't end up using it.
(9) Keyrings.
A number of changes have been made to the keyrings code:
(a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have
all been dropped and built in to the credentials functions directly.
They may want separating out again later.
(b) key_alloc() and search_process_keyrings() now take a cred pointer
rather than a task pointer to specify the security context.
(c) copy_creds() gives a new thread within the same thread group a new
thread keyring if its parent had one, otherwise it discards the thread
keyring.
(d) The authorisation key now points directly to the credentials to extend
the search into rather than pointing to the task that carries them.
(e) Installing thread, process or session keyrings causes a new set of
credentials to be created, even though it's not strictly necessary for
process or session keyrings (they're shared).
(10) Usermode helper.
The usermode helper code now carries a cred struct pointer in its
subprocess_info struct instead of a new session keyring pointer. This set
of credentials is derived from init_cred and installed on the new process
after it has been cloned.
call_usermodehelper_setup() allocates the new credentials and
call_usermodehelper_freeinfo() discards them if they haven't been used. A
special cred function (prepare_usermodeinfo_creds()) is provided
specifically for call_usermodehelper_setup() to call.
call_usermodehelper_setkeys() adjusts the credentials to sport the
supplied keyring as the new session keyring.
(11) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) selinux_setprocattr() no longer does its check for whether the
current ptracer can access processes with the new SID inside the lock
that covers getting the ptracer's SID. Whilst this lock ensures that
the check is done with the ptracer pinned, the result is only valid
until the lock is released, so there's no point doing it inside the
lock.
(12) is_single_threaded().
This function has been extracted from selinux_setprocattr() and put into
a file of its own in the lib/ directory as join_session_keyring() now
wants to use it too.
The code in SELinux just checked to see whether a task shared mm_structs
with other tasks (CLONE_VM), but that isn't good enough. We really want
to know if they're part of the same thread group (CLONE_THREAD).
(13) nfsd.
The NFS server daemon now has to use the COW credentials to set the
credentials it is going to use. It really needs to pass the credentials
down to the functions it calls, but it can't do that until other patches
in this series have been applied.
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:23 +11:00
/*
 * Prepare a new set of credentials for modification.
 *
 * Copies the whole SELinux task security struct of @old into the one
 * attached to @new by struct assignment. @gfp is accepted but not
 * examined here. Always returns 0.
 */
static int selinux_cred_prepare ( struct cred * new , const struct cred * old ,
gfp_t gfp )
{
2018-11-12 09:30:56 -08:00
const struct task_security_struct * old_tsec = selinux_cred ( old ) ;
struct task_security_struct * tsec = selinux_cred ( new ) ;
2005-04-16 15:20:36 -07:00
2018-11-12 09:30:56 -08:00
* tsec = * old_tsec ;
2005-04-16 15:20:36 -07:00
return 0 ;
}
KEYS: Add a keyctl to install a process's session keyring on its parent [try #6]
Add a keyctl to install a process's session keyring onto its parent. This
replaces the parent's session keyring. Because the COW credential code does
not permit one process to change another process's credentials directly, the
change is deferred until userspace next starts executing again. Normally this
will be after a wait*() syscall.
To support this, three new security hooks have been provided:
cred_alloc_blank() to allocate unset security creds, cred_transfer() to fill in
the blank security creds and key_session_to_parent() - which asks the LSM if
the process may replace its parent's session keyring.
The replacement may only happen if the process has the same ownership details
as its parent, and the process has LINK permission on the session keyring, and
the session keyring is owned by the process, and the LSM permits it.
Note that this requires alteration to each architecture's notify_resume path.
This has been done for all arches barring blackfin, m68k* and xtensa, all of
which need assembly alteration to support TIF_NOTIFY_RESUME. This allows the
replacement to be performed at the point the parent process resumes userspace
execution.
This allows the userspace AFS pioctl emulation to fully emulate newpag() and
the VIOCSETTOK and VIOCSETTOK2 pioctls, all of which require the ability to
alter the parent process's PAG membership. However, since kAFS doesn't use
PAGs per se, but rather dumps the keys into the session keyring, the session
keyring of the parent must be replaced if, for example, VIOCSETTOK is passed
the newpag flag.
This can be tested with the following program:
#include <stdio.h>
#include <stdlib.h>
#include <keyutils.h>
#define KEYCTL_SESSION_TO_PARENT 18
#define OSERROR(X, S) do { if ((long)(X) == -1) { perror(S); exit(1); } } while(0)
int main(int argc, char **argv)
{
key_serial_t keyring, key;
long ret;
keyring = keyctl_join_session_keyring(argv[1]);
OSERROR(keyring, "keyctl_join_session_keyring");
key = add_key("user", "a", "b", 1, keyring);
OSERROR(key, "add_key");
ret = keyctl(KEYCTL_SESSION_TO_PARENT);
OSERROR(ret, "KEYCTL_SESSION_TO_PARENT");
return 0;
}
Compiled and linked with -lkeyutils, you should see something like:
[dhowells@andromeda ~]$ keyctl show
Session Keyring
-3 --alswrv 4043 4043 keyring: _ses
355907932 --alswrv 4043 -1 \_ keyring: _uid.4043
[dhowells@andromeda ~]$ /tmp/newpag
[dhowells@andromeda ~]$ keyctl show
Session Keyring
-3 --alswrv 4043 4043 keyring: _ses
1055658746 --alswrv 4043 4043 \_ user: a
[dhowells@andromeda ~]$ /tmp/newpag hello
[dhowells@andromeda ~]$ keyctl show
Session Keyring
-3 --alswrv 4043 4043 keyring: hello
340417692 --alswrv 4043 4043 \_ user: a
Where the test program creates a new session keyring, sticks a user key named
'a' into it and then installs it on its parent.
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>
2009-09-02 09:14:21 +01:00
/*
 * Transfer the SELinux data to a blank set of creds: the whole
 * task_security_struct of @old is copied over that of @new.
 */
static void selinux_cred_transfer ( struct cred * new , const struct cred * old )
{
2018-09-21 17:17:16 -07:00
const struct task_security_struct * old_tsec = selinux_cred ( old ) ;
struct task_security_struct * tsec = selinux_cred ( new ) ;
KEYS: Add a keyctl to install a process's session keyring on its parent [try #6]
Add a keyctl to install a process's session keyring onto its parent. This
replaces the parent's session keyring. Because the COW credential code does
not permit one process to change another process's credentials directly, the
change is deferred until userspace next starts executing again. Normally this
will be after a wait*() syscall.
To support this, three new security hooks have been provided:
cred_alloc_blank() to allocate unset security creds, cred_transfer() to fill in
the blank security creds and key_session_to_parent() - which asks the LSM if
the process may replace its parent's session keyring.
The replacement may only happen if the process has the same ownership details
as its parent, and the process has LINK permission on the session keyring, and
the session keyring is owned by the process, and the LSM permits it.
Note that this requires alteration to each architecture's notify_resume path.
This has been done for all arches barring blackfin, m68k* and xtensa, all of
which need assembly alteration to support TIF_NOTIFY_RESUME. This allows the
replacement to be performed at the point the parent process resumes userspace
execution.
This allows the userspace AFS pioctl emulation to fully emulate newpag() and
the VIOCSETTOK and VIOCSETTOK2 pioctls, all of which require the ability to
alter the parent process's PAG membership. However, since kAFS doesn't use
PAGs per se, but rather dumps the keys into the session keyring, the session
keyring of the parent must be replaced if, for example, VIOCSETTOK is passed
the newpag flag.
This can be tested with the following program:
#include <stdio.h>
#include <stdlib.h>
#include <keyutils.h>
#define KEYCTL_SESSION_TO_PARENT 18
#define OSERROR(X, S) do { if ((long)(X) == -1) { perror(S); exit(1); } } while(0)
int main(int argc, char **argv)
{
key_serial_t keyring, key;
long ret;
keyring = keyctl_join_session_keyring(argv[1]);
OSERROR(keyring, "keyctl_join_session_keyring");
key = add_key("user", "a", "b", 1, keyring);
OSERROR(key, "add_key");
ret = keyctl(KEYCTL_SESSION_TO_PARENT);
OSERROR(ret, "KEYCTL_SESSION_TO_PARENT");
return 0;
}
Compiled and linked with -lkeyutils, you should see something like:
[dhowells@andromeda ~]$ keyctl show
Session Keyring
-3 --alswrv 4043 4043 keyring: _ses
355907932 --alswrv 4043 -1 \_ keyring: _uid.4043
[dhowells@andromeda ~]$ /tmp/newpag
[dhowells@andromeda ~]$ keyctl show
Session Keyring
-3 --alswrv 4043 4043 keyring: _ses
1055658746 --alswrv 4043 4043 \_ user: a
[dhowells@andromeda ~]$ /tmp/newpag hello
[dhowells@andromeda ~]$ keyctl show
Session Keyring
-3 --alswrv 4043 4043 keyring: hello
340417692 --alswrv 4043 4043 \_ user: a
Where the test program creates a new session keyring, sticks a user key named
'a' into it and then installs it on its parent.
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>
2009-09-02 09:14:21 +01:00
* tsec = * old_tsec ;
}
2018-01-08 13:36:19 -08:00
/*
 * Report the security ID of a credential set: the value that cred_sid()
 * yields for @c is stored in *@secid.
 */
static void selinux_cred_getsecid(const struct cred *c, u32 *secid)
{
	const u32 sid = cred_sid(c);

	*secid = sid;
}
2024-10-09 10:32:18 -07:00
static void selinux_cred_getlsmprop ( const struct cred * c , struct lsm_prop * prop )
{
prop - > selinux . secid = cred_sid ( c ) ;
}
2008-11-14 10:39:28 +11:00
/*
 * Set the security data for a kernel service.
 *
 * Checks KERNEL_SERVICE__USE_AS_OVERRIDE (class SECCLASS_KERNEL_SERVICE)
 * for the current task's SID against @secid.  Only when avc_has_perm()
 * returns 0 is @new modified: its sid becomes @secid and all the creation
 * contexts (create_sid, keycreate_sid, sockcreate_sid) are reset to 0,
 * i.e. unlabelled.
 *
 * Returns the avc_has_perm() result; on a non-zero result @new is left
 * untouched.
 */
static int selinux_kernel_act_as ( struct cred * new , u32 secid )
{
2018-09-21 17:17:16 -07:00
struct task_security_struct * tsec = selinux_cred ( new ) ;
2008-11-14 10:39:28 +11:00
u32 sid = current_sid ( ) ;
int ret ;
2023-03-09 13:30:37 -05:00
ret = avc_has_perm ( sid , secid ,
2008-11-14 10:39:28 +11:00
SECCLASS_KERNEL_SERVICE ,
KERNEL_SERVICE__USE_AS_OVERRIDE ,
NULL ) ;
if ( ret = = 0 ) {
tsec - > sid = secid ;
tsec - > create_sid = 0 ;
tsec - > keycreate_sid = 0 ;
tsec - > sockcreate_sid = 0 ;
}
return ret ;
}
/*
 * Set the file creation context in a security record to the same as the
 * objective context of the specified inode.
 *
 * Checks KERNEL_SERVICE__CREATE_FILES_AS (class SECCLASS_KERNEL_SERVICE)
 * for the current task's SID against the SID stored in @inode's
 * inode_security_struct.  Only when avc_has_perm() returns 0 is that SID
 * copied into the create_sid of @new.
 *
 * Returns the avc_has_perm() result.
 */
static int selinux_kernel_create_files_as ( struct cred * new , struct inode * inode )
{
2015-12-24 11:09:39 -05:00
struct inode_security_struct * isec = inode_security ( inode ) ;
2018-09-21 17:17:16 -07:00
struct task_security_struct * tsec = selinux_cred ( new ) ;
2008-11-14 10:39:28 +11:00
u32 sid = current_sid ( ) ;
int ret ;
2023-03-09 13:30:37 -05:00
ret = avc_has_perm ( sid , isec - > sid ,
2008-11-14 10:39:28 +11:00
SECCLASS_KERNEL_SERVICE ,
KERNEL_SERVICE__CREATE_FILES_AS ,
NULL ) ;
if ( ret = = 0 )
tsec - > create_sid = isec - > sid ;
2010-02-26 01:56:16 +00:00
return ret ;
2008-11-14 10:39:28 +11:00
}
2009-11-03 16:35:32 +11:00
/*
 * Permission check for a request to load the kernel module @kmod_name.
 *
 * Checks SYSTEM__MODULE_REQUEST (class SECCLASS_SYSTEM) for the current
 * task's SID against SECINITSID_KERNEL.  The module name is handed to
 * avc_has_perm() as LSM_AUDIT_DATA_KMOD audit data.
 *
 * Returns the avc_has_perm() result.
 */
static int selinux_kernel_module_request ( char * kmod_name )
2009-08-13 09:45:03 -04:00
{
2009-11-03 16:35:32 +11:00
struct common_audit_data ad ;
2012-04-04 15:01:43 -04:00
/* only the type and the kmod_name member of the audit record are set */
ad . type = LSM_AUDIT_DATA_KMOD ;
2009-11-03 16:35:32 +11:00
ad . u . kmod_name = kmod_name ;
2023-03-09 13:30:37 -05:00
return avc_has_perm ( current_sid ( ) , SECINITSID_KERNEL , SECCLASS_SYSTEM ,
2009-11-03 16:35:32 +11:00
SYSTEM__MODULE_REQUEST , & ad ) ;
2009-08-13 09:45:03 -04:00
}
2016-04-05 13:06:27 -07:00
/*
 * Permission checks for loading a kernel module, optionally from @file.
 *
 * @file == NULL (the init_module case): the only check is
 * SYSTEM__MODULE_LOAD (class SECCLASS_SYSTEM) of the current task's SID
 * against itself, with no audit data.
 *
 * @file != NULL (the finit_module case), with @file as LSM_AUDIT_DATA_FILE
 * audit data:
 *   1. if the current SID differs from the SID in the file's
 *      file_security_struct, FD__USE (class SECCLASS_FD) is checked against
 *      that SID and a non-zero result is returned immediately;
 *   2. SYSTEM__MODULE_LOAD is then checked against the SID in the
 *      inode_security_struct of the file's inode.
 *
 * Returns the result of the last avc_has_perm() call made.
 */
static int selinux_kernel_module_from_file ( struct file * file )
{
struct common_audit_data ad ;
struct inode_security_struct * isec ;
struct file_security_struct * fsec ;
u32 sid = current_sid ( ) ;
int rc ;
/* init_module */
if ( file = = NULL )
2023-03-09 13:30:37 -05:00
return avc_has_perm ( sid , sid , SECCLASS_SYSTEM ,
2016-04-05 13:06:27 -07:00
SYSTEM__MODULE_LOAD , NULL ) ;
/* finit_module */
2016-04-04 14:14:42 -04:00
2016-09-09 11:37:49 -04:00
ad . type = LSM_AUDIT_DATA_FILE ;
ad . u . file = file ;
2016-04-05 13:06:27 -07:00
2018-09-21 17:22:32 -07:00
fsec = selinux_file ( file ) ;
2016-04-05 13:06:27 -07:00
/* the FD__USE check is skipped when the file carries the caller's own SID */
if ( sid ! = fsec - > sid ) {
2023-03-09 13:30:37 -05:00
rc = avc_has_perm ( sid , fsec - > sid , SECCLASS_FD , FD__USE , & ad ) ;
2016-04-05 13:06:27 -07:00
if ( rc )
return rc ;
}
2016-04-04 14:14:42 -04:00
isec = inode_security ( file_inode ( file ) ) ;
2023-03-09 13:30:37 -05:00
return avc_has_perm ( sid , isec - > sid , SECCLASS_SYSTEM ,
2016-04-05 13:06:27 -07:00
SYSTEM__MODULE_LOAD , & ad ) ;
}
/*
 * Hook for the kernel reading a file; only @id == READING_MODULE is
 * mediated, every other id returns 0 without any check.
 *
 * For READING_MODULE the decision is delegated to
 * selinux_kernel_module_from_file(): @file is passed on when @contents is
 * true, otherwise NULL is passed, which selects that function's
 * file-less (init_module) check.
 *
 * Returns 0 or the result of selinux_kernel_module_from_file().
 */
static int selinux_kernel_read_file ( struct file * file ,
2020-10-02 10:38:23 -07:00
enum kernel_read_file_id id ,
bool contents )
2016-04-05 13:06:27 -07:00
{
int rc = 0 ;
switch ( id ) {
case READING_MODULE :
2020-10-02 10:38:23 -07:00
rc = selinux_kernel_module_from_file ( contents ? file : NULL ) ;
2016-04-05 13:06:27 -07:00
break ;
default :
break ;
}
return rc ;
}
2020-10-02 10:38:20 -07:00
/*
 * Hook for the kernel loading data that does not come from a file; only
 * @id == LOADING_MODULE is mediated, every other id returns 0 without any
 * check.  @contents is not consulted.
 *
 * For LOADING_MODULE the file-less check of
 * selinux_kernel_module_from_file() is used (NULL is passed as the file).
 *
 * Returns 0 or the result of selinux_kernel_module_from_file().
 */
static int selinux_kernel_load_data ( enum kernel_load_data_id id , bool contents )
2018-07-13 14:06:02 -04:00
{
int rc = 0 ;
switch ( id ) {
case LOADING_MODULE :
rc = selinux_kernel_module_from_file ( NULL ) ;
2020-11-20 12:32:26 -06:00
break ;
2018-07-13 14:06:02 -04:00
default :
break ;
}
return rc ;
}
2005-04-16 15:20:36 -07:00
/*
 * Check PROCESS__SETPGID (class SECCLASS_PROCESS) for the current task's
 * SID against the SID that task_sid_obj() reports for @p.  @pgid is not
 * consulted.  Returns the avc_has_perm() result.
 */
static int selinux_task_setpgid ( struct task_struct * p , pid_t pgid )
{
2023-03-09 13:30:37 -05:00
return avc_has_perm ( current_sid ( ) , task_sid_obj ( p ) , SECCLASS_PROCESS ,
2017-01-09 10:07:31 -05:00
PROCESS__SETPGID , NULL ) ;
2005-04-16 15:20:36 -07:00
}
/*
 * Check PROCESS__GETPGID (class SECCLASS_PROCESS) for the current task's
 * SID against the SID that task_sid_obj() reports for @p.  Returns the
 * avc_has_perm() result.
 */
static int selinux_task_getpgid ( struct task_struct * p )
{
2023-03-09 13:30:37 -05:00
return avc_has_perm ( current_sid ( ) , task_sid_obj ( p ) , SECCLASS_PROCESS ,
2017-01-09 10:07:31 -05:00
PROCESS__GETPGID , NULL ) ;
2005-04-16 15:20:36 -07:00
}
/*
 * Check PROCESS__GETSESSION (class SECCLASS_PROCESS) for the current
 * task's SID against the SID that task_sid_obj() reports for @p.  Returns
 * the avc_has_perm() result.
 */
static int selinux_task_getsid ( struct task_struct * p )
{
2023-03-09 13:30:37 -05:00
return avc_has_perm ( current_sid ( ) , task_sid_obj ( p ) , SECCLASS_PROCESS ,
2017-01-09 10:07:31 -05:00
PROCESS__GETSESSION , NULL ) ;
2005-04-16 15:20:36 -07:00
}
2024-10-09 10:32:15 -07:00
/*
 * Store the value of current_sid() in the SELinux slot of @prop
 * (prop->selinux.secid).  No other member of @prop is written.
 */
static void selinux_current_getlsmprop_subj ( struct lsm_prop * prop )
2021-02-18 15:13:40 -05:00
{
2024-10-09 10:32:15 -07:00
prop - > selinux . secid = current_sid ( ) ;
2021-02-18 15:13:40 -05:00
}
2024-10-09 10:32:15 -07:00
/*
 * Store the SID that task_sid_obj() reports for @p in the SELinux slot of
 * @prop (prop->selinux.secid).  No other member of @prop is written.
 */
static void selinux_task_getlsmprop_obj ( struct task_struct * p ,
struct lsm_prop * prop )
2006-06-30 01:55:46 -07:00
{
2024-10-09 10:32:15 -07:00
prop - > selinux . secid = task_sid_obj ( p ) ;
2006-06-30 01:55:46 -07:00
}
2005-04-16 15:20:36 -07:00
/*
 * Check PROCESS__SETSCHED (class SECCLASS_PROCESS) for the current task's
 * SID against the SID that task_sid_obj() reports for @p.  The requested
 * @nice value is not consulted.  Returns the avc_has_perm() result.
 */
static int selinux_task_setnice ( struct task_struct * p , int nice )
{
2023-03-09 13:30:37 -05:00
return avc_has_perm ( current_sid ( ) , task_sid_obj ( p ) , SECCLASS_PROCESS ,
2017-01-09 10:07:31 -05:00
PROCESS__SETSCHED , NULL ) ;
2005-04-16 15:20:36 -07:00
}
2006-06-23 02:03:58 -07:00
/*
 * Check PROCESS__SETSCHED (class SECCLASS_PROCESS) for the current task's
 * SID against the SID that task_sid_obj() reports for @p.  The requested
 * @ioprio value is not consulted.  Returns the avc_has_perm() result.
 */
static int selinux_task_setioprio ( struct task_struct * p , int ioprio )
{
2023-03-09 13:30:37 -05:00
return avc_has_perm ( current_sid ( ) , task_sid_obj ( p ) , SECCLASS_PROCESS ,
2017-01-09 10:07:31 -05:00
PROCESS__SETSCHED , NULL ) ;
2006-06-23 02:03:58 -07:00
}
2006-06-30 01:55:49 -07:00
/*
 * Check PROCESS__GETSCHED (class SECCLASS_PROCESS) for the current task's
 * SID against the SID that task_sid_obj() reports for @p.  Returns the
 * avc_has_perm() result.
 */
static int selinux_task_getioprio ( struct task_struct * p )
{
2023-03-09 13:30:37 -05:00
return avc_has_perm ( current_sid ( ) , task_sid_obj ( p ) , SECCLASS_PROCESS ,
2017-01-09 10:07:31 -05:00
PROCESS__GETSCHED , NULL ) ;
2006-06-30 01:55:49 -07:00
}
2017-10-04 20:32:18 +02:00
static int selinux_task_prlimit ( const struct cred * cred , const struct cred * tcred ,
unsigned int flags )
prlimit,security,selinux: add a security hook for prlimit
When SELinux was first added to the kernel, a process could only get
and set its own resource limits via getrlimit(2) and setrlimit(2), so no
MAC checks were required for those operations, and thus no security hooks
were defined for them. Later, SELinux introduced a hook for setrlimit(2)
with a check if the hard limit was being changed in order to be able to
rely on the hard limit value as a safe reset point upon context
transitions.
Later on, when prlimit(2) was added to the kernel with the ability to get
or set resource limits (hard or soft) of another process, LSM/SELinux was
not updated other than to pass the target process to the setrlimit hook.
This resulted in incomplete control over both getting and setting the
resource limits of another process.
Add a new security_task_prlimit() hook to the check_prlimit_permission()
function to provide complete mediation. The hook is only called when
acting on another task, and only if the existing DAC/capability checks
would allow access. Pass flags down to the hook to indicate whether the
prlimit(2) call will read, write, or both read and write the resource
limits of the target process.
The existing security_task_setrlimit() hook is left alone; it continues
to serve a purpose in supporting the ability to make decisions based on
the old and/or new resource limit values when setting limits. This
is consistent with the DAC/capability logic, where
check_prlimit_permission() performs generic DAC/capability checks for
acting on another task, while do_prlimit() performs a capability check
based on a comparison of the old and new resource limits. Fix the
inline documentation for the hook to match the code.
Implement the new hook for SELinux. For setting resource limits, we
reuse the existing setrlimit permission. Note that this does overload
the setrlimit permission to mean the ability to set the resource limit
(soft or hard) of another process or the ability to change one's own
hard limit. For getting resource limits, a new getrlimit permission
is defined. This was not originally defined since getrlimit(2) could
only be used to obtain a process' own limits.
Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: James Morris <james.l.morris@oracle.com>
2017-02-17 07:57:00 -05:00
{
u32 av = 0 ;
2017-02-28 09:35:08 -05:00
if ( ! flags )
return 0 ;
prlimit,security,selinux: add a security hook for prlimit
When SELinux was first added to the kernel, a process could only get
and set its own resource limits via getrlimit(2) and setrlimit(2), so no
MAC checks were required for those operations, and thus no security hooks
were defined for them. Later, SELinux introduced a hook for setrlimit(2)
with a check if the hard limit was being changed in order to be able to
rely on the hard limit value as a safe reset point upon context
transitions.
Later on, when prlimit(2) was added to the kernel with the ability to get
or set resource limits (hard or soft) of another process, LSM/SELinux was
not updated other than to pass the target process to the setrlimit hook.
This resulted in incomplete control over both getting and setting the
resource limits of another process.
Add a new security_task_prlimit() hook to the check_prlimit_permission()
function to provide complete mediation. The hook is only called when
acting on another task, and only if the existing DAC/capability checks
would allow access. Pass flags down to the hook to indicate whether the
prlimit(2) call will read, write, or both read and write the resource
limits of the target process.
The existing security_task_setrlimit() hook is left alone; it continues
to serve a purpose in supporting the ability to make decisions based on
the old and/or new resource limit values when setting limits. This
is consistent with the DAC/capability logic, where
check_prlimit_permission() performs generic DAC/capability checks for
acting on another task, while do_prlimit() performs a capability check
based on a comparison of the old and new resource limits. Fix the
inline documentation for the hook to match the code.
Implement the new hook for SELinux. For setting resource limits, we
reuse the existing setrlimit permission. Note that this does overload
the setrlimit permission to mean the ability to set the resource limit
(soft or hard) of another process or the ability to change one's own
hard limit. For getting resource limits, a new getrlimit permission
is defined. This was not originally defined since getrlimit(2) could
only be used to obtain a process' own limits.
Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: James Morris <james.l.morris@oracle.com>
2017-02-17 07:57:00 -05:00
if ( flags & LSM_PRLIMIT_WRITE )
av | = PROCESS__SETRLIMIT ;
if ( flags & LSM_PRLIMIT_READ )
av | = PROCESS__GETRLIMIT ;
2023-03-09 13:30:37 -05:00
return avc_has_perm ( cred_sid ( cred ) , cred_sid ( tcred ) ,
prlimit,security,selinux: add a security hook for prlimit
When SELinux was first added to the kernel, a process could only get
and set its own resource limits via getrlimit(2) and setrlimit(2), so no
MAC checks were required for those operations, and thus no security hooks
were defined for them. Later, SELinux introduced a hook for setrlimit(2)
with a check if the hard limit was being changed in order to be able to
rely on the hard limit value as a safe reset point upon context
transitions.
Later on, when prlimit(2) was added to the kernel with the ability to get
or set resource limits (hard or soft) of another process, LSM/SELinux was
not updated other than to pass the target process to the setrlimit hook.
This resulted in incomplete control over both getting and setting the
resource limits of another process.
Add a new security_task_prlimit() hook to the check_prlimit_permission()
function to provide complete mediation. The hook is only called when
acting on another task, and only if the existing DAC/capability checks
would allow access. Pass flags down to the hook to indicate whether the
prlimit(2) call will read, write, or both read and write the resource
limits of the target process.
The existing security_task_setrlimit() hook is left alone; it continues
to serve a purpose in supporting the ability to make decisions based on
the old and/or new resource limit values when setting limits. This
is consistent with the DAC/capability logic, where
check_prlimit_permission() performs generic DAC/capability checks for
acting on another task, while do_prlimit() performs a capability check
based on a comparison of the old and new resource limits. Fix the
inline documentation for the hook to match the code.
Implement the new hook for SELinux. For setting resource limits, we
reuse the existing setrlimit permission. Note that this does overload
the setrlimit permission to mean the ability to set the resource limit
(soft or hard) of another process or the ability to change one's own
hard limit. For getting resource limits, a new getrlimit permission
is defined. This was not originally defined since getrlimit(2) could
only be used to obtain a process' own limits.
Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: James Morris <james.l.morris@oracle.com>
2017-02-17 07:57:00 -05:00
SECCLASS_PROCESS , av , NULL ) ;
}
2009-08-26 18:41:16 +02:00
static int selinux_task_setrlimit ( struct task_struct * p , unsigned int resource ,
struct rlimit * new_rlim )
2005-04-16 15:20:36 -07:00
{
2009-08-26 18:41:16 +02:00
struct rlimit * old_rlim = p - > signal - > rlim + resource ;
2005-04-16 15:20:36 -07:00
/* Control the ability to change the hard limit (whether
lowering or raising it ) , so that the hard limit can
later be used as a safe reset point for the soft limit
CRED: Inaugurate COW credentials
Inaugurate copy-on-write credentials management. This uses RCU to manage the
credentials pointer in the task_struct with respect to accesses by other tasks.
A process may only modify its own credentials, and so does not need locking to
access or modify its own credentials.
A mutex (cred_replace_mutex) is added to the task_struct to control the effect
of PTRACE_ATTACHED on credential calculations, particularly with respect to
execve().
With this patch, the contents of an active credentials struct may not be
changed directly; rather a new set of credentials must be prepared, modified
and committed using something like the following sequence of events:
struct cred *new = prepare_creds();
int ret = blah(new);
if (ret < 0) {
abort_creds(new);
return ret;
}
return commit_creds(new);
There are some exceptions to this rule: the keyrings pointed to by the active
credentials may be instantiated - keyrings violate the COW rule as managing
COW keyrings is tricky, given that it is possible for a task to directly alter
the keys in a keyring in use by another task.
To help enforce this, various pointers to sets of credentials, such as those in
the task_struct, are declared const. The purpose of this is compile-time
discouragement of altering credentials through those pointers. Once a set of
credentials has been made public through one of these pointers, it may not be
modified, except under special circumstances:
(1) Its reference count may be incremented and decremented.
(2) The keyrings to which it points may be modified, but not replaced.
The only safe way to modify anything else is to create a replacement and commit
using the functions described in Documentation/credentials.txt (which will be
added by a later patch).
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
This now prepares and commits credentials in various places in the
security code rather than altering the current creds directly.
(2) Temporary credential overrides.
do_coredump() and sys_faccessat() now prepare their own credentials and
temporarily override the ones currently on the acting thread, whilst
preventing interference from other threads by holding cred_replace_mutex
on the thread being dumped.
This will be replaced in a future patch by something that hands down the
credentials directly to the functions being called, rather than altering
the task's objective credentials.
(3) LSM interface.
A number of functions have been changed, added or removed:
(*) security_capset_check(), ->capset_check()
(*) security_capset_set(), ->capset_set()
Removed in favour of security_capset().
(*) security_capset(), ->capset()
New. This is passed a pointer to the new creds, a pointer to the old
creds and the proposed capability sets. It should fill in the new
creds or return an error. All pointers, barring the pointer to the
new creds, are now const.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
Changed; now returns a value, which will cause the process to be
killed if it's an error.
(*) security_task_alloc(), ->task_alloc_security()
Removed in favour of security_prepare_creds().
(*) security_cred_free(), ->cred_free()
New. Free security data attached to cred->security.
(*) security_prepare_creds(), ->cred_prepare()
New. Duplicate any security data attached to cred->security.
(*) security_commit_creds(), ->cred_commit()
New. Apply any security effects for the upcoming installation of new
security by commit_creds().
(*) security_task_post_setuid(), ->task_post_setuid()
Removed in favour of security_task_fix_setuid().
(*) security_task_fix_setuid(), ->task_fix_setuid()
Fix up the proposed new credentials for setuid(). This is used by
cap_set_fix_setuid() to implicitly adjust capabilities in line with
setuid() changes. Changes are made to the new credentials, rather
than the task itself as in security_task_post_setuid().
(*) security_task_reparent_to_init(), ->task_reparent_to_init()
Removed. Instead the task being reparented to init is referred
directly to init's credentials.
NOTE! This results in the loss of some state: SELinux's osid no
longer records the sid of the thread that forked it.
(*) security_key_alloc(), ->key_alloc()
(*) security_key_permission(), ->key_permission()
Changed. These now take cred pointers rather than task pointers to
refer to the security context.
(4) sys_capset().
This has been simplified and uses less locking. The LSM functions it
calls have been merged.
(5) reparent_to_kthreadd().
This gives the current thread the same credentials as init by simply using
commit_thread() to point that way.
(6) __sigqueue_alloc() and switch_uid()
__sigqueue_alloc() can't stop the target task from changing its creds
beneath it, so this function gets a reference to the currently applicable
user_struct which it then passes into the sigqueue struct it returns if
successful.
switch_uid() is now called from commit_creds(), and possibly should be
folded into that. commit_creds() should take care of protecting
__sigqueue_alloc().
(7) [sg]et[ug]id() and co and [sg]et_current_groups.
The set functions now all use prepare_creds(), commit_creds() and
abort_creds() to build and check a new set of credentials before applying
it.
security_task_set[ug]id() is called inside the prepared section. This
guarantees that nothing else will affect the creds until we've finished.
The calling of set_dumpable() has been moved into commit_creds().
Much of the functionality of set_user() has been moved into
commit_creds().
The get functions all simply access the data directly.
(8) security_task_prctl() and cap_task_prctl().
security_task_prctl() has been modified to return -ENOSYS if it doesn't
want to handle a function, or otherwise return the return value directly
rather than through an argument.
Additionally, cap_task_prctl() now prepares a new set of credentials, even
if it doesn't end up using it.
(9) Keyrings.
A number of changes have been made to the keyrings code:
(a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have
all been dropped and built in to the credentials functions directly.
They may want separating out again later.
(b) key_alloc() and search_process_keyrings() now take a cred pointer
rather than a task pointer to specify the security context.
(c) copy_creds() gives a new thread within the same thread group a new
thread keyring if its parent had one, otherwise it discards the thread
keyring.
(d) The authorisation key now points directly to the credentials to extend
the search into rather than pointing to the task that carries them.
(e) Installing thread, process or session keyrings causes a new set of
credentials to be created, even though it's not strictly necessary for
process or session keyrings (they're shared).
(10) Usermode helper.
The usermode helper code now carries a cred struct pointer in its
subprocess_info struct instead of a new session keyring pointer. This set
of credentials is derived from init_cred and installed on the new process
after it has been cloned.
call_usermodehelper_setup() allocates the new credentials and
call_usermodehelper_freeinfo() discards them if they haven't been used. A
special cred function (prepare_usermodeinfo_creds()) is provided
specifically for call_usermodehelper_setup() to call.
call_usermodehelper_setkeys() adjusts the credentials to sport the
supplied keyring as the new session keyring.
(11) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) selinux_setprocattr() no longer does its check for whether the
current ptracer can access processes with the new SID inside the lock
that covers getting the ptracer's SID. Whilst this lock ensures that
the check is done with the ptracer pinned, the result is only valid
until the lock is released, so there's no point doing it inside the
lock.
(12) is_single_threaded().
This function has been extracted from selinux_setprocattr() and put into
a file of its own in the lib/ directory as join_session_keyring() now
wants to use it too.
The code in SELinux just checked to see whether a task shared mm_structs
with other tasks (CLONE_VM), but that isn't good enough. We really want
to know if they're part of the same thread group (CLONE_THREAD).
(13) nfsd.
The NFS server daemon now has to use the COW credentials to set the
credentials it is going to use. It really needs to pass the credentials
down to the functions it calls, but it can't do that until other patches
in this series have been applied.
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:23 +11:00
upon context transitions . See selinux_bprm_committing_creds . */
2005-04-16 15:20:36 -07:00
if ( old_rlim - > rlim_max ! = new_rlim - > rlim_max )
2023-03-09 13:30:37 -05:00
return avc_has_perm ( current_sid ( ) , task_sid_obj ( p ) ,
2017-01-09 10:07:31 -05:00
SECCLASS_PROCESS , PROCESS__SETRLIMIT , NULL ) ;
2005-04-16 15:20:36 -07:00
return 0 ;
}
2010-10-15 04:21:18 +09:00
/*
 * Check PROCESS__SETSCHED (class SECCLASS_PROCESS) for the current task's
 * SID against the SID that task_sid_obj() reports for @p.  Returns the
 * avc_has_perm() result.
 */
static int selinux_task_setscheduler ( struct task_struct * p )
2005-04-16 15:20:36 -07:00
{
2023-03-09 13:30:37 -05:00
return avc_has_perm ( current_sid ( ) , task_sid_obj ( p ) , SECCLASS_PROCESS ,
2017-01-09 10:07:31 -05:00
PROCESS__SETSCHED , NULL ) ;
2005-04-16 15:20:36 -07:00
}
/*
 * Check PROCESS__GETSCHED (class SECCLASS_PROCESS) for the current task's
 * SID against the SID that task_sid_obj() reports for @p.  Returns the
 * avc_has_perm() result.
 */
static int selinux_task_getscheduler ( struct task_struct * p )
{
2023-03-09 13:30:37 -05:00
return avc_has_perm ( current_sid ( ) , task_sid_obj ( p ) , SECCLASS_PROCESS ,
2017-01-09 10:07:31 -05:00
PROCESS__GETSCHED , NULL ) ;
2005-04-16 15:20:36 -07:00
}
2006-06-23 02:04:01 -07:00
/*
 * Check PROCESS__SETSCHED (class SECCLASS_PROCESS) for the current task's
 * SID against the SID that task_sid_obj() reports for @p; this hook uses
 * the SETSCHED permission rather than a dedicated one.  Returns the
 * avc_has_perm() result.
 */
static int selinux_task_movememory ( struct task_struct * p )
{
2023-03-09 13:30:37 -05:00
return avc_has_perm ( current_sid ( ) , task_sid_obj ( p ) , SECCLASS_PROCESS ,
2017-01-09 10:07:31 -05:00
PROCESS__SETSCHED , NULL ) ;
2006-06-23 02:04:01 -07:00
}
2018-09-25 11:27:20 +02:00
static int selinux_task_kill ( struct task_struct * p , struct kernel_siginfo * info ,
usb, signal, security: only pass the cred, not the secid, to kill_pid_info_as_cred and security_task_kill
commit d178bc3a708f39cbfefc3fab37032d3f2511b4ec ("user namespace: usb:
make usb urbs user namespace aware (v2)") changed kill_pid_info_as_uid
to kill_pid_info_as_cred, saving and passing a cred structure instead of
uids. Since the secid can be obtained from the cred, drop the secid fields
from the usb_dev_state and async structures, and drop the secid argument to
kill_pid_info_as_cred. Replace the secid argument to security_task_kill
with the cred. Update SELinux, Smack, and AppArmor to use the cred, which
avoids the need for Smack and AppArmor to use a secid at all in this hook.
Further changes to Smack might still be required to take full advantage of
this change, since it should now be possible to perform capability
checking based on the supplied cred. The changes to Smack and AppArmor
have only been compile-tested.
Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Acked-by: Paul Moore <paul@paul-moore.com>
Acked-by: Casey Schaufler <casey@schaufler-ca.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: John Johansen <john.johansen@canonical.com>
Signed-off-by: James Morris <james.morris@microsoft.com>
2017-09-08 12:40:01 -04:00
int sig , const struct cred * cred )
2005-04-16 15:20:36 -07:00
{
usb, signal, security: only pass the cred, not the secid, to kill_pid_info_as_cred and security_task_kill
commit d178bc3a708f39cbfefc3fab37032d3f2511b4ec ("user namespace: usb:
make usb urbs user namespace aware (v2)") changed kill_pid_info_as_uid
to kill_pid_info_as_cred, saving and passing a cred structure instead of
uids. Since the secid can be obtained from the cred, drop the secid fields
from the usb_dev_state and async structures, and drop the secid argument to
kill_pid_info_as_cred. Replace the secid argument to security_task_kill
with the cred. Update SELinux, Smack, and AppArmor to use the cred, which
avoids the need for Smack and AppArmor to use a secid at all in this hook.
Further changes to Smack might still be required to take full advantage of
this change, since it should now be possible to perform capability
checking based on the supplied cred. The changes to Smack and AppArmor
have only been compile-tested.
Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Acked-by: Paul Moore <paul@paul-moore.com>
Acked-by: Casey Schaufler <casey@schaufler-ca.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: John Johansen <john.johansen@canonical.com>
Signed-off-by: James Morris <james.morris@microsoft.com>
2017-09-08 12:40:01 -04:00
u32 secid ;
2005-04-16 15:20:36 -07:00
u32 perm ;
if ( ! sig )
perm = PROCESS__SIGNULL ; /* null signal; existence test */
else
perm = signal_to_av ( sig ) ;
usb, signal, security: only pass the cred, not the secid, to kill_pid_info_as_cred and security_task_kill
commit d178bc3a708f39cbfefc3fab37032d3f2511b4ec ("user namespace: usb:
make usb urbs user namespace aware (v2)") changed kill_pid_info_as_uid
to kill_pid_info_as_cred, saving and passing a cred structure instead of
uids. Since the secid can be obtained from the cred, drop the secid fields
from the usb_dev_state and async structures, and drop the secid argument to
kill_pid_info_as_cred. Replace the secid argument to security_task_kill
with the cred. Update SELinux, Smack, and AppArmor to use the cred, which
avoids the need for Smack and AppArmor to use a secid at all in this hook.
Further changes to Smack might still be required to take full advantage of
this change, since it should now be possible to perform capability
checking based on the supplied cred. The changes to Smack and AppArmor
have only been compile-tested.
Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Acked-by: Paul Moore <paul@paul-moore.com>
Acked-by: Casey Schaufler <casey@schaufler-ca.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: John Johansen <john.johansen@canonical.com>
Signed-off-by: James Morris <james.morris@microsoft.com>
2017-09-08 12:40:01 -04:00
if ( ! cred )
2017-01-09 10:07:31 -05:00
secid = current_sid ( ) ;
usb, signal, security: only pass the cred, not the secid, to kill_pid_info_as_cred and security_task_kill
commit d178bc3a708f39cbfefc3fab37032d3f2511b4ec ("user namespace: usb:
make usb urbs user namespace aware (v2)") changed kill_pid_info_as_uid
to kill_pid_info_as_cred, saving and passing a cred structure instead of
uids. Since the secid can be obtained from the cred, drop the secid fields
from the usb_dev_state and async structures, and drop the secid argument to
kill_pid_info_as_cred. Replace the secid argument to security_task_kill
with the cred. Update SELinux, Smack, and AppArmor to use the cred, which
avoids the need for Smack and AppArmor to use a secid at all in this hook.
Further changes to Smack might still be required to take full advantage of
this change, since it should now be possible to perform capability
checking based on the supplied cred. The changes to Smack and AppArmor
have only been compile-tested.
Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Acked-by: Paul Moore <paul@paul-moore.com>
Acked-by: Casey Schaufler <casey@schaufler-ca.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: John Johansen <john.johansen@canonical.com>
Signed-off-by: James Morris <james.morris@microsoft.com>
2017-09-08 12:40:01 -04:00
else
secid = cred_sid ( cred ) ;
2023-03-09 13:30:37 -05:00
return avc_has_perm ( secid , task_sid_obj ( p ) , SECCLASS_PROCESS , perm , NULL ) ;
2005-04-16 15:20:36 -07:00
}
static void selinux_task_to_inode ( struct task_struct * p ,
struct inode * inode )
{
2018-09-21 17:19:11 -07:00
struct inode_security_struct * isec = selinux_inode ( inode ) ;
2021-02-18 15:13:40 -05:00
u32 sid = task_sid_obj ( p ) ;
2005-04-16 15:20:36 -07:00
selinux: Convert isec->lock into a spinlock
Convert isec->lock from a mutex into a spinlock. Instead of holding
the lock while sleeping in inode_doinit_with_dentry, set
isec->initialized to LABEL_PENDING and release the lock. Then, when
the sid has been determined, re-acquire the lock. If isec->initialized
is still set to LABEL_PENDING, set isec->sid; otherwise, the sid has
been set by another task (LABEL_INITIALIZED) or invalidated
(LABEL_INVALID) in the meantime.
This fixes a deadlock on gfs2 where
* one task is in inode_doinit_with_dentry -> gfs2_getxattr, holds
isec->lock, and tries to acquire the inode's glock, and
* another task is in do_xmote -> inode_go_inval ->
selinux_inode_invalidate_secctx, holds the inode's glock, and
tries to acquire isec->lock.
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
[PM: minor tweaks to keep checkpatch.pl happy]
Signed-off-by: Paul Moore <paul@paul-moore.com>
2016-11-15 11:06:40 +01:00
spin_lock ( & isec - > lock ) ;
2016-11-10 22:18:28 +01:00
isec - > sclass = inode_mode_to_security_class ( inode - > i_mode ) ;
2008-11-14 10:39:19 +11:00
isec - > sid = sid ;
2015-12-24 11:09:40 -05:00
isec - > initialized = LABEL_INITIALIZED ;
selinux: Convert isec->lock into a spinlock
Convert isec->lock from a mutex into a spinlock. Instead of holding
the lock while sleeping in inode_doinit_with_dentry, set
isec->initialized to LABEL_PENDING and release the lock. Then, when
the sid has been determined, re-acquire the lock. If isec->initialized
is still set to LABEL_PENDING, set isec->sid; otherwise, the sid has
been set by another task (LABEL_INITIALIZED) or invalidated
(LABEL_INVALID) in the meantime.
This fixes a deadlock on gfs2 where
* one task is in inode_doinit_with_dentry -> gfs2_getxattr, holds
isec->lock, and tries to acquire the inode's glock, and
* another task is in do_xmote -> inode_go_inval ->
selinux_inode_invalidate_secctx, holds the inode's glock, and
tries to acquire isec->lock.
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
[PM: minor tweaks to keep checkpatch.pl happy]
Signed-off-by: Paul Moore <paul@paul-moore.com>
2016-11-15 11:06:40 +01:00
spin_unlock ( & isec - > lock ) ;
2005-04-16 15:20:36 -07:00
}
2022-08-15 11:20:28 -05:00
static int selinux_userns_create ( const struct cred * cred )
{
u32 sid = current_sid ( ) ;
2023-03-09 13:30:37 -05:00
return avc_has_perm ( sid , sid , SECCLASS_USER_NAMESPACE ,
USER_NAMESPACE__CREATE , NULL ) ;
2022-08-15 11:20:28 -05:00
}
2005-04-16 15:20:36 -07:00
/* Returns error only if unable to parse addresses */
2006-11-08 17:04:26 -06:00
static int selinux_parse_skb_ipv4 ( struct sk_buff * skb ,
2009-07-14 12:14:09 -04:00
struct common_audit_data * ad , u8 * proto )
2005-04-16 15:20:36 -07:00
{
int offset , ihlen , ret = - EINVAL ;
struct iphdr _iph , * ih ;
2007-03-10 22:16:10 -03:00
offset = skb_network_offset ( skb ) ;
2005-04-16 15:20:36 -07:00
ih = skb_header_pointer ( skb , offset , sizeof ( _iph ) , & _iph ) ;
if ( ih = = NULL )
goto out ;
ihlen = ih - > ihl * 4 ;
if ( ihlen < sizeof ( _iph ) )
goto out ;
2012-04-02 13:15:44 -04:00
ad - > u . net - > v4info . saddr = ih - > saddr ;
ad - > u . net - > v4info . daddr = ih - > daddr ;
2005-04-16 15:20:36 -07:00
ret = 0 ;
2006-11-08 17:04:26 -06:00
if ( proto )
* proto = ih - > protocol ;
2005-04-16 15:20:36 -07:00
switch ( ih - > protocol ) {
2008-04-17 13:17:49 -04:00
case IPPROTO_TCP : {
struct tcphdr _tcph , * th ;
2005-04-16 15:20:36 -07:00
2008-04-17 13:17:49 -04:00
if ( ntohs ( ih - > frag_off ) & IP_OFFSET )
break ;
2005-04-16 15:20:36 -07:00
offset + = ihlen ;
th = skb_header_pointer ( skb , offset , sizeof ( _tcph ) , & _tcph ) ;
if ( th = = NULL )
break ;
2012-04-02 13:15:44 -04:00
ad - > u . net - > sport = th - > source ;
ad - > u . net - > dport = th - > dest ;
2005-04-16 15:20:36 -07:00
break ;
2008-04-17 13:17:49 -04:00
}
case IPPROTO_UDP : {
struct udphdr _udph , * uh ;
if ( ntohs ( ih - > frag_off ) & IP_OFFSET )
break ;
2005-04-16 15:20:36 -07:00
offset + = ihlen ;
2008-04-17 13:17:49 -04:00
uh = skb_header_pointer ( skb , offset , sizeof ( _udph ) , & _udph ) ;
2005-04-16 15:20:36 -07:00
if ( uh = = NULL )
2008-04-17 13:17:49 -04:00
break ;
2005-04-16 15:20:36 -07:00
2012-04-02 13:15:44 -04:00
ad - > u . net - > sport = uh - > source ;
ad - > u . net - > dport = uh - > dest ;
2008-04-17 13:17:49 -04:00
break ;
}
2005-04-16 15:20:36 -07:00
2006-11-13 16:09:01 -08:00
case IPPROTO_DCCP : {
struct dccp_hdr _dccph , * dh ;
if ( ntohs ( ih - > frag_off ) & IP_OFFSET )
break ;
offset + = ihlen ;
dh = skb_header_pointer ( skb , offset , sizeof ( _dccph ) , & _dccph ) ;
if ( dh = = NULL )
break ;
2012-04-02 13:15:44 -04:00
ad - > u . net - > sport = dh - > dccph_sport ;
ad - > u . net - > dport = dh - > dccph_dport ;
2006-11-13 16:09:01 -08:00
break ;
2008-04-17 13:17:49 -04:00
}
2006-11-13 16:09:01 -08:00
2018-02-13 20:57:18 +00:00
# if IS_ENABLED(CONFIG_IP_SCTP)
case IPPROTO_SCTP : {
struct sctphdr _sctph , * sh ;
if ( ntohs ( ih - > frag_off ) & IP_OFFSET )
break ;
offset + = ihlen ;
sh = skb_header_pointer ( skb , offset , sizeof ( _sctph ) , & _sctph ) ;
if ( sh = = NULL )
break ;
ad - > u . net - > sport = sh - > source ;
ad - > u . net - > dport = sh - > dest ;
break ;
}
# endif
2008-04-17 13:17:49 -04:00
default :
break ;
}
2005-04-16 15:20:36 -07:00
out :
return ret ;
}
2016-08-08 13:08:25 -04:00
# if IS_ENABLED(CONFIG_IPV6)
2005-04-16 15:20:36 -07:00
/* Returns error only if unable to parse addresses */
2006-11-08 17:04:26 -06:00
static int selinux_parse_skb_ipv6 ( struct sk_buff * skb ,
2009-07-14 12:14:09 -04:00
struct common_audit_data * ad , u8 * proto )
2005-04-16 15:20:36 -07:00
{
u8 nexthdr ;
int ret = - EINVAL , offset ;
struct ipv6hdr _ipv6h , * ip6 ;
2011-11-30 17:05:51 -08:00
__be16 frag_off ;
2005-04-16 15:20:36 -07:00
2007-03-10 22:16:10 -03:00
offset = skb_network_offset ( skb ) ;
2005-04-16 15:20:36 -07:00
ip6 = skb_header_pointer ( skb , offset , sizeof ( _ipv6h ) , & _ipv6h ) ;
if ( ip6 = = NULL )
goto out ;
2012-04-02 13:15:44 -04:00
ad - > u . net - > v6info . saddr = ip6 - > saddr ;
ad - > u . net - > v6info . daddr = ip6 - > daddr ;
2005-04-16 15:20:36 -07:00
ret = 0 ;
nexthdr = ip6 - > nexthdr ;
offset + = sizeof ( _ipv6h ) ;
2011-11-30 17:05:51 -08:00
offset = ipv6_skip_exthdr ( skb , offset , & nexthdr , & frag_off ) ;
2005-04-16 15:20:36 -07:00
if ( offset < 0 )
goto out ;
2006-11-08 17:04:26 -06:00
if ( proto )
* proto = nexthdr ;
2005-04-16 15:20:36 -07:00
switch ( nexthdr ) {
case IPPROTO_TCP : {
2008-04-17 13:17:49 -04:00
struct tcphdr _tcph , * th ;
2005-04-16 15:20:36 -07:00
th = skb_header_pointer ( skb , offset , sizeof ( _tcph ) , & _tcph ) ;
if ( th = = NULL )
break ;
2012-04-02 13:15:44 -04:00
ad - > u . net - > sport = th - > source ;
ad - > u . net - > dport = th - > dest ;
2005-04-16 15:20:36 -07:00
break ;
}
case IPPROTO_UDP : {
struct udphdr _udph , * uh ;
uh = skb_header_pointer ( skb , offset , sizeof ( _udph ) , & _udph ) ;
if ( uh = = NULL )
break ;
2012-04-02 13:15:44 -04:00
ad - > u . net - > sport = uh - > source ;
ad - > u . net - > dport = uh - > dest ;
2005-04-16 15:20:36 -07:00
break ;
}
2006-11-13 16:09:01 -08:00
case IPPROTO_DCCP : {
struct dccp_hdr _dccph , * dh ;
dh = skb_header_pointer ( skb , offset , sizeof ( _dccph ) , & _dccph ) ;
if ( dh = = NULL )
break ;
2012-04-02 13:15:44 -04:00
ad - > u . net - > sport = dh - > dccph_sport ;
ad - > u . net - > dport = dh - > dccph_dport ;
2006-11-13 16:09:01 -08:00
break ;
2008-04-17 13:17:49 -04:00
}
2006-11-13 16:09:01 -08:00
2018-02-13 20:57:18 +00:00
# if IS_ENABLED(CONFIG_IP_SCTP)
case IPPROTO_SCTP : {
struct sctphdr _sctph , * sh ;
sh = skb_header_pointer ( skb , offset , sizeof ( _sctph ) , & _sctph ) ;
if ( sh = = NULL )
break ;
ad - > u . net - > sport = sh - > source ;
ad - > u . net - > dport = sh - > dest ;
break ;
}
# endif
2005-04-16 15:20:36 -07:00
/* includes fragments */
default :
break ;
}
out :
return ret ;
}
# endif /* IPV6 */
2009-07-14 12:14:09 -04:00
/*
 * Parse @skb into the network audit data of @ad according to
 * ad->u.net->family and hand back a pointer to one of the parsed addresses.
 *
 * @skb:    packet to parse
 * @ad:     audit data; ad->u.net->family selects the parser and the parsed
 *          addresses/ports are stored in ad->u.net
 * @_addrp: if non-NULL, receives a pointer to the selected address inside
 *          @ad, or NULL for a family that is not parsed here
 * @src:    non-zero selects the source address, zero the destination
 * @proto:  passed through to the per-family parser
 *
 * Returns 0 on success (including families that are not parsed), or the
 * per-family parser's error after logging a warning; @_addrp is left
 * untouched in the error case.
 */
static int selinux_parse_skb(struct sk_buff *skb, struct common_audit_data *ad,
			     char **_addrp, int src, u8 *proto)
{
	char *addrp = NULL;
	int ret = 0;

	switch (ad->u.net->family) {
	case PF_INET:
		ret = selinux_parse_skb_ipv4(skb, ad, proto);
		if (!ret)
			addrp = (char *)(src ? &ad->u.net->v4info.saddr :
					       &ad->u.net->v4info.daddr);
		break;

#if IS_ENABLED(CONFIG_IPV6)
	case PF_INET6:
		ret = selinux_parse_skb_ipv6(skb, ad, proto);
		if (!ret)
			addrp = (char *)(src ? &ad->u.net->v6info.saddr :
					       &ad->u.net->v6info.daddr);
		break;
#endif	/* IPV6 */

	default:
		/* Nothing to parse for this family; addrp stays NULL. */
		break;
	}

	if (ret) {
		/* Keep the message in one literal so it stays greppable. */
		pr_warn("SELinux: failure in selinux_parse_skb(), unable to parse packet\n");
		return ret;
	}

	if (_addrp)
		*_addrp = addrp;
	return 0;
}
2007-03-01 14:35:22 -05:00
/**
2008-01-29 08:38:23 -05:00
* selinux_skb_peerlbl_sid - Determine the peer label of a packet
2007-03-01 14:35:22 -05:00
* @ skb : the packet
2008-01-29 08:38:04 -05:00
* @ family : protocol family
2008-01-29 08:38:23 -05:00
* @ sid : the packet ' s peer label SID
2007-03-01 14:35:22 -05:00
*
* Description :
2008-01-29 08:38:23 -05:00
* Check the various different forms of network peer labeling and determine
* the peer label / SID for the packet ; most of the magic actually occurs in
* the security server function security_net_peersid_cmp ( ) . The function
* returns zero if the value in @ sid is valid ( although it may be SECSID_NULL )
* or - EACCES if @ sid is invalid due to inconsistencies with the different
* peer labels .
2007-03-01 14:35:22 -05:00
*
*/
2008-01-29 08:38:23 -05:00
static int selinux_skb_peerlbl_sid ( struct sk_buff * skb , u16 family , u32 * sid )
2007-03-01 14:35:22 -05:00
{
2008-01-29 08:51:16 -05:00
int err ;
2007-03-01 14:35:22 -05:00
u32 xfrm_sid ;
u32 nlbl_sid ;
2008-01-29 08:38:23 -05:00
u32 nlbl_type ;
2007-03-01 14:35:22 -05:00
2013-12-10 14:57:54 -05:00
err = selinux_xfrm_skb_sid ( skb , & xfrm_sid ) ;
2013-07-23 17:38:40 -04:00
if ( unlikely ( err ) )
return - EACCES ;
err = selinux_netlbl_skbuff_getsid ( skb , family , & nlbl_type , & nlbl_sid ) ;
if ( unlikely ( err ) )
return - EACCES ;
2008-01-29 08:38:23 -05:00
2023-03-09 13:30:37 -05:00
err = security_net_peersid_resolve ( nlbl_sid ,
2018-03-01 18:48:02 -05:00
nlbl_type , xfrm_sid , sid ) ;
2008-01-29 08:51:16 -05:00
if ( unlikely ( err ) ) {
2018-06-12 10:09:03 +02:00
pr_warn (
2008-01-29 08:51:16 -05:00
" SELinux: failure in selinux_skb_peerlbl_sid(), "
" unable to determine packet's peer label \n " ) ;
2008-01-29 08:38:23 -05:00
return - EACCES ;
2008-01-29 08:51:16 -05:00
}
2008-01-29 08:38:23 -05:00
return 0 ;
2007-03-01 14:35:22 -05:00
}
2013-12-04 16:10:51 -05:00
/**
 * selinux_conn_sid - Determine the child socket label for a connection
 * @sk_sid: the parent socket's SID
 * @skb_sid: the packet's SID
 * @conn_sid: the resulting connection SID
 *
 * When @skb_sid is SECSID_NULL, @conn_sid is simply a copy of @sk_sid.
 * Otherwise the user:role:type information from @sk_sid is combined with
 * the MLS information from @skb_sid, via security_sid_mls_copy(), to
 * create @conn_sid.  Returns zero on success, negative values on failure.
 *
 */
static int selinux_conn_sid(u32 sk_sid, u32 skb_sid, u32 *conn_sid)
{
	if (skb_sid == SECSID_NULL) {
		*conn_sid = sk_sid;
		return 0;
	}

	return security_sid_mls_copy(sk_sid, skb_sid, conn_sid);
}
2005-04-16 15:20:36 -07:00
/* socket security operations */
2010-04-22 14:46:18 -04:00
2011-03-02 13:32:34 +08:00
/*
 * Work out the SID a new socket of class @secclass should get for the task
 * described by @tsec, storing it in @socksid.
 *
 * A sockcreate_sid greater than SECSID_NULL in @tsec is used as is;
 * otherwise the SID is computed by security_transition_sid() with the
 * task's own SID as both source and target.
 *
 * Returns 0 on success or the error from security_transition_sid().
 */
static int socket_sockcreate_sid(const struct task_security_struct *tsec,
				 u16 secclass, u32 *socksid)
{
	if (tsec->sockcreate_sid <= SECSID_NULL)
		return security_transition_sid(tsec->sid, tsec->sid,
					       secclass, NULL, socksid);

	*socksid = tsec->sockcreate_sid;
	return 0;
}
selinux: Add netlink xperm support
Reuse the existing extended permissions infrastructure to support
policies based on the netlink message types.
A new policy capability "netlink_xperm" is introduced. When disabled,
the previous behaviour is preserved. That is, netlink_send will rely on
the permission mappings defined in nlmsgtab.c (e.g., nlmsg_read for
RTM_GETADDR on NETLINK_ROUTE). When enabled, the mappings are ignored
and the generic "nlmsg" permission is used instead.
The new "nlmsg" permission is an extended permission. The 16 bits of the
extended permission are mapped to the nlmsg_type field.
Example policy on Android, preventing regular apps from accessing the
device's MAC address and ARP table, but allowing this access to
privileged apps, looks as follows:
allow netdomain self:netlink_route_socket {
create read getattr write setattr lock append connect getopt
setopt shutdown nlmsg
};
allowxperm netdomain self:netlink_route_socket nlmsg ~{
RTM_GETLINK RTM_GETNEIGH RTM_GETNEIGHTBL
};
allowxperm priv_app self:netlink_route_socket nlmsg {
RTM_GETLINK RTM_GETNEIGH RTM_GETNEIGHTBL
};
The constants in the example above (e.g., RTM_GETLINK) are explicitly
defined in the policy.
It is possible to generate policies to support kernels that may or
may not have the capability enabled by generating a rule for each
scenario. For instance:
allow domain self:netlink_audit_socket nlmsg_read;
allow domain self:netlink_audit_socket nlmsg;
allowxperm domain self:netlink_audit_socket nlmsg { AUDIT_GET };
The approach of defining a new permission ("nlmsg") instead of relying
on the existing permissions (e.g., "nlmsg_read", "nlmsg_readpriv" or
"nlmsg_tty_audit") has been preferred because:
1. This is similar to the other extended permission ("ioctl");
2. With the new extended permission, the coarse-grained mapping is not
necessary anymore. It could eventually be removed, which would be
impossible if the extended permission was defined below these.
3. Having a single extra extended permission considerably simplifies
the implementation here and in libselinux.
Signed-off-by: Thiébaud Weksteen <tweek@google.com>
Signed-off-by: Bram Bonné <brambonne@google.com>
[PM: manual merge fixes for sock_skip_has_perm()]
Signed-off-by: Paul Moore <paul@paul-moore.com>
2024-09-12 11:45:03 +10:00
/*
 * Decide whether permission checks should be skipped for a socket labeled
 * with @sid.
 *
 * Sockets carrying the kernel initial SID are always skipped.
 *
 * Before POLICYDB_CAP_USERSPACE_INITIAL_CONTEXT, sockets that inherited the
 * kernel context from early boot used to be skipped as well, so sockets
 * carrying the "init" initial SID keep being skipped unless the policy sets
 * that capability.  By setting it the policy signals that it is ready for
 * this quirk to be fixed.  Note that sockets created by a kernel thread or a
 * usermode helper executed without a transition will still be skipped
 * regardless of the policycap setting.
 *
 * Returns true when the check should be skipped, false otherwise.
 */
static bool sock_skip_has_perm(u32 sid)
{
	return sid == SECINITSID_KERNEL ||
	       (!selinux_policycap_userspace_initial_context() &&
		sid == SECINITSID_INIT);
}
/*
 * Check whether the current task holds @perms on socket @sk.
 *
 * Sockets for which sock_skip_has_perm() returns true are not checked and
 * 0 is returned.  Otherwise the current task's SID is checked against the
 * socket's SID and class, with network audit data initialized from @sk.
 *
 * Returns 0 when the check is skipped, else the result of avc_has_perm().
 */
static int sock_has_perm(struct sock *sk, u32 perms)
{
	struct sk_security_struct *sk_label = sk->sk_security;
	struct lsm_network_audit net_audit;
	struct common_audit_data audit;

	if (sock_skip_has_perm(sk_label->sid))
		return 0;

	ad_net_init_from_sk(&audit, &net_audit, sk);

	return avc_has_perm(current_sid(), sk_label->sid, sk_label->sclass,
			    perms, &audit);
}
static int selinux_socket_create ( int family , int type ,
int protocol , int kern )
{
2018-09-21 17:17:16 -07:00
const struct task_security_struct * tsec = selinux_cred ( current_cred ( ) ) ;
2010-04-22 14:46:18 -04:00
u32 newsid ;
2008-11-14 10:39:19 +11:00
u16 secclass ;
2011-03-02 13:32:34 +08:00
int rc ;
2005-04-16 15:20:36 -07:00
if ( kern )
2010-04-22 14:46:18 -04:00
return 0 ;
2008-11-14 10:39:19 +11:00
secclass = socket_type_to_security_class ( family , type , protocol ) ;
2011-03-02 13:32:34 +08:00
rc = socket_sockcreate_sid ( tsec , secclass , & newsid ) ;
if ( rc )
return rc ;
2023-03-09 13:30:37 -05:00
return avc_has_perm ( tsec - > sid , newsid , secclass , SOCKET__CREATE , NULL ) ;
2005-04-16 15:20:36 -07:00
}
2006-08-04 23:17:57 -07:00
static int selinux_socket_post_create ( struct socket * sock , int family ,
int type , int protocol , int kern )
2005-04-16 15:20:36 -07:00
{
2018-09-21 17:17:16 -07:00
const struct task_security_struct * tsec = selinux_cred ( current_cred ( ) ) ;
2015-12-24 11:09:40 -05:00
struct inode_security_struct * isec = inode_security_novalidate ( SOCK_INODE ( sock ) ) ;
2006-08-04 23:08:56 -07:00
struct sk_security_struct * sksec ;
selinux: Convert isec->lock into a spinlock
Convert isec->lock from a mutex into a spinlock. Instead of holding
the lock while sleeping in inode_doinit_with_dentry, set
isec->initialized to LABEL_PENDING and release the lock. Then, when
the sid has been determined, re-acquire the lock. If isec->initialized
is still set to LABEL_PENDING, set isec->sid; otherwise, the sid has
been set by another task (LABEL_INITIALIZED) or invalidated
(LABEL_INVALID) in the meantime.
This fixes a deadlock on gfs2 where
* one task is in inode_doinit_with_dentry -> gfs2_getxattr, holds
isec->lock, and tries to acquire the inode's glock, and
* another task is in do_xmote -> inode_go_inval ->
selinux_inode_invalidate_secctx, holds the inode's glock, and
tries to acquire isec->lock.
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
[PM: minor tweaks to keep checkpatch.pl happy]
Signed-off-by: Paul Moore <paul@paul-moore.com>
2016-11-15 11:06:40 +01:00
u16 sclass = socket_type_to_security_class ( family , type , protocol ) ;
u32 sid = SECINITSID_KERNEL ;
2008-11-14 10:39:19 +11:00
int err = 0 ;
selinux: Convert isec->lock into a spinlock
Convert isec->lock from a mutex into a spinlock. Instead of holding
the lock while sleeping in inode_doinit_with_dentry, set
isec->initialized to LABEL_PENDING and release the lock. Then, when
the sid has been determined, re-acquire the lock. If isec->initialized
is still set to LABEL_PENDING, set isec->sid; otherwise, the sid has
been set by another task (LABEL_INITIALIZED) or invalidated
(LABEL_INVALID) in the meantime.
This fixes a deadlock on gfs2 where
* one task is in inode_doinit_with_dentry -> gfs2_getxattr, holds
isec->lock, and tries to acquire the inode's glock, and
* another task is in do_xmote -> inode_go_inval ->
selinux_inode_invalidate_secctx, holds the inode's glock, and
tries to acquire isec->lock.
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
[PM: minor tweaks to keep checkpatch.pl happy]
Signed-off-by: Paul Moore <paul@paul-moore.com>
2016-11-15 11:06:40 +01:00
if ( ! kern ) {
err = socket_sockcreate_sid ( tsec , sclass , & sid ) ;
2011-03-02 13:32:34 +08:00
if ( err )
return err ;
}
2008-11-14 10:39:19 +11:00
selinux: Convert isec->lock into a spinlock
Convert isec->lock from a mutex into a spinlock. Instead of holding
the lock while sleeping in inode_doinit_with_dentry, set
isec->initialized to LABEL_PENDING and release the lock. Then, when
the sid has been determined, re-acquire the lock. If isec->initialized
is still set to LABEL_PENDING, set isec->sid; otherwise, the sid has
been set by another task (LABEL_INITIALIZED) or invalidated
(LABEL_INVALID) in the meantime.
This fixes a deadlock on gfs2 where
* one task is in inode_doinit_with_dentry -> gfs2_getxattr, holds
isec->lock, and tries to acquire the inode's glock, and
* another task is in do_xmote -> inode_go_inval ->
selinux_inode_invalidate_secctx, holds the inode's glock, and
tries to acquire isec->lock.
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
[PM: minor tweaks to keep checkpatch.pl happy]
Signed-off-by: Paul Moore <paul@paul-moore.com>
2016-11-15 11:06:40 +01:00
isec - > sclass = sclass ;
isec - > sid = sid ;
2015-12-24 11:09:40 -05:00
isec - > initialized = LABEL_INITIALIZED ;
2005-04-16 15:20:36 -07:00
2006-08-04 23:08:56 -07:00
if ( sock - > sk ) {
2024-07-10 14:32:25 -07:00
sksec = selinux_sock ( sock - > sk ) ;
selinux: Convert isec->lock into a spinlock
Convert isec->lock from a mutex into a spinlock. Instead of holding
the lock while sleeping in inode_doinit_with_dentry, set
isec->initialized to LABEL_PENDING and release the lock. Then, when
the sid has been determined, re-acquire the lock. If isec->initialized
is still set to LABEL_PENDING, set isec->sid; otherwise, the sid has
been set by another task (LABEL_INITIALIZED) or invalidated
(LABEL_INVALID) in the meantime.
This fixes a deadlock on gfs2 where
* one task is in inode_doinit_with_dentry -> gfs2_getxattr, holds
isec->lock, and tries to acquire the inode's glock, and
* another task is in do_xmote -> inode_go_inval ->
selinux_inode_invalidate_secctx, holds the inode's glock, and
tries to acquire isec->lock.
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
[PM: minor tweaks to keep checkpatch.pl happy]
Signed-off-by: Paul Moore <paul@paul-moore.com>
2016-11-15 11:06:40 +01:00
sksec - > sclass = sclass ;
sksec - > sid = sid ;
2018-02-13 20:57:18 +00:00
/* Allows detection of the first association on this socket */
if ( sksec - > sclass = = SECCLASS_SCTP_SOCKET )
sksec - > sctp_assoc_state = SCTP_ASSOC_UNSET ;
2009-03-27 17:10:34 -04:00
err = selinux_netlbl_socket_post_create ( sock - > sk , family ) ;
2006-08-04 23:08:56 -07:00
}
2006-08-04 23:17:57 -07:00
return err ;
2005-04-16 15:20:36 -07:00
}
2018-05-04 16:28:21 +02:00
static int selinux_socket_socketpair ( struct socket * socka ,
struct socket * sockb )
{
2024-07-10 14:32:25 -07:00
struct sk_security_struct * sksec_a = selinux_sock ( socka - > sk ) ;
struct sk_security_struct * sksec_b = selinux_sock ( sockb - > sk ) ;
2018-05-04 16:28:21 +02:00
sksec_a - > peer_sid = sksec_b - > sid ;
sksec_b - > peer_sid = sksec_a - > sid ;
return 0 ;
}
2005-04-16 15:20:36 -07:00
/* Range of port numbers used to automatically bind.
Need to determine whether we should perform a name_bind
permission check between the socket and the port number . */
static int selinux_socket_bind ( struct socket * sock , struct sockaddr * address , int addrlen )
{
2010-04-22 14:46:19 -04:00
struct sock * sk = sock - > sk ;
2024-07-10 14:32:25 -07:00
struct sk_security_struct * sksec = selinux_sock ( sk ) ;
2005-04-16 15:20:36 -07:00
u16 family ;
int err ;
2017-01-09 10:07:31 -05:00
err = sock_has_perm ( sk , SOCKET__BIND ) ;
2005-04-16 15:20:36 -07:00
if ( err )
goto out ;
2018-02-13 20:57:18 +00:00
/* If PF_INET or PF_INET6, check name_bind permission for the port. */
2010-04-22 14:46:19 -04:00
family = sk - > sk_family ;
2005-04-16 15:20:36 -07:00
if ( family = = PF_INET | | family = = PF_INET6 ) {
char * addrp ;
2009-07-14 12:14:09 -04:00
struct common_audit_data ad ;
2012-04-02 13:15:44 -04:00
struct lsm_network_audit net = { 0 , } ;
2005-04-16 15:20:36 -07:00
struct sockaddr_in * addr4 = NULL ;
struct sockaddr_in6 * addr6 = NULL ;
2019-04-12 19:59:34 +09:00
u16 family_sa ;
2005-04-16 15:20:36 -07:00
unsigned short snum ;
2008-06-12 01:39:58 +10:00
u32 sid , node_perm ;
2005-04-16 15:20:36 -07:00
2018-02-13 20:57:18 +00:00
/*
* sctp_bindx ( 3 ) calls via selinux_sctp_bind_connect ( )
* that validates multiple binding addresses . Because of this
* need to check address - > sa_family as it is possible to have
* sk - > sk_family = PF_INET6 with addr - > sa_family = AF_INET .
*/
2019-04-12 19:59:34 +09:00
if ( addrlen < offsetofend ( struct sockaddr , sa_family ) )
return - EINVAL ;
family_sa = address - > sa_family ;
2018-05-11 20:15:11 +03:00
switch ( family_sa ) {
case AF_UNSPEC :
2018-03-02 19:54:34 +00:00
case AF_INET :
if ( addrlen < sizeof ( struct sockaddr_in ) )
return - EINVAL ;
2005-04-16 15:20:36 -07:00
addr4 = ( struct sockaddr_in * ) address ;
2018-05-11 20:15:11 +03:00
if ( family_sa = = AF_UNSPEC ) {
2024-01-03 17:34:15 +01:00
if ( family = = PF_INET6 ) {
/* Length check from inet6_bind_sk() */
if ( addrlen < SIN6_LEN_RFC2133 )
return - EINVAL ;
/* Family check from __inet6_bind() */
goto err_af ;
}
2018-05-11 20:15:11 +03:00
/* see __inet_bind(), we only want to allow
* AF_UNSPEC if the address is INADDR_ANY
*/
if ( addr4 - > sin_addr . s_addr ! = htonl ( INADDR_ANY ) )
goto err_af ;
family_sa = AF_INET ;
}
2005-04-16 15:20:36 -07:00
snum = ntohs ( addr4 - > sin_port ) ;
addrp = ( char * ) & addr4 - > sin_addr . s_addr ;
2018-03-02 19:54:34 +00:00
break ;
case AF_INET6 :
if ( addrlen < SIN6_LEN_RFC2133 )
return - EINVAL ;
2005-04-16 15:20:36 -07:00
addr6 = ( struct sockaddr_in6 * ) address ;
snum = ntohs ( addr6 - > sin6_port ) ;
addrp = ( char * ) & addr6 - > sin6_addr . s6_addr ;
2018-03-02 19:54:34 +00:00
break ;
default :
2018-05-11 20:15:11 +03:00
goto err_af ;
2005-04-16 15:20:36 -07:00
}
2018-05-11 20:15:12 +03:00
ad . type = LSM_AUDIT_DATA_NET ;
ad . u . net = & net ;
ad . u . net - > sport = htons ( snum ) ;
ad . u . net - > family = family_sa ;
2007-10-10 17:30:46 -07:00
if ( snum ) {
int low , high ;
2013-09-28 14:10:59 -07:00
inet_get_local_port_range ( sock_net ( sk ) , & low , & high ) ;
2007-10-10 17:30:46 -07:00
2019-11-25 15:37:04 -08:00
if ( inet_port_requires_bind_service ( sock_net ( sk ) , snum ) | |
snum < low | | snum > high ) {
2008-04-10 10:48:14 -04:00
err = sel_netport_sid ( sk - > sk_protocol ,
snum , & sid ) ;
2007-10-10 17:30:46 -07:00
if ( err )
goto out ;
2023-03-09 13:30:37 -05:00
err = avc_has_perm ( sksec - > sid , sid ,
2010-04-22 14:46:19 -04:00
sksec - > sclass ,
2007-10-10 17:30:46 -07:00
SOCKET__NAME_BIND , & ad ) ;
if ( err )
goto out ;
}
2005-04-16 15:20:36 -07:00
}
2008-04-17 13:17:49 -04:00
2010-04-22 14:46:19 -04:00
switch ( sksec - > sclass ) {
[PATCH] SELinux - fix SCTP socket bug and general IP protocol handling
The following patch updates the way SELinux classifies and handles IP
based protocols.
Currently, IP sockets are classified by SELinux as being either TCP, UDP
or 'Raw', the latter being a default for IP socket that is not TCP or UDP.
The classification code is out of date and uses only the socket type
parameter to socket(2) to determine the class of IP socket. So, any
socket created with SOCK_STREAM will be classified by SELinux as TCP, and
SOCK_DGRAM as UDP. Also, other socket types such as SOCK_SEQPACKET and
SOCK_DCCP are currently ignored by SELinux, which classifies them as
generic sockets, which means they don't even get basic IP level checking.
This patch changes the SELinux IP socket classification logic, so that
only an IPPROTO_IP protocol value passed to socket(2) classify the socket
as TCP or UDP. The patch also drops the check for SOCK_RAW and converts
it into a default, so that socket types like SOCK_DCCP and SOCK_SEQPACKET
are classified as SECCLASS_RAWIP_SOCKET (instead of generic sockets).
Note that protocol-specific support for SCTP, DCCP etc. is not addressed
here, we're just getting these protocols checked at the IP layer.
This fixes a reported problem where SCTP sockets were being recognized as
generic SELinux sockets yet still being passed in one case to an IP level
check, which then fails for generic sockets.
It will also fix bugs where any SOCK_STREAM socket is classified as TCP or
any SOCK_DGRAM socket is classified as UDP.
This patch also unifies the way IP sockets classes are determined in
selinux_socket_bind(), so we use the already calculated value instead of
trying to recalculate it.
Signed-off-by: James Morris <jmorris@namei.org>
Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-09-30 14:24:34 -04:00
case SECCLASS_TCP_SOCKET :
2005-04-16 15:20:36 -07:00
node_perm = TCP_SOCKET__NODE_BIND ;
break ;
2008-04-17 13:17:49 -04:00
[PATCH] SELinux - fix SCTP socket bug and general IP protocol handling
The following patch updates the way SELinux classifies and handles IP
based protocols.
Currently, IP sockets are classified by SELinux as being either TCP, UDP
or 'Raw', the latter being a default for IP socket that is not TCP or UDP.
The classification code is out of date and uses only the socket type
parameter to socket(2) to determine the class of IP socket. So, any
socket created with SOCK_STREAM will be classified by SELinux as TCP, and
SOCK_DGRAM as UDP. Also, other socket types such as SOCK_SEQPACKET and
SOCK_DCCP are currently ignored by SELinux, which classifies them as
generic sockets, which means they don't even get basic IP level checking.
This patch changes the SELinux IP socket classification logic, so that
only an IPPROTO_IP protocol value passed to socket(2) classify the socket
as TCP or UDP. The patch also drops the check for SOCK_RAW and converts
it into a default, so that socket types like SOCK_DCCP and SOCK_SEQPACKET
are classified as SECCLASS_RAWIP_SOCKET (instead of generic sockets).
Note that protocol-specific support for SCTP, DCCP etc. is not addressed
here, we're just getting these protocols checked at the IP layer.
This fixes a reported problem where SCTP sockets were being recognized as
generic SELinux sockets yet still being passed in one case to an IP level
check, which then fails for generic sockets.
It will also fix bugs where any SOCK_STREAM socket is classified as TCP or
any SOCK_DGRAM socket is classified as UDP.
This patch also unifies the way IP sockets classes are determined in
selinux_socket_bind(), so we use the already calculated value instead of
trying to recalculate it.
Signed-off-by: James Morris <jmorris@namei.org>
Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-09-30 14:24:34 -04:00
case SECCLASS_UDP_SOCKET :
2005-04-16 15:20:36 -07:00
node_perm = UDP_SOCKET__NODE_BIND ;
break ;
2006-11-13 16:09:01 -08:00
case SECCLASS_DCCP_SOCKET :
node_perm = DCCP_SOCKET__NODE_BIND ;
break ;
2018-02-13 20:57:18 +00:00
case SECCLASS_SCTP_SOCKET :
node_perm = SCTP_SOCKET__NODE_BIND ;
break ;
2005-04-16 15:20:36 -07:00
default :
node_perm = RAWIP_SOCKET__NODE_BIND ;
break ;
}
2008-04-17 13:17:49 -04:00
2018-05-11 20:15:12 +03:00
err = sel_netnode_sid ( addrp , family_sa , & sid ) ;
2005-04-16 15:20:36 -07:00
if ( err )
goto out ;
2008-04-17 13:17:49 -04:00
2018-05-11 20:15:11 +03:00
if ( family_sa = = AF_INET )
2012-04-02 13:15:44 -04:00
ad . u . net - > v4info . saddr = addr4 - > sin_addr . s_addr ;
2005-04-16 15:20:36 -07:00
else
2012-04-02 13:15:44 -04:00
ad . u . net - > v6info . saddr = addr6 - > sin6_addr ;
2005-04-16 15:20:36 -07:00
2023-03-09 13:30:37 -05:00
err = avc_has_perm ( sksec - > sid , sid ,
2010-04-22 14:46:19 -04:00
sksec - > sclass , node_perm , & ad ) ;
2005-04-16 15:20:36 -07:00
if ( err )
goto out ;
}
out :
return err ;
2018-05-11 20:15:11 +03:00
err_af :
/* Note that SCTP services expect -EINVAL, others -EAFNOSUPPORT. */
if ( sksec - > sclass = = SECCLASS_SCTP_SOCKET )
return - EINVAL ;
return - EAFNOSUPPORT ;
2005-04-16 15:20:36 -07:00
}
2018-02-13 20:57:18 +00:00
/* This supports connect(2) and SCTP connect services such as sctp_connectx(3)
2019-02-17 14:08:36 -08:00
* and sctp_sendmsg ( 3 ) as described in Documentation / security / SCTP . rst
2018-02-13 20:57:18 +00:00
*/
static int selinux_socket_connect_helper ( struct socket * sock ,
struct sockaddr * address , int addrlen )
2005-04-16 15:20:36 -07:00
{
2008-10-10 10:16:33 -04:00
struct sock * sk = sock - > sk ;
2024-07-10 14:32:25 -07:00
struct sk_security_struct * sksec = selinux_sock ( sk ) ;
2005-04-16 15:20:36 -07:00
int err ;
2017-01-09 10:07:31 -05:00
err = sock_has_perm ( sk , SOCKET__CONNECT ) ;
2005-04-16 15:20:36 -07:00
if ( err )
return err ;
2019-05-10 19:12:33 +02:00
if ( addrlen < offsetofend ( struct sockaddr , sa_family ) )
return - EINVAL ;
/* connect(AF_UNSPEC) has special handling, as it is a documented
* way to disconnect the socket
*/
if ( address - > sa_family = = AF_UNSPEC )
return 0 ;
2005-04-16 15:20:36 -07:00
/*
2018-02-13 20:57:18 +00:00
* If a TCP , DCCP or SCTP socket , check name_connect permission
* for the port .
2005-04-16 15:20:36 -07:00
*/
2010-04-22 14:46:19 -04:00
if ( sksec - > sclass = = SECCLASS_TCP_SOCKET | |
2018-02-13 20:57:18 +00:00
sksec - > sclass = = SECCLASS_DCCP_SOCKET | |
sksec - > sclass = = SECCLASS_SCTP_SOCKET ) {
2009-07-14 12:14:09 -04:00
struct common_audit_data ad ;
2012-04-02 13:15:44 -04:00
struct lsm_network_audit net = { 0 , } ;
2005-04-16 15:20:36 -07:00
struct sockaddr_in * addr4 = NULL ;
struct sockaddr_in6 * addr6 = NULL ;
unsigned short snum ;
2006-11-13 16:09:01 -08:00
u32 sid , perm ;
2005-04-16 15:20:36 -07:00
2018-02-13 20:57:18 +00:00
/* sctp_connectx(3) calls via selinux_sctp_bind_connect()
* that validates multiple connect addresses . Because of this
* need to check address - > sa_family as it is possible to have
* sk - > sk_family = PF_INET6 with addr - > sa_family = AF_INET .
*/
2018-03-02 19:54:34 +00:00
switch ( address - > sa_family ) {
case AF_INET :
2005-04-16 15:20:36 -07:00
addr4 = ( struct sockaddr_in * ) address ;
2005-07-28 21:16:21 -07:00
if ( addrlen < sizeof ( struct sockaddr_in ) )
2005-04-16 15:20:36 -07:00
return - EINVAL ;
snum = ntohs ( addr4 - > sin_port ) ;
2018-03-02 19:54:34 +00:00
break ;
case AF_INET6 :
2005-04-16 15:20:36 -07:00
addr6 = ( struct sockaddr_in6 * ) address ;
2005-07-28 21:16:21 -07:00
if ( addrlen < SIN6_LEN_RFC2133 )
2005-04-16 15:20:36 -07:00
return - EINVAL ;
snum = ntohs ( addr6 - > sin6_port ) ;
2018-03-02 19:54:34 +00:00
break ;
default :
/* Note that SCTP services expect -EINVAL, whereas
* others expect - EAFNOSUPPORT .
*/
if ( sksec - > sclass = = SECCLASS_SCTP_SOCKET )
return - EINVAL ;
else
return - EAFNOSUPPORT ;
2005-04-16 15:20:36 -07:00
}
2008-04-10 10:48:14 -04:00
err = sel_netport_sid ( sk - > sk_protocol , snum , & sid ) ;
2005-04-16 15:20:36 -07:00
if ( err )
2018-02-13 20:57:18 +00:00
return err ;
2005-04-16 15:20:36 -07:00
2018-02-13 20:57:18 +00:00
switch ( sksec - > sclass ) {
case SECCLASS_TCP_SOCKET :
perm = TCP_SOCKET__NAME_CONNECT ;
break ;
case SECCLASS_DCCP_SOCKET :
perm = DCCP_SOCKET__NAME_CONNECT ;
break ;
case SECCLASS_SCTP_SOCKET :
perm = SCTP_SOCKET__NAME_CONNECT ;
break ;
}
2006-11-13 16:09:01 -08:00
2012-04-04 15:01:43 -04:00
ad . type = LSM_AUDIT_DATA_NET ;
2012-04-02 13:15:44 -04:00
ad . u . net = & net ;
ad . u . net - > dport = htons ( snum ) ;
2018-05-11 20:15:12 +03:00
ad . u . net - > family = address - > sa_family ;
2023-03-09 13:30:37 -05:00
err = avc_has_perm ( sksec - > sid , sid , sksec - > sclass , perm , & ad ) ;
2005-04-16 15:20:36 -07:00
if ( err )
2018-02-13 20:57:18 +00:00
return err ;
2005-04-16 15:20:36 -07:00
}
2018-02-13 20:57:18 +00:00
return 0 ;
}
2008-10-10 10:16:33 -04:00
2018-02-13 20:57:18 +00:00
/* Supports connect(2), see comments in selinux_socket_connect_helper() */
static int selinux_socket_connect ( struct socket * sock ,
struct sockaddr * address , int addrlen )
{
int err ;
struct sock * sk = sock - > sk ;
err = selinux_socket_connect_helper ( sock , address , addrlen ) ;
if ( err )
return err ;
return selinux_netlbl_socket_connect ( sk , address ) ;
2005-04-16 15:20:36 -07:00
}
static int selinux_socket_listen ( struct socket * sock , int backlog )
{
2017-01-09 10:07:31 -05:00
return sock_has_perm ( sock - > sk , SOCKET__LISTEN ) ;
2005-04-16 15:20:36 -07:00
}
static int selinux_socket_accept ( struct socket * sock , struct socket * newsock )
{
int err ;
struct inode_security_struct * isec ;
struct inode_security_struct * newisec ;
selinux: Convert isec->lock into a spinlock
Convert isec->lock from a mutex into a spinlock. Instead of holding
the lock while sleeping in inode_doinit_with_dentry, set
isec->initialized to LABEL_PENDING and release the lock. Then, when
the sid has been determined, re-acquire the lock. If isec->initialized
is still set to LABEL_PENDING, set isec->sid; otherwise, the sid has
been set by another task (LABEL_INITIALIZED) or invalidated
(LABEL_INVALID) in the meantime.
This fixes a deadlock on gfs2 where
* one task is in inode_doinit_with_dentry -> gfs2_getxattr, holds
isec->lock, and tries to acquire the inode's glock, and
* another task is in do_xmote -> inode_go_inval ->
selinux_inode_invalidate_secctx, holds the inode's glock, and
tries to acquire isec->lock.
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
[PM: minor tweaks to keep checkpatch.pl happy]
Signed-off-by: Paul Moore <paul@paul-moore.com>
2016-11-15 11:06:40 +01:00
u16 sclass ;
u32 sid ;
2005-04-16 15:20:36 -07:00
2017-01-09 10:07:31 -05:00
err = sock_has_perm ( sock - > sk , SOCKET__ACCEPT ) ;
2005-04-16 15:20:36 -07:00
if ( err )
return err ;
2015-12-24 11:09:40 -05:00
isec = inode_security_novalidate ( SOCK_INODE ( sock ) ) ;
selinux: Convert isec->lock into a spinlock
Convert isec->lock from a mutex into a spinlock. Instead of holding
the lock while sleeping in inode_doinit_with_dentry, set
isec->initialized to LABEL_PENDING and release the lock. Then, when
the sid has been determined, re-acquire the lock. If isec->initialized
is still set to LABEL_PENDING, set isec->sid; otherwise, the sid has
been set by another task (LABEL_INITIALIZED) or invalidated
(LABEL_INVALID) in the meantime.
This fixes a deadlock on gfs2 where
* one task is in inode_doinit_with_dentry -> gfs2_getxattr, holds
isec->lock, and tries to acquire the inode's glock, and
* another task is in do_xmote -> inode_go_inval ->
selinux_inode_invalidate_secctx, holds the inode's glock, and
tries to acquire isec->lock.
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
[PM: minor tweaks to keep checkpatch.pl happy]
Signed-off-by: Paul Moore <paul@paul-moore.com>
2016-11-15 11:06:40 +01:00
spin_lock ( & isec - > lock ) ;
sclass = isec - > sclass ;
sid = isec - > sid ;
spin_unlock ( & isec - > lock ) ;
newisec = inode_security_novalidate ( SOCK_INODE ( newsock ) ) ;
newisec - > sclass = sclass ;
newisec - > sid = sid ;
2015-12-24 11:09:40 -05:00
newisec - > initialized = LABEL_INITIALIZED ;
2005-04-16 15:20:36 -07:00
return 0 ;
}
static int selinux_socket_sendmsg ( struct socket * sock , struct msghdr * msg ,
2008-04-17 13:17:49 -04:00
int size )
2005-04-16 15:20:36 -07:00
{
2017-01-09 10:07:31 -05:00
return sock_has_perm ( sock - > sk , SOCKET__WRITE ) ;
2005-04-16 15:20:36 -07:00
}
static int selinux_socket_recvmsg ( struct socket * sock , struct msghdr * msg ,
int size , int flags )
{
2017-01-09 10:07:31 -05:00
return sock_has_perm ( sock - > sk , SOCKET__READ ) ;
2005-04-16 15:20:36 -07:00
}
static int selinux_socket_getsockname ( struct socket * sock )
{
2017-01-09 10:07:31 -05:00
return sock_has_perm ( sock - > sk , SOCKET__GETATTR ) ;
2005-04-16 15:20:36 -07:00
}
static int selinux_socket_getpeername ( struct socket * sock )
{
2017-01-09 10:07:31 -05:00
return sock_has_perm ( sock - > sk , SOCKET__GETATTR ) ;
2005-04-16 15:20:36 -07:00
}
2008-04-17 13:17:49 -04:00
static int selinux_socket_setsockopt ( struct socket * sock , int level , int optname )
2005-04-16 15:20:36 -07:00
{
2006-10-30 15:22:15 -08:00
int err ;
2017-01-09 10:07:31 -05:00
err = sock_has_perm ( sock - > sk , SOCKET__SETOPT ) ;
2006-10-30 15:22:15 -08:00
if ( err )
return err ;
return selinux_netlbl_socket_setsockopt ( sock , level , optname ) ;
2005-04-16 15:20:36 -07:00
}
static int selinux_socket_getsockopt ( struct socket * sock , int level ,
int optname )
{
2017-01-09 10:07:31 -05:00
return sock_has_perm ( sock - > sk , SOCKET__GETOPT ) ;
2005-04-16 15:20:36 -07:00
}
static int selinux_socket_shutdown ( struct socket * sock , int how )
{
2017-01-09 10:07:31 -05:00
return sock_has_perm ( sock - > sk , SOCKET__SHUTDOWN ) ;
2005-04-16 15:20:36 -07:00
}
2011-01-05 15:38:53 -08:00
static int selinux_socket_unix_stream_connect ( struct sock * sock ,
struct sock * other ,
2005-04-16 15:20:36 -07:00
struct sock * newsk )
{
2024-07-10 14:32:25 -07:00
struct sk_security_struct * sksec_sock = selinux_sock ( sock ) ;
struct sk_security_struct * sksec_other = selinux_sock ( other ) ;
struct sk_security_struct * sksec_new = selinux_sock ( newsk ) ;
2009-07-14 12:14:09 -04:00
struct common_audit_data ad ;
2023-07-19 13:37:49 +02:00
struct lsm_network_audit net ;
2005-04-16 15:20:36 -07:00
int err ;
2023-07-19 13:37:49 +02:00
ad_net_init_from_sk ( & ad , & net , other ) ;
2005-04-16 15:20:36 -07:00
2023-03-09 13:30:37 -05:00
err = avc_has_perm ( sksec_sock - > sid , sksec_other - > sid ,
2010-04-22 14:46:18 -04:00
sksec_other - > sclass ,
2005-04-16 15:20:36 -07:00
UNIX_STREAM_SOCKET__CONNECTTO , & ad ) ;
if ( err )
return err ;
/* server child socket */
2010-04-22 14:46:18 -04:00
sksec_new - > peer_sid = sksec_sock - > sid ;
2023-03-09 13:30:37 -05:00
err = security_sid_mls_copy ( sksec_other - > sid ,
2018-03-01 18:48:02 -05:00
sksec_sock - > sid , & sksec_new - > sid ) ;
2010-04-22 14:46:18 -04:00
if ( err )
return err ;
2006-07-24 23:32:50 -07:00
2010-04-22 14:46:18 -04:00
/* connecting socket */
sksec_sock - > peer_sid = sksec_new - > sid ;
return 0 ;
2005-04-16 15:20:36 -07:00
}
static int selinux_socket_unix_may_send ( struct socket * sock ,
struct socket * other )
{
2024-07-10 14:32:25 -07:00
struct sk_security_struct * ssec = selinux_sock ( sock - > sk ) ;
struct sk_security_struct * osec = selinux_sock ( other - > sk ) ;
2009-07-14 12:14:09 -04:00
struct common_audit_data ad ;
2023-07-19 13:37:49 +02:00
struct lsm_network_audit net ;
2005-04-16 15:20:36 -07:00
2023-07-19 13:37:49 +02:00
ad_net_init_from_sk ( & ad , & net , other - > sk ) ;
2005-04-16 15:20:36 -07:00
2023-03-09 13:30:37 -05:00
return avc_has_perm ( ssec - > sid , osec - > sid , osec - > sclass , SOCKET__SENDTO ,
2010-04-22 14:46:19 -04:00
& ad ) ;
2005-04-16 15:20:36 -07:00
}
2014-09-10 17:09:57 -04:00
/*
 * Ingress checks for a peer SID: NETIF__INGRESS against the SID of the
 * interface identified by (ns, ifindex), followed by NODE__RECVFROM against
 * the SID of the node identified by (addrp, family).
 *
 * Returns 0 if both checks pass, otherwise the first error returned by a
 * SID lookup or permission check; later steps are not run after a failure.
 */
static int selinux_inet_sys_rcv_skb(struct net *ns, int ifindex,
				    char *addrp, u16 family, u32 peer_sid,
				    struct common_audit_data *ad)
{
	u32 netif_sid;
	u32 netnode_sid;
	int rc;

	rc = sel_netif_sid(ns, ifindex, &netif_sid);
	if (!rc)
		rc = avc_has_perm(peer_sid, netif_sid,
				  SECCLASS_NETIF, NETIF__INGRESS, ad);
	if (!rc)
		rc = sel_netnode_sid(addrp, family, &netnode_sid);
	if (!rc)
		rc = avc_has_perm(peer_sid, netnode_sid,
				  SECCLASS_NODE, NODE__RECVFROM, ad);

	return rc;
}
2008-01-29 08:38:23 -05:00
static int selinux_sock_rcv_skb_compat ( struct sock * sk , struct sk_buff * skb ,
2008-10-10 10:16:30 -04:00
u16 family )
2008-01-29 08:38:23 -05:00
{
2008-12-31 12:54:11 -05:00
int err = 0 ;
2024-07-10 14:32:25 -07:00
struct sk_security_struct * sksec = selinux_sock ( sk ) ;
2008-01-29 08:38:23 -05:00
u32 sk_sid = sksec - > sid ;
2009-07-14 12:14:09 -04:00
struct common_audit_data ad ;
2023-07-19 13:37:49 +02:00
struct lsm_network_audit net ;
2008-10-10 10:16:30 -04:00
char * addrp ;
2023-07-19 13:37:49 +02:00
ad_net_init_from_iif ( & ad , & net , skb - > skb_iif , family ) ;
2008-10-10 10:16:30 -04:00
err = selinux_parse_skb ( skb , & ad , & addrp , 1 , NULL ) ;
if ( err )
return err ;
2005-04-16 15:20:36 -07:00
2009-03-27 17:10:41 -04:00
if ( selinux_secmark_enabled ( ) ) {
2023-03-09 13:30:37 -05:00
err = avc_has_perm ( sk_sid , skb - > secmark , SECCLASS_PACKET ,
2008-10-10 10:16:30 -04:00
PACKET__RECV , & ad ) ;
2009-03-27 17:10:41 -04:00
if ( err )
return err ;
}
2008-01-29 08:38:23 -05:00
2011-02-23 12:55:21 +01:00
err = selinux_netlbl_sock_rcv_skb ( sksec , skb , family , & ad ) ;
if ( err )
return err ;
err = selinux_xfrm_sock_rcv_skb ( sksec - > sid , skb , & ad ) ;
[LSM-IPSec]: Per-packet access control.
This patch series implements per packet access control via the
extension of the Linux Security Modules (LSM) interface by hooks in
the XFRM and pfkey subsystems that leverage IPSec security
associations to label packets. Extensions to the SELinux LSM are
included that leverage the patch for this purpose.
This patch implements the changes necessary to the SELinux LSM to
create, deallocate, and use security contexts for policies
(xfrm_policy) and security associations (xfrm_state) that enable
control of a socket's ability to send and receive packets.
Patch purpose:
The patch is designed to enable the SELinux LSM to implement access
control on individual packets based on the strongly authenticated
IPSec security association. Such access controls augment the existing
ones in SELinux based on network interface and IP address. The former
are very coarse-grained, and the latter can be spoofed. By using
IPSec, the SELinux can control access to remote hosts based on
cryptographic keys generated using the IPSec mechanism. This enables
access control on a per-machine basis or per-application if the remote
machine is running the same mechanism and trusted to enforce the
access control policy.
Patch design approach:
The patch's main function is to authorize a socket's access to a IPSec
policy based on their security contexts. Since the communication is
implemented by a security association, the patch ensures that the
security association's negotiated and used have the same security
context. The patch enables allocation and deallocation of such
security contexts for policies and security associations. It also
enables copying of the security context when policies are cloned.
Lastly, the patch ensures that packets that are sent without using a
IPSec security assocation with a security context are allowed to be
sent in that manner.
A presentation available at
www.selinux-symposium.org/2005/presentations/session2/2-3-jaeger.pdf
from the SELinux symposium describes the overall approach.
Patch implementation details:
The function which authorizes a socket to perform a requested
operation (send/receive) on a IPSec policy (xfrm_policy) is
selinux_xfrm_policy_lookup. The Netfilter and rcv_skb hooks ensure
that if a IPSec SA with a securit y association has not been used,
then the socket is allowed to send or receive the packet,
respectively.
The patch implements SELinux function for allocating security contexts
when policies (xfrm_policy) are created via the pfkey or xfrm_user
interfaces via selinux_xfrm_policy_alloc. When a security association
is built, SELinux allocates the security context designated by the
XFRM subsystem which is based on that of the authorized policy via
selinux_xfrm_state_alloc.
When a xfrm_policy is cloned, the security context of that policy, if
any, is copied to the clone via selinux_xfrm_policy_clone.
When a xfrm_policy or xfrm_state is freed, its security context, if
any is also freed at selinux_xfrm_policy_free or
selinux_xfrm_state_free.
Testing:
The SELinux authorization function is tested using ipsec-tools. We
created policies and security associations with particular security
contexts and added SELinux access control policy entries to verify the
authorization decision. We also made sure that packets for which no
security context was supplied (which either did or did not use
security associations) were authorized using an unlabelled context.
Signed-off-by: Trent Jaeger <tjaeger@cse.psu.edu>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
2005-12-13 23:12:40 -08:00
[SECMARK]: Add new packet controls to SELinux
Add new per-packet access controls to SELinux, replacing the old
packet controls.
Packets are labeled with the iptables SECMARK and CONNSECMARK targets,
then security policy for the packets is enforced with these controls.
To allow for a smooth transition to the new controls, the old code is
still present, but not active by default. To restore previous
behavior, the old controls may be activated at runtime by writing a
'1' to /selinux/compat_net, and also via the kernel boot parameter
selinux_compat_net. Switching between the network control models
requires the security load_policy permission. The old controls will
probably eventually be removed and any continued use is discouraged.
With this patch, the new secmark controls for SElinux are disabled by
default, so existing behavior is entirely preserved, and the user is
not affected at all.
It also provides a config option to enable the secmark controls by
default (which can always be overridden at boot and runtime). It is
also noted in the kconfig help that the user will need updated
userspace if enabling secmark controls for SELinux and that they'll
probably need the SECMARK and CONNMARK targets, and conntrack protocol
helpers, although such decisions are beyond the scope of kernel
configuration.
Signed-off-by: James Morris <jmorris@namei.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2006-06-09 00:33:33 -07:00
return err ;
}
static int selinux_socket_sock_rcv_skb ( struct sock * sk , struct sk_buff * skb )
{
2023-07-06 15:23:27 +02:00
int err , peerlbl_active , secmark_active ;
2024-07-10 14:32:25 -07:00
struct sk_security_struct * sksec = selinux_sock ( sk ) ;
2008-01-29 08:38:23 -05:00
u16 family = sk - > sk_family ;
u32 sk_sid = sksec - > sid ;
2009-07-14 12:14:09 -04:00
struct common_audit_data ad ;
2023-07-19 13:37:49 +02:00
struct lsm_network_audit net ;
2008-01-29 08:38:23 -05:00
char * addrp ;
[SECMARK]: Add new packet controls to SELinux
Add new per-packet access controls to SELinux, replacing the old
packet controls.
Packets are labeled with the iptables SECMARK and CONNSECMARK targets,
then security policy for the packets is enforced with these controls.
To allow for a smooth transition to the new controls, the old code is
still present, but not active by default. To restore previous
behavior, the old controls may be activated at runtime by writing a
'1' to /selinux/compat_net, and also via the kernel boot parameter
selinux_compat_net. Switching between the network control models
requires the security load_policy permission. The old controls will
probably eventually be removed and any continued use is discouraged.
With this patch, the new secmark controls for SElinux are disabled by
default, so existing behavior is entirely preserved, and the user is
not affected at all.
It also provides a config option to enable the secmark controls by
default (which can always be overridden at boot and runtime). It is
also noted in the kconfig help that the user will need updated
userspace if enabling secmark controls for SELinux and that they'll
probably need the SECMARK and CONNMARK targets, and conntrack protocol
helpers, although such decisions are beyond the scope of kernel
configuration.
Signed-off-by: James Morris <jmorris@namei.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2006-06-09 00:33:33 -07:00
if ( family ! = PF_INET & & family ! = PF_INET6 )
2008-01-29 08:38:23 -05:00
return 0 ;
[SECMARK]: Add new packet controls to SELinux
Add new per-packet access controls to SELinux, replacing the old
packet controls.
Packets are labeled with the iptables SECMARK and CONNSECMARK targets,
then security policy for the packets is enforced with these controls.
To allow for a smooth transition to the new controls, the old code is
still present, but not active by default. To restore previous
behavior, the old controls may be activated at runtime by writing a
'1' to /selinux/compat_net, and also via the kernel boot parameter
selinux_compat_net. Switching between the network control models
requires the security load_policy permission. The old controls will
probably eventually be removed and any continued use is discouraged.
With this patch, the new secmark controls for SElinux are disabled by
default, so existing behavior is entirely preserved, and the user is
not affected at all.
It also provides a config option to enable the secmark controls by
default (which can always be overridden at boot and runtime). It is
also noted in the kconfig help that the user will need updated
userspace if enabling secmark controls for SELinux and that they'll
probably need the SECMARK and CONNMARK targets, and conntrack protocol
helpers, although such decisions are beyond the scope of kernel
configuration.
Signed-off-by: James Morris <jmorris@namei.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2006-06-09 00:33:33 -07:00
/* Handle mapped IPv4 packets arriving via IPv6 sockets */
2006-12-04 22:00:55 +00:00
if ( family = = PF_INET6 & & skb - > protocol = = htons ( ETH_P_IP ) )
[SECMARK]: Add new packet controls to SELinux
Add new per-packet access controls to SELinux, replacing the old
packet controls.
Packets are labeled with the iptables SECMARK and CONNSECMARK targets,
then security policy for the packets is enforced with these controls.
To allow for a smooth transition to the new controls, the old code is
still present, but not active by default. To restore previous
behavior, the old controls may be activated at runtime by writing a
'1' to /selinux/compat_net, and also via the kernel boot parameter
selinux_compat_net. Switching between the network control models
requires the security load_policy permission. The old controls will
probably eventually be removed and any continued use is discouraged.
With this patch, the new secmark controls for SElinux are disabled by
default, so existing behavior is entirely preserved, and the user is
not affected at all.
It also provides a config option to enable the secmark controls by
default (which can always be overridden at boot and runtime). It is
also noted in the kconfig help that the user will need updated
userspace if enabling secmark controls for SELinux and that they'll
probably need the SECMARK and CONNMARK targets, and conntrack protocol
helpers, although such decisions are beyond the scope of kernel
configuration.
Signed-off-by: James Morris <jmorris@namei.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2006-06-09 00:33:33 -07:00
family = PF_INET ;
2008-10-10 10:16:30 -04:00
/* If any sort of compatibility mode is enabled then handoff processing
* to the selinux_sock_rcv_skb_compat ( ) function to deal with the
* special handling . We do this in an attempt to keep this function
* as fast and as clean as possible . */
2018-03-01 18:48:02 -05:00
if ( ! selinux_policycap_netpeer ( ) )
2008-10-10 10:16:30 -04:00
return selinux_sock_rcv_skb_compat ( sk , skb , family ) ;
secmark_active = selinux_secmark_enabled ( ) ;
2013-05-03 09:05:39 -04:00
peerlbl_active = selinux_peerlbl_enabled ( ) ;
2008-10-10 10:16:30 -04:00
if ( ! secmark_active & & ! peerlbl_active )
return 0 ;
2023-07-19 13:37:49 +02:00
ad_net_init_from_iif ( & ad , & net , skb - > skb_iif , family ) ;
2008-01-29 08:38:13 -05:00
err = selinux_parse_skb ( skb , & ad , & addrp , 1 , NULL ) ;
[SECMARK]: Add new packet controls to SELinux
Add new per-packet access controls to SELinux, replacing the old
packet controls.
Packets are labeled with the iptables SECMARK and CONNSECMARK targets,
then security policy for the packets is enforced with these controls.
To allow for a smooth transition to the new controls, the old code is
still present, but not active by default. To restore previous
behavior, the old controls may be activated at runtime by writing a
'1' to /selinux/compat_net, and also via the kernel boot parameter
selinux_compat_net. Switching between the network control models
requires the security load_policy permission. The old controls will
probably eventually be removed and any continued use is discouraged.
With this patch, the new secmark controls for SElinux are disabled by
default, so existing behavior is entirely preserved, and the user is
not affected at all.
It also provides a config option to enable the secmark controls by
default (which can always be overridden at boot and runtime). It is
also noted in the kconfig help that the user will need updated
userspace if enabling secmark controls for SELinux and that they'll
probably need the SECMARK and CONNMARK targets, and conntrack protocol
helpers, although such decisions are beyond the scope of kernel
configuration.
Signed-off-by: James Morris <jmorris@namei.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2006-06-09 00:33:33 -07:00
if ( err )
2008-01-29 08:38:23 -05:00
return err ;
[SECMARK]: Add new packet controls to SELinux
Add new per-packet access controls to SELinux, replacing the old
packet controls.
Packets are labeled with the iptables SECMARK and CONNSECMARK targets,
then security policy for the packets is enforced with these controls.
To allow for a smooth transition to the new controls, the old code is
still present, but not active by default. To restore previous
behavior, the old controls may be activated at runtime by writing a
'1' to /selinux/compat_net, and also via the kernel boot parameter
selinux_compat_net. Switching between the network control models
requires the security load_policy permission. The old controls will
probably eventually be removed and any continued use is discouraged.
With this patch, the new secmark controls for SElinux are disabled by
default, so existing behavior is entirely preserved, and the user is
not affected at all.
It also provides a config option to enable the secmark controls by
default (which can always be overridden at boot and runtime). It is
also noted in the kconfig help that the user will need updated
userspace if enabling secmark controls for SELinux and that they'll
probably need the SECMARK and CONNMARK targets, and conntrack protocol
helpers, although such decisions are beyond the scope of kernel
configuration.
Signed-off-by: James Morris <jmorris@namei.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2006-06-09 00:33:33 -07:00
2008-10-10 10:16:30 -04:00
if ( peerlbl_active ) {
2008-01-29 08:43:36 -05:00
u32 peer_sid ;
err = selinux_skb_peerlbl_sid ( skb , family , & peer_sid ) ;
2008-01-29 08:49:27 -05:00
if ( err )
return err ;
2014-09-10 17:09:57 -04:00
err = selinux_inet_sys_rcv_skb ( sock_net ( sk ) , skb - > skb_iif ,
addrp , family , peer_sid , & ad ) ;
2008-10-10 10:16:31 -04:00
if ( err ) {
2016-06-27 15:06:16 -04:00
selinux_netlbl_err ( skb , family , err , 0 ) ;
2008-01-29 08:43:36 -05:00
return err ;
2008-10-10 10:16:31 -04:00
}
2023-03-09 13:30:37 -05:00
err = avc_has_perm ( sk_sid , peer_sid , SECCLASS_PEER ,
2008-01-29 08:43:36 -05:00
PEER__RECV , & ad ) ;
2013-12-23 17:45:01 -05:00
if ( err ) {
2016-06-27 15:06:16 -04:00
selinux_netlbl_err ( skb , family , err , 0 ) ;
2013-12-23 17:45:01 -05:00
return err ;
}
2008-01-29 08:43:36 -05:00
}
2008-10-10 10:16:30 -04:00
if ( secmark_active ) {
2023-03-09 13:30:37 -05:00
err = avc_has_perm ( sk_sid , skb - > secmark , SECCLASS_PACKET ,
2008-01-29 08:49:27 -05:00
PACKET__RECV , & ad ) ;
if ( err )
return err ;
}
2008-01-29 08:43:36 -05:00
return err ;
2005-04-16 15:20:36 -07:00
}
2022-10-10 12:31:21 -04:00
/*
 * selinux_socket_getpeersec_stream - copy the peer's security context out
 * @sock: socket being queried
 * @optval: destination for the context string
 * @optlen: destination for the context length
 * @len: number of bytes available behind @optval
 *
 * A peer SID is only taken from the socket for the UNIX stream, TCP and
 * SCTP socket classes.
 *
 * Returns 0 on success, -ENOPROTOOPT when there is no peer SID, the error
 * from security_sid_to_context() when the conversion fails, -ERANGE when
 * the context is longer than @len, and -EFAULT when either copy fails.
 * The context length is written to @optlen in the -ERANGE case as well.
 */
static int selinux_socket_getpeersec_stream(struct socket *sock,
					    sockptr_t optval, sockptr_t optlen,
					    unsigned int len)
{
	struct sk_security_struct *sksec = selinux_sock(sock->sk);
	char *ctx = NULL;
	u32 ctx_len;
	u32 peer_sid;
	int rc;

	/* Socket classes other than these three never report a peer. */
	if (sksec->sclass != SECCLASS_UNIX_STREAM_SOCKET &&
	    sksec->sclass != SECCLASS_TCP_SOCKET &&
	    sksec->sclass != SECCLASS_SCTP_SOCKET)
		return -ENOPROTOOPT;

	peer_sid = sksec->peer_sid;
	if (peer_sid == SECSID_NULL)
		return -ENOPROTOOPT;

	rc = security_sid_to_context(peer_sid, &ctx, &ctx_len);
	if (rc)
		return rc;

	if (ctx_len > len)
		rc = -ERANGE;
	else if (copy_to_sockptr(optval, ctx, ctx_len))
		rc = -EFAULT;

	/* The length is reported back even when the buffer was too small. */
	if (copy_to_sockptr(optlen, &ctx_len, sizeof(ctx_len)))
		rc = -EFAULT;

	kfree(ctx);
	return rc;
}
2024-01-30 16:16:29 -05:00
/*
 * selinux_socket_getpeersec_dgram - look up the peer SID for a packet/socket
 * @sock: socket; tested for NULL before use
 * @skb: packet; tested for NULL before use
 * @secid: where the resulting SID is stored
 *
 * The address family is PF_INET or PF_INET6 when @skb carries the matching
 * protocol, and is read from @sock otherwise.  For a PF_UNIX @sock the SID
 * stored on the socket's inode is used; otherwise, when @skb is present,
 * selinux_skb_peerlbl_sid() supplies the SID.
 *
 * Returns 0 with the SID in @secid, -EINVAL when no family can be
 * determined, or -ENOPROTOOPT when the SID is SECSID_NULL.  @secid holds
 * SECSID_NULL on both error returns.
 */
static int selinux_socket_getpeersec_dgram(struct socket *sock,
					   struct sk_buff *skb, u32 *secid)
{
	u32 sid = SECSID_NULL;
	u16 family;

	if (skb && skb->protocol == htons(ETH_P_IP)) {
		family = PF_INET;
	} else if (skb && skb->protocol == htons(ETH_P_IPV6)) {
		family = PF_INET6;
	} else if (!sock) {
		/* Neither a usable packet nor a socket to fall back on. */
		*secid = SECSID_NULL;
		return -EINVAL;
	} else {
		family = sock->sk->sk_family;
	}

	if (sock && family == PF_UNIX) {
		sid = inode_security_novalidate(SOCK_INODE(sock))->sid;
	} else if (skb) {
		/*
		 * The helper's return value is not checked here; only the
		 * SID it stores decides the outcome below.
		 */
		selinux_skb_peerlbl_sid(skb, family, &sid);
	}

	*secid = sid;
	return sid == SECSID_NULL ? -ENOPROTOOPT : 0;
}
2005-10-21 03:20:43 -04:00
/*
 * selinux_sk_alloc_security - initialise the SELinux state of a sock
 * @sk: sock whose security blob is set up
 * @family: not used by this function
 * @priority: not used by this function
 *
 * The socket SID and the peer SID both start as SECINITSID_UNLABELED and
 * the class as SECCLASS_SOCKET; the blob is then passed to
 * selinux_netlbl_sk_security_reset().  Always returns 0.
 */
static int selinux_sk_alloc_security(struct sock *sk, int family, gfp_t priority)
{
	struct sk_security_struct *blob = selinux_sock(sk);

	blob->sclass = SECCLASS_SOCKET;
	blob->sid = SECINITSID_UNLABELED;
	blob->peer_sid = SECINITSID_UNLABELED;
	selinux_netlbl_sk_security_reset(blob);

	return 0;
}
/*
 * selinux_sk_free_security - release the SELinux state attached to a sock
 * @sk: sock whose security blob is being released
 *
 * Passes the sock's security blob to selinux_netlbl_sk_security_free().
 */
static void selinux_sk_free_security(struct sock *sk)
{
	selinux_netlbl_sk_security_free(selinux_sock(sk));
}
2006-08-04 23:08:56 -07:00
static void selinux_sk_clone_security ( const struct sock * sk , struct sock * newsk )
[LSM-IPSec]: Per-packet access control.
This patch series implements per packet access control via the
extension of the Linux Security Modules (LSM) interface by hooks in
the XFRM and pfkey subsystems that leverage IPSec security
associations to label packets. Extensions to the SELinux LSM are
included that leverage the patch for this purpose.
This patch implements the changes necessary to the SELinux LSM to
create, deallocate, and use security contexts for policies
(xfrm_policy) and security associations (xfrm_state) that enable
control of a socket's ability to send and receive packets.
Patch purpose:
The patch is designed to enable the SELinux LSM to implement access
control on individual packets based on the strongly authenticated
IPSec security association. Such access controls augment the existing
ones in SELinux based on network interface and IP address. The former
are very coarse-grained, and the latter can be spoofed. By using
IPSec, the SELinux can control access to remote hosts based on
cryptographic keys generated using the IPSec mechanism. This enables
access control on a per-machine basis or per-application if the remote
machine is running the same mechanism and trusted to enforce the
access control policy.
Patch design approach:
The patch's main function is to authorize a socket's access to an IPSec
policy based on their security contexts. Since the communication is
implemented by a security association, the patch ensures that the
security associations negotiated and used have the same security
context. The patch enables allocation and deallocation of such
security contexts for policies and security associations. It also
enables copying of the security context when policies are cloned.
Lastly, the patch ensures that packets that are sent without using an
IPSec security association with a security context are allowed to be
sent in that manner.
A presentation available at
www.selinux-symposium.org/2005/presentations/session2/2-3-jaeger.pdf
from the SELinux symposium describes the overall approach.
Patch implementation details:
The function which authorizes a socket to perform a requested
operation (send/receive) on a IPSec policy (xfrm_policy) is
selinux_xfrm_policy_lookup. The Netfilter and rcv_skb hooks ensure
that if a IPSec SA with a securit y association has not been used,
then the socket is allowed to send or receive the packet,
respectively.
The patch implements SELinux function for allocating security contexts
when policies (xfrm_policy) are created via the pfkey or xfrm_user
interfaces via selinux_xfrm_policy_alloc. When a security association
is built, SELinux allocates the security context designated by the
XFRM subsystem which is based on that of the authorized policy via
selinux_xfrm_state_alloc.
When a xfrm_policy is cloned, the security context of that policy, if
any, is copied to the clone via selinux_xfrm_policy_clone.
When a xfrm_policy or xfrm_state is freed, its security context, if
any is also freed at selinux_xfrm_policy_free or
selinux_xfrm_state_free.
Testing:
The SELinux authorization function is tested using ipsec-tools. We
created policies and security associations with particular security
contexts and added SELinux access control policy entries to verify the
authorization decision. We also made sure that packets for which no
security context was supplied (which either did or did not use
security associations) were authorized using an unlabelled context.
Signed-off-by: Trent Jaeger <tjaeger@cse.psu.edu>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
2005-12-13 23:12:40 -08:00
{
2024-07-10 14:32:25 -07:00
struct sk_security_struct * sksec = selinux_sock ( sk ) ;
struct sk_security_struct * newsksec = selinux_sock ( newsk ) ;
[LSM-IPSec]: Per-packet access control.
This patch series implements per packet access control via the
extension of the Linux Security Modules (LSM) interface by hooks in
the XFRM and pfkey subsystems that leverage IPSec security
associations to label packets. Extensions to the SELinux LSM are
included that leverage the patch for this purpose.
This patch implements the changes necessary to the SELinux LSM to
create, deallocate, and use security contexts for policies
(xfrm_policy) and security associations (xfrm_state) that enable
control of a socket's ability to send and receive packets.
Patch purpose:
The patch is designed to enable the SELinux LSM to implement access
control on individual packets based on the strongly authenticated
IPSec security association. Such access controls augment the existing
ones in SELinux based on network interface and IP address. The former
are very coarse-grained, and the latter can be spoofed. By using
IPSec, the SELinux can control access to remote hosts based on
cryptographic keys generated using the IPSec mechanism. This enables
access control on a per-machine basis or per-application if the remote
machine is running the same mechanism and trusted to enforce the
access control policy.
Patch design approach:
The patch's main function is to authorize a socket's access to a IPSec
policy based on their security contexts. Since the communication is
implemented by a security association, the patch ensures that the
security association's negotiated and used have the same security
context. The patch enables allocation and deallocation of such
security contexts for policies and security associations. It also
enables copying of the security context when policies are cloned.
Lastly, the patch ensures that packets that are sent without using a
IPSec security assocation with a security context are allowed to be
sent in that manner.
A presentation available at
www.selinux-symposium.org/2005/presentations/session2/2-3-jaeger.pdf
from the SELinux symposium describes the overall approach.
Patch implementation details:
The function which authorizes a socket to perform a requested
operation (send/receive) on a IPSec policy (xfrm_policy) is
selinux_xfrm_policy_lookup. The Netfilter and rcv_skb hooks ensure
that if a IPSec SA with a securit y association has not been used,
then the socket is allowed to send or receive the packet,
respectively.
The patch implements SELinux function for allocating security contexts
when policies (xfrm_policy) are created via the pfkey or xfrm_user
interfaces via selinux_xfrm_policy_alloc. When a security association
is built, SELinux allocates the security context designated by the
XFRM subsystem which is based on that of the authorized policy via
selinux_xfrm_state_alloc.
When a xfrm_policy is cloned, the security context of that policy, if
any, is copied to the clone via selinux_xfrm_policy_clone.
When a xfrm_policy or xfrm_state is freed, its security context, if
any is also freed at selinux_xfrm_policy_free or
selinux_xfrm_state_free.
Testing:
The SELinux authorization function is tested using ipsec-tools. We
created policies and security associations with particular security
contexts and added SELinux access control policy entries to verify the
authorization decision. We also made sure that packets for which no
security context was supplied (which either did or did not use
security associations) were authorized using an unlabelled context.
Signed-off-by: Trent Jaeger <tjaeger@cse.psu.edu>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
2005-12-13 23:12:40 -08:00
2010-04-07 15:08:46 -04:00
newsksec - > sid = sksec - > sid ;
newsksec - > peer_sid = sksec - > peer_sid ;
newsksec - > sclass = sksec - > sclass ;
2006-08-29 17:53:48 -07:00
2010-04-07 15:08:46 -04:00
selinux_netlbl_sk_security_reset ( newsksec ) ;
2006-08-04 23:08:56 -07:00
}
2023-07-11 15:06:08 +02:00
/*
 * selinux_sk_getsecid - report the SELinux SID associated with a socket.
 * @sk: socket to query; may be NULL
 * @secid: output location that receives the SID
 *
 * When @sk is NULL the initial SID SECINITSID_ANY_SOCKET is stored in
 * @secid; otherwise the sid field of the SELinux blob of @sk is stored.
 * @secid itself is written unconditionally and is not checked for NULL.
 */
static void selinux_sk_getsecid ( const struct sock * sk , u32 * secid )
2006-08-04 23:08:56 -07:00
{
[LSM-IPSec]: Per-packet access control.
This patch series implements per packet access control via the
extension of the Linux Security Modules (LSM) interface by hooks in
the XFRM and pfkey subsystems that leverage IPSec security
associations to label packets. Extensions to the SELinux LSM are
included that leverage the patch for this purpose.
This patch implements the changes necessary to the SELinux LSM to
create, deallocate, and use security contexts for policies
(xfrm_policy) and security associations (xfrm_state) that enable
control of a socket's ability to send and receive packets.
Patch purpose:
The patch is designed to enable the SELinux LSM to implement access
control on individual packets based on the strongly authenticated
IPSec security association. Such access controls augment the existing
ones in SELinux based on network interface and IP address. The former
are very coarse-grained, and the latter can be spoofed. By using
IPSec, the SELinux can control access to remote hosts based on
cryptographic keys generated using the IPSec mechanism. This enables
access control on a per-machine basis or per-application if the remote
machine is running the same mechanism and trusted to enforce the
access control policy.
Patch design approach:
The patch's main function is to authorize a socket's access to a IPSec
policy based on their security contexts. Since the communication is
implemented by a security association, the patch ensures that the
security association's negotiated and used have the same security
context. The patch enables allocation and deallocation of such
security contexts for policies and security associations. It also
enables copying of the security context when policies are cloned.
Lastly, the patch ensures that packets that are sent without using a
IPSec security assocation with a security context are allowed to be
sent in that manner.
A presentation available at
www.selinux-symposium.org/2005/presentations/session2/2-3-jaeger.pdf
from the SELinux symposium describes the overall approach.
Patch implementation details:
The function which authorizes a socket to perform a requested
operation (send/receive) on a IPSec policy (xfrm_policy) is
selinux_xfrm_policy_lookup. The Netfilter and rcv_skb hooks ensure
that if a IPSec SA with a securit y association has not been used,
then the socket is allowed to send or receive the packet,
respectively.
The patch implements SELinux function for allocating security contexts
when policies (xfrm_policy) are created via the pfkey or xfrm_user
interfaces via selinux_xfrm_policy_alloc. When a security association
is built, SELinux allocates the security context designated by the
XFRM subsystem which is based on that of the authorized policy via
selinux_xfrm_state_alloc.
When a xfrm_policy is cloned, the security context of that policy, if
any, is copied to the clone via selinux_xfrm_policy_clone.
When a xfrm_policy or xfrm_state is freed, its security context, if
any is also freed at selinux_xfrm_policy_free or
selinux_xfrm_state_free.
Testing:
The SELinux authorization function is tested using ipsec-tools. We
created policies and security associations with particular security
contexts and added SELinux access control policy entries to verify the
authorization decision. We also made sure that packets for which no
security context was supplied (which either did or did not use
security associations) were authorized using an unlabelled context.
Signed-off-by: Trent Jaeger <tjaeger@cse.psu.edu>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
2005-12-13 23:12:40 -08:00
/* No socket to read a label from: fall back to the generic socket SID. */
if ( ! sk )
2006-08-04 23:12:42 -07:00
* secid = SECINITSID_ANY_SOCKET ;
2006-08-04 23:08:56 -07:00
else {
2024-07-10 14:32:25 -07:00
const struct sk_security_struct * sksec = selinux_sock ( sk ) ;
[LSM-IPSec]: Per-packet access control.
This patch series implements per packet access control via the
extension of the Linux Security Modules (LSM) interface by hooks in
the XFRM and pfkey subsystems that leverage IPSec security
associations to label packets. Extensions to the SELinux LSM are
included that leverage the patch for this purpose.
This patch implements the changes necessary to the SELinux LSM to
create, deallocate, and use security contexts for policies
(xfrm_policy) and security associations (xfrm_state) that enable
control of a socket's ability to send and receive packets.
Patch purpose:
The patch is designed to enable the SELinux LSM to implement access
control on individual packets based on the strongly authenticated
IPSec security association. Such access controls augment the existing
ones in SELinux based on network interface and IP address. The former
are very coarse-grained, and the latter can be spoofed. By using
IPSec, the SELinux can control access to remote hosts based on
cryptographic keys generated using the IPSec mechanism. This enables
access control on a per-machine basis or per-application if the remote
machine is running the same mechanism and trusted to enforce the
access control policy.
Patch design approach:
The patch's main function is to authorize a socket's access to a IPSec
policy based on their security contexts. Since the communication is
implemented by a security association, the patch ensures that the
security association's negotiated and used have the same security
context. The patch enables allocation and deallocation of such
security contexts for policies and security associations. It also
enables copying of the security context when policies are cloned.
Lastly, the patch ensures that packets that are sent without using a
IPSec security assocation with a security context are allowed to be
sent in that manner.
A presentation available at
www.selinux-symposium.org/2005/presentations/session2/2-3-jaeger.pdf
from the SELinux symposium describes the overall approach.
Patch implementation details:
The function which authorizes a socket to perform a requested
operation (send/receive) on a IPSec policy (xfrm_policy) is
selinux_xfrm_policy_lookup. The Netfilter and rcv_skb hooks ensure
that if a IPSec SA with a securit y association has not been used,
then the socket is allowed to send or receive the packet,
respectively.
The patch implements SELinux function for allocating security contexts
when policies (xfrm_policy) are created via the pfkey or xfrm_user
interfaces via selinux_xfrm_policy_alloc. When a security association
is built, SELinux allocates the security context designated by the
XFRM subsystem which is based on that of the authorized policy via
selinux_xfrm_state_alloc.
When a xfrm_policy is cloned, the security context of that policy, if
any, is copied to the clone via selinux_xfrm_policy_clone.
When a xfrm_policy or xfrm_state is freed, its security context, if
any is also freed at selinux_xfrm_policy_free or
selinux_xfrm_state_free.
Testing:
The SELinux authorization function is tested using ipsec-tools. We
created policies and security associations with particular security
contexts and added SELinux access control policy entries to verify the
authorization decision. We also made sure that packets for which no
security context was supplied (which either did or did not use
security associations) were authorized using an unlabelled context.
Signed-off-by: Trent Jaeger <tjaeger@cse.psu.edu>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
2005-12-13 23:12:40 -08:00
2006-08-04 23:12:42 -07:00
/* Normal case: hand back the SID stored in the socket blob. */
* secid = sksec - > sid ;
2006-08-04 23:08:56 -07:00
}
[LSM-IPSec]: Per-packet access control.
This patch series implements per packet access control via the
extension of the Linux Security Modules (LSM) interface by hooks in
the XFRM and pfkey subsystems that leverage IPSec security
associations to label packets. Extensions to the SELinux LSM are
included that leverage the patch for this purpose.
This patch implements the changes necessary to the SELinux LSM to
create, deallocate, and use security contexts for policies
(xfrm_policy) and security associations (xfrm_state) that enable
control of a socket's ability to send and receive packets.
Patch purpose:
The patch is designed to enable the SELinux LSM to implement access
control on individual packets based on the strongly authenticated
IPSec security association. Such access controls augment the existing
ones in SELinux based on network interface and IP address. The former
are very coarse-grained, and the latter can be spoofed. By using
IPSec, the SELinux can control access to remote hosts based on
cryptographic keys generated using the IPSec mechanism. This enables
access control on a per-machine basis or per-application if the remote
machine is running the same mechanism and trusted to enforce the
access control policy.
Patch design approach:
The patch's main function is to authorize a socket's access to a IPSec
policy based on their security contexts. Since the communication is
implemented by a security association, the patch ensures that the
security association's negotiated and used have the same security
context. The patch enables allocation and deallocation of such
security contexts for policies and security associations. It also
enables copying of the security context when policies are cloned.
Lastly, the patch ensures that packets that are sent without using a
IPSec security assocation with a security context are allowed to be
sent in that manner.
A presentation available at
www.selinux-symposium.org/2005/presentations/session2/2-3-jaeger.pdf
from the SELinux symposium describes the overall approach.
Patch implementation details:
The function which authorizes a socket to perform a requested
operation (send/receive) on a IPSec policy (xfrm_policy) is
selinux_xfrm_policy_lookup. The Netfilter and rcv_skb hooks ensure
that if a IPSec SA with a securit y association has not been used,
then the socket is allowed to send or receive the packet,
respectively.
The patch implements SELinux function for allocating security contexts
when policies (xfrm_policy) are created via the pfkey or xfrm_user
interfaces via selinux_xfrm_policy_alloc. When a security association
is built, SELinux allocates the security context designated by the
XFRM subsystem which is based on that of the authorized policy via
selinux_xfrm_state_alloc.
When a xfrm_policy is cloned, the security context of that policy, if
any, is copied to the clone via selinux_xfrm_policy_clone.
When a xfrm_policy or xfrm_state is freed, its security context, if
any is also freed at selinux_xfrm_policy_free or
selinux_xfrm_state_free.
Testing:
The SELinux authorization function is tested using ipsec-tools. We
created policies and security associations with particular security
contexts and added SELinux access control policy entries to verify the
authorization decision. We also made sure that packets for which no
security context was supplied (which either did or did not use
security associations) were authorized using an unlabelled context.
Signed-off-by: Trent Jaeger <tjaeger@cse.psu.edu>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
2005-12-13 23:12:40 -08:00
}
2008-04-17 13:17:49 -04:00
/*
 * selinux_sock_graft - synchronize labels between a sock and its socket inode.
 * @sk: sock being attached
 * @parent: socket whose inode (via SOCK_INODE()) is paired with @sk
 *
 * For PF_INET, PF_INET6 and PF_UNIX socks the SID of @sk is written into
 * the security blob of the inode of @parent. For every family the object
 * class recorded on the inode is copied into the blob of @sk.
 */
static void selinux_sock_graft ( struct sock * sk , struct socket * parent )
2006-07-24 23:32:50 -07:00
{
2015-12-24 11:09:40 -05:00
/*
 * NOTE(review): the _novalidate variant presumably returns the inode
 * blob without revalidating its label; confirm against the helper.
 */
struct inode_security_struct * isec =
inode_security_novalidate ( SOCK_INODE ( parent ) ) ;
2024-07-10 14:32:25 -07:00
struct sk_security_struct * sksec = selinux_sock ( sk ) ;
2006-07-24 23:32:50 -07:00
2014-07-28 10:42:48 -04:00
/* Only these three families push the sock SID onto the inode. */
if ( sk - > sk_family = = PF_INET | | sk - > sk_family = = PF_INET6 | |
sk - > sk_family = = PF_UNIX )
2006-09-29 15:50:25 -07:00
isec - > sid = sksec - > sid ;
2008-01-29 08:38:23 -05:00
/* The class flows the other way, inode to sock, for all families. */
sksec - > sclass = isec - > sclass ;
2006-07-24 23:32:50 -07:00
}
security: implement sctp_assoc_established hook in selinux
Do this by extracting the peer labeling per-association logic from
selinux_sctp_assoc_request() into a new helper
selinux_sctp_process_new_assoc() and use this helper in both
selinux_sctp_assoc_request() and selinux_sctp_assoc_established(). This
ensures that the peer labeling behavior as documented in
Documentation/security/SCTP.rst is applied both on the client and server
side:
"""
An SCTP socket will only have one peer label assigned to it. This will be
assigned during the establishment of the first association. Any further
associations on this socket will have their packet peer label compared to
the sockets peer label, and only if they are different will the
``association`` permission be validated. This is validated by checking the
socket peer sid against the received packets peer sid to determine whether
the association should be allowed or denied.
"""
At the same time, it also ensures that the peer label of the association
is set to the correct value, such that if it is peeled off into a new
socket, the socket's peer label will then be set to the association's
peer label, same as it already works on the server side.
While selinux_inet_conn_established() (which we are replacing by
selinux_sctp_assoc_established() for SCTP) only deals with assigning a
peer label to the connection (socket), in case of SCTP we need to also
copy the (local) socket label to the association, so that
selinux_sctp_sk_clone() can then pick it up for the new socket in case
of SCTP peeloff.
Careful readers will notice that the selinux_sctp_process_new_assoc()
helper also includes the "IPv4 packet received over an IPv6 socket"
check, even though it hadn't been in selinux_sctp_assoc_request()
before. While such a check is not necessary in
selinux_inet_conn_request() (because struct request_sock's family field
is already set according to the skb's family), here it is needed, as we
don't have request_sock and we take the initial family from the socket.
In selinux_sctp_assoc_established() it is similarly needed as well (and
also selinux_inet_conn_established() already has it).
Fixes: 72e89f50084c ("security: Add support for SCTP security hooks")
Reported-by: Prashanth Prahlad <pprahlad@redhat.com>
Based-on-patch-by: Xin Long <lucien.xin@gmail.com>
Reviewed-by: Xin Long <lucien.xin@gmail.com>
Tested-by: Richard Haines <richard_c_haines@btinternet.com>
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
2022-02-12 18:59:22 +01:00
/*
* Determines peer_secid for the asoc and updates socket ' s peer label
* if it ' s the first association on the socket .
2018-02-13 20:57:18 +00:00
*/
security: implement sctp_assoc_established hook in selinux
Do this by extracting the peer labeling per-association logic from
selinux_sctp_assoc_request() into a new helper
selinux_sctp_process_new_assoc() and use this helper in both
selinux_sctp_assoc_request() and selinux_sctp_assoc_established(). This
ensures that the peer labeling behavior as documented in
Documentation/security/SCTP.rst is applied both on the client and server
side:
"""
An SCTP socket will only have one peer label assigned to it. This will be
assigned during the establishment of the first association. Any further
associations on this socket will have their packet peer label compared to
the sockets peer label, and only if they are different will the
``association`` permission be validated. This is validated by checking the
socket peer sid against the received packets peer sid to determine whether
the association should be allowed or denied.
"""
At the same time, it also ensures that the peer label of the association
is set to the correct value, such that if it is peeled off into a new
socket, the socket's peer label will then be set to the association's
peer label, same as it already works on the server side.
While selinux_inet_conn_established() (which we are replacing by
selinux_sctp_assoc_established() for SCTP) only deals with assigning a
peer label to the connection (socket), in case of SCTP we need to also
copy the (local) socket label to the association, so that
selinux_sctp_sk_clone() can then pick it up for the new socket in case
of SCTP peeloff.
Careful readers will notice that the selinux_sctp_process_new_assoc()
helper also includes the "IPv4 packet received over an IPv6 socket"
check, even though it hadn't been in selinux_sctp_assoc_request()
before. While such a check is not necessary in
selinux_inet_conn_request() (because struct request_sock's family field
is already set according to the skb's family), here it is needed, as we
don't have request_sock and we take the initial family from the socket.
In selinux_sctp_assoc_established() it is similarly needed as well (and
also selinux_inet_conn_established() already has it).
Fixes: 72e89f50084c ("security: Add support for SCTP security hooks")
Reported-by: Prashanth Prahlad <pprahlad@redhat.com>
Based-on-patch-by: Xin Long <lucien.xin@gmail.com>
Reviewed-by: Xin Long <lucien.xin@gmail.com>
Tested-by: Richard Haines <richard_c_haines@btinternet.com>
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
2022-02-12 18:59:22 +01:00
static int selinux_sctp_process_new_assoc ( struct sctp_association * asoc ,
struct sk_buff * skb )
2018-02-13 20:57:18 +00:00
{
security: implement sctp_assoc_established hook in selinux
Do this by extracting the peer labeling per-association logic from
selinux_sctp_assoc_request() into a new helper
selinux_sctp_process_new_assoc() and use this helper in both
selinux_sctp_assoc_request() and selinux_sctp_assoc_established(). This
ensures that the peer labeling behavior as documented in
Documentation/security/SCTP.rst is applied both on the client and server
side:
"""
An SCTP socket will only have one peer label assigned to it. This will be
assigned during the establishment of the first association. Any further
associations on this socket will have their packet peer label compared to
the sockets peer label, and only if they are different will the
``association`` permission be validated. This is validated by checking the
socket peer sid against the received packets peer sid to determine whether
the association should be allowed or denied.
"""
At the same time, it also ensures that the peer label of the association
is set to the correct value, such that if it is peeled off into a new
socket, the socket's peer label will then be set to the association's
peer label, same as it already works on the server side.
While selinux_inet_conn_established() (which we are replacing by
selinux_sctp_assoc_established() for SCTP) only deals with assigning a
peer label to the connection (socket), in case of SCTP we need to also
copy the (local) socket label to the association, so that
selinux_sctp_sk_clone() can then pick it up for the new socket in case
of SCTP peeloff.
Careful readers will notice that the selinux_sctp_process_new_assoc()
helper also includes the "IPv4 packet received over an IPv6 socket"
check, even though it hadn't been in selinux_sctp_assoc_request()
before. While such check is not necessary in
selinux_inet_conn_request() (because struct request_sock's family field
is already set according to the skb's family), here it is needed, as we
don't have request_sock and we take the initial family from the socket.
In selinux_sctp_assoc_established() it is similarly needed as well (and
also selinux_inet_conn_established() already has it).
Fixes: 72e89f50084c ("security: Add support for SCTP security hooks")
Reported-by: Prashanth Prahlad <pprahlad@redhat.com>
Based-on-patch-by: Xin Long <lucien.xin@gmail.com>
Reviewed-by: Xin Long <lucien.xin@gmail.com>
Tested-by: Richard Haines <richard_c_haines@btinternet.com>
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
2022-02-12 18:59:22 +01:00
struct sock * sk = asoc - > base . sk ;
u16 family = sk - > sk_family ;
2024-07-10 14:32:25 -07:00
struct sk_security_struct * sksec = selinux_sock ( sk ) ;
2018-02-13 20:57:18 +00:00
struct common_audit_data ad ;
2023-07-19 13:37:49 +02:00
struct lsm_network_audit net ;
security: implement sctp_assoc_established hook in selinux
Do this by extracting the peer labeling per-association logic from
selinux_sctp_assoc_request() into a new helper
selinux_sctp_process_new_assoc() and use this helper in both
selinux_sctp_assoc_request() and selinux_sctp_assoc_established(). This
ensures that the peer labeling behavior as documented in
Documentation/security/SCTP.rst is applied both on the client and server
side:
"""
An SCTP socket will only have one peer label assigned to it. This will be
assigned during the establishment of the first association. Any further
associations on this socket will have their packet peer label compared to
the sockets peer label, and only if they are different will the
``association`` permission be validated. This is validated by checking the
socket peer sid against the received packets peer sid to determine whether
the association should be allowed or denied.
"""
At the same time, it also ensures that the peer label of the association
is set to the correct value, such that if it is peeled off into a new
socket, the socket's peer label will then be set to the association's
peer label, same as it already works on the server side.
While selinux_inet_conn_established() (which we are replacing by
selinux_sctp_assoc_established() for SCTP) only deals with assigning a
peer label to the connection (socket), in case of SCTP we need to also
copy the (local) socket label to the association, so that
selinux_sctp_sk_clone() can then pick it up for the new socket in case
of SCTP peeloff.
Careful readers will notice that the selinux_sctp_process_new_assoc()
helper also includes the "IPv4 packet received over an IPv6 socket"
check, even though it hadn't been in selinux_sctp_assoc_request()
before. While such check is not necessary in
selinux_inet_conn_request() (because struct request_sock's family field
is already set according to the skb's family), here it is needed, as we
don't have request_sock and we take the initial family from the socket.
In selinux_sctp_assoc_established() it is similarly needed as well (and
also selinux_inet_conn_established() already has it).
Fixes: 72e89f50084c ("security: Add support for SCTP security hooks")
Reported-by: Prashanth Prahlad <pprahlad@redhat.com>
Based-on-patch-by: Xin Long <lucien.xin@gmail.com>
Reviewed-by: Xin Long <lucien.xin@gmail.com>
Tested-by: Richard Haines <richard_c_haines@btinternet.com>
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
2022-02-12 18:59:22 +01:00
int err ;
2018-02-13 20:57:18 +00:00
security: implement sctp_assoc_established hook in selinux
Do this by extracting the peer labeling per-association logic from
selinux_sctp_assoc_request() into a new helper
selinux_sctp_process_new_assoc() and use this helper in both
selinux_sctp_assoc_request() and selinux_sctp_assoc_established(). This
ensures that the peer labeling behavior as documented in
Documentation/security/SCTP.rst is applied both on the client and server
side:
"""
An SCTP socket will only have one peer label assigned to it. This will be
assigned during the establishment of the first association. Any further
associations on this socket will have their packet peer label compared to
the sockets peer label, and only if they are different will the
``association`` permission be validated. This is validated by checking the
socket peer sid against the received packets peer sid to determine whether
the association should be allowed or denied.
"""
At the same time, it also ensures that the peer label of the association
is set to the correct value, such that if it is peeled off into a new
socket, the socket's peer label will then be set to the association's
peer label, same as it already works on the server side.
While selinux_inet_conn_established() (which we are replacing by
selinux_sctp_assoc_established() for SCTP) only deals with assigning a
peer label to the connection (socket), in case of SCTP we need to also
copy the (local) socket label to the association, so that
selinux_sctp_sk_clone() can then pick it up for the new socket in case
of SCTP peeloff.
Careful readers will notice that the selinux_sctp_process_new_assoc()
helper also includes the "IPv4 packet received over an IPv6 socket"
check, even though it hadn't been in selinux_sctp_assoc_request()
before. While such check is not necessary in
selinux_inet_conn_request() (because struct request_sock's family field
is already set according to the skb's family), here it is needed, as we
don't have request_sock and we take the initial family from the socket.
In selinux_sctp_assoc_established() it is similarly needed as well (and
also selinux_inet_conn_established() already has it).
Fixes: 72e89f50084c ("security: Add support for SCTP security hooks")
Reported-by: Prashanth Prahlad <pprahlad@redhat.com>
Based-on-patch-by: Xin Long <lucien.xin@gmail.com>
Reviewed-by: Xin Long <lucien.xin@gmail.com>
Tested-by: Richard Haines <richard_c_haines@btinternet.com>
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
2022-02-12 18:59:22 +01:00
/* handle mapped IPv4 packets arriving via IPv6 sockets */
if ( family = = PF_INET6 & & skb - > protocol = = htons ( ETH_P_IP ) )
family = PF_INET ;
2018-02-13 20:57:18 +00:00
security: implement sctp_assoc_established hook in selinux
Do this by extracting the peer labeling per-association logic from
selinux_sctp_assoc_request() into a new helper
selinux_sctp_process_new_assoc() and use this helper in both
selinux_sctp_assoc_request() and selinux_sctp_assoc_established(). This
ensures that the peer labeling behavior as documented in
Documentation/security/SCTP.rst is applied both on the client and server
side:
"""
An SCTP socket will only have one peer label assigned to it. This will be
assigned during the establishment of the first association. Any further
associations on this socket will have their packet peer label compared to
the sockets peer label, and only if they are different will the
``association`` permission be validated. This is validated by checking the
socket peer sid against the received packets peer sid to determine whether
the association should be allowed or denied.
"""
At the same time, it also ensures that the peer label of the association
is set to the correct value, such that if it is peeled off into a new
socket, the socket's peer label will then be set to the association's
peer label, same as it already works on the server side.
While selinux_inet_conn_established() (which we are replacing by
selinux_sctp_assoc_established() for SCTP) only deals with assigning a
peer label to the connection (socket), in case of SCTP we need to also
copy the (local) socket label to the association, so that
selinux_sctp_sk_clone() can then pick it up for the new socket in case
of SCTP peeloff.
Careful readers will notice that the selinux_sctp_process_new_assoc()
helper also includes the "IPv4 packet received over an IPv6 socket"
check, even though it hadn't been in selinux_sctp_assoc_request()
before. While such check is not necessary in
selinux_inet_conn_request() (because struct request_sock's family field
is already set according to the skb's family), here it is needed, as we
don't have request_sock and we take the initial family from the socket.
In selinux_sctp_assoc_established() it is similarly needed as well (and
also selinux_inet_conn_established() already has it).
Fixes: 72e89f50084c ("security: Add support for SCTP security hooks")
Reported-by: Prashanth Prahlad <pprahlad@redhat.com>
Based-on-patch-by: Xin Long <lucien.xin@gmail.com>
Reviewed-by: Xin Long <lucien.xin@gmail.com>
Tested-by: Richard Haines <richard_c_haines@btinternet.com>
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
2022-02-12 18:59:22 +01:00
if ( selinux_peerlbl_enabled ( ) ) {
asoc - > peer_secid = SECSID_NULL ;
2018-02-13 20:57:18 +00:00
/* This will return peer_sid = SECSID_NULL if there are
* no peer labels , see security_net_peersid_resolve ( ) .
*/
security: implement sctp_assoc_established hook in selinux
Do this by extracting the peer labeling per-association logic from
selinux_sctp_assoc_request() into a new helper
selinux_sctp_process_new_assoc() and use this helper in both
selinux_sctp_assoc_request() and selinux_sctp_assoc_established(). This
ensures that the peer labeling behavior as documented in
Documentation/security/SCTP.rst is applied both on the client and server
side:
"""
An SCTP socket will only have one peer label assigned to it. This will be
assigned during the establishment of the first association. Any further
associations on this socket will have their packet peer label compared to
the sockets peer label, and only if they are different will the
``association`` permission be validated. This is validated by checking the
socket peer sid against the received packets peer sid to determine whether
the association should be allowed or denied.
"""
At the same time, it also ensures that the peer label of the association
is set to the correct value, such that if it is peeled off into a new
socket, the socket's peer label will then be set to the association's
peer label, same as it already works on the server side.
While selinux_inet_conn_established() (which we are replacing by
selinux_sctp_assoc_established() for SCTP) only deals with assigning a
peer label to the connection (socket), in case of SCTP we need to also
copy the (local) socket label to the association, so that
selinux_sctp_sk_clone() can then pick it up for the new socket in case
of SCTP peeloff.
Careful readers will notice that the selinux_sctp_process_new_assoc()
helper also includes the "IPv4 packet received over an IPv6 socket"
check, even though it hadn't been in selinux_sctp_assoc_request()
before. While such check is not necessary in
selinux_inet_conn_request() (because struct request_sock's family field
is already set according to the skb's family), here it is needed, as we
don't have request_sock and we take the initial family from the socket.
In selinux_sctp_assoc_established() it is similarly needed as well (and
also selinux_inet_conn_established() already has it).
Fixes: 72e89f50084c ("security: Add support for SCTP security hooks")
Reported-by: Prashanth Prahlad <pprahlad@redhat.com>
Based-on-patch-by: Xin Long <lucien.xin@gmail.com>
Reviewed-by: Xin Long <lucien.xin@gmail.com>
Tested-by: Richard Haines <richard_c_haines@btinternet.com>
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
2022-02-12 18:59:22 +01:00
err = selinux_skb_peerlbl_sid ( skb , family , & asoc - > peer_secid ) ;
2018-02-13 20:57:18 +00:00
if ( err )
return err ;
security: implement sctp_assoc_established hook in selinux
Do this by extracting the peer labeling per-association logic from
selinux_sctp_assoc_request() into a new helper
selinux_sctp_process_new_assoc() and use this helper in both
selinux_sctp_assoc_request() and selinux_sctp_assoc_established(). This
ensures that the peer labeling behavior as documented in
Documentation/security/SCTP.rst is applied both on the client and server
side:
"""
An SCTP socket will only have one peer label assigned to it. This will be
assigned during the establishment of the first association. Any further
associations on this socket will have their packet peer label compared to
the sockets peer label, and only if they are different will the
``association`` permission be validated. This is validated by checking the
socket peer sid against the received packets peer sid to determine whether
the association should be allowed or denied.
"""
At the same time, it also ensures that the peer label of the association
is set to the correct value, such that if it is peeled off into a new
socket, the socket's peer label will then be set to the association's
peer label, same as it already works on the server side.
While selinux_inet_conn_established() (which we are replacing by
selinux_sctp_assoc_established() for SCTP) only deals with assigning a
peer label to the connection (socket), in case of SCTP we need to also
copy the (local) socket label to the association, so that
selinux_sctp_sk_clone() can then pick it up for the new socket in case
of SCTP peeloff.
Careful readers will notice that the selinux_sctp_process_new_assoc()
helper also includes the "IPv4 packet received over an IPv6 socket"
check, even though it hadn't been in selinux_sctp_assoc_request()
before. While such check is not necessary in
selinux_inet_conn_request() (because struct request_sock's family field
is already set according to the skb's family), here it is needed, as we
don't have request_sock and we take the initial family from the socket.
In selinux_sctp_assoc_established() it is similarly needed as well (and
also selinux_inet_conn_established() already has it).
Fixes: 72e89f50084c ("security: Add support for SCTP security hooks")
Reported-by: Prashanth Prahlad <pprahlad@redhat.com>
Based-on-patch-by: Xin Long <lucien.xin@gmail.com>
Reviewed-by: Xin Long <lucien.xin@gmail.com>
Tested-by: Richard Haines <richard_c_haines@btinternet.com>
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
2022-02-12 18:59:22 +01:00
if ( asoc - > peer_secid = = SECSID_NULL )
asoc - > peer_secid = SECINITSID_UNLABELED ;
} else {
asoc - > peer_secid = SECINITSID_UNLABELED ;
2018-02-13 20:57:18 +00:00
}
if ( sksec - > sctp_assoc_state = = SCTP_ASSOC_UNSET ) {
sksec - > sctp_assoc_state = SCTP_ASSOC_SET ;
/* Here as first association on socket. As the peer SID
* was allowed by peer recv ( and the netif / node checks ) ,
* then it is approved by policy and used as the primary
* peer SID for getpeercon ( 3 ) .
*/
security: implement sctp_assoc_established hook in selinux
Do this by extracting the peer labeling per-association logic from
selinux_sctp_assoc_request() into a new helper
selinux_sctp_process_new_assoc() and use this helper in both
selinux_sctp_assoc_request() and selinux_sctp_assoc_established(). This
ensures that the peer labeling behavior as documented in
Documentation/security/SCTP.rst is applied both on the client and server
side:
"""
An SCTP socket will only have one peer label assigned to it. This will be
assigned during the establishment of the first association. Any further
associations on this socket will have their packet peer label compared to
the sockets peer label, and only if they are different will the
``association`` permission be validated. This is validated by checking the
socket peer sid against the received packets peer sid to determine whether
the association should be allowed or denied.
"""
At the same time, it also ensures that the peer label of the association
is set to the correct value, such that if it is peeled off into a new
socket, the socket's peer label will then be set to the association's
peer label, same as it already works on the server side.
While selinux_inet_conn_established() (which we are replacing by
selinux_sctp_assoc_established() for SCTP) only deals with assigning a
peer label to the connection (socket), in case of SCTP we need to also
copy the (local) socket label to the association, so that
selinux_sctp_sk_clone() can then pick it up for the new socket in case
of SCTP peeloff.
Careful readers will notice that the selinux_sctp_process_new_assoc()
helper also includes the "IPv4 packet received over an IPv6 socket"
check, even though it hadn't been in selinux_sctp_assoc_request()
before. While such check is not necessary in
selinux_inet_conn_request() (because struct request_sock's family field
is already set according to the skb's family), here it is needed, as we
don't have request_sock and we take the initial family from the socket.
In selinux_sctp_assoc_established() it is similarly needed as well (and
also selinux_inet_conn_established() already has it).
Fixes: 72e89f50084c ("security: Add support for SCTP security hooks")
Reported-by: Prashanth Prahlad <pprahlad@redhat.com>
Based-on-patch-by: Xin Long <lucien.xin@gmail.com>
Reviewed-by: Xin Long <lucien.xin@gmail.com>
Tested-by: Richard Haines <richard_c_haines@btinternet.com>
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
2022-02-12 18:59:22 +01:00
sksec - > peer_sid = asoc - > peer_secid ;
} else if ( sksec - > peer_sid ! = asoc - > peer_secid ) {
2018-02-13 20:57:18 +00:00
/* Other association peer SIDs are checked to enforce
* consistency among the peer SIDs .
*/
2023-07-19 13:37:49 +02:00
ad_net_init_from_sk ( & ad , & net , asoc - > base . sk ) ;
2023-03-09 13:30:37 -05:00
err = avc_has_perm ( sksec - > peer_sid , asoc - > peer_secid ,
security: implement sctp_assoc_established hook in selinux
Do this by extracting the peer labeling per-association logic from
selinux_sctp_assoc_request() into a new helper
selinux_sctp_process_new_assoc() and use this helper in both
selinux_sctp_assoc_request() and selinux_sctp_assoc_established(). This
ensures that the peer labeling behavior as documented in
Documentation/security/SCTP.rst is applied both on the client and server
side:
"""
An SCTP socket will only have one peer label assigned to it. This will be
assigned during the establishment of the first association. Any further
associations on this socket will have their packet peer label compared to
the sockets peer label, and only if they are different will the
``association`` permission be validated. This is validated by checking the
socket peer sid against the received packets peer sid to determine whether
the association should be allowed or denied.
"""
At the same time, it also ensures that the peer label of the association
is set to the correct value, such that if it is peeled off into a new
socket, the socket's peer label will then be set to the association's
peer label, same as it already works on the server side.
While selinux_inet_conn_established() (which we are replacing by
selinux_sctp_assoc_established() for SCTP) only deals with assigning a
peer label to the connection (socket), in case of SCTP we need to also
copy the (local) socket label to the association, so that
selinux_sctp_sk_clone() can then pick it up for the new socket in case
of SCTP peeloff.
Careful readers will notice that the selinux_sctp_process_new_assoc()
helper also includes the "IPv4 packet received over an IPv6 socket"
check, even though it hadn't been in selinux_sctp_assoc_request()
before. While such check is not necessary in
selinux_inet_conn_request() (because struct request_sock's family field
is already set according to the skb's family), here it is needed, as we
don't have request_sock and we take the initial family from the socket.
In selinux_sctp_assoc_established() it is similarly needed as well (and
also selinux_inet_conn_established() already has it).
Fixes: 72e89f50084c ("security: Add support for SCTP security hooks")
Reported-by: Prashanth Prahlad <pprahlad@redhat.com>
Based-on-patch-by: Xin Long <lucien.xin@gmail.com>
Reviewed-by: Xin Long <lucien.xin@gmail.com>
Tested-by: Richard Haines <richard_c_haines@btinternet.com>
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
2022-02-12 18:59:22 +01:00
sksec - > sclass , SCTP_SOCKET__ASSOCIATION ,
& ad ) ;
2018-02-13 20:57:18 +00:00
if ( err )
return err ;
}
security: implement sctp_assoc_established hook in selinux
Do this by extracting the peer labeling per-association logic from
selinux_sctp_assoc_request() into a new helper
selinux_sctp_process_new_assoc() and use this helper in both
selinux_sctp_assoc_request() and selinux_sctp_assoc_established(). This
ensures that the peer labeling behavior as documented in
Documentation/security/SCTP.rst is applied both on the client and server
side:
"""
An SCTP socket will only have one peer label assigned to it. This will be
assigned during the establishment of the first association. Any further
associations on this socket will have their packet peer label compared to
the sockets peer label, and only if they are different will the
``association`` permission be validated. This is validated by checking the
socket peer sid against the received packets peer sid to determine whether
the association should be allowed or denied.
"""
At the same time, it also ensures that the peer label of the association
is set to the correct value, such that if it is peeled off into a new
socket, the socket's peer label will then be set to the association's
peer label, same as it already works on the server side.
While selinux_inet_conn_established() (which we are replacing by
selinux_sctp_assoc_established() for SCTP) only deals with assigning a
peer label to the connection (socket), in case of SCTP we need to also
copy the (local) socket label to the association, so that
selinux_sctp_sk_clone() can then pick it up for the new socket in case
of SCTP peeloff.
Careful readers will notice that the selinux_sctp_process_new_assoc()
helper also includes the "IPv4 packet received over an IPv6 socket"
check, even though it hadn't been in selinux_sctp_assoc_request()
before. While such check is not necessary in
selinux_inet_conn_request() (because struct request_sock's family field
is already set according to the skb's family), here it is needed, as we
don't have request_sock and we take the initial family from the socket.
In selinux_sctp_assoc_established() it is similarly needed as well (and
also selinux_inet_conn_established() already has it).
Fixes: 72e89f50084c ("security: Add support for SCTP security hooks")
Reported-by: Prashanth Prahlad <pprahlad@redhat.com>
Based-on-patch-by: Xin Long <lucien.xin@gmail.com>
Reviewed-by: Xin Long <lucien.xin@gmail.com>
Tested-by: Richard Haines <richard_c_haines@btinternet.com>
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
2022-02-12 18:59:22 +01:00
return 0 ;
}
/* Called whenever SCTP receives an INIT or COOKIE ECHO chunk. This
 * happens on an incoming connect(2), sctp_connectx(3) or
 * sctp_sendmsg(3) (with no association already present).
 *
 * @asoc: association being requested; asoc->secid is written here, and
 *        asoc->peer_secid is read after selinux_sctp_process_new_assoc()
 *        has been called.
 * @skb: the received packet, passed through to the helpers.
 *
 * Returns 0 immediately when the extsockclass policy capability is off.
 * Otherwise returns the first non-zero error from
 * selinux_sctp_process_new_assoc() or selinux_conn_sid(), or else the
 * result of selinux_netlbl_sctp_assoc_request().
 */
static int selinux_sctp_assoc_request ( struct sctp_association * asoc ,
struct sk_buff * skb )
{
2024-07-10 14:32:25 -07:00
struct sk_security_struct * sksec = selinux_sock ( asoc - > base . sk ) ;
security: implement sctp_assoc_established hook in selinux
Do this by extracting the peer labeling per-association logic from
selinux_sctp_assoc_request() into a new helper
selinux_sctp_process_new_assoc() and use this helper in both
selinux_sctp_assoc_request() and selinux_sctp_assoc_established(). This
ensures that the peer labeling behavior as documented in
Documentation/security/SCTP.rst is applied both on the client and server
side:
"""
An SCTP socket will only have one peer label assigned to it. This will be
assigned during the establishment of the first association. Any further
associations on this socket will have their packet peer label compared to
the sockets peer label, and only if they are different will the
``association`` permission be validated. This is validated by checking the
socket peer sid against the received packets peer sid to determine whether
the association should be allowed or denied.
"""
At the same time, it also ensures that the peer label of the association
is set to the correct value, such that if it is peeled off into a new
socket, the socket's peer label will then be set to the association's
peer label, same as it already works on the server side.
While selinux_inet_conn_established() (which we are replacing by
selinux_sctp_assoc_established() for SCTP) only deals with assigning a
peer label to the connection (socket), in case of SCTP we need to also
copy the (local) socket label to the association, so that
selinux_sctp_sk_clone() can then pick it up for the new socket in case
of SCTP peeloff.
Careful readers will notice that the selinux_sctp_process_new_assoc()
helper also includes the "IPv4 packet received over an IPv6 socket"
check, even though it hadn't been in selinux_sctp_assoc_request()
before. While such check is not necessary in
selinux_inet_conn_request() (because struct request_sock's family field
is already set according to the skb's family), here it is needed, as we
don't have request_sock and we take the initial family from the socket.
In selinux_sctp_assoc_established() it is similarly needed as well (and
also selinux_inet_conn_established() already has it).
Fixes: 72e89f50084c ("security: Add support for SCTP security hooks")
Reported-by: Prashanth Prahlad <pprahlad@redhat.com>
Based-on-patch-by: Xin Long <lucien.xin@gmail.com>
Reviewed-by: Xin Long <lucien.xin@gmail.com>
Tested-by: Richard Haines <richard_c_haines@btinternet.com>
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
2022-02-12 18:59:22 +01:00
u32 conn_sid ;
int err ;
/* Nothing to do unless the extsockclass policy capability is enabled. */
if ( ! selinux_policycap_extsockclass ( ) )
return 0 ;
/* Per-association peer labeling helper, also used by
 * selinux_sctp_assoc_established(); any error is returned as-is.
 */
err = selinux_sctp_process_new_assoc ( asoc , skb ) ;
if ( err )
return err ;
2018-02-13 20:57:18 +00:00
/* Compute the MLS component for the connection and store
2021-11-02 08:02:47 -04:00
* the information in asoc . This will be used by SCTP TCP type
2018-02-13 20:57:18 +00:00
* sockets and peeled off connections as they cause a new
* socket to be generated . selinux_sctp_sk_clone ( ) will then
* plug this into the new socket .
*/
security: implement sctp_assoc_established hook in selinux
Do this by extracting the peer labeling per-association logic from
selinux_sctp_assoc_request() into a new helper
selinux_sctp_process_new_assoc() and use this helper in both
selinux_sctp_assoc_request() and selinux_sctp_assoc_established(). This
ensures that the peer labeling behavior as documented in
Documentation/security/SCTP.rst is applied both on the client and server
side:
"""
An SCTP socket will only have one peer label assigned to it. This will be
assigned during the establishment of the first association. Any further
associations on this socket will have their packet peer label compared to
the sockets peer label, and only if they are different will the
``association`` permission be validated. This is validated by checking the
socket peer sid against the received packets peer sid to determine whether
the association should be allowed or denied.
"""
At the same time, it also ensures that the peer label of the association
is set to the correct value, such that if it is peeled off into a new
socket, the socket's peer label will then be set to the association's
peer label, same as it already works on the server side.
While selinux_inet_conn_established() (which we are replacing by
selinux_sctp_assoc_established() for SCTP) only deals with assigning a
peer label to the connection (socket), in case of SCTP we need to also
copy the (local) socket label to the association, so that
selinux_sctp_sk_clone() can then pick it up for the new socket in case
of SCTP peeloff.
Careful readers will notice that the selinux_sctp_process_new_assoc()
helper also includes the "IPv4 packet received over an IPv6 socket"
check, even though it hadn't been in selinux_sctp_assoc_request()
before. While such check is not necessary in
selinux_inet_conn_request() (because struct request_sock's family field
is already set according to the skb's family), here it is needed, as we
don't have request_sock and we take the initial family from the socket.
In selinux_sctp_assoc_established() it is similarly needed as well (and
also selinux_inet_conn_established() already has it).
Fixes: 72e89f50084c ("security: Add support for SCTP security hooks")
Reported-by: Prashanth Prahlad <pprahlad@redhat.com>
Based-on-patch-by: Xin Long <lucien.xin@gmail.com>
Reviewed-by: Xin Long <lucien.xin@gmail.com>
Tested-by: Richard Haines <richard_c_haines@btinternet.com>
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
2022-02-12 18:59:22 +01:00
err = selinux_conn_sid ( sksec - > sid , asoc - > peer_secid , & conn_sid ) ;
2018-02-13 20:57:18 +00:00
if ( err )
return err ;
2021-11-02 08:02:47 -04:00
/* Record the computed connection SID on the association. */
asoc - > secid = conn_sid ;
2018-02-13 20:57:18 +00:00
/* Set any NetLabel labels including CIPSO/CALIPSO options. */
2021-11-02 08:02:47 -04:00
return selinux_netlbl_sctp_assoc_request ( asoc , skb ) ;
2018-02-13 20:57:18 +00:00
}
security: implement sctp_assoc_established hook in selinux
Do this by extracting the peer labeling per-association logic from
selinux_sctp_assoc_request() into a new helper
selinux_sctp_process_new_assoc() and use this helper in both
selinux_sctp_assoc_request() and selinux_sctp_assoc_established(). This
ensures that the peer labeling behavior as documented in
Documentation/security/SCTP.rst is applied both on the client and server
side:
"""
An SCTP socket will only have one peer label assigned to it. This will be
assigned during the establishment of the first association. Any further
associations on this socket will have their packet peer label compared to
the sockets peer label, and only if they are different will the
``association`` permission be validated. This is validated by checking the
socket peer sid against the received packets peer sid to determine whether
the association should be allowed or denied.
"""
At the same time, it also ensures that the peer label of the association
is set to the correct value, such that if it is peeled off into a new
socket, the socket's peer label will then be set to the association's
peer label, same as it already works on the server side.
While selinux_inet_conn_established() (which we are replacing by
selinux_sctp_assoc_established() for SCTP) only deals with assigning a
peer label to the connection (socket), in case of SCTP we need to also
copy the (local) socket label to the association, so that
selinux_sctp_sk_clone() can then pick it up for the new socket in case
of SCTP peeloff.
Careful readers will notice that the selinux_sctp_process_new_assoc()
helper also includes the "IPv4 packet received over an IPv6 socket"
check, even though it hadn't been in selinux_sctp_assoc_request()
before. While such check is not necessary in
selinux_inet_conn_request() (because struct request_sock's family field
is already set according to the skb's family), here it is needed, as we
don't have request_sock and we take the initial family from the socket.
In selinux_sctp_assoc_established() it is similarly needed as well (and
also selinux_inet_conn_established() already has it).
Fixes: 72e89f50084c ("security: Add support for SCTP security hooks")
Reported-by: Prashanth Prahlad <pprahlad@redhat.com>
Based-on-patch-by: Xin Long <lucien.xin@gmail.com>
Reviewed-by: Xin Long <lucien.xin@gmail.com>
Tested-by: Richard Haines <richard_c_haines@btinternet.com>
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
2022-02-12 18:59:22 +01:00
/* Called when SCTP receives a COOKIE ACK chunk as the final
 * response to an association request (initiated by us).
 *
 * @asoc: the association; asoc->secid is set from the SID stored in the
 *        security blob of asoc->base.sk.
 * @skb: the received packet, handed to selinux_sctp_process_new_assoc().
 *
 * Returns 0 when the extsockclass policy capability is off, otherwise
 * the result of selinux_sctp_process_new_assoc().
 */
static int selinux_sctp_assoc_established ( struct sctp_association * asoc ,
struct sk_buff * skb )
{
2024-07-10 14:32:25 -07:00
struct sk_security_struct * sksec = selinux_sock ( asoc - > base . sk ) ;
security: implement sctp_assoc_established hook in selinux
Do this by extracting the peer labeling per-association logic from
selinux_sctp_assoc_request() into a new helper
selinux_sctp_process_new_assoc() and use this helper in both
selinux_sctp_assoc_request() and selinux_sctp_assoc_established(). This
ensures that the peer labeling behavior as documented in
Documentation/security/SCTP.rst is applied both on the client and server
side:
"""
An SCTP socket will only have one peer label assigned to it. This will be
assigned during the establishment of the first association. Any further
associations on this socket will have their packet peer label compared to
the sockets peer label, and only if they are different will the
``association`` permission be validated. This is validated by checking the
socket peer sid against the received packets peer sid to determine whether
the association should be allowed or denied.
"""
At the same time, it also ensures that the peer label of the association
is set to the correct value, such that if it is peeled off into a new
socket, the socket's peer label will then be set to the association's
peer label, same as it already works on the server side.
While selinux_inet_conn_established() (which we are replacing by
selinux_sctp_assoc_established() for SCTP) only deals with assigning a
peer label to the connection (socket), in case of SCTP we need to also
copy the (local) socket label to the association, so that
selinux_sctp_sk_clone() can then pick it up for the new socket in case
of SCTP peeloff.
Careful readers will notice that the selinux_sctp_process_new_assoc()
helper also includes the "IPv4 packet received over an IPv6 socket"
check, even though it hadn't been in selinux_sctp_assoc_request()
before. While such check is not necessary in
selinux_inet_conn_request() (because struct request_sock's family field
is already set according to the skb's family), here it is needed, as we
don't have request_sock and we take the initial family from the socket.
In selinux_sctp_assoc_established() it is similarly needed as well (and
also selinux_inet_conn_established() already has it).
Fixes: 72e89f50084c ("security: Add support for SCTP security hooks")
Reported-by: Prashanth Prahlad <pprahlad@redhat.com>
Based-on-patch-by: Xin Long <lucien.xin@gmail.com>
Reviewed-by: Xin Long <lucien.xin@gmail.com>
Tested-by: Richard Haines <richard_c_haines@btinternet.com>
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
2022-02-12 18:59:22 +01:00
/* Nothing to do unless the extsockclass policy capability is enabled. */
if ( ! selinux_policycap_extsockclass ( ) )
return 0 ;
/* Inherit secid from the parent socket - this will be picked up
 * by selinux_sctp_sk_clone() if the association gets peeled off
 * into a new socket.
 */
asoc - > secid = sksec - > sid ;
/* The helper's return value becomes the hook's return value. */
return selinux_sctp_process_new_assoc ( asoc , skb ) ;
}
2018-02-13 20:57:18 +00:00
/* Check if sctp IPv4/IPv6 addresses are valid for binding or connecting
 * based on their @optname.
 *
 * @sk: the SCTP sock; sk->sk_socket is what gets passed to the bind and
 *      connect checks.
 * @optname: selects the check applied to every address (see the switch
 *           in the loop body).
 * @address: buffer holding one or more sockaddr_in / sockaddr_in6
 *           entries laid out back to back.
 * @addrlen: total length of @address in bytes.
 *
 * Returns 0 when the extsockclass policy capability is off or when every
 * address passes its check. Returns -EINVAL for an entry that does not
 * fit in the remaining buffer, for an address family other than
 * AF_UNSPEC/AF_INET/AF_INET6, or for an @optname that is not handled by
 * the switch. Otherwise returns the first error reported by the
 * bind/connect/NetLabel helpers.
 */
static int selinux_sctp_bind_connect ( struct sock * sk , int optname ,
struct sockaddr * address ,
int addrlen )
{
int len , err = 0 , walk_size = 0 ;
void * addr_buf ;
struct sockaddr * addr ;
struct socket * sock ;
2018-03-01 18:48:02 -05:00
if ( ! selinux_policycap_extsockclass ( ) )
2018-02-13 20:57:18 +00:00
return 0 ;
/* Process one or more addresses that may be IPv4 or IPv6 */
sock = sk - > sk_socket ;
addr_buf = address ;
while ( walk_size < addrlen ) {
2018-11-13 16:16:08 +01:00
/* The family field must fit before sa_family is read below. */
if ( walk_size + sizeof ( sa_family_t ) > addrlen )
return - EINVAL ;
2018-02-13 20:57:18 +00:00
addr = addr_buf ;
switch ( addr - > sa_family ) {
2018-05-11 20:15:13 +03:00
/* AF_UNSPEC falls through and is sized like an AF_INET address. */
case AF_UNSPEC :
2018-02-13 20:57:18 +00:00
case AF_INET :
len = sizeof ( struct sockaddr_in ) ;
break ;
case AF_INET6 :
len = sizeof ( struct sockaddr_in6 ) ;
break ;
default :
2018-05-11 20:15:13 +03:00
return - EINVAL ;
2018-02-13 20:57:18 +00:00
}
2019-03-09 00:07:34 +08:00
/* The complete address must fit in the remaining buffer too. */
if ( walk_size + len > addrlen )
return - EINVAL ;
2018-02-13 20:57:18 +00:00
/* Preset the error: an @optname matching no case below is rejected. */
err = - EINVAL ;
switch ( optname ) {
/* Bind checks */
case SCTP_PRIMARY_ADDR :
case SCTP_SET_PEER_PRIMARY_ADDR :
case SCTP_SOCKOPT_BINDX_ADD :
err = selinux_socket_bind ( sock , addr , len ) ;
break ;
/* Connect checks */
case SCTP_SOCKOPT_CONNECTX :
case SCTP_PARAM_SET_PRIMARY :
case SCTP_PARAM_ADD_IP :
case SCTP_SENDMSG_CONNECT :
err = selinux_socket_connect_helper ( sock , addr , len ) ;
if ( err )
return err ;
/* As selinux_sctp_bind_connect() is called by the
* SCTP protocol layer , the socket is already locked ,
2020-08-07 09:51:34 -07:00
* therefore selinux_netlbl_socket_connect_locked ( )
2018-02-13 20:57:18 +00:00
* is called here . The situations handled are :
* sctp_connectx ( 3 ) , sctp_sendmsg ( 3 ) , sendmsg ( 2 ) ,
* whenever a new IP address is added or when a new
* primary address is selected .
* Note that an SCTP connect ( 2 ) call happens before
* the SCTP protocol layer and is handled via
* selinux_socket_connect ( ) .
*/
err = selinux_netlbl_socket_connect_locked ( sk , addr ) ;
break ;
}
if ( err )
return err ;
/* Step past this address to the next entry in the buffer. */
addr_buf + = len ;
walk_size + = len ;
}
return 0 ;
}
/* Called whenever a new socket is created by accept(2) or sctp_peeloff(3). */
2021-11-02 08:02:47 -04:00
static void selinux_sctp_sk_clone ( struct sctp_association * asoc , struct sock * sk ,
2018-02-13 20:57:18 +00:00
struct sock * newsk )
{
2024-07-10 14:32:25 -07:00
struct sk_security_struct * sksec = selinux_sock ( sk ) ;
struct sk_security_struct * newsksec = selinux_sock ( newsk ) ;
2018-02-13 20:57:18 +00:00
/* If policy does not support SECCLASS_SCTP_SOCKET then call
* the non - sctp clone version .
*/
2018-03-01 18:48:02 -05:00
if ( ! selinux_policycap_extsockclass ( ) )
2018-02-13 20:57:18 +00:00
return selinux_sk_clone_security ( sk , newsk ) ;
2021-11-12 12:07:02 -05:00
newsksec - > sid = asoc - > secid ;
2021-11-02 08:02:47 -04:00
newsksec - > peer_sid = asoc - > peer_secid ;
2018-02-13 20:57:18 +00:00
newsksec - > sclass = sksec - > sclass ;
selinux_netlbl_sctp_sk_clone ( sk , newsk ) ;
}
2023-04-20 19:17:14 +02:00
static int selinux_mptcp_add_subflow ( struct sock * sk , struct sock * ssk )
{
2024-07-10 14:32:25 -07:00
struct sk_security_struct * ssksec = selinux_sock ( ssk ) ;
struct sk_security_struct * sksec = selinux_sock ( sk ) ;
2023-04-20 19:17:14 +02:00
ssksec - > sclass = sksec - > sclass ;
ssksec - > sid = sksec - > sid ;
/* replace the existing subflow label deleting the existing one
* and re - recreating a new label using the updated context
*/
selinux_netlbl_sk_security_free ( ssksec ) ;
return selinux_netlbl_socket_post_create ( ssk , ssk - > sk_family ) ;
}
2020-11-30 16:36:29 +01:00
static int selinux_inet_conn_request ( const struct sock * sk , struct sk_buff * skb ,
2006-08-15 00:03:53 -07:00
struct request_sock * req )
2006-07-24 23:32:50 -07:00
{
2024-07-10 14:32:25 -07:00
struct sk_security_struct * sksec = selinux_sock ( sk ) ;
2006-07-24 23:32:50 -07:00
int err ;
2013-12-03 11:39:13 -05:00
u16 family = req - > rsk_ops - > family ;
2013-12-04 16:10:51 -05:00
u32 connsid ;
2006-07-24 23:32:50 -07:00
u32 peersid ;
2008-10-10 10:16:29 -04:00
err = selinux_skb_peerlbl_sid ( skb , family , & peersid ) ;
2008-01-29 08:38:23 -05:00
if ( err )
return err ;
2013-12-04 16:10:51 -05:00
err = selinux_conn_sid ( sksec - > sid , peersid , & connsid ) ;
if ( err )
return err ;
req - > secid = connsid ;
req - > peer_secid = peersid ;
2006-07-27 22:01:34 -07:00
2009-03-27 17:10:34 -04:00
return selinux_netlbl_inet_conn_request ( req , family ) ;
2006-07-24 23:32:50 -07:00
}
2006-08-15 00:03:53 -07:00
static void selinux_inet_csk_clone ( struct sock * newsk ,
const struct request_sock * req )
2006-07-24 23:32:50 -07:00
{
2024-07-10 14:32:25 -07:00
struct sk_security_struct * newsksec = selinux_sock ( newsk ) ;
2006-07-24 23:32:50 -07:00
newsksec - > sid = req - > secid ;
2006-11-08 17:04:09 -06:00
newsksec - > peer_sid = req - > peer_secid ;
2006-07-24 23:32:50 -07:00
/* NOTE: Ideally, we should also get the isec->sid for the
new socket in sync , but we don ' t have the isec available yet .
So we will wait until sock_graft to do it , by which
time it will have been created and available . */
2006-08-29 17:53:48 -07:00
2006-11-17 17:38:53 -05:00
/* We don't need to take any sort of lock here as we are the only
* thread with access to newsksec */
2009-03-27 17:10:34 -04:00
selinux_netlbl_inet_csk_clone ( newsk , req - > rsk_ops - > family ) ;
2006-07-24 23:32:50 -07:00
}
2008-10-10 10:16:33 -04:00
static void selinux_inet_conn_established ( struct sock * sk , struct sk_buff * skb )
2006-11-08 17:04:09 -06:00
{
2008-10-10 10:16:29 -04:00
u16 family = sk - > sk_family ;
2024-07-10 14:32:25 -07:00
struct sk_security_struct * sksec = selinux_sock ( sk ) ;
2006-11-08 17:04:09 -06:00
2008-10-10 10:16:29 -04:00
/* handle mapped IPv4 packets arriving via IPv6 sockets */
if ( family = = PF_INET6 & & skb - > protocol = = htons ( ETH_P_IP ) )
family = PF_INET ;
selinux_skb_peerlbl_sid ( skb , family , & sksec - > peer_sid ) ;
2006-11-08 17:04:09 -06:00
}
2010-10-13 16:24:41 -04:00
/*
 * Check whether the current task may relabel packets to @sid
 * (packet:relabelto).  Returns the avc_has_perm() result.
 */
static int selinux_secmark_relabel_packet(u32 sid)
{
	u32 subject = current_sid();

	return avc_has_perm(subject, sid, SECCLASS_PACKET,
			    PACKET__RELABELTO, NULL);
}
/* Atomically increment the selinux_secmark_refcount counter. */
static void selinux_secmark_refcount_inc(void)
{
	atomic_inc(&selinux_secmark_refcount);
}
/* Atomically decrement the selinux_secmark_refcount counter. */
static void selinux_secmark_refcount_dec(void)
{
	atomic_dec(&selinux_secmark_refcount);
}
2006-08-15 00:03:53 -07:00
static void selinux_req_classify_flow ( const struct request_sock * req ,
2020-09-27 22:38:26 -04:00
struct flowi_common * flic )
2006-07-24 23:32:50 -07:00
{
2020-09-27 22:38:26 -04:00
flic - > flowic_secid = req - > secid ;
2006-07-24 23:32:50 -07:00
}
2024-07-10 14:32:28 -07:00
static int selinux_tun_dev_alloc_security ( void * security )
2013-01-14 07:12:19 +00:00
{
2024-07-10 14:32:28 -07:00
struct tun_security_struct * tunsec = selinux_tun_dev ( security ) ;
2013-01-14 07:12:19 +00:00
tunsec - > sid = current_sid ( ) ;
return 0 ;
}
2009-08-28 18:12:49 -04:00
static int selinux_tun_dev_create ( void )
{
u32 sid = current_sid ( ) ;
/* we aren't taking into account the "sockcreate" SID since the socket
* that is being created here is not a socket in the traditional sense ,
* instead it is a private sock , accessible only to the kernel , and
* representing a wide range of network traffic spanning multiple
* connections unlike traditional sockets - check the TUN driver to
* get a better understanding of why this socket is special */
2023-03-09 13:30:37 -05:00
return avc_has_perm ( sid , sid , SECCLASS_TUN_SOCKET , TUN_SOCKET__CREATE ,
2009-08-28 18:12:49 -04:00
NULL ) ;
}
2013-01-14 07:12:19 +00:00
static int selinux_tun_dev_attach_queue ( void * security )
2009-08-28 18:12:49 -04:00
{
2024-07-10 14:32:28 -07:00
struct tun_security_struct * tunsec = selinux_tun_dev ( security ) ;
2013-01-14 07:12:19 +00:00
2023-03-09 13:30:37 -05:00
return avc_has_perm ( current_sid ( ) , tunsec - > sid , SECCLASS_TUN_SOCKET ,
2013-01-14 07:12:19 +00:00
TUN_SOCKET__ATTACH_QUEUE , NULL ) ;
}
static int selinux_tun_dev_attach ( struct sock * sk , void * security )
{
2024-07-10 14:32:28 -07:00
struct tun_security_struct * tunsec = selinux_tun_dev ( security ) ;
2024-07-10 14:32:25 -07:00
struct sk_security_struct * sksec = selinux_sock ( sk ) ;
2009-08-28 18:12:49 -04:00
/* we don't currently perform any NetLabel based labeling here and it
* isn ' t clear that we would want to do so anyway ; while we could apply
* labeling without the support of the TUN user the resulting labeled
* traffic from the other end of the connection would almost certainly
* cause confusion to the TUN user that had no idea network labeling
* protocols were being used */
2013-01-14 07:12:19 +00:00
sksec - > sid = tunsec - > sid ;
2009-08-28 18:12:49 -04:00
sksec - > sclass = SECCLASS_TUN_SOCKET ;
2013-01-14 07:12:19 +00:00
return 0 ;
2009-08-28 18:12:49 -04:00
}
2013-01-14 07:12:19 +00:00
static int selinux_tun_dev_open ( void * security )
2009-08-28 18:12:49 -04:00
{
2024-07-10 14:32:28 -07:00
struct tun_security_struct * tunsec = selinux_tun_dev ( security ) ;
2009-08-28 18:12:49 -04:00
u32 sid = current_sid ( ) ;
int err ;
2023-03-09 13:30:37 -05:00
err = avc_has_perm ( sid , tunsec - > sid , SECCLASS_TUN_SOCKET ,
2009-08-28 18:12:49 -04:00
TUN_SOCKET__RELABELFROM , NULL ) ;
if ( err )
return err ;
2023-03-09 13:30:37 -05:00
err = avc_has_perm ( sid , sid , SECCLASS_TUN_SOCKET ,
2009-08-28 18:12:49 -04:00
TUN_SOCKET__RELABELTO , NULL ) ;
if ( err )
return err ;
2013-01-14 07:12:19 +00:00
tunsec - > sid = sid ;
2009-08-28 18:12:49 -04:00
return 0 ;
}
2005-04-16 15:20:36 -07:00
# ifdef CONFIG_NETFILTER
2021-10-11 22:22:29 +02:00
static unsigned int selinux_ip_forward ( void * priv , struct sk_buff * skb ,
const struct nf_hook_state * state )
2005-04-16 15:20:36 -07:00
{
2021-10-11 17:50:48 -04:00
int ifindex ;
u16 family ;
2008-01-29 08:49:27 -05:00
char * addrp ;
u32 peer_sid ;
2009-07-14 12:14:09 -04:00
struct common_audit_data ad ;
2023-07-19 13:37:49 +02:00
struct lsm_network_audit net ;
2021-10-11 17:50:48 -04:00
int secmark_active , peerlbl_active ;
2006-07-24 23:32:50 -07:00
2018-03-01 18:48:02 -05:00
if ( ! selinux_policycap_netpeer ( ) )
2008-01-29 08:49:27 -05:00
return NF_ACCEPT ;
2006-07-24 23:32:50 -07:00
2008-01-29 08:49:27 -05:00
secmark_active = selinux_secmark_enabled ( ) ;
2013-05-03 09:05:39 -04:00
peerlbl_active = selinux_peerlbl_enabled ( ) ;
2008-01-29 08:49:27 -05:00
if ( ! secmark_active & & ! peerlbl_active )
return NF_ACCEPT ;
2006-07-24 23:32:50 -07:00
2021-10-11 17:50:48 -04:00
family = state - > pf ;
2008-10-10 10:16:30 -04:00
if ( selinux_skb_peerlbl_sid ( skb , family , & peer_sid ) ! = 0 )
return NF_DROP ;
2021-10-11 17:50:48 -04:00
ifindex = state - > in - > ifindex ;
2023-07-19 13:37:49 +02:00
ad_net_init_from_iif ( & ad , & net , ifindex , family ) ;
2008-01-29 08:49:27 -05:00
if ( selinux_parse_skb ( skb , & ad , & addrp , 1 , NULL ) ! = 0 )
return NF_DROP ;
2008-10-10 10:16:31 -04:00
if ( peerlbl_active ) {
2021-10-11 17:50:48 -04:00
int err ;
err = selinux_inet_sys_rcv_skb ( state - > net , ifindex ,
2014-09-10 17:09:57 -04:00
addrp , family , peer_sid , & ad ) ;
2008-10-10 10:16:31 -04:00
if ( err ) {
2016-06-27 15:06:16 -04:00
selinux_netlbl_err ( skb , family , err , 1 ) ;
2008-01-29 08:49:27 -05:00
return NF_DROP ;
2008-10-10 10:16:31 -04:00
}
}
2008-01-29 08:49:27 -05:00
if ( secmark_active )
2023-03-09 13:30:37 -05:00
if ( avc_has_perm ( peer_sid , skb - > secmark ,
2008-01-29 08:49:27 -05:00
SECCLASS_PACKET , PACKET__FORWARD_IN , & ad ) )
return NF_DROP ;
2021-10-11 17:50:48 -04:00
if ( netlbl_enabled ( ) )
2008-10-10 10:16:32 -04:00
/* we do this in the FORWARD path and not the POST_ROUTING
* path because we want to make sure we apply the necessary
* labeling before IPsec is applied so we can leverage AH
* protection */
if ( selinux_netlbl_skbuff_setsid ( skb , family , peer_sid ) ! = 0 )
return NF_DROP ;
2008-01-29 08:49:27 -05:00
return NF_ACCEPT ;
}
2021-10-11 22:22:29 +02:00
static unsigned int selinux_ip_output ( void * priv , struct sk_buff * skb ,
const struct nf_hook_state * state )
2008-10-10 10:16:32 -04:00
{
2013-12-04 16:10:45 -05:00
struct sock * sk ;
2008-10-10 10:16:32 -04:00
u32 sid ;
if ( ! netlbl_enabled ( ) )
return NF_ACCEPT ;
/* we do this in the LOCAL_OUT path and not the POST_ROUTING path
* because we want to make sure we apply the necessary labeling
* before IPsec is applied so we can leverage AH protection */
2024-11-26 14:59:11 +00:00
sk = sk_to_full_sk ( skb - > sk ) ;
2013-12-04 16:10:45 -05:00
if ( sk ) {
struct sk_security_struct * sksec ;
2015-10-08 05:01:55 -07:00
if ( sk_listener ( sk ) )
2013-12-04 16:10:45 -05:00
/* if the socket is the listening state then this
* packet is a SYN - ACK packet which means it needs to
* be labeled based on the connection / request_sock and
* not the parent socket . unfortunately , we can ' t
* lookup the request_sock yet as it isn ' t queued on
* the parent socket until after the SYN - ACK is sent .
* the " solution " is to simply pass the packet as - is
* as any IP option based labeling should be copied
* from the initial connection request ( in the IP
* layer ) . it is far from ideal , but until we get a
* security label in the packet itself this is the
* best we can do . */
return NF_ACCEPT ;
/* standard practice, label using the parent socket */
2024-07-10 14:32:25 -07:00
sksec = selinux_sock ( sk ) ;
2008-10-10 10:16:32 -04:00
sid = sksec - > sid ;
} else
sid = SECINITSID_KERNEL ;
2021-10-11 17:50:48 -04:00
if ( selinux_netlbl_skbuff_setsid ( skb , state - > pf , sid ) ! = 0 )
2008-10-10 10:16:32 -04:00
return NF_DROP ;
return NF_ACCEPT ;
}
2016-06-27 15:06:15 -04:00
2008-01-29 08:49:27 -05:00
/*
 * Post-routing checks used by selinux_ip_postroute() when the netpeer
 * policy capability is not enabled.
 *
 * Packets without an associated socket are accepted.  Otherwise the
 * owning socket's SID is checked against the packet's secmark (when secmark
 * is enabled) and handed to the XFRM post-routing check.  Parse failures
 * yield NF_DROP; denied checks yield NF_DROP_ERR(-ECONNREFUSED).
 */
static unsigned int selinux_ip_postroute_compat(struct sk_buff *skb,
					const struct nf_hook_state *state)
{
	struct common_audit_data ad;
	struct lsm_network_audit net;
	struct sk_security_struct *sksec;
	struct sock *sk = skb_to_full_sk(skb);
	u8 proto = 0;

	if (!sk)
		return NF_ACCEPT;
	sksec = selinux_sock(sk);

	ad_net_init_from_iif(&ad, &net, state->out->ifindex, state->pf);
	if (selinux_parse_skb(skb, &ad, NULL, 0, &proto))
		return NF_DROP;

	if (selinux_secmark_enabled() &&
	    avc_has_perm(sksec->sid, skb->secmark, SECCLASS_PACKET,
			 PACKET__SEND, &ad))
		return NF_DROP_ERR(-ECONNREFUSED);

	if (selinux_xfrm_postroute_last(sksec->sid, skb, &ad, proto))
		return NF_DROP_ERR(-ECONNREFUSED);

	return NF_ACCEPT;
}
2021-10-11 22:22:29 +02:00
/*
 * Netfilter hook applying SELinux egress checks to outgoing packets.
 *
 * The label used for the checks is chosen from, in order: the packet
 * itself (forwarded traffic), the kernel initial SID (kernel generated
 * traffic), a connection SID regenerated from a listening socket and the
 * packet's peer label (SYN-ACK), or the owning socket's SID.
 *
 * Returns NF_ACCEPT when the packet may pass, NF_DROP when a lookup or
 * parse step fails, and NF_DROP_ERR(-ECONNREFUSED) when a permission check
 * denies the packet.
 */
static unsigned int selinux_ip_postroute(void *priv,
					 struct sk_buff *skb,
					 const struct nf_hook_state *state)
{
	struct common_audit_data ad;
	struct lsm_network_audit net;
	struct sock *sk;
	int use_secmark, use_peerlbl;
	int oif;
	u16 pf;
	u32 pkt_perm;
	u32 peer_sid;
	char *addrp;

	/* Any compatibility mode is handed off to
	 * selinux_ip_postroute_compat() so that this function stays as fast
	 * and as clean as possible.
	 */
	if (!selinux_policycap_netpeer())
		return selinux_ip_postroute_compat(skb, state);

	use_secmark = selinux_secmark_enabled();
	use_peerlbl = selinux_peerlbl_enabled();
	if (!(use_secmark || use_peerlbl))
		return NF_ACCEPT;

	sk = skb_to_full_sk(skb);

#ifdef CONFIG_XFRM
	/* A non-NULL skb->dst->xfrm means the packet is undergoing an IPsec
	 * transformation, so let it pass unchecked; there will be another
	 * chance to apply access control when the packet is on its final
	 * way out.
	 * NOTE: there appear to be some IPv6 multicast cases where skb->dst
	 *       is NULL, in which case access control is applied.
	 * NOTE: for a local socket (skb->sk != NULL) in the TCP listening
	 *       state we cannot wait for the XFRM processing to finish or
	 *       the SA label would be missed; that means more work, but only
	 *       once per connection.
	 */
	if (skb_dst(skb) != NULL && skb_dst(skb)->xfrm != NULL &&
	    (!sk || !sk_listener(sk)))
		return NF_ACCEPT;
#endif

	pf = state->pf;
	/* every case below except forwarded traffic uses packet:send */
	pkt_perm = PACKET__SEND;

	if (!sk) {
		/* With no socket the packet either comes from the kernel or
		 * is being forwarded; a forwarded packet is queried directly
		 * for its security label.
		 */
		if (skb->skb_iif) {
			pkt_perm = PACKET__FORWARD_OUT;
			if (selinux_skb_peerlbl_sid(skb, pf, &peer_sid))
				return NF_DROP;
		} else {
			peer_sid = SECINITSID_KERNEL;
		}
	} else if (sk_listener(sk)) {
		/* Locally generated packet whose socket is listening, i.e. a
		 * SYN-ACK.  The correct label lives on the connection /
		 * request_sock, which cannot be queried because it is only
		 * queued on the parent socket after the SYN-ACK is sent; the
		 * only viable choice is to regenerate the label as
		 * selinux_inet_conn_request() does.  See selinux_ip_output()
		 * for similar problems.
		 */
		struct sk_security_struct *lsec = selinux_sock(sk);
		u32 skb_sid;

		if (selinux_skb_peerlbl_sid(skb, pf, &skb_sid))
			return NF_DROP;

		/* A SECSID_NULL peer label on a packet that has been through
		 * at least one XFRM transformation means this is the "final"
		 * form of a labeled IPsec packet; all access controls have
		 * already been applied, so it can safely pass.
		 */
		if (skb_sid == SECSID_NULL) {
			switch (pf) {
			case PF_INET:
				if (IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED)
					return NF_ACCEPT;
				break;
			case PF_INET6:
				if (IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED)
					return NF_ACCEPT;
				break;
			default:
				return NF_DROP_ERR(-ECONNREFUSED);
			}
		}

		if (selinux_conn_sid(lsec->sid, skb_sid, &peer_sid))
			return NF_DROP;
	} else {
		/* Locally generated packet: use the label of its socket. */
		struct sk_security_struct *osec = selinux_sock(sk);

		peer_sid = osec->sid;
	}

	oif = state->out->ifindex;
	ad_net_init_from_iif(&ad, &net, oif, pf);
	if (selinux_parse_skb(skb, &ad, &addrp, 0, NULL))
		return NF_DROP;

	if (use_secmark &&
	    avc_has_perm(peer_sid, skb->secmark, SECCLASS_PACKET,
			 pkt_perm, &ad))
		return NF_DROP_ERR(-ECONNREFUSED);

	if (use_peerlbl) {
		u32 if_sid;
		u32 node_sid;

		/* egress check against the outgoing interface */
		if (sel_netif_sid(state->net, oif, &if_sid))
			return NF_DROP;
		if (avc_has_perm(peer_sid, if_sid, SECCLASS_NETIF,
				 NETIF__EGRESS, &ad))
			return NF_DROP_ERR(-ECONNREFUSED);

		/* sendto check against the node for the parsed address */
		if (sel_netnode_sid(addrp, pf, &node_sid))
			return NF_DROP;
		if (avc_has_perm(peer_sid, node_sid, SECCLASS_NODE,
				 NODE__SENDTO, &ad))
			return NF_DROP_ERR(-ECONNREFUSED);
	}

	return NF_ACCEPT;
}
# endif /* CONFIG_NETFILTER */
selinux: Add netlink xperm support
Reuse the existing extended permissions infrastructure to support
policies based on the netlink message types.
A new policy capability "netlink_xperm" is introduced. When disabled,
the previous behaviour is preserved. That is, netlink_send will rely on
the permission mappings defined in nlmsgtab.c (e.g., nlmsg_read for
RTM_GETADDR on NETLINK_ROUTE). When enabled, the mappings are ignored
and the generic "nlmsg" permission is used instead.
The new "nlmsg" permission is an extended permission. The 16 bits of the
extended permission are mapped to the nlmsg_type field.
Example policy on Android, preventing regular apps from accessing the
device's MAC address and ARP table, but allowing this access to
privileged apps, looks as follows:
allow netdomain self:netlink_route_socket {
create read getattr write setattr lock append connect getopt
setopt shutdown nlmsg
};
allowxperm netdomain self:netlink_route_socket nlmsg ~{
RTM_GETLINK RTM_GETNEIGH RTM_GETNEIGHTBL
};
allowxperm priv_app self:netlink_route_socket nlmsg {
RTM_GETLINK RTM_GETNEIGH RTM_GETNEIGHTBL
};
The constants in the example above (e.g., RTM_GETLINK) are explicitly
defined in the policy.
It is possible to generate policies to support kernels that may or
may not have the capability enabled by generating a rule for each
scenario. For instance:
allow domain self:netlink_audit_socket nlmsg_read;
allow domain self:netlink_audit_socket nlmsg;
allowxperm domain self:netlink_audit_socket nlmsg { AUDIT_GET };
The approach of defining a new permission ("nlmsg") instead of relying
on the existing permissions (e.g., "nlmsg_read", "nlmsg_readpriv" or
"nlmsg_tty_audit") has been preferred because:
1. This is similar to the other extended permission ("ioctl");
2. With the new extended permission, the coarse-grained mapping is not
necessary anymore. It could eventually be removed, which would be
impossible if the extended permission was defined below these.
3. Having a single extra extended permission considerably simplifies
the implementation here and in libselinux.
Signed-off-by: Thiébaud Weksteen <tweek@google.com>
Signed-off-by: Bram Bonné <brambonne@google.com>
[PM: manual merge fixes for sock_skip_has_perm()]
Signed-off-by: Paul Moore <paul@paul-moore.com>
2024-09-12 11:45:03 +10:00
/*
 * nlmsg_sock_has_extended_perms - check an extended netlink permission
 * @sk: socket the netlink message is being sent on
 * @perms: base permission(s) to check against the socket's class
 * @nlmsg_type: netlink message type, used as the extended permission value
 *
 * The 16-bit message type is split into two bytes: the high byte is passed
 * as the extended-permission "driver" and the low byte as the "xperm".
 *
 * Returns 0 when sock_skip_has_perm() says the socket's SID needs no check,
 * otherwise the result of avc_has_extended_perms().
 */
static int nlmsg_sock_has_extended_perms(struct sock *sk, u32 perms, u16 nlmsg_type)
{
	/*
	 * Fetch the SELinux socket blob through the accessor, exactly as
	 * selinux_netlink_send() does, instead of dereferencing
	 * sk->sk_security directly.
	 */
	struct sk_security_struct *sksec = selinux_sock(sk);
	struct common_audit_data ad;
	struct lsm_network_audit net;
	u8 driver;
	u8 xperm;

	if (sock_skip_has_perm(sksec->sid))
		return 0;

	ad_net_init_from_sk(&ad, &net, sk);

	/* high byte selects the driver, low byte the function within it */
	driver = nlmsg_type >> 8;
	xperm = nlmsg_type & 0xff;

	return avc_has_extended_perms(current_sid(), sksec->sid, sksec->sclass,
				      perms, driver, xperm, &ad);
}
2005-04-16 15:20:36 -07:00
/*
 * selinux_netlink_send - check permission for every netlink message in an skb
 * @sk: sending socket; its SELinux blob supplies the socket class
 * @skb: buffer holding one or more netlink messages back to back
 *
 * Walks the messages in @skb.  For each one, selinux_nlmsg_lookup() maps
 * (socket class, nlmsg_type) to a permission, which is then checked either
 * as an extended permission (when the netlink_xperm policy capability is
 * enabled) or with sock_has_perm().
 *
 * Returns 0 if all checked messages are permitted or if a message with a
 * junk length field stops the walk; otherwise the first non-zero result.
 * A missing message/permission mapping (-EINVAL) is only returned when
 * enforcing is enabled and the policy does not allow unknown permissions.
 */
static int selinux_netlink_send ( struct sock * sk , struct sk_buff * skb )
{
2020-04-28 09:59:02 -04:00
int rc = 0 ;
unsigned int msg_len ;
unsigned int data_len = skb - > len ;
unsigned char * data = skb - > data ;
2020-01-13 23:03:31 +08:00
struct nlmsghdr * nlh ;
2024-07-10 14:32:25 -07:00
struct sk_security_struct * sksec = selinux_sock ( sk ) ;
2020-04-28 09:59:02 -04:00
u16 sclass = sksec - > sclass ;
u32 perm ;
2020-01-13 23:03:31 +08:00
2020-04-28 09:59:02 -04:00
/* keep going while at least one bare netlink header remains */
while ( data_len > = nlmsg_total_size ( 0 ) ) {
nlh = ( struct nlmsghdr * ) data ;
/* NOTE: the nlmsg_len field isn't reliably set by some netlink
 *       users which means we can't reject skb's with bogus
 *       length fields; our solution is to follow what
 *       netlink_rcv_skb() does and simply skip processing at
 *       messages with length fields that are clearly junk
 */
if ( nlh - > nlmsg_len < NLMSG_HDRLEN | | nlh - > nlmsg_len > data_len )
return 0 ;
2020-01-13 23:03:31 +08:00
2020-04-28 09:59:02 -04:00
rc = selinux_nlmsg_lookup ( sclass , nlh - > nlmsg_type , & perm ) ;
if ( rc = = 0 ) {
selinux: Add netlink xperm support
Reuse the existing extended permissions infrastructure to support
policies based on the netlink message types.
A new policy capability "netlink_xperm" is introduced. When disabled,
the previous behaviour is preserved. That is, netlink_send will rely on
the permission mappings defined in nlmsgtab.c (e.g, nlmsg_read for
RTM_GETADDR on NETLINK_ROUTE). When enabled, the mappings are ignored
and the generic "nlmsg" permission is used instead.
The new "nlmsg" permission is an extended permission. The 16 bits of the
extended permission are mapped to the nlmsg_type field.
Example policy on Android, preventing regular apps from accessing the
device's MAC address and ARP table, but allowing this access to
privileged apps, looks as follows:
allow netdomain self:netlink_route_socket {
create read getattr write setattr lock append connect getopt
setopt shutdown nlmsg
};
allowxperm netdomain self:netlink_route_socket nlmsg ~{
RTM_GETLINK RTM_GETNEIGH RTM_GETNEIGHTBL
};
allowxperm priv_app self:netlink_route_socket nlmsg {
RTM_GETLINK RTM_GETNEIGH RTM_GETNEIGHTBL
};
The constants in the example above (e.g., RTM_GETLINK) are explicitly
defined in the policy.
It is possible to generate policies to support kernels that may or
may not have the capability enabled by generating a rule for each
scenario. For instance:
allow domain self:netlink_audit_socket nlmsg_read;
allow domain self:netlink_audit_socket nlmsg;
allowxperm domain self:netlink_audit_socket nlmsg { AUDIT_GET };
The approach of defining a new permission ("nlmsg") instead of relying
on the existing permissions (e.g., "nlmsg_read", "nlmsg_readpriv" or
"nlmsg_tty_audit") has been preferred because:
1. This is similar to the other extended permission ("ioctl");
2. With the new extended permission, the coarse-grained mapping is not
necessary anymore. It could eventually be removed, which would be
impossible if the extended permission was defined below these.
3. Having a single extra extended permission considerably simplifies
the implementation here and in libselinux.
Signed-off-by: Thiébaud Weksteen <tweek@google.com>
Signed-off-by: Bram Bonné <brambonne@google.com>
[PM: manual merge fixes for sock_skip_has_perm()]
Signed-off-by: Paul Moore <paul@paul-moore.com>
2024-09-12 11:45:03 +10:00
/* a mapping exists: pick the check based on the policy capability */
if ( selinux_policycap_netlink_xperm ( ) ) {
rc = nlmsg_sock_has_extended_perms (
sk , perm , nlh - > nlmsg_type ) ;
} else {
rc = sock_has_perm ( sk , perm ) ;
}
2020-04-28 09:59:02 -04:00
if ( rc )
return rc ;
} else if ( rc = = - EINVAL ) {
/* -EINVAL is a missing msg/perm mapping */
2020-01-13 23:03:31 +08:00
pr_warn_ratelimited ( " SELinux: unrecognized netlink "
2020-04-28 09:59:02 -04:00
" message: protocol=%hu nlmsg_type=%hu sclass=%s "
" pid=%d comm=%s \n " ,
sk - > sk_protocol , nlh - > nlmsg_type ,
secclass_map [ sclass - 1 ] . name ,
task_pid_nr ( current ) , current - > comm ) ;
2023-03-09 13:30:37 -05:00
/* only fail the send when enforcing and unknown perms are not allowed */
if ( enforcing_enabled ( ) & &
! security_get_allow_unknown ( ) )
2020-04-28 09:59:02 -04:00
return rc ;
rc = 0 ;
} else if ( rc = = - ENOENT ) {
/* -ENOENT is a missing socket/class mapping, ignore */
rc = 0 ;
} else {
return rc ;
2020-01-13 23:03:31 +08:00
}
2020-04-28 09:59:02 -04:00
/* move to the next message after applying netlink padding */
msg_len = NLMSG_ALIGN ( nlh - > nlmsg_len ) ;
/* the padded message consumes everything that is left: done */
if ( msg_len > = data_len )
return 0 ;
data_len - = msg_len ;
data + = msg_len ;
2020-01-13 23:03:31 +08:00
}
2020-04-28 09:59:02 -04:00
return rc ;
2005-04-16 15:20:36 -07:00
}
2018-11-20 11:55:02 -08:00
/*
 * Initialise an IPC security blob: label it with the SID of the current
 * task and record the object class supplied by the caller.
 */
static void ipc_init_security(struct ipc_security_struct *isec, u16 sclass)
{
	const u32 creator_sid = current_sid();

	isec->sid = creator_sid;
	isec->sclass = sclass;
}
static int ipc_has_perm ( struct kern_ipc_perm * ipc_perms ,
2005-05-01 08:58:39 -07:00
u32 perms )
2005-04-16 15:20:36 -07:00
{
struct ipc_security_struct * isec ;
2009-07-14 12:14:09 -04:00
struct common_audit_data ad ;
2008-11-14 10:39:19 +11:00
u32 sid = current_sid ( ) ;
2005-04-16 15:20:36 -07:00
2018-09-21 17:19:45 -07:00
isec = selinux_ipc ( ipc_perms ) ;
2005-04-16 15:20:36 -07:00
2012-04-04 15:01:43 -04:00
ad . type = LSM_AUDIT_DATA_IPC ;
2005-04-16 15:20:36 -07:00
ad . u . ipc_id = ipc_perms - > key ;
2023-03-09 13:30:37 -05:00
return avc_has_perm ( sid , isec - > sid , isec - > sclass , perms , & ad ) ;
2005-04-16 15:20:36 -07:00
}
static int selinux_msg_msg_alloc_security ( struct msg_msg * msg )
{
2020-01-10 17:58:56 +08:00
struct msg_security_struct * msec ;
msec = selinux_msg_msg ( msg ) ;
msec - > sid = SECINITSID_UNLABELED ;
return 0 ;
2005-04-16 15:20:36 -07:00
}
/* message queue security operations */
2018-03-22 21:22:26 -05:00
static int selinux_msg_queue_alloc_security ( struct kern_ipc_perm * msq )
2005-04-16 15:20:36 -07:00
{
struct ipc_security_struct * isec ;
2009-07-14 12:14:09 -04:00
struct common_audit_data ad ;
2008-11-14 10:39:19 +11:00
u32 sid = current_sid ( ) ;
2005-04-16 15:20:36 -07:00
2018-11-20 11:55:02 -08:00
isec = selinux_ipc ( msq ) ;
ipc_init_security ( isec , SECCLASS_MSGQ ) ;
2005-04-16 15:20:36 -07:00
2012-04-04 15:01:43 -04:00
ad . type = LSM_AUDIT_DATA_IPC ;
2018-03-22 21:22:26 -05:00
ad . u . ipc_id = msq - > key ;
2005-04-16 15:20:36 -07:00
2023-03-09 13:30:37 -05:00
return avc_has_perm ( sid , isec - > sid , SECCLASS_MSGQ ,
2022-09-12 09:17:19 +00:00
MSGQ__CREATE , & ad ) ;
2005-04-16 15:20:36 -07:00
}
2018-03-22 21:22:26 -05:00
static int selinux_msg_queue_associate ( struct kern_ipc_perm * msq , int msqflg )
2005-04-16 15:20:36 -07:00
{
struct ipc_security_struct * isec ;
2009-07-14 12:14:09 -04:00
struct common_audit_data ad ;
2008-11-14 10:39:19 +11:00
u32 sid = current_sid ( ) ;
2005-04-16 15:20:36 -07:00
2018-09-21 17:19:45 -07:00
isec = selinux_ipc ( msq ) ;
2005-04-16 15:20:36 -07:00
2012-04-04 15:01:43 -04:00
ad . type = LSM_AUDIT_DATA_IPC ;
2018-03-22 21:22:26 -05:00
ad . u . ipc_id = msq - > key ;
2005-04-16 15:20:36 -07:00
2023-03-09 13:30:37 -05:00
return avc_has_perm ( sid , isec - > sid , SECCLASS_MSGQ ,
2005-04-16 15:20:36 -07:00
MSGQ__ASSOCIATE , & ad ) ;
}
2018-03-22 21:22:26 -05:00
static int selinux_msg_queue_msgctl ( struct kern_ipc_perm * msq , int cmd )
2005-04-16 15:20:36 -07:00
{
2023-07-06 15:23:27 +02:00
u32 perms ;
2005-04-16 15:20:36 -07:00
2008-04-17 13:17:49 -04:00
switch ( cmd ) {
2005-04-16 15:20:36 -07:00
case IPC_INFO :
case MSG_INFO :
/* No specific object, just general system-wide information. */
2023-03-09 13:30:37 -05:00
return avc_has_perm ( current_sid ( ) , SECINITSID_KERNEL ,
2017-01-09 10:07:31 -05:00
SECCLASS_SYSTEM , SYSTEM__IPC_INFO , NULL ) ;
2005-04-16 15:20:36 -07:00
case IPC_STAT :
case MSG_STAT :
2018-04-10 16:35:30 -07:00
case MSG_STAT_ANY :
2005-04-16 15:20:36 -07:00
perms = MSGQ__GETATTR | MSGQ__ASSOCIATE ;
break ;
case IPC_SET :
perms = MSGQ__SETATTR ;
break ;
case IPC_RMID :
perms = MSGQ__DESTROY ;
break ;
default :
return 0 ;
}
2023-07-06 15:23:27 +02:00
return ipc_has_perm ( msq , perms ) ;
2005-04-16 15:20:36 -07:00
}
2018-03-22 21:22:26 -05:00
static int selinux_msg_queue_msgsnd ( struct kern_ipc_perm * msq , struct msg_msg * msg , int msqflg )
2005-04-16 15:20:36 -07:00
{
struct ipc_security_struct * isec ;
struct msg_security_struct * msec ;
2009-07-14 12:14:09 -04:00
struct common_audit_data ad ;
2008-11-14 10:39:19 +11:00
u32 sid = current_sid ( ) ;
2005-04-16 15:20:36 -07:00
int rc ;
2018-09-21 17:19:45 -07:00
isec = selinux_ipc ( msq ) ;
msec = selinux_msg_msg ( msg ) ;
2005-04-16 15:20:36 -07:00
/*
* First time through , need to assign label to the message
*/
if ( msec - > sid = = SECINITSID_UNLABELED ) {
/*
* Compute new sid based on current process and
* message queue this message will be stored in
*/
2023-03-09 13:30:37 -05:00
rc = security_transition_sid ( sid , isec - > sid ,
2018-03-01 18:48:02 -05:00
SECCLASS_MSG , NULL , & msec - > sid ) ;
2005-04-16 15:20:36 -07:00
if ( rc )
return rc ;
}
2012-04-04 15:01:43 -04:00
ad . type = LSM_AUDIT_DATA_IPC ;
2018-03-22 21:22:26 -05:00
ad . u . ipc_id = msq - > key ;
2005-04-16 15:20:36 -07:00
/* Can this process write to the queue? */
2023-03-09 13:30:37 -05:00
rc = avc_has_perm ( sid , isec - > sid , SECCLASS_MSGQ ,
2005-04-16 15:20:36 -07:00
MSGQ__WRITE , & ad ) ;
if ( ! rc )
/* Can this process send the message */
2023-03-09 13:30:37 -05:00
rc = avc_has_perm ( sid , msec - > sid , SECCLASS_MSG ,
2008-11-14 10:39:19 +11:00
MSG__SEND , & ad ) ;
2005-04-16 15:20:36 -07:00
if ( ! rc )
/* Can the message be put in the queue? */
2023-03-09 13:30:37 -05:00
rc = avc_has_perm ( msec - > sid , isec - > sid , SECCLASS_MSGQ ,
2008-11-14 10:39:19 +11:00
MSGQ__ENQUEUE , & ad ) ;
2005-04-16 15:20:36 -07:00
return rc ;
}
2018-03-22 21:22:26 -05:00
static int selinux_msg_queue_msgrcv ( struct kern_ipc_perm * msq , struct msg_msg * msg ,
2005-04-16 15:20:36 -07:00
struct task_struct * target ,
long type , int mode )
{
struct ipc_security_struct * isec ;
struct msg_security_struct * msec ;
2009-07-14 12:14:09 -04:00
struct common_audit_data ad ;
2021-09-23 09:50:11 -04:00
u32 sid = task_sid_obj ( target ) ;
2005-04-16 15:20:36 -07:00
int rc ;
2018-09-21 17:19:45 -07:00
isec = selinux_ipc ( msq ) ;
msec = selinux_msg_msg ( msg ) ;
2005-04-16 15:20:36 -07:00
2012-04-04 15:01:43 -04:00
ad . type = LSM_AUDIT_DATA_IPC ;
2018-03-22 21:22:26 -05:00
ad . u . ipc_id = msq - > key ;
2005-04-16 15:20:36 -07:00
2023-03-09 13:30:37 -05:00
rc = avc_has_perm ( sid , isec - > sid ,
2005-04-16 15:20:36 -07:00
SECCLASS_MSGQ , MSGQ__READ , & ad ) ;
if ( ! rc )
2023-03-09 13:30:37 -05:00
rc = avc_has_perm ( sid , msec - > sid ,
2005-04-16 15:20:36 -07:00
SECCLASS_MSG , MSG__RECEIVE , & ad ) ;
return rc ;
}
/* Shared Memory security operations */
2018-03-22 21:08:27 -05:00
static int selinux_shm_alloc_security ( struct kern_ipc_perm * shp )
2005-04-16 15:20:36 -07:00
{
struct ipc_security_struct * isec ;
2009-07-14 12:14:09 -04:00
struct common_audit_data ad ;
2008-11-14 10:39:19 +11:00
u32 sid = current_sid ( ) ;
2005-04-16 15:20:36 -07:00
2018-11-20 11:55:02 -08:00
isec = selinux_ipc ( shp ) ;
ipc_init_security ( isec , SECCLASS_SHM ) ;
2005-04-16 15:20:36 -07:00
2012-04-04 15:01:43 -04:00
ad . type = LSM_AUDIT_DATA_IPC ;
2018-03-22 21:08:27 -05:00
ad . u . ipc_id = shp - > key ;
2005-04-16 15:20:36 -07:00
2023-03-09 13:30:37 -05:00
return avc_has_perm ( sid , isec - > sid , SECCLASS_SHM ,
2022-09-12 09:17:19 +00:00
SHM__CREATE , & ad ) ;
2005-04-16 15:20:36 -07:00
}
2018-03-22 21:08:27 -05:00
static int selinux_shm_associate ( struct kern_ipc_perm * shp , int shmflg )
2005-04-16 15:20:36 -07:00
{
struct ipc_security_struct * isec ;
2009-07-14 12:14:09 -04:00
struct common_audit_data ad ;
2008-11-14 10:39:19 +11:00
u32 sid = current_sid ( ) ;
2005-04-16 15:20:36 -07:00
2018-09-21 17:19:45 -07:00
isec = selinux_ipc ( shp ) ;
2005-04-16 15:20:36 -07:00
2012-04-04 15:01:43 -04:00
ad . type = LSM_AUDIT_DATA_IPC ;
2018-03-22 21:08:27 -05:00
ad . u . ipc_id = shp - > key ;
2005-04-16 15:20:36 -07:00
2023-03-09 13:30:37 -05:00
return avc_has_perm ( sid , isec - > sid , SECCLASS_SHM ,
2005-04-16 15:20:36 -07:00
SHM__ASSOCIATE , & ad ) ;
}
/* Note, at this point, shp is locked down */
2018-03-22 21:08:27 -05:00
/*
 * selinux_shm_shmctl - permission check for a shmctl() command
 * @shp: shared memory segment the command targets
 * @cmd: shmctl command
 *
 * Maps @cmd to the SHM permission(s) it needs and checks them against
 * @shp with ipc_has_perm().  IPC_INFO and SHM_INFO involve no particular
 * segment, so they are checked as SYSTEM__IPC_INFO against the kernel SID
 * instead.  Commands not listed are allowed without a check (return 0).
 */
static int selinux_shm_shmctl ( struct kern_ipc_perm * shp , int cmd )
2005-04-16 15:20:36 -07:00
{
2023-07-06 15:23:27 +02:00
u32 perms ;
2005-04-16 15:20:36 -07:00
2008-04-17 13:17:49 -04:00
switch ( cmd ) {
2005-04-16 15:20:36 -07:00
case IPC_INFO :
case SHM_INFO :
/* No specific object, just general system-wide information. */
2023-03-09 13:30:37 -05:00
return avc_has_perm ( current_sid ( ) , SECINITSID_KERNEL ,
2017-01-09 10:07:31 -05:00
SECCLASS_SYSTEM , SYSTEM__IPC_INFO , NULL ) ;
2005-04-16 15:20:36 -07:00
case IPC_STAT :
case SHM_STAT :
ipc/shm: introduce shmctl(SHM_STAT_ANY)
Patch series "sysvipc: introduce STAT_ANY commands", v2.
The following patches adds the discussed (see [1]) new command for shm
as well as for sems and msq as they are subject to the same
discrepancies for ipc object permission checks between the syscall and
via procfs. These new commands are justified in that (1) we are stuck
with this semantics as changing syscall and procfs can break userland;
and (2) some users can benefit from performance (for large amounts of
shm segments, for example) from not having to parse the procfs
interface.
Once merged, I will submit the necesary manpage updates. But I'm thinking
something like:
: diff --git a/man2/shmctl.2 b/man2/shmctl.2
: index 7bb503999941..bb00bbe21a57 100644
: --- a/man2/shmctl.2
: +++ b/man2/shmctl.2
: @@ -41,6 +41,7 @@
: .\" 2005-04-25, mtk -- noted aberrant Linux behavior w.r.t. new
: .\" attaches to a segment that has already been marked for deletion.
: .\" 2005-08-02, mtk: Added IPC_INFO, SHM_INFO, SHM_STAT descriptions.
: +.\" 2018-02-13, dbueso: Added SHM_STAT_ANY description.
: .\"
: .TH SHMCTL 2 2017-09-15 "Linux" "Linux Programmer's Manual"
: .SH NAME
: @@ -242,6 +243,18 @@ However, the
: argument is not a segment identifier, but instead an index into
: the kernel's internal array that maintains information about
: all shared memory segments on the system.
: +.TP
: +.BR SHM_STAT_ANY " (Linux-specific)"
: +Return a
: +.I shmid_ds
: +structure as for
: +.BR SHM_STAT .
: +However, the
: +.I shm_perm.mode
: +is not checked for read access for
: +.IR shmid ,
: +resembing the behaviour of
: +/proc/sysvipc/shm.
: .PP
: The caller can prevent or allow swapping of a shared
: memory segment with the following \fIcmd\fP values:
: @@ -287,7 +300,7 @@ operation returns the index of the highest used entry in the
: kernel's internal array recording information about all
: shared memory segments.
: (This information can be used with repeated
: -.B SHM_STAT
: +.B SHM_STAT/SHM_STAT_ANY
: operations to obtain information about all shared memory segments
: on the system.)
: A successful
: @@ -328,7 +341,7 @@ isn't accessible.
: \fIshmid\fP is not a valid identifier, or \fIcmd\fP
: is not a valid command.
: Or: for a
: -.B SHM_STAT
: +.B SHM_STAT/SHM_STAT_ANY
: operation, the index value specified in
: .I shmid
: referred to an array slot that is currently unused.
This patch (of 3):
There is a permission discrepancy when consulting shm ipc object metadata
between /proc/sysvipc/shm (0444) and the SHM_STAT shmctl command. The
later does permission checks for the object vs S_IRUGO. As such there can
be cases where EACCESS is returned via syscall but the info is displayed
anyways in the procfs files.
While this might have security implications via info leaking (albeit no
writing to the shm metadata), this behavior goes way back and showing all
the objects regardless of the permissions was most likely an overlook - so
we are stuck with it. Furthermore, modifying either the syscall or the
procfs file can cause userspace programs to break (ie ipcs). Some
applications require getting the procfs info (without root privileges) and
can be rather slow in comparison with a syscall -- up to 500x in some
reported cases.
This patch introduces a new SHM_STAT_ANY command such that the shm ipc
object permissions are ignored, and only audited instead. In addition,
I've left the lsm security hook checks in place, as if some policy can
block the call, then the user has no other choice than just parsing the
procfs file.
[1] https://lkml.org/lkml/2017/12/19/220
Link: http://lkml.kernel.org/r/20180215162458.10059-2-dave@stgolabs.net
Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Manfred Spraul <manfred@colorfullife.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Robert Kettler <robert.kettler@outlook.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2018-04-10 16:35:23 -07:00
case SHM_STAT_ANY :
2005-04-16 15:20:36 -07:00
/* all three stat variants need both getattr and associate */
perms = SHM__GETATTR | SHM__ASSOCIATE ;
break ;
case IPC_SET :
perms = SHM__SETATTR ;
break ;
case SHM_LOCK :
case SHM_UNLOCK :
/* locking and unlocking share a single permission */
perms = SHM__LOCK ;
break ;
case IPC_RMID :
perms = SHM__DESTROY ;
break ;
default :
/* unrecognised command: no SELinux check is applied */
return 0 ;
}
2023-07-06 15:23:27 +02:00
return ipc_has_perm ( shp , perms ) ;
2005-04-16 15:20:36 -07:00
}
2018-03-22 21:08:27 -05:00
static int selinux_shm_shmat ( struct kern_ipc_perm * shp ,
2005-04-16 15:20:36 -07:00
char __user * shmaddr , int shmflg )
{
u32 perms ;
if ( shmflg & SHM_RDONLY )
perms = SHM__READ ;
else
perms = SHM__READ | SHM__WRITE ;
2018-03-22 21:08:27 -05:00
return ipc_has_perm ( shp , perms ) ;
2005-04-16 15:20:36 -07:00
}
/* Semaphore security operations */
2018-03-22 20:52:43 -05:00
static int selinux_sem_alloc_security ( struct kern_ipc_perm * sma )
2005-04-16 15:20:36 -07:00
{
struct ipc_security_struct * isec ;
2009-07-14 12:14:09 -04:00
struct common_audit_data ad ;
2008-11-14 10:39:19 +11:00
u32 sid = current_sid ( ) ;
2005-04-16 15:20:36 -07:00
2018-11-20 11:55:02 -08:00
isec = selinux_ipc ( sma ) ;
ipc_init_security ( isec , SECCLASS_SEM ) ;
2005-04-16 15:20:36 -07:00
2012-04-04 15:01:43 -04:00
ad . type = LSM_AUDIT_DATA_IPC ;
2018-03-22 20:52:43 -05:00
ad . u . ipc_id = sma - > key ;
2005-04-16 15:20:36 -07:00
2023-03-09 13:30:37 -05:00
return avc_has_perm ( sid , isec - > sid , SECCLASS_SEM ,
2022-09-12 09:17:19 +00:00
SEM__CREATE , & ad ) ;
2005-04-16 15:20:36 -07:00
}
2018-03-22 20:52:43 -05:00
static int selinux_sem_associate ( struct kern_ipc_perm * sma , int semflg )
2005-04-16 15:20:36 -07:00
{
struct ipc_security_struct * isec ;
2009-07-14 12:14:09 -04:00
struct common_audit_data ad ;
2008-11-14 10:39:19 +11:00
u32 sid = current_sid ( ) ;
2005-04-16 15:20:36 -07:00
2018-09-21 17:19:45 -07:00
isec = selinux_ipc ( sma ) ;
2005-04-16 15:20:36 -07:00
2012-04-04 15:01:43 -04:00
ad . type = LSM_AUDIT_DATA_IPC ;
2018-03-22 20:52:43 -05:00
ad . u . ipc_id = sma - > key ;
2005-04-16 15:20:36 -07:00
2023-03-09 13:30:37 -05:00
return avc_has_perm ( sid , isec - > sid , SECCLASS_SEM ,
2005-04-16 15:20:36 -07:00
SEM__ASSOCIATE , & ad ) ;
}
/* Note, at this point, sma is locked down */
2018-03-22 20:52:43 -05:00
static int selinux_sem_semctl ( struct kern_ipc_perm * sma , int cmd )
2005-04-16 15:20:36 -07:00
{
int err ;
u32 perms ;
2008-04-17 13:17:49 -04:00
switch ( cmd ) {
2005-04-16 15:20:36 -07:00
case IPC_INFO :
case SEM_INFO :
/* No specific object, just general system-wide information. */
2023-03-09 13:30:37 -05:00
return avc_has_perm ( current_sid ( ) , SECINITSID_KERNEL ,
2017-01-09 10:07:31 -05:00
SECCLASS_SYSTEM , SYSTEM__IPC_INFO , NULL ) ;
2005-04-16 15:20:36 -07:00
case GETPID :
case GETNCNT :
case GETZCNT :
perms = SEM__GETATTR ;
break ;
case GETVAL :
case GETALL :
perms = SEM__READ ;
break ;
case SETVAL :
case SETALL :
perms = SEM__WRITE ;
break ;
case IPC_RMID :
perms = SEM__DESTROY ;
break ;
case IPC_SET :
perms = SEM__SETATTR ;
break ;
case IPC_STAT :
case SEM_STAT :
2018-04-10 16:35:26 -07:00
case SEM_STAT_ANY :
2005-04-16 15:20:36 -07:00
perms = SEM__GETATTR | SEM__ASSOCIATE ;
break ;
default :
return 0 ;
}
2018-03-22 20:52:43 -05:00
err = ipc_has_perm ( sma , perms ) ;
2005-04-16 15:20:36 -07:00
return err ;
}
2018-03-22 20:52:43 -05:00
static int selinux_sem_semop ( struct kern_ipc_perm * sma ,
2005-04-16 15:20:36 -07:00
struct sembuf * sops , unsigned nsops , int alter )
{
u32 perms ;
if ( alter )
perms = SEM__READ | SEM__WRITE ;
else
perms = SEM__READ ;
2018-03-22 20:52:43 -05:00
return ipc_has_perm ( sma , perms ) ;
2005-04-16 15:20:36 -07:00
}
/*
 * selinux_ipc_permission - check generic read/write access to an IPC object
 * @ipcp: IPC object being accessed
 * @flag: requested access, tested against S_IRUGO and S_IWUGO
 *
 * Any read bit in @flag requires IPC__UNIX_READ and any write bit requires
 * IPC__UNIX_WRITE.  Returns 0 without a check when neither is requested,
 * otherwise the result of ipc_has_perm().
 */
static int selinux_ipc_permission(struct kern_ipc_perm *ipcp, short flag)
{
	u32 av = 0;

	if (flag & S_IRUGO)
		av |= IPC__UNIX_READ;
	if (flag & S_IWUGO)
		av |= IPC__UNIX_WRITE;

	/* nothing requested that SELinux mediates */
	if (av == 0)
		return 0;

	return ipc_has_perm(ipcp, av);
}
2024-10-09 10:32:13 -07:00
static void selinux_ipc_getlsmprop ( struct kern_ipc_perm * ipcp ,
struct lsm_prop * prop )
2008-03-01 21:52:30 +02:00
{
2018-09-21 17:19:45 -07:00
struct ipc_security_struct * isec = selinux_ipc ( ipcp ) ;
2024-10-09 10:32:13 -07:00
prop - > selinux . secid = isec - > sid ;
2008-03-01 21:52:30 +02:00
}
2008-04-17 13:17:49 -04:00
/*
 * When a dentry is instantiated with an inode, run the inode security
 * initialisation with that dentry.  Does nothing when @inode is NULL;
 * the result of the initialisation is not propagated.
 */
static void selinux_d_instantiate(struct dentry *dentry, struct inode *inode)
{
	if (!inode)
		return;

	inode_doinit_with_dentry(inode, dentry);
}
2023-09-12 13:56:55 -07:00
static int selinux_lsm_getattr ( unsigned int attr , struct task_struct * p ,
char * * value )
2005-04-16 15:20:36 -07:00
{
2024-02-23 16:26:40 -05:00
const struct task_security_struct * tsec ;
2005-04-16 15:20:36 -07:00
int error ;
2024-02-23 16:26:40 -05:00
u32 sid ;
u32 len ;
2005-04-16 15:20:36 -07:00
2017-01-09 10:07:31 -05:00
rcu_read_lock ( ) ;
2024-02-23 16:26:40 -05:00
tsec = selinux_cred ( __task_cred ( p ) ) ;
if ( p ! = current ) {
error = avc_has_perm ( current_sid ( ) , tsec - > sid ,
2017-01-09 10:07:31 -05:00
SECCLASS_PROCESS , PROCESS__GETATTR , NULL ) ;
2005-04-16 15:20:36 -07:00
if ( error )
2024-02-23 16:26:40 -05:00
goto err_unlock ;
2005-04-16 15:20:36 -07:00
}
2023-09-12 13:56:55 -07:00
switch ( attr ) {
case LSM_ATTR_CURRENT :
2024-02-23 16:26:40 -05:00
sid = tsec - > sid ;
2023-09-12 13:56:55 -07:00
break ;
case LSM_ATTR_PREV :
2024-02-23 16:26:40 -05:00
sid = tsec - > osid ;
2023-09-12 13:56:55 -07:00
break ;
case LSM_ATTR_EXEC :
2024-02-23 16:26:40 -05:00
sid = tsec - > exec_sid ;
2023-09-12 13:56:55 -07:00
break ;
case LSM_ATTR_FSCREATE :
2024-02-23 16:26:40 -05:00
sid = tsec - > create_sid ;
2023-09-12 13:56:55 -07:00
break ;
case LSM_ATTR_KEYCREATE :
2024-02-23 16:26:40 -05:00
sid = tsec - > keycreate_sid ;
2023-09-12 13:56:55 -07:00
break ;
case LSM_ATTR_SOCKCREATE :
2024-02-23 16:26:40 -05:00
sid = tsec - > sockcreate_sid ;
2023-09-12 13:56:55 -07:00
break ;
default :
error = - EOPNOTSUPP ;
2024-02-23 16:26:40 -05:00
goto err_unlock ;
2017-01-09 10:07:31 -05:00
}
2008-11-14 10:39:19 +11:00
rcu_read_unlock ( ) ;
2005-04-16 15:20:36 -07:00
2024-02-23 16:26:40 -05:00
if ( sid = = SECSID_NULL ) {
* value = NULL ;
2005-04-16 15:20:36 -07:00
return 0 ;
2024-02-23 16:26:40 -05:00
}
2005-04-16 15:20:36 -07:00
2023-03-09 13:30:37 -05:00
error = security_sid_to_context ( sid , value , & len ) ;
2007-03-12 16:17:58 +00:00
if ( error )
return error ;
return len ;
2008-11-14 10:39:19 +11:00
2024-02-23 16:26:40 -05:00
err_unlock :
2008-11-14 10:39:19 +11:00
rcu_read_unlock ( ) ;
2017-01-09 10:07:31 -05:00
return error ;
2005-04-16 15:20:36 -07:00
}
2023-09-12 13:56:55 -07:00
static int selinux_lsm_setattr ( u64 attr , void * value , size_t size )
2005-04-16 15:20:36 -07:00
{
struct task_security_struct * tsec ;
CRED: Inaugurate COW credentials
Inaugurate copy-on-write credentials management. This uses RCU to manage the
credentials pointer in the task_struct with respect to accesses by other tasks.
A process may only modify its own credentials, and so does not need locking to
access or modify its own credentials.
A mutex (cred_replace_mutex) is added to the task_struct to control the effect
of PTRACE_ATTACHED on credential calculations, particularly with respect to
execve().
With this patch, the contents of an active credentials struct may not be
changed directly; rather a new set of credentials must be prepared, modified
and committed using something like the following sequence of events:
struct cred *new = prepare_creds();
int ret = blah(new);
if (ret < 0) {
abort_creds(new);
return ret;
}
return commit_creds(new);
There are some exceptions to this rule: the keyrings pointed to by the active
credentials may be instantiated - keyrings violate the COW rule as managing
COW keyrings is tricky, given that it is possible for a task to directly alter
the keys in a keyring in use by another task.
To help enforce this, various pointers to sets of credentials, such as those in
the task_struct, are declared const. The purpose of this is compile-time
discouragement of altering credentials through those pointers. Once a set of
credentials has been made public through one of these pointers, it may not be
modified, except under special circumstances:
(1) Its reference count may be incremented and decremented.
(2) The keyrings to which it points may be modified, but not replaced.
The only safe way to modify anything else is to create a replacement and commit
using the functions described in Documentation/credentials.txt (which will be
added by a later patch).
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
This now prepares and commits credentials in various places in the
security code rather than altering the current creds directly.
(2) Temporary credential overrides.
do_coredump() and sys_faccessat() now prepare their own credentials and
temporarily override the ones currently on the acting thread, whilst
preventing interference from other threads by holding cred_replace_mutex
on the thread being dumped.
This will be replaced in a future patch by something that hands down the
credentials directly to the functions being called, rather than altering
the task's objective credentials.
(3) LSM interface.
A number of functions have been changed, added or removed:
(*) security_capset_check(), ->capset_check()
(*) security_capset_set(), ->capset_set()
Removed in favour of security_capset().
(*) security_capset(), ->capset()
New. This is passed a pointer to the new creds, a pointer to the old
creds and the proposed capability sets. It should fill in the new
creds or return an error. All pointers, barring the pointer to the
new creds, are now const.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
Changed; now returns a value, which will cause the process to be
killed if it's an error.
(*) security_task_alloc(), ->task_alloc_security()
Removed in favour of security_prepare_creds().
(*) security_cred_free(), ->cred_free()
New. Free security data attached to cred->security.
(*) security_prepare_creds(), ->cred_prepare()
New. Duplicate any security data attached to cred->security.
(*) security_commit_creds(), ->cred_commit()
New. Apply any security effects for the upcoming installation of new
security by commit_creds().
(*) security_task_post_setuid(), ->task_post_setuid()
Removed in favour of security_task_fix_setuid().
(*) security_task_fix_setuid(), ->task_fix_setuid()
Fix up the proposed new credentials for setuid(). This is used by
cap_set_fix_setuid() to implicitly adjust capabilities in line with
setuid() changes. Changes are made to the new credentials, rather
than the task itself as in security_task_post_setuid().
(*) security_task_reparent_to_init(), ->task_reparent_to_init()
Removed. Instead the task being reparented to init is referred
directly to init's credentials.
NOTE! This results in the loss of some state: SELinux's osid no
longer records the sid of the thread that forked it.
(*) security_key_alloc(), ->key_alloc()
(*) security_key_permission(), ->key_permission()
Changed. These now take cred pointers rather than task pointers to
refer to the security context.
(4) sys_capset().
This has been simplified and uses less locking. The LSM functions it
calls have been merged.
(5) reparent_to_kthreadd().
This gives the current thread the same credentials as init by simply using
commit_thread() to point that way.
(6) __sigqueue_alloc() and switch_uid()
__sigqueue_alloc() can't stop the target task from changing its creds
beneath it, so this function gets a reference to the currently applicable
user_struct which it then passes into the sigqueue struct it returns if
successful.
switch_uid() is now called from commit_creds(), and possibly should be
folded into that. commit_creds() should take care of protecting
__sigqueue_alloc().
(7) [sg]et[ug]id() and co and [sg]et_current_groups.
The set functions now all use prepare_creds(), commit_creds() and
abort_creds() to build and check a new set of credentials before applying
it.
security_task_set[ug]id() is called inside the prepared section. This
guarantees that nothing else will affect the creds until we've finished.
The calling of set_dumpable() has been moved into commit_creds().
Much of the functionality of set_user() has been moved into
commit_creds().
The get functions all simply access the data directly.
(8) security_task_prctl() and cap_task_prctl().
security_task_prctl() has been modified to return -ENOSYS if it doesn't
want to handle a function, or otherwise return the return value directly
rather than through an argument.
Additionally, cap_task_prctl() now prepares a new set of credentials, even
if it doesn't end up using it.
(9) Keyrings.
A number of changes have been made to the keyrings code:
(a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have
all been dropped and built in to the credentials functions directly.
They may want separating out again later.
(b) key_alloc() and search_process_keyrings() now take a cred pointer
rather than a task pointer to specify the security context.
(c) copy_creds() gives a new thread within the same thread group a new
thread keyring if its parent had one, otherwise it discards the thread
keyring.
(d) The authorisation key now points directly to the credentials to extend
the search into rather than pointing to the task that carries them.
(e) Installing thread, process or session keyrings causes a new set of
credentials to be created, even though it's not strictly necessary for
process or session keyrings (they're shared).
(10) Usermode helper.
The usermode helper code now carries a cred struct pointer in its
subprocess_info struct instead of a new session keyring pointer. This set
of credentials is derived from init_cred and installed on the new process
after it has been cloned.
call_usermodehelper_setup() allocates the new credentials and
call_usermodehelper_freeinfo() discards them if they haven't been used. A
special cred function (prepare_usermodeinfo_creds()) is provided
specifically for call_usermodehelper_setup() to call.
call_usermodehelper_setkeys() adjusts the credentials to sport the
supplied keyring as the new session keyring.
(11) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) selinux_setprocattr() no longer does its check for whether the
current ptracer can access processes with the new SID inside the lock
that covers getting the ptracer's SID. Whilst this lock ensures that
the check is done with the ptracer pinned, the result is only valid
until the lock is released, so there's no point doing it inside the
lock.
(12) is_single_threaded().
This function has been extracted from selinux_setprocattr() and put into
a file of its own in the lib/ directory as join_session_keyring() now
wants to use it too.
The code in SELinux just checked to see whether a task shared mm_structs
with other tasks (CLONE_VM), but that isn't good enough. We really want
to know if they're part of the same thread group (CLONE_THREAD).
(13) nfsd.
The NFS server daemon now has to use the COW credentials to set the
credentials it is going to use. It really needs to pass the credentials
down to the functions it calls, but it can't do that until other patches
in this series have been applied.
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:23 +11:00
struct cred * new ;
2017-01-09 10:07:31 -05:00
u32 mysid = current_sid ( ) , sid = 0 , ptsid ;
2005-04-16 15:20:36 -07:00
int error ;
char * str = value ;
/*
* Basic control over ability to set these attributes at all .
*/
2023-09-12 13:56:55 -07:00
switch ( attr ) {
case LSM_ATTR_EXEC :
2023-03-09 13:30:37 -05:00
error = avc_has_perm ( mysid , mysid , SECCLASS_PROCESS ,
2017-01-09 10:07:31 -05:00
PROCESS__SETEXEC , NULL ) ;
2023-09-12 13:56:55 -07:00
break ;
case LSM_ATTR_FSCREATE :
2023-03-09 13:30:37 -05:00
error = avc_has_perm ( mysid , mysid , SECCLASS_PROCESS ,
2017-01-09 10:07:31 -05:00
PROCESS__SETFSCREATE , NULL ) ;
2023-09-12 13:56:55 -07:00
break ;
case LSM_ATTR_KEYCREATE :
2023-03-09 13:30:37 -05:00
error = avc_has_perm ( mysid , mysid , SECCLASS_PROCESS ,
2017-01-09 10:07:31 -05:00
PROCESS__SETKEYCREATE , NULL ) ;
2023-09-12 13:56:55 -07:00
break ;
case LSM_ATTR_SOCKCREATE :
2023-03-09 13:30:37 -05:00
error = avc_has_perm ( mysid , mysid , SECCLASS_PROCESS ,
2017-01-09 10:07:31 -05:00
PROCESS__SETSOCKCREATE , NULL ) ;
2023-09-12 13:56:55 -07:00
break ;
case LSM_ATTR_CURRENT :
2023-03-09 13:30:37 -05:00
error = avc_has_perm ( mysid , mysid , SECCLASS_PROCESS ,
2017-01-09 10:07:31 -05:00
PROCESS__SETCURRENT , NULL ) ;
2023-09-12 13:56:55 -07:00
break ;
default :
error = - EOPNOTSUPP ;
break ;
}
2005-04-16 15:20:36 -07:00
if ( error )
return error ;
/* Obtain a SID for the context, if one was specified. */
2017-01-31 11:54:04 -05:00
if ( size & & str [ 0 ] & & str [ 0 ] ! = ' \n ' ) {
2005-04-16 15:20:36 -07:00
if ( str [ size - 1 ] = = ' \n ' ) {
str [ size - 1 ] = 0 ;
size - - ;
}
2023-03-09 13:30:37 -05:00
error = security_context_to_sid ( value , size ,
2018-03-01 18:48:02 -05:00
& sid , GFP_KERNEL ) ;
2023-09-12 13:56:55 -07:00
if ( error = = - EINVAL & & attr = = LSM_ATTR_FSCREATE ) {
2017-04-20 11:31:30 -04:00
if ( ! has_cap_mac_admin ( true ) ) {
2012-04-04 13:45:49 -04:00
struct audit_buffer * ab ;
size_t audit_size ;
2023-09-12 13:56:55 -07:00
/* We strip a nul only if it is at the end,
* otherwise the context contains a nul and
* we should audit that */
2012-04-04 13:45:49 -04:00
if ( str [ size - 1 ] = = ' \0 ' )
audit_size = size - 1 ;
else
audit_size = size ;
2018-05-12 21:58:20 -04:00
ab = audit_log_start ( audit_context ( ) ,
GFP_ATOMIC ,
AUDIT_SELINUX_ERR ) ;
2021-07-14 01:11:27 +01:00
if ( ! ab )
return error ;
2012-04-04 13:45:49 -04:00
audit_log_format ( ab , " op=fscreate invalid_context= " ) ;
2023-09-12 13:56:55 -07:00
audit_log_n_untrustedstring ( ab , value ,
audit_size ) ;
2012-04-04 13:45:49 -04:00
audit_log_end ( ab ) ;
2008-05-07 13:03:20 -04:00
return error ;
2012-04-04 13:45:49 -04:00
}
2023-03-09 13:30:37 -05:00
error = security_context_to_sid_force ( value , size ,
& sid ) ;
2008-05-07 13:03:20 -04:00
}
2005-04-16 15:20:36 -07:00
if ( error )
return error ;
}
CRED: Inaugurate COW credentials
Inaugurate copy-on-write credentials management. This uses RCU to manage the
credentials pointer in the task_struct with respect to accesses by other tasks.
A process may only modify its own credentials, and so does not need locking to
access or modify its own credentials.
A mutex (cred_replace_mutex) is added to the task_struct to control the effect
of PTRACE_ATTACHED on credential calculations, particularly with respect to
execve().
With this patch, the contents of an active credentials struct may not be
changed directly; rather a new set of credentials must be prepared, modified
and committed using something like the following sequence of events:
struct cred *new = prepare_creds();
int ret = blah(new);
if (ret < 0) {
abort_creds(new);
return ret;
}
return commit_creds(new);
There are some exceptions to this rule: the keyrings pointed to by the active
credentials may be instantiated - keyrings violate the COW rule as managing
COW keyrings is tricky, given that it is possible for a task to directly alter
the keys in a keyring in use by another task.
To help enforce this, various pointers to sets of credentials, such as those in
the task_struct, are declared const. The purpose of this is compile-time
discouragement of altering credentials through those pointers. Once a set of
credentials has been made public through one of these pointers, it may not be
modified, except under special circumstances:
(1) Its reference count may be incremented and decremented.
(2) The keyrings to which it points may be modified, but not replaced.
The only safe way to modify anything else is to create a replacement and commit
using the functions described in Documentation/credentials.txt (which will be
added by a later patch).
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
This now prepares and commits credentials in various places in the
security code rather than altering the current creds directly.
(2) Temporary credential overrides.
do_coredump() and sys_faccessat() now prepare their own credentials and
temporarily override the ones currently on the acting thread, whilst
preventing interference from other threads by holding cred_replace_mutex
on the thread being dumped.
This will be replaced in a future patch by something that hands down the
credentials directly to the functions being called, rather than altering
the task's objective credentials.
(3) LSM interface.
A number of functions have been changed, added or removed:
(*) security_capset_check(), ->capset_check()
(*) security_capset_set(), ->capset_set()
Removed in favour of security_capset().
(*) security_capset(), ->capset()
New. This is passed a pointer to the new creds, a pointer to the old
creds and the proposed capability sets. It should fill in the new
creds or return an error. All pointers, barring the pointer to the
new creds, are now const.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
Changed; now returns a value, which will cause the process to be
killed if it's an error.
(*) security_task_alloc(), ->task_alloc_security()
Removed in favour of security_prepare_creds().
(*) security_cred_free(), ->cred_free()
New. Free security data attached to cred->security.
(*) security_prepare_creds(), ->cred_prepare()
New. Duplicate any security data attached to cred->security.
(*) security_commit_creds(), ->cred_commit()
New. Apply any security effects for the upcoming installation of new
security by commit_creds().
(*) security_task_post_setuid(), ->task_post_setuid()
Removed in favour of security_task_fix_setuid().
(*) security_task_fix_setuid(), ->task_fix_setuid()
Fix up the proposed new credentials for setuid(). This is used by
cap_set_fix_setuid() to implicitly adjust capabilities in line with
setuid() changes. Changes are made to the new credentials, rather
than the task itself as in security_task_post_setuid().
(*) security_task_reparent_to_init(), ->task_reparent_to_init()
Removed. Instead the task being reparented to init is referred
directly to init's credentials.
NOTE! This results in the loss of some state: SELinux's osid no
longer records the sid of the thread that forked it.
(*) security_key_alloc(), ->key_alloc()
(*) security_key_permission(), ->key_permission()
Changed. These now take cred pointers rather than task pointers to
refer to the security context.
(4) sys_capset().
This has been simplified and uses less locking. The LSM functions it
calls have been merged.
(5) reparent_to_kthreadd().
This gives the current thread the same credentials as init by simply using
commit_thread() to point that way.
(6) __sigqueue_alloc() and switch_uid()
__sigqueue_alloc() can't stop the target task from changing its creds
beneath it, so this function gets a reference to the currently applicable
user_struct which it then passes into the sigqueue struct it returns if
successful.
switch_uid() is now called from commit_creds(), and possibly should be
folded into that. commit_creds() should take care of protecting
__sigqueue_alloc().
(7) [sg]et[ug]id() and co and [sg]et_current_groups.
The set functions now all use prepare_creds(), commit_creds() and
abort_creds() to build and check a new set of credentials before applying
it.
security_task_set[ug]id() is called inside the prepared section. This
guarantees that nothing else will affect the creds until we've finished.
The calling of set_dumpable() has been moved into commit_creds().
Much of the functionality of set_user() has been moved into
commit_creds().
The get functions all simply access the data directly.
(8) security_task_prctl() and cap_task_prctl().
security_task_prctl() has been modified to return -ENOSYS if it doesn't
want to handle a function, or otherwise return the return value directly
rather than through an argument.
Additionally, cap_task_prctl() now prepares a new set of credentials, even
if it doesn't end up using it.
(9) Keyrings.
A number of changes have been made to the keyrings code:
(a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have
all been dropped and built in to the credentials functions directly.
They may want separating out again later.
(b) key_alloc() and search_process_keyrings() now take a cred pointer
rather than a task pointer to specify the security context.
(c) copy_creds() gives a new thread within the same thread group a new
thread keyring if its parent had one, otherwise it discards the thread
keyring.
(d) The authorisation key now points directly to the credentials to extend
the search into rather than pointing to the task that carries them.
(e) Installing thread, process or session keyrings causes a new set of
credentials to be created, even though it's not strictly necessary for
process or session keyrings (they're shared).
(10) Usermode helper.
The usermode helper code now carries a cred struct pointer in its
subprocess_info struct instead of a new session keyring pointer. This set
of credentials is derived from init_cred and installed on the new process
after it has been cloned.
call_usermodehelper_setup() allocates the new credentials and
call_usermodehelper_freeinfo() discards them if they haven't been used. A
special cred function (prepare_usermodeinfo_creds()) is provided
specifically for call_usermodehelper_setup() to call.
call_usermodehelper_setkeys() adjusts the credentials to sport the
supplied keyring as the new session keyring.
(11) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) selinux_setprocattr() no longer does its check for whether the
current ptracer can access processes with the new SID inside the lock
that covers getting the ptracer's SID. Whilst this lock ensures that
the check is done with the ptracer pinned, the result is only valid
until the lock is released, so there's no point doing it inside the
lock.
(12) is_single_threaded().
This function has been extracted from selinux_setprocattr() and put into
a file of its own in the lib/ directory as join_session_keyring() now
wants to use it too.
The code in SELinux just checked to see whether a task shared mm_structs
with other tasks (CLONE_VM), but that isn't good enough. We really want
to know if they're part of the same thread group (CLONE_THREAD).
(13) nfsd.
The NFS server daemon now has to use the COW credentials to set the
credentials it is going to use. It really needs to pass the credentials
down to the functions it calls, but it can't do that until other patches
in this series have been applied.
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:23 +11:00
new = prepare_creds ( ) ;
if ( ! new )
return - ENOMEM ;
2005-04-16 15:20:36 -07:00
/* Permission checking based on the specified context is
performed during the actual operation ( execve ,
open / mkdir / . . . ) , when we know the full context of the
2020-03-22 15:46:24 -05:00
operation . See selinux_bprm_creds_for_exec for the execve
2005-04-16 15:20:36 -07:00
checks and may_create for the file creation checks . The
operation will then fail if the context is not permitted . */
2018-09-21 17:17:16 -07:00
tsec = selinux_cred ( new ) ;
2023-09-12 13:56:55 -07:00
if ( attr = = LSM_ATTR_EXEC ) {
2005-04-16 15:20:36 -07:00
tsec - > exec_sid = sid ;
2023-09-12 13:56:55 -07:00
} else if ( attr = = LSM_ATTR_FSCREATE ) {
2005-04-16 15:20:36 -07:00
tsec - > create_sid = sid ;
2023-09-12 13:56:55 -07:00
} else if ( attr = = LSM_ATTR_KEYCREATE ) {
2019-06-12 10:12:26 +02:00
if ( sid ) {
2023-03-09 13:30:37 -05:00
error = avc_has_perm ( mysid , sid ,
2019-06-12 10:12:26 +02:00
SECCLASS_KEY , KEY__CREATE , NULL ) ;
if ( error )
goto abort_change ;
}
2006-06-26 00:24:57 -07:00
tsec - > keycreate_sid = sid ;
2023-09-12 13:56:55 -07:00
} else if ( attr = = LSM_ATTR_SOCKCREATE ) {
2006-06-26 00:26:03 -07:00
tsec - > sockcreate_sid = sid ;
2023-09-12 13:56:55 -07:00
} else if ( attr = = LSM_ATTR_CURRENT ) {
CRED: Inaugurate COW credentials
Inaugurate copy-on-write credentials management. This uses RCU to manage the
credentials pointer in the task_struct with respect to accesses by other tasks.
A process may only modify its own credentials, and so does not need locking to
access or modify its own credentials.
A mutex (cred_replace_mutex) is added to the task_struct to control the effect
of PTRACE_ATTACHED on credential calculations, particularly with respect to
execve().
With this patch, the contents of an active credentials struct may not be
changed directly; rather a new set of credentials must be prepared, modified
and committed using something like the following sequence of events:
struct cred *new = prepare_creds();
int ret = blah(new);
if (ret < 0) {
abort_creds(new);
return ret;
}
return commit_creds(new);
There are some exceptions to this rule: the keyrings pointed to by the active
credentials may be instantiated - keyrings violate the COW rule as managing
COW keyrings is tricky, given that it is possible for a task to directly alter
the keys in a keyring in use by another task.
To help enforce this, various pointers to sets of credentials, such as those in
the task_struct, are declared const. The purpose of this is compile-time
discouragement of altering credentials through those pointers. Once a set of
credentials has been made public through one of these pointers, it may not be
modified, except under special circumstances:
(1) Its reference count may be incremented and decremented.
(2) The keyrings to which it points may be modified, but not replaced.
The only safe way to modify anything else is to create a replacement and commit
using the functions described in Documentation/credentials.txt (which will be
added by a later patch).
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
This now prepares and commits credentials in various places in the
security code rather than altering the current creds directly.
(2) Temporary credential overrides.
do_coredump() and sys_faccessat() now prepare their own credentials and
temporarily override the ones currently on the acting thread, whilst
preventing interference from other threads by holding cred_replace_mutex
on the thread being dumped.
This will be replaced in a future patch by something that hands down the
credentials directly to the functions being called, rather than altering
the task's objective credentials.
(3) LSM interface.
A number of functions have been changed, added or removed:
(*) security_capset_check(), ->capset_check()
(*) security_capset_set(), ->capset_set()
Removed in favour of security_capset().
(*) security_capset(), ->capset()
New. This is passed a pointer to the new creds, a pointer to the old
creds and the proposed capability sets. It should fill in the new
creds or return an error. All pointers, barring the pointer to the
new creds, are now const.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
Changed; now returns a value, which will cause the process to be
killed if it's an error.
(*) security_task_alloc(), ->task_alloc_security()
Removed in favour of security_prepare_creds().
(*) security_cred_free(), ->cred_free()
New. Free security data attached to cred->security.
(*) security_prepare_creds(), ->cred_prepare()
New. Duplicate any security data attached to cred->security.
(*) security_commit_creds(), ->cred_commit()
New. Apply any security effects for the upcoming installation of new
security by commit_creds().
(*) security_task_post_setuid(), ->task_post_setuid()
Removed in favour of security_task_fix_setuid().
(*) security_task_fix_setuid(), ->task_fix_setuid()
Fix up the proposed new credentials for setuid(). This is used by
cap_set_fix_setuid() to implicitly adjust capabilities in line with
setuid() changes. Changes are made to the new credentials, rather
than the task itself as in security_task_post_setuid().
(*) security_task_reparent_to_init(), ->task_reparent_to_init()
Removed. Instead the task being reparented to init is referred
directly to init's credentials.
NOTE! This results in the loss of some state: SELinux's osid no
longer records the sid of the thread that forked it.
(*) security_key_alloc(), ->key_alloc()
(*) security_key_permission(), ->key_permission()
Changed. These now take cred pointers rather than task pointers to
refer to the security context.
(4) sys_capset().
This has been simplified and uses less locking. The LSM functions it
calls have been merged.
(5) reparent_to_kthreadd().
This gives the current thread the same credentials as init by simply using
commit_thread() to point that way.
(6) __sigqueue_alloc() and switch_uid()
__sigqueue_alloc() can't stop the target task from changing its creds
beneath it, so this function gets a reference to the currently applicable
user_struct which it then passes into the sigqueue struct it returns if
successful.
switch_uid() is now called from commit_creds(), and possibly should be
folded into that. commit_creds() should take care of protecting
__sigqueue_alloc().
(7) [sg]et[ug]id() and co and [sg]et_current_groups.
The set functions now all use prepare_creds(), commit_creds() and
abort_creds() to build and check a new set of credentials before applying
it.
security_task_set[ug]id() is called inside the prepared section. This
guarantees that nothing else will affect the creds until we've finished.
The calling of set_dumpable() has been moved into commit_creds().
Much of the functionality of set_user() has been moved into
commit_creds().
The get functions all simply access the data directly.
(8) security_task_prctl() and cap_task_prctl().
security_task_prctl() has been modified to return -ENOSYS if it doesn't
want to handle a function, or otherwise return the return value directly
rather than through an argument.
Additionally, cap_task_prctl() now prepares a new set of credentials, even
if it doesn't end up using it.
(9) Keyrings.
A number of changes have been made to the keyrings code:
(a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have
all been dropped and built in to the credentials functions directly.
They may want separating out again later.
(b) key_alloc() and search_process_keyrings() now take a cred pointer
rather than a task pointer to specify the security context.
(c) copy_creds() gives a new thread within the same thread group a new
thread keyring if its parent had one, otherwise it discards the thread
keyring.
(d) The authorisation key now points directly to the credentials to extend
the search into rather than pointing to the task that carries them.
(e) Installing thread, process or session keyrings causes a new set of
credentials to be created, even though it's not strictly necessary for
process or session keyrings (they're shared).
(10) Usermode helper.
The usermode helper code now carries a cred struct pointer in its
subprocess_info struct instead of a new session keyring pointer. This set
of credentials is derived from init_cred and installed on the new process
after it has been cloned.
call_usermodehelper_setup() allocates the new credentials and
call_usermodehelper_freeinfo() discards them if they haven't been used. A
special cred function (prepare_usermodeinfo_creds()) is provided
specifically for call_usermodehelper_setup() to call.
call_usermodehelper_setkeys() adjusts the credentials to sport the
supplied keyring as the new session keyring.
(11) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) selinux_setprocattr() no longer does its check for whether the
current ptracer can access processes with the new SID inside the lock
that covers getting the ptracer's SID. Whilst this lock ensures that
the check is done with the ptracer pinned, the result is only valid
until the lock is released, so there's no point doing it inside the
lock.
(12) is_single_threaded().
This function has been extracted from selinux_setprocattr() and put into
a file of its own in the lib/ directory as join_session_keyring() now
wants to use it too.
The code in SELinux just checked to see whether a task shared mm_structs
with other tasks (CLONE_VM), but that isn't good enough. We really want
to know if they're part of the same thread group (CLONE_THREAD).
(13) nfsd.
The NFS server daemon now has to use the COW credentials to set the
credentials it is going to use. It really needs to pass the credentials
down to the functions it calls, but it can't do that until other patches
in this series have been applied.
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:23 +11:00
error = - EINVAL ;
2005-04-16 15:20:36 -07:00
if ( sid = = 0 )
CRED: Inaugurate COW credentials
Inaugurate copy-on-write credentials management. This uses RCU to manage the
credentials pointer in the task_struct with respect to accesses by other tasks.
A process may only modify its own credentials, and so does not need locking to
access or modify its own credentials.
A mutex (cred_replace_mutex) is added to the task_struct to control the effect
of PTRACE_ATTACHED on credential calculations, particularly with respect to
execve().
With this patch, the contents of an active credentials struct may not be
changed directly; rather a new set of credentials must be prepared, modified
and committed using something like the following sequence of events:
struct cred *new = prepare_creds();
int ret = blah(new);
if (ret < 0) {
abort_creds(new);
return ret;
}
return commit_creds(new);
There are some exceptions to this rule: the keyrings pointed to by the active
credentials may be instantiated - keyrings violate the COW rule as managing
COW keyrings is tricky, given that it is possible for a task to directly alter
the keys in a keyring in use by another task.
To help enforce this, various pointers to sets of credentials, such as those in
the task_struct, are declared const. The purpose of this is compile-time
discouragement of altering credentials through those pointers. Once a set of
credentials has been made public through one of these pointers, it may not be
modified, except under special circumstances:
(1) Its reference count may be incremented and decremented.
(2) The keyrings to which it points may be modified, but not replaced.
The only safe way to modify anything else is to create a replacement and commit
using the functions described in Documentation/credentials.txt (which will be
added by a later patch).
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
This now prepares and commits credentials in various places in the
security code rather than altering the current creds directly.
(2) Temporary credential overrides.
do_coredump() and sys_faccessat() now prepare their own credentials and
temporarily override the ones currently on the acting thread, whilst
preventing interference from other threads by holding cred_replace_mutex
on the thread being dumped.
This will be replaced in a future patch by something that hands down the
credentials directly to the functions being called, rather than altering
the task's objective credentials.
(3) LSM interface.
A number of functions have been changed, added or removed:
(*) security_capset_check(), ->capset_check()
(*) security_capset_set(), ->capset_set()
Removed in favour of security_capset().
(*) security_capset(), ->capset()
New. This is passed a pointer to the new creds, a pointer to the old
creds and the proposed capability sets. It should fill in the new
creds or return an error. All pointers, barring the pointer to the
new creds, are now const.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
Changed; now returns a value, which will cause the process to be
killed if it's an error.
(*) security_task_alloc(), ->task_alloc_security()
Removed in favour of security_prepare_creds().
(*) security_cred_free(), ->cred_free()
New. Free security data attached to cred->security.
(*) security_prepare_creds(), ->cred_prepare()
New. Duplicate any security data attached to cred->security.
(*) security_commit_creds(), ->cred_commit()
New. Apply any security effects for the upcoming installation of new
security by commit_creds().
(*) security_task_post_setuid(), ->task_post_setuid()
Removed in favour of security_task_fix_setuid().
(*) security_task_fix_setuid(), ->task_fix_setuid()
Fix up the proposed new credentials for setuid(). This is used by
cap_set_fix_setuid() to implicitly adjust capabilities in line with
setuid() changes. Changes are made to the new credentials, rather
than the task itself as in security_task_post_setuid().
(*) security_task_reparent_to_init(), ->task_reparent_to_init()
Removed. Instead the task being reparented to init is referred
directly to init's credentials.
NOTE! This results in the loss of some state: SELinux's osid no
longer records the sid of the thread that forked it.
(*) security_key_alloc(), ->key_alloc()
(*) security_key_permission(), ->key_permission()
Changed. These now take cred pointers rather than task pointers to
refer to the security context.
(4) sys_capset().
This has been simplified and uses less locking. The LSM functions it
calls have been merged.
(5) reparent_to_kthreadd().
This gives the current thread the same credentials as init by simply using
commit_thread() to point that way.
(6) __sigqueue_alloc() and switch_uid()
__sigqueue_alloc() can't stop the target task from changing its creds
beneath it, so this function gets a reference to the currently applicable
user_struct which it then passes into the sigqueue struct it returns if
successful.
switch_uid() is now called from commit_creds(), and possibly should be
folded into that. commit_creds() should take care of protecting
__sigqueue_alloc().
(7) [sg]et[ug]id() and co and [sg]et_current_groups.
The set functions now all use prepare_creds(), commit_creds() and
abort_creds() to build and check a new set of credentials before applying
it.
security_task_set[ug]id() is called inside the prepared section. This
guarantees that nothing else will affect the creds until we've finished.
The calling of set_dumpable() has been moved into commit_creds().
Much of the functionality of set_user() has been moved into
commit_creds().
The get functions all simply access the data directly.
(8) security_task_prctl() and cap_task_prctl().
security_task_prctl() has been modified to return -ENOSYS if it doesn't
want to handle a function, or otherwise return the return value directly
rather than through an argument.
Additionally, cap_task_prctl() now prepares a new set of credentials, even
if it doesn't end up using it.
(9) Keyrings.
A number of changes have been made to the keyrings code:
(a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have
all been dropped and built in to the credentials functions directly.
They may want separating out again later.
(b) key_alloc() and search_process_keyrings() now take a cred pointer
rather than a task pointer to specify the security context.
(c) copy_creds() gives a new thread within the same thread group a new
thread keyring if its parent had one, otherwise it discards the thread
keyring.
(d) The authorisation key now points directly to the credentials to extend
the search into rather than pointing to the task that carries them.
(e) Installing thread, process or session keyrings causes a new set of
credentials to be created, even though it's not strictly necessary for
process or session keyrings (they're shared).
(10) Usermode helper.
The usermode helper code now carries a cred struct pointer in its
subprocess_info struct instead of a new session keyring pointer. This set
of credentials is derived from init_cred and installed on the new process
after it has been cloned.
call_usermodehelper_setup() allocates the new credentials and
call_usermodehelper_freeinfo() discards them if they haven't been used. A
special cred function (prepare_usermodeinfo_creds()) is provided
specifically for call_usermodehelper_setup() to call.
call_usermodehelper_setkeys() adjusts the credentials to sport the
supplied keyring as the new session keyring.
(11) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) selinux_setprocattr() no longer does its check for whether the
current ptracer can access processes with the new SID inside the lock
that covers getting the ptracer's SID. Whilst this lock ensures that
the check is done with the ptracer pinned, the result is only valid
until the lock is released, so there's no point doing it inside the
lock.
(12) is_single_threaded().
This function has been extracted from selinux_setprocattr() and put into
a file of its own in the lib/ directory as join_session_keyring() now
wants to use it too.
The code in SELinux just checked to see whether a task shared mm_structs
with other tasks (CLONE_VM), but that isn't good enough. We really want
to know if they're part of the same thread group (CLONE_THREAD).
(13) nfsd.
The NFS server daemon now has to use the COW credentials to set the
credentials it is going to use. It really needs to pass the credentials
down to the functions it calls, but it can't do that until other patches
in this series have been applied.
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:23 +11:00
goto abort_change ;
2009-07-10 03:48:23 +02:00
if ( ! current_is_single_threaded ( ) ) {
2023-03-09 13:30:37 -05:00
error = security_bounded_transition ( tsec - > sid , sid ) ;
CRED: Inaugurate COW credentials
Inaugurate copy-on-write credentials management. This uses RCU to manage the
credentials pointer in the task_struct with respect to accesses by other tasks.
A process may only modify its own credentials, and so does not need locking to
access or modify its own credentials.
A mutex (cred_replace_mutex) is added to the task_struct to control the effect
of PTRACE_ATTACHED on credential calculations, particularly with respect to
execve().
With this patch, the contents of an active credentials struct may not be
changed directly; rather a new set of credentials must be prepared, modified
and committed using something like the following sequence of events:
struct cred *new = prepare_creds();
int ret = blah(new);
if (ret < 0) {
abort_creds(new);
return ret;
}
return commit_creds(new);
There are some exceptions to this rule: the keyrings pointed to by the active
credentials may be instantiated - keyrings violate the COW rule as managing
COW keyrings is tricky, given that it is possible for a task to directly alter
the keys in a keyring in use by another task.
To help enforce this, various pointers to sets of credentials, such as those in
the task_struct, are declared const. The purpose of this is compile-time
discouragement of altering credentials through those pointers. Once a set of
credentials has been made public through one of these pointers, it may not be
modified, except under special circumstances:
(1) Its reference count may be incremented and decremented.
(2) The keyrings to which it points may be modified, but not replaced.
The only safe way to modify anything else is to create a replacement and commit
using the functions described in Documentation/credentials.txt (which will be
added by a later patch).
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
This now prepares and commits credentials in various places in the
security code rather than altering the current creds directly.
(2) Temporary credential overrides.
do_coredump() and sys_faccessat() now prepare their own credentials and
temporarily override the ones currently on the acting thread, whilst
preventing interference from other threads by holding cred_replace_mutex
on the thread being dumped.
This will be replaced in a future patch by something that hands down the
credentials directly to the functions being called, rather than altering
the task's objective credentials.
(3) LSM interface.
A number of functions have been changed, added or removed:
(*) security_capset_check(), ->capset_check()
(*) security_capset_set(), ->capset_set()
Removed in favour of security_capset().
(*) security_capset(), ->capset()
New. This is passed a pointer to the new creds, a pointer to the old
creds and the proposed capability sets. It should fill in the new
creds or return an error. All pointers, barring the pointer to the
new creds, are now const.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
Changed; now returns a value, which will cause the process to be
killed if it's an error.
(*) security_task_alloc(), ->task_alloc_security()
Removed in favour of security_prepare_creds().
(*) security_cred_free(), ->cred_free()
New. Free security data attached to cred->security.
(*) security_prepare_creds(), ->cred_prepare()
New. Duplicate any security data attached to cred->security.
(*) security_commit_creds(), ->cred_commit()
New. Apply any security effects for the upcoming installation of new
security by commit_creds().
(*) security_task_post_setuid(), ->task_post_setuid()
Removed in favour of security_task_fix_setuid().
(*) security_task_fix_setuid(), ->task_fix_setuid()
Fix up the proposed new credentials for setuid(). This is used by
cap_set_fix_setuid() to implicitly adjust capabilities in line with
setuid() changes. Changes are made to the new credentials, rather
than the task itself as in security_task_post_setuid().
(*) security_task_reparent_to_init(), ->task_reparent_to_init()
Removed. Instead the task being reparented to init is referred
directly to init's credentials.
NOTE! This results in the loss of some state: SELinux's osid no
longer records the sid of the thread that forked it.
(*) security_key_alloc(), ->key_alloc()
(*) security_key_permission(), ->key_permission()
Changed. These now take cred pointers rather than task pointers to
refer to the security context.
(4) sys_capset().
This has been simplified and uses less locking. The LSM functions it
calls have been merged.
(5) reparent_to_kthreadd().
This gives the current thread the same credentials as init by simply using
commit_thread() to point that way.
(6) __sigqueue_alloc() and switch_uid()
__sigqueue_alloc() can't stop the target task from changing its creds
beneath it, so this function gets a reference to the currently applicable
user_struct which it then passes into the sigqueue struct it returns if
successful.
switch_uid() is now called from commit_creds(), and possibly should be
folded into that. commit_creds() should take care of protecting
__sigqueue_alloc().
(7) [sg]et[ug]id() and co and [sg]et_current_groups.
The set functions now all use prepare_creds(), commit_creds() and
abort_creds() to build and check a new set of credentials before applying
it.
security_task_set[ug]id() is called inside the prepared section. This
guarantees that nothing else will affect the creds until we've finished.
The calling of set_dumpable() has been moved into commit_creds().
Much of the functionality of set_user() has been moved into
commit_creds().
The get functions all simply access the data directly.
(8) security_task_prctl() and cap_task_prctl().
security_task_prctl() has been modified to return -ENOSYS if it doesn't
want to handle a function, or otherwise return the return value directly
rather than through an argument.
Additionally, cap_task_prctl() now prepares a new set of credentials, even
if it doesn't end up using it.
(9) Keyrings.
A number of changes have been made to the keyrings code:
(a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have
all been dropped and built in to the credentials functions directly.
They may want separating out again later.
(b) key_alloc() and search_process_keyrings() now take a cred pointer
rather than a task pointer to specify the security context.
(c) copy_creds() gives a new thread within the same thread group a new
thread keyring if its parent had one, otherwise it discards the thread
keyring.
(d) The authorisation key now points directly to the credentials to extend
the search into rather than pointing to the task that carries them.
(e) Installing thread, process or session keyrings causes a new set of
credentials to be created, even though it's not strictly necessary for
process or session keyrings (they're shared).
(10) Usermode helper.
The usermode helper code now carries a cred struct pointer in its
subprocess_info struct instead of a new session keyring pointer. This set
of credentials is derived from init_cred and installed on the new process
after it has been cloned.
call_usermodehelper_setup() allocates the new credentials and
call_usermodehelper_freeinfo() discards them if they haven't been used. A
special cred function (prepare_usermodeinfo_creds()) is provided
specifically for call_usermodehelper_setup() to call.
call_usermodehelper_setkeys() adjusts the credentials to sport the
supplied keyring as the new session keyring.
(11) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) selinux_setprocattr() no longer does its check for whether the
current ptracer can access processes with the new SID inside the lock
that covers getting the ptracer's SID. Whilst this lock ensures that
the check is done with the ptracer pinned, the result is only valid
until the lock is released, so there's no point doing it inside the
lock.
(12) is_single_threaded().
This function has been extracted from selinux_setprocattr() and put into
a file of its own in the lib/ directory as join_session_keyring() now
wants to use it too.
The code in SELinux just checked to see whether a task shared mm_structs
with other tasks (CLONE_VM), but that isn't good enough. We really want
to know if they're part of the same thread group (CLONE_THREAD).
(13) nfsd.
The NFS server daemon now has to use the COW credentials to set the
credentials it is going to use. It really needs to pass the credentials
down to the functions it calls, but it can't do that until other patches
in this series have been applied.
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:23 +11:00
if ( error )
goto abort_change ;
2008-04-17 13:17:49 -04:00
}
2005-04-16 15:20:36 -07:00
/* Check permissions for the transition. */
2023-03-09 13:30:37 -05:00
error = avc_has_perm ( tsec - > sid , sid , SECCLASS_PROCESS ,
2008-04-17 13:17:49 -04:00
PROCESS__DYNTRANSITION , NULL ) ;
2005-04-16 15:20:36 -07:00
if ( error )
CRED: Inaugurate COW credentials
Inaugurate copy-on-write credentials management. This uses RCU to manage the
credentials pointer in the task_struct with respect to accesses by other tasks.
A process may only modify its own credentials, and so does not need locking to
access or modify its own credentials.
A mutex (cred_replace_mutex) is added to the task_struct to control the effect
of PTRACE_ATTACHED on credential calculations, particularly with respect to
execve().
With this patch, the contents of an active credentials struct may not be
changed directly; rather a new set of credentials must be prepared, modified
and committed using something like the following sequence of events:
struct cred *new = prepare_creds();
int ret = blah(new);
if (ret < 0) {
abort_creds(new);
return ret;
}
return commit_creds(new);
There are some exceptions to this rule: the keyrings pointed to by the active
credentials may be instantiated - keyrings violate the COW rule as managing
COW keyrings is tricky, given that it is possible for a task to directly alter
the keys in a keyring in use by another task.
To help enforce this, various pointers to sets of credentials, such as those in
the task_struct, are declared const. The purpose of this is compile-time
discouragement of altering credentials through those pointers. Once a set of
credentials has been made public through one of these pointers, it may not be
modified, except under special circumstances:
(1) Its reference count may be incremented and decremented.
(2) The keyrings to which it points may be modified, but not replaced.
The only safe way to modify anything else is to create a replacement and commit
using the functions described in Documentation/credentials.txt (which will be
added by a later patch).
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
This now prepares and commits credentials in various places in the
security code rather than altering the current creds directly.
(2) Temporary credential overrides.
do_coredump() and sys_faccessat() now prepare their own credentials and
temporarily override the ones currently on the acting thread, whilst
preventing interference from other threads by holding cred_replace_mutex
on the thread being dumped.
This will be replaced in a future patch by something that hands down the
credentials directly to the functions being called, rather than altering
the task's objective credentials.
(3) LSM interface.
A number of functions have been changed, added or removed:
(*) security_capset_check(), ->capset_check()
(*) security_capset_set(), ->capset_set()
Removed in favour of security_capset().
(*) security_capset(), ->capset()
New. This is passed a pointer to the new creds, a pointer to the old
creds and the proposed capability sets. It should fill in the new
creds or return an error. All pointers, barring the pointer to the
new creds, are now const.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
Changed; now returns a value, which will cause the process to be
killed if it's an error.
(*) security_task_alloc(), ->task_alloc_security()
Removed in favour of security_prepare_creds().
(*) security_cred_free(), ->cred_free()
New. Free security data attached to cred->security.
(*) security_prepare_creds(), ->cred_prepare()
New. Duplicate any security data attached to cred->security.
(*) security_commit_creds(), ->cred_commit()
New. Apply any security effects for the upcoming installation of new
security by commit_creds().
(*) security_task_post_setuid(), ->task_post_setuid()
Removed in favour of security_task_fix_setuid().
(*) security_task_fix_setuid(), ->task_fix_setuid()
Fix up the proposed new credentials for setuid(). This is used by
cap_set_fix_setuid() to implicitly adjust capabilities in line with
setuid() changes. Changes are made to the new credentials, rather
than the task itself as in security_task_post_setuid().
(*) security_task_reparent_to_init(), ->task_reparent_to_init()
Removed. Instead the task being reparented to init is referred
directly to init's credentials.
NOTE! This results in the loss of some state: SELinux's osid no
longer records the sid of the thread that forked it.
(*) security_key_alloc(), ->key_alloc()
(*) security_key_permission(), ->key_permission()
Changed. These now take cred pointers rather than task pointers to
refer to the security context.
(4) sys_capset().
This has been simplified and uses less locking. The LSM functions it
calls have been merged.
(5) reparent_to_kthreadd().
This gives the current thread the same credentials as init by simply using
commit_thread() to point that way.
(6) __sigqueue_alloc() and switch_uid()
__sigqueue_alloc() can't stop the target task from changing its creds
beneath it, so this function gets a reference to the currently applicable
user_struct which it then passes into the sigqueue struct it returns if
successful.
switch_uid() is now called from commit_creds(), and possibly should be
folded into that. commit_creds() should take care of protecting
__sigqueue_alloc().
(7) [sg]et[ug]id() and co and [sg]et_current_groups.
The set functions now all use prepare_creds(), commit_creds() and
abort_creds() to build and check a new set of credentials before applying
it.
security_task_set[ug]id() is called inside the prepared section. This
guarantees that nothing else will affect the creds until we've finished.
The calling of set_dumpable() has been moved into commit_creds().
Much of the functionality of set_user() has been moved into
commit_creds().
The get functions all simply access the data directly.
(8) security_task_prctl() and cap_task_prctl().
security_task_prctl() has been modified to return -ENOSYS if it doesn't
want to handle a function, or otherwise return the return value directly
rather than through an argument.
Additionally, cap_task_prctl() now prepares a new set of credentials, even
if it doesn't end up using it.
(9) Keyrings.
A number of changes have been made to the keyrings code:
(a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have
all been dropped and built in to the credentials functions directly.
They may want separating out again later.
(b) key_alloc() and search_process_keyrings() now take a cred pointer
rather than a task pointer to specify the security context.
(c) copy_creds() gives a new thread within the same thread group a new
thread keyring if its parent had one, otherwise it discards the thread
keyring.
(d) The authorisation key now points directly to the credentials to extend
the search into rather than pointing to the task that carries them.
(e) Installing thread, process or session keyrings causes a new set of
credentials to be created, even though it's not strictly necessary for
process or session keyrings (they're shared).
(10) Usermode helper.
The usermode helper code now carries a cred struct pointer in its
subprocess_info struct instead of a new session keyring pointer. This set
of credentials is derived from init_cred and installed on the new process
after it has been cloned.
call_usermodehelper_setup() allocates the new credentials and
call_usermodehelper_freeinfo() discards them if they haven't been used. A
special cred function (prepare_usermodeinfo_creds()) is provided
specifically for call_usermodehelper_setup() to call.
call_usermodehelper_setkeys() adjusts the credentials to sport the
supplied keyring as the new session keyring.
(11) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) selinux_setprocattr() no longer does its check for whether the
current ptracer can access processes with the new SID inside the lock
that covers getting the ptracer's SID. Whilst this lock ensures that
the check is done with the ptracer pinned, the result is only valid
until the lock is released, so there's no point doing it inside the
lock.
(12) is_single_threaded().
This function has been extracted from selinux_setprocattr() and put into
a file of its own in the lib/ directory as join_session_keyring() now
wants to use it too.
The code in SELinux just checked to see whether a task shared mm_structs
with other tasks (CLONE_VM), but that isn't good enough. We really want
to know if they're part of the same thread group (CLONE_THREAD).
(13) nfsd.
The NFS server daemon now has to use the COW credentials to set the
credentials it is going to use. It really needs to pass the credentials
down to the functions it calls, but it can't do that until other patches
in this series have been applied.
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:23 +11:00
goto abort_change ;
2005-04-16 15:20:36 -07:00
/* Check for ptracing, and update the task SID if ok.
Otherwise , leave SID unchanged and fail . */
2017-01-09 10:07:31 -05:00
ptsid = ptrace_parent_sid ( ) ;
2016-03-30 21:41:21 -04:00
if ( ptsid ! = 0 ) {
2023-03-09 13:30:37 -05:00
error = avc_has_perm ( ptsid , sid , SECCLASS_PROCESS ,
CRED: Inaugurate COW credentials
Inaugurate copy-on-write credentials management. This uses RCU to manage the
credentials pointer in the task_struct with respect to accesses by other tasks.
A process may only modify its own credentials, and so does not need locking to
access or modify its own credentials.
A mutex (cred_replace_mutex) is added to the task_struct to control the effect
of PTRACE_ATTACHED on credential calculations, particularly with respect to
execve().
With this patch, the contents of an active credentials struct may not be
changed directly; rather a new set of credentials must be prepared, modified
and committed using something like the following sequence of events:
struct cred *new = prepare_creds();
int ret = blah(new);
if (ret < 0) {
abort_creds(new);
return ret;
}
return commit_creds(new);
There are some exceptions to this rule: the keyrings pointed to by the active
credentials may be instantiated - keyrings violate the COW rule as managing
COW keyrings is tricky, given that it is possible for a task to directly alter
the keys in a keyring in use by another task.
To help enforce this, various pointers to sets of credentials, such as those in
the task_struct, are declared const. The purpose of this is compile-time
discouragement of altering credentials through those pointers. Once a set of
credentials has been made public through one of these pointers, it may not be
modified, except under special circumstances:
(1) Its reference count may be incremented and decremented.
(2) The keyrings to which it points may be modified, but not replaced.
The only safe way to modify anything else is to create a replacement and commit
using the functions described in Documentation/credentials.txt (which will be
added by a later patch).
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
This now prepares and commits credentials in various places in the
security code rather than altering the current creds directly.
(2) Temporary credential overrides.
do_coredump() and sys_faccessat() now prepare their own credentials and
temporarily override the ones currently on the acting thread, whilst
preventing interference from other threads by holding cred_replace_mutex
on the thread being dumped.
This will be replaced in a future patch by something that hands down the
credentials directly to the functions being called, rather than altering
the task's objective credentials.
(3) LSM interface.
A number of functions have been changed, added or removed:
(*) security_capset_check(), ->capset_check()
(*) security_capset_set(), ->capset_set()
Removed in favour of security_capset().
(*) security_capset(), ->capset()
New. This is passed a pointer to the new creds, a pointer to the old
creds and the proposed capability sets. It should fill in the new
creds or return an error. All pointers, barring the pointer to the
new creds, are now const.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
Changed; now returns a value, which will cause the process to be
killed if it's an error.
(*) security_task_alloc(), ->task_alloc_security()
Removed in favour of security_prepare_creds().
(*) security_cred_free(), ->cred_free()
New. Free security data attached to cred->security.
(*) security_prepare_creds(), ->cred_prepare()
New. Duplicate any security data attached to cred->security.
(*) security_commit_creds(), ->cred_commit()
New. Apply any security effects for the upcoming installation of new
security by commit_creds().
(*) security_task_post_setuid(), ->task_post_setuid()
Removed in favour of security_task_fix_setuid().
(*) security_task_fix_setuid(), ->task_fix_setuid()
Fix up the proposed new credentials for setuid(). This is used by
cap_set_fix_setuid() to implicitly adjust capabilities in line with
setuid() changes. Changes are made to the new credentials, rather
than the task itself as in security_task_post_setuid().
(*) security_task_reparent_to_init(), ->task_reparent_to_init()
Removed. Instead the task being reparented to init is referred
directly to init's credentials.
NOTE! This results in the loss of some state: SELinux's osid no
longer records the sid of the thread that forked it.
(*) security_key_alloc(), ->key_alloc()
(*) security_key_permission(), ->key_permission()
Changed. These now take cred pointers rather than task pointers to
refer to the security context.
(4) sys_capset().
This has been simplified and uses less locking. The LSM functions it
calls have been merged.
(5) reparent_to_kthreadd().
This gives the current thread the same credentials as init by simply using
commit_thread() to point that way.
(6) __sigqueue_alloc() and switch_uid()
__sigqueue_alloc() can't stop the target task from changing its creds
beneath it, so this function gets a reference to the currently applicable
user_struct which it then passes into the sigqueue struct it returns if
successful.
switch_uid() is now called from commit_creds(), and possibly should be
folded into that. commit_creds() should take care of protecting
__sigqueue_alloc().
(7) [sg]et[ug]id() and co and [sg]et_current_groups.
The set functions now all use prepare_creds(), commit_creds() and
abort_creds() to build and check a new set of credentials before applying
it.
security_task_set[ug]id() is called inside the prepared section. This
guarantees that nothing else will affect the creds until we've finished.
The calling of set_dumpable() has been moved into commit_creds().
Much of the functionality of set_user() has been moved into
commit_creds().
The get functions all simply access the data directly.
(8) security_task_prctl() and cap_task_prctl().
security_task_prctl() has been modified to return -ENOSYS if it doesn't
want to handle a function, or otherwise return the return value directly
rather than through an argument.
Additionally, cap_task_prctl() now prepares a new set of credentials, even
if it doesn't end up using it.
(9) Keyrings.
A number of changes have been made to the keyrings code:
(a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have
all been dropped and built in to the credentials functions directly.
They may want separating out again later.
(b) key_alloc() and search_process_keyrings() now take a cred pointer
rather than a task pointer to specify the security context.
(c) copy_creds() gives a new thread within the same thread group a new
thread keyring if its parent had one, otherwise it discards the thread
keyring.
(d) The authorisation key now points directly to the credentials to extend
the search into rather than pointing to the task that carries them.
(e) Installing thread, process or session keyrings causes a new set of
credentials to be created, even though it's not strictly necessary for
process or session keyrings (they're shared).
(10) Usermode helper.
The usermode helper code now carries a cred struct pointer in its
subprocess_info struct instead of a new session keyring pointer. This set
of credentials is derived from init_cred and installed on the new process
after it has been cloned.
call_usermodehelper_setup() allocates the new credentials and
call_usermodehelper_freeinfo() discards them if they haven't been used. A
special cred function (prepare_usermodeinfo_creds()) is provided
specifically for call_usermodehelper_setup() to call.
call_usermodehelper_setkeys() adjusts the credentials to sport the
supplied keyring as the new session keyring.
(11) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) selinux_setprocattr() no longer does its check for whether the
current ptracer can access processes with the new SID inside the lock
that covers getting the ptracer's SID. Whilst this lock ensures that
the check is done with the ptracer pinned, the result is only valid
until the lock is released, so there's no point doing it inside the
lock.
(12) is_single_threaded().
This function has been extracted from selinux_setprocattr() and put into
a file of its own in the lib/ directory as join_session_keyring() now
wants to use it too.
The code in SELinux just checked to see whether a task shared mm_structs
with other tasks (CLONE_VM), but that isn't good enough. We really want
to know if they're part of the same thread group (CLONE_THREAD).
(13) nfsd.
The NFS server daemon now has to use the COW credentials to set the
credentials it is going to use. It really needs to pass the credentials
down to the functions it calls, but it can't do that until other patches
in this series have been applied.
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:23 +11:00
PROCESS__PTRACE , NULL ) ;
2005-04-16 15:20:36 -07:00
if ( error )
CRED: Inaugurate COW credentials
Inaugurate copy-on-write credentials management. This uses RCU to manage the
credentials pointer in the task_struct with respect to accesses by other tasks.
A process may only modify its own credentials, and so does not need locking to
access or modify its own credentials.
A mutex (cred_replace_mutex) is added to the task_struct to control the effect
of PTRACE_ATTACHED on credential calculations, particularly with respect to
execve().
With this patch, the contents of an active credentials struct may not be
changed directly; rather a new set of credentials must be prepared, modified
and committed using something like the following sequence of events:
struct cred *new = prepare_creds();
int ret = blah(new);
if (ret < 0) {
abort_creds(new);
return ret;
}
return commit_creds(new);
There are some exceptions to this rule: the keyrings pointed to by the active
credentials may be instantiated - keyrings violate the COW rule as managing
COW keyrings is tricky, given that it is possible for a task to directly alter
the keys in a keyring in use by another task.
To help enforce this, various pointers to sets of credentials, such as those in
the task_struct, are declared const. The purpose of this is compile-time
discouragement of altering credentials through those pointers. Once a set of
credentials has been made public through one of these pointers, it may not be
modified, except under special circumstances:
(1) Its reference count may be incremented and decremented.
(2) The keyrings to which it points may be modified, but not replaced.
The only safe way to modify anything else is to create a replacement and commit
using the functions described in Documentation/credentials.txt (which will be
added by a later patch).
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
This now prepares and commits credentials in various places in the
security code rather than altering the current creds directly.
(2) Temporary credential overrides.
do_coredump() and sys_faccessat() now prepare their own credentials and
temporarily override the ones currently on the acting thread, whilst
preventing interference from other threads by holding cred_replace_mutex
on the thread being dumped.
This will be replaced in a future patch by something that hands down the
credentials directly to the functions being called, rather than altering
the task's objective credentials.
(3) LSM interface.
A number of functions have been changed, added or removed:
(*) security_capset_check(), ->capset_check()
(*) security_capset_set(), ->capset_set()
Removed in favour of security_capset().
(*) security_capset(), ->capset()
New. This is passed a pointer to the new creds, a pointer to the old
creds and the proposed capability sets. It should fill in the new
creds or return an error. All pointers, barring the pointer to the
new creds, are now const.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
Changed; now returns a value, which will cause the process to be
killed if it's an error.
(*) security_task_alloc(), ->task_alloc_security()
Removed in favour of security_prepare_creds().
(*) security_cred_free(), ->cred_free()
New. Free security data attached to cred->security.
(*) security_prepare_creds(), ->cred_prepare()
New. Duplicate any security data attached to cred->security.
(*) security_commit_creds(), ->cred_commit()
New. Apply any security effects for the upcoming installation of new
security by commit_creds().
(*) security_task_post_setuid(), ->task_post_setuid()
Removed in favour of security_task_fix_setuid().
(*) security_task_fix_setuid(), ->task_fix_setuid()
Fix up the proposed new credentials for setuid(). This is used by
cap_set_fix_setuid() to implicitly adjust capabilities in line with
setuid() changes. Changes are made to the new credentials, rather
than the task itself as in security_task_post_setuid().
(*) security_task_reparent_to_init(), ->task_reparent_to_init()
Removed. Instead the task being reparented to init is referred
directly to init's credentials.
NOTE! This results in the loss of some state: SELinux's osid no
longer records the sid of the thread that forked it.
(*) security_key_alloc(), ->key_alloc()
(*) security_key_permission(), ->key_permission()
Changed. These now take cred pointers rather than task pointers to
refer to the security context.
(4) sys_capset().
This has been simplified and uses less locking. The LSM functions it
calls have been merged.
(5) reparent_to_kthreadd().
This gives the current thread the same credentials as init by simply using
commit_thread() to point that way.
(6) __sigqueue_alloc() and switch_uid()
__sigqueue_alloc() can't stop the target task from changing its creds
beneath it, so this function gets a reference to the currently applicable
user_struct which it then passes into the sigqueue struct it returns if
successful.
switch_uid() is now called from commit_creds(), and possibly should be
folded into that. commit_creds() should take care of protecting
__sigqueue_alloc().
(7) [sg]et[ug]id() and co and [sg]et_current_groups.
The set functions now all use prepare_creds(), commit_creds() and
abort_creds() to build and check a new set of credentials before applying
it.
security_task_set[ug]id() is called inside the prepared section. This
guarantees that nothing else will affect the creds until we've finished.
The calling of set_dumpable() has been moved into commit_creds().
Much of the functionality of set_user() has been moved into
commit_creds().
The get functions all simply access the data directly.
(8) security_task_prctl() and cap_task_prctl().
security_task_prctl() has been modified to return -ENOSYS if it doesn't
want to handle a function, or otherwise return the return value directly
rather than through an argument.
Additionally, cap_task_prctl() now prepares a new set of credentials, even
if it doesn't end up using it.
(9) Keyrings.
A number of changes have been made to the keyrings code:
(a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have
all been dropped and built in to the credentials functions directly.
They may want separating out again later.
(b) key_alloc() and search_process_keyrings() now take a cred pointer
rather than a task pointer to specify the security context.
(c) copy_creds() gives a new thread within the same thread group a new
thread keyring if its parent had one, otherwise it discards the thread
keyring.
(d) The authorisation key now points directly to the credentials to extend
the search into rather than pointing to the task that carries them.
(e) Installing thread, process or session keyrings causes a new set of
credentials to be created, even though it's not strictly necessary for
process or session keyrings (they're shared).
(10) Usermode helper.
The usermode helper code now carries a cred struct pointer in its
subprocess_info struct instead of a new session keyring pointer. This set
of credentials is derived from init_cred and installed on the new process
after it has been cloned.
call_usermodehelper_setup() allocates the new credentials and
call_usermodehelper_freeinfo() discards them if they haven't been used. A
special cred function (prepare_usermodeinfo_creds()) is provided
specifically for call_usermodehelper_setup() to call.
call_usermodehelper_setkeys() adjusts the credentials to sport the
supplied keyring as the new session keyring.
(11) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) selinux_setprocattr() no longer does its check for whether the
current ptracer can access processes with the new SID inside the lock
that covers getting the ptracer's SID. Whilst this lock ensures that
the check is done with the ptracer pinned, the result is only valid
until the lock is released, so there's no point doing it inside the
lock.
(12) is_single_threaded().
This function has been extracted from selinux_setprocattr() and put into
a file of its own in the lib/ directory as join_session_keyring() now
wants to use it too.
The code in SELinux just checked to see whether a task shared mm_structs
with other tasks (CLONE_VM), but that isn't good enough. We really want
to know if they're part of the same thread group (CLONE_THREAD).
(13) nfsd.
The NFS server daemon now has to use the COW credentials to set the
credentials it is going to use. It really needs to pass the credentials
down to the functions it calls, but it can't do that until other patches
in this series have been applied.
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:23 +11:00
goto abort_change ;
2005-04-16 15:20:36 -07:00
}
CRED: Inaugurate COW credentials
Inaugurate copy-on-write credentials management. This uses RCU to manage the
credentials pointer in the task_struct with respect to accesses by other tasks.
A process may only modify its own credentials, and so does not need locking to
access or modify its own credentials.
A mutex (cred_replace_mutex) is added to the task_struct to control the effect
of PTRACE_ATTACHED on credential calculations, particularly with respect to
execve().
With this patch, the contents of an active credentials struct may not be
changed directly; rather a new set of credentials must be prepared, modified
and committed using something like the following sequence of events:
struct cred *new = prepare_creds();
int ret = blah(new);
if (ret < 0) {
abort_creds(new);
return ret;
}
return commit_creds(new);
There are some exceptions to this rule: the keyrings pointed to by the active
credentials may be instantiated - keyrings violate the COW rule as managing
COW keyrings is tricky, given that it is possible for a task to directly alter
the keys in a keyring in use by another task.
To help enforce this, various pointers to sets of credentials, such as those in
the task_struct, are declared const. The purpose of this is compile-time
discouragement of altering credentials through those pointers. Once a set of
credentials has been made public through one of these pointers, it may not be
modified, except under special circumstances:
(1) Its reference count may be incremented and decremented.
(2) The keyrings to which it points may be modified, but not replaced.
The only safe way to modify anything else is to create a replacement and commit
using the functions described in Documentation/credentials.txt (which will be
added by a later patch).
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
This now prepares and commits credentials in various places in the
security code rather than altering the current creds directly.
(2) Temporary credential overrides.
do_coredump() and sys_faccessat() now prepare their own credentials and
temporarily override the ones currently on the acting thread, whilst
preventing interference from other threads by holding cred_replace_mutex
on the thread being dumped.
This will be replaced in a future patch by something that hands down the
credentials directly to the functions being called, rather than altering
the task's objective credentials.
(3) LSM interface.
A number of functions have been changed, added or removed:
(*) security_capset_check(), ->capset_check()
(*) security_capset_set(), ->capset_set()
Removed in favour of security_capset().
(*) security_capset(), ->capset()
New. This is passed a pointer to the new creds, a pointer to the old
creds and the proposed capability sets. It should fill in the new
creds or return an error. All pointers, barring the pointer to the
new creds, are now const.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
Changed; now returns a value, which will cause the process to be
killed if it's an error.
(*) security_task_alloc(), ->task_alloc_security()
Removed in favour of security_prepare_creds().
(*) security_cred_free(), ->cred_free()
New. Free security data attached to cred->security.
(*) security_prepare_creds(), ->cred_prepare()
New. Duplicate any security data attached to cred->security.
(*) security_commit_creds(), ->cred_commit()
New. Apply any security effects for the upcoming installation of new
security by commit_creds().
(*) security_task_post_setuid(), ->task_post_setuid()
Removed in favour of security_task_fix_setuid().
(*) security_task_fix_setuid(), ->task_fix_setuid()
Fix up the proposed new credentials for setuid(). This is used by
cap_set_fix_setuid() to implicitly adjust capabilities in line with
setuid() changes. Changes are made to the new credentials, rather
than the task itself as in security_task_post_setuid().
(*) security_task_reparent_to_init(), ->task_reparent_to_init()
Removed. Instead the task being reparented to init is referred
directly to init's credentials.
NOTE! This results in the loss of some state: SELinux's osid no
longer records the sid of the thread that forked it.
(*) security_key_alloc(), ->key_alloc()
(*) security_key_permission(), ->key_permission()
Changed. These now take cred pointers rather than task pointers to
refer to the security context.
(4) sys_capset().
This has been simplified and uses less locking. The LSM functions it
calls have been merged.
(5) reparent_to_kthreadd().
This gives the current thread the same credentials as init by simply using
commit_thread() to point that way.
(6) __sigqueue_alloc() and switch_uid()
__sigqueue_alloc() can't stop the target task from changing its creds
beneath it, so this function gets a reference to the currently applicable
user_struct which it then passes into the sigqueue struct it returns if
successful.
switch_uid() is now called from commit_creds(), and possibly should be
folded into that. commit_creds() should take care of protecting
__sigqueue_alloc().
(7) [sg]et[ug]id() and co and [sg]et_current_groups.
The set functions now all use prepare_creds(), commit_creds() and
abort_creds() to build and check a new set of credentials before applying
it.
security_task_set[ug]id() is called inside the prepared section. This
guarantees that nothing else will affect the creds until we've finished.
The calling of set_dumpable() has been moved into commit_creds().
Much of the functionality of set_user() has been moved into
commit_creds().
The get functions all simply access the data directly.
(8) security_task_prctl() and cap_task_prctl().
security_task_prctl() has been modified to return -ENOSYS if it doesn't
want to handle a function, or otherwise return the return value directly
rather than through an argument.
Additionally, cap_task_prctl() now prepares a new set of credentials, even
if it doesn't end up using it.
(9) Keyrings.
A number of changes have been made to the keyrings code:
(a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have
all been dropped and built in to the credentials functions directly.
They may want separating out again later.
(b) key_alloc() and search_process_keyrings() now take a cred pointer
rather than a task pointer to specify the security context.
(c) copy_creds() gives a new thread within the same thread group a new
thread keyring if its parent had one, otherwise it discards the thread
keyring.
(d) The authorisation key now points directly to the credentials to extend
the search into rather than pointing to the task that carries them.
(e) Installing thread, process or session keyrings causes a new set of
credentials to be created, even though it's not strictly necessary for
process or session keyrings (they're shared).
(10) Usermode helper.
The usermode helper code now carries a cred struct pointer in its
subprocess_info struct instead of a new session keyring pointer. This set
of credentials is derived from init_cred and installed on the new process
after it has been cloned.
call_usermodehelper_setup() allocates the new credentials and
call_usermodehelper_freeinfo() discards them if they haven't been used. A
special cred function (prepare_usermodeinfo_creds()) is provided
specifically for call_usermodehelper_setup() to call.
call_usermodehelper_setkeys() adjusts the credentials to sport the
supplied keyring as the new session keyring.
(11) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) selinux_setprocattr() no longer does its check for whether the
current ptracer can access processes with the new SID inside the lock
that covers getting the ptracer's SID. Whilst this lock ensures that
the check is done with the ptracer pinned, the result is only valid
until the lock is released, so there's no point doing it inside the
lock.
(12) is_single_threaded().
This function has been extracted from selinux_setprocattr() and put into
a file of its own in the lib/ directory as join_session_keyring() now
wants to use it too.
The code in SELinux just checked to see whether a task shared mm_structs
with other tasks (CLONE_VM), but that isn't good enough. We really want
to know if they're part of the same thread group (CLONE_THREAD).
(13) nfsd.
The NFS server daemon now has to use the COW credentials to set the
credentials it is going to use. It really needs to pass the credentials
down to the functions it calls, but it can't do that until other patches
in this series have been applied.
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:23 +11:00
tsec - > sid = sid ;
} else {
error = - EINVAL ;
goto abort_change ;
}
commit_creds ( new ) ;
2005-04-16 15:20:36 -07:00
return size ;
CRED: Inaugurate COW credentials
Inaugurate copy-on-write credentials management. This uses RCU to manage the
credentials pointer in the task_struct with respect to accesses by other tasks.
A process may only modify its own credentials, and so does not need locking to
access or modify its own credentials.
A mutex (cred_replace_mutex) is added to the task_struct to control the effect
of PTRACE_ATTACHED on credential calculations, particularly with respect to
execve().
With this patch, the contents of an active credentials struct may not be
changed directly; rather a new set of credentials must be prepared, modified
and committed using something like the following sequence of events:
struct cred *new = prepare_creds();
int ret = blah(new);
if (ret < 0) {
abort_creds(new);
return ret;
}
return commit_creds(new);
There are some exceptions to this rule: the keyrings pointed to by the active
credentials may be instantiated - keyrings violate the COW rule as managing
COW keyrings is tricky, given that it is possible for a task to directly alter
the keys in a keyring in use by another task.
To help enforce this, various pointers to sets of credentials, such as those in
the task_struct, are declared const. The purpose of this is compile-time
discouragement of altering credentials through those pointers. Once a set of
credentials has been made public through one of these pointers, it may not be
modified, except under special circumstances:
(1) Its reference count may be incremented and decremented.
(2) The keyrings to which it points may be modified, but not replaced.
The only safe way to modify anything else is to create a replacement and commit
using the functions described in Documentation/credentials.txt (which will be
added by a later patch).
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
This now prepares and commits credentials in various places in the
security code rather than altering the current creds directly.
(2) Temporary credential overrides.
do_coredump() and sys_faccessat() now prepare their own credentials and
temporarily override the ones currently on the acting thread, whilst
preventing interference from other threads by holding cred_replace_mutex
on the thread being dumped.
This will be replaced in a future patch by something that hands down the
credentials directly to the functions being called, rather than altering
the task's objective credentials.
(3) LSM interface.
A number of functions have been changed, added or removed:
(*) security_capset_check(), ->capset_check()
(*) security_capset_set(), ->capset_set()
Removed in favour of security_capset().
(*) security_capset(), ->capset()
New. This is passed a pointer to the new creds, a pointer to the old
creds and the proposed capability sets. It should fill in the new
creds or return an error. All pointers, barring the pointer to the
new creds, are now const.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
Changed; now returns a value, which will cause the process to be
killed if it's an error.
(*) security_task_alloc(), ->task_alloc_security()
Removed in favour of security_prepare_creds().
(*) security_cred_free(), ->cred_free()
New. Free security data attached to cred->security.
(*) security_prepare_creds(), ->cred_prepare()
New. Duplicate any security data attached to cred->security.
(*) security_commit_creds(), ->cred_commit()
New. Apply any security effects for the upcoming installation of new
security by commit_creds().
(*) security_task_post_setuid(), ->task_post_setuid()
Removed in favour of security_task_fix_setuid().
(*) security_task_fix_setuid(), ->task_fix_setuid()
Fix up the proposed new credentials for setuid(). This is used by
cap_set_fix_setuid() to implicitly adjust capabilities in line with
setuid() changes. Changes are made to the new credentials, rather
than the task itself as in security_task_post_setuid().
(*) security_task_reparent_to_init(), ->task_reparent_to_init()
Removed. Instead the task being reparented to init is referred
directly to init's credentials.
NOTE! This results in the loss of some state: SELinux's osid no
longer records the sid of the thread that forked it.
(*) security_key_alloc(), ->key_alloc()
(*) security_key_permission(), ->key_permission()
Changed. These now take cred pointers rather than task pointers to
refer to the security context.
(4) sys_capset().
This has been simplified and uses less locking. The LSM functions it
calls have been merged.
(5) reparent_to_kthreadd().
This gives the current thread the same credentials as init by simply using
commit_thread() to point that way.
(6) __sigqueue_alloc() and switch_uid()
__sigqueue_alloc() can't stop the target task from changing its creds
beneath it, so this function gets a reference to the currently applicable
user_struct which it then passes into the sigqueue struct it returns if
successful.
switch_uid() is now called from commit_creds(), and possibly should be
folded into that. commit_creds() should take care of protecting
__sigqueue_alloc().
(7) [sg]et[ug]id() and co and [sg]et_current_groups.
The set functions now all use prepare_creds(), commit_creds() and
abort_creds() to build and check a new set of credentials before applying
it.
security_task_set[ug]id() is called inside the prepared section. This
guarantees that nothing else will affect the creds until we've finished.
The calling of set_dumpable() has been moved into commit_creds().
Much of the functionality of set_user() has been moved into
commit_creds().
The get functions all simply access the data directly.
(8) security_task_prctl() and cap_task_prctl().
security_task_prctl() has been modified to return -ENOSYS if it doesn't
want to handle a function, or otherwise return the return value directly
rather than through an argument.
Additionally, cap_task_prctl() now prepares a new set of credentials, even
if it doesn't end up using it.
(9) Keyrings.
A number of changes have been made to the keyrings code:
(a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have
all been dropped and built in to the credentials functions directly.
They may want separating out again later.
(b) key_alloc() and search_process_keyrings() now take a cred pointer
rather than a task pointer to specify the security context.
(c) copy_creds() gives a new thread within the same thread group a new
thread keyring if its parent had one, otherwise it discards the thread
keyring.
(d) The authorisation key now points directly to the credentials to extend
the search into rather than pointing to the task that carries them.
(e) Installing thread, process or session keyrings causes a new set of
credentials to be created, even though it's not strictly necessary for
process or session keyrings (they're shared).
(10) Usermode helper.
The usermode helper code now carries a cred struct pointer in its
subprocess_info struct instead of a new session keyring pointer. This set
of credentials is derived from init_cred and installed on the new process
after it has been cloned.
call_usermodehelper_setup() allocates the new credentials and
call_usermodehelper_freeinfo() discards them if they haven't been used. A
special cred function (prepare_usermodeinfo_creds()) is provided
specifically for call_usermodehelper_setup() to call.
call_usermodehelper_setkeys() adjusts the credentials to sport the
supplied keyring as the new session keyring.
(11) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) selinux_setprocattr() no longer does its check for whether the
current ptracer can access processes with the new SID inside the lock
that covers getting the ptracer's SID. Whilst this lock ensures that
the check is done with the ptracer pinned, the result is only valid
until the lock is released, so there's no point doing it inside the
lock.
(12) is_single_threaded().
This function has been extracted from selinux_setprocattr() and put into
a file of its own in the lib/ directory as join_session_keyring() now
wants to use it too.
The code in SELinux just checked to see whether a task shared mm_structs
with other tasks (CLONE_VM), but that isn't good enough. We really want
to know if they're part of the same thread group (CLONE_THREAD).
(13) nfsd.
The NFS server daemon now has to use the COW credentials to set the
credentials it is going to use. It really needs to pass the credentials
down to the functions it calls, but it can't do that until other patches
in this series have been applied.
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:23 +11:00
abort_change :
abort_creds ( new ) ;
return error ;
2005-04-16 15:20:36 -07:00
}
2023-10-24 14:44:00 -04:00
/**
 * selinux_getselfattr - Get SELinux current task attributes
 * @attr: the requested attribute
 * @ctx: user space buffer to receive the result
 * @size: buffer size (input), buffer size used (output)
 * @flags: unused (not referenced by this function)
 *
 * Fetch the requested attribute of the current task with
 * selinux_lsm_getattr() and hand the resulting value to
 * lsm_fill_user_ctx(), tagged with LSM_ID_SELINUX, to fill the passed
 * user space @ctx.
 *
 * Returns the number of attributes on success, an error code otherwise.
 * There will only ever be one attribute, so the success value is 1. A
 * negative result from selinux_lsm_getattr() and a non-zero result from
 * lsm_fill_user_ctx() are both returned unchanged.
 */
2023-09-12 13:56:55 -07:00
static int selinux_getselfattr ( unsigned int attr , struct lsm_ctx __user * ctx ,
2024-03-14 11:31:26 -04:00
u32 * size , u32 flags )
2023-09-12 13:56:55 -07:00
{
2023-10-24 14:44:00 -04:00
int rc ;
2024-02-23 20:05:45 +01:00
char * val = NULL ;
2023-10-24 14:44:00 -04:00
int val_len ;
/* On success the return value is used as the length of val. */
val_len = selinux_lsm_getattr ( attr , current , & val ) ;
if ( val_len < 0 )
return val_len ;
rc = lsm_fill_user_ctx ( ctx , size , val , val_len , LSM_ID_SELINUX , 0 ) ;
/*
 * Release val whether or not the fill succeeded.
 * NOTE(review): presumably val was allocated by selinux_lsm_getattr() - confirm.
 */
kfree ( val ) ;
/* Zero from lsm_fill_user_ctx() becomes the attribute count of 1. */
return ( ! rc ? 1 : rc ) ;
2023-09-12 13:56:55 -07:00
}
/*
 * selinux_setselfattr - set an SELinux attribute from an lsm_ctx
 * @attr: the attribute to set
 * @ctx: context structure; its ctx and ctx_len members are the value passed on
 * @size: not referenced by this function
 * @flags: not referenced by this function
 *
 * Thin wrapper around selinux_lsm_setattr(). Any positive result is
 * reported as 0; zero and negative results are returned unchanged.
 */
static int selinux_setselfattr ( unsigned int attr , struct lsm_ctx * ctx ,
2024-03-14 11:31:26 -04:00
u32 size , u32 flags )
2023-09-12 13:56:55 -07:00
{
int rc ;
rc = selinux_lsm_setattr ( attr , ctx - > ctx , ctx - > ctx_len ) ;
/* Collapse a positive return into plain success. */
if ( rc > 0 )
return 0 ;
return rc ;
}
static int selinux_getprocattr ( struct task_struct * p ,
const char * name , char * * value )
{
unsigned int attr = lsm_name_to_attr ( name ) ;
int rc ;
if ( attr ) {
rc = selinux_lsm_getattr ( attr , p , value ) ;
if ( rc ! = - EOPNOTSUPP )
return rc ;
}
return - EINVAL ;
}
/*
 * selinux_setprocattr - set an attribute selected by name
 * @name: attribute name, translated with lsm_name_to_attr()
 * @value: new attribute value, passed through to selinux_lsm_setattr()
 * @size: length of @value, passed through to selinux_lsm_setattr()
 *
 * Returns the result of selinux_lsm_setattr(), or -EINVAL when
 * lsm_name_to_attr() yields a zero attribute id for @name.
 */
static int selinux_setprocattr(const char *name, void *value, size_t size)
{
	int attr_id = lsm_name_to_attr(name);

	/* A zero id means there is nothing to set. */
	if (!attr_id)
		return -EINVAL;

	return selinux_lsm_setattr(attr_id, value, size);
}
2013-05-22 12:50:35 -04:00
/*
 * selinux_ismaclabel - test whether a name is the SELinux xattr suffix
 * @name: name to compare against XATTR_SELINUX_SUFFIX
 *
 * Returns 1 on an exact string match, 0 otherwise.
 */
static int selinux_ismaclabel(const char *name)
{
	return !strcmp(name, XATTR_SELINUX_SUFFIX);
}
2006-08-02 14:12:06 -07:00
/*
 * selinux_secid_to_secctx - convert a secid to a security context
 * @secid: the security identifier to convert
 * @secdata: passed through to security_sid_to_context()
 * @seclen: passed through to security_sid_to_context()
 *
 * Pure pass-through: returns the result of security_sid_to_context().
 */
static int selinux_secid_to_secctx ( u32 secid , char * * secdata , u32 * seclen )
{
2024-10-09 10:32:11 -07:00
return security_sid_to_context ( secid , secdata , seclen ) ;
}
/*
 * selinux_lsmprop_to_secctx - convert an lsm_prop to a security context
 * @prop: LSM property; only its selinux.secid member is read
 * @secdata: passed through to selinux_secid_to_secctx()
 * @seclen: passed through to selinux_secid_to_secctx()
 *
 * Returns the result of selinux_secid_to_secctx() for the SELinux secid
 * stored in @prop.
 */
static int selinux_lsmprop_to_secctx ( struct lsm_prop * prop , char * * secdata ,
u32 * seclen )
{
2024-10-09 10:32:21 -07:00
return selinux_secid_to_secctx ( prop - > selinux . secid , secdata , seclen ) ;
2006-08-02 14:12:06 -07:00
}
2008-04-29 20:52:51 +01:00
/*
 * selinux_secctx_to_secid - convert a security context to a secid
 * @secdata: the security context to convert
 * @seclen: length of @secdata
 * @secid: passed through to security_context_to_sid()
 *
 * Pure pass-through: returns the result of security_context_to_sid(),
 * which is called with GFP_KERNEL as its allocation flags.
 */
static int selinux_secctx_to_secid ( const char * secdata , u32 seclen , u32 * secid )
2008-01-15 23:47:35 +00:00
{
2023-03-09 13:30:37 -05:00
return security_context_to_sid ( secdata , seclen ,
2018-03-01 18:48:02 -05:00
secid , GFP_KERNEL ) ;
2008-01-15 23:47:35 +00:00
}
2006-08-02 14:12:06 -07:00
/*
 * selinux_release_secctx - free a security context buffer
 * @secdata: buffer to release with kfree()
 * @seclen: not referenced by this function
 */
static void selinux_release_secctx ( char * secdata , u32 seclen )
{
2007-08-01 11:12:58 -04:00
kfree ( secdata ) ;
2006-08-02 14:12:06 -07:00
}
2015-12-24 11:09:40 -05:00
/*
 * selinux_inode_invalidate_secctx - mark an inode's SELinux label invalid
 * @inode: inode whose security structure is updated
 *
 * Looks up the inode's inode_security_struct with selinux_inode() and
 * sets its initialized field to LABEL_INVALID while holding isec->lock.
 */
static void selinux_inode_invalidate_secctx ( struct inode * inode )
{
2018-09-21 17:19:11 -07:00
struct inode_security_struct * isec = selinux_inode ( inode ) ;
2015-12-24 11:09:40 -05:00
selinux: Convert isec->lock into a spinlock
Convert isec->lock from a mutex into a spinlock. Instead of holding
the lock while sleeping in inode_doinit_with_dentry, set
isec->initialized to LABEL_PENDING and release the lock. Then, when
the sid has been determined, re-acquire the lock. If isec->initialized
is still set to LABEL_PENDING, set isec->sid; otherwise, the sid has
been set by another task (LABEL_INITIALIZED) or invalidated
(LABEL_INVALID) in the meantime.
This fixes a deadlock on gfs2 where
* one task is in inode_doinit_with_dentry -> gfs2_getxattr, holds
isec->lock, and tries to acquire the inode's glock, and
* another task is in do_xmote -> inode_go_inval ->
selinux_inode_invalidate_secctx, holds the inode's glock, and
tries to acquire isec->lock.
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
[PM: minor tweaks to keep checkpatch.pl happy]
Signed-off-by: Paul Moore <paul@paul-moore.com>
2016-11-15 11:06:40 +01:00
/* The state change below is made only with isec->lock held. */
spin_lock ( & isec - > lock ) ;
2015-12-24 11:09:40 -05:00
isec - > initialized = LABEL_INVALID ;
selinux: Convert isec->lock into a spinlock
Convert isec->lock from a mutex into a spinlock. Instead of holding
the lock while sleeping in inode_doinit_with_dentry, set
isec->initialized to LABEL_PENDING and release the lock. Then, when
the sid has been determined, re-acquire the lock. If isec->initialized
is still set to LABEL_PENDING, set isec->sid; otherwise, the sid has
been set by another task (LABEL_INITIALIZED) or invalidated
(LABEL_INVALID) in the meantime.
This fixes a deadlock on gfs2 where
* one task is in inode_doinit_with_dentry -> gfs2_getxattr, holds
isec->lock, and tries to acquire the inode's glock, and
* another task is in do_xmote -> inode_go_inval ->
selinux_inode_invalidate_secctx, holds the inode's glock, and
tries to acquire isec->lock.
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
[PM: minor tweaks to keep checkpatch.pl happy]
Signed-off-by: Paul Moore <paul@paul-moore.com>
2016-11-15 11:06:40 +01:00
spin_unlock ( & isec - > lock ) ;
2015-12-24 11:09:40 -05:00
}
2009-09-03 14:25:57 -04:00
/*
 * selinux_inode_notifysecctx - apply a security context to an inode
 * @inode: inode to label
 * @ctx: security context, passed to selinux_inode_setsecurity()
 * @ctxlen: length of @ctx
 *
 * Sets the XATTR_SELINUX_SUFFIX attribute through
 * selinux_inode_setsecurity() with flags of 0. A result of -EOPNOTSUPP
 * is reported as success (0); any other result is returned unchanged.
 *
 * called with inode->i_mutex locked
 */
static int selinux_inode_notifysecctx ( struct inode * inode , void * ctx , u32 ctxlen )
{
2018-12-21 21:18:53 +01:00
int rc = selinux_inode_setsecurity ( inode , XATTR_SELINUX_SUFFIX ,
ctx , ctxlen , 0 ) ;
/* Do not return error when suppressing label (SBLABEL_MNT not set). */
return rc = = - EOPNOTSUPP ? 0 : rc ;
2009-09-03 14:25:57 -04:00
}
/*
 * selinux_inode_setsecctx - set the SELinux xattr on a dentry
 * @dentry: dentry to update
 * @ctx: security context to store
 * @ctxlen: length of @ctx
 *
 * Pure pass-through to __vfs_setxattr_locked() for XATTR_NAME_SELINUX,
 * using nop_mnt_idmap, flags of 0 and a NULL final argument. Returns
 * that call's result.
 *
 * called with inode->i_mutex locked
 */
static int selinux_inode_setsecctx ( struct dentry * dentry , void * ctx , u32 ctxlen )
{
2024-08-28 15:51:29 -04:00
return __vfs_setxattr_locked ( & nop_mnt_idmap , dentry , XATTR_NAME_SELINUX ,
ctx , ctxlen , 0 , NULL ) ;
2009-09-03 14:25:57 -04:00
}
/*
 * selinux_inode_getsecctx - fetch the SELinux security context of an inode
 * @inode: inode to query
 * @ctx: passed to selinux_inode_getsecurity() to receive the context
 * @ctxlen: set to the length returned by selinux_inode_getsecurity()
 *
 * Returns 0 on success, with *@ctxlen set. A negative result from
 * selinux_inode_getsecurity() is returned unchanged and *@ctxlen is
 * left untouched.
 */
static int selinux_inode_getsecctx ( struct inode * inode , void * * ctx , u32 * ctxlen )
{
int len = 0 ;
2023-01-13 12:49:22 +01:00
/* Query XATTR_SELINUX_SUFFIX; the final argument is passed as true. */
len = selinux_inode_getsecurity ( & nop_mnt_idmap , inode ,
2021-01-21 14:19:29 +01:00
XATTR_SELINUX_SUFFIX , ctx , true ) ;
2009-09-03 14:25:57 -04:00
if ( len < 0 )
return len ;
/* A non-negative return is the context length. */
* ctxlen = len ;
return 0 ;
}
2006-06-22 14:47:17 -07:00
# ifdef CONFIG_KEYS
CRED: Inaugurate COW credentials
Inaugurate copy-on-write credentials management. This uses RCU to manage the
credentials pointer in the task_struct with respect to accesses by other tasks.
A process may only modify its own credentials, and so does not need locking to
access or modify its own credentials.
A mutex (cred_replace_mutex) is added to the task_struct to control the effect
of PTRACE_ATTACHED on credential calculations, particularly with respect to
execve().
With this patch, the contents of an active credentials struct may not be
changed directly; rather a new set of credentials must be prepared, modified
and committed using something like the following sequence of events:
struct cred *new = prepare_creds();
int ret = blah(new);
if (ret < 0) {
abort_creds(new);
return ret;
}
return commit_creds(new);
There are some exceptions to this rule: the keyrings pointed to by the active
credentials may be instantiated - keyrings violate the COW rule as managing
COW keyrings is tricky, given that it is possible for a task to directly alter
the keys in a keyring in use by another task.
To help enforce this, various pointers to sets of credentials, such as those in
the task_struct, are declared const. The purpose of this is compile-time
discouragement of altering credentials through those pointers. Once a set of
credentials has been made public through one of these pointers, it may not be
modified, except under special circumstances:
(1) Its reference count may be incremented and decremented.
(2) The keyrings to which it points may be modified, but not replaced.
The only safe way to modify anything else is to create a replacement and commit
using the functions described in Documentation/credentials.txt (which will be
added by a later patch).
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
This now prepares and commits credentials in various places in the
security code rather than altering the current creds directly.
(2) Temporary credential overrides.
do_coredump() and sys_faccessat() now prepare their own credentials and
temporarily override the ones currently on the acting thread, whilst
preventing interference from other threads by holding cred_replace_mutex
on the thread being dumped.
This will be replaced in a future patch by something that hands down the
credentials directly to the functions being called, rather than altering
the task's objective credentials.
(3) LSM interface.
A number of functions have been changed, added or removed:
(*) security_capset_check(), ->capset_check()
(*) security_capset_set(), ->capset_set()
Removed in favour of security_capset().
(*) security_capset(), ->capset()
New. This is passed a pointer to the new creds, a pointer to the old
creds and the proposed capability sets. It should fill in the new
creds or return an error. All pointers, barring the pointer to the
new creds, are now const.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
Changed; now returns a value, which will cause the process to be
killed if it's an error.
(*) security_task_alloc(), ->task_alloc_security()
Removed in favour of security_prepare_creds().
(*) security_cred_free(), ->cred_free()
New. Free security data attached to cred->security.
(*) security_prepare_creds(), ->cred_prepare()
New. Duplicate any security data attached to cred->security.
(*) security_commit_creds(), ->cred_commit()
New. Apply any security effects for the upcoming installation of new
security by commit_creds().
(*) security_task_post_setuid(), ->task_post_setuid()
Removed in favour of security_task_fix_setuid().
(*) security_task_fix_setuid(), ->task_fix_setuid()
Fix up the proposed new credentials for setuid(). This is used by
cap_set_fix_setuid() to implicitly adjust capabilities in line with
setuid() changes. Changes are made to the new credentials, rather
than the task itself as in security_task_post_setuid().
(*) security_task_reparent_to_init(), ->task_reparent_to_init()
Removed. Instead the task being reparented to init is referred
directly to init's credentials.
NOTE! This results in the loss of some state: SELinux's osid no
longer records the sid of the thread that forked it.
(*) security_key_alloc(), ->key_alloc()
(*) security_key_permission(), ->key_permission()
Changed. These now take cred pointers rather than task pointers to
refer to the security context.
(4) sys_capset().
This has been simplified and uses less locking. The LSM functions it
calls have been merged.
(5) reparent_to_kthreadd().
This gives the current thread the same credentials as init by simply using
commit_thread() to point that way.
(6) __sigqueue_alloc() and switch_uid()
__sigqueue_alloc() can't stop the target task from changing its creds
beneath it, so this function gets a reference to the currently applicable
user_struct which it then passes into the sigqueue struct it returns if
successful.
switch_uid() is now called from commit_creds(), and possibly should be
folded into that. commit_creds() should take care of protecting
__sigqueue_alloc().
(7) [sg]et[ug]id() and co and [sg]et_current_groups.
The set functions now all use prepare_creds(), commit_creds() and
abort_creds() to build and check a new set of credentials before applying
it.
security_task_set[ug]id() is called inside the prepared section. This
guarantees that nothing else will affect the creds until we've finished.
The calling of set_dumpable() has been moved into commit_creds().
Much of the functionality of set_user() has been moved into
commit_creds().
The get functions all simply access the data directly.
(8) security_task_prctl() and cap_task_prctl().
security_task_prctl() has been modified to return -ENOSYS if it doesn't
want to handle a function, or otherwise return the return value directly
rather than through an argument.
Additionally, cap_task_prctl() now prepares a new set of credentials, even
if it doesn't end up using it.
(9) Keyrings.
A number of changes have been made to the keyrings code:
(a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have
all been dropped and built in to the credentials functions directly.
They may want separating out again later.
(b) key_alloc() and search_process_keyrings() now take a cred pointer
rather than a task pointer to specify the security context.
(c) copy_creds() gives a new thread within the same thread group a new
thread keyring if its parent had one, otherwise it discards the thread
keyring.
(d) The authorisation key now points directly to the credentials to extend
the search into rather than pointing to the task that carries them.
(e) Installing thread, process or session keyrings causes a new set of
credentials to be created, even though it's not strictly necessary for
process or session keyrings (they're shared).
(10) Usermode helper.
The usermode helper code now carries a cred struct pointer in its
subprocess_info struct instead of a new session keyring pointer. This set
of credentials is derived from init_cred and installed on the new process
after it has been cloned.
call_usermodehelper_setup() allocates the new credentials and
call_usermodehelper_freeinfo() discards them if they haven't been used. A
special cred function (prepare_usermodeinfo_creds()) is provided
specifically for call_usermodehelper_setup() to call.
call_usermodehelper_setkeys() adjusts the credentials to sport the
supplied keyring as the new session keyring.
(11) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) selinux_setprocattr() no longer does its check for whether the
current ptracer can access processes with the new SID inside the lock
that covers getting the ptracer's SID. Whilst this lock ensures that
the check is done with the ptracer pinned, the result is only valid
until the lock is released, so there's no point doing it inside the
lock.
(12) is_single_threaded().
This function has been extracted from selinux_setprocattr() and put into
a file of its own in the lib/ directory as join_session_keyring() now
wants to use it too.
The code in SELinux just checked to see whether a task shared mm_structs
with other tasks (CLONE_VM), but that isn't good enough. We really want
to know if they're part of the same thread group (CLONE_THREAD).
(13) nfsd.
The NFS server daemon now has to use the COW credentials to set the
credentials it is going to use. It really needs to pass the credentials
down to the functions it calls, but it can't do that until other patches
in this series have been applied.
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:23 +11:00
static int selinux_key_alloc ( struct key * k , const struct cred * cred ,
2006-06-26 00:24:50 -07:00
unsigned long flags )
2006-06-22 14:47:17 -07:00
{
CRED: Inaugurate COW credentials
Inaugurate copy-on-write credentials management. This uses RCU to manage the
credentials pointer in the task_struct with respect to accesses by other tasks.
A process may only modify its own credentials, and so does not need locking to
access or modify its own credentials.
A mutex (cred_replace_mutex) is added to the task_struct to control the effect
of PTRACE_ATTACHED on credential calculations, particularly with respect to
execve().
With this patch, the contents of an active credentials struct may not be
changed directly; rather a new set of credentials must be prepared, modified
and committed using something like the following sequence of events:
struct cred *new = prepare_creds();
int ret = blah(new);
if (ret < 0) {
abort_creds(new);
return ret;
}
return commit_creds(new);
There are some exceptions to this rule: the keyrings pointed to by the active
credentials may be instantiated - keyrings violate the COW rule as managing
COW keyrings is tricky, given that it is possible for a task to directly alter
the keys in a keyring in use by another task.
To help enforce this, various pointers to sets of credentials, such as those in
the task_struct, are declared const. The purpose of this is compile-time
discouragement of altering credentials through those pointers. Once a set of
credentials has been made public through one of these pointers, it may not be
modified, except under special circumstances:
(1) Its reference count may incremented and decremented.
(2) The keyrings to which it points may be modified, but not replaced.
The only safe way to modify anything else is to create a replacement and commit
using the functions described in Documentation/credentials.txt (which will be
added by a later patch).
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
This now prepares and commits credentials in various places in the
security code rather than altering the current creds directly.
(2) Temporary credential overrides.
do_coredump() and sys_faccessat() now prepare their own credentials and
temporarily override the ones currently on the acting thread, whilst
preventing interference from other threads by holding cred_replace_mutex
on the thread being dumped.
This will be replaced in a future patch by something that hands down the
credentials directly to the functions being called, rather than altering
the task's objective credentials.
(3) LSM interface.
A number of functions have been changed, added or removed:
(*) security_capset_check(), ->capset_check()
(*) security_capset_set(), ->capset_set()
Removed in favour of security_capset().
(*) security_capset(), ->capset()
New. This is passed a pointer to the new creds, a pointer to the old
creds and the proposed capability sets. It should fill in the new
creds or return an error. All pointers, barring the pointer to the
new creds, are now const.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
Changed; now returns a value, which will cause the process to be
killed if it's an error.
(*) security_task_alloc(), ->task_alloc_security()
Removed in favour of security_prepare_creds().
(*) security_cred_free(), ->cred_free()
New. Free security data attached to cred->security.
(*) security_prepare_creds(), ->cred_prepare()
New. Duplicate any security data attached to cred->security.
(*) security_commit_creds(), ->cred_commit()
New. Apply any security effects for the upcoming installation of new
security by commit_creds().
(*) security_task_post_setuid(), ->task_post_setuid()
Removed in favour of security_task_fix_setuid().
(*) security_task_fix_setuid(), ->task_fix_setuid()
Fix up the proposed new credentials for setuid(). This is used by
cap_set_fix_setuid() to implicitly adjust capabilities in line with
setuid() changes. Changes are made to the new credentials, rather
than the task itself as in security_task_post_setuid().
(*) security_task_reparent_to_init(), ->task_reparent_to_init()
Removed. Instead the task being reparented to init is referred
directly to init's credentials.
NOTE! This results in the loss of some state: SELinux's osid no
longer records the sid of the thread that forked it.
(*) security_key_alloc(), ->key_alloc()
(*) security_key_permission(), ->key_permission()
Changed. These now take cred pointers rather than task pointers to
refer to the security context.
(4) sys_capset().
This has been simplified and uses less locking. The LSM functions it
calls have been merged.
(5) reparent_to_kthreadd().
This gives the current thread the same credentials as init by simply using
commit_thread() to point that way.
(6) __sigqueue_alloc() and switch_uid()
__sigqueue_alloc() can't stop the target task from changing its creds
beneath it, so this function gets a reference to the currently applicable
user_struct which it then passes into the sigqueue struct it returns if
successful.
switch_uid() is now called from commit_creds(), and possibly should be
folded into that. commit_creds() should take care of protecting
__sigqueue_alloc().
(7) [sg]et[ug]id() and co and [sg]et_current_groups.
The set functions now all use prepare_creds(), commit_creds() and
abort_creds() to build and check a new set of credentials before applying
it.
security_task_set[ug]id() is called inside the prepared section. This
guarantees that nothing else will affect the creds until we've finished.
The calling of set_dumpable() has been moved into commit_creds().
Much of the functionality of set_user() has been moved into
commit_creds().
The get functions all simply access the data directly.
(8) security_task_prctl() and cap_task_prctl().
security_task_prctl() has been modified to return -ENOSYS if it doesn't
want to handle a function, or otherwise return the return value directly
rather than through an argument.
Additionally, cap_task_prctl() now prepares a new set of credentials, even
if it doesn't end up using it.
(9) Keyrings.
A number of changes have been made to the keyrings code:
(a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have
all been dropped and built in to the credentials functions directly.
They may want separating out again later.
(b) key_alloc() and search_process_keyrings() now take a cred pointer
rather than a task pointer to specify the security context.
(c) copy_creds() gives a new thread within the same thread group a new
thread keyring if its parent had one, otherwise it discards the thread
keyring.
(d) The authorisation key now points directly to the credentials to extend
the search into rather pointing to the task that carries them.
(e) Installing thread, process or session keyrings causes a new set of
credentials to be created, even though it's not strictly necessary for
process or session keyrings (they're shared).
(10) Usermode helper.
The usermode helper code now carries a cred struct pointer in its
subprocess_info struct instead of a new session keyring pointer. This set
of credentials is derived from init_cred and installed on the new process
after it has been cloned.
call_usermodehelper_setup() allocates the new credentials and
call_usermodehelper_freeinfo() discards them if they haven't been used. A
special cred function (prepare_usermodeinfo_creds()) is provided
specifically for call_usermodehelper_setup() to call.
call_usermodehelper_setkeys() adjusts the credentials to sport the
supplied keyring as the new session keyring.
(11) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) selinux_setprocattr() no longer does its check for whether the
current ptracer can access processes with the new SID inside the lock
that covers getting the ptracer's SID. Whilst this lock ensures that
the check is done with the ptracer pinned, the result is only valid
until the lock is released, so there's no point doing it inside the
lock.
(12) is_single_threaded().
This function has been extracted from selinux_setprocattr() and put into
a file of its own in the lib/ directory as join_session_keyring() now
wants to use it too.
The code in SELinux just checked to see whether a task shared mm_structs
with other tasks (CLONE_VM), but that isn't good enough. We really want
to know if they're part of the same thread group (CLONE_THREAD).
(13) nfsd.
The NFS server daemon now has to use the COW credentials to set the
credentials it is going to use. It really needs to pass the credentials
down to the functions it calls, but it can't do that until other patches
in this series have been applied.
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:23 +11:00
const struct task_security_struct * tsec ;
2024-07-10 14:32:26 -07:00
struct key_security_struct * ksec = selinux_key ( k ) ;
2006-06-22 14:47:17 -07:00
2018-09-21 17:17:16 -07:00
tsec = selinux_cred ( cred ) ;
CRED: Inaugurate COW credentials
Inaugurate copy-on-write credentials management. This uses RCU to manage the
credentials pointer in the task_struct with respect to accesses by other tasks.
A process may only modify its own credentials, and so does not need locking to
access or modify its own credentials.
A mutex (cred_replace_mutex) is added to the task_struct to control the effect
of PTRACE_ATTACHED on credential calculations, particularly with respect to
execve().
With this patch, the contents of an active credentials struct may not be
changed directly; rather a new set of credentials must be prepared, modified
and committed using something like the following sequence of events:
struct cred *new = prepare_creds();
int ret = blah(new);
if (ret < 0) {
abort_creds(new);
return ret;
}
return commit_creds(new);
There are some exceptions to this rule: the keyrings pointed to by the active
credentials may be instantiated - keyrings violate the COW rule as managing
COW keyrings is tricky, given that it is possible for a task to directly alter
the keys in a keyring in use by another task.
To help enforce this, various pointers to sets of credentials, such as those in
the task_struct, are declared const. The purpose of this is compile-time
discouragement of altering credentials through those pointers. Once a set of
credentials has been made public through one of these pointers, it may not be
modified, except under special circumstances:
(1) Its reference count may incremented and decremented.
(2) The keyrings to which it points may be modified, but not replaced.
The only safe way to modify anything else is to create a replacement and commit
using the functions described in Documentation/credentials.txt (which will be
added by a later patch).
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
This now prepares and commits credentials in various places in the
security code rather than altering the current creds directly.
(2) Temporary credential overrides.
do_coredump() and sys_faccessat() now prepare their own credentials and
temporarily override the ones currently on the acting thread, whilst
preventing interference from other threads by holding cred_replace_mutex
on the thread being dumped.
This will be replaced in a future patch by something that hands down the
credentials directly to the functions being called, rather than altering
the task's objective credentials.
(3) LSM interface.
A number of functions have been changed, added or removed:
(*) security_capset_check(), ->capset_check()
(*) security_capset_set(), ->capset_set()
Removed in favour of security_capset().
(*) security_capset(), ->capset()
New. This is passed a pointer to the new creds, a pointer to the old
creds and the proposed capability sets. It should fill in the new
creds or return an error. All pointers, barring the pointer to the
new creds, are now const.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
Changed; now returns a value, which will cause the process to be
killed if it's an error.
(*) security_task_alloc(), ->task_alloc_security()
Removed in favour of security_prepare_creds().
(*) security_cred_free(), ->cred_free()
New. Free security data attached to cred->security.
(*) security_prepare_creds(), ->cred_prepare()
New. Duplicate any security data attached to cred->security.
(*) security_commit_creds(), ->cred_commit()
New. Apply any security effects for the upcoming installation of new
security by commit_creds().
(*) security_task_post_setuid(), ->task_post_setuid()
Removed in favour of security_task_fix_setuid().
(*) security_task_fix_setuid(), ->task_fix_setuid()
Fix up the proposed new credentials for setuid(). This is used by
cap_set_fix_setuid() to implicitly adjust capabilities in line with
setuid() changes. Changes are made to the new credentials, rather
than the task itself as in security_task_post_setuid().
(*) security_task_reparent_to_init(), ->task_reparent_to_init()
Removed. Instead the task being reparented to init is referred
directly to init's credentials.
NOTE! This results in the loss of some state: SELinux's osid no
longer records the sid of the thread that forked it.
(*) security_key_alloc(), ->key_alloc()
(*) security_key_permission(), ->key_permission()
Changed. These now take cred pointers rather than task pointers to
refer to the security context.
(4) sys_capset().
This has been simplified and uses less locking. The LSM functions it
calls have been merged.
(5) reparent_to_kthreadd().
This gives the current thread the same credentials as init by simply using
commit_thread() to point that way.
(6) __sigqueue_alloc() and switch_uid()
__sigqueue_alloc() can't stop the target task from changing its creds
beneath it, so this function gets a reference to the currently applicable
user_struct which it then passes into the sigqueue struct it returns if
successful.
switch_uid() is now called from commit_creds(), and possibly should be
folded into that. commit_creds() should take care of protecting
__sigqueue_alloc().
(7) [sg]et[ug]id() and co and [sg]et_current_groups.
The set functions now all use prepare_creds(), commit_creds() and
abort_creds() to build and check a new set of credentials before applying
it.
security_task_set[ug]id() is called inside the prepared section. This
guarantees that nothing else will affect the creds until we've finished.
The calling of set_dumpable() has been moved into commit_creds().
Much of the functionality of set_user() has been moved into
commit_creds().
The get functions all simply access the data directly.
(8) security_task_prctl() and cap_task_prctl().
security_task_prctl() has been modified to return -ENOSYS if it doesn't
want to handle a function, or otherwise return the return value directly
rather than through an argument.
Additionally, cap_task_prctl() now prepares a new set of credentials, even
if it doesn't end up using it.
(9) Keyrings.
A number of changes have been made to the keyrings code:
(a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have
all been dropped and built in to the credentials functions directly.
They may want separating out again later.
(b) key_alloc() and search_process_keyrings() now take a cred pointer
rather than a task pointer to specify the security context.
(c) copy_creds() gives a new thread within the same thread group a new
thread keyring if its parent had one, otherwise it discards the thread
keyring.
(d) The authorisation key now points directly to the credentials to extend
the search into rather pointing to the task that carries them.
(e) Installing thread, process or session keyrings causes a new set of
credentials to be created, even though it's not strictly necessary for
process or session keyrings (they're shared).
(10) Usermode helper.
The usermode helper code now carries a cred struct pointer in its
subprocess_info struct instead of a new session keyring pointer. This set
of credentials is derived from init_cred and installed on the new process
after it has been cloned.
call_usermodehelper_setup() allocates the new credentials and
call_usermodehelper_freeinfo() discards them if they haven't been used. A
special cred function (prepare_usermodeinfo_creds()) is provided
specifically for call_usermodehelper_setup() to call.
call_usermodehelper_setkeys() adjusts the credentials to sport the
supplied keyring as the new session keyring.
(11) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) selinux_setprocattr() no longer does its check for whether the
current ptracer can access processes with the new SID inside the lock
that covers getting the ptracer's SID. Whilst this lock ensures that
the check is done with the ptracer pinned, the result is only valid
until the lock is released, so there's no point doing it inside the
lock.
(12) is_single_threaded().
This function has been extracted from selinux_setprocattr() and put into
a file of its own in the lib/ directory as join_session_keyring() now
wants to use it too.
The code in SELinux just checked to see whether a task shared mm_structs
with other tasks (CLONE_VM), but that isn't good enough. We really want
to know if they're part of the same thread group (CLONE_THREAD).
(13) nfsd.
The NFS server daemon now has to use the COW credentials to set the
credentials it is going to use. It really needs to pass the credentials
down to the functions it calls, but it can't do that until other patches
in this series have been applied.
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:23 +11:00
if ( tsec - > keycreate_sid )
ksec - > sid = tsec - > keycreate_sid ;
2006-06-26 00:24:57 -07:00
else
CRED: Inaugurate COW credentials
Inaugurate copy-on-write credentials management. This uses RCU to manage the
credentials pointer in the task_struct with respect to accesses by other tasks.
A process may only modify its own credentials, and so does not need locking to
access or modify its own credentials.
A mutex (cred_replace_mutex) is added to the task_struct to control the effect
of PTRACE_ATTACHED on credential calculations, particularly with respect to
execve().
With this patch, the contents of an active credentials struct may not be
changed directly; rather a new set of credentials must be prepared, modified
and committed using something like the following sequence of events:
struct cred *new = prepare_creds();
int ret = blah(new);
if (ret < 0) {
abort_creds(new);
return ret;
}
return commit_creds(new);
There are some exceptions to this rule: the keyrings pointed to by the active
credentials may be instantiated - keyrings violate the COW rule as managing
COW keyrings is tricky, given that it is possible for a task to directly alter
the keys in a keyring in use by another task.
To help enforce this, various pointers to sets of credentials, such as those in
the task_struct, are declared const. The purpose of this is compile-time
discouragement of altering credentials through those pointers. Once a set of
credentials has been made public through one of these pointers, it may not be
modified, except under special circumstances:
(1) Its reference count may incremented and decremented.
(2) The keyrings to which it points may be modified, but not replaced.
The only safe way to modify anything else is to create a replacement and commit
using the functions described in Documentation/credentials.txt (which will be
added by a later patch).
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
This now prepares and commits credentials in various places in the
security code rather than altering the current creds directly.
(2) Temporary credential overrides.
do_coredump() and sys_faccessat() now prepare their own credentials and
temporarily override the ones currently on the acting thread, whilst
preventing interference from other threads by holding cred_replace_mutex
on the thread being dumped.
This will be replaced in a future patch by something that hands down the
credentials directly to the functions being called, rather than altering
the task's objective credentials.
(3) LSM interface.
A number of functions have been changed, added or removed:
(*) security_capset_check(), ->capset_check()
(*) security_capset_set(), ->capset_set()
Removed in favour of security_capset().
(*) security_capset(), ->capset()
New. This is passed a pointer to the new creds, a pointer to the old
creds and the proposed capability sets. It should fill in the new
creds or return an error. All pointers, barring the pointer to the
new creds, are now const.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
Changed; now returns a value, which will cause the process to be
killed if it's an error.
(*) security_task_alloc(), ->task_alloc_security()
Removed in favour of security_prepare_creds().
(*) security_cred_free(), ->cred_free()
New. Free security data attached to cred->security.
(*) security_prepare_creds(), ->cred_prepare()
New. Duplicate any security data attached to cred->security.
(*) security_commit_creds(), ->cred_commit()
New. Apply any security effects for the upcoming installation of new
security by commit_creds().
(*) security_task_post_setuid(), ->task_post_setuid()
Removed in favour of security_task_fix_setuid().
(*) security_task_fix_setuid(), ->task_fix_setuid()
Fix up the proposed new credentials for setuid(). This is used by
cap_set_fix_setuid() to implicitly adjust capabilities in line with
setuid() changes. Changes are made to the new credentials, rather
than the task itself as in security_task_post_setuid().
(*) security_task_reparent_to_init(), ->task_reparent_to_init()
Removed. Instead the task being reparented to init is referred
directly to init's credentials.
NOTE! This results in the loss of some state: SELinux's osid no
longer records the sid of the thread that forked it.
(*) security_key_alloc(), ->key_alloc()
(*) security_key_permission(), ->key_permission()
Changed. These now take cred pointers rather than task pointers to
refer to the security context.
(4) sys_capset().
This has been simplified and uses less locking. The LSM functions it
calls have been merged.
(5) reparent_to_kthreadd().
This gives the current thread the same credentials as init by simply using
commit_thread() to point that way.
(6) __sigqueue_alloc() and switch_uid()
__sigqueue_alloc() can't stop the target task from changing its creds
beneath it, so this function gets a reference to the currently applicable
user_struct which it then passes into the sigqueue struct it returns if
successful.
switch_uid() is now called from commit_creds(), and possibly should be
folded into that. commit_creds() should take care of protecting
__sigqueue_alloc().
(7) [sg]et[ug]id() and co and [sg]et_current_groups.
The set functions now all use prepare_creds(), commit_creds() and
abort_creds() to build and check a new set of credentials before applying
it.
security_task_set[ug]id() is called inside the prepared section. This
guarantees that nothing else will affect the creds until we've finished.
The calling of set_dumpable() has been moved into commit_creds().
Much of the functionality of set_user() has been moved into
commit_creds().
The get functions all simply access the data directly.
(8) security_task_prctl() and cap_task_prctl().
security_task_prctl() has been modified to return -ENOSYS if it doesn't
want to handle a function, or otherwise return the return value directly
rather than through an argument.
Additionally, cap_task_prctl() now prepares a new set of credentials, even
if it doesn't end up using it.
(9) Keyrings.
A number of changes have been made to the keyrings code:
(a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have
all been dropped and built in to the credentials functions directly.
They may want separating out again later.
(b) key_alloc() and search_process_keyrings() now take a cred pointer
rather than a task pointer to specify the security context.
(c) copy_creds() gives a new thread within the same thread group a new
thread keyring if its parent had one, otherwise it discards the thread
keyring.
(d) The authorisation key now points directly to the credentials to extend
the search into rather pointing to the task that carries them.
(e) Installing thread, process or session keyrings causes a new set of
credentials to be created, even though it's not strictly necessary for
process or session keyrings (they're shared).
(10) Usermode helper.
The usermode helper code now carries a cred struct pointer in its
subprocess_info struct instead of a new session keyring pointer. This set
of credentials is derived from init_cred and installed on the new process
after it has been cloned.
call_usermodehelper_setup() allocates the new credentials and
call_usermodehelper_freeinfo() discards them if they haven't been used. A
special cred function (prepare_usermodeinfo_creds()) is provided
specifically for call_usermodehelper_setup() to call.
call_usermodehelper_setkeys() adjusts the credentials to sport the
supplied keyring as the new session keyring.
(11) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) selinux_setprocattr() no longer does its check for whether the
current ptracer can access processes with the new SID inside the lock
that covers getting the ptracer's SID. Whilst this lock ensures that
the check is done with the ptracer pinned, the result is only valid
until the lock is released, so there's no point doing it inside the
lock.
(12) is_single_threaded().
This function has been extracted from selinux_setprocattr() and put into
a file of its own in the lib/ directory as join_session_keyring() now
wants to use it too.
The code in SELinux just checked to see whether a task shared mm_structs
with other tasks (CLONE_VM), but that isn't good enough. We really want
to know if they're part of the same thread group (CLONE_THREAD).
(13) nfsd.
The NFS server daemon now has to use the COW credentials to set the
credentials it is going to use. It really needs to pass the credentials
down to the functions it calls, but it can't do that until other patches
in this series have been applied.
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:23 +11:00
ksec - > sid = tsec - > sid ;
2006-06-22 14:47:17 -07:00
return 0 ;
}
static int selinux_key_permission ( key_ref_t key_ref ,
CRED: Inaugurate COW credentials
Inaugurate copy-on-write credentials management. This uses RCU to manage the
credentials pointer in the task_struct with respect to accesses by other tasks.
A process may only modify its own credentials, and so does not need locking to
access or modify its own credentials.
A mutex (cred_replace_mutex) is added to the task_struct to control the effect
of PTRACE_ATTACHED on credential calculations, particularly with respect to
execve().
With this patch, the contents of an active credentials struct may not be
changed directly; rather a new set of credentials must be prepared, modified
and committed using something like the following sequence of events:
struct cred *new = prepare_creds();
int ret = blah(new);
if (ret < 0) {
abort_creds(new);
return ret;
}
return commit_creds(new);
There are some exceptions to this rule: the keyrings pointed to by the active
credentials may be instantiated - keyrings violate the COW rule as managing
COW keyrings is tricky, given that it is possible for a task to directly alter
the keys in a keyring in use by another task.
To help enforce this, various pointers to sets of credentials, such as those in
the task_struct, are declared const. The purpose of this is compile-time
discouragement of altering credentials through those pointers. Once a set of
credentials has been made public through one of these pointers, it may not be
modified, except under special circumstances:
(1) Its reference count may be incremented and decremented.
(2) The keyrings to which it points may be modified, but not replaced.
The only safe way to modify anything else is to create a replacement and commit
using the functions described in Documentation/credentials.txt (which will be
added by a later patch).
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
This now prepares and commits credentials in various places in the
security code rather than altering the current creds directly.
(2) Temporary credential overrides.
do_coredump() and sys_faccessat() now prepare their own credentials and
temporarily override the ones currently on the acting thread, whilst
preventing interference from other threads by holding cred_replace_mutex
on the thread being dumped.
This will be replaced in a future patch by something that hands down the
credentials directly to the functions being called, rather than altering
the task's objective credentials.
(3) LSM interface.
A number of functions have been changed, added or removed:
(*) security_capset_check(), ->capset_check()
(*) security_capset_set(), ->capset_set()
Removed in favour of security_capset().
(*) security_capset(), ->capset()
New. This is passed a pointer to the new creds, a pointer to the old
creds and the proposed capability sets. It should fill in the new
creds or return an error. All pointers, barring the pointer to the
new creds, are now const.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
Changed; now returns a value, which will cause the process to be
killed if it's an error.
(*) security_task_alloc(), ->task_alloc_security()
Removed in favour of security_prepare_creds().
(*) security_cred_free(), ->cred_free()
New. Free security data attached to cred->security.
(*) security_prepare_creds(), ->cred_prepare()
New. Duplicate any security data attached to cred->security.
(*) security_commit_creds(), ->cred_commit()
New. Apply any security effects for the upcoming installation of new
security by commit_creds().
(*) security_task_post_setuid(), ->task_post_setuid()
Removed in favour of security_task_fix_setuid().
(*) security_task_fix_setuid(), ->task_fix_setuid()
Fix up the proposed new credentials for setuid(). This is used by
cap_set_fix_setuid() to implicitly adjust capabilities in line with
setuid() changes. Changes are made to the new credentials, rather
than the task itself as in security_task_post_setuid().
(*) security_task_reparent_to_init(), ->task_reparent_to_init()
Removed. Instead the task being reparented to init is referred
directly to init's credentials.
NOTE! This results in the loss of some state: SELinux's osid no
longer records the sid of the thread that forked it.
(*) security_key_alloc(), ->key_alloc()
(*) security_key_permission(), ->key_permission()
Changed. These now take cred pointers rather than task pointers to
refer to the security context.
(4) sys_capset().
This has been simplified and uses less locking. The LSM functions it
calls have been merged.
(5) reparent_to_kthreadd().
This gives the current thread the same credentials as init by simply using
commit_thread() to point that way.
(6) __sigqueue_alloc() and switch_uid()
__sigqueue_alloc() can't stop the target task from changing its creds
beneath it, so this function gets a reference to the currently applicable
user_struct which it then passes into the sigqueue struct it returns if
successful.
switch_uid() is now called from commit_creds(), and possibly should be
folded into that. commit_creds() should take care of protecting
__sigqueue_alloc().
(7) [sg]et[ug]id() and co and [sg]et_current_groups.
The set functions now all use prepare_creds(), commit_creds() and
abort_creds() to build and check a new set of credentials before applying
it.
security_task_set[ug]id() is called inside the prepared section. This
guarantees that nothing else will affect the creds until we've finished.
The calling of set_dumpable() has been moved into commit_creds().
Much of the functionality of set_user() has been moved into
commit_creds().
The get functions all simply access the data directly.
(8) security_task_prctl() and cap_task_prctl().
security_task_prctl() has been modified to return -ENOSYS if it doesn't
want to handle a function, or otherwise return the return value directly
rather than through an argument.
Additionally, cap_task_prctl() now prepares a new set of credentials, even
if it doesn't end up using it.
(9) Keyrings.
A number of changes have been made to the keyrings code:
(a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have
all been dropped and built in to the credentials functions directly.
They may want separating out again later.
(b) key_alloc() and search_process_keyrings() now take a cred pointer
rather than a task pointer to specify the security context.
(c) copy_creds() gives a new thread within the same thread group a new
thread keyring if its parent had one, otherwise it discards the thread
keyring.
(d) The authorisation key now points directly to the credentials to extend
the search into rather pointing to the task that carries them.
(e) Installing thread, process or session keyrings causes a new set of
credentials to be created, even though it's not strictly necessary for
process or session keyrings (they're shared).
(10) Usermode helper.
The usermode helper code now carries a cred struct pointer in its
subprocess_info struct instead of a new session keyring pointer. This set
of credentials is derived from init_cred and installed on the new process
after it has been cloned.
call_usermodehelper_setup() allocates the new credentials and
call_usermodehelper_freeinfo() discards them if they haven't been used. A
special cred function (prepare_usermodeinfo_creds()) is provided
specifically for call_usermodehelper_setup() to call.
call_usermodehelper_setkeys() adjusts the credentials to sport the
supplied keyring as the new session keyring.
(11) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) selinux_setprocattr() no longer does its check for whether the
current ptracer can access processes with the new SID inside the lock
that covers getting the ptracer's SID. Whilst this lock ensures that
the check is done with the ptracer pinned, the result is only valid
until the lock is released, so there's no point doing it inside the
lock.
(12) is_single_threaded().
This function has been extracted from selinux_setprocattr() and put into
a file of its own in the lib/ directory as join_session_keyring() now
wants to use it too.
The code in SELinux just checked to see whether a task shared mm_structs
with other tasks (CLONE_VM), but that isn't good enough. We really want
to know if they're part of the same thread group (CLONE_THREAD).
(13) nfsd.
The NFS server daemon now has to use the COW credentials to set the
credentials it is going to use. It really needs to pass the credentials
down to the functions it calls, but it can't do that until other patches
in this series have been applied.
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:23 +11:00
const struct cred * cred ,
2020-05-12 15:16:29 +01:00
enum key_need_perm need_perm )
2006-06-22 14:47:17 -07:00
{
struct key * key ;
struct key_security_struct * ksec ;
2020-05-12 15:16:29 +01:00
u32 perm , sid ;
2006-06-22 14:47:17 -07:00
2020-05-12 15:16:29 +01:00
switch ( need_perm ) {
case KEY_NEED_VIEW :
perm = KEY__VIEW ;
break ;
case KEY_NEED_READ :
perm = KEY__READ ;
break ;
case KEY_NEED_WRITE :
perm = KEY__WRITE ;
break ;
case KEY_NEED_SEARCH :
perm = KEY__SEARCH ;
break ;
case KEY_NEED_LINK :
perm = KEY__LINK ;
break ;
case KEY_NEED_SETATTR :
perm = KEY__SETATTR ;
break ;
case KEY_NEED_UNLINK :
case KEY_SYSADMIN_OVERRIDE :
case KEY_AUTHTOKEN_OVERRIDE :
case KEY_DEFER_PERM_CHECK :
2006-06-22 14:47:17 -07:00
return 0 ;
2020-05-12 15:16:29 +01:00
default :
WARN_ON ( 1 ) ;
return - EPERM ;
2006-06-22 14:47:17 -07:00
2020-05-12 15:16:29 +01:00
}
2008-11-14 10:39:19 +11:00
2020-05-12 15:16:29 +01:00
sid = cred_sid ( cred ) ;
2008-11-14 10:39:19 +11:00
key = key_ref_to_ptr ( key_ref ) ;
2024-07-10 14:32:26 -07:00
ksec = selinux_key ( key ) ;
2008-11-14 10:39:19 +11:00
2023-03-09 13:30:37 -05:00
return avc_has_perm ( sid , ksec - > sid , SECCLASS_KEY , perm , NULL ) ;
2006-06-22 14:47:17 -07:00
}
2008-04-29 01:01:26 -07:00
/*
 * selinux_key_getsecurity - return the security context string of a key.
 *
 * Converts the SID held in the key's SELinux blob to a context string with
 * security_sid_to_context() and stores the resulting pointer in *_buffer
 * (it stays NULL if the conversion did not set it).
 *
 * Returns the length reported by security_sid_to_context() on success, or
 * its non-zero error code otherwise.
 *
 * NOTE(review): presumably the caller owns and frees *_buffer - confirm
 * against the callers of this hook.
 */
static int selinux_key_getsecurity ( struct key * key , char * * _buffer )
{
2024-07-10 14:32:26 -07:00
struct key_security_struct * ksec = selinux_key ( key ) ;
2008-04-29 01:01:26 -07:00
char * context = NULL ;
unsigned len ;
int rc ;
2023-03-09 13:30:37 -05:00
rc = security_sid_to_context ( ksec - > sid ,
2018-03-01 18:48:02 -05:00
& context , & len ) ;
2008-04-29 01:01:26 -07:00
/* On success the context length becomes the return value. */
if ( ! rc )
rc = len ;
* _buffer = context ;
return rc ;
}
2020-01-14 17:07:13 +00:00
# ifdef CONFIG_KEY_NOTIFICATIONS
/*
 * selinux_watch_key - check whether the current task may watch a key.
 *
 * Requires KEY__VIEW, in class SECCLASS_KEY, from the current task's SID
 * (current_sid()) to the SID held in the key's SELinux blob. Returns the
 * avc_has_perm() result.
 */
static int selinux_watch_key ( struct key * key )
{
2024-09-19 11:37:11 -04:00
struct key_security_struct * ksec = selinux_key ( key ) ;
2020-01-14 17:07:13 +00:00
u32 sid = current_sid ( ) ;
2023-03-09 13:30:37 -05:00
return avc_has_perm ( sid , ksec - > sid , SECCLASS_KEY , KEY__VIEW , NULL ) ;
2020-01-14 17:07:13 +00:00
}
# endif
2017-05-19 15:48:56 +03:00
# endif
# ifdef CONFIG_SECURITY_INFINIBAND
2017-05-19 15:48:57 +03:00
/*
 * selinux_ib_pkey_access - check access to an InfiniBand partition key.
 *
 * Looks up the SID of the (subnet_prefix, pkey_val) pair with
 * sel_ib_pkey_sid() and checks INFINIBAND_PKEY__ACCESS, in class
 * SECCLASS_INFINIBAND_PKEY, from the SID stored in the SELinux part of
 * ib_sec to that pkey SID. The subnet prefix and pkey are attached to the
 * audit record.
 *
 * Returns the sel_ib_pkey_sid() error if the lookup fails, otherwise the
 * avc_has_perm() result.
 */
static int selinux_ib_pkey_access ( void * ib_sec , u64 subnet_prefix , u16 pkey_val )
{
struct common_audit_data ad ;
int err ;
u32 sid = 0 ;
/*
 * Reach the SELinux state through selinux_ib(), exactly as
 * selinux_ib_alloc_security() does when it writes the SID, instead of
 * treating ib_sec itself as the SELinux structure.
 */
struct ib_security_struct * sec = selinux_ib ( ib_sec ) ;
struct lsm_ibpkey_audit ibpkey ;
2017-05-19 15:48:59 +03:00
err = sel_ib_pkey_sid ( subnet_prefix , pkey_val , & sid ) ;
2017-05-19 15:48:57 +03:00
if ( err )
return err ;
ad . type = LSM_AUDIT_DATA_IBPKEY ;
ibpkey . subnet_prefix = subnet_prefix ;
ibpkey . pkey = pkey_val ;
ad . u . ibpkey = & ibpkey ;
2023-03-09 13:30:37 -05:00
return avc_has_perm ( sec - > sid , sid ,
2017-05-19 15:48:57 +03:00
SECCLASS_INFINIBAND_PKEY ,
INFINIBAND_PKEY__ACCESS , & ad ) ;
}
2017-05-19 15:48:58 +03:00
/*
 * selinux_ib_endport_manage_subnet - check subnet management on an endport.
 *
 * Looks up the SID of the (dev_name, port_num) end port with
 * security_ib_endport_sid() and checks INFINIBAND_ENDPORT__MANAGE_SUBNET,
 * in class SECCLASS_INFINIBAND_ENDPORT, from the SID stored in the SELinux
 * part of ib_sec to that end port SID. The device name and port number are
 * attached to the audit record.
 *
 * Returns the security_ib_endport_sid() error if the lookup fails,
 * otherwise the avc_has_perm() result.
 */
static int selinux_ib_endport_manage_subnet ( void * ib_sec , const char * dev_name ,
u8 port_num )
{
struct common_audit_data ad ;
int err ;
u32 sid = 0 ;
/*
 * Reach the SELinux state through selinux_ib(), exactly as
 * selinux_ib_alloc_security() does when it writes the SID, instead of
 * treating ib_sec itself as the SELinux structure.
 */
struct ib_security_struct * sec = selinux_ib ( ib_sec ) ;
struct lsm_ibendport_audit ibendport ;
2023-03-09 13:30:37 -05:00
err = security_ib_endport_sid ( dev_name , port_num ,
2018-03-01 18:48:02 -05:00
& sid ) ;
2017-05-19 15:48:58 +03:00
if ( err )
return err ;
ad . type = LSM_AUDIT_DATA_IBENDPORT ;
2021-05-12 16:32:10 +02:00
ibendport . dev_name = dev_name ;
2017-05-19 15:48:58 +03:00
ibendport . port = port_num ;
ad . u . ibendport = & ibendport ;
2023-03-09 13:30:37 -05:00
return avc_has_perm ( sec - > sid , sid ,
2017-05-19 15:48:58 +03:00
SECCLASS_INFINIBAND_ENDPORT ,
INFINIBAND_ENDPORT__MANAGE_SUBNET , & ad ) ;
}
2024-07-10 14:32:29 -07:00
/*
 * selinux_ib_alloc_security - initialise the SELinux InfiniBand state.
 *
 * Stamps the SELinux portion of ib_sec (located with selinux_ib()) with
 * the SID of the current task. Always returns 0.
 */
static int selinux_ib_alloc_security ( void * ib_sec )
2017-05-19 15:48:56 +03:00
{
2024-07-10 14:32:29 -07:00
struct ib_security_struct * sec = selinux_ib ( ib_sec ) ;
2008-04-29 01:01:26 -07:00
2017-05-19 15:48:56 +03:00
sec - > sid = current_sid ( ) ;
return 0 ;
}
2006-06-22 14:47:17 -07:00
# endif
2017-10-18 13:00:25 -07:00
# ifdef CONFIG_BPF_SYSCALL
/*
 * selinux_bpf - permission check for a bpf command.
 *
 * BPF_MAP_CREATE and BPF_PROG_LOAD are checked against BPF__MAP_CREATE and
 * BPF__PROG_LOAD respectively, in class SECCLASS_BPF, with the current
 * task's SID used as both source and target. Every other cmd value returns
 * 0. attr and size are not examined here.
 */
static int selinux_bpf ( int cmd , union bpf_attr * attr ,
unsigned int size )
{
u32 sid = current_sid ( ) ;
int ret ;
switch ( cmd ) {
case BPF_MAP_CREATE :
2023-03-09 13:30:37 -05:00
ret = avc_has_perm ( sid , sid , SECCLASS_BPF , BPF__MAP_CREATE ,
2017-10-18 13:00:25 -07:00
NULL ) ;
break ;
case BPF_PROG_LOAD :
2023-03-09 13:30:37 -05:00
ret = avc_has_perm ( sid , sid , SECCLASS_BPF , BPF__PROG_LOAD ,
2017-10-18 13:00:25 -07:00
NULL ) ;
break ;
default :
/* No SELinux check is applied to the remaining commands. */
ret = 0 ;
break ;
}
return ret ;
}
/*
 * Translate the FMODE_READ / FMODE_WRITE bits of a file mode into the
 * matching BPF__MAP_READ / BPF__MAP_WRITE access vector bits. A mode with
 * neither bit set yields 0.
 */
static u32 bpf_map_fmode_to_av(fmode_t fmode)
{
	u32 read_av = (fmode & FMODE_READ) ? BPF__MAP_READ : 0;
	u32 write_av = (fmode & FMODE_WRITE) ? BPF__MAP_WRITE : 0;

	return read_av | write_av;
}
2017-10-18 13:00:26 -07:00
/* This function will check the file pass through unix socket or binder to see
2022-06-09 00:36:16 +02:00
* if it is a bpf related object . And apply corresponding checks on the bpf
2017-10-18 13:00:26 -07:00
* object based on the type . The bpf maps and programs , not like other files and
* socket , are using a shared anonymous inode inside the kernel as their inode .
* So checking that inode cannot identify if the process have privilege to
* access the bpf object and that ' s why we have to add this additional check in
* selinux_file_receive and selinux_binder_transfer_files .
*/
2023-08-12 20:31:08 +05:00
/*
 * bpf_fd_pass - check that sid may use the bpf object behind file.
 *
 * A file whose f_op is bpf_map_fops is checked for the BPF__MAP_READ /
 * BPF__MAP_WRITE bits derived from file->f_mode against the SID stored in
 * map->security. A file whose f_op is bpf_prog_fops is checked for
 * BPF__PROG_RUN against the SID stored in prog->aux->security. Any other
 * file passes without a check.
 *
 * Returns 0 on success or the avc_has_perm() error.
 */
static int bpf_fd_pass ( const struct file * file , u32 sid )
2017-10-18 13:00:26 -07:00
{
struct bpf_security_struct * bpfsec ;
struct bpf_prog * prog ;
struct bpf_map * map ;
int ret ;
if ( file - > f_op = = & bpf_map_fops ) {
map = file - > private_data ;
bpfsec = map - > security ;
2023-03-09 13:30:37 -05:00
ret = avc_has_perm ( sid , bpfsec - > sid , SECCLASS_BPF ,
2017-10-18 13:00:26 -07:00
bpf_map_fmode_to_av ( file - > f_mode ) , NULL ) ;
if ( ret )
return ret ;
} else if ( file - > f_op = = & bpf_prog_fops ) {
prog = file - > private_data ;
bpfsec = prog - > aux - > security ;
2023-03-09 13:30:37 -05:00
ret = avc_has_perm ( sid , bpfsec - > sid , SECCLASS_BPF ,
2017-10-18 13:00:26 -07:00
BPF__PROG_RUN , NULL ) ;
if ( ret )
return ret ;
}
return 0 ;
}
2017-10-18 13:00:25 -07:00
/*
 * selinux_bpf_map - check the current task's access to a bpf map.
 *
 * Converts fmode to BPF__MAP_READ / BPF__MAP_WRITE bits with
 * bpf_map_fmode_to_av() and checks them, in class SECCLASS_BPF, from the
 * current task's SID to the SID stored in map->security. Returns the
 * avc_has_perm() result.
 */
static int selinux_bpf_map ( struct bpf_map * map , fmode_t fmode )
{
u32 sid = current_sid ( ) ;
struct bpf_security_struct * bpfsec ;
bpfsec = map - > security ;
2023-03-09 13:30:37 -05:00
return avc_has_perm ( sid , bpfsec - > sid , SECCLASS_BPF ,
2017-10-18 13:00:25 -07:00
bpf_map_fmode_to_av ( fmode ) , NULL ) ;
}
/*
 * selinux_bpf_prog - check the current task's access to a bpf program.
 *
 * Checks BPF__PROG_RUN, in class SECCLASS_BPF, from the current task's SID
 * to the SID stored in prog->aux->security. Returns the avc_has_perm()
 * result.
 */
static int selinux_bpf_prog ( struct bpf_prog * prog )
{
u32 sid = current_sid ( ) ;
struct bpf_security_struct * bpfsec ;
bpfsec = prog - > aux - > security ;
2023-03-09 13:30:37 -05:00
return avc_has_perm ( sid , bpfsec - > sid , SECCLASS_BPF ,
2017-10-18 13:00:25 -07:00
BPF__PROG_RUN , NULL ) ;
}
bpf,lsm: Refactor bpf_map_alloc/bpf_map_free LSM hooks
Similarly to bpf_prog_alloc LSM hook, rename and extend bpf_map_alloc
hook into bpf_map_create, taking not just struct bpf_map, but also
bpf_attr and bpf_token, to give a fuller context to LSMs.
Unlike bpf_prog_alloc, there is no need to move the hook around, as it
currently is firing right before allocating BPF map ID and FD, which
seems to be a sweet spot.
But like bpf_prog_alloc/bpf_prog_free combo, make sure that bpf_map_free
LSM hook is called even if bpf_map_create hook returned error, as if few
LSMs are combined together it could be that one LSM successfully
allocated security blob for its needs, while subsequent LSM rejected BPF
map creation. The former LSM would still need to free up LSM blob, so we
need to ensure security_bpf_map_free() is called regardless of the
outcome.
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Paul Moore <paul@paul-moore.com>
Link: https://lore.kernel.org/bpf/20240124022127.2379740-11-andrii@kernel.org
2024-01-23 18:21:07 -08:00
/*
 * selinux_bpf_map_create - attach SELinux state to a new bpf map.
 *
 * Allocates a zeroed bpf_security_struct with GFP_KERNEL, records the
 * current task's SID in it and stores it in map->security. attr and token
 * are not used here.
 *
 * Returns 0 on success or -ENOMEM if the allocation fails.
 */
static int selinux_bpf_map_create ( struct bpf_map * map , union bpf_attr * attr ,
struct bpf_token * token )
2017-10-18 13:00:25 -07:00
{
struct bpf_security_struct * bpfsec ;
bpfsec = kzalloc ( sizeof ( * bpfsec ) , GFP_KERNEL ) ;
if ( ! bpfsec )
return - ENOMEM ;
bpfsec - > sid = current_sid ( ) ;
map - > security = bpfsec ;
return 0 ;
}
static void selinux_bpf_map_free ( struct bpf_map * map )
{
struct bpf_security_struct * bpfsec = map - > security ;
map - > security = NULL ;
kfree ( bpfsec ) ;
}
bpf,lsm: Refactor bpf_prog_alloc/bpf_prog_free LSM hooks
Based on upstream discussion ([0]), rework existing
bpf_prog_alloc_security LSM hook. Rename it to bpf_prog_load and instead
of passing bpf_prog_aux, pass proper bpf_prog pointer for a full BPF
program struct. Also, we pass bpf_attr union with all the user-provided
arguments for BPF_PROG_LOAD command. This will give LSMs as much
information as we can basically provide.
The hook is also BPF token-aware now, and optional bpf_token struct is
passed as a third argument. bpf_prog_load LSM hook is called after
a bunch of sanity checks were performed, bpf_prog and bpf_prog_aux were
allocated and filled out, but right before performing full-fledged BPF
verification step.
bpf_prog_free LSM hook is now accepting struct bpf_prog argument, for
consistency. SELinux code is adjusted to all new names, types, and
signatures.
Note, given that bpf_prog_load (previously bpf_prog_alloc) hook can be
used by some LSMs to allocate extra security blob, but also by other
LSMs to reject BPF program loading, we need to make sure that
bpf_prog_free LSM hook is called after bpf_prog_load/bpf_prog_alloc one
*even* if the hook itself returned error. If we don't do that, we run
the risk of leaking memory. This seems to be possible today when
combining SELinux and BPF LSM, as one example, depending on their
relative ordering.
Also, for BPF LSM setup, add bpf_prog_load and bpf_prog_free to
sleepable LSM hooks list, as they are both executed in sleepable
context. Also drop bpf_prog_load hook from untrusted, as there is no
issue with refcount or anything else anymore, that originally forced us
to add it to untrusted list in c0c852dd1876 ("bpf: Do not mark certain LSM
hook arguments as trusted"). We now trigger this hook much later and it
should not be an issue anymore.
[0] https://lore.kernel.org/bpf/9fe88aef7deabbe87d3fc38c4aea3c69.paul@paul-moore.com/
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Paul Moore <paul@paul-moore.com>
Link: https://lore.kernel.org/bpf/20240124022127.2379740-10-andrii@kernel.org
2024-01-23 18:21:06 -08:00
/*
 * selinux_bpf_prog_load - attach SELinux state to a bpf program being loaded.
 *
 * Allocates a zeroed bpf_security_struct with GFP_KERNEL, records the
 * current task's SID in it and stores it in prog->aux->security. attr and
 * token are not used here.
 *
 * Returns 0 on success or -ENOMEM if the allocation fails.
 */
static int selinux_bpf_prog_load ( struct bpf_prog * prog , union bpf_attr * attr ,
struct bpf_token * token )
2017-10-18 13:00:25 -07:00
{
struct bpf_security_struct * bpfsec ;
bpfsec = kzalloc ( sizeof ( * bpfsec ) , GFP_KERNEL ) ;
if ( ! bpfsec )
return - ENOMEM ;
bpfsec - > sid = current_sid ( ) ;
bpf,lsm: Refactor bpf_prog_alloc/bpf_prog_free LSM hooks
Based on upstream discussion ([0]), rework existing
bpf_prog_alloc_security LSM hook. Rename it to bpf_prog_load and instead
of passing bpf_prog_aux, pass proper bpf_prog pointer for a full BPF
program struct. Also, we pass bpf_attr union with all the user-provided
arguments for BPF_PROG_LOAD command. This will give LSMs as much
information as we can basically provide.
The hook is also BPF token-aware now, and optional bpf_token struct is
passed as a third argument. bpf_prog_load LSM hook is called after
a bunch of sanity checks were performed, bpf_prog and bpf_prog_aux were
allocated and filled out, but right before performing full-fledged BPF
verification step.
bpf_prog_free LSM hook is now accepting struct bpf_prog argument, for
consistency. SELinux code is adjusted to all new names, types, and
signatures.
Note, given that bpf_prog_load (previously bpf_prog_alloc) hook can be
used by some LSMs to allocate extra security blob, but also by other
LSMs to reject BPF program loading, we need to make sure that
bpf_prog_free LSM hook is called after bpf_prog_load/bpf_prog_alloc one
*even* if the hook itself returned error. If we don't do that, we run
the risk of leaking memory. This seems to be possible today when
combining SELinux and BPF LSM, as one example, depending on their
relative ordering.
Also, for BPF LSM setup, add bpf_prog_load and bpf_prog_free to
sleepable LSM hooks list, as they are both executed in sleepable
context. Also drop bpf_prog_load hook from untrusted, as there is no
issue with refcount or anything else anymore, that originally forced us
to add it to untrusted list in c0c852dd1876 ("bpf: Do not mark certain LSM
hook arguments as trusted"). We now trigger this hook much later and it
should not be an issue anymore.
[0] https://lore.kernel.org/bpf/9fe88aef7deabbe87d3fc38c4aea3c69.paul@paul-moore.com/
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Paul Moore <paul@paul-moore.com>
Link: https://lore.kernel.org/bpf/20240124022127.2379740-10-andrii@kernel.org
2024-01-23 18:21:06 -08:00
prog - > aux - > security = bpfsec ;
2017-10-18 13:00:25 -07:00
return 0 ;
}
bpf,lsm: Refactor bpf_prog_alloc/bpf_prog_free LSM hooks
Based on upstream discussion ([0]), rework existing
bpf_prog_alloc_security LSM hook. Rename it to bpf_prog_load and instead
of passing bpf_prog_aux, pass proper bpf_prog pointer for a full BPF
program struct. Also, we pass bpf_attr union with all the user-provided
arguments for BPF_PROG_LOAD command. This will give LSMs as much
information as we can basically provide.
The hook is also BPF token-aware now, and optional bpf_token struct is
passed as a third argument. bpf_prog_load LSM hook is called after
a bunch of sanity checks were performed, bpf_prog and bpf_prog_aux were
allocated and filled out, but right before performing full-fledged BPF
verification step.
bpf_prog_free LSM hook is now accepting struct bpf_prog argument, for
consistency. SELinux code is adjusted to all new names, types, and
signatures.
Note, given that bpf_prog_load (previously bpf_prog_alloc) hook can be
used by some LSMs to allocate extra security blob, but also by other
LSMs to reject BPF program loading, we need to make sure that
bpf_prog_free LSM hook is called after bpf_prog_load/bpf_prog_alloc one
*even* if the hook itself returned error. If we don't do that, we run
the risk of leaking memory. This seems to be possible today when
combining SELinux and BPF LSM, as one example, depending on their
relative ordering.
Also, for BPF LSM setup, add bpf_prog_load and bpf_prog_free to
sleepable LSM hooks list, as they are both executed in sleepable
context. Also drop bpf_prog_load hook from untrusted, as there is no
issue with refcount or anything else anymore, that originally forced us
to add it to untrusted list in c0c852dd1876 ("bpf: Do not mark certain LSM
hook arguments as trusted"). We now trigger this hook much later and it
should not be an issue anymore.
[0] https://lore.kernel.org/bpf/9fe88aef7deabbe87d3fc38c4aea3c69.paul@paul-moore.com/
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Paul Moore <paul@paul-moore.com>
Link: https://lore.kernel.org/bpf/20240124022127.2379740-10-andrii@kernel.org
2024-01-23 18:21:06 -08:00
/*
 * selinux_bpf_prog_free - release the SELinux state of a bpf program.
 *
 * Clears prog->aux->security and then frees the bpf_security_struct it
 * pointed to with kfree().
 */
static void selinux_bpf_prog_free ( struct bpf_prog * prog )
2017-10-18 13:00:25 -07:00
{
bpf,lsm: Refactor bpf_prog_alloc/bpf_prog_free LSM hooks
Based on upstream discussion ([0]), rework existing
bpf_prog_alloc_security LSM hook. Rename it to bpf_prog_load and instead
of passing bpf_prog_aux, pass proper bpf_prog pointer for a full BPF
program struct. Also, we pass bpf_attr union with all the user-provided
arguments for BPF_PROG_LOAD command. This will give LSMs as much
information as we can basically provide.
The hook is also BPF token-aware now, and optional bpf_token struct is
passed as a third argument. bpf_prog_load LSM hook is called after
a bunch of sanity checks were performed, bpf_prog and bpf_prog_aux were
allocated and filled out, but right before performing full-fledged BPF
verification step.
bpf_prog_free LSM hook is now accepting struct bpf_prog argument, for
consistency. SELinux code is adjusted to all new names, types, and
signatures.
Note, given that bpf_prog_load (previously bpf_prog_alloc) hook can be
used by some LSMs to allocate extra security blob, but also by other
LSMs to reject BPF program loading, we need to make sure that
bpf_prog_free LSM hook is called after bpf_prog_load/bpf_prog_alloc one
*even* if the hook itself returned error. If we don't do that, we run
the risk of leaking memory. This seems to be possible today when
combining SELinux and BPF LSM, as one example, depending on their
relative ordering.
Also, for BPF LSM setup, add bpf_prog_load and bpf_prog_free to
sleepable LSM hooks list, as they are both executed in sleepable
context. Also drop bpf_prog_load hook from untrusted, as there is no
issue with refcount or anything else anymore, that originally forced us
to add it to untrusted list in c0c852dd1876 ("bpf: Do not mark certain LSM
hook arguments as trusted"). We now trigger this hook much later and it
should not be an issue anymore.
[0] https://lore.kernel.org/bpf/9fe88aef7deabbe87d3fc38c4aea3c69.paul@paul-moore.com/
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Paul Moore <paul@paul-moore.com>
Link: https://lore.kernel.org/bpf/20240124022127.2379740-10-andrii@kernel.org
2024-01-23 18:21:06 -08:00
struct bpf_security_struct * bpfsec = prog - > aux - > security ;
2017-10-18 13:00:25 -07:00
bpf,lsm: Refactor bpf_prog_alloc/bpf_prog_free LSM hooks
Based on upstream discussion ([0]), rework existing
bpf_prog_alloc_security LSM hook. Rename it to bpf_prog_load and instead
of passing bpf_prog_aux, pass proper bpf_prog pointer for a full BPF
program struct. Also, we pass bpf_attr union with all the user-provided
arguments for BPF_PROG_LOAD command. This will give LSMs as much
information as we can basically provide.
The hook is also BPF token-aware now, and optional bpf_token struct is
passed as a third argument. bpf_prog_load LSM hook is called after
a bunch of sanity checks were performed, bpf_prog and bpf_prog_aux were
allocated and filled out, but right before performing full-fledged BPF
verification step.
bpf_prog_free LSM hook is now accepting struct bpf_prog argument, for
consistency. SELinux code is adjusted to all new names, types, and
signatures.
Note, given that bpf_prog_load (previously bpf_prog_alloc) hook can be
used by some LSMs to allocate extra security blob, but also by other
LSMs to reject BPF program loading, we need to make sure that
bpf_prog_free LSM hook is called after bpf_prog_load/bpf_prog_alloc one
*even* if the hook itself returned error. If we don't do that, we run
the risk of leaking memory. This seems to be possible today when
combining SELinux and BPF LSM, as one example, depending on their
relative ordering.
Also, for BPF LSM setup, add bpf_prog_load and bpf_prog_free to
sleepable LSM hooks list, as they are both executed in sleepable
context. Also drop bpf_prog_load hook from untrusted, as there is no
issue with refcount or anything else anymore, that originally forced us
to add it to untrusted list in c0c852dd1876 ("bpf: Do not mark certain LSM
hook arguments as trusted"). We now trigger this hook much later and it
should not be an issue anymore.
[0] https://lore.kernel.org/bpf/9fe88aef7deabbe87d3fc38c4aea3c69.paul@paul-moore.com/
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Paul Moore <paul@paul-moore.com>
Link: https://lore.kernel.org/bpf/20240124022127.2379740-10-andrii@kernel.org
2024-01-23 18:21:06 -08:00
prog - > aux - > security = NULL ;
2017-10-18 13:00:25 -07:00
kfree ( bpfsec ) ;
}
2024-01-23 18:21:14 -08:00
/*
 * selinux_bpf_token_create - attach SELinux state to a new bpf token.
 *
 * Allocates a zeroed bpf_security_struct with GFP_KERNEL, records the
 * current task's SID in it and stores it in token->security. attr and path
 * are not used here.
 *
 * Returns 0 on success or -ENOMEM if the allocation fails.
 */
static int selinux_bpf_token_create ( struct bpf_token * token , union bpf_attr * attr ,
2024-08-06 15:38:12 -07:00
const struct path * path )
2024-01-23 18:21:14 -08:00
{
struct bpf_security_struct * bpfsec ;
bpfsec = kzalloc ( sizeof ( * bpfsec ) , GFP_KERNEL ) ;
if ( ! bpfsec )
return - ENOMEM ;
bpfsec - > sid = current_sid ( ) ;
token - > security = bpfsec ;
return 0 ;
}
static void selinux_bpf_token_free ( struct bpf_token * token )
{
struct bpf_security_struct * bpfsec = token - > security ;
token - > security = NULL ;
kfree ( bpfsec ) ;
}
2017-10-18 13:00:25 -07:00
# endif
selinux: remove the runtime disable functionality
After working with the larger SELinux-based distros for several
years, we're finally at a place where we can disable the SELinux
runtime disable functionality. The existing kernel deprecation
notice explains the functionality and why we want to remove it:
The selinuxfs "disable" node allows SELinux to be disabled at
runtime prior to a policy being loaded into the kernel. If
disabled via this mechanism, SELinux will remain disabled until
the system is rebooted.
The preferred method of disabling SELinux is via the "selinux=0"
boot parameter, but the selinuxfs "disable" node was created to
make it easier for systems with primitive bootloaders that did not
allow for easy modification of the kernel command line.
Unfortunately, allowing for SELinux to be disabled at runtime makes
it difficult to secure the kernel's LSM hooks using the
"__ro_after_init" feature.
It is that last sentence, mentioning the '__ro_after_init' hardening,
which is the real motivation for this change, and if you look at the
diffstat you'll see that the impact of this patch reaches across all
the different LSMs, helping prevent tampering at the LSM hook level.
From a SELinux perspective, it is important to note that if you
continue to disable SELinux via "/etc/selinux/config" it may appear
that SELinux is disabled, but it is simply in an uninitialized state.
If you load a policy with `load_policy -i`, you will see SELinux
come alive just as if you had loaded the policy during early-boot.
It is also worth noting that the "/sys/fs/selinux/disable" file is
always writable now, regardless of the Kconfig settings, but writing
to the file has no effect on the system, other than to display an
error on the console if a non-zero/true value is written.
Finally, in the several years where we have been working on
deprecating this functionality, there has only been one instance of
someone mentioning any user visible breakage. In this particular
case it was an individual's kernel test system, and the workaround
documented in the deprecation notice ("selinux=0" on the kernel
command line) resolved the issue without problem.
Acked-by: Casey Schaufler <casey@schaufler-ca.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
2023-03-17 12:43:07 -04:00
/*
 * Sizes of the per-object security blobs SELinux declares through
 * struct lsm_blob_sizes (cred, file, inode, ipc, key, msg_msg, sock,
 * superblock, tun_dev and ib), plus the number of inode-init xattr slots
 * it requests (SELINUX_INODE_INIT_XATTRS). The perf_event entry is only
 * present when CONFIG_PERF_EVENTS is defined. The table is marked
 * __ro_after_init.
 */
struct lsm_blob_sizes selinux_blob_sizes __ro_after_init = {
2018-11-12 09:30:56 -08:00
. lbs_cred = sizeof ( struct task_security_struct ) ,
2018-11-12 12:02:49 -08:00
. lbs_file = sizeof ( struct file_security_struct ) ,
2018-09-21 17:19:29 -07:00
. lbs_inode = sizeof ( struct inode_security_struct ) ,
2018-11-20 11:55:02 -08:00
. lbs_ipc = sizeof ( struct ipc_security_struct ) ,
2024-07-10 14:32:26 -07:00
. lbs_key = sizeof ( struct key_security_struct ) ,
2018-11-20 11:55:02 -08:00
. lbs_msg_msg = sizeof ( struct msg_security_struct ) ,
2024-07-10 14:32:30 -07:00
# ifdef CONFIG_PERF_EVENTS
. lbs_perf_event = sizeof ( struct perf_event_security_struct ) ,
# endif
2024-07-10 14:32:25 -07:00
. lbs_sock = sizeof ( struct sk_security_struct ) ,
2021-04-22 17:41:15 +02:00
. lbs_superblock = sizeof ( struct superblock_security_struct ) ,
security: Allow all LSMs to provide xattrs for inode_init_security hook
Currently, the LSM infrastructure supports only one LSM providing an xattr
and EVM calculating the HMAC on that xattr, plus other inode metadata.
Allow all LSMs to provide one or multiple xattrs, by extending the security
blob reservation mechanism. Introduce the new lbs_xattr_count field of the
lsm_blob_sizes structure, so that each LSM can specify how many xattrs it
needs, and the LSM infrastructure knows how many xattr slots it should
allocate.
Modify the inode_init_security hook definition, by passing the full
xattr array allocated in security_inode_init_security(), and the current
number of xattr slots in that array filled by LSMs. The first parameter
would allow EVM to access and calculate the HMAC on xattrs supplied by
other LSMs, the second to not leave gaps in the xattr array, when an LSM
requested but did not provide xattrs (e.g. if it is not initialized).
Introduce lsm_get_xattr_slot(), which LSMs can call as many times as the
number specified in the lbs_xattr_count field of the lsm_blob_sizes
structure. During each call, lsm_get_xattr_slot() increments the number of
filled xattrs, so that at the next invocation it returns the next xattr
slot to fill.
Cleanup security_inode_init_security(). Unify the !initxattrs and
initxattrs case by simply not allocating the new_xattrs array in the
former. Update the documentation to reflect the changes, and fix the
description of the xattr name, as it is not allocated anymore.
Adapt both SELinux and Smack to use the new definition of the
inode_init_security hook, and to call lsm_get_xattr_slot() to obtain and
fill the reserved slots in the xattr array.
Move the xattr->name assignment after the xattr->value one, so that it is
done only in case of successful memory allocation.
Finally, change the default return value of the inode_init_security hook
from zero to -EOPNOTSUPP, so that BPF LSM correctly follows the hook
conventions.
Reported-by: Nicolas Bouchinet <nicolas.bouchinet@clip-os.org>
Link: https://lore.kernel.org/linux-integrity/Y1FTSIo+1x+4X0LS@archlinux/
Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Acked-by: Casey Schaufler <casey@schaufler-ca.com>
[PM: minor comment and variable tweaks, approved by RS]
Signed-off-by: Paul Moore <paul@paul-moore.com>
2023-06-10 09:57:35 +02:00
. lbs_xattr_count = SELINUX_INODE_INIT_XATTRS ,
2024-07-10 14:32:28 -07:00
. lbs_tun_dev = sizeof ( struct tun_security_struct ) ,
2024-07-10 14:32:29 -07:00
. lbs_ib = sizeof ( struct ib_security_struct ) ,
2018-11-12 09:30:56 -08:00
} ;
perf_event: Add support for LSM and SELinux checks
In current mainline, the degree of access to perf_event_open(2) system
call depends on the perf_event_paranoid sysctl. This has a number of
limitations:
1. The sysctl is only a single value. Many types of accesses are controlled
based on the single value thus making the control very limited and
coarse grained.
2. The sysctl is global, so if the sysctl is changed, then that means
all processes get access to perf_event_open(2) opening the door to
security issues.
This patch adds LSM and SELinux access checking which will be used in
Android to access perf_event_open(2) for the purposes of attaching BPF
programs to tracepoints, perf profiling and other operations from
userspace. These operations are intended for production systems.
5 new LSM hooks are added:
1. perf_event_open: This controls access during the perf_event_open(2)
syscall itself. The hook is called from all the places that the
perf_event_paranoid sysctl is checked to keep it consistent with the
sysctl. The hook gets passed a 'type' argument which controls CPU,
kernel and tracepoint accesses (in this context, CPU, kernel and
tracepoint have the same semantics as the perf_event_paranoid sysctl).
Additionally, I added an 'open' type which is similar to
perf_event_paranoid sysctl == 3 patch carried in Android and several other
distros but was rejected in mainline [1] in 2016.
2. perf_event_alloc: This allocates a new security object for the event
which stores the current SID within the event. It will be useful when
the perf event's FD is passed through IPC to another process which may
try to read the FD. Appropriate security checks will limit access.
3. perf_event_free: Called when the event is closed.
4. perf_event_read: Called from the read(2) and mmap(2) syscalls for the event.
5. perf_event_write: Called from the ioctl(2) syscalls for the event.
[1] https://lwn.net/Articles/696240/
Since Peter had suggested LSM hooks in 2016 [1], I am adding his
Suggested-by tag below.
To use this patch, we set the perf_event_paranoid sysctl to -1 and then
apply selinux checking as appropriate (default deny everything, and then
add policy rules to give access to domains that need it). In the future
we can remove the perf_event_paranoid sysctl altogether.
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Co-developed-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: James Morris <jmorris@namei.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: rostedt@goodmis.org
Cc: Yonghong Song <yhs@fb.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: jeffv@google.com
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: primiano@google.com
Cc: Song Liu <songliubraving@fb.com>
Cc: rsavitski@google.com
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Matthew Garrett <matthewgarrett@google.com>
Link: https://lkml.kernel.org/r/20191014170308.70668-1-joel@joelfernandes.org
2019-10-14 13:03:08 -04:00
# ifdef CONFIG_PERF_EVENTS
static int selinux_perf_event_open ( struct perf_event_attr * attr , int type )
{
u32 requested , sid = current_sid ( ) ;
if ( type = = PERF_SECURITY_OPEN )
requested = PERF_EVENT__OPEN ;
else if ( type = = PERF_SECURITY_CPU )
requested = PERF_EVENT__CPU ;
else if ( type = = PERF_SECURITY_KERNEL )
requested = PERF_EVENT__KERNEL ;
else if ( type = = PERF_SECURITY_TRACEPOINT )
requested = PERF_EVENT__TRACEPOINT ;
else
return - EINVAL ;
2023-03-09 13:30:37 -05:00
return avc_has_perm ( sid , sid , SECCLASS_PERF_EVENT ,
perf_event: Add support for LSM and SELinux checks
In current mainline, the degree of access to perf_event_open(2) system
call depends on the perf_event_paranoid sysctl. This has a number of
limitations:
1. The sysctl is only a single value. Many types of accesses are controlled
based on the single value thus making the control very limited and
coarse grained.
2. The sysctl is global, so if the sysctl is changed, then that means
all processes get access to perf_event_open(2) opening the door to
security issues.
This patch adds LSM and SELinux access checking which will be used in
Android to access perf_event_open(2) for the purposes of attaching BPF
programs to tracepoints, perf profiling and other operations from
userspace. These operations are intended for production systems.
5 new LSM hooks are added:
1. perf_event_open: This controls access during the perf_event_open(2)
syscall itself. The hook is called from all the places that the
perf_event_paranoid sysctl is checked to keep it consistent with the
systctl. The hook gets passed a 'type' argument which controls CPU,
kernel and tracepoint accesses (in this context, CPU, kernel and
tracepoint have the same semantics as the perf_event_paranoid sysctl).
Additionally, I added an 'open' type which is similar to
perf_event_paranoid sysctl == 3 patch carried in Android and several other
distros but was rejected in mainline [1] in 2016.
2. perf_event_alloc: This allocates a new security object for the event
which stores the current SID within the event. It will be useful when
the perf event's FD is passed through IPC to another process which may
try to read the FD. Appropriate security checks will limit access.
3. perf_event_free: Called when the event is closed.
4. perf_event_read: Called from the read(2) and mmap(2) syscalls for the event.
5. perf_event_write: Called from the ioctl(2) syscalls for the event.
[1] https://lwn.net/Articles/696240/
Since Peter had suggest LSM hooks in 2016 [1], I am adding his
Suggested-by tag below.
To use this patch, we set the perf_event_paranoid sysctl to -1 and then
apply selinux checking as appropriate (default deny everything, and then
add policy rules to give access to domains that need it). In the future
we can remove the perf_event_paranoid sysctl altogether.
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Co-developed-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: James Morris <jmorris@namei.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: rostedt@goodmis.org
Cc: Yonghong Song <yhs@fb.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: jeffv@google.com
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: primiano@google.com
Cc: Song Liu <songliubraving@fb.com>
Cc: rsavitski@google.com
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Matthew Garrett <matthewgarrett@google.com>
Link: https://lkml.kernel.org/r/20191014170308.70668-1-joel@joelfernandes.org
2019-10-14 13:03:08 -04:00
requested , NULL ) ;
}
static int selinux_perf_event_alloc ( struct perf_event * event )
{
struct perf_event_security_struct * perfsec ;
2024-07-10 14:32:30 -07:00
perfsec = selinux_perf_event ( event - > security ) ;
perf_event: Add support for LSM and SELinux checks
In current mainline, the degree of access to perf_event_open(2) system
call depends on the perf_event_paranoid sysctl. This has a number of
limitations:
1. The sysctl is only a single value. Many types of accesses are controlled
based on the single value thus making the control very limited and
coarse grained.
2. The sysctl is global, so if the sysctl is changed, then that means
all processes get access to perf_event_open(2) opening the door to
security issues.
This patch adds LSM and SELinux access checking which will be used in
Android to access perf_event_open(2) for the purposes of attaching BPF
programs to tracepoints, perf profiling and other operations from
userspace. These operations are intended for production systems.
5 new LSM hooks are added:
1. perf_event_open: This controls access during the perf_event_open(2)
syscall itself. The hook is called from all the places that the
perf_event_paranoid sysctl is checked to keep it consistent with the
systctl. The hook gets passed a 'type' argument which controls CPU,
kernel and tracepoint accesses (in this context, CPU, kernel and
tracepoint have the same semantics as the perf_event_paranoid sysctl).
Additionally, I added an 'open' type which is similar to
perf_event_paranoid sysctl == 3 patch carried in Android and several other
distros but was rejected in mainline [1] in 2016.
2. perf_event_alloc: This allocates a new security object for the event
which stores the current SID within the event. It will be useful when
the perf event's FD is passed through IPC to another process which may
try to read the FD. Appropriate security checks will limit access.
3. perf_event_free: Called when the event is closed.
4. perf_event_read: Called from the read(2) and mmap(2) syscalls for the event.
5. perf_event_write: Called from the ioctl(2) syscalls for the event.
[1] https://lwn.net/Articles/696240/
Since Peter had suggest LSM hooks in 2016 [1], I am adding his
Suggested-by tag below.
To use this patch, we set the perf_event_paranoid sysctl to -1 and then
apply selinux checking as appropriate (default deny everything, and then
add policy rules to give access to domains that need it). In the future
we can remove the perf_event_paranoid sysctl altogether.
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Co-developed-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: James Morris <jmorris@namei.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: rostedt@goodmis.org
Cc: Yonghong Song <yhs@fb.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: jeffv@google.com
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: primiano@google.com
Cc: Song Liu <songliubraving@fb.com>
Cc: rsavitski@google.com
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Matthew Garrett <matthewgarrett@google.com>
Link: https://lkml.kernel.org/r/20191014170308.70668-1-joel@joelfernandes.org
2019-10-14 13:03:08 -04:00
perfsec - > sid = current_sid ( ) ;
return 0 ;
}
static int selinux_perf_event_read ( struct perf_event * event )
{
struct perf_event_security_struct * perfsec = event - > security ;
u32 sid = current_sid ( ) ;
2023-03-09 13:30:37 -05:00
return avc_has_perm ( sid , perfsec - > sid ,
perf_event: Add support for LSM and SELinux checks
In current mainline, the degree of access to perf_event_open(2) system
call depends on the perf_event_paranoid sysctl. This has a number of
limitations:
1. The sysctl is only a single value. Many types of accesses are controlled
based on the single value thus making the control very limited and
coarse grained.
2. The sysctl is global, so if the sysctl is changed, then that means
all processes get access to perf_event_open(2) opening the door to
security issues.
This patch adds LSM and SELinux access checking which will be used in
Android to access perf_event_open(2) for the purposes of attaching BPF
programs to tracepoints, perf profiling and other operations from
userspace. These operations are intended for production systems.
5 new LSM hooks are added:
1. perf_event_open: This controls access during the perf_event_open(2)
syscall itself. The hook is called from all the places that the
perf_event_paranoid sysctl is checked to keep it consistent with the
systctl. The hook gets passed a 'type' argument which controls CPU,
kernel and tracepoint accesses (in this context, CPU, kernel and
tracepoint have the same semantics as the perf_event_paranoid sysctl).
Additionally, I added an 'open' type which is similar to
perf_event_paranoid sysctl == 3 patch carried in Android and several other
distros but was rejected in mainline [1] in 2016.
2. perf_event_alloc: This allocates a new security object for the event
which stores the current SID within the event. It will be useful when
the perf event's FD is passed through IPC to another process which may
try to read the FD. Appropriate security checks will limit access.
3. perf_event_free: Called when the event is closed.
4. perf_event_read: Called from the read(2) and mmap(2) syscalls for the event.
5. perf_event_write: Called from the ioctl(2) syscalls for the event.
[1] https://lwn.net/Articles/696240/
Since Peter had suggest LSM hooks in 2016 [1], I am adding his
Suggested-by tag below.
To use this patch, we set the perf_event_paranoid sysctl to -1 and then
apply selinux checking as appropriate (default deny everything, and then
add policy rules to give access to domains that need it). In the future
we can remove the perf_event_paranoid sysctl altogether.
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Co-developed-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: James Morris <jmorris@namei.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: rostedt@goodmis.org
Cc: Yonghong Song <yhs@fb.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: jeffv@google.com
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: primiano@google.com
Cc: Song Liu <songliubraving@fb.com>
Cc: rsavitski@google.com
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Matthew Garrett <matthewgarrett@google.com>
Link: https://lkml.kernel.org/r/20191014170308.70668-1-joel@joelfernandes.org
2019-10-14 13:03:08 -04:00
SECCLASS_PERF_EVENT , PERF_EVENT__READ , NULL ) ;
}
static int selinux_perf_event_write ( struct perf_event * event )
{
struct perf_event_security_struct * perfsec = event - > security ;
u32 sid = current_sid ( ) ;
2023-03-09 13:30:37 -05:00
return avc_has_perm ( sid , perfsec - > sid ,
perf_event: Add support for LSM and SELinux checks
In current mainline, the degree of access to perf_event_open(2) system
call depends on the perf_event_paranoid sysctl. This has a number of
limitations:
1. The sysctl is only a single value. Many types of accesses are controlled
based on the single value thus making the control very limited and
coarse grained.
2. The sysctl is global, so if the sysctl is changed, then that means
all processes get access to perf_event_open(2) opening the door to
security issues.
This patch adds LSM and SELinux access checking which will be used in
Android to access perf_event_open(2) for the purposes of attaching BPF
programs to tracepoints, perf profiling and other operations from
userspace. These operations are intended for production systems.
5 new LSM hooks are added:
1. perf_event_open: This controls access during the perf_event_open(2)
syscall itself. The hook is called from all the places that the
perf_event_paranoid sysctl is checked to keep it consistent with the
systctl. The hook gets passed a 'type' argument which controls CPU,
kernel and tracepoint accesses (in this context, CPU, kernel and
tracepoint have the same semantics as the perf_event_paranoid sysctl).
Additionally, I added an 'open' type which is similar to
perf_event_paranoid sysctl == 3 patch carried in Android and several other
distros but was rejected in mainline [1] in 2016.
2. perf_event_alloc: This allocates a new security object for the event
which stores the current SID within the event. It will be useful when
the perf event's FD is passed through IPC to another process which may
try to read the FD. Appropriate security checks will limit access.
3. perf_event_free: Called when the event is closed.
4. perf_event_read: Called from the read(2) and mmap(2) syscalls for the event.
5. perf_event_write: Called from the ioctl(2) syscalls for the event.
[1] https://lwn.net/Articles/696240/
Since Peter had suggest LSM hooks in 2016 [1], I am adding his
Suggested-by tag below.
To use this patch, we set the perf_event_paranoid sysctl to -1 and then
apply selinux checking as appropriate (default deny everything, and then
add policy rules to give access to domains that need it). In the future
we can remove the perf_event_paranoid sysctl altogether.
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Co-developed-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: James Morris <jmorris@namei.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: rostedt@goodmis.org
Cc: Yonghong Song <yhs@fb.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: jeffv@google.com
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: primiano@google.com
Cc: Song Liu <songliubraving@fb.com>
Cc: rsavitski@google.com
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Matthew Garrett <matthewgarrett@google.com>
Link: https://lkml.kernel.org/r/20191014170308.70668-1-joel@joelfernandes.org
2019-10-14 13:03:08 -04:00
SECCLASS_PERF_EVENT , PERF_EVENT__WRITE , NULL ) ;
}
# endif
2021-02-23 18:16:45 -05:00
# ifdef CONFIG_IO_URING
/**
* selinux_uring_override_creds - check the requested cred override
* @ new : the target creds
*
* Check to see if the current task is allowed to override it ' s credentials
* to service an io_uring operation .
*/
static int selinux_uring_override_creds ( const struct cred * new )
{
2023-03-09 13:30:37 -05:00
return avc_has_perm ( current_sid ( ) , cred_sid ( new ) ,
2021-02-23 18:16:45 -05:00
SECCLASS_IO_URING , IO_URING__OVERRIDE_CREDS , NULL ) ;
}
/**
* selinux_uring_sqpoll - check if a io_uring polling thread can be created
*
* Check to see if the current task is allowed to create a new io_uring
* kernel polling thread .
*/
static int selinux_uring_sqpoll ( void )
{
2023-07-06 15:23:27 +02:00
u32 sid = current_sid ( ) ;
2021-02-23 18:16:45 -05:00
2023-03-09 13:30:37 -05:00
return avc_has_perm ( sid , sid ,
2021-02-23 18:16:45 -05:00
SECCLASS_IO_URING , IO_URING__SQPOLL , NULL ) ;
}
2022-08-10 15:55:36 -04:00
/**
* selinux_uring_cmd - check if IORING_OP_URING_CMD is allowed
* @ ioucmd : the io_uring command structure
*
* Check to see if the current domain is allowed to execute an
* IORING_OP_URING_CMD against the device / file specified in @ ioucmd .
*
*/
static int selinux_uring_cmd ( struct io_uring_cmd * ioucmd )
{
struct file * file = ioucmd - > file ;
struct inode * inode = file_inode ( file ) ;
struct inode_security_struct * isec = selinux_inode ( inode ) ;
struct common_audit_data ad ;
ad . type = LSM_AUDIT_DATA_FILE ;
ad . u . file = file ;
2023-03-09 13:30:37 -05:00
return avc_has_perm ( current_sid ( ) , isec - > sid ,
2022-08-10 15:55:36 -04:00
SECCLASS_IO_URING , IO_URING__CMD , & ad ) ;
}
2021-02-23 18:16:45 -05:00
# endif /* CONFIG_IO_URING */
2023-11-10 12:09:33 -05:00
/* Identity of this LSM: the name string "selinux" and the LSM_ID_SELINUX id. */
static const struct lsm_id selinux_lsmid = {
2023-09-12 13:56:46 -07:00
. name = " selinux " ,
. id = LSM_ID_SELINUX ,
} ;
2020-01-08 15:09:58 +01:00
/*
 * IMPORTANT NOTE: When adding new hooks, please be careful to keep this order:
 * 1. any hooks that don't belong to (2.) or (3.) below,
 * 2. hooks that both access structures allocated by other hooks, and allocate
 *    structures that can be later accessed by other hooks (mostly "cloning"
 *    hooks),
 * 3. hooks that only allocate structures that can be later accessed by other
 *    hooks ("allocating" hooks).
 *
 * Please follow block comment delimiters in the list to keep this order.
 */
selinux: remove the runtime disable functionality
After working with the larger SELinux-based distros for several
years, we're finally at a place where we can disable the SELinux
runtime disable functionality. The existing kernel deprecation
notice explains the functionality and why we want to remove it:
The selinuxfs "disable" node allows SELinux to be disabled at
runtime prior to a policy being loaded into the kernel. If
disabled via this mechanism, SELinux will remain disabled until
the system is rebooted.
The preferred method of disabling SELinux is via the "selinux=0"
boot parameter, but the selinuxfs "disable" node was created to
make it easier for systems with primitive bootloaders that did not
allow for easy modification of the kernel command line.
Unfortunately, allowing for SELinux to be disabled at runtime makes
it difficult to secure the kernel's LSM hooks using the
"__ro_after_init" feature.
It is that last sentence, mentioning the '__ro_after_init' hardening,
which is the real motivation for this change, and if you look at the
diffstat you'll see that the impact of this patch reaches across all
the different LSMs, helping prevent tampering at the LSM hook level.
From a SELinux perspective, it is important to note that if you
continue to disable SELinux via "/etc/selinux/config" it may appear
that SELinux is disabled, but it is simply in an uninitialized state.
If you load a policy with `load_policy -i`, you will see SELinux
come alive just as if you had loaded the policy during early-boot.
It is also worth noting that the "/sys/fs/selinux/disable" file is
always writable now, regardless of the Kconfig settings, but writing
to the file has no effect on the system, other than to display an
error on the console if a non-zero/true value is written.
Finally, in the several years where we have been working on
deprecating this functionality, there has only been one instance of
someone mentioning any user visible breakage. In this particular
case it was an individual's kernel test system, and the workaround
documented in the deprecation notice ("selinux=0" on the kernel
command line) resolved the issue without problem.
Acked-by: Casey Schaufler <casey@schaufler-ca.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
2023-03-17 12:43:07 -04:00
static struct security_hook_list selinux_hooks [ ] __ro_after_init = {
2015-05-02 15:11:36 -07:00
LSM_HOOK_INIT ( binder_set_context_mgr , selinux_binder_set_context_mgr ) ,
LSM_HOOK_INIT ( binder_transaction , selinux_binder_transaction ) ,
LSM_HOOK_INIT ( binder_transfer_binder , selinux_binder_transfer_binder ) ,
LSM_HOOK_INIT ( binder_transfer_file , selinux_binder_transfer_file ) ,
LSM_HOOK_INIT ( ptrace_access_check , selinux_ptrace_access_check ) ,
LSM_HOOK_INIT ( ptrace_traceme , selinux_ptrace_traceme ) ,
LSM_HOOK_INIT ( capget , selinux_capget ) ,
LSM_HOOK_INIT ( capset , selinux_capset ) ,
LSM_HOOK_INIT ( capable , selinux_capable ) ,
LSM_HOOK_INIT ( quotactl , selinux_quotactl ) ,
LSM_HOOK_INIT ( quota_on , selinux_quota_on ) ,
LSM_HOOK_INIT ( syslog , selinux_syslog ) ,
LSM_HOOK_INIT ( vm_enough_memory , selinux_vm_enough_memory ) ,
LSM_HOOK_INIT ( netlink_send , selinux_netlink_send ) ,
2020-03-22 15:46:24 -05:00
LSM_HOOK_INIT ( bprm_creds_for_exec , selinux_bprm_creds_for_exec ) ,
2015-05-02 15:11:36 -07:00
LSM_HOOK_INIT ( bprm_committing_creds , selinux_bprm_committing_creds ) ,
LSM_HOOK_INIT ( bprm_committed_creds , selinux_bprm_committed_creds ) ,
2018-12-13 13:41:47 -05:00
LSM_HOOK_INIT ( sb_free_mnt_opts , selinux_free_mnt_opts ) ,
2021-02-26 22:37:55 -05:00
LSM_HOOK_INIT ( sb_mnt_opts_compat , selinux_sb_mnt_opts_compat ) ,
2015-05-02 15:11:36 -07:00
LSM_HOOK_INIT ( sb_remount , selinux_sb_remount ) ,
LSM_HOOK_INIT ( sb_kern_mount , selinux_sb_kern_mount ) ,
LSM_HOOK_INIT ( sb_show_options , selinux_sb_show_options ) ,
LSM_HOOK_INIT ( sb_statfs , selinux_sb_statfs ) ,
LSM_HOOK_INIT ( sb_mount , selinux_mount ) ,
LSM_HOOK_INIT ( sb_umount , selinux_umount ) ,
LSM_HOOK_INIT ( sb_set_mnt_opts , selinux_set_mnt_opts ) ,
LSM_HOOK_INIT ( sb_clone_mnt_opts , selinux_sb_clone_mnt_opts ) ,
2020-01-17 15:24:07 -05:00
LSM_HOOK_INIT ( move_mount , selinux_move_mount ) ,
2015-05-02 15:11:36 -07:00
LSM_HOOK_INIT ( dentry_init_security , selinux_dentry_init_security ) ,
2016-07-13 10:44:53 -04:00
LSM_HOOK_INIT ( dentry_create_files_as , selinux_dentry_create_files_as ) ,
2015-05-02 15:11:36 -07:00
LSM_HOOK_INIT ( inode_free_security , selinux_inode_free_security ) ,
LSM_HOOK_INIT ( inode_init_security , selinux_inode_init_security ) ,
2021-01-08 14:22:22 -08:00
LSM_HOOK_INIT ( inode_init_security_anon , selinux_inode_init_security_anon ) ,
2015-05-02 15:11:36 -07:00
LSM_HOOK_INIT ( inode_create , selinux_inode_create ) ,
LSM_HOOK_INIT ( inode_link , selinux_inode_link ) ,
LSM_HOOK_INIT ( inode_unlink , selinux_inode_unlink ) ,
LSM_HOOK_INIT ( inode_symlink , selinux_inode_symlink ) ,
LSM_HOOK_INIT ( inode_mkdir , selinux_inode_mkdir ) ,
LSM_HOOK_INIT ( inode_rmdir , selinux_inode_rmdir ) ,
LSM_HOOK_INIT ( inode_mknod , selinux_inode_mknod ) ,
LSM_HOOK_INIT ( inode_rename , selinux_inode_rename ) ,
LSM_HOOK_INIT ( inode_readlink , selinux_inode_readlink ) ,
LSM_HOOK_INIT ( inode_follow_link , selinux_inode_follow_link ) ,
LSM_HOOK_INIT ( inode_permission , selinux_inode_permission ) ,
LSM_HOOK_INIT ( inode_setattr , selinux_inode_setattr ) ,
LSM_HOOK_INIT ( inode_getattr , selinux_inode_getattr ) ,
lsm: fixup the inode xattr capability handling
The current security_inode_setxattr() and security_inode_removexattr()
hooks rely on individual LSMs to either call into the associated
capability hooks (cap_inode_setxattr() or cap_inode_removexattr()), or
return a magic value of 1 to indicate that the LSM layer itself should
perform the capability checks. Unfortunately, with the default return
value for these LSM hooks being 0, an individual LSM hook returning a
1 will cause the LSM hook processing to exit early, potentially
skipping a LSM. Thankfully, with the exception of the BPF LSM, none
of the LSMs which currently register inode xattr hooks should end up
returning a value of 1, and in the BPF LSM case, with the BPF LSM hooks
executing last there should be no real harm in stopping processing of
the LSM hooks. However, the reliance on the individual LSMs to either
call the capability hooks themselves, or signal the LSM with a return
value of 1, is fragile and relies on a specific set of LSMs being
enabled. This patch is an effort to resolve, or minimize, these
issues.
Before we discuss the solution, there are a few observations and
considerations that we need to take into account:
* BPF LSM registers an implementation for every LSM hook, and that
implementation simply returns the hook's default return value, a
0 in this case. We want to ensure that the default BPF LSM behavior
results in the capability checks being called.
* SELinux and Smack do not expect the traditional capability checks
to be applied to the xattrs that they "own".
* SELinux and Smack are currently written in such a way that the
xattr capability checks happen before any additional LSM specific
access control checks. SELinux does apply SELinux specific access
controls to all xattrs, even those not "owned" by SELinux.
* IMA and EVM also register xattr hooks but assume that the LSM layer
and specific LSMs have already authorized the basic xattr operation.
In order to ensure we perform the capability based access controls
before the individual LSM access controls, perform only one capability
access control check for each operation, and clarify the logic around
applying the capability controls, we need a mechanism to determine if
any of the enabled LSMs "own" a particular xattr and want to take
responsibility for controlling access to that xattr. The solution in
this patch is to create a new LSM hook, 'inode_xattr_skipcap', that is
not exported to the rest of the kernel via a security_XXX() function,
but is used by the LSM layer to determine if a LSM wants to control
access to a given xattr and avoid the traditional capability controls.
Registering an inode_xattr_skipcap hook is optional, if a LSM declines
to register an implementation, or uses an implementation that simply
returns the default value (0), there is no effect as the LSM continues
to enforce the capability based controls (unless another LSM takes
ownership of the xattr). If none of the LSMs signal that the
capability checks should be skipped, the capability check is performed
and if access is granted the individual LSM xattr access control hooks
are executed, keeping with the DAC-before-LSM convention.
Cc: stable@vger.kernel.org
Acked-by: Casey Schaufler <casey@schaufler-ca.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
2024-05-02 17:57:51 -04:00
LSM_HOOK_INIT ( inode_xattr_skipcap , selinux_inode_xattr_skipcap ) ,
2015-05-02 15:11:36 -07:00
LSM_HOOK_INIT ( inode_setxattr , selinux_inode_setxattr ) ,
LSM_HOOK_INIT ( inode_post_setxattr , selinux_inode_post_setxattr ) ,
LSM_HOOK_INIT ( inode_getxattr , selinux_inode_getxattr ) ,
LSM_HOOK_INIT ( inode_listxattr , selinux_inode_listxattr ) ,
LSM_HOOK_INIT ( inode_removexattr , selinux_inode_removexattr ) ,
2022-09-22 17:17:08 +02:00
LSM_HOOK_INIT ( inode_set_acl , selinux_inode_set_acl ) ,
LSM_HOOK_INIT ( inode_get_acl , selinux_inode_get_acl ) ,
LSM_HOOK_INIT ( inode_remove_acl , selinux_inode_remove_acl ) ,
2015-05-02 15:11:36 -07:00
LSM_HOOK_INIT ( inode_getsecurity , selinux_inode_getsecurity ) ,
LSM_HOOK_INIT ( inode_setsecurity , selinux_inode_setsecurity ) ,
LSM_HOOK_INIT ( inode_listsecurity , selinux_inode_listsecurity ) ,
2024-10-09 10:32:16 -07:00
LSM_HOOK_INIT ( inode_getlsmprop , selinux_inode_getlsmprop ) ,
2016-07-13 10:44:48 -04:00
LSM_HOOK_INIT ( inode_copy_up , selinux_inode_copy_up ) ,
2016-07-13 10:44:50 -04:00
LSM_HOOK_INIT ( inode_copy_up_xattr , selinux_inode_copy_up_xattr ) ,
fanotify, inotify, dnotify, security: add security hook for fs notifications
As of now, setting watches on filesystem objects has, at most, applied a
check for read access to the inode, and in the case of fanotify, requires
CAP_SYS_ADMIN. No specific security hook or permission check has been
provided to control the setting of watches. Using any of inotify, dnotify,
or fanotify, it is possible to observe, not only write-like operations, but
even read access to a file. Modeling the watch as being merely a read from
the file is insufficient for the needs of SELinux. This is due to the fact
that read access should not necessarily imply access to information about
when another process reads from a file. Furthermore, fanotify watches grant
more power to an application in the form of permission events. While
notification events are solely, unidirectional (i.e. they only pass
information to the receiving application), permission events are blocking.
Permission events make a request to the receiving application which will
then reply with a decision as to whether or not that action may be
completed. This causes the issue of the watching application having the
ability to exercise control over the triggering process. Without drawing a
distinction within the permission check, the ability to read would imply
the greater ability to control an application. Additionally, mount and
superblock watches apply to all files within the same mount or superblock.
Read access to one file should not necessarily imply the ability to watch
all files accessed within a given mount or superblock.
In order to solve these issues, a new LSM hook is implemented and has been
placed within the system calls for marking filesystem objects with inotify,
fanotify, and dnotify watches. These calls to the hook are placed at the
point at which the target path has been resolved and are provided with the
path struct, the mask of requested notification events, and the type of
object on which the mark is being set (inode, superblock, or mount). The
mask and obj_type have already been translated into common FS_* values
shared by the entirety of the fs notification infrastructure. The path
struct is passed rather than just the inode so that the mount is available,
particularly for mount watches. This also allows for use of the hook by
pathname-based security modules. However, since the hook is intended for
use even by inode based security modules, it is not placed under the
CONFIG_SECURITY_PATH conditional. Otherwise, the inode-based security
modules would need to enable all of the path hooks, even though they do not
use any of them.
This only provides a hook at the point of setting a watch, and presumes
that permission to set a particular watch implies the ability to receive
all notifications about that object which match the mask. This is all that
is required for SELinux. If other security modules require additional hooks
or infrastructure to control delivery of notification, these can be added
by them. It does not make sense for us to propose hooks for which we have
no implementation. The understanding that all notifications received by the
requesting application are all strictly of a type for which the application
has been granted permission shows that this implementation is sufficient in
its coverage.
Security modules wishing to provide complete control over fanotify must
also implement a security_file_open hook that validates that the access
requested by the watching application is authorized. Fanotify has the issue
that it returns a file descriptor with the file mode specified during
fanotify_init() to the watching process on event. This is already covered
by the LSM security_file_open hook if the security module implements
checking of the requested file mode there. Otherwise, a watching process
can obtain escalated access to a file for which it has not been authorized.
The selinux_path_notify hook implementation works by adding five new file
permissions: watch, watch_mount, watch_sb, watch_reads, and watch_with_perm
(descriptions about which will follow), and one new filesystem permission:
watch (which is applied to superblock checks). The hook then decides which
subset of these permissions must be held by the requesting application
based on the contents of the provided mask and the obj_type. The
selinux_file_open hook already checks the requested file mode and therefore
ensures that a watching process cannot escalate its access through
fanotify.
The watch, watch_mount, and watch_sb permissions are the baseline
permissions for setting a watch on an object and each is a requirement for
any watch to be set on a file, mount, or superblock respectively. It should
be noted that having either of the other two permissions (watch_reads and
watch_with_perm) does not imply the watch, watch_mount, or watch_sb
permission. Superblock watches further require the filesystem watch
permission to the superblock. As there is no labeled object in view for
mounts, there is no specific check for mount watches beyond watch_mount to
the inode. Such a check could be added in the future, if a suitable labeled
object existed representing the mount.
The watch_reads permission is required to receive notifications from
read-exclusive events on filesystem objects. These events include accessing
a file for the purpose of reading and closing a file which has been opened
read-only. This distinction has been drawn in order to provide a direct
indication in the policy for this otherwise not obvious capability. Read
access to a file should not necessarily imply the ability to observe read
events on a file.
Finally, watch_with_perm only applies to fanotify masks since it is the
only way to set a mask which allows for the blocking, permission event.
This permission is needed for any watch which is of this type. Though
fanotify requires CAP_SYS_ADMIN, this is insufficient as it gives implicit
trust to root, which we do not do, and does not support least privilege.
Signed-off-by: Aaron Goidel <acgoide@tycho.nsa.gov>
Acked-by: Casey Schaufler <casey@schaufler-ca.com>
Acked-by: Jan Kara <jack@suse.cz>
Signed-off-by: Paul Moore <paul@paul-moore.com>
2019-08-12 11:20:00 -04:00
LSM_HOOK_INIT ( path_notify , selinux_path_notify ) ,
2015-05-02 15:11:36 -07:00
2019-02-22 15:57:17 +01:00
LSM_HOOK_INIT ( kernfs_init_security , selinux_kernfs_init_security ) ,
2015-05-02 15:11:36 -07:00
LSM_HOOK_INIT ( file_permission , selinux_file_permission ) ,
LSM_HOOK_INIT ( file_alloc_security , selinux_file_alloc_security ) ,
LSM_HOOK_INIT ( file_ioctl , selinux_file_ioctl ) ,
lsm: new security_file_ioctl_compat() hook
Some ioctl commands do not require ioctl permission, but are routed to
other permissions such as FILE_GETATTR or FILE_SETATTR. This routing is
done by comparing the ioctl cmd to a set of 64-bit flags (FS_IOC_*).
However, if a 32-bit process is running on a 64-bit kernel, it emits
32-bit flags (FS_IOC32_*) for certain ioctl operations. These flags are
being checked erroneously, which leads to these ioctl operations being
routed to the ioctl permission, rather than the correct file
permissions.
This was also noted in a RED-PEN finding from a while back -
"/* RED-PEN how should LSM module know it's handling 32bit? */".
This patch introduces a new hook, security_file_ioctl_compat(), that is
called from the compat ioctl syscall. All current LSMs have been changed
to support this hook.
Reviewing the three places where we are currently using
security_file_ioctl(), it appears that only SELinux needs a dedicated
compat change; TOMOYO and SMACK appear to be functional without any
change.
Cc: stable@vger.kernel.org
Fixes: 0b24dcb7f2f7 ("Revert "selinux: simplify ioctl checking"")
Signed-off-by: Alfred Piccioni <alpic@google.com>
Reviewed-by: Stephen Smalley <stephen.smalley.work@gmail.com>
[PM: subject tweak, line length fixes, and alignment corrections]
Signed-off-by: Paul Moore <paul@paul-moore.com>
2023-12-19 10:09:09 +01:00
LSM_HOOK_INIT ( file_ioctl_compat , selinux_file_ioctl_compat ) ,
2015-05-02 15:11:36 -07:00
LSM_HOOK_INIT ( mmap_file , selinux_mmap_file ) ,
LSM_HOOK_INIT ( mmap_addr , selinux_mmap_addr ) ,
LSM_HOOK_INIT ( file_mprotect , selinux_file_mprotect ) ,
LSM_HOOK_INIT ( file_lock , selinux_file_lock ) ,
LSM_HOOK_INIT ( file_fcntl , selinux_file_fcntl ) ,
LSM_HOOK_INIT ( file_set_fowner , selinux_file_set_fowner ) ,
LSM_HOOK_INIT ( file_send_sigiotask , selinux_file_send_sigiotask ) ,
LSM_HOOK_INIT ( file_receive , selinux_file_receive ) ,
LSM_HOOK_INIT ( file_open , selinux_file_open ) ,
2017-03-28 23:08:45 +09:00
LSM_HOOK_INIT ( task_alloc , selinux_task_alloc ) ,
2015-05-02 15:11:36 -07:00
LSM_HOOK_INIT ( cred_prepare , selinux_cred_prepare ) ,
LSM_HOOK_INIT ( cred_transfer , selinux_cred_transfer ) ,
2018-01-08 13:36:19 -08:00
LSM_HOOK_INIT ( cred_getsecid , selinux_cred_getsecid ) ,
2024-10-09 10:32:18 -07:00
LSM_HOOK_INIT ( cred_getlsmprop , selinux_cred_getlsmprop ) ,
2015-05-02 15:11:36 -07:00
LSM_HOOK_INIT ( kernel_act_as , selinux_kernel_act_as ) ,
LSM_HOOK_INIT ( kernel_create_files_as , selinux_kernel_create_files_as ) ,
LSM_HOOK_INIT ( kernel_module_request , selinux_kernel_module_request ) ,
2018-07-13 14:06:02 -04:00
LSM_HOOK_INIT ( kernel_load_data , selinux_kernel_load_data ) ,
2016-04-05 13:06:27 -07:00
LSM_HOOK_INIT ( kernel_read_file , selinux_kernel_read_file ) ,
2015-05-02 15:11:36 -07:00
LSM_HOOK_INIT ( task_setpgid , selinux_task_setpgid ) ,
LSM_HOOK_INIT ( task_getpgid , selinux_task_getpgid ) ,
LSM_HOOK_INIT ( task_getsid , selinux_task_getsid ) ,
2024-10-09 10:32:15 -07:00
LSM_HOOK_INIT ( current_getlsmprop_subj , selinux_current_getlsmprop_subj ) ,
LSM_HOOK_INIT ( task_getlsmprop_obj , selinux_task_getlsmprop_obj ) ,
2015-05-02 15:11:36 -07:00
LSM_HOOK_INIT ( task_setnice , selinux_task_setnice ) ,
LSM_HOOK_INIT ( task_setioprio , selinux_task_setioprio ) ,
LSM_HOOK_INIT ( task_getioprio , selinux_task_getioprio ) ,
prlimit,security,selinux: add a security hook for prlimit
When SELinux was first added to the kernel, a process could only get
and set its own resource limits via getrlimit(2) and setrlimit(2), so no
MAC checks were required for those operations, and thus no security hooks
were defined for them. Later, SELinux introduced a hook for setrlimit(2)
with a check if the hard limit was being changed in order to be able to
rely on the hard limit value as a safe reset point upon context
transitions.
Later on, when prlimit(2) was added to the kernel with the ability to get
or set resource limits (hard or soft) of another process, LSM/SELinux was
not updated other than to pass the target process to the setrlimit hook.
This resulted in incomplete control over both getting and setting the
resource limits of another process.
Add a new security_task_prlimit() hook to the check_prlimit_permission()
function to provide complete mediation. The hook is only called when
acting on another task, and only if the existing DAC/capability checks
would allow access. Pass flags down to the hook to indicate whether the
prlimit(2) call will read, write, or both read and write the resource
limits of the target process.
The existing security_task_setrlimit() hook is left alone; it continues
to serve a purpose in supporting the ability to make decisions based on
the old and/or new resource limit values when setting limits. This
is consistent with the DAC/capability logic, where
check_prlimit_permission() performs generic DAC/capability checks for
acting on another task, while do_prlimit() performs a capability check
based on a comparison of the old and new resource limits. Fix the
inline documentation for the hook to match the code.
Implement the new hook for SELinux. For setting resource limits, we
reuse the existing setrlimit permission. Note that this does overload
the setrlimit permission to mean the ability to set the resource limit
(soft or hard) of another process or the ability to change one's own
hard limit. For getting resource limits, a new getrlimit permission
is defined. This was not originally defined since getrlimit(2) could
only be used to obtain a process' own limits.
Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: James Morris <james.l.morris@oracle.com>
2017-02-17 07:57:00 -05:00
LSM_HOOK_INIT ( task_prlimit , selinux_task_prlimit ) ,
2015-05-02 15:11:36 -07:00
LSM_HOOK_INIT ( task_setrlimit , selinux_task_setrlimit ) ,
LSM_HOOK_INIT ( task_setscheduler , selinux_task_setscheduler ) ,
LSM_HOOK_INIT ( task_getscheduler , selinux_task_getscheduler ) ,
LSM_HOOK_INIT ( task_movememory , selinux_task_movememory ) ,
LSM_HOOK_INIT ( task_kill , selinux_task_kill ) ,
LSM_HOOK_INIT ( task_to_inode , selinux_task_to_inode ) ,
2022-08-15 11:20:28 -05:00
LSM_HOOK_INIT ( userns_create , selinux_userns_create ) ,
2015-05-02 15:11:36 -07:00
LSM_HOOK_INIT ( ipc_permission , selinux_ipc_permission ) ,
2024-10-09 10:32:13 -07:00
LSM_HOOK_INIT ( ipc_getlsmprop , selinux_ipc_getlsmprop ) ,
2015-05-02 15:11:36 -07:00
LSM_HOOK_INIT ( msg_queue_associate , selinux_msg_queue_associate ) ,
LSM_HOOK_INIT ( msg_queue_msgctl , selinux_msg_queue_msgctl ) ,
LSM_HOOK_INIT ( msg_queue_msgsnd , selinux_msg_queue_msgsnd ) ,
LSM_HOOK_INIT ( msg_queue_msgrcv , selinux_msg_queue_msgrcv ) ,
LSM_HOOK_INIT ( shm_associate , selinux_shm_associate ) ,
LSM_HOOK_INIT ( shm_shmctl , selinux_shm_shmctl ) ,
LSM_HOOK_INIT ( shm_shmat , selinux_shm_shmat ) ,
LSM_HOOK_INIT ( sem_associate , selinux_sem_associate ) ,
LSM_HOOK_INIT ( sem_semctl , selinux_sem_semctl ) ,
LSM_HOOK_INIT ( sem_semop , selinux_sem_semop ) ,
LSM_HOOK_INIT ( d_instantiate , selinux_d_instantiate ) ,
2023-09-12 13:56:55 -07:00
LSM_HOOK_INIT ( getselfattr , selinux_getselfattr ) ,
LSM_HOOK_INIT ( setselfattr , selinux_setselfattr ) ,
2015-05-02 15:11:36 -07:00
LSM_HOOK_INIT ( getprocattr , selinux_getprocattr ) ,
LSM_HOOK_INIT ( setprocattr , selinux_setprocattr ) ,
LSM_HOOK_INIT ( ismaclabel , selinux_ismaclabel ) ,
LSM_HOOK_INIT ( secctx_to_secid , selinux_secctx_to_secid ) ,
LSM_HOOK_INIT ( release_secctx , selinux_release_secctx ) ,
2015-12-24 11:09:40 -05:00
LSM_HOOK_INIT ( inode_invalidate_secctx , selinux_inode_invalidate_secctx ) ,
2015-05-02 15:11:36 -07:00
LSM_HOOK_INIT ( inode_notifysecctx , selinux_inode_notifysecctx ) ,
LSM_HOOK_INIT ( inode_setsecctx , selinux_inode_setsecctx ) ,
LSM_HOOK_INIT ( unix_stream_connect , selinux_socket_unix_stream_connect ) ,
LSM_HOOK_INIT ( unix_may_send , selinux_socket_unix_may_send ) ,
LSM_HOOK_INIT ( socket_create , selinux_socket_create ) ,
LSM_HOOK_INIT ( socket_post_create , selinux_socket_post_create ) ,
2018-05-04 16:28:21 +02:00
LSM_HOOK_INIT ( socket_socketpair , selinux_socket_socketpair ) ,
2015-05-02 15:11:36 -07:00
LSM_HOOK_INIT ( socket_bind , selinux_socket_bind ) ,
LSM_HOOK_INIT ( socket_connect , selinux_socket_connect ) ,
LSM_HOOK_INIT ( socket_listen , selinux_socket_listen ) ,
LSM_HOOK_INIT ( socket_accept , selinux_socket_accept ) ,
LSM_HOOK_INIT ( socket_sendmsg , selinux_socket_sendmsg ) ,
LSM_HOOK_INIT ( socket_recvmsg , selinux_socket_recvmsg ) ,
LSM_HOOK_INIT ( socket_getsockname , selinux_socket_getsockname ) ,
LSM_HOOK_INIT ( socket_getpeername , selinux_socket_getpeername ) ,
LSM_HOOK_INIT ( socket_getsockopt , selinux_socket_getsockopt ) ,
LSM_HOOK_INIT ( socket_setsockopt , selinux_socket_setsockopt ) ,
LSM_HOOK_INIT ( socket_shutdown , selinux_socket_shutdown ) ,
LSM_HOOK_INIT ( socket_sock_rcv_skb , selinux_socket_sock_rcv_skb ) ,
LSM_HOOK_INIT ( socket_getpeersec_stream ,
selinux_socket_getpeersec_stream ) ,
LSM_HOOK_INIT ( socket_getpeersec_dgram , selinux_socket_getpeersec_dgram ) ,
LSM_HOOK_INIT ( sk_free_security , selinux_sk_free_security ) ,
LSM_HOOK_INIT ( sk_clone_security , selinux_sk_clone_security ) ,
LSM_HOOK_INIT ( sk_getsecid , selinux_sk_getsecid ) ,
LSM_HOOK_INIT ( sock_graft , selinux_sock_graft ) ,
2018-02-13 20:57:18 +00:00
LSM_HOOK_INIT ( sctp_assoc_request , selinux_sctp_assoc_request ) ,
LSM_HOOK_INIT ( sctp_sk_clone , selinux_sctp_sk_clone ) ,
LSM_HOOK_INIT ( sctp_bind_connect , selinux_sctp_bind_connect ) ,
security: implement sctp_assoc_established hook in selinux
Do this by extracting the peer labeling per-association logic from
selinux_sctp_assoc_request() into a new helper
selinux_sctp_process_new_assoc() and use this helper in both
selinux_sctp_assoc_request() and selinux_sctp_assoc_established(). This
ensures that the peer labeling behavior as documented in
Documentation/security/SCTP.rst is applied both on the client and server
side:
"""
An SCTP socket will only have one peer label assigned to it. This will be
assigned during the establishment of the first association. Any further
associations on this socket will have their packet peer label compared to
the sockets peer label, and only if they are different will the
``association`` permission be validated. This is validated by checking the
socket peer sid against the received packets peer sid to determine whether
the association should be allowed or denied.
"""
At the same time, it also ensures that the peer label of the association
is set to the correct value, such that if it is peeled off into a new
socket, the socket's peer label will then be set to the association's
peer label, same as it already works on the server side.
While selinux_inet_conn_established() (which we are replacing by
selinux_sctp_assoc_established() for SCTP) only deals with assigning a
peer label to the connection (socket), in case of SCTP we need to also
copy the (local) socket label to the association, so that
selinux_sctp_sk_clone() can then pick it up for the new socket in case
of SCTP peeloff.
Careful readers will notice that the selinux_sctp_process_new_assoc()
helper also includes the "IPv4 packet received over an IPv6 socket"
check, even though it hadn't been in selinux_sctp_assoc_request()
before. While such check is not necessary in
selinux_inet_conn_request() (because struct request_sock's family field
is already set according to the skb's family), here it is needed, as we
don't have request_sock and we take the initial family from the socket.
In selinux_sctp_assoc_established() it is similarly needed as well (and
also selinux_inet_conn_established() already has it).
Fixes: 72e89f50084c ("security: Add support for SCTP security hooks")
Reported-by: Prashanth Prahlad <pprahlad@redhat.com>
Based-on-patch-by: Xin Long <lucien.xin@gmail.com>
Reviewed-by: Xin Long <lucien.xin@gmail.com>
Tested-by: Richard Haines <richard_c_haines@btinternet.com>
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
2022-02-12 18:59:22 +01:00
LSM_HOOK_INIT ( sctp_assoc_established , selinux_sctp_assoc_established ) ,
2023-04-20 19:17:14 +02:00
LSM_HOOK_INIT ( mptcp_add_subflow , selinux_mptcp_add_subflow ) ,
2015-05-02 15:11:36 -07:00
LSM_HOOK_INIT ( inet_conn_request , selinux_inet_conn_request ) ,
LSM_HOOK_INIT ( inet_csk_clone , selinux_inet_csk_clone ) ,
LSM_HOOK_INIT ( inet_conn_established , selinux_inet_conn_established ) ,
LSM_HOOK_INIT ( secmark_relabel_packet , selinux_secmark_relabel_packet ) ,
LSM_HOOK_INIT ( secmark_refcount_inc , selinux_secmark_refcount_inc ) ,
LSM_HOOK_INIT ( secmark_refcount_dec , selinux_secmark_refcount_dec ) ,
LSM_HOOK_INIT ( req_classify_flow , selinux_req_classify_flow ) ,
LSM_HOOK_INIT ( tun_dev_create , selinux_tun_dev_create ) ,
LSM_HOOK_INIT ( tun_dev_attach_queue , selinux_tun_dev_attach_queue ) ,
LSM_HOOK_INIT ( tun_dev_attach , selinux_tun_dev_attach ) ,
LSM_HOOK_INIT ( tun_dev_open , selinux_tun_dev_open ) ,
2017-05-19 15:48:56 +03:00
# ifdef CONFIG_SECURITY_INFINIBAND
2017-05-19 15:48:57 +03:00
LSM_HOOK_INIT ( ib_pkey_access , selinux_ib_pkey_access ) ,
2017-05-19 15:48:58 +03:00
LSM_HOOK_INIT ( ib_endport_manage_subnet ,
selinux_ib_endport_manage_subnet ) ,
2017-05-19 15:48:56 +03:00
# endif
[LSM-IPSec]: Per-packet access control.
This patch series implements per packet access control via the
extension of the Linux Security Modules (LSM) interface by hooks in
the XFRM and pfkey subsystems that leverage IPSec security
associations to label packets. Extensions to the SELinux LSM are
included that leverage the patch for this purpose.
This patch implements the changes necessary to the SELinux LSM to
create, deallocate, and use security contexts for policies
(xfrm_policy) and security associations (xfrm_state) that enable
control of a socket's ability to send and receive packets.
Patch purpose:
The patch is designed to enable the SELinux LSM to implement access
control on individual packets based on the strongly authenticated
IPSec security association. Such access controls augment the existing
ones in SELinux based on network interface and IP address. The former
are very coarse-grained, and the latter can be spoofed. By using
IPSec, the SELinux can control access to remote hosts based on
cryptographic keys generated using the IPSec mechanism. This enables
access control on a per-machine basis or per-application if the remote
machine is running the same mechanism and trusted to enforce the
access control policy.
Patch design approach:
The patch's main function is to authorize a socket's access to a IPSec
policy based on their security contexts. Since the communication is
implemented by a security association, the patch ensures that the
security associations negotiated and used have the same security
context. The patch enables allocation and deallocation of such
security contexts for policies and security associations. It also
enables copying of the security context when policies are cloned.
Lastly, the patch ensures that packets that are sent without using a
IPSec security association with a security context are allowed to be
sent in that manner.
A presentation available at
www.selinux-symposium.org/2005/presentations/session2/2-3-jaeger.pdf
from the SELinux symposium describes the overall approach.
Patch implementation details:
The function which authorizes a socket to perform a requested
operation (send/receive) on a IPSec policy (xfrm_policy) is
selinux_xfrm_policy_lookup. The Netfilter and rcv_skb hooks ensure
that if an IPSec SA with a security association has not been used,
then the socket is allowed to send or receive the packet,
respectively.
The patch implements SELinux function for allocating security contexts
when policies (xfrm_policy) are created via the pfkey or xfrm_user
interfaces via selinux_xfrm_policy_alloc. When a security association
is built, SELinux allocates the security context designated by the
XFRM subsystem which is based on that of the authorized policy via
selinux_xfrm_state_alloc.
When a xfrm_policy is cloned, the security context of that policy, if
any, is copied to the clone via selinux_xfrm_policy_clone.
When a xfrm_policy or xfrm_state is freed, its security context, if
any is also freed at selinux_xfrm_policy_free or
selinux_xfrm_state_free.
Testing:
The SELinux authorization function is tested using ipsec-tools. We
created policies and security associations with particular security
contexts and added SELinux access control policy entries to verify the
authorization decision. We also made sure that packets for which no
security context was supplied (which either did or did not use
security associations) were authorized using an unlabelled context.
Signed-off-by: Trent Jaeger <tjaeger@cse.psu.edu>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
2005-12-13 23:12:40 -08:00
# ifdef CONFIG_SECURITY_NETWORK_XFRM
2015-05-02 15:11:36 -07:00
LSM_HOOK_INIT ( xfrm_policy_free_security , selinux_xfrm_policy_free ) ,
LSM_HOOK_INIT ( xfrm_policy_delete_security , selinux_xfrm_policy_delete ) ,
LSM_HOOK_INIT ( xfrm_state_free_security , selinux_xfrm_state_free ) ,
LSM_HOOK_INIT ( xfrm_state_delete_security , selinux_xfrm_state_delete ) ,
LSM_HOOK_INIT ( xfrm_policy_lookup , selinux_xfrm_policy_lookup ) ,
LSM_HOOK_INIT ( xfrm_state_pol_flow_match ,
selinux_xfrm_state_pol_flow_match ) ,
LSM_HOOK_INIT ( xfrm_decode_session , selinux_xfrm_decode_session ) ,
2005-04-16 15:20:36 -07:00
# endif
2006-06-22 14:47:17 -07:00
# ifdef CONFIG_KEYS
2015-05-02 15:11:36 -07:00
LSM_HOOK_INIT ( key_permission , selinux_key_permission ) ,
LSM_HOOK_INIT ( key_getsecurity , selinux_key_getsecurity ) ,
2020-01-14 17:07:13 +00:00
# ifdef CONFIG_KEY_NOTIFICATIONS
LSM_HOOK_INIT ( watch_key , selinux_watch_key ) ,
# endif
2006-06-22 14:47:17 -07:00
# endif
2008-03-01 22:03:14 +02:00
# ifdef CONFIG_AUDIT
2015-05-02 15:11:36 -07:00
LSM_HOOK_INIT ( audit_rule_known , selinux_audit_rule_known ) ,
LSM_HOOK_INIT ( audit_rule_match , selinux_audit_rule_match ) ,
LSM_HOOK_INIT ( audit_rule_free , selinux_audit_rule_free ) ,
2008-03-01 22:03:14 +02:00
# endif
2017-10-18 13:00:25 -07:00
# ifdef CONFIG_BPF_SYSCALL
LSM_HOOK_INIT ( bpf , selinux_bpf ) ,
LSM_HOOK_INIT ( bpf_map , selinux_bpf_map ) ,
LSM_HOOK_INIT ( bpf_prog , selinux_bpf_prog ) ,
bpf,lsm: Refactor bpf_map_alloc/bpf_map_free LSM hooks
Similarly to bpf_prog_alloc LSM hook, rename and extend bpf_map_alloc
hook into bpf_map_create, taking not just struct bpf_map, but also
bpf_attr and bpf_token, to give a fuller context to LSMs.
Unlike bpf_prog_alloc, there is no need to move the hook around, as it
currently is firing right before allocating BPF map ID and FD, which
seems to be a sweet spot.
But like bpf_prog_alloc/bpf_prog_free combo, make sure that bpf_map_free
LSM hook is called even if bpf_map_create hook returned error, as if few
LSMs are combined together it could be that one LSM successfully
allocated security blob for its needs, while subsequent LSM rejected BPF
map creation. The former LSM would still need to free up LSM blob, so we
need to ensure security_bpf_map_free() is called regardless of the
outcome.
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Paul Moore <paul@paul-moore.com>
Link: https://lore.kernel.org/bpf/20240124022127.2379740-11-andrii@kernel.org
2024-01-23 18:21:07 -08:00
LSM_HOOK_INIT ( bpf_map_free , selinux_bpf_map_free ) ,
bpf,lsm: Refactor bpf_prog_alloc/bpf_prog_free LSM hooks
Based on upstream discussion ([0]), rework existing
bpf_prog_alloc_security LSM hook. Rename it to bpf_prog_load and instead
of passing bpf_prog_aux, pass proper bpf_prog pointer for a full BPF
program struct. Also, we pass bpf_attr union with all the user-provided
arguments for BPF_PROG_LOAD command. This will give LSMs as much
information as we can basically provide.
The hook is also BPF token-aware now, and optional bpf_token struct is
passed as a third argument. bpf_prog_load LSM hook is called after
a bunch of sanity checks were performed, bpf_prog and bpf_prog_aux were
allocated and filled out, but right before performing full-fledged BPF
verification step.
bpf_prog_free LSM hook is now accepting struct bpf_prog argument, for
consistency. SELinux code is adjusted to all new names, types, and
signatures.
Note, given that bpf_prog_load (previously bpf_prog_alloc) hook can be
used by some LSMs to allocate extra security blob, but also by other
LSMs to reject BPF program loading, we need to make sure that
bpf_prog_free LSM hook is called after bpf_prog_load/bpf_prog_alloc one
*even* if the hook itself returned error. If we don't do that, we run
the risk of leaking memory. This seems to be possible today when
combining SELinux and BPF LSM, as one example, depending on their
relative ordering.
Also, for BPF LSM setup, add bpf_prog_load and bpf_prog_free to
sleepable LSM hooks list, as they are both executed in sleepable
context. Also drop bpf_prog_load hook from untrusted, as there is no
issue with refcount or anything else anymore, that originally forced us
to add it to untrusted list in c0c852dd1876 ("bpf: Do not mark certain LSM
hook arguments as trusted"). We now trigger this hook much later and it
should not be an issue anymore.
[0] https://lore.kernel.org/bpf/9fe88aef7deabbe87d3fc38c4aea3c69.paul@paul-moore.com/
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Paul Moore <paul@paul-moore.com>
Link: https://lore.kernel.org/bpf/20240124022127.2379740-10-andrii@kernel.org
2024-01-23 18:21:06 -08:00
LSM_HOOK_INIT ( bpf_prog_free , selinux_bpf_prog_free ) ,
2024-01-23 18:21:14 -08:00
LSM_HOOK_INIT ( bpf_token_free , selinux_bpf_token_free ) ,
2017-10-18 13:00:25 -07:00
# endif
perf_event: Add support for LSM and SELinux checks
In current mainline, the degree of access to perf_event_open(2) system
call depends on the perf_event_paranoid sysctl. This has a number of
limitations:
1. The sysctl is only a single value. Many types of accesses are controlled
based on the single value thus making the control very limited and
coarse grained.
2. The sysctl is global, so if the sysctl is changed, then that means
all processes get access to perf_event_open(2) opening the door to
security issues.
This patch adds LSM and SELinux access checking which will be used in
Android to access perf_event_open(2) for the purposes of attaching BPF
programs to tracepoints, perf profiling and other operations from
userspace. These operations are intended for production systems.
5 new LSM hooks are added:
1. perf_event_open: This controls access during the perf_event_open(2)
syscall itself. The hook is called from all the places that the
perf_event_paranoid sysctl is checked to keep it consistent with the
sysctl. The hook gets passed a 'type' argument which controls CPU,
kernel and tracepoint accesses (in this context, CPU, kernel and
tracepoint have the same semantics as the perf_event_paranoid sysctl).
Additionally, I added an 'open' type which is similar to
perf_event_paranoid sysctl == 3 patch carried in Android and several other
distros but was rejected in mainline [1] in 2016.
2. perf_event_alloc: This allocates a new security object for the event
which stores the current SID within the event. It will be useful when
the perf event's FD is passed through IPC to another process which may
try to read the FD. Appropriate security checks will limit access.
3. perf_event_free: Called when the event is closed.
4. perf_event_read: Called from the read(2) and mmap(2) syscalls for the event.
5. perf_event_write: Called from the ioctl(2) syscalls for the event.
[1] https://lwn.net/Articles/696240/
Since Peter had suggested LSM hooks in 2016 [1], I am adding his
Suggested-by tag below.
To use this patch, we set the perf_event_paranoid sysctl to -1 and then
apply selinux checking as appropriate (default deny everything, and then
add policy rules to give access to domains that need it). In the future
we can remove the perf_event_paranoid sysctl altogether.
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Co-developed-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: James Morris <jmorris@namei.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: rostedt@goodmis.org
Cc: Yonghong Song <yhs@fb.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: jeffv@google.com
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: primiano@google.com
Cc: Song Liu <songliubraving@fb.com>
Cc: rsavitski@google.com
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Matthew Garrett <matthewgarrett@google.com>
Link: https://lkml.kernel.org/r/20191014170308.70668-1-joel@joelfernandes.org
2019-10-14 13:03:08 -04:00
# ifdef CONFIG_PERF_EVENTS
LSM_HOOK_INIT ( perf_event_open , selinux_perf_event_open ) ,
LSM_HOOK_INIT ( perf_event_read , selinux_perf_event_read ) ,
LSM_HOOK_INIT ( perf_event_write , selinux_perf_event_write ) ,
# endif
security,lockdown,selinux: implement SELinux lockdown
Implement a SELinux hook for lockdown. If the lockdown module is also
enabled, then a denial by the lockdown module will take precedence over
SELinux, so SELinux can only further restrict lockdown decisions.
The SELinux hook only distinguishes at the granularity of integrity
versus confidentiality similar to the lockdown module, but includes the
full lockdown reason as part of the audit record as a hint in diagnosing
what triggered the denial. To support this auditing, move the
lockdown_reasons[] string array from being private to the lockdown
module to the security framework so that it can be used by the lsm audit
code and so that it is always available even when the lockdown module
is disabled.
Note that the SELinux implementation allows the integrity and
confidentiality reasons to be controlled independently from one another.
Thus, in an SELinux policy, one could allow operations that specify
an integrity reason while blocking operations that specify a
confidentiality reason. The SELinux hook implementation is
stricter than the lockdown module in validating the provided reason value.
Sample AVC audit output from denials:
avc: denied { integrity } for pid=3402 comm="fwupd"
lockdown_reason="/dev/mem,kmem,port" scontext=system_u:system_r:fwupd_t:s0
tcontext=system_u:system_r:fwupd_t:s0 tclass=lockdown permissive=0
avc: denied { confidentiality } for pid=4628 comm="cp"
lockdown_reason="/proc/kcore access"
scontext=unconfined_u:unconfined_r:test_lockdown_integrity_t:s0-s0:c0.c1023
tcontext=unconfined_u:unconfined_r:test_lockdown_integrity_t:s0-s0:c0.c1023
tclass=lockdown permissive=0
Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Reviewed-by: James Morris <jamorris@linux.microsoft.com>
[PM: some merge fuzz due to the perf hooks]
Signed-off-by: Paul Moore <paul@paul-moore.com>
2019-11-27 12:04:36 -05:00
2021-02-23 18:16:45 -05:00
# ifdef CONFIG_IO_URING
LSM_HOOK_INIT ( uring_override_creds , selinux_uring_override_creds ) ,
LSM_HOOK_INIT ( uring_sqpoll , selinux_uring_sqpoll ) ,
2022-08-10 15:55:36 -04:00
LSM_HOOK_INIT ( uring_cmd , selinux_uring_cmd ) ,
2021-02-23 18:16:45 -05:00
# endif
2020-01-08 15:09:58 +01:00
/*
* PUT " CLONING " ( ACCESSING + ALLOCATING ) HOOKS HERE
*/
vfs, security: Fix automount superblock LSM init problem, preventing NFS sb sharing
When NFS superblocks are created by automounting, their LSM parameters
aren't set in the fs_context struct prior to sget_fc() being called,
leading to failure to match existing superblocks.
This bug leads to messages like the following appearing in dmesg when
fscache is enabled:
NFS: Cache volume key already in use (nfs,4.2,2,108,106a8c0,1,,,,100000,100000,2ee,3a98,1d4c,3a98,1)
Fix this by adding a new LSM hook to load fc->security for submount
creation.
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Link: https://lore.kernel.org/r/165962680944.3334508.6610023900349142034.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/165962729225.3357250.14350728846471527137.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/165970659095.2812394.6868894171102318796.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/166133579016.3678898.6283195019480567275.stgit@warthog.procyon.org.uk/ # v4
Link: https://lore.kernel.org/r/217595.1662033775@warthog.procyon.org.uk/ # v5
Fixes: 9bc61ab18b1d ("vfs: Introduce fs_context, switch vfs_kern_mount() to it.")
Fixes: 779df6a5480f ("NFS: Ensure security label is set for root inode")
Tested-by: Jeff Layton <jlayton@kernel.org>
Acked-by: Casey Schaufler <casey@schaufler-ca.com>
Acked-by: "Christian Brauner (Microsoft)" <brauner@kernel.org>
Acked-by: Paul Moore <paul@paul-moore.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Message-Id: <20230808-master-v9-1-e0ecde888221@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
2023-08-08 07:34:20 -04:00
LSM_HOOK_INIT ( fs_context_submount , selinux_fs_context_submount ) ,
2020-01-08 15:09:58 +01:00
LSM_HOOK_INIT ( fs_context_dup , selinux_fs_context_dup ) ,
LSM_HOOK_INIT ( fs_context_parse_param , selinux_fs_context_parse_param ) ,
LSM_HOOK_INIT ( sb_eat_lsm_opts , selinux_sb_eat_lsm_opts ) ,
# ifdef CONFIG_SECURITY_NETWORK_XFRM
LSM_HOOK_INIT ( xfrm_policy_clone_security , selinux_xfrm_policy_clone ) ,
# endif
/*
* PUT " ALLOCATING " HOOKS HERE
*/
LSM_HOOK_INIT ( msg_msg_alloc_security , selinux_msg_msg_alloc_security ) ,
LSM_HOOK_INIT ( msg_queue_alloc_security ,
selinux_msg_queue_alloc_security ) ,
LSM_HOOK_INIT ( shm_alloc_security , selinux_shm_alloc_security ) ,
LSM_HOOK_INIT ( sb_alloc_security , selinux_sb_alloc_security ) ,
LSM_HOOK_INIT ( inode_alloc_security , selinux_inode_alloc_security ) ,
LSM_HOOK_INIT ( sem_alloc_security , selinux_sem_alloc_security ) ,
LSM_HOOK_INIT ( secid_to_secctx , selinux_secid_to_secctx ) ,
2024-10-09 10:32:11 -07:00
LSM_HOOK_INIT ( lsmprop_to_secctx , selinux_lsmprop_to_secctx ) ,
2020-01-08 15:09:58 +01:00
LSM_HOOK_INIT ( inode_getsecctx , selinux_inode_getsecctx ) ,
LSM_HOOK_INIT ( sk_alloc_security , selinux_sk_alloc_security ) ,
LSM_HOOK_INIT ( tun_dev_alloc_security , selinux_tun_dev_alloc_security ) ,
# ifdef CONFIG_SECURITY_INFINIBAND
LSM_HOOK_INIT ( ib_alloc_security , selinux_ib_alloc_security ) ,
# endif
# ifdef CONFIG_SECURITY_NETWORK_XFRM
LSM_HOOK_INIT ( xfrm_policy_alloc_security , selinux_xfrm_policy_alloc ) ,
LSM_HOOK_INIT ( xfrm_state_alloc , selinux_xfrm_state_alloc ) ,
LSM_HOOK_INIT ( xfrm_state_alloc_acquire ,
selinux_xfrm_state_alloc_acquire ) ,
# endif
# ifdef CONFIG_KEYS
LSM_HOOK_INIT ( key_alloc , selinux_key_alloc ) ,
# endif
# ifdef CONFIG_AUDIT
LSM_HOOK_INIT ( audit_rule_init , selinux_audit_rule_init ) ,
# endif
# ifdef CONFIG_BPF_SYSCALL
bpf,lsm: Refactor bpf_map_alloc/bpf_map_free LSM hooks
Similarly to bpf_prog_alloc LSM hook, rename and extend bpf_map_alloc
hook into bpf_map_create, taking not just struct bpf_map, but also
bpf_attr and bpf_token, to give a fuller context to LSMs.
Unlike bpf_prog_alloc, there is no need to move the hook around, as it
currently is firing right before allocating BPF map ID and FD, which
seems to be a sweet spot.
But like bpf_prog_alloc/bpf_prog_free combo, make sure that bpf_map_free
LSM hook is called even if bpf_map_create hook returned error, as if few
LSMs are combined together it could be that one LSM successfully
allocated security blob for its needs, while subsequent LSM rejected BPF
map creation. The former LSM would still need to free up LSM blob, so we
need to ensure security_bpf_map_free() is called regardless of the
outcome.
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Paul Moore <paul@paul-moore.com>
Link: https://lore.kernel.org/bpf/20240124022127.2379740-11-andrii@kernel.org
2024-01-23 18:21:07 -08:00
LSM_HOOK_INIT ( bpf_map_create , selinux_bpf_map_create ) ,
bpf,lsm: Refactor bpf_prog_alloc/bpf_prog_free LSM hooks
Based on upstream discussion ([0]), rework existing
bpf_prog_alloc_security LSM hook. Rename it to bpf_prog_load and instead
of passing bpf_prog_aux, pass proper bpf_prog pointer for a full BPF
program struct. Also, we pass bpf_attr union with all the user-provided
arguments for BPF_PROG_LOAD command. This will give LSMs as much
information as we can basically provide.
The hook is also BPF token-aware now, and optional bpf_token struct is
passed as a third argument. bpf_prog_load LSM hook is called after
a bunch of sanity checks were performed, bpf_prog and bpf_prog_aux were
allocated and filled out, but right before performing full-fledged BPF
verification step.
bpf_prog_free LSM hook is now accepting struct bpf_prog argument, for
consistency. SELinux code is adjusted to all new names, types, and
signatures.
Note, given that bpf_prog_load (previously bpf_prog_alloc) hook can be
used by some LSMs to allocate extra security blob, but also by other
LSMs to reject BPF program loading, we need to make sure that
bpf_prog_free LSM hook is called after bpf_prog_load/bpf_prog_alloc one
*even* if the hook itself returned error. If we don't do that, we run
the risk of leaking memory. This seems to be possible today when
combining SELinux and BPF LSM, as one example, depending on their
relative ordering.
Also, for BPF LSM setup, add bpf_prog_load and bpf_prog_free to
sleepable LSM hooks list, as they are both executed in sleepable
context. Also drop bpf_prog_load hook from untrusted, as there is no
issue with refcount or anything else anymore, that originally forced us
to add it to untrusted list in c0c852dd1876 ("bpf: Do not mark certain LSM
hook arguments as trusted"). We now trigger this hook much later and it
should not be an issue anymore.
[0] https://lore.kernel.org/bpf/9fe88aef7deabbe87d3fc38c4aea3c69.paul@paul-moore.com/
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Paul Moore <paul@paul-moore.com>
Link: https://lore.kernel.org/bpf/20240124022127.2379740-10-andrii@kernel.org
2024-01-23 18:21:06 -08:00
LSM_HOOK_INIT ( bpf_prog_load , selinux_bpf_prog_load ) ,
2024-01-23 18:21:14 -08:00
LSM_HOOK_INIT ( bpf_token_create , selinux_bpf_token_create ) ,
2020-01-08 15:09:58 +01:00
# endif
# ifdef CONFIG_PERF_EVENTS
LSM_HOOK_INIT ( perf_event_alloc , selinux_perf_event_alloc ) ,
# endif
2005-04-16 15:20:36 -07:00
} ;
/*
 * selinux_init - boot-time initializer wired into DEFINE_LSM(selinux).
 *
 * In order, the function:
 *   - logs an "Initializing" message;
 *   - zeroes selinux_state, applies selinux_enforcing_boot through
 *     enforcing_set(), calls selinux_avc_init() and initialises the
 *     status_lock and policy_mutex mutexes of selinux_state;
 *   - calls cred_init_security() for the initial task;
 *   - derives default_noexec from (VM_DATA_DEFAULT_FLAGS & VM_EXEC) and
 *     emits a notice when memory is executable by default;
 *   - calls avc_init() and the avtab/ebitmap/hashtab cache initialisers;
 *   - registers selinux_hooks with security_add_hooks();
 *   - registers two AVC_CALLBACK_RESET callbacks and panics if either
 *     registration reports failure;
 *   - logs whether the boot mode is enforcing or permissive;
 *   - calls fs_validate_description() on selinux_fs_parameters.
 *
 * Return: always 0; the only failure paths end in panic().
 *
 * NOTE(review): the dated lines and commit-message paragraphs interleaved
 * with the statements below are blame annotations carried by this dump,
 * not C source.
 */
static __init int selinux_init ( void )
{
2018-06-12 10:09:03 +02:00
pr_info ( " SELinux: Initializing. \n " ) ;
2005-04-16 15:20:36 -07:00
2018-03-01 18:48:02 -05:00
memset ( & selinux_state , 0 , sizeof ( selinux_state ) ) ;
2023-03-09 13:30:37 -05:00
enforcing_set ( selinux_enforcing_boot ) ;
selinux_avc_init ( ) ;
2020-01-17 14:15:14 +01:00
mutex_init ( & selinux_state . status_lock ) ;
2020-08-26 13:28:53 -04:00
mutex_init ( & selinux_state . policy_mutex ) ;
2018-03-01 18:48:02 -05:00
2005-04-16 15:20:36 -07:00
/* Set the security state for the initial task. */
CRED: Inaugurate COW credentials
Inaugurate copy-on-write credentials management. This uses RCU to manage the
credentials pointer in the task_struct with respect to accesses by other tasks.
A process may only modify its own credentials, and so does not need locking to
access or modify its own credentials.
A mutex (cred_replace_mutex) is added to the task_struct to control the effect
of PTRACE_ATTACHED on credential calculations, particularly with respect to
execve().
With this patch, the contents of an active credentials struct may not be
changed directly; rather a new set of credentials must be prepared, modified
and committed using something like the following sequence of events:
struct cred *new = prepare_creds();
int ret = blah(new);
if (ret < 0) {
abort_creds(new);
return ret;
}
return commit_creds(new);
There are some exceptions to this rule: the keyrings pointed to by the active
credentials may be instantiated - keyrings violate the COW rule as managing
COW keyrings is tricky, given that it is possible for a task to directly alter
the keys in a keyring in use by another task.
To help enforce this, various pointers to sets of credentials, such as those in
the task_struct, are declared const. The purpose of this is compile-time
discouragement of altering credentials through those pointers. Once a set of
credentials has been made public through one of these pointers, it may not be
modified, except under special circumstances:
(1) Its reference count may incremented and decremented.
(2) The keyrings to which it points may be modified, but not replaced.
The only safe way to modify anything else is to create a replacement and commit
using the functions described in Documentation/credentials.txt (which will be
added by a later patch).
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
This now prepares and commits credentials in various places in the
security code rather than altering the current creds directly.
(2) Temporary credential overrides.
do_coredump() and sys_faccessat() now prepare their own credentials and
temporarily override the ones currently on the acting thread, whilst
preventing interference from other threads by holding cred_replace_mutex
on the thread being dumped.
This will be replaced in a future patch by something that hands down the
credentials directly to the functions being called, rather than altering
the task's objective credentials.
(3) LSM interface.
A number of functions have been changed, added or removed:
(*) security_capset_check(), ->capset_check()
(*) security_capset_set(), ->capset_set()
Removed in favour of security_capset().
(*) security_capset(), ->capset()
New. This is passed a pointer to the new creds, a pointer to the old
creds and the proposed capability sets. It should fill in the new
creds or return an error. All pointers, barring the pointer to the
new creds, are now const.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
Changed; now returns a value, which will cause the process to be
killed if it's an error.
(*) security_task_alloc(), ->task_alloc_security()
Removed in favour of security_prepare_creds().
(*) security_cred_free(), ->cred_free()
New. Free security data attached to cred->security.
(*) security_prepare_creds(), ->cred_prepare()
New. Duplicate any security data attached to cred->security.
(*) security_commit_creds(), ->cred_commit()
New. Apply any security effects for the upcoming installation of new
security by commit_creds().
(*) security_task_post_setuid(), ->task_post_setuid()
Removed in favour of security_task_fix_setuid().
(*) security_task_fix_setuid(), ->task_fix_setuid()
Fix up the proposed new credentials for setuid(). This is used by
cap_set_fix_setuid() to implicitly adjust capabilities in line with
setuid() changes. Changes are made to the new credentials, rather
than the task itself as in security_task_post_setuid().
(*) security_task_reparent_to_init(), ->task_reparent_to_init()
Removed. Instead the task being reparented to init is referred
directly to init's credentials.
NOTE! This results in the loss of some state: SELinux's osid no
longer records the sid of the thread that forked it.
(*) security_key_alloc(), ->key_alloc()
(*) security_key_permission(), ->key_permission()
Changed. These now take cred pointers rather than task pointers to
refer to the security context.
(4) sys_capset().
This has been simplified and uses less locking. The LSM functions it
calls have been merged.
(5) reparent_to_kthreadd().
This gives the current thread the same credentials as init by simply using
commit_thread() to point that way.
(6) __sigqueue_alloc() and switch_uid()
__sigqueue_alloc() can't stop the target task from changing its creds
beneath it, so this function gets a reference to the currently applicable
user_struct which it then passes into the sigqueue struct it returns if
successful.
switch_uid() is now called from commit_creds(), and possibly should be
folded into that. commit_creds() should take care of protecting
__sigqueue_alloc().
(7) [sg]et[ug]id() and co and [sg]et_current_groups.
The set functions now all use prepare_creds(), commit_creds() and
abort_creds() to build and check a new set of credentials before applying
it.
security_task_set[ug]id() is called inside the prepared section. This
guarantees that nothing else will affect the creds until we've finished.
The calling of set_dumpable() has been moved into commit_creds().
Much of the functionality of set_user() has been moved into
commit_creds().
The get functions all simply access the data directly.
(8) security_task_prctl() and cap_task_prctl().
security_task_prctl() has been modified to return -ENOSYS if it doesn't
want to handle a function, or otherwise return the return value directly
rather than through an argument.
Additionally, cap_task_prctl() now prepares a new set of credentials, even
if it doesn't end up using it.
(9) Keyrings.
A number of changes have been made to the keyrings code:
(a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have
all been dropped and built in to the credentials functions directly.
They may want separating out again later.
(b) key_alloc() and search_process_keyrings() now take a cred pointer
rather than a task pointer to specify the security context.
(c) copy_creds() gives a new thread within the same thread group a new
thread keyring if its parent had one, otherwise it discards the thread
keyring.
(d) The authorisation key now points directly to the credentials to extend
the search into rather pointing to the task that carries them.
(e) Installing thread, process or session keyrings causes a new set of
credentials to be created, even though it's not strictly necessary for
process or session keyrings (they're shared).
(10) Usermode helper.
The usermode helper code now carries a cred struct pointer in its
subprocess_info struct instead of a new session keyring pointer. This set
of credentials is derived from init_cred and installed on the new process
after it has been cloned.
call_usermodehelper_setup() allocates the new credentials and
call_usermodehelper_freeinfo() discards them if they haven't been used. A
special cred function (prepare_usermodeinfo_creds()) is provided
specifically for call_usermodehelper_setup() to call.
call_usermodehelper_setkeys() adjusts the credentials to sport the
supplied keyring as the new session keyring.
(11) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) selinux_setprocattr() no longer does its check for whether the
current ptracer can access processes with the new SID inside the lock
that covers getting the ptracer's SID. Whilst this lock ensures that
the check is done with the ptracer pinned, the result is only valid
until the lock is released, so there's no point doing it inside the
lock.
(12) is_single_threaded().
This function has been extracted from selinux_setprocattr() and put into
a file of its own in the lib/ directory as join_session_keyring() now
wants to use it too.
The code in SELinux just checked to see whether a task shared mm_structs
with other tasks (CLONE_VM), but that isn't good enough. We really want
to know if they're part of the same thread group (CLONE_THREAD).
(13) nfsd.
The NFS server daemon now has to use the COW credentials to set the
credentials it is going to use. It really needs to pass the credentials
down to the functions it calls, but it can't do that until other patches
in this series have been applied.
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:23 +11:00
cred_init_security ( ) ;
2005-04-16 15:20:36 -07:00
selinux: generalize disabling of execmem for plt-in-heap archs
On Tue, 2010-04-27 at 11:47 -0700, David Miller wrote:
> From: "Tom \"spot\" Callaway" <tcallawa@redhat.com>
> Date: Tue, 27 Apr 2010 14:20:21 -0400
>
> > [root@apollo ~]$ cat /proc/2174/maps
> > 00010000-00014000 r-xp 00000000 fd:00 15466577
> > /sbin/mingetty
> > 00022000-00024000 rwxp 00002000 fd:00 15466577
> > /sbin/mingetty
> > 00024000-00046000 rwxp 00000000 00:00 0
> > [heap]
>
> SELINUX probably barfs on the executable heap, the PLT is in the HEAP
> just like powerpc32 and that's why VM_DATA_DEFAULT_FLAGS has to set
> both executable and writable.
>
> You also can't remove the CONFIG_PPC32 ifdefs in selinux, since
> because of the VM_DATA_DEFAULT_FLAGS setting used still in that arch,
> the heap will always have executable permission, just like sparc does.
> You have to support those binaries forever, whether you like it or not.
>
> Let's just replace the CONFIG_PPC32 ifdef in SELINUX with CONFIG_PPC32
> || CONFIG_SPARC as in Tom's original patch and let's be done with
> this.
>
> In fact I would go through all the arch/ header files and check the
> VM_DATA_DEFAULT_FLAGS settings and add the necessary new ifdefs to the
> SELINUX code so that other platforms don't have the pain of having to
> go through this process too.
To avoid maintaining per-arch ifdefs, it seems that we could just
directly use (VM_DATA_DEFAULT_FLAGS & VM_EXEC) as the basis for deciding
whether to enable or disable these checks. VM_DATA_DEFAULT_FLAGS isn't
constant on some architectures but instead depends on
current->personality, but we want this applied uniformly. So we'll just
use the initial task state to determine whether or not to enable these
checks.
Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: James Morris <jmorris@namei.org>
2010-04-28 15:57:57 -04:00
default_noexec = ! ( VM_DATA_DEFAULT_FLAGS & VM_EXEC ) ;
2023-07-28 17:01:49 +02:00
if ( ! default_noexec )
pr_notice ( " SELinux: virtual memory is executable by default \n " ) ;
selinux: generalize disabling of execmem for plt-in-heap archs
On Tue, 2010-04-27 at 11:47 -0700, David Miller wrote:
> From: "Tom \"spot\" Callaway" <tcallawa@redhat.com>
> Date: Tue, 27 Apr 2010 14:20:21 -0400
>
> > [root@apollo ~]$ cat /proc/2174/maps
> > 00010000-00014000 r-xp 00000000 fd:00 15466577
> > /sbin/mingetty
> > 00022000-00024000 rwxp 00002000 fd:00 15466577
> > /sbin/mingetty
> > 00024000-00046000 rwxp 00000000 00:00 0
> > [heap]
>
> SELINUX probably barfs on the executable heap, the PLT is in the HEAP
> just like powerpc32 and that's why VM_DATA_DEFAULT_FLAGS has to set
> both executable and writable.
>
> You also can't remove the CONFIG_PPC32 ifdefs in selinux, since
> because of the VM_DATA_DEFAULT_FLAGS setting used still in that arch,
> the heap will always have executable permission, just like sparc does.
> You have to support those binaries forever, whether you like it or not.
>
> Let's just replace the CONFIG_PPC32 ifdef in SELINUX with CONFIG_PPC32
> || CONFIG_SPARC as in Tom's original patch and let's be done with
> this.
>
> In fact I would go through all the arch/ header files and check the
> VM_DATA_DEFAULT_FLAGS settings and add the necessary new ifdefs to the
> SELINUX code so that other platforms don't have the pain of having to
> go through this process too.
To avoid maintaining per-arch ifdefs, it seems that we could just
directly use (VM_DATA_DEFAULT_FLAGS & VM_EXEC) as the basis for deciding
whether to enable or disable these checks. VM_DATA_DEFAULT_FLAGS isn't
constant on some architectures but instead depends on
current->personality, but we want this applied uniformly. So we'll just
use the initial task state to determine whether or not to enable these
checks.
Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: James Morris <jmorris@namei.org>
2010-04-28 15:57:57 -04:00
2005-04-16 15:20:36 -07:00
avc_init ( ) ;
2018-03-01 18:48:02 -05:00
avtab_cache_init ( ) ;
ebitmap_cache_init ( ) ;
hashtab_cache_init ( ) ;
2023-09-12 13:56:46 -07:00
security_add_hooks ( selinux_hooks , ARRAY_SIZE ( selinux_hooks ) ,
& selinux_lsmid ) ;
2005-04-16 15:20:36 -07:00
2014-06-26 14:33:56 -04:00
if ( avc_add_callback ( selinux_netcache_avc_callback , AVC_CALLBACK_RESET ) )
panic ( " SELinux: Unable to register AVC netcache callback \n " ) ;
2017-05-19 15:48:53 +03:00
if ( avc_add_callback ( selinux_lsm_notifier_avc_callback , AVC_CALLBACK_RESET ) )
panic ( " SELinux: Unable to register AVC LSM notifier callback \n " ) ;
2018-03-01 18:48:02 -05:00
if ( selinux_enforcing_boot )
2018-06-12 10:09:03 +02:00
pr_debug ( " SELinux: Starting in enforcing mode \n " ) ;
2008-04-17 13:17:49 -04:00
else
2018-06-12 10:09:03 +02:00
pr_debug ( " SELinux: Starting in permissive mode \n " ) ;
2006-06-22 14:47:17 -07:00
2019-09-07 07:23:15 -04:00
fs_validate_description ( " selinux " , selinux_fs_parameters ) ;
2018-11-01 23:07:24 +00:00
2005-04-16 15:20:36 -07:00
return 0 ;
}
2010-03-23 06:36:54 -04:00
/*
 * Per-superblock callback handed to iterate_supers() by
 * selinux_complete_init().
 *
 * @sb:     superblock to set up.
 * @unused: opaque iterator argument; never read here.
 *
 * Calls selinux_set_mnt_opts() on @sb with NULL/0 for every remaining
 * argument and does not use any result of that call.
 */
static void delayed_superblock_init ( struct super_block * sb , void * unused )
{
2018-12-13 13:41:47 -05:00
selinux_set_mnt_opts ( sb , NULL , 0 , NULL ) ;
2010-03-23 06:36:54 -04:00
}
2005-04-16 15:20:36 -07:00
/*
 * selinux_complete_init - finish SELinux setup for existing superblocks.
 *
 * Emits two debug messages and then walks the superblocks with
 * iterate_supers(), applying delayed_superblock_init() to each one.
 * NOTE(review): presumably invoked once the first policy has been loaded
 * (see the in-body comment) - confirm against the caller.
 */
void selinux_complete_init ( void )
{
2018-06-12 10:09:03 +02:00
pr_debug ( " SELinux: Completing initialization. \n " ) ;
2005-04-16 15:20:36 -07:00
/* Set up any superblocks initialized prior to the policy load. */
2018-06-12 10:09:03 +02:00
pr_debug ( " SELinux: Setting up existing superblocks. \n " ) ;
2010-03-23 06:36:54 -04:00
iterate_supers ( delayed_superblock_init , NULL ) ;
2005-04-16 15:20:36 -07:00
}
/* SELinux requires early initialization in order to label
all processes and objects when they are created . */
2018-10-10 17:18:23 -07:00
/*
 * LSM descriptor for SELinux: names the module and points the framework
 * at its enable flag, blob size table and initializer.
 */
2018-10-10 17:18:23 -07:00
DEFINE_LSM ( selinux ) = {
2018-10-10 17:18:24 -07:00
. name = " selinux " ,
2018-09-19 19:57:06 -07:00
/* NOTE(review): flag semantics are defined by the LSM framework headers. */
. flags = LSM_FLAG_LEGACY_MAJOR | LSM_FLAG_EXCLUSIVE ,
2019-12-17 09:15:10 -05:00
/* Same boot flag that selinux_nf_ip_init() tests before registering. */
. enabled = & selinux_enabled_boot ,
2018-11-12 09:30:56 -08:00
. blobs = & selinux_blob_sizes ,
2018-10-10 17:18:23 -07:00
/* Entry point: selinux_init() defined earlier in this file. */
. init = selinux_init ,
} ;
2005-04-16 15:20:36 -07:00
2006-02-04 23:27:50 -08:00
# if defined(CONFIG_NETFILTER)
2017-07-26 11:40:52 +02:00
/*
 * Netfilter hook table that selinux_nf_register() and
 * selinux_nf_unregister() pass to the netfilter core for each network
 * namespace.
 *
 * The three IPv4 entries (post-routing, forward, local-out) are always
 * present; the three matching IPv6 entries are compiled in only when
 * CONFIG_IPV6 is enabled. Post-routing entries use the *_SELINUX_LAST
 * priority, forward and local-out entries use *_SELINUX_FIRST.
 */
static const struct nf_hook_ops selinux_nf_ops [ ] = {
/* IPv4, post-routing */
2008-01-29 08:49:27 -05:00
{
2021-10-11 22:22:29 +02:00
. hook = selinux_ip_postroute ,
2012-05-14 03:56:39 +00:00
. pf = NFPROTO_IPV4 ,
2008-01-29 08:49:27 -05:00
. hooknum = NF_INET_POST_ROUTING ,
. priority = NF_IP_PRI_SELINUX_LAST ,
} ,
/* IPv4, forward */
{
2021-10-11 22:22:29 +02:00
. hook = selinux_ip_forward ,
2012-05-14 03:56:39 +00:00
. pf = NFPROTO_IPV4 ,
2008-01-29 08:49:27 -05:00
. hooknum = NF_INET_FORWARD ,
. priority = NF_IP_PRI_SELINUX_FIRST ,
2008-10-10 10:16:32 -04:00
} ,
/* IPv4, local-out */
{
2021-10-11 22:22:29 +02:00
. hook = selinux_ip_output ,
2012-05-14 03:56:39 +00:00
. pf = NFPROTO_IPV4 ,
2008-10-10 10:16:32 -04:00
. hooknum = NF_INET_LOCAL_OUT ,
. priority = NF_IP_PRI_SELINUX_FIRST ,
2014-09-03 17:42:13 +02:00
} ,
2016-08-08 13:08:25 -04:00
# if IS_ENABLED(CONFIG_IPV6)
/* IPv6, post-routing */
2008-01-29 08:49:27 -05:00
{
2021-10-11 22:22:29 +02:00
. hook = selinux_ip_postroute ,
2012-05-14 03:56:39 +00:00
. pf = NFPROTO_IPV6 ,
2008-01-29 08:49:27 -05:00
. hooknum = NF_INET_POST_ROUTING ,
. priority = NF_IP6_PRI_SELINUX_LAST ,
} ,
/* IPv6, forward */
{
2021-10-11 22:22:29 +02:00
. hook = selinux_ip_forward ,
2012-05-14 03:56:39 +00:00
. pf = NFPROTO_IPV6 ,
2008-01-29 08:49:27 -05:00
. hooknum = NF_INET_FORWARD ,
. priority = NF_IP6_PRI_SELINUX_FIRST ,
2014-09-03 17:42:13 +02:00
} ,
/* IPv6, local-out */
2016-06-27 15:06:15 -04:00
{
2021-10-11 22:22:29 +02:00
. hook = selinux_ip_output ,
2016-06-27 15:06:15 -04:00
. pf = NFPROTO_IPV6 ,
. hooknum = NF_INET_LOCAL_OUT ,
. priority = NF_IP6_PRI_SELINUX_FIRST ,
} ,
2005-04-16 15:20:36 -07:00
# endif /* IPV6 */
2014-09-03 17:42:13 +02:00
} ;
2005-04-16 15:20:36 -07:00
2017-04-21 11:49:09 +02:00
/*
 * Per-namespace init callback: attach every entry of selinux_nf_ops to
 * @net.
 *
 * Return: whatever nf_register_net_hooks() reports.
 */
static int __net_init selinux_nf_register(struct net *net)
{
	const unsigned int nr_hooks = ARRAY_SIZE(selinux_nf_ops);

	return nf_register_net_hooks(net, selinux_nf_ops, nr_hooks);
}
/*
 * Per-namespace exit callback: detach every entry of selinux_nf_ops from
 * @net.
 */
static void __net_exit selinux_nf_unregister(struct net *net)
{
	const unsigned int nr_hooks = ARRAY_SIZE(selinux_nf_ops);

	nf_unregister_net_hooks(net, selinux_nf_ops, nr_hooks);
}
/*
 * Per-network-namespace operations: the init/exit pair that installs and
 * removes the SELinux netfilter hooks for a namespace.
 */
static struct pernet_operations selinux_net_ops = {
	.init	= selinux_nf_register,
	.exit	= selinux_nf_unregister,
};
2005-04-16 15:20:36 -07:00
/*
 * selinux_nf_ip_init - initcall that registers the SELinux netfilter hooks.
 *
 * Does nothing when selinux_enabled_boot is clear. Otherwise registers
 * selinux_net_ops as a pernet subsystem and panics if that registration
 * returns an error.
 *
 * Return: always 0.
 */
static int __init selinux_nf_ip_init ( void )
{
2014-09-03 17:42:13 +02:00
int err ;
2005-04-16 15:20:36 -07:00
2019-12-17 09:15:10 -05:00
/* SELinux disabled at boot: leave the netfilter hooks unregistered. */
if ( ! selinux_enabled_boot )
2014-09-03 17:42:13 +02:00
return 0 ;
2007-02-22 18:11:31 -05:00
2018-06-12 10:09:03 +02:00
pr_debug ( " SELinux: Registering netfilter hooks \n " ) ;
2007-02-22 18:11:31 -05:00
2017-04-21 11:49:09 +02:00
err = register_pernet_subsys ( & selinux_net_ops ) ;
2008-07-26 17:48:15 -07:00
/* A registration failure is treated as fatal rather than returned. */
if ( err )
2017-04-21 11:49:09 +02:00
panic ( " SELinux: register_pernet_subsys: error %d \n " , err ) ;
2005-04-16 15:20:36 -07:00
2014-09-03 17:42:13 +02:00
return 0 ;
2005-04-16 15:20:36 -07:00
}
__initcall ( selinux_nf_ip_init ) ;
2006-02-04 23:27:50 -08:00
# endif /* CONFIG_NETFILTER */