linux-stable/security/selinux/avc.c

889 lines
23 KiB
C
Raw Normal View History

/*
* Implementation of the kernel access vector cache (AVC).
*
* Authors: Stephen Smalley, <sds@epoch.ncsc.mil>
* James Morris <jmorris@redhat.com>
*
* Update: KaiGai, Kohei <kaigai@ak.jp.nec.com>
* Replaced the avc_lock spinlock by RCU.
*
* Copyright (C) 2003 Red Hat, Inc., James Morris <jmorris@redhat.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2,
* as published by the Free Software Foundation.
*/
#include <linux/types.h>
#include <linux/stddef.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/dcache.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/percpu.h>
#include <net/sock.h>
#include <linux/un.h>
#include <net/af_unix.h>
#include <linux/ip.h>
#include <linux/audit.h>
#include <linux/ipv6.h>
#include <net/ipv6.h>
#include "avc.h"
#include "avc_ss.h"
selinux: dynamic class/perm discovery Modify SELinux to dynamically discover class and permission values upon policy load, based on the dynamic object class/perm discovery logic from libselinux. A mapping is created between kernel-private class and permission indices used outside the security server and the policy values used within the security server. The mappings are only applied upon kernel-internal computations; similar mappings for the private indices of userspace object managers is handled on a per-object manager basis by the userspace AVC. The interfaces for compute_av and transition_sid are split for kernel vs. userspace; the userspace functions are distinguished by a _user suffix. The kernel-private class indices are no longer tied to the policy values and thus do not need to skip indices for userspace classes; thus the kernel class index values are compressed. The flask.h definitions were regenerated by deleting the userspace classes from refpolicy's definitions and then regenerating the headers. Going forward, we can just maintain the flask.h, av_permissions.h, and classmap.h definitions separately from policy as they are no longer tied to the policy values. The next patch introduces a utility to automate generation of flask.h and av_permissions.h from the classmap.h definitions. The older kernel class and permission string tables are removed and replaced by a single security class mapping table that is walked at policy load to generate the mapping. The old kernel class validation logic is completely replaced by the mapping logic. The handle unknown logic is reworked. reject_unknown=1 is handled when the mappings are computed at policy load time, similar to the old handling by the class validation logic. allow_unknown=1 is handled when computing and mapping decisions - if the permission was not able to be mapped (i.e. undefined, mapped to zero), then it is automatically added to the allowed vector. If the class was not able to be mapped (i.e. undefined, mapped to zero), then all permissions are allowed for it if allow_unknown=1. avc_audit leverages the new security class mapping table to lookup the class and permission names from the kernel-private indices. The mdp program is updated to use the new table when generating the class definitions and allow rules for a minimal boot policy for the kernel. It should be noted that this policy will not include any userspace classes, nor will its policy index values for the kernel classes correspond with the ones in refpolicy (they will instead match the kernel-private indices). Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov> Signed-off-by: James Morris <jmorris@namei.org>
2009-09-30 17:37:50 +00:00
#include "classmap.h"
#define AVC_CACHE_SLOTS 512
#define AVC_DEF_CACHE_THRESHOLD 512
#define AVC_CACHE_RECLAIM 16
#ifdef CONFIG_SECURITY_SELINUX_AVC_STATS
#define avc_cache_stats_incr(field) this_cpu_inc(avc_cache_stats.field)
#else
#define avc_cache_stats_incr(field) do {} while (0)
#endif
struct avc_entry {
u32 ssid;
u32 tsid;
u16 tclass;
struct av_decision avd;
};
struct avc_node {
struct avc_entry ae;
struct hlist_node list; /* anchored in avc_cache->slots[i] */
struct rcu_head rhead;
};
struct avc_cache {
struct hlist_head slots[AVC_CACHE_SLOTS]; /* head for avc_node->list */
spinlock_t slots_lock[AVC_CACHE_SLOTS]; /* lock for writes */
atomic_t lru_hint; /* LRU hint for reclaim scan */
atomic_t active_nodes;
u32 latest_notif; /* latest revocation notification */
};
struct avc_callback_node {
int (*callback) (u32 event, u32 ssid, u32 tsid,
u16 tclass, u32 perms,
u32 *out_retained);
u32 events;
u32 ssid;
u32 tsid;
u16 tclass;
u32 perms;
struct avc_callback_node *next;
};
/* Exported via selinufs */
unsigned int avc_cache_threshold = AVC_DEF_CACHE_THRESHOLD;
#ifdef CONFIG_SECURITY_SELINUX_AVC_STATS
DEFINE_PER_CPU(struct avc_cache_stats, avc_cache_stats) = { 0 };
#endif
static struct avc_cache avc_cache;
static struct avc_callback_node *avc_callbacks;
static struct kmem_cache *avc_node_cachep;
static inline int avc_hash(u32 ssid, u32 tsid, u16 tclass)
{
return (ssid ^ (tsid<<2) ^ (tclass<<4)) & (AVC_CACHE_SLOTS - 1);
}
/**
* avc_dump_av - Display an access vector in human-readable form.
* @tclass: target security class
* @av: access vector
*/
static void avc_dump_av(struct audit_buffer *ab, u16 tclass, u32 av)
{
selinux: dynamic class/perm discovery Modify SELinux to dynamically discover class and permission values upon policy load, based on the dynamic object class/perm discovery logic from libselinux. A mapping is created between kernel-private class and permission indices used outside the security server and the policy values used within the security server. The mappings are only applied upon kernel-internal computations; similar mappings for the private indices of userspace object managers is handled on a per-object manager basis by the userspace AVC. The interfaces for compute_av and transition_sid are split for kernel vs. userspace; the userspace functions are distinguished by a _user suffix. The kernel-private class indices are no longer tied to the policy values and thus do not need to skip indices for userspace classes; thus the kernel class index values are compressed. The flask.h definitions were regenerated by deleting the userspace classes from refpolicy's definitions and then regenerating the headers. Going forward, we can just maintain the flask.h, av_permissions.h, and classmap.h definitions separately from policy as they are no longer tied to the policy values. The next patch introduces a utility to automate generation of flask.h and av_permissions.h from the classmap.h definitions. The older kernel class and permission string tables are removed and replaced by a single security class mapping table that is walked at policy load to generate the mapping. The old kernel class validation logic is completely replaced by the mapping logic. The handle unknown logic is reworked. reject_unknown=1 is handled when the mappings are computed at policy load time, similar to the old handling by the class validation logic. allow_unknown=1 is handled when computing and mapping decisions - if the permission was not able to be mapped (i.e. undefined, mapped to zero), then it is automatically added to the allowed vector. If the class was not able to be mapped (i.e. undefined, mapped to zero), then all permissions are allowed for it if allow_unknown=1. avc_audit leverages the new security class mapping table to lookup the class and permission names from the kernel-private indices. The mdp program is updated to use the new table when generating the class definitions and allow rules for a minimal boot policy for the kernel. It should be noted that this policy will not include any userspace classes, nor will its policy index values for the kernel classes correspond with the ones in refpolicy (they will instead match the kernel-private indices). Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov> Signed-off-by: James Morris <jmorris@namei.org>
2009-09-30 17:37:50 +00:00
const char **perms;
int i, perm;
if (av == 0) {
audit_log_format(ab, " null");
return;
}
selinux: dynamic class/perm discovery Modify SELinux to dynamically discover class and permission values upon policy load, based on the dynamic object class/perm discovery logic from libselinux. A mapping is created between kernel-private class and permission indices used outside the security server and the policy values used within the security server. The mappings are only applied upon kernel-internal computations; similar mappings for the private indices of userspace object managers is handled on a per-object manager basis by the userspace AVC. The interfaces for compute_av and transition_sid are split for kernel vs. userspace; the userspace functions are distinguished by a _user suffix. The kernel-private class indices are no longer tied to the policy values and thus do not need to skip indices for userspace classes; thus the kernel class index values are compressed. The flask.h definitions were regenerated by deleting the userspace classes from refpolicy's definitions and then regenerating the headers. Going forward, we can just maintain the flask.h, av_permissions.h, and classmap.h definitions separately from policy as they are no longer tied to the policy values. The next patch introduces a utility to automate generation of flask.h and av_permissions.h from the classmap.h definitions. The older kernel class and permission string tables are removed and replaced by a single security class mapping table that is walked at policy load to generate the mapping. The old kernel class validation logic is completely replaced by the mapping logic. The handle unknown logic is reworked. reject_unknown=1 is handled when the mappings are computed at policy load time, similar to the old handling by the class validation logic. allow_unknown=1 is handled when computing and mapping decisions - if the permission was not able to be mapped (i.e. undefined, mapped to zero), then it is automatically added to the allowed vector. If the class was not able to be mapped (i.e. undefined, mapped to zero), then all permissions are allowed for it if allow_unknown=1. avc_audit leverages the new security class mapping table to lookup the class and permission names from the kernel-private indices. The mdp program is updated to use the new table when generating the class definitions and allow rules for a minimal boot policy for the kernel. It should be noted that this policy will not include any userspace classes, nor will its policy index values for the kernel classes correspond with the ones in refpolicy (they will instead match the kernel-private indices). Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov> Signed-off-by: James Morris <jmorris@namei.org>
2009-09-30 17:37:50 +00:00
perms = secclass_map[tclass-1].perms;
audit_log_format(ab, " {");
i = 0;
perm = 1;
selinux: dynamic class/perm discovery Modify SELinux to dynamically discover class and permission values upon policy load, based on the dynamic object class/perm discovery logic from libselinux. A mapping is created between kernel-private class and permission indices used outside the security server and the policy values used within the security server. The mappings are only applied upon kernel-internal computations; similar mappings for the private indices of userspace object managers is handled on a per-object manager basis by the userspace AVC. The interfaces for compute_av and transition_sid are split for kernel vs. userspace; the userspace functions are distinguished by a _user suffix. The kernel-private class indices are no longer tied to the policy values and thus do not need to skip indices for userspace classes; thus the kernel class index values are compressed. The flask.h definitions were regenerated by deleting the userspace classes from refpolicy's definitions and then regenerating the headers. Going forward, we can just maintain the flask.h, av_permissions.h, and classmap.h definitions separately from policy as they are no longer tied to the policy values. The next patch introduces a utility to automate generation of flask.h and av_permissions.h from the classmap.h definitions. The older kernel class and permission string tables are removed and replaced by a single security class mapping table that is walked at policy load to generate the mapping. The old kernel class validation logic is completely replaced by the mapping logic. The handle unknown logic is reworked. reject_unknown=1 is handled when the mappings are computed at policy load time, similar to the old handling by the class validation logic. allow_unknown=1 is handled when computing and mapping decisions - if the permission was not able to be mapped (i.e. undefined, mapped to zero), then it is automatically added to the allowed vector. If the class was not able to be mapped (i.e. undefined, mapped to zero), then all permissions are allowed for it if allow_unknown=1. avc_audit leverages the new security class mapping table to lookup the class and permission names from the kernel-private indices. The mdp program is updated to use the new table when generating the class definitions and allow rules for a minimal boot policy for the kernel. It should be noted that this policy will not include any userspace classes, nor will its policy index values for the kernel classes correspond with the ones in refpolicy (they will instead match the kernel-private indices). Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov> Signed-off-by: James Morris <jmorris@namei.org>
2009-09-30 17:37:50 +00:00
while (i < (sizeof(av) * 8)) {
if ((perm & av) && perms[i]) {
selinux: dynamic class/perm discovery Modify SELinux to dynamically discover class and permission values upon policy load, based on the dynamic object class/perm discovery logic from libselinux. A mapping is created between kernel-private class and permission indices used outside the security server and the policy values used within the security server. The mappings are only applied upon kernel-internal computations; similar mappings for the private indices of userspace object managers is handled on a per-object manager basis by the userspace AVC. The interfaces for compute_av and transition_sid are split for kernel vs. userspace; the userspace functions are distinguished by a _user suffix. The kernel-private class indices are no longer tied to the policy values and thus do not need to skip indices for userspace classes; thus the kernel class index values are compressed. The flask.h definitions were regenerated by deleting the userspace classes from refpolicy's definitions and then regenerating the headers. Going forward, we can just maintain the flask.h, av_permissions.h, and classmap.h definitions separately from policy as they are no longer tied to the policy values. The next patch introduces a utility to automate generation of flask.h and av_permissions.h from the classmap.h definitions. The older kernel class and permission string tables are removed and replaced by a single security class mapping table that is walked at policy load to generate the mapping. The old kernel class validation logic is completely replaced by the mapping logic. The handle unknown logic is reworked. reject_unknown=1 is handled when the mappings are computed at policy load time, similar to the old handling by the class validation logic. allow_unknown=1 is handled when computing and mapping decisions - if the permission was not able to be mapped (i.e. undefined, mapped to zero), then it is automatically added to the allowed vector. If the class was not able to be mapped (i.e. undefined, mapped to zero), then all permissions are allowed for it if allow_unknown=1. avc_audit leverages the new security class mapping table to lookup the class and permission names from the kernel-private indices. The mdp program is updated to use the new table when generating the class definitions and allow rules for a minimal boot policy for the kernel. It should be noted that this policy will not include any userspace classes, nor will its policy index values for the kernel classes correspond with the ones in refpolicy (they will instead match the kernel-private indices). Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov> Signed-off-by: James Morris <jmorris@namei.org>
2009-09-30 17:37:50 +00:00
audit_log_format(ab, " %s", perms[i]);
av &= ~perm;
}
i++;
perm <<= 1;
}
if (av)
audit_log_format(ab, " 0x%x", av);
audit_log_format(ab, " }");
}
/**
* avc_dump_query - Display a SID pair and a class in human-readable form.
* @ssid: source security identifier
* @tsid: target security identifier
* @tclass: target security class
*/
static void avc_dump_query(struct audit_buffer *ab, u32 ssid, u32 tsid, u16 tclass)
{
int rc;
char *scontext;
u32 scontext_len;
rc = security_sid_to_context(ssid, &scontext, &scontext_len);
if (rc)
audit_log_format(ab, "ssid=%d", ssid);
else {
audit_log_format(ab, "scontext=%s", scontext);
kfree(scontext);
}
rc = security_sid_to_context(tsid, &scontext, &scontext_len);
if (rc)
audit_log_format(ab, " tsid=%d", tsid);
else {
audit_log_format(ab, " tcontext=%s", scontext);
kfree(scontext);
}
selinux: dynamic class/perm discovery Modify SELinux to dynamically discover class and permission values upon policy load, based on the dynamic object class/perm discovery logic from libselinux. A mapping is created between kernel-private class and permission indices used outside the security server and the policy values used within the security server. The mappings are only applied upon kernel-internal computations; similar mappings for the private indices of userspace object managers is handled on a per-object manager basis by the userspace AVC. The interfaces for compute_av and transition_sid are split for kernel vs. userspace; the userspace functions are distinguished by a _user suffix. The kernel-private class indices are no longer tied to the policy values and thus do not need to skip indices for userspace classes; thus the kernel class index values are compressed. The flask.h definitions were regenerated by deleting the userspace classes from refpolicy's definitions and then regenerating the headers. Going forward, we can just maintain the flask.h, av_permissions.h, and classmap.h definitions separately from policy as they are no longer tied to the policy values. The next patch introduces a utility to automate generation of flask.h and av_permissions.h from the classmap.h definitions. The older kernel class and permission string tables are removed and replaced by a single security class mapping table that is walked at policy load to generate the mapping. The old kernel class validation logic is completely replaced by the mapping logic. The handle unknown logic is reworked. reject_unknown=1 is handled when the mappings are computed at policy load time, similar to the old handling by the class validation logic. allow_unknown=1 is handled when computing and mapping decisions - if the permission was not able to be mapped (i.e. undefined, mapped to zero), then it is automatically added to the allowed vector. If the class was not able to be mapped (i.e. undefined, mapped to zero), then all permissions are allowed for it if allow_unknown=1. avc_audit leverages the new security class mapping table to lookup the class and permission names from the kernel-private indices. The mdp program is updated to use the new table when generating the class definitions and allow rules for a minimal boot policy for the kernel. It should be noted that this policy will not include any userspace classes, nor will its policy index values for the kernel classes correspond with the ones in refpolicy (they will instead match the kernel-private indices). Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov> Signed-off-by: James Morris <jmorris@namei.org>
2009-09-30 17:37:50 +00:00
BUG_ON(tclass >= ARRAY_SIZE(secclass_map));
audit_log_format(ab, " tclass=%s", secclass_map[tclass-1].name);
}
/**
* avc_init - Initialize the AVC.
*
* Initialize the access vector cache.
*/
void __init avc_init(void)
{
int i;
for (i = 0; i < AVC_CACHE_SLOTS; i++) {
INIT_HLIST_HEAD(&avc_cache.slots[i]);
spin_lock_init(&avc_cache.slots_lock[i]);
}
atomic_set(&avc_cache.active_nodes, 0);
atomic_set(&avc_cache.lru_hint, 0);
avc_node_cachep = kmem_cache_create("avc_node", sizeof(struct avc_node),
0, SLAB_PANIC, NULL);
audit_log(current->audit_context, GFP_KERNEL, AUDIT_KERNEL, "AVC INITIALIZED\n");
}
int avc_get_hash_stats(char *page)
{
int i, chain_len, max_chain_len, slots_used;
struct avc_node *node;
struct hlist_head *head;
rcu_read_lock();
slots_used = 0;
max_chain_len = 0;
for (i = 0; i < AVC_CACHE_SLOTS; i++) {
head = &avc_cache.slots[i];
if (!hlist_empty(head)) {
struct hlist_node *next;
slots_used++;
chain_len = 0;
hlist_for_each_entry_rcu(node, next, head, list)
chain_len++;
if (chain_len > max_chain_len)
max_chain_len = chain_len;
}
}
rcu_read_unlock();
return scnprintf(page, PAGE_SIZE, "entries: %d\nbuckets used: %d/%d\n"
"longest chain: %d\n",
atomic_read(&avc_cache.active_nodes),
slots_used, AVC_CACHE_SLOTS, max_chain_len);
}
static void avc_node_free(struct rcu_head *rhead)
{
struct avc_node *node = container_of(rhead, struct avc_node, rhead);
kmem_cache_free(avc_node_cachep, node);
avc_cache_stats_incr(frees);
}
static void avc_node_delete(struct avc_node *node)
{
hlist_del_rcu(&node->list);
call_rcu(&node->rhead, avc_node_free);
atomic_dec(&avc_cache.active_nodes);
}
static void avc_node_kill(struct avc_node *node)
{
kmem_cache_free(avc_node_cachep, node);
avc_cache_stats_incr(frees);
atomic_dec(&avc_cache.active_nodes);
}
static void avc_node_replace(struct avc_node *new, struct avc_node *old)
{
hlist_replace_rcu(&old->list, &new->list);
call_rcu(&old->rhead, avc_node_free);
atomic_dec(&avc_cache.active_nodes);
}
static inline int avc_reclaim_node(void)
{
struct avc_node *node;
int hvalue, try, ecx;
unsigned long flags;
struct hlist_head *head;
struct hlist_node *next;
spinlock_t *lock;
for (try = 0, ecx = 0; try < AVC_CACHE_SLOTS; try++) {
hvalue = atomic_inc_return(&avc_cache.lru_hint) & (AVC_CACHE_SLOTS - 1);
head = &avc_cache.slots[hvalue];
lock = &avc_cache.slots_lock[hvalue];
if (!spin_trylock_irqsave(lock, flags))
continue;
rcu_read_lock();
hlist_for_each_entry(node, next, head, list) {
avc_node_delete(node);
avc_cache_stats_incr(reclaims);
ecx++;
if (ecx >= AVC_CACHE_RECLAIM) {
rcu_read_unlock();
spin_unlock_irqrestore(lock, flags);
goto out;
}
}
rcu_read_unlock();
spin_unlock_irqrestore(lock, flags);
}
out:
return ecx;
}
static struct avc_node *avc_alloc_node(void)
{
struct avc_node *node;
node = kmem_cache_zalloc(avc_node_cachep, GFP_ATOMIC);
if (!node)
goto out;
INIT_HLIST_NODE(&node->list);
avc_cache_stats_incr(allocations);
if (atomic_inc_return(&avc_cache.active_nodes) > avc_cache_threshold)
avc_reclaim_node();
out:
return node;
}
static void avc_node_populate(struct avc_node *node, u32 ssid, u32 tsid, u16 tclass, struct av_decision *avd)
{
node->ae.ssid = ssid;
node->ae.tsid = tsid;
node->ae.tclass = tclass;
memcpy(&node->ae.avd, avd, sizeof(node->ae.avd));
}
static inline struct avc_node *avc_search_node(u32 ssid, u32 tsid, u16 tclass)
{
struct avc_node *node, *ret = NULL;
int hvalue;
struct hlist_head *head;
struct hlist_node *next;
hvalue = avc_hash(ssid, tsid, tclass);
head = &avc_cache.slots[hvalue];
hlist_for_each_entry_rcu(node, next, head, list) {
if (ssid == node->ae.ssid &&
tclass == node->ae.tclass &&
tsid == node->ae.tsid) {
ret = node;
break;
}
}
return ret;
}
/**
* avc_lookup - Look up an AVC entry.
* @ssid: source security identifier
* @tsid: target security identifier
* @tclass: target security class
*
* Look up an AVC entry that is valid for the
* (@ssid, @tsid), interpreting the permissions
* based on @tclass. If a valid AVC entry exists,
* then this function returns the avc_node.
* Otherwise, this function returns NULL.
*/
static struct avc_node *avc_lookup(u32 ssid, u32 tsid, u16 tclass)
{
struct avc_node *node;
avc_cache_stats_incr(lookups);
node = avc_search_node(ssid, tsid, tclass);
if (node)
return node;
avc_cache_stats_incr(misses);
return NULL;
}
static int avc_latest_notif_update(int seqno, int is_insert)
{
int ret = 0;
static DEFINE_SPINLOCK(notif_lock);
unsigned long flag;
spin_lock_irqsave(&notif_lock, flag);
if (is_insert) {
if (seqno < avc_cache.latest_notif) {
printk(KERN_WARNING "SELinux: avc: seqno %d < latest_notif %d\n",
seqno, avc_cache.latest_notif);
ret = -EAGAIN;
}
} else {
if (seqno > avc_cache.latest_notif)
avc_cache.latest_notif = seqno;
}
spin_unlock_irqrestore(&notif_lock, flag);
return ret;
}
/**
* avc_insert - Insert an AVC entry.
* @ssid: source security identifier
* @tsid: target security identifier
* @tclass: target security class
* @avd: resulting av decision
*
* Insert an AVC entry for the SID pair
* (@ssid, @tsid) and class @tclass.
* The access vectors and the sequence number are
* normally provided by the security server in
* response to a security_compute_av() call. If the
* sequence number @avd->seqno is not less than the latest
* revocation notification, then the function copies
* the access vectors into a cache entry, returns
* avc_node inserted. Otherwise, this function returns NULL.
*/
static struct avc_node *avc_insert(u32 ssid, u32 tsid, u16 tclass, struct av_decision *avd)
{
struct avc_node *pos, *node = NULL;
int hvalue;
unsigned long flag;
if (avc_latest_notif_update(avd->seqno, 1))
goto out;
node = avc_alloc_node();
if (node) {
struct hlist_head *head;
struct hlist_node *next;
spinlock_t *lock;
hvalue = avc_hash(ssid, tsid, tclass);
avc_node_populate(node, ssid, tsid, tclass, avd);
head = &avc_cache.slots[hvalue];
lock = &avc_cache.slots_lock[hvalue];
spin_lock_irqsave(lock, flag);
hlist_for_each_entry(pos, next, head, list) {
if (pos->ae.ssid == ssid &&
pos->ae.tsid == tsid &&
pos->ae.tclass == tclass) {
avc_node_replace(node, pos);
goto found;
}
}
hlist_add_head_rcu(&node->list, head);
found:
spin_unlock_irqrestore(lock, flag);
}
out:
return node;
}
/**
* avc_audit_pre_callback - SELinux specific information
* will be called by generic audit code
* @ab: the audit buffer
* @a: audit_data
*/
static void avc_audit_pre_callback(struct audit_buffer *ab, void *a)
{
struct common_audit_data *ad = a;
audit_log_format(ab, "avc: %s ",
ad->selinux_audit_data->slad->denied ? "denied" : "granted");
avc_dump_av(ab, ad->selinux_audit_data->slad->tclass,
ad->selinux_audit_data->slad->audited);
audit_log_format(ab, " for ");
}
/**
* avc_audit_post_callback - SELinux specific information
* will be called by generic audit code
* @ab: the audit buffer
* @a: audit_data
*/
static void avc_audit_post_callback(struct audit_buffer *ab, void *a)
{
struct common_audit_data *ad = a;
audit_log_format(ab, " ");
avc_dump_query(ab, ad->selinux_audit_data->slad->ssid,
ad->selinux_audit_data->slad->tsid,
ad->selinux_audit_data->slad->tclass);
}
/* This is the slow part of avc audit with big stack footprint */
static noinline int slow_avc_audit(u32 ssid, u32 tsid, u16 tclass,
u32 requested, u32 audited, u32 denied,
struct common_audit_data *a,
unsigned flags)
{
struct common_audit_data stack_data;
struct selinux_audit_data sad = {0,};
struct selinux_late_audit_data slad;
if (!a) {
a = &stack_data;
COMMON_AUDIT_DATA_INIT(a, NONE);
a->selinux_audit_data = &sad;
}
/*
* When in a RCU walk do the audit on the RCU retry. This is because
* the collection of the dname in an inode audit message is not RCU
* safe. Note this may drop some audits when the situation changes
* during retry. However this is logically just as if the operation
* happened a little later.
*/
if ((a->type == LSM_AUDIT_DATA_INODE) &&
(flags & MAY_NOT_BLOCK))
return -ECHILD;
slad.tclass = tclass;
slad.requested = requested;
slad.ssid = ssid;
slad.tsid = tsid;
slad.audited = audited;
slad.denied = denied;
a->selinux_audit_data->slad = &slad;
a->lsm_pre_audit = avc_audit_pre_callback;
a->lsm_post_audit = avc_audit_post_callback;
common_lsm_audit(a);
return 0;
}
/**
* avc_audit - Audit the granting or denial of permissions.
* @ssid: source security identifier
* @tsid: target security identifier
* @tclass: target security class
* @requested: requested permissions
* @avd: access vector decisions
* @result: result from avc_has_perm_noaudit
* @a: auxiliary audit data
* @flags: VFS walk flags
*
* Audit the granting or denial of permissions in accordance
* with the policy. This function is typically called by
* avc_has_perm() after a permission check, but can also be
* called directly by callers who use avc_has_perm_noaudit()
* in order to separate the permission check from the auditing.
* For example, this separation is useful when the permission check must
* be performed under a lock, to allow the lock to be released
* before calling the auditing code.
*/
inline int avc_audit(u32 ssid, u32 tsid,
u16 tclass, u32 requested,
struct av_decision *avd, int result, struct common_audit_data *a,
unsigned flags)
{
u32 denied, audited;
denied = requested & ~avd->allowed;
if (unlikely(denied)) {
audited = denied & avd->auditdeny;
SELinux: special dontaudit for access checks Currently there are a number of applications (nautilus being the main one) which calls access() on files in order to determine how they should be displayed. It is normal and expected that nautilus will want to see if files are executable or if they are really read/write-able. access() should return the real permission. SELinux policy checks are done in access() and can result in lots of AVC denials as policy denies RWX on files which DAC allows. Currently SELinux must dontaudit actual attempts to read/write/execute a file in order to silence these messages (and not flood the logs.) But dontaudit rules like that can hide real attacks. This patch addes a new common file permission audit_access. This permission is special in that it is meaningless and should never show up in an allow rule. Instead the only place this permission has meaning is in a dontaudit rule like so: dontaudit nautilus_t sbin_t:file audit_access With such a rule if nautilus just checks access() we will still get denied and thus userspace will still get the correct answer but we will not log the denial. If nautilus attempted to actually perform one of the forbidden actions (rather than just querying access(2) about it) we would still log a denial. This type of dontaudit rule should be used sparingly, as it could be a method for an attacker to probe the system permissions without detection. Signed-off-by: Eric Paris <eparis@redhat.com> Acked-by: Stephen D. Smalley <sds@tycho.nsa.gov> Signed-off-by: James Morris <jmorris@namei.org>
2010-07-23 15:44:03 +00:00
/*
* a->selinux_audit_data->auditdeny is TRICKY! Setting a bit in
SELinux: special dontaudit for access checks Currently there are a number of applications (nautilus being the main one) which calls access() on files in order to determine how they should be displayed. It is normal and expected that nautilus will want to see if files are executable or if they are really read/write-able. access() should return the real permission. SELinux policy checks are done in access() and can result in lots of AVC denials as policy denies RWX on files which DAC allows. Currently SELinux must dontaudit actual attempts to read/write/execute a file in order to silence these messages (and not flood the logs.) But dontaudit rules like that can hide real attacks. This patch addes a new common file permission audit_access. This permission is special in that it is meaningless and should never show up in an allow rule. Instead the only place this permission has meaning is in a dontaudit rule like so: dontaudit nautilus_t sbin_t:file audit_access With such a rule if nautilus just checks access() we will still get denied and thus userspace will still get the correct answer but we will not log the denial. If nautilus attempted to actually perform one of the forbidden actions (rather than just querying access(2) about it) we would still log a denial. This type of dontaudit rule should be used sparingly, as it could be a method for an attacker to probe the system permissions without detection. Signed-off-by: Eric Paris <eparis@redhat.com> Acked-by: Stephen D. Smalley <sds@tycho.nsa.gov> Signed-off-by: James Morris <jmorris@namei.org>
2010-07-23 15:44:03 +00:00
* this field means that ANY denials should NOT be audited if
* the policy contains an explicit dontaudit rule for that
* permission. Take notice that this is unrelated to the
* actual permissions that were denied. As an example lets
* assume:
*
* denied == READ
* avd.auditdeny & ACCESS == 0 (not set means explicit rule)
* selinux_audit_data->auditdeny & ACCESS == 1
SELinux: special dontaudit for access checks Currently there are a number of applications (nautilus being the main one) which calls access() on files in order to determine how they should be displayed. It is normal and expected that nautilus will want to see if files are executable or if they are really read/write-able. access() should return the real permission. SELinux policy checks are done in access() and can result in lots of AVC denials as policy denies RWX on files which DAC allows. Currently SELinux must dontaudit actual attempts to read/write/execute a file in order to silence these messages (and not flood the logs.) But dontaudit rules like that can hide real attacks. This patch addes a new common file permission audit_access. This permission is special in that it is meaningless and should never show up in an allow rule. Instead the only place this permission has meaning is in a dontaudit rule like so: dontaudit nautilus_t sbin_t:file audit_access With such a rule if nautilus just checks access() we will still get denied and thus userspace will still get the correct answer but we will not log the denial. If nautilus attempted to actually perform one of the forbidden actions (rather than just querying access(2) about it) we would still log a denial. This type of dontaudit rule should be used sparingly, as it could be a method for an attacker to probe the system permissions without detection. Signed-off-by: Eric Paris <eparis@redhat.com> Acked-by: Stephen D. Smalley <sds@tycho.nsa.gov> Signed-off-by: James Morris <jmorris@namei.org>
2010-07-23 15:44:03 +00:00
*
* We will NOT audit the denial even though the denied
* permission was READ and the auditdeny checks were for
* ACCESS
*/
if (a &&
a->selinux_audit_data->auditdeny &&
!(a->selinux_audit_data->auditdeny & avd->auditdeny))
SELinux: special dontaudit for access checks Currently there are a number of applications (nautilus being the main one) which calls access() on files in order to determine how they should be displayed. It is normal and expected that nautilus will want to see if files are executable or if they are really read/write-able. access() should return the real permission. SELinux policy checks are done in access() and can result in lots of AVC denials as policy denies RWX on files which DAC allows. Currently SELinux must dontaudit actual attempts to read/write/execute a file in order to silence these messages (and not flood the logs.) But dontaudit rules like that can hide real attacks. This patch addes a new common file permission audit_access. This permission is special in that it is meaningless and should never show up in an allow rule. Instead the only place this permission has meaning is in a dontaudit rule like so: dontaudit nautilus_t sbin_t:file audit_access With such a rule if nautilus just checks access() we will still get denied and thus userspace will still get the correct answer but we will not log the denial. If nautilus attempted to actually perform one of the forbidden actions (rather than just querying access(2) about it) we would still log a denial. This type of dontaudit rule should be used sparingly, as it could be a method for an attacker to probe the system permissions without detection. Signed-off-by: Eric Paris <eparis@redhat.com> Acked-by: Stephen D. Smalley <sds@tycho.nsa.gov> Signed-off-by: James Morris <jmorris@namei.org>
2010-07-23 15:44:03 +00:00
audited = 0;
} else if (result)
audited = denied = requested;
else
audited = requested & avd->auditallow;
if (likely(!audited))
return 0;
return slow_avc_audit(ssid, tsid, tclass,
requested, audited, denied,
a, flags);
}
/**
* avc_add_callback - Register a callback for security events.
* @callback: callback function
* @events: security events
* @ssid: source security identifier or %SECSID_WILD
* @tsid: target security identifier or %SECSID_WILD
* @tclass: target security class
* @perms: permissions
*
* Register a callback function for events in the set @events
* related to the SID pair (@ssid, @tsid)
* and the permissions @perms, interpreting
* @perms based on @tclass. Returns %0 on success or
* -%ENOMEM if insufficient memory exists to add the callback.
*/
int avc_add_callback(int (*callback)(u32 event, u32 ssid, u32 tsid,
u16 tclass, u32 perms,
u32 *out_retained),
u32 events, u32 ssid, u32 tsid,
u16 tclass, u32 perms)
{
struct avc_callback_node *c;
int rc = 0;
c = kmalloc(sizeof(*c), GFP_ATOMIC);
if (!c) {
rc = -ENOMEM;
goto out;
}
c->callback = callback;
c->events = events;
c->ssid = ssid;
c->tsid = tsid;
c->perms = perms;
c->next = avc_callbacks;
avc_callbacks = c;
out:
return rc;
}
static inline int avc_sidcmp(u32 x, u32 y)
{
return (x == y || x == SECSID_WILD || y == SECSID_WILD);
}
/**
* avc_update_node Update an AVC entry
* @event : Updating event
* @perms : Permission mask bits
* @ssid,@tsid,@tclass : identifier of an AVC entry
* @seqno : sequence number when decision was made
*
* if a valid AVC entry doesn't exist,this function returns -ENOENT.
* if kmalloc() called internal returns NULL, this function returns -ENOMEM.
* otherwise, this function updates the AVC entry. The original AVC-entry object
* will release later by RCU.
*/
static int avc_update_node(u32 event, u32 perms, u32 ssid, u32 tsid, u16 tclass,
u32 seqno)
{
int hvalue, rc = 0;
unsigned long flag;
struct avc_node *pos, *node, *orig = NULL;
struct hlist_head *head;
struct hlist_node *next;
spinlock_t *lock;
node = avc_alloc_node();
if (!node) {
rc = -ENOMEM;
goto out;
}
/* Lock the target slot */
hvalue = avc_hash(ssid, tsid, tclass);
head = &avc_cache.slots[hvalue];
lock = &avc_cache.slots_lock[hvalue];
spin_lock_irqsave(lock, flag);
hlist_for_each_entry(pos, next, head, list) {
if (ssid == pos->ae.ssid &&
tsid == pos->ae.tsid &&
tclass == pos->ae.tclass &&
seqno == pos->ae.avd.seqno){
orig = pos;
break;
}
}
if (!orig) {
rc = -ENOENT;
avc_node_kill(node);
goto out_unlock;
}
/*
* Copy and replace original node.
*/
avc_node_populate(node, ssid, tsid, tclass, &orig->ae.avd);
switch (event) {
case AVC_CALLBACK_GRANT:
node->ae.avd.allowed |= perms;
break;
case AVC_CALLBACK_TRY_REVOKE:
case AVC_CALLBACK_REVOKE:
node->ae.avd.allowed &= ~perms;
break;
case AVC_CALLBACK_AUDITALLOW_ENABLE:
node->ae.avd.auditallow |= perms;
break;
case AVC_CALLBACK_AUDITALLOW_DISABLE:
node->ae.avd.auditallow &= ~perms;
break;
case AVC_CALLBACK_AUDITDENY_ENABLE:
node->ae.avd.auditdeny |= perms;
break;
case AVC_CALLBACK_AUDITDENY_DISABLE:
node->ae.avd.auditdeny &= ~perms;
break;
}
avc_node_replace(node, orig);
out_unlock:
spin_unlock_irqrestore(lock, flag);
out:
return rc;
}
/**
* avc_flush - Flush the cache
*/
static void avc_flush(void)
{
struct hlist_head *head;
struct hlist_node *next;
struct avc_node *node;
spinlock_t *lock;
unsigned long flag;
int i;
for (i = 0; i < AVC_CACHE_SLOTS; i++) {
head = &avc_cache.slots[i];
lock = &avc_cache.slots_lock[i];
spin_lock_irqsave(lock, flag);
/*
* With preemptable RCU, the outer spinlock does not
* prevent RCU grace periods from ending.
*/
rcu_read_lock();
hlist_for_each_entry(node, next, head, list)
avc_node_delete(node);
rcu_read_unlock();
spin_unlock_irqrestore(lock, flag);
}
}
/**
* avc_ss_reset - Flush the cache and revalidate migrated permissions.
* @seqno: policy sequence number
*/
int avc_ss_reset(u32 seqno)
{
struct avc_callback_node *c;
int rc = 0, tmprc;
avc_flush();
for (c = avc_callbacks; c; c = c->next) {
if (c->events & AVC_CALLBACK_RESET) {
tmprc = c->callback(AVC_CALLBACK_RESET,
0, 0, 0, 0, NULL);
/* save the first error encountered for the return
value and continue processing the callbacks */
if (!rc)
rc = tmprc;
}
}
avc_latest_notif_update(seqno, 0);
return rc;
}
/*
* Slow-path helper function for avc_has_perm_noaudit,
* when the avc_node lookup fails. We get called with
* the RCU read lock held, and need to return with it
* still held, but drop if for the security compute.
*
* Don't inline this, since it's the slow-path and just
* results in a bigger stack frame.
*/
static noinline struct avc_node *avc_compute_av(u32 ssid, u32 tsid,
u16 tclass, struct av_decision *avd)
{
rcu_read_unlock();
security_compute_av(ssid, tsid, tclass, avd);
rcu_read_lock();
return avc_insert(ssid, tsid, tclass, avd);
}
static noinline int avc_denied(u32 ssid, u32 tsid,
u16 tclass, u32 requested,
unsigned flags,
struct av_decision *avd)
{
if (flags & AVC_STRICT)
return -EACCES;
if (selinux_enforcing && !(avd->flags & AVD_FLAGS_PERMISSIVE))
return -EACCES;
avc_update_node(AVC_CALLBACK_GRANT, requested, ssid,
tsid, tclass, avd->seqno);
return 0;
}
/**
* avc_has_perm_noaudit - Check permissions but perform no auditing.
* @ssid: source security identifier
* @tsid: target security identifier
* @tclass: target security class
* @requested: requested permissions, interpreted based on @tclass
* @flags: AVC_STRICT or 0
* @avd: access vector decisions
*
* Check the AVC to determine whether the @requested permissions are granted
* for the SID pair (@ssid, @tsid), interpreting the permissions
* based on @tclass, and call the security server on a cache miss to obtain
* a new decision and add it to the cache. Return a copy of the decisions
* in @avd. Return %0 if all @requested permissions are granted,
* -%EACCES if any permissions are denied, or another -errno upon
* other errors. This function is typically called by avc_has_perm(),
* but may also be called directly to separate permission checking from
* auditing, e.g. in cases where a lock must be held for the check but
* should be released for the auditing.
*/
inline int avc_has_perm_noaudit(u32 ssid, u32 tsid,
u16 tclass, u32 requested,
unsigned flags,
struct av_decision *avd)
{
struct avc_node *node;
int rc = 0;
u32 denied;
BUG_ON(!requested);
rcu_read_lock();
node = avc_lookup(ssid, tsid, tclass);
if (unlikely(!node)) {
node = avc_compute_av(ssid, tsid, tclass, avd);
} else {
memcpy(avd, &node->ae.avd, sizeof(*avd));
avd = &node->ae.avd;
}
denied = requested & ~(avd->allowed);
if (unlikely(denied))
rc = avc_denied(ssid, tsid, tclass, requested, flags, avd);
rcu_read_unlock();
return rc;
}
/**
* avc_has_perm - Check permissions and perform any appropriate auditing.
* @ssid: source security identifier
* @tsid: target security identifier
* @tclass: target security class
* @requested: requested permissions, interpreted based on @tclass
* @auditdata: auxiliary audit data
* @flags: VFS walk flags
*
* Check the AVC to determine whether the @requested permissions are granted
* for the SID pair (@ssid, @tsid), interpreting the permissions
* based on @tclass, and call the security server on a cache miss to obtain
* a new decision and add it to the cache. Audit the granting or denial of
* permissions in accordance with the policy. Return %0 if all @requested
* permissions are granted, -%EACCES if any permissions are denied, or
* another -errno upon other errors.
*/
int avc_has_perm_flags(u32 ssid, u32 tsid, u16 tclass,
u32 requested, struct common_audit_data *auditdata,
unsigned flags)
{
struct av_decision avd;
int rc, rc2;
rc = avc_has_perm_noaudit(ssid, tsid, tclass, requested, 0, &avd);
rc2 = avc_audit(ssid, tsid, tclass, requested, &avd, rc, auditdata,
flags);
if (rc2)
return rc2;
return rc;
}
u32 avc_policy_seqno(void)
{
return avc_cache.latest_notif;
}
void avc_disable(void)
{
SELinux: do not destroy the avc_cache_nodep The security_ops reset done when SELinux is disabled at run time is done after the avc cache is freed and after the kmem_cache for the avc is also freed. This means that between the time the selinux disable code destroys the avc_node_cachep another process could make a security request and could try to allocate from the cache. We are just going to leave the cachep around, like we always have. SELinux: Disabled at runtime. BUG: unable to handle kernel NULL pointer dereference at (null) IP: [<ffffffff81122537>] kmem_cache_alloc+0x9a/0x185 PGD 0 Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC last sysfs file: CPU 1 Modules linked in: Pid: 12, comm: khelper Not tainted 2.6.31-tip-05525-g0eeacc6-dirty #14819 System Product Name RIP: 0010:[<ffffffff81122537>] [<ffffffff81122537>] kmem_cache_alloc+0x9a/0x185 RSP: 0018:ffff88003f9258b0 EFLAGS: 00010086 RAX: 0000000000000001 RBX: 0000000000000000 RCX: 0000000078c0129e RDX: 0000000000000000 RSI: ffffffff8130b626 RDI: ffffffff81122528 RBP: ffff88003f925900 R08: 0000000078c0129e R09: 0000000000000001 R10: 0000000000000000 R11: 0000000078c0129e R12: 0000000000000246 R13: 0000000000008020 R14: ffff88003f8586d8 R15: 0000000000000001 FS: 0000000000000000(0000) GS:ffff880002b00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b CR2: 0000000000000000 CR3: 0000000001001000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: ffffffff827bd420 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process khelper (pid: 12, threadinfo ffff88003f924000, task ffff88003f928000) Stack: 0000000000000246 0000802000000246 ffffffff8130b626 0000000000000001 <0> 0000000078c0129e 0000000000000000 ffff88003f925a70 0000000000000002 <0> 0000000000000001 0000000000000001 ffff88003f925960 ffffffff8130b626 Call Trace: [<ffffffff8130b626>] ? avc_alloc_node+0x36/0x273 [<ffffffff8130b626>] avc_alloc_node+0x36/0x273 [<ffffffff8130b545>] ? avc_latest_notif_update+0x7d/0x9e [<ffffffff8130b8b4>] avc_insert+0x51/0x18d [<ffffffff8130bcce>] avc_has_perm_noaudit+0x9d/0x128 [<ffffffff8130bf20>] avc_has_perm+0x45/0x88 [<ffffffff8130f99d>] current_has_perm+0x52/0x6d [<ffffffff8130fbb2>] selinux_task_create+0x2f/0x45 [<ffffffff81303bf7>] security_task_create+0x29/0x3f [<ffffffff8105c6ba>] copy_process+0x82/0xdf0 [<ffffffff81091578>] ? register_lock_class+0x2f/0x36c [<ffffffff81091a13>] ? mark_lock+0x2e/0x1e1 [<ffffffff8105d596>] do_fork+0x16e/0x382 [<ffffffff81091578>] ? register_lock_class+0x2f/0x36c [<ffffffff810d9166>] ? probe_workqueue_execution+0x57/0xf9 [<ffffffff81091a13>] ? mark_lock+0x2e/0x1e1 [<ffffffff810d9166>] ? probe_workqueue_execution+0x57/0xf9 [<ffffffff8100cdb2>] kernel_thread+0x82/0xe0 [<ffffffff81078b1f>] ? ____call_usermodehelper+0x0/0x139 [<ffffffff8100ce10>] ? child_rip+0x0/0x20 [<ffffffff81078aea>] ? __call_usermodehelper+0x65/0x9a [<ffffffff8107a5c7>] run_workqueue+0x171/0x27e [<ffffffff8107a573>] ? run_workqueue+0x11d/0x27e [<ffffffff81078a85>] ? __call_usermodehelper+0x0/0x9a [<ffffffff8107a7bc>] worker_thread+0xe8/0x10f [<ffffffff810808e2>] ? autoremove_wake_function+0x0/0x63 [<ffffffff8107a6d4>] ? worker_thread+0x0/0x10f [<ffffffff8108042e>] kthread+0x91/0x99 [<ffffffff8100ce1a>] child_rip+0xa/0x20 [<ffffffff8100c754>] ? restore_args+0x0/0x30 [<ffffffff8108039d>] ? kthread+0x0/0x99 [<ffffffff8100ce10>] ? child_rip+0x0/0x20 Code: 0f 85 99 00 00 00 9c 58 66 66 90 66 90 49 89 c4 fa 66 66 90 66 66 90 e8 83 34 fb ff e8 d7 e9 26 00 48 98 49 8b 94 c6 10 01 00 00 <48> 8b 1a 44 8b 7a 18 48 85 db 74 0f 8b 42 14 48 8b 04 c3 ff 42 RIP [<ffffffff81122537>] kmem_cache_alloc+0x9a/0x185 RSP <ffff88003f9258b0> CR2: 0000000000000000 ---[ end trace 42f41a982344e606 ]--- Reported-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Eric Paris <eparis@redhat.com> Signed-off-by: James Morris <jmorris@namei.org>
2009-09-21 01:21:10 +00:00
/*
* If you are looking at this because you have realized that we are
* not destroying the avc_node_cachep it might be easy to fix, but
* I don't know the memory barrier semantics well enough to know. It's
* possible that some other task dereferenced security_ops when
* it still pointed to selinux operations. If that is the case it's
* possible that it is about to use the avc and is about to need the
* avc_node_cachep. I know I could wrap the security.c security_ops call
* in an rcu_lock, but seriously, it's not worth it. Instead I just flush
* the cache and get that memory back.
*/
if (avc_node_cachep) {
avc_flush();
/* kmem_cache_destroy(avc_node_cachep); */
}
}