audit: report audit wait metric in audit status reply

In environments where the preservation of audit events and predictable
usage of system memory are prioritized, admins may use a combination of
--backlog_wait_time and -b options at the risk of degraded performance
resulting from backlog waiting. In some cases, this risk may be
preferred to lost events or unbounded memory usage. Ideally, this risk
can be mitigated by making adjustments when backlog waiting is detected.

However, detection can be difficult using the currently available
metrics. For example, an admin attempting to debug degraded performance
may falsely believe a full backlog indicates backlog waiting. It may
turn out the backlog frequently fills up but drains quickly.

To make it easier to reliably track degraded performance to backlog
waiting, this patch makes the following changes:

Add a new field backlog_wait_time_total to the audit status reply.
Initialize this field to zero. Add to this field the total time spent
by the current task on scheduled timeouts while the backlog limit is
exceeded. Reset field to zero upon request via AUDIT_SET.

Tested on Ubuntu 18.04 using complementary changes to the
audit-userspace and audit-testsuite:
- https://github.com/linux-audit/audit-userspace/pull/134
- https://github.com/linux-audit/audit-testsuite/pull/97

Signed-off-by: Max Englander <max.englander@gmail.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
This commit is contained in:
Max Englander 2020-07-04 15:15:28 +00:00 committed by Paul Moore
parent f1d9b23cab
commit b43870c74f
2 changed files with 36 additions and 17 deletions

View File

@ -333,14 +333,15 @@ enum {
}; };
/* Status symbols */ /* Status symbols */
/* Mask values */ /* Mask values */
#define AUDIT_STATUS_ENABLED 0x0001 #define AUDIT_STATUS_ENABLED 0x0001
#define AUDIT_STATUS_FAILURE 0x0002 #define AUDIT_STATUS_FAILURE 0x0002
#define AUDIT_STATUS_PID 0x0004 #define AUDIT_STATUS_PID 0x0004
#define AUDIT_STATUS_RATE_LIMIT 0x0008 #define AUDIT_STATUS_RATE_LIMIT 0x0008
#define AUDIT_STATUS_BACKLOG_LIMIT 0x0010 #define AUDIT_STATUS_BACKLOG_LIMIT 0x0010
#define AUDIT_STATUS_BACKLOG_WAIT_TIME 0x0020 #define AUDIT_STATUS_BACKLOG_WAIT_TIME 0x0020
#define AUDIT_STATUS_LOST 0x0040 #define AUDIT_STATUS_LOST 0x0040
#define AUDIT_STATUS_BACKLOG_WAIT_TIME_ACTUAL 0x0080
#define AUDIT_FEATURE_BITMAP_BACKLOG_LIMIT 0x00000001 #define AUDIT_FEATURE_BITMAP_BACKLOG_LIMIT 0x00000001
#define AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_TIME 0x00000002 #define AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_TIME 0x00000002
@ -467,6 +468,9 @@ struct audit_status {
__u32 feature_bitmap; /* bitmap of kernel audit features */ __u32 feature_bitmap; /* bitmap of kernel audit features */
}; };
__u32 backlog_wait_time;/* message queue wait timeout */ __u32 backlog_wait_time;/* message queue wait timeout */
__u32 backlog_wait_time_actual;/* time spent waiting while
* message limit exceeded
*/
}; };
struct audit_features { struct audit_features {

View File

@ -136,6 +136,11 @@ u32 audit_sig_sid = 0;
*/ */
static atomic_t audit_lost = ATOMIC_INIT(0); static atomic_t audit_lost = ATOMIC_INIT(0);
/* Monotonically increasing sum of time the kernel has spent
* waiting while the backlog limit is exceeded.
*/
static atomic_t audit_backlog_wait_time_actual = ATOMIC_INIT(0);
/* Hash for inode-based rules */ /* Hash for inode-based rules */
struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS]; struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS];
@ -1201,17 +1206,18 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
case AUDIT_GET: { case AUDIT_GET: {
struct audit_status s; struct audit_status s;
memset(&s, 0, sizeof(s)); memset(&s, 0, sizeof(s));
s.enabled = audit_enabled; s.enabled = audit_enabled;
s.failure = audit_failure; s.failure = audit_failure;
/* NOTE: use pid_vnr() so the PID is relative to the current /* NOTE: use pid_vnr() so the PID is relative to the current
* namespace */ * namespace */
s.pid = auditd_pid_vnr(); s.pid = auditd_pid_vnr();
s.rate_limit = audit_rate_limit; s.rate_limit = audit_rate_limit;
s.backlog_limit = audit_backlog_limit; s.backlog_limit = audit_backlog_limit;
s.lost = atomic_read(&audit_lost); s.lost = atomic_read(&audit_lost);
s.backlog = skb_queue_len(&audit_queue); s.backlog = skb_queue_len(&audit_queue);
s.feature_bitmap = AUDIT_FEATURE_BITMAP_ALL; s.feature_bitmap = AUDIT_FEATURE_BITMAP_ALL;
s.backlog_wait_time = audit_backlog_wait_time; s.backlog_wait_time = audit_backlog_wait_time;
s.backlog_wait_time_actual = atomic_read(&audit_backlog_wait_time_actual);
audit_send_reply(skb, seq, AUDIT_GET, 0, 0, &s, sizeof(s)); audit_send_reply(skb, seq, AUDIT_GET, 0, 0, &s, sizeof(s));
break; break;
} }
@ -1315,6 +1321,12 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
audit_log_config_change("lost", 0, lost, 1); audit_log_config_change("lost", 0, lost, 1);
return lost; return lost;
} }
if (s.mask == AUDIT_STATUS_BACKLOG_WAIT_TIME_ACTUAL) {
u32 actual = atomic_xchg(&audit_backlog_wait_time_actual, 0);
audit_log_config_change("backlog_wait_time_actual", 0, actual, 1);
return actual;
}
break; break;
} }
case AUDIT_GET_FEATURE: case AUDIT_GET_FEATURE:
@ -1826,12 +1838,15 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
/* sleep if we are allowed and we haven't exhausted our /* sleep if we are allowed and we haven't exhausted our
* backlog wait limit */ * backlog wait limit */
if (gfpflags_allow_blocking(gfp_mask) && (stime > 0)) { if (gfpflags_allow_blocking(gfp_mask) && (stime > 0)) {
long rtime = stime;
DECLARE_WAITQUEUE(wait, current); DECLARE_WAITQUEUE(wait, current);
add_wait_queue_exclusive(&audit_backlog_wait, add_wait_queue_exclusive(&audit_backlog_wait,
&wait); &wait);
set_current_state(TASK_UNINTERRUPTIBLE); set_current_state(TASK_UNINTERRUPTIBLE);
stime = schedule_timeout(stime); stime = schedule_timeout(rtime);
atomic_add(rtime - stime, &audit_backlog_wait_time_actual);
remove_wait_queue(&audit_backlog_wait, &wait); remove_wait_queue(&audit_backlog_wait, &wait);
} else { } else {
if (audit_rate_check() && printk_ratelimit()) if (audit_rate_check() && printk_ratelimit())