mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-01 10:45:49 +00:00
perf tools fixes for v6.6: 1st batch
Build: - Update header files in the tools/**/include directory to sync with the kernel sources as usual. - Remove unused bpf-prologue files. While it's not strictly a fix, but the functionality was removed in this cycle so better to get rid of the code together. - Other minor build fixes. Misc: - Fix uninitialized memory access in PMU parsing code - Fix segfaults on software event Signed-off-by: Namhyung Kim <namhyung@kernel.org> -----BEGIN PGP SIGNATURE----- iHUEABYIAB0WIQSo2x5BnqMqsoHtzsmMstVUGiXMgwUCZRIFKAAKCRCMstVUGiXM g/pXAP9HLB2s+beBTK5iQU4/NfqmAVSl303QCoR9xLByo38vfAEAlLiRIh061pTi PRlXVuY9bUQPyCSYsiBHv/fmLqdQdwU= =ti6G -----END PGP SIGNATURE----- Merge tag 'perf-tools-fixes-for-v6.6-1-2023-09-25' into perf-tools-next To pick up the 'perf bench sched-seccomp-notify' changes to allow us to continue build testing perf-tools-next with the set of distro containers, where some older ones don't have a recent enough seccomp.h UAPI header that contains defines needed by this new 'perf bench' workload. Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
commit
87cd3d4819
@ -57,6 +57,7 @@
|
||||
|
||||
#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
|
||||
#define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */
|
||||
#define PRED_CMD_SBPB BIT(7) /* Selective Branch Prediction Barrier */
|
||||
|
||||
#define MSR_PPIN_CTL 0x0000004e
|
||||
#define MSR_PPIN 0x0000004f
|
||||
@ -155,6 +156,15 @@
|
||||
* Not susceptible to Post-Barrier
|
||||
* Return Stack Buffer Predictions.
|
||||
*/
|
||||
#define ARCH_CAP_GDS_CTRL BIT(25) /*
|
||||
* CPU is vulnerable to Gather
|
||||
* Data Sampling (GDS) and
|
||||
* has controls for mitigation.
|
||||
*/
|
||||
#define ARCH_CAP_GDS_NO BIT(26) /*
|
||||
* CPU is not vulnerable to Gather
|
||||
* Data Sampling (GDS).
|
||||
*/
|
||||
|
||||
#define ARCH_CAP_XAPIC_DISABLE BIT(21) /*
|
||||
* IA32_XAPIC_DISABLE_STATUS MSR
|
||||
@ -178,6 +188,8 @@
|
||||
#define RNGDS_MITG_DIS BIT(0) /* SRBDS support */
|
||||
#define RTM_ALLOW BIT(1) /* TSX development mode */
|
||||
#define FB_CLEAR_DIS BIT(3) /* CPU Fill buffer clear disable */
|
||||
#define GDS_MITG_DIS BIT(4) /* Disable GDS mitigation */
|
||||
#define GDS_MITG_LOCKED BIT(5) /* GDS mitigation locked */
|
||||
|
||||
#define MSR_IA32_SYSENTER_CS 0x00000174
|
||||
#define MSR_IA32_SYSENTER_ESP 0x00000175
|
||||
|
@ -820,8 +820,11 @@ __SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node)
|
||||
#define __NR_cachestat 451
|
||||
__SYSCALL(__NR_cachestat, sys_cachestat)
|
||||
|
||||
#define __NR_fchmodat2 452
|
||||
__SYSCALL(__NR_fchmodat2, sys_fchmodat2)
|
||||
|
||||
#undef __NR_syscalls
|
||||
#define __NR_syscalls 452
|
||||
#define __NR_syscalls 453
|
||||
|
||||
/*
|
||||
* 32 bit systems traditionally used different
|
||||
|
@ -673,8 +673,11 @@ struct drm_gem_open {
|
||||
* Bitfield of supported PRIME sharing capabilities. See &DRM_PRIME_CAP_IMPORT
|
||||
* and &DRM_PRIME_CAP_EXPORT.
|
||||
*
|
||||
* PRIME buffers are exposed as dma-buf file descriptors. See
|
||||
* Documentation/gpu/drm-mm.rst, section "PRIME Buffer Sharing".
|
||||
* Starting from kernel version 6.6, both &DRM_PRIME_CAP_IMPORT and
|
||||
* &DRM_PRIME_CAP_EXPORT are always advertised.
|
||||
*
|
||||
* PRIME buffers are exposed as dma-buf file descriptors.
|
||||
* See :ref:`prime_buffer_sharing`.
|
||||
*/
|
||||
#define DRM_CAP_PRIME 0x5
|
||||
/**
|
||||
@ -682,6 +685,8 @@ struct drm_gem_open {
|
||||
*
|
||||
* If this bit is set in &DRM_CAP_PRIME, the driver supports importing PRIME
|
||||
* buffers via the &DRM_IOCTL_PRIME_FD_TO_HANDLE ioctl.
|
||||
*
|
||||
* Starting from kernel version 6.6, this bit is always set in &DRM_CAP_PRIME.
|
||||
*/
|
||||
#define DRM_PRIME_CAP_IMPORT 0x1
|
||||
/**
|
||||
@ -689,6 +694,8 @@ struct drm_gem_open {
|
||||
*
|
||||
* If this bit is set in &DRM_CAP_PRIME, the driver supports exporting PRIME
|
||||
* buffers via the &DRM_IOCTL_PRIME_HANDLE_TO_FD ioctl.
|
||||
*
|
||||
* Starting from kernel version 6.6, this bit is always set in &DRM_CAP_PRIME.
|
||||
*/
|
||||
#define DRM_PRIME_CAP_EXPORT 0x2
|
||||
/**
|
||||
@ -756,15 +763,14 @@ struct drm_gem_open {
|
||||
/**
|
||||
* DRM_CAP_SYNCOBJ
|
||||
*
|
||||
* If set to 1, the driver supports sync objects. See
|
||||
* Documentation/gpu/drm-mm.rst, section "DRM Sync Objects".
|
||||
* If set to 1, the driver supports sync objects. See :ref:`drm_sync_objects`.
|
||||
*/
|
||||
#define DRM_CAP_SYNCOBJ 0x13
|
||||
/**
|
||||
* DRM_CAP_SYNCOBJ_TIMELINE
|
||||
*
|
||||
* If set to 1, the driver supports timeline operations on sync objects. See
|
||||
* Documentation/gpu/drm-mm.rst, section "DRM Sync Objects".
|
||||
* :ref:`drm_sync_objects`.
|
||||
*/
|
||||
#define DRM_CAP_SYNCOBJ_TIMELINE 0x14
|
||||
|
||||
@ -909,6 +915,27 @@ struct drm_syncobj_timeline_wait {
|
||||
__u32 pad;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct drm_syncobj_eventfd
|
||||
* @handle: syncobj handle.
|
||||
* @flags: Zero to wait for the point to be signalled, or
|
||||
* &DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE to wait for a fence to be
|
||||
* available for the point.
|
||||
* @point: syncobj timeline point (set to zero for binary syncobjs).
|
||||
* @fd: Existing eventfd to send events to.
|
||||
* @pad: Must be zero.
|
||||
*
|
||||
* Register an eventfd to be signalled by a syncobj. The eventfd counter will
|
||||
* be incremented by one.
|
||||
*/
|
||||
struct drm_syncobj_eventfd {
|
||||
__u32 handle;
|
||||
__u32 flags;
|
||||
__u64 point;
|
||||
__s32 fd;
|
||||
__u32 pad;
|
||||
};
|
||||
|
||||
|
||||
struct drm_syncobj_array {
|
||||
__u64 handles;
|
||||
@ -1169,6 +1196,8 @@ extern "C" {
|
||||
*/
|
||||
#define DRM_IOCTL_MODE_GETFB2 DRM_IOWR(0xCE, struct drm_mode_fb_cmd2)
|
||||
|
||||
#define DRM_IOCTL_SYNCOBJ_EVENTFD DRM_IOWR(0xCF, struct drm_syncobj_eventfd)
|
||||
|
||||
/*
|
||||
* Device specific ioctls should only be in their respective headers
|
||||
* The device specific ioctl range is from 0x40 to 0x9f.
|
||||
@ -1180,25 +1209,50 @@ extern "C" {
|
||||
#define DRM_COMMAND_BASE 0x40
|
||||
#define DRM_COMMAND_END 0xA0
|
||||
|
||||
/*
|
||||
* Header for events written back to userspace on the drm fd. The
|
||||
* type defines the type of event, the length specifies the total
|
||||
* length of the event (including the header), and user_data is
|
||||
* typically a 64 bit value passed with the ioctl that triggered the
|
||||
* event. A read on the drm fd will always only return complete
|
||||
* events, that is, if for example the read buffer is 100 bytes, and
|
||||
* there are two 64 byte events pending, only one will be returned.
|
||||
/**
|
||||
* struct drm_event - Header for DRM events
|
||||
* @type: event type.
|
||||
* @length: total number of payload bytes (including header).
|
||||
*
|
||||
* Event types 0 - 0x7fffffff are generic drm events, 0x80000000 and
|
||||
* up are chipset specific.
|
||||
* This struct is a header for events written back to user-space on the DRM FD.
|
||||
* A read on the DRM FD will always only return complete events: e.g. if the
|
||||
* read buffer is 100 bytes large and there are two 64 byte events pending,
|
||||
* only one will be returned.
|
||||
*
|
||||
* Event types 0 - 0x7fffffff are generic DRM events, 0x80000000 and
|
||||
* up are chipset specific. Generic DRM events include &DRM_EVENT_VBLANK,
|
||||
* &DRM_EVENT_FLIP_COMPLETE and &DRM_EVENT_CRTC_SEQUENCE.
|
||||
*/
|
||||
struct drm_event {
|
||||
__u32 type;
|
||||
__u32 length;
|
||||
};
|
||||
|
||||
/**
|
||||
* DRM_EVENT_VBLANK - vertical blanking event
|
||||
*
|
||||
* This event is sent in response to &DRM_IOCTL_WAIT_VBLANK with the
|
||||
* &_DRM_VBLANK_EVENT flag set.
|
||||
*
|
||||
* The event payload is a struct drm_event_vblank.
|
||||
*/
|
||||
#define DRM_EVENT_VBLANK 0x01
|
||||
/**
|
||||
* DRM_EVENT_FLIP_COMPLETE - page-flip completion event
|
||||
*
|
||||
* This event is sent in response to an atomic commit or legacy page-flip with
|
||||
* the &DRM_MODE_PAGE_FLIP_EVENT flag set.
|
||||
*
|
||||
* The event payload is a struct drm_event_vblank.
|
||||
*/
|
||||
#define DRM_EVENT_FLIP_COMPLETE 0x02
|
||||
/**
|
||||
* DRM_EVENT_CRTC_SEQUENCE - CRTC sequence event
|
||||
*
|
||||
* This event is sent in response to &DRM_IOCTL_CRTC_QUEUE_SEQUENCE.
|
||||
*
|
||||
* The event payload is a struct drm_event_crtc_sequence.
|
||||
*/
|
||||
#define DRM_EVENT_CRTC_SEQUENCE 0x03
|
||||
|
||||
struct drm_event_vblank {
|
||||
|
157
tools/include/uapi/linux/seccomp.h
Normal file
157
tools/include/uapi/linux/seccomp.h
Normal file
@ -0,0 +1,157 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
#ifndef _UAPI_LINUX_SECCOMP_H
|
||||
#define _UAPI_LINUX_SECCOMP_H
|
||||
|
||||
#include <linux/compiler.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
|
||||
/* Valid values for seccomp.mode and prctl(PR_SET_SECCOMP, <mode>) */
|
||||
#define SECCOMP_MODE_DISABLED 0 /* seccomp is not in use. */
|
||||
#define SECCOMP_MODE_STRICT 1 /* uses hard-coded filter. */
|
||||
#define SECCOMP_MODE_FILTER 2 /* uses user-supplied filter. */
|
||||
|
||||
/* Valid operations for seccomp syscall. */
|
||||
#define SECCOMP_SET_MODE_STRICT 0
|
||||
#define SECCOMP_SET_MODE_FILTER 1
|
||||
#define SECCOMP_GET_ACTION_AVAIL 2
|
||||
#define SECCOMP_GET_NOTIF_SIZES 3
|
||||
|
||||
/* Valid flags for SECCOMP_SET_MODE_FILTER */
|
||||
#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0)
|
||||
#define SECCOMP_FILTER_FLAG_LOG (1UL << 1)
|
||||
#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
|
||||
#define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3)
|
||||
#define SECCOMP_FILTER_FLAG_TSYNC_ESRCH (1UL << 4)
|
||||
/* Received notifications wait in killable state (only respond to fatal signals) */
|
||||
#define SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV (1UL << 5)
|
||||
|
||||
/*
|
||||
* All BPF programs must return a 32-bit value.
|
||||
* The bottom 16-bits are for optional return data.
|
||||
* The upper 16-bits are ordered from least permissive values to most,
|
||||
* as a signed value (so 0x80000000 is negative).
|
||||
*
|
||||
* The ordering ensures that a min_t() over composed return values always
|
||||
* selects the least permissive choice.
|
||||
*/
|
||||
#define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */
|
||||
#define SECCOMP_RET_KILL_THREAD 0x00000000U /* kill the thread */
|
||||
#define SECCOMP_RET_KILL SECCOMP_RET_KILL_THREAD
|
||||
#define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */
|
||||
#define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */
|
||||
#define SECCOMP_RET_USER_NOTIF 0x7fc00000U /* notifies userspace */
|
||||
#define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */
|
||||
#define SECCOMP_RET_LOG 0x7ffc0000U /* allow after logging */
|
||||
#define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */
|
||||
|
||||
/* Masks for the return value sections. */
|
||||
#define SECCOMP_RET_ACTION_FULL 0xffff0000U
|
||||
#define SECCOMP_RET_ACTION 0x7fff0000U
|
||||
#define SECCOMP_RET_DATA 0x0000ffffU
|
||||
|
||||
/**
|
||||
* struct seccomp_data - the format the BPF program executes over.
|
||||
* @nr: the system call number
|
||||
* @arch: indicates system call convention as an AUDIT_ARCH_* value
|
||||
* as defined in <linux/audit.h>.
|
||||
* @instruction_pointer: at the time of the system call.
|
||||
* @args: up to 6 system call arguments always stored as 64-bit values
|
||||
* regardless of the architecture.
|
||||
*/
|
||||
struct seccomp_data {
|
||||
int nr;
|
||||
__u32 arch;
|
||||
__u64 instruction_pointer;
|
||||
__u64 args[6];
|
||||
};
|
||||
|
||||
struct seccomp_notif_sizes {
|
||||
__u16 seccomp_notif;
|
||||
__u16 seccomp_notif_resp;
|
||||
__u16 seccomp_data;
|
||||
};
|
||||
|
||||
struct seccomp_notif {
|
||||
__u64 id;
|
||||
__u32 pid;
|
||||
__u32 flags;
|
||||
struct seccomp_data data;
|
||||
};
|
||||
|
||||
/*
|
||||
* Valid flags for struct seccomp_notif_resp
|
||||
*
|
||||
* Note, the SECCOMP_USER_NOTIF_FLAG_CONTINUE flag must be used with caution!
|
||||
* If set by the process supervising the syscalls of another process the
|
||||
* syscall will continue. This is problematic because of an inherent TOCTOU.
|
||||
* An attacker can exploit the time while the supervised process is waiting on
|
||||
* a response from the supervising process to rewrite syscall arguments which
|
||||
* are passed as pointers of the intercepted syscall.
|
||||
* It should be absolutely clear that this means that the seccomp notifier
|
||||
* _cannot_ be used to implement a security policy! It should only ever be used
|
||||
* in scenarios where a more privileged process supervises the syscalls of a
|
||||
* lesser privileged process to get around kernel-enforced security
|
||||
* restrictions when the privileged process deems this safe. In other words,
|
||||
* in order to continue a syscall the supervising process should be sure that
|
||||
* another security mechanism or the kernel itself will sufficiently block
|
||||
* syscalls if arguments are rewritten to something unsafe.
|
||||
*
|
||||
* Similar precautions should be applied when stacking SECCOMP_RET_USER_NOTIF
|
||||
* or SECCOMP_RET_TRACE. For SECCOMP_RET_USER_NOTIF filters acting on the
|
||||
* same syscall, the most recently added filter takes precedence. This means
|
||||
* that the new SECCOMP_RET_USER_NOTIF filter can override any
|
||||
* SECCOMP_IOCTL_NOTIF_SEND from earlier filters, essentially allowing all
|
||||
* such filtered syscalls to be executed by sending the response
|
||||
* SECCOMP_USER_NOTIF_FLAG_CONTINUE. Note that SECCOMP_RET_TRACE can equally
|
||||
* be overridden by SECCOMP_USER_NOTIF_FLAG_CONTINUE.
|
||||
*/
|
||||
#define SECCOMP_USER_NOTIF_FLAG_CONTINUE (1UL << 0)
|
||||
|
||||
struct seccomp_notif_resp {
|
||||
__u64 id;
|
||||
__s64 val;
|
||||
__s32 error;
|
||||
__u32 flags;
|
||||
};
|
||||
|
||||
#define SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP (1UL << 0)
|
||||
|
||||
/* valid flags for seccomp_notif_addfd */
|
||||
#define SECCOMP_ADDFD_FLAG_SETFD (1UL << 0) /* Specify remote fd */
|
||||
#define SECCOMP_ADDFD_FLAG_SEND (1UL << 1) /* Addfd and return it, atomically */
|
||||
|
||||
/**
|
||||
* struct seccomp_notif_addfd
|
||||
* @id: The ID of the seccomp notification
|
||||
* @flags: SECCOMP_ADDFD_FLAG_*
|
||||
* @srcfd: The local fd number
|
||||
* @newfd: Optional remote FD number if SETFD option is set, otherwise 0.
|
||||
* @newfd_flags: The O_* flags the remote FD should have applied
|
||||
*/
|
||||
struct seccomp_notif_addfd {
|
||||
__u64 id;
|
||||
__u32 flags;
|
||||
__u32 srcfd;
|
||||
__u32 newfd;
|
||||
__u32 newfd_flags;
|
||||
};
|
||||
|
||||
#define SECCOMP_IOC_MAGIC '!'
|
||||
#define SECCOMP_IO(nr) _IO(SECCOMP_IOC_MAGIC, nr)
|
||||
#define SECCOMP_IOR(nr, type) _IOR(SECCOMP_IOC_MAGIC, nr, type)
|
||||
#define SECCOMP_IOW(nr, type) _IOW(SECCOMP_IOC_MAGIC, nr, type)
|
||||
#define SECCOMP_IOWR(nr, type) _IOWR(SECCOMP_IOC_MAGIC, nr, type)
|
||||
|
||||
/* Flags for seccomp notification fd ioctl. */
|
||||
#define SECCOMP_IOCTL_NOTIF_RECV SECCOMP_IOWR(0, struct seccomp_notif)
|
||||
#define SECCOMP_IOCTL_NOTIF_SEND SECCOMP_IOWR(1, \
|
||||
struct seccomp_notif_resp)
|
||||
#define SECCOMP_IOCTL_NOTIF_ID_VALID SECCOMP_IOW(2, __u64)
|
||||
/* On success, the return value is the remote process's added fd number */
|
||||
#define SECCOMP_IOCTL_NOTIF_ADDFD SECCOMP_IOW(3, \
|
||||
struct seccomp_notif_addfd)
|
||||
|
||||
#define SECCOMP_IOCTL_NOTIF_SET_FLAGS SECCOMP_IOW(4, __u64)
|
||||
|
||||
#endif /* _UAPI_LINUX_SECCOMP_H */
|
@ -366,3 +366,4 @@
|
||||
449 n64 futex_waitv sys_futex_waitv
|
||||
450 common set_mempolicy_home_node sys_set_mempolicy_home_node
|
||||
451 n64 cachestat sys_cachestat
|
||||
452 n64 fchmodat2 sys_fchmodat2
|
||||
|
@ -538,3 +538,4 @@
|
||||
449 common futex_waitv sys_futex_waitv
|
||||
450 nospu set_mempolicy_home_node sys_set_mempolicy_home_node
|
||||
451 common cachestat sys_cachestat
|
||||
452 common fchmodat2 sys_fchmodat2
|
||||
|
@ -454,3 +454,4 @@
|
||||
449 common futex_waitv sys_futex_waitv sys_futex_waitv
|
||||
450 common set_mempolicy_home_node sys_set_mempolicy_home_node sys_set_mempolicy_home_node
|
||||
451 common cachestat sys_cachestat sys_cachestat
|
||||
452 common fchmodat2 sys_fchmodat2 sys_fchmodat2
|
||||
|
@ -373,6 +373,8 @@
|
||||
449 common futex_waitv sys_futex_waitv
|
||||
450 common set_mempolicy_home_node sys_set_mempolicy_home_node
|
||||
451 common cachestat sys_cachestat
|
||||
452 common fchmodat2 sys_fchmodat2
|
||||
453 64 map_shadow_stack sys_map_shadow_stack
|
||||
|
||||
#
|
||||
# Due to a historical design error, certain syscalls are numbered differently
|
||||
|
@ -9,7 +9,7 @@
|
||||
#include <sys/syscall.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <linux/time64.h>
|
||||
#include <linux/seccomp.h>
|
||||
#include <uapi/linux/seccomp.h>
|
||||
#include <sys/prctl.h>
|
||||
|
||||
#include <unistd.h>
|
||||
|
@ -21,6 +21,7 @@ FILES=(
|
||||
"include/uapi/linux/perf_event.h"
|
||||
"include/uapi/linux/prctl.h"
|
||||
"include/uapi/linux/sched.h"
|
||||
"include/uapi/linux/seccomp.h"
|
||||
"include/uapi/linux/stat.h"
|
||||
"include/uapi/linux/usbdevice_fs.h"
|
||||
"include/uapi/linux/vhost.h"
|
||||
|
@ -1000,7 +1000,7 @@ const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu)
|
||||
}
|
||||
}
|
||||
free(cpuid);
|
||||
if (!pmu)
|
||||
if (!pmu || !table)
|
||||
return table;
|
||||
|
||||
for (i = 0; i < table->num_pmus; i++) {
|
||||
|
@ -413,10 +413,10 @@ def has_event(event: Event) -> Function:
|
||||
# pylint: disable=invalid-name
|
||||
return Function('has_event', event)
|
||||
|
||||
def strcmp_cpuid_str(event: str) -> Function:
|
||||
def strcmp_cpuid_str(cpuid: Event) -> Function:
|
||||
# pylint: disable=redefined-builtin
|
||||
# pylint: disable=invalid-name
|
||||
return Function('strcmp_cpuid_str', event)
|
||||
return Function('strcmp_cpuid_str', cpuid)
|
||||
|
||||
class Metric:
|
||||
"""An individual metric that will be specifiable on the perf command line."""
|
||||
|
@ -1,508 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* bpf-prologue.c
|
||||
*
|
||||
* Copyright (C) 2015 He Kuang <hekuang@huawei.com>
|
||||
* Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
|
||||
* Copyright (C) 2015 Huawei Inc.
|
||||
*/
|
||||
|
||||
#include <bpf/libbpf.h>
|
||||
#include "debug.h"
|
||||
#include "bpf-loader.h"
|
||||
#include "bpf-prologue.h"
|
||||
#include "probe-finder.h"
|
||||
#include <errno.h>
|
||||
#include <stdlib.h>
|
||||
#include <dwarf-regs.h>
|
||||
#include <linux/filter.h>
|
||||
|
||||
#define BPF_REG_SIZE 8
|
||||
|
||||
#define JMP_TO_ERROR_CODE -1
|
||||
#define JMP_TO_SUCCESS_CODE -2
|
||||
#define JMP_TO_USER_CODE -3
|
||||
|
||||
struct bpf_insn_pos {
|
||||
struct bpf_insn *begin;
|
||||
struct bpf_insn *end;
|
||||
struct bpf_insn *pos;
|
||||
};
|
||||
|
||||
static inline int
|
||||
pos_get_cnt(struct bpf_insn_pos *pos)
|
||||
{
|
||||
return pos->pos - pos->begin;
|
||||
}
|
||||
|
||||
static int
|
||||
append_insn(struct bpf_insn new_insn, struct bpf_insn_pos *pos)
|
||||
{
|
||||
if (!pos->pos)
|
||||
return -BPF_LOADER_ERRNO__PROLOGUE2BIG;
|
||||
|
||||
if (pos->pos + 1 >= pos->end) {
|
||||
pr_err("bpf prologue: prologue too long\n");
|
||||
pos->pos = NULL;
|
||||
return -BPF_LOADER_ERRNO__PROLOGUE2BIG;
|
||||
}
|
||||
|
||||
*(pos->pos)++ = new_insn;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
check_pos(struct bpf_insn_pos *pos)
|
||||
{
|
||||
if (!pos->pos || pos->pos >= pos->end)
|
||||
return -BPF_LOADER_ERRNO__PROLOGUE2BIG;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Convert type string (u8/u16/u32/u64/s8/s16/s32/s64 ..., see
|
||||
* Documentation/trace/kprobetrace.rst) to size field of BPF_LDX_MEM
|
||||
* instruction (BPF_{B,H,W,DW}).
|
||||
*/
|
||||
static int
|
||||
argtype_to_ldx_size(const char *type)
|
||||
{
|
||||
int arg_size = type ? atoi(&type[1]) : 64;
|
||||
|
||||
switch (arg_size) {
|
||||
case 8:
|
||||
return BPF_B;
|
||||
case 16:
|
||||
return BPF_H;
|
||||
case 32:
|
||||
return BPF_W;
|
||||
case 64:
|
||||
default:
|
||||
return BPF_DW;
|
||||
}
|
||||
}
|
||||
|
||||
static const char *
|
||||
insn_sz_to_str(int insn_sz)
|
||||
{
|
||||
switch (insn_sz) {
|
||||
case BPF_B:
|
||||
return "BPF_B";
|
||||
case BPF_H:
|
||||
return "BPF_H";
|
||||
case BPF_W:
|
||||
return "BPF_W";
|
||||
case BPF_DW:
|
||||
return "BPF_DW";
|
||||
default:
|
||||
return "UNKNOWN";
|
||||
}
|
||||
}
|
||||
|
||||
/* Give it a shorter name */
|
||||
#define ins(i, p) append_insn((i), (p))
|
||||
|
||||
/*
|
||||
* Given a register name (in 'reg'), generate instruction to
|
||||
* load register into an eBPF register rd:
|
||||
* 'ldd target_reg, offset(ctx_reg)', where:
|
||||
* ctx_reg is pre initialized to pointer of 'struct pt_regs'.
|
||||
*/
|
||||
static int
|
||||
gen_ldx_reg_from_ctx(struct bpf_insn_pos *pos, int ctx_reg,
|
||||
const char *reg, int target_reg)
|
||||
{
|
||||
int offset = regs_query_register_offset(reg);
|
||||
|
||||
if (offset < 0) {
|
||||
pr_err("bpf: prologue: failed to get register %s\n",
|
||||
reg);
|
||||
return offset;
|
||||
}
|
||||
ins(BPF_LDX_MEM(BPF_DW, target_reg, ctx_reg, offset), pos);
|
||||
|
||||
return check_pos(pos);
|
||||
}
|
||||
|
||||
/*
|
||||
* Generate a BPF_FUNC_probe_read function call.
|
||||
*
|
||||
* src_base_addr_reg is a register holding base address,
|
||||
* dst_addr_reg is a register holding dest address (on stack),
|
||||
* result is:
|
||||
*
|
||||
* *[dst_addr_reg] = *([src_base_addr_reg] + offset)
|
||||
*
|
||||
* Arguments of BPF_FUNC_probe_read:
|
||||
* ARG1: ptr to stack (dest)
|
||||
* ARG2: size (8)
|
||||
* ARG3: unsafe ptr (src)
|
||||
*/
|
||||
static int
|
||||
gen_read_mem(struct bpf_insn_pos *pos,
|
||||
int src_base_addr_reg,
|
||||
int dst_addr_reg,
|
||||
long offset,
|
||||
int probeid)
|
||||
{
|
||||
/* mov arg3, src_base_addr_reg */
|
||||
if (src_base_addr_reg != BPF_REG_ARG3)
|
||||
ins(BPF_MOV64_REG(BPF_REG_ARG3, src_base_addr_reg), pos);
|
||||
/* add arg3, #offset */
|
||||
if (offset)
|
||||
ins(BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG3, offset), pos);
|
||||
|
||||
/* mov arg2, #reg_size */
|
||||
ins(BPF_ALU64_IMM(BPF_MOV, BPF_REG_ARG2, BPF_REG_SIZE), pos);
|
||||
|
||||
/* mov arg1, dst_addr_reg */
|
||||
if (dst_addr_reg != BPF_REG_ARG1)
|
||||
ins(BPF_MOV64_REG(BPF_REG_ARG1, dst_addr_reg), pos);
|
||||
|
||||
/* Call probe_read */
|
||||
ins(BPF_EMIT_CALL(probeid), pos);
|
||||
/*
|
||||
* Error processing: if read fail, goto error code,
|
||||
* will be relocated. Target should be the start of
|
||||
* error processing code.
|
||||
*/
|
||||
ins(BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, JMP_TO_ERROR_CODE),
|
||||
pos);
|
||||
|
||||
return check_pos(pos);
|
||||
}
|
||||
|
||||
/*
|
||||
* Each arg should be bare register. Fetch and save them into argument
|
||||
* registers (r3 - r5).
|
||||
*
|
||||
* BPF_REG_1 should have been initialized with pointer to
|
||||
* 'struct pt_regs'.
|
||||
*/
|
||||
static int
|
||||
gen_prologue_fastpath(struct bpf_insn_pos *pos,
|
||||
struct probe_trace_arg *args, int nargs)
|
||||
{
|
||||
int i, err = 0;
|
||||
|
||||
for (i = 0; i < nargs; i++) {
|
||||
err = gen_ldx_reg_from_ctx(pos, BPF_REG_1, args[i].value,
|
||||
BPF_PROLOGUE_START_ARG_REG + i);
|
||||
if (err)
|
||||
goto errout;
|
||||
}
|
||||
|
||||
return check_pos(pos);
|
||||
errout:
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* Slow path:
|
||||
* At least one argument has the form of 'offset($rx)'.
|
||||
*
|
||||
* Following code first stores them into stack, then loads all of them
|
||||
* to r2 - r5.
|
||||
* Before final loading, the final result should be:
|
||||
*
|
||||
* low address
|
||||
* BPF_REG_FP - 24 ARG3
|
||||
* BPF_REG_FP - 16 ARG2
|
||||
* BPF_REG_FP - 8 ARG1
|
||||
* BPF_REG_FP
|
||||
* high address
|
||||
*
|
||||
* For each argument (described as: offn(...off2(off1(reg)))),
|
||||
* generates following code:
|
||||
*
|
||||
* r7 <- fp
|
||||
* r7 <- r7 - stack_offset // Ideal code should initialize r7 using
|
||||
* // fp before generating args. However,
|
||||
* // eBPF won't regard r7 as stack pointer
|
||||
* // if it is generated by minus 8 from
|
||||
* // another stack pointer except fp.
|
||||
* // This is why we have to set r7
|
||||
* // to fp for each variable.
|
||||
* r3 <- value of 'reg'-> generated using gen_ldx_reg_from_ctx()
|
||||
* (r7) <- r3 // skip following instructions for bare reg
|
||||
* r3 <- r3 + off1 . // skip if off1 == 0
|
||||
* r2 <- 8 \
|
||||
* r1 <- r7 |-> generated by gen_read_mem()
|
||||
* call probe_read /
|
||||
* jnei r0, 0, err ./
|
||||
* r3 <- (r7)
|
||||
* r3 <- r3 + off2 . // skip if off2 == 0
|
||||
* r2 <- 8 \ // r2 may be broken by probe_read, so set again
|
||||
* r1 <- r7 |-> generated by gen_read_mem()
|
||||
* call probe_read /
|
||||
* jnei r0, 0, err ./
|
||||
* ...
|
||||
*/
|
||||
static int
|
||||
gen_prologue_slowpath(struct bpf_insn_pos *pos,
|
||||
struct probe_trace_arg *args, int nargs)
|
||||
{
|
||||
int err, i, probeid;
|
||||
|
||||
for (i = 0; i < nargs; i++) {
|
||||
struct probe_trace_arg *arg = &args[i];
|
||||
const char *reg = arg->value;
|
||||
struct probe_trace_arg_ref *ref = NULL;
|
||||
int stack_offset = (i + 1) * -8;
|
||||
|
||||
pr_debug("prologue: fetch arg %d, base reg is %s\n",
|
||||
i, reg);
|
||||
|
||||
/* value of base register is stored into ARG3 */
|
||||
err = gen_ldx_reg_from_ctx(pos, BPF_REG_CTX, reg,
|
||||
BPF_REG_ARG3);
|
||||
if (err) {
|
||||
pr_err("prologue: failed to get offset of register %s\n",
|
||||
reg);
|
||||
goto errout;
|
||||
}
|
||||
|
||||
/* Make r7 the stack pointer. */
|
||||
ins(BPF_MOV64_REG(BPF_REG_7, BPF_REG_FP), pos);
|
||||
/* r7 += -8 */
|
||||
ins(BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, stack_offset), pos);
|
||||
/*
|
||||
* Store r3 (base register) onto stack
|
||||
* Ensure fp[offset] is set.
|
||||
* fp is the only valid base register when storing
|
||||
* into stack. We are not allowed to use r7 as base
|
||||
* register here.
|
||||
*/
|
||||
ins(BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_ARG3,
|
||||
stack_offset), pos);
|
||||
|
||||
ref = arg->ref;
|
||||
probeid = BPF_FUNC_probe_read_kernel;
|
||||
while (ref) {
|
||||
pr_debug("prologue: arg %d: offset %ld\n",
|
||||
i, ref->offset);
|
||||
|
||||
if (ref->user_access)
|
||||
probeid = BPF_FUNC_probe_read_user;
|
||||
|
||||
err = gen_read_mem(pos, BPF_REG_3, BPF_REG_7,
|
||||
ref->offset, probeid);
|
||||
if (err) {
|
||||
pr_err("prologue: failed to generate probe_read function call\n");
|
||||
goto errout;
|
||||
}
|
||||
|
||||
ref = ref->next;
|
||||
/*
|
||||
* Load previous result into ARG3. Use
|
||||
* BPF_REG_FP instead of r7 because verifier
|
||||
* allows FP based addressing only.
|
||||
*/
|
||||
if (ref)
|
||||
ins(BPF_LDX_MEM(BPF_DW, BPF_REG_ARG3,
|
||||
BPF_REG_FP, stack_offset), pos);
|
||||
}
|
||||
}
|
||||
|
||||
/* Final pass: read to registers */
|
||||
for (i = 0; i < nargs; i++) {
|
||||
int insn_sz = (args[i].ref) ? argtype_to_ldx_size(args[i].type) : BPF_DW;
|
||||
|
||||
pr_debug("prologue: load arg %d, insn_sz is %s\n",
|
||||
i, insn_sz_to_str(insn_sz));
|
||||
ins(BPF_LDX_MEM(insn_sz, BPF_PROLOGUE_START_ARG_REG + i,
|
||||
BPF_REG_FP, -BPF_REG_SIZE * (i + 1)), pos);
|
||||
}
|
||||
|
||||
ins(BPF_JMP_IMM(BPF_JA, BPF_REG_0, 0, JMP_TO_SUCCESS_CODE), pos);
|
||||
|
||||
return check_pos(pos);
|
||||
errout:
|
||||
return err;
|
||||
}
|
||||
|
||||
static int
|
||||
prologue_relocate(struct bpf_insn_pos *pos, struct bpf_insn *error_code,
|
||||
struct bpf_insn *success_code, struct bpf_insn *user_code)
|
||||
{
|
||||
struct bpf_insn *insn;
|
||||
|
||||
if (check_pos(pos))
|
||||
return -BPF_LOADER_ERRNO__PROLOGUE2BIG;
|
||||
|
||||
for (insn = pos->begin; insn < pos->pos; insn++) {
|
||||
struct bpf_insn *target;
|
||||
u8 class = BPF_CLASS(insn->code);
|
||||
u8 opcode;
|
||||
|
||||
if (class != BPF_JMP)
|
||||
continue;
|
||||
opcode = BPF_OP(insn->code);
|
||||
if (opcode == BPF_CALL)
|
||||
continue;
|
||||
|
||||
switch (insn->off) {
|
||||
case JMP_TO_ERROR_CODE:
|
||||
target = error_code;
|
||||
break;
|
||||
case JMP_TO_SUCCESS_CODE:
|
||||
target = success_code;
|
||||
break;
|
||||
case JMP_TO_USER_CODE:
|
||||
target = user_code;
|
||||
break;
|
||||
default:
|
||||
pr_err("bpf prologue: internal error: relocation failed\n");
|
||||
return -BPF_LOADER_ERRNO__PROLOGUE;
|
||||
}
|
||||
|
||||
insn->off = target - (insn + 1);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bpf__gen_prologue(struct probe_trace_arg *args, int nargs,
|
||||
struct bpf_insn *new_prog, size_t *new_cnt,
|
||||
size_t cnt_space)
|
||||
{
|
||||
struct bpf_insn *success_code = NULL;
|
||||
struct bpf_insn *error_code = NULL;
|
||||
struct bpf_insn *user_code = NULL;
|
||||
struct bpf_insn_pos pos;
|
||||
bool fastpath = true;
|
||||
int err = 0, i;
|
||||
|
||||
if (!new_prog || !new_cnt)
|
||||
return -EINVAL;
|
||||
|
||||
if (cnt_space > BPF_MAXINSNS)
|
||||
cnt_space = BPF_MAXINSNS;
|
||||
|
||||
pos.begin = new_prog;
|
||||
pos.end = new_prog + cnt_space;
|
||||
pos.pos = new_prog;
|
||||
|
||||
if (!nargs) {
|
||||
ins(BPF_ALU64_IMM(BPF_MOV, BPF_PROLOGUE_FETCH_RESULT_REG, 0),
|
||||
&pos);
|
||||
|
||||
if (check_pos(&pos))
|
||||
goto errout;
|
||||
|
||||
*new_cnt = pos_get_cnt(&pos);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (nargs > BPF_PROLOGUE_MAX_ARGS) {
|
||||
pr_warning("bpf: prologue: %d arguments are dropped\n",
|
||||
nargs - BPF_PROLOGUE_MAX_ARGS);
|
||||
nargs = BPF_PROLOGUE_MAX_ARGS;
|
||||
}
|
||||
|
||||
/* First pass: validation */
|
||||
for (i = 0; i < nargs; i++) {
|
||||
struct probe_trace_arg_ref *ref = args[i].ref;
|
||||
|
||||
if (args[i].value[0] == '@') {
|
||||
/* TODO: fetch global variable */
|
||||
pr_err("bpf: prologue: global %s%+ld not support\n",
|
||||
args[i].value, ref ? ref->offset : 0);
|
||||
return -ENOTSUP;
|
||||
}
|
||||
|
||||
while (ref) {
|
||||
/* fastpath is true if all args have ref == NULL */
|
||||
fastpath = false;
|
||||
|
||||
/*
|
||||
* Instruction encodes immediate value using
|
||||
* s32, ref->offset is long. On systems which
|
||||
* can't fill long in s32, refuse to process if
|
||||
* ref->offset too large (or small).
|
||||
*/
|
||||
#ifdef __LP64__
|
||||
#define OFFSET_MAX ((1LL << 31) - 1)
|
||||
#define OFFSET_MIN ((1LL << 31) * -1)
|
||||
if (ref->offset > OFFSET_MAX ||
|
||||
ref->offset < OFFSET_MIN) {
|
||||
pr_err("bpf: prologue: offset out of bound: %ld\n",
|
||||
ref->offset);
|
||||
return -BPF_LOADER_ERRNO__PROLOGUEOOB;
|
||||
}
|
||||
#endif
|
||||
ref = ref->next;
|
||||
}
|
||||
}
|
||||
pr_debug("prologue: pass validation\n");
|
||||
|
||||
if (fastpath) {
|
||||
/* If all variables are registers... */
|
||||
pr_debug("prologue: fast path\n");
|
||||
err = gen_prologue_fastpath(&pos, args, nargs);
|
||||
if (err)
|
||||
goto errout;
|
||||
} else {
|
||||
pr_debug("prologue: slow path\n");
|
||||
|
||||
/* Initialization: move ctx to a callee saved register. */
|
||||
ins(BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1), &pos);
|
||||
|
||||
err = gen_prologue_slowpath(&pos, args, nargs);
|
||||
if (err)
|
||||
goto errout;
|
||||
/*
|
||||
* start of ERROR_CODE (only slow path needs error code)
|
||||
* mov r2 <- 1 // r2 is error number
|
||||
* mov r3 <- 0 // r3, r4... should be touched or
|
||||
* // verifier would complain
|
||||
* mov r4 <- 0
|
||||
* ...
|
||||
* goto usercode
|
||||
*/
|
||||
error_code = pos.pos;
|
||||
ins(BPF_ALU64_IMM(BPF_MOV, BPF_PROLOGUE_FETCH_RESULT_REG, 1),
|
||||
&pos);
|
||||
|
||||
for (i = 0; i < nargs; i++)
|
||||
ins(BPF_ALU64_IMM(BPF_MOV,
|
||||
BPF_PROLOGUE_START_ARG_REG + i,
|
||||
0),
|
||||
&pos);
|
||||
ins(BPF_JMP_IMM(BPF_JA, BPF_REG_0, 0, JMP_TO_USER_CODE),
|
||||
&pos);
|
||||
}
|
||||
|
||||
/*
|
||||
* start of SUCCESS_CODE:
|
||||
* mov r2 <- 0
|
||||
* goto usercode // skip
|
||||
*/
|
||||
success_code = pos.pos;
|
||||
ins(BPF_ALU64_IMM(BPF_MOV, BPF_PROLOGUE_FETCH_RESULT_REG, 0), &pos);
|
||||
|
||||
/*
|
||||
* start of USER_CODE:
|
||||
* Restore ctx to r1
|
||||
*/
|
||||
user_code = pos.pos;
|
||||
if (!fastpath) {
|
||||
/*
|
||||
* Only slow path needs restoring of ctx. In fast path,
|
||||
* registers are loaded directly from r1.
|
||||
*/
|
||||
ins(BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX), &pos);
|
||||
err = prologue_relocate(&pos, error_code, success_code,
|
||||
user_code);
|
||||
if (err)
|
||||
goto errout;
|
||||
}
|
||||
|
||||
err = check_pos(&pos);
|
||||
if (err)
|
||||
goto errout;
|
||||
|
||||
*new_cnt = pos_get_cnt(&pos);
|
||||
return 0;
|
||||
errout:
|
||||
return err;
|
||||
}
|
@ -23,7 +23,9 @@
|
||||
#define MAX_CPUS 4096
|
||||
|
||||
// FIXME: These should come from system headers
|
||||
#ifndef bool
|
||||
typedef char bool;
|
||||
#endif
|
||||
typedef int pid_t;
|
||||
typedef long long int __s64;
|
||||
typedef __s64 time64_t;
|
||||
|
@ -80,16 +80,6 @@ struct hashmap {
|
||||
size_t sz;
|
||||
};
|
||||
|
||||
#define HASHMAP_INIT(hash_fn, equal_fn, ctx) { \
|
||||
.hash_fn = (hash_fn), \
|
||||
.equal_fn = (equal_fn), \
|
||||
.ctx = (ctx), \
|
||||
.buckets = NULL, \
|
||||
.cap = 0, \
|
||||
.cap_bits = 0, \
|
||||
.sz = 0, \
|
||||
}
|
||||
|
||||
void hashmap__init(struct hashmap *map, hashmap_hash_fn hash_fn,
|
||||
hashmap_equal_fn equal_fn, void *ctx);
|
||||
struct hashmap *hashmap__new(hashmap_hash_fn hash_fn,
|
||||
|
@ -521,7 +521,7 @@ static int perf_pmu__new_alias(struct perf_pmu *pmu, const char *name,
|
||||
pmu_name = pe->pmu;
|
||||
}
|
||||
|
||||
alias = malloc(sizeof(*alias));
|
||||
alias = zalloc(sizeof(*alias));
|
||||
if (!alias)
|
||||
return -ENOMEM;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user