bpf-next-6.12

-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEE+soXsSLHKoYyzcli6rmadz2vbToFAmbk/nIACgkQ6rmadz2v
 bTqxuBAAnqW81Rr0nORIxeJMbyo4EiFuYHGk6u5BYP9NPzqHroUPCLVmSP7Hp/Ta
 CJjsiZeivZsGa6Qlc3BCa4hHNpqP5WE1C/73svSDn7/99EfxdSBtirpMVFUPsUtn
 DDb5chNpvnxKNS8Mw5Ty8wBrdbXHMlSx+IfaFHpv0Yn6EAcuF4UdoEUq2l3PqhfD
 Il9Zm127eViPGAP+o+TBZFfW+rRw8d0ngqeRq2GvJ8ibNEDWss+GmBI1Dod7d+fC
 dUDg96Ipdm1a5Xz7dnH80eXz9JHdpu6qhQrQMKKArnlpJElrKiOf9b17ZcJoPQOR
 ZnstEnUyVnrWROZxUuKY72+2tx3TuSf+L9uZqFHNx3Ix5FIoS+tFbHf4b8SxtsOb
 hb2X7SigdGqhQDxUT+IPeO5hsJlIvG1/VYxMXxgc++rh9DjL06hDLUSH1WBSU0fC
 kFQ7HrcpAlVHtWmGbwwUyVjD+KC/qmZBTAnkcYT4C62WZVytSCnihIuSFAvV1tpZ
 SSIhVPyQ599UoZIiQYihp0S4qP74FotCtErWSrThneh2Cl8kDsRq//lV1nj/PTV8
 CpTvz4VCFDFTgthCfd62fP95EwW5K+aE3NjGTPW/9Hx/0+J/1tT+yqWsrToGaruf
 TbrqtzQhpclz9UEqA+696cVAXNj9uRU4AoD3YIg72kVnRlkgYd0=
 =MDwh
 -----END PGP SIGNATURE-----

Merge tag 'bpf-next-6.12' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Pull bpf updates from Alexei Starovoitov:

 - Introduce '__attribute__((bpf_fastcall))' for helpers and kfuncs with
   corresponding support in LLVM.

   It is similar to the existing 'no_caller_saved_registers' attribute
   in GCC/LLVM, with a provision for backward compatibility. It allows
   compilers to generate more efficient BPF code, assuming the verifier
   or JITs will inline or partially inline a helper/kfunc that carries
   the attribute. bpf_cast_to_kern_ctx, bpf_rdonly_cast and
   bpf_get_smp_processor_id are the first set of such helpers.
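
   A minimal sketch of how a BPF program might opt into the attribute
   (the feature-test guard and the extern declaration style are
   illustrative assumptions, not lifted from this series):

       #if __has_attribute(bpf_fastcall)
       #define __bpf_fastcall __attribute__((bpf_fastcall))
       #else
       #define __bpf_fastcall /* older compilers fall back to a plain call */
       #endif

       /* hypothetical redeclaration of one of the inlined helpers */
       extern __u32 bpf_get_smp_processor_id(void) __ksym __bpf_fastcall;

   Per the contract (see the bpf_func_proto comment in the
   include/linux/bpf.h hunk below), a zero-argument helper returning a
   value scratches only r0, so the compiler may keep r1-r5 live across
   the call once the verifier inlines it.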

 - Harden and extend ELF build ID parsing logic.

   When called from a sleepable context, the relevant parts of the ELF
   file will be read to find and fetch .note.gnu.build-id information.
   The logic is also hardened to avoid TOCTOU, overflow and
   out-of-bounds problems.
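
   A hedged sketch of the sleepable path, using the signatures from the
   include/linux/buildid.h hunk at the end of this diff (the wrapper
   function itself is hypothetical):

       #include <linux/buildid.h>

       static int fetch_build_id(struct vm_area_struct *vma)
       {
               unsigned char id[BUILD_ID_SIZE_MAX];
               __u32 sz;
               int err;

               /* may fault in and read the ELF file backing @vma */
               err = build_id_parse(vma, id, &sz);
               if (err)
                       return err;
               /* id[0..sz-1] now holds the .note.gnu.build-id payload */
               return 0;
       }

   Non-sleepable callers use build_id_parse_nofault() instead, which
   avoids faulting pages in.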

 - Improvements and fixes for sched-ext:
    - Allow passing BPF iterators as kfunc arguments
    - Make the pointer returned from iter_next method trusted
    - Fix x86 JIT convergence issue due to growing/shrinking conditional
      jumps in variable length encoding

 - BPF_LSM related:
    - Introduce a few VFS kfuncs and consolidate them in
      fs/bpf_fs_kfuncs.c (usage sketch below)
    - Enforce correct range of return values from certain LSM hooks
    - Disallow attaching to other LSM hooks
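
   A hedged sketch of a sleepable LSM program using the new kfuncs (the
   extern declarations are written out by hand here for illustration;
   the kfunc definitions appear in the fs/bpf_fs_kfuncs.c hunk below):

       #include "vmlinux.h"
       #include <bpf/bpf_helpers.h>
       #include <bpf/bpf_tracing.h>

       extern struct file *bpf_get_task_exe_file(struct task_struct *task) __ksym;
       extern void bpf_put_file(struct file *file) __ksym;
       extern int bpf_path_d_path(struct path *path, char *buf, size_t buf__sz) __ksym;

       char buf[256];

       SEC("lsm.s/file_open")
       int BPF_PROG(log_exe_path, struct file *file)
       {
               struct task_struct *task = bpf_get_current_task_btf();
               struct file *exe;

               exe = bpf_get_task_exe_file(task); /* takes a reference */
               if (!exe)
                       return 0;
               bpf_path_d_path(&exe->f_path, buf, sizeof(buf));
               bpf_put_file(exe); /* release it or the verifier rejects */
               return 0;
       }

       char _license[] SEC("license") = "GPL";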

 - Prerequisite work for upcoming Qdisc in BPF:
    - Allow kptrs in program provided structs
    - Support for gen_epilogue in verifier_ops

 - Important fixes:
    - Fix uprobe multi pid filter check
    - Fix bpf_strtol and bpf_strtoul helpers
    - Track equal scalars history on per-instruction level
    - Fix tailcall hierarchy on x86 and arm64
    - Fix signed division overflow to prevent INT_MIN/-1 trap on x86
    - Fix kernel stack retrieval in BPF progs attached to tracepoint:syscall
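
   For the sdiv fix, recall that x86's idiv instruction traps on signed
   overflow rather than wrapping. A runnable userspace illustration of
   the problematic operation (not kernel code):

       #include <limits.h>
       #include <stdio.h>

       int main(void)
       {
               volatile int a = INT_MIN, b = -1;

               /* The quotient 2147483648 does not fit in a signed int,
                * so on x86 the idiv raises #DE and the process gets
                * SIGFPE. The JIT fix emits guards so a BPF sdiv/smod
                * can never trap the kernel this way.
                */
               printf("%d\n", a / b);
               return 0;
       }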

 - Selftests:
    - Add uprobe bench/stress tool
    - Generate file dependencies to drastically improve re-build time
    - Match JIT-ed and BPF asm with __xlated/__jited keywords
    - Convert older tests to test_progs framework
    - Add support for RISC-V
    - Few fixes when BPF programs are compiled with GCC-BPF backend
      (support for GCC-BPF in BPF CI is ongoing in parallel)
    - Add traffic monitor
    - Enable cross compile and musl libc
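
   For flavour, a sketch of the new __xlated/__jited matching keywords
   in a test_progs-style program (macro names are from the selftests'
   bpf_misc.h; the matched instruction text is illustrative):

       SEC("raw_tp")
       __arch_x86_64
       __xlated("0: w0 = 0") /* match verifier-rewritten (xlated) insns */
       __jited(" xorl %eax, %eax") /* match disassembled JIT output */
       __success
       __naked void example(void)
       {
               asm volatile ("r0 = 0; exit;" ::: __clobber_all);
       }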

* tag 'bpf-next-6.12' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (260 commits)
  btf: require pahole 1.21+ for DEBUG_INFO_BTF with default DWARF version
  btf: move pahole check in scripts/link-vmlinux.sh to lib/Kconfig.debug
  btf: remove redundant CONFIG_BPF test in scripts/link-vmlinux.sh
  bpf: Call the missed kfree() when there is no special field in btf
  bpf: Call the missed btf_record_free() when map creation fails
  selftests/bpf: Add a test case to write mtu result into .rodata
  selftests/bpf: Add a test case to write strtol result into .rodata
  selftests/bpf: Rename ARG_PTR_TO_LONG test description
  selftests/bpf: Fix ARG_PTR_TO_LONG {half-,}uninitialized test
  bpf: Zero former ARG_PTR_TO_{LONG,INT} args in case of error
  bpf: Improve check_raw_mode_ok test for MEM_UNINIT-tagged types
  bpf: Fix helper writes to read-only maps
  bpf: Remove truncation test in bpf_strtol and bpf_strtoul helpers
  bpf: Fix bpf_strtol and bpf_strtoul helpers for 32bit
  selftests/bpf: Add tests for sdiv/smod overflow cases
  bpf: Fix a sdiv overflow issue
  libbpf: Add bpf_object__token_fd accessor
  docs/bpf: Add missing BPF program types to docs
  docs/bpf: Add constant values for linkages
  bpf: Use fake pt_regs when doing bpf syscall tracepoint tracing
  ...
Merged by Linus Torvalds on 2024-09-21 09:27:50 -07:00 as commit 440b652328.
249 changed files with 11479 additions and 3097 deletions.

Documentation/bpf/btf.rst:

@ -368,7 +368,7 @@ No additional type data follow ``btf_type``.
* ``info.kind_flag``: 0
* ``info.kind``: BTF_KIND_FUNC
* ``info.vlen``: linkage information (BTF_FUNC_STATIC, BTF_FUNC_GLOBAL
or BTF_FUNC_EXTERN)
or BTF_FUNC_EXTERN - see :ref:`BTF_Function_Linkage_Constants`)
* ``type``: a BTF_KIND_FUNC_PROTO type
No additional type data follow ``btf_type``.
@ -424,9 +424,8 @@ following data::
__u32 linkage;
};
``struct btf_var`` encoding:
* ``linkage``: currently only static variable 0, or globally allocated
variable in ELF sections 1
``btf_var.linkage`` may take the values: BTF_VAR_STATIC, BTF_VAR_GLOBAL_ALLOCATED or BTF_VAR_GLOBAL_EXTERN -
see :ref:`BTF_Var_Linkage_Constants`.
Not all type of global variables are supported by LLVM at this point.
The following is currently available:
@ -549,6 +548,38 @@ The ``btf_enum64`` encoding:
If the original enum value is signed and the size is less than 8,
that value will be sign extended into 8 bytes.
2.3 Constant Values
-------------------
.. _BTF_Function_Linkage_Constants:
2.3.1 Function Linkage Constant Values
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. table:: Function Linkage Values and Meanings
=================== ===== ===========
kind value description
=================== ===== ===========
``BTF_FUNC_STATIC`` 0x0 definition of subprogram not visible outside containing compilation unit
``BTF_FUNC_GLOBAL`` 0x1 definition of subprogram visible outside containing compilation unit
``BTF_FUNC_EXTERN`` 0x2 declaration of a subprogram whose definition is outside the containing compilation unit
=================== ===== ===========
.. _BTF_Var_Linkage_Constants:
2.3.2 Variable Linkage Constant Values
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. table:: Variable Linkage Values and Meanings
============================ ===== ===========
kind value description
============================ ===== ===========
``BTF_VAR_STATIC`` 0x0 definition of global variable not visible outside containing compilation unit
``BTF_VAR_GLOBAL_ALLOCATED`` 0x1 definition of global variable visible outside containing compilation unit
``BTF_VAR_GLOBAL_EXTERN`` 0x2 declaration of global variable whose definition is outside the containing compilation unit
============================ ===== ===========
3. BTF Kernel API
=================

Documentation/bpf/libbpf/program_types.rst:

@ -121,6 +121,8 @@ described in more detail in the footnotes.
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_LWT_XMIT`` | | ``lwt_xmit`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_NETFILTER`` | | ``netfilter`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_PERF_EVENT`` | | ``perf_event`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE`` | | ``raw_tp.w+`` [#rawtp]_ | |
@ -131,11 +133,23 @@ described in more detail in the footnotes.
+ + +----------------------------------+-----------+
| | | ``raw_tracepoint+`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_SCHED_ACT`` | | ``action`` | |
| ``BPF_PROG_TYPE_SCHED_ACT`` | | ``action`` [#tc_legacy]_ | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_SCHED_CLS`` | | ``classifier`` | |
| ``BPF_PROG_TYPE_SCHED_CLS`` | | ``classifier`` [#tc_legacy]_ | |
+ + +----------------------------------+-----------+
| | | ``tc`` | |
| | | ``tc`` [#tc_legacy]_ | |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_NETKIT_PRIMARY`` | ``netkit/primary`` | |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_NETKIT_PEER`` | ``netkit/peer`` | |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_TCX_INGRESS`` | ``tc/ingress`` | |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_TCX_EGRESS`` | ``tc/egress`` | |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_TCX_INGRESS`` | ``tcx/ingress`` | |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_TCX_EGRESS`` | ``tcx/egress`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_SK_LOOKUP`` | ``BPF_SK_LOOKUP`` | ``sk_lookup`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
@ -155,7 +169,9 @@ described in more detail in the footnotes.
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_SOCK_OPS`` | ``BPF_CGROUP_SOCK_OPS`` | ``sockops`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_STRUCT_OPS`` | | ``struct_ops+`` | |
| ``BPF_PROG_TYPE_STRUCT_OPS`` | | ``struct_ops+`` [#struct_ops]_ | |
+ + +----------------------------------+-----------+
| | | ``struct_ops.s+`` [#struct_ops]_ | Yes |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_SYSCALL`` | | ``syscall`` | Yes |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
@ -209,5 +225,11 @@ described in more detail in the footnotes.
``a-zA-Z0-9_.*?``.
.. [#lsm] The ``lsm`` attachment format is ``lsm[.s]/<hook>``.
.. [#rawtp] The ``raw_tp`` attach format is ``raw_tracepoint[.w]/<tracepoint>``.
.. [#tc_legacy] The ``tc``, ``classifier`` and ``action`` attach types are deprecated, use
``tcx/*`` instead.
.. [#struct_ops] The ``struct_ops`` attach format supports ``struct_ops[.s]/<name>`` convention,
but ``name`` is ignored and it is recommended to just use plain
``SEC("struct_ops[.s]")``. The attachments are defined in a struct initializer
that is tagged with ``SEC(".struct_ops[.link]")``.
.. [#tp] The ``tracepoint`` attach format is ``tracepoint/<category>/<name>``.
.. [#iter] The ``iter`` attach format is ``iter[.s]/<struct-name>``.

Documentation/bpf/verifier.rst:

@ -418,7 +418,7 @@ The rules for correspondence between registers / stack slots are as follows:
linked to the registers and stack slots of the parent state with the same
indices.
* For the outer stack frames, only caller saved registers (r6-r9) and stack
* For the outer stack frames, only callee saved registers (r6-r9) and stack
slots are linked to the registers and stack slots of the parent state with the
same indices.

MAINTAINERS:

@ -3997,7 +3997,7 @@ F: Documentation/devicetree/bindings/iio/imu/bosch,bmi323.yaml
F: drivers/iio/imu/bmi323/
BPF JIT for ARC
M: Shahab Vahedi <shahab@synopsys.com>
M: Shahab Vahedi <list+bpf@vahedi.org>
L: bpf@vger.kernel.org
S: Maintained
F: arch/arc/net/
@ -4164,6 +4164,7 @@ F: include/uapi/linux/btf*
F: include/uapi/linux/filter.h
F: kernel/bpf/
F: kernel/trace/bpf_trace.c
F: lib/buildid.c
F: lib/test_bpf.c
F: net/bpf/
F: net/core/filter.c
@ -4284,6 +4285,7 @@ L: bpf@vger.kernel.org
S: Maintained
F: kernel/bpf/stackmap.c
F: kernel/trace/bpf_trace.c
F: lib/buildid.c
BROADCOM ASP 2.0 ETHERNET DRIVER
M: Justin Chen <justin.chen@broadcom.com>

arch/arm64/net/bpf_jit_comp.c:

@ -26,9 +26,8 @@
#define TMP_REG_1 (MAX_BPF_JIT_REG + 0)
#define TMP_REG_2 (MAX_BPF_JIT_REG + 1)
#define TCALL_CNT (MAX_BPF_JIT_REG + 2)
#define TCCNT_PTR (MAX_BPF_JIT_REG + 2)
#define TMP_REG_3 (MAX_BPF_JIT_REG + 3)
#define FP_BOTTOM (MAX_BPF_JIT_REG + 4)
#define ARENA_VM_START (MAX_BPF_JIT_REG + 5)
#define check_imm(bits, imm) do { \
@ -63,11 +62,10 @@ static const int bpf2a64[] = {
[TMP_REG_1] = A64_R(10),
[TMP_REG_2] = A64_R(11),
[TMP_REG_3] = A64_R(12),
/* tail_call_cnt */
[TCALL_CNT] = A64_R(26),
/* tail_call_cnt_ptr */
[TCCNT_PTR] = A64_R(26),
/* temporary register for blinding constants */
[BPF_REG_AX] = A64_R(9),
[FP_BOTTOM] = A64_R(27),
/* callee saved register for kern_vm_start address */
[ARENA_VM_START] = A64_R(28),
};
@ -78,11 +76,15 @@ struct jit_ctx {
int epilogue_offset;
int *offset;
int exentry_idx;
int nr_used_callee_reg;
u8 used_callee_reg[8]; /* r6~r9, fp, arena_vm_start */
__le32 *image;
__le32 *ro_image;
u32 stack_size;
int fpb_offset;
u64 user_vm_start;
u64 arena_vm_start;
bool fp_used;
bool write;
};
struct bpf_plt {
@ -96,7 +98,7 @@ struct bpf_plt {
static inline void emit(const u32 insn, struct jit_ctx *ctx)
{
if (ctx->image != NULL)
if (ctx->image != NULL && ctx->write)
ctx->image[ctx->idx] = cpu_to_le32(insn);
ctx->idx++;
@ -181,14 +183,47 @@ static inline void emit_addr_mov_i64(const int reg, const u64 val,
}
}
static inline void emit_call(u64 target, struct jit_ctx *ctx)
static bool should_emit_indirect_call(long target, const struct jit_ctx *ctx)
{
u8 tmp = bpf2a64[TMP_REG_1];
long offset;
/* when ctx->ro_image is not allocated or the target is unknown,
* emit indirect call
*/
if (!ctx->ro_image || !target)
return true;
offset = target - (long)&ctx->ro_image[ctx->idx];
return offset < -SZ_128M || offset >= SZ_128M;
}
static void emit_direct_call(u64 target, struct jit_ctx *ctx)
{
u32 insn;
unsigned long pc;
pc = (unsigned long)&ctx->ro_image[ctx->idx];
insn = aarch64_insn_gen_branch_imm(pc, target, AARCH64_INSN_BRANCH_LINK);
emit(insn, ctx);
}
static void emit_indirect_call(u64 target, struct jit_ctx *ctx)
{
u8 tmp;
tmp = bpf2a64[TMP_REG_1];
emit_addr_mov_i64(tmp, target, ctx);
emit(A64_BLR(tmp), ctx);
}
static void emit_call(u64 target, struct jit_ctx *ctx)
{
if (should_emit_indirect_call((long)target, ctx))
emit_indirect_call(target, ctx);
else
emit_direct_call(target, ctx);
}
static inline int bpf2a64_offset(int bpf_insn, int off,
const struct jit_ctx *ctx)
{
@ -273,21 +308,143 @@ static bool is_lsi_offset(int offset, int scale)
return true;
}
/* generated prologue:
/* generated main prog prologue:
* bti c // if CONFIG_ARM64_BTI_KERNEL
* mov x9, lr
* nop // POKE_OFFSET
* paciasp // if CONFIG_ARM64_PTR_AUTH_KERNEL
* stp x29, lr, [sp, #-16]!
* mov x29, sp
* stp x19, x20, [sp, #-16]!
* stp x21, x22, [sp, #-16]!
* stp x25, x26, [sp, #-16]!
* stp x27, x28, [sp, #-16]!
* mov x25, sp
* mov tcc, #0
* stp xzr, x26, [sp, #-16]!
* mov x26, sp
* // PROLOGUE_OFFSET
* // save callee-saved registers
*/
static void prepare_bpf_tail_call_cnt(struct jit_ctx *ctx)
{
const bool is_main_prog = !bpf_is_subprog(ctx->prog);
const u8 ptr = bpf2a64[TCCNT_PTR];
if (is_main_prog) {
/* Initialize tail_call_cnt. */
emit(A64_PUSH(A64_ZR, ptr, A64_SP), ctx);
emit(A64_MOV(1, ptr, A64_SP), ctx);
} else
emit(A64_PUSH(ptr, ptr, A64_SP), ctx);
}
static void find_used_callee_regs(struct jit_ctx *ctx)
{
int i;
const struct bpf_prog *prog = ctx->prog;
const struct bpf_insn *insn = &prog->insnsi[0];
int reg_used = 0;
for (i = 0; i < prog->len; i++, insn++) {
if (insn->dst_reg == BPF_REG_6 || insn->src_reg == BPF_REG_6)
reg_used |= 1;
if (insn->dst_reg == BPF_REG_7 || insn->src_reg == BPF_REG_7)
reg_used |= 2;
if (insn->dst_reg == BPF_REG_8 || insn->src_reg == BPF_REG_8)
reg_used |= 4;
if (insn->dst_reg == BPF_REG_9 || insn->src_reg == BPF_REG_9)
reg_used |= 8;
if (insn->dst_reg == BPF_REG_FP || insn->src_reg == BPF_REG_FP) {
ctx->fp_used = true;
reg_used |= 16;
}
}
i = 0;
if (reg_used & 1)
ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_6];
if (reg_used & 2)
ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_7];
if (reg_used & 4)
ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_8];
if (reg_used & 8)
ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_9];
if (reg_used & 16)
ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_FP];
if (ctx->arena_vm_start)
ctx->used_callee_reg[i++] = bpf2a64[ARENA_VM_START];
ctx->nr_used_callee_reg = i;
}
/* Save callee-saved registers */
static void push_callee_regs(struct jit_ctx *ctx)
{
int reg1, reg2, i;
/*
* Program acting as exception boundary should save all ARM64
* Callee-saved registers as the exception callback needs to recover
* all ARM64 Callee-saved registers in its epilogue.
*/
if (ctx->prog->aux->exception_boundary) {
emit(A64_PUSH(A64_R(19), A64_R(20), A64_SP), ctx);
emit(A64_PUSH(A64_R(21), A64_R(22), A64_SP), ctx);
emit(A64_PUSH(A64_R(23), A64_R(24), A64_SP), ctx);
emit(A64_PUSH(A64_R(25), A64_R(26), A64_SP), ctx);
emit(A64_PUSH(A64_R(27), A64_R(28), A64_SP), ctx);
} else {
find_used_callee_regs(ctx);
for (i = 0; i + 1 < ctx->nr_used_callee_reg; i += 2) {
reg1 = ctx->used_callee_reg[i];
reg2 = ctx->used_callee_reg[i + 1];
emit(A64_PUSH(reg1, reg2, A64_SP), ctx);
}
if (i < ctx->nr_used_callee_reg) {
reg1 = ctx->used_callee_reg[i];
/* keep SP 16-byte aligned */
emit(A64_PUSH(reg1, A64_ZR, A64_SP), ctx);
}
}
}
/* Restore callee-saved registers */
static void pop_callee_regs(struct jit_ctx *ctx)
{
struct bpf_prog_aux *aux = ctx->prog->aux;
int reg1, reg2, i;
/*
* Program acting as exception boundary pushes R23 and R24 in addition
* to BPF callee-saved registers. Exception callback uses the boundary
* program's stack frame, so recover these extra registers in the above
* two cases.
*/
if (aux->exception_boundary || aux->exception_cb) {
emit(A64_POP(A64_R(27), A64_R(28), A64_SP), ctx);
emit(A64_POP(A64_R(25), A64_R(26), A64_SP), ctx);
emit(A64_POP(A64_R(23), A64_R(24), A64_SP), ctx);
emit(A64_POP(A64_R(21), A64_R(22), A64_SP), ctx);
emit(A64_POP(A64_R(19), A64_R(20), A64_SP), ctx);
} else {
i = ctx->nr_used_callee_reg - 1;
if (ctx->nr_used_callee_reg % 2 != 0) {
reg1 = ctx->used_callee_reg[i];
emit(A64_POP(reg1, A64_ZR, A64_SP), ctx);
i--;
}
while (i > 0) {
reg1 = ctx->used_callee_reg[i - 1];
reg2 = ctx->used_callee_reg[i];
emit(A64_POP(reg1, reg2, A64_SP), ctx);
i -= 2;
}
}
}
#define BTI_INSNS (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) ? 1 : 0)
#define PAC_INSNS (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL) ? 1 : 0)
@ -296,20 +453,13 @@ static bool is_lsi_offset(int offset, int scale)
#define POKE_OFFSET (BTI_INSNS + 1)
/* Tail call offset to jump into */
#define PROLOGUE_OFFSET (BTI_INSNS + 2 + PAC_INSNS + 8)
#define PROLOGUE_OFFSET (BTI_INSNS + 2 + PAC_INSNS + 4)
static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf,
bool is_exception_cb, u64 arena_vm_start)
static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
{
const struct bpf_prog *prog = ctx->prog;
const bool is_main_prog = !bpf_is_subprog(prog);
const u8 r6 = bpf2a64[BPF_REG_6];
const u8 r7 = bpf2a64[BPF_REG_7];
const u8 r8 = bpf2a64[BPF_REG_8];
const u8 r9 = bpf2a64[BPF_REG_9];
const u8 fp = bpf2a64[BPF_REG_FP];
const u8 tcc = bpf2a64[TCALL_CNT];
const u8 fpb = bpf2a64[FP_BOTTOM];
const u8 arena_vm_base = bpf2a64[ARENA_VM_START];
const int idx0 = ctx->idx;
int cur_offset;
@ -348,19 +498,28 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf,
emit(A64_MOV(1, A64_R(9), A64_LR), ctx);
emit(A64_NOP, ctx);
if (!is_exception_cb) {
if (!prog->aux->exception_cb) {
/* Sign lr */
if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL))
emit(A64_PACIASP, ctx);
/* Save FP and LR registers to stay align with ARM64 AAPCS */
emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
emit(A64_MOV(1, A64_FP, A64_SP), ctx);
/* Save callee-saved registers */
emit(A64_PUSH(r6, r7, A64_SP), ctx);
emit(A64_PUSH(r8, r9, A64_SP), ctx);
emit(A64_PUSH(fp, tcc, A64_SP), ctx);
emit(A64_PUSH(fpb, A64_R(28), A64_SP), ctx);
prepare_bpf_tail_call_cnt(ctx);
if (!ebpf_from_cbpf && is_main_prog) {
cur_offset = ctx->idx - idx0;
if (cur_offset != PROLOGUE_OFFSET) {
pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n",
cur_offset, PROLOGUE_OFFSET);
return -1;
}
/* BTI landing pad for the tail call, done with a BR */
emit_bti(A64_BTI_J, ctx);
}
push_callee_regs(ctx);
} else {
/*
* Exception callback receives FP of Main Program as third
@ -372,58 +531,28 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf,
* callee-saved registers. The exception callback will not push
* anything and re-use the main program's stack.
*
* 10 registers are on the stack
* 12 registers are on the stack
*/
emit(A64_SUB_I(1, A64_SP, A64_FP, 80), ctx);
emit(A64_SUB_I(1, A64_SP, A64_FP, 96), ctx);
}
/* Set up BPF prog stack base register */
emit(A64_MOV(1, fp, A64_SP), ctx);
if (!ebpf_from_cbpf && is_main_prog) {
/* Initialize tail_call_cnt */
emit(A64_MOVZ(1, tcc, 0, 0), ctx);
cur_offset = ctx->idx - idx0;
if (cur_offset != PROLOGUE_OFFSET) {
pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n",
cur_offset, PROLOGUE_OFFSET);
return -1;
}
/* BTI landing pad for the tail call, done with a BR */
emit_bti(A64_BTI_J, ctx);
}
/*
* Program acting as exception boundary should save all ARM64
* Callee-saved registers as the exception callback needs to recover
* all ARM64 Callee-saved registers in its epilogue.
*/
if (prog->aux->exception_boundary) {
/*
* As we are pushing two more registers, BPF_FP should be moved
* 16 bytes
*/
emit(A64_SUB_I(1, fp, fp, 16), ctx);
emit(A64_PUSH(A64_R(23), A64_R(24), A64_SP), ctx);
}
emit(A64_SUB_I(1, fpb, fp, ctx->fpb_offset), ctx);
if (ctx->fp_used)
/* Set up BPF prog stack base register */
emit(A64_MOV(1, fp, A64_SP), ctx);
/* Stack must be multiples of 16B */
ctx->stack_size = round_up(prog->aux->stack_depth, 16);
/* Set up function call stack */
emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
if (ctx->stack_size)
emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
if (arena_vm_start)
emit_a64_mov_i64(arena_vm_base, arena_vm_start, ctx);
if (ctx->arena_vm_start)
emit_a64_mov_i64(arena_vm_base, ctx->arena_vm_start, ctx);
return 0;
}
static int out_offset = -1; /* initialized on the first pass of build_body() */
static int emit_bpf_tail_call(struct jit_ctx *ctx)
{
/* bpf_tail_call(void *prog_ctx, struct bpf_array *array, u64 index) */
@ -432,11 +561,12 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
const u8 tmp = bpf2a64[TMP_REG_1];
const u8 prg = bpf2a64[TMP_REG_2];
const u8 tcc = bpf2a64[TCALL_CNT];
const int idx0 = ctx->idx;
#define cur_offset (ctx->idx - idx0)
#define jmp_offset (out_offset - (cur_offset))
const u8 tcc = bpf2a64[TMP_REG_3];
const u8 ptr = bpf2a64[TCCNT_PTR];
size_t off;
__le32 *branch1 = NULL;
__le32 *branch2 = NULL;
__le32 *branch3 = NULL;
/* if (index >= array->map.max_entries)
* goto out;
@ -446,16 +576,20 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
emit(A64_LDR32(tmp, r2, tmp), ctx);
emit(A64_MOV(0, r3, r3), ctx);
emit(A64_CMP(0, r3, tmp), ctx);
emit(A64_B_(A64_COND_CS, jmp_offset), ctx);
branch1 = ctx->image + ctx->idx;
emit(A64_NOP, ctx);
/*
* if (tail_call_cnt >= MAX_TAIL_CALL_CNT)
* if ((*tail_call_cnt_ptr) >= MAX_TAIL_CALL_CNT)
* goto out;
* tail_call_cnt++;
*/
emit_a64_mov_i64(tmp, MAX_TAIL_CALL_CNT, ctx);
emit(A64_LDR64I(tcc, ptr, 0), ctx);
emit(A64_CMP(1, tcc, tmp), ctx);
emit(A64_B_(A64_COND_CS, jmp_offset), ctx);
branch2 = ctx->image + ctx->idx;
emit(A64_NOP, ctx);
/* (*tail_call_cnt_ptr)++; */
emit(A64_ADD_I(1, tcc, tcc, 1), ctx);
/* prog = array->ptrs[index];
@ -467,27 +601,37 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
emit(A64_ADD(1, tmp, r2, tmp), ctx);
emit(A64_LSL(1, prg, r3, 3), ctx);
emit(A64_LDR64(prg, tmp, prg), ctx);
emit(A64_CBZ(1, prg, jmp_offset), ctx);
branch3 = ctx->image + ctx->idx;
emit(A64_NOP, ctx);
/* Update tail_call_cnt if the slot is populated. */
emit(A64_STR64I(tcc, ptr, 0), ctx);
/* restore SP */
if (ctx->stack_size)
emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
pop_callee_regs(ctx);
/* goto *(prog->bpf_func + prologue_offset); */
off = offsetof(struct bpf_prog, bpf_func);
emit_a64_mov_i64(tmp, off, ctx);
emit(A64_LDR64(tmp, prg, tmp), ctx);
emit(A64_ADD_I(1, tmp, tmp, sizeof(u32) * PROLOGUE_OFFSET), ctx);
emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
emit(A64_BR(tmp), ctx);
/* out: */
if (out_offset == -1)
out_offset = cur_offset;
if (cur_offset != out_offset) {
pr_err_once("tail_call out_offset = %d, expected %d!\n",
cur_offset, out_offset);
return -1;
if (ctx->image) {
off = &ctx->image[ctx->idx] - branch1;
*branch1 = cpu_to_le32(A64_B_(A64_COND_CS, off));
off = &ctx->image[ctx->idx] - branch2;
*branch2 = cpu_to_le32(A64_B_(A64_COND_CS, off));
off = &ctx->image[ctx->idx] - branch3;
*branch3 = cpu_to_le32(A64_CBZ(1, prg, off));
}
return 0;
#undef cur_offset
#undef jmp_offset
}
#ifdef CONFIG_ARM64_LSE_ATOMICS
@ -713,36 +857,18 @@ static void build_plt(struct jit_ctx *ctx)
plt->target = (u64)&dummy_tramp;
}
static void build_epilogue(struct jit_ctx *ctx, bool is_exception_cb)
static void build_epilogue(struct jit_ctx *ctx)
{
const u8 r0 = bpf2a64[BPF_REG_0];
const u8 r6 = bpf2a64[BPF_REG_6];
const u8 r7 = bpf2a64[BPF_REG_7];
const u8 r8 = bpf2a64[BPF_REG_8];
const u8 r9 = bpf2a64[BPF_REG_9];
const u8 fp = bpf2a64[BPF_REG_FP];
const u8 fpb = bpf2a64[FP_BOTTOM];
const u8 ptr = bpf2a64[TCCNT_PTR];
/* We're done with BPF stack */
emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
if (ctx->stack_size)
emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
/*
* Program acting as exception boundary pushes R23 and R24 in addition
* to BPF callee-saved registers. Exception callback uses the boundary
* program's stack frame, so recover these extra registers in the above
* two cases.
*/
if (ctx->prog->aux->exception_boundary || is_exception_cb)
emit(A64_POP(A64_R(23), A64_R(24), A64_SP), ctx);
pop_callee_regs(ctx);
/* Restore x27 and x28 */
emit(A64_POP(fpb, A64_R(28), A64_SP), ctx);
/* Restore fs (x25) and x26 */
emit(A64_POP(fp, A64_R(26), A64_SP), ctx);
/* Restore callee-saved register */
emit(A64_POP(r8, r9, A64_SP), ctx);
emit(A64_POP(r6, r7, A64_SP), ctx);
emit(A64_POP(A64_ZR, ptr, A64_SP), ctx);
/* Restore FP/LR registers */
emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx);
@ -862,7 +988,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
const u8 tmp = bpf2a64[TMP_REG_1];
const u8 tmp2 = bpf2a64[TMP_REG_2];
const u8 fp = bpf2a64[BPF_REG_FP];
const u8 fpb = bpf2a64[FP_BOTTOM];
const u8 arena_vm_base = bpf2a64[ARENA_VM_START];
const s16 off = insn->off;
const s32 imm = insn->imm;
@ -1314,9 +1439,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
emit(A64_ADD(1, tmp2, src, arena_vm_base), ctx);
src = tmp2;
}
if (ctx->fpb_offset > 0 && src == fp && BPF_MODE(insn->code) != BPF_PROBE_MEM32) {
src_adj = fpb;
off_adj = off + ctx->fpb_offset;
if (src == fp) {
src_adj = A64_SP;
off_adj = off + ctx->stack_size;
} else {
src_adj = src;
off_adj = off;
@ -1407,9 +1532,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
emit(A64_ADD(1, tmp2, dst, arena_vm_base), ctx);
dst = tmp2;
}
if (ctx->fpb_offset > 0 && dst == fp && BPF_MODE(insn->code) != BPF_PROBE_MEM32) {
dst_adj = fpb;
off_adj = off + ctx->fpb_offset;
if (dst == fp) {
dst_adj = A64_SP;
off_adj = off + ctx->stack_size;
} else {
dst_adj = dst;
off_adj = off;
@ -1469,9 +1594,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
emit(A64_ADD(1, tmp2, dst, arena_vm_base), ctx);
dst = tmp2;
}
if (ctx->fpb_offset > 0 && dst == fp && BPF_MODE(insn->code) != BPF_PROBE_MEM32) {
dst_adj = fpb;
off_adj = off + ctx->fpb_offset;
if (dst == fp) {
dst_adj = A64_SP;
off_adj = off + ctx->stack_size;
} else {
dst_adj = dst;
off_adj = off;
@ -1540,79 +1665,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
return 0;
}
/*
* Return 0 if FP may change at runtime, otherwise find the minimum negative
* offset to FP, converts it to positive number, and align down to 8 bytes.
*/
static int find_fpb_offset(struct bpf_prog *prog)
{
int i;
int offset = 0;
for (i = 0; i < prog->len; i++) {
const struct bpf_insn *insn = &prog->insnsi[i];
const u8 class = BPF_CLASS(insn->code);
const u8 mode = BPF_MODE(insn->code);
const u8 src = insn->src_reg;
const u8 dst = insn->dst_reg;
const s32 imm = insn->imm;
const s16 off = insn->off;
switch (class) {
case BPF_STX:
case BPF_ST:
/* fp holds atomic operation result */
if (class == BPF_STX && mode == BPF_ATOMIC &&
((imm == BPF_XCHG ||
imm == (BPF_FETCH | BPF_ADD) ||
imm == (BPF_FETCH | BPF_AND) ||
imm == (BPF_FETCH | BPF_XOR) ||
imm == (BPF_FETCH | BPF_OR)) &&
src == BPF_REG_FP))
return 0;
if (mode == BPF_MEM && dst == BPF_REG_FP &&
off < offset)
offset = insn->off;
break;
case BPF_JMP32:
case BPF_JMP:
break;
case BPF_LDX:
case BPF_LD:
/* fp holds load result */
if (dst == BPF_REG_FP)
return 0;
if (class == BPF_LDX && mode == BPF_MEM &&
src == BPF_REG_FP && off < offset)
offset = off;
break;
case BPF_ALU:
case BPF_ALU64:
default:
/* fp holds ALU result */
if (dst == BPF_REG_FP)
return 0;
}
}
if (offset < 0) {
/*
* safely be converted to a positive 'int', since insn->off
* is 's16'
*/
offset = -offset;
/* align down to 8 bytes */
offset = ALIGN_DOWN(offset, 8);
}
return offset;
}
static int build_body(struct jit_ctx *ctx, bool extra_pass)
{
const struct bpf_prog *prog = ctx->prog;
@ -1631,13 +1683,11 @@ static int build_body(struct jit_ctx *ctx, bool extra_pass)
const struct bpf_insn *insn = &prog->insnsi[i];
int ret;
if (ctx->image == NULL)
ctx->offset[i] = ctx->idx;
ctx->offset[i] = ctx->idx;
ret = build_insn(insn, ctx, extra_pass);
if (ret > 0) {
i++;
if (ctx->image == NULL)
ctx->offset[i] = ctx->idx;
ctx->offset[i] = ctx->idx;
continue;
}
if (ret)
@ -1648,8 +1698,7 @@ static int build_body(struct jit_ctx *ctx, bool extra_pass)
* the last element with the offset after the last
* instruction (end of program)
*/
if (ctx->image == NULL)
ctx->offset[i] = ctx->idx;
ctx->offset[i] = ctx->idx;
return 0;
}
@ -1701,9 +1750,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
bool tmp_blinded = false;
bool extra_pass = false;
struct jit_ctx ctx;
u64 arena_vm_start;
u8 *image_ptr;
u8 *ro_image_ptr;
int body_idx;
int exentry_idx;
if (!prog->jit_requested)
return orig_prog;
@ -1719,7 +1769,6 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
prog = tmp;
}
arena_vm_start = bpf_arena_get_kern_vm_start(prog->aux->arena);
jit_data = prog->aux->jit_data;
if (!jit_data) {
jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
@ -1749,17 +1798,15 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
goto out_off;
}
ctx.fpb_offset = find_fpb_offset(prog);
ctx.user_vm_start = bpf_arena_get_user_vm_start(prog->aux->arena);
ctx.arena_vm_start = bpf_arena_get_kern_vm_start(prog->aux->arena);
/*
* 1. Initial fake pass to compute ctx->idx and ctx->offset.
/* Pass 1: Estimate the maximum image size.
*
* BPF line info needs ctx->offset[i] to be the offset of
* instruction[i] in jited image, so build prologue first.
*/
if (build_prologue(&ctx, was_classic, prog->aux->exception_cb,
arena_vm_start)) {
if (build_prologue(&ctx, was_classic)) {
prog = orig_prog;
goto out_off;
}
@ -1770,14 +1817,14 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
}
ctx.epilogue_offset = ctx.idx;
build_epilogue(&ctx, prog->aux->exception_cb);
build_epilogue(&ctx);
build_plt(&ctx);
extable_align = __alignof__(struct exception_table_entry);
extable_size = prog->aux->num_exentries *
sizeof(struct exception_table_entry);
/* Now we know the actual image size. */
/* Now we know the maximum image size. */
prog_size = sizeof(u32) * ctx.idx;
/* also allocate space for plt target */
extable_offset = round_up(prog_size + PLT_TARGET_SIZE, extable_align);
@ -1790,7 +1837,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
goto out_off;
}
/* 2. Now, the actual pass. */
/* Pass 2: Determine jited position and result for each instruction */
/*
* Use the image(RW) for writing the JITed instructions. But also save
@ -1806,30 +1853,56 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
skip_init_ctx:
ctx.idx = 0;
ctx.exentry_idx = 0;
ctx.write = true;
build_prologue(&ctx, was_classic, prog->aux->exception_cb, arena_vm_start);
build_prologue(&ctx, was_classic);
/* Record exentry_idx and body_idx before first build_body */
exentry_idx = ctx.exentry_idx;
body_idx = ctx.idx;
/* Dont write body instructions to memory for now */
ctx.write = false;
if (build_body(&ctx, extra_pass)) {
prog = orig_prog;
goto out_free_hdr;
}
build_epilogue(&ctx, prog->aux->exception_cb);
ctx.epilogue_offset = ctx.idx;
ctx.exentry_idx = exentry_idx;
ctx.idx = body_idx;
ctx.write = true;
/* Pass 3: Adjust jump offset and write final image */
if (build_body(&ctx, extra_pass) ||
WARN_ON_ONCE(ctx.idx != ctx.epilogue_offset)) {
prog = orig_prog;
goto out_free_hdr;
}
build_epilogue(&ctx);
build_plt(&ctx);
/* 3. Extra pass to validate JITed code. */
/* Extra pass to validate JITed code. */
if (validate_ctx(&ctx)) {
prog = orig_prog;
goto out_free_hdr;
}
/* update the real prog size */
prog_size = sizeof(u32) * ctx.idx;
/* And we're done. */
if (bpf_jit_enable > 1)
bpf_jit_dump(prog->len, prog_size, 2, ctx.image);
if (!prog->is_func || extra_pass) {
if (extra_pass && ctx.idx != jit_data->ctx.idx) {
pr_err_once("multi-func JIT bug %d != %d\n",
/* The jited image may shrink since the jited result for
* BPF_CALL to subprog may be changed from indirect call
* to direct call.
*/
if (extra_pass && ctx.idx > jit_data->ctx.idx) {
pr_err_once("multi-func JIT bug %d > %d\n",
ctx.idx, jit_data->ctx.idx);
prog->bpf_func = NULL;
prog->jited = 0;
@ -2300,6 +2373,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
.image = image,
.ro_image = ro_image,
.idx = 0,
.write = true,
};
nregs = btf_func_model_nregs(m);

arch/x86/net/bpf_jit_comp.c:

@ -64,6 +64,56 @@ static bool is_imm8(int value)
return value <= 127 && value >= -128;
}
/*
* Let us limit the positive offset to be <= 123.
* This is to ensure eventual jit convergence for the following patterns:
* ...
* pass4, final_proglen=4391:
* ...
* 20e: 48 85 ff test rdi,rdi
* 211: 74 7d je 0x290
* 213: 48 8b 77 00 mov rsi,QWORD PTR [rdi+0x0]
* ...
* 289: 48 85 ff test rdi,rdi
* 28c: 74 17 je 0x2a5
* 28e: e9 7f ff ff ff jmp 0x212
* 293: bf 03 00 00 00 mov edi,0x3
* Note that insn at 0x211 is 2-byte cond jump insn for offset 0x7d (-125)
* and insn at 0x28e is 5-byte jmp insn with offset -129.
*
* pass5, final_proglen=4392:
* ...
* 20e: 48 85 ff test rdi,rdi
* 211: 0f 84 80 00 00 00 je 0x297
* 217: 48 8b 77 00 mov rsi,QWORD PTR [rdi+0x0]
* ...
* 28d: 48 85 ff test rdi,rdi
* 290: 74 1a je 0x2ac
* 292: eb 84 jmp 0x218
* 294: bf 03 00 00 00 mov edi,0x3
* Note that insn at 0x211 is 6-byte cond jump insn now since its offset
* becomes 0x80 based on previous round (0x293 - 0x213 = 0x80).
* At the same time, insn at 0x292 is a 2-byte insn since its offset is
* -124.
*
* pass6 will repeat the same code as in pass4 and this will prevent
* eventual convergence.
*
* To fix this issue, we need to break je (2->6 bytes) <-> jmp (5->2 bytes)
* cycle in the above. In the above example je offset <= 0x7c should work.
*
* For other cases, je <-> je needs offset <= 0x7b to avoid no convergence
* issue. For jmp <-> je and jmp <-> jmp cases, jmp offset <= 0x7c should
* avoid no convergence issue.
*
* Overall, let us limit the positive offset for 8bit cond/uncond jmp insn
* to maximum 123 (0x7b). This way, the jit pass can eventually converge.
*/
static bool is_imm8_jmp_offset(int value)
{
return value <= 123 && value >= -128;
}
static bool is_simm32(s64 value)
{
return value == (s64)(s32)value;
@ -273,7 +323,7 @@ struct jit_context {
/* Number of bytes emit_patch() needs to generate instructions */
#define X86_PATCH_SIZE 5
/* Number of bytes that will be skipped on tailcall */
#define X86_TAIL_CALL_OFFSET (11 + ENDBR_INSN_SIZE)
#define X86_TAIL_CALL_OFFSET (12 + ENDBR_INSN_SIZE)
static void push_r12(u8 **pprog)
{
@ -403,6 +453,37 @@ static void emit_cfi(u8 **pprog, u32 hash)
*pprog = prog;
}
static void emit_prologue_tail_call(u8 **pprog, bool is_subprog)
{
u8 *prog = *pprog;
if (!is_subprog) {
/* cmp rax, MAX_TAIL_CALL_CNT */
EMIT4(0x48, 0x83, 0xF8, MAX_TAIL_CALL_CNT);
EMIT2(X86_JA, 6); /* ja 6 */
/* rax is tail_call_cnt if <= MAX_TAIL_CALL_CNT.
* case1: entry of main prog.
* case2: tail callee of main prog.
*/
EMIT1(0x50); /* push rax */
/* Make rax as tail_call_cnt_ptr. */
EMIT3(0x48, 0x89, 0xE0); /* mov rax, rsp */
EMIT2(0xEB, 1); /* jmp 1 */
/* rax is tail_call_cnt_ptr if > MAX_TAIL_CALL_CNT.
* case: tail callee of subprog.
*/
EMIT1(0x50); /* push rax */
/* push tail_call_cnt_ptr */
EMIT1(0x50); /* push rax */
} else { /* is_subprog */
/* rax is tail_call_cnt_ptr. */
EMIT1(0x50); /* push rax */
EMIT1(0x50); /* push rax */
}
*pprog = prog;
}
/*
* Emit x86-64 prologue code for BPF program.
* bpf_tail_call helper will skip the first X86_TAIL_CALL_OFFSET bytes
@ -424,10 +505,10 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
/* When it's the entry of the whole tailcall context,
* zeroing rax means initialising tail_call_cnt.
*/
EMIT2(0x31, 0xC0); /* xor eax, eax */
EMIT3(0x48, 0x31, 0xC0); /* xor rax, rax */
else
/* Keep the same instruction layout. */
EMIT2(0x66, 0x90); /* nop2 */
emit_nops(&prog, 3); /* nop3 */
}
/* Exception callback receives FP as third parameter */
if (is_exception_cb) {
@ -453,7 +534,7 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
if (stack_depth)
EMIT3_off32(0x48, 0x81, 0xEC, round_up(stack_depth, 8));
if (tail_call_reachable)
EMIT1(0x50); /* push rax */
emit_prologue_tail_call(&prog, is_subprog);
*pprog = prog;
}
@ -589,13 +670,15 @@ static void emit_return(u8 **pprog, u8 *ip)
*pprog = prog;
}
#define BPF_TAIL_CALL_CNT_PTR_STACK_OFF(stack) (-16 - round_up(stack, 8))
/*
* Generate the following code:
*
* ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ...
* if (index >= array->map.max_entries)
* goto out;
* if (tail_call_cnt++ >= MAX_TAIL_CALL_CNT)
* if ((*tcc_ptr)++ >= MAX_TAIL_CALL_CNT)
* goto out;
* prog = array->ptrs[index];
* if (prog == NULL)
@ -608,7 +691,7 @@ static void emit_bpf_tail_call_indirect(struct bpf_prog *bpf_prog,
u32 stack_depth, u8 *ip,
struct jit_context *ctx)
{
int tcc_off = -4 - round_up(stack_depth, 8);
int tcc_ptr_off = BPF_TAIL_CALL_CNT_PTR_STACK_OFF(stack_depth);
u8 *prog = *pprog, *start = *pprog;
int offset;
@ -630,16 +713,14 @@ static void emit_bpf_tail_call_indirect(struct bpf_prog *bpf_prog,
EMIT2(X86_JBE, offset); /* jbe out */
/*
* if (tail_call_cnt++ >= MAX_TAIL_CALL_CNT)
* if ((*tcc_ptr)++ >= MAX_TAIL_CALL_CNT)
* goto out;
*/
EMIT2_off32(0x8B, 0x85, tcc_off); /* mov eax, dword ptr [rbp - tcc_off] */
EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
EMIT3_off32(0x48, 0x8B, 0x85, tcc_ptr_off); /* mov rax, qword ptr [rbp - tcc_ptr_off] */
EMIT4(0x48, 0x83, 0x38, MAX_TAIL_CALL_CNT); /* cmp qword ptr [rax], MAX_TAIL_CALL_CNT */
offset = ctx->tail_call_indirect_label - (prog + 2 - start);
EMIT2(X86_JAE, offset); /* jae out */
EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
EMIT2_off32(0x89, 0x85, tcc_off); /* mov dword ptr [rbp - tcc_off], eax */
/* prog = array->ptrs[index]; */
EMIT4_off32(0x48, 0x8B, 0x8C, 0xD6, /* mov rcx, [rsi + rdx * 8 + offsetof(...)] */
@ -654,6 +735,9 @@ static void emit_bpf_tail_call_indirect(struct bpf_prog *bpf_prog,
offset = ctx->tail_call_indirect_label - (prog + 2 - start);
EMIT2(X86_JE, offset); /* je out */
/* Inc tail_call_cnt if the slot is populated. */
EMIT4(0x48, 0x83, 0x00, 0x01); /* add qword ptr [rax], 1 */
if (bpf_prog->aux->exception_boundary) {
pop_callee_regs(&prog, all_callee_regs_used);
pop_r12(&prog);
@ -663,6 +747,11 @@ static void emit_bpf_tail_call_indirect(struct bpf_prog *bpf_prog,
pop_r12(&prog);
}
/* Pop tail_call_cnt_ptr. */
EMIT1(0x58); /* pop rax */
/* Pop tail_call_cnt, if it's main prog.
* Pop tail_call_cnt_ptr, if it's subprog.
*/
EMIT1(0x58); /* pop rax */
if (stack_depth)
EMIT3_off32(0x48, 0x81, 0xC4, /* add rsp, sd */
@ -691,21 +780,19 @@ static void emit_bpf_tail_call_direct(struct bpf_prog *bpf_prog,
bool *callee_regs_used, u32 stack_depth,
struct jit_context *ctx)
{
int tcc_off = -4 - round_up(stack_depth, 8);
int tcc_ptr_off = BPF_TAIL_CALL_CNT_PTR_STACK_OFF(stack_depth);
u8 *prog = *pprog, *start = *pprog;
int offset;
/*
* if (tail_call_cnt++ >= MAX_TAIL_CALL_CNT)
* if ((*tcc_ptr)++ >= MAX_TAIL_CALL_CNT)
* goto out;
*/
EMIT2_off32(0x8B, 0x85, tcc_off); /* mov eax, dword ptr [rbp - tcc_off] */
EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
EMIT3_off32(0x48, 0x8B, 0x85, tcc_ptr_off); /* mov rax, qword ptr [rbp - tcc_ptr_off] */
EMIT4(0x48, 0x83, 0x38, MAX_TAIL_CALL_CNT); /* cmp qword ptr [rax], MAX_TAIL_CALL_CNT */
offset = ctx->tail_call_direct_label - (prog + 2 - start);
EMIT2(X86_JAE, offset); /* jae out */
EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
EMIT2_off32(0x89, 0x85, tcc_off); /* mov dword ptr [rbp - tcc_off], eax */
poke->tailcall_bypass = ip + (prog - start);
poke->adj_off = X86_TAIL_CALL_OFFSET;
@ -715,6 +802,9 @@ static void emit_bpf_tail_call_direct(struct bpf_prog *bpf_prog,
emit_jump(&prog, (u8 *)poke->tailcall_target + X86_PATCH_SIZE,
poke->tailcall_bypass);
/* Inc tail_call_cnt if the slot is populated. */
EMIT4(0x48, 0x83, 0x00, 0x01); /* add qword ptr [rax], 1 */
if (bpf_prog->aux->exception_boundary) {
pop_callee_regs(&prog, all_callee_regs_used);
pop_r12(&prog);
@ -724,6 +814,11 @@ static void emit_bpf_tail_call_direct(struct bpf_prog *bpf_prog,
pop_r12(&prog);
}
/* Pop tail_call_cnt_ptr. */
EMIT1(0x58); /* pop rax */
/* Pop tail_call_cnt, if it's main prog.
* Pop tail_call_cnt_ptr, if it's subprog.
*/
EMIT1(0x58); /* pop rax */
if (stack_depth)
EMIT3_off32(0x48, 0x81, 0xC4, round_up(stack_depth, 8));
@ -1311,9 +1406,11 @@ static void emit_shiftx(u8 **pprog, u32 dst_reg, u8 src_reg, bool is64, u8 op)
#define INSN_SZ_DIFF (((addrs[i] - addrs[i - 1]) - (prog - temp)))
/* mov rax, qword ptr [rbp - rounded_stack_depth - 8] */
#define RESTORE_TAIL_CALL_CNT(stack) \
EMIT3_off32(0x48, 0x8B, 0x85, -round_up(stack, 8) - 8)
#define __LOAD_TCC_PTR(off) \
EMIT3_off32(0x48, 0x8B, 0x85, off)
/* mov rax, qword ptr [rbp - rounded_stack_depth - 16] */
#define LOAD_TAIL_CALL_CNT_PTR(stack) \
__LOAD_TCC_PTR(BPF_TAIL_CALL_CNT_PTR_STACK_OFF(stack))
static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image,
int oldproglen, struct jit_context *ctx, bool jmp_padding)
@ -2031,7 +2128,7 @@ st: if (is_imm8(insn->off))
func = (u8 *) __bpf_call_base + imm32;
if (tail_call_reachable) {
RESTORE_TAIL_CALL_CNT(bpf_prog->aux->stack_depth);
LOAD_TAIL_CALL_CNT_PTR(bpf_prog->aux->stack_depth);
ip += 7;
}
if (!imm32)
@ -2184,7 +2281,7 @@ st: if (is_imm8(insn->off))
return -EFAULT;
}
jmp_offset = addrs[i + insn->off] - addrs[i];
if (is_imm8(jmp_offset)) {
if (is_imm8_jmp_offset(jmp_offset)) {
if (jmp_padding) {
/* To keep the jmp_offset valid, the extra bytes are
* padded before the jump insn, so we subtract the
@ -2266,7 +2363,7 @@ st: if (is_imm8(insn->off))
break;
}
emit_jmp:
if (is_imm8(jmp_offset)) {
if (is_imm8_jmp_offset(jmp_offset)) {
if (jmp_padding) {
/* To avoid breaking jmp_offset, the extra bytes
* are padded before the actual jmp insn, so
@ -2706,6 +2803,10 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
return 0;
}
/* mov rax, qword ptr [rbp - rounded_stack_depth - 8] */
#define LOAD_TRAMP_TAIL_CALL_CNT_PTR(stack) \
__LOAD_TCC_PTR(-round_up(stack, 8) - 8)
/* Example:
* __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev);
* its 'struct btf_func_model' will be nr_args=2
@ -2826,7 +2927,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
* [ ... ]
* [ stack_arg2 ]
* RBP - arg_stack_off [ stack_arg1 ]
* RSP [ tail_call_cnt ] BPF_TRAMP_F_TAIL_CALL_CTX
* RSP [ tail_call_cnt_ptr ] BPF_TRAMP_F_TAIL_CALL_CTX
*/
/* room for return value of orig_call or fentry prog */
@ -2955,10 +3056,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
save_args(m, &prog, arg_stack_off, true);
if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) {
/* Before calling the original function, restore the
* tail_call_cnt from stack to rax.
/* Before calling the original function, load the
* tail_call_cnt_ptr from stack to rax.
*/
RESTORE_TAIL_CALL_CNT(stack_size);
LOAD_TRAMP_TAIL_CALL_CNT_PTR(stack_size);
}
if (flags & BPF_TRAMP_F_ORIG_STACK) {
@ -3017,10 +3118,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
goto cleanup;
}
} else if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) {
/* Before running the original function, restore the
* tail_call_cnt from stack to rax.
/* Before running the original function, load the
* tail_call_cnt_ptr from stack to rax.
*/
RESTORE_TAIL_CALL_CNT(stack_size);
LOAD_TRAMP_TAIL_CALL_CNT_PTR(stack_size);
}
/* restore return value of orig_call or fentry prog back into RAX */

fs/Makefile:

@ -129,3 +129,4 @@ obj-$(CONFIG_EFIVAR_FS) += efivarfs/
obj-$(CONFIG_EROFS_FS) += erofs/
obj-$(CONFIG_VBOXSF_FS) += vboxsf/
obj-$(CONFIG_ZONEFS_FS) += zonefs/
obj-$(CONFIG_BPF_LSM) += bpf_fs_kfuncs.o

fs/bpf_fs_kfuncs.c (new file, 185 lines):

@ -0,0 +1,185 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2024 Google LLC. */
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/btf_ids.h>
#include <linux/dcache.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/mm.h>
#include <linux/xattr.h>
__bpf_kfunc_start_defs();
/**
* bpf_get_task_exe_file - get a reference on the exe_file struct file member of
* the mm_struct that is nested within the supplied
* task_struct
* @task: task_struct of which the nested mm_struct exe_file member to get a
* reference on
*
* Get a reference on the exe_file struct file member field of the mm_struct
* nested within the supplied *task*. The referenced file pointer acquired by
* this BPF kfunc must be released using bpf_put_file(). Failing to call
* bpf_put_file() on the returned referenced struct file pointer that has been
* acquired by this BPF kfunc will result in the BPF program being rejected by
* the BPF verifier.
*
* This BPF kfunc may only be called from BPF LSM programs.
*
* Internally, this BPF kfunc leans on get_task_exe_file(), such that calling
* bpf_get_task_exe_file() would be analogous to calling get_task_exe_file()
* directly in kernel context.
*
* Return: A referenced struct file pointer to the exe_file member of the
* mm_struct that is nested within the supplied *task*. On error, NULL is
* returned.
*/
__bpf_kfunc struct file *bpf_get_task_exe_file(struct task_struct *task)
{
return get_task_exe_file(task);
}
/**
* bpf_put_file - put a reference on the supplied file
* @file: file to put a reference on
*
* Put a reference on the supplied *file*. Only referenced file pointers may be
* passed to this BPF kfunc. Attempting to pass an unreferenced file pointer, or
* any other arbitrary pointer for that matter, will result in the BPF program
* being rejected by the BPF verifier.
*
* This BPF kfunc may only be called from BPF LSM programs.
*/
__bpf_kfunc void bpf_put_file(struct file *file)
{
fput(file);
}
/**
* bpf_path_d_path - resolve the pathname for the supplied path
* @path: path to resolve the pathname for
* @buf: buffer to return the resolved pathname in
* @buf__sz: length of the supplied buffer
*
* Resolve the pathname for the supplied *path* and store it in *buf*. This BPF
* kfunc is the safer variant of the legacy bpf_d_path() helper and should be
* used in place of bpf_d_path() whenever possible. It enforces KF_TRUSTED_ARGS
* semantics, meaning that the supplied *path* must itself hold a valid
* reference, or else the BPF program will be outright rejected by the BPF
* verifier.
*
* This BPF kfunc may only be called from BPF LSM programs.
*
* Return: A positive integer corresponding to the length of the resolved
* pathname in *buf*, including the NUL termination character. On error, a
* negative integer is returned.
*/
__bpf_kfunc int bpf_path_d_path(struct path *path, char *buf, size_t buf__sz)
{
int len;
char *ret;
if (!buf__sz)
return -EINVAL;
ret = d_path(path, buf, buf__sz);
if (IS_ERR(ret))
return PTR_ERR(ret);
len = buf + buf__sz - ret;
memmove(buf, ret, len);
return len;
}
/**
* bpf_get_dentry_xattr - get xattr of a dentry
* @dentry: dentry to get xattr from
* @name__str: name of the xattr
* @value_p: output buffer of the xattr value
*
* Get xattr *name__str* of *dentry* and store the output in *value_ptr*.
*
* For security reasons, only *name__str* with prefix "user." is allowed.
*
* Return: 0 on success, a negative value on error.
*/
__bpf_kfunc int bpf_get_dentry_xattr(struct dentry *dentry, const char *name__str,
struct bpf_dynptr *value_p)
{
struct bpf_dynptr_kern *value_ptr = (struct bpf_dynptr_kern *)value_p;
struct inode *inode = d_inode(dentry);
u32 value_len;
void *value;
int ret;
if (WARN_ON(!inode))
return -EINVAL;
if (strncmp(name__str, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
return -EPERM;
value_len = __bpf_dynptr_size(value_ptr);
value = __bpf_dynptr_data_rw(value_ptr, value_len);
if (!value)
return -EINVAL;
ret = inode_permission(&nop_mnt_idmap, inode, MAY_READ);
if (ret)
return ret;
return __vfs_getxattr(dentry, inode, name__str, value, value_len);
}
/**
* bpf_get_file_xattr - get xattr of a file
* @file: file to get xattr from
* @name__str: name of the xattr
* @value_p: output buffer of the xattr value
*
* Get xattr *name__str* of *file* and store the output in *value_ptr*.
*
* For security reasons, only *name__str* with prefix "user." is allowed.
*
* Return: 0 on success, a negative value on error.
*/
__bpf_kfunc int bpf_get_file_xattr(struct file *file, const char *name__str,
struct bpf_dynptr *value_p)
{
struct dentry *dentry;
dentry = file_dentry(file);
return bpf_get_dentry_xattr(dentry, name__str, value_p);
}
__bpf_kfunc_end_defs();
BTF_KFUNCS_START(bpf_fs_kfunc_set_ids)
BTF_ID_FLAGS(func, bpf_get_task_exe_file,
KF_ACQUIRE | KF_TRUSTED_ARGS | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_put_file, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_path_d_path, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_get_dentry_xattr, KF_SLEEPABLE | KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_get_file_xattr, KF_SLEEPABLE | KF_TRUSTED_ARGS)
BTF_KFUNCS_END(bpf_fs_kfunc_set_ids)
static int bpf_fs_kfuncs_filter(const struct bpf_prog *prog, u32 kfunc_id)
{
if (!btf_id_set8_contains(&bpf_fs_kfunc_set_ids, kfunc_id) ||
prog->type == BPF_PROG_TYPE_LSM)
return 0;
return -EACCES;
}
static const struct btf_kfunc_id_set bpf_fs_kfunc_set = {
.owner = THIS_MODULE,
.set = &bpf_fs_kfunc_set_ids,
.filter = bpf_fs_kfuncs_filter,
};
static int __init bpf_fs_kfuncs_init(void)
{
return register_btf_kfunc_id_set(BPF_PROG_TYPE_LSM, &bpf_fs_kfunc_set);
}
late_initcall(bpf_fs_kfuncs_init);

include/linux/bpf.h:

@ -294,6 +294,7 @@ struct bpf_map {
* same prog type, JITed flag and xdp_has_frags flag.
*/
struct {
const struct btf_type *attach_func_proto;
spinlock_t lock;
enum bpf_prog_type type;
bool jited;
@ -694,6 +695,11 @@ enum bpf_type_flag {
/* DYNPTR points to xdp_buff */
DYNPTR_TYPE_XDP = BIT(16 + BPF_BASE_TYPE_BITS),
/* Memory must be aligned on some architectures, used in combination with
* MEM_FIXED_SIZE.
*/
MEM_ALIGNED = BIT(17 + BPF_BASE_TYPE_BITS),
__BPF_TYPE_FLAG_MAX,
__BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1,
};
@ -731,8 +737,6 @@ enum bpf_arg_type {
ARG_ANYTHING, /* any (initialized) argument is ok */
ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */
ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */
ARG_PTR_TO_INT, /* pointer to int */
ARG_PTR_TO_LONG, /* pointer to long */
ARG_PTR_TO_SOCKET, /* pointer to bpf_sock (fullsock) */
ARG_PTR_TO_BTF_ID, /* pointer to in-kernel struct */
ARG_PTR_TO_RINGBUF_MEM, /* pointer to dynamically reserved ringbuf memory */
@ -743,7 +747,7 @@ enum bpf_arg_type {
ARG_PTR_TO_STACK, /* pointer to stack */
ARG_PTR_TO_CONST_STR, /* pointer to a null terminated read-only string */
ARG_PTR_TO_TIMER, /* pointer to bpf_timer */
ARG_PTR_TO_KPTR, /* pointer to referenced kptr */
ARG_KPTR_XCHG_DEST, /* pointer to destination that kptrs are bpf_kptr_xchg'd into */
ARG_PTR_TO_DYNPTR, /* pointer to bpf_dynptr. See bpf_type_flag for dynptr type */
__BPF_ARG_TYPE_MAX,
@ -807,6 +811,12 @@ struct bpf_func_proto {
bool gpl_only;
bool pkt_access;
bool might_sleep;
/* set to true if helper follows contract for llvm
* attribute bpf_fastcall:
* - void functions do not scratch r0
* - functions taking N arguments scratch only registers r1-rN
*/
bool allow_fastcall;
enum bpf_return_type ret_type;
union {
struct {
@ -919,6 +929,7 @@ static_assert(__BPF_REG_TYPE_MAX <= BPF_BASE_TYPE_LIMIT);
*/
struct bpf_insn_access_aux {
enum bpf_reg_type reg_type;
bool is_ldsx;
union {
int ctx_field_size;
struct {
@ -927,6 +938,7 @@ struct bpf_insn_access_aux {
};
};
struct bpf_verifier_log *log; /* for verbose logs */
bool is_retval; /* is accessing function return value ? */
};
static inline void
@ -965,6 +977,8 @@ struct bpf_verifier_ops {
struct bpf_insn_access_aux *info);
int (*gen_prologue)(struct bpf_insn *insn, bool direct_write,
const struct bpf_prog *prog);
int (*gen_epilogue)(struct bpf_insn *insn, const struct bpf_prog *prog,
s16 ctx_stack_off);
int (*gen_ld_abs)(const struct bpf_insn *orig,
struct bpf_insn *insn_buf);
u32 (*convert_ctx_access)(enum bpf_access_type type,
@ -1795,6 +1809,7 @@ struct bpf_struct_ops_common_value {
#define BPF_MODULE_OWNER ((void *)((0xeB9FUL << 2) + POISON_POINTER_DELTA))
bool bpf_struct_ops_get(const void *kdata);
void bpf_struct_ops_put(const void *kdata);
int bpf_struct_ops_supported(const struct bpf_struct_ops *st_ops, u32 moff);
int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key,
void *value);
int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
@ -1851,6 +1866,10 @@ static inline void bpf_module_put(const void *data, struct module *owner)
{
module_put(owner);
}
static inline int bpf_struct_ops_supported(const struct bpf_struct_ops *st_ops, u32 moff)
{
return -ENOTSUPP;
}
static inline int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map,
void *key,
void *value)
@ -3184,7 +3203,9 @@ extern const struct bpf_func_proto bpf_get_current_uid_gid_proto;
extern const struct bpf_func_proto bpf_get_current_comm_proto;
extern const struct bpf_func_proto bpf_get_stackid_proto;
extern const struct bpf_func_proto bpf_get_stack_proto;
extern const struct bpf_func_proto bpf_get_stack_sleepable_proto;
extern const struct bpf_func_proto bpf_get_task_stack_proto;
extern const struct bpf_func_proto bpf_get_task_stack_sleepable_proto;
extern const struct bpf_func_proto bpf_get_stackid_proto_pe;
extern const struct bpf_func_proto bpf_get_stack_proto_pe;
extern const struct bpf_func_proto bpf_sock_map_update_proto;
@ -3192,6 +3213,7 @@ extern const struct bpf_func_proto bpf_sock_hash_update_proto;
extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto;
extern const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto;
extern const struct bpf_func_proto bpf_get_cgroup_classid_curr_proto;
extern const struct bpf_func_proto bpf_current_task_under_cgroup_proto;
extern const struct bpf_func_proto bpf_msg_redirect_hash_proto;
extern const struct bpf_func_proto bpf_msg_redirect_map_proto;
extern const struct bpf_func_proto bpf_sk_redirect_hash_proto;

include/linux/bpf_lsm.h:

@ -9,6 +9,7 @@
#include <linux/sched.h>
#include <linux/bpf.h>
#include <linux/bpf_verifier.h>
#include <linux/lsm_hooks.h>
#ifdef CONFIG_BPF_LSM
@ -45,6 +46,8 @@ void bpf_inode_storage_free(struct inode *inode);
void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog, bpf_func_t *bpf_func);
int bpf_lsm_get_retval_range(const struct bpf_prog *prog,
struct bpf_retval_range *range);
#else /* !CONFIG_BPF_LSM */
static inline bool bpf_lsm_is_sleepable_hook(u32 btf_id)
@ -78,6 +81,11 @@ static inline void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog,
{
}
static inline int bpf_lsm_get_retval_range(const struct bpf_prog *prog,
struct bpf_retval_range *range)
{
return -EOPNOTSUPP;
}
#endif /* CONFIG_BPF_LSM */
#endif /* _LINUX_BPF_LSM_H */

include/linux/bpf_verifier.h:

@ -23,6 +23,8 @@
* (in the "-8,-16,...,-512" form)
*/
#define TMP_STR_BUF_LEN 320
/* Patch buffer size */
#define INSN_BUF_SIZE 32
/* Liveness marks, used for registers and spilled-regs (in stack slots).
* Read marks propagate upwards until they find a write mark; they record that
@ -371,6 +373,10 @@ struct bpf_jmp_history_entry {
u32 prev_idx : 22;
/* special flags, e.g., whether insn is doing register stack spill/load */
u32 flags : 10;
/* additional registers that need precision tracking when this
* jump is backtracked, vector of six 10-bit records
*/
u64 linked_regs;
};
/* Maximum number of register states that can exist at once */
@ -572,6 +578,14 @@ struct bpf_insn_aux_data {
bool is_iter_next; /* bpf_iter_<type>_next() kfunc call */
bool call_with_percpu_alloc_ptr; /* {this,per}_cpu_ptr() with prog percpu alloc */
u8 alu_state; /* used in combination with alu_limit */
/* true if STX or LDX instruction is a part of a spill/fill
* pattern for a bpf_fastcall call.
*/
u8 fastcall_pattern:1;
/* for CALL instructions, a number of spill/fill pairs in the
* bpf_fastcall pattern.
*/
u8 fastcall_spills_num:3;
/* below fields are initialized once */
unsigned int orig_idx; /* original instruction index */
@ -641,6 +655,10 @@ struct bpf_subprog_info {
u32 linfo_idx; /* The idx to the main_prog->aux->linfo */
u16 stack_depth; /* max. stack depth used by this function */
u16 stack_extra;
/* offsets in range [stack_depth .. fastcall_stack_off)
* are used for bpf_fastcall spills and fills.
*/
s16 fastcall_stack_off;
bool has_tail_call: 1;
bool tail_call_reachable: 1;
bool has_ld_abs: 1;
@ -648,6 +666,8 @@ struct bpf_subprog_info {
bool is_async_cb: 1;
bool is_exception_cb: 1;
bool args_cached: 1;
/* true if bpf_fastcall stack region is used by functions that can't be inlined */
bool keep_fastcall_stack: 1;
u8 arg_cnt;
struct bpf_subprog_arg_info args[MAX_BPF_FUNC_REG_ARGS];
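Taken together these bits let the verifier first recognize, then erase, redundant spill/fill pairs around calls to fastcall-aware helpers. A rough illustration of the contract (the C declaration and the pseudo-asm are illustrative, assuming a Clang with bpf_fastcall support):

/* BPF C side: mark the helper as clobbering only r0 */
extern int bpf_get_smp_processor_id(void) __attribute__((bpf_fastcall));

/* call-site pattern matched by the verifier:
 *
 *	*(u64 *)(r10 - 8) = r1;		// spill a caller-saved register
 *	call bpf_get_smp_processor_id
 *	r1 = *(u64 *)(r10 - 8);		// fill it back
 *
 * fastcall_pattern marks the STX/LDX pair, fastcall_spills_num counts the
 * pairs for the CALL; if the call is inlined the pair can be removed.
 */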
@ -762,6 +782,8 @@ struct bpf_verifier_env {
* e.g., in reg_type_str() to generate reg_type string
*/
char tmp_str_buf[TMP_STR_BUF_LEN];
struct bpf_insn insn_buf[INSN_BUF_SIZE];
struct bpf_insn epilogue_buf[INSN_BUF_SIZE];
};
static inline struct bpf_func_info_aux *subprog_aux(struct bpf_verifier_env *env, int subprog)
@ -905,6 +927,11 @@ static inline bool type_is_sk_pointer(enum bpf_reg_type type)
type == PTR_TO_XDP_SOCK;
}
static inline bool type_may_be_null(u32 type)
{
return type & PTR_MAYBE_NULL;
}
static inline void mark_reg_scratched(struct bpf_verifier_env *env, u32 regno)
{
env->scratched_regs |= 1U << regno;


@ -580,6 +580,7 @@ bool btf_is_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf,
int get_kern_ctx_btf_id(struct bpf_verifier_log *log, enum bpf_prog_type prog_type);
bool btf_types_are_same(const struct btf *btf1, u32 id1,
const struct btf *btf2, u32 id2);
int btf_check_iter_arg(struct btf *btf, const struct btf_type *func, int arg_idx);
#else
static inline const struct btf_type *btf_type_by_id(const struct btf *btf,
u32 type_id)
@ -654,6 +655,10 @@ static inline bool btf_types_are_same(const struct btf *btf1, u32 id1,
{
return false;
}
static inline int btf_check_iter_arg(struct btf *btf, const struct btf_type *func, int arg_idx)
{
return -EOPNOTSUPP;
}
#endif
static inline bool btf_type_is_struct_ptr(struct btf *btf, const struct btf_type *t)


@ -7,8 +7,8 @@
#define BUILD_ID_SIZE_MAX 20
struct vm_area_struct;
int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id,
__u32 *size);
int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id, __u32 *size);
int build_id_parse_nofault(struct vm_area_struct *vma, unsigned char *build_id, __u32 *size);
int build_id_parse_buf(const void *buf, unsigned char *build_id, u32 buf_size);
#if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID) || IS_ENABLED(CONFIG_VMCORE_INFO)


@ -437,6 +437,16 @@ static inline bool insn_is_cast_user(const struct bpf_insn *insn)
.off = OFF, \
.imm = 0 })
/* Unconditional jumps, gotol pc + imm32 */
#define BPF_JMP32_A(IMM) \
((struct bpf_insn) { \
.code = BPF_JMP32 | BPF_JA, \
.dst_reg = 0, \
.src_reg = 0, \
.off = 0, \
.imm = IMM })
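BPF_JMP32_A builds the "gotol" form of an unconditional jump: the target offset is carried in the 32-bit imm field rather than the 16-bit off field. Usage sketch (illustrative):

/* jump further than a s16 offset can express */
struct bpf_insn insn = BPF_JMP32_A(70000);	/* gotol pc + 70000 */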
/* Relative call */
#define BPF_CALL_REL(TGT) \


@ -5519,11 +5519,12 @@ union bpf_attr {
* **-EOPNOTSUPP** if the hash calculation failed or **-EINVAL** if
* invalid arguments are passed.
*
* void *bpf_kptr_xchg(void *map_value, void *ptr)
* void *bpf_kptr_xchg(void *dst, void *ptr)
* Description
* Exchange kptr at pointer *map_value* with *ptr*, and return the
* old value. *ptr* can be NULL, otherwise it must be a referenced
* pointer which will be released when this helper is called.
* Exchange kptr at pointer *dst* with *ptr*, and return the old value.
* *dst* can be map value or local kptr. *ptr* can be NULL, otherwise
* it must be a referenced pointer which will be released when this helper
* is called.
* Return
* The old value of kptr (which can be NULL). The returned pointer
* if not NULL, is a reference which must be released using its
@ -7513,4 +7514,13 @@ struct bpf_iter_num {
__u64 __opaque[1];
} __attribute__((aligned(8)));
/*
* Flags to control BPF kfunc behaviour.
* - BPF_F_PAD_ZEROS: Pad destination buffer with zeros. (See the respective
* helper documentation for details.)
*/
enum bpf_kfunc_flags {
BPF_F_PAD_ZEROS = (1ULL << 0),
};
#endif /* _UAPI__LINUX_BPF_H__ */
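A BPF-side sketch of the widened bpf_kptr_xchg() contract; the type and variable names are illustrative, assuming 'val' points at either a map value or a local kptr object and 'new_task' holds an acquired task reference:

struct elem {
	struct task_struct __kptr *task;	/* referenced kptr field */
};

struct task_struct *old;

/* swap in the new reference, release whatever was stored before */
old = bpf_kptr_xchg(&val->task, new_task);
if (old)
	bpf_task_release(old);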


@ -52,9 +52,3 @@ obj-$(CONFIG_BPF_PRELOAD) += preload/
obj-$(CONFIG_BPF_SYSCALL) += relo_core.o
obj-$(CONFIG_BPF_SYSCALL) += btf_iter.o
obj-$(CONFIG_BPF_SYSCALL) += btf_relocate.o
# Some source files are common to libbpf.
vpath %.c $(srctree)/kernel/bpf:$(srctree)/tools/lib/bpf
$(obj)/%.o: %.c FORCE
$(call if_changed_rule,cc_o_c)


@ -73,6 +73,9 @@ int array_map_alloc_check(union bpf_attr *attr)
/* avoid overflow on round_up(map->value_size) */
if (attr->value_size > INT_MAX)
return -E2BIG;
/* percpu map value size is bound by PCPU_MIN_UNIT_SIZE */
if (percpu && round_up(attr->value_size, 8) > PCPU_MIN_UNIT_SIZE)
return -E2BIG;
return 0;
}
@ -494,7 +497,7 @@ static void array_map_seq_show_elem(struct bpf_map *map, void *key,
if (map->btf_key_type_id)
seq_printf(m, "%u: ", *(u32 *)key);
btf_type_seq_show(map->btf, map->btf_value_type_id, value, m);
seq_puts(m, "\n");
seq_putc(m, '\n');
rcu_read_unlock();
}
@ -515,7 +518,7 @@ static void percpu_array_map_seq_show_elem(struct bpf_map *map, void *key,
seq_printf(m, "\tcpu%d: ", cpu);
btf_type_seq_show(map->btf, map->btf_value_type_id,
per_cpu_ptr(pptr, cpu), m);
seq_puts(m, "\n");
seq_putc(m, '\n');
}
seq_puts(m, "}\n");
@ -600,7 +603,7 @@ static void *bpf_array_map_seq_start(struct seq_file *seq, loff_t *pos)
array = container_of(map, struct bpf_array, map);
index = info->index & array->index_mask;
if (info->percpu_value_buf)
return array->pptrs[index];
return (void *)(uintptr_t)array->pptrs[index];
return array_map_elem_ptr(array, index);
}
@ -619,7 +622,7 @@ static void *bpf_array_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
array = container_of(map, struct bpf_array, map);
index = info->index & array->index_mask;
if (info->percpu_value_buf)
return array->pptrs[index];
return (void *)(uintptr_t)array->pptrs[index];
return array_map_elem_ptr(array, index);
}
@ -632,7 +635,7 @@ static int __bpf_array_map_seq_show(struct seq_file *seq, void *v)
struct bpf_iter_meta meta;
struct bpf_prog *prog;
int off = 0, cpu = 0;
void __percpu **pptr;
void __percpu *pptr;
u32 size;
meta.seq = seq;
@ -648,7 +651,7 @@ static int __bpf_array_map_seq_show(struct seq_file *seq, void *v)
if (!info->percpu_value_buf) {
ctx.value = v;
} else {
pptr = v;
pptr = (void __percpu *)(uintptr_t)v;
size = array->elem_size;
for_each_possible_cpu(cpu) {
copy_map_value_long(map, info->percpu_value_buf + off,
@ -993,7 +996,7 @@ static void prog_array_map_seq_show_elem(struct bpf_map *map, void *key,
prog_id = prog_fd_array_sys_lookup_elem(ptr);
btf_type_seq_show(map->btf, map->btf_value_type_id,
&prog_id, m);
seq_puts(m, "\n");
seq_putc(m, '\n');
}
}


@ -11,7 +11,6 @@
#include <linux/lsm_hooks.h>
#include <linux/bpf_lsm.h>
#include <linux/kallsyms.h>
#include <linux/bpf_verifier.h>
#include <net/bpf_sk_storage.h>
#include <linux/bpf_local_storage.h>
#include <linux/btf_ids.h>
@ -36,6 +35,24 @@ BTF_SET_START(bpf_lsm_hooks)
#undef LSM_HOOK
BTF_SET_END(bpf_lsm_hooks)
BTF_SET_START(bpf_lsm_disabled_hooks)
BTF_ID(func, bpf_lsm_vm_enough_memory)
BTF_ID(func, bpf_lsm_inode_need_killpriv)
BTF_ID(func, bpf_lsm_inode_getsecurity)
BTF_ID(func, bpf_lsm_inode_listsecurity)
BTF_ID(func, bpf_lsm_inode_copy_up_xattr)
BTF_ID(func, bpf_lsm_getselfattr)
BTF_ID(func, bpf_lsm_getprocattr)
BTF_ID(func, bpf_lsm_setprocattr)
#ifdef CONFIG_KEYS
BTF_ID(func, bpf_lsm_key_getsecurity)
#endif
#ifdef CONFIG_AUDIT
BTF_ID(func, bpf_lsm_audit_rule_match)
#endif
BTF_ID(func, bpf_lsm_ismaclabel)
BTF_SET_END(bpf_lsm_disabled_hooks)
/* List of LSM hooks that should operate on 'current' cgroup regardless
* of function signature.
*/
@ -97,15 +114,24 @@ void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog,
int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
const struct bpf_prog *prog)
{
u32 btf_id = prog->aux->attach_btf_id;
const char *func_name = prog->aux->attach_func_name;
if (!prog->gpl_compatible) {
bpf_log(vlog,
"LSM programs must have a GPL compatible license\n");
return -EINVAL;
}
if (!btf_id_set_contains(&bpf_lsm_hooks, prog->aux->attach_btf_id)) {
if (btf_id_set_contains(&bpf_lsm_disabled_hooks, btf_id)) {
bpf_log(vlog, "attach_btf_id %u points to disabled hook %s\n",
btf_id, func_name);
return -EINVAL;
}
if (!btf_id_set_contains(&bpf_lsm_hooks, btf_id)) {
bpf_log(vlog, "attach_btf_id %u points to wrong type name %s\n",
prog->aux->attach_btf_id, prog->aux->attach_func_name);
btf_id, func_name);
return -EINVAL;
}
@ -390,3 +416,36 @@ const struct bpf_verifier_ops lsm_verifier_ops = {
.get_func_proto = bpf_lsm_func_proto,
.is_valid_access = btf_ctx_access,
};
/* hooks return 0 or 1 */
BTF_SET_START(bool_lsm_hooks)
#ifdef CONFIG_SECURITY_NETWORK_XFRM
BTF_ID(func, bpf_lsm_xfrm_state_pol_flow_match)
#endif
#ifdef CONFIG_AUDIT
BTF_ID(func, bpf_lsm_audit_rule_known)
#endif
BTF_ID(func, bpf_lsm_inode_xattr_skipcap)
BTF_SET_END(bool_lsm_hooks)
int bpf_lsm_get_retval_range(const struct bpf_prog *prog,
struct bpf_retval_range *retval_range)
{
/* no return value range for void hooks */
if (!prog->aux->attach_func_proto->type)
return -EINVAL;
if (btf_id_set_contains(&bool_lsm_hooks, prog->aux->attach_btf_id)) {
retval_range->minval = 0;
retval_range->maxval = 1;
} else {
/* All other available LSM hooks, except task_prctl, return 0
* on success and negative error code on failure.
* To keep things simple, we only allow bpf progs to return 0
* or negative errno for task_prctl too.
*/
retval_range->minval = -MAX_ERRNO;
retval_range->maxval = 0;
}
return 0;
}
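On the BPF side the effect is that out-of-range return values are now rejected at load time. A sketch (illustrative):

SEC("lsm/file_open")
int BPF_PROG(deny_open, struct file *file)
{
	/* must stay within [-MAX_ERRNO, 0]; the bool hooks above get [0, 1] */
	return -EPERM;
}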


@ -837,7 +837,7 @@ static void bpf_struct_ops_map_seq_show_elem(struct bpf_map *map, void *key,
btf_type_seq_show(st_map->btf,
map->btf_vmlinux_value_type_id,
value, m);
seq_puts(m, "\n");
seq_putc(m, '\n');
}
kfree(value);
@ -1040,6 +1040,13 @@ void bpf_struct_ops_put(const void *kdata)
bpf_map_put(&st_map->map);
}
int bpf_struct_ops_supported(const struct bpf_struct_ops *st_ops, u32 moff)
{
void *func_ptr = *(void **)(st_ops->cfi_stubs + moff);
return func_ptr ? 0 : -ENOTSUPP;
}
static bool bpf_struct_ops_valid_to_reg(struct bpf_map *map)
{
struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;


@ -212,7 +212,7 @@ enum btf_kfunc_hook {
BTF_KFUNC_HOOK_TRACING,
BTF_KFUNC_HOOK_SYSCALL,
BTF_KFUNC_HOOK_FMODRET,
BTF_KFUNC_HOOK_CGROUP_SKB,
BTF_KFUNC_HOOK_CGROUP,
BTF_KFUNC_HOOK_SCHED_ACT,
BTF_KFUNC_HOOK_SK_SKB,
BTF_KFUNC_HOOK_SOCKET_FILTER,
@ -790,7 +790,7 @@ const char *btf_str_by_offset(const struct btf *btf, u32 offset)
return NULL;
}
static bool __btf_name_valid(const struct btf *btf, u32 offset)
static bool btf_name_valid_identifier(const struct btf *btf, u32 offset)
{
/* offset must be valid */
const char *src = btf_str_by_offset(btf, offset);
@ -811,11 +811,6 @@ static bool __btf_name_valid(const struct btf *btf, u32 offset)
return !*src;
}
static bool btf_name_valid_identifier(const struct btf *btf, u32 offset)
{
return __btf_name_valid(btf, offset);
}
/* Allow any printable character in DATASEC names */
static bool btf_name_valid_section(const struct btf *btf, u32 offset)
{
@ -3761,6 +3756,7 @@ static int btf_find_field(const struct btf *btf, const struct btf_type *t,
return -EINVAL;
}
/* Callers have to ensure the life cycle of btf if it is program BTF */
static int btf_parse_kptr(const struct btf *btf, struct btf_field *field,
struct btf_field_info *info)
{
@ -3789,7 +3785,6 @@ static int btf_parse_kptr(const struct btf *btf, struct btf_field *field,
field->kptr.dtor = NULL;
id = info->kptr.type_id;
kptr_btf = (struct btf *)btf;
btf_get(kptr_btf);
goto found_dtor;
}
if (id < 0)
@ -4631,7 +4626,7 @@ static s32 btf_var_check_meta(struct btf_verifier_env *env,
}
if (!t->name_off ||
!__btf_name_valid(env->btf, t->name_off)) {
!btf_name_valid_identifier(env->btf, t->name_off)) {
btf_verifier_log_type(env, t, "Invalid name");
return -EINVAL;
}
@ -5519,36 +5514,72 @@ static const char *alloc_obj_fields[] = {
static struct btf_struct_metas *
btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf)
{
union {
struct btf_id_set set;
struct {
u32 _cnt;
u32 _ids[ARRAY_SIZE(alloc_obj_fields)];
} _arr;
} aof;
struct btf_struct_metas *tab = NULL;
struct btf_id_set *aof;
int i, n, id, ret;
BUILD_BUG_ON(offsetof(struct btf_id_set, cnt) != 0);
BUILD_BUG_ON(sizeof(struct btf_id_set) != sizeof(u32));
memset(&aof, 0, sizeof(aof));
aof = kmalloc(sizeof(*aof), GFP_KERNEL | __GFP_NOWARN);
if (!aof)
return ERR_PTR(-ENOMEM);
aof->cnt = 0;
for (i = 0; i < ARRAY_SIZE(alloc_obj_fields); i++) {
/* Try to find whether this special type exists in user BTF, and
* if so remember its ID so we can easily find it among members
* of structs that we iterate in the next loop.
*/
struct btf_id_set *new_aof;
id = btf_find_by_name_kind(btf, alloc_obj_fields[i], BTF_KIND_STRUCT);
if (id < 0)
continue;
aof.set.ids[aof.set.cnt++] = id;
new_aof = krealloc(aof, offsetof(struct btf_id_set, ids[aof->cnt + 1]),
GFP_KERNEL | __GFP_NOWARN);
if (!new_aof) {
ret = -ENOMEM;
goto free_aof;
}
aof = new_aof;
aof->ids[aof->cnt++] = id;
}
if (!aof.set.cnt)
return NULL;
sort(&aof.set.ids, aof.set.cnt, sizeof(aof.set.ids[0]), btf_id_cmp_func, NULL);
n = btf_nr_types(btf);
for (i = 1; i < n; i++) {
/* Try to find if there are kptrs in user BTF and remember their ID */
struct btf_id_set *new_aof;
struct btf_field_info tmp;
const struct btf_type *t;
t = btf_type_by_id(btf, i);
if (!t) {
ret = -EINVAL;
goto free_aof;
}
ret = btf_find_kptr(btf, t, 0, 0, &tmp);
if (ret != BTF_FIELD_FOUND)
continue;
new_aof = krealloc(aof, offsetof(struct btf_id_set, ids[aof->cnt + 1]),
GFP_KERNEL | __GFP_NOWARN);
if (!new_aof) {
ret = -ENOMEM;
goto free_aof;
}
aof = new_aof;
aof->ids[aof->cnt++] = i;
}
if (!aof->cnt) {
kfree(aof);
return NULL;
}
sort(&aof->ids, aof->cnt, sizeof(aof->ids[0]), btf_id_cmp_func, NULL);
for (i = 1; i < n; i++) {
struct btf_struct_metas *new_tab;
const struct btf_member *member;
@ -5558,17 +5589,13 @@ btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf)
int j, tab_cnt;
t = btf_type_by_id(btf, i);
if (!t) {
ret = -EINVAL;
goto free;
}
if (!__btf_type_is_struct(t))
continue;
cond_resched();
for_each_member(j, t, member) {
if (btf_id_set_contains(&aof.set, member->type))
if (btf_id_set_contains(aof, member->type))
goto parse;
}
continue;
@ -5587,7 +5614,8 @@ btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf)
type = &tab->types[tab->cnt];
type->btf_id = i;
record = btf_parse_fields(btf, t, BPF_SPIN_LOCK | BPF_LIST_HEAD | BPF_LIST_NODE |
BPF_RB_ROOT | BPF_RB_NODE | BPF_REFCOUNT, t->size);
BPF_RB_ROOT | BPF_RB_NODE | BPF_REFCOUNT |
BPF_KPTR, t->size);
/* The record cannot be unset, treat it as an error if so */
if (IS_ERR_OR_NULL(record)) {
ret = PTR_ERR_OR_ZERO(record) ?: -EFAULT;
@ -5596,9 +5624,12 @@ btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf)
type->record = record;
tab->cnt++;
}
kfree(aof);
return tab;
free:
btf_struct_metas_free(tab);
free_aof:
kfree(aof);
return ERR_PTR(ret);
}
@ -6245,12 +6276,11 @@ static struct btf *btf_parse_module(const char *module_name, const void *data,
btf->kernel_btf = true;
snprintf(btf->name, sizeof(btf->name), "%s", module_name);
btf->data = kvmalloc(data_size, GFP_KERNEL | __GFP_NOWARN);
btf->data = kvmemdup(data, data_size, GFP_KERNEL | __GFP_NOWARN);
if (!btf->data) {
err = -ENOMEM;
goto errout;
}
memcpy(btf->data, data, data_size);
btf->data_size = data_size;
err = btf_parse_hdr(env);
@ -6418,8 +6448,11 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
if (arg == nr_args) {
switch (prog->expected_attach_type) {
case BPF_LSM_CGROUP:
case BPF_LSM_MAC:
/* mark we are accessing the return value */
info->is_retval = true;
fallthrough;
case BPF_LSM_CGROUP:
case BPF_TRACE_FEXIT:
/* When LSM programs are attached to void LSM hooks
* they use FEXIT trampolines and when attached to
@ -8054,15 +8087,44 @@ BTF_ID_LIST_GLOBAL(btf_tracing_ids, MAX_BTF_TRACING_TYPE)
BTF_TRACING_TYPE_xxx
#undef BTF_TRACING_TYPE
/* Validate well-formedness of iter argument type.
* On success, return positive BTF ID of iter state's STRUCT type.
 * On error, a negative error code is returned.
*/
int btf_check_iter_arg(struct btf *btf, const struct btf_type *func, int arg_idx)
{
const struct btf_param *arg;
const struct btf_type *t;
const char *name;
int btf_id;
if (btf_type_vlen(func) <= arg_idx)
return -EINVAL;
arg = &btf_params(func)[arg_idx];
t = btf_type_skip_modifiers(btf, arg->type, NULL);
if (!t || !btf_type_is_ptr(t))
return -EINVAL;
t = btf_type_skip_modifiers(btf, t->type, &btf_id);
if (!t || !__btf_type_is_struct(t))
return -EINVAL;
name = btf_name_by_offset(btf, t->name_off);
if (!name || strncmp(name, ITER_PREFIX, sizeof(ITER_PREFIX) - 1))
return -EINVAL;
return btf_id;
}
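The shape being validated here is the open-coded iterator triple; for example the existing "num" iterator (declarations reproduced for illustration) satisfies it:

struct bpf_iter_num { __u64 __opaque[1]; } __attribute__((aligned(8)));

int  bpf_iter_num_new(struct bpf_iter_num *it, int start, int end);
int *bpf_iter_num_next(struct bpf_iter_num *it);
void bpf_iter_num_destroy(struct bpf_iter_num *it);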
static int btf_check_iter_kfuncs(struct btf *btf, const char *func_name,
const struct btf_type *func, u32 func_flags)
{
u32 flags = func_flags & (KF_ITER_NEW | KF_ITER_NEXT | KF_ITER_DESTROY);
const char *name, *sfx, *iter_name;
const struct btf_param *arg;
const char *sfx, *iter_name;
const struct btf_type *t;
char exp_name[128];
u32 nr_args;
int btf_id;
/* exactly one of KF_ITER_{NEW,NEXT,DESTROY} can be set */
if (!flags || (flags & (flags - 1)))
@ -8073,28 +8135,21 @@ static int btf_check_iter_kfuncs(struct btf *btf, const char *func_name,
if (nr_args < 1)
return -EINVAL;
arg = &btf_params(func)[0];
t = btf_type_skip_modifiers(btf, arg->type, NULL);
if (!t || !btf_type_is_ptr(t))
return -EINVAL;
t = btf_type_skip_modifiers(btf, t->type, NULL);
if (!t || !__btf_type_is_struct(t))
return -EINVAL;
name = btf_name_by_offset(btf, t->name_off);
if (!name || strncmp(name, ITER_PREFIX, sizeof(ITER_PREFIX) - 1))
return -EINVAL;
btf_id = btf_check_iter_arg(btf, func, 0);
if (btf_id < 0)
return btf_id;
/* sizeof(struct bpf_iter_<type>) should be a multiple of 8 to
* fit nicely in stack slots
*/
t = btf_type_by_id(btf, btf_id);
if (t->size == 0 || (t->size % 8))
return -EINVAL;
/* validate bpf_iter_<type>_{new,next,destroy}(struct bpf_iter_<type> *)
* naming pattern
*/
iter_name = name + sizeof(ITER_PREFIX) - 1;
iter_name = btf_name_by_offset(btf, t->name_off) + sizeof(ITER_PREFIX) - 1;
if (flags & KF_ITER_NEW)
sfx = "new";
else if (flags & KF_ITER_NEXT)
@ -8309,13 +8364,19 @@ static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type)
case BPF_PROG_TYPE_STRUCT_OPS:
return BTF_KFUNC_HOOK_STRUCT_OPS;
case BPF_PROG_TYPE_TRACING:
case BPF_PROG_TYPE_TRACEPOINT:
case BPF_PROG_TYPE_PERF_EVENT:
case BPF_PROG_TYPE_LSM:
return BTF_KFUNC_HOOK_TRACING;
case BPF_PROG_TYPE_SYSCALL:
return BTF_KFUNC_HOOK_SYSCALL;
case BPF_PROG_TYPE_CGROUP_SKB:
case BPF_PROG_TYPE_CGROUP_SOCK:
case BPF_PROG_TYPE_CGROUP_DEVICE:
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
return BTF_KFUNC_HOOK_CGROUP_SKB;
case BPF_PROG_TYPE_CGROUP_SOCKOPT:
case BPF_PROG_TYPE_CGROUP_SYSCTL:
return BTF_KFUNC_HOOK_CGROUP;
case BPF_PROG_TYPE_SCHED_ACT:
return BTF_KFUNC_HOOK_SCHED_ACT;
case BPF_PROG_TYPE_SK_SKB:
@ -8891,6 +8952,7 @@ int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo,
struct bpf_core_cand_list cands = {};
struct bpf_core_relo_res targ_res;
struct bpf_core_spec *specs;
const struct btf_type *type;
int err;
/* ~4k of temp memory necessary to convert LLVM spec like "0:1:0:5"
@ -8900,6 +8962,13 @@ int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo,
if (!specs)
return -ENOMEM;
type = btf_type_by_id(ctx->btf, relo->type_id);
if (!type) {
bpf_log(ctx->log, "relo #%u: bad type id %u\n",
relo_idx, relo->type_id);
return -EINVAL;
}
if (need_cands) {
struct bpf_cand_cache *cc;
int i;

kernel/bpf/btf_iter.c

@ -0,0 +1,2 @@
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
#include "../../tools/lib/bpf/btf_iter.c"


@ -0,0 +1,2 @@
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
#include "../../tools/lib/bpf/btf_relocate.c"


@ -2581,6 +2581,8 @@ cgroup_current_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_get_cgroup_classid:
return &bpf_get_cgroup_classid_curr_proto;
#endif
case BPF_FUNC_current_task_under_cgroup:
return &bpf_current_task_under_cgroup_proto;
default:
return NULL;
}


@ -2302,6 +2302,7 @@ bool bpf_prog_map_compatible(struct bpf_map *map,
{
enum bpf_prog_type prog_type = resolve_prog_type(fp);
bool ret;
struct bpf_prog_aux *aux = fp->aux;
if (fp->kprobe_override)
return false;
@ -2311,7 +2312,7 @@ bool bpf_prog_map_compatible(struct bpf_map *map,
* in the case of devmap and cpumap). Until device checks
* are implemented, prohibit adding dev-bound programs to program maps.
*/
if (bpf_prog_is_dev_bound(fp->aux))
if (bpf_prog_is_dev_bound(aux))
return false;
spin_lock(&map->owner.lock);
@ -2321,12 +2322,26 @@ bool bpf_prog_map_compatible(struct bpf_map *map,
*/
map->owner.type = prog_type;
map->owner.jited = fp->jited;
map->owner.xdp_has_frags = fp->aux->xdp_has_frags;
map->owner.xdp_has_frags = aux->xdp_has_frags;
map->owner.attach_func_proto = aux->attach_func_proto;
ret = true;
} else {
ret = map->owner.type == prog_type &&
map->owner.jited == fp->jited &&
map->owner.xdp_has_frags == fp->aux->xdp_has_frags;
map->owner.xdp_has_frags == aux->xdp_has_frags;
if (ret &&
map->owner.attach_func_proto != aux->attach_func_proto) {
switch (prog_type) {
case BPF_PROG_TYPE_TRACING:
case BPF_PROG_TYPE_LSM:
case BPF_PROG_TYPE_EXT:
case BPF_PROG_TYPE_STRUCT_OPS:
ret = false;
break;
default:
break;
}
}
}
spin_unlock(&map->owner.lock);


@ -462,6 +462,9 @@ static int htab_map_alloc_check(union bpf_attr *attr)
* kmalloc-able later in htab_map_update_elem()
*/
return -E2BIG;
/* percpu map value size is bound by PCPU_MIN_UNIT_SIZE */
if (percpu && round_up(attr->value_size, 8) > PCPU_MIN_UNIT_SIZE)
return -E2BIG;
return 0;
}
@ -1049,14 +1052,15 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
pptr = htab_elem_get_ptr(l_new, key_size);
} else {
/* alloc_percpu zero-fills */
pptr = bpf_mem_cache_alloc(&htab->pcpu_ma);
if (!pptr) {
void *ptr = bpf_mem_cache_alloc(&htab->pcpu_ma);
if (!ptr) {
bpf_mem_cache_free(&htab->ma, l_new);
l_new = ERR_PTR(-ENOMEM);
goto dec_count;
}
l_new->ptr_to_pptr = pptr;
pptr = *(void **)pptr;
l_new->ptr_to_pptr = ptr;
pptr = *(void __percpu **)ptr;
}
pcpu_init_value(htab, pptr, value, onallcpus);
@ -1586,7 +1590,7 @@ static void htab_map_seq_show_elem(struct bpf_map *map, void *key,
btf_type_seq_show(map->btf, map->btf_key_type_id, key, m);
seq_puts(m, ": ");
btf_type_seq_show(map->btf, map->btf_value_type_id, value, m);
seq_puts(m, "\n");
seq_putc(m, '\n');
rcu_read_unlock();
}
@ -2450,7 +2454,7 @@ static void htab_percpu_map_seq_show_elem(struct bpf_map *map, void *key,
seq_printf(m, "\tcpu%d: ", cpu);
btf_type_seq_show(map->btf, map->btf_value_type_id,
per_cpu_ptr(pptr, cpu), m);
seq_puts(m, "\n");
seq_putc(m, '\n');
}
seq_puts(m, "}\n");


@ -158,6 +158,7 @@ const struct bpf_func_proto bpf_get_smp_processor_id_proto = {
.func = bpf_get_smp_processor_id,
.gpl_only = false,
.ret_type = RET_INTEGER,
.allow_fastcall = true,
};
BPF_CALL_0(bpf_get_numa_node_id)
@ -517,16 +518,15 @@ static int __bpf_strtoll(const char *buf, size_t buf_len, u64 flags,
}
BPF_CALL_4(bpf_strtol, const char *, buf, size_t, buf_len, u64, flags,
long *, res)
s64 *, res)
{
long long _res;
int err;
*res = 0;
err = __bpf_strtoll(buf, buf_len, flags, &_res);
if (err < 0)
return err;
if (_res != (long)_res)
return -ERANGE;
*res = _res;
return err;
}
@ -538,23 +538,23 @@ const struct bpf_func_proto bpf_strtol_proto = {
.arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg2_type = ARG_CONST_SIZE,
.arg3_type = ARG_ANYTHING,
.arg4_type = ARG_PTR_TO_LONG,
.arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
.arg4_size = sizeof(s64),
};
BPF_CALL_4(bpf_strtoul, const char *, buf, size_t, buf_len, u64, flags,
unsigned long *, res)
u64 *, res)
{
unsigned long long _res;
bool is_negative;
int err;
*res = 0;
err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative);
if (err < 0)
return err;
if (is_negative)
return -EINVAL;
if (_res != (unsigned long)_res)
return -ERANGE;
*res = _res;
return err;
}
@ -566,7 +566,8 @@ const struct bpf_func_proto bpf_strtoul_proto = {
.arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg2_type = ARG_CONST_SIZE,
.arg3_type = ARG_ANYTHING,
.arg4_type = ARG_PTR_TO_LONG,
.arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
.arg4_size = sizeof(u64),
};
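For BPF programs the change is mostly transparent, but the result pointer must now reference an aligned 8-byte slot; a caller-side sketch (illustrative):

static long parse_num(const char *buf, __u32 len)
{
	s64 value;	/* was 'long'; the helper now expects s64/u64 storage */

	if (bpf_strtol(buf, len, 0, &value) < 0)
		return 0;
	return value;
}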
BPF_CALL_3(bpf_strncmp, const char *, s1, u32, s1_sz, const char *, s2)
@ -714,7 +715,7 @@ BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu)
if (cpu >= nr_cpu_ids)
return (unsigned long)NULL;
return (unsigned long)per_cpu_ptr((const void __percpu *)ptr, cpu);
return (unsigned long)per_cpu_ptr((const void __percpu *)(const uintptr_t)ptr, cpu);
}
const struct bpf_func_proto bpf_per_cpu_ptr_proto = {
@ -727,7 +728,7 @@ const struct bpf_func_proto bpf_per_cpu_ptr_proto = {
BPF_CALL_1(bpf_this_cpu_ptr, const void *, percpu_ptr)
{
return (unsigned long)this_cpu_ptr((const void __percpu *)percpu_ptr);
return (unsigned long)this_cpu_ptr((const void __percpu *)(const uintptr_t)percpu_ptr);
}
const struct bpf_func_proto bpf_this_cpu_ptr_proto = {
@ -1618,9 +1619,9 @@ void bpf_wq_cancel_and_free(void *val)
schedule_work(&work->delete_work);
}
BPF_CALL_2(bpf_kptr_xchg, void *, map_value, void *, ptr)
BPF_CALL_2(bpf_kptr_xchg, void *, dst, void *, ptr)
{
unsigned long *kptr = map_value;
unsigned long *kptr = dst;
/* This helper may be inlined by verifier. */
return xchg(kptr, (unsigned long)ptr);
@ -1635,7 +1636,7 @@ static const struct bpf_func_proto bpf_kptr_xchg_proto = {
.gpl_only = false,
.ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
.ret_btf_id = BPF_PTR_POISON,
.arg1_type = ARG_PTR_TO_KPTR,
.arg1_type = ARG_KPTR_XCHG_DEST,
.arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL | OBJ_RELEASE,
.arg2_btf_id = BPF_PTR_POISON,
};
@ -2033,6 +2034,7 @@ bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return NULL;
}
}
EXPORT_SYMBOL_GPL(bpf_base_func_proto);
void bpf_list_head_free(const struct btf_field *field, void *list_head,
struct bpf_spin_lock *spin_lock)
@ -2457,6 +2459,29 @@ __bpf_kfunc long bpf_task_under_cgroup(struct task_struct *task,
return ret;
}
BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
struct cgroup *cgrp;
if (unlikely(idx >= array->map.max_entries))
return -E2BIG;
cgrp = READ_ONCE(array->ptrs[idx]);
if (unlikely(!cgrp))
return -EAGAIN;
return task_under_cgroup_hierarchy(current, cgrp);
}
const struct bpf_func_proto bpf_current_task_under_cgroup_proto = {
.func = bpf_current_task_under_cgroup,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_ANYTHING,
};
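With the proto made global and wired into cgroup_current_func_proto (see the hunk above), cgroup programs can use the helper too. Sketch, with the cgroup-array map setup omitted and names illustrative:

SEC("cgroup_skb/egress")
int egress(struct __sk_buff *skb)
{
	/* returns 1 if current is under the cgroup stored at index 0 */
	if (bpf_current_task_under_cgroup(&cgroup_arr, 0) == 1)
		bpf_printk("task is in the watched cgroup");
	return 1;
}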
/**
* bpf_task_get_cgroup1 - Acquires the associated cgroup of a task within a
* specific cgroup1 hierarchy. The cgroup1 hierarchy is identified by its
@ -2938,6 +2963,47 @@ __bpf_kfunc void bpf_iter_bits_destroy(struct bpf_iter_bits *it)
bpf_mem_free(&bpf_global_ma, kit->bits);
}
/**
* bpf_copy_from_user_str() - Copy a string from an unsafe user address
* @dst: Destination address, in kernel space. This buffer must be
* at least @dst__sz bytes long.
* @dst__sz: Maximum number of bytes to copy, includes the trailing NUL.
* @unsafe_ptr__ign: Source address, in user space.
* @flags: The only supported flag is BPF_F_PAD_ZEROS
*
 * Copies a NUL-terminated string from userspace to BPF space. If the user
 * string is too long, this will still ensure NUL termination in the dst
 * buffer, unless the buffer size is 0.
 *
 * If the BPF_F_PAD_ZEROS flag is set, memset the tail of @dst to 0 on success
 * and memset all of @dst on failure.
*/
__bpf_kfunc int bpf_copy_from_user_str(void *dst, u32 dst__sz, const void __user *unsafe_ptr__ign, u64 flags)
{
int ret;
if (unlikely(flags & ~BPF_F_PAD_ZEROS))
return -EINVAL;
if (unlikely(!dst__sz))
return 0;
ret = strncpy_from_user(dst, unsafe_ptr__ign, dst__sz - 1);
if (ret < 0) {
if (flags & BPF_F_PAD_ZEROS)
memset((char *)dst, 0, dst__sz);
return ret;
}
if (flags & BPF_F_PAD_ZEROS)
memset((char *)dst + ret, 0, dst__sz - ret);
else
((char *)dst)[ret] = '\0';
return ret + 1;
}
__bpf_kfunc_end_defs();
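Caller-side sketch (a sleepable program is required, matching the KF_SLEEPABLE registration below; names illustrative):

char comm[64];
int n;

n = bpf_copy_from_user_str(comm, sizeof(comm), user_ptr, BPF_F_PAD_ZEROS);
if (n > 0) {
	/* n counts the trailing NUL: comm[n - 1] == '\0', tail zero-padded */
}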
BTF_KFUNCS_START(generic_btf_ids)
@ -3023,6 +3089,7 @@ BTF_ID_FLAGS(func, bpf_preempt_enable)
BTF_ID_FLAGS(func, bpf_iter_bits_new, KF_ITER_NEW)
BTF_ID_FLAGS(func, bpf_iter_bits_next, KF_ITER_NEXT | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_iter_bits_destroy, KF_ITER_DESTROY)
BTF_ID_FLAGS(func, bpf_copy_from_user_str, KF_SLEEPABLE)
BTF_KFUNCS_END(common_btf_ids)
static const struct btf_kfunc_id_set common_kfunc_set = {
@ -3051,6 +3118,7 @@ static int __init kfunc_init(void)
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &generic_kfunc_set);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &generic_kfunc_set);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &generic_kfunc_set);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SKB, &generic_kfunc_set);
ret = ret ?: register_btf_id_dtor_kfuncs(generic_dtors,
ARRAY_SIZE(generic_dtors),
THIS_MODULE);


@ -709,10 +709,10 @@ static void seq_print_delegate_opts(struct seq_file *m,
msk = 1ULL << e->val;
if (delegate_msk & msk) {
/* emit lower-case name without prefix */
seq_printf(m, "%c", first ? '=' : ':');
seq_putc(m, first ? '=' : ':');
name += pfx_len;
while (*name) {
seq_printf(m, "%c", tolower(*name));
seq_putc(m, tolower(*name));
name++;
}


@ -431,7 +431,7 @@ static void cgroup_storage_seq_show_elem(struct bpf_map *map, void *key,
seq_puts(m, ": ");
btf_type_seq_show(map->btf, map->btf_value_type_id,
&READ_ONCE(storage->buf)->data[0], m);
seq_puts(m, "\n");
seq_putc(m, '\n');
} else {
seq_puts(m, ": {\n");
for_each_possible_cpu(cpu) {
@ -439,7 +439,7 @@ static void cgroup_storage_seq_show_elem(struct bpf_map *map, void *key,
btf_type_seq_show(map->btf, map->btf_value_type_id,
per_cpu_ptr(storage->percpu_buf, cpu),
m);
seq_puts(m, "\n");
seq_putc(m, '\n');
}
seq_puts(m, "}\n");
}


@ -138,8 +138,8 @@ static struct llist_node notrace *__llist_del_first(struct llist_head *head)
static void *__alloc(struct bpf_mem_cache *c, int node, gfp_t flags)
{
if (c->percpu_size) {
void **obj = kmalloc_node(c->percpu_size, flags, node);
void *pptr = __alloc_percpu_gfp(c->unit_size, 8, flags);
void __percpu **obj = kmalloc_node(c->percpu_size, flags, node);
void __percpu *pptr = __alloc_percpu_gfp(c->unit_size, 8, flags);
if (!obj || !pptr) {
free_percpu(pptr);
@ -253,7 +253,7 @@ static void alloc_bulk(struct bpf_mem_cache *c, int cnt, int node, bool atomic)
static void free_one(void *obj, bool percpu)
{
if (percpu) {
free_percpu(((void **)obj)[1]);
free_percpu(((void __percpu **)obj)[1]);
kfree(obj);
return;
}
@ -509,8 +509,8 @@ static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu)
*/
int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
{
struct bpf_mem_caches *cc, __percpu *pcc;
struct bpf_mem_cache *c, __percpu *pc;
struct bpf_mem_caches *cc; struct bpf_mem_caches __percpu *pcc;
struct bpf_mem_cache *c; struct bpf_mem_cache __percpu *pc;
struct obj_cgroup *objcg = NULL;
int cpu, i, unit_size, percpu_size = 0;
@ -591,7 +591,7 @@ int bpf_mem_alloc_percpu_init(struct bpf_mem_alloc *ma, struct obj_cgroup *objcg
int bpf_mem_alloc_percpu_unit_init(struct bpf_mem_alloc *ma, int size)
{
struct bpf_mem_caches *cc, __percpu *pcc;
struct bpf_mem_caches *cc; struct bpf_mem_caches __percpu *pcc;
int cpu, i, unit_size, percpu_size;
struct obj_cgroup *objcg;
struct bpf_mem_cache *c;

kernel/bpf/relo_core.c

@ -0,0 +1,2 @@
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
#include "../../tools/lib/bpf/relo_core.c"


@ -308,7 +308,7 @@ int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key,
spin_unlock_bh(&reuseport_lock);
put_file:
fput(socket->file);
sockfd_put(socket);
return err;
}


@ -124,8 +124,24 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
return ERR_PTR(err);
}
static int fetch_build_id(struct vm_area_struct *vma, unsigned char *build_id, bool may_fault)
{
return may_fault ? build_id_parse(vma, build_id, NULL)
: build_id_parse_nofault(vma, build_id, NULL);
}
/*
* Expects all id_offs[i].ip values to be set to correct initial IPs.
* They will be subsequently:
* - either adjusted in place to a file offset, if build ID fetching
* succeeds; in this case id_offs[i].build_id is set to correct build ID,
* and id_offs[i].status is set to BPF_STACK_BUILD_ID_VALID;
* - or IP will be kept intact, if build ID fetching failed; in this case
* id_offs[i].build_id is zeroed out and id_offs[i].status is set to
* BPF_STACK_BUILD_ID_IP.
*/
static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
u64 *ips, u32 trace_nr, bool user)
u32 trace_nr, bool user, bool may_fault)
{
int i;
struct mmap_unlock_irq_work *work = NULL;
@ -142,30 +158,28 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
/* cannot access current->mm, fall back to ips */
for (i = 0; i < trace_nr; i++) {
id_offs[i].status = BPF_STACK_BUILD_ID_IP;
id_offs[i].ip = ips[i];
memset(id_offs[i].build_id, 0, BUILD_ID_SIZE_MAX);
}
return;
}
for (i = 0; i < trace_nr; i++) {
if (range_in_vma(prev_vma, ips[i], ips[i])) {
u64 ip = READ_ONCE(id_offs[i].ip);
if (range_in_vma(prev_vma, ip, ip)) {
vma = prev_vma;
memcpy(id_offs[i].build_id, prev_build_id,
BUILD_ID_SIZE_MAX);
memcpy(id_offs[i].build_id, prev_build_id, BUILD_ID_SIZE_MAX);
goto build_id_valid;
}
vma = find_vma(current->mm, ips[i]);
if (!vma || build_id_parse(vma, id_offs[i].build_id, NULL)) {
vma = find_vma(current->mm, ip);
if (!vma || fetch_build_id(vma, id_offs[i].build_id, may_fault)) {
/* per entry fall back to ips */
id_offs[i].status = BPF_STACK_BUILD_ID_IP;
id_offs[i].ip = ips[i];
memset(id_offs[i].build_id, 0, BUILD_ID_SIZE_MAX);
continue;
}
build_id_valid:
id_offs[i].offset = (vma->vm_pgoff << PAGE_SHIFT) + ips[i]
- vma->vm_start;
id_offs[i].offset = (vma->vm_pgoff << PAGE_SHIFT) + ip - vma->vm_start;
id_offs[i].status = BPF_STACK_BUILD_ID_VALID;
prev_vma = vma;
prev_build_id = id_offs[i].build_id;
@ -216,7 +230,7 @@ static long __bpf_get_stackid(struct bpf_map *map,
struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
struct stack_map_bucket *bucket, *new_bucket, *old_bucket;
u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
u32 hash, id, trace_nr, trace_len;
u32 hash, id, trace_nr, trace_len, i;
bool user = flags & BPF_F_USER_STACK;
u64 *ips;
bool hash_matches;
@ -238,15 +252,18 @@ static long __bpf_get_stackid(struct bpf_map *map,
return id;
if (stack_map_use_build_id(map)) {
struct bpf_stack_build_id *id_offs;
/* for build_id+offset, pop a bucket before slow cmp */
new_bucket = (struct stack_map_bucket *)
pcpu_freelist_pop(&smap->freelist);
if (unlikely(!new_bucket))
return -ENOMEM;
new_bucket->nr = trace_nr;
stack_map_get_build_id_offset(
(struct bpf_stack_build_id *)new_bucket->data,
ips, trace_nr, user);
id_offs = (struct bpf_stack_build_id *)new_bucket->data;
for (i = 0; i < trace_nr; i++)
id_offs[i].ip = ips[i];
stack_map_get_build_id_offset(id_offs, trace_nr, user, false /* !may_fault */);
trace_len = trace_nr * sizeof(struct bpf_stack_build_id);
if (hash_matches && bucket->nr == trace_nr &&
memcmp(bucket->data, new_bucket->data, trace_len) == 0) {
@ -387,7 +404,7 @@ const struct bpf_func_proto bpf_get_stackid_proto_pe = {
static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
struct perf_callchain_entry *trace_in,
void *buf, u32 size, u64 flags)
void *buf, u32 size, u64 flags, bool may_fault)
{
u32 trace_nr, copy_len, elem_size, num_elem, max_depth;
bool user_build_id = flags & BPF_F_USER_BUILD_ID;
@ -405,8 +422,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
if (kernel && user_build_id)
goto clear;
elem_size = (user && user_build_id) ? sizeof(struct bpf_stack_build_id)
: sizeof(u64);
elem_size = user_build_id ? sizeof(struct bpf_stack_build_id) : sizeof(u64);
if (unlikely(size % elem_size))
goto clear;
@ -427,6 +443,9 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
if (sysctl_perf_event_max_stack < max_depth)
max_depth = sysctl_perf_event_max_stack;
if (may_fault)
rcu_read_lock(); /* need RCU for perf's callchain below */
if (trace_in)
trace = trace_in;
else if (kernel && task)
@ -434,21 +453,34 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
else
trace = get_perf_callchain(regs, 0, kernel, user, max_depth,
crosstask, false);
if (unlikely(!trace))
goto err_fault;
if (trace->nr < skip)
if (unlikely(!trace) || trace->nr < skip) {
if (may_fault)
rcu_read_unlock();
goto err_fault;
}
trace_nr = trace->nr - skip;
trace_nr = (trace_nr <= num_elem) ? trace_nr : num_elem;
copy_len = trace_nr * elem_size;
ips = trace->ip + skip;
if (user && user_build_id)
stack_map_get_build_id_offset(buf, ips, trace_nr, user);
else
if (user_build_id) {
struct bpf_stack_build_id *id_offs = buf;
u32 i;
for (i = 0; i < trace_nr; i++)
id_offs[i].ip = ips[i];
} else {
memcpy(buf, ips, copy_len);
}
/* trace/ips should not be dereferenced after this point */
if (may_fault)
rcu_read_unlock();
if (user_build_id)
stack_map_get_build_id_offset(buf, trace_nr, user, may_fault);
if (size > copy_len)
memset(buf + copy_len, 0, size - copy_len);
@ -464,7 +496,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
BPF_CALL_4(bpf_get_stack, struct pt_regs *, regs, void *, buf, u32, size,
u64, flags)
{
return __bpf_get_stack(regs, NULL, NULL, buf, size, flags);
return __bpf_get_stack(regs, NULL, NULL, buf, size, flags, false /* !may_fault */);
}
const struct bpf_func_proto bpf_get_stack_proto = {
@ -477,8 +509,24 @@ const struct bpf_func_proto bpf_get_stack_proto = {
.arg4_type = ARG_ANYTHING,
};
BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task, void *, buf,
u32, size, u64, flags)
BPF_CALL_4(bpf_get_stack_sleepable, struct pt_regs *, regs, void *, buf, u32, size,
u64, flags)
{
return __bpf_get_stack(regs, NULL, NULL, buf, size, flags, true /* may_fault */);
}
const struct bpf_func_proto bpf_get_stack_sleepable_proto = {
.func = bpf_get_stack_sleepable,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_UNINIT_MEM,
.arg3_type = ARG_CONST_SIZE_OR_ZERO,
.arg4_type = ARG_ANYTHING,
};
static long __bpf_get_task_stack(struct task_struct *task, void *buf, u32 size,
u64 flags, bool may_fault)
{
struct pt_regs *regs;
long res = -EINVAL;
@ -488,12 +536,18 @@ BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task, void *, buf,
regs = task_pt_regs(task);
if (regs)
res = __bpf_get_stack(regs, task, NULL, buf, size, flags);
res = __bpf_get_stack(regs, task, NULL, buf, size, flags, may_fault);
put_task_stack(task);
return res;
}
BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task, void *, buf,
u32, size, u64, flags)
{
return __bpf_get_task_stack(task, buf, size, flags, false /* !may_fault */);
}
const struct bpf_func_proto bpf_get_task_stack_proto = {
.func = bpf_get_task_stack,
.gpl_only = false,
@ -505,6 +559,23 @@ const struct bpf_func_proto bpf_get_task_stack_proto = {
.arg4_type = ARG_ANYTHING,
};
BPF_CALL_4(bpf_get_task_stack_sleepable, struct task_struct *, task, void *, buf,
u32, size, u64, flags)
{
	return __bpf_get_task_stack(task, buf, size, flags, true /* may_fault */);
}
const struct bpf_func_proto bpf_get_task_stack_sleepable_proto = {
.func = bpf_get_task_stack_sleepable,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_BTF_ID,
.arg1_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
.arg2_type = ARG_PTR_TO_UNINIT_MEM,
.arg3_type = ARG_CONST_SIZE_OR_ZERO,
.arg4_type = ARG_ANYTHING,
};
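Dispatch between the two variants happens in bpf_tracing_func_proto further down, keyed off prog->sleepable, so existing programs pick up the may-fault path just by being sleepable. BPF-side sketch (section name and sizes illustrative):

SEC("uprobe.s//usr/bin/app:main")
int BPF_PROG(grab_stack)
{
	__u64 ips[16];

	/* may now fault in file-backed pages while resolving build IDs */
	bpf_get_stack(ctx, ips, sizeof(ips), BPF_F_USER_STACK);
	return 0;
}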
BPF_CALL_4(bpf_get_stack_pe, struct bpf_perf_event_data_kern *, ctx,
void *, buf, u32, size, u64, flags)
{
@ -516,7 +587,7 @@ BPF_CALL_4(bpf_get_stack_pe, struct bpf_perf_event_data_kern *, ctx,
__u64 nr_kernel;
if (!(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN))
return __bpf_get_stack(regs, NULL, NULL, buf, size, flags);
return __bpf_get_stack(regs, NULL, NULL, buf, size, flags, false /* !may_fault */);
if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
BPF_F_USER_BUILD_ID)))
@ -536,7 +607,7 @@ BPF_CALL_4(bpf_get_stack_pe, struct bpf_perf_event_data_kern *, ctx,
__u64 nr = trace->nr;
trace->nr = nr_kernel;
err = __bpf_get_stack(regs, NULL, trace, buf, size, flags);
err = __bpf_get_stack(regs, NULL, trace, buf, size, flags, false /* !may_fault */);
/* restore nr */
trace->nr = nr;
@ -548,7 +619,7 @@ BPF_CALL_4(bpf_get_stack_pe, struct bpf_perf_event_data_kern *, ctx,
goto clear;
flags = (flags & ~BPF_F_SKIP_FIELD_MASK) | skip;
err = __bpf_get_stack(regs, NULL, trace, buf, size, flags);
err = __bpf_get_stack(regs, NULL, trace, buf, size, flags, false /* !may_fault */);
}
return err;


@ -550,7 +550,8 @@ void btf_record_free(struct btf_record *rec)
case BPF_KPTR_PERCPU:
if (rec->fields[i].kptr.module)
module_put(rec->fields[i].kptr.module);
btf_put(rec->fields[i].kptr.btf);
if (btf_is_kernel(rec->fields[i].kptr.btf))
btf_put(rec->fields[i].kptr.btf);
break;
case BPF_LIST_HEAD:
case BPF_LIST_NODE:
@ -596,7 +597,8 @@ struct btf_record *btf_record_dup(const struct btf_record *rec)
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
case BPF_KPTR_PERCPU:
btf_get(fields[i].kptr.btf);
if (btf_is_kernel(fields[i].kptr.btf))
btf_get(fields[i].kptr.btf);
if (fields[i].kptr.module && !try_module_get(fields[i].kptr.module)) {
ret = -ENXIO;
goto free;
@ -733,15 +735,11 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
}
}
/* called from workqueue */
static void bpf_map_free_deferred(struct work_struct *work)
static void bpf_map_free(struct bpf_map *map)
{
struct bpf_map *map = container_of(work, struct bpf_map, work);
struct btf_record *rec = map->record;
struct btf *btf = map->btf;
security_bpf_map_free(map);
bpf_map_release_memcg(map);
/* implementation dependent freeing */
map->ops->map_free(map);
/* Delay freeing of btf_record for maps, as map_free
@ -760,6 +758,16 @@ static void bpf_map_free_deferred(struct work_struct *work)
btf_put(btf);
}
/* called from workqueue */
static void bpf_map_free_deferred(struct work_struct *work)
{
struct bpf_map *map = container_of(work, struct bpf_map, work);
security_bpf_map_free(map);
bpf_map_release_memcg(map);
bpf_map_free(map);
}
static void bpf_map_put_uref(struct bpf_map *map)
{
if (atomic64_dec_and_test(&map->usercnt)) {
@ -1411,8 +1419,7 @@ static int map_create(union bpf_attr *attr)
free_map_sec:
security_bpf_map_free(map);
free_map:
btf_put(map->btf);
map->ops->map_free(map);
bpf_map_free(map);
put_token:
bpf_token_put(token);
return err;
@ -5668,7 +5675,7 @@ static int token_create(union bpf_attr *attr)
return bpf_token_create(attr);
}
static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size)
static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size)
{
union bpf_attr attr;
int err;
@ -5932,6 +5939,7 @@ static const struct bpf_func_proto bpf_sys_close_proto = {
BPF_CALL_4(bpf_kallsyms_lookup_name, const char *, name, int, name_sz, int, flags, u64 *, res)
{
*res = 0;
if (flags)
return -EINVAL;
@ -5952,7 +5960,8 @@ static const struct bpf_func_proto bpf_kallsyms_lookup_name_proto = {
.arg1_type = ARG_PTR_TO_MEM,
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
.arg3_type = ARG_ANYTHING,
.arg4_type = ARG_PTR_TO_LONG,
.arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
.arg4_size = sizeof(u64),
};
static const struct bpf_func_proto *

File diff suppressed because it is too large.


@ -8964,7 +8964,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size;
if (atomic_read(&nr_build_id_events))
build_id_parse(vma, mmap_event->build_id, &mmap_event->build_id_size);
build_id_parse_nofault(vma, mmap_event->build_id, &mmap_event->build_id_size);
perf_iterate_sb(perf_event_mmap_output,
mmap_event,


@ -24,7 +24,6 @@
#include <linux/key.h>
#include <linux/verification.h>
#include <linux/namei.h>
#include <linux/fileattr.h>
#include <net/bpf_sk_storage.h>
@ -798,29 +797,6 @@ const struct bpf_func_proto bpf_task_pt_regs_proto = {
.ret_btf_id = &bpf_task_pt_regs_ids[0],
};
BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
struct cgroup *cgrp;
if (unlikely(idx >= array->map.max_entries))
return -E2BIG;
cgrp = READ_ONCE(array->ptrs[idx]);
if (unlikely(!cgrp))
return -EAGAIN;
return task_under_cgroup_hierarchy(current, cgrp);
}
static const struct bpf_func_proto bpf_current_task_under_cgroup_proto = {
.func = bpf_current_task_under_cgroup,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_ANYTHING,
};
struct send_signal_irq_work {
struct irq_work irq_work;
struct task_struct *task;
@ -1226,7 +1202,8 @@ static const struct bpf_func_proto bpf_get_func_arg_proto = {
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_ANYTHING,
.arg3_type = ARG_PTR_TO_LONG,
.arg3_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
.arg3_size = sizeof(u64),
};
BPF_CALL_2(get_func_ret, void *, ctx, u64 *, value)
@ -1242,7 +1219,8 @@ static const struct bpf_func_proto bpf_get_func_ret_proto = {
.func = get_func_ret,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_LONG,
.arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
.arg2_size = sizeof(u64),
};
BPF_CALL_1(get_func_arg_cnt, void *, ctx)
@ -1439,73 +1417,6 @@ static int __init bpf_key_sig_kfuncs_init(void)
late_initcall(bpf_key_sig_kfuncs_init);
#endif /* CONFIG_KEYS */
/* filesystem kfuncs */
__bpf_kfunc_start_defs();
/**
* bpf_get_file_xattr - get xattr of a file
* @file: file to get xattr from
* @name__str: name of the xattr
* @value_p: output buffer of the xattr value
*
* Get xattr *name__str* of *file* and store the output in *value_ptr*.
*
* For security reasons, only *name__str* with prefix "user." is allowed.
*
* Return: 0 on success, a negative value on error.
*/
__bpf_kfunc int bpf_get_file_xattr(struct file *file, const char *name__str,
struct bpf_dynptr *value_p)
{
struct bpf_dynptr_kern *value_ptr = (struct bpf_dynptr_kern *)value_p;
struct dentry *dentry;
u32 value_len;
void *value;
int ret;
if (strncmp(name__str, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
return -EPERM;
value_len = __bpf_dynptr_size(value_ptr);
value = __bpf_dynptr_data_rw(value_ptr, value_len);
if (!value)
return -EINVAL;
dentry = file_dentry(file);
ret = inode_permission(&nop_mnt_idmap, dentry->d_inode, MAY_READ);
if (ret)
return ret;
return __vfs_getxattr(dentry, dentry->d_inode, name__str, value, value_len);
}
__bpf_kfunc_end_defs();
BTF_KFUNCS_START(fs_kfunc_set_ids)
BTF_ID_FLAGS(func, bpf_get_file_xattr, KF_SLEEPABLE | KF_TRUSTED_ARGS)
BTF_KFUNCS_END(fs_kfunc_set_ids)
static int bpf_get_file_xattr_filter(const struct bpf_prog *prog, u32 kfunc_id)
{
if (!btf_id_set8_contains(&fs_kfunc_set_ids, kfunc_id))
return 0;
/* Only allow to attach from LSM hooks, to avoid recursion */
return prog->type != BPF_PROG_TYPE_LSM ? -EACCES : 0;
}
static const struct btf_kfunc_id_set bpf_fs_kfunc_set = {
.owner = THIS_MODULE,
.set = &fs_kfunc_set_ids,
.filter = bpf_get_file_xattr_filter,
};
static int __init bpf_fs_kfuncs_init(void)
{
return register_btf_kfunc_id_set(BPF_PROG_TYPE_LSM, &bpf_fs_kfunc_set);
}
late_initcall(bpf_fs_kfuncs_init);
static const struct bpf_func_proto *
bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
@ -1548,8 +1459,6 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_numa_node_id_proto;
case BPF_FUNC_perf_event_read:
return &bpf_perf_event_read_proto;
case BPF_FUNC_current_task_under_cgroup:
return &bpf_current_task_under_cgroup_proto;
case BPF_FUNC_get_prandom_u32:
return &bpf_get_prandom_u32_proto;
case BPF_FUNC_probe_write_user:
@ -1578,6 +1487,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_cgrp_storage_get_proto;
case BPF_FUNC_cgrp_storage_delete:
return &bpf_cgrp_storage_delete_proto;
case BPF_FUNC_current_task_under_cgroup:
return &bpf_current_task_under_cgroup_proto;
#endif
case BPF_FUNC_send_signal:
return &bpf_send_signal_proto;
@ -1598,7 +1509,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_jiffies64:
return &bpf_jiffies64_proto;
case BPF_FUNC_get_task_stack:
return &bpf_get_task_stack_proto;
return prog->sleepable ? &bpf_get_task_stack_sleepable_proto
: &bpf_get_task_stack_proto;
case BPF_FUNC_copy_from_user:
return &bpf_copy_from_user_proto;
case BPF_FUNC_copy_from_user_task:
@ -1654,7 +1566,7 @@ kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_get_stackid:
return &bpf_get_stackid_proto;
case BPF_FUNC_get_stack:
return &bpf_get_stack_proto;
return prog->sleepable ? &bpf_get_stack_sleepable_proto : &bpf_get_stack_proto;
#ifdef CONFIG_BPF_KPROBE_OVERRIDE
case BPF_FUNC_override_return:
return &bpf_override_return_proto;
@ -3299,7 +3211,7 @@ static int uprobe_prog_run(struct bpf_uprobe *uprobe,
struct bpf_run_ctx *old_run_ctx;
int err = 0;
if (link->task && current->mm != link->task->mm)
if (link->task && !same_thread_group(current, link->task))
return 0;
if (sleepable)


@ -564,6 +564,7 @@ static int perf_call_bpf_enter(struct trace_event_call *call, struct pt_regs *re
BUILD_BUG_ON(sizeof(param.ent) < sizeof(void *));
/* bpf prog requires 'regs' to be the first member in the ctx (a.k.a. &param) */
perf_fetch_caller_regs(regs);
*(struct pt_regs **)&param = regs;
param.syscall_nr = rec->nr;
for (i = 0; i < sys_data->nb_args; i++)
@ -575,6 +576,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
{
struct syscall_metadata *sys_data;
struct syscall_trace_enter *rec;
struct pt_regs *fake_regs;
struct hlist_head *head;
unsigned long args[6];
bool valid_prog_array;
@ -602,7 +604,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
size = ALIGN(size + sizeof(u32), sizeof(u64));
size -= sizeof(u32);
rec = perf_trace_buf_alloc(size, NULL, &rctx);
rec = perf_trace_buf_alloc(size, &fake_regs, &rctx);
if (!rec)
return;
@ -611,7 +613,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
memcpy(&rec->args, args, sizeof(unsigned long) * sys_data->nb_args);
if ((valid_prog_array &&
!perf_call_bpf_enter(sys_data->enter_event, regs, sys_data, rec)) ||
!perf_call_bpf_enter(sys_data->enter_event, fake_regs, sys_data, rec)) ||
hlist_empty(head)) {
perf_swevent_put_recursion_context(rctx);
return;
@ -666,6 +668,7 @@ static int perf_call_bpf_exit(struct trace_event_call *call, struct pt_regs *reg
} __aligned(8) param;
/* bpf prog requires 'regs' to be the first member in the ctx (a.k.a. &param) */
perf_fetch_caller_regs(regs);
*(struct pt_regs **)&param = regs;
param.syscall_nr = rec->nr;
param.ret = rec->ret;
@ -676,6 +679,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
{
struct syscall_metadata *sys_data;
struct syscall_trace_exit *rec;
struct pt_regs *fake_regs;
struct hlist_head *head;
bool valid_prog_array;
int syscall_nr;
@ -701,7 +705,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
size -= sizeof(u32);
rec = perf_trace_buf_alloc(size, NULL, &rctx);
rec = perf_trace_buf_alloc(size, &fake_regs, &rctx);
if (!rec)
return;
@ -709,7 +713,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
rec->ret = syscall_get_return_value(current, regs);
if ((valid_prog_array &&
!perf_call_bpf_exit(sys_data->exit_event, regs, rec)) ||
!perf_call_bpf_exit(sys_data->exit_event, fake_regs, rec)) ||
hlist_empty(head)) {
perf_swevent_put_recursion_context(rctx);
return;


@ -379,13 +379,15 @@ config DEBUG_INFO_BTF
depends on !DEBUG_INFO_SPLIT && !DEBUG_INFO_REDUCED
depends on !GCC_PLUGIN_RANDSTRUCT || COMPILE_TEST
depends on BPF_SYSCALL
depends on !DEBUG_INFO_DWARF5 || PAHOLE_VERSION >= 121
depends on PAHOLE_VERSION >= 116
depends on DEBUG_INFO_DWARF4 || PAHOLE_VERSION >= 121
# pahole uses elfutils, which does not have support for Hexagon relocations
depends on !HEXAGON
help
Generate deduplicated BTF type information from DWARF debug info.
Turning this on expects presence of pahole tool, which will convert
DWARF type info into equivalent deduplicated BTF type info.
Turning this on requires pahole v1.16 or later (v1.21 or later to
support DWARF 5), which will convert DWARF type info into equivalent
deduplicated BTF type info.
config PAHOLE_HAS_SPLIT_BTF
def_bool PAHOLE_VERSION >= 119


@ -8,154 +8,302 @@
#define BUILD_ID 3
#define MAX_PHDR_CNT 256
struct freader {
void *buf;
u32 buf_sz;
int err;
union {
struct {
struct file *file;
struct folio *folio;
void *addr;
loff_t folio_off;
bool may_fault;
};
struct {
const char *data;
u64 data_sz;
};
};
};
static void freader_init_from_file(struct freader *r, void *buf, u32 buf_sz,
struct file *file, bool may_fault)
{
memset(r, 0, sizeof(*r));
r->buf = buf;
r->buf_sz = buf_sz;
r->file = file;
r->may_fault = may_fault;
}
static void freader_init_from_mem(struct freader *r, const char *data, u64 data_sz)
{
memset(r, 0, sizeof(*r));
r->data = data;
r->data_sz = data_sz;
}
static void freader_put_folio(struct freader *r)
{
if (!r->folio)
return;
kunmap_local(r->addr);
folio_put(r->folio);
r->folio = NULL;
}
static int freader_get_folio(struct freader *r, loff_t file_off)
{
/* check if we can just reuse current folio */
if (r->folio && file_off >= r->folio_off &&
file_off < r->folio_off + folio_size(r->folio))
return 0;
freader_put_folio(r);
r->folio = filemap_get_folio(r->file->f_mapping, file_off >> PAGE_SHIFT);
/* if sleeping is allowed, wait for the page, if necessary */
if (r->may_fault && (IS_ERR(r->folio) || !folio_test_uptodate(r->folio))) {
filemap_invalidate_lock_shared(r->file->f_mapping);
r->folio = read_cache_folio(r->file->f_mapping, file_off >> PAGE_SHIFT,
NULL, r->file);
filemap_invalidate_unlock_shared(r->file->f_mapping);
}
if (IS_ERR(r->folio) || !folio_test_uptodate(r->folio)) {
if (!IS_ERR(r->folio))
folio_put(r->folio);
r->folio = NULL;
return -EFAULT;
}
r->folio_off = folio_pos(r->folio);
r->addr = kmap_local_folio(r->folio, 0);
return 0;
}
static const void *freader_fetch(struct freader *r, loff_t file_off, size_t sz)
{
size_t folio_sz;
/* provided internal temporary buffer should be sized correctly */
if (WARN_ON(r->buf && sz > r->buf_sz)) {
r->err = -E2BIG;
return NULL;
}
if (unlikely(file_off + sz < file_off)) {
r->err = -EOVERFLOW;
return NULL;
}
/* working with memory buffer is much more straightforward */
if (!r->buf) {
if (file_off + sz > r->data_sz) {
r->err = -ERANGE;
return NULL;
}
return r->data + file_off;
}
/* fetch or reuse folio for given file offset */
r->err = freader_get_folio(r, file_off);
if (r->err)
return NULL;
/* if requested data is crossing folio boundaries, we have to copy
* everything into our local buffer to keep a simple linear memory
* access interface
*/
folio_sz = folio_size(r->folio);
if (file_off + sz > r->folio_off + folio_sz) {
int part_sz = r->folio_off + folio_sz - file_off;
/* copy the part that resides in the current folio */
memcpy(r->buf, r->addr + (file_off - r->folio_off), part_sz);
/* fetch next folio */
r->err = freader_get_folio(r, r->folio_off + folio_sz);
if (r->err)
return NULL;
/* copy the rest of requested data */
memcpy(r->buf + part_sz, r->addr, sz - part_sz);
return r->buf;
}
/* if data fits in a single folio, just return direct pointer */
return r->addr + (file_off - r->folio_off);
}
static void freader_cleanup(struct freader *r)
{
if (!r->buf)
return; /* non-file-backed mode */
freader_put_folio(r);
}
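Taken together, freader exposes a simple init/fetch/cleanup lifecycle over
either a file or a memory buffer. A minimal caller sketch (hypothetical,
error handling abbreviated; 'file' is assumed to be a valid struct file *):

	char buf[64];
	struct freader r;
	const void *p;

	freader_init_from_file(&r, buf, sizeof(buf), file, true /* may_fault */);
	p = freader_fetch(&r, 0, SELFMAG); /* first four bytes: "\x7fELF" */
	if (!p)
		return r.err; /* freader_fetch() records errors in r.err */
	/* p stays valid only until the next freader_fetch() call */
	freader_cleanup(&r);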
/*
* Parse build id from the note segment. This logic can be shared between
* 32-bit and 64-bit systems, because Elf32_Nhdr and Elf64_Nhdr are
* identical.
*/
static int parse_build_id_buf(unsigned char *build_id,
__u32 *size,
const void *note_start,
Elf32_Word note_size)
static int parse_build_id(struct freader *r, unsigned char *build_id, __u32 *size,
loff_t note_off, Elf32_Word note_size)
{
Elf32_Word note_offs = 0, new_offs;
const char note_name[] = "GNU";
const size_t note_name_sz = sizeof(note_name);
u32 build_id_off, new_off, note_end, name_sz, desc_sz;
const Elf32_Nhdr *nhdr;
const char *data;
while (note_offs + sizeof(Elf32_Nhdr) < note_size) {
Elf32_Nhdr *nhdr = (Elf32_Nhdr *)(note_start + note_offs);
if (check_add_overflow(note_off, note_size, &note_end))
return -EINVAL;
while (note_end - note_off > sizeof(Elf32_Nhdr) + note_name_sz) {
nhdr = freader_fetch(r, note_off, sizeof(Elf32_Nhdr) + note_name_sz);
if (!nhdr)
return r->err;
name_sz = READ_ONCE(nhdr->n_namesz);
desc_sz = READ_ONCE(nhdr->n_descsz);
new_off = note_off + sizeof(Elf32_Nhdr);
if (check_add_overflow(new_off, ALIGN(name_sz, 4), &new_off) ||
check_add_overflow(new_off, ALIGN(desc_sz, 4), &new_off) ||
new_off > note_end)
break;
if (nhdr->n_type == BUILD_ID &&
nhdr->n_namesz == sizeof("GNU") &&
!strcmp((char *)(nhdr + 1), "GNU") &&
nhdr->n_descsz > 0 &&
nhdr->n_descsz <= BUILD_ID_SIZE_MAX) {
memcpy(build_id,
note_start + note_offs +
ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr),
nhdr->n_descsz);
memset(build_id + nhdr->n_descsz, 0,
BUILD_ID_SIZE_MAX - nhdr->n_descsz);
name_sz == note_name_sz &&
memcmp(nhdr + 1, note_name, note_name_sz) == 0 &&
desc_sz > 0 && desc_sz <= BUILD_ID_SIZE_MAX) {
build_id_off = note_off + sizeof(Elf32_Nhdr) + ALIGN(note_name_sz, 4);
/* freader_fetch() will invalidate nhdr pointer */
data = freader_fetch(r, build_id_off, desc_sz);
if (!data)
return r->err;
memcpy(build_id, data, desc_sz);
memset(build_id + desc_sz, 0, BUILD_ID_SIZE_MAX - desc_sz);
if (size)
*size = nhdr->n_descsz;
*size = desc_sz;
return 0;
}
new_offs = note_offs + sizeof(Elf32_Nhdr) +
ALIGN(nhdr->n_namesz, 4) + ALIGN(nhdr->n_descsz, 4);
if (new_offs <= note_offs) /* overflow */
break;
note_offs = new_offs;
note_off = new_off;
}
return -EINVAL;
}
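For reference, the layout being walked here is the standard ELF note format
(all fields padded to 4-byte alignment), which for GNU build IDs looks like:

	/*
	 * Elf32_Nhdr {
	 *     n_namesz = 4;           // strlen("GNU") + 1
	 *     n_descsz = <ID length>; // typically 20 bytes (SHA-1)
	 *     n_type   = 3;           // NT_GNU_BUILD_ID (BUILD_ID above)
	 * }
	 * followed by "GNU\0", then n_descsz bytes of build ID.
	 */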
static inline int parse_build_id(const void *page_addr,
unsigned char *build_id,
__u32 *size,
const void *note_start,
Elf32_Word note_size)
{
/* check for overflow */
if (note_start < page_addr || note_start + note_size < note_start)
return -EINVAL;
/* only supports note that fits in the first page */
if (note_start + note_size > page_addr + PAGE_SIZE)
return -EINVAL;
return parse_build_id_buf(build_id, size, note_start, note_size);
}
/* Parse build ID from 32-bit ELF */
static int get_build_id_32(const void *page_addr, unsigned char *build_id,
__u32 *size)
static int get_build_id_32(struct freader *r, unsigned char *build_id, __u32 *size)
{
Elf32_Ehdr *ehdr = (Elf32_Ehdr *)page_addr;
Elf32_Phdr *phdr;
int i;
const Elf32_Ehdr *ehdr;
const Elf32_Phdr *phdr;
__u32 phnum, phoff, i;
/*
* FIXME
* Neither ELF spec nor ELF loader require that program headers
* start immediately after ELF header.
*/
if (ehdr->e_phoff != sizeof(Elf32_Ehdr))
return -EINVAL;
/* only supports phdr that fits in one page */
if (ehdr->e_phnum >
(PAGE_SIZE - sizeof(Elf32_Ehdr)) / sizeof(Elf32_Phdr))
ehdr = freader_fetch(r, 0, sizeof(Elf32_Ehdr));
if (!ehdr)
return r->err;
/* subsequent freader_fetch() calls invalidate pointers, so remember locally */
phnum = READ_ONCE(ehdr->e_phnum);
phoff = READ_ONCE(ehdr->e_phoff);
/* set an upper bound on the number of segments (phdrs) we iterate */
if (phnum > MAX_PHDR_CNT)
phnum = MAX_PHDR_CNT;
/* check that phoff is not large enough to cause an overflow */
if (phoff + phnum * sizeof(Elf32_Phdr) < phoff)
return -EINVAL;
phdr = (Elf32_Phdr *)(page_addr + sizeof(Elf32_Ehdr));
for (i = 0; i < phnum; ++i) {
phdr = freader_fetch(r, phoff + i * sizeof(Elf32_Phdr), sizeof(Elf32_Phdr));
if (!phdr)
return r->err;
for (i = 0; i < ehdr->e_phnum; ++i) {
if (phdr[i].p_type == PT_NOTE &&
!parse_build_id(page_addr, build_id, size,
page_addr + phdr[i].p_offset,
phdr[i].p_filesz))
if (phdr->p_type == PT_NOTE &&
!parse_build_id(r, build_id, size, READ_ONCE(phdr->p_offset),
READ_ONCE(phdr->p_filesz)))
return 0;
}
return -EINVAL;
}
/* Parse build ID from 64-bit ELF */
static int get_build_id_64(const void *page_addr, unsigned char *build_id,
__u32 *size)
static int get_build_id_64(struct freader *r, unsigned char *build_id, __u32 *size)
{
Elf64_Ehdr *ehdr = (Elf64_Ehdr *)page_addr;
Elf64_Phdr *phdr;
int i;
const Elf64_Ehdr *ehdr;
const Elf64_Phdr *phdr;
__u32 phnum, i;
__u64 phoff;
/*
* FIXME
* Neither ELF spec nor ELF loader require that program headers
* start immediately after ELF header.
*/
if (ehdr->e_phoff != sizeof(Elf64_Ehdr))
return -EINVAL;
/* only supports phdr that fits in one page */
if (ehdr->e_phnum >
(PAGE_SIZE - sizeof(Elf64_Ehdr)) / sizeof(Elf64_Phdr))
ehdr = freader_fetch(r, 0, sizeof(Elf64_Ehdr));
if (!ehdr)
return r->err;
/* subsequent freader_fetch() calls invalidate pointers, so remember locally */
phnum = READ_ONCE(ehdr->e_phnum);
phoff = READ_ONCE(ehdr->e_phoff);
/* set an upper bound on the number of segments (phdrs) we iterate */
if (phnum > MAX_PHDR_CNT)
phnum = MAX_PHDR_CNT;
/* check that phoff is not large enough to cause an overflow */
if (phoff + phnum * sizeof(Elf64_Phdr) < phoff)
return -EINVAL;
phdr = (Elf64_Phdr *)(page_addr + sizeof(Elf64_Ehdr));
for (i = 0; i < phnum; ++i) {
phdr = freader_fetch(r, phoff + i * sizeof(Elf64_Phdr), sizeof(Elf64_Phdr));
if (!phdr)
return r->err;
for (i = 0; i < ehdr->e_phnum; ++i) {
if (phdr[i].p_type == PT_NOTE &&
!parse_build_id(page_addr, build_id, size,
page_addr + phdr[i].p_offset,
phdr[i].p_filesz))
if (phdr->p_type == PT_NOTE &&
!parse_build_id(r, build_id, size, READ_ONCE(phdr->p_offset),
READ_ONCE(phdr->p_filesz)))
return 0;
}
return -EINVAL;
}
/*
* Parse build ID of ELF file mapped to vma
* @vma: vma object
* @build_id: buffer to store build id, at least BUILD_ID_SIZE long
* @size: returns actual build id size in case of success
*
* Return: 0 on success, -EINVAL otherwise
*/
int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id,
__u32 *size)
/* enough for Elf64_Ehdr, Elf64_Phdr, and all the smaller requests */
#define MAX_FREADER_BUF_SZ 64
static int __build_id_parse(struct vm_area_struct *vma, unsigned char *build_id,
__u32 *size, bool may_fault)
{
Elf32_Ehdr *ehdr;
struct page *page;
void *page_addr;
const Elf32_Ehdr *ehdr;
struct freader r;
char buf[MAX_FREADER_BUF_SZ];
int ret;
/* only works for page backed storage */
if (!vma->vm_file)
return -EINVAL;
page = find_get_page(vma->vm_file->f_mapping, 0);
if (!page)
return -EFAULT; /* page not mapped */
freader_init_from_file(&r, buf, sizeof(buf), vma->vm_file, may_fault);
/* fetch first 18 bytes of ELF header for checks */
ehdr = freader_fetch(&r, 0, offsetofend(Elf32_Ehdr, e_type));
if (!ehdr) {
ret = r.err;
goto out;
}
ret = -EINVAL;
page_addr = kmap_local_page(page);
ehdr = (Elf32_Ehdr *)page_addr;
/* compare magic x7f "ELF" */
if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0)
@ -166,15 +314,46 @@ int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id,
goto out;
if (ehdr->e_ident[EI_CLASS] == ELFCLASS32)
ret = get_build_id_32(page_addr, build_id, size);
ret = get_build_id_32(&r, build_id, size);
else if (ehdr->e_ident[EI_CLASS] == ELFCLASS64)
ret = get_build_id_64(page_addr, build_id, size);
ret = get_build_id_64(&r, build_id, size);
out:
kunmap_local(page_addr);
put_page(page);
freader_cleanup(&r);
return ret;
}
/*
* Parse build ID of ELF file mapped to vma
* @vma: vma object
* @build_id: buffer to store build id, at least BUILD_ID_SIZE long
* @size: returns actual build id size in case of success
*
* Assumes no page fault can be taken, so if relevant portions of the ELF
* file are not already paged in, fetching the build ID fails.
*
* Return: 0 on success; negative error, otherwise
*/
int build_id_parse_nofault(struct vm_area_struct *vma, unsigned char *build_id, __u32 *size)
{
return __build_id_parse(vma, build_id, size, false /* !may_fault */);
}
/*
* Parse build ID of ELF file mapped to VMA
* @vma: vma object
* @build_id: buffer to store build id, at least BUILD_ID_SIZE long
* @size: returns actual build id size in case of success
*
* Assumes a faultable context and can cause page faults to bring file data
* into the page cache.
*
* Return: 0 on success; negative error, otherwise
*/
int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id, __u32 *size)
{
return __build_id_parse(vma, build_id, size, true /* may_fault */);
}
/**
* build_id_parse_buf - Get build ID from a buffer
* @buf: ELF note section(s) to parse
@ -185,7 +364,15 @@ int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id,
*/
int build_id_parse_buf(const void *buf, unsigned char *build_id, u32 buf_size)
{
return parse_build_id_buf(build_id, NULL, buf, buf_size);
struct freader r;
int err;
freader_init_from_mem(&r, buf, buf_size);
err = parse_build_id(&r, build_id, NULL, 0, buf_size);
freader_cleanup(&r);
return err;
}
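A usage sketch for the buffer-based variant (hedged; 'note' and 'note_sz'
are hypothetical and would hold a raw .note.gnu.build-id section):

	unsigned char build_id[BUILD_ID_SIZE_MAX];
	int err;

	err = build_id_parse_buf(note, build_id, note_sz);
	if (!err)
		/* build_id holds the ID, zero-padded to BUILD_ID_SIZE_MAX */
		use_build_id(build_id); /* hypothetical consumer */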
#if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID) || IS_ENABLED(CONFIG_VMCORE_INFO)

View File

@ -115,7 +115,7 @@ static int check_test_run_args(struct bpf_prog *prog, struct bpf_dummy_ops_test_
offset = btf_ctx_arg_offset(bpf_dummy_ops_btf, func_proto, arg_no);
info = find_ctx_arg_info(prog->aux, offset);
if (info && (info->reg_type & PTR_MAYBE_NULL))
if (info && type_may_be_null(info->reg_type))
continue;
return -EINVAL;

View File

@ -1266,8 +1266,8 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
* so we need to keep the user BPF around until the 2nd
* pass. At this time, the user BPF is stored in fp->insns.
*/
old_prog = kmemdup(fp->insns, old_len * sizeof(struct sock_filter),
GFP_KERNEL | __GFP_NOWARN);
old_prog = kmemdup_array(fp->insns, old_len, sizeof(struct sock_filter),
GFP_KERNEL | __GFP_NOWARN);
if (!old_prog) {
err = -ENOMEM;
goto out_err;
@ -6280,20 +6280,25 @@ BPF_CALL_5(bpf_skb_check_mtu, struct sk_buff *, skb,
int ret = BPF_MTU_CHK_RET_FRAG_NEEDED;
struct net_device *dev = skb->dev;
int skb_len, dev_len;
int mtu;
int mtu = 0;
if (unlikely(flags & ~(BPF_MTU_CHK_SEGS)))
return -EINVAL;
if (unlikely(flags & ~(BPF_MTU_CHK_SEGS))) {
ret = -EINVAL;
goto out;
}
if (unlikely(flags & BPF_MTU_CHK_SEGS && (len_diff || *mtu_len)))
return -EINVAL;
if (unlikely(flags & BPF_MTU_CHK_SEGS && (len_diff || *mtu_len))) {
ret = -EINVAL;
goto out;
}
dev = __dev_via_ifindex(dev, ifindex);
if (unlikely(!dev))
return -ENODEV;
if (unlikely(!dev)) {
ret = -ENODEV;
goto out;
}
mtu = READ_ONCE(dev->mtu);
dev_len = mtu + dev->hard_header_len;
/* If set use *mtu_len as input, L3 as iph->tot_len (like fib_lookup) */
@ -6311,15 +6316,12 @@ BPF_CALL_5(bpf_skb_check_mtu, struct sk_buff *, skb,
*/
if (skb_is_gso(skb)) {
ret = BPF_MTU_CHK_RET_SUCCESS;
if (flags & BPF_MTU_CHK_SEGS &&
!skb_gso_validate_network_len(skb, mtu))
ret = BPF_MTU_CHK_RET_SEGS_TOOBIG;
}
out:
/* BPF verifier guarantees valid pointer */
*mtu_len = mtu;
return ret;
}
@ -6329,19 +6331,21 @@ BPF_CALL_5(bpf_xdp_check_mtu, struct xdp_buff *, xdp,
struct net_device *dev = xdp->rxq->dev;
int xdp_len = xdp->data_end - xdp->data;
int ret = BPF_MTU_CHK_RET_SUCCESS;
int mtu, dev_len;
int mtu = 0, dev_len;
/* XDP variant doesn't support multi-buffer segment check (yet) */
if (unlikely(flags))
return -EINVAL;
if (unlikely(flags)) {
ret = -EINVAL;
goto out;
}
dev = __dev_via_ifindex(dev, ifindex);
if (unlikely(!dev))
return -ENODEV;
if (unlikely(!dev)) {
ret = -ENODEV;
goto out;
}
mtu = READ_ONCE(dev->mtu);
/* Add L2-header as dev MTU is L3 size */
dev_len = mtu + dev->hard_header_len;
/* Use *mtu_len as input, L3 as iph->tot_len (like fib_lookup) */
@ -6351,10 +6355,8 @@ BPF_CALL_5(bpf_xdp_check_mtu, struct xdp_buff *, xdp,
xdp_len += len_diff; /* minus result pass check */
if (xdp_len > dev_len)
ret = BPF_MTU_CHK_RET_FRAG_NEEDED;
/* BPF verifier guarantees valid pointer */
out:
*mtu_len = mtu;
return ret;
}
@ -6364,7 +6366,8 @@ static const struct bpf_func_proto bpf_skb_check_mtu_proto = {
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_ANYTHING,
.arg3_type = ARG_PTR_TO_INT,
.arg3_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
.arg3_size = sizeof(u32),
.arg4_type = ARG_ANYTHING,
.arg5_type = ARG_ANYTHING,
};
@ -6375,7 +6378,8 @@ static const struct bpf_func_proto bpf_xdp_check_mtu_proto = {
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_ANYTHING,
.arg3_type = ARG_PTR_TO_INT,
.arg3_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
.arg3_size = sizeof(u32),
.arg4_type = ARG_ANYTHING,
.arg5_type = ARG_ANYTHING,
};
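From the BPF program's point of view, the user-visible effect of these hunks
is that *mtu_len (arg3) is now written on every return path. A minimal
TC-side sketch using the helper (hedged; assumes the usual TC return codes):

	SEC("tc")
	int mtu_check(struct __sk_buff *skb)
	{
		__u32 mtu_len = 0; /* in/out: 0 means "check skb length" */

		if (bpf_check_mtu(skb, 0, &mtu_len, 0, 0) ==
		    BPF_MTU_CHK_RET_FRAG_NEEDED)
			return TC_ACT_SHOT; /* would exceed device MTU */
		/* mtu_len now holds the device MTU on all paths */
		return TC_ACT_OK;
	}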
@ -8597,13 +8601,16 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
if (off + size > offsetofend(struct __sk_buff, cb[4]))
return false;
break;
case bpf_ctx_range(struct __sk_buff, data):
case bpf_ctx_range(struct __sk_buff, data_meta):
case bpf_ctx_range(struct __sk_buff, data_end):
if (info->is_ldsx || size != size_default)
return false;
break;
case bpf_ctx_range_till(struct __sk_buff, remote_ip6[0], remote_ip6[3]):
case bpf_ctx_range_till(struct __sk_buff, local_ip6[0], local_ip6[3]):
case bpf_ctx_range_till(struct __sk_buff, remote_ip4, remote_ip4):
case bpf_ctx_range_till(struct __sk_buff, local_ip4, local_ip4):
case bpf_ctx_range(struct __sk_buff, data):
case bpf_ctx_range(struct __sk_buff, data_meta):
case bpf_ctx_range(struct __sk_buff, data_end):
if (size != size_default)
return false;
break;
@ -9047,6 +9054,14 @@ static bool xdp_is_valid_access(int off, int size,
}
}
return false;
} else {
switch (off) {
case offsetof(struct xdp_md, data_meta):
case offsetof(struct xdp_md, data):
case offsetof(struct xdp_md, data_end):
if (info->is_ldsx)
return false;
}
}
switch (off) {
@ -9372,12 +9387,12 @@ static bool flow_dissector_is_valid_access(int off, int size,
switch (off) {
case bpf_ctx_range(struct __sk_buff, data):
if (size != size_default)
if (info->is_ldsx || size != size_default)
return false;
info->reg_type = PTR_TO_PACKET;
return true;
case bpf_ctx_range(struct __sk_buff, data_end):
if (size != size_default)
if (info->is_ldsx || size != size_default)
return false;
info->reg_type = PTR_TO_PACKET_END;
return true;

View File

@ -14,10 +14,6 @@
/* "extern" is to avoid sparse warning. It is only used in bpf_struct_ops.c. */
static struct bpf_struct_ops bpf_tcp_congestion_ops;
static u32 unsupported_ops[] = {
offsetof(struct tcp_congestion_ops, get_info),
};
static const struct btf_type *tcp_sock_type;
static u32 tcp_sock_id, sock_id;
static const struct btf_type *tcp_congestion_ops_type;
@ -45,18 +41,6 @@ static int bpf_tcp_ca_init(struct btf *btf)
return 0;
}
static bool is_unsupported(u32 member_offset)
{
unsigned int i;
for (i = 0; i < ARRAY_SIZE(unsupported_ops); i++) {
if (member_offset == unsupported_ops[i])
return true;
}
return false;
}
static bool bpf_tcp_ca_is_valid_access(int off, int size,
enum bpf_access_type type,
const struct bpf_prog *prog,
@ -251,15 +235,6 @@ static int bpf_tcp_ca_init_member(const struct btf_type *t,
return 0;
}
static int bpf_tcp_ca_check_member(const struct btf_type *t,
const struct btf_member *member,
const struct bpf_prog *prog)
{
if (is_unsupported(__btf_member_bit_offset(t, member) / 8))
return -ENOTSUPP;
return 0;
}
static int bpf_tcp_ca_reg(void *kdata, struct bpf_link *link)
{
return tcp_register_congestion_control(kdata);
@ -354,7 +329,6 @@ static struct bpf_struct_ops bpf_tcp_congestion_ops = {
.reg = bpf_tcp_ca_reg,
.unreg = bpf_tcp_ca_unreg,
.update = bpf_tcp_ca_update,
.check_member = bpf_tcp_ca_check_member,
.init_member = bpf_tcp_ca_init_member,
.init = bpf_tcp_ca_init,
.validate = bpf_tcp_ca_validate,

View File

@ -1320,14 +1320,6 @@ struct xdp_umem_reg_v1 {
__u32 headroom;
};
struct xdp_umem_reg_v2 {
__u64 addr; /* Start of packet data area */
__u64 len; /* Length of packet data area */
__u32 chunk_size;
__u32 headroom;
__u32 flags;
};
static int xsk_setsockopt(struct socket *sock, int level, int optname,
sockptr_t optval, unsigned int optlen)
{
@ -1371,10 +1363,19 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
if (optlen < sizeof(struct xdp_umem_reg_v1))
return -EINVAL;
else if (optlen < sizeof(struct xdp_umem_reg_v2))
mr_size = sizeof(struct xdp_umem_reg_v1);
else if (optlen < sizeof(mr))
mr_size = sizeof(struct xdp_umem_reg_v2);
mr_size = sizeof(struct xdp_umem_reg_v1);
BUILD_BUG_ON(sizeof(struct xdp_umem_reg_v1) >= sizeof(struct xdp_umem_reg));
/* Make sure the last field of the struct doesn't have
* uninitialized padding. All padding has to be explicit
* and has to be set to zero by the userspace to make
* struct xdp_umem_reg extensible in the future.
*/
BUILD_BUG_ON(offsetof(struct xdp_umem_reg, tx_metadata_len) +
sizeof_field(struct xdp_umem_reg, tx_metadata_len) !=
sizeof(struct xdp_umem_reg));
if (copy_from_sockptr(&mr, optval, mr_size))
return -EFAULT;
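For context, userspace must zero the whole struct (padding included) before
registering a UMEM; a hedged sketch with hypothetical 'umem_area', 'umem_len'
and 'xsk_fd':

	struct xdp_umem_reg mr;

	memset(&mr, 0, sizeof(mr)); /* zero all padding, as required above */
	mr.addr = (__u64)(uintptr_t)umem_area;
	mr.len = umem_len;
	mr.chunk_size = 4096;
	mr.headroom = 0;
	if (setsockopt(xsk_fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr)) < 0)
		perror("XDP_UMEM_REG");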

View File

@ -13,7 +13,6 @@ tprogs-y += sockex1
tprogs-y += sockex2
tprogs-y += sockex3
tprogs-y += tracex1
tprogs-y += tracex2
tprogs-y += tracex3
tprogs-y += tracex4
tprogs-y += tracex5
@ -63,7 +62,6 @@ sockex1-objs := sockex1_user.o
sockex2-objs := sockex2_user.o
sockex3-objs := sockex3_user.o
tracex1-objs := tracex1_user.o $(TRACE_HELPERS)
tracex2-objs := tracex2_user.o
tracex3-objs := tracex3_user.o
tracex4-objs := tracex4_user.o
tracex5-objs := tracex5_user.o $(TRACE_HELPERS)
@ -105,7 +103,6 @@ always-y += sockex1_kern.o
always-y += sockex2_kern.o
always-y += sockex3_kern.o
always-y += tracex1.bpf.o
always-y += tracex2.bpf.o
always-y += tracex3.bpf.o
always-y += tracex4.bpf.o
always-y += tracex5.bpf.o
@ -169,6 +166,10 @@ BPF_EXTRA_CFLAGS += -I$(srctree)/arch/mips/include/asm/mach-generic
endif
endif
ifeq ($(ARCH), x86)
BPF_EXTRA_CFLAGS += -fcf-protection
endif
TPROGS_CFLAGS += -Wall -O2
TPROGS_CFLAGS += -Wmissing-prototypes
TPROGS_CFLAGS += -Wstrict-prototypes
@ -405,7 +406,7 @@ $(obj)/%.o: $(src)/%.c
-Wno-gnu-variable-sized-type-not-at-end \
-Wno-address-of-packed-member -Wno-tautological-compare \
-Wno-unknown-warning-option $(CLANG_ARCH_ARGS) \
-fno-asynchronous-unwind-tables -fcf-protection \
-fno-asynchronous-unwind-tables \
-I$(srctree)/samples/bpf/ -include asm_goto_workaround.h \
-O2 -emit-llvm -Xclang -disable-llvm-passes -c $< -o - | \
$(OPT) -O2 -mtriple=bpf-pc-linux | $(LLVM_DIS) | \

View File

@ -1,99 +0,0 @@
/* Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
#include "vmlinux.h"
#include <linux/version.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__type(key, long);
__type(value, long);
__uint(max_entries, 1024);
} my_map SEC(".maps");
/* kprobe is NOT a stable ABI. If kernel internals change this bpf+kprobe
* example will no longer be meaningful
*/
SEC("kprobe/kfree_skb_reason")
int bpf_prog2(struct pt_regs *ctx)
{
long loc = 0;
long init_val = 1;
long *value;
/* read ip of kfree_skb_reason caller.
* non-portable version of __builtin_return_address(0)
*/
BPF_KPROBE_READ_RET_IP(loc, ctx);
value = bpf_map_lookup_elem(&my_map, &loc);
if (value)
*value += 1;
else
bpf_map_update_elem(&my_map, &loc, &init_val, BPF_ANY);
return 0;
}
static unsigned int log2(unsigned int v)
{
unsigned int r;
unsigned int shift;
r = (v > 0xFFFF) << 4; v >>= r;
shift = (v > 0xFF) << 3; v >>= shift; r |= shift;
shift = (v > 0xF) << 2; v >>= shift; r |= shift;
shift = (v > 0x3) << 1; v >>= shift; r |= shift;
r |= (v >> 1);
return r;
}
static unsigned int log2l(unsigned long v)
{
unsigned int hi = v >> 32;
if (hi)
return log2(hi) + 32;
else
return log2(v);
}
struct hist_key {
char comm[16];
u64 pid_tgid;
u64 uid_gid;
u64 index;
};
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_HASH);
__uint(key_size, sizeof(struct hist_key));
__uint(value_size, sizeof(long));
__uint(max_entries, 1024);
} my_hist_map SEC(".maps");
SEC("ksyscall/write")
int BPF_KSYSCALL(bpf_prog3, unsigned int fd, const char *buf, size_t count)
{
long init_val = 1;
long *value;
struct hist_key key;
key.index = log2l(count);
key.pid_tgid = bpf_get_current_pid_tgid();
key.uid_gid = bpf_get_current_uid_gid();
bpf_get_current_comm(&key.comm, sizeof(key.comm));
value = bpf_map_lookup_elem(&my_hist_map, &key);
if (value)
__sync_fetch_and_add(value, 1);
else
bpf_map_update_elem(&my_hist_map, &key, &init_val, BPF_ANY);
return 0;
}
char _license[] SEC("license") = "GPL";
u32 _version SEC("version") = LINUX_VERSION_CODE;

View File

@ -1,187 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <signal.h>
#include <string.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include "bpf_util.h"
#define MAX_INDEX 64
#define MAX_STARS 38
/* my_map, my_hist_map */
static int map_fd[2];
static void stars(char *str, long val, long max, int width)
{
int i;
for (i = 0; i < (width * val / max) - 1 && i < width - 1; i++)
str[i] = '*';
if (val > max)
str[i - 1] = '+';
str[i] = '\0';
}
struct task {
char comm[16];
__u64 pid_tgid;
__u64 uid_gid;
};
struct hist_key {
struct task t;
__u32 index;
};
#define SIZE sizeof(struct task)
static void print_hist_for_pid(int fd, void *task)
{
unsigned int nr_cpus = bpf_num_possible_cpus();
struct hist_key key = {}, next_key;
long values[nr_cpus];
char starstr[MAX_STARS];
long value;
long data[MAX_INDEX] = {};
int max_ind = -1;
long max_value = 0;
int i, ind;
while (bpf_map_get_next_key(fd, &key, &next_key) == 0) {
if (memcmp(&next_key, task, SIZE)) {
key = next_key;
continue;
}
bpf_map_lookup_elem(fd, &next_key, values);
value = 0;
for (i = 0; i < nr_cpus; i++)
value += values[i];
ind = next_key.index;
data[ind] = value;
if (value && ind > max_ind)
max_ind = ind;
if (value > max_value)
max_value = value;
key = next_key;
}
printf(" syscall write() stats\n");
printf(" byte_size : count distribution\n");
for (i = 1; i <= max_ind + 1; i++) {
stars(starstr, data[i - 1], max_value, MAX_STARS);
printf("%8ld -> %-8ld : %-8ld |%-*s|\n",
(1l << i) >> 1, (1l << i) - 1, data[i - 1],
MAX_STARS, starstr);
}
}
static void print_hist(int fd)
{
struct hist_key key = {}, next_key;
static struct task tasks[1024];
int task_cnt = 0;
int i;
while (bpf_map_get_next_key(fd, &key, &next_key) == 0) {
int found = 0;
for (i = 0; i < task_cnt; i++)
if (memcmp(&tasks[i], &next_key, SIZE) == 0)
found = 1;
if (!found)
memcpy(&tasks[task_cnt++], &next_key, SIZE);
key = next_key;
}
for (i = 0; i < task_cnt; i++) {
printf("\npid %d cmd %s uid %d\n",
(__u32) tasks[i].pid_tgid,
tasks[i].comm,
(__u32) tasks[i].uid_gid);
print_hist_for_pid(fd, &tasks[i]);
}
}
static void int_exit(int sig)
{
print_hist(map_fd[1]);
exit(0);
}
int main(int ac, char **argv)
{
long key, next_key, value;
struct bpf_link *links[2];
struct bpf_program *prog;
struct bpf_object *obj;
char filename[256];
int i, j = 0;
FILE *f;
snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]);
obj = bpf_object__open_file(filename, NULL);
if (libbpf_get_error(obj)) {
fprintf(stderr, "ERROR: opening BPF object file failed\n");
return 0;
}
/* load BPF program */
if (bpf_object__load(obj)) {
fprintf(stderr, "ERROR: loading BPF object file failed\n");
goto cleanup;
}
map_fd[0] = bpf_object__find_map_fd_by_name(obj, "my_map");
map_fd[1] = bpf_object__find_map_fd_by_name(obj, "my_hist_map");
if (map_fd[0] < 0 || map_fd[1] < 0) {
fprintf(stderr, "ERROR: finding a map in obj file failed\n");
goto cleanup;
}
signal(SIGINT, int_exit);
signal(SIGTERM, int_exit);
/* start 'ping' in the background to have some kfree_skb_reason
* events */
f = popen("ping -4 -c5 localhost", "r");
(void) f;
/* start 'dd' in the background to have plenty of 'write' syscalls */
f = popen("dd if=/dev/zero of=/dev/null count=5000000", "r");
(void) f;
bpf_object__for_each_program(prog, obj) {
links[j] = bpf_program__attach(prog);
if (libbpf_get_error(links[j])) {
fprintf(stderr, "ERROR: bpf_program__attach failed\n");
links[j] = NULL;
goto cleanup;
}
j++;
}
for (i = 0; i < 5; i++) {
key = 0;
while (bpf_map_get_next_key(map_fd[0], &key, &next_key) == 0) {
bpf_map_lookup_elem(map_fd[0], &next_key, &value);
printf("location 0x%lx count %ld\n", next_key, value);
key = next_key;
}
if (key)
printf("\n");
sleep(1);
}
print_hist(map_fd[1]);
cleanup:
for (j--; j >= 0; j--)
bpf_link__destroy(links[j]);
bpf_object__close(obj);
return 0;
}

View File

@ -33,13 +33,13 @@ int bpf_prog1(struct pt_regs *ctx)
return 0;
}
SEC("kretprobe/kmem_cache_alloc_node")
SEC("kretprobe/kmem_cache_alloc_node_noprof")
int bpf_prog2(struct pt_regs *ctx)
{
long ptr = PT_REGS_RC(ctx);
long ip = 0;
/* get ip address of kmem_cache_alloc_node() caller */
/* get ip address of kmem_cache_alloc_node_noprof() caller */
BPF_KRETPROBE_READ_RET_IP(ip, ctx);
struct pair v = {

View File

@ -107,20 +107,8 @@ vmlinux_link()
# ${1} - vmlinux image
gen_btf()
{
local pahole_ver
local btf_data=${1}.btf.o
if ! [ -x "$(command -v ${PAHOLE})" ]; then
echo >&2 "BTF: ${1}: pahole (${PAHOLE}) is not available"
return 1
fi
pahole_ver=$(${PAHOLE} --version | sed -E 's/v([0-9]+)\.([0-9]+)/\1\2/')
if [ "${pahole_ver}" -lt "116" ]; then
echo >&2 "BTF: ${1}: pahole version $(${PAHOLE} --version) is too old, need at least v1.16"
return 1
fi
info BTF "${btf_data}"
LLVM_OBJCOPY="${OBJCOPY}" ${PAHOLE} -J ${PAHOLE_FLAGS} ${1}
@ -284,7 +272,7 @@ strip_debug=
vmlinux_link vmlinux
# fill in BTF IDs
if is_enabled CONFIG_DEBUG_INFO_BTF && is_enabled CONFIG_BPF; then
if is_enabled CONFIG_DEBUG_INFO_BTF; then
info BTFIDS vmlinux
${RESOLVE_BTFIDS} vmlinux
fi

View File

@ -31,7 +31,6 @@ static int __init bpf_lsm_init(void)
struct lsm_blob_sizes bpf_lsm_blob_sizes __ro_after_init = {
.lbs_inode = sizeof(struct bpf_storage_blob),
.lbs_task = sizeof(struct bpf_storage_blob),
};
DEFINE_LSM(bpf) = {

View File

@ -104,7 +104,7 @@ bpftool gen skeleton *FILE*
- **example__load**.
This function creates maps, loads and verifies BPF programs, initializes
global data maps. It corresponds to libppf's **bpf_object__load**\ ()
global data maps. It corresponds to libbpf's **bpf_object__load**\ ()
API.
- **example__open_and_load** combines **example__open** and
@ -172,7 +172,7 @@ bpftool gen min_core_btf *INPUT* *OUTPUT* *OBJECT* [*OBJECT*...]
CO-RE based application, turning the application portable to different
kernel versions.
Check examples bellow for more information how to use it.
Check examples below for more information on how to use it.
bpftool gen help
Print short help message.

View File

@ -29,7 +29,7 @@ NET COMMANDS
| **bpftool** **net help**
|
| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* | **name** *PROG_NAME* }
| *ATTACH_TYPE* := { **xdp** | **xdpgeneric** | **xdpdrv** | **xdpoffload** }
| *ATTACH_TYPE* := { **xdp** | **xdpgeneric** | **xdpdrv** | **xdpoffload** | **tcx_ingress** | **tcx_egress** }
DESCRIPTION
===========
@ -69,6 +69,8 @@ bpftool net attach *ATTACH_TYPE* *PROG* dev *NAME* [ overwrite ]
**xdpgeneric** - Generic XDP. runs at generic XDP hook when packet already enters receive path as skb;
**xdpdrv** - Native XDP. runs earliest point in driver's receive path;
**xdpoffload** - Offload XDP. runs directly on NIC on each packet reception;
**tcx_ingress** - Ingress TCX. runs on ingress net traffic;
**tcx_egress** - Egress TCX. runs on egress net traffic;
bpftool net detach *ATTACH_TYPE* dev *NAME*
Detach bpf program attached to network interface *NAME* with type specified
@ -178,3 +180,23 @@ EXAMPLES
::
xdp:
|
| **# bpftool net attach tcx_ingress name tc_prog dev lo**
| **# bpftool net**
|
::
tc:
lo(1) tcx/ingress tc_prog prog_id 29
|
| **# bpftool net attach tcx_ingress name tc_prog dev lo**
| **# bpftool net detach tcx_ingress dev lo**
| **# bpftool net**
|
::
tc:

View File

@ -1079,7 +1079,7 @@ _bpftool()
esac
;;
net)
local ATTACH_TYPES='xdp xdpgeneric xdpdrv xdpoffload'
local ATTACH_TYPES='xdp xdpgeneric xdpdrv xdpoffload tcx_ingress tcx_egress'
case $command in
show|list)
[[ $prev != "$command" ]] && return 0

View File

@ -50,6 +50,7 @@ struct sort_datum {
int type_rank;
const char *sort_name;
const char *own_name;
__u64 disambig_hash;
};
static const char *btf_int_enc_str(__u8 encoding)
@ -561,9 +562,10 @@ static const char *btf_type_sort_name(const struct btf *btf, __u32 index, bool f
case BTF_KIND_ENUM64: {
int name_off = t->name_off;
/* Use name of the first element for anonymous enums if allowed */
if (!from_ref && !t->name_off && btf_vlen(t))
name_off = btf_enum(t)->name_off;
if (!from_ref && !name_off && btf_vlen(t))
name_off = btf_kind(t) == BTF_KIND_ENUM64 ?
btf_enum64(t)->name_off :
btf_enum(t)->name_off;
return btf__name_by_offset(btf, name_off);
}
@ -583,20 +585,88 @@ static const char *btf_type_sort_name(const struct btf *btf, __u32 index, bool f
return NULL;
}
static __u64 hasher(__u64 hash, __u64 val)
{
return hash * 31 + val;
}
static __u64 btf_name_hasher(__u64 hash, const struct btf *btf, __u32 name_off)
{
if (!name_off)
return hash;
return hasher(hash, str_hash(btf__name_by_offset(btf, name_off)));
}
static __u64 btf_type_disambig_hash(const struct btf *btf, __u32 id, bool include_members)
{
const struct btf_type *t = btf__type_by_id(btf, id);
int i;
size_t hash = 0;
hash = btf_name_hasher(hash, btf, t->name_off);
switch (btf_kind(t)) {
case BTF_KIND_ENUM:
case BTF_KIND_ENUM64:
for (i = 0; i < btf_vlen(t); i++) {
__u32 name_off = btf_is_enum(t) ?
btf_enum(t)[i].name_off :
btf_enum64(t)[i].name_off;
hash = btf_name_hasher(hash, btf, name_off);
}
break;
case BTF_KIND_STRUCT:
case BTF_KIND_UNION:
if (!include_members)
break;
for (i = 0; i < btf_vlen(t); i++) {
const struct btf_member *m = btf_members(t) + i;
hash = btf_name_hasher(hash, btf, m->name_off);
/* resolve field type's name and hash it as well */
hash = hasher(hash, btf_type_disambig_hash(btf, m->type, false));
}
break;
case BTF_KIND_TYPE_TAG:
case BTF_KIND_CONST:
case BTF_KIND_PTR:
case BTF_KIND_VOLATILE:
case BTF_KIND_RESTRICT:
case BTF_KIND_TYPEDEF:
case BTF_KIND_DECL_TAG:
hash = hasher(hash, btf_type_disambig_hash(btf, t->type, include_members));
break;
case BTF_KIND_ARRAY: {
struct btf_array *arr = btf_array(t);
hash = hasher(hash, arr->nelems);
hash = hasher(hash, btf_type_disambig_hash(btf, arr->type, include_members));
break;
}
default:
break;
}
return hash;
}
static int btf_type_compare(const void *left, const void *right)
{
const struct sort_datum *d1 = (const struct sort_datum *)left;
const struct sort_datum *d2 = (const struct sort_datum *)right;
int r;
if (d1->type_rank != d2->type_rank)
return d1->type_rank < d2->type_rank ? -1 : 1;
r = strcmp(d1->sort_name, d2->sort_name);
r = d1->type_rank - d2->type_rank;
r = r ?: strcmp(d1->sort_name, d2->sort_name);
r = r ?: strcmp(d1->own_name, d2->own_name);
if (r)
return r;
return strcmp(d1->own_name, d2->own_name);
if (d1->disambig_hash != d2->disambig_hash)
return d1->disambig_hash < d2->disambig_hash ? -1 : 1;
return d1->index - d2->index;
}
static struct sort_datum *sort_btf_c(const struct btf *btf)
@ -617,6 +687,7 @@ static struct sort_datum *sort_btf_c(const struct btf *btf)
d->type_rank = btf_type_rank(btf, i, false);
d->sort_name = btf_type_sort_name(btf, i, false);
d->own_name = btf__name_by_offset(btf, t->name_off);
d->disambig_hash = btf_type_disambig_hash(btf, i, true);
}
qsort(datums, n, sizeof(struct sort_datum), btf_type_compare);

View File

@ -196,7 +196,7 @@ static void probe_unprivileged_disabled(void)
{
long res;
/* No support for C-style ouptut */
/* No support for C-style output */
res = read_procfs("/proc/sys/kernel/unprivileged_bpf_disabled");
if (json_output) {
@ -225,7 +225,7 @@ static void probe_jit_enable(void)
{
long res;
/* No support for C-style ouptut */
/* No support for C-style output */
res = read_procfs("/proc/sys/net/core/bpf_jit_enable");
if (json_output) {
@ -255,7 +255,7 @@ static void probe_jit_harden(void)
{
long res;
/* No support for C-style ouptut */
/* No support for C-style output */
res = read_procfs("/proc/sys/net/core/bpf_jit_harden");
if (json_output) {
@ -285,7 +285,7 @@ static void probe_jit_kallsyms(void)
{
long res;
/* No support for C-style ouptut */
/* No support for C-style output */
res = read_procfs("/proc/sys/net/core/bpf_jit_kallsyms");
if (json_output) {
@ -311,7 +311,7 @@ static void probe_jit_limit(void)
{
long res;
/* No support for C-style ouptut */
/* No support for C-style output */
res = read_procfs("/proc/sys/net/core/bpf_jit_limit");
if (json_output) {

View File

@ -67,6 +67,8 @@ enum net_attach_type {
NET_ATTACH_TYPE_XDP_GENERIC,
NET_ATTACH_TYPE_XDP_DRIVER,
NET_ATTACH_TYPE_XDP_OFFLOAD,
NET_ATTACH_TYPE_TCX_INGRESS,
NET_ATTACH_TYPE_TCX_EGRESS,
};
static const char * const attach_type_strings[] = {
@ -74,6 +76,8 @@ static const char * const attach_type_strings[] = {
[NET_ATTACH_TYPE_XDP_GENERIC] = "xdpgeneric",
[NET_ATTACH_TYPE_XDP_DRIVER] = "xdpdrv",
[NET_ATTACH_TYPE_XDP_OFFLOAD] = "xdpoffload",
[NET_ATTACH_TYPE_TCX_INGRESS] = "tcx_ingress",
[NET_ATTACH_TYPE_TCX_EGRESS] = "tcx_egress",
};
static const char * const attach_loc_strings[] = {
@ -482,9 +486,9 @@ static void __show_dev_tc_bpf(const struct ip_devname_ifindex *dev,
if (prog_flags[i] || json_output) {
NET_START_ARRAY("prog_flags", "%s ");
for (j = 0; prog_flags[i] && j < 32; j++) {
if (!(prog_flags[i] & (1 << j)))
if (!(prog_flags[i] & (1U << j)))
continue;
NET_DUMP_UINT_ONLY(1 << j);
NET_DUMP_UINT_ONLY(1U << j);
}
NET_END_ARRAY("");
}
@ -493,9 +497,9 @@ static void __show_dev_tc_bpf(const struct ip_devname_ifindex *dev,
if (link_flags[i] || json_output) {
NET_START_ARRAY("link_flags", "%s ");
for (j = 0; link_flags[i] && j < 32; j++) {
if (!(link_flags[i] & (1 << j)))
if (!(link_flags[i] & (1U << j)))
continue;
NET_DUMP_UINT_ONLY(1 << j);
NET_DUMP_UINT_ONLY(1U << j);
}
NET_END_ARRAY("");
}
@ -647,6 +651,32 @@ static int do_attach_detach_xdp(int progfd, enum net_attach_type attach_type,
return bpf_xdp_attach(ifindex, progfd, flags, NULL);
}
static int get_tcx_type(enum net_attach_type attach_type)
{
switch (attach_type) {
case NET_ATTACH_TYPE_TCX_INGRESS:
return BPF_TCX_INGRESS;
case NET_ATTACH_TYPE_TCX_EGRESS:
return BPF_TCX_EGRESS;
default:
return -1;
}
}
static int do_attach_tcx(int progfd, enum net_attach_type attach_type, int ifindex)
{
int type = get_tcx_type(attach_type);
return bpf_prog_attach(progfd, ifindex, type, 0);
}
static int do_detach_tcx(int targetfd, enum net_attach_type attach_type)
{
int type = get_tcx_type(attach_type);
return bpf_prog_detach(targetfd, type);
}
static int do_attach(int argc, char **argv)
{
enum net_attach_type attach_type;
@ -684,10 +714,23 @@ static int do_attach(int argc, char **argv)
}
}
switch (attach_type) {
/* attach xdp prog */
if (is_prefix("xdp", attach_type_strings[attach_type]))
err = do_attach_detach_xdp(progfd, attach_type, ifindex,
overwrite);
case NET_ATTACH_TYPE_XDP:
case NET_ATTACH_TYPE_XDP_GENERIC:
case NET_ATTACH_TYPE_XDP_DRIVER:
case NET_ATTACH_TYPE_XDP_OFFLOAD:
err = do_attach_detach_xdp(progfd, attach_type, ifindex, overwrite);
break;
/* attach tcx prog */
case NET_ATTACH_TYPE_TCX_INGRESS:
case NET_ATTACH_TYPE_TCX_EGRESS:
err = do_attach_tcx(progfd, attach_type, ifindex);
break;
default:
break;
}
if (err) {
p_err("interface %s attach failed: %s",
attach_type_strings[attach_type], strerror(-err));
@ -721,10 +764,23 @@ static int do_detach(int argc, char **argv)
if (ifindex < 1)
return -EINVAL;
switch (attach_type) {
/* detach xdp prog */
progfd = -1;
if (is_prefix("xdp", attach_type_strings[attach_type]))
case NET_ATTACH_TYPE_XDP:
case NET_ATTACH_TYPE_XDP_GENERIC:
case NET_ATTACH_TYPE_XDP_DRIVER:
case NET_ATTACH_TYPE_XDP_OFFLOAD:
progfd = -1;
err = do_attach_detach_xdp(progfd, attach_type, ifindex, NULL);
break;
/* detach tcx prog */
case NET_ATTACH_TYPE_TCX_INGRESS:
case NET_ATTACH_TYPE_TCX_EGRESS:
err = do_detach_tcx(ifindex, attach_type);
break;
default:
break;
}
if (err < 0) {
p_err("interface %s detach failed: %s",
@ -824,6 +880,9 @@ static void show_link_netfilter(void)
nf_link_count++;
}
if (!nf_link_info)
return;
qsort(nf_link_info, nf_link_count, sizeof(*nf_link_info), netfilter_link_compar);
for (id = 0; id < nf_link_count; id++) {
@ -928,7 +987,8 @@ static int do_help(int argc, char **argv)
" %1$s %2$s help\n"
"\n"
" " HELP_SPEC_PROGRAM "\n"
" ATTACH_TYPE := { xdp | xdpgeneric | xdpdrv | xdpoffload }\n"
" ATTACH_TYPE := { xdp | xdpgeneric | xdpdrv | xdpoffload | tcx_ingress\n"
" | tcx_egress }\n"
" " HELP_SPEC_OPTIONS " }\n"
"\n"
"Note: Only xdp, tcx, tc, netkit, flow_dissector and netfilter attachments\n"

View File

@ -349,7 +349,7 @@ void dump_xlated_plain(struct dump_data *dd, void *buf, unsigned int len,
double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW);
printf("% 4d: ", i);
printf("%4u: ", i);
print_bpf_insn(&cbs, insn + i, true);
if (opcodes) {
@ -415,7 +415,7 @@ void dump_xlated_for_graph(struct dump_data *dd, void *buf_start, void *buf_end,
}
}
printf("%d: ", insn_off);
printf("%u: ", insn_off);
print_bpf_insn(&cbs, cur, true);
if (opcodes) {

View File

@ -15,6 +15,7 @@ INCLUDES := -I$(OUTPUT) -I$(BPF_INCLUDE) -I$(abspath ../../include/uapi)
CFLAGS := -g -Wall $(CLANG_CROSS_FLAGS)
CFLAGS += $(EXTRA_CFLAGS)
LDFLAGS += $(EXTRA_LDFLAGS)
LDLIBS += -lelf -lz
# Try to detect best kernel BTF source
KERNEL_REL := $(shell uname -r)
@ -51,7 +52,7 @@ clean:
libbpf_hdrs: $(BPFOBJ)
$(OUTPUT)/runqslower: $(OUTPUT)/runqslower.o $(BPFOBJ)
$(QUIET_LINK)$(CC) $(CFLAGS) $^ -lelf -lz -o $@
$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $^ $(LDLIBS) -o $@
$(OUTPUT)/runqslower.o: runqslower.h $(OUTPUT)/runqslower.skel.h \
$(OUTPUT)/runqslower.bpf.o | libbpf_hdrs

View File

@ -7513,4 +7513,13 @@ struct bpf_iter_num {
__u64 __opaque[1];
} __attribute__((aligned(8)));
/*
* Flags to control BPF kfunc behaviour.
* - BPF_F_PAD_ZEROS: Pad destination buffer with zeros. (See the respective
* helper documentation for details.)
*/
enum bpf_kfunc_flags {
BPF_F_PAD_ZEROS = (1ULL << 0),
};
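One consumer of this flag in this series is the bpf_copy_from_user_str()
kfunc; a hedged BPF-side sketch (assumes a sleepable program and a valid
'user_ptr'):

	char str[64];
	int ret;

	/* with BPF_F_PAD_ZEROS, bytes past the copied string are zeroed */
	ret = bpf_copy_from_user_str(str, sizeof(str), user_ptr, BPF_F_PAD_ZEROS);
	if (ret < 0)
		return 0; /* copy failed */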
#endif /* _UAPI__LINUX_BPF_H__ */

View File

@ -100,7 +100,7 @@ struct bpf_prog_load_opts {
__u32 log_level;
__u32 log_size;
char *log_buf;
/* output: actual total log contents size (including termintaing zero).
/* output: actual total log contents size (including terminating zero).
* It could be both larger than original log_size (if log was
* truncated), or smaller (if log buffer wasn't filled completely).
* If kernel doesn't support this feature, log_size is left unchanged.
@ -129,7 +129,7 @@ struct bpf_btf_load_opts {
char *log_buf;
__u32 log_level;
__u32 log_size;
/* output: actual total log contents size (including termintaing zero).
/* output: actual total log contents size (including terminating zero).
* It could be both larger than original log_size (if log was
* truncated), or smaller (if log buffer wasn't filled completely).
* If kernel doesn't support this feature, log_size is left unchanged.

View File

@ -341,7 +341,7 @@ extern void bpf_iter_num_destroy(struct bpf_iter_num *it) __weak __ksym;
* I.e., it looks almost like high-level for each loop in other languages,
* supports continue/break, and is verifiable by BPF verifier.
*
* For iterating integers, the difference betwen bpf_for_each(num, i, N, M)
* For iterating integers, the difference between bpf_for_each(num, i, N, M)
* and bpf_for(i, N, M) is in that bpf_for() provides additional proof to
* verifier that i is in [N, M) range, and in bpf_for_each() case i is `int
* *`, not just `int`. So for integers bpf_for() is more convenient.
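For illustration, a short sketch of both forms (bpf_for()/bpf_for_each() are
provided by bpf_helpers.h; illustrative only):

	int i, sum = 0;
	int *v;

	bpf_for(i, 0, 100)           /* verifier proves 0 <= i < 100 */
		sum += i;

	bpf_for_each(num, v, 0, 100) /* v is 'int *' here */
		sum += *v;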

View File

@ -163,7 +163,7 @@
struct pt_regs___s390 {
unsigned long orig_gpr2;
};
} __attribute__((preserve_access_index));
/* s390 provides user_pt_regs instead of struct pt_regs to userspace */
#define __PT_REGS_CAST(x) ((const user_pt_regs *)(x))
@ -179,7 +179,7 @@ struct pt_regs___s390 {
#define __PT_PARM4_SYSCALL_REG __PT_PARM4_REG
#define __PT_PARM5_SYSCALL_REG __PT_PARM5_REG
#define __PT_PARM6_SYSCALL_REG gprs[7]
#define PT_REGS_PARM1_SYSCALL(x) PT_REGS_PARM1_CORE_SYSCALL(x)
#define PT_REGS_PARM1_SYSCALL(x) (((const struct pt_regs___s390 *)(x))->__PT_PARM1_SYSCALL_REG)
#define PT_REGS_PARM1_CORE_SYSCALL(x) \
BPF_CORE_READ((const struct pt_regs___s390 *)(x), __PT_PARM1_SYSCALL_REG)
@ -222,7 +222,7 @@ struct pt_regs___s390 {
struct pt_regs___arm64 {
unsigned long orig_x0;
};
} __attribute__((preserve_access_index));
/* arm64 provides struct user_pt_regs instead of struct pt_regs to userspace */
#define __PT_REGS_CAST(x) ((const struct user_pt_regs *)(x))
@ -241,7 +241,7 @@ struct pt_regs___arm64 {
#define __PT_PARM4_SYSCALL_REG __PT_PARM4_REG
#define __PT_PARM5_SYSCALL_REG __PT_PARM5_REG
#define __PT_PARM6_SYSCALL_REG __PT_PARM6_REG
#define PT_REGS_PARM1_SYSCALL(x) PT_REGS_PARM1_CORE_SYSCALL(x)
#define PT_REGS_PARM1_SYSCALL(x) (((const struct pt_regs___arm64 *)(x))->__PT_PARM1_SYSCALL_REG)
#define PT_REGS_PARM1_CORE_SYSCALL(x) \
BPF_CORE_READ((const struct pt_regs___arm64 *)(x), __PT_PARM1_SYSCALL_REG)
@ -351,6 +351,10 @@ struct pt_regs___arm64 {
* https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc#risc-v-calling-conventions
*/
struct pt_regs___riscv {
unsigned long orig_a0;
} __attribute__((preserve_access_index));
/* riscv provides struct user_regs_struct instead of struct pt_regs to userspace */
#define __PT_REGS_CAST(x) ((const struct user_regs_struct *)(x))
#define __PT_PARM1_REG a0
@ -362,12 +366,15 @@ struct pt_regs___arm64 {
#define __PT_PARM7_REG a6
#define __PT_PARM8_REG a7
#define __PT_PARM1_SYSCALL_REG __PT_PARM1_REG
#define __PT_PARM1_SYSCALL_REG orig_a0
#define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG
#define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG
#define __PT_PARM4_SYSCALL_REG __PT_PARM4_REG
#define __PT_PARM5_SYSCALL_REG __PT_PARM5_REG
#define __PT_PARM6_SYSCALL_REG __PT_PARM6_REG
#define PT_REGS_PARM1_SYSCALL(x) (((const struct pt_regs___riscv *)(x))->__PT_PARM1_SYSCALL_REG)
#define PT_REGS_PARM1_CORE_SYSCALL(x) \
BPF_CORE_READ((const struct pt_regs___riscv *)(x), __PT_PARM1_SYSCALL_REG)
#define __PT_RET_REG ra
#define __PT_FP_REG s0
@ -473,7 +480,7 @@ struct pt_regs;
#endif
/*
* Similarly, syscall-specific conventions might differ between function call
* conventions within each architecutre. All supported architectures pass
* conventions within each architecture. All supported architectures pass
* either 6 or 7 syscall arguments in registers.
*
* See syscall(2) manpage for succinct table with information on each arch.
@ -515,7 +522,7 @@ struct pt_regs;
#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = (ctx)->link; })
#define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP
#elif defined(bpf_target_sparc)
#elif defined(bpf_target_sparc) || defined(bpf_target_arm64)
#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = PT_REGS_RET(ctx); })
#define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP
@ -651,7 +658,7 @@ struct pt_regs;
* BPF_PROG is a convenience wrapper for generic tp_btf/fentry/fexit and
* similar kinds of BPF programs, that accept input arguments as a single
* pointer to untyped u64 array, where each u64 can actually be a typed
* pointer or integer of different size. Instead of requring user to write
* pointer or integer of different size. Instead of requiring user to write
* manual casts and work with array elements by index, BPF_PROG macro
* allows user to declare a list of named and typed input arguments in the
* same syntax as for normal C function. All the casting is hidden and
@ -801,7 +808,7 @@ struct pt_regs;
* tp_btf/fentry/fexit BPF programs. It hides the underlying platform-specific
* low-level way of getting kprobe input arguments from struct pt_regs, and
* provides a familiar typed and named function arguments syntax and
* semantics of accessing kprobe input paremeters.
* semantics of accessing kprobe input parameters.
*
* Original struct pt_regs* context is preserved as 'ctx' argument. This might
* be necessary when using BPF helpers like bpf_perf_event_output().
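A short usage sketch of the macro this comment documents (classic example;
illustrative only):

	SEC("kprobe/do_unlinkat")
	int BPF_KPROBE(handle_unlinkat, int dfd, struct filename *name)
	{
		/* 'ctx' (struct pt_regs *) remains implicitly available */
		bpf_printk("unlinkat dfd=%d", dfd);
		return 0;
	}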

View File

@ -996,6 +996,7 @@ static struct btf *btf_new_empty(struct btf *base_btf)
btf->base_btf = base_btf;
btf->start_id = btf__type_cnt(base_btf);
btf->start_str_off = base_btf->hdr->str_len;
btf->swapped_endian = base_btf->swapped_endian;
}
/* +1 for empty string at offset 0 */
@ -4191,7 +4192,7 @@ static bool btf_dedup_identical_structs(struct btf_dedup *d, __u32 id1, __u32 id
* and canonical graphs are not compatible structurally, whole graphs are
* incompatible. If types are structurally equivalent (i.e., all information
* except referenced type IDs is exactly the same), a mapping from `canon_id` to
* a `cand_id` is recored in hypothetical mapping (`btf_dedup->hypot_map`).
* a `cand_id` is recorded in hypothetical mapping (`btf_dedup->hypot_map`).
* If a type references other types, then those referenced types are checked
* for equivalence recursively.
*
@ -4229,7 +4230,7 @@ static bool btf_dedup_identical_structs(struct btf_dedup *d, __u32 id1, __u32 id
* consists of portions of the graph that come from multiple compilation units.
* This is due to the fact that types within single compilation unit are always
* deduplicated and FWDs are already resolved, if referenced struct/union
* definiton is available. So, if we had unresolved FWD and found corresponding
* definition is available. So, if we had unresolved FWD and found corresponding
* STRUCT/UNION, they will be from different compilation units. This
* consequently means that when we "link" FWD to corresponding STRUCT/UNION,
* type graph will likely have at least two different BTF types that describe
@ -5394,6 +5395,9 @@ int btf__distill_base(const struct btf *src_btf, struct btf **new_base_btf,
new_base = btf__new_empty();
if (!new_base)
return libbpf_err(-ENOMEM);
btf__set_endianness(new_base, btf__endianness(src_btf));
dist.id_map = calloc(n, sizeof(*dist.id_map));
if (!dist.id_map) {
err = -ENOMEM;

View File

@ -286,7 +286,7 @@ LIBBPF_API void btf_dump__free(struct btf_dump *d);
LIBBPF_API int btf_dump__dump_type(struct btf_dump *d, __u32 id);
struct btf_dump_emit_type_decl_opts {
/* size of this struct, for forward/backward compatiblity */
/* size of this struct, for forward/backward compatibility */
size_t sz;
/* optional field name for type declaration, e.g.:
* - struct my_struct <FNAME>

View File

@ -304,7 +304,7 @@ int btf_dump__dump_type(struct btf_dump *d, __u32 id)
* definition, in which case they have to be declared inline as part of field
* type declaration; or as a top-level anonymous enum, typically used for
* declaring global constants. It's impossible to distinguish between two
* without knowning whether given enum type was referenced from other type:
* without knowing whether given enum type was referenced from other type:
* top-level anonymous enum won't be referenced by anything, while embedded
* one will.
*/

View File

@ -1,4 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
/* Copyright (c) 2024, Oracle and/or its affiliates. */
#ifndef _GNU_SOURCE

View File

@ -28,6 +28,9 @@ int elf_open(const char *binary_path, struct elf_fd *elf_fd)
int fd, ret;
Elf *elf;
elf_fd->elf = NULL;
elf_fd->fd = -1;
if (elf_version(EV_CURRENT) == EV_NONE) {
pr_warn("elf: failed to init libelf for %s\n", binary_path);
return -LIBBPF_ERRNO__LIBELF;

View File

@ -496,8 +496,6 @@ struct bpf_program {
};
struct bpf_struct_ops {
const char *tname;
const struct btf_type *type;
struct bpf_program **progs;
__u32 *kern_func_off;
/* e.g. struct tcp_congestion_ops in bpf_prog's btf format */
@ -988,7 +986,7 @@ find_struct_ops_kern_types(struct bpf_object *obj, const char *tname_raw,
{
const struct btf_type *kern_type, *kern_vtype;
const struct btf_member *kern_data_member;
struct btf *btf;
struct btf *btf = NULL;
__s32 kern_vtype_id, kern_type_id;
char tname[256];
__u32 i;
@ -1083,11 +1081,14 @@ static int bpf_object_adjust_struct_ops_autoload(struct bpf_object *obj)
continue;
for (j = 0; j < obj->nr_maps; ++j) {
const struct btf_type *type;
map = &obj->maps[j];
if (!bpf_map__is_struct_ops(map))
continue;
vlen = btf_vlen(map->st_ops->type);
type = btf__type_by_id(obj->btf, map->st_ops->type_id);
vlen = btf_vlen(type);
for (k = 0; k < vlen; ++k) {
slot_prog = map->st_ops->progs[k];
if (prog != slot_prog)
@ -1115,14 +1116,14 @@ static int bpf_map__init_kern_struct_ops(struct bpf_map *map)
const struct btf *btf = obj->btf;
struct bpf_struct_ops *st_ops;
const struct btf *kern_btf;
struct module_btf *mod_btf;
struct module_btf *mod_btf = NULL;
void *data, *kern_data;
const char *tname;
int err;
st_ops = map->st_ops;
type = st_ops->type;
tname = st_ops->tname;
type = btf__type_by_id(btf, st_ops->type_id);
tname = btf__name_by_offset(btf, type->name_off);
err = find_struct_ops_kern_types(obj, tname, &mod_btf,
&kern_type, &kern_type_id,
&kern_vtype, &kern_vtype_id,
@ -1423,8 +1424,6 @@ static int init_struct_ops_maps(struct bpf_object *obj, const char *sec_name,
memcpy(st_ops->data,
data->d_buf + vsi->offset,
type->size);
st_ops->tname = tname;
st_ops->type = type;
st_ops->type_id = type_id;
pr_debug("struct_ops init: struct %s(type_id=%u) %s found at offset %u\n",
@ -1849,7 +1848,7 @@ static char *internal_map_name(struct bpf_object *obj, const char *real_name)
snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name,
sfx_len, real_name);
/* sanitise map name to characters allowed by kernel */
/* sanitize map name to characters allowed by kernel */
for (p = map_name; *p && p < map_name + sizeof(map_name); p++)
if (!isalnum(*p) && *p != '_' && *p != '.')
*p = '_';
@ -7906,16 +7905,19 @@ static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object
}
static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, size_t obj_buf_sz,
const char *obj_name,
const struct bpf_object_open_opts *opts)
{
const char *obj_name, *kconfig, *btf_tmp_path, *token_path;
const char *kconfig, *btf_tmp_path, *token_path;
struct bpf_object *obj;
char tmp_name[64];
int err;
char *log_buf;
size_t log_size;
__u32 log_level;
if (obj_buf && !obj_name)
return ERR_PTR(-EINVAL);
if (elf_version(EV_CURRENT) == EV_NONE) {
pr_warn("failed to init libelf for %s\n",
path ? : "(mem buf)");
@ -7925,16 +7927,12 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf,
if (!OPTS_VALID(opts, bpf_object_open_opts))
return ERR_PTR(-EINVAL);
obj_name = OPTS_GET(opts, object_name, NULL);
obj_name = OPTS_GET(opts, object_name, NULL) ?: obj_name;
if (obj_buf) {
if (!obj_name) {
snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx",
(unsigned long)obj_buf,
(unsigned long)obj_buf_sz);
obj_name = tmp_name;
}
path = obj_name;
pr_debug("loading object '%s' from buffer\n", obj_name);
} else {
pr_debug("loading object from %s\n", path);
}
log_buf = OPTS_GET(opts, kernel_log_buf, NULL);
@ -8018,9 +8016,7 @@ bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts)
if (!path)
return libbpf_err_ptr(-EINVAL);
pr_debug("loading %s\n", path);
return libbpf_ptr(bpf_object_open(path, NULL, 0, opts));
return libbpf_ptr(bpf_object_open(path, NULL, 0, NULL, opts));
}
struct bpf_object *bpf_object__open(const char *path)
@ -8032,10 +8028,15 @@ struct bpf_object *
bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
const struct bpf_object_open_opts *opts)
{
char tmp_name[64];
if (!obj_buf || obj_buf_sz == 0)
return libbpf_err_ptr(-EINVAL);
return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, opts));
/* create a (quite useless) default "name" for this memory buffer object */
snprintf(tmp_name, sizeof(tmp_name), "%lx-%zx", (unsigned long)obj_buf, obj_buf_sz);
return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, tmp_name, opts));
}
static int bpf_object_unload(struct bpf_object *obj)
@ -8445,11 +8446,13 @@ static int bpf_object__resolve_externs(struct bpf_object *obj,
static void bpf_map_prepare_vdata(const struct bpf_map *map)
{
const struct btf_type *type;
struct bpf_struct_ops *st_ops;
__u32 i;
st_ops = map->st_ops;
for (i = 0; i < btf_vlen(st_ops->type); i++) {
type = btf__type_by_id(map->obj->btf, st_ops->type_id);
for (i = 0; i < btf_vlen(type); i++) {
struct bpf_program *prog = st_ops->progs[i];
void *kern_data;
int prog_fd;
@ -9056,6 +9059,11 @@ unsigned int bpf_object__kversion(const struct bpf_object *obj)
return obj ? obj->kern_version : 0;
}
int bpf_object__token_fd(const struct bpf_object *obj)
{
return obj->token_fd ?: -1;
}
struct btf *bpf_object__btf(const struct bpf_object *obj)
{
return obj ? obj->btf : NULL;
@ -9712,6 +9720,7 @@ static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj,
static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
Elf64_Shdr *shdr, Elf_Data *data)
{
const struct btf_type *type;
const struct btf_member *member;
struct bpf_struct_ops *st_ops;
struct bpf_program *prog;
@ -9771,13 +9780,14 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
}
insn_idx = sym->st_value / BPF_INSN_SZ;
member = find_member_by_offset(st_ops->type, moff * 8);
type = btf__type_by_id(btf, st_ops->type_id);
member = find_member_by_offset(type, moff * 8);
if (!member) {
pr_warn("struct_ops reloc %s: cannot find member at moff %u\n",
map->name, moff);
return -EINVAL;
}
member_idx = member - btf_members(st_ops->type);
member_idx = member - btf_members(type);
name = btf__name_by_offset(btf, member->name_off);
if (!resolve_func_ptr(btf, member->type, NULL)) {
@ -11683,7 +11693,7 @@ static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, stru
ret = 0;
break;
case 3:
opts.retprobe = strcmp(probe_type, "uretprobe.multi") == 0;
opts.retprobe = str_has_pfx(probe_type, "uretprobe.multi");
*link = bpf_program__attach_uprobe_multi(prog, -1, binary_path, func_name, &opts);
ret = libbpf_get_error(*link);
break;
@ -13758,29 +13768,13 @@ static int populate_skeleton_progs(const struct bpf_object *obj,
int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
const struct bpf_object_open_opts *opts)
{
DECLARE_LIBBPF_OPTS(bpf_object_open_opts, skel_opts,
.object_name = s->name,
);
struct bpf_object *obj;
int err;
/* Attempt to preserve opts->object_name, unless overriden by user
* explicitly. Overwriting object name for skeletons is discouraged,
* as it breaks global data maps, because they contain object name
* prefix as their own map name prefix. When skeleton is generated,
* bpftool is making an assumption that this name will stay the same.
*/
if (opts) {
memcpy(&skel_opts, opts, sizeof(*opts));
if (!opts->object_name)
skel_opts.object_name = s->name;
}
obj = bpf_object__open_mem(s->data, s->data_sz, &skel_opts);
err = libbpf_get_error(obj);
if (err) {
pr_warn("failed to initialize skeleton BPF object '%s': %d\n",
s->name, err);
obj = bpf_object_open(NULL, s->data, s->data_sz, s->name, opts);
if (IS_ERR(obj)) {
err = PTR_ERR(obj);
pr_warn("failed to initialize skeleton BPF object '%s': %d\n", s->name, err);
return libbpf_err(err);
}

View File

@ -152,7 +152,7 @@ struct bpf_object_open_opts {
* log_buf and log_level settings.
*
* If specified, this log buffer will be passed for:
* - each BPF progral load (BPF_PROG_LOAD) attempt, unless overriden
* - each BPF program load (BPF_PROG_LOAD) attempt, unless overridden
* with bpf_program__set_log() on per-program level, to get
* BPF verifier log output.
* - during BPF object's BTF load into kernel (BPF_BTF_LOAD) to get
@ -294,6 +294,14 @@ LIBBPF_API const char *bpf_object__name(const struct bpf_object *obj);
LIBBPF_API unsigned int bpf_object__kversion(const struct bpf_object *obj);
LIBBPF_API int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version);
/**
* @brief **bpf_object__token_fd** is an accessor for BPF token FD associated
* with BPF object.
* @param obj Pointer to a valid BPF object
* @return BPF token FD or -1, if it wasn't set
*/
LIBBPF_API int bpf_object__token_fd(const struct bpf_object *obj);
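Usage is a one-liner (sketch):

	int token_fd = bpf_object__token_fd(obj);

	if (token_fd < 0)
		; /* no BPF token associated with this object */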
struct btf;
LIBBPF_API struct btf *bpf_object__btf(const struct bpf_object *obj);
LIBBPF_API int bpf_object__btf_fd(const struct bpf_object *obj);
@ -455,7 +463,7 @@ LIBBPF_API int bpf_link__destroy(struct bpf_link *link);
/**
* @brief **bpf_program__attach()** is a generic function for attaching
* a BPF program based on auto-detection of program type, attach type,
* and extra paremeters, where applicable.
* and extra parameters, where applicable.
*
* @param prog BPF program to attach
* @return Reference to the newly created BPF link; or NULL is returned on error,
@ -679,7 +687,7 @@ struct bpf_uprobe_opts {
/**
* @brief **bpf_program__attach_uprobe()** attaches a BPF program
* to the userspace function which is found by binary path and
* offset. You can optionally specify a particular proccess to attach
* offset. You can optionally specify a particular process to attach
* to. You can also optionally attach the program to the function
* exit instead of entry.
*
@ -1593,11 +1601,11 @@ LIBBPF_API int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_i
* memory region of the ring buffer.
* This ring buffer can be used to implement a custom events consumer.
* The ring buffer starts with the *struct perf_event_mmap_page*, which
* holds the ring buffer managment fields, when accessing the header
* holds the ring buffer management fields, when accessing the header
* structure it's important to be SMP aware.
* You can refer to *perf_event_read_simple* for a simple example.
* @param pb the perf buffer structure
* @param buf_idx the buffer index to retreive
* @param buf_idx the buffer index to retrieve
* @param buf (out) gets the base pointer of the mmap()'ed memory
* @param buf_size (out) gets the size of the mmap()'ed region
* @return 0 on success, negative error code for failure
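A hedged consumer sketch, assuming pb is a valid perf_buffer and buffer index 0 exists:

void *buf = NULL;
size_t buf_size = 0;

if (!perf_buffer__buffer(pb, 0, &buf, &buf_size)) {
	/* header holds the ring management fields; mind SMP ordering */
	struct perf_event_mmap_page *header = buf;

	/* ... custom event consumption over the mmap()'ed region ... */
}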


@ -423,6 +423,7 @@ LIBBPF_1.5.0 {
btf__relocate;
bpf_map__autoattach;
bpf_map__set_autoattach;
bpf_object__token_fd;
bpf_program__attach_sockmap;
ring__consume_n;
ring_buffer__consume_n;


@ -76,7 +76,7 @@ enum libbpf_strict_mode {
* first BPF program or map creation operation. This is done only if
* kernel is too old to support memcg-based memory accounting for BPF
* subsystem. By default, RLIMIT_MEMLOCK limit is set to RLIM_INFINITY,
* but it can be overriden with libbpf_set_memlock_rlim() API.
* but it can be overridden with libbpf_set_memlock_rlim() API.
* Note that libbpf_set_memlock_rlim() needs to be called before
* the very first bpf_prog_load(), bpf_map_create() or bpf_object__load()
* operation.
@ -97,7 +97,7 @@ LIBBPF_API int libbpf_set_strict_mode(enum libbpf_strict_mode mode);
* @brief **libbpf_get_error()** extracts the error code from the passed
* pointer
* @param ptr pointer returned from libbpf API function
* @return error code; or 0 if no error occured
* @return error code; or 0 if no error occurred
*
* Note, as of libbpf 1.0 this function is not necessary and not recommended
* to be used. Libbpf doesn't return error code embedded into the pointer


@ -1413,7 +1413,7 @@ static bool glob_sym_btf_matches(const char *sym_name, bool exact,
return true;
case BTF_KIND_PTR:
/* just validate overall shape of the referenced type, so no
* contents comparison for struct/union, and allowd fwd vs
* contents comparison for struct/union, and allowed fwd vs
* struct/union
*/
exact = false;
@ -1962,7 +1962,7 @@ static int linker_append_elf_sym(struct bpf_linker *linker, struct src_obj *obj,
/* If existing symbol is a strong resolved symbol, bail out,
* because we lost the resolution battle and have nothing to
* contribute. We already checked abover that there is no
* contribute. We already checked above that there is no
* strong-strong conflict. We also already tightened binding
* and visibility, so nothing else to contribute at that point.
*/


@ -107,7 +107,7 @@ static inline void skel_free(const void *p)
* The loader program will perform probe_read_kernel() from maps.rodata.initial_value.
* skel_finalize_map_data() sets skel->rodata to point to actual value in a bpf map and
* does maps.rodata.initial_value = ~0ULL to signal skel_free_map_data() that kvfree
* is not nessary.
* is not necessary.
*
* For user space:
* skel_prep_map_data() mmaps anon memory into skel->rodata that can be accessed directly.


@ -39,7 +39,7 @@ enum __bpf_usdt_arg_type {
struct __bpf_usdt_arg_spec {
/* u64 scalar interpreted depending on arg_type, see below */
__u64 val_off;
/* arg location case, see bpf_udst_arg() for details */
/* arg location case, see bpf_usdt_arg() for details */
enum __bpf_usdt_arg_type arg_type;
/* offset of referenced register within struct pt_regs */
short reg_off;


@ -8,8 +8,8 @@ test_lru_map
test_lpm_map
test_tag
FEATURE-DUMP.libbpf
FEATURE-DUMP.selftests
fixdep
test_dev_cgroup
/test_progs
/test_progs-no_alu32
/test_progs-bpf_gcc
@ -20,9 +20,6 @@ test_sock
urandom_read
test_sockmap
test_lirc_mode2_user
get_cgroup_id_user
test_skb_cgroup_id_user
test_cgroup_storage
test_flow_dissector
flow_dissector_load
test_tcpnotify_user
@ -31,6 +28,7 @@ test_tcp_check_syncookie_user
test_sysctl
xdping
test_cpp
*.d
*.subskel.h
*.skel.h
*.lskel.h


@ -0,0 +1,3 @@
# riscv64 deny list for BPF CI and local vmtest
exceptions # JIT does not support exceptions
tailcalls/tailcall_bpf2bpf* # JIT does not support mixing bpf2bpf and tailcalls


@ -33,6 +33,13 @@ OPT_FLAGS ?= $(if $(RELEASE),-O2,-O0)
LIBELF_CFLAGS := $(shell $(PKG_CONFIG) libelf --cflags 2>/dev/null)
LIBELF_LIBS := $(shell $(PKG_CONFIG) libelf --libs 2>/dev/null || echo -lelf)
ifeq ($(srctree),)
srctree := $(patsubst %/,%,$(dir $(CURDIR)))
srctree := $(patsubst %/,%,$(dir $(srctree)))
srctree := $(patsubst %/,%,$(dir $(srctree)))
srctree := $(patsubst %/,%,$(dir $(srctree)))
endif
CFLAGS += -g $(OPT_FLAGS) -rdynamic \
-Wall -Werror -fno-omit-frame-pointer \
$(GENFLAGS) $(SAN_CFLAGS) $(LIBELF_CFLAGS) \
@ -41,6 +48,11 @@ CFLAGS += -g $(OPT_FLAGS) -rdynamic \
LDFLAGS += $(SAN_LDFLAGS)
LDLIBS += $(LIBELF_LIBS) -lz -lrt -lpthread
PCAP_CFLAGS := $(shell $(PKG_CONFIG) --cflags libpcap 2>/dev/null && echo "-DTRAFFIC_MONITOR=1")
PCAP_LIBS := $(shell $(PKG_CONFIG) --libs libpcap 2>/dev/null)
LDLIBS += $(PCAP_LIBS)
CFLAGS += $(PCAP_CFLAGS)
# The following tests perform type punning and they may break strict
# aliasing rules, which are exploited by both GCC and clang by default
# while optimizing. This can lead to broken programs.
@ -54,6 +66,10 @@ progs/test_pkt_md_access.c-CFLAGS := -fno-strict-aliasing
progs/test_sk_lookup.c-CFLAGS := -fno-strict-aliasing
progs/timer_crash.c-CFLAGS := -fno-strict-aliasing
progs/test_global_func9.c-CFLAGS := -fno-strict-aliasing
progs/verifier_nocsr.c-CFLAGS := -fno-strict-aliasing
# Some utility functions use LLVM libraries
jit_disasm_helpers.c-CFLAGS = $(LLVM_CFLAGS)
ifneq ($(LLVM),)
# Silence some warnings when compiled with clang
@ -67,9 +83,7 @@ endif
# Order correspond to 'make run_tests' order
TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
test_dev_cgroup \
test_sock test_sockmap get_cgroup_id_user \
test_cgroup_storage \
test_sock test_sockmap \
test_tcpnotify_user test_sysctl \
test_progs-no_alu32
TEST_INST_SUBDIRS := no_alu32
@ -115,7 +129,6 @@ TEST_PROGS := test_kmod.sh \
test_xdp_redirect.sh \
test_xdp_redirect_multi.sh \
test_xdp_meta.sh \
test_xdp_veth.sh \
test_tunnel.sh \
test_lwt_seg6local.sh \
test_lirc_mode2.sh \
@ -140,7 +153,7 @@ TEST_PROGS_EXTENDED := with_addr.sh \
test_xdp_vlan.sh test_bpftool.py
# Compile but not part of 'make run_tests'
TEST_GEN_PROGS_EXTENDED = test_skb_cgroup_id_user \
TEST_GEN_PROGS_EXTENDED = \
flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \
test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \
xskxceiver xdp_redirect_multi xdp_synproxy veristat xdp_hw_metadata \
@ -166,6 +179,35 @@ endef
include ../lib.mk
NON_CHECK_FEAT_TARGETS := clean docs-clean
CHECK_FEAT := $(filter-out $(NON_CHECK_FEAT_TARGETS),$(or $(MAKECMDGOALS), "none"))
ifneq ($(CHECK_FEAT),)
FEATURE_USER := .selftests
FEATURE_TESTS := llvm
FEATURE_DISPLAY := $(FEATURE_TESTS)
# Makefile.feature expects OUTPUT to end with a slash
ifeq ($(shell expr $(MAKE_VERSION) \>= 4.4), 1)
$(let OUTPUT,$(OUTPUT)/,\
$(eval include ../../../build/Makefile.feature))
else
OUTPUT := $(OUTPUT)/
$(eval include ../../../build/Makefile.feature)
OUTPUT := $(patsubst %/,%,$(OUTPUT))
endif
endif
ifeq ($(feature-llvm),1)
LLVM_CFLAGS += -DHAVE_LLVM_SUPPORT
LLVM_CONFIG_LIB_COMPONENTS := mcdisassembler all-targets
# both llvm-config and lib.mk add -D_GNU_SOURCE, which ends up as a conflict
LLVM_CFLAGS += $(filter-out -D_GNU_SOURCE,$(shell $(LLVM_CONFIG) --cflags))
LLVM_LDLIBS += $(shell $(LLVM_CONFIG) --link-static --libs $(LLVM_CONFIG_LIB_COMPONENTS))
LLVM_LDLIBS += $(shell $(LLVM_CONFIG) --link-static --system-libs $(LLVM_CONFIG_LIB_COMPONENTS))
LLVM_LDLIBS += -lstdc++
LLVM_LDFLAGS += $(shell $(LLVM_CONFIG) --ldflags)
endif
SCRATCH_DIR := $(OUTPUT)/tools
BUILD_DIR := $(SCRATCH_DIR)/build
INCLUDE_DIR := $(SCRATCH_DIR)/include
@ -293,13 +335,9 @@ JSON_WRITER := $(OUTPUT)/json_writer.o
CAP_HELPERS := $(OUTPUT)/cap_helpers.o
NETWORK_HELPERS := $(OUTPUT)/network_helpers.o
$(OUTPUT)/test_dev_cgroup: $(CGROUP_HELPERS) $(TESTING_HELPERS)
$(OUTPUT)/test_skb_cgroup_id_user: $(CGROUP_HELPERS) $(TESTING_HELPERS)
$(OUTPUT)/test_sock: $(CGROUP_HELPERS) $(TESTING_HELPERS)
$(OUTPUT)/test_sockmap: $(CGROUP_HELPERS) $(TESTING_HELPERS)
$(OUTPUT)/test_tcpnotify_user: $(CGROUP_HELPERS) $(TESTING_HELPERS) $(TRACE_HELPERS)
$(OUTPUT)/get_cgroup_id_user: $(CGROUP_HELPERS) $(TESTING_HELPERS)
$(OUTPUT)/test_cgroup_storage: $(CGROUP_HELPERS) $(TESTING_HELPERS)
$(OUTPUT)/test_sock_fields: $(CGROUP_HELPERS) $(TESTING_HELPERS)
$(OUTPUT)/test_sysctl: $(CGROUP_HELPERS) $(TESTING_HELPERS)
$(OUTPUT)/test_tag: $(TESTING_HELPERS)
@ -365,10 +403,14 @@ $(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
DESTDIR=$(HOST_SCRATCH_DIR)/ prefix= all install_headers
endif
# vmlinux.h is first dumped to a temporary file and then compared to
# the previous version. This helps to avoid unnecessary re-builds of
# $(TRUNNER_BPF_OBJS)
$(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL) | $(INCLUDE_DIR)
ifeq ($(VMLINUX_H),)
$(call msg,GEN,,$@)
$(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@
$(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $(INCLUDE_DIR)/.vmlinux.h.tmp
$(Q)cmp -s $(INCLUDE_DIR)/.vmlinux.h.tmp $@ || mv $(INCLUDE_DIR)/.vmlinux.h.tmp $@
else
$(call msg,CP,,$@)
$(Q)cp "$(VMLINUX_H)" $@
@ -396,7 +438,8 @@ define get_sys_includes
$(shell $(1) $(2) -v -E - </dev/null 2>&1 \
| sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \
$(shell $(1) $(2) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}') \
$(shell $(1) $(2) -dM -E - </dev/null | grep '__loongarch_grlen ' | awk '{printf("-D__BITS_PER_LONG=%d", $$3)}')
$(shell $(1) $(2) -dM -E - </dev/null | grep '__loongarch_grlen ' | awk '{printf("-D__BITS_PER_LONG=%d", $$3)}') \
$(shell $(1) $(2) -dM -E - </dev/null | grep -E 'MIPS(EL|EB)|_MIPS_SZ(PTR|LONG) |_MIPS_SIM |_ABI(O32|N32|64) ' | awk '{printf("-D%s=%s ", $$2, $$3)}')
endef
# Determine target endianness.
@ -427,23 +470,24 @@ $(OUTPUT)/cgroup_getset_retval_hooks.o: cgroup_getset_retval_hooks.h
# $1 - input .c file
# $2 - output .o file
# $3 - CFLAGS
# $4 - binary name
define CLANG_BPF_BUILD_RULE
$(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2)
$(call msg,CLNG-BPF,$4,$2)
$(Q)$(CLANG) $3 -O2 --target=bpf -c $1 -mcpu=v3 -o $2
endef
# Similar to CLANG_BPF_BUILD_RULE, but with disabled alu32
define CLANG_NOALU32_BPF_BUILD_RULE
$(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2)
$(call msg,CLNG-BPF,$4,$2)
$(Q)$(CLANG) $3 -O2 --target=bpf -c $1 -mcpu=v2 -o $2
endef
# Similar to CLANG_BPF_BUILD_RULE, but with cpu-v4
define CLANG_CPUV4_BPF_BUILD_RULE
$(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2)
$(call msg,CLNG-BPF,$4,$2)
$(Q)$(CLANG) $3 -O2 --target=bpf -c $1 -mcpu=v4 -o $2
endef
# Build BPF object using GCC
define GCC_BPF_BUILD_RULE
$(call msg,GCC-BPF,$(TRUNNER_BINARY),$2)
$(call msg,GCC-BPF,$4,$2)
$(Q)$(BPF_GCC) $3 -DBPF_NO_PRESERVE_ACCESS_INDEX -Wno-attributes -O2 -c $1 -o $2
endef
@ -477,7 +521,14 @@ xsk_xdp_progs.skel.h-deps := xsk_xdp_progs.bpf.o
xdp_hw_metadata.skel.h-deps := xdp_hw_metadata.bpf.o
xdp_features.skel.h-deps := xdp_features.bpf.o
LINKED_BPF_SRCS := $(patsubst %.bpf.o,%.c,$(foreach skel,$(LINKED_SKELS),$($(skel)-deps)))
LINKED_BPF_OBJS := $(foreach skel,$(LINKED_SKELS),$($(skel)-deps))
LINKED_BPF_SRCS := $(patsubst %.bpf.o,%.c,$(LINKED_BPF_OBJS))
HEADERS_FOR_BPF_OBJS := $(wildcard $(BPFDIR)/*.bpf.h) \
$(addprefix $(BPFDIR)/, bpf_core_read.h \
bpf_endian.h \
bpf_helpers.h \
bpf_tracing.h)
# Set up extra TRUNNER_XXX "temporary" variables in the environment (relies on
# $eval()) and pass control to DEFINE_TEST_RUNNER_RULES.
@ -529,13 +580,12 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.bpf.o: \
$(TRUNNER_BPF_PROGS_DIR)/%.c \
$(TRUNNER_BPF_PROGS_DIR)/*.h \
$$(INCLUDE_DIR)/vmlinux.h \
$(wildcard $(BPFDIR)/bpf_*.h) \
$(wildcard $(BPFDIR)/*.bpf.h) \
$(HEADERS_FOR_BPF_OBJS) \
| $(TRUNNER_OUTPUT) $$(BPFOBJ)
$$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \
$(TRUNNER_BPF_CFLAGS) \
$$($$<-CFLAGS) \
$$($$<-$2-CFLAGS))
$$($$<-$2-CFLAGS),$(TRUNNER_BINARY))
$(TRUNNER_BPF_SKELS): %.skel.h: %.bpf.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
$$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@)
@ -556,7 +606,11 @@ $(TRUNNER_BPF_LSKELS): %.lskel.h: %.bpf.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
$(Q)$$(BPFTOOL) gen skeleton -L $$(<:.o=.llinked3.o) name $$(notdir $$(<:.bpf.o=_lskel)) > $$@
$(Q)rm -f $$(<:.o=.llinked1.o) $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o)
$(TRUNNER_BPF_SKELS_LINKED): $(TRUNNER_BPF_OBJS) $(BPFTOOL) | $(TRUNNER_OUTPUT)
$(LINKED_BPF_OBJS): %: $(TRUNNER_OUTPUT)/%
# .SECONDEXPANSION here allows correct expansion of %-deps variables as prerequisites
.SECONDEXPANSION:
$(TRUNNER_BPF_SKELS_LINKED): $(TRUNNER_OUTPUT)/%: $$$$(%-deps) $(BPFTOOL) | $(TRUNNER_OUTPUT)
$$(call msg,LINK-BPF,$(TRUNNER_BINARY),$$(@:.skel.h=.bpf.o))
$(Q)$$(BPFTOOL) gen object $$(@:.skel.h=.linked1.o) $$(addprefix $(TRUNNER_OUTPUT)/,$$($$(@F)-deps))
$(Q)$$(BPFTOOL) gen object $$(@:.skel.h=.linked2.o) $$(@:.skel.h=.linked1.o)
@ -566,6 +620,14 @@ $(TRUNNER_BPF_SKELS_LINKED): $(TRUNNER_BPF_OBJS) $(BPFTOOL) | $(TRUNNER_OUTPUT)
$(Q)$$(BPFTOOL) gen skeleton $$(@:.skel.h=.linked3.o) name $$(notdir $$(@:.skel.h=)) > $$@
$(Q)$$(BPFTOOL) gen subskeleton $$(@:.skel.h=.linked3.o) name $$(notdir $$(@:.skel.h=)) > $$(@:.skel.h=.subskel.h)
$(Q)rm -f $$(@:.skel.h=.linked1.o) $$(@:.skel.h=.linked2.o) $$(@:.skel.h=.linked3.o)
# When the compiler generates a %.d file, only skel basenames (not
# full paths) are specified as prerequisites for the corresponding %.o
# file. This target makes each %.skel.h basename depend on the full path,
# linking the generated %.d dependency with the actual %.skel.h files.
$(notdir %.skel.h): $(TRUNNER_OUTPUT)/%.skel.h
@true
endif
# ensure we set up tests.h header generation rule just once
@ -583,14 +645,25 @@ endif
# Note: we cd into output directory to ensure embedded BPF object is found
$(TRUNNER_TEST_OBJS): $(TRUNNER_OUTPUT)/%.test.o: \
$(TRUNNER_TESTS_DIR)/%.c \
$(TRUNNER_EXTRA_HDRS) \
$(TRUNNER_BPF_OBJS) \
$(TRUNNER_BPF_SKELS) \
$(TRUNNER_BPF_LSKELS) \
$(TRUNNER_BPF_SKELS_LINKED) \
$$(BPFOBJ) | $(TRUNNER_OUTPUT)
| $(TRUNNER_OUTPUT)/%.test.d
$$(call msg,TEST-OBJ,$(TRUNNER_BINARY),$$@)
$(Q)cd $$(@D) && $$(CC) -I. $$(CFLAGS) -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F)
$(Q)cd $$(@D) && $$(CC) -I. $$(CFLAGS) -MMD -MT $$@ -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F)
$(TRUNNER_TEST_OBJS:.o=.d): $(TRUNNER_OUTPUT)/%.test.d: \
$(TRUNNER_TESTS_DIR)/%.c \
$(TRUNNER_EXTRA_HDRS) \
$(TRUNNER_BPF_SKELS) \
$(TRUNNER_BPF_LSKELS) \
$(TRUNNER_BPF_SKELS_LINKED) \
$$(BPFOBJ) | $(TRUNNER_OUTPUT)
ifeq ($(filter clean docs-clean,$(MAKECMDGOALS)),)
include $(wildcard $(TRUNNER_TEST_OBJS:.o=.d))
endif
# add per extra obj CFLAGS definitions
$(foreach N,$(patsubst $(TRUNNER_OUTPUT)/%.o,%,$(TRUNNER_EXTRA_OBJS)), \
$(eval $(TRUNNER_OUTPUT)/$(N).o: CFLAGS += $($(N).c-CFLAGS)))
$(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \
%.c \
@ -608,13 +681,19 @@ ifneq ($2:$(OUTPUT),:$(shell pwd))
$(Q)rsync -aq $$^ $(TRUNNER_OUTPUT)/
endif
$(OUTPUT)/$(TRUNNER_BINARY): LDLIBS += $$(LLVM_LDLIBS)
$(OUTPUT)/$(TRUNNER_BINARY): LDFLAGS += $$(LLVM_LDFLAGS)
# some X.test.o files have runtime dependencies on Y.bpf.o files
$(OUTPUT)/$(TRUNNER_BINARY): | $(TRUNNER_BPF_OBJS)
$(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \
$(TRUNNER_EXTRA_OBJS) $$(BPFOBJ) \
$(RESOLVE_BTFIDS) \
$(TRUNNER_BPFTOOL) \
| $(TRUNNER_BINARY)-extras
$$(call msg,BINARY,,$$@)
$(Q)$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@
$(Q)$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) $$(LDFLAGS) -o $$@
$(Q)$(RESOLVE_BTFIDS) --btf $(TRUNNER_OUTPUT)/btf_data.bpf.o $$@
$(Q)ln -sf $(if $2,..,.)/tools/build/bpftool/$(USE_BOOTSTRAP)bpftool \
$(OUTPUT)/$(if $2,$2/)bpftool
@ -633,9 +712,11 @@ TRUNNER_EXTRA_SOURCES := test_progs.c \
cap_helpers.c \
unpriv_helpers.c \
netlink_helpers.c \
jit_disasm_helpers.c \
test_loader.c \
xsk.c \
disasm.c \
disasm_helpers.c \
json_writer.c \
flow_dissector_load.h \
ip_check_defrag_frags.h
@ -762,17 +843,21 @@ $(OUTPUT)/veristat: $(OUTPUT)/veristat.o
$(call msg,BINARY,,$@)
$(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@
$(OUTPUT)/uprobe_multi: uprobe_multi.c
# Linking uprobe_multi can fail due to relocation overflows on mips.
$(OUTPUT)/uprobe_multi: CFLAGS += $(if $(filter mips, $(ARCH)),-mxgot)
$(OUTPUT)/uprobe_multi: uprobe_multi.c uprobe_multi.ld
$(call msg,BINARY,,$@)
$(Q)$(CC) $(CFLAGS) -O0 $(LDFLAGS) $^ $(LDLIBS) -o $@
$(Q)$(CC) $(CFLAGS) -Wl,-T,uprobe_multi.ld -O0 $(LDFLAGS) \
$(filter-out %.ld,$^) $(LDLIBS) -o $@
EXTRA_CLEAN := $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \
prog_tests/tests.h map_tests/tests.h verifier/tests.h \
feature bpftool \
$(addprefix $(OUTPUT)/,*.o *.skel.h *.lskel.h *.subskel.h \
feature bpftool \
$(addprefix $(OUTPUT)/,*.o *.d *.skel.h *.lskel.h *.subskel.h \
no_alu32 cpuv4 bpf_gcc bpf_testmod.ko \
bpf_test_no_cfi.ko \
liburandom_read.so)
liburandom_read.so) \
$(OUTPUT)/FEATURE-DUMP.selftests
.PHONY: docs docs-clean


@ -85,7 +85,37 @@ In case of linker errors when running selftests, try using static linking:
If you want to change pahole and llvm, you can change the `PATH` environment
variable at the beginning of the script.
.. note:: The script currently only supports x86_64 and s390x architectures.
Running vmtest on RV64
======================
To speed up testing and avoid various dependency issues, it is recommended to
run vmtest in a Docker container. Before running vmtest, we need to prepare the
Docker container and a local rootfs image. The overall steps are as follows:
1. Create the Docker container as shown in link [0].
2. Use the mkrootfs_debian.sh script [1] to build a local rootfs image:
.. code-block:: console
$ sudo ./mkrootfs_debian.sh --arch riscv64 --distro noble
3. Start the Docker container [0] and run vmtest inside it:
.. code-block:: console
$ PLATFORM=riscv64 CROSS_COMPILE=riscv64-linux-gnu- \
tools/testing/selftests/bpf/vmtest.sh \
-l <path of local rootfs image> -- \
./test_progs -d \
\"$(cat tools/testing/selftests/bpf/DENYLIST.riscv64 \
| cut -d'#' -f1 \
| sed -e 's/^[[:space:]]*//' \
-e 's/[[:space:]]*$//' \
| tr -s '\n' ',' \
)\"
Link: https://github.com/pulehui/riscv-bpf-vmtest.git [0]
Link: https://github.com/libbpf/ci/blob/main/rootfs/mkrootfs_debian.sh [1]
Additional information about selftest failures is documented here.


@ -10,6 +10,7 @@
#include <sys/sysinfo.h>
#include <signal.h>
#include "bench.h"
#include "bpf_util.h"
#include "testing_helpers.h"
struct env env = {
@ -519,6 +520,12 @@ extern const struct bench bench_trig_uprobe_push;
extern const struct bench bench_trig_uretprobe_push;
extern const struct bench bench_trig_uprobe_ret;
extern const struct bench bench_trig_uretprobe_ret;
extern const struct bench bench_trig_uprobe_multi_nop;
extern const struct bench bench_trig_uretprobe_multi_nop;
extern const struct bench bench_trig_uprobe_multi_push;
extern const struct bench bench_trig_uretprobe_multi_push;
extern const struct bench bench_trig_uprobe_multi_ret;
extern const struct bench bench_trig_uretprobe_multi_ret;
extern const struct bench bench_rb_libbpf;
extern const struct bench bench_rb_custom;
@ -573,6 +580,12 @@ static const struct bench *benchs[] = {
&bench_trig_uretprobe_push,
&bench_trig_uprobe_ret,
&bench_trig_uretprobe_ret,
&bench_trig_uprobe_multi_nop,
&bench_trig_uretprobe_multi_nop,
&bench_trig_uprobe_multi_push,
&bench_trig_uretprobe_multi_push,
&bench_trig_uprobe_multi_ret,
&bench_trig_uretprobe_multi_ret,
/* ringbuf/perfbuf benchmarks */
&bench_rb_libbpf,
&bench_rb_custom,


@ -10,6 +10,7 @@
#include <math.h>
#include <time.h>
#include <sys/syscall.h>
#include <limits.h>
struct cpu_set {
bool *cpus;


@ -276,7 +276,7 @@ static void trigger_rawtp_setup(void)
* instructions. So use two different targets, one of which starts with nop
* and another doesn't.
*
* GCC doesn't generate stack setup preample for these functions due to them
* GCC doesn't generate stack setup preamble for these functions due to them
* having no input arguments and doing nothing in the body.
*/
__nocf_check __weak void uprobe_target_nop(void)
@ -332,7 +332,7 @@ static void *uprobe_producer_ret(void *input)
return NULL;
}
static void usetup(bool use_retprobe, void *target_addr)
static void usetup(bool use_retprobe, bool use_multi, void *target_addr)
{
size_t uprobe_offset;
struct bpf_link *link;
@ -346,7 +346,10 @@ static void usetup(bool use_retprobe, void *target_addr)
exit(1);
}
bpf_program__set_autoload(ctx.skel->progs.bench_trigger_uprobe, true);
if (use_multi)
bpf_program__set_autoload(ctx.skel->progs.bench_trigger_uprobe_multi, true);
else
bpf_program__set_autoload(ctx.skel->progs.bench_trigger_uprobe, true);
err = trigger_bench__load(ctx.skel);
if (err) {
@ -355,16 +358,28 @@ static void usetup(bool use_retprobe, void *target_addr)
}
uprobe_offset = get_uprobe_offset(target_addr);
link = bpf_program__attach_uprobe(ctx.skel->progs.bench_trigger_uprobe,
use_retprobe,
-1 /* all PIDs */,
"/proc/self/exe",
uprobe_offset);
if (use_multi) {
LIBBPF_OPTS(bpf_uprobe_multi_opts, opts,
.retprobe = use_retprobe,
.cnt = 1,
.offsets = &uprobe_offset,
);
link = bpf_program__attach_uprobe_multi(
ctx.skel->progs.bench_trigger_uprobe_multi,
-1 /* all PIDs */, "/proc/self/exe", NULL, &opts);
ctx.skel->links.bench_trigger_uprobe_multi = link;
} else {
link = bpf_program__attach_uprobe(ctx.skel->progs.bench_trigger_uprobe,
use_retprobe,
-1 /* all PIDs */,
"/proc/self/exe",
uprobe_offset);
ctx.skel->links.bench_trigger_uprobe = link;
}
if (!link) {
fprintf(stderr, "failed to attach uprobe!\n");
fprintf(stderr, "failed to attach %s!\n", use_multi ? "multi-uprobe" : "uprobe");
exit(1);
}
ctx.skel->links.bench_trigger_uprobe = link;
}
static void usermode_count_setup(void)
@ -374,32 +389,62 @@ static void usermode_count_setup(void)
static void uprobe_nop_setup(void)
{
usetup(false, &uprobe_target_nop);
usetup(false, false /* !use_multi */, &uprobe_target_nop);
}
static void uretprobe_nop_setup(void)
{
usetup(true, &uprobe_target_nop);
usetup(true, false /* !use_multi */, &uprobe_target_nop);
}
static void uprobe_push_setup(void)
{
usetup(false, &uprobe_target_push);
usetup(false, false /* !use_multi */, &uprobe_target_push);
}
static void uretprobe_push_setup(void)
{
usetup(true, &uprobe_target_push);
usetup(true, false /* !use_multi */, &uprobe_target_push);
}
static void uprobe_ret_setup(void)
{
usetup(false, &uprobe_target_ret);
usetup(false, false /* !use_multi */, &uprobe_target_ret);
}
static void uretprobe_ret_setup(void)
{
usetup(true, &uprobe_target_ret);
usetup(true, false /* !use_multi */, &uprobe_target_ret);
}
static void uprobe_multi_nop_setup(void)
{
usetup(false, true /* use_multi */, &uprobe_target_nop);
}
static void uretprobe_multi_nop_setup(void)
{
usetup(true, true /* use_multi */, &uprobe_target_nop);
}
static void uprobe_multi_push_setup(void)
{
usetup(false, true /* use_multi */, &uprobe_target_push);
}
static void uretprobe_multi_push_setup(void)
{
usetup(true, true /* use_multi */, &uprobe_target_push);
}
static void uprobe_multi_ret_setup(void)
{
usetup(false, true /* use_multi */, &uprobe_target_ret);
}
static void uretprobe_multi_ret_setup(void)
{
usetup(true, true /* use_multi */, &uprobe_target_ret);
}
const struct bench bench_trig_syscall_count = {
@ -454,3 +499,9 @@ BENCH_TRIG_USERMODE(uprobe_ret, ret, "uprobe-ret");
BENCH_TRIG_USERMODE(uretprobe_nop, nop, "uretprobe-nop");
BENCH_TRIG_USERMODE(uretprobe_push, push, "uretprobe-push");
BENCH_TRIG_USERMODE(uretprobe_ret, ret, "uretprobe-ret");
BENCH_TRIG_USERMODE(uprobe_multi_nop, nop, "uprobe-multi-nop");
BENCH_TRIG_USERMODE(uprobe_multi_push, push, "uprobe-multi-push");
BENCH_TRIG_USERMODE(uprobe_multi_ret, ret, "uprobe-multi-ret");
BENCH_TRIG_USERMODE(uretprobe_multi_nop, nop, "uretprobe-multi-nop");
BENCH_TRIG_USERMODE(uretprobe_multi_push, push, "uretprobe-multi-push");
BENCH_TRIG_USERMODE(uretprobe_multi_ret, ret, "uretprobe-multi-ret");


@ -195,6 +195,32 @@ extern void bpf_iter_task_vma_destroy(struct bpf_iter_task_vma *it) __ksym;
*/
extern void bpf_throw(u64 cookie) __ksym;
/* Description
* Acquire a reference on the exe_file member field belonging to the
* mm_struct that is nested within the supplied task_struct. The supplied
* task_struct must be trusted/referenced.
* Returns
* A referenced file pointer pointing to the exe_file member field of the
* mm_struct nested in the supplied task_struct, or NULL.
*/
extern struct file *bpf_get_task_exe_file(struct task_struct *task) __ksym;
/* Description
* Release a reference on the supplied file. The supplied file must be
* acquired.
*/
extern void bpf_put_file(struct file *file) __ksym;
/* Description
* Resolve a pathname for the supplied path and store it in the supplied
* buffer. The supplied path must be trusted/referenced.
* Returns
* A positive integer corresponding to the length of the resolved pathname,
* including the NULL termination character, stored in the supplied
* buffer. On error, a negative integer is returned.
*/
extern int bpf_path_d_path(struct path *path, char *buf, size_t buf__sz) __ksym;
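A hedged BPF-side sketch tying the three kfuncs together in a sleepable LSM program; the hook choice, buffer size, and the usual vmlinux.h/bpf_helpers.h/bpf_tracing.h includes are assumptions:

SEC("lsm.s/file_open")
int BPF_PROG(log_exe_path, struct file *file)
{
	struct task_struct *task = bpf_get_current_task_btf(); /* trusted */
	struct file *exe_file;
	char path[256];
	int len;

	exe_file = bpf_get_task_exe_file(task);
	if (!exe_file)
		return 0;
	len = bpf_path_d_path(&exe_file->f_path, path, sizeof(path));
	if (len > 0)
		bpf_printk("exe: %s", path);
	bpf_put_file(exe_file); /* drop the acquired reference */
	return 0;
}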
/* This macro must be used to mark the exception callback corresponding to the
* main program. For example:
*


@ -45,7 +45,7 @@ extern int bpf_dynptr_clone(const struct bpf_dynptr *ptr, struct bpf_dynptr *clo
/* Description
* Modify the address of a AF_UNIX sockaddr.
* Returns__bpf_kfunc
* Returns
* -EINVAL if the address size is too big, or 0 if the sockaddr was successfully modified.
*/
extern int bpf_sock_addr_set_sun_path(struct bpf_sock_addr_kern *sa_kern,
@ -78,4 +78,13 @@ extern int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_ptr,
extern bool bpf_session_is_return(void) __ksym __weak;
extern __u64 *bpf_session_cookie(void) __ksym __weak;
struct dentry;
/* Description
* Returns xattr of a dentry
* Returns
* Error code
*/
extern int bpf_get_dentry_xattr(struct dentry *dentry, const char *name,
struct bpf_dynptr *value_ptr) __ksym __weak;
#endif
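A hedged sketch of the xattr kfunc with a dynptr over a global buffer; the hook, attribute name, and buffer size are illustrative:

char xattr_buf[64]; /* global, so bpf_dynptr_from_mem() can wrap it */

SEC("lsm.s/inode_getxattr")
int BPF_PROG(read_xattr, struct dentry *dentry, const char *name)
{
	struct bpf_dynptr value_ptr;

	bpf_dynptr_from_mem(xattr_buf, sizeof(xattr_buf), 0, &value_ptr);
	if (bpf_get_dentry_xattr(dentry, "user.example", &value_ptr) < 0)
		return 0; /* attribute missing or unreadable */
	/* ... inspect xattr_buf ... */
	return 0;
}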


@ -17,6 +17,7 @@
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/un.h>
#include <linux/filter.h>
#include <net/sock.h>
#include <linux/namei.h>
#include "bpf_testmod.h"
@ -141,13 +142,12 @@ bpf_testmod_test_mod_kfunc(int i)
__bpf_kfunc int bpf_iter_testmod_seq_new(struct bpf_iter_testmod_seq *it, s64 value, int cnt)
{
if (cnt < 0) {
it->cnt = 0;
it->cnt = cnt;
if (cnt < 0)
return -EINVAL;
}
it->value = value;
it->cnt = cnt;
return 0;
}
@ -162,6 +162,14 @@ __bpf_kfunc s64 *bpf_iter_testmod_seq_next(struct bpf_iter_testmod_seq* it)
return &it->value;
}
__bpf_kfunc s64 bpf_iter_testmod_seq_value(int val, struct bpf_iter_testmod_seq* it__iter)
{
if (it__iter->cnt < 0)
return 0;
return val + it__iter->value;
}
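The new kfunc takes an in-progress iterator as an argument; a hedged BPF-side sketch:

struct bpf_iter_testmod_seq it;
s64 sum = 0, *v;

bpf_iter_testmod_seq_new(&it, 100 /* start value */, 3 /* cnt */);
while ((v = bpf_iter_testmod_seq_next(&it)))
	sum += bpf_iter_testmod_seq_value(0, &it); /* iterator as kfunc arg */
bpf_iter_testmod_seq_destroy(&it);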
__bpf_kfunc void bpf_iter_testmod_seq_destroy(struct bpf_iter_testmod_seq *it)
{
it->cnt = 0;
@ -176,6 +184,36 @@ __bpf_kfunc void bpf_kfunc_dynptr_test(struct bpf_dynptr *ptr,
{
}
__bpf_kfunc struct sk_buff *bpf_kfunc_nested_acquire_nonzero_offset_test(struct sk_buff_head *ptr)
{
return NULL;
}
__bpf_kfunc struct sk_buff *bpf_kfunc_nested_acquire_zero_offset_test(struct sock_common *ptr)
{
return NULL;
}
__bpf_kfunc void bpf_kfunc_nested_release_test(struct sk_buff *ptr)
{
}
__bpf_kfunc void bpf_kfunc_trusted_vma_test(struct vm_area_struct *ptr)
{
}
__bpf_kfunc void bpf_kfunc_trusted_task_test(struct task_struct *ptr)
{
}
__bpf_kfunc void bpf_kfunc_trusted_num_test(int *ptr)
{
}
__bpf_kfunc void bpf_kfunc_rcu_task_test(struct task_struct *ptr)
{
}
__bpf_kfunc struct bpf_testmod_ctx *
bpf_testmod_ctx_create(int *err)
{
@ -534,8 +572,16 @@ BTF_KFUNCS_START(bpf_testmod_common_kfunc_ids)
BTF_ID_FLAGS(func, bpf_iter_testmod_seq_new, KF_ITER_NEW)
BTF_ID_FLAGS(func, bpf_iter_testmod_seq_next, KF_ITER_NEXT | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_iter_testmod_seq_destroy, KF_ITER_DESTROY)
BTF_ID_FLAGS(func, bpf_iter_testmod_seq_value)
BTF_ID_FLAGS(func, bpf_kfunc_common_test)
BTF_ID_FLAGS(func, bpf_kfunc_dynptr_test)
BTF_ID_FLAGS(func, bpf_kfunc_nested_acquire_nonzero_offset_test, KF_ACQUIRE)
BTF_ID_FLAGS(func, bpf_kfunc_nested_acquire_zero_offset_test, KF_ACQUIRE)
BTF_ID_FLAGS(func, bpf_kfunc_nested_release_test, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_kfunc_trusted_vma_test, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_kfunc_trusted_task_test, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_kfunc_trusted_num_test, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_kfunc_rcu_task_test, KF_RCU)
BTF_ID_FLAGS(func, bpf_testmod_ctx_create, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_testmod_ctx_release, KF_RELEASE)
BTF_KFUNCS_END(bpf_testmod_common_kfunc_ids)
@ -923,6 +969,51 @@ __bpf_kfunc int bpf_kfunc_call_kernel_getpeername(struct addr_args *args)
return err;
}
static DEFINE_MUTEX(st_ops_mutex);
static struct bpf_testmod_st_ops *st_ops;
__bpf_kfunc int bpf_kfunc_st_ops_test_prologue(struct st_ops_args *args)
{
int ret = -1;
mutex_lock(&st_ops_mutex);
if (st_ops && st_ops->test_prologue)
ret = st_ops->test_prologue(args);
mutex_unlock(&st_ops_mutex);
return ret;
}
__bpf_kfunc int bpf_kfunc_st_ops_test_epilogue(struct st_ops_args *args)
{
int ret = -1;
mutex_lock(&st_ops_mutex);
if (st_ops && st_ops->test_epilogue)
ret = st_ops->test_epilogue(args);
mutex_unlock(&st_ops_mutex);
return ret;
}
__bpf_kfunc int bpf_kfunc_st_ops_test_pro_epilogue(struct st_ops_args *args)
{
int ret = -1;
mutex_lock(&st_ops_mutex);
if (st_ops && st_ops->test_pro_epilogue)
ret = st_ops->test_pro_epilogue(args);
mutex_unlock(&st_ops_mutex);
return ret;
}
__bpf_kfunc int bpf_kfunc_st_ops_inc10(struct st_ops_args *args)
{
args->a += 10;
return args->a;
}
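Assuming the kfunc set is registered for syscall programs (as the init changes later in this file suggest), a hedged trigger sketch:

SEC("syscall")
int trigger_pro_epilogue(void *ctx)
{
	struct st_ops_args args = { .a = 1 };

	/* Runs the registered struct_ops program, including any
	 * verifier-generated prologue/epilogue instructions.
	 */
	return bpf_kfunc_st_ops_test_pro_epilogue(&args);
}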
BTF_KFUNCS_START(bpf_testmod_check_kfunc_ids)
BTF_ID_FLAGS(func, bpf_testmod_test_mod_kfunc)
BTF_ID_FLAGS(func, bpf_kfunc_call_test1)
@ -959,6 +1050,10 @@ BTF_ID_FLAGS(func, bpf_kfunc_call_kernel_sendmsg, KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_kfunc_call_sock_sendmsg, KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_kfunc_call_kernel_getsockname, KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_kfunc_call_kernel_getpeername, KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_kfunc_st_ops_test_prologue, KF_TRUSTED_ARGS | KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_kfunc_st_ops_test_epilogue, KF_TRUSTED_ARGS | KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_kfunc_st_ops_test_pro_epilogue, KF_TRUSTED_ARGS | KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_kfunc_st_ops_inc10, KF_TRUSTED_ARGS)
BTF_KFUNCS_END(bpf_testmod_check_kfunc_ids)
static int bpf_testmod_ops_init(struct btf *btf)
@ -1027,6 +1122,11 @@ static void bpf_testmod_test_2(int a, int b)
{
}
static int bpf_testmod_tramp(int value)
{
return 0;
}
static int bpf_testmod_ops__test_maybe_null(int dummy,
struct task_struct *task__nullable)
{
@ -1073,6 +1173,144 @@ struct bpf_struct_ops bpf_testmod_ops2 = {
.owner = THIS_MODULE,
};
static int bpf_test_mod_st_ops__test_prologue(struct st_ops_args *args)
{
return 0;
}
static int bpf_test_mod_st_ops__test_epilogue(struct st_ops_args *args)
{
return 0;
}
static int bpf_test_mod_st_ops__test_pro_epilogue(struct st_ops_args *args)
{
return 0;
}
static int st_ops_gen_prologue(struct bpf_insn *insn_buf, bool direct_write,
const struct bpf_prog *prog)
{
struct bpf_insn *insn = insn_buf;
if (strcmp(prog->aux->attach_func_name, "test_prologue") &&
strcmp(prog->aux->attach_func_name, "test_pro_epilogue"))
return 0;
/* r6 = r1[0]; // r6 will be "struct st_ops *args". r1 is "u64 *ctx".
* r7 = r6->a;
* r7 += 1000;
* r6->a = r7;
*/
*insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0);
*insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_6, offsetof(struct st_ops_args, a));
*insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 1000);
*insn++ = BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_7, offsetof(struct st_ops_args, a));
*insn++ = prog->insnsi[0];
return insn - insn_buf;
}
static int st_ops_gen_epilogue(struct bpf_insn *insn_buf, const struct bpf_prog *prog,
s16 ctx_stack_off)
{
struct bpf_insn *insn = insn_buf;
if (strcmp(prog->aux->attach_func_name, "test_epilogue") &&
strcmp(prog->aux->attach_func_name, "test_pro_epilogue"))
return 0;
/* r1 = stack[ctx_stack_off]; // r1 will be "u64 *ctx"
* r1 = r1[0]; // r1 will be "struct st_ops *args"
* r6 = r1->a;
* r6 += 10000;
* r1->a = r6;
* r0 = r6;
* r0 *= 2;
* BPF_EXIT;
*/
*insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_FP, ctx_stack_off);
*insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0);
*insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, offsetof(struct st_ops_args, a));
*insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 10000);
*insn++ = BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, offsetof(struct st_ops_args, a));
*insn++ = BPF_MOV64_REG(BPF_REG_0, BPF_REG_6);
*insn++ = BPF_ALU64_IMM(BPF_MUL, BPF_REG_0, 2);
*insn++ = BPF_EXIT_INSN();
return insn - insn_buf;
}
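To make the generated sequences concrete, a hypothetical walk-through of a test_pro_epilogue call with args->a == 1, assuming the attached program leaves a untouched:

/* prologue:  a = 1 + 1000      -> a == 1001
 * program:   runs with a == 1001
 * epilogue:  a = 1001 + 10000  -> a == 11001
 *            r0 = 11001 * 2    -> the caller sees 22002
 */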
static int st_ops_btf_struct_access(struct bpf_verifier_log *log,
const struct bpf_reg_state *reg,
int off, int size)
{
if (off < 0 || off + size > sizeof(struct st_ops_args))
return -EACCES;
return 0;
}
static const struct bpf_verifier_ops st_ops_verifier_ops = {
.is_valid_access = bpf_testmod_ops_is_valid_access,
.btf_struct_access = st_ops_btf_struct_access,
.gen_prologue = st_ops_gen_prologue,
.gen_epilogue = st_ops_gen_epilogue,
.get_func_proto = bpf_base_func_proto,
};
static struct bpf_testmod_st_ops st_ops_cfi_stubs = {
.test_prologue = bpf_test_mod_st_ops__test_prologue,
.test_epilogue = bpf_test_mod_st_ops__test_epilogue,
.test_pro_epilogue = bpf_test_mod_st_ops__test_pro_epilogue,
};
static int st_ops_reg(void *kdata, struct bpf_link *link)
{
int err = 0;
mutex_lock(&st_ops_mutex);
if (st_ops) {
pr_err("st_ops has already been registered\n");
err = -EEXIST;
goto unlock;
}
st_ops = kdata;
unlock:
mutex_unlock(&st_ops_mutex);
return err;
}
static void st_ops_unreg(void *kdata, struct bpf_link *link)
{
mutex_lock(&st_ops_mutex);
st_ops = NULL;
mutex_unlock(&st_ops_mutex);
}
static int st_ops_init(struct btf *btf)
{
return 0;
}
static int st_ops_init_member(const struct btf_type *t,
const struct btf_member *member,
void *kdata, const void *udata)
{
return 0;
}
static struct bpf_struct_ops testmod_st_ops = {
.verifier_ops = &st_ops_verifier_ops,
.init = st_ops_init,
.init_member = st_ops_init_member,
.reg = st_ops_reg,
.unreg = st_ops_unreg,
.cfi_stubs = &st_ops_cfi_stubs,
.name = "bpf_testmod_st_ops",
.owner = THIS_MODULE,
};
extern int bpf_fentry_test1(int a);
static int bpf_testmod_init(void)
@ -1083,14 +1321,17 @@ static int bpf_testmod_init(void)
.kfunc_btf_id = bpf_testmod_dtor_ids[1]
},
};
void **tramp;
int ret;
ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &bpf_testmod_common_kfunc_set);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_testmod_kfunc_set);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_testmod_kfunc_set);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &bpf_testmod_kfunc_set);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &bpf_testmod_kfunc_set);
ret = ret ?: register_bpf_struct_ops(&bpf_bpf_testmod_ops, bpf_testmod_ops);
ret = ret ?: register_bpf_struct_ops(&bpf_testmod_ops2, bpf_testmod_ops2);
ret = ret ?: register_bpf_struct_ops(&testmod_st_ops, bpf_testmod_st_ops);
ret = ret ?: register_btf_id_dtor_kfuncs(bpf_testmod_dtors,
ARRAY_SIZE(bpf_testmod_dtors),
THIS_MODULE);
@ -1106,6 +1347,14 @@ static int bpf_testmod_init(void)
ret = register_bpf_testmod_uprobe();
if (ret < 0)
return ret;
/* Ensure nothing is between tramp_1..tramp_40 */
BUILD_BUG_ON(offsetof(struct bpf_testmod_ops, tramp_1) + 40 * sizeof(long) !=
offsetofend(struct bpf_testmod_ops, tramp_40));
tramp = (void **)&__bpf_testmod_ops.tramp_1;
while (tramp <= (void **)&__bpf_testmod_ops.tramp_40)
*tramp++ = bpf_testmod_tramp;
return 0;
}


@ -35,6 +35,7 @@ struct bpf_testmod_ops {
void (*test_2)(int a, int b);
/* Used to test nullable arguments. */
int (*test_maybe_null)(int dummy, struct task_struct *task);
int (*unsupported_ops)(void);
/* The following fields are used to test shadow copies. */
char onebyte;
@ -93,4 +94,15 @@ struct bpf_testmod_ops2 {
int (*test_1)(void);
};
struct st_ops_args {
u64 a;
};
struct bpf_testmod_st_ops {
int (*test_prologue)(struct st_ops_args *args);
int (*test_epilogue)(struct st_ops_args *args);
int (*test_pro_epilogue)(struct st_ops_args *args);
struct module *owner;
};
#endif /* _BPF_TESTMOD_H */


@ -144,4 +144,19 @@ void bpf_kfunc_dynptr_test(struct bpf_dynptr *ptr, struct bpf_dynptr *ptr__nulla
struct bpf_testmod_ctx *bpf_testmod_ctx_create(int *err) __ksym;
void bpf_testmod_ctx_release(struct bpf_testmod_ctx *ctx) __ksym;
struct sk_buff *bpf_kfunc_nested_acquire_nonzero_offset_test(struct sk_buff_head *ptr) __ksym;
struct sk_buff *bpf_kfunc_nested_acquire_zero_offset_test(struct sock_common *ptr) __ksym;
void bpf_kfunc_nested_release_test(struct sk_buff *ptr) __ksym;
struct st_ops_args;
int bpf_kfunc_st_ops_test_prologue(struct st_ops_args *args) __ksym;
int bpf_kfunc_st_ops_test_epilogue(struct st_ops_args *args) __ksym;
int bpf_kfunc_st_ops_test_pro_epilogue(struct st_ops_args *args) __ksym;
int bpf_kfunc_st_ops_inc10(struct st_ops_args *args) __ksym;
void bpf_kfunc_trusted_vma_test(struct vm_area_struct *ptr) __ksym;
void bpf_kfunc_trusted_task_test(struct task_struct *ptr) __ksym;
void bpf_kfunc_trusted_num_test(int *ptr) __ksym;
void bpf_kfunc_rcu_task_test(struct task_struct *ptr) __ksym;
#endif /* _BPF_TESTMOD_KFUNC_H */


@ -644,7 +644,7 @@ unsigned long long get_classid_cgroup_id(void)
/**
* get_cgroup1_hierarchy_id - Retrieves the ID of a cgroup1 hierarchy from the cgroup1 subsys name.
* @subsys_name: The cgroup1 subsys name, which can be retrieved from /proc/self/cgroup. It can be
* a named cgroup like "name=systemd", a controller name like "net_cls", or multi-contollers like
* a named cgroup like "name=systemd", a controller name like "net_cls", or multi-controllers like
* "net_cls,net_prio".
*/
int get_cgroup1_hierarchy_id(const char *subsys_name)


@ -0,0 +1,84 @@
CONFIG_AUDIT=y
CONFIG_BLK_CGROUP=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BONDING=y
CONFIG_BPF_JIT_ALWAYS_ON=y
CONFIG_BPF_PRELOAD=y
CONFIG_BPF_PRELOAD_UMD=y
CONFIG_CGROUPS=y
CONFIG_CGROUP_CPUACCT=y
CONFIG_CGROUP_DEVICE=y
CONFIG_CGROUP_FREEZER=y
CONFIG_CGROUP_HUGETLB=y
CONFIG_CGROUP_NET_CLASSID=y
CONFIG_CGROUP_PERF=y
CONFIG_CGROUP_PIDS=y
CONFIG_CGROUP_SCHED=y
CONFIG_CPUSETS=y
CONFIG_DEBUG_ATOMIC_SLEEP=y
CONFIG_DEBUG_FS=y
CONFIG_DETECT_HUNG_TASK=y
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_EXPERT=y
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_EXT4_FS_SECURITY=y
CONFIG_FRAME_POINTER=y
CONFIG_HARDLOCKUP_DETECTOR=y
CONFIG_HIGH_RES_TIMERS=y
CONFIG_HUGETLBFS=y
CONFIG_INET=y
CONFIG_IPV6_SEG6_LWTUNNEL=y
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_MULTIPLE_TABLES=y
CONFIG_JUMP_LABEL=y
CONFIG_KALLSYMS_ALL=y
CONFIG_KPROBES=y
CONFIG_MEMCG=y
CONFIG_NAMESPACES=y
CONFIG_NET=y
CONFIG_NETDEVICES=y
CONFIG_NETFILTER_XT_MATCH_BPF=y
CONFIG_NET_ACT_BPF=y
CONFIG_NET_L3_MASTER_DEV=y
CONFIG_NET_VRF=y
CONFIG_NONPORTABLE=y
CONFIG_NO_HZ_IDLE=y
CONFIG_NR_CPUS=256
CONFIG_PACKET=y
CONFIG_PANIC_ON_OOPS=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_PCI=y
CONFIG_PCI_HOST_GENERIC=y
CONFIG_POSIX_MQUEUE=y
CONFIG_PRINTK_TIME=y
CONFIG_PROC_KCORE=y
CONFIG_PROFILING=y
CONFIG_RCU_CPU_STALL_TIMEOUT=60
CONFIG_RISCV_EFFICIENT_UNALIGNED_ACCESS=y
CONFIG_RISCV_ISA_C=y
CONFIG_RISCV_PMU=y
CONFIG_RISCV_PMU_SBI=y
CONFIG_RT_GROUP_SCHED=y
CONFIG_SECURITY_NETWORK=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_OF_PLATFORM=y
CONFIG_SMP=y
CONFIG_SOC_VIRT=y
CONFIG_SYSVIPC=y
CONFIG_TCP_CONG_ADVANCED=y
CONFIG_TLS=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
CONFIG_TUN=y
CONFIG_UNIX=y
CONFIG_UPROBES=y
CONFIG_USER_NS=y
CONFIG_VETH=y
CONFIG_VLAN_8021Q=y
CONFIG_VSOCKETS_LOOPBACK=y
CONFIG_XFRM_USER=y


@ -0,0 +1,69 @@
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
#include <bpf/bpf.h>
#include "disasm.h"
struct print_insn_context {
char scratch[16];
char *buf;
size_t sz;
};
static void print_insn_cb(void *private_data, const char *fmt, ...)
{
struct print_insn_context *ctx = private_data;
va_list args;
va_start(args, fmt);
vsnprintf(ctx->buf, ctx->sz, fmt, args);
va_end(args);
}
static const char *print_call_cb(void *private_data, const struct bpf_insn *insn)
{
struct print_insn_context *ctx = private_data;
/* For pseudo calls, verifier.c:jit_subprogs() hides the original
* imm in insn->off and changes insn->imm to be an index of
* the subprog instead.
*/
if (insn->src_reg == BPF_PSEUDO_CALL) {
snprintf(ctx->scratch, sizeof(ctx->scratch), "%+d", insn->off);
return ctx->scratch;
}
return NULL;
}
struct bpf_insn *disasm_insn(struct bpf_insn *insn, char *buf, size_t buf_sz)
{
struct print_insn_context ctx = {
.buf = buf,
.sz = buf_sz,
};
struct bpf_insn_cbs cbs = {
.cb_print = print_insn_cb,
.cb_call = print_call_cb,
.private_data = &ctx,
};
char *tmp, *pfx_end, *sfx_start;
bool double_insn;
int len;
print_bpf_insn(&cbs, insn, true);
/* We share code with the kernel BPF disassembler, which adds a '(FF) ' prefix
* to each instruction (FF stands for the instruction `code` byte).
* Remove the prefix in place, and also simplify call instructions.
* E.g.: "(85) call foo#10" -> "call foo".
* Also remove the trailing newline (the 'max(strlen(buf) - 1, 0)' thing).
*/
pfx_end = buf + 5;
sfx_start = buf + max((int)strlen(buf) - 1, 0);
if (strncmp(pfx_end, "call ", 5) == 0 && (tmp = strrchr(buf, '#')))
sfx_start = tmp;
len = sfx_start - pfx_end;
memmove(buf, pfx_end, len);
buf[len] = 0;
double_insn = insn->code == (BPF_LD | BPF_IMM | BPF_DW);
return insn + (double_insn ? 2 : 1);
}
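A hedged usage sketch; insns and insn_cnt are assumed to describe a loaded program:

char buf[64];
struct bpf_insn *insn = insns;
struct bpf_insn *end = insns + insn_cnt;

while (insn < end) {
	insn = disasm_insn(insn, buf, sizeof(buf)); /* steps over ld_imm64 pairs */
	printf("%s\n", buf);
}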


@ -0,0 +1,12 @@
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
#ifndef __DISASM_HELPERS_H
#define __DISASM_HELPERS_H
#include <stdlib.h>
struct bpf_insn;
struct bpf_insn *disasm_insn(struct bpf_insn *insn, char *buf, size_t buf_sz);
#endif /* __DISASM_HELPERS_H */


@ -1,151 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2018 Facebook
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <syscall.h>
#include <unistd.h>
#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include "cgroup_helpers.h"
#include "testing_helpers.h"
#define CHECK(condition, tag, format...) ({ \
int __ret = !!(condition); \
if (__ret) { \
printf("%s:FAIL:%s ", __func__, tag); \
printf(format); \
} else { \
printf("%s:PASS:%s\n", __func__, tag); \
} \
__ret; \
})
static int bpf_find_map(const char *test, struct bpf_object *obj,
const char *name)
{
struct bpf_map *map;
map = bpf_object__find_map_by_name(obj, name);
if (!map)
return -1;
return bpf_map__fd(map);
}
#define TEST_CGROUP "/test-bpf-get-cgroup-id/"
int main(int argc, char **argv)
{
const char *probe_name = "syscalls/sys_enter_nanosleep";
const char *file = "get_cgroup_id_kern.bpf.o";
int err, bytes, efd, prog_fd, pmu_fd;
int cgroup_fd, cgidmap_fd, pidmap_fd;
struct perf_event_attr attr = {};
struct bpf_object *obj;
__u64 kcgid = 0, ucgid;
__u32 key = 0, pid;
int exit_code = 1;
char buf[256];
const struct timespec req = {
.tv_sec = 1,
.tv_nsec = 0,
};
cgroup_fd = cgroup_setup_and_join(TEST_CGROUP);
if (CHECK(cgroup_fd < 0, "cgroup_setup_and_join", "err %d errno %d\n", cgroup_fd, errno))
return 1;
/* Use libbpf 1.0 API mode */
libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
err = bpf_prog_test_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
if (CHECK(err, "bpf_prog_test_load", "err %d errno %d\n", err, errno))
goto cleanup_cgroup_env;
cgidmap_fd = bpf_find_map(__func__, obj, "cg_ids");
if (CHECK(cgidmap_fd < 0, "bpf_find_map", "err %d errno %d\n",
cgidmap_fd, errno))
goto close_prog;
pidmap_fd = bpf_find_map(__func__, obj, "pidmap");
if (CHECK(pidmap_fd < 0, "bpf_find_map", "err %d errno %d\n",
pidmap_fd, errno))
goto close_prog;
pid = getpid();
bpf_map_update_elem(pidmap_fd, &key, &pid, 0);
if (access("/sys/kernel/tracing/trace", F_OK) == 0) {
snprintf(buf, sizeof(buf),
"/sys/kernel/tracing/events/%s/id", probe_name);
} else {
snprintf(buf, sizeof(buf),
"/sys/kernel/debug/tracing/events/%s/id", probe_name);
}
efd = open(buf, O_RDONLY, 0);
if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
goto close_prog;
bytes = read(efd, buf, sizeof(buf));
close(efd);
if (CHECK(bytes <= 0 || bytes >= sizeof(buf), "read",
"bytes %d errno %d\n", bytes, errno))
goto close_prog;
attr.config = strtol(buf, NULL, 0);
attr.type = PERF_TYPE_TRACEPOINT;
attr.sample_type = PERF_SAMPLE_RAW;
attr.sample_period = 1;
attr.wakeup_events = 1;
/* attach to this pid so the all bpf invocations will be in the
* cgroup associated with this pid.
*/
pmu_fd = syscall(__NR_perf_event_open, &attr, getpid(), -1, -1, 0);
if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n", pmu_fd,
errno))
goto close_prog;
err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n", err,
errno))
goto close_pmu;
err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n", err,
errno))
goto close_pmu;
/* trigger some syscalls */
syscall(__NR_nanosleep, &req, NULL);
err = bpf_map_lookup_elem(cgidmap_fd, &key, &kcgid);
if (CHECK(err, "bpf_map_lookup_elem", "err %d errno %d\n", err, errno))
goto close_pmu;
ucgid = get_cgroup_id(TEST_CGROUP);
if (CHECK(kcgid != ucgid, "compare_cgroup_id",
"kern cgid %llx user cgid %llx", kcgid, ucgid))
goto close_pmu;
exit_code = 0;
printf("%s:PASS\n", argv[0]);
close_pmu:
close(pmu_fd);
close_prog:
bpf_object__close(obj);
cleanup_cgroup_env:
cleanup_cgroup_environment();
return exit_code;
}


@ -0,0 +1,245 @@
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include <test_progs.h>
#ifdef HAVE_LLVM_SUPPORT
#include <llvm-c/Core.h>
#include <llvm-c/Disassembler.h>
#include <llvm-c/Target.h>
#include <llvm-c/TargetMachine.h>
/* The intent is to use get_jited_program_text() for small test
* programs written in BPF assembly, thus assume that 32 local labels
* would be sufficient.
*/
#define MAX_LOCAL_LABELS 32
/* Local labels are encoded as 'L42', this requires 4 bytes of storage:
* 3 characters + zero byte
*/
#define LOCAL_LABEL_LEN 4
static bool llvm_initialized;
struct local_labels {
bool print_phase;
__u32 prog_len;
__u32 cnt;
__u32 pcs[MAX_LOCAL_LABELS];
char names[MAX_LOCAL_LABELS][LOCAL_LABEL_LEN];
};
static const char *lookup_symbol(void *data, uint64_t ref_value, uint64_t *ref_type,
uint64_t ref_pc, const char **ref_name)
{
struct local_labels *labels = data;
uint64_t type = *ref_type;
int i;
*ref_type = LLVMDisassembler_ReferenceType_InOut_None;
*ref_name = NULL;
if (type != LLVMDisassembler_ReferenceType_In_Branch)
return NULL;
/* Depending on labels->print_phase, either discover local labels or
* return the name assigned to a local jump target:
* - if print_phase is true and ref_value is in labels->pcs,
*   return the corresponding labels->name.
* - if print_phase is false, save program-local jump targets
*   in labels->pcs;
*/
if (labels->print_phase) {
for (i = 0; i < labels->cnt; ++i)
if (labels->pcs[i] == ref_value)
return labels->names[i];
} else {
if (labels->cnt < MAX_LOCAL_LABELS && ref_value < labels->prog_len)
labels->pcs[labels->cnt++] = ref_value;
}
return NULL;
}
static int disasm_insn(LLVMDisasmContextRef ctx, uint8_t *image, __u32 len, __u32 pc,
char *buf, __u32 buf_sz)
{
int i, cnt;
cnt = LLVMDisasmInstruction(ctx, image + pc, len - pc, pc,
buf, buf_sz);
if (cnt > 0)
return cnt;
PRINT_FAIL("Can't disasm instruction at offset %d:", pc);
for (i = 0; i < 16 && pc + i < len; ++i)
printf(" %02x", image[pc + i]);
printf("\n");
return -EINVAL;
}
static int cmp_u32(const void *_a, const void *_b)
{
__u32 a = *(__u32 *)_a;
__u32 b = *(__u32 *)_b;
if (a < b)
return -1;
if (a > b)
return 1;
return 0;
}
static int disasm_one_func(FILE *text_out, uint8_t *image, __u32 len)
{
char *label, *colon, *triple = NULL;
LLVMDisasmContextRef ctx = NULL;
struct local_labels labels = {};
__u32 *label_pc, pc;
int i, cnt, err = 0;
char buf[64];
triple = LLVMGetDefaultTargetTriple();
ctx = LLVMCreateDisasm(triple, &labels, 0, NULL, lookup_symbol);
if (!ASSERT_OK_PTR(ctx, "LLVMCreateDisasm")) {
err = -EINVAL;
goto out;
}
cnt = LLVMSetDisasmOptions(ctx, LLVMDisassembler_Option_PrintImmHex);
if (!ASSERT_EQ(cnt, 1, "LLVMSetDisasmOptions")) {
err = -EINVAL;
goto out;
}
/* discover labels */
labels.prog_len = len;
pc = 0;
while (pc < len) {
cnt = disasm_insn(ctx, image, len, pc, buf, 1);
if (cnt < 0) {
err = cnt;
goto out;
}
pc += cnt;
}
qsort(labels.pcs, labels.cnt, sizeof(*labels.pcs), cmp_u32);
for (i = 0; i < labels.cnt; ++i)
/* gcc is unable to infer upper bound for labels.cnt and assumes
* it to be U32_MAX. U32_MAX takes 10 decimal digits.
* snprintf below prints into labels.names[*],
* which has space only for two digits and a letter.
* To avoid truncation warning use (i % MAX_LOCAL_LABELS),
* which informs gcc about printed value upper bound.
*/
snprintf(labels.names[i], sizeof(labels.names[i]), "L%d", i % MAX_LOCAL_LABELS);
/* now print with labels */
labels.print_phase = true;
pc = 0;
while (pc < len) {
cnt = disasm_insn(ctx, image, len, pc, buf, sizeof(buf));
if (cnt < 0) {
err = cnt;
goto out;
}
label_pc = bsearch(&pc, labels.pcs, labels.cnt, sizeof(*labels.pcs), cmp_u32);
label = "";
colon = "";
if (label_pc) {
label = labels.names[label_pc - labels.pcs];
colon = ":";
}
fprintf(text_out, "%x:\t", pc);
for (i = 0; i < cnt; ++i)
fprintf(text_out, "%02x ", image[pc + i]);
for (i = cnt * 3; i < 12 * 3; ++i)
fputc(' ', text_out);
fprintf(text_out, "%s%s%s\n", label, colon, buf);
pc += cnt;
}
out:
if (triple)
LLVMDisposeMessage(triple);
if (ctx)
LLVMDisasmDispose(ctx);
return err;
}
int get_jited_program_text(int fd, char *text, size_t text_sz)
{
struct bpf_prog_info info = {};
__u32 info_len = sizeof(info);
__u32 jited_funcs, len, pc;
__u32 *func_lens = NULL;
FILE *text_out = NULL;
uint8_t *image = NULL;
int i, err = 0;
if (!llvm_initialized) {
LLVMInitializeAllTargetInfos();
LLVMInitializeAllTargetMCs();
LLVMInitializeAllDisassemblers();
llvm_initialized = 1;
}
text_out = fmemopen(text, text_sz, "w");
if (!ASSERT_OK_PTR(text_out, "fmemopen")) {
err = -errno;
goto out;
}
/* first call is to find out jited program len */
err = bpf_prog_get_info_by_fd(fd, &info, &info_len);
if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd #1"))
goto out;
len = info.jited_prog_len;
image = malloc(len);
if (!ASSERT_OK_PTR(image, "malloc(info.jited_prog_len)")) {
err = -ENOMEM;
goto out;
}
jited_funcs = info.nr_jited_func_lens;
func_lens = malloc(jited_funcs * sizeof(__u32));
if (!ASSERT_OK_PTR(func_lens, "malloc(info.nr_jited_func_lens)")) {
err = -ENOMEM;
goto out;
}
memset(&info, 0, sizeof(info));
info.jited_prog_insns = (__u64)image;
info.jited_prog_len = len;
info.jited_func_lens = (__u64)func_lens;
info.nr_jited_func_lens = jited_funcs;
err = bpf_prog_get_info_by_fd(fd, &info, &info_len);
if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd #2"))
goto out;
for (pc = 0, i = 0; i < jited_funcs; ++i) {
fprintf(text_out, "func #%d:\n", i);
disasm_one_func(text_out, image + pc, func_lens[i]);
fprintf(text_out, "\n");
pc += func_lens[i];
}
out:
if (text_out)
fclose(text_out);
if (image)
free(image);
if (func_lens)
free(func_lens);
return err;
}
#else /* HAVE_LLVM_SUPPORT */
int get_jited_program_text(int fd, char *text, size_t text_sz)
{
if (env.verbosity >= VERBOSE_VERY)
printf("compiled w/o llvm development libraries, can't dis-assembly binary code");
return -EOPNOTSUPP;
}
#endif /* HAVE_LLVM_SUPPORT */
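A hedged caller-side sketch; the buffer size is arbitrary and prog is assumed to be an already-loaded bpf_program:

char text[64 * 1024];
int prog_fd = bpf_program__fd(prog);

if (get_jited_program_text(prog_fd, text, sizeof(text)) == 0)
	printf("%s", text); /* per-function listing with L<N> local labels */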


@ -0,0 +1,10 @@
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
#ifndef __JIT_DISASM_HELPERS_H
#define __JIT_DISASM_HELPERS_H
#include <stddef.h>
int get_jited_program_text(int fd, char *text, size_t text_sz);
#endif /* __JIT_DISASM_HELPERS_H */


@ -197,7 +197,7 @@ void __test_map_lookup_and_delete_batch(bool is_pcpu)
CHECK(total != max_entries, "delete with steps",
"total = %u, max_entries = %u\n", total, max_entries);
/* check map is empty, errono == ENOENT */
/* check map is empty, errno == ENOENT */
err = bpf_map_get_next_key(map_fd, NULL, &key);
CHECK(!err || errno != ENOENT, "bpf_map_get_next_key()",
"error: %s\n", strerror(errno));


@ -135,7 +135,7 @@ void test_lpm_trie_map_batch_ops(void)
CHECK(total != max_entries, "delete with steps",
"total = %u, max_entries = %u\n", total, max_entries);
/* check map is empty, errono == ENOENT */
/* check map is empty, errno == ENOENT */
err = bpf_map_get_next_key(map_fd, NULL, &key);
CHECK(!err || errno != ENOENT, "bpf_map_get_next_key()",
"error: %s\n", strerror(errno));


@ -17,6 +17,7 @@
#define MAX_ENTRIES_HASH_OF_MAPS 64
#define N_THREADS 8
#define MAX_MAP_KEY_SIZE 4
#define PCPU_MIN_UNIT_SIZE 32768
static void map_info(int map_fd, struct bpf_map_info *info)
{
@ -456,6 +457,22 @@ static void map_percpu_stats_hash_of_maps(void)
printf("test_%s:PASS\n", __func__);
}
static void map_percpu_stats_map_value_size(void)
{
int fd;
int value_sz = PCPU_MIN_UNIT_SIZE + 1;
struct bpf_map_create_opts opts = { .sz = sizeof(opts) };
enum bpf_map_type map_types[] = { BPF_MAP_TYPE_PERCPU_ARRAY,
BPF_MAP_TYPE_PERCPU_HASH,
BPF_MAP_TYPE_LRU_PERCPU_HASH };
for (int i = 0; i < ARRAY_SIZE(map_types); i++) {
fd = bpf_map_create(map_types[i], NULL, sizeof(__u32), value_sz, 1, &opts);
CHECK(fd < 0 && errno != E2BIG, "percpu map value size",
"error: %s\n", strerror(errno));
}
printf("test_%s:PASS\n", __func__);
}
void test_map_percpu_stats(void)
{
map_percpu_stats_hash();
@ -467,4 +484,5 @@ void test_map_percpu_stats(void)
map_percpu_stats_percpu_lru_hash();
map_percpu_stats_percpu_lru_hash_no_common();
map_percpu_stats_hash_of_maps();
map_percpu_stats_map_value_size();
}


@ -412,7 +412,7 @@ static void test_sk_storage_map_stress_free(void)
rlim_new.rlim_max = rlim_new.rlim_cur + 128;
err = setrlimit(RLIMIT_NOFILE, &rlim_new);
CHECK(err, "setrlimit(RLIMIT_NOFILE)", "rlim_new:%lu errno:%d",
rlim_new.rlim_cur, errno);
(unsigned long) rlim_new.rlim_cur, errno);
}
err = do_sk_storage_map_stress_free();


@ -11,17 +11,31 @@
#include <arpa/inet.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/un.h>
#include <sys/eventfd.h>
#include <linux/err.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/limits.h>
#include <linux/ip.h>
#include <linux/udp.h>
#include <netinet/tcp.h>
#include <net/if.h>
#include "bpf_util.h"
#include "network_helpers.h"
#include "test_progs.h"
#ifdef TRAFFIC_MONITOR
/* Prevent pcap.h from including pcap/bpf.h and causing conflicts */
#define PCAP_DONT_INCLUDE_PCAP_BPF_H 1
#include <pcap/pcap.h>
#include <pcap/dlt.h>
#endif
#ifndef IPPROTO_MPTCP
#define IPPROTO_MPTCP 262
#endif
@ -80,12 +94,15 @@ int settimeo(int fd, int timeout_ms)
#define save_errno_close(fd) ({ int __save = errno; close(fd); errno = __save; })
static int __start_server(int type, const struct sockaddr *addr, socklen_t addrlen,
const struct network_helper_opts *opts)
int start_server_addr(int type, const struct sockaddr_storage *addr, socklen_t addrlen,
const struct network_helper_opts *opts)
{
int fd;
fd = socket(addr->sa_family, type, opts->proto);
if (!opts)
opts = &default_opts;
fd = socket(addr->ss_family, type, opts->proto);
if (fd < 0) {
log_err("Failed to create server socket");
return -1;
@ -100,7 +117,7 @@ static int __start_server(int type, const struct sockaddr *addr, socklen_t addrl
goto error_close;
}
if (bind(fd, addr, addrlen) < 0) {
if (bind(fd, (struct sockaddr *)addr, addrlen) < 0) {
log_err("Failed to bind socket");
goto error_close;
}
@ -131,7 +148,7 @@ int start_server_str(int family, int type, const char *addr_str, __u16 port,
if (make_sockaddr(family, addr_str, port, &addr, &addrlen))
return -1;
return __start_server(type, (struct sockaddr *)&addr, addrlen, opts);
return start_server_addr(type, &addr, addrlen, opts);
}
int start_server(int family, int type, const char *addr_str, __u16 port,
@ -173,7 +190,7 @@ int *start_reuseport_server(int family, int type, const char *addr_str,
if (!fds)
return NULL;
fds[0] = __start_server(type, (struct sockaddr *)&addr, addrlen, &opts);
fds[0] = start_server_addr(type, &addr, addrlen, &opts);
if (fds[0] == -1)
goto close_fds;
nr_fds = 1;
@ -182,7 +199,7 @@ int *start_reuseport_server(int family, int type, const char *addr_str,
goto close_fds;
for (; nr_fds < nr_listens; nr_fds++) {
fds[nr_fds] = __start_server(type, (struct sockaddr *)&addr, addrlen, &opts);
fds[nr_fds] = start_server_addr(type, &addr, addrlen, &opts);
if (fds[nr_fds] == -1)
goto close_fds;
}
@@ -194,15 +211,6 @@ int *start_reuseport_server(int family, int type, const char *addr_str,
return NULL;
}
int start_server_addr(int type, const struct sockaddr_storage *addr, socklen_t len,
const struct network_helper_opts *opts)
{
if (!opts)
opts = &default_opts;
return __start_server(type, (struct sockaddr *)addr, len, opts);
}
void free_fds(int *fds, unsigned int nr_close_fds)
{
if (fds) {
@@ -277,33 +285,6 @@ int client_socket(int family, int type,
return -1;
}
static int connect_fd_to_addr(int fd,
const struct sockaddr_storage *addr,
socklen_t addrlen, const bool must_fail)
{
int ret;
errno = 0;
ret = connect(fd, (const struct sockaddr *)addr, addrlen);
if (must_fail) {
if (!ret) {
log_err("Unexpected success to connect to server");
return -1;
}
if (errno != EPERM) {
log_err("Unexpected error from connect to server");
return -1;
}
} else {
if (ret) {
log_err("Failed to connect to server");
return -1;
}
}
return 0;
}
int connect_to_addr(int type, const struct sockaddr_storage *addr, socklen_t addrlen,
const struct network_helper_opts *opts)
{
@@ -318,17 +299,17 @@ int connect_to_addr(int type, const struct sockaddr_storage *addr, socklen_t addrlen,
return -1;
}
if (connect_fd_to_addr(fd, addr, addrlen, opts->must_fail))
goto error_close;
if (connect(fd, (const struct sockaddr *)addr, addrlen)) {
log_err("Failed to connect to server");
save_errno_close(fd);
return -1;
}
return fd;
error_close:
save_errno_close(fd);
return -1;
}
int connect_to_fd_opts(int server_fd, int type, const struct network_helper_opts *opts)
int connect_to_addr_str(int family, int type, const char *addr_str, __u16 port,
const struct network_helper_opts *opts)
{
struct sockaddr_storage addr;
socklen_t addrlen;
@@ -336,6 +317,27 @@ int connect_to_fd_opts(int server_fd, int type, const struct network_helper_opts *opts)
if (!opts)
opts = &default_opts;
if (make_sockaddr(family, addr_str, port, &addr, &addrlen))
return -1;
return connect_to_addr(type, &addr, addrlen, opts);
}
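/* Illustrative use of the new helper, not part of the patch: a TCP client
 * connecting to a hypothetical server on 127.0.0.1:8080. Passing NULL opts
 * falls back to default_opts inside connect_to_addr_str() above.
 */
static int example_connect_client(void)
{
	int fd;

	fd = connect_to_addr_str(AF_INET, SOCK_STREAM, "127.0.0.1", 8080, NULL);
	if (fd < 0)
		return -1;
	/* ... exchange data with the server ... */
	close(fd);
	return 0;
}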
int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts)
{
struct sockaddr_storage addr;
socklen_t addrlen, optlen;
int type;
if (!opts)
opts = &default_opts;
optlen = sizeof(type);
if (getsockopt(server_fd, SOL_SOCKET, SO_TYPE, &type, &optlen)) {
log_err("getsockopt(SOL_TYPE)");
return -1;
}
addrlen = sizeof(addr);
if (getsockname(server_fd, (struct sockaddr *)&addr, &addrlen)) {
log_err("Failed to get server addr");
@@ -350,14 +352,8 @@ int connect_to_fd(int server_fd, int timeout_ms)
struct network_helper_opts opts = {
.timeout_ms = timeout_ms,
};
int type, protocol;
socklen_t optlen;
optlen = sizeof(type);
if (getsockopt(server_fd, SOL_SOCKET, SO_TYPE, &type, &optlen)) {
log_err("getsockopt(SOL_TYPE)");
return -1;
}
int protocol;
optlen = sizeof(protocol);
if (getsockopt(server_fd, SOL_SOCKET, SO_PROTOCOL, &protocol, &optlen)) {
@@ -366,7 +362,7 @@ int connect_to_fd(int server_fd, int timeout_ms)
}
opts.proto = protocol;
return connect_to_fd_opts(server_fd, type, &opts);
return connect_to_fd_opts(server_fd, &opts);
}
int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms)
@@ -382,8 +378,10 @@ int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms)
return -1;
}
if (connect_fd_to_addr(client_fd, &addr, len, false))
if (connect(client_fd, (const struct sockaddr *)&addr, len)) {
log_err("Failed to connect to server");
return -1;
}
return 0;
}
@@ -448,6 +446,52 @@ char *ping_command(int family)
return "ping";
}
int remove_netns(const char *name)
{
char *cmd;
int r;
r = asprintf(&cmd, "ip netns del %s >/dev/null 2>&1", name);
if (r < 0) {
log_err("Failed to malloc cmd");
return -1;
}
r = system(cmd);
free(cmd);
return r;
}
int make_netns(const char *name)
{
char *cmd;
int r;
r = asprintf(&cmd, "ip netns add %s", name);
if (r < 0) {
log_err("Failed to malloc cmd");
return -1;
}
r = system(cmd);
free(cmd);
if (r)
return r;
r = asprintf(&cmd, "ip -n %s link set lo up", name);
if (r < 0) {
log_err("Failed to malloc cmd for setting up lo");
remove_netns(name);
return -1;
}
r = system(cmd);
free(cmd);
return r;
}
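/* Illustrative pairing of the new netns helpers, not part of the patch;
 * the namespace name "tmon_example" is made up. make_netns() returns 0 on
 * success, and remove_netns() tears the namespace down again.
 */
static void example_netns_usage(void)
{
	struct nstoken *tok;

	if (make_netns("tmon_example"))
		return;
	tok = open_netns("tmon_example");
	if (tok) {
		/* ... run traffic inside the namespace ... */
		close_netns(tok);
	}
	remove_netns("tmon_example");
}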
struct nstoken {
int orig_netns_fd;
};
@@ -676,3 +720,443 @@ int send_recv_data(int lfd, int fd, uint32_t total_bytes)
return err;
}
#ifdef TRAFFIC_MONITOR
struct tmonitor_ctx {
pcap_t *pcap;
pcap_dumper_t *dumper;
pthread_t thread;
int wake_fd;
volatile bool done;
char pkt_fname[PATH_MAX];
int pcap_fd;
};
/* Is this packet captured with an Ethernet protocol type? */
static bool is_ethernet(const u_char *packet)
{
u16 arphdr_type;
memcpy(&arphdr_type, packet + 8, 2);
arphdr_type = ntohs(arphdr_type);
/* Except for the following cases, the protocol type field contains
 * the Ethernet protocol type of the packet.
*
* https://www.tcpdump.org/linktypes/LINKTYPE_LINUX_SLL2.html
*/
switch (arphdr_type) {
case 770: /* ARPHRD_FRAD */
case 778: /* ARPHRD_IPGRE */
case 803: /* ARPHRD_IEEE80211_RADIOTAP */
printf("Packet captured: arphdr_type=%d\n", arphdr_type);
return false;
}
return true;
}
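/* For reference, not in the original file: the 20-byte LINKTYPE_LINUX_SLL2
 * pseudo-header that this code indexes into, per
 * https://www.tcpdump.org/linktypes/LINKTYPE_LINUX_SLL2.html.
 * All multi-byte fields are in network byte order.
 */
struct sll2_hdr {
	__u16 protocol;		/* offset 0: Ethernet protocol type */
	__u16 reserved;		/* offset 2: reserved, must be zero */
	__u32 ifindex;		/* offset 4: interface index */
	__u16 arphrd_type;	/* offset 8: ARPHRD_ device type (checked in is_ethernet()) */
	__u8 pkt_type;		/* offset 10: packet type (PACKET_HOST, ...) */
	__u8 addr_len;		/* offset 11: link-layer address length */
	__u8 addr[8];		/* offset 12: link-layer address */
};				/* offset 20: network-layer payload follows */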
static const char * const pkt_types[] = {
	"In",	/* PACKET_HOST: addressed to this host */
	"B",	/* PACKET_BROADCAST */
	"M",	/* PACKET_MULTICAST */
	"C",	/* PACKET_OTHERHOST: captured in promiscuous mode */
	"Out",	/* PACKET_OUTGOING */
};
static const char *pkt_type_str(u16 pkt_type)
{
if (pkt_type < ARRAY_SIZE(pkt_types))
return pkt_types[pkt_type];
return "Unknown";
}
/* Show the information of the transport layer in the packet */
static void show_transport(const u_char *packet, u16 len, u32 ifindex,
const char *src_addr, const char *dst_addr,
u16 proto, bool ipv6, u8 pkt_type)
{
char *ifname, _ifname[IF_NAMESIZE];
const char *transport_str;
u16 src_port, dst_port;
struct udphdr *udp;
struct tcphdr *tcp;
ifname = if_indextoname(ifindex, _ifname);
if (!ifname) {
snprintf(_ifname, sizeof(_ifname), "unknown(%d)", ifindex);
ifname = _ifname;
}
if (proto == IPPROTO_UDP) {
udp = (struct udphdr *)packet;
src_port = ntohs(udp->source);
dst_port = ntohs(udp->dest);
transport_str = "UDP";
} else if (proto == IPPROTO_TCP) {
tcp = (struct tcphdr *)packet;
src_port = ntohs(tcp->source);
dst_port = ntohs(tcp->dest);
transport_str = "TCP";
} else if (proto == IPPROTO_ICMP) {
printf("%-7s %-3s IPv4 %s > %s: ICMP, length %d, type %d, code %d\n",
ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len,
packet[0], packet[1]);
return;
} else if (proto == IPPROTO_ICMPV6) {
printf("%-7s %-3s IPv6 %s > %s: ICMPv6, length %d, type %d, code %d\n",
ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len,
packet[0], packet[1]);
return;
} else {
printf("%-7s %-3s %s %s > %s: protocol %d\n",
ifname, pkt_type_str(pkt_type), ipv6 ? "IPv6" : "IPv4",
src_addr, dst_addr, proto);
return;
}
/* TCP or UDP */
flockfile(stdout);
if (ipv6)
printf("%-7s %-3s IPv6 %s.%d > %s.%d: %s, length %d",
ifname, pkt_type_str(pkt_type), src_addr, src_port,
dst_addr, dst_port, transport_str, len);
else
printf("%-7s %-3s IPv4 %s:%d > %s:%d: %s, length %d",
ifname, pkt_type_str(pkt_type), src_addr, src_port,
dst_addr, dst_port, transport_str, len);
if (proto == IPPROTO_TCP) {
if (tcp->fin)
printf(", FIN");
if (tcp->syn)
printf(", SYN");
if (tcp->rst)
printf(", RST");
if (tcp->ack)
printf(", ACK");
}
printf("\n");
funlockfile(stdout);
}
static void show_ipv6_packet(const u_char *packet, u32 ifindex, u8 pkt_type)
{
char src_buf[INET6_ADDRSTRLEN], dst_buf[INET6_ADDRSTRLEN];
struct ipv6hdr *pkt = (struct ipv6hdr *)packet;
const char *src, *dst;
u_char proto;
src = inet_ntop(AF_INET6, &pkt->saddr, src_buf, sizeof(src_buf));
if (!src)
src = "<invalid>";
dst = inet_ntop(AF_INET6, &pkt->daddr, dst_buf, sizeof(dst_buf));
if (!dst)
dst = "<invalid>";
proto = pkt->nexthdr;
show_transport(packet + sizeof(struct ipv6hdr),
ntohs(pkt->payload_len),
ifindex, src, dst, proto, true, pkt_type);
}
static void show_ipv4_packet(const u_char *packet, u32 ifindex, u8 pkt_type)
{
char src_buf[INET_ADDRSTRLEN], dst_buf[INET_ADDRSTRLEN];
struct iphdr *pkt = (struct iphdr *)packet;
const char *src, *dst;
u_char proto;
src = inet_ntop(AF_INET, &pkt->saddr, src_buf, sizeof(src_buf));
if (!src)
src = "<invalid>";
dst = inet_ntop(AF_INET, &pkt->daddr, dst_buf, sizeof(dst_buf));
if (!dst)
dst = "<invalid>";
proto = pkt->protocol;
show_transport(packet + sizeof(struct iphdr),
ntohs(pkt->tot_len),
ifindex, src, dst, proto, false, pkt_type);
}
static void *traffic_monitor_thread(void *arg)
{
char *ifname, _ifname[IF_NAMESIZE];
const u_char *packet, *payload;
struct tmonitor_ctx *ctx = arg;
pcap_dumper_t *dumper = ctx->dumper;
int fd = ctx->pcap_fd, nfds, r;
int wake_fd = ctx->wake_fd;
struct pcap_pkthdr header;
pcap_t *pcap = ctx->pcap;
u32 ifindex;
fd_set fds;
u16 proto;
u8 ptype;
nfds = (fd > wake_fd ? fd : wake_fd) + 1;
FD_ZERO(&fds);
while (!ctx->done) {
FD_SET(fd, &fds);
FD_SET(wake_fd, &fds);
r = select(nfds, &fds, NULL, NULL, NULL);
if (!r)
continue;
if (r < 0) {
if (errno == EINTR)
continue;
log_err("Fail to select on pcap fd and wake fd");
break;
}
/* This instance of pcap is non-blocking */
packet = pcap_next(pcap, &header);
if (!packet)
continue;
/* According to the man page of pcap_dump(), the first argument
 * is the pcap_dumper_t pointer even though its declared type is
 * u_char *.
 */
pcap_dump((u_char *)dumper, &header, packet);
/* The layout of other link types is not handled here; we parse
 * only Ethernet and compatible packets.
 */
if (!is_ethernet(packet))
continue;
/* Skip SLL2 header
* https://www.tcpdump.org/linktypes/LINKTYPE_LINUX_SLL2.html
*
* Although the document doesn't say so explicitly, the payload
* doesn't include the Ethernet header; it starts at the first
* byte of the network-layer header.
*/
payload = packet + 20;
memcpy(&proto, packet, 2);
proto = ntohs(proto);
memcpy(&ifindex, packet + 4, 4);
ifindex = ntohl(ifindex);
ptype = packet[10];
if (proto == ETH_P_IPV6) {
show_ipv6_packet(payload, ifindex, ptype);
} else if (proto == ETH_P_IP) {
show_ipv4_packet(payload, ifindex, ptype);
} else {
ifname = if_indextoname(ifindex, _ifname);
if (!ifname) {
snprintf(_ifname, sizeof(_ifname), "unknown(%d)", ifindex);
ifname = _ifname;
}
printf("%-7s %-3s Unknown network protocol type 0x%x\n",
ifname, pkt_type_str(ptype), proto);
}
}
return NULL;
}
/* Prepare the pcap handle to capture packets.
*
* This pcap is non-blocking and immediate mode is enabled to receive
* captured packets as soon as possible. The snaplen is set to 1024 bytes
* to limit the size of captured content. The format of the link-layer
* header is set to DLT_LINUX_SLL2 to enable handling various link-layer
* technologies.
*/
static pcap_t *traffic_monitor_prepare_pcap(void)
{
char errbuf[PCAP_ERRBUF_SIZE];
pcap_t *pcap;
int r;
/* Listen on all NICs in the namespace */
pcap = pcap_create("any", errbuf);
if (!pcap) {
log_err("Failed to open pcap: %s", errbuf);
return NULL;
}
/* Limit the size of the packet (first N bytes) */
r = pcap_set_snaplen(pcap, 1024);
if (r) {
log_err("Failed to set snaplen: %s", pcap_geterr(pcap));
goto error;
}
/* To receive packets as fast as possible */
r = pcap_set_immediate_mode(pcap, 1);
if (r) {
log_err("Failed to set immediate mode: %s", pcap_geterr(pcap));
goto error;
}
r = pcap_setnonblock(pcap, 1, errbuf);
if (r) {
log_err("Failed to set nonblock: %s", errbuf);
goto error;
}
r = pcap_activate(pcap);
if (r) {
log_err("Failed to activate pcap: %s", pcap_geterr(pcap));
goto error;
}
/* Determine the format of the link-layer header */
r = pcap_set_datalink(pcap, DLT_LINUX_SLL2);
if (r) {
log_err("Failed to set datalink: %s", pcap_geterr(pcap));
goto error;
}
return pcap;
error:
pcap_close(pcap);
return NULL;
}
static void encode_test_name(char *buf, size_t len, const char *test_name, const char *subtest_name)
{
char *p;
if (subtest_name)
snprintf(buf, len, "%s__%s", test_name, subtest_name);
else
snprintf(buf, len, "%s", test_name);
while ((p = strchr(buf, '/')))
*p = '_';
while ((p = strchr(buf, ' ')))
*p = '_';
}
#define PCAP_DIR "/tmp/tmon_pcap"
/* Start to monitor the network traffic in the given network namespace.
*
* netns: the name of the network namespace to monitor. If NULL, the
* current network namespace is monitored.
* test_name: the name of the running test.
* subtest_name: the name of the running subtest, if any; NULL when not
* running a subtest.
*
* This function will start a thread to capture packets going through NICs
* in the given network namespace.
*/
struct tmonitor_ctx *traffic_monitor_start(const char *netns, const char *test_name,
const char *subtest_name)
{
struct nstoken *nstoken = NULL;
struct tmonitor_ctx *ctx;
char test_name_buf[64];
static int tmon_seq;
int r;
if (netns) {
nstoken = open_netns(netns);
if (!nstoken)
return NULL;
}
ctx = malloc(sizeof(*ctx));
if (!ctx) {
log_err("Failed to malloc ctx");
goto fail_ctx;
}
memset(ctx, 0, sizeof(*ctx));
encode_test_name(test_name_buf, sizeof(test_name_buf), test_name, subtest_name);
snprintf(ctx->pkt_fname, sizeof(ctx->pkt_fname),
PCAP_DIR "/packets-%d-%d-%s-%s.log", getpid(), tmon_seq++,
test_name_buf, netns ? netns : "unknown");
r = mkdir(PCAP_DIR, 0755);
if (r && errno != EEXIST) {
log_err("Failed to create " PCAP_DIR);
goto fail_pcap;
}
ctx->pcap = traffic_monitor_prepare_pcap();
if (!ctx->pcap)
goto fail_pcap;
ctx->pcap_fd = pcap_get_selectable_fd(ctx->pcap);
if (ctx->pcap_fd < 0) {
log_err("Failed to get pcap fd");
goto fail_dumper;
}
/* Create a packet file */
ctx->dumper = pcap_dump_open(ctx->pcap, ctx->pkt_fname);
if (!ctx->dumper) {
log_err("Failed to open pcap dump: %s", ctx->pkt_fname);
goto fail_dumper;
}
/* Create an eventfd to wake up the monitor thread */
ctx->wake_fd = eventfd(0, 0);
if (ctx->wake_fd < 0) {
log_err("Failed to create eventfd");
goto fail_eventfd;
}
r = pthread_create(&ctx->thread, NULL, traffic_monitor_thread, ctx);
if (r) {
log_err("Failed to create thread");
goto fail;
}
close_netns(nstoken);
return ctx;
fail:
close(ctx->wake_fd);
fail_eventfd:
pcap_dump_close(ctx->dumper);
unlink(ctx->pkt_fname);
fail_dumper:
pcap_close(ctx->pcap);
fail_pcap:
free(ctx);
fail_ctx:
close_netns(nstoken);
return NULL;
}
static void traffic_monitor_release(struct tmonitor_ctx *ctx)
{
pcap_close(ctx->pcap);
pcap_dump_close(ctx->dumper);
close(ctx->wake_fd);
free(ctx);
}
/* Stop the network traffic monitor.
*
* ctx: the context returned by traffic_monitor_start()
*/
void traffic_monitor_stop(struct tmonitor_ctx *ctx)
{
__u64 w = 1;
if (!ctx)
return;
/* Stop the monitor thread */
ctx->done = true;
/* Wake up the background thread. */
write(ctx->wake_fd, &w, sizeof(w));
pthread_join(ctx->thread, NULL);
printf("Packet file: %s\n", strrchr(ctx->pkt_fname, '/') + 1);
traffic_monitor_release(ctx);
}
#endif /* TRAFFIC_MONITOR */


@@ -23,7 +23,6 @@ typedef __u16 __sum16;
struct network_helper_opts {
int timeout_ms;
bool must_fail;
int proto;
/* +ve: Passed to listen() as-is.
* 0: Default when the test does not set
@@ -70,8 +69,10 @@ int client_socket(int family, int type,
const struct network_helper_opts *opts);
int connect_to_addr(int type, const struct sockaddr_storage *addr, socklen_t len,
const struct network_helper_opts *opts);
int connect_to_addr_str(int family, int type, const char *addr_str, __u16 port,
const struct network_helper_opts *opts);
int connect_to_fd(int server_fd, int timeout_ms);
int connect_to_fd_opts(int server_fd, int type, const struct network_helper_opts *opts);
int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts);
int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms);
int fastopen_connect(int server_fd, const char *data, unsigned int data_len,
int timeout_ms);
@@ -92,6 +93,8 @@ struct nstoken;
struct nstoken *open_netns(const char *name);
void close_netns(struct nstoken *token);
int send_recv_data(int lfd, int fd, uint32_t total_bytes);
int make_netns(const char *name);
int remove_netns(const char *name);
static __u16 csum_fold(__u32 csum)
{
@@ -135,4 +138,22 @@ static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
return csum_fold((__u32)s);
}
struct tmonitor_ctx;
#ifdef TRAFFIC_MONITOR
struct tmonitor_ctx *traffic_monitor_start(const char *netns, const char *test_name,
const char *subtest_name);
void traffic_monitor_stop(struct tmonitor_ctx *ctx);
#else
static inline struct tmonitor_ctx *traffic_monitor_start(const char *netns, const char *test_name,
const char *subtest_name)
{
return NULL;
}
static inline void traffic_monitor_stop(struct tmonitor_ctx *ctx)
{
}
#endif
#endif
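
/* Illustrative only, not part of the patch: a test can bracket its network
 * activity with the monitor; with TRAFFIC_MONITOR undefined the stubs above
 * turn this into a no-op. All names here are made up.
 */
static inline void example_monitored_test(void)
{
	struct tmonitor_ctx *tmon;

	tmon = traffic_monitor_start("example_ns", "example_test", NULL);
	/* ... drive client/server traffic here ... */
	traffic_monitor_stop(tmon);	/* a NULL ctx is handled gracefully */
}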


@@ -283,9 +283,11 @@ static void test_uprobe_sleepable(struct test_attach_probe *skel)
trigger_func3();
ASSERT_EQ(skel->bss->uprobe_byname3_sleepable_res, 9, "check_uprobe_byname3_sleepable_res");
ASSERT_EQ(skel->bss->uprobe_byname3_res, 10, "check_uprobe_byname3_res");
ASSERT_EQ(skel->bss->uretprobe_byname3_sleepable_res, 11, "check_uretprobe_byname3_sleepable_res");
ASSERT_EQ(skel->bss->uretprobe_byname3_res, 12, "check_uretprobe_byname3_res");
ASSERT_EQ(skel->bss->uprobe_byname3_str_sleepable_res, 10, "check_uprobe_byname3_str_sleepable_res");
ASSERT_EQ(skel->bss->uprobe_byname3_res, 11, "check_uprobe_byname3_res");
ASSERT_EQ(skel->bss->uretprobe_byname3_sleepable_res, 12, "check_uretprobe_byname3_sleepable_res");
ASSERT_EQ(skel->bss->uretprobe_byname3_str_sleepable_res, 13, "check_uretprobe_byname3_str_sleepable_res");
ASSERT_EQ(skel->bss->uretprobe_byname3_res, 14, "check_uretprobe_byname3_res");
}
void test_attach_probe(void)


@@ -1218,7 +1218,7 @@ static void test_bpf_sk_storage_get(void)
bpf_iter_bpf_sk_storage_helpers__destroy(skel);
}
static void test_bpf_sk_stoarge_map_iter_fd(void)
static void test_bpf_sk_storage_map_iter_fd(void)
{
struct bpf_iter_bpf_sk_storage_map *skel;
@@ -1693,7 +1693,7 @@ void test_bpf_iter(void)
if (test__start_subtest("bpf_sk_storage_map"))
test_bpf_sk_storage_map();
if (test__start_subtest("bpf_sk_storage_map_iter_fd"))
test_bpf_sk_stoarge_map_iter_fd();
test_bpf_sk_storage_map_iter_fd();
if (test__start_subtest("bpf_sk_storage_delete"))
test_bpf_sk_storage_delete();
if (test__start_subtest("bpf_sk_storage_get"))
