mirror of https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Alexei Starovoitov says:

====================
pull-request: bpf-next 2021-12-30

The following pull-request contains BPF updates for your *net-next* tree.

We've added 72 non-merge commits during the last 20 day(s) which contain
a total of 223 files changed, 3510 insertions(+), 1591 deletions(-).

The main changes are:

1) Automatic setrlimit in libbpf when bpf is memcg's in the kernel, from Andrii.

2) Beautify and de-verbose verifier logs, from Christy.

3) Composable verifier types, from Hao.

4) bpf_strncmp helper, from Hou.

5) bpf.h header dependency cleanup, from Jakub.

6) get_func_[arg|ret|arg_cnt] helpers, from Jiri.

7) Sleepable local storage, from KP.

8) Extend kfunc with PTR_TO_CTX, PTR_TO_MEM argument support, from Kumar.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
commit e63a023489
Documentation/bpf/classic_vs_extended.rst (new file, 376 lines)
@@ -0,0 +1,376 @@
===================
Classic BPF vs eBPF
===================

eBPF is designed to be JITed with one to one mapping, which can also open up
the possibility for GCC/LLVM compilers to generate optimized eBPF code through
an eBPF backend that performs almost as fast as natively compiled code.

Some core changes of the eBPF format from classic BPF:

- Number of registers increases from 2 to 10:

  The old format had two registers A and X, and a hidden frame pointer. The
  new layout extends this to be 10 internal registers and a read-only frame
  pointer. Since 64-bit CPUs are passing arguments to functions via registers
  the number of args from eBPF program to in-kernel function is restricted
  to 5 and one register is used to accept return value from an in-kernel
  function. Natively, x86_64 passes first 6 arguments in registers, aarch64/
  sparcv9/mips64 have 7 - 8 registers for arguments; x86_64 has 6 callee saved
  registers, and aarch64/sparcv9/mips64 have 11 or more callee saved registers.

  Thus, all eBPF registers map one to one to HW registers on x86_64, aarch64,
  etc, and the eBPF calling convention maps directly to ABIs used by the
  kernel on 64-bit architectures.

  On 32-bit architectures the JIT may map programs that use only 32-bit
  arithmetic and may let more complex programs be interpreted.

  R0 - R5 are scratch registers and the eBPF program needs to spill/fill them
  if necessary across calls. Note that there is only one eBPF program (== one
  eBPF main routine) and it cannot call other eBPF functions, it can only
  call predefined in-kernel functions, though.

- Register width increases from 32-bit to 64-bit:

  Still, the semantics of the original 32-bit ALU operations are preserved
  via 32-bit subregisters. All eBPF registers are 64-bit with 32-bit lower
  subregisters that zero-extend into 64-bit if they are being written to.
  That behavior maps directly to the x86_64 and arm64 subregister definitions,
  but makes other JITs more difficult.

  32-bit architectures run 64-bit eBPF programs via the interpreter.
  Their JITs may convert BPF programs that only use 32-bit subregisters into
  the native instruction set and let the rest be interpreted.

  Operation is 64-bit, because on 64-bit architectures, pointers are also
  64-bit wide, and we want to pass 64-bit values in/out of kernel functions,
  so 32-bit eBPF registers would otherwise require defining a register-pair
  ABI; it would then not be possible to use a direct eBPF register to HW
  register mapping, and the JIT would need to do combine/split/move
  operations for every register in and out of the function, which is complex,
  bug prone and slow. Another reason is the use of atomic 64-bit counters.

- Conditional jt/jf targets replaced with jt/fall-through:

  While the original design has constructs such as ``if (cond) jump_true;
  else jump_false;``, they are being replaced into alternative constructs
  like ``if (cond) jump_true; /* else fall-through */``.
- Introduces bpf_call insn and register passing convention for zero overhead
  calls from/to other kernel functions:

  Before an in-kernel function call, the eBPF program needs to
  place function arguments into R1 to R5 registers to satisfy the calling
  convention, then the interpreter will take them from registers and pass
  them to the in-kernel function. If R1 - R5 registers are mapped to CPU
  registers that are used for argument passing on the given architecture, the
  JIT compiler doesn't need to emit extra moves. Function arguments will be
  in the correct registers and the BPF_CALL instruction will be JITed as a
  single 'call' HW instruction. This calling convention was picked to cover
  common call situations without performance penalty.

  After an in-kernel function call, R1 - R5 are reset to unreadable and R0
  has the return value of the function. Since R6 - R9 are callee saved, their
  state is preserved across the call.

  For example, consider three C functions::

    u64 f1() { return (*_f2)(1); }
    u64 f2(u64 a) { return f3(a + 1, a); }
    u64 f3(u64 a, u64 b) { return a - b; }

  GCC can compile f1, f3 into x86_64::

    f1:
        movl $1, %edi
        movq _f2(%rip), %rax
        jmp *%rax
    f3:
        movq %rdi, %rax
        subq %rsi, %rax
        ret

  Function f2 in eBPF may look like::

    f2:
        bpf_mov R2, R1
        bpf_add R1, 1
        bpf_call f3
        bpf_exit

  If f2 is JITed and the pointer stored to ``_f2``, the calls f1 -> f2 -> f3
  and returns will be seamless. Without JIT, the __bpf_prog_run() interpreter
  needs to be used to call into f2.

  For practical reasons all eBPF programs have only one argument 'ctx' which
  is already placed into R1 (e.g. on __bpf_prog_run() startup) and the
  programs can call kernel functions with up to 5 arguments. Calls with 6 or
  more arguments are currently not supported, but this restriction can be
  lifted if necessary in the future.

  On 64-bit architectures all registers map to HW registers one to one. For
  example, the x86_64 JIT compiler can map them as ...

  ::

        R0 - rax
        R1 - rdi
        R2 - rsi
        R3 - rdx
        R4 - rcx
        R5 - r8
        R6 - rbx
        R7 - r13
        R8 - r14
        R9 - r15
        R10 - rbp

  ... since the x86_64 ABI mandates rdi, rsi, rdx, rcx, r8, r9 for argument
  passing and rbx, r12 - r15 are callee saved.

  Then the following eBPF pseudo-program::

        bpf_mov R6, R1 /* save ctx */
        bpf_mov R2, 2
        bpf_mov R3, 3
        bpf_mov R4, 4
        bpf_mov R5, 5
        bpf_call foo
        bpf_mov R7, R0 /* save foo() return value */
        bpf_mov R1, R6 /* restore ctx for next call */
        bpf_mov R2, 6
        bpf_mov R3, 7
        bpf_mov R4, 8
        bpf_mov R5, 9
        bpf_call bar
        bpf_add R0, R7
        bpf_exit

  After JIT to x86_64 it may look like::

        push %rbp
        mov %rsp,%rbp
        sub $0x228,%rsp
        mov %rbx,-0x228(%rbp)
        mov %r13,-0x220(%rbp)
        mov %rdi,%rbx
        mov $0x2,%esi
        mov $0x3,%edx
        mov $0x4,%ecx
        mov $0x5,%r8d
        callq foo
        mov %rax,%r13
        mov %rbx,%rdi
        mov $0x6,%esi
        mov $0x7,%edx
        mov $0x8,%ecx
        mov $0x9,%r8d
        callq bar
        add %r13,%rax
        mov -0x228(%rbp),%rbx
        mov -0x220(%rbp),%r13
        leaveq
        retq

  Which is in this example equivalent in C to::

    u64 bpf_filter(u64 ctx)
    {
        return foo(ctx, 2, 3, 4, 5) + bar(ctx, 6, 7, 8, 9);
    }

  In-kernel functions foo() and bar() with prototype: u64 (*)(u64 arg1, u64
  arg2, u64 arg3, u64 arg4, u64 arg5); will receive arguments in the proper
  registers and place their return value into ``%rax`` which is R0 in eBPF.
  Prologue and epilogue are emitted by the JIT and are implicit in the
  interpreter. R0-R5 are scratch registers, so the eBPF program needs to
  preserve them across calls as defined by the calling convention.

  For example the following program is invalid::

        bpf_mov R1, 1
        bpf_call foo
        bpf_mov R0, R1
        bpf_exit

  After the call the registers R1-R5 contain junk values and cannot be read.
  An in-kernel verifier.rst is used to validate eBPF programs.

Also in the new design, eBPF is limited to 4096 insns, which means that any
program will terminate quickly and will only call a fixed number of kernel
functions. Original BPF and eBPF are two operand instructions,
which helps to do one-to-one mapping between eBPF insn and x86 insn during
JIT.

The input context pointer for invoking the interpreter function is generic,
its content is defined by a specific use case. For seccomp register R1 points
to seccomp_data, for converted BPF filters R1 points to a skb.

A program that is translated internally consists of the following elements::

  op:16, jt:8, jf:8, k:32    ==>    op:8, dst_reg:4, src_reg:4, off:16, imm:32

So far 87 eBPF instructions were implemented. The 8-bit 'op' opcode field
has room for new instructions. Some of them may use 16/24/32 byte encoding.
New instructions must be a multiple of 8 bytes to preserve backward
compatibility.
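For reference, this 8-byte layout is exactly what the kernel UAPI declares as
``struct bpf_insn`` in include/uapi/linux/bpf.h::

  struct bpf_insn {
        __u8    code;           /* opcode */
        __u8    dst_reg:4;      /* dest register */
        __u8    src_reg:4;      /* source register */
        __s16   off;            /* signed offset */
        __s32   imm;            /* signed immediate constant */
  };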
eBPF is a general purpose RISC instruction set. Not every register and
every instruction are used during translation from original BPF to eBPF.
For example, socket filters are not using the ``exclusive add`` instruction,
but tracing filters may do so to maintain counters of events. Register R9 is
not used by socket filters either, but more complex filters may be running
out of registers and would have to resort to spill/fill to stack.

eBPF can be used as a generic assembler for last step performance
optimizations; socket filters and seccomp are using it as an assembler.
Tracing filters may use it as an assembler to generate code from the kernel.
In-kernel usage may not be bounded by security considerations, since
generated eBPF code may be optimizing an internal code path and not being
exposed to user space. Safety of eBPF can come from the verifier.rst. In such
use cases as described, it may be used as a safe instruction set.

Just like the original BPF, eBPF runs within a controlled environment,
is deterministic and the kernel can easily prove that. The safety of the
program can be determined in two steps: the first step does a depth-first
search to disallow loops and performs other CFG validation; the second step
starts from the first insn and descends all possible paths. It simulates
execution of every insn and observes the state change of registers and stack.

opcode encoding
===============

eBPF reuses most of the opcode encoding from classic BPF to simplify the
conversion of classic BPF to eBPF.

For arithmetic and jump instructions the 8-bit 'code' field is divided into
three parts::

  +----------------+--------+--------------------+
  |   4 bits       |  1 bit |   3 bits           |
  | operation code | source | instruction class  |
  +----------------+--------+--------------------+
  (MSB)                                      (LSB)

Three LSB bits store instruction class which is one of:

  ===================     ===============
  Classic BPF classes     eBPF classes
  ===================     ===============
  BPF_LD    0x00          BPF_LD    0x00
  BPF_LDX   0x01          BPF_LDX   0x01
  BPF_ST    0x02          BPF_ST    0x02
  BPF_STX   0x03          BPF_STX   0x03
  BPF_ALU   0x04          BPF_ALU   0x04
  BPF_JMP   0x05          BPF_JMP   0x05
  BPF_RET   0x06          BPF_JMP32 0x06
  BPF_MISC  0x07          BPF_ALU64 0x07
  ===================     ===============

The 4th bit encodes the source operand ...

::

        BPF_K     0x00
        BPF_X     0x08

 * in classic BPF, this means::

        BPF_SRC(code) == BPF_X - use register X as source operand
        BPF_SRC(code) == BPF_K - use 32-bit immediate as source operand

 * in eBPF, this means::

        BPF_SRC(code) == BPF_X - use 'src_reg' register as source operand
        BPF_SRC(code) == BPF_K - use 32-bit immediate as source operand

... and four MSB bits store operation code.
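The kernel extracts these three fields with the mask macros inherited from
classic BPF, defined in include/uapi/linux/bpf_common.h::

        #define BPF_CLASS(code) ((code) & 0x07)  /* 3 LSB: instruction class */
        #define BPF_SRC(code)   ((code) & 0x08)  /* 1 bit: BPF_K or BPF_X */
        #define BPF_OP(code)    ((code) & 0xf0)  /* 4 MSB: operation code */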
If BPF_CLASS(code) == BPF_ALU or BPF_ALU64 [ in eBPF ], BPF_OP(code) is one of::

  BPF_ADD   0x00
  BPF_SUB   0x10
  BPF_MUL   0x20
  BPF_DIV   0x30
  BPF_OR    0x40
  BPF_AND   0x50
  BPF_LSH   0x60
  BPF_RSH   0x70
  BPF_NEG   0x80
  BPF_MOD   0x90
  BPF_XOR   0xa0
  BPF_MOV   0xb0  /* eBPF only: mov reg to reg */
  BPF_ARSH  0xc0  /* eBPF only: sign extending shift right */
  BPF_END   0xd0  /* eBPF only: endianness conversion */

If BPF_CLASS(code) == BPF_JMP or BPF_JMP32 [ in eBPF ], BPF_OP(code) is one of::

  BPF_JA    0x00  /* BPF_JMP only */
  BPF_JEQ   0x10
  BPF_JGT   0x20
  BPF_JGE   0x30
  BPF_JSET  0x40
  BPF_JNE   0x50  /* eBPF only: jump != */
  BPF_JSGT  0x60  /* eBPF only: signed '>' */
  BPF_JSGE  0x70  /* eBPF only: signed '>=' */
  BPF_CALL  0x80  /* eBPF BPF_JMP only: function call */
  BPF_EXIT  0x90  /* eBPF BPF_JMP only: function return */
  BPF_JLT   0xa0  /* eBPF only: unsigned '<' */
  BPF_JLE   0xb0  /* eBPF only: unsigned '<=' */
  BPF_JSLT  0xc0  /* eBPF only: signed '<' */
  BPF_JSLE  0xd0  /* eBPF only: signed '<=' */

So BPF_ADD | BPF_X | BPF_ALU means 32-bit addition in both classic BPF
and eBPF. There are only two registers in classic BPF, so it means A += X.
In eBPF it means dst_reg = (u32) dst_reg + (u32) src_reg; similarly,
BPF_XOR | BPF_K | BPF_ALU means A ^= imm32 in classic BPF and the analogous
dst_reg = (u32) dst_reg ^ (u32) imm32 in eBPF.

Classic BPF is using the BPF_MISC class to represent A = X and X = A moves.
eBPF is using the BPF_MOV | BPF_X | BPF_ALU code instead. Since there are no
BPF_MISC operations in eBPF, class 7 is used as BPF_ALU64 to mean
exactly the same operations as BPF_ALU, but with 64-bit wide operands
instead. So BPF_ADD | BPF_X | BPF_ALU64 means 64-bit addition, i.e.:
dst_reg = dst_reg + src_reg
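As a sketch of how such an opcode is assembled in kernel sources, the
convenience macro BPF_ALU64_REG() from the kernel-internal
include/linux/filter.h (a build helper, not part of the encoding itself)
constructs exactly this insn::

        BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_7)

        /* expands to:
         * ((struct bpf_insn) {
         *      .code    = BPF_ALU64 | BPF_OP(BPF_ADD) | BPF_X,
         *      .dst_reg = BPF_REG_0,   R0 += R7, 64-bit
         *      .src_reg = BPF_REG_7,
         *      .off     = 0,
         *      .imm     = 0 })
         */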
Classic BPF wastes the whole BPF_RET class to represent a single ``ret``
operation. Classic BPF_RET | BPF_K means copy imm32 into the return register
and perform function exit. eBPF is modeled to match CPU, so BPF_JMP | BPF_EXIT
in eBPF means function exit only. The eBPF program needs to store the return
value into register R0 before doing a BPF_EXIT. Class 6 in eBPF is used as
BPF_JMP32 to mean exactly the same operations as BPF_JMP, but with 32-bit wide
operands for the comparisons instead.
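Putting the jump and exit rules together, a minimal valid epilogue can be
sketched with the kernel-internal insn macros from include/linux/filter.h
(again helper macros, not part of the documented encoding)::

        BPF_MOV64_IMM(BPF_REG_0, 0),            /* R0 = 0 */
        BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),  /* if (R1 != 0) skip next insn,
                                                 * else fall through */
        BPF_MOV64_IMM(BPF_REG_0, 1),            /* R0 = 1 (only if R1 == 0) */
        BPF_EXIT_INSN(),                        /* return R0 */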
For load and store instructions the 8-bit 'code' field is divided as::

  +--------+--------+-------------------+
  | 3 bits | 2 bits |   3 bits          |
  |  mode  |  size  | instruction class |
  +--------+--------+-------------------+
  (MSB)                             (LSB)

Size modifier is one of ...

::

  BPF_W   0x00    /* word */
  BPF_H   0x08    /* half word */
  BPF_B   0x10    /* byte */
  BPF_DW  0x18    /* eBPF only, double word */

... which encodes size of load/store operation::

  B  - 1 byte
  H  - 2 byte
  W  - 4 byte
  DW - 8 byte (eBPF only)

Mode modifier is one of::

  BPF_IMM     0x00  /* used for 32-bit mov in classic BPF and 64-bit in eBPF */
  BPF_ABS     0x20
  BPF_IND     0x40
  BPF_MEM     0x60
  BPF_LEN     0x80  /* classic BPF only, reserved in eBPF */
  BPF_MSH     0xa0  /* classic BPF only, reserved in eBPF */
  BPF_ATOMIC  0xc0  /* eBPF only, atomic operations */
Documentation/bpf/index.rst

@@ -21,6 +21,7 @@ that goes into great technical depth about the BPF Architecture.
    helpers
    programs
    maps
+   classic_vs_extended.rst
    bpf_licensing
    test_debug
    other
Documentation/bpf/instruction-set.rst

@@ -3,296 +3,68 @@
 eBPF Instruction Set
 ====================

-eBPF is designed to be JITed with one to one mapping, which can also open up
-the possibility for GCC/LLVM compilers to generate optimized eBPF code through
-an eBPF backend that performs almost as fast as natively compiled code.
+Registers and calling convention
+================================

-Some core changes of the eBPF format from classic BPF:
+eBPF has 10 general purpose registers and a read-only frame pointer register,
+all of which are 64-bits wide.

-- Number of registers increase from 2 to 10:
+The eBPF calling convention is defined as:

-  The old format had two registers A and X, and a hidden frame pointer. The
-  new layout extends this to be 10 internal registers and a read-only frame
-  pointer. Since 64-bit CPUs are passing arguments to functions via registers
-  the number of args from eBPF program to in-kernel function is restricted
-  to 5 and one register is used to accept return value from an in-kernel
-  function. Natively, x86_64 passes first 6 arguments in registers, aarch64/
-  sparcv9/mips64 have 7 - 8 registers for arguments; x86_64 has 6 callee saved
-  registers, and aarch64/sparcv9/mips64 have 11 or more callee saved registers.
+* R0: return value from function calls, and exit value for eBPF programs
+* R1 - R5: arguments for function calls
+* R6 - R9: callee saved registers that function calls will preserve
+* R10: read-only frame pointer to access stack

-  Therefore, eBPF calling convention is defined as:
+R0 - R5 are scratch registers and eBPF programs needs to spill/fill them if
+necessary across calls.

-    * R0	- return value from in-kernel function, and exit value for eBPF program
-    * R1 - R5	- arguments from eBPF program to in-kernel function
-    * R6 - R9	- callee saved registers that in-kernel function will preserve
-    * R10	- read-only frame pointer to access stack
+Instruction classes
+===================

-  Thus, all eBPF registers map one to one to HW registers on x86_64, aarch64,
-  etc, and eBPF calling convention maps directly to ABIs used by the kernel on
-  64-bit architectures.
+The three LSB bits of the 'opcode' field store the instruction class:

-  On 32-bit architectures JIT may map programs that use only 32-bit arithmetic
-  and may let more complex programs to be interpreted.
+=========  =====
+class      value
+=========  =====
+BPF_LD     0x00
+BPF_LDX    0x01
+BPF_ST     0x02
+BPF_STX    0x03
+BPF_ALU    0x04
+BPF_JMP    0x05
+BPF_JMP32  0x06
+BPF_ALU64  0x07
+=========  =====

-  R0 - R5 are scratch registers and eBPF program needs spill/fill them if
-  necessary across calls. Note that there is only one eBPF program (== one
-  eBPF main routine) and it cannot call other eBPF functions, it can only
-  call predefined in-kernel functions, though.
+Arithmetic and jump instructions
+================================

-- Register width increases from 32-bit to 64-bit:
+For arithmetic and jump instructions (BPF_ALU, BPF_ALU64, BPF_JMP and
+BPF_JMP32), the 8-bit 'opcode' field is divided into three parts:

-  Still, the semantics of the original 32-bit ALU operations are preserved
-  via 32-bit subregisters. All eBPF registers are 64-bit with 32-bit lower
-  subregisters that zero-extend into 64-bit if they are being written to.
-  That behavior maps directly to x86_64 and arm64 subregister definition, but
-  makes other JITs more difficult.
+==============  ======  =================
+4 bits (MSB)    1 bit   3 bits (LSB)
+==============  ======  =================
+operation code  source  instruction class
+==============  ======  =================

-  32-bit architectures run 64-bit eBPF programs via interpreter.
-  Their JITs may convert BPF programs that only use 32-bit subregisters into
-  native instruction set and let the rest being interpreted.
+The 4th bit encodes the source operand:

-  Operation is 64-bit, because on 64-bit architectures, pointers are also
-  64-bit wide, and we want to pass 64-bit values in/out of kernel functions,
-  so 32-bit eBPF registers would otherwise require to define register-pair
-  ABI, thus, there won't be able to use a direct eBPF register to HW register
-  mapping and JIT would need to do combine/split/move operations for every
-  register in and out of the function, which is complex, bug prone and slow.
-  Another reason is the use of atomic 64-bit counters.
+  ======  =====  ========================================
+  source  value  description
+  ======  =====  ========================================
+  BPF_K   0x00   use 32-bit immediate as source operand
+  BPF_X   0x08   use 'src_reg' register as source operand
+  ======  =====  ========================================

-- Conditional jt/jf targets replaced with jt/fall-through:
+The four MSB bits store the operation code.

-  While the original design has constructs such as ``if (cond) jump_true;
-  else jump_false;``, they are being replaced into alternative constructs like
-  ``if (cond) jump_true; /* else fall-through */``.
-
-- Introduces bpf_call insn and register passing convention for zero overhead
-  calls from/to other kernel functions:
-
-  Before an in-kernel function call, the eBPF program needs to
-  place function arguments into R1 to R5 registers to satisfy calling
-  convention, then the interpreter will take them from registers and pass
-  to in-kernel function. If R1 - R5 registers are mapped to CPU registers
-  that are used for argument passing on given architecture, the JIT compiler
-  doesn't need to emit extra moves. Function arguments will be in the correct
-  registers and BPF_CALL instruction will be JITed as single 'call' HW
-  instruction. This calling convention was picked to cover common call
-  situations without performance penalty.
-
-  After an in-kernel function call, R1 - R5 are reset to unreadable and R0 has
-  a return value of the function. Since R6 - R9 are callee saved, their state
-  is preserved across the call.
-
-  For example, consider three C functions::
-
-    u64 f1() { return (*_f2)(1); }
-    u64 f2(u64 a) { return f3(a + 1, a); }
-    u64 f3(u64 a, u64 b) { return a - b; }
-
-  GCC can compile f1, f3 into x86_64::
-
-    f1:
-	movl $1, %edi
-	movq _f2(%rip), %rax
-	jmp *%rax
-    f3:
-	movq %rdi, %rax
-	subq %rsi, %rax
-	ret
-
-  Function f2 in eBPF may look like::
-
-    f2:
-	bpf_mov R2, R1
-	bpf_add R1, 1
-	bpf_call f3
-	bpf_exit
-
-  If f2 is JITed and the pointer stored to ``_f2``. The calls f1 -> f2 -> f3 and
-  returns will be seamless. Without JIT, __bpf_prog_run() interpreter needs to
-  be used to call into f2.
-
-  For practical reasons all eBPF programs have only one argument 'ctx' which is
-  already placed into R1 (e.g. on __bpf_prog_run() startup) and the programs
-  can call kernel functions with up to 5 arguments. Calls with 6 or more arguments
-  are currently not supported, but these restrictions can be lifted if necessary
-  in the future.
-
-  On 64-bit architectures all register map to HW registers one to one. For
-  example, x86_64 JIT compiler can map them as ...
-
-  ::
-
-	R0 - rax
-	R1 - rdi
-	R2 - rsi
-	R3 - rdx
-	R4 - rcx
-	R5 - r8
-	R6 - rbx
-	R7 - r13
-	R8 - r14
-	R9 - r15
-	R10 - rbp
-
-  ... since x86_64 ABI mandates rdi, rsi, rdx, rcx, r8, r9 for argument passing
-  and rbx, r12 - r15 are callee saved.
-
-  Then the following eBPF pseudo-program::
-
-	bpf_mov R6, R1 /* save ctx */
-	bpf_mov R2, 2
-	bpf_mov R3, 3
-	bpf_mov R4, 4
-	bpf_mov R5, 5
-	bpf_call foo
-	bpf_mov R7, R0 /* save foo() return value */
-	bpf_mov R1, R6 /* restore ctx for next call */
-	bpf_mov R2, 6
-	bpf_mov R3, 7
-	bpf_mov R4, 8
-	bpf_mov R5, 9
-	bpf_call bar
-	bpf_add R0, R7
-	bpf_exit
-
-  After JIT to x86_64 may look like::
-
-	push %rbp
-	mov %rsp,%rbp
-	sub $0x228,%rsp
-	mov %rbx,-0x228(%rbp)
-	mov %r13,-0x220(%rbp)
-	mov %rdi,%rbx
-	mov $0x2,%esi
-	mov $0x3,%edx
-	mov $0x4,%ecx
-	mov $0x5,%r8d
-	callq foo
-	mov %rax,%r13
-	mov %rbx,%rdi
-	mov $0x6,%esi
-	mov $0x7,%edx
-	mov $0x8,%ecx
-	mov $0x9,%r8d
-	callq bar
-	add %r13,%rax
-	mov -0x228(%rbp),%rbx
-	mov -0x220(%rbp),%r13
-	leaveq
-	retq
-
-  Which is in this example equivalent in C to::
-
-    u64 bpf_filter(u64 ctx)
-    {
-	return foo(ctx, 2, 3, 4, 5) + bar(ctx, 6, 7, 8, 9);
-    }
-
-  In-kernel functions foo() and bar() with prototype: u64 (*)(u64 arg1, u64
-  arg2, u64 arg3, u64 arg4, u64 arg5); will receive arguments in proper
-  registers and place their return value into ``%rax`` which is R0 in eBPF.
-  Prologue and epilogue are emitted by JIT and are implicit in the
-  interpreter. R0-R5 are scratch registers, so eBPF program needs to preserve
-  them across the calls as defined by calling convention.
-
-  For example the following program is invalid::
-
-	bpf_mov R1, 1
-	bpf_call foo
-	bpf_mov R0, R1
-	bpf_exit
-
-  After the call the registers R1-R5 contain junk values and cannot be read.
-  An in-kernel `eBPF verifier`_ is used to validate eBPF programs.
-
-Also in the new design, eBPF is limited to 4096 insns, which means that any
-program will terminate quickly and will only call a fixed number of kernel
-functions. Original BPF and eBPF are two operand instructions,
-which helps to do one-to-one mapping between eBPF insn and x86 insn during JIT.
-
-The input context pointer for invoking the interpreter function is generic,
-its content is defined by a specific use case. For seccomp register R1 points
-to seccomp_data, for converted BPF filters R1 points to a skb.
-
-A program, that is translated internally consists of the following elements::
-
-  op:16, jt:8, jf:8, k:32    ==>    op:8, dst_reg:4, src_reg:4, off:16, imm:32
-
-So far 87 eBPF instructions were implemented. 8-bit 'op' opcode field
-has room for new instructions. Some of them may use 16/24/32 byte encoding. New
-instructions must be multiple of 8 bytes to preserve backward compatibility.
-
-eBPF is a general purpose RISC instruction set. Not every register and
-every instruction are used during translation from original BPF to eBPF.
-For example, socket filters are not using ``exclusive add`` instruction, but
-tracing filters may do to maintain counters of events, for example. Register R9
-is not used by socket filters either, but more complex filters may be running
-out of registers and would have to resort to spill/fill to stack.
-
-eBPF can be used as a generic assembler for last step performance
-optimizations, socket filters and seccomp are using it as assembler. Tracing
-filters may use it as assembler to generate code from kernel. In kernel usage
-may not be bounded by security considerations, since generated eBPF code
-may be optimizing internal code path and not being exposed to the user space.
-Safety of eBPF can come from the `eBPF verifier`_. In such use cases as
-described, it may be used as safe instruction set.
-
-Just like the original BPF, eBPF runs within a controlled environment,
-is deterministic and the kernel can easily prove that. The safety of the program
-can be determined in two steps: first step does depth-first-search to disallow
-loops and other CFG validation; second step starts from the first insn and
-descends all possible paths. It simulates execution of every insn and observes
-the state change of registers and stack.
-
-eBPF opcode encoding
-====================
-
-eBPF is reusing most of the opcode encoding from classic to simplify conversion
-of classic BPF to eBPF. For arithmetic and jump instructions the 8-bit 'code'
-field is divided into three parts::
-
-  +----------------+--------+--------------------+
-  |   4 bits       |  1 bit |   3 bits           |
-  | operation code | source | instruction class  |
-  +----------------+--------+--------------------+
-  (MSB)                                      (LSB)
-
-Three LSB bits store instruction class which is one of:
-
-  ===================     ===============
-  Classic BPF classes     eBPF classes
-  ===================     ===============
-  BPF_LD    0x00          BPF_LD    0x00
-  BPF_LDX   0x01          BPF_LDX   0x01
-  BPF_ST    0x02          BPF_ST    0x02
-  BPF_STX   0x03          BPF_STX   0x03
-  BPF_ALU   0x04          BPF_ALU   0x04
-  BPF_JMP   0x05          BPF_JMP   0x05
-  BPF_RET   0x06          BPF_JMP32 0x06
-  BPF_MISC  0x07          BPF_ALU64 0x07
-  ===================     ===============
-
-When BPF_CLASS(code) == BPF_ALU or BPF_JMP, 4th bit encodes source operand ...
-
-::
-
-	BPF_K     0x00
-	BPF_X     0x08
-
- * in classic BPF, this means::
-
-	BPF_SRC(code) == BPF_X - use register X as source operand
-	BPF_SRC(code) == BPF_K - use 32-bit immediate as source operand
-
- * in eBPF, this means::
-
-	BPF_SRC(code) == BPF_X - use 'src_reg' register as source operand
-	BPF_SRC(code) == BPF_K - use 32-bit immediate as source operand
-
-... and four MSB bits store operation code.
-
-If BPF_CLASS(code) == BPF_ALU or BPF_ALU64 [ in eBPF ], BPF_OP(code) is one of::
+For class BPF_ALU or BPF_ALU64:

+  ========  =====  =========================
+  code      value  description
+  ========  =====  =========================
   BPF_ADD   0x00
   BPF_SUB   0x10
   BPF_MUL   0x20
@@ -304,116 +76,105 @@ If BPF_CLASS(code) == BPF_ALU or BPF_ALU64 [ in eBPF ], BPF_OP(code) is one of::
   BPF_NEG   0x80
   BPF_MOD   0x90
   BPF_XOR   0xa0
-  BPF_MOV   0xb0  /* eBPF only: mov reg to reg */
-  BPF_ARSH  0xc0  /* eBPF only: sign extending shift right */
-  BPF_END   0xd0  /* eBPF only: endianness conversion */
+  BPF_MOV   0xb0   mov reg to reg
+  BPF_ARSH  0xc0   sign extending shift right
+  BPF_END   0xd0   endianness conversion
+  ========  =====  =========================

-If BPF_CLASS(code) == BPF_JMP or BPF_JMP32 [ in eBPF ], BPF_OP(code) is one of::
+For class BPF_JMP or BPF_JMP32:

-  BPF_JA    0x00  /* BPF_JMP only */
+  ========  =====  =========================
+  code      value  description
+  ========  =====  =========================
+  BPF_JA    0x00   BPF_JMP only
   BPF_JEQ   0x10
   BPF_JGT   0x20
   BPF_JGE   0x30
   BPF_JSET  0x40
-  BPF_JNE   0x50  /* eBPF only: jump != */
-  BPF_JSGT  0x60  /* eBPF only: signed '>' */
-  BPF_JSGE  0x70  /* eBPF only: signed '>=' */
-  BPF_CALL  0x80  /* eBPF BPF_JMP only: function call */
-  BPF_EXIT  0x90  /* eBPF BPF_JMP only: function return */
-  BPF_JLT   0xa0  /* eBPF only: unsigned '<' */
-  BPF_JLE   0xb0  /* eBPF only: unsigned '<=' */
-  BPF_JSLT  0xc0  /* eBPF only: signed '<' */
-  BPF_JSLE  0xd0  /* eBPF only: signed '<=' */
+  BPF_JNE   0x50   jump '!='
+  BPF_JSGT  0x60   signed '>'
+  BPF_JSGE  0x70   signed '>='
+  BPF_CALL  0x80   function call
+  BPF_EXIT  0x90   function return
+  BPF_JLT   0xa0   unsigned '<'
+  BPF_JLE   0xb0   unsigned '<='
+  BPF_JSLT  0xc0   signed '<'
+  BPF_JSLE  0xd0   signed '<='
+  ========  =====  =========================

-So BPF_ADD | BPF_X | BPF_ALU means 32-bit addition in both classic BPF
-and eBPF. There are only two registers in classic BPF, so it means A += X.
-In eBPF it means dst_reg = (u32) dst_reg + (u32) src_reg; similarly,
-BPF_XOR | BPF_K | BPF_ALU means A ^= imm32 in classic BPF and analogous
-src_reg = (u32) src_reg ^ (u32) imm32 in eBPF.
+So BPF_ADD | BPF_X | BPF_ALU means::

-Classic BPF is using BPF_MISC class to represent A = X and X = A moves.
-eBPF is using BPF_MOV | BPF_X | BPF_ALU code instead. Since there are no
-BPF_MISC operations in eBPF, the class 7 is used as BPF_ALU64 to mean
-exactly the same operations as BPF_ALU, but with 64-bit wide operands
-instead. So BPF_ADD | BPF_X | BPF_ALU64 means 64-bit addition, i.e.:
-dst_reg = dst_reg + src_reg
+  dst_reg = (u32) dst_reg + (u32) src_reg;

-Classic BPF wastes the whole BPF_RET class to represent a single ``ret``
-operation. Classic BPF_RET | BPF_K means copy imm32 into return register
-and perform function exit. eBPF is modeled to match CPU, so BPF_JMP | BPF_EXIT
-in eBPF means function exit only. The eBPF program needs to store return
-value into register R0 before doing a BPF_EXIT. Class 6 in eBPF is used as
+Similarly, BPF_XOR | BPF_K | BPF_ALU means::
+
+  src_reg = (u32) src_reg ^ (u32) imm32
+
+eBPF is using BPF_MOV | BPF_X | BPF_ALU to represent A = B moves. BPF_ALU64
+is used to mean exactly the same operations as BPF_ALU, but with 64-bit wide
+operands instead. So BPF_ADD | BPF_X | BPF_ALU64 means 64-bit addition, i.e.::
+
+  dst_reg = dst_reg + src_reg
+
+BPF_JMP | BPF_EXIT means function exit only. The eBPF program needs to store
+the return value into register R0 before doing a BPF_EXIT. Class 6 is used as
 BPF_JMP32 to mean exactly the same operations as BPF_JMP, but with 32-bit wide
 operands for the comparisons instead.

-For load and store instructions the 8-bit 'code' field is divided as::
-
-  +--------+--------+-------------------+
-  | 3 bits | 2 bits |   3 bits          |
-  |  mode  |  size  | instruction class |
-  +--------+--------+-------------------+
-  (MSB)                             (LSB)
+Load and store instructions
+===========================

-Size modifier is one of ...
+For load and store instructions (BPF_LD, BPF_LDX, BPF_ST and BPF_STX), the
+8-bit 'opcode' field is divided as:

-::
+  ============  ======  =================
+  3 bits (MSB)  2 bits  3 bits (LSB)
+  ============  ======  =================
+  mode          size    instruction class
+  ============  ======  =================

-  BPF_W   0x00    /* word */
-  BPF_H   0x08    /* half word */
-  BPF_B   0x10    /* byte */
-  BPF_DW  0x18    /* eBPF only, double word */
+The size modifier is one of:

-... which encodes size of load/store operation::
+  =============  =====  =====================
+  size modifier  value  description
+  =============  =====  =====================
+  BPF_W          0x00   word        (4 bytes)
+  BPF_H          0x08   half word   (2 bytes)
+  BPF_B          0x10   byte
+  BPF_DW         0x18   double word (8 bytes)
+  =============  =====  =====================

-  B  - 1 byte
-  H  - 2 byte
-  W  - 4 byte
-  DW - 8 byte (eBPF only)
+The mode modifier is one of:

-Mode modifier is one of::
+  =============  =====  =====================
+  mode modifier  value  description
+  =============  =====  =====================
+  BPF_IMM        0x00   used for 64-bit mov
+  BPF_ABS        0x20
+  BPF_IND        0x40
+  BPF_MEM        0x60
+  BPF_ATOMIC     0xc0   atomic operations
+  =============  =====  =====================

-  BPF_IMM     0x00  /* used for 32-bit mov in classic BPF and 64-bit in eBPF */
-  BPF_ABS     0x20
-  BPF_IND     0x40
-  BPF_MEM     0x60
-  BPF_LEN     0x80  /* classic BPF only, reserved in eBPF */
-  BPF_MSH     0xa0  /* classic BPF only, reserved in eBPF */
-  BPF_ATOMIC  0xc0  /* eBPF only, atomic operations */
+BPF_MEM | <size> | BPF_STX means::

-eBPF has two non-generic instructions: (BPF_ABS | <size> | BPF_LD) and
-(BPF_IND | <size> | BPF_LD) which are used to access packet data.
+  *(size *) (dst_reg + off) = src_reg

-They had to be carried over from classic to have strong performance of
-socket filters running in eBPF interpreter. These instructions can only
-be used when interpreter context is a pointer to ``struct sk_buff`` and
-have seven implicit operands. Register R6 is an implicit input that must
-contain pointer to sk_buff. Register R0 is an implicit output which contains
-the data fetched from the packet. Registers R1-R5 are scratch registers
-and must not be used to store the data across BPF_ABS | BPF_LD or
-BPF_IND | BPF_LD instructions.
+BPF_MEM | <size> | BPF_ST means::

-These instructions have implicit program exit condition as well. When
-eBPF program is trying to access the data beyond the packet boundary,
-the interpreter will abort the execution of the program. JIT compilers
-therefore must preserve this property. src_reg and imm32 fields are
-explicit inputs to these instructions.
+  *(size *) (dst_reg + off) = imm32

-For example::
+BPF_MEM | <size> | BPF_LDX means::

-  BPF_IND | BPF_W | BPF_LD means:
-
-    R0 = ntohl(*(u32 *) (((struct sk_buff *) R6)->data + src_reg + imm32))
-    and R1 - R5 were scratched.
-
-Unlike classic BPF instruction set, eBPF has generic load/store operations::
-
-  BPF_MEM | <size> | BPF_STX:  *(size *) (dst_reg + off) = src_reg
-  BPF_MEM | <size> | BPF_ST:   *(size *) (dst_reg + off) = imm32
-  BPF_MEM | <size> | BPF_LDX:  dst_reg = *(size *) (src_reg + off)
+  dst_reg = *(size *) (src_reg + off)

 Where size is one of: BPF_B or BPF_H or BPF_W or BPF_DW.

-It also includes atomic operations, which use the immediate field for extra
+Atomic operations
+-----------------
+
+eBPF includes atomic operations, which use the immediate field for extra
 encoding::

   .imm = BPF_ADD, .code = BPF_ATOMIC | BPF_W | BPF_STX: lock xadd *(u32 *)(dst_reg + off16) += src_reg
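The generic load/store and atomic encodings above can be sketched with the
kernel-internal helper macros from include/linux/filter.h (BPF_STX_MEM(),
BPF_LDX_MEM() and BPF_ATOMIC_OP() are insn-building helpers, not part of the
uapi)::

  /* *(u64 *)(R10 - 8) = R1          BPF_MEM | BPF_DW | BPF_STX */
  BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
  /* R0 = *(u64 *)(R10 - 8)          BPF_MEM | BPF_DW | BPF_LDX */
  BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
  /* lock xadd *(u32 *)(R10 - 16) += R0
   * BPF_ATOMIC | BPF_W | BPF_STX with .imm = BPF_ADD */
  BPF_ATOMIC_OP(BPF_W, BPF_ADD, BPF_REG_10, BPF_REG_0, -16),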
@@ -457,11 +218,36 @@ You may encounter ``BPF_XADD`` - this is a legacy name for ``BPF_ATOMIC``,
 referring to the exclusive-add operation encoded when the immediate field is
 zero.

+16-byte instructions
+--------------------
+
 eBPF has one 16-byte instruction: ``BPF_LD | BPF_DW | BPF_IMM`` which consists
 of two consecutive ``struct bpf_insn`` 8-byte blocks and interpreted as single
 instruction that loads 64-bit immediate value into a dst_reg.
 Classic BPF has similar instruction: ``BPF_LD | BPF_W | BPF_IMM`` which loads
 32-bit immediate value into a register.

-.. Links:
-.. _eBPF verifier: verifiers.rst
+Packet access instructions
+--------------------------
+
+eBPF has two non-generic instructions: (BPF_ABS | <size> | BPF_LD) and
+(BPF_IND | <size> | BPF_LD) which are used to access packet data.
+
+They had to be carried over from classic BPF to have strong performance of
+socket filters running in eBPF interpreter. These instructions can only
+be used when interpreter context is a pointer to ``struct sk_buff`` and
+have seven implicit operands. Register R6 is an implicit input that must
+contain pointer to sk_buff. Register R0 is an implicit output which contains
+the data fetched from the packet. Registers R1-R5 are scratch registers
+and must not be used to store the data across BPF_ABS | BPF_LD or
+BPF_IND | BPF_LD instructions.
+
+These instructions have implicit program exit condition as well. When
+eBPF program is trying to access the data beyond the packet boundary,
+the interpreter will abort the execution of the program. JIT compilers
+therefore must preserve this property. src_reg and imm32 fields are
+explicit inputs to these instructions.
+
+For example, BPF_IND | BPF_W | BPF_LD means::
+
+  R0 = ntohl(*(u32 *) (((struct sk_buff *) R6)->data + src_reg + imm32))
+
+and R1 - R5 are clobbered.
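For the 16-byte instruction described above, the kernel-internal
include/linux/filter.h provides the BPF_LD_IMM64() helper, which emits the two
consecutive 8-byte blocks (a sketch of the helper's behavior, not uapi)::

  /* dst_reg = 64-bit immediate, encoded as two struct bpf_insn:
   *   insn[0]: code = BPF_LD | BPF_DW | BPF_IMM, imm = lower 32 bits
   *   insn[1]: code = 0 (reserved),              imm = upper 32 bits
   */
  BPF_LD_IMM64(BPF_REG_1, 0x123456789abcdef0ULL),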
@@ -9,6 +9,7 @@
 #define KMSG_COMPONENT "hugetlb"
 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
 
+#include <asm/pgalloc.h>
 #include <linux/mm.h>
 #include <linux/hugetlb.h>
 #include <linux/mman.h>
@@ -1976,7 +1976,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 				void *orig_call)
 {
 	int ret, i, nr_args = m->nr_args;
-	int stack_size = nr_args * 8;
+	int regs_off, ip_off, args_off, stack_size = nr_args * 8;
 	struct bpf_tramp_progs *fentry = &tprogs[BPF_TRAMP_FENTRY];
 	struct bpf_tramp_progs *fexit = &tprogs[BPF_TRAMP_FEXIT];
 	struct bpf_tramp_progs *fmod_ret = &tprogs[BPF_TRAMP_MODIFY_RETURN];
@@ -1991,14 +1991,39 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 	if (!is_valid_bpf_tramp_flags(flags))
 		return -EINVAL;
 
+	/* Generated trampoline stack layout:
+	 *
+	 * RBP + 8         [ return address  ]
+	 * RBP + 0         [ RBP             ]
+	 *
+	 * RBP - 8         [ return value    ]  BPF_TRAMP_F_CALL_ORIG or
+	 *                                      BPF_TRAMP_F_RET_FENTRY_RET flags
+	 *
+	 *                 [ reg_argN        ]  always
+	 *                 [ ...             ]
+	 * RBP - regs_off  [ reg_arg1        ]  program's ctx pointer
+	 *
+	 * RBP - args_off  [ args count      ]  always
+	 *
+	 * RBP - ip_off    [ traced function ]  BPF_TRAMP_F_IP_ARG flag
+	 */
+
 	/* room for return value of orig_call or fentry prog */
 	save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET);
 	if (save_ret)
 		stack_size += 8;
 
+	regs_off = stack_size;
+
+	/* args count */
+	stack_size += 8;
+	args_off = stack_size;
+
 	if (flags & BPF_TRAMP_F_IP_ARG)
 		stack_size += 8; /* room for IP address argument */
 
+	ip_off = stack_size;
+
 	if (flags & BPF_TRAMP_F_SKIP_FRAME)
 		/* skip patched call instruction and point orig_call to actual
 		 * body of the kernel function.
@@ -2012,23 +2037,25 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 	EMIT4(0x48, 0x83, 0xEC, stack_size); /* sub rsp, stack_size */
 	EMIT1(0x53);		 /* push rbx */
 
+	/* Store number of arguments of the traced function:
+	 *   mov rax, nr_args
+	 *   mov QWORD PTR [rbp - args_off], rax
+	 */
+	emit_mov_imm64(&prog, BPF_REG_0, 0, (u32) nr_args);
+	emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -args_off);
+
 	if (flags & BPF_TRAMP_F_IP_ARG) {
 		/* Store IP address of the traced function:
 		 * mov rax, QWORD PTR [rbp + 8]
 		 * sub rax, X86_PATCH_SIZE
-		 * mov QWORD PTR [rbp - stack_size], rax
+		 * mov QWORD PTR [rbp - ip_off], rax
 		 */
 		emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, 8);
 		EMIT4(0x48, 0x83, 0xe8, X86_PATCH_SIZE);
-		emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -stack_size);
-
-		/* Continue with stack_size for regs storage, stack will
-		 * be correctly restored with 'leave' instruction.
-		 */
-		stack_size -= 8;
+		emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -ip_off);
 	}
 
-	save_regs(m, &prog, nr_args, stack_size);
+	save_regs(m, &prog, nr_args, regs_off);
 
 	if (flags & BPF_TRAMP_F_CALL_ORIG) {
 		/* arg1: mov rdi, im */
@@ -2040,7 +2067,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 	}
 
 	if (fentry->nr_progs)
-		if (invoke_bpf(m, &prog, fentry, stack_size,
+		if (invoke_bpf(m, &prog, fentry, regs_off,
 			       flags & BPF_TRAMP_F_RET_FENTRY_RET))
 			return -EINVAL;
 
@@ -2050,7 +2077,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 		if (!branches)
 			return -ENOMEM;
 
-		if (invoke_bpf_mod_ret(m, &prog, fmod_ret, stack_size,
+		if (invoke_bpf_mod_ret(m, &prog, fmod_ret, regs_off,
 				       branches)) {
 			ret = -EINVAL;
 			goto cleanup;
@@ -2058,7 +2085,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 	}
 
 	if (flags & BPF_TRAMP_F_CALL_ORIG) {
-		restore_regs(m, &prog, nr_args, stack_size);
+		restore_regs(m, &prog, nr_args, regs_off);
 
 		/* call original function */
 		if (emit_call(&prog, orig_call, prog)) {
@@ -2088,13 +2115,13 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 	}
 
 	if (fexit->nr_progs)
-		if (invoke_bpf(m, &prog, fexit, stack_size, false)) {
+		if (invoke_bpf(m, &prog, fexit, regs_off, false)) {
 			ret = -EINVAL;
 			goto cleanup;
 		}
 
 	if (flags & BPF_TRAMP_F_RESTORE_REGS)
-		restore_regs(m, &prog, nr_args, stack_size);
+		restore_regs(m, &prog, nr_args, regs_off);
 
 	/* This needs to be done regardless. If there were fmod_ret programs,
 	 * the return value is only updated on the stack and still needs to be
@@ -6,6 +6,7 @@
  */
 #include <linux/module.h>
 #include <linux/firmware.h>
+#include <linux/vmalloc.h>
 
 #include <net/bluetooth/bluetooth.h>
 #include <net/bluetooth/hci_core.h>

@@ -33,6 +33,7 @@
  * SOFTWARE.
  */
 
+#include <linux/if_vlan.h>
 #include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/slab.h>

@@ -1,5 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
 /* Copyright (c) 2015 - 2021 Intel Corporation */
+#include <linux/etherdevice.h>
+
 #include "osdep.h"
 #include "status.h"
 #include "hmc.h"

@@ -1,5 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
 /* Copyright (c) 2016 - 2021 Intel Corporation */
+#include <linux/etherdevice.h>
+
 #include "osdep.h"
 #include "status.h"
 #include "hmc.h"

@@ -32,6 +32,7 @@
 
 #include <linux/kref.h>
 #include <linux/slab.h>
+#include <linux/sched/mm.h>
 #include <rdma/ib_umem.h>
 
 #include "mlx5_ib.h"

@@ -30,6 +30,7 @@
  * SOFTWARE.
  */
 
+#include <linux/etherdevice.h>
 #include <linux/module.h>
 #include <rdma/ib_umem.h>
 #include <rdma/ib_cache.h>

@@ -11,6 +11,7 @@
 #include <linux/net.h>
 #include <linux/igmp.h>
 #include <linux/workqueue.h>
+#include <net/sch_generic.h>
 #include <net/net_namespace.h>
 #include <net/ip.h>
 #include <net/udp.h>

@@ -23,6 +23,7 @@
  * of the GNU General Public License, incorporated herein by reference.
  */
 
+#include <linux/compat.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/init.h>

@@ -35,6 +35,7 @@
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/fcntl.h>
+#include <linux/filter.h>
 #include <linux/interrupt.h>
 #include <linux/ptrace.h>
 #include <linux/ioport.h>

@@ -8,6 +8,7 @@
  *
  * Many thanks to Klaus Hitschler <klaus.hitschler@gmx.de>
  */
+#include <asm/unaligned.h>
 #include <linux/netdevice.h>
 #include <linux/usb.h>
 #include <linux/module.h>

@@ -10,6 +10,7 @@
 #include <linux/delay.h>
 #include <linux/export.h>
 #include <linux/gpio.h>
+#include <linux/if_vlan.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/platform_data/microchip-ksz.h>

@@ -5,6 +5,7 @@
  */
 
 #include <net/dsa.h>
+#include <linux/etherdevice.h>
 #include <linux/if_bridge.h>
 #include <linux/of_device.h>
 #include <linux/netdev_features.h>
@@ -434,7 +434,7 @@ static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp)
 		xdp_stat = &rx_ring->rx_stats.xdp_pass;
 		break;
 	default:
-		bpf_warn_invalid_xdp_action(verdict);
+		bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, verdict);
 		xdp_stat = &rx_ring->rx_stats.xdp_invalid;
 	}
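The driver hunks that follow all make the same mechanical change:
bpf_warn_invalid_xdp_action() now takes the net_device and the BPF program so
the warning can identify the offender. The updated declaration, as it lands in
include/linux/filter.h with this series, is:

	void bpf_warn_invalid_xdp_action(struct net_device *dev,
					 struct bpf_prog *prog, u32 act);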
|
||||
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/netdevice.h>
|
||||
#include <linux/skbuff.h>
|
||||
#include <uapi/linux/bpf.h>
|
||||
|
||||
#include "ena_com.h"
|
||||
#include "ena_eth_com.h"
|
||||
|
@ -9,6 +9,7 @@
|
||||
|
||||
#include <linux/pci.h>
|
||||
#include <linux/netdevice.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <net/devlink.h>
|
||||
#include "bnxt_hsi.h"
|
||||
#include "bnxt.h"
|
||||
|
@ -195,7 +195,7 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
|
||||
*event |= BNXT_REDIRECT_EVENT;
|
||||
break;
|
||||
default:
|
||||
bpf_warn_invalid_xdp_action(act);
|
||||
bpf_warn_invalid_xdp_action(bp->dev, xdp_prog, act);
|
||||
fallthrough;
|
||||
case XDP_ABORTED:
|
||||
trace_xdp_exception(bp->dev, xdp_prog, act);
|
||||
|
@ -590,7 +590,7 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
|
||||
nicvf_xdp_sq_append_pkt(nic, sq, (u64)xdp.data, dma_addr, len);
|
||||
return true;
|
||||
default:
|
||||
bpf_warn_invalid_xdp_action(action);
|
||||
bpf_warn_invalid_xdp_action(nic->netdev, prog, action);
|
||||
fallthrough;
|
||||
case XDP_ABORTED:
|
||||
trace_xdp_exception(nic->netdev, prog, action);
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include <linux/iommu.h>
|
||||
#include <net/ip.h>
|
||||
#include <net/tso.h>
|
||||
#include <uapi/linux/bpf.h>
|
||||
|
||||
#include "nic_reg.h"
|
||||
#include "nic.h"
|
||||
|
@ -2623,7 +2623,7 @@ static u32 dpaa_run_xdp(struct dpaa_priv *priv, struct qm_fd *fd, void *vaddr,
|
||||
}
|
||||
break;
|
||||
default:
|
||||
bpf_warn_invalid_xdp_action(xdp_act);
|
||||
bpf_warn_invalid_xdp_action(priv->net_dev, xdp_prog, xdp_act);
|
||||
fallthrough;
|
||||
case XDP_ABORTED:
|
||||
trace_xdp_exception(priv->net_dev, xdp_prog, xdp_act);
|
||||
|
@ -374,7 +374,7 @@ static u32 dpaa2_eth_run_xdp(struct dpaa2_eth_priv *priv,
|
||||
dpaa2_eth_xdp_enqueue(priv, ch, fd, vaddr, rx_fq->flowid);
|
||||
break;
|
||||
default:
|
||||
bpf_warn_invalid_xdp_action(xdp_act);
|
||||
bpf_warn_invalid_xdp_action(priv->net_dev, xdp_prog, xdp_act);
|
||||
fallthrough;
|
||||
case XDP_ABORTED:
|
||||
trace_xdp_exception(priv->net_dev, xdp_prog, xdp_act);
|
||||
|
@ -1547,7 +1547,7 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring,
|
||||
|
||||
switch (xdp_act) {
|
||||
default:
|
||||
bpf_warn_invalid_xdp_action(xdp_act);
|
||||
bpf_warn_invalid_xdp_action(rx_ring->ndev, prog, xdp_act);
|
||||
fallthrough;
|
||||
case XDP_ABORTED:
|
||||
trace_xdp_exception(rx_ring->ndev, prog, xdp_act);
|
||||
|
@ -4,6 +4,7 @@
|
||||
* Copyright(c) 2017 Huawei Technologies Co., Ltd
|
||||
*/
|
||||
|
||||
#include <linux/if_vlan.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/netdevice.h>
|
||||
#include <linux/u64_stats_sync.h>
|
||||
|
@ -2322,7 +2322,7 @@ static int i40e_run_xdp(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
|
||||
result = I40E_XDP_REDIR;
|
||||
break;
|
||||
default:
|
||||
bpf_warn_invalid_xdp_action(act);
|
||||
bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, act);
|
||||
fallthrough;
|
||||
case XDP_ABORTED:
|
||||
out_failure:
|
||||
|
@ -176,7 +176,7 @@ static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
|
||||
goto out_failure;
|
||||
break;
|
||||
default:
|
||||
bpf_warn_invalid_xdp_action(act);
|
||||
bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, act);
|
||||
fallthrough;
|
||||
case XDP_ABORTED:
|
||||
out_failure:
|
||||
|
@ -22,7 +22,6 @@
|
||||
|
||||
struct i40e_vsi;
|
||||
struct xsk_buff_pool;
|
||||
struct zero_copy_allocator;
|
||||
|
||||
int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair);
|
||||
int i40e_queue_pair_enable(struct i40e_vsi *vsi, int queue_pair);
|
||||
|
@ -1,6 +1,8 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/* Copyright (c) 2020, Intel Corporation. */
|
||||
|
||||
#include <linux/vmalloc.h>
|
||||
|
||||
#include "ice.h"
|
||||
#include "ice_lib.h"
|
||||
#include "ice_devlink.h"
|
||||
|
@ -576,7 +576,7 @@ ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
|
||||
goto out_failure;
|
||||
return ICE_XDP_REDIR;
|
||||
default:
|
||||
bpf_warn_invalid_xdp_action(act);
|
||||
bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, act);
|
||||
fallthrough;
|
||||
case XDP_ABORTED:
|
||||
out_failure:
|
||||
|
@ -1,6 +1,8 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/* Copyright (c) 2019, Intel Corporation. */
|
||||
|
||||
#include <linux/filter.h>
|
||||
|
||||
#include "ice_txrx_lib.h"
|
||||
#include "ice_eswitch.h"
|
||||
#include "ice_lib.h"
|
||||
|
@ -481,7 +481,7 @@ ice_run_xdp_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
|
||||
goto out_failure;
|
||||
break;
|
||||
default:
|
||||
bpf_warn_invalid_xdp_action(act);
|
||||
bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, act);
|
||||
fallthrough;
|
||||
case XDP_ABORTED:
|
||||
out_failure:
|
||||
|
@ -8500,7 +8500,7 @@ static struct sk_buff *igb_run_xdp(struct igb_adapter *adapter,
|
||||
result = IGB_XDP_REDIR;
|
||||
break;
|
||||
default:
|
||||
bpf_warn_invalid_xdp_action(act);
|
||||
bpf_warn_invalid_xdp_action(adapter->netdev, xdp_prog, act);
|
||||
fallthrough;
|
||||
case XDP_ABORTED:
|
||||
out_failure:
|
||||
|
@ -2241,7 +2241,7 @@ static int __igc_xdp_run_prog(struct igc_adapter *adapter,
|
||||
return IGC_XDP_REDIRECT;
|
||||
break;
|
||||
default:
|
||||
bpf_warn_invalid_xdp_action(act);
|
||||
bpf_warn_invalid_xdp_action(adapter->netdev, prog, act);
|
||||
fallthrough;
|
||||
case XDP_ABORTED:
|
||||
out_failure:
|
||||
|
@ -1,6 +1,7 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/* Copyright (c) 2020, Intel Corporation. */
|
||||
|
||||
#include <linux/if_vlan.h>
|
||||
#include <net/xdp_sock_drv.h>
|
||||
|
||||
#include "igc.h"
|
||||
|
@@ -2235,7 +2235,7 @@ static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter,
 		result = IXGBE_XDP_REDIR;
 		break;
 	default:
-		bpf_warn_invalid_xdp_action(act);
+		bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, act);
 		fallthrough;
 	case XDP_ABORTED:
 out_failure:
@@ -35,8 +35,6 @@ int ixgbe_xsk_pool_setup(struct ixgbe_adapter *adapter,
 			 struct xsk_buff_pool *pool,
 			 u16 qid);
 
-void ixgbe_zca_free(struct zero_copy_allocator *alloc, unsigned long handle);
-
 bool ixgbe_alloc_rx_buffers_zc(struct ixgbe_ring *rx_ring, u16 cleaned_count);
 int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector,
 			  struct ixgbe_ring *rx_ring,
@@ -131,7 +131,7 @@ static int ixgbe_run_xdp_zc(struct ixgbe_adapter *adapter,
 			goto out_failure;
 		break;
 	default:
-		bpf_warn_invalid_xdp_action(act);
+		bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, act);
 		fallthrough;
 	case XDP_ABORTED:
 out_failure:
@@ -1070,7 +1070,7 @@ static struct sk_buff *ixgbevf_run_xdp(struct ixgbevf_adapter *adapter,
 			goto out_failure;
 		break;
 	default:
-		bpf_warn_invalid_xdp_action(act);
+		bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, act);
 		fallthrough;
 	case XDP_ABORTED:
 out_failure:
@@ -2240,7 +2240,7 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
 		mvneta_xdp_put_buff(pp, rxq, xdp, sinfo, sync);
 		break;
 	default:
-		bpf_warn_invalid_xdp_action(act);
+		bpf_warn_invalid_xdp_action(pp->dev, prog, act);
 		fallthrough;
 	case XDP_ABORTED:
 		trace_xdp_exception(pp->dev, prog, act);
@@ -3823,7 +3823,7 @@ mvpp2_run_xdp(struct mvpp2_port *port, struct bpf_prog *prog,
 		}
 		break;
 	default:
-		bpf_warn_invalid_xdp_action(act);
+		bpf_warn_invalid_xdp_action(port->dev, prog, act);
 		fallthrough;
 	case XDP_ABORTED:
 		trace_xdp_exception(port->dev, prog, act);
@@ -1198,7 +1198,7 @@ static bool otx2_xdp_rcv_pkt_handler(struct otx2_nic *pfvf,
 		put_page(page);
 		break;
 	default:
-		bpf_warn_invalid_xdp_action(act);
+		bpf_warn_invalid_xdp_action(pfvf->netdev, prog, act);
 		break;
 	case XDP_ABORTED:
 		trace_xdp_exception(pfvf->netdev, prog, act);
@@ -33,6 +33,7 @@
 
 #include <linux/bpf.h>
 #include <linux/etherdevice.h>
+#include <linux/filter.h>
 #include <linux/tcp.h>
 #include <linux/if_vlan.h>
 #include <linux/delay.h>
@@ -812,7 +812,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 			trace_xdp_exception(dev, xdp_prog, act);
 			goto xdp_drop_no_cnt; /* Drop on xmit failure */
 		default:
-			bpf_warn_invalid_xdp_action(act);
+			bpf_warn_invalid_xdp_action(dev, xdp_prog, act);
 			fallthrough;
 		case XDP_ABORTED:
 			trace_xdp_exception(dev, xdp_prog, act);
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 /* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
 
+#include <net/sch_generic.h>
 #include "en.h"
 #include "params.h"
@@ -151,7 +151,7 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
 		rq->stats->xdp_redirect++;
 		return true;
 	default:
-		bpf_warn_invalid_xdp_action(act);
+		bpf_warn_invalid_xdp_action(rq->netdev, prog, act);
 		fallthrough;
 	case XDP_ABORTED:
 xdp_abort:
@@ -60,7 +60,7 @@ u32 mana_run_xdp(struct net_device *ndev, struct mana_rxq *rxq,
 		break;
 
 	default:
-		bpf_warn_invalid_xdp_action(act);
+		bpf_warn_invalid_xdp_action(ndev, prog, act);
 	}
 
 out:
@@ -1,6 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 /* Copyright (c) 2021, Microsoft Corporation. */
 
+#include <uapi/linux/bpf.h>
+
 #include <linux/inetdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/ethtool.h>
@@ -1944,7 +1944,7 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
 					xdp_prog, act);
 			continue;
 		default:
-			bpf_warn_invalid_xdp_action(act);
+			bpf_warn_invalid_xdp_action(dp->netdev, xdp_prog, act);
 			fallthrough;
 		case XDP_ABORTED:
 			trace_xdp_exception(dp->netdev, xdp_prog, act);
@@ -1153,7 +1153,7 @@ static bool qede_rx_xdp(struct qede_dev *edev,
 		qede_rx_bd_ring_consume(rxq);
 		break;
 	default:
-		bpf_warn_invalid_xdp_action(act);
+		bpf_warn_invalid_xdp_action(edev->ndev, prog, act);
 		fallthrough;
 	case XDP_ABORTED:
 		trace_xdp_exception(edev->ndev, prog, act);
@@ -5,6 +5,7 @@
  * Copyright 2005-2013 Solarflare Communications Inc.
  */
 
+#include <linux/filter.h>
 #include <linux/module.h>
 #include <linux/pci.h>
 #include <linux/netdevice.h>
@@ -10,6 +10,7 @@
 
 #include "net_driver.h"
 #include <linux/module.h>
+#include <linux/filter.h>
 #include "efx_channels.h"
 #include "efx.h"
 #include "efx_common.h"
@@ -9,6 +9,7 @@
  */
 
 #include "net_driver.h"
+#include <linux/filter.h>
 #include <linux/module.h>
 #include <linux/netdevice.h>
 #include <net/gre.h>
@@ -338,7 +338,7 @@ static bool efx_do_xdp(struct efx_nic *efx, struct efx_channel *channel,
 		break;
 
 	default:
-		bpf_warn_invalid_xdp_action(xdp_act);
+		bpf_warn_invalid_xdp_action(efx->net_dev, xdp_prog, xdp_act);
 		efx_free_rx_buffers(rx_queue, rx_buf, 1);
 		channel->n_rx_xdp_bad_drops++;
 		trace_xdp_exception(efx->net_dev, xdp_prog, xdp_act);
@@ -933,7 +933,7 @@ static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog,
 		}
 		break;
 	default:
-		bpf_warn_invalid_xdp_action(act);
+		bpf_warn_invalid_xdp_action(priv->ndev, prog, act);
 		fallthrough;
 	case XDP_ABORTED:
 		trace_xdp_exception(priv->ndev, prog, act);
@@ -22,6 +22,7 @@
 #include <linux/net_tstamp.h>
 #include <linux/reset.h>
 #include <net/page_pool.h>
+#include <uapi/linux/bpf.h>
 
 struct stmmac_resources {
 	void __iomem *addr;
@@ -4723,7 +4723,7 @@ static int __stmmac_xdp_run_prog(struct stmmac_priv *priv,
 		res = STMMAC_XDP_REDIRECT;
 		break;
 	default:
-		bpf_warn_invalid_xdp_action(act);
+		bpf_warn_invalid_xdp_action(priv->dev, prog, act);
 		fallthrough;
 	case XDP_ABORTED:
 		trace_xdp_exception(priv->dev, prog, act);
@@ -1362,7 +1362,7 @@ int cpsw_run_xdp(struct cpsw_priv *priv, int ch, struct xdp_buff *xdp,
 		xdp_do_flush_map();
 		break;
 	default:
-		bpf_warn_invalid_xdp_action(act);
+		bpf_warn_invalid_xdp_action(ndev, prog, act);
 		fallthrough;
 	case XDP_ABORTED:
 		trace_xdp_exception(ndev, prog, act);
@@ -6,6 +6,8 @@
 #ifndef DRIVERS_NET_ETHERNET_TI_CPSW_PRIV_H_
 #define DRIVERS_NET_ETHERNET_TI_CPSW_PRIV_H_
 
+#include <uapi/linux/bpf.h>
+
 #include "davinci_cpdma.h"
 
 #define CPSW_DEBUG	(NETIF_MSG_HW | NETIF_MSG_WOL | \
@@ -30,6 +30,7 @@
 /*****************************************************************************/
 
 #include <linux/capability.h>
+#include <linux/compat.h>
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/net.h>
@@ -148,6 +148,7 @@
 
 /* ----------------------------------------------------------------------- */
 
+#include <linux/compat.h>
 #include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/signal.h>
@@ -68,7 +68,7 @@ u32 netvsc_run_xdp(struct net_device *ndev, struct netvsc_channel *nvchan,
 		break;
 
 	default:
-		bpf_warn_invalid_xdp_action(act);
+		bpf_warn_invalid_xdp_action(ndev, prog, act);
 	}
 
 out:
@@ -44,6 +44,7 @@
 #include <linux/etherdevice.h>
 #include <linux/skbuff.h>
 #include <linux/ethtool.h>
+#include <net/sch_generic.h>
 #include <net/sock.h>
 #include <net/checksum.h>
 #include <linux/if_ether.h>	/* For the statistics structure. */
@@ -1602,7 +1602,7 @@ static int tun_xdp_act(struct tun_struct *tun, struct bpf_prog *xdp_prog,
 	case XDP_PASS:
 		break;
 	default:
-		bpf_warn_invalid_xdp_action(act);
+		bpf_warn_invalid_xdp_action(tun->dev, xdp_prog, act);
 		fallthrough;
 	case XDP_ABORTED:
 		trace_xdp_exception(tun->dev, xdp_prog, act);
@@ -644,7 +644,7 @@ static struct xdp_frame *veth_xdp_rcv_one(struct veth_rq *rq,
 		rcu_read_unlock();
 		goto xdp_xmit;
 	default:
-		bpf_warn_invalid_xdp_action(act);
+		bpf_warn_invalid_xdp_action(rq->dev, xdp_prog, act);
 		fallthrough;
 	case XDP_ABORTED:
 		trace_xdp_exception(rq->dev, xdp_prog, act);
@@ -794,7 +794,7 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
 		rcu_read_unlock();
 		goto xdp_xmit;
 	default:
-		bpf_warn_invalid_xdp_action(act);
+		bpf_warn_invalid_xdp_action(rq->dev, xdp_prog, act);
 		fallthrough;
 	case XDP_ABORTED:
 		trace_xdp_exception(rq->dev, xdp_prog, act);
@@ -812,7 +812,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
 		rcu_read_unlock();
 		goto xdp_xmit;
 	default:
-		bpf_warn_invalid_xdp_action(act);
+		bpf_warn_invalid_xdp_action(vi->dev, xdp_prog, act);
 		fallthrough;
 	case XDP_ABORTED:
 		trace_xdp_exception(vi->dev, xdp_prog, act);
@@ -1022,7 +1022,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 		rcu_read_unlock();
 		goto xdp_xmit;
 	default:
-		bpf_warn_invalid_xdp_action(act);
+		bpf_warn_invalid_xdp_action(vi->dev, xdp_prog, act);
 		fallthrough;
 	case XDP_ABORTED:
 		trace_xdp_exception(vi->dev, xdp_prog, act);
@@ -34,6 +34,7 @@
 #include <net/addrconf.h>
 #include <net/l3mdev.h>
 #include <net/fib_rules.h>
+#include <net/sch_generic.h>
 #include <net/netns/generic.h>
 #include <net/netfilter/nf_conntrack.h>
 
@@ -948,7 +948,7 @@ static u32 xennet_run_xdp(struct netfront_queue *queue, struct page *pdata,
 		break;
 
 	default:
-		bpf_warn_invalid_xdp_action(act);
+		bpf_warn_invalid_xdp_action(queue->info->netdev, prog, act);
 	}
 
 	return act;
@@ -18,6 +18,7 @@
  *  6 Jun 1999	Cache readdir lookups in the page cache. -DaveM
  */
 
+#include <linux/compat.h>
 #include <linux/module.h>
 #include <linux/time.h>
 #include <linux/errno.h>
@@ -10,6 +10,7 @@
  * Split from fs/nfs/super.c by David Howells <dhowells@redhat.com>
  */
 
+#include <linux/compat.h>
 #include <linux/module.h>
 #include <linux/fs.h>
 #include <linux/fs_context.h>
@@ -15,6 +15,7 @@
  *  of fds to overcome nfds < 16390 descriptors limit (Tigran Aivazian).
 */
 
+#include <linux/compat.h>
 #include <linux/kernel.h>
 #include <linux/sched/signal.h>
 #include <linux/sched/rt.h>
70
include/linux/bpf-cgroup-defs.h
Normal file
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BPF_CGROUP_DEFS_H
+#define _BPF_CGROUP_DEFS_H
+
+#ifdef CONFIG_CGROUP_BPF
+
+#include <linux/list.h>
+#include <linux/percpu-refcount.h>
+#include <linux/workqueue.h>
+
+struct bpf_prog_array;
+
+enum cgroup_bpf_attach_type {
+	CGROUP_BPF_ATTACH_TYPE_INVALID = -1,
+	CGROUP_INET_INGRESS = 0,
+	CGROUP_INET_EGRESS,
+	CGROUP_INET_SOCK_CREATE,
+	CGROUP_SOCK_OPS,
+	CGROUP_DEVICE,
+	CGROUP_INET4_BIND,
+	CGROUP_INET6_BIND,
+	CGROUP_INET4_CONNECT,
+	CGROUP_INET6_CONNECT,
+	CGROUP_INET4_POST_BIND,
+	CGROUP_INET6_POST_BIND,
+	CGROUP_UDP4_SENDMSG,
+	CGROUP_UDP6_SENDMSG,
+	CGROUP_SYSCTL,
+	CGROUP_UDP4_RECVMSG,
+	CGROUP_UDP6_RECVMSG,
+	CGROUP_GETSOCKOPT,
+	CGROUP_SETSOCKOPT,
+	CGROUP_INET4_GETPEERNAME,
+	CGROUP_INET6_GETPEERNAME,
+	CGROUP_INET4_GETSOCKNAME,
+	CGROUP_INET6_GETSOCKNAME,
+	CGROUP_INET_SOCK_RELEASE,
+	MAX_CGROUP_BPF_ATTACH_TYPE
+};
+
+struct cgroup_bpf {
+	/* array of effective progs in this cgroup */
+	struct bpf_prog_array __rcu *effective[MAX_CGROUP_BPF_ATTACH_TYPE];
+
+	/* attached progs to this cgroup and attach flags
+	 * when flags == 0 or BPF_F_ALLOW_OVERRIDE the progs list will
+	 * have either zero or one element
+	 * when BPF_F_ALLOW_MULTI the list can have up to BPF_CGROUP_MAX_PROGS
+	 */
+	struct list_head progs[MAX_CGROUP_BPF_ATTACH_TYPE];
+	u32 flags[MAX_CGROUP_BPF_ATTACH_TYPE];
+
+	/* list of cgroup shared storages */
+	struct list_head storages;
+
+	/* temp storage for effective prog array used by prog_attach/detach */
+	struct bpf_prog_array *inactive;
+
+	/* reference counter used to detach bpf programs after cgroup removal */
+	struct percpu_ref refcnt;
+
+	/* cgroup_bpf is released using a work queue */
+	struct work_struct release_work;
+};
+
+#else /* CONFIG_CGROUP_BPF */
+struct cgroup_bpf {};
+#endif /* CONFIG_CGROUP_BPF */
+
+#endif
@@ -3,10 +3,10 @@
 #define _BPF_CGROUP_H
 
 #include <linux/bpf.h>
+#include <linux/bpf-cgroup-defs.h>
 #include <linux/errno.h>
 #include <linux/jump_label.h>
 #include <linux/percpu.h>
-#include <linux/percpu-refcount.h>
 #include <linux/rbtree.h>
 #include <uapi/linux/bpf.h>
 
@@ -23,33 +23,6 @@ struct ctl_table_header;
 struct task_struct;
 
 #ifdef CONFIG_CGROUP_BPF
-enum cgroup_bpf_attach_type {
-	CGROUP_BPF_ATTACH_TYPE_INVALID = -1,
-	CGROUP_INET_INGRESS = 0,
-	CGROUP_INET_EGRESS,
-	CGROUP_INET_SOCK_CREATE,
-	CGROUP_SOCK_OPS,
-	CGROUP_DEVICE,
-	CGROUP_INET4_BIND,
-	CGROUP_INET6_BIND,
-	CGROUP_INET4_CONNECT,
-	CGROUP_INET6_CONNECT,
-	CGROUP_INET4_POST_BIND,
-	CGROUP_INET6_POST_BIND,
-	CGROUP_UDP4_SENDMSG,
-	CGROUP_UDP6_SENDMSG,
-	CGROUP_SYSCTL,
-	CGROUP_UDP4_RECVMSG,
-	CGROUP_UDP6_RECVMSG,
-	CGROUP_GETSOCKOPT,
-	CGROUP_SETSOCKOPT,
-	CGROUP_INET4_GETPEERNAME,
-	CGROUP_INET6_GETPEERNAME,
-	CGROUP_INET4_GETSOCKNAME,
-	CGROUP_INET6_GETSOCKNAME,
-	CGROUP_INET_SOCK_RELEASE,
-	MAX_CGROUP_BPF_ATTACH_TYPE
-};
-
 
 #define CGROUP_ATYPE(type) \
 	case BPF_##type: return type
@@ -127,33 +100,6 @@ struct bpf_prog_list {
 	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE];
 };
 
-struct bpf_prog_array;
-
-struct cgroup_bpf {
-	/* array of effective progs in this cgroup */
-	struct bpf_prog_array __rcu *effective[MAX_CGROUP_BPF_ATTACH_TYPE];
-
-	/* attached progs to this cgroup and attach flags
-	 * when flags == 0 or BPF_F_ALLOW_OVERRIDE the progs list will
-	 * have either zero or one element
-	 * when BPF_F_ALLOW_MULTI the list can have up to BPF_CGROUP_MAX_PROGS
-	 */
-	struct list_head progs[MAX_CGROUP_BPF_ATTACH_TYPE];
-	u32 flags[MAX_CGROUP_BPF_ATTACH_TYPE];
-
-	/* list of cgroup shared storages */
-	struct list_head storages;
-
-	/* temp storage for effective prog array used by prog_attach/detach */
-	struct bpf_prog_array *inactive;
-
-	/* reference counter used to detach bpf programs after cgroup removal */
-	struct percpu_ref refcnt;
-
-	/* cgroup_bpf is released using a work queue */
-	struct work_struct release_work;
-};
-
 int cgroup_bpf_inherit(struct cgroup *cgrp);
 void cgroup_bpf_offline(struct cgroup *cgrp);
 
@@ -451,7 +397,6 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr,
 			  union bpf_attr __user *uattr);
 #else
 
-struct cgroup_bpf {};
 static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
 static inline void cgroup_bpf_offline(struct cgroup *cgrp) {}
 
@@ -3,15 +3,9 @@
 #define _BPF_NETNS_H
 
 #include <linux/mutex.h>
+#include <net/netns/bpf.h>
 #include <uapi/linux/bpf.h>
 
-enum netns_bpf_attach_type {
-	NETNS_BPF_INVALID = -1,
-	NETNS_BPF_FLOW_DISSECTOR = 0,
-	NETNS_BPF_SK_LOOKUP,
-	MAX_NETNS_BPF_ATTACH_TYPE
-};
-
 static inline enum netns_bpf_attach_type
 to_netns_bpf_attach_type(enum bpf_attach_type attach_type)
 {
@@ -297,6 +297,34 @@ bool bpf_map_meta_equal(const struct bpf_map *meta0,
 
 extern const struct bpf_map_ops bpf_map_offload_ops;
 
+/* bpf_type_flag contains a set of flags that are applicable to the values of
+ * arg_type, ret_type and reg_type. For example, a pointer value may be null,
+ * or a memory is read-only. We classify types into two categories: base types
+ * and extended types. Extended types are base types combined with a type flag.
+ *
+ * Currently there are no more than 32 base types in arg_type, ret_type and
+ * reg_types.
+ */
+#define BPF_BASE_TYPE_BITS	8
+
+enum bpf_type_flag {
+	/* PTR may be NULL. */
+	PTR_MAYBE_NULL		= BIT(0 + BPF_BASE_TYPE_BITS),
+
+	/* MEM is read-only. When applied on bpf_arg, it indicates the arg is
+	 * compatible with both mutable and immutable memory.
+	 */
+	MEM_RDONLY		= BIT(1 + BPF_BASE_TYPE_BITS),
+
+	__BPF_TYPE_LAST_FLAG	= MEM_RDONLY,
+};
+
+/* Max number of base types. */
+#define BPF_BASE_TYPE_LIMIT	(1UL << BPF_BASE_TYPE_BITS)
+
+/* Max number of all types. */
+#define BPF_TYPE_LIMIT		(__BPF_TYPE_LAST_FLAG | (__BPF_TYPE_LAST_FLAG - 1))
+
 /* function argument constraints */
 enum bpf_arg_type {
 	ARG_DONTCARE = 0,	/* unused argument in helper function */
@@ -308,13 +336,11 @@ enum bpf_arg_type {
 	ARG_PTR_TO_MAP_KEY,	/* pointer to stack used as map key */
 	ARG_PTR_TO_MAP_VALUE,	/* pointer to stack used as map value */
 	ARG_PTR_TO_UNINIT_MAP_VALUE,	/* pointer to valid memory used to store a map value */
-	ARG_PTR_TO_MAP_VALUE_OR_NULL,	/* pointer to stack used as map value or NULL */
 
 	/* the following constraints used to prototype bpf_memcmp() and other
 	 * functions that access data on eBPF program stack
 	 */
 	ARG_PTR_TO_MEM,		/* pointer to valid memory (stack, packet, map value) */
-	ARG_PTR_TO_MEM_OR_NULL, /* pointer to valid memory or NULL */
 	ARG_PTR_TO_UNINIT_MEM,	/* pointer to memory does not need to be initialized,
 				 * helper function must fill all bytes or clear
 				 * them in error case.
@@ -324,42 +350,65 @@ enum bpf_arg_type {
 	ARG_CONST_SIZE_OR_ZERO,	/* number of bytes accessed from memory or 0 */
 
 	ARG_PTR_TO_CTX,		/* pointer to context */
-	ARG_PTR_TO_CTX_OR_NULL,	/* pointer to context or NULL */
 	ARG_ANYTHING,		/* any (initialized) argument is ok */
 	ARG_PTR_TO_SPIN_LOCK,	/* pointer to bpf_spin_lock */
 	ARG_PTR_TO_SOCK_COMMON,	/* pointer to sock_common */
 	ARG_PTR_TO_INT,		/* pointer to int */
 	ARG_PTR_TO_LONG,	/* pointer to long */
 	ARG_PTR_TO_SOCKET,	/* pointer to bpf_sock (fullsock) */
-	ARG_PTR_TO_SOCKET_OR_NULL,	/* pointer to bpf_sock (fullsock) or NULL */
 	ARG_PTR_TO_BTF_ID,	/* pointer to in-kernel struct */
 	ARG_PTR_TO_ALLOC_MEM,	/* pointer to dynamically allocated memory */
-	ARG_PTR_TO_ALLOC_MEM_OR_NULL,	/* pointer to dynamically allocated memory or NULL */
 	ARG_CONST_ALLOC_SIZE_OR_ZERO,	/* number of allocated bytes requested */
 	ARG_PTR_TO_BTF_ID_SOCK_COMMON,	/* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */
 	ARG_PTR_TO_PERCPU_BTF_ID,	/* pointer to in-kernel percpu type */
 	ARG_PTR_TO_FUNC,	/* pointer to a bpf program function */
-	ARG_PTR_TO_STACK_OR_NULL,	/* pointer to stack or NULL */
+	ARG_PTR_TO_STACK,	/* pointer to stack */
 	ARG_PTR_TO_CONST_STR,	/* pointer to a null terminated read-only string */
 	ARG_PTR_TO_TIMER,	/* pointer to bpf_timer */
 	__BPF_ARG_TYPE_MAX,
+
+	/* Extended arg_types. */
+	ARG_PTR_TO_MAP_VALUE_OR_NULL	= PTR_MAYBE_NULL | ARG_PTR_TO_MAP_VALUE,
+	ARG_PTR_TO_MEM_OR_NULL		= PTR_MAYBE_NULL | ARG_PTR_TO_MEM,
+	ARG_PTR_TO_CTX_OR_NULL		= PTR_MAYBE_NULL | ARG_PTR_TO_CTX,
+	ARG_PTR_TO_SOCKET_OR_NULL	= PTR_MAYBE_NULL | ARG_PTR_TO_SOCKET,
+	ARG_PTR_TO_ALLOC_MEM_OR_NULL	= PTR_MAYBE_NULL | ARG_PTR_TO_ALLOC_MEM,
+	ARG_PTR_TO_STACK_OR_NULL	= PTR_MAYBE_NULL | ARG_PTR_TO_STACK,
+
+	/* This must be the last entry. Its purpose is to ensure the enum is
+	 * wide enough to hold the higher bits reserved for bpf_type_flag.
+	 */
+	__BPF_ARG_TYPE_LIMIT	= BPF_TYPE_LIMIT,
 };
+static_assert(__BPF_ARG_TYPE_MAX <= BPF_BASE_TYPE_LIMIT);
 
 /* type of values returned from helper functions */
 enum bpf_return_type {
 	RET_INTEGER,			/* function returns integer */
 	RET_VOID,			/* function doesn't return anything */
 	RET_PTR_TO_MAP_VALUE,		/* returns a pointer to map elem value */
-	RET_PTR_TO_MAP_VALUE_OR_NULL,	/* returns a pointer to map elem value or NULL */
-	RET_PTR_TO_SOCKET_OR_NULL,	/* returns a pointer to a socket or NULL */
-	RET_PTR_TO_TCP_SOCK_OR_NULL,	/* returns a pointer to a tcp_sock or NULL */
-	RET_PTR_TO_SOCK_COMMON_OR_NULL,	/* returns a pointer to a sock_common or NULL */
-	RET_PTR_TO_ALLOC_MEM_OR_NULL,	/* returns a pointer to dynamically allocated memory or NULL */
-	RET_PTR_TO_BTF_ID_OR_NULL,	/* returns a pointer to a btf_id or NULL */
-	RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, /* returns a pointer to a valid memory or a btf_id or NULL */
+	RET_PTR_TO_SOCKET,		/* returns a pointer to a socket */
+	RET_PTR_TO_TCP_SOCK,		/* returns a pointer to a tcp_sock */
+	RET_PTR_TO_SOCK_COMMON,		/* returns a pointer to a sock_common */
+	RET_PTR_TO_ALLOC_MEM,		/* returns a pointer to dynamically allocated memory */
+	RET_PTR_TO_MEM_OR_BTF_ID,	/* returns a pointer to a valid memory or a btf_id */
+	RET_PTR_TO_BTF_ID,		/* returns a pointer to a btf_id */
+	__BPF_RET_TYPE_MAX,
+
+	/* Extended ret_types. */
+	RET_PTR_TO_MAP_VALUE_OR_NULL	= PTR_MAYBE_NULL | RET_PTR_TO_MAP_VALUE,
+	RET_PTR_TO_SOCKET_OR_NULL	= PTR_MAYBE_NULL | RET_PTR_TO_SOCKET,
+	RET_PTR_TO_TCP_SOCK_OR_NULL	= PTR_MAYBE_NULL | RET_PTR_TO_TCP_SOCK,
+	RET_PTR_TO_SOCK_COMMON_OR_NULL	= PTR_MAYBE_NULL | RET_PTR_TO_SOCK_COMMON,
+	RET_PTR_TO_ALLOC_MEM_OR_NULL	= PTR_MAYBE_NULL | RET_PTR_TO_ALLOC_MEM,
+	RET_PTR_TO_BTF_ID_OR_NULL	= PTR_MAYBE_NULL | RET_PTR_TO_BTF_ID,
+
+	/* This must be the last entry. Its purpose is to ensure the enum is
+	 * wide enough to hold the higher bits reserved for bpf_type_flag.
+	 */
+	__BPF_RET_TYPE_LIMIT	= BPF_TYPE_LIMIT,
 };
+static_assert(__BPF_RET_TYPE_MAX <= BPF_BASE_TYPE_LIMIT);
 
 /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
  * to in-kernel helper functions and for adjusting imm32 field in BPF_CALL
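
For readers of this series, the extended types above are plain bit compositions: the low BPF_BASE_TYPE_BITS carry the base type and the higher bits carry modifier flags such as PTR_MAYBE_NULL. A minimal userspace sketch of the decomposition (illustrative enum value, not kernel code):

#include <assert.h>
#include <stdint.h>

#define BPF_BASE_TYPE_BITS  8
#define BPF_BASE_TYPE_MASK  ((1U << BPF_BASE_TYPE_BITS) - 1)
#define PTR_MAYBE_NULL      (1U << (0 + BPF_BASE_TYPE_BITS))

#define PTR_TO_MAP_VALUE          4U  /* illustrative value only */
#define PTR_TO_MAP_VALUE_OR_NULL  (PTR_MAYBE_NULL | PTR_TO_MAP_VALUE)

/* mirror of the kernel's base_type()/type_flag() accessors */
static uint32_t base_type(uint32_t t) { return t & BPF_BASE_TYPE_MASK; }
static uint32_t type_flag(uint32_t t) { return t & ~BPF_BASE_TYPE_MASK; }

int main(void)
{
	assert(base_type(PTR_TO_MAP_VALUE_OR_NULL) == PTR_TO_MAP_VALUE);
	assert(type_flag(PTR_TO_MAP_VALUE_OR_NULL) == PTR_MAYBE_NULL);
	return 0;
}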
@@ -421,18 +470,15 @@ enum bpf_reg_type {
 	PTR_TO_CTX,		 /* reg points to bpf_context */
 	CONST_PTR_TO_MAP,	 /* reg points to struct bpf_map */
 	PTR_TO_MAP_VALUE,	 /* reg points to map element value */
-	PTR_TO_MAP_VALUE_OR_NULL,/* points to map elem value or NULL */
+	PTR_TO_MAP_KEY,		 /* reg points to a map element key */
 	PTR_TO_STACK,		 /* reg == frame_pointer + offset */
 	PTR_TO_PACKET_META,	 /* skb->data - meta_len */
 	PTR_TO_PACKET,		 /* reg points to skb->data */
 	PTR_TO_PACKET_END,	 /* skb->data + headlen */
 	PTR_TO_FLOW_KEYS,	 /* reg points to bpf_flow_keys */
 	PTR_TO_SOCKET,		 /* reg points to struct bpf_sock */
-	PTR_TO_SOCKET_OR_NULL,	 /* reg points to struct bpf_sock or NULL */
 	PTR_TO_SOCK_COMMON,	 /* reg points to sock_common */
-	PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */
 	PTR_TO_TCP_SOCK,	 /* reg points to struct tcp_sock */
-	PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */
 	PTR_TO_TP_BUFFER,	 /* reg points to a writable raw tp's buffer */
 	PTR_TO_XDP_SOCK,	 /* reg points to struct xdp_sock */
 	/* PTR_TO_BTF_ID points to a kernel struct that does not need
@@ -450,18 +496,25 @@ enum bpf_reg_type {
 	 * been checked for null. Used primarily to inform the verifier
 	 * an explicit null check is required for this struct.
 	 */
-	PTR_TO_BTF_ID_OR_NULL,
 	PTR_TO_MEM,		 /* reg points to valid memory region */
-	PTR_TO_MEM_OR_NULL,	 /* reg points to valid memory region or NULL */
-	PTR_TO_RDONLY_BUF,	 /* reg points to a readonly buffer */
-	PTR_TO_RDONLY_BUF_OR_NULL, /* reg points to a readonly buffer or NULL */
-	PTR_TO_RDWR_BUF,	 /* reg points to a read/write buffer */
-	PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */
+	PTR_TO_BUF,		 /* reg points to a read/write buffer */
 	PTR_TO_PERCPU_BTF_ID,	 /* reg points to a percpu kernel variable */
 	PTR_TO_FUNC,		 /* reg points to a bpf program function */
-	PTR_TO_MAP_KEY,		 /* reg points to a map element key */
+	__BPF_REG_TYPE_MAX,
+
+	/* Extended reg_types. */
+	PTR_TO_MAP_VALUE_OR_NULL	= PTR_MAYBE_NULL | PTR_TO_MAP_VALUE,
+	PTR_TO_SOCKET_OR_NULL		= PTR_MAYBE_NULL | PTR_TO_SOCKET,
+	PTR_TO_SOCK_COMMON_OR_NULL	= PTR_MAYBE_NULL | PTR_TO_SOCK_COMMON,
+	PTR_TO_TCP_SOCK_OR_NULL		= PTR_MAYBE_NULL | PTR_TO_TCP_SOCK,
+	PTR_TO_BTF_ID_OR_NULL		= PTR_MAYBE_NULL | PTR_TO_BTF_ID,
+
+	/* This must be the last entry. Its purpose is to ensure the enum is
+	 * wide enough to hold the higher bits reserved for bpf_type_flag.
+	 */
+	__BPF_REG_TYPE_LIMIT	= BPF_TYPE_LIMIT,
 };
+static_assert(__BPF_REG_TYPE_MAX <= BPF_BASE_TYPE_LIMIT);
 
 /* The information passed from prog-specific *_is_valid_access
  * back to the verifier.
@@ -777,6 +830,7 @@ void bpf_ksym_add(struct bpf_ksym *ksym);
 void bpf_ksym_del(struct bpf_ksym *ksym);
 int bpf_jit_charge_modmem(u32 pages);
 void bpf_jit_uncharge_modmem(u32 pages);
+bool bpf_prog_has_trampoline(const struct bpf_prog *prog);
 #else
 static inline int bpf_trampoline_link_prog(struct bpf_prog *prog,
 					   struct bpf_trampoline *tr)
@@ -805,6 +859,10 @@ static inline bool is_bpf_image_address(unsigned long address)
 {
 	return false;
 }
+static inline bool bpf_prog_has_trampoline(const struct bpf_prog *prog)
+{
+	return false;
+}
 #endif
 
 struct bpf_func_info_aux {
@@ -2163,6 +2221,7 @@ extern const struct bpf_func_proto bpf_sk_getsockopt_proto;
 extern const struct bpf_func_proto bpf_kallsyms_lookup_name_proto;
 extern const struct bpf_func_proto bpf_find_vma_proto;
 extern const struct bpf_func_proto bpf_loop_proto;
+extern const struct bpf_func_proto bpf_strncmp_proto;
 
 const struct bpf_func_proto *tracing_prog_func_proto(
 	enum bpf_func_id func_id, const struct bpf_prog *prog);
@@ -8,6 +8,7 @@
 #define _BPF_LOCAL_STORAGE_H
 
 #include <linux/bpf.h>
+#include <linux/filter.h>
 #include <linux/rculist.h>
 #include <linux/list.h>
 #include <linux/hash.h>
@@ -16,6 +17,9 @@
 
 #define BPF_LOCAL_STORAGE_CACHE_SIZE	16
 
+#define bpf_rcu_lock_held()						\
+	(rcu_read_lock_held() || rcu_read_lock_trace_held() ||		\
+	 rcu_read_lock_bh_held())
 struct bpf_local_storage_map_bucket {
 	struct hlist_head list;
 	raw_spinlock_t lock;
@@ -161,4 +165,6 @@ struct bpf_local_storage_data *
 bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
 			 void *value, u64 map_flags);
 
+void bpf_local_storage_free_rcu(struct rcu_head *rcu);
+
 #endif /* _BPF_LOCAL_STORAGE_H */
@@ -18,6 +18,8 @@
  * that converting umax_value to int cannot overflow.
  */
 #define BPF_MAX_VAR_SIZ	(1 << 29)
+/* size of type_str_buf in bpf_verifier. */
+#define TYPE_STR_BUF_LEN 64
 
 /* Liveness marks, used for registers and spilled-regs (in stack slots).
  * Read marks propagate upwards until they find a write mark; they record that
@@ -388,6 +390,8 @@ static inline bool bpf_verifier_log_full(const struct bpf_verifier_log *log)
 #define BPF_LOG_LEVEL	(BPF_LOG_LEVEL1 | BPF_LOG_LEVEL2)
 #define BPF_LOG_MASK	(BPF_LOG_LEVEL | BPF_LOG_STATS)
 #define BPF_LOG_KERNEL	(BPF_LOG_MASK + 1) /* kernel internal flag */
+#define BPF_LOG_MIN_ALIGNMENT 8U
+#define BPF_LOG_ALIGNMENT 40U
 
 static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log)
 {
@@ -474,6 +478,16 @@ struct bpf_verifier_env {
 	/* longest register parentage chain walked for liveness marking */
 	u32 longest_mark_read_walk;
 	bpfptr_t fd_array;
+
+	/* bit mask to keep track of whether a register has been accessed
+	 * since the last time the function state was printed
+	 */
+	u32 scratched_regs;
+	/* Same as scratched_regs but for stack slots */
+	u64 scratched_stack_slots;
+	u32 prev_log_len, prev_insn_print_len;
+	/* buffer used in reg_type_str() to generate reg_type string */
+	char type_str_buf[TYPE_STR_BUF_LEN];
 };
 
 __printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log,
@@ -536,5 +550,18 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
 			    struct bpf_attach_target_info *tgt_info);
 void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab);
 
+#define BPF_BASE_TYPE_MASK	GENMASK(BPF_BASE_TYPE_BITS - 1, 0)
+
+/* extract base type from bpf_{arg, return, reg}_type. */
+static inline u32 base_type(u32 type)
+{
+	return type & BPF_BASE_TYPE_MASK;
+}
+
+/* extract flags from an extended type. See bpf_type_flag in bpf.h. */
+static inline u32 type_flag(u32 type)
+{
+	return type & ~BPF_BASE_TYPE_MASK;
+}
+
 #endif /* _LINUX_BPF_VERIFIER_H */
@@ -19,7 +19,7 @@
 #include <linux/percpu-rwsem.h>
 #include <linux/u64_stats_sync.h>
 #include <linux/workqueue.h>
-#include <linux/bpf-cgroup.h>
+#include <linux/bpf-cgroup-defs.h>
 #include <linux/psi_types.h>
 
 #ifdef CONFIG_CGROUPS
@@ -2,6 +2,7 @@
 #ifndef DSA_LOOP_H
 #define DSA_LOOP_H
 
+#include <linux/if_vlan.h>
 #include <linux/types.h>
 #include <linux/ethtool.h>
 #include <net/dsa.h>
@@ -1027,7 +1027,7 @@ void xdp_do_flush(void);
  */
 #define xdp_do_flush_map xdp_do_flush
 
-void bpf_warn_invalid_xdp_action(u32 act);
+void bpf_warn_invalid_xdp_action(struct net_device *dev, struct bpf_prog *prog, u32 act);
 
 #ifdef CONFIG_INET
 struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
@@ -611,6 +611,7 @@ struct swevent_hlist {
 #define PERF_ATTACH_SCHED_CB	0x20
 #define PERF_ATTACH_CHILD	0x40
 
+struct bpf_prog;
 struct perf_cgroup;
 struct perf_buffer;
 
@@ -20,6 +20,7 @@
 #include <net/inetpeer.h>
 #include <net/fib_notifier.h>
 #include <linux/indirect_call_wrapper.h>
+#include <uapi/linux/bpf.h>
 
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 #define FIB6_TABLE_HASHSZ 256
@@ -21,6 +21,8 @@
 #include <net/snmp.h>
 #include <net/netns/hash.h>
 
+struct ip_tunnel_info;
+
 #define SIN6_LEN_RFC2133	24
 
 #define IPV6_MAXPLEN		65535
@@ -6,11 +6,18 @@
 #ifndef __NETNS_BPF_H__
 #define __NETNS_BPF_H__
 
-#include <linux/bpf-netns.h>
+#include <linux/list.h>
 
 struct bpf_prog;
 struct bpf_prog_array;
 
+enum netns_bpf_attach_type {
+	NETNS_BPF_INVALID = -1,
+	NETNS_BPF_FLOW_DISSECTOR = 0,
+	NETNS_BPF_SK_LOOKUP,
+	MAX_NETNS_BPF_ATTACH_TYPE
+};
+
 struct netns_bpf {
 	/* Array of programs to run compiled from progs or links */
 	struct bpf_prog_array __rcu *run_array[MAX_NETNS_BPF_ATTACH_TYPE];
@@ -43,6 +43,7 @@
 #define RT_CONN_FLAGS(sk)   (RT_TOS(inet_sk(sk)->tos) | sock_flag(sk, SOCK_LOCALROUTE))
 #define RT_CONN_FLAGS_TOS(sk,tos)   (RT_TOS(tos) | sock_flag(sk, SOCK_LOCALROUTE))
 
+struct ip_tunnel_info;
 struct fib_nh;
 struct fib_info;
 struct uncached_list;
@@ -56,7 +56,6 @@
 #include <linux/wait.h>
 #include <linux/cgroup-defs.h>
 #include <linux/rbtree.h>
-#include <linux/filter.h>
 #include <linux/rculist_nulls.h>
 #include <linux/poll.h>
 #include <linux/sockptr.h>
@@ -249,6 +248,7 @@ struct sock_common {
 };
 
 struct bpf_local_storage;
+struct sk_filter;
 
 /**
  *	struct sock - network layer representation of sockets
@@ -10,7 +10,6 @@ struct xdp_mem_allocator {
 	union {
 		void *allocator;
 		struct page_pool *page_pool;
-		struct zero_copy_allocator *zc_alloc;
 	};
 	struct rhash_head node;
 	struct rcu_head rcu;
@@ -6,6 +6,7 @@
 #ifndef _LINUX_XDP_SOCK_H
 #define _LINUX_XDP_SOCK_H
 
+#include <linux/bpf.h>
 #include <linux/workqueue.h>
 #include <linux/if_xdp.h>
 #include <linux/mutex.h>
@@ -4983,6 +4983,41 @@ union bpf_attr {
  *	Return
  *		The number of loops performed, **-EINVAL** for invalid **flags**,
  *		**-E2BIG** if **nr_loops** exceeds the maximum number of loops.
+ *
+ * long bpf_strncmp(const char *s1, u32 s1_sz, const char *s2)
+ *	Description
+ *		Do strncmp() between **s1** and **s2**. **s1** doesn't need
+ *		to be null-terminated and **s1_sz** is the maximum storage
+ *		size of **s1**. **s2** must be a read-only string.
+ *	Return
+ *		An integer less than, equal to, or greater than zero
+ *		if the first **s1_sz** bytes of **s1** is found to be
+ *		less than, to match, or be greater than **s2**.
+ *
+ * long bpf_get_func_arg(void *ctx, u32 n, u64 *value)
+ *	Description
+ *		Get **n**-th argument (zero based) of the traced function (for tracing programs)
+ *		returned in **value**.
+ *
+ *	Return
+ *		0 on success.
+ *		**-EINVAL** if n >= arguments count of traced function.
+ *
+ * long bpf_get_func_ret(void *ctx, u64 *value)
+ *	Description
+ *		Get return value of the traced function (for tracing programs)
+ *		in **value**.
+ *
+ *	Return
+ *		0 on success.
+ *		**-EOPNOTSUPP** for tracing programs other than BPF_TRACE_FEXIT or BPF_MODIFY_RETURN.
+ *
+ * long bpf_get_func_arg_cnt(void *ctx)
+ *	Description
+ *		Get number of arguments of the traced function (for tracing programs).
+ *
+ *	Return
+ *		The number of arguments of the traced function.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
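
As a usage illustration for the new helper documented above, here is a hypothetical libbpf-style program; the tracepoint, section name and target string are only examples, and it assumes headers carrying the 2021-12 helper definitions:

// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

char LICENSE[] SEC("license") = "GPL";

/* s2 must live in read-only program data: the verifier enforces
 * ARG_PTR_TO_CONST_STR for the third argument.
 */
static const char target[] = "ls";

SEC("tracepoint/syscalls/sys_enter_execve")
int check_comm(void *ctx)
{
	char comm[16] = {};

	bpf_get_current_comm(comm, sizeof(comm));
	/* compare at most sizeof(target) bytes of comm against target */
	if (bpf_strncmp(comm, sizeof(target), target) == 0)
		bpf_printk("matched comm %s", comm);
	return 0;
}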
@@ -5167,6 +5202,10 @@ union bpf_attr {
 	FN(kallsyms_lookup_name),	\
 	FN(find_vma),			\
 	FN(loop),			\
+	FN(strncmp),			\
+	FN(get_func_arg),		\
+	FN(get_func_ret),		\
+	FN(get_func_arg_cnt),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
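
The get_func_* helpers added above are aimed at fentry/fexit programs. A hypothetical sketch (bpf_fentry_test1 is the usual kselftest target and is only an example attach point):

// SPDX-License-Identifier: GPL-2.0
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char LICENSE[] SEC("license") = "GPL";

SEC("fentry/bpf_fentry_test1")
int BPF_PROG(show_args)
{
	u64 cnt = bpf_get_func_arg_cnt(ctx);
	u64 a0 = 0;

	/* arguments are 0-indexed; -EINVAL is returned for n >= cnt */
	if (!bpf_get_func_arg(ctx, 0, &a0))
		bpf_printk("arg0=%llu of %llu args", a0, cnt);
	return 0;
}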
@@ -82,6 +82,11 @@ static int bloom_map_delete_elem(struct bpf_map *map, void *value)
 	return -EOPNOTSUPP;
 }
 
+static int bloom_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
+{
+	return -EOPNOTSUPP;
+}
+
 static struct bpf_map *bloom_map_alloc(union bpf_attr *attr)
 {
 	u32 bitset_bytes, bitset_mask, nr_hash_funcs, nr_bits;
@@ -192,6 +197,7 @@ const struct bpf_map_ops bloom_filter_map_ops = {
 	.map_meta_equal = bpf_map_meta_equal,
 	.map_alloc = bloom_map_alloc,
 	.map_free = bloom_map_free,
+	.map_get_next_key = bloom_map_get_next_key,
 	.map_push_elem = bloom_map_push_elem,
 	.map_peek_elem = bloom_map_peek_elem,
 	.map_pop_elem = bloom_map_pop_elem,
@@ -17,6 +17,7 @@
 #include <linux/bpf_lsm.h>
 #include <linux/btf_ids.h>
 #include <linux/fdtable.h>
+#include <linux/rcupdate_trace.h>
 
 DEFINE_BPF_STORAGE_CACHE(inode_cache);
 
@@ -44,7 +45,8 @@ static struct bpf_local_storage_data *inode_storage_lookup(struct inode *inode,
 	if (!bsb)
 		return NULL;
 
-	inode_storage = rcu_dereference(bsb->storage);
+	inode_storage =
+		rcu_dereference_check(bsb->storage, bpf_rcu_lock_held());
 	if (!inode_storage)
 		return NULL;
 
@@ -172,6 +174,7 @@ BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
 {
 	struct bpf_local_storage_data *sdata;
 
+	WARN_ON_ONCE(!bpf_rcu_lock_held());
 	if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE))
 		return (unsigned long)NULL;
 
@@ -204,6 +207,7 @@ BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
 BPF_CALL_2(bpf_inode_storage_delete,
 	   struct bpf_map *, map, struct inode *, inode)
 {
+	WARN_ON_ONCE(!bpf_rcu_lock_held());
 	if (!inode)
 		return -EINVAL;
 
@@ -11,6 +11,9 @@
 #include <net/sock.h>
 #include <uapi/linux/sock_diag.h>
 #include <uapi/linux/btf.h>
+#include <linux/rcupdate.h>
+#include <linux/rcupdate_trace.h>
+#include <linux/rcupdate_wait.h>
 
 #define BPF_LOCAL_STORAGE_CREATE_FLAG_MASK (BPF_F_NO_PREALLOC | BPF_F_CLONE)
 
@@ -81,6 +84,22 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
 	return NULL;
 }
 
+void bpf_local_storage_free_rcu(struct rcu_head *rcu)
+{
+	struct bpf_local_storage *local_storage;
+
+	local_storage = container_of(rcu, struct bpf_local_storage, rcu);
+	kfree_rcu(local_storage, rcu);
+}
+
+static void bpf_selem_free_rcu(struct rcu_head *rcu)
+{
+	struct bpf_local_storage_elem *selem;
+
+	selem = container_of(rcu, struct bpf_local_storage_elem, rcu);
+	kfree_rcu(selem, rcu);
+}
+
 /* local_storage->lock must be held and selem->local_storage == local_storage.
  * The caller must ensure selem->smap is still valid to be
  * dereferenced for its smap->elem_size and smap->cache_idx.
@@ -93,7 +112,7 @@ bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
 	bool free_local_storage;
 	void *owner;
 
-	smap = rcu_dereference(SDATA(selem)->smap);
+	smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
 	owner = local_storage->owner;
 
 	/* All uncharging on the owner must be done first.
@@ -118,12 +137,12 @@ bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
 		 *
 		 * Although the unlock will be done under
 		 * rcu_read_lock(),  it is more intutivie to
-		 * read if kfree_rcu(local_storage, rcu) is done
+		 * read if the freeing of the storage is done
 		 * after the raw_spin_unlock_bh(&local_storage->lock).
 		 *
 		 * Hence, a "bool free_local_storage" is returned
-		 * to the caller which then calls the kfree_rcu()
-		 * after unlock.
+		 * to the caller which then calls then frees the storage after
+		 * all the RCU grace periods have expired.
 		 */
 	}
 	hlist_del_init_rcu(&selem->snode);
@@ -131,8 +150,7 @@ bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
 			    SDATA(selem))
 		RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL);
 
-	kfree_rcu(selem, rcu);
-
+	call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_rcu);
 	return free_local_storage;
 }
 
@@ -146,7 +164,8 @@ static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem)
 		/* selem has already been unlinked from sk */
 		return;
 
-	local_storage = rcu_dereference(selem->local_storage);
+	local_storage = rcu_dereference_check(selem->local_storage,
+					      bpf_rcu_lock_held());
 	raw_spin_lock_irqsave(&local_storage->lock, flags);
 	if (likely(selem_linked_to_storage(selem)))
 		free_local_storage = bpf_selem_unlink_storage_nolock(
@@ -154,7 +173,8 @@ static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem)
 	raw_spin_unlock_irqrestore(&local_storage->lock, flags);
 
 	if (free_local_storage)
-		kfree_rcu(local_storage, rcu);
+		call_rcu_tasks_trace(&local_storage->rcu,
+				     bpf_local_storage_free_rcu);
 }
 
 void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage,
@@ -174,7 +194,7 @@ void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
 		/* selem has already be unlinked from smap */
 		return;
 
-	smap = rcu_dereference(SDATA(selem)->smap);
+	smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
 	b = select_bucket(smap, selem);
 	raw_spin_lock_irqsave(&b->lock, flags);
 	if (likely(selem_linked_to_map(selem)))
@@ -213,12 +233,14 @@ bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
 	struct bpf_local_storage_elem *selem;
 
 	/* Fast path (cache hit) */
-	sdata = rcu_dereference(local_storage->cache[smap->cache_idx]);
+	sdata = rcu_dereference_check(local_storage->cache[smap->cache_idx],
+				      bpf_rcu_lock_held());
 	if (sdata && rcu_access_pointer(sdata->smap) == smap)
 		return sdata;
 
 	/* Slow path (cache miss) */
-	hlist_for_each_entry_rcu(selem, &local_storage->list, snode)
+	hlist_for_each_entry_rcu(selem, &local_storage->list, snode,
+				  rcu_read_lock_trace_held())
 		if (rcu_access_pointer(SDATA(selem)->smap) == smap)
 			break;
 
@@ -306,7 +328,8 @@ int bpf_local_storage_alloc(void *owner,
 		 * bucket->list, first_selem can be freed immediately
 		 * (instead of kfree_rcu) because
 		 * bpf_local_storage_map_free() does a
-		 * synchronize_rcu() before walking the bucket->list.
+		 * synchronize_rcu_mult (waiting for both sleepable and
+		 * normal programs) before walking the bucket->list.
 		 * Hence, no one is accessing selem from the
 		 * bucket->list under rcu_read_lock().
 		 */
@@ -342,7 +365,8 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
 		     !map_value_has_spin_lock(&smap->map)))
 		return ERR_PTR(-EINVAL);
 
-	local_storage = rcu_dereference(*owner_storage(smap, owner));
+	local_storage = rcu_dereference_check(*owner_storage(smap, owner),
+					      bpf_rcu_lock_held());
 	if (!local_storage || hlist_empty(&local_storage->list)) {
 		/* Very first elem for the owner */
 		err = check_flags(NULL, map_flags);
@@ -17,6 +17,7 @@
 #include <uapi/linux/btf.h>
 #include <linux/btf_ids.h>
 #include <linux/fdtable.h>
+#include <linux/rcupdate_trace.h>
 
 DEFINE_BPF_STORAGE_CACHE(task_cache);
 
@@ -59,7 +60,8 @@ task_storage_lookup(struct task_struct *task, struct bpf_map *map,
 	struct bpf_local_storage *task_storage;
 	struct bpf_local_storage_map *smap;
 
-	task_storage = rcu_dereference(task->bpf_storage);
+	task_storage =
+		rcu_dereference_check(task->bpf_storage, bpf_rcu_lock_held());
 	if (!task_storage)
 		return NULL;
 
@@ -229,6 +231,7 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
 {
 	struct bpf_local_storage_data *sdata;
 
+	WARN_ON_ONCE(!bpf_rcu_lock_held());
 	if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE))
 		return (unsigned long)NULL;
 
@@ -260,6 +263,7 @@ BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *,
 {
 	int ret;
 
+	WARN_ON_ONCE(!bpf_rcu_lock_held());
 	if (!task)
 		return -EINVAL;
 
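
These local-storage changes are what make sleepable programs safe users of task/inode storage: lookups are now legal under either normal RCU or RCU-tasks-trace. A hypothetical sleepable LSM program that this enables (hook, map name and counting logic are illustrative only):

// SPDX-License-Identifier: GPL-2.0
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char LICENSE[] SEC("license") = "GPL";

struct {
	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	__type(key, int);
	__type(value, long);
} exec_cnt SEC(".maps");

SEC("lsm.s/bprm_committed_creds")
int BPF_PROG(count_exec, struct linux_binprm *bprm)
{
	long *cnt;

	/* sleepable path: protected by rcu_read_lock_trace() */
	cnt = bpf_task_storage_get(&exec_cnt, bpf_get_current_task_btf(), 0,
				   BPF_LOCAL_STORAGE_GET_F_CREATE);
	if (cnt)
		__sync_fetch_and_add(cnt, 1);
	return 0;
}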
124
kernel/bpf/btf.c
@@ -4826,7 +4826,7 @@ struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog)
 	return prog->aux->attach_btf;
 }
 
-static bool is_string_ptr(struct btf *btf, const struct btf_type *t)
+static bool is_int_ptr(struct btf *btf, const struct btf_type *t)
 {
 	/* t comes in already as a pointer */
 	t = btf_type_by_id(btf, t->type);
@@ -4835,8 +4835,7 @@ static bool is_string_ptr(struct btf *btf, const struct btf_type *t)
 	if (BTF_INFO_KIND(t->info) == BTF_KIND_CONST)
 		t = btf_type_by_id(btf, t->type);
 
-	/* char, signed char, unsigned char */
-	return btf_type_is_int(t) && t->size == 1;
+	return btf_type_is_int(t);
 }
 
 bool btf_ctx_access(int off, int size, enum bpf_access_type type,
@@ -4941,10 +4940,12 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
 	/* check for PTR_TO_RDONLY_BUF_OR_NULL or PTR_TO_RDWR_BUF_OR_NULL */
 	for (i = 0; i < prog->aux->ctx_arg_info_size; i++) {
 		const struct bpf_ctx_arg_aux *ctx_arg_info = &prog->aux->ctx_arg_info[i];
+		u32 type, flag;
 
-		if (ctx_arg_info->offset == off &&
-		    (ctx_arg_info->reg_type == PTR_TO_RDONLY_BUF_OR_NULL ||
-		     ctx_arg_info->reg_type == PTR_TO_RDWR_BUF_OR_NULL)) {
+		type = base_type(ctx_arg_info->reg_type);
+		flag = type_flag(ctx_arg_info->reg_type);
+		if (ctx_arg_info->offset == off && type == PTR_TO_BUF &&
+		    (flag & PTR_MAYBE_NULL)) {
 			info->reg_type = ctx_arg_info->reg_type;
 			return true;
 		}
@@ -4957,7 +4958,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
 	 */
 		return true;
 
-	if (is_string_ptr(btf, t))
+	if (is_int_ptr(btf, t))
 		return true;
 
 	/* this is a pointer to another type */
@@ -5575,12 +5576,53 @@ static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = {
 #endif
 };
 
+/* Returns true if struct is composed of scalars, 4 levels of nesting allowed */
+static bool __btf_type_is_scalar_struct(struct bpf_verifier_log *log,
+					const struct btf *btf,
+					const struct btf_type *t, int rec)
+{
+	const struct btf_type *member_type;
+	const struct btf_member *member;
+	u32 i;
+
+	if (!btf_type_is_struct(t))
+		return false;
+
+	for_each_member(i, t, member) {
+		const struct btf_array *array;
+
+		member_type = btf_type_skip_modifiers(btf, member->type, NULL);
+		if (btf_type_is_struct(member_type)) {
+			if (rec >= 3) {
+				bpf_log(log, "max struct nesting depth exceeded\n");
+				return false;
+			}
+			if (!__btf_type_is_scalar_struct(log, btf, member_type, rec + 1))
+				return false;
+			continue;
+		}
+		if (btf_type_is_array(member_type)) {
+			array = btf_type_array(member_type);
+			if (!array->nelems)
+				return false;
+			member_type = btf_type_skip_modifiers(btf, array->type, NULL);
+			if (!btf_type_is_scalar(member_type))
+				return false;
+			continue;
+		}
+		if (!btf_type_is_scalar(member_type))
+			return false;
+	}
+	return true;
+}
+
 static int btf_check_func_arg_match(struct bpf_verifier_env *env,
 				    const struct btf *btf, u32 func_id,
 				    struct bpf_reg_state *regs,
 				    bool ptr_to_mem_ok)
 {
 	struct bpf_verifier_log *log = &env->log;
+	bool is_kfunc = btf_is_kernel(btf);
 	const char *func_name, *ref_tname;
 	const struct btf_type *t, *ref_t;
 	const struct btf_param *args;
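
The shapes this new check accepts for kfunc pointer arguments are easy to see in a plain C sketch (illustrative types, not part of the patch):

/* Against the rule enforced by __btf_type_is_scalar_struct(): only
 * scalars and fixed-size scalar arrays, struct nesting up to 4 levels.
 */
struct lvl4 { int x; };
struct lvl3 { struct lvl4 a; };
struct lvl2 { struct lvl3 b; char name[8]; };	/* accepted: scalars + scalar array */
struct lvl1 { struct lvl2 c; long y; };		/* accepted: four levels deep */

struct rejected_ptr  { int *p; };		/* rejected: pointer member */
struct rejected_flex { int n; int data[]; };	/* rejected: zero-length array */

int main(void) { return 0; }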
@@ -5633,7 +5675,20 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
 
 		ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id);
 		ref_tname = btf_name_by_offset(btf, ref_t->name_off);
-		if (btf_is_kernel(btf)) {
+		if (btf_get_prog_ctx_type(log, btf, t,
+					  env->prog->type, i)) {
+			/* If function expects ctx type in BTF check that caller
+			 * is passing PTR_TO_CTX.
+			 */
+			if (reg->type != PTR_TO_CTX) {
+				bpf_log(log,
+					"arg#%d expected pointer to ctx, but got %s\n",
+					i, btf_type_str(t));
+				return -EINVAL;
+			}
+			if (check_ctx_reg(env, reg, regno))
+				return -EINVAL;
+		} else if (is_kfunc && (reg->type == PTR_TO_BTF_ID || reg2btf_ids[reg->type])) {
 			const struct btf_type *reg_ref_t;
 			const struct btf *reg_btf;
 			const char *reg_ref_tname;
@@ -5649,14 +5704,9 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
 			if (reg->type == PTR_TO_BTF_ID) {
 				reg_btf = reg->btf;
 				reg_ref_id = reg->btf_id;
-			} else if (reg2btf_ids[reg->type]) {
+			} else {
 				reg_btf = btf_vmlinux;
 				reg_ref_id = *reg2btf_ids[reg->type];
-			} else {
-				bpf_log(log, "kernel function %s args#%d expected pointer to %s %s but R%d is not a pointer to btf_id\n",
-					func_name, i,
-					btf_type_str(ref_t), ref_tname, regno);
-				return -EINVAL;
 			}
 
 			reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id,
@@ -5672,23 +5722,24 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
 					reg_ref_tname);
 				return -EINVAL;
 			}
-		} else if (btf_get_prog_ctx_type(log, btf, t,
-						 env->prog->type, i)) {
-			/* If function expects ctx type in BTF check that caller
-			 * is passing PTR_TO_CTX.
-			 */
-			if (reg->type != PTR_TO_CTX) {
-				bpf_log(log,
-					"arg#%d expected pointer to ctx, but got %s\n",
-					i, btf_type_str(t));
-				return -EINVAL;
-			}
-			if (check_ctx_reg(env, reg, regno))
-				return -EINVAL;
 		} else if (ptr_to_mem_ok) {
 			const struct btf_type *resolve_ret;
 			u32 type_size;
 
+			if (is_kfunc) {
+				/* Permit pointer to mem, but only when argument
+				 * type is pointer to scalar, or struct composed
+				 * (recursively) of scalars.
+				 */
+				if (!btf_type_is_scalar(ref_t) &&
+				    !__btf_type_is_scalar_struct(log, btf, ref_t, 0)) {
+					bpf_log(log,
+						"arg#%d pointer type %s %s must point to scalar or struct with scalar\n",
+						i, btf_type_str(ref_t), ref_tname);
+					return -EINVAL;
+				}
+			}
+
 			resolve_ret = btf_resolve_size(btf, ref_t, &type_size);
 			if (IS_ERR(resolve_ret)) {
 				bpf_log(log,
@@ -5701,6 +5752,8 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
 			if (check_mem_reg(env, reg, regno, type_size))
 				return -EINVAL;
 		} else {
+			bpf_log(log, "reg type unsupported for arg#%d %sfunction %s#%d\n", i,
+				is_kfunc ? "kernel " : "", func_name, func_id);
 			return -EINVAL;
 		}
 	}
@@ -5750,7 +5803,7 @@ int btf_check_kfunc_arg_match(struct bpf_verifier_env *env,
 			      const struct btf *btf, u32 func_id,
 			      struct bpf_reg_state *regs)
 {
-	return btf_check_func_arg_match(env, btf, func_id, regs, false);
+	return btf_check_func_arg_match(env, btf, func_id, regs, true);
 }
 
 /* Convert BTF of a function into bpf_reg_state if possible
@@ -5858,7 +5911,7 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
 		return -EINVAL;
 	}
 
-	reg->type = PTR_TO_MEM_OR_NULL;
+	reg->type = PTR_TO_MEM | PTR_MAYBE_NULL;
 	reg->id = ++env->id_gen;
 
 	continue;
@@ -6352,7 +6405,7 @@ const struct bpf_func_proto bpf_btf_find_by_name_kind_proto = {
 	.func		= bpf_btf_find_by_name_kind,
 	.gpl_only	= false,
 	.ret_type	= RET_INTEGER,
-	.arg1_type	= ARG_PTR_TO_MEM,
+	.arg1_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
 	.arg2_type	= ARG_CONST_SIZE,
 	.arg3_type	= ARG_ANYTHING,
 	.arg4_type	= ARG_ANYTHING,
@@ -6534,12 +6587,11 @@ static struct bpf_cand_cache *populate_cand_cache(struct bpf_cand_cache *cands,
 		bpf_free_cands_from_cache(*cc);
 		*cc = NULL;
 	}
-	new_cands = kmalloc(sizeof_cands(cands->cnt), GFP_KERNEL);
+	new_cands = kmemdup(cands, sizeof_cands(cands->cnt), GFP_KERNEL);
 	if (!new_cands) {
 		bpf_free_cands(cands);
 		return ERR_PTR(-ENOMEM);
 	}
-	memcpy(new_cands, cands, sizeof_cands(cands->cnt));
 	/* strdup the name, since it will stay in cache.
 	 * the cands->name points to strings in prog's BTF and the prog can be unloaded.
 	 */
@@ -6657,7 +6709,7 @@ bpf_core_find_cands(struct bpf_core_ctx *ctx, u32 local_type_id)
 
 	main_btf = bpf_get_btf_vmlinux();
 	if (IS_ERR(main_btf))
-		return (void *)main_btf;
+		return ERR_CAST(main_btf);
 
 	local_type = btf_type_by_id(local_btf, local_type_id);
 	if (!local_type)
@@ -6684,14 +6736,14 @@ bpf_core_find_cands(struct bpf_core_ctx *ctx, u32 local_type_id)
 	/* Attempt to find target candidates in vmlinux BTF first */
 	cands = bpf_core_add_cands(cands, main_btf, 1);
 	if (IS_ERR(cands))
-		return cands;
+		return ERR_CAST(cands);
 
 	/* cands is a pointer to kmalloced memory here if cands->cnt > 0 */
 
 	/* populate cache even when cands->cnt == 0 */
 	cc = populate_cand_cache(cands, vmlinux_cand_cache, VMLINUX_CAND_CACHE_SIZE);
 	if (IS_ERR(cc))
-		return cc;
+		return ERR_CAST(cc);
 
 	/* if vmlinux BTF has any candidate, don't go for module BTFs */
 	if (cc->cnt)
@@ -6717,7 +6769,7 @@ bpf_core_find_cands(struct bpf_core_ctx *ctx, u32 local_type_id)
 		cands = bpf_core_add_cands(cands, mod_btf, btf_nr_types(main_btf));
 		if (IS_ERR(cands)) {
 			btf_put(mod_btf);
-			return cands;
+			return ERR_CAST(cands);
 		}
 		spin_lock_bh(&btf_idr_lock);
 		btf_put(mod_btf);
@@ -1789,7 +1789,7 @@ static const struct bpf_func_proto bpf_sysctl_set_new_value_proto = {
 	.gpl_only	= false,
 	.ret_type	= RET_INTEGER,
 	.arg1_type	= ARG_PTR_TO_CTX,
-	.arg2_type	= ARG_PTR_TO_MEM,
+	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
 	.arg3_type	= ARG_CONST_SIZE,
 };
 
@@ -195,7 +195,7 @@ static void cpu_map_bpf_prog_run_skb(struct bpf_cpu_map_entry *rcpu,
 		}
 		return;
 	default:
-		bpf_warn_invalid_xdp_action(act);
+		bpf_warn_invalid_xdp_action(NULL, rcpu->prog, act);
 		fallthrough;
 	case XDP_ABORTED:
 		trace_xdp_exception(skb->dev, rcpu->prog, act);
@@ -254,7 +254,7 @@ static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu,
 		}
 		break;
 	default:
-		bpf_warn_invalid_xdp_action(act);
+		bpf_warn_invalid_xdp_action(NULL, rcpu->prog, act);
 		fallthrough;
 	case XDP_DROP:
 		xdp_return_frame(xdpf);
@@ -348,7 +348,7 @@ static int dev_map_bpf_prog_run(struct bpf_prog *xdp_prog,
 			frames[nframes++] = xdpf;
 			break;
 		default:
-			bpf_warn_invalid_xdp_action(act);
+			bpf_warn_invalid_xdp_action(NULL, xdp_prog, act);
 			fallthrough;
 		case XDP_ABORTED:
 			trace_xdp_exception(dev, xdp_prog, act);
@@ -507,7 +507,7 @@ static u32 dev_map_bpf_prog_run_skb(struct sk_buff *skb, struct bpf_dtab_netdev
 		__skb_push(skb, skb->mac_len);
 		break;
 	default:
-		bpf_warn_invalid_xdp_action(act);
+		bpf_warn_invalid_xdp_action(NULL, dst->xdp_prog, act);
 		fallthrough;
 	case XDP_ABORTED:
 		trace_xdp_exception(dst->dev, dst->xdp_prog, act);
@@ -2,6 +2,7 @@
 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
  */
 #include <linux/bpf.h>
+#include <linux/bpf-cgroup.h>
 #include <linux/rcupdate.h>
 #include <linux/random.h>
 #include <linux/smp.h>
@@ -530,7 +531,7 @@ const struct bpf_func_proto bpf_strtol_proto = {
 	.func		= bpf_strtol,
 	.gpl_only	= false,
 	.ret_type	= RET_INTEGER,
-	.arg1_type	= ARG_PTR_TO_MEM,
+	.arg1_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
 	.arg2_type	= ARG_CONST_SIZE,
 	.arg3_type	= ARG_ANYTHING,
 	.arg4_type	= ARG_PTR_TO_LONG,
@@ -558,13 +559,27 @@ const struct bpf_func_proto bpf_strtoul_proto = {
 	.func		= bpf_strtoul,
 	.gpl_only	= false,
 	.ret_type	= RET_INTEGER,
-	.arg1_type	= ARG_PTR_TO_MEM,
+	.arg1_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
 	.arg2_type	= ARG_CONST_SIZE,
 	.arg3_type	= ARG_ANYTHING,
 	.arg4_type	= ARG_PTR_TO_LONG,
 };
 #endif
 
+BPF_CALL_3(bpf_strncmp, const char *, s1, u32, s1_sz, const char *, s2)
+{
+	return strncmp(s1, s2, s1_sz);
+}
+
+const struct bpf_func_proto bpf_strncmp_proto = {
+	.func		= bpf_strncmp,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_MEM,
+	.arg2_type	= ARG_CONST_SIZE,
+	.arg3_type	= ARG_PTR_TO_CONST_STR,
+};
+
 BPF_CALL_4(bpf_get_ns_current_pid_tgid, u64, dev, u64, ino,
 	   struct bpf_pidns_info *, nsdata, u32, size)
 {
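
The implementation above is just strncmp() with the size argument moved: bpf_strncmp(s1, s1_sz, s2) == strncmp(s1, s2, s1_sz). A userspace model that checks this equivalence (not kernel code):

#include <assert.h>
#include <string.h>

/* mirrors the in-kernel helper body shown above */
static long bpf_strncmp_model(const char *s1, unsigned s1_sz, const char *s2)
{
	return strncmp(s1, s2, s1_sz);
}

int main(void)
{
	assert(bpf_strncmp_model("bpf", 3, "bpf") == 0);
	assert(bpf_strncmp_model("abc", 3, "abd") < 0);
	return 0;
}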
@@ -630,7 +645,7 @@ const struct bpf_func_proto bpf_event_output_data_proto = {
 	.arg1_type      = ARG_PTR_TO_CTX,
 	.arg2_type      = ARG_CONST_MAP_PTR,
 	.arg3_type      = ARG_ANYTHING,
-	.arg4_type      = ARG_PTR_TO_MEM,
+	.arg4_type      = ARG_PTR_TO_MEM | MEM_RDONLY,
 	.arg5_type      = ARG_CONST_SIZE_OR_ZERO,
 };
 
@@ -667,7 +682,7 @@ BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu)
 const struct bpf_func_proto bpf_per_cpu_ptr_proto = {
 	.func		= bpf_per_cpu_ptr,
 	.gpl_only	= false,
-	.ret_type	= RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL,
+	.ret_type	= RET_PTR_TO_MEM_OR_BTF_ID | PTR_MAYBE_NULL | MEM_RDONLY,
 	.arg1_type	= ARG_PTR_TO_PERCPU_BTF_ID,
 	.arg2_type	= ARG_ANYTHING,
 };
@@ -680,7 +695,7 @@ BPF_CALL_1(bpf_this_cpu_ptr, const void *, percpu_ptr)
 const struct bpf_func_proto bpf_this_cpu_ptr_proto = {
 	.func		= bpf_this_cpu_ptr,
 	.gpl_only	= false,
-	.ret_type	= RET_PTR_TO_MEM_OR_BTF_ID,
+	.ret_type	= RET_PTR_TO_MEM_OR_BTF_ID | MEM_RDONLY,
 	.arg1_type	= ARG_PTR_TO_PERCPU_BTF_ID,
 };
 
@@ -1011,7 +1026,7 @@ const struct bpf_func_proto bpf_snprintf_proto = {
 	.arg1_type	= ARG_PTR_TO_MEM_OR_NULL,
 	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
 	.arg3_type	= ARG_PTR_TO_CONST_STR,
-	.arg4_type	= ARG_PTR_TO_MEM_OR_NULL,
+	.arg4_type	= ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY,
 	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
 };
 
@@ -1378,6 +1393,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 		return &bpf_for_each_map_elem_proto;
 	case BPF_FUNC_loop:
 		return &bpf_loop_proto;
+	case BPF_FUNC_strncmp:
+		return &bpf_strncmp_proto;
 	default:
 		break;
 	}
Some files were not shown because too many files have changed in this diff.