2019-06-01 08:08:55 +00:00
|
|
|
// SPDX-License-Identifier: GPL-2.0-only
|
2012-03-16 12:37:12 +00:00
|
|
|
/*
|
2017-08-22 06:32:33 +00:00
|
|
|
* Just-In-Time compiler for eBPF filters on 32bit ARM
|
2012-03-16 12:37:12 +00:00
|
|
|
*
|
2023-09-07 23:05:47 +00:00
|
|
|
* Copyright (c) 2023 Puranjay Mohan <puranjay12@gmail.com>
|
2017-08-22 06:32:33 +00:00
|
|
|
* Copyright (c) 2017 Shubham Bansal <illusionist.neo@gmail.com>
|
2012-03-16 12:37:12 +00:00
|
|
|
* Copyright (c) 2011 Mircea Gherzan <mgherzan@gmail.com>
|
|
|
|
*/
|
|
|
|
|
2017-08-22 06:32:33 +00:00
|
|
|
#include <linux/bpf.h>
|
2012-03-16 12:37:12 +00:00
|
|
|
#include <linux/bitops.h>
|
|
|
|
#include <linux/compiler.h>
|
|
|
|
#include <linux/errno.h>
|
|
|
|
#include <linux/filter.h>
|
|
|
|
#include <linux/netdevice.h>
|
|
|
|
#include <linux/string.h>
|
|
|
|
#include <linux/slab.h>
|
2012-11-07 15:31:02 +00:00
|
|
|
#include <linux/if_vlan.h>
|
2023-09-07 23:05:47 +00:00
|
|
|
#include <linux/math64.h>
|
net: bpf: arm: make hole-faulting more robust
Will Deacon pointed out, that the currently used opcode for filling holes,
that is 0xe7ffffff, seems not robust enough ...
$ echo 0xffffffe7 | xxd -r > test.bin
$ arm-linux-gnueabihf-objdump -m arm -D -b binary test.bin
...
0: e7ffffff udf #65535 ; 0xffff
... while for Thumb, it ends up as ...
0: ffff e7ff vqshl.u64 q15, <illegal reg q15.5>, #63
... which is a bit fragile. The ARM specification defines some *permanently*
guaranteed undefined instruction (UDF) space, for example for ARM in ARMv7-AR,
section A5.4 and for Thumb in ARMv7-M, section A5.2.6.
Similarly, ptrace, kprobes, kgdb, bug and uprobes make use of such instruction
as well to trap. Given mentioned section from the specification, we can find
such a universe as (where 'x' denotes 'don't care'):
ARM: xxxx 0111 1111 xxxx xxxx xxxx 1111 xxxx
Thumb: 1101 1110 xxxx xxxx
We therefore should use a more robust opcode that fits both. Russell King
suggested that we can even reuse a single 32-bit word, that is, 0xe7fddef1
which will fault if executed in ARM *or* Thumb mode as done in f928d4f2a86f
("ARM: poison the vectors page"). That will still hold our requirements:
$ echo 0xf1defde7 | xxd -r > test.bin
$ arm-unknown-linux-gnueabi-objdump -m arm -D -b binary test.bin
...
0: e7fddef1 udf #56801 ; 0xdde1
$ echo 0xf1defde7f1defde7f1defde7 | xxd -r > test.bin
$ arm-unknown-linux-gnueabi-objdump -marm -Mforce-thumb -D -b binary test.bin
...
0: def1 udf #241 ; 0xf1
2: e7fd b.n 0x0
4: def1 udf #241 ; 0xf1
6: e7fd b.n 0x4
8: def1 udf #241 ; 0xf1
a: e7fd b.n 0x8
So on ARM 0xe7fddef1 conforms to the above UDF pattern, and the low 16 bit
likewise correspond to UDF in Thumb case. The 0xe7fd part is an unconditional
branch back to the UDF instruction.
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Mircea Gherzan <mgherzan@gmail.com>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-19 12:56:57 +00:00
|
|
|
|
2012-03-16 12:37:12 +00:00
|
|
|
#include <asm/cacheflush.h>
|
|
|
|
#include <asm/hwcap.h>
|
2013-07-24 14:44:56 +00:00
|
|
|
#include <asm/opcodes.h>
|
2018-07-11 09:32:38 +00:00
|
|
|
#include <asm/system_info.h>
|
2012-03-16 12:37:12 +00:00
|
|
|
|
|
|
|
#include "bpf_jit_32.h"
|
|
|
|
|
2018-01-13 21:26:14 +00:00
|
|
|
/*
|
2018-01-13 22:51:27 +00:00
|
|
|
* eBPF prog stack layout:
|
2018-01-13 21:26:14 +00:00
|
|
|
*
|
|
|
|
* high
|
2018-01-13 22:51:27 +00:00
|
|
|
* original ARM_SP => +-----+
|
|
|
|
* | | callee saved registers
|
|
|
|
* +-----+ <= (BPF_FP + SCRATCH_SIZE)
|
2018-01-13 21:26:14 +00:00
|
|
|
* | ... | eBPF JIT scratch space
|
2018-01-13 22:51:27 +00:00
|
|
|
* eBPF fp register => +-----+
|
|
|
|
* (BPF_FP) | ... | eBPF prog stack
|
2018-01-13 21:26:14 +00:00
|
|
|
* +-----+
|
|
|
|
* |RSVD | JIT scratchpad
|
2018-01-13 22:51:27 +00:00
|
|
|
* current ARM_SP => +-----+ <= (BPF_FP - STACK_SIZE + SCRATCH_SIZE)
|
2021-09-28 09:13:10 +00:00
|
|
|
* | ... | caller-saved registers
|
|
|
|
* +-----+
|
|
|
|
* | ... | arguments passed on stack
|
|
|
|
* ARM_SP during call => +-----+
|
2018-01-13 21:26:14 +00:00
|
|
|
* | |
|
|
|
|
* | ... | Function call stack
|
|
|
|
* | |
|
|
|
|
* +-----+
|
|
|
|
* low
|
2018-01-13 22:51:27 +00:00
|
|
|
*
|
|
|
|
* The set of callee-saved registers depends on whether frame pointers are enabled.
|
|
|
|
* With frame pointers (to be compliant with the ABI):
|
|
|
|
*
|
2018-07-11 09:32:33 +00:00
|
|
|
* high
|
|
|
|
* original ARM_SP => +--------------+ \
|
|
|
|
* | pc | |
|
|
|
|
* current ARM_FP => +--------------+ } callee saved registers
|
|
|
|
* |r4-r9,fp,ip,lr| |
|
|
|
|
* +--------------+ /
|
|
|
|
* low
|
2018-01-13 22:51:27 +00:00
|
|
|
*
|
|
|
|
* Without frame pointers:
|
|
|
|
*
|
2018-07-11 09:32:33 +00:00
|
|
|
* high
|
|
|
|
* original ARM_SP => +--------------+
|
|
|
|
* | r4-r9,fp,lr | callee saved registers
|
|
|
|
* current ARM_FP => +--------------+
|
|
|
|
* low
|
2018-01-13 22:38:18 +00:00
|
|
|
*
|
|
|
|
* When popping registers off the stack at the end of a BPF function, we
|
|
|
|
* reference them via the current ARM_FP register.
|
2021-09-28 09:13:10 +00:00
|
|
|
*
|
|
|
|
* Some eBPF operations are implemented via a call to a helper function.
|
|
|
|
* Such calls are "invisible" in the eBPF code, so it is up to the calling
|
|
|
|
* program to preserve any caller-saved ARM registers during the call. The
|
|
|
|
* JIT emits code to push and pop those registers onto the stack, immediately
|
|
|
|
* above the callee stack frame.
|
2018-01-13 21:26:14 +00:00
|
|
|
*/
|
2018-01-13 22:38:18 +00:00
|
|
|
/* r4-r9 and fp: the registers the layout comment above lists as callee saved. */
#define CALLEE_MASK	((1 << ARM_R4) | (1 << ARM_R5) | (1 << ARM_R6) | \
			 (1 << ARM_R7) | (1 << ARM_R8) | (1 << ARM_R9) | \
			 (1 << ARM_FP))
/* CALLEE_MASK plus lr, respectively plus pc. */
#define CALLEE_PUSH_MASK	(CALLEE_MASK | (1 << ARM_LR))
#define CALLEE_POP_MASK		(CALLEE_MASK | (1 << ARM_PC))

/* r0-r3: saved around helper calls emitted by the JIT (see emit_udivmod()). */
#define CALLER_MASK	((1 << ARM_R0) | (1 << ARM_R1) | \
			 (1 << ARM_R2) | (1 << ARM_R3))
|
|
|
|
|
2018-07-11 09:31:31 +00:00
|
|
|
/*
 * 32-bit slots of the eBPF JIT scratch space.  Each stacked eBPF register
 * owns a HI/LO pair; STACK_OFFSET() converts a slot index into a byte offset.
 */
enum {
	/* Stack layout - these are offsets from (top of stack - 4) */
	BPF_R2_HI = 0,
	BPF_R2_LO,
	BPF_R3_HI,
	BPF_R3_LO,
	BPF_R4_HI,
	BPF_R4_LO,
	BPF_R5_HI,
	BPF_R5_LO,
	BPF_R7_HI,
	BPF_R7_LO,
	BPF_R8_HI,
	BPF_R8_LO,
	BPF_R9_HI,
	BPF_R9_LO,
	BPF_FP_HI,
	BPF_FP_LO,
	BPF_TC_HI,
	BPF_TC_LO,
	BPF_AX_HI,
	BPF_AX_LO,
	/*
	 * Number of slots above: stack space for BPF_REG_2 .. BPF_REG_5,
	 * BPF_REG_7 .. BPF_REG_9, BPF_REG_FP, the tail call count and
	 * BPF_REG_AX.  Must stay last.
	 */
	BPF_JIT_SCRATCH_REGS,
};
|
|
|
|
|
2018-07-11 09:31:41 +00:00
|
|
|
/*
 * A negative "register" number means the value lives on the stack; the
 * number is then the byte offset of its slot, measured down from the top of
 * the eBPF JIT scratch space (slot 0 is at -4, slot 1 at -8, ...).
 */
#define STACK_OFFSET(k)	(-4 - 4 * (k))
/* Size in bytes of the whole scratch space: one 32-bit word per slot. */
#define SCRATCH_SIZE	(4 * BPF_JIT_SCRATCH_REGS)
|
|
|
|
|
2018-07-11 09:32:02 +00:00
|
|
|
/*
 * Convert a scratch-space offset (as produced by STACK_OFFSET()) into an
 * offset usable with ARM_FP.  With frame pointers enabled the offset is
 * lowered by the size of the CALLEE_PUSH_MASK registers plus one more word.
 */
#ifndef CONFIG_FRAME_POINTER
#define EBPF_SCRATCH_TO_ARM_FP(x)	(x)
#else
#define EBPF_SCRATCH_TO_ARM_FP(x)	((x) - 4 * hweight16(CALLEE_PUSH_MASK) - 4)
#endif
|
|
|
|
|
2017-08-22 06:32:33 +00:00
|
|
|
/* JIT-private register numbers, allocated directly after MAX_BPF_JIT_REG. */
#define TMP_REG_1	(MAX_BPF_JIT_REG)	/* TEMP Register 1 */
#define TMP_REG_2	(MAX_BPF_JIT_REG + 1)	/* TEMP Register 2 */
#define TCALL_CNT	(MAX_BPF_JIT_REG + 2)	/* Tail Call Count */

/* jit_ctx.flags: a literal-pool entry was out of reach of its load. */
#define FLAG_IMM_OVERFLOW	0x1
|
|
|
|
|
2012-03-16 12:37:12 +00:00
|
|
|
/*
|
2017-08-22 06:32:33 +00:00
|
|
|
* Map eBPF registers to ARM 32bit registers or stack scratch space.
|
|
|
|
*
|
|
|
|
* 1. First argument is passed using the arm 32bit registers and rest of the
|
|
|
|
* arguments are passed on stack scratch space.
|
2018-05-11 03:06:34 +00:00
|
|
|
* 2. First callee-saved argument is mapped to arm 32 bit registers and rest
|
2017-08-22 06:32:33 +00:00
|
|
|
* arguments are mapped to scratch space on stack.
|
|
|
|
* 3. We need two 64 bit temp registers to do complex operations on eBPF
|
|
|
|
* registers.
|
|
|
|
*
|
|
|
|
* As the eBPF registers are all 64 bit registers and arm has only 32 bit
|
|
|
|
* registers, we have to map each eBPF registers with two arm 32 bit regs or
|
|
|
|
* scratch memory space and we have to build eBPF 64 bit register from those.
|
2012-03-16 12:37:12 +00:00
|
|
|
*
|
|
|
|
*/
|
2018-07-11 09:31:41 +00:00
|
|
|
static const s8 bpf2a32[][2] = {
|
2017-08-22 06:32:33 +00:00
|
|
|
/* return value from in-kernel function, and exit value from eBPF */
|
|
|
|
[BPF_REG_0] = {ARM_R1, ARM_R0},
|
|
|
|
/* arguments from eBPF program to in-kernel function */
|
|
|
|
[BPF_REG_1] = {ARM_R3, ARM_R2},
|
|
|
|
/* Stored on stack scratch space */
|
2018-07-11 09:31:31 +00:00
|
|
|
[BPF_REG_2] = {STACK_OFFSET(BPF_R2_HI), STACK_OFFSET(BPF_R2_LO)},
|
|
|
|
[BPF_REG_3] = {STACK_OFFSET(BPF_R3_HI), STACK_OFFSET(BPF_R3_LO)},
|
|
|
|
[BPF_REG_4] = {STACK_OFFSET(BPF_R4_HI), STACK_OFFSET(BPF_R4_LO)},
|
|
|
|
[BPF_REG_5] = {STACK_OFFSET(BPF_R5_HI), STACK_OFFSET(BPF_R5_LO)},
|
2017-08-22 06:32:33 +00:00
|
|
|
/* callee saved registers that in-kernel function will preserve */
|
|
|
|
[BPF_REG_6] = {ARM_R5, ARM_R4},
|
|
|
|
/* Stored on stack scratch space */
|
2018-07-11 09:31:31 +00:00
|
|
|
[BPF_REG_7] = {STACK_OFFSET(BPF_R7_HI), STACK_OFFSET(BPF_R7_LO)},
|
|
|
|
[BPF_REG_8] = {STACK_OFFSET(BPF_R8_HI), STACK_OFFSET(BPF_R8_LO)},
|
|
|
|
[BPF_REG_9] = {STACK_OFFSET(BPF_R9_HI), STACK_OFFSET(BPF_R9_LO)},
|
2017-08-22 06:32:33 +00:00
|
|
|
/* Read only Frame Pointer to access Stack */
|
2018-07-11 09:31:31 +00:00
|
|
|
[BPF_REG_FP] = {STACK_OFFSET(BPF_FP_HI), STACK_OFFSET(BPF_FP_LO)},
|
2021-11-19 16:32:13 +00:00
|
|
|
/* Temporary Register for BPF JIT, can be used
|
2017-08-22 06:32:33 +00:00
|
|
|
* for constant blindings and others.
|
|
|
|
*/
|
|
|
|
[TMP_REG_1] = {ARM_R7, ARM_R6},
|
2018-07-11 09:32:33 +00:00
|
|
|
[TMP_REG_2] = {ARM_R9, ARM_R8},
|
2017-08-22 06:32:33 +00:00
|
|
|
/* Tail call count. Stored on stack scratch space. */
|
2018-07-11 09:31:31 +00:00
|
|
|
[TCALL_CNT] = {STACK_OFFSET(BPF_TC_HI), STACK_OFFSET(BPF_TC_LO)},
|
2017-08-22 06:32:33 +00:00
|
|
|
/* temporary register for blinding constants.
|
|
|
|
* Stored on stack scratch space.
|
|
|
|
*/
|
2018-07-11 09:31:31 +00:00
|
|
|
[BPF_REG_AX] = {STACK_OFFSET(BPF_AX_HI), STACK_OFFSET(BPF_AX_LO)},
|
2017-08-22 06:32:33 +00:00
|
|
|
};
|
2012-03-16 12:37:12 +00:00
|
|
|
|
2017-08-22 06:32:33 +00:00
|
|
|
/*
 * Shorthands for the halves of a bpf2a32[] style pair held in a local named
 * "dst" or "src": element 0 is the high word, element 1 the low word.
 */
#define dst_hi	dst[0]
#define dst_lo	dst[1]
#define src_hi	src[0]
#define src_lo	src[1]
|
2012-03-16 12:37:12 +00:00
|
|
|
|
2017-08-22 06:32:33 +00:00
|
|
|
/*
|
|
|
|
* JIT Context:
|
|
|
|
*
|
|
|
|
* prog : bpf_prog
|
|
|
|
* idx : index of current last JITed instruction.
|
|
|
|
* prologue_bytes : bytes used in prologue.
|
|
|
|
* epilogue_offset : offset of epilogue starting.
|
|
|
|
* offsets : array of eBPF instruction offsets in
|
|
|
|
* JITed code.
|
|
|
|
* target : final JITed code.
|
|
|
|
* epilogue_bytes : no of bytes used in epilogue.
|
|
|
|
* imm_count : no of immediate counts used for global
|
|
|
|
* variables.
|
|
|
|
* imms : array of global variable addresses.
|
|
|
|
*/
|
2012-03-16 12:37:12 +00:00
|
|
|
|
|
|
|
struct jit_ctx {
|
2017-08-22 06:32:33 +00:00
|
|
|
const struct bpf_prog *prog;
|
|
|
|
unsigned int idx;
|
|
|
|
unsigned int prologue_bytes;
|
|
|
|
unsigned int epilogue_offset;
|
2018-07-11 09:32:38 +00:00
|
|
|
unsigned int cpu_architecture;
|
2012-03-16 12:37:12 +00:00
|
|
|
u32 flags;
|
|
|
|
u32 *offsets;
|
|
|
|
u32 *target;
|
2017-08-22 06:32:33 +00:00
|
|
|
u32 stack_size;
|
2012-03-16 12:37:12 +00:00
|
|
|
#if __LINUX_ARM_ARCH__ < 7
|
|
|
|
u16 epilogue_bytes;
|
|
|
|
u16 imm_count;
|
|
|
|
u32 *imms;
|
|
|
|
#endif
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
2015-10-02 15:06:47 +00:00
|
|
|
* Wrappers which handle both OABI and EABI and assure Thumb2 interworking
|
2012-03-16 12:37:12 +00:00
|
|
|
* (where the assembly routines like __aeabi_uidiv could cause problems).
|
|
|
|
*/
|
2017-08-22 06:32:33 +00:00
|
|
|
/* Unsigned 32-bit quotient; emit_udivmod() emits a call to this helper. */
static u32 jit_udiv32(u32 dividend, u32 divisor)
{
	const u32 quotient = dividend / divisor;

	return quotient;
}
|
|
|
|
|
2017-08-22 06:32:33 +00:00
|
|
|
/* Unsigned 32-bit remainder; emit_udivmod() emits a call to this helper. */
static u32 jit_mod32(u32 dividend, u32 divisor)
{
	const u32 remainder = dividend % divisor;

	return remainder;
}
|
|
|
|
|
2023-09-07 23:05:46 +00:00
|
|
|
/* Signed 32-bit quotient; emit_udivmod() emits a call to this helper. */
static s32 jit_sdiv32(s32 dividend, s32 divisor)
{
	const s32 quotient = dividend / divisor;

	return quotient;
}
|
|
|
|
|
|
|
|
/* Signed 32-bit remainder; emit_udivmod() emits a call to this helper. */
static s32 jit_smod32(s32 dividend, s32 divisor)
{
	const s32 remainder = dividend % divisor;

	return remainder;
}
|
|
|
|
|
2023-09-07 23:05:47 +00:00
|
|
|
/* Wrappers for 64-bit div/mod */
|
|
|
|
static u64 jit_udiv64(u64 dividend, u64 divisor)
|
|
|
|
{
|
|
|
|
return div64_u64(dividend, divisor);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Unsigned 64-bit remainder: the quotient returned by div64_u64_rem() is
 * discarded and only the remainder it stores is returned.
 */
static u64 jit_mod64(u64 dividend, u64 divisor)
{
	u64 remainder;

	(void)div64_u64_rem(dividend, divisor, &remainder);
	return remainder;
}
|
|
|
|
|
|
|
|
/* Signed 64-bit quotient, computed by div64_s64(). */
static s64 jit_sdiv64(s64 dividend, s64 divisor)
{
	const s64 quotient = div64_s64(dividend, divisor);

	return quotient;
}
|
|
|
|
|
|
|
|
/*
 * Signed 64-bit remainder, derived from the quotient as
 * dividend - quotient * divisor.  The quotient is held in a u64, so the
 * multiplication and subtraction are carried out in unsigned arithmetic
 * before the result is converted back to s64.
 */
static s64 jit_smod64(s64 dividend, s64 divisor)
{
	u64 quotient = div64_s64(dividend, divisor);

	return dividend - quotient * divisor;
}
|
|
|
|
|
2012-03-16 12:37:12 +00:00
|
|
|
/*
 * Emit one ARM instruction with condition code @cond.
 *
 * @cond is placed in bits 31:28 of @inst and the word is converted with
 * __opcode_to_mem_arm().  When a target buffer exists the word is stored at
 * ctx->target[ctx->idx]; ctx->idx is advanced in either case, so a run
 * without a buffer still counts the instructions it would have emitted.
 */
static inline void _emit(int cond, u32 inst, struct jit_ctx *ctx)
{
	/*
	 * Do the shift in u32: a condition value of 8 or more would otherwise
	 * be shifted into the sign bit of an int, which ISO C leaves
	 * undefined.
	 */
	inst |= (u32)cond << 28;
	inst = __opcode_to_mem_arm(inst);

	if (ctx->target != NULL)
		ctx->target[ctx->idx] = inst;

	ctx->idx++;
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Emit an instruction that will be executed unconditionally.
|
|
|
|
*/
|
|
|
|
static inline void emit(u32 inst, struct jit_ctx *ctx)
|
|
|
|
{
|
|
|
|
_emit(ARM_COND_AL, inst, ctx);
|
|
|
|
}
|
|
|
|
|
2018-07-11 09:32:07 +00:00
|
|
|
/*
 * Compile-time conversion of an integer constant into an immediate operand
 * for the opcodes.  It is spelled out as one test per even rotation, with
 * literal masks, so that the result can be evaluated at build time and a
 * constant that cannot be converted is detectable there (iow, usable in
 * BUILD_BUG_ON()).
 *
 * imm12val(v, s): the 8-bit field obtained by rotating @v left by @s,
 * combined with @s shifted left by 7.
 * const_imm8m(x): the encoded operand, or -1 if @x has no such encoding.
 */
#define imm12val(v, s)	(rol32((v), (s)) | ((s) << 7))
#define const_imm8m(x)					\
({							\
	u32 v = (x);					\
	int r = -1;					\
							\
	if (!(v & ~0x000000ff))				\
		r = imm12val(v, 0);			\
	else if (!(v & ~0xc000003f))			\
		r = imm12val(v, 2);			\
	else if (!(v & ~0xf000000f))			\
		r = imm12val(v, 4);			\
	else if (!(v & ~0xfc000003))			\
		r = imm12val(v, 6);			\
	else if (!(v & ~0xff000000))			\
		r = imm12val(v, 8);			\
	else if (!(v & ~0x3fc00000))			\
		r = imm12val(v, 10);			\
	else if (!(v & ~0x0ff00000))			\
		r = imm12val(v, 12);			\
	else if (!(v & ~0x03fc0000))			\
		r = imm12val(v, 14);			\
	else if (!(v & ~0x00ff0000))			\
		r = imm12val(v, 16);			\
	else if (!(v & ~0x003fc000))			\
		r = imm12val(v, 18);			\
	else if (!(v & ~0x000ff000))			\
		r = imm12val(v, 20);			\
	else if (!(v & ~0x0003fc00))			\
		r = imm12val(v, 22);			\
	else if (!(v & ~0x0000ff00))			\
		r = imm12val(v, 24);			\
	else if (!(v & ~0x00003fc0))			\
		r = imm12val(v, 26);			\
	else if (!(v & ~0x00000ff0))			\
		r = imm12val(v, 28);			\
	else if (!(v & ~0x000003fc))			\
		r = imm12val(v, 30);			\
	r;						\
})
|
|
|
|
|
2017-08-22 06:32:33 +00:00
|
|
|
/*
|
|
|
|
* Checks if immediate value can be converted to imm12(12 bits) value.
|
|
|
|
*/
|
2018-07-11 09:32:07 +00:00
|
|
|
static int imm8m(u32 x)
|
2012-03-16 12:37:12 +00:00
|
|
|
{
|
2017-08-22 06:32:33 +00:00
|
|
|
u32 rot;
|
2012-03-16 12:37:12 +00:00
|
|
|
|
2017-08-22 06:32:33 +00:00
|
|
|
for (rot = 0; rot < 16; rot++)
|
|
|
|
if ((x & ~ror32(0xff, 2 * rot)) == 0)
|
|
|
|
return rol32(x, 2 * rot) | (rot << 8);
|
|
|
|
return -1;
|
2012-03-16 12:37:12 +00:00
|
|
|
}
|
|
|
|
|
2018-07-11 09:32:07 +00:00
|
|
|
#define imm8m(x) (__builtin_constant_p(x) ? const_imm8m(x) : imm8m(x))
|
|
|
|
|
2018-07-11 09:31:36 +00:00
|
|
|
/*
 * Build a load/store instruction word with a 12-bit immediate offset.
 *
 * @op:    base opcode
 * @rt:    register placed at bit 12
 * @rn:    register placed at bit 16
 * @imm12: signed offset; a non-negative value sets ARM_INST_LDST__U, a
 *         negative value is encoded by its magnitude with that bit left
 *         clear.  The magnitude is masked with ARM_INST_LDST__IMM12.
 */
static u32 arm_bpf_ldst_imm12(u32 op, u8 rt, u8 rn, s16 imm12)
{
	u32 insn = op | (rt << 12 | rn << 16);

	if (imm12 < 0)
		imm12 = -imm12;
	else
		insn |= ARM_INST_LDST__U;

	return insn | (imm12 & ARM_INST_LDST__IMM12);
}
|
|
|
|
|
|
|
|
/*
 * Build a load/store instruction word with an 8-bit immediate offset that is
 * split into two nibbles: the high nibble goes to bits 11:8, the low nibble
 * to bits 3:0.
 *
 * @op:   base opcode
 * @rt:   register placed at bit 12
 * @rn:   register placed at bit 16
 * @imm8: signed offset; a non-negative value sets ARM_INST_LDST__U, a
 *        negative value is encoded by its magnitude with that bit left clear.
 */
static u32 arm_bpf_ldst_imm8(u32 op, u8 rt, u8 rn, s16 imm8)
{
	u32 insn = op | (rt << 12 | rn << 16);

	if (imm8 < 0)
		imm8 = -imm8;
	else
		insn |= ARM_INST_LDST__U;

	return insn | (imm8 & 0xf0) << 4 | (imm8 & 0x0f);
}
|
|
|
|
|
|
|
|
/* Loads and stores encoded with a 12-bit immediate offset. */
#define ARM_LDR_I(rt, rn, off)	 arm_bpf_ldst_imm12(ARM_INST_LDR_I, rt, rn, off)
#define ARM_LDRB_I(rt, rn, off)	 arm_bpf_ldst_imm12(ARM_INST_LDRB_I, rt, rn, off)
#define ARM_STR_I(rt, rn, off)	 arm_bpf_ldst_imm12(ARM_INST_STR_I, rt, rn, off)
#define ARM_STRB_I(rt, rn, off)	 arm_bpf_ldst_imm12(ARM_INST_STRB_I, rt, rn, off)

/* Loads and stores encoded with a split 8-bit immediate offset. */
#define ARM_LDRD_I(rt, rn, off)	 arm_bpf_ldst_imm8(ARM_INST_LDRD_I, rt, rn, off)
#define ARM_LDRH_I(rt, rn, off)	 arm_bpf_ldst_imm8(ARM_INST_LDRH_I, rt, rn, off)
#define ARM_LDRSH_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRSH_I, rt, rn, off)
#define ARM_LDRSB_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRSB_I, rt, rn, off)
#define ARM_STRD_I(rt, rn, off)	 arm_bpf_ldst_imm8(ARM_INST_STRD_I, rt, rn, off)
#define ARM_STRH_I(rt, rn, off)	 arm_bpf_ldst_imm8(ARM_INST_STRH_I, rt, rn, off)
|
|
|
|
|
2017-08-22 06:32:33 +00:00
|
|
|
/*
|
|
|
|
* Initializes the JIT space with undefined instructions.
|
|
|
|
*/
|
2014-09-08 06:04:48 +00:00
|
|
|
static void jit_fill_hole(void *area, unsigned int size)
|
|
|
|
{
|
net: bpf: arm: make hole-faulting more robust
Will Deacon pointed out, that the currently used opcode for filling holes,
that is 0xe7ffffff, seems not robust enough ...
$ echo 0xffffffe7 | xxd -r > test.bin
$ arm-linux-gnueabihf-objdump -m arm -D -b binary test.bin
...
0: e7ffffff udf #65535 ; 0xffff
... while for Thumb, it ends up as ...
0: ffff e7ff vqshl.u64 q15, <illegal reg q15.5>, #63
... which is a bit fragile. The ARM specification defines some *permanently*
guaranteed undefined instruction (UDF) space, for example for ARM in ARMv7-AR,
section A5.4 and for Thumb in ARMv7-M, section A5.2.6.
Similarly, ptrace, kprobes, kgdb, bug and uprobes make use of such instruction
as well to trap. Given mentioned section from the specification, we can find
such a universe as (where 'x' denotes 'don't care'):
ARM: xxxx 0111 1111 xxxx xxxx xxxx 1111 xxxx
Thumb: 1101 1110 xxxx xxxx
We therefore should use a more robust opcode that fits both. Russell King
suggested that we can even reuse a single 32-bit word, that is, 0xe7fddef1
which will fault if executed in ARM *or* Thumb mode as done in f928d4f2a86f
("ARM: poison the vectors page"). That will still hold our requirements:
$ echo 0xf1defde7 | xxd -r > test.bin
$ arm-unknown-linux-gnueabi-objdump -m arm -D -b binary test.bin
...
0: e7fddef1 udf #56801 ; 0xdde1
$ echo 0xf1defde7f1defde7f1defde7 | xxd -r > test.bin
$ arm-unknown-linux-gnueabi-objdump -marm -Mforce-thumb -D -b binary test.bin
...
0: def1 udf #241 ; 0xf1
2: e7fd b.n 0x0
4: def1 udf #241 ; 0xf1
6: e7fd b.n 0x4
8: def1 udf #241 ; 0xf1
a: e7fd b.n 0x8
So on ARM 0xe7fddef1 conforms to the above UDF pattern, and the low 16 bit
likewise correspond to UDF in Thumb case. The 0xe7fd part is an unconditional
branch back to the UDF instruction.
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Mircea Gherzan <mgherzan@gmail.com>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-19 12:56:57 +00:00
|
|
|
u32 *ptr;
|
2014-09-08 06:04:48 +00:00
|
|
|
/* We are guaranteed to have aligned memory. */
|
|
|
|
for (ptr = area; size >= sizeof(u32); size -= sizeof(u32))
|
net: bpf: arm: make hole-faulting more robust
Will Deacon pointed out, that the currently used opcode for filling holes,
that is 0xe7ffffff, seems not robust enough ...
$ echo 0xffffffe7 | xxd -r > test.bin
$ arm-linux-gnueabihf-objdump -m arm -D -b binary test.bin
...
0: e7ffffff udf #65535 ; 0xffff
... while for Thumb, it ends up as ...
0: ffff e7ff vqshl.u64 q15, <illegal reg q15.5>, #63
... which is a bit fragile. The ARM specification defines some *permanently*
guaranteed undefined instruction (UDF) space, for example for ARM in ARMv7-AR,
section A5.4 and for Thumb in ARMv7-M, section A5.2.6.
Similarly, ptrace, kprobes, kgdb, bug and uprobes make use of such instruction
as well to trap. Given mentioned section from the specification, we can find
such a universe as (where 'x' denotes 'don't care'):
ARM: xxxx 0111 1111 xxxx xxxx xxxx 1111 xxxx
Thumb: 1101 1110 xxxx xxxx
We therefore should use a more robust opcode that fits both. Russell King
suggested that we can even reuse a single 32-bit word, that is, 0xe7fddef1
which will fault if executed in ARM *or* Thumb mode as done in f928d4f2a86f
("ARM: poison the vectors page"). That will still hold our requirements:
$ echo 0xf1defde7 | xxd -r > test.bin
$ arm-unknown-linux-gnueabi-objdump -m arm -D -b binary test.bin
...
0: e7fddef1 udf #56801 ; 0xdde1
$ echo 0xf1defde7f1defde7f1defde7 | xxd -r > test.bin
$ arm-unknown-linux-gnueabi-objdump -marm -Mforce-thumb -D -b binary test.bin
...
0: def1 udf #241 ; 0xf1
2: e7fd b.n 0x0
4: def1 udf #241 ; 0xf1
6: e7fd b.n 0x4
8: def1 udf #241 ; 0xf1
a: e7fd b.n 0x8
So on ARM 0xe7fddef1 conforms to the above UDF pattern, and the low 16 bit
likewise correspond to UDF in Thumb case. The 0xe7fd part is an unconditional
branch back to the UDF instruction.
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Mircea Gherzan <mgherzan@gmail.com>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-19 12:56:57 +00:00
|
|
|
*ptr++ = __opcode_to_mem_arm(ARM_INST_UDF);
|
2014-09-08 06:04:48 +00:00
|
|
|
}
|
|
|
|
|
2018-01-13 16:10:07 +00:00
|
|
|
#if !defined(CONFIG_AEABI) || (__LINUX_ARM_ARCH__ < 5)
/* Stack must be aligned to 32-bit boundaries */
#define STACK_ALIGNMENT	4
#else
/* EABI requires the stack to be aligned to 64-bit boundaries */
#define STACK_ALIGNMENT	8
#endif
|
2012-03-16 12:37:12 +00:00
|
|
|
|
2017-08-22 06:32:33 +00:00
|
|
|
/*
 * Total stack size used in JITed code: the program's stack depth plus the
 * JIT scratch space, rounded up to STACK_ALIGNMENT.  Both macros expect a
 * "ctx" pointer to be in scope at the point of use.
 */
#define _STACK_SIZE	(ctx->prog->aux->stack_depth + SCRATCH_SIZE)
#define STACK_SIZE	ALIGN(_STACK_SIZE, STACK_ALIGNMENT)
|
2012-03-16 12:37:12 +00:00
|
|
|
|
|
|
|
#if __LINUX_ARM_ARCH__ < 7
|
|
|
|
|
|
|
|
/*
 * Place the 32-bit constant @k in the literal pool and return the offset to
 * use in a PC-relative load of it from the instruction about to be emitted.
 *
 * On the "fake" run (no target buffer) the constant is only counted and 0 is
 * returned.  On the real run an existing pool entry with the same value is
 * reused, otherwise the first free entry is claimed.  If the pool entry is
 * not reachable with a 12-bit offset, FLAG_IMM_OVERFLOW is set in ctx->flags
 * and 0 is returned.
 */
static u16 imm_offset(u32 k, struct jit_ctx *ctx)
{
	unsigned int i = 0, offset, dist;

	/* on the "fake" run we just count them (duplicates included) */
	if (ctx->target == NULL) {
		ctx->imm_count++;
		return 0;
	}

	/* look for an entry already holding k, stopping at the first free one */
	while ((i < ctx->imm_count) && ctx->imms[i]) {
		if (ctx->imms[i] == k)
			break;
		i++;
	}

	if (ctx->imms[i] == 0)
		ctx->imms[i] = k;

	/* constants go just after the epilogue */
	offset = ctx->offsets[ctx->prog->len - 1] * 4;
	offset += ctx->prologue_bytes;
	offset += ctx->epilogue_bytes;
	offset += i * 4;

	ctx->target[offset / 4] = k;

	/* PC in ARM mode == address of the instruction + 8 */
	dist = offset - (8 + ctx->idx * 4);

	/*
	 * Range-check the full-width distance.  Narrowing it to 16 bits
	 * first would let a distance of 64KiB or more alias into the valid
	 * 0..4095 range and go undetected.
	 */
	if (dist & ~0xfffU) {
		/*
		 * literal pool is too far, signal it into flags. we
		 * can only detect it on the second pass unfortunately.
		 */
		ctx->flags |= FLAG_IMM_OVERFLOW;
		return 0;
	}

	return dist;
}
|
|
|
|
|
|
|
|
#endif /* __LINUX_ARM_ARCH__ */
|
|
|
|
|
2017-08-22 06:32:33 +00:00
|
|
|
static inline int bpf2a32_offset(int bpf_to, int bpf_from,
|
|
|
|
const struct jit_ctx *ctx) {
|
|
|
|
int to, from;
|
|
|
|
|
|
|
|
if (ctx->target == NULL)
|
|
|
|
return 0;
|
|
|
|
to = ctx->offsets[bpf_to];
|
|
|
|
from = ctx->offsets[bpf_from];
|
|
|
|
|
|
|
|
return to - from - 1;
|
|
|
|
}
|
|
|
|
|
2012-03-16 12:37:12 +00:00
|
|
|
/*
|
|
|
|
* Move an immediate that's not an imm8m to a core register.
|
|
|
|
*/
|
2017-08-22 06:32:33 +00:00
|
|
|
static inline void emit_mov_i_no8m(const u8 rd, u32 val, struct jit_ctx *ctx)
|
2012-03-16 12:37:12 +00:00
|
|
|
{
|
|
|
|
#if __LINUX_ARM_ARCH__ < 7
|
|
|
|
emit(ARM_LDR_I(rd, ARM_PC, imm_offset(val, ctx)), ctx);
|
|
|
|
#else
|
|
|
|
emit(ARM_MOVW(rd, val & 0xffff), ctx);
|
|
|
|
if (val > 0xffff)
|
|
|
|
emit(ARM_MOVT(rd, val >> 16), ctx);
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
2017-08-22 06:32:33 +00:00
|
|
|
/*
 * Load the 32-bit immediate @val into @rd: a single MOV when imm8m() finds
 * an encoding for it, otherwise whatever emit_mov_i_no8m() emits.
 */
static inline void emit_mov_i(const u8 rd, u32 val, struct jit_ctx *ctx)
{
	const int encoded = imm8m(val);

	if (encoded < 0) {
		emit_mov_i_no8m(rd, val, ctx);
		return;
	}

	emit(ARM_MOV_I(rd, encoded), ctx);
}
|
|
|
|
|
2018-01-13 11:35:15 +00:00
|
|
|
/*
 * Emit a branch to the address held in @tgt_reg: BX when the CPU reports
 * HWCAP_THUMB, otherwise a plain MOV into the PC.
 */
static void emit_bx_r(u8 tgt_reg, struct jit_ctx *ctx)
{
	const u32 branch = (elf_hwcap & HWCAP_THUMB) ?
			   ARM_BX(tgt_reg) : ARM_MOV_R(ARM_PC, tgt_reg);

	emit(branch, ctx);
}
|
|
|
|
|
|
|
|
/*
 * Emit a call to the address held in @tgt_reg.  ARMv5 and later get a single
 * BLX; older architectures copy the PC into LR and then branch through
 * emit_bx_r().
 */
static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx)
{
#if __LINUX_ARM_ARCH__ >= 5
	emit(ARM_BLX_R(tgt_reg), ctx);
#else
	emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx);
	emit_bx_r(tgt_reg, ctx);
#endif
}
|
|
|
|
|
2017-08-22 06:32:33 +00:00
|
|
|
static inline int epilogue_offset(const struct jit_ctx *ctx)
|
2012-03-16 12:37:12 +00:00
|
|
|
{
|
2017-08-22 06:32:33 +00:00
|
|
|
int to, from;
|
|
|
|
/* No need for 1st dummy run */
|
|
|
|
if (ctx->target == NULL)
|
|
|
|
return 0;
|
|
|
|
to = ctx->epilogue_offset;
|
|
|
|
from = ctx->idx;
|
|
|
|
|
|
|
|
return to - from - 2;
|
2012-03-16 12:37:12 +00:00
|
|
|
}
|
|
|
|
|
2023-09-07 23:05:46 +00:00
|
|
|
/*
 * Emit a 32-bit divide or modulo: rd = rm / rn, or rd = rm % rn.
 *
 * @op selects the operation (BPF_DIV for division, anything else for
 * modulo) and a non-zero @sign selects the signed variant.
 *
 * On ARMv7 builds running on a CPU that reports HWCAP_IDIVA the hardware
 * divide instruction is used; modulo is the divide into IP followed by MLS.
 * Otherwise a call to one of the jit_*div32()/jit_*mod32() helpers is
 * emitted, with rm passed in R0 and rn in R1.
 */
static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op, u8 sign)
{
	const int exclude_mask = BIT(ARM_R0) | BIT(ARM_R1);
	const s8 *tmp = bpf2a32[TMP_REG_1];
	const bool is_div = (op == BPF_DIV);
	u32 dst;

#if __LINUX_ARM_ARCH__ == 7
	if (elf_hwcap & HWCAP_IDIVA) {
		/* the quotient lands in rd for DIV, in IP for MOD */
		const u8 quot = is_div ? rd : ARM_IP;

		emit(sign ? ARM_SDIV(quot, rm, rn) : ARM_UDIV(quot, rm, rn), ctx);
		if (!is_div)
			emit(ARM_MLS(rd, rn, ARM_IP, rm), ctx);
		return;
	}
#endif

	/*
	 * ARM_R1 and ARM_R0 hold live eBPF state but are needed for the
	 * helper's arguments.  Park whichever of them gets overwritten in
	 * the TMP_REG_1 pair; they are put back after the call.
	 */
	if (rn != ARM_R1) {
		emit(ARM_MOV_R(tmp[0], ARM_R1), ctx);
		emit(ARM_MOV_R(ARM_R1, rn), ctx);
	}
	if (rm != ARM_R0) {
		emit(ARM_MOV_R(tmp[1], ARM_R0), ctx);
		emit(ARM_MOV_R(ARM_R0, rm), ctx);
	}

	/* Push caller-saved registers on stack */
	emit(ARM_PUSH(CALLER_MASK & ~exclude_mask), ctx);

	/* Call appropriate function */
	if (sign)
		dst = is_div ? (u32)jit_sdiv32 : (u32)jit_smod32;
	else
		dst = is_div ? (u32)jit_udiv32 : (u32)jit_mod32;

	emit_mov_i(ARM_IP, dst, ctx);
	emit_blx_r(ARM_IP, ctx);

	/* Restore caller-saved registers from stack */
	emit(ARM_POP(CALLER_MASK & ~exclude_mask), ctx);

	/* Save return value */
	if (rd != ARM_R0)
		emit(ARM_MOV_R(rd, ARM_R0), ctx);

	/* Restore ARM_R0 and ARM_R1 */
	if (rn != ARM_R1)
		emit(ARM_MOV_R(ARM_R1, tmp[0]), ctx);
	if (rm != ARM_R0)
		emit(ARM_MOV_R(ARM_R0, tmp[1]), ctx);
}
|
|
|
|
|
2023-09-07 23:05:47 +00:00
|
|
|
/*
 * Emit a 64-bit divide or modulo as a call to one of the jit_*div64() /
 * jit_*mod64() helpers: rd = rm / rn, or rd = rm % rn.
 *
 * @rd, @rm and @rn are {hi, lo} register pairs.  @op selects the operation
 * (BPF_DIV for division, anything else for modulo) and a non-zero @sign
 * selects the signed variant.
 *
 * R0-R3 are pushed first, so whichever of them does not receive the result
 * can be recovered from the stack after the call.
 */
static inline void emit_udivmod64(const s8 *rd, const s8 *rm, const s8 *rn, struct jit_ctx *ctx,
				  u8 op, u8 sign)
{
	const bool rm_in_place = (rm[1] == ARM_R0);
	const bool rn_in_place = (rn[1] == ARM_R2);
	const bool is_div = (op == BPF_DIV);
	u32 dst;

	/* Push caller-saved registers on stack */
	emit(ARM_PUSH(CALLER_MASK), ctx);

	/*
	 * The helper takes the dividend in {R1, R0} and the divisor in
	 * {R3, R2}; move Rm and Rn there unless they already are.
	 */
	if (!rm_in_place && rn[1] == ARM_R0) {
		/*
		 * Rn occupies {R1, R0}, which Rm is about to overwrite:
		 * route Rn to {R3, R2} through the stack.
		 */
		emit(ARM_PUSH(BIT(ARM_R0) | BIT(ARM_R1)), ctx);
		emit(ARM_MOV_R(ARM_R1, rm[0]), ctx);
		emit(ARM_MOV_R(ARM_R0, rm[1]), ctx);
		emit(ARM_POP(BIT(ARM_R2) | BIT(ARM_R3)), ctx);
	} else {
		if (!rm_in_place) {
			emit(ARM_MOV_R(ARM_R1, rm[0]), ctx);
			emit(ARM_MOV_R(ARM_R0, rm[1]), ctx);
		}
		if (!rn_in_place) {
			emit(ARM_MOV_R(ARM_R3, rn[0]), ctx);
			emit(ARM_MOV_R(ARM_R2, rn[1]), ctx);
		}
	}

	/* Call appropriate function */
	if (sign)
		dst = is_div ? (u32)jit_sdiv64 : (u32)jit_smod64;
	else
		dst = is_div ? (u32)jit_udiv64 : (u32)jit_mod64;

	emit_mov_i(ARM_IP, dst, ctx);
	emit_blx_r(ARM_IP, ctx);

	if (rd[1] == ARM_R0) {
		/* Result is already in Rd: drop saved {R1, R0}, recover {R3, R2} */
		emit(ARM_ADD_I(ARM_SP, ARM_SP, 8), ctx);
		emit(ARM_POP(BIT(ARM_R2) | BIT(ARM_R3)), ctx);
		return;
	}

	/* Save return value */
	emit(ARM_MOV_R(rd[0], ARM_R1), ctx);
	emit(ARM_MOV_R(rd[1], ARM_R0), ctx);

	if (rd[1] == ARM_R2) {
		/* Rd is {R3, R2}: recover {R1, R0}, drop saved {R3, R2} */
		emit(ARM_POP(BIT(ARM_R0) | BIT(ARM_R1)), ctx);
		emit(ARM_ADD_I(ARM_SP, ARM_SP, 8), ctx);
	} else {
		/* Rd is elsewhere: recover all of R0-R3 */
		emit(ARM_POP(CALLER_MASK), ctx);
	}
}
|
|
|
|
|
2018-07-11 09:31:47 +00:00
|
|
|
/* Is the translated BPF register on stack? */
|
|
|
|
static bool is_stacked(s8 reg)
|
2012-03-16 12:37:12 +00:00
|
|
|
{
|
2018-07-11 09:31:47 +00:00
|
|
|
return reg < 0;
|
2012-03-16 12:37:12 +00:00
|
|
|
}
|
|
|
|
|
2018-07-11 09:31:52 +00:00
|
|
|
/* If a BPF register is on the stack (stk is true), load it to the
|
|
|
|
* supplied temporary register and return the temporary register
|
|
|
|
* for subsequent operations, otherwise just use the CPU register.
|
|
|
|
*/
|
|
|
|
static s8 arm_bpf_get_reg32(s8 reg, s8 tmp, struct jit_ctx *ctx)
|
|
|
|
{
|
|
|
|
if (is_stacked(reg)) {
|
2018-07-11 09:32:02 +00:00
|
|
|
emit(ARM_LDR_I(tmp, ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg)), ctx);
|
2018-07-11 09:31:52 +00:00
|
|
|
reg = tmp;
|
|
|
|
}
|
|
|
|
return reg;
|
|
|
|
}
|
|
|
|
|
2018-07-11 09:31:57 +00:00
|
|
|
static const s8 *arm_bpf_get_reg64(const s8 *reg, const s8 *tmp,
|
|
|
|
struct jit_ctx *ctx)
|
|
|
|
{
|
|
|
|
if (is_stacked(reg[1])) {
|
2018-07-11 09:32:38 +00:00
|
|
|
if (__LINUX_ARM_ARCH__ >= 6 ||
|
|
|
|
ctx->cpu_architecture >= CPU_ARCH_ARMv5TE) {
|
|
|
|
emit(ARM_LDRD_I(tmp[1], ARM_FP,
|
|
|
|
EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx);
|
|
|
|
} else {
|
|
|
|
emit(ARM_LDR_I(tmp[1], ARM_FP,
|
|
|
|
EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx);
|
|
|
|
emit(ARM_LDR_I(tmp[0], ARM_FP,
|
|
|
|
EBPF_SCRATCH_TO_ARM_FP(reg[0])), ctx);
|
|
|
|
}
|
2018-07-11 09:31:57 +00:00
|
|
|
reg = tmp;
|
|
|
|
}
|
|
|
|
return reg;
|
|
|
|
}
|
|
|
|
|
2018-07-11 09:31:52 +00:00
|
|
|
/* If a BPF register is on the stack (stk is true), save the register
|
|
|
|
* back to the stack. If the source register is not the same, then
|
|
|
|
* move it into the correct register.
|
|
|
|
*/
|
|
|
|
static void arm_bpf_put_reg32(s8 reg, s8 src, struct jit_ctx *ctx)
|
|
|
|
{
|
|
|
|
if (is_stacked(reg))
|
2018-07-11 09:32:02 +00:00
|
|
|
emit(ARM_STR_I(src, ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg)), ctx);
|
2018-07-11 09:31:52 +00:00
|
|
|
else if (reg != src)
|
|
|
|
emit(ARM_MOV_R(reg, src), ctx);
|
|
|
|
}
|
|
|
|
|
2018-07-11 09:31:57 +00:00
|
|
|
static void arm_bpf_put_reg64(const s8 *reg, const s8 *src,
|
|
|
|
struct jit_ctx *ctx)
|
|
|
|
{
|
|
|
|
if (is_stacked(reg[1])) {
|
2018-07-11 09:32:38 +00:00
|
|
|
if (__LINUX_ARM_ARCH__ >= 6 ||
|
|
|
|
ctx->cpu_architecture >= CPU_ARCH_ARMv5TE) {
|
|
|
|
emit(ARM_STRD_I(src[1], ARM_FP,
|
|
|
|
EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx);
|
|
|
|
} else {
|
|
|
|
emit(ARM_STR_I(src[1], ARM_FP,
|
|
|
|
EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx);
|
|
|
|
emit(ARM_STR_I(src[0], ARM_FP,
|
|
|
|
EBPF_SCRATCH_TO_ARM_FP(reg[0])), ctx);
|
|
|
|
}
|
2018-07-11 09:31:57 +00:00
|
|
|
} else {
|
|
|
|
if (reg[1] != src[1])
|
|
|
|
emit(ARM_MOV_R(reg[1], src[1]), ctx);
|
|
|
|
if (reg[0] != src[0])
|
|
|
|
emit(ARM_MOV_R(reg[0], src[0]), ctx);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-07-11 09:31:41 +00:00
|
|
|
static inline void emit_a32_mov_i(const s8 dst, const u32 val,
|
2018-07-11 09:31:47 +00:00
|
|
|
struct jit_ctx *ctx)
|
2012-03-16 12:37:12 +00:00
|
|
|
{
|
2018-07-11 09:31:41 +00:00
|
|
|
const s8 *tmp = bpf2a32[TMP_REG_1];
|
2017-08-22 06:32:33 +00:00
|
|
|
|
2018-07-11 09:31:47 +00:00
|
|
|
if (is_stacked(dst)) {
|
2017-08-22 06:32:33 +00:00
|
|
|
emit_mov_i(tmp[1], val, ctx);
|
2018-07-11 09:31:52 +00:00
|
|
|
arm_bpf_put_reg32(dst, tmp[1], ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
} else {
|
|
|
|
emit_mov_i(dst, val, ctx);
|
|
|
|
}
|
2012-03-16 12:37:12 +00:00
|
|
|
}
|
|
|
|
|
ARM: net: bpf: improve 64-bit load immediate implementation
Rather than writing each 32-bit half of the 64-bit immediate value
separately when the register is on the stack:
movw r6, #45056 ; 0xb000
movt r6, #60979 ; 0xee33
str r6, [fp, #-44] ; 0xffffffd4
mov r6, #0
str r6, [fp, #-40] ; 0xffffffd8
arrange to use the double-word store when available instead:
movw r6, #45056 ; 0xb000
movt r6, #60979 ; 0xee33
mov r7, #0
strd r6, [fp, #-44] ; 0xffffffd4
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-07-12 20:50:41 +00:00
|
|
|
/* Load the 64-bit immediate 'val' into the BPF register pair 'dst'.
 * The low 32 bits go to index [1] of the pair and the high 32 bits to
 * index [0].  When the destination lives on the stack the value is built
 * in TMP_REG_1 first and then written back by arm_bpf_put_reg64().
 */
static void emit_a32_mov_i64(const s8 dst[], u64 val, struct jit_ctx *ctx)
{
	const s8 *rd;

	if (is_stacked(dst_lo))
		rd = bpf2a32[TMP_REG_1];
	else
		rd = dst;

	/* Low word first, then high word */
	emit_mov_i(rd[1], (u32)val, ctx);
	emit_mov_i(rd[0], val >> 32, ctx);

	arm_bpf_put_reg64(dst, rd, ctx);
}
|
|
|
|
|
2017-08-22 06:32:33 +00:00
|
|
|
/* Sign extended move */
|
ARM: net: bpf: improve 64-bit load immediate implementation
Rather than writing each 32-bit half of the 64-bit immediate value
separately when the register is on the stack:
movw r6, #45056 ; 0xb000
movt r6, #60979 ; 0xee33
str r6, [fp, #-44] ; 0xffffffd4
mov r6, #0
str r6, [fp, #-40] ; 0xffffffd8
arrange to use the double-word store when available instead:
movw r6, #45056 ; 0xb000
movt r6, #60979 ; 0xee33
mov r7, #0
strd r6, [fp, #-44] ; 0xffffffd4
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-07-12 20:50:41 +00:00
|
|
|
static inline void emit_a32_mov_se_i64(const bool is64, const s8 dst[],
|
|
|
|
const u32 val, struct jit_ctx *ctx) {
|
ARM: net: bpf: improve 64-bit sign-extended immediate load
Improve the 64-bit sign-extended immediate from:
mov r6, #1
str r6, [fp, #-52] ; 0xffffffcc
mov r6, #0
str r6, [fp, #-48] ; 0xffffffd0
to:
mov r6, #1
mov r7, #0
strd r6, [fp, #-52] ; 0xffffffcc
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-07-12 20:50:46 +00:00
|
|
|
u64 val64 = val;
|
2012-03-16 12:37:12 +00:00
|
|
|
|
2017-08-22 06:32:33 +00:00
|
|
|
if (is64 && (val & (1<<31)))
|
ARM: net: bpf: improve 64-bit sign-extended immediate load
Improve the 64-bit sign-extended immediate from:
mov r6, #1
str r6, [fp, #-52] ; 0xffffffcc
mov r6, #0
str r6, [fp, #-48] ; 0xffffffd0
to:
mov r6, #1
mov r7, #0
strd r6, [fp, #-52] ; 0xffffffcc
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-07-12 20:50:46 +00:00
|
|
|
val64 |= 0xffffffff00000000ULL;
|
|
|
|
emit_a32_mov_i64(dst, val64, ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
}
|
2012-03-16 12:37:12 +00:00
|
|
|
|
2017-08-22 06:32:33 +00:00
|
|
|
/* Emit one step of an addition, dst = dst + src.
 *
 * 32 bit:
 *	add  dst_lo, dst_lo, src_lo
 * 64 bit, low word (hi == false):
 *	adds dst_lo, dst_lo, src_lo
 * 64 bit, high word (hi == true):
 *	adc  dst_hi, dst_hi, src_hi
 */
static inline void emit_a32_add_r(const u8 dst, const u8 src,
				  const bool is64, const bool hi,
				  struct jit_ctx *ctx) {
	if (!is64) {
		emit(ARM_ADD_R(dst, dst, src), ctx);
		return;
	}

	if (hi)
		emit(ARM_ADC_R(dst, dst, src), ctx);
	else
		emit(ARM_ADDS_R(dst, dst, src), ctx);
}
|
2012-03-16 12:37:12 +00:00
|
|
|
|
2017-08-22 06:32:33 +00:00
|
|
|
/* Emit one step of a subtraction, dst = dst - src.
 *
 * 32 bit:
 *	sub  dst_lo, dst_lo, src_lo
 * 64 bit, low word (hi == false):
 *	subs dst_lo, dst_lo, src_lo
 * 64 bit, high word (hi == true):
 *	sbc  dst_hi, dst_hi, src_hi
 */
static inline void emit_a32_sub_r(const u8 dst, const u8 src,
				  const bool is64, const bool hi,
				  struct jit_ctx *ctx) {
	if (!is64) {
		emit(ARM_SUB_R(dst, dst, src), ctx);
		return;
	}

	if (hi)
		emit(ARM_SBC_R(dst, dst, src), ctx);
	else
		emit(ARM_SUBS_R(dst, dst, src), ctx);
}
|
2012-03-16 12:37:12 +00:00
|
|
|
|
2017-08-22 06:32:33 +00:00
|
|
|
static inline void emit_alu_r(const u8 dst, const u8 src, const bool is64,
|
|
|
|
const bool hi, const u8 op, struct jit_ctx *ctx){
|
|
|
|
switch (BPF_OP(op)) {
|
|
|
|
/* dst = dst + src */
|
|
|
|
case BPF_ADD:
|
|
|
|
emit_a32_add_r(dst, src, is64, hi, ctx);
|
|
|
|
break;
|
|
|
|
/* dst = dst - src */
|
|
|
|
case BPF_SUB:
|
|
|
|
emit_a32_sub_r(dst, src, is64, hi, ctx);
|
|
|
|
break;
|
|
|
|
/* dst = dst | src */
|
|
|
|
case BPF_OR:
|
|
|
|
emit(ARM_ORR_R(dst, dst, src), ctx);
|
|
|
|
break;
|
|
|
|
/* dst = dst & src */
|
|
|
|
case BPF_AND:
|
|
|
|
emit(ARM_AND_R(dst, dst, src), ctx);
|
|
|
|
break;
|
|
|
|
/* dst = dst ^ src */
|
|
|
|
case BPF_XOR:
|
|
|
|
emit(ARM_EOR_R(dst, dst, src), ctx);
|
|
|
|
break;
|
|
|
|
/* dst = dst * src */
|
|
|
|
case BPF_MUL:
|
|
|
|
emit(ARM_MUL(dst, dst, src), ctx);
|
|
|
|
break;
|
|
|
|
/* dst = dst << src */
|
|
|
|
case BPF_LSH:
|
|
|
|
emit(ARM_LSL_R(dst, dst, src), ctx);
|
|
|
|
break;
|
|
|
|
/* dst = dst >> src */
|
|
|
|
case BPF_RSH:
|
|
|
|
emit(ARM_LSR_R(dst, dst, src), ctx);
|
|
|
|
break;
|
|
|
|
/* dst = dst >> src (signed)*/
|
|
|
|
case BPF_ARSH:
|
|
|
|
emit(ARM_MOV_SR(dst, dst, SRTYPE_ASR, src), ctx);
|
|
|
|
break;
|
|
|
|
}
|
2012-03-16 12:37:12 +00:00
|
|
|
}
|
|
|
|
|
2017-08-22 06:32:33 +00:00
|
|
|
/* ALU operation (64 bit) */
|
2018-07-11 09:31:41 +00:00
|
|
|
static inline void emit_a32_alu_r64(const bool is64, const s8 dst[],
|
2018-07-11 09:31:47 +00:00
|
|
|
const s8 src[], struct jit_ctx *ctx,
|
2017-08-22 06:32:33 +00:00
|
|
|
const u8 op) {
|
ARM: net: bpf: improve 64-bit ALU implementation
Improbe the 64-bit ALU implementation from:
movw r8, #65532
movt r8, #65535
movw r9, #65535
movt r9, #65535
ldr r7, [fp, #-44]
adds r7, r7, r8
str r7, [fp, #-44]
ldr r7, [fp, #-40]
adc r7, r7, r9
str r7, [fp, #-40]
to:
movw r8, #65532
movt r8, #65535
movw r9, #65535
movt r9, #65535
ldrd r6, [fp, #-44]
adds r6, r6, r8
adc r7, r7, r9
strd r6, [fp, #-44]
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-07-12 20:50:56 +00:00
|
|
|
const s8 *tmp = bpf2a32[TMP_REG_1];
|
|
|
|
const s8 *tmp2 = bpf2a32[TMP_REG_2];
|
|
|
|
const s8 *rd;
|
|
|
|
|
|
|
|
rd = arm_bpf_get_reg64(dst, tmp, ctx);
|
|
|
|
if (is64) {
|
|
|
|
const s8 *rs;
|
|
|
|
|
|
|
|
rs = arm_bpf_get_reg64(src, tmp2, ctx);
|
|
|
|
|
|
|
|
/* ALU operation */
|
|
|
|
emit_alu_r(rd[1], rs[1], true, false, op, ctx);
|
|
|
|
emit_alu_r(rd[0], rs[0], true, true, op, ctx);
|
|
|
|
} else {
|
|
|
|
s8 rs;
|
|
|
|
|
|
|
|
rs = arm_bpf_get_reg32(src_lo, tmp2[1], ctx);
|
|
|
|
|
|
|
|
/* ALU operation */
|
|
|
|
emit_alu_r(rd[1], rs, true, false, op, ctx);
|
2019-05-24 22:25:22 +00:00
|
|
|
if (!ctx->prog->aux->verifier_zext)
|
|
|
|
emit_a32_mov_i(rd[0], 0, ctx);
|
ARM: net: bpf: improve 64-bit ALU implementation
Improbe the 64-bit ALU implementation from:
movw r8, #65532
movt r8, #65535
movw r9, #65535
movt r9, #65535
ldr r7, [fp, #-44]
adds r7, r7, r8
str r7, [fp, #-44]
ldr r7, [fp, #-40]
adc r7, r7, r9
str r7, [fp, #-40]
to:
movw r8, #65532
movt r8, #65535
movw r9, #65535
movt r9, #65535
ldrd r6, [fp, #-44]
adds r6, r6, r8
adc r7, r7, r9
strd r6, [fp, #-44]
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-07-12 20:50:56 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
arm_bpf_put_reg64(dst, rd, ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
}
|
2012-03-16 12:37:12 +00:00
|
|
|
|
2018-07-11 09:31:52 +00:00
|
|
|
/* dst = src (4 bytes)*/
|
arm32, bpf: Reimplement sign-extension mov instruction
The current implementation of the mov instruction with sign extension has the
following problems:
1. It clobbers the source register if it is not stacked because it
sign extends the source and then moves it to the destination.
2. If the dst_reg is stacked, the current code doesn't write the value
back in case of 64-bit mov.
3. There is room for improvement by emitting fewer instructions.
The steps for fixing this and the instructions emitted by the JIT are explained
below with examples in all combinations:
Case A: offset == 32:
=====================
Case A.1: src and dst are stacked registers:
--------------------------------------------
1. Load src_lo into tmp_lo
2. Store tmp_lo into dst_lo
3. Sign extend tmp_lo into tmp_hi
4. Store tmp_hi to dst_hi
Example: r3 = (s32)r3
r3 is a stacked register
ldr r6, [r11, #-16] // Load r3_lo into tmp_lo
// str to dst_lo is not emitted because src_lo == dst_lo
asr r7, r6, #31 // Sign extend tmp_lo into tmp_hi
str r7, [r11, #-12] // Store tmp_hi into r3_hi
Case A.2: src is stacked but dst is not:
----------------------------------------
1. Load src_lo into dst_lo
2. Sign extend dst_lo into dst_hi
Example: r6 = (s32)r3
r6 maps to {ARM_R5, ARM_R4} and r3 is stacked
ldr r4, [r11, #-16] // Load r3_lo into r6_lo
asr r5, r4, #31 // Sign extend r6_lo into r6_hi
Case A.3: src is not stacked but dst is stacked:
------------------------------------------------
1. Store src_lo into dst_lo
2. Sign extend src_lo into tmp_hi
3. Store tmp_hi to dst_hi
Example: r3 = (s32)r6
r3 is stacked and r6 maps to {ARM_R5, ARM_R4}
str r4, [r11, #-16] // Store r6_lo to r3_lo
asr r7, r4, #31 // Sign extend r6_lo into tmp_hi
str r7, [r11, #-12] // Store tmp_hi to dest_hi
Case A.4: Both src and dst are not stacked:
-------------------------------------------
1. Mov src_lo into dst_lo
2. Sign extend src_lo into dst_hi
Example: (bf) r6 = (s32)r6
r6 maps to {ARM_R5, ARM_R4}
// Mov not emitted because dst == src
asr r5, r4, #31 // Sign extend r6_lo into r6_hi
Case B: offset != 32:
=====================
Case B.1: src and dst are stacked registers:
--------------------------------------------
1. Load src_lo into tmp_lo
2. Sign extend tmp_lo according to offset.
3. Store tmp_lo into dst_lo
4. Sign extend tmp_lo into tmp_hi
5. Store tmp_hi to dst_hi
Example: r9 = (s8)r3
r9 and r3 are both stacked registers
ldr r6, [r11, #-16] // Load r3_lo into tmp_lo
lsl r6, r6, #24 // Sign extend tmp_lo
asr r6, r6, #24 // ..
str r6, [r11, #-56] // Store tmp_lo to r9_lo
asr r7, r6, #31 // Sign extend tmp_lo to tmp_hi
str r7, [r11, #-52] // Store tmp_hi to r9_hi
Case B.2: src is stacked but dst is not:
----------------------------------------
1. Load src_lo into dst_lo
2. Sign extend dst_lo according to offset.
3. Sign extend tmp_lo into dst_hi
Example: r6 = (s8)r3
r6 maps to {ARM_R5, ARM_R4} and r3 is stacked
ldr r4, [r11, #-16] // Load r3_lo to r6_lo
lsl r4, r4, #24 // Sign extend r6_lo
asr r4, r4, #24 // ..
asr r5, r4, #31 // Sign extend r6_lo into r6_hi
Case B.3: src is not stacked but dst is stacked:
------------------------------------------------
1. Sign extend src_lo into tmp_lo according to offset.
2. Store tmp_lo into dst_lo.
3. Sign extend src_lo into tmp_hi.
4. Store tmp_hi to dst_hi.
Example: r3 = (s8)r1
r3 is stacked and r1 maps to {ARM_R3, ARM_R2}
lsl r6, r2, #24 // Sign extend r1_lo to tmp_lo
asr r6, r6, #24 // ..
str r6, [r11, #-16] // Store tmp_lo to r3_lo
asr r7, r6, #31 // Sign extend tmp_lo to tmp_hi
str r7, [r11, #-12] // Store tmp_hi to r3_hi
Case B.4: Both src and dst are not stacked:
-------------------------------------------
1. Sign extend src_lo into dst_lo according to offset.
2. Sign extend dst_lo into dst_hi.
Example: r6 = (s8)r1
r6 maps to {ARM_R5, ARM_R4} and r1 maps to {ARM_R3, ARM_R2}
lsl r4, r2, #24 // Sign extend r1_lo to r6_lo
asr r4, r4, #24 // ..
asr r5, r4, #31 // Sign extend r6_lo to r6_hi
Fixes: fc832653fa0d ("arm32, bpf: add support for sign-extension mov instruction")
Reported-by: syzbot+186522670e6722692d86@syzkaller.appspotmail.com
Signed-off-by: Puranjay Mohan <puranjay@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Closes: https://lore.kernel.org/all/000000000000e9a8d80615163f2a@google.com
Link: https://lore.kernel.org/bpf/20240419182832.27707-1-puranjay@kernel.org
2024-04-19 18:28:32 +00:00
|
|
|
static inline void emit_a32_mov_r(const s8 dst, const s8 src, struct jit_ctx *ctx) {
|
2018-07-11 09:31:41 +00:00
|
|
|
const s8 *tmp = bpf2a32[TMP_REG_1];
|
2018-07-11 09:31:52 +00:00
|
|
|
s8 rt;
|
2017-08-22 06:32:33 +00:00
|
|
|
|
2018-07-11 09:31:52 +00:00
|
|
|
rt = arm_bpf_get_reg32(src, tmp[0], ctx);
|
|
|
|
arm_bpf_put_reg32(dst, rt, ctx);
|
2012-03-16 12:37:12 +00:00
|
|
|
}
|
|
|
|
|
2017-08-22 06:32:33 +00:00
|
|
|
/* dst = src */
|
2018-07-11 09:31:41 +00:00
|
|
|
static inline void emit_a32_mov_r64(const bool is64, const s8 dst[],
|
2018-07-11 09:31:47 +00:00
|
|
|
const s8 src[],
|
|
|
|
struct jit_ctx *ctx) {
|
2018-07-11 09:32:38 +00:00
|
|
|
if (!is64) {
|
arm32, bpf: Reimplement sign-extension mov instruction
The current implementation of the mov instruction with sign extension has the
following problems:
1. It clobbers the source register if it is not stacked because it
sign extends the source and then moves it to the destination.
2. If the dst_reg is stacked, the current code doesn't write the value
back in case of 64-bit mov.
3. There is room for improvement by emitting fewer instructions.
The steps for fixing this and the instructions emitted by the JIT are explained
below with examples in all combinations:
Case A: offset == 32:
=====================
Case A.1: src and dst are stacked registers:
--------------------------------------------
1. Load src_lo into tmp_lo
2. Store tmp_lo into dst_lo
3. Sign extend tmp_lo into tmp_hi
4. Store tmp_hi to dst_hi
Example: r3 = (s32)r3
r3 is a stacked register
ldr r6, [r11, #-16] // Load r3_lo into tmp_lo
// str to dst_lo is not emitted because src_lo == dst_lo
asr r7, r6, #31 // Sign extend tmp_lo into tmp_hi
str r7, [r11, #-12] // Store tmp_hi into r3_hi
Case A.2: src is stacked but dst is not:
----------------------------------------
1. Load src_lo into dst_lo
2. Sign extend dst_lo into dst_hi
Example: r6 = (s32)r3
r6 maps to {ARM_R5, ARM_R4} and r3 is stacked
ldr r4, [r11, #-16] // Load r3_lo into r6_lo
asr r5, r4, #31 // Sign extend r6_lo into r6_hi
Case A.3: src is not stacked but dst is stacked:
------------------------------------------------
1. Store src_lo into dst_lo
2. Sign extend src_lo into tmp_hi
3. Store tmp_hi to dst_hi
Example: r3 = (s32)r6
r3 is stacked and r6 maps to {ARM_R5, ARM_R4}
str r4, [r11, #-16] // Store r6_lo to r3_lo
asr r7, r4, #31 // Sign extend r6_lo into tmp_hi
str r7, [r11, #-12] // Store tmp_hi to dest_hi
Case A.4: Both src and dst are not stacked:
-------------------------------------------
1. Mov src_lo into dst_lo
2. Sign extend src_lo into dst_hi
Example: (bf) r6 = (s32)r6
r6 maps to {ARM_R5, ARM_R4}
// Mov not emitted because dst == src
asr r5, r4, #31 // Sign extend r6_lo into r6_hi
Case B: offset != 32:
=====================
Case B.1: src and dst are stacked registers:
--------------------------------------------
1. Load src_lo into tmp_lo
2. Sign extend tmp_lo according to offset.
3. Store tmp_lo into dst_lo
4. Sign extend tmp_lo into tmp_hi
5. Store tmp_hi to dst_hi
Example: r9 = (s8)r3
r9 and r3 are both stacked registers
ldr r6, [r11, #-16] // Load r3_lo into tmp_lo
lsl r6, r6, #24 // Sign extend tmp_lo
asr r6, r6, #24 // ..
str r6, [r11, #-56] // Store tmp_lo to r9_lo
asr r7, r6, #31 // Sign extend tmp_lo to tmp_hi
str r7, [r11, #-52] // Store tmp_hi to r9_hi
Case B.2: src is stacked but dst is not:
----------------------------------------
1. Load src_lo into dst_lo
2. Sign extend dst_lo according to offset.
3. Sign extend tmp_lo into dst_hi
Example: r6 = (s8)r3
r6 maps to {ARM_R5, ARM_R4} and r3 is stacked
ldr r4, [r11, #-16] // Load r3_lo to r6_lo
lsl r4, r4, #24 // Sign extend r6_lo
asr r4, r4, #24 // ..
asr r5, r4, #31 // Sign extend r6_lo into r6_hi
Case B.3: src is not stacked but dst is stacked:
------------------------------------------------
1. Sign extend src_lo into tmp_lo according to offset.
2. Store tmp_lo into dst_lo.
3. Sign extend src_lo into tmp_hi.
4. Store tmp_hi to dst_hi.
Example: r3 = (s8)r1
r3 is stacked and r1 maps to {ARM_R3, ARM_R2}
lsl r6, r2, #24 // Sign extend r1_lo to tmp_lo
asr r6, r6, #24 // ..
str r6, [r11, #-16] // Store tmp_lo to r3_lo
asr r7, r6, #31 // Sign extend tmp_lo to tmp_hi
str r7, [r11, #-12] // Store tmp_hi to r3_hi
Case B.4: Both src and dst are not stacked:
-------------------------------------------
1. Sign extend src_lo into dst_lo according to offset.
2. Sign extend dst_lo into dst_hi.
Example: r6 = (s8)r1
r6 maps to {ARM_R5, ARM_R4} and r1 maps to {ARM_R3, ARM_R2}
lsl r4, r2, #24 // Sign extend r1_lo to r6_lo
asr r4, r4, #24 // ..
asr r5, r4, #31 // Sign extend r6_lo to r6_hi
Fixes: fc832653fa0d ("arm32, bpf: add support for sign-extension mov instruction")
Reported-by: syzbot+186522670e6722692d86@syzkaller.appspotmail.com
Signed-off-by: Puranjay Mohan <puranjay@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Closes: https://lore.kernel.org/all/000000000000e9a8d80615163f2a@google.com
Link: https://lore.kernel.org/bpf/20240419182832.27707-1-puranjay@kernel.org
2024-04-19 18:28:32 +00:00
|
|
|
emit_a32_mov_r(dst_lo, src_lo, ctx);
|
2019-05-24 22:25:22 +00:00
|
|
|
if (!ctx->prog->aux->verifier_zext)
|
|
|
|
/* Zero out high 4 bytes */
|
|
|
|
emit_a32_mov_i(dst_hi, 0, ctx);
|
2018-07-11 09:32:38 +00:00
|
|
|
} else if (__LINUX_ARM_ARCH__ < 6 &&
|
|
|
|
ctx->cpu_architecture < CPU_ARCH_ARMv5TE) {
|
2017-08-22 06:32:33 +00:00
|
|
|
/* complete 8 byte move */
|
arm32, bpf: Reimplement sign-extension mov instruction
The current implementation of the mov instruction with sign extension has the
following problems:
1. It clobbers the source register if it is not stacked because it
sign extends the source and then moves it to the destination.
2. If the dst_reg is stacked, the current code doesn't write the value
back in case of 64-bit mov.
3. There is room for improvement by emitting fewer instructions.
The steps for fixing this and the instructions emitted by the JIT are explained
below with examples in all combinations:
Case A: offset == 32:
=====================
Case A.1: src and dst are stacked registers:
--------------------------------------------
1. Load src_lo into tmp_lo
2. Store tmp_lo into dst_lo
3. Sign extend tmp_lo into tmp_hi
4. Store tmp_hi to dst_hi
Example: r3 = (s32)r3
r3 is a stacked register
ldr r6, [r11, #-16] // Load r3_lo into tmp_lo
// str to dst_lo is not emitted because src_lo == dst_lo
asr r7, r6, #31 // Sign extend tmp_lo into tmp_hi
str r7, [r11, #-12] // Store tmp_hi into r3_hi
Case A.2: src is stacked but dst is not:
----------------------------------------
1. Load src_lo into dst_lo
2. Sign extend dst_lo into dst_hi
Example: r6 = (s32)r3
r6 maps to {ARM_R5, ARM_R4} and r3 is stacked
ldr r4, [r11, #-16] // Load r3_lo into r6_lo
asr r5, r4, #31 // Sign extend r6_lo into r6_hi
Case A.3: src is not stacked but dst is stacked:
------------------------------------------------
1. Store src_lo into dst_lo
2. Sign extend src_lo into tmp_hi
3. Store tmp_hi to dst_hi
Example: r3 = (s32)r6
r3 is stacked and r6 maps to {ARM_R5, ARM_R4}
str r4, [r11, #-16] // Store r6_lo to r3_lo
asr r7, r4, #31 // Sign extend r6_lo into tmp_hi
str r7, [r11, #-12] // Store tmp_hi to dest_hi
Case A.4: Both src and dst are not stacked:
-------------------------------------------
1. Mov src_lo into dst_lo
2. Sign extend src_lo into dst_hi
Example: (bf) r6 = (s32)r6
r6 maps to {ARM_R5, ARM_R4}
// Mov not emitted because dst == src
asr r5, r4, #31 // Sign extend r6_lo into r6_hi
Case B: offset != 32:
=====================
Case B.1: src and dst are stacked registers:
--------------------------------------------
1. Load src_lo into tmp_lo
2. Sign extend tmp_lo according to offset.
3. Store tmp_lo into dst_lo
4. Sign extend tmp_lo into tmp_hi
5. Store tmp_hi to dst_hi
Example: r9 = (s8)r3
r9 and r3 are both stacked registers
ldr r6, [r11, #-16] // Load r3_lo into tmp_lo
lsl r6, r6, #24 // Sign extend tmp_lo
asr r6, r6, #24 // ..
str r6, [r11, #-56] // Store tmp_lo to r9_lo
asr r7, r6, #31 // Sign extend tmp_lo to tmp_hi
str r7, [r11, #-52] // Store tmp_hi to r9_hi
Case B.2: src is stacked but dst is not:
----------------------------------------
1. Load src_lo into dst_lo
2. Sign extend dst_lo according to offset.
3. Sign extend tmp_lo into dst_hi
Example: r6 = (s8)r3
r6 maps to {ARM_R5, ARM_R4} and r3 is stacked
ldr r4, [r11, #-16] // Load r3_lo to r6_lo
lsl r4, r4, #24 // Sign extend r6_lo
asr r4, r4, #24 // ..
asr r5, r4, #31 // Sign extend r6_lo into r6_hi
Case B.3: src is not stacked but dst is stacked:
------------------------------------------------
1. Sign extend src_lo into tmp_lo according to offset.
2. Store tmp_lo into dst_lo.
3. Sign extend src_lo into tmp_hi.
4. Store tmp_hi to dst_hi.
Example: r3 = (s8)r1
r3 is stacked and r1 maps to {ARM_R3, ARM_R2}
lsl r6, r2, #24 // Sign extend r1_lo to tmp_lo
asr r6, r6, #24 // ..
str r6, [r11, #-16] // Store tmp_lo to r3_lo
asr r7, r6, #31 // Sign extend tmp_lo to tmp_hi
str r7, [r11, #-12] // Store tmp_hi to r3_hi
Case B.4: Both src and dst are not stacked:
-------------------------------------------
1. Sign extend src_lo into dst_lo according to offset.
2. Sign extend dst_lo into dst_hi.
Example: r6 = (s8)r1
r6 maps to {ARM_R5, ARM_R4} and r1 maps to {ARM_R3, ARM_R2}
lsl r4, r2, #24 // Sign extend r1_lo to r6_lo
asr r4, r4, #24 // ..
asr r5, r4, #31 // Sign extend r6_lo to r6_hi
Fixes: fc832653fa0d ("arm32, bpf: add support for sign-extension mov instruction")
Reported-by: syzbot+186522670e6722692d86@syzkaller.appspotmail.com
Signed-off-by: Puranjay Mohan <puranjay@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Closes: https://lore.kernel.org/all/000000000000e9a8d80615163f2a@google.com
Link: https://lore.kernel.org/bpf/20240419182832.27707-1-puranjay@kernel.org
2024-04-19 18:28:32 +00:00
|
|
|
emit_a32_mov_r(dst_lo, src_lo, ctx);
|
|
|
|
emit_a32_mov_r(dst_hi, src_hi, ctx);
|
2018-07-11 09:32:38 +00:00
|
|
|
} else if (is_stacked(src_lo) && is_stacked(dst_lo)) {
|
|
|
|
const u8 *tmp = bpf2a32[TMP_REG_1];
|
|
|
|
|
|
|
|
emit(ARM_LDRD_I(tmp[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(src_lo)), ctx);
|
|
|
|
emit(ARM_STRD_I(tmp[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(dst_lo)), ctx);
|
|
|
|
} else if (is_stacked(src_lo)) {
|
|
|
|
emit(ARM_LDRD_I(dst[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(src_lo)), ctx);
|
|
|
|
} else if (is_stacked(dst_lo)) {
|
|
|
|
emit(ARM_STRD_I(src[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(dst_lo)), ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
} else {
|
2018-07-11 09:32:38 +00:00
|
|
|
emit(ARM_MOV_R(dst[0], src[0]), ctx);
|
|
|
|
emit(ARM_MOV_R(dst[1], src[1]), ctx);
|
2012-03-16 12:37:12 +00:00
|
|
|
}
|
2017-08-22 06:32:33 +00:00
|
|
|
}
|
2015-05-06 16:31:56 +00:00
|
|
|
|
2023-09-07 23:05:44 +00:00
|
|
|
/* dst = (signed)src */
|
|
|
|
static inline void emit_a32_movsx_r64(const bool is64, const u8 off, const s8 dst[], const s8 src[],
|
|
|
|
struct jit_ctx *ctx) {
|
|
|
|
const s8 *tmp = bpf2a32[TMP_REG_1];
|
arm32, bpf: Reimplement sign-extension mov instruction
The current implementation of the mov instruction with sign extension has the
following problems:
1. It clobbers the source register if it is not stacked because it
sign extends the source and then moves it to the destination.
2. If the dst_reg is stacked, the current code doesn't write the value
back in case of 64-bit mov.
3. There is room for improvement by emitting fewer instructions.
The steps for fixing this and the instructions emitted by the JIT are explained
below with examples in all combinations:
Case A: offset == 32:
=====================
Case A.1: src and dst are stacked registers:
--------------------------------------------
1. Load src_lo into tmp_lo
2. Store tmp_lo into dst_lo
3. Sign extend tmp_lo into tmp_hi
4. Store tmp_hi to dst_hi
Example: r3 = (s32)r3
r3 is a stacked register
ldr r6, [r11, #-16] // Load r3_lo into tmp_lo
// str to dst_lo is not emitted because src_lo == dst_lo
asr r7, r6, #31 // Sign extend tmp_lo into tmp_hi
str r7, [r11, #-12] // Store tmp_hi into r3_hi
Case A.2: src is stacked but dst is not:
----------------------------------------
1. Load src_lo into dst_lo
2. Sign extend dst_lo into dst_hi
Example: r6 = (s32)r3
r6 maps to {ARM_R5, ARM_R4} and r3 is stacked
ldr r4, [r11, #-16] // Load r3_lo into r6_lo
asr r5, r4, #31 // Sign extend r6_lo into r6_hi
Case A.3: src is not stacked but dst is stacked:
------------------------------------------------
1. Store src_lo into dst_lo
2. Sign extend src_lo into tmp_hi
3. Store tmp_hi to dst_hi
Example: r3 = (s32)r6
r3 is stacked and r6 maps to {ARM_R5, ARM_R4}
str r4, [r11, #-16] // Store r6_lo to r3_lo
asr r7, r4, #31 // Sign extend r6_lo into tmp_hi
str r7, [r11, #-12] // Store tmp_hi to dest_hi
Case A.4: Both src and dst are not stacked:
-------------------------------------------
1. Mov src_lo into dst_lo
2. Sign extend src_lo into dst_hi
Example: (bf) r6 = (s32)r6
r6 maps to {ARM_R5, ARM_R4}
// Mov not emitted because dst == src
asr r5, r4, #31 // Sign extend r6_lo into r6_hi
Case B: offset != 32:
=====================
Case B.1: src and dst are stacked registers:
--------------------------------------------
1. Load src_lo into tmp_lo
2. Sign extend tmp_lo according to offset.
3. Store tmp_lo into dst_lo
4. Sign extend tmp_lo into tmp_hi
5. Store tmp_hi to dst_hi
Example: r9 = (s8)r3
r9 and r3 are both stacked registers
ldr r6, [r11, #-16] // Load r3_lo into tmp_lo
lsl r6, r6, #24 // Sign extend tmp_lo
asr r6, r6, #24 // ..
str r6, [r11, #-56] // Store tmp_lo to r9_lo
asr r7, r6, #31 // Sign extend tmp_lo to tmp_hi
str r7, [r11, #-52] // Store tmp_hi to r9_hi
Case B.2: src is stacked but dst is not:
----------------------------------------
1. Load src_lo into dst_lo
2. Sign extend dst_lo according to offset.
3. Sign extend dst_lo into dst_hi
Example: r6 = (s8)r3
r6 maps to {ARM_R5, ARM_R4} and r3 is stacked
ldr r4, [r11, #-16] // Load r3_lo to r6_lo
lsl r4, r4, #24 // Sign extend r6_lo
asr r4, r4, #24 // ..
asr r5, r4, #31 // Sign extend r6_lo into r6_hi
Case B.3: src is not stacked but dst is stacked:
------------------------------------------------
1. Sign extend src_lo into tmp_lo according to offset.
2. Store tmp_lo into dst_lo.
3. Sign extend tmp_lo into tmp_hi.
4. Store tmp_hi to dst_hi.
Example: r3 = (s8)r1
r3 is stacked and r1 maps to {ARM_R3, ARM_R2}
lsl r6, r2, #24 // Sign extend r1_lo to tmp_lo
asr r6, r6, #24 // ..
str r6, [r11, #-16] // Store tmp_lo to r3_lo
asr r7, r6, #31 // Sign extend tmp_lo to tmp_hi
str r7, [r11, #-12] // Store tmp_hi to r3_hi
Case B.4: Both src and dst are not stacked:
-------------------------------------------
1. Sign extend src_lo into dst_lo according to offset.
2. Sign extend dst_lo into dst_hi.
Example: r6 = (s8)r1
r6 maps to {ARM_R5, ARM_R4} and r1 maps to {ARM_R3, ARM_R2}
lsl r4, r2, #24 // Sign extend r1_lo to r6_lo
asr r4, r4, #24 // ..
asr r5, r4, #31 // Sign extend r6_lo to r6_hi
Fixes: fc832653fa0d ("arm32, bpf: add support for sign-extension mov instruction")
Reported-by: syzbot+186522670e6722692d86@syzkaller.appspotmail.com
Signed-off-by: Puranjay Mohan <puranjay@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Closes: https://lore.kernel.org/all/000000000000e9a8d80615163f2a@google.com
Link: https://lore.kernel.org/bpf/20240419182832.27707-1-puranjay@kernel.org
2024-04-19 18:28:32 +00:00
|
|
|
s8 rs;
|
|
|
|
s8 rd;
|
|
|
|
|
|
|
|
if (is_stacked(dst_lo))
|
|
|
|
rd = tmp[1];
|
|
|
|
else
|
|
|
|
rd = dst_lo;
|
|
|
|
rs = arm_bpf_get_reg32(src_lo, rd, ctx);
|
|
|
|
/* rs may be one of src[1], dst[1], or tmp[1] */
|
|
|
|
|
|
|
|
/* Sign extend rs if needed. If off == 32, lower 32-bits of src are moved to dst and sign
|
|
|
|
* extension only happens in the upper 32 bits.
|
|
|
|
*/
|
|
|
|
if (off != 32) {
|
|
|
|
/* Sign extend rs into rd */
|
|
|
|
emit(ARM_LSL_I(rd, rs, 32 - off), ctx);
|
|
|
|
emit(ARM_ASR_I(rd, rd, 32 - off), ctx);
|
|
|
|
} else {
|
|
|
|
rd = rs;
|
|
|
|
}
|
2023-09-07 23:05:44 +00:00
|
|
|
|
arm32, bpf: Reimplement sign-extension mov instruction
The current implementation of the mov instruction with sign extension has the
following problems:
1. It clobbers the source register if it is not stacked because it
sign extends the source and then moves it to the destination.
2. If the dst_reg is stacked, the current code doesn't write the value
back in case of 64-bit mov.
3. There is room for improvement by emitting fewer instructions.
The steps for fixing this and the instructions emitted by the JIT are explained
below with examples in all combinations:
Case A: offset == 32:
=====================
Case A.1: src and dst are stacked registers:
--------------------------------------------
1. Load src_lo into tmp_lo
2. Store tmp_lo into dst_lo
3. Sign extend tmp_lo into tmp_hi
4. Store tmp_hi to dst_hi
Example: r3 = (s32)r3
r3 is a stacked register
ldr r6, [r11, #-16] // Load r3_lo into tmp_lo
// str to dst_lo is not emitted because src_lo == dst_lo
asr r7, r6, #31 // Sign extend tmp_lo into tmp_hi
str r7, [r11, #-12] // Store tmp_hi into r3_hi
Case A.2: src is stacked but dst is not:
----------------------------------------
1. Load src_lo into dst_lo
2. Sign extend dst_lo into dst_hi
Example: r6 = (s32)r3
r6 maps to {ARM_R5, ARM_R4} and r3 is stacked
ldr r4, [r11, #-16] // Load r3_lo into r6_lo
asr r5, r4, #31 // Sign extend r6_lo into r6_hi
Case A.3: src is not stacked but dst is stacked:
------------------------------------------------
1. Store src_lo into dst_lo
2. Sign extend src_lo into tmp_hi
3. Store tmp_hi to dst_hi
Example: r3 = (s32)r6
r3 is stacked and r6 maps to {ARM_R5, ARM_R4}
str r4, [r11, #-16] // Store r6_lo to r3_lo
asr r7, r4, #31 // Sign extend r6_lo into tmp_hi
str r7, [r11, #-12] // Store tmp_hi to dest_hi
Case A.4: Both src and dst are not stacked:
-------------------------------------------
1. Mov src_lo into dst_lo
2. Sign extend src_lo into dst_hi
Example: (bf) r6 = (s32)r6
r6 maps to {ARM_R5, ARM_R4}
// Mov not emitted because dst == src
asr r5, r4, #31 // Sign extend r6_lo into r6_hi
Case B: offset != 32:
=====================
Case B.1: src and dst are stacked registers:
--------------------------------------------
1. Load src_lo into tmp_lo
2. Sign extend tmp_lo according to offset.
3. Store tmp_lo into dst_lo
4. Sign extend tmp_lo into tmp_hi
5. Store tmp_hi to dst_hi
Example: r9 = (s8)r3
r9 and r3 are both stacked registers
ldr r6, [r11, #-16] // Load r3_lo into tmp_lo
lsl r6, r6, #24 // Sign extend tmp_lo
asr r6, r6, #24 // ..
str r6, [r11, #-56] // Store tmp_lo to r9_lo
asr r7, r6, #31 // Sign extend tmp_lo to tmp_hi
str r7, [r11, #-52] // Store tmp_hi to r9_hi
Case B.2: src is stacked but dst is not:
----------------------------------------
1. Load src_lo into dst_lo
2. Sign extend dst_lo according to offset.
3. Sign extend dst_lo into dst_hi
Example: r6 = (s8)r3
r6 maps to {ARM_R5, ARM_R4} and r3 is stacked
ldr r4, [r11, #-16] // Load r3_lo to r6_lo
lsl r4, r4, #24 // Sign extend r6_lo
asr r4, r4, #24 // ..
asr r5, r4, #31 // Sign extend r6_lo into r6_hi
Case B.3: src is not stacked but dst is stacked:
------------------------------------------------
1. Sign extend src_lo into tmp_lo according to offset.
2. Store tmp_lo into dst_lo.
3. Sign extend tmp_lo into tmp_hi.
4. Store tmp_hi to dst_hi.
Example: r3 = (s8)r1
r3 is stacked and r1 maps to {ARM_R3, ARM_R2}
lsl r6, r2, #24 // Sign extend r1_lo to tmp_lo
asr r6, r6, #24 // ..
str r6, [r11, #-16] // Store tmp_lo to r3_lo
asr r7, r6, #31 // Sign extend tmp_lo to tmp_hi
str r7, [r11, #-12] // Store tmp_hi to r3_hi
Case B.4: Both src and dst are not stacked:
-------------------------------------------
1. Sign extend src_lo into dst_lo according to offset.
2. Sign extend dst_lo into dst_hi.
Example: r6 = (s8)r1
r6 maps to {ARM_R5, ARM_R4} and r1 maps to {ARM_R3, ARM_R2}
lsl r4, r2, #24 // Sign extend r1_lo to r6_lo
asr r4, r4, #24 // ..
asr r5, r4, #31 // Sign extend r6_lo to r6_hi
Fixes: fc832653fa0d ("arm32, bpf: add support for sign-extension mov instruction")
Reported-by: syzbot+186522670e6722692d86@syzkaller.appspotmail.com
Signed-off-by: Puranjay Mohan <puranjay@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Closes: https://lore.kernel.org/all/000000000000e9a8d80615163f2a@google.com
Link: https://lore.kernel.org/bpf/20240419182832.27707-1-puranjay@kernel.org
2024-04-19 18:28:32 +00:00
|
|
|
/* Write rd to dst_lo
|
|
|
|
*
|
|
|
|
* Optimization:
|
|
|
|
* Assume:
|
|
|
|
* 1. dst == src and stacked.
|
|
|
|
* 2. off == 32
|
|
|
|
*
|
|
|
|
* In this case src_lo was loaded into rd(tmp[1]) but rd was not sign extended as off==32.
|
|
|
|
* So, we don't need to write rd back to dst_lo as they have the same value.
|
|
|
|
* This saves us one str instruction.
|
|
|
|
*/
|
|
|
|
if (dst_lo != src_lo || off != 32)
|
|
|
|
arm_bpf_put_reg32(dst_lo, rd, ctx);
|
2023-09-07 23:05:44 +00:00
|
|
|
|
|
|
|
if (!is64) {
|
|
|
|
if (!ctx->prog->aux->verifier_zext)
|
|
|
|
/* Zero out high 4 bytes */
|
|
|
|
emit_a32_mov_i(dst_hi, 0, ctx);
|
|
|
|
} else {
|
arm32, bpf: Reimplement sign-extension mov instruction
The current implementation of the mov instruction with sign extension has the
following problems:
1. It clobbers the source register if it is not stacked because it
sign extends the source and then moves it to the destination.
2. If the dst_reg is stacked, the current code doesn't write the value
back in case of 64-bit mov.
3. There is room for improvement by emitting fewer instructions.
The steps for fixing this and the instructions emitted by the JIT are explained
below with examples in all combinations:
Case A: offset == 32:
=====================
Case A.1: src and dst are stacked registers:
--------------------------------------------
1. Load src_lo into tmp_lo
2. Store tmp_lo into dst_lo
3. Sign extend tmp_lo into tmp_hi
4. Store tmp_hi to dst_hi
Example: r3 = (s32)r3
r3 is a stacked register
ldr r6, [r11, #-16] // Load r3_lo into tmp_lo
// str to dst_lo is not emitted because src_lo == dst_lo
asr r7, r6, #31 // Sign extend tmp_lo into tmp_hi
str r7, [r11, #-12] // Store tmp_hi into r3_hi
Case A.2: src is stacked but dst is not:
----------------------------------------
1. Load src_lo into dst_lo
2. Sign extend dst_lo into dst_hi
Example: r6 = (s32)r3
r6 maps to {ARM_R5, ARM_R4} and r3 is stacked
ldr r4, [r11, #-16] // Load r3_lo into r6_lo
asr r5, r4, #31 // Sign extend r6_lo into r6_hi
Case A.3: src is not stacked but dst is stacked:
------------------------------------------------
1. Store src_lo into dst_lo
2. Sign extend src_lo into tmp_hi
3. Store tmp_hi to dst_hi
Example: r3 = (s32)r6
r3 is stacked and r6 maps to {ARM_R5, ARM_R4}
str r4, [r11, #-16] // Store r6_lo to r3_lo
asr r7, r4, #31 // Sign extend r6_lo into tmp_hi
str r7, [r11, #-12] // Store tmp_hi to dest_hi
Case A.4: Both src and dst are not stacked:
-------------------------------------------
1. Mov src_lo into dst_lo
2. Sign extend src_lo into dst_hi
Example: (bf) r6 = (s32)r6
r6 maps to {ARM_R5, ARM_R4}
// Mov not emitted because dst == src
asr r5, r4, #31 // Sign extend r6_lo into r6_hi
Case B: offset != 32:
=====================
Case B.1: src and dst are stacked registers:
--------------------------------------------
1. Load src_lo into tmp_lo
2. Sign extend tmp_lo according to offset.
3. Store tmp_lo into dst_lo
4. Sign extend tmp_lo into tmp_hi
5. Store tmp_hi to dst_hi
Example: r9 = (s8)r3
r9 and r3 are both stacked registers
ldr r6, [r11, #-16] // Load r3_lo into tmp_lo
lsl r6, r6, #24 // Sign extend tmp_lo
asr r6, r6, #24 // ..
str r6, [r11, #-56] // Store tmp_lo to r9_lo
asr r7, r6, #31 // Sign extend tmp_lo to tmp_hi
str r7, [r11, #-52] // Store tmp_hi to r9_hi
Case B.2: src is stacked but dst is not:
----------------------------------------
1. Load src_lo into dst_lo
2. Sign extend dst_lo according to offset.
3. Sign extend dst_lo into dst_hi
Example: r6 = (s8)r3
r6 maps to {ARM_R5, ARM_R4} and r3 is stacked
ldr r4, [r11, #-16] // Load r3_lo to r6_lo
lsl r4, r4, #24 // Sign extend r6_lo
asr r4, r4, #24 // ..
asr r5, r4, #31 // Sign extend r6_lo into r6_hi
Case B.3: src is not stacked but dst is stacked:
------------------------------------------------
1. Sign extend src_lo into tmp_lo according to offset.
2. Store tmp_lo into dst_lo.
3. Sign extend tmp_lo into tmp_hi.
4. Store tmp_hi to dst_hi.
Example: r3 = (s8)r1
r3 is stacked and r1 maps to {ARM_R3, ARM_R2}
lsl r6, r2, #24 // Sign extend r1_lo to tmp_lo
asr r6, r6, #24 // ..
str r6, [r11, #-16] // Store tmp_lo to r3_lo
asr r7, r6, #31 // Sign extend tmp_lo to tmp_hi
str r7, [r11, #-12] // Store tmp_hi to r3_hi
Case B.4: Both src and dst are not stacked:
-------------------------------------------
1. Sign extend src_lo into dst_lo according to offset.
2. Sign extend dst_lo into dst_hi.
Example: r6 = (s8)r1
r6 maps to {ARM_R5, ARM_R4} and r1 maps to {ARM_R3, ARM_R2}
lsl r4, r2, #24 // Sign extend r1_lo to r6_lo
asr r4, r4, #24 // ..
asr r5, r4, #31 // Sign extend r6_lo to r6_hi
Fixes: fc832653fa0d ("arm32, bpf: add support for sign-extension mov instruction")
Reported-by: syzbot+186522670e6722692d86@syzkaller.appspotmail.com
Signed-off-by: Puranjay Mohan <puranjay@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Closes: https://lore.kernel.org/all/000000000000e9a8d80615163f2a@google.com
Link: https://lore.kernel.org/bpf/20240419182832.27707-1-puranjay@kernel.org
2024-04-19 18:28:32 +00:00
|
|
|
if (is_stacked(dst_hi)) {
|
|
|
|
emit(ARM_ASR_I(tmp[0], rd, 31), ctx);
|
|
|
|
arm_bpf_put_reg32(dst_hi, tmp[0], ctx);
|
|
|
|
} else {
|
|
|
|
emit(ARM_ASR_I(dst_hi, rd, 31), ctx);
|
|
|
|
}
|
2023-09-07 23:05:44 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-08-22 06:32:33 +00:00
|
|
|
/* Shift operations */
|
2018-07-11 09:31:47 +00:00
|
|
|
static inline void emit_a32_alu_i(const s8 dst, const u32 val,
|
2017-08-22 06:32:33 +00:00
|
|
|
struct jit_ctx *ctx, const u8 op) {
|
2018-07-11 09:31:41 +00:00
|
|
|
const s8 *tmp = bpf2a32[TMP_REG_1];
|
2018-07-11 09:31:52 +00:00
|
|
|
s8 rd;
|
2017-08-22 06:32:33 +00:00
|
|
|
|
2018-07-11 09:31:52 +00:00
|
|
|
rd = arm_bpf_get_reg32(dst, tmp[0], ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
|
|
|
|
/* Do shift operation */
|
|
|
|
switch (op) {
|
|
|
|
case BPF_LSH:
|
|
|
|
emit(ARM_LSL_I(rd, rd, val), ctx);
|
|
|
|
break;
|
|
|
|
case BPF_RSH:
|
|
|
|
emit(ARM_LSR_I(rd, rd, val), ctx);
|
|
|
|
break;
|
bpf, arm: Optimize ALU ARSH K using asr immediate instruction
This patch adds an optimization that uses the asr immediate instruction
for BPF_ALU BPF_ARSH BPF_K, rather than loading the immediate to
a temporary register. This is similar to existing code for handling
BPF_ALU BPF_{LSH,RSH} BPF_K. This optimization saves two instructions
and is more consistent with LSH and RSH.
Example of the code generated for BPF_ALU32_IMM(BPF_ARSH, BPF_REG_0, 5)
before the optimization:
2c: mov r8, #5
30: mov r9, #0
34: asr r0, r0, r8
and after optimization:
2c: asr r0, r0, #5
Tested on QEMU using lib/test_bpf and test_verifier.
Co-developed-by: Xi Wang <xi.wang@gmail.com>
Signed-off-by: Xi Wang <xi.wang@gmail.com>
Signed-off-by: Luke Nelson <luke.r.nels@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200501020210.32294-3-luke.r.nels@gmail.com
2020-05-01 02:02:10 +00:00
|
|
|
case BPF_ARSH:
|
|
|
|
emit(ARM_ASR_I(rd, rd, val), ctx);
|
|
|
|
break;
|
2017-08-22 06:32:33 +00:00
|
|
|
case BPF_NEG:
|
|
|
|
emit(ARM_RSB_I(rd, rd, val), ctx);
|
|
|
|
break;
|
|
|
|
}
|
2012-03-16 12:37:12 +00:00
|
|
|
|
2018-07-11 09:31:52 +00:00
|
|
|
arm_bpf_put_reg32(dst, rd, ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* dst = ~dst (64 bit) */
|
2018-07-11 09:31:47 +00:00
|
|
|
static inline void emit_a32_neg64(const s8 dst[],
|
2017-08-22 06:32:33 +00:00
|
|
|
struct jit_ctx *ctx){
|
2018-07-11 09:31:41 +00:00
|
|
|
const s8 *tmp = bpf2a32[TMP_REG_1];
|
2018-07-11 09:31:57 +00:00
|
|
|
const s8 *rd;
|
2017-08-22 06:32:33 +00:00
|
|
|
|
|
|
|
/* Setup Operand */
|
2018-07-11 09:31:57 +00:00
|
|
|
rd = arm_bpf_get_reg64(dst, tmp, ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
|
|
|
|
/* Do Negate Operation */
|
2018-07-11 09:31:57 +00:00
|
|
|
emit(ARM_RSBS_I(rd[1], rd[1], 0), ctx);
|
|
|
|
emit(ARM_RSC_I(rd[0], rd[0], 0), ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
|
2018-07-11 09:31:57 +00:00
|
|
|
arm_bpf_put_reg64(dst, rd, ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* dst = dst << src */
|
2018-07-11 09:31:47 +00:00
|
|
|
static inline void emit_a32_lsh_r64(const s8 dst[], const s8 src[],
|
|
|
|
struct jit_ctx *ctx) {
|
2018-07-11 09:31:41 +00:00
|
|
|
const s8 *tmp = bpf2a32[TMP_REG_1];
|
|
|
|
const s8 *tmp2 = bpf2a32[TMP_REG_2];
|
2018-07-11 09:31:57 +00:00
|
|
|
const s8 *rd;
|
|
|
|
s8 rt;
|
2017-08-22 06:32:33 +00:00
|
|
|
|
|
|
|
/* Setup Operands */
|
2018-07-11 09:31:52 +00:00
|
|
|
rt = arm_bpf_get_reg32(src_lo, tmp2[1], ctx);
|
2018-07-11 09:31:57 +00:00
|
|
|
rd = arm_bpf_get_reg64(dst, tmp, ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
|
|
|
|
/* Do LSH operation */
|
|
|
|
emit(ARM_SUB_I(ARM_IP, rt, 32), ctx);
|
|
|
|
emit(ARM_RSB_I(tmp2[0], rt, 32), ctx);
|
2018-07-11 09:31:57 +00:00
|
|
|
emit(ARM_MOV_SR(ARM_LR, rd[0], SRTYPE_ASL, rt), ctx);
|
|
|
|
emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[1], SRTYPE_ASL, ARM_IP), ctx);
|
|
|
|
emit(ARM_ORR_SR(ARM_IP, ARM_LR, rd[1], SRTYPE_LSR, tmp2[0]), ctx);
|
|
|
|
emit(ARM_MOV_SR(ARM_LR, rd[1], SRTYPE_ASL, rt), ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
|
2018-07-11 09:31:52 +00:00
|
|
|
arm_bpf_put_reg32(dst_lo, ARM_LR, ctx);
|
|
|
|
arm_bpf_put_reg32(dst_hi, ARM_IP, ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
}
|
2012-03-16 12:37:12 +00:00
|
|
|
|
2017-08-22 06:32:33 +00:00
|
|
|
/* dst = dst >> src (signed)*/
|
2018-07-11 09:31:47 +00:00
|
|
|
static inline void emit_a32_arsh_r64(const s8 dst[], const s8 src[],
|
|
|
|
struct jit_ctx *ctx) {
|
2018-07-11 09:31:41 +00:00
|
|
|
const s8 *tmp = bpf2a32[TMP_REG_1];
|
|
|
|
const s8 *tmp2 = bpf2a32[TMP_REG_2];
|
2018-07-11 09:31:57 +00:00
|
|
|
const s8 *rd;
|
|
|
|
s8 rt;
|
2018-07-11 09:31:52 +00:00
|
|
|
|
2017-08-22 06:32:33 +00:00
|
|
|
/* Setup Operands */
|
2018-07-11 09:31:52 +00:00
|
|
|
rt = arm_bpf_get_reg32(src_lo, tmp2[1], ctx);
|
2018-07-11 09:31:57 +00:00
|
|
|
rd = arm_bpf_get_reg64(dst, tmp, ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
|
|
|
|
/* Do the ARSH operation */
|
|
|
|
emit(ARM_RSB_I(ARM_IP, rt, 32), ctx);
|
|
|
|
emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx);
|
2018-07-11 09:31:57 +00:00
|
|
|
emit(ARM_MOV_SR(ARM_LR, rd[1], SRTYPE_LSR, rt), ctx);
|
|
|
|
emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_ASL, ARM_IP), ctx);
|
bpf, arm: Optimize ALU64 ARSH X using orrpl conditional instruction
This patch optimizes the code generated by emit_a32_arsh_r64, which
handles the BPF_ALU64 BPF_ARSH BPF_X instruction.
The original code uses a conditional B followed by an unconditional ORR.
The optimization saves one instruction by removing the B instruction
and using a conditional ORR (with an inverted condition).
Example of the code generated for BPF_ALU64_REG(BPF_ARSH, BPF_REG_0,
BPF_REG_1), before optimization:
34: rsb ip, r2, #32
38: subs r9, r2, #32
3c: lsr lr, r0, r2
40: orr lr, lr, r1, lsl ip
44: bmi 0x4c
48: orr lr, lr, r1, asr r9
4c: asr ip, r1, r2
50: mov r0, lr
54: mov r1, ip
and after optimization:
34: rsb ip, r2, #32
38: subs r9, r2, #32
3c: lsr lr, r0, r2
40: orr lr, lr, r1, lsl ip
44: orrpl lr, lr, r1, asr r9
48: asr ip, r1, r2
4c: mov r0, lr
50: mov r1, ip
Tested on QEMU using lib/test_bpf and test_verifier.
Co-developed-by: Xi Wang <xi.wang@gmail.com>
Signed-off-by: Xi Wang <xi.wang@gmail.com>
Signed-off-by: Luke Nelson <luke.r.nels@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200501020210.32294-2-luke.r.nels@gmail.com
2020-05-01 02:02:09 +00:00
|
|
|
_emit(ARM_COND_PL,
|
|
|
|
ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_ASR, tmp2[0]), ctx);
|
2018-07-11 09:31:57 +00:00
|
|
|
emit(ARM_MOV_SR(ARM_IP, rd[0], SRTYPE_ASR, rt), ctx);
|
2018-07-11 09:31:52 +00:00
|
|
|
|
|
|
|
arm_bpf_put_reg32(dst_lo, ARM_LR, ctx);
|
|
|
|
arm_bpf_put_reg32(dst_hi, ARM_IP, ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* dst = dst >> src */
|
2018-07-11 09:31:47 +00:00
|
|
|
static inline void emit_a32_rsh_r64(const s8 dst[], const s8 src[],
|
|
|
|
struct jit_ctx *ctx) {
|
2018-07-11 09:31:41 +00:00
|
|
|
const s8 *tmp = bpf2a32[TMP_REG_1];
|
|
|
|
const s8 *tmp2 = bpf2a32[TMP_REG_2];
|
2018-07-11 09:31:57 +00:00
|
|
|
const s8 *rd;
|
|
|
|
s8 rt;
|
2018-07-11 09:31:52 +00:00
|
|
|
|
2017-08-22 06:32:33 +00:00
|
|
|
/* Setup Operands */
|
2018-07-11 09:31:52 +00:00
|
|
|
rt = arm_bpf_get_reg32(src_lo, tmp2[1], ctx);
|
2018-07-11 09:31:57 +00:00
|
|
|
rd = arm_bpf_get_reg64(dst, tmp, ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
|
2018-05-11 02:52:17 +00:00
|
|
|
/* Do RSH operation */
|
2017-08-22 06:32:33 +00:00
|
|
|
emit(ARM_RSB_I(ARM_IP, rt, 32), ctx);
|
|
|
|
emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx);
|
2018-07-11 09:31:57 +00:00
|
|
|
emit(ARM_MOV_SR(ARM_LR, rd[1], SRTYPE_LSR, rt), ctx);
|
|
|
|
emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_ASL, ARM_IP), ctx);
|
|
|
|
emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_LSR, tmp2[0]), ctx);
|
|
|
|
emit(ARM_MOV_SR(ARM_IP, rd[0], SRTYPE_LSR, rt), ctx);
|
2018-07-11 09:31:52 +00:00
|
|
|
|
|
|
|
arm_bpf_put_reg32(dst_lo, ARM_LR, ctx);
|
|
|
|
arm_bpf_put_reg32(dst_hi, ARM_IP, ctx);
|
2012-03-16 12:37:12 +00:00
|
|
|
}
|
|
|
|
|
2017-08-22 06:32:33 +00:00
|
|
|
/* dst = dst << val */
|
2018-07-11 09:31:47 +00:00
|
|
|
static inline void emit_a32_lsh_i64(const s8 dst[],
|
|
|
|
const u32 val, struct jit_ctx *ctx){
|
2018-07-11 09:31:41 +00:00
|
|
|
const s8 *tmp = bpf2a32[TMP_REG_1];
|
|
|
|
const s8 *tmp2 = bpf2a32[TMP_REG_2];
|
2018-07-11 09:31:57 +00:00
|
|
|
const s8 *rd;
|
2017-08-22 06:32:33 +00:00
|
|
|
|
2018-07-11 09:31:52 +00:00
|
|
|
/* Setup operands */
|
2018-07-11 09:31:57 +00:00
|
|
|
rd = arm_bpf_get_reg64(dst, tmp, ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
|
|
|
|
/* Do LSH operation */
|
|
|
|
if (val < 32) {
|
2018-07-11 09:31:57 +00:00
|
|
|
emit(ARM_MOV_SI(tmp2[0], rd[0], SRTYPE_ASL, val), ctx);
|
|
|
|
emit(ARM_ORR_SI(rd[0], tmp2[0], rd[1], SRTYPE_LSR, 32 - val), ctx);
|
|
|
|
emit(ARM_MOV_SI(rd[1], rd[1], SRTYPE_ASL, val), ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
} else {
|
|
|
|
if (val == 32)
|
2018-07-11 09:31:57 +00:00
|
|
|
emit(ARM_MOV_R(rd[0], rd[1]), ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
else
|
2018-07-11 09:31:57 +00:00
|
|
|
emit(ARM_MOV_SI(rd[0], rd[1], SRTYPE_ASL, val - 32), ctx);
|
|
|
|
emit(ARM_EOR_R(rd[1], rd[1], rd[1]), ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
}
|
|
|
|
|
2018-07-11 09:31:57 +00:00
|
|
|
arm_bpf_put_reg64(dst, rd, ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* dst = dst >> val */
|
2018-07-11 09:31:47 +00:00
|
|
|
static inline void emit_a32_rsh_i64(const s8 dst[],
|
2017-08-22 06:32:33 +00:00
|
|
|
const u32 val, struct jit_ctx *ctx) {
|
2018-07-11 09:31:41 +00:00
|
|
|
const s8 *tmp = bpf2a32[TMP_REG_1];
|
|
|
|
const s8 *tmp2 = bpf2a32[TMP_REG_2];
|
2018-07-11 09:31:57 +00:00
|
|
|
const s8 *rd;
|
2017-08-22 06:32:33 +00:00
|
|
|
|
2018-07-11 09:31:52 +00:00
|
|
|
/* Setup operands */
|
2018-07-11 09:31:57 +00:00
|
|
|
rd = arm_bpf_get_reg64(dst, tmp, ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
|
|
|
|
/* Do LSR operation */
|
2020-04-08 18:12:29 +00:00
|
|
|
if (val == 0) {
|
|
|
|
/* An immediate value of 0 encodes a shift amount of 32
|
|
|
|
* for LSR. To shift by 0, don't do anything.
|
|
|
|
*/
|
|
|
|
} else if (val < 32) {
|
2018-07-11 09:31:57 +00:00
|
|
|
emit(ARM_MOV_SI(tmp2[1], rd[1], SRTYPE_LSR, val), ctx);
|
|
|
|
emit(ARM_ORR_SI(rd[1], tmp2[1], rd[0], SRTYPE_ASL, 32 - val), ctx);
|
|
|
|
emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_LSR, val), ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
} else if (val == 32) {
|
2018-07-11 09:31:57 +00:00
|
|
|
emit(ARM_MOV_R(rd[1], rd[0]), ctx);
|
|
|
|
emit(ARM_MOV_I(rd[0], 0), ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
} else {
|
2018-07-11 09:31:57 +00:00
|
|
|
emit(ARM_MOV_SI(rd[1], rd[0], SRTYPE_LSR, val - 32), ctx);
|
|
|
|
emit(ARM_MOV_I(rd[0], 0), ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
}
|
|
|
|
|
2018-07-11 09:31:57 +00:00
|
|
|
arm_bpf_put_reg64(dst, rd, ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* dst = dst >> val (signed) */
|
2018-07-11 09:31:47 +00:00
|
|
|
static inline void emit_a32_arsh_i64(const s8 dst[],
|
2017-08-22 06:32:33 +00:00
|
|
|
const u32 val, struct jit_ctx *ctx){
|
2018-07-11 09:31:41 +00:00
|
|
|
const s8 *tmp = bpf2a32[TMP_REG_1];
|
|
|
|
const s8 *tmp2 = bpf2a32[TMP_REG_2];
|
2018-07-11 09:31:57 +00:00
|
|
|
const s8 *rd;
|
2017-08-22 06:32:33 +00:00
|
|
|
|
2018-07-11 09:31:52 +00:00
|
|
|
/* Setup operands */
|
2018-07-11 09:31:57 +00:00
|
|
|
rd = arm_bpf_get_reg64(dst, tmp, ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
|
|
|
|
/* Do ARSH operation */
|
2020-04-08 18:12:29 +00:00
|
|
|
if (val == 0) {
|
|
|
|
/* An immediate value of 0 encodes a shift amount of 32
|
|
|
|
* for ASR. To shift by 0, don't do anything.
|
|
|
|
*/
|
|
|
|
} else if (val < 32) {
|
2018-07-11 09:31:57 +00:00
|
|
|
emit(ARM_MOV_SI(tmp2[1], rd[1], SRTYPE_LSR, val), ctx);
|
|
|
|
emit(ARM_ORR_SI(rd[1], tmp2[1], rd[0], SRTYPE_ASL, 32 - val), ctx);
|
|
|
|
emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_ASR, val), ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
} else if (val == 32) {
|
2018-07-11 09:31:57 +00:00
|
|
|
emit(ARM_MOV_R(rd[1], rd[0]), ctx);
|
|
|
|
emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_ASR, 31), ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
} else {
|
2018-07-11 09:31:57 +00:00
|
|
|
emit(ARM_MOV_SI(rd[1], rd[0], SRTYPE_ASR, val - 32), ctx);
|
|
|
|
emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_ASR, 31), ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
}
|
|
|
|
|
2018-07-11 09:31:57 +00:00
|
|
|
arm_bpf_put_reg64(dst, rd, ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
}
|
|
|
|
|
2018-07-11 09:31:47 +00:00
|
|
|
static inline void emit_a32_mul_r64(const s8 dst[], const s8 src[],
|
|
|
|
struct jit_ctx *ctx) {
|
2018-07-11 09:31:41 +00:00
|
|
|
const s8 *tmp = bpf2a32[TMP_REG_1];
|
|
|
|
const s8 *tmp2 = bpf2a32[TMP_REG_2];
|
2018-07-11 09:31:57 +00:00
|
|
|
const s8 *rd, *rt;
|
2018-07-11 09:31:52 +00:00
|
|
|
|
2017-08-22 06:32:33 +00:00
|
|
|
/* Setup operands for multiplication */
|
2018-07-11 09:31:57 +00:00
|
|
|
rd = arm_bpf_get_reg64(dst, tmp, ctx);
|
|
|
|
rt = arm_bpf_get_reg64(src, tmp2, ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
|
|
|
|
/* Do Multiplication */
|
2018-07-11 09:31:57 +00:00
|
|
|
emit(ARM_MUL(ARM_IP, rd[1], rt[0]), ctx);
|
|
|
|
emit(ARM_MUL(ARM_LR, rd[0], rt[1]), ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
emit(ARM_ADD_R(ARM_LR, ARM_IP, ARM_LR), ctx);
|
|
|
|
|
2018-07-11 09:31:57 +00:00
|
|
|
emit(ARM_UMULL(ARM_IP, rd[0], rd[1], rt[1]), ctx);
|
|
|
|
emit(ARM_ADD_R(rd[0], ARM_LR, rd[0]), ctx);
|
2018-07-11 09:31:52 +00:00
|
|
|
|
|
|
|
arm_bpf_put_reg32(dst_lo, ARM_IP, ctx);
|
2018-07-11 09:31:57 +00:00
|
|
|
arm_bpf_put_reg32(dst_hi, rd[0], ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
}
|
|
|
|
|
2020-04-09 22:17:52 +00:00
|
|
|
/*
 * Tell whether @off can be used as the immediate offset of a load/store of
 * BPF size @size, i.e. whether -limit <= off <= limit where limit is:
 *
 *   BPF_B, BPF_W - 0xfff
 *   BPF_H        - 0xff
 *   BPF_DW       - 0xfff - 4
 *
 * For any other size the limit is 0, so only off == 0 is accepted.
 */
static bool is_ldst_imm(s16 off, const u8 size)
{
	s16 limit;

	if (size == BPF_B || size == BPF_W) {
		limit = 0xfff;
	} else if (size == BPF_H) {
		limit = 0xff;
	} else if (size == BPF_DW) {
		/* The second word lives at off + 4, which must not overflow. */
		limit = 0xfff - 4;
	} else {
		limit = 0;
	}

	return off >= -limit && off <= limit;
}
|
|
|
|
|
2023-09-07 23:05:43 +00:00
|
|
|
/*
 * Variant of is_ldst_imm() used by the sign-extending loads: tell whether
 * @off can be used as the immediate offset for BPF size @size, i.e. whether
 * -limit <= off <= limit where limit is:
 *
 *   BPF_B, BPF_H - 0xff
 *   BPF_W        - 0xfff
 *
 * For any other size the limit is 0, so only off == 0 is accepted.
 */
static bool is_ldst_imm8(s16 off, const u8 size)
{
	s16 limit;

	if (size == BPF_B || size == BPF_H)
		limit = 0xff;
	else if (size == BPF_W)
		limit = 0xfff;
	else
		limit = 0;

	return off >= -limit && off <= limit;
}
|
|
|
|
|
2017-08-22 06:32:33 +00:00
|
|
|
/* *(size *)(dst + off) = src */
|
ARM: net: bpf: improve 64-bit store implementation
Improve the 64-bit store implementation from:
ldr r6, [fp, #-8]
str r8, [r6]
ldr r6, [fp, #-8]
mov r7, #4
add r7, r6, r7
str r9, [r7]
to:
ldr r6, [fp, #-8]
str r8, [r6]
str r9, [r6, #4]
We leave the store as two separate STR instructions rather than using
STRD as the store may not be aligned, and STR can handle misalignment.
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-07-12 20:50:51 +00:00
|
|
|
static inline void emit_str_r(const s8 dst, const s8 src[],
|
2020-04-09 22:17:52 +00:00
|
|
|
s16 off, struct jit_ctx *ctx, const u8 sz){
|
2018-07-11 09:31:41 +00:00
|
|
|
const s8 *tmp = bpf2a32[TMP_REG_1];
|
2018-07-11 09:31:52 +00:00
|
|
|
s8 rd;
|
2017-08-22 06:32:33 +00:00
|
|
|
|
2018-07-11 09:31:52 +00:00
|
|
|
rd = arm_bpf_get_reg32(dst, tmp[1], ctx);
|
ARM: net: bpf: improve 64-bit store implementation
Improve the 64-bit store implementation from:
ldr r6, [fp, #-8]
str r8, [r6]
ldr r6, [fp, #-8]
mov r7, #4
add r7, r6, r7
str r9, [r7]
to:
ldr r6, [fp, #-8]
str r8, [r6]
str r9, [r6, #4]
We leave the store as two separate STR instructions rather than using
STRD as the store may not be aligned, and STR can handle misalignment.
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-07-12 20:50:51 +00:00
|
|
|
|
2020-04-09 22:17:52 +00:00
|
|
|
if (!is_ldst_imm(off, sz)) {
|
2018-07-11 09:31:47 +00:00
|
|
|
emit_a32_mov_i(tmp[0], off, ctx);
|
ARM: net: bpf: improve 64-bit store implementation
Improve the 64-bit store implementation from:
ldr r6, [fp, #-8]
str r8, [r6]
ldr r6, [fp, #-8]
mov r7, #4
add r7, r6, r7
str r9, [r7]
to:
ldr r6, [fp, #-8]
str r8, [r6]
str r9, [r6, #4]
We leave the store as two separate STR instructions rather than using
STRD as the store may not be aligned, and STR can handle misalignment.
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-07-12 20:50:51 +00:00
|
|
|
emit(ARM_ADD_R(tmp[0], tmp[0], rd), ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
rd = tmp[0];
|
ARM: net: bpf: improve 64-bit store implementation
Improve the 64-bit store implementation from:
ldr r6, [fp, #-8]
str r8, [r6]
ldr r6, [fp, #-8]
mov r7, #4
add r7, r6, r7
str r9, [r7]
to:
ldr r6, [fp, #-8]
str r8, [r6]
str r9, [r6, #4]
We leave the store as two separate STR instructions rather than using
STRD as the store may not be aligned, and STR can handle misalignment.
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-07-12 20:50:51 +00:00
|
|
|
off = 0;
|
2017-08-22 06:32:33 +00:00
|
|
|
}
|
|
|
|
switch (sz) {
|
ARM: net: bpf: improve 64-bit store implementation
Improve the 64-bit store implementation from:
ldr r6, [fp, #-8]
str r8, [r6]
ldr r6, [fp, #-8]
mov r7, #4
add r7, r6, r7
str r9, [r7]
to:
ldr r6, [fp, #-8]
str r8, [r6]
str r9, [r6, #4]
We leave the store as two separate STR instructions rather than using
STRD as the store may not be aligned, and STR can handle misalignment.
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-07-12 20:50:51 +00:00
|
|
|
case BPF_B:
|
|
|
|
/* Store a Byte */
|
|
|
|
emit(ARM_STRB_I(src_lo, rd, off), ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
break;
|
|
|
|
case BPF_H:
|
|
|
|
/* Store a HalfWord */
|
ARM: net: bpf: improve 64-bit store implementation
Improve the 64-bit store implementation from:
ldr r6, [fp, #-8]
str r8, [r6]
ldr r6, [fp, #-8]
mov r7, #4
add r7, r6, r7
str r9, [r7]
to:
ldr r6, [fp, #-8]
str r8, [r6]
str r9, [r6, #4]
We leave the store as two separate STR instructions rather than using
STRD as the store may not be aligned, and STR can handle misalignment.
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-07-12 20:50:51 +00:00
|
|
|
emit(ARM_STRH_I(src_lo, rd, off), ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
break;
|
ARM: net: bpf: improve 64-bit store implementation
Improve the 64-bit store implementation from:
ldr r6, [fp, #-8]
str r8, [r6]
ldr r6, [fp, #-8]
mov r7, #4
add r7, r6, r7
str r9, [r7]
to:
ldr r6, [fp, #-8]
str r8, [r6]
str r9, [r6, #4]
We leave the store as two separate STR instructions rather than using
STRD as the store may not be aligned, and STR can handle misalignment.
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-07-12 20:50:51 +00:00
|
|
|
case BPF_W:
|
|
|
|
/* Store a Word */
|
|
|
|
emit(ARM_STR_I(src_lo, rd, off), ctx);
|
|
|
|
break;
|
|
|
|
case BPF_DW:
|
|
|
|
/* Store a Double Word */
|
|
|
|
emit(ARM_STR_I(src_lo, rd, off), ctx);
|
|
|
|
emit(ARM_STR_I(src_hi, rd, off + 4), ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* dst = *(size*)(src + off) */
|
2018-07-11 09:31:47 +00:00
|
|
|
static inline void emit_ldx_r(const s8 dst[], const s8 src,
|
2020-04-09 22:17:52 +00:00
|
|
|
s16 off, struct jit_ctx *ctx, const u8 sz){
|
2018-07-11 09:31:41 +00:00
|
|
|
const s8 *tmp = bpf2a32[TMP_REG_1];
|
2018-07-11 09:31:47 +00:00
|
|
|
const s8 *rd = is_stacked(dst_lo) ? tmp : dst;
|
2018-07-11 09:31:41 +00:00
|
|
|
s8 rm = src;
|
2018-01-13 21:06:16 +00:00
|
|
|
|
2020-04-09 22:17:52 +00:00
|
|
|
if (!is_ldst_imm(off, sz)) {
|
2018-07-11 09:31:47 +00:00
|
|
|
emit_a32_mov_i(tmp[0], off, ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx);
|
|
|
|
rm = tmp[0];
|
2018-01-13 21:06:16 +00:00
|
|
|
off = 0;
|
|
|
|
} else if (rd[1] == rm) {
|
|
|
|
emit(ARM_MOV_R(tmp[0], rm), ctx);
|
|
|
|
rm = tmp[0];
|
2017-08-22 06:32:33 +00:00
|
|
|
}
|
|
|
|
switch (sz) {
|
2018-01-13 21:06:16 +00:00
|
|
|
case BPF_B:
|
|
|
|
/* Load a Byte */
|
|
|
|
emit(ARM_LDRB_I(rd[1], rm, off), ctx);
|
2019-05-24 22:25:22 +00:00
|
|
|
if (!ctx->prog->aux->verifier_zext)
|
|
|
|
emit_a32_mov_i(rd[0], 0, ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
break;
|
|
|
|
case BPF_H:
|
|
|
|
/* Load a HalfWord */
|
2018-01-13 21:06:16 +00:00
|
|
|
emit(ARM_LDRH_I(rd[1], rm, off), ctx);
|
2019-05-24 22:25:22 +00:00
|
|
|
if (!ctx->prog->aux->verifier_zext)
|
|
|
|
emit_a32_mov_i(rd[0], 0, ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
break;
|
2018-01-13 21:06:16 +00:00
|
|
|
case BPF_W:
|
|
|
|
/* Load a Word */
|
|
|
|
emit(ARM_LDR_I(rd[1], rm, off), ctx);
|
2019-05-24 22:25:22 +00:00
|
|
|
if (!ctx->prog->aux->verifier_zext)
|
|
|
|
emit_a32_mov_i(rd[0], 0, ctx);
|
2018-01-13 21:06:16 +00:00
|
|
|
break;
|
|
|
|
case BPF_DW:
|
|
|
|
/* Load a Double Word */
|
|
|
|
emit(ARM_LDR_I(rd[1], rm, off), ctx);
|
|
|
|
emit(ARM_LDR_I(rd[0], rm, off + 4), ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
break;
|
|
|
|
}
|
2018-07-11 09:31:57 +00:00
|
|
|
arm_bpf_put_reg64(dst, rd, ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
}
|
|
|
|
|
2023-09-07 23:05:43 +00:00
|
|
|
/* dst = *(signed size*)(src + off) */
|
|
|
|
static inline void emit_ldsx_r(const s8 dst[], const s8 src,
|
|
|
|
s16 off, struct jit_ctx *ctx, const u8 sz){
|
|
|
|
const s8 *tmp = bpf2a32[TMP_REG_1];
|
|
|
|
const s8 *rd = is_stacked(dst_lo) ? tmp : dst;
|
|
|
|
s8 rm = src;
|
|
|
|
int add_off;
|
|
|
|
|
|
|
|
if (!is_ldst_imm8(off, sz)) {
|
|
|
|
/*
|
|
|
|
* offset does not fit in the load/store immediate,
|
|
|
|
* construct an ADD instruction to apply the offset.
|
|
|
|
*/
|
|
|
|
add_off = imm8m(off);
|
|
|
|
if (add_off > 0) {
|
|
|
|
emit(ARM_ADD_I(tmp[0], src, add_off), ctx);
|
|
|
|
rm = tmp[0];
|
|
|
|
} else {
|
|
|
|
emit_a32_mov_i(tmp[0], off, ctx);
|
|
|
|
emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx);
|
|
|
|
rm = tmp[0];
|
|
|
|
}
|
|
|
|
off = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
switch (sz) {
|
|
|
|
case BPF_B:
|
|
|
|
/* Load a Byte with sign extension*/
|
|
|
|
emit(ARM_LDRSB_I(rd[1], rm, off), ctx);
|
|
|
|
break;
|
|
|
|
case BPF_H:
|
|
|
|
/* Load a HalfWord with sign extension*/
|
|
|
|
emit(ARM_LDRSH_I(rd[1], rm, off), ctx);
|
|
|
|
break;
|
|
|
|
case BPF_W:
|
|
|
|
/* Load a Word*/
|
|
|
|
emit(ARM_LDR_I(rd[1], rm, off), ctx);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
/* Carry the sign extension to upper 32 bits */
|
|
|
|
emit(ARM_ASR_I(rd[0], rd[1], 31), ctx);
|
|
|
|
arm_bpf_put_reg64(dst, rd, ctx);
|
|
|
|
}
|
|
|
|
|
2017-08-22 06:32:33 +00:00
|
|
|
/* Arithmatic Operation */
|
|
|
|
static inline void emit_ar_r(const u8 rd, const u8 rt, const u8 rm,
|
2019-01-26 17:26:09 +00:00
|
|
|
const u8 rn, struct jit_ctx *ctx, u8 op,
|
|
|
|
bool is_jmp64) {
|
2017-08-22 06:32:33 +00:00
|
|
|
switch (op) {
|
|
|
|
case BPF_JSET:
|
2019-01-26 17:26:09 +00:00
|
|
|
if (is_jmp64) {
|
|
|
|
emit(ARM_AND_R(ARM_IP, rt, rn), ctx);
|
|
|
|
emit(ARM_AND_R(ARM_LR, rd, rm), ctx);
|
|
|
|
emit(ARM_ORRS_R(ARM_IP, ARM_LR, ARM_IP), ctx);
|
|
|
|
} else {
|
|
|
|
emit(ARM_ANDS_R(ARM_IP, rt, rn), ctx);
|
|
|
|
}
|
2017-08-22 06:32:33 +00:00
|
|
|
break;
|
|
|
|
case BPF_JEQ:
|
|
|
|
case BPF_JNE:
|
|
|
|
case BPF_JGT:
|
|
|
|
case BPF_JGE:
|
|
|
|
case BPF_JLE:
|
|
|
|
case BPF_JLT:
|
2019-01-26 17:26:09 +00:00
|
|
|
if (is_jmp64) {
|
|
|
|
emit(ARM_CMP_R(rd, rm), ctx);
|
|
|
|
/* Only compare low halve if high halve are equal. */
|
|
|
|
_emit(ARM_COND_EQ, ARM_CMP_R(rt, rn), ctx);
|
|
|
|
} else {
|
|
|
|
emit(ARM_CMP_R(rt, rn), ctx);
|
|
|
|
}
|
2017-08-22 06:32:33 +00:00
|
|
|
break;
|
|
|
|
case BPF_JSLE:
|
|
|
|
case BPF_JSGT:
|
|
|
|
emit(ARM_CMP_R(rn, rt), ctx);
|
2019-01-26 17:26:09 +00:00
|
|
|
if (is_jmp64)
|
|
|
|
emit(ARM_SBCS_R(ARM_IP, rm, rd), ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
break;
|
|
|
|
case BPF_JSLT:
|
|
|
|
case BPF_JSGE:
|
|
|
|
emit(ARM_CMP_R(rt, rn), ctx);
|
2019-01-26 17:26:09 +00:00
|
|
|
if (is_jmp64)
|
|
|
|
emit(ARM_SBCS_R(ARM_IP, rd, rm), ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static int out_offset = -1; /* initialized on the first pass of build_body() */
|
|
|
|
static int emit_bpf_tail_call(struct jit_ctx *ctx)
|
2012-03-16 12:37:12 +00:00
|
|
|
{
|
|
|
|
|
2017-08-22 06:32:33 +00:00
|
|
|
/* bpf_tail_call(void *prog_ctx, struct bpf_array *array, u64 index) */
|
2018-07-11 09:31:41 +00:00
|
|
|
const s8 *r2 = bpf2a32[BPF_REG_2];
|
|
|
|
const s8 *r3 = bpf2a32[BPF_REG_3];
|
|
|
|
const s8 *tmp = bpf2a32[TMP_REG_1];
|
|
|
|
const s8 *tmp2 = bpf2a32[TMP_REG_2];
|
|
|
|
const s8 *tcc = bpf2a32[TCALL_CNT];
|
2018-07-11 09:31:57 +00:00
|
|
|
const s8 *tc;
|
2017-08-22 06:32:33 +00:00
|
|
|
const int idx0 = ctx->idx;
|
|
|
|
#define cur_offset (ctx->idx - idx0)
|
ARM: net: bpf: fix tail call jumps
When a tail call fails, it is documented that the tail call should
continue execution at the following instruction. An example tail call
sequence is:
12: (85) call bpf_tail_call#12
13: (b7) r0 = 0
14: (95) exit
The ARM assembler for the tail call in this case ends up branching to
instruction 14 instead of instruction 13, resulting in the BPF filter
returning a non-zero value:
178: ldr r8, [sp, #588] ; insn 12
17c: ldr r6, [r8, r6]
180: ldr r8, [sp, #580]
184: cmp r8, r6
188: bcs 0x1e8
18c: ldr r6, [sp, #524]
190: ldr r7, [sp, #528]
194: cmp r7, #0
198: cmpeq r6, #32
19c: bhi 0x1e8
1a0: adds r6, r6, #1
1a4: adc r7, r7, #0
1a8: str r6, [sp, #524]
1ac: str r7, [sp, #528]
1b0: mov r6, #104
1b4: ldr r8, [sp, #588]
1b8: add r6, r8, r6
1bc: ldr r8, [sp, #580]
1c0: lsl r7, r8, #2
1c4: ldr r6, [r6, r7]
1c8: cmp r6, #0
1cc: beq 0x1e8
1d0: mov r8, #32
1d4: ldr r6, [r6, r8]
1d8: add r6, r6, #44
1dc: bx r6
1e0: mov r0, #0 ; insn 13
1e4: mov r1, #0
1e8: add sp, sp, #596 ; insn 14
1ec: pop {r4, r5, r6, r7, r8, sl, pc}
For other sequences, the tail call could end up branching midway through
the following BPF instructions, or maybe off the end of the function,
leading to unknown behaviours.
Fixes: 39c13c204bb1 ("arm: eBPF JIT compiler")
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
2018-01-13 11:39:54 +00:00
|
|
|
#define jmp_offset (out_offset - (cur_offset) - 2)
|
2018-07-11 09:32:12 +00:00
|
|
|
u32 lo, hi;
|
2018-07-11 09:31:57 +00:00
|
|
|
s8 r_array, r_index;
|
2018-07-11 09:32:12 +00:00
|
|
|
int off;
|
2017-08-22 06:32:33 +00:00
|
|
|
|
|
|
|
/* if (index >= array->map.max_entries)
|
|
|
|
* goto out;
|
|
|
|
*/
|
2018-07-11 09:32:12 +00:00
|
|
|
BUILD_BUG_ON(offsetof(struct bpf_array, map.max_entries) >
|
|
|
|
ARM_INST_LDST__IMM12);
|
2017-08-22 06:32:33 +00:00
|
|
|
off = offsetof(struct bpf_array, map.max_entries);
|
2018-07-11 09:32:28 +00:00
|
|
|
r_array = arm_bpf_get_reg32(r2[1], tmp2[0], ctx);
|
2018-01-13 12:11:26 +00:00
|
|
|
/* index is 32-bit for arrays */
|
2018-07-11 09:31:52 +00:00
|
|
|
r_index = arm_bpf_get_reg32(r3[1], tmp2[1], ctx);
|
2018-07-11 09:32:28 +00:00
|
|
|
/* array->map.max_entries */
|
|
|
|
emit(ARM_LDR_I(tmp[1], r_array, off), ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
/* index >= array->map.max_entries */
|
2018-07-11 09:31:52 +00:00
|
|
|
emit(ARM_CMP_R(r_index, tmp[1]), ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
_emit(ARM_COND_CS, ARM_B(jmp_offset), ctx);
|
|
|
|
|
2018-07-11 09:32:28 +00:00
|
|
|
/* tmp2[0] = array, tmp2[1] = index */
|
2018-07-11 09:32:22 +00:00
|
|
|
|
bpf: Change value of MAX_TAIL_CALL_CNT from 32 to 33
In the current code, the actual max tail call count is 33 which is greater
than MAX_TAIL_CALL_CNT (defined as 32). The actual limit is not consistent
with the meaning of MAX_TAIL_CALL_CNT and thus confusing at first glance.
We can see the historical evolution from commit 04fd61ab36ec ("bpf: allow
bpf programs to tail-call other bpf programs") and commit f9dabe016b63
("bpf: Undo off-by-one in interpreter tail call count limit"). In order
to avoid changing existing behavior, the actual limit is 33 now, this is
reasonable.
After commit 874be05f525e ("bpf, tests: Add tail call test suite"), we can
see there exists failed testcase.
On all archs when CONFIG_BPF_JIT_ALWAYS_ON is not set:
# echo 0 > /proc/sys/net/core/bpf_jit_enable
# modprobe test_bpf
# dmesg | grep -w FAIL
Tail call error path, max count reached jited:0 ret 34 != 33 FAIL
On some archs:
# echo 1 > /proc/sys/net/core/bpf_jit_enable
# modprobe test_bpf
# dmesg | grep -w FAIL
Tail call error path, max count reached jited:1 ret 34 != 33 FAIL
Although the above failed testcase has been fixed in commit 18935a72eb25
("bpf/tests: Fix error in tail call limit tests"), it would still be good
to change the value of MAX_TAIL_CALL_CNT from 32 to 33 to make the code
more readable.
The 32-bit x86 JIT was using a limit of 32, just fix the wrong comments and
limit to 33 tail calls as the constant MAX_TAIL_CALL_CNT updated. For the
mips64 JIT, use "ori" instead of "addiu" as suggested by Johan Almbladh.
For the riscv JIT, use RV_REG_TCC directly to save one register move as
suggested by Björn Töpel. For the other implementations, no function changes,
it does not change the current limit 33, the new value of MAX_TAIL_CALL_CNT
can reflect the actual max tail call count, the related tail call testcases
in test_bpf module and selftests can work well for the interpreter and the
JIT.
Here are the test results on x86_64:
# uname -m
x86_64
# echo 0 > /proc/sys/net/core/bpf_jit_enable
# modprobe test_bpf test_suite=test_tail_calls
# dmesg | tail -1
test_bpf: test_tail_calls: Summary: 8 PASSED, 0 FAILED, [0/8 JIT'ed]
# rmmod test_bpf
# echo 1 > /proc/sys/net/core/bpf_jit_enable
# modprobe test_bpf test_suite=test_tail_calls
# dmesg | tail -1
test_bpf: test_tail_calls: Summary: 8 PASSED, 0 FAILED, [8/8 JIT'ed]
# rmmod test_bpf
# ./test_progs -t tailcalls
#142 tailcalls:OK
Summary: 1/11 PASSED, 0 SKIPPED, 0 FAILED
Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Tested-by: Johan Almbladh <johan.almbladh@anyfinetworks.com>
Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
Acked-by: Björn Töpel <bjorn@kernel.org>
Acked-by: Johan Almbladh <johan.almbladh@anyfinetworks.com>
Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
Link: https://lore.kernel.org/bpf/1636075800-3264-1-git-send-email-yangtiezhu@loongson.cn
2021-11-05 01:30:00 +00:00
|
|
|
/*
|
|
|
|
* if (tail_call_cnt >= MAX_TAIL_CALL_CNT)
|
2017-08-22 06:32:33 +00:00
|
|
|
* goto out;
|
|
|
|
* tail_call_cnt++;
|
|
|
|
*/
|
|
|
|
lo = (u32)MAX_TAIL_CALL_CNT;
|
|
|
|
hi = (u32)((u64)MAX_TAIL_CALL_CNT >> 32);
|
2018-07-11 09:31:57 +00:00
|
|
|
tc = arm_bpf_get_reg64(tcc, tmp, ctx);
|
|
|
|
emit(ARM_CMP_I(tc[0], hi), ctx);
|
|
|
|
_emit(ARM_COND_EQ, ARM_CMP_I(tc[1], lo), ctx);
|
bpf: Change value of MAX_TAIL_CALL_CNT from 32 to 33
In the current code, the actual max tail call count is 33 which is greater
than MAX_TAIL_CALL_CNT (defined as 32). The actual limit is not consistent
with the meaning of MAX_TAIL_CALL_CNT and thus confusing at first glance.
We can see the historical evolution from commit 04fd61ab36ec ("bpf: allow
bpf programs to tail-call other bpf programs") and commit f9dabe016b63
("bpf: Undo off-by-one in interpreter tail call count limit"). In order
to avoid changing existing behavior, the actual limit is 33 now, this is
reasonable.
After commit 874be05f525e ("bpf, tests: Add tail call test suite"), we can
see there exists failed testcase.
On all archs when CONFIG_BPF_JIT_ALWAYS_ON is not set:
# echo 0 > /proc/sys/net/core/bpf_jit_enable
# modprobe test_bpf
# dmesg | grep -w FAIL
Tail call error path, max count reached jited:0 ret 34 != 33 FAIL
On some archs:
# echo 1 > /proc/sys/net/core/bpf_jit_enable
# modprobe test_bpf
# dmesg | grep -w FAIL
Tail call error path, max count reached jited:1 ret 34 != 33 FAIL
Although the above failed testcase has been fixed in commit 18935a72eb25
("bpf/tests: Fix error in tail call limit tests"), it would still be good
to change the value of MAX_TAIL_CALL_CNT from 32 to 33 to make the code
more readable.
The 32-bit x86 JIT was using a limit of 32, just fix the wrong comments and
limit to 33 tail calls as the constant MAX_TAIL_CALL_CNT updated. For the
mips64 JIT, use "ori" instead of "addiu" as suggested by Johan Almbladh.
For the riscv JIT, use RV_REG_TCC directly to save one register move as
suggested by Björn Töpel. For the other implementations, no function changes,
it does not change the current limit 33, the new value of MAX_TAIL_CALL_CNT
can reflect the actual max tail call count, the related tail call testcases
in test_bpf module and selftests can work well for the interpreter and the
JIT.
Here are the test results on x86_64:
# uname -m
x86_64
# echo 0 > /proc/sys/net/core/bpf_jit_enable
# modprobe test_bpf test_suite=test_tail_calls
# dmesg | tail -1
test_bpf: test_tail_calls: Summary: 8 PASSED, 0 FAILED, [0/8 JIT'ed]
# rmmod test_bpf
# echo 1 > /proc/sys/net/core/bpf_jit_enable
# modprobe test_bpf test_suite=test_tail_calls
# dmesg | tail -1
test_bpf: test_tail_calls: Summary: 8 PASSED, 0 FAILED, [8/8 JIT'ed]
# rmmod test_bpf
# ./test_progs -t tailcalls
#142 tailcalls:OK
Summary: 1/11 PASSED, 0 SKIPPED, 0 FAILED
Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Tested-by: Johan Almbladh <johan.almbladh@anyfinetworks.com>
Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
Acked-by: Björn Töpel <bjorn@kernel.org>
Acked-by: Johan Almbladh <johan.almbladh@anyfinetworks.com>
Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
Link: https://lore.kernel.org/bpf/1636075800-3264-1-git-send-email-yangtiezhu@loongson.cn
2021-11-05 01:30:00 +00:00
|
|
|
_emit(ARM_COND_CS, ARM_B(jmp_offset), ctx);
|
2018-07-11 09:31:57 +00:00
|
|
|
emit(ARM_ADDS_I(tc[1], tc[1], 1), ctx);
|
|
|
|
emit(ARM_ADC_I(tc[0], tc[0], 0), ctx);
|
|
|
|
arm_bpf_put_reg64(tcc, tmp, ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
|
|
|
|
/* prog = array->ptrs[index]
|
|
|
|
* if (prog == NULL)
|
|
|
|
* goto out;
|
|
|
|
*/
|
2018-07-11 09:32:12 +00:00
|
|
|
BUILD_BUG_ON(imm8m(offsetof(struct bpf_array, ptrs)) < 0);
|
|
|
|
off = imm8m(offsetof(struct bpf_array, ptrs));
|
|
|
|
emit(ARM_ADD_I(tmp[1], r_array, off), ctx);
|
2018-07-11 09:32:17 +00:00
|
|
|
emit(ARM_LDR_R_SI(tmp[1], tmp[1], r_index, SRTYPE_ASL, 2), ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
emit(ARM_CMP_I(tmp[1], 0), ctx);
|
|
|
|
_emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx);
|
|
|
|
|
|
|
|
/* goto *(prog->bpf_func + prologue_size); */
|
2018-07-11 09:32:12 +00:00
|
|
|
BUILD_BUG_ON(offsetof(struct bpf_prog, bpf_func) >
|
|
|
|
ARM_INST_LDST__IMM12);
|
2017-08-22 06:32:33 +00:00
|
|
|
off = offsetof(struct bpf_prog, bpf_func);
|
2018-07-11 09:32:12 +00:00
|
|
|
emit(ARM_LDR_I(tmp[1], tmp[1], off), ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
emit(ARM_ADD_I(tmp[1], tmp[1], ctx->prologue_bytes), ctx);
|
2018-01-13 11:35:15 +00:00
|
|
|
emit_bx_r(tmp[1], ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
|
|
|
|
/* out: */
|
|
|
|
if (out_offset == -1)
|
|
|
|
out_offset = cur_offset;
|
|
|
|
if (cur_offset != out_offset) {
|
|
|
|
pr_err_once("tail_call out_offset = %d, expected %d!\n",
|
|
|
|
cur_offset, out_offset);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
#undef cur_offset
|
|
|
|
#undef jmp_offset
|
2012-03-16 12:37:12 +00:00
|
|
|
}
|
|
|
|
|
2017-08-22 06:32:33 +00:00
|
|
|
/* 0xabcd => 0xcdab */
|
|
|
|
static inline void emit_rev16(const u8 rd, const u8 rn, struct jit_ctx *ctx)
|
2012-03-16 12:37:12 +00:00
|
|
|
{
|
2017-08-22 06:32:33 +00:00
|
|
|
#if __LINUX_ARM_ARCH__ < 6
|
2018-07-11 09:31:41 +00:00
|
|
|
const s8 *tmp2 = bpf2a32[TMP_REG_2];
|
2017-08-22 06:32:33 +00:00
|
|
|
|
|
|
|
emit(ARM_AND_I(tmp2[1], rn, 0xff), ctx);
|
|
|
|
emit(ARM_MOV_SI(tmp2[0], rn, SRTYPE_LSR, 8), ctx);
|
|
|
|
emit(ARM_AND_I(tmp2[0], tmp2[0], 0xff), ctx);
|
|
|
|
emit(ARM_ORR_SI(rd, tmp2[0], tmp2[1], SRTYPE_LSL, 8), ctx);
|
|
|
|
#else /* ARMv6+ */
|
|
|
|
emit(ARM_REV16(rd, rn), ctx);
|
|
|
|
#endif
|
|
|
|
}
|
2012-03-16 12:37:12 +00:00
|
|
|
|
2017-08-22 06:32:33 +00:00
|
|
|
/* 0xabcdefgh => 0xghefcdab */
|
|
|
|
static inline void emit_rev32(const u8 rd, const u8 rn, struct jit_ctx *ctx)
|
|
|
|
{
|
|
|
|
#if __LINUX_ARM_ARCH__ < 6
|
2018-07-11 09:31:41 +00:00
|
|
|
const s8 *tmp2 = bpf2a32[TMP_REG_2];
|
2017-08-22 06:32:33 +00:00
|
|
|
|
|
|
|
emit(ARM_AND_I(tmp2[1], rn, 0xff), ctx);
|
|
|
|
emit(ARM_MOV_SI(tmp2[0], rn, SRTYPE_LSR, 24), ctx);
|
|
|
|
emit(ARM_ORR_SI(ARM_IP, tmp2[0], tmp2[1], SRTYPE_LSL, 24), ctx);
|
|
|
|
|
|
|
|
emit(ARM_MOV_SI(tmp2[1], rn, SRTYPE_LSR, 8), ctx);
|
|
|
|
emit(ARM_AND_I(tmp2[1], tmp2[1], 0xff), ctx);
|
|
|
|
emit(ARM_MOV_SI(tmp2[0], rn, SRTYPE_LSR, 16), ctx);
|
|
|
|
emit(ARM_AND_I(tmp2[0], tmp2[0], 0xff), ctx);
|
|
|
|
emit(ARM_MOV_SI(tmp2[0], tmp2[0], SRTYPE_LSL, 8), ctx);
|
|
|
|
emit(ARM_ORR_SI(tmp2[0], tmp2[0], tmp2[1], SRTYPE_LSL, 16), ctx);
|
|
|
|
emit(ARM_ORR_R(rd, ARM_IP, tmp2[0]), ctx);
|
|
|
|
|
|
|
|
#else /* ARMv6+ */
|
|
|
|
emit(ARM_REV(rd, rn), ctx);
|
|
|
|
#endif
|
|
|
|
}
|
net: filter: get rid of BPF_S_* enum
This patch finally allows us to get rid of the BPF_S_* enum.
Currently, the code performs unnecessary encode and decode
workarounds in seccomp and filter migration itself when a filter
is being attached in order to overcome BPF_S_* encoding which
is not used anymore by the new interpreter resp. JIT compilers.
Keeping it around would mean that also in future we would need
to extend and maintain this enum and related encoders/decoders.
We can get rid of all that and save us these operations during
filter attaching. Naturally, also JIT compilers need to be updated
by this.
Before JIT conversion is being done, each compiler checks if A
is being loaded at startup to obtain information if it needs to
emit instructions to clear A first. Since BPF extensions are a
subset of BPF_LD | BPF_{W,H,B} | BPF_ABS variants, case statements
for extensions can be removed at that point. To ease and minimalize
code changes in the classic JITs, we have introduced bpf_anc_helper().
Tested with test_bpf on x86_64 (JIT, int), s390x (JIT, int),
arm (JIT, int), i386 (int), ppc64 (JIT, int); for sparc we
unfortunately didn't have access, but changes are analogous to
the rest.
Joint work with Alexei Starovoitov.
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Mircea Gherzan <mgherzan@gmail.com>
Cc: Kees Cook <keescook@chromium.org>
Acked-by: Chema Gonzalez <chemag@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-05-29 08:22:50 +00:00
|
|
|
|
2017-08-22 06:32:33 +00:00
|
|
|
// push the scratch stack register on top of the stack
|
2018-07-11 09:32:02 +00:00
|
|
|
static inline void emit_push_r64(const s8 src[], struct jit_ctx *ctx)
|
2017-08-22 06:32:33 +00:00
|
|
|
{
|
2018-07-11 09:31:41 +00:00
|
|
|
const s8 *tmp2 = bpf2a32[TMP_REG_2];
|
2018-07-11 09:32:02 +00:00
|
|
|
const s8 *rt;
|
2017-08-22 06:32:33 +00:00
|
|
|
u16 reg_set = 0;
|
2012-03-16 12:37:12 +00:00
|
|
|
|
2018-07-11 09:32:02 +00:00
|
|
|
rt = arm_bpf_get_reg64(src, tmp2, ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
|
2018-07-11 09:32:02 +00:00
|
|
|
reg_set = (1 << rt[1]) | (1 << rt[0]);
|
2017-08-22 06:32:33 +00:00
|
|
|
emit(ARM_PUSH(reg_set), ctx);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void build_prologue(struct jit_ctx *ctx)
|
|
|
|
{
|
ARM: net: bpf: Improve prologue code sequence
Improve the prologue code sequence to be able to take advantage of
64-bit stores, changing the code from:
push {r4, r5, r6, r7, r8, r9, fp, lr}
mov fp, sp
sub ip, sp, #80 ; 0x50
sub sp, sp, #600 ; 0x258
str ip, [fp, #-100] ; 0xffffff9c
mov r6, #0
str r6, [fp, #-96] ; 0xffffffa0
mov r4, #0
mov r3, r4
mov r2, r0
str r4, [fp, #-104] ; 0xffffff98
str r4, [fp, #-108] ; 0xffffff94
to the tighter:
push {r4, r5, r6, r7, r8, r9, fp, lr}
mov fp, sp
mov r3, #0
sub r2, sp, #80 ; 0x50
sub sp, sp, #600 ; 0x258
strd r2, [fp, #-100] ; 0xffffff9c
mov r2, #0
strd r2, [fp, #-108] ; 0xffffff94
mov r2, r0
resulting in a saving of three instructions.
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/E1ieH2g-0004ih-Rb@rmk-PC.armlinux.org.uk
2019-12-09 11:17:30 +00:00
|
|
|
const s8 arm_r0 = bpf2a32[BPF_REG_0][1];
|
|
|
|
const s8 *bpf_r1 = bpf2a32[BPF_REG_1];
|
|
|
|
const s8 *bpf_fp = bpf2a32[BPF_REG_FP];
|
2018-07-11 09:31:41 +00:00
|
|
|
const s8 *tcc = bpf2a32[TCALL_CNT];
|
2017-08-22 06:32:33 +00:00
|
|
|
|
|
|
|
/* Save callee saved registers. */
|
|
|
|
#ifdef CONFIG_FRAME_POINTER
|
2018-01-13 22:38:18 +00:00
|
|
|
u16 reg_set = CALLEE_PUSH_MASK | 1 << ARM_IP | 1 << ARM_PC;
|
|
|
|
emit(ARM_MOV_R(ARM_IP, ARM_SP), ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
emit(ARM_PUSH(reg_set), ctx);
|
|
|
|
emit(ARM_SUB_I(ARM_FP, ARM_IP, 4), ctx);
|
|
|
|
#else
|
2018-01-13 22:38:18 +00:00
|
|
|
emit(ARM_PUSH(CALLEE_PUSH_MASK), ctx);
|
|
|
|
emit(ARM_MOV_R(ARM_FP, ARM_SP), ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
#endif
|
ARM: net: bpf: Improve prologue code sequence
Improve the prologue code sequence to be able to take advantage of
64-bit stores, changing the code from:
push {r4, r5, r6, r7, r8, r9, fp, lr}
mov fp, sp
sub ip, sp, #80 ; 0x50
sub sp, sp, #600 ; 0x258
str ip, [fp, #-100] ; 0xffffff9c
mov r6, #0
str r6, [fp, #-96] ; 0xffffffa0
mov r4, #0
mov r3, r4
mov r2, r0
str r4, [fp, #-104] ; 0xffffff98
str r4, [fp, #-108] ; 0xffffff94
to the tighter:
push {r4, r5, r6, r7, r8, r9, fp, lr}
mov fp, sp
mov r3, #0
sub r2, sp, #80 ; 0x50
sub sp, sp, #600 ; 0x258
strd r2, [fp, #-100] ; 0xffffff9c
mov r2, #0
strd r2, [fp, #-108] ; 0xffffff94
mov r2, r0
resulting in a saving of three instructions.
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/E1ieH2g-0004ih-Rb@rmk-PC.armlinux.org.uk
2019-12-09 11:17:30 +00:00
|
|
|
/* mov r3, #0 */
|
|
|
|
/* sub r2, sp, #SCRATCH_SIZE */
|
|
|
|
emit(ARM_MOV_I(bpf_r1[0], 0), ctx);
|
|
|
|
emit(ARM_SUB_I(bpf_r1[1], ARM_SP, SCRATCH_SIZE), ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
|
|
|
|
ctx->stack_size = imm8m(STACK_SIZE);
|
|
|
|
|
|
|
|
/* Set up function call stack */
|
|
|
|
emit(ARM_SUB_I(ARM_SP, ARM_SP, ctx->stack_size), ctx);
|
2012-03-16 12:37:12 +00:00
|
|
|
|
2017-08-22 06:32:33 +00:00
|
|
|
/* Set up BPF prog stack base register */
|
ARM: net: bpf: Improve prologue code sequence
Improve the prologue code sequence to be able to take advantage of
64-bit stores, changing the code from:
push {r4, r5, r6, r7, r8, r9, fp, lr}
mov fp, sp
sub ip, sp, #80 ; 0x50
sub sp, sp, #600 ; 0x258
str ip, [fp, #-100] ; 0xffffff9c
mov r6, #0
str r6, [fp, #-96] ; 0xffffffa0
mov r4, #0
mov r3, r4
mov r2, r0
str r4, [fp, #-104] ; 0xffffff98
str r4, [fp, #-108] ; 0xffffff94
to the tighter:
push {r4, r5, r6, r7, r8, r9, fp, lr}
mov fp, sp
mov r3, #0
sub r2, sp, #80 ; 0x50
sub sp, sp, #600 ; 0x258
strd r2, [fp, #-100] ; 0xffffff9c
mov r2, #0
strd r2, [fp, #-108] ; 0xffffff94
mov r2, r0
resulting in a saving of three instructions.
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/E1ieH2g-0004ih-Rb@rmk-PC.armlinux.org.uk
2019-12-09 11:17:30 +00:00
|
|
|
emit_a32_mov_r64(true, bpf_fp, bpf_r1, ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
|
ARM: net: bpf: Improve prologue code sequence
Improve the prologue code sequence to be able to take advantage of
64-bit stores, changing the code from:
push {r4, r5, r6, r7, r8, r9, fp, lr}
mov fp, sp
sub ip, sp, #80 ; 0x50
sub sp, sp, #600 ; 0x258
str ip, [fp, #-100] ; 0xffffff9c
mov r6, #0
str r6, [fp, #-96] ; 0xffffffa0
mov r4, #0
mov r3, r4
mov r2, r0
str r4, [fp, #-104] ; 0xffffff98
str r4, [fp, #-108] ; 0xffffff94
to the tighter:
push {r4, r5, r6, r7, r8, r9, fp, lr}
mov fp, sp
mov r3, #0
sub r2, sp, #80 ; 0x50
sub sp, sp, #600 ; 0x258
strd r2, [fp, #-100] ; 0xffffff9c
mov r2, #0
strd r2, [fp, #-108] ; 0xffffff94
mov r2, r0
resulting in a saving of three instructions.
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/E1ieH2g-0004ih-Rb@rmk-PC.armlinux.org.uk
2019-12-09 11:17:30 +00:00
|
|
|
/* Initialize Tail Count */
|
|
|
|
emit(ARM_MOV_I(bpf_r1[1], 0), ctx);
|
|
|
|
emit_a32_mov_r64(true, tcc, bpf_r1, ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
|
|
|
|
/* Move BPF_CTX to BPF_R1 */
|
ARM: net: bpf: Improve prologue code sequence
Improve the prologue code sequence to be able to take advantage of
64-bit stores, changing the code from:
push {r4, r5, r6, r7, r8, r9, fp, lr}
mov fp, sp
sub ip, sp, #80 ; 0x50
sub sp, sp, #600 ; 0x258
str ip, [fp, #-100] ; 0xffffff9c
mov r6, #0
str r6, [fp, #-96] ; 0xffffffa0
mov r4, #0
mov r3, r4
mov r2, r0
str r4, [fp, #-104] ; 0xffffff98
str r4, [fp, #-108] ; 0xffffff94
to the tighter:
push {r4, r5, r6, r7, r8, r9, fp, lr}
mov fp, sp
mov r3, #0
sub r2, sp, #80 ; 0x50
sub sp, sp, #600 ; 0x258
strd r2, [fp, #-100] ; 0xffffff9c
mov r2, #0
strd r2, [fp, #-108] ; 0xffffff94
mov r2, r0
resulting in a saving of three instructions.
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/E1ieH2g-0004ih-Rb@rmk-PC.armlinux.org.uk
2019-12-09 11:17:30 +00:00
|
|
|
emit(ARM_MOV_R(bpf_r1[1], arm_r0), ctx);
|
|
|
|
|
2017-08-22 06:32:33 +00:00
|
|
|
/* end of prologue */
|
|
|
|
}
|
|
|
|
|
2018-01-13 22:38:18 +00:00
|
|
|
/* restore callee saved registers. */
|
2017-08-22 06:32:33 +00:00
|
|
|
static void build_epilogue(struct jit_ctx *ctx)
|
|
|
|
{
|
|
|
|
#ifdef CONFIG_FRAME_POINTER
|
2018-01-13 22:38:18 +00:00
|
|
|
/* When using frame pointers, some additional registers need to
|
|
|
|
* be loaded. */
|
|
|
|
u16 reg_set = CALLEE_POP_MASK | 1 << ARM_SP;
|
|
|
|
emit(ARM_SUB_I(ARM_SP, ARM_FP, hweight16(reg_set) * 4), ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
emit(ARM_LDM(ARM_SP, reg_set), ctx);
|
|
|
|
#else
|
|
|
|
/* Restore callee saved registers. */
|
2018-01-13 22:38:18 +00:00
|
|
|
emit(ARM_MOV_R(ARM_SP, ARM_FP), ctx);
|
|
|
|
emit(ARM_POP(CALLEE_POP_MASK), ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Convert an eBPF instruction to native instruction, i.e
|
|
|
|
* JITs an eBPF instruction.
|
|
|
|
* Returns :
|
|
|
|
* 0 - Successfully JITed an 8-byte eBPF instruction
|
|
|
|
* >0 - Successfully JITed a 16-byte eBPF instruction
|
|
|
|
* <0 - Failed to JIT.
|
|
|
|
*/
|
|
|
|
static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
|
|
|
|
{
|
|
|
|
const u8 code = insn->code;
|
2018-07-11 09:31:41 +00:00
|
|
|
const s8 *dst = bpf2a32[insn->dst_reg];
|
|
|
|
const s8 *src = bpf2a32[insn->src_reg];
|
|
|
|
const s8 *tmp = bpf2a32[TMP_REG_1];
|
|
|
|
const s8 *tmp2 = bpf2a32[TMP_REG_2];
|
2017-08-22 06:32:33 +00:00
|
|
|
const s16 off = insn->off;
|
|
|
|
const s32 imm = insn->imm;
|
|
|
|
const int i = insn - ctx->prog->insnsi;
|
|
|
|
const bool is64 = BPF_CLASS(code) == BPF_ALU64;
|
2018-07-11 09:31:57 +00:00
|
|
|
const s8 *rd, *rs;
|
|
|
|
s8 rd_lo, rt, rm, rn;
|
2017-08-22 06:32:33 +00:00
|
|
|
s32 jmp_offset;
|
|
|
|
|
|
|
|
#define check_imm(bits, imm) do { \
|
2018-05-11 03:06:34 +00:00
|
|
|
if ((imm) >= (1 << ((bits) - 1)) || \
|
|
|
|
(imm) < -(1 << ((bits) - 1))) { \
|
2017-08-22 06:32:33 +00:00
|
|
|
pr_info("[%2d] imm=%d(0x%x) out of range\n", \
|
|
|
|
i, imm, imm); \
|
|
|
|
return -EINVAL; \
|
|
|
|
} \
|
|
|
|
} while (0)
|
|
|
|
#define check_imm24(imm) check_imm(24, imm)
|
|
|
|
|
|
|
|
switch (code) {
|
|
|
|
/* ALU operations */
|
|
|
|
|
|
|
|
/* dst = src */
|
|
|
|
case BPF_ALU | BPF_MOV | BPF_K:
|
|
|
|
case BPF_ALU | BPF_MOV | BPF_X:
|
|
|
|
case BPF_ALU64 | BPF_MOV | BPF_K:
|
|
|
|
case BPF_ALU64 | BPF_MOV | BPF_X:
|
|
|
|
switch (BPF_SRC(code)) {
|
|
|
|
case BPF_X:
|
2019-05-24 22:25:22 +00:00
|
|
|
if (imm == 1) {
|
|
|
|
/* Special mov32 for zext */
|
|
|
|
emit_a32_mov_i(dst_hi, 0, ctx);
|
|
|
|
break;
|
|
|
|
}
|
2023-09-07 23:05:44 +00:00
|
|
|
if (insn->off)
|
|
|
|
emit_a32_movsx_r64(is64, insn->off, dst, src, ctx);
|
|
|
|
else
|
|
|
|
emit_a32_mov_r64(is64, dst, src, ctx);
|
2012-03-16 12:37:12 +00:00
|
|
|
break;
|
2017-08-22 06:32:33 +00:00
|
|
|
case BPF_K:
|
|
|
|
/* Sign-extend immediate value to destination reg */
|
ARM: net: bpf: improve 64-bit load immediate implementation
Rather than writing each 32-bit half of the 64-bit immediate value
separately when the register is on the stack:
movw r6, #45056 ; 0xb000
movt r6, #60979 ; 0xee33
str r6, [fp, #-44] ; 0xffffffd4
mov r6, #0
str r6, [fp, #-40] ; 0xffffffd8
arrange to use the double-word store when available instead:
movw r6, #45056 ; 0xb000
movt r6, #60979 ; 0xee33
mov r7, #0
strd r6, [fp, #-44] ; 0xffffffd4
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-07-12 20:50:41 +00:00
|
|
|
emit_a32_mov_se_i64(is64, dst, imm, ctx);
|
2012-03-16 12:37:12 +00:00
|
|
|
break;
|
2017-08-22 06:32:33 +00:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
/* dst = dst + src/imm */
|
|
|
|
/* dst = dst - src/imm */
|
|
|
|
/* dst = dst | src/imm */
|
|
|
|
/* dst = dst & src/imm */
|
|
|
|
/* dst = dst ^ src/imm */
|
|
|
|
/* dst = dst * src/imm */
|
|
|
|
/* dst = dst << src */
|
|
|
|
/* dst = dst >> src */
|
|
|
|
case BPF_ALU | BPF_ADD | BPF_K:
|
|
|
|
case BPF_ALU | BPF_ADD | BPF_X:
|
|
|
|
case BPF_ALU | BPF_SUB | BPF_K:
|
|
|
|
case BPF_ALU | BPF_SUB | BPF_X:
|
|
|
|
case BPF_ALU | BPF_OR | BPF_K:
|
|
|
|
case BPF_ALU | BPF_OR | BPF_X:
|
|
|
|
case BPF_ALU | BPF_AND | BPF_K:
|
|
|
|
case BPF_ALU | BPF_AND | BPF_X:
|
|
|
|
case BPF_ALU | BPF_XOR | BPF_K:
|
|
|
|
case BPF_ALU | BPF_XOR | BPF_X:
|
|
|
|
case BPF_ALU | BPF_MUL | BPF_K:
|
|
|
|
case BPF_ALU | BPF_MUL | BPF_X:
|
|
|
|
case BPF_ALU | BPF_LSH | BPF_X:
|
|
|
|
case BPF_ALU | BPF_RSH | BPF_X:
|
|
|
|
case BPF_ALU | BPF_ARSH | BPF_X:
|
|
|
|
case BPF_ALU64 | BPF_ADD | BPF_K:
|
|
|
|
case BPF_ALU64 | BPF_ADD | BPF_X:
|
|
|
|
case BPF_ALU64 | BPF_SUB | BPF_K:
|
|
|
|
case BPF_ALU64 | BPF_SUB | BPF_X:
|
|
|
|
case BPF_ALU64 | BPF_OR | BPF_K:
|
|
|
|
case BPF_ALU64 | BPF_OR | BPF_X:
|
|
|
|
case BPF_ALU64 | BPF_AND | BPF_K:
|
|
|
|
case BPF_ALU64 | BPF_AND | BPF_X:
|
|
|
|
case BPF_ALU64 | BPF_XOR | BPF_K:
|
|
|
|
case BPF_ALU64 | BPF_XOR | BPF_X:
|
|
|
|
switch (BPF_SRC(code)) {
|
|
|
|
case BPF_X:
|
2018-07-11 09:31:47 +00:00
|
|
|
emit_a32_alu_r64(is64, dst, src, ctx, BPF_OP(code));
|
2012-03-16 12:37:12 +00:00
|
|
|
break;
|
2017-08-22 06:32:33 +00:00
|
|
|
case BPF_K:
|
|
|
|
/* Move immediate value to the temporary register
|
|
|
|
* and then do the ALU operation on the temporary
|
|
|
|
* register as this will sign-extend the immediate
|
|
|
|
* value into temporary reg and then it would be
|
|
|
|
* safe to do the operation on it.
|
2015-07-21 12:14:13 +00:00
|
|
|
*/
|
ARM: net: bpf: improve 64-bit load immediate implementation
Rather than writing each 32-bit half of the 64-bit immediate value
separately when the register is on the stack:
movw r6, #45056 ; 0xb000
movt r6, #60979 ; 0xee33
str r6, [fp, #-44] ; 0xffffffd4
mov r6, #0
str r6, [fp, #-40] ; 0xffffffd8
arrange to use the double-word store when available instead:
movw r6, #45056 ; 0xb000
movt r6, #60979 ; 0xee33
mov r7, #0
strd r6, [fp, #-44] ; 0xffffffd4
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-07-12 20:50:41 +00:00
|
|
|
emit_a32_mov_se_i64(is64, tmp2, imm, ctx);
|
2018-07-11 09:31:47 +00:00
|
|
|
emit_a32_alu_r64(is64, dst, tmp2, ctx, BPF_OP(code));
|
2012-03-16 12:37:12 +00:00
|
|
|
break;
|
2017-08-22 06:32:33 +00:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
/* dst = dst / src(imm) */
|
|
|
|
/* dst = dst % src(imm) */
|
|
|
|
case BPF_ALU | BPF_DIV | BPF_K:
|
|
|
|
case BPF_ALU | BPF_DIV | BPF_X:
|
|
|
|
case BPF_ALU | BPF_MOD | BPF_K:
|
|
|
|
case BPF_ALU | BPF_MOD | BPF_X:
|
2018-07-11 09:31:57 +00:00
|
|
|
rd_lo = arm_bpf_get_reg32(dst_lo, tmp2[1], ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
switch (BPF_SRC(code)) {
|
|
|
|
case BPF_X:
|
2018-07-11 09:31:52 +00:00
|
|
|
rt = arm_bpf_get_reg32(src_lo, tmp2[0], ctx);
|
2012-03-16 12:37:12 +00:00
|
|
|
break;
|
2017-08-22 06:32:33 +00:00
|
|
|
case BPF_K:
|
|
|
|
rt = tmp2[0];
|
2018-07-11 09:31:47 +00:00
|
|
|
emit_a32_mov_i(rt, imm, ctx);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
rt = src_lo;
|
2012-03-16 12:37:12 +00:00
|
|
|
break;
|
2017-08-22 06:32:33 +00:00
|
|
|
}
|
2023-09-07 23:05:46 +00:00
|
|
|
emit_udivmod(rd_lo, rd_lo, rt, ctx, BPF_OP(code), off);
|
2018-07-11 09:31:57 +00:00
|
|
|
arm_bpf_put_reg32(dst_lo, rd_lo, ctx);
|
2019-05-24 22:25:22 +00:00
|
|
|
if (!ctx->prog->aux->verifier_zext)
|
|
|
|
emit_a32_mov_i(dst_hi, 0, ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
break;
|
|
|
|
case BPF_ALU64 | BPF_DIV | BPF_K:
|
|
|
|
case BPF_ALU64 | BPF_DIV | BPF_X:
|
|
|
|
case BPF_ALU64 | BPF_MOD | BPF_K:
|
|
|
|
case BPF_ALU64 | BPF_MOD | BPF_X:
|
2023-09-07 23:05:47 +00:00
|
|
|
rd = arm_bpf_get_reg64(dst, tmp2, ctx);
|
|
|
|
switch (BPF_SRC(code)) {
|
|
|
|
case BPF_X:
|
|
|
|
rs = arm_bpf_get_reg64(src, tmp, ctx);
|
|
|
|
break;
|
|
|
|
case BPF_K:
|
|
|
|
rs = tmp;
|
|
|
|
emit_a32_mov_se_i64(is64, rs, imm, ctx);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
emit_udivmod64(rd, rd, rs, ctx, BPF_OP(code), off);
|
|
|
|
arm_bpf_put_reg64(dst, rd, ctx);
|
|
|
|
break;
|
2017-08-22 06:32:33 +00:00
|
|
|
/* dst = dst << imm */
|
bpf, arm: Optimize ALU ARSH K using asr immediate instruction
This patch adds an optimization that uses the asr immediate instruction
for BPF_ALU BPF_ARSH BPF_K, rather than loading the immediate to
a temporary register. This is similar to existing code for handling
BPF_ALU BPF_{LSH,RSH} BPF_K. This optimization saves two instructions
and is more consistent with LSH and RSH.
Example of the code generated for BPF_ALU32_IMM(BPF_ARSH, BPF_REG_0, 5)
before the optimization:
2c: mov r8, #5
30: mov r9, #0
34: asr r0, r0, r8
and after optimization:
2c: asr r0, r0, #5
Tested on QEMU using lib/test_bpf and test_verifier.
Co-developed-by: Xi Wang <xi.wang@gmail.com>
Signed-off-by: Xi Wang <xi.wang@gmail.com>
Signed-off-by: Luke Nelson <luke.r.nels@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200501020210.32294-3-luke.r.nels@gmail.com
2020-05-01 02:02:10 +00:00
|
|
|
/* dst = dst >> imm */
|
|
|
|
/* dst = dst >> imm (signed) */
|
2017-08-22 06:32:33 +00:00
|
|
|
case BPF_ALU | BPF_LSH | BPF_K:
|
bpf, arm: Optimize ALU ARSH K using asr immediate instruction
This patch adds an optimization that uses the asr immediate instruction
for BPF_ALU BPF_ARSH BPF_K, rather than loading the immediate to
a temporary register. This is similar to existing code for handling
BPF_ALU BPF_{LSH,RSH} BPF_K. This optimization saves two instructions
and is more consistent with LSH and RSH.
Example of the code generated for BPF_ALU32_IMM(BPF_ARSH, BPF_REG_0, 5)
before the optimization:
2c: mov r8, #5
30: mov r9, #0
34: asr r0, r0, r8
and after optimization:
2c: asr r0, r0, #5
Tested on QEMU using lib/test_bpf and test_verifier.
Co-developed-by: Xi Wang <xi.wang@gmail.com>
Signed-off-by: Xi Wang <xi.wang@gmail.com>
Signed-off-by: Luke Nelson <luke.r.nels@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200501020210.32294-3-luke.r.nels@gmail.com
2020-05-01 02:02:10 +00:00
|
|
|
case BPF_ALU | BPF_RSH | BPF_K:
|
|
|
|
case BPF_ALU | BPF_ARSH | BPF_K:
|
2017-08-22 06:32:33 +00:00
|
|
|
if (unlikely(imm > 31))
|
|
|
|
return -EINVAL;
|
|
|
|
if (imm)
|
2018-07-11 09:31:47 +00:00
|
|
|
emit_a32_alu_i(dst_lo, imm, ctx, BPF_OP(code));
|
2019-05-24 22:25:22 +00:00
|
|
|
if (!ctx->prog->aux->verifier_zext)
|
|
|
|
emit_a32_mov_i(dst_hi, 0, ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
break;
|
|
|
|
/* dst = dst << imm */
|
|
|
|
case BPF_ALU64 | BPF_LSH | BPF_K:
|
|
|
|
if (unlikely(imm > 63))
|
|
|
|
return -EINVAL;
|
2018-07-11 09:31:47 +00:00
|
|
|
emit_a32_lsh_i64(dst, imm, ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
break;
|
|
|
|
/* dst = dst >> imm */
|
|
|
|
case BPF_ALU64 | BPF_RSH | BPF_K:
|
|
|
|
if (unlikely(imm > 63))
|
|
|
|
return -EINVAL;
|
2018-07-11 09:31:47 +00:00
|
|
|
emit_a32_rsh_i64(dst, imm, ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
break;
|
|
|
|
/* dst = dst << src */
|
|
|
|
case BPF_ALU64 | BPF_LSH | BPF_X:
|
2018-07-11 09:31:47 +00:00
|
|
|
emit_a32_lsh_r64(dst, src, ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
break;
|
|
|
|
/* dst = dst >> src */
|
|
|
|
case BPF_ALU64 | BPF_RSH | BPF_X:
|
2018-07-11 09:31:47 +00:00
|
|
|
emit_a32_rsh_r64(dst, src, ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
break;
|
|
|
|
/* dst = dst >> src (signed) */
|
|
|
|
case BPF_ALU64 | BPF_ARSH | BPF_X:
|
2018-07-11 09:31:47 +00:00
|
|
|
emit_a32_arsh_r64(dst, src, ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
break;
|
|
|
|
/* dst = dst >> imm (signed) */
|
|
|
|
case BPF_ALU64 | BPF_ARSH | BPF_K:
|
|
|
|
if (unlikely(imm > 63))
|
|
|
|
return -EINVAL;
|
2018-07-11 09:31:47 +00:00
|
|
|
emit_a32_arsh_i64(dst, imm, ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
break;
|
|
|
|
/* dst = -dst */
|
|
|
|
case BPF_ALU | BPF_NEG:
|
2018-07-11 09:31:47 +00:00
|
|
|
emit_a32_alu_i(dst_lo, 0, ctx, BPF_OP(code));
|
2019-05-24 22:25:22 +00:00
|
|
|
if (!ctx->prog->aux->verifier_zext)
|
|
|
|
emit_a32_mov_i(dst_hi, 0, ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
break;
|
|
|
|
/* dst = -dst (64 bit) */
|
|
|
|
case BPF_ALU64 | BPF_NEG:
|
2018-07-11 09:31:47 +00:00
|
|
|
emit_a32_neg64(dst, ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
break;
|
|
|
|
/* dst = dst * src/imm */
|
|
|
|
case BPF_ALU64 | BPF_MUL | BPF_X:
|
|
|
|
case BPF_ALU64 | BPF_MUL | BPF_K:
|
|
|
|
switch (BPF_SRC(code)) {
|
|
|
|
case BPF_X:
|
2018-07-11 09:31:47 +00:00
|
|
|
emit_a32_mul_r64(dst, src, ctx);
|
2012-03-16 12:37:12 +00:00
|
|
|
break;
|
2017-08-22 06:32:33 +00:00
|
|
|
case BPF_K:
|
|
|
|
/* Move immediate value to the temporary register
|
|
|
|
* and then do the multiplication on it as this
|
|
|
|
* will sign-extend the immediate value into temp
|
|
|
|
* reg then it would be safe to do the operation
|
|
|
|
* on it.
|
2012-03-16 12:37:12 +00:00
|
|
|
*/
|
ARM: net: bpf: improve 64-bit load immediate implementation
Rather than writing each 32-bit half of the 64-bit immediate value
separately when the register is on the stack:
movw r6, #45056 ; 0xb000
movt r6, #60979 ; 0xee33
str r6, [fp, #-44] ; 0xffffffd4
mov r6, #0
str r6, [fp, #-40] ; 0xffffffd8
arrange to use the double-word store when available instead:
movw r6, #45056 ; 0xb000
movt r6, #60979 ; 0xee33
mov r7, #0
strd r6, [fp, #-44] ; 0xffffffd4
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-07-12 20:50:41 +00:00
|
|
|
emit_a32_mov_se_i64(is64, tmp2, imm, ctx);
|
2018-07-11 09:31:47 +00:00
|
|
|
emit_a32_mul_r64(dst, tmp2, ctx);
|
2012-03-16 12:37:12 +00:00
|
|
|
break;
|
2017-08-22 06:32:33 +00:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
/* dst = htole(dst) */
|
|
|
|
/* dst = htobe(dst) */
|
2023-09-07 23:05:45 +00:00
|
|
|
case BPF_ALU | BPF_END | BPF_FROM_LE: /* also BPF_TO_LE */
|
|
|
|
case BPF_ALU | BPF_END | BPF_FROM_BE: /* also BPF_TO_BE */
|
|
|
|
/* dst = bswap(dst) */
|
|
|
|
case BPF_ALU64 | BPF_END | BPF_FROM_LE: /* also BPF_TO_LE */
|
2018-07-11 09:31:57 +00:00
|
|
|
rd = arm_bpf_get_reg64(dst, tmp, ctx);
|
2023-09-07 23:05:45 +00:00
|
|
|
if (BPF_SRC(code) == BPF_FROM_LE && BPF_CLASS(code) != BPF_ALU64)
|
2017-08-22 06:32:33 +00:00
|
|
|
goto emit_bswap_uxt;
|
|
|
|
switch (imm) {
|
|
|
|
case 16:
|
2018-07-11 09:31:57 +00:00
|
|
|
emit_rev16(rd[1], rd[1], ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
goto emit_bswap_uxt;
|
|
|
|
case 32:
|
2018-07-11 09:31:57 +00:00
|
|
|
emit_rev32(rd[1], rd[1], ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
goto emit_bswap_uxt;
|
|
|
|
case 64:
|
2018-07-11 09:31:57 +00:00
|
|
|
emit_rev32(ARM_LR, rd[1], ctx);
|
|
|
|
emit_rev32(rd[1], rd[0], ctx);
|
|
|
|
emit(ARM_MOV_R(rd[0], ARM_LR), ctx);
|
2015-10-02 15:06:47 +00:00
|
|
|
break;
|
2017-08-22 06:32:33 +00:00
|
|
|
}
|
|
|
|
goto exit;
|
|
|
|
emit_bswap_uxt:
|
|
|
|
switch (imm) {
|
|
|
|
case 16:
|
|
|
|
/* zero-extend 16 bits into 64 bits */
|
|
|
|
#if __LINUX_ARM_ARCH__ < 6
|
2018-07-11 09:31:47 +00:00
|
|
|
emit_a32_mov_i(tmp2[1], 0xffff, ctx);
|
2018-07-11 09:31:57 +00:00
|
|
|
emit(ARM_AND_R(rd[1], rd[1], tmp2[1]), ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
#else /* ARMv6+ */
|
2018-07-11 09:31:57 +00:00
|
|
|
emit(ARM_UXTH(rd[1], rd[1]), ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
#endif
|
2019-05-24 22:25:22 +00:00
|
|
|
if (!ctx->prog->aux->verifier_zext)
|
|
|
|
emit(ARM_EOR_R(rd[0], rd[0], rd[0]), ctx);
|
2015-10-02 15:06:47 +00:00
|
|
|
break;
|
2017-08-22 06:32:33 +00:00
|
|
|
case 32:
|
|
|
|
/* zero-extend 32 bits into 64 bits */
|
2019-05-24 22:25:22 +00:00
|
|
|
if (!ctx->prog->aux->verifier_zext)
|
|
|
|
emit(ARM_EOR_R(rd[0], rd[0], rd[0]), ctx);
|
2012-03-16 12:37:12 +00:00
|
|
|
break;
|
2017-08-22 06:32:33 +00:00
|
|
|
case 64:
|
|
|
|
/* nop */
|
2012-03-16 12:37:12 +00:00
|
|
|
break;
|
2017-08-22 06:32:33 +00:00
|
|
|
}
|
|
|
|
exit:
|
2018-07-11 09:31:57 +00:00
|
|
|
arm_bpf_put_reg64(dst, rd, ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
break;
|
|
|
|
/* dst = imm64 */
|
|
|
|
case BPF_LD | BPF_IMM | BPF_DW:
|
|
|
|
{
|
ARM: net: bpf: improve 64-bit load immediate implementation
Rather than writing each 32-bit half of the 64-bit immediate value
separately when the register is on the stack:
movw r6, #45056 ; 0xb000
movt r6, #60979 ; 0xee33
str r6, [fp, #-44] ; 0xffffffd4
mov r6, #0
str r6, [fp, #-40] ; 0xffffffd8
arrange to use the double-word store when available instead:
movw r6, #45056 ; 0xb000
movt r6, #60979 ; 0xee33
mov r7, #0
strd r6, [fp, #-44] ; 0xffffffd4
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-07-12 20:50:41 +00:00
|
|
|
u64 val = (u32)imm | (u64)insn[1].imm << 32;
|
2017-08-22 06:32:33 +00:00
|
|
|
|
ARM: net: bpf: improve 64-bit load immediate implementation
Rather than writing each 32-bit half of the 64-bit immediate value
separately when the register is on the stack:
movw r6, #45056 ; 0xb000
movt r6, #60979 ; 0xee33
str r6, [fp, #-44] ; 0xffffffd4
mov r6, #0
str r6, [fp, #-40] ; 0xffffffd8
arrange to use the double-word store when available instead:
movw r6, #45056 ; 0xb000
movt r6, #60979 ; 0xee33
mov r7, #0
strd r6, [fp, #-44] ; 0xffffffd4
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-07-12 20:50:41 +00:00
|
|
|
emit_a32_mov_i64(dst, val, ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
/* LDX: dst = *(size *)(src + off) */
|
|
|
|
case BPF_LDX | BPF_MEM | BPF_W:
|
|
|
|
case BPF_LDX | BPF_MEM | BPF_H:
|
|
|
|
case BPF_LDX | BPF_MEM | BPF_B:
|
|
|
|
case BPF_LDX | BPF_MEM | BPF_DW:
|
2023-09-07 23:05:43 +00:00
|
|
|
/* LDSX: dst = *(signed size *)(src + off) */
|
|
|
|
case BPF_LDX | BPF_MEMSX | BPF_B:
|
|
|
|
case BPF_LDX | BPF_MEMSX | BPF_H:
|
|
|
|
case BPF_LDX | BPF_MEMSX | BPF_W:
|
2018-07-11 09:31:52 +00:00
|
|
|
rn = arm_bpf_get_reg32(src_lo, tmp2[1], ctx);
|
2023-09-07 23:05:43 +00:00
|
|
|
if (BPF_MODE(insn->code) == BPF_MEMSX)
|
|
|
|
emit_ldsx_r(dst, rn, off, ctx, BPF_SIZE(code));
|
|
|
|
else
|
|
|
|
emit_ldx_r(dst, rn, off, ctx, BPF_SIZE(code));
|
2017-08-22 06:32:33 +00:00
|
|
|
break;
|
2021-07-13 08:18:31 +00:00
|
|
|
/* speculation barrier */
|
|
|
|
case BPF_ST | BPF_NOSPEC:
|
|
|
|
break;
|
2017-08-22 06:32:33 +00:00
|
|
|
/* ST: *(size *)(dst + off) = imm */
|
|
|
|
case BPF_ST | BPF_MEM | BPF_W:
|
|
|
|
case BPF_ST | BPF_MEM | BPF_H:
|
|
|
|
case BPF_ST | BPF_MEM | BPF_B:
|
|
|
|
case BPF_ST | BPF_MEM | BPF_DW:
|
|
|
|
switch (BPF_SIZE(code)) {
|
|
|
|
case BPF_DW:
|
|
|
|
/* Sign-extend immediate value into temp reg */
|
ARM: net: bpf: improve 64-bit load immediate implementation
Rather than writing each 32-bit half of the 64-bit immediate value
separately when the register is on the stack:
movw r6, #45056 ; 0xb000
movt r6, #60979 ; 0xee33
str r6, [fp, #-44] ; 0xffffffd4
mov r6, #0
str r6, [fp, #-40] ; 0xffffffd8
arrange to use the double-word store when available instead:
movw r6, #45056 ; 0xb000
movt r6, #60979 ; 0xee33
mov r7, #0
strd r6, [fp, #-44] ; 0xffffffd4
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-07-12 20:50:41 +00:00
|
|
|
emit_a32_mov_se_i64(true, tmp2, imm, ctx);
|
2012-03-16 12:37:12 +00:00
|
|
|
break;
|
2017-08-22 06:32:33 +00:00
|
|
|
case BPF_W:
|
|
|
|
case BPF_H:
|
|
|
|
case BPF_B:
|
2018-07-11 09:31:47 +00:00
|
|
|
emit_a32_mov_i(tmp2[1], imm, ctx);
|
2012-03-16 12:37:12 +00:00
|
|
|
break;
|
2017-08-22 06:32:33 +00:00
|
|
|
}
|
ARM: net: bpf: improve 64-bit store implementation
Improve the 64-bit store implementation from:
ldr r6, [fp, #-8]
str r8, [r6]
ldr r6, [fp, #-8]
mov r7, #4
add r7, r6, r7
str r9, [r7]
to:
ldr r6, [fp, #-8]
str r8, [r6]
str r9, [r6, #4]
We leave the store as two separate STR instructions rather than using
STRD as the store may not be aligned, and STR can handle misalignment.
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-07-12 20:50:51 +00:00
|
|
|
emit_str_r(dst_lo, tmp2, off, ctx, BPF_SIZE(code));
|
2017-08-22 06:32:33 +00:00
|
|
|
break;
|
2021-01-14 18:17:44 +00:00
|
|
|
/* Atomic ops */
|
|
|
|
case BPF_STX | BPF_ATOMIC | BPF_W:
|
|
|
|
case BPF_STX | BPF_ATOMIC | BPF_DW:
|
2017-08-22 06:32:33 +00:00
|
|
|
goto notyet;
|
|
|
|
/* STX: *(size *)(dst + off) = src */
|
|
|
|
case BPF_STX | BPF_MEM | BPF_W:
|
|
|
|
case BPF_STX | BPF_MEM | BPF_H:
|
|
|
|
case BPF_STX | BPF_MEM | BPF_B:
|
|
|
|
case BPF_STX | BPF_MEM | BPF_DW:
|
2018-07-11 09:31:57 +00:00
|
|
|
rs = arm_bpf_get_reg64(src, tmp2, ctx);
|
ARM: net: bpf: improve 64-bit store implementation
Improve the 64-bit store implementation from:
ldr r6, [fp, #-8]
str r8, [r6]
ldr r6, [fp, #-8]
mov r7, #4
add r7, r6, r7
str r9, [r7]
to:
ldr r6, [fp, #-8]
str r8, [r6]
str r9, [r6, #4]
We leave the store as two separate STR instructions rather than using
STRD as the store may not be aligned, and STR can handle misalignment.
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-07-12 20:50:51 +00:00
|
|
|
emit_str_r(dst_lo, rs, off, ctx, BPF_SIZE(code));
|
2017-08-22 06:32:33 +00:00
|
|
|
break;
|
|
|
|
/* PC += off if dst == src */
|
|
|
|
/* PC += off if dst > src */
|
|
|
|
/* PC += off if dst >= src */
|
|
|
|
/* PC += off if dst < src */
|
|
|
|
/* PC += off if dst <= src */
|
|
|
|
/* PC += off if dst != src */
|
|
|
|
/* PC += off if dst > src (signed) */
|
|
|
|
/* PC += off if dst >= src (signed) */
|
|
|
|
/* PC += off if dst < src (signed) */
|
|
|
|
/* PC += off if dst <= src (signed) */
|
|
|
|
/* PC += off if dst & src */
|
|
|
|
case BPF_JMP | BPF_JEQ | BPF_X:
|
|
|
|
case BPF_JMP | BPF_JGT | BPF_X:
|
|
|
|
case BPF_JMP | BPF_JGE | BPF_X:
|
|
|
|
case BPF_JMP | BPF_JNE | BPF_X:
|
|
|
|
case BPF_JMP | BPF_JSGT | BPF_X:
|
|
|
|
case BPF_JMP | BPF_JSGE | BPF_X:
|
|
|
|
case BPF_JMP | BPF_JSET | BPF_X:
|
|
|
|
case BPF_JMP | BPF_JLE | BPF_X:
|
|
|
|
case BPF_JMP | BPF_JLT | BPF_X:
|
|
|
|
case BPF_JMP | BPF_JSLT | BPF_X:
|
|
|
|
case BPF_JMP | BPF_JSLE | BPF_X:
|
2019-01-26 17:26:09 +00:00
|
|
|
case BPF_JMP32 | BPF_JEQ | BPF_X:
|
|
|
|
case BPF_JMP32 | BPF_JGT | BPF_X:
|
|
|
|
case BPF_JMP32 | BPF_JGE | BPF_X:
|
|
|
|
case BPF_JMP32 | BPF_JNE | BPF_X:
|
|
|
|
case BPF_JMP32 | BPF_JSGT | BPF_X:
|
|
|
|
case BPF_JMP32 | BPF_JSGE | BPF_X:
|
|
|
|
case BPF_JMP32 | BPF_JSET | BPF_X:
|
|
|
|
case BPF_JMP32 | BPF_JLE | BPF_X:
|
|
|
|
case BPF_JMP32 | BPF_JLT | BPF_X:
|
|
|
|
case BPF_JMP32 | BPF_JSLT | BPF_X:
|
|
|
|
case BPF_JMP32 | BPF_JSLE | BPF_X:
|
2017-08-22 06:32:33 +00:00
|
|
|
/* Setup source registers */
|
2018-07-11 09:31:52 +00:00
|
|
|
rm = arm_bpf_get_reg32(src_hi, tmp2[0], ctx);
|
|
|
|
rn = arm_bpf_get_reg32(src_lo, tmp2[1], ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
goto go_jmp;
|
|
|
|
/* PC += off if dst == imm */
|
|
|
|
/* PC += off if dst > imm */
|
|
|
|
/* PC += off if dst >= imm */
|
|
|
|
/* PC += off if dst < imm */
|
|
|
|
/* PC += off if dst <= imm */
|
|
|
|
/* PC += off if dst != imm */
|
|
|
|
/* PC += off if dst > imm (signed) */
|
|
|
|
/* PC += off if dst >= imm (signed) */
|
|
|
|
/* PC += off if dst < imm (signed) */
|
|
|
|
/* PC += off if dst <= imm (signed) */
|
|
|
|
/* PC += off if dst & imm */
|
|
|
|
case BPF_JMP | BPF_JEQ | BPF_K:
|
|
|
|
case BPF_JMP | BPF_JGT | BPF_K:
|
|
|
|
case BPF_JMP | BPF_JGE | BPF_K:
|
|
|
|
case BPF_JMP | BPF_JNE | BPF_K:
|
|
|
|
case BPF_JMP | BPF_JSGT | BPF_K:
|
|
|
|
case BPF_JMP | BPF_JSGE | BPF_K:
|
|
|
|
case BPF_JMP | BPF_JSET | BPF_K:
|
|
|
|
case BPF_JMP | BPF_JLT | BPF_K:
|
|
|
|
case BPF_JMP | BPF_JLE | BPF_K:
|
|
|
|
case BPF_JMP | BPF_JSLT | BPF_K:
|
|
|
|
case BPF_JMP | BPF_JSLE | BPF_K:
|
2019-01-26 17:26:09 +00:00
|
|
|
case BPF_JMP32 | BPF_JEQ | BPF_K:
|
|
|
|
case BPF_JMP32 | BPF_JGT | BPF_K:
|
|
|
|
case BPF_JMP32 | BPF_JGE | BPF_K:
|
|
|
|
case BPF_JMP32 | BPF_JNE | BPF_K:
|
|
|
|
case BPF_JMP32 | BPF_JSGT | BPF_K:
|
|
|
|
case BPF_JMP32 | BPF_JSGE | BPF_K:
|
|
|
|
case BPF_JMP32 | BPF_JSET | BPF_K:
|
|
|
|
case BPF_JMP32 | BPF_JLT | BPF_K:
|
|
|
|
case BPF_JMP32 | BPF_JLE | BPF_K:
|
|
|
|
case BPF_JMP32 | BPF_JSLT | BPF_K:
|
|
|
|
case BPF_JMP32 | BPF_JSLE | BPF_K:
|
2017-08-22 06:32:33 +00:00
|
|
|
if (off == 0)
|
2012-03-16 12:37:12 +00:00
|
|
|
break;
|
2017-08-22 06:32:33 +00:00
|
|
|
rm = tmp2[0];
|
|
|
|
rn = tmp2[1];
|
|
|
|
/* Sign-extend immediate value */
|
ARM: net: bpf: improve 64-bit load immediate implementation
Rather than writing each 32-bit half of the 64-bit immediate value
separately when the register is on the stack:
movw r6, #45056 ; 0xb000
movt r6, #60979 ; 0xee33
str r6, [fp, #-44] ; 0xffffffd4
mov r6, #0
str r6, [fp, #-40] ; 0xffffffd8
arrange to use the double-word store when available instead:
movw r6, #45056 ; 0xb000
movt r6, #60979 ; 0xee33
mov r7, #0
strd r6, [fp, #-44] ; 0xffffffd4
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-07-12 20:50:41 +00:00
|
|
|
emit_a32_mov_se_i64(true, tmp2, imm, ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
go_jmp:
|
|
|
|
/* Setup destination register */
|
2018-07-11 09:31:57 +00:00
|
|
|
rd = arm_bpf_get_reg64(dst, tmp, ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
|
|
|
|
/* Check for the condition */
|
2019-01-26 17:26:09 +00:00
|
|
|
emit_ar_r(rd[0], rd[1], rm, rn, ctx, BPF_OP(code),
|
|
|
|
BPF_CLASS(code) == BPF_JMP);
|
2017-08-22 06:32:33 +00:00
|
|
|
|
|
|
|
/* Setup JUMP instruction */
|
|
|
|
jmp_offset = bpf2a32_offset(i+off, i, ctx);
|
|
|
|
switch (BPF_OP(code)) {
|
|
|
|
case BPF_JNE:
|
|
|
|
case BPF_JSET:
|
|
|
|
_emit(ARM_COND_NE, ARM_B(jmp_offset), ctx);
|
2012-03-16 12:37:12 +00:00
|
|
|
break;
|
2017-08-22 06:32:33 +00:00
|
|
|
case BPF_JEQ:
|
|
|
|
_emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx);
|
2012-03-16 12:37:12 +00:00
|
|
|
break;
|
2017-08-22 06:32:33 +00:00
|
|
|
case BPF_JGT:
|
|
|
|
_emit(ARM_COND_HI, ARM_B(jmp_offset), ctx);
|
2012-03-16 12:37:12 +00:00
|
|
|
break;
|
2017-08-22 06:32:33 +00:00
|
|
|
case BPF_JGE:
|
|
|
|
_emit(ARM_COND_CS, ARM_B(jmp_offset), ctx);
|
2012-03-16 12:37:12 +00:00
|
|
|
break;
|
2017-08-22 06:32:33 +00:00
|
|
|
case BPF_JSGT:
|
|
|
|
_emit(ARM_COND_LT, ARM_B(jmp_offset), ctx);
|
2012-03-16 12:37:12 +00:00
|
|
|
break;
|
2017-08-22 06:32:33 +00:00
|
|
|
case BPF_JSGE:
|
|
|
|
_emit(ARM_COND_GE, ARM_B(jmp_offset), ctx);
|
2012-03-16 12:37:12 +00:00
|
|
|
break;
|
2017-08-22 06:32:33 +00:00
|
|
|
case BPF_JLE:
|
|
|
|
_emit(ARM_COND_LS, ARM_B(jmp_offset), ctx);
|
2012-03-16 12:37:12 +00:00
|
|
|
break;
|
2017-08-22 06:32:33 +00:00
|
|
|
case BPF_JLT:
|
|
|
|
_emit(ARM_COND_CC, ARM_B(jmp_offset), ctx);
|
2012-03-16 12:37:12 +00:00
|
|
|
break;
|
2017-08-22 06:32:33 +00:00
|
|
|
case BPF_JSLT:
|
|
|
|
_emit(ARM_COND_LT, ARM_B(jmp_offset), ctx);
|
2012-03-16 12:37:12 +00:00
|
|
|
break;
|
2017-08-22 06:32:33 +00:00
|
|
|
case BPF_JSLE:
|
|
|
|
_emit(ARM_COND_GE, ARM_B(jmp_offset), ctx);
|
2012-11-07 15:31:02 +00:00
|
|
|
break;
|
2017-08-22 06:32:33 +00:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
/* JMP OFF */
|
|
|
|
case BPF_JMP | BPF_JA:
|
2023-09-07 23:05:42 +00:00
|
|
|
case BPF_JMP32 | BPF_JA:
|
2017-08-22 06:32:33 +00:00
|
|
|
{
|
2023-09-07 23:05:42 +00:00
|
|
|
if (BPF_CLASS(code) == BPF_JMP32 && imm != 0)
|
|
|
|
jmp_offset = bpf2a32_offset(i + imm, i, ctx);
|
|
|
|
else if (BPF_CLASS(code) == BPF_JMP && off != 0)
|
|
|
|
jmp_offset = bpf2a32_offset(i + off, i, ctx);
|
|
|
|
else
|
2015-07-27 13:06:49 +00:00
|
|
|
break;
|
2023-09-07 23:05:42 +00:00
|
|
|
|
2017-08-22 06:32:33 +00:00
|
|
|
check_imm24(jmp_offset);
|
|
|
|
emit(ARM_B(jmp_offset), ctx);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
/* tail call */
|
|
|
|
case BPF_JMP | BPF_TAIL_CALL:
|
|
|
|
if (emit_bpf_tail_call(ctx))
|
|
|
|
return -EFAULT;
|
|
|
|
break;
|
|
|
|
/* function call */
|
|
|
|
case BPF_JMP | BPF_CALL:
|
|
|
|
{
|
2018-07-11 09:31:41 +00:00
|
|
|
const s8 *r0 = bpf2a32[BPF_REG_0];
|
|
|
|
const s8 *r1 = bpf2a32[BPF_REG_1];
|
|
|
|
const s8 *r2 = bpf2a32[BPF_REG_2];
|
|
|
|
const s8 *r3 = bpf2a32[BPF_REG_3];
|
|
|
|
const s8 *r4 = bpf2a32[BPF_REG_4];
|
|
|
|
const s8 *r5 = bpf2a32[BPF_REG_5];
|
2017-08-22 06:32:33 +00:00
|
|
|
const u32 func = (u32)__bpf_call_base + (u32)imm;
|
|
|
|
|
2018-07-11 09:31:47 +00:00
|
|
|
emit_a32_mov_r64(true, r0, r1, ctx);
|
|
|
|
emit_a32_mov_r64(true, r1, r2, ctx);
|
2018-07-11 09:32:02 +00:00
|
|
|
emit_push_r64(r5, ctx);
|
|
|
|
emit_push_r64(r4, ctx);
|
|
|
|
emit_push_r64(r3, ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
|
2018-07-11 09:31:47 +00:00
|
|
|
emit_a32_mov_i(tmp[1], func, ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
emit_blx_r(tmp[1], ctx);
|
|
|
|
|
|
|
|
emit(ARM_ADD_I(ARM_SP, ARM_SP, imm8m(24)), ctx); // callee clean
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
/* function return */
|
|
|
|
case BPF_JMP | BPF_EXIT:
|
|
|
|
/* Optimization: when last instruction is EXIT
|
|
|
|
* simply fallthrough to epilogue.
|
|
|
|
*/
|
|
|
|
if (i == ctx->prog->len - 1)
|
2012-03-16 12:37:12 +00:00
|
|
|
break;
|
2017-08-22 06:32:33 +00:00
|
|
|
jmp_offset = epilogue_offset(ctx);
|
|
|
|
check_imm24(jmp_offset);
|
|
|
|
emit(ARM_B(jmp_offset), ctx);
|
|
|
|
break;
|
|
|
|
notyet:
|
|
|
|
pr_info_once("*** NOT YET: opcode %02x ***\n", code);
|
|
|
|
return -EFAULT;
|
|
|
|
default:
|
|
|
|
pr_err_once("unknown opcode %02x\n", code);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
2015-07-27 13:06:50 +00:00
|
|
|
|
2017-08-22 06:32:33 +00:00
|
|
|
if (ctx->flags & FLAG_IMM_OVERFLOW)
|
|
|
|
/*
|
|
|
|
* this instruction generated an overflow when
|
|
|
|
* trying to access the literal pool, so
|
|
|
|
* delegate this filter to the kernel interpreter.
|
|
|
|
*/
|
|
|
|
return -1;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int build_body(struct jit_ctx *ctx)
|
|
|
|
{
|
|
|
|
const struct bpf_prog *prog = ctx->prog;
|
|
|
|
unsigned int i;
|
|
|
|
|
|
|
|
for (i = 0; i < prog->len; i++) {
|
|
|
|
const struct bpf_insn *insn = &(prog->insnsi[i]);
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
ret = build_insn(insn, ctx);
|
|
|
|
|
|
|
|
/* It's used with loading the 64 bit immediate value. */
|
|
|
|
if (ret > 0) {
|
|
|
|
i++;
|
|
|
|
if (ctx->target == NULL)
|
|
|
|
ctx->offsets[i] = ctx->idx;
|
|
|
|
continue;
|
2012-03-16 12:37:12 +00:00
|
|
|
}
|
2015-05-07 15:14:21 +00:00
|
|
|
|
2017-08-22 06:32:33 +00:00
|
|
|
if (ctx->target == NULL)
|
|
|
|
ctx->offsets[i] = ctx->idx;
|
|
|
|
|
2022-03-18 10:37:04 +00:00
|
|
|
/* If unsuccesful, return with error code */
|
2017-08-22 06:32:33 +00:00
|
|
|
if (ret)
|
|
|
|
return ret;
|
2012-03-16 12:37:12 +00:00
|
|
|
}
|
2017-08-22 06:32:33 +00:00
|
|
|
return 0;
|
|
|
|
}
|
2012-03-16 12:37:12 +00:00
|
|
|
|
2017-08-22 06:32:33 +00:00
|
|
|
static int validate_code(struct jit_ctx *ctx)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < ctx->idx; i++) {
|
|
|
|
if (ctx->target[i] == __opcode_to_mem_arm(ARM_INST_UDF))
|
|
|
|
return -1;
|
|
|
|
}
|
2012-03-16 12:37:12 +00:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2019-05-24 22:25:22 +00:00
|
|
|
/*
 * bpf_jit_needs_zext() - unconditionally reports true.
 *
 * Elsewhere in this file the JIT skips clearing the high word after 32-bit
 * operations when prog->aux->verifier_zext is set.
 *
 * NOTE(review): presumably returning true here is what lets the verifier
 * take over that zero-extension; confirm against the BPF core.
 */
bool bpf_jit_needs_zext(void)
{
	return true;
}
|
|
|
|
|
2017-08-22 06:32:33 +00:00
|
|
|
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
|
2012-03-16 12:37:12 +00:00
|
|
|
{
|
2017-08-22 06:32:33 +00:00
|
|
|
struct bpf_prog *tmp, *orig_prog = prog;
|
2014-09-08 06:04:48 +00:00
|
|
|
struct bpf_binary_header *header;
|
2017-08-22 06:32:33 +00:00
|
|
|
bool tmp_blinded = false;
|
2012-03-16 12:37:12 +00:00
|
|
|
struct jit_ctx ctx;
|
2017-08-22 06:32:33 +00:00
|
|
|
unsigned int tmp_idx;
|
|
|
|
unsigned int image_size;
|
|
|
|
u8 *image_ptr;
|
2012-03-16 12:37:12 +00:00
|
|
|
|
2017-08-22 06:32:33 +00:00
|
|
|
/* If BPF JIT was not enabled then we must fall back to
|
|
|
|
* the interpreter.
|
|
|
|
*/
|
2017-12-15 01:55:14 +00:00
|
|
|
if (!prog->jit_requested)
|
2017-08-22 06:32:33 +00:00
|
|
|
return orig_prog;
|
2012-03-16 12:37:12 +00:00
|
|
|
|
2017-08-22 06:32:33 +00:00
|
|
|
/* If constant blinding was enabled and we failed during blinding
|
|
|
|
* then we must fall back to the interpreter. Otherwise, we save
|
|
|
|
* the new JITed code.
|
|
|
|
*/
|
|
|
|
tmp = bpf_jit_blind_constants(prog);
|
2012-03-16 12:37:12 +00:00
|
|
|
|
2017-08-22 06:32:33 +00:00
|
|
|
if (IS_ERR(tmp))
|
|
|
|
return orig_prog;
|
|
|
|
if (tmp != prog) {
|
|
|
|
tmp_blinded = true;
|
|
|
|
prog = tmp;
|
|
|
|
}
|
2012-03-16 12:37:12 +00:00
|
|
|
|
2017-08-22 06:32:33 +00:00
|
|
|
memset(&ctx, 0, sizeof(ctx));
|
|
|
|
ctx.prog = prog;
|
2018-07-11 09:32:38 +00:00
|
|
|
ctx.cpu_architecture = cpu_architecture();
|
2017-08-22 06:32:33 +00:00
|
|
|
|
|
|
|
/* Not able to allocate memory for offsets[] , then
|
|
|
|
* we must fall back to the interpreter
|
|
|
|
*/
|
|
|
|
ctx.offsets = kcalloc(prog->len, sizeof(int), GFP_KERNEL);
|
|
|
|
if (ctx.offsets == NULL) {
|
|
|
|
prog = orig_prog;
|
2012-03-16 12:37:12 +00:00
|
|
|
goto out;
|
2017-08-22 06:32:33 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* 1) fake pass to find in the length of the JITed code,
|
|
|
|
* to compute ctx->offsets and other context variables
|
|
|
|
* needed to compute final JITed code.
|
|
|
|
* Also, calculate random starting pointer/start of JITed code
|
|
|
|
* which is prefixed by random number of fault instructions.
|
|
|
|
*
|
|
|
|
* If the first pass fails then there is no chance of it
|
|
|
|
* being successful in the second pass, so just fall back
|
|
|
|
* to the interpreter.
|
|
|
|
*/
|
|
|
|
if (build_body(&ctx)) {
|
|
|
|
prog = orig_prog;
|
|
|
|
goto out_off;
|
|
|
|
}
|
2012-03-16 12:37:12 +00:00
|
|
|
|
|
|
|
tmp_idx = ctx.idx;
|
|
|
|
build_prologue(&ctx);
|
|
|
|
ctx.prologue_bytes = (ctx.idx - tmp_idx) * 4;
|
|
|
|
|
2017-08-22 06:32:33 +00:00
|
|
|
ctx.epilogue_offset = ctx.idx;
|
|
|
|
|
2012-03-16 12:37:12 +00:00
|
|
|
#if __LINUX_ARM_ARCH__ < 7
|
|
|
|
tmp_idx = ctx.idx;
|
|
|
|
build_epilogue(&ctx);
|
|
|
|
ctx.epilogue_bytes = (ctx.idx - tmp_idx) * 4;
|
|
|
|
|
|
|
|
ctx.idx += ctx.imm_count;
|
|
|
|
if (ctx.imm_count) {
|
2017-08-22 06:32:33 +00:00
|
|
|
ctx.imms = kcalloc(ctx.imm_count, sizeof(u32), GFP_KERNEL);
|
|
|
|
if (ctx.imms == NULL) {
|
|
|
|
prog = orig_prog;
|
|
|
|
goto out_off;
|
|
|
|
}
|
2012-03-16 12:37:12 +00:00
|
|
|
}
|
|
|
|
#else
|
2017-08-22 06:32:33 +00:00
|
|
|
/* there's nothing about the epilogue on ARMv7 */
|
2012-03-16 12:37:12 +00:00
|
|
|
build_epilogue(&ctx);
|
|
|
|
#endif
|
2017-08-22 06:32:33 +00:00
|
|
|
/* Now we can get the actual image size of the JITed arm code.
|
|
|
|
* Currently, we are not considering the THUMB-2 instructions
|
|
|
|
* for jit, although it can decrease the size of the image.
|
|
|
|
*
|
|
|
|
* As each arm instruction is of length 32bit, we are translating
|
2022-03-18 10:37:04 +00:00
|
|
|
* number of JITed instructions into the size required to store these
|
2017-08-22 06:32:33 +00:00
|
|
|
* JITed code.
|
|
|
|
*/
|
|
|
|
image_size = sizeof(u32) * ctx.idx;
|
2012-03-16 12:37:12 +00:00
|
|
|
|
2017-08-22 06:32:33 +00:00
|
|
|
/* Now we know the size of the structure to make */
|
|
|
|
header = bpf_jit_binary_alloc(image_size, &image_ptr,
|
|
|
|
sizeof(u32), jit_fill_hole);
|
|
|
|
/* Not able to allocate memory for the structure then
|
|
|
|
* we must fall back to the interpretation
|
|
|
|
*/
|
|
|
|
if (header == NULL) {
|
|
|
|
prog = orig_prog;
|
|
|
|
goto out_imms;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* 2.) Actual pass to generate final JIT code */
|
|
|
|
ctx.target = (u32 *) image_ptr;
|
2012-03-16 12:37:12 +00:00
|
|
|
ctx.idx = 0;
|
2014-09-08 06:04:48 +00:00
|
|
|
|
2012-03-16 12:37:12 +00:00
|
|
|
build_prologue(&ctx);
|
2017-08-22 06:32:33 +00:00
|
|
|
|
|
|
|
/* If building the body of the JITed code fails somehow,
|
|
|
|
* we fall back to the interpretation.
|
|
|
|
*/
|
2024-03-08 05:38:08 +00:00
|
|
|
if (build_body(&ctx) < 0)
|
|
|
|
goto out_free;
|
2012-03-16 12:37:12 +00:00
|
|
|
build_epilogue(&ctx);
|
|
|
|
|
2017-08-22 06:32:33 +00:00
|
|
|
/* 3.) Extra pass to validate JITed Code */
|
2024-03-08 05:38:08 +00:00
|
|
|
if (validate_code(&ctx))
|
|
|
|
goto out_free;
|
2015-11-14 00:26:53 +00:00
|
|
|
flush_icache_range((u32)header, (u32)(ctx.target + ctx.idx));
|
2012-03-16 12:37:12 +00:00
|
|
|
|
|
|
|
if (bpf_jit_enable > 1)
|
2013-03-21 21:22:03 +00:00
|
|
|
/* there are 2 passes here */
|
2017-08-22 06:32:33 +00:00
|
|
|
bpf_jit_dump(prog->len, image_size, 2, ctx.target);
|
2012-03-16 12:37:12 +00:00
|
|
|
|
2024-03-08 05:38:08 +00:00
|
|
|
if (bpf_jit_binary_lock_ro(header))
|
|
|
|
goto out_free;
|
2017-08-22 06:32:33 +00:00
|
|
|
prog->bpf_func = (void *)ctx.target;
|
|
|
|
prog->jited = 1;
|
|
|
|
prog->jited_len = image_size;
|
|
|
|
|
|
|
|
out_imms:
|
|
|
|
#if __LINUX_ARM_ARCH__ < 7
|
|
|
|
if (ctx.imm_count)
|
|
|
|
kfree(ctx.imms);
|
|
|
|
#endif
|
|
|
|
out_off:
|
2012-03-16 12:37:12 +00:00
|
|
|
kfree(ctx.offsets);
|
2017-08-22 06:32:33 +00:00
|
|
|
out:
|
|
|
|
if (tmp_blinded)
|
|
|
|
bpf_jit_prog_release_other(prog, prog == orig_prog ?
|
|
|
|
tmp : orig_prog);
|
|
|
|
return prog;
|
2024-03-08 05:38:08 +00:00
|
|
|
|
|
|
|
out_free:
|
|
|
|
image_ptr = NULL;
|
|
|
|
bpf_jit_binary_free(header);
|
|
|
|
prog = orig_prog;
|
|
|
|
goto out_imms;
|
2012-03-16 12:37:12 +00:00
|
|
|
}
|
|
|
|
|