/* * Just-In-Time compiler for BPF filters on 32bit ARM * * Copyright (c) 2011 Mircea Gherzan * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation; version 2 of the License. */ #include #include #include #include #include #include #include #include #include #include #include #include "bpf_jit_32.h" /* * ABI: * * r0 scratch register * r4 BPF register A * r5 BPF register X * r6 pointer to the skb * r7 skb->data * r8 skb_headlen(skb) */ #define r_scratch ARM_R0 /* r1-r3 are (also) used for the unaligned loads on the non-ARMv7 slowpath */ #define r_off ARM_R1 #define r_A ARM_R4 #define r_X ARM_R5 #define r_skb ARM_R6 #define r_skb_data ARM_R7 #define r_skb_hl ARM_R8 #define SCRATCH_SP_OFFSET 0 #define SCRATCH_OFF(k) (SCRATCH_SP_OFFSET + 4 * (k)) #define SEEN_MEM ((1 << BPF_MEMWORDS) - 1) #define SEEN_MEM_WORD(k) (1 << (k)) #define SEEN_X (1 << BPF_MEMWORDS) #define SEEN_CALL (1 << (BPF_MEMWORDS + 1)) #define SEEN_SKB (1 << (BPF_MEMWORDS + 2)) #define SEEN_DATA (1 << (BPF_MEMWORDS + 3)) #define FLAG_NEED_X_RESET (1 << 0) struct jit_ctx { const struct sk_filter *skf; unsigned idx; unsigned prologue_bytes; int ret0_fp_idx; u32 seen; u32 flags; u32 *offsets; u32 *target; #if __LINUX_ARM_ARCH__ < 7 u16 epilogue_bytes; u16 imm_count; u32 *imms; #endif }; int bpf_jit_enable __read_mostly; static u64 jit_get_skb_b(struct sk_buff *skb, unsigned offset) { u8 ret; int err; err = skb_copy_bits(skb, offset, &ret, 1); return (u64)err << 32 | ret; } static u64 jit_get_skb_h(struct sk_buff *skb, unsigned offset) { u16 ret; int err; err = skb_copy_bits(skb, offset, &ret, 2); return (u64)err << 32 | ntohs(ret); } static u64 jit_get_skb_w(struct sk_buff *skb, unsigned offset) { u32 ret; int err; err = skb_copy_bits(skb, offset, &ret, 4); return (u64)err << 32 | ntohl(ret); } /* * Wrapper that handles both OABI and EABI and assures Thumb2 interworking * (where the assembly routines like __aeabi_uidiv could cause problems). */ static u32 jit_udiv(u32 dividend, u32 divisor) { return dividend / divisor; } static inline void _emit(int cond, u32 inst, struct jit_ctx *ctx) { if (ctx->target != NULL) ctx->target[ctx->idx] = inst | (cond << 28); ctx->idx++; } /* * Emit an instruction that will be executed unconditionally. */ static inline void emit(u32 inst, struct jit_ctx *ctx) { _emit(ARM_COND_AL, inst, ctx); } static u16 saved_regs(struct jit_ctx *ctx) { u16 ret = 0; if ((ctx->skf->len > 1) || (ctx->skf->insns[0].code == BPF_S_RET_A)) ret |= 1 << r_A; #ifdef CONFIG_FRAME_POINTER ret |= (1 << ARM_FP) | (1 << ARM_IP) | (1 << ARM_LR) | (1 << ARM_PC); #else if (ctx->seen & SEEN_CALL) ret |= 1 << ARM_LR; #endif if (ctx->seen & (SEEN_DATA | SEEN_SKB)) ret |= 1 << r_skb; if (ctx->seen & SEEN_DATA) ret |= (1 << r_skb_data) | (1 << r_skb_hl); if (ctx->seen & SEEN_X) ret |= 1 << r_X; return ret; } static inline int mem_words_used(struct jit_ctx *ctx) { /* yes, we do waste some stack space IF there are "holes" in the set" */ return fls(ctx->seen & SEEN_MEM); } static inline bool is_load_to_a(u16 inst) { switch (inst) { case BPF_S_LD_W_LEN: case BPF_S_LD_W_ABS: case BPF_S_LD_H_ABS: case BPF_S_LD_B_ABS: case BPF_S_ANC_CPU: case BPF_S_ANC_IFINDEX: case BPF_S_ANC_MARK: case BPF_S_ANC_PROTOCOL: case BPF_S_ANC_RXHASH: case BPF_S_ANC_VLAN_TAG: case BPF_S_ANC_VLAN_TAG_PRESENT: case BPF_S_ANC_QUEUE: return true; default: return false; } } static void build_prologue(struct jit_ctx *ctx) { u16 reg_set = saved_regs(ctx); u16 first_inst = ctx->skf->insns[0].code; u16 off; #ifdef CONFIG_FRAME_POINTER emit(ARM_MOV_R(ARM_IP, ARM_SP), ctx); emit(ARM_PUSH(reg_set), ctx); emit(ARM_SUB_I(ARM_FP, ARM_IP, 4), ctx); #else if (reg_set) emit(ARM_PUSH(reg_set), ctx); #endif if (ctx->seen & (SEEN_DATA | SEEN_SKB)) emit(ARM_MOV_R(r_skb, ARM_R0), ctx); if (ctx->seen & SEEN_DATA) { off = offsetof(struct sk_buff, data); emit(ARM_LDR_I(r_skb_data, r_skb, off), ctx); /* headlen = len - data_len */ off = offsetof(struct sk_buff, len); emit(ARM_LDR_I(r_skb_hl, r_skb, off), ctx); off = offsetof(struct sk_buff, data_len); emit(ARM_LDR_I(r_scratch, r_skb, off), ctx); emit(ARM_SUB_R(r_skb_hl, r_skb_hl, r_scratch), ctx); }