
net: filter: BPF 'JIT' compiler for PPC64

An implementation of a code generator for BPF programs to speed up packet
filtering on PPC64, inspired by Eric Dumazet's x86-64 version.

Filter code is generated as an ABI-compliant function in module_alloc()'d
memory, with a stackframe and prologue/epilogue emitted only when required
(simple filters need nothing more than an li/blr). The filter's local
variables, M[], live in registers. All BPF opcodes are supported, although
"complicated" loads from negative packet offsets (e.g. SKF_LL_OFF) are not
yet supported.
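
To make the register mapping concrete, here is a small illustrative
classic-BPF program (not part of this patch) that exercises the M[] scratch
store; under this JIT, M[0] would live in a non-volatile GPR (r16) rather
than on the stack, and the half-word load at offset 12 would go through the
sk_load_half helper. The macros are the standard <linux/filter.h> ones; the
ethertype accept/drop logic is purely an example.

#include <linux/filter.h>

/* Illustrative only: a filter whose M[0] slot the PPC64 JIT keeps in r16. */
static struct sock_filter sample_insns[] = {
	BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 12),              /* A = ethertype  */
	BPF_STMT(BPF_ST, 0),                                 /* M[0] = A       */
	BPF_STMT(BPF_LD | BPF_W | BPF_MEM, 0),               /* A = M[0] (r16) */
	BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x0800, 0, 1),   /* IPv4?          */
	BPF_STMT(BPF_RET | BPF_K, 0xffff),                   /* accept 64K     */
	BPF_STMT(BPF_RET | BPF_K, 0),                        /* drop           */
};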

A couple of further optimisations are left for future work: multi-pass
assembly with branch-reach reduction, and a register allocator to push M[]
variables into volatile registers, would further improve code quality.

This currently supports big-endian 64-bit PowerPC only (but is fairly simple
to port to PPC32 or LE!).

The JIT is enabled in the same way as on x86-64:

echo 1 > /proc/sys/net/core/bpf_jit_enable

Or, with extra debug output:

echo 2 > /proc/sys/net/core/bpf_jit_enable
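
Once the sysctl is set, any classic BPF filter attached to a socket becomes a
JIT candidate. The sketch below (illustrative, not part of the patch) attaches
a trivial filter with SO_ATTACH_FILTER, which is the path that ends up in
bpf_jit_compile(); with bpf_jit_enable=2 the kernel additionally logs
flen/proglen/pass/image and a hex dump of the generated code.

#include <sys/socket.h>
#include <linux/filter.h>
#include <linux/if_ether.h>

/*
 * Hedged usage sketch: attach a classic BPF filter to a socket (the
 * ethertype test at offset 12 assumes e.g. an AF_PACKET socket).  With
 * bpf_jit_enable set, the kernel JITs this program on attach.
 */
int attach_sample_filter(int sock)
{
	struct sock_filter insns[] = {
		BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 12),            /* A = ethertype */
		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, ETH_P_IP, 0, 1),
		BPF_STMT(BPF_RET | BPF_K, 0xffff),                 /* accept */
		BPF_STMT(BPF_RET | BPF_K, 0),                      /* drop */
	};
	struct sock_fprog prog = {
		.len	= sizeof(insns) / sizeof(insns[0]),
		.filter	= insns,
	};

	return setsockopt(sock, SOL_SOCKET, SO_ATTACH_FILTER,
			  &prog, sizeof(prog));
}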

Signed-off-by: Matt Evans <matt@ozlabs.org>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Authored by Matt Evans, committed by David S. Miller
0ca87f05 3aeb7d22

7 files changed, 1106 insertions(+), 1 deletion(-)

arch/powerpc/Kconfig (+1)
···
 	select GENERIC_IRQ_SHOW_LEVEL
 	select HAVE_RCU_TABLE_FREE if SMP
 	select HAVE_SYSCALL_TRACEPOINTS
+	select HAVE_BPF_JIT if PPC64
 
 config EARLY_PRINTK
 	bool

arch/powerpc/Makefile (+2, -1)
···
 				   arch/powerpc/lib/ \
 				   arch/powerpc/sysdev/ \
 				   arch/powerpc/platforms/ \
-				   arch/powerpc/math-emu/
+				   arch/powerpc/math-emu/ \
+				   arch/powerpc/net/
 core-$(CONFIG_XMON)	+= arch/powerpc/xmon/
 core-$(CONFIG_KVM)	+= arch/powerpc/kvm/

arch/powerpc/include/asm/ppc-opcode.h (+40)
···
 #define PPC_INST_ERATSX		0x7c000126
 #define PPC_INST_ERATSX_DOT		0x7c000127
 
+/* Misc instructions for BPF compiler */
+#define PPC_INST_LD			0xe8000000
+#define PPC_INST_LHZ			0xa0000000
+#define PPC_INST_LWZ			0x80000000
+#define PPC_INST_STD			0xf8000000
+#define PPC_INST_STDU			0xf8000001
+#define PPC_INST_MFLR			0x7c0802a6
+#define PPC_INST_MTLR			0x7c0803a6
+#define PPC_INST_CMPWI			0x2c000000
+#define PPC_INST_CMPDI			0x2c200000
+#define PPC_INST_CMPLW			0x7c000040
+#define PPC_INST_CMPLWI			0x28000000
+#define PPC_INST_ADDI			0x38000000
+#define PPC_INST_ADDIS			0x3c000000
+#define PPC_INST_ADD			0x7c000214
+#define PPC_INST_SUB			0x7c000050
+#define PPC_INST_BLR			0x4e800020
+#define PPC_INST_BLRL			0x4e800021
+#define PPC_INST_MULLW			0x7c0001d6
+#define PPC_INST_MULHWU			0x7c000016
+#define PPC_INST_MULLI			0x1c000000
+#define PPC_INST_DIVWU			0x7c0003d6
+#define PPC_INST_RLWINM			0x54000000
+#define PPC_INST_RLDICR			0x78000004
+#define PPC_INST_SLW			0x7c000030
+#define PPC_INST_SRW			0x7c000430
+#define PPC_INST_AND			0x7c000038
+#define PPC_INST_ANDDOT			0x7c000039
+#define PPC_INST_OR			0x7c000378
+#define PPC_INST_ANDI			0x70000000
+#define PPC_INST_ORI			0x60000000
+#define PPC_INST_ORIS			0x64000000
+#define PPC_INST_NEG			0x7c0000d0
+#define PPC_INST_BRANCH			0x48000000
+#define PPC_INST_BRANCH_COND		0x40800000
+
 /* macros to insert fields into opcodes */
 #define __PPC_RA(a)	(((a) & 0x1f) << 16)
 #define __PPC_RB(b)	(((b) & 0x1f) << 11)
···
 #define __PPC_T_TLB(t)	(((t) & 0x3) << 21)
 #define __PPC_WC(w)	(((w) & 0x3) << 21)
 #define __PPC_WS(w)	(((w) & 0x1f) << 11)
+#define __PPC_SH(s)	__PPC_WS(s)
+#define __PPC_MB(s)	(((s) & 0x1f) << 6)
+#define __PPC_ME(s)	(((s) & 0x1f) << 1)
+#define __PPC_BI(s)	(((s) & 0x1f) << 16)
 
 /*
  * Only use the larx hint bit on 64bit CPUs. e500v1/v2 based CPUs will treat a
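
The new PPC_INST_* values are bare opcode templates; the JIT (in bpf_jit.h
below) ORs register and immediate fields into them. A worked example, assuming
the usual __PPC_RT/__PPC_RA field positions from ppc-opcode.h (the local macro
copies here just mirror those definitions so the sketch is self-contained):

#include <stdint.h>

#define PPC_INST_ADDI	0x38000000
#define __PPC_RT(t)	(((t) & 0x1f) << 21)	/* destination register field */
#define __PPC_RA(a)	(((a) & 0x1f) << 16)	/* source register field */
#define IMM_L(i)	((uintptr_t)(i) & 0xffff)

/* addi r4, r4, 8  ->  0x38000000 | (4 << 21) | (4 << 16) | 8  ==  0x38840008 */
static inline uint32_t ppc_addi(int rt, int ra, int imm)
{
	return PPC_INST_ADDI | __PPC_RT(rt) | __PPC_RA(ra) | IMM_L(imm);
}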

arch/powerpc/net/Makefile (new file, +4)
···
+#
+# Arch-specific network modules
+#
+obj-$(CONFIG_BPF_JIT) += bpf_jit_64.o bpf_jit_comp.o

arch/powerpc/net/bpf_jit.h (new file, +227)
··· 1 + /* bpf_jit.h: BPF JIT compiler for PPC64 2 + * 3 + * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation 4 + * 5 + * This program is free software; you can redistribute it and/or 6 + * modify it under the terms of the GNU General Public License 7 + * as published by the Free Software Foundation; version 2 8 + * of the License. 9 + */ 10 + #ifndef _BPF_JIT_H 11 + #define _BPF_JIT_H 12 + 13 + #define BPF_PPC_STACK_LOCALS 32 14 + #define BPF_PPC_STACK_BASIC (48+64) 15 + #define BPF_PPC_STACK_SAVE (18*8) 16 + #define BPF_PPC_STACKFRAME (BPF_PPC_STACK_BASIC+BPF_PPC_STACK_LOCALS+ \ 17 + BPF_PPC_STACK_SAVE) 18 + #define BPF_PPC_SLOWPATH_FRAME (48+64) 19 + 20 + /* 21 + * Generated code register usage: 22 + * 23 + * As normal PPC C ABI (e.g. r1=sp, r2=TOC), with: 24 + * 25 + * skb r3 (Entry parameter) 26 + * A register r4 27 + * X register r5 28 + * addr param r6 29 + * r7-r10 scratch 30 + * skb->data r14 31 + * skb headlen r15 (skb->len - skb->data_len) 32 + * m[0] r16 33 + * m[...] ... 34 + * m[15] r31 35 + */ 36 + #define r_skb 3 37 + #define r_ret 3 38 + #define r_A 4 39 + #define r_X 5 40 + #define r_addr 6 41 + #define r_scratch1 7 42 + #define r_D 14 43 + #define r_HL 15 44 + #define r_M 16 45 + 46 + #ifndef __ASSEMBLY__ 47 + 48 + /* 49 + * Assembly helpers from arch/powerpc/net/bpf_jit.S: 50 + */ 51 + extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[]; 52 + 53 + #define FUNCTION_DESCR_SIZE 24 54 + 55 + /* 56 + * 16-bit immediate helper macros: HA() is for use with sign-extending instrs 57 + * (e.g. LD, ADDI). If the bottom 16 bits is "-ve", add another bit into the 58 + * top half to negate the effect (i.e. 0xffff + 1 = 0x(1)0000). 59 + */ 60 + #define IMM_H(i) ((uintptr_t)(i)>>16) 61 + #define IMM_HA(i) (((uintptr_t)(i)>>16) + \ 62 + (((uintptr_t)(i) & 0x8000) >> 15)) 63 + #define IMM_L(i) ((uintptr_t)(i) & 0xffff) 64 + 65 + #define PLANT_INSTR(d, idx, instr) \ 66 + do { if (d) { (d)[idx] = instr; } idx++; } while (0) 67 + #define EMIT(instr) PLANT_INSTR(image, ctx->idx, instr) 68 + 69 + #define PPC_NOP() EMIT(PPC_INST_NOP) 70 + #define PPC_BLR() EMIT(PPC_INST_BLR) 71 + #define PPC_BLRL() EMIT(PPC_INST_BLRL) 72 + #define PPC_MTLR(r) EMIT(PPC_INST_MTLR | __PPC_RT(r)) 73 + #define PPC_ADDI(d, a, i) EMIT(PPC_INST_ADDI | __PPC_RT(d) | \ 74 + __PPC_RA(a) | IMM_L(i)) 75 + #define PPC_MR(d, a) PPC_OR(d, a, a) 76 + #define PPC_LI(r, i) PPC_ADDI(r, 0, i) 77 + #define PPC_ADDIS(d, a, i) EMIT(PPC_INST_ADDIS | \ 78 + __PPC_RS(d) | __PPC_RA(a) | IMM_L(i)) 79 + #define PPC_LIS(r, i) PPC_ADDIS(r, 0, i) 80 + #define PPC_STD(r, base, i) EMIT(PPC_INST_STD | __PPC_RS(r) | \ 81 + __PPC_RA(base) | ((i) & 0xfffc)) 82 + 83 + #define PPC_LD(r, base, i) EMIT(PPC_INST_LD | __PPC_RT(r) | \ 84 + __PPC_RA(base) | IMM_L(i)) 85 + #define PPC_LWZ(r, base, i) EMIT(PPC_INST_LWZ | __PPC_RT(r) | \ 86 + __PPC_RA(base) | IMM_L(i)) 87 + #define PPC_LHZ(r, base, i) EMIT(PPC_INST_LHZ | __PPC_RT(r) | \ 88 + __PPC_RA(base) | IMM_L(i)) 89 + /* Convenience helpers for the above with 'far' offsets: */ 90 + #define PPC_LD_OFFS(r, base, i) do { if ((i) < 32768) PPC_LD(r, base, i); \ 91 + else { PPC_ADDIS(r, base, IMM_HA(i)); \ 92 + PPC_LD(r, r, IMM_L(i)); } } while(0) 93 + 94 + #define PPC_LWZ_OFFS(r, base, i) do { if ((i) < 32768) PPC_LWZ(r, base, i); \ 95 + else { PPC_ADDIS(r, base, IMM_HA(i)); \ 96 + PPC_LWZ(r, r, IMM_L(i)); } } while(0) 97 + 98 + #define PPC_LHZ_OFFS(r, base, i) do { if ((i) < 32768) PPC_LHZ(r, base, i); \ 99 + else { PPC_ADDIS(r, base, IMM_HA(i)); \ 100 + PPC_LHZ(r, r, 
IMM_L(i)); } } while(0) 101 + 102 + #define PPC_CMPWI(a, i) EMIT(PPC_INST_CMPWI | __PPC_RA(a) | IMM_L(i)) 103 + #define PPC_CMPDI(a, i) EMIT(PPC_INST_CMPDI | __PPC_RA(a) | IMM_L(i)) 104 + #define PPC_CMPLWI(a, i) EMIT(PPC_INST_CMPLWI | __PPC_RA(a) | IMM_L(i)) 105 + #define PPC_CMPLW(a, b) EMIT(PPC_INST_CMPLW | __PPC_RA(a) | __PPC_RB(b)) 106 + 107 + #define PPC_SUB(d, a, b) EMIT(PPC_INST_SUB | __PPC_RT(d) | \ 108 + __PPC_RB(a) | __PPC_RA(b)) 109 + #define PPC_ADD(d, a, b) EMIT(PPC_INST_ADD | __PPC_RT(d) | \ 110 + __PPC_RA(a) | __PPC_RB(b)) 111 + #define PPC_MUL(d, a, b) EMIT(PPC_INST_MULLW | __PPC_RT(d) | \ 112 + __PPC_RA(a) | __PPC_RB(b)) 113 + #define PPC_MULHWU(d, a, b) EMIT(PPC_INST_MULHWU | __PPC_RT(d) | \ 114 + __PPC_RA(a) | __PPC_RB(b)) 115 + #define PPC_MULI(d, a, i) EMIT(PPC_INST_MULLI | __PPC_RT(d) | \ 116 + __PPC_RA(a) | IMM_L(i)) 117 + #define PPC_DIVWU(d, a, b) EMIT(PPC_INST_DIVWU | __PPC_RT(d) | \ 118 + __PPC_RA(a) | __PPC_RB(b)) 119 + #define PPC_AND(d, a, b) EMIT(PPC_INST_AND | __PPC_RA(d) | \ 120 + __PPC_RS(a) | __PPC_RB(b)) 121 + #define PPC_ANDI(d, a, i) EMIT(PPC_INST_ANDI | __PPC_RA(d) | \ 122 + __PPC_RS(a) | IMM_L(i)) 123 + #define PPC_AND_DOT(d, a, b) EMIT(PPC_INST_ANDDOT | __PPC_RA(d) | \ 124 + __PPC_RS(a) | __PPC_RB(b)) 125 + #define PPC_OR(d, a, b) EMIT(PPC_INST_OR | __PPC_RA(d) | \ 126 + __PPC_RS(a) | __PPC_RB(b)) 127 + #define PPC_ORI(d, a, i) EMIT(PPC_INST_ORI | __PPC_RA(d) | \ 128 + __PPC_RS(a) | IMM_L(i)) 129 + #define PPC_ORIS(d, a, i) EMIT(PPC_INST_ORIS | __PPC_RA(d) | \ 130 + __PPC_RS(a) | IMM_L(i)) 131 + #define PPC_SLW(d, a, s) EMIT(PPC_INST_SLW | __PPC_RA(d) | \ 132 + __PPC_RS(a) | __PPC_RB(s)) 133 + #define PPC_SRW(d, a, s) EMIT(PPC_INST_SRW | __PPC_RA(d) | \ 134 + __PPC_RS(a) | __PPC_RB(s)) 135 + /* slwi = rlwinm Rx, Ry, n, 0, 31-n */ 136 + #define PPC_SLWI(d, a, i) EMIT(PPC_INST_RLWINM | __PPC_RA(d) | \ 137 + __PPC_RS(a) | __PPC_SH(i) | \ 138 + __PPC_MB(0) | __PPC_ME(31-(i))) 139 + /* srwi = rlwinm Rx, Ry, 32-n, n, 31 */ 140 + #define PPC_SRWI(d, a, i) EMIT(PPC_INST_RLWINM | __PPC_RA(d) | \ 141 + __PPC_RS(a) | __PPC_SH(32-(i)) | \ 142 + __PPC_MB(i) | __PPC_ME(31)) 143 + /* sldi = rldicr Rx, Ry, n, 63-n */ 144 + #define PPC_SLDI(d, a, i) EMIT(PPC_INST_RLDICR | __PPC_RA(d) | \ 145 + __PPC_RS(a) | __PPC_SH(i) | \ 146 + __PPC_MB(63-(i)) | (((i) & 0x20) >> 4)) 147 + #define PPC_NEG(d, a) EMIT(PPC_INST_NEG | __PPC_RT(d) | __PPC_RA(a)) 148 + 149 + /* Long jump; (unconditional 'branch') */ 150 + #define PPC_JMP(dest) EMIT(PPC_INST_BRANCH | \ 151 + (((dest) - (ctx->idx * 4)) & 0x03fffffc)) 152 + /* "cond" here covers BO:BI fields. 
*/ 153 + #define PPC_BCC_SHORT(cond, dest) EMIT(PPC_INST_BRANCH_COND | \ 154 + (((cond) & 0x3ff) << 16) | \ 155 + (((dest) - (ctx->idx * 4)) & \ 156 + 0xfffc)) 157 + #define PPC_LI32(d, i) do { PPC_LI(d, IMM_L(i)); \ 158 + if ((u32)(uintptr_t)(i) >= 32768) { \ 159 + PPC_ADDIS(d, d, IMM_HA(i)); \ 160 + } } while(0) 161 + #define PPC_LI64(d, i) do { \ 162 + if (!((uintptr_t)(i) & 0xffffffff00000000ULL)) \ 163 + PPC_LI32(d, i); \ 164 + else { \ 165 + PPC_LIS(d, ((uintptr_t)(i) >> 48)); \ 166 + if ((uintptr_t)(i) & 0x0000ffff00000000ULL) \ 167 + PPC_ORI(d, d, \ 168 + ((uintptr_t)(i) >> 32) & 0xffff); \ 169 + PPC_SLDI(d, d, 32); \ 170 + if ((uintptr_t)(i) & 0x00000000ffff0000ULL) \ 171 + PPC_ORIS(d, d, \ 172 + ((uintptr_t)(i) >> 16) & 0xffff); \ 173 + if ((uintptr_t)(i) & 0x000000000000ffffULL) \ 174 + PPC_ORI(d, d, (uintptr_t)(i) & 0xffff); \ 175 + } } while (0); 176 + 177 + static inline bool is_nearbranch(int offset) 178 + { 179 + return (offset < 32768) && (offset >= -32768); 180 + } 181 + 182 + /* 183 + * The fly in the ointment of code size changing from pass to pass is 184 + * avoided by padding the short branch case with a NOP. If code size differs 185 + * with different branch reaches we will have the issue of code moving from 186 + * one pass to the next and will need a few passes to converge on a stable 187 + * state. 188 + */ 189 + #define PPC_BCC(cond, dest) do { \ 190 + if (is_nearbranch((dest) - (ctx->idx * 4))) { \ 191 + PPC_BCC_SHORT(cond, dest); \ 192 + PPC_NOP(); \ 193 + } else { \ 194 + /* Flip the 'T or F' bit to invert comparison */ \ 195 + PPC_BCC_SHORT(cond ^ COND_CMP_TRUE, (ctx->idx+2)*4); \ 196 + PPC_JMP(dest); \ 197 + } } while(0) 198 + 199 + /* To create a branch condition, select a bit of cr0... */ 200 + #define CR0_LT 0 201 + #define CR0_GT 1 202 + #define CR0_EQ 2 203 + /* ...and modify BO[3] */ 204 + #define COND_CMP_TRUE 0x100 205 + #define COND_CMP_FALSE 0x000 206 + /* Together, they make all required comparisons: */ 207 + #define COND_GT (CR0_GT | COND_CMP_TRUE) 208 + #define COND_GE (CR0_LT | COND_CMP_FALSE) 209 + #define COND_EQ (CR0_EQ | COND_CMP_TRUE) 210 + #define COND_NE (CR0_EQ | COND_CMP_FALSE) 211 + #define COND_LT (CR0_LT | COND_CMP_TRUE) 212 + 213 + #define SEEN_DATAREF 0x10000 /* might call external helpers */ 214 + #define SEEN_XREG 0x20000 /* X reg is used */ 215 + #define SEEN_MEM 0x40000 /* SEEN_MEM+(1<<n) = use mem[n] for temporary 216 + * storage */ 217 + #define SEEN_MEM_MSK 0x0ffff 218 + 219 + struct codegen_context { 220 + unsigned int seen; 221 + unsigned int idx; 222 + int pc_ret0; /* bpf index of first RET #0 instruction (if any) */ 223 + }; 224 + 225 + #endif 226 + 227 + #endif
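
One subtlety worth calling out in the IMM_HA()/PPC_LI32() macros above: li
sign-extends its 16-bit immediate, so when bit 15 of the low half-word is set,
the following addis must add one extra to the high half-word to cancel the
sign extension. A small self-contained sketch (illustrative; it mirrors the
macros rather than reusing the kernel header):

#include <stdint.h>

#define IMM_L(i)	((uint32_t)(i) & 0xffff)
#define IMM_HA(i)	((((uint32_t)(i) >> 16) + \
			 (((uint32_t)(i) & 0x8000) >> 15)) & 0xffff)

/* Emulate the li/addis pair emitted by PPC_LI32(d, k). */
static int64_t emulate_li32(uint32_t k)
{
	/* li d, IMM_L(k): loads and sign-extends the low 16 bits */
	int64_t d = (int16_t)IMM_L(k);

	/* addis d, d, IMM_HA(k): adds the (adjusted) high 16 bits << 16 */
	if (k >= 32768)
		d += (int64_t)(int16_t)IMM_HA(k) << 16;

	/* e.g. k = 0x00018000: li gives 0x...ffff8000, addis adds 0x20000 */
	return d;
}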

arch/powerpc/net/bpf_jit_64.S (new file, +138)
··· 1 + /* bpf_jit.S: Packet/header access helper functions 2 + * for PPC64 BPF compiler. 3 + * 4 + * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation 5 + * 6 + * This program is free software; you can redistribute it and/or 7 + * modify it under the terms of the GNU General Public License 8 + * as published by the Free Software Foundation; version 2 9 + * of the License. 10 + */ 11 + 12 + #include <asm/ppc_asm.h> 13 + #include "bpf_jit.h" 14 + 15 + /* 16 + * All of these routines are called directly from generated code, 17 + * whose register usage is: 18 + * 19 + * r3 skb 20 + * r4,r5 A,X 21 + * r6 *** address parameter to helper *** 22 + * r7-r10 scratch 23 + * r14 skb->data 24 + * r15 skb headlen 25 + * r16-31 M[] 26 + */ 27 + 28 + /* 29 + * To consider: These helpers are so small it could be better to just 30 + * generate them inline. Inline code can do the simple headlen check 31 + * then branch directly to slow_path_XXX if required. (In fact, could 32 + * load a spare GPR with the address of slow_path_generic and pass size 33 + * as an argument, making the call site a mtlr, li and bllr.) 34 + * 35 + * Technically, the "is addr < 0" check is unnecessary & slowing down 36 + * the ABS path, as it's statically checked on generation. 37 + */ 38 + .globl sk_load_word 39 + sk_load_word: 40 + cmpdi r_addr, 0 41 + blt bpf_error 42 + /* Are we accessing past headlen? */ 43 + subi r_scratch1, r_HL, 4 44 + cmpd r_scratch1, r_addr 45 + blt bpf_slow_path_word 46 + /* Nope, just hitting the header. cr0 here is eq or gt! */ 47 + lwzx r_A, r_D, r_addr 48 + /* When big endian we don't need to byteswap. */ 49 + blr /* Return success, cr0 != LT */ 50 + 51 + .globl sk_load_half 52 + sk_load_half: 53 + cmpdi r_addr, 0 54 + blt bpf_error 55 + subi r_scratch1, r_HL, 2 56 + cmpd r_scratch1, r_addr 57 + blt bpf_slow_path_half 58 + lhzx r_A, r_D, r_addr 59 + blr 60 + 61 + .globl sk_load_byte 62 + sk_load_byte: 63 + cmpdi r_addr, 0 64 + blt bpf_error 65 + cmpd r_HL, r_addr 66 + ble bpf_slow_path_byte 67 + lbzx r_A, r_D, r_addr 68 + blr 69 + 70 + /* 71 + * BPF_S_LDX_B_MSH: ldxb 4*([offset]&0xf) 72 + * r_addr is the offset value, already known positive 73 + */ 74 + .globl sk_load_byte_msh 75 + sk_load_byte_msh: 76 + cmpd r_HL, r_addr 77 + ble bpf_slow_path_byte_msh 78 + lbzx r_X, r_D, r_addr 79 + rlwinm r_X, r_X, 2, 32-4-2, 31-2 80 + blr 81 + 82 + bpf_error: 83 + /* Entered with cr0 = lt */ 84 + li r3, 0 85 + /* Generated code will 'blt epilogue', returning 0. */ 86 + blr 87 + 88 + /* Call out to skb_copy_bits: 89 + * We'll need to back up our volatile regs first; we have 90 + * local variable space at r1+(BPF_PPC_STACK_BASIC). 91 + * Allocate a new stack frame here to remain ABI-compliant in 92 + * stashing LR. 
93 + */ 94 + #define bpf_slow_path_common(SIZE) \ 95 + mflr r0; \ 96 + std r0, 16(r1); \ 97 + /* R3 goes in parameter space of caller's frame */ \ 98 + std r_skb, (BPF_PPC_STACKFRAME+48)(r1); \ 99 + std r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1); \ 100 + std r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1); \ 101 + addi r5, r1, BPF_PPC_STACK_BASIC+(2*8); \ 102 + stdu r1, -BPF_PPC_SLOWPATH_FRAME(r1); \ 103 + /* R3 = r_skb, as passed */ \ 104 + mr r4, r_addr; \ 105 + li r6, SIZE; \ 106 + bl skb_copy_bits; \ 107 + /* R3 = 0 on success */ \ 108 + addi r1, r1, BPF_PPC_SLOWPATH_FRAME; \ 109 + ld r0, 16(r1); \ 110 + ld r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1); \ 111 + ld r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1); \ 112 + mtlr r0; \ 113 + cmpdi r3, 0; \ 114 + blt bpf_error; /* cr0 = LT */ \ 115 + ld r_skb, (BPF_PPC_STACKFRAME+48)(r1); \ 116 + /* Great success! */ 117 + 118 + bpf_slow_path_word: 119 + bpf_slow_path_common(4) 120 + /* Data value is on stack, and cr0 != LT */ 121 + lwz r_A, BPF_PPC_STACK_BASIC+(2*8)(r1) 122 + blr 123 + 124 + bpf_slow_path_half: 125 + bpf_slow_path_common(2) 126 + lhz r_A, BPF_PPC_STACK_BASIC+(2*8)(r1) 127 + blr 128 + 129 + bpf_slow_path_byte: 130 + bpf_slow_path_common(1) 131 + lbz r_A, BPF_PPC_STACK_BASIC+(2*8)(r1) 132 + blr 133 + 134 + bpf_slow_path_byte_msh: 135 + bpf_slow_path_common(1) 136 + lbz r_X, BPF_PPC_STACK_BASIC+(2*8)(r1) 137 + rlwinm r_X, r_X, 2, 32-4-2, 31-2 138 + blr
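
For reference, the fast-path/slow-path split implemented by sk_load_word above
looks roughly like the following C (illustrative only; the real helper keeps
everything in fixed registers and reports failure through cr0 so the generated
code can branch straight to the epilogue and return 0):

#include <linux/skbuff.h>

/* Rough C equivalent of the sk_load_word helper (illustrative sketch). */
static int sk_load_word_c(const struct sk_buff *skb, int offset, u32 *val)
{
	/* r_HL in the asm: length of the linear header */
	unsigned int headlen = skb->len - skb->data_len;

	if (offset < 0)
		return -1;		/* bpf_error: filter returns 0 */

	if (offset + 4 <= headlen) {
		/* fast path: lwzx from r_D (skb->data); BE needs no byteswap */
		*val = *(u32 *)(skb->data + offset);
		return 0;
	}

	/* slow path: data lives in fragments, copy it out */
	return skb_copy_bits(skb, offset, val, 4) < 0 ? -1 : 0;
}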

arch/powerpc/net/bpf_jit_comp.c (new file, +694)
··· 1 + /* bpf_jit_comp.c: BPF JIT compiler for PPC64 2 + * 3 + * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation 4 + * 5 + * Based on the x86 BPF compiler, by Eric Dumazet (eric.dumazet@gmail.com) 6 + * 7 + * This program is free software; you can redistribute it and/or 8 + * modify it under the terms of the GNU General Public License 9 + * as published by the Free Software Foundation; version 2 10 + * of the License. 11 + */ 12 + #include <linux/moduleloader.h> 13 + #include <asm/cacheflush.h> 14 + #include <linux/netdevice.h> 15 + #include <linux/filter.h> 16 + #include "bpf_jit.h" 17 + 18 + #ifndef __BIG_ENDIAN 19 + /* There are endianness assumptions herein. */ 20 + #error "Little-endian PPC not supported in BPF compiler" 21 + #endif 22 + 23 + int bpf_jit_enable __read_mostly; 24 + 25 + 26 + static inline void bpf_flush_icache(void *start, void *end) 27 + { 28 + smp_wmb(); 29 + flush_icache_range((unsigned long)start, (unsigned long)end); 30 + } 31 + 32 + static void bpf_jit_build_prologue(struct sk_filter *fp, u32 *image, 33 + struct codegen_context *ctx) 34 + { 35 + int i; 36 + const struct sock_filter *filter = fp->insns; 37 + 38 + if (ctx->seen & (SEEN_MEM | SEEN_DATAREF)) { 39 + /* Make stackframe */ 40 + if (ctx->seen & SEEN_DATAREF) { 41 + /* If we call any helpers (for loads), save LR */ 42 + EMIT(PPC_INST_MFLR | __PPC_RT(0)); 43 + PPC_STD(0, 1, 16); 44 + 45 + /* Back up non-volatile regs. */ 46 + PPC_STD(r_D, 1, -(8*(32-r_D))); 47 + PPC_STD(r_HL, 1, -(8*(32-r_HL))); 48 + } 49 + if (ctx->seen & SEEN_MEM) { 50 + /* 51 + * Conditionally save regs r15-r31 as some will be used 52 + * for M[] data. 53 + */ 54 + for (i = r_M; i < (r_M+16); i++) { 55 + if (ctx->seen & (1 << (i-r_M))) 56 + PPC_STD(i, 1, -(8*(32-i))); 57 + } 58 + } 59 + EMIT(PPC_INST_STDU | __PPC_RS(1) | __PPC_RA(1) | 60 + (-BPF_PPC_STACKFRAME & 0xfffc)); 61 + } 62 + 63 + if (ctx->seen & SEEN_DATAREF) { 64 + /* 65 + * If this filter needs to access skb data, 66 + * prepare r_D and r_HL: 67 + * r_HL = skb->len - skb->data_len 68 + * r_D = skb->data 69 + */ 70 + PPC_LWZ_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff, 71 + data_len)); 72 + PPC_LWZ_OFFS(r_HL, r_skb, offsetof(struct sk_buff, len)); 73 + PPC_SUB(r_HL, r_HL, r_scratch1); 74 + PPC_LD_OFFS(r_D, r_skb, offsetof(struct sk_buff, data)); 75 + } 76 + 77 + if (ctx->seen & SEEN_XREG) { 78 + /* 79 + * TODO: Could also detect whether first instr. sets X and 80 + * avoid this (as below, with A). 
81 + */ 82 + PPC_LI(r_X, 0); 83 + } 84 + 85 + switch (filter[0].code) { 86 + case BPF_S_RET_K: 87 + case BPF_S_LD_W_LEN: 88 + case BPF_S_ANC_PROTOCOL: 89 + case BPF_S_ANC_IFINDEX: 90 + case BPF_S_ANC_MARK: 91 + case BPF_S_ANC_RXHASH: 92 + case BPF_S_ANC_CPU: 93 + case BPF_S_ANC_QUEUE: 94 + case BPF_S_LD_W_ABS: 95 + case BPF_S_LD_H_ABS: 96 + case BPF_S_LD_B_ABS: 97 + /* first instruction sets A register (or is RET 'constant') */ 98 + break; 99 + default: 100 + /* make sure we dont leak kernel information to user */ 101 + PPC_LI(r_A, 0); 102 + } 103 + } 104 + 105 + static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) 106 + { 107 + int i; 108 + 109 + if (ctx->seen & (SEEN_MEM | SEEN_DATAREF)) { 110 + PPC_ADDI(1, 1, BPF_PPC_STACKFRAME); 111 + if (ctx->seen & SEEN_DATAREF) { 112 + PPC_LD(0, 1, 16); 113 + PPC_MTLR(0); 114 + PPC_LD(r_D, 1, -(8*(32-r_D))); 115 + PPC_LD(r_HL, 1, -(8*(32-r_HL))); 116 + } 117 + if (ctx->seen & SEEN_MEM) { 118 + /* Restore any saved non-vol registers */ 119 + for (i = r_M; i < (r_M+16); i++) { 120 + if (ctx->seen & (1 << (i-r_M))) 121 + PPC_LD(i, 1, -(8*(32-i))); 122 + } 123 + } 124 + } 125 + /* The RETs have left a return value in R3. */ 126 + 127 + PPC_BLR(); 128 + } 129 + 130 + /* Assemble the body code between the prologue & epilogue. */ 131 + static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, 132 + struct codegen_context *ctx, 133 + unsigned int *addrs) 134 + { 135 + const struct sock_filter *filter = fp->insns; 136 + int flen = fp->len; 137 + u8 *func; 138 + unsigned int true_cond; 139 + int i; 140 + 141 + /* Start of epilogue code */ 142 + unsigned int exit_addr = addrs[flen]; 143 + 144 + for (i = 0; i < flen; i++) { 145 + unsigned int K = filter[i].k; 146 + 147 + /* 148 + * addrs[] maps a BPF bytecode address into a real offset from 149 + * the start of the body code. 150 + */ 151 + addrs[i] = ctx->idx * 4; 152 + 153 + switch (filter[i].code) { 154 + /*** ALU ops ***/ 155 + case BPF_S_ALU_ADD_X: /* A += X; */ 156 + ctx->seen |= SEEN_XREG; 157 + PPC_ADD(r_A, r_A, r_X); 158 + break; 159 + case BPF_S_ALU_ADD_K: /* A += K; */ 160 + if (!K) 161 + break; 162 + PPC_ADDI(r_A, r_A, IMM_L(K)); 163 + if (K >= 32768) 164 + PPC_ADDIS(r_A, r_A, IMM_HA(K)); 165 + break; 166 + case BPF_S_ALU_SUB_X: /* A -= X; */ 167 + ctx->seen |= SEEN_XREG; 168 + PPC_SUB(r_A, r_A, r_X); 169 + break; 170 + case BPF_S_ALU_SUB_K: /* A -= K */ 171 + if (!K) 172 + break; 173 + PPC_ADDI(r_A, r_A, IMM_L(-K)); 174 + if (K >= 32768) 175 + PPC_ADDIS(r_A, r_A, IMM_HA(-K)); 176 + break; 177 + case BPF_S_ALU_MUL_X: /* A *= X; */ 178 + ctx->seen |= SEEN_XREG; 179 + PPC_MUL(r_A, r_A, r_X); 180 + break; 181 + case BPF_S_ALU_MUL_K: /* A *= K */ 182 + if (K < 32768) 183 + PPC_MULI(r_A, r_A, K); 184 + else { 185 + PPC_LI32(r_scratch1, K); 186 + PPC_MUL(r_A, r_A, r_scratch1); 187 + } 188 + break; 189 + case BPF_S_ALU_DIV_X: /* A /= X; */ 190 + ctx->seen |= SEEN_XREG; 191 + PPC_CMPWI(r_X, 0); 192 + if (ctx->pc_ret0 != -1) { 193 + PPC_BCC(COND_EQ, addrs[ctx->pc_ret0]); 194 + } else { 195 + /* 196 + * Exit, returning 0; first pass hits here 197 + * (longer worst-case code size). 
198 + */ 199 + PPC_BCC_SHORT(COND_NE, (ctx->idx*4)+12); 200 + PPC_LI(r_ret, 0); 201 + PPC_JMP(exit_addr); 202 + } 203 + PPC_DIVWU(r_A, r_A, r_X); 204 + break; 205 + case BPF_S_ALU_DIV_K: /* A = reciprocal_divide(A, K); */ 206 + PPC_LI32(r_scratch1, K); 207 + /* Top 32 bits of 64bit result -> A */ 208 + PPC_MULHWU(r_A, r_A, r_scratch1); 209 + break; 210 + case BPF_S_ALU_AND_X: 211 + ctx->seen |= SEEN_XREG; 212 + PPC_AND(r_A, r_A, r_X); 213 + break; 214 + case BPF_S_ALU_AND_K: 215 + if (!IMM_H(K)) 216 + PPC_ANDI(r_A, r_A, K); 217 + else { 218 + PPC_LI32(r_scratch1, K); 219 + PPC_AND(r_A, r_A, r_scratch1); 220 + } 221 + break; 222 + case BPF_S_ALU_OR_X: 223 + ctx->seen |= SEEN_XREG; 224 + PPC_OR(r_A, r_A, r_X); 225 + break; 226 + case BPF_S_ALU_OR_K: 227 + if (IMM_L(K)) 228 + PPC_ORI(r_A, r_A, IMM_L(K)); 229 + if (K >= 65536) 230 + PPC_ORIS(r_A, r_A, IMM_H(K)); 231 + break; 232 + case BPF_S_ALU_LSH_X: /* A <<= X; */ 233 + ctx->seen |= SEEN_XREG; 234 + PPC_SLW(r_A, r_A, r_X); 235 + break; 236 + case BPF_S_ALU_LSH_K: 237 + if (K == 0) 238 + break; 239 + else 240 + PPC_SLWI(r_A, r_A, K); 241 + break; 242 + case BPF_S_ALU_RSH_X: /* A >>= X; */ 243 + ctx->seen |= SEEN_XREG; 244 + PPC_SRW(r_A, r_A, r_X); 245 + break; 246 + case BPF_S_ALU_RSH_K: /* A >>= K; */ 247 + if (K == 0) 248 + break; 249 + else 250 + PPC_SRWI(r_A, r_A, K); 251 + break; 252 + case BPF_S_ALU_NEG: 253 + PPC_NEG(r_A, r_A); 254 + break; 255 + case BPF_S_RET_K: 256 + PPC_LI32(r_ret, K); 257 + if (!K) { 258 + if (ctx->pc_ret0 == -1) 259 + ctx->pc_ret0 = i; 260 + } 261 + /* 262 + * If this isn't the very last instruction, branch to 263 + * the epilogue if we've stuff to clean up. Otherwise, 264 + * if there's nothing to tidy, just return. If we /are/ 265 + * the last instruction, we're about to fall through to 266 + * the epilogue to return. 267 + */ 268 + if (i != flen - 1) { 269 + /* 270 + * Note: 'seen' is properly valid only on pass 271 + * #2. Both parts of this conditional are the 272 + * same instruction size though, meaning the 273 + * first pass will still correctly determine the 274 + * code size/addresses. 
275 + */ 276 + if (ctx->seen) 277 + PPC_JMP(exit_addr); 278 + else 279 + PPC_BLR(); 280 + } 281 + break; 282 + case BPF_S_RET_A: 283 + PPC_MR(r_ret, r_A); 284 + if (i != flen - 1) { 285 + if (ctx->seen) 286 + PPC_JMP(exit_addr); 287 + else 288 + PPC_BLR(); 289 + } 290 + break; 291 + case BPF_S_MISC_TAX: /* X = A */ 292 + PPC_MR(r_X, r_A); 293 + break; 294 + case BPF_S_MISC_TXA: /* A = X */ 295 + ctx->seen |= SEEN_XREG; 296 + PPC_MR(r_A, r_X); 297 + break; 298 + 299 + /*** Constant loads/M[] access ***/ 300 + case BPF_S_LD_IMM: /* A = K */ 301 + PPC_LI32(r_A, K); 302 + break; 303 + case BPF_S_LDX_IMM: /* X = K */ 304 + PPC_LI32(r_X, K); 305 + break; 306 + case BPF_S_LD_MEM: /* A = mem[K] */ 307 + PPC_MR(r_A, r_M + (K & 0xf)); 308 + ctx->seen |= SEEN_MEM | (1<<(K & 0xf)); 309 + break; 310 + case BPF_S_LDX_MEM: /* X = mem[K] */ 311 + PPC_MR(r_X, r_M + (K & 0xf)); 312 + ctx->seen |= SEEN_MEM | (1<<(K & 0xf)); 313 + break; 314 + case BPF_S_ST: /* mem[K] = A */ 315 + PPC_MR(r_M + (K & 0xf), r_A); 316 + ctx->seen |= SEEN_MEM | (1<<(K & 0xf)); 317 + break; 318 + case BPF_S_STX: /* mem[K] = X */ 319 + PPC_MR(r_M + (K & 0xf), r_X); 320 + ctx->seen |= SEEN_XREG | SEEN_MEM | (1<<(K & 0xf)); 321 + break; 322 + case BPF_S_LD_W_LEN: /* A = skb->len; */ 323 + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); 324 + PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, len)); 325 + break; 326 + case BPF_S_LDX_W_LEN: /* X = skb->len; */ 327 + PPC_LWZ_OFFS(r_X, r_skb, offsetof(struct sk_buff, len)); 328 + break; 329 + 330 + /*** Ancillary info loads ***/ 331 + 332 + /* None of the BPF_S_ANC* codes appear to be passed by 333 + * sk_chk_filter(). The interpreter and the x86 BPF 334 + * compiler implement them so we do too -- they may be 335 + * planted in future. 336 + */ 337 + case BPF_S_ANC_PROTOCOL: /* A = ntohs(skb->protocol); */ 338 + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, 339 + protocol) != 2); 340 + PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, 341 + protocol)); 342 + /* ntohs is a NOP with BE loads. */ 343 + break; 344 + case BPF_S_ANC_IFINDEX: 345 + PPC_LD_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff, 346 + dev)); 347 + PPC_CMPDI(r_scratch1, 0); 348 + if (ctx->pc_ret0 != -1) { 349 + PPC_BCC(COND_EQ, addrs[ctx->pc_ret0]); 350 + } else { 351 + /* Exit, returning 0; first pass hits here. 
*/ 352 + PPC_BCC_SHORT(COND_NE, (ctx->idx*4)+12); 353 + PPC_LI(r_ret, 0); 354 + PPC_JMP(exit_addr); 355 + } 356 + BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, 357 + ifindex) != 4); 358 + PPC_LWZ_OFFS(r_A, r_scratch1, 359 + offsetof(struct net_device, ifindex)); 360 + break; 361 + case BPF_S_ANC_MARK: 362 + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); 363 + PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, 364 + mark)); 365 + break; 366 + case BPF_S_ANC_RXHASH: 367 + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, rxhash) != 4); 368 + PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, 369 + rxhash)); 370 + break; 371 + case BPF_S_ANC_QUEUE: 372 + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, 373 + queue_mapping) != 2); 374 + PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, 375 + queue_mapping)); 376 + break; 377 + case BPF_S_ANC_CPU: 378 + #ifdef CONFIG_SMP 379 + /* 380 + * PACA ptr is r13: 381 + * raw_smp_processor_id() = local_paca->paca_index 382 + */ 383 + BUILD_BUG_ON(FIELD_SIZEOF(struct paca_struct, 384 + paca_index) != 2); 385 + PPC_LHZ_OFFS(r_A, 13, 386 + offsetof(struct paca_struct, paca_index)); 387 + #else 388 + PPC_LI(r_A, 0); 389 + #endif 390 + break; 391 + 392 + /*** Absolute loads from packet header/data ***/ 393 + case BPF_S_LD_W_ABS: 394 + func = sk_load_word; 395 + goto common_load; 396 + case BPF_S_LD_H_ABS: 397 + func = sk_load_half; 398 + goto common_load; 399 + case BPF_S_LD_B_ABS: 400 + func = sk_load_byte; 401 + common_load: 402 + /* 403 + * Load from [K]. Reference with the (negative) 404 + * SKF_NET_OFF/SKF_LL_OFF offsets is unsupported. 405 + */ 406 + ctx->seen |= SEEN_DATAREF; 407 + if ((int)K < 0) 408 + return -ENOTSUPP; 409 + PPC_LI64(r_scratch1, func); 410 + PPC_MTLR(r_scratch1); 411 + PPC_LI32(r_addr, K); 412 + PPC_BLRL(); 413 + /* 414 + * Helper returns 'lt' condition on error, and an 415 + * appropriate return value in r3 416 + */ 417 + PPC_BCC(COND_LT, exit_addr); 418 + break; 419 + 420 + /*** Indirect loads from packet header/data ***/ 421 + case BPF_S_LD_W_IND: 422 + func = sk_load_word; 423 + goto common_load_ind; 424 + case BPF_S_LD_H_IND: 425 + func = sk_load_half; 426 + goto common_load_ind; 427 + case BPF_S_LD_B_IND: 428 + func = sk_load_byte; 429 + common_load_ind: 430 + /* 431 + * Load from [X + K]. Negative offsets are tested for 432 + * in the helper functions, and result in a 'ret 0'. 433 + */ 434 + ctx->seen |= SEEN_DATAREF | SEEN_XREG; 435 + PPC_LI64(r_scratch1, func); 436 + PPC_MTLR(r_scratch1); 437 + PPC_ADDI(r_addr, r_X, IMM_L(K)); 438 + if (K >= 32768) 439 + PPC_ADDIS(r_addr, r_addr, IMM_HA(K)); 440 + PPC_BLRL(); 441 + /* If error, cr0.LT set */ 442 + PPC_BCC(COND_LT, exit_addr); 443 + break; 444 + 445 + case BPF_S_LDX_B_MSH: 446 + /* 447 + * x86 version drops packet (RET 0) when K<0, whereas 448 + * interpreter does allow K<0 (__load_pointer, special 449 + * ancillary data). common_load returns ENOTSUPP if K<0, 450 + * so we fall back to interpreter & filter works. 
451 + */ 452 + func = sk_load_byte_msh; 453 + goto common_load; 454 + break; 455 + 456 + /*** Jump and branches ***/ 457 + case BPF_S_JMP_JA: 458 + if (K != 0) 459 + PPC_JMP(addrs[i + 1 + K]); 460 + break; 461 + 462 + case BPF_S_JMP_JGT_K: 463 + case BPF_S_JMP_JGT_X: 464 + true_cond = COND_GT; 465 + goto cond_branch; 466 + case BPF_S_JMP_JGE_K: 467 + case BPF_S_JMP_JGE_X: 468 + true_cond = COND_GE; 469 + goto cond_branch; 470 + case BPF_S_JMP_JEQ_K: 471 + case BPF_S_JMP_JEQ_X: 472 + true_cond = COND_EQ; 473 + goto cond_branch; 474 + case BPF_S_JMP_JSET_K: 475 + case BPF_S_JMP_JSET_X: 476 + true_cond = COND_NE; 477 + /* Fall through */ 478 + cond_branch: 479 + /* same targets, can avoid doing the test :) */ 480 + if (filter[i].jt == filter[i].jf) { 481 + if (filter[i].jt > 0) 482 + PPC_JMP(addrs[i + 1 + filter[i].jt]); 483 + break; 484 + } 485 + 486 + switch (filter[i].code) { 487 + case BPF_S_JMP_JGT_X: 488 + case BPF_S_JMP_JGE_X: 489 + case BPF_S_JMP_JEQ_X: 490 + ctx->seen |= SEEN_XREG; 491 + PPC_CMPLW(r_A, r_X); 492 + break; 493 + case BPF_S_JMP_JSET_X: 494 + ctx->seen |= SEEN_XREG; 495 + PPC_AND_DOT(r_scratch1, r_A, r_X); 496 + break; 497 + case BPF_S_JMP_JEQ_K: 498 + case BPF_S_JMP_JGT_K: 499 + case BPF_S_JMP_JGE_K: 500 + if (K < 32768) 501 + PPC_CMPLWI(r_A, K); 502 + else { 503 + PPC_LI32(r_scratch1, K); 504 + PPC_CMPLW(r_A, r_scratch1); 505 + } 506 + break; 507 + case BPF_S_JMP_JSET_K: 508 + if (K < 32768) 509 + /* PPC_ANDI is /only/ dot-form */ 510 + PPC_ANDI(r_scratch1, r_A, K); 511 + else { 512 + PPC_LI32(r_scratch1, K); 513 + PPC_AND_DOT(r_scratch1, r_A, 514 + r_scratch1); 515 + } 516 + break; 517 + } 518 + /* Sometimes branches are constructed "backward", with 519 + * the false path being the branch and true path being 520 + * a fallthrough to the next instruction. 521 + */ 522 + if (filter[i].jt == 0) 523 + /* Swap the sense of the branch */ 524 + PPC_BCC(true_cond ^ COND_CMP_TRUE, 525 + addrs[i + 1 + filter[i].jf]); 526 + else { 527 + PPC_BCC(true_cond, addrs[i + 1 + filter[i].jt]); 528 + if (filter[i].jf != 0) 529 + PPC_JMP(addrs[i + 1 + filter[i].jf]); 530 + } 531 + break; 532 + default: 533 + /* The filter contains something cruel & unusual. 534 + * We don't handle it, but also there shouldn't be 535 + * anything missing from our list. 536 + */ 537 + if (printk_ratelimit()) 538 + pr_err("BPF filter opcode %04x (@%d) unsupported\n", 539 + filter[i].code, i); 540 + return -ENOTSUPP; 541 + } 542 + 543 + } 544 + /* Set end-of-body-code address for exit. */ 545 + addrs[i] = ctx->idx * 4; 546 + 547 + return 0; 548 + } 549 + 550 + void bpf_jit_compile(struct sk_filter *fp) 551 + { 552 + unsigned int proglen; 553 + unsigned int alloclen; 554 + u32 *image = NULL; 555 + u32 *code_base; 556 + unsigned int *addrs; 557 + struct codegen_context cgctx; 558 + int pass; 559 + int flen = fp->len; 560 + 561 + if (!bpf_jit_enable) 562 + return; 563 + 564 + addrs = kzalloc((flen+1) * sizeof(*addrs), GFP_KERNEL); 565 + if (addrs == NULL) 566 + return; 567 + 568 + /* 569 + * There are multiple assembly passes as the generated code will change 570 + * size as it settles down, figuring out the max branch offsets/exit 571 + * paths required. 572 + * 573 + * The range of standard conditional branches is +/- 32Kbytes. Since 574 + * BPF_MAXINSNS = 4096, we can only jump from (worst case) start to 575 + * finish with 8 bytes/instruction. Not feasible, so long jumps are 576 + * used, distinct from short branches. 
577 + * 578 + * Current: 579 + * 580 + * For now, both branch types assemble to 2 words (short branches padded 581 + * with a NOP); this is less efficient, but assembly will always complete 582 + * after exactly 3 passes: 583 + * 584 + * First pass: No code buffer; Program is "faux-generated" -- no code 585 + * emitted but maximum size of output determined (and addrs[] filled 586 + * in). Also, we note whether we use M[], whether we use skb data, etc. 587 + * All generation choices assumed to be 'worst-case', e.g. branches all 588 + * far (2 instructions), return path code reduction not available, etc. 589 + * 590 + * Second pass: Code buffer allocated with size determined previously. 591 + * Prologue generated to support features we have seen used. Exit paths 592 + * determined and addrs[] is filled in again, as code may be slightly 593 + * smaller as a result. 594 + * 595 + * Third pass: Code generated 'for real', and branch destinations 596 + * determined from now-accurate addrs[] map. 597 + * 598 + * Ideal: 599 + * 600 + * If we optimise this, near branches will be shorter. On the 601 + * first assembly pass, we should err on the side of caution and 602 + * generate the biggest code. On subsequent passes, branches will be 603 + * generated short or long and code size will reduce. With smaller 604 + * code, more branches may fall into the short category, and code will 605 + * reduce more. 606 + * 607 + * Finally, if we see one pass generate code the same size as the 608 + * previous pass we have converged and should now generate code for 609 + * real. Allocating at the end will also save the memory that would 610 + * otherwise be wasted by the (small) current code shrinkage. 611 + * Preferably, we should do a small number of passes (e.g. 5) and if we 612 + * haven't converged by then, get impatient and force code to generate 613 + * as-is, even if the odd branch would be left long. The chances of a 614 + * long jump are tiny with all but the most enormous of BPF filter 615 + * inputs, so we should usually converge on the third pass. 616 + */ 617 + 618 + cgctx.idx = 0; 619 + cgctx.seen = 0; 620 + cgctx.pc_ret0 = -1; 621 + /* Scouting faux-generate pass 0 */ 622 + if (bpf_jit_build_body(fp, 0, &cgctx, addrs)) 623 + /* We hit something illegal or unsupported. */ 624 + goto out; 625 + 626 + /* 627 + * Pretend to build prologue, given the features we've seen. This will 628 + * update ctgtx.idx as it pretends to output instructions, then we can 629 + * calculate total size from idx. 630 + */ 631 + bpf_jit_build_prologue(fp, 0, &cgctx); 632 + bpf_jit_build_epilogue(0, &cgctx); 633 + 634 + proglen = cgctx.idx * 4; 635 + alloclen = proglen + FUNCTION_DESCR_SIZE; 636 + image = module_alloc(max_t(unsigned int, alloclen, 637 + sizeof(struct work_struct))); 638 + if (!image) 639 + goto out; 640 + 641 + code_base = image + (FUNCTION_DESCR_SIZE/4); 642 + 643 + /* Code generation passes 1-2 */ 644 + for (pass = 1; pass < 3; pass++) { 645 + /* Now build the prologue, body code & epilogue for real. 
*/ 646 + cgctx.idx = 0; 647 + bpf_jit_build_prologue(fp, code_base, &cgctx); 648 + bpf_jit_build_body(fp, code_base, &cgctx, addrs); 649 + bpf_jit_build_epilogue(code_base, &cgctx); 650 + 651 + if (bpf_jit_enable > 1) 652 + pr_info("Pass %d: shrink = %d, seen = 0x%x\n", pass, 653 + proglen - (cgctx.idx * 4), cgctx.seen); 654 + } 655 + 656 + if (bpf_jit_enable > 1) 657 + pr_info("flen=%d proglen=%u pass=%d image=%p\n", 658 + flen, proglen, pass, image); 659 + 660 + if (image) { 661 + if (bpf_jit_enable > 1) 662 + print_hex_dump(KERN_ERR, "JIT code: ", 663 + DUMP_PREFIX_ADDRESS, 664 + 16, 1, code_base, 665 + proglen, false); 666 + 667 + bpf_flush_icache(code_base, code_base + (proglen/4)); 668 + /* Function descriptor nastiness: Address + TOC */ 669 + ((u64 *)image)[0] = (u64)code_base; 670 + ((u64 *)image)[1] = local_paca->kernel_toc; 671 + fp->bpf_func = (void *)image; 672 + } 673 + out: 674 + kfree(addrs); 675 + return; 676 + } 677 + 678 + static void jit_free_defer(struct work_struct *arg) 679 + { 680 + module_free(NULL, arg); 681 + } 682 + 683 + /* run from softirq, we must use a work_struct to call 684 + * module_free() from process context 685 + */ 686 + void bpf_jit_free(struct sk_filter *fp) 687 + { 688 + if (fp->bpf_func != sk_run_filter) { 689 + struct work_struct *work = (struct work_struct *)fp->bpf_func; 690 + 691 + INIT_WORK(work, jit_free_defer); 692 + schedule_work(work); 693 + } 694 + }
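
The "function descriptor nastiness" at the end of bpf_jit_compile() exists
because big-endian 64-bit PowerPC (ELFv1) calls functions through an opd
descriptor of { entry address, TOC pointer } rather than through a raw code
pointer. A sketch of the layout planted at the start of the image
(illustrative; the struct and function names here are mine):

#include <stdint.h>

/* FUNCTION_DESCR_SIZE is 24: three doublewords, the last one unused. */
struct ppc64_func_desc {
	uint64_t entry;		/* address of the first generated instruction */
	uint64_t toc;		/* TOC (r2) value for the generated code */
	uint64_t env;		/* environment pointer, unused here */
};

static void plant_descriptor(uint64_t *image, uint32_t *code_base,
			     uint64_t kernel_toc)
{
	/* fp->bpf_func points at the descriptor, so a normal indirect
	 * call through it lands on the generated code with r2 set up. */
	image[0] = (uint64_t)(uintptr_t)code_base;
	image[1] = kernel_toc;
}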