Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

s390/bpf,jit: BPF Just In Time compiler for s390

Add the s390 implementation of the BPF just-in-time (JIT) compiler, which translates socket packet filters into native s390 code to speed up packet filtering.

Reviewed-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

+898
+1
arch/s390/Kbuild
··· 5 5 obj-$(CONFIG_S390_HYPFS_FS) += hypfs/ 6 6 obj-$(CONFIG_APPLDATA_BASE) += appldata/ 7 7 obj-$(CONFIG_MATHEMU) += math-emu/ 8 + obj-y += net/
+1
arch/s390/Kconfig
··· 84 84 select HAVE_KERNEL_XZ 85 85 select HAVE_ARCH_MUTEX_CPU_RELAX 86 86 select HAVE_ARCH_JUMP_LABEL if !MARCH_G5 87 + select HAVE_BPF_JIT if 64BIT && PACK_STACK 87 88 select ARCH_SAVE_PAGE_KEYS if HIBERNATION 88 89 select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE 89 90 select HAVE_MEMBLOCK
+1
arch/s390/include/asm/processor.h
··· 140 140 extern unsigned long thread_saved_pc(struct task_struct *t); 141 141 142 142 extern void show_code(struct pt_regs *regs); 143 + extern void print_fn_code(unsigned char *code, unsigned long len); 143 144 144 145 unsigned long get_wchan(struct task_struct *p); 145 146 #define task_pt_regs(tsk) ((struct pt_regs *) \
+23
arch/s390/kernel/dis.c
/**
 * print_fn_code - disassemble a block of machine code to the kernel log
 * @code: first byte of the instruction stream
 * @len:  number of bytes to disassemble
 *
 * Prints one line per instruction: the address, the raw instruction bytes
 * in hex and the text produced by print_insn().  Disassembly stops early
 * if the last instruction would extend beyond @len.
 */
void print_fn_code(unsigned char *code, unsigned long len)
{
	/*
	 * Address prefix + up to 12 hex digits + tabs already use ~32 bytes;
	 * leave generous room for the disassembled text.
	 * NOTE(review): print_insn() takes no buffer size, so this is head
	 * room, not a hard bound - confirm the longest mnemonic/operand text.
	 */
	char buffer[128], *ptr;
	int opsize, i;

	while (len) {
		ptr = buffer;
		opsize = insn_length(*code);
		/*
		 * A truncated trailing instruction must end the loop:
		 * "len -= opsize" on an unsigned value would otherwise wrap
		 * around and the loop would read far past the buffer.
		 */
		if ((unsigned long) opsize > len)
			break;
		ptr += sprintf(ptr, "%p: ", code);
		for (i = 0; i < opsize; i++)
			ptr += sprintf(ptr, "%02x", code[i]);
		*ptr++ = '\t';
		/* Keep the mnemonic column aligned for 2-byte instructions */
		if (i < 4)
			*ptr++ = '\t';
		ptr += print_insn(ptr, code, (unsigned long) code);
		*ptr++ = '\n';
		*ptr++ = 0;
		/* Never use generated text as the printk format string */
		printk("%s", buffer);
		code += opsize;
		len -= opsize;
	}
}
+4
arch/s390/net/Makefile
··· 1 + # 2 + # Arch-specific network modules 3 + # 4 + obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o
+130
arch/s390/net/bpf_jit.S
/*
 * BPF Jit compiler for s390, help functions.
 *
 * Copyright IBM Corp. 2012
 *
 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */
#include <linux/linkage.h>

/*
 * Calling convention:
 * registers %r2, %r6-%r8, %r10-%r11, %r13, %r15 are call saved
 *   %r2: skb pointer
 *   %r3: offset parameter
 *   %r5: BPF A accumulator
 *   %r8: return address
 *   %r9: save register for skb pointer
 *   %r10: skb->data
 *   %r11: skb->len - skb->data_len (headlen)
 *   %r12: BPF X accumulator
 *
 * Every helper returns with the condition code set: cc == 0 means the
 * load succeeded, cc != 0 makes the generated code jump to "return 0".
 * %r1 is used as scratch and holds the zero extended offset.
 *
 * skb_copy_bits(skb, offset, to, len) takes 4 parameters:
 *   %r2 = skb pointer
 *   %r3 = offset into skb data
 *   %r4 = pointer to temp buffer
 *   %r5 = length to copy
 *
 * The slow paths use the 4 bytes at 160(%r15) as temp buffer.  The data
 * is copied right-aligned into that word and the leading bytes are
 * cleared, so a single 32 bit load yields the zero extended result.
 */

/* A = *(u32 *) (skb->data+K+X) */
ENTRY(sk_load_word_ind)
	ar	%r3,%r12		# offset += X
	bmr	%r8			# < 0 -> return with cc

/* A = *(u32 *) (skb->data+K) */
ENTRY(sk_load_word)
	llgfr	%r1,%r3			# extend offset
	ahi	%r3,4			# offset + 4
	clr	%r11,%r3		# hlen < offset + 4 ?
	jl	sk_load_word_slow
	l	%r5,0(%r1,%r10)		# get word from skb
	xr	%r1,%r1			# set cc to zero
	br	%r8

sk_load_word_slow:
	lgr	%r9,%r2			# save %r2
	lgr	%r3,%r1			# original offset, %r3 was advanced
	la	%r4,160(%r15)		# pointer to temp buffer
	lghi	%r5,4			# 4 bytes
	brasl	%r14,skb_copy_bits	# get data from skb
	l	%r5,160(%r15)		# load result from temp buffer
	ltgr	%r2,%r2			# set cc to (%r2 != 0)
	lgr	%r2,%r9			# restore %r2
	br	%r8

/* A = *(u16 *) (skb->data+K+X) */
ENTRY(sk_load_half_ind)
	ar	%r3,%r12		# offset += X
	bmr	%r8			# < 0 -> return with cc

/* A = *(u16 *) (skb->data+K) */
ENTRY(sk_load_half)
	llgfr	%r1,%r3			# extend offset
	ahi	%r3,2			# offset + 2
	clr	%r11,%r3		# hlen < offset + 2 ?
	jl	sk_load_half_slow
	llgh	%r5,0(%r1,%r10)		# get half from skb
	xr	%r1,%r1			# set cc to zero
	br	%r8

sk_load_half_slow:
	lgr	%r9,%r2			# save %r2
	lgr	%r3,%r1			# original offset, %r3 was advanced
	la	%r4,162(%r15)		# pointer to temp buffer
	lghi	%r5,2			# 2 bytes
	brasl	%r14,skb_copy_bits	# get data from skb
	xc	160(2,%r15),160(%r15)	# clear upper half of the word
	l	%r5,160(%r15)		# load result from temp buffer
	ltgr	%r2,%r2			# set cc to (%r2 != 0)
	lgr	%r2,%r9			# restore %r2
	br	%r8

/* A = *(u8 *) (skb->data+K+X) */
ENTRY(sk_load_byte_ind)
	ar	%r3,%r12		# offset += X
	bmr	%r8			# < 0 -> return with cc

/* A = *(u8 *) (skb->data+K) */
ENTRY(sk_load_byte)
	llgfr	%r1,%r3			# extend offset
	clr	%r11,%r3		# hlen <= offset ?
	jle	sk_load_byte_slow
	lhi	%r5,0
	ic	%r5,0(%r1,%r10)		# get byte from skb
	xr	%r1,%r1			# set cc to zero
	br	%r8

sk_load_byte_slow:
	lgr	%r9,%r2			# save %r2
	lgr	%r3,%r1			# offset
	la	%r4,163(%r15)		# pointer to temp buffer
	lghi	%r5,1			# 1 byte
	brasl	%r14,skb_copy_bits	# get data from skb
	xc	160(3,%r15),160(%r15)	# clear upper 3 bytes of the word
	l	%r5,160(%r15)		# load result from temp buffer
	ltgr	%r2,%r2			# set cc to (%r2 != 0)
	lgr	%r2,%r9			# restore %r2
	br	%r8

/* X = (*(u8 *)(skb->data+K) & 0xf) << 2 */
ENTRY(sk_load_byte_msh)
	llgfr	%r1,%r3			# extend offset
	clr	%r11,%r3		# hlen <= offset ?
	jle	sk_load_byte_msh_slow	# must load X, not A
	lhi	%r12,0
	ic	%r12,0(%r1,%r10)	# get byte from skb
	nill	%r12,0x0f
	sll	%r12,2
	xr	%r1,%r1			# set cc to zero
	br	%r8

/*
 * NOTE(review): %r5 (the A accumulator) is a call clobbered register and
 * is used as length parameter below, so A does not survive this slow
 * path.  Preserving it needs a spare save slot or register that this
 * calling convention does not provide - confirm and fix together with
 * the JIT prologue.
 */
sk_load_byte_msh_slow:
	lgr	%r9,%r2			# save %r2
	lgr	%r3,%r1			# offset
	la	%r4,163(%r15)		# pointer to temp buffer
	lghi	%r5,1			# 1 byte
	brasl	%r14,skb_copy_bits	# get data from skb
	xc	160(3,%r15),160(%r15)	# clear upper 3 bytes of the word
	l	%r12,160(%r15)		# load result from temp buffer
	nill	%r12,0x0f
	sll	%r12,2
	ltgr	%r2,%r2			# set cc to (%r2 != 0)
	lgr	%r2,%r9			# restore %r2
	br	%r8
+738
arch/s390/net/bpf_jit_comp.c
··· 1 + /* 2 + * BPF Jit compiler for s390. 3 + * 4 + * Copyright IBM Corp. 2012 5 + * 6 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> 7 + */ 8 + #include <linux/moduleloader.h> 9 + #include <linux/netdevice.h> 10 + #include <linux/filter.h> 11 + #include <asm/cacheflush.h> 12 + #include <asm/processor.h> 13 + 14 + /* 15 + * Conventions: 16 + * %r2 = skb pointer 17 + * %r3 = offset parameter 18 + * %r4 = scratch register / length parameter 19 + * %r5 = BPF A accumulator 20 + * %r8 = return address 21 + * %r9 = save register for skb pointer 22 + * %r10 = skb->data 23 + * %r11 = skb->len - skb->data_len (headlen) 24 + * %r12 = BPF X accumulator 25 + * %r13 = literal pool pointer 26 + * 0(%r15) - 63(%r15) scratch memory array with BPF_MEMWORDS 27 + */ 28 + int bpf_jit_enable __read_mostly; 29 + 30 + /* 31 + * assembly code in arch/x86/net/bpf_jit.S 32 + */ 33 + extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[]; 34 + extern u8 sk_load_word_ind[], sk_load_half_ind[], sk_load_byte_ind[]; 35 + 36 + struct bpf_jit { 37 + unsigned int seen; 38 + u8 *start; 39 + u8 *prg; 40 + u8 *mid; 41 + u8 *lit; 42 + u8 *end; 43 + u8 *base_ip; 44 + u8 *ret0_ip; 45 + u8 *exit_ip; 46 + unsigned int off_load_word; 47 + unsigned int off_load_half; 48 + unsigned int off_load_byte; 49 + unsigned int off_load_bmsh; 50 + unsigned int off_load_iword; 51 + unsigned int off_load_ihalf; 52 + unsigned int off_load_ibyte; 53 + }; 54 + 55 + #define BPF_SIZE_MAX 4096 /* Max size for program */ 56 + 57 + #define SEEN_DATAREF 1 /* might call external helpers */ 58 + #define SEEN_XREG 2 /* ebx is used */ 59 + #define SEEN_MEM 4 /* use mem[] for temporary storage */ 60 + #define SEEN_RET0 8 /* pc_ret0 points to a valid return 0 */ 61 + #define SEEN_LITERAL 16 /* code uses literals */ 62 + #define SEEN_LOAD_WORD 32 /* code uses sk_load_word */ 63 + #define SEEN_LOAD_HALF 64 /* code uses sk_load_half */ 64 + #define SEEN_LOAD_BYTE 128 /* code uses sk_load_byte */ 65 + 
#define SEEN_LOAD_BMSH 256 /* code uses sk_load_byte_msh */ 66 + #define SEEN_LOAD_IWORD 512 /* code uses sk_load_word_ind */ 67 + #define SEEN_LOAD_IHALF 1024 /* code uses sk_load_half_ind */ 68 + #define SEEN_LOAD_IBYTE 2048 /* code uses sk_load_byte_ind */ 69 + 70 + #define EMIT2(op) \ 71 + ({ \ 72 + if (jit->prg + 2 <= jit->mid) \ 73 + *(u16 *) jit->prg = op; \ 74 + jit->prg += 2; \ 75 + }) 76 + 77 + #define EMIT4(op) \ 78 + ({ \ 79 + if (jit->prg + 4 <= jit->mid) \ 80 + *(u32 *) jit->prg = op; \ 81 + jit->prg += 4; \ 82 + }) 83 + 84 + #define EMIT4_DISP(op, disp) \ 85 + ({ \ 86 + unsigned int __disp = (disp) & 0xfff; \ 87 + EMIT4(op | __disp); \ 88 + }) 89 + 90 + #define EMIT4_IMM(op, imm) \ 91 + ({ \ 92 + unsigned int __imm = (imm) & 0xffff; \ 93 + EMIT4(op | __imm); \ 94 + }) 95 + 96 + #define EMIT4_PCREL(op, pcrel) \ 97 + ({ \ 98 + long __pcrel = ((pcrel) >> 1) & 0xffff; \ 99 + EMIT4(op | __pcrel); \ 100 + }) 101 + 102 + #define EMIT6(op1, op2) \ 103 + ({ \ 104 + if (jit->prg + 6 <= jit->mid) { \ 105 + *(u32 *) jit->prg = op1; \ 106 + *(u16 *) (jit->prg + 4) = op2; \ 107 + } \ 108 + jit->prg += 6; \ 109 + }) 110 + 111 + #define EMIT6_DISP(op1, op2, disp) \ 112 + ({ \ 113 + unsigned int __disp = (disp) & 0xfff; \ 114 + EMIT6(op1 | __disp, op2); \ 115 + }) 116 + 117 + #define EMIT_CONST(val) \ 118 + ({ \ 119 + unsigned int ret; \ 120 + ret = (unsigned int) (jit->lit - jit->base_ip); \ 121 + jit->seen |= SEEN_LITERAL; \ 122 + if (jit->lit + 4 <= jit->end) \ 123 + *(u32 *) jit->lit = val; \ 124 + jit->lit += 4; \ 125 + ret; \ 126 + }) 127 + 128 + #define EMIT_FN_CONST(bit, fn) \ 129 + ({ \ 130 + unsigned int ret; \ 131 + ret = (unsigned int) (jit->lit - jit->base_ip); \ 132 + if (jit->seen & bit) { \ 133 + jit->seen |= SEEN_LITERAL; \ 134 + if (jit->lit + 8 <= jit->end) \ 135 + *(void **) jit->lit = fn; \ 136 + jit->lit += 8; \ 137 + } \ 138 + ret; \ 139 + }) 140 + 141 + static void bpf_jit_prologue(struct bpf_jit *jit) 142 + { 143 + /* Save registers and 
create stack frame if necessary */ 144 + if (jit->seen & SEEN_DATAREF) { 145 + /* stmg %r8,%r15,88(%r15) */ 146 + EMIT6(0xeb8ff058, 0x0024); 147 + /* lgr %r14,%r15 */ 148 + EMIT4(0xb90400ef); 149 + /* ahi %r15,<offset> */ 150 + EMIT4_IMM(0xa7fa0000, (jit->seen & SEEN_MEM) ? -112 : -80); 151 + /* stg %r14,152(%r15) */ 152 + EMIT6(0xe3e0f098, 0x0024); 153 + } else if ((jit->seen & SEEN_XREG) && (jit->seen & SEEN_LITERAL)) 154 + /* stmg %r12,%r13,120(%r15) */ 155 + EMIT6(0xebcdf078, 0x0024); 156 + else if (jit->seen & SEEN_XREG) 157 + /* stg %r12,120(%r15) */ 158 + EMIT6(0xe3c0f078, 0x0024); 159 + else if (jit->seen & SEEN_LITERAL) 160 + /* stg %r13,128(%r15) */ 161 + EMIT6(0xe3d0f080, 0x0024); 162 + 163 + /* Setup literal pool */ 164 + if (jit->seen & SEEN_LITERAL) { 165 + /* basr %r13,0 */ 166 + EMIT2(0x0dd0); 167 + jit->base_ip = jit->prg; 168 + } 169 + jit->off_load_word = EMIT_FN_CONST(SEEN_LOAD_WORD, sk_load_word); 170 + jit->off_load_half = EMIT_FN_CONST(SEEN_LOAD_HALF, sk_load_half); 171 + jit->off_load_byte = EMIT_FN_CONST(SEEN_LOAD_BYTE, sk_load_byte); 172 + jit->off_load_bmsh = EMIT_FN_CONST(SEEN_LOAD_BMSH, sk_load_byte_msh); 173 + jit->off_load_iword = EMIT_FN_CONST(SEEN_LOAD_IWORD, sk_load_word_ind); 174 + jit->off_load_ihalf = EMIT_FN_CONST(SEEN_LOAD_IHALF, sk_load_half_ind); 175 + jit->off_load_ibyte = EMIT_FN_CONST(SEEN_LOAD_IBYTE, sk_load_byte_ind); 176 + 177 + /* Filter needs to access skb data */ 178 + if (jit->seen & SEEN_DATAREF) { 179 + /* l %r11,<len>(%r2) */ 180 + EMIT4_DISP(0x58b02000, offsetof(struct sk_buff, len)); 181 + /* s %r11,<data_len>(%r2) */ 182 + EMIT4_DISP(0x5bb02000, offsetof(struct sk_buff, data_len)); 183 + /* lg %r10,<data>(%r2) */ 184 + EMIT6_DISP(0xe3a02000, 0x0004, 185 + offsetof(struct sk_buff, data)); 186 + } 187 + } 188 + 189 + static void bpf_jit_epilogue(struct bpf_jit *jit) 190 + { 191 + /* Return 0 */ 192 + if (jit->seen & SEEN_RET0) { 193 + jit->ret0_ip = jit->prg; 194 + /* lghi %r2,0 */ 195 + EMIT4(0xa7290000); 196 
+ } 197 + jit->exit_ip = jit->prg; 198 + /* Restore registers */ 199 + if (jit->seen & SEEN_DATAREF) 200 + /* lmg %r8,%r15,<offset>(%r15) */ 201 + EMIT6_DISP(0xeb8ff000, 0x0004, 202 + (jit->seen & SEEN_MEM) ? 200 : 168); 203 + else if ((jit->seen & SEEN_XREG) && (jit->seen & SEEN_LITERAL)) 204 + /* lmg %r12,%r13,120(%r15) */ 205 + EMIT6(0xebcdf078, 0x0004); 206 + else if (jit->seen & SEEN_XREG) 207 + /* lg %r12,120(%r15) */ 208 + EMIT6(0xe3c0f078, 0x0004); 209 + else if (jit->seen & SEEN_LITERAL) 210 + /* lg %r13,128(%r15) */ 211 + EMIT6(0xe3d0f080, 0x0004); 212 + /* br %r14 */ 213 + EMIT2(0x07fe); 214 + } 215 + 216 + /* 217 + * make sure we dont leak kernel information to user 218 + */ 219 + static void bpf_jit_noleaks(struct bpf_jit *jit, struct sock_filter *filter) 220 + { 221 + /* Clear temporary memory if (seen & SEEN_MEM) */ 222 + if (jit->seen & SEEN_MEM) 223 + /* xc 0(64,%r15),0(%r15) */ 224 + EMIT6(0xd73ff000, 0xf000); 225 + /* Clear X if (seen & SEEN_XREG) */ 226 + if (jit->seen & SEEN_XREG) 227 + /* lhi %r12,0 */ 228 + EMIT4(0xa7c80000); 229 + /* Clear A if the first register does not set it. 
*/ 230 + switch (filter[0].code) { 231 + case BPF_S_LD_W_ABS: 232 + case BPF_S_LD_H_ABS: 233 + case BPF_S_LD_B_ABS: 234 + case BPF_S_LD_W_LEN: 235 + case BPF_S_LD_W_IND: 236 + case BPF_S_LD_H_IND: 237 + case BPF_S_LD_B_IND: 238 + case BPF_S_LDX_B_MSH: 239 + case BPF_S_LD_IMM: 240 + case BPF_S_LD_MEM: 241 + case BPF_S_MISC_TXA: 242 + case BPF_S_ANC_PROTOCOL: 243 + case BPF_S_ANC_PKTTYPE: 244 + case BPF_S_ANC_IFINDEX: 245 + case BPF_S_ANC_MARK: 246 + case BPF_S_ANC_QUEUE: 247 + case BPF_S_ANC_HATYPE: 248 + case BPF_S_ANC_RXHASH: 249 + case BPF_S_ANC_CPU: 250 + case BPF_S_RET_K: 251 + /* first instruction sets A register */ 252 + break; 253 + default: /* A = 0 */ 254 + /* lhi %r5,0 */ 255 + EMIT4(0xa7580000); 256 + } 257 + } 258 + 259 + static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter, 260 + unsigned int *addrs, int i, int last) 261 + { 262 + unsigned int K; 263 + int offset; 264 + unsigned int mask; 265 + 266 + K = filter->k; 267 + switch (filter->code) { 268 + case BPF_S_ALU_ADD_X: /* A += X */ 269 + jit->seen |= SEEN_XREG; 270 + /* ar %r5,%r12 */ 271 + EMIT2(0x1a5c); 272 + break; 273 + case BPF_S_ALU_ADD_K: /* A += K */ 274 + if (!K) 275 + break; 276 + if (K <= 16383) 277 + /* ahi %r5,<K> */ 278 + EMIT4_IMM(0xa75a0000, K); 279 + else 280 + /* a %r5,<d(K)>(%r13) */ 281 + EMIT4_DISP(0x5a50d000, EMIT_CONST(K)); 282 + break; 283 + case BPF_S_ALU_SUB_X: /* A -= X */ 284 + jit->seen |= SEEN_XREG; 285 + /* sr %r5,%r12 */ 286 + EMIT2(0x1b5c); 287 + break; 288 + case BPF_S_ALU_SUB_K: /* A -= K */ 289 + if (!K) 290 + break; 291 + if (K <= 16384) 292 + /* ahi %r5,-K */ 293 + EMIT4_IMM(0xa75a0000, -K); 294 + else 295 + /* s %r5,<d(K)>(%r13) */ 296 + EMIT4_DISP(0x5b50d000, EMIT_CONST(K)); 297 + break; 298 + case BPF_S_ALU_MUL_X: /* A *= X */ 299 + jit->seen |= SEEN_XREG; 300 + /* msr %r5,%r12 */ 301 + EMIT4(0xb252005c); 302 + break; 303 + case BPF_S_ALU_MUL_K: /* A *= K */ 304 + if (K <= 16383) 305 + /* mhi %r5,K */ 306 + EMIT4_IMM(0xa75c0000, K); 307 + 
else 308 + /* ms %r5,<d(K)>(%r13) */ 309 + EMIT4_DISP(0x7150d000, EMIT_CONST(K)); 310 + break; 311 + case BPF_S_ALU_DIV_X: /* A /= X */ 312 + jit->seen |= SEEN_XREG | SEEN_RET0; 313 + /* ltr %r12,%r12 */ 314 + EMIT2(0x12cc); 315 + /* jz <ret0> */ 316 + EMIT4_PCREL(0xa7840000, (jit->ret0_ip - jit->prg)); 317 + /* lhi %r4,0 */ 318 + EMIT4(0xa7480000); 319 + /* dr %r4,%r12 */ 320 + EMIT2(0x1d4c); 321 + break; 322 + case BPF_S_ALU_DIV_K: /* A = reciprocal_divide(A, K) */ 323 + /* m %r4,<d(K)>(%r13) */ 324 + EMIT4_DISP(0x5c40d000, EMIT_CONST(K)); 325 + /* lr %r5,%r4 */ 326 + EMIT2(0x1854); 327 + break; 328 + case BPF_S_ALU_AND_X: /* A &= X */ 329 + jit->seen |= SEEN_XREG; 330 + /* nr %r5,%r12 */ 331 + EMIT2(0x145c); 332 + break; 333 + case BPF_S_ALU_AND_K: /* A &= K */ 334 + /* n %r5,<d(K)>(%r13) */ 335 + EMIT4_DISP(0x5450d000, EMIT_CONST(K)); 336 + break; 337 + case BPF_S_ALU_OR_X: /* A |= X */ 338 + jit->seen |= SEEN_XREG; 339 + /* or %r5,%r12 */ 340 + EMIT2(0x165c); 341 + break; 342 + case BPF_S_ALU_OR_K: /* A |= K */ 343 + /* o %r5,<d(K)>(%r13) */ 344 + EMIT4_DISP(0x5650d000, EMIT_CONST(K)); 345 + break; 346 + case BPF_S_ALU_LSH_X: /* A <<= X; */ 347 + jit->seen |= SEEN_XREG; 348 + /* sll %r5,0(%r12) */ 349 + EMIT4(0x8950c000); 350 + break; 351 + case BPF_S_ALU_LSH_K: /* A <<= K */ 352 + if (K == 0) 353 + break; 354 + /* sll %r5,K */ 355 + EMIT4_DISP(0x89500000, K); 356 + break; 357 + case BPF_S_ALU_RSH_X: /* A >>= X; */ 358 + jit->seen |= SEEN_XREG; 359 + /* srl %r5,0(%r12) */ 360 + EMIT4(0x8850c000); 361 + break; 362 + case BPF_S_ALU_RSH_K: /* A >>= K; */ 363 + if (K == 0) 364 + break; 365 + /* srl %r5,K */ 366 + EMIT4_DISP(0x88500000, K); 367 + break; 368 + case BPF_S_ALU_NEG: /* A = -A */ 369 + /* lnr %r5,%r5 */ 370 + EMIT2(0x1155); 371 + break; 372 + case BPF_S_JMP_JA: /* ip += K */ 373 + offset = addrs[i + K] + jit->start - jit->prg; 374 + EMIT4_PCREL(0xa7f40000, offset); 375 + break; 376 + case BPF_S_JMP_JGT_K: /* ip += (A > K) ? 
jt : jf */ 377 + mask = 0x200000; /* jh */ 378 + goto kbranch; 379 + case BPF_S_JMP_JGE_K: /* ip += (A >= K) ? jt : jf */ 380 + mask = 0xa00000; /* jhe */ 381 + goto kbranch; 382 + case BPF_S_JMP_JEQ_K: /* ip += (A == K) ? jt : jf */ 383 + mask = 0x800000; /* je */ 384 + kbranch: /* Emit compare if the branch targets are different */ 385 + if (filter->jt != filter->jf) { 386 + if (K <= 16383) 387 + /* chi %r5,<K> */ 388 + EMIT4_IMM(0xa75e0000, K); 389 + else 390 + /* c %r5,<d(K)>(%r13) */ 391 + EMIT4_DISP(0x5950d000, EMIT_CONST(K)); 392 + } 393 + branch: if (filter->jt == filter->jf) { 394 + if (filter->jt == 0) 395 + break; 396 + /* j <jt> */ 397 + offset = addrs[i + filter->jt] + jit->start - jit->prg; 398 + EMIT4_PCREL(0xa7f40000, offset); 399 + break; 400 + } 401 + if (filter->jt != 0) { 402 + /* brc <mask>,<jt> */ 403 + offset = addrs[i + filter->jt] + jit->start - jit->prg; 404 + EMIT4_PCREL(0xa7040000 | mask, offset); 405 + } 406 + if (filter->jf != 0) { 407 + /* brc <mask^15>,<jf> */ 408 + offset = addrs[i + filter->jf] + jit->start - jit->prg; 409 + EMIT4_PCREL(0xa7040000 | (mask ^ 0xf00000), offset); 410 + } 411 + break; 412 + case BPF_S_JMP_JSET_K: /* ip += (A & K) ? jt : jf */ 413 + mask = 0x700000; /* jnz */ 414 + /* Emit test if the branch targets are different */ 415 + if (filter->jt != filter->jf) { 416 + if (K > 65535) { 417 + /* lr %r4,%r5 */ 418 + EMIT2(0x1845); 419 + /* n %r4,<d(K)>(%r13) */ 420 + EMIT4_DISP(0x5440d000, EMIT_CONST(K)); 421 + } else 422 + /* tmll %r5,K */ 423 + EMIT4_IMM(0xa7510000, K); 424 + } 425 + goto branch; 426 + case BPF_S_JMP_JGT_X: /* ip += (A > X) ? jt : jf */ 427 + mask = 0x200000; /* jh */ 428 + goto xbranch; 429 + case BPF_S_JMP_JGE_X: /* ip += (A >= X) ? jt : jf */ 430 + mask = 0xa00000; /* jhe */ 431 + goto xbranch; 432 + case BPF_S_JMP_JEQ_X: /* ip += (A == X) ? 
jt : jf */ 433 + mask = 0x800000; /* je */ 434 + xbranch: /* Emit compare if the branch targets are different */ 435 + if (filter->jt != filter->jf) { 436 + jit->seen |= SEEN_XREG; 437 + /* cr %r5,%r12 */ 438 + EMIT2(0x195c); 439 + } 440 + goto branch; 441 + case BPF_S_JMP_JSET_X: /* ip += (A & X) ? jt : jf */ 442 + mask = 0x700000; /* jnz */ 443 + /* Emit test if the branch targets are different */ 444 + if (filter->jt != filter->jf) { 445 + jit->seen |= SEEN_XREG; 446 + /* lr %r4,%r5 */ 447 + EMIT2(0x1845); 448 + /* nr %r4,%r12 */ 449 + EMIT2(0x144c); 450 + } 451 + goto branch; 452 + case BPF_S_LD_W_ABS: /* A = *(u32 *) (skb->data+K) */ 453 + jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_WORD; 454 + offset = jit->off_load_word; 455 + goto load_abs; 456 + case BPF_S_LD_H_ABS: /* A = *(u16 *) (skb->data+K) */ 457 + jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_HALF; 458 + offset = jit->off_load_half; 459 + goto load_abs; 460 + case BPF_S_LD_B_ABS: /* A = *(u8 *) (skb->data+K) */ 461 + jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_BYTE; 462 + offset = jit->off_load_byte; 463 + load_abs: if ((int) K < 0) 464 + goto out; 465 + call_fn: /* lg %r1,<d(function)>(%r13) */ 466 + EMIT6_DISP(0xe310d000, 0x0004, offset); 467 + /* l %r3,<d(K)>(%r13) */ 468 + EMIT4_DISP(0x5830d000, EMIT_CONST(K)); 469 + /* basr %r8,%r1 */ 470 + EMIT2(0x0d81); 471 + /* jnz <ret0> */ 472 + EMIT4_PCREL(0xa7740000, (jit->ret0_ip - jit->prg)); 473 + break; 474 + case BPF_S_LD_W_IND: /* A = *(u32 *) (skb->data+K+X) */ 475 + jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_IWORD; 476 + offset = jit->off_load_iword; 477 + goto call_fn; 478 + case BPF_S_LD_H_IND: /* A = *(u16 *) (skb->data+K+X) */ 479 + jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_IHALF; 480 + offset = jit->off_load_ihalf; 481 + goto call_fn; 482 + case BPF_S_LD_B_IND: /* A = *(u8 *) (skb->data+K+X) */ 483 + jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_IBYTE; 484 + offset = jit->off_load_ibyte; 485 + goto call_fn; 
486 + case BPF_S_LDX_B_MSH: 487 + /* X = (*(u8 *)(skb->data+K) & 0xf) << 2 */ 488 + jit->seen |= SEEN_RET0; 489 + if ((int) K < 0) { 490 + /* j <ret0> */ 491 + EMIT4_PCREL(0xa7f40000, (jit->ret0_ip - jit->prg)); 492 + break; 493 + } 494 + jit->seen |= SEEN_DATAREF | SEEN_LOAD_BMSH; 495 + offset = jit->off_load_bmsh; 496 + goto call_fn; 497 + case BPF_S_LD_W_LEN: /* A = skb->len; */ 498 + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); 499 + /* l %r5,<d(len)>(%r2) */ 500 + EMIT4_DISP(0x58502000, offsetof(struct sk_buff, len)); 501 + break; 502 + case BPF_S_LDX_W_LEN: /* X = skb->len; */ 503 + jit->seen |= SEEN_XREG; 504 + /* l %r12,<d(len)>(%r2) */ 505 + EMIT4_DISP(0x58c02000, offsetof(struct sk_buff, len)); 506 + break; 507 + case BPF_S_LD_IMM: /* A = K */ 508 + if (K <= 16383) 509 + /* lhi %r5,K */ 510 + EMIT4_IMM(0xa7580000, K); 511 + else 512 + /* l %r5,<d(K)>(%r13) */ 513 + EMIT4_DISP(0x5850d000, EMIT_CONST(K)); 514 + break; 515 + case BPF_S_LDX_IMM: /* X = K */ 516 + jit->seen |= SEEN_XREG; 517 + if (K <= 16383) 518 + /* lhi %r12,<K> */ 519 + EMIT4_IMM(0xa7c80000, K); 520 + else 521 + /* l %r12,<d(K)>(%r13) */ 522 + EMIT4_DISP(0x58c0d000, EMIT_CONST(K)); 523 + break; 524 + case BPF_S_LD_MEM: /* A = mem[K] */ 525 + jit->seen |= SEEN_MEM; 526 + /* l %r5,<K>(%r15) */ 527 + EMIT4_DISP(0x5850f000, 528 + (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4); 529 + break; 530 + case BPF_S_LDX_MEM: /* X = mem[K] */ 531 + jit->seen |= SEEN_XREG | SEEN_MEM; 532 + /* l %r12,<K>(%r15) */ 533 + EMIT4_DISP(0x58c0f000, 534 + (jit->seen & SEEN_DATAREF) ? 
160 + K*4 : K*4); 535 + break; 536 + case BPF_S_MISC_TAX: /* X = A */ 537 + jit->seen |= SEEN_XREG; 538 + /* lr %r12,%r5 */ 539 + EMIT2(0x18c5); 540 + break; 541 + case BPF_S_MISC_TXA: /* A = X */ 542 + jit->seen |= SEEN_XREG; 543 + /* lr %r5,%r12 */ 544 + EMIT2(0x185c); 545 + break; 546 + case BPF_S_RET_K: 547 + if (K == 0) { 548 + jit->seen |= SEEN_RET0; 549 + if (last) 550 + break; 551 + /* j <ret0> */ 552 + EMIT4_PCREL(0xa7f40000, jit->ret0_ip - jit->prg); 553 + } else { 554 + if (K <= 16383) 555 + /* lghi %r2,K */ 556 + EMIT4_IMM(0xa7290000, K); 557 + else 558 + /* llgf %r2,<K>(%r13) */ 559 + EMIT6_DISP(0xe320d000, 0x0016, EMIT_CONST(K)); 560 + /* j <exit> */ 561 + if (last && !(jit->seen & SEEN_RET0)) 562 + break; 563 + EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg); 564 + } 565 + break; 566 + case BPF_S_RET_A: 567 + /* llgfr %r2,%r5 */ 568 + EMIT4(0xb9160025); 569 + /* j <exit> */ 570 + EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg); 571 + break; 572 + case BPF_S_ST: /* mem[K] = A */ 573 + jit->seen |= SEEN_MEM; 574 + /* st %r5,<K>(%r15) */ 575 + EMIT4_DISP(0x5050f000, 576 + (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4); 577 + break; 578 + case BPF_S_STX: /* mem[K] = X : mov %ebx,off8(%rbp) */ 579 + jit->seen |= SEEN_XREG | SEEN_MEM; 580 + /* st %r12,<K>(%r15) */ 581 + EMIT4_DISP(0x50c0f000, 582 + (jit->seen & SEEN_DATAREF) ? 
160 + K*4 : K*4); 583 + break; 584 + case BPF_S_ANC_PROTOCOL: /* A = ntohs(skb->protocol); */ 585 + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2); 586 + /* lhi %r5,0 */ 587 + EMIT4(0xa7580000); 588 + /* icm %r5,3,<d(protocol)>(%r2) */ 589 + EMIT4_DISP(0xbf532000, offsetof(struct sk_buff, protocol)); 590 + break; 591 + case BPF_S_ANC_IFINDEX: /* if (!skb->dev) return 0; 592 + * A = skb->dev->ifindex */ 593 + BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); 594 + jit->seen |= SEEN_RET0; 595 + /* lg %r1,<d(dev)>(%r2) */ 596 + EMIT6_DISP(0xe3102000, 0x0004, offsetof(struct sk_buff, dev)); 597 + /* ltgr %r1,%r1 */ 598 + EMIT4(0xb9020011); 599 + /* jz <ret0> */ 600 + EMIT4_PCREL(0xa7840000, jit->ret0_ip - jit->prg); 601 + /* l %r5,<d(ifindex)>(%r1) */ 602 + EMIT4_DISP(0x58501000, offsetof(struct net_device, ifindex)); 603 + break; 604 + case BPF_S_ANC_MARK: /* A = skb->mark */ 605 + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); 606 + /* l %r5,<d(mark)>(%r2) */ 607 + EMIT4_DISP(0x58502000, offsetof(struct sk_buff, mark)); 608 + break; 609 + case BPF_S_ANC_QUEUE: /* A = skb->queue_mapping */ 610 + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2); 611 + /* lhi %r5,0 */ 612 + EMIT4(0xa7580000); 613 + /* icm %r5,3,<d(queue_mapping)>(%r2) */ 614 + EMIT4_DISP(0xbf532000, offsetof(struct sk_buff, queue_mapping)); 615 + break; 616 + case BPF_S_ANC_HATYPE: /* if (!skb->dev) return 0; 617 + * A = skb->dev->type */ 618 + BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2); 619 + jit->seen |= SEEN_RET0; 620 + /* lg %r1,<d(dev)>(%r2) */ 621 + EMIT6_DISP(0xe3102000, 0x0004, offsetof(struct sk_buff, dev)); 622 + /* ltgr %r1,%r1 */ 623 + EMIT4(0xb9020011); 624 + /* jz <ret0> */ 625 + EMIT4_PCREL(0xa7840000, jit->ret0_ip - jit->prg); 626 + /* lhi %r5,0 */ 627 + EMIT4(0xa7580000); 628 + /* icm %r5,3,<d(type)>(%r1) */ 629 + EMIT4_DISP(0xbf531000, offsetof(struct net_device, type)); 630 + break; 631 + case BPF_S_ANC_RXHASH: /* A = 
skb->rxhash */ 632 + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, rxhash) != 4); 633 + /* l %r5,<d(rxhash)>(%r2) */ 634 + EMIT4_DISP(0x58502000, offsetof(struct sk_buff, rxhash)); 635 + break; 636 + case BPF_S_ANC_CPU: /* A = smp_processor_id() */ 637 + #ifdef CONFIG_SMP 638 + /* l %r5,<d(cpu_nr)> */ 639 + EMIT4_DISP(0x58500000, offsetof(struct _lowcore, cpu_nr)); 640 + #else 641 + /* lhi %r5,0 */ 642 + EMIT4(0xa7580000); 643 + #endif 644 + break; 645 + default: /* too complex, give up */ 646 + goto out; 647 + } 648 + addrs[i] = jit->prg - jit->start; 649 + return 0; 650 + out: 651 + return -1; 652 + } 653 + 654 + void bpf_jit_compile(struct sk_filter *fp) 655 + { 656 + unsigned long size, prg_len, lit_len; 657 + struct bpf_jit jit, cjit; 658 + unsigned int *addrs; 659 + int pass, i; 660 + 661 + if (!bpf_jit_enable) 662 + return; 663 + addrs = kmalloc(fp->len * sizeof(*addrs), GFP_KERNEL); 664 + if (addrs == NULL) 665 + return; 666 + memset(addrs, 0, fp->len * sizeof(*addrs)); 667 + memset(&jit, 0, sizeof(cjit)); 668 + memset(&cjit, 0, sizeof(cjit)); 669 + 670 + for (pass = 0; pass < 10; pass++) { 671 + jit.prg = jit.start; 672 + jit.lit = jit.mid; 673 + 674 + bpf_jit_prologue(&jit); 675 + bpf_jit_noleaks(&jit, fp->insns); 676 + for (i = 0; i < fp->len; i++) { 677 + if (bpf_jit_insn(&jit, fp->insns + i, addrs, i, 678 + i == fp->len - 1)) 679 + goto out; 680 + } 681 + bpf_jit_epilogue(&jit); 682 + if (jit.start) { 683 + WARN_ON(jit.prg > cjit.prg || jit.lit > cjit.lit); 684 + if (memcmp(&jit, &cjit, sizeof(jit)) == 0) 685 + break; 686 + } else if (jit.prg == cjit.prg && jit.lit == cjit.lit) { 687 + prg_len = jit.prg - jit.start; 688 + lit_len = jit.lit - jit.mid; 689 + size = max_t(unsigned long, prg_len + lit_len, 690 + sizeof(struct work_struct)); 691 + if (size >= BPF_SIZE_MAX) 692 + goto out; 693 + jit.start = module_alloc(size); 694 + if (!jit.start) 695 + goto out; 696 + jit.prg = jit.mid = jit.start + prg_len; 697 + jit.lit = jit.end = jit.start + prg_len + 
lit_len; 698 + jit.base_ip += (unsigned long) jit.start; 699 + jit.exit_ip += (unsigned long) jit.start; 700 + jit.ret0_ip += (unsigned long) jit.start; 701 + } 702 + cjit = jit; 703 + } 704 + if (bpf_jit_enable > 1) { 705 + pr_err("flen=%d proglen=%lu pass=%d image=%p\n", 706 + fp->len, jit.end - jit.start, pass, jit.start); 707 + if (jit.start) { 708 + printk(KERN_ERR "JIT code:\n"); 709 + print_fn_code(jit.start, jit.mid - jit.start); 710 + print_hex_dump(KERN_ERR, "JIT literals:\n", 711 + DUMP_PREFIX_ADDRESS, 16, 1, 712 + jit.mid, jit.end - jit.mid, false); 713 + } 714 + } 715 + if (jit.start) 716 + fp->bpf_func = (void *) jit.start; 717 + out: 718 + kfree(addrs); 719 + } 720 + 721 + static void jit_free_defer(struct work_struct *arg) 722 + { 723 + module_free(NULL, arg); 724 + } 725 + 726 + /* run from softirq, we must use a work_struct to call 727 + * module_free() from process context 728 + */ 729 + void bpf_jit_free(struct sk_filter *fp) 730 + { 731 + struct work_struct *work; 732 + 733 + if (fp->bpf_func == sk_run_filter) 734 + return; 735 + work = (struct work_struct *)fp->bpf_func; 736 + INIT_WORK(work, jit_free_defer); 737 + schedule_work(work); 738 + }