Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bpf: Support for pointers beyond pkt_end.

This patch adds the verifier support to recognize inlined branch conditions.
LLVM knows that the branch evaluates to the same value, but the verifier
couldn't track it, causing valid programs to be rejected.
The potential LLVM workaround (https://reviews.llvm.org/D87428)
can have undesired side effects, since LLVM doesn't know that
skb->data/data_end are being compared. LLVM has to introduce an extra boolean
variable and use an inline_asm trick to force assembly that is easier for the verifier.

Instead, teach the verifier to recognize that in:
  r1 = skb->data;
  r1 += 10;
  r2 = skb->data_end;
  if (r1 > r2) {
      /* here r1 points beyond packet_end, and a
       * subsequent
       */
      if (r1 > r2) // always evaluates to "true".
  }

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Tested-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20201111031213.25109-2-alexei.starovoitov@gmail.com

authored by

Alexei Starovoitov and committed by
Daniel Borkmann
6d94e741 c3653879

+108 -23
+1 -1
include/linux/bpf_verifier.h
··· 45 45 enum bpf_reg_type type; 46 46 union { 47 47 /* valid when type == PTR_TO_PACKET */ 48 - u16 range; 48 + int range; 49 49 50 50 /* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE | 51 51 * PTR_TO_MAP_VALUE_OR_NULL
+107 -22
kernel/bpf/verifier.c
··· 2739 2739 regno); 2740 2740 return -EACCES; 2741 2741 } 2742 - err = __check_mem_access(env, regno, off, size, reg->range, 2742 + 2743 + err = reg->range < 0 ? -EINVAL : 2744 + __check_mem_access(env, regno, off, size, reg->range, 2743 2745 zero_size_allowed); 2744 2746 if (err) { 2745 2747 verbose(env, "R%d offset is outside of the packet\n", regno); ··· 4697 4695 4698 4696 for (i = 0; i <= vstate->curframe; i++) 4699 4697 __clear_all_pkt_pointers(env, vstate->frame[i]); 4698 + } 4699 + 4700 + enum { 4701 + AT_PKT_END = -1, 4702 + BEYOND_PKT_END = -2, 4703 + }; 4704 + 4705 + static void mark_pkt_end(struct bpf_verifier_state *vstate, int regn, bool range_open) 4706 + { 4707 + struct bpf_func_state *state = vstate->frame[vstate->curframe]; 4708 + struct bpf_reg_state *reg = &state->regs[regn]; 4709 + 4710 + if (reg->type != PTR_TO_PACKET) 4711 + /* PTR_TO_PACKET_META is not supported yet */ 4712 + return; 4713 + 4714 + /* The 'reg' is pkt > pkt_end or pkt >= pkt_end. 4715 + * How far beyond pkt_end it goes is unknown. 
4716 + * if (!range_open) it's the case of pkt >= pkt_end 4717 + * if (range_open) it's the case of pkt > pkt_end 4718 + * hence this pointer is at least 1 byte bigger than pkt_end 4719 + */ 4720 + if (range_open) 4721 + reg->range = BEYOND_PKT_END; 4722 + else 4723 + reg->range = AT_PKT_END; 4700 4724 } 4701 4725 4702 4726 static void release_reg_references(struct bpf_verifier_env *env, ··· 6736 6708 6737 6709 static void __find_good_pkt_pointers(struct bpf_func_state *state, 6738 6710 struct bpf_reg_state *dst_reg, 6739 - enum bpf_reg_type type, u16 new_range) 6711 + enum bpf_reg_type type, int new_range) 6740 6712 { 6741 6713 struct bpf_reg_state *reg; 6742 6714 int i; ··· 6761 6733 enum bpf_reg_type type, 6762 6734 bool range_right_open) 6763 6735 { 6764 - u16 new_range; 6765 - int i; 6736 + int new_range, i; 6766 6737 6767 6738 if (dst_reg->off < 0 || 6768 6739 (dst_reg->off == 0 && range_right_open)) ··· 7012 6985 return is_branch64_taken(reg, val, opcode); 7013 6986 } 7014 6987 6988 + static int flip_opcode(u32 opcode) 6989 + { 6990 + /* How can we transform "a <op> b" into "b <op> a"? 
*/ 6991 + static const u8 opcode_flip[16] = { 6992 + /* these stay the same */ 6993 + [BPF_JEQ >> 4] = BPF_JEQ, 6994 + [BPF_JNE >> 4] = BPF_JNE, 6995 + [BPF_JSET >> 4] = BPF_JSET, 6996 + /* these swap "lesser" and "greater" (L and G in the opcodes) */ 6997 + [BPF_JGE >> 4] = BPF_JLE, 6998 + [BPF_JGT >> 4] = BPF_JLT, 6999 + [BPF_JLE >> 4] = BPF_JGE, 7000 + [BPF_JLT >> 4] = BPF_JGT, 7001 + [BPF_JSGE >> 4] = BPF_JSLE, 7002 + [BPF_JSGT >> 4] = BPF_JSLT, 7003 + [BPF_JSLE >> 4] = BPF_JSGE, 7004 + [BPF_JSLT >> 4] = BPF_JSGT 7005 + }; 7006 + return opcode_flip[opcode >> 4]; 7007 + } 7008 + 7009 + static int is_pkt_ptr_branch_taken(struct bpf_reg_state *dst_reg, 7010 + struct bpf_reg_state *src_reg, 7011 + u8 opcode) 7012 + { 7013 + struct bpf_reg_state *pkt; 7014 + 7015 + if (src_reg->type == PTR_TO_PACKET_END) { 7016 + pkt = dst_reg; 7017 + } else if (dst_reg->type == PTR_TO_PACKET_END) { 7018 + pkt = src_reg; 7019 + opcode = flip_opcode(opcode); 7020 + } else { 7021 + return -1; 7022 + } 7023 + 7024 + if (pkt->range >= 0) 7025 + return -1; 7026 + 7027 + switch (opcode) { 7028 + case BPF_JLE: 7029 + /* pkt <= pkt_end */ 7030 + fallthrough; 7031 + case BPF_JGT: 7032 + /* pkt > pkt_end */ 7033 + if (pkt->range == BEYOND_PKT_END) 7034 + /* pkt has at last one extra byte beyond pkt_end */ 7035 + return opcode == BPF_JGT; 7036 + break; 7037 + case BPF_JLT: 7038 + /* pkt < pkt_end */ 7039 + fallthrough; 7040 + case BPF_JGE: 7041 + /* pkt >= pkt_end */ 7042 + if (pkt->range == BEYOND_PKT_END || pkt->range == AT_PKT_END) 7043 + return opcode == BPF_JGE; 7044 + break; 7045 + } 7046 + return -1; 7047 + } 7048 + 7015 7049 /* Adjusts the register min/max values in the case that the dst_reg is the 7016 7050 * variable register that we are working on, and src_reg is a constant or we're 7017 7051 * simply doing a BPF_K check. ··· 7236 7148 u64 val, u32 val32, 7237 7149 u8 opcode, bool is_jmp32) 7238 7150 { 7239 - /* How can we transform "a <op> b" into "b <op> a"? 
*/ 7240 - static const u8 opcode_flip[16] = { 7241 - /* these stay the same */ 7242 - [BPF_JEQ >> 4] = BPF_JEQ, 7243 - [BPF_JNE >> 4] = BPF_JNE, 7244 - [BPF_JSET >> 4] = BPF_JSET, 7245 - /* these swap "lesser" and "greater" (L and G in the opcodes) */ 7246 - [BPF_JGE >> 4] = BPF_JLE, 7247 - [BPF_JGT >> 4] = BPF_JLT, 7248 - [BPF_JLE >> 4] = BPF_JGE, 7249 - [BPF_JLT >> 4] = BPF_JGT, 7250 - [BPF_JSGE >> 4] = BPF_JSLE, 7251 - [BPF_JSGT >> 4] = BPF_JSLT, 7252 - [BPF_JSLE >> 4] = BPF_JSGE, 7253 - [BPF_JSLT >> 4] = BPF_JSGT 7254 - }; 7255 - opcode = opcode_flip[opcode >> 4]; 7151 + opcode = flip_opcode(opcode); 7256 7152 /* This uses zero as "not present in table"; luckily the zero opcode, 7257 7153 * BPF_JA, can't get here. 7258 7154 */ ··· 7418 7346 /* pkt_data' > pkt_end, pkt_meta' > pkt_data */ 7419 7347 find_good_pkt_pointers(this_branch, dst_reg, 7420 7348 dst_reg->type, false); 7349 + mark_pkt_end(other_branch, insn->dst_reg, true); 7421 7350 } else if ((dst_reg->type == PTR_TO_PACKET_END && 7422 7351 src_reg->type == PTR_TO_PACKET) || 7423 7352 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && ··· 7426 7353 /* pkt_end > pkt_data', pkt_data > pkt_meta' */ 7427 7354 find_good_pkt_pointers(other_branch, src_reg, 7428 7355 src_reg->type, true); 7356 + mark_pkt_end(this_branch, insn->src_reg, false); 7429 7357 } else { 7430 7358 return false; 7431 7359 } ··· 7439 7365 /* pkt_data' < pkt_end, pkt_meta' < pkt_data */ 7440 7366 find_good_pkt_pointers(other_branch, dst_reg, 7441 7367 dst_reg->type, true); 7368 + mark_pkt_end(this_branch, insn->dst_reg, false); 7442 7369 } else if ((dst_reg->type == PTR_TO_PACKET_END && 7443 7370 src_reg->type == PTR_TO_PACKET) || 7444 7371 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && ··· 7447 7372 /* pkt_end < pkt_data', pkt_data > pkt_meta' */ 7448 7373 find_good_pkt_pointers(this_branch, src_reg, 7449 7374 src_reg->type, false); 7375 + mark_pkt_end(other_branch, insn->src_reg, true); 7450 7376 } else { 7451 7377 return false; 
7452 7378 } ··· 7460 7384 /* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */ 7461 7385 find_good_pkt_pointers(this_branch, dst_reg, 7462 7386 dst_reg->type, true); 7387 + mark_pkt_end(other_branch, insn->dst_reg, false); 7463 7388 } else if ((dst_reg->type == PTR_TO_PACKET_END && 7464 7389 src_reg->type == PTR_TO_PACKET) || 7465 7390 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && ··· 7468 7391 /* pkt_end >= pkt_data', pkt_data >= pkt_meta' */ 7469 7392 find_good_pkt_pointers(other_branch, src_reg, 7470 7393 src_reg->type, false); 7394 + mark_pkt_end(this_branch, insn->src_reg, true); 7471 7395 } else { 7472 7396 return false; 7473 7397 } ··· 7481 7403 /* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */ 7482 7404 find_good_pkt_pointers(other_branch, dst_reg, 7483 7405 dst_reg->type, false); 7406 + mark_pkt_end(this_branch, insn->dst_reg, true); 7484 7407 } else if ((dst_reg->type == PTR_TO_PACKET_END && 7485 7408 src_reg->type == PTR_TO_PACKET) || 7486 7409 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && ··· 7489 7410 /* pkt_end <= pkt_data', pkt_data <= pkt_meta' */ 7490 7411 find_good_pkt_pointers(this_branch, src_reg, 7491 7412 src_reg->type, true); 7413 + mark_pkt_end(other_branch, insn->src_reg, false); 7492 7414 } else { 7493 7415 return false; 7494 7416 } ··· 7589 7509 src_reg->var_off.value, 7590 7510 opcode, 7591 7511 is_jmp32); 7512 + } else if (reg_is_pkt_pointer_any(dst_reg) && 7513 + reg_is_pkt_pointer_any(src_reg) && 7514 + !is_jmp32) { 7515 + pred = is_pkt_ptr_branch_taken(dst_reg, src_reg, opcode); 7592 7516 } 7593 7517 7594 7518 if (pred >= 0) { ··· 7601 7517 */ 7602 7518 if (!__is_pointer_value(false, dst_reg)) 7603 7519 err = mark_chain_precision(env, insn->dst_reg); 7604 - if (BPF_SRC(insn->code) == BPF_X && !err) 7520 + if (BPF_SRC(insn->code) == BPF_X && !err && 7521 + !__is_pointer_value(false, src_reg)) 7605 7522 err = mark_chain_precision(env, insn->src_reg); 7606 7523 if (err) 7607 7524 return err;