Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bpf: reject program if a __user tagged memory accessed in kernel way

BPF verifier supports direct memory access for BPF_PROG_TYPE_TRACING type
of bpf programs, e.g., a->b. If "a" is a pointer
pointing to kernel memory, the bpf verifier will allow the user to write
code in C like a->b and the verifier will translate it to a kernel
load properly. If "a" is a pointer to user memory, it is expected
that the bpf developer should use the bpf_probe_read_user() helper to
get the value a->b. Without utilizing BTF __user tagging information,
current verifier will assume that a->b is a kernel memory access
and this may generate incorrect result.

Now that BTF contains __user information, the verifier can check
whether a pointer points to user memory. If it does, the verifier
can reject the program and force users to use bpf_probe_read_user()
helper explicitly.

In the future, we can easily extend btf_add_space for other
address space tagging, for example, rcu/percpu etc.

Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220127154606.654961-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>

authored by

Yonghong Song and committed by
Alexei Starovoitov
c6f1bfe8 7472d5a6

+71 -24
+6 -3
include/linux/bpf.h
··· 332 332 */ 333 333 MEM_ALLOC = BIT(2 + BPF_BASE_TYPE_BITS), 334 334 335 - __BPF_TYPE_LAST_FLAG = MEM_ALLOC, 335 + /* MEM is in user address space. */ 336 + MEM_USER = BIT(3 + BPF_BASE_TYPE_BITS), 337 + 338 + __BPF_TYPE_LAST_FLAG = MEM_USER, 336 339 }; 337 340 338 341 /* Max number of base types. */ ··· 591 588 const struct btf *btf, 592 589 const struct btf_type *t, int off, int size, 593 590 enum bpf_access_type atype, 594 - u32 *next_btf_id); 591 + u32 *next_btf_id, enum bpf_type_flag *flag); 595 592 }; 596 593 597 594 struct bpf_prog_offload_ops { ··· 1783 1780 int btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf, 1784 1781 const struct btf_type *t, int off, int size, 1785 1782 enum bpf_access_type atype, 1786 - u32 *next_btf_id); 1783 + u32 *next_btf_id, enum bpf_type_flag *flag); 1787 1784 bool btf_struct_ids_match(struct bpf_verifier_log *log, 1788 1785 const struct btf *btf, u32 id, int off, 1789 1786 const struct btf *need_btf, u32 need_type_id);
+5
include/linux/btf.h
··· 238 238 return BTF_INFO_KIND(t->info) == BTF_KIND_VAR; 239 239 } 240 240 241 + static inline bool btf_type_is_type_tag(const struct btf_type *t) 242 + { 243 + return BTF_INFO_KIND(t->info) == BTF_KIND_TYPE_TAG; 244 + } 245 + 241 246 /* union is only a special case of struct: 242 247 * all its offsetof(member) == 0 243 248 */
+28 -6
kernel/bpf/btf.c
··· 4886 4886 const char *tname = prog->aux->attach_func_name; 4887 4887 struct bpf_verifier_log *log = info->log; 4888 4888 const struct btf_param *args; 4889 + const char *tag_value; 4889 4890 u32 nr_args, arg; 4890 4891 int i, ret; 4891 4892 ··· 5039 5038 info->btf = btf; 5040 5039 info->btf_id = t->type; 5041 5040 t = btf_type_by_id(btf, t->type); 5041 + 5042 + if (btf_type_is_type_tag(t)) { 5043 + tag_value = __btf_name_by_offset(btf, t->name_off); 5044 + if (strcmp(tag_value, "user") == 0) 5045 + info->reg_type |= MEM_USER; 5046 + } 5047 + 5042 5048 /* skip modifiers */ 5043 5049 while (btf_type_is_modifier(t)) { 5044 5050 info->btf_id = t->type; ··· 5072 5064 5073 5065 static int btf_struct_walk(struct bpf_verifier_log *log, const struct btf *btf, 5074 5066 const struct btf_type *t, int off, int size, 5075 - u32 *next_btf_id) 5067 + u32 *next_btf_id, enum bpf_type_flag *flag) 5076 5068 { 5077 5069 u32 i, moff, mtrue_end, msize = 0, total_nelems = 0; 5078 5070 const struct btf_type *mtype, *elem_type = NULL; 5079 5071 const struct btf_member *member; 5080 - const char *tname, *mname; 5072 + const char *tname, *mname, *tag_value; 5081 5073 u32 vlen, elem_id, mid; 5082 5074 5083 5075 again: ··· 5261 5253 } 5262 5254 5263 5255 if (btf_type_is_ptr(mtype)) { 5264 - const struct btf_type *stype; 5256 + const struct btf_type *stype, *t; 5257 + enum bpf_type_flag tmp_flag = 0; 5265 5258 u32 id; 5266 5259 5267 5260 if (msize != size || off != moff) { ··· 5271 5262 mname, moff, tname, off, size); 5272 5263 return -EACCES; 5273 5264 } 5265 + 5266 + /* check __user tag */ 5267 + t = btf_type_by_id(btf, mtype->type); 5268 + if (btf_type_is_type_tag(t)) { 5269 + tag_value = __btf_name_by_offset(btf, t->name_off); 5270 + if (strcmp(tag_value, "user") == 0) 5271 + tmp_flag = MEM_USER; 5272 + } 5273 + 5274 5274 stype = btf_type_skip_modifiers(btf, mtype->type, &id); 5275 5275 if (btf_type_is_struct(stype)) { 5276 5276 *next_btf_id = id; 5277 + *flag = tmp_flag; 5277 5278 
return WALK_PTR; 5278 5279 } 5279 5280 } ··· 5310 5291 int btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf, 5311 5292 const struct btf_type *t, int off, int size, 5312 5293 enum bpf_access_type atype __maybe_unused, 5313 - u32 *next_btf_id) 5294 + u32 *next_btf_id, enum bpf_type_flag *flag) 5314 5295 { 5296 + enum bpf_type_flag tmp_flag = 0; 5315 5297 int err; 5316 5298 u32 id; 5317 5299 5318 5300 do { 5319 - err = btf_struct_walk(log, btf, t, off, size, &id); 5301 + err = btf_struct_walk(log, btf, t, off, size, &id, &tmp_flag); 5320 5302 5321 5303 switch (err) { 5322 5304 case WALK_PTR: ··· 5325 5305 * we're done. 5326 5306 */ 5327 5307 *next_btf_id = id; 5308 + *flag = tmp_flag; 5328 5309 return PTR_TO_BTF_ID; 5329 5310 case WALK_SCALAR: 5330 5311 return SCALAR_VALUE; ··· 5370 5349 const struct btf *need_btf, u32 need_type_id) 5371 5350 { 5372 5351 const struct btf_type *type; 5352 + enum bpf_type_flag flag; 5373 5353 int err; 5374 5354 5375 5355 /* Are we already done? */ ··· 5381 5359 type = btf_type_by_id(btf, id); 5382 5360 if (!type) 5383 5361 return false; 5384 - err = btf_struct_walk(log, btf, type, off, 1, &id); 5362 + err = btf_struct_walk(log, btf, type, off, 1, &id, &flag); 5385 5363 if (err != WALK_STRUCT) 5386 5364 return false; 5387 5365
+24 -11
kernel/bpf/verifier.c
··· 536 536 static const char *reg_type_str(struct bpf_verifier_env *env, 537 537 enum bpf_reg_type type) 538 538 { 539 - char postfix[16] = {0}, prefix[16] = {0}; 539 + char postfix[16] = {0}, prefix[32] = {0}; 540 540 static const char * const str[] = { 541 541 [NOT_INIT] = "?", 542 542 [SCALAR_VALUE] = "inv", ··· 570 570 } 571 571 572 572 if (type & MEM_RDONLY) 573 - strncpy(prefix, "rdonly_", 16); 573 + strncpy(prefix, "rdonly_", 32); 574 574 if (type & MEM_ALLOC) 575 - strncpy(prefix, "alloc_", 16); 575 + strncpy(prefix, "alloc_", 32); 576 + if (type & MEM_USER) 577 + strncpy(prefix, "user_", 32); 576 578 577 579 snprintf(env->type_str_buf, TYPE_STR_BUF_LEN, "%s%s%s", 578 580 prefix, str[base_type(type)], postfix); ··· 1549 1547 static void mark_btf_ld_reg(struct bpf_verifier_env *env, 1550 1548 struct bpf_reg_state *regs, u32 regno, 1551 1549 enum bpf_reg_type reg_type, 1552 - struct btf *btf, u32 btf_id) 1550 + struct btf *btf, u32 btf_id, 1551 + enum bpf_type_flag flag) 1553 1552 { 1554 1553 if (reg_type == SCALAR_VALUE) { 1555 1554 mark_reg_unknown(env, regs, regno); 1556 1555 return; 1557 1556 } 1558 1557 mark_reg_known_zero(env, regs, regno); 1559 - regs[regno].type = PTR_TO_BTF_ID; 1558 + regs[regno].type = PTR_TO_BTF_ID | flag; 1560 1559 regs[regno].btf = btf; 1561 1560 regs[regno].btf_id = btf_id; 1562 1561 } ··· 4155 4152 struct bpf_reg_state *reg = regs + regno; 4156 4153 const struct btf_type *t = btf_type_by_id(reg->btf, reg->btf_id); 4157 4154 const char *tname = btf_name_by_offset(reg->btf, t->name_off); 4155 + enum bpf_type_flag flag = 0; 4158 4156 u32 btf_id; 4159 4157 int ret; 4160 4158 ··· 4175 4171 return -EACCES; 4176 4172 } 4177 4173 4174 + if (reg->type & MEM_USER) { 4175 + verbose(env, 4176 + "R%d is ptr_%s access user memory: off=%d\n", 4177 + regno, tname, off); 4178 + return -EACCES; 4179 + } 4180 + 4178 4181 if (env->ops->btf_struct_access) { 4179 4182 ret = env->ops->btf_struct_access(&env->log, reg->btf, t, 4180 - off, size, 
atype, &btf_id); 4183 + off, size, atype, &btf_id, &flag); 4181 4184 } else { 4182 4185 if (atype != BPF_READ) { 4183 4186 verbose(env, "only read is supported\n"); ··· 4192 4181 } 4193 4182 4194 4183 ret = btf_struct_access(&env->log, reg->btf, t, off, size, 4195 - atype, &btf_id); 4184 + atype, &btf_id, &flag); 4196 4185 } 4197 4186 4198 4187 if (ret < 0) 4199 4188 return ret; 4200 4189 4201 4190 if (atype == BPF_READ && value_regno >= 0) 4202 - mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id); 4191 + mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id, flag); 4203 4192 4204 4193 return 0; 4205 4194 } ··· 4212 4201 { 4213 4202 struct bpf_reg_state *reg = regs + regno; 4214 4203 struct bpf_map *map = reg->map_ptr; 4204 + enum bpf_type_flag flag = 0; 4215 4205 const struct btf_type *t; 4216 4206 const char *tname; 4217 4207 u32 btf_id; ··· 4250 4238 return -EACCES; 4251 4239 } 4252 4240 4253 - ret = btf_struct_access(&env->log, btf_vmlinux, t, off, size, atype, &btf_id); 4241 + ret = btf_struct_access(&env->log, btf_vmlinux, t, off, size, atype, &btf_id, &flag); 4254 4242 if (ret < 0) 4255 4243 return ret; 4256 4244 4257 4245 if (value_regno >= 0) 4258 - mark_btf_ld_reg(env, regs, value_regno, ret, btf_vmlinux, btf_id); 4246 + mark_btf_ld_reg(env, regs, value_regno, ret, btf_vmlinux, btf_id, flag); 4259 4247 4260 4248 return 0; 4261 4249 } ··· 4456 4444 if (err < 0) 4457 4445 return err; 4458 4446 4459 - err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf, &btf_id); 4447 + err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf, 4448 + &btf_id); 4460 4449 if (err) 4461 4450 verbose_linfo(env, insn_idx, "; "); 4462 4451 if (!err && t == BPF_READ && value_regno >= 0) {
+4 -2
net/bpf/bpf_dummy_struct_ops.c
··· 145 145 const struct btf *btf, 146 146 const struct btf_type *t, int off, 147 147 int size, enum bpf_access_type atype, 148 - u32 *next_btf_id) 148 + u32 *next_btf_id, 149 + enum bpf_type_flag *flag) 149 150 { 150 151 const struct btf_type *state; 151 152 s32 type_id; ··· 163 162 return -EACCES; 164 163 } 165 164 166 - err = btf_struct_access(log, btf, t, off, size, atype, next_btf_id); 165 + err = btf_struct_access(log, btf, t, off, size, atype, next_btf_id, 166 + flag); 167 167 if (err < 0) 168 168 return err; 169 169
+4 -2
net/ipv4/bpf_tcp_ca.c
··· 96 96 const struct btf *btf, 97 97 const struct btf_type *t, int off, 98 98 int size, enum bpf_access_type atype, 99 - u32 *next_btf_id) 99 + u32 *next_btf_id, 100 + enum bpf_type_flag *flag) 100 101 { 101 102 size_t end; 102 103 103 104 if (atype == BPF_READ) 104 - return btf_struct_access(log, btf, t, off, size, atype, next_btf_id); 105 + return btf_struct_access(log, btf, t, off, size, atype, next_btf_id, 106 + flag); 105 107 106 108 if (t != tcp_sock_type) { 107 109 bpf_log(log, "only read is supported\n");