Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

Daniel Borkmann says:

====================
pull-request: bpf 2018-06-08

The following pull-request contains BPF updates for your *net* tree.

The main changes are:

1) Fix in the BPF verifier to reject modified ctx pointers on helper
functions, from Daniel.

2) Fix in BPF kselftests for get_cgroup_id_user() helper to only
record the cgroup id for a provided pid in order to reduce test
failures from processes interfering with the test, from Yonghong.

3) Fix a crash in AF_XDP's mem accounting when the process owning
the sock has CAP_IPC_LOCK capabilities set, from Daniel.

4) Fix an issue for AF_XDP on 32 bit machines where XDP_UMEM_PGOFF_*_RING
defines need ULL suffixes and use loff_t type as they are otherwise
truncated, from Geert.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+118 -26
+2 -2
include/uapi/linux/if_xdp.h
··· 63 63 /* Pgoff for mmaping the rings */ 64 64 #define XDP_PGOFF_RX_RING 0 65 65 #define XDP_PGOFF_TX_RING 0x80000000 66 - #define XDP_UMEM_PGOFF_FILL_RING 0x100000000 67 - #define XDP_UMEM_PGOFF_COMPLETION_RING 0x180000000 66 + #define XDP_UMEM_PGOFF_FILL_RING 0x100000000ULL 67 + #define XDP_UMEM_PGOFF_COMPLETION_RING 0x180000000ULL 68 68 69 69 /* Rx/Tx descriptor */ 70 70 struct xdp_desc {
+31 -17
kernel/bpf/verifier.c
··· 1617 1617 } 1618 1618 #endif 1619 1619 1620 + static int check_ctx_reg(struct bpf_verifier_env *env, 1621 + const struct bpf_reg_state *reg, int regno) 1622 + { 1623 + /* Access to ctx or passing it to a helper is only allowed in 1624 + * its original, unmodified form. 1625 + */ 1626 + 1627 + if (reg->off) { 1628 + verbose(env, "dereference of modified ctx ptr R%d off=%d disallowed\n", 1629 + regno, reg->off); 1630 + return -EACCES; 1631 + } 1632 + 1633 + if (!tnum_is_const(reg->var_off) || reg->var_off.value) { 1634 + char tn_buf[48]; 1635 + 1636 + tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 1637 + verbose(env, "variable ctx access var_off=%s disallowed\n", tn_buf); 1638 + return -EACCES; 1639 + } 1640 + 1641 + return 0; 1642 + } 1643 + 1620 1644 /* truncate register to smaller size (in bytes) 1621 1645 * must be called with size < BPF_REG_SIZE 1622 1646 */ ··· 1710 1686 verbose(env, "R%d leaks addr into ctx\n", value_regno); 1711 1687 return -EACCES; 1712 1688 } 1713 - /* ctx accesses must be at a fixed offset, so that we can 1714 - * determine what type of data were returned. 
1715 - */ 1716 - if (reg->off) { 1717 - verbose(env, 1718 - "dereference of modified ctx ptr R%d off=%d+%d, ctx+const is allowed, ctx+const+const is not\n", 1719 - regno, reg->off, off - reg->off); 1720 - return -EACCES; 1721 - } 1722 - if (!tnum_is_const(reg->var_off) || reg->var_off.value) { 1723 - char tn_buf[48]; 1724 1689 1725 - tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 1726 - verbose(env, 1727 - "variable ctx access var_off=%s off=%d size=%d", 1728 - tn_buf, off, size); 1729 - return -EACCES; 1730 - } 1690 + err = check_ctx_reg(env, reg, regno); 1691 + if (err < 0) 1692 + return err; 1693 + 1731 1694 err = check_ctx_access(env, insn_idx, off, size, t, &reg_type); 1732 1695 if (!err && t == BPF_READ && value_regno >= 0) { 1733 1696 /* ctx access returns either a scalar, or a ··· 1995 1984 expected_type = PTR_TO_CTX; 1996 1985 if (type != expected_type) 1997 1986 goto err_type; 1987 + err = check_ctx_reg(env, reg, regno); 1988 + if (err < 0) 1989 + return err; 1998 1990 } else if (arg_type_is_mem_ptr(arg_type)) { 1999 1991 expected_type = PTR_TO_STACK; 2000 1992 /* One exception here. In case function allows for NULL to be
+4 -2
net/xdp/xdp_umem.c
··· 132 132 133 133 static void xdp_umem_unaccount_pages(struct xdp_umem *umem) 134 134 { 135 - atomic_long_sub(umem->npgs, &umem->user->locked_vm); 136 - free_uid(umem->user); 135 + if (umem->user) { 136 + atomic_long_sub(umem->npgs, &umem->user->locked_vm); 137 + free_uid(umem->user); 138 + } 137 139 } 138 140 139 141 static void xdp_umem_release(struct xdp_umem *umem)
+1 -1
net/xdp/xsk.c
··· 643 643 static int xsk_mmap(struct file *file, struct socket *sock, 644 644 struct vm_area_struct *vma) 645 645 { 646 - unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; 646 + loff_t offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT; 647 647 unsigned long size = vma->vm_end - vma->vm_start; 648 648 struct xdp_sock *xs = xdp_sk(sock->sk); 649 649 struct xsk_queue *q = NULL;
+13 -1
tools/testing/selftests/bpf/get_cgroup_id_kern.c
··· 11 11 .max_entries = 1, 12 12 }; 13 13 14 + struct bpf_map_def SEC("maps") pidmap = { 15 + .type = BPF_MAP_TYPE_ARRAY, 16 + .key_size = sizeof(__u32), 17 + .value_size = sizeof(__u32), 18 + .max_entries = 1, 19 + }; 20 + 14 21 SEC("tracepoint/syscalls/sys_enter_nanosleep") 15 22 int trace(void *ctx) 16 23 { 17 - __u32 key = 0; 24 + __u32 pid = bpf_get_current_pid_tgid(); 25 + __u32 key = 0, *expected_pid; 18 26 __u64 *val; 27 + 28 + expected_pid = bpf_map_lookup_elem(&pidmap, &key); 29 + if (!expected_pid || *expected_pid != pid) 30 + return 0; 19 31 20 32 val = bpf_map_lookup_elem(&cg_ids, &key); 21 33 if (val)
+10 -2
tools/testing/selftests/bpf/get_cgroup_id_user.c
··· 50 50 const char *probe_name = "syscalls/sys_enter_nanosleep"; 51 51 const char *file = "get_cgroup_id_kern.o"; 52 52 int err, bytes, efd, prog_fd, pmu_fd; 53 + int cgroup_fd, cgidmap_fd, pidmap_fd; 53 54 struct perf_event_attr attr = {}; 54 - int cgroup_fd, cgidmap_fd; 55 55 struct bpf_object *obj; 56 56 __u64 kcgid = 0, ucgid; 57 + __u32 key = 0, pid; 57 58 int exit_code = 1; 58 59 char buf[256]; 59 - __u32 key = 0; 60 60 61 61 err = setup_cgroup_environment(); 62 62 if (CHECK(err, "setup_cgroup_environment", "err %d errno %d\n", err, ··· 80 80 if (CHECK(cgidmap_fd < 0, "bpf_find_map", "err %d errno %d\n", 81 81 cgidmap_fd, errno)) 82 82 goto close_prog; 83 + 84 + pidmap_fd = bpf_find_map(__func__, obj, "pidmap"); 85 + if (CHECK(pidmap_fd < 0, "bpf_find_map", "err %d errno %d\n", 86 + pidmap_fd, errno)) 87 + goto close_prog; 88 + 89 + pid = getpid(); 90 + bpf_map_update_elem(pidmap_fd, &key, &pid, 0); 83 91 84 92 snprintf(buf, sizeof(buf), 85 93 "/sys/kernel/debug/tracing/events/%s/id", probe_name);
+57 -1
tools/testing/selftests/bpf/test_verifier.c
··· 8647 8647 offsetof(struct __sk_buff, mark)), 8648 8648 BPF_EXIT_INSN(), 8649 8649 }, 8650 - .errstr = "dereference of modified ctx ptr R1 off=68+8, ctx+const is allowed, ctx+const+const is not", 8650 + .errstr = "dereference of modified ctx ptr", 8651 8651 .result = REJECT, 8652 8652 .prog_type = BPF_PROG_TYPE_SCHED_CLS, 8653 8653 }, ··· 12257 12257 .prog_type = BPF_PROG_TYPE_SCHED_CLS, 12258 12258 .result = ACCEPT, 12259 12259 .retval = 5, 12260 + }, 12261 + { 12262 + "pass unmodified ctx pointer to helper", 12263 + .insns = { 12264 + BPF_MOV64_IMM(BPF_REG_2, 0), 12265 + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, 12266 + BPF_FUNC_csum_update), 12267 + BPF_MOV64_IMM(BPF_REG_0, 0), 12268 + BPF_EXIT_INSN(), 12269 + }, 12270 + .prog_type = BPF_PROG_TYPE_SCHED_CLS, 12271 + .result = ACCEPT, 12272 + }, 12273 + { 12274 + "pass modified ctx pointer to helper, 1", 12275 + .insns = { 12276 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -612), 12277 + BPF_MOV64_IMM(BPF_REG_2, 0), 12278 + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, 12279 + BPF_FUNC_csum_update), 12280 + BPF_MOV64_IMM(BPF_REG_0, 0), 12281 + BPF_EXIT_INSN(), 12282 + }, 12283 + .prog_type = BPF_PROG_TYPE_SCHED_CLS, 12284 + .result = REJECT, 12285 + .errstr = "dereference of modified ctx ptr", 12286 + }, 12287 + { 12288 + "pass modified ctx pointer to helper, 2", 12289 + .insns = { 12290 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -612), 12291 + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, 12292 + BPF_FUNC_get_socket_cookie), 12293 + BPF_MOV64_IMM(BPF_REG_0, 0), 12294 + BPF_EXIT_INSN(), 12295 + }, 12296 + .result_unpriv = REJECT, 12297 + .result = REJECT, 12298 + .errstr_unpriv = "dereference of modified ctx ptr", 12299 + .errstr = "dereference of modified ctx ptr", 12300 + }, 12301 + { 12302 + "pass modified ctx pointer to helper, 3", 12303 + .insns = { 12304 + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, 0), 12305 + BPF_ALU64_IMM(BPF_AND, BPF_REG_3, 4), 12306 + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), 12307 + 
BPF_MOV64_IMM(BPF_REG_2, 0), 12308 + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, 12309 + BPF_FUNC_csum_update), 12310 + BPF_MOV64_IMM(BPF_REG_0, 0), 12311 + BPF_EXIT_INSN(), 12312 + }, 12313 + .prog_type = BPF_PROG_TYPE_SCHED_CLS, 12314 + .result = REJECT, 12315 + .errstr = "variable ctx access var_off=(0x0; 0x4)", 12260 12316 }, 12261 12317 }; 12262 12318