Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bpf: introduce BPF_F_ALLOW_OVERRIDE flag

If the BPF_F_ALLOW_OVERRIDE flag is used in the BPF_PROG_ATTACH command
for a given cgroup, its descendant cgroups will be able to override the
effective bpf program that was inherited from this cgroup.
By default the flag is not passed, therefore override is disallowed.

Examples:
1.
prog X attached to /A with default (non-override) flags
prog Y fails to attach to /A/B and /A/B/C
Everything under /A runs prog X

2.
prog X attached to /A with allow_override.
prog Y fails to attach to /A/B with default (non-override)
prog M attached to /A/B with allow_override.
Everything under /A/B runs prog M only.

3.
prog X attached to /A with allow_override.
prog Y fails to attach to /A with default.
The user has to detach first to switch the mode.

In the future this behavior may be extended with a chain of
non-overridable programs.

Also fix the bug where detaching from a cgroup that has nothing attached
did not report an error. Return ENOENT in such a case.

Add several testcases and adjust libbpf.

Fixes: 3007098494be ("cgroup: add support for eBPF programs")
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Tejun Heo <tj@kernel.org>
Acked-by: Daniel Mack <daniel@zonque.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Alexei Starovoitov and committed by
David S. Miller
7f677633 e722af63

+151 -38
+6 -7
include/linux/bpf-cgroup.h
··· 21 21 */ 22 22 struct bpf_prog *prog[MAX_BPF_ATTACH_TYPE]; 23 23 struct bpf_prog __rcu *effective[MAX_BPF_ATTACH_TYPE]; 24 + bool disallow_override[MAX_BPF_ATTACH_TYPE]; 24 25 }; 25 26 26 27 void cgroup_bpf_put(struct cgroup *cgrp); 27 28 void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent); 28 29 29 - void __cgroup_bpf_update(struct cgroup *cgrp, 30 - struct cgroup *parent, 31 - struct bpf_prog *prog, 32 - enum bpf_attach_type type); 30 + int __cgroup_bpf_update(struct cgroup *cgrp, struct cgroup *parent, 31 + struct bpf_prog *prog, enum bpf_attach_type type, 32 + bool overridable); 33 33 34 34 /* Wrapper for __cgroup_bpf_update() protected by cgroup_mutex */ 35 - void cgroup_bpf_update(struct cgroup *cgrp, 36 - struct bpf_prog *prog, 37 - enum bpf_attach_type type); 35 + int cgroup_bpf_update(struct cgroup *cgrp, struct bpf_prog *prog, 36 + enum bpf_attach_type type, bool overridable); 38 37 39 38 int __cgroup_bpf_run_filter_skb(struct sock *sk, 40 39 struct sk_buff *skb,
+7
include/uapi/linux/bpf.h
··· 116 116 117 117 #define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE 118 118 119 + /* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command 120 + * to the given target_fd cgroup the descendent cgroup will be able to 121 + * override effective bpf program that was inherited from this cgroup 122 + */ 123 + #define BPF_F_ALLOW_OVERRIDE (1U << 0) 124 + 119 125 #define BPF_PSEUDO_MAP_FD 1 120 126 121 127 /* flags for BPF_MAP_UPDATE_ELEM command */ ··· 177 171 __u32 target_fd; /* container object to attach to */ 178 172 __u32 attach_bpf_fd; /* eBPF program to attach */ 179 173 __u32 attach_type; 174 + __u32 attach_flags; 180 175 }; 181 176 } __attribute__((aligned(8))); 182 177
+47 -12
kernel/bpf/cgroup.c
··· 52 52 e = rcu_dereference_protected(parent->bpf.effective[type], 53 53 lockdep_is_held(&cgroup_mutex)); 54 54 rcu_assign_pointer(cgrp->bpf.effective[type], e); 55 + cgrp->bpf.disallow_override[type] = parent->bpf.disallow_override[type]; 55 56 } 56 57 } 57 58 ··· 83 82 * 84 83 * Must be called with cgroup_mutex held. 85 84 */ 86 - void __cgroup_bpf_update(struct cgroup *cgrp, 87 - struct cgroup *parent, 88 - struct bpf_prog *prog, 89 - enum bpf_attach_type type) 85 + int __cgroup_bpf_update(struct cgroup *cgrp, struct cgroup *parent, 86 + struct bpf_prog *prog, enum bpf_attach_type type, 87 + bool new_overridable) 90 88 { 91 - struct bpf_prog *old_prog, *effective; 89 + struct bpf_prog *old_prog, *effective = NULL; 92 90 struct cgroup_subsys_state *pos; 91 + bool overridable = true; 93 92 94 - old_prog = xchg(cgrp->bpf.prog + type, prog); 93 + if (parent) { 94 + overridable = !parent->bpf.disallow_override[type]; 95 + effective = rcu_dereference_protected(parent->bpf.effective[type], 96 + lockdep_is_held(&cgroup_mutex)); 97 + } 95 98 96 - effective = (!prog && parent) ? 
97 - rcu_dereference_protected(parent->bpf.effective[type], 98 - lockdep_is_held(&cgroup_mutex)) : 99 - prog; 99 + if (prog && effective && !overridable) 100 + /* if parent has non-overridable prog attached, disallow 101 + * attaching new programs to descendent cgroup 102 + */ 103 + return -EPERM; 104 + 105 + if (prog && effective && overridable != new_overridable) 106 + /* if parent has overridable prog attached, only 107 + * allow overridable programs in descendent cgroup 108 + */ 109 + return -EPERM; 110 + 111 + old_prog = cgrp->bpf.prog[type]; 112 + 113 + if (prog) { 114 + overridable = new_overridable; 115 + effective = prog; 116 + if (old_prog && 117 + cgrp->bpf.disallow_override[type] == new_overridable) 118 + /* disallow attaching non-overridable on top 119 + * of existing overridable in this cgroup 120 + * and vice versa 121 + */ 122 + return -EPERM; 123 + } 124 + 125 + if (!prog && !old_prog) 126 + /* report error when trying to detach and nothing is attached */ 127 + return -ENOENT; 128 + 129 + cgrp->bpf.prog[type] = prog; 100 130 101 131 css_for_each_descendant_pre(pos, &cgrp->self) { 102 132 struct cgroup *desc = container_of(pos, struct cgroup, self); 103 133 104 134 /* skip the subtree if the descendant has its own program */ 105 - if (desc->bpf.prog[type] && desc != cgrp) 135 + if (desc->bpf.prog[type] && desc != cgrp) { 106 136 pos = css_rightmost_descendant(pos); 107 - else 137 + } else { 108 138 rcu_assign_pointer(desc->bpf.effective[type], 109 139 effective); 140 + desc->bpf.disallow_override[type] = !overridable; 141 + } 110 142 } 111 143 112 144 if (prog) ··· 149 115 bpf_prog_put(old_prog); 150 116 static_branch_dec(&cgroup_bpf_enabled_key); 151 117 } 118 + return 0; 152 119 } 153 120 154 121 /**
+14 -6
kernel/bpf/syscall.c
··· 920 920 921 921 #ifdef CONFIG_CGROUP_BPF 922 922 923 - #define BPF_PROG_ATTACH_LAST_FIELD attach_type 923 + #define BPF_PROG_ATTACH_LAST_FIELD attach_flags 924 924 925 925 static int bpf_prog_attach(const union bpf_attr *attr) 926 926 { 927 + enum bpf_prog_type ptype; 927 928 struct bpf_prog *prog; 928 929 struct cgroup *cgrp; 929 - enum bpf_prog_type ptype; 930 + int ret; 930 931 931 932 if (!capable(CAP_NET_ADMIN)) 932 933 return -EPERM; 933 934 934 935 if (CHECK_ATTR(BPF_PROG_ATTACH)) 936 + return -EINVAL; 937 + 938 + if (attr->attach_flags & ~BPF_F_ALLOW_OVERRIDE) 935 939 return -EINVAL; 936 940 937 941 switch (attr->attach_type) { ··· 960 956 return PTR_ERR(cgrp); 961 957 } 962 958 963 - cgroup_bpf_update(cgrp, prog, attr->attach_type); 959 + ret = cgroup_bpf_update(cgrp, prog, attr->attach_type, 960 + attr->attach_flags & BPF_F_ALLOW_OVERRIDE); 961 + if (ret) 962 + bpf_prog_put(prog); 964 963 cgroup_put(cgrp); 965 964 966 - return 0; 965 + return ret; 967 966 } 968 967 969 968 #define BPF_PROG_DETACH_LAST_FIELD attach_type ··· 974 967 static int bpf_prog_detach(const union bpf_attr *attr) 975 968 { 976 969 struct cgroup *cgrp; 970 + int ret; 977 971 978 972 if (!capable(CAP_NET_ADMIN)) 979 973 return -EPERM; ··· 990 982 if (IS_ERR(cgrp)) 991 983 return PTR_ERR(cgrp); 992 984 993 - cgroup_bpf_update(cgrp, NULL, attr->attach_type); 985 + ret = cgroup_bpf_update(cgrp, NULL, attr->attach_type, false); 994 986 cgroup_put(cgrp); 995 987 break; 996 988 ··· 998 990 return -EINVAL; 999 991 } 1000 992 1001 - return 0; 993 + return ret; 1002 994 } 1003 995 #endif /* CONFIG_CGROUP_BPF */ 1004 996
+5 -4
kernel/cgroup.c
··· 6498 6498 subsys_initcall(cgroup_namespaces_init); 6499 6499 6500 6500 #ifdef CONFIG_CGROUP_BPF 6501 - void cgroup_bpf_update(struct cgroup *cgrp, 6502 - struct bpf_prog *prog, 6503 - enum bpf_attach_type type) 6501 + int cgroup_bpf_update(struct cgroup *cgrp, struct bpf_prog *prog, 6502 + enum bpf_attach_type type, bool overridable) 6504 6503 { 6505 6504 struct cgroup *parent = cgroup_parent(cgrp); 6505 + int ret; 6506 6506 6507 6507 mutex_lock(&cgroup_mutex); 6508 - __cgroup_bpf_update(cgrp, parent, prog, type); 6508 + ret = __cgroup_bpf_update(cgrp, parent, prog, type, overridable); 6509 6509 mutex_unlock(&cgroup_mutex); 6510 + return ret; 6510 6511 } 6511 6512 #endif /* CONFIG_CGROUP_BPF */ 6512 6513
+1 -1
samples/bpf/test_cgrp2_attach.c
··· 104 104 return EXIT_FAILURE; 105 105 } 106 106 107 - ret = bpf_prog_attach(prog_fd, cg_fd, type); 107 + ret = bpf_prog_attach(prog_fd, cg_fd, type, 0); 108 108 if (ret < 0) { 109 109 printf("Failed to attach prog to cgroup: '%s'\n", 110 110 strerror(errno));
+64 -4
samples/bpf/test_cgrp2_attach2.c
··· 79 79 if (join_cgroup(FOO)) 80 80 goto err; 81 81 82 - if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS)) { 82 + if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS, 1)) { 83 83 log_err("Attaching prog to /foo"); 84 84 goto err; 85 85 } 86 86 87 + printf("Attached DROP prog. This ping in cgroup /foo should fail...\n"); 87 88 assert(system(PING_CMD) != 0); 88 89 89 90 /* Create cgroup /foo/bar, get fd, and join it */ ··· 95 94 if (join_cgroup(BAR)) 96 95 goto err; 97 96 97 + printf("Attached DROP prog. This ping in cgroup /foo/bar should fail...\n"); 98 98 assert(system(PING_CMD) != 0); 99 99 100 - if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS)) { 100 + if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 1)) { 101 101 log_err("Attaching prog to /foo/bar"); 102 102 goto err; 103 103 } 104 104 105 + printf("Attached PASS prog. This ping in cgroup /foo/bar should pass...\n"); 105 106 assert(system(PING_CMD) == 0); 106 - 107 107 108 108 if (bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS)) { 109 109 log_err("Detaching program from /foo/bar"); 110 110 goto err; 111 111 } 112 112 113 + printf("Detached PASS from /foo/bar while DROP is attached to /foo.\n" 114 + "This ping in cgroup /foo/bar should fail...\n"); 113 115 assert(system(PING_CMD) != 0); 114 116 115 - if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS)) { 117 + if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 1)) { 116 118 log_err("Attaching prog to /foo/bar"); 117 119 goto err; 118 120 } ··· 125 121 goto err; 126 122 } 127 123 124 + printf("Attached PASS from /foo/bar and detached DROP from /foo.\n" 125 + "This ping in cgroup /foo/bar should pass...\n"); 128 126 assert(system(PING_CMD) == 0); 127 + 128 + if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 1)) { 129 + log_err("Attaching prog to /foo/bar"); 130 + goto err; 131 + } 132 + 133 + if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 0)) { 134 + errno = 0; 135 + 
log_err("Unexpected success attaching prog to /foo/bar"); 136 + goto err; 137 + } 138 + 139 + if (bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS)) { 140 + log_err("Detaching program from /foo/bar"); 141 + goto err; 142 + } 143 + 144 + if (!bpf_prog_detach(foo, BPF_CGROUP_INET_EGRESS)) { 145 + errno = 0; 146 + log_err("Unexpected success in double detach from /foo"); 147 + goto err; 148 + } 149 + 150 + if (bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS, 0)) { 151 + log_err("Attaching non-overridable prog to /foo"); 152 + goto err; 153 + } 154 + 155 + if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 0)) { 156 + errno = 0; 157 + log_err("Unexpected success attaching non-overridable prog to /foo/bar"); 158 + goto err; 159 + } 160 + 161 + if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 1)) { 162 + errno = 0; 163 + log_err("Unexpected success attaching overridable prog to /foo/bar"); 164 + goto err; 165 + } 166 + 167 + if (!bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS, 1)) { 168 + errno = 0; 169 + log_err("Unexpected success attaching overridable prog to /foo"); 170 + goto err; 171 + } 172 + 173 + if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS, 0)) { 174 + log_err("Attaching different non-overridable prog to /foo"); 175 + goto err; 176 + } 129 177 130 178 goto out; 131 179 ··· 188 132 close(foo); 189 133 close(bar); 190 134 cleanup_cgroup_environment(); 135 + if (!rc) 136 + printf("PASS\n"); 137 + else 138 + printf("FAIL\n"); 191 139 return rc; 192 140 }
+1 -1
samples/bpf/test_cgrp2_sock.c
··· 75 75 return EXIT_FAILURE; 76 76 } 77 77 78 - ret = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_INET_SOCK_CREATE); 78 + ret = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_INET_SOCK_CREATE, 0); 79 79 if (ret < 0) { 80 80 printf("Failed to attach prog to cgroup: '%s'\n", 81 81 strerror(errno));
+1 -1
samples/bpf/test_cgrp2_sock2.c
··· 55 55 } 56 56 57 57 ret = bpf_prog_attach(prog_fd[filter_id], cg_fd, 58 - BPF_CGROUP_INET_SOCK_CREATE); 58 + BPF_CGROUP_INET_SOCK_CREATE, 0); 59 59 if (ret < 0) { 60 60 printf("Failed to attach prog to cgroup: '%s'\n", 61 61 strerror(errno));
+3 -1
tools/lib/bpf/bpf.c
··· 168 168 return sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr)); 169 169 } 170 170 171 - int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type) 171 + int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type, 172 + unsigned int flags) 172 173 { 173 174 union bpf_attr attr; 174 175 ··· 177 176 attr.target_fd = target_fd; 178 177 attr.attach_bpf_fd = prog_fd; 179 178 attr.attach_type = type; 179 + attr.attach_flags = flags; 180 180 181 181 return sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)); 182 182 }
+2 -1
tools/lib/bpf/bpf.h
··· 41 41 int bpf_map_get_next_key(int fd, void *key, void *next_key); 42 42 int bpf_obj_pin(int fd, const char *pathname); 43 43 int bpf_obj_get(const char *pathname); 44 - int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type); 44 + int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type, 45 + unsigned int flags); 45 46 int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type); 46 47 47 48