bpf: introduce BPF_F_ALLOW_OVERRIDE flag

+6 -7

include/linux/bpf-cgroup.h

··· 21 21 */ 22 22 struct bpf_prog *prog[MAX_BPF_ATTACH_TYPE]; 23 23 struct bpf_prog __rcu *effective[MAX_BPF_ATTACH_TYPE]; 24 + bool disallow_override[MAX_BPF_ATTACH_TYPE]; 24 25 }; 25 26 26 27 void cgroup_bpf_put(struct cgroup *cgrp); 27 28 void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent); 28 29 29 - void __cgroup_bpf_update(struct cgroup *cgrp, 30 - struct cgroup *parent, 31 - struct bpf_prog *prog, 32 - enum bpf_attach_type type); 30 + int __cgroup_bpf_update(struct cgroup *cgrp, struct cgroup *parent, 31 + struct bpf_prog *prog, enum bpf_attach_type type, 32 + bool overridable); 33 33 34 34 /* Wrapper for __cgroup_bpf_update() protected by cgroup_mutex */ 35 - void cgroup_bpf_update(struct cgroup *cgrp, 36 - struct bpf_prog *prog, 37 - enum bpf_attach_type type); 35 + int cgroup_bpf_update(struct cgroup *cgrp, struct bpf_prog *prog, 36 + enum bpf_attach_type type, bool overridable); 38 37 39 38 int __cgroup_bpf_run_filter_skb(struct sock *sk, 40 39 struct sk_buff *skb,

+7

include/uapi/linux/bpf.h

··· 116 116 117 117 #define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE 118 118 119 + /* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command 120 + * to the given target_fd cgroup the descendent cgroup will be able to 121 + * override effective bpf program that was inherited from this cgroup 122 + */ 123 + #define BPF_F_ALLOW_OVERRIDE (1U << 0) 124 + 119 125 #define BPF_PSEUDO_MAP_FD 1 120 126 121 127 /* flags for BPF_MAP_UPDATE_ELEM command */ ··· 177 171 __u32 target_fd; /* container object to attach to */ 178 172 __u32 attach_bpf_fd; /* eBPF program to attach */ 179 173 __u32 attach_type; 174 + __u32 attach_flags; 180 175 }; 181 176 } __attribute__((aligned(8))); 182 177

+47 -12

kernel/bpf/cgroup.c

··· 52 52 e = rcu_dereference_protected(parent->bpf.effective[type], 53 53 lockdep_is_held(&cgroup_mutex)); 54 54 rcu_assign_pointer(cgrp->bpf.effective[type], e); 55 + cgrp->bpf.disallow_override[type] = parent->bpf.disallow_override[type]; 55 56 } 56 57 } 57 58 ··· 83 82 * 84 83 * Must be called with cgroup_mutex held. 85 84 */ 86 - void __cgroup_bpf_update(struct cgroup *cgrp, 87 - struct cgroup *parent, 88 - struct bpf_prog *prog, 89 - enum bpf_attach_type type) 85 + int __cgroup_bpf_update(struct cgroup *cgrp, struct cgroup *parent, 86 + struct bpf_prog *prog, enum bpf_attach_type type, 87 + bool new_overridable) 90 88 { 91 - struct bpf_prog *old_prog, *effective; 89 + struct bpf_prog *old_prog, *effective = NULL; 92 90 struct cgroup_subsys_state *pos; 91 + bool overridable = true; 93 92 94 - old_prog = xchg(cgrp->bpf.prog + type, prog); 93 + if (parent) { 94 + overridable = !parent->bpf.disallow_override[type]; 95 + effective = rcu_dereference_protected(parent->bpf.effective[type], 96 + lockdep_is_held(&cgroup_mutex)); 97 + } 95 98 96 - effective = (!prog && parent) ? 97 - rcu_dereference_protected(parent->bpf.effective[type], 98 - lockdep_is_held(&cgroup_mutex)) : 99 - prog; 99 + if (prog && effective && !overridable) 100 + /* if parent has non-overridable prog attached, disallow 101 + * attaching new programs to descendent cgroup 102 + */ 103 + return -EPERM; 104 + 105 + if (prog && effective && overridable != new_overridable) 106 + /* if parent has overridable prog attached, only 107 + * allow overridable programs in descendent cgroup 108 + */ 109 + return -EPERM; 110 + 111 + old_prog = cgrp->bpf.prog[type]; 112 + 113 + if (prog) { 114 + overridable = new_overridable; 115 + effective = prog; 116 + if (old_prog && 117 + cgrp->bpf.disallow_override[type] == new_overridable) 118 + /* disallow attaching non-overridable on top 119 + * of existing overridable in this cgroup 120 + * and vice versa 121 + */ 122 + return -EPERM; 123 + } 124 + 125 + if (!prog && !old_prog) 126 + /* report error when trying to detach and nothing is attached */ 127 + return -ENOENT; 128 + 129 + cgrp->bpf.prog[type] = prog; 100 130 101 131 css_for_each_descendant_pre(pos, &cgrp->self) { 102 132 struct cgroup *desc = container_of(pos, struct cgroup, self); 103 133 104 134 /* skip the subtree if the descendant has its own program */ 105 - if (desc->bpf.prog[type] && desc != cgrp) 135 + if (desc->bpf.prog[type] && desc != cgrp) { 106 136 pos = css_rightmost_descendant(pos); 107 - else 137 + } else { 108 138 rcu_assign_pointer(desc->bpf.effective[type], 109 139 effective); 140 + desc->bpf.disallow_override[type] = !overridable; 141 + } 110 142 } 111 143 112 144 if (prog) ··· 149 115 bpf_prog_put(old_prog); 150 116 static_branch_dec(&cgroup_bpf_enabled_key); 151 117 } 118 + return 0; 152 119 } 153 120 154 121 /**

+14 -6

kernel/bpf/syscall.c

··· 920 920 921 921 #ifdef CONFIG_CGROUP_BPF 922 922 923 - #define BPF_PROG_ATTACH_LAST_FIELD attach_type 923 + #define BPF_PROG_ATTACH_LAST_FIELD attach_flags 924 924 925 925 static int bpf_prog_attach(const union bpf_attr *attr) 926 926 { 927 + enum bpf_prog_type ptype; 927 928 struct bpf_prog *prog; 928 929 struct cgroup *cgrp; 929 - enum bpf_prog_type ptype; 930 + int ret; 930 931 931 932 if (!capable(CAP_NET_ADMIN)) 932 933 return -EPERM; 933 934 934 935 if (CHECK_ATTR(BPF_PROG_ATTACH)) 936 + return -EINVAL; 937 + 938 + if (attr->attach_flags & ~BPF_F_ALLOW_OVERRIDE) 935 939 return -EINVAL; 936 940 937 941 switch (attr->attach_type) { ··· 960 956 return PTR_ERR(cgrp); 961 957 } 962 958 963 - cgroup_bpf_update(cgrp, prog, attr->attach_type); 959 + ret = cgroup_bpf_update(cgrp, prog, attr->attach_type, 960 + attr->attach_flags & BPF_F_ALLOW_OVERRIDE); 961 + if (ret) 962 + bpf_prog_put(prog); 964 963 cgroup_put(cgrp); 965 964 966 - return 0; 965 + return ret; 967 966 } 968 967 969 968 #define BPF_PROG_DETACH_LAST_FIELD attach_type ··· 974 967 static int bpf_prog_detach(const union bpf_attr *attr) 975 968 { 976 969 struct cgroup *cgrp; 970 + int ret; 977 971 978 972 if (!capable(CAP_NET_ADMIN)) 979 973 return -EPERM; ··· 990 982 if (IS_ERR(cgrp)) 991 983 return PTR_ERR(cgrp); 992 984 993 - cgroup_bpf_update(cgrp, NULL, attr->attach_type); 985 + ret = cgroup_bpf_update(cgrp, NULL, attr->attach_type, false); 994 986 cgroup_put(cgrp); 995 987 break; 996 988 ··· 998 990 return -EINVAL; 999 991 } 1000 992 1001 - return 0; 993 + return ret; 1002 994 } 1003 995 #endif /* CONFIG_CGROUP_BPF */ 1004 996

+5 -4

kernel/cgroup.c

··· 6498 6498 subsys_initcall(cgroup_namespaces_init); 6499 6499 6500 6500 #ifdef CONFIG_CGROUP_BPF 6501 - void cgroup_bpf_update(struct cgroup *cgrp, 6502 - struct bpf_prog *prog, 6503 - enum bpf_attach_type type) 6501 + int cgroup_bpf_update(struct cgroup *cgrp, struct bpf_prog *prog, 6502 + enum bpf_attach_type type, bool overridable) 6504 6503 { 6505 6504 struct cgroup *parent = cgroup_parent(cgrp); 6505 + int ret; 6506 6506 6507 6507 mutex_lock(&cgroup_mutex); 6508 - __cgroup_bpf_update(cgrp, parent, prog, type); 6508 + ret = __cgroup_bpf_update(cgrp, parent, prog, type, overridable); 6509 6509 mutex_unlock(&cgroup_mutex); 6510 + return ret; 6510 6511 } 6511 6512 #endif /* CONFIG_CGROUP_BPF */ 6512 6513

+1 -1

samples/bpf/test_cgrp2_attach.c

··· 104 104 return EXIT_FAILURE; 105 105 } 106 106 107 - ret = bpf_prog_attach(prog_fd, cg_fd, type); 107 + ret = bpf_prog_attach(prog_fd, cg_fd, type, 0); 108 108 if (ret < 0) { 109 109 printf("Failed to attach prog to cgroup: '%s'\n", 110 110 strerror(errno));

+64 -4

samples/bpf/test_cgrp2_attach2.c

··· 79 79 if (join_cgroup(FOO)) 80 80 goto err; 81 81 82 - if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS)) { 82 + if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS, 1)) { 83 83 log_err("Attaching prog to /foo"); 84 84 goto err; 85 85 } 86 86 87 + printf("Attached DROP prog. This ping in cgroup /foo should fail...\n"); 87 88 assert(system(PING_CMD) != 0); 88 89 89 90 /* Create cgroup /foo/bar, get fd, and join it */ ··· 95 94 if (join_cgroup(BAR)) 96 95 goto err; 97 96 97 + printf("Attached DROP prog. This ping in cgroup /foo/bar should fail...\n"); 98 98 assert(system(PING_CMD) != 0); 99 99 100 - if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS)) { 100 + if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 1)) { 101 101 log_err("Attaching prog to /foo/bar"); 102 102 goto err; 103 103 } 104 104 105 + printf("Attached PASS prog. This ping in cgroup /foo/bar should pass...\n"); 105 106 assert(system(PING_CMD) == 0); 106 - 107 107 108 108 if (bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS)) { 109 109 log_err("Detaching program from /foo/bar"); 110 110 goto err; 111 111 } 112 112 113 + printf("Detached PASS from /foo/bar while DROP is attached to /foo.\n" 114 + "This ping in cgroup /foo/bar should fail...\n"); 113 115 assert(system(PING_CMD) != 0); 114 116 115 - if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS)) { 117 + if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 1)) { 116 118 log_err("Attaching prog to /foo/bar"); 117 119 goto err; 118 120 } ··· 125 121 goto err; 126 122 } 127 123 124 + printf("Attached PASS from /foo/bar and detached DROP from /foo.\n" 125 + "This ping in cgroup /foo/bar should pass...\n"); 128 126 assert(system(PING_CMD) == 0); 127 + 128 + if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 1)) { 129 + log_err("Attaching prog to /foo/bar"); 130 + goto err; 131 + } 132 + 133 + if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 0)) { 134 + errno = 0; 135 + log_err("Unexpected success attaching prog to /foo/bar"); 136 + goto err; 137 + } 138 + 139 + if (bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS)) { 140 + log_err("Detaching program from /foo/bar"); 141 + goto err; 142 + } 143 + 144 + if (!bpf_prog_detach(foo, BPF_CGROUP_INET_EGRESS)) { 145 + errno = 0; 146 + log_err("Unexpected success in double detach from /foo"); 147 + goto err; 148 + } 149 + 150 + if (bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS, 0)) { 151 + log_err("Attaching non-overridable prog to /foo"); 152 + goto err; 153 + } 154 + 155 + if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 0)) { 156 + errno = 0; 157 + log_err("Unexpected success attaching non-overridable prog to /foo/bar"); 158 + goto err; 159 + } 160 + 161 + if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 1)) { 162 + errno = 0; 163 + log_err("Unexpected success attaching overridable prog to /foo/bar"); 164 + goto err; 165 + } 166 + 167 + if (!bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS, 1)) { 168 + errno = 0; 169 + log_err("Unexpected success attaching overridable prog to /foo"); 170 + goto err; 171 + } 172 + 173 + if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS, 0)) { 174 + log_err("Attaching different non-overridable prog to /foo"); 175 + goto err; 176 + } 129 177 130 178 goto out; 131 179 ··· 188 132 close(foo); 189 133 close(bar); 190 134 cleanup_cgroup_environment(); 135 + if (!rc) 136 + printf("PASS\n"); 137 + else 138 + printf("FAIL\n"); 191 139 return rc; 192 140 }

+1 -1

samples/bpf/test_cgrp2_sock.c

··· 75 75 return EXIT_FAILURE; 76 76 } 77 77 78 - ret = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_INET_SOCK_CREATE); 78 + ret = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_INET_SOCK_CREATE, 0); 79 79 if (ret < 0) { 80 80 printf("Failed to attach prog to cgroup: '%s'\n", 81 81 strerror(errno));

+1 -1

samples/bpf/test_cgrp2_sock2.c

··· 55 55 } 56 56 57 57 ret = bpf_prog_attach(prog_fd[filter_id], cg_fd, 58 - BPF_CGROUP_INET_SOCK_CREATE); 58 + BPF_CGROUP_INET_SOCK_CREATE, 0); 59 59 if (ret < 0) { 60 60 printf("Failed to attach prog to cgroup: '%s'\n", 61 61 strerror(errno));

+3 -1

tools/lib/bpf/bpf.c

··· 168 168 return sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr)); 169 169 } 170 170 171 - int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type) 171 + int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type, 172 + unsigned int flags) 172 173 { 173 174 union bpf_attr attr; 174 175 ··· 177 176 attr.target_fd = target_fd; 178 177 attr.attach_bpf_fd = prog_fd; 179 178 attr.attach_type = type; 179 + attr.attach_flags = flags; 180 180 181 181 return sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)); 182 182 }

+2 -1

tools/lib/bpf/bpf.h

··· 41 41 int bpf_map_get_next_key(int fd, void *key, void *next_key); 42 42 int bpf_obj_pin(int fd, const char *pathname); 43 43 int bpf_obj_get(const char *pathname); 44 - int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type); 44 + int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type, 45 + unsigned int flags); 45 46 int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type); 46 47 47 48