Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

tjh.dev / kernel

fork atom

Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

kernel os linux

fork atom

Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

Daniel Borkmann says:

====================
pull-request: bpf 2021-09-14

The following pull-request contains BPF updates for your *net* tree.

We've added 7 non-merge commits during the last 13 days which contain
a total of 18 files changed, 334 insertions(+), 193 deletions(-).

The main changes are:

1) Fix mmap_lock lockdep splat in BPF stack map's build_id lookup, from Yonghong Song.

2) Fix BPF cgroup v2 program bypass upon net_cls/prio activation, from Daniel Borkmann.

3) Fix kvcalloc() BTF line info splat on oversized allocation attempts, from Bixuan Cui.

4) Fix BPF selftest build of task_pt_regs test for arm64/s390, from Jean-Philippe Brucker.

5) Dual-license BPF's disasm.{c,h} so that its licensing is aligned with bpftool, given that the
former is a build dependency of the latter, from Daniel Borkmann with ACKs from contributors.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

David S. Miller 4 years ago 2865ba82 550ac9c1

+336 -195

18 changed files

expand all collapse all

include

linux

cgroup-defs.h

cgroup.h

mmap_lock.h

kernel

bpf

disasm.c

disasm.h

stackmap.c

verifier.c

cgroup

cgroup.c

net

core

netclassid_cgroup.c

netprio_cgroup.c

tools

testing

selftests

bpf

cgroup_helpers.c

cgroup_helpers.h

network_helpers.c

network_helpers.h

prog_tests

cgroup_v1v2.c

task_pt_regs.c

progs

connect4_dropper.c

test_task_pt_regs.c

+27 -80

include/linux/cgroup-defs.h

reviewed

··· 752 752 * sock_cgroup_data is embedded at sock->sk_cgrp_data and contains 753 753 * per-socket cgroup information except for memcg association. 754 754 * 755 755 - * On legacy hierarchies, net_prio and net_cls controllers directly set 756 756 - * attributes on each sock which can then be tested by the network layer. 757 757 - * On the default hierarchy, each sock is associated with the cgroup it was 758 758 - * created in and the networking layer can match the cgroup directly. 759 759 - * 760 760 - * To avoid carrying all three cgroup related fields separately in sock, 761 761 - * sock_cgroup_data overloads (prioidx, classid) and the cgroup pointer. 762 762 - * On boot, sock_cgroup_data records the cgroup that the sock was created 763 763 - * in so that cgroup2 matches can be made; however, once either net_prio or 764 764 - * net_cls starts being used, the area is overridden to carry prioidx and/or 765 765 - * classid. The two modes are distinguished by whether the lowest bit is 766 766 - * set. Clear bit indicates cgroup pointer while set bit prioidx and 767 767 - * classid. 768 768 - * 769 769 - * While userland may start using net_prio or net_cls at any time, once 770 770 - * either is used, cgroup2 matching no longer works. There is no reason to 771 771 - * mix the two and this is in line with how legacy and v2 compatibility is 772 772 - * handled. On mode switch, cgroup references which are already being 773 773 - * pointed to by socks may be leaked. While this can be remedied by adding 774 774 - * synchronization around sock_cgroup_data, given that the number of leaked 775 775 - * cgroups is bound and highly unlikely to be high, this seems to be the 776 776 - * better trade-off. 755 755 + * On legacy hierarchies, net_prio and net_cls controllers directly 756 756 + * set attributes on each sock which can then be tested by the network 757 757 + * layer. 
On the default hierarchy, each sock is associated with the 758 758 + * cgroup it was created in and the networking layer can match the 759 759 + * cgroup directly. 777 760 */ 778 761 struct sock_cgroup_data { 779 779 - union { 780 780 - #ifdef __LITTLE_ENDIAN 781 781 - struct { 782 782 - u8 is_data : 1; 783 783 - u8 no_refcnt : 1; 784 784 - u8 unused : 6; 785 785 - u8 padding; 786 786 - u16 prioidx; 787 787 - u32 classid; 788 788 - } __packed; 789 789 - #else 790 790 - struct { 791 791 - u32 classid; 792 792 - u16 prioidx; 793 793 - u8 padding; 794 794 - u8 unused : 6; 795 795 - u8 no_refcnt : 1; 796 796 - u8 is_data : 1; 797 797 - } __packed; 762 762 + struct cgroup *cgroup; /* v2 */ 763 763 + #ifdef CONFIG_CGROUP_NET_CLASSID 764 764 + u32 classid; /* v1 */ 798 765 #endif 799 799 - u64 val; 800 800 - }; 766 766 + #ifdef CONFIG_CGROUP_NET_PRIO 767 767 + u16 prioidx; /* v1 */ 768 768 + #endif 801 769 }; 802 770 803 803 - /* 804 804 - * There's a theoretical window where the following accessors race with 805 805 - * updaters and return part of the previous pointer as the prioidx or 806 806 - * classid. Such races are short-lived and the result isn't critical. 807 807 - */ 808 771 static inline u16 sock_cgroup_prioidx(const struct sock_cgroup_data *skcd) 809 772 { 810 810 - /* fallback to 1 which is always the ID of the root cgroup */ 811 811 - return (skcd->is_data & 1) ? skcd->prioidx : 1; 773 773 + #ifdef CONFIG_CGROUP_NET_PRIO 774 774 + return READ_ONCE(skcd->prioidx); 775 775 + #else 776 776 + return 1; 777 777 + #endif 812 778 } 813 779 814 780 static inline u32 sock_cgroup_classid(const struct sock_cgroup_data *skcd) 815 781 { 816 816 - /* fallback to 0 which is the unconfigured default classid */ 817 817 - return (skcd->is_data & 1) ? 
skcd->classid : 0; 782 782 + #ifdef CONFIG_CGROUP_NET_CLASSID 783 783 + return READ_ONCE(skcd->classid); 784 784 + #else 785 785 + return 0; 786 786 + #endif 818 787 } 819 788 820 820 - /* 821 821 - * If invoked concurrently, the updaters may clobber each other. The 822 822 - * caller is responsible for synchronization. 823 823 - */ 824 789 static inline void sock_cgroup_set_prioidx(struct sock_cgroup_data *skcd, 825 790 u16 prioidx) 826 791 { 827 827 - struct sock_cgroup_data skcd_buf = {{ .val = READ_ONCE(skcd->val) }}; 828 828 - 829 829 - if (sock_cgroup_prioidx(&skcd_buf) == prioidx) 830 830 - return; 831 831 - 832 832 - if (!(skcd_buf.is_data & 1)) { 833 833 - skcd_buf.val = 0; 834 834 - skcd_buf.is_data = 1; 835 835 - } 836 836 - 837 837 - skcd_buf.prioidx = prioidx; 838 838 - WRITE_ONCE(skcd->val, skcd_buf.val); /* see sock_cgroup_ptr() */ 792 792 + #ifdef CONFIG_CGROUP_NET_PRIO 793 793 + WRITE_ONCE(skcd->prioidx, prioidx); 794 794 + #endif 839 795 } 840 796 841 797 static inline void sock_cgroup_set_classid(struct sock_cgroup_data *skcd, 842 798 u32 classid) 843 799 { 844 844 - struct sock_cgroup_data skcd_buf = {{ .val = READ_ONCE(skcd->val) }}; 845 845 - 846 846 - if (sock_cgroup_classid(&skcd_buf) == classid) 847 847 - return; 848 848 - 849 849 - if (!(skcd_buf.is_data & 1)) { 850 850 - skcd_buf.val = 0; 851 851 - skcd_buf.is_data = 1; 852 852 - } 853 853 - 854 854 - skcd_buf.classid = classid; 855 855 - WRITE_ONCE(skcd->val, skcd_buf.val); /* see sock_cgroup_ptr() */ 800 800 + #ifdef CONFIG_CGROUP_NET_CLASSID 801 801 + WRITE_ONCE(skcd->classid, classid); 802 802 + #endif 856 803 } 857 804 858 805 #else /* CONFIG_SOCK_CGROUP_DATA */

+1 -21

include/linux/cgroup.h

reviewed

··· 829 829 */ 830 830 #ifdef CONFIG_SOCK_CGROUP_DATA 831 831 832 832 - #if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID) 833 833 - extern spinlock_t cgroup_sk_update_lock; 834 834 - #endif 835 835 - 836 836 - void cgroup_sk_alloc_disable(void); 837 832 void cgroup_sk_alloc(struct sock_cgroup_data *skcd); 838 833 void cgroup_sk_clone(struct sock_cgroup_data *skcd); 839 834 void cgroup_sk_free(struct sock_cgroup_data *skcd); 840 835 841 836 static inline struct cgroup *sock_cgroup_ptr(struct sock_cgroup_data *skcd) 842 837 { 843 843 - #if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID) 844 844 - unsigned long v; 845 845 - 846 846 - /* 847 847 - * @skcd->val is 64bit but the following is safe on 32bit too as we 848 848 - * just need the lower ulong to be written and read atomically. 849 849 - */ 850 850 - v = READ_ONCE(skcd->val); 851 851 - 852 852 - if (v & 3) 853 853 - return &cgrp_dfl_root.cgrp; 854 854 - 855 855 - return (struct cgroup *)(unsigned long)v ?: &cgrp_dfl_root.cgrp; 856 856 - #else 857 857 - return (struct cgroup *)(unsigned long)skcd->val; 858 858 - #endif 838 838 + return skcd->cgroup; 859 839 } 860 840 861 841 #else /* CONFIG_CGROUP_DATA */

-9

include/linux/mmap_lock.h

reviewed

··· 144 144 __mmap_lock_trace_released(mm, false); 145 145 } 146 146 147 147 - static inline bool mmap_read_trylock_non_owner(struct mm_struct *mm) 148 148 - { 149 149 - if (mmap_read_trylock(mm)) { 150 150 - rwsem_release(&mm->mmap_lock.dep_map, _RET_IP_); 151 151 - return true; 152 152 - } 153 153 - return false; 154 154 - } 155 155 - 156 147 static inline void mmap_read_unlock_non_owner(struct mm_struct *mm) 157 148 { 158 149 up_read_non_owner(&mm->mmap_lock);

+1 -1

kernel/bpf/disasm.c

reviewed

··· 1 1 - // SPDX-License-Identifier: GPL-2.0-only 1 1 + // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) 2 2 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com 3 3 * Copyright (c) 2016 Facebook 4 4 */

+1 -1

kernel/bpf/disasm.h

reviewed

··· 1 1 - /* SPDX-License-Identifier: GPL-2.0-only */ 1 1 + /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ 2 2 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com 3 3 * Copyright (c) 2016 Facebook 4 4 */

+8 -2

kernel/bpf/stackmap.c

reviewed

··· 179 179 * with build_id. 180 180 */ 181 181 if (!user || !current || !current->mm || irq_work_busy || 182 182 - !mmap_read_trylock_non_owner(current->mm)) { 182 182 + !mmap_read_trylock(current->mm)) { 183 183 /* cannot access current->mm, fall back to ips */ 184 184 for (i = 0; i < trace_nr; i++) { 185 185 id_offs[i].status = BPF_STACK_BUILD_ID_IP; ··· 204 204 } 205 205 206 206 if (!work) { 207 207 - mmap_read_unlock_non_owner(current->mm); 207 207 + mmap_read_unlock(current->mm); 208 208 } else { 209 209 work->mm = current->mm; 210 210 + 211 211 + /* The lock will be released once we're out of interrupt 212 212 + * context. Tell lockdep that we've released it now so 213 213 + * it doesn't complain that we forgot to release it. 214 214 + */ 215 215 + rwsem_release(&current->mm->mmap_lock.dep_map, _RET_IP_); 210 216 irq_work_queue(&work->irq_work); 211 217 } 212 218 }

kernel/bpf/verifier.c

reviewed

··· 9912 9912 nr_linfo = attr->line_info_cnt; 9913 9913 if (!nr_linfo) 9914 9914 return 0; 9915 9915 + if (nr_linfo > INT_MAX / sizeof(struct bpf_line_info)) 9916 9916 + return -EINVAL; 9915 9917 9916 9918 rec_size = attr->line_info_rec_size; 9917 9919 if (rec_size < MIN_BPF_LINEINFO_SIZE ||

+10 -40

kernel/cgroup/cgroup.c

reviewed

··· 6572 6572 */ 6573 6573 #ifdef CONFIG_SOCK_CGROUP_DATA 6574 6574 6575 6575 - #if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID) 6576 6576 - 6577 6577 - DEFINE_SPINLOCK(cgroup_sk_update_lock); 6578 6578 - static bool cgroup_sk_alloc_disabled __read_mostly; 6579 6579 - 6580 6580 - void cgroup_sk_alloc_disable(void) 6581 6581 - { 6582 6582 - if (cgroup_sk_alloc_disabled) 6583 6583 - return; 6584 6584 - pr_info("cgroup: disabling cgroup2 socket matching due to net_prio or net_cls activation\n"); 6585 6585 - cgroup_sk_alloc_disabled = true; 6586 6586 - } 6587 6587 - 6588 6588 - #else 6589 6589 - 6590 6590 - #define cgroup_sk_alloc_disabled false 6591 6591 - 6592 6592 - #endif 6593 6593 - 6594 6575 void cgroup_sk_alloc(struct sock_cgroup_data *skcd) 6595 6576 { 6596 6596 - if (cgroup_sk_alloc_disabled) { 6597 6597 - skcd->no_refcnt = 1; 6598 6598 - return; 6599 6599 - } 6600 6600 - 6601 6577 /* Don't associate the sock with unrelated interrupted task's cgroup. */ 6602 6578 if (in_interrupt()) 6603 6579 return; 6604 6580 6605 6581 rcu_read_lock(); 6606 6606 - 6607 6582 while (true) { 6608 6583 struct css_set *cset; 6609 6584 6610 6585 cset = task_css_set(current); 6611 6586 if (likely(cgroup_tryget(cset->dfl_cgrp))) { 6612 6612 - skcd->val = (unsigned long)cset->dfl_cgrp; 6587 6587 + skcd->cgroup = cset->dfl_cgrp; 6613 6588 cgroup_bpf_get(cset->dfl_cgrp); 6614 6589 break; 6615 6590 } 6616 6591 cpu_relax(); 6617 6592 } 6618 6618 - 6619 6593 rcu_read_unlock(); 6620 6594 } 6621 6595 6622 6596 void cgroup_sk_clone(struct sock_cgroup_data *skcd) 6623 6597 { 6624 6624 - if (skcd->val) { 6625 6625 - if (skcd->no_refcnt) 6626 6626 - return; 6627 6627 - /* 6628 6628 - * We might be cloning a socket which is left in an empty 6629 6629 - * cgroup and the cgroup might have already been rmdir'd. 6630 6630 - * Don't use cgroup_get_live(). 
6631 6631 - */ 6632 6632 - cgroup_get(sock_cgroup_ptr(skcd)); 6633 6633 - cgroup_bpf_get(sock_cgroup_ptr(skcd)); 6634 6634 - } 6598 6598 + struct cgroup *cgrp = sock_cgroup_ptr(skcd); 6599 6599 + 6600 6600 + /* 6601 6601 + * We might be cloning a socket which is left in an empty 6602 6602 + * cgroup and the cgroup might have already been rmdir'd. 6603 6603 + * Don't use cgroup_get_live(). 6604 6604 + */ 6605 6605 + cgroup_get(cgrp); 6606 6606 + cgroup_bpf_get(cgrp); 6635 6607 } 6636 6608 6637 6609 void cgroup_sk_free(struct sock_cgroup_data *skcd) 6638 6610 { 6639 6611 struct cgroup *cgrp = sock_cgroup_ptr(skcd); 6640 6612 6641 6641 - if (skcd->no_refcnt) 6642 6642 - return; 6643 6613 cgroup_bpf_put(cgrp); 6644 6614 cgroup_put(cgrp); 6645 6615 }

+1 -6

net/core/netclassid_cgroup.c

reviewed

··· 71 71 struct update_classid_context *ctx = (void *)v; 72 72 struct socket *sock = sock_from_file(file); 73 73 74 74 - if (sock) { 75 75 - spin_lock(&cgroup_sk_update_lock); 74 74 + if (sock) 76 75 sock_cgroup_set_classid(&sock->sk->sk_cgrp_data, ctx->classid); 77 77 - spin_unlock(&cgroup_sk_update_lock); 78 78 - } 79 76 if (--ctx->batch == 0) { 80 77 ctx->batch = UPDATE_CLASSID_BATCH; 81 78 return n + 1; ··· 117 120 struct cgroup_cls_state *cs = css_cls_state(css); 118 121 struct css_task_iter it; 119 122 struct task_struct *p; 120 120 - 121 121 - cgroup_sk_alloc_disable(); 122 123 123 124 cs->classid = (u32)value; 124 125

+2 -8

net/core/netprio_cgroup.c

reviewed

··· 207 207 if (!dev) 208 208 return -ENODEV; 209 209 210 210 - cgroup_sk_alloc_disable(); 211 211 - 212 210 rtnl_lock(); 213 211 214 212 ret = netprio_set_prio(of_css(of), dev, prio); ··· 219 221 static int update_netprio(const void *v, struct file *file, unsigned n) 220 222 { 221 223 struct socket *sock = sock_from_file(file); 222 222 - if (sock) { 223 223 - spin_lock(&cgroup_sk_update_lock); 224 224 + 225 225 + if (sock) 224 226 sock_cgroup_set_prioidx(&sock->sk->sk_cgrp_data, 225 227 (unsigned long)v); 226 226 - spin_unlock(&cgroup_sk_update_lock); 227 227 - } 228 228 return 0; 229 229 } 230 230 ··· 230 234 { 231 235 struct task_struct *p; 232 236 struct cgroup_subsys_state *css; 233 233 - 234 234 - cgroup_sk_alloc_disable(); 235 237 236 238 cgroup_taskset_for_each(p, css, tset) { 237 239 void *v = (void *)(unsigned long)css->id;

+128 -9

tools/testing/selftests/bpf/cgroup_helpers.c

reviewed

··· 12 12 #include <unistd.h> 13 13 #include <ftw.h> 14 14 15 15 - 16 15 #include "cgroup_helpers.h" 17 16 18 17 /* 19 18 * To avoid relying on the system setup, when setup_cgroup_env is called 20 20 - * we create a new mount namespace, and cgroup namespace. The cgroup2 21 21 - * root is mounted at CGROUP_MOUNT_PATH 19 19 + * we create a new mount namespace, and cgroup namespace. The cgroupv2 20 20 + * root is mounted at CGROUP_MOUNT_PATH. Unfortunately, most people don't 21 21 + * have cgroupv2 enabled at this point in time. It's easier to create our 22 22 + * own mount namespace and manage it ourselves. We assume /mnt exists. 22 23 * 23 23 - * Unfortunately, most people don't have cgroupv2 enabled at this point in time. 24 24 - * It's easier to create our own mount namespace and manage it ourselves. 25 25 - * 26 26 - * We assume /mnt exists. 24 24 + * Related cgroupv1 helpers are named *classid*(), since we only use the 25 25 + * net_cls controller for tagging net_cls.classid. We assume the default 26 26 + * mount under /sys/fs/cgroup/net_cls, which should be the case for the 27 27 + * vast majority of users. 
27 28 */ 28 29 29 30 #define WALK_FD_LIMIT 16 31 31 + 30 32 #define CGROUP_MOUNT_PATH "/mnt" 33 33 + #define CGROUP_MOUNT_DFLT "/sys/fs/cgroup" 34 34 + #define NETCLS_MOUNT_PATH CGROUP_MOUNT_DFLT "/net_cls" 31 35 #define CGROUP_WORK_DIR "/cgroup-test-work-dir" 36 36 + 32 37 #define format_cgroup_path(buf, path) \ 33 38 snprintf(buf, sizeof(buf), "%s%s%s", CGROUP_MOUNT_PATH, \ 34 39 CGROUP_WORK_DIR, path) 40 40 + 41 41 + #define format_classid_path(buf) \ 42 42 + snprintf(buf, sizeof(buf), "%s%s", NETCLS_MOUNT_PATH, \ 43 43 + CGROUP_WORK_DIR) 35 44 36 45 /** 37 46 * enable_all_controllers() - Enable all available cgroup v2 controllers ··· 148 139 return 0; 149 140 } 150 141 151 151 - 152 152 - static int join_cgroup_from_top(char *cgroup_path) 142 142 + static int join_cgroup_from_top(const char *cgroup_path) 153 143 { 154 144 char cgroup_procs_path[PATH_MAX + 1]; 155 145 pid_t pid = getpid(); ··· 320 312 return -EINVAL; 321 313 } 322 314 return cg_fd; 315 315 + } 316 316 + 317 317 + /** 318 318 + * setup_classid_environment() - Setup the cgroupv1 net_cls environment 319 319 + * 320 320 + * After calling this function, cleanup_classid_environment should be called 321 321 + * once testing is complete. 322 322 + * 323 323 + * This function will print an error to stderr and return 1 if it is unable 324 324 + * to setup the cgroup environment. If setup is successful, 0 is returned. 
325 325 + */ 326 326 + int setup_classid_environment(void) 327 327 + { 328 328 + char cgroup_workdir[PATH_MAX + 1]; 329 329 + 330 330 + format_classid_path(cgroup_workdir); 331 331 + 332 332 + if (mount("tmpfs", CGROUP_MOUNT_DFLT, "tmpfs", 0, NULL) && 333 333 + errno != EBUSY) { 334 334 + log_err("mount cgroup base"); 335 335 + return 1; 336 336 + } 337 337 + 338 338 + if (mkdir(NETCLS_MOUNT_PATH, 0777) && errno != EEXIST) { 339 339 + log_err("mkdir cgroup net_cls"); 340 340 + return 1; 341 341 + } 342 342 + 343 343 + if (mount("net_cls", NETCLS_MOUNT_PATH, "cgroup", 0, "net_cls") && 344 344 + errno != EBUSY) { 345 345 + log_err("mount cgroup net_cls"); 346 346 + return 1; 347 347 + } 348 348 + 349 349 + cleanup_classid_environment(); 350 350 + 351 351 + if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) { 352 352 + log_err("mkdir cgroup work dir"); 353 353 + return 1; 354 354 + } 355 355 + 356 356 + return 0; 357 357 + } 358 358 + 359 359 + /** 360 360 + * set_classid() - Set a cgroupv1 net_cls classid 361 361 + * @id: the numeric classid 362 362 + * 363 363 + * Writes the passed classid into the cgroup work dir's net_cls.classid 364 364 + * file in order to later on trigger socket tagging. 365 365 + * 366 366 + * On success, it returns 0, otherwise on failure it returns 1. If there 367 367 + * is a failure, it prints the error to stderr. 
368 368 + */ 369 369 + int set_classid(unsigned int id) 370 370 + { 371 371 + char cgroup_workdir[PATH_MAX - 42]; 372 372 + char cgroup_classid_path[PATH_MAX + 1]; 373 373 + int fd, rc = 0; 374 374 + 375 375 + format_classid_path(cgroup_workdir); 376 376 + snprintf(cgroup_classid_path, sizeof(cgroup_classid_path), 377 377 + "%s/net_cls.classid", cgroup_workdir); 378 378 + 379 379 + fd = open(cgroup_classid_path, O_WRONLY); 380 380 + if (fd < 0) { 381 381 + log_err("Opening cgroup classid: %s", cgroup_classid_path); 382 382 + return 1; 383 383 + } 384 384 + 385 385 + if (dprintf(fd, "%u\n", id) < 0) { 386 386 + log_err("Setting cgroup classid"); 387 387 + rc = 1; 388 388 + } 389 389 + 390 390 + close(fd); 391 391 + return rc; 392 392 + } 393 393 + 394 394 + /** 395 395 + * join_classid() - Join a cgroupv1 net_cls classid 396 396 + * 397 397 + * This function expects the cgroup work dir to be already created, as we 398 398 + * join it here. This causes the process sockets to be tagged with the given 399 399 + * net_cls classid. 400 400 + * 401 401 + * On success, it returns 0, otherwise on failure it returns 1. 402 402 + */ 403 403 + int join_classid(void) 404 404 + { 405 405 + char cgroup_workdir[PATH_MAX + 1]; 406 406 + 407 407 + format_classid_path(cgroup_workdir); 408 408 + return join_cgroup_from_top(cgroup_workdir); 409 409 + } 410 410 + 411 411 + /** 412 412 + * cleanup_classid_environment() - Cleanup the cgroupv1 net_cls environment 413 413 + * 414 414 + * At call time, it moves the calling process to the root cgroup, and then 415 415 + * runs the deletion process. 416 416 + * 417 417 + * On failure, it will print an error to stderr, and try to continue. 
418 418 + */ 419 419 + void cleanup_classid_environment(void) 420 420 + { 421 421 + char cgroup_workdir[PATH_MAX + 1]; 422 422 + 423 423 + format_classid_path(cgroup_workdir); 424 424 + join_cgroup_from_top(NETCLS_MOUNT_PATH); 425 425 + nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT); 323 426 }

+15 -5

tools/testing/selftests/bpf/cgroup_helpers.h

reviewed

··· 1 1 /* SPDX-License-Identifier: GPL-2.0 */ 2 2 #ifndef __CGROUP_HELPERS_H 3 3 #define __CGROUP_HELPERS_H 4 4 + 4 5 #include <errno.h> 5 6 #include <string.h> 6 7 ··· 9 8 #define log_err(MSG, ...) fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \ 10 9 __FILE__, __LINE__, clean_errno(), ##__VA_ARGS__) 11 10 12 12 - 11 11 + /* cgroupv2 related */ 13 12 int cgroup_setup_and_join(const char *path); 14 13 int create_and_get_cgroup(const char *path); 15 15 - int join_cgroup(const char *path); 16 16 - int setup_cgroup_environment(void); 17 17 - void cleanup_cgroup_environment(void); 18 14 unsigned long long get_cgroup_id(const char *path); 19 15 20 20 - #endif 16 16 + int join_cgroup(const char *path); 17 17 + 18 18 + int setup_cgroup_environment(void); 19 19 + void cleanup_cgroup_environment(void); 20 20 + 21 21 + /* cgroupv1 related */ 22 22 + int set_classid(unsigned int id); 23 23 + int join_classid(void); 24 24 + 25 25 + int setup_classid_environment(void); 26 26 + void cleanup_classid_environment(void); 27 27 + 28 28 + #endif /* __CGROUP_HELPERS_H */

+21 -6

tools/testing/selftests/bpf/network_helpers.c

reviewed

··· 208 208 209 209 static int connect_fd_to_addr(int fd, 210 210 const struct sockaddr_storage *addr, 211 211 - socklen_t addrlen) 211 211 + socklen_t addrlen, const bool must_fail) 212 212 { 213 213 - if (connect(fd, (const struct sockaddr *)addr, addrlen)) { 214 214 - log_err("Failed to connect to server"); 215 215 - return -1; 213 213 + int ret; 214 214 + 215 215 + errno = 0; 216 216 + ret = connect(fd, (const struct sockaddr *)addr, addrlen); 217 217 + if (must_fail) { 218 218 + if (!ret) { 219 219 + log_err("Unexpected success to connect to server"); 220 220 + return -1; 221 221 + } 222 222 + if (errno != EPERM) { 223 223 + log_err("Unexpected error from connect to server"); 224 224 + return -1; 225 225 + } 226 226 + } else { 227 227 + if (ret) { 228 228 + log_err("Failed to connect to server"); 229 229 + return -1; 230 230 + } 216 231 } 217 232 218 233 return 0; ··· 272 257 strlen(opts->cc) + 1)) 273 258 goto error_close; 274 259 275 275 - if (connect_fd_to_addr(fd, &addr, addrlen)) 260 260 + if (connect_fd_to_addr(fd, &addr, addrlen, opts->must_fail)) 276 261 goto error_close; 277 262 278 263 return fd; ··· 304 289 return -1; 305 290 } 306 291 307 307 - if (connect_fd_to_addr(client_fd, &addr, len)) 292 292 + if (connect_fd_to_addr(client_fd, &addr, len, false)) 308 293 return -1; 309 294 310 295 return 0;

tools/testing/selftests/bpf/network_helpers.h

reviewed

··· 20 20 struct network_helper_opts { 21 21 const char *cc; 22 22 int timeout_ms; 23 23 + bool must_fail; 23 24 }; 24 25 25 26 /* ipv4 test vector */

+79

tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c

reviewed

··· 1 1 + // SPDX-License-Identifier: GPL-2.0 2 2 + 3 3 + #include <test_progs.h> 4 4 + 5 5 + #include "connect4_dropper.skel.h" 6 6 + 7 7 + #include "cgroup_helpers.h" 8 8 + #include "network_helpers.h" 9 9 + 10 10 + static int run_test(int cgroup_fd, int server_fd, bool classid) 11 11 + { 12 12 + struct network_helper_opts opts = { 13 13 + .must_fail = true, 14 14 + }; 15 15 + struct connect4_dropper *skel; 16 16 + int fd, err = 0; 17 17 + 18 18 + skel = connect4_dropper__open_and_load(); 19 19 + if (!ASSERT_OK_PTR(skel, "skel_open")) 20 20 + return -1; 21 21 + 22 22 + skel->links.connect_v4_dropper = 23 23 + bpf_program__attach_cgroup(skel->progs.connect_v4_dropper, 24 24 + cgroup_fd); 25 25 + if (!ASSERT_OK_PTR(skel->links.connect_v4_dropper, "prog_attach")) { 26 26 + err = -1; 27 27 + goto out; 28 28 + } 29 29 + 30 30 + if (classid && !ASSERT_OK(join_classid(), "join_classid")) { 31 31 + err = -1; 32 32 + goto out; 33 33 + } 34 34 + 35 35 + fd = connect_to_fd_opts(server_fd, &opts); 36 36 + if (fd < 0) 37 37 + err = -1; 38 38 + else 39 39 + close(fd); 40 40 + out: 41 41 + connect4_dropper__destroy(skel); 42 42 + return err; 43 43 + } 44 44 + 45 45 + void test_cgroup_v1v2(void) 46 46 + { 47 47 + struct network_helper_opts opts = {}; 48 48 + int server_fd, client_fd, cgroup_fd; 49 49 + static const int port = 60123; 50 50 + 51 51 + /* Step 1: Check base connectivity works without any BPF. */ 52 52 + server_fd = start_server(AF_INET, SOCK_STREAM, NULL, port, 0); 53 53 + if (!ASSERT_GE(server_fd, 0, "server_fd")) 54 54 + return; 55 55 + client_fd = connect_to_fd_opts(server_fd, &opts); 56 56 + if (!ASSERT_GE(client_fd, 0, "client_fd")) { 57 57 + close(server_fd); 58 58 + return; 59 59 + } 60 60 + close(client_fd); 61 61 + close(server_fd); 62 62 + 63 63 + /* Step 2: Check BPF policy prog attached to cgroups drops connectivity. 
*/ 64 64 + cgroup_fd = test__join_cgroup("/connect_dropper"); 65 65 + if (!ASSERT_GE(cgroup_fd, 0, "cgroup_fd")) 66 66 + return; 67 67 + server_fd = start_server(AF_INET, SOCK_STREAM, NULL, port, 0); 68 68 + if (!ASSERT_GE(server_fd, 0, "server_fd")) { 69 69 + close(cgroup_fd); 70 70 + return; 71 71 + } 72 72 + ASSERT_OK(run_test(cgroup_fd, server_fd, false), "cgroup-v2-only"); 73 73 + setup_classid_environment(); 74 74 + set_classid(42); 75 75 + ASSERT_OK(run_test(cgroup_fd, server_fd, true), "cgroup-v1v2"); 76 76 + cleanup_classid_environment(); 77 77 + close(server_fd); 78 78 + close(cgroup_fd); 79 79 + }

-1

tools/testing/selftests/bpf/prog_tests/task_pt_regs.c

reviewed

··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 #define _GNU_SOURCE 3 3 #include <test_progs.h> 4 4 - #include <linux/ptrace.h> 5 4 #include "test_task_pt_regs.skel.h" 6 5 7 6 void test_task_pt_regs(void)

+26

tools/testing/selftests/bpf/progs/connect4_dropper.c

reviewed

··· 1 1 + // SPDX-License-Identifier: GPL-2.0 2 2 + 3 3 + #include <string.h> 4 4 + 5 5 + #include <linux/stddef.h> 6 6 + #include <linux/bpf.h> 7 7 + 8 8 + #include <sys/socket.h> 9 9 + 10 10 + #include <bpf/bpf_helpers.h> 11 11 + #include <bpf/bpf_endian.h> 12 12 + 13 13 + #define VERDICT_REJECT 0 14 14 + #define VERDICT_PROCEED 1 15 15 + 16 16 + SEC("cgroup/connect4") 17 17 + int connect_v4_dropper(struct bpf_sock_addr *ctx) 18 18 + { 19 19 + if (ctx->type != SOCK_STREAM) 20 20 + return VERDICT_PROCEED; 21 21 + if (ctx->user_port == bpf_htons(60123)) 22 22 + return VERDICT_REJECT; 23 23 + return VERDICT_PROCEED; 24 24 + } 25 25 + 26 26 + char _license[] SEC("license") = "GPL";

+13 -6

tools/testing/selftests/bpf/progs/test_task_pt_regs.c

reviewed

··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 3 3 - #include <linux/ptrace.h> 4 4 - #include <linux/bpf.h> 3 3 + #include "vmlinux.h" 5 4 #include <bpf/bpf_helpers.h> 6 5 #include <bpf/bpf_tracing.h> 7 6 8 8 - struct pt_regs current_regs = {}; 9 9 - struct pt_regs ctx_regs = {}; 7 7 + #define PT_REGS_SIZE sizeof(struct pt_regs) 8 8 + 9 9 + /* 10 10 + * The kernel struct pt_regs isn't exported in its entirety to userspace. 11 11 + * Pass it as an array to task_pt_regs.c 12 12 + */ 13 13 + char current_regs[PT_REGS_SIZE] = {}; 14 14 + char ctx_regs[PT_REGS_SIZE] = {}; 10 15 int uprobe_res = 0; 11 16 12 17 SEC("uprobe/trigger_func") ··· 22 17 23 18 current = bpf_get_current_task_btf(); 24 19 regs = (struct pt_regs *) bpf_task_pt_regs(current); 25 25 - __builtin_memcpy(&current_regs, regs, sizeof(*regs)); 26 26 - __builtin_memcpy(&ctx_regs, ctx, sizeof(*ctx)); 20 20 + if (bpf_probe_read_kernel(current_regs, PT_REGS_SIZE, regs)) 21 21 + return 0; 22 22 + if (bpf_probe_read_kernel(ctx_regs, PT_REGS_SIZE, ctx)) 23 23 + return 0; 27 24 28 25 /* Prove that uprobe was run */ 29 26 uprobe_res = 1;