Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bpf: selftest: Test batching and bpf_(get|set)sockopt in bpf tcp iter

This patch adds tests for the batching and bpf_(get|set)sockopt in
bpf tcp iter.

It first creates:
a) 1 non SO_REUSEPORT listener in lhash2.
b) 256 passive and active fds connected to the listener in (a).
c) 256 SO_REUSEPORT listeners in one of the lhash2 buckets.

The test sets all listeners and connections to bpf_cubic before
running the bpf iter.

The bpf iter then calls setsockopt(TCP_CONGESTION) to switch
each listener and connection from bpf_cubic to bpf_dctcp.

The bpf iter has a random_retry mode such that it can return EAGAIN
to userspace in the middle of a batch.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Acked-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210701200625.1036874-1-kafai@fb.com

authored by

Martin KaFai Lau and committed by
Andrii Nakryiko
eed92afd 3cee6fb8

+384 -9
+76 -9
tools/testing/selftests/bpf/network_helpers.c
··· 66 66 67 67 #define save_errno_close(fd) ({ int __save = errno; close(fd); errno = __save; }) 68 68 69 - int start_server(int family, int type, const char *addr_str, __u16 port, 70 - int timeout_ms) 69 + static int __start_server(int type, const struct sockaddr *addr, 70 + socklen_t addrlen, int timeout_ms, bool reuseport) 71 71 { 72 - struct sockaddr_storage addr = {}; 73 - socklen_t len; 72 + int on = 1; 74 73 int fd; 75 74 76 - if (make_sockaddr(family, addr_str, port, &addr, &len)) 77 - return -1; 78 - 79 - fd = socket(family, type, 0); 75 + fd = socket(addr->sa_family, type, 0); 80 76 if (fd < 0) { 81 77 log_err("Failed to create server socket"); 82 78 return -1; ··· 81 85 if (settimeo(fd, timeout_ms)) 82 86 goto error_close; 83 87 84 - if (bind(fd, (const struct sockaddr *)&addr, len) < 0) { 88 + if (reuseport && 89 + setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &on, sizeof(on))) { 90 + log_err("Failed to set SO_REUSEPORT"); 91 + return -1; 92 + } 93 + 94 + if (bind(fd, addr, addrlen) < 0) { 85 95 log_err("Failed to bind socket"); 86 96 goto error_close; 87 97 } ··· 104 102 error_close: 105 103 save_errno_close(fd); 106 104 return -1; 105 + } 106 + 107 + int start_server(int family, int type, const char *addr_str, __u16 port, 108 + int timeout_ms) 109 + { 110 + struct sockaddr_storage addr; 111 + socklen_t addrlen; 112 + 113 + if (make_sockaddr(family, addr_str, port, &addr, &addrlen)) 114 + return -1; 115 + 116 + return __start_server(type, (struct sockaddr *)&addr, 117 + addrlen, timeout_ms, false); 118 + } 119 + 120 + int *start_reuseport_server(int family, int type, const char *addr_str, 121 + __u16 port, int timeout_ms, unsigned int nr_listens) 122 + { 123 + struct sockaddr_storage addr; 124 + unsigned int nr_fds = 0; 125 + socklen_t addrlen; 126 + int *fds; 127 + 128 + if (!nr_listens) 129 + return NULL; 130 + 131 + if (make_sockaddr(family, addr_str, port, &addr, &addrlen)) 132 + return NULL; 133 + 134 + fds = malloc(sizeof(*fds) * nr_listens); 135 + 
if (!fds) 136 + return NULL; 137 + 138 + fds[0] = __start_server(type, (struct sockaddr *)&addr, addrlen, 139 + timeout_ms, true); 140 + if (fds[0] == -1) 141 + goto close_fds; 142 + nr_fds = 1; 143 + 144 + if (getsockname(fds[0], (struct sockaddr *)&addr, &addrlen)) 145 + goto close_fds; 146 + 147 + for (; nr_fds < nr_listens; nr_fds++) { 148 + fds[nr_fds] = __start_server(type, (struct sockaddr *)&addr, 149 + addrlen, timeout_ms, true); 150 + if (fds[nr_fds] == -1) 151 + goto close_fds; 152 + } 153 + 154 + return fds; 155 + 156 + close_fds: 157 + free_fds(fds, nr_fds); 158 + return NULL; 159 + } 160 + 161 + void free_fds(int *fds, unsigned int nr_close_fds) 162 + { 163 + if (fds) { 164 + while (nr_close_fds) 165 + close(fds[--nr_close_fds]); 166 + free(fds); 167 + } 107 168 } 108 169 109 170 int fastopen_connect(int server_fd, const char *data, unsigned int data_len, ··· 282 217 if (family == AF_INET) { 283 218 struct sockaddr_in *sin = (void *)addr; 284 219 220 + memset(addr, 0, sizeof(*sin)); 285 221 sin->sin_family = AF_INET; 286 222 sin->sin_port = htons(port); 287 223 if (addr_str && ··· 296 230 } else if (family == AF_INET6) { 297 231 struct sockaddr_in6 *sin6 = (void *)addr; 298 232 233 + memset(addr, 0, sizeof(*sin6)); 299 234 sin6->sin6_family = AF_INET6; 300 235 sin6->sin6_port = htons(port); 301 236 if (addr_str &&
+4
tools/testing/selftests/bpf/network_helpers.h
··· 36 36 int settimeo(int fd, int timeout_ms); 37 37 int start_server(int family, int type, const char *addr, __u16 port, 38 38 int timeout_ms); 39 + int *start_reuseport_server(int family, int type, const char *addr_str, 40 + __u16 port, int timeout_ms, 41 + unsigned int nr_listens); 42 + void free_fds(int *fds, unsigned int nr_close_fds); 39 43 int connect_to_fd(int server_fd, int timeout_ms); 40 44 int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms); 41 45 int fastopen_connect(int server_fd, const char *data, unsigned int data_len,
+226
tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2021 Facebook */ 3 + #define _GNU_SOURCE 4 + #include <sched.h> 5 + #include <test_progs.h> 6 + #include "network_helpers.h" 7 + #include "bpf_dctcp.skel.h" 8 + #include "bpf_cubic.skel.h" 9 + #include "bpf_iter_setsockopt.skel.h" 10 + 11 + static int create_netns(void) 12 + { 13 + if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns")) 14 + return -1; 15 + 16 + if (!ASSERT_OK(system("ip link set dev lo up"), "bring up lo")) 17 + return -1; 18 + 19 + return 0; 20 + } 21 + 22 + static unsigned int set_bpf_cubic(int *fds, unsigned int nr_fds) 23 + { 24 + unsigned int i; 25 + 26 + for (i = 0; i < nr_fds; i++) { 27 + if (setsockopt(fds[i], SOL_TCP, TCP_CONGESTION, "bpf_cubic", 28 + sizeof("bpf_cubic"))) 29 + return i; 30 + } 31 + 32 + return nr_fds; 33 + } 34 + 35 + static unsigned int check_bpf_dctcp(int *fds, unsigned int nr_fds) 36 + { 37 + char tcp_cc[16]; 38 + socklen_t optlen = sizeof(tcp_cc); 39 + unsigned int i; 40 + 41 + for (i = 0; i < nr_fds; i++) { 42 + if (getsockopt(fds[i], SOL_TCP, TCP_CONGESTION, 43 + tcp_cc, &optlen) || 44 + strcmp(tcp_cc, "bpf_dctcp")) 45 + return i; 46 + } 47 + 48 + return nr_fds; 49 + } 50 + 51 + static int *make_established(int listen_fd, unsigned int nr_est, 52 + int **paccepted_fds) 53 + { 54 + int *est_fds, *accepted_fds; 55 + unsigned int i; 56 + 57 + est_fds = malloc(sizeof(*est_fds) * nr_est); 58 + if (!est_fds) 59 + return NULL; 60 + 61 + accepted_fds = malloc(sizeof(*accepted_fds) * nr_est); 62 + if (!accepted_fds) { 63 + free(est_fds); 64 + return NULL; 65 + } 66 + 67 + for (i = 0; i < nr_est; i++) { 68 + est_fds[i] = connect_to_fd(listen_fd, 0); 69 + if (est_fds[i] == -1) 70 + break; 71 + if (set_bpf_cubic(&est_fds[i], 1) != 1) { 72 + close(est_fds[i]); 73 + break; 74 + } 75 + 76 + accepted_fds[i] = accept(listen_fd, NULL, 0); 77 + if (accepted_fds[i] == -1) { 78 + close(est_fds[i]); 79 + break; 80 + } 81 + } 82 + 83 + if (!ASSERT_EQ(i, nr_est, "create 
established fds")) { 84 + free_fds(accepted_fds, i); 85 + free_fds(est_fds, i); 86 + return NULL; 87 + } 88 + 89 + *paccepted_fds = accepted_fds; 90 + return est_fds; 91 + } 92 + 93 + static unsigned short get_local_port(int fd) 94 + { 95 + struct sockaddr_in6 addr; 96 + socklen_t addrlen = sizeof(addr); 97 + 98 + if (!getsockname(fd, &addr, &addrlen)) 99 + return ntohs(addr.sin6_port); 100 + 101 + return 0; 102 + } 103 + 104 + static void do_bpf_iter_setsockopt(struct bpf_iter_setsockopt *iter_skel, 105 + bool random_retry) 106 + { 107 + int *reuse_listen_fds = NULL, *accepted_fds = NULL, *est_fds = NULL; 108 + unsigned int nr_reuse_listens = 256, nr_est = 256; 109 + int err, iter_fd = -1, listen_fd = -1; 110 + char buf; 111 + 112 + /* Prepare non-reuseport listen_fd */ 113 + listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0); 114 + if (!ASSERT_GE(listen_fd, 0, "start_server")) 115 + return; 116 + if (!ASSERT_EQ(set_bpf_cubic(&listen_fd, 1), 1, 117 + "set listen_fd to cubic")) 118 + goto done; 119 + iter_skel->bss->listen_hport = get_local_port(listen_fd); 120 + if (!ASSERT_NEQ(iter_skel->bss->listen_hport, 0, 121 + "get_local_port(listen_fd)")) 122 + goto done; 123 + 124 + /* Connect to non-reuseport listen_fd */ 125 + est_fds = make_established(listen_fd, nr_est, &accepted_fds); 126 + if (!ASSERT_OK_PTR(est_fds, "create established")) 127 + goto done; 128 + 129 + /* Prepare reuseport listen fds */ 130 + reuse_listen_fds = start_reuseport_server(AF_INET6, SOCK_STREAM, 131 + "::1", 0, 0, 132 + nr_reuse_listens); 133 + if (!ASSERT_OK_PTR(reuse_listen_fds, "start_reuseport_server")) 134 + goto done; 135 + if (!ASSERT_EQ(set_bpf_cubic(reuse_listen_fds, nr_reuse_listens), 136 + nr_reuse_listens, "set reuse_listen_fds to cubic")) 137 + goto done; 138 + iter_skel->bss->reuse_listen_hport = get_local_port(reuse_listen_fds[0]); 139 + if (!ASSERT_NEQ(iter_skel->bss->reuse_listen_hport, 0, 140 + "get_local_port(reuse_listen_fds[0])")) 141 + goto done; 142 + 143 + 
/* Run bpf tcp iter to switch from bpf_cubic to bpf_dctcp */ 144 + iter_skel->bss->random_retry = random_retry; 145 + iter_fd = bpf_iter_create(bpf_link__fd(iter_skel->links.change_tcp_cc)); 146 + if (!ASSERT_GE(iter_fd, 0, "create iter_fd")) 147 + goto done; 148 + 149 + while ((err = read(iter_fd, &buf, sizeof(buf))) == -1 && 150 + errno == EAGAIN) 151 + ; 152 + if (!ASSERT_OK(err, "read iter error")) 153 + goto done; 154 + 155 + /* Check reuseport listen fds for dctcp */ 156 + ASSERT_EQ(check_bpf_dctcp(reuse_listen_fds, nr_reuse_listens), 157 + nr_reuse_listens, 158 + "check reuse_listen_fds dctcp"); 159 + 160 + /* Check non reuseport listen fd for dctcp */ 161 + ASSERT_EQ(check_bpf_dctcp(&listen_fd, 1), 1, 162 + "check listen_fd dctcp"); 163 + 164 + /* Check established fds for dctcp */ 165 + ASSERT_EQ(check_bpf_dctcp(est_fds, nr_est), nr_est, 166 + "check est_fds dctcp"); 167 + 168 + /* Check accepted fds for dctcp */ 169 + ASSERT_EQ(check_bpf_dctcp(accepted_fds, nr_est), nr_est, 170 + "check accepted_fds dctcp"); 171 + 172 + done: 173 + if (iter_fd != -1) 174 + close(iter_fd); 175 + if (listen_fd != -1) 176 + close(listen_fd); 177 + free_fds(reuse_listen_fds, nr_reuse_listens); 178 + free_fds(accepted_fds, nr_est); 179 + free_fds(est_fds, nr_est); 180 + } 181 + 182 + void test_bpf_iter_setsockopt(void) 183 + { 184 + struct bpf_iter_setsockopt *iter_skel = NULL; 185 + struct bpf_cubic *cubic_skel = NULL; 186 + struct bpf_dctcp *dctcp_skel = NULL; 187 + struct bpf_link *cubic_link = NULL; 188 + struct bpf_link *dctcp_link = NULL; 189 + 190 + if (create_netns()) 191 + return; 192 + 193 + /* Load iter_skel */ 194 + iter_skel = bpf_iter_setsockopt__open_and_load(); 195 + if (!ASSERT_OK_PTR(iter_skel, "iter_skel")) 196 + return; 197 + iter_skel->links.change_tcp_cc = bpf_program__attach_iter(iter_skel->progs.change_tcp_cc, NULL); 198 + if (!ASSERT_OK_PTR(iter_skel->links.change_tcp_cc, "attach iter")) 199 + goto done; 200 + 201 + /* Load bpf_cubic */ 202 + 
cubic_skel = bpf_cubic__open_and_load(); 203 + if (!ASSERT_OK_PTR(cubic_skel, "cubic_skel")) 204 + goto done; 205 + cubic_link = bpf_map__attach_struct_ops(cubic_skel->maps.cubic); 206 + if (!ASSERT_OK_PTR(cubic_link, "cubic_link")) 207 + goto done; 208 + 209 + /* Load bpf_dctcp */ 210 + dctcp_skel = bpf_dctcp__open_and_load(); 211 + if (!ASSERT_OK_PTR(dctcp_skel, "dctcp_skel")) 212 + goto done; 213 + dctcp_link = bpf_map__attach_struct_ops(dctcp_skel->maps.dctcp); 214 + if (!ASSERT_OK_PTR(dctcp_link, "dctcp_link")) 215 + goto done; 216 + 217 + do_bpf_iter_setsockopt(iter_skel, true); 218 + do_bpf_iter_setsockopt(iter_skel, false); 219 + 220 + done: 221 + bpf_link__destroy(cubic_link); 222 + bpf_link__destroy(dctcp_link); 223 + bpf_cubic__destroy(cubic_skel); 224 + bpf_dctcp__destroy(dctcp_skel); 225 + bpf_iter_setsockopt__destroy(iter_skel); 226 + }
+72
tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2021 Facebook */ 3 + #include "bpf_iter.h" 4 + #include "bpf_tracing_net.h" 5 + #include <bpf/bpf_helpers.h> 6 + #include <bpf/bpf_endian.h> 7 + 8 + #define bpf_tcp_sk(skc) ({ \ 9 + struct sock_common *_skc = skc; \ 10 + sk = NULL; \ 11 + tp = NULL; \ 12 + if (_skc) { \ 13 + tp = bpf_skc_to_tcp_sock(_skc); \ 14 + sk = (struct sock *)tp; \ 15 + } \ 16 + tp; \ 17 + }) 18 + 19 + unsigned short reuse_listen_hport = 0; 20 + unsigned short listen_hport = 0; 21 + char cubic_cc[TCP_CA_NAME_MAX] = "bpf_cubic"; 22 + char dctcp_cc[TCP_CA_NAME_MAX] = "bpf_dctcp"; 23 + bool random_retry = false; 24 + 25 + static bool tcp_cc_eq(const char *a, const char *b) 26 + { 27 + int i; 28 + 29 + for (i = 0; i < TCP_CA_NAME_MAX; i++) { 30 + if (a[i] != b[i]) 31 + return false; 32 + if (!a[i]) 33 + break; 34 + } 35 + 36 + return true; 37 + } 38 + 39 + SEC("iter/tcp") 40 + int change_tcp_cc(struct bpf_iter__tcp *ctx) 41 + { 42 + char cur_cc[TCP_CA_NAME_MAX]; 43 + struct tcp_sock *tp; 44 + struct sock *sk; 45 + int ret; 46 + 47 + if (!bpf_tcp_sk(ctx->sk_common)) 48 + return 0; 49 + 50 + if (sk->sk_family != AF_INET6 || 51 + (sk->sk_state != TCP_LISTEN && 52 + sk->sk_state != TCP_ESTABLISHED) || 53 + (sk->sk_num != reuse_listen_hport && 54 + sk->sk_num != listen_hport && 55 + bpf_ntohs(sk->sk_dport) != listen_hport)) 56 + return 0; 57 + 58 + if (bpf_getsockopt(tp, SOL_TCP, TCP_CONGESTION, 59 + cur_cc, sizeof(cur_cc))) 60 + return 0; 61 + 62 + if (!tcp_cc_eq(cur_cc, cubic_cc)) 63 + return 0; 64 + 65 + if (random_retry && bpf_get_prandom_u32() % 4 == 1) 66 + return 1; 67 + 68 + bpf_setsockopt(tp, SOL_TCP, TCP_CONGESTION, dctcp_cc, sizeof(dctcp_cc)); 69 + return 0; 70 + } 71 + 72 + char _license[] SEC("license") = "GPL";
+6
tools/testing/selftests/bpf/progs/bpf_tracing_net.h
··· 5 5 #define AF_INET 2 6 6 #define AF_INET6 10 7 7 8 + #define SOL_TCP 6 9 + #define TCP_CONGESTION 13 10 + #define TCP_CA_NAME_MAX 16 11 + 8 12 #define ICSK_TIME_RETRANS 1 9 13 #define ICSK_TIME_PROBE0 3 10 14 #define ICSK_TIME_LOSS_PROBE 5 ··· 36 32 #define ir_v6_rmt_addr req.__req_common.skc_v6_daddr 37 33 #define ir_v6_loc_addr req.__req_common.skc_v6_rcv_saddr 38 34 35 + #define sk_num __sk_common.skc_num 36 + #define sk_dport __sk_common.skc_dport 39 37 #define sk_family __sk_common.skc_family 40 38 #define sk_rmem_alloc sk_backlog.rmem_alloc 41 39 #define sk_refcnt __sk_common.skc_refcnt