Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

selftests/bpf: Add selftest for sockmap/hashmap redirection

Test redirection logic. All supported and unsupported redirect combinations
are tested for success and failure respectively.

BPF_MAP_TYPE_SOCKMAP
BPF_MAP_TYPE_SOCKHASH
x
sk_msg-to-egress
sk_msg-to-ingress
sk_skb-to-egress
sk_skb-to-ingress
x
AF_INET, SOCK_STREAM
AF_INET6, SOCK_STREAM
AF_INET, SOCK_DGRAM
AF_INET6, SOCK_DGRAM
AF_UNIX, SOCK_STREAM
AF_UNIX, SOCK_DGRAM
AF_VSOCK, SOCK_STREAM
AF_VSOCK, SOCK_SEQPACKET

Suggested-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Michal Luczaj <mhal@rbox.co>
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/r/20250515-selftests-sockmap-redir-v3-5-a1ea723f7e7e@rbox.co

authored by

Michal Luczaj and committed by
Martin KaFai Lau
f0709263 f266905b

+465
+465
tools/testing/selftests/bpf/prog_tests/sockmap_redir.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Test for sockmap/sockhash redirection. 4 + * 5 + * BPF_MAP_TYPE_SOCKMAP 6 + * BPF_MAP_TYPE_SOCKHASH 7 + * x 8 + * sk_msg-to-egress 9 + * sk_msg-to-ingress 10 + * sk_skb-to-egress 11 + * sk_skb-to-ingress 12 + * x 13 + * AF_INET, SOCK_STREAM 14 + * AF_INET6, SOCK_STREAM 15 + * AF_INET, SOCK_DGRAM 16 + * AF_INET6, SOCK_DGRAM 17 + * AF_UNIX, SOCK_STREAM 18 + * AF_UNIX, SOCK_DGRAM 19 + * AF_VSOCK, SOCK_STREAM 20 + * AF_VSOCK, SOCK_SEQPACKET 21 + */ 22 + 23 + #include <errno.h> 24 + #include <error.h> 25 + #include <sched.h> 26 + #include <stdio.h> 27 + #include <unistd.h> 28 + 29 + #include <netinet/in.h> 30 + #include <sys/socket.h> 31 + #include <sys/types.h> 32 + #include <sys/un.h> 33 + #include <linux/string.h> 34 + #include <linux/vm_sockets.h> 35 + 36 + #include <bpf/bpf.h> 37 + #include <bpf/libbpf.h> 38 + 39 + #include "linux/const.h" 40 + #include "test_progs.h" 41 + #include "sockmap_helpers.h" 42 + #include "test_sockmap_redir.skel.h" 43 + 44 + /* The meaning of SUPPORTED is "will redirect packet as expected". 45 + */ 46 + #define SUPPORTED _BITUL(0) 47 + 48 + /* Note on sk_skb-to-ingress ->af_vsock: 49 + * 50 + * Peer socket may receive the packet some time after the return from sendmsg(). 51 + * In a typical usage scenario, recvmsg() will block until the redirected packet 52 + * appears in the destination queue, or timeout if the packet was dropped. By 53 + * that point, the verdict map has already been updated to reflect what has 54 + * happened. 55 + * 56 + * But sk_skb-to-ingress/af_vsock is an unsupported combination, so no recvmsg() 57 + * takes place. Which means we may race the execution of the verdict logic and 58 + * read map_verd before it has been updated, i.e. we might observe 59 + * map_verd[SK_DROP]=0 instead of map_verd[SK_DROP]=1. 60 + * 61 + * This confuses the selftest logic: if there was no packet dropped, where's the 62 + * packet? So here's a heuristic: on map_verd[SK_DROP]=map_verd[SK_PASS]=0 63 + * (which implies the verdict program has not been ran) just re-read the verdict 64 + * map again. 65 + */ 66 + #define UNSUPPORTED_RACY_VERD _BITUL(1) 67 + 68 + enum prog_type { 69 + SK_MSG_EGRESS, 70 + SK_MSG_INGRESS, 71 + SK_SKB_EGRESS, 72 + SK_SKB_INGRESS, 73 + }; 74 + 75 + enum { 76 + SEND_INNER = 0, 77 + SEND_OUTER, 78 + }; 79 + 80 + enum { 81 + RECV_INNER = 0, 82 + RECV_OUTER, 83 + }; 84 + 85 + struct maps { 86 + int in; 87 + int out; 88 + int verd; 89 + }; 90 + 91 + struct combo_spec { 92 + enum prog_type prog_type; 93 + const char *in, *out; 94 + }; 95 + 96 + struct redir_spec { 97 + const char *name; 98 + int idx_send; 99 + int idx_recv; 100 + enum prog_type prog_type; 101 + }; 102 + 103 + struct socket_spec { 104 + int family; 105 + int sotype; 106 + int send_flags; 107 + int in[2]; 108 + int out[2]; 109 + }; 110 + 111 + static int socket_spec_pairs(struct socket_spec *s) 112 + { 113 + return create_socket_pairs(s->family, s->sotype, 114 + &s->in[0], &s->out[0], 115 + &s->in[1], &s->out[1]); 116 + } 117 + 118 + static void socket_spec_close(struct socket_spec *s) 119 + { 120 + xclose(s->in[0]); 121 + xclose(s->in[1]); 122 + xclose(s->out[0]); 123 + xclose(s->out[1]); 124 + } 125 + 126 + static void get_redir_params(struct redir_spec *redir, 127 + struct test_sockmap_redir *skel, int *prog_fd, 128 + enum bpf_attach_type *attach_type, 129 + int *redirect_flags) 130 + { 131 + enum prog_type type = redir->prog_type; 132 + struct bpf_program *prog; 133 + bool sk_msg; 134 + 135 + sk_msg = type == SK_MSG_INGRESS || type == SK_MSG_EGRESS; 136 + prog = sk_msg ? skel->progs.prog_msg_verdict : skel->progs.prog_skb_verdict; 137 + 138 + *prog_fd = bpf_program__fd(prog); 139 + *attach_type = sk_msg ? BPF_SK_MSG_VERDICT : BPF_SK_SKB_VERDICT; 140 + 141 + if (type == SK_MSG_INGRESS || type == SK_SKB_INGRESS) 142 + *redirect_flags = BPF_F_INGRESS; 143 + else 144 + *redirect_flags = 0; 145 + } 146 + 147 + static void try_recv(const char *prefix, int fd, int flags, bool expect_success) 148 + { 149 + ssize_t n; 150 + char buf; 151 + 152 + errno = 0; 153 + n = recv(fd, &buf, 1, flags); 154 + if (n < 0 && expect_success) 155 + FAIL_ERRNO("%s: unexpected failure: retval=%zd", prefix, n); 156 + if (!n && !expect_success) 157 + FAIL("%s: expected failure: retval=%zd", prefix, n); 158 + } 159 + 160 + static void handle_unsupported(int sd_send, int sd_peer, int sd_in, int sd_out, 161 + int sd_recv, int map_verd, int status) 162 + { 163 + unsigned int drop, pass; 164 + char recv_buf; 165 + ssize_t n; 166 + 167 + get_verdict: 168 + if (xbpf_map_lookup_elem(map_verd, &u32(SK_DROP), &drop) || 169 + xbpf_map_lookup_elem(map_verd, &u32(SK_PASS), &pass)) 170 + return; 171 + 172 + if (pass == 0 && drop == 0 && (status & UNSUPPORTED_RACY_VERD)) { 173 + sched_yield(); 174 + goto get_verdict; 175 + } 176 + 177 + if (pass != 0) { 178 + FAIL("unsupported: wanted verdict pass 0, have %u", pass); 179 + return; 180 + } 181 + 182 + /* If nothing was dropped, packet should have reached the peer */ 183 + if (drop == 0) { 184 + errno = 0; 185 + n = recv_timeout(sd_peer, &recv_buf, 1, 0, IO_TIMEOUT_SEC); 186 + if (n != 1) 187 + FAIL_ERRNO("unsupported: packet missing, retval=%zd", n); 188 + } 189 + 190 + /* Ensure queues are empty */ 191 + try_recv("bpf.recv(sd_send)", sd_send, MSG_DONTWAIT, false); 192 + if (sd_in != sd_send) 193 + try_recv("bpf.recv(sd_in)", sd_in, MSG_DONTWAIT, false); 194 + 195 + try_recv("bpf.recv(sd_out)", sd_out, MSG_DONTWAIT, false); 196 + if (sd_recv != sd_out) 197 + try_recv("bpf.recv(sd_recv)", sd_recv, MSG_DONTWAIT, false); 198 + } 199 + 200 + static void test_send_redir_recv(int sd_send, int send_flags, int sd_peer, 201 + int sd_in, int sd_out, int sd_recv, 202 + struct maps *maps, int status) 203 + { 204 + unsigned int drop, pass; 205 + char *send_buf = "ab"; 206 + char recv_buf = '\0'; 207 + ssize_t n, len = 1; 208 + 209 + /* Zero out the verdict map */ 210 + if (xbpf_map_update_elem(maps->verd, &u32(SK_DROP), &u32(0), BPF_ANY) || 211 + xbpf_map_update_elem(maps->verd, &u32(SK_PASS), &u32(0), BPF_ANY)) 212 + return; 213 + 214 + if (xbpf_map_update_elem(maps->in, &u32(0), &u64(sd_in), BPF_NOEXIST)) 215 + return; 216 + 217 + if (xbpf_map_update_elem(maps->out, &u32(0), &u64(sd_out), BPF_NOEXIST)) 218 + goto del_in; 219 + 220 + /* Last byte is OOB data when send_flags has MSG_OOB bit set */ 221 + if (send_flags & MSG_OOB) 222 + len++; 223 + n = send(sd_send, send_buf, len, send_flags); 224 + if (n >= 0 && n < len) 225 + FAIL("incomplete send"); 226 + if (n < 0) { 227 + /* sk_msg redirect combo not supported? */ 228 + if (status & SUPPORTED || errno != EACCES) 229 + FAIL_ERRNO("send"); 230 + goto out; 231 + } 232 + 233 + if (!(status & SUPPORTED)) { 234 + handle_unsupported(sd_send, sd_peer, sd_in, sd_out, sd_recv, 235 + maps->verd, status); 236 + goto out; 237 + } 238 + 239 + errno = 0; 240 + n = recv_timeout(sd_recv, &recv_buf, 1, 0, IO_TIMEOUT_SEC); 241 + if (n != 1) { 242 + FAIL_ERRNO("recv_timeout()"); 243 + goto out; 244 + } 245 + 246 + /* Check verdict _after_ recv(); af_vsock may need time to catch up */ 247 + if (xbpf_map_lookup_elem(maps->verd, &u32(SK_DROP), &drop) || 248 + xbpf_map_lookup_elem(maps->verd, &u32(SK_PASS), &pass)) 249 + goto out; 250 + 251 + if (drop != 0 || pass != 1) 252 + FAIL("unexpected verdict drop/pass: wanted 0/1, have %u/%u", 253 + drop, pass); 254 + 255 + if (recv_buf != send_buf[0]) 256 + FAIL("recv(): payload check, %02x != %02x", recv_buf, send_buf[0]); 257 + 258 + if (send_flags & MSG_OOB) { 259 + /* Fail reading OOB while in sockmap */ 260 + try_recv("bpf.recv(sd_out, MSG_OOB)", sd_out, 261 + MSG_OOB | MSG_DONTWAIT, false); 262 + 263 + /* Remove sd_out from sockmap */ 264 + xbpf_map_delete_elem(maps->out, &u32(0)); 265 + 266 + /* Check that OOB was dropped on redirect */ 267 + try_recv("recv(sd_out, MSG_OOB)", sd_out, 268 + MSG_OOB | MSG_DONTWAIT, false); 269 + 270 + goto del_in; 271 + } 272 + out: 273 + xbpf_map_delete_elem(maps->out, &u32(0)); 274 + del_in: 275 + xbpf_map_delete_elem(maps->in, &u32(0)); 276 + } 277 + 278 + static int is_redir_supported(enum prog_type type, const char *in, 279 + const char *out) 280 + { 281 + /* Matching based on strings returned by socket_kind_to_str(): 282 + * tcp4, udp4, tcp6, udp6, u_str, u_dgr, v_str, v_seq 283 + * Plus a wildcard: any 284 + * Not in use: u_seq, v_dgr 285 + */ 286 + struct combo_spec *c, combos[] = { 287 + /* Send to local: TCP -> any, but vsock */ 288 + { SK_MSG_INGRESS, "tcp", "tcp" }, 289 + { SK_MSG_INGRESS, "tcp", "udp" }, 290 + { SK_MSG_INGRESS, "tcp", "u_str" }, 291 + { SK_MSG_INGRESS, "tcp", "u_dgr" }, 292 + 293 + /* Send to egress: TCP -> TCP */ 294 + { SK_MSG_EGRESS, "tcp", "tcp" }, 295 + 296 + /* Ingress to egress: any -> any */ 297 + { SK_SKB_EGRESS, "any", "any" }, 298 + 299 + /* Ingress to local: any -> any, but vsock */ 300 + { SK_SKB_INGRESS, "any", "tcp" }, 301 + { SK_SKB_INGRESS, "any", "udp" }, 302 + { SK_SKB_INGRESS, "any", "u_str" }, 303 + { SK_SKB_INGRESS, "any", "u_dgr" }, 304 + }; 305 + 306 + for (c = combos; c < combos + ARRAY_SIZE(combos); c++) { 307 + if (c->prog_type == type && 308 + (!strcmp(c->in, "any") || strstarts(in, c->in)) && 309 + (!strcmp(c->out, "any") || strstarts(out, c->out))) 310 + return SUPPORTED; 311 + } 312 + 313 + return 0; 314 + } 315 + 316 + static int get_support_status(enum prog_type type, const char *in, 317 + const char *out) 318 + { 319 + int status = is_redir_supported(type, in, out); 320 + 321 + if (type == SK_SKB_INGRESS && strstarts(out, "v_")) 322 + status |= UNSUPPORTED_RACY_VERD; 323 + 324 + return status; 325 + } 326 + 327 + static void test_socket(enum bpf_map_type type, struct redir_spec *redir, 328 + struct maps *maps, struct socket_spec *s_in, 329 + struct socket_spec *s_out) 330 + { 331 + int fd_in, fd_out, fd_send, fd_peer, fd_recv, flags, status; 332 + const char *in_str, *out_str; 333 + char s[MAX_TEST_NAME]; 334 + 335 + fd_in = s_in->in[0]; 336 + fd_out = s_out->out[0]; 337 + fd_send = s_in->in[redir->idx_send]; 338 + fd_peer = s_in->in[redir->idx_send ^ 1]; 339 + fd_recv = s_out->out[redir->idx_recv]; 340 + flags = s_in->send_flags; 341 + 342 + in_str = socket_kind_to_str(fd_in); 343 + out_str = socket_kind_to_str(fd_out); 344 + status = get_support_status(redir->prog_type, in_str, out_str); 345 + 346 + snprintf(s, sizeof(s), 347 + "%-4s %-17s %-5s %s %-5s%6s", 348 + /* hash sk_skb-to-ingress u_str → v_str (OOB) */ 349 + type == BPF_MAP_TYPE_SOCKMAP ? "map" : "hash", 350 + redir->name, 351 + in_str, 352 + status & SUPPORTED ? "→" : " ", 353 + out_str, 354 + (flags & MSG_OOB) ? "(OOB)" : ""); 355 + 356 + if (!test__start_subtest(s)) 357 + return; 358 + 359 + test_send_redir_recv(fd_send, flags, fd_peer, fd_in, fd_out, fd_recv, 360 + maps, status); 361 + } 362 + 363 + static void test_redir(enum bpf_map_type type, struct redir_spec *redir, 364 + struct maps *maps) 365 + { 366 + struct socket_spec *s, sockets[] = { 367 + { AF_INET, SOCK_STREAM }, 368 + // { AF_INET, SOCK_STREAM, MSG_OOB }, /* Known to be broken */ 369 + { AF_INET6, SOCK_STREAM }, 370 + { AF_INET, SOCK_DGRAM }, 371 + { AF_INET6, SOCK_DGRAM }, 372 + { AF_UNIX, SOCK_STREAM }, 373 + { AF_UNIX, SOCK_STREAM, MSG_OOB }, 374 + { AF_UNIX, SOCK_DGRAM }, 375 + // { AF_UNIX, SOCK_SEQPACKET}, /* Unsupported BPF_MAP_UPDATE_ELEM */ 376 + { AF_VSOCK, SOCK_STREAM }, 377 + // { AF_VSOCK, SOCK_DGRAM }, /* Unsupported socket() */ 378 + { AF_VSOCK, SOCK_SEQPACKET }, 379 + }; 380 + 381 + for (s = sockets; s < sockets + ARRAY_SIZE(sockets); s++) 382 + if (socket_spec_pairs(s)) 383 + goto out; 384 + 385 + /* Intra-proto */ 386 + for (s = sockets; s < sockets + ARRAY_SIZE(sockets); s++) 387 + test_socket(type, redir, maps, s, s); 388 + 389 + /* Cross-proto */ 390 + for (int i = 0; i < ARRAY_SIZE(sockets); i++) { 391 + for (int j = 0; j < ARRAY_SIZE(sockets); j++) { 392 + struct socket_spec *out = &sockets[j]; 393 + struct socket_spec *in = &sockets[i]; 394 + 395 + /* Skip intra-proto and between variants */ 396 + if (out->send_flags || 397 + (in->family == out->family && 398 + in->sotype == out->sotype)) 399 + continue; 400 + 401 + test_socket(type, redir, maps, in, out); 402 + } 403 + } 404 + out: 405 + while (--s >= sockets) 406 + socket_spec_close(s); 407 + } 408 + 409 + static void test_map(enum bpf_map_type type) 410 + { 411 + struct redir_spec *r, redirs[] = { 412 + { "sk_msg-to-ingress", SEND_INNER, RECV_INNER, SK_MSG_INGRESS }, 413 + { "sk_msg-to-egress", SEND_INNER, RECV_OUTER, SK_MSG_EGRESS }, 414 + { "sk_skb-to-egress", SEND_OUTER, RECV_OUTER, SK_SKB_EGRESS }, 415 + { "sk_skb-to-ingress", SEND_OUTER, RECV_INNER, SK_SKB_INGRESS }, 416 + }; 417 + 418 + for (r = redirs; r < redirs + ARRAY_SIZE(redirs); r++) { 419 + enum bpf_attach_type attach_type; 420 + struct test_sockmap_redir *skel; 421 + struct maps maps; 422 + int prog_fd; 423 + 424 + skel = test_sockmap_redir__open_and_load(); 425 + if (!skel) { 426 + FAIL("open_and_load"); 427 + return; 428 + } 429 + 430 + switch (type) { 431 + case BPF_MAP_TYPE_SOCKMAP: 432 + maps.in = bpf_map__fd(skel->maps.nop_map); 433 + maps.out = bpf_map__fd(skel->maps.sock_map); 434 + break; 435 + case BPF_MAP_TYPE_SOCKHASH: 436 + maps.in = bpf_map__fd(skel->maps.nop_hash); 437 + maps.out = bpf_map__fd(skel->maps.sock_hash); 438 + break; 439 + default: 440 + FAIL("Unsupported bpf_map_type"); 441 + return; 442 + } 443 + 444 + skel->bss->redirect_type = type; 445 + maps.verd = bpf_map__fd(skel->maps.verdict_map); 446 + get_redir_params(r, skel, &prog_fd, &attach_type, 447 + &skel->bss->redirect_flags); 448 + 449 + if (xbpf_prog_attach(prog_fd, maps.in, attach_type, 0)) 450 + return; 451 + 452 + test_redir(type, r, &maps); 453 + 454 + if (xbpf_prog_detach2(prog_fd, maps.in, attach_type)) 455 + return; 456 + 457 + test_sockmap_redir__destroy(skel); 458 + } 459 + } 460 + 461 + void serial_test_sockmap_redir(void) 462 + { 463 + test_map(BPF_MAP_TYPE_SOCKMAP); 464 + test_map(BPF_MAP_TYPE_SOCKHASH); 465 + }