Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bpf: selftest additions for SOCKHASH

This runs the existing SOCKMAP tests with the SOCKHASH map type. To do
this we move the programs into an include file and build two BPF
programs, one for SOCKHASH and one for SOCKMAP.

We then run the entire test suite with each type.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>

authored by

John Fastabend and committed by
Daniel Borkmann
b8b394fa 81110384

+453 -349
+52 -2
tools/include/uapi/linux/bpf.h
··· 118 118 BPF_MAP_TYPE_SOCKMAP, 119 119 BPF_MAP_TYPE_CPUMAP, 120 120 BPF_MAP_TYPE_XSKMAP, 121 + BPF_MAP_TYPE_SOCKHASH, 121 122 }; 122 123 123 124 enum bpf_prog_type { ··· 1829 1828 * Return 1830 1829 * 0 on success, or a negative error in case of failure. 1831 1830 * 1832 - * 1833 1831 * int bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags) 1834 1832 * Description 1835 1833 * Do FIB lookup in kernel tables using parameters in *params*. ··· 1855 1855 * Egress device index on success, 0 if packet needs to continue 1856 1856 * up the stack for further processing or a negative error in case 1857 1857 * of failure. 1858 + * 1859 + * int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags) 1860 + * Description 1861 + * Add an entry to, or update a sockhash *map* referencing sockets. 1862 + * The *skops* is used as a new value for the entry associated to 1863 + * *key*. *flags* is one of: 1864 + * 1865 + * **BPF_NOEXIST** 1866 + * The entry for *key* must not exist in the map. 1867 + * **BPF_EXIST** 1868 + * The entry for *key* must already exist in the map. 1869 + * **BPF_ANY** 1870 + * No condition on the existence of the entry for *key*. 1871 + * 1872 + * If the *map* has eBPF programs (parser and verdict), those will 1873 + * be inherited by the socket being added. If the socket is 1874 + * already attached to eBPF programs, this results in an error. 1875 + * Return 1876 + * 0 on success, or a negative error in case of failure. 1877 + * 1878 + * int bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags) 1879 + * Description 1880 + * This helper is used in programs implementing policies at the 1881 + * socket level. If the message *msg* is allowed to pass (i.e. if 1882 + * the verdict eBPF program returns **SK_PASS**), redirect it to 1883 + * the socket referenced by *map* (of type 1884 + * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. 
Both ingress and 1885 + * egress interfaces can be used for redirection. The 1886 + * **BPF_F_INGRESS** value in *flags* is used to make the 1887 + * distinction (ingress path is selected if the flag is present, 1888 + * egress path otherwise). This is the only flag supported for now. 1889 + * Return 1890 + * **SK_PASS** on success, or **SK_DROP** on error. 1891 + * 1892 + * int bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags) 1893 + * Description 1894 + * This helper is used in programs implementing policies at the 1895 + * skb socket level. If the sk_buff *skb* is allowed to pass (i.e. 1896 + * if the verdict eBPF program returns **SK_PASS**), redirect it 1897 + * to the socket referenced by *map* (of type 1898 + * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and 1899 + * egress interfaces can be used for redirection. The 1900 + * **BPF_F_INGRESS** value in *flags* is used to make the 1901 + * distinction (ingress path is selected if the flag is present, 1902 + * egress otherwise). This is the only flag supported for now. 1903 + * Return 1904 + * **SK_PASS** on success, or **SK_DROP** on error. 1858 1905 */ 1859 1906 #define __BPF_FUNC_MAPPER(FN) \ 1860 1907 FN(unspec), \ ··· 1973 1926 FN(skb_get_xfrm_state), \ 1974 1927 FN(get_stack), \ 1975 1928 FN(skb_load_bytes_relative), \ 1976 - FN(fib_lookup), 1929 + FN(fib_lookup), \ 1930 + FN(sock_hash_update), \ 1931 + FN(msg_redirect_hash), \ 1932 + FN(sk_redirect_hash), 1977 1933 1978 1934 /* integer value in 'imm' field of BPF_CALL instruction selects which helper 1979 1935 * function eBPF program intends to call
+1 -1
tools/testing/selftests/bpf/Makefile
··· 33 33 sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \ 34 34 sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o test_adjust_tail.o \ 35 35 test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o \ 36 - test_get_stack_rawtp.o 36 + test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o 37 37 38 38 # Order correspond to 'make run_tests' order 39 39 TEST_PROGS := test_kmod.sh \
+8
tools/testing/selftests/bpf/bpf_helpers.h
··· 75 75 (void *) BPF_FUNC_sock_ops_cb_flags_set; 76 76 static int (*bpf_sk_redirect_map)(void *ctx, void *map, int key, int flags) = 77 77 (void *) BPF_FUNC_sk_redirect_map; 78 + static int (*bpf_sk_redirect_hash)(void *ctx, void *map, void *key, int flags) = 79 + (void *) BPF_FUNC_sk_redirect_hash; 78 80 static int (*bpf_sock_map_update)(void *map, void *key, void *value, 79 81 unsigned long long flags) = 80 82 (void *) BPF_FUNC_sock_map_update; 83 + static int (*bpf_sock_hash_update)(void *map, void *key, void *value, 84 + unsigned long long flags) = 85 + (void *) BPF_FUNC_sock_hash_update; 81 86 static int (*bpf_perf_event_read_value)(void *map, unsigned long long flags, 82 87 void *buf, unsigned int buf_size) = 83 88 (void *) BPF_FUNC_perf_event_read_value; ··· 93 88 (void *) BPF_FUNC_override_return; 94 89 static int (*bpf_msg_redirect_map)(void *ctx, void *map, int key, int flags) = 95 90 (void *) BPF_FUNC_msg_redirect_map; 91 + static int (*bpf_msg_redirect_hash)(void *ctx, 92 + void *map, void *key, int flags) = 93 + (void *) BPF_FUNC_msg_redirect_hash; 96 94 static int (*bpf_msg_apply_bytes)(void *ctx, int len) = 97 95 (void *) BPF_FUNC_msg_apply_bytes; 98 96 static int (*bpf_msg_cork_bytes)(void *ctx, int len) =
+5
tools/testing/selftests/bpf/test_sockhash_kern.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + // Copyright (c) 2018 Covalent IO, Inc. http://covalent.io 3 + #undef SOCKMAP 4 + #define TEST_MAP_TYPE BPF_MAP_TYPE_SOCKHASH 5 + #include "./test_sockmap_kern.h"
+20 -7
tools/testing/selftests/bpf/test_sockmap.c
··· 47 47 #define S1_PORT 10000 48 48 #define S2_PORT 10001 49 49 50 - #define BPF_FILENAME "test_sockmap_kern.o" 50 + #define BPF_SOCKMAP_FILENAME "test_sockmap_kern.o" 51 + #define BPF_SOCKHASH_FILENAME "test_sockhash_kern.o" 51 52 #define CG_PATH "/sockmap" 52 53 53 54 /* global sockets */ ··· 1261 1260 BPF_PROG_TYPE_SK_MSG, 1262 1261 }; 1263 1262 1264 - static int populate_progs(void) 1263 + static int populate_progs(char *bpf_file) 1265 1264 { 1266 - char *bpf_file = BPF_FILENAME; 1267 1265 struct bpf_program *prog; 1268 1266 struct bpf_object *obj; 1269 1267 int i = 0; ··· 1306 1306 return 0; 1307 1307 } 1308 1308 1309 - static int test_suite(void) 1309 + static int __test_suite(char *bpf_file) 1310 1310 { 1311 1311 int cg_fd, err; 1312 1312 1313 - err = populate_progs(); 1313 + err = populate_progs(bpf_file); 1314 1314 if (err < 0) { 1315 1315 fprintf(stderr, "ERROR: (%i) load bpf failed\n", err); 1316 1316 return err; ··· 1347 1347 1348 1348 out: 1349 1349 printf("Summary: %i PASSED %i FAILED\n", passed, failed); 1350 + cleanup_cgroup_environment(); 1350 1351 close(cg_fd); 1352 + return err; 1353 + } 1354 + 1355 + static int test_suite(void) 1356 + { 1357 + int err; 1358 + 1359 + err = __test_suite(BPF_SOCKMAP_FILENAME); 1360 + if (err) 1361 + goto out; 1362 + err = __test_suite(BPF_SOCKHASH_FILENAME); 1363 + out: 1351 1364 return err; 1352 1365 } 1353 1366 ··· 1370 1357 int iov_count = 1, length = 1024, rate = 1; 1371 1358 struct sockmap_options options = {0}; 1372 1359 int opt, longindex, err, cg_fd = 0; 1373 - char *bpf_file = BPF_FILENAME; 1360 + char *bpf_file = BPF_SOCKMAP_FILENAME; 1374 1361 int test = PING_PONG; 1375 1362 1376 1363 if (setrlimit(RLIMIT_MEMLOCK, &r)) { ··· 1451 1438 return -1; 1452 1439 } 1453 1440 1454 - err = populate_progs(); 1441 + err = populate_progs(bpf_file); 1455 1442 if (err) { 1456 1443 fprintf(stderr, "populate program: (%s) %s\n", 1457 1444 bpf_file, strerror(errno));
+4 -339
tools/testing/selftests/bpf/test_sockmap_kern.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 - // Copyright (c) 2017-2018 Covalent IO, Inc. http://covalent.io 3 - #include <stddef.h> 4 - #include <string.h> 5 - #include <linux/bpf.h> 6 - #include <linux/if_ether.h> 7 - #include <linux/if_packet.h> 8 - #include <linux/ip.h> 9 - #include <linux/ipv6.h> 10 - #include <linux/in.h> 11 - #include <linux/udp.h> 12 - #include <linux/tcp.h> 13 - #include <linux/pkt_cls.h> 14 - #include <sys/socket.h> 15 - #include "bpf_helpers.h" 16 - #include "bpf_endian.h" 17 - 18 - /* Sockmap sample program connects a client and a backend together 19 - * using cgroups. 20 - * 21 - * client:X <---> frontend:80 client:X <---> backend:80 22 - * 23 - * For simplicity we hard code values here and bind 1:1. The hard 24 - * coded values are part of the setup in sockmap.sh script that 25 - * is associated with this BPF program. 26 - * 27 - * The bpf_printk is verbose and prints information as connections 28 - * are established and verdicts are decided. 29 - */ 30 - 31 - #define bpf_printk(fmt, ...) 
\ 32 - ({ \ 33 - char ____fmt[] = fmt; \ 34 - bpf_trace_printk(____fmt, sizeof(____fmt), \ 35 - ##__VA_ARGS__); \ 36 - }) 37 - 38 - struct bpf_map_def SEC("maps") sock_map = { 39 - .type = BPF_MAP_TYPE_SOCKMAP, 40 - .key_size = sizeof(int), 41 - .value_size = sizeof(int), 42 - .max_entries = 20, 43 - }; 44 - 45 - struct bpf_map_def SEC("maps") sock_map_txmsg = { 46 - .type = BPF_MAP_TYPE_SOCKMAP, 47 - .key_size = sizeof(int), 48 - .value_size = sizeof(int), 49 - .max_entries = 20, 50 - }; 51 - 52 - struct bpf_map_def SEC("maps") sock_map_redir = { 53 - .type = BPF_MAP_TYPE_SOCKMAP, 54 - .key_size = sizeof(int), 55 - .value_size = sizeof(int), 56 - .max_entries = 20, 57 - }; 58 - 59 - struct bpf_map_def SEC("maps") sock_apply_bytes = { 60 - .type = BPF_MAP_TYPE_ARRAY, 61 - .key_size = sizeof(int), 62 - .value_size = sizeof(int), 63 - .max_entries = 1 64 - }; 65 - 66 - struct bpf_map_def SEC("maps") sock_cork_bytes = { 67 - .type = BPF_MAP_TYPE_ARRAY, 68 - .key_size = sizeof(int), 69 - .value_size = sizeof(int), 70 - .max_entries = 1 71 - }; 72 - 73 - struct bpf_map_def SEC("maps") sock_pull_bytes = { 74 - .type = BPF_MAP_TYPE_ARRAY, 75 - .key_size = sizeof(int), 76 - .value_size = sizeof(int), 77 - .max_entries = 2 78 - }; 79 - 80 - struct bpf_map_def SEC("maps") sock_redir_flags = { 81 - .type = BPF_MAP_TYPE_ARRAY, 82 - .key_size = sizeof(int), 83 - .value_size = sizeof(int), 84 - .max_entries = 1 85 - }; 86 - 87 - struct bpf_map_def SEC("maps") sock_skb_opts = { 88 - .type = BPF_MAP_TYPE_ARRAY, 89 - .key_size = sizeof(int), 90 - .value_size = sizeof(int), 91 - .max_entries = 1 92 - }; 93 - 94 - SEC("sk_skb1") 95 - int bpf_prog1(struct __sk_buff *skb) 96 - { 97 - return skb->len; 98 - } 99 - 100 - SEC("sk_skb2") 101 - int bpf_prog2(struct __sk_buff *skb) 102 - { 103 - __u32 lport = skb->local_port; 104 - __u32 rport = skb->remote_port; 105 - int len, *f, ret, zero = 0; 106 - __u64 flags = 0; 107 - 108 - if (lport == 10000) 109 - ret = 10; 110 - else 111 - ret = 1; 
112 - 113 - len = (__u32)skb->data_end - (__u32)skb->data; 114 - f = bpf_map_lookup_elem(&sock_skb_opts, &zero); 115 - if (f && *f) { 116 - ret = 3; 117 - flags = *f; 118 - } 119 - 120 - bpf_printk("sk_skb2: redirect(%iB) flags=%i\n", 121 - len, flags); 122 - return bpf_sk_redirect_map(skb, &sock_map, ret, flags); 123 - } 124 - 125 - SEC("sockops") 126 - int bpf_sockmap(struct bpf_sock_ops *skops) 127 - { 128 - __u32 lport, rport; 129 - int op, err = 0, index, key, ret; 130 - 131 - 132 - op = (int) skops->op; 133 - 134 - switch (op) { 135 - case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: 136 - lport = skops->local_port; 137 - rport = skops->remote_port; 138 - 139 - if (lport == 10000) { 140 - ret = 1; 141 - err = bpf_sock_map_update(skops, &sock_map, &ret, 142 - BPF_NOEXIST); 143 - bpf_printk("passive(%i -> %i) map ctx update err: %d\n", 144 - lport, bpf_ntohl(rport), err); 145 - } 146 - break; 147 - case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: 148 - lport = skops->local_port; 149 - rport = skops->remote_port; 150 - 151 - if (bpf_ntohl(rport) == 10001) { 152 - ret = 10; 153 - err = bpf_sock_map_update(skops, &sock_map, &ret, 154 - BPF_NOEXIST); 155 - bpf_printk("active(%i -> %i) map ctx update err: %d\n", 156 - lport, bpf_ntohl(rport), err); 157 - } 158 - break; 159 - default: 160 - break; 161 - } 162 - 163 - return 0; 164 - } 165 - 166 - SEC("sk_msg1") 167 - int bpf_prog4(struct sk_msg_md *msg) 168 - { 169 - int *bytes, zero = 0, one = 1; 170 - int *start, *end; 171 - 172 - bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); 173 - if (bytes) 174 - bpf_msg_apply_bytes(msg, *bytes); 175 - bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero); 176 - if (bytes) 177 - bpf_msg_cork_bytes(msg, *bytes); 178 - start = bpf_map_lookup_elem(&sock_pull_bytes, &zero); 179 - end = bpf_map_lookup_elem(&sock_pull_bytes, &one); 180 - if (start && end) 181 - bpf_msg_pull_data(msg, *start, *end, 0); 182 - return SK_PASS; 183 - } 184 - 185 - SEC("sk_msg2") 186 - int bpf_prog5(struct sk_msg_md 
*msg) 187 - { 188 - int err1 = -1, err2 = -1, zero = 0, one = 1; 189 - int *bytes, *start, *end, len1, len2; 190 - 191 - bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); 192 - if (bytes) 193 - err1 = bpf_msg_apply_bytes(msg, *bytes); 194 - bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero); 195 - if (bytes) 196 - err2 = bpf_msg_cork_bytes(msg, *bytes); 197 - len1 = (__u64)msg->data_end - (__u64)msg->data; 198 - start = bpf_map_lookup_elem(&sock_pull_bytes, &zero); 199 - end = bpf_map_lookup_elem(&sock_pull_bytes, &one); 200 - if (start && end) { 201 - int err; 202 - 203 - bpf_printk("sk_msg2: pull(%i:%i)\n", 204 - start ? *start : 0, end ? *end : 0); 205 - err = bpf_msg_pull_data(msg, *start, *end, 0); 206 - if (err) 207 - bpf_printk("sk_msg2: pull_data err %i\n", 208 - err); 209 - len2 = (__u64)msg->data_end - (__u64)msg->data; 210 - bpf_printk("sk_msg2: length update %i->%i\n", 211 - len1, len2); 212 - } 213 - bpf_printk("sk_msg2: data length %i err1 %i err2 %i\n", 214 - len1, err1, err2); 215 - return SK_PASS; 216 - } 217 - 218 - SEC("sk_msg3") 219 - int bpf_prog6(struct sk_msg_md *msg) 220 - { 221 - int *bytes, zero = 0, one = 1, key = 0; 222 - int *start, *end, *f; 223 - __u64 flags = 0; 224 - 225 - bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); 226 - if (bytes) 227 - bpf_msg_apply_bytes(msg, *bytes); 228 - bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero); 229 - if (bytes) 230 - bpf_msg_cork_bytes(msg, *bytes); 231 - start = bpf_map_lookup_elem(&sock_pull_bytes, &zero); 232 - end = bpf_map_lookup_elem(&sock_pull_bytes, &one); 233 - if (start && end) 234 - bpf_msg_pull_data(msg, *start, *end, 0); 235 - f = bpf_map_lookup_elem(&sock_redir_flags, &zero); 236 - if (f && *f) { 237 - key = 2; 238 - flags = *f; 239 - } 240 - return bpf_msg_redirect_map(msg, &sock_map_redir, key, flags); 241 - } 242 - 243 - SEC("sk_msg4") 244 - int bpf_prog7(struct sk_msg_md *msg) 245 - { 246 - int err1 = 0, err2 = 0, zero = 0, one = 1, key = 0; 247 - int *f, 
*bytes, *start, *end, len1, len2; 248 - __u64 flags = 0; 249 - 250 - int err; 251 - bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); 252 - if (bytes) 253 - err1 = bpf_msg_apply_bytes(msg, *bytes); 254 - bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero); 255 - if (bytes) 256 - err2 = bpf_msg_cork_bytes(msg, *bytes); 257 - len1 = (__u64)msg->data_end - (__u64)msg->data; 258 - start = bpf_map_lookup_elem(&sock_pull_bytes, &zero); 259 - end = bpf_map_lookup_elem(&sock_pull_bytes, &one); 260 - if (start && end) { 261 - 262 - bpf_printk("sk_msg2: pull(%i:%i)\n", 263 - start ? *start : 0, end ? *end : 0); 264 - err = bpf_msg_pull_data(msg, *start, *end, 0); 265 - if (err) 266 - bpf_printk("sk_msg2: pull_data err %i\n", 267 - err); 268 - len2 = (__u64)msg->data_end - (__u64)msg->data; 269 - bpf_printk("sk_msg2: length update %i->%i\n", 270 - len1, len2); 271 - } 272 - f = bpf_map_lookup_elem(&sock_redir_flags, &zero); 273 - if (f && *f) { 274 - key = 2; 275 - flags = *f; 276 - } 277 - bpf_printk("sk_msg3: redirect(%iB) flags=%i err=%i\n", 278 - len1, flags, err1 ? 
err1 : err2); 279 - err = bpf_msg_redirect_map(msg, &sock_map_redir, key, flags); 280 - bpf_printk("sk_msg3: err %i\n", err); 281 - return err; 282 - } 283 - 284 - SEC("sk_msg5") 285 - int bpf_prog8(struct sk_msg_md *msg) 286 - { 287 - void *data_end = (void *)(long) msg->data_end; 288 - void *data = (void *)(long) msg->data; 289 - int ret = 0, *bytes, zero = 0; 290 - 291 - bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); 292 - if (bytes) { 293 - ret = bpf_msg_apply_bytes(msg, *bytes); 294 - if (ret) 295 - return SK_DROP; 296 - } else { 297 - return SK_DROP; 298 - } 299 - return SK_PASS; 300 - } 301 - SEC("sk_msg6") 302 - int bpf_prog9(struct sk_msg_md *msg) 303 - { 304 - void *data_end = (void *)(long) msg->data_end; 305 - void *data = (void *)(long) msg->data; 306 - int ret = 0, *bytes, zero = 0; 307 - 308 - bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero); 309 - if (bytes) { 310 - if (((__u64)data_end - (__u64)data) >= *bytes) 311 - return SK_PASS; 312 - ret = bpf_msg_cork_bytes(msg, *bytes); 313 - if (ret) 314 - return SK_DROP; 315 - } 316 - return SK_PASS; 317 - } 318 - 319 - SEC("sk_msg7") 320 - int bpf_prog10(struct sk_msg_md *msg) 321 - { 322 - int *bytes, zero = 0, one = 1; 323 - int *start, *end; 324 - 325 - bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); 326 - if (bytes) 327 - bpf_msg_apply_bytes(msg, *bytes); 328 - bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero); 329 - if (bytes) 330 - bpf_msg_cork_bytes(msg, *bytes); 331 - start = bpf_map_lookup_elem(&sock_pull_bytes, &zero); 332 - end = bpf_map_lookup_elem(&sock_pull_bytes, &one); 333 - if (start && end) 334 - bpf_msg_pull_data(msg, *start, *end, 0); 335 - 336 - return SK_DROP; 337 - } 338 - 339 - int _version SEC("version") = 1; 340 - char _license[] SEC("license") = "GPL"; 2 + // Copyright (c) 2018 Covalent IO, Inc. http://covalent.io 3 + #define SOCKMAP 4 + #define TEST_MAP_TYPE BPF_MAP_TYPE_SOCKMAP 5 + #include "./test_sockmap_kern.h"
+363
tools/testing/selftests/bpf/test_sockmap_kern.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* Copyright (c) 2017-2018 Covalent IO, Inc. http://covalent.io */ 3 + #include <stddef.h> 4 + #include <string.h> 5 + #include <linux/bpf.h> 6 + #include <linux/if_ether.h> 7 + #include <linux/if_packet.h> 8 + #include <linux/ip.h> 9 + #include <linux/ipv6.h> 10 + #include <linux/in.h> 11 + #include <linux/udp.h> 12 + #include <linux/tcp.h> 13 + #include <linux/pkt_cls.h> 14 + #include <sys/socket.h> 15 + #include "bpf_helpers.h" 16 + #include "bpf_endian.h" 17 + 18 + /* Sockmap sample program connects a client and a backend together 19 + * using cgroups. 20 + * 21 + * client:X <---> frontend:80 client:X <---> backend:80 22 + * 23 + * For simplicity we hard code values here and bind 1:1. The hard 24 + * coded values are part of the setup in sockmap.sh script that 25 + * is associated with this BPF program. 26 + * 27 + * The bpf_printk is verbose and prints information as connections 28 + * are established and verdicts are decided. 29 + */ 30 + 31 + #define bpf_printk(fmt, ...) 
\ 32 + ({ \ 33 + char ____fmt[] = fmt; \ 34 + bpf_trace_printk(____fmt, sizeof(____fmt), \ 35 + ##__VA_ARGS__); \ 36 + }) 37 + 38 + struct bpf_map_def SEC("maps") sock_map = { 39 + .type = TEST_MAP_TYPE, 40 + .key_size = sizeof(int), 41 + .value_size = sizeof(int), 42 + .max_entries = 20, 43 + }; 44 + 45 + struct bpf_map_def SEC("maps") sock_map_txmsg = { 46 + .type = TEST_MAP_TYPE, 47 + .key_size = sizeof(int), 48 + .value_size = sizeof(int), 49 + .max_entries = 20, 50 + }; 51 + 52 + struct bpf_map_def SEC("maps") sock_map_redir = { 53 + .type = TEST_MAP_TYPE, 54 + .key_size = sizeof(int), 55 + .value_size = sizeof(int), 56 + .max_entries = 20, 57 + }; 58 + 59 + struct bpf_map_def SEC("maps") sock_apply_bytes = { 60 + .type = BPF_MAP_TYPE_ARRAY, 61 + .key_size = sizeof(int), 62 + .value_size = sizeof(int), 63 + .max_entries = 1 64 + }; 65 + 66 + struct bpf_map_def SEC("maps") sock_cork_bytes = { 67 + .type = BPF_MAP_TYPE_ARRAY, 68 + .key_size = sizeof(int), 69 + .value_size = sizeof(int), 70 + .max_entries = 1 71 + }; 72 + 73 + struct bpf_map_def SEC("maps") sock_pull_bytes = { 74 + .type = BPF_MAP_TYPE_ARRAY, 75 + .key_size = sizeof(int), 76 + .value_size = sizeof(int), 77 + .max_entries = 2 78 + }; 79 + 80 + struct bpf_map_def SEC("maps") sock_redir_flags = { 81 + .type = BPF_MAP_TYPE_ARRAY, 82 + .key_size = sizeof(int), 83 + .value_size = sizeof(int), 84 + .max_entries = 1 85 + }; 86 + 87 + struct bpf_map_def SEC("maps") sock_skb_opts = { 88 + .type = BPF_MAP_TYPE_ARRAY, 89 + .key_size = sizeof(int), 90 + .value_size = sizeof(int), 91 + .max_entries = 1 92 + }; 93 + 94 + SEC("sk_skb1") 95 + int bpf_prog1(struct __sk_buff *skb) 96 + { 97 + return skb->len; 98 + } 99 + 100 + SEC("sk_skb2") 101 + int bpf_prog2(struct __sk_buff *skb) 102 + { 103 + __u32 lport = skb->local_port; 104 + __u32 rport = skb->remote_port; 105 + int len, *f, ret, zero = 0; 106 + __u64 flags = 0; 107 + 108 + if (lport == 10000) 109 + ret = 10; 110 + else 111 + ret = 1; 112 + 113 + len = 
(__u32)skb->data_end - (__u32)skb->data; 114 + f = bpf_map_lookup_elem(&sock_skb_opts, &zero); 115 + if (f && *f) { 116 + ret = 3; 117 + flags = *f; 118 + } 119 + 120 + bpf_printk("sk_skb2: redirect(%iB) flags=%i\n", 121 + len, flags); 122 + #ifdef SOCKMAP 123 + return bpf_sk_redirect_map(skb, &sock_map, ret, flags); 124 + #else 125 + return bpf_sk_redirect_hash(skb, &sock_map, &ret, flags); 126 + #endif 127 + 128 + } 129 + 130 + SEC("sockops") 131 + int bpf_sockmap(struct bpf_sock_ops *skops) 132 + { 133 + __u32 lport, rport; 134 + int op, err = 0, index, key, ret; 135 + 136 + 137 + op = (int) skops->op; 138 + 139 + switch (op) { 140 + case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: 141 + lport = skops->local_port; 142 + rport = skops->remote_port; 143 + 144 + if (lport == 10000) { 145 + ret = 1; 146 + #ifdef SOCKMAP 147 + err = bpf_sock_map_update(skops, &sock_map, &ret, 148 + BPF_NOEXIST); 149 + #else 150 + err = bpf_sock_hash_update(skops, &sock_map, &ret, 151 + BPF_NOEXIST); 152 + #endif 153 + bpf_printk("passive(%i -> %i) map ctx update err: %d\n", 154 + lport, bpf_ntohl(rport), err); 155 + } 156 + break; 157 + case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: 158 + lport = skops->local_port; 159 + rport = skops->remote_port; 160 + 161 + if (bpf_ntohl(rport) == 10001) { 162 + ret = 10; 163 + #ifdef SOCKMAP 164 + err = bpf_sock_map_update(skops, &sock_map, &ret, 165 + BPF_NOEXIST); 166 + #else 167 + err = bpf_sock_hash_update(skops, &sock_map, &ret, 168 + BPF_NOEXIST); 169 + #endif 170 + bpf_printk("active(%i -> %i) map ctx update err: %d\n", 171 + lport, bpf_ntohl(rport), err); 172 + } 173 + break; 174 + default: 175 + break; 176 + } 177 + 178 + return 0; 179 + } 180 + 181 + SEC("sk_msg1") 182 + int bpf_prog4(struct sk_msg_md *msg) 183 + { 184 + int *bytes, zero = 0, one = 1; 185 + int *start, *end; 186 + 187 + bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); 188 + if (bytes) 189 + bpf_msg_apply_bytes(msg, *bytes); 190 + bytes = bpf_map_lookup_elem(&sock_cork_bytes, 
&zero); 191 + if (bytes) 192 + bpf_msg_cork_bytes(msg, *bytes); 193 + start = bpf_map_lookup_elem(&sock_pull_bytes, &zero); 194 + end = bpf_map_lookup_elem(&sock_pull_bytes, &one); 195 + if (start && end) 196 + bpf_msg_pull_data(msg, *start, *end, 0); 197 + return SK_PASS; 198 + } 199 + 200 + SEC("sk_msg2") 201 + int bpf_prog5(struct sk_msg_md *msg) 202 + { 203 + int err1 = -1, err2 = -1, zero = 0, one = 1; 204 + int *bytes, *start, *end, len1, len2; 205 + 206 + bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); 207 + if (bytes) 208 + err1 = bpf_msg_apply_bytes(msg, *bytes); 209 + bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero); 210 + if (bytes) 211 + err2 = bpf_msg_cork_bytes(msg, *bytes); 212 + len1 = (__u64)msg->data_end - (__u64)msg->data; 213 + start = bpf_map_lookup_elem(&sock_pull_bytes, &zero); 214 + end = bpf_map_lookup_elem(&sock_pull_bytes, &one); 215 + if (start && end) { 216 + int err; 217 + 218 + bpf_printk("sk_msg2: pull(%i:%i)\n", 219 + start ? *start : 0, end ? 
*end : 0); 220 + err = bpf_msg_pull_data(msg, *start, *end, 0); 221 + if (err) 222 + bpf_printk("sk_msg2: pull_data err %i\n", 223 + err); 224 + len2 = (__u64)msg->data_end - (__u64)msg->data; 225 + bpf_printk("sk_msg2: length update %i->%i\n", 226 + len1, len2); 227 + } 228 + bpf_printk("sk_msg2: data length %i err1 %i err2 %i\n", 229 + len1, err1, err2); 230 + return SK_PASS; 231 + } 232 + 233 + SEC("sk_msg3") 234 + int bpf_prog6(struct sk_msg_md *msg) 235 + { 236 + int *bytes, zero = 0, one = 1, key = 0; 237 + int *start, *end, *f; 238 + __u64 flags = 0; 239 + 240 + bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); 241 + if (bytes) 242 + bpf_msg_apply_bytes(msg, *bytes); 243 + bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero); 244 + if (bytes) 245 + bpf_msg_cork_bytes(msg, *bytes); 246 + start = bpf_map_lookup_elem(&sock_pull_bytes, &zero); 247 + end = bpf_map_lookup_elem(&sock_pull_bytes, &one); 248 + if (start && end) 249 + bpf_msg_pull_data(msg, *start, *end, 0); 250 + f = bpf_map_lookup_elem(&sock_redir_flags, &zero); 251 + if (f && *f) { 252 + key = 2; 253 + flags = *f; 254 + } 255 + #ifdef SOCKMAP 256 + return bpf_msg_redirect_map(msg, &sock_map_redir, key, flags); 257 + #else 258 + return bpf_msg_redirect_hash(msg, &sock_map_redir, &key, flags); 259 + #endif 260 + } 261 + 262 + SEC("sk_msg4") 263 + int bpf_prog7(struct sk_msg_md *msg) 264 + { 265 + int err1 = 0, err2 = 0, zero = 0, one = 1, key = 0; 266 + int *f, *bytes, *start, *end, len1, len2; 267 + __u64 flags = 0; 268 + 269 + int err; 270 + bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); 271 + if (bytes) 272 + err1 = bpf_msg_apply_bytes(msg, *bytes); 273 + bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero); 274 + if (bytes) 275 + err2 = bpf_msg_cork_bytes(msg, *bytes); 276 + len1 = (__u64)msg->data_end - (__u64)msg->data; 277 + start = bpf_map_lookup_elem(&sock_pull_bytes, &zero); 278 + end = bpf_map_lookup_elem(&sock_pull_bytes, &one); 279 + if (start && end) { 280 + 281 + 
bpf_printk("sk_msg2: pull(%i:%i)\n", 282 + start ? *start : 0, end ? *end : 0); 283 + err = bpf_msg_pull_data(msg, *start, *end, 0); 284 + if (err) 285 + bpf_printk("sk_msg2: pull_data err %i\n", 286 + err); 287 + len2 = (__u64)msg->data_end - (__u64)msg->data; 288 + bpf_printk("sk_msg2: length update %i->%i\n", 289 + len1, len2); 290 + } 291 + f = bpf_map_lookup_elem(&sock_redir_flags, &zero); 292 + if (f && *f) { 293 + key = 2; 294 + flags = *f; 295 + } 296 + bpf_printk("sk_msg3: redirect(%iB) flags=%i err=%i\n", 297 + len1, flags, err1 ? err1 : err2); 298 + #ifdef SOCKMAP 299 + err = bpf_msg_redirect_map(msg, &sock_map_redir, key, flags); 300 + #else 301 + err = bpf_msg_redirect_hash(msg, &sock_map_redir, &key, flags); 302 + #endif 303 + bpf_printk("sk_msg3: err %i\n", err); 304 + return err; 305 + } 306 + 307 + SEC("sk_msg5") 308 + int bpf_prog8(struct sk_msg_md *msg) 309 + { 310 + void *data_end = (void *)(long) msg->data_end; 311 + void *data = (void *)(long) msg->data; 312 + int ret = 0, *bytes, zero = 0; 313 + 314 + bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); 315 + if (bytes) { 316 + ret = bpf_msg_apply_bytes(msg, *bytes); 317 + if (ret) 318 + return SK_DROP; 319 + } else { 320 + return SK_DROP; 321 + } 322 + return SK_PASS; 323 + } 324 + SEC("sk_msg6") 325 + int bpf_prog9(struct sk_msg_md *msg) 326 + { 327 + void *data_end = (void *)(long) msg->data_end; 328 + void *data = (void *)(long) msg->data; 329 + int ret = 0, *bytes, zero = 0; 330 + 331 + bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero); 332 + if (bytes) { 333 + if (((__u64)data_end - (__u64)data) >= *bytes) 334 + return SK_PASS; 335 + ret = bpf_msg_cork_bytes(msg, *bytes); 336 + if (ret) 337 + return SK_DROP; 338 + } 339 + return SK_PASS; 340 + } 341 + 342 + SEC("sk_msg7") 343 + int bpf_prog10(struct sk_msg_md *msg) 344 + { 345 + int *bytes, zero = 0, one = 1; 346 + int *start, *end; 347 + 348 + bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); 349 + if (bytes) 350 + 
bpf_msg_apply_bytes(msg, *bytes); 351 + bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero); 352 + if (bytes) 353 + bpf_msg_cork_bytes(msg, *bytes); 354 + start = bpf_map_lookup_elem(&sock_pull_bytes, &zero); 355 + end = bpf_map_lookup_elem(&sock_pull_bytes, &one); 356 + if (start && end) 357 + bpf_msg_pull_data(msg, *start, *end, 0); 358 + 359 + return SK_DROP; 360 + } 361 + 362 + int _version SEC("version") = 1; 363 + char _license[] SEC("license") = "GPL";