Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'skb_sk-sk_fullsock-tcp_sock'

Martin KaFai Lau says:

====================
This series adds __sk_buff->sk, "struct bpf_tcp_sock",
BPF_FUNC_sk_fullsock and BPF_FUNC_tcp_sock. Together, they provide
a common way to expose the members of "struct tcp_sock" and
"struct bpf_sock" for the bpf_prog to access.

The patch series first adds a bpf_sock pointer to __sk_buff
and a new helper BPF_FUNC_sk_fullsock.

It then adds BPF_FUNC_tcp_sock to get a bpf_tcp_sock
pointer from a bpf_sock pointer.

The current use case is to allow a cg_skb_bpf_prog to provide
per cgroup traffic policing/shaping.

Please see the individual patches for details.

v2:
- Patch 1 depends on
commit d623876646be ("bpf: Fix narrow load on a bpf_sock returned from sk_lookup()")
in the bpf branch.
- Add sk_to_full_sk() to bpf_sk_fullsock() and bpf_tcp_sock()
such that there is a way to access the listener's sk and tcp_sk
when __sk_buff->sk is a request_sock.
The comments in the uapi bpf.h are updated accordingly.
- bpf_ctx_range_till() is used in bpf_sock_common_is_valid_access()
in patch 1. Saved a few lines.
- Patch 2 is new in v2 and it adds "state", "dst_ip4", "dst_ip6" and
"dst_port" to the bpf_sock. Narrow load is allowed on them.
The "state" (i.e. sk_state) has already been used in
INET_DIAG (e.g. ss -t) and getsockopt(TCP_INFO).
- While at it in the new patch 2, also allow narrow load on some
existing fields of the bpf_sock, which are "family", "type", "protocol"
and "src_port". Only allow loading from first byte for now.
i.e. does not allow narrow load starting from the 2nd byte.
- Add some narrow load tests to the test_verifier's sock.c
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>

+1493 -235
+42
include/linux/bpf.h
··· 194 194 ARG_ANYTHING, /* any (initialized) argument is ok */ 195 195 ARG_PTR_TO_SOCKET, /* pointer to bpf_sock */ 196 196 ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */ 197 + ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */ 197 198 }; 198 199 199 200 /* type of values returned from helper functions */ ··· 204 203 RET_PTR_TO_MAP_VALUE, /* returns a pointer to map elem value */ 205 204 RET_PTR_TO_MAP_VALUE_OR_NULL, /* returns a pointer to map elem value or NULL */ 206 205 RET_PTR_TO_SOCKET_OR_NULL, /* returns a pointer to a socket or NULL */ 206 + RET_PTR_TO_TCP_SOCK_OR_NULL, /* returns a pointer to a tcp_sock or NULL */ 207 207 }; 208 208 209 209 /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs ··· 258 256 PTR_TO_FLOW_KEYS, /* reg points to bpf_flow_keys */ 259 257 PTR_TO_SOCKET, /* reg points to struct bpf_sock */ 260 258 PTR_TO_SOCKET_OR_NULL, /* reg points to struct bpf_sock or NULL */ 259 + PTR_TO_SOCK_COMMON, /* reg points to sock_common */ 260 + PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */ 261 + PTR_TO_TCP_SOCK, /* reg points to struct tcp_sock */ 262 + PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */ 261 263 }; 262 264 263 265 /* The information passed from prog-specific *_is_valid_access ··· 926 920 u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); 927 921 928 922 #if defined(CONFIG_NET) 923 + bool bpf_sock_common_is_valid_access(int off, int size, 924 + enum bpf_access_type type, 925 + struct bpf_insn_access_aux *info); 929 926 bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, 930 927 struct bpf_insn_access_aux *info); 931 928 u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, ··· 937 928 struct bpf_prog *prog, 938 929 u32 *target_size); 939 930 #else 931 + static inline bool bpf_sock_common_is_valid_access(int off, int size, 932 + enum bpf_access_type type, 933 + struct bpf_insn_access_aux *info) 934 + { 935 + return 
false; 936 + } 940 937 static inline bool bpf_sock_is_valid_access(int off, int size, 941 938 enum bpf_access_type type, 942 939 struct bpf_insn_access_aux *info) ··· 958 943 return 0; 959 944 } 960 945 #endif 946 + 947 + #ifdef CONFIG_INET 948 + bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type, 949 + struct bpf_insn_access_aux *info); 950 + 951 + u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type, 952 + const struct bpf_insn *si, 953 + struct bpf_insn *insn_buf, 954 + struct bpf_prog *prog, 955 + u32 *target_size); 956 + #else 957 + static inline bool bpf_tcp_sock_is_valid_access(int off, int size, 958 + enum bpf_access_type type, 959 + struct bpf_insn_access_aux *info) 960 + { 961 + return false; 962 + } 963 + 964 + static inline u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type, 965 + const struct bpf_insn *si, 966 + struct bpf_insn *insn_buf, 967 + struct bpf_prog *prog, 968 + u32 *target_size) 969 + { 970 + return 0; 971 + } 972 + #endif /* CONFIG_INET */ 961 973 962 974 #endif /* _LINUX_BPF_H */
+65 -7
include/uapi/linux/bpf.h
··· 2329 2329 * "**y**". 2330 2330 * Return 2331 2331 * 0 2332 + * 2333 + * struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk) 2334 + * Description 2335 + * This helper gets a **struct bpf_sock** pointer such 2336 + * that all the fields in bpf_sock can be accessed. 2337 + * Return 2338 + * A **struct bpf_sock** pointer on success, or NULL in 2339 + * case of failure. 2340 + * 2341 + * struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk) 2342 + * Description 2343 + * This helper gets a **struct bpf_tcp_sock** pointer from a 2344 + * **struct bpf_sock** pointer. 2345 + * 2346 + * Return 2347 + * A **struct bpf_tcp_sock** pointer on success, or NULL in 2348 + * case of failure. 2332 2349 */ 2333 2350 #define __BPF_FUNC_MAPPER(FN) \ 2334 2351 FN(unspec), \ ··· 2442 2425 FN(msg_pop_data), \ 2443 2426 FN(rc_pointer_rel), \ 2444 2427 FN(spin_lock), \ 2445 - FN(spin_unlock), 2428 + FN(spin_unlock), \ 2429 + FN(sk_fullsock), \ 2430 + FN(tcp_sock), 2446 2431 2447 2432 /* integer value in 'imm' field of BPF_CALL instruction selects which helper 2448 2433 * function eBPF program intends to call ··· 2564 2545 __u64 tstamp; 2565 2546 __u32 wire_len; 2566 2547 __u32 gso_segs; 2548 + __bpf_md_ptr(struct bpf_sock *, sk); 2567 2549 }; 2568 2550 2569 2551 struct bpf_tunnel_key { ··· 2616 2596 __u32 protocol; 2617 2597 __u32 mark; 2618 2598 __u32 priority; 2619 - __u32 src_ip4; /* Allows 1,2,4-byte read. 2620 - * Stored in network byte order. 
2599 + /* IP address also allows 1 and 2 bytes access */ 2600 + __u32 src_ip4; 2601 + __u32 src_ip6[4]; 2602 + __u32 src_port; /* host byte order */ 2603 + __u32 dst_port; /* network byte order */ 2604 + __u32 dst_ip4; 2605 + __u32 dst_ip6[4]; 2606 + __u32 state; 2607 + }; 2608 + 2609 + struct bpf_tcp_sock { 2610 + __u32 snd_cwnd; /* Sending congestion window */ 2611 + __u32 srtt_us; /* smoothed round trip time << 3 in usecs */ 2612 + __u32 rtt_min; 2613 + __u32 snd_ssthresh; /* Slow start size threshold */ 2614 + __u32 rcv_nxt; /* What we want to receive next */ 2615 + __u32 snd_nxt; /* Next sequence we send */ 2616 + __u32 snd_una; /* First byte we want an ack for */ 2617 + __u32 mss_cache; /* Cached effective mss, not including SACKS */ 2618 + __u32 ecn_flags; /* ECN status bits. */ 2619 + __u32 rate_delivered; /* saved rate sample: packets delivered */ 2620 + __u32 rate_interval_us; /* saved rate sample: time elapsed */ 2621 + __u32 packets_out; /* Packets which are "in flight" */ 2622 + __u32 retrans_out; /* Retransmitted packets out */ 2623 + __u32 total_retrans; /* Total retransmits for entire connection */ 2624 + __u32 segs_in; /* RFC4898 tcpEStatsPerfSegsIn 2625 + * total number of segments in. 2621 2626 */ 2622 - __u32 src_ip6[4]; /* Allows 1,2,4-byte read. 2623 - * Stored in network byte order. 2627 + __u32 data_segs_in; /* RFC4898 tcpEStatsPerfDataSegsIn 2628 + * total number of data segments in. 2624 2629 */ 2625 - __u32 src_port; /* Allows 4-byte read. 2626 - * Stored in host byte order 2630 + __u32 segs_out; /* RFC4898 tcpEStatsPerfSegsOut 2631 + * The total number of segments sent. 2632 + */ 2633 + __u32 data_segs_out; /* RFC4898 tcpEStatsPerfDataSegsOut 2634 + * total number of data segments sent. 2635 + */ 2636 + __u32 lost_out; /* Lost packets */ 2637 + __u32 sacked_out; /* SACK'd packets */ 2638 + __u64 bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived 2639 + * sum(delta(rcv_nxt)), or how many bytes 2640 + * were acked. 
2641 + */ 2642 + __u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked 2643 + * sum(delta(snd_una)), or how many bytes 2644 + * were acked. 2627 2645 */ 2628 2646 }; 2629 2647
+119 -40
kernel/bpf/verifier.c
··· 331 331 type == PTR_TO_PACKET_META; 332 332 } 333 333 334 + static bool type_is_sk_pointer(enum bpf_reg_type type) 335 + { 336 + return type == PTR_TO_SOCKET || 337 + type == PTR_TO_SOCK_COMMON || 338 + type == PTR_TO_TCP_SOCK; 339 + } 340 + 334 341 static bool reg_type_may_be_null(enum bpf_reg_type type) 335 342 { 336 343 return type == PTR_TO_MAP_VALUE_OR_NULL || 337 - type == PTR_TO_SOCKET_OR_NULL; 344 + type == PTR_TO_SOCKET_OR_NULL || 345 + type == PTR_TO_SOCK_COMMON_OR_NULL || 346 + type == PTR_TO_TCP_SOCK_OR_NULL; 338 347 } 339 348 340 349 static bool type_is_refcounted(enum bpf_reg_type type) ··· 386 377 return func_id == BPF_FUNC_sk_release; 387 378 } 388 379 380 + static bool is_acquire_function(enum bpf_func_id func_id) 381 + { 382 + return func_id == BPF_FUNC_sk_lookup_tcp || 383 + func_id == BPF_FUNC_sk_lookup_udp; 384 + } 385 + 389 386 /* string representation of 'enum bpf_reg_type' */ 390 387 static const char * const reg_type_str[] = { 391 388 [NOT_INIT] = "?", ··· 407 392 [PTR_TO_FLOW_KEYS] = "flow_keys", 408 393 [PTR_TO_SOCKET] = "sock", 409 394 [PTR_TO_SOCKET_OR_NULL] = "sock_or_null", 395 + [PTR_TO_SOCK_COMMON] = "sock_common", 396 + [PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null", 397 + [PTR_TO_TCP_SOCK] = "tcp_sock", 398 + [PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null", 410 399 }; 411 400 412 401 static char slot_type_char[] = { ··· 637 618 } 638 619 639 620 /* release function corresponding to acquire_reference_state(). Idempotent. 
*/ 640 - static int __release_reference_state(struct bpf_func_state *state, int ptr_id) 621 + static int release_reference_state(struct bpf_func_state *state, int ptr_id) 641 622 { 642 623 int i, last_idx; 643 - 644 - if (!ptr_id) 645 - return -EFAULT; 646 624 647 625 last_idx = state->acquired_refs - 1; 648 626 for (i = 0; i < state->acquired_refs; i++) { ··· 652 636 return 0; 653 637 } 654 638 } 655 - return -EFAULT; 656 - } 657 - 658 - /* variation on the above for cases where we expect that there must be an 659 - * outstanding reference for the specified ptr_id. 660 - */ 661 - static int release_reference_state(struct bpf_verifier_env *env, int ptr_id) 662 - { 663 - struct bpf_func_state *state = cur_func(env); 664 - int err; 665 - 666 - err = __release_reference_state(state, ptr_id); 667 - if (WARN_ON_ONCE(err != 0)) 668 - verbose(env, "verifier internal error: can't release reference\n"); 669 - return err; 639 + return -EINVAL; 670 640 } 671 641 672 642 static int transfer_reference_state(struct bpf_func_state *dst, ··· 1211 1209 case CONST_PTR_TO_MAP: 1212 1210 case PTR_TO_SOCKET: 1213 1211 case PTR_TO_SOCKET_OR_NULL: 1212 + case PTR_TO_SOCK_COMMON: 1213 + case PTR_TO_SOCK_COMMON_OR_NULL: 1214 + case PTR_TO_TCP_SOCK: 1215 + case PTR_TO_TCP_SOCK_OR_NULL: 1214 1216 return true; 1215 1217 default: 1216 1218 return false; ··· 1653 1647 struct bpf_reg_state *regs = cur_regs(env); 1654 1648 struct bpf_reg_state *reg = &regs[regno]; 1655 1649 struct bpf_insn_access_aux info = {}; 1650 + bool valid; 1656 1651 1657 1652 if (reg->smin_value < 0) { 1658 1653 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", ··· 1661 1654 return -EACCES; 1662 1655 } 1663 1656 1664 - if (!bpf_sock_is_valid_access(off, size, t, &info)) { 1665 - verbose(env, "invalid bpf_sock access off=%d size=%d\n", 1666 - off, size); 1667 - return -EACCES; 1657 + switch (reg->type) { 1658 + case PTR_TO_SOCK_COMMON: 1659 + valid = 
bpf_sock_common_is_valid_access(off, size, t, &info); 1660 + break; 1661 + case PTR_TO_SOCKET: 1662 + valid = bpf_sock_is_valid_access(off, size, t, &info); 1663 + break; 1664 + case PTR_TO_TCP_SOCK: 1665 + valid = bpf_tcp_sock_is_valid_access(off, size, t, &info); 1666 + break; 1667 + default: 1668 + valid = false; 1668 1669 } 1669 1670 1670 - env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size; 1671 1671 1672 - return 0; 1672 + if (valid) { 1673 + env->insn_aux_data[insn_idx].ctx_field_size = 1674 + info.ctx_field_size; 1675 + return 0; 1676 + } 1677 + 1678 + verbose(env, "R%d invalid %s access off=%d size=%d\n", 1679 + regno, reg_type_str[reg->type], off, size); 1680 + 1681 + return -EACCES; 1673 1682 } 1674 1683 1675 1684 static bool __is_pointer_value(bool allow_ptr_leaks, ··· 1711 1688 { 1712 1689 const struct bpf_reg_state *reg = reg_state(env, regno); 1713 1690 1714 - return reg->type == PTR_TO_CTX || 1715 - reg->type == PTR_TO_SOCKET; 1691 + return reg->type == PTR_TO_CTX; 1692 + } 1693 + 1694 + static bool is_sk_reg(struct bpf_verifier_env *env, int regno) 1695 + { 1696 + const struct bpf_reg_state *reg = reg_state(env, regno); 1697 + 1698 + return type_is_sk_pointer(reg->type); 1716 1699 } 1717 1700 1718 1701 static bool is_pkt_reg(struct bpf_verifier_env *env, int regno) ··· 1828 1799 break; 1829 1800 case PTR_TO_SOCKET: 1830 1801 pointer_desc = "sock "; 1802 + break; 1803 + case PTR_TO_SOCK_COMMON: 1804 + pointer_desc = "sock_common "; 1805 + break; 1806 + case PTR_TO_TCP_SOCK: 1807 + pointer_desc = "tcp_sock "; 1831 1808 break; 1832 1809 default: 1833 1810 break; ··· 2038 2003 * PTR_TO_PACKET[_META,_END]. In the latter 2039 2004 * case, we know the offset is zero. 
2040 2005 */ 2041 - if (reg_type == SCALAR_VALUE) 2006 + if (reg_type == SCALAR_VALUE) { 2042 2007 mark_reg_unknown(env, regs, value_regno); 2043 - else 2008 + } else { 2044 2009 mark_reg_known_zero(env, regs, 2045 2010 value_regno); 2011 + if (reg_type_may_be_null(reg_type)) 2012 + regs[value_regno].id = ++env->id_gen; 2013 + } 2046 2014 regs[value_regno].type = reg_type; 2047 2015 } 2048 2016 ··· 2091 2053 err = check_flow_keys_access(env, off, size); 2092 2054 if (!err && t == BPF_READ && value_regno >= 0) 2093 2055 mark_reg_unknown(env, regs, value_regno); 2094 - } else if (reg->type == PTR_TO_SOCKET) { 2056 + } else if (type_is_sk_pointer(reg->type)) { 2095 2057 if (t == BPF_WRITE) { 2096 - verbose(env, "cannot write into socket\n"); 2058 + verbose(env, "R%d cannot write into %s\n", 2059 + regno, reg_type_str[reg->type]); 2097 2060 return -EACCES; 2098 2061 } 2099 2062 err = check_sock_access(env, insn_idx, regno, off, size, t); ··· 2141 2102 2142 2103 if (is_ctx_reg(env, insn->dst_reg) || 2143 2104 is_pkt_reg(env, insn->dst_reg) || 2144 - is_flow_key_reg(env, insn->dst_reg)) { 2105 + is_flow_key_reg(env, insn->dst_reg) || 2106 + is_sk_reg(env, insn->dst_reg)) { 2145 2107 verbose(env, "BPF_XADD stores into R%d %s is not allowed\n", 2146 2108 insn->dst_reg, 2147 2109 reg_type_str[reg_state(env, insn->dst_reg)->type]); ··· 2409 2369 err = check_ctx_reg(env, reg, regno); 2410 2370 if (err < 0) 2411 2371 return err; 2372 + } else if (arg_type == ARG_PTR_TO_SOCK_COMMON) { 2373 + expected_type = PTR_TO_SOCK_COMMON; 2374 + /* Any sk pointer can be ARG_PTR_TO_SOCK_COMMON */ 2375 + if (!type_is_sk_pointer(type)) 2376 + goto err_type; 2412 2377 } else if (arg_type == ARG_PTR_TO_SOCKET) { 2413 2378 expected_type = PTR_TO_SOCKET; 2414 2379 if (type != expected_type) ··· 2828 2783 for (i = 0; i <= vstate->curframe; i++) 2829 2784 release_reg_references(env, vstate->frame[i], meta->ptr_id); 2830 2785 2831 - return release_reference_state(env, meta->ptr_id); 2786 + return 
release_reference_state(cur_func(env), meta->ptr_id); 2832 2787 } 2833 2788 2834 2789 static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, ··· 3094 3049 } 3095 3050 } else if (is_release_function(func_id)) { 3096 3051 err = release_reference(env, &meta); 3097 - if (err) 3052 + if (err) { 3053 + verbose(env, "func %s#%d reference has not been acquired before\n", 3054 + func_id_name(func_id), func_id); 3098 3055 return err; 3056 + } 3099 3057 } 3100 3058 3101 3059 regs = cur_regs(env); ··· 3147 3099 regs[BPF_REG_0].id = ++env->id_gen; 3148 3100 } 3149 3101 } else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) { 3150 - int id = acquire_reference_state(env, insn_idx); 3151 - if (id < 0) 3152 - return id; 3153 3102 mark_reg_known_zero(env, regs, BPF_REG_0); 3154 3103 regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL; 3155 - regs[BPF_REG_0].id = id; 3104 + if (is_acquire_function(func_id)) { 3105 + int id = acquire_reference_state(env, insn_idx); 3106 + 3107 + if (id < 0) 3108 + return id; 3109 + /* For release_reference() */ 3110 + regs[BPF_REG_0].id = id; 3111 + } else { 3112 + /* For mark_ptr_or_null_reg() */ 3113 + regs[BPF_REG_0].id = ++env->id_gen; 3114 + } 3115 + } else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) { 3116 + mark_reg_known_zero(env, regs, BPF_REG_0); 3117 + regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL; 3118 + regs[BPF_REG_0].id = ++env->id_gen; 3156 3119 } else { 3157 3120 verbose(env, "unknown return type %d of func %s#%d\n", 3158 3121 fn->ret_type, func_id_name(func_id), func_id); ··· 3423 3364 case PTR_TO_PACKET_END: 3424 3365 case PTR_TO_SOCKET: 3425 3366 case PTR_TO_SOCKET_OR_NULL: 3367 + case PTR_TO_SOCK_COMMON: 3368 + case PTR_TO_SOCK_COMMON_OR_NULL: 3369 + case PTR_TO_TCP_SOCK: 3370 + case PTR_TO_TCP_SOCK_OR_NULL: 3426 3371 verbose(env, "R%d pointer arithmetic on %s prohibited\n", 3427 3372 dst, reg_type_str[ptr_reg->type]); 3428 3373 return -EACCES; ··· 4660 4597 } 4661 4598 } else if (reg->type == 
PTR_TO_SOCKET_OR_NULL) { 4662 4599 reg->type = PTR_TO_SOCKET; 4600 + } else if (reg->type == PTR_TO_SOCK_COMMON_OR_NULL) { 4601 + reg->type = PTR_TO_SOCK_COMMON; 4602 + } else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) { 4603 + reg->type = PTR_TO_TCP_SOCK; 4663 4604 } 4664 4605 if (is_null || !(reg_is_refcounted(reg) || 4665 4606 reg_may_point_to_spin_lock(reg))) { ··· 4688 4621 int i, j; 4689 4622 4690 4623 if (reg_is_refcounted_or_null(&regs[regno]) && is_null) 4691 - __release_reference_state(state, id); 4624 + release_reference_state(state, id); 4692 4625 4693 4626 for (i = 0; i < MAX_BPF_REG; i++) 4694 4627 mark_ptr_or_null_reg(state, &regs[i], id, is_null); ··· 5857 5790 case PTR_TO_FLOW_KEYS: 5858 5791 case PTR_TO_SOCKET: 5859 5792 case PTR_TO_SOCKET_OR_NULL: 5793 + case PTR_TO_SOCK_COMMON: 5794 + case PTR_TO_SOCK_COMMON_OR_NULL: 5795 + case PTR_TO_TCP_SOCK: 5796 + case PTR_TO_TCP_SOCK_OR_NULL: 5860 5797 /* Only valid matches are exact, which memcmp() above 5861 5798 * would have accepted 5862 5799 */ ··· 6181 6110 case PTR_TO_CTX: 6182 6111 case PTR_TO_SOCKET: 6183 6112 case PTR_TO_SOCKET_OR_NULL: 6113 + case PTR_TO_SOCK_COMMON: 6114 + case PTR_TO_SOCK_COMMON_OR_NULL: 6115 + case PTR_TO_TCP_SOCK: 6116 + case PTR_TO_TCP_SOCK_OR_NULL: 6184 6117 return false; 6185 6118 default: 6186 6119 return true; ··· 7187 7112 convert_ctx_access = ops->convert_ctx_access; 7188 7113 break; 7189 7114 case PTR_TO_SOCKET: 7115 + case PTR_TO_SOCK_COMMON: 7190 7116 convert_ctx_access = bpf_sock_convert_ctx_access; 7117 + break; 7118 + case PTR_TO_TCP_SOCK: 7119 + convert_ctx_access = bpf_tcp_sock_convert_ctx_access; 7191 7120 break; 7192 7121 default: 7193 7122 continue;
+319 -176
net/core/filter.c
··· 1793 1793 .arg2_type = ARG_ANYTHING, 1794 1794 }; 1795 1795 1796 + BPF_CALL_1(bpf_sk_fullsock, struct sock *, sk) 1797 + { 1798 + sk = sk_to_full_sk(sk); 1799 + 1800 + return sk_fullsock(sk) ? (unsigned long)sk : (unsigned long)NULL; 1801 + } 1802 + 1803 + static const struct bpf_func_proto bpf_sk_fullsock_proto = { 1804 + .func = bpf_sk_fullsock, 1805 + .gpl_only = false, 1806 + .ret_type = RET_PTR_TO_SOCKET_OR_NULL, 1807 + .arg1_type = ARG_PTR_TO_SOCK_COMMON, 1808 + }; 1809 + 1796 1810 static inline int sk_skb_try_make_writable(struct sk_buff *skb, 1797 1811 unsigned int write_len) 1798 1812 { ··· 5030 5016 }; 5031 5017 #endif /* CONFIG_IPV6_SEG6_BPF */ 5032 5018 5019 + #define CONVERT_COMMON_TCP_SOCK_FIELDS(md_type, CONVERT) \ 5020 + do { \ 5021 + switch (si->off) { \ 5022 + case offsetof(md_type, snd_cwnd): \ 5023 + CONVERT(snd_cwnd); break; \ 5024 + case offsetof(md_type, srtt_us): \ 5025 + CONVERT(srtt_us); break; \ 5026 + case offsetof(md_type, snd_ssthresh): \ 5027 + CONVERT(snd_ssthresh); break; \ 5028 + case offsetof(md_type, rcv_nxt): \ 5029 + CONVERT(rcv_nxt); break; \ 5030 + case offsetof(md_type, snd_nxt): \ 5031 + CONVERT(snd_nxt); break; \ 5032 + case offsetof(md_type, snd_una): \ 5033 + CONVERT(snd_una); break; \ 5034 + case offsetof(md_type, mss_cache): \ 5035 + CONVERT(mss_cache); break; \ 5036 + case offsetof(md_type, ecn_flags): \ 5037 + CONVERT(ecn_flags); break; \ 5038 + case offsetof(md_type, rate_delivered): \ 5039 + CONVERT(rate_delivered); break; \ 5040 + case offsetof(md_type, rate_interval_us): \ 5041 + CONVERT(rate_interval_us); break; \ 5042 + case offsetof(md_type, packets_out): \ 5043 + CONVERT(packets_out); break; \ 5044 + case offsetof(md_type, retrans_out): \ 5045 + CONVERT(retrans_out); break; \ 5046 + case offsetof(md_type, total_retrans): \ 5047 + CONVERT(total_retrans); break; \ 5048 + case offsetof(md_type, segs_in): \ 5049 + CONVERT(segs_in); break; \ 5050 + case offsetof(md_type, data_segs_in): \ 5051 + 
CONVERT(data_segs_in); break; \ 5052 + case offsetof(md_type, segs_out): \ 5053 + CONVERT(segs_out); break; \ 5054 + case offsetof(md_type, data_segs_out): \ 5055 + CONVERT(data_segs_out); break; \ 5056 + case offsetof(md_type, lost_out): \ 5057 + CONVERT(lost_out); break; \ 5058 + case offsetof(md_type, sacked_out): \ 5059 + CONVERT(sacked_out); break; \ 5060 + case offsetof(md_type, bytes_received): \ 5061 + CONVERT(bytes_received); break; \ 5062 + case offsetof(md_type, bytes_acked): \ 5063 + CONVERT(bytes_acked); break; \ 5064 + } \ 5065 + } while (0) 5066 + 5033 5067 #ifdef CONFIG_INET 5034 5068 static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple, 5035 5069 int dif, int sdif, u8 family, u8 proto) ··· 5315 5253 .arg5_type = ARG_ANYTHING, 5316 5254 }; 5317 5255 5256 + bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type, 5257 + struct bpf_insn_access_aux *info) 5258 + { 5259 + if (off < 0 || off >= offsetofend(struct bpf_tcp_sock, bytes_acked)) 5260 + return false; 5261 + 5262 + if (off % size != 0) 5263 + return false; 5264 + 5265 + switch (off) { 5266 + case offsetof(struct bpf_tcp_sock, bytes_received): 5267 + case offsetof(struct bpf_tcp_sock, bytes_acked): 5268 + return size == sizeof(__u64); 5269 + default: 5270 + return size == sizeof(__u32); 5271 + } 5272 + } 5273 + 5274 + u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type, 5275 + const struct bpf_insn *si, 5276 + struct bpf_insn *insn_buf, 5277 + struct bpf_prog *prog, u32 *target_size) 5278 + { 5279 + struct bpf_insn *insn = insn_buf; 5280 + 5281 + #define BPF_TCP_SOCK_GET_COMMON(FIELD) \ 5282 + do { \ 5283 + BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, FIELD) > \ 5284 + FIELD_SIZEOF(struct bpf_tcp_sock, FIELD)); \ 5285 + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct tcp_sock, FIELD),\ 5286 + si->dst_reg, si->src_reg, \ 5287 + offsetof(struct tcp_sock, FIELD)); \ 5288 + } while (0) 5289 + 5290 + CONVERT_COMMON_TCP_SOCK_FIELDS(struct 
bpf_tcp_sock, 5291 + BPF_TCP_SOCK_GET_COMMON); 5292 + 5293 + if (insn > insn_buf) 5294 + return insn - insn_buf; 5295 + 5296 + switch (si->off) { 5297 + case offsetof(struct bpf_tcp_sock, rtt_min): 5298 + BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, rtt_min) != 5299 + sizeof(struct minmax)); 5300 + BUILD_BUG_ON(sizeof(struct minmax) < 5301 + sizeof(struct minmax_sample)); 5302 + 5303 + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, 5304 + offsetof(struct tcp_sock, rtt_min) + 5305 + offsetof(struct minmax_sample, v)); 5306 + break; 5307 + } 5308 + 5309 + return insn - insn_buf; 5310 + } 5311 + 5312 + BPF_CALL_1(bpf_tcp_sock, struct sock *, sk) 5313 + { 5314 + sk = sk_to_full_sk(sk); 5315 + 5316 + if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) 5317 + return (unsigned long)sk; 5318 + 5319 + return (unsigned long)NULL; 5320 + } 5321 + 5322 + static const struct bpf_func_proto bpf_tcp_sock_proto = { 5323 + .func = bpf_tcp_sock, 5324 + .gpl_only = false, 5325 + .ret_type = RET_PTR_TO_TCP_SOCK_OR_NULL, 5326 + .arg1_type = ARG_PTR_TO_SOCK_COMMON, 5327 + }; 5328 + 5318 5329 #endif /* CONFIG_INET */ 5319 5330 5320 5331 bool bpf_helper_changes_pkt_data(void *func) ··· 5541 5406 switch (func_id) { 5542 5407 case BPF_FUNC_get_local_storage: 5543 5408 return &bpf_get_local_storage_proto; 5409 + case BPF_FUNC_sk_fullsock: 5410 + return &bpf_sk_fullsock_proto; 5411 + #ifdef CONFIG_INET 5412 + case BPF_FUNC_tcp_sock: 5413 + return &bpf_tcp_sock_proto; 5414 + #endif 5544 5415 default: 5545 5416 return sk_filter_func_proto(func_id, prog); 5546 5417 } ··· 5618 5477 return &bpf_get_socket_uid_proto; 5619 5478 case BPF_FUNC_fib_lookup: 5620 5479 return &bpf_skb_fib_lookup_proto; 5480 + case BPF_FUNC_sk_fullsock: 5481 + return &bpf_sk_fullsock_proto; 5621 5482 #ifdef CONFIG_XFRM 5622 5483 case BPF_FUNC_skb_get_xfrm_state: 5623 5484 return &bpf_skb_get_xfrm_state_proto; ··· 5637 5494 return &bpf_sk_lookup_udp_proto; 5638 5495 case BPF_FUNC_sk_release: 5639 5496 return 
&bpf_sk_release_proto; 5497 + case BPF_FUNC_tcp_sock: 5498 + return &bpf_tcp_sock_proto; 5640 5499 #endif 5641 5500 default: 5642 5501 return bpf_base_func_proto(func_id); ··· 5909 5764 if (size != sizeof(__u64)) 5910 5765 return false; 5911 5766 break; 5767 + case offsetof(struct __sk_buff, sk): 5768 + if (type == BPF_WRITE || size != sizeof(__u64)) 5769 + return false; 5770 + info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL; 5771 + break; 5912 5772 default: 5913 5773 /* Only narrow read access allowed for now. */ 5914 5774 if (type == BPF_WRITE) { ··· 6085 5935 return true; 6086 5936 } 6087 5937 6088 - static bool __sock_filter_check_size(int off, int size, 5938 + bool bpf_sock_common_is_valid_access(int off, int size, 5939 + enum bpf_access_type type, 6089 5940 struct bpf_insn_access_aux *info) 6090 5941 { 6091 - const int size_default = sizeof(__u32); 6092 - 6093 5942 switch (off) { 6094 - case bpf_ctx_range(struct bpf_sock, src_ip4): 6095 - case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]): 6096 - bpf_ctx_record_field_size(info, size_default); 6097 - return bpf_ctx_narrow_access_ok(off, size, size_default); 5943 + case bpf_ctx_range_till(struct bpf_sock, type, priority): 5944 + return false; 5945 + default: 5946 + return bpf_sock_is_valid_access(off, size, type, info); 6098 5947 } 6099 - 6100 - return size == size_default; 6101 5948 } 6102 5949 6103 5950 bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, 6104 5951 struct bpf_insn_access_aux *info) 6105 5952 { 5953 + const int size_default = sizeof(__u32); 5954 + 6106 5955 if (off < 0 || off >= sizeof(struct bpf_sock)) 6107 5956 return false; 6108 5957 if (off % size != 0) 6109 5958 return false; 6110 - if (!__sock_filter_check_size(off, size, info)) 6111 - return false; 6112 - return true; 5959 + 5960 + switch (off) { 5961 + case offsetof(struct bpf_sock, state): 5962 + case offsetof(struct bpf_sock, family): 5963 + case offsetof(struct bpf_sock, type): 5964 + case 
offsetof(struct bpf_sock, protocol): 5965 + case offsetof(struct bpf_sock, dst_port): 5966 + case offsetof(struct bpf_sock, src_port): 5967 + case bpf_ctx_range(struct bpf_sock, src_ip4): 5968 + case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]): 5969 + case bpf_ctx_range(struct bpf_sock, dst_ip4): 5970 + case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]): 5971 + bpf_ctx_record_field_size(info, size_default); 5972 + return bpf_ctx_narrow_access_ok(off, size, size_default); 5973 + } 5974 + 5975 + return size == size_default; 6113 5976 } 6114 5977 6115 5978 static bool sock_filter_is_valid_access(int off, int size, ··· 6911 6748 off += offsetof(struct qdisc_skb_cb, pkt_len); 6912 6749 *target_size = 4; 6913 6750 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, off); 6751 + break; 6752 + 6753 + case offsetof(struct __sk_buff, sk): 6754 + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), 6755 + si->dst_reg, si->src_reg, 6756 + offsetof(struct sk_buff, sk)); 6757 + break; 6914 6758 } 6915 6759 6916 6760 return insn - insn_buf; ··· 6966 6796 break; 6967 6797 6968 6798 case offsetof(struct bpf_sock, family): 6969 - BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_family) != 2); 6970 - 6971 - *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, 6972 - offsetof(struct sock, sk_family)); 6799 + *insn++ = BPF_LDX_MEM( 6800 + BPF_FIELD_SIZEOF(struct sock_common, skc_family), 6801 + si->dst_reg, si->src_reg, 6802 + bpf_target_off(struct sock_common, 6803 + skc_family, 6804 + FIELD_SIZEOF(struct sock_common, 6805 + skc_family), 6806 + target_size)); 6973 6807 break; 6974 6808 6975 6809 case offsetof(struct bpf_sock, type): 6810 + BUILD_BUG_ON(HWEIGHT32(SK_FL_TYPE_MASK) != BITS_PER_BYTE * 2); 6976 6811 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, 6977 6812 offsetof(struct sock, __sk_flags_offset)); 6978 6813 *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK); 6979 6814 *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, 
SK_FL_TYPE_SHIFT); 6815 + *target_size = 2; 6980 6816 break; 6981 6817 6982 6818 case offsetof(struct bpf_sock, protocol): 6819 + BUILD_BUG_ON(HWEIGHT32(SK_FL_PROTO_MASK) != BITS_PER_BYTE); 6983 6820 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, 6984 6821 offsetof(struct sock, __sk_flags_offset)); 6985 6822 *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK); 6986 6823 *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_PROTO_SHIFT); 6824 + *target_size = 1; 6987 6825 break; 6988 6826 6989 6827 case offsetof(struct bpf_sock, src_ip4): ··· 7000 6822 bpf_target_off(struct sock_common, skc_rcv_saddr, 7001 6823 FIELD_SIZEOF(struct sock_common, 7002 6824 skc_rcv_saddr), 6825 + target_size)); 6826 + break; 6827 + 6828 + case offsetof(struct bpf_sock, dst_ip4): 6829 + *insn++ = BPF_LDX_MEM( 6830 + BPF_SIZE(si->code), si->dst_reg, si->src_reg, 6831 + bpf_target_off(struct sock_common, skc_daddr, 6832 + FIELD_SIZEOF(struct sock_common, 6833 + skc_daddr), 7003 6834 target_size)); 7004 6835 break; 7005 6836 ··· 7030 6843 #endif 7031 6844 break; 7032 6845 6846 + case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]): 6847 + #if IS_ENABLED(CONFIG_IPV6) 6848 + off = si->off; 6849 + off -= offsetof(struct bpf_sock, dst_ip6[0]); 6850 + *insn++ = BPF_LDX_MEM( 6851 + BPF_SIZE(si->code), si->dst_reg, si->src_reg, 6852 + bpf_target_off(struct sock_common, 6853 + skc_v6_daddr.s6_addr32[0], 6854 + FIELD_SIZEOF(struct sock_common, 6855 + skc_v6_daddr.s6_addr32[0]), 6856 + target_size) + off); 6857 + #else 6858 + *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); 6859 + *target_size = 4; 6860 + #endif 6861 + break; 6862 + 7033 6863 case offsetof(struct bpf_sock, src_port): 7034 6864 *insn++ = BPF_LDX_MEM( 7035 6865 BPF_FIELD_SIZEOF(struct sock_common, skc_num), ··· 7054 6850 bpf_target_off(struct sock_common, skc_num, 7055 6851 FIELD_SIZEOF(struct sock_common, 7056 6852 skc_num), 6853 + target_size)); 6854 + break; 6855 + 6856 + case offsetof(struct bpf_sock, 
dst_port): 6857 + *insn++ = BPF_LDX_MEM( 6858 + BPF_FIELD_SIZEOF(struct sock_common, skc_dport), 6859 + si->dst_reg, si->src_reg, 6860 + bpf_target_off(struct sock_common, skc_dport, 6861 + FIELD_SIZEOF(struct sock_common, 6862 + skc_dport), 6863 + target_size)); 6864 + break; 6865 + 6866 + case offsetof(struct bpf_sock, state): 6867 + *insn++ = BPF_LDX_MEM( 6868 + BPF_FIELD_SIZEOF(struct sock_common, skc_state), 6869 + si->dst_reg, si->src_reg, 6870 + bpf_target_off(struct sock_common, skc_state, 6871 + FIELD_SIZEOF(struct sock_common, 6872 + skc_state), 7057 6873 target_size)); 7058 6874 break; 7059 6875 } ··· 7323 7099 struct bpf_insn *insn = insn_buf; 7324 7100 int off; 7325 7101 7102 + /* Helper macro for adding read access to tcp_sock or sock fields. */ 7103 + #define SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \ 7104 + do { \ 7105 + BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) > \ 7106 + FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \ 7107 + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ 7108 + struct bpf_sock_ops_kern, \ 7109 + is_fullsock), \ 7110 + si->dst_reg, si->src_reg, \ 7111 + offsetof(struct bpf_sock_ops_kern, \ 7112 + is_fullsock)); \ 7113 + *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 2); \ 7114 + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ 7115 + struct bpf_sock_ops_kern, sk),\ 7116 + si->dst_reg, si->src_reg, \ 7117 + offsetof(struct bpf_sock_ops_kern, sk));\ 7118 + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(OBJ, \ 7119 + OBJ_FIELD), \ 7120 + si->dst_reg, si->dst_reg, \ 7121 + offsetof(OBJ, OBJ_FIELD)); \ 7122 + } while (0) 7123 + 7124 + #define SOCK_OPS_GET_TCP_SOCK_FIELD(FIELD) \ 7125 + SOCK_OPS_GET_FIELD(FIELD, FIELD, struct tcp_sock) 7126 + 7127 + /* Helper macro for adding write access to tcp_sock or sock fields. 7128 + * The macro is called with two registers, dst_reg which contains a pointer 7129 + * to ctx (context) and src_reg which contains the value that should be 7130 + * stored. 
However, we need an additional register since we cannot overwrite 7131 + * dst_reg because it may be used later in the program. 7132 + * Instead we "borrow" one of the other register. We first save its value 7133 + * into a new (temp) field in bpf_sock_ops_kern, use it, and then restore 7134 + * it at the end of the macro. 7135 + */ 7136 + #define SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \ 7137 + do { \ 7138 + int reg = BPF_REG_9; \ 7139 + BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) > \ 7140 + FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \ 7141 + if (si->dst_reg == reg || si->src_reg == reg) \ 7142 + reg--; \ 7143 + if (si->dst_reg == reg || si->src_reg == reg) \ 7144 + reg--; \ 7145 + *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, reg, \ 7146 + offsetof(struct bpf_sock_ops_kern, \ 7147 + temp)); \ 7148 + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ 7149 + struct bpf_sock_ops_kern, \ 7150 + is_fullsock), \ 7151 + reg, si->dst_reg, \ 7152 + offsetof(struct bpf_sock_ops_kern, \ 7153 + is_fullsock)); \ 7154 + *insn++ = BPF_JMP_IMM(BPF_JEQ, reg, 0, 2); \ 7155 + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ 7156 + struct bpf_sock_ops_kern, sk),\ 7157 + reg, si->dst_reg, \ 7158 + offsetof(struct bpf_sock_ops_kern, sk));\ 7159 + *insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(OBJ, OBJ_FIELD), \ 7160 + reg, si->src_reg, \ 7161 + offsetof(OBJ, OBJ_FIELD)); \ 7162 + *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->dst_reg, \ 7163 + offsetof(struct bpf_sock_ops_kern, \ 7164 + temp)); \ 7165 + } while (0) 7166 + 7167 + #define SOCK_OPS_GET_OR_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ, TYPE) \ 7168 + do { \ 7169 + if (TYPE == BPF_WRITE) \ 7170 + SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \ 7171 + else \ 7172 + SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \ 7173 + } while (0) 7174 + 7175 + CONVERT_COMMON_TCP_SOCK_FIELDS(struct bpf_sock_ops, 7176 + SOCK_OPS_GET_TCP_SOCK_FIELD); 7177 + 7178 + if (insn > insn_buf) 7179 + return insn - insn_buf; 7180 + 7326 7181 switch (si->off) { 7327 7182 case 
offsetof(struct bpf_sock_ops, op) ... 7328 7183 offsetof(struct bpf_sock_ops, replylong[3]): ··· 7559 7256 FIELD_SIZEOF(struct minmax_sample, t)); 7560 7257 break; 7561 7258 7562 - /* Helper macro for adding read access to tcp_sock or sock fields. */ 7563 - #define SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \ 7564 - do { \ 7565 - BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) > \ 7566 - FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \ 7567 - *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ 7568 - struct bpf_sock_ops_kern, \ 7569 - is_fullsock), \ 7570 - si->dst_reg, si->src_reg, \ 7571 - offsetof(struct bpf_sock_ops_kern, \ 7572 - is_fullsock)); \ 7573 - *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 2); \ 7574 - *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ 7575 - struct bpf_sock_ops_kern, sk),\ 7576 - si->dst_reg, si->src_reg, \ 7577 - offsetof(struct bpf_sock_ops_kern, sk));\ 7578 - *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(OBJ, \ 7579 - OBJ_FIELD), \ 7580 - si->dst_reg, si->dst_reg, \ 7581 - offsetof(OBJ, OBJ_FIELD)); \ 7582 - } while (0) 7583 - 7584 - /* Helper macro for adding write access to tcp_sock or sock fields. 7585 - * The macro is called with two registers, dst_reg which contains a pointer 7586 - * to ctx (context) and src_reg which contains the value that should be 7587 - * stored. However, we need an additional register since we cannot overwrite 7588 - * dst_reg because it may be used later in the program. 7589 - * Instead we "borrow" one of the other register. We first save its value 7590 - * into a new (temp) field in bpf_sock_ops_kern, use it, and then restore 7591 - * it at the end of the macro. 
7592 - */ 7593 - #define SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \ 7594 - do { \ 7595 - int reg = BPF_REG_9; \ 7596 - BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) > \ 7597 - FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \ 7598 - if (si->dst_reg == reg || si->src_reg == reg) \ 7599 - reg--; \ 7600 - if (si->dst_reg == reg || si->src_reg == reg) \ 7601 - reg--; \ 7602 - *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, reg, \ 7603 - offsetof(struct bpf_sock_ops_kern, \ 7604 - temp)); \ 7605 - *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ 7606 - struct bpf_sock_ops_kern, \ 7607 - is_fullsock), \ 7608 - reg, si->dst_reg, \ 7609 - offsetof(struct bpf_sock_ops_kern, \ 7610 - is_fullsock)); \ 7611 - *insn++ = BPF_JMP_IMM(BPF_JEQ, reg, 0, 2); \ 7612 - *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ 7613 - struct bpf_sock_ops_kern, sk),\ 7614 - reg, si->dst_reg, \ 7615 - offsetof(struct bpf_sock_ops_kern, sk));\ 7616 - *insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(OBJ, OBJ_FIELD), \ 7617 - reg, si->src_reg, \ 7618 - offsetof(OBJ, OBJ_FIELD)); \ 7619 - *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->dst_reg, \ 7620 - offsetof(struct bpf_sock_ops_kern, \ 7621 - temp)); \ 7622 - } while (0) 7623 - 7624 - #define SOCK_OPS_GET_OR_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ, TYPE) \ 7625 - do { \ 7626 - if (TYPE == BPF_WRITE) \ 7627 - SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \ 7628 - else \ 7629 - SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \ 7630 - } while (0) 7631 - 7632 - case offsetof(struct bpf_sock_ops, snd_cwnd): 7633 - SOCK_OPS_GET_FIELD(snd_cwnd, snd_cwnd, struct tcp_sock); 7634 - break; 7635 - 7636 - case offsetof(struct bpf_sock_ops, srtt_us): 7637 - SOCK_OPS_GET_FIELD(srtt_us, srtt_us, struct tcp_sock); 7638 - break; 7639 - 7640 7259 case offsetof(struct bpf_sock_ops, bpf_sock_ops_cb_flags): 7641 7260 SOCK_OPS_GET_FIELD(bpf_sock_ops_cb_flags, bpf_sock_ops_cb_flags, 7642 7261 struct tcp_sock); 7643 - break; 7644 - 7645 - case offsetof(struct bpf_sock_ops, snd_ssthresh): 7646 - 
SOCK_OPS_GET_FIELD(snd_ssthresh, snd_ssthresh, struct tcp_sock); 7647 - break; 7648 - 7649 - case offsetof(struct bpf_sock_ops, rcv_nxt): 7650 - SOCK_OPS_GET_FIELD(rcv_nxt, rcv_nxt, struct tcp_sock); 7651 - break; 7652 - 7653 - case offsetof(struct bpf_sock_ops, snd_nxt): 7654 - SOCK_OPS_GET_FIELD(snd_nxt, snd_nxt, struct tcp_sock); 7655 - break; 7656 - 7657 - case offsetof(struct bpf_sock_ops, snd_una): 7658 - SOCK_OPS_GET_FIELD(snd_una, snd_una, struct tcp_sock); 7659 - break; 7660 - 7661 - case offsetof(struct bpf_sock_ops, mss_cache): 7662 - SOCK_OPS_GET_FIELD(mss_cache, mss_cache, struct tcp_sock); 7663 - break; 7664 - 7665 - case offsetof(struct bpf_sock_ops, ecn_flags): 7666 - SOCK_OPS_GET_FIELD(ecn_flags, ecn_flags, struct tcp_sock); 7667 - break; 7668 - 7669 - case offsetof(struct bpf_sock_ops, rate_delivered): 7670 - SOCK_OPS_GET_FIELD(rate_delivered, rate_delivered, 7671 - struct tcp_sock); 7672 - break; 7673 - 7674 - case offsetof(struct bpf_sock_ops, rate_interval_us): 7675 - SOCK_OPS_GET_FIELD(rate_interval_us, rate_interval_us, 7676 - struct tcp_sock); 7677 - break; 7678 - 7679 - case offsetof(struct bpf_sock_ops, packets_out): 7680 - SOCK_OPS_GET_FIELD(packets_out, packets_out, struct tcp_sock); 7681 - break; 7682 - 7683 - case offsetof(struct bpf_sock_ops, retrans_out): 7684 - SOCK_OPS_GET_FIELD(retrans_out, retrans_out, struct tcp_sock); 7685 - break; 7686 - 7687 - case offsetof(struct bpf_sock_ops, total_retrans): 7688 - SOCK_OPS_GET_FIELD(total_retrans, total_retrans, 7689 - struct tcp_sock); 7690 - break; 7691 - 7692 - case offsetof(struct bpf_sock_ops, segs_in): 7693 - SOCK_OPS_GET_FIELD(segs_in, segs_in, struct tcp_sock); 7694 - break; 7695 - 7696 - case offsetof(struct bpf_sock_ops, data_segs_in): 7697 - SOCK_OPS_GET_FIELD(data_segs_in, data_segs_in, struct tcp_sock); 7698 - break; 7699 - 7700 - case offsetof(struct bpf_sock_ops, segs_out): 7701 - SOCK_OPS_GET_FIELD(segs_out, segs_out, struct tcp_sock); 7702 - break; 7703 - 7704 - case 
offsetof(struct bpf_sock_ops, data_segs_out): 7705 - SOCK_OPS_GET_FIELD(data_segs_out, data_segs_out, 7706 - struct tcp_sock); 7707 - break; 7708 - 7709 - case offsetof(struct bpf_sock_ops, lost_out): 7710 - SOCK_OPS_GET_FIELD(lost_out, lost_out, struct tcp_sock); 7711 - break; 7712 - 7713 - case offsetof(struct bpf_sock_ops, sacked_out): 7714 - SOCK_OPS_GET_FIELD(sacked_out, sacked_out, struct tcp_sock); 7715 7262 break; 7716 7263 7717 7264 case offsetof(struct bpf_sock_ops, sk_txhash): 7718 7265 SOCK_OPS_GET_OR_SET_FIELD(sk_txhash, sk_txhash, 7719 7266 struct sock, type); 7720 7267 break; 7721 - 7722 - case offsetof(struct bpf_sock_ops, bytes_received): 7723 - SOCK_OPS_GET_FIELD(bytes_received, bytes_received, 7724 - struct tcp_sock); 7725 - break; 7726 - 7727 - case offsetof(struct bpf_sock_ops, bytes_acked): 7728 - SOCK_OPS_GET_FIELD(bytes_acked, bytes_acked, struct tcp_sock); 7729 - break; 7730 - 7731 7268 } 7732 7269 return insn - insn_buf; 7733 7270 }
+65 -7
tools/include/uapi/linux/bpf.h
··· 2329 2329 * "**y**". 2330 2330 * Return 2331 2331 * 0 2332 + * 2333 + * struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk) 2334 + * Description 2335 + * This helper gets a **struct bpf_sock** pointer such 2336 + * that all the fields in bpf_sock can be accessed. 2337 + * Return 2338 + * A **struct bpf_sock** pointer on success, or NULL in 2339 + * case of failure. 2340 + * 2341 + * struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk) 2342 + * Description 2343 + * This helper gets a **struct bpf_tcp_sock** pointer from a 2344 + * **struct bpf_sock** pointer. 2345 + * 2346 + * Return 2347 + * A **struct bpf_tcp_sock** pointer on success, or NULL in 2348 + * case of failure. 2332 2349 */ 2333 2350 #define __BPF_FUNC_MAPPER(FN) \ 2334 2351 FN(unspec), \ ··· 2442 2425 FN(msg_pop_data), \ 2443 2426 FN(rc_pointer_rel), \ 2444 2427 FN(spin_lock), \ 2445 - FN(spin_unlock), 2428 + FN(spin_unlock), \ 2429 + FN(sk_fullsock), \ 2430 + FN(tcp_sock), 2446 2431 2447 2432 /* integer value in 'imm' field of BPF_CALL instruction selects which helper 2448 2433 * function eBPF program intends to call ··· 2564 2545 __u64 tstamp; 2565 2546 __u32 wire_len; 2566 2547 __u32 gso_segs; 2548 + __bpf_md_ptr(struct bpf_sock *, sk); 2567 2549 }; 2568 2550 2569 2551 struct bpf_tunnel_key { ··· 2616 2596 __u32 protocol; 2617 2597 __u32 mark; 2618 2598 __u32 priority; 2619 - __u32 src_ip4; /* Allows 1,2,4-byte read. 2620 - * Stored in network byte order. 
2599 + /* IP address also allows 1 and 2 bytes access */ 2600 + __u32 src_ip4; 2601 + __u32 src_ip6[4]; 2602 + __u32 src_port; /* host byte order */ 2603 + __u32 dst_port; /* network byte order */ 2604 + __u32 dst_ip4; 2605 + __u32 dst_ip6[4]; 2606 + __u32 state; 2607 + }; 2608 + 2609 + struct bpf_tcp_sock { 2610 + __u32 snd_cwnd; /* Sending congestion window */ 2611 + __u32 srtt_us; /* smoothed round trip time << 3 in usecs */ 2612 + __u32 rtt_min; 2613 + __u32 snd_ssthresh; /* Slow start size threshold */ 2614 + __u32 rcv_nxt; /* What we want to receive next */ 2615 + __u32 snd_nxt; /* Next sequence we send */ 2616 + __u32 snd_una; /* First byte we want an ack for */ 2617 + __u32 mss_cache; /* Cached effective mss, not including SACKS */ 2618 + __u32 ecn_flags; /* ECN status bits. */ 2619 + __u32 rate_delivered; /* saved rate sample: packets delivered */ 2620 + __u32 rate_interval_us; /* saved rate sample: time elapsed */ 2621 + __u32 packets_out; /* Packets which are "in flight" */ 2622 + __u32 retrans_out; /* Retransmitted packets out */ 2623 + __u32 total_retrans; /* Total retransmits for entire connection */ 2624 + __u32 segs_in; /* RFC4898 tcpEStatsPerfSegsIn 2625 + * total number of segments in. 2621 2626 */ 2622 - __u32 src_ip6[4]; /* Allows 1,2,4-byte read. 2623 - * Stored in network byte order. 2627 + __u32 data_segs_in; /* RFC4898 tcpEStatsPerfDataSegsIn 2628 + * total number of data segments in. 2624 2629 */ 2625 - __u32 src_port; /* Allows 4-byte read. 2626 - * Stored in host byte order 2630 + __u32 segs_out; /* RFC4898 tcpEStatsPerfSegsOut 2631 + * The total number of segments sent. 2632 + */ 2633 + __u32 data_segs_out; /* RFC4898 tcpEStatsPerfDataSegsOut 2634 + * total number of data segments sent. 2635 + */ 2636 + __u32 lost_out; /* Lost packets */ 2637 + __u32 sacked_out; /* SACK'd packets */ 2638 + __u64 bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived 2639 + * sum(delta(rcv_nxt)), or how many bytes 2640 + * were acked. 
2641 + */ 2642 + __u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked 2643 + * sum(delta(snd_una)), or how many bytes 2644 + * were acked. 2627 2645 */ 2628 2646 }; 2629 2647
+4 -2
tools/testing/selftests/bpf/Makefile
··· 23 23 test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \ 24 24 test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user \ 25 25 test_socket_cookie test_cgroup_storage test_select_reuseport test_section_names \ 26 - test_netcnt test_tcpnotify_user 26 + test_netcnt test_tcpnotify_user test_sock_fields 27 27 28 28 BPF_OBJ_FILES = \ 29 29 test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ ··· 35 35 sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \ 36 36 get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \ 37 37 test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o test_xdp_vlan.o \ 38 - xdp_dummy.o test_map_in_map.o test_spin_lock.o test_map_lock.o 38 + xdp_dummy.o test_map_in_map.o test_spin_lock.o test_map_lock.o \ 39 + test_sock_fields_kern.o 39 40 40 41 # Objects are built with default compilation flags and with sub-register 41 42 # code-gen enabled. ··· 112 111 $(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c 113 112 $(OUTPUT)/test_cgroup_storage: cgroup_helpers.c 114 113 $(OUTPUT)/test_netcnt: cgroup_helpers.c 114 + $(OUTPUT)/test_sock_fields: cgroup_helpers.c 115 115 116 116 .PHONY: force 117 117
+4
tools/testing/selftests/bpf/bpf_helpers.h
··· 176 176 (void *) BPF_FUNC_spin_lock; 177 177 static void (*bpf_spin_unlock)(struct bpf_spin_lock *lock) = 178 178 (void *) BPF_FUNC_spin_unlock; 179 + static struct bpf_sock *(*bpf_sk_fullsock)(struct bpf_sock *sk) = 180 + (void *) BPF_FUNC_sk_fullsock; 181 + static struct bpf_tcp_sock *(*bpf_tcp_sock)(struct bpf_sock *sk) = 182 + (void *) BPF_FUNC_tcp_sock; 179 183 180 184 /* llvm builtin functions that eBPF C program may use to 181 185 * emit BPF_LD_ABS and BPF_LD_IND instructions
+9
tools/testing/selftests/bpf/bpf_util.h
··· 48 48 # define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) 49 49 #endif 50 50 51 + #ifndef sizeof_field 52 + #define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER)) 53 + #endif 54 + 55 + #ifndef offsetofend 56 + #define offsetofend(TYPE, MEMBER) \ 57 + (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER)) 58 + #endif 59 + 51 60 #endif /* __BPF_UTIL__ */
+327
tools/testing/selftests/bpf/test_sock_fields.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2019 Facebook */ 3 + 4 + #include <sys/socket.h> 5 + #include <sys/epoll.h> 6 + #include <netinet/in.h> 7 + #include <arpa/inet.h> 8 + #include <unistd.h> 9 + #include <stdlib.h> 10 + #include <string.h> 11 + #include <errno.h> 12 + 13 + #include <bpf/bpf.h> 14 + #include <bpf/libbpf.h> 15 + 16 + #include "cgroup_helpers.h" 17 + 18 + enum bpf_array_idx { 19 + SRV_IDX, 20 + CLI_IDX, 21 + __NR_BPF_ARRAY_IDX, 22 + }; 23 + 24 + #define CHECK(condition, tag, format...) ({ \ 25 + int __ret = !!(condition); \ 26 + if (__ret) { \ 27 + printf("%s(%d):FAIL:%s ", __func__, __LINE__, tag); \ 28 + printf(format); \ 29 + printf("\n"); \ 30 + exit(-1); \ 31 + } \ 32 + }) 33 + 34 + #define TEST_CGROUP "/test-bpf-sock-fields" 35 + #define DATA "Hello BPF!" 36 + #define DATA_LEN sizeof(DATA) 37 + 38 + static struct sockaddr_in6 srv_sa6, cli_sa6; 39 + static int linum_map_fd; 40 + static int addr_map_fd; 41 + static int tp_map_fd; 42 + static int sk_map_fd; 43 + static __u32 srv_idx = SRV_IDX; 44 + static __u32 cli_idx = CLI_IDX; 45 + 46 + static void init_loopback6(struct sockaddr_in6 *sa6) 47 + { 48 + memset(sa6, 0, sizeof(*sa6)); 49 + sa6->sin6_family = AF_INET6; 50 + sa6->sin6_addr = in6addr_loopback; 51 + } 52 + 53 + static void print_sk(const struct bpf_sock *sk) 54 + { 55 + char src_ip4[24], dst_ip4[24]; 56 + char src_ip6[64], dst_ip6[64]; 57 + 58 + inet_ntop(AF_INET, &sk->src_ip4, src_ip4, sizeof(src_ip4)); 59 + inet_ntop(AF_INET6, &sk->src_ip6, src_ip6, sizeof(src_ip6)); 60 + inet_ntop(AF_INET, &sk->dst_ip4, dst_ip4, sizeof(dst_ip4)); 61 + inet_ntop(AF_INET6, &sk->dst_ip6, dst_ip6, sizeof(dst_ip6)); 62 + 63 + printf("state:%u bound_dev_if:%u family:%u type:%u protocol:%u mark:%u priority:%u " 64 + "src_ip4:%x(%s) src_ip6:%x:%x:%x:%x(%s) src_port:%u " 65 + "dst_ip4:%x(%s) dst_ip6:%x:%x:%x:%x(%s) dst_port:%u\n", 66 + sk->state, sk->bound_dev_if, sk->family, sk->type, sk->protocol, 67 + sk->mark, sk->priority, 68 
+ sk->src_ip4, src_ip4, 69 + sk->src_ip6[0], sk->src_ip6[1], sk->src_ip6[2], sk->src_ip6[3], 70 + src_ip6, sk->src_port, 71 + sk->dst_ip4, dst_ip4, 72 + sk->dst_ip6[0], sk->dst_ip6[1], sk->dst_ip6[2], sk->dst_ip6[3], 73 + dst_ip6, ntohs(sk->dst_port)); 74 + } 75 + 76 + static void print_tp(const struct bpf_tcp_sock *tp) 77 + { 78 + printf("snd_cwnd:%u srtt_us:%u rtt_min:%u snd_ssthresh:%u rcv_nxt:%u " 79 + "snd_nxt:%u snd:una:%u mss_cache:%u ecn_flags:%u " 80 + "rate_delivered:%u rate_interval_us:%u packets_out:%u " 81 + "retrans_out:%u total_retrans:%u segs_in:%u data_segs_in:%u " 82 + "segs_out:%u data_segs_out:%u lost_out:%u sacked_out:%u " 83 + "bytes_received:%llu bytes_acked:%llu\n", 84 + tp->snd_cwnd, tp->srtt_us, tp->rtt_min, tp->snd_ssthresh, 85 + tp->rcv_nxt, tp->snd_nxt, tp->snd_una, tp->mss_cache, 86 + tp->ecn_flags, tp->rate_delivered, tp->rate_interval_us, 87 + tp->packets_out, tp->retrans_out, tp->total_retrans, 88 + tp->segs_in, tp->data_segs_in, tp->segs_out, 89 + tp->data_segs_out, tp->lost_out, tp->sacked_out, 90 + tp->bytes_received, tp->bytes_acked); 91 + } 92 + 93 + static void check_result(void) 94 + { 95 + struct bpf_tcp_sock srv_tp, cli_tp; 96 + struct bpf_sock srv_sk, cli_sk; 97 + __u32 linum, idx0 = 0; 98 + int err; 99 + 100 + err = bpf_map_lookup_elem(linum_map_fd, &idx0, &linum); 101 + CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)", 102 + "err:%d errno:%d", err, errno); 103 + 104 + err = bpf_map_lookup_elem(sk_map_fd, &srv_idx, &srv_sk); 105 + CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &srv_idx)", 106 + "err:%d errno:%d", err, errno); 107 + err = bpf_map_lookup_elem(tp_map_fd, &srv_idx, &srv_tp); 108 + CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &srv_idx)", 109 + "err:%d errno:%d", err, errno); 110 + 111 + err = bpf_map_lookup_elem(sk_map_fd, &cli_idx, &cli_sk); 112 + CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &cli_idx)", 113 + "err:%d errno:%d", err, errno); 114 + err = bpf_map_lookup_elem(tp_map_fd, &cli_idx, 
&cli_tp); 115 + CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &cli_idx)", 116 + "err:%d errno:%d", err, errno); 117 + 118 + printf("srv_sk: "); 119 + print_sk(&srv_sk); 120 + printf("\n"); 121 + 122 + printf("cli_sk: "); 123 + print_sk(&cli_sk); 124 + printf("\n"); 125 + 126 + printf("srv_tp: "); 127 + print_tp(&srv_tp); 128 + printf("\n"); 129 + 130 + printf("cli_tp: "); 131 + print_tp(&cli_tp); 132 + printf("\n"); 133 + 134 + CHECK(srv_sk.state == 10 || 135 + !srv_sk.state || 136 + srv_sk.family != AF_INET6 || 137 + srv_sk.protocol != IPPROTO_TCP || 138 + memcmp(srv_sk.src_ip6, &in6addr_loopback, 139 + sizeof(srv_sk.src_ip6)) || 140 + memcmp(srv_sk.dst_ip6, &in6addr_loopback, 141 + sizeof(srv_sk.dst_ip6)) || 142 + srv_sk.src_port != ntohs(srv_sa6.sin6_port) || 143 + srv_sk.dst_port != cli_sa6.sin6_port, 144 + "Unexpected srv_sk", "Check srv_sk output. linum:%u", linum); 145 + 146 + CHECK(cli_sk.state == 10 || 147 + !cli_sk.state || 148 + cli_sk.family != AF_INET6 || 149 + cli_sk.protocol != IPPROTO_TCP || 150 + memcmp(cli_sk.src_ip6, &in6addr_loopback, 151 + sizeof(cli_sk.src_ip6)) || 152 + memcmp(cli_sk.dst_ip6, &in6addr_loopback, 153 + sizeof(cli_sk.dst_ip6)) || 154 + cli_sk.src_port != ntohs(cli_sa6.sin6_port) || 155 + cli_sk.dst_port != srv_sa6.sin6_port, 156 + "Unexpected cli_sk", "Check cli_sk output. linum:%u", linum); 157 + 158 + CHECK(srv_tp.data_segs_out != 1 || 159 + srv_tp.data_segs_in || 160 + srv_tp.snd_cwnd != 10 || 161 + srv_tp.total_retrans || 162 + srv_tp.bytes_acked != DATA_LEN, 163 + "Unexpected srv_tp", "Check srv_tp output. linum:%u", linum); 164 + 165 + CHECK(cli_tp.data_segs_out || 166 + cli_tp.data_segs_in != 1 || 167 + cli_tp.snd_cwnd != 10 || 168 + cli_tp.total_retrans || 169 + cli_tp.bytes_received != DATA_LEN, 170 + "Unexpected cli_tp", "Check cli_tp output. 
linum:%u", linum); 171 + } 172 + 173 + static void test(void) 174 + { 175 + int listen_fd, cli_fd, accept_fd, epfd, err; 176 + struct epoll_event ev; 177 + socklen_t addrlen; 178 + 179 + addrlen = sizeof(struct sockaddr_in6); 180 + ev.events = EPOLLIN; 181 + 182 + epfd = epoll_create(1); 183 + CHECK(epfd == -1, "epoll_create()", "epfd:%d errno:%d", epfd, errno); 184 + 185 + /* Prepare listen_fd */ 186 + listen_fd = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0); 187 + CHECK(listen_fd == -1, "socket()", "listen_fd:%d errno:%d", 188 + listen_fd, errno); 189 + 190 + init_loopback6(&srv_sa6); 191 + err = bind(listen_fd, (struct sockaddr *)&srv_sa6, sizeof(srv_sa6)); 192 + CHECK(err, "bind(listen_fd)", "err:%d errno:%d", err, errno); 193 + 194 + err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen); 195 + CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d", err, errno); 196 + 197 + err = listen(listen_fd, 1); 198 + CHECK(err, "listen(listen_fd)", "err:%d errno:%d", err, errno); 199 + 200 + /* Prepare cli_fd */ 201 + cli_fd = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0); 202 + CHECK(cli_fd == -1, "socket()", "cli_fd:%d errno:%d", cli_fd, errno); 203 + 204 + init_loopback6(&cli_sa6); 205 + err = bind(cli_fd, (struct sockaddr *)&cli_sa6, sizeof(cli_sa6)); 206 + CHECK(err, "bind(cli_fd)", "err:%d errno:%d", err, errno); 207 + 208 + err = getsockname(cli_fd, (struct sockaddr *)&cli_sa6, &addrlen); 209 + CHECK(err, "getsockname(cli_fd)", "err:%d errno:%d", 210 + err, errno); 211 + 212 + /* Update addr_map with srv_sa6 and cli_sa6 */ 213 + err = bpf_map_update_elem(addr_map_fd, &srv_idx, &srv_sa6, 0); 214 + CHECK(err, "map_update", "err:%d errno:%d", err, errno); 215 + 216 + err = bpf_map_update_elem(addr_map_fd, &cli_idx, &cli_sa6, 0); 217 + CHECK(err, "map_update", "err:%d errno:%d", err, errno); 218 + 219 + /* Connect from cli_sa6 to srv_sa6 */ 220 + err = connect(cli_fd, (struct sockaddr *)&srv_sa6, addrlen); 221 + printf("srv_sa6.sin6_port:%u 
cli_sa6.sin6_port:%u\n\n", 222 + ntohs(srv_sa6.sin6_port), ntohs(cli_sa6.sin6_port)); 223 + CHECK(err && errno != EINPROGRESS, 224 + "connect(cli_fd)", "err:%d errno:%d", err, errno); 225 + 226 + ev.data.fd = listen_fd; 227 + err = epoll_ctl(epfd, EPOLL_CTL_ADD, listen_fd, &ev); 228 + CHECK(err, "epoll_ctl(EPOLL_CTL_ADD, listen_fd)", "err:%d errno:%d", 229 + err, errno); 230 + 231 + /* Accept the connection */ 232 + /* Have some timeout in accept(listen_fd). Just in case. */ 233 + err = epoll_wait(epfd, &ev, 1, 1000); 234 + CHECK(err != 1 || ev.data.fd != listen_fd, 235 + "epoll_wait(listen_fd)", 236 + "err:%d errno:%d ev.data.fd:%d listen_fd:%d", 237 + err, errno, ev.data.fd, listen_fd); 238 + 239 + accept_fd = accept(listen_fd, NULL, NULL); 240 + CHECK(accept_fd == -1, "accept(listen_fd)", "accept_fd:%d errno:%d", 241 + accept_fd, errno); 242 + close(listen_fd); 243 + 244 + /* Send some data from accept_fd to cli_fd */ 245 + err = send(accept_fd, DATA, DATA_LEN, 0); 246 + CHECK(err != DATA_LEN, "send(accept_fd)", "err:%d errno:%d", 247 + err, errno); 248 + 249 + /* Have some timeout in recv(cli_fd). Just in case. 
*/ 250 + ev.data.fd = cli_fd; 251 + err = epoll_ctl(epfd, EPOLL_CTL_ADD, cli_fd, &ev); 252 + CHECK(err, "epoll_ctl(EPOLL_CTL_ADD, cli_fd)", "err:%d errno:%d", 253 + err, errno); 254 + 255 + err = epoll_wait(epfd, &ev, 1, 1000); 256 + CHECK(err != 1 || ev.data.fd != cli_fd, 257 + "epoll_wait(cli_fd)", "err:%d errno:%d ev.data.fd:%d cli_fd:%d", 258 + err, errno, ev.data.fd, cli_fd); 259 + 260 + err = recv(cli_fd, NULL, 0, MSG_TRUNC); 261 + CHECK(err, "recv(cli_fd)", "err:%d errno:%d", err, errno); 262 + 263 + close(epfd); 264 + close(accept_fd); 265 + close(cli_fd); 266 + 267 + check_result(); 268 + } 269 + 270 + int main(int argc, char **argv) 271 + { 272 + struct bpf_prog_load_attr attr = { 273 + .file = "test_sock_fields_kern.o", 274 + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, 275 + .expected_attach_type = BPF_CGROUP_INET_EGRESS, 276 + }; 277 + int cgroup_fd, prog_fd, err; 278 + struct bpf_object *obj; 279 + struct bpf_map *map; 280 + 281 + err = setup_cgroup_environment(); 282 + CHECK(err, "setup_cgroup_environment()", "err:%d errno:%d", 283 + err, errno); 284 + 285 + atexit(cleanup_cgroup_environment); 286 + 287 + /* Create a cgroup, get fd, and join it */ 288 + cgroup_fd = create_and_get_cgroup(TEST_CGROUP); 289 + CHECK(cgroup_fd == -1, "create_and_get_cgroup()", 290 + "cgroup_fd:%d errno:%d", cgroup_fd, errno); 291 + 292 + err = join_cgroup(TEST_CGROUP); 293 + CHECK(err, "join_cgroup", "err:%d errno:%d", err, errno); 294 + 295 + err = bpf_prog_load_xattr(&attr, &obj, &prog_fd); 296 + CHECK(err, "bpf_prog_load_xattr()", "err:%d", err); 297 + 298 + err = bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0); 299 + CHECK(err == -1, "bpf_prog_attach(CPF_CGROUP_INET_EGRESS)", 300 + "err:%d errno%d", err, errno); 301 + close(cgroup_fd); 302 + 303 + map = bpf_object__find_map_by_name(obj, "addr_map"); 304 + CHECK(!map, "cannot find addr_map", "(null)"); 305 + addr_map_fd = bpf_map__fd(map); 306 + 307 + map = bpf_object__find_map_by_name(obj, "sock_result_map"); 
308 + CHECK(!map, "cannot find sock_result_map", "(null)"); 309 + sk_map_fd = bpf_map__fd(map); 310 + 311 + map = bpf_object__find_map_by_name(obj, "tcp_sock_result_map"); 312 + CHECK(!map, "cannot find tcp_sock_result_map", "(null)"); 313 + tp_map_fd = bpf_map__fd(map); 314 + 315 + map = bpf_object__find_map_by_name(obj, "linum_map"); 316 + CHECK(!map, "cannot find linum_map", "(null)"); 317 + linum_map_fd = bpf_map__fd(map); 318 + 319 + test(); 320 + 321 + bpf_object__close(obj); 322 + cleanup_cgroup_environment(); 323 + 324 + printf("PASS\n"); 325 + 326 + return 0; 327 + }
+152
tools/testing/selftests/bpf/test_sock_fields_kern.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2019 Facebook */ 3 + 4 + #include <linux/bpf.h> 5 + #include <netinet/in.h> 6 + #include <stdbool.h> 7 + 8 + #include "bpf_helpers.h" 9 + #include "bpf_endian.h" 10 + 11 + enum bpf_array_idx { 12 + SRV_IDX, 13 + CLI_IDX, 14 + __NR_BPF_ARRAY_IDX, 15 + }; 16 + 17 + struct bpf_map_def SEC("maps") addr_map = { 18 + .type = BPF_MAP_TYPE_ARRAY, 19 + .key_size = sizeof(__u32), 20 + .value_size = sizeof(struct sockaddr_in6), 21 + .max_entries = __NR_BPF_ARRAY_IDX, 22 + }; 23 + 24 + struct bpf_map_def SEC("maps") sock_result_map = { 25 + .type = BPF_MAP_TYPE_ARRAY, 26 + .key_size = sizeof(__u32), 27 + .value_size = sizeof(struct bpf_sock), 28 + .max_entries = __NR_BPF_ARRAY_IDX, 29 + }; 30 + 31 + struct bpf_map_def SEC("maps") tcp_sock_result_map = { 32 + .type = BPF_MAP_TYPE_ARRAY, 33 + .key_size = sizeof(__u32), 34 + .value_size = sizeof(struct bpf_tcp_sock), 35 + .max_entries = __NR_BPF_ARRAY_IDX, 36 + }; 37 + 38 + struct bpf_map_def SEC("maps") linum_map = { 39 + .type = BPF_MAP_TYPE_ARRAY, 40 + .key_size = sizeof(__u32), 41 + .value_size = sizeof(__u32), 42 + .max_entries = 1, 43 + }; 44 + 45 + static bool is_loopback6(__u32 *a6) 46 + { 47 + return !a6[0] && !a6[1] && !a6[2] && a6[3] == bpf_htonl(1); 48 + } 49 + 50 + static void skcpy(struct bpf_sock *dst, 51 + const struct bpf_sock *src) 52 + { 53 + dst->bound_dev_if = src->bound_dev_if; 54 + dst->family = src->family; 55 + dst->type = src->type; 56 + dst->protocol = src->protocol; 57 + dst->mark = src->mark; 58 + dst->priority = src->priority; 59 + dst->src_ip4 = src->src_ip4; 60 + dst->src_ip6[0] = src->src_ip6[0]; 61 + dst->src_ip6[1] = src->src_ip6[1]; 62 + dst->src_ip6[2] = src->src_ip6[2]; 63 + dst->src_ip6[3] = src->src_ip6[3]; 64 + dst->src_port = src->src_port; 65 + dst->dst_ip4 = src->dst_ip4; 66 + dst->dst_ip6[0] = src->dst_ip6[0]; 67 + dst->dst_ip6[1] = src->dst_ip6[1]; 68 + dst->dst_ip6[2] = src->dst_ip6[2]; 69 + dst->dst_ip6[3] = 
src->dst_ip6[3]; 70 + dst->dst_port = src->dst_port; 71 + dst->state = src->state; 72 + } 73 + 74 + static void tpcpy(struct bpf_tcp_sock *dst, 75 + const struct bpf_tcp_sock *src) 76 + { 77 + dst->snd_cwnd = src->snd_cwnd; 78 + dst->srtt_us = src->srtt_us; 79 + dst->rtt_min = src->rtt_min; 80 + dst->snd_ssthresh = src->snd_ssthresh; 81 + dst->rcv_nxt = src->rcv_nxt; 82 + dst->snd_nxt = src->snd_nxt; 83 + dst->snd_una = src->snd_una; 84 + dst->mss_cache = src->mss_cache; 85 + dst->ecn_flags = src->ecn_flags; 86 + dst->rate_delivered = src->rate_delivered; 87 + dst->rate_interval_us = src->rate_interval_us; 88 + dst->packets_out = src->packets_out; 89 + dst->retrans_out = src->retrans_out; 90 + dst->total_retrans = src->total_retrans; 91 + dst->segs_in = src->segs_in; 92 + dst->data_segs_in = src->data_segs_in; 93 + dst->segs_out = src->segs_out; 94 + dst->data_segs_out = src->data_segs_out; 95 + dst->lost_out = src->lost_out; 96 + dst->sacked_out = src->sacked_out; 97 + dst->bytes_received = src->bytes_received; 98 + dst->bytes_acked = src->bytes_acked; 99 + } 100 + 101 + #define RETURN { \ 102 + linum = __LINE__; \ 103 + bpf_map_update_elem(&linum_map, &idx0, &linum, 0); \ 104 + return 1; \ 105 + } 106 + 107 + SEC("cgroup_skb/egress") 108 + int read_sock_fields(struct __sk_buff *skb) 109 + { 110 + __u32 srv_idx = SRV_IDX, cli_idx = CLI_IDX, idx; 111 + struct sockaddr_in6 *srv_sa6, *cli_sa6; 112 + struct bpf_tcp_sock *tp, *tp_ret; 113 + struct bpf_sock *sk, *sk_ret; 114 + __u32 linum, idx0 = 0; 115 + 116 + sk = skb->sk; 117 + if (!sk || sk->state == 10) 118 + RETURN; 119 + 120 + sk = bpf_sk_fullsock(sk); 121 + if (!sk || sk->family != AF_INET6 || sk->protocol != IPPROTO_TCP || 122 + !is_loopback6(sk->src_ip6)) 123 + RETURN; 124 + 125 + tp = bpf_tcp_sock(sk); 126 + if (!tp) 127 + RETURN; 128 + 129 + srv_sa6 = bpf_map_lookup_elem(&addr_map, &srv_idx); 130 + cli_sa6 = bpf_map_lookup_elem(&addr_map, &cli_idx); 131 + if (!srv_sa6 || !cli_sa6) 132 + RETURN; 133 + 134 + 
if (sk->src_port == bpf_ntohs(srv_sa6->sin6_port)) 135 + idx = srv_idx; 136 + else if (sk->src_port == bpf_ntohs(cli_sa6->sin6_port)) 137 + idx = cli_idx; 138 + else 139 + RETURN; 140 + 141 + sk_ret = bpf_map_lookup_elem(&sock_result_map, &idx); 142 + tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &idx); 143 + if (!sk_ret || !tp_ret) 144 + RETURN; 145 + 146 + skcpy(sk_ret, sk); 147 + tpcpy(tp_ret, tp); 148 + 149 + RETURN; 150 + } 151 + 152 + char _license[] SEC("license") = "GPL";
+2 -2
tools/testing/selftests/bpf/verifier/ref_tracking.c
··· 547 547 BPF_EXIT_INSN(), 548 548 }, 549 549 .prog_type = BPF_PROG_TYPE_SCHED_CLS, 550 - .errstr = "cannot write into socket", 550 + .errstr = "cannot write into sock", 551 551 .result = REJECT, 552 552 }, 553 553 { ··· 562 562 BPF_EXIT_INSN(), 563 563 }, 564 564 .prog_type = BPF_PROG_TYPE_SCHED_CLS, 565 - .errstr = "invalid bpf_sock access off=0 size=8", 565 + .errstr = "invalid sock access off=0 size=8", 566 566 .result = REJECT, 567 567 }, 568 568 {
+384
tools/testing/selftests/bpf/verifier/sock.c
··· 1 + { 2 + "skb->sk: no NULL check", 3 + .insns = { 4 + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), 5 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), 6 + BPF_MOV64_IMM(BPF_REG_0, 0), 7 + BPF_EXIT_INSN(), 8 + }, 9 + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, 10 + .result = REJECT, 11 + .errstr = "invalid mem access 'sock_common_or_null'", 12 + }, 13 + { 14 + "skb->sk: sk->family [non fullsock field]", 15 + .insns = { 16 + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), 17 + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), 18 + BPF_MOV64_IMM(BPF_REG_0, 0), 19 + BPF_EXIT_INSN(), 20 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, offsetof(struct bpf_sock, family)), 21 + BPF_MOV64_IMM(BPF_REG_0, 0), 22 + BPF_EXIT_INSN(), 23 + }, 24 + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, 25 + .result = ACCEPT, 26 + }, 27 + { 28 + "skb->sk: sk->type [fullsock field]", 29 + .insns = { 30 + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), 31 + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), 32 + BPF_MOV64_IMM(BPF_REG_0, 0), 33 + BPF_EXIT_INSN(), 34 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, offsetof(struct bpf_sock, type)), 35 + BPF_MOV64_IMM(BPF_REG_0, 0), 36 + BPF_EXIT_INSN(), 37 + }, 38 + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, 39 + .result = REJECT, 40 + .errstr = "invalid sock_common access", 41 + }, 42 + { 43 + "bpf_sk_fullsock(skb->sk): no !skb->sk check", 44 + .insns = { 45 + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), 46 + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), 47 + BPF_MOV64_IMM(BPF_REG_0, 0), 48 + BPF_EXIT_INSN(), 49 + }, 50 + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, 51 + .result = REJECT, 52 + .errstr = "type=sock_common_or_null expected=sock_common", 53 + }, 54 + { 55 + "sk_fullsock(skb->sk): no NULL check on ret", 56 + .insns = { 57 + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), 58 + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), 59 + BPF_MOV64_IMM(BPF_REG_0, 0), 60 + 
BPF_EXIT_INSN(), 61 + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), 62 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, type)), 63 + BPF_MOV64_IMM(BPF_REG_0, 0), 64 + BPF_EXIT_INSN(), 65 + }, 66 + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, 67 + .result = REJECT, 68 + .errstr = "invalid mem access 'sock_or_null'", 69 + }, 70 + { 71 + "sk_fullsock(skb->sk): sk->type [fullsock field]", 72 + .insns = { 73 + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), 74 + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), 75 + BPF_MOV64_IMM(BPF_REG_0, 0), 76 + BPF_EXIT_INSN(), 77 + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), 78 + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), 79 + BPF_MOV64_IMM(BPF_REG_0, 0), 80 + BPF_EXIT_INSN(), 81 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, type)), 82 + BPF_MOV64_IMM(BPF_REG_0, 0), 83 + BPF_EXIT_INSN(), 84 + }, 85 + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, 86 + .result = ACCEPT, 87 + }, 88 + { 89 + "sk_fullsock(skb->sk): sk->family [non fullsock field]", 90 + .insns = { 91 + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), 92 + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), 93 + BPF_MOV64_IMM(BPF_REG_0, 0), 94 + BPF_EXIT_INSN(), 95 + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), 96 + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), 97 + BPF_EXIT_INSN(), 98 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, family)), 99 + BPF_MOV64_IMM(BPF_REG_0, 0), 100 + BPF_EXIT_INSN(), 101 + }, 102 + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, 103 + .result = ACCEPT, 104 + }, 105 + { 106 + "sk_fullsock(skb->sk): sk->state [narrow load]", 107 + .insns = { 108 + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), 109 + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), 110 + BPF_MOV64_IMM(BPF_REG_0, 0), 111 + BPF_EXIT_INSN(), 112 + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), 113 + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), 114 + BPF_MOV64_IMM(BPF_REG_0, 0), 115 + BPF_EXIT_INSN(), 116 + BPF_LDX_MEM(BPF_B, BPF_REG_0, 
BPF_REG_0, offsetof(struct bpf_sock, state)), 117 + BPF_MOV64_IMM(BPF_REG_0, 0), 118 + BPF_EXIT_INSN(), 119 + }, 120 + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, 121 + .result = ACCEPT, 122 + }, 123 + { 124 + "sk_fullsock(skb->sk): sk->dst_port [narrow load]", 125 + .insns = { 126 + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), 127 + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), 128 + BPF_MOV64_IMM(BPF_REG_0, 0), 129 + BPF_EXIT_INSN(), 130 + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), 131 + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), 132 + BPF_MOV64_IMM(BPF_REG_0, 0), 133 + BPF_EXIT_INSN(), 134 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port)), 135 + BPF_MOV64_IMM(BPF_REG_0, 0), 136 + BPF_EXIT_INSN(), 137 + }, 138 + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, 139 + .result = ACCEPT, 140 + }, 141 + { 142 + "sk_fullsock(skb->sk): sk->dst_port [load 2nd byte]", 143 + .insns = { 144 + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), 145 + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), 146 + BPF_MOV64_IMM(BPF_REG_0, 0), 147 + BPF_EXIT_INSN(), 148 + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), 149 + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), 150 + BPF_MOV64_IMM(BPF_REG_0, 0), 151 + BPF_EXIT_INSN(), 152 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 1), 153 + BPF_MOV64_IMM(BPF_REG_0, 0), 154 + BPF_EXIT_INSN(), 155 + }, 156 + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, 157 + .result = REJECT, 158 + .errstr = "invalid sock access", 159 + }, 160 + { 161 + "sk_fullsock(skb->sk): sk->dst_ip6 [load 2nd byte]", 162 + .insns = { 163 + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), 164 + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), 165 + BPF_MOV64_IMM(BPF_REG_0, 0), 166 + BPF_EXIT_INSN(), 167 + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), 168 + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), 169 + BPF_MOV64_IMM(BPF_REG_0, 0), 170 + BPF_EXIT_INSN(), 171 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct 
bpf_sock, dst_ip6[0]) + 1), 172 + BPF_MOV64_IMM(BPF_REG_0, 0), 173 + BPF_EXIT_INSN(), 174 + }, 175 + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, 176 + .result = ACCEPT, 177 + }, 178 + { 179 + "sk_fullsock(skb->sk): sk->type [narrow load]", 180 + .insns = { 181 + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), 182 + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), 183 + BPF_MOV64_IMM(BPF_REG_0, 0), 184 + BPF_EXIT_INSN(), 185 + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), 186 + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), 187 + BPF_MOV64_IMM(BPF_REG_0, 0), 188 + BPF_EXIT_INSN(), 189 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, type)), 190 + BPF_MOV64_IMM(BPF_REG_0, 0), 191 + BPF_EXIT_INSN(), 192 + }, 193 + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, 194 + .result = ACCEPT, 195 + }, 196 + { 197 + "sk_fullsock(skb->sk): sk->protocol [narrow load]", 198 + .insns = { 199 + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), 200 + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), 201 + BPF_MOV64_IMM(BPF_REG_0, 0), 202 + BPF_EXIT_INSN(), 203 + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), 204 + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), 205 + BPF_MOV64_IMM(BPF_REG_0, 0), 206 + BPF_EXIT_INSN(), 207 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, protocol)), 208 + BPF_MOV64_IMM(BPF_REG_0, 0), 209 + BPF_EXIT_INSN(), 210 + }, 211 + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, 212 + .result = ACCEPT, 213 + }, 214 + { 215 + "sk_fullsock(skb->sk): beyond last field", 216 + .insns = { 217 + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), 218 + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), 219 + BPF_MOV64_IMM(BPF_REG_0, 0), 220 + BPF_EXIT_INSN(), 221 + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), 222 + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), 223 + BPF_MOV64_IMM(BPF_REG_0, 0), 224 + BPF_EXIT_INSN(), 225 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetofend(struct bpf_sock, state)), 226 + BPF_MOV64_IMM(BPF_REG_0, 0), 227 + BPF_EXIT_INSN(), 
228 + }, 229 + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, 230 + .result = REJECT, 231 + .errstr = "invalid sock access", 232 + }, 233 + { 234 + "bpf_tcp_sock(skb->sk): no !skb->sk check", 235 + .insns = { 236 + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), 237 + BPF_EMIT_CALL(BPF_FUNC_tcp_sock), 238 + BPF_MOV64_IMM(BPF_REG_0, 0), 239 + BPF_EXIT_INSN(), 240 + }, 241 + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, 242 + .result = REJECT, 243 + .errstr = "type=sock_common_or_null expected=sock_common", 244 + }, 245 + { 246 + "bpf_tcp_sock(skb->sk): no NULL check on ret", 247 + .insns = { 248 + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), 249 + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), 250 + BPF_MOV64_IMM(BPF_REG_0, 0), 251 + BPF_EXIT_INSN(), 252 + BPF_EMIT_CALL(BPF_FUNC_tcp_sock), 253 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_tcp_sock, snd_cwnd)), 254 + BPF_MOV64_IMM(BPF_REG_0, 0), 255 + BPF_EXIT_INSN(), 256 + }, 257 + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, 258 + .result = REJECT, 259 + .errstr = "invalid mem access 'tcp_sock_or_null'", 260 + }, 261 + { 262 + "bpf_tcp_sock(skb->sk): tp->snd_cwnd", 263 + .insns = { 264 + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), 265 + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), 266 + BPF_MOV64_IMM(BPF_REG_0, 0), 267 + BPF_EXIT_INSN(), 268 + BPF_EMIT_CALL(BPF_FUNC_tcp_sock), 269 + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), 270 + BPF_EXIT_INSN(), 271 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_tcp_sock, snd_cwnd)), 272 + BPF_MOV64_IMM(BPF_REG_0, 0), 273 + BPF_EXIT_INSN(), 274 + }, 275 + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, 276 + .result = ACCEPT, 277 + }, 278 + { 279 + "bpf_tcp_sock(skb->sk): tp->bytes_acked", 280 + .insns = { 281 + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), 282 + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), 283 + BPF_MOV64_IMM(BPF_REG_0, 0), 284 + BPF_EXIT_INSN(), 285 + 
BPF_EMIT_CALL(BPF_FUNC_tcp_sock), 286 + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), 287 + BPF_EXIT_INSN(), 288 + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_tcp_sock, bytes_acked)), 289 + BPF_MOV64_IMM(BPF_REG_0, 0), 290 + BPF_EXIT_INSN(), 291 + }, 292 + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, 293 + .result = ACCEPT, 294 + }, 295 + { 296 + "bpf_tcp_sock(skb->sk): beyond last field", 297 + .insns = { 298 + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), 299 + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), 300 + BPF_MOV64_IMM(BPF_REG_0, 0), 301 + BPF_EXIT_INSN(), 302 + BPF_EMIT_CALL(BPF_FUNC_tcp_sock), 303 + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), 304 + BPF_EXIT_INSN(), 305 + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, offsetofend(struct bpf_tcp_sock, bytes_acked)), 306 + BPF_MOV64_IMM(BPF_REG_0, 0), 307 + BPF_EXIT_INSN(), 308 + }, 309 + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, 310 + .result = REJECT, 311 + .errstr = "invalid tcp_sock access", 312 + }, 313 + { 314 + "bpf_tcp_sock(bpf_sk_fullsock(skb->sk)): tp->snd_cwnd", 315 + .insns = { 316 + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), 317 + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), 318 + BPF_MOV64_IMM(BPF_REG_0, 0), 319 + BPF_EXIT_INSN(), 320 + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), 321 + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), 322 + BPF_EXIT_INSN(), 323 + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), 324 + BPF_EMIT_CALL(BPF_FUNC_tcp_sock), 325 + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), 326 + BPF_EXIT_INSN(), 327 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_tcp_sock, snd_cwnd)), 328 + BPF_MOV64_IMM(BPF_REG_0, 0), 329 + BPF_EXIT_INSN(), 330 + }, 331 + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, 332 + .result = ACCEPT, 333 + }, 334 + { 335 + "bpf_sk_release(skb->sk)", 336 + .insns = { 337 + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), 338 + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), 339 + BPF_EMIT_CALL(BPF_FUNC_sk_release), 340 + 
BPF_MOV64_IMM(BPF_REG_0, 0), 341 + BPF_EXIT_INSN(), 342 + }, 343 + .prog_type = BPF_PROG_TYPE_SCHED_CLS, 344 + .result = REJECT, 345 + .errstr = "type=sock_common expected=sock", 346 + }, 347 + { 348 + "bpf_sk_release(bpf_sk_fullsock(skb->sk))", 349 + .insns = { 350 + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), 351 + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), 352 + BPF_MOV64_IMM(BPF_REG_0, 0), 353 + BPF_EXIT_INSN(), 354 + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), 355 + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), 356 + BPF_EXIT_INSN(), 357 + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), 358 + BPF_EMIT_CALL(BPF_FUNC_sk_release), 359 + BPF_MOV64_IMM(BPF_REG_0, 1), 360 + BPF_EXIT_INSN(), 361 + }, 362 + .prog_type = BPF_PROG_TYPE_SCHED_CLS, 363 + .result = REJECT, 364 + .errstr = "reference has not been acquired before", 365 + }, 366 + { 367 + "bpf_sk_release(bpf_tcp_sock(skb->sk))", 368 + .insns = { 369 + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), 370 + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), 371 + BPF_MOV64_IMM(BPF_REG_0, 0), 372 + BPF_EXIT_INSN(), 373 + BPF_EMIT_CALL(BPF_FUNC_tcp_sock), 374 + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), 375 + BPF_EXIT_INSN(), 376 + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), 377 + BPF_EMIT_CALL(BPF_FUNC_sk_release), 378 + BPF_MOV64_IMM(BPF_REG_0, 1), 379 + BPF_EXIT_INSN(), 380 + }, 381 + .prog_type = BPF_PROG_TYPE_SCHED_CLS, 382 + .result = REJECT, 383 + .errstr = "type=tcp_sock expected=sock", 384 + },
+1 -1
tools/testing/selftests/bpf/verifier/unpriv.c
··· 365 365 }, 366 366 .result = REJECT, 367 367 //.errstr = "same insn cannot be used with different pointers", 368 - .errstr = "cannot write into socket", 368 + .errstr = "cannot write into sock", 369 369 .prog_type = BPF_PROG_TYPE_SCHED_CLS, 370 370 }, 371 371 {