Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'ktls-sockmap-fix-missing-uncharge-operation-and-add-selfttest'

Jiayuan Chen says:

====================
ktls, sockmap: Fix missing uncharge operation and add selftest

Cong reported a warning when running ./test_sockmap:
https://lore.kernel.org/bpf/aAmIi0vlycHtbXeb@pop-os.localdomain/T/#t

------------[ cut here ]------------
WARNING: CPU: 1 PID: 40 at net/ipv4/af_inet.c inet_sock_destruct+0x173/0x1d5
Tainted: [W]=WARN
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.15.0-1 04/01/2014
Workqueue: events sk_psock_destroy
RIP: 0010:inet_sock_destruct+0x173/0x1d5
RSP: 0018:ffff8880085cfc18 EFLAGS: 00010202
RAX: 1ffff11003dbfc00 RBX: ffff88801edfe3e8 RCX: ffffffff822f5af4
RDX: 0000000000000007 RSI: dffffc0000000000 RDI: ffff88801edfe16c
RBP: ffff88801edfe184 R08: ffffed1003dbfc31 R09: 0000000000000000
R10: ffffffff822f5ab7 R11: ffff88801edfe187 R12: ffff88801edfdec0
R13: ffff888020376ac0 R14: ffff888020376ac0 R15: ffff888020376a60
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000556365155830 CR3: 000000001d6aa000 CR4: 0000000000350ef0
Call Trace:
<TASK>
__sk_destruct+0x46/0x222
sk_psock_destroy+0x22f/0x242
process_one_work+0x504/0x8a8
? process_one_work+0x39d/0x8a8
? __pfx_process_one_work+0x10/0x10
? worker_thread+0x44/0x2ae
? __list_add_valid_or_report+0x83/0xea
? srso_return_thunk+0x5/0x5f
? __list_add+0x45/0x52
process_scheduled_works+0x73/0x82
worker_thread+0x1ce/0x2ae

When we specify apply_bytes, we divide the msg into multiple segments,
each with a length of 'send', and every time we send this part of the data
using tcp_bpf_sendmsg_redir(), we use sk_msg_return_zero() to uncharge the
memory of the specified 'send' size.

However, if the first segment of data fails to send (for example, because
the peer's buffer is full), we need to release the whole msg. When the msg
is released, the memory of the subsequent segments is never uncharged.

This modification does not make significant logical changes, but only
fills in the missing uncharge places.

This issue has existed all along, but it was only exposed after we added
the apply_bytes test to test_sockmap:

commit 3448ad23b34e ("selftests/bpf: Add apply_bytes test to test_txmsg_redir_wait_sndmem in test_sockmap")
====================

Link: https://patch.msgid.link/20250425060015.6968-1-jiayuan.chen@linux.dev
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>

+93
+7
net/tls/tls_sw.c
··· 908 908 &msg_redir, send, flags); 909 909 lock_sock(sk); 910 910 if (err < 0) { 911 + /* Regardless of whether the data represented by 912 + * msg_redir is sent successfully, we have already 913 + * uncharged it via sk_msg_return_zero(). The 914 + * msg->sg.size represents the remaining unprocessed 915 + * data, which needs to be uncharged here. 916 + */ 917 + sk_mem_uncharge(sk, msg->sg.size); 911 918 *copied -= sk_msg_free_nocharge(sk, &msg_redir); 912 919 msg->sg.size = 0; 913 920 }
+76
tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
··· 240 240 test_sockmap_ktls__destroy(skel); 241 241 } 242 242 243 + static void test_sockmap_ktls_tx_no_buf(int family, int sotype, bool push) 244 + { 245 + int c = -1, p = -1, one = 1, two = 2; 246 + struct test_sockmap_ktls *skel; 247 + unsigned char *data = NULL; 248 + struct msghdr msg = {0}; 249 + struct iovec iov[2]; 250 + int prog_fd, map_fd; 251 + int txrx_buf = 1024; 252 + int iov_length = 8192; 253 + int err; 254 + 255 + skel = test_sockmap_ktls__open_and_load(); 256 + if (!ASSERT_TRUE(skel, "open ktls skel")) 257 + return; 258 + 259 + err = create_pair(family, sotype, &c, &p); 260 + if (!ASSERT_OK(err, "create_pair()")) 261 + goto out; 262 + 263 + err = setsockopt(c, SOL_SOCKET, SO_RCVBUFFORCE, &txrx_buf, sizeof(int)); 264 + err |= setsockopt(p, SOL_SOCKET, SO_SNDBUFFORCE, &txrx_buf, sizeof(int)); 265 + if (!ASSERT_OK(err, "set buf limit")) 266 + goto out; 267 + 268 + prog_fd = bpf_program__fd(skel->progs.prog_sk_policy_redir); 269 + map_fd = bpf_map__fd(skel->maps.sock_map); 270 + 271 + err = bpf_prog_attach(prog_fd, map_fd, BPF_SK_MSG_VERDICT, 0); 272 + if (!ASSERT_OK(err, "bpf_prog_attach sk msg")) 273 + goto out; 274 + 275 + err = bpf_map_update_elem(map_fd, &one, &c, BPF_NOEXIST); 276 + if (!ASSERT_OK(err, "bpf_map_update_elem(c)")) 277 + goto out; 278 + 279 + err = bpf_map_update_elem(map_fd, &two, &p, BPF_NOEXIST); 280 + if (!ASSERT_OK(err, "bpf_map_update_elem(p)")) 281 + goto out; 282 + 283 + skel->bss->apply_bytes = 1024; 284 + 285 + err = init_ktls_pairs(c, p); 286 + if (!ASSERT_OK(err, "init_ktls_pairs(c, p)")) 287 + goto out; 288 + 289 + data = calloc(iov_length, sizeof(char)); 290 + if (!data) 291 + goto out; 292 + 293 + iov[0].iov_base = data; 294 + iov[0].iov_len = iov_length; 295 + iov[1].iov_base = data; 296 + iov[1].iov_len = iov_length; 297 + msg.msg_iov = iov; 298 + msg.msg_iovlen = 2; 299 + 300 + for (;;) { 301 + err = sendmsg(c, &msg, MSG_DONTWAIT); 302 + if (err <= 0) 303 + break; 304 + } 305 + 306 + out: 307 + if (data) 308 + 
free(data); 309 + if (c != -1) 310 + close(c); 311 + if (p != -1) 312 + close(p); 313 + 314 + test_sockmap_ktls__destroy(skel); 315 + } 316 + 243 317 static void run_tests(int family, enum bpf_map_type map_type) 244 318 { 245 319 int map; ··· 336 262 test_sockmap_ktls_tx_cork(family, sotype, false); 337 263 if (test__start_subtest("tls tx cork with push")) 338 264 test_sockmap_ktls_tx_cork(family, sotype, true); 265 + if (test__start_subtest("tls tx egress with no buf")) 266 + test_sockmap_ktls_tx_no_buf(family, sotype, true); 339 267 } 340 268 341 269 void test_sockmap_ktls(void)
+10
tools/testing/selftests/bpf/progs/test_sockmap_ktls.c
··· 6 6 int cork_byte; 7 7 int push_start; 8 8 int push_end; 9 + int apply_bytes; 9 10 10 11 struct { 11 12 __uint(type, BPF_MAP_TYPE_SOCKMAP); ··· 24 23 bpf_msg_push_data(msg, push_start, push_end, 0); 25 24 26 25 return SK_PASS; 26 + } 27 + 28 + SEC("sk_msg") 29 + int prog_sk_policy_redir(struct sk_msg_md *msg) 30 + { 31 + int two = 2; 32 + 33 + bpf_msg_apply_bytes(msg, apply_bytes); 34 + return bpf_msg_redirect_map(msg, &sock_map, two, 0); 27 35 }