Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

vsock/test: Add test for null ptr deref when transport changes

Add a new test to ensure that when the transport changes a null pointer
dereference does not occur. The bug was reported upstream [1] and fixed
with commit 2cb7c756f605 ("vsock/virtio: discard packets if the
transport changes").

KASAN: null-ptr-deref in range [0x0000000000000060-0x0000000000000067]
CPU: 2 UID: 0 PID: 463 Comm: kworker/2:3 Not tainted
Workqueue: vsock-loopback vsock_loopback_work
RIP: 0010:vsock_stream_has_data+0x44/0x70
Call Trace:
virtio_transport_do_close+0x68/0x1a0
virtio_transport_recv_pkt+0x1045/0x2ae4
vsock_loopback_work+0x27d/0x3f0
process_one_work+0x846/0x1420
worker_thread+0x5b3/0xf80
kthread+0x35a/0x700
ret_from_fork+0x2d/0x70
ret_from_fork_asm+0x1a/0x30

Note that this test may not fail in a kernel without the fix, but it may
hang on the client side if it triggers a kernel oops.

This works by creating a socket, trying to connect to a server, and then
executing a second connect operation on the same socket but to a
different CID (0). This triggers a transport change. If the connect
operation is interrupted by a signal, this could cause a null-ptr-deref.

Since this bug is non-deterministic, we need to try several times. It
is reasonable to assume that the bug will show up within the timeout
period.

If there is a G2H transport loaded in the system, the bug is not
triggered and this test will always pass. This is because
`vsock_assign_transport`, when called with CID 0 as in this case, sets
vsk->transport to `transport_g2h`, which is not NULL if a G2H transport
is available.

[1] https://lore.kernel.org/netdev/Z2LvdTTQR7dBmPb5@v4bel-B760M-AORUS-ELITE-AX/

Suggested-by: Hyunwoo Kim <v4bel@theori.io>
Suggested-by: Michal Luczaj <mhal@rbox.co>
Signed-off-by: Luigi Leonardi <leonardi@redhat.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Link: https://patch.msgid.link/20250630-test_vsock-v5-2-2492e141e80b@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

Luigi Leonardi and committed by
Jakub Kicinski
3a764d93 e84b20b2

+171
+1
tools/testing/vsock/Makefile
··· 5 5 vsock_diag_test: vsock_diag_test.o timeout.o control.o util.o 6 6 vsock_perf: vsock_perf.o msg_zerocopy_common.o 7 7 8 + vsock_test: LDLIBS = -lpthread 8 9 vsock_uring_test: LDLIBS = -luring 9 10 vsock_uring_test: control.o util.o vsock_uring_test.o timeout.o msg_zerocopy_common.o 10 11
+170
tools/testing/vsock/vsock_test.c
··· 22 22 #include <signal.h> 23 23 #include <sys/ioctl.h> 24 24 #include <linux/time64.h> 25 + #include <pthread.h> 26 + #include <fcntl.h> 25 27 26 28 #include "vsock_test_zerocopy.h" 27 29 #include "timeout.h" ··· 1869 1867 close(fd); 1870 1868 } 1871 1869 1870 + #define TRANSPORT_CHANGE_TIMEOUT 2 /* seconds */ 1871 + /* Helper thread body: enables asynchronous cancellation, then sends SIGUSR1 in a tight loop to the process whose pid is passed through vargp. The loop never exits on its own; the thread ends when it is cancelled, and any failure terminates the whole test with EXIT_FAILURE. */ 1872 + static void *test_stream_transport_change_thread(void *vargp) 1873 + { 1874 + pid_t *pid = (pid_t *)vargp; 1875 + int ret; 1876 + 1877 + ret = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); 1878 + if (ret) { 1879 + fprintf(stderr, "pthread_setcanceltype: %d\n", ret); 1880 + exit(EXIT_FAILURE); 1881 + } 1882 + 1883 + while (true) { 1884 + if (kill(*pid, SIGUSR1) < 0) { 1885 + perror("kill"); 1886 + exit(EXIT_FAILURE); 1887 + } 1888 + } 1889 + return NULL; 1890 + } 1891 + 1892 + static void test_transport_change_signal_handler(int signal) 1893 + { 1894 + /* We need a custom handler for SIGUSR1 as the default one terminates the process. */ 1895 + } 1896 + /* Client side of the test: while the helper thread keeps sending SIGUSR1 to this process, repeatedly create a socket, connect() it to the peer, then connect() the same socket to CID 0 to force a transport change. Iterates until TRANSPORT_CHANGE_TIMEOUT seconds have elapsed, writing CONTROL_CONTINUE after every iteration and CONTROL_DONE at the end, then cancels and joins the thread and restores the previous SIGUSR1 handler. */ 1897 + static void test_stream_transport_change_client(const struct test_opts *opts) 1898 + { 1899 + __sighandler_t old_handler; 1900 + pid_t pid = getpid(); 1901 + pthread_t thread_id; 1902 + time_t tout; 1903 + int ret, tr; 1904 + 1905 + tr = get_transports(); 1906 + 1907 + /* Print a warning if there is a G2H transport loaded. 1908 + * This is on a best effort basis because VMCI can be either G2H or H2G, and there is 1909 + * no easy way to understand it. 1910 + * The bug we are testing only appears when G2H transports are not loaded. 1911 + * This is because `vsock_assign_transport`, when using CID 0, assigns a G2H transport 1912 + * to vsk->transport. If none is available it is set to NULL, causing the null-ptr-deref. 1913 + */ 1914 + if (tr & TRANSPORTS_G2H) 1915 + fprintf(stderr, "G2H Transport detected. 
This test will not fail.\n"); 1916 + 1917 + old_handler = signal(SIGUSR1, test_transport_change_signal_handler); 1918 + if (old_handler == SIG_ERR) { 1919 + perror("signal"); 1920 + exit(EXIT_FAILURE); 1921 + } 1922 + 1923 + ret = pthread_create(&thread_id, NULL, test_stream_transport_change_thread, &pid); 1924 + if (ret) { 1925 + fprintf(stderr, "pthread_create: %d\n", ret); 1926 + exit(EXIT_FAILURE); 1927 + } 1928 + 1929 + control_expectln("LISTENING"); 1930 + 1931 + tout = current_nsec() + TRANSPORT_CHANGE_TIMEOUT * NSEC_PER_SEC; 1932 + do { 1933 + struct sockaddr_vm sa = { 1934 + .svm_family = AF_VSOCK, 1935 + .svm_cid = opts->peer_cid, 1936 + .svm_port = opts->peer_port, 1937 + }; 1938 + int s; 1939 + 1940 + s = socket(AF_VSOCK, SOCK_STREAM, 0); 1941 + if (s < 0) { 1942 + perror("socket"); 1943 + exit(EXIT_FAILURE); 1944 + } 1945 + 1946 + ret = connect(s, (struct sockaddr *)&sa, sizeof(sa)); 1947 + /* The connect can fail due to signals coming from the thread, 1948 + * or because the receiver connection queue is full. 1949 + * Ignoring also the latter case because there is no way 1950 + * of synchronizing client's connect and server's accept when 1951 + * connect(s) are constantly being interrupted by signals. 1952 + */ 1953 + if (ret == -1 && (errno != EINTR && errno != ECONNRESET)) { 1954 + perror("connect"); 1955 + exit(EXIT_FAILURE); 1956 + } 1957 + 1958 + /* Set CID to 0 to cause a transport change. */ 1959 + sa.svm_cid = 0; 1960 + 1961 + /* Ignore return value since it can fail or not. 1962 + * If the previous connect is interrupted while the 1963 + * connection request is already sent, the second 1964 + * connect() will wait for the response. 
1965 + */ 1966 + connect(s, (struct sockaddr *)&sa, sizeof(sa)); 1967 + 1968 + close(s); 1969 + 1970 + control_writeulong(CONTROL_CONTINUE); 1971 + 1972 + } while (current_nsec() < tout); 1973 + 1974 + control_writeulong(CONTROL_DONE); 1975 + 1976 + ret = pthread_cancel(thread_id); 1977 + if (ret) { 1978 + fprintf(stderr, "pthread_cancel: %d\n", ret); 1979 + exit(EXIT_FAILURE); 1980 + } 1981 + 1982 + ret = pthread_join(thread_id, NULL); 1983 + if (ret) { 1984 + fprintf(stderr, "pthread_join: %d\n", ret); 1985 + exit(EXIT_FAILURE); 1986 + } 1987 + 1988 + if (signal(SIGUSR1, old_handler) == SIG_ERR) { 1989 + perror("signal"); 1990 + exit(EXIT_FAILURE); 1991 + } 1992 + } 1993 + /* Server side of the test: listen on opts->peer_port with a non-blocking socket, announce "LISTENING", and each time the client reports CONTROL_CONTINUE accept and close every pending connection until accept() returns EAGAIN. Stops when the control value read is anything other than CONTROL_CONTINUE. */ 1994 + static void test_stream_transport_change_server(const struct test_opts *opts) 1995 + { 1996 + int s = vsock_stream_listen(VMADDR_CID_ANY, opts->peer_port); 1997 + 1998 + /* Set the socket to be nonblocking because connects that have been interrupted 1999 + * (EINTR) can fill the receiver's accept queue anyway, leading to connect failure. 2000 + * As of today (6.15) in such a situation there is no way to understand, from the 2001 + * client side, if the connection has been queued in the server or not. 2002 + */ 2003 + if (fcntl(s, F_SETFL, fcntl(s, F_GETFL, 0) | O_NONBLOCK) < 0) { 2004 + perror("fcntl"); 2005 + exit(EXIT_FAILURE); 2006 + } 2007 + control_writeln("LISTENING"); 2008 + 2009 + while (control_readulong() == CONTROL_CONTINUE) { 2010 + /* Must accept the connection, otherwise the `listen` 2011 + * queue will fill up and new connections will fail. 2012 + * There can be more than one queued connection, 2013 + * clear them all. 
2014 + */ 2015 + while (true) { 2016 + int client = accept(s, NULL, NULL); 2017 + 2018 + if (client < 0) { 2019 + if (errno == EAGAIN) 2020 + break; 2021 + 2022 + perror("accept"); 2023 + exit(EXIT_FAILURE); 2024 + } 2025 + 2026 + close(client); 2027 + } 2028 + } 2029 + 2030 + close(s); 2031 + } 2032 + 1872 2033 static void test_stream_linger_client(const struct test_opts *opts) 1873 2034 { 1874 2035 int fd; ··· 2270 2105 .name = "SOCK_STREAM SO_LINGER close() on unread", 2271 2106 .run_client = test_stream_nolinger_client, 2272 2107 .run_server = test_stream_nolinger_server, 2108 + }, 2109 + { 2110 + .name = "SOCK_STREAM transport change null-ptr-deref", 2111 + .run_client = test_stream_transport_change_client, 2112 + .run_server = test_stream_transport_change_server, 2273 2113 }, 2274 2114 {}, 2275 2115 };