Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

af_unix: Introduce SO_INQ.

We have an application that uses almost the same code for TCP and
AF_UNIX (SOCK_STREAM).

TCP can use TCP_INQ, but AF_UNIX has no equivalent and requires an
extra syscall, ioctl(SIOCINQ) or getsockopt(SO_MEMINFO), as an
alternative.

Let's introduce SO_INQ, a generic version of TCP_INQ.

If SO_INQ is enabled, recvmsg() will put a cmsg of SCM_INQ that
contains the exact value of ioctl(SIOCINQ). The cmsg is also
included when msg->msg_get_inq is non-zero to make sockets
io_uring-friendly.

Note that SOCK_CUSTOM_SOCKOPT is flagged only for SOCK_STREAM to
override setsockopt() for SOL_SOCKET.

By having the flag in struct unix_sock, instead of struct sock, we
can later add SO_INQ support for TCP and reuse tcp_sk(sk)->recvmsg_inq.

Note also that supporting custom getsockopt() for SOL_SOCKET will need
preparation for other SOCK_CUSTOM_SOCKOPT users (UDP, vsock, MPTCP).

Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Link: https://patch.msgid.link/20250702223606.1054680-7-kuniyu@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

Authored by Kuniyuki Iwashima and committed by Jakub Kicinski
df30285b 8b77338e

+76 -2
+3
arch/alpha/include/uapi/asm/socket.h
··· 152 152 153 153 #define SO_PASSRIGHTS 83 154 154 155 + #define SO_INQ 84 156 + #define SCM_INQ SO_INQ 157 + 155 158 #if !defined(__KERNEL__) 156 159 157 160 #if __BITS_PER_LONG == 64
+3
arch/mips/include/uapi/asm/socket.h
··· 163 163 164 164 #define SO_PASSRIGHTS 83 165 165 166 + #define SO_INQ 84 167 + #define SCM_INQ SO_INQ 168 + 166 169 #if !defined(__KERNEL__) 167 170 168 171 #if __BITS_PER_LONG == 64
+3
arch/parisc/include/uapi/asm/socket.h
··· 144 144 145 145 #define SO_PASSRIGHTS 0x4051 146 146 147 + #define SO_INQ 0x4052 148 + #define SCM_INQ SO_INQ 149 + 147 150 #if !defined(__KERNEL__) 148 151 149 152 #if __BITS_PER_LONG == 64
+3
arch/sparc/include/uapi/asm/socket.h
··· 145 145 146 146 #define SO_PASSRIGHTS 0x005c 147 147 148 + #define SO_INQ 0x005d 149 + #define SCM_INQ SO_INQ 150 + 148 151 #if !defined(__KERNEL__) 149 152 150 153
+1
include/net/af_unix.h
··· 48 48 wait_queue_entry_t peer_wake; 49 49 struct scm_stat scm_stat; 50 50 int inq_len; 51 + bool recvmsg_inq; 51 52 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 52 53 struct sk_buff *oob_skb; 53 54 #endif
+3
include/uapi/asm-generic/socket.h
··· 147 147 148 148 #define SO_PASSRIGHTS 83 149 149 150 + #define SO_INQ 84 151 + #define SCM_INQ SO_INQ 152 + 150 153 #if !defined(__KERNEL__) 151 154 152 155 #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
+60 -2
net/unix/af_unix.c
··· 934 934 #define unix_show_fdinfo NULL 935 935 #endif 936 936 937 + static bool unix_custom_sockopt(int optname) 938 + { 939 + switch (optname) { 940 + case SO_INQ: 941 + return true; 942 + default: 943 + return false; 944 + } 945 + } 946 + 947 + static int unix_setsockopt(struct socket *sock, int level, int optname, 948 + sockptr_t optval, unsigned int optlen) 949 + { 950 + struct unix_sock *u = unix_sk(sock->sk); 951 + struct sock *sk = sock->sk; 952 + int val; 953 + 954 + if (level != SOL_SOCKET) 955 + return -EOPNOTSUPP; 956 + 957 + if (!unix_custom_sockopt(optname)) 958 + return sock_setsockopt(sock, level, optname, optval, optlen); 959 + 960 + if (optlen != sizeof(int)) 961 + return -EINVAL; 962 + 963 + if (copy_from_sockptr(&val, optval, sizeof(val))) 964 + return -EFAULT; 965 + 966 + switch (optname) { 967 + case SO_INQ: 968 + if (sk->sk_type != SOCK_STREAM) 969 + return -EINVAL; 970 + 971 + if (val > 1 || val < 0) 972 + return -EINVAL; 973 + 974 + WRITE_ONCE(u->recvmsg_inq, val); 975 + break; 976 + default: 977 + return -ENOPROTOOPT; 978 + } 979 + 980 + return 0; 981 + } 982 + 937 983 static const struct proto_ops unix_stream_ops = { 938 984 .family = PF_UNIX, 939 985 .owner = THIS_MODULE, ··· 996 950 #endif 997 951 .listen = unix_listen, 998 952 .shutdown = unix_shutdown, 953 + .setsockopt = unix_setsockopt, 999 954 .sendmsg = unix_stream_sendmsg, 1000 955 .recvmsg = unix_stream_recvmsg, 1001 956 .read_skb = unix_stream_read_skb, ··· 1163 1116 1164 1117 switch (sock->type) { 1165 1118 case SOCK_STREAM: 1119 + set_bit(SOCK_CUSTOM_SOCKOPT, &sock->flags); 1166 1120 sock->ops = &unix_stream_ops; 1167 1121 break; 1168 1122 /* ··· 1894 1846 tsk = skb->sk; 1895 1847 skb_free_datagram(sk, skb); 1896 1848 wake_up_interruptible(&unix_sk(sk)->peer_wait); 1849 + 1850 + if (tsk->sk_type == SOCK_STREAM) 1851 + set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags); 1897 1852 1898 1853 /* attach accepted sock to socket */ 1899 1854 unix_state_lock(tsk); ··· 3085 3034 } while 
(size); 3086 3035 3087 3036 mutex_unlock(&u->iolock); 3088 - if (msg) 3037 + if (msg) { 3089 3038 scm_recv_unix(sock, msg, &scm, flags); 3090 - else 3039 + 3040 + if (READ_ONCE(u->recvmsg_inq) || msg->msg_get_inq) { 3041 + msg->msg_inq = READ_ONCE(u->inq_len); 3042 + put_cmsg(msg, SOL_SOCKET, SCM_INQ, 3043 + sizeof(msg->msg_inq), &msg->msg_inq); 3044 + } 3045 + } else { 3091 3046 scm_destroy(&scm); 3047 + } 3092 3048 out: 3093 3049 return copied ? : err; 3094 3050 }