Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net: add SO_DEVMEM_DONTNEED setsockopt to release RX frags

Add an interface for the user to notify the kernel that it is done
reading the devmem dmabuf frags returned as cmsg. The kernel will
drop the reference on the frags to make them available for reuse.

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Kaiyuan Zhang <kaiyuanz@google.com>
Signed-off-by: Mina Almasry <almasrymina@google.com>
Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20240910171458.219195-11-almasrymina@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

Mina Almasry and committed by
Jakub Kicinski
678f6e28 8f0b3cc9

+78
+1
arch/alpha/include/uapi/asm/socket.h
··· 144 144 #define SCM_DEVMEM_LINEAR SO_DEVMEM_LINEAR 145 145 #define SO_DEVMEM_DMABUF 79 146 146 #define SCM_DEVMEM_DMABUF SO_DEVMEM_DMABUF 147 + #define SO_DEVMEM_DONTNEED 80 147 148 148 149 #if !defined(__KERNEL__) 149 150
+1
arch/mips/include/uapi/asm/socket.h
··· 155 155 #define SCM_DEVMEM_LINEAR SO_DEVMEM_LINEAR 156 156 #define SO_DEVMEM_DMABUF 79 157 157 #define SCM_DEVMEM_DMABUF SO_DEVMEM_DMABUF 158 + #define SO_DEVMEM_DONTNEED 80 158 159 159 160 #if !defined(__KERNEL__) 160 161
+1
arch/parisc/include/uapi/asm/socket.h
··· 136 136 #define SCM_DEVMEM_LINEAR SO_DEVMEM_LINEAR 137 137 #define SO_DEVMEM_DMABUF 79 138 138 #define SCM_DEVMEM_DMABUF SO_DEVMEM_DMABUF 139 + #define SO_DEVMEM_DONTNEED 80 139 140 140 141 #if !defined(__KERNEL__) 141 142
+1
arch/sparc/include/uapi/asm/socket.h
··· 137 137 #define SCM_DEVMEM_LINEAR SO_DEVMEM_LINEAR 138 138 #define SO_DEVMEM_DMABUF 0x0058 139 139 #define SCM_DEVMEM_DMABUF SO_DEVMEM_DMABUF 140 + #define SO_DEVMEM_DONTNEED 0x0059 140 141 141 142 #if !defined(__KERNEL__) 142 143
+1
include/uapi/asm-generic/socket.h
··· 139 139 #define SCM_DEVMEM_LINEAR SO_DEVMEM_LINEAR 140 140 #define SO_DEVMEM_DMABUF 79 141 141 #define SCM_DEVMEM_DMABUF SO_DEVMEM_DMABUF 142 + #define SO_DEVMEM_DONTNEED 80 142 143 143 144 #if !defined(__KERNEL__) 144 145
+5
include/uapi/linux/uio.h
··· 33 33 */ 34 34 }; 35 35
/*
 * One contiguous range of devmem frag tokens. The SO_DEVMEM_DONTNEED
 * handler in net/core/sock.c receives an array of these as the
 * setsockopt() option value and, for each entry, erases the indices
 * token_start .. token_start + token_count - 1 from the socket's
 * sk_user_frags xarray.
 */
36 + struct dmabuf_token {
37 + __u32 token_start;	/* first token index of the range */
38 + __u32 token_count;	/* number of consecutive token indices in the range */
39 + };
40 +
36 41 /* 37 42 * UIO_MAXIOV shall be at least 16 1003.1g (5.4.1.1) 38 43 */
+68
net/core/sock.c
··· 124 124 #include <linux/netdevice.h> 125 125 #include <net/protocol.h> 126 126 #include <linux/skbuff.h> 127 + #include <linux/skbuff_ref.h> 127 128 #include <net/net_namespace.h> 128 129 #include <net/request_sock.h> 129 130 #include <net/sock.h> ··· 1050 1049 return 0; 1051 1050 } 1052 1051 1052 + #ifdef CONFIG_PAGE_POOL 1053 + 1054 + /* This is the number of tokens that the user can SO_DEVMEM_DONTNEED in 1055 + * 1 syscall. The limit exists to limit the amount of memory the kernel 1056 + * allocates to copy these tokens. 1057 + */ 1058 + #define MAX_DONTNEED_TOKENS 128 1059 + 1060 + static noinline_for_stack int 1061 + sock_devmem_dontneed(struct sock *sk, sockptr_t optval, unsigned int optlen) 1062 + { 1063 + unsigned int num_tokens, i, j, k, netmem_num = 0; 1064 + struct dmabuf_token *tokens; 1065 + netmem_ref netmems[16]; 1066 + int ret = 0; 1067 + 1068 + if (!sk_is_tcp(sk)) 1069 + return -EBADF; 1070 + 1071 + if (optlen % sizeof(struct dmabuf_token) || 1072 + optlen > sizeof(*tokens) * MAX_DONTNEED_TOKENS) 1073 + return -EINVAL; 1074 + 1075 + tokens = kvmalloc_array(optlen, sizeof(*tokens), GFP_KERNEL); 1076 + if (!tokens) 1077 + return -ENOMEM; 1078 + 1079 + num_tokens = optlen / sizeof(struct dmabuf_token); 1080 + if (copy_from_sockptr(tokens, optval, optlen)) { 1081 + kvfree(tokens); 1082 + return -EFAULT; 1083 + } 1084 + 1085 + xa_lock_bh(&sk->sk_user_frags); 1086 + for (i = 0; i < num_tokens; i++) { 1087 + for (j = 0; j < tokens[i].token_count; j++) { 1088 + netmem_ref netmem = (__force netmem_ref)__xa_erase( 1089 + &sk->sk_user_frags, tokens[i].token_start + j); 1090 + 1091 + if (netmem && 1092 + !WARN_ON_ONCE(!netmem_is_net_iov(netmem))) { 1093 + netmems[netmem_num++] = netmem; 1094 + if (netmem_num == ARRAY_SIZE(netmems)) { 1095 + xa_unlock_bh(&sk->sk_user_frags); 1096 + for (k = 0; k < netmem_num; k++) 1097 + WARN_ON_ONCE(!napi_pp_put_page(netmems[k])); 1098 + netmem_num = 0; 1099 + xa_lock_bh(&sk->sk_user_frags); 1100 + } 1101 + ret++; 1102 + } 
1103 + } 1104 + } 1105 + 1106 + xa_unlock_bh(&sk->sk_user_frags); 1107 + for (k = 0; k < netmem_num; k++) 1108 + WARN_ON_ONCE(!napi_pp_put_page(netmems[k])); 1109 + 1110 + kvfree(tokens); 1111 + return ret; 1112 + } 1113 + #endif 1114 + 1053 1115 void sockopt_lock_sock(struct sock *sk) 1054 1116 { 1055 1117 /* When current->bpf_ctx is set, the setsockopt is called from ··· 1275 1211 ret = -EOPNOTSUPP; 1276 1212 return ret; 1277 1213 } 1214 + #ifdef CONFIG_PAGE_POOL 1215 + case SO_DEVMEM_DONTNEED: 1216 + return sock_devmem_dontneed(sk, optval, optlen); 1217 + #endif 1278 1218 } 1279 1219 1280 1220 sockopt_lock_sock(sk);