Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

mptcp: introduce MPTCP_FULL_INFO getsockopt

Some user-space applications want to monitor subflow utilization.

Dumping the per-subflow tcp_info is not enough, as the PM could close
and re-create the subflows under the hood, fooling the accounting.
Even checking the src/dst addresses used by each subflow might not
be enough, because a new subflow could re-use the same address/port as
the one that was just closed.

This patch introduces a new socket option that allows dumping all the
relevant information all at once (everything, everywhere...), in a
consistent manner.

Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/388
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

Paolo Abeni and committed by
Jakub Kicinski
49243207 6f06b4d4

+149 -2
+24
include/uapi/linux/mptcp.h
··· 249 249 }; 250 250 }; 251 251 252 + struct mptcp_subflow_info { 253 + __u32 id; 254 + struct mptcp_subflow_addrs addrs; 255 + }; 256 + 257 + struct mptcp_full_info { 258 + __u32 size_tcpinfo_kernel; /* must be 0, set by kernel */ 259 + __u32 size_tcpinfo_user; 260 + __u32 size_sfinfo_kernel; /* must be 0, set by kernel */ 261 + __u32 size_sfinfo_user; 262 + __u32 num_subflows; /* must be 0, set by kernel (real subflow count) */ 263 + __u32 size_arrays_user; /* max subflows that userspace is interested in; 264 + * the buffers at subflow_info/tcp_info 265 + * are respectively at least: 266 + * size_arrays * size_sfinfo_user 267 + * size_arrays * size_tcpinfo_user 268 + * bytes wide 269 + */ 270 + __aligned_u64 subflow_info; 271 + __aligned_u64 tcp_info; 272 + struct mptcp_info mptcp_info; 273 + }; 274 + 252 275 /* MPTCP socket options */ 253 276 #define MPTCP_INFO 1 254 277 #define MPTCP_TCPINFO 2 255 278 #define MPTCP_SUBFLOW_ADDRS 3 279 + #define MPTCP_FULL_INFO 4 256 280 257 281 #endif /* _UAPI_MPTCP_H */
+125 -2
net/mptcp/sockopt.c
··· 14 14 #include <net/mptcp.h> 15 15 #include "protocol.h" 16 16 17 - #define MIN_INFO_OPTLEN_SIZE 16 17 + #define MIN_INFO_OPTLEN_SIZE 16 18 + #define MIN_FULL_INFO_OPTLEN_SIZE 40 18 19 19 20 static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk) 20 21 { ··· 982 981 } 983 982 984 983 static int mptcp_get_subflow_data(struct mptcp_subflow_data *sfd, 985 - char __user *optval, int __user *optlen) 984 + char __user *optval, 985 + int __user *optlen) 986 986 { 987 987 int len, copylen; 988 988 ··· 1164 1162 return 0; 1165 1163 } 1166 1164 1165 + static int mptcp_get_full_info(struct mptcp_full_info *mfi, 1166 + char __user *optval, 1167 + int __user *optlen) 1168 + { 1169 + int len; 1170 + 1171 + BUILD_BUG_ON(offsetof(struct mptcp_full_info, mptcp_info) != 1172 + MIN_FULL_INFO_OPTLEN_SIZE); 1173 + 1174 + if (get_user(len, optlen)) 1175 + return -EFAULT; 1176 + 1177 + if (len < MIN_FULL_INFO_OPTLEN_SIZE) 1178 + return -EINVAL; 1179 + 1180 + memset(mfi, 0, sizeof(*mfi)); 1181 + if (copy_from_user(mfi, optval, MIN_FULL_INFO_OPTLEN_SIZE)) 1182 + return -EFAULT; 1183 + 1184 + if (mfi->size_tcpinfo_kernel || 1185 + mfi->size_sfinfo_kernel || 1186 + mfi->num_subflows) 1187 + return -EINVAL; 1188 + 1189 + if (mfi->size_sfinfo_user > INT_MAX || 1190 + mfi->size_tcpinfo_user > INT_MAX) 1191 + return -EINVAL; 1192 + 1193 + return len - MIN_FULL_INFO_OPTLEN_SIZE; 1194 + } 1195 + 1196 + static int mptcp_put_full_info(struct mptcp_full_info *mfi, 1197 + char __user *optval, 1198 + u32 copylen, 1199 + int __user *optlen) 1200 + { 1201 + copylen += MIN_FULL_INFO_OPTLEN_SIZE; 1202 + if (put_user(copylen, optlen)) 1203 + return -EFAULT; 1204 + 1205 + if (copy_to_user(optval, mfi, copylen)) 1206 + return -EFAULT; 1207 + return 0; 1208 + } 1209 + 1210 + static int mptcp_getsockopt_full_info(struct mptcp_sock *msk, char __user *optval, 1211 + int __user *optlen) 1212 + { 1213 + unsigned int sfcount = 0, copylen = 0; 1214 + struct mptcp_subflow_context *subflow; 1215 + struct 
sock *sk = (struct sock *)msk; 1216 + void __user *tcpinfoptr, *sfinfoptr; 1217 + struct mptcp_full_info mfi; 1218 + int len; 1219 + 1220 + len = mptcp_get_full_info(&mfi, optval, optlen); 1221 + if (len < 0) 1222 + return len; 1223 + 1224 + /* don't bother filling the mptcp info if there is not enough 1225 + * user-space-provided storage 1226 + */ 1227 + if (len > 0) { 1228 + mptcp_diag_fill_info(msk, &mfi.mptcp_info); 1229 + copylen += min_t(unsigned int, len, sizeof(struct mptcp_info)); 1230 + } 1231 + 1232 + mfi.size_tcpinfo_kernel = sizeof(struct tcp_info); 1233 + mfi.size_tcpinfo_user = min_t(unsigned int, mfi.size_tcpinfo_user, 1234 + sizeof(struct tcp_info)); 1235 + sfinfoptr = u64_to_user_ptr(mfi.subflow_info); 1236 + mfi.size_sfinfo_kernel = sizeof(struct mptcp_subflow_info); 1237 + mfi.size_sfinfo_user = min_t(unsigned int, mfi.size_sfinfo_user, 1238 + sizeof(struct mptcp_subflow_info)); 1239 + tcpinfoptr = u64_to_user_ptr(mfi.tcp_info); 1240 + 1241 + lock_sock(sk); 1242 + mptcp_for_each_subflow(msk, subflow) { 1243 + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 1244 + struct mptcp_subflow_info sfinfo; 1245 + struct tcp_info tcp_info; 1246 + 1247 + if (sfcount++ >= mfi.size_arrays_user) 1248 + continue; 1249 + 1250 + /* fetch addr/tcp_info only if the user space buffers 1251 + * are wide enough 1252 + */ 1253 + memset(&sfinfo, 0, sizeof(sfinfo)); 1254 + sfinfo.id = subflow->subflow_id; 1255 + if (mfi.size_sfinfo_user > 1256 + offsetof(struct mptcp_subflow_info, addrs)) 1257 + mptcp_get_sub_addrs(ssk, &sfinfo.addrs); 1258 + if (copy_to_user(sfinfoptr, &sfinfo, mfi.size_sfinfo_user)) 1259 + goto fail_release; 1260 + 1261 + if (mfi.size_tcpinfo_user) { 1262 + tcp_get_info(ssk, &tcp_info); 1263 + if (copy_to_user(tcpinfoptr, &tcp_info, 1264 + mfi.size_tcpinfo_user)) 1265 + goto fail_release; 1266 + } 1267 + 1268 + tcpinfoptr += mfi.size_tcpinfo_user; 1269 + sfinfoptr += mfi.size_sfinfo_user; 1270 + } 1271 + release_sock(sk); 1272 + 1273 + 
mfi.num_subflows = sfcount; 1274 + if (mptcp_put_full_info(&mfi, optval, copylen, optlen)) 1275 + return -EFAULT; 1276 + 1277 + return 0; 1278 + 1279 + fail_release: 1280 + release_sock(sk); 1281 + return -EFAULT; 1282 + } 1283 + 1167 1284 static int mptcp_put_int_option(struct mptcp_sock *msk, char __user *optval, 1168 1285 int __user *optlen, int val) 1169 1286 { ··· 1356 1235 switch (optname) { 1357 1236 case MPTCP_INFO: 1358 1237 return mptcp_getsockopt_info(msk, optval, optlen); 1238 + case MPTCP_FULL_INFO: 1239 + return mptcp_getsockopt_full_info(msk, optval, optlen); 1359 1240 case MPTCP_TCPINFO: 1360 1241 return mptcp_getsockopt_tcpinfo(msk, optval, optlen); 1361 1242 case MPTCP_SUBFLOW_ADDRS: