Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'mptcp-more-socket-options'

Mat Martineau says:

====================
mptcp: More socket option support

These patches add MPTCP socket support for a few additional socket
options: IP_TOS, IP_FREEBIND, IP_TRANSPARENT, IPV6_FREEBIND, and
IPV6_TRANSPARENT.

Patch 1 exposes __ip_sock_set_tos() for use in patch 2.

Patch 2 adds IP_TOS support.

Patches 3 and 4 add the freebind and transparent support, with a
selftest for the latter.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+245 -6
+1
include/net/ip.h
··· 783 783 void ip_sock_set_pktinfo(struct sock *sk); 784 784 void ip_sock_set_recverr(struct sock *sk); 785 785 void ip_sock_set_tos(struct sock *sk, int val); 786 + void __ip_sock_set_tos(struct sock *sk, int val); 786 787 787 788 #endif /* _IP_H */
+1 -1
net/ipv4/ip_sockglue.c
··· 576 576 return err; 577 577 } 578 578 579 - static void __ip_sock_set_tos(struct sock *sk, int val) 579 + void __ip_sock_set_tos(struct sock *sk, int val) 580 580 { 581 581 if (sk->sk_type == SOCK_STREAM) { 582 582 val &= ~INET_ECN_MASK;
+105 -1
net/mptcp/sockopt.c
··· 390 390 391 391 switch (optname) { 392 392 case IPV6_V6ONLY: 393 + case IPV6_TRANSPARENT: 394 + case IPV6_FREEBIND: 393 395 lock_sock(sk); 394 396 ssock = __mptcp_nmpc_socket(msk); 395 397 if (!ssock) { ··· 400 398 } 401 399 402 400 ret = tcp_setsockopt(ssock->sk, SOL_IPV6, optname, optval, optlen); 403 - if (ret == 0) 401 + if (ret != 0) { 402 + release_sock(sk); 403 + return ret; 404 + } 405 + 406 + sockopt_seq_inc(msk); 407 + 408 + switch (optname) { 409 + case IPV6_V6ONLY: 404 410 sk->sk_ipv6only = ssock->sk->sk_ipv6only; 411 + break; 412 + case IPV6_TRANSPARENT: 413 + inet_sk(sk)->transparent = inet_sk(ssock->sk)->transparent; 414 + break; 415 + case IPV6_FREEBIND: 416 + inet_sk(sk)->freebind = inet_sk(ssock->sk)->freebind; 417 + break; 418 + } 405 419 406 420 release_sock(sk); 407 421 break; ··· 616 598 return ret; 617 599 } 618 600 601 + static int mptcp_setsockopt_sol_ip_set_transparent(struct mptcp_sock *msk, int optname, 602 + sockptr_t optval, unsigned int optlen) 603 + { 604 + struct sock *sk = (struct sock *)msk; 605 + struct inet_sock *issk; 606 + struct socket *ssock; 607 + int err; 608 + 609 + err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen); 610 + if (err != 0) 611 + return err; 612 + 613 + lock_sock(sk); 614 + 615 + ssock = __mptcp_nmpc_socket(msk); 616 + if (!ssock) { 617 + release_sock(sk); 618 + return -EINVAL; 619 + } 620 + 621 + issk = inet_sk(ssock->sk); 622 + 623 + switch (optname) { 624 + case IP_FREEBIND: 625 + issk->freebind = inet_sk(sk)->freebind; 626 + break; 627 + case IP_TRANSPARENT: 628 + issk->transparent = inet_sk(sk)->transparent; 629 + break; 630 + default: 631 + release_sock(sk); 632 + WARN_ON_ONCE(1); 633 + return -EOPNOTSUPP; 634 + } 635 + 636 + sockopt_seq_inc(msk); 637 + release_sock(sk); 638 + return 0; 639 + } 640 + 641 + static int mptcp_setsockopt_v4_set_tos(struct mptcp_sock *msk, int optname, 642 + sockptr_t optval, unsigned int optlen) 643 + { 644 + struct mptcp_subflow_context *subflow; 645 + struct sock *sk = (struct sock *)msk; 646 + int err, val; 647 + 648 + err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen); 649 + 650 + if (err != 0) 651 + return err; 652 + 653 + lock_sock(sk); 654 + sockopt_seq_inc(msk); 655 + val = inet_sk(sk)->tos; 656 + mptcp_for_each_subflow(msk, subflow) { 657 + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 658 + 659 + __ip_sock_set_tos(ssk, val); 660 + } 661 + release_sock(sk); 662 + 663 + return err; 664 + } 665 + 666 + static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname, 667 + sockptr_t optval, unsigned int optlen) 668 + { 669 + switch (optname) { 670 + case IP_FREEBIND: 671 + case IP_TRANSPARENT: 672 + return mptcp_setsockopt_sol_ip_set_transparent(msk, optname, optval, optlen); 673 + case IP_TOS: 674 + return mptcp_setsockopt_v4_set_tos(msk, optname, optval, optlen); 675 + } 676 + 677 + return -EOPNOTSUPP; 678 + } 679 + 619 680 static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, 620 681 sockptr_t optval, unsigned int optlen) 621 682 { ··· 733 636 release_sock(sk); 734 637 if (ssk) 735 638 return tcp_setsockopt(ssk, level, optname, optval, optlen); 639 + 640 + if (level == SOL_IP) 641 + return mptcp_setsockopt_v4(msk, optname, optval, optlen); 736 642 737 643 if (level == SOL_IPV6) 738 644 return mptcp_setsockopt_v6(msk, optname, optval, optlen); ··· 1103 1003 ssk->sk_priority = sk->sk_priority; 1104 1004 ssk->sk_bound_dev_if = sk->sk_bound_dev_if; 1105 1005 ssk->sk_incoming_cpu = sk->sk_incoming_cpu; 1006 + __ip_sock_set_tos(ssk, inet_sk(sk)->tos); 1106 1007 1107 1008 if (sk->sk_userlocks & tx_rx_locks) { 1108 1009 ssk->sk_userlocks |= sk->sk_userlocks & tx_rx_locks; ··· 1129 1028 1130 1029 if (inet_csk(sk)->icsk_ca_ops != inet_csk(ssk)->icsk_ca_ops) 1131 1030 tcp_set_congestion_control(ssk, msk->ca_name, false, true); 1031 + 1032 + inet_sk(ssk)->transparent = inet_sk(sk)->transparent; 1033 + inet_sk(ssk)->freebind = inet_sk(sk)->freebind; 1132 1034 } 1133 1035 1134 1036 static void __mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk)
+2 -1
net/mptcp/subflow.c
··· 1425 1425 if (addr.ss_family == AF_INET6) 1426 1426 addrlen = sizeof(struct sockaddr_in6); 1427 1427 #endif 1428 + mptcp_sockopt_sync(msk, ssk); 1429 + 1428 1430 ssk->sk_bound_dev_if = ifindex; 1429 1431 err = kernel_bind(sf, (struct sockaddr *)&addr, addrlen); 1430 1432 if (err) ··· 1443 1441 mptcp_info2sockaddr(remote, &addr, ssk->sk_family); 1444 1442 1445 1443 mptcp_add_pending_subflow(msk, subflow); 1446 - mptcp_sockopt_sync(msk, ssk); 1447 1444 err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK); 1448 1445 if (err && err != -EINPROGRESS) 1449 1446 goto failed_unlink;
+6 -2
tools/testing/selftests/net/mptcp/config
··· 13 13 CONFIG_NFT_COMPAT=m 14 14 CONFIG_NETFILTER_XTABLES=m 15 15 CONFIG_NETFILTER_XT_MATCH_BPF=m 16 - CONFIG_NF_TABLES_IPV4=y 17 - CONFIG_NF_TABLES_IPV6=y 16 + CONFIG_NF_TABLES_INET=y 17 + CONFIG_NFT_TPROXY=m 18 + CONFIG_NFT_SOCKET=m 19 + CONFIG_IP_ADVANCED_ROUTER=y 20 + CONFIG_IP_MULTIPLE_TABLES=y 21 + CONFIG_IPV6_MULTIPLE_TABLES=y
+50 -1
tools/testing/selftests/net/mptcp/mptcp_connect.c
··· 75 75 unsigned int timestampns:1; 76 76 }; 77 77 78 + struct cfg_sockopt_types { 79 + unsigned int transparent:1; 80 + }; 81 + 78 82 static struct cfg_cmsg_types cfg_cmsg_types; 83 + static struct cfg_sockopt_types cfg_sockopt_types; 79 84 80 85 static void die_usage(void) 81 86 { ··· 98 93 fprintf(stderr, "\t-u -- check mptcp ulp\n"); 99 94 fprintf(stderr, "\t-w num -- wait num sec before closing the socket\n"); 100 95 fprintf(stderr, "\t-c cmsg -- test cmsg type <cmsg>\n"); 96 + fprintf(stderr, "\t-o option -- test sockopt <option>\n"); 101 97 fprintf(stderr, 102 98 "\t-P [saveWithPeek|saveAfterPeek] -- save data with/after MSG_PEEK form tcp socket\n"); 103 99 exit(1); ··· 191 185 } 192 186 } 193 187 188 + static void set_transparent(int fd, int pf) 189 + { 190 + int one = 1; 191 + 192 + switch (pf) { 193 + case AF_INET: 194 + if (-1 == setsockopt(fd, SOL_IP, IP_TRANSPARENT, &one, sizeof(one))) 195 + perror("IP_TRANSPARENT"); 196 + break; 197 + case AF_INET6: 198 + if (-1 == setsockopt(fd, IPPROTO_IPV6, IPV6_TRANSPARENT, &one, sizeof(one))) 199 + perror("IPV6_TRANSPARENT"); 200 + break; 201 + } 202 + } 203 + 194 204 static int sock_listen_mptcp(const char * const listenaddr, 195 205 const char * const port) 196 206 { ··· 233 211 if (-1 == setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &one, 234 212 sizeof(one))) 235 213 perror("setsockopt"); 214 + 215 + if (cfg_sockopt_types.transparent) 216 + set_transparent(sock, pf); 236 217 237 218 if (bind(sock, a->ai_addr, a->ai_addrlen) == 0) 238 219 break; /* success */ ··· 969 944 exit(1); 970 945 } 971 946 947 + static void parse_setsock_options(const char *name) 948 + { 949 + char *next = strchr(name, ','); 950 + unsigned int len = 0; 951 + 952 + if (next) { 953 + parse_setsock_options(next + 1); 954 + len = next - name; 955 + } else { 956 + len = strlen(name); 957 + } 958 + 959 + if (strncmp(name, "TRANSPARENT", len) == 0) { 960 + cfg_sockopt_types.transparent = 1; 961 + return; 962 + } 963 + 964 + fprintf(stderr, "Unrecognized setsockopt option %s\n", name); 965 + exit(1); 966 + } 967 + 972 968 int main_loop(void) 973 969 { 974 970 int fd; ··· 1093 1047 { 1094 1048 int c; 1095 1049 1096 - while ((c = getopt(argc, argv, "6jr:lp:s:hut:T:m:S:R:w:M:P:c:")) != -1) { 1050 + while ((c = getopt(argc, argv, "6jr:lp:s:hut:T:m:S:R:w:M:P:c:o:")) != -1) { 1097 1051 switch (c) { 1098 1052 case 'j': 1099 1053 cfg_join = true; ··· 1153 1107 break; 1154 1108 case 'c': 1155 1109 parse_cmsg_types(optarg); 1110 + break; 1111 + case 'o': 1112 + parse_setsock_options(optarg); 1156 1113 break; 1157 1114 } 1158 1115 }
+80
tools/testing/selftests/net/mptcp/mptcp_connect.sh
··· 671 671 run_tests_lo $1 $2 $3 0 672 672 } 673 673 674 + run_test_transparent() 675 + { 676 + local connect_addr="$1" 677 + local msg="$2" 678 + 679 + local connector_ns="$ns1" 680 + local listener_ns="$ns2" 681 + local lret=0 682 + local r6flag="" 683 + 684 + # skip if we don't want v6 685 + if ! $ipv6 && is_v6 "${connect_addr}"; then 686 + return 0 687 + fi 688 + 689 + ip netns exec "$listener_ns" nft -f /dev/stdin <<"EOF" 690 + flush ruleset 691 + table inet mangle { 692 + chain divert { 693 + type filter hook prerouting priority -150; 694 + 695 + meta l4proto tcp socket transparent 1 meta mark set 1 accept 696 + tcp dport 20000 tproxy to :20000 meta mark set 1 accept 697 + } 698 + } 699 + EOF 700 + if [ $? -ne 0 ]; then 701 + echo "SKIP: $msg, could not load nft ruleset" 702 + return 703 + fi 704 + 705 + local local_addr 706 + if is_v6 "${connect_addr}"; then 707 + local_addr="::" 708 + r6flag="-6" 709 + else 710 + local_addr="0.0.0.0" 711 + fi 712 + 713 + ip -net "$listener_ns" $r6flag rule add fwmark 1 lookup 100 714 + if [ $? -ne 0 ]; then 715 + ip netns exec "$listener_ns" nft flush ruleset 716 + echo "SKIP: $msg, ip $r6flag rule failed" 717 + return 718 + fi 719 + 720 + ip -net "$listener_ns" route add local $local_addr/0 dev lo table 100 721 + if [ $? -ne 0 ]; then 722 + ip netns exec "$listener_ns" nft flush ruleset 723 + ip -net "$listener_ns" $r6flag rule del fwmark 1 lookup 100 724 + echo "SKIP: $msg, ip route add local $local_addr failed" 725 + return 726 + fi 727 + 728 + echo "INFO: test $msg" 729 + 730 + TEST_COUNT=10000 731 + local extra_args="-o TRANSPARENT" 732 + do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP \ 733 + ${connect_addr} ${local_addr} "${extra_args}" 734 + lret=$? 735 + 736 + ip netns exec "$listener_ns" nft flush ruleset 737 + ip -net "$listener_ns" $r6flag rule del fwmark 1 lookup 100 738 + ip -net "$listener_ns" route del local $local_addr/0 dev lo table 100 739 + 740 + if [ $lret -ne 0 ]; then 741 + echo "FAIL: $msg, mptcp connection error" 1>&2 742 + ret=$lret 743 + return 1 744 + fi 745 + 746 + echo "PASS: $msg" 747 + return 0 748 + } 749 + 674 750 run_tests_peekmode() 675 751 { 676 752 local peekmode="$1" ··· 869 793 run_tests_peekmode "saveWithPeek" 870 794 run_tests_peekmode "saveAfterPeek" 871 795 stop_if_error "Tests with peek mode have failed" 796 + 797 + # connect to ns4 ip address, ns2 should intercept/proxy 798 + run_test_transparent 10.0.3.1 "tproxy ipv4" 799 + run_test_transparent dead:beef:3::1 "tproxy ipv6" 872 800 873 801 display_time 874 802 exit $ret