aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author David S. Miller 2021-11-20 14:11:00 +0000
committer David S. Miller 2021-11-20 14:11:00 +0000
commit 89f971182417cb27abd82cfc48a7f36b99352ddc (patch)
tree b2ba5218fe17bae705e846e1af473c82fd69e915
parent 979594c5ff7b82e4787c8491680a2658bd88b780 (diff)
parent 5fb62e9cd3adadd95303447ee8e3f62ee98b0e73 (diff)
Merge branch 'mptcp-more-socket-options'
Mat Martineau says:

====================
mptcp: More socket option support

These patches add MPTCP socket support for a few additional socket
options: IP_TOS, IP_FREEBIND, IP_TRANSPARENT, IPV6_FREEBIND, and
IPV6_TRANSPARENT.

Patch 1 exposes __ip_sock_set_tos() for use in patch 2.

Patch 2 adds IP_TOS support.

Patches 3 and 4 add the freebind and transparent support, with a
selftest for the latter.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- include/net/ip.h 1
-rw-r--r-- net/ipv4/ip_sockglue.c 2
-rw-r--r-- net/mptcp/sockopt.c 106
-rw-r--r-- net/mptcp/subflow.c 3
-rw-r--r-- tools/testing/selftests/net/mptcp/config 8
-rw-r--r-- tools/testing/selftests/net/mptcp/mptcp_connect.c 51
-rwxr-xr-x tools/testing/selftests/net/mptcp/mptcp_connect.sh 80
7 files changed, 245 insertions, 6 deletions
diff --git a/include/net/ip.h b/include/net/ip.h
index 7d1088888c10..81e23a102a0d 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -783,5 +783,6 @@ int ip_sock_set_mtu_discover(struct sock *sk, int val);
void ip_sock_set_pktinfo(struct sock *sk);
void ip_sock_set_recverr(struct sock *sk);
void ip_sock_set_tos(struct sock *sk, int val);
+void __ip_sock_set_tos(struct sock *sk, int val);
#endif /* _IP_H */
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 38d29b175ca6..445a9ecaefa1 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -576,7 +576,7 @@ out:
return err;
}
-static void __ip_sock_set_tos(struct sock *sk, int val)
+void __ip_sock_set_tos(struct sock *sk, int val)
{
if (sk->sk_type == SOCK_STREAM) {
val &= ~INET_ECN_MASK;
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 0f1e661c2032..fb43e145cb57 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -390,6 +390,8 @@ static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname,
switch (optname) {
case IPV6_V6ONLY:
+ case IPV6_TRANSPARENT:
+ case IPV6_FREEBIND:
lock_sock(sk);
ssock = __mptcp_nmpc_socket(msk);
if (!ssock) {
@@ -398,8 +400,24 @@ static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname,
}
ret = tcp_setsockopt(ssock->sk, SOL_IPV6, optname, optval, optlen);
- if (ret == 0)
+ if (ret != 0) {
+ release_sock(sk);
+ return ret;
+ }
+
+ sockopt_seq_inc(msk);
+
+ switch (optname) {
+ case IPV6_V6ONLY:
sk->sk_ipv6only = ssock->sk->sk_ipv6only;
+ break;
+ case IPV6_TRANSPARENT:
+ inet_sk(sk)->transparent = inet_sk(ssock->sk)->transparent;
+ break;
+ case IPV6_FREEBIND:
+ inet_sk(sk)->freebind = inet_sk(ssock->sk)->freebind;
+ break;
+ }
release_sock(sk);
break;
@@ -598,6 +616,85 @@ static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t
return ret;
}
+/* Handle IP_FREEBIND and IP_TRANSPARENT on an MPTCP socket.
+ *
+ * The option is first applied to the MPTCP-level socket through
+ * ip_setsockopt(); the resulting flag is then copied, under the msk socket
+ * lock, to the socket returned by __mptcp_nmpc_socket().
+ *
+ * Returns 0 on success, the ip_setsockopt() error if that call fails,
+ * -EINVAL when __mptcp_nmpc_socket() yields no socket, and -EOPNOTSUPP
+ * (with a one-time warning) for any other optname.
+ */
+static int mptcp_setsockopt_sol_ip_set_transparent(struct mptcp_sock *msk, int optname,
+						   sockptr_t optval, unsigned int optlen)
+{
+	struct sock *sk = (struct sock *)msk;
+	struct inet_sock *sub_inet;
+	struct socket *first;
+	int ret;
+
+	ret = ip_setsockopt(sk, SOL_IP, optname, optval, optlen);
+	if (ret)
+		return ret;
+
+	lock_sock(sk);
+
+	first = __mptcp_nmpc_socket(msk);
+	if (!first) {
+		ret = -EINVAL;
+		goto unlock;
+	}
+
+	sub_inet = inet_sk(first->sk);
+
+	if (optname == IP_FREEBIND) {
+		sub_inet->freebind = inet_sk(sk)->freebind;
+	} else if (optname == IP_TRANSPARENT) {
+		sub_inet->transparent = inet_sk(sk)->transparent;
+	} else {
+		/* Not expected: only the two options above are handled here. */
+		release_sock(sk);
+		WARN_ON_ONCE(1);
+		return -EOPNOTSUPP;
+	}
+
+	/* Bump the sockopt sequence only once the flag has been mirrored. */
+	sockopt_seq_inc(msk);
+
+unlock:
+	release_sock(sk);
+	return ret;
+}
+
+/* Handle IP_TOS on an MPTCP socket.
+ *
+ * The value is applied to the MPTCP-level socket through ip_setsockopt()
+ * and the resulting inet_sk(sk)->tos is then pushed to every subflow.
+ *
+ * Returns 0 on success or the error reported by ip_setsockopt().
+ */
+static int mptcp_setsockopt_v4_set_tos(struct mptcp_sock *msk, int optname,
+				       sockptr_t optval, unsigned int optlen)
+{
+	struct mptcp_subflow_context *subflow;
+	struct sock *sk = (struct sock *)msk;
+	int err, val;
+
+	err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen);
+	if (err != 0)
+		return err;
+
+	lock_sock(sk);
+	sockopt_seq_inc(msk);
+	val = inet_sk(sk)->tos;
+	mptcp_for_each_subflow(msk, subflow) {
+		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+		bool slow;
+
+		/* __ip_sock_set_tos() does not take the socket lock itself,
+		 * and holding the msk lock does not protect the subflow
+		 * socket: lock each subflow around the update.
+		 */
+		slow = lock_sock_fast(ssk);
+		__ip_sock_set_tos(ssk, val);
+		unlock_sock_fast(ssk, slow);
+	}
+	release_sock(sk);
+
+	return 0;
+}
+
+/* Dispatch the SOL_IP options supported on MPTCP sockets to their handlers;
+ * every other optname is rejected with -EOPNOTSUPP.
+ */
+static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname,
+			       sockptr_t optval, unsigned int optlen)
+{
+	if (optname == IP_TOS)
+		return mptcp_setsockopt_v4_set_tos(msk, optname, optval, optlen);
+
+	/* Both flags share one handler, which mirrors them to the subflow. */
+	if (optname == IP_FREEBIND || optname == IP_TRANSPARENT)
+		return mptcp_setsockopt_sol_ip_set_transparent(msk, optname, optval, optlen);
+
+	return -EOPNOTSUPP;
+}
+
static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
sockptr_t optval, unsigned int optlen)
{
@@ -637,6 +734,9 @@ int mptcp_setsockopt(struct sock *sk, int level, int optname,
if (ssk)
return tcp_setsockopt(ssk, level, optname, optval, optlen);
+ if (level == SOL_IP)
+ return mptcp_setsockopt_v4(msk, optname, optval, optlen);
+
if (level == SOL_IPV6)
return mptcp_setsockopt_v6(msk, optname, optval, optlen);
@@ -1003,6 +1103,7 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)
ssk->sk_priority = sk->sk_priority;
ssk->sk_bound_dev_if = sk->sk_bound_dev_if;
ssk->sk_incoming_cpu = sk->sk_incoming_cpu;
+ __ip_sock_set_tos(ssk, inet_sk(sk)->tos);
if (sk->sk_userlocks & tx_rx_locks) {
ssk->sk_userlocks |= sk->sk_userlocks & tx_rx_locks;
@@ -1028,6 +1129,9 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)
if (inet_csk(sk)->icsk_ca_ops != inet_csk(ssk)->icsk_ca_ops)
tcp_set_congestion_control(ssk, msk->ca_name, false, true);
+
+ inet_sk(ssk)->transparent = inet_sk(sk)->transparent;
+ inet_sk(ssk)->freebind = inet_sk(sk)->freebind;
}
static void __mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk)
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 49787a1d7b34..b8dd3441f7d0 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1425,6 +1425,8 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
if (addr.ss_family == AF_INET6)
addrlen = sizeof(struct sockaddr_in6);
#endif
+ mptcp_sockopt_sync(msk, ssk);
+
ssk->sk_bound_dev_if = ifindex;
err = kernel_bind(sf, (struct sockaddr *)&addr, addrlen);
if (err)
@@ -1441,7 +1443,6 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
mptcp_info2sockaddr(remote, &addr, ssk->sk_family);
mptcp_add_pending_subflow(msk, subflow);
- mptcp_sockopt_sync(msk, ssk);
err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK);
if (err && err != -EINPROGRESS)
goto failed_unlink;
diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config
index 0faaccd21447..419e71560fd1 100644
--- a/tools/testing/selftests/net/mptcp/config
+++ b/tools/testing/selftests/net/mptcp/config
@@ -13,5 +13,9 @@ CONFIG_NFT_COUNTER=m
CONFIG_NFT_COMPAT=m
CONFIG_NETFILTER_XTABLES=m
CONFIG_NETFILTER_XT_MATCH_BPF=m
-CONFIG_NF_TABLES_IPV4=y
-CONFIG_NF_TABLES_IPV6=y
+CONFIG_NF_TABLES_INET=y
+CONFIG_NFT_TPROXY=m
+CONFIG_NFT_SOCKET=m
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index 95e81d557b08..ada9b80774d4 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -75,7 +75,12 @@ struct cfg_cmsg_types {
unsigned int timestampns:1;
};
+/* Socket options requested on the command line with -o, one bit per option. */
+struct cfg_sockopt_types {
+ /* set for "TRANSPARENT"; the listener then calls set_transparent() before bind() */
+ unsigned int transparent:1;
+};
+
static struct cfg_cmsg_types cfg_cmsg_types;
+static struct cfg_sockopt_types cfg_sockopt_types;
static void die_usage(void)
{
@@ -93,6 +98,7 @@ static void die_usage(void)
fprintf(stderr, "\t-u -- check mptcp ulp\n");
fprintf(stderr, "\t-w num -- wait num sec before closing the socket\n");
fprintf(stderr, "\t-c cmsg -- test cmsg type <cmsg>\n");
+ fprintf(stderr, "\t-o option -- test sockopt <option>\n");
fprintf(stderr,
"\t-P [saveWithPeek|saveAfterPeek] -- save data with/after MSG_PEEK form tcp socket\n");
exit(1);
@@ -185,6 +191,22 @@ static void set_mark(int fd, uint32_t mark)
}
}
+/* Enable the transparent socket option matching address family pf on fd.
+ *
+ * A setsockopt() failure is reported with perror() and otherwise ignored;
+ * families other than AF_INET and AF_INET6 are left untouched.
+ */
+static void set_transparent(int fd, int pf)
+{
+	int enable = 1;
+
+	if (pf == AF_INET) {
+		if (setsockopt(fd, SOL_IP, IP_TRANSPARENT, &enable, sizeof(enable)) == -1)
+			perror("IP_TRANSPARENT");
+	} else if (pf == AF_INET6) {
+		if (setsockopt(fd, IPPROTO_IPV6, IPV6_TRANSPARENT, &enable, sizeof(enable)) == -1)
+			perror("IPV6_TRANSPARENT");
+	}
+}
+
static int sock_listen_mptcp(const char * const listenaddr,
const char * const port)
{
@@ -212,6 +234,9 @@ static int sock_listen_mptcp(const char * const listenaddr,
sizeof(one)))
perror("setsockopt");
+ if (cfg_sockopt_types.transparent)
+ set_transparent(sock, pf);
+
if (bind(sock, a->ai_addr, a->ai_addrlen) == 0)
break; /* success */
@@ -944,6 +969,27 @@ static void parse_cmsg_types(const char *type)
exit(1);
}
+/* Parse the comma-separated list of socket option names given with -o.
+ *
+ * Each recognised name sets the matching bit in cfg_sockopt_types. The
+ * list is walked recursively, so later entries are handled first. An
+ * unrecognised (or empty) entry prints an error and exits with status 1.
+ */
+static void parse_setsock_options(const char *name)
+{
+	static const char transparent[] = "TRANSPARENT";
+	const char *next = strchr(name, ',');
+	size_t len;
+
+	if (next) {
+		parse_setsock_options(next + 1);
+		len = (size_t)(next - name);
+	} else {
+		len = strlen(name);
+	}
+
+	/* Require an exact match: strncmp() bounded by the token length alone
+	 * would also accept an empty token or any prefix such as "T".
+	 */
+	if (len == sizeof(transparent) - 1 &&
+	    strncmp(name, transparent, len) == 0) {
+		cfg_sockopt_types.transparent = 1;
+		return;
+	}
+
+	/* Print only the offending token, not the rest of the list. */
+	fprintf(stderr, "Unrecognized setsockopt option %.*s\n", (int)len, name);
+	exit(1);
+}
+
int main_loop(void)
{
int fd;
@@ -1047,7 +1093,7 @@ static void parse_opts(int argc, char **argv)
{
int c;
- while ((c = getopt(argc, argv, "6jr:lp:s:hut:T:m:S:R:w:M:P:c:")) != -1) {
+ while ((c = getopt(argc, argv, "6jr:lp:s:hut:T:m:S:R:w:M:P:c:o:")) != -1) {
switch (c) {
case 'j':
cfg_join = true;
@@ -1108,6 +1154,9 @@ static void parse_opts(int argc, char **argv)
case 'c':
parse_cmsg_types(optarg);
break;
+ case 'o':
+ parse_setsock_options(optarg);
+ break;
}
}
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index 559173a8e387..a4226b608c68 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -671,6 +671,82 @@ run_tests()
run_tests_lo $1 $2 $3 0
}
+# Run one MPTCP transfer with the listener started with "-o TRANSPARENT"
+# while an nft tproxy ruleset and an fwmark policy route are installed in
+# the listener namespace.
+#
+# $1: address the connector connects to
+# $2: label used in the SKIP/INFO/PASS/FAIL messages
+#
+# Returns 0 on PASS or when the v6 case is skipped, 1 on FAIL (the global
+# "ret" is then set to the transfer status). The SKIP paths after a failed
+# setup step use a bare "return".
+run_test_transparent()
+{
+ local connect_addr="$1"
+ local msg="$2"
+
+ local connector_ns="$ns1"
+ local listener_ns="$ns2"
+ local lret=0
+ local r6flag=""
+
+ # skip if we don't want v6
+ if ! $ipv6 && is_v6 "${connect_addr}"; then
+ return 0
+ fi
+
+ # Load the ruleset from the here-document below in the listener
+ # namespace; the quoted "EOF" keeps the shell from expanding its body.
+ip netns exec "$listener_ns" nft -f /dev/stdin <<"EOF"
+flush ruleset
+table inet mangle {
+ chain divert {
+ type filter hook prerouting priority -150;
+
+ meta l4proto tcp socket transparent 1 meta mark set 1 accept
+ tcp dport 20000 tproxy to :20000 meta mark set 1 accept
+ }
+}
+EOF
+ if [ $? -ne 0 ]; then
+ echo "SKIP: $msg, could not load nft ruleset"
+ return
+ fi
+
+ # Pick the wildcard address of the matching family; "-6" is passed to
+ # the "ip rule" commands below for the v6 case.
+ local local_addr
+ if is_v6 "${connect_addr}"; then
+ local_addr="::"
+ r6flag="-6"
+ else
+ local_addr="0.0.0.0"
+ fi
+
+ # Packets carrying fwmark 1 (set by the ruleset above) use table 100.
+ ip -net "$listener_ns" $r6flag rule add fwmark 1 lookup 100
+ if [ $? -ne 0 ]; then
+ ip netns exec "$listener_ns" nft flush ruleset
+ echo "SKIP: $msg, ip $r6flag rule failed"
+ return
+ fi
+
+ # Table 100 treats every address as local, via lo.
+ ip -net "$listener_ns" route add local $local_addr/0 dev lo table 100
+ if [ $? -ne 0 ]; then
+ ip netns exec "$listener_ns" nft flush ruleset
+ ip -net "$listener_ns" $r6flag rule del fwmark 1 lookup 100
+ echo "SKIP: $msg, ip route add local $local_addr failed"
+ return
+ fi
+
+ echo "INFO: test $msg"
+
+ # NOTE(review): TEST_COUNT is not declared local, so this assignment
+ # stays visible after the function returns - confirm that is intended.
+ TEST_COUNT=10000
+ local extra_args="-o TRANSPARENT"
+ do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP \
+ ${connect_addr} ${local_addr} "${extra_args}"
+ lret=$?
+
+ # Undo the ruleset, policy rule and local route added above.
+ ip netns exec "$listener_ns" nft flush ruleset
+ ip -net "$listener_ns" $r6flag rule del fwmark 1 lookup 100
+ ip -net "$listener_ns" route del local $local_addr/0 dev lo table 100
+
+ if [ $lret -ne 0 ]; then
+ echo "FAIL: $msg, mptcp connection error" 1>&2
+ ret=$lret
+ return 1
+ fi
+
+ echo "PASS: $msg"
+ return 0
+}
+
run_tests_peekmode()
{
local peekmode="$1"
@@ -794,5 +870,9 @@ run_tests_peekmode "saveWithPeek"
run_tests_peekmode "saveAfterPeek"
stop_if_error "Tests with peek mode have failed"
+# connect to ns4 ip address, ns2 should intercept/proxy
+run_test_transparent 10.0.3.1 "tproxy ipv4"
+run_test_transparent dead:beef:3::1 "tproxy ipv6"
+
display_time
exit $ret