aboutsummaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorDavid S. Miller2018-11-07 16:12:39 -0800
committerDavid S. Miller2018-11-07 16:12:39 -0800
commit7e225619e8afc432fb054ef135f10c11cf8cfc85 (patch)
tree586b21ba8166dc7582625e559c06811ab2940aae /include
parentf601a85bd7883708f48911d8c88e69fe5bde2b4d (diff)
parent7bd2db404efa3daff700e8c88561b435611d07a5 (diff)
Merge branch 'vrf-allow-simultaneous-service-instances-in-default-and-other-VRFs'
Mike Manning says: ==================== vrf: allow simultaneous service instances in default and other VRFs Services currently have to be VRF-aware if they are using an unbound socket. One cannot have multiple service instances running in the default and other VRFs for services that are not VRF-aware and listen on an unbound socket. This is because there is no easy way of isolating packets received in the default VRF from those arriving in other VRFs. This series provides this isolation for stream sockets subject to the existing kernel parameter net.ipv4.tcp_l3mdev_accept not being set, given that this is documented as allowing a single service instance to work across all VRF domains. Similarly, net.ipv4.udp_l3mdev_accept is checked for datagram sockets, and net.ipv4.raw_l3mdev_accept is introduced for raw sockets. The functionality applies to UDP & TCP services as well as those using raw sockets, and is for IPv4 and IPv6. Example of running ssh instances in default and blue VRF: $ /usr/sbin/sshd -D $ ip vrf exec vrf-blue /usr/sbin/sshd $ ss -ta | egrep 'State|ssh' State Recv-Q Send-Q Local Address:Port Peer Address:Port LISTEN 0 128 0.0.0.0%vrf-blue:ssh 0.0.0.0:* LISTEN 0 128 0.0.0.0:ssh 0.0.0.0:* ESTAB 0 0 192.168.122.220:ssh 192.168.122.1:50282 LISTEN 0 128 [::]%vrf-blue:ssh [::]:* LISTEN 0 128 [::]:ssh [::]:* ESTAB 0 0 [3000::2]%vrf-blue:ssh [3000::9]:45896 ESTAB 0 0 [2000::2]:ssh [2000::9]:46398 v1: - Address Paolo Abeni's comments (patch 4/5) - Fix build when CONFIG_NET_L3_MASTER_DEV not defined (patch 1/5) v2: - Address David Aherns' comments (patches 4/5 and 5/5) - Remove patches 3/5 and 5/5 from series for individual submissions - Include a sysctl for raw sockets as recommended by David Ahern - Expand series into 10 patches and provide improved descriptions v3: - Update description for patch 1/10 and remove patch 6/10 v4: - Set default to enabled for raw socket sysctl as recommended by David Ahern v5: - Address review comments from David Ahern in patches 2-5 ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
-rw-r--r--include/net/inet6_hashtables.h5
-rw-r--r--include/net/inet_hashtables.h24
-rw-r--r--include/net/inet_sock.h21
-rw-r--r--include/net/netns/ipv4.h3
-rw-r--r--include/net/raw.h14
-rw-r--r--include/net/udp.h11
6 files changed, 67 insertions, 11 deletions
diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
index 6e91e38a31da..9db98af46985 100644
--- a/include/net/inet6_hashtables.h
+++ b/include/net/inet6_hashtables.h
@@ -115,9 +115,8 @@ int inet6_hash(struct sock *sk);
((__sk)->sk_family == AF_INET6) && \
ipv6_addr_equal(&(__sk)->sk_v6_daddr, (__saddr)) && \
ipv6_addr_equal(&(__sk)->sk_v6_rcv_saddr, (__daddr)) && \
- (!(__sk)->sk_bound_dev_if || \
- ((__sk)->sk_bound_dev_if == (__dif)) || \
- ((__sk)->sk_bound_dev_if == (__sdif))) && \
+ (((__sk)->sk_bound_dev_if == (__dif)) || \
+ ((__sk)->sk_bound_dev_if == (__sdif))) && \
net_eq(sock_net(__sk), (__net)))
#endif /* _INET6_HASHTABLES_H */
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 9141e95529e7..0ce460e93dc4 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -79,6 +79,7 @@ struct inet_ehash_bucket {
struct inet_bind_bucket {
possible_net_t ib_net;
+ int l3mdev;
unsigned short port;
signed char fastreuse;
signed char fastreuseport;
@@ -188,10 +189,21 @@ static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo)
hashinfo->ehash_locks = NULL;
}
+static inline bool inet_sk_bound_dev_eq(struct net *net, int bound_dev_if,
+ int dif, int sdif)
+{
+#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
+ return inet_bound_dev_eq(!!net->ipv4.sysctl_tcp_l3mdev_accept,
+ bound_dev_if, dif, sdif);
+#else
+ return inet_bound_dev_eq(true, bound_dev_if, dif, sdif);
+#endif
+}
+
struct inet_bind_bucket *
inet_bind_bucket_create(struct kmem_cache *cachep, struct net *net,
struct inet_bind_hashbucket *head,
- const unsigned short snum);
+ const unsigned short snum, int l3mdev);
void inet_bind_bucket_destroy(struct kmem_cache *cachep,
struct inet_bind_bucket *tb);
@@ -282,9 +294,8 @@ static inline struct sock *inet_lookup_listener(struct net *net,
#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif, __sdif) \
(((__sk)->sk_portpair == (__ports)) && \
((__sk)->sk_addrpair == (__cookie)) && \
- (!(__sk)->sk_bound_dev_if || \
- ((__sk)->sk_bound_dev_if == (__dif)) || \
- ((__sk)->sk_bound_dev_if == (__sdif))) && \
+ (((__sk)->sk_bound_dev_if == (__dif)) || \
+ ((__sk)->sk_bound_dev_if == (__sdif))) && \
net_eq(sock_net(__sk), (__net)))
#else /* 32-bit arch */
#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
@@ -294,9 +305,8 @@ static inline struct sock *inet_lookup_listener(struct net *net,
(((__sk)->sk_portpair == (__ports)) && \
((__sk)->sk_daddr == (__saddr)) && \
((__sk)->sk_rcv_saddr == (__daddr)) && \
- (!(__sk)->sk_bound_dev_if || \
- ((__sk)->sk_bound_dev_if == (__dif)) || \
- ((__sk)->sk_bound_dev_if == (__sdif))) && \
+ (((__sk)->sk_bound_dev_if == (__dif)) || \
+ ((__sk)->sk_bound_dev_if == (__sdif))) && \
net_eq(sock_net(__sk), (__net)))
#endif /* 64-bit arch */
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index a80fd0ac4563..e8eef85006aa 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -130,6 +130,27 @@ static inline int inet_request_bound_dev_if(const struct sock *sk,
return sk->sk_bound_dev_if;
}
+static inline int inet_sk_bound_l3mdev(const struct sock *sk)
+{
+#ifdef CONFIG_NET_L3_MASTER_DEV
+ struct net *net = sock_net(sk);
+
+ if (!net->ipv4.sysctl_tcp_l3mdev_accept)
+ return l3mdev_master_ifindex_by_index(net,
+ sk->sk_bound_dev_if);
+#endif
+
+ return 0;
+}
+
+static inline bool inet_bound_dev_eq(bool l3mdev_accept, int bound_dev_if,
+ int dif, int sdif)
+{
+ if (!bound_dev_if)
+ return !sdif || l3mdev_accept;
+ return bound_dev_if == dif || bound_dev_if == sdif;
+}
+
struct inet_cork {
unsigned int flags;
__be32 addr;
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index e47503b4e4d1..104a6669e344 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -103,6 +103,9 @@ struct netns_ipv4 {
/* Shall we try to damage output packets if routing dev changes? */
int sysctl_ip_dynaddr;
int sysctl_ip_early_demux;
+#ifdef CONFIG_NET_L3_MASTER_DEV
+ int sysctl_raw_l3mdev_accept;
+#endif
int sysctl_tcp_early_demux;
int sysctl_udp_early_demux;
diff --git a/include/net/raw.h b/include/net/raw.h
index 9c9fa98a91a4..821ff4887f77 100644
--- a/include/net/raw.h
+++ b/include/net/raw.h
@@ -17,7 +17,7 @@
#ifndef _RAW_H
#define _RAW_H
-
+#include <net/inet_sock.h>
#include <net/protocol.h>
#include <linux/icmp.h>
@@ -61,6 +61,7 @@ void raw_seq_stop(struct seq_file *seq, void *v);
int raw_hash_sk(struct sock *sk);
void raw_unhash_sk(struct sock *sk);
+void raw_init(void);
struct raw_sock {
/* inet_sock has to be the first member */
@@ -74,4 +75,15 @@ static inline struct raw_sock *raw_sk(const struct sock *sk)
return (struct raw_sock *)sk;
}
+static inline bool raw_sk_bound_dev_eq(struct net *net, int bound_dev_if,
+ int dif, int sdif)
+{
+#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
+ return inet_bound_dev_eq(!!net->ipv4.sysctl_raw_l3mdev_accept,
+ bound_dev_if, dif, sdif);
+#else
+ return inet_bound_dev_eq(true, bound_dev_if, dif, sdif);
+#endif
+}
+
#endif /* _RAW_H */
diff --git a/include/net/udp.h b/include/net/udp.h
index 9e82cb391dea..a496e441645e 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -252,6 +252,17 @@ static inline int udp_rqueue_get(struct sock *sk)
return sk_rmem_alloc_get(sk) - READ_ONCE(udp_sk(sk)->forward_deficit);
}
+static inline bool udp_sk_bound_dev_eq(struct net *net, int bound_dev_if,
+ int dif, int sdif)
+{
+#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
+ return inet_bound_dev_eq(!!net->ipv4.sysctl_udp_l3mdev_accept,
+ bound_dev_if, dif, sdif);
+#else
+ return inet_bound_dev_eq(true, bound_dev_if, dif, sdif);
+#endif
+}
+
/* net/ipv4/udp.c */
void udp_destruct_sock(struct sock *sk);
void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len);