From 0b392be9a86560dae3af2e7528f226ff465ab549 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Fri, 10 Jun 2016 12:11:06 +0100 Subject: net: ipconfig: avoid warning by making ic_addrservaddr static The symbol ic_addrservaddr is not static, but has no declaration to match so make it static to fix the following warning: net/ipv4/ipconfig.c:130:8: warning: symbol 'ic_addrservaddr' was not declared. Should it be static? Signed-off-by: Ben Dooks Signed-off-by: David S. Miller --- net/ipv4/ipconfig.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4') diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 2ed9dd2b5f2f..eccf9fd190c0 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -127,7 +127,7 @@ __be32 ic_myaddr = NONE; /* My IP address */ static __be32 ic_netmask = NONE; /* Netmask for local subnet */ __be32 ic_gateway = NONE; /* Gateway IP address */ -__be32 ic_addrservaddr = NONE; /* IP Address of the IP addresses'server */ +static __be32 ic_addrservaddr = NONE; /* IP Address of the IP addresses'server */ __be32 ic_servaddr = NONE; /* Boot server IP address */ -- cgit v1.2.3 From 86ef7f9cbfd564377028098cf20cc1c3ec2c776d Mon Sep 17 00:00:00 2001 From: David S. Miller Date: Sat, 11 Jun 2016 20:40:24 -0700 Subject: ipconfig: Protect ic_addrservaddr with IPCONFIG_DYNAMIC. >> net/ipv4/ipconfig.c:130:15: warning: 'ic_addrservaddr' defined but not used [-Wunused-variable] static __be32 ic_addrservaddr = NONE; /* IP Address of the IP addresses'server */ Reported-by: kbuild test robot Signed-off-by: David S. Miller --- net/ipv4/ipconfig.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net/ipv4') diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index eccf9fd190c0..1d71c40eaaf3 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -127,7 +127,9 @@ __be32 ic_myaddr = NONE; /* My IP address */ static __be32 ic_netmask = NONE; /* Netmask for local subnet */ __be32 ic_gateway = NONE; /* Gateway IP address */ +#ifdef IPCONFIG_DYNAMIC static __be32 ic_addrservaddr = NONE; /* IP Address of the IP addresses'server */ +#endif __be32 ic_servaddr = NONE; /* Boot server IP address */ -- cgit v1.2.3 From b46d9f625b07f843c706c2c7d0210a90ccdf143b Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sun, 12 Jun 2016 12:02:46 +0200 Subject: ipv4: fix checksum annotation in udp4_csum_init Reported-by: Cong Wang Cc: Cong Wang Cc: Tom Herbert Fixes: 4068579e1e098fa ("net: Implmement RFC 6936 (zero RX csums for UDP/IPv6") Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv4/udp.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 0ff31d97d485..ba0d8b8b7690 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1755,8 +1755,11 @@ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, return err; } - return skb_checksum_init_zero_check(skb, proto, uh->check, - inet_compute_pseudo); + /* Note, we are only interested in != 0 or == 0, thus the + * force to int. + */ + return (__force int)skb_checksum_init_zero_check(skb, proto, uh->check, + inet_compute_pseudo); } /* -- cgit v1.2.3 From d1e37288c9146dccff830e3253e403af8705b51f Mon Sep 17 00:00:00 2001 From: Su, Xuemin Date: Mon, 13 Jun 2016 11:02:50 +0800 Subject: udp reuseport: fix packet of same flow hashed to different socket There is a corner case in which udp packets belonging to a same flow are hashed to different socket when hslot->count changes from 10 to 11: 1) When hslot->count <= 10, __udp_lib_lookup() searches udp_table->hash, and always passes 'daddr' to udp_ehashfn(). 2) When hslot->count > 10, __udp_lib_lookup() searches udp_table->hash2, but may pass 'INADDR_ANY' to udp_ehashfn() if the sockets are bound to INADDR_ANY instead of some specific addr. That means when hslot->count changes from 10 to 11, the hash calculated by udp_ehashfn() is also changed, and the udp packets belonging to a same flow will be hashed to different socket. This is easily reproduced: 1) Create 10 udp sockets and bind all of them to 0.0.0.0:40000. 2) From the same host send udp packets to 127.0.0.1:40000, record the socket index which receives the packets. 3) Create 1 more udp socket and bind it to 0.0.0.0:44096. The number 44096 is 40000 + UDP_HASH_SIZE(4096), this makes the new socket put into the same hslot as the aformentioned 10 sockets, and makes the hslot->count change from 10 to 11. 4) From the same host send udp packets to 127.0.0.1:40000, and the socket index which receives the packets will be different from the one received in step 2. This should not happen as the socket bound to 0.0.0.0:44096 should not change the behavior of the sockets bound to 0.0.0.0:40000. It's the same case for IPv6, and this patch also fixes that. Signed-off-by: Su, Xuemin Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/udp.c | 73 +++++++++++++--------------------------------------------- net/ipv6/udp.c | 71 +++++++++++++------------------------------------------- 2 files changed, 32 insertions(+), 112 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index ba0d8b8b7690..ca5e8ea29538 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -391,9 +391,9 @@ int udp_v4_get_port(struct sock *sk, unsigned short snum) return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal, hash2_nulladdr); } -static inline int compute_score(struct sock *sk, struct net *net, - __be32 saddr, unsigned short hnum, __be16 sport, - __be32 daddr, __be16 dport, int dif) +static int compute_score(struct sock *sk, struct net *net, + __be32 saddr, __be16 sport, + __be32 daddr, unsigned short hnum, int dif) { int score; struct inet_sock *inet; @@ -434,52 +434,6 @@ static inline int compute_score(struct sock *sk, struct net *net, return score; } -/* - * In this second variant, we check (daddr, dport) matches (inet_rcv_sadd, inet_num) - */ -static inline int compute_score2(struct sock *sk, struct net *net, - __be32 saddr, __be16 sport, - __be32 daddr, unsigned int hnum, int dif) -{ - int score; - struct inet_sock *inet; - - if (!net_eq(sock_net(sk), net) || - ipv6_only_sock(sk)) - return -1; - - inet = inet_sk(sk); - - if (inet->inet_rcv_saddr != daddr || - inet->inet_num != hnum) - return -1; - - score = (sk->sk_family == PF_INET) ? 2 : 1; - - if (inet->inet_daddr) { - if (inet->inet_daddr != saddr) - return -1; - score += 4; - } - - if (inet->inet_dport) { - if (inet->inet_dport != sport) - return -1; - score += 4; - } - - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - return -1; - score += 4; - } - - if (sk->sk_incoming_cpu == raw_smp_processor_id()) - score++; - - return score; -} - static u32 udp_ehashfn(const struct net *net, const __be32 laddr, const __u16 lport, const __be32 faddr, const __be16 fport) @@ -492,11 +446,11 @@ static u32 udp_ehashfn(const struct net *net, const __be32 laddr, udp_ehash_secret + net_hash_mix(net)); } -/* called with read_rcu_lock() */ +/* called with rcu_read_lock() */ static struct sock *udp4_lib_lookup2(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, unsigned int hnum, int dif, - struct udp_hslot *hslot2, unsigned int slot2, + struct udp_hslot *hslot2, struct sk_buff *skb) { struct sock *sk, *result; @@ -506,7 +460,7 @@ static struct sock *udp4_lib_lookup2(struct net *net, result = NULL; badness = 0; udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { - score = compute_score2(sk, net, saddr, sport, + score = compute_score(sk, net, saddr, sport, daddr, hnum, dif); if (score > badness) { reuseport = sk->sk_reuseport; @@ -554,17 +508,22 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, result = udp4_lib_lookup2(net, saddr, sport, daddr, hnum, dif, - hslot2, slot2, skb); + hslot2, skb); if (!result) { + unsigned int old_slot2 = slot2; hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum); slot2 = hash2 & udptable->mask; + /* avoid searching the same slot again. */ + if (unlikely(slot2 == old_slot2)) + return result; + hslot2 = &udptable->hash2[slot2]; if (hslot->count < hslot2->count) goto begin; result = udp4_lib_lookup2(net, saddr, sport, - htonl(INADDR_ANY), hnum, dif, - hslot2, slot2, skb); + daddr, hnum, dif, + hslot2, skb); } return result; } @@ -572,8 +531,8 @@ begin: result = NULL; badness = 0; sk_for_each_rcu(sk, &hslot->head) { - score = compute_score(sk, net, saddr, hnum, sport, - daddr, dport, dif); + score = compute_score(sk, net, saddr, sport, + daddr, hnum, dif); if (score > badness) { reuseport = sk->sk_reuseport; if (reuseport) { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index f421c9f23c5b..005dc82c2138 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -115,11 +115,10 @@ static void udp_v6_rehash(struct sock *sk) udp_lib_rehash(sk, new_hash); } -static inline int compute_score(struct sock *sk, struct net *net, - unsigned short hnum, - const struct in6_addr *saddr, __be16 sport, - const struct in6_addr *daddr, __be16 dport, - int dif) +static int compute_score(struct sock *sk, struct net *net, + const struct in6_addr *saddr, __be16 sport, + const struct in6_addr *daddr, unsigned short hnum, + int dif) { int score; struct inet_sock *inet; @@ -162,54 +161,11 @@ static inline int compute_score(struct sock *sk, struct net *net, return score; } -static inline int compute_score2(struct sock *sk, struct net *net, - const struct in6_addr *saddr, __be16 sport, - const struct in6_addr *daddr, - unsigned short hnum, int dif) -{ - int score; - struct inet_sock *inet; - - if (!net_eq(sock_net(sk), net) || - udp_sk(sk)->udp_port_hash != hnum || - sk->sk_family != PF_INET6) - return -1; - - if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) - return -1; - - score = 0; - inet = inet_sk(sk); - - if (inet->inet_dport) { - if (inet->inet_dport != sport) - return -1; - score++; - } - - if (!ipv6_addr_any(&sk->sk_v6_daddr)) { - if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr)) - return -1; - score++; - } - - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - return -1; - score++; - } - - if (sk->sk_incoming_cpu == raw_smp_processor_id()) - score++; - - return score; -} - -/* called with read_rcu_lock() */ +/* called with rcu_read_lock() */ static struct sock *udp6_lib_lookup2(struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, unsigned int hnum, int dif, - struct udp_hslot *hslot2, unsigned int slot2, + struct udp_hslot *hslot2, struct sk_buff *skb) { struct sock *sk, *result; @@ -219,7 +175,7 @@ static struct sock *udp6_lib_lookup2(struct net *net, result = NULL; badness = -1; udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { - score = compute_score2(sk, net, saddr, sport, + score = compute_score(sk, net, saddr, sport, daddr, hnum, dif); if (score > badness) { reuseport = sk->sk_reuseport; @@ -268,17 +224,22 @@ struct sock *__udp6_lib_lookup(struct net *net, result = udp6_lib_lookup2(net, saddr, sport, daddr, hnum, dif, - hslot2, slot2, skb); + hslot2, skb); if (!result) { + unsigned int old_slot2 = slot2; hash2 = udp6_portaddr_hash(net, &in6addr_any, hnum); slot2 = hash2 & udptable->mask; + /* avoid searching the same slot again. */ + if (unlikely(slot2 == old_slot2)) + return result; + hslot2 = &udptable->hash2[slot2]; if (hslot->count < hslot2->count) goto begin; result = udp6_lib_lookup2(net, saddr, sport, - &in6addr_any, hnum, dif, - hslot2, slot2, skb); + daddr, hnum, dif, + hslot2, skb); } return result; } @@ -286,7 +247,7 @@ begin: result = NULL; badness = -1; sk_for_each_rcu(sk, &hslot->head) { - score = compute_score(sk, net, hnum, saddr, sport, daddr, dport, dif); + score = compute_score(sk, net, saddr, sport, daddr, hnum, dif); if (score > badness) { reuseport = sk->sk_reuseport; if (reuseport) { -- cgit v1.2.3 From 106da663ff495e0aea3ac15b8317aa410754fcac Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 13 Jun 2016 10:31:04 +0200 Subject: ovs/gre,geneve: fix error path when creating an iface After ipgre_newlink()/geneve_configure() call, the netdev is registered. Fixes: 7e059158d57b ("vxlan, gre, geneve: Set a large MTU on ovs-created tunnel devices") CC: David Wragg Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- drivers/net/geneve.c | 10 +++++++--- net/ipv4/ip_gre.c | 10 +++++++--- 2 files changed, 14 insertions(+), 6 deletions(-) (limited to 'net/ipv4') diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index cadefe4fdaa2..086c2dae4c3d 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1508,6 +1508,7 @@ struct net_device *geneve_dev_create_fb(struct net *net, const char *name, { struct nlattr *tb[IFLA_MAX + 1]; struct net_device *dev; + LIST_HEAD(list_kill); int err; memset(tb, 0, sizeof(tb)); @@ -1519,8 +1520,10 @@ struct net_device *geneve_dev_create_fb(struct net *net, const char *name, err = geneve_configure(net, dev, &geneve_remote_unspec, 0, 0, 0, 0, htons(dst_port), true, GENEVE_F_UDP_ZERO_CSUM6_RX); - if (err) - goto err; + if (err) { + free_netdev(dev); + return ERR_PTR(err); + } /* openvswitch users expect packet sizes to be unrestricted, * so set the largest MTU we can. @@ -1532,7 +1535,8 @@ struct net_device *geneve_dev_create_fb(struct net *net, const char *name, return dev; err: - free_netdev(dev); + geneve_dellink(dev, &list_kill); + unregister_netdevice_many(&list_kill); return ERR_PTR(err); } EXPORT_SYMBOL_GPL(geneve_dev_create_fb); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 4d2025f7ec57..08deba679c8c 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1121,6 +1121,7 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name, { struct nlattr *tb[IFLA_MAX + 1]; struct net_device *dev; + LIST_HEAD(list_kill); struct ip_tunnel *t; int err; @@ -1136,8 +1137,10 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name, t->collect_md = true; err = ipgre_newlink(net, dev, tb, NULL); - if (err < 0) - goto out; + if (err < 0) { + free_netdev(dev); + return ERR_PTR(err); + } /* openvswitch users expect packet sizes to be unrestricted, * so set the largest MTU we can. @@ -1148,7 +1151,8 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name, return dev; out: - free_netdev(dev); + ip_tunnel_dellink(dev, &list_kill); + unregister_netdevice_many(&list_kill); return ERR_PTR(err); } EXPORT_SYMBOL_GPL(gretap_fb_dev_create); -- cgit v1.2.3 From da6f1da819d4b9c081a477dec74dc468a0b44290 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 13 Jun 2016 10:31:06 +0200 Subject: ovs/gre: fix rtnl notifications on iface deletion The function gretap_fb_dev_create() (only used by ovs) never calls rtnl_configure_link(). The consequence is that dev->rtnl_link_state is never set to RTNL_LINK_INITIALIZED. During the deletion phase, the function rollback_registered_many() sends a RTM_DELLINK only if dev->rtnl_link_state is set to RTNL_LINK_INITIALIZED. Fixes: b2acd1dc3949 ("openvswitch: Use regular GRE net_device instead of vport") CC: Thomas Graf CC: Pravin B Shelar Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net/ipv4') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 08deba679c8c..07c5cf1838d8 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1149,6 +1149,10 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name, if (err) goto out; + err = rtnl_configure_link(dev, NULL); + if (err < 0) + goto out; + return dev; out: ip_tunnel_dellink(dev, &list_kill); -- cgit v1.2.3 From e582615ad33dbd39623084a02e95567b116e1eea Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 15 Jun 2016 06:24:00 -0700 Subject: gre: fix error handler 1) gre_parse_header() can be called from gre_err() At this point transport header points to ICMP header, not the inner header. 2) We can not really change transport header as ipgre_err() will later assume transport header still points to ICMP header (using icmp_hdr()) 3) pskb_may_pull() logic in gre_parse_header() really works if we are interested at zone pointed by skb->data 4) As Jiri explained in commit b7f8fe251e46 ("gre: do not pull header in ICMP error processing") we should not pull headers in error handler. So this fix : A) changes gre_parse_header() to use skb->data instead of skb_transport_header() B) Adds a nhs parameter to gre_parse_header() so that we can skip the not pulled IP header from error path. This offset is 0 for normal receive path. C) remove obsolete IPV6 includes Signed-off-by: Eric Dumazet Cc: Tom Herbert Cc: Maciej Żenczykowski Cc: Jiri Benc Signed-off-by: David S. Miller --- include/net/gre.h | 2 +- net/ipv4/gre_demux.c | 10 +++++----- net/ipv4/ip_gre.c | 12 ++++-------- net/ipv6/ip6_gre.c | 2 +- 4 files changed, 11 insertions(+), 15 deletions(-) (limited to 'net/ipv4') diff --git a/include/net/gre.h b/include/net/gre.h index 5dce30a6abe3..7a54a31d1d4c 100644 --- a/include/net/gre.h +++ b/include/net/gre.h @@ -26,7 +26,7 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 version); struct net_device *gretap_fb_dev_create(struct net *net, const char *name, u8 name_assign_type); int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, - bool *csum_err, __be16 proto); + bool *csum_err, __be16 proto, int nhs); static inline int gre_calc_hlen(__be16 o_flags) { diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index 4c39f4fd332a..de1d119a4497 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -62,26 +62,26 @@ EXPORT_SYMBOL_GPL(gre_del_protocol); /* Fills in tpi and returns header length to be pulled. */ int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, - bool *csum_err, __be16 proto) + bool *csum_err, __be16 proto, int nhs) { const struct gre_base_hdr *greh; __be32 *options; int hdr_len; - if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr)))) + if (unlikely(!pskb_may_pull(skb, nhs + sizeof(struct gre_base_hdr)))) return -EINVAL; - greh = (struct gre_base_hdr *)skb_transport_header(skb); + greh = (struct gre_base_hdr *)(skb->data + nhs); if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING))) return -EINVAL; tpi->flags = gre_flags_to_tnl_flags(greh->flags); hdr_len = gre_calc_hlen(tpi->flags); - if (!pskb_may_pull(skb, hdr_len)) + if (!pskb_may_pull(skb, nhs + hdr_len)) return -EINVAL; - greh = (struct gre_base_hdr *)skb_transport_header(skb); + greh = (struct gre_base_hdr *)(skb->data + nhs); tpi->proto = greh->protocol; options = (__be32 *)(greh + 1); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 07c5cf1838d8..1d000af7f561 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -49,12 +49,6 @@ #include #include -#if IS_ENABLED(CONFIG_IPV6) -#include -#include -#include -#endif - /* Problems & solutions -------------------- @@ -217,12 +211,14 @@ static void gre_err(struct sk_buff *skb, u32 info) * by themselves??? */ + const struct iphdr *iph = (struct iphdr *)skb->data; const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; struct tnl_ptk_info tpi; bool csum_err = false; - if (gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP)) < 0) { + if (gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), + iph->ihl * 4) < 0) { if (!csum_err) /* ignore csum errors. */ return; } @@ -338,7 +334,7 @@ static int gre_rcv(struct sk_buff *skb) } #endif - hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP)); + hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0); if (hdr_len < 0) goto drop; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index fdc9de276ab1..776d145113e1 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -468,7 +468,7 @@ static int gre_rcv(struct sk_buff *skb) bool csum_err = false; int hdr_len; - hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IPV6)); + hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IPV6), 0); if (hdr_len < 0) goto drop; -- cgit v1.2.3 From 962fcef33b03395051367181a0549d29d109d9a4 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 18 Jun 2016 13:03:36 +0800 Subject: esp: Fix ESN generation under UDP encapsulation Blair Steven noticed that ESN in conjunction with UDP encapsulation is broken because we set the temporary ESP header to the wrong spot. This patch fixes this by first of all using the right spot, i.e., 4 bytes off the real ESP header, and then saving this information so that after encryption we can restore it properly. Fixes: 7021b2e1cddd ("esp4: Switch to new AEAD interface") Reported-by: Blair Steven Signed-off-by: Herbert Xu Acked-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv4/esp4.c | 52 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 20 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 477937465a20..d95631d09248 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -23,6 +23,11 @@ struct esp_skb_cb { void *tmp; }; +struct esp_output_extra { + __be32 seqhi; + u32 esphoff; +}; + #define ESP_SKB_CB(__skb) ((struct esp_skb_cb *)&((__skb)->cb[0])) static u32 esp4_get_mtu(struct xfrm_state *x, int mtu); @@ -35,11 +40,11 @@ static u32 esp4_get_mtu(struct xfrm_state *x, int mtu); * * TODO: Use spare space in skb for this where possible. */ -static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqhilen) +static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int extralen) { unsigned int len; - len = seqhilen; + len = extralen; len += crypto_aead_ivsize(aead); @@ -57,15 +62,16 @@ static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqhilen) return kmalloc(len, GFP_ATOMIC); } -static inline __be32 *esp_tmp_seqhi(void *tmp) +static inline void *esp_tmp_extra(void *tmp) { - return PTR_ALIGN((__be32 *)tmp, __alignof__(__be32)); + return PTR_ALIGN(tmp, __alignof__(struct esp_output_extra)); } -static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int seqhilen) + +static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int extralen) { return crypto_aead_ivsize(aead) ? - PTR_ALIGN((u8 *)tmp + seqhilen, - crypto_aead_alignmask(aead) + 1) : tmp + seqhilen; + PTR_ALIGN((u8 *)tmp + extralen, + crypto_aead_alignmask(aead) + 1) : tmp + extralen; } static inline struct aead_request *esp_tmp_req(struct crypto_aead *aead, u8 *iv) @@ -99,7 +105,7 @@ static void esp_restore_header(struct sk_buff *skb, unsigned int offset) { struct ip_esp_hdr *esph = (void *)(skb->data + offset); void *tmp = ESP_SKB_CB(skb)->tmp; - __be32 *seqhi = esp_tmp_seqhi(tmp); + __be32 *seqhi = esp_tmp_extra(tmp); esph->seq_no = esph->spi; esph->spi = *seqhi; @@ -107,7 +113,11 @@ static void esp_restore_header(struct sk_buff *skb, unsigned int offset) static void esp_output_restore_header(struct sk_buff *skb) { - esp_restore_header(skb, skb_transport_offset(skb) - sizeof(__be32)); + void *tmp = ESP_SKB_CB(skb)->tmp; + struct esp_output_extra *extra = esp_tmp_extra(tmp); + + esp_restore_header(skb, skb_transport_offset(skb) + extra->esphoff - + sizeof(__be32)); } static void esp_output_done_esn(struct crypto_async_request *base, int err) @@ -121,6 +131,7 @@ static void esp_output_done_esn(struct crypto_async_request *base, int err) static int esp_output(struct xfrm_state *x, struct sk_buff *skb) { int err; + struct esp_output_extra *extra; struct ip_esp_hdr *esph; struct crypto_aead *aead; struct aead_request *req; @@ -137,8 +148,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) int tfclen; int nfrags; int assoclen; - int seqhilen; - __be32 *seqhi; + int extralen; __be64 seqno; /* skb is pure payload to encrypt */ @@ -166,21 +176,21 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) nfrags = err; assoclen = sizeof(*esph); - seqhilen = 0; + extralen = 0; if (x->props.flags & XFRM_STATE_ESN) { - seqhilen += sizeof(__be32); - assoclen += seqhilen; + extralen += sizeof(*extra); + assoclen += sizeof(__be32); } - tmp = esp_alloc_tmp(aead, nfrags, seqhilen); + tmp = esp_alloc_tmp(aead, nfrags, extralen); if (!tmp) { err = -ENOMEM; goto error; } - seqhi = esp_tmp_seqhi(tmp); - iv = esp_tmp_iv(aead, tmp, seqhilen); + extra = esp_tmp_extra(tmp); + iv = esp_tmp_iv(aead, tmp, extralen); req = esp_tmp_req(aead, iv); sg = esp_req_sg(aead, req); @@ -247,8 +257,10 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) * encryption. */ if ((x->props.flags & XFRM_STATE_ESN)) { - esph = (void *)(skb_transport_header(skb) - sizeof(__be32)); - *seqhi = esph->spi; + extra->esphoff = (unsigned char *)esph - + skb_transport_header(skb); + esph = (struct ip_esp_hdr *)((unsigned char *)esph - 4); + extra->seqhi = esph->spi; esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi); aead_request_set_callback(req, 0, esp_output_done_esn, skb); } @@ -445,7 +457,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) goto out; ESP_SKB_CB(skb)->tmp = tmp; - seqhi = esp_tmp_seqhi(tmp); + seqhi = esp_tmp_extra(tmp); iv = esp_tmp_iv(aead, tmp, seqhilen); req = esp_tmp_req(aead, iv); sg = esp_req_sg(aead, req); -- cgit v1.2.3 From 70a0dec45174c976c64b4c8c1d0898581f759948 Mon Sep 17 00:00:00 2001 From: Tom Goff Date: Thu, 23 Jun 2016 16:11:57 -0400 Subject: ipmr/ip6mr: Initialize the last assert time of mfc entries. This fixes wrong-interface signaling on 32-bit platforms for entries created when jiffies > 2^31 + MFC_ASSERT_THRESH. Signed-off-by: Tom Goff Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 4 +++- net/ipv6/ip6mr.c | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'net/ipv4') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 21a38e296fe2..5ad48ec77710 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -891,8 +891,10 @@ static struct mfc_cache *ipmr_cache_alloc(void) { struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); - if (c) + if (c) { + c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1; c->mfc_un.res.minvif = MAXVIFS; + } return c; } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index f2e2013f8346..487ef3bc7bbc 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1074,6 +1074,7 @@ static struct mfc6_cache *ip6mr_cache_alloc(void) struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); if (!c) return NULL; + c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1; c->mfc_un.res.minvif = MAXMIFS; return c; } -- cgit v1.2.3 From a3d2e9f8eb1487f4191ff08ce2d3d63702c65a90 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 27 Jun 2016 17:38:50 +0200 Subject: tcp: do not send too big packets at retransmit time Arjun reported a bug in TCP stack and bisected it to a recent commit. In case where we process SACK, we can coalesce multiple skbs into fat ones (tcp_shift_skb_data()), to lower write queue overhead, because we do not expect to retransmit these packets. However, SACK reneging can happen, forcing the sender to retransmit all these packets. If skb->len is above 64KB, we then send buggy IP packets that could hang TSO engine on cxgb4. Neal suggested to use tcp_tso_autosize() instead of tp->gso_segs so that we cook packets of optimal size vs TCP/pacing. Thanks to Arjun for reporting the bug and running the tests ! Fixes: 10d3be569243 ("tcp-tso: do not split TSO packets at retransmit time") Signed-off-by: Eric Dumazet Reported-by: Arjun V Tested-by: Arjun V Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 8bd9911fdd16..e00e972c4e6a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2751,7 +2751,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; struct sk_buff *hole = NULL; - u32 last_lost; + u32 max_segs, last_lost; int mib_idx; int fwd_rexmitting = 0; @@ -2771,6 +2771,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) last_lost = tp->snd_una; } + max_segs = tcp_tso_autosize(sk, tcp_current_mss(sk)); tcp_for_write_queue_from(skb, sk) { __u8 sacked = TCP_SKB_CB(skb)->sacked; int segs; @@ -2784,6 +2785,10 @@ void tcp_xmit_retransmit_queue(struct sock *sk) segs = tp->snd_cwnd - tcp_packets_in_flight(tp); if (segs <= 0) return; + /* In case tcp_shift_skb_data() have aggregated large skbs, + * we need to make sure not sending too bigs TSO packets + */ + segs = min_t(int, segs, max_segs); if (fwd_rexmitting) { begin_fwd: -- cgit v1.2.3